From 8a7840ae1ef7914ef6ae16115ad7297de814d37b Mon Sep 17 00:00:00 2001 From: tompng Date: Sun, 14 Jun 2026 23:30:04 +0900 Subject: [PATCH] Change namespace cache strategy Namespace calculation for each node cant' be cached to document because document lookup is slow for deeply nested nodes. Change namespace cache strategy, inject cached hash as an argument to retrieve namespace/namespaces from XPath match operation. --- lib/rexml/document.rb | 27 +++++++++----- lib/rexml/element.rb | 78 +++++++++++++++++++++++---------------- lib/rexml/xpath_parser.rb | 65 +++++++++++++++++++++----------- test/test_core.rb | 27 ++++++++++++++ test/xpath/test_base.rb | 10 ----- 5 files changed, 134 insertions(+), 73 deletions(-) diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index d5db713d..5b22969e 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -449,17 +449,24 @@ def document private - attr_accessor :namespaces_cache - - # New document level cache is created and available in this block. - # This API is thread unsafe. Users can't change this document in this block. - def enable_cache - @namespaces_cache = {} - begin - yield - ensure - @namespaces_cache = nil + # Returns namespaces defined in attribute list declarations for each element name. + # { element_name => { prefix => uri, ... }, ... } + def attlist_per_element_namespaces # :nodoc: + per_element_namespaces = {} + if doctype + doctype.each do |child| + next unless child.kind_of? AttlistDecl + element_name = child.element_name + child.each do |name, value| + attr = Attribute.new(name, value) + if attr.prefix == 'xmlns' || attr.name == 'xmlns' + namespaces = per_element_namespaces[element_name] ||= {} + namespaces[attr.name] = attr.value + end + end + end end + per_element_namespaces end def build( source ) diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index abdb28a7..91d5f5a5 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -588,12 +588,7 @@ def prefixes # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"} # def namespaces - namespaces_cache = document&.__send__(:namespaces_cache) - if namespaces_cache - namespaces_cache[self] ||= calculate_namespaces - else - calculate_namespaces - end + calculate_namespaces end # :call-seq: @@ -618,13 +613,10 @@ def namespaces # def namespace(prefix=nil) if prefix.nil? - prefix = prefix() + namespace_internal + else + namespace_lookup_internal(prefix) end - prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:") - ns = namespaces[prefix] - - ns = '' if ns.nil? and prefix == 'xmlns' - ns end # :call-seq: @@ -1508,12 +1500,34 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false) end private - def calculate_namespaces - if parent - parent.namespaces.merge(attributes.namespaces) - else - attributes.namespaces - end + + # Returns namespace of the element + def namespace_internal(namespaces = calculate_namespaces) + namespace_lookup_internal(prefix, namespaces) + end + + # Lookup namespace for the given prefix in the context of the element + def namespace_lookup_internal(prefix, namespaces = calculate_namespaces) + prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:") + ns = namespaces[prefix] + ns = '' if ns.nil? and prefix == 'xmlns' + ns + end + + def calculate_namespaces(cache_hash = nil, attlist_element_namespaces = nil) + return cache_hash[self] if cache_hash && cache_hash.key?(self) + + inherited_namespaces = parent ? parent.send(:calculate_namespaces, cache_hash, attlist_element_namespaces) : {} + attlist_element_namespaces ||= document&.send(:attlist_per_element_namespaces) + attlist_namespaces = attlist_element_namespaces&.[](is_a?(Document) ? doctype&.name : expanded_name) + own_namespaces = attributes.send(:own_namespaces) + + # Inherited namespaces can be overridden by attribute list declaration, and both can be overridden by its own attributes + namespaces = inherited_namespaces + namespaces = namespaces.merge(attlist_namespaces) if attlist_namespaces + namespaces = namespaces.merge(own_namespaces) if own_namespaces.any? + cache_hash[self] = namespaces if cache_hash + namespaces end def __to_xpath_helper node @@ -2386,6 +2400,15 @@ def []=( name, value ) @element end + # Returns namespaces directly declared in this attribute set + private def own_namespaces # :nodoc: + namespaces = {} + each_attribute do |attribute| + namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' + end + namespaces + end + # :call-seq: # prefixes -> array_of_prefix_strings # @@ -2424,19 +2447,12 @@ def prefixes # d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"} # def namespaces - namespaces = {} - each_attribute do |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - doctype.attributes_of(expn).each { - |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - } - end + doc = @element.document + doctype = doc&.doctype + attlist_element_namespaces = doc&.send(:attlist_per_element_namespaces) + attlist_namespaces = attlist_element_namespaces&.[](@element.is_a?(Document) ? doctype&.name : @element.expanded_name) + namespaces = own_namespaces + namespaces = attlist_namespaces.merge(namespaces) if attlist_namespaces namespaces end diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 821cca95..c73db927 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -63,6 +63,9 @@ def initialize(strict: false) @namespaces = nil @variables = {} @functions = FunctionsClass.new + @attlist_per_element_namespaces = nil + @document = nil + @element_namespaces_cache = {} @nest = 0 @strict = strict end @@ -85,14 +88,8 @@ def parse path, node node = node.first end - document = node.document - if document - document.__send__(:enable_cache) do - match( path_stack, node ) - end - else - match( path_stack, node ) - end + @document = node.document + match( path_stack, node ) end def get_first path, node @@ -150,7 +147,6 @@ def first( path_stack, node ) end end - def match(path_stack, node) nodeset = [node] result = expr(path_stack, nodeset) @@ -167,20 +163,45 @@ def strict? @strict end - # Returns a String namespace for a node, given a prefix + # Returns a String namespace for a prefix used in xpath. # The rules are: # # 1. Use the supplied namespace mapping first. - # 2. If no mapping was supplied, use the context node to look up the namespace - def get_namespace( node, prefix ) + # 2. If no mapping was supplied, use the context node to look up the namespace as a fallback. + def get_xpath_namespace( node, prefix ) if @namespaces @namespaces[prefix] || '' + elsif node.node_type == :element + element_namespace_lookup(node, prefix) else - return node.namespace( prefix ) if node.node_type == :element '' end end + # Returns attribute's namespace URI while caching the + # intermediate result to speed up retrieval of namespaces + def attribute_namespace(attribute) + attribute.prefix == '' ? '' : element_namespace_lookup(attribute.element, attribute.prefix) + end + + # Return element's namespace URI while caching the + # intermediate result to speed up retrieval of namespaces + def element_namespace(element) + element.send(:namespace_internal, element_namespaces(element)) + end + + # Returns a hash of namespaces for the given element while caching the + # intermediate result to speed up retrieval of namespaces + def element_namespaces(element) + @attlist_per_element_namespaces ||= @document&.send(:attlist_per_element_namespaces) || {} + element.send(:calculate_namespaces, @element_namespaces_cache, @attlist_per_element_namespaces) + end + + # Returns namespace of the prefix in the context of the element, + # while caching the intermediate result to speed up retrieval of namespaces + def element_namespace_lookup(element, prefix) + element.send(:namespace_lookup_internal, prefix, element_namespaces(element)) + end # Expr takes a stack of path elements and a set of nodes (either a Parent # or an Array and returns an Array of matching nodes @@ -641,20 +662,20 @@ def node_test(path_stack, any_type: :element) node.name == name elsif prefix.empty? if strict? - node.name == name and node.namespace == "" + node.name == name and element_namespace(node) == "" else - node.name == name and node.namespace == get_namespace(node, prefix) + node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix) end else - node.name == name and node.namespace == get_namespace(node, prefix) + node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix) end when :attribute if prefix.nil? node.name == name elsif prefix.empty? - node.name == name and node.namespace == "" + node.name == name and attribute_namespace(node) == "" else - node.name == name and node.namespace == get_namespace(node.element, prefix) + node.name == name and attribute_namespace(node) == get_xpath_namespace(node.element, prefix) end else false @@ -665,11 +686,11 @@ def node_test(path_stack, any_type: :element) ->(node) do case node.node_type when :element - namespaces = @namespaces || node.namespaces - node.namespace == namespaces[prefix] + namespaces = @namespaces || element_namespaces(node) + element_namespace(node) == namespaces[prefix] when :attribute - namespaces = @namespaces || node.element.namespaces - node.namespace == namespaces[prefix] + namespaces = @namespaces || element_namespaces(node.element) + attribute_namespace(node) == namespaces[prefix] else false end diff --git a/test/test_core.rb b/test/test_core.rb index beaaeed7..d66beb45 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -916,6 +916,33 @@ def test_attlist_decl assert_equal correct, prefixes end + def test_attlist_namespace_priority + doc = Document.new <<~XML + + ]> + + + + + + XML + a1, a2, d = doc.root.children.grep(REXML::Element) + b = a1.first + c = a2.first + assert_equal('foo', doc.root.namespace) + assert_equal('bar', a1.namespace) + assert_equal('bar', b.namespace) + assert_equal('baz', a2.namespace) + assert_equal('baz', c.namespace) + assert_equal('foo', d.namespace) + assert_equal({}, doc.attributes.namespaces) + assert_equal({ 'xmlns' => 'foo' }, doc.root.attributes.namespaces) + assert_equal({ 'xmlns' => 'bar' }, a1.attributes.namespaces) + assert_equal({ 'xmlns' => 'baz' }, a2.attributes.namespaces) + end + def test_attlist_write doc = File.open(fixture_path("foo.xml")) {|file| Document.new file } out = '' diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 43fe3e99..0e7635a5 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -1312,16 +1312,6 @@ def test_namespaces_0 assert_equal( 1, XPath.match( d, "//x:*" ).size ) end - def test_namespaces_cache - doc = Document.new("") - assert_equal("", XPath.first(doc, "//b[namespace-uri()='1']").to_s) - assert_nil(XPath.first(doc, "//b[namespace-uri()='']")) - - doc.root.delete_namespace - assert_nil(XPath.first(doc, "//b[namespace-uri()='1']")) - assert_equal("", XPath.first(doc, "//b[namespace-uri()='']").to_s) - end - def test_ticket_71 doc = Document.new(%Q{}) el = doc.root.elements[1]