From b5ef35566593bb76b4291134c3eb4c99aef15b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dyego=20Aur=C3=A9lio?= Date: Sat, 29 Nov 2025 16:10:40 -0300 Subject: [PATCH 1/2] Add duplicate attribute tracking for CSP nonce validation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements detection and propagation of duplicate attributes through the tokenizer, tree builder, and TreeSink interface to support CSP (Content Security Policy) nonce validation. This enables html5ever consumers (e.g., Servo) to properly implement step 3 of the CSP "is element nonceable" algorithm by checking the `ElementFlags.had_duplicate_attrs` field during nonce validation. Reference: - https://www.w3.org/TR/CSP/#is-element-nonceable - https://github.com/servo/servo/commit/4821bc0ab01e1ed0bb27e86c2df545019bd3856a Signed-off-by: Dyego Aurélio --- html5ever/src/tokenizer/interface.rs | 4 + html5ever/src/tokenizer/mod.rs | 8 + html5ever/src/tree_builder/mod.rs | 77 +++++- html5ever/src/tree_builder/rules.rs | 7 +- markup5ever/interface/tree_builder.rs | 24 ++ .../duplicate-attributes.test | 66 +++++ .../tests/duplicate-attributes-integration.rs | 241 ++++++++++++++++++ rcdom/tests/html-tokenizer.rs | 3 + 8 files changed, 415 insertions(+), 15 deletions(-) create mode 100644 rcdom/custom-html5lib-tokenizer-tests/duplicate-attributes.test create mode 100644 rcdom/tests/duplicate-attributes-integration.rs diff --git a/html5ever/src/tokenizer/interface.rs b/html5ever/src/tokenizer/interface.rs index f2707a02..b1436a71 100644 --- a/html5ever/src/tokenizer/interface.rs +++ b/html5ever/src/tokenizer/interface.rs @@ -49,6 +49,10 @@ pub struct Tag { /// An example of a self closing tag is `<foo />`. pub self_closing: bool, pub attrs: Vec<Attribute>, + /// Whether duplicate attributes were encountered during tokenization. + /// This is used for CSP nonce validation - elements with duplicate + /// attributes are not nonceable per the CSP spec. 
+ pub had_duplicate_attributes: bool, } impl Tag { diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 20c061ea..7dd690ea 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -147,6 +147,9 @@ pub struct Tokenizer { /// Current tag is self-closing? current_tag_self_closing: Cell<bool>, + /// Current tag had duplicate attributes? + current_tag_had_duplicate_attributes: Cell<bool>, + /// Current tag attributes. current_tag_attrs: RefCell<Vec<Attribute>>, @@ -200,6 +203,7 @@ impl Tokenizer { current_tag_kind: Cell::new(StartTag), current_tag_name: RefCell::new(StrTendril::new()), current_tag_self_closing: Cell::new(false), + current_tag_had_duplicate_attributes: Cell::new(false), current_tag_attrs: RefCell::new(vec![]), current_attr_name: RefCell::new(StrTendril::new()), current_attr_value: RefCell::new(StrTendril::new()), @@ -460,6 +464,7 @@ impl Tokenizer { name, self_closing: self.current_tag_self_closing.get(), attrs: std::mem::take(&mut self.current_tag_attrs.borrow_mut()), + had_duplicate_attributes: self.current_tag_had_duplicate_attributes.get(), }); match self.process_token(token) { @@ -504,6 +509,7 @@ impl Tokenizer { fn discard_tag(&self) { self.current_tag_name.borrow_mut().clear(); self.current_tag_self_closing.set(false); + self.current_tag_had_duplicate_attributes.set(false); *self.current_tag_attrs.borrow_mut() = vec![]; } @@ -546,6 +552,7 @@ impl Tokenizer { if dup { self.emit_error(Borrowed("Duplicate attribute")); + self.current_tag_had_duplicate_attributes.set(true); self.current_attr_name.borrow_mut().clear(); self.current_attr_value.borrow_mut().clear(); } else { @@ -2240,6 +2247,7 @@ mod test { name, self_closing: false, attrs: vec![], + had_duplicate_attributes: false, }) } diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index f7552214..8a0379ea 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -12,6 +12,7 @@ pub use 
crate::interface::{create_element, ElemName, ElementFlags, Tracer, TreeSink}; pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText}; pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; +pub use markup5ever::interface::tree_builder::create_element_with_flags; use self::types::*; @@ -736,6 +737,7 @@ where name: subject, self_closing: false, attrs: vec![], + had_duplicate_attributes: false, }); }; @@ -831,10 +833,11 @@ where }; // FIXME: Is there a way to avoid cloning the attributes twice here (once on their // own, once as part of t.clone() above)? - let new_element = create_element( + let new_element = create_element_with_flags( &self.sink, QualName::new(None, ns!(html), tag.name.clone()), tag.attrs.clone(), + tag.had_duplicate_attributes, ); self.open_elems.borrow_mut()[node_index] = new_element.clone(); self.active_formatting.borrow_mut()[node_formatting_index] = @@ -863,10 +866,11 @@ where // 15. // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, // once as part of t.clone() above)? - let new_element = create_element( + let new_element = create_element_with_flags( &self.sink, QualName::new(None, ns!(html), fmt_elem_tag.name.clone()), fmt_elem_tag.attrs.clone(), + fmt_elem_tag.had_duplicate_attributes, ); let new_entry = FormatEntry::Element(new_element.clone(), fmt_elem_tag); @@ -1014,6 +1018,7 @@ where ns!(html), tag.name.clone(), tag.attrs.clone(), + tag.had_duplicate_attributes, ); // Step 9. Replace the entry for entry in the list with an entry for new element. @@ -1358,6 +1363,7 @@ where ns: Namespace, name: LocalName, attrs: Vec, + had_duplicate_attributes: bool, ) -> Handle { declare_tag_set!(form_associatable = "button" "fieldset" "input" "object" @@ -1367,7 +1373,12 @@ where // Step 7. 
let qname = QualName::new(None, ns, name); - let elem = create_element(&self.sink, qname.clone(), attrs.clone()); + let elem = create_element_with_flags( + &self.sink, + qname.clone(), + attrs.clone(), + had_duplicate_attributes, + ); let insertion_point = self.appropriate_place_for_insertion(None); let (node1, node2) = match insertion_point { @@ -1405,15 +1416,27 @@ where } fn insert_element_for(&self, tag: Tag) -> Handle { - self.insert_element(PushFlag::Push, ns!(html), tag.name, tag.attrs) + self.insert_element( + PushFlag::Push, + ns!(html), + tag.name, + tag.attrs, + tag.had_duplicate_attributes, + ) } fn insert_and_pop_element_for(&self, tag: Tag) -> Handle { - self.insert_element(PushFlag::NoPush, ns!(html), tag.name, tag.attrs) + self.insert_element( + PushFlag::NoPush, + ns!(html), + tag.name, + tag.attrs, + tag.had_duplicate_attributes, + ) } fn insert_phantom(&self, name: LocalName) -> Handle { - self.insert_element(PushFlag::Push, ns!(html), name, vec![]) + self.insert_element(PushFlag::Push, ns!(html), name, vec![], false) } /// @@ -1424,8 +1447,13 @@ where only_add_to_element_stack: bool, ) -> Handle { let adjusted_insertion_location = self.appropriate_place_for_insertion(None); - let qname = QualName::new(None, ns, tag.name); - let elem = create_element(&self.sink, qname.clone(), tag.attrs.clone()); + let qname = QualName::new(None, ns, tag.name.clone()); + let elem = create_element_with_flags( + &self.sink, + qname.clone(), + tag.attrs.clone(), + tag.had_duplicate_attributes, + ); if !only_add_to_element_stack { self.insert_at(adjusted_insertion_location, AppendNode(elem.clone())); @@ -1515,6 +1543,7 @@ where ns!(html), tag.name.clone(), tag.attrs.clone(), + tag.had_duplicate_attributes, ); self.active_formatting .borrow_mut() @@ -1650,10 +1679,22 @@ where self.adjust_foreign_attributes(&mut tag); if tag.self_closing { - self.insert_element(PushFlag::NoPush, ns, tag.name, tag.attrs); + self.insert_element( + PushFlag::NoPush, + ns, + tag.name, + 
tag.attrs, + tag.had_duplicate_attributes, + ); ProcessResult::DoneAckSelfClosing } else { - self.insert_element(PushFlag::Push, ns, tag.name, tag.attrs); + self.insert_element( + PushFlag::Push, + ns, + tag.name, + tag.attrs, + tag.had_duplicate_attributes, + ); ProcessResult::Done } } @@ -1818,10 +1859,22 @@ where self.adjust_foreign_attributes(&mut tag); if tag.self_closing { // FIXME(#118): "#; + + let sink = driver::parse_fragment( + sink, + driver::ParseOpts::default(), + QualName::new(None, ns!(html), local_name!("body")), + vec![], + false, + ) + .one(input) + .finish(); + + let flags = sink.had_duplicate_attributes_flags.borrow(); + let has_duplicate = flags.iter().any(|&f| f); + + assert!( + has_duplicate, + "Expected script with duplicate nonce to have had_duplicate_attributes=true" + ); +} diff --git a/rcdom/tests/html-tokenizer.rs b/rcdom/tests/html-tokenizer.rs index 9ff7dc69..c7f43ce9 100644 --- a/rcdom/tests/html-tokenizer.rs +++ b/rcdom/tests/html-tokenizer.rs @@ -135,6 +135,7 @@ impl TokenSink for TokenLogger { }, _ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)), } + t.had_duplicate_attributes = false; self.push(TagToken(t)); }, @@ -250,6 +251,7 @@ fn json_to_token(js: &Value) -> Token { Some(b) => b.get_bool(), None => false, }, + had_duplicate_attributes: false, }), "EndTag" => TagToken(Tag { @@ -257,6 +259,7 @@ fn json_to_token(js: &Value) -> Token { name: LocalName::from(&*args[0].get_str()), attrs: vec![], self_closing: false, + had_duplicate_attributes: false, }), "Comment" => CommentToken(args[0].get_tendril()), From cedc11a2f299fe0311d0a002b0d21ff5b615ade0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dyego=20Aur=C3=A9lio?= Date: Wed, 11 Mar 2026 22:33:30 -0300 Subject: [PATCH 2/2] Bump version to 0.39.0 for release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Dyego Aurélio --- Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml 
b/Cargo.toml index 37a24305..e08ec590 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ members = [ ] [workspace.package] -version = "0.38.0" +version = "0.39.0" license = "MIT OR Apache-2.0" authors = [ "The html5ever Project Developers" ] repository = "https://github.com/servo/html5ever" @@ -21,9 +21,9 @@ rust-version = "1.71.0" # Repo dependencies tendril = { version = "0.5", path = "tendril" } web_atoms = { version = "0.2.3", path = "web_atoms" } -markup5ever = { version = "0.38", path = "markup5ever" } -xml5ever = { version = "0.38", path = "xml5ever" } -html5ever = { version = "0.38", path = "html5ever" } +markup5ever = { version = "0.39", path = "markup5ever" } +xml5ever = { version = "0.39", path = "xml5ever" } +html5ever = { version = "0.39", path = "html5ever" } # External dependencies encoding_rs = "0.8.12"