Skip to content

Commit 6c8fbae

Browse files
committed
Add duplicate attribute tracking for CSP nonce validation.
Implements detection and propagation of duplicate attributes through the tokenizer, tree builder, and TreeSink interface to support CSP (Content Security Policy) nonce validation. This enables html5ever consumers (e.g., Servo) to properly implement step 3 of the CSP "is element nonceable" algorithm by checking the `ElementFlags.had_duplicate_attrs` field during nonce validation. Reference: - https://www.w3.org/TR/CSP/#is-element-nonceable - servo/servo@4821bc0 Signed-off-by: Dyego Aurélio <dyegoaurelio@gmail.com>
1 parent 5ba5652 commit 6c8fbae

8 files changed

Lines changed: 415 additions & 15 deletions

File tree

html5ever/src/tokenizer/interface.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ pub struct Tag {
4949
/// An example of a self closing tag is `<foo />`.
5050
pub self_closing: bool,
5151
pub attrs: Vec<Attribute>,
52+
/// Whether duplicate attributes were encountered during tokenization.
53+
/// This is used for CSP nonce validation - elements with duplicate
54+
/// attributes are not nonceable per the CSP spec.
55+
pub had_duplicate_attrs: bool,
5256
}
5357

5458
impl Tag {

html5ever/src/tokenizer/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ pub struct Tokenizer<Sink> {
147147
/// Current tag is self-closing?
148148
current_tag_self_closing: Cell<bool>,
149149

150+
/// Current tag had duplicate attributes?
151+
current_tag_had_duplicate_attrs: Cell<bool>,
152+
150153
/// Current tag attributes.
151154
current_tag_attrs: RefCell<Vec<Attribute>>,
152155

@@ -200,6 +203,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
200203
current_tag_kind: Cell::new(StartTag),
201204
current_tag_name: RefCell::new(StrTendril::new()),
202205
current_tag_self_closing: Cell::new(false),
206+
current_tag_had_duplicate_attrs: Cell::new(false),
203207
current_tag_attrs: RefCell::new(vec![]),
204208
current_attr_name: RefCell::new(StrTendril::new()),
205209
current_attr_value: RefCell::new(StrTendril::new()),
@@ -460,6 +464,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
460464
name,
461465
self_closing: self.current_tag_self_closing.get(),
462466
attrs: std::mem::take(&mut self.current_tag_attrs.borrow_mut()),
467+
had_duplicate_attrs: self.current_tag_had_duplicate_attrs.get(),
463468
});
464469

465470
match self.process_token(token) {
@@ -504,6 +509,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
504509
fn discard_tag(&self) {
505510
self.current_tag_name.borrow_mut().clear();
506511
self.current_tag_self_closing.set(false);
512+
self.current_tag_had_duplicate_attrs.set(false);
507513
*self.current_tag_attrs.borrow_mut() = vec![];
508514
}
509515

@@ -546,6 +552,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
546552

547553
if dup {
548554
self.emit_error(Borrowed("Duplicate attribute"));
555+
self.current_tag_had_duplicate_attrs.set(true);
549556
self.current_attr_name.borrow_mut().clear();
550557
self.current_attr_value.borrow_mut().clear();
551558
} else {
@@ -2240,6 +2247,7 @@ mod test {
22402247
name,
22412248
self_closing: false,
22422249
attrs: vec![],
2250+
had_duplicate_attrs: false,
22432251
})
22442252
}
22452253

html5ever/src/tree_builder/mod.rs

Lines changed: 65 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
pub use crate::interface::{create_element, ElemName, ElementFlags, Tracer, TreeSink};
1313
pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText};
1414
pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
15+
pub use markup5ever::interface::tree_builder::create_element_with_flags;
1516

1617
use self::types::*;
1718

@@ -736,6 +737,7 @@ where
736737
name: subject,
737738
self_closing: false,
738739
attrs: vec![],
740+
had_duplicate_attrs: false,
739741
});
740742
};
741743

@@ -831,10 +833,11 @@ where
831833
};
832834
// FIXME: Is there a way to avoid cloning the attributes twice here (once on their
833835
// own, once as part of t.clone() above)?
834-
let new_element = create_element(
836+
let new_element = create_element_with_flags(
835837
&self.sink,
836838
QualName::new(None, ns!(html), tag.name.clone()),
837839
tag.attrs.clone(),
840+
tag.had_duplicate_attrs,
838841
);
839842
self.open_elems.borrow_mut()[node_index] = new_element.clone();
840843
self.active_formatting.borrow_mut()[node_formatting_index] =
@@ -863,10 +866,11 @@ where
863866
// 15.
864867
// FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
865868
// once as part of t.clone() above)?
866-
let new_element = create_element(
869+
let new_element = create_element_with_flags(
867870
&self.sink,
868871
QualName::new(None, ns!(html), fmt_elem_tag.name.clone()),
869872
fmt_elem_tag.attrs.clone(),
873+
fmt_elem_tag.had_duplicate_attrs,
870874
);
871875
let new_entry = FormatEntry::Element(new_element.clone(), fmt_elem_tag);
872876

@@ -1014,6 +1018,7 @@ where
10141018
ns!(html),
10151019
tag.name.clone(),
10161020
tag.attrs.clone(),
1021+
tag.had_duplicate_attrs,
10171022
);
10181023

10191024
// Step 9. Replace the entry for entry in the list with an entry for new element.
@@ -1358,6 +1363,7 @@ where
13581363
ns: Namespace,
13591364
name: LocalName,
13601365
attrs: Vec<Attribute>,
1366+
had_duplicate_attrs: bool,
13611367
) -> Handle {
13621368
declare_tag_set!(form_associatable =
13631369
"button" "fieldset" "input" "object"
@@ -1367,7 +1373,12 @@ where
13671373

13681374
// Step 7.
13691375
let qname = QualName::new(None, ns, name);
1370-
let elem = create_element(&self.sink, qname.clone(), attrs.clone());
1376+
let elem = create_element_with_flags(
1377+
&self.sink,
1378+
qname.clone(),
1379+
attrs.clone(),
1380+
had_duplicate_attrs,
1381+
);
13711382

13721383
let insertion_point = self.appropriate_place_for_insertion(None);
13731384
let (node1, node2) = match insertion_point {
@@ -1405,15 +1416,27 @@ where
14051416
}
14061417

14071418
fn insert_element_for(&self, tag: Tag) -> Handle {
1408-
self.insert_element(PushFlag::Push, ns!(html), tag.name, tag.attrs)
1419+
self.insert_element(
1420+
PushFlag::Push,
1421+
ns!(html),
1422+
tag.name,
1423+
tag.attrs,
1424+
tag.had_duplicate_attrs,
1425+
)
14091426
}
14101427

14111428
fn insert_and_pop_element_for(&self, tag: Tag) -> Handle {
1412-
self.insert_element(PushFlag::NoPush, ns!(html), tag.name, tag.attrs)
1429+
self.insert_element(
1430+
PushFlag::NoPush,
1431+
ns!(html),
1432+
tag.name,
1433+
tag.attrs,
1434+
tag.had_duplicate_attrs,
1435+
)
14131436
}
14141437

14151438
fn insert_phantom(&self, name: LocalName) -> Handle {
1416-
self.insert_element(PushFlag::Push, ns!(html), name, vec![])
1439+
self.insert_element(PushFlag::Push, ns!(html), name, vec![], false)
14171440
}
14181441

14191442
/// <https://html.spec.whatwg.org/multipage/parsing.html#insert-an-element-at-the-adjusted-insertion-location>
@@ -1424,8 +1447,13 @@ where
14241447
only_add_to_element_stack: bool,
14251448
) -> Handle {
14261449
let adjusted_insertion_location = self.appropriate_place_for_insertion(None);
1427-
let qname = QualName::new(None, ns, tag.name);
1428-
let elem = create_element(&self.sink, qname.clone(), tag.attrs.clone());
1450+
let qname = QualName::new(None, ns, tag.name.clone());
1451+
let elem = create_element_with_flags(
1452+
&self.sink,
1453+
qname.clone(),
1454+
tag.attrs.clone(),
1455+
tag.had_duplicate_attrs,
1456+
);
14291457

14301458
if !only_add_to_element_stack {
14311459
self.insert_at(adjusted_insertion_location, AppendNode(elem.clone()));
@@ -1515,6 +1543,7 @@ where
15151543
ns!(html),
15161544
tag.name.clone(),
15171545
tag.attrs.clone(),
1546+
tag.had_duplicate_attrs,
15181547
);
15191548
self.active_formatting
15201549
.borrow_mut()
@@ -1650,10 +1679,22 @@ where
16501679
self.adjust_foreign_attributes(&mut tag);
16511680

16521681
if tag.self_closing {
1653-
self.insert_element(PushFlag::NoPush, ns, tag.name, tag.attrs);
1682+
self.insert_element(
1683+
PushFlag::NoPush,
1684+
ns,
1685+
tag.name,
1686+
tag.attrs,
1687+
tag.had_duplicate_attrs,
1688+
);
16541689
ProcessResult::DoneAckSelfClosing
16551690
} else {
1656-
self.insert_element(PushFlag::Push, ns, tag.name, tag.attrs);
1691+
self.insert_element(
1692+
PushFlag::Push,
1693+
ns,
1694+
tag.name,
1695+
tag.attrs,
1696+
tag.had_duplicate_attrs,
1697+
);
16571698
ProcessResult::Done
16581699
}
16591700
}
@@ -1818,10 +1859,22 @@ where
18181859
self.adjust_foreign_attributes(&mut tag);
18191860
if tag.self_closing {
18201861
// FIXME(#118): <script /> in SVG
1821-
self.insert_element(PushFlag::NoPush, current_ns, tag.name, tag.attrs);
1862+
self.insert_element(
1863+
PushFlag::NoPush,
1864+
current_ns,
1865+
tag.name,
1866+
tag.attrs,
1867+
tag.had_duplicate_attrs,
1868+
);
18221869
ProcessResult::DoneAckSelfClosing
18231870
} else {
1824-
self.insert_element(PushFlag::Push, current_ns, tag.name, tag.attrs);
1871+
self.insert_element(
1872+
PushFlag::Push,
1873+
current_ns,
1874+
tag.name,
1875+
tag.attrs,
1876+
tag.had_duplicate_attrs,
1877+
);
18251878
ProcessResult::Done
18261879
}
18271880
}

html5ever/src/tree_builder/rules.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ use crate::tokenizer::TagKind::{EndTag, StartTag};
1616
use crate::tree_builder::tag_sets::*;
1717
use crate::tree_builder::types::*;
1818
use crate::tree_builder::{
19-
create_element, html_elem, ElemName, NodeOrText::AppendNode, StrTendril, Tag, TreeBuilder,
20-
TreeSink,
19+
html_elem, ElemName, NodeOrText::AppendNode, StrTendril, Tag, TreeBuilder, TreeSink,
2120
};
2221
use crate::QualName;
22+
use markup5ever::interface::tree_builder::create_element_with_flags;
2323
use markup5ever::{expanded_name, local_name, ns};
2424
use std::borrow::Cow::Borrowed;
2525

@@ -232,10 +232,11 @@ where
232232
},
233233

234234
Token::Tag(tag @ tag!(<script>)) => {
235-
let elem = create_element(
235+
let elem = create_element_with_flags(
236236
&self.sink,
237237
QualName::new(None, ns!(html), local_name!("script")),
238238
tag.attrs,
239+
tag.had_duplicate_attrs,
239240
);
240241
if self.is_fragment() {
241242
self.sink.mark_script_already_started(&elem);

markup5ever/interface/tree_builder.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,13 @@ pub struct ElementFlags {
6262
///
6363
/// [whatwg integration-point]: https://html.spec.whatwg.org/multipage/#html-integration-point
6464
pub mathml_annotation_xml_integration_point: bool,
65+
66+
/// Whether duplicate attributes were encountered during tokenization.
67+
/// This is used for CSP nonce validation - elements with duplicate
68+
/// attributes are not nonceable per the CSP spec.
69+
///
70+
/// See [CSP Level 3 - Is element nonceable](https://www.w3.org/TR/CSP/#is-element-nonceable)
71+
pub had_duplicate_attrs: bool,
6572
}
6673

6774
/// A constructor for an element.
@@ -70,6 +77,22 @@ pub struct ElementFlags {
7077
///
7178
/// Create an element like `<div class="test-class-name"></div>`:
7279
pub fn create_element<Sink>(sink: &Sink, name: QualName, attrs: Vec<Attribute>) -> Sink::Handle
80+
where
81+
Sink: TreeSink,
82+
{
83+
create_element_with_flags(sink, name, attrs, false)
84+
}
85+
86+
/// A constructor for an element with duplicate attribute information.
87+
///
88+
/// This variant allows passing whether duplicate attributes were encountered
89+
/// during tokenization, which is needed for CSP nonce validation.
90+
pub fn create_element_with_flags<Sink>(
91+
sink: &Sink,
92+
name: QualName,
93+
attrs: Vec<Attribute>,
94+
had_duplicate_attrs: bool,
95+
) -> Sink::Handle
7396
where
7497
Sink: TreeSink,
7598
{
@@ -85,6 +108,7 @@ where
85108
},
86109
_ => {},
87110
}
111+
flags.had_duplicate_attrs = had_duplicate_attrs;
88112
sink.create_element(name, attrs, flags)
89113
}
90114

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
{"tests": [
2+
3+
{"description": "Duplicate attribute - simple case",
4+
"input": "<div id='first' id='second'>",
5+
"output": [
6+
["StartTag", "div", {"id": "first"}]
7+
],
8+
"errors": [
9+
{"code": "duplicate-attribute"}
10+
]},
11+
12+
{"description": "Duplicate attribute - three duplicates",
13+
"input": "<div id='first' id='second' id='third'>",
14+
"output": [
15+
["StartTag", "div", {"id": "first"}]
16+
],
17+
"errors": [
18+
{"code": "duplicate-attribute"},
19+
{"code": "duplicate-attribute"}
20+
]},
21+
22+
{"description": "Duplicate attribute - mixed with other attrs",
23+
"input": "<div class='foo' id='first' title='bar' id='second'>",
24+
"output": [
25+
["StartTag", "div", {"class": "foo", "id": "first", "title": "bar"}]
26+
],
27+
"errors": [
28+
{"code": "duplicate-attribute"}
29+
]},
30+
31+
{"description": "Duplicate nonce attribute",
32+
"input": "<script nonce='abc123' nonce='xyz789'>",
33+
"output": [
34+
["StartTag", "script", {"nonce": "abc123"}]
35+
],
36+
"errors": [
37+
{"code": "duplicate-attribute"}
38+
]},
39+
40+
{"description": "No duplicate - different attributes",
41+
"input": "<div id='test' class='foo'>",
42+
"output": [
43+
["StartTag", "div", {"id": "test", "class": "foo"}]
44+
],
45+
"errors": []},
46+
47+
{"description": "Duplicate attribute - case insensitive (ID vs id)",
48+
"input": "<div ID='first' id='second'>",
49+
"output": [
50+
["StartTag", "div", {"id": "first"}]
51+
],
52+
"errors": [
53+
{"code": "duplicate-attribute"}
54+
]},
55+
56+
{"description": "Multiple different duplicates",
57+
"input": "<div id='a' id='b' class='x' class='y'>",
58+
"output": [
59+
["StartTag", "div", {"id": "a", "class": "x"}]
60+
],
61+
"errors": [
62+
{"code": "duplicate-attribute"},
63+
{"code": "duplicate-attribute"}
64+
]}
65+
66+
]}

0 commit comments

Comments
 (0)