Skip to content

Commit 45e811f

Browse files
committed
fix: propagate bozo flag from entry-level fields
Thread the bozo signal through parse_item, parse_entry, and parse_rss10_item by replacing read_text_str with read_text and OR-accumulating the flag it returns into a caller-supplied &mut bool. This covers the title, description, summary, content, id, author, and namespace-extension fields across the RSS 2.0, Atom, and RSS 1.0 parsers. Closes #70.
1 parent ff4b2a3 commit 45e811f

3 files changed

Lines changed: 300 additions & 33 deletions

File tree

crates/feedparser-rs-core/src/parser/atom.rs

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,23 @@ fn parse_feed_element(
225225
entry_ctx.update_base(&xml_base);
226226
}
227227

228-
match parse_entry(reader, &mut buf, limits, depth, &entry_ctx) {
229-
Ok(entry) => feed.entries.push(entry),
228+
let mut entry_bozo = false;
229+
match parse_entry(
230+
reader,
231+
&mut buf,
232+
limits,
233+
depth,
234+
&entry_ctx,
235+
&mut entry_bozo,
236+
) {
237+
Ok(entry) => {
238+
if entry_bozo && !feed.bozo {
239+
feed.bozo = true;
240+
feed.bozo_exception =
241+
Some("Unresolvable entity in entry field".to_string());
242+
}
243+
feed.entries.push(entry);
244+
}
230245
Err(e) => {
231246
feed.bozo = true;
232247
feed.bozo_exception = Some(e.to_string());
@@ -284,6 +299,7 @@ fn parse_entry(
284299
limits: &ParserLimits,
285300
depth: &mut usize,
286301
base_ctx: &BaseUrlContext,
302+
bozo: &mut bool,
287303
) -> Result<Entry> {
288304
let mut entry = Entry::with_capacity();
289305

@@ -327,7 +343,9 @@ fn parse_entry(
327343
}
328344
}
329345
b"id" if !is_empty => {
330-
entry.id = Some(read_text_str(reader, buf, limits)?.into());
346+
let (text, had_bozo) = read_text(reader, buf, limits)?;
347+
*bozo |= had_bozo;
348+
entry.id = Some(text.into());
331349
}
332350
b"updated" if !is_empty => {
333351
let text = read_text_str(reader, buf, limits)?;
@@ -383,14 +401,16 @@ fn parse_entry(
383401
let handled = if let Some(dc_element) = is_dc_tag(tag) {
384402
let dc_elem = dc_element.to_string();
385403
if !is_empty {
386-
let text = read_text_str(reader, buf, limits)?;
404+
let (text, had_bozo) = read_text(reader, buf, limits)?;
405+
*bozo |= had_bozo;
387406
dublin_core::handle_entry_element(&dc_elem, &text, &mut entry);
388407
}
389408
true
390409
} else if let Some(content_element) = is_content_tag(tag) {
391410
let content_elem = content_element.to_string();
392411
if !is_empty {
393-
let text = read_text_str(reader, buf, limits)?;
412+
let (text, had_bozo) = read_text(reader, buf, limits)?;
413+
*bozo |= had_bozo;
394414
content::handle_entry_element(&content_elem, &text, &mut entry);
395415
}
396416
true
@@ -423,7 +443,8 @@ fn parse_entry(
423443
} else {
424444
let media_elem = media_element.to_string();
425445
if !is_empty {
426-
let text = read_text_str(reader, buf, limits)?;
446+
let (text, had_bozo) = read_text(reader, buf, limits)?;
447+
*bozo |= had_bozo;
427448
media_rss::handle_entry_element(&media_elem, &text, &mut entry);
428449
}
429450
}
@@ -980,4 +1001,55 @@ mod tests {
9801001
Some("https://example.com/entry/1")
9811002
);
9821003
}
1004+
1005+
#[test]
1006+
fn test_entry_bozo_on_unresolvable_entity_in_id() {
1007+
let xml = br#"<?xml version="1.0"?>
1008+
<feed xmlns="http://www.w3.org/2005/Atom">
1009+
<title>Test</title>
1010+
<id>urn:test</id>
1011+
<updated>2024-01-01T00:00:00Z</updated>
1012+
<entry>
1013+
<title>Test Entry</title>
1014+
<id>urn:entry-&unresolvable;</id>
1015+
<updated>2024-01-01T00:00:00Z</updated>
1016+
</entry>
1017+
</feed>"#;
1018+
1019+
let feed = parse_atom10(xml).unwrap();
1020+
assert!(
1021+
feed.bozo,
1022+
"bozo should be true when entry id has unresolvable entity"
1023+
);
1024+
assert_eq!(
1025+
feed.bozo_exception.as_deref(),
1026+
Some("Unresolvable entity in entry field")
1027+
);
1028+
assert_eq!(
1029+
feed.entries.len(),
1030+
1,
1031+
"entry should still be parsed despite bozo"
1032+
);
1033+
}
1034+
1035+
#[test]
1036+
fn test_clean_atom_entry_no_bozo() {
1037+
let xml = br#"<?xml version="1.0"?>
1038+
<feed xmlns="http://www.w3.org/2005/Atom">
1039+
<title>Test</title>
1040+
<id>urn:test</id>
1041+
<updated>2024-01-01T00:00:00Z</updated>
1042+
<entry>
1043+
<title>Normal &amp; Clean Entry</title>
1044+
<id>urn:entry-1</id>
1045+
<updated>2024-01-01T00:00:00Z</updated>
1046+
</entry>
1047+
</feed>"#;
1048+
1049+
let feed = parse_atom10(xml).unwrap();
1050+
assert!(!feed.bozo, "standard XML entities should not trigger bozo");
1051+
assert_eq!(feed.entries.len(), 1);
1052+
// parse_text_construct handles title - entity decoding is handled by quick-xml
1053+
assert!(feed.entries[0].title.is_some());
1054+
}
9831055
}

0 commit comments

Comments
 (0)