@@ -225,8 +225,23 @@ fn parse_feed_element(
225225 entry_ctx. update_base ( & xml_base) ;
226226 }
227227
228- match parse_entry ( reader, & mut buf, limits, depth, & entry_ctx) {
229- Ok ( entry) => feed. entries . push ( entry) ,
228+ let mut entry_bozo = false ;
229+ match parse_entry (
230+ reader,
231+ & mut buf,
232+ limits,
233+ depth,
234+ & entry_ctx,
235+ & mut entry_bozo,
236+ ) {
237+ Ok ( entry) => {
238+ if entry_bozo && !feed. bozo {
239+ feed. bozo = true ;
240+ feed. bozo_exception =
241+ Some ( "Unresolvable entity in entry field" . to_string ( ) ) ;
242+ }
243+ feed. entries . push ( entry) ;
244+ }
230245 Err ( e) => {
231246 feed. bozo = true ;
232247 feed. bozo_exception = Some ( e. to_string ( ) ) ;
@@ -284,6 +299,7 @@ fn parse_entry(
284299 limits : & ParserLimits ,
285300 depth : & mut usize ,
286301 base_ctx : & BaseUrlContext ,
302+ bozo : & mut bool ,
287303) -> Result < Entry > {
288304 let mut entry = Entry :: with_capacity ( ) ;
289305
@@ -327,7 +343,9 @@ fn parse_entry(
327343 }
328344 }
329345 b"id" if !is_empty => {
330- entry. id = Some ( read_text_str ( reader, buf, limits) ?. into ( ) ) ;
346+ let ( text, had_bozo) = read_text ( reader, buf, limits) ?;
347+ * bozo |= had_bozo;
348+ entry. id = Some ( text. into ( ) ) ;
331349 }
332350 b"updated" if !is_empty => {
333351 let text = read_text_str ( reader, buf, limits) ?;
@@ -383,14 +401,16 @@ fn parse_entry(
383401 let handled = if let Some ( dc_element) = is_dc_tag ( tag) {
384402 let dc_elem = dc_element. to_string ( ) ;
385403 if !is_empty {
386- let text = read_text_str ( reader, buf, limits) ?;
404+ let ( text, had_bozo) = read_text ( reader, buf, limits) ?;
405+ * bozo |= had_bozo;
387406 dublin_core:: handle_entry_element ( & dc_elem, & text, & mut entry) ;
388407 }
389408 true
390409 } else if let Some ( content_element) = is_content_tag ( tag) {
391410 let content_elem = content_element. to_string ( ) ;
392411 if !is_empty {
393- let text = read_text_str ( reader, buf, limits) ?;
412+ let ( text, had_bozo) = read_text ( reader, buf, limits) ?;
413+ * bozo |= had_bozo;
394414 content:: handle_entry_element ( & content_elem, & text, & mut entry) ;
395415 }
396416 true
@@ -423,7 +443,8 @@ fn parse_entry(
423443 } else {
424444 let media_elem = media_element. to_string ( ) ;
425445 if !is_empty {
426- let text = read_text_str ( reader, buf, limits) ?;
446+ let ( text, had_bozo) = read_text ( reader, buf, limits) ?;
447+ * bozo |= had_bozo;
427448 media_rss:: handle_entry_element ( & media_elem, & text, & mut entry) ;
428449 }
429450 }
@@ -980,4 +1001,55 @@ mod tests {
9801001 Some ( "https://example.com/entry/1" )
9811002 ) ;
9821003 }
1004+
/// An entry `<id>` that contains the undefined entity `&unresolvable;` must
/// mark the feed as bozo with the entity-specific message, while the entry
/// itself is still present in the parsed feed.
#[test]
fn test_entry_bozo_on_unresolvable_entity_in_id() {
    let input = br#"<?xml version="1.0"?>
    <feed xmlns="http://www.w3.org/2005/Atom">
        <title>Test</title>
        <id>urn:test</id>
        <updated>2024-01-01T00:00:00Z</updated>
        <entry>
            <title>Test Entry</title>
            <id>urn:entry-&unresolvable;</id>
            <updated>2024-01-01T00:00:00Z</updated>
        </entry>
    </feed>"#;

    let parsed = parse_atom10(input).unwrap();

    // The feed-level flag is what callers inspect, so check it first.
    let is_bozo = parsed.bozo;
    assert!(
        is_bozo,
        "bozo should be true when entry id has unresolvable entity"
    );

    // The recorded reason must be the entity-specific message.
    let reason = parsed.bozo_exception.as_deref();
    assert_eq!(reason, Some("Unresolvable entity in entry field"));

    // A bozo entry is tolerated, not dropped.
    let entry_count = parsed.entries.len();
    assert_eq!(
        entry_count,
        1,
        "entry should still be parsed despite bozo"
    );
}
1034+
/// A well-formed Atom feed whose entry title uses the predefined XML entity
/// `&amp;` must parse without setting the bozo flag and must yield one entry
/// with a title.
#[test]
fn test_clean_atom_entry_no_bozo() {
    // The ampersand in the title is written as `&amp;`: a bare `&` is not
    // well-formed XML and would not exercise the "standard entity" path this
    // test is named for.
    let xml = br#"<?xml version="1.0"?>
    <feed xmlns="http://www.w3.org/2005/Atom">
        <title>Test</title>
        <id>urn:test</id>
        <updated>2024-01-01T00:00:00Z</updated>
        <entry>
            <title>Normal &amp; Clean Entry</title>
            <id>urn:entry-1</id>
            <updated>2024-01-01T00:00:00Z</updated>
        </entry>
    </feed>"#;

    let feed = parse_atom10(xml).unwrap();
    assert!(!feed.bozo, "standard XML entities should not trigger bozo");
    assert_eq!(feed.entries.len(), 1);
    // parse_text_construct handles title - entity decoding is handled by quick-xml
    assert!(feed.entries[0].title.is_some());
}
9831055}
0 commit comments