@@ -22,10 +22,14 @@ class ScriptureTextType(Enum):
2222
2323
def _is_embed_style(marker: Optional[str]) -> bool:
    """Tell whether *marker* names an embed style.

    Asterisks are stripped from both ends of the marker before it is looked
    up in ``_EMBED_STYLES``. ``None`` is never an embed style.
    """
    if marker is None:
        return False
    return marker.strip("*") in _EMBED_STYLES
2626
2727
28- class ScriptureRefUsfmParserHandler (UsfmParserHandler , ABC ):
28+ def _is_private_use_marker (marker : str ) -> bool :
29+ return marker is not None and marker .startswith ("z" )
30+
31+
32+ class ScriptureRefUsfmParserHandlerBase (UsfmParserHandler , ABC ):
2933 def __init__ (self ) -> None :
3034 self ._cur_verse_ref : VerseRef = VerseRef ()
3135 self ._cur_elements_stack : List [ScriptureElement ] = []
@@ -46,22 +50,29 @@ def chapter(self, state: UsfmParserState, number: str, marker: str, alt_number:
4650 def verse (
4751 self , state : UsfmParserState , number : str , marker : str , alt_number : Optional [str ], pub_number : Optional [str ]
4852 ) -> None :
49- if state .verse_ref == self ._cur_verse_ref and not self ._duplicate_verse :
50- self ._end_verse_text (state , self ._create_verse_refs ())
51- # ignore duplicate verses
52- self ._duplicate_verse = True
53+ # Non-latin numbers are implicitly handled
54+
55+ if state .chapter_has_verse_zero and state .verse_ref .verse_num == 0 :
56+ # Fall through for the special case of verse 0 being specified in the USFM
57+ pass
58+ elif state .verse_ref == self ._cur_verse_ref and not self ._duplicate_verse :
59+ if state .verse_ref .verse_num > 0 :
60+ self ._end_verse_text (state , self ._create_verse_refs ())
61+ # ignore duplicate verses
62+ self ._duplicate_verse = True
63+ return
5364 elif are_overlapping_verse_ranges (verse1 = number , verse2 = self ._cur_verse_ref .verse ):
5465 # merge overlapping verse ranges in to one range
5566 verse_ref : VerseRef = self ._cur_verse_ref .copy ()
5667 verse_ref .verse = merge_verse_ranges (number , self ._cur_verse_ref .verse )
5768 self ._update_verse_ref (verse_ref , marker )
69+ return
70+ if self ._current_text_type == ScriptureTextType .NONVERSE :
71+ self ._end_non_verse_text_wrapper (state )
5872 else :
59- if self ._current_text_type == ScriptureTextType .NONVERSE :
60- self ._end_non_verse_text_wrapper (state )
61- elif self ._current_text_type == ScriptureTextType .VERSE :
62- self ._end_verse_text_wrapper (state )
63- self ._update_verse_ref (state .verse_ref , marker )
64- self ._start_verse_text_wrapper (state )
73+ self ._end_verse_text_wrapper (state )
74+ self ._update_verse_ref (state .verse_ref , marker )
75+ self ._start_verse_text_wrapper (state )
6576
6677 def start_para (
6778 self ,
@@ -70,13 +81,21 @@ def start_para(
7081 unknown : Optional [bool ],
7182 attributes : Optional [Sequence [UsfmAttribute ]],
7283 ) -> None :
84+ # ignore private-use markers
85+ if _is_private_use_marker (marker ):
86+ return
87+
7388 if self ._cur_verse_ref .is_default :
7489 self ._update_verse_ref (state .verse_ref , marker )
7590 if not state .is_verse_text :
7691 self ._start_parent_element (marker )
7792 self ._start_non_verse_text_wrapper (state )
7893
7994 def end_para (self , state : UsfmParserState , marker : str ) -> None :
95+ # ignore private-use markers
96+ if _is_private_use_marker (marker ):
97+ return
98+
8099 if self ._current_text_type == ScriptureTextType .NONVERSE :
81100 self ._end_parent_element ()
82101 self ._end_non_verse_text_wrapper (state )
@@ -126,6 +145,10 @@ def opt_break(self, state: UsfmParserState) -> None:
126145 def start_char (
127146 self , state : UsfmParserState , marker : str , unknown : bool , attributes : Optional [Sequence [UsfmAttribute ]]
128147 ) -> None :
148+ # ignore private-use markers
149+ if _is_private_use_marker (marker ):
150+ return
151+
129152 # if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment
130153 self ._check_convert_verse_para_to_non_verse (state )
131154
@@ -135,6 +158,10 @@ def start_char(
135158 def end_char (
136159 self , state : UsfmParserState , marker : str , attributes : Optional [Sequence [UsfmAttribute ]], closed : bool
137160 ) -> None :
161+ # ignore private-use markers
162+ if _is_private_use_marker (marker ):
163+ return
164+
138165 if _is_embed_style (marker ):
139166 self ._end_embed_text_wrapper (state )
140167
@@ -162,9 +189,9 @@ def _start_verse_text_wrapper(self, state: UsfmParserState) -> None:
162189 self ._start_verse_text (state , self ._create_verse_refs ())
163190
164191 def _end_verse_text_wrapper (self , state : UsfmParserState ) -> None :
165- if not self ._duplicate_verse and self ._cur_verse_ref .verse_num > 0 :
192+ if not self ._duplicate_verse and ( self ._cur_verse_ref .verse_num > 0 or state . chapter_has_verse_zero ) :
166193 self ._end_verse_text (state , self ._create_verse_refs ())
167- if self ._cur_verse_ref .verse_num > 0 :
194+ if self ._cur_verse_ref .verse_num > 0 or state . chapter_has_verse_zero :
168195 self ._cur_text_type_stack .pop ()
169196
170197 def _start_non_verse_text_wrapper (self , state : UsfmParserState ) -> None :
@@ -177,7 +204,17 @@ def _end_non_verse_text_wrapper(self, state: UsfmParserState) -> None:
177204 self ._cur_text_type_stack .pop ()
178205
179206 def _update_verse_ref (self , verse_ref : VerseRef , marker : str ) -> None :
180- if not are_overlapping_verse_ranges (verse_ref , self ._cur_verse_ref ):
207+ if (
208+ self ._cur_verse_ref .verse_num == 0
209+ and verse_ref .verse_num == 0
210+ and not verse_ref .has_multiple
211+ and marker == "v"
212+ ):
213+ # As the verse 0 marker appears within the middle of verse 0,
214+ # we should not break the position of current element stack by clearing it.
215+ # Instead, we just need to pop the current element off the stack.
216+ self ._cur_elements_stack .pop ()
217+ elif not are_overlapping_verse_ranges (verse_ref , self ._cur_verse_ref ):
181218 self ._cur_elements_stack .clear ()
182219 self ._cur_elements_stack .append (ScriptureElement (0 , marker ))
183220 self ._cur_verse_ref = verse_ref .copy ()
@@ -239,6 +276,8 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None
239276 and para_tag .marker != "tr"
240277 and state .is_verse_para
241278 and self ._cur_verse_ref .verse_num == 0
279+ and not state .chapter_has_verse_zero
280+ and not _is_private_use_marker (para_tag .marker )
242281 ):
243282 self ._start_parent_element (para_tag .marker )
244283 self ._start_non_verse_text_wrapper (state )
0 commit comments