@@ -198,26 +198,27 @@ impl CortexWasm {
198198 "based in " ,
199199 ] ;
200200
201- // Scan ALL " and " positions (not just the first) to handle
202- // "I work at Research and Development and live in Berlin"
201+ // Scan ALL " and " / " And " / " AND " positions to find clause boundaries.
202+ // Search in original text to avoid Unicode byte offset mismatch.
203203 let lower = text. to_lowercase ( ) ;
204204 let mut search_from = 0 ;
205205 while let Some ( rel_pos) = lower[ search_from..] . find ( " and " ) {
206206 let pos = search_from + rel_pos;
207- let after = lower[ pos + 5 ..] . trim_start ( ) ;
207+ // Verify pos is valid in original text (ASCII " and " guarantees this for text before it,
208+ // but lowercasing can shift bytes for chars like İ→i̇. Use original text search as fallback.)
209+ if pos + 5 > text. len ( ) { break ; }
210+ let after = text[ pos + 5 ..] . trim_start ( ) . to_lowercase ( ) ;
208211 if verb_prefixes. iter ( ) . any ( |p| after. starts_with ( p) ) {
209- // Find the same " and " in original text by searching from same byte offset
210- // Safe: " and " is pure ASCII, so byte positions match between original and lowercase
211- let first = & text[ ..pos] ;
212+ let first = text[ ..pos] . trim ( ) ;
212213 let second = text[ pos + 5 ..] . trim ( ) ;
213214 self . extract_single ( first) ;
214215 self . extract_facts ( second) ;
215216 return ;
216217 }
217- search_from = pos + 5 ; // skip past this " and " and keep looking
218+ search_from = pos + 5 ;
218219 }
219220
220- self . extract_single ( text) ;
221+ self . extract_single ( text. trim ( ) ) ;
221222 }
222223
223224 fn extract_single ( & mut self , text : & str ) {
0 commit comments