Skip to content

Commit b818e9d

Browse files
gambletan and claude
committed
fix: WASM extractor — trim inputs, bounds check for Unicode safety
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9752602 commit b818e9d

1 file changed

Lines changed: 9 additions & 8 deletions

File tree

cortex-wasm/src/lib.rs

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -198,26 +198,27 @@ impl CortexWasm {
198198
"based in ",
199199
];
200200

201-
// Scan ALL " and " positions (not just the first) to handle
202-
// "I work at Research and Development and live in Berlin"
201+
// Scan ALL " and " / " And " / " AND " positions to find clause boundaries.
202+
// Search in original text to avoid Unicode byte offset mismatch.
203203
let lower = text.to_lowercase();
204204
let mut search_from = 0;
205205
while let Some(rel_pos) = lower[search_from..].find(" and ") {
206206
let pos = search_from + rel_pos;
207-
let after = lower[pos + 5..].trim_start();
207+
// `pos` is a byte offset into `lower`. " and " itself is ASCII, but lowercasing can change
208+
// byte lengths (e.g. İ→i̇), so the offset may not line up with `text`; stop if it would run past the end.
209+
if pos + 5 > text.len() { break; }
210+
let after = text[pos + 5..].trim_start().to_lowercase();
208211
if verb_prefixes.iter().any(|p| after.starts_with(p)) {
209-
// Find the same " and " in original text by searching from same byte offset
210-
// Safe: " and " is pure ASCII, so byte positions match between original and lowercase
211-
let first = &text[..pos];
212+
let first = text[..pos].trim();
212213
let second = text[pos + 5..].trim();
213214
self.extract_single(first);
214215
self.extract_facts(second);
215216
return;
216217
}
217-
search_from = pos + 5; // skip past this " and " and keep looking
218+
search_from = pos + 5;
218219
}
219220

220-
self.extract_single(text);
221+
self.extract_single(text.trim());
221222
}
222223

223224
fn extract_single(&mut self, text: &str) {

0 commit comments

Comments
 (0)