Skip to content

Commit f870e84

Browse files
gambletan and claude committed
fix: WASM extractor — add 'based in' prefix, safe ASCII offset for Unicode
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 099ccfc commit f870e84

1 file changed

Lines changed: 11 additions & 11 deletions

File tree

cortex-wasm/src/lib.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -191,30 +191,30 @@ impl CortexWasm {
191191
/// without breaking values that contain "and" ("Research and Development").
192192
/// Recurses for 3+ clauses. Accepts "I" prefix in second clause.
193193
fn extract_facts(&mut self, text: &str) {
194-
let lower = text.to_lowercase();
195-
196-
// Known second-clause verb prefixes (with and without repeated "I")
194+
// Known second-clause prefixes (with and without repeated "I")
197195
let verb_prefixes = [
198196
"work at ", "work for ", "i work at ", "i work for ",
199197
"i'm a ", "i am a ", "i'm an ", "i am an ",
200-
"live in ", "i live in ",
198+
"live in ", "i live in ", "i'm based in ", "i am based in ",
199+
"based in ",
201200
];
202201

203-
// Try splitting on " and " only if the second part starts with a known verb
202+
// Find " and " case-insensitively in the original text
203+
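// NOTE(review): " and " is ASCII, but `pos` is found in `lower`, not `text`; the two offsets agree only when to_lowercase() preserves byte lengths, which is not guaranteed for all Unicode (e.g. 'İ' U+0130 lowercases to a longer byte sequence)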
204+
let lower = text.to_lowercase();
204205
if let Some(pos) = lower.find(" and ") {
205-
let after = lower[pos + 5..].trim_start();
206-
let is_verb_clause = verb_prefixes.iter().any(|p| after.starts_with(p));
207-
if is_verb_clause {
206+
let after = &lower[pos + 5..];
207+
let after_trimmed = after.trim_start();
208+
if verb_prefixes.iter().any(|p| after_trimmed.starts_with(p)) {
209+
// pos was found in `lower`; it is a valid offset into `text` only if lowercasing did not change the byte length of anything before it
208210
let first = &text[..pos];
209211
let second = text[pos + 5..].trim();
210212
self.extract_single(first);
211-
// Recurse on the rest to handle 3+ clauses
212-
self.extract_facts(second);
213+
self.extract_facts(second); // recurse for 3+ clauses
213214
return;
214215
}
215216
}
216217

217-
// No split — extract from full text
218218
self.extract_single(text);
219219
}
220220

0 commit comments

Comments
 (0)