Skip to content

Commit f487620

Browse files
fix: memory quality audit — 6 issues resolved
- Fix Infinite Memory migration typo (events → raw_events)
- Replace IVFFlat with HNSW index for semantic search (0.48→0.94 scores)
- Add observations.updated_at migration
- Add /api/semantic-search route alias
- Backfill 5 observations with empty metadata
- Clean up 4 duplicate MTProto knowledge entries

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 64e6e99 commit f487620

15 files changed

Lines changed: 390 additions & 160 deletions

File tree

AGENTS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,9 @@ LLM always creates NEW observations even when near-identical ones exist. The `ex
274274
- ~~CLI search bypasses SearchService~~ — fixed: uses smart_search() for semantic/hybrid routing
275275
- ~~backfill-metadata single-batch truncation~~ — fixed: proper loop with progress tracking and infinite-loop prevention
276276
- ~~Session summaries never generated (0/2168 sessions)~~ — root cause: `get_sessions_without_summaries` joined on `sessions.id` (UUID), but observations store IDE content session IDs (`ses_*`) that never match. Fixed: query now groups observations by `session_id` directly, bypassing the sessions table. `generate_pending_summaries` uses `save_summary` instead of `update_session_status_with_summary`.
277+
- ~~Infinite Memory migration 20260314000000 references `events` instead of `raw_events`~~ — fixed table name in normalize_project_names migration
278+
- ~~Semantic search poor relevance (scores 0.48-0.55)~~ — root cause: IVFFlat index with lists=100 on 959 vectors, probes=1 searched only 1% of vector space. Fixed by replacing with HNSW(m=16, ef_construction=64). Scores improved to 0.55-0.94 with correct semantic ranking.
279+
- ~~Missing `updated_at` column on observations table~~ — added migration 20260315000003
280+
- ~~Knowledge duplicates (4x Telegram MTProto entries)~~ — cleaned up, kept entry with highest usage_count
281+
- ~~5 observations with empty metadata from manual import~~ — backfilled via CLI
282+
- ~~`/api/semantic-search` route inconsistency~~ — added alias alongside existing `/semantic-search`

crates/cli/src/commands/import_insights.rs

Lines changed: 92 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ use std::sync::LazyLock;
1313
reason = "static regex patterns are compile-time validated"
1414
)]
1515
static INSIGHT_RE: LazyLock<Regex> = LazyLock::new(|| {
16-
Regex::new(r"###\s*\u{418}\u{43d}\u{441}\u{430}\u{439}\u{442}\s*\d+:\s*\[([^\]]+)\]").unwrap()
16+
Regex::new(r"###\s*\u{418}\u{43d}\u{441}\u{430}\u{439}\u{442}\s*\d+:\s*(?:\[([^\]]+)\]|(.+))")
17+
.unwrap()
1718
});
1819

1920
#[expect(
@@ -63,16 +64,15 @@ struct ParsedInsight {
6364

6465
/// Map Russian category to `KnowledgeType`
6566
fn category_to_knowledge_type(category: &str) -> KnowledgeType {
66-
match category.to_lowercase().as_str() {
67-
"слабость" => KnowledgeType::Gotcha,
68-
"паттерн" => KnowledgeType::Pattern,
69-
"missing for agi" => KnowledgeType::Gotcha,
70-
"неоптимальные решения" => KnowledgeType::Gotcha,
71-
"планирование" => KnowledgeType::Pattern,
72-
"ригидность" => KnowledgeType::Gotcha,
73-
"галлюцинации" => KnowledgeType::Gotcha,
74-
"позитивный" => KnowledgeType::Pattern,
75-
_ => KnowledgeType::Gotcha,
67+
let lower = category.to_lowercase();
68+
if lower.starts_with("паттерн")
69+
|| lower.starts_with("планирован")
70+
|| lower.starts_with("позитивн")
71+
{
72+
KnowledgeType::Pattern
73+
} else {
74+
// Слабость, Missing, Галлюцинации, Неоптимальные, Ригидность, etc.
75+
KnowledgeType::Gotcha
7676
}
7777
}
7878

@@ -99,10 +99,11 @@ fn parse_insights(content: &str) -> Vec<ParsedInsight> {
9999
for section in sections.iter().skip(1) {
100100
let full_section = format!("### \u{418}\u{43d}\u{441}\u{430}\u{439}\u{442}{section}");
101101

102-
let title = INSIGHT_RE
103-
.captures(&full_section)
104-
.and_then(|c| c.get(1))
105-
.map(|m| m.as_str().to_owned());
102+
let title = INSIGHT_RE.captures(&full_section).and_then(|c| {
103+
c.get(1)
104+
.or_else(|| c.get(2))
105+
.map(|m| m.as_str().trim().to_owned())
106+
});
106107

107108
let category = CATEGORY_RE
108109
.captures(&full_section)
@@ -192,6 +193,82 @@ async fn import_file(storage: &StorageBackend, path: &Path) -> Result<(usize, us
192193
Ok((imported, skipped))
193194
}
194195

196+
#[cfg(test)]
197+
mod tests {
198+
use super::*;
199+
200+
#[test]
201+
fn parse_unbracketed_title() {
202+
let md = r#"### Инсайт 1: Синдром "Работает на моём curl"
203+
**Категория:** Слабость (Ригидность / Неоптимальные решения)
204+
**Наблюдение:** Агент починил ошибку и объявил победу.
205+
**Импликация для AGI:** AGI должен понимать разницу.
206+
**Рекомендация:** Запретить агенту объявлять задачу решенной.
207+
"#;
208+
let insights = parse_insights(md);
209+
assert_eq!(insights.len(), 1);
210+
let i = &insights[0];
211+
assert_eq!(i.title, "Синдром \"Работает на моём curl\"");
212+
assert_eq!(i.category, "Слабость (Ригидность / Неоптимальные решения)");
213+
assert!(i.observation.starts_with("Агент починил"));
214+
assert!(i.implication.is_some());
215+
assert!(i.recommendation.is_some());
216+
}
217+
218+
#[test]
219+
fn parse_bracketed_title() {
220+
let md = r#"### Инсайт 1: [Title In Brackets]
221+
**Категория:** Паттерн
222+
**Наблюдение:** Some observation text.
223+
"#;
224+
let insights = parse_insights(md);
225+
assert_eq!(insights.len(), 1);
226+
assert_eq!(insights[0].title, "Title In Brackets");
227+
}
228+
229+
#[test]
230+
fn parse_multiple_insights() {
231+
let md = r#"## Session 1
232+
233+
### Инсайт 1: First Title
234+
**Категория:** Слабость
235+
**Наблюдение:** First observation.
236+
237+
### Инсайт 2: Second Title
238+
**Категория:** Паттерн (Повторяющиеся ошибки)
239+
**Наблюдение:** Second observation.
240+
**Импликация для AGI:** Some implication.
241+
**Рекомендация:** Some recommendation.
242+
"#;
243+
let insights = parse_insights(md);
244+
assert_eq!(insights.len(), 2);
245+
assert_eq!(insights[0].title, "First Title");
246+
assert_eq!(insights[1].title, "Second Title");
247+
assert!(insights[0].implication.is_none());
248+
assert!(insights[1].implication.is_some());
249+
}
250+
251+
#[test]
252+
fn category_mapping_with_parenthetical() {
253+
assert_eq!(
254+
category_to_knowledge_type("Слабость (Ригидность)"),
255+
KnowledgeType::Gotcha
256+
);
257+
assert_eq!(
258+
category_to_knowledge_type("Паттерн (Повторяющиеся ошибки)"),
259+
KnowledgeType::Pattern
260+
);
261+
assert_eq!(
262+
category_to_knowledge_type("Missing for AGI (Мета-когниция)"),
263+
KnowledgeType::Gotcha
264+
);
265+
assert_eq!(
266+
category_to_knowledge_type("Галлюцинации / Слабость"),
267+
KnowledgeType::Gotcha
268+
);
269+
}
270+
}
271+
195272
/// Run import-insights command
196273
pub(crate) async fn run(file: Option<String>, dir: Option<String>) -> Result<()> {
197274
let storage = crate::create_storage_from_env().await?;

crates/core/src/lib.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@ pub fn strip_uuid_from_title(title: &str) -> String {
4949
.unwrap()
5050
});
5151

52-
UUID_RE.replace_all(title, " ").trim().to_string()
52+
let result = UUID_RE.replace_all(title, " ").trim().to_string();
53+
if result.is_empty() {
54+
title.to_string()
55+
} else {
56+
result
57+
}
5358
}
5459

5560
/// Truncates a string to the given maximum length at a char boundary.
@@ -107,6 +112,14 @@ mod tests {
107112
assert_eq!(strip_uuid_from_title(""), "");
108113
}
109114

115+
#[test]
116+
fn strip_uuid_only_uuid() {
117+
assert_eq!(
118+
strip_uuid_from_title("b3b61de2-1234-5678-9abc-def012345678"),
119+
"b3b61de2-1234-5678-9abc-def012345678"
120+
);
121+
}
122+
110123
#[test]
111124
fn strip_uuid_multiple() {
112125
assert_eq!(

0 commit comments

Comments
 (0)