Skip to content

Commit 57f069e

Browse files
committed
fix: add timeout + fail-open to recall search path
Fixes #1452 — auto-recall can block the gateway startup / first-turn path long enough to fail health checks when embedding or LLM calls are slow.

Changes:
- Add configurable `recall.timeoutMs` (default 10s)
- Wrap embedder.embedQuery() with a timeout in RecallEngine.search(); falls back to FTS-only results on timeout
- Wrap the LLM skill relevance judgment with a timeout in searchSkills(); falls back to returning all candidates on timeout
- Add a top-level timeout in the memory_search tool handler; returns empty results with a `timedOut: true` flag on timeout
- All timeouts fail open: partial/empty results, never throw
- Recall exceptions never propagate to the gateway top level
1 parent 7df9f7d commit 57f069e

4 files changed

Lines changed: 112 additions & 33 deletions

File tree

apps/memos-local-plugin/src/config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ export function resolveConfig(raw: Partial<MemosLocalConfig> | undefined, stateD
6565
mmrLambda: cfg.recall?.mmrLambda ?? DEFAULTS.mmrLambda,
6666
recencyHalfLifeDays: cfg.recall?.recencyHalfLifeDays ?? DEFAULTS.recencyHalfLifeDays,
6767
vectorSearchMaxChunks: cfg.recall?.vectorSearchMaxChunks ?? DEFAULTS.vectorSearchMaxChunks,
68+
timeoutMs: cfg.recall?.timeoutMs ?? DEFAULTS.recallTimeoutMs,
6869
},
6970
dedup: {
7071
similarityThreshold: cfg.dedup?.similarityThreshold ?? DEFAULTS.dedupSimilarityThreshold,

apps/memos-local-plugin/src/recall/engine.ts

Lines changed: 68 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,25 @@ import { Summarizer } from "../ingest/providers";
99

1010
export type SkillSearchScope = "mix" | "self" | "public";
1111

12+
/**
 * Race a promise against a timeout, failing open in every case.
 *
 * The returned promise never rejects: it resolves with the value of `promise`
 * if that settles first, or with `fallback` if the timeout elapses or
 * `promise` rejects. Each fallback is reported through `log.warn`.
 *
 * Note that the underlying operation is not cancelled on timeout; its late
 * result (or error) is simply ignored.
 *
 * @param promise  The operation to guard.
 * @param ms       Timeout in milliseconds. A value that is not greater than 0
 *                 (0, negative, NaN) disables the timer; rejections still
 *                 resolve to `fallback`. Values above the largest delay
 *                 `setTimeout` supports are clamped to that maximum.
 * @param fallback Value to resolve with on timeout or failure.
 * @param label    Short name of the guarded operation, used in log messages.
 * @param log      Logger exposing `warn`.
 * @returns A promise that resolves with the operation's value or `fallback`.
 */
function withTimeout<T>(promise: Promise<T>, ms: number, fallback: T, label: string, log: { warn: (msg: string, ...args: unknown[]) => void }): Promise<T> {
  // setTimeout stores the delay as a signed 32-bit integer; anything larger is
  // coerced to 1ms by Node, which would turn a "very long" timeout into an
  // immediate one.
  const MAX_TIMER_DELAY_MS = 2_147_483_647;

  // Shared rejection handler so the enabled and disabled paths fail open identically.
  const failOpen = (err: unknown): T => {
    log.warn(`recall: ${label} failed: ${err}`);
    return fallback;
  };

  // Timer disabled. `!(ms > 0)` also catches NaN, which would otherwise reach
  // setTimeout and fire almost immediately.
  if (!(ms > 0)) return promise.catch(failOpen);

  const delay = Math.min(ms, MAX_TIMER_DELAY_MS);

  return new Promise<T>((resolve) => {
    // Guards against logging/resolving twice when the operation settles after the timer fired.
    let settled = false;

    const timer = setTimeout(() => {
      if (settled) return;
      settled = true;
      log.warn(`recall: ${label} timed out after ${delay}ms — returning fallback`);
      resolve(fallback);
    }, delay);

    promise.then(
      (val) => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        resolve(val);
      },
      (err: unknown) => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        resolve(failOpen(err));
      },
    );
  });
}
30+
1231
export interface RecallOptions {
1332
query?: string;
1433
maxResults?: number;
@@ -48,13 +67,24 @@ export class RecallEngine {
4867
: [];
4968

5069
let vecCandidates: Array<{ chunkId: string; score: number }> = [];
70+
const timeoutMs = this.ctx.config.recall!.timeoutMs ?? 10_000;
5171
if (query) {
5272
try {
53-
const queryVec = await this.embedder.embedQuery(query);
54-
const maxChunks = recallCfg.vectorSearchMaxChunks && recallCfg.vectorSearchMaxChunks > 0
55-
? recallCfg.vectorSearchMaxChunks
56-
: undefined;
57-
vecCandidates = vectorSearch(this.store, queryVec, candidatePool, maxChunks, ownerFilter);
73+
const queryVec = await withTimeout(
74+
this.embedder.embedQuery(query),
75+
timeoutMs,
76+
null,
77+
"embedQuery",
78+
this.ctx.log,
79+
);
80+
if (queryVec) {
81+
const maxChunks = recallCfg.vectorSearchMaxChunks && recallCfg.vectorSearchMaxChunks > 0
82+
? recallCfg.vectorSearchMaxChunks
83+
: undefined;
84+
vecCandidates = vectorSearch(this.store, queryVec, candidatePool, maxChunks, ownerFilter);
85+
} else {
86+
this.ctx.log.warn("Vector search skipped (embedding timed out), using FTS only");
87+
}
5888
} catch (err) {
5989
this.ctx.log.warn(`Vector search failed, using FTS only: ${err}`);
6090
}
@@ -101,7 +131,13 @@ export class RecallEngine {
101131
}
102132

103133
try {
104-
const qv = await this.embedder.embedQuery(query).catch(() => null);
134+
const qv = await withTimeout(
135+
this.embedder.embedQuery(query).catch(() => null),
136+
timeoutMs,
137+
null,
138+
"hubMemEmbedQuery",
139+
this.ctx.log,
140+
);
105141
if (qv) {
106142
const memEmbs = this.store.getVisibleHubMemoryEmbeddings("__hub__");
107143
const scored: Array<{ id: string; score: number }> = [];
@@ -302,15 +338,24 @@ export class RecallEngine {
302338

303339
// Vector search on description embedding
304340
let vecCandidates: Array<{ skillId: string; score: number }> = [];
341+
const timeoutMs = this.ctx.config.recall!.timeoutMs ?? 10_000;
305342
try {
306-
const queryVec = await this.embedder.embedQuery(query);
307-
const allEmb = this.store.getSkillEmbeddings(scope, currentOwner);
308-
vecCandidates = allEmb.map((row) => ({
309-
skillId: row.skillId,
310-
score: cosineSimilarity(queryVec, row.vector),
311-
}));
312-
vecCandidates.sort((a, b) => b.score - a.score);
313-
vecCandidates = vecCandidates.slice(0, TOP_CANDIDATES);
343+
const queryVec = await withTimeout(
344+
this.embedder.embedQuery(query),
345+
timeoutMs,
346+
null,
347+
"skillEmbedQuery",
348+
this.ctx.log,
349+
);
350+
if (queryVec) {
351+
const allEmb = this.store.getSkillEmbeddings(scope, currentOwner);
352+
vecCandidates = allEmb.map((row) => ({
353+
skillId: row.skillId,
354+
score: cosineSimilarity(queryVec, row.vector),
355+
}));
356+
vecCandidates.sort((a, b) => b.score - a.score);
357+
vecCandidates = vecCandidates.slice(0, TOP_CANDIDATES);
358+
}
314359
} catch (err) {
315360
this.ctx.log.warn(`Skill vector search failed, using FTS only: ${err}`);
316361
}
@@ -336,9 +381,16 @@ export class RecallEngine {
336381

337382
if (candidateSkills.length === 0) return [];
338383

339-
// LLM relevance judgment
384+
// LLM relevance judgment (with timeout — fail-open returns all candidates)
340385
const summarizer = new Summarizer(this.ctx.config.summarizer, this.ctx.log, this.ctx.openclawAPI);
341-
const relevantIndices = await this.judgeSkillRelevance(summarizer, query, candidateSkills);
386+
const allIndices = candidateSkills.map((_, i) => i);
387+
const relevantIndices = await withTimeout(
388+
this.judgeSkillRelevance(summarizer, query, candidateSkills),
389+
timeoutMs,
390+
allIndices,
391+
"judgeSkillRelevance",
392+
this.ctx.log,
393+
);
342394

343395
return relevantIndices.map((idx) => {
344396
const { skill, rrfScore } = candidateSkills[idx];

apps/memos-local-plugin/src/tools/memory-search.ts

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ function emptyHubResult(scope: HubScope): HubSearchResult {
2525
}
2626

2727
export function createMemorySearchTool(engine: RecallEngine, store?: SqliteStore, ctx?: PluginContext, sharedState?: { lastSearchTime: number }): ToolDefinition {
28+
const EMPTY_RESULT = { hits: [], meta: { usedMinScore: 0, usedMaxResults: 0, totalCandidates: 0, timedOut: true, note: "Search timed out \u2014 returning empty results to avoid blocking the critical path." } };
29+
2830
return {
2931
name: "memory_search",
3032
description:
@@ -66,27 +68,48 @@ export function createMemorySearchTool(engine: RecallEngine, store?: SqliteStore
6668
const minScore = input.minScore as number | undefined;
6769
const ownerFilter = resolveOwnerFilter(input.owner);
6870
const scope = resolveScope(input.scope);
71+
const timeoutMs = ctx?.config?.recall?.timeoutMs ?? 10_000;
6972

70-
const localSearch = engine.search({
71-
query,
72-
maxResults,
73-
minScore,
74-
ownerFilter,
75-
});
73+
// Top-level timeout: never block the critical path longer than timeoutMs
74+
const doSearch = async () => {
75+
const localSearch = engine.search({
76+
query,
77+
maxResults,
78+
minScore,
79+
ownerFilter,
80+
});
81+
82+
if (scope === "local" || !store || !ctx) {
83+
return localSearch;
84+
}
7685

77-
if (scope === "local" || !store || !ctx) {
78-
return localSearch;
79-
}
86+
const [local, hub] = await Promise.all([
87+
localSearch,
88+
hubSearchMemories(store, ctx, { query, maxResults, scope, hubAddress: input.hubAddress as string | undefined, userToken: input.userToken as string | undefined }).catch((err) => {
89+
ctx.log.warn(`Hub search failed, using local-only results: ${err}`);
90+
return emptyHubResult(scope);
91+
}),
92+
]);
8093

81-
const [local, hub] = await Promise.all([
82-
localSearch,
83-
hubSearchMemories(store, ctx, { query, maxResults, scope, hubAddress: input.hubAddress as string | undefined, userToken: input.userToken as string | undefined }).catch((err) => {
84-
ctx.log.warn(`Hub search failed, using local-only results: ${err}`);
85-
return emptyHubResult(scope);
86-
}),
87-
]);
94+
return { local, hub };
95+
};
8896

89-
return { local, hub };
97+
if (timeoutMs <= 0) return doSearch();
98+
99+
return new Promise((resolve) => {
100+
let settled = false;
101+
const timer = setTimeout(() => {
102+
if (!settled) {
103+
settled = true;
104+
ctx?.log?.warn?.(`memory_search timed out after ${timeoutMs}ms \u2014 returning empty results`);
105+
resolve(EMPTY_RESULT);
106+
}
107+
}, timeoutMs);
108+
doSearch().then(
109+
(val) => { if (!settled) { settled = true; clearTimeout(timer); resolve(val); } },
110+
(err) => { if (!settled) { settled = true; clearTimeout(timer); ctx?.log?.warn?.(`memory_search failed: ${err}`); resolve(EMPTY_RESULT); } },
111+
);
112+
});
90113
},
91114
};
92115
}

apps/memos-local-plugin/src/types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ export interface MemosLocalConfig {
312312
recencyHalfLifeDays?: number;
313313
/** Cap vector search to this many most recent chunks. 0 = no cap (search all; may get slower with 200k+ chunks). If you set a cap for performance, use a large value (e.g. 200000–300000) so older memories are still in the window; FTS always searches all. */
314314
vectorSearchMaxChunks?: number;
315+
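/** Timeout in milliseconds for the recall search path. The same budget is applied separately to each slow step (query embedding, LLM skill-relevance judgment) and to the memory_search tool call as a whole. On timeout the step fails open instead of blocking: FTS-only results when embedding times out, all candidates when the relevance judgment times out, and an empty result flagged `timedOut: true` when the whole tool call times out. 0 = no timeout. Default 10000 (10s). */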
316+
timeoutMs?: number;
315317
};
316318
dedup?: {
317319
similarityThreshold?: number;
@@ -337,6 +339,7 @@ export const DEFAULTS = {
337339
mmrLambda: 0.7,
338340
recencyHalfLifeDays: 14,
339341
vectorSearchMaxChunks: 0,
342+
recallTimeoutMs: 10_000,
340343
dedupSimilarityThreshold: 0.80,
341344
evidenceWrapperTag: "STORED_MEMORY",
342345
excerptMinChars: 200,

0 commit comments

Comments
 (0)