BYK · BYK · Apr 14, 2026 · Apr 14, 2026
diff --git a/src/markdown.ts b/src/markdown.ts
@@ -22,12 +22,31 @@ export function serialize(tree: Root): string {
   return processor.stringify(tree);
 }
 
+/**
+ * Replace unpaired Unicode surrogates with U+FFFD (replacement character).
+ *
+ * An unpaired surrogate is a high surrogate (U+D800-U+DBFF) with no following
+ * low surrogate (U+DC00-U+DFFF), or a low surrogate with no preceding high one.
+ * Such strings are ill-formed UTF-16. They can appear in tool outputs (binary
+ * file contents, command output) and survive through SQLite storage into recall
+ * results. JSON.stringify does not throw on them; it emits lone \uXXXX escapes,
+ * which the LLM API then rejects with "no low surrogate in string".
+ */
+export function sanitizeSurrogates(value: string): string {
+  // The lookahead/lookbehind leave valid high+low pairs untouched.
+  return value.replace(
+    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
+    "\uFFFD",
+  );
+}
+
 // Collapse newlines in LLM-generated text before inserting into a text node.
 // Embedded blank lines (\n\n) cause list items to become "spread" (loose),
 // which then breaks the surrounding markdown structure on re-parse.
 // Newlines within a single fact/narrative are replaced with a space.
+// Also replaces unpaired surrogates with U+FFFD so the text stays well-formed.
 export function inline(value: string): string {
-  return value.replace(/\s*\n\s*/g, " ").trim();
+  return sanitizeSurrogates(value).replace(/\s*\n\s*/g, " ").trim();
 }
 
 // Normalize arbitrary markdown via parse → stringify roundtrip.

diff --git a/src/temporal.ts b/src/temporal.ts
@@ -1,5 +1,6 @@
 import { db, ensureProject } from "./db";
 import { ftsQuery, ftsQueryOr, EMPTY_QUERY } from "./search";
+import { sanitizeSurrogates } from "./markdown";
 import type { Message, Part } from "@opencode-ai/sdk";
 
 // ~3 chars per token — validated as best heuristic against real API data.
@@ -16,7 +17,10 @@ function partsToText(parts: Part[]): string {
     else if (part.type === "tool" && part.state.status === "completed")
       chunks.push(`[tool:${part.tool}] ${part.state.output}`);
   }
-  return chunks.join("\n");
+  // Replace unpaired surrogates from tool outputs and other raw text here.
+  // Without this, they survive into the DB and the LLM API later rejects
+  // requests whose recall tool responses include them.
+  return sanitizeSurrogates(chunks.join("\n"));
 }
 
 function messageMetadata(info: Message, parts: Part[]): string {

diff --git a/test/markdown.test.ts b/test/markdown.test.ts
@@ -1,7 +1,7 @@
 import { describe, test, expect } from "bun:test";
 import fc from "fast-check";
 import { remark } from "remark";
-import { normalize, unescapeMarkdown } from "../src/markdown";
+import { normalize, unescapeMarkdown, sanitizeSurrogates, inline } from "../src/markdown";
 import { formatDistillations, formatKnowledge } from "../src/prompt";
 import { isContextOverflow, buildRecoveryMessage } from "../src/index";
 
@@ -351,3 +351,56 @@ describe("buildRecoveryMessage", () => {
     expect(msg).toContain("No distilled history available");
   });
 });
+
+describe("sanitizeSurrogates", () => {
+  test("passes through normal text unchanged", () => {
+    expect(sanitizeSurrogates("hello world")).toBe("hello world");
+  });
+
+  test("passes through valid surrogate pairs (emoji)", () => {
+    // 😀 is U+1F600 = surrogate pair \uD83D\uDE00
+    expect(sanitizeSurrogates("hello 😀 world")).toBe("hello 😀 world");
+  });
+
+  test("replaces lone high surrogate with U+FFFD", () => {
+    const bad = "before\uD800after";
+    expect(sanitizeSurrogates(bad)).toBe("before\uFFFDafter");
+  });
+
+  test("replaces lone low surrogate with U+FFFD", () => {
+    const bad = "before\uDC00after";
+    expect(sanitizeSurrogates(bad)).toBe("before\uFFFDafter");
+  });
+
+  test("replaces high surrogate at end of string", () => {
+    const bad = "trailing\uD800";
+    expect(sanitizeSurrogates(bad)).toBe("trailing\uFFFD");
+  });
+
+  test("replaces multiple unpaired surrogates", () => {
+    const bad = "\uD800x\uDBFF\uDC00y\uDC00";
+    // \uD800 = lone high → replaced
+    // \uDBFF\uDC00 = valid pair → preserved
+    // \uDC00 = lone low → replaced
+    expect(sanitizeSurrogates(bad)).toBe("\uFFFDx\uDBFF\uDC00y\uFFFD");
+  });
+
+  test("result contains no unpaired surrogates", () => {
+    const nasty = "ok\uD800\uDBFFpair\uDBFF\uDC00tail\uDC00";
+    // JSON.stringify escapes lone surrogates rather than throwing, so use
+    // encodeURIComponent, which throws URIError on malformed UTF-16.
+    expect(() => encodeURIComponent(nasty)).toThrow(URIError);
+    const sanitized = sanitizeSurrogates(nasty);
+    expect(() => encodeURIComponent(sanitized)).not.toThrow();
+  });
+});
+
+describe("inline sanitizes surrogates", () => {
+  test("inline replaces unpaired surrogates with U+FFFD", () => {
+    const bad = "line one\n  \uD800middle\n  end";
+    const result = inline(bad);
+    expect(result).toBe("line one \uFFFDmiddle end");
+    // Must be well-formed UTF-16 (encodeURIComponent throws on lone surrogates)
+    expect(() => encodeURIComponent(result)).not.toThrow();
+  });
+});