diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index cbe3696..4a23579 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -34,7 +34,28 @@
       "Bash(/Users/nick/.claude/skills/gstack/browse/dist/browse goto *)",
       "Bash(/Users/nick/.claude/skills/gstack/browse/dist/browse text *)",
       "Bash(/Users/nick/.claude/skills/gstack/browse/dist/browse wait *)",
-      "Bash(/Users/nick/.claude/skills/gstack/browse/dist/browse useragent *)"
+      "Bash(/Users/nick/.claude/skills/gstack/browse/dist/browse useragent *)",
+      "mcp__plugin_firebase_firebase__firebase_get_environment",
+      "Bash(node -e 'const now=Date.now\\(\\); console.log\\(\"now_ms\",now\\); console.log\\(\"cutoff_ms\",now-7*86400000\\); console.log\\(\"now_iso\",new Date\\(now\\).toISOString\\(\\)\\); console.log\\(\"cutoff_iso\",new Date\\(now-7*86400000\\).toISOString\\(\\)\\);')",
+      "mcp__plugin_firebase_firebase__firestore_query_collection",
+      "mcp__plugin_firebase_firebase__functions_get_logs",
+      "Bash(jq -r .entries[].severity /Users/nick/.claude/projects/-Users-nick-git-metacortex/1e637d4d-3ecc-4a86-9686-ce889b97a544/tool-results/mcp-plugin_firebase_firebase-functions_get_logs-1780688372658.txt)",
+      "Read(//private/tmp/**)",
+      "Bash(node -e \"require\\('pptxgenjs'\\); console.log\\('pptxgenjs ok'\\)\")",
+      "Bash(node -e \"require\\('react-icons/fa'\\); require\\('sharp'\\); console.log\\('icons ok'\\)\")",
+      "Bash(node -e 'require\\('\\\\''__TRACKED_VAR__'\\\\''\\)')",
+      "Bash(npm root *)",
+      "Bash(echo \"npm root: $\\(npm root -g\\)\")",
+      "Bash(npm install *)",
+      "Bash(node build.js)",
+      "Bash(which soffice *)",
+      "Bash(python -m markitdown MetaCortex-Usage-Report.pptx)",
+      "Bash(grep -iE \"\\\\bx{3,}\\\\b|lorem|ipsum|undefined|\\\\[insert|placeholder\")",
+      "Bash(cp /tmp/deck/MetaCortex-Usage-Report.pptx ~/Desktop/MetaCortex-Usage-Report.pptx)",
+      "Read(//Users/nick/Desktop/**)",
+      "Bash(cd /tmp/deck && node build.js 2>&1 | tail -3 && cp MetaCortex-Usage-Report.pptx ~/Desktop/MetaCortex-Usage-Report.pptx && echo \"redeployed to Desktop\")",
+      "Bash(ls /Applications/Keynote.app >/dev/null 2>&1 && echo \"Keynote present\" || echo \"no Keynote\")",
+      "Bash(node -e ' *)"
     ]
   }
 }
diff --git a/.claude/worktrees/compassionate-lederberg b/.claude/worktrees/compassionate-lederberg
deleted file mode 160000
index 944e406..0000000
--- a/.claude/worktrees/compassionate-lederberg
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 944e406589c2523c0e78d49cbc913eba9274433b
diff --git a/.claude/worktrees/sweet-villani b/.claude/worktrees/sweet-villani
deleted file mode 160000
index 1596b56..0000000
--- a/.claude/worktrees/sweet-villani
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 1596b56719f6e80172d0e767f2a29c58cc232f20
diff --git a/CLAUDE.md b/CLAUDE.md
index 9834b43..806d490 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -141,7 +141,7 @@ Test fakes in `functions/test/support/fakes.ts`:
 | Variable | Default | Purpose |
 |----------|---------|---------|
 | `GEMINI_EMBEDDING_MODEL` | `text-embedding-004` | Embedding model name |
-| `GEMINI_MULTIMODAL_MODEL` | `gemini-3.1-flash-lite-preview` | Multimodal normalization model |
+| `GEMINI_MULTIMODAL_MODEL` | `gemini-3.1-flash-lite` | Multimodal normalization model |
 | `GEMINI_EMBEDDING_DIMENSIONS` | `768` | Embedding vector dimensions |
 | `MEMORY_COLLECTION` | `memory_vectors` | Firestore collection name |
 | `SEARCH_RESULT_LIMIT` | `5` | Max search results returned |
diff --git a/README.md b/README.md
index 5535186..60b32ec 100644
--- a/README.md
+++ b/README.md
@@ -254,7 +254,6 @@ Typical result:
       "id": "abc123",
       "summary": "We use Ktor for shared Android and iOS networking.",
       "score": 0.92,
-      "content_preview": "We use Ktor for shared Android and iOS networking.",
       "metadata": {
         "topic": "kmp-networking",
         "branch_state": "active",
@@ -287,7 +286,7 @@ If nothing matches, the result is:
 
 ### `fetch_context`
 
-Preferred input: pass the same `id` returned by `remember_context` or `search_context`.
+Preferred input: pass the same `id` returned by `remember_context` or `search_context`. `document_id` is accepted as a compatibility alias for older connector wrappers.
 
 Example input:
 
@@ -297,6 +296,14 @@ Example input:
 }
 ```
 
+Compatibility alias:
+
+```json
+{
+  "document_id": "abc123"
+}
+```
+
 Typical result:
 
 ```json
@@ -401,6 +408,7 @@ After deployment, there are three places to look:
 - `event_type`
 - `status`
 - `timestamp`
+- `expires_at`
 - `latency_ms`
 - a compact `request` summary
 - either a compact `response` summary, an `error`, or a request rejection reason
@@ -427,6 +435,12 @@ What is intentionally not stored in observability events:
 
 Search events do include a short `query_preview`, but the observability collection is designed to track behavior, not duplicate the corpus.
 
+Retention is handled with Firestore TTL policies:
+
+- `memory_events.expires_at` targets 90-day audit retention
+- `memory_vectors_write_fingerprints.expires_at` targets 30-day fingerprint retention
+- fingerprint documents keep numeric `dedupe_expires_at` for the short duplicate-write window
+
 ## Quick start
 
 1. Install dependencies:
diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
index 8f9d8dd..70f4ec4 100644
--- a/docs/DEPLOYMENT.md
+++ b/docs/DEPLOYMENT.md
@@ -20,11 +20,11 @@ The deploy path in this repo currently assumes:
 - Firestore collection `memory_events` for audit and observability
 - embedding output pinned to `768` dimensions
 - embedding model pinned to `text-embedding-004`
-- multimodal normalization model pinned to `gemini-3.1-flash-lite-preview`
-- total MCP surface of 4 tools
+- multimodal normalization model pinned to `gemini-3.1-flash-lite`
+- total MCP surface of 5 tools
 - public/browser toolset of 3 tools: `remember_context`, `search_context`, `fetch_context`
-- admin-only maintenance tool: `deprecate_context`
-- WIP consolidation handled internally, not through a public MCP tool
+- admin-only maintenance tools: `deprecate_context`, `consolidate_context`
+- WIP consolidation is available only through the admin maintenance surface
 
 For the first production release, if `memory_vectors` is empty, no embedding migration is required.
 
@@ -79,7 +79,8 @@ Minimum required production values:
 GEMINI_API_KEY=...
 MCP_ADMIN_TOKEN=...
 GEMINI_EMBEDDING_MODEL=text-embedding-004
-GEMINI_MULTIMODAL_MODEL=gemini-3.1-flash-lite-preview
+GEMINI_MULTIMODAL_MODEL=gemini-3.1-flash-lite
+GEMINI_GENERATION_VERTEX_LOCATION=global
 GEMINI_EMBEDDING_DIMENSIONS=768
 MEMORY_COLLECTION=memory_vectors
 ```
@@ -154,6 +155,13 @@ That script checks:
 - full test suite
 - TypeScript build
 
+Validate the live Gemini model configuration before production deploy:
+
+```bash
+cd /Users/nick/git/metacortex
+npm --prefix functions run validate:models
+```
+
 If you want a manual local round-trip before production:
 
 ```bash
@@ -192,6 +200,7 @@ The automated tests and build can also be run directly:
 cd /Users/nick/git/metacortex
 npm --prefix functions test
 npm --prefix functions run build
+npm --prefix functions run validate:models
 ```
 
 ## Deploy
@@ -215,6 +224,8 @@ Verify that `functions/.env.prod` or the dotenv file you plan to deploy with inc
 - `MCP_ALLOWED_ORIGINS` only if you intentionally want browser access to the admin endpoint
 - `MCP_CLIENT_PROFILES_JSON` with both `chatgpt-web` and `claude-web` profiles
 - `GEMINI_EMBEDDING_MODEL=text-embedding-004`
+- `GEMINI_MULTIMODAL_MODEL=gemini-3.1-flash-lite`
+- `GEMINI_GENERATION_VERTEX_LOCATION=global`
 - `GEMINI_EMBEDDING_DIMENSIONS=768`
 - `MEMORY_COLLECTION=memory_vectors`
 
@@ -229,7 +240,30 @@ Also confirm the actual web-client registration values you will use:
 - each bearer token comes from the matching client profile, not `MCP_ADMIN_TOKEN`
 - each web origin must match the profile's `allowedOrigins`
 
-### 3. Deploy Firestore indexes
+### 3. Backfill TTL fields
+
+The hardening release uses Firestore TTL policies for unbounded operational collections:
+
+- `memory_vectors_write_fingerprints.expires_at`: 30-day retention
+- `memory_events.expires_at`: 90-day retention
+
+Run a dry run first:
+
+```bash
+cd /Users/nick/git/metacortex
+npm --prefix functions run backfill:ttl
+```
+
+If the counts look correct, apply the backfill:
+
+```bash
+cd /Users/nick/git/metacortex
+npm --prefix functions run backfill:ttl -- --write --project my-brain-88870
+```
+
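+The backfill preserves numeric event `timestamp`, copies legacy numeric fingerprint `expires_at` into `dedupe_expires_at` when needed, and writes Date-valued `expires_at` fields for Firestore TTL. It only touches documents that still lack a Date-valued `expires_at`, so it is safe to re-run; run it again after step 5 to cover documents written before the new function version went live.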
+
+### 4. Deploy Firestore indexes
 
 ```bash
 cd /Users/nick/git/metacortex
@@ -244,7 +278,7 @@ Required vector indexes:
 
 Wait until those indexes are fully built before trusting search results.
 
-### 4. Deploy the function
+### 5. Deploy the function
 
 ```bash
 cd /Users/nick/git/metacortex
@@ -266,6 +300,21 @@ The useful production routes are:
 - `<FUNCTION_BASE_URL>/mcp`
 - `<FUNCTION_BASE_URL>/clients/<CLIENT_ID>/mcp`
 
+### 6. Enable Firestore TTL policies
+
+Enable TTL policies after the `expires_at` fields exist:
+
+```bash
+cd /Users/nick/git/metacortex
+./scripts/deploy-firestore-ttl.sh --project my-brain-88870
+```
+
+Verify the policies:
+
+```bash
+gcloud firestore fields ttls list --project=my-brain-88870
+```
+
 ## Post-deploy verification
 
 ### 1. Health check
@@ -402,7 +451,7 @@ Expected:
 
 - `remember_context` accepts the image-backed memory
 - returned JSON metadata includes `modality=mixed` when both text and image are present
-- `search_context` returns a summary with the same `id=...`
+- `search_context` returns a summary-only result with the same `id=...`
 - `fetch_context` accepts that same `id` and returns the same `artifact_refs`
 
 ## Token Management
diff --git a/docs/SECURITY.md b/docs/SECURITY.md
index f696d14..f374f9d 100644
--- a/docs/SECURITY.md
+++ b/docs/SECURITY.md
@@ -4,17 +4,21 @@ Known security warnings for the deployed MetaCortex service. These are documente
 
 ---
 
-## WARN-1: `memory_events` collection not in Firestore rules
+## FIXED-1: `memory_events` and fingerprint collections explicitly denied
 
 **File:** `firestore.rules`
 
-The `memory_events` audit log collection is not explicitly covered by Firestore security rules. It is currently protected only by Firestore's implicit default-deny behavior. A future rules edit could inadvertently open it.
+`memory_events` and `memory_vectors_write_fingerprints` are explicitly covered by deny-all Firestore security rules. They are server-only collections and should remain inaccessible to client SDK traffic.
 
-**Fix:** Add an explicit deny rule for `memory_events`:
+Current rule shape:
 ```
 match /memory_events/{document=**} {
   allow read, write: if false;
 }
+
+match /memory_vectors_write_fingerprints/{document=**} {
+  allow read, write: if false;
+}
 ```
 
 ---
diff --git a/firestore-debug.log b/firestore-debug.log
index dab8fef..f64c39a 100644
--- a/firestore-debug.log
+++ b/firestore-debug.log
@@ -1,4 +1,4 @@
-Mar 10, 2026 10:41:40 PM com.google.cloud.datastore.emulator.firestore.websocket.WebSocketServer start
+Jun 01, 2026 8:59:26 PM com.google.cloud.datastore.emulator.firestore.websocket.WebSocketServer start
 INFO: Started WebSocket server on ws://127.0.0.1:9150
 API endpoint: http://127.0.0.1:8080
 If you are using a library that supports the FIRESTORE_EMULATOR_HOST environment variable, run:
@@ -12,7 +12,5 @@ If you are running a Firestore in Datastore Mode project, run:
 Note: Support for Datastore Mode is in preview. If you encounter any bugs please file at https://github.com/firebase/firebase-tools/issues.
 Dev App Server is now running.
 
-Mar 10, 2026 10:41:48 PM io.gapi.emulators.netty.HttpVersionRoutingHandler channelRead
-INFO: Detected HTTP/2 connection.
 *** shutting down gRPC server since JVM is shutting down
 *** server shut down
diff --git a/functions/.env.example b/functions/.env.example
index f11ff62..d74f71a 100644
--- a/functions/.env.example
+++ b/functions/.env.example
@@ -7,7 +7,8 @@ MCP_ALLOWED_FILTER_STATES=active,merged,deprecated,wip
 # Browser and other scoped clients should use allowedOrigins inside MCP_CLIENT_PROFILES_JSON.
 MCP_CLIENT_PROFILES_JSON=[{"id":"chatgpt-web","token":"replace-chatgpt-token","allowedTools":["remember_context","search_context","fetch_context"],"allowedFilterStates":["active"],"allowedOrigins":["https://chatgpt.com"]},{"id":"claude-web","token":"replace-claude-token","allowedTools":["remember_context","search_context","fetch_context"],"allowedFilterStates":["active"],"allowedOrigins":["https://claude.ai"]}]
 GEMINI_EMBEDDING_MODEL=text-embedding-004
-GEMINI_MULTIMODAL_MODEL=gemini-3.1-flash-lite-preview
+GEMINI_MULTIMODAL_MODEL=gemini-3.1-flash-lite
+GEMINI_GENERATION_VERTEX_LOCATION=global
 GEMINI_EMBEDDING_DIMENSIONS=768
 MEMORY_COLLECTION=memory_vectors
 SEARCH_RESULT_LIMIT=5
diff --git a/functions/package.json b/functions/package.json
index 1c2285f..25c1a05 100644
--- a/functions/package.json
+++ b/functions/package.json
@@ -10,9 +10,11 @@
   "scripts": {
     "build": "tsc -p tsconfig.json",
     "clean": "node -e \"const fs=require('fs'); fs.rmSync('lib',{recursive:true,force:true}); fs.rmSync('coverage',{recursive:true,force:true});\"",
+    "backfill:ttl": "node scripts/backfill-firestore-ttl.mjs",
     "serve": "cd .. && firebase emulators:start --only functions,firestore",
     "shell": "firebase functions:shell",
     "smoke": "node scripts/mcp-smoke-test.mjs",
+    "validate:models": "node scripts/validate-models.mjs",
     "test": "vitest run --coverage",
     "test:watch": "vitest"
   },
diff --git a/functions/scripts/backfill-firestore-ttl.mjs b/functions/scripts/backfill-firestore-ttl.mjs
new file mode 100644
index 0000000..d7f256f
--- /dev/null
+++ b/functions/scripts/backfill-firestore-ttl.mjs
@@ -0,0 +1,262 @@
+import fs from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { getApps, initializeApp } from "firebase-admin/app";
+import { getFirestore } from "firebase-admin/firestore";
+
+const scriptDir = path.dirname(fileURLToPath(import.meta.url));
+const functionsDir = path.resolve(scriptDir, "..");
+const repoRoot = path.resolve(functionsDir, "..");
+const explicitEnvKeys = new Set(Object.keys(process.env));
+const loadedEnv = {};
+
+for (const fileName of [".env", ".env.prod"]) {
+  loadEnvFile(path.join(functionsDir, fileName), loadedEnv);
+}
+
+for (const [key, value] of Object.entries(loadedEnv)) {
+  if (!explicitEnvKeys.has(key)) {
+    process.env[key] = value;
+  }
+}
+
+const args = process.argv.slice(2);
+const write = args.includes("--write");
+const projectId =
+  readArg("project") ??
+  process.env.GOOGLE_CLOUD_PROJECT ??
+  process.env.GCLOUD_PROJECT ??
+  readFirebaseProject(repoRoot) ??
+  "my-brain-88870";
+const memoryCollection =
+  readArg("memory-collection") ??
+  process.env.MEMORY_COLLECTION?.trim() ??
+  "memory_vectors";
+const batchSize = positiveInt(readArg("batch-size"), 250, "batch-size");
+
+if (getApps().length === 0) {
+  initializeApp({ projectId });
+}
+
+const firestore = getFirestore();
+const fingerprintCollection = `${memoryCollection}_write_fingerprints`;
+
+console.log(`project: ${projectId}`);
+console.log(`mode: ${write ? "write" : "dry-run"}`);
+console.log(`fingerprint collection: ${fingerprintCollection}`);
+console.log("event collection: memory_events");
+
+const fingerprintResult = await backfillFingerprints(
+  firestore.collection(fingerprintCollection),
+  batchSize,
+  write
+);
+const eventResult = await backfillEvents(
+  firestore.collection("memory_events"),
+  batchSize,
+  write
+);
+
+console.log("fingerprints:", fingerprintResult);
+console.log("memory_events:", eventResult);
+
+if (!write) {
+  console.log("Dry run complete. Re-run with --write to apply updates.");
+}
+
+async function backfillFingerprints(collection, batchLimit, shouldWrite) {
+  // Plans (and, when shouldWrite is set, commits) TTL fields for fingerprint docs. The collection
+  // is read in one pass; above roughly 100k docs, switch to paginated reads with limit/startAfter.
+  const snapshot = await collection.get();
+  const updates = [];
+  let skipped = 0;
+
+  for (const doc of snapshot.docs) {
+    const data = doc.data();
+    const update = {};
+    const dedupeExpiresAt =
+      typeof data.dedupe_expires_at === "number"
+        ? data.dedupe_expires_at
+        : typeof data.expires_at === "number"
+          ? data.expires_at
+          : undefined;
+    const updatedAt =
+      typeof data.updated_at === "number"
+        ? data.updated_at
+        : typeof dedupeExpiresAt === "number"
+          ? dedupeExpiresAt - 15 * 60 * 1000
+          : undefined;
+    // Copy a legacy numeric expires_at into dedupe_expires_at when the latter is not numeric yet.
+    if (typeof dedupeExpiresAt === "number" && typeof data.dedupe_expires_at !== "number") {
+      update.dedupe_expires_at = dedupeExpiresAt;
+    }
+    // Replace any expires_at that is not a Date/Timestamp with a Date 30 days after updated_at.
+    if (!hasFirestoreTimestamp(data.expires_at)) {
+      if (typeof updatedAt !== "number") {
+        skipped += 1;
+        continue;
+      }
+
+      update.expires_at = new Date(updatedAt + 30 * 24 * 60 * 60 * 1000);
+    }
+    // Store updated_at when it was derived (numeric expiry minus 15 minutes) rather than read.
+    if (typeof data.updated_at !== "number" && typeof updatedAt === "number") {
+      update.updated_at = updatedAt;
+    }
+
+    if (Object.keys(update).length > 0) {
+      updates.push({ ref: doc.ref, update });
+    }
+  }
+  // A dry run (shouldWrite false) skips the commit and only reports the counts below.
+  if (shouldWrite) {
+    await commitUpdates(updates, batchLimit);
+  }
+
+  return {
+    scanned: snapshot.size,
+    update_count: updates.length,
+    skipped
+  };
+}
+
+async function backfillEvents(collection, batchLimit, shouldWrite) {
+  // batchLimit controls write batching only; this read is intentionally unpaged.
+  // For large collections, page reads with query.limit(batchLimit).startAfter(lastDoc).
+  const snapshot = await collection.get();
+  const updates = [];
+  let skipped = 0;
+
+  for (const doc of snapshot.docs) {
+    const data = doc.data();
+
+    if (hasFirestoreTimestamp(data.expires_at)) {
+      continue;
+    }
+
+    const timestamp =
+      typeof data.timestamp === "number"
+        ? data.timestamp
+        : timestampToMillis(data.timestamp);
+
+    if (typeof timestamp !== "number") {
+      skipped += 1;
+      continue;
+    }
+
+    updates.push({
+      ref: doc.ref,
+      update: {
+        expires_at: new Date(timestamp + 90 * 24 * 60 * 60 * 1000)
+      }
+    });
+  }
+
+  if (shouldWrite) {
+    await commitUpdates(updates, batchLimit);
+  }
+
+  return {
+    scanned: snapshot.size,
+    update_count: updates.length,
+    skipped
+  };
+}
+
+async function commitUpdates(updates, batchLimit) {
+  for (let index = 0; index < updates.length; index += batchLimit) {
+    const batch = firestore.batch();
+
+    for (const { ref, update } of updates.slice(index, index + batchLimit)) {
+      batch.set(ref, update, { merge: true });
+    }
+
+    await batch.commit();
+  }
+}
+
+function hasFirestoreTimestamp(value) {
+  return value instanceof Date || typeof value?.toDate === "function";
+}
+
+function timestampToMillis(value) {
+  if (value instanceof Date) {
+    return value.getTime();
+  }
+
+  if (typeof value?.toMillis === "function") {
+    return value.toMillis();
+  }
+
+  return undefined;
+}
+
+function readArg(name) {
+  const index = args.findIndex(arg => arg === `--${name}`);
+
+  if (index === -1) {
+    return undefined;
+  }
+
+  return args[index + 1];
+}
+
+function positiveInt(value, fallback, key) {
+  if (!value) {
+    return fallback;
+  }
+
+  const parsed = Number.parseInt(value, 10);
+
+  if (!Number.isInteger(parsed) || parsed <= 0) {
+    throw new Error(`${key} must be a positive integer`);
+  }
+
+  return parsed;
+}
+
+function loadEnvFile(filePath, target) {
+  if (!fs.existsSync(filePath)) {
+    return;
+  }
+
+  for (const rawLine of fs.readFileSync(filePath, "utf8").split(/\r?\n/)) {
+    const line = rawLine.trim();
+
+    if (!line || line.startsWith("#")) {
+      continue;
+    }
+
+    const separatorIndex = line.indexOf("=");
+
+    if (separatorIndex === -1) {
+      continue;
+    }
+
+    const key = line.slice(0, separatorIndex).trim();
+    let value = line.slice(separatorIndex + 1).trim();
+
+    if (
+      (value.startsWith("\"") && value.endsWith("\"")) ||
+      (value.startsWith("'") && value.endsWith("'"))
+    ) {
+      value = value.slice(1, -1);
+    }
+
+    target[key] = value;
+  }
+}
+
+function readFirebaseProject(rootDir) {
+  const firebaseRcPath = path.join(rootDir, ".firebaserc");
+
+  if (!fs.existsSync(firebaseRcPath)) {
+    return undefined;
+  }
+
+  const firebaseRc = JSON.parse(fs.readFileSync(firebaseRcPath, "utf8"));
+  const project = firebaseRc.projects?.prod ?? firebaseRc.projects?.default;
+
+  return typeof project === "string" && project.trim() ? project.trim() : undefined;
+}
diff --git a/functions/scripts/inspect-prod.mjs b/functions/scripts/inspect-prod.mjs
new file mode 100644
index 0000000..b10138b
--- /dev/null
+++ b/functions/scripts/inspect-prod.mjs
@@ -0,0 +1,91 @@
+import { spawn } from "node:child_process";
+import fs from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+const scriptDir = path.dirname(fileURLToPath(import.meta.url));
+const functionsDir = path.resolve(scriptDir, "..");
+const envProdPath = path.join(functionsDir, ".env.prod");
+
+console.log("Reading production environment configuration...");
+
+if (!fs.existsSync(envProdPath)) {
+  console.error(`Error: Production environment file not found at: ${envProdPath}`);
+  console.error("Please create a valid '.env.prod' file in the functions directory.");
+  process.exit(1);
+}
+
+const loadedEnv = {};
+try {
+  const fileContent = fs.readFileSync(envProdPath, "utf8");
+  for (const rawLine of fileContent.split(/\r?\n/)) {
+    const line = rawLine.trim();
+
+    if (!line || line.startsWith("#")) {
+      continue;
+    }
+
+    const separatorIndex = line.indexOf("=");
+    if (separatorIndex === -1) {
+      continue;
+    }
+
+    const key = line.slice(0, separatorIndex).trim();
+    let value = line.slice(separatorIndex + 1).trim();
+
+    if (
+      (value.startsWith("\"") && value.endsWith("\"")) ||
+      (value.startsWith("'") && value.endsWith("'"))
+    ) {
+      value = value.slice(1, -1);
+    }
+
+    loadedEnv[key] = value;
+  }
+} catch (error) {
+  console.error(`Error reading ${envProdPath}:`, error);
+  process.exit(1);
+}
+
+const baseUrl = loadedEnv.FUNCTION_BASE_URL;
+const adminToken = loadedEnv.MCP_ADMIN_TOKEN;
+
+if (!baseUrl) {
+  console.error("Error: 'FUNCTION_BASE_URL' not defined in .env.prod");
+  process.exit(1);
+}
+
+if (!adminToken) {
+  console.error("Error: 'MCP_ADMIN_TOKEN' not defined in .env.prod");
+  process.exit(1);
+}
+
+// Clean and construct URL
+const cleanBaseUrl = baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
+const targetUrl = `${cleanBaseUrl}/mcp?auth_token=${adminToken}`;
+
+console.log(`Production URL: ${cleanBaseUrl}/mcp`);
+console.log("Launching MCP Inspector...");
+
+const child = spawn("npx", [
+  "@modelcontextprotocol/inspector",
+  "--transport",
+  "http",
+  "--server-url",
+  targetUrl
+], {
+  stdio: "inherit",
+  shell: true
+});
+
+child.on("error", (error) => {
+  console.error("Failed to start the MCP Inspector process:", error);
+  process.exit(1);
+});
+
+child.on("exit", (code) => {
+  if (code !== null && code !== 0) {
+    console.error(`MCP Inspector process exited with code ${code}`);
+    process.exit(code);
+  }
+});
diff --git a/functions/scripts/mcp-smoke-test.mjs b/functions/scripts/mcp-smoke-test.mjs
index 11f3ab5..0db2841 100644
--- a/functions/scripts/mcp-smoke-test.mjs
+++ b/functions/scripts/mcp-smoke-test.mjs
@@ -1,5 +1,23 @@
 import { Client } from "@modelcontextprotocol/sdk/client/index.js";
 import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
+import fs from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+const scriptDir = path.dirname(fileURLToPath(import.meta.url));
+const functionsDir = path.resolve(scriptDir, "..");
+const explicitEnvKeys = new Set(Object.keys(process.env));
+const loadedEnv = {};
+
+for (const fileName of [".env", ".env.prod"]) {
+  loadEnvFile(path.join(functionsDir, fileName), loadedEnv);
+}
+
+for (const [key, value] of Object.entries(loadedEnv)) {
+  if (!explicitEnvKeys.has(key)) {
+    process.env[key] = value;
+  }
+}
 
 const args = process.argv.slice(2);
 
@@ -13,9 +31,12 @@ function readArg(name, fallback) {
 }
 
 const url = readArg("url", process.env.MCP_BASE_URL);
+const clientId = readArg("client-id", process.env.MCP_CLIENT_ID);
 const token = readArg(
   "token",
-  process.env.MCP_ADMIN_TOKEN ?? process.env.MCP_AUTH_TOKEN
+  resolveProfileToken(clientId) ??
+    process.env.MCP_ADMIN_TOKEN ??
+    process.env.MCP_AUTH_TOKEN
 );
 const mode = readArg(
   "mode",
@@ -34,10 +55,13 @@ const branchState = readArg(
   "branch-state",
   process.env.MCP_BRANCH_STATE ?? "active"
 );
-const imageBase64 = readArg("image-base64", process.env.MCP_IMAGE_BASE64);
+const imageFile = readArg("image-file", process.env.MCP_IMAGE_FILE);
+const imageBase64 = imageFile
+  ? fs.readFileSync(path.resolve(imageFile)).toString("base64")
+  : readArg("image-base64", process.env.MCP_IMAGE_BASE64);
 const imageMimeType = readArg(
   "image-mime-type",
-  process.env.MCP_IMAGE_MIME_TYPE
+  process.env.MCP_IMAGE_MIME_TYPE ?? inferMimeType(imageFile)
 );
 const artifactRef = readArg("artifact-ref", process.env.MCP_ARTIFACT_REF);
 
@@ -65,6 +89,7 @@ const transport = new StreamableHTTPClientTransport(new URL(url), {
     }
   }
 });
+let rememberedId;
 
 try {
   await client.connect(transport);
@@ -100,7 +125,9 @@ try {
     });
 
     console.log("\nremember_context:");
-    console.log(textContent(rememberResult));
+    const rememberText = requireSuccessfulToolResult(rememberResult, "remember_context");
+    console.log(rememberText);
+    rememberedId = extractRememberedId(rememberText);
   } else if (mode === "browser-read-write") {
     ensureTools(toolNames, ["remember_context", "search_context", "fetch_context"]);
 
@@ -125,7 +152,9 @@ try {
     });
 
     console.log("\nremember_context:");
-    console.log(textContent(rememberResult));
+    const rememberText = requireSuccessfulToolResult(rememberResult, "remember_context");
+    console.log(rememberText);
+    rememberedId = extractRememberedId(rememberText);
   } else if (mode === "search-only") {
     ensureTools(toolNames, ["search_context"]);
 
@@ -146,15 +175,15 @@ try {
   });
 
   console.log("\nsearch_context:");
-  const searchText = textContent(searchResult);
+  const searchText = requireSuccessfulToolResult(searchResult, "search_context");
   console.log(searchText);
 
   if (mode === "browser-read-write") {
-    const memoryId = extractMemoryId(searchText);
+    const memoryId = rememberedId ?? extractMemoryId(searchText);
 
     if (!memoryId) {
       throw new Error(
-        "browser-read-write mode expected search_context to return an id"
+        "browser-read-write mode expected remember_context or search_context to return an id"
       );
     }
 
@@ -166,7 +195,7 @@ try {
     });
 
     console.log("\nfetch_context:");
-    console.log(textContent(fetchResult));
+    console.log(requireSuccessfulToolResult(fetchResult, "fetch_context"));
   }
 } finally {
   await client.close().catch(() => undefined);
@@ -188,7 +217,109 @@ function ensureTools(toolNames, required) {
   }
 }
 
+function requireSuccessfulToolResult(result, toolName) {
+  const text = textContent(result);
+
+  if (result.isError) {
+    throw new Error(`${toolName} returned MCP error: ${text}`);
+  }
+
+  const payload = JSON.parse(text);
+
+  if (payload.error) {
+    throw new Error(`${toolName} returned error payload: ${text}`);
+  }
+
+  return text;
+}
+
 function extractMemoryId(searchText) {
   const payload = JSON.parse(searchText);
   return payload.matches?.[0]?.id;
 }
+
+function extractRememberedId(rememberText) {
+  const payload = JSON.parse(rememberText);
+  return payload.item?.id;
+}
+
+function loadEnvFile(filePath, target) {
+  if (!fs.existsSync(filePath)) {
+    return;
+  }
+
+  for (const rawLine of fs.readFileSync(filePath, "utf8").split(/\r?\n/)) {
+    const line = rawLine.trim();
+
+    if (!line || line.startsWith("#")) {
+      continue;
+    }
+
+    const separatorIndex = line.indexOf("=");
+
+    if (separatorIndex === -1) {
+      continue;
+    }
+
+    const key = line.slice(0, separatorIndex).trim();
+    let value = line.slice(separatorIndex + 1).trim();
+
+    if (
+      (value.startsWith("\"") && value.endsWith("\"")) ||
+      (value.startsWith("'") && value.endsWith("'"))
+    ) {
+      value = value.slice(1, -1);
+    }
+
+    target[key] = value;
+  }
+}
+
+function resolveProfileToken(profileId) {
+  if (!profileId) {
+    return undefined;
+  }
+
+  const rawProfiles = process.env.MCP_CLIENT_PROFILES_JSON;
+
+  if (!rawProfiles) {
+    return undefined;
+  }
+
+  const profiles = JSON.parse(rawProfiles);
+
+  if (!Array.isArray(profiles)) {
+    return undefined;
+  }
+
+  const profile = profiles.find(
+    candidate =>
+      candidate &&
+      typeof candidate === "object" &&
+      candidate.id === profileId
+  );
+
+  return typeof profile?.token === "string" ? profile.token : undefined;
+}
+
+function inferMimeType(filePath) {
+  if (!filePath) {
+    return undefined;
+  }
+
+  const extension = path.extname(filePath).toLowerCase();
+
+  if (extension === ".png") {
+    return "image/png";
+  }
+
+  if (extension === ".jpg" || extension === ".jpeg") {
+    return "image/jpeg";
+  }
+
+  if (extension === ".webp") {
+    return "image/webp";
+  }
+
+  return undefined;
+}
diff --git a/functions/scripts/validate-models.mjs b/functions/scripts/validate-models.mjs
new file mode 100644
index 0000000..396e204
--- /dev/null
+++ b/functions/scripts/validate-models.mjs
@@ -0,0 +1,217 @@
+import fs from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { GoogleGenAI } from "@google/genai";
+
+const scriptDir = path.dirname(fileURLToPath(import.meta.url));
+const functionsDir = path.resolve(scriptDir, "..");
+const repoRoot = path.resolve(functionsDir, "..");
+const explicitEnvKeys = new Set(Object.keys(process.env));
+const loadedEnv = {};
+// Read .env first, then .env.prod, so keys repeated in .env.prod replace those from .env.
+for (const fileName of [".env", ".env.prod"]) {
+  loadEnvFile(path.join(functionsDir, fileName), loadedEnv);
+}
+// Apply file values only for keys the process environment did not already define at startup.
+for (const [key, value] of Object.entries(loadedEnv)) {
+  if (!explicitEnvKeys.has(key)) {
+    process.env[key] = value;
+  }
+}
+
+const apiKey = requiredEnv("GEMINI_API_KEY");
+const embeddingModel = process.env.GEMINI_EMBEDDING_MODEL?.trim() || "text-embedding-004";
+const multimodalModel = process.env.GEMINI_MULTIMODAL_MODEL?.trim() || "gemini-3.1-flash-lite";
+// A blank or whitespace-only project variable counts as unset, so the lookup falls through to the
+// next source instead of yielding "" (which `??` would keep, silently selecting API-key mode below).
+const projectId =
+  process.env.GOOGLE_CLOUD_PROJECT?.trim() ||
+  process.env.GCLOUD_PROJECT?.trim() ||
+  readFirebaseProject(repoRoot);
+const embeddingVertexLocation = process.env.GEMINI_VERTEX_LOCATION?.trim() || "us-central1";
+const generationVertexLocation = process.env.GEMINI_GENERATION_VERTEX_LOCATION?.trim() || "global";
+const embeddingDimensions = positiveInt(
+  process.env.GEMINI_EMBEDDING_DIMENSIONS,
+  768,
+  "GEMINI_EMBEDDING_DIMENSIONS"
+);
+
+// Build both clients with GEMINI_API_KEY removed from the environment when Vertex AI is in use
+// (presumably so the SDK cannot pick the key up implicitly — TODO confirm); it is restored below.
+const originalGeminiApiKey = process.env.GEMINI_API_KEY;
+
+if (projectId) {
+  delete process.env.GEMINI_API_KEY;
+}
+
+// Vertex AI when a project id is known, otherwise the API key; only the location differs per client.
+const createClient = location => {
+  if (!projectId) {
+    return new GoogleGenAI({ apiKey });
+  }
+
+  return new GoogleGenAI({ vertexai: true, project: projectId, location });
+};
+
+const embeddingClient = createClient(embeddingVertexLocation);
+const multimodalClient = createClient(generationVertexLocation);
+
+// Put the key back for the remainder of the script.
+if (originalGeminiApiKey) {
+  process.env.GEMINI_API_KEY = originalGeminiApiKey;
+}
+
+console.log("Validating Gemini model configuration..."); // echo the effective settings before any model call
+console.log(`embedding model: ${embeddingModel}`);
+console.log(`embedding provider: ${projectId ? `Vertex AI (${projectId}, ${embeddingVertexLocation})` : "Gemini API key"}`);
+console.log(`embedding dimensions: ${embeddingDimensions}`);
+console.log(`multimodal model: ${multimodalModel}`);
+console.log(`generation provider: ${projectId ? `Vertex AI (${projectId}, ${generationVertexLocation})` : "Gemini API key"}`);
+
+try { // Check 1: request one embedding and confirm it has the configured number of dimensions
+  const embeddingResponse = await embeddingClient.models.embedContent({
+    model: embeddingModel,
+    contents: "MetaCortex deployment model validation.",
+    config: {
+      taskType: "RETRIEVAL_DOCUMENT",
+      title: "metacortex",
+      outputDimensionality: embeddingDimensions
+    }
+  });
+  // Only the first entry of the response's embeddings is inspected.
+  const embedding = embeddingResponse.embeddings?.[0]?.values;
+
+  if (!embedding) {
+    throw new Error("Gemini embedding validation returned no embedding data");
+  }
+  // The vector length must equal the requested outputDimensionality.
+  if (embedding.length !== embeddingDimensions) {
+    throw new Error(
+      `Embedding dimension mismatch. Expected ${embeddingDimensions}, received ${embedding.length}`
+    );
+  }
+} catch (error) {
+  if (!isMissingAdcError(error)) {
+    throw error;
+  }
+  // Only the missing-credentials case reaches this point; it is downgraded to a warning.
+  console.warn(
+    "Skipping Vertex embedding validation because Application Default Credentials are not configured locally. Production smoke tests must validate remember/search after deploy."
+  );
+}
+
+try { // Check 2: send a text prompt plus an inline base64 PNG to the multimodal model
+  const imageResponse = await multimodalClient.models.generateContent({
+    model: multimodalModel,
+    contents: [
+      {
+        role: "user",
+        parts: [
+          {
+            text: "Return exactly: ok"
+          },
+          {
+            inlineData: {
+              data:
+                "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=",
+              mimeType: "image/png"
+            }
+          }
+        ]
+      }
+    ],
+    config: {
+      responseMimeType: "text/plain",
+      temperature: 0,
+      maxOutputTokens: 8
+    }
+  });
+  // Any non-blank text counts as success; the reply is not compared against "ok".
+  if (!imageResponse.text?.trim()) {
+    throw new Error("Gemini multimodal validation returned no text output");
+  }
+} catch (error) {
+  if (!isMissingAdcError(error)) {
+    throw error;
+  }
+  // As with the embedding check, only the missing-credentials case is tolerated.
+  console.warn(
+    "Skipping Vertex multimodal validation because Application Default Credentials are not configured locally. Production multimodal smoke tests must validate image-backed memories after deploy."
+  );
+}
+
+console.log("Model validation completed.");
+
+// Loads dotenv-style KEY=VALUE lines from filePath into target; a missing file is silently ignored.
+// Later assignments of the same key overwrite earlier ones; one pair of surrounding quotes is removed.
+function loadEnvFile(filePath, target) {
+  if (!fs.existsSync(filePath)) {
+    return;
+  }
+
+  for (const rawLine of fs.readFileSync(filePath, "utf8").split(/\r?\n/)) {
+    const line = rawLine.trim();
+
+    if (!line || line.startsWith("#")) {
+      continue;
+    }
+
+    const separatorIndex = line.indexOf("=");
+    // Index 0 means an empty key ("=value"); such lines are skipped like lines without "=".
+    if (separatorIndex <= 0) {
+      continue;
+    }
+
+    const key = line.slice(0, separatorIndex).trim();
+    let value = line.slice(separatorIndex + 1).trim();
+    const quote = value[0];
+    // Require two characters so a value that is a single quote mark is not emptied.
+    if (value.length >= 2 && (quote === "\"" || quote === "'") && value.endsWith(quote)) {
+      value = value.slice(1, -1);
+    }
+
+    target[key] = value;
+  }
+}
+
+// Returns the trimmed value of process.env[key]; throws an Error when it is unset or blank.
+function requiredEnv(key) {
+  const trimmed = (process.env[key] ?? "").trim();
+  if (trimmed.length > 0) {
+    return trimmed;
+  }
+
+  throw new Error(`Missing required environment variable: ${key}`);
+}
+
+// Parses `value` as a strictly positive base-10 integer, or returns `fallback` when it is unset/empty.
+// Throws an Error naming `key` otherwise, including for "768abc" and "1.5", which parseInt alone accepts.
+function positiveInt(value, fallback, key) {
+  if (!value) {
+    return fallback;
+  }
+  const text = String(value).trim();
+  const parsed = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
+  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
+    throw new Error(`${key} must be a positive integer`);
+  }
+  return parsed;
+}
+
+function isMissingAdcError(error) { // matches the "default credentials" message text, case-sensitively
+  return error instanceof Error ? error.message.includes("default credentials") : false;
+}
+
+// Reads <rootDir>/.firebaserc and returns its trimmed projects.prod id, or projects.default when prod is null/absent.
+// Returns undefined if the file does not exist or the selected entry is not a non-blank string.
+function readFirebaseProject(rootDir) {
+  const rcFile = path.join(rootDir, ".firebaserc");
+  if (!fs.existsSync(rcFile)) {
+    return undefined;
+  }
+  const rc = JSON.parse(fs.readFileSync(rcFile, "utf8"));
+  const candidate = rc.projects?.prod ?? rc.projects?.default;
+  const trimmed = typeof candidate === "string" ? candidate.trim() : "";
+  return trimmed ? trimmed : undefined;
+}
diff --git a/functions/src/config.ts b/functions/src/config.ts
index f8dd2b9..9427044 100644
--- a/functions/src/config.ts
+++ b/functions/src/config.ts
@@ -20,6 +20,7 @@ export interface AppConfig {
   geminiApiKey: string;
   embeddingModel: string;
   multimodalModel: string;
+  generationVertexLocation: string;
   embeddingDimensions: number;
   memoryCollection: string;
   topK: number;
@@ -264,7 +265,9 @@ export function loadConfig(env: NodeJS.ProcessEnv = process.env): AppConfig {
     geminiApiKey: requireEnv(env, "GEMINI_API_KEY"),
     embeddingModel: env.GEMINI_EMBEDDING_MODEL?.trim() || "text-embedding-004",
     multimodalModel:
-      env.GEMINI_MULTIMODAL_MODEL?.trim() || "gemini-3.1-flash-lite-preview",
+      env.GEMINI_MULTIMODAL_MODEL?.trim() || "gemini-3.1-flash-lite",
+    generationVertexLocation:
+      env.GEMINI_GENERATION_VERTEX_LOCATION?.trim() || "global",
     embeddingDimensions: parsePositiveInteger(
       env.GEMINI_EMBEDDING_DIMENSIONS,
       768,
diff --git a/functions/src/embeddings.ts b/functions/src/embeddings.ts
index 031a9ac..3755b78 100644
--- a/functions/src/embeddings.ts
+++ b/functions/src/embeddings.ts
@@ -44,7 +44,10 @@ export interface MemoryContentPreparer {
 }
 
 export interface GeminiMultimodalPreparerOptions {
-  apiKey: string;
+  apiKey?: string;
+  vertexai?: boolean;
+  project?: string;
+  location?: string;
   model: string;
 }
 
@@ -53,7 +56,7 @@ export class GeminiEmbeddingClient implements EmbeddingClient {
 
   constructor(private readonly options: GeminiEmbeddingClientOptions) {
     this.client = options.vertexai
-      ? new GoogleGenAI({
+      ? createVertexClient({
           vertexai: true,
           project: options.project,
           location: options.location ?? "us-central1"
@@ -92,9 +95,15 @@ export class GeminiMultimodalPreparer implements MemoryContentPreparer {
   private readonly client: GoogleGenAI;
 
   constructor(private readonly options: GeminiMultimodalPreparerOptions) {
-    this.client = new GoogleGenAI({
-      apiKey: options.apiKey
-    });
+    this.client = options.vertexai
+      ? createVertexClient({
+          vertexai: true,
+          project: options.project,
+          location: options.location ?? "us-central1"
+        })
+      : new GoogleGenAI({
+          apiKey: options.apiKey!
+        });
   }
 
   async prepare(input: MemoryPreparationInput): Promise<PreparedMemoryContent> {
@@ -193,3 +202,21 @@ function buildImageNormalizationPrompt(
     "Return plain text only."
   ].join("\n\n");
 }
+
+// Creates a Vertex AI GoogleGenAI client with GEMINI_API_KEY removed from process.env during construction
+// (presumably so the SDK cannot pick the key up from the environment — TODO confirm), then restores it.
+function createVertexClient(options: {
+  vertexai: true;
+  project?: string;
+  location: string;
+}): GoogleGenAI {
+  const savedKey = process.env.GEMINI_API_KEY;
+  delete process.env.GEMINI_API_KEY;
+  try {
+    return new GoogleGenAI(options);
+  } finally {
+    if (savedKey !== undefined) {
+      process.env.GEMINI_API_KEY = savedKey;
+    }
+  }
+}
diff --git a/functions/src/mcpServer.ts b/functions/src/mcpServer.ts
index f717d00..2593c9d 100644
--- a/functions/src/mcpServer.ts
+++ b/functions/src/mcpServer.ts
@@ -81,9 +81,67 @@ export function createMetaCortexMcpServer(
       id: z
         .string()
         .min(1)
+        .optional()
         .describe(
           "The stable memory id returned by remember_context or search_context."
+        ),
+      document_id: z
+        .string()
+        .min(1)
+        .optional()
+        .describe(
+          "Compatibility alias for id. Prefer id for new clients."
+        )
+    })
+    .superRefine((value, ctx) => {
+      const id = normalizeOptionalText(value.id);
+      const documentId = normalizeOptionalText(value.document_id);
+
+      if (!id && !documentId) {
+        ctx.addIssue({
+          code: "custom",
+          path: ["id"],
+          message: "Provide id or document_id"
+        });
+      }
+
+      if (id && documentId && id !== documentId) {
+        ctx.addIssue({
+          code: "custom",
+          path: ["document_id"],
+          message: "id and document_id must match when both are provided"
+        });
+      }
+    });
+  const consolidateContextInputSchema = z
+    .object({
+      topic: z
+        .string()
+        .optional()
+        .describe(
+          "Topic whose WIP memory queue will be consolidated. Defaults to general. Ignored when source_ids is provided — in that case topic labels the merged output."
+        ),
+      source_ids: z
+        .array(z.string().min(1))
+        .optional()
+        .describe(
+          "Explicit list of unique memory ids to consolidate. When provided, these memories are merged regardless of their branch_state. At least 2 ids required."
         )
+    })
+    .superRefine((value, ctx) => {
+      if (!value.source_ids) {
+        return;
+      }
+
+      const uniqueIds = new Set(value.source_ids);
+
+      if (uniqueIds.size !== value.source_ids.length) {
+        ctx.addIssue({
+          code: "custom",
+          path: ["source_ids"],
+          message: "source_ids must be unique"
+        });
+      }
     });
   const server = new McpServer(
     {
@@ -250,7 +308,8 @@ export function createMetaCortexMcpServer(
       },
       async args => {
         const requestSummary = {
-          id: args.id
+          id: args.id ?? args.document_id,
+          used_document_id_alias: Boolean(args.document_id && !args.id)
         };
         const result = await observeToolCall(
           "fetch_context",
@@ -264,8 +323,8 @@ export function createMetaCortexMcpServer(
               )
             ) {
               throw new HttpError(
-                403,
-                `branch_state '${fetched.item.metadata.branch_state}' is not allowed for this client`
+                404,
+                "Document not found"
               );
             }
 
@@ -335,20 +394,7 @@ export function createMetaCortexMcpServer(
         title: "Consolidate Context",
         description:
           "Merge multiple related memories into one canonical active memory. By default, consolidates all WIP (draft) memories for a topic. Pass source_ids to consolidate specific memories regardless of their current state. Deprecates all source memories and links them to the merged result.",
-        inputSchema: {
-          topic: z
-            .string()
-            .optional()
-            .describe(
-              "Topic whose WIP memory queue will be consolidated. Defaults to general. Ignored when source_ids is provided — in that case topic labels the merged output."
-            ),
-          source_ids: z
-            .array(z.string().min(1))
-            .optional()
-            .describe(
-              "Explicit list of memory ids to consolidate. When provided, these memories are merged regardless of their branch_state. At least 2 ids required."
-            )
-        }
+        inputSchema: consolidateContextInputSchema
       },
       async args => {
         const requestSummary = {
diff --git a/functions/src/memoryRepository.ts b/functions/src/memoryRepository.ts
index 2a40226..3c8b75e 100644
--- a/functions/src/memoryRepository.ts
+++ b/functions/src/memoryRepository.ts
@@ -43,11 +43,13 @@ interface FirestoreMemoryDocument {
 
 interface FirestoreWriteFingerprintDocument {
   id: string;
-  expires_at: number;
-  updated_at: number;
+  dedupe_expires_at?: number;
+  expires_at?: unknown;
+  updated_at?: number;
 }
 
 const WRITE_FINGERPRINT_WINDOW_MS = 15 * 60 * 1000;
+const WRITE_FINGERPRINT_TTL_MS = 30 * 24 * 60 * 60 * 1000;
 
 export class FirestoreMemoryRepository implements MemoryRepository {
   private readonly fingerprintCollectionName: string;
@@ -70,8 +72,9 @@ export class FirestoreMemoryRepository implements MemoryRepository {
 
       if (fingerprintSnapshot.exists) {
         const fingerprint = fingerprintSnapshot.data() as FirestoreWriteFingerprintDocument;
+        const dedupeExpiresAt = getDedupeExpiresAt(fingerprint);
 
-        if (fingerprint.expires_at >= now) {
+        if (typeof dedupeExpiresAt === "number" && dedupeExpiresAt >= now) {
           const existingSnapshot = await transaction.get(
             this.firestore.collection(this.collectionName).doc(fingerprint.id)
           );
@@ -98,7 +101,8 @@ export class FirestoreMemoryRepository implements MemoryRepository {
       });
       transaction.set(fingerprintRef, {
         id: docRef.id,
-        expires_at: now + WRITE_FINGERPRINT_WINDOW_MS,
+        dedupe_expires_at: now + WRITE_FINGERPRINT_WINDOW_MS,
+        expires_at: new Date(now + WRITE_FINGERPRINT_TTL_MS),
         updated_at: now
       });
 
@@ -210,6 +214,20 @@ export class FirestoreMemoryRepository implements MemoryRepository {
   }
 }
 
+// Returns the dedupe deadline: `dedupe_expires_at` when numeric, otherwise a legacy numeric `expires_at`.
+// Any other shape (for example `expires_at` stored as a Date) yields undefined.
+function getDedupeExpiresAt(
+  fingerprint: FirestoreWriteFingerprintDocument
+): number | undefined {
+  const { dedupe_expires_at: current, expires_at: legacy } = fingerprint;
+
+  if (typeof current === "number") {
+    return current;
+  }
+
+  return typeof legacy === "number" ? legacy : undefined;
+}
+
 function mapFirestoreDocument(
   id: string,
   data: FirestoreMemoryDocument
diff --git a/functions/src/merging.ts b/functions/src/merging.ts
index ad1877c..8cc0c46 100644
--- a/functions/src/merging.ts
+++ b/functions/src/merging.ts
@@ -14,7 +14,10 @@ export interface LlmMergeClient {
 }
 
 export interface GeminiMergeClientOptions {
-  apiKey: string;
+  apiKey?: string;
+  vertexai?: boolean;
+  project?: string;
+  location?: string;
   model: string;
 }
 
@@ -22,7 +25,13 @@ export class GeminiMergeClient implements LlmMergeClient {
   private readonly client: GoogleGenAI;
 
   constructor(private readonly options: GeminiMergeClientOptions) {
-    this.client = new GoogleGenAI({ apiKey: options.apiKey });
+    this.client = options.vertexai
+      ? createVertexClient({
+          vertexai: true,
+          project: options.project,
+          location: options.location ?? "us-central1"
+        })
+      : new GoogleGenAI({ apiKey: options.apiKey! });
   }
 
   async merge(request: MergeMemoriesRequest): Promise<MergeMemoriesResult> {
@@ -72,3 +81,21 @@ function buildMergePrompt(request: MergeMemoriesRequest): string {
     sourceList
   ].join("\n");
 }
+
+// Creates a Vertex AI GoogleGenAI client with GEMINI_API_KEY removed from process.env during construction
+// (presumably so the SDK cannot pick the key up from the environment — TODO confirm), then restores it.
+function createVertexClient(options: {
+  vertexai: true;
+  project?: string;
+  location: string;
+}): GoogleGenAI {
+  const savedKey = process.env.GEMINI_API_KEY;
+  delete process.env.GEMINI_API_KEY;
+  try {
+    return new GoogleGenAI(options);
+  } finally {
+    if (savedKey !== undefined) {
+      process.env.GEMINI_API_KEY = savedKey;
+    }
+  }
+}
diff --git a/functions/src/observability.ts b/functions/src/observability.ts
index 1e6cb61..264745c 100644
--- a/functions/src/observability.ts
+++ b/functions/src/observability.ts
@@ -5,6 +5,7 @@ import { Firestore } from "firebase-admin/firestore";
 import type { McpToolName } from "./types.js";
 
 export const MEMORY_EVENT_COLLECTION = "memory_events";
+const MEMORY_EVENT_TTL_MS = 90 * 24 * 60 * 60 * 1000;
 
 export type RequestEventReason =
   | "origin_not_allowed"
@@ -23,6 +24,7 @@ export interface ToolCallEvent {
   tool_name: McpToolName;
   status: "success" | "error";
   timestamp: number;
+  expires_at: Date;
   latency_ms?: number;
   request: Record<string, unknown>;
   response?: Record<string, unknown>;
@@ -39,6 +41,7 @@ export interface RequestEvent {
   status_code: number;
   reason: RequestEventReason;
   timestamp: number;
+  expires_at: Date;
   latency_ms?: number;
 }
 
@@ -78,10 +81,12 @@ export class FirestoreToolCallObserver implements ToolCallObserver {
   ) {}
 
   async record(input: RecordToolCallEventInput): Promise<void> {
+    const timestamp = input.timestamp ?? Date.now();
     const event: ToolCallEvent = {
       event_id: randomUUID(),
       event_type: "tool_call",
-      timestamp: input.timestamp ?? Date.now(),
+      timestamp,
+      expires_at: new Date(timestamp + MEMORY_EVENT_TTL_MS),
       client_id: input.client_id,
       tool_name: input.tool_name,
       status: input.status,
@@ -97,10 +102,12 @@ export class FirestoreToolCallObserver implements ToolCallObserver {
   }
 
   async recordRequest(input: RecordRequestEventInput): Promise<void> {
+    const timestamp = input.timestamp ?? Date.now();
     const event: RequestEvent = {
       event_id: randomUUID(),
       event_type: "request",
-      timestamp: input.timestamp ?? Date.now(),
+      timestamp,
+      expires_at: new Date(timestamp + MEMORY_EVENT_TTL_MS),
       client_id: input.client_id,
       method: input.method,
       path: input.path,
@@ -116,13 +123,15 @@ export class FirestoreToolCallObserver implements ToolCallObserver {
   }
 
   private async persist(message: string, event: ObservabilityEvent): Promise<void> {
-    console.info(message, event);
+    const sanitizedEvent = stripUndefined(event) as ObservabilityEvent;
+
+    console.info(message, sanitizedEvent);
 
     try {
       await this.firestore
         .collection(this.collectionName)
-        .doc(event.event_id)
-        .set(event);
+        .doc(sanitizedEvent.event_id)
+        .set(sanitizedEvent);
     } catch (error) {
       console.error("metaCortexMcp observability event persist failed", {
         event_id: event.event_id,
@@ -148,3 +157,29 @@ function serializeUnknownError(error: unknown): Record<string, unknown> {
     value: error
   };
 }
+
+// Recursively drops `undefined` array items and object properties; Dates and non-objects are returned as-is.
+function stripUndefined(value: unknown): unknown {
+  if (value instanceof Date || value === null || typeof value !== "object") {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    const kept: unknown[] = [];
+    for (const item of value) {
+      const cleaned = stripUndefined(item);
+      if (cleaned !== undefined) {
+        kept.push(cleaned);
+      }
+    }
+    return kept;
+  }
+  // Every remaining object is rebuilt from its own enumerable entries.
+  const entries: Array<[string, unknown]> = [];
+  for (const [key, item] of Object.entries(value)) {
+    const cleaned = stripUndefined(item);
+    if (cleaned !== undefined) {
+      entries.push([key, cleaned]);
+    }
+  }
+  return Object.fromEntries(entries);
+}
diff --git a/functions/src/runtime.ts b/functions/src/runtime.ts
index cd091f9..c93b26a 100644
--- a/functions/src/runtime.ts
+++ b/functions/src/runtime.ts
@@ -51,11 +51,27 @@ function createRuntimeFromConfig(config: AppConfig): RuntimeDependencies {
         }
   );
   const contentPreparer: MemoryContentPreparer = new GeminiMultimodalPreparer({
-    apiKey: config.geminiApiKey,
+    ...(gcpProject
+      ? {
+          vertexai: true,
+          project: gcpProject,
+          location: config.generationVertexLocation
+        }
+      : {
+          apiKey: config.geminiApiKey
+        }),
     model: config.multimodalModel
   });
   const mergeClient: LlmMergeClient = new GeminiMergeClient({
-    apiKey: config.geminiApiKey,
+    ...(gcpProject
+      ? {
+          vertexai: true,
+          project: gcpProject,
+          location: config.generationVertexLocation
+        }
+      : {
+          apiKey: config.geminiApiKey
+        }),
     model: config.multimodalModel
   });
   const repository: MemoryRepository = new FirestoreMemoryRepository(
diff --git a/functions/src/service.ts b/functions/src/service.ts
index 6cb0f45..4622902 100644
--- a/functions/src/service.ts
+++ b/functions/src/service.ts
@@ -130,7 +130,7 @@ export class MetaCortexService {
   }
 
   async fetchContext(input: FetchContextInput): Promise<FetchContextResult> {
-    const id = normalizeRequiredText(input.id, "id");
+    const id = resolveFetchContextId(input);
     const item = await this.repository.get(id);
 
     if (!item) {
@@ -177,8 +177,9 @@ export class MetaCortexService {
     let sources: Array<{ id: string; content: string }>;
 
     if (input.source_ids && input.source_ids.length > 0) {
+      const uniqueSourceIds = [...new Set(input.source_ids)];
       const fetched = await Promise.all(
-        input.source_ids.map(id => this.repository.get(id))
+        uniqueSourceIds.map(id => this.repository.get(id))
       );
 
       for (const doc of fetched) {
@@ -235,6 +236,23 @@ function normalizeRequiredText(value: string, fieldName: string): string {
   return normalized;
 }
 
+// Resolves the memory id for fetch_context from `id` or its alias `document_id`.
+// Throws HttpError 400 when both are given but differ, or when neither yields a value.
+function resolveFetchContextId(input: FetchContextInput): string {
+  const primaryId = normalizeOptionalText(input.id);
+  const aliasId = normalizeOptionalText(input.document_id);
+  const conflicting = Boolean(primaryId && aliasId) && primaryId !== aliasId;
+  if (conflicting) {
+    throw new HttpError(400, "id and document_id must match when both are provided");
+  }
+
+  const resolved = primaryId ?? aliasId;
+  if (resolved) {
+    return resolved;
+  }
+  throw new HttpError(400, "id or document_id must be provided");
+}
+
 export function buildSearchPayload(result: SearchContextResult): Record<string, unknown> {
   return {
     matches: result.matches.map(match => ({
@@ -243,7 +261,6 @@ export function buildSearchPayload(result: SearchContextResult): Record<string,
       ...(typeof match.distance === "number"
         ? { score: distanceToScore(match.distance) }
         : {}),
-      content_preview: previewMemoryContent(match.content),
       metadata: buildPublicMetadata(match)
     })),
     applied_filters: {
@@ -378,16 +395,6 @@ function distanceToScore(distance: number): number {
   return Math.max(0, Number((1 - distance).toFixed(6)));
 }
 
-function previewMemoryContent(value: string, limit = 400): string {
-  const normalized = value.replace(/\s+/g, " ").trim();
-
-  if (normalized.length <= limit) {
-    return normalized;
-  }
-
-  return `${normalized.slice(0, limit - 3)}...`;
-}
-
 function normalizePublicModality(value: string): "text" | "image" | "mixed" {
   if (value === "text_image") {
     return "mixed";
diff --git a/functions/src/types.ts b/functions/src/types.ts
index 08c077a..359f0be 100644
--- a/functions/src/types.ts
+++ b/functions/src/types.ts
@@ -91,7 +91,8 @@ export interface SearchContextResult {
 }
 
 export interface FetchContextInput {
-  id: string;
+  id?: string;
+  document_id?: string;
 }
 
 export interface FetchContextResult {
diff --git a/functions/test/app.test.ts b/functions/test/app.test.ts
index 559fe04..beeb046 100644
--- a/functions/test/app.test.ts
+++ b/functions/test/app.test.ts
@@ -56,6 +56,7 @@ describe("createMetaCortexApp", () => {
         reason: "unauthorized"
       })
     );
+    expect(runtime.observer.listEvents()[0]?.expires_at).toBeInstanceOf(Date);
   });
 
   it("surfaces configuration failures as 500s", async () => {
diff --git a/functions/test/config.test.ts b/functions/test/config.test.ts
index 8627701..3ed06b9 100644
--- a/functions/test/config.test.ts
+++ b/functions/test/config.test.ts
@@ -19,7 +19,8 @@ describe("loadConfig", () => {
     });
 
     expect(config.embeddingModel).toBe("text-embedding-004");
-    expect(config.multimodalModel).toBe("gemini-3.1-flash-lite-preview");
+    expect(config.multimodalModel).toBe("gemini-3.1-flash-lite");
+    expect(config.generationVertexLocation).toBe("global");
     expect(config.embeddingDimensions).toBe(768);
     expect(config.defaultFilterState).toBe("active");
     expect(config.topK).toBe(5);
diff --git a/functions/test/mcp.integration.test.ts b/functions/test/mcp.integration.test.ts
index 37bca05..46ddee4 100644
--- a/functions/test/mcp.integration.test.ts
+++ b/functions/test/mcp.integration.test.ts
@@ -101,7 +101,8 @@ describe("MCP integration", () => {
       description: expect.stringContaining("returned by remember_context"),
       inputSchema: {
         properties: expect.objectContaining({
-          id: expect.any(Object)
+          id: expect.any(Object),
+          document_id: expect.any(Object)
         })
       }
     });
@@ -138,7 +139,8 @@ describe("MCP integration", () => {
       }
     });
 
-    expect(parseJsonTextContent(searchResult)).toMatchObject({
+    const searchPayload = parseJsonTextContent(searchResult);
+    expect(searchPayload).toMatchObject({
       matches: [
         {
           id: "memory-1",
@@ -154,6 +156,7 @@ describe("MCP integration", () => {
         filter_state: "active"
       }
     });
+    expect(searchPayload.matches?.[0]).not.toHaveProperty("content_preview");
 
     const replacementResult = await client.callTool({
       name: "remember_context",
@@ -275,7 +278,8 @@ describe("MCP integration", () => {
       }
     });
 
-    expect(parseJsonTextContent(searchResult)).toMatchObject({
+    const scopedSearchPayload = parseJsonTextContent(searchResult);
+    expect(scopedSearchPayload).toMatchObject({
       matches: [
         {
           id: "memory-1",
@@ -283,6 +287,7 @@ describe("MCP integration", () => {
         }
       ]
     });
+    expect(scopedSearchPayload.matches?.[0]).not.toHaveProperty("content_preview");
     const disallowedResult = await client.callTool({
       name: "remember_context",
       arguments: {
@@ -368,6 +373,98 @@ describe("MCP integration", () => {
     expect(typeof payload.item.merged_id).toBe("string");
   });
 
+  it("rejects duplicate source_ids for consolidate_context", async () => {
+    const runtime = createTestRuntime();
+    const baseUrl = await startServer(
+      createMetaCortexApp({
+        getConfig: () => runtime.config,
+        getObserver: () => runtime.observer,
+        getRuntime: () => runtime
+      }),
+      cleanup
+    );
+    // Connect an MCP client to the /mcp endpoint with the "test" bearer header.
+    const client = new Client({ name: "test-client", version: "1.0.0" });
+    const transport = new StreamableHTTPClientTransport(new URL(`${baseUrl}/mcp`), {
+      requestInit: {
+        headers: { [authorizationHeaderName]: bearerHeader("test") }
+      }
+    });
+    cleanup.push(async () => { await client.close(); });
+    await client.connect(transport);
+    // The same id is listed twice in source_ids.
+    const result = await client.callTool({
+      name: "consolidate_context",
+      arguments: {
+        topic: "kmp-networking",
+        source_ids: ["memory-1", "memory-1"]
+      }
+    });
+    // The call must come back as a tool error that carries the uniqueness message.
+    expect(result.isError).toBe(true);
+    expect(textContent(result)).toContain("source_ids must be unique");
+  });
+
+  it("returns neutral 404 when fetch_context targets a disallowed branch state", async () => {
+    const runtime = createTestRuntime({
+      clientProfiles: [
+        {
+          id: "chatgpt-web",
+          [authTokenField]: accessCredential("chatgpt"),
+          allowedOrigins: ["https://chatgpt.com"],
+          allowedTools: ["fetch_context"],
+          allowedFilterStates: ["active"]
+        }
+      ]
+    });
+    await runtime.service.storeContext({
+      content: "Deprecated networking note.",
+      module_name: "kmp-networking",
+      branch_state: "deprecated"
+    });
+    const baseUrl = await startServer(
+      createMetaCortexApp({
+        getConfig: () => runtime.config,
+        getObserver: () => runtime.observer,
+        getRuntime: () => runtime
+      }),
+      cleanup
+    );
+    // Connect through the chatgpt-web profile route; that profile only allows the "active" filter state.
+    const client = new Client({ name: "chatgpt-client", version: "1.0.0" });
+    const transport = new StreamableHTTPClientTransport(
+      new URL(`${baseUrl}/clients/chatgpt-web/mcp`),
+      {
+        requestInit: {
+          headers: {
+            [authorizationHeaderName]: bearerHeader("chatgpt")
+          }
+        }
+      }
+    );
+    cleanup.push(async () => { await client.close(); });
+    await client.connect(transport);
+    // "memory-1" is presumably the id given to the deprecated memory stored above — verify against the test runtime.
+    const result = await client.callTool({
+      name: "fetch_context",
+      arguments: {
+        id: "memory-1"
+      }
+    });
+    // Both the tool response and the last recorded observer event must report "Document not found" (404).
+    expect(result.isError).toBe(true);
+    expect(textContent(result)).toContain("Document not found");
+    expect(runtime.observer.listEvents().at(-1)).toMatchObject({
+      client_id: "chatgpt-web",
+      tool_name: "fetch_context",
+      status: "error",
+      error: {
+        message: "Document not found",
+        status_code: 404
+      }
+    });
+  });
+
   it("supports ChatGPT web remember, search, and fetch flows", async () => {
     const runtime = createTestRuntime({
       clientProfiles: [
@@ -466,7 +563,7 @@ describe("MCP integration", () => {
     const fetchResult = await client.callTool({
       name: "fetch_context",
       arguments: {
-        id: "memory-1"
+        document_id: "memory-1"
       }
     });
 
@@ -484,6 +581,19 @@ describe("MCP integration", () => {
       "item.retrieval_text"
     );
 
+    const conflictingFetchResult = await client.callTool({
+      name: "fetch_context",
+      arguments: {
+        id: "memory-1",
+        document_id: "memory-2"
+      }
+    });
+
+    expect(conflictingFetchResult.isError).toBe(true);
+    expect(textContent(conflictingFetchResult)).toContain(
+      "id and document_id must match"
+    );
+
     expect(runtime.observer.listEvents()).toMatchObject([
       {
         client_id: "chatgpt-web",
diff --git a/functions/test/memoryRepository.test.ts b/functions/test/memoryRepository.test.ts
new file mode 100644
index 0000000..db563c6
--- /dev/null
+++ b/functions/test/memoryRepository.test.ts
@@ -0,0 +1,182 @@
+import { describe, expect, it } from "vitest";
+import type { Firestore } from "firebase-admin/firestore";
+
+import { FirestoreMemoryRepository } from "../src/memoryRepository.js";
+import type { MemoryMetadata } from "../src/types.js";
+
+describe("FirestoreMemoryRepository", () => {
+  // A first-time store() must leave a fingerprint holding both the numeric
+  // dedupe deadline and the Date-valued TTL deadline.
+  it("writes separate dedupe and TTL expiration fields for fingerprints", async () => {
+    const nowMs = 1_700_000_000_000;
+    const dayMs = 24 * 60 * 60 * 1000;
+    const fakeDb = new FakeFirestore();
+    const repo = new FirestoreMemoryRepository(fakeDb as unknown as Firestore, "memory_vectors");
+    const text = "Ktor networking memory.";
+
+    await repo.store({
+      content: text,
+      retrievalText: text,
+      embedding: [1, 0, 0],
+      idempotencyKey: "fingerprint-1",
+      metadata: buildMetadata(nowMs)
+    });
+
+    const stored = fakeDb.getRawDocument(
+      "memory_vectors_write_fingerprints",
+      "fingerprint-1"
+    );
+    const ttlDeadline = stored?.expires_at;
+
+    expect(stored).toMatchObject({
+      id: "memory-1",
+      dedupe_expires_at: nowMs + 15 * 60 * 1000,
+      updated_at: nowMs
+    });
+    expect(ttlDeadline).toBeInstanceOf(Date);
+    expect((ttlDeadline as Date).getTime()).toBe(nowMs + 30 * dayMs);
+  });
+
+  // A pre-seeded fingerprint that only carries a numeric expires_at (the legacy
+  // shape) must still route store() to the document it already points at.
+  it("treats legacy numeric fingerprint expires_at as the dedupe window", async () => {
+    const nowMs = 1_700_000_000_000;
+    const sharedMetadata = buildMetadata(nowMs);
+    const existingText = "Existing Ktor networking memory.";
+    const incomingText = "New Ktor networking memory.";
+    const fakeDb = new FakeFirestore();
+    const repo = new FirestoreMemoryRepository(fakeDb as unknown as Firestore, "memory_vectors");
+
+    fakeDb.setRawDocument("memory_vectors", "memory-existing", {
+      content: existingText,
+      retrieval_text: existingText,
+      embedding: [1, 0, 0],
+      metadata: sharedMetadata
+    });
+    fakeDb.setRawDocument("memory_vectors_write_fingerprints", "fingerprint-1", {
+      id: "memory-existing",
+      expires_at: nowMs + 15 * 60 * 1000,
+      updated_at: nowMs
+    });
+
+    const outcome = await repo.store({
+      content: incomingText,
+      retrievalText: incomingText,
+      embedding: [1, 0, 0],
+      idempotencyKey: "fingerprint-1",
+      metadata: sharedMetadata
+    });
+
+    expect(outcome.created).toBe(false);
+    expect(outcome.document.id).toBe("memory-existing");
+    expect(outcome.document.content).toBe(existingText);
+  });
+});
+
+function buildMetadata(now: number): MemoryMetadata {
+  const timestamps = { created_at: now, updated_at: now }; // one instant for both
+  return {
+    module_name: "kmp-networking",
+    branch_state: "active",
+    ...timestamps,
+    modality: "text"
+  };
+}
+
+// In-memory stand-in for the Firestore surface these tests touch: named
+// collections of raw documents, sequential ids, and pass-through transactions.
+class FakeFirestore {
+  private readonly collections = new Map<string, Map<string, Record<string, unknown>>>();
+  private nextId = 1;
+
+  collection(name: string): FakeCollectionReference {
+    return new FakeCollectionReference(this, name);
+  }
+
+  // Invokes the callback once with a transaction bound to this store.
+  async runTransaction<T>(callback: (transaction: FakeTransaction) => Promise<T>): Promise<T> {
+    const transaction = new FakeTransaction(this);
+    return callback(transaction);
+  }
+
+  getRawDocument(collectionName: string, documentId: string): Record<string, unknown> | undefined {
+    const documents = this.collections.get(collectionName);
+    return documents === undefined ? undefined : documents.get(documentId);
+  }
+
+  setRawDocument(collectionName: string, documentId: string, data: Record<string, unknown>): void {
+    const documents = this.ensureCollection(collectionName);
+    documents.set(documentId, data);
+  }
+
+  // Hands out memory-1, memory-2, ... in call order.
+  createDocumentId(): string {
+    const id = `memory-${this.nextId}`;
+    this.nextId += 1;
+    return id;
+  }
+
+  // Returns the map for `name`, registering an empty one on first use.
+  private ensureCollection(name: string): Map<string, Record<string, unknown>> {
+    const existing = this.collections.get(name);
+    if (existing) {
+      return existing;
+    }
+    const created = new Map<string, Record<string, unknown>>();
+    this.collections.set(name, created);
+    return created;
+  }
+}
+
+// Collection handle whose only capability is minting document references.
+class FakeCollectionReference {
+  constructor(
+    private readonly firestore: FakeFirestore,
+    private readonly name: string
+  ) {}
+
+  // Without an explicit id, the next sequential id is drawn from the store.
+  doc(documentId?: string): FakeDocumentReference {
+    const resolvedId = documentId ?? this.firestore.createDocumentId();
+
+    return new FakeDocumentReference(this.firestore, this.name, resolvedId);
+  }
+}
+
+class FakeDocumentReference { // Address (collection name + id) of one fake document.
+  constructor(
+    private readonly firestore: FakeFirestore,
+    readonly collectionName: string,
+    readonly id: string
+  ) {}
+  // Looks the document up in the backing store on every access (no caching).
+  get data(): Record<string, unknown> | undefined {
+    return this.firestore.getRawDocument(this.collectionName, this.id);
+  }
+}
+
+// Transaction double whose reads and writes go straight to the backing store.
+class FakeTransaction {
+  constructor(private readonly firestore: FakeFirestore) {}
+  async get(ref: FakeDocumentReference): Promise<FakeDocumentSnapshot> {
+    return new FakeDocumentSnapshot(ref.id, ref.data);
+  }
+  set(ref: FakeDocumentReference, data: Record<string, unknown>): void {
+    const { collectionName, id } = ref;
+    this.firestore.setRawDocument(collectionName, id, data);
+  }
+}
+
+// Snapshot double wrapping the raw value handed in at construction time.
+class FakeDocumentSnapshot {
+  constructor(
+    readonly id: string,
+    private readonly value: Record<string, unknown> | undefined
+  ) {}
+  // True whenever a document payload was supplied, even an empty object.
+  get exists(): boolean {
+    return this.value !== undefined;
+  }
+  data(): Record<string, unknown> | undefined {
+    return this.value;
+  }
+}
diff --git a/functions/test/service.test.ts b/functions/test/service.test.ts
index 383a3d0..451f28b 100644
--- a/functions/test/service.test.ts
+++ b/functions/test/service.test.ts
@@ -194,6 +194,38 @@ describe("MetaCortexService", () => {
     expect(fetched.item.content).toBe(stored.content);
   });
 
+  // Fetching with only `document_id` must return the record that was stored.
+  it("fetches a stored document by document_id compatibility alias", async () => {
+    const { service } = createService();
+    const original = await service.storeContext({
+      content: "Ktor networking pattern.",
+      module_name: "kmp-networking",
+      branch_state: "active"
+    });
+    const { item } = await service.fetchContext({ document_id: original.id });
+    expect(item.id).toBe(original.id);
+    expect(item.content).toBe(original.content);
+  });
+
+  // Supplying both aliases with different values must be rejected.
+  it("rejects conflicting fetch id aliases", async () => {
+    const { service } = createService();
+    const mismatched = {
+      id: "memory-1",
+      document_id: "memory-2"
+    };
+    const pending = service.fetchContext(mismatched);
+    await expect(pending).rejects.toThrow("id and document_id must match");
+  });
+
+  // Neither alias supplied: the call must reject with an explanatory message.
+  it("rejects fetch without an id", async () => {
+    const { service } = createService();
+    const pending = service.fetchContext({});
+
+    await expect(pending).rejects.toThrow("id or document_id must be provided");
+  });
+
   it("fetches rejects unknown id", async () => {
     const { service } = createService();
 
@@ -297,6 +329,47 @@ describe("MetaCortexService", () => {
       expect(result.merged_content).toContain("Kubernetes");
     });
 
+    // A repeated id must be counted once, deprecated once, and merged once.
+    it("deduplicates explicit source_ids before consolidation", async () => {
+      const { service } = createService();
+      const xcodeGoal = await service.storeContext({
+        content: "Active learning goal: Xcode literacy.",
+        module_name: "learning",
+        branch_state: "active"
+      });
+      const kubernetesGoal = await service.storeContext({
+        content: "Active learning goal: Kubernetes basics.",
+        module_name: "learning",
+        branch_state: "active"
+      });
+      const requestedIds = [xcodeGoal.id, xcodeGoal.id, kubernetesGoal.id];
+      const merged = await service.consolidateContext({
+        topic: "learning",
+        source_ids: requestedIds
+      });
+
+      expect(merged.source_count).toBe(2);
+      expect(merged.deprecated_ids).toEqual([xcodeGoal.id, kubernetesGoal.id]);
+      expect(merged.merged_content.match(/Xcode/g)).toHaveLength(1);
+    });
+
+    // Two copies of one id leave a single unique source, below the minimum.
+    // Only the error message is asserted here, not the 422 status itself.
+    it("throws 422 when explicit source_ids collapse below 2 unique sources", async () => {
+      const { service } = createService();
+      const onlyGoal = await service.storeContext({
+        content: "Active learning goal: Xcode literacy.",
+        module_name: "learning",
+        branch_state: "active"
+      });
+      const pending = service.consolidateContext({
+        topic: "learning",
+        source_ids: [onlyGoal.id, onlyGoal.id]
+      });
+
+      await expect(pending).rejects.toThrow("At least 2 source memories are required");
+    });
+
     it("defaults topic to general when not provided", async () => {
       const { service } = createService();
 
diff --git a/functions/test/support/fakes.ts b/functions/test/support/fakes.ts
index 552a180..71ea377 100644
--- a/functions/test/support/fakes.ts
+++ b/functions/test/support/fakes.ts
@@ -220,10 +220,13 @@ export class InMemoryToolCallObserver implements ToolCallObserver {
   private readonly events: ObservabilityEvent[] = [];
 
   async record(input: RecordToolCallEventInput): Promise<void> {
+    const timestamp = input.timestamp ?? Date.now();
+
     this.events.push({
       event_id: `event-${this.nextId++}`,
       event_type: "tool_call",
-      timestamp: input.timestamp ?? Date.now(),
+      timestamp,
+      expires_at: new Date(timestamp + 90 * 24 * 60 * 60 * 1000),
       client_id: input.client_id,
       tool_name: input.tool_name,
       status: input.status,
@@ -237,10 +240,13 @@ export class InMemoryToolCallObserver implements ToolCallObserver {
   }
 
   async recordRequest(input: RecordRequestEventInput): Promise<void> {
+    const timestamp = input.timestamp ?? Date.now();
+
     this.events.push({
       event_id: `event-${this.nextId++}`,
       event_type: "request",
-      timestamp: input.timestamp ?? Date.now(),
+      timestamp,
+      expires_at: new Date(timestamp + 90 * 24 * 60 * 60 * 1000),
       client_id: input.client_id,
       method: input.method,
       path: input.path,
@@ -274,7 +280,8 @@ export function createTestConfig(overrides: Partial<AppConfig> = {}): AppConfig
     authToken,
     geminiApiKey: "test-gemini-key",
     embeddingModel: "text-embedding-004",
-    multimodalModel: "gemini-3.1-flash-lite-preview",
+    multimodalModel: "gemini-3.1-flash-lite",
+    generationVertexLocation: "global",
     embeddingDimensions: 768,
     memoryCollection: "memory_vectors",
     topK: 5,
diff --git a/metacortexplan.md b/metacortexplan.md
index c7be508..7811ace 100644
--- a/metacortexplan.md
+++ b/metacortexplan.md
@@ -1,13 +1,12 @@
-# MetaCortex: Strategic Plan — Cut, Keep, Rethink
+# MetaCortex: Strategic Plan
 
-**Date:** 2026-03-22
-**Context:** Codebase audit + competitive landscape analysis (Mem0, Letta, Zep/Graphiti, OpenViking, Cognee, Supermemory)
+**Date:** 2026-06-11
 
 ---
 
 ## Scope Philosophy
 
-MetaCortex is a **user memory** system — it stores what the user *knows*, not what the user *has access to*. Email, calendars, documents, and external data sources are for the agent to dig through on demand and advise the user on. MetaCortex persists the durable knowledge that results from those interactions: decisions made, context learned, facts established, things deprecated.
+MetaCortex is a **user memory** system — it stores what the user *knows*, not what the user *has access to*. Email, calendars, documents, and external data sources are for the agent to dig through on demand and advise the user on. MetaCortex persists the durable knowledge that results from those interactions: preferences, decisions made, context learned, facts established, things deprecated.
 
 This means MetaCortex will never include connectors (GDrive, Gmail, Notion), ingestion pipelines, browser extensions, or document indexing. That's a different product (and it's what supermemory, Mem0, and others are building). MetaCortex's scope is: **durable memories that persist across agent sessions, with explicit lifecycle control.**
 
@@ -15,149 +14,52 @@ This means MetaCortex will never include connectors (GDrive, Gmail, Notion), ing
 
 ## Executive Summary
 
-MetaCortex is a well-architected MCP memory server in rapid early development. The core — vector search, idempotent writes, client profile scoping, multimodal pipeline — is solid. But the 6-tool surface has redundancy, some features won't scale on Firestore, and the competitive landscape reveals both gaps to close and differentiators to double down on.
+MetaCortex is a well-architected MCP memory server. The core — vector search, idempotent writes, client profile scoping, and multimodal pipeline — is solid. The tool surface has been simplified from 6 tools to 4.
 
-Progress since this plan was drafted:
-
-- SSE transport has been removed. Streamable HTTP is now the only supported transport.
-- Response formats have been normalized to JSON across the remaining MCP tools.
-- `store_context` has been removed from the MCP surface. `remember_context` is now the single write tool and supports optional explicit `branch_state` for advanced admin workflows.
-- `get_consolidation_queue` has been removed from the MCP surface. WIP consolidation is now an internal maintenance workflow.
-- `fetch_context` no longer exposes `retrieval_text` in its public payload.
-
-Remaining near-term work:
-
-- add TTL policies for unbounded Firestore collections
-- simplify search payloads before investing in tiering and temporal validity
+The first hardening release addressed Firestore collection scaling, payload optimization, and model validation. The remaining strategic work is focused on proposed advanced features (context tiering, temporal validity).
 
 ---
 
-## KEEP — What's Working
-
-### 1. Core write/search/fetch loop
-The `store → search → fetch` pipeline is clean, tested, and correct. Fingerprint-based idempotency prevents duplicates. Asymmetric embedding (RETRIEVAL_DOCUMENT vs RETRIEVAL_QUERY task types) is a smart use of Vertex AI. The integration tests using real MCP SDK transports are unusually thorough.
+## Outstanding Tasks & Redesigns
 
-### 2. Client profile scoping
-Per-client `allowedTools`, `allowedFilterStates`, and CORS origins is a genuinely useful multi-tenant model. None of the competitors do this well — Mem0 has basic user/agent separation, but nothing as granular as MetaCortex's profile system. This is a differentiator for enterprise-ish deployments.
+### 1. Unbounded Firestore Collections (TTL Policies)
+* **Status:** Implemented 2026-06-11
+* **Problem:** Before this change, two collections grew without bound:
+  * `memory_vectors_write_fingerprints` (deduplication fingerprints)
+  * `memory_events` (observability/audit trail)
+* **Resolution:**
+  * New fingerprint writes store numeric `dedupe_expires_at` for the 15-minute duplicate window and Date-valued `expires_at` for 30-day Firestore TTL.
+  * New `memory_events` writes preserve numeric `timestamp` and add Date-valued `expires_at` for 90-day Firestore TTL.
+  * Added dry-run/write TTL backfill and `gcloud` TTL deployment scripts.
 
-### 3. Gemini multimodal pipeline
-**This is your clearest competitive advantage.** Mem0, Letta, Graphiti, and Cognee are all text-only. OpenViking supports multimodal but through a different approach (binary storage with text summaries). MetaCortex's Gemini-powered image normalization into searchable text is a real capability gap in the market. Keep and improve.
+### 2. Search Result Redundancy
+* **Status:** Implemented 2026-06-11
+* **Problem:** Each search result included both `summary` (220 chars) and `content_preview` (400 chars) — two truncations of the same content, which wasted tokens and left clients unsure which field to use.
+* **Resolution:** `search_context` now returns `summary` only. If the agent wants full content, it calls `fetch_context`.
 
-### 4. MCP protocol compliance
-Being a first-class MCP server is the right distribution strategy. Mem0 and Letta have their own SDKs and APIs. MetaCortex plugs into any MCP client (Claude, ChatGPT via custom GPT, etc.) without custom integration. This is a moat via ecosystem alignment.
-
-### 5. Observability / audit trail
-`memory_events` collection with tool call timing and request metadata is solid operational infrastructure. Needs TTL policies (see Rethink section) but the concept is right.
+### 3. Model Default Validation
+* **Status:** Implemented 2026-06-11
+* **Problem:** It had not been verified that the `gemini-3.1-flash-lite-preview` multimodal model default still existed and was the right choice.
+* **Resolution:** Google shut down `gemini-3.1-flash-lite-preview` on 2026-05-25. The default is now stable `gemini-3.1-flash-lite`, with a live validation script.
 
 ---
 
-## CUT — Features to Eliminate
-
-### 1. `store_context` — remove from MCP surface
-
-**Status:** Completed on 2026-03-22.
+## Proposed New Capabilities (Invest)
 
-**Why cut:** Two write tools with overlapping behavior create contract drift, duplicate testing, and needless client complexity. The public-facing schema should use terms clients understand (`topic`, `draft`, `remember`) while still allowing explicit lifecycle control for admin use cases.
+### 1. Context Tiering (L0/L1/L2 equivalent)
+* **Status:** Proposed
+* **Goal:** Reduce token costs by returning a summary first, fetching full details only when needed.
+* **Proposal:** When storing a memory, use Gemini to generate:
+  * A `summary` field (~100 tokens) stored alongside the full content.
+  * The existing `content` (full fidelity) remains for fetch.
+  * Search results return the summary. Agents call `fetch_context` only when they need the full thing.
+* **Effort:** Medium.
 
-- `remember_context` is the clearer public verb
-- `topic` is easier for clients than `module_name`
-- one write tool eliminates divergent schemas and examples
-- advanced lifecycle control still matters, so the surviving write tool must support explicit `branch_state`
-
-**What changed:** `store_context` was removed from the MCP tool surface. `remember_context` now handles both simple writes and advanced writes via optional `branch_state`. This reduced the tool surface from 6 to 5.
-
-### 2. `get_consolidation_queue` — remove entirely
-
-**Status:** Completed on 2026-03-22.
-
-**Why cut:** This tool exposes an internal workflow concept ("consolidation") that doesn't match how any current agent memory system actually works in practice. The intended workflow is: agent stores rough notes as `wip` → later reviews the queue → consolidates into canonical `active` memories → deprecates the originals.
-
-Problems:
-- No agent today will spontaneously run a consolidation workflow. This requires complex multi-step reasoning about its own memory hygiene.
-- The tool has no result limit — a large WIP backlog returns everything in one call
-- The response format is pipe-delimited flat text (inconsistent with everything else)
-- Neither Mem0, Letta, Zep, nor OpenViking expose a "consolidation queue" concept. They all handle memory evolution automatically or not at all.
-- The `wip` branch state itself is fine to keep (useful for draft/scratch memories), but the explicit queue tool for reviewing them is over-engineering a workflow that won't happen organically
-
-**What changed:** The tool was removed from the MCP surface. `wip` remains a valid `branch_state`, but reviewing or consolidating WIP memories is now an internal maintenance workflow rather than a client-facing tool. This reduced the surface from 5 tools to 4.
-
----
-
-## RETHINK — Features That Need Redesign
-
-### 1. SSE transport on Cloud Functions
-
-**Status:** Completed on 2026-03-22.
-
-**Problem:** SSE sessions are stored in a module-level `Map`. Cloud Functions runs multiple instances. An SSE connection on instance A won't be found when a subsequent POST lands on instance B. This is a fundamental mismatch between stateful SSE and stateless serverless.
-
-**Competitors' approach:** Most competitor servers either run on persistent processes (not serverless) or use only stateless transports.
-
-**What changed:** SSE transport and its message endpoints were removed. Streamable HTTP is now the only supported transport.
-
-### 2. Unbounded Firestore collections
-
-**Status:** Not started.
-
-Two collections grow without bound:
-- `memory_vectors_write_fingerprints` — deduplication fingerprints with `expires_at` set but nothing deleting them
-- `memory_events` — observability/audit trail with no TTL
-
-**Recommendation:**
-- Enable Firestore TTL policies on both collections. Fingerprints can expire after 30 days (if you're going to store a duplicate, it'll happen within hours, not months). Events can expire after 90 days.
-- Add a `limit` parameter to `getConsolidationQueue` (if you keep it) or any future list operations.
-
-### 3. Response format inconsistency
-
-**Status:** Completed on 2026-03-22.
-
-`store_context` and `deprecate_context` return flat key=value text. Everything else returns JSON. This is a tax on every client.
-
-**What changed:** All remaining MCP tools now return JSON payloads.
-
-### 4. `retrieval_text` exposure in fetch responses
-
-**Status:** Completed on 2026-03-22.
-
-For text memories, `retrieval_text` duplicates `content`. For multimodal memories, it's a Gemini-generated artifact the client can't meaningfully use. Exposing it leaks implementation detail.
-
-**What changed:** `fetch_context` now returns canonical `content` plus public metadata only. Internal `retrieval_text` is still stored for embeddings and storage internals, but it is no longer part of the public MCP payload.
-
-### 5. Search result redundancy
-
-**Status:** Not started.
-
-Each search result includes both `summary` (220 chars) and `content_preview` (400 chars) — two truncations of the same content. Wastes tokens and confuses clients about which to use.
-
-**Recommendation:** Keep only `content_preview` (or rename to `preview`). If the agent wants the full content, it calls `fetch_context`. Two levels of truncation serve no purpose.
-
----
-
-## INVEST — New Capabilities Worth Building
-
-### 1. Context tiering (inspired by OpenViking's L0/L1/L2)
-
-**Why:** OpenViking's most compelling idea is that not all context needs to be loaded at full fidelity. Their L0/L1/L2 system (100-token abstract → 2k-token overview → full content) claims 95% token cost reduction.
-
-MetaCortex already has a primitive version of this: `search_context` returns truncated previews, and `fetch_context` returns full content. But it's not designed as a tiered system — it's an accident of truncation.
-
-**Proposal:** When storing a memory, use Gemini to generate:
-- A `summary` field (~100 tokens) stored alongside the full content
-- The existing `content` (full fidelity) remains for fetch
-
-Search results return the summary. Agents call `fetch_context` only when they need the full thing. This is achievable with one additional Gemini call at write time and gives you OpenViking's core token-efficiency benefit without their filesystem complexity.
-
-**Effort:** Medium. One new field in the Firestore schema, one Gemini call in the write path, update search response format.
-
-### 2. Temporal validity / fact versioning (inspired by Graphiti)
-
-**Why:** Graphiti's bi-temporal tracking (when a fact was recorded vs. when it was true) is the most sophisticated approach in the market. MetaCortex has `created_at` and `updated_at` timestamps but no concept of "this fact was true from X to Y."
-
-Without this, an agent searching for "what's our auth strategy?" might get both the old strategy and the new one, with no way to know which is current beyond `branch_state`.
-
-**Proposal:** Add optional `valid_from` and `valid_until` fields to stored memories. Search results can filter by temporal validity. The `deprecate_context` tool already captures "superseded by" — extending this with temporal bounds is a natural fit.
-
-**Effort:** Low-medium. Two optional fields, one new filter in search, update deprecation to set `valid_until` automatically.
+### 2. Temporal Validity / Fact Versioning
+* **Status:** Proposed
+* **Goal:** Enable the agent to distinguish old facts from current ones beyond `branch_state`.
+* **Proposal:** Add optional `valid_from` and `valid_until` fields to stored memories. Search results can filter by temporal validity. Update deprecation to set `valid_until` automatically.
+* **Effort:** Low-medium.
 
 ---
 
@@ -195,7 +97,7 @@ These should be addressed regardless of strategic direction:
 4. **Partially fixed:** stale `CLAUDE.md` descriptions were refreshed where touched by the contract cleanup.
 5. **Fixed:** `WWW-Authenticate` realm no longer uses the old placeholder service name.
 6. **Fixed:** default `serviceName` no longer uses the old placeholder service name.
-7. **Pending:** verify the `gemini-3.1-flash-lite-preview` multimodal model default still exists and is the right choice.
+7. **Fixed:** replaced the shut-down `gemini-3.1-flash-lite-preview` multimodal model default with stable `gemini-3.1-flash-lite` and added live model validation.
 
 ---
 
@@ -208,4 +110,17 @@ These should be addressed regardless of strategic direction:
 | `fetch_context` | Get full content by ID | read-only |
 | `deprecate_context` | Soft-delete with supersession tracking | destructive |
 
-The current MCP surface is down from 6 tools to 4.
+---
+
+## Completed Work (Archived)
+
+* **SSE Transport Removal:** Streamable HTTP is now the only supported transport; stateful SSE endpoints removed. (Completed: 2026-03-22)
+* **Response Normalization:** All remaining MCP tools normalized to return JSON payloads instead of flat key=value text. (Completed: 2026-03-22)
+* **`store_context` Elimination:** Removed from MCP surface; `remember_context` is the unified write tool. (Completed: 2026-03-22)
+* **`get_consolidation_queue` Removal:** Removed from MCP surface; WIP queue is now an internal workflow. (Completed: 2026-03-22)
+* **`retrieval_text` Exposure Fix:** Removed `retrieval_text` from public `fetch_context` response to prevent leaking implementation details. (Completed: 2026-03-22)
+* **Roadmap Hardening Release:** Added Firestore TTL-ready fields and scripts, removed `content_preview` from search payloads, added `document_id` fetch compatibility, updated Gemini multimodal defaults, deployed production TTL policies, and verified production smoke tests. (Completed: 2026-06-11)
+* **Codebase Bugs Fixed:**
+  * Fixed environment variable naming mismatch (`MCP_ADMIN_TOKEN` vs `MCP_AUTH_TOKEN`).
+  * Updated references in `CLAUDE.md` from `openBrainMcp` to `metaCortexMcp`.
+  * Standardized `WWW-Authenticate` realm and default `serviceName` to use the correct service name.
diff --git a/scripts/deploy-firestore-ttl.sh b/scripts/deploy-firestore-ttl.sh
new file mode 100755
index 0000000..634eacf
--- /dev/null
+++ b/scripts/deploy-firestore-ttl.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+#
+# Enables Firestore TTL on the `expires_at` field of the write-fingerprint
+# and memory_events collection groups.
+#
+# Usage: deploy-firestore-ttl.sh [--project ID] [--memory-collection NAME] [--dry-run]
+#
+# PROJECT_ID and MEMORY_COLLECTION may also be supplied via the environment;
+# command-line flags take precedence.
+
+set -euo pipefail
+
+PROJECT_ID="${PROJECT_ID:-my-brain-88870}"
+MEMORY_COLLECTION="${MEMORY_COLLECTION:-memory_vectors}"
+DRY_RUN="false"
+
+# Aborts with a readable message when a value-taking flag has no value.
+# Without this, `set -u` dies on the unbound "$2" without naming the flag, and
+# an empty value would yield `--project=` or a bare `_write_fingerprints` group.
+require_value() {
+  if [[ -z "${2:-}" ]]; then
+    echo "Missing value for $1" >&2
+    exit 1
+  fi
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --project)
+      require_value "$@"
+      PROJECT_ID="$2"
+      shift 2
+      ;;
+    --memory-collection)
+      require_value "$@"
+      MEMORY_COLLECTION="$2"
+      shift 2
+      ;;
+    --dry-run)
+      DRY_RUN="true"
+      shift
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Collection groups whose `expires_at` field receives a TTL policy.
+collection_groups=(
+  "${MEMORY_COLLECTION}_write_fingerprints"
+  "memory_events"
+)
+
+for collection_group in "${collection_groups[@]}"; do
+  ttl_cmd=(
+    gcloud firestore fields ttls update
+    expires_at
+    "--collection-group=${collection_group}"
+    --enable-ttl
+    "--project=${PROJECT_ID}"
+    --async
+  )
+
+  if [[ "$DRY_RUN" == "true" ]]; then
+    # %q shell-quotes each word so the printed command can be pasted back.
+    printf 'dry-run:'
+    printf ' %q' "${ttl_cmd[@]}"
+    printf '\n'
+  else
+    "${ttl_cmd[@]}"
+  fi
+done
+
+echo "Verify with:"
+echo "gcloud firestore fields ttls list --project=${PROJECT_ID}"