Commit 3a41906

feat: migrate from D1+Vectorize to PostgreSQL+pgvector
Replace Cloudflare D1 and Vectorize with a single PostgreSQL database that uses pgvector for embeddings. Consistency is immediate: no more eventual-consistency delays. Chunks and their vectors are stored together and queried in one SQL statement. This also simplifies frontend state (no indexing status to track).
1 parent 91c481f commit 3a41906

12 files changed

Lines changed: 264 additions & 187 deletions
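
The "queried in one SQL statement" claim maps to the `searchChunks` helper added in `rag/apps/api/src/cf/neon.ts` (full file below). A condensed sketch of that query shape, where `<=>` is pgvector's cosine-distance operator:

```ts
import { neon } from "@neondatabase/serverless";

// Condensed from searchChunks in rag/apps/api/src/cf/neon.ts:
// similarity search plus the document-metadata join, one round trip.
async function topChunks(databaseUrl: string, queryEmbedding: number[], limit = 8) {
  const sql = neon(databaseUrl);
  const embedding = `[${queryEmbedding.join(",")}]`; // pgvector text literal
  return sql`
    SELECT c.id, c.content, d.filename
    FROM chunks c
    JOIN documents d ON d.id = c.document_id
    ORDER BY c.embedding <=> ${embedding}::vector
    LIMIT ${limit}
  `;
}
```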


README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -10,7 +10,7 @@ Each demo is a standalone app that showcases a real-world use case: extracting d
 
 Ask your documents. Get cited answers. Upload `.docx` files and get AI-powered answers with citations that scroll to the exact paragraph, comment, or tracked change in the source document.
 
-**Stack**: Cloudflare Workers + R2 + D1 + Vectorize, React, SuperDoc, Claude, OpenAI embeddings
+**Stack**: Cloudflare Workers + R2, PostgreSQL + pgvector, React, SuperDoc, Claude, OpenAI embeddings
 
 **What it shows**:
 - Extract text, comments, and tracked changes from `.docx` files using the SuperDoc SDK
```

rag/.env.example

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,3 +1,6 @@
+# Database (PostgreSQL + pgvector)
+DATABASE_URL=postgresql://...
+
 # API keys
 OPENAI_API_KEY=sk-...
 ANTHROPIC_API_KEY=sk-ant-...
```

rag/README.md

Lines changed: 15 additions & 11 deletions
````diff
@@ -9,8 +9,8 @@ Upload `.docx` files, ask questions in natural language, and get answers with ci
 1. **Upload** `.docx` files through the UI
 2. **Extract** text, comments, and tracked changes using the [SuperDoc SDK](https://docs.superdoc.dev)
 3. **Chunk** each paragraph with its stable node ID, embed with OpenAI
-4. **Store** chunks in Cloudflare Vectorize, metadata in D1, files in R2
-5. **Query** — ask a question, relevant chunks are retrieved via vector search
+4. **Store** chunks with embeddings in PostgreSQL + pgvector, files in R2
+5. **Query** — ask a question, relevant chunks are retrieved via vector similarity search
 6. **Answer** — Claude generates a response with `[cite:ID]` references
 7. **Navigate** — click a citation to scroll to the source in the SuperDoc viewer
 
@@ -20,19 +20,22 @@ Upload `.docx` files, ask questions in natural language, and get answers with ci
 apps/
   api/             Cloudflare Worker — query, documents, file serving
   web/             React frontend — document viewer + chat sidebar
-  ingest-service/  Docker service — automated extraction for VM deployment
+  ingest/          Docker service — automated extraction for VM deployment
 packages/
   shared/          SuperDoc extraction, chunking, embedding client
 docs/              Sample .docx files
 ```
 
+**Stack**: Cloudflare Workers, PostgreSQL + pgvector, Cloudflare R2, React, SuperDoc, Claude, OpenAI embeddings
+
 ## Quick Start
 
 ### Prerequisites
 
 - [Bun](https://bun.sh) v1.1+
 - [Wrangler](https://developers.cloudflare.com/workers/wrangler/) (installed automatically)
 - Cloudflare account (free tier works)
+- PostgreSQL database with [pgvector](https://github.com/pgvector/pgvector)
 - OpenAI API key (for embeddings)
 - Anthropic API key (for Claude)
 
@@ -41,18 +44,19 @@ docs/ Sample .docx files
 ```bash
 bun install
 
-# Create Cloudflare resources
-cd apps/api
-npx wrangler d1 create docrag
-# Copy the database_id into wrangler.toml
+# Create the database schema (run against your Neon database)
+psql $DATABASE_URL -f apps/api/schema.sql
 
-npx wrangler d1 execute docrag --local --file=schema.sql
-npx wrangler vectorize create rag-chunks --dimensions=1536 --metric=cosine
+# Create Cloudflare R2 bucket
+cd apps/api
+npx wrangler r2 bucket create rag-demo-docs
 
 # Add secrets for local dev
 cat > .dev.vars << EOF
+DATABASE_URL=postgresql://...your-connection-string...
 OPENAI_API_KEY=sk-...
 ANTHROPIC_API_KEY=sk-ant-...
+INGEST_SERVICE_URL=http://localhost:4000
 EOF
 cd ../..
 ```
@@ -86,9 +90,9 @@ Try these across the sample documents:
 ```bash
 # Deploy API Worker
 cd apps/api
+wrangler secret put DATABASE_URL
 wrangler secret put OPENAI_API_KEY
 wrangler secret put ANTHROPIC_API_KEY
-wrangler d1 execute docrag --remote --file=schema.sql
 wrangler deploy
 
 # Deploy frontend to Cloudflare Pages
@@ -101,7 +105,7 @@ bun run deploy:web
 For automated ingestion, deploy the Docker service to a VM:
 
 ```bash
-docker build -f apps/ingest-service/Dockerfile -t docrag-ingest .
+docker build -f apps/ingest/Dockerfile -t docrag-ingest .
 docker run -d \
   -e API_URL=https://docrag-api.<account>.workers.dev \
   -e OPENAI_API_KEY=sk-... \
````
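
The new prerequisite assumes pgvector is available on the target database; `schema.sql` runs `CREATE EXTENSION IF NOT EXISTS vector`, which only succeeds where the extension is installed or allowlisted (Neon ships it by default). A minimal preflight sketch; `checkPgvector` is a hypothetical helper, not part of this commit:

```ts
import { neon } from "@neondatabase/serverless";

// Hypothetical preflight: verify pgvector can be enabled before
// applying schema.sql. pg_available_extensions is a standard catalog view.
async function checkPgvector(databaseUrl: string): Promise<boolean> {
  const sql = neon(databaseUrl);
  const rows =
    await sql`SELECT 1 FROM pg_available_extensions WHERE name = 'vector'`;
  return rows.length > 0;
}
```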

rag/apps/api/package.json

Lines changed: 2 additions & 1 deletion
```diff
@@ -10,7 +10,8 @@
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.30.0",
-    "@docrag/shared": "workspace:*"
+    "@docrag/shared": "workspace:*",
+    "@neondatabase/serverless": "^1.0.2"
   },
   "devDependencies": {
     "@cloudflare/workers-types": "^4.20250326.0",
```

rag/apps/api/schema.sql

Lines changed: 7 additions & 4 deletions
```diff
@@ -1,23 +1,26 @@
+CREATE EXTENSION IF NOT EXISTS vector;
+
 CREATE TABLE IF NOT EXISTS documents (
-  id INTEGER PRIMARY KEY,
+  id BIGINT PRIMARY KEY,
   filename TEXT NOT NULL,
   r2_key TEXT NOT NULL,
   file_hash TEXT,
   status TEXT DEFAULT 'ready',
-  created_at TEXT DEFAULT (datetime('now'))
+  created_at TIMESTAMPTZ DEFAULT now()
 );
 
 CREATE TABLE IF NOT EXISTS chunks (
   id TEXT PRIMARY KEY,
-  document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
+  document_id BIGINT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
   block_id TEXT NOT NULL,
   target_id TEXT NOT NULL,
   target_type TEXT NOT NULL,
   node_type TEXT NOT NULL,
   content TEXT NOT NULL,
   context_type TEXT DEFAULT 'body',
   metadata TEXT DEFAULT '{}',
-  created_at TEXT DEFAULT (datetime('now'))
+  embedding vector(1536),
+  created_at TIMESTAMPTZ DEFAULT now()
 );
 
 CREATE INDEX IF NOT EXISTS idx_chunks_document ON chunks(document_id);
```
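
Note that the schema adds `embedding vector(1536)` but no index on it, so similarity queries scan every row: exact results, and fast enough at demo scale. If the corpus grows, an approximate index could be added. A sketch, assuming pgvector 0.5.0+ for HNSW support (not part of this commit):

```ts
import { neon } from "@neondatabase/serverless";

// Not in this commit: optional HNSW index for larger corpora.
// vector_cosine_ops matches the <=> operator used by searchChunks.
async function addAnnIndex(databaseUrl: string): Promise<void> {
  const sql = neon(databaseUrl);
  await sql`CREATE INDEX IF NOT EXISTS idx_chunks_embedding
    ON chunks USING hnsw (embedding vector_cosine_ops)`;
}
```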

rag/apps/api/src/cf/neon.ts

Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@ (new file)

```ts
import { neon } from "@neondatabase/serverless";

export type DocumentRow = {
  id: number;
  filename: string;
  r2Key: string;
  fileHash: string | null;
  status: string;
  createdAt: string;
};

// Thin data-access layer over Neon's serverless Postgres driver.
// The tagged-template API parameterizes all interpolated values.
export function createNeonClient(databaseUrl: string) {
  const sql = neon(databaseUrl);

  return {
    async insertDocument(
      id: number,
      filename: string,
      r2Key: string,
      status = "ready",
      fileHash: string | null = null,
    ): Promise<void> {
      await sql`INSERT INTO documents (id, filename, r2_key, status, file_hash) VALUES (${id}, ${filename}, ${r2Key}, ${status}, ${fileHash})`;
    },

    async findByHash(hash: string): Promise<DocumentRow | null> {
      const rows =
        await sql`SELECT id, filename, r2_key, file_hash, status, created_at FROM documents WHERE file_hash = ${hash} LIMIT 1`;
      if (rows.length === 0) return null;
      const r = rows[0];
      return {
        id: r.id,
        filename: r.filename,
        r2Key: r.r2_key,
        fileHash: r.file_hash,
        status: r.status,
        createdAt: r.created_at,
      };
    },

    async insertChunks(
      chunks: Array<{
        id: string;
        documentId: number;
        blockId: string;
        targetId: string;
        targetType: string;
        nodeType: string;
        content: string;
        contextType: string;
        metadata: string;
        embedding: number[];
      }>,
    ): Promise<void> {
      // pgvector accepts a "[v1,v2,...]" text literal cast with ::vector.
      for (const c of chunks) {
        const embeddingStr = `[${c.embedding.join(",")}]`;
        await sql`INSERT INTO chunks (id, document_id, block_id, target_id, target_type, node_type, content, context_type, metadata, embedding) VALUES (${c.id}, ${c.documentId}, ${c.blockId}, ${c.targetId}, ${c.targetType}, ${c.nodeType}, ${c.content}, ${c.contextType}, ${c.metadata}, ${embeddingStr}::vector)`;
      }
    },

    async searchChunks(
      queryEmbedding: number[],
      limit = 8,
    ): Promise<
      Array<{
        id: string;
        documentId: number;
        filename: string;
        blockId: string;
        targetId: string;
        targetType: string;
        nodeType: string;
        content: string;
        contextType: string;
        metadata: string;
      }>
    > {
      const embeddingStr = `[${queryEmbedding.join(",")}]`;
      // <=> is pgvector's cosine-distance operator: nearest chunks first,
      // with document metadata joined in the same statement.
      const rows = await sql`
        SELECT c.id, c.document_id, d.filename, c.block_id, c.target_id, c.target_type, c.node_type, c.content, c.context_type, c.metadata
        FROM chunks c
        JOIN documents d ON d.id = c.document_id
        ORDER BY c.embedding <=> ${embeddingStr}::vector
        LIMIT ${limit}
      `;
      return rows.map((r: any) => ({
        id: r.id,
        documentId: r.document_id,
        filename: r.filename,
        blockId: r.block_id,
        targetId: r.target_id,
        targetType: r.target_type,
        nodeType: r.node_type,
        content: r.content,
        contextType: r.context_type,
        metadata: r.metadata,
      }));
    },

    async listDocuments(): Promise<DocumentRow[]> {
      const rows =
        await sql`SELECT id, filename, r2_key, file_hash, status, created_at FROM documents ORDER BY created_at DESC`;
      return rows.map((r: any) => ({
        id: r.id,
        filename: r.filename,
        r2Key: r.r2_key,
        fileHash: r.file_hash,
        status: r.status,
        createdAt: r.created_at,
      }));
    },

    async getDocument(id: number): Promise<DocumentRow | null> {
      const rows =
        await sql`SELECT id, filename, r2_key, file_hash, status, created_at FROM documents WHERE id = ${id} LIMIT 1`;
      if (rows.length === 0) return null;
      const r = rows[0];
      return {
        id: r.id,
        filename: r.filename,
        r2Key: r.r2_key,
        fileHash: r.file_hash,
        status: r.status,
        createdAt: r.created_at,
      };
    },

    async chunkCount(): Promise<number> {
      const rows = await sql`SELECT COUNT(*) as count FROM chunks`;
      return Number(rows[0].count);
    },

    async getChunkIdsByDocument(documentId: number): Promise<string[]> {
      const rows =
        await sql`SELECT id FROM chunks WHERE document_id = ${documentId}`;
      return rows.map((r: any) => r.id);
    },

    // Chunks are removed automatically via ON DELETE CASCADE (see schema.sql).
    async deleteDocument(id: number): Promise<void> {
      await sql`DELETE FROM documents WHERE id = ${id}`;
    },

    async updateDocumentStatus(id: number, status: string): Promise<void> {
      await sql`UPDATE documents SET status = ${status} WHERE id = ${id}`;
    },
  };
}
```
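
For context, a hedged sketch of how a Worker route might consume this client; the handler shape, env binding, and placeholder embedding are assumptions, while `createNeonClient` and `searchChunks` come from the file above:

```ts
import { createNeonClient } from "./cf/neon";

type Env = { DATABASE_URL: string };

export default {
  async fetch(_req: Request, env: Env): Promise<Response> {
    const db = createNeonClient(env.DATABASE_URL);
    // In the real query route this would be an OpenAI embedding (1536 dims)
    // of the user's question; a zero vector stands in here.
    const queryEmbedding = new Array(1536).fill(0);
    const hits = await db.searchChunks(queryEmbedding, 8);
    return Response.json(hits.map((h) => ({ id: h.id, file: h.filename })));
  },
};
```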
