Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions firestore.indexes.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,40 @@
}
]
},
{
"collectionGroup": "publishedTestimony",
"queryScope": "COLLECTION_GROUP",
"fields": [
{
"fieldPath": "billId",
"order": "ASCENDING"
},
{
"fieldPath": "vector_embedding",
"vectorConfig": {
"dimension": 3072,
"flat": {}
}
}
]
},
{
"collectionGroup": "publishedTestimony",
"queryScope": "COLLECTION_GROUP",
"fields": [
{
"fieldPath": "ballotQuestionId",
"order": "ASCENDING"
},
{
"fieldPath": "vector_embedding",
"vectorConfig": {
"dimension": 3072,
"flat": {}
}
}
]
},
{
"collectionGroup": "archivedTestimony",
"queryScope": "COLLECTION",
Expand Down Expand Up @@ -923,6 +957,45 @@
"queryScope": "COLLECTION_GROUP"
}
]
},
{
"collectionGroup": "bills",
"fieldPath": "vector_embedding",
"indexes": [
{
"queryScope": "COLLECTION_GROUP",
"vectorConfig": {
"dimension": 3072,
"flat": {}
}
}
]
},
{
"collectionGroup": "publishedTestimony",
"fieldPath": "vector_embedding",
"indexes": [
{
"queryScope": "COLLECTION_GROUP",
"vectorConfig": {
"dimension": 3072,
"flat": {}
}
}
]
},
{
"collectionGroup": "ballotQuestions",
"fieldPath": "vector_embedding",
"indexes": [
{
"queryScope": "COLLECTION",
"vectorConfig": {
"dimension": 3072,
"flat": {}
}
}
]
}
]
}
3 changes: 2 additions & 1 deletion functions/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"assemblyai": "^4.9.0",
"axios": "^0.25.0",
"date-fns": "^2.30.0",
"firebase-admin": "^11.11.1",
"firebase-admin": "^12.0.0",
"@google-cloud/aiplatform": "^3.9.0",
"firebase-functions": "^5.1.1",
"fluent-ffmpeg": "^2.1.3",
"fuse.js": "6.5.3",
Expand Down
8 changes: 8 additions & 0 deletions functions/src/ballotQuestions/vector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { createVectorIndexer } from "../search/createVectorIndexer"

/**
 * Indexer settings for ballot question documents: the `title`, `description`
 * and `fullSummary` fields are combined into the text to embed, `title` is
 * used as the embedding title, and the result is stored in `vector_embedding`.
 */
const ballotQuestionVectorConfig = {
  documentTrigger: "ballotQuestions/{id}",
  titleField: "title",
  textFields: ["title", "description", "fullSummary"],
  vectorField: "vector_embedding"
}

/** Firestore trigger on `ballotQuestions/{id}` built by `createVectorIndexer`. */
export const syncBallotQuestionToVectorIndex = createVectorIndexer(
  ballotQuestionVectorConfig
)
8 changes: 8 additions & 0 deletions functions/src/bills/vector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { createVectorIndexer } from "../search/createVectorIndexer"

/**
 * Indexer settings for bill documents: `content.Title` and
 * `content.DocumentText` are combined into the text to embed,
 * `content.Title` is used as the embedding title, and the result is stored
 * in `vector_embedding`.
 */
const billVectorConfig = {
  documentTrigger: "generalCourts/{court}/bills/{id}",
  titleField: "content.Title",
  textFields: ["content.Title", "content.DocumentText"],
  vectorField: "vector_embedding"
}

/**
 * Firestore trigger on `generalCourts/{court}/bills/{id}` built by
 * `createVectorIndexer`.
 */
export const syncBillToVectorIndex = createVectorIndexer(billVectorConfig)
98 changes: 98 additions & 0 deletions functions/src/search/createVectorIndexer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import { runWith } from "firebase-functions"
import * as admin from "firebase-admin"
import { PredictionServiceClient, helpers } from "@google-cloud/aiplatform"
import hash from "object-hash"

/** Settings accepted by `createVectorIndexer`. */
export interface VectorIndexerConfig {
  /** Firestore document path pattern the trigger listens on. */
  documentTrigger: string
  /** Fields to combine for the embedding. */
  textFields: Array<string>
  /** Destination field for the embedding (e.g., 'vector_embedding'). */
  vectorField: string
  /** Optional field to use as the title for prefixing. */
  titleField?: string
}

/**
 * Reads a possibly nested value from `data` using a dot-separated path
 * (e.g. "content.Title") and returns it only when it is a non-blank string.
 * Anything else (missing path, objects, numbers, blank strings) yields
 * `undefined`, so non-text values are never stringified into the prompt.
 */
function readStringField(
  data: Record<string, unknown>,
  path: string
): string | undefined {
  let current: unknown = data
  for (const key of path.split(".")) {
    if (current === null || typeof current !== "object") return undefined
    current = (current as Record<string, unknown>)[key]
  }
  return typeof current === "string" && current.trim() !== ""
    ? current
    : undefined
}

/**
 * Builds a Firestore `onWrite` trigger that embeds a document's text with
 * Vertex AI and writes the embedding back onto the same document.
 *
 * On every write the handler:
 *  1. joins the configured `textFields` (blank or non-string fields are
 *     skipped) with blank lines;
 *  2. hashes the text together with the title and skips the API call when
 *     the document already carries that hash and an embedding;
 *  3. otherwise calls the embedding model and stores the result in
 *     `config.vectorField`, plus the hash in `<vectorField>_hash`.
 *
 * @param config Trigger path and field names; see `VectorIndexerConfig`.
 * @returns The Cloud Function to export from the functions entry point.
 * @throws Inside the handler when the project id is unknown, the request
 *   cannot be encoded, or Vertex AI returns no usable embedding.
 */
export function createVectorIndexer(config: VectorIndexerConfig) {
  const location = "us-central1"
  const publisher = "google"
  const model = "gemini-embedding-2"
  const hashField = `${config.vectorField}_hash`

  // Created on first use and then reused by later invocations handled by the
  // same function instance, instead of building a new client per write.
  let client: PredictionServiceClient | undefined

  // No `secrets` are bound: the client below is constructed without explicit
  // credentials, so it relies on the runtime's default credentials. Binding a
  // secret named GOOGLE_APPLICATION_CREDENTIALS would put the secret payload
  // into an env var that auth libraries interpret as a file path.
  return runWith({
    timeoutSeconds: 60,
    memory: "512MB"
  })
    .firestore.document(config.documentTrigger)
    .onWrite(async change => {
      // A missing "after" snapshot means the document was deleted.
      const data = change.after.exists ? change.after.data() : undefined
      if (!data) return

      // Extract text to embed
      const textToEmbed = config.textFields
        .map(field => readStringField(data, field))
        .filter((text): text is string => text !== undefined)
        .join("\n\n")
      if (!textToEmbed) return

      // Extract title for gemini-embedding-2 prefixing
      const title =
        (config.titleField
          ? readStringField(data, config.titleField)
          : undefined) ?? "none"

      // Compare against the hash stored on the CURRENT snapshot. The update
      // at the end of this handler fires the trigger again; on that run the
      // "before" snapshot does not yet hold the new hash, so reading it from
      // there would embed the same text a second time.
      const textHash = hash({ textToEmbed, title })
      if (data[hashField] === textHash && data[config.vectorField]) {
        return // Nothing changed
      }

      const project = admin.app().options.projectId
      if (!project) {
        throw new Error(
          "Cannot call Vertex AI: the Firebase app has no projectId configured"
        )
      }
      const endpoint = `projects/${project}/locations/${location}/publishers/${publisher}/models/${model}`

      if (!client) {
        client = new PredictionServiceClient({
          apiEndpoint: `${location}-aiplatform.googleapis.com`
        })
      }

      // Get embedding with multimodal/task prefix
      const formattedText = `title: ${title} | text: ${textToEmbed}`
      const instance = helpers.toValue({ content: formattedText })
      if (!instance) {
        throw new Error("Failed to encode the embedding request for Vertex AI")
      }

      const [response] = await client.predict({
        endpoint,
        instances: [instance]
      })

      const firstPrediction = response.predictions?.[0]
      if (!firstPrediction) {
        throw new Error("No predictions returned from Vertex AI")
      }

      // The helper's protobuf typings differ from the response typings, so
      // the decoded value is narrowed manually to the two shapes handled here.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const prediction = helpers.fromValue(firstPrediction as any) as
        | {
            embeddings?: { values?: unknown }
            embedding?: { values?: unknown }
          }
        | null
        | undefined
      const embedding =
        prediction?.embeddings?.values ?? prediction?.embedding?.values

      if (!Array.isArray(embedding) || embedding.length === 0) {
        throw new Error(
          `Unexpected prediction format: ${JSON.stringify(prediction)}`
        )
      }

      // NOTE(review): the embedding is stored as a plain number array.
      // Firestore vector indexes presumably only cover values written with
      // `FieldValue.vector(...)`, and Firestore documents a maximum vector
      // dimension that the configured 3072 may exceed — confirm both against
      // the index definitions before relying on nearest-neighbour queries.
      await change.after.ref.update({
        [config.vectorField]: embedding,
        [hashField]: textHash // Store hash to track changes
      })
    })
}
8 changes: 8 additions & 0 deletions functions/src/testimony/vector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { createVectorIndexer } from "../search/createVectorIndexer"

/**
 * Indexer settings for published testimony documents: the `content` field is
 * the text to embed, `billTitle` is used as the embedding title, and the
 * result is stored in `vector_embedding`.
 */
const testimonyVectorConfig = {
  documentTrigger: "users/{uid}/publishedTestimony/{id}",
  titleField: "billTitle",
  textFields: ["content"],
  vectorField: "vector_embedding"
}

/**
 * Firestore trigger on `users/{uid}/publishedTestimony/{id}` built by
 * `createVectorIndexer`.
 */
export const syncTestimonyToVectorIndex = createVectorIndexer(
  testimonyVectorConfig
)
Loading
Loading