Skip to content

Commit 0b052bb

Browse files
committed
fix(connectors): finalize Google Meet transcripts before indexing
- Only index a meeting once every transcript is FILE_GENERATED, so that a partial transcript is never stored under an endTime-keyed hash that would never refresh.
- Sort merged transcript entries by start time to preserve chronology across multiple transcripts in one conference.
1 parent c30b0ca commit 0b052bb

1 file changed

Lines changed: 23 additions & 1 deletion

File tree

apps/sim/connectors/google-meet/google-meet.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,16 @@ function recordToStub(record: ConferenceRecord): ExternalDocument {
153153
}
154154
}
155155

156+
/**
 * Converts a transcript entry's `startTime` into epoch milliseconds so merged
 * entries can be compared numerically.
 *
 * @param entry - Transcript entry whose `startTime` is read.
 * @returns The parsed start time in milliseconds, or `Number.POSITIVE_INFINITY`
 *   when `startTime` is missing/empty or cannot be parsed by `Date`, so that an
 *   ascending comparator places such entries after every timestamped one.
 */
function entryStartMs(entry: TranscriptEntry): number {
  const rawStart = entry.startTime

  // No timestamp at all: rank after everything that has one.
  if (!rawStart) {
    return Number.POSITIVE_INFINITY
  }

  const parsedMs = new Date(rawStart).getTime()

  // An unparseable timestamp yields NaN; treat it the same as a missing one.
  if (Number.isNaN(parsedMs)) {
    return Number.POSITIVE_INFINITY
  }

  return parsedMs
}
165+
156166
/**
157167
* Resolves a participant's display name across the identity oneof, falling back to a
158168
* stable placeholder when no name is exposed (e.g. anonymous joins).
@@ -400,10 +410,22 @@ export const googleMeetConnector: ConnectorConfig = {
400410
const transcripts = await fetchTranscripts(accessToken, recordName)
401411
if (transcripts.length === 0) return null
402412

413+
// Only index once every transcript is fully generated. Before then the entry set
414+
// is still being populated, and because the content hash is keyed on the (now
415+
// fixed) conference endTime, a partial transcript stored here would never be
416+
// refreshed on later syncs. Waiting for FILE_GENERATED keeps indexed content final.
417+
if (transcripts.some((transcript) => transcript.state !== 'FILE_GENERATED')) {
418+
logger.info('Google Meet transcript not finalized yet', { externalId })
419+
return null
420+
}
421+
403422
const entryGroups = await Promise.all(
404423
transcripts.map((transcript) => fetchTranscriptEntries(accessToken, transcript.name))
405424
)
406-
const entries = entryGroups.flat()
425+
// The API guarantees chronological order only within a single transcript, so sort
426+
// the merged entries by start time to keep speaker lines in sequence when a
427+
// conference has more than one transcript.
428+
const entries = entryGroups.flat().sort((a, b) => entryStartMs(a) - entryStartMs(b))
407429

408430
const hasText = entries.some((entry) => entry.text?.trim())
409431
if (!hasText) {

0 commit comments

Comments
 (0)