@@ -21,30 +21,34 @@ export function chunkArray<T>(arr: T[], size: number): T[][] {
   return out
 }
 
-export interface SelectByWorkspaceChunksOptions {
+export interface SelectByIdChunksOptions {
   /** Cap on rows returned across all chunks. Defaults to a full per-table cleanup budget. */
   overallLimit?: number
-  workspaceChunkSize?: number
+  chunkSize?: number
 }
 
 /**
- * Run a SELECT query once per workspace chunk and concatenate results up to
+ * Run a SELECT query once per ID chunk and concatenate results up to
  * `overallLimit`. Each chunk's query is passed the remaining row budget so the
  * total never exceeds the cap. Use this when you need the selected row set
  * (e.g. to drive S3 or copilot-backend cleanup alongside the DB delete).
+ *
+ * Works for any large ID set — workspace IDs, workflow IDs, etc. Avoids
+ * sending one massive `IN (...)` list that would blow Postgres's statement
+ * timeout.
  */
-export async function selectRowsByWorkspaceChunks<T>(
-  workspaceIds: string[],
+export async function selectRowsByIdChunks<T>(
+  ids: string[],
   query: (chunkIds: string[], chunkLimit: number) => Promise<T[]>,
   {
     overallLimit = DEFAULT_BATCH_SIZE * DEFAULT_MAX_BATCHES_PER_TABLE,
-    workspaceChunkSize = DEFAULT_WORKSPACE_CHUNK_SIZE,
-  }: SelectByWorkspaceChunksOptions = {}
+    chunkSize = DEFAULT_WORKSPACE_CHUNK_SIZE,
+  }: SelectByIdChunksOptions = {}
 ): Promise<T[]> {
-  if (workspaceIds.length === 0) return []
+  if (ids.length === 0) return []
 
   const rows: T[] = []
-  for (const chunkIds of chunkArray(workspaceIds, workspaceChunkSize)) {
+  for (const chunkIds of chunkArray(ids, chunkSize)) {
     if (rows.length >= overallLimit) break
     const remaining = overallLimit - rows.length
     const chunkRows = await query(chunkIds, remaining)
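For context, a minimal caller sketch of the renamed helper. The `DocRow` shape, the `findDocsByWorkspaceIds` helper, and the concrete limits below are hypothetical placeholders used for illustration, not part of this change:

interface DocRow {
  id: string
  s3Key: string
}

// Hypothetical data-access helper: roughly SELECT ... WHERE workspace_id IN (chunkIds) LIMIT limit.
declare function findDocsByWorkspaceIds(chunkIds: string[], limit: number): Promise<DocRow[]>
declare const workspaceIds: string[]

// Fetch at most 5,000 rows in total, querying 200 workspace IDs per SELECT.
const docs = await selectRowsByIdChunks<DocRow>(
  workspaceIds,
  (chunkIds, chunkLimit) => findDocsByWorkspaceIds(chunkIds, chunkLimit),
  { overallLimit: 5_000, chunkSize: 200 }
)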
@@ -68,8 +72,14 @@ export interface ChunkedBatchDeleteOptions<TRow extends { id: string }> {
   /** Runs between SELECT and DELETE; receives the just-selected rows. */
   onBatch?: (rows: TRow[]) => Promise<void>
   batchSize?: number
-  /** Max batches per workspace chunk. Total per-run cap = chunks * maxBatches * batchSize. */
+  /** Max batches per workspace chunk. */
   maxBatches?: number
+  /**
+   * Hard cap on rows processed (deleted + failed) across all chunks per call.
+   * Defaults to `DEFAULT_BATCH_SIZE * DEFAULT_MAX_BATCHES_PER_TABLE`. Cron
+   * runs frequently enough to catch up the backlog over multiple invocations.
+   */
+  totalRowLimit?: number
   workspaceChunkSize?: number
 }
 
@@ -78,7 +88,8 @@ export interface ChunkedBatchDeleteOptions<TRow extends { id: string }> {
  *
  * For each workspace chunk: SELECT a batch of eligible rows → run optional
  * `onBatch` hook (e.g. to delete S3 files) → DELETE those rows by ID. Repeats
- * until exhausted or `maxBatches` is hit, then moves to the next chunk.
+ * until exhausted or `maxBatches` is hit, then moves to the next chunk. Stops
+ * the whole call once `totalRowLimit` rows have been processed.
  *
  * Workspace IDs are chunked before the SELECT — see
  * `DEFAULT_WORKSPACE_CHUNK_SIZE` for why.
@@ -91,6 +102,7 @@ export async function chunkedBatchDelete<TRow extends { id: string }>({
   onBatch,
   batchSize = DEFAULT_BATCH_SIZE,
   maxBatches = DEFAULT_MAX_BATCHES_PER_TABLE,
+  totalRowLimit = DEFAULT_BATCH_SIZE * DEFAULT_MAX_BATCHES_PER_TABLE,
   workspaceChunkSize = DEFAULT_WORKSPACE_CHUNK_SIZE,
 }: ChunkedBatchDeleteOptions<TRow>): Promise<TableCleanupResult> {
   const result: TableCleanupResult = { table: tableName, deleted: 0, failed: 0 }
@@ -101,14 +113,25 @@ export async function chunkedBatchDelete<TRow extends { id: string }>({
   }
 
   const chunks = chunkArray(workspaceIds, workspaceChunkSize)
+  let stoppedEarly = false
 
   for (const [chunkIdx, chunkIds] of chunks.entries()) {
+    if (result.deleted + result.failed >= totalRowLimit) {
+      stoppedEarly = true
+      break
+    }
+
     let batchesProcessed = 0
     let hasMore = true
 
-    while (hasMore && batchesProcessed < maxBatches) {
+    while (
+      hasMore &&
+      batchesProcessed < maxBatches &&
+      result.deleted + result.failed < totalRowLimit
+    ) {
+      let rows: TRow[] = []
       try {
-        const rows = await selectChunk(chunkIds, batchSize)
+        rows = await selectChunk(chunkIds, batchSize)
 
         if (rows.length === 0) {
           hasMore = false
@@ -127,17 +150,19 @@ export async function chunkedBatchDelete<TRow extends { id: string }>({
         hasMore = rows.length === batchSize
         batchesProcessed++
       } catch (error) {
-        result.failed++
-        logger.error(`[${tableName}] Batch failed (chunk ${chunkIdx + 1}/${chunks.length}):`, {
-          error,
-        })
+        // Count rows we tried to delete; SELECT-stage errors leave rows=[].
+        result.failed += rows.length
+        logger.error(
+          `[${tableName}] Batch failed (chunk ${chunkIdx + 1}/${chunks.length}, ${rows.length} rows):`,
+          { error }
+        )
         hasMore = false
       }
     }
   }
 
   logger.info(
-    `[${tableName}] Complete: ${result.deleted} rows deleted across ${chunks.length} chunks (${result.failed} chunk failures)`
+    `[${tableName}] Complete: ${result.deleted} deleted, ${result.failed} failed across ${chunks.length} chunks${stoppedEarly ? ' (row-limit reached, remaining chunks deferred to next run)' : ''}`
   )
 
   return result
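Taken together, a hedged wiring sketch for the new `totalRowLimit` option might look like the following. The table name, row shape, the S3/query helpers, and the `deleteByIds` option name (not visible in this diff) are illustrative assumptions, not code from this PR:

interface ChatRow {
  id: string
  s3Key: string | null
}

// Hypothetical helpers standing in for the real query layer and S3 client.
declare function selectExpiredChats(chunkIds: string[], limit: number): Promise<ChatRow[]>
declare function deleteChatsByIds(ids: string[]): Promise<void>
declare function deleteS3Objects(keys: string[]): Promise<void>
declare const expiredWorkspaceIds: string[]

const result = await chunkedBatchDelete<ChatRow>({
  tableName: 'chat',
  workspaceIds: expiredWorkspaceIds,
  selectChunk: selectExpiredChats,
  deleteByIds: deleteChatsByIds, // assumed option name; the DELETE step is outside this diff
  onBatch: async (rows) => {
    // Clean up backing S3 objects before the rows themselves are deleted.
    const keys = rows.map((r) => r.s3Key).filter((k): k is string => k !== null)
    if (keys.length > 0) await deleteS3Objects(keys)
  },
  // Hard cap for this invocation; the cron picks up any remaining backlog next run.
  totalRowLimit: 10_000,
})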