diff --git a/src/export/dump.test.ts b/src/export/dump.test.ts
index ca65b43..a63c104 100644
--- a/src/export/dump.test.ts
+++ b/src/export/dump.test.ts
@@ -128,6 +128,44 @@ describe('Database Dump Module', () => {
         )
     })
 
+    it('should paginate through large tables instead of loading all rows at once', async () => {
+        // A full page signals there may be more rows, triggering another query.
+        const fullPage = Array.from({ length: 1000 }, (_, i) => ({
+            id: i + 1,
+            name: `User${i + 1}`,
+        }))
+        const partialPage = [{ id: 1001, name: 'User1001' }]
+
+        vi.mocked(executeOperation)
+            .mockResolvedValueOnce([{ name: 'users' }]) // table list
+            .mockResolvedValueOnce([
+                { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
+            ]) // schema
+            .mockResolvedValueOnce(fullPage) // data page 1 (full -> fetch more)
+            .mockResolvedValueOnce(partialPage) // data page 2 (partial -> stop)
+
+        const response = await dumpDatabaseRoute(mockDataSource, mockConfig)
+
+        expect(response).toBeInstanceOf(Response)
+        const dumpText = await response.text()
+
+        // tables + schema + 2 data pages
+        expect(executeOperation).toHaveBeenCalledTimes(4)
+        expect(dumpText).toContain("INSERT INTO users VALUES (1, 'User1');")
+        expect(dumpText).toContain(
+            "INSERT INTO users VALUES (1001, 'User1001');"
+        )
+
+        // Data queries should be paginated with LIMIT/OFFSET.
+        const firstDataQuery =
+            vi.mocked(executeOperation).mock.calls[2][0][0].sql
+        const secondDataQuery =
+            vi.mocked(executeOperation).mock.calls[3][0][0].sql
+        expect(firstDataQuery).toContain('LIMIT')
+        expect(firstDataQuery).toContain('OFFSET 0')
+        expect(secondDataQuery).toContain('OFFSET 1000')
+    })
+
     it('should return a 500 response when an error occurs', async () => {
         const consoleErrorMock = vi
             .spyOn(console, 'error')
diff --git a/src/export/dump.ts b/src/export/dump.ts
index 91a2e89..16fdd6a 100644
--- a/src/export/dump.ts
+++ b/src/export/dump.ts
@@ -3,12 +3,18 @@ import { StarbaseDBConfiguration } from '../handler'
 import { DataSource } from '../types'
 import { createResponse } from '../utils'
 
+// Number of rows to read from a table per query. Keeping this bounded means
+// we never materialize an entire (potentially multi-GB) table in memory at
+// once while building the dump.
+const DUMP_PAGE_SIZE = 1000
+
 export async function dumpDatabaseRoute(
     dataSource: DataSource,
     config: StarbaseDBConfiguration
 ): Promise<Response> {
     try {
-        // Get all table names
+        // Get all table names up front so we can fail fast (with a proper 500
+        // response) if the database is unreachable.
         const tablesResult = await executeOperation(
             [{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }],
             dataSource,
@@ -16,54 +22,91 @@ export async function dumpDatabaseRoute(
         )
         const tables = tablesResult.map((row: any) => row.name)
 
-        let dumpContent = 'SQLite format 3\0' // SQLite file header
+        const encoder = new TextEncoder()
+
+        // Stream the dump out instead of buffering the whole database into a
+        // single string. This keeps memory usage flat regardless of database
+        // size, and because the response body is produced incrementally the
+        // connection stays alive past the 30s request window for large dumps.
+        const stream = new ReadableStream({
+            async start(controller) {
+                try {
+                    controller.enqueue(encoder.encode('SQLite format 3\0')) // SQLite file header
 
-        // Iterate through all tables
-        for (const table of tables) {
-            // Get table schema
-            const schemaResult = await executeOperation(
-                [
-                    {
-                        sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
-                    },
-                ],
-                dataSource,
-                config
-            )
+                    // Iterate through all tables
+                    for (const table of tables) {
+                        // Get table schema
+                        const schemaResult = await executeOperation(
+                            [
+                                {
+                                    sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
+                                },
+                            ],
+                            dataSource,
+                            config
+                        )
 
-            if (schemaResult.length) {
-                const schema = schemaResult[0].sql
-                dumpContent += `\n-- Table: ${table}\n${schema};\n\n`
-            }
+                        if (schemaResult.length) {
+                            const schema = schemaResult[0].sql
+                            controller.enqueue(
+                                encoder.encode(
+                                    `\n-- Table: ${table}\n${schema};\n\n`
+                                )
+                            )
+                        }
 
-            // Get table data
-            const dataResult = await executeOperation(
-                [{ sql: `SELECT * FROM ${table};` }],
-                dataSource,
-                config
-            )
+                        // Get table data one page at a time so a single large
+                        // table never has to fit in memory all at once.
+                        let offset = 0
+                        while (true) {
+                            const dataResult = await executeOperation(
+                                [
+                                    {
+                                        sql: `SELECT * FROM ${table} LIMIT ${DUMP_PAGE_SIZE} OFFSET ${offset};`,
+                                    },
+                                ],
+                                dataSource,
+                                config
+                            )
 
-            for (const row of dataResult) {
-                const values = Object.values(row).map((value) =>
-                    typeof value === 'string'
-                        ? `'${value.replace(/'/g, "''")}'`
-                        : value
-                )
-                dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
-            }
+                            for (const row of dataResult) {
+                                const values = Object.values(row).map(
+                                    (value) =>
+                                        typeof value === 'string'
+                                            ? `'${value.replace(/'/g, "''")}'`
+                                            : value
+                                )
+                                controller.enqueue(
+                                    encoder.encode(
+                                        `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
+                                    )
+                                )
+                            }
 
-            dumpContent += '\n'
-        }
+                            // A short page means we've reached the end of the table.
+                            if (dataResult.length < DUMP_PAGE_SIZE) {
+                                break
+                            }
+                            offset += DUMP_PAGE_SIZE
+                        }
 
-        // Create a Blob from the dump content
-        const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' })
+                        controller.enqueue(encoder.encode('\n'))
+                    }
+
+                    controller.close()
+                } catch (error: any) {
+                    console.error('Database Dump Error:', error)
+                    controller.error(error)
+                }
+            },
+        })
 
         const headers = new Headers({
             'Content-Type': 'application/x-sqlite3',
             'Content-Disposition': 'attachment; filename="database_dump.sql"',
         })
 
-        return new Response(blob, { headers })
+        return new Response(stream, { headers })
     } catch (error: any) {
         console.error('Database Dump Error:', error)
         return createResponse(undefined, 'Failed to create database dump', 500)
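A note on consuming the new behavior (this sketch is not part of the diff): because the route now returns a `Response` backed by a `ReadableStream`, callers that care about memory can read the dump incrementally with the standard Web Streams reader API instead of `response.text()`, which the tests use only because their fixtures are small. A minimal sketch, assuming it sits next to `dump.ts` so the relative imports resolve; `countInsertStatements` is a hypothetical helper, not part of the PR:

```ts
import { DataSource } from '../types'
import { StarbaseDBConfiguration } from '../handler'
import { dumpDatabaseRoute } from './dump'

// Hypothetical consumer: drains the streamed dump while holding at most one
// chunk (plus one partial line) in memory, counting INSERT statements seen.
async function countInsertStatements(
    dataSource: DataSource,
    config: StarbaseDBConfiguration
): Promise<number> {
    const response = await dumpDatabaseRoute(dataSource, config)
    const reader = response.body!.getReader()
    const decoder = new TextDecoder()

    let tail = ''
    let inserts = 0
    while (true) {
        const { done, value } = await reader.read()
        if (done) break
        // Chunks are not guaranteed to end on line boundaries, so carry the
        // trailing partial line over to the next chunk before splitting.
        const lines = (tail + decoder.decode(value, { stream: true })).split('\n')
        tail = lines.pop() ?? ''
        inserts += lines.filter((line) => line.startsWith('INSERT INTO ')).length
    }
    if (tail.startsWith('INSERT INTO ')) inserts++
    return inserts
}
```

Reading chunk by chunk like this mirrors how the server produces the body, so neither side ever buffers the full dump.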