Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dist/plugins.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ export { ChangeDataCapturePlugin } from '../plugins/cdc'
export { QueryLogPlugin } from '../plugins/query-log'
export { ResendPlugin } from '../plugins/resend'
export { ClerkPlugin } from '../plugins/clerk'
export { DataSyncPlugin } from '../plugins/data-sync'
41 changes: 41 additions & 0 deletions plugins/data-sync/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Data Sync Plugin

The Data Sync plugin provides a reusable replication layer that pulls rows
from the configured external source into StarbaseDB's internal SQLite
database.

It focuses on the core mechanics needed by issue #72:

- table allowlists
- cursor-based incremental pulls
- bounded batches
- external SQL select planning for PostgreSQL, MySQL, and SQLite sources
- internal SQLite insert/upsert planning
- checkpoint and run-history tables
- an authenticated admin run endpoint at `/data-sync/run`

## Example

```ts
import { DataSyncPlugin } from '@outerbase/starbasedb/plugins'

const plugin = new DataSyncPlugin([
{
sourceTable: 'public.users',
targetTable: 'public_users',
cursorColumn: 'updated_at',
primaryKeyColumns: ['id'],
columns: ['id', 'email', 'updated_at'],
batchSize: 500,
},
])
```

When `runOnce()` or `POST /data-sync/run` is called, the plugin reads the last
stored checkpoint, pulls rows with `cursorColumn > checkpoint`, writes each row
to internal SQLite, then stores the newest cursor value.

The checkpoint and run-history tables are internal implementation details:

- `tmp_data_sync_checkpoints`
- `tmp_data_sync_runs`
136 changes: 136 additions & 0 deletions plugins/data-sync/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import { describe, expect, it } from 'vitest'
import {
buildIncrementalSelect,
buildSqliteUpsert,
getNextCursor,
} from './index'

describe('DataSyncPlugin query planning', () => {
it('builds an initial incremental select with mapped columns', () => {
const result = buildIncrementalSelect({
dialect: 'postgresql',
config: {
sourceTable: 'public.users',
cursorColumn: 'updated_at',
primaryKeyColumns: ['id'],
columns: ['id', { source: 'email_address', target: 'email' }],
batchSize: 100,
},
})

expect(result).toEqual({
sql: 'SELECT "id", "email_address" AS "email", "updated_at" FROM "public"."users" ORDER BY "updated_at" ASC LIMIT 100',
params: [],
})
})

it('adds a cursor predicate after a checkpoint', () => {
const result = buildIncrementalSelect({
dialect: 'mysql',
cursorValue: '2026-05-13T00:00:00Z',
config: {
sourceTable: 'users',
cursorColumn: 'updated_at',
batchSize: 25,
},
})

expect(result).toEqual({
sql: 'SELECT * FROM `users` WHERE `updated_at` > ? ORDER BY `updated_at` ASC LIMIT 25',
params: ['2026-05-13T00:00:00Z'],
})
})

it('defaults to selecting all columns when no allowlist is configured', () => {
const result = buildIncrementalSelect({
dialect: 'postgresql',
config: {
sourceTable: 'orders',
cursorColumn: 'id',
},
})

expect(result.sql).toBe(
'SELECT * FROM "orders" ORDER BY "id" ASC LIMIT 500'
)
})

it('clamps batch size to avoid unbounded pulls', () => {
const result = buildIncrementalSelect({
dialect: 'postgresql',
config: {
sourceTable: 'events',
cursorColumn: 'id',
batchSize: 999999,
},
})

expect(result.sql).toContain('LIMIT 5000')
})
})

describe('DataSyncPlugin SQLite upsert planning', () => {
    it('builds an upsert for rows with primary keys', () => {
        // Primary-key columns drive the ON CONFLICT target; all other
        // columns are overwritten from the excluded (incoming) row.
        const row = {
            id: 1,
            email: 'user@example.com',
            updated_at: '2026-05-13T00:00:00Z',
        }

        const plan = buildSqliteUpsert({
            table: 'public_users',
            primaryKeyColumns: ['id'],
            row,
        })

        expect(plan).toEqual({
            sql: 'INSERT INTO "public_users" ("id", "email", "updated_at") VALUES (?, ?, ?) ON CONFLICT("id") DO UPDATE SET "email" = excluded."email", "updated_at" = excluded."updated_at"',
            params: [1, 'user@example.com', '2026-05-13T00:00:00Z'],
        })
    })

    it('uses plain insert when no primary key is configured', () => {
        // Without a primary key there is no conflict target, so the
        // planner falls back to an append-only INSERT.
        const plan = buildSqliteUpsert({
            table: 'events',
            row: { id: 1, name: 'created' },
        })

        expect(plan).toEqual({
            sql: 'INSERT INTO "events" ("id", "name") VALUES (?, ?)',
            params: [1, 'created'],
        })
    })

    it('returns the last cursor from an ordered batch', () => {
        const batch = [
            { id: 1, updated_at: 'a' },
            { id: 2, updated_at: 'b' },
        ]

        expect(getNextCursor(batch, 'updated_at')).toBe('b')
    })
})

describe('DataSyncPlugin identifier safety', () => {
    // Both planners must refuse identifiers that could smuggle SQL
    // fragments into the generated statements.
    it('rejects unsafe source table identifiers', () => {
        const attempt = () =>
            buildIncrementalSelect({
                dialect: 'postgresql',
                config: {
                    sourceTable: 'users;DROP TABLE users',
                    cursorColumn: 'id',
                },
            })

        expect(attempt).toThrow('Unsafe SQL identifier')
    })

    it('rejects unsafe SQLite target identifiers', () => {
        const attempt = () =>
            buildSqliteUpsert({
                table: 'users;DROP',
                row: { id: 1 },
            })

        expect(attempt).toThrow('Unsafe SQL identifier')
    })
})
Loading