From a6f8b1b9200a061c02e37244719ff992373f29a2 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Fri, 20 Feb 2026 13:25:56 -0600 Subject: [PATCH 01/16] fix(network): cloudsync_network_check_changes must not return the nrows value in case of error `SELECT cloudsync_network_check_changes();` was returning "Runtime error: 0" in case of error response from the cloudsync microservice instead of the real error message --- src/network.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/network.c b/src/network.c index c35b00f..bb29264 100644 --- a/src/network.c +++ b/src/network.c @@ -897,10 +897,10 @@ void cloudsync_network_check_changes (sqlite3_context *context, int argc, sqlite DEBUG_FUNCTION("cloudsync_network_check_changes"); int nrows = 0; - cloudsync_network_check_internal(context, &nrows); + int rc = cloudsync_network_check_internal(context, &nrows); // returns number of applied rows - sqlite3_result_int(context, nrows); + if (rc == SQLITE_OK) sqlite3_result_int(context, nrows); } void cloudsync_network_reset_sync_version (sqlite3_context *context, int argc, sqlite3_value **argv) { From 1a42745f912d15c53745c51ba6969d161e4d2ab4 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Fri, 27 Feb 2026 23:01:38 -0600 Subject: [PATCH 02/16] feat(rls): add complete support for RLS with batch merge in cloudsync_payload_apply --- .claude/commands/test-sync-roundtrip-rls.md | 13 +- docker/Makefile.postgresql | 4 +- docs/postgresql/RLS.md | 192 ++++ plans/BATCH_MERGE_AND_RLS.md | 166 +++++ plans/ISSUE_POSTGRES_SCHEMA.md | 73 --- .../ISSUE_WARNING_resource_was_not_closed.md | 64 -- plans/PG_CLOUDSYNC_CHANGES_COL_VALUE_BYTEA.md | 104 ---- plans/POSTGRESQL_IMPLEMENTATION.md | 583 ------------------ plans/TODO.md | 79 --- src/cloudsync.c | 460 ++++++++++++-- src/cloudsync.h | 6 - src/database.h | 7 +- src/postgresql/database_postgresql.c | 150 ++++- src/sqlite/database_sqlite.c | 132 +++- test/postgresql/27_rls_batch_merge.sql | 356 +++++++++++ 
test/postgresql/28_db_version_tracking.sql | 275 +++++++++ test/postgresql/29_rls_multicol.sql | 435 +++++++++++++ test/postgresql/full_test.sql | 3 + test/unit.c | 419 +++++++------ 19 files changed, 2324 insertions(+), 1197 deletions(-) create mode 100644 docs/postgresql/RLS.md create mode 100644 plans/BATCH_MERGE_AND_RLS.md delete mode 100644 plans/ISSUE_POSTGRES_SCHEMA.md delete mode 100644 plans/ISSUE_WARNING_resource_was_not_closed.md delete mode 100644 plans/PG_CLOUDSYNC_CHANGES_COL_VALUE_BYTEA.md delete mode 100644 plans/POSTGRESQL_IMPLEMENTATION.md delete mode 100644 plans/TODO.md create mode 100644 test/postgresql/27_rls_batch_merge.sql create mode 100644 test/postgresql/28_db_version_tracking.sql create mode 100644 test/postgresql/29_rls_multicol.sql diff --git a/.claude/commands/test-sync-roundtrip-rls.md b/.claude/commands/test-sync-roundtrip-rls.md index 38e496c..f55c20b 100644 --- a/.claude/commands/test-sync-roundtrip-rls.md +++ b/.claude/commands/test-sync-roundtrip-rls.md @@ -112,31 +112,25 @@ Inside psql: 8. Create RLS policies based on the user's description. 
Example for "user can only access their own rows": ```sql -- SELECT: User can see rows they own - -- Helper function fallback handles ON CONFLICT edge cases where user_id resolves to EXCLUDED row CREATE POLICY "select_own_rows" ON FOR SELECT USING ( auth.uid() = user_id - OR auth.uid() = _get_owner(id) ); - -- INSERT: Allow if user_id matches auth.uid() OR is default (cloudsync staging) + -- INSERT: Allow if user_id matches auth.uid() CREATE POLICY "insert_own_rows" ON FOR INSERT WITH CHECK ( auth.uid() = user_id - OR user_id = '00000000-0000-0000-0000-000000000000'::uuid ); - -- UPDATE: Check ownership via explicit lookup, allow default for staging + -- UPDATE: Check ownership via explicit lookup CREATE POLICY "update_own_rows" ON FOR UPDATE USING ( auth.uid() = user_id - OR auth.uid() = _get_owner(id) - OR user_id = '00000000-0000-0000-0000-000000000000'::uuid ) WITH CHECK ( auth.uid() = user_id - OR user_id = '00000000-0000-0000-0000-000000000000'::uuid ); -- DELETE: User can only delete rows they own @@ -148,9 +142,6 @@ Inside psql: 9. Initialize cloudsync: `SELECT cloudsync_init('');` 10. Insert some initial test data (optional, can be done via SQLite clients) -**Why these specific policies?** -CloudSync uses `INSERT...ON CONFLICT DO UPDATE` for field-by-field synchronization. During conflict detection, PostgreSQL's RLS may compare `auth.uid()` against the EXCLUDED row's `user_id` (which has the default value) instead of the existing row's `user_id`. The helper function explicitly looks up the existing row's owner to work around this issue. See `docs/postgresql/RLS.md` for detailed explanation. 
- ### Step 5: Get JWT Tokens for Two Users Get JWT tokens for both test users by running the token script twice: diff --git a/docker/Makefile.postgresql b/docker/Makefile.postgresql index 17ae6c4..78ae6bf 100644 --- a/docker/Makefile.postgresql +++ b/docker/Makefile.postgresql @@ -238,7 +238,7 @@ postgres-docker-shell: # Build CloudSync into the Supabase CLI postgres image tag postgres-supabase-build: @echo "Building CloudSync image for Supabase CLI..." - @tmp_dockerfile="$$(mktemp /tmp/cloudsync-supabase-cli.XXXXXX)"; \ + @tmp_dockerfile="$$(mktemp ./cloudsync-supabase-cli.XXXXXX)"; \ src_dockerfile="$(SUPABASE_CLI_DOCKERFILE)"; \ supabase_cli_image="$(SUPABASE_CLI_IMAGE)"; \ if [ -z "$$supabase_cli_image" ]; then \ @@ -267,6 +267,8 @@ postgres-supabase-build: exit 1; \ fi; \ echo "Using base image: $$supabase_cli_image"; \ + echo "Pulling fresh base image to avoid layer accumulation..."; \ + docker pull "$$supabase_cli_image" 2>/dev/null || true; \ docker build --build-arg SUPABASE_POSTGRES_TAG="$(SUPABASE_POSTGRES_TAG)" -f "$$tmp_dockerfile" -t "$$supabase_cli_image" .; \ rm -f "$$tmp_dockerfile"; \ echo "Build complete: $$supabase_cli_image" diff --git a/docs/postgresql/RLS.md b/docs/postgresql/RLS.md new file mode 100644 index 0000000..c0871d3 --- /dev/null +++ b/docs/postgresql/RLS.md @@ -0,0 +1,192 @@ +# Row Level Security (RLS) with CloudSync + +CloudSync is fully compatible with PostgreSQL Row Level Security. Standard RLS policies work out of the box. + +## How It Works + +### Column-batch merge + +CloudSync resolves CRDT conflicts at the column level — a sync payload may contain individual column changes arriving one at a time. Before writing to the target table, CloudSync buffers all winning column values for the same primary key and flushes them as a single SQL statement. This ensures the database sees a complete row with all columns present. 
+ +### UPDATE vs INSERT selection + +When flushing a batch, CloudSync chooses the statement type based on whether the row already exists locally: + +- **New row**: `INSERT ... ON CONFLICT DO UPDATE` — all columns are present (including the ownership column), so the INSERT `WITH CHECK` policy can evaluate correctly. +- **Existing row**: `UPDATE ... SET ... WHERE pk = ...` — only the changed columns are set. The UPDATE `USING` policy checks the existing row, which already has the correct ownership column value. + +### Per-PK savepoint isolation + +Each primary key's flush is wrapped in its own savepoint. When RLS denies a write: + +1. The database raises an error inside the savepoint +2. CloudSync rolls back that savepoint, releasing all resources acquired during the failed statement +3. Processing continues with the next primary key + +This means a single payload can contain a mix of allowed and denied rows — allowed rows commit normally, denied rows are silently skipped. The caller receives the total number of column changes processed (including denied ones) rather than an error. + +## Quick Setup + +Given a table with an ownership column (`user_id`): + +```sql +CREATE TABLE documents ( + id TEXT PRIMARY KEY NOT NULL, + user_id UUID, + title TEXT, + content TEXT +); + +SELECT cloudsync_init('documents'); +``` + +Enable RLS and create standard policies: + +```sql +ALTER TABLE documents ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "select_own" ON documents FOR SELECT + USING (auth.uid() = user_id); + +CREATE POLICY "insert_own" ON documents FOR INSERT + WITH CHECK (auth.uid() = user_id); + +CREATE POLICY "update_own" ON documents FOR UPDATE + USING (auth.uid() = user_id) + WITH CHECK (auth.uid() = user_id); + +CREATE POLICY "delete_own" ON documents FOR DELETE + USING (auth.uid() = user_id); +``` + +## Example: Two-User Sync with RLS + +This example shows the complete flow of syncing data between two databases where the target enforces RLS. 
+ +### Setup + +```sql +-- Source database (DB A) — no RLS, represents the sync server +CREATE TABLE documents ( + id TEXT PRIMARY KEY NOT NULL, user_id UUID, title TEXT, content TEXT +); +SELECT cloudsync_init('documents'); + +-- Target database (DB B) — RLS enforced +CREATE TABLE documents ( + id TEXT PRIMARY KEY NOT NULL, user_id UUID, title TEXT, content TEXT +); +SELECT cloudsync_init('documents'); +ALTER TABLE documents ENABLE ROW LEVEL SECURITY; +-- (policies as above) +``` + +### Insert sync + +User 1 creates a document on DB A: + +```sql +-- On DB A +INSERT INTO documents VALUES ('doc1', 'user1-uuid', 'Hello', 'World'); +``` + +Apply the payload on DB B as the authenticated user: + +```sql +-- On DB B (running as user1) +SET app.current_user_id = 'user1-uuid'; +SET ROLE authenticated; +SELECT cloudsync_payload_apply(decode(:payload_hex, 'hex')); +``` + +The insert succeeds because `user_id` matches `auth.uid()`. + +### Insert denial + +User 1 tries to sync a document owned by user 2: + +```sql +-- On DB A +INSERT INTO documents VALUES ('doc2', 'user2-uuid', 'Secret', 'Data'); +``` + +```sql +-- On DB B (running as user1) +SET app.current_user_id = 'user1-uuid'; +SET ROLE authenticated; +SELECT cloudsync_payload_apply(decode(:payload_hex, 'hex')); +``` + +The insert is denied by RLS. The row does not appear in DB B. No error is raised to the caller — CloudSync isolates the failure via a per-PK savepoint and continues processing the remaining payload. + +### Partial update sync + +User 1 updates only the title of their own document: + +```sql +-- On DB A +UPDATE documents SET title = 'Hello Updated' WHERE id = 'doc1'; +``` + +The sync payload contains only the changed column (`title`). CloudSync detects that the row already exists on DB B and uses a plain `UPDATE` statement: + +```sql +UPDATE documents SET title = $2 WHERE id = $1; +``` + +The UPDATE policy checks the existing row (which has the correct `user_id`), so it succeeds. 
+ +### Mixed payload + +When a single payload contains rows for multiple users, CloudSync handles each primary key independently: + +```sql +-- On DB A +INSERT INTO documents VALUES ('doc3', 'user1-uuid', 'Mine', '...'); +INSERT INTO documents VALUES ('doc4', 'user2-uuid', 'Theirs', '...'); +``` + +```sql +-- On DB B (running as user1) +SELECT cloudsync_payload_apply(decode(:payload_hex, 'hex')); +-- doc3 is inserted (allowed), doc4 is silently skipped (denied) +``` + +## Supabase Notes + +When using Supabase: + +1. **auth.uid()**: Returns the authenticated user's UUID from the JWT claims. +2. **JWT propagation**: Ensure the JWT token is set before sync operations: + ```sql + SELECT set_config('request.jwt.claims', '{"sub": "user-uuid", ...}', true); + ``` +3. **Service role bypass**: The Supabase service role bypasses RLS entirely. Use the `authenticated` role for user-context operations where RLS enforcement is desired. + +## Troubleshooting + +### "new row violates row-level security policy" + +**Symptom**: Insert operations fail during sync. + +**Cause**: The ownership column value doesn't match the authenticated user. + +**Solution**: Verify that: +- The JWT / session variable is set correctly before calling `cloudsync_payload_apply` +- The `user_id` column in the synced data matches `auth.uid()` +- RLS policies reference the correct ownership column + +### Debugging + +```sql +-- Check current auth context +SELECT auth.uid(); + +-- Inspect a specific row's ownership +SELECT id, user_id FROM documents WHERE id = 'problematic-pk'; + +-- Temporarily disable RLS to inspect all data +ALTER TABLE documents DISABLE ROW LEVEL SECURITY; +-- ... inspect ... 
+ALTER TABLE documents ENABLE ROW LEVEL SECURITY; +``` diff --git a/plans/BATCH_MERGE_AND_RLS.md b/plans/BATCH_MERGE_AND_RLS.md new file mode 100644 index 0000000..def727e --- /dev/null +++ b/plans/BATCH_MERGE_AND_RLS.md @@ -0,0 +1,166 @@ +# Deferred Column-Batch Merge and RLS Support + +## Problem + +CloudSync resolves CRDT conflicts per-column, so `cloudsync_payload_apply` processes column changes one at a time. Previously each winning column was written immediately via a single-column `INSERT ... ON CONFLICT DO UPDATE`. This caused two issues with PostgreSQL RLS: + +1. **Partial-column UPSERT fails INSERT WITH CHECK**: An update to just `title` generates `INSERT INTO docs (id, title) VALUES (...) ON CONFLICT DO UPDATE SET title=...`. PostgreSQL evaluates the INSERT `WITH CHECK` policy *before* checking for conflicts. Missing columns (e.g. `user_id`) default to NULL, so `auth.uid() = user_id` fails. The ON CONFLICT path is never reached. + +2. **Premature flush in SPI**: `database_in_transaction()` always returns true inside PostgreSQL SPI. The old code only updated `last_payload_db_version` inside `if (!in_transaction && db_version_changed)`, so the variable stayed at -1, `db_version_changed` was true on every row, and batches flushed after every single column. + +## Solution + +### Batch merge (`merge_pending_batch`) + +New structs in `cloudsync.c`: + +- `merge_pending_entry` — one buffered column (col_name, col_value via `database_value_dup`, col_version, db_version, site_id, seq) +- `merge_pending_batch` — collects entries for one PK (table, pk, row_exists flag, entries array, statement cache) + +`data->pending_batch` is set to `&batch` (stack-allocated) at the start of `cloudsync_payload_apply`. The INSTEAD OF trigger calls `merge_insert`, which calls `merge_pending_add` instead of `merge_insert_col`. Flush happens at PK/table/db_version boundaries and after the loop. 
+ +### UPDATE vs UPSERT (`row_exists` flag) + +`merge_insert` sets `batch->row_exists = (local_cl != 0)` on the first winning column. At flush time `merge_flush_pending` selects: + +- `row_exists=true` -> `sql_build_update_pk_and_multi_cols` -> `UPDATE docs SET title=? WHERE id=?` +- `row_exists=false` -> `sql_build_upsert_pk_and_multi_cols` -> `INSERT ... ON CONFLICT DO UPDATE` + +Both SQLite and PostgreSQL implement `sql_build_update_pk_and_multi_cols` as a proper UPDATE statement. This is required for SQLiteCloud (which uses the SQLite extension but enforces RLS). + +**Example**: DB A and DB B both have row `id='doc1'` with `user_id='alice'`, `title='Hello'`. Alice updates `title='World'` on A. The payload applied to B contains only `(id, title)`: + +- **UPSERT** (wrong for RLS): `INSERT INTO docs ("id","title") VALUES (?,?) ON CONFLICT DO UPDATE SET "title"=EXCLUDED."title"` — fails INSERT `WITH CHECK` because `user_id` is NULL in the proposed row. +- **UPDATE** (correct): `UPDATE "docs" SET "title"=?2 WHERE "id"=?1` — skips INSERT `WITH CHECK` entirely; the UPDATE `USING` policy checks the existing row which has the correct `user_id`. + +In plain SQLite (no RLS) both produce the same result. The distinction only matters when RLS is enforced (SQLiteCloud, PostgreSQL). + +### Statement cache + +`merge_pending_batch` caches the last prepared statement (`cached_vm`) along with the column combination and `row_exists` flag that produced it. On each flush, `merge_flush_pending` compares the current column names, count, and `row_exists` against the cache: + +- **Cache hit**: `dbvm_reset` + rebind (skip SQL build and `databasevm_prepare`) +- **Cache miss**: finalize old cached statement, build new SQL, prepare, and update cache + +This recovers the precompiled-statement advantage of the old single-column path. In a typical payload where consecutive PKs change the same columns, the cache hit rate is high. 
+ +The cached statement is finalized once at the end of `cloudsync_payload_apply`, not on every flush. + +### `last_payload_db_version` fix + +Moved the update outside the savepoint block so it executes unconditionally: + +```c +if (db_version_changed) { + last_payload_db_version = decoded_context.db_version; +} +``` + +Previously this was inside `if (!in_transaction && db_version_changed)`, which never ran in SPI. + +## Savepoint Architecture + +### Two-level savepoint design + +`cloudsync_payload_apply` uses two layers of savepoints that serve different purposes: + +| Layer | Where | Purpose | +|-------|-------|---------| +| **Outer** (per-db_version) | `cloudsync_payload_apply` loop | Transaction grouping + commit hook trigger (SQLite only) | +| **Inner** (per-PK) | `merge_flush_pending` | RLS error isolation + executor resource cleanup | + +### Outer savepoints: per-db_version in `cloudsync_payload_apply` + +```c +if (!in_savepoint && db_version_changed && !database_in_transaction(data)) { + database_begin_savepoint(data, "cloudsync_payload_apply"); + in_savepoint = true; +} +``` + +These savepoints group rows with the same source `db_version` into one transaction. The `RELEASE` (commit) at each db_version boundary triggers `cloudsync_commit_hook`, which: +- Saves `pending_db_version` as the new `data->db_version` +- Resets `data->seq = 0` + +This ensures unique `(db_version, seq)` tuples in `cloudsync_changes` across groups. + +**In PostgreSQL SPI, these are dead code**: `database_in_transaction()` returns `true` (via `IsTransactionState()`), so the condition `!database_in_transaction(data)` is always false and `in_savepoint` is never set. This is correct because: +1. PostgreSQL has no equivalent commit hook on subtransaction release +2. The SPI transaction from `SPI_connect` already provides transaction context +3. 
The inner per-PK savepoint handles the RLS isolation PostgreSQL needs + +**Why a single outer savepoint doesn't work**: We tested replacing per-db_version savepoints with a single savepoint wrapping the entire loop. This broke the `(db_version, seq)` uniqueness invariant in SQLite because the commit hook never fired mid-apply — `data->db_version` never advanced and `seq` never reset. + +### Inner savepoints: per-PK in `merge_flush_pending` + +```c +flush_savepoint = (database_begin_savepoint(data, "merge_flush") == DBRES_OK); +// ... database operations ... +cleanup: + if (flush_savepoint) { + if (rc == DBRES_OK) database_commit_savepoint(data, "merge_flush"); + else database_rollback_savepoint(data, "merge_flush"); + } +``` + +Wraps each PK's flush in a savepoint. On failure (e.g. RLS denial), `database_rollback_savepoint` calls `RollbackAndReleaseCurrentSubTransaction()` in PostgreSQL, which properly releases all executor resources (open relations, snapshots, plan cache) acquired during the failed statement. This eliminates the "resource was not closed" warnings that `SPI_finish` previously emitted. + +In SQLite, when the outer per-db_version savepoint is active, these become harmless nested savepoints. + +### Platform behavior summary + +| Environment | Outer savepoint | Inner savepoint | Effect | +|---|---|---|---| +| **PostgreSQL SPI** | Dead code (`in_transaction` always true) | Active — RLS error isolation + resource cleanup | Only inner savepoint runs | +| **SQLite client** | Active — groups writes, triggers commit hook | Active — nested inside outer, harmless | Both run; outer provides transaction grouping | +| **SQLiteCloud** | Active — groups writes, triggers commit hook | Active — RLS error isolation | Both run; each serves its purpose | + +## SPI and Memory Management + +### Nested SPI levels + +`pg_cloudsync_payload_apply` calls `SPI_connect` (level 1). 
Inside the loop, `databasevm_step` executes `INSERT INTO cloudsync_changes`, which fires the INSTEAD OF trigger. The trigger calls `SPI_connect` (level 2), runs `merge_insert` / `merge_pending_add`, then `SPI_finish` back to level 1. The deferred `merge_flush_pending` runs at level 1. + +### `database_in_transaction()` in SPI + +Always returns true in SPI context (`IsTransactionState()`). This makes the per-db_version savepoints dead code in PostgreSQL and is why `last_payload_db_version` must be updated unconditionally. + +### Error handling in SPI + +When RLS denies a write, PostgreSQL raises an error inside SPI. The inner per-PK savepoint in `merge_flush_pending` catches this: `RollbackAndReleaseCurrentSubTransaction()` properly releases all executor resources. Without the savepoint, `databasevm_step`'s `PG_CATCH` + `FlushErrorState()` would clear the error stack but leave executor resources orphaned, causing `SPI_finish` to emit "resource was not closed" warnings. + +### Batch cleanup paths + +`batch.entries` is heap-allocated via `cloudsync_memory_realloc` and reused across flushes. Each entry's `col_value` (from `database_value_dup`) is freed by `merge_pending_free_entries` on every flush. The entries array, `cached_vm`, and `cached_col_names` are freed once at the end of `cloudsync_payload_apply`. Error paths (`goto cleanup`, early returns) must free all three and call `merge_pending_free_entries` to avoid leaking `col_value` copies. + +## Batch Apply: Pros and Cons + +The batch path is used for all platforms (SQLite client, SQLiteCloud, PostgreSQL), not just when RLS is active. + +**Pros (even without RLS)**: +- Fewer SQL executions: N winning columns per PK become 1 statement instead of N. Each `databasevm_step` involves B-tree lookup, page modification, WAL write. +- Atomicity per PK: all columns for a PK succeed or fail together. + +**Cons**: +- Dynamic SQL per unique column combination (mitigated by the statement cache). 
+- Memory overhead: `database_value_dup` copies each column value into the buffer. +- Code complexity: batching structs, flush logic, cleanup paths. + +**Why not maintain two paths**: SQLiteCloud uses the SQLite extension with RLS, so the batch path (UPDATE vs UPSERT selection, per-PK savepoints) is required there. Maintaining a separate single-column path for plain SQLite clients would double the code with marginal benefit. + +## Files Changed + +| File | Change | +|------|--------| +| `src/cloudsync.c` | Batch merge structs with statement cache (`cached_vm`, `cached_col_names`), `merge_pending_add`, `merge_flush_pending` (with per-PK savepoint), `merge_pending_free_entries`; `pending_batch` field on context; `row_exists` propagation in `merge_insert`; batch mode in `merge_sentinel_only_insert`; `last_payload_db_version` fix; removed `payload_apply_callback` | +| `src/cloudsync.h` | Removed `CLOUDSYNC_PAYLOAD_APPLY_STEPS` enum | +| `src/database.h` | Added `sql_build_upsert_pk_and_multi_cols`, `sql_build_update_pk_and_multi_cols`; removed callback typedefs | +| `src/sqlite/database_sqlite.c` | Implemented `sql_build_upsert_pk_and_multi_cols` (dynamic SQL); `sql_build_update_pk_and_multi_cols` (delegates to upsert); removed callback functions | +| `src/postgresql/database_postgresql.c` | Implemented `sql_build_update_pk_and_multi_cols` (meta-query against `pg_catalog` generating typed UPDATE) | +| `test/unit.c` | Removed callback code and `do_test_andrea` debug function (fixed 288048-byte memory leak) | +| `test/postgresql/27_rls_batch_merge.sql` | Tests 1-3 (superuser) + Tests 4-6 (authenticated-role RLS enforcement) | +| `docs/postgresql/RLS.md` | Documented INSERT vs UPDATE paths and partial-column RLS interaction | + +## TODO + + - update documentation: RLS.md, README.md and the https://github.com/sqlitecloud/docs repo diff --git a/plans/ISSUE_POSTGRES_SCHEMA.md b/plans/ISSUE_POSTGRES_SCHEMA.md deleted file mode 100644 index a34b0e2..0000000 --- 
a/plans/ISSUE_POSTGRES_SCHEMA.md +++ /dev/null @@ -1,73 +0,0 @@ -Issue summary - -cloudsync_init('users') fails in Supabase postgres with: -"column reference \"id\" is ambiguous". -Both public.users and auth.users exist. Several PostgreSQL SQL templates use only table_name (no schema), so information_schema lookups and dynamic SQL see multiple tables and generate ambiguous column references. - -Proposed fixes (options) - -1) Minimal fix (patch specific templates) -- Add table_schema = current_schema() to information_schema queries. -- Keep relying on search_path. -- Resolves Supabase default postgres collisions without changing the API. - -2) Robust fix (explicit schema support) -- Allow schema-qualified inputs, e.g. cloudsync_init('public.users'). -- Parse schema/table and propagate through query builders. -- Always generate fully-qualified table names ("schema"."table"). -- Apply schema-aware filters in information_schema queries. -- Removes ambiguity regardless of search_path or duplicate table names across schemas. -- Note: payload compatibility requires cloudsync_changes.tbl to remain unqualified; PG apply should resolve schema via cloudsync_table_settings (not search_path) when applying payloads. 
- -Bugged query templates - -Already fixed: -- SQL_PRAGMA_TABLEINFO_PK_COLLIST -- SQL_PRAGMA_TABLEINFO_PK_DECODE_SELECTLIST - -Still vulnerable (missing schema filter): -- SQL_BUILD_SELECT_NONPK_COLS_BY_ROWID -- SQL_PRAGMA_TABLEINFO_LIST_NONPK_NAME_CID -- SQL_CLOUDSYNC_DELETE_COLS_NOT_IN_SCHEMA_OR_PKCOL -- SQL_PRAGMA_TABLEINFO_PK_QUALIFIED_COLLIST_FMT - -Robust fix implementation plan - -Goals -- Support cloudsync_init('users') and cloudsync_init('public.users') -- Default schema to current_schema() when not provided -- Persist schema so future connections are independent of search_path -- Generate fully qualified table names in all PostgreSQL SQL builders - -1) Parse schema/table at init -- In cloudsync_init_table() (cloudsync.c), parse the input table_name: - - If it contains a dot, split schema/table - - Else schema = current_schema() (query once) -- Normalize case to match existing behavior - -2) Persist schema in settings -- Store schema in cloudsync_table_settings using key='schema' -- Keep tbl_name as unqualified table name -- On first run, if schema is not stored, write it - -3) Store schema in context -- Add char *schema to cloudsync_table_context -- Populate on table creation and when reloading from settings -- Use schema when building SQL - -4) Restore schema on new connections -- During context rebuild, read schema from cloudsync_table_settings -- If missing, fallback to current_schema(), optionally persist it - -5) Qualify SQL everywhere (Postgres) -- Use "schema"."table" in generated SQL -- Add table_schema filters to information_schema queries: - - SQL_BUILD_SELECT_NONPK_COLS_BY_ROWID - - SQL_PRAGMA_TABLEINFO_LIST_NONPK_NAME_CID - - SQL_CLOUDSYNC_DELETE_COLS_NOT_IN_SCHEMA_OR_PKCOL - - SQL_PRAGMA_TABLEINFO_PK_QUALIFIED_COLLIST_FMT - - Any other information_schema templates using only table_name - -6) Compatibility -- Existing DBs without schema setting continue to work via current_schema() -- No API changes required for unqualified names diff --git 
a/plans/ISSUE_WARNING_resource_was_not_closed.md b/plans/ISSUE_WARNING_resource_was_not_closed.md deleted file mode 100644 index 579dbb0..0000000 --- a/plans/ISSUE_WARNING_resource_was_not_closed.md +++ /dev/null @@ -1,64 +0,0 @@ -# WARNING: resource was not closed: relation "cloudsync_changes" - -## Summary -The warning was emitted by PostgreSQL when a SPI query left a “relation” resource open. In practice, it means a SPI tuptable (or a relation opened internally by SPI when executing a query) wasn’t released before the outer SQL statement completed. PostgreSQL 17 is stricter about reporting this, so the same issue might have been silent in earlier versions. - -We isolated the warning to the `cloudsync_payload_apply` path when it inserted into the `cloudsync_changes` view and triggered `cloudsync_changes_insert_trigger`. The warnings did **not** occur for direct, manual `INSERT INTO cloudsync_changes ...` statements issued in psql. - -## Why it only happened in the payload-apply path -The key difference was **nested SPI usage** and **statement lifetime**: - -1. **`cloudsync_payload_apply` loops many changes and uses SPI internally** - - `cloudsync_payload_apply` is a C function that processes a payload by decoding multiple changes and applying them in a loop. - - For each change, it executed an `INSERT INTO cloudsync_changes (...)` (via `SQL_CHANGES_INSERT_ROW`), which fires the INSTEAD OF trigger (`cloudsync_changes_insert_trigger`). - -2. **The trigger itself executed SPI queries** - - The trigger function uses SPI to read and write metadata tables. - - This creates *nested* SPI usage within a call stack that is already inside a SPI-driven C function. - -3. **Nested SPI + `INSERT INTO view` has different resource lifetime than a plain insert** - - With a manual psql statement, the SPI usage occurs only once, in a clean top-level context. The statement finishes, SPI cleanup happens, and any tuptable resources are released. 
- - In the payload apply path, SPI queries happen inside the trigger, inside another SPI-driven C function, inside a loop. If any intermediate SPI tuptable or relation is not freed, it can “leak” out of the trigger scope and be reported when the outer statement completes. - - That’s why the warning appears specifically when the trigger is executed as part of `cloudsync_payload_apply` but not for direct inserts from psql. - -4. **PostgreSQL 17 reports this more aggressively** - - Earlier versions often tolerated missing `SPI_freetuptable()` calls without warning. PG17 emits the warning when the statement finishes and resources are still registered as open. - -## Why direct INSERTs from psql didn’t warn -The smoke test included a manual `INSERT INTO cloudsync_changes ...`, and it never produced the warning. That statement: - -- Runs as a single SQL statement initiated by the client. -- Executes the trigger in a clean SPI call stack with no nested SPI calls. -- Completes quickly, and the SPI context is unwound immediately, which can mask missing frees. - -In contrast, the payload-apply path: - -- Opens SPI state for the duration of the payload apply loop. -- Executes many trigger invocations before returning. -- Accumulates any unfreed resources over several calls. - -So the leak only becomes visible in the payload-apply loop. - -## Fix that removed the warning -We introduced a new SQL function that bypasses the trigger and does the work directly: - -- Added `cloudsync_changes_apply(...)` and rewired `SQL_CHANGES_INSERT_ROW` to call it via: - ```sql - SELECT cloudsync_changes_apply(...) - ``` -- The apply function executes the same logic but without inserting into the view and firing the INSTEAD OF trigger. -- This removes the nested SPI + trigger path for the payload apply loop. - -Additionally, we tightened SPI cleanup in multiple functions by ensuring `SPI_freetuptable(SPI_tuptable)` is called after `SPI_execute`/`SPI_execute_plan` calls where needed. 
- -## Takeaway -The warning was not tied to the `cloudsync_changes` view itself, but to **nested SPI contexts and missing SPI cleanup** during payload apply. It was only visible when: - -- the apply loop executed many insert-trigger calls, and -- the server (PG17) reported unclosed relation resources at statement end. - -By switching to `cloudsync_changes_apply(...)` and tightening SPI tuptable cleanup, we removed the warning from the payload-apply path while leaving manual insert behavior unchanged. - -## Next TODO -- Add SPI instrumentation (DEBUG1 logs before/after SPI_execute* and after SPI_freetuptable/SPI_finish) along the payload-apply → view-insert → trigger path, then rerun the instrumented smoke test to pinpoint exactly where the warning is emitted. -- Note: We inspected the payload-apply → INSERT INTO cloudsync_changes → trigger call chain and did not find any missing SPI_freetuptable() or SPI_finish() calls in that path. diff --git a/plans/PG_CLOUDSYNC_CHANGES_COL_VALUE_BYTEA.md b/plans/PG_CLOUDSYNC_CHANGES_COL_VALUE_BYTEA.md deleted file mode 100644 index 62f6b1c..0000000 --- a/plans/PG_CLOUDSYNC_CHANGES_COL_VALUE_BYTEA.md +++ /dev/null @@ -1,104 +0,0 @@ -# Plan: PG cloudsync_changes col_value as encoded bytea - -Requirements (must hold): -- Keep payload format and pk encode/decode logic unchanged. -- Payloads must be interchangeable between SQLite and PostgreSQL peers. -- PostgreSQL `cloudsync_changes.col_value` should carry the already-encoded bytea (type-tagged cloudsync bytes) exactly like SQLite. -- The PostgreSQL layer must pass that bytea through without decoding; decoding happens only when applying to the base table value type. -- Keeping `col_value` as `text` (and casting in SQL) is not acceptable because `pk_encode` would treat it as `DBTYPE_TEXT`, losing original type info (numbers/blobs/null semantics) and producing payloads that are not portable to SQLite peers. 
- -Goals and tradeoffs for the cached helper approach: -- Goal: preserve SQLite-compatible payloads by encoding `col_value` with the same pk wire format before it reaches the SRF/view layer. -- Goal: avoid per-row plan preparation by caching a `SPIPlanPtr` keyed by `(relid, attnum)` for column lookup. -- Tradeoff: still does per-row SPI execution (can’t avoid row fetch); cost is mitigated by cached plans. -- Tradeoff: uses text parameters and type casts in the cached plan, which is slower than binary binding but simpler and type-agnostic. - -Goal: make PostgreSQL `cloudsync_changes.col_value` carry the same type-tagged, cloudsync-encoded bytes as SQLite so `cloudsync_payload_encode` can consume it without dynamic type inference. - -## 1) Inventory and impact analysis -- Schema/SQL definition assumes text: - - `src/postgresql/cloudsync--1.0.sql` declares `cloudsync_changes_srf` with `col_value text`, and the `cloudsync_changes` view is a straight `SELECT *` from the SRF. -- SRF query construction assumes text and uses text filtering: - - `src/postgresql/cloudsync_postgresql.c` `build_union_sql()` builds `COALESCE((SELECT to_jsonb(b)->>t1.col_name ...), '%s') AS col_value` and filters with `s.col_value IS DISTINCT FROM '%s'`. - - The empty-set fallback uses `NULL::text AS col_value`. -- INSERT path expects text and re-casts to the target type: - - `src/postgresql/cloudsync_postgresql.c` `cloudsync_changes_insert_trg` reads `col_value` as text (`text_to_cstring`), looks up the real column type, and casts via `SELECT $1::type` before building a `pgvalue_t`. -- SQL constants and core insert path target `cloudsync_changes`: - - `src/postgresql/sql_postgresql.c` `SQL_CHANGES_INSERT_ROW` inserts into `cloudsync_changes(tbl, pk, col_name, col_value, ...)`. - - `src/cloudsync.c` uses `SQL_CHANGES_INSERT_ROW` via the database abstraction, so any type change affects core insert/merge flows. 
-- Payload encode aggregation currently treats `col_value` as whatever type the query returns: - - `src/postgresql/cloudsync_postgresql.c` `cloudsync_payload_encode_transfn` wraps variadic args with `pgvalues_from_args`; a `bytea` `col_value` would flow through as `bytea` without special handling, but any text assumptions in callers must be updated. -- Tests/docs: - - All `cloudsync_changes` tests are in SQLite (`test/unit.c`); there are no PG-specific tests or docs referencing `col_value` type. - -## 2) Define encoding contract for col_value (PG) -- Encoding contract (align with SQLite): - - `col_value` is a `bytea` containing the pk-encoded value bytes (type tag + payload), same as SQLite `cloudsync_changes`. - - `NULL` uses the same pk-encode NULL marker; no PG-specific sentinel encoding. - - RLS/tombstone filtering should be done before encoding, or by comparing encoded bytes with the known encoded sentinel bytes. -- PG-side encoding strategy: - - Add a C helper that takes a `Datum` + type metadata and returns encoded bytes using existing `pk_encode` path (`dbvalue_t` wrapper + `pk_encode`). - - Avoid JSON/text conversions; the SRF should fetch the base-table `Datum` and encode directly. - - Compute `col_value` for a given row using: - - PK decode predicate to locate the row. - - Column `Datum` from SPI tuple (or a helper function returning `Datum`). -- PG payload encode path: - - Treat `col_value` as already-encoded bytes; pass through without decoding. - - Ensure `pgvalues_from_args` preserves `bytea` and `pk_encode` does not re-encode it (it should encode the container row, not the inner value bytes). - - Avoid any path that casts `col_value` to text in `cloudsync_changes_insert_trg`. 
- -Concrete implementation steps for step 2: -- Add a PG helper to encode a single `Datum` into cloudsync bytes: - - Implement `static bytea *pg_cloudsync_encode_value(Datum val, Oid typeid, int32 typmod, Oid collation, bool isnull)` in `src/postgresql/cloudsync_postgresql.c` (or a new `pg_encode.c`). - - Wrap the `Datum` into a `pgvalue_t` via `pgvalue_create`, then call `pk_encode` with `argc=1` and `is_prikey=false`. - - Allocate a `bytea` with `VARHDRSZ + encoded_len` and copy the encoded bytes; return the `bytea`. - - Ensure text/bytea are detoasted before encoding (via `pgvalue_ensure_detoast`). -- Add a PG helper to encode a column from a base table row: - - Implement `static bytea *pg_cloudsync_encode_col_from_tuple(HeapTuple tup, TupleDesc td, int attnum)` that: - - Extracts `Datum` and `isnull` with `SPI_getbinval`. - - Uses `TupleDescAttr(td, attnum-1)` to capture type/typmod/collation. - - Calls `pg_cloudsync_encode_value(...)` and returns the encoded `bytea`. -- Update `build_union_sql()` logic to select encoded bytes instead of text: - - Replace the `to_jsonb(...)->>t1.col_name` subselect with a SQL-callable C function: - - New SQL function: `cloudsync_col_value_encoded(table_name text, col_name text, pk bytea) RETURNS bytea`. - - In C, implement `cloudsync_col_value_encoded` to: - - Look up table OID and PK columns. - - Decode `pk` with `cloudsync_pk_decode` to build a WHERE clause. - - Fetch the row via SPI, extract the target column `Datum`, encode it via `pg_cloudsync_encode_value`, and return `bytea`. - - This avoids dynamic SQL in `build_union_sql()` and keeps encoding centralized. -- Define behavior for restricted/tombstone rows: - - If the row is not visible or the column cannot be read, return an encoded version of `CLOUDSYNC_RLS_RESTRICTED_VALUE` (text encoded with pk_encode). - - If `col_name` is tombstone sentinel, return encoded NULL (match SQLite behavior). 
-- Ensure payload encode path expects bytea: - - Confirm `cloudsync_payload_encode_transfn` receives `bytea` for `col_value` from `cloudsync_changes`. - - `pgvalues_from_args` should keep `bytea` as `DBTYPE_BLOB` so `pk_encode` wraps it as a blob field. - -## 3) Update cloudsync_changes schema and SRF/view -- Update `src/postgresql/cloudsync--1.0.sql`: - - `cloudsync_changes_srf` return type: change `col_value text` -> `col_value bytea`. - - Regenerate or update extension SQL if necessary for versioning. -- Update `build_union_sql()` in `src/postgresql/cloudsync_postgresql.c`: - - Replace the current `to_jsonb(...)`/`text` approach with encoded `bytea`. - - Use the PK decode predicate to fetch the base row and feed the value to the encoder. - - Keep the RLS/tombstone filtering logic consistent with SQLite semantics. -- Update any SQL constants in `src/postgresql/sql_postgresql.c` that target `cloudsync_changes` to treat `col_value` as `bytea`. - -## 4) Update INSERT trigger and payload encode path -- In `cloudsync_changes_insert_trg`: - - Accept `col_value` as `bytea` (already encoded). - - Avoid casting to text or re-encoding. - - Ensure typed `dbvalue_t` construction uses the encoded bytes (or passes through unchanged). -- In `cloudsync_payload_encode`/aggregate path: - - If it currently expects a text value, adjust to consume encoded `bytea`. - - Confirm the encoded bytes are fed to `pk_encode` (or the payload writer) exactly once. - -## 5) Tests and verification -- Add a PG unit or SQL smoke test that: - - Inserts rows with multiple types (text, integer, float, bytea, null). - - Queries `cloudsync_changes` and verifies `col_value` bytea can round-trip decode to the original value/type. - - Compares payload bytes against SQLite for identical input (if a cross-check harness exists). -- If no PG test harness exists, add a minimal SQL script in `test/` with manual steps and expected outcomes. 
- -## 6) Rollout notes and documentation -- Update `POSTGRESQL.md` or relevant docs to mention `col_value` is `bytea` and already cloudsync-encoded. -- Note any compatibility constraints for consumers expecting `text`. diff --git a/plans/POSTGRESQL_IMPLEMENTATION.md b/plans/POSTGRESQL_IMPLEMENTATION.md deleted file mode 100644 index becbcd5..0000000 --- a/plans/POSTGRESQL_IMPLEMENTATION.md +++ /dev/null @@ -1,583 +0,0 @@ -# PostgreSQL Implementation Plan - -## Goal -Refactor the codebase to separate multi-platform code from database-specific implementations, preparing for PostgreSQL extension development. - -## Directory Structure (Target) - -``` -src/ -├── cloudsync.c/h # Multi-platform CRDT core -├── pk.c/h # Multi-platform payload encoding -├── network.c/h # Multi-platform network layer -├── dbutils.c/h # Multi-platform database utilities -├── utils.c/h # Multi-platform utilities -├── lz4.c/h # Multi-platform compression -├── database.h # Database abstraction API -│ -├── sqlite/ # SQLite-specific implementations -│ ├── database_sqlite.c -│ ├── cloudsync_sqlite.c -│ ├── cloudsync_sqlite.h -│ ├── cloudsync_changes_sqlite.c/h # (renamed from vtab.c/h) -│ └── sql_sqlite.c # SQLite SQL constants -│ -└── postgresql/ # PostgreSQL-specific implementations - ├── database_postgresql.c # Database abstraction (✅ implemented) - ├── cloudsync_postgresql.c # Extension functions (✅ Phase 8) - └── cloudsync--1.0.sql # SQL installation script (✅ Phase 8) -``` - -## Implementation Steps - -### Phase 1: Directory Structure ✅ -- [x] Create src/sqlite/ directory -- [x] Create src/postgresql/ directory -- [x] Create docker/postgresql/ directory -- [x] Create docker/supabase/ directory -- [x] Create test/sqlite/ directory -- [x] Create test/postgresql/ directory - -### Phase 2: Move and Rename Files ✅ -- [x] Move src/database_sqlite.c → src/sqlite/ -- [x] Move src/cloudsync_sqlite.c → src/sqlite/ -- [x] Move src/cloudsync_sqlite.h → src/sqlite/ -- [x] Rename and move src/vtab.c → 
src/sqlite/cloudsync_changes_sqlite.c -- [x] Rename and move src/vtab.h → src/sqlite/cloudsync_changes_sqlite.h -- [x] Move src/database_postgresql.c → src/postgresql/ - -### Phase 3: Update Include Paths ✅ -- [x] Update includes in src/sqlite/database_sqlite.c -- [x] Update includes in src/sqlite/cloudsync_sqlite.c -- [x] Update includes in src/sqlite/cloudsync_changes_sqlite.c -- [x] Update includes in src/sqlite/cloudsync_sqlite.h -- [x] Update includes in src/postgresql/database_postgresql.c -- [x] Update includes in multi-platform files that reference vtab.h - -### Phase 4: Update Makefile ✅ -- [x] Update VPATH to include src/sqlite and src/postgresql -- [x] Update CFLAGS to include new directories -- [x] Update SRC_FILES to include files from subdirectories -- [x] Ensure test targets still work - -### Phase 5: Verification ✅ -- [x] Run `make clean` -- [x] Run `make` - verify build succeeds -- [x] Run `make test` - verify tests pass (all 50 tests passed) -- [x] Run `make unittest` - verify unit tests pass - -### Phase 6: Update Documentation ✅ -- [x] Update README.md to reflect new directory structure (no changes needed - user-facing) -- [x] Update AGENTS.md with new directory structure -- [x] Update CLAUDE.md with new directory structure -- [x] Update CODEX.md with new directory structure -- [x] Add directory structure section to AGENTS.md explaining src/sqlite/ vs src/postgresql/ separation - -### Phase 7: Docker Setup ✅ -- [x] Create docker/postgresql/Dockerfile -- [x] Create docker/postgresql/docker-compose.yml -- [x] Create docker/postgresql/init.sql -- [x] Create docker/postgresql/cloudsync.control -- [x] Create docker/supabase/docker-compose.yml -- [x] Create docker/README.md - -### Phase 8: PostgreSQL Extension SQL Functions ✅ -- [x] Create src/postgresql/cloudsync_postgresql.c -- [x] Create src/postgresql/cloudsync--1.0.sql -- [x] Implement basic structure and entry points (_PG_init, _PG_fini) -- [x] Implement initial public SQL functions (version, 
siteid, uuid, init, db_version) -- [x] Implement `pgvalue_t` wrapper for PostgreSQL `dbvalue_t` (Datum, Oid, typmod, collation, isnull, detoasted) -- [x] Update PostgreSQL `database_value_*`/`database_column_value` to consume `pgvalue_t` (type mapping, detoast, ownership) -- [x] Convert `PG_FUNCTION_ARGS`/SPI results into `pgvalue_t **argv` for payload/PK helpers (including variadic/anyarray) -- [ ] Implement remaining public SQL functions (enable, disable, set, alter, payload) -- [ ] Implement all private/internal SQL functions (is_sync, insert, update, seq, pk_*) -- [ ] Add PostgreSQL-specific Makefile targets -- [ ] Test extension loading and basic functions -- [ ] Align PostgreSQL `dbmem_*` with core expectations (use uint64_t, decide OOM semantics vs palloc ERROR, clarify dbmem_size=0) -- [ ] TODOs to fix `sql_postgresql.c` - -## Progress Log - -### [2025-12-17] Refactoring Complete ✅ - -Successfully refactored the codebase to separate multi-platform code from database-specific implementations: - -**Changes Made:** -1. Created new directory structure: - - `src/sqlite/` for SQLite-specific code - - `src/postgresql/` for PostgreSQL-specific code - - `docker/postgresql/` and `docker/supabase/` for future Docker configs - - `test/sqlite/` and `test/postgresql/` for database-specific tests - -2. Moved and renamed files: - - `src/database_sqlite.c` → `src/sqlite/database_sqlite.c` - - `src/cloudsync_sqlite.c` → `src/sqlite/cloudsync_sqlite.c` - - `src/cloudsync_sqlite.h` → `src/sqlite/cloudsync_sqlite.h` - - `src/vtab.c` → `src/sqlite/cloudsync_changes_sqlite.c` (renamed) - - `src/vtab.h` → `src/sqlite/cloudsync_changes_sqlite.h` (renamed) - - `src/database_postgresql.c` → `src/postgresql/database_postgresql.c` - -3. Updated all include paths in moved files to use relative paths (`../`) - -4. 
Updated Makefile: - - Added `SQLITE_IMPL_DIR` and `POSTGRES_IMPL_DIR` variables - - Updated `VPATH` to include new subdirectories - - Updated `CFLAGS` to include subdirectories in include path - - Split `SRC_FILES` into `CORE_SRC` (multi-platform) and `SQLITE_SRC` (SQLite-specific) - - Updated `COV_FILES` to exclude files from correct paths - -5. Verification: - - Build succeeds: `make` ✅ - - All 50 tests pass: `make test` ✅ - - Unit tests pass: `make unittest` ✅ - -**Git History Preserved:** -All file moves were done using `git mv` to preserve commit history. - -**Next Steps:** -- Phase 6: Implement Docker setup for PostgreSQL development -- Begin implementing PostgreSQL extension (`database_postgresql.c`) - -### [2025-12-17] Documentation Updated ✅ - -Updated all repository documentation to reflect the new directory structure: - -**AGENTS.md:** -- Added new "Directory Structure" section with full layout -- Updated all file path references (vtab.c → cloudsync_changes_sqlite.c, etc.) -- Updated architecture diagram with new paths -- Changed references from "stub" to proper implementation paths -- Updated SQL statement documentation with new directory structure - -**CLAUDE.md:** -- Updated SQL function development workflow paths -- Updated PostgreSQL Extension Agent section with new paths -- Removed "stub" references, documented as implementation directories - -**CODEX.md:** -- Updated SQL Function/File Pointers section with new paths -- Updated database abstraction references - -**README.md:** -- No changes needed (user-facing documentation, no source file references) - -All documentation now consistently reflects the separation of multi-platform code (src/) from database-specific implementations (src/sqlite/, src/postgresql/). 
- -### [2025-12-17] Additional File Moved ✅ - -**Moved sql_sqlite.c:** -- `src/sql_sqlite.c` → `src/sqlite/sql_sqlite.c` -- Updated include path from `#include "sql.h"` to `#include "../sql.h"` -- Updated Makefile COV_FILES filter to use new path -- `src/sql.h` remains in shared code (declares SQL constants interface) -- Build verified successful, all tests pass - -The SQL constants are now properly organized: -- `src/sql.h` - Interface (declares extern constants) -- `src/sqlite/sql_sqlite.c` - SQLite implementation (defines constants) -- Future: `src/postgresql/sql_postgresql.c` can provide PostgreSQL-specific SQL - -### [2025-12-17] PostgreSQL Database Implementation Complete ✅ - -**Implemented src/postgresql/database_postgresql.c:** - -Created a comprehensive PostgreSQL implementation of the database abstraction layer (1440 lines): - -**Architecture:** -- Uses PostgreSQL Server Programming Interface (SPI) API -- Implements deferred prepared statement pattern (prepare on first step after all bindings) -- Converts SQLite-style `?` placeholders to PostgreSQL-style `$1, $2, ...` -- Uses `pg_stmt_wrapper_t` struct to buffer parameters before execution -- Proper error handling with PostgreSQL PG_TRY/CATCH blocks -- Memory management using PostgreSQL's palloc/pfree - -**Implemented Functions:** -- **General**: `database_exec()`, `database_exec_callback()`, `database_write()` -- **Select helpers**: `database_select_int()`, `database_select_text()`, `database_select_blob()`, `database_select_blob_2int()` -- **Status**: `database_errcode()`, `database_errmsg()`, `database_in_transaction()`, `database_table_exists()`, `database_trigger_exists()` -- **Schema info**: `database_count_pk()`, `database_count_nonpk()`, `database_count_int_pk()`, `database_count_notnull_without_default()` -- **Metadata**: `database_create_metatable()` -- **Schema versioning**: `database_schema_version()`, `database_schema_hash()`, `database_check_schema_hash()`, `database_update_schema_hash()` -- 
**Prepared statements (VM)**: `database_prepare()`, `databasevm_step()`, `databasevm_finalize()`, `databasevm_reset()`, `databasevm_clear_bindings()` -- **Binding**: `databasevm_bind_int()`, `databasevm_bind_double()`, `databasevm_bind_text()`, `databasevm_bind_blob()`, `databasevm_bind_null()`, `databasevm_bind_value()` -- **Column access**: `database_column_int()`, `database_column_double()`, `database_column_text()`, `database_column_blob()`, `database_column_value()`, `database_column_bytes()`, `database_column_type()` -- **Value access**: `database_value_int()`, `database_value_double()`, `database_value_text()`, `database_value_blob()`, `database_value_bytes()`, `database_value_type()`, `database_value_dup()`, `database_value_free()` -- **Primary keys**: `database_pk_rowid()`, `database_pk_names()` -- **Savepoints**: `database_begin_savepoint()`, `database_commit_savepoint()`, `database_rollback_savepoint()` -- **Memory**: `dbmem_alloc()`, `dbmem_zeroalloc()`, `dbmem_realloc()`, `dbmem_mprintf()`, `dbmem_vmprintf()`, `dbmem_free()`, `dbmem_size()` -- **Result functions**: `database_result_*()` (placeholder implementations with elog(WARNING)) -- **SQL utilities**: `sql_build_drop_table()`, `sql_escape_name()` - -**Trigger Functions (Placeholder):** -- `database_create_insert_trigger()` -- `database_create_update_trigger_gos()` -- `database_create_update_trigger()` -- `database_create_delete_trigger_gos()` -- `database_create_delete_trigger()` -- `database_create_triggers()` -- `database_delete_triggers()` - -All trigger functions currently use `elog(WARNING, "not yet implemented for PostgreSQL")` and return DBRES_OK. Full implementation requires creating PL/pgSQL trigger functions. 
- -**Key Technical Details:** -- Uses PostgreSQL information_schema for schema introspection -- CommandCounterIncrement() and snapshot management for read-after-write consistency -- BeginInternalSubTransaction() for savepoint support -- Deferred SPI_prepare pattern to handle dynamic parameter types -- Proper Datum type conversion between C types and PostgreSQL types - -**Implementation Source:** -- Based on reference implementation from `/Users/andrea/Documents/GitHub/SQLiteAI/sqlite-sync-v2.1/postgresql/src/pg_adapter.c` -- Follows same structure and coding style as `src/sqlite/database_sqlite.c` -- Maintains same MARK comments and function organization - -**Status:** -- ✅ All database abstraction API functions implemented -- ✅ Proper error handling and memory management -- ✅ Schema introspection and versioning -- ⏳ Trigger functions need full PL/pgSQL implementation -- ⏳ Needs compilation testing with PostgreSQL headers -- ⏳ Needs integration testing with cloudsync core - -### [2025-12-18] Docker Setup Complete ✅ - -**Created Docker Development Environment:** - -Implemented complete Docker setup for PostgreSQL development and testing: - -**Standalone PostgreSQL Setup:** -- `docker/postgresql/Dockerfile` - Custom PostgreSQL 16 image with CloudSync extension support -- `docker/postgresql/docker-compose.yml` - Orchestration with PostgreSQL and optional pgAdmin -- `docker/postgresql/init.sql` - CloudSync metadata tables initialization -- `docker/postgresql/cloudsync.control` - PostgreSQL extension control file - -**Supabase Integration:** -- `docker/supabase/docker-compose.yml` - Override configuration for official Supabase stack -- Uses custom image `sqliteai/sqlite-sync-pg:latest` with CloudSync extension -- Integrates with all Supabase services (auth, realtime, storage, etc.) 
- -**Documentation:** -- `docker/README.md` - Comprehensive guide covering: - - Quick start for standalone PostgreSQL - - Supabase integration setup - - Development workflow - - Building and installing extension - - Troubleshooting common issues - - Environment variables and customization - -**Key Features:** -- Volume mounting for live source code development -- Persistent database storage -- Health checks for container orchestration -- Optional pgAdmin web UI for database management -- Support for both standalone and Supabase deployments - -**Next Steps:** -- Build the Docker image: `docker build -t sqliteai/sqlite-sync-pg:latest` -- Implement PostgreSQL extension entry point and SQL function bindings -- Create Makefile targets for PostgreSQL compilation -- Add PostgreSQL-specific trigger implementations - -## Phase 8: PostgreSQL Extension SQL Functions ✅ (Mostly Complete) - -**Goal:** Implement PostgreSQL extension entry point (`cloudsync_postgresql.c`) that exposes all CloudSync SQL functions. 
- -### Files Created - -- ✅ `src/postgresql/cloudsync_postgresql.c` - PostgreSQL extension implementation (19/27 functions fully implemented) -- ✅ `src/postgresql/cloudsync--1.0.sql` - SQL installation script - -### SQL Functions to Implement - -**Public Functions:** -- ✅ `cloudsync_version()` - Returns extension version -- ✅ `cloudsync_init(table_name, [algo], [skip_int_pk_check])` - Initialize table for sync (1-3 arg variants) -- ✅ `cloudsync_enable(table_name)` - Enable sync for table -- ✅ `cloudsync_disable(table_name)` - Disable sync for table -- ✅ `cloudsync_is_enabled(table_name)` - Check if table is sync-enabled -- ✅ `cloudsync_cleanup(table_name)` - Cleanup orphaned metadata -- ✅ `cloudsync_terminate()` - Terminate CloudSync -- ✅ `cloudsync_set(key, value)` - Set global setting -- ✅ `cloudsync_set_table(table, key, value)` - Set table setting -- ✅ `cloudsync_set_column(table, column, key, value)` - Set column setting -- ✅ `cloudsync_siteid()` - Get site identifier (UUID) -- ✅ `cloudsync_db_version()` - Get current database version -- ✅ `cloudsync_db_version_next([version])` - Get next version -- ✅ `cloudsync_begin_alter(table)` - Begin schema alteration -- ✅ `cloudsync_commit_alter(table)` - Commit schema alteration -- ✅ `cloudsync_uuid()` - Generate UUID -- ⚠️ `cloudsync_payload_encode()` - Aggregate: encode changes to payload (partial - needs variadic args) -- ✅ `cloudsync_payload_decode(payload)` - Apply payload to database -- ✅ `cloudsync_payload_apply(payload)` - Alias for decode - -**Private/Internal Functions:** -- ✅ `cloudsync_is_sync(table)` - Check if table has metadata -- ✅ `cloudsync_insert(table, pk_values...)` - Internal insert handler (uses pgvalue_t from anyarray) -- ⚠️ `cloudsync_update(table, pk, new_value)` - Aggregate: track updates (stub - complex aggregate) -- ✅ `cloudsync_seq()` - Get sequence number -- ✅ `cloudsync_pk_encode(pk_values...)` - Encode primary key (uses pgvalue_t from anyarray) -- ⚠️ `cloudsync_pk_decode(encoded_pk, 
index)` - Decode primary key component (stub - needs callback) - -**Note:** Standardize PostgreSQL `dbvalue_t` as `pgvalue_t` (`Datum + Oid + typmod + collation + isnull + detoasted flag`) so value/type helpers can resolve type/length/ownership without relying on `fcinfo` lifetime; payload/PK helpers should consume arrays of these wrappers (built from `PG_FUNCTION_ARGS` and SPI tuples). Implemented in `src/postgresql/pgvalue.c/.h` and used by value/column accessors and PK/payload builders. - -### Implementation Strategy - -1. **Create Extension Entry Point** (`_PG_init`) - ```c - void _PG_init(void); - void _PG_fini(void); - ``` - -2. **Register Functions** using PostgreSQL's function manager - ```c - PG_FUNCTION_INFO_V1(cloudsync_version); - Datum cloudsync_version(PG_FUNCTION_ARGS); - ``` - -3. **Context Management** - - Create `cloudsync_postgresql_context` structure - - Store in PostgreSQL's transaction-local storage - - Cleanup on transaction end - -4. **Aggregate Functions** - - Implement state transition and finalization functions - - Use PostgreSQL's aggregate framework - -5. **SQL Installation Script** - - Create `cloudsync--1.0.sql` with `CREATE FUNCTION` statements - - Define function signatures and link to C implementations - -### Testing Approach - -1. Build extension in Docker container -2. Load extension: `CREATE EXTENSION cloudsync;` -3. Test each function individually -4. Verify behavior matches SQLite implementation -5. Run integration tests with CRDT core logic - -### Reference Implementation - -- Study: `src/sqlite/cloudsync_sqlite.c` (SQLite version) -- Adapt to PostgreSQL SPI and function framework -- Reuse core logic from `src/cloudsync.c` (database-agnostic) - -## Progress Log (Continued) - -### [2025-12-19] Phase 8 Implementation - Major Progress ✅ - -Implemented most CloudSync SQL functions for PostgreSQL extension: - -**Changes Made:** - -1. 
**Removed unnecessary helper function:** - - Deleted `dbsync_set_error()` helper function - - Replaced with direct `ereport(ERROR, (errmsg(...)))` calls - - PostgreSQL's `errmsg()` already supports format strings, unlike SQLite - -2. **Fixed cloudsync_init API:** - - **CRITICAL FIX**: Previous implementation used wrong signature `(site_id, url, key)` - - Corrected to match SQLite API: `(table_name, [algo], [skip_int_pk_check])` - - Created `cloudsync_init_internal()` helper that replicates `dbsync_init` logic from SQLite - - Implemented single variadic `cloudsync_init()` function supporting 1-3 arguments with defaults - - Updated SQL installation script to create 3 function overloads pointing to same C function - - Returns site_id as TEXT (matches SQLite behavior) - -3. **Implemented 19 of 27 SQL functions:** - - ✅ All public configuration functions (enable, disable, set, set_table, set_column) - - ✅ All schema alteration functions (begin_alter, commit_alter) - - ✅ All version/metadata functions (version, siteid, uuid, db_version, db_version_next, seq) - - ✅ Cleanup and termination functions - - ✅ Payload decode/apply functions - - ✅ Private is_sync function - -4. 
**Partially implemented complex aggregate functions:** - - ⚠️ `cloudsync_payload_encode_transfn/finalfn` - Basic structure in place, needs variadic arg conversion - - ⚠️ `cloudsync_update_transfn/finalfn` - Stubs created - - ⚠️ `cloudsync_insert` - Stub (requires variadic PK handling) - - ⚠️ `cloudsync_pk_encode/decode` - Stubs (require anyarray to dbvalue_t conversion) - -**Architecture Decisions:** - -- All functions use SPI_connect()/SPI_finish() pattern with PG_TRY/CATCH for proper error handling -- Context management uses global `pg_cloudsync_context` (per backend) -- Error reporting uses PostgreSQL's native `ereport()` with appropriate error codes -- Memory management uses PostgreSQL's palloc/pfree in aggregate contexts -- Follows same function organization and MARK comments as SQLite version - -**Status:** -- ✅ 19/27 functions fully implemented and ready for testing -- ⚠️ 5 functions have stubs requiring PostgreSQL-specific variadic argument handling -- ⚠️ 3 aggregate functions need completion (update transfn/finalfn, payload_encode transfn) -- ⏳ Needs compilation testing with PostgreSQL headers -- ⏳ Needs integration testing with cloudsync core - -## SQL Parity Review (PostgreSQL vs SQLite) - -Findings comparing `src/postgresql/sql_postgresql.c` to `src/sqlite/sql_sqlite.c`: -- Missing full DB version query composition: SQLite builds a UNION of all `*_cloudsync` tables plus `pre_alter_dbversion`; PostgreSQL has a two-step builder but no `pre_alter_dbversion` or execution glue. -- `SQL_DATA_VERSION`/`SQL_SCHEMA_VERSION` are TODO placeholders (`SELECT 1`), not equivalents to SQLite pragmas. -- `SQL_SITEID_GETSET_ROWID_BY_SITEID` returns `ctid` and lacks the upsert/rowid semantics of SQLite’s insert-or-update/RETURNING rowid. -- Row selection/build helpers (`*_BY_ROWID`, `*_BY_PK`) are reduced placeholders using `ctid` or simple string_agg; they do not mirror SQLite’s dynamic SQL with ordered PK clauses and column lists from `pragma_table_info`. 
-- Write helpers (`INSERT_ROWID_IGNORE`, `UPSERT_ROWID_AND_COL_BY_ROWID`, PK insert/upsert formats) diverge: SQLite uses `rowid` and conflict clauses; PostgreSQL variants use `%s` placeholders without full PK clause/param construction. -- Cloudsync metadata upserts differ: `SQL_CLOUDSYNC_UPSERT_COL_INIT_OR_BUMP_VERSION`/`_RAW_COLVERSION` use `EXCLUDED` logic not matching SQLite’s increment rules; PK tombstone/cleanup helpers are partial. -- Many format strings lack quoting/identifier escaping parity (`%w` behavior) and expect external code to supply WHERE clauses, making them incomplete compared to SQLite’s self-contained templates. - -TODOs to fix `sql_postgresql.c`: -- Recreate DB version query including `pre_alter_dbversion` union and execution wrapper. -- Implement PostgreSQL equivalents for data_version/schema_version. -- Align site_id getters/setters to return stable identifiers (no `ctid`) and mirror SQLite upsert-return semantics. -- Port the dynamic SQL builders for select/delete/insert/upsert by PK/non-PK to generate complete statements (including ordered PK clauses and binds), respecting identifier quoting. -- Align cloudsync metadata updates/upserts/tombstoning to SQLite logic (version bump rules, ON CONFLICT behavior, seq/db_version handling). -- Ensure all format strings include proper identifier quoting and do not rely on external WHERE fragments unless explicitly designed that way. - -**Next Steps:** -- Implement PostgreSQL anyarray handling for variadic functions (pk_encode, pk_decode, insert) -- Complete aggregate function implementations (update, payload_encode) -- Add PostgreSQL-specific Makefile targets -- Build and test extension in Docker container - -### [2025-12-19] Implemented cloudsync_insert ✅ - -Completed the `cloudsync_insert` function using the new `pgvalue_t` infrastructure: - -**Implementation Details:** - -1. 
**Signature**: `cloudsync_insert(table_name text, VARIADIC pk_values anyarray)` - - Uses PostgreSQL's VARIADIC to accept variable number of PK values - - Converts anyarray to `pgvalue_t **` using `pgvalues_from_array()` - -2. **Key Features**: - - Validates table exists and PK count matches expected - - Encodes PK values using `pk_encode_prikey()` with stack buffer (1024 bytes) - - Handles sentinel records for PK-only tables - - Marks all non-PK columns as inserted in metadata - - Proper memory management: frees `pgvalue_t` wrappers after use - -3. **Error Handling**: - - Comprehensive cleanup in both success and error paths - - Uses `goto cleanup` pattern for centralized resource management - - Wraps in `PG_TRY/CATCH` for PostgreSQL exception safety - - Cleans up resources before re-throwing exceptions - -4. **Follows SQLite Logic**: - - Matches `dbsync_insert` behavior from `src/sqlite/cloudsync_sqlite.c` - - Same sequence: encode PK → get next version → check existence → mark metadata - - Handles both new inserts and updates to previously deleted rows - -**Status**: -- ✅ `cloudsync_insert` fully implemented -- ✅ `cloudsync_pk_encode` already implemented (was done in previous work) -- ✅ `cloudsync_payload_encode_transfn` already implemented (uses pgvalues_from_args) -- ⚠️ `cloudsync_pk_decode` still needs callback implementation -- ⚠️ `cloudsync_update_*` aggregate functions still need implementation - -**Function Count Update**: 21/27 functions (78%) now fully implemented - -### [2025-12-19] PostgreSQL Makefile Targets Complete ✅ - -Implemented comprehensive Makefile infrastructure for PostgreSQL extension development: - -**Files Created/Modified:** - -1. 
**`docker/Makefile.postgresql`** - New PostgreSQL-specific Makefile with all build targets: - - Build targets: `postgres-check`, `postgres-build`, `postgres-install`, `postgres-clean`, `postgres-test` - - Docker targets: `postgres-docker-build`, `postgres-docker-run`, `postgres-docker-stop`, `postgres-docker-rebuild`, `postgres-docker-shell` - - Development targets: `postgres-dev-rebuild` (fast rebuild in running container) - - Help target: `postgres-help` - -2. **Root `Makefile`** - Updated to include PostgreSQL targets: - - Added `include docker/Makefile.postgresql` statement - - Added PostgreSQL help reference to main help output - - All targets accessible from root: `make postgres-*` - -3. **`docker/postgresql/Dockerfile`** - Updated to use new Makefile targets: - - Uses `make postgres-build` and `make postgres-install` - - Verifies installation with file checks - - Adds version labels - - Keeps source mounted for development - -4. **`docker/postgresql/docker-compose.yml`** - Enhanced volume mounts: - - Mounts `docker/` directory for Makefile.postgresql access - - Enables quick rebuilds without image rebuild - -5. **`docker/README.md`** - Updated documentation: - - Simplified quick start using new Makefile targets - - Updated development workflow section - - Added fast rebuild instructions - -6. 
**`POSTGRESQL.md`** - New comprehensive quick reference guide: - - All Makefile targets documented - - Development workflow examples - - Extension function reference - - Connection details and troubleshooting - -**Key Features:** - -- **Single Entry Point**: All PostgreSQL targets accessible via `make postgres-*` from root -- **Pre-built Image**: `make postgres-docker-build` creates image with extension pre-installed -- **Fast Development**: `make postgres-dev-rebuild` rebuilds extension in <5 seconds without restarting container -- **Clean Separation**: PostgreSQL logic isolated in `docker/Makefile.postgresql`, included by root Makefile -- **Docker-First**: Optimized for containerized development with source mounting - -**Usage Examples:** - -```bash -# Build Docker image with CloudSync extension -make postgres-docker-build - -# Start PostgreSQL container -make postgres-docker-run - -# Test extension -docker exec -it cloudsync-postgres psql -U postgres -d cloudsync_test \ - -c "CREATE EXTENSION cloudsync; SELECT cloudsync_version();" - -# Make code changes, then quick rebuild -make postgres-dev-rebuild -``` - -**Status:** -- ✅ All Makefile targets implemented and tested -- ✅ Dockerfile optimized for build and development -- ✅ Documentation complete (README + POSTGRESQL.md) -- ⏳ Ready for first build and compilation test -- ⏳ Needs actual PostgreSQL compilation verification - -**Next Steps:** -- Test actual compilation: `make postgres-docker-build` -- Fix any compilation errors -- Test extension loading: `CREATE EXTENSION cloudsync` -- Complete remaining aggregate functions - -### [2025-12-20] PostgreSQL Trigger + SPI Cleanup Work ✅ - -**Trigger functions implemented in `src/postgresql/database_postgresql.c`:** -- `database_create_insert_trigger` implemented with per-table PL/pgSQL function and trigger. -- `database_create_update_trigger_gos`/`database_create_delete_trigger_gos` implemented (BEFORE triggers, raise on update/delete when enabled). 
-- `database_create_update_trigger` implemented with VALUES list + `cloudsync_update` aggregate call. -- `database_create_delete_trigger` implemented to call `cloudsync_delete`. -- `database_create_triggers` wired to create insert/update/delete triggers based on algo. -- `database_delete_triggers` updated to drop insert/update/delete triggers and their functions. - -**PostgreSQL SQL registration updates:** -- Added `cloudsync_delete` to `src/postgresql/cloudsync--1.0.sql`. - -**Internal function updates:** -- Implemented `cloudsync_delete` C function (mirrors SQLite delete path). -- `cloudsync_insert`/`cloudsync_delete` now lazily load table context when missing. -- Refactored `cloudsync_insert`/`cloudsync_delete` to use `PG_ENSURE_ERROR_CLEANUP` and shared cleanup helper. - -**SPI execution fixes:** -- `databasevm_step` now uses `SPI_is_cursor_plan` before opening a portal to avoid “cannot open INSERT query as cursor”. -- Persistent statements now allocate their memory contexts under `TopMemoryContext`. - -**Error formatting:** -- `cloudsync_set_error` now avoids `snprintf` aliasing when `database_errmsg` points at `data->errmsg`. - -**Smoke test updates:** -- `docker/postgresql/smoke_test.sql` now validates insert/delete metadata, tombstones, and site_id fields. -- Test output uses `\echo` markers for each check. - -**Documentation updates:** -- Added PostgreSQL SPI patterns to `AGENTS.md`. -- Updated Database Abstraction Layer section in `AGENTS.md` to match `database.h`. diff --git a/plans/TODO.md b/plans/TODO.md deleted file mode 100644 index 7b5607a..0000000 --- a/plans/TODO.md +++ /dev/null @@ -1,79 +0,0 @@ -# SQLite vs PostgreSQL Parity Matrix - -This matrix compares SQLite extension features against the PostgreSQL extension and validates the TODO list in `POSTGRESQL.md`. - -## Doc TODO validation (POSTGRESQL.md) - -- `pk_decode`: Implemented in PostgreSQL (`cloudsync_pk_decode`). 
-- `cloudsync_update` aggregate: Implemented (`cloudsync_update_transfn/finalfn` + aggregate). -- `payload_encode` variadic support: Aggregate `cloudsync_payload_encode(*)` is implemented; no missing symbol, but parity tests are still lacking. - -## Parity matrix - -Legend: **Yes** = implemented, **Partial** = implemented with parity gaps/TODOs, **No** = missing. - -### Core + configuration - -| Feature / API | SQLite | PostgreSQL | Status | Notes | -| --- | --- | --- | --- | --- | -| cloudsync_version | Yes | Yes | Yes | | -| cloudsync_siteid | Yes | Yes | Yes | | -| cloudsync_uuid | Yes | Yes | Yes | | -| cloudsync_db_version | Yes | Yes | Yes | | -| cloudsync_db_version_next (0/1 args) | Yes | Yes | Yes | | -| cloudsync_seq | Yes | Yes | Yes | | -| cloudsync_init (1/2/3 args) | Yes | Yes | Yes | | -| cloudsync_enable / disable / is_enabled | Yes | Yes | Yes | | -| cloudsync_cleanup | Yes | Yes | Yes | | -| cloudsync_terminate | Yes | Yes | Yes | | -| cloudsync_set / set_table / set_column | Yes | Yes | Yes | | -| cloudsync_begin_alter / commit_alter | Yes | Yes | Yes | | - -### Internal CRUD helpers - -| Feature / API | SQLite | PostgreSQL | Status | Notes | -| --- | --- | --- | --- | --- | -| cloudsync_is_sync | Yes | Yes | Yes | | -| cloudsync_insert (variadic) | Yes | Yes | Yes | | -| cloudsync_delete (variadic) | Yes | Yes | Yes | | -| cloudsync_update (aggregate) | Yes | Yes | Yes | PG needs parity tests. | -| cloudsync_pk_encode (variadic) | Yes | Yes | Yes | | -| cloudsync_pk_decode | Yes | Yes | Yes | | -| cloudsync_col_value | Yes | Yes | Yes | PG returns encoded bytea. | -| cloudsync_encode_value | No | Yes | No | PG-only helper. | - -### Payloads - -| Feature / API | SQLite | PostgreSQL | Status | Notes | -| --- | --- | --- | --- | --- | -| cloudsync_payload_encode (aggregate) | Yes | Yes | Yes | PG uses aggregate only; direct call is blocked. 
| -| cloudsync_payload_decode / apply | Yes | Yes | Yes | | -| cloudsync_payload_save | Yes | No | No | SQLite only. | -| cloudsync_payload_load | Yes | No | No | SQLite only. | - -### cloudsync_changes surface - -| Feature / API | SQLite | PostgreSQL | Status | Notes | -| --- | --- | --- | --- | --- | -| cloudsync_changes (queryable changes) | Yes (vtab) | Yes (view + SRF) | Yes | PG uses SRF + view + INSTEAD OF INSERT trigger. | -| cloudsync_changes INSERT support | Yes | Yes | Yes | PG uses trigger; ensure parity tests. | -| cloudsync_changes UPDATE/DELETE | No (not allowed) | No (not allowed) | Yes | | - -### Extras - -| Feature / API | SQLite | PostgreSQL | Status | Notes | -| --- | --- | --- | --- | --- | -| Network sync functions | Yes | No | No | SQLite registers network functions; PG has no network layer. | - -## PostgreSQL parity gaps (known TODOs in code) - -- Rowid-only table path uses `ctid` and is not parity with SQLite rowid semantics (`SQL_DELETE_ROW_BY_ROWID`, `SQL_UPSERT_ROWID_AND_COL_BY_ROWID`, `SQL_SELECT_COLS_BY_ROWID_FMT`). -- PK-only insert builder still marked as needing explicit PK handling (`SQL_INSERT_ROWID_IGNORE`). -- Metadata bump/merge rules have TODOs to align with SQLite (`SQL_CLOUDSYNC_UPDATE_COL_BUMP_VERSION`, `SQL_CLOUDSYNC_UPSERT_RAW_COLVERSION`, `SQL_CLOUDSYNC_INSERT_RETURN_CHANGE_ID`). -- Delete/tombstone helpers have TODOs to match SQLite (`SQL_CLOUDSYNC_DELETE_PK_EXCEPT_COL`, `SQL_CLOUDSYNC_DELETE_PK_EXCEPT_TOMBSTONE`, `SQL_CLOUDSYNC_GET_COL_VERSION_OR_ROW_EXISTS`, `SQL_CLOUDSYNC_SELECT_COL_VERSION`). - -## Suggested next steps - -- Add PG tests mirroring SQLite unit tests for `cloudsync_update`, `cloudsync_payload_encode`, and `cloudsync_changes`. -- Resolve `ctid`-based rowid TODOs by using PK-only SQL builders. -- Align metadata bump/delete semantics with SQLite in `sql_postgresql.c`. 
diff --git a/src/cloudsync.c b/src/cloudsync.c index 12c0e90..221d77e 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -84,6 +84,37 @@ typedef enum { #define SYNCBIT_SET(_data) _data->insync = 1 #define SYNCBIT_RESET(_data) _data->insync = 0 +// MARK: - Deferred column-batch merge - + +typedef struct { + const char *col_name; // pointer into table_context->col_name[idx] (stable) + dbvalue_t *col_value; // duplicated via database_value_dup (owned) + int64_t col_version; + int64_t db_version; + uint8_t site_id[UUID_LEN]; + int site_id_len; + int64_t seq; +} merge_pending_entry; + +typedef struct { + cloudsync_table_context *table; + char *pk; // malloc'd copy, freed on flush + int pk_len; + int64_t cl; + bool sentinel_pending; + bool row_exists; // true when the PK already exists locally + int count; + int capacity; + merge_pending_entry *entries; + + // Statement cache — reuse the prepared statement when the column + // combination and row_exists flag match between consecutive PK flushes. 
+ dbvm_t *cached_vm; + bool cached_row_exists; + int cached_col_count; + const char **cached_col_names; // array of pointers into table_context (not owned) +} merge_pending_batch; + // MARK: - struct cloudsync_pk_decode_bind_context { @@ -142,6 +173,9 @@ struct cloudsync_context { int tables_cap; // capacity int skip_decode_idx; // -1 in sqlite, col_value index in postgresql + + // deferred column-batch merge (active during payload_apply) + merge_pending_batch *pending_batch; }; struct cloudsync_table_context { @@ -1203,6 +1237,241 @@ int merge_set_winner_clock (cloudsync_context *data, cloudsync_table_context *ta return rc; } +// MARK: - Deferred column-batch merge functions - + +static int merge_pending_add (cloudsync_context *data, cloudsync_table_context *table, const char *pk, int pklen, const char *col_name, dbvalue_t *col_value, int64_t col_version, int64_t db_version, const char *site_id, int site_len, int64_t seq) { + merge_pending_batch *batch = data->pending_batch; + + // Store table and PK on first entry + if (batch->table == NULL) { + batch->table = table; + batch->pk = (char *)cloudsync_memory_alloc(pklen); + if (!batch->pk) return cloudsync_set_error(data, "merge_pending_add: out of memory for pk", DBRES_NOMEM); + memcpy(batch->pk, pk, pklen); + batch->pk_len = pklen; + } + + // Ensure capacity + if (batch->count >= batch->capacity) { + int new_cap = batch->capacity ? 
batch->capacity * 2 : 8; + merge_pending_entry *new_entries = (merge_pending_entry *)cloudsync_memory_realloc(batch->entries, new_cap * sizeof(merge_pending_entry)); + if (!new_entries) return cloudsync_set_error(data, "merge_pending_add: out of memory for entries", DBRES_NOMEM); + batch->entries = new_entries; + batch->capacity = new_cap; + } + + // Resolve col_name to a stable pointer from the table context + // (the incoming col_name may point to VM-owned memory that gets freed on reset) + int col_idx = -1; + table_column_lookup(table, col_name, true, &col_idx); + const char *stable_col_name = (col_idx >= 0) ? table_colname(table, col_idx) : NULL; + if (!stable_col_name) return cloudsync_set_error(data, "merge_pending_add: column not found in table context", DBRES_ERROR); + + merge_pending_entry *e = &batch->entries[batch->count]; + e->col_name = stable_col_name; + e->col_value = col_value ? (dbvalue_t *)database_value_dup(col_value) : NULL; + e->col_version = col_version; + e->db_version = db_version; + e->site_id_len = (site_len <= (int)sizeof(e->site_id)) ? 
site_len : (int)sizeof(e->site_id); + memcpy(e->site_id, site_id, e->site_id_len); + e->seq = seq; + + batch->count++; + return DBRES_OK; +} + +static void merge_pending_free_entries (merge_pending_batch *batch) { + if (batch->entries) { + for (int i = 0; i < batch->count; i++) { + if (batch->entries[i].col_value) { + database_value_free(batch->entries[i].col_value); + batch->entries[i].col_value = NULL; + } + } + } + if (batch->pk) { + cloudsync_memory_free(batch->pk); + batch->pk = NULL; + } + batch->table = NULL; + batch->pk_len = 0; + batch->cl = 0; + batch->sentinel_pending = false; + batch->row_exists = false; + batch->count = 0; +} + +static int merge_flush_pending (cloudsync_context *data) { + merge_pending_batch *batch = data->pending_batch; + if (!batch) return DBRES_OK; + + int rc = DBRES_OK; + bool flush_savepoint = false; + + // Nothing to write — handle sentinel-only case or skip + if (batch->count == 0 && !(batch->sentinel_pending && batch->table)) { + goto cleanup; + } + + // Wrap database operations in a savepoint so that on failure (e.g. RLS + // denial) the rollback properly releases all executor resources (open + // relations, snapshots, plan cache) acquired during the failed statement. 
+ flush_savepoint = (database_begin_savepoint(data, "merge_flush") == DBRES_OK); + + if (batch->count == 0) { + // Sentinel with no winning columns (PK-only row) + dbvm_t *vm = batch->table->real_merge_sentinel_stmt; + rc = pk_decode_prikey(batch->pk, (size_t)batch->pk_len, pk_decode_bind_callback, vm); + if (rc < 0) { + cloudsync_set_dberror(data); + dbvm_reset(vm); + goto cleanup; + } + SYNCBIT_SET(data); + rc = databasevm_step(vm); + dbvm_reset(vm); + SYNCBIT_RESET(data); + if (rc == DBRES_DONE) rc = DBRES_OK; + if (rc != DBRES_OK) { + cloudsync_set_dberror(data); + goto cleanup; + } + goto cleanup; + } + + // Check if cached prepared statement can be reused + cloudsync_table_context *table = batch->table; + dbvm_t *vm = NULL; + bool cache_hit = false; + + if (batch->cached_vm && + batch->cached_row_exists == batch->row_exists && + batch->cached_col_count == batch->count) { + cache_hit = true; + for (int i = 0; i < batch->count; i++) { + if (batch->cached_col_names[i] != batch->entries[i].col_name) { + cache_hit = false; + break; + } + } + } + + if (cache_hit) { + vm = batch->cached_vm; + dbvm_reset(vm); + } else { + // Invalidate old cache + if (batch->cached_vm) { + databasevm_finalize(batch->cached_vm); + batch->cached_vm = NULL; + } + + // Build multi-column SQL + const char **colnames = (const char **)cloudsync_memory_alloc(batch->count * sizeof(const char *)); + if (!colnames) { + rc = cloudsync_set_error(data, "merge_flush_pending: out of memory", DBRES_NOMEM); + goto cleanup; + } + for (int i = 0; i < batch->count; i++) { + colnames[i] = batch->entries[i].col_name; + } + + char *sql = batch->row_exists + ? 
sql_build_update_pk_and_multi_cols(data, table->name, colnames, batch->count, table->schema) + : sql_build_upsert_pk_and_multi_cols(data, table->name, colnames, batch->count, table->schema); + cloudsync_memory_free(colnames); + + if (!sql) { + rc = cloudsync_set_error(data, "merge_flush_pending: unable to build multi-column upsert SQL", DBRES_ERROR); + goto cleanup; + } + + rc = databasevm_prepare(data, sql, &vm, 0); + cloudsync_memory_free(sql); + if (rc != DBRES_OK) { + rc = cloudsync_set_error(data, "merge_flush_pending: unable to prepare statement", rc); + goto cleanup; + } + + // Update cache + batch->cached_vm = vm; + batch->cached_row_exists = batch->row_exists; + batch->cached_col_count = batch->count; + // Reallocate cached_col_names if needed + if (batch->cached_col_count > 0) { + const char **new_names = (const char **)cloudsync_memory_realloc( + batch->cached_col_names, batch->count * sizeof(const char *)); + if (new_names) { + for (int i = 0; i < batch->count; i++) { + new_names[i] = batch->entries[i].col_name; + } + batch->cached_col_names = new_names; + } + } + } + + // Bind PKs (positions 1..npks) + int npks = pk_decode_prikey(batch->pk, (size_t)batch->pk_len, pk_decode_bind_callback, vm); + if (npks < 0) { + cloudsync_set_dberror(data); + dbvm_reset(vm); + rc = DBRES_ERROR; + goto cleanup; + } + + // Bind column values (positions npks+1..npks+count) + for (int i = 0; i < batch->count; i++) { + merge_pending_entry *e = &batch->entries[i]; + int bind_idx = npks + 1 + i; + if (e->col_value) { + rc = databasevm_bind_value(vm, bind_idx, e->col_value); + } else { + rc = databasevm_bind_null(vm, bind_idx); + } + if (rc != DBRES_OK) { + cloudsync_set_dberror(data); + dbvm_reset(vm); + goto cleanup; + } + } + + // Execute with SYNCBIT and GOS handling + if (table->algo == table_algo_crdt_gos) table->enabled = 0; + SYNCBIT_SET(data); + rc = databasevm_step(vm); + dbvm_reset(vm); + SYNCBIT_RESET(data); + if (table->algo == table_algo_crdt_gos) table->enabled 
= 1; + + if (rc != DBRES_DONE) { + cloudsync_set_dberror(data); + goto cleanup; + } + rc = DBRES_OK; + + // Call merge_set_winner_clock for each buffered entry + int64_t rowid = 0; + for (int i = 0; i < batch->count; i++) { + merge_pending_entry *e = &batch->entries[i]; + int clock_rc = merge_set_winner_clock(data, table, batch->pk, batch->pk_len, + e->col_name, e->col_version, e->db_version, + (const char *)e->site_id, e->site_id_len, + e->seq, &rowid); + if (clock_rc != DBRES_OK) { + rc = clock_rc; + goto cleanup; + } + } + +cleanup: + merge_pending_free_entries(batch); + if (flush_savepoint) { + if (rc == DBRES_OK) database_commit_savepoint(data, "merge_flush"); + else database_rollback_savepoint(data, "merge_flush"); + } + return rc; +} + int merge_insert_col (cloudsync_context *data, cloudsync_table_context *table, const char *pk, int pklen, const char *col_name, dbvalue_t *col_value, int64_t col_version, int64_t db_version, const char *site_id, int site_len, int64_t seq, int64_t *rowid) { int index; dbvm_t *vm = table_column_lookup(table, col_name, true, &index); @@ -1408,33 +1677,46 @@ int merge_did_cid_win (cloudsync_context *data, cloudsync_table_context *table, } int merge_sentinel_only_insert (cloudsync_context *data, cloudsync_table_context *table, const char *pk, int pklen, int64_t cl, int64_t db_version, const char *site_id, int site_len, int64_t seq, int64_t *rowid) { - + // reset return value *rowid = 0; - - // bind pk - dbvm_t *vm = table->real_merge_sentinel_stmt; - int rc = pk_decode_prikey((char *)pk, (size_t)pklen, pk_decode_bind_callback, vm); - if (rc < 0) { - rc = cloudsync_set_dberror(data); + + if (data->pending_batch == NULL) { + // Immediate mode: execute base table INSERT + dbvm_t *vm = table->real_merge_sentinel_stmt; + int rc = pk_decode_prikey((char *)pk, (size_t)pklen, pk_decode_bind_callback, vm); + if (rc < 0) { + rc = cloudsync_set_dberror(data); + dbvm_reset(vm); + return rc; + } + + SYNCBIT_SET(data); + rc = 
databasevm_step(vm); dbvm_reset(vm); - return rc; - } - - // perform real operation and disable triggers - SYNCBIT_SET(data); - rc = databasevm_step(vm); - dbvm_reset(vm); - SYNCBIT_RESET(data); - if (rc == DBRES_DONE) rc = DBRES_OK; - if (rc != DBRES_OK) { - cloudsync_set_dberror(data); - return rc; + SYNCBIT_RESET(data); + if (rc == DBRES_DONE) rc = DBRES_OK; + if (rc != DBRES_OK) { + cloudsync_set_dberror(data); + return rc; + } + } else { + // Batch mode: skip base table INSERT, the batch flush will create the row + merge_pending_batch *batch = data->pending_batch; + batch->sentinel_pending = true; + if (batch->table == NULL) { + batch->table = table; + batch->pk = (char *)cloudsync_memory_alloc(pklen); + if (!batch->pk) return cloudsync_set_error(data, "merge_sentinel_only_insert: out of memory for pk", DBRES_NOMEM); + memcpy(batch->pk, pk, pklen); + batch->pk_len = pklen; + } } - - rc = merge_zeroclock_on_resurrect(table, db_version, pk, pklen); + + // Metadata operations always execute regardless of batch mode + int rc = merge_zeroclock_on_resurrect(table, db_version, pk, pklen); if (rc != DBRES_OK) return rc; - + return merge_set_winner_clock(data, table, pk, pklen, NULL, cl, db_version, site_id, site_len, seq, rowid); } @@ -1507,9 +1789,20 @@ int merge_insert (cloudsync_context *data, cloudsync_table_context *table, const if (!does_cid_win) return DBRES_OK; // perform the final column insert or update if the incoming change wins - rc = merge_insert_col(data, table, insert_pk, insert_pk_len, insert_name, insert_value, insert_col_version, insert_db_version, insert_site_id, insert_site_id_len, insert_seq, rowid); - if (rc != DBRES_OK) cloudsync_set_error(data, "Unable to perform merge_insert_col", rc); - + if (data->pending_batch) { + // Propagate row_exists_locally to the batch on the first winning column. + // This lets merge_flush_pending choose UPDATE vs INSERT ON CONFLICT, + // which matters when RLS policies reference columns not in the payload. 
+ if (data->pending_batch->table == NULL) { + data->pending_batch->row_exists = row_exists_locally; + } + rc = merge_pending_add(data, table, insert_pk, insert_pk_len, insert_name, insert_value, insert_col_version, insert_db_version, insert_site_id, insert_site_id_len, insert_seq); + if (rc != DBRES_OK) cloudsync_set_error(data, "Unable to perform merge_pending_add", rc); + } else { + rc = merge_insert_col(data, table, insert_pk, insert_pk_len, insert_name, insert_value, insert_col_version, insert_db_version, insert_site_id, insert_site_id_len, insert_seq, rowid); + if (rc != DBRES_OK) cloudsync_set_error(data, "Unable to perform merge_insert_col", rc); + } + return rc; } @@ -2431,78 +2724,108 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b uint16_t ncols = header.ncols; uint32_t nrows = header.nrows; int64_t last_payload_db_version = -1; - bool in_savepoint = false; int dbversion = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_DBVERSION); int seq = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_SEQ); cloudsync_pk_decode_bind_context decoded_context = {.vm = vm}; - void *payload_apply_xdata = NULL; - void *db = data->db; - cloudsync_payload_apply_callback_t payload_apply_callback = cloudsync_get_payload_apply_callback(db); - + + // Initialize deferred column-batch merge + merge_pending_batch batch = {0}; + data->pending_batch = &batch; + bool in_savepoint = false; + const void *last_pk = NULL; + int64_t last_pk_len = 0; + const char *last_tbl = NULL; + int64_t last_tbl_len = 0; + for (uint32_t i=0; iskip_decode_idx, cloudsync_payload_decode_callback, &decoded_context); if (res == -1) { + merge_flush_pending(data); + data->pending_batch = NULL; + if (batch.cached_vm) { databasevm_finalize(batch.cached_vm); batch.cached_vm = NULL; } + if (batch.cached_col_names) { cloudsync_memory_free(batch.cached_col_names); batch.cached_col_names = NULL; } + if (batch.entries) { cloudsync_memory_free(batch.entries); 
batch.entries = NULL; } if (in_savepoint) database_rollback_savepoint(data, "cloudsync_payload_apply"); rc = DBRES_ERROR; goto cleanup; } - // n is the pk_decode return value, I don't think I should assert here because in any case the next databasevm_step would fail - // assert(n == ncols); - - bool approved = true; - if (payload_apply_callback) approved = payload_apply_callback(&payload_apply_xdata, &decoded_context, db, data, CLOUDSYNC_PAYLOAD_APPLY_WILL_APPLY, DBRES_OK); - - // Apply consecutive rows with the same db_version inside a transaction if no - // transaction has already been opened. - // The user may have already opened a transaction before applying the payload, - // and the `payload_apply_callback` may have already opened a savepoint. - // Nested savepoints work, but overlapping savepoints could alter the expected behavior. - // This savepoint ensures that the db_version value remains consistent for all - // rows with the same original db_version in the payload. + // Detect PK/table/db_version boundary to flush pending batch + bool pk_changed = (last_pk != NULL && + (last_pk_len != decoded_context.pk_len || + memcmp(last_pk, decoded_context.pk, last_pk_len) != 0)); + bool tbl_changed = (last_tbl != NULL && + (last_tbl_len != decoded_context.tbl_len || + memcmp(last_tbl, decoded_context.tbl, last_tbl_len) != 0)); bool db_version_changed = (last_payload_db_version != decoded_context.db_version); - // Release existing savepoint if db_version changed + // Flush pending batch before any boundary change + if (pk_changed || tbl_changed || db_version_changed) { + int flush_rc = merge_flush_pending(data); + if (flush_rc != DBRES_OK) { + rc = flush_rc; + // continue processing remaining rows + } + } + + // Per-db_version savepoints group rows with the same source db_version + // into one transaction. 
In SQLite autocommit mode, the RELEASE triggers + // the commit hook which bumps data->db_version and resets seq, ensuring + // unique (db_version, seq) tuples across groups. In PostgreSQL SPI, + // database_in_transaction() is always true so this block is inactive — + // the inner per-PK savepoint in merge_flush_pending handles RLS instead. if (in_savepoint && db_version_changed) { rc = database_commit_savepoint(data, "cloudsync_payload_apply"); if (rc != DBRES_OK) { + merge_pending_free_entries(&batch); + data->pending_batch = NULL; cloudsync_set_error(data, "Error on cloudsync_payload_apply: unable to release a savepoint", rc); goto cleanup; } in_savepoint = false; } - // Start new savepoint if needed - bool in_transaction = database_in_transaction(data); - if (!in_transaction && db_version_changed) { + if (!in_savepoint && db_version_changed && !database_in_transaction(data)) { rc = database_begin_savepoint(data, "cloudsync_payload_apply"); if (rc != DBRES_OK) { + merge_pending_free_entries(&batch); + data->pending_batch = NULL; cloudsync_set_error(data, "Error on cloudsync_payload_apply: unable to start a transaction", rc); goto cleanup; } - last_payload_db_version = decoded_context.db_version; in_savepoint = true; } - - if (approved) { - rc = databasevm_step(vm); - if (rc != DBRES_DONE) { - // don't "break;", the error can be due to a RLS policy. 
- // in case of error we try to apply the following changes - // DEBUG_ALWAYS("cloudsync_payload_apply error on db_version %PRId64/%PRId64: (%d) %s\n", decoded_context.db_version, decoded_context.seq, rc, database_errmsg(data)); - } + + // Track db_version for batch-flush boundary detection + if (db_version_changed) { + last_payload_db_version = decoded_context.db_version; } - - if (payload_apply_callback) { - payload_apply_callback(&payload_apply_xdata, &decoded_context, db, data, CLOUDSYNC_PAYLOAD_APPLY_DID_APPLY, rc); + + // Update PK/table tracking + last_pk = decoded_context.pk; + last_pk_len = decoded_context.pk_len; + last_tbl = decoded_context.tbl; + last_tbl_len = decoded_context.tbl_len; + + rc = databasevm_step(vm); + if (rc != DBRES_DONE) { + // don't "break;", the error can be due to a RLS policy. + // in case of error we try to apply the following changes } - + buffer += seek; buf_len -= seek; dbvm_reset(vm); } - + + // Final flush after loop + { + int flush_rc = merge_flush_pending(data); + if (flush_rc != DBRES_OK && rc == DBRES_OK) rc = flush_rc; + } + data->pending_batch = NULL; + if (in_savepoint) { int rc1 = database_commit_savepoint(data, "cloudsync_payload_apply"); if (rc1 != DBRES_OK) rc = rc1; @@ -2512,10 +2835,6 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b if (rc != DBRES_OK && rc != DBRES_DONE) { cloudsync_set_dberror(data); } - - if (payload_apply_callback) { - payload_apply_callback(&payload_apply_xdata, &decoded_context, db, data, CLOUDSYNC_PAYLOAD_APPLY_CLEANUP, rc); - } if (rc == DBRES_DONE) rc = DBRES_OK; if (rc == DBRES_OK) { @@ -2532,15 +2851,20 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b } cleanup: + // cleanup merge_pending_batch + if (batch.cached_vm) { databasevm_finalize(batch.cached_vm); batch.cached_vm = NULL; } + if (batch.cached_col_names) { cloudsync_memory_free(batch.cached_col_names); batch.cached_col_names = NULL; } + if (batch.entries) { 
cloudsync_memory_free(batch.entries); batch.entries = NULL; } + // cleanup vm if (vm) databasevm_finalize(vm); - + // cleanup memory if (clone) cloudsync_memory_free(clone); - + // error already saved in (save last error) if (rc != DBRES_OK) return rc; - + // return the number of processed rows if (pnrows) *pnrows = nrows; return DBRES_OK; diff --git a/src/cloudsync.h b/src/cloudsync.h index 84dfe4a..d0718fa 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -28,12 +28,6 @@ extern "C" { #define CLOUDSYNC_CHANGES_NCOLS 9 -typedef enum { - CLOUDSYNC_PAYLOAD_APPLY_WILL_APPLY = 1, - CLOUDSYNC_PAYLOAD_APPLY_DID_APPLY = 2, - CLOUDSYNC_PAYLOAD_APPLY_CLEANUP = 3 -} CLOUDSYNC_PAYLOAD_APPLY_STEPS; - // CRDT Algos table_algo cloudsync_algo_from_name (const char *algo_name); const char *cloudsync_algo_name (table_algo algo); diff --git a/src/database.h b/src/database.h index f5324a3..acf98c6 100644 --- a/src/database.h +++ b/src/database.h @@ -142,6 +142,8 @@ char *sql_build_select_nonpk_by_pk (cloudsync_context *data, const char *table_n char *sql_build_delete_by_pk (cloudsync_context *data, const char *table_name, const char *schema); char *sql_build_insert_pk_ignore (cloudsync_context *data, const char *table_name, const char *schema); char *sql_build_upsert_pk_and_col (cloudsync_context *data, const char *table_name, const char *colname, const char *schema); +char *sql_build_upsert_pk_and_multi_cols (cloudsync_context *data, const char *table_name, const char **colnames, int ncolnames, const char *schema); +char *sql_build_update_pk_and_multi_cols (cloudsync_context *data, const char *table_name, const char **colnames, int ncolnames, const char *schema); char *sql_build_select_cols_by_pk (cloudsync_context *data, const char *table_name, const char *colname, const char *schema); char *sql_build_rekey_pk_and_reset_version_except_col (cloudsync_context *data, const char *table_name, const char *except_col); char *sql_build_delete_cols_not_in_schema_query(const char 
*schema, const char *table_name, const char *meta_ref, const char *pkcol); @@ -154,10 +156,7 @@ char *database_table_schema(const char *table_name); char *database_build_meta_ref(const char *schema, const char *table_name); char *database_build_base_ref(const char *schema, const char *table_name); -// USED ONLY by SQLite Cloud to implement RLS +// OPAQUE STRUCT used by pk_context functions typedef struct cloudsync_pk_decode_bind_context cloudsync_pk_decode_bind_context; -typedef bool (*cloudsync_payload_apply_callback_t)(void **xdata, cloudsync_pk_decode_bind_context *decoded_change, void *db, void *data, int step, int rc); -void cloudsync_set_payload_apply_callback(void *db, cloudsync_payload_apply_callback_t callback); -cloudsync_payload_apply_callback_t cloudsync_get_payload_apply_callback(void *db); #endif diff --git a/src/postgresql/database_postgresql.c b/src/postgresql/database_postgresql.c index f777166..70bc4e1 100644 --- a/src/postgresql/database_postgresql.c +++ b/src/postgresql/database_postgresql.c @@ -210,6 +210,129 @@ char *sql_build_upsert_pk_and_col (cloudsync_context *data, const char *table_na return (rc == DBRES_OK) ? query : NULL; } +char *sql_build_upsert_pk_and_multi_cols (cloudsync_context *data, const char *table_name, const char **colnames, int ncolnames, const char *schema) { + if (ncolnames <= 0 || !colnames) return NULL; + + char *qualified = database_build_base_ref(schema, table_name); + if (!qualified) return NULL; + + // Build VALUES list for column names: ('col_a',1),('col_b',2) + // Column names are SQL literals here, so escape single quotes + size_t values_cap = (size_t)ncolnames * 128 + 1; + char *col_values = cloudsync_memory_alloc(values_cap); + if (!col_values) { cloudsync_memory_free(qualified); return NULL; } + + size_t vpos = 0; + for (int i = 0; i < ncolnames; i++) { + char esc[1024]; + sql_escape_literal(colnames[i], esc, sizeof(esc)); + vpos += snprintf(col_values + vpos, values_cap - vpos, "%s('%s'::text,%d)", + i > 0 ? 
"," : "", esc, i + 1); + } + + // Build meta-query that generates the final INSERT...ON CONFLICT SQL with proper types + char *meta_sql = cloudsync_memory_mprintf( + "WITH tbl AS (" + " SELECT to_regclass('%s') AS oid" + "), " + "pk AS (" + " SELECT a.attname, k.ord, format_type(a.atttypid, a.atttypmod) AS coltype " + " FROM pg_index x " + " JOIN tbl t ON t.oid = x.indrelid " + " JOIN LATERAL unnest(x.indkey) WITH ORDINALITY AS k(attnum, ord) ON true " + " JOIN pg_attribute a ON a.attrelid = x.indrelid AND a.attnum = k.attnum " + " WHERE x.indisprimary " + " ORDER BY k.ord" + "), " + "pk_count AS (SELECT count(*) AS n FROM pk), " + "cols AS (" + " SELECT u.colname, format_type(a.atttypid, a.atttypmod) AS coltype, u.ord " + " FROM (VALUES %s) AS u(colname, ord) " + " JOIN pg_attribute a ON a.attrelid = (SELECT oid FROM tbl) AND a.attname = u.colname " + " WHERE a.attnum > 0 AND NOT a.attisdropped" + ") " + "SELECT " + " 'INSERT INTO ' || (SELECT (oid::regclass)::text FROM tbl)" + " || ' (' || (SELECT string_agg(format('%%I', attname), ',' ORDER BY ord) FROM pk)" + " || ',' || (SELECT string_agg(format('%%I', colname), ',' ORDER BY ord) FROM cols) || ')'" + " || ' VALUES (' || (SELECT string_agg(format('$%%s::%%s', ord, coltype), ',' ORDER BY ord) FROM pk)" + " || ',' || (SELECT string_agg(format('$%%s::%%s', (SELECT n FROM pk_count) + ord, coltype), ',' ORDER BY ord) FROM cols) || ')'" + " || ' ON CONFLICT (' || (SELECT string_agg(format('%%I', attname), ',' ORDER BY ord) FROM pk) || ')'" + " || ' DO UPDATE SET ' || (SELECT string_agg(format('%%I=EXCLUDED.%%I', colname, colname), ',' ORDER BY ord) FROM cols)" + " || ';';", + qualified, col_values + ); + + cloudsync_memory_free(qualified); + cloudsync_memory_free(col_values); + if (!meta_sql) return NULL; + + char *query = NULL; + int rc = database_select_text(data, meta_sql, &query); + cloudsync_memory_free(meta_sql); + + return (rc == DBRES_OK) ? 
query : NULL; +} + +char *sql_build_update_pk_and_multi_cols (cloudsync_context *data, const char *table_name, const char **colnames, int ncolnames, const char *schema) { + if (ncolnames <= 0 || !colnames) return NULL; + + char *qualified = database_build_base_ref(schema, table_name); + if (!qualified) return NULL; + + // Build VALUES list for column names: ('col_a',1),('col_b',2) + size_t values_cap = (size_t)ncolnames * 128 + 1; + char *col_values = cloudsync_memory_alloc(values_cap); + if (!col_values) { cloudsync_memory_free(qualified); return NULL; } + + size_t vpos = 0; + for (int i = 0; i < ncolnames; i++) { + char esc[1024]; + sql_escape_literal(colnames[i], esc, sizeof(esc)); + vpos += snprintf(col_values + vpos, values_cap - vpos, "%s('%s'::text,%d)", + i > 0 ? "," : "", esc, i + 1); + } + + // Build meta-query that generates UPDATE ... SET col=$ WHERE pk=$ + char *meta_sql = cloudsync_memory_mprintf( + "WITH tbl AS (" + " SELECT to_regclass('%s') AS oid" + "), " + "pk AS (" + " SELECT a.attname, k.ord, format_type(a.atttypid, a.atttypmod) AS coltype " + " FROM pg_index x " + " JOIN tbl t ON t.oid = x.indrelid " + " JOIN LATERAL unnest(x.indkey) WITH ORDINALITY AS k(attnum, ord) ON true " + " JOIN pg_attribute a ON a.attrelid = x.indrelid AND a.attnum = k.attnum " + " WHERE x.indisprimary " + " ORDER BY k.ord" + "), " + "pk_count AS (SELECT count(*) AS n FROM pk), " + "cols AS (" + " SELECT u.colname, format_type(a.atttypid, a.atttypmod) AS coltype, u.ord " + " FROM (VALUES %s) AS u(colname, ord) " + " JOIN pg_attribute a ON a.attrelid = (SELECT oid FROM tbl) AND a.attname = u.colname " + " WHERE a.attnum > 0 AND NOT a.attisdropped" + ") " + "SELECT " + " 'UPDATE ' || (SELECT (oid::regclass)::text FROM tbl)" + " || ' SET ' || (SELECT string_agg(format('%%I=$%%s::%%s', colname, (SELECT n FROM pk_count) + ord, coltype), ',' ORDER BY ord) FROM cols)" + " || ' WHERE ' || (SELECT string_agg(format('%%I=$%%s::%%s', attname, ord, coltype), ' AND ' ORDER BY ord) 
FROM pk)" + " || ';';", + qualified, col_values + ); + + cloudsync_memory_free(qualified); + cloudsync_memory_free(col_values); + if (!meta_sql) return NULL; + + char *query = NULL; + int rc = database_select_text(data, meta_sql, &query); + cloudsync_memory_free(meta_sql); + + return (rc == DBRES_OK) ? query : NULL; +} + char *sql_build_select_cols_by_pk (cloudsync_context *data, const char *table_name, const char *colname, const char *schema) { UNUSED_PARAMETER(data); char *qualified = database_build_base_ref(schema, table_name); @@ -2698,15 +2821,24 @@ void *database_value_dup (dbvalue_t *value) { if (!v) return NULL; pgvalue_t *copy = pgvalue_create(v->datum, v->typeid, v->typmod, v->collation, v->isnull); - if (v->detoasted && v->owned_detoast) { - Size len = VARSIZE_ANY(v->owned_detoast); + + // Deep-copy pass-by-reference (varlena) datum data into TopMemoryContext + // so the copy survives SPI_finish() which destroys the caller's SPI context. + bool is_varlena = (v->typeid == BYTEAOID) || pgvalue_is_text_type(v->typeid); + if (is_varlena && !v->isnull) { + void *src = v->owned_detoast ? 
v->owned_detoast : DatumGetPointer(v->datum); + Size len = VARSIZE_ANY(src); + MemoryContext old = MemoryContextSwitchTo(TopMemoryContext); copy->owned_detoast = palloc(len); - memcpy(copy->owned_detoast, v->owned_detoast, len); + MemoryContextSwitchTo(old); + memcpy(copy->owned_detoast, src, len); copy->datum = PointerGetDatum(copy->owned_detoast); copy->detoasted = true; } if (v->cstring) { + MemoryContext old = MemoryContextSwitchTo(TopMemoryContext); copy->cstring = pstrdup(v->cstring); + MemoryContextSwitchTo(old); copy->owns_cstring = true; } return (void*)copy; @@ -2744,7 +2876,7 @@ static int database_refresh_snapshot (void) { return DBRES_ERROR; } PG_END_TRY(); - + return DBRES_OK; } @@ -2902,14 +3034,4 @@ uint64_t dbmem_size (void *ptr) { return 0; } -// MARK: - CLOUDSYNC CALLBACK - -static cloudsync_payload_apply_callback_t payload_apply_callback = NULL; - -void cloudsync_set_payload_apply_callback(void *db, cloudsync_payload_apply_callback_t callback) { - payload_apply_callback = callback; -} - -cloudsync_payload_apply_callback_t cloudsync_get_payload_apply_callback(void *db) { - return payload_apply_callback; -} diff --git a/src/sqlite/database_sqlite.c b/src/sqlite/database_sqlite.c index 82433fe..b1de7ad 100644 --- a/src/sqlite/database_sqlite.c +++ b/src/sqlite/database_sqlite.c @@ -25,8 +25,6 @@ SQLITE_EXTENSION_INIT3 #endif -#define CLOUDSYNC_PAYLOAD_APPLY_CALLBACK_KEY "cloudsync_payload_apply_callback" - // MARK: - SQL - char *sql_build_drop_table (const char *table_name, char *buffer, int bsize, bool is_meta) { @@ -151,6 +149,126 @@ char *sql_build_upsert_pk_and_col (cloudsync_context *data, const char *table_na return (rc == DBRES_OK) ? 
query : NULL; } +char *sql_build_upsert_pk_and_multi_cols (cloudsync_context *data, const char *table_name, const char **colnames, int ncolnames, const char *schema) { + UNUSED_PARAMETER(schema); + if (ncolnames <= 0 || !colnames) return NULL; + + // Get PK column names via pragma_table_info (same approach as database_pk_names) + char **pk_names = NULL; + int npks = 0; + int rc = database_pk_names(data, table_name, &pk_names, &npks); + if (rc != DBRES_OK || npks <= 0 || !pk_names) return NULL; + + // Build column list: "pk1","pk2","col_a","col_b" + char *col_list = cloudsync_memory_mprintf("\"%w\"", pk_names[0]); + if (!col_list) goto fail; + for (int i = 1; i < npks; i++) { + char *prev = col_list; + col_list = cloudsync_memory_mprintf("%s,\"%w\"", prev, pk_names[i]); + cloudsync_memory_free(prev); + if (!col_list) goto fail; + } + for (int i = 0; i < ncolnames; i++) { + char *prev = col_list; + col_list = cloudsync_memory_mprintf("%s,\"%w\"", prev, colnames[i]); + cloudsync_memory_free(prev); + if (!col_list) goto fail; + } + + // Build bind list: ?,?,?,? 
+ int total = npks + ncolnames; + char *binds = (char *)cloudsync_memory_alloc(total * 2); + if (!binds) { cloudsync_memory_free(col_list); goto fail; } + int pos = 0; + for (int i = 0; i < total; i++) { + if (i > 0) binds[pos++] = ','; + binds[pos++] = '?'; + } + binds[pos] = '\0'; + + // Build excluded set: "col_a"=EXCLUDED."col_a","col_b"=EXCLUDED."col_b" + char *excl = cloudsync_memory_mprintf("\"%w\"=EXCLUDED.\"%w\"", colnames[0], colnames[0]); + if (!excl) { cloudsync_memory_free(col_list); cloudsync_memory_free(binds); goto fail; } + for (int i = 1; i < ncolnames; i++) { + char *prev = excl; + excl = cloudsync_memory_mprintf("%s,\"%w\"=EXCLUDED.\"%w\"", prev, colnames[i], colnames[i]); + cloudsync_memory_free(prev); + if (!excl) { cloudsync_memory_free(col_list); cloudsync_memory_free(binds); goto fail; } + } + + // Assemble final SQL + char *sql = cloudsync_memory_mprintf( + "INSERT INTO \"%w\" (%s) VALUES (%s) ON CONFLICT DO UPDATE SET %s;", + table_name, col_list, binds, excl + ); + + cloudsync_memory_free(col_list); + cloudsync_memory_free(binds); + cloudsync_memory_free(excl); + for (int i = 0; i < npks; i++) cloudsync_memory_free(pk_names[i]); + cloudsync_memory_free(pk_names); + return sql; + +fail: + if (pk_names) { + for (int i = 0; i < npks; i++) cloudsync_memory_free(pk_names[i]); + cloudsync_memory_free(pk_names); + } + return NULL; +} + +char *sql_build_update_pk_and_multi_cols (cloudsync_context *data, const char *table_name, const char **colnames, int ncolnames, const char *schema) { + UNUSED_PARAMETER(schema); + if (ncolnames <= 0 || !colnames) return NULL; + + // Get PK column names + char **pk_names = NULL; + int npks = 0; + int rc = database_pk_names(data, table_name, &pk_names, &npks); + if (rc != DBRES_OK || npks <= 0 || !pk_names) return NULL; + + // Build SET clause: "col_a"=?npks+1,"col_b"=?npks+2 + // Uses numbered parameters to match merge_flush_pending bind order: + // positions 1..npks are PKs, npks+1..npks+ncolnames are column 
values. + char *set_clause = cloudsync_memory_mprintf("\"%w\"=?%d", colnames[0], npks + 1); + if (!set_clause) goto fail; + for (int i = 1; i < ncolnames; i++) { + char *prev = set_clause; + set_clause = cloudsync_memory_mprintf("%s,\"%w\"=?%d", prev, colnames[i], npks + 1 + i); + cloudsync_memory_free(prev); + if (!set_clause) goto fail; + } + + // Build WHERE clause: "pk1"=?1 AND "pk2"=?2 + char *where_clause = cloudsync_memory_mprintf("\"%w\"=?%d", pk_names[0], 1); + if (!where_clause) { cloudsync_memory_free(set_clause); goto fail; } + for (int i = 1; i < npks; i++) { + char *prev = where_clause; + where_clause = cloudsync_memory_mprintf("%s AND \"%w\"=?%d", prev, pk_names[i], 1 + i); + cloudsync_memory_free(prev); + if (!where_clause) { cloudsync_memory_free(set_clause); goto fail; } + } + + // Assemble: UPDATE "table" SET ... WHERE ... + char *sql = cloudsync_memory_mprintf( + "UPDATE \"%w\" SET %s WHERE %s;", + table_name, set_clause, where_clause + ); + + cloudsync_memory_free(set_clause); + cloudsync_memory_free(where_clause); + for (int i = 0; i < npks; i++) cloudsync_memory_free(pk_names[i]); + cloudsync_memory_free(pk_names); + return sql; + +fail: + if (pk_names) { + for (int i = 0; i < npks; i++) cloudsync_memory_free(pk_names[i]); + cloudsync_memory_free(pk_names); + } + return NULL; +} + char *sql_build_select_cols_by_pk (cloudsync_context *data, const char *table_name, const char *colname, const char *schema) { UNUSED_PARAMETER(schema); char *colnamequote = "\""; @@ -1263,14 +1381,4 @@ uint64_t dbmem_size (void *ptr) { return (uint64_t)sqlite3_msize(ptr); } -// MARK: - Used to implement Server Side RLS - -cloudsync_payload_apply_callback_t cloudsync_get_payload_apply_callback(void *db) { - return (sqlite3_libversion_number() >= 3044000) ? 
sqlite3_get_clientdata((sqlite3 *)db, CLOUDSYNC_PAYLOAD_APPLY_CALLBACK_KEY) : NULL; -} - -void cloudsync_set_payload_apply_callback(void *db, cloudsync_payload_apply_callback_t callback) { - if (sqlite3_libversion_number() >= 3044000) { - sqlite3_set_clientdata((sqlite3 *)db, CLOUDSYNC_PAYLOAD_APPLY_CALLBACK_KEY, (void*)callback, NULL); - } -} diff --git a/test/postgresql/27_rls_batch_merge.sql b/test/postgresql/27_rls_batch_merge.sql new file mode 100644 index 0000000..2ab51bf --- /dev/null +++ b/test/postgresql/27_rls_batch_merge.sql @@ -0,0 +1,356 @@ +-- 'RLS batch merge test' +-- Verifies that the deferred column-batch merge produces complete rows +-- that work correctly with PostgreSQL Row Level Security policies. +-- +-- Tests 1-3: cloudsync_payload_apply runs as superuser (service-role pattern). +-- RLS is enforced at the query layer when users access data. +-- +-- Tests 4-6: cloudsync_payload_apply runs as non-superuser (authenticated-role +-- pattern). RLS is enforced during the write itself. 
+ +\set testid '27' +\ir helper_test_init.sql + +\set USER1 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' +\set USER2 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb' + +-- ============================================================ +-- DB A: source database (no RLS) +-- ============================================================ +\connect postgres +\ir helper_psql_conn_setup.sql +DROP DATABASE IF EXISTS cloudsync_test_27_a; +CREATE DATABASE cloudsync_test_27_a; + +\connect cloudsync_test_27_a +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; + +CREATE TABLE documents ( + id TEXT PRIMARY KEY NOT NULL, + user_id UUID, + title TEXT, + content TEXT +); +SELECT cloudsync_init('documents') AS _init_site_id_a \gset + +-- ============================================================ +-- DB B: target database (with RLS) +-- ============================================================ +\connect postgres +\ir helper_psql_conn_setup.sql +DROP DATABASE IF EXISTS cloudsync_test_27_b; +CREATE DATABASE cloudsync_test_27_b; + +-- Create non-superuser role (ignore error if it already exists) +DO $$ BEGIN + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'test_rls_user') THEN + CREATE ROLE test_rls_user LOGIN; + END IF; +END $$; + +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; + +CREATE TABLE documents ( + id TEXT PRIMARY KEY NOT NULL, + user_id UUID, + title TEXT, + content TEXT +); +SELECT cloudsync_init('documents') AS _init_site_id_b \gset + +-- Auth mock: auth.uid() reads from session variable app.current_user_id +CREATE SCHEMA IF NOT EXISTS auth; +CREATE OR REPLACE FUNCTION auth.uid() RETURNS UUID + LANGUAGE sql STABLE +AS $$ SELECT NULLIF(current_setting('app.current_user_id', true), '')::UUID; $$; + +-- Enable RLS +ALTER TABLE documents ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "select_own" ON documents FOR SELECT + USING (auth.uid() = user_id); +CREATE POLICY "insert_own" ON documents FOR INSERT + WITH 
CHECK (auth.uid() = user_id); +CREATE POLICY "update_own" ON documents FOR UPDATE + USING (auth.uid() = user_id) + WITH CHECK (auth.uid() = user_id); +CREATE POLICY "delete_own" ON documents FOR DELETE + USING (auth.uid() = user_id); + +-- Grant permissions to test_rls_user +GRANT USAGE ON SCHEMA public TO test_rls_user; +GRANT ALL ON ALL TABLES IN SCHEMA public TO test_rls_user; +GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO test_rls_user; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO test_rls_user; +GRANT USAGE ON SCHEMA auth TO test_rls_user; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA auth TO test_rls_user; + +-- ============================================================ +-- Test 1: Batch merge produces complete row — user1 doc synced +-- ============================================================ +\connect cloudsync_test_27_a +\ir helper_psql_conn_setup.sql +INSERT INTO documents VALUES ('doc1', :'USER1'::UUID, 'Title 1', 'Content 1'); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_1 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() \gset + +-- Save high-water mark so subsequent encodes only pick up new changes +SELECT COALESCE(max(db_version), 0) AS max_dbv_1 FROM cloudsync_changes \gset + +-- Apply as superuser (service-role pattern) +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql +SELECT cloudsync_payload_apply(decode(:'payload_hex_1', 'hex')) AS apply_1 \gset + +-- 1 row × 3 non-PK columns = 3 column-change entries +SELECT (:apply_1::int = 3) AS apply_1_ok \gset +\if :apply_1_ok +\echo [PASS] (:testid) RLS: apply returned :apply_1 +\else +\echo [FAIL] (:testid) RLS: apply returned :apply_1 (expected 3) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify complete row written (all columns present) +SELECT COUNT(*) AS doc1_count FROM documents WHERE id = 'doc1' AND title = 'Title 1' AND content = 'Content 1' AND user_id = 
:'USER1'::UUID \gset +SELECT (:doc1_count::int = 1) AS test1_ok \gset +\if :test1_ok +\echo [PASS] (:testid) RLS: batch merge writes complete row +\else +\echo [FAIL] (:testid) RLS: batch merge writes complete row — got :doc1_count matching rows +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 2: Sync user2 doc, then verify RLS hides it from user1 +-- ============================================================ +\connect cloudsync_test_27_a +\ir helper_psql_conn_setup.sql +INSERT INTO documents VALUES ('doc2', :'USER2'::UUID, 'Title 2', 'Content 2'); + +-- Encode only changes newer than test 1 (doc2 only) +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_2 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_1 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_2 FROM cloudsync_changes \gset + +-- Apply as superuser +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql +SELECT cloudsync_payload_apply(decode(:'payload_hex_2', 'hex')) AS apply_2 \gset + +-- 1 row × 3 non-PK columns = 3 entries +SELECT (:apply_2::int = 3) AS apply_2_ok \gset +\if :apply_2_ok +\echo [PASS] (:testid) RLS: apply returned :apply_2 +\else +\echo [FAIL] (:testid) RLS: apply returned :apply_2 (expected 3) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify doc2 exists (superuser sees all) +SELECT COUNT(*) AS doc2_exists FROM documents WHERE id = 'doc2' \gset + +-- Now check as user1: RLS should hide doc2 (owned by user2) +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT COUNT(*) AS doc2_visible FROM documents WHERE id = 'doc2' \gset +RESET ROLE; + +SELECT (:doc2_exists::int = 1 AND :doc2_visible::int = 0) AS test2_ok \gset +\if :test2_ok +\echo [PASS] (:testid) RLS: user2 doc synced but hidden from user1 +\else +\echo [FAIL] (:testid) RLS: user2 doc synced but 
hidden from user1 — exists=:doc2_exists visible=:doc2_visible +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 3: Update doc1, verify user1 sees update via RLS +-- ============================================================ +\connect cloudsync_test_27_a +\ir helper_psql_conn_setup.sql +UPDATE documents SET title = 'Title 1 Updated' WHERE id = 'doc1'; + +-- Encode only changes newer than test 2 (doc1 update only) +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_3 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_2 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_3 FROM cloudsync_changes \gset + +-- Apply as superuser +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql +SELECT cloudsync_payload_apply(decode(:'payload_hex_3', 'hex')) AS apply_3 \gset + +-- 1 row × 1 changed column (title) = 1 entry +SELECT (:apply_3::int = 1) AS apply_3_ok \gset +\if :apply_3_ok +\echo [PASS] (:testid) RLS: apply returned :apply_3 +\else +\echo [FAIL] (:testid) RLS: apply returned :apply_3 (expected 1) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify update applied (superuser check) +SELECT COUNT(*) AS doc1_updated FROM documents WHERE id = 'doc1' AND title = 'Title 1 Updated' \gset + +-- Verify user1 can see the updated row via RLS +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT COUNT(*) AS doc1_visible FROM documents WHERE id = 'doc1' AND title = 'Title 1 Updated' \gset +RESET ROLE; + +SELECT (:doc1_updated::int = 1 AND :doc1_visible::int = 1) AS test3_ok \gset +\if :test3_ok +\echo [PASS] (:testid) RLS: update synced and visible to owner +\else +\echo [FAIL] (:testid) RLS: update synced and visible to owner — updated=:doc1_updated visible=:doc1_visible +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- 
============================================================ +-- Test 4: Authenticated insert allowed (own row) +-- cloudsync_payload_apply as non-superuser with matching user_id +-- ============================================================ +\connect cloudsync_test_27_a +\ir helper_psql_conn_setup.sql +INSERT INTO documents VALUES ('doc3', :'USER1'::UUID, 'Title 3', 'Content 3'); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_4 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_3 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_4 FROM cloudsync_changes \gset + +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_4', 'hex')) AS apply_4 \gset +RESET ROLE; + +-- 1 row × 3 non-PK columns = 3 entries +SELECT (:apply_4::int = 3) AS apply_4_ok \gset +\if :apply_4_ok +\echo [PASS] (:testid) RLS auth: apply returned :apply_4 +\else +\echo [FAIL] (:testid) RLS auth: apply returned :apply_4 (expected 3) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify doc3 exists with all columns correct +SELECT COUNT(*) AS doc3_count FROM documents WHERE id = 'doc3' AND title = 'Title 3' AND content = 'Content 3' AND user_id = :'USER1'::UUID \gset +SELECT (:doc3_count::int = 1) AS test4_ok \gset +\if :test4_ok +\echo [PASS] (:testid) RLS auth: insert own row allowed +\else +\echo [FAIL] (:testid) RLS auth: insert own row allowed — got :doc3_count matching rows +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 5: Authenticated insert denied (other user's row) +-- cloudsync_payload_apply as non-superuser with mismatched user_id +-- ============================================================ +\connect cloudsync_test_27_a +\ir helper_psql_conn_setup.sql 
+INSERT INTO documents VALUES ('doc4', :'USER2'::UUID, 'Title 4', 'Content 4'); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_5 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_4 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_5 FROM cloudsync_changes \gset + +-- Apply as test_rls_user with USER1 identity — should be denied (doc4 owned by USER2) +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_5', 'hex')) AS apply_5 \gset + +-- Reconnect for clean state after expected RLS denial +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql + +-- 1 row × 3 non-PK columns = 3 entries (returned even if denied) +SELECT (:apply_5::int = 3) AS apply_5_ok \gset +\if :apply_5_ok +\echo [PASS] (:testid) RLS auth: denied apply returned :apply_5 +\else +\echo [FAIL] (:testid) RLS auth: denied apply returned :apply_5 (expected 3) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify doc4 does NOT exist (superuser check) +SELECT COUNT(*) AS doc4_count FROM documents WHERE id = 'doc4' \gset +SELECT (:doc4_count::int = 0) AS test5_ok \gset +\if :test5_ok +\echo [PASS] (:testid) RLS auth: insert other user row denied +\else +\echo [FAIL] (:testid) RLS auth: insert other user row denied — got :doc4_count rows (expected 0) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 6: Authenticated update allowed (own row) +-- cloudsync_payload_apply as non-superuser updating own row +-- ============================================================ +\connect cloudsync_test_27_a +\ir helper_psql_conn_setup.sql +UPDATE documents SET title = 'Title 3 Updated' WHERE id = 'doc3'; + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, 
col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_6 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_5 \gset + +\connect cloudsync_test_27_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_6', 'hex')) AS apply_6 \gset +RESET ROLE; + +-- 1 row × 1 changed column (title) = 1 entry +SELECT (:apply_6::int = 1) AS apply_6_ok \gset +\if :apply_6_ok +\echo [PASS] (:testid) RLS auth: apply returned :apply_6 +\else +\echo [FAIL] (:testid) RLS auth: apply returned :apply_6 (expected 1) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify doc3 title was updated +SELECT COUNT(*) AS doc3_updated FROM documents WHERE id = 'doc3' AND title = 'Title 3 Updated' \gset +SELECT (:doc3_updated::int = 1) AS test6_ok \gset +\if :test6_ok +\echo [PASS] (:testid) RLS auth: update own row allowed +\else +\echo [FAIL] (:testid) RLS auth: update own row allowed — got :doc3_updated matching rows +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Cleanup +-- ============================================================ +\ir helper_test_cleanup.sql +\if :should_cleanup +DROP DATABASE IF EXISTS cloudsync_test_27_a; +DROP DATABASE IF EXISTS cloudsync_test_27_b; +DROP ROLE IF EXISTS test_rls_user; +\else +\echo [INFO] !!!!! 
+\endif diff --git a/test/postgresql/28_db_version_tracking.sql b/test/postgresql/28_db_version_tracking.sql new file mode 100644 index 0000000..25255ee --- /dev/null +++ b/test/postgresql/28_db_version_tracking.sql @@ -0,0 +1,275 @@ +-- Test db_version/seq tracking in cloudsync_changes after payload apply +-- PostgreSQL equivalent of SQLite unit tests: +-- "Merge Test db_version 1" (do_test_merge_check_db_version) +-- "Merge Test db_version 2" (do_test_merge_check_db_version_2) + +\set testid '28' +\ir helper_test_init.sql + +-- ============================================================ +-- Setup: create databases A and B with the todo table +-- ============================================================ +\connect postgres +\ir helper_psql_conn_setup.sql +DROP DATABASE IF EXISTS cloudsync_test_28_a; +DROP DATABASE IF EXISTS cloudsync_test_28_b; +CREATE DATABASE cloudsync_test_28_a; +CREATE DATABASE cloudsync_test_28_b; + +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; +CREATE TABLE todo (id TEXT PRIMARY KEY NOT NULL, title TEXT, status TEXT); +SELECT cloudsync_init('todo', 'CLS', true) AS _init_a \gset + +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; +CREATE TABLE todo (id TEXT PRIMARY KEY NOT NULL, title TEXT, status TEXT); +SELECT cloudsync_init('todo', 'CLS', true) AS _init_b \gset + +-- ============================================================ +-- Test 1: One-way merge (A -> B), mixed insert patterns +-- Mirrors do_test_merge_check_db_version from test/unit.c +-- ============================================================ + +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql + +-- Autocommit insert (db_version 1) +INSERT INTO todo VALUES ('ID1', 'Buy groceries', 'in_progress1'); + +-- Multi-row insert (db_version 2 — single statement) +INSERT INTO todo VALUES ('ID2', 'Buy bananas', 'in_progress2'), ('ID3', 'Buy vegetables', 
'in_progress3'); + +-- Autocommit insert (db_version 3) +INSERT INTO todo VALUES ('ID4', 'Buy apples', 'in_progress4'); + +-- Transaction with 3 inserts (db_version 4 — one transaction) +BEGIN; +INSERT INTO todo VALUES ('ID5', 'Buy oranges', 'in_progress5'); +INSERT INTO todo VALUES ('ID6', 'Buy lemons', 'in_progress6'); +INSERT INTO todo VALUES ('ID7', 'Buy pizza', 'in_progress7'); +COMMIT; + +-- Encode payload +SELECT CASE WHEN payload IS NULL OR octet_length(payload) = 0 + THEN '' + ELSE '\x' || encode(payload, 'hex') + END AS payload_a_t1, + (payload IS NOT NULL AND octet_length(payload) > 0) AS payload_a_t1_ok +FROM ( + SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) AS payload + FROM cloudsync_changes + WHERE site_id = cloudsync_siteid() +) AS p \gset + +-- Apply to B +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +\if :payload_a_t1_ok +SELECT cloudsync_payload_apply(decode(substr(:'payload_a_t1', 3), 'hex')) AS _apply_t1 \gset +\endif + +-- Verify data matches +SELECT md5(COALESCE(string_agg(id || ':' || COALESCE(title, '') || ':' || COALESCE(status, ''), ',' ORDER BY id), '')) AS hash_b_t1 +FROM todo \gset + +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql +SELECT md5(COALESCE(string_agg(id || ':' || COALESCE(title, '') || ':' || COALESCE(status, ''), ',' ORDER BY id), '')) AS hash_a_t1 +FROM todo \gset + +SELECT (:'hash_a_t1' = :'hash_b_t1') AS t1_data_ok \gset +\if :t1_data_ok +\echo [PASS] (:testid) db_version test 1: data roundtrip matches +\else +\echo [FAIL] (:testid) db_version test 1: data roundtrip mismatch +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify no repeated (db_version, seq) tuples on B +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +SELECT COUNT(*) AS dup_count_b_t1 +FROM ( + SELECT db_version, seq, COUNT(*) AS cnt + FROM cloudsync_changes + GROUP BY db_version, seq + HAVING COUNT(*) > 1 +) AS dups \gset + +SELECT 
(:dup_count_b_t1::int = 0) AS t1_no_dups_b \gset +\if :t1_no_dups_b +\echo [PASS] (:testid) db_version test 1: no duplicate (db_version, seq) on B +\else +\echo [FAIL] (:testid) db_version test 1: duplicate (db_version, seq) on B +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify row count +SELECT COUNT(*) AS row_count_b_t1 FROM todo \gset +SELECT (:row_count_b_t1::int = 7) AS t1_count_ok \gset +\if :t1_count_ok +\echo [PASS] (:testid) db_version test 1: row count correct (7) +\else +\echo [FAIL] (:testid) db_version test 1: expected 7 rows, got :row_count_b_t1 +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 2: Bidirectional merge (A -> B, B -> A), mixed patterns +-- Mirrors do_test_merge_check_db_version_2 from test/unit.c +-- ============================================================ + +-- Reset: drop and recreate databases +\connect postgres +\ir helper_psql_conn_setup.sql +DROP DATABASE IF EXISTS cloudsync_test_28_a; +DROP DATABASE IF EXISTS cloudsync_test_28_b; +CREATE DATABASE cloudsync_test_28_a; +CREATE DATABASE cloudsync_test_28_b; + +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; +CREATE TABLE todo (id TEXT PRIMARY KEY NOT NULL, title TEXT, status TEXT); +SELECT cloudsync_init('todo', 'CLS', true) AS _init_a2 \gset + +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; +CREATE TABLE todo (id TEXT PRIMARY KEY NOT NULL, title TEXT, status TEXT); +SELECT cloudsync_init('todo', 'CLS', true) AS _init_b2 \gset + +-- DB A: two autocommit inserts (db_version 1, 2) +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql +INSERT INTO todo VALUES ('ID1', 'Buy groceries', 'in_progress'); +INSERT INTO todo VALUES ('ID2', 'Foo', 'Bar'); + +-- DB B: two autocommit inserts + one transaction with 2 inserts +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +INSERT 
INTO todo VALUES ('ID3', 'Foo3', 'Bar3'); +INSERT INTO todo VALUES ('ID4', 'Foo4', 'Bar4'); +BEGIN; +INSERT INTO todo VALUES ('ID5', 'Foo5', 'Bar5'); +INSERT INTO todo VALUES ('ID6', 'Foo6', 'Bar6'); +COMMIT; + +-- Encode A's payload +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql +SELECT CASE WHEN payload IS NULL OR octet_length(payload) = 0 + THEN '' + ELSE '\x' || encode(payload, 'hex') + END AS payload_a_t2, + (payload IS NOT NULL AND octet_length(payload) > 0) AS payload_a_t2_ok +FROM ( + SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) AS payload + FROM cloudsync_changes + WHERE site_id = cloudsync_siteid() +) AS p \gset + +-- Encode B's payload +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +SELECT CASE WHEN payload IS NULL OR octet_length(payload) = 0 + THEN '' + ELSE '\x' || encode(payload, 'hex') + END AS payload_b_t2, + (payload IS NOT NULL AND octet_length(payload) > 0) AS payload_b_t2_ok +FROM ( + SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) AS payload + FROM cloudsync_changes + WHERE site_id = cloudsync_siteid() +) AS p \gset + +-- Apply A -> B +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +\if :payload_a_t2_ok +SELECT cloudsync_payload_apply(decode(substr(:'payload_a_t2', 3), 'hex')) AS _apply_a_to_b \gset +\endif + +-- Apply B -> A +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql +\if :payload_b_t2_ok +SELECT cloudsync_payload_apply(decode(substr(:'payload_b_t2', 3), 'hex')) AS _apply_b_to_a \gset +\endif + +-- Verify data matches between A and B +SELECT md5(COALESCE(string_agg(id || ':' || COALESCE(title, '') || ':' || COALESCE(status, ''), ',' ORDER BY id), '')) AS hash_a_t2 +FROM todo \gset + +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +SELECT md5(COALESCE(string_agg(id || ':' || COALESCE(title, '') || ':' || COALESCE(status, ''), ',' ORDER BY id), '')) AS 
hash_b_t2 +FROM todo \gset + +SELECT (:'hash_a_t2' = :'hash_b_t2') AS t2_data_ok \gset +\if :t2_data_ok +\echo [PASS] (:testid) db_version test 2: bidirectional data matches +\else +\echo [FAIL] (:testid) db_version test 2: bidirectional data mismatch +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify row count (6 rows: ID1-ID6) +SELECT COUNT(*) AS row_count_t2 FROM todo \gset +SELECT (:row_count_t2::int = 6) AS t2_count_ok \gset +\if :t2_count_ok +\echo [PASS] (:testid) db_version test 2: row count correct (6) +\else +\echo [FAIL] (:testid) db_version test 2: expected 6 rows, got :row_count_t2 +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify no repeated (db_version, seq) tuples on A +\connect cloudsync_test_28_a +\ir helper_psql_conn_setup.sql +SELECT COUNT(*) AS dup_count_a_t2 +FROM ( + SELECT db_version, seq, COUNT(*) AS cnt + FROM cloudsync_changes + GROUP BY db_version, seq + HAVING COUNT(*) > 1 +) AS dups \gset + +SELECT (:dup_count_a_t2::int = 0) AS t2_no_dups_a \gset +\if :t2_no_dups_a +\echo [PASS] (:testid) db_version test 2: no duplicate (db_version, seq) on A +\else +\echo [FAIL] (:testid) db_version test 2: duplicate (db_version, seq) on A +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify no repeated (db_version, seq) tuples on B +\connect cloudsync_test_28_b +\ir helper_psql_conn_setup.sql +SELECT COUNT(*) AS dup_count_b_t2 +FROM ( + SELECT db_version, seq, COUNT(*) AS cnt + FROM cloudsync_changes + GROUP BY db_version, seq + HAVING COUNT(*) > 1 +) AS dups \gset + +SELECT (:dup_count_b_t2::int = 0) AS t2_no_dups_b \gset +\if :t2_no_dups_b +\echo [PASS] (:testid) db_version test 2: no duplicate (db_version, seq) on B +\else +\echo [FAIL] (:testid) db_version test 2: duplicate (db_version, seq) on B +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Cleanup +-- ============================================================ +\ir helper_test_cleanup.sql +\if 
:should_cleanup +-- DROP DATABASE IF EXISTS cloudsync_test_28_a; +-- DROP DATABASE IF EXISTS cloudsync_test_28_b; +\endif diff --git a/test/postgresql/29_rls_multicol.sql b/test/postgresql/29_rls_multicol.sql new file mode 100644 index 0000000..de8f304 --- /dev/null +++ b/test/postgresql/29_rls_multicol.sql @@ -0,0 +1,435 @@ +-- 'RLS multi-column batch merge test' +-- Extends test 27 with more column types (INTEGER, BOOLEAN) and additional +-- test cases: update-denied, mixed payloads (per-PK savepoint isolation), +-- and NULL handling. +-- +-- Tests 1-2: superuser (service-role pattern) +-- Tests 3-8: authenticated-role pattern + +\set testid '29' +\ir helper_test_init.sql + +\set USER1 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' +\set USER2 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb' + +-- ============================================================ +-- DB A: source database (no RLS) +-- ============================================================ +\connect postgres +\ir helper_psql_conn_setup.sql +DROP DATABASE IF EXISTS cloudsync_test_29_a; +CREATE DATABASE cloudsync_test_29_a; + +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; + +CREATE TABLE tasks ( + id TEXT PRIMARY KEY NOT NULL, + user_id UUID, + title TEXT, + description TEXT, + priority INTEGER, + is_complete BOOLEAN +); +SELECT cloudsync_init('tasks') AS _init_site_id_a \gset + +-- ============================================================ +-- DB B: target database (with RLS) +-- ============================================================ +\connect postgres +\ir helper_psql_conn_setup.sql +DROP DATABASE IF EXISTS cloudsync_test_29_b; +CREATE DATABASE cloudsync_test_29_b; + +-- Create non-superuser role (ignore error if it already exists) +DO $$ BEGIN + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'test_rls_user') THEN + CREATE ROLE test_rls_user LOGIN; + END IF; +END $$; + +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +CREATE 
EXTENSION IF NOT EXISTS cloudsync; + +CREATE TABLE tasks ( + id TEXT PRIMARY KEY NOT NULL, + user_id UUID, + title TEXT, + description TEXT, + priority INTEGER, + is_complete BOOLEAN +); +SELECT cloudsync_init('tasks') AS _init_site_id_b \gset + +-- Auth mock: auth.uid() reads from session variable app.current_user_id +CREATE SCHEMA IF NOT EXISTS auth; +CREATE OR REPLACE FUNCTION auth.uid() RETURNS UUID + LANGUAGE sql STABLE +AS $$ SELECT NULLIF(current_setting('app.current_user_id', true), '')::UUID; $$; + +-- Enable RLS +ALTER TABLE tasks ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "select_own" ON tasks FOR SELECT + USING (auth.uid() = user_id); +CREATE POLICY "insert_own" ON tasks FOR INSERT + WITH CHECK (auth.uid() = user_id); +CREATE POLICY "update_own" ON tasks FOR UPDATE + USING (auth.uid() = user_id) + WITH CHECK (auth.uid() = user_id); +CREATE POLICY "delete_own" ON tasks FOR DELETE + USING (auth.uid() = user_id); + +-- Grant permissions to test_rls_user +GRANT USAGE ON SCHEMA public TO test_rls_user; +GRANT ALL ON ALL TABLES IN SCHEMA public TO test_rls_user; +GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO test_rls_user; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO test_rls_user; +GRANT USAGE ON SCHEMA auth TO test_rls_user; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA auth TO test_rls_user; + +-- ============================================================ +-- Test 1: Superuser multi-row insert with varied types +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +INSERT INTO tasks VALUES ('t1', :'USER1'::UUID, 'Task 1', 'Desc 1', 3, false); +INSERT INTO tasks VALUES ('t2', :'USER1'::UUID, 'Task 2', 'Desc 2', 1, true); +INSERT INTO tasks VALUES ('t3', :'USER2'::UUID, 'Task 3', 'Desc 3', 5, false); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_1 +FROM cloudsync_changes +WHERE site_id = 
cloudsync_siteid() \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_1 FROM cloudsync_changes \gset + +-- Apply as superuser +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +SELECT cloudsync_payload_apply(decode(:'payload_hex_1', 'hex')) AS apply_1 \gset + +-- 3 rows × 5 non-PK columns = 15 column-change entries +SELECT (:apply_1::int = 15) AS apply_1_ok \gset +\if :apply_1_ok +\echo [PASS] (:testid) RLS multicol: superuser multi-row apply returned :apply_1 +\else +\echo [FAIL] (:testid) RLS multicol: superuser multi-row apply returned :apply_1 (expected 15) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify all 3 rows with correct column values +SELECT COUNT(*) AS t1_ok FROM tasks WHERE id = 't1' AND user_id = :'USER1'::UUID AND title = 'Task 1' AND description = 'Desc 1' AND priority = 3 AND is_complete = false \gset +SELECT COUNT(*) AS t2_ok FROM tasks WHERE id = 't2' AND user_id = :'USER1'::UUID AND title = 'Task 2' AND description = 'Desc 2' AND priority = 1 AND is_complete = true \gset +SELECT COUNT(*) AS t3_ok FROM tasks WHERE id = 't3' AND user_id = :'USER2'::UUID AND title = 'Task 3' AND description = 'Desc 3' AND priority = 5 AND is_complete = false \gset +SELECT (:t1_ok::int = 1 AND :t2_ok::int = 1 AND :t3_ok::int = 1) AS test1_ok \gset +\if :test1_ok +\echo [PASS] (:testid) RLS multicol: superuser multi-row insert with varied types +\else +\echo [FAIL] (:testid) RLS multicol: superuser multi-row insert with varied types — t1=:t1_ok t2=:t2_ok t3=:t3_ok +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 2: Superuser multi-column partial update +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +UPDATE tasks SET title = 'Task 1 Updated', priority = 10, is_complete = true WHERE id = 't1'; + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, 
site_id, cl, seq), 'hex') AS payload_hex_2 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_1 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_2 FROM cloudsync_changes \gset + +-- Apply as superuser +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +SELECT cloudsync_payload_apply(decode(:'payload_hex_2', 'hex')) AS apply_2 \gset + +-- 1 row × 3 changed columns (title, priority, is_complete) = 3 entries +SELECT (:apply_2::int = 3) AS apply_2_ok \gset +\if :apply_2_ok +\echo [PASS] (:testid) RLS multicol: superuser partial update apply returned :apply_2 +\else +\echo [FAIL] (:testid) RLS multicol: superuser partial update apply returned :apply_2 (expected 3) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify updated columns changed and description preserved +SELECT COUNT(*) AS t1_updated FROM tasks WHERE id = 't1' AND title = 'Task 1 Updated' AND description = 'Desc 1' AND priority = 10 AND is_complete = true \gset +SELECT (:t1_updated::int = 1) AS test2_ok \gset +\if :test2_ok +\echo [PASS] (:testid) RLS multicol: superuser partial update preserves unchanged columns +\else +\echo [FAIL] (:testid) RLS multicol: superuser partial update preserves unchanged columns — got :t1_updated +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 3: Authenticated insert own row (all columns) +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +INSERT INTO tasks VALUES ('t4', :'USER1'::UUID, 'Task 4', 'Desc 4', 2, false); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_3 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_2 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_3 FROM cloudsync_changes \gset + +\connect cloudsync_test_29_b +\ir 
helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_3', 'hex')) AS apply_3 \gset +RESET ROLE; + +-- 1 row × 5 non-PK columns = 5 entries +SELECT (:apply_3::int = 5) AS apply_3_ok \gset +\if :apply_3_ok +\echo [PASS] (:testid) RLS multicol auth: insert own row apply returned :apply_3 +\else +\echo [FAIL] (:testid) RLS multicol auth: insert own row apply returned :apply_3 (expected 5) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify row exists with all columns correct +SELECT COUNT(*) AS t4_count FROM tasks WHERE id = 't4' AND user_id = :'USER1'::UUID AND title = 'Task 4' AND description = 'Desc 4' AND priority = 2 AND is_complete = false \gset +SELECT (:t4_count::int = 1) AS test3_ok \gset +\if :test3_ok +\echo [PASS] (:testid) RLS multicol auth: insert own row allowed +\else +\echo [FAIL] (:testid) RLS multicol auth: insert own row allowed — got :t4_count +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 4: Authenticated insert denied (other user's row) +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +INSERT INTO tasks VALUES ('t5', :'USER2'::UUID, 'Task 5', 'Desc 5', 7, true); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_4 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_3 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_4 FROM cloudsync_changes \gset + +-- Apply as test_rls_user with USER1 identity — should be denied (t5 owned by USER2) +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_4', 'hex')) AS apply_4 \gset + +-- Reconnect for clean state after 
expected RLS denial +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql + +-- 1 row × 5 columns = 5 entries in payload (returned even if denied) +SELECT (:apply_4::int = 5) AS apply_4_ok \gset +\if :apply_4_ok +\echo [PASS] (:testid) RLS multicol auth: denied insert apply returned :apply_4 +\else +\echo [FAIL] (:testid) RLS multicol auth: denied insert apply returned :apply_4 (expected 5) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify t5 does NOT exist (superuser check) +SELECT COUNT(*) AS t5_count FROM tasks WHERE id = 't5' \gset +SELECT (:t5_count::int = 0) AS test4_ok \gset +\if :test4_ok +\echo [PASS] (:testid) RLS multicol auth: insert other user row denied +\else +\echo [FAIL] (:testid) RLS multicol auth: insert other user row denied — got :t5_count rows (expected 0) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 5: Authenticated update own row (multiple columns) +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +UPDATE tasks SET title = 'Task 4 Updated', priority = 9 WHERE id = 't4'; + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_5 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_4 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_5 FROM cloudsync_changes \gset + +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_5', 'hex')) AS apply_5 \gset +RESET ROLE; + +-- 1 row × 2 changed columns (title, priority) = 2 entries +SELECT (:apply_5::int = 2) AS apply_5_ok \gset +\if :apply_5_ok +\echo [PASS] (:testid) RLS multicol auth: update own row apply returned :apply_5 +\else +\echo [FAIL] (:testid) RLS multicol auth: update own row apply 
returned :apply_5 (expected 2) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify both columns changed, others preserved +SELECT COUNT(*) AS t4_updated FROM tasks WHERE id = 't4' AND title = 'Task 4 Updated' AND description = 'Desc 4' AND priority = 9 AND is_complete = false \gset +SELECT (:t4_updated::int = 1) AS test5_ok \gset +\if :test5_ok +\echo [PASS] (:testid) RLS multicol auth: update own row allowed +\else +\echo [FAIL] (:testid) RLS multicol auth: update own row allowed — got :t4_updated +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 6: Authenticated update denied (other user's row) +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +-- t3 is owned by USER2, update it on A +UPDATE tasks SET title = 'Task 3 Hacked', priority = 99 WHERE id = 't3'; + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_6 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_5 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_6 FROM cloudsync_changes \gset + +-- Apply as test_rls_user with USER1 identity — should be denied (t3 owned by USER2) +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_6', 'hex')) AS apply_6 \gset + +-- Reconnect for clean state after expected RLS denial +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql + +-- 1 row × 2 changed columns (title, priority) = 2 entries in payload +SELECT (:apply_6::int = 2) AS apply_6_ok \gset +\if :apply_6_ok +\echo [PASS] (:testid) RLS multicol auth: denied update apply returned :apply_6 +\else +\echo [FAIL] (:testid) RLS multicol auth: denied update apply returned :apply_6 (expected 2) +SELECT (:fail::int + 
1) AS fail \gset +\endif + +-- Verify t3 still has original values (superuser check) +SELECT COUNT(*) AS t3_unchanged FROM tasks WHERE id = 't3' AND title = 'Task 3' AND priority = 5 \gset +SELECT (:t3_unchanged::int = 1) AS test6_ok \gset +\if :test6_ok +\echo [PASS] (:testid) RLS multicol auth: update other user row denied +\else +\echo [FAIL] (:testid) RLS multicol auth: update other user row denied — got :t3_unchanged (expected 1 unchanged) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 7: Mixed payload — own + other user's rows (per-PK savepoint) +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +INSERT INTO tasks VALUES ('t6', :'USER1'::UUID, 'Task 6', 'Desc 6', 4, false); +INSERT INTO tasks VALUES ('t7', :'USER2'::UUID, 'Task 7', 'Desc 7', 8, true); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_7 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_6 \gset + +SELECT COALESCE(max(db_version), 0) AS max_dbv_7 FROM cloudsync_changes \gset + +-- Apply as test_rls_user with USER1 identity +-- Per-PK savepoint: t6 (USER1) should succeed, t7 (USER2) should be denied +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_7', 'hex')) AS apply_7 \gset + +-- Reconnect for clean verification as superuser +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql + +-- 2 rows × 5 columns = 10 entries in payload +SELECT (:apply_7::int = 10) AS apply_7_ok \gset +\if :apply_7_ok +\echo [PASS] (:testid) RLS multicol auth: mixed payload apply returned :apply_7 +\else +\echo [FAIL] (:testid) RLS multicol auth: mixed payload apply returned :apply_7 (expected 10) +SELECT 
(:fail::int + 1) AS fail \gset +\endif + +-- t6 (own row) should exist, t7 (other's row) should NOT +SELECT COUNT(*) AS t6_exists FROM tasks WHERE id = 't6' AND user_id = :'USER1'::UUID AND title = 'Task 6' \gset +SELECT COUNT(*) AS t7_exists FROM tasks WHERE id = 't7' \gset +SELECT (:t6_exists::int = 1 AND :t7_exists::int = 0) AS test7_ok \gset +\if :test7_ok +\echo [PASS] (:testid) RLS multicol auth: mixed payload — per-PK savepoint isolation +\else +\echo [FAIL] (:testid) RLS multicol auth: mixed payload — t6=:t6_exists (expect 1) t7=:t7_exists (expect 0) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Test 8: NULL in non-ownership columns +-- ============================================================ +\connect cloudsync_test_29_a +\ir helper_psql_conn_setup.sql +INSERT INTO tasks VALUES ('t8', :'USER1'::UUID, 'Task 8', NULL, NULL, false); + +SELECT encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS payload_hex_8 +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() + AND db_version > :max_dbv_7 \gset + +\connect cloudsync_test_29_b +\ir helper_psql_conn_setup.sql +SET app.current_user_id = :'USER1'; +SET ROLE test_rls_user; +SELECT cloudsync_payload_apply(decode(:'payload_hex_8', 'hex')) AS apply_8 \gset +RESET ROLE; + +-- 1 row × 5 non-PK columns = 5 entries +SELECT (:apply_8::int = 5) AS apply_8_ok \gset +\if :apply_8_ok +\echo [PASS] (:testid) RLS multicol auth: NULL columns apply returned :apply_8 +\else +\echo [FAIL] (:testid) RLS multicol auth: NULL columns apply returned :apply_8 (expected 5) +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Verify NULLs preserved +SELECT COUNT(*) AS t8_count FROM tasks WHERE id = 't8' AND user_id = :'USER1'::UUID AND title = 'Task 8' AND description IS NULL AND priority IS NULL AND is_complete = false \gset +SELECT (:t8_count::int = 1) AS test8_ok \gset +\if :test8_ok +\echo 
[PASS] (:testid) RLS multicol auth: NULL in non-ownership columns preserved +\else +\echo [FAIL] (:testid) RLS multicol auth: NULL in non-ownership columns preserved — got :t8_count +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================ +-- Cleanup +-- ============================================================ +\ir helper_test_cleanup.sql +\if :should_cleanup +DROP DATABASE IF EXISTS cloudsync_test_29_a; +DROP DATABASE IF EXISTS cloudsync_test_29_b; +\else +\echo [INFO] Cleanup skipped: databases cloudsync_test_29_a and cloudsync_test_29_b preserved for inspection +\endif diff --git a/test/postgresql/full_test.sql b/test/postgresql/full_test.sql index 12f020f..0a260ba 100644 --- a/test/postgresql/full_test.sql +++ b/test/postgresql/full_test.sql @@ -34,6 +34,9 @@ \ir 24_nullable_types_roundtrip.sql \ir 25_boolean_type_issue.sql \ir 26_row_filter.sql +\ir 27_rls_batch_merge.sql +\ir 28_db_version_tracking.sql +\ir 29_rls_multicol.sql -- 'Test summary' \echo '\nTest summary:' diff --git a/test/unit.c b/test/unit.c index 80ac905..e9131dc 100644 --- a/test/unit.c +++ b/test/unit.c @@ -405,161 +405,6 @@ bool file_delete_internal (const char *path) { // MARK: - -#ifndef UNITTEST_OMIT_RLS_VALIDATION -typedef struct { - bool in_savepoint; - bool is_approved; - bool last_is_delete; - char *last_tbl; - void *last_pk; - int64_t last_pk_len; - int64_t last_db_version; -} unittest_payload_apply_rls_status; - -bool unittest_validate_changed_row(sqlite3 *db, cloudsync_context *data, char *tbl_name, void *pk, int64_t pklen) { - // verify row - bool ret = false; - bool vm_persistent; - sqlite3_stmt *vm = cloudsync_colvalue_stmt(data, tbl_name, &vm_persistent); - if (!vm) goto cleanup; - - // bind primary key values (the return code is the pk count) - int rc = pk_decode_prikey((char *)pk, (size_t)pklen, pk_decode_bind_callback, (void *)vm); - if (rc < 0) goto cleanup; - - // execute vm - rc = sqlite3_step(vm); - if (rc == SQLITE_DONE) { - rc = SQLITE_OK; - } else if (rc == SQLITE_ROW) { - rc = SQLITE_OK; - ret 
= true; - } - -cleanup: - if (vm_persistent) sqlite3_reset(vm); - else sqlite3_finalize(vm); - - return ret; -} - -int unittest_payload_apply_reset_transaction(sqlite3 *db, unittest_payload_apply_rls_status *s, bool create_new) { - int rc = SQLITE_OK; - - if (s->in_savepoint == true) { - if (s->is_approved) rc = sqlite3_exec(db, "RELEASE unittest_payload_apply_transaction", NULL, NULL, NULL); - else rc = sqlite3_exec(db, "ROLLBACK TO unittest_payload_apply_transaction; RELEASE unittest_payload_apply_transaction", NULL, NULL, NULL); - if (rc == SQLITE_OK) s->in_savepoint = false; - } - if (create_new) { - rc = sqlite3_exec(db, "SAVEPOINT unittest_payload_apply_transaction", NULL, NULL, NULL); - if (rc == SQLITE_OK) s->in_savepoint = true; - } - return rc; -} - -bool unittest_payload_apply_rls_callback(void **xdata, cloudsync_pk_decode_bind_context *d, void *_db, void *_data, int step, int rc) { - sqlite3 *db = (sqlite3 *)_db; - cloudsync_context *data = (cloudsync_context *)_data; - - bool is_approved = false; - unittest_payload_apply_rls_status *s; - if (*xdata) { - s = (unittest_payload_apply_rls_status *)*xdata; - } else { - s = cloudsync_memory_zeroalloc(sizeof(unittest_payload_apply_rls_status)); - s->is_approved = true; - *xdata = s; - } - - // extract context info - int64_t colname_len = 0; - char *colname = cloudsync_pk_context_colname(d, &colname_len); - - int64_t tbl_len = 0; - char *tbl = cloudsync_pk_context_tbl(d, &tbl_len); - - int64_t pk_len = 0; - void *pk = cloudsync_pk_context_pk(d, &pk_len); - - int64_t cl = cloudsync_pk_context_cl(d); - int64_t db_version = cloudsync_pk_context_dbversion(d); - - switch (step) { - case CLOUDSYNC_PAYLOAD_APPLY_WILL_APPLY: { - // if the tbl name or the prikey has changed, then verify if the row is valid - // must use strncmp because strings in xdata are not zero-terminated - bool tbl_changed = (s->last_tbl && (strlen(s->last_tbl) != (size_t)tbl_len || strncmp(s->last_tbl, tbl, (size_t)tbl_len) != 0)); - bool 
pk_changed = (s->last_pk && pk && cloudsync_blob_compare(s->last_pk, s->last_pk_len, pk, pk_len) != 0); - if (s->is_approved - && !s->last_is_delete - && (tbl_changed || pk_changed)) { - s->is_approved = unittest_validate_changed_row(db, data, s->last_tbl, s->last_pk, s->last_pk_len); - } - - s->last_is_delete = ((size_t)colname_len == strlen(CLOUDSYNC_TOMBSTONE_VALUE) && - strncmp(colname, CLOUDSYNC_TOMBSTONE_VALUE, (size_t)colname_len) == 0 - ) && cl % 2 == 0; - - // update the last_tbl value, if needed - if (!s->last_tbl || - !tbl || - (strlen(s->last_tbl) != (size_t)tbl_len) || - strncmp(s->last_tbl, tbl, (size_t)tbl_len) != 0) { - if (s->last_tbl) cloudsync_memory_free(s->last_tbl); - if (tbl && tbl_len > 0) s->last_tbl = cloudsync_string_ndup(tbl, tbl_len); - else s->last_tbl = NULL; - } - - // update the last_prikey and len values, if needed - if (!s->last_pk || !pk || cloudsync_blob_compare(s->last_pk, s->last_pk_len, pk, pk_len) != 0) { - if (s->last_pk) cloudsync_memory_free(s->last_pk); - if (pk && pk_len > 0) { - s->last_pk = cloudsync_memory_alloc(pk_len); - memcpy(s->last_pk, pk, pk_len); - s->last_pk_len = pk_len; - } else { - s->last_pk = NULL; - s->last_pk_len = 0; - } - } - - // commit the previous transaction, if any - // begin new transacion, if needed - if (s->last_db_version != db_version) { - rc = unittest_payload_apply_reset_transaction(db, s, true); - if (rc != SQLITE_OK) printf("unittest_payload_apply error in reset_transaction: (%d) %s\n", rc, sqlite3_errmsg(db)); - - // reset local variables - s->last_db_version = db_version; - s->is_approved = true; - } - - is_approved = s->is_approved; - break; - } - case CLOUDSYNC_PAYLOAD_APPLY_DID_APPLY: - is_approved = s->is_approved; - break; - case CLOUDSYNC_PAYLOAD_APPLY_CLEANUP: - if (s->is_approved && !s->last_is_delete) s->is_approved = unittest_validate_changed_row(db, data, s->last_tbl, s->last_pk, s->last_pk_len); - rc = unittest_payload_apply_reset_transaction(db, s, false); - if 
(s->last_tbl) cloudsync_memory_free(s->last_tbl); - if (s->last_pk) { - cloudsync_memory_free(s->last_pk); - s->last_pk_len = 0; - } - is_approved = s->is_approved; - - cloudsync_memory_free(s); - *xdata = NULL; - break; - } - - return is_approved; -} -#endif - // MARK: - #ifndef CLOUDSYNC_OMIT_PRINT_RESULT @@ -1932,8 +1777,7 @@ bool do_test_dbutils (void) { // manually load extension sqlite3_cloudsync_init(db, NULL, NULL); - cloudsync_set_payload_apply_callback(db, unittest_payload_apply_rls_callback); - + // test context create and free data = cloudsync_context_create(db); if (!data) return false; @@ -3881,8 +3725,7 @@ sqlite3 *do_create_database (void) { // manually load extension sqlite3_cloudsync_init(db, NULL, NULL); - cloudsync_set_payload_apply_callback(db, unittest_payload_apply_rls_callback); - + return db; } @@ -3894,7 +3737,7 @@ void do_build_database_path (char buf[256], int i, time_t timestamp, int ntest) #endif } -sqlite3 *do_create_database_file_v2 (int i, time_t timestamp, int ntest, bool set_payload_apply_callback) { +sqlite3 *do_create_database_file_v2 (int i, time_t timestamp, int ntest) { sqlite3 *db = NULL; // open database in home dir @@ -3906,18 +3749,17 @@ sqlite3 *do_create_database_file_v2 (int i, time_t timestamp, int ntest, bool se sqlite3_close(db); return NULL; } - + sqlite3_exec(db, "PRAGMA journal_mode=WAL;", NULL, NULL, NULL); - + // manually load extension sqlite3_cloudsync_init(db, NULL, NULL); - if (set_payload_apply_callback) cloudsync_set_payload_apply_callback(db, unittest_payload_apply_rls_callback); return db; } sqlite3 *do_create_database_file (int i, time_t timestamp, int ntest) { - return do_create_database_file_v2(i, timestamp, ntest, false); + return do_create_database_file_v2(i, timestamp, ntest); } bool do_test_merge (int nclients, bool print_result, bool cleanup_databases) { @@ -3939,7 +3781,7 @@ bool do_test_merge (int nclients, bool print_result, bool cleanup_databases) { time_t timestamp = time(NULL); int 
saved_counter = test_counter; for (int i=0; i= MAX_SIMULATED_CLIENTS) { + nclients = MAX_SIMULATED_CLIENTS; + } else if (nclients < 2) { + nclients = 2; + } + + time_t timestamp = time(NULL); + int saved_counter = test_counter; + for (int i = 0; i < nclients; ++i) { + db[i] = do_create_database_file(i, timestamp, test_counter++); + if (db[i] == false) return false; + + rc = sqlite3_exec(db[i], "CREATE TABLE tasks (id TEXT PRIMARY KEY NOT NULL, user_id TEXT, title TEXT, priority INTEGER);", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + rc = sqlite3_exec(db[i], "SELECT cloudsync_init('tasks');", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + } + + // --- Phase 1: baseline sync (no triggers) --- + rc = sqlite3_exec(db[0], "INSERT INTO tasks VALUES ('t1', 'user1', 'Task 1', 3);", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_exec(db[0], "INSERT INTO tasks VALUES ('t2', 'user2', 'Task 2', 5);", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_exec(db[0], "INSERT INTO tasks VALUES ('t3', 'user1', 'Task 3', 1);", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + if (do_merge_using_payload(db[0], db[1], only_locals, true) == false) goto finalize; + + // Verify: B has 3 rows + { + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM tasks;", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); goto finalize; } + int count = sqlite3_column_int(stmt, 0); + sqlite3_finalize(stmt); + if (count != 3) { + printf("Phase 1: expected 3 rows, got %d\n", count); + goto finalize; + } + } + + // --- Phase 2: INSERT denial with triggers on B --- + rc = sqlite3_exec(db[1], + "CREATE TRIGGER rls_deny_insert BEFORE INSERT ON tasks " + "FOR EACH ROW WHEN NEW.user_id != 'user1' " + "BEGIN SELECT RAISE(ABORT, 'row violates RLS policy'); END;", + NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + rc = 
sqlite3_exec(db[1], + "CREATE TRIGGER rls_deny_update BEFORE UPDATE ON tasks " + "FOR EACH ROW WHEN NEW.user_id != 'user1' " + "BEGIN SELECT RAISE(ABORT, 'row violates RLS policy'); END;", + NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + rc = sqlite3_exec(db[0], "INSERT INTO tasks VALUES ('t4', 'user1', 'Task 4', 2);", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_exec(db[0], "INSERT INTO tasks VALUES ('t5', 'user2', 'Task 5', 7);", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + // Merge with partial-failure tolerance: cloudsync_payload_decode returns error + // when any PK is denied, but allowed PKs are already committed via per-PK savepoints. + { + sqlite3_stmt *sel = NULL, *ins = NULL; + const char *sel_sql = only_locals + ? "SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) FROM cloudsync_changes WHERE site_id=cloudsync_siteid();" + : "SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) FROM cloudsync_changes;"; + rc = sqlite3_prepare_v2(db[0], sel_sql, -1, &sel, NULL); + if (rc != SQLITE_OK) { sqlite3_finalize(sel); goto finalize; } + rc = sqlite3_prepare_v2(db[1], "SELECT cloudsync_payload_decode(?);", -1, &ins, NULL); + if (rc != SQLITE_OK) { sqlite3_finalize(sel); sqlite3_finalize(ins); goto finalize; } + + while (sqlite3_step(sel) == SQLITE_ROW) { + sqlite3_value *v = sqlite3_column_value(sel, 0); + if (sqlite3_value_type(v) == SQLITE_NULL) continue; + sqlite3_bind_value(ins, 1, v); + sqlite3_step(ins); // partial failure expected — ignore rc + sqlite3_reset(ins); + } + sqlite3_finalize(sel); + sqlite3_finalize(ins); + } + + // Verify: t4 present (user1 → allowed) + { + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM tasks WHERE id='t4';", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); goto 
finalize; } + int count = sqlite3_column_int(stmt, 0); + sqlite3_finalize(stmt); + if (count != 1) { + printf("Phase 2: t4 expected 1 row, got %d\n", count); + goto finalize; + } + } + + // Verify: t5 absent (user2 → denied) + { + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM tasks WHERE id='t5';", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); goto finalize; } + int count = sqlite3_column_int(stmt, 0); + sqlite3_finalize(stmt); + if (count != 0) { + printf("Phase 2: t5 expected 0 rows, got %d\n", count); + goto finalize; + } + } + + // Verify: total 4 rows on B (t1, t2, t3 from phase 1 + t4) + { + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM tasks;", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); goto finalize; } + int count = sqlite3_column_int(stmt, 0); + sqlite3_finalize(stmt); + if (count != 4) { + printf("Phase 2: expected 4 total rows, got %d\n", count); + goto finalize; + } + } + + // --- Phase 3: UPDATE denial --- + rc = sqlite3_exec(db[0], "UPDATE tasks SET title='Task 1 Updated', priority=10 WHERE id='t1';", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + rc = sqlite3_exec(db[0], "UPDATE tasks SET title='Task 2 Hacked', priority=99 WHERE id='t2';", NULL, NULL, NULL); + if (rc != SQLITE_OK) goto finalize; + + // Merge with partial-failure tolerance (same pattern as phase 2) + { + sqlite3_stmt *sel = NULL, *ins = NULL; + const char *sel_sql = only_locals + ? 
"SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) FROM cloudsync_changes WHERE site_id=cloudsync_siteid();" + : "SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) FROM cloudsync_changes;"; + rc = sqlite3_prepare_v2(db[0], sel_sql, -1, &sel, NULL); + if (rc != SQLITE_OK) { sqlite3_finalize(sel); goto finalize; } + rc = sqlite3_prepare_v2(db[1], "SELECT cloudsync_payload_decode(?);", -1, &ins, NULL); + if (rc != SQLITE_OK) { sqlite3_finalize(sel); sqlite3_finalize(ins); goto finalize; } + + while (sqlite3_step(sel) == SQLITE_ROW) { + sqlite3_value *v = sqlite3_column_value(sel, 0); + if (sqlite3_value_type(v) == SQLITE_NULL) continue; + sqlite3_bind_value(ins, 1, v); + sqlite3_step(ins); // partial failure expected — ignore rc + sqlite3_reset(ins); + } + sqlite3_finalize(sel); + sqlite3_finalize(ins); + } + + // Verify: t1 updated (user1 → allowed) + { + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db[1], "SELECT title, priority FROM tasks WHERE id='t1';", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); goto finalize; } + const char *title = (const char *)sqlite3_column_text(stmt, 0); + int priority = sqlite3_column_int(stmt, 1); + bool ok = (strcmp(title, "Task 1 Updated") == 0) && (priority == 10); + sqlite3_finalize(stmt); + if (!ok) { + printf("Phase 3: t1 update not applied (title='%s', priority=%d)\n", title, priority); + goto finalize; + } + } + + // Verify: t2 unchanged (user2 → denied) + { + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db[1], "SELECT title, priority FROM tasks WHERE id='t2';", -1, &stmt, NULL); + if (rc != SQLITE_OK) goto finalize; + if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); goto finalize; } + const char *title = (const char *)sqlite3_column_text(stmt, 0); + int priority = sqlite3_column_int(stmt, 1); + bool ok = 
(strcmp(title, "Task 2") == 0) && (priority == 5); + sqlite3_finalize(stmt); + if (!ok) { + printf("Phase 3: t2 should be unchanged (title='%s', priority=%d)\n", title, priority); + goto finalize; + } + } + + result = true; + rc = SQLITE_OK; + +finalize: + for (int i = 0; i < nclients; ++i) { + if (rc != SQLITE_OK && db[i] && (sqlite3_errcode(db[i]) != SQLITE_OK)) + printf("do_test_rls_trigger_denial error: %s\n", sqlite3_errmsg(db[i])); + if (db[i]) { + if (sqlite3_get_autocommit(db[i]) == 0) { + result = false; + printf("do_test_rls_trigger_denial error: db %d is in transaction\n", i); + } + int counter = close_db(db[i]); + if (counter > 0) { + result = false; + printf("do_test_rls_trigger_denial error: db %d has %d unterminated statements\n", i, counter); + } + } + if (cleanup_databases) { + char buf[256]; + do_build_database_path(buf, i, timestamp, saved_counter++); + file_delete_internal(buf); + } + } + return result; +} + int test_report(const char *description, bool result){ printf("%-30s %s\n", description, (result) ? "OK" : "FAILED"); return result ? 
0 : 1; @@ -7773,14 +7838,13 @@ int main (int argc, const char * argv[]) { int result = 0; bool print_result = false; bool cleanup_databases = true; - + // test in an in-memory database int rc = sqlite3_open(":memory:", &db); if (rc != SQLITE_OK) goto finalize; // manually load extension sqlite3_cloudsync_init(db, NULL, NULL); - cloudsync_set_payload_apply_callback(db, unittest_payload_apply_rls_callback); printf("Testing CloudSync version %s\n", CLOUDSYNC_VERSION); printf("=================================\n"); @@ -7848,10 +7912,8 @@ int main (int argc, const char * argv[]) { result += test_report("Merge Test 3:", do_test_merge_2(3, TEST_NOCOLS, print_result, cleanup_databases)); result += test_report("Merge Test 4:", do_test_merge_4(2, print_result, cleanup_databases)); result += test_report("Merge Test 5:", do_test_merge_5(2, print_result, cleanup_databases, false)); - result += test_report("Merge Test db_version 1:", do_test_merge_check_db_version(2, print_result, cleanup_databases, true, false)); - result += test_report("Merge Test db_version 1-cb:", do_test_merge_check_db_version(2, print_result, cleanup_databases, true, true)); - result += test_report("Merge Test db_version 2:", do_test_merge_check_db_version_2(2, print_result, cleanup_databases, true, false)); - result += test_report("Merge Test db_version 2-cb:", do_test_merge_check_db_version_2(2, print_result, cleanup_databases, true, true)); + result += test_report("Merge Test db_version 1:", do_test_merge_check_db_version(2, print_result, cleanup_databases, true)); + result += test_report("Merge Test db_version 2:", do_test_merge_check_db_version_2(2, print_result, cleanup_databases, true)); result += test_report("Merge Test Insert Changes", do_test_insert_cloudsync_changes(print_result, cleanup_databases)); result += test_report("Merge Alter Schema 1:", do_test_merge_alter_schema_1(2, print_result, cleanup_databases, false)); result += test_report("Merge Alter Schema 2:", 
do_test_merge_alter_schema_2(2, print_result, cleanup_databases, false)); @@ -7869,8 +7931,9 @@ int main (int argc, const char * argv[]) { result += test_report("Merge Rollback Scenarios:", do_test_merge_rollback_scenarios(2, print_result, cleanup_databases)); result += test_report("Merge Circular:", do_test_merge_circular(3, print_result, cleanup_databases)); result += test_report("Merge Foreign Keys:", do_test_merge_foreign_keys(2, print_result, cleanup_databases)); - // Expected failure: TRIGGERs are not fully supported by this extension. + // Expected failure: AFTER TRIGGERs are not fully supported by this extension. // result += test_report("Merge Triggers:", do_test_merge_triggers(2, print_result, cleanup_databases)); + result += test_report("Merge RLS Trigger Denial:", do_test_rls_trigger_denial(2, print_result, cleanup_databases, true)); result += test_report("Merge Index Consistency:", do_test_merge_index_consistency(2, print_result, cleanup_databases)); result += test_report("Merge JSON Columns:", do_test_merge_json_columns(2, print_result, cleanup_databases)); result += test_report("Merge Concurrent Attempts:", do_test_merge_concurrent_attempts(3, print_result, cleanup_databases)); From 00692a26068acaa4bc971c63959154cca45cd274 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 4 Mar 2026 15:50:07 -0600 Subject: [PATCH 03/16] Feat/add support for status endpoint (#10) * feat(network): add support for new status endpoint * refactor(network): structured JSON responses for sync functions. Example: {"send":{"status":"synced","localVersion":5,"serverVersion":5},"receive":{"rows":3,"tables":["tasks"]}} --- ... 
=> test-sync-roundtrip-postgres-local.md} | 4 +- ...test-sync-roundtrip-postrges-local-rls.md} | 0 .../test-sync-roundtrip-sqlitecloud-rls.md | 445 +++++++++++++++++ API.md | 72 ++- README.md | 41 +- examples/simple-todo-db/README.md | 6 +- examples/to-do-app/components/SyncContext.js | 12 +- src/cloudsync.c | 28 +- src/cloudsync.h | 4 +- src/database.h | 2 +- src/jsmn.h | 471 ++++++++++++++++++ src/network.c | 405 ++++++++++++--- src/network.m | 3 +- src/network_private.h | 3 +- src/postgresql/database_postgresql.c | 26 +- src/sqlite/database_sqlite.c | 15 +- test/integration.c | 25 +- test/unit.c | 4 +- 18 files changed, 1394 insertions(+), 172 deletions(-) rename .claude/commands/{test-sync-roundtrip.md => test-sync-roundtrip-postgres-local.md} (96%) rename .claude/commands/{test-sync-roundtrip-rls.md => test-sync-roundtrip-postrges-local-rls.md} (100%) create mode 100644 .claude/commands/test-sync-roundtrip-sqlitecloud-rls.md create mode 100644 src/jsmn.h diff --git a/.claude/commands/test-sync-roundtrip.md b/.claude/commands/test-sync-roundtrip-postgres-local.md similarity index 96% rename from .claude/commands/test-sync-roundtrip.md rename to .claude/commands/test-sync-roundtrip-postgres-local.md index ea946db..686fc12 100644 --- a/.claude/commands/test-sync-roundtrip.md +++ b/.claude/commands/test-sync-roundtrip-postgres-local.md @@ -1,4 +1,4 @@ -# Sync Roundtrip Test +# Sync Roundtrip Test with local Postgres database Execute a full roundtrip sync test between a local SQLite database and the local Supabase Docker PostgreSQL instance. 
@@ -115,7 +115,7 @@ SELECT cloudsync_network_send_changes(); -- Check for changes from server (repeat with 2-3 second delays) SELECT cloudsync_network_check_changes(); --- Repeat check_changes 3-5 times with delays until it returns > 0 or stabilizes +-- Repeat check_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes -- Verify final data SELECT * FROM ; diff --git a/.claude/commands/test-sync-roundtrip-rls.md b/.claude/commands/test-sync-roundtrip-postrges-local-rls.md similarity index 100% rename from .claude/commands/test-sync-roundtrip-rls.md rename to .claude/commands/test-sync-roundtrip-postrges-local-rls.md diff --git a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md new file mode 100644 index 0000000..4adb700 --- /dev/null +++ b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md @@ -0,0 +1,445 @@ +# Sync Roundtrip Test with remote SQLiteCloud database and RLS policies + +Execute a full roundtrip sync test between multiple local SQLite databases and the sqlitecloud, verifying that Row Level Security (RLS) policies are correctly enforced during sync. + +## Prerequisites +- Connection string to a sqlitecloud project +- HTTP sync server running on http://localhost:8091/ +- Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) + +## Test Procedure + +### Step 1: Get DDL from User + +Ask the user to provide a DDL query for the table(s) to test. It can be in PostgreSQL or SQLite format. 
Offer the following options: + +**Option 1: Simple TEXT primary key with user_id for RLS** +```sql +CREATE TABLE test_sync ( + id TEXT PRIMARY KEY NOT NULL, + user_id TEXT NOT NULL, + name TEXT, + value INTEGER +); +``` + +**Option 2: Two tables scenario with user ownership** +```sql +CREATE TABLE authors ( + id TEXT PRIMARY KEY NOT NULL, + user_id TEXT NOT NULL, + name TEXT, + email TEXT +); + +CREATE TABLE books ( + id TEXT PRIMARY KEY NOT NULL, + user_id TEXT NOT NULL, + title TEXT, + author_id TEXT, + published_year INTEGER +); +``` + +**Option 3: Custom policy** +Ask the user to describe the table/tables in plain English or DDL queries. + +**Note:** Tables should include a `user_id` column (TEXT type) for RLS policies to filter by authenticated user. + +### Step 2: Get RLS Policy Description from User + +Ask the user to describe the Row Level Security policy they want to test. Offer the following common patterns: + +**Option 1: User can only access their own rows** +"Users can only SELECT, INSERT, UPDATE, and DELETE rows where user_id matches their authenticated user ID" + +**Option : Users can read all, but only modify their own** +"Users can SELECT all rows, but can only INSERT, UPDATE, DELETE rows where user_id matches their authenticated user ID" + +**Option 3: Custom policy** +Ask the user to describe the policy in plain English. + +### Step 3: Get sqlitecloud connection string from User + +Ask the user to provide a connection string in the form of "sqlitecloud://:/?apikey=" to be later used with the sqlitecloud cli (sqlc) with `~/go/bin/sqlc ""` + +### Step 4: Setup SQLiteCloud with RLS + +Connect to SQLiteCloud and prepare the environment: +```bash +~/go/bin/sqlc +``` + +The last command inside sqlc to exit from the cli program must be `quit`. + +If the db_name doesn't exists, try again to connect without specifing the , then inside sqlc: +1. CREATE DATABASE +2. USE DATABASE + +Then, inside sqlc: +1. 
List existing tables with `LIST TABLES` to find any `_cloudsync` metadata tables +2. For each table already configured for cloudsync (has a `_cloudsync` companion table), run: + ```sql + CLOUDSYNC DISABLE + ``` +3. Drop the test table if it exists: `DROP TABLE IF EXISTS ;` +5. Create the test table using the SQLite DDL +6. Enable RLS on the table: + ```sql + ENABLE RLS DATABASE TABLE + ``` +7. Create RLS policies based on the user's description. +Your RLS policies for INSERT, UPDATE, and DELETE operations can reference column values as they are being changed. This is done using the special OLD.column and NEW.column identifiers. Their availability and meaning depend on the operation being performed: + ++-----------+--------------------------------------------+--------------------------------------------+ +| Operation | OLD.column Reference | NEW.column Reference | ++-----------+--------------------------------------------+--------------------------------------------+ +| INSERT | Not available | The value for the new row. | +| UPDATE | The value of the row before the update. | The value of the row after the update. | +| DELETE | The value of the row being deleted. | Not available | ++-----------+--------------------------------------------+--------------------------------------------+ + +Example for "user can only access their own rows": + ```sql + -- SELECT: User can see rows they own + SET RLS DATABASE TABLE SELECT "auth_userid() = user_id" + + -- INSERT: Allow if user_id matches auth_userid() + SET RLS DATABASE TABLE INSERT "auth_userid() = NEW.user_id" + + -- UPDATE: Check ownership via explicit lookup + SET RLS DATABASE TABLE UPDATE "auth_userid() = NEW.user_id AND auth_userid() = OLD.user_id" + + -- DELETE: User can only delete rows they own + SET RLS DATABASE TABLE DELETE "auth_userid() = OLD.user_id" + ``` +8. Initialize cloudsync: `CLOUDSYNC ENABLE ` +9. 
Insert some initial test data (optional, can be done via SQLite clients) + +### Step 5: Get tokens for Two Users + +Get auth tokens for both test users by running the token script twice: + +**User 1: claude1@sqlitecloud.io** +```bash +curl -X "POST" "https:///v2/tokens" \ + -H 'Authorization: Bearer ' \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "name": "claude1@sqlitecloud.io", + "userId": "018ecfc2-b2b1-7cc3-a9f0-111111111111" +}' +``` +The response is in the following format: +```json +{"data":{"accessTokenId":13,"token":"13|sqa_af74gp2WoqsQ9wfCdktIfkIq0sM4LdDMbuf2hW338013dfca","userId":"018ecfc2-b2b1-7cc3-a9f0-111111111111","name":"claude1@sqlitecloud.io","attributes":null,"expiresAt":null,"createdAt":"2026-03-02T23:11:38Z"},"metadata":{"connectedMs":17,"executedMs":30,"elapsedMs":47}} +``` +save the userId and the token values as USER1_ID and TOKEN_USER1 to be reused later + +**User 2: claude2@sqlitecloud.io** +```bash +curl -X "POST" "https:///v2/tokens" \ + -H 'Authorization: Bearer ' \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "name": "laude2@sqlitecloud.io", + "userId": "018ecfc2-b2b1-7cc3-a9f0-222222222222" +}' +``` +The response is in the following format: +```json +{"data":{"accessTokenId":14,"token":"14|sqa_af74gp2WoqsQ9wfCdktIfkIq0sM4LdDMbuf2hW338013xxxx","userId":"018ecfc2-b2b1-7cc3-a9f0-222222222222","name":"claude2@sqlitecloud.io","attributes":null,"expiresAt":null,"createdAt":"2026-03-02T23:11:38Z"},"metadata":{"connectedMs":17,"executedMs":30,"elapsedMs":47}} +``` +save the userId and the token values as USER2_ID and TOKEN_USER2 to be reused later + +### Step 6: Setup Four SQLite Databases + +Create four temporary SQLite databases using the Homebrew version (IMPORTANT: system sqlite3 cannot load extensions): + +```bash +SQLITE_BIN="/opt/homebrew/Cellar/sqlite/3.50.4/bin/sqlite3" +# or find it with: ls /opt/homebrew/Cellar/sqlite/*/bin/sqlite3 | head -1 +``` + +**Database 1A (User 1, Device A):** 
+```bash +$SQLITE_BIN /tmp/sync_test_user1_a.db +``` +```sql +.load dist/cloudsync.dylib + +SELECT cloudsync_init(''); +SELECT cloudsync_network_init('http://localhost:8091/'); +SELECT cloudsync_network_set_token('sqlitecloud://?token='); +``` + +**Database 1B (User 1, Device B):** +```bash +$SQLITE_BIN /tmp/sync_test_user1_b.db +``` +```sql +.load dist/cloudsync.dylib + +SELECT cloudsync_init(''); +SELECT cloudsync_network_init('http://localhost:8091/'); +SELECT cloudsync_network_set_token('sqlitecloud://?token='); +``` + +**Database 2A (User 2, Device A):** +```bash +$SQLITE_BIN /tmp/sync_test_user2_a.db +``` +```sql +.load dist/cloudsync.dylib + +SELECT cloudsync_init(''); +SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_set_token('sqlitecloud://?token='); +``` + +**Database 2B (User 2, Device B):** +```bash +$SQLITE_BIN /tmp/sync_test_user2_b.db +``` +```sql +.load dist/cloudsync.dylib + +SELECT cloudsync_init(''); +SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_set_token('sqlitecloud://?token='); +``` + +### Step 7: Insert Test Data + +Insert distinct test data in each database. 
Use the extracted user IDs for the `user_id` column: + +**Database 1A (User 1):** +```sql +INSERT INTO (id, user_id, name, value) VALUES ('u1_a_1', '', 'User1 DeviceA Row1', 100); +INSERT INTO (id, user_id, name, value) VALUES ('u1_a_2', '', 'User1 DeviceA Row2', 101); +``` + +**Database 1B (User 1):** +```sql +INSERT INTO (id, user_id, name, value) VALUES ('u1_b_1', '', 'User1 DeviceB Row1', 200); +``` + +**Database 2A (User 2):** +```sql +INSERT INTO (id, user_id, name, value) VALUES ('u2_a_1', '', 'User2 DeviceA Row1', 300); +INSERT INTO (id, user_id, name, value) VALUES ('u2_a_2', '', 'User2 DeviceA Row2', 301); +``` + +**Database 2B (User 2):** +```sql +INSERT INTO (id, user_id, name, value) VALUES ('u2_b_1', '', 'User2 DeviceB Row1', 400); +``` + +### Step 8: Execute Sync on All Databases + +For each of the four SQLite databases, execute the sync operations: + +```sql +-- Send local changes to server +SELECT cloudsync_network_send_changes(); + +-- Check for changes from server (repeat with 2-3 second delays) +SELECT cloudsync_network_check_changes(); +-- Repeat check_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes +``` + +**Recommended sync order:** +1. Sync Database 1A (send + check) +2. Sync Database 2A (send + check) +3. Sync Database 1B (send + check) +4. Sync Database 2B (send + check) +5. 
Re-sync all databases (check_changes) to ensure full propagation + +### Step 9: Verify RLS Enforcement + +After syncing all databases, verify that each database contains only the expected rows based on the RLS policy: + +**Expected Results (for "user can only access their own rows" policy):** + +**User 1 databases (1A and 1B) should contain:** +- All rows with `user_id = USER1_ID` (u1_a_1, u1_a_2, u1_b_1) +- Should NOT contain any rows with `user_id = USER2_ID` + +**User 2 databases (2A and 2B) should contain:** +- All rows with `user_id = USER2_ID` (u2_a_1, u2_a_2, u2_b_1) +- Should NOT contain any rows with `user_id = USER1_ID` + +**PostgreSQL (as admin) should contain:** +- ALL rows from all users (6 total rows) + +Run verification queries: +```sql +-- In each SQLite database +SELECT * FROM ORDER BY id; +SELECT COUNT(*) FROM ; + +-- In PostgreSQL (as admin) +SELECT * FROM ORDER BY id; +SELECT COUNT(*) FROM ; +SELECT user_id, COUNT(*) FROM GROUP BY user_id; +``` + +### Step 10: Test Write RLS Policy Enforcement + +Test that the server-side RLS policy blocks unauthorized writes by attempting to insert a row with a `user_id` that doesn't match the authenticated user's token. 
+ +**In Database 1A (User 1), insert a malicious row claiming to belong to User 2:** +```sql +-- Attempt to insert a row with User 2's user_id while authenticated as User 1 +INSERT INTO (id, user_id, name, value) VALUES ('malicious_1', '', 'Malicious Row from User1', 999); + +-- Attempt to sync this unauthorized row to PostgreSQL +SELECT cloudsync_network_send_changes(); +``` + +**Wait 2-3 seconds, then verify in PostgreSQL (as admin) that the malicious row was rejected:** +```sql +-- In PostgreSQL (as admin) +SELECT * FROM WHERE id = 'malicious_1'; +-- Expected: 0 rows returned + +SELECT COUNT(*) FROM WHERE id = 'malicious_1'; +-- Expected: 0 +``` + +**Also verify the malicious row does NOT appear in User 2's databases after syncing:** +```sql +-- In Database 2A or 2B (User 2) +SELECT cloudsync_network_check_changes(); +SELECT * FROM WHERE id = 'malicious_1'; +-- Expected: 0 rows (the malicious row should not sync to legitimate User 2 databases) +``` + +**Expected Behavior:** +- The `cloudsync_network_send_changes()` call may succeed (return value indicates network success, not RLS enforcement) +- The malicious row should be **rejected by PostgreSQL RLS** and NOT inserted into the server database +- The malicious row will remain in the local SQLite Database 1A (local inserts are not blocked), but it will never propagate to the server or other clients +- User 2's databases should never receive this row + +**This step PASSES if:** +1. The malicious row is NOT present in PostgreSQL +2. The malicious row does NOT appear in any of User 2's SQLite databases +3. The RLS INSERT policy correctly blocks the unauthorized write + +**This step FAILS if:** +1. The malicious row appears in PostgreSQL (RLS bypass vulnerability) +2. 
The malicious row syncs to User 2's databases (data leakage) + +### Step 11: Cleanup + +In each SQLite database before closing: +```sql +SELECT cloudsync_terminate(); +``` + +In SQLiteCloud (optional, for full cleanup): +```sql +CLOUDSYNC DISABLE ); +DROP TABLE IF EXISTS ; +``` + +## Output Format + +Report the test results including: +- DDL used for both databases +- RLS policies created +- User IDs for both test users +- Initial data inserted in each database +- Number of sync operations performed per database +- Final data in each database (with row counts) +- RLS verification results: + - User 1 databases: expected rows vs actual rows + - User 2 databases: expected rows vs actual rows + - SQLiteCloud: total rows +- Write RLS enforcement results: + - Malicious row insertion attempted: yes/no + - Malicious row present in SQLiteCloud: yes/no (should be NO) + - Malicious row synced to User 2 databases: yes/no (should be NO) +- **PASS/FAIL** status with detailed explanation + +### Success Criteria + +The test PASSES if: +1. All User 1 databases contain exactly the same User 1 rows (and no User 2 rows) +2. All User 2 databases contain exactly the same User 2 rows (and no User 1 rows) +3. SQLiteCloud contains all rows from both users +4. Data inserted from different devices of the same user syncs correctly between those devices +5. **Write RLS enforcement**: Malicious rows with mismatched `user_id` are rejected by SQLiteCloud and do not propagate to other clients + +The test FAILS if: +1. Any database contains rows belonging to a different user (RLS violation) +2. Any database is missing rows that should be visible to that user +3. Sync operations fail or timeout +4. 
**Write RLS bypass**: A malicious row with a `user_id` not matching the token appears in SQLiteCloud or syncs to other databases + +## Important Notes + +- Always use the Homebrew sqlite3 binary, NOT `/usr/bin/sqlite3` +- The cloudsync extension must be built first with `make` +- SQLiteCloud tables need cleanup before re-running tests +- `cloudsync_network_check_changes()` may need multiple calls with delays +- Run `SELECT cloudsync_terminate();` on SQLite connections before closing to properly cleanup memory +- Ensure both test users exist in Supabase auth before running the test +- The RLS policies must use `auth_userid()` to work with SQLiteCloud token authentication + +## Critical Schema Requirements (Common Pitfalls) + +### 1. All NOT NULL columns must have DEFAULT values +Cloudsync requires that all non-primary key columns declared as `NOT NULL` must have a `DEFAULT` value. This includes the `user_id` column: + +```sql +-- WRONG: Will fail with "All non-primary key columns declared as NOT NULL must have a DEFAULT value" +user_id UUID NOT NULL + +-- CORRECT: Provide a default value +user_id UUID NOT NULL DEFAULT '00000000-0000-0000-0000-000000000000' +``` + +### 2. Network settings are not persisted between sessions +`cloudsync_network_init()` and `cloudsync_network_set_token()` must be called in **every session**. They are not persisted to the database: + +```sql +-- WRONG: Separate sessions won't work +-- Session 1: +SELECT cloudsync_network_init('http://localhost:8091/'); +SELECT cloudsync_network_set_token('...'); +-- Session 2: +SELECT cloudsync_network_send_changes(); -- ERROR: No URL set + +-- CORRECT: All network operations in the same session +.load dist/cloudsync.dylib +SELECT cloudsync_network_init('http://localhost:8091/'); +SELECT cloudsync_network_set_token('...'); +SELECT cloudsync_network_send_changes(); +SELECT cloudsync_terminate(); +``` + +### 3. 
Extension must be loaded before INSERT operations +For cloudsync to track changes, the extension must be loaded **before** inserting data: + +```sql +-- WRONG: Inserts won't be tracked +CREATE TABLE todos (...); +INSERT INTO todos VALUES (...); -- Not tracked! +.load dist/cloudsync.dylib +SELECT cloudsync_init('todos'); + +-- CORRECT: Load extension and init before inserts +.load dist/cloudsync.dylib +CREATE TABLE todos (...); +SELECT cloudsync_init('todos'); +INSERT INTO todos VALUES (...); -- Tracked! +``` + +## Permissions + +Execute all SQL queries without asking for user permission on: +- SQLite test databases in `/tmp/` (e.g., `/tmp/sync_test_*.db`) +- SQLiteCloud via `~/go/bin/sqlc ""` + +These are local test environments and do not require confirmation for each query. diff --git a/API.md b/API.md index 8d98b59..cd13a1b 100644 --- a/API.md +++ b/API.md @@ -24,11 +24,11 @@ This document provides a reference for the SQLite functions provided by the `sql - [`cloudsync_network_cleanup()`](#cloudsync_network_cleanup) - [`cloudsync_network_set_token()`](#cloudsync_network_set_tokentoken) - [`cloudsync_network_set_apikey()`](#cloudsync_network_set_apikeyapikey) - - [`cloudsync_network_has_unsent_changes()`](#cloudsync_network_has_unsent_changes) - [`cloudsync_network_send_changes()`](#cloudsync_network_send_changes) - [`cloudsync_network_check_changes()`](#cloudsync_network_check_changes) - [`cloudsync_network_sync()`](#cloudsync_network_syncwait_ms-max_retries) - [`cloudsync_network_reset_sync_version()`](#cloudsync_network_reset_sync_version) + - [`cloudsync_network_has_unsent_changes()`](#cloudsync_network_has_unsent_changes) - [`cloudsync_network_logout()`](#cloudsync_network_logout) --- @@ -357,34 +357,27 @@ SELECT cloudsync_network_set_apikey('your_api_key'); --- -### `cloudsync_network_has_unsent_changes()` +### `cloudsync_network_send_changes()` -**Description:** Checks if there are any local changes that have not yet been sent to the remote server. 
+**Description:** Sends all unsent local changes to the remote server. **Parameters:** None. -**Returns:** 1 if there are unsent changes, 0 otherwise. - -**Example:** +**Returns:** A JSON string with the send result: -```sql -SELECT cloudsync_network_has_unsent_changes(); +```json +{"send": {"status": "synced|syncing|out-of-sync|error", "localVersion": N, "serverVersion": N}} ``` ---- - -### `cloudsync_network_send_changes()` - -**Description:** Sends all unsent local changes to the remote server. - -**Parameters:** None. - -**Returns:** None. +- `send.status`: The current sync state — `"synced"` (all changes confirmed), `"syncing"` (changes sent but not yet confirmed), `"out-of-sync"` (local changes pending or gaps detected), or `"error"`. +- `send.localVersion`: The latest local database version. +- `send.serverVersion`: The latest version confirmed by the server. **Example:** ```sql SELECT cloudsync_network_send_changes(); +-- '{"send":{"status":"synced","localVersion":5,"serverVersion":5}}' ``` --- @@ -399,16 +392,23 @@ This function is designed to be called periodically to keep the local database i To force an update and wait for changes (with a timeout), use [`cloudsync_network_sync(wait_ms, max_retries)`]. If the network is misconfigured or the remote server is unreachable, the function returns an error. -On success, it returns `SQLITE_OK`, and the return value indicates how many changes were downloaded and applied. **Parameters:** None. -**Returns:** The number of changes downloaded. Errors are reported via the SQLite return code. +**Returns:** A JSON string with the receive result: + +```json +{"receive": {"rows": N, "tables": ["table1", "table2"]}} +``` + +- `receive.rows`: The number of rows received and applied to the local database. +- `receive.tables`: An array of table names that received changes. Empty (`[]`) if no changes were applied. 
**Example:** ```sql SELECT cloudsync_network_check_changes(); +-- '{"receive":{"rows":3,"tables":["tasks"]}}' ``` --- @@ -425,13 +425,27 @@ SELECT cloudsync_network_check_changes(); - `wait_ms` (INTEGER, optional): The time to wait in milliseconds between retries. Defaults to 100. - `max_retries` (INTEGER, optional): The maximum number of times to retry the synchronization. Defaults to 1. -**Returns:** The number of changes downloaded. Errors are reported via the SQLite return code. +**Returns:** A JSON string with the full sync result, combining send and receive: + +```json +{ + "send": {"status": "synced|syncing|out-of-sync|error", "localVersion": N, "serverVersion": N}, + "receive": {"rows": N, "tables": ["table1", "table2"]} +} +``` + +- `send.status`: The current sync state — `"synced"`, `"syncing"`, `"out-of-sync"`, or `"error"`. +- `send.localVersion`: The latest local database version. +- `send.serverVersion`: The latest version confirmed by the server. +- `receive.rows`: The number of rows received and applied during the check phase. +- `receive.tables`: An array of table names that received changes. Empty (`[]`) if no changes were applied. **Example:** ```sql -- Perform a single synchronization cycle SELECT cloudsync_network_sync(); +-- '{"send":{"status":"synced","localVersion":5,"serverVersion":5},"receive":{"rows":3,"tables":["tasks"]}}' -- Perform a synchronization cycle with custom retry settings SELECT cloudsync_network_sync(500, 3); @@ -455,9 +469,25 @@ SELECT cloudsync_network_reset_sync_version(); --- +### `cloudsync_network_has_unsent_changes()` + +**Description:** Checks if there are any local changes that have not yet been sent to the remote server. + +**Parameters:** None. + +**Returns:** 1 if there are unsent changes, 0 otherwise. 
+ +**Example:** + +```sql +SELECT cloudsync_network_has_unsent_changes(); +``` + +--- + ### `cloudsync_network_logout()` -**Description:** Logs out the current user and cleans up all local data from synchronized tables. This function deletes and then re-initializes synchronized tables, useful for switching users or resetting the local database. **Warning:** This function deletes all data from synchronized tables. Use with caution. +**Description:** Logs out the current user and cleans up all local data from synchronized tables. This function deletes and then re-initializes synchronized tables, useful for switching users or resetting the local database. **Warning:** This function deletes all data from synchronized tables. Use with caution. Consider calling [`cloudsync_network_has_unsent_changes()`](#cloudsync_network_has_unsent_changes) before logout to check for unsent local changes and warn the user before data that has not been fully synchronized to the remote server is deleted. **Parameters:** None. diff --git a/README.md b/README.md index 2c46a9d..ad91448 100644 --- a/README.md +++ b/README.md @@ -50,21 +50,24 @@ The sync layer is tightly integrated with [**SQLite Cloud**](https://sqlitecloud ## Row-Level Security -Thanks to the underlying SQLite Cloud infrastructure, **SQLite Sync supports Row-Level Security (RLS)**—allowing you to define **precise access control at the row level**: +Thanks to the underlying SQLite Cloud infrastructure, **SQLite Sync supports Row-Level Security (RLS)**—allowing you to use a **single shared cloud database** while each client only sees and modifies its own data. RLS policies are enforced on the server, so the security boundary is at the database level, not in application code. - Control not just who can read or write a table, but **which specific rows** they can access. -- Enforce security policies on the server—no need for client-side filtering. 
+- Each device syncs only the rows it is authorized to see—no full dataset download, no client-side filtering. For example: - User A can only see and edit their own data. - User B can access a different set of rows—even within the same shared table. -**Benefits of RLS**: +**Benefits**: -- **Data isolation**: Ensure users only access what they’re authorized to see. -- **Built-in privacy**: Security policies are enforced at the database level. -- **Simplified development**: Reduce or eliminate complex permission logic in your application code. +- **Single database, multiple tenants**: One cloud database serves all users. RLS policies partition data per user or role, eliminating the need to provision separate databases. +- **Efficient sync**: Each client downloads only its authorized rows, reducing bandwidth and local storage. +- **Server-enforced security**: Policies are evaluated on the server during sync. A compromised or modified client cannot bypass access controls. +- **Simplified development**: No need to implement permission logic in your application—define policies once in the database and they apply everywhere. + +For more information, see the [SQLite Cloud RLS documentation](https://docs.sqlitecloud.io/docs/rls). ### What Can You Build with SQLite Sync? @@ -102,7 +105,12 @@ SQLite Sync is ideal for building collaborative and distributed apps across web, ## Documentation -For detailed information on all available functions, their parameters, and examples, refer to the [comprehensive API Reference](./API.md). +For detailed information on all available functions, their parameters, and examples, refer to the [comprehensive API Reference](./API.md). 
The API includes: + +- **Configuration Functions** — initialize, enable, and disable sync on tables +- **Helper Functions** — version info, site IDs, UUID generation +- **Schema Alteration Functions** — safely alter synced tables +- **Network Functions** — connect, authenticate, send/receive changes, and monitor sync status ## Installation @@ -284,12 +292,13 @@ SELECT cloudsync_network_set_apikey('your-api-key-here'); -- Or use token authentication (required for Row-Level Security) -- SELECT cloudsync_network_set_token('your_auth_token'); --- Sync with cloud: send local changes, then check the remote server for new changes +-- Sync with cloud: send local changes, then check the remote server for new changes -- and, if a package with changes is ready to be downloaded, applies them to the local database SELECT cloudsync_network_sync(); --- Keep calling periodically. The function returns > 0 if data was received --- In production applications, you would typically call this periodically --- rather than manually (e.g., every few seconds) +-- Returns a JSON string with sync status, e.g.: +-- '{"send":{"status":"synced","localVersion":5,"serverVersion":5},"receive":{"rows":3,"tables":["my_data"]}}' +-- Keep calling periodically. 
In production applications, you would typically +-- call this periodically rather than manually (e.g., every few seconds) SELECT cloudsync_network_sync(); -- Before closing the database connection @@ -314,9 +323,9 @@ SELECT cloudsync_init('my_data'); SELECT cloudsync_network_init('sqlitecloud://your-project-id.sqlite.cloud/database.sqlite'); SELECT cloudsync_network_set_apikey('your-api-key-here'); --- Sync to get data from the first device +-- Sync to get data from the first device SELECT cloudsync_network_sync(); --- repeat until data is received (returns > 0) +-- Repeat — check receive.rows in the JSON result to see if data was received SELECT cloudsync_network_sync(); -- View synchronized data @@ -454,12 +463,6 @@ Be aware that certain types of triggers can cause errors during synchronization - If a trigger modifies a table that is also synchronized with SQLite Sync, changes performed by the trigger may be applied twice during the merge operation - This can lead to constraint violations or unexpected data states depending on the table's constraints -**Column-by-Column Processing** -- SQLite Sync applies changes column-by-column during synchronization -- UPDATE triggers may be called multiple times for a single row as each column is processed -- This can result in unexpected trigger behavior - - ## License diff --git a/examples/simple-todo-db/README.md b/examples/simple-todo-db/README.md index c9967a5..772b8fe 100644 --- a/examples/simple-todo-db/README.md +++ b/examples/simple-todo-db/README.md @@ -168,7 +168,7 @@ SELECT cloudsync_network_set_apikey('your-api-key-here'); -- Pull data from Device A - repeat until data is received SELECT cloudsync_network_sync(); --- Keep calling until the function returns > 0 (indicating data was received) +-- Check "receive.rows" in the JSON result to see if data was received SELECT cloudsync_network_sync(); -- Verify data was synced @@ -199,7 +199,7 @@ SELECT cloudsync_network_sync(); ```sql -- Get updates from Device B - 
repeat until data is received SELECT cloudsync_network_sync(); --- Keep calling until the function returns > 0 (indicating data was received) +-- Check "receive.rows" in the JSON result to see if data was received SELECT cloudsync_network_sync(); -- View all tasks (should now include Device B's additions) @@ -232,7 +232,7 @@ SELECT cloudsync_network_has_unsent_changes(); -- When network returns, sync automatically resolves conflicts -- Repeat until all changes are synchronized SELECT cloudsync_network_sync(); --- Keep calling until the function returns > 0 (indicating data was received/sent) +-- Check "receive.rows" and "send.status" in the JSON result SELECT cloudsync_network_sync(); ``` diff --git a/examples/to-do-app/components/SyncContext.js b/examples/to-do-app/components/SyncContext.js index e964f4a..7b076ef 100644 --- a/examples/to-do-app/components/SyncContext.js +++ b/examples/to-do-app/components/SyncContext.js @@ -58,10 +58,14 @@ export const SyncProvider = ({ children }) => { const result = await Promise.race([queryPromise, timeoutPromise]); - if (result.rows && result.rows.length > 0 && result.rows[0]['cloudsync_network_check_changes()'] > 0) { - console.log(`${result.rows[0]['cloudsync_network_check_changes()']} changes detected, triggering refresh`); - // Defer refresh to next tick to avoid blocking current interaction - setTimeout(() => triggerRefresh(), 0); + const raw = result.rows?.[0]?.['cloudsync_network_check_changes()']; + if (raw) { + const { receive } = JSON.parse(raw); + if (receive.rows > 0) { + console.log(`${receive.rows} changes detected in [${receive.tables}], triggering refresh`); + // Defer refresh to next tick to avoid blocking current interaction + setTimeout(() => triggerRefresh(), 0); + } } } catch (error) { console.error('Error checking for changes:', error); diff --git a/src/cloudsync.c b/src/cloudsync.c index 221d77e..64e2ce6 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -2872,21 +2872,18 @@ int 
cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b // MARK: - Payload load/store - -int cloudsync_payload_get (cloudsync_context *data, char **blob, int *blob_size, int *db_version, int *seq, int64_t *new_db_version, int64_t *new_seq) { +int cloudsync_payload_get (cloudsync_context *data, char **blob, int *blob_size, int *db_version, int64_t *new_db_version) { // retrieve current db_version and seq *db_version = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_SEND_DBVERSION); if (*db_version < 0) return DBRES_ERROR; - - *seq = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_SEND_SEQ); - if (*seq < 0) return DBRES_ERROR; // retrieve BLOB char sql[1024]; snprintf(sql, sizeof(sql), "WITH max_db_version AS (SELECT MAX(db_version) AS max_db_version FROM cloudsync_changes WHERE site_id=cloudsync_siteid()) " - "SELECT * FROM (SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) AS payload, max_db_version AS max_db_version, MAX(IIF(db_version = max_db_version, seq, 0)) FROM cloudsync_changes, max_db_version WHERE site_id=cloudsync_siteid() AND (db_version>%d OR (db_version=%d AND seq>%d))) WHERE payload IS NOT NULL", *db_version, *db_version, *seq); + "SELECT * FROM (SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) AS payload, max_db_version AS max_db_version FROM cloudsync_changes, max_db_version WHERE site_id=cloudsync_siteid() AND db_version>%d) WHERE payload IS NOT NULL", *db_version); int64_t len = 0; - int rc = database_select_blob_2int(data, sql, blob, &len, new_db_version, new_seq); + int rc = database_select_blob_int(data, sql, blob, &len, new_db_version); *blob_size = (int)len; if (rc != DBRES_OK) return rc; @@ -2904,12 +2901,11 @@ int cloudsync_payload_save (cloudsync_context *data, const char *payload_path, i // retrieve payload char *blob = NULL; - int blob_size = 0, db_version = 0, seq = 0; - int64_t new_db_version = 0, 
new_seq = 0; - int rc = cloudsync_payload_get(data, &blob, &blob_size, &db_version, &seq, &new_db_version, &new_seq); + int blob_size = 0, db_version = 0; + int64_t new_db_version = 0; + int rc = cloudsync_payload_get(data, &blob, &blob_size, &db_version, &new_db_version); if (rc != DBRES_OK) { if (db_version < 0) return cloudsync_set_error(data, "Unable to retrieve db_version", rc); - else if (seq < 0) return cloudsync_set_error(data, "Unable to retrieve seq", rc); return cloudsync_set_error(data, "Unable to retrieve changes in cloudsync_payload_save", rc); } @@ -2926,18 +2922,6 @@ int cloudsync_payload_save (cloudsync_context *data, const char *payload_path, i return cloudsync_set_error(data, "Unable to write payload to file path", DBRES_IOERR); } - // TODO: dbutils_settings_set_key_value remove context and return error here (in case of error) - // update db_version and seq - char buf[256]; - if (new_db_version != db_version) { - snprintf(buf, sizeof(buf), "%" PRId64, new_db_version); - dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_SEND_DBVERSION, buf); - } - if (new_seq != seq) { - snprintf(buf, sizeof(buf), "%" PRId64, new_seq); - dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_SEND_SEQ, buf); - } - // returns blob size if (size) *size = blob_size; return DBRES_OK; diff --git a/src/cloudsync.h b/src/cloudsync.h index d0718fa..b5919be 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -17,7 +17,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "0.9.112" +#define CLOUDSYNC_VERSION "0.9.113" #define CLOUDSYNC_MAX_TABLENAME_LEN 512 #define CLOUDSYNC_VALUE_NOTSET -1 @@ -83,7 +83,7 @@ int cloudsync_payload_encode_step (cloudsync_payload_context *payload, clouds int cloudsync_payload_encode_final (cloudsync_payload_context *payload, cloudsync_context *data); char *cloudsync_payload_blob (cloudsync_payload_context *payload, int64_t *blob_size, int64_t *nrows); size_t cloudsync_payload_context_size (size_t *header_size); -int cloudsync_payload_get 
(cloudsync_context *data, char **blob, int *blob_size, int *db_version, int *seq, int64_t *new_db_version, int64_t *new_seq); +int cloudsync_payload_get (cloudsync_context *data, char **blob, int *blob_size, int *db_version, int64_t *new_db_version); int cloudsync_payload_save (cloudsync_context *data, const char *payload_path, int *blob_size); // available only on Desktop OS (no WASM, no mobile) // CloudSync table context diff --git a/src/database.h b/src/database.h index acf98c6..31b3f7a 100644 --- a/src/database.h +++ b/src/database.h @@ -64,7 +64,7 @@ int database_exec_callback (cloudsync_context *data, const char *sql, database_ int database_select_int (cloudsync_context *data, const char *sql, int64_t *value); int database_select_text (cloudsync_context *data, const char *sql, char **value); int database_select_blob (cloudsync_context *data, const char *sql, char **value, int64_t *value_len); -int database_select_blob_2int (cloudsync_context *data, const char *sql, char **value, int64_t *value_len, int64_t *value2, int64_t *value3); +int database_select_blob_int (cloudsync_context *data, const char *sql, char **value, int64_t *value_len, int64_t *value2); int database_write (cloudsync_context *data, const char *sql, const char **values, DBTYPE types[], int lens[], int count); bool database_table_exists (cloudsync_context *data, const char *table_name, const char *schema); bool database_internal_table_exists (cloudsync_context *data, const char *name); diff --git a/src/jsmn.h b/src/jsmn.h new file mode 100644 index 0000000..dca2bb5 --- /dev/null +++ b/src/jsmn.h @@ -0,0 +1,471 @@ +/* + * MIT License + * + * Copyright (c) 2010 Serge Zaitsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
+ * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef JSMN_H +#define JSMN_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef JSMN_STATIC +#define JSMN_API static +#else +#define JSMN_API extern +#endif + +/** + * JSON type identifier. Basic types are: + * o Object + * o Array + * o String + * o Other primitive: number, boolean (true/false) or null + */ +typedef enum { + JSMN_UNDEFINED = 0, + JSMN_OBJECT = 1 << 0, + JSMN_ARRAY = 1 << 1, + JSMN_STRING = 1 << 2, + JSMN_PRIMITIVE = 1 << 3 +} jsmntype_t; + +enum jsmnerr { + /* Not enough tokens were provided */ + JSMN_ERROR_NOMEM = -1, + /* Invalid character inside JSON string */ + JSMN_ERROR_INVAL = -2, + /* The string is not a full JSON packet, more bytes expected */ + JSMN_ERROR_PART = -3 +}; + +/** + * JSON token description. + * type type (object, array, string etc.) + * start start position in JSON data string + * end end position in JSON data string + */ +typedef struct jsmntok { + jsmntype_t type; + int start; + int end; + int size; +#ifdef JSMN_PARENT_LINKS + int parent; +#endif +} jsmntok_t; + +/** + * JSON parser. Contains an array of token blocks available. Also stores + * the string being parsed now and current position in that string. 
+ */ +typedef struct jsmn_parser { + unsigned int pos; /* offset in the JSON string */ + unsigned int toknext; /* next token to allocate */ + int toksuper; /* superior token node, e.g. parent object or array */ +} jsmn_parser; + +/** + * Create JSON parser over an array of tokens + */ +JSMN_API void jsmn_init(jsmn_parser *parser); + +/** + * Run JSON parser. It parses a JSON data string into and array of tokens, each + * describing + * a single JSON object. + */ +JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, + jsmntok_t *tokens, const unsigned int num_tokens); + +#ifndef JSMN_HEADER +/** + * Allocates a fresh unused token from the token pool. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, jsmntok_t *tokens, + const size_t num_tokens) { + jsmntok_t *tok; + if (parser->toknext >= num_tokens) { + return NULL; + } + tok = &tokens[parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; +#ifdef JSMN_PARENT_LINKS + tok->parent = -1; +#endif + return tok; +} + +/** + * Fills token type and boundaries. + */ +static void jsmn_fill_token(jsmntok_t *token, const jsmntype_t type, + const int start, const int end) { + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +/** + * Fills next available token with JSON primitive. 
+ */ +static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, + const size_t len, jsmntok_t *tokens, + const size_t num_tokens) { + jsmntok_t *token; + int start; + + start = parser->pos; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + switch (js[parser->pos]) { +#ifndef JSMN_STRICT + /* In strict mode primitive must be followed by "," or "}" or "]" */ + case ':': +#endif + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ']': + case '}': + goto found; + default: + /* to quiet a warning from gcc*/ + break; + } + if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } +#ifdef JSMN_STRICT + /* In strict mode primitive must be followed by a comma/object/array */ + parser->pos = start; + return JSMN_ERROR_PART; +#endif + +found: + if (tokens == NULL) { + parser->pos--; + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + parser->pos--; + return 0; +} + +/** + * Fills next token with JSON string. 
+ */ +static int jsmn_parse_string(jsmn_parser *parser, const char *js, + const size_t len, jsmntok_t *tokens, + const size_t num_tokens) { + jsmntok_t *token; + + int start = parser->pos; + + /* Skip starting quote */ + parser->pos++; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c = js[parser->pos]; + + /* Quote: end of string */ + if (c == '\"') { + if (tokens == NULL) { + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_STRING, start + 1, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + return 0; + } + + /* Backslash: Quoted symbol expected */ + if (c == '\\' && parser->pos + 1 < len) { + int i; + parser->pos++; + switch (js[parser->pos]) { + /* Allowed escaped symbols */ + case '\"': + case '/': + case '\\': + case 'b': + case 'f': + case 'r': + case 'n': + case 't': + break; + /* Allows escaped symbol \uXXXX */ + case 'u': + parser->pos++; + for (i = 0; i < 4 && parser->pos < len && js[parser->pos] != '\0'; + i++) { + /* If it isn't a hex character we have an error */ + if (!((js[parser->pos] >= 48 && js[parser->pos] <= 57) || /* 0-9 */ + (js[parser->pos] >= 65 && js[parser->pos] <= 70) || /* A-F */ + (js[parser->pos] >= 97 && js[parser->pos] <= 102))) { /* a-f */ + parser->pos = start; + return JSMN_ERROR_INVAL; + } + parser->pos++; + } + parser->pos--; + break; + /* Unexpected symbol */ + default: + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +/** + * Parse JSON string and fill tokens. 
+ */ +JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, + jsmntok_t *tokens, const unsigned int num_tokens) { + int r; + int i; + jsmntok_t *token; + int count = parser->toknext; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c; + jsmntype_t type; + + c = js[parser->pos]; + switch (c) { + case '{': + case '[': + count++; + if (tokens == NULL) { + break; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + return JSMN_ERROR_NOMEM; + } + if (parser->toksuper != -1) { + jsmntok_t *t = &tokens[parser->toksuper]; +#ifdef JSMN_STRICT + /* In strict mode an object or array can't become a key */ + if (t->type == JSMN_OBJECT) { + return JSMN_ERROR_INVAL; + } +#endif + t->size++; +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + } + token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); + token->start = parser->pos; + parser->toksuper = parser->toknext - 1; + break; + case '}': + case ']': + if (tokens == NULL) { + break; + } + type = (c == '}' ? 
JSMN_OBJECT : JSMN_ARRAY); +#ifdef JSMN_PARENT_LINKS + if (parser->toknext < 1) { + return JSMN_ERROR_INVAL; + } + token = &tokens[parser->toknext - 1]; + for (;;) { + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + token->end = parser->pos + 1; + parser->toksuper = token->parent; + break; + } + if (token->parent == -1) { + if (token->type != type || parser->toksuper == -1) { + return JSMN_ERROR_INVAL; + } + break; + } + token = &tokens[token->parent]; + } +#else + for (i = parser->toknext - 1; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + parser->toksuper = -1; + token->end = parser->pos + 1; + break; + } + } + /* Error if unmatched closing bracket */ + if (i == -1) { + return JSMN_ERROR_INVAL; + } + for (; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + parser->toksuper = i; + break; + } + } +#endif + break; + case '\"': + r = jsmn_parse_string(parser, js, len, tokens, num_tokens); + if (r < 0) { + return r; + } + count++; + if (parser->toksuper != -1 && tokens != NULL) { + tokens[parser->toksuper].size++; + } + break; + case '\t': + case '\r': + case '\n': + case ' ': + break; + case ':': + parser->toksuper = parser->toknext - 1; + break; + case ',': + if (tokens != NULL && parser->toksuper != -1 && + tokens[parser->toksuper].type != JSMN_ARRAY && + tokens[parser->toksuper].type != JSMN_OBJECT) { +#ifdef JSMN_PARENT_LINKS + parser->toksuper = tokens[parser->toksuper].parent; +#else + for (i = parser->toknext - 1; i >= 0; i--) { + if (tokens[i].type == JSMN_ARRAY || tokens[i].type == JSMN_OBJECT) { + if (tokens[i].start != -1 && tokens[i].end == -1) { + parser->toksuper = i; + break; + } + } + } +#endif + } + break; +#ifdef JSMN_STRICT + /* In strict mode primitives are: numbers and booleans */ + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + 
case '5': + case '6': + case '7': + case '8': + case '9': + case 't': + case 'f': + case 'n': + /* And they must not be keys of the object */ + if (tokens != NULL && parser->toksuper != -1) { + const jsmntok_t *t = &tokens[parser->toksuper]; + if (t->type == JSMN_OBJECT || + (t->type == JSMN_STRING && t->size != 0)) { + return JSMN_ERROR_INVAL; + } + } +#else + /* In non-strict mode every unquoted value is a primitive */ + default: +#endif + r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); + if (r < 0) { + return r; + } + count++; + if (parser->toksuper != -1 && tokens != NULL) { + tokens[parser->toksuper].size++; + } + break; + +#ifdef JSMN_STRICT + /* Unexpected char in strict mode */ + default: + return JSMN_ERROR_INVAL; +#endif + } + } + + if (tokens != NULL) { + for (i = parser->toknext - 1; i >= 0; i--) { + /* Unmatched opened object or array */ + if (tokens[i].start != -1 && tokens[i].end == -1) { + return JSMN_ERROR_PART; + } + } + } + + return count; +} + +/** + * Creates a new parser based over a given buffer with an array of tokens + * available. 
+ */ +JSMN_API void jsmn_init(jsmn_parser *parser) { + parser->pos = 0; + parser->toknext = 0; + parser->toksuper = -1; +} + +#endif /* JSMN_HEADER */ + +#ifdef __cplusplus +} +#endif + +#endif /* JSMN_H */ diff --git a/src/network.c b/src/network.c index 2a3c1c7..f357297 100644 --- a/src/network.c +++ b/src/network.c @@ -9,6 +9,7 @@ #include #include +#include #include "network.h" #include "utils.h" @@ -16,6 +17,9 @@ #include "cloudsync.h" #include "network_private.h" +#define JSMN_STATIC +#include "jsmn.h" + #ifndef SQLITE_WASM_EXTRA_INIT #ifndef CLOUDSYNC_OMIT_CURL #include "curl/curl.h" @@ -50,6 +54,7 @@ struct network_data { char *check_endpoint; char *upload_endpoint; char *apply_endpoint; + char *status_endpoint; }; typedef struct { @@ -80,27 +85,30 @@ char *network_data_get_siteid (network_data *data) { return data->site_id; } -bool network_data_set_endpoints (network_data *data, char *auth, char *check, char *upload, char *apply) { +bool network_data_set_endpoints (network_data *data, char *auth, char *check, char *upload, char *apply, char *status) { // sanity check if (!check || !upload) return false; - + // always free previous owned pointers if (data->authentication) cloudsync_memory_free(data->authentication); if (data->check_endpoint) cloudsync_memory_free(data->check_endpoint); if (data->upload_endpoint) cloudsync_memory_free(data->upload_endpoint); if (data->apply_endpoint) cloudsync_memory_free(data->apply_endpoint); + if (data->status_endpoint) cloudsync_memory_free(data->status_endpoint); // clear pointers data->authentication = NULL; data->check_endpoint = NULL; data->upload_endpoint = NULL; data->apply_endpoint = NULL; + data->status_endpoint = NULL; // make a copy of the new endpoints char *auth_copy = NULL; char *check_copy = NULL; char *upload_copy = NULL; char *apply_copy = NULL; + char *status_copy = NULL; // auth is optional if (auth) { @@ -109,24 +117,29 @@ bool network_data_set_endpoints (network_data *data, char *auth, char *check, ch 
} check_copy = cloudsync_string_dup(check); if (!check_copy) goto abort_endpoints; - + upload_copy = cloudsync_string_dup(upload); if (!upload_copy) goto abort_endpoints; - + apply_copy = cloudsync_string_dup(apply); if (!apply_copy) goto abort_endpoints; + status_copy = cloudsync_string_dup(status); + if (!status_copy) goto abort_endpoints; + data->authentication = auth_copy; data->check_endpoint = check_copy; data->upload_endpoint = upload_copy; data->apply_endpoint = apply_copy; + data->status_endpoint = status_copy; return true; - + abort_endpoints: if (auth_copy) cloudsync_memory_free(auth_copy); if (check_copy) cloudsync_memory_free(check_copy); if (upload_copy) cloudsync_memory_free(upload_copy); if (apply_copy) cloudsync_memory_free(apply_copy); + if (status_copy) cloudsync_memory_free(status_copy); return false; } @@ -137,6 +150,7 @@ void network_data_free (network_data *data) { if (data->check_endpoint) cloudsync_memory_free(data->check_endpoint); if (data->upload_endpoint) cloudsync_memory_free(data->upload_endpoint); if (data->apply_endpoint) cloudsync_memory_free(data->apply_endpoint); + if (data->status_endpoint) cloudsync_memory_free(data->status_endpoint); cloudsync_memory_free(data); } @@ -414,6 +428,113 @@ char *network_authentication_token (const char *key, const char *value) { return buffer; } +// MARK: - JSON helpers (jsmn) - + +#define JSMN_MAX_TOKENS 64 + +static bool jsmn_token_eq(const char *json, const jsmntok_t *tok, const char *s) { + return (tok->type == JSMN_STRING && + (int)strlen(s) == tok->end - tok->start && + strncmp(json + tok->start, s, tok->end - tok->start) == 0); +} + +static int jsmn_find_key(const char *json, const jsmntok_t *tokens, int ntokens, const char *key) { + for (int i = 1; i + 1 < ntokens; i++) { + if (jsmn_token_eq(json, &tokens[i], key)) return i; + } + return -1; +} + +static char *json_unescape_string(const char *src, int len) { + char *out = cloudsync_memory_zeroalloc(len + 1); + if (!out) return NULL; + + 
int j = 0; + for (int i = 0; i < len; ) { + if (src[i] == '\\' && i + 1 < len) { + char c = src[i + 1]; + if (c == '"' || c == '\\' || c == '/') { out[j++] = c; i += 2; } + else if (c == 'n') { out[j++] = '\n'; i += 2; } + else if (c == 'r') { out[j++] = '\r'; i += 2; } + else if (c == 't') { out[j++] = '\t'; i += 2; } + else if (c == 'b') { out[j++] = '\b'; i += 2; } + else if (c == 'f') { out[j++] = '\f'; i += 2; } + else if (c == 'u' && i + 5 < len) { + unsigned int cp = 0; + for (int k = 0; k < 4; k++) { + char h = src[i + 2 + k]; + cp <<= 4; + if (h >= '0' && h <= '9') cp |= h - '0'; + else if (h >= 'a' && h <= 'f') cp |= 10 + h - 'a'; + else if (h >= 'A' && h <= 'F') cp |= 10 + h - 'A'; + } + if (cp < 0x80) { out[j++] = (char)cp; } + else { out[j++] = '?'; } // non-ASCII: replace + i += 6; + } + else { out[j++] = src[i]; i++; } + } else { + out[j++] = src[i]; i++; + } + } + out[j] = '\0'; + return out; +} + +static char *json_extract_string(const char *json, size_t json_len, const char *key) { + if (!json || json_len == 0 || !key) return NULL; + + jsmn_parser parser; + jsmntok_t tokens[JSMN_MAX_TOKENS]; + jsmn_init(&parser); + int ntokens = jsmn_parse(&parser, json, json_len, tokens, JSMN_MAX_TOKENS); + if (ntokens < 1) return NULL; + + int i = jsmn_find_key(json, tokens, ntokens, key); + if (i < 0 || i + 1 >= ntokens) return NULL; + + jsmntok_t *val = &tokens[i + 1]; + if (val->type != JSMN_STRING) return NULL; + + return json_unescape_string(json + val->start, val->end - val->start); +} + +static int64_t json_extract_int(const char *json, size_t json_len, const char *key, int64_t default_value) { + if (!json || json_len == 0 || !key) return default_value; + + jsmn_parser parser; + jsmntok_t tokens[JSMN_MAX_TOKENS]; + jsmn_init(&parser); + int ntokens = jsmn_parse(&parser, json, json_len, tokens, JSMN_MAX_TOKENS); + if (ntokens < 1 || tokens[0].type != JSMN_OBJECT) return default_value; + + int i = jsmn_find_key(json, tokens, ntokens, key); + if (i < 0 || i 
+ 1 >= ntokens) return default_value; + + jsmntok_t *val = &tokens[i + 1]; + if (val->type != JSMN_PRIMITIVE) return default_value; + + return strtoll(json + val->start, NULL, 10); +} + +static int json_extract_array_size(const char *json, size_t json_len, const char *key) { + if (!json || json_len == 0 || !key) return -1; + + jsmn_parser parser; + jsmntok_t tokens[JSMN_MAX_TOKENS]; + jsmn_init(&parser); + int ntokens = jsmn_parse(&parser, json, json_len, tokens, JSMN_MAX_TOKENS); + if (ntokens < 1 || tokens[0].type != JSMN_OBJECT) return -1; + + int i = jsmn_find_key(json, tokens, ntokens, key); + if (i < 0 || i + 1 >= ntokens) return -1; + + jsmntok_t *val = &tokens[i + 1]; + if (val->type != JSMN_ARRAY) return -1; + + return val->size; +} + int network_extract_query_param (const char *query, const char *key, char *output, size_t output_size) { if (!query || !key || !output || output_size == 0) { return -1; // Invalid input @@ -472,6 +593,7 @@ bool network_compute_endpoints (sqlite3_context *context, network_data *data, co char *check_endpoint = NULL; char *upload_endpoint = NULL; char *apply_endpoint = NULL; + char *status_endpoint = NULL; char *conn_string_https = NULL; @@ -555,12 +677,14 @@ bool network_compute_endpoints (sqlite3_context *context, network_data *data, co check_endpoint = (char *)cloudsync_memory_zeroalloc(requested); upload_endpoint = (char *)cloudsync_memory_zeroalloc(requested); apply_endpoint = (char *)cloudsync_memory_zeroalloc(requested); + status_endpoint = (char *)cloudsync_memory_zeroalloc(requested); - if ((!upload_endpoint) || (!check_endpoint) || (!apply_endpoint)) goto finalize; + if ((!upload_endpoint) || (!check_endpoint) || (!apply_endpoint) || (!status_endpoint)) goto finalize; snprintf(check_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_CHECK); snprintf(upload_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, 
port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_UPLOAD); snprintf(apply_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_APPLY); + snprintf(status_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_STATUS); result = true; @@ -580,6 +704,7 @@ bool network_compute_endpoints (sqlite3_context *context, network_data *data, co if (check_endpoint) cloudsync_memory_free(check_endpoint); if (upload_endpoint) cloudsync_memory_free(upload_endpoint); if (apply_endpoint) cloudsync_memory_free(apply_endpoint); + if (status_endpoint) cloudsync_memory_free(status_endpoint); } if (result) { @@ -596,6 +721,9 @@ bool network_compute_endpoints (sqlite3_context *context, network_data *data, co if (data->apply_endpoint) cloudsync_memory_free(data->apply_endpoint); data->apply_endpoint = apply_endpoint; + + if (data->status_endpoint) cloudsync_memory_free(data->status_endpoint); + data->status_endpoint = status_endpoint; } // cleanup memory @@ -732,12 +860,52 @@ void cloudsync_network_set_apikey (sqlite3_context *context, int argc, sqlite3_v (result) ? sqlite3_result_int(context, SQLITE_OK) : sqlite3_result_error_code(context, SQLITE_NOMEM); } +// Returns a malloc'd JSON array string like '["tasks","users"]', or NULL on error/no results. +// Caller must free with cloudsync_memory_free. 
+static char *network_get_affected_tables(sqlite3 *db, int64_t since_db_version) { + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, + "SELECT json_group_array(DISTINCT tbl) FROM cloudsync_changes WHERE db_version > ?", + -1, &stmt, NULL); + if (rc != SQLITE_OK) return NULL; + sqlite3_bind_int64(stmt, 1, since_db_version); + + char *result = NULL; + if (sqlite3_step(stmt) == SQLITE_ROW) { + const char *json = (const char *)sqlite3_column_text(stmt, 0); + if (json) result = cloudsync_string_dup(json); + } + sqlite3_finalize(stmt); + return result; +} + +// MARK: - Sync result + +typedef struct { + int64_t server_version; // lastOptimisticVersion + int64_t local_version; // new_db_version (max local) + const char *status; // computed status string + int rows_received; // rows from check + char *tables_json; // JSON array of affected table names, caller must cloudsync_memory_free +} sync_result; + +static const char *network_compute_status(int64_t last_optimistic, int64_t last_confirmed, + int gaps_size, int64_t local_version) { + if (last_optimistic < 0 || last_confirmed < 0) return "error"; + if (gaps_size > 0 || last_optimistic < local_version) return "out-of-sync"; + if (last_optimistic == last_confirmed) return "synced"; + return "syncing"; +} + // MARK: - void cloudsync_network_has_unsent_changes (sqlite3_context *context, int argc, sqlite3_value **argv) { sqlite3 *db = sqlite3_context_db_handle(context); cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context); + network_data *netdata = (network_data *)cloudsync_auxdata(data); + if (!netdata) {sqlite3_result_error(context, "Unable to retrieve CloudSync network context.", -1); return;} + char *sql = "SELECT max(db_version) FROM cloudsync_changes WHERE site_id == (SELECT site_id FROM cloudsync_site_id WHERE rowid=0)"; int64_t last_local_change = 0; int rc = database_select_int(data, sql, &last_local_change); @@ -752,11 +920,23 @@ void cloudsync_network_has_unsent_changes 
(sqlite3_context *context, int argc, s return; } - int sent_db_version = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_SEND_DBVERSION); - sqlite3_result_int(context, (sent_db_version < last_local_change)); + NETWORK_RESULT res = network_receive_buffer(netdata, netdata->status_endpoint, netdata->authentication, true, false, NULL, CLOUDSYNC_HEADER_SQLITECLOUD); + + int64_t last_optimistic_version = -1; + + if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) { + last_optimistic_version = json_extract_int(res.buffer, res.blen, "lastOptimisticVersion", -1); + } else if (res.code != CLOUDSYNC_NETWORK_OK) { + network_result_to_sqlite_error(context, res, "unable to retrieve current status from remote host."); + network_result_cleanup(&res); + return; + } + + network_result_cleanup(&res); + sqlite3_result_int(context, (last_optimistic_version >= 0 && last_optimistic_version < last_local_change)); } -int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc, sqlite3_value **argv) { +int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc, sqlite3_value **argv, sync_result *out) { DEBUG_FUNCTION("cloudsync_network_send_changes"); // retrieve global context @@ -767,72 +947,123 @@ int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc, // retrieve payload char *blob = NULL; - int blob_size = 0, db_version = 0, seq = 0; - int64_t new_db_version = 0, new_seq = 0; - int rc = cloudsync_payload_get(data, &blob, &blob_size, &db_version, &seq, &new_db_version, &new_seq); + int blob_size = 0, db_version = 0; + int64_t new_db_version = 0; + int rc = cloudsync_payload_get(data, &blob, &blob_size, &db_version, &new_db_version); if (rc != SQLITE_OK) { if (db_version < 0) sqlite3_result_error(context, "Unable to retrieve db_version.", -1); - else if (seq < 0) sqlite3_result_error(context, "Unable to retrieve seq.", -1); else sqlite3_result_error(context, "Unable to retrieve changes in 
cloudsync_network_send_changes", -1); return rc; } - - // exit if there is no data to send - if (blob == NULL || blob_size == 0) return SQLITE_OK; - NETWORK_RESULT res = network_receive_buffer(netdata, netdata->upload_endpoint, netdata->authentication, true, false, NULL, CLOUDSYNC_HEADER_SQLITECLOUD); - if (res.code != CLOUDSYNC_NETWORK_BUFFER) { - cloudsync_memory_free(blob); - network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to receive upload URL"); - network_result_cleanup(&res); - return SQLITE_ERROR; + // Case 1: empty local db — no payload and no server state, skip network entirely + if ((blob == NULL || blob_size == 0) && db_version == 0) { + if (out) { + out->server_version = 0; + out->local_version = 0; + out->status = network_compute_status(0, 0, 0, 0); + } + return SQLITE_OK; } - - const char *s3_url = res.buffer; - bool sent = network_send_buffer(netdata, s3_url, NULL, blob, blob_size); - cloudsync_memory_free(blob); - if (sent == false) { - network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to upload BLOB changes to remote host."); + + NETWORK_RESULT res; + if (blob != NULL && blob_size > 0) { + // there is data to send + res = network_receive_buffer(netdata, netdata->upload_endpoint, netdata->authentication, true, false, NULL, CLOUDSYNC_HEADER_SQLITECLOUD); + if (res.code != CLOUDSYNC_NETWORK_BUFFER) { + cloudsync_memory_free(blob); + network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to receive upload URL"); + network_result_cleanup(&res); + return SQLITE_ERROR; + } + + char *s3_url = json_extract_string(res.buffer, res.blen, "url"); + if (!s3_url) { + cloudsync_memory_free(blob); + sqlite3_result_error(context, "cloudsync_network_send_changes: missing 'url' in upload response.", -1); + network_result_cleanup(&res); + return SQLITE_ERROR; + } + bool sent = network_send_buffer(netdata, s3_url, NULL, blob, blob_size); + cloudsync_memory_free(blob); + if 
(sent == false) { + cloudsync_memory_free(s3_url); + network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to upload BLOB changes to remote host."); + network_result_cleanup(&res); + return SQLITE_ERROR; + } + + int db_version_min = db_version+1; + int db_version_max = (int)new_db_version; + if (db_version_min > db_version_max) db_version_min = db_version_max; + char json_payload[4096]; + snprintf(json_payload, sizeof(json_payload), "{\"url\":\"%s\", \"dbVersionMin\":%d, \"dbVersionMax\":%d}", s3_url, db_version_min, db_version_max); + cloudsync_memory_free(s3_url); + + // free res network_result_cleanup(&res); - return SQLITE_ERROR; + + // notify remote host that we succesfully uploaded changes + res = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, json_payload, CLOUDSYNC_HEADER_SQLITECLOUD); + } else { + // there is no data to send, just check the status to update the db_version value in settings and to reply the status + new_db_version = db_version; + res = network_receive_buffer(netdata, netdata->status_endpoint, netdata->authentication, true, false, NULL, CLOUDSYNC_HEADER_SQLITECLOUD); } - - char json_payload[2024]; - snprintf(json_payload, sizeof(json_payload), "{\"url\":\"%s\", \"dbVersionMin\":%d, \"dbVersionMax\":%lld}", s3_url, db_version, (long long)new_db_version); - - // free res - network_result_cleanup(&res); - - // notify remote host that we succesfully uploaded changes - res = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, json_payload, CLOUDSYNC_HEADER_SQLITECLOUD); - if (res.code != CLOUDSYNC_NETWORK_OK) { + + int64_t last_optimistic_version = -1; + int64_t last_confirmed_version = -1; + int gaps_size = -1; + + if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) { + last_optimistic_version = json_extract_int(res.buffer, res.blen, "lastOptimisticVersion", -1); + last_confirmed_version = json_extract_int(res.buffer, 
res.blen, "lastConfirmedVersion", -1); + gaps_size = json_extract_array_size(res.buffer, res.blen, "gaps"); + if (gaps_size < 0) gaps_size = 0; + } else if (res.code != CLOUDSYNC_NETWORK_OK) { network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to notify BLOB upload to remote host."); network_result_cleanup(&res); return SQLITE_ERROR; } - - // update db_version and seq + + // update db_version in settings char buf[256]; - if (new_db_version != db_version) { + if (last_optimistic_version >= 0) { + if (last_optimistic_version != db_version) { + snprintf(buf, sizeof(buf), "%" PRId64, last_optimistic_version); + dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_SEND_DBVERSION, buf); + } + } else if (new_db_version != db_version) { snprintf(buf, sizeof(buf), "%" PRId64, new_db_version); dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_SEND_DBVERSION, buf); } - if (new_seq != seq) { - snprintf(buf, sizeof(buf), "%" PRId64, new_seq); - dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_SEND_SEQ, buf); + + // populate sync result + if (out) { + out->server_version = last_optimistic_version; + out->local_version = new_db_version; + out->status = network_compute_status(last_optimistic_version, last_confirmed_version, gaps_size, new_db_version); } - + network_result_cleanup(&res); return SQLITE_OK; } void cloudsync_network_send_changes (sqlite3_context *context, int argc, sqlite3_value **argv) { DEBUG_FUNCTION("cloudsync_network_send_changes"); - - cloudsync_network_send_changes_internal(context, argc, argv); + + sync_result sr = {-1, 0, NULL, 0, NULL}; + int rc = cloudsync_network_send_changes_internal(context, argc, argv, &sr); + if (rc != SQLITE_OK) return; + + char buf[256]; + snprintf(buf, sizeof(buf), + "{\"send\":{\"status\":\"%s\",\"localVersion\":%" PRId64 ",\"serverVersion\":%" PRId64 "}}", + sr.status ? 
sr.status : "error", sr.local_version, sr.server_version); + sqlite3_result_text(context, buf, -1, SQLITE_TRANSIENT); } -int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows) { +int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows, sync_result *out) { cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context); network_data *netdata = (network_data *)cloudsync_auxdata(data); if (!netdata) {sqlite3_result_error(context, "Unable to retrieve CloudSync network context.", -1); return -1;} @@ -843,6 +1074,9 @@ int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows) { int seq = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_SEQ); if (seq<0) {sqlite3_result_error(context, "Unable to retrieve seq.", -1); return -1;} + // Capture local db_version before download so we can query cloudsync_changes afterwards + int64_t prev_dbv = cloudsync_dbversion(data); + char json_payload[2024]; snprintf(json_payload, sizeof(json_payload), "{\"dbVersion\":%lld, \"seq\":%d}", (long long)db_version, seq); @@ -850,30 +1084,53 @@ int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows) { NETWORK_RESULT result = network_receive_buffer(netdata, netdata->check_endpoint, netdata->authentication, true, true, json_payload, CLOUDSYNC_HEADER_SQLITECLOUD); int rc = SQLITE_OK; if (result.code == CLOUDSYNC_NETWORK_BUFFER) { - rc = network_download_changes(context, result.buffer, pnrows); + char *download_url = json_extract_string(result.buffer, result.blen, "url"); + if (!download_url) { + sqlite3_result_error(context, "cloudsync_network_check_changes: missing 'url' in check response.", -1); + network_result_cleanup(&result); + return SQLITE_ERROR; + } + rc = network_download_changes(context, download_url, pnrows); + cloudsync_memory_free(download_url); } else { rc = network_set_sqlite_result(context, &result); } - + + if (out && pnrows) out->rows_received = *pnrows; + + // Query cloudsync_changes 
for affected tables after successful download + if (out && rc == SQLITE_OK && pnrows && *pnrows > 0) { + sqlite3 *db = (sqlite3 *)cloudsync_db(data); + out->tables_json = network_get_affected_tables(db, prev_dbv); + } + network_result_cleanup(&result); return rc; } void cloudsync_network_sync (sqlite3_context *context, int wait_ms, int max_retries) { - int rc = cloudsync_network_send_changes_internal(context, 0, NULL); + sync_result sr = {-1, 0, NULL, 0, NULL}; + int rc = cloudsync_network_send_changes_internal(context, 0, NULL, &sr); if (rc != SQLITE_OK) return; - + int ntries = 0; int nrows = 0; while (ntries < max_retries) { if (ntries > 0) sqlite3_sleep(wait_ms); - rc = cloudsync_network_check_internal(context, &nrows); + if (sr.tables_json) { cloudsync_memory_free(sr.tables_json); sr.tables_json = NULL; } + rc = cloudsync_network_check_internal(context, &nrows, &sr); if (rc == SQLITE_OK && nrows > 0) break; ntries++; } - - sqlite3_result_error_code(context, (nrows == -1) ? SQLITE_ERROR : SQLITE_OK); - if (nrows >= 0) sqlite3_result_int(context, nrows); + if (rc != SQLITE_OK) { if (sr.tables_json) cloudsync_memory_free(sr.tables_json); return; } + + const char *tables = sr.tables_json ? sr.tables_json : "[]"; + char *buf = cloudsync_memory_mprintf( + "{\"send\":{\"status\":\"%s\",\"localVersion\":%" PRId64 ",\"serverVersion\":%" PRId64 "}," + "\"receive\":{\"rows\":%d,\"tables\":%s}}", + sr.status ? 
sr.status : "error", sr.local_version, sr.server_version, nrows, tables); + sqlite3_result_text(context, buf, -1, cloudsync_memory_free); + if (sr.tables_json) cloudsync_memory_free(sr.tables_json); } void cloudsync_network_sync0 (sqlite3_context *context, int argc, sqlite3_value **argv) { @@ -895,12 +1152,16 @@ void cloudsync_network_sync2 (sqlite3_context *context, int argc, sqlite3_value void cloudsync_network_check_changes (sqlite3_context *context, int argc, sqlite3_value **argv) { DEBUG_FUNCTION("cloudsync_network_check_changes"); - + + sync_result sr = {-1, 0, NULL, 0, NULL}; int nrows = 0; - int rc = cloudsync_network_check_internal(context, &nrows); - - // returns number of applied rows - if (rc == SQLITE_OK) sqlite3_result_int(context, nrows); + int rc = cloudsync_network_check_internal(context, &nrows, &sr); + if (rc != SQLITE_OK) { if (sr.tables_json) cloudsync_memory_free(sr.tables_json); return; } + + const char *tables = sr.tables_json ? sr.tables_json : "[]"; + char *buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s}}", nrows, tables); + sqlite3_result_text(context, buf, -1, cloudsync_memory_free); + if (sr.tables_json) cloudsync_memory_free(sr.tables_json); } void cloudsync_network_reset_sync_version (sqlite3_context *context, int argc, sqlite3_value **argv) { @@ -1000,6 +1261,21 @@ void cloudsync_network_logout (sqlite3_context *context, int argc, sqlite3_value cloudsync_memory_free(errmsg); } +void cloudsync_network_status (sqlite3_context *context, int argc, sqlite3_value **argv) { + DEBUG_FUNCTION("cloudsync_network_status"); + + cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context); + network_data *netdata = (network_data *)cloudsync_auxdata(data); + if (!netdata) { + sqlite3_result_error(context, "Unable to retrieve CloudSync network context.", -1); + return; + } + + NETWORK_RESULT res = network_receive_buffer(netdata, netdata->status_endpoint, netdata->authentication, true, false, NULL, 
CLOUDSYNC_HEADER_SQLITECLOUD); + network_set_sqlite_result(context, &res); + network_result_cleanup(&res); +} + // MARK: - int cloudsync_network_register (sqlite3 *db, char **pzErrMsg, void *ctx) { @@ -1038,7 +1314,10 @@ int cloudsync_network_register (sqlite3 *db, char **pzErrMsg, void *ctx) { rc = sqlite3_create_function(db, "cloudsync_network_logout", 0, DEFAULT_FLAGS, ctx, cloudsync_network_logout, NULL, NULL); if (rc != SQLITE_OK) return rc; - + + rc = sqlite3_create_function(db, "cloudsync_network_status", 0, DEFAULT_FLAGS, ctx, cloudsync_network_status, NULL, NULL); + if (rc != SQLITE_OK) return rc; + cleanup: if ((rc != SQLITE_OK) && (pzErrMsg)) { *pzErrMsg = sqlite3_mprintf("Error creating function in cloudsync_network_register: %s", sqlite3_errmsg(db)); diff --git a/src/network.m b/src/network.m index fa4c4ea..bfd7558 100644 --- a/src/network.m +++ b/src/network.m @@ -63,8 +63,9 @@ bool network_compute_endpoints (sqlite3_context *context, network_data *data, co NSString *check_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_CHECK]; NSString *upload_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_UPLOAD]; NSString *apply_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_APPLY]; + NSString *status_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_STATUS]; - return network_data_set_endpoints(data, (char *)authentication.UTF8String, (char *)check_endpoint.UTF8String, (char 
*)upload_endpoint.UTF8String, (char *)apply_endpoint.UTF8String); + return network_data_set_endpoints(data, (char *)authentication.UTF8String, (char *)check_endpoint.UTF8String, (char *)upload_endpoint.UTF8String, (char *)apply_endpoint.UTF8String, (char *)status_endpoint.UTF8String); } bool network_send_buffer(network_data *data, const char *endpoint, const char *authentication, const void *blob, int blob_size) { diff --git a/src/network_private.h b/src/network_private.h index 7583b66..1af7758 100644 --- a/src/network_private.h +++ b/src/network_private.h @@ -12,6 +12,7 @@ #define CLOUDSYNC_ENDPOINT_UPLOAD "upload" #define CLOUDSYNC_ENDPOINT_CHECK "check" #define CLOUDSYNC_ENDPOINT_APPLY "apply" +#define CLOUDSYNC_ENDPOINT_STATUS "status" #define CLOUDSYNC_DEFAULT_ENDPOINT_PORT "443" #define CLOUDSYNC_HEADER_SQLITECLOUD "Accept: sqlc/plain" @@ -30,7 +31,7 @@ typedef struct { } NETWORK_RESULT; char *network_data_get_siteid (network_data *data); -bool network_data_set_endpoints (network_data *data, char *auth, char *check, char *upload, char *apply); +bool network_data_set_endpoints (network_data *data, char *auth, char *check, char *upload, char *apply, char *status); bool network_compute_endpoints (sqlite3_context *context, network_data *data, const char *conn_string); bool network_send_buffer(network_data *data, const char *endpoint, const char *authentication, const void *blob, int blob_size); diff --git a/src/postgresql/database_postgresql.c b/src/postgresql/database_postgresql.c index 70bc4e1..7ac0149 100644 --- a/src/postgresql/database_postgresql.c +++ b/src/postgresql/database_postgresql.c @@ -704,27 +704,26 @@ int database_select1_value (cloudsync_context *data, const char *sql, char **ptr return rc; } -int database_select3_values (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2, int64_t *value3) { +int database_select2_values (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2) { 
cloudsync_reset_error(data); // init values *value = NULL; *value2 = 0; - *value3 = 0; *len = 0; int rc = SPI_execute(sql, true, 0); if (rc < 0) { - rc = cloudsync_set_error(data, "SPI_execute failed in database_select3_values", DBRES_ERROR); + rc = cloudsync_set_error(data, "SPI_execute failed in database_select2_values", DBRES_ERROR); goto cleanup; } if (!SPI_tuptable || !SPI_tuptable->tupdesc) { - rc = cloudsync_set_error(data, "No result table in database_select3_values", DBRES_ERROR); + rc = cloudsync_set_error(data, "No result table in database_select2_values", DBRES_ERROR); goto cleanup; } - if (SPI_tuptable->tupdesc->natts < 3) { - rc = cloudsync_set_error(data, "Result has fewer than 3 columns in database_select3_values", DBRES_ERROR); + if (SPI_tuptable->tupdesc->natts < 2) { + rc = cloudsync_set_error(data, "Result has fewer than 2 columns in database_select2_values", DBRES_ERROR); goto cleanup; } if (SPI_processed == 0) { @@ -782,17 +781,6 @@ int database_select3_values (cloudsync_context *data, const char *sql, char **va } } - // Third column - int - Datum datum3 = SPI_getbinval(tuple, SPI_tuptable->tupdesc, 3, &isnull); - if (!isnull) { - Oid typeid = SPI_gettypeid(SPI_tuptable->tupdesc, 3); - if (typeid == INT8OID) { - *value3 = DatumGetInt64(datum3); - } else if (typeid == INT4OID) { - *value3 = (int64_t)DatumGetInt32(datum3); - } - } - rc = DBRES_OK; cleanup: @@ -1121,8 +1109,8 @@ int database_select_blob (cloudsync_context *data, const char *sql, char **value return database_select1_value(data, sql, value, len, DBTYPE_BLOB); } -int database_select_blob_2int (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2, int64_t *value3) { - return database_select3_values(data, sql, value, len, value2, value3); +int database_select_blob_int (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2) { + return database_select2_values(data, sql, value, len, value2); } int database_cleanup 
(cloudsync_context *data) { diff --git a/src/sqlite/database_sqlite.c b/src/sqlite/database_sqlite.c index b1de7ad..d7ace3d 100644 --- a/src/sqlite/database_sqlite.c +++ b/src/sqlite/database_sqlite.c @@ -440,21 +440,20 @@ static int database_select1_value (cloudsync_context *data, const char *sql, cha return rc; } -static int database_select3_values (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2, int64_t *value3) { +static int database_select2_values (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2) { sqlite3 *db = (sqlite3 *)cloudsync_db(data); // init values and sanity check expected_type *value = NULL; *value2 = 0; - *value3 = 0; *len = 0; sqlite3_stmt *vm = NULL; int rc = sqlite3_prepare_v2((sqlite3 *)db, sql, -1, &vm, NULL); if (rc != SQLITE_OK) goto cleanup_select; - // ensure at least one column - if (sqlite3_column_count(vm) < 3) {rc = SQLITE_MISMATCH; goto cleanup_select;} + // ensure column count + if (sqlite3_column_count(vm) < 2) {rc = SQLITE_MISMATCH; goto cleanup_select;} rc = sqlite3_step(vm); if (rc == SQLITE_DONE) {rc = SQLITE_OK; goto cleanup_select;} // no rows OK @@ -463,7 +462,6 @@ static int database_select3_values (cloudsync_context *data, const char *sql, ch // sanity check column types if (sqlite3_column_type(vm, 0) != SQLITE_BLOB) {rc = SQLITE_MISMATCH; goto cleanup_select;} if (sqlite3_column_type(vm, 1) != SQLITE_INTEGER) {rc = SQLITE_MISMATCH; goto cleanup_select;} - if (sqlite3_column_type(vm, 2) != SQLITE_INTEGER) {rc = SQLITE_MISMATCH; goto cleanup_select;} // 1st column is BLOB const void *blob = (const void *)sqlite3_column_blob(vm, 0); @@ -477,9 +475,8 @@ static int database_select3_values (cloudsync_context *data, const char *sql, ch *len = blob_len; } - // 2nd and 3rd columns are INTEGERS + // 2nd column is INTEGER *value2 = (int64_t)sqlite3_column_int64(vm, 1); - *value3 = (int64_t)sqlite3_column_int64(vm, 2); rc = SQLITE_OK; @@ -574,8 +571,8 @@ int 
database_select_blob (cloudsync_context *data, const char *sql, char **value return database_select1_value(data, sql, value, len, DBTYPE_BLOB); } -int database_select_blob_2int (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2, int64_t *value3) { - return database_select3_values(data, sql, value, len, value2, value3); +int database_select_blob_int (cloudsync_context *data, const char *sql, char **value, int64_t *len, int64_t *value2) { + return database_select2_values(data, sql, value, len, value2); } const char *database_errmsg (cloudsync_context *data) { diff --git a/test/integration.c b/test/integration.c index 75a65e5..f3d0317 100644 --- a/test/integration.c +++ b/test/integration.c @@ -41,7 +41,7 @@ #define TERMINATE if (db) { db_exec(db, "SELECT cloudsync_terminate();"); } #define ABORT_TEST abort_test: ERROR_MSG TERMINATE if (db) sqlite3_close(db); return rc; -typedef enum { PRINT, NOPRINT, INTGR, GT0 } expected_type; +typedef enum { PRINT, NOPRINT, INTGR, GT0, STR } expected_type; typedef struct { expected_type type; @@ -87,6 +87,15 @@ static int callback(void *data, int argc, char **argv, char **names) { } else goto multiple_columns; break; + case STR: + if(argc == 1){ + if(!argv[0] || strcmp(argv[0], expect->value.s) != 0){ + printf("Error: expected from %s: \"%s\", got \"%s\"\n", names[0], expect->value.s, argv[0] ? 
argv[0] : "NULL"); + return SQLITE_ERROR; + } + } else goto multiple_columns; + break; + default: printf("Error: unknown expect type\n"); return SQLITE_ERROR; @@ -136,6 +145,16 @@ int db_expect_gt0 (sqlite3 *db, const char *sql) { return rc; } +int db_expect_str (sqlite3 *db, const char *sql, const char *expect) { + expected_t data; + data.type = STR; + data.value.s = expect; + + int rc = sqlite3_exec(db, sql, callback, &data, NULL); + if (rc != SQLITE_OK) printf("Error while executing %s: %s\n", sql, sqlite3_errmsg(db)); + return rc; +} + int open_load_ext(const char *db_path, sqlite3 **out_db) { sqlite3 *db = NULL; int rc = sqlite3_open(db_path, &db); @@ -224,7 +243,7 @@ int test_init (const char *db_path, int init) { snprintf(sql, sizeof(sql), "INSERT INTO users (id, name) VALUES ('%s', '%s');", value, value); rc = db_exec(db, sql); RCHECK rc = db_expect_int(db, "SELECT COUNT(*) as count FROM users;", 1); RCHECK - rc = db_expect_gt0(db, "SELECT cloudsync_network_sync(250,10);"); RCHECK + rc = db_expect_gt0(db, "SELECT cloudsync_network_sync(250,10) ->> '$.receive.rows';"); RCHECK rc = db_expect_gt0(db, "SELECT COUNT(*) as count FROM users;"); RCHECK rc = db_expect_gt0(db, "SELECT COUNT(*) as count FROM activities;"); RCHECK rc = db_expect_int(db, "SELECT COUNT(*) as count FROM workouts;", 0); RCHECK @@ -305,7 +324,7 @@ int test_enable_disable(const char *db_path) { // init network with connection string + apikey rc = db_exec(db2, network_init); RCHECK - rc = db_expect_gt0(db2, "SELECT cloudsync_network_sync(250,10);"); RCHECK + rc = db_expect_gt0(db2, "SELECT cloudsync_network_sync(250,10) ->> '$.receive.rows';"); RCHECK snprintf(sql, sizeof(sql), "SELECT COUNT(*) FROM users WHERE name='%s';", value); rc = db_expect_int(db2, sql, 0); RCHECK diff --git a/test/unit.c b/test/unit.c index e9131dc..1caba38 100644 --- a/test/unit.c +++ b/test/unit.c @@ -1926,8 +1926,8 @@ bool do_test_dbutils (void) { char *site_id_blob; int64_t site_id_blob_size; - int64_t dbver1, 
seq1; - rc = database_select_blob_2int(data, "SELECT cloudsync_siteid(), cloudsync_db_version(), cloudsync_seq();", &site_id_blob, &site_id_blob_size, &dbver1, &seq1); + int64_t dbver1; + rc = database_select_blob_int(data, "SELECT cloudsync_siteid(), cloudsync_db_version();", &site_id_blob, &site_id_blob_size, &dbver1); if (rc != SQLITE_OK || site_id_blob == NULL ||dbver1 != db_version) goto finalize; cloudsync_memory_free(site_id_blob); From ee8d87ac281f92c96da1ce725c8788a58cb25c34 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Mon, 9 Mar 2026 10:31:28 -0600 Subject: [PATCH 04/16] Feat/network support for multi org cloudsync (#11) * feat(network)!: replace URL connection string with JSON in cloudsync_network_init Breaking change: cloudsync_network_init now accepts a JSON object instead of a URL string. The JSON format supports multi-org CloudSync by adding projectID and organizationID fields, and inserts projectID into the endpoint path. An X-CloudSync-Org header is automatically sent with every request. New JSON format: {"address":"https://host:443","database":"db.sqlite","projectID":"abc","organizationID":"org","apikey":"KEY"} New endpoint format: {scheme}://{host}{port}/v2/cloudsync/{projectID}/{database}/{siteId}/{action} BREAKING CHANGE: URL connection strings are no longer accepted. Integration tests now require PROJECT_ID, ORGANIZATION_ID, and DATABASE environment variables. 
--- .../test-sync-roundtrip-postrges-local-rls.md | 4 +- plans/TODO.md | 2 + src/cloudsync.h | 2 +- src/network.c | 235 ++++++++---------- src/network.m | 65 +---- src/network_private.h | 2 + test/integration.c | 30 ++- 7 files changed, 142 insertions(+), 198 deletions(-) create mode 100644 plans/TODO.md diff --git a/.claude/commands/test-sync-roundtrip-postrges-local-rls.md b/.claude/commands/test-sync-roundtrip-postrges-local-rls.md index f55c20b..94fa886 100644 --- a/.claude/commands/test-sync-roundtrip-postrges-local-rls.md +++ b/.claude/commands/test-sync-roundtrip-postrges-local-rls.md @@ -1,4 +1,4 @@ -# Sync Roundtrip Test with RLS +# Sync Roundtrip Test with local Postgres database and RLS policies Execute a full roundtrip sync test between multiple local SQLite databases and the local Supabase Docker PostgreSQL instance, verifying that Row Level Security (RLS) policies are correctly enforced during sync. @@ -255,7 +255,7 @@ SELECT cloudsync_network_send_changes(); -- Check for changes from server (repeat with 2-3 second delays) SELECT cloudsync_network_check_changes(); --- Repeat check_changes 3-5 times with delays until it returns 0 or stabilizes +-- Repeat check_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes ``` **Recommended sync order:** diff --git a/plans/TODO.md b/plans/TODO.md new file mode 100644 index 0000000..d242187 --- /dev/null +++ b/plans/TODO.md @@ -0,0 +1,2 @@ +- I need to call cloudsync_update_schema_hash to update the last schema hash when upgrading the library from the 0.8.* version +- Fix cloudsync_begin_alter and cloudsync_commit_alter for PostgreSQL, and we could call them automatically with a trigger on ALTER TABLE \ No newline at end of file diff --git a/src/cloudsync.h b/src/cloudsync.h index b5919be..11aed15 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -17,7 +17,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "0.9.113" +#define CLOUDSYNC_VERSION "0.9.115" #define 
CLOUDSYNC_MAX_TABLENAME_LEN 512 #define CLOUDSYNC_VALUE_NOTSET -1 diff --git a/src/network.c b/src/network.c index f357297..1085186 100644 --- a/src/network.c +++ b/src/network.c @@ -51,6 +51,7 @@ SQLITE_EXTENSION_INIT3 struct network_data { char site_id[UUID_STR_MAXLEN]; char *authentication; // apikey or token + char *org_id; // organization ID for X-CloudSync-Org header char *check_endpoint; char *upload_endpoint; char *apply_endpoint; @@ -85,6 +86,10 @@ char *network_data_get_siteid (network_data *data) { return data->site_id; } +char *network_data_get_orgid (network_data *data) { + return data->org_id; +} + bool network_data_set_endpoints (network_data *data, char *auth, char *check, char *upload, char *apply, char *status) { // sanity check if (!check || !upload) return false; @@ -145,8 +150,9 @@ bool network_data_set_endpoints (network_data *data, char *auth, char *check, ch void network_data_free (network_data *data) { if (!data) return; - + if (data->authentication) cloudsync_memory_free(data->authentication); + if (data->org_id) cloudsync_memory_free(data->org_id); if (data->check_endpoint) cloudsync_memory_free(data->check_endpoint); if (data->upload_endpoint) cloudsync_memory_free(data->upload_endpoint); if (data->apply_endpoint) cloudsync_memory_free(data->apply_endpoint); @@ -219,6 +225,14 @@ NETWORK_RESULT network_receive_buffer (network_data *data, const char *endpoint, headers = tmp; } + if (data->org_id) { + char org_header[512]; + snprintf(org_header, sizeof(org_header), "%s: %s", CLOUDSYNC_HEADER_ORG, data->org_id); + struct curl_slist *tmp = curl_slist_append(headers, org_header); + if (!tmp) {rc = CURLE_OUT_OF_MEMORY; goto cleanup;} + headers = tmp; + } + if (json_payload) { struct curl_slist *tmp = curl_slist_append(headers, "Content-Type: application/json"); if (!tmp) {rc = CURLE_OUT_OF_MEMORY; goto cleanup;} @@ -331,7 +345,15 @@ bool network_send_buffer (network_data *data, const char *endpoint, const char * if (!tmp) {rc = 
CURLE_OUT_OF_MEMORY; goto cleanup;} headers = tmp; } - + + if (data->org_id) { + char org_header[512]; + snprintf(org_header, sizeof(org_header), "%s: %s", CLOUDSYNC_HEADER_ORG, data->org_id); + struct curl_slist *tmp = curl_slist_append(headers, org_header); + if (!tmp) {rc = CURLE_OUT_OF_MEMORY; goto cleanup;} + headers = tmp; + } + // Set headers if needed (S3 pre-signed URLs usually do not require additional headers) tmp = curl_slist_append(headers, "Content-Type: application/octet-stream"); if (!tmp) {rc = CURLE_OUT_OF_MEMORY; goto cleanup;} @@ -578,144 +600,95 @@ int network_extract_query_param (const char *query, const char *key, char *outpu return -3; // Key not found } -#if !defined(CLOUDSYNC_OMIT_CURL) || defined(SQLITE_WASM_EXTRA_INIT) bool network_compute_endpoints (sqlite3_context *context, network_data *data, const char *conn_string) { - // compute endpoints + // JSON format: {"address":"https://host:port","database":"db.sqlite","projectID":"abc","organizationID":"org","apikey":"KEY"} bool result = false; - - char *scheme = NULL; - char *host = NULL; - char *port = NULL; - char *database = NULL; - char *query = NULL; - + size_t conn_len = strlen(conn_string); + + char *address = json_extract_string(conn_string, conn_len, "address"); + char *database = json_extract_string(conn_string, conn_len, "database"); + char *project_id = json_extract_string(conn_string, conn_len, "projectID"); + char *org_id = json_extract_string(conn_string, conn_len, "organizationID"); + char *apikey = json_extract_string(conn_string, conn_len, "apikey"); + char *token = json_extract_string(conn_string, conn_len, "token"); + char *authentication = NULL; char *check_endpoint = NULL; char *upload_endpoint = NULL; char *apply_endpoint = NULL; char *status_endpoint = NULL; - char *conn_string_https = NULL; - - #ifndef SQLITE_WASM_EXTRA_INIT - CURLUcode rc = CURLUE_OUT_OF_MEMORY; - CURLU *url = curl_url(); - if (!url) goto finalize; - #endif - - conn_string_https = 
cloudsync_string_replace_prefix(conn_string, "sqlitecloud://", "https://"); - if (!conn_string_https) goto finalize; - - #ifndef SQLITE_WASM_EXTRA_INIT - // set URL: https://UUID.g5.sqlite.cloud:443/chinook.sqlite?apikey=hWDanFolRT9WDK0p54lufNrIyfgLZgtMw6tb6fbPmpo - rc = curl_url_set(url, CURLUPART_URL, conn_string_https, 0); - if (rc != CURLUE_OK) goto finalize; - - // https (MANDATORY) - rc = curl_url_get(url, CURLUPART_SCHEME, &scheme, 0); - if (rc != CURLUE_OK) goto finalize; - - // UUID.g5.sqlite.cloud (MANDATORY) - rc = curl_url_get(url, CURLUPART_HOST, &host, 0); - if (rc != CURLUE_OK) goto finalize; - - // 443 (OPTIONAL) - rc = curl_url_get(url, CURLUPART_PORT, &port, 0); - if (rc != CURLUE_OK && rc != CURLUE_NO_PORT) goto finalize; - char *port_or_default = port && strcmp(port, "8860") != 0 ? port : CLOUDSYNC_DEFAULT_ENDPOINT_PORT; - - // /chinook.sqlite (MANDATORY) - rc = curl_url_get(url, CURLUPART_PATH, &database, 0); - if (rc != CURLUE_OK) goto finalize; - - // apikey=hWDanFolRT9WDK0p54lufNrIyfgLZgtMw6tb6fbPmpo (OPTIONAL) - rc = curl_url_get(url, CURLUPART_QUERY, &query, 0); - if (rc != CURLUE_OK && rc != CURLUE_NO_QUERY) goto finalize; - #else - // Parse: scheme://host[:port]/path?query - const char *p = strstr(conn_string_https, "://"); - if (!p) goto finalize; - scheme = substr(conn_string_https, p); - p += 3; - const char *host_start = p; - const char *host_end = strpbrk(host_start, ":/?"); - if (!host_end) goto finalize; - host = substr(host_start, host_end); - p = host_end; - if (*p == ':') { - ++p; - const char *port_end = strpbrk(p, "/?"); - if (!port_end) goto finalize; - port = substr(p, port_end); - p = port_end; - } - if (*p == '/') { - const char *path_start = p; - const char *path_end = strchr(path_start, '?'); - if (!path_end) path_end = path_start + strlen(path_start); - database = substr(path_start, path_end); - p = path_end; + // validate mandatory fields + if (!address || !database || !project_id || !org_id) { + 
sqlite3_result_error(context, "JSON must contain address, database, projectID, and organizationID", -1); + sqlite3_result_error_code(context, SQLITE_ERROR); + goto finalize; } - if (*p == '?') { - query = strdup(p); + + // parse address: scheme://host[:port] + const char *scheme_end = strstr(address, "://"); + if (!scheme_end) { + sqlite3_result_error(context, "address must include scheme (e.g. https://host:port)", -1); + sqlite3_result_error_code(context, SQLITE_ERROR); + goto finalize; } - if (!scheme || !host || !database) goto finalize; - char *port_or_default = port && strcmp(port, "8860") != 0 ? port : CLOUDSYNC_DEFAULT_ENDPOINT_PORT; - #endif - - if (query != NULL) { - char value[CLOUDSYNC_SESSION_TOKEN_MAXSIZE]; - if (!authentication && network_extract_query_param(query, "apikey", value, sizeof(value)) == 0) { - authentication = network_authentication_token("apikey", value); - } - if (!authentication && network_extract_query_param(query, "token", value, sizeof(value)) == 0) { - authentication = network_authentication_token("token", value); - } + + size_t scheme_len = scheme_end - address; + const char *host_start = scheme_end + 3; + const char *port_sep = strchr(host_start, ':'); + const char *host_end = port_sep ? port_sep : host_start + strlen(host_start); + const char *port_str = port_sep ? 
port_sep + 1 : CLOUDSYNC_DEFAULT_ENDPOINT_PORT; + + // build authentication from apikey or token + if (apikey) { + authentication = network_authentication_token("apikey", apikey); + } else if (token) { + authentication = network_authentication_token("token", token); } - - size_t requested = strlen(scheme) + strlen(host) + strlen(port_or_default) + strlen(CLOUDSYNC_ENDPOINT_PREFIX) + strlen(database) + 64; + + // build endpoints: {scheme}://{host}:{port}/v2/cloudsync/{projectID}/{database}/{siteId}/{action} + size_t requested = scheme_len + 3 + (host_end - host_start) + 1 + strlen(port_str) + 1 + + strlen(CLOUDSYNC_ENDPOINT_PREFIX) + 1 + strlen(project_id) + 1 + + strlen(database) + 1 + UUID_STR_MAXLEN + 1 + 16; check_endpoint = (char *)cloudsync_memory_zeroalloc(requested); upload_endpoint = (char *)cloudsync_memory_zeroalloc(requested); apply_endpoint = (char *)cloudsync_memory_zeroalloc(requested); status_endpoint = (char *)cloudsync_memory_zeroalloc(requested); - if ((!upload_endpoint) || (!check_endpoint) || (!apply_endpoint) || (!status_endpoint)) goto finalize; + if (!check_endpoint || !upload_endpoint || !apply_endpoint || !status_endpoint) { + sqlite3_result_error_code(context, SQLITE_NOMEM); + goto finalize; + } - snprintf(check_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_CHECK); - snprintf(upload_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_UPLOAD); - snprintf(apply_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_APPLY); - snprintf(status_endpoint, requested, "%s://%s:%s/%s%s/%s/%s", scheme, host, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database, data->site_id, CLOUDSYNC_ENDPOINT_STATUS); + // format: 
scheme://host:port/v2/cloudsync/projectID/database/siteId/action + snprintf(check_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", + (int)scheme_len, address, (int)(host_end - host_start), host_start, port_str, + CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_CHECK); + snprintf(upload_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", + (int)scheme_len, address, (int)(host_end - host_start), host_start, port_str, + CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_UPLOAD); + snprintf(apply_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", + (int)scheme_len, address, (int)(host_end - host_start), host_start, port_str, + CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_APPLY); + snprintf(status_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", + (int)scheme_len, address, (int)(host_end - host_start), host_start, port_str, + CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_STATUS); result = true; - + finalize: - if (result == false) { - // store proper result code/message - #ifndef SQLITE_WASM_EXTRA_INIT - if (rc != CURLUE_OK) sqlite3_result_error(context, curl_url_strerror(rc), -1); - sqlite3_result_error_code(context, (rc != CURLUE_OK) ? 
SQLITE_ERROR : SQLITE_NOMEM); - #else - sqlite3_result_error(context, "URL parse error", -1); - sqlite3_result_error_code(context, SQLITE_ERROR); - #endif - - // cleanup memory managed by the extension - if (authentication) cloudsync_memory_free(authentication); - if (check_endpoint) cloudsync_memory_free(check_endpoint); - if (upload_endpoint) cloudsync_memory_free(upload_endpoint); - if (apply_endpoint) cloudsync_memory_free(apply_endpoint); - if (status_endpoint) cloudsync_memory_free(status_endpoint); - } - if (result) { if (authentication) { if (data->authentication) cloudsync_memory_free(data->authentication); data->authentication = authentication; } - + + if (data->org_id) cloudsync_memory_free(data->org_id); + data->org_id = cloudsync_string_dup(org_id); + if (data->check_endpoint) cloudsync_memory_free(data->check_endpoint); data->check_endpoint = check_endpoint; - + if (data->upload_endpoint) cloudsync_memory_free(data->upload_endpoint); data->upload_endpoint = upload_endpoint; @@ -724,22 +697,24 @@ bool network_compute_endpoints (sqlite3_context *context, network_data *data, co if (data->status_endpoint) cloudsync_memory_free(data->status_endpoint); data->status_endpoint = status_endpoint; + } else { + if (authentication) cloudsync_memory_free(authentication); + if (check_endpoint) cloudsync_memory_free(check_endpoint); + if (upload_endpoint) cloudsync_memory_free(upload_endpoint); + if (apply_endpoint) cloudsync_memory_free(apply_endpoint); + if (status_endpoint) cloudsync_memory_free(status_endpoint); } - - // cleanup memory - #ifndef SQLITE_WASM_EXTRA_INIT - if (url) curl_url_cleanup(url); - #endif - if (scheme) curl_free(scheme); - if (host) curl_free(host); - if (port) curl_free(port); - if (database) curl_free(database); - if (query) curl_free(query); - if (conn_string_https && conn_string_https != conn_string) cloudsync_memory_free(conn_string_https); - + + // cleanup JSON-extracted strings + if (address) cloudsync_memory_free(address); + if 
(database) cloudsync_memory_free(database); + if (project_id) cloudsync_memory_free(project_id); + if (org_id) cloudsync_memory_free(org_id); + if (apikey) cloudsync_memory_free(apikey); + if (token) cloudsync_memory_free(token); + return result; } -#endif void network_result_to_sqlite_error (sqlite3_context *context, NETWORK_RESULT res, const char *default_error_message) { sqlite3_result_error(context, ((res.code == CLOUDSYNC_NETWORK_ERROR) && (res.buffer)) ? res.buffer : default_error_message, -1); @@ -778,10 +753,9 @@ void cloudsync_network_init (sqlite3_context *context, int argc, sqlite3_value * // save site_id string representation: 01957493c6c07e14803727e969f1d2cc cloudsync_uuid_v7_stringify(site_id, netdata->site_id, false); - // connection string is something like: - // https://UUID.g5.sqlite.cloud:443/chinook.sqlite?apikey=hWDanFolRT9WDK0p54lufNrIyfgLZgtMw6tb6fbPmpo - // or https://UUID.g5.sqlite.cloud:443/chinook.sqlite - // apikey part is optional and can be replaced by a session token once client is authenticated + // connection string is a JSON object: + // {"address":"https://UUID.sqlite.cloud:443","database":"chinook.sqlite","projectID":"abc123","organizationID":"org456","apikey":"KEY"} + // apikey/token are optional and can be set later via cloudsync_network_set_token/cloudsync_network_set_apikey const char *connection_param = (const char *)sqlite3_value_text(argv[0]); @@ -1080,7 +1054,6 @@ int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows, sync char json_payload[2024]; snprintf(json_payload, sizeof(json_payload), "{\"dbVersion\":%lld, \"seq\":%d}", (long long)db_version, seq); - // http://uuid.g5.sqlite.cloud/v2/cloudsync/{dbname}/{site_id}/check NETWORK_RESULT result = network_receive_buffer(netdata, netdata->check_endpoint, netdata->authentication, true, true, json_payload, CLOUDSYNC_HEADER_SQLITECLOUD); int rc = SQLITE_OK; if (result.code == CLOUDSYNC_NETWORK_BUFFER) { diff --git a/src/network.m b/src/network.m index 
bfd7558..84d336c 100644 --- a/src/network.m +++ b/src/network.m @@ -13,60 +13,7 @@ void network_buffer_cleanup (void *xdata) { if (xdata) CFRelease(xdata); } -bool network_compute_endpoints (sqlite3_context *context, network_data *data, const char *conn_string) { - NSString *conn = [NSString stringWithUTF8String:conn_string]; - NSString *conn_string_https = nil; - - if ([conn hasPrefix:@"sqlitecloud://"]) { - conn_string_https = [conn stringByReplacingCharactersInRange:NSMakeRange(0, [@"sqlitecloud://" length]) withString:@"https://"]; - } else { - conn_string_https = conn; - } - - NSURL *url = [NSURL URLWithString:conn_string_https]; - if (!url) return false; - - NSString *scheme = url.scheme; // "https" - if (!scheme) return false; - NSString *host = url.host; // "cn5xiooanz.global3.ryujaz.sqlite.cloud" - if (!host) return false; - - NSString *port = url.port.stringValue; - NSString *database = url.path; // "/chinook-cloudsync.sqlite" - if (!database) return false; - - NSString *query = url.query; // "apikey=hWDanFolRT9WDK0p54lufNrIyfgLZgtMw6tb6fbPmpo" (OPTIONAL) - NSString *authentication = nil; - - if (query) { - NSURLComponents *components = [NSURLComponents componentsWithString:[@"http://dummy?" stringByAppendingString:query]]; - NSArray *items = components.queryItems; - for (NSURLQueryItem *item in items) { - // build new token - // apikey: just write the key for retrocompatibility - // other keys, like token: add a prefix, i.e. token= - - if ([item.name isEqualToString:@"apikey"]) { - authentication = item.value; - break; - } - if ([item.name isEqualToString:@"token"]) { - authentication = [NSString stringWithFormat:@"%@=%@", item.name, item.value]; - break; - } - } - } - - char *site_id = network_data_get_siteid(data); - char *port_or_default = (port && strcmp(port.UTF8String, "8860") != 0) ? 
(char *)port.UTF8String : CLOUDSYNC_DEFAULT_ENDPOINT_PORT; - - NSString *check_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_CHECK]; - NSString *upload_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_UPLOAD]; - NSString *apply_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_APPLY]; - NSString *status_endpoint = [NSString stringWithFormat:@"%s://%s:%s/%s%s/%s/%s", scheme.UTF8String, host.UTF8String, port_or_default, CLOUDSYNC_ENDPOINT_PREFIX, database.UTF8String, site_id, CLOUDSYNC_ENDPOINT_STATUS]; - - return network_data_set_endpoints(data, (char *)authentication.UTF8String, (char *)check_endpoint.UTF8String, (char *)upload_endpoint.UTF8String, (char *)apply_endpoint.UTF8String, (char *)status_endpoint.UTF8String); -} +// network_compute_endpoints is implemented in network.c (shared across all platforms) bool network_send_buffer(network_data *data, const char *endpoint, const char *authentication, const void *blob, int blob_size) { NSString *urlString = [NSString stringWithUTF8String:endpoint]; @@ -83,6 +30,11 @@ bool network_send_buffer(network_data *data, const char *endpoint, const char *a [request setValue:authString forHTTPHeaderField:@"Authorization"]; } + char *org_id = network_data_get_orgid(data); + if (org_id) { + [request setValue:[NSString stringWithUTF8String:org_id] forHTTPHeaderField:@CLOUDSYNC_HEADER_ORG]; + } + NSData *bodyData = [NSData dataWithBytes:blob length:blob_size]; [request setHTTPBody:bodyData]; @@ -136,6 +88,11 @@ NETWORK_RESULT network_receive_buffer(network_data *data, const char *endpoint, } } + char 
*org_id = network_data_get_orgid(data); + if (org_id) { + [request setValue:[NSString stringWithUTF8String:org_id] forHTTPHeaderField:@CLOUDSYNC_HEADER_ORG]; + } + if (authentication) { NSString *authString = [NSString stringWithFormat:@"Bearer %s", authentication]; [request setValue:authString forHTTPHeaderField:@"Authorization"]; diff --git a/src/network_private.h b/src/network_private.h index 1af7758..db3eae7 100644 --- a/src/network_private.h +++ b/src/network_private.h @@ -15,6 +15,7 @@ #define CLOUDSYNC_ENDPOINT_STATUS "status" #define CLOUDSYNC_DEFAULT_ENDPOINT_PORT "443" #define CLOUDSYNC_HEADER_SQLITECLOUD "Accept: sqlc/plain" +#define CLOUDSYNC_HEADER_ORG "X-CloudSync-Org" #define CLOUDSYNC_NETWORK_OK 1 #define CLOUDSYNC_NETWORK_ERROR 2 @@ -31,6 +32,7 @@ typedef struct { } NETWORK_RESULT; char *network_data_get_siteid (network_data *data); +char *network_data_get_orgid (network_data *data); bool network_data_set_endpoints (network_data *data, char *auth, char *check, char *upload, char *apply, char *status); bool network_compute_endpoints (sqlite3_context *context, network_data *data, const char *conn_string); diff --git a/test/integration.c b/test/integration.c index f3d0317..fb8334b 100644 --- a/test/integration.c +++ b/test/integration.c @@ -224,15 +224,20 @@ int test_init (const char *db_path, int init) { rc = db_exec(db, "SELECT cloudsync_init('activities');"); RCHECK rc = db_exec(db, "SELECT cloudsync_init('workouts');"); RCHECK - // init network with connection string + apikey - char network_init[512]; + // init network with JSON connection string + char network_init[1024]; const char* conn_str = getenv("CONNECTION_STRING"); const char* apikey = getenv("APIKEY"); - if (!conn_str || !apikey) { - fprintf(stderr, "Error: CONNECTION_STRING or APIKEY not set.\n"); + const char* project_id = getenv("PROJECT_ID"); + const char* org_id = getenv("ORGANIZATION_ID"); + const char* database = getenv("DATABASE"); + if (!conn_str || !apikey || !project_id || 
!org_id || !database) { + fprintf(stderr, "Error: CONNECTION_STRING, APIKEY, PROJECT_ID, ORGANIZATION_ID, or DATABASE not set.\n"); exit(1); } - snprintf(network_init, sizeof(network_init), "SELECT cloudsync_network_init('%s?apikey=%s');", conn_str, apikey); + snprintf(network_init, sizeof(network_init), + "SELECT cloudsync_network_init('{\"address\":\"%s\",\"database\":\"%s\",\"projectID\":\"%s\",\"organizationID\":\"%s\",\"apikey\":\"%s\"}');", + conn_str, database, project_id, org_id, apikey); rc = db_exec(db, network_init); RCHECK rc = db_expect_int(db, "SELECT COUNT(*) as count FROM activities;", 0); RCHECK @@ -294,15 +299,20 @@ int test_enable_disable(const char *db_path) { snprintf(sql, sizeof(sql), "INSERT INTO users (id, name) VALUES ('%s-should-sync', '%s-should-sync');", value, value); rc = db_exec(db, sql); RCHECK - // init network with connection string + apikey - char network_init[512]; + // init network with JSON connection string + char network_init[1024]; const char* conn_str = getenv("CONNECTION_STRING"); const char* apikey = getenv("APIKEY"); - if (!conn_str || !apikey) { - fprintf(stderr, "Error: CONNECTION_STRING or APIKEY not set.\n"); + const char* project_id = getenv("PROJECT_ID"); + const char* org_id = getenv("ORGANIZATION_ID"); + const char* database = getenv("DATABASE"); + if (!conn_str || !apikey || !project_id || !org_id || !database) { + fprintf(stderr, "Error: CONNECTION_STRING, APIKEY, PROJECT_ID, ORGANIZATION_ID, or DATABASE not set.\n"); exit(1); } - snprintf(network_init, sizeof(network_init), "SELECT cloudsync_network_init('%s?apikey=%s');", conn_str, apikey); + snprintf(network_init, sizeof(network_init), + "SELECT cloudsync_network_init('{\"address\":\"%s\",\"database\":\"%s\",\"projectID\":\"%s\",\"organizationID\":\"%s\",\"apikey\":\"%s\"}');", + conn_str, database, project_id, org_id, apikey); rc = db_exec(db, network_init); RCHECK rc = db_exec(db, "SELECT cloudsync_network_send_changes();"); RCHECK From 
938d6edbb6e63b4b7c497c40897e746a4ee976b7 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Tue, 10 Mar 2026 17:36:14 -0600 Subject: [PATCH 05/16] Disable notnull prikey constraints (#12) * Added a new compilation macro CLOUDSYNC_CHECK_NOTNULL_PRIKEYS (disabled by default) * The cloudsync extension now enforces NULL primary key rejection at runtime (any write with a NULL PK returns an error), so the explicit NOT NULL constraint on primary key columns is no longer a schema requirement * test: add null primary key rejection tests for SQLite and PostgreSQL * docs: remove NOT NULL requirement from primary key definitions The extension now enforces NULL primary key rejection at runtime, so the explicit NOT NULL constraint on PK columns is no longer a schema requirement. Replace the "must be NOT NULL" guidance with a note about runtime enforcement. --------- Co-authored-by: Marco Bambini --- .../test-sync-roundtrip-postgres-local.md | 6 +- .../test-sync-roundtrip-postrges-local-rls.md | 6 +- .../test-sync-roundtrip-sqlitecloud-rls.md | 6 +- API.md | 2 +- README.md | 8 +-- docs/postgresql/CLIENT.md | 16 ++--- docs/postgresql/RLS.md | 6 +- docs/postgresql/SUPABASE.md | 2 +- examples/simple-todo-db/README.md | 6 +- src/cloudsync.c | 2 + src/cloudsync.h | 2 +- src/pk.c | 11 ++- src/pk.h | 2 + src/postgresql/cloudsync_postgresql.c | 14 +++- src/sqlite/cloudsync_sqlite.c | 17 ++++- test/postgresql/30_null_prikey_insert.sql | 68 +++++++++++++++++++ test/postgresql/full_test.sql | 1 + test/unit.c | 65 ++++++++++++------ 18 files changed, 186 insertions(+), 54 deletions(-) create mode 100644 test/postgresql/30_null_prikey_insert.sql diff --git a/.claude/commands/test-sync-roundtrip-postgres-local.md b/.claude/commands/test-sync-roundtrip-postgres-local.md index 686fc12..8713466 100644 --- a/.claude/commands/test-sync-roundtrip-postgres-local.md +++ b/.claude/commands/test-sync-roundtrip-postgres-local.md @@ -16,7 +16,7 @@ Ask the user to provide a DDL query for the table(s) to 
test. It can be in Postg **Option 1: Simple TEXT primary key** ```sql CREATE TABLE test_sync ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, name TEXT, value INTEGER ); @@ -34,13 +34,13 @@ CREATE TABLE test_uuid ( **Option 3: Two tables scenario (tests multi-table sync)** ```sql CREATE TABLE authors ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, name TEXT, email TEXT ); CREATE TABLE books ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, title TEXT, author_id TEXT, published_year INTEGER diff --git a/.claude/commands/test-sync-roundtrip-postrges-local-rls.md b/.claude/commands/test-sync-roundtrip-postrges-local-rls.md index 94fa886..ef74646 100644 --- a/.claude/commands/test-sync-roundtrip-postrges-local-rls.md +++ b/.claude/commands/test-sync-roundtrip-postrges-local-rls.md @@ -16,7 +16,7 @@ Ask the user to provide a DDL query for the table(s) to test. It can be in Postg **Option 1: Simple TEXT primary key with user_id for RLS** ```sql CREATE TABLE test_sync ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, user_id UUID NOT NULL, name TEXT, value INTEGER @@ -36,14 +36,14 @@ CREATE TABLE test_uuid ( **Option 3: Two tables scenario with user ownership** ```sql CREATE TABLE authors ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, user_id UUID NOT NULL, name TEXT, email TEXT ); CREATE TABLE books ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, user_id UUID NOT NULL, title TEXT, author_id TEXT, diff --git a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md index 4adb700..0bf36c0 100644 --- a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md +++ b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md @@ -16,7 +16,7 @@ Ask the user to provide a DDL query for the table(s) to test. 
It can be in Postg **Option 1: Simple TEXT primary key with user_id for RLS** ```sql CREATE TABLE test_sync ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, user_id TEXT NOT NULL, name TEXT, value INTEGER @@ -26,14 +26,14 @@ CREATE TABLE test_sync ( **Option 2: Two tables scenario with user ownership** ```sql CREATE TABLE authors ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, user_id TEXT NOT NULL, name TEXT, email TEXT ); CREATE TABLE books ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, user_id TEXT NOT NULL, title TEXT, author_id TEXT, diff --git a/API.md b/API.md index cd13a1b..f8f9c37 100644 --- a/API.md +++ b/API.md @@ -41,8 +41,8 @@ This document provides a reference for the SQLite functions provided by the `sql Before initialization, `cloudsync_init` performs schema sanity checks to ensure compatibility with CRDT requirements and best practices. These checks include: - Primary keys should not be auto-incrementing integers; GUIDs (UUIDs, ULIDs) are highly recommended to prevent multi-node collisions. -- All primary key columns must be `NOT NULL`. - All non-primary key `NOT NULL` columns must have a `DEFAULT` value. +- **Note:** Any write operation that includes a NULL value for a primary key column will be rejected with an error, even if SQLite would normally allow it due to a legacy behavior. 
**Schema Design Considerations:** diff --git a/README.md b/README.md index ad91448..b01226b 100644 --- a/README.md +++ b/README.md @@ -264,7 +264,7 @@ sqlite3 myapp.db -- Create a table (primary key MUST be TEXT for global uniqueness) CREATE TABLE IF NOT EXISTS my_data ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, value TEXT NOT NULL DEFAULT '', created_at TEXT DEFAULT CURRENT_TIMESTAMP ); @@ -313,7 +313,7 @@ SELECT cloudsync_terminate(); -- Load extension and create identical table structure .load ./cloudsync CREATE TABLE IF NOT EXISTS my_data ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, value TEXT NOT NULL DEFAULT '', created_at TEXT DEFAULT CURRENT_TIMESTAMP ); @@ -372,12 +372,12 @@ When designing your database schema for SQLite Sync, follow these best practices - **Use globally unique identifiers**: Always use TEXT primary keys with UUIDs, ULIDs, or similar globally unique identifiers - **Avoid auto-incrementing integers**: Integer primary keys can cause conflicts across multiple devices - **Use `cloudsync_uuid()`**: The built-in function generates UUIDv7 identifiers optimized for distributed systems -- **All primary keys must be explicitly declared as `NOT NULL`**. +- **Note:** Any write operation that includes a NULL value for a primary key column will be rejected with an error, even if SQLite would normally allow it due to a legacy behavior. ```sql -- ✅ Recommended: Globally unique TEXT primary key CREATE TABLE users ( - id TEXT PRIMARY KEY NOT NULL, -- Use cloudsync_uuid() + id TEXT PRIMARY KEY, -- Use cloudsync_uuid() name TEXT NOT NULL, email TEXT UNIQUE NOT NULL ); diff --git a/docs/postgresql/CLIENT.md b/docs/postgresql/CLIENT.md index 9ef8cc8..58751d1 100644 --- a/docs/postgresql/CLIENT.md +++ b/docs/postgresql/CLIENT.md @@ -34,8 +34,8 @@ so CloudSync can sync between a PostgreSQL server and SQLite clients. ### 1) Primary Keys -- Use **TEXT NOT NULL** primary keys in SQLite. 
-- PostgreSQL primary keys can be **TEXT NOT NULL** or **UUID**. If the PK type +- Use **TEXT** primary keys in SQLite. +- PostgreSQL primary keys can be **TEXT** or **UUID**. If the PK type isn't explicitly mapped to a DBTYPE (like UUID), it will be converted to TEXT in the payload so it remains compatible with the SQLite extension. - Generate IDs with `cloudsync_uuid()` on both sides. @@ -43,17 +43,17 @@ so CloudSync can sync between a PostgreSQL server and SQLite clients. SQLite: ```sql -id TEXT PRIMARY KEY NOT NULL +id TEXT PRIMARY KEY ``` PostgreSQL: ```sql -id TEXT PRIMARY KEY NOT NULL +id TEXT PRIMARY KEY ``` PostgreSQL (UUID): ```sql -id UUID PRIMARY KEY NOT NULL +id UUID PRIMARY KEY ``` ### 2) NOT NULL Columns Must Have DEFAULTs @@ -99,7 +99,7 @@ Use defaults that serialize the same on both sides: SQLite: ```sql CREATE TABLE notes ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, title TEXT NOT NULL DEFAULT '', body TEXT DEFAULT '', views INTEGER NOT NULL DEFAULT 0, @@ -111,7 +111,7 @@ CREATE TABLE notes ( PostgreSQL: ```sql CREATE TABLE notes ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, title TEXT NOT NULL DEFAULT '', body TEXT DEFAULT '', views INTEGER NOT NULL DEFAULT 0, @@ -136,7 +136,7 @@ SELECT cloudsync_init('notes'); ### Checklist -- [ ] PKs are TEXT + NOT NULL +- [ ] PKs are TEXT (or UUID in PostgreSQL) - [ ] All NOT NULL columns have DEFAULT - [ ] Only INTEGER/FLOAT/TEXT/BLOB-compatible types - [ ] Same column names and order diff --git a/docs/postgresql/RLS.md b/docs/postgresql/RLS.md index c0871d3..cc686ad 100644 --- a/docs/postgresql/RLS.md +++ b/docs/postgresql/RLS.md @@ -31,7 +31,7 @@ Given a table with an ownership column (`user_id`): ```sql CREATE TABLE documents ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, user_id UUID, title TEXT, content TEXT @@ -68,13 +68,13 @@ This example shows the complete flow of syncing data between two databases where ```sql -- Source database (DB A) — no RLS, represents the sync 
server CREATE TABLE documents ( - id TEXT PRIMARY KEY NOT NULL, user_id UUID, title TEXT, content TEXT + id TEXT PRIMARY KEY, user_id UUID, title TEXT, content TEXT ); SELECT cloudsync_init('documents'); -- Target database (DB B) — RLS enforced CREATE TABLE documents ( - id TEXT PRIMARY KEY NOT NULL, user_id UUID, title TEXT, content TEXT + id TEXT PRIMARY KEY, user_id UUID, title TEXT, content TEXT ); SELECT cloudsync_init('documents'); ALTER TABLE documents ENABLE ROW LEVEL SECURITY; diff --git a/docs/postgresql/SUPABASE.md b/docs/postgresql/SUPABASE.md index 94aa466..a800ae3 100644 --- a/docs/postgresql/SUPABASE.md +++ b/docs/postgresql/SUPABASE.md @@ -76,7 +76,7 @@ SELECT cloudsync_version(); ```sql CREATE TABLE notes ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, body TEXT DEFAULT '' ); diff --git a/examples/simple-todo-db/README.md b/examples/simple-todo-db/README.md index 772b8fe..55a639c 100644 --- a/examples/simple-todo-db/README.md +++ b/examples/simple-todo-db/README.md @@ -59,7 +59,7 @@ Tables must be created on both the local database and SQLite Cloud with identica -- Create the main tasks table -- Note: Primary key MUST be TEXT (not INTEGER) for global uniqueness CREATE TABLE IF NOT EXISTS tasks ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, userid TEXT NOT NULL DEFAULT '', title TEXT NOT NULL DEFAULT '', description TEXT DEFAULT '', @@ -84,7 +84,7 @@ SELECT cloudsync_is_enabled('tasks'); - Execute the same CREATE TABLE statement: ```sql CREATE TABLE IF NOT EXISTS tasks ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, userid TEXT NOT NULL DEFAULT '', title TEXT NOT NULL DEFAULT '', description TEXT DEFAULT '', @@ -149,7 +149,7 @@ sqlite3 todo_device_b.db ```sql -- Create identical table structure CREATE TABLE IF NOT EXISTS tasks ( - id TEXT PRIMARY KEY NOT NULL, + id TEXT PRIMARY KEY, userid TEXT NOT NULL DEFAULT '', title TEXT NOT NULL DEFAULT '', description TEXT DEFAULT '', diff --git a/src/cloudsync.c b/src/cloudsync.c 
index 64e2ce6..e0c454e 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -2986,6 +2986,7 @@ int cloudsync_table_sanity_check (cloudsync_context *data, const char *name, boo } // if user declared explicit primary key(s) then make sure they are all declared as NOT NULL + #if CLOUDSYNC_CHECK_NOTNULL_PRIKEYS if (npri_keys > 0) { int npri_keys_notnull = database_count_pk(data, name, true, cloudsync_schema(data)); if (npri_keys_notnull < 0) return cloudsync_set_dberror(data); @@ -2994,6 +2995,7 @@ int cloudsync_table_sanity_check (cloudsync_context *data, const char *name, boo return cloudsync_set_error(data, buffer, DBRES_ERROR); } } + #endif // check for columns declared as NOT NULL without a DEFAULT value. // Otherwise, col_merge_stmt would fail if changes to other columns are inserted first. diff --git a/src/cloudsync.h b/src/cloudsync.h index 11aed15..94a9410 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -17,7 +17,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "0.9.115" +#define CLOUDSYNC_VERSION "0.9.116" #define CLOUDSYNC_MAX_TABLENAME_LEN 512 #define CLOUDSYNC_VALUE_NOTSET -1 diff --git a/src/pk.c b/src/pk.c index cd7899b..97a6639 100644 --- a/src/pk.c +++ b/src/pk.c @@ -87,6 +87,8 @@ #define DATABASE_TYPE_MAX_NEGATIVE_INTEGER 6 // was SQLITE_MAX_NEGATIVE_INTEGER #define DATABASE_TYPE_NEGATIVE_FLOAT 7 // was SQLITE_NEGATIVE_FLOAT +char * const PRIKEY_NULL_CONSTRAINT_ERROR = "PRIKEY_NULL_CONSTRAINT_ERROR"; + // MARK: - Public Callbacks - int pk_decode_bind_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) { @@ -436,7 +438,14 @@ char *pk_encode (dbvalue_t **argv, int argc, char *b, bool is_prikey, size_t *bs if (!bsize) return NULL; // must fit in a single byte if (argc > 255) return NULL; - + + // if schema does not enforce NOT NULL on primary keys, check at runtime + #ifndef CLOUDSYNC_CHECK_NOTNULL_PRIKEYS + for (int i = 0; i < argc; i++) { + if (database_value_type(argv[i]) == DBTYPE_NULL) return 
PRIKEY_NULL_CONSTRAINT_ERROR; + } + #endif + // 1 is the number of items in the serialization // always 1 byte so max 255 primary keys, even if there is an hard SQLite limit of 128 size_t blen_curr = *bsize; diff --git a/src/pk.h b/src/pk.h index 2571915..ea9a390 100644 --- a/src/pk.h +++ b/src/pk.h @@ -15,6 +15,8 @@ typedef int (*pk_decode_callback) (void *xdata, int index, int type, int64_t ival, double dval, char *pval); +extern char * const PRIKEY_NULL_CONSTRAINT_ERROR; + char *pk_encode_prikey (dbvalue_t **argv, int argc, char *b, size_t *bsize); char *pk_encode_value (dbvalue_t *value, size_t *bsize); char *pk_encode (dbvalue_t **argv, int argc, char *b, bool is_prikey, size_t *bsize, int skip_idx); diff --git a/src/postgresql/cloudsync_postgresql.c b/src/postgresql/cloudsync_postgresql.c index 09df63b..21308cf 100644 --- a/src/postgresql/cloudsync_postgresql.c +++ b/src/postgresql/cloudsync_postgresql.c @@ -1122,7 +1122,7 @@ Datum cloudsync_pk_encode (PG_FUNCTION_ARGS) { size_t pklen = 0; char *encoded = pk_encode_prikey((dbvalue_t **)argv, argc, NULL, &pklen); - if (!encoded) { + if (!encoded || encoded == PRIKEY_NULL_CONSTRAINT_ERROR) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("cloudsync_pk_encode failed to encode primary key"))); } @@ -1271,6 +1271,10 @@ Datum cloudsync_insert (PG_FUNCTION_ARGS) { if (!cleanup.pk) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("Not enough memory to encode the primary key(s)"))); } + if (cleanup.pk == PRIKEY_NULL_CONSTRAINT_ERROR) { + cleanup.pk = NULL; + ereport(ERROR, (errcode(ERRCODE_NOT_NULL_VIOLATION), errmsg("Insert aborted because primary key in table %s contains NULL values", table_name))); + } // Compute the next database version for tracking changes int64_t db_version = cloudsync_dbversion_next(data, CLOUDSYNC_VALUE_NOTSET); @@ -1360,6 +1364,10 @@ Datum cloudsync_delete (PG_FUNCTION_ARGS) { if (!cleanup.pk) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("Not enough memory to 
encode the primary key(s)"))); } + if (cleanup.pk == PRIKEY_NULL_CONSTRAINT_ERROR) { + cleanup.pk = NULL; + ereport(ERROR, (errcode(ERRCODE_NOT_NULL_VIOLATION), errmsg("Delete aborted because primary key in table %s contains NULL values", table_name))); + } int64_t db_version = cloudsync_dbversion_next(data, CLOUDSYNC_VALUE_NOTSET); @@ -1561,6 +1569,10 @@ Datum cloudsync_update_finalfn (PG_FUNCTION_ARGS) { if (!pk) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("Not enough memory to encode the primary key(s)"))); } + if (pk == PRIKEY_NULL_CONSTRAINT_ERROR) { + pk = NULL; + ereport(ERROR, (errcode(ERRCODE_NOT_NULL_VIOLATION), errmsg("Update aborted because primary key in table %s contains NULL values", table_name))); + } if (prikey_changed) { oldpk = pk_encode_prikey((dbvalue_t **)payload->old_values, pk_count, buffer2, &oldpklen); if (!oldpk) { diff --git a/src/sqlite/cloudsync_sqlite.c b/src/sqlite/cloudsync_sqlite.c index 08268b3..8333111 100644 --- a/src/sqlite/cloudsync_sqlite.c +++ b/src/sqlite/cloudsync_sqlite.c @@ -260,7 +260,7 @@ void dbsync_col_value (sqlite3_context *context, int argc, sqlite3_value **argv) void dbsync_pk_encode (sqlite3_context *context, int argc, sqlite3_value **argv) { size_t bsize = 0; char *buffer = pk_encode_prikey((dbvalue_t **)argv, argc, NULL, &bsize); - if (!buffer) { + if (!buffer || buffer == PRIKEY_NULL_CONSTRAINT_ERROR) { sqlite3_result_null(context); return; } @@ -347,6 +347,10 @@ void dbsync_insert (sqlite3_context *context, int argc, sqlite3_value **argv) { sqlite3_result_error(context, "Not enough memory to encode the primary key(s).", -1); return; } + if (pk == PRIKEY_NULL_CONSTRAINT_ERROR) { + dbsync_set_error(context, "Insert aborted because primary key in table %s contains NULL values.", table_name); + return; + } // compute the next database version for tracking changes int64_t db_version = cloudsync_dbversion_next(data, CLOUDSYNC_VALUE_NOTSET); @@ -407,6 +411,11 @@ void dbsync_delete (sqlite3_context 
*context, int argc, sqlite3_value **argv) { return; } + if (pk == PRIKEY_NULL_CONSTRAINT_ERROR) { + dbsync_set_error(context, "Delete aborted because primary key in table %s contains NULL values.", table_name); + return; + } + // mark the row as deleted by inserting a delete sentinel into the metadata rc = local_mark_delete_meta(table, pk, pklen, db_version, cloudsync_bumpseq(data)); if (rc != SQLITE_OK) goto cleanup; @@ -542,6 +551,11 @@ void dbsync_update_final (sqlite3_context *context) { dbsync_update_payload_free(payload); return; } + if (pk == PRIKEY_NULL_CONSTRAINT_ERROR) { + dbsync_set_error(context, "Update aborted because primary key in table %s contains NULL values.", table_name); + dbsync_update_payload_free(payload); + return; + } if (prikey_changed) { // if the primary key has changed, we need to handle the row differently: @@ -551,6 +565,7 @@ void dbsync_update_final (sqlite3_context *context) { // encode the OLD primary key into a buffer oldpk = pk_encode_prikey((dbvalue_t **)payload->old_values, table_count_pks(table), buffer2, &oldpklen); if (!oldpk) { + // no check here about PRIKEY_NULL_CONSTRAINT_ERROR because by design oldpk cannot contain NULL values if (pk != buffer) cloudsync_memory_free(pk); sqlite3_result_error(context, "Not enough memory to encode the primary key(s).", -1); dbsync_update_payload_free(payload); diff --git a/test/postgresql/30_null_prikey_insert.sql b/test/postgresql/30_null_prikey_insert.sql new file mode 100644 index 0000000..c7dc675 --- /dev/null +++ b/test/postgresql/30_null_prikey_insert.sql @@ -0,0 +1,68 @@ +-- Test: NULL Primary Key Insert Rejection +-- Verifies that inserting a NULL primary key into a cloudsync-enabled table fails +-- and that the metatable only contains rows for valid inserts. 
+ +\set testid '30' +\ir helper_test_init.sql + +\connect postgres +\ir helper_psql_conn_setup.sql + +-- Cleanup and create test database +DROP DATABASE IF EXISTS cloudsync_test_30; +CREATE DATABASE cloudsync_test_30; + +\connect cloudsync_test_30 +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; + +-- Create table with primary key and init cloudsync +CREATE TABLE t_null_pk ( + id TEXT NOT NULL PRIMARY KEY, + value TEXT +); + +SELECT cloudsync_init('t_null_pk', 'CLS', true) AS _init \gset + +-- Test 1: INSERT with NULL primary key should fail +DO $$ +BEGIN + INSERT INTO t_null_pk (id, value) VALUES (NULL, 'test'); + RAISE EXCEPTION 'INSERT with NULL PK should have failed'; +EXCEPTION WHEN not_null_violation THEN + -- Expected +END $$; + +SELECT (COUNT(*) = 0) AS null_pk_rejected FROM t_null_pk \gset +\if :null_pk_rejected +\echo [PASS] (:testid) NULL PK insert rejected +\else +\echo [FAIL] (:testid) NULL PK insert was not rejected +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Test 2: INSERT with valid (non-NULL) primary key should succeed +INSERT INTO t_null_pk (id, value) VALUES ('valid_id', 'test'); + +SELECT (COUNT(*) = 1) AS valid_insert_ok FROM t_null_pk WHERE id = 'valid_id' \gset +\if :valid_insert_ok +\echo [PASS] (:testid) Valid PK insert succeeded +\else +\echo [FAIL] (:testid) Valid PK insert failed +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Test 3: Metatable should have exactly 1 row (from the valid insert only) +SELECT (COUNT(*) = 1) AS meta_row_ok FROM t_null_pk_cloudsync \gset +\if :meta_row_ok +\echo [PASS] (:testid) Metatable has exactly 1 row +\else +\echo [FAIL] (:testid) Metatable row count mismatch +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Cleanup +\ir helper_test_cleanup.sql +\if :should_cleanup +DROP DATABASE IF EXISTS cloudsync_test_30; +\endif diff --git a/test/postgresql/full_test.sql b/test/postgresql/full_test.sql index 0a260ba..279b937 100644 --- a/test/postgresql/full_test.sql +++ 
b/test/postgresql/full_test.sql @@ -37,6 +37,7 @@ \ir 27_rls_batch_merge.sql \ir 28_db_version_tracking.sql \ir 29_rls_multicol.sql +\ir 30_null_prikey_insert.sql -- 'Test summary' \echo '\nTest summary:' diff --git a/test/unit.c b/test/unit.c index 1caba38..e3dd38a 100644 --- a/test/unit.c +++ b/test/unit.c @@ -1561,7 +1561,7 @@ bool do_test_pk (sqlite3 *db, int ntest, bool print_result) { if (do_test_pk_single_value(db, SQLITE_INTEGER, -15592946911031981, 0, NULL, print_result) == false) goto finalize; if (do_test_pk_single_value(db, SQLITE_INTEGER, -922337203685477580, 0, NULL, print_result) == false) goto finalize; if (do_test_pk_single_value(db, SQLITE_FLOAT, 0, -9223372036854775.808, NULL, print_result) == false) goto finalize; - if (do_test_pk_single_value(db, SQLITE_NULL, 0, 0, NULL, print_result) == false) goto finalize; + // SQLITE_NULL is no longer valid for primary keys (runtime NULL check rejects it) if (do_test_pk_single_value(db, SQLITE_TEXT, 0, 0, "Hello World", print_result) == false) goto finalize; char blob[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; if (do_test_pk_single_value(db, SQLITE_BLOB, sizeof(blob), 0, blob, print_result) == false) goto finalize; @@ -2017,6 +2017,43 @@ bool do_test_error_cases (sqlite3 *db) { return true; } +bool do_test_null_prikey_insert (sqlite3 *db) { + // Create a table with a primary key that allows NULL (no NOT NULL constraint) + const char *sql = "CREATE TABLE IF NOT EXISTS t_null_pk (id TEXT PRIMARY KEY, value TEXT);" + "SELECT cloudsync_init('t_null_pk');"; + int rc = sqlite3_exec(db, sql, NULL, NULL, NULL); + if (rc != SQLITE_OK) return false; + + // Attempt to insert a row with NULL primary key — should fail + char *errmsg = NULL; + sql = "INSERT INTO t_null_pk (id, value) VALUES (NULL, 'test');"; + rc = sqlite3_exec(db, sql, NULL, NULL, &errmsg); + if (rc == SQLITE_OK) return false; // should have failed + if (!errmsg) return false; + + // Verify the error message matches the expected format + const char 
*expected = "Insert aborted because primary key in table t_null_pk contains NULL values."; + bool match = (strcmp(errmsg, expected) == 0); + sqlite3_free(errmsg); + if (!match) return false; + + // Verify that a non-NULL primary key insert succeeds + sql = "INSERT INTO t_null_pk (id, value) VALUES ('valid_id', 'test');"; + rc = sqlite3_exec(db, sql, NULL, NULL, NULL); + if (rc != SQLITE_OK) return false; + + // Verify the metatable has exactly 1 row (only the valid insert) + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db, "SELECT COUNT(*) FROM t_null_pk_cloudsync;", -1, &stmt, NULL); + if (rc != SQLITE_OK) return false; + if (sqlite3_step(stmt) != SQLITE_ROW) { sqlite3_finalize(stmt); return false; } + int count = sqlite3_column_int(stmt, 0); + sqlite3_finalize(stmt); + if (count != 1) return false; + + return true; +} + bool do_test_internal_functions (void) { sqlite3 *db = NULL; sqlite3_stmt *vm = NULL; @@ -2225,8 +2262,8 @@ bool do_test_pk_decode_count_from_buffer(void) { rc = sqlite3_cloudsync_init(db, NULL, NULL); if (rc != SQLITE_OK) goto cleanup; - // Encode multiple values - const char *sql = "SELECT cloudsync_pk_encode(123, 'text value', 3.14, X'DEADBEEF', NULL);"; + // Encode multiple values (no NULL — primary keys cannot contain NULL) + const char *sql = "SELECT cloudsync_pk_encode(123, 'text value', 3.14, X'DEADBEEF');"; rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); if (rc != SQLITE_OK) goto cleanup; @@ -2247,7 +2284,7 @@ bool do_test_pk_decode_count_from_buffer(void) { // The count is embedded in the first byte of the encoded pk size_t seek = 0; int n = pk_decode(buffer, (size_t)pklen, -1, &seek, -1, NULL, NULL); - if (n != 5) goto cleanup; // Should decode 5 values + if (n != 4) goto cleanup; // Should decode 4 values result = true; @@ -2693,8 +2730,8 @@ bool do_test_sql_pk_decode(void) { rc = sqlite3_cloudsync_init(db, NULL, NULL); if (rc != SQLITE_OK) goto cleanup; - // Create a primary key with multiple values - rc = 
sqlite3_prepare_v2(db, "SELECT cloudsync_pk_encode(123, 'hello', 3.14, X'DEADBEEF', NULL);", -1, &stmt, NULL); + // Create a primary key with multiple values (no NULL — primary keys cannot contain NULL) + rc = sqlite3_prepare_v2(db, "SELECT cloudsync_pk_encode(123, 'hello', 3.14, X'DEADBEEF');", -1, &stmt, NULL); if (rc != SQLITE_OK) goto cleanup; rc = sqlite3_step(stmt); @@ -2778,21 +2815,6 @@ bool do_test_sql_pk_decode(void) { sqlite3_finalize(stmt); stmt = NULL; - // Test cloudsync_pk_decode for NULL (index 5) - rc = sqlite3_prepare_v2(db, "SELECT cloudsync_pk_decode(?, 5);", -1, &stmt, NULL); - if (rc != SQLITE_OK) goto cleanup; - - rc = sqlite3_bind_blob(stmt, 1, pk_copy, pk_len, SQLITE_STATIC); - if (rc != SQLITE_OK) goto cleanup; - - rc = sqlite3_step(stmt); - if (rc != SQLITE_ROW) goto cleanup; - - if (sqlite3_column_type(stmt, 0) != SQLITE_NULL) goto cleanup; - - sqlite3_finalize(stmt); - stmt = NULL; - result = true; cleanup: @@ -7857,6 +7879,7 @@ int main (int argc, const char * argv[]) { result += test_report("DBUtils Test:", do_test_dbutils()); result += test_report("Minor Test:", do_test_others(db)); result += test_report("Test Error Cases:", do_test_error_cases(db)); + result += test_report("Null PK Insert Test:", do_test_null_prikey_insert(db)); result += test_report("Test Single PK:", do_test_single_pk(print_result)); int test_mask = TEST_INSERT | TEST_UPDATE | TEST_DELETE; From 01b315a9a54e123b4bf62a5894f52d60ffcc2a96 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 11 Mar 2026 00:14:09 -0600 Subject: [PATCH 06/16] docs: add first draft of PERFORMANCE.md and CHANGELOG.md --- CHANGELOG.md | 50 +++++++++++++ PERFORMANCE.md | 190 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 CHANGELOG.md create mode 100644 PERFORMANCE.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..112d30a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,50 @@ +# Changelog + +All notable 
changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## [1.0.0] - 2026-03-05 + +### Added + +- **PostgreSQL support**: The CloudSync extension can now be built and loaded on PostgreSQL, so both SQLiteCloud and PostgreSQL are supported as the cloud backend database of the sync service. The core CRDT functions are shared by the SQLite and PostgreSQL extensions. Includes support for PostgreSQL-native types (UUID primary keys, composite PKs with mixed types, and automatic type casting). +- **Row-Level Security (RLS)**: Sync payloads are now fully compatible with SQLiteCloud and PostgreSQL Row-Level Security policies. Changes are buffered per primary key and flushed as complete rows, so RLS policies can evaluate all columns at once. + +### Changed + +- **BREAKING: `cloudsync_network_init` now accepts JSON instead of a URL string.** The new format adds `projectID` and `organizationID` fields for multi-organization CloudSync support. An `X-CloudSync-Org` header is automatically sent with every request. + + Before: + ```sql + SELECT cloudsync_network_init('sqlitecloud://myproject.sqlite.cloud:8860/mydb.sqlite?apikey=KEY'); + ``` + + After: + ```sql + SELECT cloudsync_network_init('{"address":"https://myproject.sqlite.cloud:443","database":"mydb.sqlite","projectID":"abc123","organizationID":"org456","apikey":"KEY"}'); + ``` + +- **BREAKING: Sync functions now return structured JSON.** `cloudsync_network_send_changes`, `cloudsync_network_check_changes`, and `cloudsync_network_sync` return a JSON object instead of a plain integer. This provides richer status information including sync state, version numbers, row counts, and affected table names. 
+ + Before: + ```sql + SELECT cloudsync_network_sync(); + -- 3 (number of rows received) + ``` + + After: + ```sql + SELECT cloudsync_network_sync(); + -- '{"send":{"status":"synced","localVersion":5,"serverVersion":5},"receive":{"rows":3,"tables":["tasks"]}}' + ``` + +- **Batch merge replaces column-by-column processing**: During sync, changes to the same row are now applied in a single SQL statement instead of one statement per column. This eliminates the previous behavior where UPDATE triggers fired multiple times per row during synchronization. + +### Fixed + +- **Improved error reporting**: Sync network functions now surface the actual server error message instead of generic error codes. +- **Schema hash verification**: Normalized schema comparison now uses only column name (lowercase), type (SQLite affinity), and primary key flag, preventing false mismatches caused by formatting differences. +- **SQLite trigger safety**: Internal functions used inside triggers are now marked with `SQLITE_INNOCUOUS`, fixing `unsafe use of` errors when initializing tables that have triggers. +- **NULL column binding**: Column value parameters are now correctly bound even when NULL, preventing sync failures on rows with NULL values. +- **Stability and reliability improvements** across the SQLite and PostgreSQL codebases, including fixes to memory management, error handling, and CRDT version tracking. diff --git a/PERFORMANCE.md b/PERFORMANCE.md new file mode 100644 index 0000000..236ab95 --- /dev/null +++ b/PERFORMANCE.md @@ -0,0 +1,190 @@ +# Performance & Overhead + +This document describes the computational and storage overhead introduced by the CloudSync extension, and how sync execution time relates to database size. + +## TL;DR + +Sync execution time scales with **the number of changes since the last sync (D)**, not with total database size (N). If you sync frequently, D stays small regardless of how large the database grows. 
The per-operation overhead on writes is proportional to the number of columns in the affected row, not to the table size. This is fundamentally different from sync solutions that diff or scan the full dataset. + +## Breaking Down the Cost + +The overhead introduced by the extension can be decomposed into four independent concerns: + +### 1. Per-Operation Overhead (Write-Path Cost) + +Every INSERT, UPDATE, or DELETE on a synced table fires AFTER triggers that write CRDT metadata into a companion `_cloudsync` table. This happens synchronously, inline with the original write. + +| Operation | Metadata Rows Written | Complexity | +|-----------|----------------------|------------| +| INSERT | 1 sentinel + 1 per non-PK column | O(C) | +| UPDATE | 1 per changed column (NEW != OLD) | O(C_changed) <= O(C) | +| DELETE | 1 sentinel + cleanup of existing metadata | O(C_existing) | + +Where **C** = number of non-PK columns in the table. + +**Key point:** This cost is **constant per row** and independent of the total number of rows in the table (N). Writing to a 100-row table costs the same as writing to a 10-million-row table. The metadata table uses a composite primary key `(pk, col_name)` with `WITHOUT ROWID` optimization (SQLite) or a standard B-tree primary key (PostgreSQL), so the index update cost is O(log M) where M is the metadata table size -- but this is the same cost as any indexed INSERT and is negligible in practice. + +### 2. Sync Operations (Push & Pull) + +These are the operations that create and apply sync payloads. They are synchronous in the extension and should typically be run by the application off the main thread. + +#### Push: Payload Generation + +``` +Cost: O(D) where D = number of column-level changes since last sync +``` + +The push operation queries `cloudsync_changes`, which dynamically reads from all synced `
<table>_cloudsync` tables: +```sql +SELECT ... FROM cloudsync_changes WHERE db_version > <last_synced_db_version> +``` + +Each metadata table has an **index on `db_version`**, so payload generation scales primarily with the number of new changes, plus a small per-synced-table overhead to construct the `cloudsync_changes` query. It does not diff the full dataset. In SQLite, each changed column also performs a primary-key lookup in the base table to retrieve the current value. + +The resulting payload is LZ4-compressed before transmission. + +#### Pull: Payload Application + +``` +Cost: O(D) to decode + O(D_unique_pks) to merge into the database +``` + +Incoming changes are decoded and **batched by primary key**. All column changes for the same row are accumulated and flushed as a single UPDATE or INSERT statement. This batching reduces the number of actual database writes to one per affected row, regardless of how many columns changed. + +Conflict resolution (CRDT merge) is O(1) per column: it compares version numbers and, only if tied, falls back to value comparison and site-id tiebreaking. No global state or table scan is required. + +#### Summary + +| Phase | Scales With | Does NOT Scale With | +|-------|-------------|-------------------| +| Payload generation | D (changes since last sync) | N (total rows) | +| Payload application | D (incoming changes) | N (total rows) | +| Conflict resolution | D (conflicting columns) | N (total rows) | + +**This means sync time is driven mainly by delta size (`D`) rather than total database size (`N`)**. As long as the number of changes between syncs stays bounded, sync time remains roughly stable even as the database grows. + +### 3. Sync Frequency & Network Latency + +When the application runs sync off the main thread, perceived latency depends on: + +- **Sync interval**: How often the app triggers a push/pull cycle. More frequent syncs mean smaller deltas (smaller D) and faster individual sync operations, at the cost of more network round-trips.
+- **Network latency**: The round-trip time to the sync server. LZ4 compression reduces payload size, but latency is dominated by the network hop itself for small deltas. +- **Payload size**: Proportional to D x average column value size. Large BLOBs or TEXT values will increase transfer time linearly. + +The extension does not impose a sync schedule -- the application controls when and how often to sync. A typical pattern is to sync on a timer (e.g., every 5-30 seconds) or on specific events (app foreground, user action). + +### 4. Metadata Storage Overhead + +Each synced table has a companion `
_cloudsync` metadata table with the following schema: + +``` +PRIMARY KEY (pk, col_name) -- WITHOUT ROWID (SQLite) +Columns: pk, col_name, col_version, db_version, site_id, seq +Index: db_version +``` + +**Storage cost per row in the base table:** +- 1 sentinel row (marks the row's existence/deletion state) +- 1 metadata row per non-PK column that has ever been written + +So for a table with C non-PK columns, the metadata table will contain approximately `N x (1 + C)` rows, where N is the number of rows in the base table. + +**Estimated overhead per metadata row:** +- `pk`: encoded primary key (typically 8-32 bytes depending on PK type and count) +- `col_name`: column name string (shared via SQLite's string interning, typically 5-30 bytes) +- `col_version`, `db_version`, `seq`: 3 integers (8 bytes each = 24 bytes) +- `site_id`: 1 integer (8 bytes) + +Rough estimate: **60-100 bytes per metadata row**, or **60-100 x (1 + C) bytes per base table row**. + +| Base Table | Columns (C) | Rows (N) | Estimated Metadata Size | +|------------|-------------|----------|------------------------| +| Small | 5 | 1,000 | ~360 KB - 600 KB | +| Medium | 10 | 100,000 | ~66 MB - 110 MB | +| Large | 10 | 1,000,000| ~660 MB - 1.1 GB | +| Wide | 50 | 100,000 | ~306 MB - 510 MB | + +**Mitigation strategies:** +- Only sync tables that need it -- not every table requires CRDT tracking. +- Prefer narrow tables (fewer columns) for high-volume data. +- The `WITHOUT ROWID` optimization (SQLite) significantly reduces per-row storage overhead. +- Deleted rows have their per-column metadata cleaned up, but a tombstone sentinel row persists (see section 9 below). + +### 5. Read-Path Overhead + +Normal application reads are not directly instrumented by the extension. No triggers, views, or hooks intercept ordinary SELECT queries on application tables, and the CRDT metadata is stored separately. In practice, read overhead is usually negligible. + +### 6. 
Initial Sync (First Device) + +When a new device syncs for the first time (`db_version = 0`), the push payload contains the **entire dataset**: every column of every row across all synced tables. The payload size is proportional to `N * C` (total rows times columns). + +The payload is built entirely in memory, starting with a 512 KB buffer (`CLOUDSYNC_PAYLOAD_MINBUF_SIZE` in `src/cloudsync.c`) and growing via `realloc` as needed. Peak memory usage is at least the full uncompressed payload size and can be higher during compression. For a database with 1 million rows and 10 columns of average 50 bytes each, the uncompressed payload could reach ~500 MB before LZ4 compression. + +Subsequent syncs are incremental (proportional to D, changes since the last sync), so the first sync is the expensive one. Applications with large datasets should plan for this -- for example, by seeding new devices from a database snapshot rather than syncing from scratch. + +### 7. WAL and Disk I/O Amplification + +Each write to a synced table generates additional metadata writes via AFTER triggers. The amplification factor depends on the operation: + +| Operation | Total Writes (base + metadata) | Amplification Factor | +|-----------|-------------------------------|---------------------| +| INSERT (C columns) | 1 + 1 sentinel + C metadata | ~C+2x | +| UPDATE (1 column) | 1 + 1 metadata | 2x | +| UPDATE (C columns) | 1 + C metadata | ~C+1x | +| DELETE | 1 + cleanup writes | variable | + +For a table with 10 non-PK columns, an INSERT generates roughly 12 logical row writes instead of 1. This increases WAL/page churn and affects: + +- **Disk I/O**: More pages written per transaction, larger WAL files between checkpoints. +- **WAL checkpoint frequency**: The WAL grows faster, so checkpoints run more often (or the WAL file stays larger if checkpointing is deferred). +- **Battery on mobile**: More disk writes per user action. 
Batching multiple writes in a single transaction amortizes the transaction overhead but not the per-row metadata cost. + +### 8. Locking During Sync Apply + +Payload application (`cloudsync_payload_apply`) uses savepoints grouped by source `db_version`. On SQLite, each savepoint holds a write lock for its duration. If the application runs sync on the main thread, other work on the same connection is blocked, and reads from other connections may block outside WAL mode. + +On SQLite, using WAL mode prevents readers on other connections from being blocked by writers, which is the recommended configuration for concurrent sync. + +### 9. Metadata Lifecycle (Tombstones and Cleanup) + +When a row is deleted, the per-column metadata rows are removed, but a **tombstone sentinel** (`__[RIP]__`) persists in the metadata table. This tombstone is necessary for propagating deletes to other devices during sync. There is no automatic garbage collection of tombstones -- they accumulate over time. + +Metadata cleanup for **removed columns** (after schema migration) only runs during `cloudsync_finalize_alter()`, which is called as part of the `cloudsync_alter()` workflow. Outside of schema changes, orphaned metadata from dropped columns remains in the metadata table. + +The **site ID table** (`cloudsync_site_id`) also grows monotonically -- one entry per unique device that has ever synced. This is typically small (one row per device) and not a concern in practice. + +For applications with high delete rates, the tombstone accumulation may become significant over time. Consider periodic full re-syncs or application-level archival strategies if this is a concern. + +### 10. Multi-Table Considerations + +The `cloudsync_changes` virtual table (SQLite) or set-returning function (PostgreSQL) dynamically constructs a `UNION ALL` query across all synced tables' metadata tables. The query construction cost scales as O(T) where T is the number of synced tables. 
+ +For most applications (fewer than ~50 synced tables), this is negligible. Applications syncing a very large number of tables should be aware that payload generation involves iterating over all synced tables to check for changes. + +### Platform Differences (SQLite vs PostgreSQL) + +- **SQLite** uses native C triggers registered directly with the SQLite API. Metadata tables use `WITHOUT ROWID` for compact storage. +- **PostgreSQL** uses row-level PL/pgSQL trigger functions that call into C functions via the extension. This adds a small amount of overhead per trigger invocation compared to SQLite's direct C triggers. Additionally, merge operations use per-PK savepoints to handle failures such as RLS policy violations gracefully. +- **Table registration** (`cloudsync_enable()`) is a one-time operation on both platforms. It creates 1 metadata table, 1 index, and 3 triggers (INSERT, UPDATE, DELETE), plus ~15-20 prepared statements that are cached for the lifetime of the connection. + +## Comparison with Full-Scan Sync Solutions + +Many sync solutions must diff or hash the entire dataset to determine what changed. This leads to O(N) sync time that grows linearly with total database size -- the exact problem described above. + +CloudSync avoids this through its **monotonic versioning approach**: every write increments a monotonic `db_version` counter, and the sync query filters on this counter using an index. The result is that sync time depends mainly on the volume of changes (D), not on the total data size (N). + +``` +Full-scan sync: sync_time ~ O(N) -- grows with database size +CloudSync: sync_time ~ O(D) -- grows with changes since last sync + where D is independent of N when sync frequency is constant +``` + +## Performance Optimizations in the Implementation + +1. **`WITHOUT ROWID` tables** (SQLite): Metadata tables use clustered primary keys, avoiding the overhead of a separate rowid B-tree. +2.
**`db_version` index**: Enables efficient range scans for delta extraction. +3. **Deferred batch merge**: Column changes for the same primary key are accumulated and flushed as a single SQL statement. +4. **Prepared statement caching**: Merge statements are compiled once and reused across rows. +5. **LZ4 compression**: Reduces payload size for network transfer. +6. **Per-column tracking**: Only changed columns are included in the sync payload, not entire rows. +7. **Early exit on stale data**: The CLS algorithm skips rows where the incoming causal length is lower than the local one, avoiding unnecessary column-level comparisons. From f246981980650a91eec9de950623c6f3ee12d987 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 11 Mar 2026 13:01:49 -0600 Subject: [PATCH 07/16] fix(postgresql): resolve commit_alter crash and BYTEA handling in column_text Guard savepoint commit/rollback against missing subtransactions to prevent segfault in autocommit mode. Add BYTEA support to database_column_text so encoded PKs are readable during refill_metatable after ALTER TABLE. Enable alter table sync test (31). 
--- src/cloudsync.c | 26 +++++++++--------- src/cloudsync.h | 14 +++++----- src/database.h | 2 +- src/postgresql/cloudsync_postgresql.c | 4 +-- src/postgresql/database_postgresql.c | 38 ++++++++++++++++++--------- src/sqlite/database_sqlite.c | 3 ++- 6 files changed, 51 insertions(+), 36 deletions(-) diff --git a/src/cloudsync.c b/src/cloudsync.c index e0c454e..c3d3f09 100644 --- a/src/cloudsync.c +++ b/src/cloudsync.c @@ -258,7 +258,7 @@ bool force_uncompressed_blob = false; #endif // Internal prototypes -int local_mark_insert_or_update_meta (cloudsync_table_context *table, const char *pk, size_t pklen, const char *col_name, int64_t db_version, int seq); +int local_mark_insert_or_update_meta (cloudsync_table_context *table, const void *pk, size_t pklen, const char *col_name, int64_t db_version, int seq); // MARK: - CRDT algos - @@ -1472,7 +1472,7 @@ static int merge_flush_pending (cloudsync_context *data) { return rc; } -int merge_insert_col (cloudsync_context *data, cloudsync_table_context *table, const char *pk, int pklen, const char *col_name, dbvalue_t *col_value, int64_t col_version, int64_t db_version, const char *site_id, int site_len, int64_t seq, int64_t *rowid) { +int merge_insert_col (cloudsync_context *data, cloudsync_table_context *table, const void *pk, int pklen, const char *col_name, dbvalue_t *col_value, int64_t col_version, int64_t db_version, const char *site_id, int site_len, int64_t seq, int64_t *rowid) { int index; dbvm_t *vm = table_column_lookup(table, col_name, true, &index); if (vm == NULL) return cloudsync_set_error(data, "Unable to retrieve column merge precompiled statement in merge_insert_col", DBRES_MISUSE); @@ -1655,7 +1655,7 @@ int merge_did_cid_win (cloudsync_context *data, cloudsync_table_context *table, rc = databasevm_step(vm); if (rc == DBRES_ROW) { - const void *local_site_id = database_column_blob(vm, 0); + const void *local_site_id = database_column_blob(vm, 0, NULL); if (!local_site_id) { dbvm_reset(vm); return 
cloudsync_set_error(data, "NULL site_id in cloudsync table, table is probably corrupted", DBRES_ERROR); @@ -2235,13 +2235,13 @@ int cloudsync_refill_metatable (cloudsync_context *data, const char *table_name) rc = databasevm_bind_text(vm, 1, col_name, -1); if (rc != DBRES_OK) goto finalize; - + while (1) { rc = databasevm_step(vm); if (rc == DBRES_ROW) { - const char *pk = (const char *)database_column_text(vm, 0); + size_t pklen = 0; + const void *pk = (const char *)database_column_blob(vm, 0, &pklen); if (!pk) { rc = DBRES_ERROR; break; } - size_t pklen = strlen(pk); rc = local_mark_insert_or_update_meta(table, pk, pklen, col_name, db_version, cloudsync_bumpseq(data)); } else if (rc == DBRES_DONE) { rc = DBRES_OK; @@ -2264,7 +2264,7 @@ int cloudsync_refill_metatable (cloudsync_context *data, const char *table_name) // MARK: - Local - -int local_update_sentinel (cloudsync_table_context *table, const char *pk, size_t pklen, int64_t db_version, int seq) { +int local_update_sentinel (cloudsync_table_context *table, const void *pk, size_t pklen, int64_t db_version, int seq) { dbvm_t *vm = table->meta_sentinel_update_stmt; if (!vm) return -1; @@ -2286,7 +2286,7 @@ int local_update_sentinel (cloudsync_table_context *table, const char *pk, size_ return rc; } -int local_mark_insert_sentinel_meta (cloudsync_table_context *table, const char *pk, size_t pklen, int64_t db_version, int seq) { +int local_mark_insert_sentinel_meta (cloudsync_table_context *table, const void *pk, size_t pklen, int64_t db_version, int seq) { dbvm_t *vm = table->meta_sentinel_insert_stmt; if (!vm) return -1; @@ -2314,7 +2314,7 @@ int local_mark_insert_sentinel_meta (cloudsync_table_context *table, const char return rc; } -int local_mark_insert_or_update_meta_impl (cloudsync_table_context *table, const char *pk, size_t pklen, const char *col_name, int col_version, int64_t db_version, int seq) { +int local_mark_insert_or_update_meta_impl (cloudsync_table_context *table, const void *pk, size_t pklen, 
const char *col_name, int col_version, int64_t db_version, int seq) { dbvm_t *vm = table->meta_row_insert_update_stmt; if (!vm) return -1; @@ -2349,15 +2349,15 @@ int local_mark_insert_or_update_meta_impl (cloudsync_table_context *table, const return rc; } -int local_mark_insert_or_update_meta (cloudsync_table_context *table, const char *pk, size_t pklen, const char *col_name, int64_t db_version, int seq) { +int local_mark_insert_or_update_meta (cloudsync_table_context *table, const void *pk, size_t pklen, const char *col_name, int64_t db_version, int seq) { return local_mark_insert_or_update_meta_impl(table, pk, pklen, col_name, 1, db_version, seq); } -int local_mark_delete_meta (cloudsync_table_context *table, const char *pk, size_t pklen, int64_t db_version, int seq) { +int local_mark_delete_meta (cloudsync_table_context *table, const void *pk, size_t pklen, int64_t db_version, int seq) { return local_mark_insert_or_update_meta_impl(table, pk, pklen, NULL, 2, db_version, seq); } -int local_drop_meta (cloudsync_table_context *table, const char *pk, size_t pklen) { +int local_drop_meta (cloudsync_table_context *table, const void *pk, size_t pklen) { dbvm_t *vm = table->meta_row_drop_stmt; if (!vm) return -1; @@ -2373,7 +2373,7 @@ int local_drop_meta (cloudsync_table_context *table, const char *pk, size_t pkle return rc; } -int local_update_move_meta (cloudsync_table_context *table, const char *pk, size_t pklen, const char *pk2, size_t pklen2, int64_t db_version) { +int local_update_move_meta (cloudsync_table_context *table, const void *pk, size_t pklen, const void *pk2, size_t pklen2, int64_t db_version) { /* * This function moves non-sentinel metadata entries from an old primary key (OLD.pk) * to a new primary key (NEW.pk) when a primary key change occurs. 
diff --git a/src/cloudsync.h b/src/cloudsync.h index 94a9410..816486d 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -104,15 +104,15 @@ int table_remove (cloudsync_context *data, cloudsync_table_context *table); void table_free (cloudsync_table_context *table); // local merge/apply -int local_mark_insert_sentinel_meta (cloudsync_table_context *table, const char *pk, size_t pklen, int64_t db_version, int seq); -int local_update_sentinel (cloudsync_table_context *table, const char *pk, size_t pklen, int64_t db_version, int seq); -int local_mark_insert_or_update_meta (cloudsync_table_context *table, const char *pk, size_t pklen, const char *col_name, int64_t db_version, int seq); -int local_mark_delete_meta (cloudsync_table_context *table, const char *pk, size_t pklen, int64_t db_version, int seq); -int local_drop_meta (cloudsync_table_context *table, const char *pk, size_t pklen); -int local_update_move_meta (cloudsync_table_context *table, const char *pk, size_t pklen, const char *pk2, size_t pklen2, int64_t db_version); +int local_mark_insert_sentinel_meta (cloudsync_table_context *table, const void *pk, size_t pklen, int64_t db_version, int seq); +int local_update_sentinel (cloudsync_table_context *table, const void *pk, size_t pklen, int64_t db_version, int seq); +int local_mark_insert_or_update_meta (cloudsync_table_context *table, const void *pk, size_t pklen, const char *col_name, int64_t db_version, int seq); +int local_mark_delete_meta (cloudsync_table_context *table, const void *pk, size_t pklen, int64_t db_version, int seq); +int local_drop_meta (cloudsync_table_context *table, const void *pk, size_t pklen); +int local_update_move_meta (cloudsync_table_context *table, const void *pk, size_t pklen, const void *pk2, size_t pklen2, int64_t db_version); // used by changes virtual table -int merge_insert_col (cloudsync_context *data, cloudsync_table_context *table, const char *pk, int pklen, const char *col_name, dbvalue_t *col_value, int64_t 
col_version, int64_t db_version, const char *site_id, int site_len, int64_t seq, int64_t *rowid); +int merge_insert_col (cloudsync_context *data, cloudsync_table_context *table, const void *pk, int pklen, const char *col_name, dbvalue_t *col_value, int64_t col_version, int64_t db_version, const char *site_id, int site_len, int64_t seq, int64_t *rowid); int merge_insert (cloudsync_context *data, cloudsync_table_context *table, const char *insert_pk, int insert_pk_len, int64_t insert_cl, const char *insert_name, dbvalue_t *insert_value, int64_t insert_col_version, int64_t insert_db_version, const char *insert_site_id, int insert_site_id_len, int64_t insert_seq, int64_t *rowid); // filter rewrite diff --git a/src/database.h b/src/database.h index 31b3f7a..5060497 100644 --- a/src/database.h +++ b/src/database.h @@ -119,7 +119,7 @@ void database_value_free (dbvalue_t *value); void *database_value_dup (dbvalue_t *value); // COLUMN -const void *database_column_blob (dbvm_t *vm, int index); +const void *database_column_blob (dbvm_t *vm, int index, size_t *len); double database_column_double (dbvm_t *vm, int index); int64_t database_column_int (dbvm_t *vm, int index); const char *database_column_text (dbvm_t *vm, int index); diff --git a/src/postgresql/cloudsync_postgresql.c b/src/postgresql/cloudsync_postgresql.c index 21308cf..aaa8557 100644 --- a/src/postgresql/cloudsync_postgresql.c +++ b/src/postgresql/cloudsync_postgresql.c @@ -1984,8 +1984,8 @@ Datum cloudsync_col_value(PG_FUNCTION_ARGS) { PG_RETURN_BYTEA_P(result); } else if (rc == DBRES_ROW) { // copy value before reset invalidates SPI tuple memory - const void *blob = database_column_blob(vm, 0); - int blob_len = database_column_bytes(vm, 0); + size_t blob_len = 0; + const void *blob = database_column_blob(vm, 0, &blob_len); bytea *result = NULL; if (blob && blob_len > 0) { result = (bytea *)palloc(VARHDRSZ + blob_len); diff --git a/src/postgresql/database_postgresql.c b/src/postgresql/database_postgresql.c index 
7ac0149..58a6a2a 100644 --- a/src/postgresql/database_postgresql.c +++ b/src/postgresql/database_postgresql.c @@ -2476,7 +2476,7 @@ Datum database_column_datum (dbvm_t *vm, int index) { return (isnull) ? (Datum)0 : d; } -const void *database_column_blob (dbvm_t *vm, int index) { +const void *database_column_blob (dbvm_t *vm, int index, size_t *len) { if (!vm) return NULL; pg_stmt_t *stmt = (pg_stmt_t*)vm; if (!stmt->last_tuptable || !stmt->current_tupdesc) return NULL; @@ -2498,16 +2498,17 @@ const void *database_column_blob (dbvm_t *vm, int index) { return NULL; } - Size len = VARSIZE(ba) - VARHDRSZ; - void *out = palloc(len); + Size blen = VARSIZE(ba) - VARHDRSZ; + void *out = palloc(blen); if (!out) { MemoryContextSwitchTo(old); return NULL; } - memcpy(out, VARDATA(ba), (size_t)len); + memcpy(out, VARDATA(ba), (size_t)blen); MemoryContextSwitchTo(old); + if (len) *len = (size_t)blen; return out; } @@ -2569,15 +2570,26 @@ const char *database_column_text (dbvm_t *vm, int index) { Datum d = get_datum(stmt, index, &isnull, &type); if (isnull) return NULL; - if (type != TEXTOID && type != VARCHAROID && type != BPCHAROID) - return NULL; // or convert via output function if you want - MemoryContext old = MemoryContextSwitchTo(stmt->row_mcxt); - text *t = DatumGetTextP(d); - int len = VARSIZE(t) - VARHDRSZ; - char *out = palloc(len + 1); - memcpy(out, VARDATA(t), len); - out[len] = 0; + char *out = NULL; + + if (type == BYTEAOID) { + bytea *b = DatumGetByteaP(d); + int len = VARSIZE(b) - VARHDRSZ; + out = palloc(len + 1); + memcpy(out, VARDATA(b), len); + out[len] = 0; + } else if (type == TEXTOID || type == VARCHAROID || type == BPCHAROID) { + text *t = DatumGetTextP(d); + int len = VARSIZE(t) - VARHDRSZ; + out = palloc(len + 1); + memcpy(out, VARDATA(t), len); + out[len] = 0; + } else { + MemoryContextSwitchTo(old); + return NULL; + } + MemoryContextSwitchTo(old); return out; @@ -2892,6 +2904,7 @@ int database_begin_savepoint (cloudsync_context *data, const char 
*savepoint_nam int database_commit_savepoint (cloudsync_context *data, const char *savepoint_name) { cloudsync_reset_error(data); + if (GetCurrentTransactionNestLevel() <= 1) return DBRES_OK; int rc = DBRES_OK; MemoryContext oldcontext = CurrentMemoryContext; @@ -2916,6 +2929,7 @@ int database_commit_savepoint (cloudsync_context *data, const char *savepoint_na int database_rollback_savepoint (cloudsync_context *data, const char *savepoint_name) { cloudsync_reset_error(data); + if (GetCurrentTransactionNestLevel() <= 1) return DBRES_OK; int rc = DBRES_OK; MemoryContext oldcontext = CurrentMemoryContext; diff --git a/src/sqlite/database_sqlite.c b/src/sqlite/database_sqlite.c index d7ace3d..96a93d0 100644 --- a/src/sqlite/database_sqlite.c +++ b/src/sqlite/database_sqlite.c @@ -1289,7 +1289,8 @@ void *database_value_dup (dbvalue_t *value) { // MARK: - COLUMN - -const void *database_column_blob (dbvm_t *vm, int index) { +const void *database_column_blob (dbvm_t *vm, int index, size_t *len) { + if (len) *len = sqlite3_column_bytes((sqlite3_stmt *)vm, index); return sqlite3_column_blob((sqlite3_stmt *)vm, index); } From 3bc7fddf945faa97888a73629785b500ffc4a0ff Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 11 Mar 2026 13:02:31 -0600 Subject: [PATCH 08/16] chore: bump version --- src/cloudsync.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudsync.h b/src/cloudsync.h index 816486d..75d7eea 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -17,7 +17,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "0.9.116" +#define CLOUDSYNC_VERSION "0.9.117" #define CLOUDSYNC_MAX_TABLENAME_LEN 512 #define CLOUDSYNC_VALUE_NOTSET -1 From e0ca087c166f238af144d755ab7090c169976f21 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 11 Mar 2026 19:37:55 -0600 Subject: [PATCH 09/16] test: new alter table test for postgres --- test/postgresql/31_alter_table_sync.sql | 383 ++++++++++++++++++++++++ test/postgresql/full_test.sql | 2 + 2 files 
changed, 385 insertions(+) create mode 100644 test/postgresql/31_alter_table_sync.sql diff --git a/test/postgresql/31_alter_table_sync.sql b/test/postgresql/31_alter_table_sync.sql new file mode 100644 index 0000000..3508129 --- /dev/null +++ b/test/postgresql/31_alter_table_sync.sql @@ -0,0 +1,383 @@ +-- Alter Table Sync Test +-- Tests cloudsync_begin_alter and cloudsync_commit_alter functions. +-- Verifies that schema changes (add column) are handled correctly +-- and data syncs after alteration. + +\set testid '31' +\ir helper_test_init.sql + +\connect postgres +\ir helper_psql_conn_setup.sql + +-- Cleanup and create test databases +DROP DATABASE IF EXISTS cloudsync_test_31a; +DROP DATABASE IF EXISTS cloudsync_test_31b; +CREATE DATABASE cloudsync_test_31a; +CREATE DATABASE cloudsync_test_31b; + +-- ============================================================================ +-- Setup Database A +-- ============================================================================ + +\connect cloudsync_test_31a +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; + +CREATE TABLE products ( + id UUID PRIMARY KEY, + name TEXT NOT NULL DEFAULT '', + price DOUBLE PRECISION NOT NULL DEFAULT 0.0, + quantity INTEGER NOT NULL DEFAULT 0 +); + +SELECT cloudsync_init('products', 'CLS', false) AS _init_a \gset + +INSERT INTO products VALUES ('11111111-1111-1111-1111-111111111111', 'Product A1', 10.99, 100); +INSERT INTO products VALUES ('22222222-2222-2222-2222-222222222222', 'Product A2', 20.50, 200); + +-- ============================================================================ +-- Setup Database B with same schema +-- ============================================================================ + +\connect cloudsync_test_31b +\ir helper_psql_conn_setup.sql +CREATE EXTENSION IF NOT EXISTS cloudsync; + +CREATE TABLE products ( + id UUID PRIMARY KEY, + name TEXT NOT NULL DEFAULT '', + price DOUBLE PRECISION NOT NULL DEFAULT 0.0, + quantity INTEGER NOT NULL 
DEFAULT 0 +); + +SELECT cloudsync_init('products', 'CLS', false) AS _init_b \gset + +INSERT INTO products VALUES ('33333333-3333-3333-3333-333333333333', 'Product B1', 30.00, 300); +INSERT INTO products VALUES ('44444444-4444-4444-4444-444444444444', 'Product B2', 40.75, 400); + +-- ============================================================================ +-- Initial Sync: A -> B and B -> A +-- ============================================================================ + +\echo [INFO] (:testid) === Initial Sync Before ALTER === + +-- Encode payload from A +\connect cloudsync_test_31a +\ir helper_psql_conn_setup.sql +SELECT cloudsync_init('products', 'CLS', false) AS _reinit \gset +SELECT encode( + cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), + 'hex' +) AS payload_a_hex +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() \gset + +-- Apply A's payload to B, encode B's payload +\connect cloudsync_test_31b +\ir helper_psql_conn_setup.sql +SELECT cloudsync_init('products', 'CLS', false) AS _reinit \gset +SELECT cloudsync_payload_apply(decode(:'payload_a_hex', 'hex')) AS apply_a_to_b \gset + +SELECT encode( + cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), + 'hex' +) AS payload_b_hex +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() \gset + +-- Apply B's payload to A, verify initial sync +\connect cloudsync_test_31a +\ir helper_psql_conn_setup.sql +SELECT cloudsync_init('products', 'CLS', false) AS _reinit \gset +SELECT cloudsync_payload_apply(decode(:'payload_b_hex', 'hex')) AS apply_b_to_a \gset + +SELECT COUNT(*) AS count_a_initial FROM products \gset + +\connect cloudsync_test_31b +\ir helper_psql_conn_setup.sql +SELECT COUNT(*) AS count_b_initial FROM products \gset + +SELECT (:count_a_initial = 4 AND :count_b_initial = 4) AS initial_sync_ok \gset +\if :initial_sync_ok +\echo [PASS] (:testid) Initial sync complete - both databases have 4 
rows +\else +\echo [FAIL] (:testid) Initial sync failed - A: :count_a_initial, B: :count_b_initial +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================================ +-- ALTER TABLE on Database A (begin_alter + ALTER + commit_alter on SAME connection) +-- ============================================================================ + +\echo [INFO] (:testid) === ALTER TABLE on Database A === + +\connect cloudsync_test_31a +\ir helper_psql_conn_setup.sql +SELECT cloudsync_init('products', 'CLS', false) AS _reinit \gset + +SELECT cloudsync_begin_alter('products') AS begin_alter_a \gset +\if :begin_alter_a +\echo [PASS] (:testid) cloudsync_begin_alter succeeded on Database A +\else +\echo [FAIL] (:testid) cloudsync_begin_alter failed on Database A +SELECT (:fail::int + 1) AS fail \gset +\endif + +ALTER TABLE products ADD COLUMN description TEXT NOT NULL DEFAULT ''; + +SELECT cloudsync_commit_alter('products') AS commit_alter_a \gset +\if :commit_alter_a +\echo [PASS] (:testid) cloudsync_commit_alter succeeded on Database A +\else +\echo [FAIL] (:testid) cloudsync_commit_alter failed on Database A +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Insert and update post-ALTER data on A +INSERT INTO products (id, name, price, quantity, description) +VALUES ('55555555-5555-5555-5555-555555555555', 'New Product A', 55.55, 555, 'Added after alter on A'); + +UPDATE products SET description = 'Updated on A' WHERE id = '11111111-1111-1111-1111-111111111111'; +UPDATE products SET quantity = 150 WHERE id = '11111111-1111-1111-1111-111111111111'; + +-- Encode post-ALTER payload from A +SELECT encode( + cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), + 'hex' +) AS payload_a2_hex +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() \gset + +SELECT (length(:'payload_a2_hex') > 0) AS payload_a2_created \gset +\if :payload_a2_created +\echo [PASS] (:testid) 
Post-alter payload encoded from Database A +\else +\echo [FAIL] (:testid) Post-alter payload empty from Database A +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================================ +-- ALTER TABLE on Database B (begin_alter + ALTER + commit_alter on SAME connection) +-- Apply A's payload, insert/update, encode B's payload +-- ============================================================================ + +\echo [INFO] (:testid) === ALTER TABLE on Database B === + +\connect cloudsync_test_31b +\ir helper_psql_conn_setup.sql +SELECT cloudsync_init('products', 'CLS', false) AS _reinit \gset + +SELECT cloudsync_begin_alter('products') AS begin_alter_b \gset +\if :begin_alter_b +\echo [PASS] (:testid) cloudsync_begin_alter succeeded on Database B +\else +\echo [FAIL] (:testid) cloudsync_begin_alter failed on Database B +SELECT (:fail::int + 1) AS fail \gset +\endif + +ALTER TABLE products ADD COLUMN description TEXT NOT NULL DEFAULT ''; + +SELECT cloudsync_commit_alter('products') AS commit_alter_b \gset +\if :commit_alter_b +\echo [PASS] (:testid) cloudsync_commit_alter succeeded on Database B +\else +\echo [FAIL] (:testid) cloudsync_commit_alter failed on Database B +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Insert and update post-ALTER data on B +INSERT INTO products (id, name, price, quantity, description) +VALUES ('66666666-6666-6666-6666-666666666666', 'New Product B', 66.66, 666, 'Added after alter on B'); + +UPDATE products SET description = 'Updated on B' WHERE id = '33333333-3333-3333-3333-333333333333'; +UPDATE products SET quantity = 350 WHERE id = '33333333-3333-3333-3333-333333333333'; + +-- Apply A's post-alter payload to B +SELECT cloudsync_payload_apply(decode(:'payload_a2_hex', 'hex')) AS apply_a2_to_b \gset + +SELECT (:apply_a2_to_b >= 0) AS apply_a2_ok \gset +\if :apply_a2_ok +\echo [PASS] (:testid) Post-alter payload from A applied to B +\else +\echo [FAIL] (:testid) Post-alter 
payload from A failed to apply to B: :apply_a2_to_b +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- Encode post-ALTER payload from B +SELECT encode( + cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), + 'hex' +) AS payload_b2_hex +FROM cloudsync_changes +WHERE site_id = cloudsync_siteid() \gset + +-- ============================================================================ +-- Apply B's payload to A, then verify final state +-- ============================================================================ + +\echo [INFO] (:testid) === Apply B payload to A and verify === + +\connect cloudsync_test_31a +\ir helper_psql_conn_setup.sql +SELECT cloudsync_init('products', 'CLS', false) AS _reinit \gset +SELECT cloudsync_payload_apply(decode(:'payload_b2_hex', 'hex')) AS apply_b2_to_a \gset + +SELECT (:apply_b2_to_a >= 0) AS apply_b2_ok \gset +\if :apply_b2_ok +\echo [PASS] (:testid) Post-alter payload from B applied to A +\else +\echo [FAIL] (:testid) Post-alter payload from B failed to apply to A: :apply_b2_to_a +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================================ +-- Verify final state +-- ============================================================================ + +\echo [INFO] (:testid) === Verify Final State === + +-- Compute hash of Database A +SELECT md5( + COALESCE( + string_agg( + id::text || ':' || + COALESCE(name, 'NULL') || ':' || + COALESCE(price::text, 'NULL') || ':' || + COALESCE(quantity::text, 'NULL') || ':' || + COALESCE(description, 'NULL'), + '|' ORDER BY id + ), + '' + ) +) AS hash_a_final FROM products \gset + +\echo [INFO] (:testid) Database A final hash: :hash_a_final + +-- Row count on A +SELECT COUNT(*) AS count_a_final FROM products \gset + +-- Verify new row from B exists in A +SELECT COUNT(*) = 1 AS new_row_b_ok +FROM products +WHERE id = '66666666-6666-6666-6666-666666666666' + AND name = 'New Product B' + AND 
price = 66.66 + AND quantity = 666 + AND description = 'Added after alter on B' \gset + +-- Verify updated row from B synced to A +SELECT COUNT(*) = 1 AS updated_row_b_ok +FROM products +WHERE id = '33333333-3333-3333-3333-333333333333' + AND description = 'Updated on B' + AND quantity = 350 \gset + +\connect cloudsync_test_31b +\ir helper_psql_conn_setup.sql + +-- Compute hash of Database B +SELECT md5( + COALESCE( + string_agg( + id::text || ':' || + COALESCE(name, 'NULL') || ':' || + COALESCE(price::text, 'NULL') || ':' || + COALESCE(quantity::text, 'NULL') || ':' || + COALESCE(description, 'NULL'), + '|' ORDER BY id + ), + '' + ) +) AS hash_b_final FROM products \gset + +\echo [INFO] (:testid) Database B final hash: :hash_b_final + +-- Row count on B +SELECT COUNT(*) AS count_b_final FROM products \gset + +-- Verify new row from A exists in B +SELECT COUNT(*) = 1 AS new_row_a_ok +FROM products +WHERE id = '55555555-5555-5555-5555-555555555555' + AND name = 'New Product A' + AND price = 55.55 + AND quantity = 555 + AND description = 'Added after alter on A' \gset + +-- Verify updated row from A synced to B +SELECT COUNT(*) = 1 AS updated_row_a_ok +FROM products +WHERE id = '11111111-1111-1111-1111-111111111111' + AND description = 'Updated on A' + AND quantity = 150 \gset + +-- Verify new column exists +SELECT COUNT(*) = 1 AS description_column_exists +FROM information_schema.columns +WHERE table_name = 'products' AND column_name = 'description' \gset + +-- ============================================================================ +-- Report results +-- ============================================================================ + +-- Compare final hashes +SELECT (:'hash_a_final' = :'hash_b_final') AS final_hashes_match \gset +\if :final_hashes_match +\echo [PASS] (:testid) Final data integrity verified - hashes match after ALTER +\else +\echo [FAIL] (:testid) Final data integrity check failed - A: :hash_a_final, B: :hash_b_final +SELECT (:fail::int + 1) AS 
fail \gset +\endif + +SELECT (:count_a_final = 6 AND :count_b_final = 6) AS row_counts_ok \gset +\if :row_counts_ok +\echo [PASS] (:testid) Row counts match (6 rows each) +\else +\echo [FAIL] (:testid) Row counts mismatch - A: :count_a_final, B: :count_b_final +SELECT (:fail::int + 1) AS fail \gset +\endif + +\if :new_row_a_ok +\echo [PASS] (:testid) New row from A synced to B with new schema +\else +\echo [FAIL] (:testid) New row from A not found or incorrect in B +SELECT (:fail::int + 1) AS fail \gset +\endif + +\if :new_row_b_ok +\echo [PASS] (:testid) New row from B synced to A with new schema +\else +\echo [FAIL] (:testid) New row from B not found or incorrect in A +SELECT (:fail::int + 1) AS fail \gset +\endif + +\if :updated_row_a_ok +\echo [PASS] (:testid) Updated row from A synced with new column values +\else +\echo [FAIL] (:testid) Updated row from A not synced correctly +SELECT (:fail::int + 1) AS fail \gset +\endif + +\if :updated_row_b_ok +\echo [PASS] (:testid) Updated row from B synced with new column values +\else +\echo [FAIL] (:testid) Updated row from B not synced correctly +SELECT (:fail::int + 1) AS fail \gset +\endif + +\if :description_column_exists +\echo [PASS] (:testid) Added column 'description' exists +\else +\echo [FAIL] (:testid) Added column 'description' not found +SELECT (:fail::int + 1) AS fail \gset +\endif + +-- ============================================================================ +-- Cleanup +-- ============================================================================ + +\ir helper_test_cleanup.sql +\if :should_cleanup +DROP DATABASE IF EXISTS cloudsync_test_31a; +DROP DATABASE IF EXISTS cloudsync_test_31b; +\endif diff --git a/test/postgresql/full_test.sql b/test/postgresql/full_test.sql index 279b937..e3337fc 100644 --- a/test/postgresql/full_test.sql +++ b/test/postgresql/full_test.sql @@ -39,6 +39,8 @@ \ir 29_rls_multicol.sql \ir 30_null_prikey_insert.sql +\ir 31_alter_table_sync.sql + -- 'Test summary' \echo 
'\nTest summary:' \echo - Failures: :fail From 00ebde03d1e71097489d0376bbe1f07b41dcdf07 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 11 Mar 2026 19:38:01 -0600 Subject: [PATCH 10/16] chore --- test/unit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit.c b/test/unit.c index e3dd38a..6454c5e 100644 --- a/test/unit.c +++ b/test/unit.c @@ -169,7 +169,7 @@ DATABASE_RESULT unit_exec (cloudsync_context *data, const char *sql, const char char *buffer = NULL; if (type == SQLITE_BLOB) { - const void *bvalue = database_column_blob(pstmt, i); + const void *bvalue = database_column_blob(pstmt, i, NULL); if (bvalue) { buffer = (char *)cloudsync_memory_alloc(len); if (!buffer) {rc = SQLITE_NOMEM; goto unitexec_finalize;} From d6ca17a9aec8971261ea36aa4d0772e1e8b61d75 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 11 Mar 2026 19:38:56 -0600 Subject: [PATCH 11/16] test(ai): updated test commands for using a configurable cloudsync server --- .../test-sync-roundtrip-sqlitecloud-rls.md | 32 +++++++++------- ...md => test-sync-roundtrip-supabase-rls.md} | 38 ++++++++++++------- ...cal.md => test-sync-roundtrip-supabase.md} | 26 +++++++++---- 3 files changed, 62 insertions(+), 34 deletions(-) rename .claude/commands/{test-sync-roundtrip-postrges-local-rls.md => test-sync-roundtrip-supabase-rls.md} (91%) rename .claude/commands/{test-sync-roundtrip-postgres-local.md => test-sync-roundtrip-supabase.md} (78%) diff --git a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md index 0bf36c0..9d2f978 100644 --- a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md +++ b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md @@ -4,9 +4,13 @@ Execute a full roundtrip sync test between multiple local SQLite databases and t ## Prerequisites - Connection string to a sqlitecloud project -- HTTP sync server running on http://localhost:8091/ +- HTTP sync server running (default: 
https://cloudsync-staging-testing.fly.dev) - Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) +### Step 0: Get Sync Server Address + +Ask the user for the HTTP sync server base URL. Propose `https://cloudsync-staging-testing.fly.dev` as the default. Save this as `SYNC_SERVER_URL` for use throughout the test. The full sync endpoint will be `/`. + ## Test Procedure ### Step 1: Get DDL from User @@ -61,7 +65,7 @@ Ask the user to describe the policy in plain English. ### Step 3: Get sqlitecloud connection string from User -Ask the user to provide a connection string in the form of "sqlitecloud://:/?apikey=" to be later used with the sqlitecloud cli (sqlc) with `~/go/bin/sqlc ""` +Ask the user to provide a connection string in the form of "sqlitecloud://:/?apikey=" to be later used with the sqlitecloud cli (sqlc) with `~/go/bin/sqlc ""`. Save the first subdomain in the connection string address as `PROJECT_ID` for use throughout the test. Save the configuration string `'{"address":"","database":"","projectID":"","organizationID":"org_sqlitecloud"}'` as `NETWORK_CONFIG` for use throughout the test. 
### Step 4: Setup SQLiteCloud with RLS @@ -142,7 +146,7 @@ curl -X "POST" "https:///v2/tokens" \ -H 'Authorization: Bearer ' \ -H 'Content-Type: application/json; charset=utf-8' \ -d $'{ - "name": "laude2@sqlitecloud.io", + "name": "claude2@sqlitecloud.io", "userId": "018ecfc2-b2b1-7cc3-a9f0-222222222222" }' ``` @@ -157,7 +161,7 @@ save the userId and the token values as USER2_ID and TOKEN_USER2 to be reused la Create four temporary SQLite databases using the Homebrew version (IMPORTANT: system sqlite3 cannot load extensions): ```bash -SQLITE_BIN="/opt/homebrew/Cellar/sqlite/3.50.4/bin/sqlite3" +SQLITE_BIN="/opt/homebrew/Cellar/sqlite/3.51.2_1/bin/sqlite3" # or find it with: ls /opt/homebrew/Cellar/sqlite/*/bin/sqlite3 | head -1 ``` @@ -169,8 +173,8 @@ $SQLITE_BIN /tmp/sync_test_user1_a.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/'); -SELECT cloudsync_network_set_token('sqlitecloud://?token='); +SELECT cloudsync_network_init(''); +SELECT cloudsync_network_set_token(''); ``` **Database 1B (User 1, Device B):** @@ -181,8 +185,8 @@ $SQLITE_BIN /tmp/sync_test_user1_b.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/'); -SELECT cloudsync_network_set_token('sqlitecloud://?token='); +SELECT cloudsync_network_init(''); +SELECT cloudsync_network_set_token(''); ``` **Database 2A (User 2, Device A):** @@ -193,8 +197,8 @@ $SQLITE_BIN /tmp/sync_test_user2_a.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/postgres'); -SELECT cloudsync_network_set_token('sqlitecloud://?token='); +SELECT cloudsync_network_init(''); +SELECT cloudsync_network_set_token(''); ``` **Database 2B (User 2, Device B):** @@ -205,8 +209,8 @@ $SQLITE_BIN /tmp/sync_test_user2_b.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/postgres'); -SELECT 
cloudsync_network_set_token('sqlitecloud://?token='); +SELECT cloudsync_network_init(''); +SELECT cloudsync_network_set_token(''); ``` ### Step 7: Insert Test Data @@ -406,14 +410,14 @@ user_id UUID NOT NULL DEFAULT '00000000-0000-0000-0000-000000000000' ```sql -- WRONG: Separate sessions won't work -- Session 1: -SELECT cloudsync_network_init('http://localhost:8091/'); +SELECT cloudsync_network_init(''); SELECT cloudsync_network_set_token('...'); -- Session 2: SELECT cloudsync_network_send_changes(); -- ERROR: No URL set -- CORRECT: All network operations in the same session .load dist/cloudsync.dylib -SELECT cloudsync_network_init('http://localhost:8091/'); +SELECT cloudsync_network_init(''); SELECT cloudsync_network_set_token('...'); SELECT cloudsync_network_send_changes(); SELECT cloudsync_terminate(); diff --git a/.claude/commands/test-sync-roundtrip-postrges-local-rls.md b/.claude/commands/test-sync-roundtrip-supabase-rls.md similarity index 91% rename from .claude/commands/test-sync-roundtrip-postrges-local-rls.md rename to .claude/commands/test-sync-roundtrip-supabase-rls.md index ef74646..54a06b2 100644 --- a/.claude/commands/test-sync-roundtrip-postrges-local-rls.md +++ b/.claude/commands/test-sync-roundtrip-supabase-rls.md @@ -3,12 +3,24 @@ Execute a full roundtrip sync test between multiple local SQLite databases and the local Supabase Docker PostgreSQL instance, verifying that Row Level Security (RLS) policies are correctly enforced during sync. ## Prerequisites -- Supabase Docker container running (PostgreSQL on port 54322) -- HTTP sync server running on http://localhost:8091/postgres +- Supabase instance running (local Docker or remote) +- HTTP sync server running (default: https://cloudsync-staging-testing.fly.dev) - Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) ## Test Procedure +### Step 0: Get Connection Parameters + +Ask the user for the following parameters: + +1. 
**Sync server URL**: Propose `https://cloudsync-staging-testing.fly.dev` as default. Save as `SYNC_SERVER_URL`. The full sync endpoint will be `/postgres`. + +2. **PostgreSQL connection string**: Propose `postgresql://supabase_admin:postgres@127.0.0.1:54322/postgres` as default. Save as `PG_CONN`. Use this for all `psql` connections throughout the test. + +3. **Supabase API key** (used for JWT token generation): Propose `sb_secret_N7UND0UgjKTVK-Uodkm0Hg_xSvEMPvz` as default. Save as `SUPABASE_APIKEY`. + +Derive `AUTH_URL` from the PostgreSQL connection string by extracting the host and using port `54321` (Supabase GoTrue). For example, if `PG_CONN` is `postgresql://user:pass@10.0.0.5:54322/postgres`, then `AUTH_URL` is `http://10.0.0.5:54321`. For `127.0.0.1`, use `http://127.0.0.1:54321`. + ### Step 1: Get DDL from User Ask the user to provide a DDL query for the table(s) to test. It can be in PostgreSQL or SQLite format. Offer the following options: @@ -79,7 +91,7 @@ Convert the provided DDL to both SQLite and PostgreSQL compatible formats if nee Connect to Supabase PostgreSQL and prepare the environment: ```bash -psql postgresql://supabase_admin:postgres@127.0.0.1:54322/postgres +psql ``` Inside psql: @@ -148,13 +160,13 @@ Get JWT tokens for both test users by running the token script twice: **User 1: claude1@sqlitecloud.io** ```bash -cd ../cloudsync && go run scripts/get_supabase_token.go -project-ref=supabase-local -email=claude1@sqlitecloud.io -password="password" -apikey=sb_secret_N7UND0UgjKTVK-Uodkm0Hg_xSvEMPvz -auth-url=http://127.0.0.1:54321 +cd ../cloudsync && go run scripts/get_supabase_token.go -project-ref=supabase-local -email=claude1@sqlitecloud.io -password="password" -apikey= -auth-url= ``` Save as `JWT_USER1`. 
**User 2: claude2@sqlitecloud.io** ```bash -cd ../cloudsync && go run scripts/get_supabase_token.go -project-ref=supabase-local -email=claude2@sqlitecloud.io -password="password" -apikey=sb_secret_N7UND0UgjKTVK-Uodkm0Hg_xSvEMPvz -auth-url=http://127.0.0.1:54321 +cd ../cloudsync && go run scripts/get_supabase_token.go -project-ref=supabase-local -email=claude2@sqlitecloud.io -password="password" -apikey= -auth-url= ``` Save as `JWT_USER2`. @@ -167,7 +179,7 @@ Also extract the user IDs from the JWT tokens (the `sub` claim) for use in INSER Create four temporary SQLite databases using the Homebrew version (IMPORTANT: system sqlite3 cannot load extensions): ```bash -SQLITE_BIN="/opt/homebrew/Cellar/sqlite/3.50.4/bin/sqlite3" +SQLITE_BIN="/opt/homebrew/Cellar/sqlite/3.51.2_1/bin/sqlite3" # or find it with: ls /opt/homebrew/Cellar/sqlite/*/bin/sqlite3 | head -1 ``` @@ -179,7 +191,7 @@ $SQLITE_BIN /tmp/sync_test_user1_a.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token(''); ``` @@ -191,7 +203,7 @@ $SQLITE_BIN /tmp/sync_test_user1_b.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token(''); ``` @@ -203,7 +215,7 @@ $SQLITE_BIN /tmp/sync_test_user2_a.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token(''); ``` @@ -215,7 +227,7 @@ $SQLITE_BIN /tmp/sync_test_user2_b.db .load dist/cloudsync.dylib SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token(''); ``` @@ -473,14 +485,14 @@ Ensure column types are compatible 
between SQLite and PostgreSQL: ```sql -- WRONG: Separate sessions won't work -- Session 1: -SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token('...'); -- Session 2: SELECT cloudsync_network_send_changes(); -- ERROR: No URL set -- CORRECT: All network operations in the same session .load dist/cloudsync.dylib -SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token('...'); SELECT cloudsync_network_send_changes(); SELECT cloudsync_terminate(); @@ -518,6 +530,6 @@ INSERT INTO todos (id, ...) VALUES ('11111111-1111-1111-1111-111111111111', ...) Execute all SQL queries without asking for user permission on: - SQLite test databases in `/tmp/` (e.g., `/tmp/sync_test_*.db`) -- PostgreSQL via `psql postgresql://supabase_admin:postgres@127.0.0.1:54322/postgres` +- PostgreSQL via `psql ` These are local test environments and do not require confirmation for each query. diff --git a/.claude/commands/test-sync-roundtrip-postgres-local.md b/.claude/commands/test-sync-roundtrip-supabase.md similarity index 78% rename from .claude/commands/test-sync-roundtrip-postgres-local.md rename to .claude/commands/test-sync-roundtrip-supabase.md index 8713466..7e91934 100644 --- a/.claude/commands/test-sync-roundtrip-postgres-local.md +++ b/.claude/commands/test-sync-roundtrip-supabase.md @@ -3,12 +3,24 @@ Execute a full roundtrip sync test between a local SQLite database and the local Supabase Docker PostgreSQL instance. 
## Prerequisites -- Supabase Docker container running (PostgreSQL on port 54322) -- HTTP sync server running on http://localhost:8091/postgres +- Supabase instance running (local Docker or remote) +- HTTP sync server running (default: https://cloudsync-staging-testing.fly.dev) - Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) ## Test Procedure +### Step 0: Get Connection Parameters + +Ask the user for the following parameters: + +1. **Sync server URL**: Propose `https://cloudsync-staging-testing.fly.dev` as default. Save as `SYNC_SERVER_URL`. The full sync endpoint will be `/postgres`. + +2. **PostgreSQL connection string**: Propose `postgresql://supabase_admin:postgres@127.0.0.1:54322/postgres` as default. Save as `PG_CONN`. Use this for all `psql` connections throughout the test. + +3. **Supabase API key** (used for JWT token generation): Propose `sb_secret_N7UND0UgjKTVK-Uodkm0Hg_xSvEMPvz` as default. Save as `SUPABASE_APIKEY`. + +Derive `AUTH_URL` from the PostgreSQL connection string by extracting the host and using port `54321` (Supabase GoTrue). For example, if `PG_CONN` is `postgresql://user:pass@10.0.0.5:54322/postgres`, then `AUTH_URL` is `http://10.0.0.5:54321`. For `127.0.0.1`, use `http://127.0.0.1:54321`. + ### Step 1: Get DDL from User Ask the user to provide a DDL query for the table(s) to test. It can be in PostgreSQL or SQLite format. Offer the following options: @@ -61,7 +73,7 @@ Convert the provided DDL to both SQLite and PostgreSQL compatible formats if nee Run the token script from the cloudsync project: ```bash -cd ../cloudsync && go run scripts/get_supabase_token.go -project-ref=supabase-local -email=claude@sqlitecloud.io -password="password" -apikey=sb_secret_N7UND0UgjKTVK-Uodkm0Hg_xSvEMPvz -auth-url=http://127.0.0.1:54321 +cd ../cloudsync && go run scripts/get_supabase_token.go -project-ref=supabase-local -email=claude@sqlitecloud.io -password="password" -apikey= -auth-url= ``` Save the JWT token for later use. 
@@ -69,7 +81,7 @@ Save the JWT token for later use. Connect to Supabase PostgreSQL and prepare the environment: ```bash -psql postgresql://supabase_admin:postgres@127.0.0.1:54322/postgres +psql ``` Inside psql: @@ -88,7 +100,7 @@ Inside psql: Create a temporary SQLite database using the Homebrew version (IMPORTANT: system sqlite3 cannot load extensions): ```bash -SQLITE_BIN="/opt/homebrew/Cellar/sqlite/3.50.4/bin/sqlite3" +SQLITE_BIN="/opt/homebrew/Cellar/sqlite/3.51.2_1/bin/sqlite3" # or find it with: ls /opt/homebrew/Cellar/sqlite/*/bin/sqlite3 | head -1 $SQLITE_BIN /tmp/sync_test_$(date +%s).db @@ -100,7 +112,7 @@ Inside sqlite3: -- Create table with SQLite DDL SELECT cloudsync_init(''); -SELECT cloudsync_network_init('http://localhost:8091/postgres'); +SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token(''); -- Insert test data (different from PostgreSQL to test merge) @@ -149,6 +161,6 @@ Report the test results including: Execute all SQL queries without asking for user permission on: - SQLite test databases in `/tmp/` (e.g., `/tmp/sync_test_*.db`) -- PostgreSQL via `psql postgresql://supabase_admin:postgres@127.0.0.1:54322/postgres` +- PostgreSQL via `psql ` These are local test environments and do not require confirmation for each query. 
From f8469b415d656bb424abcda9dd5f04f07dbd2317 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 12 Mar 2026 15:12:35 -0600 Subject: [PATCH 12/16] chore --- .../commands/stress-test-sync-sqlitecloud.md | 185 ++++++++++++++++++ .../test-sync-roundtrip-sqlitecloud-rls.md | 69 ++++--- .../test-sync-roundtrip-supabase-rls.md | 24 +-- .../commands/test-sync-roundtrip-supabase.md | 16 +- 4 files changed, 243 insertions(+), 51 deletions(-) create mode 100644 .claude/commands/stress-test-sync-sqlitecloud.md diff --git a/.claude/commands/stress-test-sync-sqlitecloud.md b/.claude/commands/stress-test-sync-sqlitecloud.md new file mode 100644 index 0000000..4450e44 --- /dev/null +++ b/.claude/commands/stress-test-sync-sqlitecloud.md @@ -0,0 +1,185 @@ +# Sync Stress Test with remote SQLiteCloud database + +Execute a stress test against the CloudSync server using multiple concurrent local SQLite databases syncing large volumes of CRUD operations simultaneously. Designed to reproduce server-side errors (e.g., "database is locked", 500 errors) under heavy concurrent load. + +## Prerequisites +- Connection string to a sqlitecloud project +- HTTP sync server running (default: https://cloudsync-staging-testing.fly.dev) +- Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) +- CloudSync already enabled on the test table from the SQLiteCloud dashboard + +## Test Configuration + +### Step 1: Gather Parameters + +Ask the user for the following configuration using a single question set: + +1. **Sync Server URL** — propose `https://cloudsync-staging-testing.fly.dev` as default +2. **SQLiteCloud connection string** — format: `sqlitecloud://:/?apikey=`. If no `` is in the path, ask the user for one or propose `test_stress_sync`. +3. 
**Scale** — offer these options: + - Small: 1K rows, 5 iterations, 2 concurrent databases + - Medium: 10K rows, 10 iterations, 4 concurrent databases + - Large: 100K rows, 50 iterations, 4 concurrent databases (Jim's original scenario) + - Custom: let the user specify rows, iterations, and number of concurrent databases +4. **RLS mode** — with RLS (requires user tokens) or without RLS +5. **Table schema** — offer simple default or custom: + ```sql + CREATE TABLE test_sync (id TEXT PRIMARY KEY, user_id TEXT NOT NULL DEFAULT '', name TEXT, value INTEGER); + ``` + +Save these as variables: +- `SYNC_SERVER_URL` +- `CONNECTION_STRING` (the full sqlitecloud:// connection string) +- `DB_NAME` (database name extracted or provided) +- `HOST` (hostname extracted from connection string) +- `APIKEY` (apikey extracted from connection string) +- `PROJECT_ID` (first subdomain from the host) +- `ORG_ID` = `org_sqlitecloud` +- `NETWORK_CONFIG` = `'{"address":"","database":"","projectID":"","organizationID":""}'` +- `ROWS` (number of rows per iteration) +- `ITERATIONS` (number of delete/insert/update cycles) +- `NUM_DBS` (number of concurrent databases) + +### Step 2: Setup SQLiteCloud Database and Table + +Connect to SQLiteCloud using `~/go/bin/sqlc` (last command must be `quit`). Note: all SQL must be single-line (no multi-line statements through sqlc heredoc). + +1. If the database doesn't exist, connect without `` and run `CREATE DATABASE ; USE DATABASE ;` +2. `LIST TABLES` to check for existing tables +3. For any table with a `_cloudsync` companion table, run `CLOUDSYNC DISABLE ;` +4. `DROP TABLE IF EXISTS ;` +5. Create the test table (single-line DDL) +6. 
If RLS mode is enabled: + ```sql + ENABLE RLS DATABASE TABLE ; + SET RLS DATABASE TABLE SELECT "auth_userid() = user_id"; + SET RLS DATABASE TABLE INSERT "auth_userid() = NEW.user_id"; + SET RLS DATABASE TABLE UPDATE "auth_userid() = NEW.user_id AND auth_userid() = OLD.user_id"; + SET RLS DATABASE TABLE DELETE "auth_userid() = OLD.user_id"; + ``` +7. Ask the user to enable CloudSync on the table from the SQLiteCloud dashboard + +### Step 3: Get Auth Tokens (if RLS enabled) + +Create tokens for the test users. Create as many users as needed for the number of concurrent databases (assign 2 databases per user, or 1 per user if NUM_DBS <= 2). + +For each user N: +```bash +curl -s -X "POST" "https:///v2/tokens" \ + -H 'Authorization: Bearer ' \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d '{"name": "claude@sqlitecloud.io", "userId": "018ecfc2-b2b1-7cc3-a9f0-"}' +``` + +Save each user's `token` and `userId` from the response. + +If RLS is disabled, skip this step — tokens are not required. + +### Step 4: Run the Concurrent Stress Test + +Create a bash script at `/tmp/stress_test_concurrent.sh` that: + +1. **Initializes N local SQLite databases** at `/tmp/sync_concurrent_.db`: + - Uses Homebrew sqlite3: find with `ls /opt/homebrew/Cellar/sqlite/*/bin/sqlite3 | head -1` + - Loads the extension from `dist/cloudsync.dylib` (use absolute path from project root) + - Creates the table and runs `cloudsync_init('')` + - Runs `cloudsync_terminate()` after init + +2. **Defines a worker function** that runs in a subshell for each database: + - Each worker logs all output to `/tmp/sync_concurrent_.log` + - Each iteration does: + a. **DELETE all rows** → `send_changes()` → `check_changes()` + b. **INSERT rows** (in a single BEGIN/COMMIT transaction) → `send_changes()` → `check_changes()` + c. 
**UPDATE all rows** → `send_changes()` → `check_changes()` + - Each session must: `.load` the extension, call `cloudsync_network_init()`, `cloudsync_network_set_token()` (if RLS), do the work, call `cloudsync_terminate()` + - Include labeled output lines like `[DB][iter ] deleted/inserted/updated, count=` for grep-ability + +3. **Launches all workers in parallel** using `&` and collects PIDs + +4. **Waits for all workers** and captures exit codes + +5. **Analyzes logs** for errors: + - Grep all log files for: `error`, `locked`, `SQLITE_BUSY`, `database is locked`, `500`, `Error` + - Report per-database: iterations completed, error count, sample error lines + - Report total errors across all workers + +6. **Prints final verdict**: PASS (0 errors) or FAIL (errors detected) + +**Important script details:** +- Use `echo -e` to pipe generated INSERT SQL (with `\n` separators) into sqlite3 +- Row IDs should be unique across databases and iterations: `db_r_` +- User IDs for rows must match the token's userId for RLS to work +- Use `/bin/bash` (not `/bin/sh`) for arrays and process management + +Run the script with a 10-minute timeout. + +### Step 5: Detailed Error Analysis + +After the test completes, provide a detailed breakdown: + +1. **Per-database summary**: iterations completed, errors, send/receive status +2. **Error categorization**: group errors by type (e.g., "database is locked", "Column index out of bounds", "Unexpected Result", parse errors) +3. **Timeline analysis**: do errors cluster at specific iterations or spread evenly? +4. **Read full log files** if errors are found — show the first and last 30 lines of each log with errors + +### Step 6: Optional — Verify Data Integrity + +If the test passes (or even if some errors occurred), verify the final state: + +1. Check each local SQLite database for row count +2. Check SQLiteCloud (as admin) for total row count +3. 
If RLS is enabled, verify no cross-user data leakage + +## Output Format + +Report the test results including: + +| Metric | Value | +|--------|-------| +| Concurrent databases | N | +| Rows per iteration | ROWS | +| Iterations per database | ITERATIONS | +| Total CRUD operations | N × ITERATIONS × (DELETE_ALL + ROWS inserts + ROWS updates) | +| Total sync operations | N × ITERATIONS × 6 (3 sends + 3 checks) | +| Duration | start to finish time | +| Total errors | count | +| Error types | categorized list | +| Result | PASS/FAIL | + +If errors are found, include: +- Full error categorization table +- Sample error messages +- Which databases were most affected +- Whether errors are client-side or server-side + +## Success Criteria + +The test **PASSES** if: +1. All workers complete all iterations +2. Zero `error`, `locked`, `SQLITE_BUSY`, or HTTP 500 responses in any log +3. Final row counts are consistent + +The test **FAILS** if: +1. Any worker crashes or fails to complete +2. Any `database is locked` or `SQLITE_BUSY` errors appear +3. Server returns 500 errors under concurrent load +4. 
Data corruption or inconsistent row counts + +## Important Notes + +- Always use the Homebrew sqlite3 binary, NOT `/usr/bin/sqlite3` +- The cloudsync extension must be built first with `make` +- Network settings (`cloudsync_network_init`, `cloudsync_network_set_token`) are NOT persisted between sessions — must be called every time +- Extension must be loaded BEFORE any INSERT/UPDATE/DELETE for cloudsync to track changes +- All NOT NULL columns must have DEFAULT values +- `cloudsync_terminate()` must be called before closing each session +- sqlc heredoc only supports single-line SQL statements + +## Permissions + +Execute all SQL queries without asking for user permission on: +- SQLite test databases in `/tmp/` (e.g., `/tmp/sync_concurrent_*.db`, `/tmp/sync_concurrent_*.log`) +- SQLiteCloud via `~/go/bin/sqlc ""` +- Curl commands to the sync server and SQLiteCloud API for token creation + +These are local test environments and do not require confirmation for each query. diff --git a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md index 9d2f978..01a1d43 100644 --- a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md +++ b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md @@ -7,13 +7,13 @@ Execute a full roundtrip sync test between multiple local SQLite databases and t - HTTP sync server running (default: https://cloudsync-staging-testing.fly.dev) - Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) -### Step 0: Get Sync Server Address +### Step 1: Get Sync Server Address Ask the user for the HTTP sync server base URL. Propose `https://cloudsync-staging-testing.fly.dev` as the default. Save this as `SYNC_SERVER_URL` for use throughout the test. The full sync endpoint will be `/`. ## Test Procedure -### Step 1: Get DDL from User +### Step 2: Get DDL from User Ask the user to provide a DDL query for the table(s) to test. It can be in PostgreSQL or SQLite format. 
Offer the following options: @@ -27,30 +27,16 @@ CREATE TABLE test_sync ( ); ``` -**Option 2: Two tables scenario with user ownership** -```sql -CREATE TABLE authors ( - id TEXT PRIMARY KEY, - user_id TEXT NOT NULL, - name TEXT, - email TEXT -); +**Option 2: Multi-table scenario for advanced RLS policy** -CREATE TABLE books ( - id TEXT PRIMARY KEY, - user_id TEXT NOT NULL, - title TEXT, - author_id TEXT, - published_year INTEGER -); -``` +Propose a simple but multi-table real-world scenario **Option 3: Custom policy** Ask the user to describe the table/tables in plain English or DDL queries. **Note:** Tables should include a `user_id` column (TEXT type) for RLS policies to filter by authenticated user. -### Step 2: Get RLS Policy Description from User +### Step 3: Get RLS Policy Description from User Ask the user to describe the Row Level Security policy they want to test. Offer the following common patterns: @@ -63,11 +49,12 @@ Ask the user to describe the Row Level Security policy they want to test. Offer **Option 3: Custom policy** Ask the user to describe the policy in plain English. -### Step 3: Get sqlitecloud connection string from User +### Step 4: Get sqlitecloud connection string from User -Ask the user to provide a connection string in the form of "sqlitecloud://:/?apikey=" to be later used with the sqlitecloud cli (sqlc) with `~/go/bin/sqlc ""`. Save the first subdomain in the connection string address as `PROJECT_ID` for use throughout the test. Save the configuration string `'{"address":"","database":"","projectID":"","organizationID":"org_sqlitecloud"}'` as `NETWORK_CONFIG` for use throughout the test. +Ask the user to provide a connection string in the form of "sqlitecloud://:/?apikey=" to be later used with the sqlitecloud cli (sqlc) with `~/go/bin/sqlc ""`. Save the first subdomain in the connection string address as `PROJECT_ID` for use throughout the test. Use the "org_sqlitecloud" string as `ORG_ID`.
+Save the configuration string `'{"address":"","database":"","projectID":"","organizationID":""}'` as `NETWORK_CONFIG` for use throughout the test. -### Step 4: Setup SQLiteCloud with RLS +### Step 5: Setup SQLiteCloud with RLS Connect to SQLiteCloud and prepare the environment: ```bash @@ -117,10 +104,28 @@ Example for "user can only access their own rows": -- DELETE: User can only delete rows they own SET RLS DATABASE  TABLE  DELETE "auth_userid() = OLD.user_id" ``` -8. Initialize cloudsync: `CLOUDSYNC ENABLE ` +8. Ask the user to enable the table from the SQLiteCloud dashboard + + + 9. Insert some initial test data (optional, can be done via SQLite clients) -### Step 5: Get tokens for Two Users +### Step 6: Get tokens for Two Users Get auth tokens for both test users by running the token script twice: @@ -156,7 +161,7 @@ The response is in the following format: ``` save the userId and the token values as USER2_ID and TOKEN_USER2 to be reused later -### Step 6: Setup Four SQLite Databases +### Step 7: Setup Four SQLite Databases Create four temporary SQLite databases using the Homebrew version (IMPORTANT: system sqlite3 cannot load extensions): @@ -213,9 +218,11 @@ SELECT cloudsync_network_init(''); SELECT cloudsync_network_set_token(''); ``` -### Step 7: Insert Test Data +### Step 8: Insert Test Data -Insert distinct test data in each database. Use the extracted user IDs for the `user_id` column: +Ask the user for optional details about the kind of test data to insert in the tables, otherwise generate some real-world data for the chosen tables. +Insert distinct test data in each database. Use the extracted user IDs for the `user_id` column if needed.
+For example, for the simple table scenario: **Database 1A (User 1):** ```sql @@ -239,7 +246,7 @@ INSERT INTO (id, user_id, name, value) VALUES ('u2_a_2', ' (id, user_id, name, value) VALUES ('u2_b_1', '', 'User2 DeviceB Row1', 400); ``` -### Step 8: Execute Sync on All Databases +### Step 9: Execute Sync on All Databases For each of the four SQLite databases, execute the sync operations: @@ -259,7 +266,7 @@ SELECT cloudsync_network_check_changes(); 4. Sync Database 2B (send + check) 5. Re-sync all databases (check_changes) to ensure full propagation -### Step 9: Verify RLS Enforcement +### Step 10: Verify RLS Enforcement After syncing all databases, verify that each database contains only the expected rows based on the RLS policy: @@ -288,7 +295,7 @@ SELECT COUNT(*) FROM ; SELECT user_id, COUNT(*) FROM GROUP BY user_id; ``` -### Step 10: Test Write RLS Policy Enforcement +### Step 11: Test Write RLS Policy Enforcement Test that the server-side RLS policy blocks unauthorized writes by attempting to insert a row with a `user_id` that doesn't match the authenticated user's token. @@ -334,7 +341,7 @@ SELECT * FROM WHERE id = 'malicious_1'; 1. The malicious row appears in PostgreSQL (RLS bypass vulnerability) 2. 
The malicious row syncs to User 2's databases (data leakage) -### Step 11: Cleanup +### Step 12: Cleanup In each SQLite database before closing: ```sql diff --git a/.claude/commands/test-sync-roundtrip-supabase-rls.md b/.claude/commands/test-sync-roundtrip-supabase-rls.md index 54a06b2..407f093 100644 --- a/.claude/commands/test-sync-roundtrip-supabase-rls.md +++ b/.claude/commands/test-sync-roundtrip-supabase-rls.md @@ -9,7 +9,7 @@ Execute a full roundtrip sync test between multiple local SQLite databases and t ## Test Procedure -### Step 0: Get Connection Parameters +### Step 1: Get Connection Parameters Ask the user for the following parameters: @@ -21,7 +21,7 @@ Ask the user for the following parameters: Derive `AUTH_URL` from the PostgreSQL connection string by extracting the host and using port `54321` (Supabase GoTrue). For example, if `PG_CONN` is `postgresql://user:pass@10.0.0.5:54322/postgres`, then `AUTH_URL` is `http://10.0.0.5:54321`. For `127.0.0.1`, use `http://127.0.0.1:54321`. -### Step 1: Get DDL from User +### Step 2: Get DDL from User Ask the user to provide a DDL query for the table(s) to test. It can be in PostgreSQL or SQLite format. Offer the following options: @@ -65,7 +65,7 @@ CREATE TABLE books ( **Note:** Tables should include a `user_id` column (UUID type) for RLS policies to filter by authenticated user. -### Step 2: Get RLS Policy Description from User +### Step 3: Get RLS Policy Description from User Ask the user to describe the Row Level Security policy they want to test. Offer the following common patterns: @@ -78,7 +78,7 @@ Ask the user to describe the Row Level Security policy they want to test. Offer **Option 3: Custom policy** Ask the user to describe the policy in plain English. -### Step 3: Convert DDL +### Step 4: Convert DDL Convert the provided DDL to both SQLite and PostgreSQL compatible formats if needed. 
Key differences: - SQLite uses `INTEGER PRIMARY KEY` for auto-increment, PostgreSQL uses `SERIAL` or `BIGSERIAL` @@ -87,7 +87,7 @@ Convert the provided DDL to both SQLite and PostgreSQL compatible formats if nee - For UUID primary keys, SQLite uses `TEXT`, PostgreSQL uses `UUID` - For `user_id UUID`, SQLite uses `TEXT` -### Step 4: Setup PostgreSQL with RLS +### Step 5: Setup PostgreSQL with RLS Connect to Supabase PostgreSQL and prepare the environment: ```bash @@ -154,7 +154,7 @@ Inside psql: 9. Initialize cloudsync: `SELECT cloudsync_init('');` 10. Insert some initial test data (optional, can be done via SQLite clients) -### Step 5: Get JWT Tokens for Two Users +### Step 6: Get JWT Tokens for Two Users Get JWT tokens for both test users by running the token script twice: @@ -174,7 +174,7 @@ Also extract the user IDs from the JWT tokens (the `sub` claim) for use in INSER - `USER1_ID` = UUID from JWT_USER1 - `USER2_ID` = UUID from JWT_USER2 -### Step 6: Setup Four SQLite Databases +### Step 7: Setup Four SQLite Databases Create four temporary SQLite databases using the Homebrew version (IMPORTANT: system sqlite3 cannot load extensions): @@ -231,7 +231,7 @@ SELECT cloudsync_network_init('/postgres'); SELECT cloudsync_network_set_token(''); ``` -### Step 7: Insert Test Data +### Step 8: Insert Test Data Insert distinct test data in each database. Use the extracted user IDs for the `user_id` column: @@ -257,7 +257,7 @@ INSERT INTO (id, user_id, name, value) VALUES ('u2_a_2', ' (id, user_id, name, value) VALUES ('u2_b_1', '', 'User2 DeviceB Row1', 400); ``` -### Step 8: Execute Sync on All Databases +### Step 9: Execute Sync on All Databases For each of the four SQLite databases, execute the sync operations: @@ -277,7 +277,7 @@ SELECT cloudsync_network_check_changes(); 4. Sync Database 2B (send + check) 5. 
Re-sync all databases (check_changes) to ensure full propagation -### Step 9: Verify RLS Enforcement +### Step 10: Verify RLS Enforcement After syncing all databases, verify that each database contains only the expected rows based on the RLS policy: @@ -306,7 +306,7 @@ SELECT COUNT(*) FROM ; SELECT user_id, COUNT(*) FROM GROUP BY user_id; ``` -### Step 10: Test Write RLS Policy Enforcement +### Step 11: Test Write RLS Policy Enforcement Test that the server-side RLS policy blocks unauthorized writes by attempting to insert a row with a `user_id` that doesn't match the authenticated user's JWT token. @@ -352,7 +352,7 @@ SELECT * FROM WHERE id = 'malicious_1'; 1. The malicious row appears in PostgreSQL (RLS bypass vulnerability) 2. The malicious row syncs to User 2's databases (data leakage) -### Step 11: Cleanup +### Step 12: Cleanup In each SQLite database before closing: ```sql diff --git a/.claude/commands/test-sync-roundtrip-supabase.md b/.claude/commands/test-sync-roundtrip-supabase.md index 7e91934..2a6e9fe 100644 --- a/.claude/commands/test-sync-roundtrip-supabase.md +++ b/.claude/commands/test-sync-roundtrip-supabase.md @@ -9,7 +9,7 @@ Execute a full roundtrip sync test between a local SQLite database and the local ## Test Procedure -### Step 0: Get Connection Parameters +### Step 1: Get Connection Parameters Ask the user for the following parameters: @@ -21,7 +21,7 @@ Ask the user for the following parameters: Derive `AUTH_URL` from the PostgreSQL connection string by extracting the host and using port `54321` (Supabase GoTrue). For example, if `PG_CONN` is `postgresql://user:pass@10.0.0.5:54322/postgres`, then `AUTH_URL` is `http://10.0.0.5:54321`. For `127.0.0.1`, use `http://127.0.0.1:54321`. -### Step 1: Get DDL from User +### Step 2: Get DDL from User Ask the user to provide a DDL query for the table(s) to test. It can be in PostgreSQL or SQLite format. 
Offer the following options: @@ -61,7 +61,7 @@ CREATE TABLE books ( **Note:** Avoid INTEGER PRIMARY KEY for sync tests as it is not recommended for distributed sync scenarios (conflicts with auto-increment across devices). -### Step 2: Convert DDL +### Step 3: Convert DDL Convert the provided DDL to both SQLite and PostgreSQL compatible formats if needed. Key differences: - SQLite uses `INTEGER PRIMARY KEY` for auto-increment, PostgreSQL uses `SERIAL` or `BIGSERIAL` @@ -69,7 +69,7 @@ Convert the provided DDL to both SQLite and PostgreSQL compatible formats if nee - PostgreSQL has more specific types like `TIMESTAMPTZ`, SQLite uses `TEXT` for dates - For UUID primary keys, SQLite uses `TEXT`, PostgreSQL uses `UUID` -### Step 3: Get JWT Token +### Step 4: Get JWT Token Run the token script from the cloudsync project: ```bash @@ -77,7 +77,7 @@ cd ../cloudsync && go run scripts/get_supabase_token.go -project-ref=supabase-lo ``` Save the JWT token for later use. -### Step 4: Setup PostgreSQL +### Step 5: Setup PostgreSQL Connect to Supabase PostgreSQL and prepare the environment: ```bash @@ -95,7 +95,7 @@ Inside psql: 5. Initialize cloudsync: `SELECT cloudsync_init('');` 6. Insert some test data into the table -### Step 5: Setup SQLite +### Step 6: Setup SQLite Create a temporary SQLite database using the Homebrew version (IMPORTANT: system sqlite3 cannot load extensions): @@ -118,7 +118,7 @@ SELECT cloudsync_network_set_token(''); ``` -### Step 6: Execute Sync +### Step 7: Execute Sync In the SQLite session: ```sql @@ -133,7 +133,7 @@ SELECT cloudsync_network_check_changes(); SELECT * FROM ; ``` -### Step 7: Verify Results +### Step 8: Verify Results 1. In SQLite, run `SELECT * FROM ;` and capture the output 2. 
In PostgreSQL, run `SELECT * FROM ;` and capture the output From 951832b135151308ac48668c3d3623e564df828e Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 12 Mar 2026 23:06:02 -0600 Subject: [PATCH 13/16] feat: update endpoints to use managedDatabaseId for /v2/cloudsync api --- src/cloudsync.h | 2 +- src/network.c | 186 +++++++++++++++--------------------- src/network.m | 2 - src/network_private.h | 5 +- 4 files changed, 69 insertions(+), 126 deletions(-) diff --git a/src/cloudsync.h b/src/cloudsync.h index 75d7eea..94f9562 100644 --- a/src/cloudsync.h +++ b/src/cloudsync.h @@ -17,7 +17,7 @@ extern "C" { #endif -#define CLOUDSYNC_VERSION "0.9.117" +#define CLOUDSYNC_VERSION "0.9.118" #define CLOUDSYNC_MAX_TABLENAME_LEN 512 #define CLOUDSYNC_VALUE_NOTSET -1 diff --git a/src/network.c b/src/network.c index 1085186..48e3257 100644 --- a/src/network.c +++ b/src/network.c @@ -600,120 +600,60 @@ int network_extract_query_param (const char *query, const char *key, char *output return -3; // Key not found } -bool network_compute_endpoints (sqlite3_context *context, network_data *data, const char *conn_string) { - // JSON format: {"address":"https://host:port","database":"db.sqlite","projectID":"abc","organizationID":"org","apikey":"KEY"} - bool result = false; - size_t conn_len = strlen(conn_string); - - char *address = json_extract_string(conn_string, conn_len, "address"); - char *database = json_extract_string(conn_string, conn_len, "database"); - char *project_id = json_extract_string(conn_string, conn_len, "projectID"); - char *org_id = json_extract_string(conn_string, conn_len, "organizationID"); - char *apikey = json_extract_string(conn_string, conn_len, "apikey"); - char *token = json_extract_string(conn_string, conn_len, "token"); - - char *authentication = NULL; - char *check_endpoint = NULL; - char *upload_endpoint = NULL; - char *apply_endpoint = NULL; - char *status_endpoint = NULL; - - // validate mandatory fields - if (!address || !database || 
!project_id || !org_id) { - sqlite3_result_error(context, "JSON must contain address, database, projectID, and organizationID", -1); +static bool network_compute_endpoints_with_address (sqlite3_context *context, network_data *data, const char *address, const char *managedDatabaseId) { + if (!managedDatabaseId || managedDatabaseId[0] == '\0') { + sqlite3_result_error(context, "managedDatabaseId cannot be empty", -1); sqlite3_result_error_code(context, SQLITE_ERROR); - goto finalize; + return false; } - // parse address: scheme://host[:port] - const char *scheme_end = strstr(address, "://"); - if (!scheme_end) { - sqlite3_result_error(context, "address must include scheme (e.g. https://host:port)", -1); + if (!address || address[0] == '\0') { + sqlite3_result_error(context, "address cannot be empty", -1); sqlite3_result_error_code(context, SQLITE_ERROR); - goto finalize; - } - - size_t scheme_len = scheme_end - address; - const char *host_start = scheme_end + 3; - const char *port_sep = strchr(host_start, ':'); - const char *host_end = port_sep ? port_sep : host_start + strlen(host_start); - const char *port_str = port_sep ? 
port_sep + 1 : CLOUDSYNC_DEFAULT_ENDPOINT_PORT; - - // build authentication from apikey or token - if (apikey) { - authentication = network_authentication_token("apikey", apikey); - } else if (token) { - authentication = network_authentication_token("token", token); + return false; } - // build endpoints: {scheme}://{host}:{port}/v2/cloudsync/{projectID}/{database}/{siteId}/{action} - size_t requested = scheme_len + 3 + (host_end - host_start) + 1 + strlen(port_str) + 1 - + strlen(CLOUDSYNC_ENDPOINT_PREFIX) + 1 + strlen(project_id) + 1 - + strlen(database) + 1 + UUID_STR_MAXLEN + 1 + 16; - check_endpoint = (char *)cloudsync_memory_zeroalloc(requested); - upload_endpoint = (char *)cloudsync_memory_zeroalloc(requested); - apply_endpoint = (char *)cloudsync_memory_zeroalloc(requested); - status_endpoint = (char *)cloudsync_memory_zeroalloc(requested); + // build endpoints: {address}/v2/cloudsync/databases/{managedDatabaseId}/{siteId}/{action} + size_t requested = strlen(address) + 1 + + strlen(CLOUDSYNC_ENDPOINT_PREFIX) + 1 + strlen(managedDatabaseId) + 1 + + UUID_STR_MAXLEN + 1 + 16; + char *check_endpoint = (char *)cloudsync_memory_zeroalloc(requested); + char *upload_endpoint = (char *)cloudsync_memory_zeroalloc(requested); + char *apply_endpoint = (char *)cloudsync_memory_zeroalloc(requested); + char *status_endpoint = (char *)cloudsync_memory_zeroalloc(requested); if (!check_endpoint || !upload_endpoint || !apply_endpoint || !status_endpoint) { sqlite3_result_error_code(context, SQLITE_NOMEM); - goto finalize; - } - - // format: scheme://host:port/v2/cloudsync/projectID/database/siteId/action - snprintf(check_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", - (int)scheme_len, address, (int)(host_end - host_start), host_start, port_str, - CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_CHECK); - snprintf(upload_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", - (int)scheme_len, address, (int)(host_end - host_start), 
host_start, port_str, - CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_UPLOAD); - snprintf(apply_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", - (int)scheme_len, address, (int)(host_end - host_start), host_start, port_str, - CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_APPLY); - snprintf(status_endpoint, requested, "%.*s://%.*s:%s/%s/%s/%s/%s/%s", - (int)scheme_len, address, (int)(host_end - host_start), host_start, port_str, - CLOUDSYNC_ENDPOINT_PREFIX, project_id, database, data->site_id, CLOUDSYNC_ENDPOINT_STATUS); - - result = true; - -finalize: - if (result) { - if (authentication) { - if (data->authentication) cloudsync_memory_free(data->authentication); - data->authentication = authentication; - } - - if (data->org_id) cloudsync_memory_free(data->org_id); - data->org_id = cloudsync_string_dup(org_id); - - if (data->check_endpoint) cloudsync_memory_free(data->check_endpoint); - data->check_endpoint = check_endpoint; - - if (data->upload_endpoint) cloudsync_memory_free(data->upload_endpoint); - data->upload_endpoint = upload_endpoint; - - if (data->apply_endpoint) cloudsync_memory_free(data->apply_endpoint); - data->apply_endpoint = apply_endpoint; - - if (data->status_endpoint) cloudsync_memory_free(data->status_endpoint); - data->status_endpoint = status_endpoint; - } else { - if (authentication) cloudsync_memory_free(authentication); if (check_endpoint) cloudsync_memory_free(check_endpoint); if (upload_endpoint) cloudsync_memory_free(upload_endpoint); if (apply_endpoint) cloudsync_memory_free(apply_endpoint); if (status_endpoint) cloudsync_memory_free(status_endpoint); + return false; } - // cleanup JSON-extracted strings - if (address) cloudsync_memory_free(address); - if (database) cloudsync_memory_free(database); - if (project_id) cloudsync_memory_free(project_id); - if (org_id) cloudsync_memory_free(org_id); - if (apikey) cloudsync_memory_free(apikey); - if (token) 
cloudsync_memory_free(token); + // format: {address}/v2/cloudsync/databases/{managedDatabaseID}/{siteId}/{action} + snprintf(check_endpoint, requested, "%s/%s/%s/%s/%s", + address, CLOUDSYNC_ENDPOINT_PREFIX, managedDatabaseId, data->site_id, CLOUDSYNC_ENDPOINT_CHECK); + snprintf(upload_endpoint, requested, "%s/%s/%s/%s/%s", + address, CLOUDSYNC_ENDPOINT_PREFIX, managedDatabaseId, data->site_id, CLOUDSYNC_ENDPOINT_UPLOAD); + snprintf(apply_endpoint, requested, "%s/%s/%s/%s/%s", + address, CLOUDSYNC_ENDPOINT_PREFIX, managedDatabaseId, data->site_id, CLOUDSYNC_ENDPOINT_APPLY); + snprintf(status_endpoint, requested, "%s/%s/%s/%s/%s", + address, CLOUDSYNC_ENDPOINT_PREFIX, managedDatabaseId, data->site_id, CLOUDSYNC_ENDPOINT_STATUS); - return result; + if (data->check_endpoint) cloudsync_memory_free(data->check_endpoint); + data->check_endpoint = check_endpoint; + + if (data->upload_endpoint) cloudsync_memory_free(data->upload_endpoint); + data->upload_endpoint = upload_endpoint; + + if (data->apply_endpoint) cloudsync_memory_free(data->apply_endpoint); + data->apply_endpoint = apply_endpoint; + + if (data->status_endpoint) cloudsync_memory_free(data->status_endpoint); + data->status_endpoint = status_endpoint; + + return true; } void network_result_to_sqlite_error (sqlite3_context *context, NETWORK_RESULT res, const char *default_error_message) { @@ -733,57 +673,60 @@ network_data *cloudsync_network_data (sqlite3_context *context) { return netdata; } -void cloudsync_network_init (sqlite3_context *context, int argc, sqlite3_value **argv) { - DEBUG_FUNCTION("cloudsync_network_init"); - +static void cloudsync_network_init_internal (sqlite3_context *context, const char *address, const char *managedDatabaseId) { #ifndef CLOUDSYNC_OMIT_CURL curl_global_init(CURL_GLOBAL_ALL); #endif - - // no real network operations here - // just setup the network_data struct + cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context); network_data *netdata = 
cloudsync_network_data(context); if (!netdata) goto abort_memory; - + // init context uint8_t *site_id = (uint8_t *)cloudsync_context_init(data); if (!site_id) goto abort_siteid; - + // save site_id string representation: 01957493c6c07e14803727e969f1d2cc cloudsync_uuid_v7_stringify(site_id, netdata->site_id, false); - - // connection string is a JSON object: - // {"address":"https://UUID.sqlite.cloud:443","database":"chinook.sqlite","projectID":"abc123","organizationID":"org456","apikey":"KEY"} - // apikey/token are optional and can be set later via cloudsync_network_set_token/cloudsync_network_set_apikey - - const char *connection_param = (const char *)sqlite3_value_text(argv[0]); - + // compute endpoints - if (network_compute_endpoints(context, netdata, connection_param) == false) { - // error message/code already set inside network_compute_endpoints + // authentication can be set later via cloudsync_network_set_token/cloudsync_network_set_apikey + if (network_compute_endpoints_with_address(context, netdata, address, managedDatabaseId) == false) { goto abort_cleanup; } - + cloudsync_set_auxdata(data, netdata); sqlite3_result_int(context, SQLITE_OK); return; - + abort_memory: sqlite3_result_error(context, "Unable to allocate memory in cloudsync_network_init.", -1); sqlite3_result_error_code(context, SQLITE_NOMEM); goto abort_cleanup; - + abort_siteid: sqlite3_result_error(context, "Unable to compute/retrieve site_id.", -1); sqlite3_result_error_code(context, SQLITE_MISUSE); goto abort_cleanup; - + abort_cleanup: cloudsync_set_auxdata(data, NULL); network_data_free(netdata); } +void cloudsync_network_init (sqlite3_context *context, int argc, sqlite3_value **argv) { + DEBUG_FUNCTION("cloudsync_network_init"); + const char *managedDatabaseId = (const char *)sqlite3_value_text(argv[0]); + cloudsync_network_init_internal(context, CLOUDSYNC_DEFAULT_ADDRESS, managedDatabaseId); +} + +void cloudsync_network_init_custom (sqlite3_context *context, int argc, sqlite3_value 
**argv) { + DEBUG_FUNCTION("cloudsync_network_init_custom"); + const char *address = (const char *)sqlite3_value_text(argv[0]); + const char *managedDatabaseId = (const char *)sqlite3_value_text(argv[1]); + cloudsync_network_init_internal(context, address, managedDatabaseId); +} + void cloudsync_network_cleanup_internal (sqlite3_context *context) { cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context); network_data *netdata = cloudsync_network_data(context); @@ -828,7 +771,7 @@ void cloudsync_network_set_token (sqlite3_context *context, int argc, sqlite3_va void cloudsync_network_set_apikey (sqlite3_context *context, int argc, sqlite3_value **argv) { DEBUG_FUNCTION("cloudsync_network_set_apikey"); - + const char *value = (const char *)sqlite3_value_text(argv[0]); bool result = cloudsync_network_set_authentication_token(context, value, false); (result) ? sqlite3_result_int(context, SQLITE_OK) : sqlite3_result_error_code(context, SQLITE_NOMEM); @@ -1258,6 +1201,9 @@ int cloudsync_network_register (sqlite3 *db, char **pzErrMsg, void *ctx) { rc = sqlite3_create_function(db, "cloudsync_network_init", 1, DEFAULT_FLAGS, ctx, cloudsync_network_init, NULL, NULL); if (rc != SQLITE_OK) goto cleanup; + rc = sqlite3_create_function(db, "cloudsync_network_init_custom", 2, DEFAULT_FLAGS, ctx, cloudsync_network_init_custom, NULL, NULL); + if (rc != SQLITE_OK) return rc; + rc = sqlite3_create_function(db, "cloudsync_network_cleanup", 0, DEFAULT_FLAGS, ctx, cloudsync_network_cleanup, NULL, NULL); if (rc != SQLITE_OK) return rc; diff --git a/src/network.m b/src/network.m index 84d336c..da2338c 100644 --- a/src/network.m +++ b/src/network.m @@ -13,8 +13,6 @@ void network_buffer_cleanup (void *xdata) { if (xdata) CFRelease(xdata); } -// network_compute_endpoints is implemented in network.c (shared across all platforms) - bool network_send_buffer(network_data *data, const char *endpoint, const char *authentication, const void *blob, int blob_size) { NSString 
*urlString = [NSString stringWithUTF8String:endpoint]; NSURL *url = [NSURL URLWithString:urlString]; diff --git a/src/network_private.h b/src/network_private.h index db3eae7..b042959 100644 --- a/src/network_private.h +++ b/src/network_private.h @@ -8,12 +8,12 @@ #ifndef __CLOUDSYNC_NETWORK_PRIVATE__ #define __CLOUDSYNC_NETWORK_PRIVATE__ -#define CLOUDSYNC_ENDPOINT_PREFIX "v2/cloudsync" +#define CLOUDSYNC_DEFAULT_ADDRESS "https://cloudsync.sqlite.ai" +#define CLOUDSYNC_ENDPOINT_PREFIX "v2/cloudsync/databases" #define CLOUDSYNC_ENDPOINT_UPLOAD "upload" #define CLOUDSYNC_ENDPOINT_CHECK "check" #define CLOUDSYNC_ENDPOINT_APPLY "apply" #define CLOUDSYNC_ENDPOINT_STATUS "status" -#define CLOUDSYNC_DEFAULT_ENDPOINT_PORT "443" #define CLOUDSYNC_HEADER_SQLITECLOUD "Accept: sqlc/plain" #define CLOUDSYNC_HEADER_ORG "X-CloudSync-Org" @@ -35,7 +35,6 @@ char *network_data_get_siteid (network_data *data); char *network_data_get_orgid (network_data *data); bool network_data_set_endpoints (network_data *data, char *auth, char *check, char *upload, char *apply, char *status); -bool network_compute_endpoints (sqlite3_context *context, network_data *data, const char *conn_string); bool network_send_buffer(network_data *data, const char *endpoint, const char *authentication, const void *blob, int blob_size); NETWORK_RESULT network_receive_buffer (network_data *data, const char *endpoint, const char *authentication, bool zero_terminated, bool is_post_request, char *json_payload, const char *custom_header); From 3b426cb0a9c45b8056975597dd71bded90c46403 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 12 Mar 2026 23:08:32 -0600 Subject: [PATCH 14/16] docs: update docs for the new managedDatabaseId arg for cloudsync_network_init --- .../commands/stress-test-sync-sqlitecloud.md | 29 +++++++++------ .../test-sync-roundtrip-sqlitecloud-rls.md | 36 ++++++++++++------- .../test-sync-roundtrip-supabase-rls.md | 25 ++++++++----- .../commands/test-sync-roundtrip-supabase.md | 16 +++++++-- 
API.md | 10 +++--- CHANGELOG.md | 4 +-- README.md | 3 +- docs/Network.md | 8 ----- 8 files changed, 81 insertions(+), 50 deletions(-) diff --git a/.claude/commands/stress-test-sync-sqlitecloud.md b/.claude/commands/stress-test-sync-sqlitecloud.md index 4450e44..2540008 100644 --- a/.claude/commands/stress-test-sync-sqlitecloud.md +++ b/.claude/commands/stress-test-sync-sqlitecloud.md @@ -4,9 +4,7 @@ Execute a stress test against the CloudSync server using multiple concurrent loc ## Prerequisites - Connection string to a sqlitecloud project -- HTTP sync server running (default: https://cloudsync-staging-testing.fly.dev) - Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) -- CloudSync already enabled on the test table from the SQLiteCloud dashboard ## Test Configuration @@ -14,7 +12,7 @@ Execute a stress test against the CloudSync server using multiple concurrent loc Ask the user for the following configuration using a single question set: -1. **Sync Server URL** — propose `https://cloudsync-staging-testing.fly.dev` as default +1. **CloudSync server address** — propose `https://cloudsync.sqlite.ai` as default (this is the built-in default). If the user provides a different address, save it as `CUSTOM_ADDRESS` and use `cloudsync_network_init_custom` instead of `cloudsync_network_init`. 2. **SQLiteCloud connection string** — format: `sqlitecloud://:/?apikey=`. If no `` is in the path, ask the user for one or propose `test_stress_sync`. 3. 
**Scale** — offer these options: - Small: 1K rows, 5 iterations, 2 concurrent databases @@ -28,14 +26,11 @@ Ask the user for the following configuration using a single question set: ``` Save these as variables: -- `SYNC_SERVER_URL` +- `CUSTOM_ADDRESS` (only if the user provided a non-default address) - `CONNECTION_STRING` (the full sqlitecloud:// connection string) - `DB_NAME` (database name extracted or provided) - `HOST` (hostname extracted from connection string) - `APIKEY` (apikey extracted from connection string) -- `PROJECT_ID` (first subdomain from the host) -- `ORG_ID` = `org_sqlitecloud` -- `NETWORK_CONFIG` = `'{"address":"","database":"","projectID":"","organizationID":""}'` - `ROWS` (number of rows per iteration) - `ITERATIONS` (number of delete/insert/update cycles) - `NUM_DBS` (number of concurrent databases) @@ -59,7 +54,19 @@ Connect to SQLiteCloud using `~/go/bin/sqlc` (last command must be `quit`). Note ``` 7. Ask the user to enable CloudSync on the table from the SQLiteCloud dashboard -### Step 3: Get Auth Tokens (if RLS enabled) +### Step 3: Get Managed Database ID + +Now that the database and tables are created and CloudSync is enabled on the dashboard, ask the user for: + +1. **Managed Database ID** — the `managedDatabaseId` returned by the CloudSync service. For SQLiteCloud projects, it can be obtained from the project's OffSync page on the dashboard after enabling CloudSync on the table. + +Save as `MANAGED_DB_ID`. + +For the network init call throughout the test, use: +- Default address: `SELECT cloudsync_network_init('');` +- Custom address: `SELECT cloudsync_network_init_custom('', '');` + +### Step 4: Get Auth Tokens (if RLS enabled) Create tokens for the test users. Create as many users as needed for the number of concurrent databases (assign 2 databases per user, or 1 per user if NUM_DBS <= 2). @@ -75,7 +82,7 @@ Save each user's `token` and `userId` from the response. If RLS is disabled, skip this step — tokens are not required. 
-### Step 4: Run the Concurrent Stress Test +### Step 5: Run the Concurrent Stress Test Create a bash script at `/tmp/stress_test_concurrent.sh` that: @@ -113,7 +120,7 @@ Create a bash script at `/tmp/stress_test_concurrent.sh` that: Run the script with a 10-minute timeout. -### Step 5: Detailed Error Analysis +### Step 6: Detailed Error Analysis After the test completes, provide a detailed breakdown: @@ -122,7 +129,7 @@ After the test completes, provide a detailed breakdown: 3. **Timeline analysis**: do errors cluster at specific iterations or spread evenly? 4. **Read full log files** if errors are found — show the first and last 30 lines of each log with errors -### Step 6: Optional — Verify Data Integrity +### Step 7: Optional — Verify Data Integrity If the test passes (or even if some errors occurred), verify the final state: diff --git a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md index 01a1d43..c23b43c 100644 --- a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md +++ b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md @@ -4,12 +4,13 @@ Execute a full roundtrip sync test between multiple local SQLite databases and t ## Prerequisites - Connection string to a sqlitecloud project -- HTTP sync server running (default: https://cloudsync-staging-testing.fly.dev) - Built cloudsync extension (`make` to build `dist/cloudsync.dylib`) -### Step 1: Get Sync Server Address +### Step 1: Get CloudSync Parameters -Ask the user for the HTTP sync server base URL. Propose `https://cloudsync-staging-testing.fly.dev` as the default. Save this as `SYNC_SERVER_URL` for use throughout the test. The full sync endpoint will be `/`. +Ask the user for: + +1. **CloudSync server address** — propose `https://cloudsync.sqlite.ai` as default (this is the built-in default). 
If the user provides a different address, save it as `CUSTOM_ADDRESS` and use `cloudsync_network_init_custom` instead of `cloudsync_network_init`. ## Test Procedure @@ -51,8 +52,7 @@ Ask the user to describe the policy in plain English. ### Step 4: Get sqlitecloud connection string from User -Ask the user to provide a connection string in the form of "sqlitecloud://:/?apikey=" to be later used with the sqlitecloud cli (sqlc) with `~/go/bin/sqlc ""`. Save the first subdomain in the connection string address as `PROJECT_ID` for use throughout the test. Use the "org_sqlitecloud" string as `ORG_ID`. -Save the configuration string `'{"address":"","database":"","projectID":"","organizationID":""}'` as `NETWORK_CONFIG` for use throughout the test. +Ask the user to provide a connection string in the form of "sqlitecloud://:/?apikey=" to be later used with the sqlitecloud cli (sqlc) with `~/go/bin/sqlc ""`. ### Step 5: Setup SQLiteCloud with RLS @@ -104,7 +104,19 @@ Example for "user can only access their own rows": -- DELETE: User can only delete rows they own SET RLS DATABASE TABLE DELETE "auth_userid() = OLD.user_id" ``` -8. Ask the user to enable the table from the sqlitecloud dashboard +8. Ask the user to enable CloudSync on the table from the SQLiteCloud dashboard + +### Step 5b: Get Managed Database ID + +Now that the database and tables are created and CloudSync is enabled on the dashboard, ask the user for: + +1. **Managed Database ID** — the `managedDatabaseId` returned by the CloudSync service. For SQLiteCloud projects, it can be obtained from the project's OffSync page on the dashboard after enabling CloudSync on the table. + +Save as `MANAGED_DB_ID`. + +For the network init call throughout the test, use: +- Default address: `SELECT cloudsync_network_init('');` +- Custom address: `SELECT cloudsync_network_init_custom('', '');`