Skip to content

Commit da3f540

Browse files
d-csclaude
andcommitted
feat(webapp): add run-ops split-mode opt-in flag + distinct-DB sentinel (W0-FND-07)
Add RUN_OPS_SPLIT_ENABLED (OFF by default) plus the run-ops DB URL env keys, a distinct-DB runtime sentinel that fingerprints each database by its Postgres control-file system identifier, and the isSplitEnabled() Wave-0 gate. The gate returns true only when the flag is on AND the sentinel proves the two run-ops URLs resolve to physically distinct databases, so a pooler/replica can never arm a false split and the migration family stays unreachable in single-DB mode. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent b1987dc commit da3f540

7 files changed

Lines changed: 247 additions & 0 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
area: webapp
3+
type: feature
4+
---
5+
6+
Add the run-ops DB-split opt-in flag (RUN_OPS_SPLIT_ENABLED, OFF by default) and a
7+
distinct-DB runtime sentinel. isSplitEnabled() gates the whole migration/routing
8+
family OFF unless the flag is on AND the two run-ops URLs are proven to be
9+
physically distinct databases. Self-host behavior is unchanged (single-DB).

apps/webapp/app/env.server.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,27 @@ const EnvironmentSchema = z
125125
"DIRECT_URL is invalid, for details please check the additional output above this message."
126126
),
127127
DATABASE_READ_REPLICA_URL: z.string().optional(),
128+
// --- Run-ops DB split (W0-FND-07) — Cloud-only scaling concern; OFF by default. ---
129+
// Explicit positive opt-in. Split behavior is unreachable unless this is true
130+
// AND the distinct-DB sentinel confirms the two URLs are physically distinct DBs.
131+
RUN_OPS_SPLIT_ENABLED: BoolEnv.default(false),
132+
// The NEW run-ops DB (PlanetScale/PG17) writer. Optional so single-DB installs never set it.
133+
TASK_RUN_DATABASE_URL: z
134+
.string()
135+
.refine(isValidDatabaseUrl, "TASK_RUN_DATABASE_URL is invalid")
136+
.optional(),
137+
// The NEW run-ops DB unpooled/direct endpoint (Prisma migrate/introspection;
138+
// PlanetScale poolers break advisory locks). Consumed by the migration units.
139+
TASK_RUN_DATABASE_DIRECT_URL: z
140+
.string()
141+
.refine(isValidDatabaseUrl, "TASK_RUN_DATABASE_DIRECT_URL is invalid")
142+
.optional(),
143+
// The LEGACY run-ops DB (RDS/PG14, drains during Wave 1). When unset, legacy
144+
// run-ops reuses the existing DATABASE_URL (legacy run-ops == control-plane RDS in Wave 1).
145+
TASK_RUN_LEGACY_DATABASE_URL: z
146+
.string()
147+
.refine(isValidDatabaseUrl, "TASK_RUN_LEGACY_DATABASE_URL is invalid")
148+
.optional(),
128149
SESSION_SECRET: z.string(),
129150
MAGIC_LINK_SECRET: z.string(),
130151
ENCRYPTION_KEY: z
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import { PrismaClient } from "@trigger.dev/database";
2+
3+
/**
 * Identity of a database as observed over a live connection: the system
 * identifier reported by `pg_control_system()` plus `current_database()`.
 */
export type DatabaseFingerprint = { systemIdentifier: string; databaseName: string };

/**
 * Outcome of the distinct-DB probe. A negative result always carries a
 * human-readable `reason` explaining why the split was refused.
 */
export type SentinelResult = { distinct: true } | { distinct: false; reason: string };

/**
 * Connects to `url` with a dedicated PrismaClient and reads that database's
 * fingerprint.
 *
 * @param url - Connection string of the database to fingerprint.
 * @returns The system identifier (cast to text in SQL, so it is returned as a
 *   string) and the name of the connected database.
 * @throws Error when the query yields no rows; connection and query failures
 *   from the Prisma client propagate to the caller unchanged.
 */
export async function readDatabaseFingerprint(url: string): Promise<DatabaseFingerprint> {
  type FingerprintRow = { system_identifier: string; database_name: string };

  const prisma = new PrismaClient({ datasources: { db: { url } } });
  try {
    const [first] = await prisma.$queryRawUnsafe<Array<FingerprintRow>>(
      "SELECT system_identifier::text AS system_identifier, current_database() AS database_name FROM pg_control_system()"
    );
    if (!first) {
      throw new Error("distinct-db sentinel: pg_control_system() returned no rows");
    }
    const { system_identifier: systemIdentifier, database_name: databaseName } = first;
    return { systemIdentifier, databaseName };
  } finally {
    // The client exists only for this one query; always release it.
    await prisma.$disconnect();
  }
}
24+
25+
export async function probeDistinctDatabases(
26+
legacyUrl: string,
27+
newUrl: string,
28+
opts?: { logger?: { warn: (msg: string, meta?: Record<string, unknown>) => void } }
29+
): Promise<SentinelResult> {
30+
try {
31+
const [legacy, next] = await Promise.all([
32+
readDatabaseFingerprint(legacyUrl),
33+
readDatabaseFingerprint(newUrl),
34+
]);
35+
const sameCluster = legacy.systemIdentifier === next.systemIdentifier;
36+
const sameDb = sameCluster && legacy.databaseName === next.databaseName;
37+
// Same-cluster-different-database policy: two databases inside the SAME cluster
38+
// (same system identifier, different current_database()) are reported distinct: true.
39+
// That is acceptable — they are genuinely separate Postgres databases with separate
40+
// WAL-visible state for our purposes, and the Cloud topology always uses separate
41+
// clusters anyway. A stricter "must be a different cluster" policy would gate on
42+
// sameCluster alone; that is flagged as an open question, not decided here.
43+
if (sameDb) {
44+
const reason =
45+
"run-ops legacy and new URLs resolve to the SAME physical database " +
46+
`(systemIdentifier=${legacy.systemIdentifier}, database=${legacy.databaseName}); ` +
47+
"refusing to enable split — pooler/replica likely.";
48+
opts?.logger?.warn(reason);
49+
return { distinct: false, reason };
50+
}
51+
return { distinct: true };
52+
} catch (error) {
53+
const reason = `distinct-db sentinel probe failed; failing closed (single-DB). ${String(error)}`;
54+
opts?.logger?.warn(reason, { error });
55+
return { distinct: false, reason };
56+
}
57+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/**
2+
* isSplitEnabled() is the Wave-0 gate. The entire migration/routing/FK-drop family
3+
* MUST be unreachable when this returns false. Default is false (single-DB). Never
4+
* infer split-vs-single from URL string-equality — distinctness is proven by the
5+
* runtime sentinel.
6+
*/
7+
import { env } from "~/env.server";
8+
import { logger } from "~/services/logger.server";
9+
import { probeDistinctDatabases as defaultProbe } from "./distinctDbSentinel.server";
10+
11+
export type SplitModeConfig = {
12+
flagEnabled: boolean;
13+
legacyUrl?: string;
14+
newUrl?: string;
15+
};
16+
17+
export type SplitModeDeps = {
18+
probe?: typeof defaultProbe;
19+
logger?: { warn: (msg: string, meta?: Record<string, unknown>) => void };
20+
};
21+
22+
export async function computeSplitEnabled(
23+
config: SplitModeConfig,
24+
deps: SplitModeDeps = {}
25+
): Promise<boolean> {
26+
// Hard gate #1: explicit positive opt-in. OFF by default -> never probe.
27+
if (!config.flagEnabled) {
28+
return false;
29+
}
30+
// Both URLs are required to even consider a split.
31+
if (!config.legacyUrl || !config.newUrl) {
32+
deps.logger?.warn(
33+
"RUN_OPS_SPLIT_ENABLED is on but TASK_RUN_LEGACY_DATABASE_URL / TASK_RUN_DATABASE_URL are not both set; staying single-DB."
34+
);
35+
return false;
36+
}
37+
// Hard gate #2: runtime sentinel must confirm physically-distinct DBs.
38+
const probe = deps.probe ?? defaultProbe;
39+
const result = await probe(config.legacyUrl, config.newUrl, { logger: deps.logger });
40+
return result.distinct === true;
41+
}
42+
43+
let cached: Promise<boolean> | undefined;
44+
45+
export function isSplitEnabled(): Promise<boolean> {
46+
if (!cached) {
47+
cached = computeSplitEnabled(
48+
{
49+
flagEnabled: env.RUN_OPS_SPLIT_ENABLED,
50+
legacyUrl: env.TASK_RUN_LEGACY_DATABASE_URL,
51+
newUrl: env.TASK_RUN_DATABASE_URL,
52+
},
53+
{ logger }
54+
);
55+
}
56+
return cached;
57+
}
58+
59+
export function __resetSplitModeCacheForTests(): void {
60+
cached = undefined;
61+
}

apps/webapp/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@
249249
"@swc/helpers": "^0.4.11",
250250
"@tailwindcss/forms": "^0.5.3",
251251
"@tailwindcss/typography": "^0.5.9",
252+
"@testcontainers/postgresql": "^11.14.0",
252253
"@total-typescript/ts-reset": "^0.4.2",
253254
"@types/bcryptjs": "^2.4.2",
254255
"@types/compression": "^1.7.2",
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import { describe, expect, it, vi } from "vitest";
2+
// @testcontainers/postgresql is declared as a dependency in apps/webapp/package.json.
3+
import { PostgreSqlContainer } from "@testcontainers/postgresql";
4+
import { computeSplitEnabled } from "~/v3/runOpsMigration/splitMode.server";
5+
import { probeDistinctDatabases } from "~/v3/runOpsMigration/distinctDbSentinel.server";
6+
7+
// Unit tests for the gate decision itself; the sentinel is replaced by a mock,
// so no database is contacted.
describe("computeSplitEnabled (pure)", () => {
  const bothUrls = { legacyUrl: "postgres://a", newUrl: "postgres://b" };

  it("is OFF by default and never probes when the flag is off", async () => {
    const probe = vi.fn();
    const enabled = await computeSplitEnabled({ flagEnabled: false, ...bothUrls }, { probe });
    expect(enabled).toBe(false);
    expect(probe).not.toHaveBeenCalled(); // self-host opens no second connection
  });

  it("stays single-DB when flag is on but URLs are missing", async () => {
    const probe = vi.fn();
    const enabled = await computeSplitEnabled({ flagEnabled: true }, { probe });
    expect(enabled).toBe(false);
    expect(probe).not.toHaveBeenCalled();
  });

  it("enables split only when flag is on AND sentinel confirms distinct", async () => {
    const probe = vi.fn().mockResolvedValue({ distinct: true });
    const enabled = await computeSplitEnabled({ flagEnabled: true, ...bothUrls }, { probe });
    expect(enabled).toBe(true);
  });

  it("stays single-DB when sentinel reports NOT distinct", async () => {
    const probe = vi.fn().mockResolvedValue({ distinct: false, reason: "same DB" });
    const enabled = await computeSplitEnabled({ flagEnabled: true, ...bothUrls }, { probe });
    expect(enabled).toBe(false);
  });

  // Migration-family unreachability proof: with the flag off the gate returns false
  // and the probe is never invoked. Downstream migration-family units are expected
  // to early-return on !isSplitEnabled(); this test pins the gate's value, and each
  // downstream unit's own test covers that it honors the gate. (W0-FND-08's "split
  // OFF collapses to a single prisma/$replica pair with no second connection opened"
  // depends on this no-probe behavior.)
  it("is provably unreachable (no probe) when the flag is off", async () => {
    const probe = vi.fn();
    const enabled = await computeSplitEnabled({ flagEnabled: false, ...bothUrls }, { probe });
    expect(enabled).toBe(false);
    expect(probe).not.toHaveBeenCalled();
  });
});
60+
61+
// Integration tests for the sentinel against real Postgres containers.
//
// The sentinel returns `distinct: false` both when it proves the two URLs are the
// same database AND when the probe itself fails, so the negative cases assert on
// `reason` as well; otherwise a broken probe would make them pass vacuously.
describe("distinct-DB sentinel (real Postgres)", () => {
  it("reports NOT distinct when both URLs hit the same physical cluster", async () => {
    const pg = await new PostgreSqlContainer("docker.io/postgres:14").start();
    try {
      const url = pg.getConnectionUri();
      const result = await probeDistinctDatabases(url, url);
      // identical URL -> false-split prevented, and for the right reason
      expect(result).toMatchObject({
        distinct: false,
        reason: expect.stringContaining("SAME physical database"),
      });
    } finally {
      await pg.stop();
    }
  }, 60_000);

  it("reports distinct when URLs hit two separate clusters (PG14 legacy + PG17 new)", async () => {
    const legacy = await new PostgreSqlContainer("docker.io/postgres:14").start();
    try {
      // Started inside the outer try so the legacy container is still stopped
      // if this second start fails.
      const next = await new PostgreSqlContainer("docker.io/postgres:17").start();
      try {
        const result = await probeDistinctDatabases(
          legacy.getConnectionUri(),
          next.getConnectionUri()
        );
        expect(result.distinct).toBe(true);
      } finally {
        await next.stop();
      }
    } finally {
      // Runs even when stopping `next` throws, so neither container leaks.
      await legacy.stop();
    }
  }, 120_000);

  it("fails closed (single-DB) when a DB is unreachable", async () => {
    const result = await probeDistinctDatabases(
      "postgresql://nouser:nopass@127.0.0.1:1/none",
      "postgresql://nouser:nopass@127.0.0.1:2/none"
    );
    expect(result).toMatchObject({
      distinct: false,
      reason: expect.stringContaining("probe failed"),
    });
  }, 30_000);
});

pnpm-lock.yaml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)