Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/mollifier-tag-cap.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@trigger.dev/redis-worker": patch
---

Mollifier `mutateSnapshot` now enforces a tag cap: an `append_tags` patch carrying `maxTags` returns `"limit_exceeded"` (writing nothing) when the deduped tag count would exceed the limit, so a buffered run can't accumulate more tags via the tags API than the trigger validator allows at creation.
6 changes: 6 additions & 0 deletions .server-changes/mollifier-mutations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
area: webapp
type: feature
---

Mollifier API mutations on buffered runs: tag, metadata, replay, reschedule, cancel, and idempotency-key reset via a buffer-snapshot fallback. When a mutation races a mid-drain run, the wait-and-bounce loop watches the buffer entry in Redis (cheap) and reads the primary exactly once for the actual mutation, instead of polling the writer on a fixed cadence; polls use jittered exponential backoff.
198 changes: 189 additions & 9 deletions apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,136 @@
import type { LoaderFunctionArgs } from "@remix-run/server-runtime";
import { json } from "@remix-run/server-runtime";
import { tryCatch } from "@trigger.dev/core/utils";
import type { RunMetadataChangeOperation } from "@trigger.dev/core/v3/schemas";
import { UpdateMetadataRequestBody } from "@trigger.dev/core/v3";
import { z } from "zod";
import { $replica } from "~/db.server";
import type { AuthenticatedEnvironment } from "~/services/apiAuth.server";
import { authenticateApiRequest } from "~/services/apiAuth.server";
import { logger } from "~/services/logger.server";
import { updateMetadataService } from "~/services/metadata/updateMetadataInstance.server";
import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
import { ServiceValidationError } from "~/v3/services/common.server";
import { applyMetadataMutationToBufferedRun } from "~/v3/mollifier/applyMetadataMutation.server";
import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.server";

// Route params for this endpoint: a single string `runId`.
const ParamsSchema = z.object({ runId: z.string() });

/**
 * GET handler for a run's metadata.
 *
 * Previously only the PUT action was exported from this route, so a GET
 * had no handler and surfaced as a Remix "no loader" 400.
 *
 * Lookup order:
 *   1. `taskRun` via the `$replica` client, matched on friendlyId and the
 *      authenticated environment.
 *   2. `findRunByIdWithMollifierFallback` (the mollifier buffer snapshot).
 *
 * @returns 200 with `{ metadata, metadataType }` when either lookup hits
 *   (a buffered hit defaults to `null` / `"application/json"` for missing
 *   fields), 401 when authentication fails, 400 when the route params do
 *   not parse, and 404 when neither lookup finds the run.
 */
export async function loader({ request, params }: LoaderFunctionArgs) {
  const auth = await authenticateApiRequest(request);
  if (!auth) {
    return json({ error: "Invalid or Missing API Key" }, { status: 401 });
  }

  const paramsResult = ParamsSchema.safeParse(params);
  if (!paramsResult.success) {
    return json({ error: "Invalid or missing run ID" }, { status: 400 });
  }

  const { runId } = paramsResult.data;
  const { id: environmentId, organizationId } = auth.environment;

  // Postgres row first; only fall through to the buffer on a miss.
  const replicaRow = await $replica.taskRun.findFirst({
    where: { friendlyId: runId, runtimeEnvironmentId: environmentId },
    select: { metadata: true, metadataType: true },
  });
  if (replicaRow) {
    const { metadata, metadataType } = replicaRow;
    return json({ metadata, metadataType }, { status: 200 });
  }

  const snapshot = await findRunByIdWithMollifierFallback({
    runId,
    environmentId,
    organizationId,
  });
  if (!snapshot) {
    return json({ error: "Run not found" }, { status: 404 });
  }

  return json(
    {
      metadata: snapshot.metadata ?? null,
      metadataType: snapshot.metadataType ?? "application/json",
    },
    { status: 200 }
  );
}

/**
 * Applies parent/root metadata operations to `targetRunId`, best-effort.
 *
 * The operations are handed to `updateMetadataService.call` as the body's
 * top-level `operations`, so the service applies them to `targetRunId`
 * itself. If that call throws or resolves with `undefined`, and the target
 * id is `run_`-prefixed, the same operations are offered once to
 * `applyMetadataMutationToBufferedRun`.
 *
 * Failures from either step are captured with `tryCatch` and logged as
 * warnings rather than rethrown: this is an auxiliary op that runs after
 * the caller's primary mutation has already landed.
 *
 * @param targetRunId Run to apply the operations to; no-op when undefined.
 * @param operations Operations to apply; no-op when undefined or empty.
 * @param env Authenticated environment. Passed whole to the PG service;
 *   only `id` and `organizationId` are used for the buffer fallback.
 */
async function routeOperationsToRun(
  targetRunId: string | undefined,
  operations: RunMetadataChangeOperation[] | undefined,
  env: AuthenticatedEnvironment
): Promise<void> {
  if (!targetRunId || !operations || operations.length === 0) return;

  // Try PG first via the existing service (this is how parent/root
  // operations have always landed; preserve that). Accepts the full
  // AuthenticatedEnvironment so no cast is needed to satisfy the
  // service's signature.
  //
  // Two non-success outcomes from `call`:
  //   * throws — PG threw (e.g. "Cannot update metadata for a completed
  //     run", or a transient PG outage).
  //   * resolves with undefined — PG row didn't exist (the target may be
  //     buffered, not yet materialised).
  // Either way we want to try the buffer fallback below; treating the
  // undefined-return as success would make the fallback unreachable.
  const [error, result] = await tryCatch(
    updateMetadataService.call(targetRunId, { operations }, env)
  );
  if (!error && result !== undefined) return;

  if (error) {
    // PG threw — auxiliary op, stay best-effort and don't surface this
    // to the caller (the caller's primary mutation already landed). But
    // warn so a genuine PG outage on these ops isn't invisible.
    logger.warn("metadata route: parent/root PG op failed", {
      targetRunId,
      error: error instanceof Error ? error.message : String(error),
    });
  }

  // Buffer fallback only makes sense for friendlyId-keyed entries. The
  // PG-side parent/root IDs are internal cuids; the buffer keys entries
  // by friendlyId, so passing the internal id would silently no-op.
  // Skip explicitly — a buffered child's parent is always materialised
  // in PG already (a buffered run hasn't executed, so it can't have
  // triggered the child), so the buffered-parent branch isn't actually
  // reachable. Treating the no-op as intentional rather than incidental.
  if (!targetRunId.startsWith("run_")) return;

  // Best-effort buffer fallback. Wrap so a transient Redis throw on
  // this auxiliary op can't 500 the request after the primary mutation
  // already succeeded.
  const [bufferError] = await tryCatch(
    applyMetadataMutationToBufferedRun({
      runId: targetRunId,
      environmentId: env.id,
      organizationId: env.organizationId,
      body: { operations },
    })
  );
  if (bufferError) {
    logger.warn("metadata route: buffer fallback for parent/root op failed", {
      targetRunId,
      error: bufferError instanceof Error ? bufferError.message : String(bufferError),
    });
  }
}

const { action } = createActionApiRoute(
{
params: ParamsSchema,
Expand All @@ -18,23 +139,82 @@ const { action } = createActionApiRoute(
method: "PUT",
},
async ({ authentication, body, params }) => {
const [error, result] = await tryCatch(
updateMetadataService.call(params.runId, body, authentication.environment)
);
const env = authentication.environment;
const runId = params.runId;

if (error) {
if (error instanceof ServiceValidationError) {
return json({ error: error.message }, { status: error.status ?? 422 });
// PG-canonical path. If the run is in PG, the existing service
// owns the full request shape including parent/root operations,
// metadataVersion CAS, batching, validation — none of which the
// buffer side needs to reimplement.
const [pgError, pgResult] = await tryCatch(
updateMetadataService.call(runId, body, env)
);
if (pgError) {
if (pgError instanceof ServiceValidationError) {
return json({ error: pgError.message }, { status: pgError.status ?? 422 });
}

return json({ error: "Internal Server Error" }, { status: 500 });
}
if (pgResult) {
return json(pgResult, { status: 200 });
}

// PG miss. Target run is either buffered or genuinely absent.
const bufferOutcome = await applyMetadataMutationToBufferedRun({
runId,
environmentId: env.id,
organizationId: env.organizationId,
body: { metadata: body.metadata, operations: body.operations },
});

if (!result) {
if (bufferOutcome.kind === "not_found") {
return json({ error: "Task Run not found" }, { status: 404 });
}
if (bufferOutcome.kind === "busy") {
// Entry is materialising, so the row may become visible in PG
// shortly. We don't re-issue the PG call here (that would cost an
// extra roundtrip); instead hand back 503 with a retry hint and rely
// on the SDK retry policy to converge. Note the 503 is customer-
// visible for legitimately bursty workloads if the client doesn't retry.
return json({ error: "Run materialising, retry shortly" }, { status: 503 });
}
if (bufferOutcome.kind === "version_exhausted") {
// Pathological contention — many concurrent metadata writers on
// the same buffered runId. Surface as 503 rather than silently
// dropping the request.
return json({ error: "Metadata write contention; retry shortly" }, { status: 503 });
}

// Buffered metadata mutation succeeded. Fan parent/root operations
// out to their respective runs. Parent/root are typically PG-
// materialised by the time the child is buffered, so the existing
// service handles them; if the PG call misses or throws, the helper
// falls back to the buffered mutation path for `run_`-prefixed ids.
const bufferedEntry = await findRunByIdWithMollifierFallback({
runId,
environmentId: env.id,
organizationId: env.organizationId,
});
if (bufferedEntry) {
await Promise.all([
routeOperationsToRun(bufferedEntry.parentTaskRunId, body.parentOperations, env),
// The PG service routes rootOperations to
// `taskRun.rootTaskRun?.id ?? taskRun.id` — the actual root, not
// the parent. The snapshot carries the root's *friendlyId*
// (parentTaskRunId is an internal id; root is friendlyId because
// it's what the engine passes through). Use it; if absent,
// route to the run itself (matches PG's self-fallback) rather
// than misrouting to the parent for grandchild → child → root
// hierarchies.
routeOperationsToRun(
bufferedEntry.rootTaskRunFriendlyId ?? runId,
body.rootOperations,
env,
),
]);
}

return json(result, { status: 200 });
return json({ metadata: bufferOutcome.newMetadata }, { status: 200 });
}
);

Expand Down
101 changes: 58 additions & 43 deletions apps/webapp/app/routes/api.v1.runs.$runId.tags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@ import { z } from "zod";
import { prisma } from "~/db.server";
import { MAX_TAGS_PER_RUN } from "~/models/taskRunTag.server";
import { authenticateApiRequest } from "~/services/apiAuth.server";
import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server";
import { logger } from "~/services/logger.server";
import { mutateWithFallback } from "~/v3/mollifier/mutateWithFallback.server";

// Route params for this endpoint: a single string `runId`.
const ParamsSchema = z.object({ runId: z.string() });

export async function action({ request, params }: ActionFunctionArgs) {
// Ensure this is a POST request
if (request.method.toUpperCase() !== "POST") {
return { status: 405, body: "Method Not Allowed" };
}

// Authenticate the request
const authenticationResult = await authenticateApiRequest(request);
if (!authenticationResult) {
return json({ error: "Invalid or Missing API Key" }, { status: 401 });
Expand All @@ -32,59 +32,74 @@ export async function action({ request, params }: ActionFunctionArgs) {

try {
const anyBody = await request.json();

const body = AddTagsRequestBody.safeParse(anyBody);
if (!body.success) {
return json({ error: "Invalid request body", issues: body.error.issues }, { status: 400 });
}

const run = await prisma.taskRun.findFirst({
where: {
friendlyId: parsedParams.data.runId,
runtimeEnvironmentId: authenticationResult.environment.id,
},
select: {
runTags: true,
},
});

const existingTags = run?.runTags ?? [];

//remove duplicate tags from the new tags
const bodyTags = typeof body.data.tags === "string" ? [body.data.tags] : body.data.tags;
const newTags = bodyTags.filter((tag) => {
if (tag.trim().length === 0) return false;
return !existingTags.includes(tag);
});

if (existingTags.length + newTags.length > MAX_TAGS_PER_RUN) {
return json(
{
error: `Runs can only have ${MAX_TAGS_PER_RUN} tags, you're trying to set ${
existingTags.length + newTags.length
}. These tags have not been set: ${newTags.map((t) => `'${t}'`).join(", ")}.`,
},
{ status: 422 }
);
}
const nonEmptyTags = bodyTags.filter((t) => t.trim().length > 0);

if (newTags.length === 0) {
if (nonEmptyTags.length === 0) {
return json({ message: "No new tags to add" }, { status: 200 });
}

await prisma.taskRun.update({
where: {
friendlyId: parsedParams.data.runId,
runtimeEnvironmentId: authenticationResult.environment.id,
},
data: {
runTags: {
push: newTags,
},
const env = authenticationResult.environment;
const outcome = await mutateWithFallback<Response>({
runId: parsedParams.data.runId,
environmentId: env.id,
organizationId: env.organizationId,
bufferPatch: { type: "append_tags", tags: nonEmptyTags, maxTags: MAX_TAGS_PER_RUN },
pgMutation: async (taskRun) => {
const existing = taskRun.runTags ?? [];
const newTags = nonEmptyTags.filter((t) => !existing.includes(t));

if (existing.length + newTags.length > MAX_TAGS_PER_RUN) {
return json(
{
error: `Runs can only have ${MAX_TAGS_PER_RUN} tags, you're trying to set ${
existing.length + newTags.length
}. These tags have not been set: ${newTags.map((t) => `'${t}'`).join(", ")}.`,
},
{ status: 422 }
);
}
if (newTags.length === 0) {
return json({ message: "No new tags to add" }, { status: 200 });
}
await prisma.taskRun.update({
where: {
id: taskRun.id,
runtimeEnvironmentId: env.id,
},
data: { runTags: { push: newTags } },
});
return json({ message: `Successfully set ${newTags.length} new tags.` }, { status: 200 });
},
// Buffer-applied patch path. The mutateSnapshot Lua deduplicates
// against existing snapshot tags atomically and enforces
// MAX_TAGS_PER_RUN via the `maxTags` we pass in `bufferPatch` —
// matching the PG-path cap above so a buffered run can't exceed the
// limit the trigger validator applies at creation.
synthesisedResponse: () =>
json({ message: `Successfully set ${nonEmptyTags.length} new tags.` }, { status: 200 }),
// Buffer rejected the append because it would exceed the cap. We
// don't know the exact deduped overflow count here (the Lua does),
// so report the limit rather than a precise "trying to set N".
rejectedResponse: () =>
json(
{ error: `Runs can only have ${MAX_TAGS_PER_RUN} tags.` },
{ status: 422 }
),
abortSignal: getRequestAbortSignal(),
});

return json({ message: `Successfully set ${newTags.length} new tags.` }, { status: 200 });
if (outcome.kind === "not_found") {
return json({ error: "Run not found" }, { status: 404 });
}
if (outcome.kind === "timed_out") {
return json({ error: "Run materialisation timed out" }, { status: 503 });
}
return outcome.response;
} catch (error) {
logger.error("Failed to add run tags", { error });
return json({ error: "Something went wrong, please try again." }, { status: 500 });
Expand Down
Loading