triggerdotdev
diff --git a/‎.changeset/mollifier-redis-worker-primitives.md‎
Lines changed: 9 additions & 0 deletions b/‎.changeset/mollifier-redis-worker-primitives.md‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎.server-changes/mollifier-burst-protection.md‎
Lines changed: 6 additions & 0 deletions b/‎.server-changes/mollifier-burst-protection.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎apps/webapp/app/entry.server.tsx‎
Lines changed: 3 additions & 0 deletions b/‎apps/webapp/app/entry.server.tsx‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎apps/webapp/app/env.server.ts‎
Lines changed: 41 additions & 0 deletions b/‎apps/webapp/app/env.server.ts‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎apps/webapp/app/runEngine/services/triggerTask.server.ts‎
Lines changed: 114 additions & 0 deletions b/‎apps/webapp/app/runEngine/services/triggerTask.server.ts‎
Lines changed: 114 additions & 0 deletions
diff --git a/‎apps/webapp/app/services/worker.server.ts‎
Lines changed: 21 additions & 0 deletions b/‎apps/webapp/app/services/worker.server.ts‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎apps/webapp/app/v3/featureFlags.ts‎
Lines changed: 2 additions & 0 deletions b/‎apps/webapp/app/v3/featureFlags.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎apps/webapp/app/v3/mollifier/bufferedTriggerPayload.server.ts‎
Lines changed: 107 additions & 0 deletions b/‎apps/webapp/app/v3/mollifier/bufferedTriggerPayload.server.ts‎
Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,9 @@
+---
+"@trigger.dev/redis-worker": patch
+---
+
+Add MollifierBuffer and MollifierDrainer primitives for trigger burst smoothing.
+
+MollifierBuffer (`accept`, `pop`, `ack`, `requeue`, `fail`, `evaluateTrip`) is a per-env FIFO over Redis with atomic Lua transitions for status tracking. `evaluateTrip` is a sliding-window trip evaluator the webapp gate uses to detect per-env trigger bursts.
+
+MollifierDrainer pops entries through a polling loop with a user-supplied handler. The loop survives transient Redis errors via capped exponential backoff (up to 5s), and per-env pop failures don't poison the rest of the batch — one env's blip is logged and counted as failed for that tick. Rotation is two-level: orgs at the top, envs within each org. The buffer maintains `mollifier:orgs` and `mollifier:org-envs:${orgId}` atomically with per-env queues, so the drainer walks orgs → envs directly without an in-memory cache. The `maxOrgsPerTick` option (default 500) caps how many orgs are scheduled per tick; for each picked org, one env is popped (rotating round-robin within the org). An org with N envs gets the same per-tick scheduling slot as an org with 1 env, so tenant-level drainage throughput is determined by org count rather than env count.
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: feature
+---
+
+Lay the groundwork for an opt-in burst-protection layer on the trigger hot path. This release ships **monitoring only** — operators can observe per-env trigger storms via two opt-in modes, but no trigger calls are diverted or rate-limited yet (active burst smoothing follows in a later release). All new env vars are prefixed `TRIGGER_MOLLIFIER_*` and default off, so existing deployments see no behaviour change. With `TRIGGER_MOLLIFIER_SHADOW_MODE=1`, each trigger evaluates a per-env rate counter and logs `mollifier.would_mollify` when the threshold is crossed. With `TRIGGER_MOLLIFIER_ENABLED=1` plus a per-org `mollifierEnabled` flag, over-threshold triggers are also recorded in a Redis audit buffer alongside the normal `engine.trigger` call, drained by a background no-op consumer. The drainer has its own switch (`TRIGGER_MOLLIFIER_DRAINER_ENABLED`) so multi-replica deployments can pin the polling loop to a single worker service while every replica still produces into the buffer; unset, it inherits `TRIGGER_MOLLIFIER_ENABLED` so single-container self-hosters need only one flag. Drainer misconfiguration (shutdown-timeout reconciliation against `GRACEFUL_SHUTDOWN_TIMEOUT`, or `TRIGGER_MOLLIFIER_ENABLED=1` with no buffer Redis) now throws `MollifierConfigurationError` at boot and crashes the process, so the misconfig surfaces to the orchestrator instead of disappearing into a log line; transient init failures (Redis blip) are still logged-and-swallowed. Emits the `mollifier.decisions` OTel counter for per-env rate visibility.
@@ -6,6 +6,7 @@ import isbot from "isbot";
 import { renderToPipeableStream } from "react-dom/server";
 import { PassThrough } from "stream";
 import * as Worker from "~/services/worker.server";
+import { initMollifierDrainerWorker } from "~/v3/mollifierDrainerWorker.server";
 import { bootstrap } from "./bootstrap";
 import { LocaleContextProvider } from "./components/primitives/LocaleProvider";
 import {
@@ -247,6 +248,8 @@ Worker.init().catch((error) => {
   logError(error);
 });
 
+initMollifierDrainerWorker();
+
 bootstrap().catch((error) => {
   logError(error);
 });
 
@@ -1054,6 +1054,47 @@ const EnvironmentSchema = z
     COMMON_WORKER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
     COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"),
 
+    TRIGGER_MOLLIFIER_ENABLED: z.string().default("0"),
+    // Separate switch for the drainer (consumer side) so it can be split
+    // off onto a dedicated worker service. Unset → inherits
+    // TRIGGER_MOLLIFIER_ENABLED, so single-container self-hosters don't have to
+    // flip two switches. In multi-replica deployments, set this to "0"
+    // explicitly on every replica except the one dedicated drainer
+    // service — otherwise every replica's polling loop races for the
+    // same buffer entries. `TRIGGER_MOLLIFIER_ENABLED` is still the master kill
+    // switch; setting this to "1" while `TRIGGER_MOLLIFIER_ENABLED` is "0" is a
+    // no-op because the gate-side singleton refuses to construct a
+    // buffer when the system is off.
+    TRIGGER_MOLLIFIER_DRAINER_ENABLED: z.string().default(process.env.TRIGGER_MOLLIFIER_ENABLED ?? "0"),
+    TRIGGER_MOLLIFIER_SHADOW_MODE: z.string().default("0"),
+    TRIGGER_MOLLIFIER_REDIS_HOST: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.REDIS_HOST),
+    TRIGGER_MOLLIFIER_REDIS_PORT: z.coerce
+      .number()
+      .optional()
+      .transform(
+        (v) => v ?? (process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT) : undefined),
+      ),
+    TRIGGER_MOLLIFIER_REDIS_USERNAME: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.REDIS_USERNAME),
+    TRIGGER_MOLLIFIER_REDIS_PASSWORD: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.REDIS_PASSWORD),
+    TRIGGER_MOLLIFIER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
+    TRIGGER_MOLLIFIER_TRIP_WINDOW_MS: z.coerce.number().int().positive().default(200),
+    TRIGGER_MOLLIFIER_TRIP_THRESHOLD: z.coerce.number().int().positive().default(100),
+    TRIGGER_MOLLIFIER_HOLD_MS: z.coerce.number().int().positive().default(500),
+    TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY: z.coerce.number().int().positive().default(50),
+    TRIGGER_MOLLIFIER_ENTRY_TTL_S: z.coerce.number().int().positive().default(600),
+    TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS: z.coerce.number().int().positive().default(3),
+    TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS: z.coerce.number().int().positive().default(30_000),
+    TRIGGER_MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK: z.coerce.number().int().positive().default(500),
+
     BATCH_TRIGGER_PROCESS_JOB_VISIBILITY_TIMEOUT_MS: z.coerce
       .number()
       .int()
 
@@ -40,6 +40,18 @@ import type {
   TriggerTaskRequest,
   TriggerTaskValidator,
 } from "../types";
+import { env } from "~/env.server";
+import {
+  evaluateGate as defaultEvaluateGate,
+  type GateOutcome,
+  type MollifierEvaluateGate,
+} from "~/v3/mollifier/mollifierGate.server";
+import {
+  getMollifierBuffer as defaultGetMollifierBuffer,
+  type MollifierGetBuffer,
+} from "~/v3/mollifier/mollifierBuffer.server";
+import { buildBufferedTriggerPayload } from "~/v3/mollifier/bufferedTriggerPayload.server";
+import { serialiseSnapshot } from "@trigger.dev/redis-worker";
 import { QueueSizeLimitExceededError, ServiceValidationError } from "~/v3/services/common.server";
 
 class NoopTriggerRacepointSystem implements TriggerRacepointSystem {
@@ -59,6 +71,14 @@ export class RunEngineTriggerTaskService {
   private readonly traceEventConcern: TraceEventConcern;
   private readonly triggerRacepointSystem: TriggerRacepointSystem;
   private readonly metadataMaximumSize: number;
+  // Mollifier hooks are DI'd so tests can drive the call-site's mollify branch
+  // deterministically (stub the gate to return mollify, inject a real or fake
+  // buffer, force the global-enabled predicate to true so the call site
+  // doesn't short-circuit on an unset env). In production all three default
+  // to the live module-level singletons + env read.
+  private readonly evaluateGate: MollifierEvaluateGate;
+  private readonly getMollifierBuffer: MollifierGetBuffer;
+  private readonly isMollifierGloballyEnabled: () => boolean;
 
   constructor(opts: {
     prisma: PrismaClientOrTransaction;
@@ -71,6 +91,9 @@ export class RunEngineTriggerTaskService {
     tracer: Tracer;
     metadataMaximumSize: number;
     triggerRacepointSystem?: TriggerRacepointSystem;
+    evaluateGate?: MollifierEvaluateGate;
+    getMollifierBuffer?: MollifierGetBuffer;
+    isMollifierGloballyEnabled?: () => boolean;
   }) {
     this.prisma = opts.prisma;
     this.engine = opts.engine;
@@ -82,6 +105,10 @@ export class RunEngineTriggerTaskService {
     this.traceEventConcern = opts.traceEventConcern;
     this.metadataMaximumSize = opts.metadataMaximumSize;
     this.triggerRacepointSystem = opts.triggerRacepointSystem ?? new NoopTriggerRacepointSystem();
+    this.evaluateGate = opts.evaluateGate ?? defaultEvaluateGate;
+    this.getMollifierBuffer = opts.getMollifierBuffer ?? defaultGetMollifierBuffer;
+    this.isMollifierGloballyEnabled =
+      opts.isMollifierGloballyEnabled ?? (() => env.TRIGGER_MOLLIFIER_ENABLED === "1");
   }
 
   public async call({
@@ -316,6 +343,25 @@ export class RunEngineTriggerTaskService {
         taskKind: taskKind ?? "STANDARD",
       };
 
+      // Short-circuit before the gate when mollifier is globally off (the
+      // default for every deployment that hasn't opted in). Avoids the
+      // GateInputs allocation, the deps spread inside `evaluateGate`, and
+      // the `mollifier.decisions{outcome=pass_through}` OTel increment on
+      // every trigger — `triggerTask` is the highest-throughput code path
+      // in the system. The check goes through a DI'd predicate so unit
+      // tests that inject a custom `evaluateGate` can also override the
+      // gate-on check (the default reads `env.TRIGGER_MOLLIFIER_ENABLED`,
+      // which is "0" in CI where no .env file is present).
+      const mollifierOutcome: GateOutcome | null = this.isMollifierGloballyEnabled()
+        ? await this.evaluateGate({
+            envId: environment.id,
+            orgId: environment.organizationId,
+            taskId,
+            orgFeatureFlags:
+              (environment.organization.featureFlags as Record<string, unknown> | null) ?? null,
+          })
+        : null;
+
       try {
         return await this.traceEventConcern.traceRun(
           triggerRequest,
@@ -328,6 +374,74 @@ export class RunEngineTriggerTaskService {
 
             const payloadPacket = await this.payloadProcessor.process(triggerRequest);
 
+            // Phase 1 dual-write: if the org has the mollifier feature flag
+            // enabled and the per-env trip evaluator says divert, write the
+            // canonical replay payload to the buffer AND continue through
+            // engine.trigger as normal. The buffer entry is an audit/preview
+            // copy; the drainer's no-op handler consumes it to prove the
+            // dequeue mechanism works. Phase 2 will replace engine.trigger
+            // (below) with a synthesised 200 response and rely on the
+            // drainer to perform the Postgres write via replay.
+            if (mollifierOutcome?.action === "mollify") {
+              const buffer = this.getMollifierBuffer();
+              if (buffer) {
+                const canonicalPayload = buildBufferedTriggerPayload({
+                  runFriendlyId,
+                  taskId,
+                  envId: environment.id,
+                  envType: environment.type,
+                  envSlug: environment.slug,
+                  orgId: environment.organizationId,
+                  orgSlug: environment.organization.slug,
+                  projectId: environment.projectId,
+                  projectRef: environment.project.externalRef,
+                  body,
+                  idempotencyKey: idempotencyKey ?? null,
+                  idempotencyKeyExpiresAt: idempotencyKey
+                    ? idempotencyKeyExpiresAt ?? null
+                    : null,
+                  tags,
+                  parentRunFriendlyId: parentRun?.friendlyId ?? null,
+                  traceContext: event.traceContext,
+                  triggerSource,
+                  triggerAction,
+                  serviceOptions: options,
+                  createdAt: new Date(),
+                });
+
+                try {
+                  const serialisedPayload = serialiseSnapshot(canonicalPayload);
+                  await buffer.accept({
+                    runId: runFriendlyId,
+                    envId: environment.id,
+                    orgId: environment.organizationId,
+                    payload: serialisedPayload,
+                  });
+                  // Light log on the hot path — keep this synchronous work
+                  // O(1) per trigger. The drainer computes the payload hash
+                  // off-path; operators correlate `mollifier.buffered` →
+                  // `mollifier.drained` by runId.
+                  logger.debug("mollifier.buffered", {
+                    runId: runFriendlyId,
+                    envId: environment.id,
+                    orgId: environment.organizationId,
+                    taskId,
+                    payloadBytes: serialisedPayload.length,
+                  });
+                } catch (err) {
+                  // Fail-open: buffer write must never block the customer's
+                  // trigger. engine.trigger below is the primary write path
+                  // in Phase 1 — the customer still gets a valid run.
+                  logger.error("mollifier.buffer_accept_failed", {
+                    runId: runFriendlyId,
+                    envId: environment.id,
+                    taskId,
+                    err: err instanceof Error ? err.message : String(err),
+                  });
+                }
+              }
+            }
+
             const taskRun = await this.engine.trigger(
               {
                 friendlyId: runFriendlyId,
 
@@ -1,3 +1,24 @@
+/**
+ * ⚠️ LEGACY — Graphile-worker / ZodWorker setup. Do not touch.
+ *
+ * This file wires the original background-job system the webapp was
+ * built on (`@internal/zod-worker` → graphile-worker → Postgres). It is
+ * now in deprecation mode: every task in `workerCatalog` below is
+ * annotated with `@deprecated, moved to <new home>` and the live jobs
+ * for new features all run on `@trigger.dev/redis-worker` instead.
+ *
+ * Where to put new things:
+ *   - Background jobs / queues → use redis-worker, alongside
+ *     `~/v3/commonWorker.server.ts`, `~/v3/alertsWorker.server.ts`, or
+ *     `~/v3/batchTriggerWorker.server.ts`.
+ *   - Run lifecycle → `@internal/run-engine` via `~/v3/runEngine.server`.
+ *   - Custom polling loops with their own Redis connection → keep them
+ *     in their own lifecycle module (e.g. `~/v3/mollifierDrainerWorker.server.ts`)
+ *     and wire the bootstrap from `entry.server.tsx`. Don't reach into
+ *     `init()` below.
+ *
+ * Edit only when removing legacy paths.
+ */
 import { ZodWorker } from "@internal/zod-worker";
 import { DeliverEmailSchema } from "emails";
 import { z } from "zod";
 
@@ -8,6 +8,7 @@ export const FEATURE_FLAG = {
   hasAiAccess: "hasAiAccess",
   hasComputeAccess: "hasComputeAccess",
   hasPrivateConnections: "hasPrivateConnections",
+  mollifierEnabled: "mollifierEnabled",
 } as const;
 
 export const FeatureFlagCatalog = {
@@ -18,6 +19,7 @@ export const FeatureFlagCatalog = {
   [FEATURE_FLAG.hasAiAccess]: z.coerce.boolean(),
   [FEATURE_FLAG.hasComputeAccess]: z.coerce.boolean(),
   [FEATURE_FLAG.hasPrivateConnections]: z.coerce.boolean(),
+  [FEATURE_FLAG.mollifierEnabled]: z.coerce.boolean(),
 };
 
 export type FeatureFlagKey = keyof typeof FeatureFlagCatalog;
 
@@ -0,0 +1,107 @@
+import type { TriggerTaskRequestBody } from "@trigger.dev/core/v3";
+import type { TriggerTaskServiceOptions } from "~/v3/services/triggerTask.server";
+
+// Canonical payload shape written to the mollifier buffer when the gate
+// decides to mollify a trigger. Phase 1 ALSO calls engine.trigger directly
+// (dual-write) so this is currently an audit/preview record. Phase 2 will
+// make the buffer the primary write path: the drainer's handler will read
+// this payload and replay it through engine.trigger to create the run in
+// Postgres, and read-fallback endpoints will synthesise a Run view from it
+// while it is still QUEUED.
+//
+// CONTRACT: this shape must contain everything needed for Phase 2's
+// drainer-replay to reconstruct an equivalent engine.trigger call. Phase 1
+// already serialises it (`serialiseSnapshot`) and writes it to the mollifier
+// buffer from the trigger call site; Phase 2 will rebuild it on the drain
+// side. Keep it serialisable — no functions, no class instances. Instants
+// are therefore carried as ISO-8601 strings, never as `Date` objects.
+export type BufferedTriggerPayload = {
+  runFriendlyId: string;
+
+  // Routing identifiers — let the drainer re-fetch full AuthenticatedEnvironment
+  // at replay time rather than embedding it in the payload.
+  envId: string;
+  envType: string;
+  envSlug: string;
+  orgId: string;
+  orgSlug: string;
+  projectId: string;
+  projectRef: string;
+
+  // Task identifier — looked up against the locked BackgroundWorkerTask
+  // at replay time to recover task-defaults.
+  taskId: string;
+
+  // Customer-supplied trigger body (payload, options, context).
+  body: TriggerTaskRequestBody;
+
+  // Resolved values from upstream concerns. The drainer should NOT re-resolve
+  // these — that would create a second idempotency-key check, etc.
+  // `idempotencyKeyExpiresAt` is an ISO-8601 string (`Date#toISOString`); it
+  // is null whenever `idempotencyKey` is null or no expiry was supplied.
+  idempotencyKey: string | null;
+  idempotencyKeyExpiresAt: string | null;
+  tags: string[];
+
+  // Parent/root linkage for nested triggers.
+  parentRunFriendlyId: string | null;
+
+  // Trace context — propagates the original triggering span across the
+  // buffer→drain boundary so the run's lifecycle stays under one trace.
+  traceContext: Record<string, unknown>;
+
+  // Annotations + service options that influence routing/replay.
+  triggerSource: string;
+  triggerAction: string;
+  serviceOptions: TriggerTaskServiceOptions;
+
+  // Wall-clock instants relevant to the run, as ISO-8601 strings
+  // (`Date#toISOString`).
+  createdAt: string;
+};
+
+// Builds the serialisable snapshot of a trigger request that gets written to
+// the mollifier buffer. Every input is expected to be in scope already at the
+// point where `RunEngineTriggerTaskService.call` sees a "mollify" gate
+// outcome, so this is a pure field-by-field mapping: no I/O, no DB lookups.
+//
+// `Date` inputs are flattened to ISO-8601 strings. The idempotency expiry is
+// only kept when an idempotency key accompanies it; otherwise it is null.
+// Fields are picked explicitly (not spread) so stray properties on `input`
+// never leak into the stored payload.
+export function buildBufferedTriggerPayload(input: {
+  runFriendlyId: string;
+  taskId: string;
+  envId: string;
+  envType: string;
+  envSlug: string;
+  orgId: string;
+  orgSlug: string;
+  projectId: string;
+  projectRef: string;
+  body: TriggerTaskRequestBody;
+  idempotencyKey: string | null;
+  idempotencyKeyExpiresAt: Date | null;
+  tags: string[];
+  parentRunFriendlyId: string | null;
+  traceContext: Record<string, unknown>;
+  triggerSource: string;
+  triggerAction: string;
+  serviceOptions: TriggerTaskServiceOptions;
+  createdAt: Date;
+}): BufferedTriggerPayload {
+  const {
+    runFriendlyId,
+    envId,
+    envType,
+    envSlug,
+    orgId,
+    orgSlug,
+    projectId,
+    projectRef,
+    taskId,
+    body,
+    idempotencyKey,
+    tags,
+    parentRunFriendlyId,
+    traceContext,
+    triggerSource,
+    triggerAction,
+    serviceOptions,
+  } = input;
+
+  // An expiry without a key is meaningless, so it is dropped in that case.
+  const expiryInstant = input.idempotencyKeyExpiresAt;
+  let idempotencyKeyExpiresAt: string | null = null;
+  if (idempotencyKey && expiryInstant) {
+    idempotencyKeyExpiresAt = expiryInstant.toISOString();
+  }
+
+  const createdAt = input.createdAt.toISOString();
+
+  // Property order mirrors the `BufferedTriggerPayload` declaration.
+  return {
+    runFriendlyId,
+    envId,
+    envType,
+    envSlug,
+    orgId,
+    orgSlug,
+    projectId,
+    projectRef,
+    taskId,
+    body,
+    idempotencyKey,
+    idempotencyKeyExpiresAt,
+    tags,
+    parentRunFriendlyId,
+    traceContext,
+    triggerSource,
+    triggerAction,
+    serviceOptions,
+    createdAt,
+  };
+}