Commit db5d298

feat(enterprise): add data drains for continuous export to S3 / webhook
Data drains let enterprise organizations continuously export Sim data (workflow logs, job logs, audit logs, copilot chats, copilot runs) to customer-controlled S3 buckets or HTTPS webhooks on hourly or daily schedules. Pairs with data retention to satisfy long-term compliance archives. Built around two registries (DrainSource + DrainDestination) so adding new sources or destinations is a single-file change. Cursor-based at-least-once delivery; cursor advances only on full success and rows carry stable ids so consumers can dedupe. Includes SSRF-validated webhooks with DNS pinning, HMAC-SHA256 timestamp signatures, S3 server-side encryption, audit logging on every config and run change, and self-hosted env var gating that mirrors data retention.
1 parent 9eeb1b2 commit db5d298

52 files changed

Lines changed: 19786 additions & 3 deletions

Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
---
title: Data Drains
description: Continuously export workflow logs, audit logs, and Mothership data to your own S3 bucket or HTTPS endpoint on a schedule
---

import { FAQ } from '@/components/ui/faq'

Data Drains let organization owners and admins on Enterprise plans continuously export Sim data to a destination they control — a customer-owned S3 bucket or an HTTPS webhook. A drain runs on a schedule, picks up only new rows since its last successful run, and writes them as NDJSON to the destination.

Drains pair naturally with [Data Retention](/enterprise/data-retention): drain into long-term storage first, then let retention safely delete from Sim.

---

## Setup

Go to **Settings → Enterprise → Data Drains** in your workspace, then click **New drain**.

Each drain has four pieces:
1. A **source** — the category of data to export
2. A **destination** — where the data goes
3. A **schedule** — how often it runs
4. A **name** — unique within your organization

---
## Sources

A drain exports exactly one source. To export multiple sources, create multiple drains.

| Source | Description |
|---|---|
| **Workflow logs** | Workflow execution records (one row per execution, only after the run reaches a terminal state). |
| **Job logs** | Background job records (deployed APIs, schedules, webhooks). Only terminal-state rows are exported. |
| **Audit logs** | Organization and workspace audit events — logins, permission changes, resource creation/deletion, drain configuration changes. |
| **Copilot chats** | Mothership chat history. |
| **Copilot runs** | Mothership run records (terminal state only). |

Each row is delivered as a single line of NDJSON. The shape of each row is part of the public schema and stable across versions; every row carries an `id` field that downstream consumers can use to dedupe.
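Since every row is one JSON object per line with a stable `id`, a consumer can drop replayed rows with a simple seen-set. A minimal TypeScript sketch (the `status` field and overall row shape are illustrative; only the `id` field is documented above):

```typescript
// Hypothetical consumer: parse an NDJSON chunk and skip rows whose `id`
// was already processed, since at-least-once delivery allows replays.
type DrainRow = { id: string } & Record<string, unknown>

const seen = new Set<string>()

function dedupeChunk(ndjson: string): DrainRow[] {
  const fresh: DrainRow[] = []
  for (const line of ndjson.split('\n')) {
    if (!line.trim()) continue // skip the blank trailing line
    const row = JSON.parse(line) as DrainRow
    if (seen.has(row.id)) continue // replayed row, already handled
    seen.add(row.id)
    fresh.push(row)
  }
  return fresh
}

const chunk = '{"id":"a","status":"success"}\n{"id":"b","status":"error"}\n'
dedupeChunk(chunk) // two fresh rows
dedupeChunk(chunk) // zero fresh rows: duplicates dropped
```

A real consumer would back the seen-set with durable storage rather than process memory, so dedupe survives restarts.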
---

## Destinations

### Amazon S3 (or any S3-compatible store)

Writes one NDJSON object per delivered chunk to your bucket.

- **Bucket** — the bucket name. Must already exist; Sim does not create buckets.
- **Region** — AWS region (e.g. `us-east-1`).
- **Prefix** *(optional)* — folder path inside the bucket. Trailing slash optional.
- **Access key ID / Secret access key** — IAM credentials with `s3:PutObject` on the bucket. The "Test connection" button performs a real write probe to verify access, then deletes the probe object.
- **Endpoint** *(optional)* — for non-AWS stores like MinIO, Cloudflare R2, or GCS S3-interop. Leave blank for AWS S3.
- **Force path-style** *(optional)* — required for MinIO/Ceph; must be off for AWS S3 and R2.
Object keys are deterministic:

```
{prefix}/{source}/{drainId}/{yyyy}/{mm}/{dd}/{runId}-{seq}.ndjson
```

Objects are written with `AES256` server-side encryption.
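To illustrate what the template produces, here is a hypothetical `buildObjectKey` helper (not part of Sim's code; the source name and ids below are invented):

```typescript
// Illustrative only: reproduces the documented key template with UTC dates.
function buildObjectKey(opts: {
  prefix?: string
  source: string
  drainId: string
  runId: string
  seq: number
  date: Date
}): string {
  const { prefix, source, drainId, runId, seq, date } = opts
  const yyyy = date.getUTCFullYear().toString()
  const mm = String(date.getUTCMonth() + 1).padStart(2, '0')
  const dd = String(date.getUTCDate()).padStart(2, '0')
  // Trailing slash on the prefix is optional, so normalize it.
  const cleanPrefix = prefix ? prefix.replace(/\/+$/, '') + '/' : ''
  return `${cleanPrefix}${source}/${drainId}/${yyyy}/${mm}/${dd}/${runId}-${seq}.ndjson`
}

// e.g. exports/workflow-logs/drn_123/2025/06/01/run_9-0.ndjson
buildObjectKey({
  prefix: 'exports/',
  source: 'workflow-logs',
  drainId: 'drn_123',
  runId: 'run_9',
  seq: 0,
  date: new Date(Date.UTC(2025, 5, 1)),
})
```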
### HTTPS Webhook

POSTs each chunk as NDJSON to your endpoint.

- **URL** — must be HTTPS. Sim resolves the hostname and refuses to deliver to private, loopback, or cloud-metadata IPs. The resolved IP is pinned for the duration of a run to prevent DNS rebinding.
- **Signing secret** — shared secret used for HMAC-SHA256 signing.
- **Bearer token** *(optional)* — sent as `Authorization: Bearer <token>`.
- **Signature header name** *(optional)* — defaults to `X-Sim-Signature`.

Each request includes:

```
Content-Type: application/x-ndjson
User-Agent: Sim-DataDrain/1.0
X-Sim-Timestamp: <unix-seconds>
X-Sim-Signature-Version: v1
X-Sim-Signature: t=<unix-seconds>,v1=<hex(hmac-sha256)>
X-Sim-Drain-Id: <drain id>
X-Sim-Run-Id: <run id>
X-Sim-Source: <source name>
X-Sim-Sequence: <chunk index>
X-Sim-Row-Count: <rows in this chunk>
Idempotency-Key: <runId>-<sequence>
```

The signature is computed as `HMAC-SHA256(secret, "${timestamp}.${body}")` and serialized as `t=<timestamp>,v1=<hex>`. Verify by recomputing over the same string and rejecting timestamps older than ~5 minutes — this defends against captured-request replay attacks.
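A receiver-side verification sketch using Node's `crypto` module, assuming the signature arrives in the default `X-Sim-Signature` header in the `t=...,v1=...` form described above (the helper name and parsing are illustrative, not Sim-provided code):

```typescript
import { createHmac, timingSafeEqual } from 'node:crypto'

function verifySignature(
  secret: string,
  body: string,
  signatureHeader: string, // e.g. "t=1717000000,v1=ab12..."
  toleranceSeconds = 300
): boolean {
  const parts = Object.fromEntries(
    signatureHeader.split(',').map((kv) => kv.split('=') as [string, string])
  )
  const timestamp = Number(parts.t)
  if (!Number.isFinite(timestamp)) return false
  // Reject stale timestamps to block replay of captured requests.
  if (Math.abs(Date.now() / 1000 - timestamp) > toleranceSeconds) return false
  // Recompute the HMAC over the exact `${timestamp}.${body}` string.
  const expected = createHmac('sha256', secret).update(`${timestamp}.${body}`).digest('hex')
  const given = parts.v1 ?? ''
  if (given.length !== expected.length) return false
  // Constant-time comparison avoids leaking match position via timing.
  return timingSafeEqual(Buffer.from(given, 'hex'), Buffer.from(expected, 'hex'))
}
```

Verify against the raw request body bytes before any JSON parsing; re-serialized bodies will not match the signed string.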
Failed deliveries retry up to 3 times with exponential backoff (500ms, 1s, 2s, with ±20% jitter), respecting `Retry-After` on 429/503. Non-retryable 4xx responses fail the run immediately.
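The stated policy can be sketched as follows (illustrative only, not Sim's implementation; `deliver` stands in for the actual HTTP POST):

```typescript
// One initial attempt plus up to three retries with 500ms/1s/2s base delays.
async function deliverWithRetry(deliver: () => Promise<Response>): Promise<Response> {
  const delays = [500, 1000, 2000]
  for (let attempt = 0; ; attempt++) {
    const res = await deliver()
    if (res.ok) return res
    // Only 429/503 (and other 5xx) are worth retrying; other 4xx fail fast.
    const retryable = res.status === 429 || res.status === 503 || res.status >= 500
    if (!retryable || attempt >= delays.length) return res
    // Honor Retry-After (seconds) when the server sends it.
    const retryAfter = Number(res.headers.get('retry-after'))
    const base = retryAfter > 0 ? retryAfter * 1000 : delays[attempt]
    const jitter = base * (0.8 + Math.random() * 0.4) // ±20% jitter
    await new Promise((resolve) => setTimeout(resolve, jitter))
  }
}
```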
---

## Schedule

| Cadence | Drain runs |
|---|---|
| **Hourly** | Once per hour, on the dispatcher tick. |
| **Daily** | Once per day. |

You can also disable a drain with the **Enabled** toggle (runs stop, but the drain's configuration is preserved), or trigger an out-of-schedule run with **Run now** on any drain row.

---
## Delivery semantics

Drains use an **opaque cursor** that advances only on full success. If a delivery fails partway through a run, the cursor is unchanged and the next run replays from the last successful position.

This is **at-least-once delivery**. Combined with the `id` field on every row and the `Idempotency-Key` header on every webhook chunk, downstream systems can dedupe deterministically.

The **last 10 runs** for each drain are visible by expanding its row in the settings page, with status, row count, bytes written, destination locator (`s3://...` or webhook URL), and the error message if the run failed.

---
## Security

- Destination credentials are encrypted at rest using the same key-rotation–aware encryption that protects OAuth tokens.
- Credentials are **never** returned by the Sim API after creation. Updates accept new credentials; omitting them leaves the existing encrypted blob in place.
- Webhook URLs are SSRF-validated: HTTPS-only, no private/loopback/metadata IPs, with the resolved IP pinned to defeat DNS rebinding.
- Every create, update, delete, manual run, and test-connection call is recorded in the [Audit Log](/enterprise/audit-logs).

---
<FAQ items={[
  {
    question: "Who can configure data drains?",
    answer: "Only organization owners and admins can create, edit, run, or delete drains. On Sim Cloud, the organization must be on an Enterprise plan."
  },
  {
    question: "Will drained data be duplicated if a run fails?",
    answer: "The drain cursor only advances on overall success, so a failure replays the same chunks on the next run. Every row has a stable `id` field and every webhook chunk has an `Idempotency-Key` header so receivers can dedupe."
  },
  {
    question: "Can I export multiple sources to the same destination?",
    answer: "Yes — create one drain per source, all pointing at the same bucket or endpoint. S3 destinations namespace by source automatically; webhook receivers can branch on the `X-Sim-Source` header."
  },
  {
    question: "Does deleting a drain delete the data already exported?",
    answer: "No. Deletion only removes the drain's configuration and its run history from Sim. Data already written to your bucket or sent to your webhook is yours and is unaffected."
  },
  {
    question: "What happens if my credentials stop working mid-run?",
    answer: "The run fails, the drain cursor does not advance, and the failed run is recorded with the error. Once you fix the credentials with an Update or by re-creating the drain, the next run replays from where the last successful run left off."
  },
  {
    question: "What format is the data in?",
    answer: "NDJSON — newline-delimited JSON, one row per line. Each chunk is a single S3 object or a single POST body."
  }
]} />

---
## Self-hosted setup

### Environment variables

```bash
DATA_DRAINS_ENABLED=true
NEXT_PUBLIC_DATA_DRAINS_ENABLED=true
```

`NEXT_PUBLIC_DATA_DRAINS_ENABLED` shows the **Settings → Enterprise → Data Drains** page in the UI. `DATA_DRAINS_ENABLED` is reserved for server-side feature gating on self-hosted deployments. Both should be set to `true` together.

Data Drains otherwise rely on the standard Trigger.dev background job infrastructure used elsewhere in Sim — no additional setup is required. The cron dispatcher runs hourly and fans out due drains as background jobs.

apps/docs/content/docs/en/enterprise/index.mdx

Lines changed: 7 additions & 0 deletions
@@ -59,6 +59,12 @@ Configure how long execution logs, soft-deleted resources, and Mothership data a
---

## Data Drains

Continuously export workflow logs, audit logs, and Mothership data to a customer-owned S3 bucket or HTTPS webhook on a schedule. See the [data drains guide](/docs/enterprise/data-drains).

---

<FAQ items={[
  { question: "Who can manage Enterprise features?", answer: "Workspace admins on an Enterprise-entitled workspace. Access Control, SSO, whitelabeling, audit logs, and data retention are all configured per workspace under Settings → Enterprise." },
  { question: "Which SSO providers are supported?", answer: "Sim supports SAML 2.0 and OIDC, which work with virtually any enterprise identity provider including Okta, Azure AD (Entra ID), Google Workspace, ADFS, and OneLogin." },
@@ -79,6 +85,7 @@ Self-hosted deployments enable enterprise features via environment variables ins
| `WHITELABELING_ENABLED`, `NEXT_PUBLIC_WHITELABELING_ENABLED` | Custom branding |
| `AUDIT_LOGS_ENABLED`, `NEXT_PUBLIC_AUDIT_LOGS_ENABLED` | Audit logging |
| `NEXT_PUBLIC_DATA_RETENTION_ENABLED` | Data retention configuration |
| `DATA_DRAINS_ENABLED`, `NEXT_PUBLIC_DATA_DRAINS_ENABLED` | Data drains |
| `CREDENTIAL_SETS_ENABLED`, `NEXT_PUBLIC_CREDENTIAL_SETS_ENABLED` | Polling groups for email triggers |
| `INBOX_ENABLED`, `NEXT_PUBLIC_INBOX_ENABLED` | Sim Mailer inbox |
| `DISABLE_INVITATIONS`, `NEXT_PUBLIC_DISABLE_INVITATIONS` | Disable invitations; manage membership via Admin API |
Lines changed: 9 additions & 1 deletion
@@ -1,5 +1,13 @@
```json
{
  "title": "Enterprise",
  "pages": [
    "index",
    "sso",
    "access-control",
    "whitelabeling",
    "audit-logs",
    "data-retention",
    "data-drains"
  ],
  "defaultOpen": false
}
```
Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
```ts
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { type NextRequest, NextResponse } from 'next/server'
import { verifyCronAuth } from '@/lib/auth/internal'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
import { dispatchDueDrains } from '@/lib/data-drains/dispatcher'

const logger = createLogger('CronRunDataDrains')

export const GET = withRouteHandler(async (request: NextRequest) => {
  const authError = verifyCronAuth(request, 'Data drain dispatcher')
  if (authError) return authError

  try {
    const result = await dispatchDueDrains()
    logger.info('Data drain dispatcher run complete', result)
    return NextResponse.json({ success: true, ...result })
  } catch (error) {
    logger.error('Data drain dispatcher run failed', { error: toError(error).message })
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
  }
})
```
Lines changed: 173 additions & 0 deletions
@@ -0,0 +1,173 @@
```ts
import { AuditAction, AuditResourceType, recordAudit } from '@sim/audit'
import { db } from '@sim/db'
import { dataDrains } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq, ne } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import {
  deleteDataDrainContract,
  getDataDrainContract,
  updateDataDrainContract,
} from '@/lib/api/contracts/data-drains'
import { parseRequest, validationErrorResponse } from '@/lib/api/server'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
import { authorizeDrainAccess, loadDrain } from '@/lib/data-drains/access'
import { getDestination } from '@/lib/data-drains/destinations/registry'
import { encryptCredentials } from '@/lib/data-drains/encryption'
import { serializeDrain } from '@/lib/data-drains/serializers'

const logger = createLogger('DataDrainAPI')

type RouteContext = { params: Promise<{ id: string; drainId: string }> }

export const GET = withRouteHandler(async (request: NextRequest, context: RouteContext) => {
  const { id: organizationId, drainId } = await context.params
  const access = await authorizeDrainAccess(organizationId, { requireMutating: false })
  if (!access.ok) return access.response

  const parsed = await parseRequest(getDataDrainContract, request, context)
  if (!parsed.success) return parsed.response

  const drain = await loadDrain(organizationId, drainId)
  if (!drain) {
    return NextResponse.json({ error: 'Data drain not found' }, { status: 404 })
  }
  return NextResponse.json({ drain: serializeDrain(drain) })
})

export const PUT = withRouteHandler(async (request: NextRequest, context: RouteContext) => {
  const { id: organizationId, drainId } = await context.params
  const access = await authorizeDrainAccess(organizationId, { requireMutating: true })
  if (!access.ok) return access.response

  const parsed = await parseRequest(updateDataDrainContract, request, context)
  if (!parsed.success) return parsed.response

  const body = parsed.data.body

  const drain = await loadDrain(organizationId, drainId)
  if (!drain) {
    return NextResponse.json({ error: 'Data drain not found' }, { status: 404 })
  }

  if (body.name !== undefined && body.name !== drain.name) {
    const [conflict] = await db
      .select({ id: dataDrains.id })
      .from(dataDrains)
      .where(
        and(
          eq(dataDrains.organizationId, organizationId),
          eq(dataDrains.name, body.name),
          ne(dataDrains.id, drainId)
        )
      )
      .limit(1)
    if (conflict) {
      return NextResponse.json(
        { error: 'A data drain with this name already exists in this organization' },
        { status: 409 }
      )
    }
  }

  if (body.source !== undefined && body.source !== drain.source) {
    return NextResponse.json({ error: 'source cannot be changed after creation' }, { status: 400 })
  }

  const updates: Partial<typeof dataDrains.$inferInsert> = { updatedAt: new Date() }
  if (body.name !== undefined) updates.name = body.name
  if (body.scheduleCadence !== undefined) updates.scheduleCadence = body.scheduleCadence
  if (body.enabled !== undefined) updates.enabled = body.enabled

  if (body.destinationType !== undefined && body.destinationType !== drain.destinationType) {
    return NextResponse.json(
      { error: 'destinationType cannot be changed after creation' },
      { status: 400 }
    )
  }
  if (body.destinationConfig !== undefined || body.destinationCredentials !== undefined) {
    const destination = getDestination(drain.destinationType)
    if (body.destinationConfig !== undefined) {
      const configResult = destination.configSchema.safeParse(body.destinationConfig)
      if (!configResult.success) return validationErrorResponse(configResult.error)
      updates.destinationConfig = configResult.data as Record<string, unknown>
    }
    if (body.destinationCredentials !== undefined) {
      const credentialsResult = destination.credentialsSchema.safeParse(body.destinationCredentials)
      if (!credentialsResult.success) return validationErrorResponse(credentialsResult.error)
      updates.destinationCredentials = await encryptCredentials(credentialsResult.data)
    }
  }

  const [updated] = await db
    .update(dataDrains)
    .set(updates)
    .where(eq(dataDrains.id, drainId))
    .returning()

  logger.info('Data drain updated', { drainId, organizationId })

  recordAudit({
    workspaceId: null,
    actorId: access.session.user.id,
    action: AuditAction.DATA_DRAIN_UPDATED,
    resourceType: AuditResourceType.DATA_DRAIN,
    resourceId: drainId,
    actorName: access.session.user.name ?? undefined,
    actorEmail: access.session.user.email ?? undefined,
    resourceName: updated.name,
    description: `Updated data drain '${updated.name}'`,
    metadata: {
      organizationId,
      changes: {
        name: body.name,
        source: body.source,
        scheduleCadence: body.scheduleCadence,
        enabled: body.enabled,
        destinationConfigChanged: body.destinationConfig !== undefined,
        destinationCredentialsChanged: body.destinationCredentials !== undefined,
      },
    },
    request,
  })

  return NextResponse.json({ drain: serializeDrain(updated) })
})

export const DELETE = withRouteHandler(async (request: NextRequest, context: RouteContext) => {
  const { id: organizationId, drainId } = await context.params
  const access = await authorizeDrainAccess(organizationId, { requireMutating: true })
  if (!access.ok) return access.response

  const parsed = await parseRequest(deleteDataDrainContract, request, context)
  if (!parsed.success) return parsed.response

  const drain = await loadDrain(organizationId, drainId)
  if (!drain) {
    return NextResponse.json({ error: 'Data drain not found' }, { status: 404 })
  }

  await db.delete(dataDrains).where(eq(dataDrains.id, drainId))

  logger.info('Data drain deleted', { drainId, organizationId })

  recordAudit({
    workspaceId: null,
    actorId: access.session.user.id,
    action: AuditAction.DATA_DRAIN_DELETED,
    resourceType: AuditResourceType.DATA_DRAIN,
    resourceId: drainId,
    actorName: access.session.user.name ?? undefined,
    actorEmail: access.session.user.email ?? undefined,
    resourceName: drain.name,
    description: `Deleted data drain '${drain.name}'`,
    metadata: {
      organizationId,
      source: drain.source,
      destinationType: drain.destinationType,
    },
    request,
  })

  return NextResponse.json({ success: true as const })
})
```
