Skip to content

Commit d68546a

Browse files
fix(guardrails): chunk + time-bound internal PII mask requests
- Chunk maskPIIBatchViaHttp by count (2000) and bytes (256KB) so large executions split across requests and never hit the contract's 100k cap.
- Add AbortSignal.timeout(45s) per request so a slow or unreachable app container aborts and the caller scrubs, instead of hanging the trigger.dev job.
- Catch maskPIIBatch failures in the route: log and return a structured 500 (a broken venv fails loudly server-side; the caller still scrubs, so there is no leak).
- Add mask-client tests (order across chunks, count split, non-2xx, empty input).
1 parent 34ab1d2 commit d68546a

3 files changed

Lines changed: 142 additions & 7 deletions

File tree

apps/sim/app/api/guardrails/mask-batch/route.ts

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { createLogger } from '@sim/logger'
2+
import { getErrorMessage } from '@sim/utils/errors'
23
import { type NextRequest, NextResponse } from 'next/server'
34
import { guardrailsMaskBatchContract } from '@/lib/api/contracts'
45
import { parseRequest } from '@/lib/api/server'
@@ -25,7 +26,20 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
2526

2627
const { texts, entityTypes, language } = parsed.data.body
2728

28-
const masked = await maskPIIBatch(texts, entityTypes, language)
29-
logger.info('Masked PII batch', { count: texts.length })
30-
return NextResponse.json({ masked })
29+
try {
30+
const masked = await maskPIIBatch(texts, entityTypes, language)
31+
logger.info('Masked PII batch', { count: texts.length })
32+
return NextResponse.json({ masked })
33+
} catch (error) {
34+
// A broken/absent venv makes maskPIIBatch throw; fail loudly here (the
35+
// caller scrubs to REDACTION_FAILED, so PII is never leaked).
36+
logger.error('PII batch masking failed', {
37+
error: getErrorMessage(error),
38+
count: texts.length,
39+
})
40+
return NextResponse.json(
41+
{ error: getErrorMessage(error, 'PII masking failed') },
42+
{ status: 500 }
43+
)
44+
}
3145
})
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/**
2+
* @vitest-environment node
3+
*/
4+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
5+
6+
const { mockToken, mockBaseUrl } = vi.hoisted(() => ({
7+
mockToken: vi.fn(),
8+
mockBaseUrl: vi.fn(),
9+
}))
10+
11+
vi.mock('@/lib/auth/internal', () => ({ generateInternalToken: mockToken }))
12+
vi.mock('@/lib/core/utils/urls', () => ({ getInternalApiBaseUrl: mockBaseUrl }))
13+
14+
import { maskPIIBatchViaHttp } from '@/lib/guardrails/mask-client'
15+
16+
describe('maskPIIBatchViaHttp', () => {
  let fetchMock: ReturnType<typeof vi.fn>

  /** Returns the `texts` array carried by each recorded fetch call, in call order. */
  const sentChunks = (): string[][] =>
    fetchMock.mock.calls.map((call) => (JSON.parse(call[1].body) as { texts: string[] }).texts)

  beforeEach(() => {
    vi.clearAllMocks()
    mockToken.mockResolvedValue('tok')
    mockBaseUrl.mockReturnValue('http://app.internal:3000')
    // Echo endpoint: wraps every received text as `M(<text>)` so tests can check
    // both masking and ordering from the output alone.
    fetchMock = vi.fn(async (_url: string, init: { body: string }) => {
      const { texts } = JSON.parse(init.body) as { texts: string[] }
      return new Response(JSON.stringify({ masked: texts.map((t) => `M(${t})`) }), {
        status: 200,
        headers: { 'content-type': 'application/json' },
      })
    })
    vi.stubGlobal('fetch', fetchMock)
  })

  afterEach(() => {
    vi.unstubAllGlobals()
  })

  it('masks a small batch in a single request, with an abort timeout', async () => {
    const out = await maskPIIBatchViaHttp(['a', 'b', 'c'], ['EMAIL_ADDRESS'])

    expect(out).toEqual(['M(a)', 'M(b)', 'M(c)'])
    expect(fetchMock).toHaveBeenCalledTimes(1)
    expect(fetchMock.mock.calls[0][1].signal).toBeInstanceOf(AbortSignal)
  })

  it('splits by count into multiple requests, preserving global order', async () => {
    const texts = Array.from({ length: 5000 }, (_, i) => `t${i}`)

    const out = await maskPIIBatchViaHttp(texts, [])

    expect(out).toHaveLength(5000)
    expect(out[0]).toBe('M(t0)')
    expect(out[4999]).toBe('M(t4999)')
    // Every position must line up, not just the two ends.
    expect(out).toEqual(texts.map((t) => `M(${t})`))
    expect(fetchMock).toHaveBeenCalledTimes(3) // 2000-per-request cap
    expect(sentChunks().map((chunk) => chunk.length)).toEqual([2000, 2000, 1000])
  })

  it('splits by bytes when the next string would exceed the 256KB budget', async () => {
    // 3 x 100 KiB: two fit under 256 KiB, the third forces a flush.
    const texts = ['a', 'b', 'c'].map((ch) => ch.repeat(100 * 1024))

    const out = await maskPIIBatchViaHttp(texts, [])

    expect(out).toEqual(texts.map((t) => `M(${t})`))
    expect(fetchMock).toHaveBeenCalledTimes(2)
    expect(sentChunks().map((chunk) => chunk.length)).toEqual([2, 1])
  })

  it('sends a single string larger than the byte budget in its own request', async () => {
    const big = 'x'.repeat(300 * 1024)

    const out = await maskPIIBatchViaHttp([big, 'b'], [])

    expect(out).toEqual([`M(${big})`, 'M(b)'])
    expect(sentChunks().map((chunk) => chunk.length)).toEqual([1, 1])
  })

  it('throws on a non-2xx response so the caller can scrub', async () => {
    fetchMock.mockResolvedValueOnce(new Response('boom', { status: 500 }))

    await expect(maskPIIBatchViaHttp(['a'], [])).rejects.toThrow(/mask-batch request failed/)
  })

  it('throws when a chunk result does not match the number of strings sent', async () => {
    fetchMock.mockResolvedValueOnce(
      new Response(JSON.stringify({ masked: ['only-one'] }), {
        status: 200,
        headers: { 'content-type': 'application/json' },
      })
    )

    await expect(maskPIIBatchViaHttp(['a', 'b'], [])).rejects.toThrow(/unexpected result/)
  })

  it('returns [] without any request for empty input', async () => {
    const out = await maskPIIBatchViaHttp([], [])

    expect(out).toEqual([])
    expect(fetchMock).not.toHaveBeenCalled()
  })
})

apps/sim/lib/guardrails/mask-client.ts

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,77 @@ import type { GuardrailsMaskBatchResult } from '@/lib/api/contracts'
22
import { generateInternalToken } from '@/lib/auth/internal'
33
import { getInternalApiBaseUrl } from '@/lib/core/utils/urls'
44

5+
/**
6+
* Per-request limits. A chunk is flushed when it hits either bound, keeping each
7+
* request small enough for one short Presidio pass under a tight timeout and far
8+
* below the contract's 100k-entry cap — so large executions split across
9+
* requests instead of failing validation.
10+
*/
11+
// Byte budget per chunk (256 KiB), compared against the summed UTF-8 byte length
// of the chunk's strings. The check only applies to a non-empty chunk, so a
// single string larger than this is still sent, alone in its own request.
const REQUEST_MAX_BYTES = 256 * 1024
12+
// Maximum number of strings per chunk; a chunk holding this many is flushed
// before the next string is added.
const REQUEST_MAX_COUNT = 2_000
13+
/** Slightly above the 30s Python subprocess timeout so a hung app container aborts gracefully. */
14+
// Milliseconds; passed to AbortSignal.timeout for every chunk request.
const REQUEST_TIMEOUT_MS = 45_000
15+
516
/**
 * Redacts PII from many strings by calling the app container's internal
 * `/api/guardrails/mask-batch` endpoint.
 *
 * Presidio (a Python venv) is only available in the app container, while the
 * log-redaction persist path also executes inside the trigger.dev runtime, so
 * masking always goes over HTTP — the same way the guardrails tool does.
 *
 * The input is sent as sequential chunks, each bounded by REQUEST_MAX_COUNT
 * entries and REQUEST_MAX_BYTES of UTF-8 text (a single string above the byte
 * budget is sent on its own). Chunk results are appended in request order, so
 * the output lines up index-for-index with `texts`.
 *
 * @param texts - Strings to mask. An empty array resolves to `[]` without
 *   generating a token or issuing any request.
 * @param entityTypes - Entity types, forwarded unchanged with every chunk.
 * @param language - Optional language, forwarded unchanged with every chunk.
 * @returns The masked strings, same length and order as `texts`.
 * @throws When a chunk request rejects (non-2xx, timeout, or malformed
 *   response) or when a chunk's result count differs from the number of
 *   strings sent, so the caller can apply its own fail-safe (scrubbing rather
 *   than leaking).
 */
export async function maskPIIBatchViaHttp(
  texts: string[],
  entityTypes: string[],
  language?: string
): Promise<string[]> {
  if (texts.length === 0) {
    return []
  }

  const token = await generateInternalToken()
  const url = `${getInternalApiBaseUrl()}/api/guardrails/mask-batch`

  const results: string[] = []
  let pending: string[] = []
  let pendingBytes = 0

  // Posts the pending chunk (if any), validates its size, appends it to the
  // results, and starts a fresh chunk.
  const sendPending = async (): Promise<void> => {
    if (pending.length === 0) {
      return
    }
    const chunkMasked = await postChunk(url, token, pending, entityTypes, language)
    if (chunkMasked.length !== pending.length) {
      throw new Error('PII mask-batch returned an unexpected result')
    }
    results.push(...chunkMasked)
    pending = []
    pendingBytes = 0
  }

  for (const entry of texts) {
    const size = Buffer.byteLength(entry, 'utf8')
    const atCountLimit = pending.length >= REQUEST_MAX_COUNT
    const overByteBudget = pendingBytes + size > REQUEST_MAX_BYTES
    // Only a non-empty chunk is flushed, so an oversized string still goes out alone.
    if (pending.length > 0 && (atCountLimit || overByteBudget)) {
      await sendPending()
    }
    pending.push(entry)
    pendingBytes += size
  }

  // Send whatever is left after the last string.
  await sendPending()

  return results
}
68+
69+
async function postChunk(
70+
url: string,
71+
token: string,
72+
texts: string[],
73+
entityTypes: string[],
74+
language: string | undefined
75+
): Promise<string[]> {
2476
// boundary-raw-fetch: internal server-to-server call to the app container (internal JWT auth, configurable base URL)
2577
const response = await fetch(url, {
2678
method: 'POST',
@@ -29,6 +81,7 @@ export async function maskPIIBatchViaHttp(
2981
authorization: `Bearer ${token}`,
3082
},
3183
body: JSON.stringify({ texts, entityTypes, language }),
84+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
3285
})
3386

3487
if (!response.ok) {
@@ -37,7 +90,7 @@ export async function maskPIIBatchViaHttp(
3790
}
3891

3992
const data = (await response.json()) as GuardrailsMaskBatchResult
40-
if (!Array.isArray(data.masked) || data.masked.length !== texts.length) {
93+
if (!Array.isArray(data.masked)) {
4194
throw new Error('PII mask-batch returned an unexpected result')
4295
}
4396
return data.masked

0 commit comments

Comments
 (0)