Skip to content

Commit d056056

Browse files
Sync public snapshot from freebuff-private
Source: CodebuffAI/freebuff-private@7aea393e7d4fa714477defccaed91e2725669b0b
1 parent a889a5f commit d056056

8 files changed

Lines changed: 294 additions & 0 deletions

File tree

cli/src/types/env.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ export type CliEnv = BaseEnv & {
7575
CODEBUFF_SCROLL_MULTIPLIER?: string
7676
CODEBUFF_PERF_TEST?: string
7777
CODEBUFF_TRACE?: string
78+
// Toggle for mirroring CLI logs to the server's /api/logs sink (Axiom).
79+
CODEBUFF_SHIP_LOGS?: string
7880
FREEBUFF_MODE?: string
7981
}
8082

cli/src/utils/env.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ export const getCliEnv = (): CliEnv => ({
7575
CODEBUFF_SCROLL_MULTIPLIER: process.env.CODEBUFF_SCROLL_MULTIPLIER,
7676
CODEBUFF_PERF_TEST: process.env.CODEBUFF_PERF_TEST,
7777
CODEBUFF_TRACE: process.env.CODEBUFF_TRACE,
78+
CODEBUFF_SHIP_LOGS: process.env.CODEBUFF_SHIP_LOGS,
7879
FREEBUFF_MODE: process.env.FREEBUFF_MODE,
7980
})
8081

cli/src/utils/log-shipper.ts

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import { IS_DEV, IS_TEST, IS_CI } from '@codebuff/common/env'
2+
3+
import { getApiClient } from './codebuff-api'
4+
import { getCliEnv } from './env'
5+
6+
import type { LogRecordInput } from '@codebuff/common/schemas/logs'
7+
8+
/**
9+
* Client-side shipper that mirrors CLI logs/events into the server's Axiom
10+
* logs sink via POST /api/logs. Runs alongside PostHog (it does not replace
11+
* it). Fully best-effort: batched, fire-and-forget, never throws, never logs
12+
* through the app logger (which would recurse).
13+
*
14+
* Tuning via env:
15+
* - CODEBUFF_SHIP_LOGS 'true' | 'false' (default: on outside dev/test/CI)
16+
*/
17+
18+
/** Max records sent in one POST; reaching this many buffered records triggers an immediate flush. */
const MAX_BATCH = 50
19+
/** Period of the background flush timer, in milliseconds. */
const FLUSH_INTERVAL_MS = 10_000
20+
/** Cap on buffered records; once reached, the oldest record is dropped to make room for a new one. */
const MAX_BUFFER = 1_000
21+
22+
/** Records waiting to be shipped, oldest first. */
let buffer: LogRecordInput[] = []
23+
/** Handle of the periodic flush timer; null until the timer has been started. */
let timer: ReturnType<typeof setInterval> | null = null
24+
/** True while a flush is in progress, so overlapping flush calls return early. */
let flushing = false
25+
/** Ensures the process exit/signal hooks are registered only once. */
let shutdownRegistered = false
26+
27+
/**
 * Decide whether log shipping is active.
 *
 * An explicit CODEBUFF_SHIP_LOGS of 'true' or 'false' wins. Any other value
 * (including unset) enables shipping only outside dev, test and CI.
 */
function enabled(): boolean {
  switch (getCliEnv().CODEBUFF_SHIP_LOGS) {
    case 'true':
      return true
    case 'false':
      return false
    default:
      return !(IS_DEV || IS_TEST || IS_CI)
  }
}
33+
34+
/**
 * Start the periodic flush timer the first time it is needed; later calls are
 * no-ops. Where the runtime's timer handle exposes `unref`, it is called so
 * the interval alone does not hold the event loop open.
 */
function ensureTimer(): void {
  if (timer) return
  const handle = setInterval(() => {
    void flushClientLogs()
  }, FLUSH_INTERVAL_MS)
  timer = handle
  // `unref` is not present on every runtime's timer handle, hence the optional call.
  ;(handle as { unref?: () => void }).unref?.()
}
41+
42+
/**
 * Hook a last best-effort flush onto process shutdown ('beforeExit', SIGTERM,
 * SIGINT). Registration happens once, and each listener fires at most once.
 *
 * NOTE(review): the flush is started but not awaited, so delivery before the
 * process actually exits is not guaranteed. Also, in Node.js adding a
 * SIGINT/SIGTERM listener removes the default exit-on-signal behavior —
 * confirm another handler elsewhere still terminates the process.
 */
function registerShutdown(): void {
  if (shutdownRegistered) return
  shutdownRegistered = true

  const flushOnShutdown = (): void => {
    void flushClientLogs()
  }

  process.once('beforeExit', flushOnShutdown)
  process.once('SIGTERM', flushOnShutdown)
  process.once('SIGINT', flushOnShutdown)
}
52+
53+
/**
 * Queue a single record for later delivery. Synchronous; the network call
 * happens in a separate, un-awaited flush.
 *
 * Does nothing when shipping is disabled. When the buffer is already at
 * MAX_BUFFER the oldest record is evicted first.
 */
export function enqueueClientLog(record: LogRecordInput): void {
  if (!enabled()) return

  // At capacity: drop the oldest entry so the buffer stays bounded.
  if (buffer.length >= MAX_BUFFER) buffer.shift()
  buffer.push(record)

  // Lazily set up the periodic flush and the shutdown hooks.
  ensureTimer()
  registerShutdown()

  // A full batch is available — flush now rather than waiting for the timer.
  const batchReady = buffer.length >= MAX_BATCH
  if (batchReady) void flushClientLogs()
}
66+
67+
/**
 * Send up to MAX_BATCH buffered records to /api/logs.
 *
 * Returns immediately when a flush is already running or nothing is buffered.
 * If the API client has no auth token yet, the batch is put back at the front
 * of the buffer for a later attempt. Any error while sending is swallowed and
 * the batch is dropped.
 */
export async function flushClientLogs(): Promise<void> {
  if (flushing) return
  if (buffer.length === 0) return

  flushing = true
  const pending = buffer.splice(0, MAX_BATCH)
  try {
    const api = getApiClient()
    if (api.authToken) {
      const body = { records: pending }
      const options = { includeAuth: true, retry: false, timeoutMs: 5_000 }
      await api.post('/api/logs', body, options)
    } else {
      // Not logged in yet — restore the batch (still bounded by MAX_BUFFER)
      // so it can ship once auth is available.
      buffer.unshift(...pending)
    }
  } catch {
    // Best-effort: drop on error rather than risk unbounded growth.
  } finally {
    flushing = false
  }
}

cli/src/utils/logger.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@ import {
1919
setAnalyticsErrorLogger,
2020
trackEvent,
2121
} from './analytics'
22+
import { enqueueClientLog } from './log-shipper'
2223
import { getCurrentChatDir, getProjectRoot } from '../project-files'
2324

25+
import type { LogRecordInput } from '@codebuff/common/schemas/logs'
26+
2427
/** Name of the per-chat debug log file written in production builds */
2528
export const CHAT_LOG_FILENAME = 'log.jsonl'
2629

@@ -198,6 +201,42 @@ function sendAnalyticsAndLog(
198201
})
199202
}
200203

204+
// Mirror the log/event into the server-side Axiom logs sink via /api/logs
205+
// (in addition to PostHog). Best-effort and batched; skip noisy debug logs
206+
// and anything before we know who the user is.
207+
if (!IS_DEV && !IS_TEST && !IS_CI && loggerContext.userId && level !== 'debug') {
208+
const eventId =
209+
includeData && typeof normalizedData === 'object'
210+
? getAnalyticsEventId(normalizedData)
211+
: null
212+
// Mirror the PostHog path's redaction: only ship raw payloads for errors or
213+
// when full telemetry is enabled; otherwise ship a summary. Keeps PII/data
214+
// volume symmetric across the two sinks.
215+
const includeRawData =
216+
isFullTelemetryEnabled({
217+
distinctId: loggerContext.userId,
218+
properties: loggerContext,
219+
}) ||
220+
level === 'error' ||
221+
level === 'fatal'
222+
const shipData = includeData
223+
? includeRawData
224+
? normalizedData
225+
: summarizeAnalyticsValue(normalizedData)
226+
: undefined
227+
const record: LogRecordInput = {
228+
timestamp: new Date().toISOString(),
229+
level,
230+
event: eventId ? String(eventId) : undefined,
231+
message: stringFormat(normalizedMsg ?? '', ...args),
232+
client_session_id: loggerContext.clientSessionId,
233+
client_request_id: loggerContext.clientRequestId,
234+
fingerprint_id: loggerContext.fingerprintId,
235+
data: shipData,
236+
}
237+
enqueueClientLog(record)
238+
}
239+
201240
// In dev mode, use appendFileSync for real-time logging (Bun has issues with pino sync)
202241
// In prod mode, use pino for better performance
203242
if (IS_DEV && logPath) {

common/src/constants/analytics-events.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@ export enum AnalyticsEvent {
134134
FEEDBACK_AUTH_ERROR = 'api.feedback_auth_error',
135135
FEEDBACK_VALIDATION_ERROR = 'api.feedback_validation_error',
136136

137+
// Web - Logs ingest API (client logs/events → Axiom logs sink)
138+
LOGS_INGEST_AUTH_ERROR = 'api.logs_ingest_auth_error',
139+
LOGS_INGEST_VALIDATION_ERROR = 'api.logs_ingest_validation_error',
140+
137141
// Web - Ads API
138142
ADS_API_AUTH_ERROR = 'api.ads_auth_error',
139143
ADS_CLICKED = 'ads.clicked',

common/src/schemas/logs.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { z } from 'zod/v4'
2+
3+
/**
4+
* Wire schema for the client → server log/event ingest endpoint (`/api/logs`).
5+
*
6+
* Clients (CLI, browser) POST a batch of records. The server stamps `source`,
7+
* `env`, `user_id` (from auth) and a received-at `timestamp` if missing, then
8+
* enqueues into the Axiom logs sink. See docs/logging.md.
9+
*
10+
* Caps exist to bound per-row storage and protect the ingest path from abuse.
11+
*/
12+
13+
/** Max number of records accepted in a single ingest request (see `logIngestSchema`). */
export const MAX_LOG_RECORDS_PER_BATCH = 500
14+
/** Max length of a record's `message` string (see `logRecordSchema`). */
export const MAX_LOG_MESSAGE_LENGTH = 4_000
15+
/** Max serialized size of a single record's `data` payload (≈64 KB). */
16+
export const MAX_LOG_DATA_BYTES = 64_000
17+
/**
18+
* Hard ceiling on the raw ingest request body (~1 MB). Enforced via
19+
* Content-Length BEFORE parsing so an unauthenticated client cannot force the
20+
* server to buffer/parse a huge body (Next.js app-router handlers have no
21+
* default body limit). See `isLogBodyTooLarge`.
22+
*/
23+
export const MAX_LOG_BODY_BYTES = 1_000_000
24+
25+
/**
 * Reports whether a request's declared body size exceeds MAX_LOG_BODY_BYTES.
 *
 * @param contentLength Raw `Content-Length` header value, or null when the
 *   header is absent.
 * @returns true only when the header parses to a finite number greater than
 *   the limit. A missing header (chunked / unknown length) or a non-numeric
 *   value yields false; the schema-level caps still apply in those cases.
 */
export function isLogBodyTooLarge(contentLength: string | null): boolean {
  if (contentLength == null) return false

  const declaredBytes = Number(contentLength)
  if (!Number.isFinite(declaredBytes)) return false
  return declaredBytes > MAX_LOG_BODY_BYTES
}
34+
35+
/** Severity names accepted on the wire. */
const LOG_LEVELS = ['debug', 'info', 'warn', 'error', 'fatal'] as const

/** Validates a record's `level` against the accepted severity names. */
export const logLevelSchema = z.enum(LOG_LEVELS)
42+
43+
/**
 * Schema for one client-supplied log/event record in an `/api/logs` batch.
 *
 * String fields are length-capped, and every field except `level` may be
 * omitted (`level` falls back to 'info').
 */
export const logRecordSchema = z.object({
  /**
   * Client-supplied event time (ISO 8601). Server falls back to now.
   * Uses the top-level `z.iso.datetime()` format; the `z.string().datetime()`
   * method form is deprecated in Zod 4.
   */
  timestamp: z.iso.datetime().optional(),
  /** Severity; defaults to 'info' when omitted. */
  level: logLevelSchema.default('info'),
  /** AnalyticsEvent name when this is an analytics event. */
  event: z.string().max(200).nullish(),
  /** Formatted log message, capped at MAX_LOG_MESSAGE_LENGTH. */
  message: z.string().max(MAX_LOG_MESSAGE_LENGTH).nullish(),
  client_session_id: z.string().max(200).nullish(),
  client_request_id: z.string().max(200).nullish(),
  fingerprint_id: z.string().max(200).nullish(),
  /**
   * Structured payload. Kept as unknown JSON; the server truncates if the
   * serialized form exceeds MAX_LOG_DATA_BYTES.
   */
  data: z.unknown().optional(),
})

/** Parsed shape of a single record (after defaults, so `level` is always set). */
export type LogRecordInput = z.infer<typeof logRecordSchema>
61+
62+
/** A non-empty batch of at most MAX_LOG_RECORDS_PER_BATCH records. */
const logRecordBatchSchema = z
  .array(logRecordSchema)
  .min(1)
  .max(MAX_LOG_RECORDS_PER_BATCH)

/** Request body accepted by the `/api/logs` ingest endpoint. */
export const logIngestSchema = z.object({ records: logRecordBatchSchema })

/** Parsed shape of an ingest request body. */
export type LogIngestBody = z.infer<typeof logIngestSchema>

common/src/types/contracts/logs.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/** Severity levels for the unified logs stream. */
2+
export type LogLevel = 'debug' | 'info' | 'warn' | 'error' | 'fatal'
3+
4+
/** Origin of a log row. */
5+
export type LogSource = 'server' | 'cli' | 'browser'
6+
7+
/**
8+
* One normalized log/event record. This is the provider-agnostic shape the
9+
* sink ingests (currently into Axiom — see `@codebuff/logging`). An "event" is
10+
* just a log row with `event` populated, so all logs and analytics events live
11+
* in one stream. See docs/logging.md.
12+
*/
13+
export type LogRow = {
14+
/** UUID for this row. */
15+
id: string
16+
/** Event time (becomes Axiom's `_time`). */
17+
timestamp: Date
18+
/** Severity of this row. */
level: LogLevel
19+
/** Where the row originated (server, CLI or browser). */
source: LogSource
20+
/** Emitting service, e.g. 'web', 'agent-runtime', 'freebuff-web', 'cli'. */
21+
service?: string | null
22+
/** Deploy environment: 'dev' | 'test' | 'prod'. */
23+
env: string
24+
/** AnalyticsEvent name when this row is an analytics event, else null. */
25+
event?: string | null
26+
/** Human-readable message (the formatted pino msg). */
27+
message?: string | null
28+
/** User the row is attributed to, or null when unknown. */
user_id?: string | null
29+
/** Client-reported session identifier, if any. */
client_session_id?: string | null
30+
/** Client-reported request identifier, if any. */
client_request_id?: string | null
31+
/** Client-reported fingerprint identifier, if any. */
fingerprint_id?: string | null
32+
/** Structured payload. Serialized to a single string field on ingest. */
33+
data?: unknown
34+
}

common/src/util/log-ingest.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import { MAX_LOG_DATA_BYTES } from '../schemas/logs'
2+
3+
import type { LogRecordInput } from '../schemas/logs'
4+
import type { LogRow, LogSource } from '../types/contracts/logs'
5+
6+
/**
 * Cap the size of a record's `data` payload so a single client cannot bloat
 * ingest volume.
 *
 * @param data Arbitrary payload attached to a log record.
 * @returns
 *   - `null` when `data` is `undefined`;
 *   - `{ _unserializable: true }` when the value has no JSON form, i.e.
 *     `JSON.stringify` throws (circular reference, BigInt) or yields
 *     `undefined` (function, symbol);
 *   - the original value, untouched, when its serialized UTF-8 size is at most
 *     MAX_LOG_DATA_BYTES;
 *   - otherwise `{ _truncated, original_bytes, preview }`, where `preview` is
 *     the leading part of the serialized JSON, at most MAX_LOG_DATA_BYTES
 *     bytes and cut on a character boundary.
 */
function truncateData(data: unknown): unknown {
  if (data === undefined) return null

  let serialized: string | undefined
  try {
    // JSON.stringify returns undefined (it does not throw) for functions and
    // symbols, despite its declared `string` return type.
    serialized = JSON.stringify(data) as string | undefined
  } catch {
    return { _unserializable: true }
  }
  if (serialized === undefined) return { _unserializable: true }

  // A UTF-16 code unit never needs more than 3 UTF-8 bytes, so small payloads
  // are known to fit without paying for an encode.
  if (serialized.length * 3 <= MAX_LOG_DATA_BYTES) return data

  // Measure real bytes: string length undercounts non-ASCII content.
  const bytes = new TextEncoder().encode(serialized)
  if (bytes.byteLength <= MAX_LOG_DATA_BYTES) return data

  // Step back over UTF-8 continuation bytes (10xxxxxx) so the preview never
  // ends in the middle of a multi-byte character.
  let end = MAX_LOG_DATA_BYTES
  while (end > 0 && ((bytes[end] ?? 0) & 0xc0) === 0x80) end--

  return {
    _truncated: true,
    original_bytes: bytes.byteLength,
    preview: new TextDecoder().decode(bytes.subarray(0, end)),
  }
}
25+
26+
/**
 * Convert validated client ingest records into `LogRow`s.
 *
 * Identity and environment come from the caller, not the record: `source`,
 * `service`, `env` and `userId` are stamped onto every row. Each row gets a
 * fresh UUID. A record's own timestamp is used when present and parseable;
 * otherwise `now` is used.
 *
 * @param params.records Validated records from the ingest request.
 * @param params.userId Authenticated user id; null/omitted becomes null.
 * @param params.now Fallback timestamp for records without a usable one.
 * @returns One row per input record, in the same order.
 */
export function buildLogRows(params: {
  records: LogRecordInput[]
  source: LogSource
  service: string
  env: string
  userId?: string | null
  now: Date
}): LogRow[] {
  const stampedUserId = params.userId === undefined ? null : params.userId
  const rows: LogRow[] = []

  for (const record of params.records) {
    // Prefer the client's timestamp, but only if it parses to a real date.
    const parsed = record.timestamp ? new Date(record.timestamp) : null
    const timestamp =
      parsed !== null && !isNaN(parsed.getTime()) ? parsed : params.now

    rows.push({
      id: crypto.randomUUID(),
      timestamp,
      level: record.level,
      source: params.source,
      service: params.service,
      env: params.env,
      event: record.event ?? null,
      message: record.message ?? null,
      user_id: stampedUserId,
      client_session_id: record.client_session_id ?? null,
      client_request_id: record.client_request_id ?? null,
      fingerprint_id: record.fingerprint_id ?? null,
      data: truncateData(record.data),
    })
  }

  return rows
}

0 commit comments

Comments
 (0)