Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions infrastructure/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,31 @@ For local development you can let Tilt generate Langfuse init secrets automatica
- Tilt runs Kustomize on `infrastructure/kustomize/langfuse` and applies the resulting `langfuse-init-secrets` (hash disabled) before Helm resources.
- This is dev-only. For production, create/manage secrets with your secret manager and set `secretKeyRef.name` in `values.yaml` to your managed secret.

**Langfuse Trace Retention via ClickHouse TTL (without Enterprise)**
If you want automatic deletion (for example after 1 year) without Langfuse Enterprise data-retention management, enable the chart-level retention CronJob:

```yaml
langfuseRetention:
  enabled: true
  retentionDays: 365
  schedule: "15 */6 * * *"
  hardDelete:
    enabled: true
    schedule: "30 3 * * *"
    mutationSync: 0
  clickhouse:
    database: "default"
    onCluster: true
    clusterName: "default"
```
Comment thread
a-klos marked this conversation as resolved.

Notes:
- ClickHouse connection/auth for retention jobs is taken from `langfuse.clickhouse.*` (same source as Langfuse itself).
- The CronJob applies idempotent `ALTER TABLE ... MODIFY TTL` statements on Langfuse tables (`traces`, `observations`, `scores`).
- If `hardDelete.enabled=true`, an additional CronJob executes deterministic `ALTER TABLE ... DELETE WHERE ...` mutations.
- With TTL, expired rows are removed by ClickHouse background merges, so deletion is not instant at the exact cutoff timestamp.
- Avoid applying TTL blindly to every table. Some tables are views/metadata and should not be retention-trimmed.

### 1.2 Qdrant

The deployment of Qdrant can be disabled by setting the following value in the Helm chart:
Expand Down
31 changes: 31 additions & 0 deletions infrastructure/rag/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,34 @@
{{- toYaml $data -}}
{{- end }}
{{- end -}}

{{/*
rag.langfuseRetentionClickhouseEnv

Renders the container `env` entries shared by the Langfuse retention CronJobs.
Connection and auth settings are read from `.Values.langfuse.clickhouse`;
database and cluster settings from `.Values.langfuseRetention.clickhouse`.

The password is exposed twice: CLICKHOUSE_PASSWORD comes from an optional
secretKeyRef, and CLICKHOUSE_PASSWORD_LITERAL carries the plain
`langfuse.clickhouse.auth.password` value, which is therefore rendered as
clear text into the pod spec.

The output starts at column 0, so callers are expected to indent it
(for example with `nindent`).
*/}}
{{- define "rag.langfuseRetentionClickhouseEnv" -}}
{{- $clickhouse := .Values.langfuse.clickhouse -}}
{{- $auth := $clickhouse.auth -}}
{{- $retention := .Values.langfuseRetention -}}
{{- /* "<release>-clickhouse" is the fallback for both the host and the secret name. */ -}}
{{- $fallbackName := printf "%s-clickhouse" .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- $secretName := $auth.existingSecret | default $fallbackName -}}
{{- $secretKey := $auth.existingSecretKey | default "CLICKHOUSE_PASSWORD" -}}
- name: CLICKHOUSE_HOST
  value: {{ $clickhouse.host | default $fallbackName | quote }}
- name: CLICKHOUSE_PORT
  value: {{ $clickhouse.nativePort | default 9000 | quote }}
- name: CLICKHOUSE_USER
  value: {{ $auth.username | default "default" | quote }}
- name: CLICKHOUSE_DATABASE
  value: {{ $retention.clickhouse.database | quote }}
- name: CLICKHOUSE_ON_CLUSTER
  value: {{ $retention.clickhouse.onCluster | ternary "true" "false" | quote }}
- name: CLICKHOUSE_CLUSTER_NAME
  value: {{ $retention.clickhouse.clusterName | quote }}
- name: RETENTION_DAYS
  value: {{ $retention.retentionDays | quote }}
- name: CLICKHOUSE_PASSWORD_LITERAL
  value: {{ $auth.password | quote }}
- name: CLICKHOUSE_PASSWORD
  valueFrom:
    secretKeyRef:
      name: {{ $secretName | quote }}
      key: {{ $secretKey | quote }}
      optional: true
{{- end -}}
70 changes: 70 additions & 0 deletions infrastructure/rag/templates/langfuse-retention-cronjob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{{- /*
CronJob that (re)applies a ClickHouse TTL to the configured Langfuse tables.

Rendered only when both `features.langfuse.enabled` and
`langfuseRetention.enabled` are true. For every entry in
`langfuseRetention.clickhouse.tables` the job runs
`ALTER TABLE <db>.<table> [ON CLUSTER <name>] MODIFY TTL ...` through
clickhouse-client, using the env rendered by
"rag.langfuseRetentionClickhouseEnv".
*/ -}}
{{- if and .Values.features.langfuse.enabled .Values.langfuseRetention.enabled }}
{{- $retentionImage := printf "%s:%s" .Values.langfuseRetention.image.repository .Values.langfuseRetention.image.tag -}}
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ printf "%s-langfuse-retention" .Release.Name | trunc 63 | trimSuffix "-" }}
  labels:
    app.kubernetes.io/name: rag
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
spec:
  schedule: {{ .Values.langfuseRetention.schedule | quote }}
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app.kubernetes.io/name: rag
            app.kubernetes.io/instance: {{ .Release.Name | quote }}
        spec:
          restartPolicy: OnFailure
          containers:
            - name: apply-clickhouse-ttl
              image: {{ $retentionImage | quote }}
              imagePullPolicy: {{ .Values.langfuseRetention.image.pullPolicy | quote }}
              command:
                - /bin/bash
                - -ec
              args:
                - |
                  set -euo pipefail

                  # Prefer the secret-backed password; fall back to the literal chart value.
                  PASSWORD="${CLICKHOUSE_PASSWORD:-${CLICKHOUSE_PASSWORD_LITERAL:-}}"
                  if [ -z "${PASSWORD}" ]; then
                    echo "No ClickHouse password found. Check langfuse.clickhouse.auth settings and secret." >&2
                    exit 1
                  fi

                  # RETENTION_DAYS is interpolated into SQL, so accept only a positive integer.
                  if ! [[ "${RETENTION_DAYS:-}" =~ ^[1-9][0-9]*$ ]]; then
                    echo "RETENTION_DAYS must be a positive integer (got '${RETENTION_DAYS:-}')." >&2
                    exit 1
                  fi

                  ON_CLUSTER_CLAUSE=""
                  if [ "${CLICKHOUSE_ON_CLUSTER}" = "true" ]; then
                    ON_CLUSTER_CLAUSE=" ON CLUSTER ${CLICKHOUSE_CLUSTER_NAME}"
                  fi

                  # One "<table> <timestamp column>" pair per line, rendered from chart values.
                  TABLE_ROWS="$(cat <<'EOF_TABLES'
                  {{- range .Values.langfuseRetention.clickhouse.tables }}
                  {{ .name }} {{ .timestampColumn }}
                  {{- end }}
                  EOF_TABLES
                  )"

                  # Split on space or tab so parsing does not depend on an invisible separator.
                  while IFS=$' \t' read -r table ts_col; do
                    if [ -z "${table}" ]; then
                      continue
                    fi
                    # An empty column would render "toDateTime()" and fail inside ClickHouse.
                    if [ -z "${ts_col}" ]; then
                      echo "Table '${table}' has no timestampColumn configured." >&2
                      exit 1
                    fi

                    echo "Applying TTL=${RETENTION_DAYS}d to ${CLICKHOUSE_DATABASE}.${table} (${ts_col})"
                    clickhouse-client \
                      --host "${CLICKHOUSE_HOST}" \
                      --port "${CLICKHOUSE_PORT}" \
                      --user "${CLICKHOUSE_USER}" \
                      --password "${PASSWORD}" \
                      --query "ALTER TABLE ${CLICKHOUSE_DATABASE}.${table}${ON_CLUSTER_CLAUSE} MODIFY TTL toDateTime(${ts_col}) + toIntervalDay(${RETENTION_DAYS})"
                  done <<< "${TABLE_ROWS}"
              env:
                {{- include "rag.langfuseRetentionClickhouseEnv" . | nindent 16 }}
{{- end }}
Comment thread
a-klos marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{{- /*
CronJob that hard-deletes expired Langfuse rows from ClickHouse.

Rendered when `features.langfuse.enabled` and
`langfuseRetention.hardDelete.enabled` are true. For every entry in
`langfuseRetention.clickhouse.tables` the job runs
`ALTER TABLE <db>.<table> [ON CLUSTER <name>] DELETE WHERE <column> < cutoff`
with `mutations_sync` taken from `langfuseRetention.hardDelete.mutationSync`.

NOTE(review): this template is not gated on `langfuseRetention.enabled`, so
hard deletion runs even when the TTL job is disabled. Confirm that is intended.
*/ -}}
{{- if and .Values.features.langfuse.enabled .Values.langfuseRetention.hardDelete.enabled }}
{{- $retentionImage := printf "%s:%s" .Values.langfuseRetention.image.repository .Values.langfuseRetention.image.tag -}}
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ printf "%s-langfuse-retention-delete" .Release.Name | trunc 63 | trimSuffix "-" }}
  labels:
    app.kubernetes.io/name: rag
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
spec:
  schedule: {{ .Values.langfuseRetention.hardDelete.schedule | quote }}
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app.kubernetes.io/name: rag
            app.kubernetes.io/instance: {{ .Release.Name | quote }}
        spec:
          restartPolicy: OnFailure
          containers:
            - name: delete-expired-rows
              image: {{ $retentionImage | quote }}
              imagePullPolicy: {{ .Values.langfuseRetention.image.pullPolicy | quote }}
              command:
                - /bin/bash
                - -ec
              args:
                - |
                  set -euo pipefail

                  # Prefer the secret-backed password; fall back to the literal chart value.
                  PASSWORD="${CLICKHOUSE_PASSWORD:-${CLICKHOUSE_PASSWORD_LITERAL:-}}"
                  if [ -z "${PASSWORD}" ]; then
                    echo "No ClickHouse password found. Check langfuse.clickhouse.auth settings and secret." >&2
                    exit 1
                  fi

                  # This job is destructive: an empty or zero RETENTION_DAYS would move the
                  # cutoff to "now" and delete every row, and a leading zero would be read
                  # as octal by $(( )). Accept only a plain positive integer.
                  if ! [[ "${RETENTION_DAYS:-}" =~ ^[1-9][0-9]*$ ]]; then
                    echo "RETENTION_DAYS must be a positive integer (got '${RETENTION_DAYS:-}')." >&2
                    exit 1
                  fi

                  # MUTATION_SYNC is interpolated into SQL; restrict it to the values 0, 1, 2.
                  case "${MUTATION_SYNC:-}" in
                    0|1|2) ;;
                    *)
                      echo "MUTATION_SYNC must be 0, 1 or 2 (got '${MUTATION_SYNC:-}')." >&2
                      exit 1
                      ;;
                  esac

                  ON_CLUSTER_CLAUSE=""
                  if [ "${CLICKHOUSE_ON_CLUSTER}" = "true" ]; then
                    ON_CLUSTER_CLAUSE=" ON CLUSTER ${CLICKHOUSE_CLUSTER_NAME}"
                  fi

                  # One "<table> <timestamp column>" pair per line, rendered from chart values.
                  TABLE_ROWS="$(cat <<'EOF_TABLES'
                  {{- range .Values.langfuseRetention.clickhouse.tables }}
                  {{ .name }} {{ .timestampColumn }}
                  {{- end }}
                  EOF_TABLES
                  )"

                  # Computed once so every table is trimmed against the same cutoff (UTC epoch seconds).
                  CUTOFF_UNIX="$(( $(date -u +%s) - RETENTION_DAYS * 86400 ))"

                  # Split on space or tab so parsing does not depend on an invisible separator.
                  while IFS=$' \t' read -r table ts_col; do
                    if [ -z "${table}" ]; then
                      continue
                    fi
                    # An empty column would render "toDateTime()" and fail inside ClickHouse.
                    if [ -z "${ts_col}" ]; then
                      echo "Table '${table}' has no timestampColumn configured." >&2
                      exit 1
                    fi

                    echo "Deleting rows older than ${RETENTION_DAYS}d from ${CLICKHOUSE_DATABASE}.${table} (${ts_col})"
                    clickhouse-client \
                      --host "${CLICKHOUSE_HOST}" \
                      --port "${CLICKHOUSE_PORT}" \
                      --user "${CLICKHOUSE_USER}" \
                      --password "${PASSWORD}" \
                      --query "ALTER TABLE ${CLICKHOUSE_DATABASE}.${table}${ON_CLUSTER_CLAUSE} DELETE WHERE toDateTime(${ts_col}) < toDateTime(${CUTOFF_UNIX}) SETTINGS mutations_sync = ${MUTATION_SYNC}"
                  done <<< "${TABLE_ROWS}"
              env:
                - name: MUTATION_SYNC
                  value: {{ .Values.langfuseRetention.hardDelete.mutationSync | quote }}
                {{- include "rag.langfuseRetentionClickhouseEnv" . | nindent 16 }}
{{- end }}
31 changes: 31 additions & 0 deletions infrastructure/rag/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,37 @@ langfuse:
name: ""
key: ""

# Optional ClickHouse-side retention for Langfuse data, for setups without
# Langfuse Enterprise data-retention management.
# When enabled, a CronJob applies idempotent ALTER TABLE ... MODIFY TTL commands.
langfuseRetention:
  enabled: false
  # Age in days after which rows are considered expired.
  retentionDays: 365
  # Cron schedule of the TTL CronJob.
  schedule: '15 */6 * * *'

  # Optional deterministic deletion in addition to TTL.
  # Runs ALTER TABLE ... DELETE WHERE ... on its own schedule (nightly by default).
  hardDelete:
    enabled: false
    schedule: '30 3 * * *'
    # Passed to ClickHouse as the mutations_sync setting:
    #   0 = async (default), 1 = wait for local completion, 2 = wait for all replicas.
    mutationSync: 0

  # Image used by the retention CronJobs to run clickhouse-client.
  image:
    repository: 'bitnamilegacy/clickhouse'
    tag: '25.2.1-debian-12-r0'
    pullPolicy: IfNotPresent

  clickhouse:
    # Connection/auth are taken from langfuse.clickhouse.*.
    database: 'default'
    # When true, statements are issued with ON CLUSTER <clusterName>.
    onCluster: true
    clusterName: 'default'
    # Tables to trim, each with the column its row age is measured from.
    tables:
      - name: 'traces'
        timestampColumn: 'timestamp'
      - name: 'observations'
        timestampColumn: 'event_ts'
      - name: 'scores'
        timestampColumn: 'timestamp'

minio:
image:
repository: bitnamilegacy/minio
Expand Down