From 2731a24ffb337887c4af621c959ab82ea0774fcc Mon Sep 17 00:00:00 2001 From: junaway <7041392+junaway@users.noreply.github.com> Date: Wed, 27 May 2026 00:45:23 +0000 Subject: [PATCH 1/2] v0.100.4 --- api/pyproject.toml | 2 +- api/uv.lock | 6 +++--- clients/python/pyproject.toml | 2 +- clients/python/uv.lock | 2 +- hosting/kubernetes/helm/Chart.yaml | 4 ++-- sdks/python/pyproject.toml | 2 +- sdks/python/uv.lock | 4 ++-- services/pyproject.toml | 2 +- services/uv.lock | 6 +++--- web/ee/package.json | 2 +- web/oss/package.json | 2 +- web/package.json | 2 +- web/packages/agenta-api-client/package.json | 2 +- 13 files changed, 19 insertions(+), 19 deletions(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index d265bc5119..15f3796916 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "api" -version = "0.100.3" +version = "0.100.4" description = "Agenta API" requires-python = ">=3.11,<3.14" authors = [ diff --git a/api/uv.lock b/api/uv.lock index 746ec28129..b72d93dbf3 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -8,7 +8,7 @@ resolution-markers = [ [[package]] name = "agenta" -version = "0.100.3" +version = "0.100.4" source = { editable = "../sdks/python" } dependencies = [ { name = "agenta-client" }, @@ -68,7 +68,7 @@ dev = [ [[package]] name = "agenta-client" -version = "0.100.3" +version = "0.100.4" source = { editable = "../clients/python" } dependencies = [ { name = "httpx" }, @@ -248,7 +248,7 @@ wheels = [ [[package]] name = "api" -version = "0.100.3" +version = "0.100.4" source = { virtual = "." } dependencies = [ { name = "agenta" }, diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index f4f6e486a0..e4caf0e724 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agenta-client" -version = "0.100.3" +version = "0.100.4" description = "Fern-generated Python client for the Agenta API." 
requires-python = ">=3.11,<3.14" authors = [ diff --git a/clients/python/uv.lock b/clients/python/uv.lock index ffcd1713f6..3b735cc519 100644 --- a/clients/python/uv.lock +++ b/clients/python/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.11, <3.14" [[package]] name = "agenta-client" -version = "0.100.3" +version = "0.100.4" source = { editable = "." } dependencies = [ { name = "httpx" }, diff --git a/hosting/kubernetes/helm/Chart.yaml b/hosting/kubernetes/helm/Chart.yaml index 836afc976b..307d21b414 100644 --- a/hosting/kubernetes/helm/Chart.yaml +++ b/hosting/kubernetes/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: agenta description: A Helm chart for deploying Agenta (OSS or EE) on Kubernetes type: application -version: 0.100.3 -appVersion: "v0.100.3" +version: 0.100.4 +appVersion: "v0.100.4" keywords: - agenta - llm diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml index ce06a839ac..f5ef94e34a 100644 --- a/sdks/python/pyproject.toml +++ b/sdks/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agenta" -version = "0.100.3" +version = "0.100.4" description = "The SDK for agenta is an open-source LLMOps platform." readme = "README.md" requires-python = ">=3.11,<3.14" diff --git a/sdks/python/uv.lock b/sdks/python/uv.lock index e34eb9028d..34b80da85a 100644 --- a/sdks/python/uv.lock +++ b/sdks/python/uv.lock @@ -4,7 +4,7 @@ requires-python = ">=3.11, <3.14" [[package]] name = "agenta" -version = "0.100.3" +version = "0.100.4" source = { editable = "." 
} dependencies = [ { name = "agenta-client" }, @@ -81,7 +81,7 @@ dev = [ [[package]] name = "agenta-client" -version = "0.100.3" +version = "0.100.4" source = { editable = "../../clients/python" } dependencies = [ { name = "httpx" }, diff --git a/services/pyproject.toml b/services/pyproject.toml index 43af96581c..c41ca6a57b 100644 --- a/services/pyproject.toml +++ b/services/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "services" -version = "0.100.3" +version = "0.100.4" description = "Agenta Services (Chat & Completion)" requires-python = ">=3.11,<3.14" authors = [ diff --git a/services/uv.lock b/services/uv.lock index 6aac80d5fe..fa9bf5d910 100644 --- a/services/uv.lock +++ b/services/uv.lock @@ -8,7 +8,7 @@ resolution-markers = [ [[package]] name = "agenta" -version = "0.100.3" +version = "0.100.4" source = { editable = "../sdks/python" } dependencies = [ { name = "agenta-client" }, @@ -68,7 +68,7 @@ dev = [ [[package]] name = "agenta-client" -version = "0.100.3" +version = "0.100.4" source = { editable = "../clients/python" } dependencies = [ { name = "httpx" }, @@ -2330,7 +2330,7 @@ wheels = [ [[package]] name = "services" -version = "0.100.3" +version = "0.100.4" source = { virtual = "." 
} dependencies = [ { name = "agenta" }, diff --git a/web/ee/package.json b/web/ee/package.json index a8e8c86be8..cab35a63cf 100644 --- a/web/ee/package.json +++ b/web/ee/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/ee", - "version": "0.100.3", + "version": "0.100.4", "private": true, "engines": { "node": "24.x" diff --git a/web/oss/package.json b/web/oss/package.json index 715d141fcd..df4eadb35a 100644 --- a/web/oss/package.json +++ b/web/oss/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/oss", - "version": "0.100.3", + "version": "0.100.4", "private": true, "engines": { "node": "24.x" diff --git a/web/package.json b/web/package.json index efedb9c675..bde79a36e4 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "agenta-web", - "version": "0.100.3", + "version": "0.100.4", "workspaces": [ "ee", "oss", diff --git a/web/packages/agenta-api-client/package.json b/web/packages/agenta-api-client/package.json index 9af5eefa5c..f2e1184e8b 100644 --- a/web/packages/agenta-api-client/package.json +++ b/web/packages/agenta-api-client/package.json @@ -1,6 +1,6 @@ { "name": "@agentaai/api-client", - "version": "0.100.3", + "version": "0.100.4", "private": true, "type": "module", "main": "./dist/index.js", From 28e3fab2452210a52d601433b7af4e2b1084a822 Mon Sep 17 00:00:00 2001 From: Juan Pablo Vega Date: Wed, 27 May 2026 15:52:59 +0200 Subject: [PATCH 2/2] [research] Support local/public certificates, in oss/ee, in docker-compose/kubernetes --- docs/designs/tls-certificates/gap.md | 126 ++++++++++++ docs/designs/tls-certificates/proposal.md | 239 ++++++++++++++++++++++ docs/designs/tls-certificates/research.md | 212 +++++++++++++++++++ docs/designs/tls-certificates/tasks.md | 131 ++++++++++++ 4 files changed, 708 insertions(+) create mode 100644 docs/designs/tls-certificates/gap.md create mode 100644 docs/designs/tls-certificates/proposal.md create mode 100644 docs/designs/tls-certificates/research.md create mode 100644
docs/designs/tls-certificates/tasks.md diff --git a/docs/designs/tls-certificates/gap.md b/docs/designs/tls-certificates/gap.md new file mode 100644 index 0000000000..6154c0a293 --- /dev/null +++ b/docs/designs/tls-certificates/gap.md @@ -0,0 +1,126 @@ +# Gap analysis - local TLS certificates and private CA support + +## Summary + +Agenta has partial HTTPS hosting support, but not a coherent TLS and private +CA feature across editions and deployment modes. The major gap is +consistency: ingress TLS, SDK clients, OpenTelemetry export, backend HTTP +clients, Compose, and Helm all need a documented way to use public +certificates or trust the same operator-provided private CA bundle. + +## Current capabilities + +- OSS Compose can run through Traefik with TLS using ACME. +- Kubernetes Helm can render ingress TLS and derive `https://` public URLs. +- The generated Python client allows advanced callers to pass a custom + `httpx.Client`. +- Operators can manually set standard env vars like `SSL_CERT_FILE`, + `REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE`, `NODE_EXTRA_CA_CERTS`, and + OpenTelemetry certificate variables in some deployments. + +## Missing capabilities + +### Compose + +- OSS SSL compose supports ACME, not static local cert/key files. +- EE compose has no SSL parity path. +- Compose files do not mount a CA bundle into all containers. +- Compose env examples mention Traefik SSL directories but do not define a + complete cert/key/CA configuration contract. + +### Kubernetes + +- Ingress TLS works generically, but there is no first-class CA bundle mount + for Agenta pods. +- The chart does not expose `tls.caBundle` values. +- There are env overrides, but no volume/volumeMount support dedicated to CA + bundle distribution. + +### API and workers + +- `env.agenta` has URL fields but no Agenta-specific CA bundle field. This is + acceptable if the implementation standardizes on runtime env vars. 
+- Direct `httpx.AsyncClient()` calls are scattered across API, workers, and + EE services. +- There is no shared HTTP client factory or TLS verification helper. + +### SDK + +- `ag.init()` does not accept `ca_bundle`. +- `ag.init()` constructs generated API clients without custom `httpx` + clients. +- SDK helper paths create direct `httpx.Client()` / `AsyncClient()` instances + and would not be fixed by generated-client changes alone. +- Auth, vault, resolver, and authed helper calls need the same CA behavior. +- This may still work without SDK API changes if all `httpx` clients preserve + default environment trust. + +### OpenTelemetry + +- `OTLPExporter` subclasses OpenTelemetry's HTTP exporter but does not expose + CA bundle configuration. +- The issue specifically reports tracing/export failure over HTTPS. + +### Web + +- Browser trust must be solved outside Agenta. +- Web container server-side Node calls may need `NODE_EXTRA_CA_CERTS`, but the + hosting manifests do not wire it today. + +## Design questions + +1. Should Agenta introduce `AGENTA_CA_BUNDLE`, or rely on standard env vars? + + Recommendation: rely on standard env vars first. Add an Agenta-specific + alias only if a required code path cannot consume those standards. + +2. Should Agenta support client certificates/mTLS now? + + Recommendation: no for this issue. The request is about trusting local + server certificates and root CA chains. mTLS can be a later design. + +3. Should Compose use one SSL file or separate ACME/static variants? + + Recommendation: keep the current ACME path working and add static-cert + support in a way that is explicit. A separate static SSL compose file may + be easier to document and test. + +4. Should the Helm CA bundle be ConfigMap or Secret backed? + + Recommendation: support both. Root CA bundles are often not secret, but + organizations may still prefer Secret distribution. 
+ +## Risks + +- A custom CA bundle can accidentally remove public roots if operators provide + a file containing only the internal CA. That can break OpenAI, Anthropic, + Composio, Cloudflare, SendGrid, PostHog, and other public HTTPS calls. +- OpenTelemetry's exporter may not use the same `httpx`/OpenSSL path as the + rest of the SDK, so it needs dedicated testing. +- Changing generated SDK files may be overwritten by Fern regeneration unless + the generator config or post-generation patch strategy is updated. +- Browser trust cannot be fixed by application config; docs must be explicit + to avoid false expectations. +- EE Compose parity may require private image testing with GHCR auth. + +## Testing gaps + +- No local HTTPS fixture exists for a private CA-signed Agenta endpoint. +- No SDK test currently validates standard TLS env vars with the default + `ag.init()` path. +- No OTLP export test validates HTTPS with a private CA. +- No Helm render test validates CA bundle volumes/env across all components. +- No Compose smoke test validates static cert/key TLS and SDK trust. + +## Acceptance criteria + +- OSS Compose supports public ACME certificates, static local cert/key TLS + termination, and CA bundle trust for Agenta containers. +- EE Compose supports the same. +- Helm supports ingress TLS plus CA bundle mounting through first-class + values. +- Standard env vars allow SDK API calls and OTLP export to an Agenta endpoint + signed by a private CA. +- Backend workflow service invocation and webhook delivery can call HTTPS + endpoints signed by the configured CA. +- Existing public-CA deployments behave unchanged. 
diff --git a/docs/designs/tls-certificates/proposal.md b/docs/designs/tls-certificates/proposal.md new file mode 100644 index 0000000000..b545f3e5cd --- /dev/null +++ b/docs/designs/tls-certificates/proposal.md @@ -0,0 +1,239 @@ +# Proposal - local TLS certificates and private CA support + +## Goal + +Provide a coherent, edition-neutral way to run Agenta behind both public and +locally managed TLS certificates, and to trust an internal/private CA across +Compose, Kubernetes, API, services, workers, SDK, and tracing paths. + +The feature should work for both OSS and EE, and for both Docker Compose and +Kubernetes. Issue #2407 focuses on OSS local hosting, but the implementation +should avoid OSS-only behavior. + +## Non-goals + +- Do not make browsers trust a private CA. Operators must distribute the root + CA through their device management or browser trust process. +- Do not replace Kubernetes ingress controller TLS configuration. The chart + should expose/document the expected values and trust bundle wiring. +- Do not disable TLS verification as the primary path. Insecure flags may + remain separate escape hatches, but private CA support should preserve + verification. + +## Configuration model + +Use standard runtime and library environment variables as the primary trust +mechanism. Do not introduce an Agenta-specific CA bundle variable unless a +later implementation discovers a code path that cannot consume the standard +variables. + +For private/local CA trust, mount one CA bundle file into the relevant +containers and point the standard variables at it: + +```env +SSL_CERT_FILE=/app/certs/ca.pem +REQUESTS_CA_BUNDLE=/app/certs/ca.pem +CURL_CA_BUNDLE=/app/certs/ca.pem +NODE_EXTRA_CA_CERTS=/app/certs/ca.pem +OTEL_EXPORTER_OTLP_CERTIFICATE=/app/certs/ca.pem +OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE=/app/certs/ca.pem +``` + +For public CA certificates, these variables are normally unnecessary. Public +certificates should work through the default trust stores. 
+ +### Precedence + +1. Explicit code-level TLS configuration, for example + `httpx.Client(verify="/path/to/ca.pem")`. +2. Library-specific environment variables, for example + `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE`, `OTEL_EXPORTER_OTLP_CERTIFICATE`, + `REQUESTS_CA_BUNDLE`, and `CURL_CA_BUNDLE`. +3. Runtime-wide environment variables, especially `SSL_CERT_FILE`. +4. Runtime/library default trust store. +5. Explicit insecure override, such as `verify=False`. It is listed last because + it is discouraged, not because it has the lowest precedence: as code-level configuration (item 1) it bypasses items 2-4. This remains an escape hatch and should not be the primary implementation. + +### Naming notes + +`*_CA_BUNDLE` and `*_CERT_FILE` are ecosystem-specific names. For this use +case they all point to a trusted CA bundle, not to Agenta's server +certificate: + +- `SSL_CERT_FILE`: OpenSSL/Python default CA bundle path. +- `REQUESTS_CA_BUNDLE`: Python `requests` CA bundle path. +- `CURL_CA_BUNDLE`: curl/libcurl CA bundle path. +- `NODE_EXTRA_CA_CERTS`: additional CA certificates for Node. +- `OTEL_EXPORTER_OTLP_CERTIFICATE`: OpenTelemetry's CA certificate file for + OTLP TLS verification. +- `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE`: trace-specific OpenTelemetry CA + certificate file. + +## Docker Compose + +### Static TLS certificates for ingress + +Add static-certificate support for Traefik in Compose. + +For OSS and EE, Compose should support both public ACME certificates and +mounted local cert/key files. + +Static local certificate support should expose a small cert/key contract such +as: + +```env +TRAEFIK_DOMAIN=agenta.internal +AGENTA_TLS_CERT_FILE=/certs/tls.crt +AGENTA_TLS_KEY_FILE=/certs/tls.key +``` + +Mount a local certificate directory into Traefik and configure Traefik dynamic +TLS certificates instead of ACME when cert/key files are supplied. + +EE should have parity with the OSS SSL compose path. EE should not remain +HTTP-only when OSS has a TLS entrypoint. The edition differences should be +limited to image names, migration commands, env examples, and network names.
+ +### CA bundle mounting + +Mount the same CA bundle into all containers that may make outbound HTTPS +requests: + +- `api` +- `worker-evaluations` +- `worker-tracing` +- `worker-webhooks` +- `worker-events` +- `cron` +- `alembic` +- `services` +- `web` + +Set the standard env vars in each relevant service: + +```yaml +environment: + - SSL_CERT_FILE=/app/certs/ca.pem + - REQUESTS_CA_BUNDLE=/app/certs/ca.pem + - CURL_CA_BUNDLE=/app/certs/ca.pem + - NODE_EXTRA_CA_CERTS=/app/certs/ca.pem + - OTEL_EXPORTER_OTLP_CERTIFICATE=/app/certs/ca.pem + - OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE=/app/certs/ca.pem +``` + +Node-only env vars are harmless for Python containers, but implementation may +choose to set `NODE_EXTRA_CA_CERTS` only on `web` if preferred. + +## Kubernetes + +### Ingress TLS + +Continue using the existing generic `ingress.tls` surface: + +```yaml +ingress: + enabled: true + host: agenta.internal + tls: + - secretName: agenta-tls + hosts: + - agenta.internal +``` + +This works for both public certificates managed by cert-manager and manually +created TLS Secrets backed by local/private certificates. It already makes +effective public URLs derive `https://` when URLs are not explicitly set. + +### Private CA bundle + +Add a chart-level TLS trust configuration: + +```yaml +tls: + caBundle: + existingConfigMap: agenta-ca-bundle + key: ca.pem + mountPath: /app/certs/ca.pem +``` + +Alternative Secret-backed form: + +```yaml +tls: + caBundle: + existingSecret: agenta-ca-bundle + key: ca.pem + mountPath: /app/certs/ca.pem +``` + +Render the volume, volumeMount, and standard env vars into backend pods and +web pods. This should apply to API, workers, cron, alembic, services, and web +unless a component explicitly opts out. + +Per-component env overrides should remain available for unusual deployments, +but the first-class `tls.caBundle` value should cover the common case. 
+ +## Runtime HTTP clients + +Most Agenta-owned Python HTTP paths use `httpx.Client()` or +`httpx.AsyncClient()` with default `trust_env=True`, so `SSL_CERT_FILE` should +be enough for those paths. The implementation should first verify that no +client disables environment trust. + +If a code path disables env trust or constructs custom SSL contexts, prefer +removing that custom behavior or teaching it to respect the standard env vars +instead of adding an Agenta-specific config path. + +For third-party public SaaS calls, document that a custom CA bundle file may +need to include both internal and public roots if it replaces the default +trust store. + +## SDK and generated client + +The generated Python client already accepts custom `httpx` clients for users +who need explicit code-level TLS configuration. For the default SDK path, +standard env vars should be enough as long as the generated client and SDK +helpers keep `trust_env=True`. + +Do not add `ag.init(ca_bundle=...)` in the first implementation. It can be a +future convenience API if standard env vars are insufficient or too hard to +document. + +## OpenTelemetry + +Configure OTLP through OpenTelemetry's standard certificate env vars: + +```env +OTEL_EXPORTER_OTLP_CERTIFICATE=/app/certs/ca.pem +OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE=/app/certs/ca.pem +``` + +Verify that the pinned `OTLPSpanExporter` honors these variables. If the +custom `OTLPExporter` subclass bypasses or overrides that behavior, adjust it +to preserve OpenTelemetry's standard precedence. + +The SDK should not swallow TLS failures as a generic "traces will not be +exported" warning without enough context. At minimum, log the endpoint and +that certificate verification failed. + +## Documentation + +Add self-hosting docs for: + +- Compose OSS with local cert/key and CA bundle. +- Compose EE with local cert/key and CA bundle. +- Compose OSS/EE with public ACME certificates. +- Kubernetes ingress TLS Secret and CA bundle ConfigMap/Secret. 
+- Kubernetes public certificate manager flow. +- SDK usage with standard env vars and optional explicit custom `httpx` + clients. +- Browser trust requirements. + +## Compatibility + +Default behavior remains unchanged when no CA bundle is configured. + +Existing deployments using public CA certificates keep working. Existing +manual env-var workarounds continue to work because `SSL_CERT_FILE`, +`REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE`, `NODE_EXTRA_CA_CERTS`, and +OpenTelemetry's certificate env vars are the documented runtime surface. diff --git a/docs/designs/tls-certificates/research.md b/docs/designs/tls-certificates/research.md new file mode 100644 index 0000000000..06103ed2ca --- /dev/null +++ b/docs/designs/tls-certificates/research.md @@ -0,0 +1,212 @@ +# Research - local TLS certificates and private CA support + +## Origin + +GitHub issue [#2407](https://github.com/Agenta-AI/agenta/issues/2407) +asks for support for locally generated/internal certificates when hosting +Agenta locally in OSS mode. The user specifically reports that HTTP works, +but HTTPS fails for the Agenta CLI/SDK and tracing export when the server +certificate is signed by an internal CA. + +Although the issue is OSS-focused, the same capability should work in OSS and +EE, and should cover both public certificates and local/private certificates. + +The issue is open, labeled `enhancement` and `backlog`, assigned to +`junaway`, and linked from Linear as `AGE-3786`. + +## Problem shape + +This is broader than enabling HTTPS on the public entrypoint. There are four +distinct trust surfaces: + +1. User/browser to web/API/services. +2. External SDK/CLI clients to Agenta API and OTLP endpoints. +3. Backend/workers/services calling internal or customer-controlled HTTPS + endpoints. +4. SDK/runtime code paths inside workflow services, evaluator execution, and + tracing exporters. 
+ +Supporting only ingress TLS does not solve the SDK, OpenTelemetry, webhook, +or server-side HTTP client failures when a private CA is involved. + +## Docker Compose state + +OSS has an HTTPS-specific compose file: + +- `hosting/docker-compose/oss/docker-compose.gh.ssl.yml` + +That file enables Traefik TLS routers for web, API, and services: + +- web: `traefik.http.routers.web.tls=true` +- API: `traefik.http.routers.api.tls=true` +- services: `traefik.http.routers.services.tls=true` + +The Traefik config at `hosting/docker-compose/oss/ssl/traefik.yml` uses ACME: + +```yaml +certificatesResolvers: + myResolver: + acme: + tlschallenge: true + storage: "acme.json" +``` + +This is useful for public certificates, but it does not provide a static +local certificate/key path for internally provisioned certs. + +EE compose currently has no equivalent SSL file. Its Traefik service in +`hosting/docker-compose/ee/docker-compose.gh.yml` exposes only the `web` +entrypoint on port 80: + +```yaml +command: + - --api.dashboard=true + - --providers.docker + - --entrypoints.web.address=:80 +ports: + - "${TRAEFIK_PORT:-80}:80" +``` + +## Kubernetes state + +The Helm chart has generic ingress TLS support: + +- `hosting/kubernetes/helm/templates/ingress.yaml` renders `spec.tls` from + `ingress.tls`. +- `hosting/kubernetes/helm/templates/_helpers.tpl` derives public URL scheme + from ingress TLS: if `ingress.tls` exists, the effective web/API/services + URLs use `https://`. + +That covers public TLS termination when the operator supplies a Kubernetes +TLS Secret through ingress config. It does not provide first-class mounting +of a private CA bundle into API, worker, cron, services, or web pods. 
+ +Per-component environment overrides exist through the component schema: + +```json +"env": { + "type": "object", + "additionalProperties": { + "type": ["string", "number", "boolean"] + } +} +``` + +That means operators can manually set env vars like `SSL_CERT_FILE`, but +there is no chart-level `tls.caBundle` or `customCa` value that mounts the +bundle everywhere it is needed. + +## Runtime configuration state + +The central API config in `api/oss/src/utils/env.py` has URL settings: + +```python +class AgentaConfig(BaseModel): + web_url: str = os.getenv("AGENTA_WEB_URL") or "http://localhost" + services_url: str = os.getenv("AGENTA_SERVICES_URL") or "http://localhost/services" + api_url: str = os.getenv("AGENTA_API_URL") or "http://localhost/api" + api_internal_url: str | None = os.getenv("AGENTA_API_INTERNAL_URL") +``` + +There is no Agenta-specific CA bundle, cert file, key file, or TLS +verification setting in the shared `env.agenta` object today. That may be +fine: standard runtime variables are a better first option if the underlying +libraries already honor them. + +## SDK and generated client state + +The generated Python client supports custom `httpx` clients: + +```python +AgentaApi(..., httpx_client: Optional[httpx.Client] = None) +AsyncAgentaApi(..., httpx_client: Optional[httpx.AsyncClient] = None) +``` + +That allows advanced callers to use explicit code-level TLS configuration: + +```python +httpx.Client(verify="/path/to/ca.pem") +``` + +However, `ag.init()` does not expose a CA bundle option. That may not be a +problem if `httpx` keeps its default `trust_env=True` behavior, because +`SSL_CERT_FILE` can be enough for the default path. 
+ +Several SDK helper paths also create plain `httpx.Client()` or +`httpx.AsyncClient()` directly: + +- `sdks/python/agenta/sdk/utils/client.py` +- `sdks/python/agenta/sdk/middlewares/routing/auth.py` +- `sdks/python/agenta/sdk/middlewares/running/vault.py` +- `sdks/python/agenta/sdk/middlewares/running/resolver.py` +- `sdks/python/agenta/sdk/engines/running/handlers.py` + +A generated-client-only change would therefore leave important SDK paths +unfixed. + +## OpenTelemetry state + +`sdks/python/agenta/sdk/engines/tracing/exporters.py` subclasses +`OTLPSpanExporter`: + +```python +class OTLPExporter(OTLPSpanExporter): + ... +``` + +It injects credentials into the exporter session but does not expose or +configure certificate verification behavior. This matches the issue report: +OTLP/tracing may fail even when normal HTTPS calls can be made with standard +certificate configuration. + +## Backend outbound HTTP state + +The API and workers contain direct `httpx.AsyncClient()` usage without a +shared TLS configuration helper. Relevant examples: + +- `api/oss/src/core/workflows/service.py` invokes workflow service URLs. +- `api/oss/src/core/webhooks/delivery.py` sends webhook deliveries. +- `api/oss/src/core/ai_services/client.py` calls Agenta AI services. +- `api/oss/src/core/tools/providers/composio/adapter.py` calls Composio. +- `api/oss/src/core/auth/turnstile.py` calls Cloudflare Turnstile. +- EE service helpers also use direct `httpx`. + +Not every external SaaS call should necessarily use an internal-only CA +bundle. If operators set a bundle that replaces default roots, that bundle may +need to include both internal and public roots. + +## Browser and web container state + +The web entrypoint writes runtime browser config to `public/__env.js`: + +```sh +NEXT_PUBLIC_AGENTA_API_URL: "${AGENTA_API_URL:-http://localhost/api}" +``` + +Browser trust is controlled by the user's OS/browser trust store. 
Agenta +cannot make a browser trust an internal CA through application config. The +operator must install the root CA on user devices or use a certificate chain +trusted by those devices. + +Server-side Node code in the web container may need `NODE_EXTRA_CA_CERTS` +when it performs HTTPS calls to internal endpoints. + +## Existing escape hatches + +Operators can sometimes work around the issue with platform-native env vars: + +- `SSL_CERT_FILE` for Python/OpenSSL/httpx. +- `REQUESTS_CA_BUNDLE` for requests-based clients. +- `CURL_CA_BUNDLE` for curl/libcurl. +- `NODE_EXTRA_CA_CERTS` for Node. +- `OTEL_EXPORTER_OTLP_CERTIFICATE` for OpenTelemetry OTLP TLS verification. +- `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE` for trace-specific OTLP TLS + verification. + +These are not documented as a coherent Agenta feature, are not mounted by the +Compose/Helm manifests, and do not give SDK users an obvious `ag.init(...)` +option. + +The naming is ecosystem-specific. In this use case, `*_CERT_FILE` and +`*_CA_BUNDLE` variables all point to a trusted CA bundle file, not to +Agenta's server certificate. diff --git a/docs/designs/tls-certificates/tasks.md b/docs/designs/tls-certificates/tasks.md new file mode 100644 index 0000000000..1da2effaed --- /dev/null +++ b/docs/designs/tls-certificates/tasks.md @@ -0,0 +1,131 @@ +# Tasks + +Ordered for incremental delivery. Each block should be testable on its own. + +## 1. Confirm requirements and current behavior + +- [ ] Reproduce issue #2407 with a local HTTPS Agenta endpoint signed by a + private CA. +- [ ] Confirm which paths fail: generated client, `ag.init()`, OTLP export, + workflow invocation, webhook delivery, and web-to-API calls. +- [ ] Confirm whether standard env vars fix each failing path: + `SSL_CERT_FILE`, `REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE`, + `NODE_EXTRA_CA_CERTS`, `OTEL_EXPORTER_OTLP_CERTIFICATE`, and + `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE`. 
+- [ ] Decide final ingress cert/key names for static local certificates, such + as `AGENTA_TLS_CERT_FILE` and `AGENTA_TLS_KEY_FILE`. +- [ ] Decide whether static-cert Compose support is a new compose file or an + extension of the existing SSL files. +- [ ] Confirm the solution is edition-neutral: OSS and EE should expose the + same TLS shape. + +## 2. Standard env var verification + +- [ ] Verify default `httpx.Client()` and `httpx.AsyncClient()` paths honor + `SSL_CERT_FILE` in this repo's pinned dependency set. +- [ ] Verify Python `requests` paths honor `REQUESTS_CA_BUNDLE`. +- [ ] Verify curl/script paths honor `CURL_CA_BUNDLE` where relevant. +- [ ] Verify Node server-side HTTPS calls honor `NODE_EXTRA_CA_CERTS`. +- [ ] Verify OpenTelemetry OTLP export honors + `OTEL_EXPORTER_OTLP_CERTIFICATE` and + `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE`. +- [ ] Only if a required path ignores standard variables, design a narrowly + scoped code change for that path. + +## 3. Backend HTTP clients + +- [ ] Audit direct `httpx.Client` and `httpx.AsyncClient` usage under + `api/oss/src` and `api/ee/src`. +- [ ] Confirm none of those clients set `trust_env=False` or custom SSL + contexts that bypass `SSL_CERT_FILE`. +- [ ] Confirm workflow service invocation in + `api/oss/src/core/workflows/service.py` works with `SSL_CERT_FILE`. +- [ ] Confirm webhook delivery in `api/oss/src/core/webhooks/delivery.py` + works with `SSL_CERT_FILE`. +- [ ] Add targeted tests only for paths that need code changes. + +## 4. SDK initialization + +- [ ] Verify generated `AgentaApi` and `AsyncAgentaApi` work with + `SSL_CERT_FILE`. +- [ ] Verify SDK helper clients in `sdks/python/agenta/sdk/utils/client.py` + work with `SSL_CERT_FILE`. +- [ ] Verify SDK middleware clients in routing auth, running vault, and + running resolver work with `SSL_CERT_FILE`. +- [ ] Document explicit custom `httpx.Client(verify=...)` usage for advanced + users who want code-level precedence. 
+- [ ] Defer `ag.init(ca_bundle=...)` unless standard env vars prove + insufficient. + +## 5. OpenTelemetry export + +- [ ] Check the pinned OpenTelemetry exporter API and env handling for + `OTEL_EXPORTER_OTLP_CERTIFICATE` and + `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE`. +- [ ] Confirm Agenta's custom `OTLPExporter` subclass preserves standard OTLP + certificate env behavior. +- [ ] Add a test or smoke fixture that exports a span to an HTTPS OTLP + endpoint signed by a private CA. +- [ ] Improve TLS failure logging enough to distinguish certificate failures + from generic export failures. + +## 6. Docker Compose - OSS + +- [ ] Preserve existing public ACME certificate support. +- [ ] Add static local certificate support for Traefik. +- [ ] Support cert/key env vars or documented file locations. +- [ ] Mount the local cert/key into Traefik. +- [ ] Mount the CA bundle into API, workers, cron, alembic, services, and web. +- [ ] Set `SSL_CERT_FILE`, `REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE`, + `NODE_EXTRA_CA_CERTS`, `OTEL_EXPORTER_OTLP_CERTIFICATE`, and + `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE` where appropriate. +- [ ] Update OSS env examples with commented TLS/CA variables. + +## 7. Docker Compose - EE + +- [ ] Add EE SSL compose parity with OSS. +- [ ] Support both public ACME certificates and static local cert/key files. +- [ ] Use EE image names and EE network names. +- [ ] Mount cert/key and CA bundle consistently with OSS. +- [ ] Update EE env examples with commented TLS/CA variables. +- [ ] Smoke test with GHCR-authenticated EE images if available. + +## 8. Kubernetes Helm + +- [ ] Add chart values for `tls.caBundle.existingConfigMap`. +- [ ] Add chart values for `tls.caBundle.existingSecret`. +- [ ] Add `key` and `mountPath` values. +- [ ] Render volume and volumeMounts into API, workers, cron, alembic, + services, and web deployments/jobs. 
+- [ ] Render `SSL_CERT_FILE`, `REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE`, + `NODE_EXTRA_CA_CERTS`, `OTEL_EXPORTER_OTLP_CERTIFICATE`, and + `OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE` env vars where appropriate. +- [ ] Update `values.schema.json`. +- [ ] Add example values to both OSS and EE Kubernetes example files. +- [ ] Add Helm render tests or snapshot checks for ConfigMap and Secret modes. + +## 9. Documentation + +- [ ] Document browser trust requirements. +- [ ] Document OSS Compose with static cert/key and CA bundle. +- [ ] Document EE Compose with static cert/key and CA bundle. +- [ ] Document public certificate flows for OSS/EE Compose. +- [ ] Document Kubernetes ingress TLS Secret setup. +- [ ] Document public cert-manager style Kubernetes certificate flow. +- [ ] Document Kubernetes CA bundle ConfigMap/Secret setup. +- [ ] Document SDK usage with standard env vars. +- [ ] Document optional explicit custom `httpx.Client(verify=...)`. +- [ ] Warn that custom CA bundles may need to include public roots if the + deployment also calls public SaaS APIs. + +## 10. End-to-end verification + +- [ ] Generate a local root CA and server certificate for `agenta.internal`. +- [ ] Start OSS Compose over HTTPS with static cert/key. +- [ ] Verify browser access after trusting the root CA locally. +- [ ] Verify Python SDK API call with standard env vars. +- [ ] Verify OTLP/tracing export with standard env vars. +- [ ] Verify workflow service invocation over HTTPS. +- [ ] Verify webhook delivery to a private CA-signed endpoint. +- [ ] Repeat the hosting smoke test on EE Compose if credentials are available. +- [ ] Render Helm OSS and EE values with ingress TLS and CA bundle enabled.