diff --git a/CHANGELOG.md b/CHANGELOG.md index 9eed0fc..5b6c618 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial open-source release under Apache-2.0 license - **Core SDK** - `enable()` / `disable()` bootstrap functions for SDK initialization - - `@botanu_use_case` decorator with UUIDv7 run_id generation + - `@botanu_workflow` decorator with UUIDv7 run_id generation - `@botanu_outcome` decorator for sub-function outcome tracking - `emit_outcome()` helper for recording business outcomes - `set_business_context()` for cost attribution dimensions diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 643856c..74795e1 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,3 +1,20 @@ -# Botanu Code of Conduct +# Code of Conduct -In the interest of fostering an open and welcoming environment, we as contributors and maintainers agree to abide by the Code of Conduct available at https://lfprojects.org/policies/code-of-conduct/ \ No newline at end of file +All participants in the Botanu project — including contributors, maintainers, +and community members — are expected to treat each other with respect and +professionalism. + +This project has adopted the +[Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) +as its Code of Conduct. + +The Code of Conduct applies to all project spaces, including GitHub repositories, +issue trackers, discussion forums, and events. + +## Reporting + +If you experience or witness unacceptable behaviour, please report it by +contacting the project maintainers listed in [MAINTAINERS.md](./MAINTAINERS.md). + +All reports will be reviewed and investigated promptly and fairly. Maintainers +are obligated to respect the privacy and security of the reporter. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6d13cd5..5319a03 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,24 +1,28 @@ # Contributing to Botanu SDK -Thank you for your interest in contributing to Botanu SDK! This document provides guidelines and instructions for contributing. +We welcome contributions of all kinds — bug fixes, new features, documentation +improvements, and more. This guide explains how to get started. ## Developer Certificate of Origin (DCO) -This project requires all commits to be signed off in accordance with the [Developer Certificate of Origin (DCO)](https://developercertificate.org/). This certifies that you have the right to submit your contribution under the project's open source license. +This project requires all commits to be signed off in accordance with the +[Developer Certificate of Origin (DCO)](https://developercertificate.org/). +The DCO certifies that you have the right to submit your contribution under the +project's open-source license. -To sign off your commits, add the `-s` flag to your git commit command: +To sign off your commits, add the `-s` flag: ```bash git commit -s -m "Your commit message" ``` -This will add a `Signed-off-by` line to your commit message: +This adds a `Signed-off-by` line to your commit message: ``` Signed-off-by: Your Name ``` -If you've already made commits without signing off, you can amend them: +If you have already made commits without signing off, you can amend them: ```bash # Amend the last commit @@ -43,12 +47,17 @@ git rebase --signoff HEAD~N # where N is the number of commits pip install -e ".[dev]" ``` -3. Run tests: +3. Install pre-commit hooks: + ```bash + pre-commit install + ``` + +4. Run tests: ```bash pytest tests/ ``` -4. Run linting and type checks: +5. Run linting and type checks: ```bash ruff check src/ tests/ ruff format src/ tests/ @@ -57,11 +66,26 @@ git rebase --signoff HEAD~N # where N is the number of commits ## Pull Request Process -1. 
Fork the repository and create a feature branch +1. Fork the repository and create a feature branch from `main` 2. Make your changes with appropriate tests 3. Ensure all tests pass and linting is clean 4. Sign off all commits with DCO -5. Submit a pull request with a clear description +5. Submit a pull request with a clear description of the change + +Pull requests require approval from at least one [maintainer](./MAINTAINERS.md) +before merging. + +## Finding Work + +- Look for issues labelled + [`good first issue`](https://github.com/botanu-ai/botanu-sdk-python/labels/good%20first%20issue) + if you are new to the project +- Issues labelled + [`help wanted`](https://github.com/botanu-ai/botanu-sdk-python/labels/help%20wanted) + are ready for community contributions +- Join the discussion on + [GitHub Discussions](https://github.com/botanu-ai/botanu-sdk-python/discussions) + to ask questions or propose ideas ## Code Style @@ -72,16 +96,21 @@ git rebase --signoff HEAD~N # where N is the number of commits ## Reporting Issues -Please use GitHub Issues to report bugs or request features. Include: +Please use [GitHub Issues](https://github.com/botanu-ai/botanu-sdk-python/issues) +to report bugs or request features. Include: + - A clear description of the issue - Steps to reproduce (for bugs) -- Expected vs actual behavior +- Expected versus actual behaviour - Python version and OS ## Code of Conduct -This project follows the [LF Projects Code of Conduct](https://lfprojects.org/policies/code-of-conduct/). +This project has adopted the +[Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/). +See [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md). ## License -By contributing, you agree that your contributions will be licensed under the Apache License 2.0. +By contributing, you agree that your contributions will be licensed under the +[Apache License 2.0](./LICENSE). 
diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 9f7a9f0..da1dadd 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -1,12 +1,25 @@ # Governance -This project follows the governance model of the [LF AI & Data Foundation](https://lfaidata.foundation/). +This document describes the governance model for the Botanu SDK project. ## Roles +### Users + +Anyone who uses the SDK. Users are encouraged to participate by filing issues, +asking questions on [GitHub Discussions](https://github.com/botanu-ai/botanu-sdk-python/discussions), +and providing feedback. + +### Contributors + +Anyone who contributes to the project — opening issues, submitting pull requests, +improving documentation, or participating in discussions. All contributions +require [DCO sign-off](./CONTRIBUTING.md#developer-certificate-of-origin-dco). + ### Maintainers Maintainers are responsible for: + - Reviewing and merging pull requests - Triaging issues - Releasing new versions @@ -14,21 +27,40 @@ Maintainers are responsible for: Current maintainers are listed in [MAINTAINERS.md](./MAINTAINERS.md). -### Contributors +## Becoming a Maintainer -Anyone can contribute by: -- Opening issues -- Submitting pull requests -- Participating in discussions -- Improving documentation +Maintainers are contributors who have demonstrated: -See [CONTRIBUTING.md](./CONTRIBUTING.md) for contribution guidelines. +- Sustained, high-quality contributions over time +- Deep understanding of the codebase and project goals +- Commitment to the community and the Code of Conduct + +New maintainers are nominated by existing maintainers and approved by consensus. 
## Decision Making -- Technical decisions are made through pull request reviews +- Day-to-day technical decisions are made through pull request reviews - Significant changes require approval from at least one maintainer -- Disputes are resolved by maintainer consensus +- Architectural or breaking changes should be discussed in a GitHub issue or + discussion before implementation +- Disputes are resolved by maintainer consensus; if consensus cannot be reached, + the lead maintainer has final say + +## Scope + +### In Scope + +- The Botanu Python SDK (`botanu` PyPI package) +- Documentation in the `docs/` directory +- CI/CD workflows and release automation +- Integration guides for OpenTelemetry Collector + +### Out of Scope + +- The Botanu Collector (separate repository) +- The Botanu Cost Engine (separate repository) +- The Botanu UI (separate repository) +- Vendor-specific backend integrations ## Code of Conduct @@ -36,4 +68,4 @@ All participants must follow the [Code of Conduct](./CODE_OF_CONDUCT.md). ## License -This project is licensed under Apache-2.0. See [LICENSE](./LICENSE). +This project is licensed under [Apache-2.0](./LICENSE). diff --git a/README.md b/README.md index 8bebc94..bdf9393 100644 --- a/README.md +++ b/README.md @@ -1,156 +1,106 @@ # Botanu SDK for Python +[![CI](https://github.com/botanu-ai/botanu-sdk-python/actions/workflows/ci.yml/badge.svg)](https://github.com/botanu-ai/botanu-sdk-python/actions/workflows/ci.yml) [![PyPI version](https://img.shields.io/pypi/v/botanu)](https://pypi.org/project/botanu/) -[![Python](https://img.shields.io/badge/python-3.9%20|%203.10%20|%203.11%20|%203.12%20|%203.13-blue)](https://www.python.org/) +[![Python](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/) +[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](./LICENSE) -**Run-level cost attribution for AI workflows, built on OpenTelemetry.** -Botanu adds **runs** on top of distributed tracing. 
A run represents one business transaction that may span multiple LLM calls, database queries, and microservices. By correlating every operation to a stable `run_id`, you get per-transaction cost attribution without sampling artifacts. +Event-level cost attribution for AI workflows, built on [OpenTelemetry](https://opentelemetry.io/). -## How It Works +An **event** is one business transaction — resolving a support ticket, processing +an order, generating a report. Each event may involve multiple **runs** (LLM calls, +retries, sub-workflows) across multiple services. By correlating every run to a +stable `event_id`, Botanu gives you per-event cost attribution and outcome +tracking without sampling artifacts. -``` -User Request - | - v - Entry Service Intermediate Service LLM / DB - @botanu_use_case --> enable() propagates --> auto-instrumented - creates run_id run_id via W3C Baggage spans tagged with run_id -``` - -1. **Entry point** creates a `run_id` with `@botanu_use_case` -2. **Every service** calls `enable()` to propagate the `run_id` via W3C Baggage -3. **All spans** across all services share the same `run_id` -4. **Traces export** to your OTel Collector via OTLP (configured by environment variable) - -## Quick Start - -### Install +## Getting Started ```bash pip install botanu ``` -One install. Includes OTel SDK, OTLP exporter, and auto-instrumentation for 50+ libraries. - -### Instrument Your Code - -**Entry service** (where the workflow begins): +One install. Includes OTel SDK, OTLP exporter, and auto-instrumentation for +50+ libraries. 
```python -from botanu import enable, botanu_use_case +from botanu import enable, botanu_workflow, emit_outcome -enable() # reads config from env vars +enable() # reads config from environment variables -@botanu_use_case(name="Customer Support") -async def handle_ticket(ticket_id: str): - data = await db.query(ticket_id) - result = await llm.complete(data) +@botanu_workflow("my-workflow", event_id="evt-001", customer_id="cust-42") +async def do_work(): + result = await do_something() + emit_outcome("success") return result ``` -**Every other service** (intermediate, downstream): - -```python -from botanu import enable - -enable() # propagates run_id from incoming request -``` - -That's it. No collector endpoint in code. No manual span creation. - -### Configure via Environment Variables - -All configuration is via environment variables. **Zero hardcoded values in code.** - -| Variable | Description | Default | -|----------|-------------|---------| -| `OTEL_EXPORTER_OTLP_ENDPOINT` | Collector endpoint | `http://localhost:4318` | -| `OTEL_SERVICE_NAME` | Service name | `unknown_service` | -| `BOTANU_ENVIRONMENT` | Deployment environment | `production` | - -```yaml -# docker-compose.yml / Kubernetes deployment -environment: - - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 - - OTEL_SERVICE_NAME=my-service -``` - -See [Configuration Reference](./docs/getting-started/configuration.md) for all options. +Entry points use `@botanu_workflow`. Every other service only needs `enable()`. +All configuration is via environment variables — zero hardcoded values in code. -## Auto-Instrumentation +See the [Quick Start](./docs/getting-started/quickstart.md) guide for a full walkthrough. -Everything is included and auto-detected. 
If the library is in your dependencies, it gets instrumented: - -| Category | Libraries | -|----------|-----------| -| **LLM Providers** | OpenAI, Anthropic, Vertex AI, Google GenAI, LangChain, Ollama, CrewAI | -| **Web Frameworks** | FastAPI, Flask, Django, Starlette, Falcon, Pyramid, Tornado | -| **HTTP Clients** | requests, httpx, urllib3, aiohttp | -| **Databases** | PostgreSQL (psycopg2/3, asyncpg), MySQL, SQLite, MongoDB, Redis, SQLAlchemy, Elasticsearch, Cassandra | -| **Messaging** | Celery, Kafka, RabbitMQ (pika) | -| **AWS** | botocore, boto3 (SQS) | -| **gRPC** | Client + Server | -| **Runtime** | logging, threading, asyncio | +## Documentation -No manual instrumentation required. Libraries not installed are silently skipped. +| Topic | Description | +|-------|-------------| +| [Installation](./docs/getting-started/installation.md) | Install and configure the SDK | +| [Quick Start](./docs/getting-started/quickstart.md) | Get up and running in 5 minutes | +| [Configuration](./docs/getting-started/configuration.md) | Environment variables and options | +| [Core Concepts](./docs/concepts/) | Events, runs, context propagation, architecture | +| [LLM Tracking](./docs/tracking/llm-tracking.md) | Track model calls and token usage | +| [Data Tracking](./docs/tracking/data-tracking.md) | Database, storage, and messaging | +| [Outcomes](./docs/tracking/outcomes.md) | Record business outcomes for ROI | +| [Auto-Instrumentation](./docs/integration/auto-instrumentation.md) | Supported libraries and frameworks | +| [Kubernetes](./docs/integration/kubernetes.md) | Zero-code instrumentation at scale | +| [API Reference](./docs/api/) | Decorators, tracking API, configuration | +| [Best Practices](./docs/patterns/best-practices.md) | Recommended patterns | -## Kubernetes at Scale +## Requirements -For large deployments (2000+ services), only entry points need code changes: +- Python 3.9+ +- OpenTelemetry Collector (recommended for production) -| Service Type | Code Change 
| Configuration | -|--------------|-------------|---------------| -| Entry point | `@botanu_use_case` decorator | `OTEL_EXPORTER_OTLP_ENDPOINT` env var | -| Intermediate | `enable()` call only | `OTEL_EXPORTER_OTLP_ENDPOINT` env var | +## Contributing -See [Kubernetes Deployment Guide](./docs/integration/kubernetes.md) for details. +We welcome contributions from the community. Please read our +[Contributing Guide](./CONTRIBUTING.md) before submitting a pull request. -## Architecture +This project requires [DCO sign-off](https://developercertificate.org/) on all +commits: -``` - +---------+ +---------+ +---------+ - | Service | --> | Service | --> | Service | - | enable()| --> | enable()| --> | enable()| - +---------+ +---------+ +---------+ - | | | - v v v - +-------------------------------------+ - | OTel Collector (OTLP) | - +-------------------------------------+ - | | | - v v v - Jaeger/Tempo Prometheus Your Backend +```bash +git commit -s -m "Your commit message" ``` -The SDK is a thin layer on OpenTelemetry: -- **SDK**: Generates `run_id`, propagates context, auto-instruments -- **Collector**: PII redaction, cardinality limits, routing, vendor enrichment +Looking for a place to start? Check the +[good first issues](https://github.com/botanu-ai/botanu-sdk-python/labels/good%20first%20issue). 
-## Documentation +## Community -- [Getting Started](./docs/getting-started/) - Installation, quickstart, configuration -- [Concepts](./docs/concepts/) - Runs, context propagation, cost attribution -- [Integration](./docs/integration/) - Auto-instrumentation, Kubernetes, collector setup -- [API Reference](./docs/api/) - `enable()`, `@botanu_use_case`, `emit_outcome()` +- [GitHub Discussions](https://github.com/botanu-ai/botanu-sdk-python/discussions) — questions, ideas, show & tell +- [GitHub Issues](https://github.com/botanu-ai/botanu-sdk-python/issues) — bug reports and feature requests -## Requirements +## Governance -- Python 3.9+ -- OpenTelemetry Collector (recommended for production) +See [GOVERNANCE.md](./GOVERNANCE.md) for details on roles, decision-making, +and the contributor ladder. -## Contributing +Current maintainers are listed in [MAINTAINERS.md](./MAINTAINERS.md). -We welcome contributions. See [CONTRIBUTING.md](./CONTRIBUTING.md). +## Security -This project follows the [Developer Certificate of Origin (DCO)](https://developercertificate.org/). Sign off your commits: +To report a security vulnerability, please use +[GitHub Security Advisories](https://github.com/botanu-ai/botanu-sdk-python/security/advisories/new) +or see [SECURITY.md](./SECURITY.md) for full details. **Do not file a public issue.** -```bash -git commit -s -m "Your commit message" -``` +## Code of Conduct + +This project has adopted the +[Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/). +See [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md). ## License -[Apache-2.0](./LICENSE) +[Apache License 2.0](./LICENSE) -This project is an [LF AI & Data Foundation](https://lfaidata.foundation/) project. 
diff --git a/RELEASE.md b/RELEASE.md index d2454ea..0686052 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -94,7 +94,7 @@ python -c "import botanu; print(botanu.__version__)" # Run quick test python -c " -from botanu import enable, botanu_use_case +from botanu import enable, botanu_workflow enable(service_name='test') print('Botanu SDK loaded successfully!') " diff --git a/docs/api/configuration.md b/docs/api/configuration.md index cf417ac..b5f8901 100644 --- a/docs/api/configuration.md +++ b/docs/api/configuration.md @@ -12,18 +12,19 @@ from botanu.sdk.config import BotanuConfig | Field | Type | Default | Description | |-------|------|---------|-------------| -| `service_name` | `str` | `"unknown_service"` | Service name (from `OTEL_SERVICE_NAME`) | -| `service_version` | `str` | `None` | Service version (from `OTEL_SERVICE_VERSION`) | -| `service_namespace` | `str` | `None` | Service namespace (from `OTEL_SERVICE_NAMESPACE`) | -| `deployment_environment` | `str` | `"production"` | Environment (from `OTEL_DEPLOYMENT_ENVIRONMENT` or `BOTANU_ENVIRONMENT`) | +| `service_name` | `str` | From env / `"unknown_service"` | Service name | +| `service_version` | `str` | From env | Service version | +| `service_namespace` | `str` | From env | Service namespace | +| `deployment_environment` | `str` | From env / `"production"` | Deployment environment | | `auto_detect_resources` | `bool` | `True` | Auto-detect cloud resources | -| `otlp_endpoint` | `str` | `"http://localhost:4318/v1/traces"` | OTLP endpoint | +| `otlp_endpoint` | `str` | From env / `"http://localhost:4318"` | OTLP endpoint | | `otlp_headers` | `dict` | `None` | Custom headers for OTLP exporter | | `max_export_batch_size` | `int` | `512` | Max spans per batch | -| `max_queue_size` | `int` | `2048` | Max spans in queue | +| `max_queue_size` | `int` | `65536` | Max spans in queue (~64 MB at ~1 KB/span) | | `schedule_delay_millis` | `int` | `5000` | Delay between batch exports | +| `export_timeout_millis` | `int` | 
`30000` | Timeout for export operations | | `propagation_mode` | `str` | `"lean"` | `"lean"` or `"full"` | -| `auto_instrument_packages` | `list` | `[...]` | Packages to auto-instrument | +| `auto_instrument_packages` | `list` | See below | Packages to auto-instrument | ### Constructor @@ -31,7 +32,7 @@ from botanu.sdk.config import BotanuConfig config = BotanuConfig( service_name="my-service", deployment_environment="production", - otlp_endpoint="http://collector:4318/v1/traces", + otlp_endpoint="http://collector:4318", ) ``` @@ -50,7 +51,7 @@ def from_yaml(cls, path: Optional[str] = None) -> BotanuConfig - `path`: Path to YAML config file **Raises:** -- `FileNotFoundError`: If config file doesn't exist +- `FileNotFoundError`: If config file does not exist - `ValueError`: If YAML is malformed - `ImportError`: If PyYAML is not installed @@ -62,7 +63,7 @@ config = BotanuConfig.from_yaml("config/botanu.yaml") #### from_file_or_env() -Load config from file if exists, otherwise use environment variables. +Load config from file if it exists, otherwise use environment variables. ```python @classmethod @@ -135,6 +136,7 @@ export: batch_size: integer # Max spans per batch queue_size: integer # Max spans in queue delay_ms: integer # Delay between exports + export_timeout_ms: integer # Export timeout propagation: mode: string # "lean" or "full" @@ -151,7 +153,7 @@ service: environment: ${ENVIRONMENT} otlp: - endpoint: ${COLLECTOR_URL:-http://localhost:4318}/v1/traces + endpoint: ${COLLECTOR_URL:-http://localhost:4318} headers: Authorization: Bearer ${API_TOKEN} ``` @@ -164,7 +166,7 @@ Syntax: ## enable() -Bootstrap function to initialize the SDK. +Bootstrap function to initialise the SDK. 
```python from botanu import enable @@ -172,12 +174,13 @@ from botanu import enable enable( service_name: Optional[str] = None, otlp_endpoint: Optional[str] = None, + environment: Optional[str] = None, + auto_instrumentation: bool = True, + propagators: Optional[List[str]] = None, + log_level: str = "INFO", config: Optional[BotanuConfig] = None, - auto_instrument: bool = True, - auto_instrument_packages: Optional[List[str]] = None, - propagation_mode: Optional[str] = None, - **kwargs: Any, -) -> None + config_file: Optional[str] = None, +) -> bool ``` ### Parameters @@ -186,17 +189,22 @@ enable( |-----------|------|---------|-------------| | `service_name` | `str` | From env | Service name | | `otlp_endpoint` | `str` | From env | OTLP endpoint URL | -| `config` | `BotanuConfig` | `None` | Pre-built configuration | -| `auto_instrument` | `bool` | `True` | Enable auto-instrumentation | -| `auto_instrument_packages` | `list` | `None` | Override default packages | -| `propagation_mode` | `str` | `None` | `"lean"` or `"full"` | -| `**kwargs` | `Any` | `{}` | Additional config fields | +| `environment` | `str` | From env | Deployment environment | +| `auto_instrumentation` | `bool` | `True` | Enable auto-instrumentation | +| `propagators` | `list[str]` | `["tracecontext", "baggage"]` | Propagator list | +| `log_level` | `str` | `"INFO"` | Logging level | +| `config` | `BotanuConfig` | `None` | Pre-built configuration (overrides individual params) | +| `config_file` | `str` | `None` | Path to YAML config file | + +### Returns + +`True` if successfully initialised, `False` if already initialised. -### Behavior +### Behaviour 1. Creates/merges `BotanuConfig` 2. Configures `TracerProvider` with `RunContextEnricher` -3. Sets up OTLP exporter (if SDK extras installed) +3. Sets up OTLP exporter 4. Enables auto-instrumentation (if requested) 5. 
Configures W3C Baggage propagation @@ -220,15 +228,13 @@ config = BotanuConfig.from_yaml("config/botanu.yaml") enable(config=config) ``` -#### Custom Options +#### From environment only ```python -enable( - service_name="my-service", - otlp_endpoint="http://collector:4318/v1/traces", - auto_instrument_packages=["fastapi", "openai_v2"], - propagation_mode="full", -) +from botanu import enable + +# Reads OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT, etc. +enable() ``` --- @@ -243,7 +249,7 @@ from botanu import disable disable() -> None ``` -### Behavior +### Behaviour 1. Flushes pending spans 2. Shuts down span processors @@ -292,12 +298,17 @@ if not is_enabled(): | `BOTANU_PROPAGATION_MODE` | `"lean"` or `"full"` | `"lean"` | | `BOTANU_AUTO_DETECT_RESOURCES` | Auto-detect cloud resources | `"true"` | | `BOTANU_CONFIG_FILE` | Path to YAML config file | None | +| `BOTANU_COLLECTOR_ENDPOINT` | Override for OTLP endpoint | None | +| `BOTANU_MAX_QUEUE_SIZE` | Override max queue size | `65536` | +| `BOTANU_MAX_EXPORT_BATCH_SIZE` | Override max batch size | `512` | +| `BOTANU_EXPORT_TIMEOUT_MILLIS` | Override export timeout | `30000` | --- ## RunContext -Model for run metadata. +Model for run metadata. Created automatically by `@botanu_workflow` and +`run_botanu`. ```python from botanu.models.run_context import RunContext @@ -313,23 +324,27 @@ Create a new run context. @classmethod def create( cls, - use_case: str, - workflow: Optional[str] = None, + workflow: str, + event_id: str, + customer_id: str, workflow_version: Optional[str] = None, environment: Optional[str] = None, tenant_id: Optional[str] = None, parent_run_id: Optional[str] = None, + root_run_id: Optional[str] = None, + attempt: int = 1, + retry_of_run_id: Optional[str] = None, deadline_seconds: Optional[float] = None, ) -> RunContext ``` #### create_retry() -Create a retry context from an original run. +Create a retry context from a previous run. 
```python @classmethod -def create_retry(cls, original: RunContext) -> RunContext +def create_retry(cls, previous: RunContext) -> RunContext ``` #### from_baggage() @@ -345,26 +360,18 @@ def from_baggage(cls, baggage: Dict[str, str]) -> Optional[RunContext] #### to_baggage_dict() -Serialize to baggage format. +Serialise to baggage format. ```python -def to_baggage_dict(self, lean_mode: bool = True) -> Dict[str, str] +def to_baggage_dict(self, lean_mode: Optional[bool] = None) -> Dict[str, str] ``` #### to_span_attributes() -Serialize to span attributes. - -```python -def to_span_attributes(self) -> Dict[str, Any] -``` - -#### as_current() - -Context manager to set this as the current run. +Serialise to span attributes. ```python -def as_current(self) -> ContextManager +def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]] ``` #### complete() @@ -375,39 +382,73 @@ Mark the run as complete. def complete( self, status: RunStatus, + reason_code: Optional[str] = None, error_class: Optional[str] = None, + value_type: Optional[str] = None, + value_amount: Optional[float] = None, + confidence: Optional[float] = None, ) -> None ``` +#### is_past_deadline() + +```python +def is_past_deadline(self) -> bool +``` + +#### is_cancelled() + +```python +def is_cancelled(self) -> bool +``` + +#### request_cancellation() + +```python +def request_cancellation(self, reason: str = "user") -> None +``` + +#### remaining_time_seconds() + +```python +def remaining_time_seconds(self) -> Optional[float] +``` + ### Fields | Field | Type | Description | |-------|------|-------------| | `run_id` | `str` | Unique UUIDv7 identifier | -| `root_run_id` | `str` | Root run ID (same as run_id for first attempt) | -| `use_case` | `str` | Business use case name | -| `workflow` | `str` | Workflow/function name | -| `workflow_version` | `str` | Version hash | +| `workflow` | `str` | Workflow name | +| `event_id` | `str` | Business event identifier | +| `customer_id` | `str` | Customer 
identifier | | `environment` | `str` | Deployment environment | +| `workflow_version` | `str` | Version hash | | `tenant_id` | `str` | Tenant identifier | | `parent_run_id` | `str` | Parent run ID | +| `root_run_id` | `str` | Root run ID (same as `run_id` for first attempt) | | `attempt` | `int` | Attempt number | +| `retry_of_run_id` | `str` | Run ID of the previous attempt | | `start_time` | `datetime` | Run start time | +| `deadline` | `float` | Absolute deadline (epoch seconds) | +| `cancelled` | `bool` | Whether the run is cancelled | | `outcome` | `RunOutcome` | Recorded outcome | --- ## RunStatus -Enum for run status. +Enum for run outcome status. ```python from botanu.models.run_context import RunStatus -class RunStatus(Enum): +class RunStatus(str, Enum): SUCCESS = "success" FAILURE = "failure" PARTIAL = "partial" + TIMEOUT = "timeout" + CANCELED = "canceled" ``` ## See Also diff --git a/docs/api/decorators.md b/docs/api/decorators.md index 36eb768..e88c971 100644 --- a/docs/api/decorators.md +++ b/docs/api/decorators.md @@ -1,17 +1,21 @@ # Decorators API Reference -## @botanu_use_case +## @botanu_workflow -The primary decorator for creating runs with automatic context propagation. +The primary decorator for creating workflow runs with automatic context propagation. 
```python -from botanu import botanu_use_case +from botanu import botanu_workflow -@botanu_use_case( +@botanu_workflow( name: str, - workflow: Optional[str] = None, + *, + event_id: Union[str, Callable[..., str]], + customer_id: Union[str, Callable[..., str]], environment: Optional[str] = None, tenant_id: Optional[str] = None, + auto_outcome_on_success: bool = True, + span_kind: SpanKind = SpanKind.SERVER, ) ``` @@ -19,21 +23,33 @@ from botanu import botanu_use_case | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `name` | `str` | Required | Use case name for grouping | -| `workflow` | `str` | Function name | Workflow identifier | +| `name` | `str` | Required | Workflow name (low cardinality, e.g. `"Customer Support"`) | +| `event_id` | `str \| Callable` | Required | Business transaction identifier (e.g. ticket ID). Can be a static string or a callable that receives the same `(*args, **kwargs)` as the decorated function. | +| `customer_id` | `str \| Callable` | Required | End-customer being served (e.g. org ID). Same static/callable rules as `event_id`. | | `environment` | `str` | From env | Deployment environment | | `tenant_id` | `str` | `None` | Tenant identifier for multi-tenant systems | +| `auto_outcome_on_success` | `bool` | `True` | Emit `"success"` outcome if no exception | +| `span_kind` | `SpanKind` | `SERVER` | OpenTelemetry span kind | ### Example ```python -from botanu import botanu_use_case +from botanu import botanu_workflow -@botanu_use_case(name="my_workflow") -def my_function(): - data = db.query(...) - result = llm.complete(...) 
+# Static values: +@botanu_workflow("my-workflow", event_id="evt-001", customer_id="cust-42") +def do_work(): + result = do_something() return result + +# Dynamic values extracted from function arguments: +@botanu_workflow( + "my-workflow", + event_id=lambda request: request.event_id, + customer_id=lambda request: request.customer_id, +) +async def handle_request(request): + ... ``` ### Span Attributes @@ -41,59 +57,60 @@ def my_function(): | Attribute | Description | |-----------|-------------| | `botanu.run_id` | Generated UUIDv7 | -| `botanu.use_case` | `name` parameter | -| `botanu.workflow` | `workflow` parameter or function name | +| `botanu.workflow` | `name` parameter | +| `botanu.event_id` | Resolved `event_id` | +| `botanu.customer_id` | Resolved `customer_id` | | `botanu.environment` | Deployment environment | | `botanu.tenant_id` | Tenant identifier (if provided) | ### Alias -`use_case` is an alias for `botanu_use_case`: +`workflow` is an alias for `botanu_workflow`: ```python -from botanu import use_case +from botanu import workflow -@use_case(name="my_workflow") -def my_function(): - return db.query(...) +@workflow("my-workflow", event_id="evt-001", customer_id="cust-42") +def do_work(): + ... ``` -## @botanu_outcome +--- + +## run_botanu -Decorator for sub-functions to emit outcomes based on success/failure. +Context manager alternative to `@botanu_workflow` for cases where you cannot +use a decorator (dynamic workflows, scripts, runtime-determined names). 
```python -from botanu import botanu_outcome +from botanu import run_botanu -@botanu_outcome() -def extract_data(): - return fetch_from_source() +with run_botanu( + name: str, + *, + event_id: str, + customer_id: str, + environment: Optional[str] = None, + tenant_id: Optional[str] = None, + auto_outcome_on_success: bool = True, + span_kind: SpanKind = SpanKind.SERVER, +) as run_ctx: RunContext ``` -- Emits "success" on completion -- Emits "failed" with exception class name if exception raised -- Does NOT create a new run - ### Example ```python -from botanu import botanu_use_case, botanu_outcome - -@botanu_use_case(name="my_workflow") -def my_function(): - step_one() - step_two() +from botanu import run_botanu, emit_outcome -@botanu_outcome() -def step_one(): - return do_work() - -@botanu_outcome() -def step_two(): - return do_more_work() +with run_botanu("my-workflow", event_id="evt-001", customer_id="cust-42") as run: + result = do_something() + emit_outcome("success") ``` +The yielded `RunContext` contains `run_id`, `workflow`, `event_id`, and other +metadata. Parameters are identical to `@botanu_workflow`. 
+ ## See Also -- [Quickstart](../getting-started/quickstart.md) +- [Quick Start](../getting-started/quickstart.md) - [Run Context](../concepts/run-context.md) diff --git a/docs/api/tracking.md b/docs/api/tracking.md index dcd35f7..b3041e6 100644 --- a/docs/api/tracking.md +++ b/docs/api/tracking.md @@ -402,6 +402,8 @@ emit_outcome( value_amount: Optional[float] = None, confidence: Optional[float] = None, reason: Optional[str] = None, + error_type: Optional[str] = None, + metadata: Optional[dict[str, str]] = None, ) -> None ``` @@ -409,17 +411,19 @@ emit_outcome( | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `status` | `str` | Required | Outcome status ("success", "partial", "failed") | +| `status` | `str` | Required | Outcome status: `"success"`, `"partial"`, `"failed"`, `"timeout"`, `"canceled"`, `"abandoned"` | | `value_type` | `str` | `None` | Type of business value achieved | | `value_amount` | `float` | `None` | Quantified value amount | | `confidence` | `float` | `None` | Confidence score (0.0-1.0) | | `reason` | `str` | `None` | Reason for the outcome | +| `error_type` | `str` | `None` | Error classification (e.g. `"TimeoutError"`) | +| `metadata` | `dict[str, str]` | `None` | Additional key-value metadata | #### Example ```python -emit_outcome("success", value_type="tickets_resolved", value_amount=1) -emit_outcome("failed", reason="rate_limit_exceeded") +emit_outcome("success", value_type="items_processed", value_amount=1) +emit_outcome("failed", error_type="TimeoutError", reason="LLM took >30s") ``` --- @@ -463,14 +467,14 @@ from botanu import get_run_id run_id = get_run_id() ``` -### get_use_case() +### get_workflow() -Get the current use case from baggage. +Get the current workflow name from baggage. 
```python -from botanu import get_use_case +from botanu import get_workflow -use_case = get_use_case() +workflow = get_workflow() ``` ### get_baggage() diff --git a/docs/concepts/architecture.md b/docs/concepts/architecture.md index 2d87ccb..5c4e366 100644 --- a/docs/concepts/architecture.md +++ b/docs/concepts/architecture.md @@ -30,56 +30,6 @@ Heavy operations happen in the OTel Collector: - Cardinality management - Aggregation and sampling -## Component Overview - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Your Application │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ -│ │ @botanu_use_ │ │ track_llm_ │ │ track_db_ │ │ -│ │ case() │ │ call() │ │ operation() │ │ -│ └────────┬────────┘ └────────┬────────┘ └────────┬────────┘ │ -│ │ │ │ │ -│ └──────────────────────┼──────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌───────────────────────────────────────────────────────────────────────┐ │ -│ │ Botanu SDK Core │ │ -│ ├───────────────────────────────────────────────────────────────────────┤ │ -│ │ RunContext │ RunContextEnricher │ BotanuConfig │ │ -│ │ - generate_run_id() │ - on_start() │ - service_name │ │ -│ │ - to_baggage_dict() │ - reads baggage │ - otlp_endpoint │ │ -│ │ - to_span_attrs() │ - writes to spans │ - propagation_mode │ │ -│ └───────────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌───────────────────────────────────────────────────────────────────────┐ │ -│ │ OpenTelemetry SDK │ │ -│ │ TracerProvider → BatchSpanProcessor → OTLPSpanExporter │ │ -│ └───────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - │ OTLP (HTTP or gRPC) - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ OpenTelemetry Collector │ 
-├─────────────────────────────────────────────────────────────────────────────┤ -│ receivers: │ -│ otlp: │ -│ │ -│ processors: │ -│ transform: # Normalize vendor names │ -│ redaction: # Remove PII from gen_ai.content.* │ -│ attributes: # Cardinality limits │ -│ botanu/cost: # Calculate $ from tokens │ -│ │ -│ exporters: │ -│ clickhouse: # Or your preferred backend │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` ## SDK Components @@ -106,8 +56,9 @@ Holds run metadata and provides serialization: class RunContext: run_id: str root_run_id: str - use_case: str - workflow: Optional[str] + workflow: str + event_id: str + customer_id: str attempt: int # ... ``` @@ -139,8 +90,8 @@ Context managers for manual instrumentation: ### 1. Run Initiation ```python -@botanu_use_case("Customer Support") -def handle_ticket(): +@botanu_workflow("process", event_id="evt-001", customer_id="cust-42") +def do_work(): pass ``` @@ -241,8 +192,8 @@ from opentelemetry.instrumentation.requests import RequestsInstrumentor RequestsInstrumentor().instrument() # Add Botanu -from botanu import init_botanu -init_botanu(service_name="my-service") +from botanu import enable +enable(service_name="my-service") # Both work together - requests are instrumented AND get run_id ``` diff --git a/docs/concepts/context-propagation.md b/docs/concepts/context-propagation.md index 80bf319..9bea134 100644 --- a/docs/concepts/context-propagation.md +++ b/docs/concepts/context-propagation.md @@ -1,18 +1,18 @@ # Context Propagation -Context propagation ensures that the `run_id` and other metadata flow through your entire application — across function calls, HTTP requests, message queues, and async workers. +Context propagation ensures that the `run_id` and other metadata flow through your entire application -- across function calls, HTTP requests, message queues, and async workers. 
## How It Works Botanu uses **W3C Baggage** for context propagation, the same standard used by OpenTelemetry for distributed tracing. ``` -┌─────────────────────────────────────────────────────────────────┐ -│ HTTP Request Headers │ -├─────────────────────────────────────────────────────────────────┤ -│ traceparent: 00-{trace_id}-{span_id}-01 │ -│ baggage: botanu.run_id=019abc12...,botanu.use_case=Support │ -└─────────────────────────────────────────────────────────────────┘ ++-----------------------------------------------------------------+ +| HTTP Request Headers | ++-----------------------------------------------------------------+ +| traceparent: 00-{trace_id}-{span_id}-01 | +| baggage: botanu.run_id=019abc12...,botanu.workflow=process | ++-----------------------------------------------------------------+ ``` When you make an outbound HTTP request, the `botanu.run_id` travels in the `baggage` header alongside the trace context. @@ -23,22 +23,26 @@ When you make an outbound HTTP request, the `botanu.run_id` travels in the `bagg Only propagates essential fields to minimize header size: - `botanu.run_id` -- `botanu.use_case` +- `botanu.workflow` +- `botanu.event_id` +- `botanu.customer_id` ```python -# Lean mode baggage (~100 bytes) -baggage: botanu.run_id=019abc12-def3-7890-abcd-1234567890ab,botanu.use_case=Customer%20Support +# Lean mode baggage (~120 bytes) +baggage: botanu.run_id=019abc12-def3-7890-abcd-1234567890ab,botanu.workflow=process,botanu.event_id=evt-001,botanu.customer_id=cust-456 ``` ### Full Mode -Propagates all context fields: -- `botanu.run_id` -- `botanu.use_case` -- `botanu.workflow` +Propagates all context fields. 
In addition to the lean fields, full mode adds: - `botanu.environment` - `botanu.tenant_id` - `botanu.parent_run_id` +- `botanu.root_run_id` +- `botanu.attempt` +- `botanu.retry_of_run_id` +- `botanu.deadline` +- `botanu.cancelled` ```python # Enable full mode @@ -51,15 +55,15 @@ os.environ["BOTANU_PROPAGATION_MODE"] = "full" Within a single process, context is propagated via Python's `contextvars`: ```python -from botanu import botanu_use_case +from botanu import botanu_workflow -@botanu_use_case("Customer Support") -def handle_ticket(ticket_id: str): +@botanu_workflow("process", event_id="evt-001", customer_id="cust-456") +def do_work(): # Context is set here - fetch_context(ticket_id) # Inherits context - call_llm() # Inherits context - save_result() # Inherits context + do_something() # Inherits context + do_more_work() # Inherits context + save_result() # Inherits context ``` The `RunContextEnricher` span processor automatically reads baggage and writes to span attributes: @@ -67,13 +71,13 @@ The `RunContextEnricher` span processor automatically reads baggage and writes t ```python class RunContextEnricher(SpanProcessor): def on_start(self, span, parent_context): - for key in ["botanu.run_id", "botanu.use_case"]: + for key in ["botanu.run_id", "botanu.workflow"]: value = baggage.get_baggage(key, parent_context) if value: span.set_attribute(key, value) ``` -This ensures **every span** — including auto-instrumented ones — gets the `run_id`. +This ensures **every span** -- including auto-instrumented ones -- gets the `run_id`. 
## HTTP Propagation @@ -84,8 +88,8 @@ When using instrumented HTTP clients (`requests`, `httpx`, `urllib3`), baggage i ```python import requests -@botanu_use_case("Fetch Data") -def fetch_data(): +@botanu_workflow("process", event_id="evt-001", customer_id="cust-456") +def do_work(): # Baggage is automatically added to headers response = requests.get("https://api.example.com/data") ``` @@ -101,8 +105,8 @@ from botanu.sdk.middleware import BotanuMiddleware app = FastAPI() app.add_middleware(BotanuMiddleware) -@app.post("/tickets") -def create_ticket(request: Request): +@app.post("/tasks") +def process(request: Request): # RunContext is extracted from incoming baggage # or created if not present pass @@ -115,15 +119,18 @@ For async messaging systems, you need to manually inject and extract context. ### Injecting Context (Producer) ```python -from botanu.sdk.context import get_current_run_context +from botanu.sdk.context import get_run_id, get_baggage def publish_message(queue, payload): - ctx = get_current_run_context() + run_id = get_run_id() message = { "payload": payload, "metadata": { - "baggage": ctx.to_baggage_dict() if ctx else {} + "run_id": run_id, + "workflow": get_baggage("botanu.workflow"), + "event_id": get_baggage("botanu.event_id"), + "customer_id": get_baggage("botanu.customer_id"), } } queue.publish(message) @@ -133,29 +140,38 @@ def publish_message(queue, payload): ```python from botanu.models.run_context import RunContext +from botanu import run_botanu def process_message(message): - baggage = message.get("metadata", {}).get("baggage", {}) + baggage = message.get("metadata", {}) ctx = RunContext.from_baggage(baggage) if ctx: - # Continue with existing context - with ctx.as_current(): - handle_message(message["payload"]) + # Continue with existing context using context manager + with run_botanu( + ctx.workflow, + event_id=ctx.event_id, + customer_id=ctx.customer_id, + ): + do_work(message["payload"]) else: # Create new context - with 
RunContext.create(use_case="Message Processing").as_current(): - handle_message(message["payload"]) + with run_botanu( + "process_message", + event_id="evt-fallback", + customer_id="unknown", + ): + do_work(message["payload"]) ``` ## Cross-Service Propagation ``` -┌──────────────┐ HTTP ┌──────────────┐ Kafka ┌──────────────┐ -│ Service A │ ────────────► │ Service B │ ────────────► │ Service C │ -│ │ baggage: │ │ message │ │ -│ run_id=X │ run_id=X │ run_id=X │ run_id=X │ run_id=X │ -└──────────────┘ └──────────────┘ └──────────────┘ ++--------------+ HTTP +--------------+ Kafka +--------------+ +| Service A | ------------> | Service B | ------------> | Service C | +| | baggage: | | message | | +| run_id=X | run_id=X | run_id=X | run_id=X | run_id=X | ++--------------+ +--------------+ +--------------+ ``` The same `run_id` flows through all services, enabling: @@ -169,19 +185,19 @@ W3C Baggage has practical size limits. The SDK uses lean mode by default to stay | Mode | Typical Size | Recommendation | |------|--------------|----------------| -| Lean | ~100 bytes | Use for most cases | -| Full | ~300 bytes | Use when you need all context downstream | +| Lean | ~120 bytes | Use for most cases | +| Full | ~350 bytes | Use when you need all context downstream | ## Propagation and Auto-Instrumentation The SDK works seamlessly with OTel auto-instrumentation: ```python -from botanu import init_botanu +from botanu import enable -init_botanu( +enable( service_name="my-service", - auto_instrument=True, # Enable auto-instrumentation + auto_instrumentation=True, # Enable auto-instrumentation ) ``` @@ -201,8 +217,8 @@ from botanu.sdk.context import get_baggage, get_run_id run_id = get_run_id() print(f"Current run_id: {run_id}") -use_case = get_baggage("botanu.use_case") -print(f"Current use_case: {use_case}") +workflow = get_baggage("botanu.workflow") +print(f"Current workflow: {workflow}") ``` ### Verify Header Propagation @@ -224,13 +240,13 @@ def debug_request(): ### Context 
Not Propagating -1. **Missing initialization**: Ensure `init_botanu()` is called at startup +1. **Missing initialization**: Ensure `enable()` is called at startup 2. **Missing middleware**: Add `BotanuMiddleware` to your web framework 3. **Async context loss**: Use `contextvars`-aware async patterns ### Duplicate run_ids -1. **Multiple decorators**: Only use `@botanu_use_case` at the entry point +1. **Multiple decorators**: Only use `@botanu_workflow` at the entry point 2. **Middleware + decorator**: Choose one, not both ## See Also diff --git a/docs/concepts/run-context.md b/docs/concepts/run-context.md index 436be03..8e752d1 100644 --- a/docs/concepts/run-context.md +++ b/docs/concepts/run-context.md @@ -1,26 +1,29 @@ # Run Context -The Run Context is the core concept in Botanu SDK. It represents a single business transaction or workflow execution that you want to track for cost attribution. +The Run Context is the core concept in Botanu SDK. It represents a single execution attempt of a business event that you want to track for cost attribution. -## What is a Run? +## Events and Runs -A **run** is a logical unit of work that produces a business outcome. Examples: +An **event** is one business transaction -- a logical unit of work that produces a business outcome. Examples: -- Resolving a customer support ticket -- Processing a document -- Generating a report -- Handling a chatbot conversation +- Processing an incoming request +- Handling a scheduled job +- Executing a pipeline step +- Responding to a webhook + +A **run** is one execution attempt within an event. Each retry of the same event gets a new `run_id` but shares the same `event_id`. A single run may involve: -A single run may involve: - Multiple LLM calls (possibly to different providers) - Database queries - Storage operations - External API calls - Message queue operations +An event will have an **outcome** -- the business result of the work (success, failure, partial, etc.). 
+ ## The run_id -Every run is identified by a unique `run_id` — a UUIDv7 that is: +Every run is identified by a unique `run_id` -- a UUIDv7 that is: - **Time-sortable**: IDs generated later sort after earlier ones - **Globally unique**: No collisions across services @@ -41,8 +44,9 @@ The `RunContext` dataclass holds all metadata for a run: from botanu.models.run_context import RunContext ctx = RunContext.create( - use_case="Customer Support", - workflow="handle_ticket", + workflow="process", + event_id="evt-001", + customer_id="cust-456", environment="production", tenant_id="tenant-123", ) @@ -58,8 +62,9 @@ print(ctx.attempt) # 1 (first attempt) |-------|-------------| | `run_id` | Unique identifier for this run (UUIDv7) | | `root_run_id` | ID of the original run (for retries, same as `run_id` for first attempt) | -| `use_case` | Business use case name (e.g., "Customer Support") | -| `workflow` | Optional workflow/function name | +| `event_id` | Identifier for the business event (same across retries) | +| `customer_id` | Identifier for the customer this event belongs to | +| `workflow` | Workflow/function name | | `environment` | Deployment environment (production, staging, etc.) 
| | `attempt` | Attempt number (1 for first, 2+ for retries) | | `tenant_id` | Optional tenant identifier for multi-tenant systems | @@ -69,23 +74,45 @@ print(ctx.attempt) # 1 (first attempt) ### Using the Decorator (Recommended) ```python -from botanu import botanu_use_case +from botanu import botanu_workflow -@botanu_use_case("Customer Support") -def handle_ticket(ticket_id: str): +@botanu_workflow("process", event_id="evt-001", customer_id="cust-456") +def do_work(): # RunContext is automatically created and propagated # All operations inside inherit the same run_id pass ``` +The `workflow` alias also works: + +```python +from botanu import workflow + +@workflow("process", event_id="evt-001", customer_id="cust-456") +def do_work(): + pass +``` + +### Using the Context Manager + +```python +from botanu import run_botanu + +def do_work(): + with run_botanu("process", event_id="evt-001", customer_id="cust-456"): + # RunContext is active within this block + pass +``` + ### Manual Creation ```python from botanu.models.run_context import RunContext ctx = RunContext.create( - use_case="Document Processing", - workflow="extract_entities", + workflow="process", + event_id="evt-001", + customer_id="cust-456", tenant_id="acme-corp", ) @@ -98,20 +125,24 @@ ctx = RunContext.create( When a run fails and is retried, use `create_retry()` to maintain lineage: ```python -original = RunContext.create(use_case="Process Order") +previous = RunContext.create( + workflow="process", + event_id="evt-001", + customer_id="cust-456", +) # First attempt fails... 
-retry = RunContext.create_retry(original) +retry = RunContext.create_retry(previous) print(retry.attempt) # 2 -print(retry.retry_of_run_id) # Original run_id -print(retry.root_run_id) # Same as original.run_id +print(retry.retry_of_run_id) # Previous run_id +print(retry.root_run_id) # Same as previous.run_id print(retry.run_id) # New unique ID ``` This enables: -- Tracking total attempts for a business operation -- Correlating retries back to the original request +- Tracking total attempts for a business event +- Correlating retries back to the previous request - Calculating aggregate cost across all attempts ## Deadlines and Cancellation @@ -120,7 +151,9 @@ RunContext supports deadline and cancellation tracking: ```python ctx = RunContext.create( - use_case="Long Running Task", + workflow="process", + event_id="evt-001", + customer_id="cust-456", deadline_seconds=30.0, # 30 second deadline ) @@ -138,25 +171,47 @@ if ctx.is_cancelled(): pass ``` +## Outcomes + +Record the business outcome of a run using `emit_outcome`: + +```python +from botanu import emit_outcome +from botanu.models.run_context import RunStatus + +emit_outcome( + RunStatus.SUCCESS, + value_type="task_completed", + value_amount=1.0, + confidence=0.95, + reason="Completed successfully", +) +``` + +`RunStatus` values: `SUCCESS`, `FAILURE`, `PARTIAL`, `TIMEOUT`, `CANCELED`. + +`emit_outcome` accepts these keyword arguments: `value_type`, `value_amount`, `confidence`, `reason`, `error_type`, `metadata`. + ## Serialization ### To Baggage (for HTTP propagation) ```python -# Lean mode (default): only run_id and use_case +# Lean mode (default): essential fields baggage = ctx.to_baggage_dict() -# {"botanu.run_id": "...", "botanu.use_case": "..."} +# {"botanu.run_id": "...", "botanu.workflow": "...", "botanu.event_id": "...", "botanu.customer_id": "..."} # Full mode: all fields baggage = ctx.to_baggage_dict(lean_mode=False) -# Includes workflow, environment, tenant_id, etc. 
+# Adds: botanu.environment, botanu.tenant_id, botanu.parent_run_id, botanu.root_run_id, +# botanu.attempt, botanu.retry_of_run_id, botanu.deadline, botanu.cancelled ``` ### To Span Attributes ```python attrs = ctx.to_span_attributes() -# {"botanu.run_id": "...", "botanu.use_case": "...", ...} +# {"botanu.run_id": "...", "botanu.workflow": "...", ...} ``` ### From Baggage (receiving side) @@ -165,7 +220,7 @@ attrs = ctx.to_span_attributes() ctx = RunContext.from_baggage(baggage_dict) if ctx is None: # Required fields missing, create new context - ctx = RunContext.create(use_case="Unknown") + ctx = RunContext.create(workflow="unknown", event_id="evt-fallback", customer_id="unknown") ``` ## Environment Variables @@ -177,10 +232,11 @@ if ctx is None: ## Best Practices -1. **One run per business outcome**: Don't create runs for internal operations -2. **Use descriptive use_case names**: They appear in dashboards and queries +1. **One event per business outcome**: Don't create events for internal operations +2. **Use descriptive workflow names**: They appear in dashboards and queries 3. **Leverage tenant_id**: Essential for multi-tenant cost attribution 4. **Handle retries properly**: Always use `create_retry()` for retry attempts +5. 
**Always provide event_id and customer_id**: They are required for proper cost attribution ## See Also diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 48c8c1d..cd1e4bd 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -78,8 +78,9 @@ class BotanuConfig: # Span export max_export_batch_size: int = 512 - max_queue_size: int = 2048 + max_queue_size: int = 65536 schedule_delay_millis: int = 5000 + export_timeout_millis: int = 30000 # Propagation mode propagation_mode: str = "lean" # BOTANU_PROPAGATION_MODE @@ -180,7 +181,9 @@ config = BotanuConfig() Propagates only essential fields to minimize header size: - `botanu.run_id` -- `botanu.use_case` +- `botanu.workflow` +- `botanu.event_id` +- `botanu.customer_id` Best for high-traffic systems where header size matters. @@ -189,8 +192,9 @@ Best for high-traffic systems where header size matters. Propagates all context fields: - `botanu.run_id` -- `botanu.use_case` - `botanu.workflow` +- `botanu.event_id` +- `botanu.customer_id` - `botanu.environment` - `botanu.tenant_id` - `botanu.parent_run_id` @@ -201,12 +205,6 @@ Enable with: export BOTANU_PROPAGATION_MODE=full ``` -Or: - -```python -enable(service_name="my-service", propagation_mode="full") -``` - ## Auto-Instrumentation ### Default Packages @@ -234,22 +232,20 @@ By default, Botanu enables instrumentation for: ### Customizing Packages +Override the default list via `BotanuConfig`: + ```python from botanu import enable +from botanu.sdk.config import BotanuConfig -enable( - service_name="my-service", - auto_instrument_packages=["requests", "fastapi", "openai_v2"], -) +config = BotanuConfig(auto_instrument_packages=["requests", "fastapi", "openai_v2"]) +enable(config=config) ``` ### Disabling Auto-Instrumentation ```python -enable( - service_name="my-service", - auto_instrument_packages=[], # Empty list disables -) +enable(auto_instrumentation=False) ``` ## Exporting 
Configuration diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 3acbf97..dad5081 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -1,6 +1,6 @@ # Quickstart -Get run-level cost attribution working in 5 minutes. +Get event-level cost attribution working in 5 minutes. ## Prerequisites @@ -41,33 +41,35 @@ Call `enable()` once at application startup. It reads configuration from environ ## Step 4: Define Entry Point ```python -from botanu import botanu_use_case +from botanu import botanu_workflow -@botanu_use_case(name="Customer Support") -async def handle_ticket(ticket_id: str): - data = await db.query(ticket_id) +@botanu_workflow("my-workflow", event_id="evt-001", customer_id="cust-42") +async def do_work(): + data = await db.query(...) result = await llm.complete(data) return result ``` -All LLM calls, database queries, and HTTP requests inside the function are automatically tracked with the same `run_id`. +All LLM calls, database queries, and HTTP requests inside the function are automatically tracked with the same `run_id` tied to the `event_id`. 
## Complete Example **Entry service** (`entry/app.py`): ```python -from botanu import enable, botanu_use_case +from botanu import enable, botanu_workflow, emit_outcome enable() -@botanu_use_case(name="Customer Support") -async def handle_ticket(ticket_id: str): - data = await db.query(ticket_id) - result = await openai.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": data}] - ) +@botanu_workflow( + "my-workflow", + event_id=lambda req: req.event_id, + customer_id=lambda req: req.customer_id, +) +async def handle_request(req): + data = await fetch_data(req) + result = await process(data) + emit_outcome("success") return result ``` @@ -84,12 +86,14 @@ enable() # propagates run_id from incoming request — no decorator needed | Attribute | Example | Description | |-----------|---------|-------------| | `botanu.run_id` | `019abc12-...` | Unique run identifier (UUIDv7) | -| `botanu.use_case` | `Customer Support` | Business use case | +| `botanu.workflow` | `my-workflow` | Workflow name | +| `botanu.event_id` | `evt-001` | Business event identifier | +| `botanu.customer_id` | `cust-42` | Customer identifier | | `gen_ai.usage.input_tokens` | `150` | LLM input tokens | | `gen_ai.usage.output_tokens` | `200` | LLM output tokens | | `db.system` | `postgresql` | Database system | -All spans across all services share the same `run_id`, enabling cost-per-transaction analytics. +All spans across all services share the same `run_id`, enabling cost-per-event analytics. ## Next Steps diff --git a/docs/index.md b/docs/index.md index 1f77d25..ad62bb1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,65 +1,71 @@ # Botanu SDK Documentation -Botanu SDK provides OpenTelemetry-native run-level cost attribution for AI workflows. +Botanu SDK provides OpenTelemetry-native event-level cost attribution for AI +workflows. ## Overview -Traditional observability tools trace individual requests. 
But AI workflows are different — a single business outcome (resolving a support ticket, processing an order) might span multiple LLM calls, retries, tool executions, and data operations across different vendors. +Traditional observability tools trace individual requests. But AI workflows are +different — a single business event (resolving a support ticket, processing an +order) might involve multiple runs spanning LLM calls, retries, tool executions, +and data operations across different services and vendors. -Botanu introduces **run-level attribution**: a unique `run_id` that follows your entire workflow, enabling you to answer "How much did this outcome cost?" +Botanu introduces **event-level attribution**: a stable `event_id` that follows +your entire business transaction, enabling you to answer "How much did this +event cost?" and "What was the outcome?" ## Documentation ### Getting Started -- [Installation](getting-started/installation.md) - Install and configure the SDK -- [Quick Start](getting-started/quickstart.md) - Get up and running in 5 minutes -- [Configuration](getting-started/configuration.md) - Configuration options and environment variables +- [Installation](getting-started/installation.md) — Install and configure the SDK +- [Quick Start](getting-started/quickstart.md) — Get up and running in 5 minutes +- [Configuration](getting-started/configuration.md) — Environment variables and options ### Core Concepts -- [Run Context](concepts/run-context.md) - Understanding `run_id` and context propagation -- [Context Propagation](concepts/context-propagation.md) - How context flows through your application -- [Architecture](concepts/architecture.md) - SDK design and component overview +- [Run Context](concepts/run-context.md) — Events, runs, and context propagation +- [Context Propagation](concepts/context-propagation.md) — How context flows across services +- [Architecture](concepts/architecture.md) — SDK design and component overview ### Tracking -- [LLM 
Tracking](tracking/llm-tracking.md) - Track AI model calls and token usage -- [Data Tracking](tracking/data-tracking.md) - Track database, storage, and messaging operations -- [Outcomes](tracking/outcomes.md) - Record business outcomes for ROI calculation +- [LLM Tracking](tracking/llm-tracking.md) — Track AI model calls and token usage +- [Data Tracking](tracking/data-tracking.md) — Track database, storage, and messaging operations +- [Outcomes](tracking/outcomes.md) — Record business outcomes for ROI calculation ### Integration -- [Auto-Instrumentation](integration/auto-instrumentation.md) - Automatic instrumentation for common libraries -- [Kubernetes Deployment](integration/kubernetes.md) - Zero-code instrumentation at scale -- [Existing OTel Setup](integration/existing-otel.md) - Integrate with existing OpenTelemetry deployments -- [Collector Configuration](integration/collector.md) - Configure the OpenTelemetry Collector +- [Auto-Instrumentation](integration/auto-instrumentation.md) — Supported libraries and frameworks +- [Kubernetes Deployment](integration/kubernetes.md) — Zero-code instrumentation at scale +- [Existing OTel Setup](integration/existing-otel.md) — Integrate with existing OpenTelemetry deployments +- [Collector Configuration](integration/collector.md) — Configure the OpenTelemetry Collector ### Patterns -- [Best Practices](patterns/best-practices.md) - Recommended patterns for production use -- [Anti-Patterns](patterns/anti-patterns.md) - Common mistakes to avoid +- [Best Practices](patterns/best-practices.md) — Recommended patterns for production use +- [Anti-Patterns](patterns/anti-patterns.md) — Common mistakes to avoid ### API Reference -- [Decorators](api/decorators.md) - `@botanu_use_case` and related decorators -- [Tracking API](api/tracking.md) - Manual tracking context managers -- [Configuration API](api/configuration.md) - `BotanuConfig` and initialization +- [Decorators](api/decorators.md) — `@botanu_workflow` and related decorators 
+- [Tracking API](api/tracking.md) — Manual tracking context managers +- [Configuration API](api/configuration.md) — `BotanuConfig` and initialization ## Quick Example ```python -from botanu import enable, botanu_use_case +from botanu import enable, botanu_workflow, emit_outcome -enable(service_name="my-service") +enable() -@botanu_use_case(name="my_workflow") -def my_function(): - data = db.query(...) - result = llm.complete(...) +@botanu_workflow("my-workflow", event_id="evt-001", customer_id="cust-42") +async def do_work(): + result = await do_something() + emit_outcome("success") return result ``` ## License -Apache License 2.0. See [LICENSE](https://github.com/botanu-ai/botanu-sdk-python/blob/main/LICENSE). +[Apache License 2.0](https://github.com/botanu-ai/botanu-sdk-python/blob/main/LICENSE) diff --git a/docs/integration/existing-otel.md b/docs/integration/existing-otel.md index a008cdb..539b845 100644 --- a/docs/integration/existing-otel.md +++ b/docs/integration/existing-otel.md @@ -179,10 +179,10 @@ class RunContextEnricher(SpanProcessor): if run_id: span.set_attribute("botanu.run_id", run_id) - # Read use_case from baggage - use_case = baggage.get_baggage("botanu.use_case", parent_context) - if use_case: - span.set_attribute("botanu.use_case", use_case) + # Read workflow from baggage + workflow = baggage.get_baggage("botanu.workflow", parent_context) + if workflow: + span.set_attribute("botanu.workflow", workflow) ``` This means: @@ -195,16 +195,15 @@ This means: With the enricher in place, use Botanu decorators: ```python -from botanu import botanu_use_case, emit_outcome +from botanu import botanu_workflow, emit_outcome -@botanu_use_case("Customer Support") -async def handle_ticket(ticket_id: str): +@botanu_workflow("do_work", event_id=event_id, customer_id=customer_id) +async def do_work(event_id: str, customer_id: str): # All spans created here (by any instrumentation) get run_id - context = requests.get(f"/api/tickets/{ticket_id}") - response = await 
openai_call(context) - await database.save(response) + data = do_something() + result = await process(data) - emit_outcome("success", value_type="tickets_resolved", value_amount=1) + emit_outcome("success") ``` ## Without Botanu Bootstrap @@ -233,7 +232,7 @@ Check that run_id appears on spans: ```python from opentelemetry import trace, baggage, context -# Set baggage (normally done by @botanu_use_case) +# Set baggage (normally done by @botanu_workflow) ctx = baggage.set_baggage("botanu.run_id", "test-123") token = context.attach(ctx) @@ -276,7 +275,7 @@ However, `RunContextEnricher` uses `on_start()`, so it runs before export regard print(baggage.get_baggage("botanu.run_id")) ``` -3. Ensure `@botanu_use_case` is used at entry points +3. Ensure `@botanu_workflow` is used at entry points ### Baggage Not Propagating diff --git a/docs/integration/kubernetes.md b/docs/integration/kubernetes.md index c71cf4e..765c736 100644 --- a/docs/integration/kubernetes.md +++ b/docs/integration/kubernetes.md @@ -10,7 +10,7 @@ For organizations with thousands of applications, modifying code in every repo i | Service Type | Code Change | Config Change | |--------------|-------------|---------------| -| **Entry point** | `@botanu_use_case` decorator (generates `run_id`) | K8s annotation | +| **Entry point** | `@botanu_workflow` decorator (generates `run_id`) | K8s annotation | | **Intermediate services** | None | K8s annotation only | **Entry point** = The service where the business transaction starts (API gateway, webhook handler, queue consumer). 
@@ -38,7 +38,8 @@ With zero-code instrumentation, the following are automatically traced: │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ │ App A │ │ App B │ │ App C │ │ │ │ (entry) │ │ (no change) │ │ (no change) │ │ -│ │ @use_case │ │ │ │ │ │ +│ │ @botanu_ │ │ │ │ │ │ +│ │ workflow │ │ │ │ │ │ │ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ │ │ │ │ │ │ │ OTel auto-injected via Operator │ @@ -246,24 +247,23 @@ spec: ## Entry Point Service (Code Change Required) -The entry point service is the **only** service that needs a code change. It must use `@botanu_use_case` to generate the `run_id`: +The entry point service is the **only** service that needs a code change. It must use `@botanu_workflow` to generate the `run_id`: ```python -from botanu import enable, botanu_use_case +from botanu import enable, botanu_workflow enable(service_name="entry-service") -@botanu_use_case(name="my_workflow") -def my_function(): - data = db.query(...) - result = llm.complete(...) - downstream_service.call(result) +@botanu_workflow("do_work", event_id=event_id, customer_id=customer_id) +def do_work(event_id: str, customer_id: str): + data = do_something() + result = process(data) return result ``` -The `@botanu_use_case` decorator generates a `run_id` and propagates it via W3C Baggage to all downstream calls. +The `@botanu_workflow` decorator generates a `run_id` and propagates it via W3C Baggage to all downstream calls. -**Downstream services (B, C, D, etc.) need zero code changes** — they just need the K8s annotation. +**Downstream services (B, C, D, etc.) need zero code changes** -- they just need the K8s annotation. ## Helm Chart @@ -351,7 +351,7 @@ For 2000+ applications: 2. **Phase 2**: Install OTel Operator 3. **Phase 3**: Create Instrumentation resource 4. **Phase 4**: Add annotations via GitOps (batch by team/namespace) -5. **Phase 5**: Instrument entry points with `@botanu_use_case` +5. 
**Phase 5**: Instrument entry points with `@botanu_workflow` Each phase is independent. Annotations can be rolled out gradually. diff --git a/docs/patterns/anti-patterns.md b/docs/patterns/anti-patterns.md index 1e09f23..426e796 100644 --- a/docs/patterns/anti-patterns.md +++ b/docs/patterns/anti-patterns.md @@ -10,78 +10,78 @@ Common mistakes to avoid when using Botanu SDK. ```python # BAD - Too many runs -@botanu_use_case("Fetch Context") # Don't do this -async def fetch_context(ticket_id): +@botanu_workflow("fetch_data", event_id=event_id, customer_id=customer_id) # Don't do this +async def fetch_data(event_id, customer_id): return await db.query(...) -@botanu_use_case("Generate Response") # Or this -async def generate_response(context): +@botanu_workflow("do_work", event_id=event_id, customer_id=customer_id) # Or this +async def do_work(event_id, customer_id): return await llm.complete(...) -@botanu_use_case("Customer Support") -async def handle_ticket(ticket_id): - context = await fetch_context(ticket_id) - response = await generate_response(context) - return response +@botanu_workflow("handle_request", event_id=event_id, customer_id=customer_id) +async def handle_request(event_id, customer_id): + data = await fetch_data(event_id, customer_id) + result = await do_work(event_id, customer_id) + return result ``` **Do** use a single run at the entry point: ```python # GOOD - One run for the business outcome -@botanu_use_case("Customer Support") -async def handle_ticket(ticket_id): - context = await fetch_context(ticket_id) # Not decorated - response = await generate_response(context) # Not decorated - emit_outcome("success", value_type="tickets_resolved", value_amount=1) - return response +@botanu_workflow("handle_request", event_id=event_id, customer_id=customer_id) +async def handle_request(event_id: str, customer_id: str): + data = await fetch_data(event_id) # Not decorated + result = await do_work(data) # Not decorated + emit_outcome("success", 
value_type="requests_processed", value_amount=1) + return result ``` -### Nesting @botanu_use_case Decorators +### Nesting @botanu_workflow Decorators -**Don't** nest use case decorators: +**Don't** nest workflow decorators: ```python # BAD - Nested runs create confusion -@botanu_use_case("Outer") +@botanu_workflow("outer", event_id=event_id, customer_id=customer_id) async def outer(): await inner() # Creates a second run -@botanu_use_case("Inner") # Don't do this +@botanu_workflow("inner", event_id=event_id, customer_id=customer_id) # Don't do this async def inner(): ... ``` -**Do** use @botanu_use_case only at entry points: +**Do** use @botanu_workflow only at entry points: ```python # GOOD - Only entry point is decorated -@botanu_use_case("Main Workflow") -async def main(): +@botanu_workflow("main_flow", event_id=event_id, customer_id=customer_id) +async def main_flow(): await step_one() # No decorator await step_two() # No decorator ``` -### Generic Use Case Names +### Generic Workflow Names **Don't** use vague names: ```python # BAD - Meaningless in dashboards -@botanu_use_case("Process") -@botanu_use_case("Handle") -@botanu_use_case("Main") -@botanu_use_case("DoWork") +@botanu_workflow("process", event_id=event_id, customer_id=customer_id) +@botanu_workflow("handle", event_id=event_id, customer_id=customer_id) +@botanu_workflow("main", event_id=event_id, customer_id=customer_id) +@botanu_workflow("do_work", event_id=event_id, customer_id=customer_id) ``` **Do** use descriptive business names: ```python # GOOD - Clear in reports -@botanu_use_case("Customer Support") -@botanu_use_case("Invoice Processing") -@botanu_use_case("Lead Qualification") -@botanu_use_case("Document Analysis") +@botanu_workflow("support_resolution", event_id=event_id, customer_id=customer_id) +@botanu_workflow("invoice_processing", event_id=event_id, customer_id=customer_id) +@botanu_workflow("lead_scoring", event_id=event_id, customer_id=customer_id) 
+@botanu_workflow("document_analysis", event_id=event_id, customer_id=customer_id) ``` ## Outcome Anti-Patterns @@ -92,8 +92,8 @@ async def main(): ```python # BAD - No outcome recorded -@botanu_use_case("Process Order") -async def process_order(order_id): +@botanu_workflow("process_order", event_id=order_id, customer_id=customer_id) +async def process_order(order_id, customer_id): result = await process(order_id) return result # Where's the outcome? ``` @@ -102,8 +102,8 @@ async def process_order(order_id): ```python # GOOD - Explicit outcome -@botanu_use_case("Process Order") -async def process_order(order_id): +@botanu_workflow("process_order", event_id=order_id, customer_id=customer_id) +async def process_order(order_id, customer_id): try: result = await process(order_id) emit_outcome("success", value_type="orders_processed", value_amount=1) @@ -119,7 +119,7 @@ async def process_order(order_id): ```python # BAD - Multiple outcomes are confusing -@botanu_use_case("Batch Processing") +@botanu_workflow("batch_processing", event_id=batch_id, customer_id=customer_id) async def process_batch(items): for item in items: await process(item) @@ -130,7 +130,7 @@ async def process_batch(items): ```python # GOOD - One outcome at the end -@botanu_use_case("Batch Processing") +@botanu_workflow("batch_processing", event_id=batch_id, customer_id=customer_id) async def process_batch(items): processed = 0 for item in items: @@ -270,21 +270,15 @@ enable(service_name=os.environ["OTEL_SERVICE_NAME"]) # BAD - Missing automatic tracing enable( service_name="my-service", - auto_instrument_packages=[], # Why? + auto_instrumentation=False, # Why? 
) ``` **Do** keep defaults or be selective: ```python -# GOOD - Default instrumentation +# GOOD - Default instrumentation (auto_instrumentation=True by default) enable(service_name="my-service") - -# Or selective -enable( - service_name="my-service", - auto_instrument_packages=["fastapi", "openai_v2", "sqlalchemy"], -) ``` ## Context Propagation Anti-Patterns @@ -295,12 +289,12 @@ enable( ```python # BAD - Context lost -@botanu_use_case("Parallel Processing") -async def process(): +@botanu_workflow("parallel_work", event_id=event_id, customer_id=customer_id) +async def do_parallel_work(): # These tasks don't inherit context await asyncio.gather( - task_one(), - task_two(), + do_something(), + do_something_else(), ) ``` @@ -308,12 +302,12 @@ async def process(): ```python # GOOD - Context flows through asyncio -@botanu_use_case("Parallel Processing") -async def process(): +@botanu_workflow("parallel_work", event_id=event_id, customer_id=customer_id) +async def do_parallel_work(): # asyncio with contextvars works correctly await asyncio.gather( - task_one(), # Inherits context - task_two(), # Inherits context + do_something(), # Inherits context + do_something_else(), # Inherits context ) ``` @@ -325,19 +319,20 @@ async def process(): # BAD - Context not extracted def process_message(message): # run_id from producer is lost - handle_payload(message["payload"]) + do_work(message["payload"]) ``` **Do** extract and use context: ```python # GOOD - Context continues +from botanu.sdk import set_baggage + def process_message(message): baggage = message.get("baggage", {}) - ctx = RunContext.from_baggage(baggage) - if ctx: - with ctx.as_current(): - handle_payload(message["payload"]) + for key, value in baggage.items(): + set_baggage(key, value) + do_work(message["payload"]) ``` ## Data Tracking Anti-Patterns @@ -348,8 +343,8 @@ def process_message(message): ```python # BAD - Only LLM tracked -@botanu_use_case("Analysis") -async def analyze(): 
+@botanu_workflow("analyze_data", event_id=event_id, customer_id=customer_id) +async def analyze_data(): data = await snowflake.query(expensive_query) # Not tracked! with track_llm_call(...) as tracker: result = await llm.complete(data) @@ -360,8 +355,8 @@ async def analyze(): ```python # GOOD - Complete cost picture -@botanu_use_case("Analysis") -async def analyze(): +@botanu_workflow("analyze_data", event_id=event_id, customer_id=customer_id) +async def analyze_data(): with track_db_operation(system="snowflake", operation="SELECT") as db: data = await snowflake.query(expensive_query) db.set_bytes_scanned(data.bytes_scanned) @@ -426,7 +421,7 @@ with track_llm_call(...) as tracker: ```python # BAD - All items fail if one fails -@botanu_use_case("Batch") +@botanu_workflow("batch_work", event_id=batch_id, customer_id=customer_id) async def process_batch(items): for item in items: await process(item) # If one fails, no outcome @@ -437,7 +432,7 @@ async def process_batch(items): ```python # GOOD - Partial success recorded -@botanu_use_case("Batch") +@botanu_workflow("batch_work", event_id=batch_id, customer_id=customer_id) async def process_batch(items): processed = 0 failed = 0 @@ -467,7 +462,7 @@ async def process_batch(items): # BAD - Tests hit real collector def test_workflow(): enable(service_name="test") # Sends to real endpoint! 
- await my_workflow() + await do_work() ``` **Do** use NoOp or in-memory exporters: @@ -480,7 +475,7 @@ def setup_test(): trace.set_tracer_provider(NoOpTracerProvider()) def test_workflow(): - await my_workflow() # No external calls + await do_work() # No external calls ``` ## See Also diff --git a/docs/patterns/best-practices.md b/docs/patterns/best-practices.md index 26372d1..ce22b2d 100644 --- a/docs/patterns/best-practices.md +++ b/docs/patterns/best-practices.md @@ -10,53 +10,38 @@ A run should represent a complete business transaction: ```python # GOOD - One run for one business outcome -@botanu_use_case("Customer Support") -async def resolve_ticket(ticket_id: str): - context = await fetch_context(ticket_id) - response = await generate_response(context) - await send_response(ticket_id, response) - emit_outcome("success", value_type="tickets_resolved", value_amount=1) +@botanu_workflow("process_order", event_id=order_id, customer_id=customer_id) +async def process_order(order_id: str, customer_id: str): + data = await fetch_data(order_id) + result = await do_work(data) + emit_outcome("success", value_type="orders_processed", value_amount=1) ``` ```python # BAD - Multiple runs for one outcome -@botanu_use_case("Fetch Context") -async def fetch_context(ticket_id: str): +@botanu_workflow("fetch_data", event_id=event_id, customer_id=customer_id) +async def fetch_data(event_id: str, customer_id: str): ... -@botanu_use_case("Generate Response") # Don't do this -async def generate_response(context): +@botanu_workflow("do_work", event_id=event_id, customer_id=customer_id) # Don't do this +async def do_work(event_id: str, customer_id: str): ... ``` -### Use Descriptive Use Case Names +### Use Descriptive Workflow Names -Use cases appear in dashboards and queries. Choose names carefully: +Workflow names appear in dashboards and queries. 
Choose names carefully: ```python # GOOD - Clear, descriptive names -@botanu_use_case("Customer Support") -@botanu_use_case("Document Analysis") -@botanu_use_case("Lead Qualification") +@botanu_workflow("support_resolution", event_id=event_id, customer_id=customer_id) +@botanu_workflow("document_analysis", event_id=event_id, customer_id=customer_id) +@botanu_workflow("lead_scoring", event_id=event_id, customer_id=customer_id) # BAD - Generic or technical names -@botanu_use_case("HandleRequest") -@botanu_use_case("Process") -@botanu_use_case("Main") -``` - -### Include Workflow Names - -Workflow names help distinguish different paths within a use case: - -```python -@botanu_use_case("Customer Support", workflow="ticket_resolution") -async def resolve_ticket(): - ... - -@botanu_use_case("Customer Support", workflow="escalation") -async def escalate_ticket(): - ... +@botanu_workflow("handle", event_id=event_id, customer_id=customer_id) +@botanu_workflow("process", event_id=event_id, customer_id=customer_id) +@botanu_workflow("main", event_id=event_id, customer_id=customer_id) ``` ## Outcome Recording @@ -66,8 +51,8 @@ async def escalate_ticket(): Every run should have an explicit outcome: ```python -@botanu_use_case("Data Processing") -async def process_data(data_id: str): +@botanu_workflow("process_data", event_id=data_id, customer_id=customer_id) +async def process_data(data_id: str, customer_id: str): try: result = await process(data_id) emit_outcome("success", value_type="records_processed", value_amount=result.count) @@ -86,7 +71,7 @@ Include value amounts for better ROI analysis: ```python # GOOD - Quantified outcomes -emit_outcome("success", value_type="emails_sent", value_amount=50) +emit_outcome("success", value_type="items_sent", value_amount=50) emit_outcome("success", value_type="revenue_generated", value_amount=1299.99) emit_outcome("success", value_type="documents_processed", value_amount=10) @@ -101,14 +86,14 @@ Standardize your value types across the 
organization: ```python # Define standard value types class ValueTypes: - TICKETS_RESOLVED = "tickets_resolved" - DOCUMENTS_PROCESSED = "documents_processed" - LEADS_QUALIFIED = "leads_qualified" - EMAILS_SENT = "emails_sent" + ITEMS_PROCESSED = "items_processed" + DOCUMENTS_ANALYZED = "documents_analyzed" + LEADS_SCORED = "leads_scored" + MESSAGES_SENT = "messages_sent" REVENUE_GENERATED = "revenue_generated" # Use consistently -emit_outcome("success", value_type=ValueTypes.TICKETS_RESOLVED, value_amount=1) +emit_outcome("success", value_type=ValueTypes.ITEMS_PROCESSED, value_amount=1) ``` ### Include Reasons for Failures @@ -189,8 +174,8 @@ with track_llm_call(provider="openai", model="text-embedding-3-small", operation Include databases, storage, and messaging: ```python -@botanu_use_case("ETL Pipeline") -async def run_etl(): +@botanu_workflow("run_pipeline", event_id=pipeline_id, customer_id=customer_id) +async def run_pipeline(pipeline_id: str, customer_id: str): # Track warehouse query (billed by bytes scanned) with track_db_operation(system="snowflake", operation="SELECT") as db: db.set_bytes_scanned(result.bytes_scanned) @@ -235,21 +220,26 @@ app.add_middleware(BotanuMiddleware) Inject and extract context manually for async messaging: ```python +from botanu.sdk import set_baggage, get_baggage + # Producer def publish_message(payload): - ctx = get_current_run_context() message = { "payload": payload, - "baggage": ctx.to_baggage_dict() if ctx else {} + "baggage": { + "botanu.workflow": get_baggage("botanu.workflow"), + "botanu.event_id": get_baggage("botanu.event_id"), + "botanu.customer_id": get_baggage("botanu.customer_id"), + } } queue.publish(message) # Consumer def process_message(message): baggage = message.get("baggage", {}) - ctx = RunContext.from_baggage(baggage) - with ctx.as_current(): - handle_payload(message["payload"]) + for key, value in baggage.items(): + set_baggage(key, value) + do_work(message["payload"]) ``` ### Use Lean Mode for 
High-Traffic Systems @@ -258,10 +248,11 @@ Default lean mode minimizes header overhead: ```python # Lean mode: ~100 bytes of baggage -# Propagates: run_id, use_case +# Propagates: run_id, botanu.workflow # Full mode: ~300 bytes of baggage -# Propagates: run_id, use_case, workflow, environment, tenant_id, parent_run_id +# Propagates: run_id, botanu.workflow, botanu.event_id, botanu.customer_id, +# environment, tenant_id, parent_run_id ``` ## Configuration @@ -300,22 +291,19 @@ propagation: For accurate per-tenant cost attribution: ```python -@botanu_use_case("Customer Support", tenant_id=request.tenant_id) -async def handle_ticket(request): +@botanu_workflow("handle_request", event_id=request_id, customer_id=cust_id, tenant_id=request.tenant_id) +async def handle_request(request): ... ``` ### Use Business Context -Add additional attribution dimensions: +Add additional attribution dimensions via baggage: ```python -set_business_context( - customer_id=request.customer_id, - team="engineering", - cost_center="R&D", - region="us-west-2", -) +set_baggage("team", "engineering") +set_baggage("cost_center", "R&D") +set_baggage("region", "us-west-2") ``` ## Error Handling @@ -338,10 +326,10 @@ with track_llm_call(provider="openai", model="gpt-4") as tracker: Even failed runs should have outcomes: ```python -@botanu_use_case("Data Processing") -async def process(data_id): +@botanu_workflow("process_data", event_id=data_id, customer_id=customer_id) +async def process_data(data_id: str, customer_id: str): try: - await process_data(data_id) + await do_work(data_id) emit_outcome("success", value_type="items_processed", value_amount=1) except ValidationError: emit_outcome("failed", reason="validation_error") @@ -361,7 +349,7 @@ For async applications, ensure tracking is non-blocking: # The SDK uses span events, not separate API calls # This is already non-blocking with track_llm_call(provider="openai", model="gpt-4") as tracker: - response = await async_llm_call() + response = 
await do_something() tracker.set_tokens(...) # Immediate, non-blocking ``` @@ -405,8 +393,8 @@ from unittest.mock import patch def test_successful_outcome(): with patch("botanu.sdk.span_helpers.emit_outcome") as mock_emit: - result = await handle_ticket("123") - mock_emit.assert_called_with("success", value_type="tickets_resolved", value_amount=1) + result = await do_work("123") + mock_emit.assert_called_with("success", value_type="items_processed", value_amount=1) ``` ## See Also diff --git a/docs/tracking/data-tracking.md b/docs/tracking/data-tracking.md index 9c066a8..bf7e06c 100644 --- a/docs/tracking/data-tracking.md +++ b/docs/tracking/data-tracking.md @@ -303,7 +303,7 @@ set_warehouse_metrics( ## Example: Complete Data Pipeline ```python -from botanu import botanu_use_case, emit_outcome +from botanu import botanu_workflow, emit_outcome from botanu.tracking.data import ( track_db_operation, track_storage_operation, @@ -312,7 +312,7 @@ from botanu.tracking.data import ( ) from botanu.tracking.llm import track_llm_call -@botanu_use_case("ETL Pipeline") +@botanu_workflow("etl-pipeline", event_id=batch_id, customer_id=customer_id) async def process_batch(batch_id: str): """Complete ETL pipeline with cost tracking.""" diff --git a/docs/tracking/llm-tracking.md b/docs/tracking/llm-tracking.md index 138cd7f..e2053ba 100644 --- a/docs/tracking/llm-tracking.md +++ b/docs/tracking/llm-tracking.md @@ -202,7 +202,7 @@ Track tool calls triggered by LLMs: from botanu.tracking.llm import track_tool_call with track_tool_call(tool_name="search_database", tool_call_id="call_abc123") as tool: - results = await search_database(query) + results = await do_work(query) tool.set_result( success=True, items_returned=len(results), @@ -288,41 +288,35 @@ The SDK automatically records these metrics: ## Example: Multi-Provider Workflow ```python -from botanu import botanu_use_case, emit_outcome +from botanu import botanu_workflow, emit_outcome from botanu.tracking.llm import 
track_llm_call -@botanu_use_case("Document Analysis") -async def analyze_with_fallback(document: str): - """Try Claude first, fall back to GPT-4.""" +@botanu_workflow("process-with-fallback", event_id=event_id, customer_id=customer_id) +async def process_with_fallback(data: str): + """Try one provider first, fall back to another.""" try: with track_llm_call(provider="anthropic", model="claude-3-opus") as tracker: tracker.set_attempt(1) - response = await anthropic_client.messages.create( - model="claude-3-opus-20240229", - messages=[{"role": "user", "content": document}] - ) + response = await do_work(data, provider="anthropic") tracker.set_tokens( input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens, ) - emit_outcome("success", value_type="analyses_completed", value_amount=1) - return response.content[0].text + emit_outcome("success", value_type="items_processed", value_amount=1) + return response.content - except anthropic.RateLimitError: - # Fallback to OpenAI + except RateLimitError: + # Fallback to second provider with track_llm_call(provider="openai", model="gpt-4") as tracker: tracker.set_attempt(2) - response = await openai_client.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": document}] - ) + response = await do_work(data, provider="openai") tracker.set_tokens( input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.completion_tokens, ) - emit_outcome("success", value_type="analyses_completed", value_amount=1) - return response.choices[0].message.content + emit_outcome("success", value_type="items_processed", value_amount=1) + return response.content ``` ## See Also diff --git a/docs/tracking/outcomes.md b/docs/tracking/outcomes.md index 0e974ae..0e870ac 100644 --- a/docs/tracking/outcomes.md +++ b/docs/tracking/outcomes.md @@ -4,30 +4,38 @@ Record business outcomes to enable cost-per-outcome analysis. ## Overview -Outcomes connect infrastructure costs to business value. 
By recording what was achieved (tickets resolved, documents processed, leads qualified), you can calculate the true ROI of your AI workflows. +Outcomes connect infrastructure costs to business value. By recording what each event achieved, you can calculate the true ROI of your AI workflows. + +**Terminology:** +- An **event** is one business transaction (e.g., a customer request, a pipeline trigger). +- A **run** is one execution attempt within an event. +- Each event has an **outcome** describing what was achieved. ## Basic Usage ```python -from botanu import botanu_use_case, emit_outcome +from botanu import botanu_workflow, emit_outcome -@botanu_use_case("Customer Support") -async def handle_ticket(ticket_id: str): - # ... process ticket ... +@botanu_workflow("process-items", event_id=request.id, customer_id=customer.id) +async def handle_request(): + result = await do_work() # Record the business outcome - emit_outcome("success", value_type="tickets_resolved", value_amount=1) + emit_outcome("success", value_type="items_processed", value_amount=result.count) ``` ## emit_outcome() Parameters ```python emit_outcome( - status: str, # Required: "success", "partial", "failed" + status: str, # Required: "success", "partial", "failed", "timeout", "canceled", "abandoned" + *, value_type: str = None, # What was achieved value_amount: float = None, # How much confidence: float = None, # Confidence score (0.0-1.0) reason: str = None, # Why (especially for failures) + error_type: str = None, # Error classification + metadata: dict = None, # Additional key-value pairs ) ``` @@ -35,20 +43,23 @@ emit_outcome( The outcome status: -| Status | Description | Use Case | -|--------|-------------|----------| -| `success` | Fully achieved goal | Ticket resolved, document processed | +| Status | Description | Example | +|--------|-------------|---------| +| `success` | Fully achieved goal | All items processed | | `partial` | Partially achieved | 3 of 5 items processed | -| `failed` 
| Did not achieve goal | Error, timeout, rejection | +| `failed` | Did not achieve goal | Error during processing | +| `timeout` | Timed out before completing | Deadline exceeded | +| `canceled` | Canceled by user or system | User aborted the request | +| `abandoned` | Abandoned without completion | No response from upstream | ### value_type A descriptive label for what was achieved: ```python -emit_outcome("success", value_type="tickets_resolved", value_amount=1) -emit_outcome("success", value_type="documents_processed", value_amount=5) -emit_outcome("success", value_type="leads_qualified", value_amount=1) +emit_outcome("success", value_type="items_processed", value_amount=1) +emit_outcome("success", value_type="documents_generated", value_amount=5) +emit_outcome("success", value_type="tasks_completed", value_amount=1) emit_outcome("success", value_type="revenue_generated", value_amount=499.99) ``` @@ -58,13 +69,13 @@ The quantified value: ```python # Count -emit_outcome("success", value_type="emails_sent", value_amount=100) +emit_outcome("success", value_type="records_written", value_amount=100) # Revenue emit_outcome("success", value_type="order_value", value_amount=1299.99) # Score -emit_outcome("success", value_type="satisfaction_score", value_amount=4.5) +emit_outcome("success", value_type="quality_score", value_amount=4.5) ``` ### confidence @@ -74,7 +85,7 @@ For probabilistic outcomes: ```python emit_outcome( "success", - value_type="intent_classified", + value_type="classifications_completed", value_amount=1, confidence=0.92, ) @@ -90,15 +101,36 @@ emit_outcome("failed", reason="invalid_input") emit_outcome("partial", reason="timeout_partial_results", value_amount=3) ``` +### error_type + +Classify the error for aggregation: + +```python +emit_outcome("failed", reason="upstream service unavailable", error_type="ServiceUnavailable") +emit_outcome("timeout", reason="model took too long", error_type="DeadlineExceeded") +``` + +### metadata + +Attach arbitrary 
key-value pairs: + +```python +emit_outcome( + "success", + value_type="items_processed", + value_amount=10, + metadata={"batch_id": "abc-123", "retry_count": 2}, +) +``` + ## Outcome Patterns ### Success with Value ```python -@botanu_use_case("Order Processing") -async def process_order(order_id: str): - order = await fetch_order(order_id) - await fulfill_order(order) +@botanu_workflow("fulfill-order", event_id=order.id, customer_id=customer.id) +async def process_order(): + result = await do_work() emit_outcome( "success", @@ -110,15 +142,15 @@ async def process_order(order_id: str): ### Success with Revenue ```python -@botanu_use_case("Sales Bot") -async def handle_inquiry(inquiry_id: str): - result = await process_sale(inquiry_id) +@botanu_workflow("handle-inquiry", event_id=inquiry.id, customer_id=customer.id) +async def handle_inquiry(): + result = await process() - if result.sale_completed: + if result.completed: emit_outcome( "success", value_type="revenue_generated", - value_amount=result.order_total, + value_amount=result.total, ) else: emit_outcome( @@ -131,12 +163,12 @@ async def handle_inquiry(inquiry_id: str): ### Partial Success ```python -@botanu_use_case("Batch Processing") +@botanu_workflow("batch-process", event_id=batch.id, customer_id=customer.id) async def process_batch(items: list): processed = 0 for item in items: try: - await process_item(item) + await do_something(item) processed += 1 except Exception: continue @@ -157,49 +189,49 @@ async def process_batch(items: list): ### Failure with Reason ```python -@botanu_use_case("Document Analysis") -async def analyze_document(doc_id: str): +@botanu_workflow("analyze", event_id=job.id, customer_id=customer.id) +async def analyze(doc_id: str): try: - document = await fetch_document(doc_id) - if not document: - emit_outcome("failed", reason="document_not_found") + data = await do_work(doc_id) + if not data: + emit_outcome("failed", reason="not_found", error_type="NotFound") return None - result = 
await analyze(document) - emit_outcome("success", value_type="documents_analyzed", value_amount=1) + result = await process(data) + emit_outcome("success", value_type="items_analyzed", value_amount=1) return result except RateLimitError: - emit_outcome("failed", reason="rate_limit_exceeded") + emit_outcome("failed", reason="rate_limit_exceeded", error_type="RateLimitError") raise except TimeoutError: - emit_outcome("failed", reason="analysis_timeout") + emit_outcome("timeout", reason="analysis_timeout", error_type="TimeoutError") raise ``` ### Classification with Confidence ```python -@botanu_use_case("Intent Classification") -async def classify_intent(message: str): - result = await classifier.predict(message) +@botanu_workflow("classify", event_id=request.id, customer_id=customer.id) +async def classify(message: str): + result = await do_work(message) emit_outcome( "success", - value_type="intents_classified", + value_type="classifications_completed", value_amount=1, confidence=result.confidence, ) - return result.intent + return result.label ``` ## Automatic Outcomes -The `@botanu_use_case` decorator automatically emits outcomes: +The `@botanu_workflow` decorator automatically emits outcomes: ```python -@botanu_use_case("My Use Case", auto_outcome_on_success=True) # Default +@botanu_workflow("my-workflow", event_id=event_id, customer_id=customer_id, auto_outcome_on_success=True) # Default async def my_function(): # If no exception and no explicit emit_outcome, emits "success" return result @@ -210,33 +242,24 @@ If an exception is raised, it automatically emits `"failed"` with the exception To disable: ```python -@botanu_use_case("My Use Case", auto_outcome_on_success=False) +@botanu_workflow("my-workflow", event_id=event_id, customer_id=customer_id, auto_outcome_on_success=False) async def my_function(): # Must call emit_outcome explicitly emit_outcome("success") ``` -## @botanu_outcome Decorator +## Context Manager Alternative -For sub-functions within a use 
case: +Use `run_botanu` when you need workflow tracking without a decorator: ```python -from botanu import botanu_use_case, botanu_outcome - -@botanu_use_case("Data Pipeline") -async def run_pipeline(): - await step_one() - await step_two() - -@botanu_outcome() -async def step_one(): - # Emits "success" on completion, "failed" on exception - await process_data() - -@botanu_outcome(success="data_extracted", failed="extraction_failed") -async def step_two(): - # Custom outcome labels - await extract_data() +from botanu import run_botanu, emit_outcome + +async def my_function(event_id: str, customer_id: str): + async with run_botanu("my-workflow", event_id=event_id, customer_id=customer_id): + result = await do_work() + emit_outcome("success", value_type="items_processed", value_amount=result.count) + return result ``` ## Span Attributes @@ -245,11 +268,12 @@ Outcomes are recorded as span attributes: | Attribute | Description | |-----------|-------------| -| `botanu.outcome` | Status (success/partial/failed) | +| `botanu.outcome` | Status (success/partial/failed/timeout/canceled/abandoned) | | `botanu.outcome.value_type` | What was achieved | | `botanu.outcome.value_amount` | Quantified value | | `botanu.outcome.confidence` | Confidence score | | `botanu.outcome.reason` | Reason for outcome | +| `botanu.outcome.error_type` | Error classification | ## Span Events @@ -259,7 +283,7 @@ An event is also emitted for timeline visibility: # Event: botanu.outcome_emitted # Attributes: # status: "success" -# value_type: "tickets_resolved" +# value_type: "items_processed" # value_amount: 1 ``` @@ -268,39 +292,39 @@ An event is also emitted for timeline visibility: With outcomes recorded, you can calculate: ```sql --- Cost per successful ticket resolution +-- Cost per successful outcome SELECT - AVG(total_cost) as avg_cost_per_resolution + AVG(total_cost) as avg_cost_per_success FROM runs -WHERE use_case = 'Customer Support' +WHERE workflow = 'fulfill-order' AND outcome_status = 
'success' - AND outcome_value_type = 'tickets_resolved'; + AND outcome_value_type = 'orders_fulfilled'; --- ROI by use case +-- ROI by workflow SELECT - use_case, + workflow, SUM(outcome_value_amount * value_per_unit) as total_value, SUM(total_cost) as total_cost, (SUM(outcome_value_amount * value_per_unit) - SUM(total_cost)) / SUM(total_cost) as roi FROM runs -GROUP BY use_case; +GROUP BY workflow; ``` ## Best Practices ### 1. Always Record Outcomes -Every use case should emit an outcome: +Every workflow should emit an outcome: ```python -@botanu_use_case("My Use Case") +@botanu_workflow("my-workflow", event_id=event_id, customer_id=customer_id) async def my_function(): try: result = await do_work() emit_outcome("success", value_type="items_processed", value_amount=result.count) return result except Exception as e: - emit_outcome("failed", reason=type(e).__name__) + emit_outcome("failed", reason=type(e).__name__, error_type=type(e).__name__) raise ``` @@ -310,11 +334,11 @@ Define standard value types for your organization: ```python # Good - consistent naming -emit_outcome("success", value_type="tickets_resolved", value_amount=1) -emit_outcome("success", value_type="documents_processed", value_amount=1) +emit_outcome("success", value_type="items_processed", value_amount=1) +emit_outcome("success", value_type="documents_generated", value_amount=1) # Bad - inconsistent -emit_outcome("success", value_type="ticket_done", value_amount=1) +emit_outcome("success", value_type="item_done", value_amount=1) emit_outcome("success", value_type="doc processed", value_amount=1) ``` @@ -324,7 +348,7 @@ Include amounts for better analysis: ```python # Good - quantified -emit_outcome("success", value_type="emails_sent", value_amount=50) +emit_outcome("success", value_type="records_written", value_amount=50) # Less useful - no amount emit_outcome("success") @@ -335,17 +359,17 @@ emit_outcome("success") Always explain why something failed: ```python -emit_outcome("failed", 
reason="api_rate_limit") -emit_outcome("failed", reason="invalid_input_format") -emit_outcome("failed", reason="model_unavailable") +emit_outcome("failed", reason="api_rate_limit", error_type="RateLimitError") +emit_outcome("failed", reason="invalid_input_format", error_type="ValidationError") +emit_outcome("timeout", reason="model_unavailable", error_type="TimeoutError") ``` ### 5. One Outcome Per Run -Emit only one outcome per use case execution: +Emit only one outcome per workflow execution: ```python -@botanu_use_case("Process Items") +@botanu_workflow("process-items", event_id=event_id, customer_id=customer_id) async def process_items(items): successful = 0 for item in items: diff --git a/pyproject.toml b/pyproject.toml index c0f3d65..a8a26e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,9 +123,22 @@ Changelog = "https://github.com/botanu-ai/botanu-sdk-python/blob/main/CHANGELOG. Issues = "https://github.com/botanu-ai/botanu-sdk-python/issues" # --------------------------------------------------------------------------- -# Optional extras (dev only — base install includes everything) +# Optional extras # --------------------------------------------------------------------------- [project.optional-dependencies] +# Cloud resource detectors — lightweight, auto-detect env and no-op elsewhere. +# K8s: use the OTel Operator or K8s downward API to set OTEL_RESOURCE_ATTRIBUTES. +# EKS/GKE detection is included in the aws/gcp extras respectively. 
+aws = ["opentelemetry-resource-detector-aws >= 0.1b0"]
+gcp = ["opentelemetry-resource-detector-gcp >= 0.1b0"]
+azure = ["opentelemetry-resource-detector-azure >= 0.1b0"]
+container = ["opentelemetry-resource-detector-container >= 0.1b0"]
+cloud = [
+    "opentelemetry-resource-detector-aws >= 0.1b0",
+    "opentelemetry-resource-detector-gcp >= 0.1b0",
+    "opentelemetry-resource-detector-azure >= 0.1b0",
+    "opentelemetry-resource-detector-container >= 0.1b0",
+]
 dev = [
     "pytest >= 7.4.0",
     "pytest-asyncio >= 0.21.0",
@@ -133,7 +146,7 @@ dev = [
     "coverage[toml] >= 7.0",
     "httpx >= 0.24.0, < 0.28.0; python_version < '3.10'",
     "httpx >= 0.24.0; python_version >= '3.10'",
     "starlette >= 0.27.0, < 0.30.0; python_version < '3.10'",
     "starlette >= 0.27.0; python_version >= '3.10'",
     "ruff >= 0.4.0",
     "mypy >= 1.7.0",
diff --git a/src/botanu/__init__.py b/src/botanu/__init__.py
index 527714b..08e2576 100644
--- a/src/botanu/__init__.py
+++ b/src/botanu/__init__.py
@@ -5,11 +5,15 @@
 Quick Start::
 
-    from botanu import enable, botanu_use_case, emit_outcome
+    from botanu import enable, botanu_workflow, emit_outcome
 
     enable()  # reads config from OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT env vars
 
-    @botanu_use_case(name="Customer Support")
+    @botanu_workflow(
+        name="customer-support",
+        event_id=lambda data: data["ticket_id"],
+        customer_id=lambda data: data["customer_id"],
+    )
     async def handle_request(data):
         result = await process(data)
         emit_outcome("success", value_type="tickets_resolved", value_amount=1)
@@ -38,12 +38,12 @@ async def handle_request(data):
     get_baggage,
     get_current_span,
     get_run_id,
-    get_use_case,
+    get_workflow,
     set_baggage,
 )
 
 # Decorators (primary integration point)
-from botanu.sdk.decorators import botanu_outcome, botanu_use_case, use_case
+from botanu.sdk.decorators import botanu_workflow, run_botanu, workflow
 
 # Span helpers
 from botanu.sdk.span_helpers import emit_outcome, set_business_context
@@ -56,17 +56,17 @@ async def handle_request(data):
     "is_enabled",
     # Configuration
     "BotanuConfig",
-    # Decorators
-    "botanu_use_case",
-    "use_case",
-    "botanu_outcome",
+    # Decorators / context managers
+    "botanu_workflow",
+    "run_botanu",
+    "workflow",
     # Span helpers
     "emit_outcome",
     "set_business_context",
     "get_current_span",
     # Context
     "get_run_id",
-    "get_use_case",
+    "get_workflow",
     "set_baggage",
     "get_baggage",
     # Run context
diff --git a/src/botanu/integrations/__init__.py b/src/botanu/integrations/__init__.py
new file mode 100644
index 0000000..b1c7c4c
--- /dev/null
+++ b/src/botanu/integrations/__init__.py
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+
+"""Botanu integrations with third-party libraries."""
diff --git a/src/botanu/integrations/tenacity.py b/src/botanu/integrations/tenacity.py
new file mode 100644
index 0000000..7708f05
--- /dev/null
+++ b/src/botanu/integrations/tenacity.py
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tenacity retry integration — automatic attempt tracking for LLM calls.
+
+Stamps ``botanu.request.attempt`` on every span created inside a tenacity
+retry loop so the collector and cost engine can see how many attempts an
+event required.
+
+Usage::
+
+    from tenacity import retry, stop_after_attempt, wait_exponential
+    from botanu.integrations.tenacity import botanu_before, botanu_after_all
+    from botanu.tracking.llm import track_llm_call
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(min=1, max=10),
+        before=botanu_before,
+        after=botanu_after_all,  # optional — resets attempt counter
+    )
+    def call_llm():
+        with track_llm_call("openai", "gpt-4") as tracker:
+            response = openai.chat.completions.create(...)
+            tracker.set_tokens(
+                input_tokens=response.usage.prompt_tokens,
+                output_tokens=response.usage.completion_tokens,
+            )
+            return response
+
+The ``track_llm_call`` context manager reads the attempt number
+automatically — no need to call ``tracker.set_attempt()`` manually.
+""" + +from __future__ import annotations + +from typing import Any + +from botanu.tracking.llm import _retry_attempt + + +def botanu_before(retry_state: Any) -> None: + """Tenacity ``before`` callback — sets the current attempt number. + + Use as ``@retry(before=botanu_before)`` so that every + ``track_llm_call`` inside the retried function automatically + gets the correct attempt number on its span. + """ + _retry_attempt.set(retry_state.attempt_number) + + +def botanu_after_all(retry_state: Any) -> None: + """Tenacity ``after`` callback — resets the attempt counter. + + Optional but recommended. Prevents a stale attempt number from + leaking into subsequent non-retried calls on the same thread. + + Use as ``@retry(after=botanu_after_all)``. + """ + _retry_attempt.set(0) diff --git a/src/botanu/models/run_context.py b/src/botanu/models/run_context.py index 264801f..1656d8a 100644 --- a/src/botanu/models/run_context.py +++ b/src/botanu/models/run_context.py @@ -5,7 +5,7 @@ A "Run" is orthogonal to tracing: - Trace context (W3C): ties distributed spans together (trace_id, span_id) -- Run context (Botanu): ties business execution together (run_id, use_case, outcome) +- Run context (Botanu): ties business execution together (run_id, workflow, outcome) Invariant: A run can span multiple traces (retries, async fanout). The run_id must remain stable across those boundaries. 
@@ -85,9 +85,10 @@ class RunContext: """ run_id: str - use_case: str + workflow: str + event_id: str + customer_id: str environment: str - workflow: Optional[str] = None workflow_version: Optional[str] = None tenant_id: Optional[str] = None parent_run_id: Optional[str] = None @@ -111,8 +112,9 @@ def __post_init__(self) -> None: @classmethod def create( cls, - use_case: str, - workflow: Optional[str] = None, + workflow: str, + event_id: str, + customer_id: str, workflow_version: Optional[str] = None, environment: Optional[str] = None, tenant_id: Optional[str] = None, @@ -131,9 +133,10 @@ def create( return cls( run_id=run_id, - use_case=use_case, - environment=env, workflow=workflow, + event_id=event_id, + customer_id=customer_id, + environment=env, workflow_version=workflow_version, tenant_id=tenant_id, parent_run_id=parent_run_id, @@ -147,8 +150,9 @@ def create( def create_retry(cls, previous: RunContext) -> RunContext: """Create a new RunContext for a retry attempt.""" return cls.create( - use_case=previous.use_case, workflow=previous.workflow, + event_id=previous.event_id, + customer_id=previous.customer_id, workflow_version=previous.workflow_version, environment=previous.environment, tenant_id=previous.tenant_id, @@ -215,14 +219,14 @@ def to_baggage_dict(self, lean_mode: Optional[bool] = None) -> Dict[str, str]: baggage: Dict[str, str] = { "botanu.run_id": self.run_id, - "botanu.use_case": self.use_case, + "botanu.workflow": self.workflow, + "botanu.event_id": self.event_id, + "botanu.customer_id": self.customer_id, } if lean_mode: return baggage baggage["botanu.environment"] = self.environment - if self.workflow: - baggage["botanu.workflow"] = self.workflow if self.tenant_id: baggage["botanu.tenant_id"] = self.tenant_id if self.parent_run_id: @@ -243,12 +247,12 @@ def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]: """Convert to dict for span attributes.""" attrs: Dict[str, Union[str, float, int, bool]] = { "botanu.run_id": self.run_id, - 
"botanu.use_case": self.use_case, + "botanu.workflow": self.workflow, + "botanu.event_id": self.event_id, + "botanu.customer_id": self.customer_id, "botanu.environment": self.environment, "botanu.run.start_time": self.start_time.isoformat(), } - if self.workflow: - attrs["botanu.workflow"] = self.workflow if self.workflow_version: attrs["botanu.workflow.version"] = self.workflow_version if self.tenant_id: @@ -285,8 +289,8 @@ def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]: def from_baggage(cls, baggage: Dict[str, str]) -> Optional[RunContext]: """Reconstruct RunContext from baggage dict.""" run_id = baggage.get("botanu.run_id") - use_case = baggage.get("botanu.use_case") - if not run_id or not use_case: + workflow = baggage.get("botanu.workflow") + if not run_id or not workflow: return None attempt_str = baggage.get("botanu.attempt", "1") @@ -305,11 +309,15 @@ def from_baggage(cls, baggage: Dict[str, str]) -> Optional[RunContext]: cancelled = baggage.get("botanu.cancelled", "").lower() == "true" + event_id = baggage.get("botanu.event_id", "") + customer_id = baggage.get("botanu.customer_id", "") + return cls( run_id=run_id, - use_case=use_case, + workflow=workflow, + event_id=event_id, + customer_id=customer_id, environment=baggage.get("botanu.environment", "unknown"), - workflow=baggage.get("botanu.workflow"), tenant_id=baggage.get("botanu.tenant_id"), parent_run_id=baggage.get("botanu.parent_run_id"), root_run_id=baggage.get("botanu.root_run_id") or run_id, diff --git a/src/botanu/processors/enricher.py b/src/botanu/processors/enricher.py index 85b3f78..ee77377 100644 --- a/src/botanu/processors/enricher.py +++ b/src/botanu/processors/enricher.py @@ -30,19 +30,20 @@ class RunContextEnricher(SpanProcessor): """Enriches ALL spans with run context from baggage. This ensures that every span (including auto-instrumented ones) - gets ``botanu.run_id``, ``botanu.use_case``, etc. attributes. + gets ``botanu.run_id``, ``botanu.workflow``, etc. 
attributes.
     Without this processor, only the root ``botanu.run`` span
     would have these attributes.
 
-    In ``lean_mode`` (default), only ``run_id`` and ``use_case`` are
+    In ``lean_mode`` (default), only ``run_id``, ``workflow``, ``event_id`` and ``customer_id`` are
     propagated to minimise per-span overhead.
     """
 
     BAGGAGE_KEYS_FULL: ClassVar[List[str]] = [
         "botanu.run_id",
-        "botanu.use_case",
         "botanu.workflow",
+        "botanu.event_id",
+        "botanu.customer_id",
         "botanu.environment",
         "botanu.tenant_id",
         "botanu.parent_run_id",
@@ -50,7 +51,9 @@ class RunContextEnricher(SpanProcessor):
 
     BAGGAGE_KEYS_LEAN: ClassVar[List[str]] = [
         "botanu.run_id",
-        "botanu.use_case",
+        "botanu.workflow",
+        "botanu.event_id",
+        "botanu.customer_id",
     ]
 
     def __init__(self, lean_mode: bool = True) -> None:
diff --git a/src/botanu/resources/__init__.py b/src/botanu/resources/__init__.py
index 474c051..265662b 100644
--- a/src/botanu/resources/__init__.py
+++ b/src/botanu/resources/__init__.py
@@ -1,8 +1,87 @@
 # SPDX-FileCopyrightText: 2026 The Botanu Authors
 # SPDX-License-Identifier: Apache-2.0
 
-"""Botanu resource detection."""
+"""Resource detection using official OTel community detectors.
 
-from botanu.resources.detector import detect_all_resources, get_resource_attributes
+Instead of a custom reimplementation, we try to import the official
+OpenTelemetry resource detector packages. Each one is a lightweight
+pip package that auto-detects environment attributes (K8s, AWS, GCP,
+Azure, container). If a package isn't installed, we gracefully skip it.
-__all__ = ["detect_all_resources", "get_resource_attributes"] +Install detectors for your environment:: + + pip install botanu[aws] # AWS EC2/ECS/EKS/Lambda + pip install botanu[gcp] # GCE/GKE/Cloud Run/Cloud Functions + pip install botanu[azure] # Azure VMs/App Service/Functions + pip install botanu[cloud] # All cloud detectors +""" + +from __future__ import annotations + +import importlib +import logging +from typing import Any, Dict, List, Tuple + +logger = logging.getLogger(__name__) + +# (module_path, class_name) — tried in order. +# Each entry corresponds to a pip package from opentelemetry-python-contrib. +_DETECTOR_REGISTRY: List[Tuple[str, str]] = [ + # Built-in (opentelemetry-sdk — always available) + ("opentelemetry.sdk.resources", "ProcessResourceDetector"), + # opentelemetry-resource-detector-aws + ("opentelemetry.resource.detector.aws.ec2", "AwsEc2ResourceDetector"), + ("opentelemetry.resource.detector.aws.ecs", "AwsEcsResourceDetector"), + ("opentelemetry.resource.detector.aws.eks", "AwsEksResourceDetector"), + ("opentelemetry.resource.detector.aws.lambda_", "AwsLambdaResourceDetector"), + # opentelemetry-resource-detector-gcp + ("opentelemetry.resource.detector.gcp", "GoogleCloudResourceDetector"), + # opentelemetry-resource-detector-azure + ("opentelemetry.resource.detector.azure.vm", "AzureVMResourceDetector"), + ("opentelemetry.resource.detector.azure.app_service", "AzureAppServiceResourceDetector"), + # opentelemetry-resource-detector-container + ("opentelemetry.resource.detector.container", "ContainerResourceDetector"), +] + + +def collect_detectors() -> list: + """Return instances of all importable OTel resource detectors. + + Each detector implements ``opentelemetry.sdk.resources.ResourceDetector``. + Missing packages are silently skipped. 
+ """ + detectors: list = [] + for module_path, class_name in _DETECTOR_REGISTRY: + try: + mod = importlib.import_module(module_path) + cls = getattr(mod, class_name) + detectors.append(cls()) + except (ImportError, AttributeError): + pass + + if detectors: + names = [type(d).__name__ for d in detectors] + logger.debug("Available resource detectors: %s", names) + + return detectors + + +def detect_resource_attrs() -> Dict[str, Any]: + """Detect environment attributes using available OTel detectors. + + Returns a flat dict of resource attributes. This is a convenience + wrapper for callers that just need a dict (like bootstrap.py). + """ + attrs: Dict[str, Any] = {} + for detector in collect_detectors(): + try: + resource = detector.detect() + attrs.update(dict(resource.attributes)) + except Exception: + # Community detectors may raise on network timeouts, missing + # metadata endpoints, etc. Never let detection break SDK init. + logger.debug("Resource detector %s failed", type(detector).__name__, exc_info=True) + return attrs + + +__all__ = ["collect_detectors", "detect_resource_attrs"] diff --git a/src/botanu/resources/detector.py b/src/botanu/resources/detector.py deleted file mode 100644 index 1a6bf50..0000000 --- a/src/botanu/resources/detector.py +++ /dev/null @@ -1,366 +0,0 @@ -# SPDX-FileCopyrightText: 2026 The Botanu Authors -# SPDX-License-Identifier: Apache-2.0 - -"""Resource Detector — auto-detect execution environment for cost attribution. 
- -Detects attributes from: -- Kubernetes (``k8s.*``) -- Cloud providers (``cloud.*``, ``aws.*``, ``gcp.*``, ``azure.*``) -- Host / VM (``host.*``, ``os.*``) -- Container (``container.*``) -- Serverless / FaaS (``faas.*``) -- Process (``process.*``) -""" - -from __future__ import annotations - -import os -import platform -import socket -import sys -from functools import lru_cache -from typing import Any, Dict, Optional - -# ========================================================================= -# Environment Variable Mappings -# ========================================================================= - -K8S_ENV_MAPPINGS: Dict[str, Optional[str]] = { - "KUBERNETES_SERVICE_HOST": None, - "HOSTNAME": "k8s.pod.name", - "K8S_POD_NAME": "k8s.pod.name", - "K8S_POD_UID": "k8s.pod.uid", - "K8S_NAMESPACE": "k8s.namespace.name", - "K8S_NODE_NAME": "k8s.node.name", - "K8S_CLUSTER_NAME": "k8s.cluster.name", - "K8S_DEPLOYMENT_NAME": "k8s.deployment.name", - "K8S_STATEFULSET_NAME": "k8s.statefulset.name", - "K8S_CONTAINER_NAME": "k8s.container.name", -} - -AWS_ENV_MAPPINGS: Dict[str, Optional[str]] = { - "AWS_REGION": "cloud.region", - "AWS_DEFAULT_REGION": "cloud.region", - "AWS_ACCOUNT_ID": "cloud.account.id", - "ECS_CONTAINER_METADATA_URI": None, - "ECS_CLUSTER": "aws.ecs.cluster.name", - "ECS_TASK_ARN": "aws.ecs.task.arn", - "ECS_TASK_DEFINITION_FAMILY": "aws.ecs.task.family", - "AWS_LAMBDA_FUNCTION_NAME": "faas.name", - "AWS_LAMBDA_FUNCTION_VERSION": "faas.version", - "AWS_LAMBDA_LOG_GROUP_NAME": "aws.lambda.log_group", - "AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "faas.max_memory", -} - -GCP_ENV_MAPPINGS: Dict[str, Optional[str]] = { - "GOOGLE_CLOUD_PROJECT": "cloud.account.id", - "GCLOUD_PROJECT": "cloud.account.id", - "GCP_PROJECT": "cloud.account.id", - "GOOGLE_CLOUD_REGION": "cloud.region", - "K_SERVICE": "faas.name", - "K_REVISION": "faas.version", - "K_CONFIGURATION": "gcp.cloud_run.configuration", - "FUNCTION_NAME": "faas.name", - "FUNCTION_TARGET": "faas.trigger", - 
"FUNCTION_SIGNATURE_TYPE": "gcp.function.signature_type", -} - -AZURE_ENV_MAPPINGS: Dict[str, Optional[str]] = { - "AZURE_SUBSCRIPTION_ID": "cloud.account.id", - "AZURE_RESOURCE_GROUP": "azure.resource_group", - "WEBSITE_SITE_NAME": "faas.name", - "FUNCTIONS_EXTENSION_VERSION": "azure.functions.version", - "WEBSITE_INSTANCE_ID": "faas.instance", - "REGION_NAME": "cloud.region", -} - - -# ========================================================================= -# Detection Functions -# ========================================================================= - - -def detect_kubernetes() -> Dict[str, Any]: - attrs: Dict[str, Any] = {} - if not os.environ.get("KUBERNETES_SERVICE_HOST"): - return attrs - - for env_var, attr_name in K8S_ENV_MAPPINGS.items(): - value = os.environ.get(env_var) - if attr_name and value: - attrs[attr_name] = value - - if "k8s.pod.name" not in attrs: - hostname = os.environ.get("HOSTNAME", socket.gethostname()) - if hostname: - attrs["k8s.pod.name"] = hostname - - namespace_file = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" - if "k8s.namespace.name" not in attrs and os.path.exists(namespace_file): - try: - with open(namespace_file) as fh: - attrs["k8s.namespace.name"] = fh.read().strip() - except OSError: - pass - - return attrs - - -def detect_cloud_provider() -> Dict[str, Any]: - attrs: Dict[str, Any] = {} - - if _is_aws(): - attrs["cloud.provider"] = "aws" - for env_var, attr_name in AWS_ENV_MAPPINGS.items(): - value = os.environ.get(env_var) - if attr_name and value: - attrs[attr_name] = value - - if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"): - attrs["faas.id"] = ( - f"arn:aws:lambda:{attrs.get('cloud.region', 'unknown')}:" - f"{attrs.get('cloud.account.id', 'unknown')}:" - f"function:{os.environ['AWS_LAMBDA_FUNCTION_NAME']}" - ) - - az = _get_aws_availability_zone() - if az: - attrs["cloud.availability_zone"] = az - if "cloud.region" not in attrs: - attrs["cloud.region"] = az[:-1] - - elif _is_gcp(): - 
attrs["cloud.provider"] = "gcp" - for env_var, attr_name in GCP_ENV_MAPPINGS.items(): - value = os.environ.get(env_var) - if attr_name and value: - attrs[attr_name] = value - if os.environ.get("K_SERVICE"): - attrs["faas.trigger"] = "http" - elif os.environ.get("FUNCTION_NAME"): - attrs["faas.trigger"] = os.environ.get("FUNCTION_TRIGGER_TYPE", "unknown") - - elif _is_azure(): - attrs["cloud.provider"] = "azure" - for env_var, attr_name in AZURE_ENV_MAPPINGS.items(): - value = os.environ.get(env_var) - if attr_name and value: - attrs[attr_name] = value - - return attrs - - -def _is_aws() -> bool: - indicators = [ - "AWS_REGION", - "AWS_DEFAULT_REGION", - "AWS_LAMBDA_FUNCTION_NAME", - "ECS_CONTAINER_METADATA_URI", - "AWS_EXECUTION_ENV", - ] - return any(os.environ.get(var) for var in indicators) - - -def _is_gcp() -> bool: - indicators = [ - "GOOGLE_CLOUD_PROJECT", - "GCLOUD_PROJECT", - "GCP_PROJECT", - "K_SERVICE", - "FUNCTION_NAME", - ] - return any(os.environ.get(var) for var in indicators) - - -def _is_azure() -> bool: - indicators = [ - "WEBSITE_SITE_NAME", - "AZURE_FUNCTIONS_ENVIRONMENT", - "AZURE_SUBSCRIPTION_ID", - ] - return any(os.environ.get(var) for var in indicators) - - -def _get_aws_availability_zone() -> Optional[str]: - """Get AWS availability zone from EC2 instance metadata. - - Uses IMDS (Instance Metadata Service) which is only accessible from within EC2. 
- Configure via environment variables: - - AWS_EC2_METADATA_SERVICE_ENDPOINT: Override the metadata endpoint - - AWS_EC2_METADATA_DISABLED: Set to 'true' to disable metadata calls - """ - if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"): - return None - - # Respect AWS SDK standard env vars for disabling/configuring metadata - if os.environ.get("AWS_EC2_METADATA_DISABLED", "").lower() == "true": - return None - - # Use AWS SDK standard endpoint override, or default to standard IMDS address - endpoint = os.environ.get("AWS_EC2_METADATA_SERVICE_ENDPOINT", "http://169.254.169.254") - if not endpoint or not endpoint.startswith(("http://", "https://")): - return None - - try: - import urllib.request - - url = f"{endpoint}/latest/meta-data/placement/availability-zone" - req = urllib.request.Request(url, headers={"Accept": "text/plain"}) # noqa: S310 - with urllib.request.urlopen(req, timeout=0.5) as resp: # noqa: S310 - return resp.read().decode("utf-8").strip() - except Exception: - return None - - -def detect_host() -> Dict[str, Any]: - attrs: Dict[str, Any] = {} - try: - hostname = socket.gethostname() - if hostname: - attrs["host.name"] = hostname - except Exception: - pass - - host_id = os.environ.get("HOST_ID") or os.environ.get("INSTANCE_ID") - if host_id: - attrs["host.id"] = host_id - elif "host.name" in attrs: - attrs["host.id"] = attrs["host.name"] - - attrs["os.type"] = sys.platform - attrs["host.arch"] = platform.machine() - return attrs - - -def detect_container() -> Dict[str, Any]: - attrs: Dict[str, Any] = {} - container_id = _get_container_id() - if container_id: - attrs["container.id"] = container_id - - if os.path.exists("/.dockerenv"): - attrs["container.runtime"] = "docker" - elif os.environ.get("KUBERNETES_SERVICE_HOST"): - attrs["container.runtime"] = "containerd" - return attrs - - -def _get_container_id() -> Optional[str]: - container_id = os.environ.get("CONTAINER_ID") or os.environ.get("HOSTNAME") - - cgroup_path = "/proc/self/cgroup" - if 
os.path.exists(cgroup_path): - try: - with open(cgroup_path) as fh: - for line in fh: - if "docker" in line or "kubepods" in line: - parts = line.strip().split("/") - if parts: - last = parts[-1] - if last.startswith("cri-containerd-"): - last = last[15:] - if len(last) >= 12: - return last[:64] - except OSError: - pass - - return container_id if container_id and len(container_id) >= 12 else None - - -def detect_process() -> Dict[str, Any]: - attrs: Dict[str, Any] = {} - attrs["process.pid"] = os.getpid() - attrs["process.runtime.name"] = "python" - attrs["process.runtime.version"] = sys.version.split()[0] - if sys.argv: - attrs["process.command"] = sys.argv[0][:200] - return attrs - - -def detect_serverless() -> Dict[str, Any]: - attrs: Dict[str, Any] = {} - - if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"): - attrs["faas.name"] = os.environ["AWS_LAMBDA_FUNCTION_NAME"] - version = os.environ.get("AWS_LAMBDA_FUNCTION_VERSION") - if version: - attrs["faas.version"] = version - memory = os.environ.get("AWS_LAMBDA_FUNCTION_MEMORY_SIZE") - if memory: - attrs["faas.max_memory"] = int(memory) * 1024 * 1024 - - elif os.environ.get("K_SERVICE"): - attrs["faas.name"] = os.environ["K_SERVICE"] - revision = os.environ.get("K_REVISION") - if revision: - attrs["faas.version"] = revision - - elif os.environ.get("FUNCTION_NAME"): - attrs["faas.name"] = os.environ["FUNCTION_NAME"] - target = os.environ.get("FUNCTION_TARGET") - if target: - attrs["faas.trigger"] = target - - elif os.environ.get("WEBSITE_SITE_NAME"): - attrs["faas.name"] = os.environ["WEBSITE_SITE_NAME"] - instance = os.environ.get("WEBSITE_INSTANCE_ID") - if instance: - attrs["faas.instance"] = instance - - return attrs - - -# ========================================================================= -# Main Detection -# ========================================================================= - - -@lru_cache(maxsize=1) -def detect_all_resources() -> Dict[str, Any]: - """Detect all environment resource attributes. 
- - Results are cached (environment doesn't change during runtime). - """ - attrs: Dict[str, Any] = {} - attrs.update(detect_host()) - attrs.update(detect_process()) - attrs.update(detect_container()) - attrs.update(detect_cloud_provider()) - attrs.update(detect_kubernetes()) - attrs.update(detect_serverless()) - - if "service.instance.id" not in attrs: - container_id = attrs.get("container.id") - if container_id: - attrs["service.instance.id"] = container_id[:12] - elif pod_name := attrs.get("k8s.pod.name"): - attrs["service.instance.id"] = pod_name - elif host_id := attrs.get("host.id"): - attrs["service.instance.id"] = host_id - - return attrs - - -def get_resource_attributes( - include_host: bool = True, - include_process: bool = True, - include_container: bool = True, - include_cloud: bool = True, - include_k8s: bool = True, - include_faas: bool = True, -) -> Dict[str, Any]: - """Get resource attributes with selective detection.""" - attrs: Dict[str, Any] = {} - if include_host: - attrs.update(detect_host()) - if include_process: - attrs.update(detect_process()) - if include_container: - attrs.update(detect_container()) - if include_cloud: - attrs.update(detect_cloud_provider()) - if include_k8s: - attrs.update(detect_kubernetes()) - if include_faas: - attrs.update(detect_serverless()) - return attrs diff --git a/src/botanu/sdk/__init__.py b/src/botanu/sdk/__init__.py index 2a6229d..820284d 100644 --- a/src/botanu/sdk/__init__.py +++ b/src/botanu/sdk/__init__.py @@ -11,17 +11,16 @@ get_baggage, get_current_span, get_run_id, - get_use_case, get_workflow, set_baggage, ) -from botanu.sdk.decorators import botanu_outcome, botanu_use_case, use_case +from botanu.sdk.decorators import botanu_outcome, botanu_workflow, run_botanu, workflow from botanu.sdk.span_helpers import emit_outcome, set_business_context __all__ = [ "BotanuConfig", "botanu_outcome", - "botanu_use_case", + "botanu_workflow", "disable", "emit_outcome", "enable", @@ -29,10 +28,10 @@ "get_config", 
"get_current_span", "get_run_id", - "get_use_case", "get_workflow", "is_enabled", + "run_botanu", "set_baggage", "set_business_context", - "use_case", + "workflow", ] diff --git a/src/botanu/sdk/bootstrap.py b/src/botanu/sdk/bootstrap.py index 879bffd..548e4af 100644 --- a/src/botanu/sdk/bootstrap.py +++ b/src/botanu/sdk/bootstrap.py @@ -129,7 +129,7 @@ def enable( try: from botanu._version import __version__ from botanu.processors import RunContextEnricher - from botanu.resources.detector import detect_all_resources + from botanu.resources import detect_resource_attrs resource_attrs = { "service.name": cfg.service_name, @@ -143,7 +143,7 @@ def enable( resource_attrs["service.namespace"] = cfg.service_namespace if cfg.auto_detect_resources: - detected = detect_all_resources() + detected = detect_resource_attrs() for key, value in detected.items(): if key not in resource_attrs: resource_attrs[key] = value @@ -152,13 +152,8 @@ def enable( resource = Resource.create(resource_attrs) - existing = trace.get_tracer_provider() - if isinstance(existing, TracerProvider): - provider = existing - logger.info("Reusing existing TracerProvider — adding Botanu processors") - else: - provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) - trace.set_tracer_provider(provider) + provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) + trace.set_tracer_provider(provider) lean_mode = cfg.propagation_mode == "lean" provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) @@ -188,6 +183,25 @@ def enable( logger.info("Botanu SDK tracing initialized") + # Set up LoggerProvider for outcome event emission + try: + from opentelemetry._logs import set_logger_provider + from opentelemetry.sdk._logs import LoggerProvider as _LoggerProvider + from opentelemetry.sdk._logs.export import BatchLogRecordProcessor + from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter + + logs_endpoint = cfg.otlp_endpoint + if logs_endpoint and not 
logs_endpoint.endswith("/v1/logs"): + logs_endpoint = f"{logs_endpoint.rstrip('/')}/v1/logs" + + log_provider = _LoggerProvider(resource=resource) + log_exporter = OTLPLogExporter(endpoint=logs_endpoint, headers=cfg.otlp_headers or {}) + log_provider.add_log_record_processor(BatchLogRecordProcessor(log_exporter)) + set_logger_provider(log_provider) + logger.info("Botanu SDK log provider initialized") + except ImportError: + logger.debug("OTel log exporter not available; outcome log emission disabled") + if auto_instrumentation: _enable_auto_instrumentation() @@ -373,6 +387,16 @@ def disable() -> None: if hasattr(provider, "shutdown"): provider.shutdown() + # Flush LoggerProvider (don't shutdown — it may be shared/external) + try: + from opentelemetry._logs import get_logger_provider + + log_provider = get_logger_provider() + if hasattr(log_provider, "force_flush"): + log_provider.force_flush(timeout_millis=5000) + except Exception: + pass + _initialized = False _current_config = None logger.info("Botanu SDK shutdown complete") diff --git a/src/botanu/sdk/config.py b/src/botanu/sdk/config.py index 10b6646..525074b 100644 --- a/src/botanu/sdk/config.py +++ b/src/botanu/sdk/config.py @@ -6,7 +6,7 @@ The SDK is intentionally minimal on the hot path. 
Heavy processing happens in the OpenTelemetry Collector, not in the application: -- **SDK responsibility**: Generate run_id, propagate minimal context (run_id, use_case) +- **SDK responsibility**: Generate run_id, propagate minimal context (run_id, workflow) - **Collector responsibility**: PII redaction, vendor detection, attribute enrichment Configuration precedence (highest to lowest): @@ -65,7 +65,7 @@ class BotanuConfig: schedule_delay_millis: int = 5000 export_timeout_millis: int = 30000 - # Propagation mode: "lean" (run_id + use_case only) or "full" (all context) + # Propagation mode: "lean" (run_id + workflow only) or "full" (all context) propagation_mode: str = "lean" # Auto-instrumentation packages to enable diff --git a/src/botanu/sdk/context.py b/src/botanu/sdk/context.py index 5a75e3f..05290a5 100644 --- a/src/botanu/sdk/context.py +++ b/src/botanu/sdk/context.py @@ -68,11 +68,6 @@ def get_run_id() -> Optional[str]: return get_baggage("botanu.run_id") -def get_use_case() -> Optional[str]: - """Get the current ``use_case`` from baggage.""" - return get_baggage("botanu.use_case") - - def get_workflow() -> Optional[str]: """Get the current ``workflow`` from baggage.""" return get_baggage("botanu.workflow") diff --git a/src/botanu/sdk/decorators.py b/src/botanu/sdk/decorators.py index 4bffa6c..da9d786 100644 --- a/src/botanu/sdk/decorators.py +++ b/src/botanu/sdk/decorators.py @@ -3,7 +3,7 @@ """Decorators for automatic run span creation and context propagation. -The ``@botanu_use_case`` decorator is the primary integration point. +The ``@botanu_workflow`` decorator is the primary integration point. 
It creates a "run span" that: - Generates a UUIDv7 run_id - Emits ``run.started`` and ``run.completed`` events @@ -13,12 +13,14 @@ from __future__ import annotations +import contextlib import functools import hashlib import inspect from collections.abc import Mapping +from contextlib import asynccontextmanager, contextmanager from datetime import datetime, timezone -from typing import Any, Callable, Dict, Optional, TypeVar, Union +from typing import Any, Callable, Dict, Generator, Optional, TypeVar, Union from opentelemetry import baggage as otel_baggage from opentelemetry import trace @@ -46,10 +48,11 @@ def _get_parent_run_id() -> Optional[str]: return get_baggage("botanu.run_id") -def botanu_use_case( +def botanu_workflow( name: str, - workflow: Optional[str] = None, *, + event_id: Union[str, Callable[..., str]], + customer_id: Union[str, Callable[..., str]], environment: Optional[str] = None, tenant_id: Optional[str] = None, auto_outcome_on_success: bool = True, @@ -66,33 +69,53 @@ def botanu_use_case( 5. On completion: emits ``run.completed`` event with outcome Args: - name: Use case name (low cardinality, e.g. ``"Customer Support"``). - workflow: Workflow name (defaults to function qualified name). + name: Workflow name (low cardinality, e.g. ``"Customer Support"``). + event_id: Business unit of work (e.g. ticket ID). Required. + Can be a static string or a callable that receives the same + ``(*args, **kwargs)`` as the decorated function and returns a string. + customer_id: End-customer being served (e.g. org ID). Required. + Can be a static string or a callable (same signature as *event_id*). environment: Deployment environment. tenant_id: Tenant identifier for multi-tenant apps. auto_outcome_on_success: Emit ``"success"`` if no exception. span_kind: OpenTelemetry span kind (default: ``SERVER``). 
- Example:: + Examples:: - @botanu_use_case("Customer Support") - async def handle_ticket(ticket_id: str): - result = await process_ticket(ticket_id) - emit_outcome("success", value_type="tickets_resolved", value_amount=1) - return result + # Static values (known at decoration time): + @botanu_workflow("Support", event_id="ticket-123", customer_id="acme-corp") + async def handle_ticket(): ... + + # Dynamic values (extracted from function arguments at call time): + @botanu_workflow( + "Support", + event_id=lambda request: request.workflow_id, + customer_id=lambda request: request.customer_id, + ) + async def handle_ticket(request: TicketRequest): ... """ + if isinstance(event_id, str) and not event_id: + raise ValueError("event_id is required and must be a non-empty string") + if isinstance(customer_id, str) and not customer_id: + raise ValueError("customer_id is required and must be a non-empty string") + if not callable(event_id) and not isinstance(event_id, str): + raise ValueError("event_id must be a non-empty string or a callable") + if not callable(customer_id) and not isinstance(customer_id, str): + raise ValueError("customer_id must be a non-empty string or a callable") def decorator(func: Callable[..., T]) -> Callable[..., T]: - workflow_name = workflow or func.__qualname__ workflow_version = _compute_workflow_version(func) is_async = inspect.iscoroutinefunction(func) @functools.wraps(func) async def async_wrapper(*args: Any, **kwargs: Any) -> T: + resolved_event_id = event_id(*args, **kwargs) if callable(event_id) else event_id + resolved_customer_id = customer_id(*args, **kwargs) if callable(customer_id) else customer_id parent_run_id = _get_parent_run_id() run_ctx = RunContext.create( - use_case=name, - workflow=workflow_name, + workflow=name, + event_id=resolved_event_id, + customer_id=resolved_customer_id, workflow_version=workflow_version, environment=environment, tenant_id=tenant_id, @@ -110,8 +133,7 @@ async def async_wrapper(*args: Any, **kwargs: 
Any) -> T: "botanu.run.started", attributes={ "run_id": run_ctx.run_id, - "use_case": run_ctx.use_case, - "workflow": workflow_name, + "workflow": run_ctx.workflow, }, ) @@ -151,10 +173,13 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> T: @functools.wraps(func) def sync_wrapper(*args: Any, **kwargs: Any) -> T: + resolved_event_id = event_id(*args, **kwargs) if callable(event_id) else event_id + resolved_customer_id = customer_id(*args, **kwargs) if callable(customer_id) else customer_id parent_run_id = _get_parent_run_id() run_ctx = RunContext.create( - use_case=name, - workflow=workflow_name, + workflow=name, + event_id=resolved_event_id, + customer_id=resolved_customer_id, workflow_version=workflow_version, environment=environment, tenant_id=tenant_id, @@ -172,8 +197,7 @@ def sync_wrapper(*args: Any, **kwargs: Any) -> T: "botanu.run.started", attributes={ "run_id": run_ctx.run_id, - "use_case": run_ctx.use_case, - "workflow": workflow_name, + "workflow": run_ctx.workflow, }, ) @@ -228,7 +252,7 @@ def _emit_run_completed( event_attrs: Dict[str, Union[str, float]] = { "run_id": run_ctx.run_id, - "use_case": run_ctx.use_case, + "workflow": run_ctx.workflow, "status": status.value, "duration_ms": duration_ms, } @@ -245,7 +269,7 @@ def _emit_run_completed( span.set_attribute("botanu.run.duration_ms", duration_ms) -use_case = botanu_use_case +workflow = botanu_workflow def botanu_outcome( @@ -255,8 +279,8 @@ def botanu_outcome( ) -> Callable[[Callable[..., T]], Callable[..., T]]: """Decorator to automatically emit outcomes based on function result. - This is a convenience decorator for sub-functions within a use case. - It does NOT create a new run — use ``@botanu_use_case`` for that. + This is a convenience decorator for sub-functions within a workflow. + It does NOT create a new run — use ``@botanu_workflow`` for that. 
""" from botanu.sdk.span_helpers import emit_outcome @@ -292,3 +316,92 @@ def sync_wrapper(*args: Any, **kwargs: Any) -> T: return sync_wrapper # type: ignore[return-value] return decorator + + +@contextmanager +def run_botanu( + name: str, + *, + event_id: str, + customer_id: str, + environment: Optional[str] = None, + tenant_id: Optional[str] = None, + auto_outcome_on_success: bool = True, + span_kind: SpanKind = SpanKind.SERVER, +) -> Generator[RunContext, None, None]: + """Context manager to create a run span — non-decorator alternative to ``@botanu_workflow``. + + Use this when you can't decorate a function (dynamic workflows, simple scripts, + or when the workflow name is determined at runtime). + + Args: + name: Workflow name (low cardinality, e.g. ``"Customer Support"``). + event_id: Business unit of work (e.g. ticket ID). + customer_id: End-customer being served (e.g. org ID). + environment: Deployment environment. + tenant_id: Tenant identifier for multi-tenant apps. + auto_outcome_on_success: Emit ``"success"`` if no exception. + span_kind: OpenTelemetry span kind (default: ``SERVER``). + + Yields: + RunContext with the generated run_id and metadata. + + Example:: + + with run_botanu("Support", event_id="ticket-42", customer_id="acme") as run: + result = call_llm(...) 
+ emit_outcome("success", value_type="tickets_resolved", value_amount=1) + """ + parent_run_id = _get_parent_run_id() + run_ctx = RunContext.create( + workflow=name, + event_id=event_id, + customer_id=customer_id, + environment=environment, + tenant_id=tenant_id, + parent_run_id=parent_run_id, + ) + + with tracer.start_as_current_span( + name=f"botanu.run/{name}", + kind=span_kind, + ) as span: + for key, value in run_ctx.to_span_attributes().items(): + span.set_attribute(key, value) + + span.add_event( + "botanu.run.started", + attributes={"run_id": run_ctx.run_id, "workflow": run_ctx.workflow}, + ) + + ctx = get_current() + for key, value in run_ctx.to_baggage_dict().items(): + ctx = otel_baggage.set_baggage(key, value, context=ctx) + baggage_token = attach(ctx) + + try: + yield run_ctx + + span_attrs = getattr(span, "attributes", None) + existing_outcome = ( + span_attrs.get("botanu.outcome.status") + if isinstance(span_attrs, Mapping) + else None + ) + + if existing_outcome is None and auto_outcome_on_success: + run_ctx.complete(RunStatus.SUCCESS) + + span.set_status(Status(StatusCode.OK)) + _emit_run_completed(span, run_ctx, RunStatus.SUCCESS) + + except Exception as exc: + span.set_status(Status(StatusCode.ERROR, str(exc))) + span.record_exception(exc) + run_ctx.complete(RunStatus.FAILURE, error_class=exc.__class__.__name__) + _emit_run_completed( + span, run_ctx, RunStatus.FAILURE, error_class=exc.__class__.__name__, + ) + raise + finally: + detach(baggage_token) diff --git a/src/botanu/sdk/middleware.py b/src/botanu/sdk/middleware.py index 83eb742..1f01175 100644 --- a/src/botanu/sdk/middleware.py +++ b/src/botanu/sdk/middleware.py @@ -37,8 +37,7 @@ class BotanuMiddleware(BaseHTTPMiddleware): FastAPIInstrumentor.instrument_app(app) app.add_middleware( BotanuMiddleware, - use_case="customer_support", - workflow="ticket_api", + workflow="customer_support", ) """ @@ -46,13 +45,11 @@ def __init__( self, app: object, *, - use_case: str, - workflow: Optional[str] 
= None, + workflow: str, auto_generate_run_id: bool = True, ) -> None: super().__init__(app) # type: ignore[arg-type] - self.use_case = use_case - self.workflow = workflow or use_case + self.workflow = workflow self.auto_generate_run_id = auto_generate_run_id async def dispatch(self, request: Request, call_next: object) -> Response: # type: ignore[override] @@ -66,9 +63,6 @@ async def dispatch(self, request: Request, call_next: object) -> Response: # ty if not run_id and self.auto_generate_run_id: run_id = str(uuid.uuid4()) - use_case = ( - otel_baggage.get_baggage("botanu.use_case") or request.headers.get("x-botanu-use-case") or self.use_case - ) workflow = ( otel_baggage.get_baggage("botanu.workflow") or request.headers.get("x-botanu-workflow") or self.workflow ) @@ -76,7 +70,6 @@ async def dispatch(self, request: Request, call_next: object) -> Response: # ty if run_id: span.set_attribute("botanu.run_id", run_id) - span.set_attribute("botanu.use_case", use_case) span.set_attribute("botanu.workflow", workflow) if customer_id: span.set_attribute("botanu.customer_id", customer_id) @@ -87,7 +80,6 @@ async def dispatch(self, request: Request, call_next: object) -> Response: # ty ctx = get_current() if run_id: ctx = otel_baggage.set_baggage("botanu.run_id", run_id, context=ctx) - ctx = otel_baggage.set_baggage("botanu.use_case", use_case, context=ctx) ctx = otel_baggage.set_baggage("botanu.workflow", workflow, context=ctx) if customer_id: ctx = otel_baggage.set_baggage("botanu.customer_id", customer_id, context=ctx) @@ -100,7 +92,6 @@ async def dispatch(self, request: Request, call_next: object) -> Response: # ty if run_id: response.headers["x-botanu-run-id"] = run_id - response.headers["x-botanu-use-case"] = use_case response.headers["x-botanu-workflow"] = workflow return response diff --git a/src/botanu/sdk/span_helpers.py b/src/botanu/sdk/span_helpers.py index f7388ff..2d1889c 100644 --- a/src/botanu/sdk/span_helpers.py +++ b/src/botanu/sdk/span_helpers.py @@ -8,10 
+8,19 @@ from __future__ import annotations +import logging from typing import Optional from opentelemetry import trace +from botanu.sdk.context import get_baggage + +logger = logging.getLogger(__name__) + +VALID_OUTCOME_STATUSES = { + "success", "partial", "failed", "timeout", "canceled", "abandoned", +} + def emit_outcome( status: str, @@ -20,23 +29,38 @@ def emit_outcome( value_amount: Optional[float] = None, confidence: Optional[float] = None, reason: Optional[str] = None, + error_type: Optional[str] = None, + metadata: Optional[dict[str, str]] = None, ) -> None: """Emit an outcome for the current span. Sets span attributes for outcome tracking and ROI calculation. + Also emits an OTel log record to trigger collector flush. Args: - status: Outcome status (``"success"``, ``"partial"``, ``"failed"``). + status: Outcome status. Must be one of ``"success"``, ``"partial"``, + ``"failed"``, ``"timeout"``, ``"canceled"``, ``"abandoned"``. value_type: Type of business value (e.g., ``"tickets_resolved"``). value_amount: Quantified value amount. confidence: Confidence score (0.0–1.0). reason: Optional reason for the outcome. + error_type: Error classification (e.g., ``"ValidationError"``). + metadata: Additional key-value metadata to attach to the outcome. + + Raises: + ValueError: If *status* is not a recognised outcome status. Example:: >>> emit_outcome("success", value_type="tickets_resolved", value_amount=1) - >>> emit_outcome("failed", reason="missing_context") + >>> emit_outcome("failed", error_type="TimeoutError", reason="LLM took >30s") """ + if status not in VALID_OUTCOME_STATUSES: + raise ValueError( + f"Invalid outcome status '{status}'. 
" + f"Must be one of: {', '.join(sorted(VALID_OUTCOME_STATUSES))}" + ) + span = trace.get_current_span() span.set_attribute("botanu.outcome", status) @@ -53,14 +77,41 @@ def emit_outcome( if reason: span.set_attribute("botanu.outcome.reason", reason) + if error_type: + span.set_attribute("botanu.outcome.error_type", error_type) + + if metadata: + for key, value in metadata.items(): + span.set_attribute(f"botanu.outcome.metadata.{key}", value) + event_attrs: dict[str, object] = {"status": status} if value_type: event_attrs["value_type"] = value_type if value_amount is not None: event_attrs["value_amount"] = value_amount + if error_type: + event_attrs["error_type"] = error_type span.add_event("botanu.outcome_emitted", event_attrs) + # Emit OTel log record for collector flush trigger + event_id = get_baggage("botanu.event_id") + if event_id: + try: + from opentelemetry._logs import get_logger_provider + + logger_provider = get_logger_provider() + otel_logger = logger_provider.get_logger("botanu.outcome") + otel_logger.emit( + body=f"outcome:{status}", + attributes={ + "botanu.event_id": event_id, + "botanu.outcome.status": status, + }, + ) + except Exception: + pass # Don't break user's code if logs not configured + def set_business_context( *, diff --git a/src/botanu/tracking/__init__.py b/src/botanu/tracking/__init__.py index 5933aa6..3135ce8 100644 --- a/src/botanu/tracking/__init__.py +++ b/src/botanu/tracking/__init__.py @@ -6,7 +6,6 @@ Provides tracking for different operation types: - LLM/GenAI model calls - Database, storage, and messaging operations -- Attempt ledger for durable cost tracking """ from __future__ import annotations @@ -21,17 +20,6 @@ track_messaging_operation, track_storage_operation, ) -from botanu.tracking.ledger import ( - AttemptLedger, - AttemptStatus, - LedgerEventType, - get_ledger, - record_attempt_ended, - record_attempt_started, - record_llm_attempted, - record_tool_attempted, - set_ledger, -) from botanu.tracking.llm import ( 
BotanuAttributes, GenAIAttributes, @@ -64,14 +52,4 @@ "DBOperation", "StorageOperation", "MessagingOperation", - # Attempt ledger - "AttemptLedger", - "get_ledger", - "set_ledger", - "record_attempt_started", - "record_attempt_ended", - "record_llm_attempted", - "record_tool_attempted", - "LedgerEventType", - "AttemptStatus", ] diff --git a/src/botanu/tracking/ledger.py b/src/botanu/tracking/ledger.py deleted file mode 100644 index 3fe982a..0000000 --- a/src/botanu/tracking/ledger.py +++ /dev/null @@ -1,420 +0,0 @@ -# SPDX-FileCopyrightText: 2026 The Botanu Authors -# SPDX-License-Identifier: Apache-2.0 - -"""Attempt Ledger — durable event log for invisible cost tracking. - -An append-only event log that is NEVER sampled and survives crashes. -Uses OTel Logs API to emit structured events. - -Event Types: -- ``attempt.started``: Run/attempt began -- ``llm.attempted``: LLM call attempt (with tokens, cost) -- ``tool.attempted``: Tool execution attempt -- ``attempt.ended``: Run/attempt completed -- ``cancellation.requested``: Cancellation was requested -- ``zombie.detected``: Work continued after timeout -""" - -from __future__ import annotations - -import logging -import os -import time -from dataclasses import dataclass, field -from enum import Enum -from functools import lru_cache -from typing import Any, Dict, Optional - -from opentelemetry import trace - -logger = logging.getLogger(__name__) - - -class LedgerEventType(str, Enum): - ATTEMPT_STARTED = "attempt.started" - ATTEMPT_ENDED = "attempt.ended" - LLM_ATTEMPTED = "llm.attempted" - TOOL_ATTEMPTED = "tool.attempted" - CANCEL_REQUESTED = "cancellation.requested" - CANCEL_ACKNOWLEDGED = "cancellation.acknowledged" - ZOMBIE_DETECTED = "zombie.detected" - REDELIVERY_DETECTED = "redelivery.detected" - - -class AttemptStatus(str, Enum): - SUCCESS = "success" - ERROR = "error" - TIMEOUT = "timeout" - CANCELLED = "cancelled" - RATE_LIMITED = "rate_limited" - - -@dataclass -class AttemptLedger: - """Durable event ledger 
for cost tracking. - - Emits structured log records that are never sampled, providing a - reliable source of truth for attempt counts, token costs, and zombie work. - """ - - service_name: str = field( - default_factory=lambda: os.getenv("OTEL_SERVICE_NAME", "unknown"), - ) - otlp_endpoint: Optional[str] = field(default=None) - _logger: Any = field(default=None, init=False, repr=False) - _initialized: bool = field(default=False, init=False) - - def __post_init__(self) -> None: - self._initialize_logger() - - def _initialize_logger(self) -> None: - try: - from opentelemetry._logs import get_logger_provider, set_logger_provider - from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter - from opentelemetry.sdk._logs import LoggerProvider - from opentelemetry.sdk._logs.export import BatchLogRecordProcessor - - provider = get_logger_provider() - - endpoint = self.otlp_endpoint - if not endpoint: - traces_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") - if traces_endpoint: - endpoint = f"{traces_endpoint.rstrip('/')}/v1/logs" - else: - endpoint = "http://localhost:4318/v1/logs" - - if provider is None or not hasattr(provider, "get_logger"): - new_provider = LoggerProvider() - exporter = OTLPLogExporter(endpoint=endpoint) - new_provider.add_log_record_processor(BatchLogRecordProcessor(exporter)) - set_logger_provider(new_provider) - provider = new_provider - - self._logger = provider.get_logger("botanu.attempt_ledger") - self._initialized = True - logger.debug("AttemptLedger initialized with endpoint: %s", endpoint) - - except Exception as exc: - logger.warning("Failed to initialize AttemptLedger: %s", exc) - self._initialized = False - - def _get_trace_context(self) -> Dict[str, str]: - span = trace.get_current_span() - ctx = span.get_span_context() if span else None - if ctx and ctx.is_valid: - return { - "trace_id": format(ctx.trace_id, "032x"), - "span_id": format(ctx.span_id, "016x"), - } - return {} - - def _emit( - self, - event_type: 
LedgerEventType, - severity: Any, - attributes: Dict[str, Any], - ) -> None: - if not self._initialized or not self._logger: - return - - try: - from opentelemetry.sdk._logs import LogRecord - - attrs = { - "event.name": event_type.value, - "service.name": self.service_name, - "timestamp_ms": int(time.time() * 1000), - **self._get_trace_context(), - **attributes, - } - - self._logger.emit( - LogRecord( - timestamp=int(time.time_ns()), - severity_number=severity, - severity_text=severity.name, - body=event_type.value, - attributes=attrs, - ) - ) - except Exception as exc: - logger.debug("Failed to emit ledger event: %s", exc) - - # ----------------------------------------------------------------- - # Attempt Lifecycle - # ----------------------------------------------------------------- - - def attempt_started( - self, - run_id: str, - use_case: str, - attempt: int = 1, - root_run_id: Optional[str] = None, - workflow: Optional[str] = None, - tenant_id: Optional[str] = None, - deadline_ts: Optional[float] = None, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.ATTEMPT_STARTED, - SeverityNumber.INFO, - { - "botanu.run_id": run_id, - "botanu.use_case": use_case, - "botanu.attempt": attempt, - "botanu.root_run_id": root_run_id or run_id, - "botanu.workflow": workflow, - "botanu.tenant_id": tenant_id, - "botanu.deadline_ts": deadline_ts, - }, - ) - - def attempt_ended( - self, - run_id: str, - status: str, - duration_ms: Optional[float] = None, - error_class: Optional[str] = None, - reason_code: Optional[str] = None, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.ATTEMPT_ENDED, - SeverityNumber.INFO if status == "success" else SeverityNumber.WARN, - { - "botanu.run_id": run_id, - "status": status, - "duration_ms": duration_ms, - "error_class": error_class, - "reason_code": reason_code, - }, - ) - - # ----------------------------------------------------------------- - # LLM 
Attempt Events - # ----------------------------------------------------------------- - - def llm_attempted( - self, - run_id: str, - provider: str, - model: str, - operation: str = "chat", - attempt_number: int = 1, - input_tokens: int = 0, - output_tokens: int = 0, - cached_tokens: int = 0, - duration_ms: Optional[float] = None, - status: str = "success", - error_class: Optional[str] = None, - provider_request_id: Optional[str] = None, - estimated_cost_usd: Optional[float] = None, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.LLM_ATTEMPTED, - SeverityNumber.INFO if status == "success" else SeverityNumber.WARN, - { - "botanu.run_id": run_id, - "gen_ai.provider.name": provider, - "gen_ai.request.model": model, - "gen_ai.operation.name": operation, - "botanu.attempt": attempt_number, - "gen_ai.usage.input_tokens": input_tokens, - "gen_ai.usage.output_tokens": output_tokens, - "botanu.usage.cached_tokens": cached_tokens, - "duration_ms": duration_ms, - "status": status, - "error_class": error_class, - "gen_ai.response.id": provider_request_id, - "botanu.cost.estimated_usd": estimated_cost_usd, - }, - ) - - def tool_attempted( - self, - run_id: str, - tool_name: str, - tool_call_id: Optional[str] = None, - attempt_number: int = 1, - duration_ms: Optional[float] = None, - status: str = "success", - error_class: Optional[str] = None, - items_returned: int = 0, - bytes_processed: int = 0, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.TOOL_ATTEMPTED, - SeverityNumber.INFO if status == "success" else SeverityNumber.WARN, - { - "botanu.run_id": run_id, - "gen_ai.tool.name": tool_name, - "gen_ai.tool.call.id": tool_call_id, - "botanu.attempt": attempt_number, - "duration_ms": duration_ms, - "status": status, - "error_class": error_class, - "items_returned": items_returned, - "bytes_processed": bytes_processed, - }, - ) - - # 
----------------------------------------------------------------- - # Cancellation & Zombie Detection - # ----------------------------------------------------------------- - - def cancel_requested( - self, - run_id: str, - reason: str = "user", - requested_at_ms: Optional[float] = None, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.CANCEL_REQUESTED, - SeverityNumber.WARN, - { - "botanu.run_id": run_id, - "cancellation.reason": reason, - "cancellation.requested_at_ms": requested_at_ms or int(time.time() * 1000), - }, - ) - - def cancel_acknowledged( - self, - run_id: str, - acknowledged_by: str, - latency_ms: Optional[float] = None, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.CANCEL_ACKNOWLEDGED, - SeverityNumber.INFO, - { - "botanu.run_id": run_id, - "cancellation.acknowledged_by": acknowledged_by, - "cancellation.latency_ms": latency_ms, - }, - ) - - def zombie_detected( - self, - run_id: str, - deadline_ts: float, - actual_end_ts: float, - zombie_duration_ms: float, - component: str, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.ZOMBIE_DETECTED, - SeverityNumber.ERROR, - { - "botanu.run_id": run_id, - "deadline_ts": deadline_ts, - "actual_end_ts": actual_end_ts, - "zombie_duration_ms": zombie_duration_ms, - "zombie_component": component, - }, - ) - - def redelivery_detected( - self, - run_id: str, - queue_name: str, - delivery_count: int, - original_message_id: Optional[str] = None, - ) -> None: - from opentelemetry._logs import SeverityNumber - - self._emit( - LedgerEventType.REDELIVERY_DETECTED, - SeverityNumber.WARN, - { - "botanu.run_id": run_id, - "queue.name": queue_name, - "delivery_count": delivery_count, - "original_message_id": original_message_id, - }, - ) - - # ----------------------------------------------------------------- - # Lifecycle - # 
----------------------------------------------------------------- - - def flush(self, timeout_ms: int = 5000) -> bool: - if not self._initialized: - return True - try: - from opentelemetry._logs import get_logger_provider - - provider = get_logger_provider() - if hasattr(provider, "force_flush"): - return provider.force_flush(timeout_ms) - return True - except Exception as exc: - logger.debug("Failed to flush AttemptLedger: %s", exc) - return False - - def shutdown(self) -> None: - if not self._initialized: - return - try: - from opentelemetry._logs import get_logger_provider - - provider = get_logger_provider() - if hasattr(provider, "shutdown"): - provider.shutdown() - except Exception as exc: - logger.debug("Failed to shutdown AttemptLedger: %s", exc) - - -# ========================================================================= -# Global ledger -# ========================================================================= - -_global_ledger: Optional[AttemptLedger] = None - - -@lru_cache(maxsize=1) -def _create_default_ledger() -> AttemptLedger: - """Create default ledger instance (thread-safe via lru_cache).""" - return AttemptLedger() - - -def get_ledger() -> AttemptLedger: - """Get the global attempt ledger instance (thread-safe).""" - if _global_ledger is not None: - return _global_ledger - return _create_default_ledger() - - -def set_ledger(ledger: AttemptLedger) -> None: - """Set the global attempt ledger instance.""" - global _global_ledger - _global_ledger = ledger - - -def record_attempt_started(**kwargs: Any) -> None: - get_ledger().attempt_started(**kwargs) - - -def record_attempt_ended(**kwargs: Any) -> None: - get_ledger().attempt_ended(**kwargs) - - -def record_llm_attempted(**kwargs: Any) -> None: - get_ledger().llm_attempted(**kwargs) - - -def record_tool_attempted(**kwargs: Any) -> None: - get_ledger().tool_attempted(**kwargs) diff --git a/src/botanu/tracking/llm.py b/src/botanu/tracking/llm.py index 9ddccc4..67c0182 100644 --- 
a/src/botanu/tracking/llm.py +++ b/src/botanu/tracking/llm.py @@ -10,7 +10,7 @@ from botanu.tracking.llm import track_llm_call, track_tool_call - with track_llm_call(provider="openai", model="gpt-4") as tracker: + with track_llm_call(vendor="openai", model="gpt-4") as tracker: response = openai.chat.completions.create(...) tracker.set_tokens( input_tokens=response.usage.prompt_tokens, @@ -21,6 +21,7 @@ from __future__ import annotations +import contextvars import functools from contextlib import contextmanager from dataclasses import dataclass, field @@ -30,6 +31,12 @@ from opentelemetry import metrics, trace from opentelemetry.trace import Span, SpanKind, Status, StatusCode +# Context variable for automatic retry detection (set by tenacity integration). +# Default 0 means "not set by retry callback"; 1+ means the attempt number. +_retry_attempt: contextvars.ContextVar[int] = contextvars.ContextVar( + "botanu_retry_attempt", default=0 +) + # ========================================================================= # OTel GenAI Semantic Convention Attribute Names # ========================================================================= @@ -60,8 +67,8 @@ class GenAIAttributes: class BotanuAttributes: """Botanu-specific attributes for cost attribution.""" - PROVIDER_REQUEST_ID = "botanu.provider.request_id" - CLIENT_REQUEST_ID = "botanu.provider.client_request_id" + VENDOR_REQUEST_ID = "botanu.vendor.request_id" + VENDOR_CLIENT_REQUEST_ID = "botanu.vendor.client_request_id" TOKENS_CACHED = "botanu.usage.cached_tokens" TOKENS_CACHED_READ = "botanu.usage.cache_read_tokens" TOKENS_CACHED_WRITE = "botanu.usage.cache_write_tokens" @@ -76,10 +83,10 @@ class BotanuAttributes: # ========================================================================= -# Provider name mapping +# Vendor name normalization # ========================================================================= -LLM_PROVIDERS: Dict[str, str] = { +LLM_VENDORS: Dict[str, str] = { "openai": "openai", 
"azure_openai": "azure.openai", "azure-openai": "azure.openai", @@ -160,7 +167,7 @@ class ModelOperation: def _record_token_metrics( - provider: str, + vendor: str, model: str, operation: str, input_tokens: int, @@ -169,7 +176,7 @@ def _record_token_metrics( ) -> None: base_attrs: Dict[str, str] = { GenAIAttributes.OPERATION_NAME: operation, - GenAIAttributes.PROVIDER_NAME: provider, + GenAIAttributes.PROVIDER_NAME: vendor, GenAIAttributes.REQUEST_MODEL: model, } if error_type: @@ -188,7 +195,7 @@ def _record_token_metrics( def _record_duration_metric( - provider: str, + vendor: str, model: str, operation: str, duration_seconds: float, @@ -196,7 +203,7 @@ def _record_duration_metric( ) -> None: attrs: Dict[str, str] = { GenAIAttributes.OPERATION_NAME: operation, - GenAIAttributes.PROVIDER_NAME: provider, + GenAIAttributes.PROVIDER_NAME: vendor, GenAIAttributes.REQUEST_MODEL: model, } if error_type: @@ -214,7 +221,7 @@ def _record_duration_metric( class LLMTracker: """Context manager for tracking LLM calls with OTel GenAI semconv.""" - provider: str + vendor: str model: str operation: str = ModelOperation.CHAT span: Optional[Span] = field(default=None, repr=False) @@ -226,7 +233,7 @@ class LLMTracker: cache_read_tokens: int = 0 cache_write_tokens: int = 0 - provider_request_id: Optional[str] = None + vendor_request_id: Optional[str] = None client_request_id: Optional[str] = None response_model: Optional[str] = None finish_reason: Optional[str] = None @@ -263,19 +270,19 @@ def set_tokens( def set_request_id( self, - provider_request_id: Optional[str] = None, + vendor_request_id: Optional[str] = None, client_request_id: Optional[str] = None, ) -> LLMTracker: - """Set provider request IDs for billing reconciliation.""" - if provider_request_id: - self.provider_request_id = provider_request_id + """Set vendor request IDs for billing reconciliation.""" + if vendor_request_id: + self.vendor_request_id = vendor_request_id if self.span: - 
self.span.set_attribute(GenAIAttributes.RESPONSE_ID, provider_request_id) - self.span.set_attribute(BotanuAttributes.PROVIDER_REQUEST_ID, provider_request_id) + self.span.set_attribute(GenAIAttributes.RESPONSE_ID, vendor_request_id) + self.span.set_attribute(BotanuAttributes.VENDOR_REQUEST_ID, vendor_request_id) if client_request_id: self.client_request_id = client_request_id if self.span: - self.span.set_attribute(BotanuAttributes.CLIENT_REQUEST_ID, client_request_id) + self.span.set_attribute(BotanuAttributes.VENDOR_CLIENT_REQUEST_ID, client_request_id) return self def set_response_model(self, model: str) -> LLMTracker: @@ -362,7 +369,7 @@ def _finalize(self) -> None: duration_seconds = (datetime.now(timezone.utc) - self.start_time).total_seconds() _record_token_metrics( - provider=self.provider, + vendor=self.vendor, model=self.model, operation=self.operation, input_tokens=self.input_tokens, @@ -370,7 +377,7 @@ def _finalize(self) -> None: error_type=self.error_type, ) _record_duration_metric( - provider=self.provider, + vendor=self.vendor, model=self.model, operation=self.operation, duration_seconds=duration_seconds, @@ -379,7 +386,7 @@ def _finalize(self) -> None: _attempt_counter.add( 1, { - GenAIAttributes.PROVIDER_NAME: self.provider, + GenAIAttributes.PROVIDER_NAME: self.vendor, GenAIAttributes.REQUEST_MODEL: self.model, GenAIAttributes.OPERATION_NAME: self.operation, "status": "error" if self.error_type else "success", @@ -389,7 +396,7 @@ def _finalize(self) -> None: @contextmanager def track_llm_call( - provider: str, + vendor: str, model: str, operation: str = ModelOperation.CHAT, client_request_id: Optional[str] = None, @@ -398,7 +405,7 @@ def track_llm_call( """Context manager for tracking LLM/model calls with OTel GenAI semconv. Args: - provider: LLM provider (openai, anthropic, bedrock, vertex, …). + vendor: LLM vendor (openai, anthropic, bedrock, vertex, …). model: Model name/ID (gpt-4, claude-3-opus, …). 
operation: Type of operation (chat, embeddings, text_completion, …). client_request_id: Optional client-generated request ID. @@ -408,21 +415,21 @@ def track_llm_call( :class:`LLMTracker` instance. """ tracer = trace.get_tracer("botanu.gen_ai") - normalized_provider = LLM_PROVIDERS.get(provider.lower(), provider.lower()) + normalized_vendor = LLM_VENDORS.get(vendor.lower(), vendor.lower()) span_name = f"{operation} {model}" with tracer.start_as_current_span(name=span_name, kind=SpanKind.CLIENT) as span: span.set_attribute(GenAIAttributes.OPERATION_NAME, operation) - span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized_provider) + span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized_vendor) span.set_attribute(GenAIAttributes.REQUEST_MODEL, model) - span.set_attribute(BotanuAttributes.VENDOR, normalized_provider) + span.set_attribute(BotanuAttributes.VENDOR, normalized_vendor) for key, value in kwargs.items(): attr_key = key if key.startswith(("botanu.", "gen_ai.")) else f"botanu.{key}" span.set_attribute(attr_key, value) tracker = LLMTracker( - provider=normalized_provider, + vendor=normalized_vendor, model=model, operation=operation, span=span, @@ -430,6 +437,11 @@ def track_llm_call( if client_request_id: tracker.set_request_id(client_request_id=client_request_id) + # Auto-detect retry attempt from tenacity integration. 
+ ctx_attempt = _retry_attempt.get() + if ctx_attempt > 0: + tracker.set_attempt(ctx_attempt) + try: yield tracker except Exception as exc: @@ -462,7 +474,7 @@ class ToolTracker: tool_name: str tool_call_id: Optional[str] = None - provider: Optional[str] = None + vendor: Optional[str] = None span: Optional[Span] = field(default=None, repr=False) start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) @@ -524,8 +536,8 @@ def _finalize(self) -> None: GenAIAttributes.TOOL_NAME: self.tool_name, "status": "error" if self.error_type else "success", } - if self.provider: - attrs[GenAIAttributes.PROVIDER_NAME] = self.provider + if self.vendor: + attrs[GenAIAttributes.PROVIDER_NAME] = self.vendor _tool_duration_histogram.record(duration_seconds, attrs) _tool_counter.add(1, attrs) @@ -535,7 +547,7 @@ def _finalize(self) -> None: def track_tool_call( tool_name: str, tool_call_id: Optional[str] = None, - provider: Optional[str] = None, + vendor: Optional[str] = None, **kwargs: Any, ) -> Generator[ToolTracker, None, None]: """Context manager for tracking tool/function calls. @@ -543,7 +555,7 @@ def track_tool_call( Args: tool_name: Name of the tool/function. tool_call_id: Tool call ID from the LLM response. - provider: Tool provider if external (e.g., ``"tavily"``). + vendor: Tool vendor if external (e.g., ``"tavily"``). **kwargs: Additional span attributes. 
Yields: @@ -558,8 +570,8 @@ def track_tool_call( if tool_call_id: span.set_attribute(GenAIAttributes.TOOL_CALL_ID, tool_call_id) - if provider: - normalized = LLM_PROVIDERS.get(provider.lower(), provider.lower()) + if vendor: + normalized = LLM_VENDORS.get(vendor.lower(), vendor.lower()) span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized) span.set_attribute(BotanuAttributes.VENDOR, normalized) @@ -570,7 +582,7 @@ def track_tool_call( tracker = ToolTracker( tool_name=tool_name, tool_call_id=tool_call_id, - provider=provider, + vendor=vendor, span=span, ) @@ -589,14 +601,14 @@ def track_tool_call( def set_llm_attributes( - provider: str, + vendor: str, model: str, operation: str = ModelOperation.CHAT, input_tokens: int = 0, output_tokens: int = 0, cached_tokens: int = 0, streaming: bool = False, - provider_request_id: Optional[str] = None, + vendor_request_id: Optional[str] = None, span: Optional[Span] = None, ) -> None: """Set LLM attributes on the current span using OTel GenAI semconv.""" @@ -604,12 +616,12 @@ def set_llm_attributes( if not target_span or not target_span.is_recording(): return - normalized_provider = LLM_PROVIDERS.get(provider.lower(), provider.lower()) + normalized_vendor = LLM_VENDORS.get(vendor.lower(), vendor.lower()) target_span.set_attribute(GenAIAttributes.OPERATION_NAME, operation) - target_span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized_provider) + target_span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized_vendor) target_span.set_attribute(GenAIAttributes.REQUEST_MODEL, model) - target_span.set_attribute(BotanuAttributes.VENDOR, normalized_provider) + target_span.set_attribute(BotanuAttributes.VENDOR, normalized_vendor) if input_tokens > 0: target_span.set_attribute(GenAIAttributes.USAGE_INPUT_TOKENS, input_tokens) @@ -619,12 +631,12 @@ def set_llm_attributes( target_span.set_attribute(BotanuAttributes.TOKENS_CACHED, cached_tokens) if streaming: target_span.set_attribute(BotanuAttributes.STREAMING, True) - 
if provider_request_id: - target_span.set_attribute(GenAIAttributes.RESPONSE_ID, provider_request_id) - target_span.set_attribute(BotanuAttributes.PROVIDER_REQUEST_ID, provider_request_id) + if vendor_request_id: + target_span.set_attribute(GenAIAttributes.RESPONSE_ID, vendor_request_id) + target_span.set_attribute(BotanuAttributes.VENDOR_REQUEST_ID, vendor_request_id) _record_token_metrics( - provider=normalized_provider, + vendor=normalized_vendor, model=model, operation=operation, input_tokens=input_tokens, @@ -651,14 +663,14 @@ def set_token_usage( def llm_instrumented( - provider: str, + vendor: str, model_param: str = "model", tokens_from_response: bool = True, ) -> Any: """Decorator to auto-instrument LLM client methods. Args: - provider: LLM provider name. + vendor: LLM vendor name. model_param: Name of the parameter containing the model name. tokens_from_response: Whether to extract tokens from ``response.usage``. """ @@ -668,7 +680,7 @@ def decorator(func: Any) -> Any: def wrapper(*args: Any, **kwargs: Any) -> Any: model = kwargs.get(model_param) or (args[1] if len(args) > 1 else "unknown") - with track_llm_call(provider, model) as tracker: + with track_llm_call(vendor, model) as tracker: if kwargs.get("stream"): tracker.set_streaming(True) diff --git a/tests/conftest.py b/tests/conftest.py index 202e424..4cd5ad8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,9 @@ import pytest from opentelemetry import trace +from opentelemetry._logs import set_logger_provider +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import SimpleLogRecordProcessor, InMemoryLogExporter from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter @@ -16,6 +19,13 @@ _provider: TracerProvider = None _exporter: InMemorySpanExporter = None +# Log provider/exporter — set eagerly at module 
level before any code accesses +# get_logger_provider(), because OTel only allows set_logger_provider() once. +_log_exporter = InMemoryLogExporter() +_log_provider = LoggerProvider() +_log_provider.add_log_record_processor(SimpleLogRecordProcessor(_log_exporter)) +set_logger_provider(_log_provider) + def _get_or_create_provider() -> tuple[TracerProvider, InMemorySpanExporter]: """Get or create the global test provider.""" @@ -57,3 +67,10 @@ def memory_exporter(): def tracer(tracer_provider): """Get a tracer instance.""" return trace.get_tracer("test-tracer") + + +@pytest.fixture +def log_exporter(): + """Get the in-memory log exporter for testing.""" + _log_exporter.clear() + return _log_exporter diff --git a/tests/unit/test_bootstrap.py b/tests/unit/test_bootstrap.py index ac0a2c9..378939f 100644 --- a/tests/unit/test_bootstrap.py +++ b/tests/unit/test_bootstrap.py @@ -656,15 +656,15 @@ def test_conftest_uses_always_on(self): # --------------------------------------------------------------------------- -class TestProviderReuse: - """Botanu must not create a second TracerProvider if one already exists.""" +class TestProviderCreation: + """Botanu must always create a fresh TracerProvider.""" - def test_reuse_existing_provider_code_path(self): - """Bootstrap source must check for existing TracerProvider.""" + def test_creates_new_provider(self): + """Bootstrap source must create a new TracerProvider.""" import inspect from botanu.sdk import bootstrap source = inspect.getsource(bootstrap.enable) - assert "get_tracer_provider" in source, "enable() must check for existing TracerProvider" - assert "isinstance" in source, "enable() must use isinstance to check provider type" + assert "TracerProvider(" in source, "enable() must create a new TracerProvider" + assert "set_tracer_provider" in source, "enable() must call set_tracer_provider" diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index 91feca4..6fdcbd6 100644 --- a/tests/unit/test_context.py +++ 
b/tests/unit/test_context.py @@ -11,7 +11,6 @@ get_baggage, get_current_span, get_run_id, - get_use_case, get_workflow, set_baggage, ) @@ -42,10 +41,6 @@ def test_get_run_id_not_set(self): # Result could be None or a previously set value assert result is None or isinstance(result, str) - def test_get_use_case(self): - set_baggage("botanu.use_case", "Customer Support") - assert get_use_case() == "Customer Support" - def test_get_workflow(self): set_baggage("botanu.workflow", "ticket_handler") assert get_workflow() == "ticket_handler" diff --git a/tests/unit/test_decorators.py b/tests/unit/test_decorators.py index b63f906..e0676d4 100644 --- a/tests/unit/test_decorators.py +++ b/tests/unit/test_decorators.py @@ -10,7 +10,7 @@ from opentelemetry import context as otel_context from opentelemetry.context import get_current -from botanu.sdk.decorators import botanu_outcome, botanu_use_case +from botanu.sdk.decorators import botanu_outcome, botanu_workflow @pytest.fixture(autouse=True) @@ -21,11 +21,11 @@ def _clean_otel_context(): otel_context.detach(token) -class TestBotanuUseCaseDecorator: - """Tests for @botanu_use_case decorator.""" +class TestBotanuWorkflowDecorator: + """Tests for @botanu_workflow decorator.""" def test_sync_function_creates_span(self, memory_exporter): - @botanu_use_case("Test Use Case") + @botanu_workflow("Test Workflow", event_id="evt-1", customer_id="cust-1") def my_function(): return "result" @@ -34,10 +34,10 @@ def my_function(): assert result == "result" spans = memory_exporter.get_finished_spans() assert len(spans) == 1 - assert spans[0].name == "botanu.run/Test Use Case" + assert spans[0].name == "botanu.run/Test Workflow" def test_span_has_run_attributes(self, memory_exporter): - @botanu_use_case("Customer Support", workflow="handle_ticket") + @botanu_workflow("Customer Support", event_id="ticket-42", customer_id="bigretail") def my_function(): return "done" @@ -47,11 +47,12 @@ def my_function(): attrs = dict(spans[0].attributes) assert 
"botanu.run_id" in attrs - assert attrs["botanu.use_case"] == "Customer Support" - assert attrs["botanu.workflow"] == "handle_ticket" + assert attrs["botanu.workflow"] == "Customer Support" + assert attrs["botanu.event_id"] == "ticket-42" + assert attrs["botanu.customer_id"] == "bigretail" def test_emits_started_event(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def my_function(): pass @@ -64,7 +65,7 @@ def my_function(): assert len(started_events) == 1 def test_emits_completed_event(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def my_function(): return "done" @@ -78,7 +79,7 @@ def my_function(): assert completed_events[0].attributes["status"] == "success" def test_records_exception_on_failure(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def failing_function(): raise ValueError("test error") @@ -96,7 +97,7 @@ def failing_function(): @pytest.mark.asyncio async def test_async_function_creates_span(self, memory_exporter): - @botanu_use_case("Async Test") + @botanu_workflow("Async Test", event_id="evt-1", customer_id="cust-1") async def async_function(): return "async result" @@ -109,7 +110,7 @@ async def async_function(): @pytest.mark.asyncio async def test_async_exception_handling(self, memory_exporter): - @botanu_use_case("Async Test") + @botanu_workflow("Async Test", event_id="evt-1", customer_id="cust-1") async def failing_async(): raise RuntimeError("async error") @@ -122,7 +123,7 @@ async def failing_async(): assert completed_events[0].attributes["status"] == "failure" def test_workflow_version_computed(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def versioned_function(): return "versioned" @@ -135,7 +136,7 @@ def versioned_function(): assert 
attrs["botanu.workflow.version"].startswith("v:") def test_return_value_preserved(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def returns_dict(): return {"key": "value", "count": 42} @@ -144,7 +145,7 @@ def returns_dict(): @pytest.mark.asyncio async def test_async_return_value_preserved(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") async def returns_data(): return [1, 2, 3] @@ -152,7 +153,7 @@ async def returns_data(): assert result == [1, 2, 3] def test_exception_re_raised(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def raises(): raise TypeError("bad type") @@ -160,7 +161,7 @@ def raises(): raises() def test_outcome_status_set_on_success(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def my_fn(): return "ok" @@ -170,7 +171,7 @@ def my_fn(): assert attrs["botanu.outcome.status"] == "success" def test_outcome_status_set_on_failure(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def failing(): raise RuntimeError("boom") @@ -182,7 +183,7 @@ def failing(): assert attrs["botanu.outcome.status"] == "failure" def test_duration_ms_recorded(self, memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def quick_fn(): return "done" @@ -195,7 +196,7 @@ def quick_fn(): def test_custom_span_kind(self, memory_exporter): from opentelemetry.trace import SpanKind - @botanu_use_case("Test", span_kind=SpanKind.CLIENT) + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1", span_kind=SpanKind.CLIENT) def client_fn(): return "ok" @@ -204,7 +205,7 @@ def client_fn(): assert spans[0].kind == SpanKind.CLIENT def test_root_run_id_equals_run_id_for_root(self, 
memory_exporter): - @botanu_use_case("Test") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1") def root_fn(): return "root" @@ -215,7 +216,7 @@ def root_fn(): assert attrs["botanu.root_run_id"] == attrs["botanu.run_id"] def test_tenant_id_propagated(self, memory_exporter): - @botanu_use_case("Test", tenant_id="tenant-abc") + @botanu_workflow("Test", event_id="evt-1", customer_id="cust-1", tenant_id="tenant-abc") def tenant_fn(): return "ok" @@ -227,7 +228,7 @@ def tenant_fn(): def test_baggage_cleaned_up_after_sync(self, memory_exporter): """Verify baggage does NOT leak after the decorated function completes.""" - @botanu_use_case("Leak Test") + @botanu_workflow("Leak Test", event_id="evt-1", customer_id="cust-1") def my_fn(): # Inside the function, baggage should be set assert baggage.get_baggage("botanu.run_id", get_current()) is not None @@ -245,7 +246,7 @@ def my_fn(): async def test_baggage_cleaned_up_after_async(self, memory_exporter): """Verify baggage does NOT leak after an async decorated function.""" - @botanu_use_case("Async Leak Test") + @botanu_workflow("Async Leak Test", event_id="evt-1", customer_id="cust-1") async def my_fn(): assert baggage.get_baggage("botanu.run_id", get_current()) is not None return "ok" @@ -259,7 +260,7 @@ async def my_fn(): def test_baggage_cleaned_up_after_exception(self, memory_exporter): """Verify baggage is cleaned up even when the function raises.""" - @botanu_use_case("Exception Leak Test") + @botanu_workflow("Exception Leak Test", event_id="evt-1", customer_id="cust-1") def failing_fn(): raise RuntimeError("boom") @@ -271,6 +272,31 @@ def failing_fn(): # Must be cleaned up despite the exception assert baggage.get_baggage("botanu.run_id", get_current()) is None + def test_event_id_required(self): + """Should raise ValueError if event_id is missing.""" + with pytest.raises(ValueError, match="event_id is required"): + @botanu_workflow("Test", event_id="", customer_id="cust-1") + def my_fn(): + pass + + def 
test_customer_id_required(self): + """Should raise ValueError if customer_id is missing.""" + with pytest.raises(ValueError, match="customer_id is required"): + @botanu_workflow("Test", event_id="evt-1", customer_id="") + def my_fn(): + pass + + def test_event_id_and_customer_id_in_baggage(self, memory_exporter): + """Verify event_id and customer_id are propagated via baggage.""" + + @botanu_workflow("Baggage Test", event_id="ticket-99", customer_id="acme-corp") + def my_fn(): + assert baggage.get_baggage("botanu.event_id", get_current()) == "ticket-99" + assert baggage.get_baggage("botanu.customer_id", get_current()) == "acme-corp" + return "ok" + + my_fn() + class TestBotanuOutcomeDecorator: """Tests for @botanu_outcome decorator.""" diff --git a/tests/unit/test_enricher.py b/tests/unit/test_enricher.py index a08cfbb..ed4dde1 100644 --- a/tests/unit/test_enricher.py +++ b/tests/unit/test_enricher.py @@ -35,7 +35,9 @@ def test_on_start_reads_baggage(self, memory_exporter): # Set up baggage context - start from a clean context ctx = context.Context() ctx = baggage.set_baggage("botanu.run_id", "test-run-123", context=ctx) - ctx = baggage.set_baggage("botanu.use_case", "Test Case", context=ctx) + ctx = baggage.set_baggage("botanu.workflow", "Test Case", context=ctx) + ctx = baggage.set_baggage("botanu.event_id", "evt-42", context=ctx) + ctx = baggage.set_baggage("botanu.customer_id", "cust-abc", context=ctx) # Create a span with the baggage context tracer = trace.get_tracer("test") @@ -51,7 +53,9 @@ def test_on_start_reads_baggage(self, memory_exporter): assert len(spans) == 1 attrs = dict(spans[0].attributes) assert attrs.get("botanu.run_id") == "test-run-123" - assert attrs.get("botanu.use_case") == "Test Case" + assert attrs.get("botanu.workflow") == "Test Case" + assert attrs.get("botanu.event_id") == "evt-42" + assert attrs.get("botanu.customer_id") == "cust-abc" def test_on_start_full_mode(self, memory_exporter): """Full mode should read all baggage keys.""" @@ 
-60,8 +64,9 @@ def test_on_start_full_mode(self, memory_exporter): # Set up baggage context with all keys - start from a clean context ctx = context.Context() ctx = baggage.set_baggage("botanu.run_id", "run-456", context=ctx) - ctx = baggage.set_baggage("botanu.use_case", "Full Test", context=ctx) - ctx = baggage.set_baggage("botanu.workflow", "my_workflow", context=ctx) + ctx = baggage.set_baggage("botanu.workflow", "Full Test", context=ctx) + ctx = baggage.set_baggage("botanu.event_id", "evt-789", context=ctx) + ctx = baggage.set_baggage("botanu.customer_id", "cust-xyz", context=ctx) ctx = baggage.set_baggage("botanu.environment", "staging", context=ctx) ctx = baggage.set_baggage("botanu.tenant_id", "tenant-789", context=ctx) @@ -76,8 +81,9 @@ def test_on_start_full_mode(self, memory_exporter): spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) assert attrs.get("botanu.run_id") == "run-456" - assert attrs.get("botanu.use_case") == "Full Test" - assert attrs.get("botanu.workflow") == "my_workflow" + assert attrs.get("botanu.workflow") == "Full Test" + assert attrs.get("botanu.event_id") == "evt-789" + assert attrs.get("botanu.customer_id") == "cust-xyz" assert attrs.get("botanu.environment") == "staging" assert attrs.get("botanu.tenant_id") == "tenant-789" @@ -109,7 +115,9 @@ def test_on_start_does_not_override_existing(self, memory_exporter): # Set up baggage context ctx = context.Context() ctx = baggage.set_baggage("botanu.run_id", "baggage-id", context=ctx) - ctx = baggage.set_baggage("botanu.use_case", "Baggage Case", context=ctx) + ctx = baggage.set_baggage("botanu.workflow", "Baggage Case", context=ctx) + ctx = baggage.set_baggage("botanu.event_id", "baggage-evt", context=ctx) + ctx = baggage.set_baggage("botanu.customer_id", "baggage-cust", context=ctx) tracer = trace.get_tracer("test") token = context.attach(ctx) @@ -126,8 +134,11 @@ def test_on_start_does_not_override_existing(self, memory_exporter): attrs = 
dict(spans[0].attributes) # Should keep existing value assert attrs.get("botanu.run_id") == "existing-id" - # But should set use_case since it wasn't set before - assert attrs.get("botanu.use_case") == "Baggage Case" + # But should set workflow since it wasn't set before + assert attrs.get("botanu.workflow") == "Baggage Case" + # Should set event_id and customer_id since they weren't set before + assert attrs.get("botanu.event_id") == "baggage-evt" + assert attrs.get("botanu.customer_id") == "baggage-cust" def test_on_end_noop(self): """on_end should be a no-op.""" @@ -151,10 +162,14 @@ def test_force_flush_returns_true(self): def test_baggage_keys_constants(self): """Verify baggage key constants.""" assert "botanu.run_id" in RunContextEnricher.BAGGAGE_KEYS_LEAN - assert "botanu.use_case" in RunContextEnricher.BAGGAGE_KEYS_LEAN - assert len(RunContextEnricher.BAGGAGE_KEYS_LEAN) == 2 + assert "botanu.workflow" in RunContextEnricher.BAGGAGE_KEYS_LEAN + assert "botanu.event_id" in RunContextEnricher.BAGGAGE_KEYS_LEAN + assert "botanu.customer_id" in RunContextEnricher.BAGGAGE_KEYS_LEAN + assert len(RunContextEnricher.BAGGAGE_KEYS_LEAN) == 4 assert "botanu.run_id" in RunContextEnricher.BAGGAGE_KEYS_FULL + assert "botanu.event_id" in RunContextEnricher.BAGGAGE_KEYS_FULL + assert "botanu.customer_id" in RunContextEnricher.BAGGAGE_KEYS_FULL assert "botanu.workflow" in RunContextEnricher.BAGGAGE_KEYS_FULL assert "botanu.environment" in RunContextEnricher.BAGGAGE_KEYS_FULL - assert len(RunContextEnricher.BAGGAGE_KEYS_FULL) == 6 + assert len(RunContextEnricher.BAGGAGE_KEYS_FULL) == 7 diff --git a/tests/unit/test_ledger.py b/tests/unit/test_ledger.py deleted file mode 100644 index 9c492b2..0000000 --- a/tests/unit/test_ledger.py +++ /dev/null @@ -1,495 +0,0 @@ -# SPDX-FileCopyrightText: 2026 The Botanu Authors -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for Attempt Ledger.""" - -from __future__ import annotations - -import os -from unittest import mock - -from 
opentelemetry import trace - -from botanu.tracking.ledger import ( - AttemptLedger, - AttemptStatus, - LedgerEventType, - get_ledger, - record_attempt_ended, - record_attempt_started, - record_llm_attempted, - record_tool_attempted, - set_ledger, -) - - -class TestLedgerEventType: - """Tests for LedgerEventType enum.""" - - def test_event_types_are_strings(self): - assert LedgerEventType.ATTEMPT_STARTED == "attempt.started" - assert LedgerEventType.ATTEMPT_ENDED == "attempt.ended" - assert LedgerEventType.LLM_ATTEMPTED == "llm.attempted" - assert LedgerEventType.TOOL_ATTEMPTED == "tool.attempted" - assert LedgerEventType.CANCEL_REQUESTED == "cancellation.requested" - assert LedgerEventType.CANCEL_ACKNOWLEDGED == "cancellation.acknowledged" - assert LedgerEventType.ZOMBIE_DETECTED == "zombie.detected" - assert LedgerEventType.REDELIVERY_DETECTED == "redelivery.detected" - - -class TestAttemptStatus: - """Tests for AttemptStatus enum.""" - - def test_status_values(self): - assert AttemptStatus.SUCCESS == "success" - assert AttemptStatus.ERROR == "error" - assert AttemptStatus.TIMEOUT == "timeout" - assert AttemptStatus.CANCELLED == "cancelled" - assert AttemptStatus.RATE_LIMITED == "rate_limited" - - -class TestAttemptLedger: - """Tests for AttemptLedger class.""" - - def test_default_service_name(self): - """Should use environment variable for default service name.""" - with mock.patch.dict(os.environ, {"OTEL_SERVICE_NAME": "test-service"}): - ledger = AttemptLedger.__new__(AttemptLedger) - ledger.service_name = os.getenv("OTEL_SERVICE_NAME", "unknown") - ledger._initialized = False - assert ledger.service_name == "test-service" - - def test_get_trace_context_no_span(self): - """Should return empty dict when no active span.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - - # No span context - should return empty - ctx = ledger._get_trace_context() - assert ctx == {} or "trace_id" in ctx # May have context 
from other tests - - def test_get_trace_context_with_span(self, memory_exporter): - """Should return trace context when span is active.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - - tracer = trace.get_tracer("test") - with tracer.start_as_current_span("test-span") as span: - span_ctx = span.get_span_context() - ctx = ledger._get_trace_context() - - assert "trace_id" in ctx - assert "span_id" in ctx - assert ctx["trace_id"] == format(span_ctx.trace_id, "032x") - assert ctx["span_id"] == format(span_ctx.span_id, "016x") - - def test_emit_when_not_initialized(self): - """Should not raise when emitting without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - - # Should not raise - ledger._emit(LedgerEventType.ATTEMPT_STARTED, None, {"test": "value"}) - - def test_attempt_started_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.attempt_started( - run_id="run-123", - use_case="Test Case", - attempt=1, - ) - - def test_attempt_ended_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.attempt_ended( - run_id="run-123", - status="success", - duration_ms=1000.0, - ) - - def test_llm_attempted_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.llm_attempted( - run_id="run-123", - provider="openai", - model="gpt-4", - 
input_tokens=100, - output_tokens=50, - ) - - def test_tool_attempted_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.tool_attempted( - run_id="run-123", - tool_name="search", - ) - - def test_cancel_requested_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.cancel_requested(run_id="run-123", reason="user") - - def test_cancel_acknowledged_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.cancel_acknowledged(run_id="run-123", acknowledged_by="handler") - - def test_zombie_detected_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.zombie_detected( - run_id="run-123", - deadline_ts=1000.0, - actual_end_ts=2000.0, - zombie_duration_ms=1000.0, - component="handler", - ) - - def test_redelivery_detected_not_initialized(self): - """Should not raise when calling methods without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - ledger._logger = None - ledger.service_name = "test" - - # Should not raise - ledger.redelivery_detected( - run_id="run-123", - queue_name="my-queue", - delivery_count=3, - ) - - def test_flush_when_not_initialized(self): - """Should return True when flushing without 
initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - - result = ledger.flush() - assert result is True - - def test_shutdown_when_not_initialized(self): - """Should not raise when shutting down without initialization.""" - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = False - - # Should not raise - ledger.shutdown() - - -class TestGlobalLedger: - """Tests for global ledger functions.""" - - def test_get_ledger_creates_instance(self): - """get_ledger should create a ledger if none exists.""" - # Reset global - import botanu.tracking.ledger as ledger_module - - ledger_module._global_ledger = None - - ledger = get_ledger() - assert isinstance(ledger, AttemptLedger) - - def test_set_ledger(self): - """set_ledger should update the global instance.""" - custom_ledger = AttemptLedger.__new__(AttemptLedger) - custom_ledger._initialized = False - custom_ledger.service_name = "custom-service" - - set_ledger(custom_ledger) - assert get_ledger() is custom_ledger - - def test_record_attempt_started(self): - """record_attempt_started should call the global ledger.""" - mock_ledger = mock.MagicMock(spec=AttemptLedger) - set_ledger(mock_ledger) - - record_attempt_started(run_id="run-123", use_case="Test") - - mock_ledger.attempt_started.assert_called_once_with(run_id="run-123", use_case="Test") - - def test_record_attempt_ended(self): - """record_attempt_ended should call the global ledger.""" - mock_ledger = mock.MagicMock(spec=AttemptLedger) - set_ledger(mock_ledger) - - record_attempt_ended(run_id="run-123", status="success") - - mock_ledger.attempt_ended.assert_called_once_with(run_id="run-123", status="success") - - def test_record_llm_attempted(self): - """record_llm_attempted should call the global ledger.""" - mock_ledger = mock.MagicMock(spec=AttemptLedger) - set_ledger(mock_ledger) - - record_llm_attempted(run_id="run-123", provider="openai", model="gpt-4") - - 
mock_ledger.llm_attempted.assert_called_once_with(run_id="run-123", provider="openai", model="gpt-4") - - def test_record_tool_attempted(self): - """record_tool_attempted should call the global ledger.""" - mock_ledger = mock.MagicMock(spec=AttemptLedger) - set_ledger(mock_ledger) - - record_tool_attempted(run_id="run-123", tool_name="search") - - mock_ledger.tool_attempted.assert_called_once_with(run_id="run-123", tool_name="search") - - -class TestAttemptLedgerEmitMocked: - """Tests for ledger methods with mocked _emit to verify event attributes.""" - - def _make_ledger(self): - ledger = AttemptLedger.__new__(AttemptLedger) - ledger._initialized = True - ledger._logger = mock.MagicMock() - ledger.service_name = "test-svc" - return ledger - - def test_attempt_started_attributes(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.attempt_started( - run_id="run-100", - use_case="billing", - attempt=2, - root_run_id="root-50", - workflow="invoice", - tenant_id="t-001", - deadline_ts=1700000000.0, - ) - - ledger._emit.assert_called_once() - event_type, _severity, attrs = ledger._emit.call_args[0] - assert event_type == LedgerEventType.ATTEMPT_STARTED - assert attrs["botanu.run_id"] == "run-100" - assert attrs["botanu.use_case"] == "billing" - assert attrs["botanu.attempt"] == 2 - assert attrs["botanu.root_run_id"] == "root-50" - assert attrs["botanu.workflow"] == "invoice" - assert attrs["botanu.tenant_id"] == "t-001" - assert attrs["botanu.deadline_ts"] == 1700000000.0 - - def test_attempt_ended_success(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.attempt_ended( - run_id="run-200", - status="success", - duration_ms=1500.0, - ) - - _, _severity, attrs = ledger._emit.call_args[0] - assert attrs["botanu.run_id"] == "run-200" - assert attrs["status"] == "success" - assert attrs["duration_ms"] == 1500.0 - - def test_attempt_ended_error(self): - ledger = self._make_ledger() - ledger._emit = 
mock.MagicMock() - - ledger.attempt_ended( - run_id="run-201", - status="error", - error_class="ValueError", - reason_code="INVALID_INPUT", - ) - - _, _severity, attrs = ledger._emit.call_args[0] - assert attrs["status"] == "error" - assert attrs["error_class"] == "ValueError" - assert attrs["reason_code"] == "INVALID_INPUT" - - def test_llm_attempted_full_attributes(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.llm_attempted( - run_id="run-300", - provider="openai", - model="gpt-4", - operation="chat", - attempt_number=1, - input_tokens=500, - output_tokens=200, - cached_tokens=100, - duration_ms=800.0, - status="success", - provider_request_id="resp-abc", - estimated_cost_usd=0.0075, - ) - - _, _, attrs = ledger._emit.call_args[0] - assert attrs["gen_ai.provider.name"] == "openai" - assert attrs["gen_ai.request.model"] == "gpt-4" - assert attrs["gen_ai.usage.input_tokens"] == 500 - assert attrs["gen_ai.usage.output_tokens"] == 200 - assert attrs["botanu.usage.cached_tokens"] == 100 - assert attrs["botanu.cost.estimated_usd"] == 0.0075 - - def test_tool_attempted_attributes(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.tool_attempted( - run_id="run-400", - tool_name="search", - tool_call_id="call-xyz", - duration_ms=250.0, - items_returned=3, - bytes_processed=4096, - ) - - _, _, attrs = ledger._emit.call_args[0] - assert attrs["gen_ai.tool.name"] == "search" - assert attrs["gen_ai.tool.call.id"] == "call-xyz" - assert attrs["items_returned"] == 3 - assert attrs["bytes_processed"] == 4096 - - def test_cancel_requested_attributes(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.cancel_requested( - run_id="run-500", - reason="timeout", - requested_at_ms=1700000001000.0, - ) - - event_type, _, attrs = ledger._emit.call_args[0] - assert event_type == LedgerEventType.CANCEL_REQUESTED - assert attrs["cancellation.reason"] == "timeout" - assert 
attrs["cancellation.requested_at_ms"] == 1700000001000.0 - - def test_cancel_acknowledged_attributes(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.cancel_acknowledged( - run_id="run-600", - acknowledged_by="worker-3", - latency_ms=150.0, - ) - - event_type, _, attrs = ledger._emit.call_args[0] - assert event_type == LedgerEventType.CANCEL_ACKNOWLEDGED - assert attrs["cancellation.acknowledged_by"] == "worker-3" - assert attrs["cancellation.latency_ms"] == 150.0 - - def test_zombie_detected_attributes(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.zombie_detected( - run_id="run-700", - deadline_ts=1000.0, - actual_end_ts=5000.0, - zombie_duration_ms=4000.0, - component="agent_loop", - ) - - event_type, _, attrs = ledger._emit.call_args[0] - assert event_type == LedgerEventType.ZOMBIE_DETECTED - assert attrs["zombie_duration_ms"] == 4000.0 - assert attrs["zombie_component"] == "agent_loop" - - def test_redelivery_detected_attributes(self): - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.redelivery_detected( - run_id="run-800", - queue_name="tasks-queue", - delivery_count=3, - original_message_id="msg-original", - ) - - event_type, _, attrs = ledger._emit.call_args[0] - assert event_type == LedgerEventType.REDELIVERY_DETECTED - assert attrs["queue.name"] == "tasks-queue" - assert attrs["delivery_count"] == 3 - assert attrs["original_message_id"] == "msg-original" - - def test_attempt_started_default_root_run_id(self): - """root_run_id defaults to run_id when not provided.""" - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() - - ledger.attempt_started(run_id="run-solo", use_case="test") - - _, _, attrs = ledger._emit.call_args[0] - assert attrs["botanu.root_run_id"] == "run-solo" - - def test_cancel_requested_auto_timestamp(self): - """requested_at_ms uses current time when not provided.""" - ledger = self._make_ledger() - ledger._emit = mock.MagicMock() 
- - ledger.cancel_requested(run_id="run-ts", reason="user") - - _, _, attrs = ledger._emit.call_args[0] - assert attrs["cancellation.requested_at_ms"] > 0 - - -class TestLedgerGlobalReset: - """Tests for global ledger cleanup.""" - - def test_set_ledger_overrides_default(self): - import botanu.tracking.ledger as ledger_module - - ledger_module._global_ledger = None - default = get_ledger() - - custom = AttemptLedger.__new__(AttemptLedger) - custom._initialized = False - custom.service_name = "override" - set_ledger(custom) - - assert get_ledger() is custom - assert get_ledger() is not default - - # Cleanup - ledger_module._global_ledger = None diff --git a/tests/unit/test_llm_tracking.py b/tests/unit/test_llm_tracking.py index dd09cf9..1b6ed68 100644 --- a/tests/unit/test_llm_tracking.py +++ b/tests/unit/test_llm_tracking.py @@ -18,7 +18,7 @@ class TestTrackLLMCall: """Tests for track_llm_call context manager.""" def test_creates_span_with_model_name(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_tokens(input_tokens=100, output_tokens=50) spans = memory_exporter.get_finished_spans() @@ -27,7 +27,7 @@ def test_creates_span_with_model_name(self, memory_exporter): assert spans[0].name == "chat gpt-4" def test_records_token_usage(self, memory_exporter): - with track_llm_call(model="claude-3-opus", provider="anthropic") as tracker: + with track_llm_call(model="claude-3-opus", vendor="anthropic") as tracker: tracker.set_tokens(input_tokens=500, output_tokens=200) spans = memory_exporter.get_finished_spans() @@ -38,7 +38,7 @@ def test_records_token_usage(self, memory_exporter): def test_records_error_on_exception(self, memory_exporter): with pytest.raises(ValueError): - with track_llm_call(model="gpt-4", provider="openai") as _tracker: + with track_llm_call(model="gpt-4", vendor="openai") as _tracker: raise ValueError("API error") spans = 
memory_exporter.get_finished_spans() @@ -48,7 +48,7 @@ def test_records_error_on_exception(self, memory_exporter): def test_operation_type_attribute(self, memory_exporter): with track_llm_call( model="gpt-4", - provider="openai", + vendor="openai", operation=ModelOperation.EMBEDDINGS, ): pass @@ -60,7 +60,7 @@ def test_operation_type_attribute(self, memory_exporter): def test_request_params(self, memory_exporter): with track_llm_call( model="gpt-4", - provider="openai", + vendor="openai", ) as tracker: tracker.set_request_params(temperature=0.7, max_tokens=1000) @@ -74,15 +74,15 @@ class TestLLMTracker: """Tests for LLMTracker helper methods.""" def test_set_request_id(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: - tracker.set_request_id(provider_request_id="resp_123") + with track_llm_call(model="gpt-4", vendor="openai") as tracker: + tracker.set_request_id(vendor_request_id="resp_123") spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) assert attrs[GenAIAttributes.RESPONSE_ID] == "resp_123" def test_set_finish_reason(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_finish_reason("stop") spans = memory_exporter.get_finished_spans() @@ -91,11 +91,11 @@ def test_set_finish_reason(self, memory_exporter): assert attrs[GenAIAttributes.RESPONSE_FINISH_REASONS] == ("stop",) -class TestProviderNormalization: +class TestVendorNormalization: """Tests for provider name normalization.""" def test_openai_normalized(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="OpenAI"): + with track_llm_call(model="gpt-4", vendor="OpenAI"): pass spans = memory_exporter.get_finished_spans() @@ -103,7 +103,7 @@ def test_openai_normalized(self, memory_exporter): assert attrs[GenAIAttributes.PROVIDER_NAME] == "openai" def test_anthropic_normalized(self, memory_exporter): - with 
track_llm_call(model="claude-3", provider="Anthropic"): + with track_llm_call(model="claude-3", vendor="Anthropic"): pass spans = memory_exporter.get_finished_spans() @@ -111,7 +111,7 @@ def test_anthropic_normalized(self, memory_exporter): assert attrs[GenAIAttributes.PROVIDER_NAME] == "anthropic" def test_bedrock_normalized(self, memory_exporter): - with track_llm_call(model="claude-v2", provider="bedrock"): + with track_llm_call(model="claude-v2", vendor="bedrock"): pass spans = memory_exporter.get_finished_spans() @@ -119,7 +119,7 @@ def test_bedrock_normalized(self, memory_exporter): assert attrs[GenAIAttributes.PROVIDER_NAME] == "aws.bedrock" def test_vertex_normalized(self, memory_exporter): - with track_llm_call(model="gemini-pro", provider="vertex_ai"): + with track_llm_call(model="gemini-pro", vendor="vertex_ai"): pass spans = memory_exporter.get_finished_spans() @@ -127,7 +127,7 @@ def test_vertex_normalized(self, memory_exporter): assert attrs[GenAIAttributes.PROVIDER_NAME] == "gcp.vertex_ai" def test_azure_openai_normalized(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="azure_openai"): + with track_llm_call(model="gpt-4", vendor="azure_openai"): pass spans = memory_exporter.get_finished_spans() @@ -136,7 +136,7 @@ def test_azure_openai_normalized(self, memory_exporter): def test_unknown_provider_passthrough(self, memory_exporter): """Unknown provider names should be normalized to lowercase.""" - with track_llm_call(model="custom-model", provider="CustomProvider"): + with track_llm_call(model="custom-model", vendor="CustomProvider"): pass spans = memory_exporter.get_finished_spans() @@ -150,7 +150,7 @@ class TestLLMTrackerExtended: def test_set_streaming(self, memory_exporter): from botanu.tracking.llm import BotanuAttributes - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_streaming(True) spans = memory_exporter.get_finished_spans() 
@@ -160,7 +160,7 @@ def test_set_streaming(self, memory_exporter): def test_set_cache_hit(self, memory_exporter): from botanu.tracking.llm import BotanuAttributes - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_cache_hit(True) spans = memory_exporter.get_finished_spans() @@ -170,7 +170,7 @@ def test_set_cache_hit(self, memory_exporter): def test_set_attempt(self, memory_exporter): from botanu.tracking.llm import BotanuAttributes - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_attempt(3) spans = memory_exporter.get_finished_spans() @@ -178,7 +178,7 @@ def test_set_attempt(self, memory_exporter): assert attrs[BotanuAttributes.ATTEMPT_NUMBER] == 3 def test_set_response_model(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_response_model("gpt-4-0125-preview") spans = memory_exporter.get_finished_spans() @@ -188,7 +188,7 @@ def test_set_response_model(self, memory_exporter): def test_set_tokens_with_cache(self, memory_exporter): from botanu.tracking.llm import BotanuAttributes - with track_llm_call(model="claude-3", provider="anthropic") as tracker: + with track_llm_call(model="claude-3", vendor="anthropic") as tracker: tracker.set_tokens( input_tokens=100, output_tokens=50, @@ -206,19 +206,19 @@ def test_set_tokens_with_cache(self, memory_exporter): def test_set_request_id_with_client_id(self, memory_exporter): from botanu.tracking.llm import BotanuAttributes - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_request_id( - provider_request_id="resp_123", + vendor_request_id="resp_123", client_request_id="client_456", ) spans = memory_exporter.get_finished_spans() 
attrs = dict(spans[0].attributes) assert attrs[GenAIAttributes.RESPONSE_ID] == "resp_123" - assert attrs[BotanuAttributes.CLIENT_REQUEST_ID] == "client_456" + assert attrs[BotanuAttributes.VENDOR_CLIENT_REQUEST_ID] == "client_456" def test_set_request_params_extended(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.set_request_params( temperature=0.8, top_p=0.95, @@ -239,7 +239,7 @@ def test_set_request_params_extended(self, memory_exporter): assert attrs[GenAIAttributes.REQUEST_PRESENCE_PENALTY] == 0.3 def test_add_metadata(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.add_metadata(custom_field="value", another_field=123) spans = memory_exporter.get_finished_spans() @@ -248,7 +248,7 @@ def test_add_metadata(self, memory_exporter): assert attrs["botanu.another_field"] == 123 def test_add_metadata_preserves_prefix(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: tracker.add_metadata(**{"botanu.explicit": "prefixed"}) spans = memory_exporter.get_finished_spans() @@ -256,7 +256,7 @@ def test_add_metadata_preserves_prefix(self, memory_exporter): assert attrs["botanu.explicit"] == "prefixed" def test_set_error_manually(self, memory_exporter): - with track_llm_call(model="gpt-4", provider="openai") as tracker: + with track_llm_call(model="gpt-4", vendor="openai") as tracker: error = RuntimeError("Rate limit exceeded") tracker.set_error(error) @@ -326,7 +326,7 @@ def test_tool_call_attributes(self, memory_exporter): with track_tool_call( tool_name="web_search", tool_call_id="call_abc123", - provider="tavily", + vendor="tavily", ): pass @@ -403,12 +403,12 @@ def test_set_llm_attributes(self, memory_exporter): tracer = 
otl_trace.get_tracer("test") with tracer.start_as_current_span("test-llm-attrs"): set_llm_attributes( - provider="openai", + vendor="openai", model="gpt-4", input_tokens=150, output_tokens=75, streaming=True, - provider_request_id="resp_abc", + vendor_request_id="resp_abc", ) spans = memory_exporter.get_finished_spans() @@ -424,7 +424,7 @@ def test_set_llm_attributes_no_active_span(self): from botanu.tracking.llm import set_llm_attributes # Should not raise when no recording span - set_llm_attributes(provider="openai", model="gpt-4") + set_llm_attributes(vendor="openai", model="gpt-4") def test_set_token_usage(self, memory_exporter): from opentelemetry import trace as otl_trace @@ -454,7 +454,7 @@ class TestLLMInstrumentedDecorator: def test_decorator_creates_span(self, memory_exporter): from botanu.tracking.llm import llm_instrumented - @llm_instrumented(provider="openai") + @llm_instrumented(vendor="openai") def fake_completion(prompt, model="gpt-4"): class _Usage: prompt_tokens = 10 @@ -479,7 +479,7 @@ class _Response: def test_decorator_with_streaming(self, memory_exporter): from botanu.tracking.llm import BotanuAttributes, llm_instrumented - @llm_instrumented(provider="anthropic") + @llm_instrumented(vendor="anthropic") def fake_stream(prompt, model="claude-3", stream=False): return "streamed" @@ -492,7 +492,7 @@ def fake_stream(prompt, model="claude-3", stream=False): def test_decorator_without_usage(self, memory_exporter): from botanu.tracking.llm import llm_instrumented - @llm_instrumented(provider="custom", tokens_from_response=False) + @llm_instrumented(vendor="custom", tokens_from_response=False) def no_usage_fn(prompt, model="custom-model"): return "done" @@ -511,14 +511,14 @@ def test_client_request_id_on_track_llm_call(self, memory_exporter): with track_llm_call( model="gpt-4", - provider="openai", + vendor="openai", client_request_id="cli-req-001", ): pass spans = memory_exporter.get_finished_spans() attrs = dict(spans[0].attributes) - assert 
attrs[BotanuAttributes.CLIENT_REQUEST_ID] == "cli-req-001" + assert attrs[BotanuAttributes.VENDOR_CLIENT_REQUEST_ID] == "cli-req-001" class TestKwargsPassthrough: @@ -527,7 +527,7 @@ class TestKwargsPassthrough: def test_custom_kwargs(self, memory_exporter): with track_llm_call( model="gpt-4", - provider="openai", + vendor="openai", deployment_id="dep-001", ): pass diff --git a/tests/unit/test_middleware.py b/tests/unit/test_middleware.py index b41b838..efb0a6c 100644 --- a/tests/unit/test_middleware.py +++ b/tests/unit/test_middleware.py @@ -15,7 +15,7 @@ from botanu.sdk.middleware import BotanuMiddleware -def _make_app(*, use_case: str = "test_uc", workflow: str | None = None, auto_generate_run_id: bool = True): +def _make_app(*, workflow: str = "test_wf", auto_generate_run_id: bool = True): """Build a minimal Starlette app with BotanuMiddleware.""" async def homepage(request): @@ -24,7 +24,6 @@ async def homepage(request): app = Starlette(routes=[Route("/", homepage)]) app.add_middleware( BotanuMiddleware, - use_case=use_case, workflow=workflow, auto_generate_run_id=auto_generate_run_id, ) @@ -42,16 +41,11 @@ def _clean_otel_context(): class TestBotanuMiddleware: """Tests for BotanuMiddleware dispatch behaviour.""" - def test_response_contains_use_case_header(self, memory_exporter): - client = TestClient(_make_app(use_case="billing")) - resp = client.get("/") - assert resp.status_code == 200 - assert resp.headers["x-botanu-use-case"] == "billing" - def test_response_contains_workflow_header(self, memory_exporter): - client = TestClient(_make_app(use_case="billing", workflow="invoice_flow")) + client = TestClient(_make_app(workflow="billing")) resp = client.get("/") - assert resp.headers["x-botanu-workflow"] == "invoice_flow" + assert resp.status_code == 200 + assert resp.headers["x-botanu-workflow"] == "billing" def test_auto_generated_run_id_in_response(self, memory_exporter): client = TestClient(_make_app()) @@ -65,13 +59,8 @@ def 
test_run_id_propagated_from_header(self, memory_exporter): resp = client.get("/", headers={"x-botanu-run-id": "my-custom-run-123"}) assert resp.headers["x-botanu-run-id"] == "my-custom-run-123" - def test_use_case_propagated_from_header(self, memory_exporter): - client = TestClient(_make_app(use_case="default_uc")) - resp = client.get("/", headers={"x-botanu-use-case": "overridden_uc"}) - assert resp.headers["x-botanu-use-case"] == "overridden_uc" - def test_workflow_propagated_from_header(self, memory_exporter): - client = TestClient(_make_app(use_case="uc", workflow="default_wf")) + client = TestClient(_make_app(workflow="default_wf")) resp = client.get("/", headers={"x-botanu-workflow": "overridden_wf"}) assert resp.headers["x-botanu-workflow"] == "overridden_wf" @@ -81,11 +70,6 @@ def test_no_auto_run_id_when_disabled(self, memory_exporter): # Should not have a run_id header since none was provided and auto-gen is off assert "x-botanu-run-id" not in resp.headers - def test_workflow_defaults_to_use_case(self, memory_exporter): - client = TestClient(_make_app(use_case="my_uc")) - resp = client.get("/") - assert resp.headers["x-botanu-workflow"] == "my_uc" - def test_customer_id_propagated_from_header(self, memory_exporter): client = TestClient(_make_app()) resp = client.get("/", headers={"x-botanu-customer-id": "cust-456"}) @@ -121,25 +105,23 @@ def test_baggage_does_not_leak_between_requests(self, memory_exporter): assert data2.get("run_id") != "leak-test-001" def test_header_priority_over_constructor_defaults(self, memory_exporter): - """x-botanu-use-case header should override constructor default.""" - client = TestClient(_make_app(use_case="default_uc")) - resp = client.get("/", headers={"x-botanu-use-case": "header_uc"}) - assert resp.headers["x-botanu-use-case"] == "header_uc" + """x-botanu-workflow header should override constructor default.""" + client = TestClient(_make_app(workflow="default_wf")) + resp = client.get("/", headers={"x-botanu-workflow": 
"header_wf"}) + assert resp.headers["x-botanu-workflow"] == "header_wf" def test_multiple_headers_propagated(self, memory_exporter): """All x-botanu-* headers should be propagated together.""" - client = TestClient(_make_app(use_case="uc")) + client = TestClient(_make_app(workflow="wf")) resp = client.get( "/", headers={ "x-botanu-run-id": "multi-001", - "x-botanu-use-case": "multi-uc", "x-botanu-workflow": "multi-wf", "x-botanu-customer-id": "cust-multi", }, ) assert resp.headers["x-botanu-run-id"] == "multi-001" - assert resp.headers["x-botanu-use-case"] == "multi-uc" assert resp.headers["x-botanu-workflow"] == "multi-wf" def test_exception_in_handler_still_detaches_context(self, memory_exporter): @@ -160,7 +142,7 @@ async def check_baggage(request): return JSONResponse({"run_id": run_id}) app = Starlette(routes=[Route("/check", check_baggage)]) - app.add_middleware(BotanuMiddleware, use_case="test") + app.add_middleware(BotanuMiddleware, workflow="test") return app @@ -171,5 +153,5 @@ async def error_handler(request): raise RuntimeError("Intentional test error") app = Starlette(routes=[Route("/error", error_handler)]) - app.add_middleware(BotanuMiddleware, use_case="error_test") + app.add_middleware(BotanuMiddleware, workflow="error_test") return app diff --git a/tests/unit/test_resource_detector.py b/tests/unit/test_resource_detector.py deleted file mode 100644 index dad4d3d..0000000 --- a/tests/unit/test_resource_detector.py +++ /dev/null @@ -1,455 +0,0 @@ -# SPDX-FileCopyrightText: 2026 The Botanu Authors -# SPDX-License-Identifier: Apache-2.0 - -"""Tests for resource detection.""" - -from __future__ import annotations - -import os -import sys -from unittest import mock - -from botanu.resources.detector import ( - detect_all_resources, - detect_cloud_provider, - detect_container, - detect_host, - detect_kubernetes, - detect_process, - detect_serverless, - get_resource_attributes, -) - - -class TestDetectHost: - """Tests for host detection.""" - - def 
test_detects_hostname(self): - attrs = detect_host() - assert "host.name" in attrs - assert isinstance(attrs["host.name"], str) - - def test_detects_os_type(self): - attrs = detect_host() - assert attrs["os.type"] == sys.platform - - def test_detects_host_arch(self): - attrs = detect_host() - assert "host.arch" in attrs - - -class TestDetectProcess: - """Tests for process detection.""" - - def test_detects_pid(self): - attrs = detect_process() - assert attrs["process.pid"] == os.getpid() - - def test_detects_runtime(self): - attrs = detect_process() - assert attrs["process.runtime.name"] == "python" - assert "process.runtime.version" in attrs - - -class TestDetectKubernetes: - """Tests for Kubernetes detection.""" - - def test_no_k8s_when_not_in_cluster(self): - with mock.patch.dict(os.environ, {}, clear=True): - os.environ.pop("KUBERNETES_SERVICE_HOST", None) - attrs = detect_kubernetes() - assert attrs == {} - - def test_detects_k8s_pod_name(self): - with mock.patch.dict( - os.environ, - { - "KUBERNETES_SERVICE_HOST": "10.0.0.1", - "HOSTNAME": "my-pod-abc123", - "K8S_NAMESPACE": "default", - }, - ): - attrs = detect_kubernetes() - assert attrs.get("k8s.pod.name") == "my-pod-abc123" - assert attrs.get("k8s.namespace.name") == "default" - - def test_detects_k8s_from_env_vars(self): - with mock.patch.dict( - os.environ, - { - "KUBERNETES_SERVICE_HOST": "10.0.0.1", - "K8S_POD_NAME": "explicit-pod", - "K8S_POD_UID": "uid-12345", - "K8S_CLUSTER_NAME": "prod-cluster", - }, - ): - attrs = detect_kubernetes() - assert attrs.get("k8s.pod.name") == "explicit-pod" - assert attrs.get("k8s.pod.uid") == "uid-12345" - assert attrs.get("k8s.cluster.name") == "prod-cluster" - - -class TestDetectCloudProvider: - """Tests for cloud provider detection.""" - - def test_no_cloud_when_not_in_cloud(self): - with mock.patch.dict(os.environ, {}, clear=True): - # Clear all cloud env vars - for key in list(os.environ.keys()): - if any( - prefix in key - for prefix in ["AWS_", "GOOGLE_", 
"GCLOUD_", "GCP_", "AZURE_", "K_", "FUNCTION_", "WEBSITE_"] - ): - os.environ.pop(key, None) - attrs = detect_cloud_provider() - assert "cloud.provider" not in attrs - - def test_detects_aws(self): - with mock.patch.dict( - os.environ, - { - "AWS_REGION": "us-east-1", - "AWS_ACCOUNT_ID": "123456789012", - }, - clear=False, - ): - attrs = detect_cloud_provider() - assert attrs.get("cloud.provider") == "aws" - assert attrs.get("cloud.region") == "us-east-1" - - def test_detects_aws_lambda(self): - with mock.patch.dict( - os.environ, - { - "AWS_LAMBDA_FUNCTION_NAME": "my-function", - "AWS_LAMBDA_FUNCTION_VERSION": "$LATEST", - "AWS_REGION": "us-west-2", - }, - clear=False, - ): - attrs = detect_cloud_provider() - assert attrs.get("cloud.provider") == "aws" - assert attrs.get("faas.name") == "my-function" - - def test_detects_gcp(self): - with mock.patch.dict( - os.environ, - {"GOOGLE_CLOUD_PROJECT": "my-project", "GOOGLE_CLOUD_REGION": "us-central1"}, - clear=False, - ): - # Clear AWS vars - os.environ.pop("AWS_REGION", None) - os.environ.pop("AWS_DEFAULT_REGION", None) - attrs = detect_cloud_provider() - assert attrs.get("cloud.provider") == "gcp" - assert attrs.get("cloud.account.id") == "my-project" - - def test_detects_gcp_cloud_run(self): - with mock.patch.dict( - os.environ, - { - "K_SERVICE": "my-service", - "K_REVISION": "my-service-00001", - "GOOGLE_CLOUD_PROJECT": "my-project", - }, - clear=False, - ): - os.environ.pop("AWS_REGION", None) - attrs = detect_cloud_provider() - assert attrs.get("cloud.provider") == "gcp" - assert attrs.get("faas.name") == "my-service" - - def test_detects_azure(self): - with mock.patch.dict( - os.environ, - { - "WEBSITE_SITE_NAME": "my-app", - "AZURE_SUBSCRIPTION_ID": "sub-12345", - "REGION_NAME": "eastus", - }, - clear=False, - ): - # Clear other cloud vars - os.environ.pop("AWS_REGION", None) - os.environ.pop("GOOGLE_CLOUD_PROJECT", None) - attrs = detect_cloud_provider() - assert attrs.get("cloud.provider") == "azure" - 
assert attrs.get("faas.name") == "my-app" - - -class TestDetectContainer: - """Tests for container detection.""" - - def test_detects_container_id_from_env(self): - with mock.patch.dict(os.environ, {"CONTAINER_ID": "abc123def456"}): - attrs = detect_container() - # Container ID detection depends on cgroup files - # In test environment, may or may not detect - assert isinstance(attrs, dict) - - -class TestDetectServerless: - """Tests for serverless/FaaS detection.""" - - def test_detects_lambda(self): - with mock.patch.dict( - os.environ, - { - "AWS_LAMBDA_FUNCTION_NAME": "my-lambda", - "AWS_LAMBDA_FUNCTION_VERSION": "1", - "AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "512", - }, - ): - attrs = detect_serverless() - assert attrs.get("faas.name") == "my-lambda" - assert attrs.get("faas.version") == "1" - assert attrs.get("faas.max_memory") == 512 * 1024 * 1024 - - def test_detects_cloud_run(self): - with mock.patch.dict( - os.environ, - { - "K_SERVICE": "cloud-run-service", - "K_REVISION": "rev-001", - }, - ): - # Clear Lambda vars - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - attrs = detect_serverless() - assert attrs.get("faas.name") == "cloud-run-service" - assert attrs.get("faas.version") == "rev-001" - - -class TestDetectAllResources: - """Tests for combined resource detection.""" - - def test_returns_dict(self): - attrs = detect_all_resources() - assert isinstance(attrs, dict) - - def test_includes_host_info(self): - # Clear cache to ensure fresh detection - detect_all_resources.cache_clear() - attrs = detect_all_resources() - assert "host.name" in attrs - assert "process.pid" in attrs - - def test_caches_results(self): - detect_all_resources.cache_clear() - result1 = detect_all_resources() - result2 = detect_all_resources() - assert result1 is result2 # Same object due to caching - - -class TestGetResourceAttributes: - """Tests for selective resource detection.""" - - def test_include_host_only(self): - attrs = get_resource_attributes( - include_host=True, - 
include_process=False, - include_container=False, - include_cloud=False, - include_k8s=False, - include_faas=False, - ) - assert "host.name" in attrs - assert "process.pid" not in attrs - - def test_include_process_only(self): - attrs = get_resource_attributes( - include_host=False, - include_process=True, - include_container=False, - include_cloud=False, - include_k8s=False, - include_faas=False, - ) - assert "process.pid" in attrs - assert "host.name" not in attrs - - -class TestAWSAvailabilityZone: - """Tests for _get_aws_availability_zone.""" - - def test_returns_none_for_lambda(self): - from botanu.resources.detector import _get_aws_availability_zone - - with mock.patch.dict(os.environ, {"AWS_LAMBDA_FUNCTION_NAME": "fn"}): - assert _get_aws_availability_zone() is None - - def test_returns_none_when_metadata_disabled(self): - from botanu.resources.detector import _get_aws_availability_zone - - with mock.patch.dict(os.environ, {"AWS_EC2_METADATA_DISABLED": "true"}, clear=True): - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - assert _get_aws_availability_zone() is None - - def test_returns_none_when_invalid_endpoint(self): - from botanu.resources.detector import _get_aws_availability_zone - - with mock.patch.dict( - os.environ, - { - "AWS_EC2_METADATA_SERVICE_ENDPOINT": "not-a-url", - }, - clear=True, - ): - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - assert _get_aws_availability_zone() is None - - def test_returns_none_on_network_error(self): - from botanu.resources.detector import _get_aws_availability_zone - - with mock.patch.dict(os.environ, {}, clear=True): - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - os.environ.pop("AWS_EC2_METADATA_DISABLED", None) - # Default endpoint (169.254.169.254) will fail in test env - result = _get_aws_availability_zone() - assert result is None - - -class TestCloudRegionFromAZ: - """Tests for cloud region derivation from availability zone.""" - - def test_region_derived_from_az(self): - """When AZ is 
'us-east-1a', region should be 'us-east-1'.""" - - with mock.patch.dict( - os.environ, - { - "AWS_REGION": "", - "AWS_DEFAULT_REGION": "", - "AWS_ACCOUNT_ID": "123456789012", - }, - clear=True, - ): - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - - # Mock the IMDS call to return an AZ - with mock.patch( - "botanu.resources.detector._get_aws_availability_zone", - return_value="us-west-2c", - ): - attrs = detect_cloud_provider() - if "cloud.availability_zone" in attrs: - assert attrs["cloud.region"] == "us-west-2" - - -class TestContainerId: - """Tests for container ID extraction.""" - - def test_container_id_from_env(self): - from botanu.resources.detector import _get_container_id - - # Short container IDs (< 12 chars) are ignored - with mock.patch.dict(os.environ, {"CONTAINER_ID": "short"}, clear=True): - os.environ.pop("HOSTNAME", None) - result = _get_container_id() - assert result is None - - # Long enough IDs are returned - with mock.patch.dict(os.environ, {"CONTAINER_ID": "abcdef123456"}, clear=True): - os.environ.pop("HOSTNAME", None) - result = _get_container_id() - # May be overridden by cgroup parsing, but at minimum not None - assert result is None or len(result) >= 12 - - -class TestDetectHostExtended: - """Extended host detection tests.""" - - def test_host_id_from_env(self): - with mock.patch.dict(os.environ, {"HOST_ID": "i-0123456789"}): - attrs = detect_host() - assert attrs["host.id"] == "i-0123456789" - - def test_host_id_from_instance_id(self): - with mock.patch.dict(os.environ, {"INSTANCE_ID": "vm-abc"}, clear=True): - os.environ.pop("HOST_ID", None) - attrs = detect_host() - assert attrs["host.id"] == "vm-abc" - - def test_host_id_falls_back_to_hostname(self): - with mock.patch.dict(os.environ, {}, clear=True): - os.environ.pop("HOST_ID", None) - os.environ.pop("INSTANCE_ID", None) - attrs = detect_host() - assert attrs.get("host.id") == attrs.get("host.name") - - -class TestDetectServerlessExtended: - """Extended serverless detection 
tests.""" - - def test_gcp_cloud_function(self): - with mock.patch.dict( - os.environ, - { - "FUNCTION_NAME": "my-function", - "FUNCTION_TARGET": "handle_event", - }, - clear=True, - ): - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - os.environ.pop("K_SERVICE", None) - attrs = detect_serverless() - assert attrs["faas.name"] == "my-function" - assert attrs["faas.trigger"] == "handle_event" - - def test_azure_functions(self): - with mock.patch.dict( - os.environ, - { - "WEBSITE_SITE_NAME": "my-azure-fn", - "WEBSITE_INSTANCE_ID": "inst-123", - }, - clear=True, - ): - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - os.environ.pop("K_SERVICE", None) - os.environ.pop("FUNCTION_NAME", None) - attrs = detect_serverless() - assert attrs["faas.name"] == "my-azure-fn" - assert attrs["faas.instance"] == "inst-123" - - def test_no_serverless_detected(self): - with mock.patch.dict(os.environ, {}, clear=True): - os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) - os.environ.pop("K_SERVICE", None) - os.environ.pop("FUNCTION_NAME", None) - os.environ.pop("WEBSITE_SITE_NAME", None) - attrs = detect_serverless() - assert attrs == {} - - -class TestDetectProcessExtended: - """Extended process detection tests.""" - - def test_process_command(self): - attrs = detect_process() - assert "process.command" in attrs - assert isinstance(attrs["process.command"], str) - - def test_process_runtime_version_format(self): - attrs = detect_process() - version = attrs["process.runtime.version"] - parts = version.split(".") - assert len(parts) >= 2 # major.minor at minimum - - -class TestServiceInstanceId: - """Tests for service.instance.id derivation in detect_all_resources.""" - - def test_instance_id_from_hostname_in_k8s(self): - detect_all_resources.cache_clear() - with mock.patch.dict( - os.environ, - { - "KUBERNETES_SERVICE_HOST": "10.0.0.1", - "HOSTNAME": "my-pod-abc123xyz", - }, - ): - attrs = detect_all_resources() - # Should have service.instance.id - assert "service.instance.id" in 
attrs - detect_all_resources.cache_clear() diff --git a/tests/unit/test_run_context.py b/tests/unit/test_run_context.py index 0869676..038137e 100644 --- a/tests/unit/test_run_context.py +++ b/tests/unit/test_run_context.py @@ -43,28 +43,30 @@ class TestRunContextCreate: """Tests for RunContext.create factory.""" def test_creates_with_required_fields(self): - ctx = RunContext.create(use_case="Customer Support") + ctx = RunContext.create(workflow="Customer Support", event_id="evt-1", customer_id="cust-1") assert ctx.run_id is not None - assert ctx.use_case == "Customer Support" + assert ctx.workflow == "Customer Support" + assert ctx.event_id == "evt-1" + assert ctx.customer_id == "cust-1" assert ctx.environment == "production" # default assert ctx.attempt == 1 def test_root_run_id_defaults_to_run_id(self): - ctx = RunContext.create(use_case="test") + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1") assert ctx.root_run_id == ctx.run_id def test_accepts_custom_root_run_id(self): - ctx = RunContext.create(use_case="test", root_run_id="custom-root") + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1", root_run_id="custom-root") assert ctx.root_run_id == "custom-root" def test_environment_from_env_var(self): with mock.patch.dict(os.environ, {"BOTANU_ENVIRONMENT": "staging"}): - ctx = RunContext.create(use_case="test") + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1") assert ctx.environment == "staging" def test_explicit_environment_overrides_env_var(self): with mock.patch.dict(os.environ, {"BOTANU_ENVIRONMENT": "staging"}): - ctx = RunContext.create(use_case="test", environment="production") + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1", environment="production") assert ctx.environment == "production" @@ -72,7 +74,7 @@ class TestRunContextRetry: """Tests for retry handling.""" def test_create_retry_increments_attempt(self): - original = 
RunContext.create(use_case="test") + original = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1") retry = RunContext.create_retry(original) assert retry.attempt == 2 @@ -80,8 +82,15 @@ def test_create_retry_increments_attempt(self): assert retry.root_run_id == original.root_run_id assert retry.run_id != original.run_id + def test_create_retry_preserves_event_and_customer(self): + original = RunContext.create(workflow="test", event_id="ticket-42", customer_id="bigretail") + retry = RunContext.create_retry(original) + + assert retry.event_id == "ticket-42" + assert retry.customer_id == "bigretail" + def test_multiple_retries_preserve_root(self): - original = RunContext.create(use_case="test") + original = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1") retry1 = RunContext.create_retry(original) retry2 = RunContext.create_retry(retry1) @@ -93,17 +102,17 @@ class TestRunContextDeadline: """Tests for deadline handling.""" def test_deadline_seconds(self): - ctx = RunContext.create(use_case="test", deadline_seconds=10.0) + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1", deadline_seconds=10.0) assert ctx.deadline is not None assert ctx.deadline > time.time() def test_is_past_deadline(self): - ctx = RunContext.create(use_case="test", deadline_seconds=0.001) + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1", deadline_seconds=0.001) time.sleep(0.01) assert ctx.is_past_deadline() is True def test_remaining_time_seconds(self): - ctx = RunContext.create(use_case="test", deadline_seconds=10.0) + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1", deadline_seconds=10.0) remaining = ctx.remaining_time_seconds() assert remaining is not None assert 9.0 < remaining <= 10.0 @@ -113,7 +122,7 @@ class TestRunContextCancellation: """Tests for cancellation handling.""" def test_request_cancellation(self): - ctx = 
RunContext.create(use_case="test") + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1") assert ctx.is_cancelled() is False ctx.request_cancellation("user") @@ -125,7 +134,7 @@ class TestRunContextOutcome: """Tests for outcome recording.""" def test_complete_sets_outcome(self): - ctx = RunContext.create(use_case="test") + ctx = RunContext.create(workflow="test", event_id="evt-1", customer_id="cust-1") ctx.complete( status=RunStatus.SUCCESS, value_type="tickets_resolved", @@ -144,47 +153,54 @@ class TestRunContextSerialization: def test_to_baggage_dict_lean_mode(self): with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "lean"}): ctx = RunContext.create( - use_case="Customer Support", - workflow="handle_ticket", + workflow="Customer Support", + event_id="ticket-42", + customer_id="bigretail", tenant_id="tenant-123", ) baggage = ctx.to_baggage_dict() - # Lean mode only includes run_id and use_case + # Lean mode includes run_id, workflow, event_id, customer_id assert "botanu.run_id" in baggage - assert "botanu.use_case" in baggage - assert "botanu.workflow" not in baggage + assert "botanu.workflow" in baggage + assert baggage["botanu.event_id"] == "ticket-42" + assert baggage["botanu.customer_id"] == "bigretail" assert "botanu.tenant_id" not in baggage def test_to_baggage_dict_full_mode(self): with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "full"}): ctx = RunContext.create( - use_case="Customer Support", - workflow="handle_ticket", + workflow="Customer Support", + event_id="ticket-42", + customer_id="bigretail", tenant_id="tenant-123", ) baggage = ctx.to_baggage_dict() - assert baggage["botanu.workflow"] == "handle_ticket" + assert baggage["botanu.event_id"] == "ticket-42" + assert baggage["botanu.customer_id"] == "bigretail" assert baggage["botanu.tenant_id"] == "tenant-123" def test_to_span_attributes(self): ctx = RunContext.create( - use_case="Customer Support", - workflow="handle_ticket", + workflow="Customer 
Support", + event_id="ticket-42", + customer_id="bigretail", tenant_id="tenant-123", ) attrs = ctx.to_span_attributes() assert attrs["botanu.run_id"] == ctx.run_id - assert attrs["botanu.use_case"] == "Customer Support" - assert attrs["botanu.workflow"] == "handle_ticket" + assert attrs["botanu.workflow"] == "Customer Support" + assert attrs["botanu.event_id"] == "ticket-42" + assert attrs["botanu.customer_id"] == "bigretail" assert attrs["botanu.tenant_id"] == "tenant-123" def test_from_baggage_roundtrip(self): original = RunContext.create( - use_case="test", - workflow="my_workflow", + workflow="test", + event_id="ticket-42", + customer_id="bigretail", tenant_id="tenant-abc", ) baggage = original.to_baggage_dict(lean_mode=False) @@ -192,8 +208,9 @@ def test_from_baggage_roundtrip(self): assert restored is not None assert restored.run_id == original.run_id - assert restored.use_case == original.use_case assert restored.workflow == original.workflow + assert restored.event_id == original.event_id + assert restored.customer_id == original.customer_id assert restored.tenant_id == original.tenant_id def test_from_baggage_returns_none_for_missing_fields(self): diff --git a/tests/unit/test_span_helpers.py b/tests/unit/test_span_helpers.py index 799bcf4..e5793b8 100644 --- a/tests/unit/test_span_helpers.py +++ b/tests/unit/test_span_helpers.py @@ -5,7 +5,7 @@ from __future__ import annotations -from opentelemetry import trace +from opentelemetry import baggage, context, trace from botanu.sdk.span_helpers import emit_outcome, set_business_context @@ -66,6 +66,48 @@ def test_emit_outcome_adds_event(self, memory_exporter): assert len(events) == 1 assert events[0].attributes["status"] == "success" + def test_emit_outcome_emits_log_record(self, memory_exporter, log_exporter): + """emit_outcome should emit an OTel log record when event_id is in baggage.""" + tracer = trace.get_tracer("test") + + # Set up baggage with event_id + ctx = context.Context() + ctx = 
baggage.set_baggage("botanu.event_id", "ticket-42", context=ctx) + token = context.attach(ctx) + + try: + with tracer.start_as_current_span("test-span"): + emit_outcome("success") + finally: + context.detach(token) + + # Verify log record was emitted + logs = log_exporter.get_finished_logs() + assert len(logs) >= 1 + + log = logs[0] + assert log.log_record.body == "outcome:success" + assert log.log_record.attributes["botanu.event_id"] == "ticket-42" + assert log.log_record.attributes["botanu.outcome.status"] == "success" + + def test_emit_outcome_no_log_without_event_id(self, memory_exporter, log_exporter): + """emit_outcome should NOT emit a log record when no event_id in baggage.""" + tracer = trace.get_tracer("test") + + # No baggage set - use clean context + ctx = context.Context() + token = context.attach(ctx) + + try: + with tracer.start_as_current_span("test-span"): + emit_outcome("success") + finally: + context.detach(token) + + # No log records should be emitted + logs = log_exporter.get_finished_logs() + assert len(logs) == 0 + class TestSetBusinessContext: """Tests for set_business_context function."""