From 0c777ca40808043f3f2a9932413f8cdda09e4cd1 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Sun, 15 Mar 2026 20:55:34 +0100 Subject: [PATCH 1/3] feat(telemetry): introduce telemetry system for usage data collection - Added a new telemetry module to collect and export usage data. - Implemented three telemetry tiers: Off, Internal, and Public. - Integrated OpenTelemetry for data export. - Created user prompts for telemetry consent on first run. - Updated documentation to explain telemetry features and configuration. This enhancement aims to improve CodeCarbon by gathering anonymous usage data while ensuring user privacy and consent. --- codecarbon/__init__.py | 16 +- codecarbon/core/telemetry/collector.py | 545 +++++++++++++++++++++ codecarbon/core/telemetry/config.py | 175 +++++++ codecarbon/core/telemetry/otel_exporter.py | 223 +++++++++ codecarbon/core/telemetry/prompt.py | 169 +++++++ codecarbon/core/telemetry/service.py | 221 +++++++++ docs/telemetry.md | 112 +++++ mkdocs.yml | 1 + pyproject.toml | 8 + 9 files changed, 1469 insertions(+), 1 deletion(-) create mode 100644 codecarbon/core/telemetry/collector.py create mode 100644 codecarbon/core/telemetry/config.py create mode 100644 codecarbon/core/telemetry/otel_exporter.py create mode 100644 codecarbon/core/telemetry/prompt.py create mode 100644 codecarbon/core/telemetry/service.py create mode 100644 docs/telemetry.md diff --git a/codecarbon/__init__.py b/codecarbon/__init__.py index f602f2635..0d4db21d3 100644 --- a/codecarbon/__init__.py +++ b/codecarbon/__init__.py @@ -8,6 +8,20 @@ OfflineEmissionsTracker, track_emissions, ) +from .core.telemetry import ( + TelemetryConfig, + TelemetryTier, + init_telemetry, + set_telemetry, +) -__all__ = ["EmissionsTracker", "OfflineEmissionsTracker", "track_emissions"] +__all__ = [ + "EmissionsTracker", + "OfflineEmissionsTracker", + "track_emissions", + "TelemetryConfig", + "TelemetryTier", + "init_telemetry", + "set_telemetry", +] __app_name__ = "codecarbon" diff --git a/codecarbon/core/telemetry/collector.py b/codecarbon/core/telemetry/collector.py new file mode 100644 index 000000000..8ca9741df --- /dev/null +++ b/codecarbon/core/telemetry/collector.py @@ -0,0 +1,545 @@ +""" +Telemetry data collector. + +Collects environment, hardware, usage, and ML ecosystem data. +""" + +import hashlib +import os +import platform +import sys +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + +from codecarbon._version import __version__ +from codecarbon.core.config import get_hierarchical_config +from codecarbon.external.logger import logger + + +@dataclass +class TelemetryData: + """Container for all telemetry data.""" + + # Environment & Hardware (Tier 1: Internal) + os: str = "" + python_version: str = "" + python_implementation: str = "" + python_executable_hash: str = "" + python_env_type: str = "" + codecarbon_version: str = "" + codecarbon_install_method: str = "" + + cpu_count: int = 0 + cpu_physical_count: int = 0 + cpu_model: str = "" + cpu_architecture: str = "" + + gpu_count: int = 0 + gpu_model: str = "" + gpu_driver_version: str = "" + gpu_memory_total_gb: float = 0.0 + + ram_total_size_gb: float = 0.0 + + cuda_version: str = "" + cudnn_version: str = "" + + cloud_provider: str = "" + cloud_region: str = "" + + # Usage Patterns (Tier 1: Internal) + tracking_mode: str = "" + api_mode: str = "" # offline, online + output_methods: list = field(default_factory=list) + hardware_tracked: list = field(default_factory=list) + measure_power_interval_secs: float = 15.0 + + # ML Ecosystem (Tier 1: Internal) + has_torch: bool = False + torch_version: str = "" + has_transformers: bool = False + transformers_version: str = "" + has_diffusers: bool = False + diffusers_version: str = "" + has_tensorflow: bool = False + tensorflow_version: str = "" + has_keras: bool = False + keras_version: str = "" + has_pytorch_lightning: bool = False + pytorch_lightning_version: str = "" + has_fastai: bool = False + fastai_version: str = "" + ml_framework_primary: str = "" + + # Performance & Errors (Tier 1: Internal) + hardware_detection_success: bool = True + rapl_available: bool = False + gpu_detection_method: str = "" + errors_encountered: list = field(default_factory=list) + tracking_overhead_percent: float = 0.0 + + # Context (Tier 1: Internal) + ide_used: str = "" + notebook_environment: str = "" + ci_environment: str = "" + python_package_manager: str = "" + container_runtime: str = "" + in_container: bool = False + + # Emissions Data (Tier 2: Public only) + total_emissions_kg: float = 0.0 + emissions_rate_kg_per_sec: float = 0.0 + energy_consumed_kwh: float = 0.0 + cpu_energy_kwh: float = 0.0 + gpu_energy_kwh: float = 0.0 + ram_energy_kwh: float = 0.0 + duration_seconds: float = 0.0 + cpu_utilization_avg: float = 0.0 + gpu_utilization_avg: float = 0.0 + ram_utilization_avg: float = 0.0 + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for export.""" + return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} + + +class TelemetryCollector: + """Collects telemetry data.""" + + def __init__(self): + self._data = TelemetryData() + + @property + def data(self) -> TelemetryData: + return self._data + + def collect_environment(self) -> "TelemetryCollector": + """Collect Python environment info.""" + self._data.python_version = platform.python_version() + self._data.python_implementation = platform.python_implementation() + + # Hash executable path for privacy + executable = sys.executable + if executable: + self._data.python_executable_hash = hashlib.sha256( + executable.encode() + ).hexdigest()[:16] + + # Detect environment type + self._data.python_env_type = self._detect_python_env_type() + + # CodeCarbon version + self._data.codecarbon_version = __version__ + + # Install method detection + self._data.codecarbon_install_method = self._detect_install_method() + + # OS + self._data.os = platform.platform() + + # Architecture + self._data.cpu_architecture = platform.machine() + + return self + + def _detect_python_env_type(self) -> str: + """Detect Python environment type.""" + if "conda" in sys.prefix.lower(): + return "conda" + elif hasattr(sys, "real_prefix") or ( + hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix + ): + # Check for common venv patterns + if os.environ.get("VIRTUAL_ENV"): + return "venv" + # Check for uv + if os.environ.get("UV"): + return "uv" + return "virtualenv" + elif os.environ.get("VIRTUAL_ENV"): + return "venv" + elif os.environ.get("UV"): + return "uv" + return "system" + + def _detect_install_method(self) -> str: + """Detect how CodeCarbon was installed.""" + # Check if editable install + import codecarbon + + codecarbon_path = os.path.dirname(codecarbon.__file__) + if ".egg-link" in codecarbon_path or ".editable" in codecarbon_path: + return "editable" + + # Check common package managers + # This is a heuristic - check if in common locations + if "site-packages" in codecarbon_path: + # Could be pip, uv, or conda + if "uv" in codecarbon_path: + return "uv" + elif "conda" in codecarbon_path: + return "conda" + return "pip" + return "unknown" + + def collect_hardware( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + ) -> "TelemetryCollector": + """Collect hardware info.""" + self._data.cpu_count = cpu_count + self._data.cpu_physical_count = cpu_physical_count + self._data.cpu_model = cpu_model + self._data.ram_total_size_gb = ram_total_gb + self._data.gpu_count = gpu_count + self._data.gpu_model = gpu_model + + # Try to detect CUDA + self._detect_cuda() + + # Try to detect GPU driver + self._detect_gpu_driver() + + return self + + def _detect_cuda(self) -> None: + """Detect CUDA version.""" + try: + import torch + + if hasattr(torch, "version") and torch.version: + self._data.cuda_version = str(torch.version.cuda) + if hasattr(torch.backends, "cudnn") and torch.backends.cudnn.is_available(): + self._data.cudnn_version = str(torch.backends.cudnn.version()) + except ImportError: + pass + + def _detect_gpu_driver(self) -> None: + """Detect GPU driver version.""" + try: + import subprocess + + result = subprocess.run( + ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + self._data.gpu_driver_version = result.stdout.strip().split("\n")[0] + self._data.gpu_detection_method = "nvidia-smi" + + # Also get GPU memory + result = subprocess.run( + [ + "nvidia-smi", + "--query-gpu=memory.total", + "--format=csv,noheader,nounits", + ], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + mem_mb = result.stdout.strip().split("\n")[0] + self._data.gpu_memory_total_gb = float(mem_mb) / 1024 + except (FileNotFoundError, subprocess.TimeoutExpired, ValueError): + pass + + def collect_usage( + self, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + ) -> "TelemetryCollector": + """Collect usage patterns.""" + self._data.tracking_mode = tracking_mode + self._data.api_mode = api_mode + self._data.output_methods = output_methods or [] + self._data.hardware_tracked = hardware_tracked or [] + self._data.measure_power_interval_secs = measure_power_interval + + return self + + def collect_ml_ecosystem(self) -> "TelemetryCollector": + """Detect ML frameworks and libraries.""" + frameworks = [] + + # PyTorch + try: + import torch + + self._data.has_torch = True + self._data.torch_version = torch.__version__ + frameworks.append("pytorch") + except ImportError: + pass + + # Transformers + try: + import transformers + + self._data.has_transformers = True + self._data.transformers_version = transformers.__version__ + except ImportError: + pass + + # Diffusers + try: + import diffusers + + self._data.has_diffusers = True + self._data.diffusers_version = diffusers.__version__ + except ImportError: + pass + + # TensorFlow + try: + import tensorflow + + self._data.has_tensorflow = True + self._data.tensorflow_version = tensorflow.__version__ + frameworks.append("tensorflow") + except ImportError: + pass + + # Keras + try: + import keras + + self._data.has_keras = True + self._data.keras_version = keras.__version__ + except ImportError: + pass + + # PyTorch Lightning + try: + import pytorch_lightning + + self._data.has_pytorch_lightning = True + self._data.pytorch_lightning_version = pytorch_lightning.__version__ + except ImportError: + pass + + # FastAI + try: + import fastai + + self._data.has_fastai = True + self._data.fastai_version = fastai.__version__ + except ImportError: + pass + + # Primary framework + self._data.ml_framework_primary = frameworks[0] if frameworks else "" + + return self + + def collect_context(self) -> "TelemetryCollector": + """Collect development context (IDE, notebook, CI).""" + # Detect notebook + self._data.notebook_environment = self._detect_notebook() + + # Detect CI + self._data.ci_environment = self._detect_ci() + + # Detect container + self._detect_container() + + # Detect package manager + self._data.python_package_manager = self._detect_package_manager() + + return self + + def _detect_notebook(self) -> str: + """Detect notebook environment.""" + try: + # Check for Jupyter + import ipykernel + + return "jupyter" + except ImportError: + pass + + # Check environment variables common in cloud notebooks + if os.environ.get("COLAB_RELEASE_TAG"): + return "colab" + if os.environ.get("KAGGLE_URL_BASE"): + return "kaggle" + + return "none" + + def _detect_ci(self) -> str: + """Detect CI environment.""" + ci_vars = { + "GITHUB_ACTIONS": "github-actions", + "GITLAB_CI": "gitlab", + "JENKINS_URL": "jenkins", + "CIRCLECI": "circleci", + "TRAVIS": "travis", + "BUILDKITE": "buildkite", + "AWS_CODEBUILD": "codebuild", + } + + for var, name in ci_vars.items(): + if os.environ.get(var): + return name + + return "none" + + def _detect_container(self) -> None: + """Detect container runtime.""" + # Check for Docker + if os.path.exists("/.dockerenv"): + self._data.in_container = True + self._data.container_runtime = "docker" + return + + # Check for container environment variables + if os.environ.get("KUBERNETES_SERVICE_HOST"): + self._data.in_container = True + self._data.container_runtime = "kubernetes" + return + + # Check cgroup + try: + with open("/proc/1/cgroup", "r") as f: + content = f.read() + if "docker" in content or "containerd" in content: + self._data.in_container = True + self._data.container_runtime = "docker" + return + except FileNotFoundError: + pass + + self._data.in_container = False + self._data.container_runtime = "none" + + def _detect_package_manager(self) -> str: + """Detect Python package manager.""" + # Check for poetry + if os.path.exists("pyproject.toml"): + with open("pyproject.toml", "r") as f: + if "[tool.poetry]" in f.read(): + return "poetry" + + # Check for uv + if os.path.exists("uv.lock"): + return "uv" + + # Check for pipenv + if os.path.exists("Pipfile"): + return "pipenv" + + # Check for conda + if os.path.exists("environment.yml") or os.path.exists("environment.yaml"): + return "conda" + + return "pip" + + def collect_errors( + self, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + ) -> "TelemetryCollector": + """Collect error information.""" + self._data.rapl_available = rapl_available + self._data.hardware_detection_success = hardware_detection_success + self._data.errors_encountered = errors or [] + + return self + + def collect_emissions( + self, + total_emissions_kg: float = 0.0, + emissions_rate_kg_per_sec: float = 0.0, + energy_consumed_kwh: float = 0.0, + cpu_energy_kwh: float = 0.0, + gpu_energy_kwh: float = 0.0, + ram_energy_kwh: float = 0.0, + duration_seconds: float = 0.0, + cpu_utilization_avg: float = 0.0, + gpu_utilization_avg: float = 0.0, + ram_utilization_avg: float = 0.0, + ) -> "TelemetryCollector": + """Collect emissions data (Tier 2: Public).""" + self._data.total_emissions_kg = total_emissions_kg + self._data.emissions_rate_kg_per_sec = emissions_rate_kg_per_sec + self._data.energy_consumed_kwh = energy_consumed_kwh + self._data.cpu_energy_kwh = cpu_energy_kwh + self._data.gpu_energy_kwh = gpu_energy_kwh + self._data.ram_energy_kwh = ram_energy_kwh + self._data.duration_seconds = duration_seconds + self._data.cpu_utilization_avg = cpu_utilization_avg + self._data.gpu_utilization_avg = gpu_utilization_avg + self._data.ram_utilization_avg = ram_utilization_avg + + return self + + def collect_cloud_info( + self, cloud_provider: str = "", cloud_region: str = "" + ) -> "TelemetryCollector": + """Collect cloud information.""" + self._data.cloud_provider = cloud_provider + self._data.cloud_region = cloud_region + + return self + + def collect_all( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + cloud_provider: str = "", + cloud_region: str = "", + ) -> TelemetryData: + """Collect all available telemetry data.""" + ( + self.collect_environment() + .collect_hardware( + cpu_count=cpu_count, + cpu_physical_count=cpu_physical_count, + cpu_model=cpu_model, + gpu_count=gpu_count, + gpu_model=gpu_model, + ram_total_gb=ram_total_gb, + ) + .collect_usage( + tracking_mode=tracking_mode, + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=hardware_tracked, + measure_power_interval=measure_power_interval, + ) + .collect_ml_ecosystem() + .collect_context() + .collect_errors( + rapl_available=rapl_available, + hardware_detection_success=hardware_detection_success, + errors=errors, + ) + .collect_cloud_info( + cloud_provider=cloud_provider, cloud_region=cloud_region + ) + ) + + return self._data diff --git a/codecarbon/core/telemetry/config.py b/codecarbon/core/telemetry/config.py new file mode 100644 index 000000000..e6db7e414 --- /dev/null +++ b/codecarbon/core/telemetry/config.py @@ -0,0 +1,175 @@ +""" +Telemetry configuration module. + +Handles the 3-tier telemetry system: +- off: No telemetry +- internal: Private telemetry (helps CodeCarbon improve) +- public: Public telemetry (shares emissions for leaderboard) +""" + +import os +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Optional + +import appdirs + +from codecarbon.external.logger import logger + +# Environment variable name for telemetry setting +TELEMETRY_ENV_VAR = "CODECARBON_TELEMETRY" + +# Environment variable for OTEL endpoint +OTEL_ENDPOINT_ENV_VAR = "CODECARBON_OTEL_ENDPOINT" + +# Default OTEL endpoint (can be configured by CodeCarbon team) +DEFAULT_OTEL_ENDPOINT = "https://otlp.example.com/v1/traces" + + +class TelemetryTier(str, Enum): + """Telemetry tiers.""" + + OFF = "off" + INTERNAL = "internal" + PUBLIC = "public" + + +@dataclass +class TelemetryConfig: + """Telemetry configuration.""" + + tier: TelemetryTier + otel_endpoint: Optional[str] + has_consent: bool + first_run: bool + + @property + def is_enabled(self) -> bool: + """Check if telemetry is enabled.""" + return self.tier != TelemetryTier.OFF + + @property + def is_public(self) -> bool: + """Check if public telemetry (emissions shared).""" + return self.tier == TelemetryTier.PUBLIC + + @property + def is_internal(self) -> bool: + """Check if internal telemetry (private).""" + return self.tier == TelemetryTier.INTERNAL + + +def get_user_config_dir() -> Path: + """Get the user config directory.""" + return Path(appdirs.user_config_dir("codecarbon", "CodeCarbon")) + + +def get_telemetry_preference_file() -> Path: + """Get the file path for storing telemetry preference.""" + return get_user_config_dir() / "telemetry_preference.txt" + + +def save_telemetry_preference(tier: TelemetryTier, dont_ask_again: bool = False) -> None: + """Save user's telemetry preference.""" + config_dir = get_user_config_dir() + config_dir.mkdir(parents=True, exist_ok=True) + + pref_file = get_telemetry_preference_file() + content = f"{tier.value}\n" + if dont_ask_again: + content += "dont_ask_again\n" + pref_file.write_text(content) + logger.info(f"Saved telemetry preference: {tier.value}") + + +def load_telemetry_preference() -> Optional[tuple[TelemetryTier, bool]]: + """Load user's saved telemetry preference. + + Returns: + Tuple of (tier, dont_ask_again) or None if not set. + """ + pref_file = get_telemetry_preference_file() + if not pref_file.exists(): + return None + + try: + content = pref_file.read_text().strip() + lines = content.split("\n") + tier = TelemetryTier(lines[0]) + dont_ask_again = len(lines) > 1 and "dont_ask_again" in lines[1] + return (tier, dont_ask_again) + except (ValueError, IndexError) as e: + logger.debug(f"Could not parse telemetry preference: {e}") + return None + + +def detect_tier_from_env() -> Optional[TelemetryTier]: + """Detect telemetry tier from environment variable.""" + env_value = os.environ.get(TELEMETRY_ENV_VAR, "").lower().strip() + if not env_value: + return None + + try: + return TelemetryTier(env_value) + except ValueError: + logger.warning( + f"Invalid CODECARBON_TELEMETRY value: {env_value}. " + f"Valid values: {', '.join(t.value for t in TelemetryTier)}" + ) + return None + + +def get_otel_endpoint() -> Optional[str]: + """Get OTEL endpoint from environment or return None for default.""" + return os.environ.get(OTEL_ENDPOINT_ENV_VAR) + + +def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: + """ + Get the telemetry configuration. + + Priority order: + 1. Environment variable (CODECARBON_TELEMETRY) + 2. Saved user preference + 3. Default to internal (first run) - telemetry enabled by default + + Args: + force_first_run: Force treating this as first run (for testing) + + Returns: + TelemetryConfig object + """ + # Check environment variable first + tier = detect_tier_from_env() + if tier is not None: + return TelemetryConfig( + tier=tier, + otel_endpoint=get_otel_endpoint(), + has_consent=True, + first_run=False, + ) + + # Check saved preference + saved = load_telemetry_preference() + if saved is not None: + tier, dont_ask = saved + return TelemetryConfig( + tier=tier, + otel_endpoint=get_otel_endpoint(), + has_consent=True, + first_run=False, + ) + + # First run - default to internal (telemetry enabled by default to help CodeCarbon improve) + return TelemetryConfig( + tier=TelemetryTier.INTERNAL, + otel_endpoint=get_otel_endpoint(), + has_consent=True, + first_run=True, + ) + + +def set_telemetry_tier(tier: TelemetryTier, dont_ask_again: bool = False) -> None: + """Set the telemetry tier.""" + save_telemetry_preference(tier, dont_ask_again) diff --git a/codecarbon/core/telemetry/otel_exporter.py b/codecarbon/core/telemetry/otel_exporter.py new file mode 100644 index 000000000..a223b33c8 --- /dev/null +++ b/codecarbon/core/telemetry/otel_exporter.py @@ -0,0 +1,223 @@ +""" +OpenTelemetry exporter for CodeCarbon telemetry. + +Sends telemetry data to an OTEL-compatible endpoint. +""" + +from typing import Any, Dict, Optional + +from codecarbon.core.telemetry.collector import TelemetryData +from codecarbon.core.telemetry.config import TelemetryConfig, TelemetryTier +from codecarbon.external.logger import logger + +# Try to import OpenTelemetry +try: + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter + from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as OTLPSpanExporterHTTP + + OTEL_AVAILABLE = True +except ImportError: + OTEL_AVAILABLE = False + logger.debug("OpenTelemetry not available, telemetry will not be exported") + + +class TelemetryExporter: + """ + Exports telemetry data via OpenTelemetry. + + Supports both gRPC and HTTP exporters. + """ + + def __init__(self, config: TelemetryConfig): + """ + Initialize the exporter. + + Args: + config: Telemetry configuration + """ + self._config = config + self._tracer = None + self._initialized = False + + if not OTEL_AVAILABLE: + logger.warning( + "OpenTelemetry not installed. " + "Install with: pip install opentelemetry-api opentelemetry-sdk " + "opentelemetry-exporter-otlp" + ) + return + + if not config.is_enabled: + logger.debug("Telemetry disabled, not initializing exporter") + return + + self._initialize() + + def _initialize(self) -> None: + """Initialize OpenTelemetry tracer.""" + if self._initialized: + return + + try: + # Set up tracer provider + provider = TracerProvider() + trace.set_tracer_provider(provider) + + # Determine endpoint + endpoint = self._config.otel_endpoint + if not endpoint: + logger.debug("No OTEL endpoint configured, skipping exporter init") + return + + # Choose HTTP or gRPC based on endpoint + if endpoint.startswith("http://") or endpoint.startswith("https://"): + # HTTP exporter + exporter = OTLPSpanExporterHTTP(endpoint=endpoint) + else: + # Default to gRPC + exporter = OTLPSpanExporter(endpoint=endpoint) + + # Add batch processor + processor = BatchSpanProcessor(exporter) + provider.add_span_processor(processor) + + # Get tracer + self._tracer = trace.get_tracer(__name__) + self._initialized = True + logger.info(f"Telemetry exporter initialized with endpoint: {endpoint}") + + except Exception as e: + logger.warning(f"Failed to initialize OpenTelemetry exporter: {e}") + self._initialized = False + + def export_telemetry( + self, + data: TelemetryData, + emissions_data: Optional[TelemetryData] = None, + ) -> bool: + """ + Export telemetry data. + + Args: + data: The telemetry data to export + emissions_data: Optional emissions data (only for public tier) + + Returns: + True if export succeeded, False otherwise + """ + if not self._initialized or not self._tracer: + logger.debug("Telemetry exporter not initialized, skipping export") + return False + + if not self._config.is_enabled: + return False + + try: + with self._tracer.start_as_current_span("codecarbon.telemetry") as span: + # Add attributes based on tier + self._add_attributes(span, data) + + # For public tier, also add emissions data + if self._config.is_public and emissions_data: + self._add_emissions_attributes(span, emissions_data) + + logger.debug("Telemetry data exported successfully") + return True + + except Exception as e: + logger.warning(f"Failed to export telemetry: {e}") + return False + + def _add_attributes(self, span, data: TelemetryData) -> None: + """Add telemetry attributes to span.""" + # Environment & Hardware (always for internal/public) + if self._config.is_internal or self._config.is_public: + span.set_attribute("codecarbon.os", data.os) + span.set_attribute("codecarbon.python_version", data.python_version) + span.set_attribute("codecarbon.python_implementation", data.python_implementation) + span.set_attribute("codecarbon.python_env_type", data.python_env_type) + span.set_attribute("codecarbon.codecarbon_version", data.codecarbon_version) + span.set_attribute("codecarbon.codecarbon_install_method", data.codecarbon_install_method) + + # Hardware + span.set_attribute("codecarbon.cpu_count", data.cpu_count) + span.set_attribute("codecarbon.cpu_physical_count", data.cpu_physical_count) + span.set_attribute("codecarbon.cpu_model", data.cpu_model) + span.set_attribute("codecarbon.cpu_architecture", data.cpu_architecture) + span.set_attribute("codecarbon.gpu_count", data.gpu_count) + span.set_attribute("codecarbon.gpu_model", data.gpu_model) + span.set_attribute("codecarbon.ram_total_gb", data.ram_total_size_gb) + + # CUDA/GPU + if data.cuda_version: + span.set_attribute("codecarbon.cuda_version", data.cuda_version) + if data.gpu_driver_version: + span.set_attribute("codecarbon.gpu_driver_version", data.gpu_driver_version) + + # Usage patterns + span.set_attribute("codecarbon.tracking_mode", data.tracking_mode) + span.set_attribute("codecarbon.api_mode", data.api_mode) + span.set_attribute("codecarbon.hardware_tracked", ",".join(data.hardware_tracked)) + span.set_attribute("codecarbon.output_methods", ",".join(data.output_methods)) + span.set_attribute("codecarbon.measure_power_interval", data.measure_power_interval_secs) + + # ML Ecosystem + span.set_attribute("codecarbon.has_torch", data.has_torch) + span.set_attribute("codecarbon.torch_version", data.torch_version or "") + span.set_attribute("codecarbon.has_transformers", data.has_transformers) + span.set_attribute("codecarbon.has_diffusers", data.has_diffusers) + span.set_attribute("codecarbon.has_tensorflow", data.has_tensorflow) + span.set_attribute("codecarbon.has_keras", data.has_keras) + span.set_attribute("codecarbon.ml_framework_primary", data.ml_framework_primary) + + # Context + span.set_attribute("codecarbon.notebook_environment", data.notebook_environment) + span.set_attribute("codecarbon.ci_environment", data.ci_environment) + span.set_attribute("codecarbon.container_runtime", data.container_runtime) + span.set_attribute("codecarbon.in_container", data.in_container) + span.set_attribute("codecarbon.python_package_manager", data.python_package_manager) + + # Performance + span.set_attribute("codecarbon.hardware_detection_success", data.hardware_detection_success) + span.set_attribute("codecarbon.rapl_available", data.rapl_available) + span.set_attribute("codecarbon.gpu_detection_method", data.gpu_detection_method) + + # Cloud + span.set_attribute("codecarbon.cloud_provider", data.cloud_provider) + span.set_attribute("codecarbon.cloud_region", data.cloud_region) + + def _add_emissions_attributes(self, span, data: TelemetryData) -> None: + """Add emissions attributes to span (public tier only).""" + # Emissions data - shared publicly + span.set_attribute("codecarbon.emissions_kg", data.total_emissions_kg) + span.set_attribute("codecarbon.emissions_rate_kg_per_sec", data.emissions_rate_kg_per_sec) + span.set_attribute("codecarbon.energy_consumed_kwh", data.energy_consumed_kwh) + span.set_attribute("codecarbon.cpu_energy_kwh", data.cpu_energy_kwh) + span.set_attribute("codecarbon.gpu_energy_kwh", data.gpu_energy_kwh) + span.set_attribute("codecarbon.ram_energy_kwh", data.ram_energy_kwh) + span.set_attribute("codecarbon.duration_seconds", data.duration_seconds) + span.set_attribute("codecarbon.cpu_utilization_avg", data.cpu_utilization_avg) + span.set_attribute("codecarbon.gpu_utilization_avg", data.gpu_utilization_avg) + span.set_attribute("codecarbon.ram_utilization_avg", data.ram_utilization_avg) + + +def create_exporter(config: TelemetryConfig) -> Optional[TelemetryExporter]: + """ + Create a telemetry exporter based on config. + + Args: + config: Telemetry configuration + + Returns: + TelemetryExporter instance or None if not available + """ + if not OTEL_AVAILABLE: + return None + + if not config.is_enabled: + return None + + return TelemetryExporter(config) diff --git a/codecarbon/core/telemetry/prompt.py b/codecarbon/core/telemetry/prompt.py new file mode 100644 index 000000000..0a61950d1 --- /dev/null +++ b/codecarbon/core/telemetry/prompt.py @@ -0,0 +1,169 @@ +""" +First-run prompt for telemetry consent. + +Shows an interactive prompt to let users choose their telemetry level. +""" + +from typing import Optional + +from codecarbon.core.telemetry.config import ( + TelemetryTier, + get_telemetry_config, + save_telemetry_preference, +) +from codecarbon.external.logger import logger + +# Try to import rich/questionary for interactive prompts +# Falls back to simple input if not available +try: + from rich.console import Console + from rich.prompt import Prompt + + RICH_AVAILABLE = True +except ImportError: + RICH_AVAILABLE = False + +try: + import questionary + + QUESTIONARY_AVAILABLE = True +except ImportError: + QUESTIONARY_AVAILABLE = False + + +console = Console() if RICH_AVAILABLE else None + + +def prompt_for_telemetry_consent() -> Optional[TelemetryTier]: + """ + Prompt user for telemetry consent on first run. + + Returns: + The chosen TelemetryTier, or None if prompt should not be shown. + """ + config = get_telemetry_config() + + # Don't prompt if consent already given via env var or saved preference + if config.has_consent: + return config.tier + + # Check if we should prompt (first run without saved preference) + if not config.first_run: + return config.tier + + # Try interactive prompt, but don't fail if not available + if QUESTIONARY_AVAILABLE: + return _prompt_interactive_questionary() + elif RICH_AVAILABLE: + return _prompt_interactive_rich() + else: + return _prompt_simple() + + +def _prompt_interactive_questionary() -> Optional[TelemetryTier]: + """Prompt using questionary library.""" + try: + answer = questionary.select( + "📊 CodeCarbon Telemetry\n\n" + "Help improve CodeCarbon by sharing anonymous usage data?\n", + choices=[ + "Internal - Basic environment info (PRIVATE)", + "Public - Full telemetry (SHARED PUBLICLY on leaderboard)", + "Off - No telemetry", + ], + default="Internal - Basic environment info (PRIVATE)", + ).ask() + + if answer is None: + return TelemetryTier.OFF + + if "Internal" in answer: + return TelemetryTier.INTERNAL + elif "Public" in answer: + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Questionary prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_interactive_rich() -> Optional[TelemetryTier]: + """Prompt using rich library.""" + if console is None: + return TelemetryTier.OFF + + try: + console.print("\n📊 [bold]CodeCarbon Telemetry[/bold]\n") + console.print( + "Help improve CodeCarbon by sharing anonymous usage data?\n" + ) + console.print(" [1] Internal - Basic environment info (PRIVATE)") + console.print(" • Python version, OS, CPU/GPU hardware") + console.print(" • Usage patterns, ML frameworks") + console.print(" • Helps us improve the library") + console.print() + console.print(" [2] Public - Full telemetry (SHARED PUBLICLY)") + console.print(" • All of internal + emissions data") + console.print(" • Shown on public leaderboard") + console.print() + console.print(" [3] Off - No telemetry") + console.print() + + answer = Prompt.ask( + "Select option", + choices=["1", "2", "3"], + default="1", + ) + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Rich prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_simple() -> Optional[TelemetryTier]: + """Simple input-based prompt.""" + try: + print("\n📊 CodeCarbon Telemetry") + print("=" * 40) + print("Help improve CodeCarbon by sharing anonymous usage data?") + print() + print(" 1) Internal - Basic environment info (PRIVATE)") + print(" 2) Public - Full telemetry (SHARED PUBLICLY)") + print(" 3) Off - No telemetry") + print() + answer = input("Select option [1]: ").strip() or "1" + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Simple prompt failed: {e}") + return TelemetryTier.OFF + + +def prompt_and_save() -> TelemetryTier: + """ + Prompt user and save their choice. + + Returns: + The chosen TelemetryTier. + """ + tier = prompt_for_telemetry_consent() + + if tier is None: + tier = TelemetryTier.OFF + + # Save the preference (don't ask again) + save_telemetry_preference(tier, dont_ask_again=True) + + return tier diff --git a/codecarbon/core/telemetry/service.py b/codecarbon/core/telemetry/service.py new file mode 100644 index 000000000..8d0554ad1 --- /dev/null +++ b/codecarbon/core/telemetry/service.py @@ -0,0 +1,221 @@ +""" +Telemetry service - integrates telemetry with CodeCarbon. + +This module provides functions to initialize and use telemetry. +""" + +from typing import Optional + +from codecarbon.core.telemetry.collector import TelemetryCollector, TelemetryData +from codecarbon.core.telemetry.config import ( + TelemetryConfig, + TelemetryTier, + get_telemetry_config, + set_telemetry_tier, +) +from codecarbon.core.telemetry.otel_exporter import TelemetryExporter, create_exporter +from codecarbon.core.telemetry.prompt import prompt_for_telemetry_consent +from codecarbon.external.logger import logger + + +class TelemetryService: + """Service for managing telemetry.""" + + _instance: Optional["TelemetryService"] = None + _initialized: bool = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + if self._initialized: + return + self._config: Optional[TelemetryConfig] = None + self._exporter: Optional[TelemetryExporter] = None + self._collector: Optional[TelemetryCollector] = None + self._initialized = True + + def initialize(self, force_prompt: bool = False) -> TelemetryConfig: + """ + Initialize telemetry service. + + Args: + force_prompt: Force showing the consent prompt + + Returns: + TelemetryConfig + """ + # Get configuration + self._config = get_telemetry_config() + + # If first run and not forced, try to prompt + if self._config.first_run and not self._config.has_consent: + if force_prompt: + # This will show prompt if needed + pass + + # Create exporter if enabled + if self._config.is_enabled: + self._exporter = create_exporter(self._config) + self._collector = TelemetryCollector() + + logger.info( + f"Telemetry initialized: tier={self._config.tier.value}, " + f"enabled={self._config.is_enabled}" + ) + + return self._config + + def get_config(self) -> Optional[TelemetryConfig]: + """Get current telemetry config.""" + return self._config + + def collect_and_export( + self, + cpu_count: int = 0, + cpu_physical_count: int = 0, + cpu_model: str = "", + gpu_count: int = 0, + gpu_model: str = "", + ram_total_gb: float = 0.0, + tracking_mode: str = "machine", + api_mode: str = "online", + output_methods: list = None, + hardware_tracked: list = None, + measure_power_interval: float = 15.0, + rapl_available: bool = False, + hardware_detection_success: bool = True, + errors: list = None, + cloud_provider: str = "", + cloud_region: str = "", + ) -> bool: + """ + Collect and export telemetry data. + + Returns: + True if successful, False otherwise + """ + if not self._config or not self._config.is_enabled: + return False + + if not self._collector or not self._exporter: + return False + + try: + # Collect data + data = self._collector.collect_all( + cpu_count=cpu_count, + cpu_physical_count=cpu_physical_count, + cpu_model=cpu_model, + gpu_count=gpu_count, + gpu_model=gpu_model, + ram_total_gb=ram_total_gb, + tracking_mode=tracking_mode, + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=hardware_tracked, + measure_power_interval=measure_power_interval, + rapl_available=rapl_available, + hardware_detection_success=hardware_detection_success, + errors=errors, + cloud_provider=cloud_provider, + cloud_region=cloud_region, + ) + + # Export + return self._exporter.export_telemetry(data) + + except Exception as e: + logger.warning(f"Failed to collect/export telemetry: {e}") + return False + + def export_emissions( + self, + total_emissions_kg: float = 0.0, + emissions_rate_kg_per_sec: float = 0.0, + energy_consumed_kwh: float = 0.0, + cpu_energy_kwh: float = 0.0, + gpu_energy_kwh: float = 0.0, + ram_energy_kwh: float = 0.0, + duration_seconds: float = 0.0, + cpu_utilization_avg: float = 0.0, + gpu_utilization_avg: float = 0.0, + ram_utilization_avg: float = 0.0, + ) -> bool: + """ + Export emissions data (only for public tier). + + Returns: + True if successful, False otherwise + """ + if not self._config or not self._config.is_public: + return False + + if not self._collector or not self._exporter: + return False + + try: + # Collect emissions data + data = TelemetryData() + data.collect_emissions( + total_emissions_kg=total_emissions_kg, + emissions_rate_kg_per_sec=emissions_rate_kg_per_sec, + energy_consumed_kwh=energy_consumed_kwh, + cpu_energy_kwh=cpu_energy_kwh, + gpu_energy_kwh=gpu_energy_kwh, + ram_energy_kwh=ram_energy_kwh, + duration_seconds=duration_seconds, + cpu_utilization_avg=cpu_utilization_avg, + gpu_utilization_avg=gpu_utilization_avg, + ram_utilization_avg=ram_utilization_avg, + ) + + # Export + return self._exporter.export_telemetry(data) + + except Exception as e: + logger.warning(f"Failed to export emissions telemetry: {e}") + return False + + +# Global instance +_telemetry_service: Optional[TelemetryService] = None + + +def get_telemetry_service() -> TelemetryService: + """Get the global telemetry service instance.""" + global _telemetry_service + if _telemetry_service is None: + _telemetry_service = TelemetryService() + return _telemetry_service + + +def init_telemetry(force_prompt: bool = False) -> TelemetryConfig: + """ + Initialize telemetry. + + Args: + force_prompt: Force showing consent prompt + + Returns: + TelemetryConfig + """ + service = get_telemetry_service() + return service.initialize(force_prompt=force_prompt) + + +def set_telemetry(tier: str, dont_ask_again: bool = True) -> None: + """ + Set telemetry tier programmatically. + + Args: + tier: "off", "internal", or "public" + dont_ask_again: Don't ask again in future + """ + try: + tier_enum = TelemetryTier(tier) + set_telemetry_tier(tier_enum, dont_ask_again=dont_ask_again) + except ValueError: + logger.warning(f"Invalid telemetry tier: {tier}") diff --git a/docs/telemetry.md b/docs/telemetry.md new file mode 100644 index 000000000..f1c1b3dc5 --- /dev/null +++ b/docs/telemetry.md @@ -0,0 +1,112 @@ +# Telemetry + +CodeCarbon collects anonymous usage data to help improve the library. This page explains what we collect, how we handle your data, and how you can control it. + +## Telemetry Tiers + +CodeCarbon supports three telemetry levels: + +| Tier | Env Variable | Description | +|------|-------------|-------------| +| Off | `CODECARBON_TELEMETRY=off` | No telemetry collected | +| Internal | `CODECARBON_TELEMETRY=internal` | Private usage data (helps us improve CodeCarbon) | +| Public | `CODECARBON_TELEMETRY=public` | Full telemetry including emissions (shared on public leaderboard) | + +## What We Collect + +### Internal (Private) + +When you enable Internal telemetry, we collect: + +- **Environment**: Python version, OS, CodeCarbon version, installation method +- **Hardware**: CPU model/count, GPU model/count, RAM, CUDA version +- **Usage Patterns**: Tracking mode, output methods configured, hardware tracked +- **ML Ecosystem**: Detected frameworks (PyTorch, TensorFlow, Transformers, etc.) +- **Context**: Notebook environment, CI/CD detection, container runtime +- **Performance**: Hardware detection success, RAPL availability, errors + +### Public (Leaderboard) + +When you enable Public telemetry, everything above **plus**: + +- **Emissions Data**: Total CO2 emissions, energy consumed, duration +- **Utilization**: CPU, GPU, RAM utilization averages + +This data is shared publicly on the CodeCarbon leaderboard to encourage green computing practices. + +## Privacy + +We're committed to protecting your privacy: + +- **No PII**: We don't collect personally identifiable information +- **Anonymized**: Machine identifiers are hashed +- **GPS Precision**: Geographic coordinates are rounded to ~10km +- **GDPR Compliant**: We support opt-in consent and data deletion requests +- **Minimal Data**: We only collect what's needed to improve the library + +## Configuration + +### Environment Variables + +```bash +# Set telemetry tier +export CODECARBON_TELEMETRY=internal + +# Set custom OTEL endpoint (optional) +export CODECARBON_OTEL_ENDPOINT=https://your-otel-endpoint.com/v1/traces +``` + +### In Code + +```python +from codecarbon import EmissionsTracker + +# Telemetry can also be set in the tracker +tracker = EmissionsTracker( + project_name="my-project", + telemetry="internal" # or "public" or "off" +) +``` + +## First-Run Prompt + +On first run, CodeCarbon will prompt you to choose your telemetry level if: + +- No `CODECARBON_TELEMETRY` environment variable is set +- No previous preference was saved + +You can skip the prompt by setting the environment variable before running CodeCarbon. + +## Disabling Telemetry + +To completely disable telemetry: + +```bash +export CODECARBON_TELEMETRY=off +``` + +Or in your code: + +```python +tracker = EmissionsTracker(telemetry="off") +``` + +## OTEL Integration + +Telemetry data is sent via OpenTelemetry (OTEL). To use your own OTEL collector: + +```bash +export CODECARBON_OTEL_ENDPOINT=https://your-collector:4318/v1/traces +``` + +Install the OTEL extras if you want to export telemetry: + +```bash +pip install codecarbon[telemetry] +``` + +## Data Retention + +- Internal telemetry: Retained for 12 months +- Public leaderboard data: Displayed indefinitely +- You can request data deletion by contacting the CodeCarbon team diff --git a/mkdocs.yml b/mkdocs.yml index 4517f6b98..c6d4923e8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -128,4 +128,5 @@ nav: - Output: logging/output.md - Collecting emissions to a logger: logging/to_logger.md - Visualize: logging/visualize.md + - Telemetry: telemetry.md - Track GenAI API Calls (EcoLogits) ↗: https://ecologits.ai/latest/?utm_source=codecarbon&utm_medium=docs diff --git a/pyproject.toml b/pyproject.toml index 518acb7ed..d5a4169f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ + "appdirs", "arrow", "authlib>=1.2.1", "click", @@ -42,6 +43,13 @@ dependencies = [ "typer", ] +[project.optional-dependencies] +telemetry = [ + "opentelemetry-api", + "opentelemetry-sdk", + "opentelemetry-exporter-otlp", +] + [tool.setuptools.dynamic] version = {attr = "codecarbon._version.__version__"} From 5fcde060a657d1db6dfc1e350f76dee0547f3c98 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Tue, 17 Mar 2026 20:00:03 +0100 Subject: [PATCH 2/3] feat(telemetry): enhance telemetry configuration and management - Added functions to save and load telemetry configuration from a file. - Introduced a command-line interface for managing telemetry settings, including setup and current configuration display. - Implemented logging for telemetry configuration warnings at the end of each run. - Enhanced the API client to send telemetry data to the new endpoint. These changes improve user experience by providing better management of telemetry settings and ensuring proper configuration is communicated effectively. --- codecarbon/cli/cli_utils.py | 78 +++++++++++++++++++++++ codecarbon/cli/main.py | 95 +++++++++++++++++++++++++++++ codecarbon/core/api_client.py | 29 +++++++++ codecarbon/core/telemetry/config.py | 48 ++++++++++++--- codecarbon/emissions_tracker.py | 30 +++++++++ 5 files changed, 273 insertions(+), 7 deletions(-) diff --git a/codecarbon/cli/cli_utils.py b/codecarbon/cli/cli_utils.py index 4f3daf4c6..c578b58d6 100644 --- a/codecarbon/cli/cli_utils.py +++ b/codecarbon/cli/cli_utils.py @@ -108,3 +108,81 @@ def create_new_config_file(): f.write("[codecarbon]\n") typer.echo(f"Config file created at {file_path}") return file_path + + +def save_telemetry_config_to_file( + tier: str = None, + project_token: str = None, + api_endpoint: str = None, + path: Path = None +) -> None: + """ + Save telemetry configuration as JSON in the existing config file. + + Args: + tier: Telemetry tier (off, internal, public) + project_token: Project token for Tier 2 + api_endpoint: API endpoint for telemetry + path: Path to config file (defaults to ~/.codecarbon.config) + """ + import json + + p = path or Path.home() / ".codecarbon.config" + + # Read existing config or create new + config = configparser.ConfigParser() + if p.exists(): + config.read(str(p)) + + if "codecarbon" not in config.sections(): + config.add_section("codecarbon") + + # Build JSON config for telemetry + telemetry_config = {} + if tier: + telemetry_config["telemetry_tier"] = tier + if project_token: + telemetry_config["telemetry_project_token"] = project_token + if api_endpoint: + telemetry_config["telemetry_api_endpoint"] = api_endpoint + + # Save as JSON string + if telemetry_config: + config["codecarbon"]["telemetry"] = json.dumps(telemetry_config) + + with p.open("w") as f: + config.write(f) + logger.info(f"Telemetry config saved to {p}") + + +def load_telemetry_config_from_file(path: Path = None) -> dict: + """ + Load telemetry configuration from the existing config file. + + Args: + path: Path to config file (defaults to ~/.codecarbon.config) + + Returns: + Dictionary with telemetry configuration + """ + import json + + p = path or Path.home() / ".codecarbon.config" + + if not p.exists(): + return {} + + config = configparser.ConfigParser() + config.read(str(p)) + + if "codecarbon" not in config.sections(): + return {} + + telemetry_str = config["codecarbon"].get("telemetry") + if telemetry_str: + try: + return json.loads(telemetry_str) + except json.JSONDecodeError: + return {} + + return {} diff --git a/codecarbon/cli/main.py b/codecarbon/cli/main.py index 7fd097b45..2c279ef65 100644 --- a/codecarbon/cli/main.py +++ b/codecarbon/cli/main.py @@ -436,5 +436,100 @@ def questionary_prompt(prompt, list_options, default): return value +@codecarbon.command("telemetry", short_help="Configure CodeCarbon telemetry") +def telemetry(): + """ + Manage CodeCarbon telemetry settings. + + Use subcommands to configure or view telemetry settings: + - codecarbon telemetry setup : Interactive setup wizard + - codecarbon telemetry config : Show current configuration + - codecarbon telemetry enable : Enable telemetry + """ + print("CodeCarbon Telemetry") + print("Use --help for more information on subcommands.") + + +@telemetry.command("setup", short_help="Interactive telemetry setup wizard") +def telemetry_setup(): + """ + Interactive wizard to configure CodeCarbon telemetry. + """ + from codecarbon.core.telemetry.config import ( + TelemetryTier, + get_telemetry_config, + set_telemetry_tier, + ) + from codecarbon.core.telemetry.config import ( + TELEMETRY_PROJECT_TOKEN_ENV_VAR, + TELEMETRY_API_ENDPOINT_ENV_VAR, + TELEMETRY_ENV_VAR, + ) + + print("\n=== CodeCarbon Telemetry Setup ===\n") + + # Show current config + config = get_telemetry_config() + print(f"Current tier: {config.tier.value}") + print(f"Current project token: {'set' if config.project_token else 'not set'}") + print(f"Current API endpoint: {config.api_endpoint or 'default'}") + + # Ask for tier + print("\nChoose telemetry tier:") + tier_choice = questionary.select( + "Telemetry tier:", + ["off", "internal", "public"], + default=config.tier.value, + ).ask() + + # Save tier preference + set_telemetry_tier(TelemetryTier(tier_choice), dont_ask_again=True) + print(f"\nTelemetry tier set to: {tier_choice}") + + # Ask for project token if public tier + if tier_choice == "public": + project_token = typer.prompt( + f"Project token (from {TELEMETRY_PROJECT_TOKEN_ENV_VAR} env var)", + default=config.project_token or "", + ) + if project_token: + print(f"\nTo enable Tier 2 (public) telemetry, set:") + print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}={project_token}") + + # Ask for API endpoint + api_endpoint = typer.prompt( + f"API endpoint (default: https://api.codecarbon.io)", + default=config.api_endpoint or "https://api.codecarbon.io", + ) + if api_endpoint and api_endpoint != "https://api.codecarbon.io": + print(f"\nTo use custom API endpoint, set:") + print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") + + print("\n=== Setup Complete ===") + print("\nEnvironment variables to configure:") + print(f" export {TELEMETRY_ENV_VAR}={tier_choice}") + if tier_choice == "public" and project_token: + print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=") + print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") + + +@telemetry.command("config", short_help="Show current telemetry configuration") +def telemetry_config(): + """ + Display current telemetry configuration. + """ + from codecarbon.core.telemetry.config import get_telemetry_config + + config = get_telemetry_config() + + print("\n=== Current Telemetry Configuration ===\n") + print(f"Tier: {config.tier.value}") + print(f"Enabled: {config.is_enabled}") + print(f"Project Token: {'configured' if config.project_token else 'not configured'}") + print(f"API Endpoint: {config.api_endpoint or 'default (https://api.codecarbon.io)'}") + print(f"First Run: {config.first_run}") + print(f"Has Consent: {config.has_consent}") + + if __name__ == "__main__": main() diff --git a/codecarbon/core/api_client.py b/codecarbon/core/api_client.py index 34067c71c..62495e738 100644 --- a/codecarbon/core/api_client.py +++ b/codecarbon/core/api_client.py @@ -353,6 +353,35 @@ def close_experiment(self): Tell the API that the experiment has ended. """ + def add_telemetry(self, telemetry_data: dict, api_key: str = None) -> bool: + """ + Send telemetry data to the /telemetry endpoint (Tier 1). + + Args: + telemetry_data: Dictionary containing telemetry payload + api_key: Optional API key for authentication + + Returns: + True if successful, False otherwise + """ + try: + url = self.url + "/telemetry" + headers = self._get_headers() + + # Use provided api_key or fall back to instance api_key + if api_key: + headers["x-api-token"] = api_key + + r = requests.post(url=url, json=telemetry_data, timeout=5, headers=headers) + if r.status_code not in (200, 201): + self._log_error(url, telemetry_data, r) + return False + logger.debug(f"Telemetry data sent successfully to {url}") + return True + except Exception as e: + logger.error(f"Failed to send telemetry data: {e}") + return False + class simple_utc(tzinfo): def tzname(self, **kwargs): diff --git a/codecarbon/core/telemetry/config.py b/codecarbon/core/telemetry/config.py index e6db7e414..d130bf8cd 100644 --- a/codecarbon/core/telemetry/config.py +++ b/codecarbon/core/telemetry/config.py @@ -5,8 +5,12 @@ - off: No telemetry - internal: Private telemetry (helps CodeCarbon improve) - public: Public telemetry (shares emissions for leaderboard) + +For Tier 1 (internal): POST to /telemetry endpoint +For Tier 2 (public): Uses core public API with project token """ +import json import os from dataclasses import dataclass from enum import Enum @@ -20,11 +24,17 @@ # Environment variable name for telemetry setting TELEMETRY_ENV_VAR = "CODECARBON_TELEMETRY" -# Environment variable for OTEL endpoint +# Environment variable for project token (Tier 2 / public) +TELEMETRY_PROJECT_TOKEN_ENV_VAR = "CODECARBON_TELEMETRY_PROJECT_TOKEN" + +# Environment variable for API endpoint +TELEMETRY_API_ENDPOINT_ENV_VAR = "CODECARBON_TELEMETRY_API_ENDPOINT" + +# Legacy OTEL endpoint env var (for backward compatibility) OTEL_ENDPOINT_ENV_VAR = "CODECARBON_OTEL_ENDPOINT" -# Default OTEL endpoint (can be configured by CodeCarbon team) -DEFAULT_OTEL_ENDPOINT = "https://otlp.example.com/v1/traces" +# Default API endpoint +DEFAULT_API_ENDPOINT = "https://api.codecarbon.io" class TelemetryTier(str, Enum): @@ -40,9 +50,12 @@ class TelemetryConfig: """Telemetry configuration.""" tier: TelemetryTier - otel_endpoint: Optional[str] + project_token: Optional[str] + api_endpoint: Optional[str] has_consent: bool first_run: bool + # Legacy OTEL support (still used for internal tier) + otel_endpoint: Optional[str] = None @property def is_enabled(self) -> bool: @@ -125,6 +138,16 @@ def get_otel_endpoint() -> Optional[str]: return os.environ.get(OTEL_ENDPOINT_ENV_VAR) +def get_telemetry_project_token() -> Optional[str]: + """Get telemetry project token from environment.""" + return os.environ.get(TELEMETRY_PROJECT_TOKEN_ENV_VAR) + + +def get_telemetry_api_endpoint() -> Optional[str]: + """Get telemetry API endpoint from environment.""" + return os.environ.get(TELEMETRY_API_ENDPOINT_ENV_VAR) + + def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: """ Get the telemetry configuration. @@ -140,12 +163,19 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: Returns: TelemetryConfig object """ + # Get common config values + project_token = get_telemetry_project_token() + api_endpoint = get_telemetry_api_endpoint() + otel_endpoint = get_otel_endpoint() + # Check environment variable first tier = detect_tier_from_env() if tier is not None: return TelemetryConfig( tier=tier, - otel_endpoint=get_otel_endpoint(), + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, has_consent=True, first_run=False, ) @@ -156,7 +186,9 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: tier, dont_ask = saved return TelemetryConfig( tier=tier, - otel_endpoint=get_otel_endpoint(), + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, has_consent=True, first_run=False, ) @@ -164,7 +196,9 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: # First run - default to internal (telemetry enabled by default to help CodeCarbon improve) return TelemetryConfig( tier=TelemetryTier.INTERNAL, - otel_endpoint=get_otel_endpoint(), + project_token=project_token, + api_endpoint=api_endpoint, + otel_endpoint=otel_endpoint, has_consent=True, first_run=True, ) diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index a070ea56c..64b00adb5 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -756,8 +756,38 @@ def stop(self) -> Optional[float]: for handler in self._output_handlers: handler.exit() + # Log telemetry configuration warning + self._log_telemetry_warning() + return emissions_data.emissions + def _log_telemetry_warning(self) -> None: + """ + Log a warning about telemetry configuration at the end of each run. + """ + from codecarbon.core.telemetry.config import ( + TELEMETRY_ENV_VAR, + TELEMETRY_PROJECT_TOKEN_ENV_VAR, + get_telemetry_config, + ) + + config = get_telemetry_config() + + if not config.is_enabled: + logger.warning( + f"Telemetry is disabled. To enable, run: codecarbon telemetry setup\n" + f"Or set environment variable: export {TELEMETRY_ENV_VAR}=internal" + ) + elif config.is_public and not config.project_token: + logger.warning( + f"Telemetry is set to 'public' but no project token is configured.\n" + f"To configure Tier 2 (public) telemetry, run: codecarbon telemetry setup\n" + f"Or set: export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=" + ) + elif config.is_enabled and not config.first_run: + # Telemetry is properly configured + logger.debug(f"Telemetry enabled: tier={config.tier.value}") + def _persist_data( self, total_emissions: EmissionsData, From fe19f7c0d6869884e31258afea029d8d4884e1c1 Mon Sep 17 00:00:00 2001 From: davidberenstein1957 Date: Sun, 5 Apr 2026 08:34:17 +0200 Subject: [PATCH 3/3] feat(telemetry): implement telemetry data sending and configuration updates - Removed the optional dependencies section for telemetry in `pyproject.toml`. - Added a new `TELEMETRY_README.md` file detailing the telemetry feature, including setup instructions and API specifications. - Enhanced the `BaseEmissionsTracker` to send telemetry data upon completion of emissions tracking. - Introduced functions for sending Tier-1 and public Tier-2 telemetry data via HTTP. - Updated CLI commands for managing telemetry settings, including interactive setup and configuration display. These changes improve the telemetry system by providing comprehensive documentation and enabling data collection for better insights into usage patterns. --- codecarbon/cli/cli_utils.py | 48 ++-- codecarbon/cli/main.py | 151 ++++++++----- codecarbon/core/api_client.py | 28 +++ codecarbon/core/telemetry/config.py | 177 ++++++++++++--- codecarbon/core/telemetry/http_sender.py | 55 +++++ codecarbon/core/telemetry/otel_exporter.py | 223 ------------------- codecarbon/core/telemetry/service.py | 83 ++++--- codecarbon/emissions_tracker.py | 71 +++++- docs/telemetry.md | 127 +++++------ pyproject.toml | 7 - tests/conftest.py | 12 + tests/test_telemetry_http.py | 242 +++++++++++++++++++++ uv.lock | 11 + 13 files changed, 792 insertions(+), 443 deletions(-) create mode 100644 codecarbon/core/telemetry/http_sender.py delete mode 100644 codecarbon/core/telemetry/otel_exporter.py create mode 100644 tests/conftest.py create mode 100644 tests/test_telemetry_http.py diff --git a/codecarbon/cli/cli_utils.py b/codecarbon/cli/cli_utils.py index c578b58d6..f4fed42b3 100644 --- a/codecarbon/cli/cli_utils.py +++ b/codecarbon/cli/cli_utils.py @@ -5,6 +5,8 @@ import typer from rich.prompt import Confirm +from codecarbon.external.logger import logger + def get_config(path: Optional[Path] = None): p = path or Path.cwd().resolve() / ".codecarbon.config" @@ -113,43 +115,51 @@ def create_new_config_file(): def save_telemetry_config_to_file( tier: str = None, project_token: str = None, - api_endpoint: str = None, - path: Path = None + telemetry_api_endpoint: str = None, + telemetry_api_key: str = None, + path: Path = None, ) -> None: """ Save telemetry configuration as JSON in the existing config file. - + Args: tier: Telemetry tier (off, internal, public) - project_token: Project token for Tier 2 - api_endpoint: API endpoint for telemetry + project_token: Telemetry auth token (stored as ``telemetry_api_key`` and legacy + ``telemetry_project_token`` in JSON) + telemetry_api_endpoint: Base URL for telemetry HTTP (optional) + telemetry_api_key: Telemetry auth token (optional; overrides ``project_token`` if both set) path: Path to config file (defaults to ~/.codecarbon.config) """ import json - + p = path or Path.home() / ".codecarbon.config" - - # Read existing config or create new + config = configparser.ConfigParser() if p.exists(): config.read(str(p)) - + if "codecarbon" not in config.sections(): config.add_section("codecarbon") - - # Build JSON config for telemetry - telemetry_config = {} + + existing = load_telemetry_config_from_file(p) + telemetry_config = dict(existing) if existing else {} if tier: telemetry_config["telemetry_tier"] = tier - if project_token: - telemetry_config["telemetry_project_token"] = project_token - if api_endpoint: - telemetry_config["telemetry_api_endpoint"] = api_endpoint - - # Save as JSON string + token = telemetry_api_key or project_token + if token: + t = str(token).strip() + telemetry_config["telemetry_api_key"] = t + telemetry_config["telemetry_project_token"] = t + if telemetry_api_endpoint is not None: + te = str(telemetry_api_endpoint).strip().rstrip("/") + if te: + telemetry_config["telemetry_api_endpoint"] = te + else: + telemetry_config.pop("telemetry_api_endpoint", None) + if telemetry_config: config["codecarbon"]["telemetry"] = json.dumps(telemetry_config) - + with p.open("w") as f: config.write(f) logger.info(f"Telemetry config saved to {p}") diff --git a/codecarbon/cli/main.py b/codecarbon/cli/main.py index 2c279ef65..1d8986a6b 100644 --- a/codecarbon/cli/main.py +++ b/codecarbon/cli/main.py @@ -32,6 +32,7 @@ DEFAULT_ORGANIzATION_ID = "e60afa92-17b7-4720-91a0-1ae91e409ba1" codecarbon = typer.Typer(no_args_is_help=True) +telemetry_app = typer.Typer(no_args_is_help=True) def main(): @@ -436,84 +437,122 @@ def questionary_prompt(prompt, list_options, default): return value -@codecarbon.command("telemetry", short_help="Configure CodeCarbon telemetry") -def telemetry(): - """ - Manage CodeCarbon telemetry settings. - - Use subcommands to configure or view telemetry settings: - - codecarbon telemetry setup : Interactive setup wizard - - codecarbon telemetry config : Show current configuration - - codecarbon telemetry enable : Enable telemetry - """ - print("CodeCarbon Telemetry") - print("Use --help for more information on subcommands.") +def _get_project_token() -> Optional[str]: + """Resolve telemetry auth token (env, JSON, preference, default constant).""" + from codecarbon.core.telemetry.config import get_telemetry_auth_token + return get_telemetry_auth_token() -@telemetry.command("setup", short_help="Interactive telemetry setup wizard") -def telemetry_setup(): + +@telemetry_app.command("setup", short_help="Interactive telemetry setup wizard") +def telemetry_setup( + token: Annotated[ + Optional[str], + typer.Option( + "--token", + "-t", + help="Telemetry API key for public tier (same as CODECARBON_TELEMETRY_API_KEY)", + ) + ] = None, + tier: Annotated[ + Optional[str], + typer.Option("--tier", help="Telemetry tier: off, internal, or public") + ] = None, +): """ Interactive wizard to configure CodeCarbon telemetry. + + Examples: + # Interactive mode + codecarbon telemetry setup + + # Non-interactive mode with options + codecarbon telemetry setup --tier public --token YOUR_TOKEN + + This command automatically saves configuration to ~/.codecarbon.config + and writes environment variables to your shell config. """ from codecarbon.core.telemetry.config import ( + TELEMETRY_ENV_VAR, TelemetryTier, get_telemetry_config, + save_telemetry_project_token, set_telemetry_tier, ) - from codecarbon.core.telemetry.config import ( - TELEMETRY_PROJECT_TOKEN_ENV_VAR, - TELEMETRY_API_ENDPOINT_ENV_VAR, - TELEMETRY_ENV_VAR, - ) print("\n=== CodeCarbon Telemetry Setup ===\n") # Show current config config = get_telemetry_config() print(f"Current tier: {config.tier.value}") - print(f"Current project token: {'set' if config.project_token else 'not set'}") + print( + f"Current telemetry API key: {'set' if config.project_token else 'not set'}" + ) print(f"Current API endpoint: {config.api_endpoint or 'default'}") - # Ask for tier - print("\nChoose telemetry tier:") - tier_choice = questionary.select( - "Telemetry tier:", - ["off", "internal", "public"], - default=config.tier.value, - ).ask() + # Determine tier (use provided value or prompt) + if tier is not None: + try: + tier_choice = TelemetryTier(tier).value + except ValueError: + print(f"[red]Invalid tier: {tier}. Valid values: off, internal, public[/red]") + raise typer.Exit(1) + else: + print("\nChoose telemetry tier:") + tier_choice = questionary.select( + "Telemetry tier:", + ["off", "internal", "public"], + default=config.tier.value, + ).ask() - # Save tier preference + # Save tier preference to file set_telemetry_tier(TelemetryTier(tier_choice), dont_ask_again=True) print(f"\nTelemetry tier set to: {tier_choice}") - # Ask for project token if public tier - if tier_choice == "public": + # Get project token (priority: CLI option > env var > config file) + project_token = token or _get_project_token() + if tier_choice == "public" and not project_token: project_token = typer.prompt( - f"Project token (from {TELEMETRY_PROJECT_TOKEN_ENV_VAR} env var)", - default=config.project_token or "", + "Telemetry API key (CODECARBON_TELEMETRY_API_KEY; not your dashboard api_key)", + default="", ) - if project_token: - print(f"\nTo enable Tier 2 (public) telemetry, set:") - print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}={project_token}") - - # Ask for API endpoint - api_endpoint = typer.prompt( - f"API endpoint (default: https://api.codecarbon.io)", - default=config.api_endpoint or "https://api.codecarbon.io", - ) - if api_endpoint and api_endpoint != "https://api.codecarbon.io": - print(f"\nTo use custom API endpoint, set:") - print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") - print("\n=== Setup Complete ===") - print("\nEnvironment variables to configure:") - print(f" export {TELEMETRY_ENV_VAR}={tier_choice}") + # Save project token to JSON config file (so it persists without env vars) if tier_choice == "public" and project_token: - print(f" export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=") - print(f" export {TELEMETRY_API_ENDPOINT_ENV_VAR}={api_endpoint}") + save_telemetry_project_token(project_token) + print("[green]Telemetry API key saved to config file[/green]") + + # Write to shell rc file automatically + shell_rc_path = Path.home() / ".zshrc" + if not shell_rc_path.exists(): + shell_rc_path = Path.home() / ".bashrc" + + # Read existing content + existing_content = "" + if shell_rc_path.exists(): + existing_content = shell_rc_path.read_text() + + env_vars = { + TELEMETRY_ENV_VAR: tier_choice, + } + purge_markers = (*env_vars.keys(), "CODECARBON_TELEMETRY_PROJECT_TOKEN") + new_lines = [] + for line in existing_content.split("\n"): + if not any(marker in line for marker in purge_markers): + new_lines.append(line) + + # Add new environment variables + for var_name, var_value in env_vars.items(): + new_lines.append(f'export {var_name}="{var_value}"') + + # Write back + shell_rc_path.write_text("\n".join(new_lines) + "\n") + print(f"\n[green]Environment variables written to {shell_rc_path}[/green]") + print(f"[yellow]Run 'source {shell_rc_path}' or restart your terminal to apply[/yellow]") + print("\n[green]Setup complete! Configuration saved.[/green]") -@telemetry.command("config", short_help="Show current telemetry configuration") +@telemetry_app.command("config", short_help="Show current telemetry configuration") def telemetry_config(): """ Display current telemetry configuration. @@ -525,11 +564,19 @@ def telemetry_config(): print("\n=== Current Telemetry Configuration ===\n") print(f"Tier: {config.tier.value}") print(f"Enabled: {config.is_enabled}") - print(f"Project Token: {'configured' if config.project_token else 'not configured'}") - print(f"API Endpoint: {config.api_endpoint or 'default (https://api.codecarbon.io)'}") + print( + f"Telemetry API key: {'configured' if config.project_token else 'not configured'}" + ) + print( + f"Telemetry base URL: {config.api_endpoint or 'default (https://api.codecarbon.io)'}" + ) print(f"First Run: {config.first_run}") print(f"Has Consent: {config.has_consent}") +# Register telemetry as a subcommand of codecarbon +codecarbon.add_typer(telemetry_app, name="telemetry") + + if __name__ == "__main__": main() diff --git a/codecarbon/core/api_client.py b/codecarbon/core/api_client.py index 62495e738..9f7a56a44 100644 --- a/codecarbon/core/api_client.py +++ b/codecarbon/core/api_client.py @@ -353,6 +353,34 @@ def close_experiment(self): Tell the API that the experiment has ended. """ + def add_public_emissions(self, payload: dict, project_token: str) -> bool: + """ + Send public-tier emissions payload to POST /emissions (flat JSON, project token). + + Args: + payload: JSON-serializable body (e.g. utilization and energy fields). + project_token: Project token sent as x-api-token. + + Returns: + True if the server accepted the request (HTTP 200 or 201). + """ + if not project_token: + logger.warning("add_public_emissions: missing project_token") + return False + try: + url = self.url + "/emissions" + headers = self._get_headers() + headers["x-api-token"] = project_token + r = requests.post(url=url, json=payload, timeout=5, headers=headers) + if r.status_code not in (200, 201): + self._log_error(url, payload, r) + return False + logger.debug(f"Public emissions telemetry sent successfully to {url}") + return True + except Exception as e: + logger.error(f"Failed to send public emissions telemetry: {e}") + return False + def add_telemetry(self, telemetry_data: dict, api_key: str = None) -> bool: """ Send telemetry data to the /telemetry endpoint (Tier 1). diff --git a/codecarbon/core/telemetry/config.py b/codecarbon/core/telemetry/config.py index d130bf8cd..a342dfa85 100644 --- a/codecarbon/core/telemetry/config.py +++ b/codecarbon/core/telemetry/config.py @@ -6,11 +6,14 @@ - internal: Private telemetry (helps CodeCarbon improve) - public: Public telemetry (shares emissions for leaderboard) -For Tier 1 (internal): POST to /telemetry endpoint -For Tier 2 (public): Uses core public API with project token +For Tier 1 (internal): POST to /telemetry endpoint. +For Tier 2 (public): POST /emissions uses a telemetry-only auth chain (env, JSON, preference, +``DEFAULT_PUBLIC_TELEMETRY_TOKEN``); hierarchical ``api_key`` is for dashboard/API logging only. + +Telemetry base URL: env ``CODECARBON_TELEMETRY_API_ENDPOINT``, JSON ``telemetry_api_endpoint``, +then hierarchical ``api_endpoint`` (default ``https://api.codecarbon.io``). """ -import json import os from dataclasses import dataclass from enum import Enum @@ -24,18 +27,15 @@ # Environment variable name for telemetry setting TELEMETRY_ENV_VAR = "CODECARBON_TELEMETRY" -# Environment variable for project token (Tier 2 / public) -TELEMETRY_PROJECT_TOKEN_ENV_VAR = "CODECARBON_TELEMETRY_PROJECT_TOKEN" - -# Environment variable for API endpoint TELEMETRY_API_ENDPOINT_ENV_VAR = "CODECARBON_TELEMETRY_API_ENDPOINT" +TELEMETRY_API_KEY_ENV_VAR = "CODECARBON_TELEMETRY_API_KEY" -# Legacy OTEL endpoint env var (for backward compatibility) -OTEL_ENDPOINT_ENV_VAR = "CODECARBON_OTEL_ENDPOINT" - -# Default API endpoint +# Default API base URL when hierarchical config has no api_endpoint (same default as EmissionsTracker) DEFAULT_API_ENDPOINT = "https://api.codecarbon.io" +# Shared ingest token for public-tier POST /emissions when no user-specific telemetry token is set. +DEFAULT_PUBLIC_TELEMETRY_TOKEN: str = "" + class TelemetryTier(str, Enum): """Telemetry tiers.""" @@ -47,15 +47,18 @@ class TelemetryTier(str, Enum): @dataclass class TelemetryConfig: - """Telemetry configuration.""" + """Telemetry configuration. + + Attributes: + project_token: Resolved value for telemetry ``x-api-token`` when set; independent of + dashboard ``api_key`` / ``CODECARBON_API_KEY``. + """ tier: TelemetryTier project_token: Optional[str] api_endpoint: Optional[str] has_consent: bool first_run: bool - # Legacy OTEL support (still used for internal tier) - otel_endpoint: Optional[str] = None @property def is_enabled(self) -> bool: @@ -133,19 +136,145 @@ def detect_tier_from_env() -> Optional[TelemetryTier]: return None -def get_otel_endpoint() -> Optional[str]: - """Get OTEL endpoint from environment or return None for default.""" - return os.environ.get(OTEL_ENDPOINT_ENV_VAR) +def _hierarchical_config_dict() -> dict: + """Load hierarchical CodeCarbon config (indirection for tests).""" + from codecarbon.core.config import get_hierarchical_config + + return get_hierarchical_config() + + +def get_telemetry_auth_token() -> Optional[str]: + """Resolve ``x-api-token`` for telemetry ``POST /telemetry`` (optional) and ``POST /emissions``. + + Order: + 1. Environment variable ``CODECARBON_TELEMETRY_API_KEY`` + 2. ``telemetry_api_key`` or ``telemetry_project_token`` in ``[codecarbon]`` telemetry JSON + 3. ``project_token=`` line in telemetry preference file (legacy) + 4. :data:`DEFAULT_PUBLIC_TELEMETRY_TOKEN` when non-empty + + Hierarchical ``api_key`` / ``CODECARBON_API_KEY`` is not consulted (dashboard only). + + Returns: + Token string or None if nothing is configured. + """ + env_val = os.environ.get(TELEMETRY_API_KEY_ENV_VAR, "").strip() + if env_val: + return env_val + + try: + from codecarbon.cli.cli_utils import load_telemetry_config_from_file + + json_config = load_telemetry_config_from_file() + if json_config: + for key in ("telemetry_api_key", "telemetry_project_token"): + raw = json_config.get(key) + if raw: + s = str(raw).strip() + if s: + return s + except Exception: + pass + + pref_file = get_telemetry_preference_file() + if pref_file.exists(): + try: + content = pref_file.read_text() + lines = content.split("\n") + for line in lines[2:]: + if line.startswith("project_token="): + s = line.split("=", 1)[1].strip() + if s: + return s + except Exception as e: + logger.debug(f"Could not parse telemetry project token: {e}") + + if DEFAULT_PUBLIC_TELEMETRY_TOKEN.strip(): + return DEFAULT_PUBLIC_TELEMETRY_TOKEN.strip() + + return None + + +def get_public_telemetry_auth_token() -> Optional[str]: + """Alias for :func:`get_telemetry_auth_token`.""" + return get_telemetry_auth_token() def get_telemetry_project_token() -> Optional[str]: - """Get telemetry project token from environment.""" - return os.environ.get(TELEMETRY_PROJECT_TOKEN_ENV_VAR) + """Deprecated name; use :func:`get_telemetry_auth_token`.""" + return get_telemetry_auth_token() + + +def save_telemetry_project_token(token: str) -> None: + """Save telemetry project token to JSON config file.""" + try: + from codecarbon.cli.cli_utils import save_telemetry_config_to_file + save_telemetry_config_to_file(project_token=token) + logger.info("Saved telemetry project token to JSON config") + except Exception as e: + logger.warning(f"Failed to save to JSON config: {e}, using legacy format") + # Fallback to legacy text format + pref_file = get_telemetry_preference_file() + existing_content = "" + if pref_file.exists(): + existing_content = pref_file.read_text() + + lines = existing_content.split("\n") + new_lines = [] + found_token = False + for line in lines: + if line.startswith("project_token="): + new_lines.append(f"project_token={token}") + found_token = True + else: + new_lines.append(line) + + if not found_token: + new_lines.append(f"project_token={token}") + + pref_file.write_text("\n".join(new_lines)) + logger.info("Saved telemetry project token") def get_telemetry_api_endpoint() -> Optional[str]: - """Get telemetry API endpoint from environment.""" - return os.environ.get(TELEMETRY_API_ENDPOINT_ENV_VAR) + """Resolve telemetry HTTP base URL (no trailing slash in return value). + + Order: + 1. Environment variable ``CODECARBON_TELEMETRY_API_ENDPOINT`` + 2. ``telemetry_api_endpoint`` in ``[codecarbon]`` telemetry JSON + 3. Hierarchical ``api_endpoint`` (same as tracker / dashboard default host) + """ + env_val = os.environ.get(TELEMETRY_API_ENDPOINT_ENV_VAR, "").strip() + if env_val: + return env_val.rstrip("/") + + try: + from codecarbon.cli.cli_utils import load_telemetry_config_from_file + + json_config = load_telemetry_config_from_file() + raw = (json_config or {}).get("telemetry_api_endpoint") + if raw: + s = str(raw).strip().rstrip("/") + if s: + return s + except Exception: + pass + + try: + conf = _hierarchical_config_dict() + raw = conf.get("api_endpoint") + if raw: + s = str(raw).strip().rstrip("/") + if s: + return s + except Exception: + pass + return None + + +def resolve_telemetry_base_url(api_endpoint: Optional[str]) -> str: + """Return normalized base URL for telemetry HTTP requests (no trailing slash).""" + base = (api_endpoint or DEFAULT_API_ENDPOINT).strip() + return base.rstrip("/") def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: @@ -164,9 +293,8 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: TelemetryConfig object """ # Get common config values - project_token = get_telemetry_project_token() + project_token = get_telemetry_auth_token() api_endpoint = get_telemetry_api_endpoint() - otel_endpoint = get_otel_endpoint() # Check environment variable first tier = detect_tier_from_env() @@ -175,7 +303,6 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: tier=tier, project_token=project_token, api_endpoint=api_endpoint, - otel_endpoint=otel_endpoint, has_consent=True, first_run=False, ) @@ -188,7 +315,6 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: tier=tier, project_token=project_token, api_endpoint=api_endpoint, - otel_endpoint=otel_endpoint, has_consent=True, first_run=False, ) @@ -198,7 +324,6 @@ def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig: tier=TelemetryTier.INTERNAL, project_token=project_token, api_endpoint=api_endpoint, - otel_endpoint=otel_endpoint, has_consent=True, first_run=True, ) diff --git a/codecarbon/core/telemetry/http_sender.py b/codecarbon/core/telemetry/http_sender.py new file mode 100644 index 000000000..1870fa59c --- /dev/null +++ b/codecarbon/core/telemetry/http_sender.py @@ -0,0 +1,55 @@ +"""HTTP payload helpers for telemetry (Tier 1 vs public emissions).""" + +from typing import Any, Dict + +from codecarbon.core.telemetry.collector import TelemetryData +from codecarbon.core.telemetry.config import TelemetryTier + +_TIER1_EXCLUDE_KEYS: frozenset[str] = frozenset( + { + "total_emissions_kg", + "emissions_rate_kg_per_sec", + "energy_consumed_kwh", + "cpu_energy_kwh", + "gpu_energy_kwh", + "ram_energy_kwh", + "duration_seconds", + "cpu_utilization_avg", + "gpu_utilization_avg", + "ram_utilization_avg", + } +) + +def tier1_telemetry_body(data: TelemetryData, tier: TelemetryTier) -> Dict[str, Any]: + """Build POST /telemetry JSON body: Tier-1 fields only, plus telemetry_tier.""" + raw = data.to_dict() + body = {k: v for k, v in raw.items() if k not in _TIER1_EXCLUDE_KEYS} + body["telemetry_tier"] = tier.value + return body + + +def public_emissions_body( + total_emissions_kg: float = 0.0, + emissions_rate_kg_per_sec: float = 0.0, + energy_consumed_kwh: float = 0.0, + cpu_energy_kwh: float = 0.0, + gpu_energy_kwh: float = 0.0, + ram_energy_kwh: float = 0.0, + duration_seconds: float = 0.0, + cpu_utilization_avg: float = 0.0, + gpu_utilization_avg: float = 0.0, + ram_utilization_avg: float = 0.0, +) -> Dict[str, Any]: + """Build flat POST /emissions JSON body (public tier).""" + return { + "total_emissions_kg": total_emissions_kg, + "emissions_rate_kg_per_sec": emissions_rate_kg_per_sec, + "energy_consumed_kwh": energy_consumed_kwh, + "cpu_energy_kwh": cpu_energy_kwh, + "gpu_energy_kwh": gpu_energy_kwh, + "ram_energy_kwh": ram_energy_kwh, + "duration_seconds": duration_seconds, + "cpu_utilization_avg": cpu_utilization_avg, + "gpu_utilization_avg": gpu_utilization_avg, + "ram_utilization_avg": ram_utilization_avg, + } diff --git a/codecarbon/core/telemetry/otel_exporter.py b/codecarbon/core/telemetry/otel_exporter.py deleted file mode 100644 index a223b33c8..000000000 --- a/codecarbon/core/telemetry/otel_exporter.py +++ /dev/null @@ -1,223 +0,0 @@ -""" -OpenTelemetry exporter for CodeCarbon telemetry. - -Sends telemetry data to an OTEL-compatible endpoint. -""" - -from typing import Any, Dict, Optional - -from codecarbon.core.telemetry.collector import TelemetryData -from codecarbon.core.telemetry.config import TelemetryConfig, TelemetryTier -from codecarbon.external.logger import logger - -# Try to import OpenTelemetry -try: - from opentelemetry import trace - from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.sdk.trace.export import BatchSpanProcessor - from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter - from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as OTLPSpanExporterHTTP - - OTEL_AVAILABLE = True -except ImportError: - OTEL_AVAILABLE = False - logger.debug("OpenTelemetry not available, telemetry will not be exported") - - -class TelemetryExporter: - """ - Exports telemetry data via OpenTelemetry. - - Supports both gRPC and HTTP exporters. - """ - - def __init__(self, config: TelemetryConfig): - """ - Initialize the exporter. - - Args: - config: Telemetry configuration - """ - self._config = config - self._tracer = None - self._initialized = False - - if not OTEL_AVAILABLE: - logger.warning( - "OpenTelemetry not installed. " - "Install with: pip install opentelemetry-api opentelemetry-sdk " - "opentelemetry-exporter-otlp" - ) - return - - if not config.is_enabled: - logger.debug("Telemetry disabled, not initializing exporter") - return - - self._initialize() - - def _initialize(self) -> None: - """Initialize OpenTelemetry tracer.""" - if self._initialized: - return - - try: - # Set up tracer provider - provider = TracerProvider() - trace.set_tracer_provider(provider) - - # Determine endpoint - endpoint = self._config.otel_endpoint - if not endpoint: - logger.debug("No OTEL endpoint configured, skipping exporter init") - return - - # Choose HTTP or gRPC based on endpoint - if endpoint.startswith("http://") or endpoint.startswith("https://"): - # HTTP exporter - exporter = OTLPSpanExporterHTTP(endpoint=endpoint) - else: - # Default to gRPC - exporter = OTLPSpanExporter(endpoint=endpoint) - - # Add batch processor - processor = BatchSpanProcessor(exporter) - provider.add_span_processor(processor) - - # Get tracer - self._tracer = trace.get_tracer(__name__) - self._initialized = True - logger.info(f"Telemetry exporter initialized with endpoint: {endpoint}") - - except Exception as e: - logger.warning(f"Failed to initialize OpenTelemetry exporter: {e}") - self._initialized = False - - def export_telemetry( - self, - data: TelemetryData, - emissions_data: Optional[TelemetryData] = None, - ) -> bool: - """ - Export telemetry data. - - Args: - data: The telemetry data to export - emissions_data: Optional emissions data (only for public tier) - - Returns: - True if export succeeded, False otherwise - """ - if not self._initialized or not self._tracer: - logger.debug("Telemetry exporter not initialized, skipping export") - return False - - if not self._config.is_enabled: - return False - - try: - with self._tracer.start_as_current_span("codecarbon.telemetry") as span: - # Add attributes based on tier - self._add_attributes(span, data) - - # For public tier, also add emissions data - if self._config.is_public and emissions_data: - self._add_emissions_attributes(span, emissions_data) - - logger.debug("Telemetry data exported successfully") - return True - - except Exception as e: - logger.warning(f"Failed to export telemetry: {e}") - return False - - def _add_attributes(self, span, data: TelemetryData) -> None: - """Add telemetry attributes to span.""" - # Environment & Hardware (always for internal/public) - if self._config.is_internal or self._config.is_public: - span.set_attribute("codecarbon.os", data.os) - span.set_attribute("codecarbon.python_version", data.python_version) - span.set_attribute("codecarbon.python_implementation", data.python_implementation) - span.set_attribute("codecarbon.python_env_type", data.python_env_type) - span.set_attribute("codecarbon.codecarbon_version", data.codecarbon_version) - span.set_attribute("codecarbon.codecarbon_install_method", data.codecarbon_install_method) - - # Hardware - span.set_attribute("codecarbon.cpu_count", data.cpu_count) - span.set_attribute("codecarbon.cpu_physical_count", data.cpu_physical_count) - span.set_attribute("codecarbon.cpu_model", data.cpu_model) - span.set_attribute("codecarbon.cpu_architecture", data.cpu_architecture) - span.set_attribute("codecarbon.gpu_count", data.gpu_count) - span.set_attribute("codecarbon.gpu_model", data.gpu_model) - span.set_attribute("codecarbon.ram_total_gb", data.ram_total_size_gb) - - # CUDA/GPU - if data.cuda_version: - span.set_attribute("codecarbon.cuda_version", data.cuda_version) - if data.gpu_driver_version: - span.set_attribute("codecarbon.gpu_driver_version", data.gpu_driver_version) - - # Usage patterns - span.set_attribute("codecarbon.tracking_mode", data.tracking_mode) - span.set_attribute("codecarbon.api_mode", data.api_mode) - span.set_attribute("codecarbon.hardware_tracked", ",".join(data.hardware_tracked)) - span.set_attribute("codecarbon.output_methods", ",".join(data.output_methods)) - span.set_attribute("codecarbon.measure_power_interval", data.measure_power_interval_secs) - - # ML Ecosystem - span.set_attribute("codecarbon.has_torch", data.has_torch) - span.set_attribute("codecarbon.torch_version", data.torch_version or "") - span.set_attribute("codecarbon.has_transformers", data.has_transformers) - span.set_attribute("codecarbon.has_diffusers", data.has_diffusers) - span.set_attribute("codecarbon.has_tensorflow", data.has_tensorflow) - span.set_attribute("codecarbon.has_keras", data.has_keras) - span.set_attribute("codecarbon.ml_framework_primary", data.ml_framework_primary) - - # Context - span.set_attribute("codecarbon.notebook_environment", data.notebook_environment) - span.set_attribute("codecarbon.ci_environment", data.ci_environment) - span.set_attribute("codecarbon.container_runtime", data.container_runtime) - span.set_attribute("codecarbon.in_container", data.in_container) - span.set_attribute("codecarbon.python_package_manager", data.python_package_manager) - - # Performance - span.set_attribute("codecarbon.hardware_detection_success", data.hardware_detection_success) - span.set_attribute("codecarbon.rapl_available", data.rapl_available) - span.set_attribute("codecarbon.gpu_detection_method", data.gpu_detection_method) - - # Cloud - span.set_attribute("codecarbon.cloud_provider", data.cloud_provider) - span.set_attribute("codecarbon.cloud_region", data.cloud_region) - - def _add_emissions_attributes(self, span, data: TelemetryData) -> None: - """Add emissions attributes to span (public tier only).""" - # Emissions data - shared publicly - span.set_attribute("codecarbon.emissions_kg", data.total_emissions_kg) - span.set_attribute("codecarbon.emissions_rate_kg_per_sec", data.emissions_rate_kg_per_sec) - span.set_attribute("codecarbon.energy_consumed_kwh", data.energy_consumed_kwh) - span.set_attribute("codecarbon.cpu_energy_kwh", data.cpu_energy_kwh) - span.set_attribute("codecarbon.gpu_energy_kwh", data.gpu_energy_kwh) - span.set_attribute("codecarbon.ram_energy_kwh", data.ram_energy_kwh) - span.set_attribute("codecarbon.duration_seconds", data.duration_seconds) - span.set_attribute("codecarbon.cpu_utilization_avg", data.cpu_utilization_avg) - span.set_attribute("codecarbon.gpu_utilization_avg", data.gpu_utilization_avg) - span.set_attribute("codecarbon.ram_utilization_avg", data.ram_utilization_avg) - - -def create_exporter(config: TelemetryConfig) -> Optional[TelemetryExporter]: - """ - Create a telemetry exporter based on config. - - Args: - config: Telemetry configuration - - Returns: - TelemetryExporter instance or None if not available - """ - if not OTEL_AVAILABLE: - return None - - if not config.is_enabled: - return None - - return TelemetryExporter(config) diff --git a/codecarbon/core/telemetry/service.py b/codecarbon/core/telemetry/service.py index 8d0554ad1..7b5cb07f4 100644 --- a/codecarbon/core/telemetry/service.py +++ b/codecarbon/core/telemetry/service.py @@ -6,14 +6,19 @@ from typing import Optional -from codecarbon.core.telemetry.collector import TelemetryCollector, TelemetryData +from codecarbon.core.api_client import ApiClient +from codecarbon.core.telemetry.collector import TelemetryCollector from codecarbon.core.telemetry.config import ( TelemetryConfig, TelemetryTier, get_telemetry_config, + resolve_telemetry_base_url, set_telemetry_tier, ) -from codecarbon.core.telemetry.otel_exporter import TelemetryExporter, create_exporter +from codecarbon.core.telemetry.http_sender import ( + public_emissions_body, + tier1_telemetry_body, +) from codecarbon.core.telemetry.prompt import prompt_for_telemetry_consent from codecarbon.external.logger import logger @@ -33,33 +38,39 @@ def __init__(self): if self._initialized: return self._config: Optional[TelemetryConfig] = None - self._exporter: Optional[TelemetryExporter] = None + self._api_client: Optional[ApiClient] = None self._collector: Optional[TelemetryCollector] = None self._initialized = True def initialize(self, force_prompt: bool = False) -> TelemetryConfig: """ Initialize telemetry service. - + Args: force_prompt: Force showing the consent prompt - + Returns: TelemetryConfig """ - # Get configuration self._config = get_telemetry_config() - # If first run and not forced, try to prompt - if self._config.first_run and not self._config.has_consent: - if force_prompt: - # This will show prompt if needed - pass + if force_prompt and self._config.first_run: + prompt_for_telemetry_consent() + self._config = get_telemetry_config() - # Create exporter if enabled if self._config.is_enabled: - self._exporter = create_exporter(self._config) + base = resolve_telemetry_base_url(self._config.api_endpoint) + telemetry_key = self._config.project_token or None + self._api_client = ApiClient( + endpoint_url=base, + experiment_id=None, + api_key=telemetry_key, + create_run_automatically=False, + ) self._collector = TelemetryCollector() + else: + self._api_client = None + self._collector = None logger.info( f"Telemetry initialized: tier={self._config.tier.value}, " @@ -82,29 +93,28 @@ def collect_and_export( ram_total_gb: float = 0.0, tracking_mode: str = "machine", api_mode: str = "online", - output_methods: list = None, - hardware_tracked: list = None, + output_methods: Optional[list] = None, + hardware_tracked: Optional[list] = None, measure_power_interval: float = 15.0, rapl_available: bool = False, hardware_detection_success: bool = True, - errors: list = None, + errors: Optional[list] = None, cloud_provider: str = "", cloud_region: str = "", ) -> bool: """ - Collect and export telemetry data. - + Collect Tier-1 telemetry and POST to /telemetry. + Returns: True if successful, False otherwise """ if not self._config or not self._config.is_enabled: return False - if not self._collector or not self._exporter: + if not self._collector or not self._api_client: return False try: - # Collect data data = self._collector.collect_all( cpu_count=cpu_count, cpu_physical_count=cpu_physical_count, @@ -124,8 +134,8 @@ def collect_and_export( cloud_region=cloud_region, ) - # Export - return self._exporter.export_telemetry(data) + body = tier1_telemetry_body(data, self._config.tier) + return self._api_client.add_telemetry(body) except Exception as e: logger.warning(f"Failed to collect/export telemetry: {e}") @@ -145,21 +155,25 @@ def export_emissions( ram_utilization_avg: float = 0.0, ) -> bool: """ - Export emissions data (only for public tier). - + Export emissions data via POST /emissions (public tier only). + Returns: True if successful, False otherwise """ if not self._config or not self._config.is_public: return False - if not self._collector or not self._exporter: + if not self._config.project_token or not self._api_client: + return False + + if duration_seconds < 1: + logger.debug( + "Telemetry public emissions skipped: duration < 1 second" + ) return False try: - # Collect emissions data - data = TelemetryData() - data.collect_emissions( + payload = public_emissions_body( total_emissions_kg=total_emissions_kg, emissions_rate_kg_per_sec=emissions_rate_kg_per_sec, energy_consumed_kwh=energy_consumed_kwh, @@ -171,16 +185,15 @@ def export_emissions( gpu_utilization_avg=gpu_utilization_avg, ram_utilization_avg=ram_utilization_avg, ) - - # Export - return self._exporter.export_telemetry(data) + return self._api_client.add_public_emissions( + payload, self._config.project_token + ) except Exception as e: logger.warning(f"Failed to export emissions telemetry: {e}") return False -# Global instance _telemetry_service: Optional[TelemetryService] = None @@ -195,10 +208,10 @@ def get_telemetry_service() -> TelemetryService: def init_telemetry(force_prompt: bool = False) -> TelemetryConfig: """ Initialize telemetry. - + Args: force_prompt: Force showing consent prompt - + Returns: TelemetryConfig """ @@ -209,7 +222,7 @@ def init_telemetry(force_prompt: bool = False) -> TelemetryConfig: def set_telemetry(tier: str, dont_ask_again: bool = True) -> None: """ Set telemetry tier programmatically. - + Args: tier: "off", "internal", or "public" dont_ask_again: Don't ask again in future diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 64b00adb5..963871f5b 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -756,23 +756,73 @@ def stop(self) -> Optional[float]: for handler in self._output_handlers: handler.exit() + self._send_telemetry(emissions_data) + # Log telemetry configuration warning self._log_telemetry_warning() return emissions_data.emissions + @suppress(Exception) + def _send_telemetry(self, emissions_data: EmissionsData) -> None: + """Send Tier-1 (and public Tier-2) telemetry via HTTP when enabled.""" + from codecarbon.core.telemetry import get_telemetry_service, init_telemetry + + init_telemetry() + svc = get_telemetry_service() + cfg = svc.get_config() + if not cfg or not cfg.is_enabled: + return + + hi = self.get_detected_hardware() + cloud: CloudMetadata = self._get_cloud_metadata() + api_mode = "online" if self._electricitymaps_api_token else "offline" + output_methods = [type(h).__name__ for h in self._output_handlers] + rapl_available = any( + getattr(h, "_mode", None) == "intel_rapl" for h in self._hardware + ) + + svc.collect_and_export( + cpu_count=int(hi.get("cpu_count") or 0), + cpu_physical_count=int(hi.get("cpu_physical_count") or 0), + cpu_model=str(hi.get("cpu_model") or ""), + gpu_count=int(hi.get("gpu_count") or 0), + gpu_model=str(hi.get("gpu_model") or ""), + ram_total_gb=float(hi.get("ram_total_size") or 0.0), + tracking_mode=str(self._tracking_mode), + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=list(self._conf.get("hardware") or []), + measure_power_interval=float(self._measure_power_secs), + rapl_available=rapl_available, + hardware_detection_success=True, + errors=[], + cloud_provider=str(cloud.provider or ""), + cloud_region=str(cloud.region or ""), + ) + + if cfg.is_public and cfg.project_token: + svc.export_emissions( + total_emissions_kg=emissions_data.emissions, + emissions_rate_kg_per_sec=emissions_data.emissions_rate, + energy_consumed_kwh=emissions_data.energy_consumed, + cpu_energy_kwh=emissions_data.cpu_energy, + gpu_energy_kwh=emissions_data.gpu_energy, + ram_energy_kwh=emissions_data.ram_energy, + duration_seconds=float(emissions_data.duration), + cpu_utilization_avg=emissions_data.cpu_utilization_percent, + gpu_utilization_avg=emissions_data.gpu_utilization_percent, + ram_utilization_avg=emissions_data.ram_utilization_percent, + ) + def _log_telemetry_warning(self) -> None: """ Log a warning about telemetry configuration at the end of each run. """ - from codecarbon.core.telemetry.config import ( - TELEMETRY_ENV_VAR, - TELEMETRY_PROJECT_TOKEN_ENV_VAR, - get_telemetry_config, - ) - + from codecarbon.core.telemetry.config import TELEMETRY_ENV_VAR, get_telemetry_config + config = get_telemetry_config() - + if not config.is_enabled: logger.warning( f"Telemetry is disabled. To enable, run: codecarbon telemetry setup\n" @@ -780,9 +830,10 @@ def _log_telemetry_warning(self) -> None: ) elif config.is_public and not config.project_token: logger.warning( - f"Telemetry is set to 'public' but no project token is configured.\n" - f"To configure Tier 2 (public) telemetry, run: codecarbon telemetry setup\n" - f"Or set: export {TELEMETRY_PROJECT_TOKEN_ENV_VAR}=" + "Telemetry is set to 'public' but no telemetry auth token is available for " + "POST /emissions.\n" + "Set CODECARBON_TELEMETRY_API_KEY, add telemetry_api_key in the telemetry JSON in " + ".codecarbon.config, or run: codecarbon telemetry setup" ) elif config.is_enabled and not config.first_run: # Telemetry is properly configured diff --git a/docs/telemetry.md b/docs/telemetry.md index f1c1b3dc5..ceb171a1a 100644 --- a/docs/telemetry.md +++ b/docs/telemetry.md @@ -1,112 +1,97 @@ # Telemetry -CodeCarbon collects anonymous usage data to help improve the library. This page explains what we collect, how we handle your data, and how you can control it. +CodeCarbon can send **anonymous usage and diagnostics** over HTTPS to help maintainers improve the library. Optionally, you can opt in to **public** sharing of **run-level emissions summaries** (for example for leaderboards). This page explains the three tiers, **what** is collected in each case, **why**, and how to control it. -## Telemetry Tiers +Telemetry HTTP uses its own **base URL resolution**: `CODECARBON_TELEMETRY_API_ENDPOINT`, optional JSON `telemetry_api_endpoint` in the `[codecarbon]` telemetry blob, then the same hierarchical `api_endpoint` / `CODECARBON_API_ENDPOINT` as the rest of CodeCarbon (default `https://api.codecarbon.io`). **Dashboard** uploads (`save_to_api`, `CodeCarbonAPIOutput`) still use only `api_endpoint` + `api_key`; you can point telemetry at a different host in the same process. -CodeCarbon supports three telemetry levels: +## Telemetry tiers -| Tier | Env Variable | Description | -|------|-------------|-------------| -| Off | `CODECARBON_TELEMETRY=off` | No telemetry collected | -| Internal | `CODECARBON_TELEMETRY=internal` | Private usage data (helps us improve CodeCarbon) | -| Public | `CODECARBON_TELEMETRY=public` | Full telemetry including emissions (shared on public leaderboard) | +| Tier | How to choose it | What is sent | Why | +|------|-------------------|--------------|-----| +| **Off** | `CODECARBON_TELEMETRY=off`, CLI setup, or saved preference | **Nothing** over the network for telemetry | You do not want CodeCarbon to phone home with usage statistics or emissions summaries. | +| **Internal** | Default when no preference exists, or `CODECARBON_TELEMETRY=internal`, or CLI | After each `EmissionsTracker.stop()`, one **`POST /telemetry`** with **environment, hardware, usage, and library diagnostics** (no per-run CO₂ totals on this request) | Helps the team understand real-world setups (OS, GPUs, frameworks, tracking modes), spot breakage patterns, and prioritise improvements—without publishing your emissions. | +| **Public** | `CODECARBON_TELEMETRY=public`, or CLI | Same **`POST /telemetry`** as internal **plus**, when configured, a second **`POST /emissions`** with **energy, emissions, duration, and utilization averages** for that run (skipped if the run is shorter than one second) | Lets you contribute **aggregated run outcomes** for transparency and leaderboards, alongside the same diagnostic bundle as internal tier. | -## What We Collect +The client adds a field **`telemetry_tier`** (`internal` or `public`) on `/telemetry` so the server knows the user’s choice. -### Internal (Private) +## What we collect by tier -When you enable Internal telemetry, we collect: +### Off -- **Environment**: Python version, OS, CodeCarbon version, installation method -- **Hardware**: CPU model/count, GPU model/count, RAM, CUDA version -- **Usage Patterns**: Tracking mode, output methods configured, hardware tracked -- **ML Ecosystem**: Detected frameworks (PyTorch, TensorFlow, Transformers, etc.) -- **Context**: Notebook environment, CI/CD detection, container runtime -- **Performance**: Hardware detection success, RAPL availability, errors +- **No telemetry HTTP requests.** Local tracking (CSV, logs, your own API key flows) behaves as you configure it separately. -### Public (Leaderboard) +### Internal — `POST /telemetry` only -When you enable Public telemetry, everything above **plus**: +**Goal:** Improve CodeCarbon for everyone without exposing your experiment’s carbon results. -- **Emissions Data**: Total CO2 emissions, energy consumed, duration -- **Utilization**: CPU, GPU, RAM utilization averages +Typical categories in the payload (exact keys may evolve with the library): -This data is shared publicly on the CodeCarbon leaderboard to encourage green computing practices. +- **Environment:** Python version, OS, CodeCarbon version, how Python/CodeCarbon appear to be installed (heuristic). +- **Hardware:** CPU/GPU model and counts, RAM size, CUDA/cuDNN when detectable—not your hostname or raw serial numbers. +- **How you use CodeCarbon:** Tracking mode (`machine` / `process`), which output backends are enabled (file, logger, API, …), power measurement interval, which hardware types are tracked. +- **ML stack (import-based):** Whether common frameworks (e.g. PyTorch, TensorFlow, Transformers) are present and their versions, to prioritise integrations. +- **Context heuristics:** e.g. notebook vs script, CI hints, container hints, optional **cloud provider / region** strings when your tracker knows them (same metadata you already use for emission factors). +- **Diagnostics:** Whether RAPL or certain GPU paths worked, whether hardware detection succeeded, optional non-sensitive error snippets to debug widespread failures. -## Privacy +**Not sent on `/telemetry`:** Per-run **kg CO₂**, **kWh**, **duration**, or **utilization averages** (those belong on the separate emissions payload for public tier only). -We're committed to protecting your privacy: +### Public — `POST /telemetry` and optionally `POST /emissions` -- **No PII**: We don't collect personally identifiable information -- **Anonymized**: Machine identifiers are hashed -- **GPS Precision**: Geographic coordinates are rounded to ~10km -- **GDPR Compliant**: We support opt-in consent and data deletion requests -- **Minimal Data**: We only collect what's needed to improve the library +- **Everything internal sends on `/telemetry`** (same reasons: product quality and compatibility). +- **Additionally**, when you have configured a **telemetry auth token** (`CODECARBON_TELEMETRY_API_KEY`, or `telemetry_api_key` / legacy `telemetry_project_token` in the telemetry JSON, etc.), a **second request** sends a **flat summary** of that run: total emissions, energy by component where available, duration, and CPU/GPU/RAM utilization averages. -## Configuration +**Why a second request:** Keeps **usage/diagnostics** and **publishable run metrics** separated so internal analytics can stay minimal while public/leaderboard flows can validate and store emissions-shaped records. -### Environment Variables +## Privacy and data minimisation -```bash -# Set telemetry tier -export CODECARBON_TELEMETRY=internal +- **No deliberate collection of personal identifiers** (name, email, etc.) in the telemetry payloads described above. +- **Some fields are pseudonymous or coarse by design** (e.g. a short hash of the Python executable path, coarse cloud region strings rather than precise GPS in telemetry). +- **You control the tier** via environment variable, CLI, or saved preference. +- **Public emissions** use the **telemetry** token chain, not your dashboard `api_key` / `CODECARBON_API_KEY`; treat telemetry tokens like any other secret. +- For **retention, deletion, and legal requests**, follow the policies of the **API operator** hosting the telemetry base URL (and separately the dashboard API host if you use it). -# Set custom OTEL endpoint (optional) -export CODECARBON_OTEL_ENDPOINT=https://your-otel-endpoint.com/v1/traces -``` +## Configuration -### In Code +### Environment variable (tier) -```python -from codecarbon import EmissionsTracker - -# Telemetry can also be set in the tracker -tracker = EmissionsTracker( - project_name="my-project", - telemetry="internal" # or "public" or "off" -) +```bash +export CODECARBON_TELEMETRY=internal # or public, or off ``` -## First-Run Prompt - -On first run, CodeCarbon will prompt you to choose your telemetry level if: - -- No `CODECARBON_TELEMETRY` environment variable is set -- No previous preference was saved +### Telemetry base URL and auth (separate from dashboard) -You can skip the prompt by setting the environment variable before running CodeCarbon. +- **`CODECARBON_TELEMETRY_API_ENDPOINT`** (optional) — overrides where `/telemetry` and `/emissions` are sent; otherwise JSON `telemetry_api_endpoint`, then **`api_endpoint` / `CODECARBON_API_ENDPOINT`**. +- **`CODECARBON_TELEMETRY_API_KEY`** (or telemetry JSON keys) — required for **`POST /emissions`** in public tier when you want emissions uploaded. **Not** the same as **`api_key` / `CODECARBON_API_KEY`**, which are only for dashboard / `save_to_api` logging. -## Disabling Telemetry - -To completely disable telemetry: +### CLI ```bash -export CODECARBON_TELEMETRY=off +codecarbon telemetry setup # interactive +codecarbon telemetry config # show effective tier and whether a token is available ``` -Or in your code: +### In Python (tier) + +Tier is **not** a constructor argument on `EmissionsTracker`. Set the environment variable before import/run, use `codecarbon telemetry setup`, or use the public helpers: ```python -tracker = EmissionsTracker(telemetry="off") +from codecarbon import set_telemetry + +set_telemetry("internal", dont_ask_again=True) ``` -## OTEL Integration +## When data is sent -Telemetry data is sent via OpenTelemetry (OTEL). To use your own OTEL collector: +Telemetry runs **once per completed tracker session**, when **`EmissionsTracker.stop()`** (or equivalent base implementation) finishes flushing outputs—not continuously while your job runs. -```bash -export CODECARBON_OTEL_ENDPOINT=https://your-collector:4318/v1/traces -``` - -Install the OTEL extras if you want to export telemetry: +## Disabling telemetry ```bash -pip install codecarbon[telemetry] +export CODECARBON_TELEMETRY=off ``` -## Data Retention +Or use `codecarbon telemetry setup` and choose **off**, or call `set_telemetry("off", dont_ask_again=True)` early in your process. + +## Further reading (developers) -- Internal telemetry: Retained for 12 months -- Public leaderboard data: Displayed indefinitely -- You can request data deletion by contacting the CodeCarbon team +For the exact HTTP contract, payload exclusions, and backend implementation checklist, see **`TELEMETRY_README.md`** at the root of the CodeCarbon source repository (next to the `docs/` folder). That file is aimed at contributors and API implementers; it is not part of the built docs site. diff --git a/pyproject.toml b/pyproject.toml index d5a4169f9..f72cce994 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,13 +43,6 @@ dependencies = [ "typer", ] -[project.optional-dependencies] -telemetry = [ - "opentelemetry-api", - "opentelemetry-sdk", - "opentelemetry-exporter-otlp", -] - [tool.setuptools.dynamic] version = {attr = "codecarbon._version.__version__"} diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..e44e0d4f3 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,12 @@ +"""Shared pytest configuration.""" + +import pytest + + +@pytest.fixture(autouse=True) +def isolate_telemetry_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Avoid accidental HTTP telemetry during tests from developer/CI env.""" + monkeypatch.setenv("CODECARBON_TELEMETRY", "off") + monkeypatch.delenv("CODECARBON_TELEMETRY_PROJECT_TOKEN", raising=False) + monkeypatch.delenv("CODECARBON_TELEMETRY_API_KEY", raising=False) + monkeypatch.delenv("CODECARBON_TELEMETRY_API_ENDPOINT", raising=False) diff --git a/tests/test_telemetry_http.py b/tests/test_telemetry_http.py new file mode 100644 index 000000000..1c838c574 --- /dev/null +++ b/tests/test_telemetry_http.py @@ -0,0 +1,242 @@ +"""Tests for HTTP-based telemetry (POST /telemetry and POST /emissions).""" + +import json +from pathlib import Path + +import pytest +import responses + +import codecarbon.core.telemetry.service as telemetry_service_module +from codecarbon.core.telemetry import config as telemetry_config_module +from codecarbon.core.telemetry.config import ( + TELEMETRY_API_ENDPOINT_ENV_VAR, + TELEMETRY_API_KEY_ENV_VAR, + TELEMETRY_ENV_VAR, +) +from codecarbon.core.telemetry.service import TelemetryService, init_telemetry + + +@pytest.fixture +def reset_telemetry_service(): + telemetry_service_module._telemetry_service = None + TelemetryService._instance = None + yield + telemetry_service_module._telemetry_service = None + TelemetryService._instance = None + + +@pytest.fixture +def telemetry_internal_env(monkeypatch, reset_telemetry_service): + monkeypatch.setenv(TELEMETRY_ENV_VAR, "internal") + monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + + +@pytest.fixture +def telemetry_public_env(monkeypatch, reset_telemetry_service): + monkeypatch.delenv("CODECARBON_API_KEY", raising=False) + monkeypatch.setenv(TELEMETRY_ENV_VAR, "public") + monkeypatch.setenv(TELEMETRY_API_KEY_ENV_VAR, "test-project-token") + monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + monkeypatch.setattr( + telemetry_config_module, + "DEFAULT_PUBLIC_TELEMETRY_TOKEN", + "", + ) + monkeypatch.setattr( + "codecarbon.cli.cli_utils.load_telemetry_config_from_file", + lambda path=None: {}, + ) + monkeypatch.setattr( + "codecarbon.core.telemetry.config.load_telemetry_preference", + lambda: None, + ) + monkeypatch.setattr( + telemetry_config_module, + "get_telemetry_preference_file", + lambda: Path("/nonexistent/codecarbon-telemetry-pref.txt"), + ) + + +@responses.activate +def test_internal_posts_telemetry_without_emission_keys( + telemetry_internal_env, +): + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + ok = svc.collect_and_export(cpu_count=4, cpu_model="TestCPU", gpu_count=0) + + assert ok is True + assert len(responses.calls) == 1 + body = json.loads(responses.calls[0].request.body) + assert body.get("telemetry_tier") == "internal" + assert "total_emissions_kg" not in body + assert body.get("cpu_count") == 4 + assert body.get("cpu_model") == "TestCPU" + + +@responses.activate +def test_public_posts_telemetry_and_emissions(telemetry_public_env): + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + responses.add( + responses.POST, + "https://telemetry.test/emissions", + json={}, + status=200, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + + assert svc.collect_and_export(cpu_count=2, cpu_model="x") + assert svc.export_emissions( + total_emissions_kg=0.01, + emissions_rate_kg_per_sec=1e-4, + energy_consumed_kwh=0.5, + cpu_energy_kwh=0.3, + gpu_energy_kwh=0.1, + ram_energy_kwh=0.1, + duration_seconds=60.0, + cpu_utilization_avg=12.5, + gpu_utilization_avg=0.0, + ram_utilization_avg=40.0, + ) + + assert len(responses.calls) == 2 + assert responses.calls[0].request.url.endswith("/telemetry") + tel_body = json.loads(responses.calls[0].request.body) + assert tel_body.get("telemetry_tier") == "public" + + em_req = responses.calls[1].request + assert em_req.url.endswith("/emissions") + assert em_req.headers.get("x-api-token") == "test-project-token" + em_body = json.loads(em_req.body) + assert em_body["total_emissions_kg"] == 0.01 + assert em_body["duration_seconds"] == 60.0 + + +@responses.activate +def test_public_emissions_skipped_short_duration(telemetry_public_env): + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + svc.collect_and_export(cpu_count=1) + ok = svc.export_emissions(duration_seconds=0.5, total_emissions_kg=0.001) + + assert ok is False + assert len(responses.calls) == 1 + + +@responses.activate +def test_public_uses_telemetry_api_key_not_dashboard_api_key( + monkeypatch, reset_telemetry_service +): + monkeypatch.delenv("CODECARBON_API_KEY", raising=False) + monkeypatch.setenv(TELEMETRY_ENV_VAR, "public") + monkeypatch.setenv(TELEMETRY_API_KEY_ENV_VAR, "telemetry-only-key") + monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + monkeypatch.setattr(telemetry_config_module, "DEFAULT_PUBLIC_TELEMETRY_TOKEN", "") + monkeypatch.setattr( + "codecarbon.cli.cli_utils.load_telemetry_config_from_file", + lambda path=None: {}, + ) + monkeypatch.setattr( + "codecarbon.core.telemetry.config.load_telemetry_preference", + lambda: None, + ) + monkeypatch.setattr( + telemetry_config_module, + "get_telemetry_preference_file", + lambda: Path("/nonexistent/codecarbon-telemetry-pref.txt"), + ) + monkeypatch.setattr( + telemetry_config_module, + "_hierarchical_config_dict", + lambda: { + "api_key": "dashboard-key", + "api_endpoint": "https://dashboard.example", + }, + ) + + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + responses.add( + responses.POST, + "https://telemetry.test/emissions", + json={}, + status=200, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + svc.collect_and_export(cpu_count=1) + svc.export_emissions(duration_seconds=2.0, total_emissions_kg=0.001) + + assert responses.calls[1].request.headers.get("x-api-token") == "telemetry-only-key" + + +@responses.activate +def test_public_dashboard_api_key_alone_does_not_enable_emissions_post( + monkeypatch, reset_telemetry_service +): + monkeypatch.delenv(TELEMETRY_API_KEY_ENV_VAR, raising=False) + monkeypatch.setenv(TELEMETRY_ENV_VAR, "public") + monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + monkeypatch.setattr(telemetry_config_module, "DEFAULT_PUBLIC_TELEMETRY_TOKEN", "") + monkeypatch.setattr( + "codecarbon.cli.cli_utils.load_telemetry_config_from_file", + lambda path=None: {}, + ) + monkeypatch.setattr( + "codecarbon.core.telemetry.config.load_telemetry_preference", + lambda: None, + ) + monkeypatch.setattr( + telemetry_config_module, + "get_telemetry_preference_file", + lambda: Path("/nonexistent/codecarbon-telemetry-pref.txt"), + ) + monkeypatch.setattr( + telemetry_config_module, + "_hierarchical_config_dict", + lambda: { + "api_key": "dashboard-only", + "api_endpoint": "https://ignored-for-telemetry-host.test", + }, + ) + + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + svc.collect_and_export(cpu_count=1) + ok = svc.export_emissions(duration_seconds=2.0, total_emissions_kg=0.001) + + assert ok is False + assert len(responses.calls) == 1 diff --git a/uv.lock b/uv.lock index 8161de636..fa0405946 100644 --- a/uv.lock +++ b/uv.lock @@ -26,6 +26,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "appdirs" +version = "1.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470, upload-time = "2020-05-11T07:59:51.037Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566, upload-time = "2020-05-11T07:59:49.499Z" }, +] + [[package]] name = "arrow" version = "1.4.0" @@ -324,6 +333,7 @@ wheels = [ name = "codecarbon" source = { editable = "." } dependencies = [ + { name = "appdirs" }, { name = "arrow" }, { name = "authlib" }, { name = "click" }, @@ -376,6 +386,7 @@ doc = [ [package.metadata] requires-dist = [ + { name = "appdirs" }, { name = "arrow" }, { name = "authlib", specifier = ">=1.2.1" }, { name = "click" },