def save_telemetry_config_to_file(
    tier: Optional[str] = None,
    project_token: Optional[str] = None,
    telemetry_api_endpoint: Optional[str] = None,
    telemetry_api_key: Optional[str] = None,
    path: Optional[Path] = None,
) -> None:
    """
    Merge telemetry settings into the JSON ``telemetry`` option of the config file.

    The settings live as one JSON object stored under ``[codecarbon] telemetry``.
    Values already present in that object are kept unless overridden by an
    argument of this call.

    Args:
        tier: Telemetry tier (off, internal, public). Ignored when empty.
        project_token: Telemetry auth token (stored as ``telemetry_api_key`` and legacy
            ``telemetry_project_token`` in JSON)
        telemetry_api_endpoint: Base URL for telemetry HTTP (optional). ``None``
            leaves the stored value untouched; an empty/blank string removes it.
        telemetry_api_key: Telemetry auth token (optional; overrides ``project_token`` if both set)
        path: Path to config file (defaults to ~/.codecarbon.config)

    Raises:
        configparser.Error: If the existing config file cannot be parsed. The
            file is left untouched in that case.
    """
    import json

    p = path or Path.home() / ".codecarbon.config"

    config = configparser.ConfigParser()
    if p.exists():
        config.read(str(p))

    if "codecarbon" not in config.sections():
        config.add_section("codecarbon")

    try:
        existing = load_telemetry_config_from_file(p)
    except configparser.Error as exc:
        # A previously stored value with a raw '%' cannot be interpolated;
        # start from scratch rather than refusing to save.
        logger.debug(f"Ignoring unreadable telemetry config in {p}: {exc}")
        existing = {}
    # The stored JSON could be any JSON value; only an object can be merged.
    had_config = isinstance(existing, dict) and bool(existing)
    telemetry_config = dict(existing) if had_config else {}

    if tier:
        telemetry_config["telemetry_tier"] = tier

    # First non-blank candidate wins; a whitespace-only token is "not given".
    token = ""
    for candidate in (telemetry_api_key, project_token):
        token = str(candidate).strip() if candidate else ""
        if token:
            break
    if token:
        telemetry_config["telemetry_api_key"] = token
        telemetry_config["telemetry_project_token"] = token

    if telemetry_api_endpoint is not None:
        endpoint = str(telemetry_api_endpoint).strip().rstrip("/")
        if endpoint:
            telemetry_config["telemetry_api_endpoint"] = endpoint
        else:
            telemetry_config.pop("telemetry_api_endpoint", None)

    if telemetry_config:
        # ConfigParser's default interpolation treats '%' as special and
        # rejects a bare one on assignment; '%%' reads back as a literal '%'.
        config["codecarbon"]["telemetry"] = json.dumps(telemetry_config).replace(
            "%", "%%"
        )
    elif had_config:
        # Every previously stored key was cleared: drop the stale option
        # instead of leaving the old JSON behind.
        config.remove_option("codecarbon", "telemetry")

    with p.open("w") as f:
        config.write(f)
    logger.info(f"Telemetry config saved to {p}")
def _replace_shell_exports(existing_content, env_vars, stale_names=()):
    """Return rc-file text with assignments of the managed variables replaced.

    Only lines that actually assign one of the managed names (``NAME=...`` or
    ``export NAME=...``) are dropped. Lines that merely mention a name, or that
    assign a different variable sharing the same prefix, are kept verbatim.
    """
    managed = set(env_vars) | set(stale_names)

    def assigns_managed_var(line):
        stripped = line.strip()
        if stripped.startswith("export "):
            stripped = stripped[len("export "):].lstrip()
        name, sep, _ = stripped.partition("=")
        return bool(sep) and name.strip() in managed

    # splitlines() avoids the phantom empty last element that split("\n")
    # produces, which otherwise adds one blank line per run.
    kept = [
        line for line in existing_content.splitlines() if not assigns_managed_var(line)
    ]
    kept.extend(f'export {name}="{value}"' for name, value in env_vars.items())
    return "\n".join(kept) + "\n"


@telemetry_app.command("setup", short_help="Interactive telemetry setup wizard")
def telemetry_setup(
    token: Annotated[
        Optional[str],
        typer.Option(
            "--token",
            "-t",
            help="Telemetry API key for public tier (same as CODECARBON_TELEMETRY_API_KEY)",
        ),
    ] = None,
    tier: Annotated[
        Optional[str],
        typer.Option("--tier", help="Telemetry tier: off, internal, or public"),
    ] = None,
):
    """
    Interactive wizard to configure CodeCarbon telemetry.

    Examples:
        # Interactive mode
        codecarbon telemetry setup

        # Non-interactive mode with options
        codecarbon telemetry setup --tier public --token YOUR_TOKEN

    This command automatically saves configuration to ~/.codecarbon.config
    and writes environment variables to your shell config.
    """
    from codecarbon.core.telemetry.config import (
        TELEMETRY_ENV_VAR,
        TelemetryTier,
        get_telemetry_config,
        save_telemetry_project_token,
        set_telemetry_tier,
    )

    print("\n=== CodeCarbon Telemetry Setup ===\n")

    # Show current config
    config = get_telemetry_config()
    print(f"Current tier: {config.tier.value}")
    print(
        f"Current telemetry API key: {'set' if config.project_token else 'not set'}"
    )
    print(f"Current API endpoint: {config.api_endpoint or 'default'}")

    # Determine tier (use provided value or prompt)
    if tier is not None:
        try:
            tier_choice = TelemetryTier(tier.strip().lower()).value
        except ValueError:
            print(f"[red]Invalid tier: {tier}. Valid values: off, internal, public[/red]")
            raise typer.Exit(1)
    else:
        print("\nChoose telemetry tier:")
        tier_choice = questionary.select(
            "Telemetry tier:",
            ["off", "internal", "public"],
            default=config.tier.value,
        ).ask()
        if tier_choice is None:
            # .ask() returns None when the prompt is cancelled; leave every
            # file untouched instead of crashing on TelemetryTier(None).
            print("[yellow]Telemetry setup cancelled; nothing was changed.[/yellow]")
            raise typer.Exit(1)

    # Save tier preference to file
    set_telemetry_tier(TelemetryTier(tier_choice), dont_ask_again=True)
    print(f"\nTelemetry tier set to: {tier_choice}")

    # Get project token (priority: CLI option > env var > config file)
    project_token = token or _get_project_token()
    if tier_choice == "public" and not project_token:
        project_token = typer.prompt(
            "Telemetry API key (CODECARBON_TELEMETRY_API_KEY; not your dashboard api_key)",
            default="",
        )

    # Save project token to JSON config file (so it persists without env vars)
    if tier_choice == "public" and project_token:
        save_telemetry_project_token(project_token)
        print("[green]Telemetry API key saved to config file[/green]")

    # Write to shell rc file automatically (zsh preferred, bash otherwise)
    shell_rc_path = Path.home() / ".zshrc"
    if not shell_rc_path.exists():
        shell_rc_path = Path.home() / ".bashrc"

    existing_content = shell_rc_path.read_text() if shell_rc_path.exists() else ""

    env_vars = {
        TELEMETRY_ENV_VAR: tier_choice,
    }
    # The legacy token variable is removed but never re-exported.
    shell_rc_path.write_text(
        _replace_shell_exports(
            existing_content,
            env_vars,
            stale_names=("CODECARBON_TELEMETRY_PROJECT_TOKEN",),
        )
    )
    print(f"\n[green]Environment variables written to {shell_rc_path}[/green]")
    print(f"[yellow]Run 'source {shell_rc_path}' or restart your terminal to apply[/yellow]")
    print("\n[green]Setup complete! Configuration saved.[/green]")
+ project_token: Project token sent as x-api-token. + + Returns: + True if the server accepted the request (HTTP 200 or 201). + """ + if not project_token: + logger.warning("add_public_emissions: missing project_token") + return False + try: + url = self.url + "/emissions" + headers = self._get_headers() + headers["x-api-token"] = project_token + r = requests.post(url=url, json=payload, timeout=5, headers=headers) + if r.status_code not in (200, 201): + self._log_error(url, payload, r) + return False + logger.debug(f"Public emissions telemetry sent successfully to {url}") + return True + except Exception as e: + logger.error(f"Failed to send public emissions telemetry: {e}") + return False + + def add_telemetry(self, telemetry_data: dict, api_key: str = None) -> bool: + """ + Send telemetry data to the /telemetry endpoint (Tier 1). + + Args: + telemetry_data: Dictionary containing telemetry payload + api_key: Optional API key for authentication + + Returns: + True if successful, False otherwise + """ + try: + url = self.url + "/telemetry" + headers = self._get_headers() + + # Use provided api_key or fall back to instance api_key + if api_key: + headers["x-api-token"] = api_key + + r = requests.post(url=url, json=telemetry_data, timeout=5, headers=headers) + if r.status_code not in (200, 201): + self._log_error(url, telemetry_data, r) + return False + logger.debug(f"Telemetry data sent successfully to {url}") + return True + except Exception as e: + logger.error(f"Failed to send telemetry data: {e}") + return False + class simple_utc(tzinfo): def tzname(self, **kwargs): diff --git a/codecarbon/core/telemetry/collector.py b/codecarbon/core/telemetry/collector.py new file mode 100644 index 000000000..8ca9741df --- /dev/null +++ b/codecarbon/core/telemetry/collector.py @@ -0,0 +1,545 @@ +""" +Telemetry data collector. + +Collects environment, hardware, usage, and ML ecosystem data. 
+""" + +import hashlib +import os +import platform +import sys +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + +from codecarbon._version import __version__ +from codecarbon.core.config import get_hierarchical_config +from codecarbon.external.logger import logger + + +@dataclass +class TelemetryData: + """Container for all telemetry data.""" + + # Environment & Hardware (Tier 1: Internal) + os: str = "" + python_version: str = "" + python_implementation: str = "" + python_executable_hash: str = "" + python_env_type: str = "" + codecarbon_version: str = "" + codecarbon_install_method: str = "" + + cpu_count: int = 0 + cpu_physical_count: int = 0 + cpu_model: str = "" + cpu_architecture: str = "" + + gpu_count: int = 0 + gpu_model: str = "" + gpu_driver_version: str = "" + gpu_memory_total_gb: float = 0.0 + + ram_total_size_gb: float = 0.0 + + cuda_version: str = "" + cudnn_version: str = "" + + cloud_provider: str = "" + cloud_region: str = "" + + # Usage Patterns (Tier 1: Internal) + tracking_mode: str = "" + api_mode: str = "" # offline, online + output_methods: list = field(default_factory=list) + hardware_tracked: list = field(default_factory=list) + measure_power_interval_secs: float = 15.0 + + # ML Ecosystem (Tier 1: Internal) + has_torch: bool = False + torch_version: str = "" + has_transformers: bool = False + transformers_version: str = "" + has_diffusers: bool = False + diffusers_version: str = "" + has_tensorflow: bool = False + tensorflow_version: str = "" + has_keras: bool = False + keras_version: str = "" + has_pytorch_lightning: bool = False + pytorch_lightning_version: str = "" + has_fastai: bool = False + fastai_version: str = "" + ml_framework_primary: str = "" + + # Performance & Errors (Tier 1: Internal) + hardware_detection_success: bool = True + rapl_available: bool = False + gpu_detection_method: str = "" + errors_encountered: list = field(default_factory=list) + tracking_overhead_percent: float = 0.0 + + # 
class TelemetryCollector:
    """Fluent builder that fills a ``TelemetryData`` record.

    Each public ``collect_*`` method stores its findings on the wrapped data
    object and returns ``self`` so calls can be chained; ``collect_all`` runs
    the Tier-1 collectors in sequence and returns the data object.
    """

    # (importable module name, label used for ``ml_framework_primary`` or None).
    # The module name doubles as the ``has_<name>`` / ``<name>_version`` prefix.
    _ML_MODULES = (
        ("torch", "pytorch"),
        ("transformers", None),
        ("diffusers", None),
        ("tensorflow", "tensorflow"),
        ("keras", None),
        ("pytorch_lightning", None),
        ("fastai", None),
    )

    # Environment variable -> CI name; the first variable that is set wins.
    _CI_ENV_VARS = {
        "GITHUB_ACTIONS": "github-actions",
        "GITLAB_CI": "gitlab",
        "JENKINS_URL": "jenkins",
        "CIRCLECI": "circleci",
        "TRAVIS": "travis",
        "BUILDKITE": "buildkite",
        "AWS_CODEBUILD": "codebuild",
        # Variable that AWS CodeBuild itself exports inside a build.
        "CODEBUILD_BUILD_ID": "codebuild",
    }

    def __init__(self):
        self._data = TelemetryData()

    @property
    def data(self) -> "TelemetryData":
        """The record being filled."""
        return self._data

    def collect_environment(self) -> "TelemetryCollector":
        """Collect Python interpreter, CodeCarbon and OS information."""
        self._data.python_version = platform.python_version()
        self._data.python_implementation = platform.python_implementation()

        # Only a truncated SHA-256 of the interpreter path is kept, so the
        # path itself (which may contain a user name) is never stored.
        executable = sys.executable
        if executable:
            self._data.python_executable_hash = hashlib.sha256(
                executable.encode()
            ).hexdigest()[:16]

        self._data.python_env_type = self._detect_python_env_type()
        self._data.codecarbon_version = __version__
        self._data.codecarbon_install_method = self._detect_install_method()
        self._data.os = platform.platform()
        self._data.cpu_architecture = platform.machine()

        return self

    def _detect_python_env_type(self) -> str:
        """Classify the interpreter as conda / venv / uv / virtualenv / system."""
        if "conda" in sys.prefix.lower():
            return "conda"

        in_virtual_env = hasattr(sys, "real_prefix") or (
            hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
        )
        if os.environ.get("VIRTUAL_ENV"):
            return "venv"
        if os.environ.get("UV"):
            return "uv"
        return "virtualenv" if in_virtual_env else "system"

    @staticmethod
    def _classify_install_path(package_dir: str) -> str:
        """Guess the installer from the directory that contains the package.

        Returns one of ``editable``, ``uv``, ``conda``, ``pip`` or ``unknown``.
        """
        if ".egg-link" in package_dir or ".editable" in package_dir:
            return "editable"
        if "site-packages" not in package_dir:
            return "unknown"

        # "uv" must be a whole path component; a plain substring test would
        # also match unrelated directory names that merely contain "uv".
        components = package_dir.replace("\\", "/").split("/")
        if "uv" in components:
            return "uv"
        if "conda" in package_dir:
            return "conda"
        return "pip"

    def _detect_install_method(self) -> str:
        """Detect how CodeCarbon was installed (heuristic on its location)."""
        import codecarbon

        return self._classify_install_path(os.path.dirname(codecarbon.__file__))

    def collect_hardware(
        self,
        cpu_count: int = 0,
        cpu_physical_count: int = 0,
        cpu_model: str = "",
        gpu_count: int = 0,
        gpu_model: str = "",
        ram_total_gb: float = 0.0,
    ) -> "TelemetryCollector":
        """Store caller-supplied hardware facts, then probe CUDA and the GPU driver."""
        self._data.cpu_count = cpu_count
        self._data.cpu_physical_count = cpu_physical_count
        self._data.cpu_model = cpu_model
        self._data.ram_total_size_gb = ram_total_gb
        self._data.gpu_count = gpu_count
        self._data.gpu_model = gpu_model

        self._detect_cuda()
        self._detect_gpu_driver()

        return self

    def _detect_cuda(self) -> None:
        """Record CUDA / cuDNN versions as reported by torch, when importable."""
        try:
            import torch

            # torch.version.cuda is None on CPU-only builds; keep the field
            # empty instead of storing the string "None".
            cuda_version = getattr(getattr(torch, "version", None), "cuda", None)
            if cuda_version:
                self._data.cuda_version = str(cuda_version)

            cudnn = getattr(torch.backends, "cudnn", None)
            if cudnn is not None and cudnn.is_available():
                self._data.cudnn_version = str(cudnn.version())
        except ImportError:
            pass
        except Exception as exc:
            # A broken torch install must not take the tracker down.
            logger.debug(f"CUDA detection failed: {exc}")

    def _detect_gpu_driver(self) -> None:
        """Record driver version and total memory of the first GPU via nvidia-smi."""
        import subprocess

        try:
            # One query for both fields instead of spawning nvidia-smi twice.
            result = subprocess.run(
                [
                    "nvidia-smi",
                    "--query-gpu=driver_version,memory.total",
                    "--format=csv,noheader,nounits",
                ],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (OSError, subprocess.TimeoutExpired):
            # Not installed, not executable, or hung: leave the defaults.
            return

        if result.returncode != 0:
            return

        first_row = result.stdout.strip().split("\n")[0]
        driver_version, _, memory_mib = (
            part.strip() for part in first_row.partition(",")
        )
        self._data.gpu_driver_version = driver_version
        self._data.gpu_detection_method = "nvidia-smi"

        try:
            self._data.gpu_memory_total_gb = float(memory_mib) / 1024
        except ValueError:
            # Value missing or not numeric.
            pass

    def collect_usage(
        self,
        tracking_mode: str = "machine",
        api_mode: str = "online",
        output_methods: list = None,
        hardware_tracked: list = None,
        measure_power_interval: float = 15.0,
    ) -> "TelemetryCollector":
        """Store how the tracker is configured (mode, outputs, interval)."""
        self._data.tracking_mode = tracking_mode
        self._data.api_mode = api_mode
        self._data.output_methods = output_methods or []
        self._data.hardware_tracked = hardware_tracked or []
        self._data.measure_power_interval_secs = measure_power_interval

        return self

    def collect_ml_ecosystem(self) -> "TelemetryCollector":
        """Detect importable ML libraries and record their versions.

        ``ml_framework_primary`` is the first of pytorch / tensorflow that
        imports successfully, or ``""`` when neither does.
        """
        import importlib

        # NOTE(review): detection imports each library, which can be slow for
        # the large frameworks - confirm this cost is acceptable here.
        frameworks = []
        for module_name, framework_label in self._ML_MODULES:
            try:
                module = importlib.import_module(module_name)
            except ImportError:
                continue
            except Exception as exc:
                # Installed but broken: treat as absent rather than crash.
                logger.debug(f"Could not import {module_name}: {exc}")
                continue

            setattr(self._data, f"has_{module_name}", True)
            setattr(
                self._data,
                f"{module_name}_version",
                str(getattr(module, "__version__", "")),
            )
            if framework_label:
                frameworks.append(framework_label)

        self._data.ml_framework_primary = frameworks[0] if frameworks else ""

        return self

    def collect_context(self) -> "TelemetryCollector":
        """Collect development context (notebook, CI, container, package manager)."""
        self._data.notebook_environment = self._detect_notebook()
        self._data.ci_environment = self._detect_ci()
        self._detect_container()
        self._data.python_package_manager = self._detect_package_manager()

        return self

    def _detect_notebook(self) -> str:
        """Return ``colab``, ``kaggle``, ``jupyter`` or ``none``."""
        # Hosted notebooks are checked first, otherwise a generic kernel
        # check would hide them.
        if os.environ.get("COLAB_RELEASE_TAG"):
            return "colab"
        if os.environ.get("KAGGLE_URL_BASE"):
            return "kaggle"

        # ipykernel merely being installed says nothing about this process;
        # it being already loaded is the signal that a kernel is running.
        if "ipykernel" in sys.modules:
            return "jupyter"

        return "none"

    def _detect_ci(self) -> str:
        """Return the name of the CI system whose marker variable is set, else ``none``."""
        for var, name in self._CI_ENV_VARS.items():
            if os.environ.get(var):
                return name

        return "none"

    def _detect_container(self) -> None:
        """Set ``in_container`` / ``container_runtime`` from filesystem and env hints."""
        if os.path.exists("/.dockerenv"):
            self._data.in_container = True
            self._data.container_runtime = "docker"
            return

        if os.environ.get("KUBERNETES_SERVICE_HOST"):
            self._data.in_container = True
            self._data.container_runtime = "kubernetes"
            return

        try:
            with open("/proc/1/cgroup", "r") as f:
                content = f.read()
        except OSError:
            # Missing (non-Linux) or unreadable (restricted /proc).
            content = ""
        if "docker" in content or "containerd" in content:
            self._data.in_container = True
            self._data.container_runtime = "docker"
            return

        self._data.in_container = False
        self._data.container_runtime = "none"

    def _detect_package_manager(self) -> str:
        """Guess the package manager from marker files in the working directory."""
        if os.path.exists("pyproject.toml"):
            try:
                with open("pyproject.toml", "r") as f:
                    if "[tool.poetry]" in f.read():
                        return "poetry"
            except (OSError, UnicodeDecodeError):
                # Unreadable project file: fall through to the other markers.
                pass

        if os.path.exists("uv.lock"):
            return "uv"

        if os.path.exists("Pipfile"):
            return "pipenv"

        if os.path.exists("environment.yml") or os.path.exists("environment.yaml"):
            return "conda"

        return "pip"

    def collect_errors(
        self,
        rapl_available: bool = False,
        hardware_detection_success: bool = True,
        errors: list = None,
    ) -> "TelemetryCollector":
        """Store error / capability information supplied by the caller."""
        self._data.rapl_available = rapl_available
        self._data.hardware_detection_success = hardware_detection_success
        self._data.errors_encountered = errors or []

        return self

    def collect_emissions(
        self,
        total_emissions_kg: float = 0.0,
        emissions_rate_kg_per_sec: float = 0.0,
        energy_consumed_kwh: float = 0.0,
        cpu_energy_kwh: float = 0.0,
        gpu_energy_kwh: float = 0.0,
        ram_energy_kwh: float = 0.0,
        duration_seconds: float = 0.0,
        cpu_utilization_avg: float = 0.0,
        gpu_utilization_avg: float = 0.0,
        ram_utilization_avg: float = 0.0,
    ) -> "TelemetryCollector":
        """Store emissions data (Tier 2: Public)."""
        self._data.total_emissions_kg = total_emissions_kg
        self._data.emissions_rate_kg_per_sec = emissions_rate_kg_per_sec
        self._data.energy_consumed_kwh = energy_consumed_kwh
        self._data.cpu_energy_kwh = cpu_energy_kwh
        self._data.gpu_energy_kwh = gpu_energy_kwh
        self._data.ram_energy_kwh = ram_energy_kwh
        self._data.duration_seconds = duration_seconds
        self._data.cpu_utilization_avg = cpu_utilization_avg
        self._data.gpu_utilization_avg = gpu_utilization_avg
        self._data.ram_utilization_avg = ram_utilization_avg

        return self

    def collect_cloud_info(
        self, cloud_provider: str = "", cloud_region: str = ""
    ) -> "TelemetryCollector":
        """Store cloud provider and region supplied by the caller."""
        self._data.cloud_provider = cloud_provider
        self._data.cloud_region = cloud_region

        return self

    def collect_all(
        self,
        cpu_count: int = 0,
        cpu_physical_count: int = 0,
        cpu_model: str = "",
        gpu_count: int = 0,
        gpu_model: str = "",
        ram_total_gb: float = 0.0,
        tracking_mode: str = "machine",
        api_mode: str = "online",
        output_methods: list = None,
        hardware_tracked: list = None,
        measure_power_interval: float = 15.0,
        rapl_available: bool = False,
        hardware_detection_success: bool = True,
        errors: list = None,
        cloud_provider: str = "",
        cloud_region: str = "",
    ) -> "TelemetryData":
        """Run every collector except ``collect_emissions`` and return the data."""
        self.collect_environment()
        self.collect_hardware(
            cpu_count=cpu_count,
            cpu_physical_count=cpu_physical_count,
            cpu_model=cpu_model,
            gpu_count=gpu_count,
            gpu_model=gpu_model,
            ram_total_gb=ram_total_gb,
        )
        self.collect_usage(
            tracking_mode=tracking_mode,
            api_mode=api_mode,
            output_methods=output_methods,
            hardware_tracked=hardware_tracked,
            measure_power_interval=measure_power_interval,
        )
        self.collect_ml_ecosystem()
        self.collect_context()
        self.collect_errors(
            rapl_available=rapl_available,
            hardware_detection_success=hardware_detection_success,
            errors=errors,
        )
        self.collect_cloud_info(
            cloud_provider=cloud_provider, cloud_region=cloud_region
        )

        return self._data
class TelemetryTier(str, Enum):
    """The three telemetry levels a user can choose from.

    Members are also ``str`` instances, so a member compares equal to its
    plain string value and ``TelemetryTier("off")`` looks a member up by value.
    """

    # No telemetry.
    OFF = "off"
    # Private telemetry.
    INTERNAL = "internal"
    # Public telemetry.
    PUBLIC = "public"
+ """ + + tier: TelemetryTier + project_token: Optional[str] + api_endpoint: Optional[str] + has_consent: bool + first_run: bool + + @property + def is_enabled(self) -> bool: + """Check if telemetry is enabled.""" + return self.tier != TelemetryTier.OFF + + @property + def is_public(self) -> bool: + """Check if public telemetry (emissions shared).""" + return self.tier == TelemetryTier.PUBLIC + + @property + def is_internal(self) -> bool: + """Check if internal telemetry (private).""" + return self.tier == TelemetryTier.INTERNAL + + +def get_user_config_dir() -> Path: + """Get the user config directory.""" + return Path(appdirs.user_config_dir("codecarbon", "CodeCarbon")) + + +def get_telemetry_preference_file() -> Path: + """Get the file path for storing telemetry preference.""" + return get_user_config_dir() / "telemetry_preference.txt" + + +def save_telemetry_preference(tier: TelemetryTier, dont_ask_again: bool = False) -> None: + """Save user's telemetry preference.""" + config_dir = get_user_config_dir() + config_dir.mkdir(parents=True, exist_ok=True) + + pref_file = get_telemetry_preference_file() + content = f"{tier.value}\n" + if dont_ask_again: + content += "dont_ask_again\n" + pref_file.write_text(content) + logger.info(f"Saved telemetry preference: {tier.value}") + + +def load_telemetry_preference() -> Optional[tuple[TelemetryTier, bool]]: + """Load user's saved telemetry preference. + + Returns: + Tuple of (tier, dont_ask_again) or None if not set. 
+ """ + pref_file = get_telemetry_preference_file() + if not pref_file.exists(): + return None + + try: + content = pref_file.read_text().strip() + lines = content.split("\n") + tier = TelemetryTier(lines[0]) + dont_ask_again = len(lines) > 1 and "dont_ask_again" in lines[1] + return (tier, dont_ask_again) + except (ValueError, IndexError) as e: + logger.debug(f"Could not parse telemetry preference: {e}") + return None + + +def detect_tier_from_env() -> Optional[TelemetryTier]: + """Detect telemetry tier from environment variable.""" + env_value = os.environ.get(TELEMETRY_ENV_VAR, "").lower().strip() + if not env_value: + return None + + try: + return TelemetryTier(env_value) + except ValueError: + logger.warning( + f"Invalid CODECARBON_TELEMETRY value: {env_value}. " + f"Valid values: {', '.join(t.value for t in TelemetryTier)}" + ) + return None + + +def _hierarchical_config_dict() -> dict: + """Load hierarchical CodeCarbon config (indirection for tests).""" + from codecarbon.core.config import get_hierarchical_config + + return get_hierarchical_config() + + +def get_telemetry_auth_token() -> Optional[str]: + """Resolve ``x-api-token`` for telemetry ``POST /telemetry`` (optional) and ``POST /emissions``. + + Order: + 1. Environment variable ``CODECARBON_TELEMETRY_API_KEY`` + 2. ``telemetry_api_key`` or ``telemetry_project_token`` in ``[codecarbon]`` telemetry JSON + 3. ``project_token=`` line in telemetry preference file (legacy) + 4. :data:`DEFAULT_PUBLIC_TELEMETRY_TOKEN` when non-empty + + Hierarchical ``api_key`` / ``CODECARBON_API_KEY`` is not consulted (dashboard only). + + Returns: + Token string or None if nothing is configured. 
+ """ + env_val = os.environ.get(TELEMETRY_API_KEY_ENV_VAR, "").strip() + if env_val: + return env_val + + try: + from codecarbon.cli.cli_utils import load_telemetry_config_from_file + + json_config = load_telemetry_config_from_file() + if json_config: + for key in ("telemetry_api_key", "telemetry_project_token"): + raw = json_config.get(key) + if raw: + s = str(raw).strip() + if s: + return s + except Exception: + pass + + pref_file = get_telemetry_preference_file() + if pref_file.exists(): + try: + content = pref_file.read_text() + lines = content.split("\n") + for line in lines[2:]: + if line.startswith("project_token="): + s = line.split("=", 1)[1].strip() + if s: + return s + except Exception as e: + logger.debug(f"Could not parse telemetry project token: {e}") + + if DEFAULT_PUBLIC_TELEMETRY_TOKEN.strip(): + return DEFAULT_PUBLIC_TELEMETRY_TOKEN.strip() + + return None + + +def get_public_telemetry_auth_token() -> Optional[str]: + """Alias for :func:`get_telemetry_auth_token`.""" + return get_telemetry_auth_token() + + +def get_telemetry_project_token() -> Optional[str]: + """Deprecated name; use :func:`get_telemetry_auth_token`.""" + return get_telemetry_auth_token() + + +def save_telemetry_project_token(token: str) -> None: + """Save telemetry project token to JSON config file.""" + try: + from codecarbon.cli.cli_utils import save_telemetry_config_to_file + save_telemetry_config_to_file(project_token=token) + logger.info("Saved telemetry project token to JSON config") + except Exception as e: + logger.warning(f"Failed to save to JSON config: {e}, using legacy format") + # Fallback to legacy text format + pref_file = get_telemetry_preference_file() + existing_content = "" + if pref_file.exists(): + existing_content = pref_file.read_text() + + lines = existing_content.split("\n") + new_lines = [] + found_token = False + for line in lines: + if line.startswith("project_token="): + new_lines.append(f"project_token={token}") + found_token = True + else: + 
def get_telemetry_config(force_first_run: bool = False) -> TelemetryConfig:
    """
    Get the telemetry configuration.

    Priority order for the tier:
    1. Environment variable (CODECARBON_TELEMETRY)
    2. Saved user preference (skipped when ``force_first_run`` is set)
    3. Default to internal (first run) - telemetry enabled by default

    Args:
        force_first_run: Force treating this as first run (for testing). The
            saved preference file is ignored and ``first_run`` is reported as
            True. A tier set through the environment variable is still
            honoured, so an explicit ``off`` is never overridden.

    Returns:
        TelemetryConfig object
    """
    # Token and endpoint are resolved the same way whichever source sets the tier.
    project_token = get_telemetry_auth_token()
    api_endpoint = get_telemetry_api_endpoint()

    def build(tier: TelemetryTier, first_run: bool) -> TelemetryConfig:
        return TelemetryConfig(
            tier=tier,
            project_token=project_token,
            api_endpoint=api_endpoint,
            has_consent=True,
            first_run=first_run,
        )

    # Check environment variable first
    env_tier = detect_tier_from_env()
    if env_tier is not None:
        return build(env_tier, first_run=force_first_run)

    # Check saved preference (only the tier is used here)
    if not force_first_run:
        saved = load_telemetry_preference()
        if saved is not None:
            saved_tier, _dont_ask_again = saved
            return build(saved_tier, first_run=False)

    # First run - default to internal (telemetry enabled by default to help CodeCarbon improve)
    return build(TelemetryTier.INTERNAL, first_run=True)
def public_emissions_body(
    total_emissions_kg: float = 0.0,
    emissions_rate_kg_per_sec: float = 0.0,
    energy_consumed_kwh: float = 0.0,
    cpu_energy_kwh: float = 0.0,
    gpu_energy_kwh: float = 0.0,
    ram_energy_kwh: float = 0.0,
    duration_seconds: float = 0.0,
    cpu_utilization_avg: float = 0.0,
    gpu_utilization_avg: float = 0.0,
    ram_utilization_avg: float = 0.0,
) -> Dict[str, Any]:
    """Assemble the flat JSON body for POST /emissions (public tier).

    Every argument is copied, unchanged, into the returned dictionary under a
    key equal to the argument's own name. Keys appear in signature order.
    """
    # (key, value) pairs listed in the same order as the parameters.
    pairs = (
        ("total_emissions_kg", total_emissions_kg),
        ("emissions_rate_kg_per_sec", emissions_rate_kg_per_sec),
        ("energy_consumed_kwh", energy_consumed_kwh),
        ("cpu_energy_kwh", cpu_energy_kwh),
        ("gpu_energy_kwh", gpu_energy_kwh),
        ("ram_energy_kwh", ram_energy_kwh),
        ("duration_seconds", duration_seconds),
        ("cpu_utilization_avg", cpu_utilization_avg),
        ("gpu_utilization_avg", gpu_utilization_avg),
        ("ram_utilization_avg", ram_utilization_avg),
    )
    return dict(pairs)
def prompt_for_telemetry_consent() -> Optional[TelemetryTier]:
    """
    Prompt user for telemetry consent on first run.

    When the resolved configuration is not a first run (a tier came from the
    environment variable or a saved preference), no prompt is shown and the
    tier already in effect is returned. On a first run the richest available
    prompt is used: questionary, then rich, then plain ``input``.

    This function only asks; it does not persist the answer.

    Returns:
        The tier in effect when no prompt is needed, otherwise whatever the
        selected prompt helper returns.
    """
    config = get_telemetry_config()

    # ``first_run`` is the gate, not ``has_consent``: the default first-run
    # configuration already reports ``has_consent=True``, so checking that
    # flag first returned before any prompt could ever be shown.
    if not config.first_run:
        return config.tier

    # Try interactive prompt, but don't fail if not available
    if QUESTIONARY_AVAILABLE:
        return _prompt_interactive_questionary()
    if RICH_AVAILABLE:
        return _prompt_interactive_rich()
    return _prompt_simple()
TelemetryTier.OFF + except Exception as e: + logger.debug(f"Questionary prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_interactive_rich() -> Optional[TelemetryTier]: + """Prompt using rich library.""" + if console is None: + return TelemetryTier.OFF + + try: + console.print("\n📊 [bold]CodeCarbon Telemetry[/bold]\n") + console.print( + "Help improve CodeCarbon by sharing anonymous usage data?\n" + ) + console.print(" [1] Internal - Basic environment info (PRIVATE)") + console.print(" • Python version, OS, CPU/GPU hardware") + console.print(" • Usage patterns, ML frameworks") + console.print(" • Helps us improve the library") + console.print() + console.print(" [2] Public - Full telemetry (SHARED PUBLICLY)") + console.print(" • All of internal + emissions data") + console.print(" • Shown on public leaderboard") + console.print() + console.print(" [3] Off - No telemetry") + console.print() + + answer = Prompt.ask( + "Select option", + choices=["1", "2", "3"], + default="1", + ) + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Rich prompt failed: {e}") + return TelemetryTier.OFF + + +def _prompt_simple() -> Optional[TelemetryTier]: + """Simple input-based prompt.""" + try: + print("\n📊 CodeCarbon Telemetry") + print("=" * 40) + print("Help improve CodeCarbon by sharing anonymous usage data?") + print() + print(" 1) Internal - Basic environment info (PRIVATE)") + print(" 2) Public - Full telemetry (SHARED PUBLICLY)") + print(" 3) Off - No telemetry") + print() + answer = input("Select option [1]: ").strip() or "1" + + if answer == "1": + return TelemetryTier.INTERNAL + elif answer == "2": + return TelemetryTier.PUBLIC + else: + return TelemetryTier.OFF + except Exception as e: + logger.debug(f"Simple prompt failed: {e}") + return TelemetryTier.OFF + + +def prompt_and_save() -> TelemetryTier: + """ + Prompt user and save 
class TelemetryService:
    """Service for managing telemetry.

    Process-wide singleton: ``__new__`` always hands back the same instance
    and ``__init__`` only sets up state the first time it runs. The service
    stays inert (no API client, no collector) until :meth:`initialize` finds
    an enabled configuration.
    """

    # The one shared instance, created lazily by ``__new__``.
    _instance: Optional["TelemetryService"] = None
    # Class-level default; shadowed by an instance attribute after first init.
    _initialized: bool = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Constructing the singleton again must not wipe existing state.
        if self._initialized:
            return
        self._config: Optional[TelemetryConfig] = None
        self._api_client: Optional[ApiClient] = None
        self._collector: Optional[TelemetryCollector] = None
        self._initialized = True

    def initialize(self, force_prompt: bool = False) -> "TelemetryConfig":
        """
        Initialize telemetry service.

        Loads the configuration, optionally asks for consent on a first run,
        and creates the API client and collector when telemetry is enabled
        (both are cleared otherwise).

        Args:
            force_prompt: Force showing the consent prompt. Only has an
                effect when the loaded configuration is a first run.

        Returns:
            TelemetryConfig
        """
        self._config = get_telemetry_config()

        if force_prompt and self._config.first_run:
            # The prompt's answer used to be thrown away: the configuration
            # was simply reloaded and came back unchanged.
            chosen = prompt_for_telemetry_consent()
            if chosen is not None:
                self._config = self._adopt_prompt_choice(chosen, self._config)

        if self._config.is_enabled:
            base = resolve_telemetry_base_url(self._config.api_endpoint)
            # Normalise an empty token to None.
            telemetry_key = self._config.project_token or None
            self._api_client = ApiClient(
                endpoint_url=base,
                experiment_id=None,
                api_key=telemetry_key,
                create_run_automatically=False,
            )
            self._collector = TelemetryCollector()
        else:
            self._api_client = None
            self._collector = None

        logger.info(
            f"Telemetry initialized: tier={self._config.tier.value}, "
            f"enabled={self._config.is_enabled}"
        )

        return self._config

    @staticmethod
    def _adopt_prompt_choice(
        chosen: "TelemetryTier", current: "TelemetryConfig"
    ) -> "TelemetryConfig":
        """Persist the tier picked at the prompt and return a config using it.

        Saving is best-effort. The returned configuration is built in memory
        from ``chosen``, so the current session honours the answer even when
        the preference could not be written.
        """
        try:
            set_telemetry_tier(chosen, dont_ask_again=True)
        except Exception as e:
            logger.warning(f"Could not save telemetry preference: {e}")
        return TelemetryConfig(
            tier=chosen,
            project_token=current.project_token,
            api_endpoint=current.api_endpoint,
            has_consent=True,
            first_run=False,
        )

    def get_config(self) -> "Optional[TelemetryConfig]":
        """Get current telemetry config (None until ``initialize`` has run)."""
        return self._config

    def collect_and_export(
        self,
        cpu_count: int = 0,
        cpu_physical_count: int = 0,
        cpu_model: str = "",
        gpu_count: int = 0,
        gpu_model: str = "",
        ram_total_gb: float = 0.0,
        tracking_mode: str = "machine",
        api_mode: str = "online",
        output_methods: Optional[list] = None,
        hardware_tracked: Optional[list] = None,
        measure_power_interval: float = 15.0,
        rapl_available: bool = False,
        hardware_detection_success: bool = True,
        errors: Optional[list] = None,
        cloud_provider: str = "",
        cloud_region: str = "",
    ) -> bool:
        """
        Collect Tier-1 telemetry and POST to /telemetry.

        All arguments are forwarded unchanged to the collector. Any exception
        raised while collecting or sending is logged and reported as failure.

        Returns:
            True if successful, False otherwise (including when telemetry is
            disabled or the service has not been initialized).
        """
        if not self._config or not self._config.is_enabled:
            return False

        if not self._collector or not self._api_client:
            return False

        try:
            data = self._collector.collect_all(
                cpu_count=cpu_count,
                cpu_physical_count=cpu_physical_count,
                cpu_model=cpu_model,
                gpu_count=gpu_count,
                gpu_model=gpu_model,
                ram_total_gb=ram_total_gb,
                tracking_mode=tracking_mode,
                api_mode=api_mode,
                output_methods=output_methods,
                hardware_tracked=hardware_tracked,
                measure_power_interval=measure_power_interval,
                rapl_available=rapl_available,
                hardware_detection_success=hardware_detection_success,
                errors=errors,
                cloud_provider=cloud_provider,
                cloud_region=cloud_region,
            )

            body = tier1_telemetry_body(data, self._config.tier)
            return self._api_client.add_telemetry(body)

        except Exception as e:
            logger.warning(f"Failed to collect/export telemetry: {e}")
            return False

    def export_emissions(
        self,
        total_emissions_kg: float = 0.0,
        emissions_rate_kg_per_sec: float = 0.0,
        energy_consumed_kwh: float = 0.0,
        cpu_energy_kwh: float = 0.0,
        gpu_energy_kwh: float = 0.0,
        ram_energy_kwh: float = 0.0,
        duration_seconds: float = 0.0,
        cpu_utilization_avg: float = 0.0,
        gpu_utilization_avg: float = 0.0,
        ram_utilization_avg: float = 0.0,
    ) -> bool:
        """
        Export emissions data via POST /emissions (public tier only).

        Nothing is sent unless the tier is public, a telemetry token and an
        API client are available, and the run lasted at least one second.

        Returns:
            True if successful, False otherwise
        """
        if not self._config or not self._config.is_public:
            return False

        if not self._config.project_token or not self._api_client:
            return False

        if duration_seconds < 1:
            logger.debug(
                "Telemetry public emissions skipped: duration < 1 second"
            )
            return False

        try:
            payload = public_emissions_body(
                total_emissions_kg=total_emissions_kg,
                emissions_rate_kg_per_sec=emissions_rate_kg_per_sec,
                energy_consumed_kwh=energy_consumed_kwh,
                cpu_energy_kwh=cpu_energy_kwh,
                gpu_energy_kwh=gpu_energy_kwh,
                ram_energy_kwh=ram_energy_kwh,
                duration_seconds=duration_seconds,
                cpu_utilization_avg=cpu_utilization_avg,
                gpu_utilization_avg=gpu_utilization_avg,
                ram_utilization_avg=ram_utilization_avg,
            )
            return self._api_client.add_public_emissions(
                payload, self._config.project_token
            )

        except Exception as e:
            logger.warning(f"Failed to export emissions telemetry: {e}")
            return False
+ + Args: + tier: "off", "internal", or "public" + dont_ask_again: Don't ask again in future + """ + try: + tier_enum = TelemetryTier(tier) + set_telemetry_tier(tier_enum, dont_ask_again=dont_ask_again) + except ValueError: + logger.warning(f"Invalid telemetry tier: {tier}") diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index a070ea56c..963871f5b 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -756,8 +756,89 @@ def stop(self) -> Optional[float]: for handler in self._output_handlers: handler.exit() + self._send_telemetry(emissions_data) + + # Log telemetry configuration warning + self._log_telemetry_warning() + return emissions_data.emissions + @suppress(Exception) + def _send_telemetry(self, emissions_data: EmissionsData) -> None: + """Send Tier-1 (and public Tier-2) telemetry via HTTP when enabled.""" + from codecarbon.core.telemetry import get_telemetry_service, init_telemetry + + init_telemetry() + svc = get_telemetry_service() + cfg = svc.get_config() + if not cfg or not cfg.is_enabled: + return + + hi = self.get_detected_hardware() + cloud: CloudMetadata = self._get_cloud_metadata() + api_mode = "online" if self._electricitymaps_api_token else "offline" + output_methods = [type(h).__name__ for h in self._output_handlers] + rapl_available = any( + getattr(h, "_mode", None) == "intel_rapl" for h in self._hardware + ) + + svc.collect_and_export( + cpu_count=int(hi.get("cpu_count") or 0), + cpu_physical_count=int(hi.get("cpu_physical_count") or 0), + cpu_model=str(hi.get("cpu_model") or ""), + gpu_count=int(hi.get("gpu_count") or 0), + gpu_model=str(hi.get("gpu_model") or ""), + ram_total_gb=float(hi.get("ram_total_size") or 0.0), + tracking_mode=str(self._tracking_mode), + api_mode=api_mode, + output_methods=output_methods, + hardware_tracked=list(self._conf.get("hardware") or []), + measure_power_interval=float(self._measure_power_secs), + rapl_available=rapl_available, + 
def _log_telemetry_warning(self) -> None:
    """
    Report the telemetry configuration state once a run has finished.

    Exactly one of the following happens:

    * telemetry disabled: a warning explaining how to enable it;
    * public tier without an auth token: a warning explaining how to
      provide one;
    * telemetry enabled and not a first run: a debug line with the tier;
    * otherwise: nothing is logged.
    """
    from codecarbon.core.telemetry.config import (
        TELEMETRY_ENV_VAR,
        get_telemetry_config,
    )

    cfg = get_telemetry_config()

    if not cfg.is_enabled:
        disabled_notice = (
            f"Telemetry is disabled. To enable, run: codecarbon telemetry setup\n"
            f"Or set environment variable: export {TELEMETRY_ENV_VAR}=internal"
        )
        logger.warning(disabled_notice)
        return

    if cfg.is_public and not cfg.project_token:
        missing_token_notice = (
            "Telemetry is set to 'public' but no telemetry auth token is available for "
            "POST /emissions.\n"
            "Set CODECARBON_TELEMETRY_API_KEY, add telemetry_api_key in the telemetry JSON in "
            ".codecarbon.config, or run: codecarbon telemetry setup"
        )
        logger.warning(missing_token_notice)
        return

    if cfg.is_enabled and not cfg.first_run:
        # Telemetry is properly configured
        logger.debug(f"Telemetry enabled: tier={cfg.tier.value}")
Optionally, you can opt in to **public** sharing of **run-level emissions summaries** (for example for leaderboards). This page explains the three tiers, **what** is collected in each case, **why**, and how to control it. + +Telemetry HTTP uses its own **base URL resolution**: `CODECARBON_TELEMETRY_API_ENDPOINT`, optional JSON `telemetry_api_endpoint` in the `[codecarbon]` telemetry blob, then the same hierarchical `api_endpoint` / `CODECARBON_API_ENDPOINT` as the rest of CodeCarbon (default `https://api.codecarbon.io`). **Dashboard** uploads (`save_to_api`, `CodeCarbonAPIOutput`) still use only `api_endpoint` + `api_key`; you can point telemetry at a different host in the same process. + +## Telemetry tiers + +| Tier | How to choose it | What is sent | Why | +|------|-------------------|--------------|-----| +| **Off** | `CODECARBON_TELEMETRY=off`, CLI setup, or saved preference | **Nothing** over the network for telemetry | You do not want CodeCarbon to phone home with usage statistics or emissions summaries. | +| **Internal** | Default when no preference exists, or `CODECARBON_TELEMETRY=internal`, or CLI | After each `EmissionsTracker.stop()`, one **`POST /telemetry`** with **environment, hardware, usage, and library diagnostics** (no per-run CO₂ totals on this request) | Helps the team understand real-world setups (OS, GPUs, frameworks, tracking modes), spot breakage patterns, and prioritise improvements—without publishing your emissions. | +| **Public** | `CODECARBON_TELEMETRY=public`, or CLI | Same **`POST /telemetry`** as internal **plus**, when configured, a second **`POST /emissions`** with **energy, emissions, duration, and utilization averages** for that run (skipped if the run is shorter than one second) | Lets you contribute **aggregated run outcomes** for transparency and leaderboards, alongside the same diagnostic bundle as internal tier. 
| + +The client adds a field **`telemetry_tier`** (`internal` or `public`) on `/telemetry` so the server knows the user’s choice. + +## What we collect by tier + +### Off + +- **No telemetry HTTP requests.** Local tracking (CSV, logs, your own API key flows) behaves as you configure it separately. + +### Internal — `POST /telemetry` only + +**Goal:** Improve CodeCarbon for everyone without exposing your experiment’s carbon results. + +Typical categories in the payload (exact keys may evolve with the library): + +- **Environment:** Python version, OS, CodeCarbon version, how Python/CodeCarbon appear to be installed (heuristic). +- **Hardware:** CPU/GPU model and counts, RAM size, CUDA/cuDNN when detectable—not your hostname or raw serial numbers. +- **How you use CodeCarbon:** Tracking mode (`machine` / `process`), which output backends are enabled (file, logger, API, …), power measurement interval, which hardware types are tracked. +- **ML stack (import-based):** Whether common frameworks (e.g. PyTorch, TensorFlow, Transformers) are present and their versions, to prioritise integrations. +- **Context heuristics:** e.g. notebook vs script, CI hints, container hints, optional **cloud provider / region** strings when your tracker knows them (same metadata you already use for emission factors). +- **Diagnostics:** Whether RAPL or certain GPU paths worked, whether hardware detection succeeded, optional non-sensitive error snippets to debug widespread failures. + +**Not sent on `/telemetry`:** Per-run **kg CO₂**, **kWh**, **duration**, or **utilization averages** (those belong on the separate emissions payload for public tier only). + +### Public — `POST /telemetry` and optionally `POST /emissions` + +- **Everything internal sends on `/telemetry`** (same reasons: product quality and compatibility). 
+- **Additionally**, when you have configured a **telemetry auth token** (`CODECARBON_TELEMETRY_API_KEY`, or `telemetry_api_key` / legacy `telemetry_project_token` in the telemetry JSON, etc.), a **second request** sends a **flat summary** of that run: total emissions, energy by component where available, duration, and CPU/GPU/RAM utilization averages. + +**Why a second request:** Keeps **usage/diagnostics** and **publishable run metrics** separated so internal analytics can stay minimal while public/leaderboard flows can validate and store emissions-shaped records. + +## Privacy and data minimisation + +- **No deliberate collection of personal identifiers** (name, email, etc.) in the telemetry payloads described above. +- **Some fields are pseudonymous or coarse by design** (e.g. a short hash of the Python executable path, coarse cloud region strings rather than precise GPS in telemetry). +- **You control the tier** via environment variable, CLI, or saved preference. +- **Public emissions** use the **telemetry** token chain, not your dashboard `api_key` / `CODECARBON_API_KEY`; treat telemetry tokens like any other secret. +- For **retention, deletion, and legal requests**, follow the policies of the **API operator** hosting the telemetry base URL (and separately the dashboard API host if you use it). + +## Configuration + +### Environment variable (tier) + +```bash +export CODECARBON_TELEMETRY=internal # or public, or off +``` + +### Telemetry base URL and auth (separate from dashboard) + +- **`CODECARBON_TELEMETRY_API_ENDPOINT`** (optional) — overrides where `/telemetry` and `/emissions` are sent; otherwise JSON `telemetry_api_endpoint`, then **`api_endpoint` / `CODECARBON_API_ENDPOINT`**. +- **`CODECARBON_TELEMETRY_API_KEY`** (or telemetry JSON keys) — required for **`POST /emissions`** in public tier when you want emissions uploaded. **Not** the same as **`api_key` / `CODECARBON_API_KEY`**, which are only for dashboard / `save_to_api` logging. 
+ +### CLI + +```bash +codecarbon telemetry setup # interactive +codecarbon telemetry config # show effective tier and whether a token is available +``` + +### In Python (tier) + +Tier is **not** a constructor argument on `EmissionsTracker`. Set the environment variable before import/run, use `codecarbon telemetry setup`, or use the public helpers: + +```python +from codecarbon import set_telemetry + +set_telemetry("internal", dont_ask_again=True) +``` + +## When data is sent + +Telemetry runs **once per completed tracker session**, when **`EmissionsTracker.stop()`** (or equivalent base implementation) finishes flushing outputs—not continuously while your job runs. + +## Disabling telemetry + +```bash +export CODECARBON_TELEMETRY=off +``` + +Or use `codecarbon telemetry setup` and choose **off**, or call `set_telemetry("off", dont_ask_again=True)` early in your process. + +## Further reading (developers) + +For the exact HTTP contract, payload exclusions, and backend implementation checklist, see **`TELEMETRY_README.md`** at the root of the CodeCarbon source repository (next to the `docs/` folder). That file is aimed at contributors and API implementers; it is not part of the built docs site. 
diff --git a/mkdocs.yml b/mkdocs.yml index 4517f6b98..c6d4923e8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -128,4 +128,5 @@ nav: - Output: logging/output.md - Collecting emissions to a logger: logging/to_logger.md - Visualize: logging/visualize.md + - Telemetry: telemetry.md - Track GenAI API Calls (EcoLogits) ↗: https://ecologits.ai/latest/?utm_source=codecarbon&utm_medium=docs diff --git a/pyproject.toml b/pyproject.toml index 518acb7ed..f72cce994 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ + "appdirs", "arrow", "authlib>=1.2.1", "click", diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..e44e0d4f3 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,12 @@ +"""Shared pytest configuration.""" + +import pytest + + +@pytest.fixture(autouse=True) +def isolate_telemetry_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Avoid accidental HTTP telemetry during tests from developer/CI env.""" + monkeypatch.setenv("CODECARBON_TELEMETRY", "off") + monkeypatch.delenv("CODECARBON_TELEMETRY_PROJECT_TOKEN", raising=False) + monkeypatch.delenv("CODECARBON_TELEMETRY_API_KEY", raising=False) + monkeypatch.delenv("CODECARBON_TELEMETRY_API_ENDPOINT", raising=False) diff --git a/tests/test_telemetry_http.py b/tests/test_telemetry_http.py new file mode 100644 index 000000000..1c838c574 --- /dev/null +++ b/tests/test_telemetry_http.py @@ -0,0 +1,242 @@ +"""Tests for HTTP-based telemetry (POST /telemetry and POST /emissions).""" + +import json +from pathlib import Path + +import pytest +import responses + +import codecarbon.core.telemetry.service as telemetry_service_module +from codecarbon.core.telemetry import config as telemetry_config_module +from codecarbon.core.telemetry.config import ( + TELEMETRY_API_ENDPOINT_ENV_VAR, + TELEMETRY_API_KEY_ENV_VAR, + TELEMETRY_ENV_VAR, +) +from codecarbon.core.telemetry.service import TelemetryService, 
init_telemetry + + +@pytest.fixture +def reset_telemetry_service(): + telemetry_service_module._telemetry_service = None + TelemetryService._instance = None + yield + telemetry_service_module._telemetry_service = None + TelemetryService._instance = None + + +@pytest.fixture +def telemetry_internal_env(monkeypatch, reset_telemetry_service): + monkeypatch.setenv(TELEMETRY_ENV_VAR, "internal") + monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + + +@pytest.fixture +def telemetry_public_env(monkeypatch, reset_telemetry_service): + monkeypatch.delenv("CODECARBON_API_KEY", raising=False) + monkeypatch.setenv(TELEMETRY_ENV_VAR, "public") + monkeypatch.setenv(TELEMETRY_API_KEY_ENV_VAR, "test-project-token") + monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + monkeypatch.setattr( + telemetry_config_module, + "DEFAULT_PUBLIC_TELEMETRY_TOKEN", + "", + ) + monkeypatch.setattr( + "codecarbon.cli.cli_utils.load_telemetry_config_from_file", + lambda path=None: {}, + ) + monkeypatch.setattr( + "codecarbon.core.telemetry.config.load_telemetry_preference", + lambda: None, + ) + monkeypatch.setattr( + telemetry_config_module, + "get_telemetry_preference_file", + lambda: Path("/nonexistent/codecarbon-telemetry-pref.txt"), + ) + + +@responses.activate +def test_internal_posts_telemetry_without_emission_keys( + telemetry_internal_env, +): + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + ok = svc.collect_and_export(cpu_count=4, cpu_model="TestCPU", gpu_count=0) + + assert ok is True + assert len(responses.calls) == 1 + body = json.loads(responses.calls[0].request.body) + assert body.get("telemetry_tier") == "internal" + assert "total_emissions_kg" not in body + assert body.get("cpu_count") == 4 + assert body.get("cpu_model") == "TestCPU" + + +@responses.activate +def 
test_public_posts_telemetry_and_emissions(telemetry_public_env): + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + responses.add( + responses.POST, + "https://telemetry.test/emissions", + json={}, + status=200, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + + assert svc.collect_and_export(cpu_count=2, cpu_model="x") + assert svc.export_emissions( + total_emissions_kg=0.01, + emissions_rate_kg_per_sec=1e-4, + energy_consumed_kwh=0.5, + cpu_energy_kwh=0.3, + gpu_energy_kwh=0.1, + ram_energy_kwh=0.1, + duration_seconds=60.0, + cpu_utilization_avg=12.5, + gpu_utilization_avg=0.0, + ram_utilization_avg=40.0, + ) + + assert len(responses.calls) == 2 + assert responses.calls[0].request.url.endswith("/telemetry") + tel_body = json.loads(responses.calls[0].request.body) + assert tel_body.get("telemetry_tier") == "public" + + em_req = responses.calls[1].request + assert em_req.url.endswith("/emissions") + assert em_req.headers.get("x-api-token") == "test-project-token" + em_body = json.loads(em_req.body) + assert em_body["total_emissions_kg"] == 0.01 + assert em_body["duration_seconds"] == 60.0 + + +@responses.activate +def test_public_emissions_skipped_short_duration(telemetry_public_env): + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + svc.collect_and_export(cpu_count=1) + ok = svc.export_emissions(duration_seconds=0.5, total_emissions_kg=0.001) + + assert ok is False + assert len(responses.calls) == 1 + + +@responses.activate +def test_public_uses_telemetry_api_key_not_dashboard_api_key( + monkeypatch, reset_telemetry_service +): + monkeypatch.delenv("CODECARBON_API_KEY", raising=False) + monkeypatch.setenv(TELEMETRY_ENV_VAR, "public") + monkeypatch.setenv(TELEMETRY_API_KEY_ENV_VAR, "telemetry-only-key") + 
monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + monkeypatch.setattr(telemetry_config_module, "DEFAULT_PUBLIC_TELEMETRY_TOKEN", "") + monkeypatch.setattr( + "codecarbon.cli.cli_utils.load_telemetry_config_from_file", + lambda path=None: {}, + ) + monkeypatch.setattr( + "codecarbon.core.telemetry.config.load_telemetry_preference", + lambda: None, + ) + monkeypatch.setattr( + telemetry_config_module, + "get_telemetry_preference_file", + lambda: Path("/nonexistent/codecarbon-telemetry-pref.txt"), + ) + monkeypatch.setattr( + telemetry_config_module, + "_hierarchical_config_dict", + lambda: { + "api_key": "dashboard-key", + "api_endpoint": "https://dashboard.example", + }, + ) + + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + responses.add( + responses.POST, + "https://telemetry.test/emissions", + json={}, + status=200, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + svc.collect_and_export(cpu_count=1) + svc.export_emissions(duration_seconds=2.0, total_emissions_kg=0.001) + + assert responses.calls[1].request.headers.get("x-api-token") == "telemetry-only-key" + + +@responses.activate +def test_public_dashboard_api_key_alone_does_not_enable_emissions_post( + monkeypatch, reset_telemetry_service +): + monkeypatch.delenv(TELEMETRY_API_KEY_ENV_VAR, raising=False) + monkeypatch.setenv(TELEMETRY_ENV_VAR, "public") + monkeypatch.setenv(TELEMETRY_API_ENDPOINT_ENV_VAR, "https://telemetry.test") + monkeypatch.setattr(telemetry_config_module, "DEFAULT_PUBLIC_TELEMETRY_TOKEN", "") + monkeypatch.setattr( + "codecarbon.cli.cli_utils.load_telemetry_config_from_file", + lambda path=None: {}, + ) + monkeypatch.setattr( + "codecarbon.core.telemetry.config.load_telemetry_preference", + lambda: None, + ) + monkeypatch.setattr( + telemetry_config_module, + "get_telemetry_preference_file", + lambda: Path("/nonexistent/codecarbon-telemetry-pref.txt"), + ) + 
monkeypatch.setattr( + telemetry_config_module, + "_hierarchical_config_dict", + lambda: { + "api_key": "dashboard-only", + "api_endpoint": "https://ignored-for-telemetry-host.test", + }, + ) + + responses.add( + responses.POST, + "https://telemetry.test/telemetry", + json={}, + status=201, + ) + + init_telemetry() + svc = telemetry_service_module.get_telemetry_service() + svc.collect_and_export(cpu_count=1) + ok = svc.export_emissions(duration_seconds=2.0, total_emissions_kg=0.001) + + assert ok is False + assert len(responses.calls) == 1 diff --git a/uv.lock b/uv.lock index 8161de636..fa0405946 100644 --- a/uv.lock +++ b/uv.lock @@ -26,6 +26,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "appdirs" +version = "1.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470, upload-time = "2020-05-11T07:59:51.037Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566, upload-time = "2020-05-11T07:59:49.499Z" }, +] + [[package]] name = "arrow" version = "1.4.0" @@ -324,6 +333,7 @@ wheels = [ name = "codecarbon" source = { editable = "." 
} dependencies = [ + { name = "appdirs" }, { name = "arrow" }, { name = "authlib" }, { name = "click" }, @@ -376,6 +386,7 @@ doc = [ [package.metadata] requires-dist = [ + { name = "appdirs" }, { name = "arrow" }, { name = "authlib", specifier = ">=1.2.1" }, { name = "click" },