diff --git a/codecarbon/emissions_tracker.py b/codecarbon/emissions_tracker.py index 88da92628..db49b463d 100644 --- a/codecarbon/emissions_tracker.py +++ b/codecarbon/emissions_tracker.py @@ -4,6 +4,7 @@ """ import dataclasses +import json import os import platform import re @@ -403,6 +404,7 @@ def __init__( allow_multiple_runs: Optional[bool] = _sentinel, rapl_include_dram: Optional[bool] = _sentinel, rapl_prefer_psys: Optional[bool] = _sentinel, + metadata: Optional[Union[dict, str]] = _sentinel, ): """ :param project_name: Project name for current experiment run, default name @@ -496,6 +498,16 @@ def __init__( (CPU + chipset + PCIe). When False, uses package domains which are more reliable. Note: psys can report higher values than CPU TDP and may be unreliable on older systems. + :param metadata: Free-form metadata bag to enrich outputs. Either a dict or a + path to a JSON file containing a dict. Individual output methods + read their own section from it. When ``OutputMethod.BOAMPS`` is + used, the ``boamps`` key (or, for backward compatibility, the whole + dict) is read following the BoAmps schema structure (with ``task``, + ``header``, ``quality``, ``infrastructure``, ``environment`` + sections) to fill the required BoAmps fields (taskStage, taskFamily, + algorithms, dataset) that cannot be auto-detected by CodeCarbon. + Can also be set in config as a path to a JSON file: + ``metadata=metadata.json``. 
""" # logger.info("base tracker init") @@ -557,6 +569,7 @@ def __init__( self._set_from_conf(output_handlers, "output_handlers", []) self._set_from_conf(tracking_mode, "tracking_mode", "machine") self._set_from_conf(on_csv_write, "on_csv_write", "append") + self._set_from_conf(metadata, "metadata") self._set_from_conf(logger_preamble, "logger_preamble", "") self._set_from_conf(force_cpu_power, "force_cpu_power", None, float) self._set_from_conf(force_ram_power, "force_ram_power", None, float) @@ -634,7 +647,38 @@ def _init_output_methods(self, *, api_key: str = None): self._output_handlers.append(LogfireOutput()) if OutputMethod.BOAMPS in methods: - self._output_handlers.append(BoAmpsOutput(output_dir=self._output_dir)) + self._output_handlers.append(self._build_boamps_output()) + + def _build_boamps_output(self) -> BoAmpsOutput: + """ + Build a BoAmpsOutput, enriched with user-provided metadata when available. + + The generic ``metadata`` (a dict or a path to a JSON file) is resolved to a + dict, then the BoAmps section is read from its ``boamps`` key. For backward + compatibility, if there is no ``boamps`` key the whole dict is used as the + BoAmps context. Falls back to a bare BoAmpsOutput when no metadata is provided. 
+ """ + metadata = getattr(self, "_metadata", None) + + if isinstance(metadata, str) and metadata: + try: + with open(metadata) as f: + metadata = json.load(f) + except FileNotFoundError: + logger.error(f"Metadata file not found: {metadata}") + metadata = None + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON in metadata file '{metadata}': {e}") + metadata = None + + if isinstance(metadata, dict): + boamps_context = metadata.get("boamps", metadata) + if isinstance(boamps_context, dict) and boamps_context: + return BoAmpsOutput.from_dict( + boamps_context, output_dir=self._output_dir + ) + + return BoAmpsOutput(output_dir=self._output_dir) def get_detected_hardware(self) -> Dict[str, Any]: """ diff --git a/codecarbon/output_methods/boamps/output.py b/codecarbon/output_methods/boamps/output.py index d70336221..ff172e9fa 100644 --- a/codecarbon/output_methods/boamps/output.py +++ b/codecarbon/output_methods/boamps/output.py @@ -93,6 +93,32 @@ def from_file(cls, context_file_path: str, output_dir: str = ".") -> "BoAmpsOutp f"BoAmps context file not found: {context_file_path}" ) + return cls.from_dict(context, output_dir=output_dir) + + @classmethod + def from_dict(cls, context: dict, output_dir: str = ".") -> "BoAmpsOutput": + """ + Build a BoAmpsOutput from a free-form metadata dictionary. + + The dictionary should follow the BoAmps report schema structure, + containing fields that cannot be auto-detected by CodeCarbon + (e.g., ``task``, ``header``, ``quality``, ``infrastructure``, + ``environment``). Any recognized section is merged with the + auto-detected values; unknown keys are ignored. + + Args: + context: A dictionary of BoAmps metadata (camelCase keys, as in + the BoAmps JSON schema). + output_dir: Directory to write output reports to. + + Returns: + A configured BoAmpsOutput instance. + """ + if not isinstance(context, dict): + raise TypeError( + f"BoAmps metadata must be a dict, got {type(context).__name__}." 
+ ) + task = None header = None quality = None diff --git a/docs/reference/output.md b/docs/reference/output.md index 720e0a883..bd8b1b3d3 100644 --- a/docs/reference/output.md +++ b/docs/reference/output.md @@ -149,7 +149,37 @@ tracker.stop() CodeCarbon writes a final report named `boamps_report_.json` in `output_dir`. -If you need to enrich the report with task metadata, datasets, or publisher information, use `BoAmpsOutput` directly through `output_handlers` or start from [examples/boamps_output.py](https://github.com/mlco2/codecarbon/blob/master/examples/boamps_output.py). +If you need to enrich the report with task metadata, datasets, or publisher information, +use the generic `metadata` parameter and put the BoAmps context under the `boamps` key: + +```python-skip +from codecarbon import OfflineEmissionsTracker, OutputMethod + +tracker = OfflineEmissionsTracker( + project_name="my_project", + country_iso_code="USA", + output_methods=[OutputMethod.BOAMPS], + metadata={ + "boamps": { + "task": { + "taskStage": "training", + "taskFamily": "classification", + "algorithms": ["random_forest"], + "dataset": "my_dataset", + }, + "quality": "medium", + }, + "my_other_metadata": {"owner": "ml-team"}, + }, +) +tracker.start() +# Your code here +tracker.stop() +``` + +`metadata` can also be a path to a JSON file (`metadata="metadata.json"`). +For backward compatibility, if `metadata` is a dict without a `boamps` key, +the full dict is interpreted as BoAmps metadata. 
Sample output: ```json diff --git a/examples/boamps_output.py b/examples/boamps_output.py index 4571a4a98..5715a9c13 100644 --- a/examples/boamps_output.py +++ b/examples/boamps_output.py @@ -15,6 +15,17 @@ def cpu_load_task(number): force_mode_cpu_load=False, log_level="debug", output_methods=[OutputMethod.BOAMPS], + metadata={ + "boamps": { + "task": { + "taskStage": "training", + "taskFamily": "classification", + "algorithms": ["random_forest"], + "dataset": "synthetic_cpu_benchmark", + }, + "quality": "medium", + } + }, ) try: tracker.start() diff --git a/tests/test_boamps_output.py b/tests/test_boamps_output.py index beeccfb84..06e46c13f 100644 --- a/tests/test_boamps_output.py +++ b/tests/test_boamps_output.py @@ -877,6 +877,11 @@ def test_malformed_json_raises(self): with self.assertRaises(json.JSONDecodeError): BoAmpsOutput.from_file(path) + def test_from_dict_rejects_non_dict_input(self): + """from_dict should fail fast when context is not a dictionary.""" + with self.assertRaises(TypeError): + BoAmpsOutput.from_dict("not-a-dict") + def test_context_with_infrastructure_overrides(self): """Infrastructure fields from context file are applied as overrides.""" context = { diff --git a/tests/test_emissions_tracker.py b/tests/test_emissions_tracker.py index 37f1000de..5a47972d9 100644 --- a/tests/test_emissions_tracker.py +++ b/tests/test_emissions_tracker.py @@ -1,3 +1,4 @@ +import json import os import shutil import sys @@ -235,6 +236,150 @@ def test_output_methods_boamps_adds_boamps_output_handler( ) ) + def test_output_methods_boamps_uses_metadata_boamps_section( + self, + mock_cli_setup, + mock_log_values, + mocked_get_gpu_details, + mocked_env_cloud_details, + mocked_is_gpu_details_available, + mocked_is_nvidia_system, + ): + tracker = EmissionsTracker( + output_dir=self.temp_path, + output_handlers=[], + output_methods=[OutputMethod.BOAMPS], + metadata={ + "boamps": { + "task": { + "taskStage": "training", + "taskFamily": "classification", + "algorithms": 
[{"algorithmType": "random_forest"}], + "dataset": [ + { + "dataUsage": "input", + "dataType": "table", + "dataQuantity": 100, + } + ], + }, + "quality": "high", + }, + "other": {"owner": "ml-team"}, + }, + ) + + boamps_handler = next( + handler + for handler in tracker._output_handlers + if isinstance(handler, BoAmpsOutput) + ) + self.assertEqual(boamps_handler._quality, "high") + self.assertIsNotNone(boamps_handler._task) + self.assertEqual(boamps_handler._task.task_stage, "training") + self.assertEqual(boamps_handler._task.task_family, "classification") + self.assertEqual( + boamps_handler._task.algorithms[0].algorithm_type, + "random_forest", + ) + + def test_output_methods_boamps_metadata_back_compat_without_boamps_key( + self, + mock_cli_setup, + mock_log_values, + mocked_get_gpu_details, + mocked_env_cloud_details, + mocked_is_gpu_details_available, + mocked_is_nvidia_system, + ): + tracker = EmissionsTracker( + output_dir=self.temp_path, + output_handlers=[], + output_methods=[OutputMethod.BOAMPS], + metadata={ + "task": { + "taskStage": "inference", + "taskFamily": "chatbot", + }, + "quality": "medium", + }, + ) + + boamps_handler = next( + handler + for handler in tracker._output_handlers + if isinstance(handler, BoAmpsOutput) + ) + self.assertEqual(boamps_handler._quality, "medium") + self.assertEqual(boamps_handler._task.task_stage, "inference") + self.assertEqual(boamps_handler._task.task_family, "chatbot") + + def test_output_methods_boamps_reads_metadata_from_json_file( + self, + mock_cli_setup, + mock_log_values, + mocked_get_gpu_details, + mocked_env_cloud_details, + mocked_is_gpu_details_available, + mocked_is_nvidia_system, + ): + metadata_path = self.temp_path / "metadata.json" + metadata_path.write_text( + json.dumps( + { + "boamps": { + "task": { + "taskStage": "training", + "taskFamily": "classification", + }, + "quality": "high", + } + } + ) + ) + + tracker = EmissionsTracker( + output_dir=self.temp_path, + output_handlers=[], + 
output_methods=[OutputMethod.BOAMPS], + metadata=str(metadata_path), + ) + + boamps_handler = next( + handler + for handler in tracker._output_handlers + if isinstance(handler, BoAmpsOutput) + ) + self.assertEqual(boamps_handler._quality, "high") + self.assertEqual(boamps_handler._task.task_stage, "training") + + def test_output_methods_boamps_falls_back_on_invalid_metadata_file( + self, + mock_cli_setup, + mock_log_values, + mocked_get_gpu_details, + mocked_env_cloud_details, + mocked_is_gpu_details_available, + mocked_is_nvidia_system, + ): + metadata_path = self.temp_path / "bad_metadata.json" + metadata_path.write_text("{ invalid json") + + tracker = EmissionsTracker( + output_dir=self.temp_path, + output_handlers=[], + output_methods=[OutputMethod.BOAMPS], + metadata=str(metadata_path), + ) + + boamps_handler = next( + handler + for handler in tracker._output_handlers + if isinstance(handler, BoAmpsOutput) + ) + self.assertIsNone(boamps_handler._task) + self.assertIsNone(boamps_handler._quality) + def test_default_output_methods_is_csv( self, mock_cli_setup,