mlco2 · benoit-cty · Jun 9, 2026 · Jun 9, 2026 · Jun 10, 2026
@@ -4,6 +4,7 @@
 """
 
 import dataclasses
+import json
 import os
 import platform
 import re
@@ -403,6 +404,7 @@ def __init__(
         allow_multiple_runs: Optional[bool] = _sentinel,
         rapl_include_dram: Optional[bool] = _sentinel,
         rapl_prefer_psys: Optional[bool] = _sentinel,
+        metadata: Optional[Union[dict, str]] = _sentinel,
     ):
         """
         :param project_name: Project name for current experiment run, default name
@@ -496,6 +498,16 @@ def __init__(
                                  (CPU + chipset + PCIe). When False, uses package domains which
                                  are more reliable. Note: psys can report higher values than
                                  CPU TDP and may be unreliable on older systems.
+        :param metadata: Free-form metadata bag to enrich outputs. Either a dict or a
+                         path to a JSON file containing a dict. Individual output methods
+                         read their own section from it. When ``OutputMethod.BOAMPS`` is
+                         used, the ``boamps`` key (or, for backward compatibility, the whole
+                         dict) is read following the BoAmps schema structure (with ``task``,
+                         ``header``, ``quality``, ``infrastructure``, ``environment``
+                         sections) to fill the required BoAmps fields (taskStage, taskFamily,
+                         algorithms, dataset) that cannot be auto-detected by CodeCarbon.
+                         Can also be set in config as a path to a JSON file:
+                         ``metadata=metadata.json``.
         """
 
         # logger.info("base tracker init")
@@ -557,6 +569,7 @@ def __init__(
         self._set_from_conf(output_handlers, "output_handlers", [])
         self._set_from_conf(tracking_mode, "tracking_mode", "machine")
         self._set_from_conf(on_csv_write, "on_csv_write", "append")
+        self._set_from_conf(metadata, "metadata")
         self._set_from_conf(logger_preamble, "logger_preamble", "")
         self._set_from_conf(force_cpu_power, "force_cpu_power", None, float)
         self._set_from_conf(force_ram_power, "force_ram_power", None, float)
@@ -634,7 +647,38 @@ def _init_output_methods(self, *, api_key: str = None):
             self._output_handlers.append(LogfireOutput())
 
         if OutputMethod.BOAMPS in methods:
-            self._output_handlers.append(BoAmpsOutput(output_dir=self._output_dir))
+            self._output_handlers.append(self._build_boamps_output())
+
+    def _build_boamps_output(self) -> BoAmpsOutput:
+        """
+        Build a BoAmpsOutput, enriched with user-provided metadata when available.
+
+        The generic ``metadata`` (a dict or a path to a JSON file) is resolved to a
+        dict, then the BoAmps section is read from its ``boamps`` key. For backward
+        compatibility, if there is no ``boamps`` key the whole dict is used as the
+        BoAmps context. Falls back to a bare BoAmpsOutput when no metadata is provided.
+        """
+        metadata = getattr(self, "_metadata", None)
+
+        if isinstance(metadata, str) and metadata:
+            try:
+                with open(metadata) as f:
+                    metadata = json.load(f)
+            except FileNotFoundError:
+                logger.error(f"Metadata file not found: {metadata}")
+                metadata = None
+            except json.JSONDecodeError as e:
+                logger.error(f"Invalid JSON in metadata file '{metadata}': {e}")
+                metadata = None
+
+        if isinstance(metadata, dict):
+            boamps_context = metadata.get("boamps", metadata)
+            if isinstance(boamps_context, dict) and boamps_context:
+                return BoAmpsOutput.from_dict(
+                    boamps_context, output_dir=self._output_dir
+                )
+
+        return BoAmpsOutput(output_dir=self._output_dir)
 
     def get_detected_hardware(self) -> Dict[str, Any]:
         """

@@ -93,6 +93,32 @@ def from_file(cls, context_file_path: str, output_dir: str = ".") -> "BoAmpsOutp
                 f"BoAmps context file not found: {context_file_path}"
             )
 
+        return cls.from_dict(context, output_dir=output_dir)
+
+    @classmethod
+    def from_dict(cls, context: dict, output_dir: str = ".") -> "BoAmpsOutput":
+        """
+        Build a BoAmpsOutput from a free-form metadata dictionary.
+
+        The dictionary should follow the BoAmps report schema structure,
+        containing fields that cannot be auto-detected by CodeCarbon
+        (e.g., ``task``, ``header``, ``quality``, ``infrastructure``,
+        ``environment``). Any recognized section is merged with the
+        auto-detected values; unknown keys are ignored.
+
+        Args:
+            context: A dictionary of BoAmps metadata (camelCase keys, as in
+                the BoAmps JSON schema).
+            output_dir: Directory to write output reports to.
+
+        Returns:
+            A configured BoAmpsOutput instance.
+        """
+        if not isinstance(context, dict):
+            raise TypeError(
+                f"BoAmps metadata must be a dict, got {type(context).__name__}."
+            )
+
         task = None
         header = None
         quality = None

@@ -149,7 +149,37 @@ tracker.stop()
 
 CodeCarbon writes a final report named `boamps_report_<run_id>.json` in `output_dir`.
 
-If you need to enrich the report with task metadata, datasets, or publisher information, use `BoAmpsOutput` directly through `output_handlers` or start from [examples/boamps_output.py](https://github.com/mlco2/codecarbon/blob/master/examples/boamps_output.py).
+If you need to enrich the report with task metadata, datasets, or publisher information,
+use the generic `metadata` parameter and put the BoAmps context under the `boamps` key:
+
+```python-skip
+from codecarbon import OfflineEmissionsTracker, OutputMethod
+
+tracker = OfflineEmissionsTracker(
+  project_name="my_project",
+  country_iso_code="USA",
+  output_methods=[OutputMethod.BOAMPS],
+  metadata={
+    "boamps": {
+      "task": {
+        "taskStage": "training",
+        "taskFamily": "classification",
+        "algorithms": [{"algorithmType": "random_forest"}],
+        "dataset": [
+          {"dataUsage": "input", "dataType": "table", "dataQuantity": 100},
+        ],
+      },
+      "quality": "medium",
+    },
+    "my_other_metadata": {"owner": "ml-team"},
+  },
+)
+tracker.start()
+# Your code here
+tracker.stop()
+```
+
+`metadata` can also be a path to a JSON file (`metadata="metadata.json"`).
+For backward compatibility, if `metadata` is a dict without a `boamps` key,
+the full dict is interpreted as BoAmps metadata.
 
 Sample output:
 ```json

@@ -15,6 +15,17 @@ def cpu_load_task(number):
     force_mode_cpu_load=False,
     log_level="debug",
     output_methods=[OutputMethod.BOAMPS],
+    metadata={
+        "boamps": {
+            "task": {
+                "taskStage": "training",
+                "taskFamily": "classification",
+                "algorithms": ["random_forest"],
+                "dataset": "synthetic_cpu_benchmark",
+            },
+            "quality": "medium",
+        }
+    },
 )
 try:
     tracker.start()

@@ -877,6 +877,11 @@ def test_malformed_json_raises(self):
         with self.assertRaises(json.JSONDecodeError):
             BoAmpsOutput.from_file(path)
 
+    def test_from_dict_rejects_non_dict_input(self):
+        """from_dict should fail fast when context is not a dictionary."""
+        with self.assertRaises(TypeError):
+            BoAmpsOutput.from_dict("not-a-dict")
+
     def test_context_with_infrastructure_overrides(self):
         """Infrastructure fields from context file are applied as overrides."""
         context = {

@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 import sys
@@ -235,6 +236,150 @@ def test_output_methods_boamps_adds_boamps_output_handler(
             )
         )
 
+    def test_output_methods_boamps_uses_metadata_boamps_section(
+        self,
+        mock_cli_setup,
+        mock_log_values,
+        mocked_get_gpu_details,
+        mocked_env_cloud_details,
+        mocked_is_gpu_details_available,
+        mocked_is_nvidia_system,
+    ):
+        tracker = EmissionsTracker(
+            output_dir=self.temp_path,
+            output_handlers=[],
+            output_methods=[OutputMethod.BOAMPS],
+            metadata={
+                "boamps": {
+                    "task": {
+                        "taskStage": "training",
+                        "taskFamily": "classification",
+                        "algorithms": [{"algorithmType": "random_forest"}],
+                        "dataset": [
+                            {
+                                "dataUsage": "input",
+                                "dataType": "table",
+                                "dataQuantity": 100,
+                            }
+                        ],
+                    },
+                    "quality": "high",
+                },
+                "other": {"owner": "ml-team"},
+            },
+        )
+
+        boamps_handler = next(
+            handler
+            for handler in tracker._output_handlers
+            if isinstance(handler, BoAmpsOutput)
+        )
+        self.assertEqual(boamps_handler._quality, "high")
+        self.assertIsNotNone(boamps_handler._task)
+        self.assertEqual(boamps_handler._task.task_stage, "training")
+        self.assertEqual(boamps_handler._task.task_family, "classification")
+        self.assertEqual(
+            boamps_handler._task.algorithms[0].algorithm_type,
+            "random_forest",
+        )
+
+    def test_output_methods_boamps_metadata_back_compat_without_boamps_key(
+        self,
+        mock_cli_setup,
+        mock_log_values,
+        mocked_get_gpu_details,
+        mocked_env_cloud_details,
+        mocked_is_gpu_details_available,
+        mocked_is_nvidia_system,
+    ):
+        tracker = EmissionsTracker(
+            output_dir=self.temp_path,
+            output_handlers=[],
+            output_methods=[OutputMethod.BOAMPS],
+            metadata={
+                "task": {
+                    "taskStage": "inference",
+                    "taskFamily": "chatbot",
+                },
+                "quality": "medium",
+            },
+        )
+
+        boamps_handler = next(
+            handler
+            for handler in tracker._output_handlers
+            if isinstance(handler, BoAmpsOutput)
+        )
+        self.assertEqual(boamps_handler._quality, "medium")
+        self.assertEqual(boamps_handler._task.task_stage, "inference")
+        self.assertEqual(boamps_handler._task.task_family, "chatbot")
+
+    def test_output_methods_boamps_reads_metadata_from_json_file(
+        self,
+        mock_cli_setup,
+        mock_log_values,
+        mocked_get_gpu_details,
+        mocked_env_cloud_details,
+        mocked_is_gpu_details_available,
+        mocked_is_nvidia_system,
+    ):
+        metadata_path = self.temp_path / "metadata.json"
+        metadata_path.write_text(
+            json.dumps(
+                {
+                    "boamps": {
+                        "task": {
+                            "taskStage": "training",
+                            "taskFamily": "classification",
+                        },
+                        "quality": "high",
+                    }
+                }
+            )
+        )
+
+        tracker = EmissionsTracker(
+            output_dir=self.temp_path,
+            output_handlers=[],
+            output_methods=[OutputMethod.BOAMPS],
+            metadata=str(metadata_path),
+        )
+
+        boamps_handler = next(
+            handler
+            for handler in tracker._output_handlers
+            if isinstance(handler, BoAmpsOutput)
+        )
+        self.assertEqual(boamps_handler._quality, "high")
+        self.assertEqual(boamps_handler._task.task_stage, "training")
+
+    def test_output_methods_boamps_falls_back_on_invalid_metadata_file(
+        self,
+        mock_cli_setup,
+        mock_log_values,
+        mocked_get_gpu_details,
+        mocked_env_cloud_details,
+        mocked_is_gpu_details_available,
+        mocked_is_nvidia_system,
+    ):
+        metadata_path = self.temp_path / "bad_metadata.json"
+        metadata_path.write_text("{ invalid json")
+
+        tracker = EmissionsTracker(
+            output_dir=self.temp_path,
+            output_handlers=[],
+            output_methods=[OutputMethod.BOAMPS],
+            metadata=str(metadata_path),
+        )
+
+        boamps_handler = next(
+            handler
+            for handler in tracker._output_handlers
+            if isinstance(handler, BoAmpsOutput)
+        )
+        self.assertIsNone(boamps_handler._task)
+        self.assertIsNone(boamps_handler._quality)
+
     def test_default_output_methods_is_csv(
         self,
         mock_cli_setup,