Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion codecarbon/emissions_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import dataclasses
import json
import os
import platform
import re
Expand Down Expand Up @@ -403,6 +404,7 @@ def __init__(
allow_multiple_runs: Optional[bool] = _sentinel,
rapl_include_dram: Optional[bool] = _sentinel,
rapl_prefer_psys: Optional[bool] = _sentinel,
metadata: Optional[Union[dict, str]] = _sentinel,
):
"""
:param project_name: Project name for current experiment run, default name
Expand Down Expand Up @@ -496,6 +498,16 @@ def __init__(
(CPU + chipset + PCIe). When False, uses package domains which
are more reliable. Note: psys can report higher values than
CPU TDP and may be unreliable on older systems.
:param metadata: Free-form metadata bag to enrich outputs. Either a dict or a
path to a JSON file containing a dict. Individual output methods
read their own section from it. When ``OutputMethod.BOAMPS`` is
used, the ``boamps`` key (or, for backward compatibility, the whole
dict) is read following the BoAmps schema structure (with ``task``,
``header``, ``quality``, ``infrastructure``, ``environment``
sections) to fill the required BoAmps fields (taskStage, taskFamily,
algorithms, dataset) that cannot be auto-detected by CodeCarbon.
Can also be set in config as a path to a JSON file:
``metadata=metadata.json``.
"""

# logger.info("base tracker init")
Expand Down Expand Up @@ -557,6 +569,7 @@ def __init__(
self._set_from_conf(output_handlers, "output_handlers", [])
self._set_from_conf(tracking_mode, "tracking_mode", "machine")
self._set_from_conf(on_csv_write, "on_csv_write", "append")
self._set_from_conf(metadata, "metadata")
self._set_from_conf(logger_preamble, "logger_preamble", "")
self._set_from_conf(force_cpu_power, "force_cpu_power", None, float)
self._set_from_conf(force_ram_power, "force_ram_power", None, float)
Expand Down Expand Up @@ -634,7 +647,38 @@ def _init_output_methods(self, *, api_key: str = None):
self._output_handlers.append(LogfireOutput())

if OutputMethod.BOAMPS in methods:
self._output_handlers.append(BoAmpsOutput(output_dir=self._output_dir))
self._output_handlers.append(self._build_boamps_output())

def _build_boamps_output(self) -> BoAmpsOutput:
    """
    Build a BoAmpsOutput, enriched with user-provided metadata when available.

    The generic ``metadata`` (a dict, or a path to a JSON file given as a
    ``str`` or ``os.PathLike``) is resolved to a dict, then the BoAmps section
    is read from its ``boamps`` key. For backward compatibility, if there is no
    ``boamps`` key the whole dict is used as the BoAmps context.

    Building the output never raises because of an unusable metadata *file*:
    a missing, unreadable, undecodable or malformed file is logged and the
    method falls back to a bare BoAmpsOutput, so that a metadata problem does
    not prevent the tracker from being created.

    :return: A BoAmpsOutput built with ``BoAmpsOutput.from_dict`` when a
        non-empty BoAmps context dict is available, otherwise a bare
        ``BoAmpsOutput`` writing to ``self._output_dir``.
    """
    metadata = getattr(self, "_metadata", None)

    if isinstance(metadata, (str, os.PathLike)) and metadata:
        # Keep the path under its own name: ``metadata`` is rebound to the
        # parsed content below, and the log messages must show the path.
        metadata_path = os.fspath(metadata)
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(metadata_path, encoding="utf-8") as f:
                metadata = json.load(f)
        except FileNotFoundError:
            logger.error(f"Metadata file not found: {metadata_path}")
            metadata = None
        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON in metadata file '{metadata_path}': {e}")
            metadata = None
        except (OSError, UnicodeDecodeError) as e:
            # Directory given as path, permission denied, non UTF-8 bytes...
            logger.error(f"Could not read metadata file '{metadata_path}': {e}")
            metadata = None
        else:
            if not isinstance(metadata, dict):
                logger.warning(
                    f"Metadata file '{metadata_path}' must contain a JSON object, "
                    f"got {type(metadata).__name__}; ignoring it."
                )
                metadata = None

    if isinstance(metadata, dict):
        # Prefer the dedicated section; fall back to the whole dict so that
        # metadata written before the ``boamps`` key existed keeps working.
        boamps_context = metadata.get("boamps", metadata)
        if not isinstance(boamps_context, dict):
            logger.warning(
                "The 'boamps' metadata section must be a dict, got "
                f"{type(boamps_context).__name__}; ignoring it."
            )
        elif boamps_context:
            return BoAmpsOutput.from_dict(
                boamps_context, output_dir=self._output_dir
            )

    return BoAmpsOutput(output_dir=self._output_dir)

def get_detected_hardware(self) -> Dict[str, Any]:
"""
Expand Down
26 changes: 26 additions & 0 deletions codecarbon/output_methods/boamps/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,32 @@ def from_file(cls, context_file_path: str, output_dir: str = ".") -> "BoAmpsOutp
f"BoAmps context file not found: {context_file_path}"
)

return cls.from_dict(context, output_dir=output_dir)

@classmethod
def from_dict(cls, context: dict, output_dir: str = ".") -> "BoAmpsOutput":
"""
Build a BoAmpsOutput from a free-form metadata dictionary.

The dictionary should follow the BoAmps report schema structure,
containing fields that cannot be auto-detected by CodeCarbon
(e.g., ``task``, ``header``, ``quality``, ``infrastructure``,
``environment``). Any recognized section is merged with the
auto-detected values; unknown keys are ignored.

Args:
context: A dictionary of BoAmps metadata (camelCase keys, as in
the BoAmps JSON schema).
output_dir: Directory to write output reports to.

Returns:
A configured BoAmpsOutput instance.
"""
if not isinstance(context, dict):
raise TypeError(
f"BoAmps metadata must be a dict, got {type(context).__name__}."
)

task = None
header = None
quality = None
Expand Down
32 changes: 31 additions & 1 deletion docs/reference/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,37 @@ tracker.stop()

CodeCarbon writes a final report named `boamps_report_<run_id>.json` in `output_dir`.

If you need to enrich the report with task metadata, datasets, or publisher information, use `BoAmpsOutput` directly through `output_handlers` or start from [examples/boamps_output.py](https://github.com/mlco2/codecarbon/blob/master/examples/boamps_output.py).
If you need to enrich the report with task metadata, datasets, or publisher information,
use the generic `metadata` parameter and put the BoAmps context under the `boamps` key:

```python-skip
from codecarbon import OfflineEmissionsTracker, OutputMethod

tracker = OfflineEmissionsTracker(
project_name="my_project",
country_iso_code="USA",
output_methods=[OutputMethod.BOAMPS],
metadata={
"boamps": {
"task": {
"taskStage": "training",
"taskFamily": "classification",
"algorithms": [{"algorithmType": "random_forest"}],
"dataset": [
{
"dataUsage": "input",
"dataType": "table",
"dataQuantity": 100,
}
],
},
"quality": "medium",
},
"my_other_metadata": {"owner": "ml-team"},
},
)
tracker.start()
# Your code here
tracker.stop()
```

`metadata` can also be a path to a JSON file (`metadata="metadata.json"`).
For backward compatibility, if `metadata` is a dict without a `boamps` key,
the full dict is interpreted as BoAmps metadata.

Sample output:
```json
Expand Down
11 changes: 11 additions & 0 deletions examples/boamps_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ def cpu_load_task(number):
force_mode_cpu_load=False,
log_level="debug",
output_methods=[OutputMethod.BOAMPS],
metadata={
"boamps": {
"task": {
"taskStage": "training",
"taskFamily": "classification",
"algorithms": ["random_forest"],
"dataset": "synthetic_cpu_benchmark",
},
"quality": "medium",
}
},
)
try:
tracker.start()
Expand Down
5 changes: 5 additions & 0 deletions tests/test_boamps_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,11 @@ def test_malformed_json_raises(self):
with self.assertRaises(json.JSONDecodeError):
BoAmpsOutput.from_file(path)

def test_from_dict_rejects_non_dict_input(self):
    """A context that is not a dictionary must make from_dict raise TypeError."""
    not_a_dict = "not-a-dict"
    self.assertRaises(TypeError, BoAmpsOutput.from_dict, not_a_dict)

def test_context_with_infrastructure_overrides(self):
"""Infrastructure fields from context file are applied as overrides."""
context = {
Expand Down
145 changes: 145 additions & 0 deletions tests/test_emissions_tracker.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
import shutil
import sys
Expand Down Expand Up @@ -235,6 +236,150 @@ def test_output_methods_boamps_adds_boamps_output_handler(
)
)

def test_output_methods_boamps_uses_metadata_boamps_section(
    self,
    mock_cli_setup,
    mock_log_values,
    mocked_get_gpu_details,
    mocked_env_cloud_details,
    mocked_is_gpu_details_available,
    mocked_is_nvidia_system,
):
    """The ``boamps`` section of ``metadata`` configures the BoAmps handler.

    Sibling keys of ``boamps`` (here ``other``) are passed alongside it and
    the handler is still expected to reflect the ``boamps`` section.
    """
    task_section = {
        "taskStage": "training",
        "taskFamily": "classification",
        "algorithms": [{"algorithmType": "random_forest"}],
        "dataset": [
            {
                "dataUsage": "input",
                "dataType": "table",
                "dataQuantity": 100,
            }
        ],
    }
    full_metadata = {
        "boamps": {"task": task_section, "quality": "high"},
        "other": {"owner": "ml-team"},
    }

    tracker = EmissionsTracker(
        output_dir=self.temp_path,
        output_handlers=[],
        output_methods=[OutputMethod.BOAMPS],
        metadata=full_metadata,
    )

    # First BoAmps handler registered on the tracker.
    boamps_handler = next(
        filter(
            lambda candidate: isinstance(candidate, BoAmpsOutput),
            tracker._output_handlers,
        )
    )

    self.assertEqual(boamps_handler._quality, "high")
    self.assertIsNotNone(boamps_handler._task)
    parsed_task = boamps_handler._task
    self.assertEqual(parsed_task.task_stage, "training")
    self.assertEqual(parsed_task.task_family, "classification")
    first_algorithm = parsed_task.algorithms[0]
    self.assertEqual(first_algorithm.algorithm_type, "random_forest")

def test_output_methods_boamps_metadata_back_compat_without_boamps_key(
    self,
    mock_cli_setup,
    mock_log_values,
    mocked_get_gpu_details,
    mocked_env_cloud_details,
    mocked_is_gpu_details_available,
    mocked_is_nvidia_system,
):
    """Metadata without a ``boamps`` key is read as the BoAmps context itself."""
    flat_metadata = {
        "task": {
            "taskStage": "inference",
            "taskFamily": "chatbot",
        },
        "quality": "medium",
    }

    tracker = EmissionsTracker(
        output_dir=self.temp_path,
        output_handlers=[],
        output_methods=[OutputMethod.BOAMPS],
        metadata=flat_metadata,
    )

    # First BoAmps handler registered on the tracker.
    boamps_handler = next(
        filter(
            lambda candidate: isinstance(candidate, BoAmpsOutput),
            tracker._output_handlers,
        )
    )

    self.assertEqual(boamps_handler._quality, "medium")
    parsed_task = boamps_handler._task
    self.assertEqual(parsed_task.task_stage, "inference")
    self.assertEqual(parsed_task.task_family, "chatbot")

def test_output_methods_boamps_reads_metadata_from_json_file(
    self,
    mock_cli_setup,
    mock_log_values,
    mocked_get_gpu_details,
    mocked_env_cloud_details,
    mocked_is_gpu_details_available,
    mocked_is_nvidia_system,
):
    """A string path to a JSON file is loaded and its ``boamps`` section used."""
    file_content = {
        "boamps": {
            "task": {
                "taskStage": "training",
                "taskFamily": "classification",
            },
            "quality": "high",
        }
    }
    serialized = json.dumps(file_content)
    metadata_path = self.temp_path / "metadata.json"
    metadata_path.write_text(serialized)

    tracker = EmissionsTracker(
        output_dir=self.temp_path,
        output_handlers=[],
        output_methods=[OutputMethod.BOAMPS],
        metadata=str(metadata_path),
    )

    # First BoAmps handler registered on the tracker.
    boamps_handler = next(
        filter(
            lambda candidate: isinstance(candidate, BoAmpsOutput),
            tracker._output_handlers,
        )
    )

    self.assertEqual(boamps_handler._quality, "high")
    self.assertEqual(boamps_handler._task.task_stage, "training")

def test_output_methods_boamps_falls_back_on_invalid_metadata_file(
    self,
    mock_cli_setup,
    mock_log_values,
    mocked_get_gpu_details,
    mocked_env_cloud_details,
    mocked_is_gpu_details_available,
    mocked_is_nvidia_system,
):
    """A metadata file with malformed JSON yields a handler with no task/quality."""
    broken_json = "{ invalid json"
    metadata_path = self.temp_path / "bad_metadata.json"
    metadata_path.write_text(broken_json)

    tracker = EmissionsTracker(
        output_dir=self.temp_path,
        output_handlers=[],
        output_methods=[OutputMethod.BOAMPS],
        metadata=str(metadata_path),
    )

    # First BoAmps handler registered on the tracker.
    boamps_handler = next(
        filter(
            lambda candidate: isinstance(candidate, BoAmpsOutput),
            tracker._output_handlers,
        )
    )

    self.assertIsNone(boamps_handler._task)
    self.assertIsNone(boamps_handler._quality)

def test_default_output_methods_is_csv(
self,
mock_cli_setup,
Expand Down
Loading