From 46078d78f4a44717591bcced9480f0ceb41aeac0 Mon Sep 17 00:00:00 2001 From: Doug Ollerenshaw Date: Wed, 20 May 2026 14:25:45 -0700 Subject: [PATCH 1/3] Changed update_metadata to auto-capture provenance --- code/02_update_metadata.py | 68 ++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/code/02_update_metadata.py b/code/02_update_metadata.py index e4abf97..ffd8285 100644 --- a/code/02_update_metadata.py +++ b/code/02_update_metadata.py @@ -12,11 +12,16 @@ any schema violation raises and aborts the run. """ +import base64 +import json +import os import shutil +import urllib.error +import urllib.request from datetime import datetime, timezone from pathlib import Path -from aind_data_schema.components.identifiers import Code +from aind_data_schema.components.identifiers import Code, DataAsset from aind_data_schema.core.data_description import DataDescription from aind_data_schema.core.processing import DataProcess, ProcessStage, Processing from aind_data_schema_models.process_names import ProcessName @@ -26,10 +31,52 @@ DATA_DIR = Path("/data") EXPERIMENTERS = ["Polina Kosillo"] -# Code Ocean capsule URL recorded in processing.json. Hardcoded to the released -# capsule so the published metadata always references the canonical capsule, -# regardless of which working/test capsule is running this script. -CAPSULE_URL = "https://codeocean.allenneuraldynamics.org/capsule/3953531/tree" +CO_API_BASE = "https://codeocean.allenneuraldynamics.org/api/v1" +CO_WEB_BASE = "https://codeocean.allenneuraldynamics.org/capsule" +AIND_OPEN_DATA_BUCKET = "s3://aind-open-data" + + +def fetch_co_provenance() -> tuple[str, str | None]: + """Return (capsule_url, version) for the running Code Ocean capsule. + + Calls the Code Ocean REST API at runtime to look up the capsule's web URL + (built from the slug) and the release version of this run. ``version`` is + None when running an editable (non-release) capsule. The capsule URL is + always set on success. + + Requires the "Code Ocean API Credentials" Secret to be attached to the + capsule (Capsule Settings -> Credentials), which exposes API_KEY at + runtime. Raises RuntimeError if any required env var is missing or the + API call fails -- the conversion can't produce valid metadata without + the capsule URL. + """ + api_key = os.environ.get("API_KEY") + capsule_id = os.environ.get("CO_CAPSULE_ID") + computation_id = os.environ.get("CO_COMPUTATION_ID") + if not api_key or not capsule_id or not computation_id: + raise RuntimeError( + "Missing Code Ocean env vars (API_KEY / CO_CAPSULE_ID / " + "CO_COMPUTATION_ID). Attach the 'Code Ocean API Credentials' " + "Secret to the capsule (Capsule Settings -> Credentials)." + ) + + auth = base64.b64encode(f"{api_key}:".encode()).decode() + headers = {"Authorization": f"Basic {auth}"} + + def _get(path: str) -> dict: + req = urllib.request.Request(f"{CO_API_BASE}{path}", headers=headers) + with urllib.request.urlopen(req, timeout=10) as resp: + return json.loads(resp.read()) + + try: + capsule = _get(f"/capsules/{capsule_id}") + computation = _get(f"/computations/{computation_id}") + except (urllib.error.URLError, json.JSONDecodeError) as e: + raise RuntimeError(f"Code Ocean API call failed: {e}") from e + + capsule_url = f"{CO_WEB_BASE}/{capsule['slug']}/tree" + version = f"v{computation['version']}.0" if "version" in computation else None + return capsule_url, version def find_output_dirs() -> list[Path]: @@ -74,13 +121,16 @@ def write_data_description(input_dir: Path, out_dir: Path) -> None: print(f" wrote derived data_description.json (source: {raw_dd.name})") -def write_processing(out_dir: Path) -> None: +def write_processing(out_dir: Path, input_asset: str) -> None: """Build and write a processing.json describing this conversion run.""" + capsule_url, version = fetch_co_provenance() code = Code( - url=CAPSULE_URL, + url=capsule_url, name="LC-NE_BARseq_MAT-RDS_conversion", + version=version, run_script=Path("code/run"), language="R", + input_data=[DataAsset(url=f"{AIND_OPEN_DATA_BUCKET}/{input_asset}")], ) process = DataProcess( process_type=ProcessName.FILE_FORMAT_CONVERSION, @@ -106,13 +156,11 @@ def update_metadata_for_subject(out_dir: Path) -> None: copy_peer_metadata(input_dir, out_dir) write_data_description(input_dir, out_dir) - write_processing(out_dir) + write_processing(out_dir, input_asset) def main() -> None: """Discover output folders under /results/ and generate metadata for each.""" - print(f"Capsule URL: {CAPSULE_URL}\n") - out_dirs = find_output_dirs() if not out_dirs: raise RuntimeError(f"No output directories found under {RESULTS_DIR}") From b3007c1a2f7d38bc12d152c59bc654e3c42dd143 Mon Sep 17 00:00:00 2001 From: Doug Ollerenshaw Date: Wed, 20 May 2026 15:15:48 -0700 Subject: [PATCH 2/3] Add text to 'version' indicating when asset comes from non-release capsule --- code/02_update_metadata.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/code/02_update_metadata.py b/code/02_update_metadata.py index ffd8285..3748dad 100644 --- a/code/02_update_metadata.py +++ b/code/02_update_metadata.py @@ -36,13 +36,13 @@ AIND_OPEN_DATA_BUCKET = "s3://aind-open-data" -def fetch_co_provenance() -> tuple[str, str | None]: +def fetch_co_provenance() -> tuple[str, str]: """Return (capsule_url, version) for the running Code Ocean capsule. Calls the Code Ocean REST API at runtime to look up the capsule's web URL - (built from the slug) and the release version of this run. ``version`` is - None when running an editable (non-release) capsule. The capsule URL is - always set on success. + (built from the slug) and the release version of this run. ``version`` + is ``"from non-release editable capsule"`` when running an editable + (non-release) capsule; otherwise it's a string like ``"v3.0"``. Requires the "Code Ocean API Credentials" Secret to be attached to the capsule (Capsule Settings -> Credentials), which exposes API_KEY at @@ -75,7 +75,10 @@ def _get(path: str) -> dict: raise RuntimeError(f"Code Ocean API call failed: {e}") from e capsule_url = f"{CO_WEB_BASE}/{capsule['slug']}/tree" - version = f"v{computation['version']}.0" if "version" in computation else None + if "version" in computation: + version = f"v{computation['version']}.0" + else: + version = "from non-release editable capsule" return capsule_url, version From 8c08041eafc56804206144cae475fafadfba0d40 Mon Sep 17 00:00:00 2001 From: dougollerenshaw Date: Wed, 20 May 2026 22:16:03 +0000 Subject: [PATCH 3/3] Added secrets.json --- .codeocean/secrets.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .codeocean/secrets.json diff --git a/.codeocean/secrets.json b/.codeocean/secrets.json new file mode 100644 index 0000000..dbaccaf --- /dev/null +++ b/.codeocean/secrets.json @@ -0,0 +1,12 @@ +{ + "version": 1, + "secrets": [ + { + "type": "api-key", + "id": "UZZM1usjDUeETAGx", + "description": "Code Ocean API", + "key": "API_KEY", + "secret": "API_SECRET" + } + ] +} \ No newline at end of file