Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .codeocean/secrets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"version": 1,
"secrets": [
{
"type": "api-key",
"id": "UZZM1usjDUeETAGx",
"description": "Code Ocean API",
"key": "API_KEY",
"secret": "API_SECRET"
}
]
}
71 changes: 61 additions & 10 deletions code/02_update_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@
any schema violation raises and aborts the run.
"""

import base64
import json
import os
import shutil
import urllib.error
import urllib.request
from datetime import datetime, timezone
from pathlib import Path

from aind_data_schema.components.identifiers import Code
from aind_data_schema.components.identifiers import Code, DataAsset
from aind_data_schema.core.data_description import DataDescription
from aind_data_schema.core.processing import DataProcess, ProcessStage, Processing
from aind_data_schema_models.process_names import ProcessName
Expand All @@ -26,10 +31,55 @@
DATA_DIR = Path("/data")
EXPERIMENTERS = ["Polina Kosillo"]

# Code Ocean capsule URL recorded in processing.json. Hardcoded to the released
# capsule so the published metadata always references the canonical capsule,
# regardless of which working/test capsule is running this script.
CAPSULE_URL = "https://codeocean.allenneuraldynamics.org/capsule/3953531/tree"
CO_API_BASE = "https://codeocean.allenneuraldynamics.org/api/v1"
CO_WEB_BASE = "https://codeocean.allenneuraldynamics.org/capsule"
AIND_OPEN_DATA_BUCKET = "s3://aind-open-data"


def fetch_co_provenance() -> tuple[str, str]:
    """Return (capsule_url, version) for the running Code Ocean capsule.

    Calls the Code Ocean REST API at runtime to look up the capsule's web URL
    (built from the slug) and the release version of this run. ``version``
    is ``"from non-release editable capsule"`` when the computation record
    carries no ``version`` field (an editable, non-release capsule);
    otherwise it's a string like ``"v3.0"``.

    Requires the "Code Ocean API Credentials" Secret to be attached to the
    capsule (Capsule Settings -> Credentials), which exposes API_KEY at
    runtime. CO_CAPSULE_ID and CO_COMPUTATION_ID are also read from the
    environment.

    Returns:
        A ``(capsule_url, version)`` tuple of strings.

    Raises:
        RuntimeError: If any required env var is missing or empty, if either
            API request fails (network error, HTTP error, timeout, or a
            body that is not a JSON object), or if the capsule record has no
            ``slug`` -- the conversion can't produce valid metadata without
            the capsule URL.
    """
    api_key = os.environ.get("API_KEY")
    capsule_id = os.environ.get("CO_CAPSULE_ID")
    computation_id = os.environ.get("CO_COMPUTATION_ID")
    if not (api_key and capsule_id and computation_id):
        raise RuntimeError(
            "Missing Code Ocean env vars (API_KEY / CO_CAPSULE_ID / "
            "CO_COMPUTATION_ID). Attach the 'Code Ocean API Credentials' "
            "Secret to the capsule (Capsule Settings -> Credentials)."
        )

    # HTTP Basic auth: the API key is the username, the password is empty.
    auth = base64.b64encode(f"{api_key}:".encode()).decode()
    headers = {"Authorization": f"Basic {auth}"}

    def _get(path: str) -> dict:
        """GET ``CO_API_BASE + path`` and return the decoded JSON object."""
        req = urllib.request.Request(f"{CO_API_BASE}{path}", headers=headers)
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = json.loads(resp.read())
        if not isinstance(payload, dict):
            # Raised as ValueError so the caller wraps it like any other
            # malformed-response failure.
            raise ValueError(
                f"expected a JSON object from {path}, "
                f"got {type(payload).__name__}"
            )
        return payload

    try:
        capsule = _get(f"/capsules/{capsule_id}")
        computation = _get(f"/computations/{computation_id}")
    except (OSError, ValueError) as e:
        # OSError covers urllib.error.URLError/HTTPError plus read timeouts
        # raised directly by resp.read(); ValueError covers
        # json.JSONDecodeError, UnicodeDecodeError and the non-object check.
        raise RuntimeError(f"Code Ocean API call failed: {e}") from e

    slug = capsule.get("slug")
    if not slug:
        # Without a slug the capsule URL cannot be built; fail with the
        # documented exception type instead of a bare KeyError.
        raise RuntimeError(
            f"Code Ocean API call failed: capsule {capsule_id} response "
            "has no 'slug' field"
        )

    capsule_url = f"{CO_WEB_BASE}/{slug}/tree"
    if "version" in computation:
        version = f"v{computation['version']}.0"
    else:
        version = "from non-release editable capsule"
    return capsule_url, version


def find_output_dirs() -> list[Path]:
Expand Down Expand Up @@ -74,13 +124,16 @@ def write_data_description(input_dir: Path, out_dir: Path) -> None:
print(f" wrote derived data_description.json (source: {raw_dd.name})")


def write_processing(out_dir: Path) -> None:
def write_processing(out_dir: Path, input_asset: str) -> None:
"""Build and write a processing.json describing this conversion run."""
capsule_url, version = fetch_co_provenance()
code = Code(
url=CAPSULE_URL,
url=capsule_url,
name="LC-NE_BARseq_MAT-RDS_conversion",
version=version,
run_script=Path("code/run"),
language="R",
input_data=[DataAsset(url=f"{AIND_OPEN_DATA_BUCKET}/{input_asset}")],
)
process = DataProcess(
process_type=ProcessName.FILE_FORMAT_CONVERSION,
Expand All @@ -106,13 +159,11 @@ def update_metadata_for_subject(out_dir: Path) -> None:

copy_peer_metadata(input_dir, out_dir)
write_data_description(input_dir, out_dir)
write_processing(out_dir)
write_processing(out_dir, input_asset)


def main() -> None:
"""Discover output folders under /results/ and generate metadata for each."""
print(f"Capsule URL: {CAPSULE_URL}\n")

out_dirs = find_output_dirs()
if not out_dirs:
raise RuntimeError(f"No output directories found under {RESULTS_DIR}")
Expand Down