Skip to content
Merged
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ license-files = ["LICENSE-MIT.md"]
requires-python = ">=3.12"
dependencies = [
"polars>=1.36.0",
"python-dotenv>=1.2.2",
"requests>=2.32.5",
"seedcase-sprout>=0.49.0",
]

Expand All @@ -36,5 +38,6 @@ dev = [
"commitizen>=4.10.0",
"pre-commit>=4.5.0",
"ruff>=0.14.8",
"types-requests>=2.32.4.20260107",
"typos>=1.40.0",
]
6 changes: 6 additions & 0 deletions scripts/__init__.py
Comment thread
martonvago marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
"""Processing and build pipeline scripts."""

from .redcap import save_data_dict

# Names re-exported as the public API of this package.
__all__ = [
"save_data_dict",
]
Comment thread
martonvago marked this conversation as resolved.
Outdated
34 changes: 34 additions & 0 deletions scripts/redcap.py
Comment thread
martonvago marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import json
import os
from pathlib import Path

import requests
from dotenv import load_dotenv

# Runs at import time: python-dotenv reads a `.env` file (if one is found) into
# the process environment, so `REDCAP_TOKEN` can be supplied from a local file.
load_dotenv()
Comment thread
lwjohnst86 marked this conversation as resolved.


def get_data_dict_from_redcap() -> list[dict[str, str]]:
"""Gets the data dictionary from REDCap.

Reads the API token from the `REDCAP_TOKEN` environment variable and sends a
POST request for the project `metadata` in JSON format to
`https://redcap.au.dk/api/`, with a 30-second timeout.

Returns:
The decoded JSON body of the response. REDCap's metadata export is expected
to be a list with one dictionary per field. NOTE(review): confirm the exact
shape against a real response.

Raises:
RuntimeError: If `REDCAP_TOKEN` is unset or empty.
requests.HTTPError: If the response status indicates an error (raised by
`response.raise_for_status()`).
"""
token = os.environ.get("REDCAP_TOKEN")
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, the token has CPH_API_KEY in the name on GenomeDK. Should we keep the reference to CPH? I went with a more general name here.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for the generic code we should use the more general name, for the implementation of this code on ON LiMiT Feasibility we'll need to know that this is the code for the instance on REDCap in Copenhagen. Later there will be tokens for Copenhagen, Aarhus and Odense.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will all the locations be in the same repo or different repos?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the REDCap locations will pull into this one Data Package repo, as far I know.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay! The metadata will be exactly the same, right (it's enough to get it from one location)? Then I will change the token name back to the CPH-specific one.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For feasibility the only instance that we will be pulling 'real' data from is the Copenhagen one, but for testing we should be able to use the Aarhus version. I'm not sure we should be testing the data transfer with real data to start with. It would have been easier if we had some time to run tests on the system in Cph before we went into production, hopefully we'll have that for the main study.

The main study is still being designed, but it does look like we'll run several REDCap instances on the Aarhus server, each with slightly different data dictionaries...

if not token:
raise RuntimeError("REDCAP_TOKEN environment variable is not set.")

# Form fields of the API request: export the project metadata as JSON.
data = {
"token": token,
"content": "metadata",
"format": "json",
"returnFormat": "json",
}
response = requests.post("https://redcap.au.dk/api/", data=data, timeout=30)
Comment thread
martonvago marked this conversation as resolved.
Outdated
response.raise_for_status()
return response.json()


def save_data_dict() -> None:
    """Saves the data dictionary from REDCap to `scripts/data_dictionary.json`.

    Fetches the data dictionary with `get_data_dict_from_redcap()` and writes it
    as indented JSON. The output path is relative to the current working
    directory; the `scripts` directory is created if it does not exist.

    Raises:
        RuntimeError: Propagated from `get_data_dict_from_redcap()` when the
            `REDCAP_TOKEN` environment variable is not set.
        OSError: If the directory or file cannot be created or written.
    """
    data_dict = get_data_dict_from_redcap()
    file_path = Path("scripts") / "data_dictionary.json"
    file_path.parent.mkdir(parents=True, exist_ok=True)
    # `ensure_ascii=False` writes non-ASCII characters verbatim, so pin the file
    # encoding to UTF-8 instead of relying on the platform's default encoding.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data_dict, f, indent=2, ensure_ascii=False)
Loading