Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9259005
updating json-ld context file for wikidata and minor correction to th…
Feb 25, 2026
0641887
minor cleanup
Feb 25, 2026
ec0e464
clearing some individual test failures
Feb 25, 2026
b8dd9a1
uv based cleaning
Feb 25, 2026
778d6e6
fix to isa model to enable serialization of @id for key objects, fixi…
Mar 2, 2026
97127ba
fixes #615, more tests on rdf conversion
Mar 2, 2026
2daeaba
dropping offending iso8601-fail test, reinstating introspection.gql i…
Mar 2, 2026
cfc9574
Fixed json2jsonld, commented on failing test.
knirirr Mar 3, 2026
fe09bc6
"Fixed" test.
knirirr Mar 3, 2026
94f24af
Merge branch 'rdf-test' into rdf-test-mt
knirirr Mar 3, 2026
626671e
Fixed merge error.
knirirr Mar 3, 2026
531adfb
Merge pull request #616 from ISA-tools/rdf-test-mt
proccaserra Mar 6, 2026
8173fbb
sanitizing json2jsonld serialization and fixing failing tests resulti…
Mar 9, 2026
fa4b6a8
fixing import linting
Mar 9, 2026
e7668a1
Merge pull request #617 from ISA-tools/rdf-test-fix
proccaserra Mar 13, 2026
3e3a0a0
Merge pull request #614 from ISA-tools/rdf-test
proccaserra Mar 13, 2026
ea32cc5
allowing full uri doi and pmid to be used instead of strict validatio…
Mar 23, 2026
9e5539d
allowing full uri doi and pmid to be used instead of strict validatio…
Mar 23, 2026
d9df08a
fixing linting issues
Mar 23, 2026
adeb06f
minor edits/linting
Mar 23, 2026
593855f
clearing the uv run pytest error
Mar 23, 2026
09efcb8
addressing review comment on PR
Mar 23, 2026
ff96305
Mocked tests to avoid failing call to ontology service.
knirirr Mar 23, 2026
64274f4
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
Mar 23, 2026
c5e709e
attempting at addressing issue #581
Mar 23, 2026
1de0b83
addressing https://github.com/ISA-tools/isa-api/security/dependabot/58
Mar 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
289 changes: 289 additions & 0 deletions isa-cookbook/content/notebooks/ISA-MTBLS-curation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e77e7aaf-2576-4c66-be66-b2f4bc41ae7e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/oerc0042/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/fs/__init__.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
" __import__(\"pkg_resources\").declare_namespace(__name__) # type: ignore\n"
]
}
],
"source": [
"from isatools import isatab\n",
"from isatools.isajson import ISAJSONEncoder\n",
"from isatools.convert import isatab2json\n",
"from isatools import isajson\n",
"from rdflib import Graph\n",
"\n",
"import os\n",
"import json\n",
"import isatools"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f80b3857-d16f-43c4-8b9d-e3f20001c54c",
"metadata": {},
"outputs": [],
"source": [
"# Directory holding the unpacked MetaboLights ISA-Tab archive to read back.\n",
"# Alternative archive directories (swap into data_dir as needed):\n",
"#   MTBLS718_117783_compressed_files\n",
"#   MTBLS1820_135004_compressed_files\n",
"#   MTBLS2289_155095_compressed_files\n",
"#   MTBLS3563_133388_compressed_files\n",
"#   MTBLS4381_183928_compressed_files\n",
"data_dir = './MTBLS4381_183928_compressed_files/'\n",
"\n",
"# Load the investigation file with isatab.load; the handle is closed when the block exits.\n",
"investigation_path = os.path.join(data_dir, 'i_Investigation.txt')\n",
"with open(investigation_path) as investigation_fp:\n",
"    loaded_investigation = isatab.load(investigation_fp)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "311ab08b-1bcd-45bf-83ba-e5a3f8935d42",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Name: isatools\n",
"Version: 0.14.3\n",
"Summary: Metadata tracking tools help to manage an increasingly diverse set of life science, environmental and biomedical experiments\n",
"Home-page: \n",
"Author: ISA Infrastructure Team\n",
"Author-email: ISA Infrastructure Team <isatools@googlegroups.com>\n",
"License-Expression: CPAL-1.0\n",
"Location: /Users/oerc0042/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages\n",
"Requires: beautifulsoup4, biopython, chardet, flask, flask-sqlalchemy, graphene, graphql-core, iso8601, jinja2, jsonschema, lxml, mzml2isa, networkx, numpy, openpyxl, pandas, progressbar2, pytest-timeout, python-dateutil, pyyaml, rdflib, requests, ruff, setuptools, sqlalchemy\n",
"Required-by: \n"
]
}
],
"source": [
"%pip show isatools"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "84aedb36-5154-48d1-9d1b-62905c6465ae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"<p>Testing the effect of blue mussel ensilage as a novel cost-efficient protein feed source for Atlantic salmon feed. Fermentation of novel raw materials for fish feed is a new practice with little prior experience of direct relevance to aquaculture and fish farming. In Trial A, an alternative marine compound is included in the feed, which is based on seaweed. In the trial were multiple inclusion levels used to investigate the effect of seaweed inclusion in an aquaculture setting.</p><p><br></p><p><strong>HoloFood Trial A</strong> - <strong>seaweed-dose response</strong>&nbsp;is reported in the current study&nbsp;<a href='https://www.ebi.ac.uk/metabolights/MTBLS4381' rel='noopener noreferrer' target='_blank'><strong>MTBLS4381</strong></a>.</p><p><strong>HoloFood Trial B - blue mussel-dose response</strong>&nbsp;is reported in&nbsp;<a href='https://www.ebi.ac.uk/metabolights/MTBLS4382' rel='noopener noreferrer' target='_blank'><strong>MTBLS4382</strong></a>.</p><p><strong>HoloFood Trial C</strong> - <strong>blue mussel ensilage-dose response</strong>&nbsp;is reported in&nbsp;<a href='https://www.ebi.ac.uk/metabolights/MTBLS4384' rel='noopener noreferrer' target='_blank'><strong>MTBLS4384</strong></a>.</p><p><strong>HoloFood Trial D</strong> - <strong>fermented seaweed open water-dose response</strong>&nbsp;is reported in&nbsp;<a href='https://www.ebi.ac.uk/metabolights/MTBLS6733' rel='noopener noreferrer' target='_blank'><strong>MTBLS6733</strong></a>.</p><p><strong>HoloFood Trial 1/2/3</strong>&nbsp;is reported in&nbsp;<a href='https://www.ebi.ac.uk/metabolights/MTBLS6988' rel='noopener noreferrer' target='_blank'><strong>MTBLS6988</strong></a>.</p><p><br></p><p><strong>Linked cross omic data sets:</strong></p><p>Nucleic acid data associated with this study are available in the European Nucleotide Archive (ENA): accession number&nbsp;<a href='https://www.ebi.ac.uk/ena/browser/view/PRJEB43192' rel='noopener noreferrer' 
target='_blank'>PRJEB43192</a>.</p><p>Metagenomic data associated with this study are available from MGnify under the Super Study '<a href='https://www.ebi.ac.uk/metagenomics/super-studies/holofood' rel='noopener noreferrer' target='_blank'>holofood</a>'.</p>\""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loaded_investigation.studies[0].description"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "57f00977-5b9f-4d73-a15a-fbec0a2394c0",
"metadata": {},
"outputs": [],
"source": [
"# Open the investigation file in a context manager so the handle is closed after validation.\n",
"with open(os.path.join(data_dir, 'i_Investigation.txt')) as investigation_fp:\n",
"    validation_report = isatab.validate(investigation_fp)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8f76d77c-877a-4152-93af-8284aff0a141",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'message': 'A required property is missing',\n",
" 'supplemental': 'A property value in Study PubMed ID of investigation file at column 1 is required',\n",
" 'code': 4003},\n",
" {'message': 'A required property is missing',\n",
" 'supplemental': 'A property value in Study Publication DOI of investigation file at column 1 is required',\n",
" 'code': 4003}]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"validation_report[\"errors\"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f849053b-c952-4396-b807-3265e3b53127",
"metadata": {},
"outputs": [],
"source": [
"isa_json = isatab2json.convert(data_dir, use_new_parser=True)\n",
"\n",
"# convert() can return None (presumably when the ISA-Tab fails validation - TODO confirm).\n",
"# Stop here with a clear message instead of writing 'null' to isa.json, which only\n",
"# surfaces later as \"'NoneType' object is not iterable\" inside ISALDSerializer.\n",
"if isa_json is None:\n",
"    raise ValueError(\n",
"        f\"isatab2json.convert() returned no ISA-JSON for {data_dir!r}; \"\n",
"        \"run isatab.validate() on the investigation file and fix the reported errors first\"\n",
"    )\n",
"\n",
"with open(os.path.join(data_dir, 'isa.json'), 'w') as out_fp:\n",
"    json.dump(isa_json, out_fp)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "308ee48c-31a5-4427-a305-17d20715ed0b",
"metadata": {},
"outputs": [],
"source": [
"from isatools.convert.json2jsonld import ISALDSerializer\n",
"from json import load"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f16c6ddf-786f-4391-8af3-e8394b83b958",
"metadata": {},
"outputs": [],
"source": [
"instance_path = os.path.join(data_dir, \"isa.json\")\n",
"\n",
"# The with-block closes the file on exit, so no explicit close() call is needed.\n",
"with open(instance_path, 'r') as instance_file:\n",
"    instance = load(instance_file)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "638af7a1-76ed-4310-9bac-0df34e10430d",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'NoneType' object is not iterable",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m ontology = \u001b[33m\"\u001b[39m\u001b[33mwd\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m serializer = \u001b[43mISALDSerializer\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m serializer.set_ontology(ontology)\n\u001b[32m 5\u001b[39m serializer.set_instance(instance)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:29\u001b[39m, in \u001b[36mISALDSerializer.__init__\u001b[39m\u001b[34m(self, json_instance, ontology, combined)\u001b[39m\n\u001b[32m 27\u001b[39m \u001b[38;5;28mself\u001b[39m.ontology = ontology\n\u001b[32m 28\u001b[39m \u001b[38;5;28mself\u001b[39m._resolve_network()\n\u001b[32m---> \u001b[39m\u001b[32m29\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mset_instance\u001b[49m\u001b[43m(\u001b[49m\u001b[43mjson_instance\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:74\u001b[39m, in \u001b[36mISALDSerializer.set_instance\u001b[39m\u001b[34m(self, instance)\u001b[39m\n\u001b[32m 72\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(instance, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m (instance.startswith(\u001b[33m\"\u001b[39m\u001b[33mhttp://\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m instance.startswith(\u001b[33m\"\u001b[39m\u001b[33mhttps://\u001b[39m\u001b[33m\"\u001b[39m)):\n\u001b[32m 73\u001b[39m \u001b[38;5;28mself\u001b[39m.instance = json.loads(get(instance).text)\n\u001b[32m---> \u001b[39m\u001b[32m74\u001b[39m \u001b[38;5;28mself\u001b[39m.output = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_inject_ld\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmain_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:91\u001b[39m, in \u001b[36mISALDSerializer._inject_ld\u001b[39m\u001b[34m(self, schema_name, output, instance)\u001b[39m\n\u001b[32m 84\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 85\u001b[39m \u001b[33;03m:param schema_name: name of the schema\u001b[39;00m\n\u001b[32m 86\u001b[39m \u001b[33;03m:param output: the output to inject the ld attributes into\u001b[39;00m\n\u001b[32m 87\u001b[39m \u001b[33;03m:param instance: the json instance to get the fields\u001b[39;00m\n\u001b[32m 88\u001b[39m \u001b[33;03m:return:\u001b[39;00m\n\u001b[32m 89\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 90\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m.combined:\n\u001b[32m---> \u001b[39m\u001b[32m91\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_inject_ld_split\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minstance\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 92\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 93\u001b[39m filename = \u001b[33m\"\u001b[39m\u001b[33m../resources/json-context/\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m/isa_\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m_allinone_context.jsonld\u001b[39m\u001b[33m\"\u001b[39m % (\u001b[38;5;28mself\u001b[39m.ontology, \u001b[38;5;28mself\u001b[39m.ontology)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.13.5/envs/py313isa/lib/python3.13/site-packages/isatools/convert/json2jsonld.py:117\u001b[39m, in \u001b[36mISALDSerializer._inject_ld_split\u001b[39m\u001b[34m(self, schema_name, output, instance, reference)\u001b[39m\n\u001b[32m 115\u001b[39m output[\u001b[33m\"\u001b[39m\u001b[33m@context\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mself\u001b[39m._get_context_url(reference)\n\u001b[32m 116\u001b[39m output[\u001b[33m\"\u001b[39m\u001b[33m@type\u001b[39m\u001b[33m\"\u001b[39m] = context_key\n\u001b[32m--> \u001b[39m\u001b[32m117\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfield\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43minstance\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 118\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfield\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mprops\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 119\u001b[39m \u001b[43m \u001b[49m\u001b[43mfield_props\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mprops\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfield\u001b[49m\u001b[43m]\u001b[49m\n",
"\u001b[31mTypeError\u001b[39m: 'NoneType' object is not iterable"
]
}
],
"source": [
"ontology = \"wd\"\n",
"\n",
"# Pass ontology and combined to the constructor: it calls set_instance() itself, which\n",
"# builds `output` from the values in effect at that moment. This also avoids converting\n",
"# the instance a second time.\n",
"serializer = ISALDSerializer(instance, ontology=ontology, combined=True)\n",
"jsonldcontent = serializer.output\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa01a6a3-0b78-455f-8052-3acc96a375ea",
"metadata": {},
"outputs": [],
"source": [
"# Persist the JSON-LD document next to the source ISA-Tab files.\n",
"jsonld_path = os.path.join(data_dir, 'isa-rdf.json')\n",
"with open(jsonld_path, 'w') as jsonld_fp:\n",
"    json.dump(jsonldcontent, jsonld_fp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "93ade57e-9dc0-4305-b08b-b8e7e346cb32",
"metadata": {},
"outputs": [],
"source": [
"graph = Graph()\n",
"# Name the serialization explicitly rather than relying on rdflib guessing it from the '.json' suffix.\n",
"graph.parse(os.path.join(data_dir, 'isa-rdf.json'), format='json-ld')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df1b8017-96f9-4383-9c8f-dc503023a22f",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Graph g has {len(graph)} statements.\")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "eba2eecc-c9d8-45b8-b373-e3b0428bed8b",
"metadata": {},
"outputs": [],
"source": [
"# Write turtle file\n",
"rdf_path = os.path.join(data_dir, \"isa-rdf-\" + ontology + \"v3.ttl\")\n",
"# Let rdflib write to the destination itself, so the result does not depend on what\n",
"# serialize() returns or on the locale's default text encoding.\n",
"# The trailing ';' suppresses the cell's return-value display.\n",
"graph.serialize(destination=rdf_path, format='turtle');"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ed05d6f-c94f-443c-b4ec-d474b334cacd",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b41de0e2-25dc-42a3-b502-c1ca835d24cd",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install git+https://github.com/isa-tools/isa-api.git@develop"
"#!pip install git+https://github.com/isa-tools/isa-api.git@develop"
]
},
{
Expand Down Expand Up @@ -125,6 +125,14 @@
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7699e28-dac1-44f0-be46-5b8c732de381",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -143,7 +151,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down
Loading
Loading