Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions tests/nodenorm/by_issue/biothings/test_issue_10.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Test for https://github.com/biothings/NodeNormalizationAPI/issues/10
# Querying UNII:2ZM8CX04RZ with drug_chemical_conflate=True causes HTTP 500.
# The same query with drug_chemical_conflate=False succeeds.
import urllib.parse

import pytest
import requests

# The single CURIE from the issue report; every request built by _post queries it.
CURIE = "UNII:2ZM8CX04RZ"


def _post(nodenorm_url, drug_chemical_conflate, timeout=120):
    """POST the issue's CURIE to NodeNorm's ``get_normalized_nodes`` endpoint.

    Args:
        nodenorm_url: Base URL of the NodeNorm instance under test.
        drug_chemical_conflate: Value sent as the ``drug_chemical_conflate``
            request option.
        timeout: Seconds handed to ``requests.post`` as its ``timeout`` so an
            unresponsive server fails the test instead of hanging the run.

    Returns:
        The ``requests.Response`` for the request.
    """
    url = urllib.parse.urljoin(nodenorm_url, "get_normalized_nodes")
    payload = {
        "curies": [CURIE],
        "conflate": True,
        "drug_chemical_conflate": drug_chemical_conflate,
    }
    return requests.post(url, json=payload, timeout=timeout)


def _assert_ok(response, nodenorm_url, drug_chemical_conflate):
    """Assert that *response* is a successful lookup that mentions ``CURIE``.

    Checks, in order: the HTTP status is OK, the JSON body is a dict, and
    ``CURIE`` is one of its keys. ``nodenorm_url`` and
    ``drug_chemical_conflate`` are only used to build the failure message.
    """
    endpoint = urllib.parse.urljoin(nodenorm_url, "get_normalized_nodes")
    assert response.ok, (
        f"POST {endpoint} returned HTTP {response.status_code} for {CURIE} "
        f"(drug_chemical_conflate={drug_chemical_conflate}): {response.text[:500]}"
    )

    payload = response.json()
    assert isinstance(payload, dict), (
        f"Expected a dict response, got {type(payload).__name__}: {str(payload)[:500]}"
    )

    assert CURIE in payload, f"{CURIE} missing from response"


def test_without_drug_chemical_conflate(target_info):
    """Control case: the lookup must succeed with drug_chemical_conflate=False."""
    base_url = target_info["NodeNormURL"]
    _assert_ok(
        _post(base_url, drug_chemical_conflate=False),
        base_url,
        drug_chemical_conflate=False,
    )


def test_with_drug_chemical_conflate(target_info):
    """Regression case: the lookup must also succeed with drug_chemical_conflate=True.

    This is the combination the issue reported as returning HTTP 500.
    """
    base_url = target_info["NodeNormURL"]
    _assert_ok(
        _post(base_url, drug_chemical_conflate=True),
        base_url,
        drug_chemical_conflate=True,
    )
93 changes: 93 additions & 0 deletions tests/nodenorm/by_issue/biothings/test_issue_11.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Test for https://github.com/biothings/NodeNormalizationAPI/issues/11
# When querying UMLS:C0106127, CHEMBL.COMPOUND:CHEMBL221542, and
# PUBCHEM.COMPOUND:222284 with drug_chemical_conflate=true, NodeNorm Redis
# returned 146 entries in equivalent_identifiers while NodeNorm ES returned 32.
# The Redis result contained the same identifier (e.g. CAS:76772-70-8) up to
# six times. Regardless of which backend is serving the request, the
# equivalent_identifiers list for each normalized node must not contain
# duplicate identifiers.
import urllib.parse

import pytest
import requests

# CURIEs named in the issue; the parametrized test below sends each one in its
# own request.
CURIES = [
    "UMLS:C0106127",
    "CHEMBL.COMPOUND:CHEMBL221542",
    "PUBCHEM.COMPOUND:222284",
]

# Expected number of unique CURIEs in equivalent_identifiers for each setting,
# taken from the issue (17 without conflate on both backends, 32 with conflate
# on ES, and 32 unique on Redis once the duplicates are removed).
# Keyed by the drug_chemical_conflate value sent with the request.
EXPECTED_UNIQUE_COUNTS = {False: 17, True: 32}


def _post(nodenorm_url, curie, drug_chemical_conflate, timeout=120):
    """POST a single CURIE to NodeNorm's ``get_normalized_nodes`` endpoint.

    Args:
        nodenorm_url: Base URL of the NodeNorm instance under test.
        curie: The CURIE to normalize; sent as a one-element ``curies`` list.
        drug_chemical_conflate: Value sent as the ``drug_chemical_conflate``
            request option.
        timeout: Seconds handed to ``requests.post`` as its ``timeout`` so an
            unresponsive server fails the test instead of hanging the run.

    Returns:
        The ``requests.Response`` for the request.
    """
    url = urllib.parse.urljoin(nodenorm_url, "get_normalized_nodes")
    payload = {
        "curies": [curie],
        "conflate": True,
        "drug_chemical_conflate": drug_chemical_conflate,
    }
    return requests.post(url, json=payload, timeout=timeout)


def _format_identifier_list(counts: dict[str, int]) -> str:
    """Render *counts* as a numbered, newline-separated listing.

    Entries are numbered from 1 in the mapping's iteration order. An
    identifier whose count is greater than one gets a ``[xN DUPLICATE]``
    suffix. An empty mapping yields an empty string.
    """

    def _row(position: int, identifier: str, occurrences: int) -> str:
        marker = ""
        if occurrences > 1:
            marker = f" [x{occurrences} DUPLICATE]"
        return f" {position:3d}. {identifier}{marker}"

    return "\n".join(
        _row(position, identifier, occurrences)
        for position, (identifier, occurrences) in enumerate(counts.items(), start=1)
    )


@pytest.mark.parametrize("curie", CURIES)
@pytest.mark.parametrize("drug_chemical_conflate", [False, True])
def test_equivalent_identifiers(target_info, curie, drug_chemical_conflate):
    """Check the equivalent_identifiers returned for one CURIE and one setting.

    The list must not repeat any identifier, and its number of unique
    identifiers must equal the entry in EXPECTED_UNIQUE_COUNTS for the
    drug_chemical_conflate value in use.
    """
    nodenorm_url = target_info["NodeNormURL"]
    endpoint = urllib.parse.urljoin(nodenorm_url, "get_normalized_nodes")
    label = f"{curie} (drug_chemical_conflate={drug_chemical_conflate})"

    response = _post(nodenorm_url, curie, drug_chemical_conflate=drug_chemical_conflate)
    assert response.ok, (
        f"POST {endpoint} returned HTTP {response.status_code} for {curie!r} "
        f"(drug_chemical_conflate={drug_chemical_conflate}): {response.text[:500]}"
    )

    payload = response.json()
    assert isinstance(payload, dict), (
        f"Expected a dict response, got {type(payload).__name__}: {str(payload)[:500]}"
    )

    node = payload.get(curie)
    assert isinstance(node, dict), (
        f"{curie} returned null with drug_chemical_conflate={drug_chemical_conflate}"
    )

    # Tally how often each identifier occurs, keeping first-seen order.
    identifiers = [entry["identifier"] for entry in node.get("equivalent_identifiers", [])]
    counts: dict[str, int] = {}
    for identifier in identifiers:
        counts[identifier] = counts.get(identifier, 0) + 1
    total = len(identifiers)
    unique = len(counts)

    # Shared tail of both failure messages: the full annotated listing.
    context = f"{label}: {total} total, {unique} unique\n{_format_identifier_list(counts)}"

    duplicates = sorted(identifier for identifier in counts if counts[identifier] > 1)
    assert not duplicates, (
        f"{label} returned "
        f"{total} equivalent_identifiers with {len(duplicates)} duplicated "
        f"identifiers: {duplicates}\n{context}"
    )

    expected_unique = EXPECTED_UNIQUE_COUNTS[drug_chemical_conflate]
    assert unique == expected_unique, (
        f"{label} returned "
        f"{unique} unique equivalent_identifiers, expected {expected_unique}\n{context}"
    )
128 changes: 128 additions & 0 deletions tests/nodenorm/by_issue/biothings/test_issue_14.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Test for https://github.com/biothings/NodeNormalizationAPI/issues/14
# NodeNorm ES returns HTTP 500 when queried with conflate=False and
# drug_chemical_conflate=True for a list that contains both CHEBI:17310 and
# DRUGBANK:DB00058. Bisection confirmed this pair is the minimal reproducer;
# neither CURIE fails on its own.
import urllib.parse

import pytest
import requests

# The two CURIEs that together trigger the 500 under drug_chemical_conflate=True.
# Sent as one request by test_failing_pair and excluded from REMAINING_CURIES.
FAILING_PAIR = ["CHEBI:17310", "DRUGBANK:DB00058"]

ALL_CURIES = [
"PUBCHEM.COMPOUND:5288464", "CHEBI:18332", "CHEBI:29678", "UNII:EVS87XF13W",
"CHEBI:134990", "CHEBI:4027", "CHEBI:65329", "CHEBI:33007", "CHEBI:27584",
"CHEBI:7956", "CHEBI:33364", "CHEBI:28299", "MESH:C000717949", "CHEBI:65408",
"DRUGBANK:DB10510", "CHEBI:192408", "CHEBI:17992", "CHEBI:68656", "MESH:C545474",
"CHEBI:3750", "CHEBI:16240", "CHEBI:34440", "CHEBI:28201", "CHEBI:5132",
"MESH:C097284", "CHEBI:84002", "CHEBI:50867", "CHEBI:41607", "CHEBI:39168",
"CHEBI:33341", "CHEBI:51450", "CHEBI:28757", "CHEBI:87235", "CHEBI:17241",
"CHEBI:37527", "CHEBI:3093", "CHEBI:17574", "PUBCHEM.COMPOUND:16130978",
"CHEBI:28741", "CHEBI:10106", "CHEBI:22652", "UNII:RX077P88RY", "CHEBI:77543",
"CHEBI:2504", "CHEBI:16335", "CHEBI:23414", "CHEBI:27470", "MESH:D014028",
"CHEBI:27214", "CHEBI:59331", "CHEBI:75142", "CHEBI:15365", "CHEBI:3360",
"CHEBI:28939", "CHEBI:16664", "CHEBI:73755", "CHEBI:30513", "CHEBI:138000",
"CHEBI:18145", "PUBCHEM.COMPOUND:95335", "CHEBI:46195", "CHEBI:27568",
"MESH:C000628730", "CHEBI:47807", "CHEBI:34202", "CHEBI:35591", "CHEBI:46677",
"CHEBI:41879", "CHEBI:15366", "CHEBI:2930", "CHEBI:310312", "UNII:Z41TGB4080",
"CHEBI:52726", "CHEBI:17303", "CHEBI:42355", "CHEBI:16973", "CHEBI:28420",
"CHEBI:72544", "CHEBI:9381", "CHEBI:32506", "DRUGBANK:DB10710", "UNII:641ILF0QGZ",
"CHEBI:230644", "CHEBI:4034", "CHEBI:30563", "CHEBI:52492", "UNII:0YO8J06WCR",
"CHEBI:28984", "CHEBI:38705", "CHEBI:61058", "PUBCHEM.COMPOUND:21897015",
"CHEBI:29014", "CHEBI:254496", "CHEBI:2567", "CHEBI:16236", "PUBCHEM.COMPOUND:16014",
"CHEBI:95082", "CHEBI:28262", "CHEBI:27902", "CHEBI:191396", "CHEBI:17243",
"CHEBI:17941", "CHEBI:34873", "UNII:HS813P8QPX", "CHEBI:15379", "CHEBI:48843",
"CHEBI:4470", "CHEBI:8665", "CHEBI:27432", "CHEBI:17747", "CHEBI:26214",
"CHEBI:18227", "CHEBI:17158", "CHEBI:4356", "CHEBI:2922", "CHEBI:28619",
"CHEBI:41774", "CHEBI:17588", "CHEBI:50924", "CHEBI:25322", "CHEBI:22689",
"MESH:D012906", "CHEBI:29287", "CHEBI:8346", "CHEBI:229916", "CHEBI:30621",
"CHEBI:16164", "CHEBI:28119", "CHEBI:30314", "CHEBI:16856", "CHEBI:15971",
"UNII:97PTR2F3Z8", "CHEBI:30114", "CHEBI:16393", "CHEBI:34696", "CHEBI:27385",
"CHEBI:6030", "CHEBI:17549", "CHEBI:64317", "CHEBI:81543", "CHEBI:29866",
"CHEBI:47458", "CHEBI:6078", "CHEBI:34317", "CHEBI:15571", "MESH:C000602261",
"CHEBI:91541", "CHEBI:28260", "CHEBI:1367", "CHEBI:6801", "CHEBI:15756",
"UNII:9G2MP84A8W", "CHEBI:15930", "CHEBI:3962", "CHEBI:34631", "CHEBI:33216",
"CHEBI:30512", "CHEBI:25944", "UNII:24R8721A3S", "CHEBI:35455", "MESH:D013870",
"CHEBI:39170", "CHEBI:3478", "CHEBI:43602", "CHEBI:39176", "CHEBI:34911",
"PUBCHEM.COMPOUND:125922", "CHEBI:32509", "CHEBI:16469", "CHEBI:29035", "CHEBI:7719",
"CHEBI:68642", "CHEBI:25681", "CHEBI:38157", "CHEBI:230671", "CHEBI:88522",
"CHEBI:50122", "DRUGBANK:DB10543", "CHEBI:17051", "CHEBI:34687", "CHEBI:34887",
"CHEBI:28112", "DRUGBANK:DB09337", "CHEBI:5001", "CHEBI:35255", "CHEBI:17688",
"DRUGBANK:DB11010", "CHEBI:75958", "CHEBI:32234", "CHEBI:9495", "CHEBI:5864",
"CHEBI:18721", "CHEBI:16069", "CHEBI:17276", "CHEBI:6842", "CHEBI:27789",
"CHEBI:9288", "CHEBI:28125", "CHEBI:44185", "CHEBI:15948", "CHEBI:4042",
"CHEBI:17230", "PUBCHEM.COMPOUND:9904141", "CHEBI:33262", "CHEBI:5131",
"NCBIGene:2876", "CHEBI:34680", "MESH:C474021", "CHEBI:31653", "CHEBI:17996",
"CHEBI:44445", "CHEBI:81760", "DRUGBANK:DB10429", "CHEBI:28786", "CHEBI:39483",
"MESH:D004041", "CHEBI:3175", "PUBCHEM.COMPOUND:122360683", "CHEBI:27899",
"CHEBI:25805", "CHEBI:48095", "CHEBI:17846", "CHEBI:17310", "DRUGBANK:DB00058",
"CHEBI:27732", "CHEBI:17895", "CHEBI:95089", "CHEBI:30136", "PUBCHEM.COMPOUND:9855813",
"DRUGBANK:DB14377", "CHEBI:22977", "MESH:D019443", "CHEBI:3558", "CHEBI:26523",
"CHEBI:305790", "CHEBI:92360", "CHEBI:144804", "CHEBI:15367", "CHEBI:47808",
"CHEBI:28854", "CHEBI:2663", "CHEBI:22586", "CHEBI:25016", "CHEBI:64163",
"CHEBI:32599", "CHEBI:4791", "CHEBI:27958", "CHEBI:17234", "CHEBI:86368",
"CHEBI:2962", "CHEBI:31348", "CHEBI:28177", "CHEBI:16347", "CHEBI:27744",
"CHEBI:6129", "CHEBI:9249", "CHEBI:30785", "CHEBI:6809", "CHEBI:75955",
"MESH:D001335", "CHEBI:27856", "CHEBI:26401", "CHEBI:16482", "CHEBI:83732",
"CHEBI:31823", "CHEBI:6908", "CHEBI:6605", "PUBCHEM.COMPOUND:3018355", "CHEBI:32497",
"CHEBI:31746", "CHEBI:4806", "CHEBI:16412", "CHEBI:34682", "MESH:D007461",
"CHEBI:39185", "CHEBI:24757", "CHEBI:3179", "CHEBI:28748", "CHEBI:50594",
"PUBCHEM.COMPOUND:3467", "CHEBI:9753", "DRUGBANK:DB13961", "CHEBI:39285",
"MESH:C509867", "CHEBI:34372", "CHEBI:52497", "CHEBI:4903", "CHEBI:8113",
"CHEBI:28694", "DRUGBANK:DB14242", "CHEBI:166520", "MESH:D052638", "CHEBI:641",
"MESH:D000336", "CHEBI:145499", "CHEBI:16908", "CHEBI:28216", "CHEBI:63317",
"CHEBI:73169", "CHEBI:9150", "MESH:C000612485", "CHEBI:37825", "CHEBI:8069",
"CHEBI:6437", "CHEBI:7494", "CHEBI:29865", "CHEBI:28842", "CHEBI:44658",
"CHEBI:38221", "CHEBI:9943", "CHEBI:16796", "CHEBI:82538", "CHEBI:63933",
"CHEBI:16480", "DRUGBANK:DB10575", "CHEBI:46345",
]

# Everything in ALL_CURIES except the two CURIEs in FAILING_PAIR, order preserved.
REMAINING_CURIES = [c for c in ALL_CURIES if c not in FAILING_PAIR]


def _post(nodenorm_url, curies, timeout=120):
    """POST *curies* to ``get_normalized_nodes`` with the issue's option set.

    The request always sends ``conflate=False`` and
    ``drug_chemical_conflate=True``, the combination this module exercises.

    Args:
        nodenorm_url: Base URL of the NodeNorm instance under test.
        curies: List of CURIEs to normalize in a single request.
        timeout: Seconds handed to ``requests.post`` as its ``timeout`` so an
            unresponsive server fails the test instead of hanging the run.

    Returns:
        The ``requests.Response`` for the request.
    """
    url = urllib.parse.urljoin(nodenorm_url, "get_normalized_nodes")
    payload = {"curies": curies, "conflate": False, "drug_chemical_conflate": True}
    return requests.post(url, json=payload, timeout=timeout)


def _assert_ok(response, url, curies):
    """Assert that *response* succeeded and has a key for every requested CURIE.

    Checks, in order: the HTTP status is OK, the JSON body is a dict, and no
    entry of *curies* is absent from that dict. *url* is only used in the
    failure message.
    """
    assert response.ok, (
        f"POST {url} returned HTTP {response.status_code} for {curies}: {response.text[:500]}"
    )

    payload = response.json()
    assert isinstance(payload, dict), (
        f"Expected a dict response, got {type(payload).__name__}: {str(payload)[:500]}"
    )

    absent = [curie for curie in curies if curie not in payload]
    assert not absent, f"{len(absent)} CURIEs missing from response: {absent}"


def test_all_curies_except_failing_pair(target_info):
    """One request with every reported CURIE except the failing pair must succeed."""
    base_url = target_info["NodeNormURL"]
    endpoint = urllib.parse.urljoin(base_url, "get_normalized_nodes")
    _assert_ok(_post(base_url, REMAINING_CURIES), endpoint, REMAINING_CURIES)


def test_failing_pair(target_info):
    """One request with just CHEBI:17310 and DRUGBANK:DB00058 must succeed.

    This pair is the minimal reproducer that the issue reported as HTTP 500.
    """
    base_url = target_info["NodeNormURL"]
    endpoint = urllib.parse.urljoin(base_url, "get_normalized_nodes")
    _assert_ok(_post(base_url, FAILING_PAIR), endpoint, FAILING_PAIR)


@pytest.mark.parametrize("curie", ALL_CURIES)
def test_individual_curie(target_info, curie):
    """Each reported CURIE, sent alone in its own request, must succeed."""
    base_url = target_info["NodeNormURL"]
    endpoint = urllib.parse.urljoin(base_url, "get_normalized_nodes")
    single = [curie]
    _assert_ok(_post(base_url, single), endpoint, single)
53 changes: 53 additions & 0 deletions tests/nodenorm/by_issue/biothings/test_pending_api_338.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Test for https://github.com/biothings/pending.api/issues/338
# NodeNorm returns null for NCBITaxon:2 when queried alongside MESH:C029371,
# but returns it correctly alongside MESH:D008795. The bug triggers only when
# the two CURIEs are in the same request; each CURIE succeeds on its own.
import urllib.parse

import pytest
import requests

# Pair from the issue report; sent together by test_triggering_pair.
TRIGGERING_PAIR = ["MESH:C029371", "NCBITaxon:2"]
# Positive-control pair; sent together by test_working_pair.
WORKING_PAIR = ["MESH:D008795", "NCBITaxon:2"]
# Request options merged into every POST body by _post.
PARAMS = {"conflate": True, "description": False, "drug_chemical_conflate": False}


def _post(nodenorm_url, curies, timeout=120):
    """POST *curies* to ``get_normalized_nodes`` together with ``PARAMS``.

    Args:
        nodenorm_url: Base URL of the NodeNorm instance under test.
        curies: List of CURIEs to normalize in a single request.
        timeout: Seconds handed to ``requests.post`` as its ``timeout`` so an
            unresponsive server fails the test instead of hanging the run.

    Returns:
        The ``requests.Response`` for the request.
    """
    url = urllib.parse.urljoin(nodenorm_url, "get_normalized_nodes")
    return requests.post(url, json={"curies": curies, **PARAMS}, timeout=timeout)


def _assert_ok(response, nodenorm_url, curies):
    """Assert that *response* succeeded and resolved every requested CURIE.

    Checks, in order: the HTTP status is OK, the JSON body is a dict, and no
    entry of *curies* is missing from it or mapped to ``None``.
    *nodenorm_url* is only used to build the failure message.
    """
    endpoint = urllib.parse.urljoin(nodenorm_url, "get_normalized_nodes")
    assert response.ok, (
        f"POST {endpoint} returned HTTP {response.status_code} for {curies}: {response.text[:500]}"
    )

    payload = response.json()
    assert isinstance(payload, dict), (
        f"Expected a dict response, got {type(payload).__name__}: {str(payload)[:500]}"
    )

    unresolved = [curie for curie in curies if payload.get(curie) is None]
    assert not unresolved, f"CURIEs returned as null: {unresolved}"


def test_working_pair(target_info):
    """Positive control: MESH:D008795 with NCBITaxon:2 must both come back non-null."""
    base_url = target_info["NodeNormURL"]
    _assert_ok(_post(base_url, WORKING_PAIR), base_url, WORKING_PAIR)


def test_triggering_pair(target_info):
    """MESH:C029371 with NCBITaxon:2 must both come back non-null.

    This is the pairing the issue reported as returning null for NCBITaxon:2.
    """
    base_url = target_info["NodeNormURL"]
    _assert_ok(_post(base_url, TRIGGERING_PAIR), base_url, TRIGGERING_PAIR)


@pytest.mark.parametrize("curie", sorted(set(TRIGGERING_PAIR + WORKING_PAIR)))
def test_individual_curie(target_info, curie):
    """Every CURIE used by either pair must come back non-null when sent alone."""
    base_url = target_info["NodeNormURL"]
    single = [curie]
    _assert_ok(_post(base_url, single), base_url, single)
12 changes: 9 additions & 3 deletions tests/nodenorm/by_issue/test_nodenorm_229.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
# This is a test for https://github.com/TranslatorSRI/NodeNormalization/issues/229

import pytest
import requests
import urllib


def test_nodenorm_229(target_info):
nodenorm_url = target_info["NodeNormURL"]

# We can't test this unless NodeNorm has a /query endpoint.
query_url = urllib.parse.urljoin(nodenorm_url, "query")
response = requests.head(query_url)
if response.status_code == 404:
pytest.skip(f"NodeNorm {nodenorm_url} not have a /query endpoint, cannot test issue #229")
return

input_json = {
"message": {
"query_graph": {
Expand Down Expand Up @@ -498,8 +505,7 @@ def test_nodenorm_229(target_info):
},
}

url = urllib.parse.urljoin(nodenorm_url, "query")
response = requests.post(url, json=input_json)
response = requests.post(query_url, json=input_json)
assert response.ok, f"Could not POST test content to {url}: {response.json()}"
actual_output = response.json()

Expand Down
35 changes: 26 additions & 9 deletions tests/nodenorm/test_nodenorm_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# These tests are intended to ensure that all the API endpoints on NodeNorm are working as intended.
#
import urllib.parse
import logging

import pytest
import requests
Expand All @@ -13,14 +14,30 @@
def test_openapi_json(target_info):
    """Check that NodeNorm serves a valid OpenAPI document at a known path.

    Each entry of ``openapi_paths`` is joined onto the NodeNorm base URL and
    fetched. A path whose GET is not OK is logged and skipped. Any document
    that is served must carry the ``infores:sri-node-normalizer`` infores and
    pass ``validate_url``. The test fails if no candidate path validated.
    """
    nodenorm_url = target_info['NodeNormURL']

    # NodeNorm keeps its openapi.json at /openapi.json, but
    # NodeNorm ES keeps it at /webapp/openapi.json -- so we should check both.
    openapi_paths = [
        'openapi.json',
        'webapp/openapi.json',
    ]

    flag_test_passed = False
    for openapi_path in openapi_paths:
        url = urllib.parse.urljoin(nodenorm_url, openapi_path)
        response = requests.get(url)
        if not response.ok:
            # Not fatal on its own: the document may live at the other path.
            logging.warning(f"Could not GET {url}: {response}")
            continue

        openapi_json = response.json()
        assert openapi_json['info']['x-translator']['infores'] == 'infores:sri-node-normalizer'

        try:
            validate_url(url)
        except OpenAPIValidationError as e:
            pytest.fail(f"Could not validate OpenAPI at {url}: {e}")
        flag_test_passed = True

    if not flag_test_passed:
        pytest.fail(f"Could not validate OpenAPI on NodeNorm {nodenorm_url} at any of the expected URLs: {openapi_paths}")
Loading
Loading