Skip to content

Commit 06bd147

Browse files
committed
Unit tests for ClinVar namespaces in variant data CSV export
1 parent f121ebe commit 06bd147

2 files changed

Lines changed: 141 additions & 0 deletions

File tree

tests/helpers/util/score_set.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,23 @@ def link_clinical_controls_to_mapped_variants(db, score_set):
225225
db.commit()
226226

227227

228+
def link_clinvar_control_to_mapped_variant(db, score_set):
    """Link the seeded ClinVar clinical control (id=1) to the first mapped variant of a score set.

    Args:
        db: Database session; used here via ``scalars``, ``scalar``, ``add`` and ``commit``.
        score_set: Mapping describing the score set; only ``score_set["urn"]`` is read.

    Raises:
        AssertionError: If the score set has no mapped variants, or if no clinical
            control with id 1 exists.
    """
    # No ORDER BY is applied, so "first" means the first row the database returns.
    mapped_variants = db.scalars(
        select(MappedVariantDbModel)
        .join(VariantDbModel)
        .join(ScoreSetDbModel)
        .where(ScoreSetDbModel.urn == score_set["urn"])
    ).all()
    # Fail with a readable message instead of a bare IndexError further down.
    assert mapped_variants, f"score set {score_set['urn']} has no mapped variants to link"

    clinvar_control = db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 1))
    # ``scalar`` returns None when nothing matches; never append None to the relationship.
    assert clinvar_control is not None, "no clinical control with id=1 was found"

    first_mapped_variant = mapped_variants[0]
    first_mapped_variant.clinical_controls.append(clinvar_control)

    db.add(first_mapped_variant)
    db.commit()
243+
244+
228245
def link_gnomad_variants_to_mapped_variants(db, score_set):
229246
mapped_variants = db.scalars(
230247
select(MappedVariantDbModel)

tests/routers/test_score_set.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
TEST_BIORXIV_IDENTIFIER,
3838
TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED,
3939
TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED,
40+
TEST_CLINVAR_CONTROL,
4041
TEST_CROSSREF_IDENTIFIER,
4142
TEST_EXPERIMENT_WITH_KEYWORD,
4243
TEST_GNOMAD_DATA_VERSION,
@@ -76,6 +77,7 @@
7677
create_seq_score_set_with_mapped_variants,
7778
create_seq_score_set_with_variants,
7879
link_clinical_controls_to_mapped_variants,
80+
link_clinvar_control_to_mapped_variant,
7981
link_gnomad_variants_to_mapped_variants,
8082
publish_score_set,
8183
)
@@ -3154,6 +3156,128 @@ def test_download_gnomad_file_in_variant_data_path(session, data_provider, clien
31543156
assert "gnomad.gnomad_af" in reader.fieldnames
31553157

31563158

3159+
def test_download_clinvar_namespace_in_variant_data_path(session, data_provider, client, setup_router_db, data_files):
    """Requesting a ClinVar namespace adds both ClinVar columns, populated only for the linked variant."""
    # The ClinVar control seeded in setup_router_db has db_version="11_2024", mapping to namespace clinvar.2024_11.
    clinvar_namespace = "clinvar.2024_11"
    significance_column = f"{clinvar_namespace}.clinical_significance"
    review_status_column = f"{clinvar_namespace}.clinical_review_status"

    experiment = create_experiment(client)
    scores_csv = data_files / "scores.csv"
    score_set = create_seq_score_set_with_mapped_variants(client, session, data_provider, experiment["urn"], scores_csv)
    link_clinvar_control_to_mapped_variant(session, score_set)

    # Publishing enqueues a worker job; stub the queue and confirm it was used exactly once.
    with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
        published_score_set = publish_score_set(client, score_set["urn"])
        worker_queue.assert_called_once()

    url = (
        f"/api/v1/score-sets/{published_score_set['urn']}/variants/data"
        f"?namespaces={clinvar_namespace}&drop_na_columns=false"
    )
    response = client.get(url)
    assert response.status_code == 200

    reader = csv.DictReader(StringIO(response.text))
    assert significance_column in reader.fieldnames
    assert review_status_column in reader.fieldnames

    rows = list(reader)
    # The first row is expected to be the variant linked to the ClinVar control.
    linked_row = rows[0]
    assert linked_row[significance_column] == TEST_CLINVAR_CONTROL["clinical_significance"]
    assert linked_row[review_status_column] == TEST_CLINVAR_CONTROL["clinical_review_status"]

    # Every remaining variant has no control for this version and must be reported as NA.
    unlinked_rows = rows[1:]
    assert all(row[significance_column] == "NA" for row in unlinked_rows)
    assert all(row[review_status_column] == "NA" for row in unlinked_rows)
3189+
3190+
3191+
def test_download_clinvar_namespace_with_no_matching_version(
    session, data_provider, client, setup_router_db, data_files
):
    """When no controls match the requested ClinVar version, all rows return NA.

    The NA checks use ``all(...)``, which is vacuously true for an empty sequence,
    so the export is first asserted to contain at least one data row.
    """
    # clinvar.2023_01 does not match the seeded control (11_2024), so all rows should be NA.
    clinvar_namespace = "clinvar.2023_01"
    significance_column = f"{clinvar_namespace}.clinical_significance"
    review_status_column = f"{clinvar_namespace}.clinical_review_status"

    experiment = create_experiment(client)
    score_set = create_seq_score_set_with_mapped_variants(
        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
    )
    link_clinvar_control_to_mapped_variant(session, score_set)

    with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
        published_score_set = publish_score_set(client, score_set["urn"])
        worker_queue.assert_called_once()

    response = client.get(
        f"/api/v1/score-sets/{published_score_set['urn']}/variants/data"
        f"?namespaces={clinvar_namespace}&drop_na_columns=false"
    )
    assert response.status_code == 200
    reader = csv.DictReader(StringIO(response.text))
    assert significance_column in reader.fieldnames
    assert review_status_column in reader.fieldnames

    rows = list(reader)
    # Guard against a vacuous pass: an export with zero data rows would satisfy every all() below.
    assert rows, "expected at least one variant row in the CSV export"
    assert all(row[significance_column] == "NA" for row in rows)
    assert all(row[review_status_column] == "NA" for row in rows)
3219+
3220+
3221+
def test_download_multiple_clinvar_namespaces_in_variant_data_path(
    session, data_provider, client, setup_router_db, data_files
):
    """Two ClinVar namespaces yield two separate column pairs; only the matching version carries values."""
    matching_ns = "clinvar.2024_11"  # matches db_version="11_2024" seeded in setup_router_db
    non_matching_ns = "clinvar.2023_01"  # no controls with this version
    matching_significance = f"{matching_ns}.clinical_significance"
    matching_review_status = f"{matching_ns}.clinical_review_status"
    non_matching_significance = f"{non_matching_ns}.clinical_significance"
    non_matching_review_status = f"{non_matching_ns}.clinical_review_status"

    experiment = create_experiment(client)
    scores_csv = data_files / "scores.csv"
    score_set = create_seq_score_set_with_mapped_variants(client, session, data_provider, experiment["urn"], scores_csv)
    link_clinvar_control_to_mapped_variant(session, score_set)

    # Publishing enqueues a worker job; stub the queue and confirm it was used exactly once.
    with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
        published_score_set = publish_score_set(client, score_set["urn"])
        worker_queue.assert_called_once()

    url = (
        f"/api/v1/score-sets/{published_score_set['urn']}/variants/data"
        f"?namespaces={matching_ns}&namespaces={non_matching_ns}&drop_na_columns=false"
    )
    response = client.get(url)
    assert response.status_code == 200

    reader = csv.DictReader(StringIO(response.text))
    fieldnames = reader.fieldnames
    # Each requested namespace contributes its own pair of columns.
    assert matching_significance in fieldnames
    assert matching_review_status in fieldnames
    assert non_matching_significance in fieldnames
    assert non_matching_review_status in fieldnames

    rows = list(reader)
    # Matching version: the first row is expected to carry the linked control's values.
    linked_row = rows[0]
    assert linked_row[matching_significance] == TEST_CLINVAR_CONTROL["clinical_significance"]
    assert linked_row[matching_review_status] == TEST_CLINVAR_CONTROL["clinical_review_status"]
    # Non-matching version: no row may carry a value.
    assert all(row[non_matching_significance] == "NA" for row in rows)
    assert all(row[non_matching_review_status] == "NA" for row in rows)
3257+
3258+
3259+
def test_invalid_clinvar_namespace_returns_422(client, setup_router_db, data_files):
    """Requesting ``clinvar.2024_13`` (month 13 is out of range) must be rejected with HTTP 422."""
    experiment = create_experiment(client)
    score_set = create_seq_score_set(client, experiment["urn"])

    url = f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=clinvar.2024_13"
    response = client.get(url)

    assert response.status_code == 422
3268+
3269+
3270+
def test_unrecognized_namespace_returns_422(client, setup_router_db, data_files):
    """Requesting a namespace string the API does not recognize must be rejected with HTTP 422."""
    experiment = create_experiment(client)
    score_set = create_seq_score_set(client, experiment["urn"])

    url = f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=unknown_namespace"
    response = client.get(url)

    assert response.status_code == 422
3279+
3280+
31573281
########################################################################################################################
31583282
# Fetching clinical controls and control options for a score set
31593283
########################################################################################################################

0 commit comments

Comments
 (0)