|
37 | 37 | TEST_BIORXIV_IDENTIFIER, |
38 | 38 | TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, |
39 | 39 | TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, |
| 40 | + TEST_CLINVAR_CONTROL, |
40 | 41 | TEST_CROSSREF_IDENTIFIER, |
41 | 42 | TEST_EXPERIMENT_WITH_KEYWORD, |
42 | 43 | TEST_GNOMAD_DATA_VERSION, |
|
76 | 77 | create_seq_score_set_with_mapped_variants, |
77 | 78 | create_seq_score_set_with_variants, |
78 | 79 | link_clinical_controls_to_mapped_variants, |
| 80 | + link_clinvar_control_to_mapped_variant, |
79 | 81 | link_gnomad_variants_to_mapped_variants, |
80 | 82 | publish_score_set, |
81 | 83 | ) |
@@ -3154,6 +3156,128 @@ def test_download_gnomad_file_in_variant_data_path(session, data_provider, clien |
3154 | 3156 | assert "gnomad.gnomad_af" in reader.fieldnames |
3155 | 3157 |
|
3156 | 3158 |
|
def test_download_clinvar_namespace_in_variant_data_path(session, data_provider, client, setup_router_db, data_files):
    """Download variant data for one ClinVar namespace and check both ClinVar columns.

    The first CSV row must carry the values of ``TEST_CLINVAR_CONTROL``; every later row must
    report ``NA`` in both ClinVar columns.
    """
    # Per the fixture notes, setup_router_db seeds a ClinVar control with db_version="11_2024",
    # which is addressed through the namespace clinvar.2024_11.
    clinvar_namespace = "clinvar.2024_11"
    significance_column = f"{clinvar_namespace}.clinical_significance"
    review_status_column = f"{clinvar_namespace}.clinical_review_status"

    experiment = create_experiment(client)
    score_set = create_seq_score_set_with_mapped_variants(
        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
    )
    link_clinvar_control_to_mapped_variant(session, score_set)

    # Publishing enqueues a worker job; stub the queue so nothing is actually dispatched.
    with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
        published_score_set = publish_score_set(client, score_set["urn"])
        worker_queue.assert_called_once()

    download_url = (
        f"/api/v1/score-sets/{published_score_set['urn']}/variants/data"
        f"?namespaces={clinvar_namespace}&drop_na_columns=false"
    )
    response = client.get(download_url)
    assert response.status_code == 200

    reader = csv.DictReader(StringIO(response.text))
    assert significance_column in reader.fieldnames
    assert review_status_column in reader.fieldnames

    rows = list(reader)
    # The first variant is the one linked to the ClinVar control.
    linked_row = rows[0]
    assert linked_row[significance_column] == TEST_CLINVAR_CONTROL["clinical_significance"]
    assert linked_row[review_status_column] == TEST_CLINVAR_CONTROL["clinical_review_status"]

    # No other variant has a control for this version, so both columns fall back to NA.
    for unlinked_row in rows[1:]:
        assert unlinked_row[significance_column] == "NA"
        assert unlinked_row[review_status_column] == "NA"
| 3189 | + |
| 3190 | + |
def test_download_clinvar_namespace_with_no_matching_version(
    session, data_provider, client, setup_router_db, data_files
):
    """When no controls match the requested ClinVar version, all rows return NA.

    The columns for the requested namespace must still be present (``drop_na_columns=false``),
    and every data row must hold ``NA`` in both of them.
    """
    # clinvar.2023_01 does not match the seeded control (11_2024), so all rows should be NA.
    clinvar_namespace = "clinvar.2023_01"
    significance_column = f"{clinvar_namespace}.clinical_significance"
    review_status_column = f"{clinvar_namespace}.clinical_review_status"

    experiment = create_experiment(client)
    score_set = create_seq_score_set_with_mapped_variants(
        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
    )
    link_clinvar_control_to_mapped_variant(session, score_set)

    # Publishing enqueues a worker job; stub the queue so nothing is actually dispatched.
    with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
        published_score_set = publish_score_set(client, score_set["urn"])
        worker_queue.assert_called_once()

    response = client.get(
        f"/api/v1/score-sets/{published_score_set['urn']}/variants/data"
        f"?namespaces={clinvar_namespace}&drop_na_columns=false"
    )
    assert response.status_code == 200
    reader = csv.DictReader(StringIO(response.text))
    assert significance_column in reader.fieldnames
    assert review_status_column in reader.fieldnames

    rows = list(reader)
    # all() is True for an empty iterable, so without this guard a header-only response
    # would let the NA checks below pass without inspecting a single variant.
    assert rows, "expected at least one variant row in the downloaded CSV"
    assert all(row[significance_column] == "NA" for row in rows)
    assert all(row[review_status_column] == "NA" for row in rows)
| 3219 | + |
| 3220 | + |
def test_download_multiple_clinvar_namespaces_in_variant_data_path(
    session, data_provider, client, setup_router_db, data_files
):
    """Request two ClinVar namespaces at once and check each gets its own pair of columns.

    The namespace matching the seeded control must show the control's values on the first row;
    the other namespace must be ``NA`` on every row.
    """
    # Per the fixture notes, setup_router_db seeds a control with db_version="11_2024".
    matching_ns = "clinvar.2024_11"
    # No control exists for this version.
    non_matching_ns = "clinvar.2023_01"

    experiment = create_experiment(client)
    score_set = create_seq_score_set_with_mapped_variants(
        client, session, data_provider, experiment["urn"], data_files / "scores.csv"
    )
    link_clinvar_control_to_mapped_variant(session, score_set)

    # Publishing enqueues a worker job; stub the queue so nothing is actually dispatched.
    with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
        published_score_set = publish_score_set(client, score_set["urn"])
        worker_queue.assert_called_once()

    download_url = (
        f"/api/v1/score-sets/{published_score_set['urn']}/variants/data"
        f"?namespaces={matching_ns}&namespaces={non_matching_ns}&drop_na_columns=false"
    )
    response = client.get(download_url)
    assert response.status_code == 200

    reader = csv.DictReader(StringIO(response.text))
    fieldnames = reader.fieldnames
    # Both namespaces contribute a significance and a review-status column.
    for namespace in (matching_ns, non_matching_ns):
        for field in ("clinical_significance", "clinical_review_status"):
            assert f"{namespace}.{field}" in fieldnames

    rows = list(reader)
    # Matching version: the first variant carries the control's data.
    linked_row = rows[0]
    assert linked_row[f"{matching_ns}.clinical_significance"] == TEST_CLINVAR_CONTROL["clinical_significance"]
    assert linked_row[f"{matching_ns}.clinical_review_status"] == TEST_CLINVAR_CONTROL["clinical_review_status"]

    # Non-matching version: nothing to report on any row.
    for row in rows:
        assert row[f"{non_matching_ns}.clinical_significance"] == "NA"
        assert row[f"{non_matching_ns}.clinical_review_status"] == "NA"
| 3257 | + |
| 3258 | + |
def test_invalid_clinvar_namespace_returns_422(client, setup_router_db, data_files):
    """Requesting ``clinvar.2024_13`` (month 13 is out of range) must yield HTTP 422."""
    experiment = create_experiment(client)
    score_set = create_seq_score_set(client, experiment["urn"])

    download_url = f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=clinvar.2024_13"
    response = client.get(download_url)

    assert response.status_code == 422
| 3268 | + |
| 3269 | + |
def test_unrecognized_namespace_returns_422(client, setup_router_db, data_files):
    """Requesting a namespace string the API does not recognize must yield HTTP 422."""
    experiment = create_experiment(client)
    score_set = create_seq_score_set(client, experiment["urn"])

    download_url = f"/api/v1/score-sets/{score_set['urn']}/variants/data?namespaces=unknown_namespace"
    response = client.get(download_url)

    assert response.status_code == 422
| 3279 | + |
| 3280 | + |
3157 | 3281 | ######################################################################################################################## |
3158 | 3282 | # Fetching clinical controls and control options for a score set |
3159 | 3283 | ######################################################################################################################## |
|
0 commit comments