From 5368f2c969e0fdebeeb8fb01d40f24afb79bc293 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 4 Mar 2026 16:54:04 -0800 Subject: [PATCH 01/16] fix: correct false-positive URN search tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 7 URN search tests sent {"urn": ...} in the search payload, but ScoreSetsSearch has no `urn` field — Pydantic silently dropped it. Tests passed coincidentally because each created only one score set, making unfiltered results match the expected count. Fix by using {"text": ...} (matching actual frontend behavior) and adding a second decoy score set to each test so the filter is actually exercised. --- tests/routers/test_score_set.py | 39 +++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index e170bedd..8073967d 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -1935,8 +1935,10 @@ def test_search_private_score_sets_urn_match(session, data_provider, client, set experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") - search_payload = {"urn": score_set["urn"]} + search_payload = {"text": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -1944,14 +1946,15 @@ def test_search_private_score_sets_urn_match(session, data_provider, client, set assert response.json()["scoreSets"][0]["urn"] == score_set["urn"] -# There is space in the end of test urn. The search result returned nothing before. def test_search_private_score_sets_urn_with_space_match(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") urn_with_space = score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -1989,26 +1992,29 @@ def test_search_others_private_score_sets_urn_match(session, data_provider, clie experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set["urn"]} + search_payload = {"text": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 0 assert len(response.json()["scoreSets"]) == 0 -# There is space in the end of test urn. The search result returned nothing before. def test_search_others_private_score_sets_urn_with_space_match( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) urn_with_space = score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 0 @@ -2131,13 +2137,16 @@ def test_search_public_score_sets_urn_with_space_match(session, data_provider, c experiment = create_experiment(client, {"title": "Experiment 1"}) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: published_score_set = publish_score_set(client, score_set["urn"]) - worker_queue.assert_called_once() + publish_score_set(client, decoy["urn"]) + assert worker_queue.call_count == 2 urn_with_space = published_score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -2187,13 +2196,16 @@ def test_search_others_public_score_sets_urn_match(session, data_provider, clien experiment = create_experiment(client, {"title": "Experiment 1"}) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: published_score_set = publish_score_set(client, score_set["urn"]) - worker_queue.assert_called_once() + publish_score_set(client, decoy["urn"]) + assert worker_queue.call_count == 2 change_ownership(session, published_score_set["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set["urn"]} + search_payload = {"text": published_score_set["urn"]} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -2207,14 +2219,17 @@ def test_search_others_public_score_sets_urn_with_space_match( experiment = create_experiment(client, {"title": "Experiment 1"}) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: published_score_set = publish_score_set(client, score_set["urn"]) - worker_queue.assert_called_once() + publish_score_set(client, decoy["urn"]) + assert worker_queue.call_count == 2 change_ownership(session, published_score_set["urn"], ScoreSetDbModel) urn_with_space = published_score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 From 95fab8551e6114d4a90441c904f83767f39bdbfa Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 6 Mar 2026 13:38:02 -0800 Subject: [PATCH 02/16] fix: capture return value of query filter builder in search count query The count query in `search_score_sets` was discarding the return value of `build_search_score_sets_query_filter`, so the count would reflect all score sets in the database rather than only those matching the search filters. --- src/mavedb/lib/score_sets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index 134457f7..27f611fc 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -292,7 +292,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: # query. score_sets = score_sets[: search.limit] count_query = db.query(ScoreSet) - build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search) + count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search) num_score_sets = count_query.order_by(None).limit(None).count() save_to_logging_context({"matching_resources": num_score_sets}) From 6c77ef0b9640f01bf9e6aa091b8900c1321c33ce Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Fri, 6 Mar 2026 13:45:31 -0800 Subject: [PATCH 03/16] fix: use selectinload for experiment relationships in score set search Switch one-to-many experiment relationship loading (keyword_objs, doi_identifiers, publication_identifier_associations, raw_read_identifiers) from joinedload to selectinload inside the contains_eager block. This prevents row multiplication from causing the SQL LIMIT to apply to multiplied rows rather than unique score sets, which resulted in search returning fewer results than expected on databases with rich experiment metadata. --- src/mavedb/lib/score_sets.py | 16 +++++++++--- tests/routers/test_score_set.py | 46 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index 27f611fc..d6fa605f 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -238,18 +238,26 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: score_sets: list[ScoreSet] = ( query.join(ScoreSet.experiment) .options( + # Use selectinload for one-to-many experiment relationships to avoid row + # multiplication in the main query. joinedload would LEFT OUTER JOIN these + # into the main SQL query, and because they're nested inside contains_eager, + # SQLAlchemy's subquery-wrapping logic doesn't protect the LIMIT clause from + # being applied to multiplied rows rather than unique score sets. This would + # cause the count of returned score sets to be less than the requested limit, + # and the count query would be triggered even when the number of unique score + # sets in the main query results exceeds the limit. contains_eager(ScoreSet.experiment).options( joinedload(Experiment.experiment_set), - joinedload(Experiment.keyword_objs).joinedload( + selectinload(Experiment.keyword_objs).joinedload( ExperimentControlledKeywordAssociation.controlled_keyword ), joinedload(Experiment.created_by), joinedload(Experiment.modified_by), - joinedload(Experiment.doi_identifiers), - joinedload(Experiment.publication_identifier_associations).joinedload( + selectinload(Experiment.doi_identifiers), + selectinload(Experiment.publication_identifier_associations).joinedload( ExperimentPublicationIdentifierAssociation.publication ), - joinedload(Experiment.raw_read_identifiers), + selectinload(Experiment.raw_read_identifiers), selectinload(Experiment.score_sets).options( joinedload(ScoreSet.doi_identifiers), joinedload(ScoreSet.publication_identifier_associations).joinedload( diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index e170bedd..f412b16a 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -38,6 +38,7 @@ TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_CROSSREF_IDENTIFIER, + TEST_EXPERIMENT_WITH_KEYWORD, TEST_GNOMAD_DATA_VERSION, TEST_INACTIVE_LICENSE, TEST_MAPPED_VARIANT_WITH_HGVS_G_EXPRESSION, @@ -2403,6 +2404,51 @@ def test_search_filter_options_hidden_by_published_superseding_version( assert target_name not in target_names +def test_search_score_sets_reports_correct_total_count_with_limit( + session, data_provider, client, setup_router_db, data_files +): + """When more published score sets exist than the search limit, num_score_sets should reflect the true total.""" + num_score_sets = 3 + for i in range(num_score_sets): + experiment = create_experiment(client, {"title": f"Experiment {i}"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {i}"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + publish_score_set(client, score_set["urn"]) + + search_payload = {"limit": 2} + response = client.post("/api/v1/score-sets/search", json=search_payload) + assert response.status_code == 200 + assert len(response.json()["scoreSets"]) == 2 + assert response.json()["numScoreSets"] == num_score_sets + + +def test_search_score_sets_not_affected_by_experiment_metadata( + session, data_provider, client, setup_router_db, data_files +): + """Experiments with multiple keywords should not reduce the number of score sets returned by search. + + This is a regression test for a bug where joinedload on one-to-many experiment relationships caused row + multiplication in the main SQL query. The LIMIT clause was applied to the multiplied rows rather than unique + score sets, resulting in fewer results than expected. + """ + num_score_sets = 3 + for i in range(num_score_sets): + experiment = create_experiment(client, {**TEST_EXPERIMENT_WITH_KEYWORD, "title": f"Experiment {i}"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {i}"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + publish_score_set(client, score_set["urn"]) + + search_payload = {"limit": 2} + response = client.post("/api/v1/score-sets/search", json=search_payload) + assert response.status_code == 200 + assert len(response.json()["scoreSets"]) == 2 + assert response.json()["numScoreSets"] == num_score_sets + + ######################################################################################################################## # Score set deletion ######################################################################################################################## From 1d3536586cc72b8042710a7c6765ef0c04f5eb20 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 10 Mar 2026 12:46:11 -0700 Subject: [PATCH 04/16] feat: configure database connection pool via environment variables Add DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables to control SQLAlchemy connection pool settings per environment. Enable pool_pre_ping to discard stale connections on checkout. Defaults remain at 5/10 for local development. --- src/mavedb/db/session.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mavedb/db/session.py b/src/mavedb/db/session.py index ab75604a..0ddb1c32 100644 --- a/src/mavedb/db/session.py +++ b/src/mavedb/db/session.py @@ -9,14 +9,15 @@ DB_DATABASE_NAME = os.getenv("DB_DATABASE_NAME") DB_USERNAME = os.getenv("DB_USERNAME") DB_PASSWORD = os.getenv("DB_PASSWORD") +DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "5")) +DB_MAX_OVERFLOW = int(os.getenv("DB_MAX_OVERFLOW", "10")) -# DB_URL = "sqlite:///./sql_app.db" DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_DATABASE_NAME}" engine = create_engine( - # For PostgreSQL: - DB_URL - # For SQLite: - # DB_URL, connect_args={"check_same_thread": False} + DB_URL, + pool_size=DB_POOL_SIZE, + max_overflow=DB_MAX_OVERFLOW, + pool_pre_ping=True, ) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) From 4462d609460bb7a204367e96893346f31787784a Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:09:41 -0700 Subject: [PATCH 05/16] GET request for multiple score sets --- src/mavedb/routers/score_sets.py | 32 ++++++++++++++++++++ tests/routers/test_score_set.py | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 7376ca4b..bab41df9 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -672,6 +672,38 @@ def search_my_score_sets( return {"score_sets": enriched_score_sets, "num_score_sets": num_score_sets} +@router.get( + "/score-sets/", + status_code=200, + response_model=list[score_set.ScoreSet], + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + response_model_exclude_none=True, + summary="Fetch score sets by URN list", +) +async def show_score_sets( + *, + urns: str = Query(..., description="Comma-separated list of score set URNs"), + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(get_current_user), +) -> Any: + """ + Fetch score sets identified by a list of URNs. + """ + urn_list = [urn.strip() for urn in urns.split(",") if urn.strip()] + if not urn_list: + raise HTTPException(status_code=422, detail="At least one URN is required") + + save_to_logging_context({"requested_resource": urn_list}) + response_items: list[score_set.ScoreSet] = [] + for urn in urn_list: + item = await fetch_score_set_by_urn(db, urn, user_data, None, False) + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment}) + response_items.append(response_item) + + return response_items + + @router.get( "/score-sets/{urn}", status_code=200, diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index f412b16a..85e17d64 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -713,6 +713,56 @@ def test_get_own_private_score_set(client, setup_router_db): assert (key, expected_response[key]) == (key, response_data[key]) +def test_get_score_sets_by_comma_separated_urns(client, setup_router_db): + experiment = create_experiment(client) + first_score_set = create_seq_score_set(client, experiment["urn"]) + second_score_set = create_seq_score_set(client, experiment["urn"]) + + response = client.get( + "/api/v1/score-sets/", + params={"urns": f"{first_score_set['urn']}, {second_score_set['urn']}"}, + ) + assert response.status_code == 200 + + response_data = response.json() + assert [item["urn"] for item in response_data] == [first_score_set["urn"], second_score_set["urn"]] + + for item in response_data: + jsonschema.validate(instance=item, schema=ScoreSet.model_json_schema()) + + +def test_get_score_sets_requires_at_least_one_urn(client, setup_router_db): + response = client.get("/api/v1/score-sets/", params={"urns": " , "}) + assert response.status_code == 422 + assert response.json()["detail"] == "At least one URN is required" + + +def test_get_score_sets_with_mixed_valid_and_invalid_urns_returns_404(client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + missing_urn = "urn:mavedb:99999999-z-9" + + response = client.get( + "/api/v1/score-sets/", + params={"urns": f"{score_set['urn']},{missing_urn}"}, + ) + assert response.status_code == 404 + assert response.json()["detail"] == f"score set with URN '{missing_urn}' not found" + + +def test_get_score_sets_with_whitespace_around_urns_in_mixed_list_returns_404(client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + missing_urn = "urn:mavedb:99999999-z-9" + + response = client.get( + "/api/v1/score-sets/", + params={"urns": f" {score_set['urn']} , {missing_urn} "}, + ) + assert response.status_code == 404 + assert response.json()["detail"] == f"score set with URN '{missing_urn}' not found" + + def test_cannot_get_other_user_private_score_set(session, client, setup_router_db): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) From b99c1fdb9cfa98b3331644582fd52388e87387ea Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 18 Mar 2026 16:51:47 -0700 Subject: [PATCH 06/16] API endpoint for fetching recently published score sets --- src/mavedb/routers/score_sets.py | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 7376ca4b..a8cd1968 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -672,6 +672,54 @@ def search_my_score_sets( return {"score_sets": enriched_score_sets, "num_score_sets": num_score_sets} +RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT = 20 + + +@router.get( + "/score-sets/recently-published", + status_code=200, + response_model=list[score_set.ScoreSet], + response_model_exclude_none=True, + summary="List recently published score sets", +) +def list_recently_published_score_sets( + limit: int = Query( + default=10, + ge=1, + le=RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT, + description=f"Number of score sets to return (maximum {RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT}).", + ), + db: Session = Depends(deps.get_db), + user_data: Optional[UserData] = Depends(get_current_user), +) -> Any: + """ + Return the most recently published score sets, ordered by publication date descending. + """ + save_to_logging_context({"requested_resource": "recently-published", "limit": limit}) + + items = ( + db.query(ScoreSet) + .filter(ScoreSet.published_date.isnot(None), ScoreSet.private.is_(False)) + .order_by(ScoreSet.published_date.desc(), ScoreSet.urn.desc()) + .limit(limit) + .all() + ) + + result = [] + for item in items: + if not has_permission(user_data, item, Action.READ).permitted: + continue + if ( + item.superseding_score_set + and not has_permission(user_data, item.superseding_score_set, Action.READ).permitted + ): + item.superseding_score_set = None + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + result.append(score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment})) + + return result + + @router.get( "/score-sets/{urn}", status_code=200, From cd0b333b5f61eeb0b865e14be2cce08d90c28135 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 18 Mar 2026 16:52:04 -0700 Subject: [PATCH 07/16] Unit tests for recently published score sets API endpoint --- tests/helpers/util/score_set.py | 12 ++--- tests/routers/test_experiments.py | 76 ++++++++++++++++------------ tests/routers/test_score_set.py | 84 +++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 39 deletions(-) diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py index b2a8b2c6..cd9f1bed 100644 --- a/tests/helpers/util/score_set.py +++ b/tests/helpers/util/score_set.py @@ -165,9 +165,9 @@ def create_seq_score_set_with_variants( count_columns_metadata_json_path, ) - assert score_set["numVariants"] == 3, ( - f"Could not create sequence based score set with variants within experiment {experiment_urn}" - ) + assert ( + score_set["numVariants"] == 3 + ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) return score_set @@ -196,9 +196,9 @@ def create_acc_score_set_with_variants( count_columns_metadata_json_path, ) - assert score_set["numVariants"] == 3, ( - f"Could not create sequence based score set with variants within experiment {experiment_urn}" - ) + assert ( + score_set["numVariants"] == 3 + ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) return score_set diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 1a04ed6a..2b6be3b5 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -363,10 +363,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_me assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -401,10 +400,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_sy assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -478,10 +476,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_syst assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -516,10 +513,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_mech assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -717,23 +713,28 @@ def test_update_experiment_keywords(session, client, setup_router_db): assert response.status_code == 200 experiment = response.json() experiment_post_payload = experiment.copy() - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": { - "key": "Phenotypic Assay Profiling Strategy", - "label": "Shotgun sequencing", - "special": False, - "description": "Description" - }, - "description": "Details of phenotypic assay profiling strategy", - }, - - ]}) + "keywords": [ + { + "keyword": { + "key": "Phenotypic Assay Profiling Strategy", + "label": "Shotgun sequencing", + "special": False, + "description": "Description", + }, + "description": "Details of phenotypic assay profiling strategy", + }, + ] + } + ) updated_response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) assert updated_response.status_code == 200 updated_experiment = updated_response.json() updated_expected_response = deepcopy(TEST_EXPERIMENT_WITH_UPDATE_KEYWORD_RESPONSE) - updated_expected_response.update({"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]}) + updated_expected_response.update( + {"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]} + ) assert sorted(updated_expected_response.keys()) == sorted(updated_experiment.keys()) for key in updated_experiment: assert (key, updated_expected_response[key]) == (key, updated_experiment[key]) @@ -745,12 +746,21 @@ def test_update_experiment_keywords_case_insensitive(session, client, setup_rout experiment = create_experiment(client) experiment_post_payload = experiment.copy() # Test database has Delivery Method. The updating keyword's key is delivery method. - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": {"key": "delivery method", "label": "Other", "special": False, "description": "Description"}, - "description": "Details of delivery method", - }, - ]}) + "keywords": [ + { + "keyword": { + "key": "delivery method", + "label": "Other", + "special": False, + "description": "Description", + }, + "description": "Details of delivery method", + }, + ] + } + ) response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) response_data = response.json() expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_RESPONSE) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index f412b16a..656a7f93 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -1414,6 +1414,90 @@ def test_cannot_publish_score_set_without_variants(client, setup_router_db): assert "cannot publish score set without variant scores" in response_data["detail"] +######################################################################################################################## +# Recently published score sets +######################################################################################################################## + + +def test_recently_published_returns_empty_list_when_no_score_sets_published(client, setup_router_db): + response = client.get("/api/v1/score-sets/recently-published") + assert response.status_code == 200 + assert response.json() == [] + + +def test_recently_published_returns_published_score_sets(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"]) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published_1 = publish_score_set(client, score_set_1["urn"]) + published_2 = publish_score_set(client, score_set_2["urn"]) + + response = client.get("/api/v1/score-sets/recently-published") + assert response.status_code == 200 + response_data = response.json() + + returned_urns = [ss["urn"] for ss in response_data] + assert published_1["urn"] in returned_urns + assert published_2["urn"] in returned_urns + + +def test_recently_published_does_not_return_unpublished_score_sets(client, setup_router_db): + experiment = create_experiment(client) + create_seq_score_set(client, experiment["urn"]) + + response = client.get("/api/v1/score-sets/recently-published") + assert response.status_code == 200 + assert response.json() == [] + + +def test_recently_published_respects_limit_parameter(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + published_urns = [] + for _ in range(3): + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published = publish_score_set(client, score_set["urn"]) + published_urns.append(published["urn"]) + + response = client.get("/api/v1/score-sets/recently-published?limit=2") + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 2 + + +def test_recently_published_rejects_limit_exceeding_maximum(client, setup_router_db): + response = client.get("/api/v1/score-sets/recently-published?limit=21") + assert response.status_code == 422 + + +def test_recently_published_rejects_limit_of_zero(client, setup_router_db): + response = client.get("/api/v1/score-sets/recently-published?limit=0") + assert response.status_code == 422 + + +def test_recently_published_accessible_to_anonymous_user( + session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published = publish_score_set(client, score_set["urn"]) + + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/score-sets/recently-published") + + assert response.status_code == 200 + returned_urns = [ss["urn"] for ss in response.json()] + assert published["urn"] in returned_urns + + def test_cannot_publish_other_user_private_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) From 28d2a30f70ce12eb3474a0eefef2c5b2d9bc50cf Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 18 Mar 2026 18:24:28 -0700 Subject: [PATCH 08/16] Unit test fix --- tests/routers/test_score_set.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 656a7f93..72615ceb 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -1456,13 +1456,19 @@ def test_recently_published_does_not_return_unpublished_score_sets(client, setup def test_recently_published_respects_limit_parameter(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - published_urns = [] + # Create and upload variants for all score sets before publishing any, because publishing + # changes the experiment URN from a tmp URN to a permanent URN. + score_sets = [] for _ in range(3): score_set = create_seq_score_set(client, experiment["urn"]) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + score_sets.append(score_set) + + published_urns = [] + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + for score_set in score_sets: published = publish_score_set(client, score_set["urn"]) - published_urns.append(published["urn"]) + published_urns.append(published["urn"]) response = client.get("/api/v1/score-sets/recently-published?limit=2") assert response.status_code == 200 From 7e204af04d99a20729c517d6909356aeb1941c9f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Wed, 18 Mar 2026 22:15:37 -0700 Subject: [PATCH 09/16] fix: duplicate score sets appearing in GET results --- src/mavedb/routers/experiments.py | 15 ++-- tests/routers/test_experiments.py | 116 +++++++++++++++++++++--------- 2 files changed, 94 insertions(+), 37 deletions(-) diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py index 2777f1f6..a428429d 100644 --- a/src/mavedb/routers/experiments.py +++ b/src/mavedb/routers/experiments.py @@ -195,10 +195,17 @@ def get_experiment_score_sets( .all() ) - filter_superseded_score_set_tails = [ - find_superseded_score_set_tail(score_set, Action.READ, user_data) for score_set in score_set_result - ] - filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None] + # Multiple chain heads can resolve to the same visible ancestor via find_superseded_score_set_tail + # (e.g. when several private superseding score sets all trace back to the same published score set). + # Deduplicate by ID to avoid returning the same score set more than once. + seen_ids: set[int] = set() + filtered_score_sets: list[ScoreSet] = [] + for ss in score_set_result: + tail = find_superseded_score_set_tail(ss, Action.READ, user_data) + if tail is not None and tail.id not in seen_ids: + seen_ids.add(tail.id) + filtered_score_sets.append(tail) + if not filtered_score_sets: save_to_logging_context({"associated_resources": []}) logger.info(msg="No score sets are associated with the requested experiment.", extra=logging_context()) diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 1a04ed6a..17ab2ab9 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -363,10 +363,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_me assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -401,10 +400,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_sy assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -478,10 +476,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_syst assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -516,10 +513,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_mech assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -717,23 +713,28 @@ def test_update_experiment_keywords(session, client, setup_router_db): assert response.status_code == 200 experiment = response.json() experiment_post_payload = experiment.copy() - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": { - "key": "Phenotypic Assay Profiling Strategy", - "label": "Shotgun sequencing", - "special": False, - "description": "Description" - }, - "description": "Details of phenotypic assay profiling strategy", - }, - - ]}) + "keywords": [ + { + "keyword": { + "key": "Phenotypic Assay Profiling Strategy", + "label": "Shotgun sequencing", + "special": False, + "description": "Description", + }, + "description": "Details of phenotypic assay profiling strategy", + }, + ] + } + ) updated_response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) assert updated_response.status_code == 200 updated_experiment = updated_response.json() updated_expected_response = deepcopy(TEST_EXPERIMENT_WITH_UPDATE_KEYWORD_RESPONSE) - updated_expected_response.update({"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]}) + updated_expected_response.update( + {"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]} + ) assert sorted(updated_expected_response.keys()) == sorted(updated_experiment.keys()) for key in updated_experiment: assert (key, updated_expected_response[key]) == (key, updated_experiment[key]) @@ -745,12 +746,21 @@ def test_update_experiment_keywords_case_insensitive(session, client, setup_rout experiment = create_experiment(client) experiment_post_payload = experiment.copy() # Test database has Delivery Method. The updating keyword's key is delivery method. - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": {"key": "delivery method", "label": "Other", "special": False, "description": "Description"}, - "description": "Details of delivery method", - }, - ]}) + "keywords": [ + { + "keyword": { + "key": "delivery method", + "label": "Other", + "special": False, + "description": "Description", + }, + "description": "Details of delivery method", + }, + ] + } + ) response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) response_data = response.json() expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_RESPONSE) @@ -1786,6 +1796,46 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments( assert response.json()[0]["urn"] == published_superseding_score_set["urn"] +def test_non_owner_searches_multiple_unpublished_superseding_score_sets_no_duplicates( + session, client, setup_router_db, data_files, data_provider +): + """When multiple private score sets supersede the same published score set, + a non-owner should see the published score set exactly once (no duplicates).""" + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = published_score_set["experiment"]["urn"] + + # Create two unpublished score sets that both supersede the same published score set. + superseding_1_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + superseding_1_payload["experimentUrn"] = experiment_urn + superseding_1_payload["supersededScoreSetUrn"] = published_score_set["urn"] + superseding_1_response = client.post("/api/v1/score-sets/", json=superseding_1_payload) + assert superseding_1_response.status_code == 200 + + superseding_2_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + superseding_2_payload["experimentUrn"] = experiment_urn + superseding_2_payload["supersededScoreSetUrn"] = published_score_set["urn"] + superseding_2_response = client.post("/api/v1/score-sets/", json=superseding_2_payload) + assert superseding_2_response.status_code == 200 + + # Transfer ownership so the requesting user is a non-owner of the superseding score sets. + change_ownership(session, superseding_1_response.json()["urn"], ScoreSetDbModel) + change_ownership(session, superseding_2_response.json()["urn"], ScoreSetDbModel) + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) + + response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") + assert response.status_code == 200 + response_urns = [ss["urn"] for ss in response.json()] + assert len(response_urns) == 1 + assert response_urns[0] == published_score_set["urn"] + + def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) From 3aaecfa5eac6a5c660c7300a5472037d9c077855 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Thu, 19 Mar 2026 10:31:23 -0700 Subject: [PATCH 10/16] Unit tests of API-level authorization when getting multiple score sets --- tests/routers/test_score_set.py | 237 ++++++++++++++++++++++---------- 1 file changed, 166 insertions(+), 71 deletions(-) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 85e17d64..60aeeff3 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -713,6 +713,97 @@ def test_get_own_private_score_set(client, setup_router_db): assert (key, expected_response[key]) == (key, response_data[key]) +def test_cannot_get_other_user_private_score_set(session, client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + assert response.status_code == 404 + response_data = response.json() + assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] + + +def test_anonymous_user_cannot_get_user_private_score_set(session, client, setup_router_db, anonymous_app_overrides): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + with DependencyOverrider(anonymous_app_overrides): + response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + + assert response.status_code == 404 + response_data = response.json() + assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] + + +def test_contributor_can_get_other_users_private_score_set(session, client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + add_contributor( + session, + score_set["urn"], + ScoreSetDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + + expected_response = update_expected_response_for_created_resources( + deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set + ) + expected_response["contributors"] = [ + { + "recordType": "Contributor", + "orcidId": TEST_USER["username"], + "givenName": TEST_USER["first_name"], + "familyName": TEST_USER["last_name"], + } + ] + expected_response["createdBy"] = { + "recordType": "User", + "orcidId": EXTRA_USER["username"], + "firstName": EXTRA_USER["first_name"], + "lastName": EXTRA_USER["last_name"], + } + expected_response["modifiedBy"] = { + "recordType": "User", + "orcidId": EXTRA_USER["username"], + "firstName": EXTRA_USER["first_name"], + "lastName": EXTRA_USER["last_name"], + } + expected_response["experiment"].update({"numScoreSets": 1}) + + response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + assert response.status_code == 200 + response_data = response.json() + + assert sorted(expected_response.keys()) == sorted(response_data.keys()) + for key in expected_response: + assert (key, expected_response[key]) == (key, response_data[key]) + + +def test_admin_can_get_other_user_private_score_set(session, client, admin_app_overrides, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + expected_response = update_expected_response_for_created_resources( + deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set + ) + expected_response["experiment"].update({"numScoreSets": 1}) + with DependencyOverrider(admin_app_overrides): + response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + + assert response.status_code == 200 + response_data = response.json() + assert sorted(expected_response.keys()) == sorted(response_data.keys()) + for key in expected_response: + assert (key, expected_response[key]) == (key, response_data[key]) + + +######################################################################################################################## +# Multiple score set fetching +######################################################################################################################## + + def test_get_score_sets_by_comma_separated_urns(client, setup_router_db): experiment = create_experiment(client) first_score_set = create_seq_score_set(client, experiment["urn"]) @@ -763,26 +854,94 @@ def test_get_score_sets_with_whitespace_around_urns_in_mixed_list_returns_404(cl assert response.json()["detail"] == f"score set with URN '{missing_urn}' not found" -def test_cannot_get_other_user_private_score_set(session, client, setup_router_db): +def test_show_score_sets_anonymous_can_fetch_public_score_sets( + session, client, setup_router_db, anonymous_app_overrides, data_provider, data_files +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published_score_set = publish_score_set(client, score_set["urn"]) + + with DependencyOverrider(anonymous_app_overrides): + response = client.get( + "/api/v1/score-sets/", + params={"urns": published_score_set["urn"]}, + ) + + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 1 + assert response_data[0]["urn"] == published_score_set["urn"] + + +def test_show_score_sets_anonymous_cannot_fetch_private_score_sets(session, client, setup_router_db, anonymous_app_overrides): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + # Score set is private (not published); change ownership so it belongs to another user change_ownership(session, score_set["urn"], ScoreSetDbModel) - response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + + with DependencyOverrider(anonymous_app_overrides): + response = client.get( + "/api/v1/score-sets/", + params={"urns": score_set["urn"]}, + ) + assert response.status_code == 404 + assert f"score set with URN '{score_set['urn']}' not found" in response.json()["detail"] + + +def test_show_score_sets_authenticated_user_can_fetch_own_private_score_sets(client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + + response = client.get( + "/api/v1/score-sets/", + params={"urns": score_set["urn"]}, + ) + + assert response.status_code == 200 response_data = response.json() - assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] + assert len(response_data) == 1 + assert response_data[0]["urn"] == score_set["urn"] -def test_anonymous_user_cannot_get_user_private_score_set(session, client, setup_router_db, anonymous_app_overrides): +def test_show_score_sets_authenticated_user_cannot_fetch_other_users_private_score_sets( + session, client, setup_router_db +): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) change_ownership(session, score_set["urn"], ScoreSetDbModel) + + response = client.get( + "/api/v1/score-sets/", + params={"urns": score_set["urn"]}, + ) + + assert response.status_code == 404 + assert f"score set with URN '{score_set['urn']}' not found" in response.json()["detail"] + + +def test_show_score_sets_mixed_public_and_private_returns_404( + session, client, setup_router_db, anonymous_app_overrides, data_provider, data_files +): + experiment = create_experiment(client) + public_score_set = create_seq_score_set(client, experiment["urn"]) + public_score_set = mock_worker_variant_insertion(client, session, data_provider, public_score_set, data_files / "scores.csv") + private_score_set = create_seq_score_set(client, experiment["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published_score_set = publish_score_set(client, public_score_set["urn"]) + # Make private_score_set belong to a different user to make it inaccessible anonymously + change_ownership(session, private_score_set["urn"], ScoreSetDbModel) + with DependencyOverrider(anonymous_app_overrides): - response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + response = client.get( + "/api/v1/score-sets/", + params={"urns": f"{published_score_set['urn']},{private_score_set['urn']}"}, + ) assert response.status_code == 404 - response_data = response.json() - assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] + assert f"score set with URN '{private_score_set['urn']}' not found" in response.json()["detail"] def test_can_add_contributor_in_both_experiment_and_score_set(session, client, setup_router_db): @@ -818,70 +977,6 @@ def test_can_add_contributor_in_both_experiment_and_score_set(session, client, s assert any(c["orcidId"] == TEST_USER["username"] for c in exp_response_data["contributors"]) -def test_contributor_can_get_other_users_private_score_set(session, client, setup_router_db): - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - change_ownership(session, score_set["urn"], ScoreSetDbModel) - add_contributor( - session, - score_set["urn"], - ScoreSetDbModel, - TEST_USER["username"], - TEST_USER["first_name"], - TEST_USER["last_name"], - ) - - expected_response = update_expected_response_for_created_resources( - deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set - ) - expected_response["contributors"] = [ - { - "recordType": "Contributor", - "orcidId": TEST_USER["username"], - "givenName": TEST_USER["first_name"], - "familyName": TEST_USER["last_name"], - } - ] - expected_response["createdBy"] = { - "recordType": "User", - "orcidId": EXTRA_USER["username"], - "firstName": EXTRA_USER["first_name"], - "lastName": EXTRA_USER["last_name"], - } - expected_response["modifiedBy"] = { - "recordType": "User", - "orcidId": EXTRA_USER["username"], - "firstName": EXTRA_USER["first_name"], - "lastName": EXTRA_USER["last_name"], - } - expected_response["experiment"].update({"numScoreSets": 1}) - - response = client.get(f"/api/v1/score-sets/{score_set['urn']}") - assert response.status_code == 200 - response_data = response.json() - - assert sorted(expected_response.keys()) == sorted(response_data.keys()) - for key in expected_response: - assert (key, expected_response[key]) == (key, response_data[key]) - - -def test_admin_can_get_other_user_private_score_set(session, client, admin_app_overrides, setup_router_db): - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - expected_response = update_expected_response_for_created_resources( - deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set - ) - expected_response["experiment"].update({"numScoreSets": 1}) - with DependencyOverrider(admin_app_overrides): - response = client.get(f"/api/v1/score-sets/{score_set['urn']}") - - assert response.status_code == 200 - response_data = response.json() - assert sorted(expected_response.keys()) == sorted(response_data.keys()) - for key in expected_response: - assert (key, expected_response[key]) == (key, response_data[key]) - - @pytest.mark.parametrize( "mock_publication_fetch", [ From 8f40fab588284459ba8898d0a6df347fd2feba9a Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Thu, 19 Mar 2026 11:32:58 -0700 Subject: [PATCH 11/16] feat: add endpoint for user created calibrations --- src/mavedb/routers/score_calibrations.py | 23 +++- tests/routers/test_score_calibrations.py | 130 +++++++++++++++++++++++ 2 files changed, 152 insertions(+), 1 deletion(-) diff --git a/src/mavedb/routers/score_calibrations.py b/src/mavedb/routers/score_calibrations.py index e56a0935..de8618a9 100644 --- a/src/mavedb/routers/score_calibrations.py +++ b/src/mavedb/routers/score_calibrations.py @@ -6,7 +6,7 @@ from mavedb import deps from mavedb.lib.authentication import get_current_user -from mavedb.lib.authorization import require_current_user_with_email +from mavedb.lib.authorization import require_current_user, require_current_user_with_email from mavedb.lib.flexible_model_loader import json_or_form_loader from mavedb.lib.logging import LoggedRoute from mavedb.lib.logging.context import ( @@ -54,6 +54,27 @@ ) +@router.get( + "/me", + status_code=200, + response_model=list[score_calibration.ScoreCalibrationWithScoreSetUrn], + responses={401: {}, 403: {}}, + summary="List my calibrations", +) +def list_my_calibrations( + *, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user), +) -> list[ScoreCalibration]: + """List all score calibrations created by the current user.""" + return ( + db.query(ScoreCalibration) + .filter(ScoreCalibration.created_by_id == user_data.user.id) + .options(selectinload(ScoreCalibration.score_set).selectinload(ScoreSet.contributors)) + .all() + ) + + @router.get( "/{urn}", response_model=score_calibration.ScoreCalibrationWithScoreSetUrn, diff --git a/tests/routers/test_score_calibrations.py b/tests/routers/test_score_calibrations.py index 63c5c43c..fe1aeba7 100644 --- a/tests/routers/test_score_calibrations.py +++ b/tests/routers/test_score_calibrations.py @@ -4884,3 +4884,133 @@ def test_cannot_create_score_calibration_without_email( assert response.status_code == 403 assert "email" in response.json()["detail"].lower() + + +########################################################### +# GET /score-calibrations/me +########################################################### + + +def test_anonymous_user_cannot_list_my_calibrations(client, setup_router_db, anonymous_app_overrides): + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 401 + + +def test_authenticated_user_with_no_calibrations_returns_empty_list(client, setup_router_db): + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + assert response.json() == [] + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_authenticated_user_sees_own_calibrations( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + calibrations = response.json() + assert len(calibrations) == 1 + assert calibrations[0]["urn"] == calibration["urn"] + assert calibrations[0]["scoreSetUrn"] == score_set["urn"] + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_user_does_not_see_other_users_calibrations( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files, extra_user_app_overrides +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + with DependencyOverrider(extra_user_app_overrides): + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + assert response.json() == [] + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_user_sees_calibrations_across_multiple_score_sets( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + score_set_2 = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + cal_1 = create_test_score_calibration_in_score_set_via_client( + client, score_set_1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + cal_2 = create_test_score_calibration_in_score_set_via_client( + client, score_set_2["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + calibrations = response.json() + assert len(calibrations) == 2 + returned_urns = {c["urn"] for c in calibrations} + assert cal_1["urn"] in returned_urns + assert cal_2["urn"] in returned_urns From 319d859da024dfe227c647a333c1ace93f188e3c Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 22 Mar 2026 18:17:36 +1100 Subject: [PATCH 12/16] fix: protect score set tail reads from null IDs --- src/mavedb/routers/experiments.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py index a428429d..66ce079e 100644 --- a/src/mavedb/routers/experiments.py +++ b/src/mavedb/routers/experiments.py @@ -202,8 +202,9 @@ def get_experiment_score_sets( filtered_score_sets: list[ScoreSet] = [] for ss in score_set_result: tail = find_superseded_score_set_tail(ss, Action.READ, user_data) - if tail is not None and tail.id not in seen_ids: - seen_ids.add(tail.id) + tail_id = tail.id if tail is not None else None + if tail is not None and tail_id is not None and tail_id not in seen_ids: + seen_ids.add(tail_id) filtered_score_sets.append(tail) if not filtered_score_sets: From e11911d4724c95b7495257370ec1e520d18f41ea Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Sun, 22 Mar 2026 18:24:00 +1100 Subject: [PATCH 13/16] fix: use shared error responses on calibration endpoints --- src/mavedb/routers/score_calibrations.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mavedb/routers/score_calibrations.py b/src/mavedb/routers/score_calibrations.py index de8618a9..e8b31478 100644 --- a/src/mavedb/routers/score_calibrations.py +++ b/src/mavedb/routers/score_calibrations.py @@ -31,6 +31,7 @@ from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification from mavedb.models.score_set import ScoreSet +from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES from mavedb.view_models import score_calibration logger = logging.getLogger(__name__) @@ -38,7 +39,7 @@ router = APIRouter( prefix="/api/v1/score-calibrations", tags=["Score Calibrations"], - responses={404: {"description": "Not found"}}, + responses={**PUBLIC_ERROR_RESPONSES}, route_class=LoggedRoute, ) @@ -58,7 +59,7 @@ "/me", status_code=200, response_model=list[score_calibration.ScoreCalibrationWithScoreSetUrn], - responses={401: {}, 403: {}}, + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, summary="List my calibrations", ) def list_my_calibrations( From e33e0108cb3fcd6941c3c2550b126e0a0e2c17e5 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 23 Mar 2026 09:43:00 +1100 Subject: [PATCH 14/16] fix: register missing ScoreCalibrationModify schema in OpenAPI spec --- src/mavedb/server_main.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index 23717e43..c7be2162 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -9,6 +9,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.gzip import GZipMiddleware from fastapi.openapi.utils import get_openapi +from pydantic.json_schema import models_json_schema from sqlalchemy.orm import configure_mappers from starlette.requests import Request from starlette.responses import JSONResponse @@ -240,6 +241,23 @@ def customize_openapi_schema(): variants.metadata, ] + # ScoreCalibrationModify (and its sub-models) are used in the PUT /score-calibrations/{urn} + # endpoint's openapi_extra $ref, but FastAPI only registers schemas it discovers through + # direct Body() parameters or response_model — not through Depends(). The flexible_model_loader + # pattern wraps the model in a generic async function (return type `T`), so FastAPI never sees + # the concrete type and never adds it to components/schemas. We register those missing schemas + # here explicitly to keep the generated OpenAPI spec valid. Eventually, this schema may be + # registered in other endpoints and this workaround can be removed, but for now this is the only + # endpoint where we use the ScoreCalibrationModify model. + from mavedb.view_models.score_calibration import ScoreCalibrationModify + + _, extra_schemas = models_json_schema( + [(ScoreCalibrationModify, "validation")], + ref_template="#/components/schemas/{model}", + ) + for name, schema in extra_schemas.get("$defs", {}).items(): + openapi_schema["components"]["schemas"].setdefault(name, schema) + app.openapi_schema = openapi_schema return app.openapi_schema From 15c6e78c3f90e8ff11c9cc07e11c58c508293e2f Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Mon, 23 Mar 2026 09:43:30 +1100 Subject: [PATCH 15/16] chore: bump version to 2026.1.2 --- pyproject.toml | 2 +- src/mavedb/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3df0e60d..664b7541 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "mavedb" -version = "2026.1.1" +version = "2026.1.2" description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect." license = "AGPL-3.0-only" readme = "README.md" diff --git a/src/mavedb/__init__.py b/src/mavedb/__init__.py index 97cd7350..dd3119a0 100644 --- a/src/mavedb/__init__.py +++ b/src/mavedb/__init__.py @@ -6,7 +6,7 @@ logger = module_logging.getLogger(__name__) __project__ = "mavedb-api" -__version__ = "2026.1.1" +__version__ = "2026.1.2" logger.info(f"MaveDB {__version__}") From c93b38e1f776a9e2742db87ea31d87d5dcc5fc70 Mon Sep 17 00:00:00 2001 From: Benjamin Capodanno Date: Tue, 24 Mar 2026 23:52:52 +1100 Subject: [PATCH 16/16] fix: update loading strategy in search_score_sets to use selectinload for all relationships --- src/mavedb/lib/score_sets.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index d6fa605f..142623dd 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -238,21 +238,22 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: score_sets: list[ScoreSet] = ( query.join(ScoreSet.experiment) .options( - # Use selectinload for one-to-many experiment relationships to avoid row - # multiplication in the main query. joinedload would LEFT OUTER JOIN these - # into the main SQL query, and because they're nested inside contains_eager, - # SQLAlchemy's subquery-wrapping logic doesn't protect the LIMIT clause from - # being applied to multiplied rows rather than unique score sets. This would - # cause the count of returned score sets to be less than the requested limit, - # and the count query would be triggered even when the number of unique score - # sets in the main query results exceeds the limit. + # Use selectinload for ALL relationships loaded via the main query. The presence of + # contains_eager disables SQLAlchemy's subquery-wrapping logic for the ENTIRE query, + # not just the relationships nested inside it. This means any joinedload that adds a + # LEFT OUTER JOIN to the main SQL query — even for many-to-one relationships — can + # corrupt the LIMIT clause by applying it to joined rows rather than unique score sets, + # causing fewer results than expected and suppressing the count query fallback. + # The only JOINs that should remain in the main query are the explicit experiment + # INNER JOIN (required by contains_eager) and the superseding score set LEFT OUTER JOIN + # added by the filter builder. contains_eager(ScoreSet.experiment).options( - joinedload(Experiment.experiment_set), + selectinload(Experiment.experiment_set), selectinload(Experiment.keyword_objs).joinedload( ExperimentControlledKeywordAssociation.controlled_keyword ), - joinedload(Experiment.created_by), - joinedload(Experiment.modified_by), + selectinload(Experiment.created_by), + selectinload(Experiment.modified_by), selectinload(Experiment.doi_identifiers), selectinload(Experiment.publication_identifier_associations).joinedload( ExperimentPublicationIdentifierAssociation.publication @@ -272,12 +273,12 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: ), ), ), - joinedload(ScoreSet.license), - joinedload(ScoreSet.doi_identifiers), - joinedload(ScoreSet.publication_identifier_associations).joinedload( + selectinload(ScoreSet.license), + selectinload(ScoreSet.doi_identifiers), + selectinload(ScoreSet.publication_identifier_associations).joinedload( ScoreSetPublicationIdentifierAssociation.publication ), - joinedload(ScoreSet.target_genes).options( + selectinload(ScoreSet.target_genes).options( joinedload(TargetGene.ensembl_offset).joinedload(EnsemblOffset.identifier), joinedload(TargetGene.refseq_offset).joinedload(RefseqOffset.identifier), joinedload(TargetGene.uniprot_offset).joinedload(UniprotOffset.identifier),