diff --git a/pyproject.toml b/pyproject.toml index 3df0e60dd..664b75412 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "mavedb" -version = "2026.1.1" +version = "2026.1.2" description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect." license = "AGPL-3.0-only" readme = "README.md" diff --git a/src/mavedb/__init__.py b/src/mavedb/__init__.py index 97cd73507..dd3119a07 100644 --- a/src/mavedb/__init__.py +++ b/src/mavedb/__init__.py @@ -6,7 +6,7 @@ logger = module_logging.getLogger(__name__) __project__ = "mavedb-api" -__version__ = "2026.1.1" +__version__ = "2026.1.2" logger.info(f"MaveDB {__version__}") diff --git a/src/mavedb/db/session.py b/src/mavedb/db/session.py index ab75604ad..0ddb1c320 100644 --- a/src/mavedb/db/session.py +++ b/src/mavedb/db/session.py @@ -9,14 +9,15 @@ DB_DATABASE_NAME = os.getenv("DB_DATABASE_NAME") DB_USERNAME = os.getenv("DB_USERNAME") DB_PASSWORD = os.getenv("DB_PASSWORD") +DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "5")) +DB_MAX_OVERFLOW = int(os.getenv("DB_MAX_OVERFLOW", "10")) -# DB_URL = "sqlite:///./sql_app.db" DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_DATABASE_NAME}" engine = create_engine( - # For PostgreSQL: - DB_URL - # For SQLite: - # DB_URL, connect_args={"check_same_thread": False} + DB_URL, + pool_size=DB_POOL_SIZE, + max_overflow=DB_MAX_OVERFLOW, + pool_pre_ping=True, ) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index 134457f77..d6fa605fa 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -238,18 +238,26 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: score_sets: list[ScoreSet] = ( query.join(ScoreSet.experiment) .options( + # Use selectinload for one-to-many experiment relationships to avoid row + # multiplication in the main query. joinedload would LEFT OUTER JOIN these + # into the main SQL query, and because they're nested inside contains_eager, + # SQLAlchemy's subquery-wrapping logic doesn't protect the LIMIT clause from + # being applied to multiplied rows rather than unique score sets. This would + # cause the count of returned score sets to be less than the requested limit, + # and the count query would be triggered even when the number of unique score + # sets in the main query results exceeds the limit. contains_eager(ScoreSet.experiment).options( joinedload(Experiment.experiment_set), - joinedload(Experiment.keyword_objs).joinedload( + selectinload(Experiment.keyword_objs).joinedload( ExperimentControlledKeywordAssociation.controlled_keyword ), joinedload(Experiment.created_by), joinedload(Experiment.modified_by), - joinedload(Experiment.doi_identifiers), - joinedload(Experiment.publication_identifier_associations).joinedload( + selectinload(Experiment.doi_identifiers), + selectinload(Experiment.publication_identifier_associations).joinedload( ExperimentPublicationIdentifierAssociation.publication ), - joinedload(Experiment.raw_read_identifiers), + selectinload(Experiment.raw_read_identifiers), selectinload(Experiment.score_sets).options( joinedload(ScoreSet.doi_identifiers), joinedload(ScoreSet.publication_identifier_associations).joinedload( @@ -292,7 +300,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: # query. score_sets = score_sets[: search.limit] count_query = db.query(ScoreSet) - build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search) + count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search) num_score_sets = count_query.order_by(None).limit(None).count() save_to_logging_context({"matching_resources": num_score_sets}) diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py index 2777f1f69..66ce079e4 100644 --- a/src/mavedb/routers/experiments.py +++ b/src/mavedb/routers/experiments.py @@ -195,10 +195,18 @@ def get_experiment_score_sets( .all() ) - filter_superseded_score_set_tails = [ - find_superseded_score_set_tail(score_set, Action.READ, user_data) for score_set in score_set_result - ] - filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None] + # Multiple chain heads can resolve to the same visible ancestor via find_superseded_score_set_tail + # (e.g. when several private superseding score sets all trace back to the same published score set). + # Deduplicate by ID to avoid returning the same score set more than once. + seen_ids: set[int] = set() + filtered_score_sets: list[ScoreSet] = [] + for ss in score_set_result: + tail = find_superseded_score_set_tail(ss, Action.READ, user_data) + tail_id = tail.id if tail is not None else None + if tail is not None and tail_id is not None and tail_id not in seen_ids: + seen_ids.add(tail_id) + filtered_score_sets.append(tail) + if not filtered_score_sets: save_to_logging_context({"associated_resources": []}) logger.info(msg="No score sets are associated with the requested experiment.", extra=logging_context()) diff --git a/src/mavedb/routers/score_calibrations.py b/src/mavedb/routers/score_calibrations.py index e56a09359..e8b31478c 100644 --- a/src/mavedb/routers/score_calibrations.py +++ b/src/mavedb/routers/score_calibrations.py @@ -6,7 +6,7 @@ from mavedb import deps from mavedb.lib.authentication import get_current_user -from mavedb.lib.authorization import require_current_user_with_email +from mavedb.lib.authorization import require_current_user, require_current_user_with_email from mavedb.lib.flexible_model_loader import json_or_form_loader from mavedb.lib.logging import LoggedRoute from mavedb.lib.logging.context import ( @@ -31,6 +31,7 @@ from mavedb.models.score_calibration import ScoreCalibration from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification from mavedb.models.score_set import ScoreSet +from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES from mavedb.view_models import score_calibration logger = logging.getLogger(__name__) @@ -38,7 +39,7 @@ router = APIRouter( prefix="/api/v1/score-calibrations", tags=["Score Calibrations"], - responses={404: {"description": "Not found"}}, + responses={**PUBLIC_ERROR_RESPONSES}, route_class=LoggedRoute, ) @@ -54,6 +55,27 @@ ) +@router.get( + "/me", + status_code=200, + response_model=list[score_calibration.ScoreCalibrationWithScoreSetUrn], + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + summary="List my calibrations", +) +def list_my_calibrations( + *, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user), +) -> list[ScoreCalibration]: + """List all score calibrations created by the current user.""" + return ( + db.query(ScoreCalibration) + .filter(ScoreCalibration.created_by_id == user_data.user.id) + .options(selectinload(ScoreCalibration.score_set).selectinload(ScoreSet.contributors)) + .all() + ) + + @router.get( "/{urn}", response_model=score_calibration.ScoreCalibrationWithScoreSetUrn, diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 7376ca4b1..47532cd31 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -672,6 +672,86 @@ def search_my_score_sets( return {"score_sets": enriched_score_sets, "num_score_sets": num_score_sets} +RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT = 20 + + +@router.get( + "/score-sets/recently-published", + status_code=200, + response_model=list[score_set.ScoreSet], + response_model_exclude_none=True, + summary="List recently published score sets", +) +def list_recently_published_score_sets( + limit: int = Query( + default=10, + ge=1, + le=RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT, + description=f"Number of score sets to return (maximum {RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT}).", + ), + db: Session = Depends(deps.get_db), + user_data: Optional[UserData] = Depends(get_current_user), +) -> Any: + """ + Return the most recently published score sets, ordered by publication date descending. + """ + save_to_logging_context({"requested_resource": "recently-published", "limit": limit}) + + items = ( + db.query(ScoreSet) + .filter(ScoreSet.published_date.isnot(None), ScoreSet.private.is_(False)) + .order_by(ScoreSet.published_date.desc(), ScoreSet.urn.desc()) + .limit(limit) + .all() + ) + + result = [] + for item in items: + if not has_permission(user_data, item, Action.READ).permitted: + continue + if ( + item.superseding_score_set + and not has_permission(user_data, item.superseding_score_set, Action.READ).permitted + ): + item.superseding_score_set = None + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + result.append(score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment})) + + return result + + +@router.get( + "/score-sets/", + status_code=200, + response_model=list[score_set.ScoreSet], + responses={**ACCESS_CONTROL_ERROR_RESPONSES}, + response_model_exclude_none=True, + summary="Fetch score sets by URN list", +) +async def show_score_sets( + *, + urns: str = Query(..., description="Comma-separated list of score set URNs"), + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(get_current_user), +) -> Any: + """ + Fetch score sets identified by a list of URNs. + """ + urn_list = [urn.strip() for urn in urns.split(",") if urn.strip()] + if not urn_list: + raise HTTPException(status_code=422, detail="At least one URN is required") + + save_to_logging_context({"requested_resource": urn_list}) + response_items: list[score_set.ScoreSet] = [] + for urn in urn_list: + item = await fetch_score_set_by_urn(db, urn, user_data, None, False) + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment}) + response_items.append(response_item) + + return response_items + + @router.get( "/score-sets/{urn}", status_code=200, diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index 23717e431..c7be2162f 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -9,6 +9,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.gzip import GZipMiddleware from fastapi.openapi.utils import get_openapi +from pydantic.json_schema import models_json_schema from sqlalchemy.orm import configure_mappers from starlette.requests import Request from starlette.responses import JSONResponse @@ -240,6 +241,23 @@ def customize_openapi_schema(): variants.metadata, ] + # ScoreCalibrationModify (and its sub-models) are used in the PUT /score-calibrations/{urn} + # endpoint's openapi_extra $ref, but FastAPI only registers schemas it discovers through + # direct Body() parameters or response_model — not through Depends(). The flexible_model_loader + # pattern wraps the model in a generic async function (return type `T`), so FastAPI never sees + # the concrete type and never adds it to components/schemas. We register those missing schemas + # here explicitly to keep the generated OpenAPI spec valid. Eventually, this schema may be + # registered in other endpoints and this workaround can be removed, but for now this is the only + # endpoint where we use the ScoreCalibrationModify model. + from mavedb.view_models.score_calibration import ScoreCalibrationModify + + _, extra_schemas = models_json_schema( + [(ScoreCalibrationModify, "validation")], + ref_template="#/components/schemas/{model}", + ) + for name, schema in extra_schemas.get("$defs", {}).items(): + openapi_schema["components"]["schemas"].setdefault(name, schema) + app.openapi_schema = openapi_schema return app.openapi_schema diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py index b2a8b2c64..cd9f1bedc 100644 --- a/tests/helpers/util/score_set.py +++ b/tests/helpers/util/score_set.py @@ -165,9 +165,9 @@ def create_seq_score_set_with_variants( count_columns_metadata_json_path, ) - assert score_set["numVariants"] == 3, ( - f"Could not create sequence based score set with variants within experiment {experiment_urn}" - ) + assert ( + score_set["numVariants"] == 3 + ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) return score_set @@ -196,9 +196,9 @@ def create_acc_score_set_with_variants( count_columns_metadata_json_path, ) - assert score_set["numVariants"] == 3, ( - f"Could not create sequence based score set with variants within experiment {experiment_urn}" - ) + assert ( + score_set["numVariants"] == 3 + ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema()) return score_set diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 1a04ed6a2..17ab2ab9d 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -363,10 +363,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_me assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -401,10 +400,9 @@ def test_cannot_create_experiment_that_keywords_has_endogenous_without_method_sy assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " - "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'Endogenous locus library method', " + "both 'Endogenous Locus Library Method System' and 'Endogenous Locus Library Method Mechanism' " + "must be present." ) @@ -478,10 +476,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_syst assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -516,10 +513,9 @@ def test_cannot_create_experiment_that_keywords_has_in_vitro_without_method_mech assert response.status_code == 422 response_data = response.json() assert ( - response_data["detail"] - == "If 'Variant Library Creation Method' is 'In vitro construct library method', " - "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " - "must be present." + response_data["detail"] == "If 'Variant Library Creation Method' is 'In vitro construct library method', " + "both 'In Vitro Construct Library Method System' and 'In Vitro Construct Library Method Mechanism' " + "must be present." ) @@ -717,23 +713,28 @@ def test_update_experiment_keywords(session, client, setup_router_db): assert response.status_code == 200 experiment = response.json() experiment_post_payload = experiment.copy() - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": { - "key": "Phenotypic Assay Profiling Strategy", - "label": "Shotgun sequencing", - "special": False, - "description": "Description" - }, - "description": "Details of phenotypic assay profiling strategy", - }, - - ]}) + "keywords": [ + { + "keyword": { + "key": "Phenotypic Assay Profiling Strategy", + "label": "Shotgun sequencing", + "special": False, + "description": "Description", + }, + "description": "Details of phenotypic assay profiling strategy", + }, + ] + } + ) updated_response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) assert updated_response.status_code == 200 updated_experiment = updated_response.json() updated_expected_response = deepcopy(TEST_EXPERIMENT_WITH_UPDATE_KEYWORD_RESPONSE) - updated_expected_response.update({"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]}) + updated_expected_response.update( + {"urn": updated_experiment["urn"], "experimentSetUrn": updated_experiment["experimentSetUrn"]} + ) assert sorted(updated_expected_response.keys()) == sorted(updated_experiment.keys()) for key in updated_experiment: assert (key, updated_expected_response[key]) == (key, updated_experiment[key]) @@ -745,12 +746,21 @@ def test_update_experiment_keywords_case_insensitive(session, client, setup_rout experiment = create_experiment(client) experiment_post_payload = experiment.copy() # Test database has Delivery Method. The updating keyword's key is delivery method. - experiment_post_payload.update({"keywords": [ + experiment_post_payload.update( { - "keyword": {"key": "delivery method", "label": "Other", "special": False, "description": "Description"}, - "description": "Details of delivery method", - }, - ]}) + "keywords": [ + { + "keyword": { + "key": "delivery method", + "label": "Other", + "special": False, + "description": "Description", + }, + "description": "Details of delivery method", + }, + ] + } + ) response = client.put(f"/api/v1/experiments/{experiment['urn']}", json=experiment_post_payload) response_data = response.json() expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_RESPONSE) @@ -1786,6 +1796,46 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments( assert response.json()[0]["urn"] == published_superseding_score_set["urn"] +def test_non_owner_searches_multiple_unpublished_superseding_score_sets_no_duplicates( + session, client, setup_router_db, data_files, data_provider +): + """When multiple private score sets supersede the same published score set, + a non-owner should see the published score set exactly once (no duplicates).""" + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = published_score_set["experiment"]["urn"] + + # Create two unpublished score sets that both supersede the same published score set. + superseding_1_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + superseding_1_payload["experimentUrn"] = experiment_urn + superseding_1_payload["supersededScoreSetUrn"] = published_score_set["urn"] + superseding_1_response = client.post("/api/v1/score-sets/", json=superseding_1_payload) + assert superseding_1_response.status_code == 200 + + superseding_2_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + superseding_2_payload["experimentUrn"] = experiment_urn + superseding_2_payload["supersededScoreSetUrn"] = published_score_set["urn"] + superseding_2_response = client.post("/api/v1/score-sets/", json=superseding_2_payload) + assert superseding_2_response.status_code == 200 + + # Transfer ownership so the requesting user is a non-owner of the superseding score sets. + change_ownership(session, superseding_1_response.json()["urn"], ScoreSetDbModel) + change_ownership(session, superseding_2_response.json()["urn"], ScoreSetDbModel) + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) + + response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") + assert response.status_code == 200 + response_urns = [ss["urn"] for ss in response.json()] + assert len(response_urns) == 1 + assert response_urns[0] == published_score_set["urn"] + + def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) diff --git a/tests/routers/test_score_calibrations.py b/tests/routers/test_score_calibrations.py index 63c5c43c1..fe1aeba7c 100644 --- a/tests/routers/test_score_calibrations.py +++ b/tests/routers/test_score_calibrations.py @@ -4884,3 +4884,133 @@ def test_cannot_create_score_calibration_without_email( assert response.status_code == 403 assert "email" in response.json()["detail"].lower() + + +########################################################### +# GET /score-calibrations/me +########################################################### + + +def test_anonymous_user_cannot_list_my_calibrations(client, setup_router_db, anonymous_app_overrides): + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 401 + + +def test_authenticated_user_with_no_calibrations_returns_empty_list(client, setup_router_db): + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + assert response.json() == [] + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_authenticated_user_sees_own_calibrations( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + calibration = create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + calibrations = response.json() + assert len(calibrations) == 1 + assert calibrations[0]["urn"] == calibration["urn"] + assert calibrations[0]["scoreSetUrn"] == score_set["urn"] + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_user_does_not_see_other_users_calibrations( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files, extra_user_app_overrides +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + create_test_score_calibration_in_score_set_via_client( + client, score_set["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + with DependencyOverrider(extra_user_app_overrides): + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + assert response.json() == [] + + +@pytest.mark.parametrize( + "mock_publication_fetch", + [ + [ + {"dbName": "PubMed", "identifier": TEST_PUBMED_IDENTIFIER}, + {"dbName": "bioRxiv", "identifier": TEST_BIORXIV_IDENTIFIER}, + ] + ], + indirect=["mock_publication_fetch"], +) +def test_user_sees_calibrations_across_multiple_score_sets( + client, setup_router_db, mock_publication_fetch, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + score_set_2 = create_seq_score_set_with_mapped_variants( + client, + session, + data_provider, + experiment["urn"], + data_files / "scores.csv", + ) + cal_1 = create_test_score_calibration_in_score_set_via_client( + client, score_set_1["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + cal_2 = create_test_score_calibration_in_score_set_via_client( + client, score_set_2["urn"], deepcamelize(TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED) + ) + + response = client.get("/api/v1/score-calibrations/me") + + assert response.status_code == 200 + calibrations = response.json() + assert len(calibrations) == 2 + returned_urns = {c["urn"] for c in calibrations} + assert cal_1["urn"] in returned_urns + assert cal_2["urn"] in returned_urns diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index e170bedda..c1476a65b 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -38,6 +38,7 @@ TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED, TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED, TEST_CROSSREF_IDENTIFIER, + TEST_EXPERIMENT_WITH_KEYWORD, TEST_GNOMAD_DATA_VERSION, TEST_INACTIVE_LICENSE, TEST_MAPPED_VARIANT_WITH_HGVS_G_EXPRESSION, @@ -734,39 +735,6 @@ def test_anonymous_user_cannot_get_user_private_score_set(session, client, setup assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] -def test_can_add_contributor_in_both_experiment_and_score_set(session, client, setup_router_db): - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - change_ownership(session, score_set["urn"], ScoreSetDbModel) - change_ownership(session, experiment["urn"], ExperimentDbModel) - add_contributor( - session, - score_set["urn"], - ScoreSetDbModel, - TEST_USER["username"], - TEST_USER["first_name"], - TEST_USER["last_name"], - ) - add_contributor( - session, - experiment["urn"], - ExperimentDbModel, - TEST_USER["username"], - TEST_USER["first_name"], - TEST_USER["last_name"], - ) - score_set_response = client.get(f"/api/v1/score-sets/{score_set['urn']}") - assert score_set_response.status_code == 200 - ss_response_data = score_set_response.json() - assert len(ss_response_data["contributors"]) == 1 - assert any(c["orcidId"] == TEST_USER["username"] for c in ss_response_data["contributors"]) - experiment_response = client.get(f"/api/v1/experiments/{experiment['urn']}") - assert experiment_response.status_code == 200 - exp_response_data = experiment_response.json() - assert len(exp_response_data["contributors"]) == 1 - assert any(c["orcidId"] == TEST_USER["username"] for c in exp_response_data["contributors"]) - - def test_contributor_can_get_other_users_private_score_set(session, client, setup_router_db): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -831,6 +799,184 @@ def test_admin_can_get_other_user_private_score_set(session, client, admin_app_o assert (key, expected_response[key]) == (key, response_data[key]) +######################################################################################################################## +# Multiple score set fetching +######################################################################################################################## + + +def test_get_score_sets_by_comma_separated_urns(client, setup_router_db): + experiment = create_experiment(client) + first_score_set = create_seq_score_set(client, experiment["urn"]) + second_score_set = create_seq_score_set(client, experiment["urn"]) + + response = client.get( + "/api/v1/score-sets/", + params={"urns": f"{first_score_set['urn']}, {second_score_set['urn']}"}, + ) + assert response.status_code == 200 + + response_data = response.json() + assert [item["urn"] for item in response_data] == [first_score_set["urn"], second_score_set["urn"]] + + for item in response_data: + jsonschema.validate(instance=item, schema=ScoreSet.model_json_schema()) + + +def test_get_score_sets_requires_at_least_one_urn(client, setup_router_db): + response = client.get("/api/v1/score-sets/", params={"urns": " , "}) + assert response.status_code == 422 + assert response.json()["detail"] == "At least one URN is required" + + +def test_get_score_sets_with_mixed_valid_and_invalid_urns_returns_404(client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + missing_urn = "urn:mavedb:99999999-z-9" + + response = client.get( + "/api/v1/score-sets/", + params={"urns": f"{score_set['urn']},{missing_urn}"}, + ) + assert response.status_code == 404 + assert response.json()["detail"] == f"score set with URN '{missing_urn}' not found" + + +def test_get_score_sets_with_whitespace_around_urns_in_mixed_list_returns_404(client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + missing_urn = "urn:mavedb:99999999-z-9" + + response = client.get( + "/api/v1/score-sets/", + params={"urns": f" {score_set['urn']} , {missing_urn} "}, + ) + assert response.status_code == 404 + assert response.json()["detail"] == f"score set with URN '{missing_urn}' not found" + + +def test_show_score_sets_anonymous_can_fetch_public_score_sets( + session, client, setup_router_db, anonymous_app_overrides, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published_score_set = publish_score_set(client, score_set["urn"]) + + with DependencyOverrider(anonymous_app_overrides): + response = client.get( + "/api/v1/score-sets/", + params={"urns": published_score_set["urn"]}, + ) + + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 1 + assert response_data[0]["urn"] == published_score_set["urn"] + + +def test_show_score_sets_anonymous_cannot_fetch_private_score_sets(session, client, setup_router_db, anonymous_app_overrides): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + # Score set is private (not published); change ownership so it belongs to another user + change_ownership(session, score_set["urn"], ScoreSetDbModel) + + with DependencyOverrider(anonymous_app_overrides): + response = client.get( + "/api/v1/score-sets/", + params={"urns": score_set["urn"]}, + ) + + assert response.status_code == 404 + assert f"score set with URN '{score_set['urn']}' not found" in response.json()["detail"] + + +def test_show_score_sets_authenticated_user_can_fetch_own_private_score_sets(client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + + response = client.get( + "/api/v1/score-sets/", + params={"urns": score_set["urn"]}, + ) + + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 1 + assert response_data[0]["urn"] == score_set["urn"] + + +def test_show_score_sets_authenticated_user_cannot_fetch_other_users_private_score_sets( + session, client, setup_router_db +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + + response = client.get( + "/api/v1/score-sets/", + params={"urns": score_set["urn"]}, + ) + + assert response.status_code == 404 + assert f"score set with URN '{score_set['urn']}' not found" in response.json()["detail"] + + +def test_show_score_sets_mixed_public_and_private_returns_404( + session, client, setup_router_db, anonymous_app_overrides, data_provider, data_files +): + experiment = create_experiment(client) + public_score_set = create_seq_score_set(client, experiment["urn"]) + public_score_set = mock_worker_variant_insertion(client, session, data_provider, public_score_set, data_files / "scores.csv") + private_score_set = create_seq_score_set(client, experiment["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published_score_set = publish_score_set(client, public_score_set["urn"]) + # Make private_score_set belong to a different user to make it inaccessible anonymously + change_ownership(session, private_score_set["urn"], ScoreSetDbModel) + + with DependencyOverrider(anonymous_app_overrides): + response = client.get( + "/api/v1/score-sets/", + params={"urns": f"{published_score_set['urn']},{private_score_set['urn']}"}, + ) + + assert response.status_code == 404 + assert f"score set with URN '{private_score_set['urn']}' not found" in response.json()["detail"] + + +def test_can_add_contributor_in_both_experiment_and_score_set(session, client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + change_ownership(session, experiment["urn"], ExperimentDbModel) + add_contributor( + session, + score_set["urn"], + ScoreSetDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + add_contributor( + session, + experiment["urn"], + ExperimentDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + score_set_response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + assert score_set_response.status_code == 200 + ss_response_data = score_set_response.json() + assert len(ss_response_data["contributors"]) == 1 + assert any(c["orcidId"] == TEST_USER["username"] for c in ss_response_data["contributors"]) + experiment_response = client.get(f"/api/v1/experiments/{experiment['urn']}") + assert experiment_response.status_code == 200 + exp_response_data = experiment_response.json() + assert len(exp_response_data["contributors"]) == 1 + assert any(c["orcidId"] == TEST_USER["username"] for c in exp_response_data["contributors"]) + + @pytest.mark.parametrize( "mock_publication_fetch", [ @@ -1413,6 +1559,96 @@ def test_cannot_publish_score_set_without_variants(client, setup_router_db): assert "cannot publish score set without variant scores" in response_data["detail"] +######################################################################################################################## +# Recently published score sets +######################################################################################################################## + + +def test_recently_published_returns_empty_list_when_no_score_sets_published(client, setup_router_db): + response = client.get("/api/v1/score-sets/recently-published") + assert response.status_code == 200 + assert response.json() == [] + + +def test_recently_published_returns_published_score_sets(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"]) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published_1 = publish_score_set(client, score_set_1["urn"]) + published_2 = publish_score_set(client, score_set_2["urn"]) + + response = client.get("/api/v1/score-sets/recently-published") + assert response.status_code == 200 + response_data = response.json() + + returned_urns = [ss["urn"] for ss in response_data] + assert published_1["urn"] in returned_urns + assert published_2["urn"] in returned_urns + + +def test_recently_published_does_not_return_unpublished_score_sets(client, setup_router_db): + experiment = create_experiment(client) + create_seq_score_set(client, experiment["urn"]) + + response = client.get("/api/v1/score-sets/recently-published") + assert response.status_code == 200 + assert response.json() == [] + + +def test_recently_published_respects_limit_parameter(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + # Create and upload variants for all score sets before publishing any, because publishing + # changes the experiment URN from a tmp URN to a permanent URN. + score_sets = [] + for _ in range(3): + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + score_sets.append(score_set) + + published_urns = [] + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + for score_set in score_sets: + published = publish_score_set(client, score_set["urn"]) + published_urns.append(published["urn"]) + + response = client.get("/api/v1/score-sets/recently-published?limit=2") + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 2 + + +def test_recently_published_rejects_limit_exceeding_maximum(client, setup_router_db): + response = client.get("/api/v1/score-sets/recently-published?limit=21") + assert response.status_code == 422 + + +def test_recently_published_rejects_limit_of_zero(client, setup_router_db): + response = client.get("/api/v1/score-sets/recently-published?limit=0") + assert response.status_code == 422 + + +def test_recently_published_accessible_to_anonymous_user( + session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides +): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + published = publish_score_set(client, score_set["urn"]) + + with DependencyOverrider(anonymous_app_overrides): + response = client.get("/api/v1/score-sets/recently-published") + + assert response.status_code == 200 + returned_urns = [ss["urn"] for ss in response.json()] + assert published["urn"] in returned_urns + + def test_cannot_publish_other_user_private_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -1935,8 +2171,10 @@ def test_search_private_score_sets_urn_match(session, data_provider, client, set experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") - search_payload = {"urn": score_set["urn"]} + search_payload = {"text": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -1944,14 +2182,15 @@ def test_search_private_score_sets_urn_match(session, data_provider, client, set assert response.json()["scoreSets"][0]["urn"] == score_set["urn"] -# There is space in the end of test urn. The search result returned nothing before. def test_search_private_score_sets_urn_with_space_match(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") urn_with_space = score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -1989,26 +2228,29 @@ def test_search_others_private_score_sets_urn_match(session, data_provider, clie experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set["urn"]} + search_payload = {"text": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 0 assert len(response.json()["scoreSets"]) == 0 -# There is space in the end of test urn. The search result returned nothing before. def test_search_others_private_score_sets_urn_with_space_match( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) urn_with_space = score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 0 @@ -2131,13 +2373,16 @@ def test_search_public_score_sets_urn_with_space_match(session, data_provider, c experiment = create_experiment(client, {"title": "Experiment 1"}) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: published_score_set = publish_score_set(client, score_set["urn"]) - worker_queue.assert_called_once() + publish_score_set(client, decoy["urn"]) + assert worker_queue.call_count == 2 urn_with_space = published_score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -2187,13 +2432,16 @@ def test_search_others_public_score_sets_urn_match(session, data_provider, clien experiment = create_experiment(client, {"title": "Experiment 1"}) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: published_score_set = publish_score_set(client, score_set["urn"]) - worker_queue.assert_called_once() + publish_score_set(client, decoy["urn"]) + assert worker_queue.call_count == 2 change_ownership(session, published_score_set["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set["urn"]} + search_payload = {"text": published_score_set["urn"]} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -2207,14 +2455,17 @@ def test_search_others_public_score_sets_urn_with_space_match( experiment = create_experiment(client, {"title": "Experiment 1"}) score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + decoy = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + decoy = mock_worker_variant_insertion(client, session, data_provider, decoy, data_files / "scores.csv") with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: published_score_set = publish_score_set(client, score_set["urn"]) - worker_queue.assert_called_once() + publish_score_set(client, decoy["urn"]) + assert worker_queue.call_count == 2 change_ownership(session, published_score_set["urn"], ScoreSetDbModel) urn_with_space = published_score_set["urn"] + " " - search_payload = {"urn": urn_with_space} + search_payload = {"text": urn_with_space} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert response.json()["numScoreSets"] == 1 @@ -2403,6 +2654,51 @@ def test_search_filter_options_hidden_by_published_superseding_version( assert target_name not in target_names +def test_search_score_sets_reports_correct_total_count_with_limit( + session, data_provider, client, setup_router_db, data_files +): + """When more published score sets exist than the search limit, num_score_sets should reflect the true total.""" + num_score_sets = 3 + for i in range(num_score_sets): + experiment = create_experiment(client, {"title": f"Experiment {i}"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {i}"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + publish_score_set(client, score_set["urn"]) + + search_payload = {"limit": 2} + response = client.post("/api/v1/score-sets/search", json=search_payload) + assert response.status_code == 200 + assert len(response.json()["scoreSets"]) == 2 + assert response.json()["numScoreSets"] == num_score_sets + + +def test_search_score_sets_not_affected_by_experiment_metadata( + session, data_provider, client, setup_router_db, data_files +): + """Experiments with multiple keywords should not reduce the number of score sets returned by search. + + This is a regression test for a bug where joinedload on one-to-many experiment relationships caused row + multiplication in the main SQL query. The LIMIT clause was applied to the multiplied rows rather than unique + score sets, resulting in fewer results than expected. + """ + num_score_sets = 3 + for i in range(num_score_sets): + experiment = create_experiment(client, {**TEST_EXPERIMENT_WITH_KEYWORD, "title": f"Experiment {i}"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {i}"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): + publish_score_set(client, score_set["urn"]) + + search_payload = {"limit": 2} + response = client.post("/api/v1/score-sets/search", json=search_payload) + assert response.status_code == 200 + assert len(response.json()["scoreSets"]) == 2 + assert response.json()["numScoreSets"] == num_score_sets + + ######################################################################################################################## # Score set deletion ########################################################################################################################