Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5368f2c
fix: correct false-positive URN search tests
bencap Mar 5, 2026
95fab85
fix: capture return value of query filter builder in search count query
bencap Mar 6, 2026
6c77ef0
fix: use selectinload for experiment relationships in score set search
bencap Mar 6, 2026
be04d52
Merge pull request #676 from VariantEffect/bugfix/bencap/675/search-r…
bencap Mar 10, 2026
1d35365
feat: configure database connection pool via environment variables
bencap Mar 10, 2026
4462d60
GET request for multiple score sets
jstone-dev Mar 18, 2026
b99c1fd
API endpoint for fetching recently published score sets
jstone-dev Mar 18, 2026
cd0b333
Unit tests for recently published score sets API endpoint
jstone-dev Mar 18, 2026
28d2a30
Unit test fix
jstone-dev Mar 19, 2026
7e204af
fix: duplicate score sets appearing in GET results
bencap Mar 19, 2026
3aaecfa
Unit tests of API-level authorization when getting multiple score sets
jstone-dev Mar 19, 2026
9edc76c
Merge pull request #681 from VariantEffect/chore/bencap/680/bump-pool…
bencap Mar 19, 2026
1c63322
Merge pull request #673 from VariantEffect/bugfix/bencap/483/unexpect…
bencap Mar 19, 2026
8f40fab
feat: add endpoint for user created calibrations
bencap Mar 19, 2026
230df86
Merge pull request #683 from VariantEffect/score-set-batch-request
bencap Mar 22, 2026
9a340fd
Merge pull request #686 from VariantEffect/recently-published-score-sets
bencap Mar 22, 2026
319d859
fix: protect score set tail reads from null IDs
bencap Mar 22, 2026
e11911d
fix: use shared error responses on calibration endpoints
bencap Mar 22, 2026
bda4ff1
Merge pull request #690 from VariantEffect/bugfix/bencap/687/duplicat…
bencap Mar 22, 2026
349b745
Merge pull request #689 from VariantEffect/feature/bencap/688/score-c…
bencap Mar 22, 2026
e33e010
fix: register missing ScoreCalibrationModify schema in OpenAPI spec
bencap Mar 22, 2026
15c6e78
chore: bump version to 2026.1.2
bencap Mar 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "mavedb"
version = "2026.1.1"
version = "2026.1.2"
description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect."
license = "AGPL-3.0-only"
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion src/mavedb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
logger = module_logging.getLogger(__name__)

__project__ = "mavedb-api"
__version__ = "2026.1.1"
__version__ = "2026.1.2"

logger.info(f"MaveDB {__version__}")

Expand Down
11 changes: 6 additions & 5 deletions src/mavedb/db/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
DB_DATABASE_NAME = os.getenv("DB_DATABASE_NAME")
DB_USERNAME = os.getenv("DB_USERNAME")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "5"))
DB_MAX_OVERFLOW = int(os.getenv("DB_MAX_OVERFLOW", "10"))

# DB_URL = "sqlite:///./sql_app.db"
DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_DATABASE_NAME}"

engine = create_engine(
# For PostgreSQL:
DB_URL
# For SQLite:
# DB_URL, connect_args={"check_same_thread": False}
DB_URL,
pool_size=DB_POOL_SIZE,
max_overflow=DB_MAX_OVERFLOW,
pool_pre_ping=True,
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
18 changes: 13 additions & 5 deletions src/mavedb/lib/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,26 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
score_sets: list[ScoreSet] = (
query.join(ScoreSet.experiment)
.options(
# Use selectinload for one-to-many experiment relationships to avoid row
# multiplication in the main query. joinedload would LEFT OUTER JOIN these
# into the main SQL query, and because they're nested inside contains_eager,
# SQLAlchemy's subquery-wrapping logic doesn't protect the LIMIT clause from
# being applied to multiplied rows rather than unique score sets. This would
# cause the count of returned score sets to be less than the requested limit,
# and the count query would be triggered even when the number of unique score
# sets in the main query results exceeds the limit.
contains_eager(ScoreSet.experiment).options(
joinedload(Experiment.experiment_set),
joinedload(Experiment.keyword_objs).joinedload(
selectinload(Experiment.keyword_objs).joinedload(
ExperimentControlledKeywordAssociation.controlled_keyword
),
joinedload(Experiment.created_by),
joinedload(Experiment.modified_by),
joinedload(Experiment.doi_identifiers),
joinedload(Experiment.publication_identifier_associations).joinedload(
selectinload(Experiment.doi_identifiers),
selectinload(Experiment.publication_identifier_associations).joinedload(
ExperimentPublicationIdentifierAssociation.publication
),
joinedload(Experiment.raw_read_identifiers),
selectinload(Experiment.raw_read_identifiers),
selectinload(Experiment.score_sets).options(
joinedload(ScoreSet.doi_identifiers),
joinedload(ScoreSet.publication_identifier_associations).joinedload(
Expand Down Expand Up @@ -292,7 +300,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
# query.
score_sets = score_sets[: search.limit]
count_query = db.query(ScoreSet)
build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
num_score_sets = count_query.order_by(None).limit(None).count()

save_to_logging_context({"matching_resources": num_score_sets})
Expand Down
16 changes: 12 additions & 4 deletions src/mavedb/routers/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,18 @@ def get_experiment_score_sets(
.all()
)

filter_superseded_score_set_tails = [
find_superseded_score_set_tail(score_set, Action.READ, user_data) for score_set in score_set_result
]
filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None]
# Multiple chain heads can resolve to the same visible ancestor via find_superseded_score_set_tail
# (e.g. when several private superseding score sets all trace back to the same published score set).
# Deduplicate by ID to avoid returning the same score set more than once.
seen_ids: set[int] = set()
filtered_score_sets: list[ScoreSet] = []
for ss in score_set_result:
tail = find_superseded_score_set_tail(ss, Action.READ, user_data)
tail_id = tail.id if tail is not None else None
if tail is not None and tail_id is not None and tail_id not in seen_ids:
seen_ids.add(tail_id)
filtered_score_sets.append(tail)

if not filtered_score_sets:
save_to_logging_context({"associated_resources": []})
logger.info(msg="No score sets are associated with the requested experiment.", extra=logging_context())
Expand Down
26 changes: 24 additions & 2 deletions src/mavedb/routers/score_calibrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from mavedb import deps
from mavedb.lib.authentication import get_current_user
from mavedb.lib.authorization import require_current_user_with_email
from mavedb.lib.authorization import require_current_user, require_current_user_with_email
from mavedb.lib.flexible_model_loader import json_or_form_loader
from mavedb.lib.logging import LoggedRoute
from mavedb.lib.logging.context import (
Expand All @@ -31,14 +31,15 @@
from mavedb.models.score_calibration import ScoreCalibration
from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification
from mavedb.models.score_set import ScoreSet
from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES
from mavedb.view_models import score_calibration

logger = logging.getLogger(__name__)

router = APIRouter(
prefix="/api/v1/score-calibrations",
tags=["Score Calibrations"],
responses={404: {"description": "Not found"}},
responses={**PUBLIC_ERROR_RESPONSES},
route_class=LoggedRoute,
)

Expand All @@ -54,6 +55,27 @@
)


@router.get(
    "/me",
    status_code=200,
    response_model=list[score_calibration.ScoreCalibrationWithScoreSetUrn],
    responses={**ACCESS_CONTROL_ERROR_RESPONSES},
    summary="List my calibrations",
)
def list_my_calibrations(
    *,
    db: Session = Depends(deps.get_db),
    user_data: UserData = Depends(require_current_user),
) -> list[ScoreCalibration]:
    """Return every score calibration created by the authenticated user.

    Eagerly loads each calibration's score set, and that score set's
    contributors, so the response model can serialize without per-row
    lazy loads.
    """
    owned_calibrations = db.query(ScoreCalibration).filter(
        ScoreCalibration.created_by_id == user_data.user.id
    )
    eager_score_set = selectinload(ScoreCalibration.score_set).selectinload(ScoreSet.contributors)
    return owned_calibrations.options(eager_score_set).all()


@router.get(
"/{urn}",
response_model=score_calibration.ScoreCalibrationWithScoreSetUrn,
Expand Down
80 changes: 80 additions & 0 deletions src/mavedb/routers/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,86 @@ def search_my_score_sets(
return {"score_sets": enriched_score_sets, "num_score_sets": num_score_sets}


RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT = 20


@router.get(
    "/score-sets/recently-published",
    status_code=200,
    response_model=list[score_set.ScoreSet],
    response_model_exclude_none=True,
    summary="List recently published score sets",
)
def list_recently_published_score_sets(
    limit: int = Query(
        default=10,
        ge=1,
        le=RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT,
        description=f"Number of score sets to return (maximum {RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT}).",
    ),
    db: Session = Depends(deps.get_db),
    user_data: Optional[UserData] = Depends(get_current_user),
) -> Any:
    """
    Return the most recently published score sets, ordered by publication date descending.

    Only public (non-private) score sets with a non-null publication date are
    queried. Items the caller cannot READ are dropped after the query, so the
    response may contain fewer than ``limit`` entries.
    """
    save_to_logging_context({"requested_resource": "recently-published", "limit": limit})

    items = (
        db.query(ScoreSet)
        .filter(ScoreSet.published_date.isnot(None), ScoreSet.private.is_(False))
        .order_by(ScoreSet.published_date.desc(), ScoreSet.urn.desc())
        .limit(limit)
        .all()
    )

    result = []
    for item in items:
        if not has_permission(user_data, item, Action.READ).permitted:
            continue

        # Redact on the validated copy rather than assigning to the ORM
        # instance: setting item.superseding_score_set = None mutates an
        # object attached to the session and risks that change being
        # flushed back to the database.
        updates: dict[str, Any] = {
            "experiment": enrich_experiment_with_num_score_sets(item.experiment, user_data)
        }
        if (
            item.superseding_score_set
            and not has_permission(user_data, item.superseding_score_set, Action.READ).permitted
        ):
            # Hide a superseding score set the caller is not permitted to read.
            updates["superseding_score_set"] = None

        result.append(score_set.ScoreSet.model_validate(item).copy(update=updates))

    return result


@router.get(
    "/score-sets/",
    status_code=200,
    response_model=list[score_set.ScoreSet],
    responses={**ACCESS_CONTROL_ERROR_RESPONSES},
    response_model_exclude_none=True,
    summary="Fetch score sets by URN list",
)
async def show_score_sets(
    *,
    urns: str = Query(..., description="Comma-separated list of score set URNs"),
    db: Session = Depends(deps.get_db),
    # get_current_user yields an optional user elsewhere in this file, so the
    # annotation must be Optional to match anonymous access.
    user_data: Optional[UserData] = Depends(get_current_user),
) -> Any:
    """
    Fetch score sets identified by a comma-separated list of URNs.

    Raises HTTP 422 when the list is empty after trimming whitespace. Each URN
    is resolved individually via fetch_score_set_by_urn, which presumably
    raises for unknown or unauthorized URNs — confirm against its definition.
    """
    # Tolerate stray commas and whitespace, e.g. "urn1, ,urn2,".
    urn_list = [urn.strip() for urn in urns.split(",") if urn.strip()]
    if not urn_list:
        raise HTTPException(status_code=422, detail="At least one URN is required")

    save_to_logging_context({"requested_resource": urn_list})
    response_items: list[score_set.ScoreSet] = []
    for urn in urn_list:
        item = await fetch_score_set_by_urn(db, urn, user_data, None, False)
        # Replace the experiment on the serialized copy with one annotated
        # with its visible score-set count.
        enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data)
        response_item = score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment})
        response_items.append(response_item)

    return response_items


@router.get(
"/score-sets/{urn}",
status_code=200,
Expand Down
18 changes: 18 additions & 0 deletions src/mavedb/server_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.openapi.utils import get_openapi
from pydantic.json_schema import models_json_schema
from sqlalchemy.orm import configure_mappers
from starlette.requests import Request
from starlette.responses import JSONResponse
Expand Down Expand Up @@ -240,6 +241,23 @@ def customize_openapi_schema():
variants.metadata,
]

# ScoreCalibrationModify (and its sub-models) are used in the PUT /score-calibrations/{urn}
# endpoint's openapi_extra $ref, but FastAPI only registers schemas it discovers through
# direct Body() parameters or response_model — not through Depends(). The flexible_model_loader
# pattern wraps the model in a generic async function (return type `T`), so FastAPI never sees
# the concrete type and never adds it to components/schemas. We register those missing schemas
# here explicitly to keep the generated OpenAPI spec valid. Eventually, this schema may be
# registered in other endpoints and this workaround can be removed, but for now this is the only
# endpoint where we use the ScoreCalibrationModify model.
from mavedb.view_models.score_calibration import ScoreCalibrationModify

_, extra_schemas = models_json_schema(
[(ScoreCalibrationModify, "validation")],
ref_template="#/components/schemas/{model}",
)
for name, schema in extra_schemas.get("$defs", {}).items():
openapi_schema["components"]["schemas"].setdefault(name, schema)

app.openapi_schema = openapi_schema
return app.openapi_schema

Expand Down
12 changes: 6 additions & 6 deletions tests/helpers/util/score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ def create_seq_score_set_with_variants(
count_columns_metadata_json_path,
)

assert score_set["numVariants"] == 3, (
f"Could not create sequence based score set with variants within experiment {experiment_urn}"
)
assert (
score_set["numVariants"] == 3
), f"Could not create sequence based score set with variants within experiment {experiment_urn}"

jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema())
return score_set
Expand Down Expand Up @@ -196,9 +196,9 @@ def create_acc_score_set_with_variants(
count_columns_metadata_json_path,
)

assert score_set["numVariants"] == 3, (
f"Could not create sequence based score set with variants within experiment {experiment_urn}"
)
assert (
score_set["numVariants"] == 3
), f"Could not create sequence based score set with variants within experiment {experiment_urn}"

jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema())
return score_set
Expand Down
Loading
Loading