Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""add unique constraint on replaces_id

Revision ID: 4dbf24ed1857
Revises: 398067c53257
Create Date: 2026-05-13 11:22:59.646605

"""
from alembic import op

# revision identifiers, used by Alembic.
revision = '4dbf24ed1857'
down_revision = '398067c53257'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index('ix_scoresets_replaces_id', table_name='scoresets')
op.create_index(op.f('ix_scoresets_replaces_id'), 'scoresets', ['replaces_id'], unique=True)
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f('ix_scoresets_replaces_id'), table_name='scoresets')
op.create_index('ix_scoresets_replaces_id', 'scoresets', ['replaces_id'], unique=False)
# ### end Alembic commands ###
14 changes: 14 additions & 0 deletions src/mavedb/lib/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pandas.testing import assert_index_equal
from sqlalchemy import Integer, and_, cast, func, or_, select
from sqlalchemy.orm import Query, Session, aliased, contains_eager, joinedload, selectinload
from sqlalchemy.exc import IntegrityError

from mavedb.lib.exceptions import ValidationError
from mavedb.lib.logging.context import logging_context, save_to_logging_context
Expand Down Expand Up @@ -807,6 +808,19 @@ def is_null(value):
return null_values_re.fullmatch(value) or not value


def is_replaces_id_unique_violation(exc: IntegrityError) -> bool:
"""
Return True if the IntegrityError was caused by the unique constraint on score_set.replaces_id.
"""
orig = getattr(exc, "orig", None)
if orig is None:
return False

diag = getattr(orig, "diag", None)
detail = getattr(diag, "detail", "") or ""
return "replaces_id" in detail


def variant_to_csv_row(
variant: Variant,
columns: dict[str, list[str]],
Expand Down
2 changes: 1 addition & 1 deletion src/mavedb/models/score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ class ScoreSet(Base):
# TODO Standardize on US or GB spelling for licenc/se.
licence_id = Column(Integer, ForeignKey("licenses.id"), index=True, nullable=False)
license: Mapped["License"] = relationship("License")
superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True)
superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True, unique=True,)
superseded_score_set: Mapped[Optional["ScoreSet"]] = relationship(
"ScoreSet",
uselist=False,
Expand Down
28 changes: 24 additions & 4 deletions src/mavedb/routers/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ga4gh.va_spec.base.core import ExperimentalVariantFunctionalImpactStudyResult, Statement
from pydantic import ValidationError
from sqlalchemy import or_, select
from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.exc import MultipleResultsFound, IntegrityError
from sqlalchemy.orm import Session, contains_eager

from mavedb import deps
Expand Down Expand Up @@ -55,6 +55,7 @@
fetch_score_set_search_filter_options,
find_meta_analyses_for_experiment_sets,
get_score_set_variants_as_csv,
is_replaces_id_unique_violation,
refresh_variant_urns,
variants_to_csv_rows,
)
Expand Down Expand Up @@ -1604,7 +1605,7 @@ async def create_score_set(
db, item_create.superseded_score_set_urn, user_data, user_data, True
)

if superseded_score_set is None:
if superseded_score_set is None or superseded_score_set.private:
logger.info(
msg="Failed to create score set; The requested superseded score set does not exist.",
extra=logging_context(),
Expand All @@ -1613,6 +1614,16 @@ async def create_score_set(
status_code=404,
detail="The requested superseded score set does not exist",
)

if superseded_score_set.superseding_score_set:
logger.info(
msg=f"Failed to create score set. This score set has been superseded by score set: {superseded_score_set.superseding_score_set.urn}.",
extra=logging_context(),
)
raise HTTPException(
status_code=409,
detail=f"This score set has been superseded by score set: {superseded_score_set.superseding_score_set.urn}.",
)
else:
superseded_score_set = None

Expand Down Expand Up @@ -1867,8 +1878,17 @@ async def create_score_set(
score_calibrations=score_calibrations,
) # type: ignore[call-arg]

db.add(item)
db.commit()
try:
db.add(item)
db.commit()
except IntegrityError as e:
db.rollback()
if is_replaces_id_unique_violation(e):
raise HTTPException(
status_code=409,
detail="The requested score set has already been superseded.",
)
raise
db.refresh(item)

save_to_logging_context({"created_resource": item.urn})
Expand Down
40 changes: 0 additions & 40 deletions tests/routers/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -1796,46 +1796,6 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments(
assert response.json()[0]["urn"] == published_superseding_score_set["urn"]


def test_non_owner_searches_multiple_unpublished_superseding_score_sets_no_duplicates(
session, client, setup_router_db, data_files, data_provider
):
"""When multiple private score sets supersede the same published score set,
a non-owner should see the published score set exactly once (no duplicates)."""
experiment = create_experiment(client)
score_set = create_seq_score_set(client, experiment["urn"])
score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv")

with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
published_score_set = publish_score_set(client, score_set["urn"])
worker_queue.assert_called_once()

experiment_urn = published_score_set["experiment"]["urn"]

# Create two unpublished score sets that both supersede the same published score set.
superseding_1_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
superseding_1_payload["experimentUrn"] = experiment_urn
superseding_1_payload["supersededScoreSetUrn"] = published_score_set["urn"]
superseding_1_response = client.post("/api/v1/score-sets/", json=superseding_1_payload)
assert superseding_1_response.status_code == 200

superseding_2_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
superseding_2_payload["experimentUrn"] = experiment_urn
superseding_2_payload["supersededScoreSetUrn"] = published_score_set["urn"]
superseding_2_response = client.post("/api/v1/score-sets/", json=superseding_2_payload)
assert superseding_2_response.status_code == 200

# Transfer ownership so the requesting user is a non-owner of the superseding score sets.
change_ownership(session, superseding_1_response.json()["urn"], ScoreSetDbModel)
change_ownership(session, superseding_2_response.json()["urn"], ScoreSetDbModel)
change_ownership(session, published_score_set["urn"], ScoreSetDbModel)

response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
assert response.status_code == 200
response_urns = [ss["urn"] for ss in response.json()]
assert len(response_urns) == 1
assert response_urns[0] == published_score_set["urn"]


def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider):
experiment = create_experiment(client)
score_set = create_seq_score_set(client, experiment["urn"])
Expand Down
55 changes: 41 additions & 14 deletions tests/routers/test_score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -2890,16 +2890,45 @@ def test_search_score_sets_not_affected_by_experiment_metadata(
assert response.json()["numScoreSets"] == num_score_sets


def test_search_score_sets_not_affected_by_multiple_superseding_versions(
def test_cannot_create_multiple_superseding_versions(
session, data_provider, client, setup_router_db, data_files
):
"""Attempting to create multiple superseding versions should fail."""
experiment = create_experiment(client, {"title": "Original Experiment"})
score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Original Score Set"})
score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv")

with patch.object(arq.ArqRedis, "enqueue_job", return_value=None):
published = publish_score_set(client, score_set["urn"])

# Create the first private superseding score set successfully
first_superseding = create_seq_score_set(
client,
create_experiment(client, {"title": "First Superseding Experiment"})["urn"],
update={"title": "First Superseding", "supersededScoreSetUrn": published["urn"]},
)
assert first_superseding is not None

# Attempt to create the second private superseding score set for the same published score set
# This should fail due to the unique constraint on replaces_id
experiment2 = create_experiment(client)
score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
score_set_post_payload["experimentUrn"] = experiment2["urn"]
score_set_post_payload["supersededScoreSetUrn"] = published["urn"]

response = client.post("/api/v1/score-sets/", json=score_set_post_payload)
assert response.status_code == 409
assert (f"This score set has been superseded by score set: {first_superseding['urn']}.") in response.json()["detail"]


def test_search_score_sets_not_affected_by_an_unpublishing_superseding_versions(
session, data_provider, client, setup_router_db, data_files
):
"""Multiple unpublished superseding versions of the same score set should not reduce search page size.
"""One unpublished superseding versions of the same score set should not reduce search page size.

Regression test for a bug where the superseding score set filter used a LEFT OUTER JOIN
(scoresets LEFT JOIN scoresets AS s ON scoresets.id = s.replaces_id). Since replaces_id has
no uniqueness constraint, a score set with N superseding versions produces N rows, all inside
the LIMIT boundary. This consumed extra row budget and caused paginated searches to return
fewer unique score sets than the requested limit.
(scoresets LEFT JOIN scoresets AS s ON scoresets.id = s.replaces_id). replaces_id has
uniqueness constraint now.
"""
num_published = 3
published_urns = []
Expand All @@ -2912,14 +2941,12 @@ def test_search_score_sets_not_affected_by_multiple_superseding_versions(
published = publish_score_set(client, score_set["urn"])
published_urns.append(published["urn"])

# Create multiple unpublished superseding versions for the first score set.
# These share the same replaces_id, which caused row multiplication with the old LEFT JOIN filter.
for j in range(3):
create_seq_score_set(
client,
create_experiment(client, {"title": f"Superseding Experiment {j}"})["urn"],
update={"title": f"Superseding {j}", "supersededScoreSetUrn": published_urns[0]},
)
# Create an unpublished superseding versions for the first score set.
create_seq_score_set(
client,
create_experiment(client, {"title": f"Superseding Experiment {1}"})["urn"],
update={"title": f"Superseding {1}", "supersededScoreSetUrn": published_urns[0]},
)

search_payload = {"limit": 2}
response = client.post("/api/v1/score-sets/search", json=search_payload)
Expand Down
Loading