Skip to content

Commit 9c65d44

Browse files
authored
Merge pull request #644 from VariantEffect/release-2025.6.0
Release 2026.1.0
2 parents 37659db + 4c103a8 commit 9c65d44

135 files changed

Lines changed: 13554 additions & 1738 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
"""
2+
Migration script to convert JSONB functional_ranges to the new row-based implementation.
3+
4+
This script migrates data from ScoreCalibration.functional_ranges (JSONB column)
5+
to the new ScoreCalibrationFunctionalClassification table with proper foreign key relationships.
6+
"""
7+
from typing import Any, Dict
8+
9+
import sqlalchemy as sa
10+
from sqlalchemy.orm import Session, configure_mappers
11+
12+
from mavedb.models import *
13+
from mavedb.db.session import SessionLocal
14+
from mavedb.models.acmg_classification import ACMGClassification
15+
from mavedb.models.enums.acmg_criterion import ACMGCriterion
16+
from mavedb.models.enums.functional_classification import FunctionalClassification
17+
from mavedb.models.enums.strength_of_evidence import StrengthOfEvidenceProvided
18+
from mavedb.models.score_calibration import ScoreCalibration
19+
from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification
20+
from mavedb.models.score_calibration_functional_classification_variant_association import (
21+
score_calibration_functional_classification_variants_association_table
22+
)
23+
from mavedb.models.variant import Variant
24+
from mavedb.view_models.acmg_classification import ACMGClassificationCreate
25+
26+
# Initialize the relationships between all ORM mappers imported above before any
# query is built. NOTE(review): presumably this is why `mavedb.models` is
# star-imported (so every model is registered first) -- confirm.
configure_mappers()
27+
28+
29+
def populate_variant_associations(
    db: Session,
    functional_classification: ScoreCalibrationFunctionalClassification,
    calibration: ScoreCalibration,
) -> int:
    """Associate every variant whose score falls inside a functional range with that range.

    Loads all variants belonging to the calibration's score set, reads each
    variant's score from ``variant.data["score_data"]["score"]``, and inserts one
    association row per variant for which
    ``functional_classification.score_is_contained_in_range`` returns true.

    Args:
        db: Session used to read variants and insert association rows. Nothing
            is committed here; the caller owns the transaction.
        functional_classification: Row whose ``range`` is tested. Its ``id`` is
            written into every association row.
        calibration: Calibration whose ``score_set_id`` selects the candidate
            variants.

    Returns:
        The number of association rows inserted; 0 when the classification is
        missing or has no range.
    """
    if not functional_classification or not functional_classification.range:
        print(" Skipping variant association - no valid range or view model")
        return 0

    print(f" Finding variants within range {functional_classification.range} (lower_inclusive={functional_classification.inclusive_lower_bound}, upper_inclusive={functional_classification.inclusive_upper_bound})")

    # Get all variants for this score set and their scores
    variants = db.execute(sa.select(Variant).where(
        Variant.score_set_id == calibration.score_set_id,
    )).scalars().all()

    variants_in_range = []

    for variant in variants:
        try:
            # `score_data` can be absent *or* explicitly null in the JSONB
            # payload; both mean "this variant has no score" rather than an error.
            score_section = (variant.data or {}).get("score_data") or {}
            raw_score = score_section.get("score")
            if raw_score is None:
                continue

            # Delegate the bounds/inclusivity check to the model itself.
            if functional_classification.score_is_contained_in_range(float(raw_score)):
                variants_in_range.append(variant)

        except (ValueError, TypeError, AttributeError) as e:
            # AttributeError covers payloads where `data`/`score_data` is not a
            # mapping; one malformed variant must not abort the whole range.
            print(f" Warning: Could not parse score for variant {variant.id}: {e}")
            continue

    print(f" Found {len(variants_in_range)} variants in range out of {len(variants)} total variants")

    # Bulk insert associations
    if variants_in_range:
        associations = [
            {
                "functional_classification_id": functional_classification.id,
                "variant_id": variant.id,
            }
            for variant in variants_in_range
        ]

        db.execute(
            score_calibration_functional_classification_variants_association_table.insert(),
            associations,
        )

    return len(variants_in_range)
85+
86+
87+
def migrate_functional_range_to_row(
    db: Session,
    calibration: ScoreCalibration,
    functional_range: Dict[str, Any],
    acmg_classification_cache: Dict[str, ACMGClassification]
) -> ScoreCalibrationFunctionalClassification:
    """Build the table-row equivalent of one legacy JSONB functional range.

    When the range carries an ``acmg_classification`` entry, a matching
    ``ACMGClassification`` row is looked up in ``acmg_classification_cache`` or
    created, added to the session and flushed so its id can be referenced.

    Args:
        db: Session used to add and flush newly created ACMG rows.
        calibration: Calibration that owns the range; only its ``id`` is read.
        functional_range: One element of the legacy JSON list.
        acmg_classification_cache: Mapping from
            ``"<criterion>_<evidence_strength>_<points>"`` to the ACMG row already
            created for that combination. Mutated in place on a cache miss.

    Returns:
        A new, not-yet-added ``ScoreCalibrationFunctionalClassification``.
    """
    acmg_row_id = None
    acmg_payload = functional_range.get("acmg_classification")

    if acmg_payload:
        raw_criterion = acmg_payload.get("criterion")
        raw_strength = acmg_payload.get("evidence_strength")

        # Legacy values are upper-cased before being turned into enum members.
        criterion_name = raw_criterion.upper() if raw_criterion else None
        strength_name = raw_strength.upper() if raw_strength else None

        # Instantiate the view model and read the fields back from it.
        validated = ACMGClassificationCreate(
            criterion=ACMGCriterion(criterion_name) if criterion_name else None,
            evidence_strength=StrengthOfEvidenceProvided(strength_name) if strength_name else None,
            points=acmg_payload.get("points"),
        )

        cache_key = f"{validated.criterion}_{validated.evidence_strength}_{validated.points}"

        acmg_row = acmg_classification_cache.get(cache_key)
        if acmg_row is None:
            acmg_row = ACMGClassification(
                criterion=validated.criterion,
                evidence_strength=validated.evidence_strength,
                points=validated.points,
            )
            db.add(acmg_row)
            db.flush()  # assigns the primary key
            acmg_classification_cache[cache_key] = acmg_row

        acmg_row_id = acmg_row.id

    row_fields = {
        "calibration_id": calibration.id,
        "label": functional_range.get("label", ""),
        "description": functional_range.get("description"),
        "functional_classification": FunctionalClassification(
            functional_range.get("classification", "not_specified")
        ),
        "range": functional_range.get("range"),
        "inclusive_lower_bound": functional_range.get("inclusive_lower_bound"),
        "inclusive_upper_bound": functional_range.get("inclusive_upper_bound"),
        "oddspaths_ratio": functional_range.get("oddspaths_ratio"),
        "positive_likelihood_ratio": functional_range.get("positive_likelihood_ratio"),
        "acmg_classification_id": acmg_row_id,
    }
    return ScoreCalibrationFunctionalClassification(**row_fields)
140+
141+
142+
def _forget_cache_entries_added_since(cache: Dict[str, Any], known_keys: set) -> None:
    """Remove every cache entry whose key is not in ``known_keys``."""
    for key in set(cache) - known_keys:
        del cache[key]


def do_migration(db: Session):
    """Migrate every calibration's legacy JSONB functional ranges into table rows.

    For each calibration whose ``functional_ranges_deprecated_json`` is not NULL:
    one functional classification row is created per range, variants are
    associated with each row, and the work is committed per calibration. A range
    or association step that fails is rolled back to its own savepoint and
    counted as an error; a failure outside those steps rolls back the whole
    calibration. A summary is printed at the end.

    Args:
        db: Session used for all reads and writes. This function commits and
            rolls back on it.
    """
    print("Starting migration of JSONB functional_ranges to table rows...")

    # Find all calibrations with functional_ranges
    calibrations_with_ranges = db.scalars(
        sa.select(ScoreCalibration).where(ScoreCalibration.functional_ranges_deprecated_json.isnot(None))
    ).all()

    print(f"Found {len(calibrations_with_ranges)} calibrations with functional ranges to migrate.")

    # Cache for ACMG classifications to avoid duplicates
    acmg_classification_cache: Dict[str, ACMGClassification] = {}

    migrated_count = 0
    error_count = 0

    for calibration in calibrations_with_ranges:
        # Captured inside the `try` but declared here so the error handler never
        # has to touch the ORM object while the session is in a failed state.
        calibration_id = None
        cache_keys_before_calibration = set(acmg_classification_cache)

        try:
            calibration_id = calibration.id
            print(f"Migrating calibration {calibration_id} (URN: {calibration.urn})...")

            functional_ranges_data = calibration.functional_ranges_deprecated_json
            if not functional_ranges_data or not isinstance(functional_ranges_data, list):
                print(f" Skipping calibration {calibration_id} - no valid functional ranges data")
                continue

            # Create functional classification rows for each range
            functional_classifications = []
            for i, functional_range in enumerate(functional_ranges_data):
                cache_keys_before_range = set(acmg_classification_cache)
                try:
                    # A savepoint keeps one bad range from poisoning the
                    # surrounding transaction: without it a failed statement
                    # leaves the transaction aborted and the later COMMIT can
                    # quietly persist nothing.
                    with db.begin_nested():
                        functional_classification = migrate_functional_range_to_row(
                            db, calibration, functional_range, acmg_classification_cache
                        )
                        db.add(functional_classification)
                    functional_classifications.append(functional_classification)
                    print(f" Created functional classification row {i+1}/{len(functional_ranges_data)}")

                except Exception as e:
                    # ACMG rows created inside the rolled-back savepoint no
                    # longer exist, so they must not be served from the cache.
                    _forget_cache_entries_added_since(acmg_classification_cache, cache_keys_before_range)
                    print(f" Error migrating functional range {i+1} for calibration {calibration_id}: {e}")
                    error_count += 1
                    continue

            # Flush to get IDs for the functional classifications
            db.flush()

            # Populate variant associations for each functional classification
            total_associations = 0
            for functional_classification in functional_classifications:
                # Read the id before the savepoint so the error message does not
                # depend on the row's state after a rollback.
                classification_id = functional_classification.id
                try:
                    with db.begin_nested():
                        associations_count = populate_variant_associations(
                            db, functional_classification, calibration
                        )
                    total_associations += associations_count

                except Exception as e:
                    print(f" Error populating variant associations for functional classification {classification_id}: {e}")
                    error_count += 1
                    continue

            print(f" Created {total_associations} variant associations")

            # Commit the changes for this calibration
            db.commit()
            migrated_count += 1
            print(f" Successfully migrated calibration {calibration_id}")

        except Exception as e:
            # Roll back first: the session may refuse any further work
            # (including attribute loads) until this has happened.
            db.rollback()
            # Everything cached during this calibration was part of the
            # transaction that was just discarded.
            _forget_cache_entries_added_since(acmg_classification_cache, cache_keys_before_calibration)
            print(f"Error migrating calibration {calibration_id}: {e}")
            error_count += 1
            continue

    # Final statistics (table-wide totals, not only rows written by this run)
    total_functional_classifications = db.scalar(
        sa.select(sa.func.count(ScoreCalibrationFunctionalClassification.id))
    )

    total_associations = db.scalar(
        sa.select(sa.func.count()).select_from(
            score_calibration_functional_classification_variants_association_table
        )
    ) or 0

    print("\nMigration completed:")
    print(f" Successfully migrated: {migrated_count} calibrations")
    print(f" Functional classification rows created: {total_functional_classifications}")
    print(f" Variant associations created: {total_associations}")
    print(f" ACMG classifications created: {len(acmg_classification_cache)}")
    print(f" Errors encountered: {error_count}")
231+
232+
233+
def verify_migration(db: Session):
    """Print a summary of the migrated data so the result can be checked by eye.

    Reports row counts for the source calibrations, the functional
    classification table, the ACMG classification table and the variant
    association table; prints the fields of one sample functional
    classification together with its number of associated variants; and prints
    a per-classification breakdown. Only SELECT statements are issued.

    Args:
        db: Session used for the read-only queries.
    """
    print("\nVerifying migration...")

    association_table = score_calibration_functional_classification_variants_association_table

    # Count original calibrations with functional ranges
    original_count = db.scalar(
        sa.select(sa.func.count(ScoreCalibration.id)).where(
            ScoreCalibration.functional_ranges_deprecated_json.isnot(None)
        )
    )

    # Count migrated functional classifications
    migrated_count = db.scalar(
        sa.select(sa.func.count(ScoreCalibrationFunctionalClassification.id))
    )

    # Count ACMG classifications
    acmg_count = db.scalar(
        sa.select(sa.func.count(ACMGClassification.id))
    )

    # Count variant associations
    association_count = db.scalar(
        sa.select(sa.func.count()).select_from(association_table)
    )

    print(f"Original calibrations with functional ranges: {original_count}")
    print(f"Migrated functional classification rows: {migrated_count}")
    print(f"ACMG classification records: {acmg_count}")
    print(f"Variant associations created: {association_count}")

    # Sample verification - check that relationships work
    sample_classification = db.scalar(
        sa.select(ScoreCalibrationFunctionalClassification).limit(1)
    )

    if sample_classification:
        print("\nSample verification:")
        print(f" Functional classification ID: {sample_classification.id}")
        print(f" Label: {sample_classification.label}")
        # The model attribute is `functional_classification` (the keyword the
        # migration uses when constructing rows); "classification" is only the
        # key name in the legacy JSON.
        print(f" Classification: {sample_classification.functional_classification}")
        print(f" Range: {sample_classification.range}")
        print(f" Calibration ID: {sample_classification.calibration_id}")
        print(f" ACMG classification ID: {sample_classification.acmg_classification_id}")

        # Count variants associated with this classification
        variant_count = db.scalar(
            sa.select(sa.func.count()).select_from(association_table).where(
                association_table.c.functional_classification_id == sample_classification.id
            )
        )
        print(f" Associated variants: {variant_count}")

    # Functional classifications by type
    classification_column = ScoreCalibrationFunctionalClassification.functional_classification
    classification_stats = db.execute(
        sa.select(
            classification_column,
            sa.func.count().label('count')
        ).group_by(classification_column)
    ).all()

    for classification, count in classification_stats:
        print(f"{classification}: {count} ranges")
300+
301+
302+
303+
def rollback_migration(db: Session):
    """Undo the migration by emptying the three tables it writes to.

    Counts the rows in the variant association table, the functional
    classification table and the ACMG classification table, deletes all rows
    from each (associations first, ACMG classifications last), commits, and
    prints how many rows each table held beforehand.

    Args:
        db: Session on which the deletes are executed and committed.
    """
    print("Rolling back migration...")

    association_table = score_calibration_functional_classification_variants_association_table

    # Row counts are taken up front because they are reported after the delete.
    functional_rows = db.scalar(
        sa.select(sa.func.count(ScoreCalibrationFunctionalClassification.id))
    )
    acmg_rows = db.scalar(sa.select(sa.func.count(ACMGClassification.id)))
    association_rows = db.scalar(
        sa.select(sa.func.count()).select_from(association_table)
    )

    # Referencing tables are emptied before the tables they point at.
    deletion_order = (
        association_table,
        ScoreCalibrationFunctionalClassification,
        ACMGClassification,
    )
    for target in deletion_order:
        db.execute(sa.delete(target))
    db.commit()

    print(f"Deleted {association_rows} variant associations")
    print(f"Deleted {functional_rows} functional classification rows")
    print(f"Deleted {acmg_rows} ACMG classification rows")
331+
332+
333+
def show_usage():
    """Print the command-line help text for this script to stdout."""
    # Leading and trailing empty entries reproduce the blank line before and
    # after the help text.
    usage_lines = [
        "",
        "Usage: python migrate_jsonb_ranges_to_table_rows.py [command]",
        "",
        "Commands:",
        "migrate (default) - Migrate JSONB functional_ranges to table rows",
        "verify - Verify migration without running it",
        "rollback - Remove all migrated data (destructive!)",
        "",
        "Examples:",
        "python migrate_jsonb_ranges_to_table_rows.py # Run migration",
        "python migrate_jsonb_ranges_to_table_rows.py verify # Check status",
        "python migrate_jsonb_ranges_to_table_rows.py rollback # Undo migration",
        "",
    ]
    print("\n".join(usage_lines))
348+
349+
350+
if __name__ == "__main__":
    import sys

    # Command-line entry point: the first argument selects the action and
    # defaults to "migrate" when omitted.
    command = sys.argv[1] if len(sys.argv) > 1 else "migrate"

    if command in ("help", "--help", "-h"):
        show_usage()
    elif command == "rollback":
        print("WARNING: This will delete all migrated functional classification data!")
        response = input("Are you sure you want to continue? (y/N): ")
        # Tolerate stray whitespace around the answer; anything but "y" cancels.
        if response.strip().lower() == 'y':
            with SessionLocal() as db:
                rollback_migration(db)
        else:
            print("Rollback cancelled.")
    elif command == "verify":
        with SessionLocal() as db:
            verify_migration(db)
    elif command == "migrate":
        with SessionLocal() as db:
            do_migration(db)
            verify_migration(db)
    else:
        print(f"Unknown command: {command}")
        show_usage()
        # Exit non-zero so calling shells and CI notice a mistyped command.
        sys.exit(1)

0 commit comments

Comments
 (0)