From 213689b8c8215db45a2243d23498f3c2a4e06ede Mon Sep 17 00:00:00 2001 From: Joe Russack Date: Tue, 7 Apr 2026 21:38:39 -0700 Subject: [PATCH] perf: paginate delete blockers to avoid loading all related records (#7515) --- .../backend/delete_blockers/__init__.py | 0 .../backend/delete_blockers/tests/__init__.py | 0 .../delete_blockers/tests/test_pagination.py | 186 ++++++++++++++++++ specifyweb/backend/delete_blockers/views.py | 41 ++-- .../__tests__/useDeleteBlockers.test.tsx | 4 +- 5 files changed, 213 insertions(+), 18 deletions(-) create mode 100644 specifyweb/backend/delete_blockers/__init__.py create mode 100644 specifyweb/backend/delete_blockers/tests/__init__.py create mode 100644 specifyweb/backend/delete_blockers/tests/test_pagination.py diff --git a/specifyweb/backend/delete_blockers/__init__.py b/specifyweb/backend/delete_blockers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/specifyweb/backend/delete_blockers/tests/__init__.py b/specifyweb/backend/delete_blockers/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/specifyweb/backend/delete_blockers/tests/test_pagination.py b/specifyweb/backend/delete_blockers/tests/test_pagination.py new file mode 100644 index 00000000000..02513411f97 --- /dev/null +++ b/specifyweb/backend/delete_blockers/tests/test_pagination.py @@ -0,0 +1,186 @@ +""" +Tests for paginated delete blockers (#7515). + +The old implementation used Django's Collector.collect() which loads ALL +related objects into memory. For a Taxon like "Life" with 900K+ +determinations this causes OOM kills. + +The fix replaces Collector with targeted COUNT + limited-ID queries per +PROTECT relationship, returning total_count alongside a capped ids list. +""" + +from django.test import TestCase + +from specifyweb.specify.models import ( + Collection, + Collectionobject, + Datatype, + Determination, + Discipline, + Division, + Geographytreedef, + Geographytreedefitem, + Geologictimeperiodtreedef, + Institution, + Taxon, + Taxontreedef, + Taxontreedefitem, +) +from specifyweb.backend.delete_blockers.views import _collect_delete_blockers + + +class TestDeleteBlockerPagination(TestCase): + """Verify that _collect_delete_blockers returns total_count and caps ids.""" + + @classmethod + def setUpTestData(cls): + """Build a minimal Specify object graph with many determinations.""" + cls.institution = Institution.objects.create( + name='Test Institution', + isaccessionsglobal=True, + issecurityon=False, + isserverbased=False, + issharinglocalities=True, + issinglegeographytree=True, + ) + cls.division = Division.objects.create( + institution=cls.institution, name='Test Division', + ) + cls.geographytreedef = Geographytreedef.objects.create(name='Test gtd') + cls.geographytreedefitem = Geographytreedefitem.objects.create( + treedef=cls.geographytreedef, name='Planet', rankid=0, + ) + cls.geologictimeperiodtreedef = Geologictimeperiodtreedef.objects.create( + name='Test gtptd', + ) + cls.taxontreedef = Taxontreedef.objects.create(name='Test ttd') + cls.taxontreedefitem = Taxontreedefitem.objects.create( + treedef=cls.taxontreedef, name='Life', rankid=0, + ) + cls.datatype = Datatype.objects.create(name='Test datatype') + cls.discipline = Discipline.objects.create( + division=cls.division, + datatype=cls.datatype, + geologictimeperiodtreedef=cls.geologictimeperiodtreedef, + geographytreedef=cls.geographytreedef, + taxontreedef=cls.taxontreedef, + type='fish', + ) + cls.collection = Collection.objects.create( + catalognumformatname='test', + collectionname='TestCollection', + discipline=cls.discipline, + isembeddedcollectingevent=False, + ) + + # The taxon we will try to "delete" -- has many determinations + cls.taxon = Taxon.objects.create( + definition=cls.taxontreedef, + definitionitem=cls.taxontreedefitem, + name='Life', + rankid=0, + ) + + # Create 150 collection objects each with a determination pointing + # to cls.taxon. This exceeds the id_limit of 100 so we can verify + # pagination. + cos = Collectionobject.objects.bulk_create([ + Collectionobject( + catalognumber=str(i).zfill(9), + collection=cls.collection, + collectionmemberid=cls.collection.id, + ) + for i in range(150) + ]) + Determination.objects.bulk_create([ + Determination( + collectionobject=co, + collectionmemberid=cls.collection.id, + taxon=cls.taxon, + iscurrent=True, + ) + for co in cos + ]) + + def test_total_count_present(self): + """Every blocker entry must include total_count.""" + using = 'default' + result = _collect_delete_blockers(self.taxon, using) + for entry in result: + self.assertIn( + 'total_count', entry, + f"Missing total_count in blocker for " + f"{entry.get('table')}.{entry.get('field')}", + ) + + def test_total_count_reflects_true_count(self): + """total_count must reflect the real number of blocking rows.""" + using = 'default' + result = _collect_delete_blockers(self.taxon, using) + det_blocker = next( + (e for e in result + if e['table'] == 'Determination' and e['field'] == 'taxon'), + None, + ) + self.assertIsNotNone(det_blocker, 'Expected a Determination/taxon blocker') + self.assertEqual(det_blocker['total_count'], 150) + + def test_ids_capped_at_limit(self): + """ids list must not exceed the default limit of 100.""" + using = 'default' + result = _collect_delete_blockers(self.taxon, using) + det_blocker = next( + (e for e in result + if e['table'] == 'Determination' and e['field'] == 'taxon'), + None, + ) + self.assertIsNotNone(det_blocker) + self.assertLessEqual(len(det_blocker['ids']), 100) + + def test_small_blocker_has_exact_ids(self): + """When blocker count is below limit, ids lists all of them and + total_count equals len(ids).""" + taxon2 = Taxon.objects.create( + definition=self.taxontreedef, + definitionitem=self.taxontreedefitem, + name='SmallTaxon', + rankid=0, + ) + cos = Collectionobject.objects.bulk_create([ + Collectionobject( + catalognumber=str(i + 9000).zfill(9), + collection=self.collection, + collectionmemberid=self.collection.id, + ) + for i in range(3) + ]) + Determination.objects.bulk_create([ + Determination( + collectionobject=co, + collectionmemberid=self.collection.id, + taxon=taxon2, + iscurrent=True, + ) + for co in cos + ]) + + using = 'default' + result = _collect_delete_blockers(taxon2, using) + det_blocker = next( + (e for e in result + if e['table'] == 'Determination' and e['field'] == 'taxon'), + None, + ) + self.assertIsNotNone(det_blocker) + self.assertEqual(det_blocker['total_count'], 3) + self.assertEqual(len(det_blocker['ids']), 3) + + def test_backward_compatible_keys(self): + """Response must still contain table, field, and ids.""" + using = 'default' + result = _collect_delete_blockers(self.taxon, using) + self.assertGreater(len(result), 0, 'Expected at least one blocker') + for entry in result: + self.assertIn('table', entry) + self.assertIn('field', entry) + self.assertIn('ids', entry) diff --git a/specifyweb/backend/delete_blockers/views.py b/specifyweb/backend/delete_blockers/views.py index b579dc0defa..e811803ac67 100644 --- a/specifyweb/backend/delete_blockers/views.py +++ b/specifyweb/backend/delete_blockers/views.py @@ -1,6 +1,5 @@ from django import http from django.db import router, transaction -from django.db.models.deletion import Collector from specifyweb.middleware.general import require_http_methods from specifyweb.specify.api.crud import ( @@ -9,6 +8,7 @@ prepare_discipline_for_delete, ) from specifyweb.specify.api.serializers import toJson +from specifyweb.specify.models import protect_with_blockers from specifyweb.specify.views import login_maybe_required @login_maybe_required @@ -39,19 +39,28 @@ def delete_blockers(request, model, id): return http.HttpResponse(toJson(result), content_type='application/json') -def _collect_delete_blockers(obj, using) -> list[dict]: - collector = Collector(using=using) - collector.delete_blockers = [] - collector.collect([obj]) - return flatten([ - [ - { - 'table': sub_objs[0].__class__.__name__, - 'field': field.name, - 'ids': [sub_obj.id for sub_obj in sub_objs] - } - ] for field, sub_objs in collector.delete_blockers - ]) +def _collect_delete_blockers(obj, using, id_limit=100) -> list[dict]: + """Find PROTECT-related objects that block deletion of *obj*. -def flatten(l): - return [item for sublist in l for item in sublist] \ No newline at end of file + Instead of using Django's Collector (which loads every related row + into memory), we iterate the model's reverse relations, run a COUNT + query for each PROTECT relationship, and fetch at most *id_limit* + primary keys. This keeps memory bounded regardless of how many + blocking rows exist (#7515). + """ + result = [] + for related in obj._meta.related_objects: + if related.on_delete is not protect_with_blockers: + continue + qs = related.related_model.objects.using(using).filter( + **{related.field.name: obj.pk}) + total = qs.count() + if total > 0: + ids = list(qs.values_list('pk', flat=True)[:id_limit]) + result.append({ + 'table': related.related_model.__name__, + 'field': related.field.name, + 'ids': ids, + 'total_count': total, + }) + return result diff --git a/specifyweb/frontend/js_src/lib/hooks/__tests__/useDeleteBlockers.test.tsx b/specifyweb/frontend/js_src/lib/hooks/__tests__/useDeleteBlockers.test.tsx index 9429b0399a4..34c289c90ed 100644 --- a/specifyweb/frontend/js_src/lib/hooks/__tests__/useDeleteBlockers.test.tsx +++ b/specifyweb/frontend/js_src/lib/hooks/__tests__/useDeleteBlockers.test.tsx @@ -66,7 +66,7 @@ overrideAjax( isnot: false, isprompt: null, isrelfld: false, - isstrict: false, + isstrict: undefined, modifiedbyagent: null, operend: null, operstart: 8, @@ -95,7 +95,7 @@ overrideAjax( isnot: false, isprompt: null, isrelfld: false, - isstrict: false, + isstrict: undefined, modifiedbyagent: null, operend: null, operstart: 1,