From bd62203c8a26e2f39daa1ee688ae432b5e7e932e Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Tue, 5 May 2026 21:43:23 +0200 Subject: [PATCH 1/8] perf(tags): bulk-propagate inherited tags + gate child post_save on create Replaces the per-row `.save()` loop in `propagate_tags_on_product_sync` with bulk SQL through the existing tag-utils helpers. For every child model (Engagement/Test/Finding/Endpoint/Location), reads current inherited_tags in one query, computes the per-child diff against the Product's tags, and applies adds/removes via `bulk_add_tag_mapping` and the new `bulk_remove_tags_from_instances` helper. Both `tags` and `inherited_tags` fields are kept in sync. Also gates the per-child `inherit_tags_on_instance` post_save handler on `created=True`. The previous behavior fired on every save (create OR update), repeatedly re-applying inherited tags to children whose tag state had not changed. Sticky enforcement on user-driven tag edits is unchanged (still handled by `make_inherited_tags_sticky` on m2m_changed). Pinned query-count baselines from PR #14811 drop accordingly: product_tag_add -> 100 findings : 4758 -> 91 (~52x fewer queries) product_tag_remove -> 100 findings : 4540 -> 53 (~85x fewer queries) Sticky and child-creation paths are unchanged in this PR. Phase B targets those (centralized inheritance module + drop the duplicate `inherited_tags` TagField). 
--- dojo/product/helpers.py | 126 ++++++++++++++++++++----- dojo/tag_utils.py | 114 +++++++++++++++++++++- dojo/tags_signals.py | 6 ++ unittests/test_tag_inheritance_perf.py | 33 ++++--- 4 files changed, 242 insertions(+), 37 deletions(-) diff --git a/dojo/product/helpers.py b/dojo/product/helpers.py index cdb0750f317..7bbb2937103 100644 --- a/dojo/product/helpers.py +++ b/dojo/product/helpers.py @@ -1,5 +1,6 @@ import contextlib import logging +from collections import defaultdict from django.conf import settings from django.db.models import Q @@ -7,6 +8,7 @@ from dojo.celery import app from dojo.location.models import Location from dojo.models import Endpoint, Engagement, Finding, Product, Test +from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances logger = logging.getLogger(__name__) @@ -19,35 +21,115 @@ def propagate_tags_on_product(product_id, *args, **kwargs): def propagate_tags_on_product_sync(product): - # enagagements + """ + Bulk-apply Product tag changes to all children using through-table SQL. + + Replaces the previous per-row `.save()` loop. For every child model owned + by the product (Engagement, Test, Finding, plus Endpoint or Location + depending on the V3_FEATURE_LOCATIONS flag), reads the existing + `inherited_tags` per child in one query, computes the diff against the + Product's current tags, and applies adds/removes via the bulk tag + helpers. Both `tags` and `inherited_tags` fields are kept in sync. 
+ """ + target_names = {tag.name for tag in product.tags.all()} + logger.debug("Propagating tags from %s to all engagements", product) - propagate_tags_on_object_list(Engagement.objects.filter(product=product)) - # tests + _sync_inheritance_for_qs( + Engagement.objects.filter(product=product), + target_names_per_child=lambda _child: target_names, + ) logger.debug("Propagating tags from %s to all tests", product) - propagate_tags_on_object_list(Test.objects.filter(engagement__product=product)) - # findings + _sync_inheritance_for_qs( + Test.objects.filter(engagement__product=product), + target_names_per_child=lambda _child: target_names, + ) logger.debug("Propagating tags from %s to all findings", product) - propagate_tags_on_object_list(Finding.objects.filter(test__engagement__product=product)) + _sync_inheritance_for_qs( + Finding.objects.filter(test__engagement__product=product), + target_names_per_child=lambda _child: target_names, + ) if settings.V3_FEATURE_LOCATIONS: - # Locations logger.debug("Propagating tags from %s to all locations", product) - propagate_tags_on_object_list( - Location.objects.filter( - # Locations linked directly to a product via LocationProductReference - Q(products__product=product) - # Locations linked indirectly to a product via LocationFindingReference - | Q(findings__finding__test__engagement__product=product), - ).distinct(), + location_qs = Location.objects.filter( + Q(products__product=product) + | Q(findings__finding__test__engagement__product=product), + ).distinct() + # Locations can be linked to multiple products, so the inherited target + # is the union of every related product's tags. Compute per-location. 
+ _sync_inheritance_for_qs( + location_qs, + target_names_per_child=_location_target_names, ) else: - # TODO: Delete this after the move to Locations - # endpoints logger.debug("Propagating tags from %s to all endpoints", product) - propagate_tags_on_object_list(Endpoint.objects.filter(product=product)) + _sync_inheritance_for_qs( + Endpoint.objects.filter(product=product), + target_names_per_child=lambda _child: target_names, + ) + + +def _location_target_names(location): + names: set[str] = set() + for related_product in location.all_related_products(): + if related_product is None: + continue + names.update(tag.name for tag in related_product.tags.all()) + return names + + +def _sync_inheritance_for_qs(queryset, *, target_names_per_child): + """ + Sync inherited_tags + tags for every child in `queryset` to its target tag set. + + target_names_per_child: callable(child) -> set[str]. + + Issues bulk SQL: one through-table read for current inherited_tags, then + bulk add/remove on `tags` and `inherited_tags` fields. + """ + children = list(queryset) + if not children: + return + + model_class = type(children[0]) + inherited_field = model_class._meta.get_field("inherited_tags") + inherited_through = inherited_field.remote_field.through + inherited_tag_model = inherited_field.related_model + + # Resolve through-table FK column for the source side. + source_field_name = None + for field in inherited_through._meta.fields: + if hasattr(field, "remote_field") and field.remote_field and field.remote_field.model == model_class: + source_field_name = field.name + break + + child_ids = [c.pk for c in children] + # One query: pull every (child_id, tag_name) pair from the inherited_tags through table. 
+ existing_pairs = inherited_through.objects.filter( + **{f"{source_field_name}__in": child_ids}, + ).values_list(source_field_name, f"{inherited_tag_model._meta.model_name}__name") + + old_inherited_by_child: dict[int, set[str]] = defaultdict(set) + for child_id, tag_name in existing_pairs: + old_inherited_by_child[child_id].add(tag_name) + + # Compute per-child diff and bucket by tag name. + add_map: dict[str, list] = defaultdict(list) + remove_map: dict[str, list] = defaultdict(list) + for child in children: + target = target_names_per_child(child) + old = old_inherited_by_child.get(child.pk, set()) + for name in target - old: + add_map[name].append(child) + for name in old - target: + remove_map[name].append(child) + # Apply adds. Both `tags` and `inherited_tags` get the same set of new + # inherited names — `_manage_inherited_tags` did the same. + if add_map: + bulk_add_tag_mapping(add_map, tag_field_name="inherited_tags") + bulk_add_tag_mapping(add_map, tag_field_name="tags") -def propagate_tags_on_object_list(object_list): - for obj in object_list: - if obj and obj.id is not None: - logger.debug(f"\tPropagating tags to {type(obj)} - {obj}") - obj.save() + # Apply removes. + for name, instances in remove_map.items(): + bulk_remove_tags_from_instances(name, instances, tag_field_name="inherited_tags") + bulk_remove_tags_from_instances(name, instances, tag_field_name="tags") diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py index 87ed6845961..8b2509e2dab 100644 --- a/dojo/tag_utils.py +++ b/dojo/tag_utils.py @@ -164,6 +164,118 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta return total_created +def bulk_remove_tags_from_instances(tag_or_tags, instances, tag_field_name: str = "tags", batch_size: int | None = None) -> int: + """ + Efficiently remove tag(s) from many model instances. 
+ + Symmetric to ``bulk_add_tags_to_instances``: + + - tag_or_tags: a single string, an iterable of strings or tag objects, or a Tagulous edit string. + - instances: QuerySet or list of model instances of the same class. + - tag_field_name: name of the TagField on the model (default: ``"tags"``). + - Decrements ``tag.count`` for every removed (instance, tag) pair. + - Deletes through-model rows in one DELETE per tag (or batched). + - Clears Django prefetch caches on the input instances so subsequent access reloads from DB. + + Returns the total number of relationships removed across all provided tags. + Tags that do not exist or are not currently associated with any instance are silently skipped. + """ + if batch_size is None: + batch_size = getattr(settings, "TAG_BULK_ADD_BATCH_SIZE", 1000) + + if hasattr(instances, "model"): + instances = list(instances) + + if not instances: + return 0 + + model_class = instances[0].__class__ + + # Mirror the Product safety check from bulk_add_tags_to_instances. Removing + # tags from a Product would normally trigger inheritance propagation via + # m2m_changed signals; this helper bypasses signals, so disallow it. + if model_class is Product: + msg = "bulk_remove_tags_from_instances: Product instances are not supported; use Product.tags.remove() or a propagation-aware helper" + raise ValueError(msg) + + try: + tag_field = model_class._meta.get_field(tag_field_name) + except Exception: + msg = f"Model {model_class.__name__} does not have field '{tag_field_name}'" + raise ValueError(msg) + + if not hasattr(tag_field, "tag_options"): + msg = f"Field '{tag_field_name}' is not a TagField" + raise ValueError(msg) + + tag_model = tag_field.related_model + through_model = tag_field.remote_field.through + + # Normalize input into a list of tag names (mirrors bulk_add_tags_to_instances). 
+ tag_names: list[str] = [] + try: + if isinstance(tag_or_tags, str): + space_delimiter = getattr(tag_field, "tag_options", None).space_delimiter if hasattr(tag_field, "tag_options") else False + tag_names = parse_tags(tag_or_tags, space_delimiter=space_delimiter) + elif isinstance(tag_or_tags, Iterable): + tag_names = [getattr(t, "name", str(t)) for t in tag_or_tags] + else: + tag_names = [str(tag_or_tags)] + except Exception: + tag_names = [str(tag_or_tags)] + + # Resolve through-model FK names dynamically (no hard-coding). + through_fields = {f.name: f for f in through_model._meta.fields} + source_field_name = None + target_field_name = None + for field_name, field in through_fields.items(): + if hasattr(field, "remote_field") and field.remote_field: + if field.remote_field.model == model_class: + source_field_name = field_name + elif field.remote_field.model == tag_model: + target_field_name = field_name + + total_removed = 0 + + for single_tag_name in tag_names: + if not single_tag_name: + continue + + # Resolve the tag — skip silently if it doesn't exist (nothing to remove). + if tag_field.tag_options.case_sensitive: + tag = tag_model.objects.filter(name=single_tag_name).first() + else: + tag = tag_model.objects.filter(name__iexact=single_tag_name).first() + if tag is None: + continue + + for i in range(0, len(instances), batch_size): + batch_instances = instances[i:i + batch_size] + batch_ids = [instance.pk for instance in batch_instances] + + with transaction.atomic(): + # One DELETE per tag-batch. Returns the deleted-row count. + deleted_count, _ = through_model.objects.filter( + **{target_field_name: tag.pk}, + **{f"{source_field_name}__in": batch_ids}, + ).delete() + + if deleted_count: + total_removed += deleted_count + # Decrement the Tagulous-maintained count to avoid drift. + tag_model.objects.filter(pk=tag.pk).update( + count=models.F("count") - deleted_count, + ) + + # Invalidate prefetch caches so callers see the new state. 
+ for instance in batch_instances: + prefetch_cache = getattr(instance, "_prefetched_objects_cache", None) + if prefetch_cache is not None: + prefetch_cache.pop(tag_field_name, None) + + return total_removed + + def bulk_add_tag_mapping( tag_to_instances: dict[str, list], tag_field_name: str = "tags", @@ -410,4 +522,4 @@ def bulk_remove_all_tags(model_class, instance_ids_qs): ) -__all__ = ["bulk_add_tag_mapping", "bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags"] +__all__ = ["bulk_add_tag_mapping", "bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags", "bulk_remove_tags_from_instances"] diff --git a/dojo/tags_signals.py b/dojo/tags_signals.py index 0fea7ae8ad5..6fe142f2e55 100644 --- a/dojo/tags_signals.py +++ b/dojo/tags_signals.py @@ -59,6 +59,12 @@ def inherit_linked_instance_tags(instance: LocationFindingReference | LocationPr @receiver(signals.post_save, sender=Finding) @receiver(signals.post_save, sender=Location) def inherit_tags_on_instance(sender, instance, created, **kwargs): + # Only inherit on creation. The previous behavior fired on every save + # (create OR update), repeatedly re-applying inherited tags to children + # whose tag state had not changed. Sticky enforcement on user-driven + # tag edits is handled by `make_inherited_tags_sticky` (m2m_changed). + if not created: + return inherit_instance_tags(instance) diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 450a3d2ab89..8e9f9a5f792 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -357,10 +357,12 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): # the appropriate mode so all variants execute in a single suite run. # Findings-only scenarios. 
- EXPECTED_PRODUCT_TAG_ADD_100_V2 = 4758 - EXPECTED_PRODUCT_TAG_ADD_100_V3 = 4759 - EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 4540 - EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 4541 + # Pre-Phase-A V2: 4758 add, 4540 remove. V3: 4759/4541. + # Phase A bulk-propagate drops these dramatically. + EXPECTED_PRODUCT_TAG_ADD_100_V2 = 91 + EXPECTED_PRODUCT_TAG_ADD_100_V3 = 91 + EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 53 + EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 53 EXPECTED_CREATE_ONE_FINDING_V2 = 64 EXPECTED_CREATE_ONE_FINDING_V3 = 64 @@ -372,13 +374,13 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): EXPECTED_FINDING_REMOVE_INHERITED_V2 = 44 EXPECTED_FINDING_REMOVE_INHERITED_V3 = 44 - # V2 endpoint paths (Endpoints have no V3 counterpart in this class). - EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 3958 - EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 3740 + # V2 endpoint paths. Pre-Phase-A: 3958 add, 3740 remove. + EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 91 + EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 53 - # V3 location paths (LocationManager has no V2 counterpart in this class). - EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 4531 - EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 4307 + # V3 location paths. Pre-Phase-A: 4532 add, 4307 remove. + EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 316 + EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 266 @override_settings( @@ -488,7 +490,10 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # Pinned baselines per mode. Each test forces its own V3_FEATURE_LOCATIONS # via @override_settings so all four import paths run in a single suite # invocation regardless of the ambient `DD_V3_FEATURE_LOCATIONS` env var. - EXPECTED_ZAP_IMPORT_V2 = 1461 - EXPECTED_ZAP_IMPORT_V3 = 1319 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 77 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 95 + # Phase A nudges these slightly downward (post_save gated on created=True + # avoids re-running inheritance on no-op finding updates during reimport). 
+ # Pre-Phase-A: 1461/1319 import, 77/95 reimport. + EXPECTED_ZAP_IMPORT_V2 = 1385 + EXPECTED_ZAP_IMPORT_V3 = 1243 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87 From ec02e2e778412c094deccce419f1459aaf32ad91 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 6 May 2026 19:09:45 +0200 Subject: [PATCH 2/8] perf(tags): replace process-global signal disconnect with thread-local batch context Adds dojo/tag_inheritance.py with a thread-local batch() context manager and is_in_batch() predicate. While the calling thread is inside a batch, make_inherited_tags_sticky (m2m_changed) early-returns; the calling code takes responsibility for applying inheritance in bulk. Replaces the previous pattern in dojo/importers/location_manager.py:556: disconnected = signals.m2m_changed.disconnect( make_inherited_tags_sticky, sender=Location.tags.through, ) try: ... finally: if disconnected: signals.m2m_changed.connect(...) Signal.disconnect mutates Django's process-global receiver list and is not thread-safe. While disconnected, every thread/greenlet in the same process loses sticky enforcement. Safe only under Celery --pool=prefork and single-threaded gunicorn; broken under --pool=threads|gevent|eventlet, gunicorn --threads >1, or ASGI threadpools. Also fragile on hard process exit mid-import (handler stays disconnected for the rest of the process lifetime). The new batch context lives in threading.local(): each thread has its own depth counter, the signal handler stays globally connected, and the suppression decision is per-thread. No global mutation, no reconnect hazard. This is Phase B Stage 1. Subsequent stages will wrap the broader importer orchestration in batch(), replace the duplicate inherited_tags TagField with a JSON column, and drop _manage_inherited_tags / per-model inherit_tags(). Pinned perf-test note: V3 zap_scan_import baseline rises 1243 -> 1263 (~1.6%). 
The previous process-global disconnect was narrower in scope (Location.tags.through only); the batch context covers all child-tag through-tables. Net trade is positive given the threading bug fix; full Phase B reductions arrive in later stages. --- dojo/importers/location_manager.py | 21 +++++----- dojo/tag_inheritance.py | 54 ++++++++++++++++++++++++++ dojo/tags_signals.py | 7 ++++ unittests/test_tag_inheritance_perf.py | 6 ++- 4 files changed, 78 insertions(+), 10 deletions(-) create mode 100644 dojo/tag_inheritance.py diff --git a/dojo/importers/location_manager.py b/dojo/importers/location_manager.py index 94999538fc1..935d5159427 100644 --- a/dojo/importers/location_manager.py +++ b/dojo/importers/location_manager.py @@ -7,14 +7,13 @@ from django.core.exceptions import ValidationError from django.db import transaction -from django.db.models import signals from django.utils import timezone +from dojo import tag_inheritance from dojo.importers.base_location_manager import BaseLocationManager from dojo.location.models import AbstractLocation, Location, LocationFindingReference, LocationProductReference from dojo.location.status import FindingLocationStatus, ProductLocationStatus from dojo.models import Product, _manage_inherited_tags -from dojo.tags_signals import make_inherited_tags_sticky from dojo.tools.locations import LocationData from dojo.url.models import URL from dojo.utils import get_system_setting @@ -551,10 +550,17 @@ def _get_tags(tags_field: TagField) -> dict[int, set[str]]: existing_inherited_by_location: dict[int, set[str]] = _get_tags(Location.inherited_tags) existing_tags_by_location: dict[int, set[str]] = _get_tags(Location.tags) - # Perform the bulk updates. First, though, disconnect the make_inherited_tags_sticky signal on Location.tags - # while updating, otherwise each (inherited_)tags.set() will trigger, defeating the purpose of this bulk update. 
- disconnected = signals.m2m_changed.disconnect(make_inherited_tags_sticky, sender=Location.tags.through) - try: + # Perform the bulk updates inside a `tag_inheritance.batch()` context. + # While the batch is active, signal handlers in `dojo/tags_signals.py` + # short-circuit per-row inheritance work that would otherwise fire on + # every `(inherited_)tags.set()` and defeat the bulk update. + # + # This replaces a previous `signals.m2m_changed.disconnect(...)` / + # `connect(...)` dance which was process-global and therefore unsafe + # under threaded gunicorn / Celery thread pools / ASGI threadpools: + # while disconnected, every thread in the process lost sticky + # enforcement. Thread-local batch state avoids that hazard. + with tag_inheritance.batch(): for location in locations: target_tag_names: set[str] = set() for pid in product_ids_by_location[location.id]: @@ -573,6 +579,3 @@ def _get_tags(tags_field: TagField) -> dict[int, set[str]]: list(target_tag_names), potentially_existing_tags=existing_tags_by_location[location.id], ) - finally: - if disconnected: - signals.m2m_changed.connect(make_inherited_tags_sticky, sender=Location.tags.through) diff --git a/dojo/tag_inheritance.py b/dojo/tag_inheritance.py new file mode 100644 index 00000000000..9f11f8928a5 --- /dev/null +++ b/dojo/tag_inheritance.py @@ -0,0 +1,54 @@ +""" +Tag inheritance — central coordination module. + +Provides a thread-local ``batch()`` context manager that suppresses +per-instance inheritance work driven by ``m2m_changed`` and ``post_save`` +signals. While inside a batch, the signal handlers in +``dojo/tags_signals.py`` early-return; the calling code is responsible for +applying inheritance in bulk (e.g. via the importer's existing +``_bulk_inherit_tags`` path or ``propagate_tags_on_product_sync``). 
+ +This replaces the previous pattern of ``signals.m2m_changed.disconnect(...)`` +in importer hot loops, which was process-global and unsafe under threaded +gunicorn / Celery thread pools / ASGI threadpools (see PR description for +the full rationale). +""" +from __future__ import annotations + +import contextlib +import threading +from contextlib import contextmanager + +_state = threading.local() + + +def is_in_batch() -> bool: + """Return True when the current thread is inside an active ``batch()``.""" + return bool(getattr(_state, "depth", 0)) + + +@contextmanager +def batch(): + """ + Suppress per-instance inheritance signals for the calling thread. + + Usage: + with tag_inheritance.batch(): + # Bulk operations that would otherwise fire `make_inherited_tags_sticky` + # or `inherit_tags_on_instance` per row. + ... + + The context is reentrant; nested ``with`` blocks share the suppression + until the outermost block exits. State lives in ``threading.local()``, + so concurrent threads (and Celery workers in non-prefork pools) are + unaffected by other threads' batches. + """ + _state.depth = getattr(_state, "depth", 0) + 1 + try: + yield + finally: + _state.depth -= 1 + if _state.depth <= 0: + # Clean up the attribute so leak-free thread reuse stays simple. 
+ with contextlib.suppress(AttributeError): + del _state.depth diff --git a/dojo/tags_signals.py b/dojo/tags_signals.py index 6fe142f2e55..5d8d59e7879 100644 --- a/dojo/tags_signals.py +++ b/dojo/tags_signals.py @@ -4,6 +4,7 @@ from django.db.models import signals from django.dispatch import receiver +from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.location.models import Location, LocationFindingReference, LocationProductReference from dojo.models import Endpoint, Engagement, Finding, Product, Test @@ -32,6 +33,12 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs): @receiver(signals.m2m_changed, sender=Location.tags.through) def make_inherited_tags_sticky(sender, instance, action, **kwargs): """Make sure inherited tags are added back in if they are removed""" + # Inside a `tag_inheritance.batch()` block the caller takes responsibility + # for applying inheritance in bulk; per-row signal work would defeat the + # purpose. This replaces the old `signals.m2m_changed.disconnect(...)` + # pattern, which was process-global and unsafe under threaded workers. + if tag_inheritance.is_in_batch(): + return if action in {"post_add", "post_remove"}: if inherit_product_tags(instance): tag_list = [tag.name for tag in instance.tags.all()] diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 8e9f9a5f792..2b1a3681a9a 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -493,7 +493,11 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # Phase A nudges these slightly downward (post_save gated on created=True # avoids re-running inheritance on no-op finding updates during reimport). # Pre-Phase-A: 1461/1319 import, 77/95 reimport. 
+ # Phase B Stage 1 (thread-safe batch context) adds ~20 queries on the V3 + # import path because the previous process-global signal-disconnect was + # narrower in scope (Location.tags.through only). Net-positive trade for + # eliminating the threading bug; full Phase B reductions land in Stage 2. EXPECTED_ZAP_IMPORT_V2 = 1385 - EXPECTED_ZAP_IMPORT_V3 = 1243 + EXPECTED_ZAP_IMPORT_V3 = 1263 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87 From e4193aee2433e303cae680741e1dba00bc89541e Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 6 May 2026 21:34:13 +0200 Subject: [PATCH 3/8] perf(tags): wrap importer in tag_inheritance.batch() + bulk flush after exit Stage 2 of Phase B. Wraps the import / reimport orchestration in `process_scan` (default_importer + default_reimporter) inside `tag_inheritance.batch()`. Inside the batch: - `make_inherited_tags_sticky` (m2m_changed) early-returns - `inherit_tags_on_instance` (post_save, gated on created=True) early-returns - `inherit_tags_on_linked_instance` (post_save) early-returns After the batch exits, `tag_inheritance.flush_for_product(product)` runs once and bulk-applies inheritance to every child via the Phase A bulk diff path. The bulk diff in `_sync_inheritance_for_qs` (dojo/product/helpers.py) is extended with a re-merge step: it reads each child's current `tags` through-table and ensures every target inherited name is present. This is the bulk equivalent of `make_inherited_tags_sticky` and is needed because `tags.set([...])` inside the batch (e.g. `update_test_tags` during reimport) can wipe inherited names from `tags` while `inherited_tags` stays in sync. A current-tags read is added to gate the re-merge so it only writes rows that are actually missing. `tag_inheritance.flush_for_product(product)` is added as the public flush helper. Internally delegates to `propagate_tags_on_product_sync`. 
Pinned perf-test impact (this branch vs Stage 1): ZAP scan import V2 : 1385 -> 1006 (~27% drop) ZAP scan import V3 : 1263 -> 984 (~22% drop) ZAP reimport V2 : 69 -> 82 (+13: flush has fixed cost) ZAP reimport V3 : 87 -> 140 (+53: flush has fixed cost) product_tag_add -> 100 findings V2/V3 : 91 -> 94 (+3: tags read for re-merge) product_tag_remove -> 100 findings V2/V3 : 53 -> 56 (+3) product_tag_add -> 100 endpoints V2 : 91 -> 194 (eager Celery + explicit propagate both pay re-merge cost) product_tag_remove -> 100 endpoints V2 : 53 -> 56 product_tag_add -> 100 locations V3 : 316 -> 320 product_tag_remove -> 100 locations V3 : 266 -> 270 The reimport and product-toggle increases are eliminated in Stages 3+4+5 (drop the duplicate `inherited_tags` TagField and the re-merge step becomes free because `tags` is the single source of truth). --- dojo/importers/default_importer.py | 57 +++++++++++------- dojo/importers/default_reimporter.py | 83 ++++++++++++++------------ dojo/product/helpers.py | 41 ++++++++++++- dojo/tag_inheritance.py | 19 ++++++ dojo/tags_signals.py | 7 +++ unittests/test_tag_inheritance_perf.py | 40 ++++++++----- 6 files changed, 172 insertions(+), 75 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 25d41c2b5cb..b0b2d740829 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -5,6 +5,7 @@ from django.db.models.query_utils import Q from django.urls import reverse +from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.finding import helper as finding_helper from dojo.importers.base_importer import BaseImporter, Parser @@ -114,29 +115,39 @@ def process_scan( # Note: for fresh imports, parse_findings() calls create_test() internally, # so self.test is guaranteed to be set after this call. 
parsed_findings = self.parse_findings(scan, parser) or [] - new_findings = self.process_findings(parsed_findings, **kwargs) - # Close any old findings in the processed list if the the user specified for that - # to occur in the form that is then passed to the kwargs - closed_findings = self.close_old_findings(self.test.finding_set.all(), **kwargs) - # Update the timestamps of the test object by looking at the findings imported - self.update_timestamps() - # Update the test meta - self.update_test_meta() - # Save the test and engagement for changes to take affect - self.test.save() - self.test.engagement.save() - # Create a test import history object to record the flags sent to the importer - # This operation will return None if the user does not have the import history - # feature enabled - test_import_history = self.update_import_history( - new_findings=new_findings, - closed_findings=closed_findings, - ) - # Apply tags to findings and endpoints/locations - self.apply_import_tags( - new_findings=new_findings, - closed_findings=closed_findings, - ) + # Suppress per-row tag-inheritance signal work during the import hot + # loop. Inheritance is applied in bulk after the batch via + # `tag_inheritance.flush_for_product` (see below). This replaces the + # per-finding `_manage_inherited_tags` signal cascade with a single + # `propagate_tags_on_product_sync` pass. 
+ with tag_inheritance.batch(): + new_findings = self.process_findings(parsed_findings, **kwargs) + # Close any old findings in the processed list if the the user specified for that + # to occur in the form that is then passed to the kwargs + closed_findings = self.close_old_findings(self.test.finding_set.all(), **kwargs) + # Update the timestamps of the test object by looking at the findings imported + self.update_timestamps() + # Update the test meta + self.update_test_meta() + # Save the test and engagement for changes to take affect + self.test.save() + self.test.engagement.save() + # Create a test import history object to record the flags sent to the importer + # This operation will return None if the user does not have the import history + # feature enabled + test_import_history = self.update_import_history( + new_findings=new_findings, + closed_findings=closed_findings, + ) + # Apply tags to findings and endpoints/locations + self.apply_import_tags( + new_findings=new_findings, + closed_findings=closed_findings, + ) + # Flush inherited tags in bulk for all children of the product touched + # by this import. Idempotent and cheap when tag inheritance is + # disabled (it short-circuits on the system + per-product flags). 
+ tag_inheritance.flush_for_product(self.test.engagement.product) # Send out some notifications to the user logger.debug("IMPORT_SCAN: Generating notifications") dojo_dispatch_task( diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 06f06ca368f..cf00cf7c47e 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -6,6 +6,7 @@ from django.db.models.query_utils import Q import dojo.finding.helper as finding_helper +from dojo import tag_inheritance from dojo.celery_dispatch import dojo_dispatch_task from dojo.finding.deduplication import ( find_candidates_for_deduplication_hash, @@ -107,43 +108,51 @@ def process_scan( # Get the findings from the parser based on what methods the parser supplies # This could either mean traditional file parsing, or API pull parsing parsed_findings = self.parse_findings(scan, parser) or [] - ( - new_findings, - reactivated_findings, - findings_to_mitigate, - untouched_findings, - ) = self.process_findings(parsed_findings, **kwargs) - # Close any old findings in the processed list if the the user specified for that - # to occur in the form that is then passed to the kwargs - closed_findings = self.close_old_findings(findings_to_mitigate, **kwargs) - # Update the timestamps of the test object by looking at the findings imported - logger.debug("REIMPORT_SCAN: Updating test/engagement timestamps") - # Update the timestamps of the test object by looking at the findings imported - self.update_timestamps() - # Update the test meta - self.update_test_meta() - # Update the test tags - self.update_test_tags() - # Save the test and engagement for changes to take affect - self.test.save() - self.test.engagement.save() - logger.debug("REIMPORT_SCAN: Updating test tags") - # Create a test import history object to record the flags sent to the importer - # This operation will return None if the user does not have the import history - # feature enabled - 
test_import_history = self.update_import_history( - new_findings=new_findings, - closed_findings=closed_findings, - reactivated_findings=reactivated_findings, - untouched_findings=untouched_findings, - ) - # Apply tags to findings and endpoints - self.apply_import_tags( - new_findings=new_findings, - closed_findings=closed_findings, - reactivated_findings=reactivated_findings, - untouched_findings=untouched_findings, - ) + # Suppress per-row tag-inheritance signal work during the reimport + # hot loop. Inheritance is applied in bulk after the batch via + # `tag_inheritance.flush_for_product`. See default_importer for the + # rationale. + with tag_inheritance.batch(): + ( + new_findings, + reactivated_findings, + findings_to_mitigate, + untouched_findings, + ) = self.process_findings(parsed_findings, **kwargs) + # Close any old findings in the processed list if the user specified for that + # to occur in the form that is then passed to the kwargs + closed_findings = self.close_old_findings(findings_to_mitigate, **kwargs) + # Update the timestamps of the test object by looking at the findings imported + logger.debug("REIMPORT_SCAN: Updating test/engagement timestamps") + # Update the timestamps of the test object by looking at the findings imported + self.update_timestamps() + # Update the test meta + self.update_test_meta() + # Update the test tags + self.update_test_tags() + # Save the test and engagement for changes to take effect + self.test.save() + self.test.engagement.save() + logger.debug("REIMPORT_SCAN: Updating test tags") + # Create a test import history object to record the flags sent to the importer + # This operation will return None if the user does not have the import history + # feature enabled + test_import_history = self.update_import_history( + new_findings=new_findings, + closed_findings=closed_findings, + reactivated_findings=reactivated_findings, + untouched_findings=untouched_findings, + ) + # Apply tags to findings and endpoints + 
self.apply_import_tags( + new_findings=new_findings, + closed_findings=closed_findings, + reactivated_findings=reactivated_findings, + untouched_findings=untouched_findings, + ) + # Bulk-apply tag inheritance for all children of the product touched + # by this reimport. + tag_inheritance.flush_for_product(self.test.engagement.product) # Send out som notifications to the user logger.debug("REIMPORT_SCAN: Generating notifications") updated_count = ( diff --git a/dojo/product/helpers.py b/dojo/product/helpers.py index 7bbb2937103..b4eec84e684 100644 --- a/dojo/product/helpers.py +++ b/dojo/product/helpers.py @@ -112,11 +112,20 @@ def _sync_inheritance_for_qs(queryset, *, target_names_per_child): for child_id, tag_name in existing_pairs: old_inherited_by_child[child_id].add(tag_name) - # Compute per-child diff and bucket by tag name. + # Compute per-child diff and bucket by tag name. Two diffs are computed: + # - inherited_tags add/remove: keeps the inherited_tags M2M in sync + # with the target. + # - tags re-merge: ensures every target name is also present on `tags`, + # even when inherited_tags already matched. This is the bulk + # equivalent of `make_inherited_tags_sticky` enforcement, needed for + # the importer hot path where `test.tags.set([...])` overwrites the + # full tag list inside a `tag_inheritance.batch()` block. 
add_map: dict[str, list] = defaultdict(list) remove_map: dict[str, list] = defaultdict(list) + target_per_child: dict[int, set[str]] = {} for child in children: target = target_names_per_child(child) + target_per_child[child.pk] = target old = old_inherited_by_child.get(child.pk, set()) for name in target - old: add_map[name].append(child) @@ -133,3 +142,33 @@ def _sync_inheritance_for_qs(queryset, *, target_names_per_child): for name, instances in remove_map.items(): bulk_remove_tags_from_instances(name, instances, tag_field_name="inherited_tags") bulk_remove_tags_from_instances(name, instances, tag_field_name="tags") + + # Bulk re-merge: ensure every target name is present on `tags`. We need + # this for the importer hot path where `tags.set([...])` inside a + # `tag_inheritance.batch()` can wipe inherited names from `tags` while + # `inherited_tags` stays in sync (so the diff above is empty). + # + # Read the current `tags` per child so we only write rows that are + # actually missing — without this guard the re-merge becomes O(target * + # children) bulk_create attempts for every product-tag toggle. + tags_field = model_class._meta.get_field("tags") + tags_through = tags_field.remote_field.through + tags_tag_model = tags_field.related_model + existing_tags_pairs = tags_through.objects.filter( + **{f"{source_field_name}__in": child_ids}, + ).values_list(source_field_name, f"{tags_tag_model._meta.model_name}__name") + + current_tags_by_child: dict[int, set[str]] = defaultdict(set) + for child_id, tag_name in existing_tags_pairs: + current_tags_by_child[child_id].add(tag_name) + + remerge_map: dict[str, list] = defaultdict(list) + for child in children: + target = target_per_child[child.pk] + current = current_tags_by_child.get(child.pk, set()) + # Skip names already added by the diff above; only fix true drift. 
+ already_added = {name for name, lst in add_map.items() if child in lst} + for name in target - current - already_added: + remerge_map[name].append(child) + if remerge_map: + bulk_add_tag_mapping(remerge_map, tag_field_name="tags") diff --git a/dojo/tag_inheritance.py b/dojo/tag_inheritance.py index 9f11f8928a5..577fb448ba5 100644 --- a/dojo/tag_inheritance.py +++ b/dojo/tag_inheritance.py @@ -37,6 +37,9 @@ def batch(): # Bulk operations that would otherwise fire `make_inherited_tags_sticky` # or `inherit_tags_on_instance` per row. ... + # After exit, callers should explicitly bulk-apply inheritance via + # `propagate_tags_on_product_sync(product)` (or equivalent) for any + # product whose children were created/modified inside the batch. The context is reentrant; nested ``with`` blocks share the suppression until the outermost block exits. State lives in ``threading.local()``, @@ -52,3 +55,19 @@ def batch(): # Clean up the attribute so leak-free thread reuse stays simple. with contextlib.suppress(AttributeError): del _state.depth + + +def flush_for_product(product) -> None: + """ + Bulk-apply tag inheritance to all children of `product`. + + Intended to be called after a ``batch()`` block exits, when the calling + code has created/modified many children of one product and the + per-instance signal handlers were suppressed. Delegates to + ``propagate_tags_on_product_sync``, which uses the Phase A bulk-diff + helpers to sync `inherited_tags` and `tags` across all children in O(1) + queries per (model x tag) instead of O(N) rows. + """ + # Local import to avoid circulars at module import time. 
+ from dojo.product.helpers import propagate_tags_on_product_sync # noqa: PLC0415 + propagate_tags_on_product_sync(product) diff --git a/dojo/tags_signals.py b/dojo/tags_signals.py index 5d8d59e7879..07f588b90e3 100644 --- a/dojo/tags_signals.py +++ b/dojo/tags_signals.py @@ -72,12 +72,19 @@ def inherit_tags_on_instance(sender, instance, created, **kwargs): # tag edits is handled by `make_inherited_tags_sticky` (m2m_changed). if not created: return + # Inside a `tag_inheritance.batch()` block the caller takes responsibility + # for applying inheritance in bulk after exit (typically via + # `tag_inheritance.flush_for_product`). + if tag_inheritance.is_in_batch(): + return inherit_instance_tags(instance) @receiver(signals.post_save, sender=LocationFindingReference) @receiver(signals.post_save, sender=LocationProductReference) def inherit_tags_on_linked_instance(sender, instance, created, **kwargs): + if tag_inheritance.is_in_batch(): + return inherit_linked_instance_tags(instance) diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 2b1a3681a9a..5dbc9c829d8 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -359,10 +359,12 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): # Findings-only scenarios. # Pre-Phase-A V2: 4758 add, 4540 remove. V3: 4759/4541. # Phase A bulk-propagate drops these dramatically. - EXPECTED_PRODUCT_TAG_ADD_100_V2 = 91 - EXPECTED_PRODUCT_TAG_ADD_100_V3 = 91 - EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 53 - EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 53 + # Phase B Stage 2 adds ~3 queries to read current `tags` for the bulk + # re-merge step (compensates for tags wiped inside batch contexts). 
+ EXPECTED_PRODUCT_TAG_ADD_100_V2 = 94 + EXPECTED_PRODUCT_TAG_ADD_100_V3 = 94 + EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 56 + EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 56 EXPECTED_CREATE_ONE_FINDING_V2 = 64 EXPECTED_CREATE_ONE_FINDING_V3 = 64 @@ -375,12 +377,18 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): EXPECTED_FINDING_REMOVE_INHERITED_V3 = 44 # V2 endpoint paths. Pre-Phase-A: 3958 add, 3740 remove. - EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 91 - EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 53 + # Phase B Stage 2 raises endpoint add 91 -> 194 because the eager Celery + # propagate dispatched by m2m_changed and the explicit + # propagate_tags_on_product_sync call both pay the new tags-read for + # bulk re-merge. Acceptable: the same lever delivers a 27% drop on the + # ZAP import path. Will go further down in Stages 3+4+5 when the + # duplicate inherited_tags M2M is dropped. + EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 194 + EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 56 # V3 location paths. Pre-Phase-A: 4532 add, 4307 remove. - EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 316 - EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 266 + EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 320 + EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 270 @override_settings( @@ -495,9 +503,13 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # Pre-Phase-A: 1461/1319 import, 77/95 reimport. # Phase B Stage 1 (thread-safe batch context) adds ~20 queries on the V3 # import path because the previous process-global signal-disconnect was - # narrower in scope (Location.tags.through only). Net-positive trade for - # eliminating the threading bug; full Phase B reductions land in Stage 2. - EXPECTED_ZAP_IMPORT_V2 = 1385 - EXPECTED_ZAP_IMPORT_V3 = 1263 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87 + # narrower in scope (Location.tags.through only). 
+ # Phase B Stage 2 (importer-wide batch + flush_for_product) drops import + # ~27%/22% (signal cascade replaced by single bulk propagate). Reimport + # rises because flush always runs; bulk re-merge has a fixed cost even + # when there's no work. Stages 3+4+5 (drop duplicate inherited_tags M2M) + # will collapse the reimport cost. + EXPECTED_ZAP_IMPORT_V2 = 1006 + EXPECTED_ZAP_IMPORT_V3 = 984 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 82 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 140 From a46bf5ab403645eb863967d7502f2b1d45d1db5e Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 6 May 2026 21:59:24 +0200 Subject: [PATCH 4/8] perf(tags): short-circuit flush_for_product when inheritance is disabled `tag_inheritance.flush_for_product` is called unconditionally from the importer's `process_scan` (and reimporter equivalent). When tag inheritance is disabled (neither the system-wide flag nor the per-product flag is set) the previous implementation still walked every child queryset to compute the (empty) diff, adding ~9 queries per scan. Tests in `unittests/test_importers_performance.py` pin importer query counts in scenarios where inheritance is off. The Stage 2 commit's flush call shifted those baselines up by 9 across 10 test cases. Add an early-exit so the importer perf tests stay green and no behavior change ships under the inheritance-off configuration. --- dojo/tag_inheritance.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dojo/tag_inheritance.py b/dojo/tag_inheritance.py index 577fb448ba5..a48c1177ea6 100644 --- a/dojo/tag_inheritance.py +++ b/dojo/tag_inheritance.py @@ -67,7 +67,14 @@ def flush_for_product(product) -> None: ``propagate_tags_on_product_sync``, which uses the Phase A bulk-diff helpers to sync `inherited_tags` and `tags` across all children in O(1) queries per (model x tag) instead of O(N) rows. 
+ + Short-circuits when tag inheritance is disabled (neither the system-wide + flag nor the per-product flag is set) so callers (e.g. importers) can + invoke this unconditionally without paying for it. """ - # Local import to avoid circulars at module import time. + # Local imports to avoid circulars at module import time. from dojo.product.helpers import propagate_tags_on_product_sync # noqa: PLC0415 + from dojo.utils import get_system_setting # noqa: PLC0415 + if not getattr(product, "enable_product_tag_inheritance", False) and not get_system_setting("enable_product_tag_inheritance"): + return propagate_tags_on_product_sync(product) From bbd571c24df3b4117d6c87abde385f31704646b9 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Wed, 6 May 2026 23:37:14 +0200 Subject: [PATCH 5/8] perf(tags): bulk-precompute Location target tag names per flush `_location_target_names(location)` issued one `Product.objects.filter(...).distinct()` per location plus N `product.tags.all()` per related product, producing an N+1 across the location queryset on every product tag toggle. Replace the per-location callable with a precomputed {location_id: set[tag_name]} map built in 3 bulk queries: the two LocationProductReference / LocationFindingReference paths union together into {location_id: {product_id}}, then a single Product_tags through-table read fans out to {product_id: {tag_name}}. 
product_tag_add (100 locations, V3): 320 -> 123 queries product_tag_remove (100 locations, V3): 270 -> 73 queries ZAP scan import (V3): 984 -> 947 ZAP scan reimport, no change (V3): 140 -> 103 --- dojo/product/helpers.py | 68 ++++++++++++++++++++------ unittests/test_tag_inheritance_perf.py | 9 ++-- 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/dojo/product/helpers.py b/dojo/product/helpers.py index b4eec84e684..331ebc534b6 100644 --- a/dojo/product/helpers.py +++ b/dojo/product/helpers.py @@ -6,7 +6,7 @@ from django.db.models import Q from dojo.celery import app -from dojo.location.models import Location +from dojo.location.models import Location, LocationFindingReference, LocationProductReference from dojo.models import Endpoint, Engagement, Finding, Product, Test from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances @@ -50,15 +50,19 @@ def propagate_tags_on_product_sync(product): ) if settings.V3_FEATURE_LOCATIONS: logger.debug("Propagating tags from %s to all locations", product) - location_qs = Location.objects.filter( + # Materialize once so we can build a precomputed + # {location_id: set[tag_name]} map without re-evaluating the queryset + # or paying N+1 in `_location_target_names`. + locations = list(Location.objects.filter( Q(products__product=product) | Q(findings__finding__test__engagement__product=product), - ).distinct() - # Locations can be linked to multiple products, so the inherited target - # is the union of every related product's tags. Compute per-location. 
+ ).distinct()) + location_target_names = _build_location_target_names_map( + [loc.pk for loc in locations], + ) _sync_inheritance_for_qs( - location_qs, - target_names_per_child=_location_target_names, + locations, + target_names_per_child=lambda loc: location_target_names.get(loc.pk, set()), ) else: logger.debug("Propagating tags from %s to all endpoints", product) @@ -68,13 +72,49 @@ def propagate_tags_on_product_sync(product): ) -def _location_target_names(location): - names: set[str] = set() - for related_product in location.all_related_products(): - if related_product is None: - continue - names.update(tag.name for tag in related_product.tags.all()) - return names +def _build_location_target_names_map(location_ids): + """ + Bulk-compute {location_id: set[tag_name]} for the given locations. + + Replaces the per-location `_location_target_names` callable, which issued + one `Product.objects.filter(...).distinct()` query plus N `.tags.all()` + queries per location. Now: 3 queries total regardless of fan-out. 
+ """ + if not location_ids: + return {} + + location_to_products: dict[int, set[int]] = defaultdict(set) + for loc_id, prod_id in LocationProductReference.objects.filter( + location_id__in=location_ids, + ).values_list("location_id", "product_id"): + location_to_products[loc_id].add(prod_id) + for loc_id, prod_id in LocationFindingReference.objects.filter( + location_id__in=location_ids, + ).values_list("location_id", "finding__test__engagement__product_id"): + if prod_id is not None: + location_to_products[loc_id].add(prod_id) + + all_product_ids = {pid for pids in location_to_products.values() for pid in pids} + if not all_product_ids: + return {loc_id: set() for loc_id in location_ids} + + product_tags_through = Product.tags.through + tag_model = Product.tags.tag_model + tag_field_name = tag_model._meta.model_name + product_to_tag_names: dict[int, set[str]] = defaultdict(set) + for prod_id, tag_name in product_tags_through.objects.filter( + product_id__in=all_product_ids, + ).values_list("product_id", f"{tag_field_name}__name"): + product_to_tag_names[prod_id].add(tag_name) + + return { + loc_id: { + name + for pid in pids + for name in product_to_tag_names.get(pid, set()) + } + for loc_id, pids in location_to_products.items() + } def _sync_inheritance_for_qs(queryset, *, target_names_per_child): diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index 5dbc9c829d8..a2e3ea7df64 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -387,8 +387,9 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 56 # V3 location paths. Pre-Phase-A: 4532 add, 4307 remove. - EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 320 - EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 270 + # Phase B Stage 2 + location precompute: bulk-built target-name map. 
+ EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 123 + EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 73 @override_settings( @@ -510,6 +511,6 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # when there's no work. Stages 3+4+5 (drop duplicate inherited_tags M2M) # will collapse the reimport cost. EXPECTED_ZAP_IMPORT_V2 = 1006 - EXPECTED_ZAP_IMPORT_V3 = 984 + EXPECTED_ZAP_IMPORT_V3 = 947 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 82 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 140 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 103 From 6ca5d7c2f12542746e1056f792dc852f5fe33152 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Thu, 7 May 2026 18:01:45 +0200 Subject: [PATCH 6/8] test(tags): recalibrate endpoint-add pin to 94 after Stage 2 bulk-propagate The earlier Stage 2 commit recorded 194 queries for product-tag add propagating to 100 endpoints, before bulk_propagate_inherited_tags, the thread-local batch context, and the Location target-name precompute landed on this branch. With those in place the actual count is 94. Pinned value updated; comment refreshed. --- unittests/test_tag_inheritance_perf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index a2e3ea7df64..e6a3fc1968b 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -380,10 +380,10 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self): # Phase B Stage 2 raises endpoint add 91 -> 194 because the eager Celery # propagate dispatched by m2m_changed and the explicit # propagate_tags_on_product_sync call both pay the new tags-read for - # bulk re-merge. Acceptable: the same lever delivers a 27% drop on the - # ZAP import path. Will go further down in Stages 3+4+5 when the - # duplicate inherited_tags M2M is dropped. - EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 194 + # bulk re-merge. 
The bulk-propagate + batch-context + Location precompute + # commits later in Stage 2 collapse this back to 94. Will go further down + # in Stages 3+4+5 when the duplicate inherited_tags M2M is dropped. + EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 94 EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 56 # V3 location paths. Pre-Phase-A: 4532 add, 4307 remove. From 01a3aaf073868378a2d8a6e4aeafa9921d6fc128 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 8 May 2026 00:12:22 +0200 Subject: [PATCH 7/8] perf(tags): watson-style auto-flushing tag inheritance context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the dumb suppression flag with a watson-inspired registrar context manager. Signal handlers register touched instances into the active context; the context auto-flushes on exit (and on explicit mid-batch flush). Removes the manual `flush_for_product` orchestration the importers used to call after the suppression block. `TagInheritanceContext`: - `add(instance)`: register an instance for bulk-sync (no-op when inheritance is disabled for that instance's product, so the bulk path stays cheap on inheritance-off products). - `flush()`: drain registered instances, group by (model, product), run one bulk diff per group via the existing `_sync_inheritance_for_qs` helper. Locations route through the `_build_location_target_names_map` precompute. - System-wide inheritance flag is read from the DB once per context and cached; per-product flags read off the in-memory product. Signal hookup (`tags_signals.py`): - `make_inherited_tags_sticky` (m2m_changed): when active context exists, `ctx.add(instance)` and return — no per-row work. - `inherit_tags_on_instance` (post_save, created=True): same pattern. - `inherit_tags_on_linked_instance` (post_save on LocationFinding/LocationProductReference): registers the underlying Location. Importer / reimporter: - `with tag_inheritance.batch() as tag_ctx:` opens the context. 
- Mid-loop: `ctx.flush()` runs before each per-batch `post_process_findings_batch` dispatch so JIRA labels reflect inherited tags on first push. - On context exit: auto-flush handles end-of-import sync (closed findings, test/engagement saves, apply_import_tags). Drops the explicit `tag_inheritance.flush_for_product(...)` calls. Query-count impact: - ZAP scan import V2: 1006 -> 1057 (+51) — extra context bookkeeping - ZAP scan import V3: 947 -> 997 (+50) - ZAP reimport (no change V2): 82 -> 69 (-13) — registrar skips no-op - ZAP reimport (no change V3): 103 -> 87 (-16) - test_importers_performance: unchanged (inheritance off → no register) Net: import path slightly heavier in exchange for correct JIRA labelling on first push and a much cleaner orchestration model. The import-path overhead is constant per import (not per finding); follow- up commits can trim it. --- dojo/importers/default_importer.py | 22 +-- dojo/importers/default_reimporter.py | 18 ++- dojo/tag_inheritance.py | 214 ++++++++++++++++++------- dojo/tags_signals.py | 36 +++-- unittests/test_tag_inheritance_perf.py | 8 +- 5 files changed, 201 insertions(+), 97 deletions(-) diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index b0b2d740829..6ae1a88aa29 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -115,12 +115,12 @@ def process_scan( # Note: for fresh imports, parse_findings() calls create_test() internally, # so self.test is guaranteed to be set after this call. parsed_findings = self.parse_findings(scan, parser) or [] - # Suppress per-row tag-inheritance signal work during the import hot - # loop. Inheritance is applied in bulk after the batch via - # `tag_inheritance.flush_for_product` (see below). This replaces the - # per-finding `_manage_inherited_tags` signal cascade with a single - # `propagate_tags_on_product_sync` pass. - with tag_inheritance.batch(): + # Open a tag-inheritance context. 
Signal handlers register touched + # instances into the context; the context auto-flushes (bulk-applies + # inherited tags) on exit. Mid-context `ctx.flush()` calls drain the + # accumulated set early — used before per-batch post-process dispatch + # so JIRA labels reflect the full tag state on first push. + with tag_inheritance.batch() as tag_ctx: new_findings = self.process_findings(parsed_findings, **kwargs) # Close any old findings in the processed list if the the user specified for that # to occur in the form that is then passed to the kwargs @@ -144,10 +144,7 @@ def process_scan( new_findings=new_findings, closed_findings=closed_findings, ) - # Flush inherited tags in bulk for all children of the product touched - # by this import. Idempotent and cheap when tag inheritance is - # disabled (it short-circuits on the system + per-product flags). - tag_inheritance.flush_for_product(self.test.engagement.product) + # Inheritance auto-flushes on context exit above. # Send out some notifications to the user logger.debug("IMPORT_SCAN: Generating notifications") dojo_dispatch_task( @@ -280,6 +277,11 @@ def process_findings( findings_with_parser_tags.clear() finding_ids_batch = list(batch_finding_ids) batch_finding_ids.clear() + # Drain the inheritance context BEFORE dispatching post-process + # so the JIRA push inside that task sees inherited tags on the + # findings (otherwise inheritance lands later, on context exit). 
+ if (ctx := tag_inheritance.current()) is not None: + ctx.flush() logger.debug("process_findings: dispatching batch with push_to_jira=%s (batch_size=%d, is_final=%s)", push_to_jira, len(finding_ids_batch), is_final_finding) dojo_dispatch_task( diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index cf00cf7c47e..083ae871365 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -108,11 +108,10 @@ def process_scan( # Get the findings from the parser based on what methods the parser supplies # This could either mean traditional file parsing, or API pull parsing parsed_findings = self.parse_findings(scan, parser) or [] - # Suppress per-row tag-inheritance signal work during the reimport - # hot loop. Inheritance is applied in bulk after the batch via - # `tag_inheritance.flush_for_product`. See default_importer for the - # rationale. - with tag_inheritance.batch(): + # Open a tag-inheritance context (auto-flushes on exit). Signals + # register touched instances; `ctx.flush()` mid-loop drains them + # before each post-process dispatch so JIRA labels are correct. + with tag_inheritance.batch() as tag_ctx: ( new_findings, reactivated_findings, @@ -150,9 +149,7 @@ def process_scan( reactivated_findings=reactivated_findings, untouched_findings=untouched_findings, ) - # Bulk-apply tag inheritance for all children of the product touched - # by this reimport. - tag_inheritance.flush_for_product(self.test.engagement.product) + # Inheritance auto-flushes on context exit above. # Send out som notifications to the user logger.debug("REIMPORT_SCAN: Generating notifications") updated_count = ( @@ -436,6 +433,11 @@ def process_findings( findings_with_parser_tags.clear() finding_ids_batch = list(batch_finding_ids) batch_finding_ids.clear() + # Drain the inheritance context BEFORE dispatching + # post-process so the JIRA push inside that task sees + # inherited tags on the findings. 
+ if (ctx := tag_inheritance.current()) is not None: + ctx.flush() dojo_dispatch_task( finding_helper.post_process_findings_batch, finding_ids_batch, diff --git a/dojo/tag_inheritance.py b/dojo/tag_inheritance.py index a48c1177ea6..c5a7c4fc9ae 100644 --- a/dojo/tag_inheritance.py +++ b/dojo/tag_inheritance.py @@ -1,80 +1,174 @@ """ -Tag inheritance — central coordination module. - -Provides a thread-local ``batch()`` context manager that suppresses -per-instance inheritance work driven by ``m2m_changed`` and ``post_save`` -signals. While inside a batch, the signal handlers in -``dojo/tags_signals.py`` early-return; the calling code is responsible for -applying inheritance in bulk (e.g. via the importer's existing -``_bulk_inherit_tags`` path or ``propagate_tags_on_product_sync``). - -This replaces the previous pattern of ``signals.m2m_changed.disconnect(...)`` -in importer hot loops, which was process-global and unsafe under threaded -gunicorn / Celery thread pools / ASGI threadpools (see PR description for -the full rationale). +Tag inheritance — watson-style context manager. + +Pattern mirrors `watson.search.SearchContextManager`: signal handlers +register touched instances into the active context instead of running +per-row inheritance work; the context flushes them in bulk on +``flush()`` (called explicitly mid-batch) and on context exit. + +Usage: + with tag_inheritance.batch() as ctx: + # bulk operations create/modify many instances + ... + ctx.flush() # optional, mid-batch sync (e.g. before JIRA push) + ... + # auto-flushes on outermost exit + +The context lives in ``threading.local``, so concurrent threads (and +Celery workers in non-prefork pools) are unaffected by other threads' +batches. 
""" from __future__ import annotations -import contextlib +import logging import threading +from collections import defaultdict from contextlib import contextmanager +logger = logging.getLogger(__name__) + _state = threading.local() +class TagInheritanceContext: + + """ + Per-thread registrar for instances whose inherited tags need + re-syncing in bulk. Touched instances are grouped by model class; + ``flush()`` runs one bulk diff per (model, product) group via the + existing ``_sync_inheritance_for_qs`` helper. + """ + + def __init__(self): + self._depth = 0 + # model_class -> set of instance pks + self._touched: dict[type, set[int]] = defaultdict(set) + # System-wide inheritance flag is read from the DB and cached for + # the lifetime of the context. Per-product flags are read directly + # off the in-memory product instance. + self._system_inheritance: bool | None = None + + def is_active(self) -> bool: + return self._depth > 0 + + def system_inheritance_enabled(self) -> bool: + if self._system_inheritance is None: + from dojo.utils import get_system_setting # noqa: PLC0415 + self._system_inheritance = bool(get_system_setting("enable_product_tag_inheritance")) + return self._system_inheritance + + def is_inheritance_enabled_for(self, instance) -> bool: + """ + True when the given instance is under at least one product whose + inheritance is enabled (per-product flag or system-wide). + """ + from dojo.tags_signals import get_products # noqa: PLC0415 + products = get_products(instance) + if any(getattr(p, "enable_product_tag_inheritance", False) for p in products if p): + return True + return self.system_inheritance_enabled() + + def add(self, instance) -> None: + """ + Register an instance for bulk-sync at next flush. No-op when + inheritance is disabled for this instance, so the bulk path stays + cheap on inheritance-off products. 
+ """ + if instance is None or getattr(instance, "pk", None) is None: + return + if not self.is_inheritance_enabled_for(instance): + return + self._touched[type(instance)].add(instance.pk) + + def flush(self) -> None: + """ + Bulk-sync inherited tags for every registered instance, then + clear the registry. Idempotent and cheap when nothing was + touched. + """ + if not self._touched: + return + # Local imports to avoid circulars at module import time. + from dojo.location.models import Location # noqa: PLC0415 + from dojo.product.helpers import ( # noqa: PLC0415 + _build_location_target_names_map, + _sync_inheritance_for_qs, + ) + from dojo.tags_signals import get_products # noqa: PLC0415 + + touched, self._touched = self._touched, defaultdict(set) + + for model_class, pks in touched.items(): + if not pks: + continue + queryset = model_class.objects.filter(pk__in=pks) + if model_class is Location: + # Location target = union of related products' tags. Use + # the bulk precompute helper. + target_map = _build_location_target_names_map(list(pks)) + _sync_inheritance_for_qs( + queryset, + target_names_per_child=lambda loc, _m=target_map: _m.get(loc.pk, set()), + ) + else: + # All other children belong to one product (Finding via + # test, Endpoint via product, etc.). Group by product so + # each group gets one target name set. 
+ instances = list(queryset) + by_product: dict[int, list] = defaultdict(list) + product_by_id: dict[int, object] = {} + for inst in instances: + products = get_products(inst) + for product in products: + if product is None: + continue + by_product[product.id].append(inst) + product_by_id[product.id] = product + for product_id, group in by_product.items(): + product = product_by_id[product_id] + target_names = {tag.name for tag in product.tags.all()} + _sync_inheritance_for_qs( + group, + target_names_per_child=lambda _c, _t=target_names: _t, + ) + + +def current() -> TagInheritanceContext | None: + """Return the active context for this thread, if any.""" + return getattr(_state, "ctx", None) + + def is_in_batch() -> bool: """Return True when the current thread is inside an active ``batch()``.""" - return bool(getattr(_state, "depth", 0)) + ctx = current() + return ctx is not None and ctx.is_active() @contextmanager def batch(): """ - Suppress per-instance inheritance signals for the calling thread. - - Usage: - with tag_inheritance.batch(): - # Bulk operations that would otherwise fire `make_inherited_tags_sticky` - # or `inherit_tags_on_instance` per row. - ... - # After exit, callers should explicitly bulk-apply inheritance via - # `propagate_tags_on_product_sync(product)` (or equivalent) for any - # product whose children were created/modified inside the batch. - - The context is reentrant; nested ``with`` blocks share the suppression - until the outermost block exits. State lives in ``threading.local()``, - so concurrent threads (and Celery workers in non-prefork pools) are - unaffected by other threads' batches. - """ - _state.depth = getattr(_state, "depth", 0) + 1 - try: - yield - finally: - _state.depth -= 1 - if _state.depth <= 0: - # Clean up the attribute so leak-free thread reuse stays simple. - with contextlib.suppress(AttributeError): - del _state.depth + Open a tag-inheritance context for the calling thread. 
+ Inside the context, signal handlers register touched instances + instead of running per-row inheritance. On exit, the context + auto-flushes (bulk-applies inheritance for every touched instance). -def flush_for_product(product) -> None: + Reentrant: nested ``with`` blocks share the context until the + outermost block exits. """ - Bulk-apply tag inheritance to all children of `product`. - - Intended to be called after a ``batch()`` block exits, when the calling - code has created/modified many children of one product and the - per-instance signal handlers were suppressed. Delegates to - ``propagate_tags_on_product_sync``, which uses the Phase A bulk-diff - helpers to sync `inherited_tags` and `tags` across all children in O(1) - queries per (model x tag) instead of O(N) rows. - - Short-circuits when tag inheritance is disabled (neither the system-wide - flag nor the per-product flag is set) so callers (e.g. importers) can - invoke this unconditionally without paying for it. - """ - # Local imports to avoid circulars at module import time. 
- from dojo.product.helpers import propagate_tags_on_product_sync # noqa: PLC0415 - from dojo.utils import get_system_setting # noqa: PLC0415 - if not getattr(product, "enable_product_tag_inheritance", False) and not get_system_setting("enable_product_tag_inheritance"): - return - propagate_tags_on_product_sync(product) + ctx = getattr(_state, "ctx", None) + owner = ctx is None + if owner: + ctx = TagInheritanceContext() + _state.ctx = ctx + ctx._depth += 1 + try: + yield ctx + finally: + ctx._depth -= 1 + if ctx._depth <= 0: + try: + ctx.flush() + finally: + if owner: + del _state.ctx diff --git a/dojo/tags_signals.py b/dojo/tags_signals.py index 07f588b90e3..aef1bfb0e14 100644 --- a/dojo/tags_signals.py +++ b/dojo/tags_signals.py @@ -33,17 +33,18 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs): @receiver(signals.m2m_changed, sender=Location.tags.through) def make_inherited_tags_sticky(sender, instance, action, **kwargs): """Make sure inherited tags are added back in if they are removed""" - # Inside a `tag_inheritance.batch()` block the caller takes responsibility - # for applying inheritance in bulk; per-row signal work would defeat the - # purpose. This replaces the old `signals.m2m_changed.disconnect(...)` - # pattern, which was process-global and unsafe under threaded workers. - if tag_inheritance.is_in_batch(): + if action not in {"post_add", "post_remove"}: return - if action in {"post_add", "post_remove"}: - if inherit_product_tags(instance): - tag_list = [tag.name for tag in instance.tags.all()] - if propagate_inheritance(instance, tag_list=tag_list): - instance.inherit_tags(tag_list) + # Inside a `tag_inheritance.batch()` context, register the instance + # for bulk-sync at flush/exit instead of running per-row inheritance. 
+ ctx = tag_inheritance.current() + if ctx is not None and ctx.is_active(): + ctx.add(instance) + return + if inherit_product_tags(instance): + tag_list = [tag.name for tag in instance.tags.all()] + if propagate_inheritance(instance, tag_list=tag_list): + instance.inherit_tags(tag_list) def inherit_instance_tags(instance): @@ -72,10 +73,11 @@ def inherit_tags_on_instance(sender, instance, created, **kwargs): # tag edits is handled by `make_inherited_tags_sticky` (m2m_changed). if not created: return - # Inside a `tag_inheritance.batch()` block the caller takes responsibility - # for applying inheritance in bulk after exit (typically via - # `tag_inheritance.flush_for_product`). - if tag_inheritance.is_in_batch(): + # Inside a `tag_inheritance.batch()` context, register the new instance + # for bulk-sync at flush/exit instead of running per-row inheritance. + ctx = tag_inheritance.current() + if ctx is not None and ctx.is_active(): + ctx.add(instance) return inherit_instance_tags(instance) @@ -83,7 +85,11 @@ def inherit_tags_on_instance(sender, instance, created, **kwargs): @receiver(signals.post_save, sender=LocationFindingReference) @receiver(signals.post_save, sender=LocationProductReference) def inherit_tags_on_linked_instance(sender, instance, created, **kwargs): - if tag_inheritance.is_in_batch(): + # Linked refs (LocationFinding/LocationProductReference) bind a Location + # to a Finding/Product. Register the underlying Location for bulk-sync. 
+ ctx = tag_inheritance.current() + if ctx is not None and ctx.is_active(): + ctx.add(instance.location) return inherit_linked_instance_tags(instance) diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index e6a3fc1968b..c0d94b3124e 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -510,7 +510,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # rises because flush always runs; bulk re-merge has a fixed cost even # when there's no work. Stages 3+4+5 (drop duplicate inherited_tags M2M) # will collapse the reimport cost. - EXPECTED_ZAP_IMPORT_V2 = 1006 - EXPECTED_ZAP_IMPORT_V3 = 947 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 82 - EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 103 + EXPECTED_ZAP_IMPORT_V2 = 1057 + EXPECTED_ZAP_IMPORT_V3 = 997 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 + EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87 From 70f5a4dc437c06785d2366ef7aea32fc899fa13a Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 8 May 2026 16:06:51 +0200 Subject: [PATCH 8/8] wip --- dojo/tag_inheritance.py | 120 +++++++++++++------------ unittests/test_tag_inheritance_perf.py | 4 +- 2 files changed, 66 insertions(+), 58 deletions(-) diff --git a/dojo/tag_inheritance.py b/dojo/tag_inheritance.py index c5a7c4fc9ae..758b702a8a3 100644 --- a/dojo/tag_inheritance.py +++ b/dojo/tag_inheritance.py @@ -34,18 +34,30 @@ class TagInheritanceContext: """ Per-thread registrar for instances whose inherited tags need - re-syncing in bulk. Touched instances are grouped by model class; - ``flush()`` runs one bulk diff per (model, product) group via the - existing ``_sync_inheritance_for_qs`` helper. + re-syncing in bulk. 
+ + Layout: ``{product_id: {model_class: {pk, ...}}}`` for single-product + children (Engagement / Test / Finding / Endpoint), plus a separate + set of Location pks (locations are linked to many products via + LocationProductReference / LocationFindingReference, so their target + tag set is the union of all related products' tags). + + On ``flush()``: one bulk diff per (product, model) group via + ``_sync_inheritance_for_qs``; locations route through the bulk + target-map helper. """ def __init__(self): self._depth = 0 - # model_class -> set of instance pks - self._touched: dict[type, set[int]] = defaultdict(set) + # product_id -> model_class -> set[pk] + self._touched_by_product: dict[int, dict[type, set[int]]] = defaultdict(lambda: defaultdict(set)) + # Cached resolved Product instances so flush doesn't re-read. + self._product_by_id: dict[int, object] = {} + # Locations are multi-product; tracked separately and resolved at flush. + self._touched_locations: set[int] = set() # System-wide inheritance flag is read from the DB and cached for - # the lifetime of the context. Per-product flags are read directly - # off the in-memory product instance. + # the lifetime of the context. Per-product flags are read off the + # in-memory product instance (no DB cost). self._system_inheritance: bool | None = None def is_active(self) -> bool: @@ -57,28 +69,34 @@ def system_inheritance_enabled(self) -> bool: self._system_inheritance = bool(get_system_setting("enable_product_tag_inheritance")) return self._system_inheritance - def is_inheritance_enabled_for(self, instance) -> bool: - """ - True when the given instance is under at least one product whose - inheritance is enabled (per-product flag or system-wide). 
- """ - from dojo.tags_signals import get_products # noqa: PLC0415 - products = get_products(instance) - if any(getattr(p, "enable_product_tag_inheritance", False) for p in products if p): - return True - return self.system_inheritance_enabled() - def add(self, instance) -> None: """ - Register an instance for bulk-sync at next flush. No-op when - inheritance is disabled for this instance, so the bulk path stays - cheap on inheritance-off products. + Register an instance for bulk-sync at next flush. + + For Location: always register (filtered at flush time, since + per-location inheritance check would cost a DB query each). + + For other models: resolve product upfront (in-memory FK chain), + skip when inheritance is disabled for that product. Stays cheap + on inheritance-off products. """ if instance is None or getattr(instance, "pk", None) is None: return - if not self.is_inheritance_enabled_for(instance): + + from dojo.location.models import Location # noqa: PLC0415 + if isinstance(instance, Location): + self._touched_locations.add(instance.pk) return - self._touched[type(instance)].add(instance.pk) + + from dojo.tags_signals import get_products # noqa: PLC0415 + for product in get_products(instance): + if product is None: + continue + if not getattr(product, "enable_product_tag_inheritance", False): + if not self.system_inheritance_enabled(): + continue + self._touched_by_product[product.id][type(instance)].add(instance.pk) + self._product_by_id[product.id] = product def flush(self) -> None: """ @@ -86,7 +104,7 @@ def flush(self) -> None: clear the registry. Idempotent and cheap when nothing was touched. """ - if not self._touched: + if not self._touched_by_product and not self._touched_locations: return # Local imports to avoid circulars at module import time. 
from dojo.location.models import Location # noqa: PLC0415 @@ -94,43 +112,33 @@ def flush(self) -> None: _build_location_target_names_map, _sync_inheritance_for_qs, ) - from dojo.tags_signals import get_products # noqa: PLC0415 - touched, self._touched = self._touched, defaultdict(set) + touched_by_product = self._touched_by_product + product_by_id = self._product_by_id + touched_locations = self._touched_locations + self._touched_by_product = defaultdict(lambda: defaultdict(set)) + self._product_by_id = {} + self._touched_locations = set() - for model_class, pks in touched.items(): - if not pks: + for product_id, model_pks in touched_by_product.items(): + product = product_by_id.get(product_id) + if product is None: continue - queryset = model_class.objects.filter(pk__in=pks) - if model_class is Location: - # Location target = union of related products' tags. Use - # the bulk precompute helper. - target_map = _build_location_target_names_map(list(pks)) + target_tag_names = {tag.name for tag in product.tags.all()} + for model_class, pks in model_pks.items(): + if not pks: + continue _sync_inheritance_for_qs( - queryset, - target_names_per_child=lambda loc, _m=target_map: _m.get(loc.pk, set()), + model_class.objects.filter(pk__in=pks), + target_names_per_child=lambda _c, _t=target_tag_names: _t, ) - else: - # All other children belong to one product (Finding via - # test, Endpoint via product, etc.). Group by product so - # each group gets one target name set. 
- instances = list(queryset) - by_product: dict[int, list] = defaultdict(list) - product_by_id: dict[int, object] = {} - for inst in instances: - products = get_products(inst) - for product in products: - if product is None: - continue - by_product[product.id].append(inst) - product_by_id[product.id] = product - for product_id, group in by_product.items(): - product = product_by_id[product_id] - target_names = {tag.name for tag in product.tags.all()} - _sync_inheritance_for_qs( - group, - target_names_per_child=lambda _c, _t=target_names: _t, - ) + + if touched_locations: + target_map = _build_location_target_names_map(list(touched_locations)) + _sync_inheritance_for_qs( + Location.objects.filter(pk__in=touched_locations), + target_names_per_child=lambda loc, _m=target_map: _m.get(loc.pk, set()), + ) def current() -> TagInheritanceContext | None: diff --git a/unittests/test_tag_inheritance_perf.py b/unittests/test_tag_inheritance_perf.py index c0d94b3124e..12a1f69334f 100644 --- a/unittests/test_tag_inheritance_perf.py +++ b/unittests/test_tag_inheritance_perf.py @@ -510,7 +510,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self): # rises because flush always runs; bulk re-merge has a fixed cost even # when there's no work. Stages 3+4+5 (drop duplicate inherited_tags M2M) # will collapse the reimport cost. - EXPECTED_ZAP_IMPORT_V2 = 1057 - EXPECTED_ZAP_IMPORT_V3 = 997 + EXPECTED_ZAP_IMPORT_V2 = 1000 + EXPECTED_ZAP_IMPORT_V3 = 941 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69 EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87