From 9368d79c1e1f61c860f4724310a740e5309a1615 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 21 May 2026 21:30:30 -0400 Subject: [PATCH] fix(backend): thumbnail collisions, cancellable indexing, assert hardening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fragility items from the code review, batched because each is small but they share a router-or-helper touch point. ## Thumbnail cache filename collisions Previously the cache filename was derived from ``relative_path.replace("/", "_").replace("\\", "_")`` plus ``Path(...).stem``. Two real collision modes: * ``/a/b.jpg`` and ``/a_b.jpg`` mangled to the same ``_a_b`` stem. * ``cat.png`` and ``cat.jpg`` shared the same stem ``cat``. Either case meant the thumbnail of one image could be served for another. Switch to ``hashlib.blake2b(relative_path.encode(), digest_size=16).hexdigest()`` — collision probability for 128 bits of digest is effectively zero, and the full original path (including extension) is hashed so same-stem files don't alias. Existing cache files become orphaned on first request and are regenerated; no data loss. ## ``cancel_index_operation`` actually cancels now The old implementation called ``progress_tracker.set_error(..., "Operation cancelled by user")`` — which flipped the status the frontend polls but didn't stop the background indexing loop. The encoder kept burning GPU through the rest of the dataset and (in the create-new-index path) wrote the index file at the end. This change introduces cooperative cancellation: * ``ProgressTracker.request_cancel`` / ``is_cancel_requested`` — a set of pending album_keys, cleared by the next ``start_operation`` for the same key. * New ``IndexingCancelled`` exception in ``progress.py``. * ``_process_images_batch_async``'s per-image ``progress_cb`` polls the flag and raises ``IndexingCancelled`` at the next batch boundary — cancel-to-stop latency well under a second on any sane GPU. * ``_update_index_background_async`` catches ``IndexingCancelled`` cleanly (logs at INFO, not ERROR) so the background task summary doesn't look like a crash. * The cancel endpoint flips the status text immediately so the frontend's next poll sees the cancel even before the next batch. The status text the user sees is now "Indexing cancelled by user" instead of "Operation cancelled by user"; matches the language the inner layer uses on the exception path. ## Assert hardening Five ``assert``s converted to explicit raises so they survive ``python -O`` optimization: * ``embeddings.py:1011`` / ``:1087`` — preconditions on ``update_index`` / ``update_index_async`` that ``self.embeddings_path`` exists. Now ``raise FileNotFoundError`` with the same helpful message. * ``embeddings.py:1406`` — defensive ``isinstance(np.ndarray)`` check after ``_l2_normalize``. Now ``raise TypeError`` with the actual type. * ``invoke_metadata_view.py:137`` — ``_v5_control_layer_to_tuple``'s trust-the-caller assert that ``layer.control_adapter`` isn't None. Now ``raise ValueError`` — the caller filter (in ``_V5Strategy``) still drops None layers first, so the raise is only reachable if that contract is broken. * ``filetree.py:96`` — type-narrowing assert that ``ROOT_DIR`` is set on non-Windows. Now ``raise RuntimeError`` with an explicit message about ``PHOTOMAP_ALBUM_ROOT``. * ``search.py:221`` — ``relative_path is not None`` from ``get_relative_path``. Now ``raise HTTPException(500, ...)`` since it's a server-side mis-configuration. Co-Authored-By: Claude Opus 4.7 (1M context) --- photomap/backend/embeddings.py | 36 +++++++++++++------ .../invoke/invoke_metadata_view.py | 8 ++++- photomap/backend/progress.py | 32 +++++++++++++++++ photomap/backend/routers/filetree.py | 8 ++++- photomap/backend/routers/index.py | 20 +++++++++-- photomap/backend/routers/search.py | 26 ++++++++------ 6 files changed, 105 insertions(+), 25 deletions(-) diff --git a/photomap/backend/embeddings.py b/photomap/backend/embeddings.py index 6f5c37c0..5a8ebcfb 100644 --- a/photomap/backend/embeddings.py +++ b/photomap/backend/embeddings.py @@ -43,7 +43,7 @@ from .metadata_extraction import MetadataExtractor from .metadata_formatting import format_metadata from .metadata_modules import SlideSummary -from .progress import progress_tracker +from .progress import IndexingCancelled, progress_tracker from .util import atomic_savez logger = logging.getLogger(__name__) @@ -701,9 +701,19 @@ async def _process_images_batch_async( Serialized by :data:`_indexing_semaphore` so two concurrent album indexes don't both hold an encoder in VRAM at the same time. + + The progress callback also enforces cooperative cancellation: when + ``progress_tracker.is_cancel_requested(album_key)`` becomes True the + callback raises :class:`IndexingCancelled`, which the + ``_process_images_batch`` worker propagates back out so the rest of + the dataset isn't encoded and no partial index is written. The + per-image callback boundary is fine-grained enough to keep + cancel-to-stop latency well below a second on a typical GPU. """ def progress_cb(i: int, total: int, message: str) -> None: + if progress_tracker.is_cancel_requested(album_key): + raise IndexingCancelled("Indexing cancelled by user") progress_tracker.update_progress(album_key, i, message) async with _get_indexing_semaphore(): @@ -1008,9 +1018,11 @@ def update_index( num_workers: int = DEFAULT_NUM_WORKERS, ) -> IndexResult | None: """Update existing embeddings with new images.""" - assert ( - self.embeddings_path.exists() - ), f"Embeddings file {self.embeddings_path} does not exist. Please create an index first." + if not self.embeddings_path.exists(): + raise FileNotFoundError( + f"Embeddings file {self.embeddings_path} does not exist. " + "Please create an index first." + ) try: existing = self._load_existing_index_arrays() @@ -1084,9 +1096,11 @@ async def update_index_async( num_workers: int = DEFAULT_NUM_WORKERS, ) -> IndexResult | None: """Asynchronously update existing embeddings with new images.""" - assert ( - self.embeddings_path.exists() - ), f"Embeddings file {self.embeddings_path} does not exist. Please create an index first." + if not self.embeddings_path.exists(): + raise FileNotFoundError( + f"Embeddings file {self.embeddings_path} does not exist. " + "Please create an index first." + ) try: existing = self._load_existing_index_arrays() @@ -1403,9 +1417,11 @@ def find_duplicate_clusters(self, similarity_threshold=0.995): # Normalize embeddings. ``_l2_normalize`` carries an epsilon guard so # an all-zero row can't produce NaN here. norm_embeddings = _l2_normalize(embeddings, axis=-1) - assert isinstance( - norm_embeddings, np.ndarray - ), "Normalization failed, expected np.ndarray" + if not isinstance(norm_embeddings, np.ndarray): + raise TypeError( + f"_l2_normalize returned {type(norm_embeddings).__name__}, " + "expected np.ndarray" + ) # Use NearestNeighbors with cosine metric nn = NearestNeighbors(metric="cosine", algorithm="brute") diff --git a/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py b/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py index 87fb702e..5c5ab972 100644 --- a/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py +++ b/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py @@ -134,7 +134,13 @@ def _control_adapter_to_tuple(ca: ControlAdapter) -> ControlLayerTuple: def _v5_control_layer_to_tuple(layer: V5ControlLayer) -> ControlLayerTuple: ca = layer.control_adapter - assert ca is not None # caller filters this + if ca is None: + # Belt-and-braces guard — ``_V5Strategy.control_layers`` already + # filters layers whose ``control_adapter`` is None before calling + # in, so reaching this branch means the contract was broken + # upstream. Raise instead of asserting so the check survives + # ``python -O``. + raise ValueError("V5 control layer has no control_adapter") return ControlLayerTuple( model_name=ca.model.name if ca.model else "", image_name=_image_name(ca.image), diff --git a/photomap/backend/progress.py b/photomap/backend/progress.py index a9d51896..39882eb1 100644 --- a/photomap/backend/progress.py +++ b/photomap/backend/progress.py @@ -23,6 +23,12 @@ class IndexStatus(Enum): ERROR = "error" +class IndexingCancelled(Exception): + """Raised by ``_process_images_batch`` when a cancel was requested via + :meth:`ProgressTracker.request_cancel`. The async indexing wrappers + catch it and finish cleanly instead of writing a partial index.""" + + @dataclass class ProgressInfo: album_key: str @@ -64,6 +70,12 @@ class ProgressTracker: def __init__(self): self._progress: dict[str, ProgressInfo] = {} + # Album keys with an outstanding cancellation request. The indexing + # loop polls this each batch via ``is_cancel_requested`` and raises + # ``IndexingCancelled`` so the work stops before the next forward + # pass, instead of running to completion only to have the status + # flipped to ERROR after. + self._cancel_requested: set[str] = set() self._lock = threading.Lock() def start_operation(self, album_key: str, total_images: int, operation_type: str): @@ -77,6 +89,9 @@ def start_operation(self, album_key: str, total_images: int, operation_type: str total_images=total_images, start_time=time.time(), ) + # A new run clears any stale cancel flag from a previous job + # that finished or errored before the user pressed cancel. + self._cancel_requested.discard(album_key) def update_total_images(self, album_key: str, total_images: int): """Update the total number of images for an operation.""" @@ -116,6 +131,23 @@ def remove_progress(self, album_key: str): """Remove progress tracking for an album.""" with self._lock: self._progress.pop(album_key, None) + self._cancel_requested.discard(album_key) + + def request_cancel(self, album_key: str) -> None: + """Signal the indexing loop to stop on its next batch boundary. + + The actual cancellation is cooperative — see ``IndexingCancelled``. + Set even if no operation is currently running (e.g. cancel hits just + as indexing finishes); ``start_operation`` clears the flag the next + time the album is indexed. + """ + with self._lock: + self._cancel_requested.add(album_key) + + def is_cancel_requested(self, album_key: str) -> bool: + """Return True if a cancel was requested for ``album_key``.""" + with self._lock: + return album_key in self._cancel_requested def is_running(self, album_key: str) -> bool: """Check if an operation is currently running for an album.""" diff --git a/photomap/backend/routers/filetree.py b/photomap/backend/routers/filetree.py index f78dc800..4f54c9fc 100644 --- a/photomap/backend/routers/filetree.py +++ b/photomap/backend/routers/filetree.py @@ -93,7 +93,13 @@ async def get_directories(path: str = "", show_hidden: bool = False): else: dir_path = Path(path) else: - assert ROOT_DIR is not None + # On non-Windows ROOT_DIR is set via the module-level fallback + # (``os.environ.get("PHOTOMAP_ALBUM_ROOT", "/")``), so it's + # guaranteed truthy here. Make that contract explicit so a + # future env-handling change can't silently slip through under + # ``python -O`` where ``assert`` is stripped. + if not ROOT_DIR: + raise RuntimeError("PHOTOMAP_ALBUM_ROOT must be set on this platform") if not path: dir_path = Path(ROOT_DIR) else: diff --git a/photomap/backend/routers/index.py b/photomap/backend/routers/index.py index d26df45a..a9ba0f2b 100644 --- a/photomap/backend/routers/index.py +++ b/photomap/backend/routers/index.py @@ -15,7 +15,7 @@ from ..config import get_config_manager from ..embeddings import Embeddings, peek_encoder_spec -from ..progress import progress_tracker +from ..progress import IndexingCancelled, progress_tracker from .album import ( AlbumDep, EmbeddingsDep, @@ -196,7 +196,14 @@ async def get_index_progress( @index_router.delete("/cancel_index/{album_key}", tags=["Index"]) async def cancel_index_operation(album_key: str, album_config: AlbumDep) -> dict: - """Cancel an ongoing index operation.""" + """Cancel an ongoing index operation. + + Sets the cooperative-cancel flag in ``progress_tracker``; the indexing + loop polls it between batches via the per-image progress callback and + raises :class:`IndexingCancelled`, which the background task catches + cleanly. The status text is flipped here too so the frontend's poll + sees the cancel immediately, even before the next batch boundary. + """ del album_config # See get_index_progress above. try: if not progress_tracker.is_running(album_key): @@ -204,7 +211,8 @@ async def cancel_index_operation(album_key: str, album_config: AlbumDep) -> dict status_code=404, detail=f"No active operation for album '{album_key}'" ) - progress_tracker.set_error(album_key, "Operation cancelled by user") + progress_tracker.request_cancel(album_key) + progress_tracker.set_error(album_key, "Indexing cancelled by user") return { "success": True, @@ -509,6 +517,12 @@ async def _update_index_background_async(album_key: str, album_config): logger.info(f"Index update completed for album '{album_key}'") + except IndexingCancelled as e: + # User-requested cancellation isn't a failure — the inner layers + # already called ``set_error`` with the friendly message; log at + # info level so the background task summary doesn't look like a + # crash. + logger.info(f"Index update cancelled for album '{album_key}': {e}") except Exception as e: logger.error(f"Background index update failed for album '{album_key}': {e}") progress_tracker.set_error(album_key, str(e)) diff --git a/photomap/backend/routers/search.py b/photomap/backend/routers/search.py index a55a736a..a823ab9a 100644 --- a/photomap/backend/routers/search.py +++ b/photomap/backend/routers/search.py @@ -6,6 +6,7 @@ """ import base64 +import hashlib import json import re import zipfile @@ -218,16 +219,21 @@ async def serve_thumbnail( thumb_dir.mkdir(exist_ok=True) relative_path = config_manager.get_relative_path(str(image_path), album_key) - assert relative_path is not None, "Relative path should not be None" - safe_rel_path = relative_path.replace("/", "_").replace("\\", "_") - thumb_path = thumb_dir / f"{Path(safe_rel_path).stem}_{size}.png" - - # If color is specified, add it to the thumbnail filename to cache separately - if color: - color_hex = color.replace("#", "") - thumb_path = ( - thumb_dir / f"{Path(safe_rel_path).stem}_{size}_{color_hex}_r{radius}.png" - ) + if relative_path is None: + # ``get_relative_path`` returns ``None`` only when the image falls + # outside every configured ``image_paths`` entry — i.e. an album + # mis-configuration, not a user-supplied bad input. + raise HTTPException(status_code=500, detail="Image path is not inside the album") + + # Hash the full relative_path (including extension) so structurally + # different paths can't collapse to the same cache filename. The prior + # implementation ran ``.replace("/", "_")`` + ``Path(...).stem``, which + # collided ``/a/b.jpg`` with ``/a_b.jpg`` (same mangled name) and + # ``a.png`` with ``a.jpg`` (same stem) — both observable cache-poisoning + # bugs. blake2b-128 makes collisions effectively impossible. + rel_hash = hashlib.blake2b(relative_path.encode("utf-8"), digest_size=16).hexdigest() + suffix = f"_{size}.png" if not color else f"_{size}_{color.lstrip('#')}_r{radius}.png" + thumb_path = thumb_dir / f"{rel_hash}{suffix}" # Generate thumbnail if not cached or outdated if (