diff --git a/photomap/backend/embeddings.py b/photomap/backend/embeddings.py index 6f5c37c0..5a8ebcfb 100644 --- a/photomap/backend/embeddings.py +++ b/photomap/backend/embeddings.py @@ -43,7 +43,7 @@ from .metadata_extraction import MetadataExtractor from .metadata_formatting import format_metadata from .metadata_modules import SlideSummary -from .progress import progress_tracker +from .progress import IndexingCancelled, progress_tracker from .util import atomic_savez logger = logging.getLogger(__name__) @@ -701,9 +701,19 @@ async def _process_images_batch_async( Serialized by :data:`_indexing_semaphore` so two concurrent album indexes don't both hold an encoder in VRAM at the same time. + + The progress callback also enforces cooperative cancellation: when + ``progress_tracker.is_cancel_requested(album_key)`` becomes True the + callback raises :class:`IndexingCancelled`, which the + ``_process_images_batch`` worker propagates back out so the rest of + the dataset isn't encoded and no partial index is written. The + per-image callback boundary is fine-grained enough to keep + cancel-to-stop latency well below a second on a typical GPU. """ def progress_cb(i: int, total: int, message: str) -> None: + if progress_tracker.is_cancel_requested(album_key): + raise IndexingCancelled("Indexing cancelled by user") progress_tracker.update_progress(album_key, i, message) async with _get_indexing_semaphore(): @@ -1008,9 +1018,11 @@ def update_index( num_workers: int = DEFAULT_NUM_WORKERS, ) -> IndexResult | None: """Update existing embeddings with new images.""" - assert ( - self.embeddings_path.exists() - ), f"Embeddings file {self.embeddings_path} does not exist. Please create an index first." + if not self.embeddings_path.exists(): + raise FileNotFoundError( + f"Embeddings file {self.embeddings_path} does not exist. " + "Please create an index first." + ) try: existing = self._load_existing_index_arrays() @@ -1084,9 +1096,11 @@ async def update_index_async( num_workers: int = DEFAULT_NUM_WORKERS, ) -> IndexResult | None: """Asynchronously update existing embeddings with new images.""" - assert ( - self.embeddings_path.exists() - ), f"Embeddings file {self.embeddings_path} does not exist. Please create an index first." + if not self.embeddings_path.exists(): + raise FileNotFoundError( + f"Embeddings file {self.embeddings_path} does not exist. " + "Please create an index first." + ) try: existing = self._load_existing_index_arrays() @@ -1403,9 +1417,11 @@ def find_duplicate_clusters(self, similarity_threshold=0.995): # Normalize embeddings. ``_l2_normalize`` carries an epsilon guard so # an all-zero row can't produce NaN here. norm_embeddings = _l2_normalize(embeddings, axis=-1) - assert isinstance( - norm_embeddings, np.ndarray - ), "Normalization failed, expected np.ndarray" + if not isinstance(norm_embeddings, np.ndarray): + raise TypeError( + f"_l2_normalize returned {type(norm_embeddings).__name__}, " + "expected np.ndarray" + ) # Use NearestNeighbors with cosine metric nn = NearestNeighbors(metric="cosine", algorithm="brute") diff --git a/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py b/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py index 87fb702e..5c5ab972 100644 --- a/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py +++ b/photomap/backend/metadata_modules/invoke/invoke_metadata_view.py @@ -134,7 +134,13 @@ def _control_adapter_to_tuple(ca: ControlAdapter) -> ControlLayerTuple: def _v5_control_layer_to_tuple(layer: V5ControlLayer) -> ControlLayerTuple: ca = layer.control_adapter - assert ca is not None # caller filters this + if ca is None: + # Belt-and-braces guard — ``_V5Strategy.control_layers`` already + # filters layers whose ``control_adapter`` is None before calling + # in, so reaching this branch means the contract was broken + # upstream. Raise instead of asserting so the check survives + # ``python -O``. + raise ValueError("V5 control layer has no control_adapter") return ControlLayerTuple( model_name=ca.model.name if ca.model else "", image_name=_image_name(ca.image), diff --git a/photomap/backend/progress.py b/photomap/backend/progress.py index a9d51896..39882eb1 100644 --- a/photomap/backend/progress.py +++ b/photomap/backend/progress.py @@ -23,6 +23,12 @@ class IndexStatus(Enum): ERROR = "error" +class IndexingCancelled(Exception): + """Raised by ``_process_images_batch`` when a cancel was requested via + :meth:`ProgressTracker.request_cancel`. The async indexing wrappers + catch it and finish cleanly instead of writing a partial index.""" + + @dataclass class ProgressInfo: album_key: str @@ -64,6 +70,12 @@ class ProgressTracker: def __init__(self): self._progress: dict[str, ProgressInfo] = {} + # Album keys with an outstanding cancellation request. The indexing + # loop polls this each batch via ``is_cancel_requested`` and raises + # ``IndexingCancelled`` so the work stops before the next forward + # pass, instead of running to completion only to have the status + # flipped to ERROR after. + self._cancel_requested: set[str] = set() self._lock = threading.Lock() def start_operation(self, album_key: str, total_images: int, operation_type: str): @@ -77,6 +89,9 @@ def start_operation(self, album_key: str, total_images: int, operation_type: str total_images=total_images, start_time=time.time(), ) + # A new run clears any stale cancel flag from a previous job + # that finished or errored before the user pressed cancel. + self._cancel_requested.discard(album_key) def update_total_images(self, album_key: str, total_images: int): """Update the total number of images for an operation.""" @@ -116,6 +131,23 @@ def remove_progress(self, album_key: str): """Remove progress tracking for an album.""" with self._lock: self._progress.pop(album_key, None) + self._cancel_requested.discard(album_key) + + def request_cancel(self, album_key: str) -> None: + """Signal the indexing loop to stop on its next batch boundary. + + The actual cancellation is cooperative — see ``IndexingCancelled``. + Set even if no operation is currently running (e.g. cancel hits just + as indexing finishes); ``start_operation`` clears the flag the next + time the album is indexed. + """ + with self._lock: + self._cancel_requested.add(album_key) + + def is_cancel_requested(self, album_key: str) -> bool: + """Return True if a cancel was requested for ``album_key``.""" + with self._lock: + return album_key in self._cancel_requested def is_running(self, album_key: str) -> bool: """Check if an operation is currently running for an album.""" diff --git a/photomap/backend/routers/filetree.py b/photomap/backend/routers/filetree.py index f78dc800..4f54c9fc 100644 --- a/photomap/backend/routers/filetree.py +++ b/photomap/backend/routers/filetree.py @@ -93,7 +93,13 @@ async def get_directories(path: str = "", show_hidden: bool = False): else: dir_path = Path(path) else: - assert ROOT_DIR is not None + # On non-Windows ROOT_DIR is set via the module-level fallback + # (``os.environ.get("PHOTOMAP_ALBUM_ROOT", "/")``), so it's + # guaranteed truthy here. Make that contract explicit so a + # future env-handling change can't silently slip through under + # ``python -O`` where ``assert`` is stripped. + if not ROOT_DIR: + raise RuntimeError("PHOTOMAP_ALBUM_ROOT must be set on this platform") if not path: dir_path = Path(ROOT_DIR) else: diff --git a/photomap/backend/routers/index.py b/photomap/backend/routers/index.py index d26df45a..a9ba0f2b 100644 --- a/photomap/backend/routers/index.py +++ b/photomap/backend/routers/index.py @@ -15,7 +15,7 @@ from ..config import get_config_manager from ..embeddings import Embeddings, peek_encoder_spec -from ..progress import progress_tracker +from ..progress import IndexingCancelled, progress_tracker from .album import ( AlbumDep, EmbeddingsDep, @@ -196,7 +196,14 @@ async def get_index_progress( @index_router.delete("/cancel_index/{album_key}", tags=["Index"]) async def cancel_index_operation(album_key: str, album_config: AlbumDep) -> dict: - """Cancel an ongoing index operation.""" + """Cancel an ongoing index operation. + + Sets the cooperative-cancel flag in ``progress_tracker``; the indexing + loop polls it between batches via the per-image progress callback and + raises :class:`IndexingCancelled`, which the background task catches + cleanly. The status text is flipped here too so the frontend's poll + sees the cancel immediately, even before the next batch boundary. + """ del album_config # See get_index_progress above. try: if not progress_tracker.is_running(album_key): @@ -204,7 +211,8 @@ async def cancel_index_operation(album_key: str, album_config: AlbumDep) -> dict status_code=404, detail=f"No active operation for album '{album_key}'" ) - progress_tracker.set_error(album_key, "Operation cancelled by user") + progress_tracker.request_cancel(album_key) + progress_tracker.set_error(album_key, "Indexing cancelled by user") return { "success": True, @@ -509,6 +517,12 @@ async def _update_index_background_async(album_key: str, album_config): logger.info(f"Index update completed for album '{album_key}'") + except IndexingCancelled as e: + # User-requested cancellation isn't a failure — the inner layers + # already called ``set_error`` with the friendly message; log at + # info level so the background task summary doesn't look like a + # crash. + logger.info(f"Index update cancelled for album '{album_key}': {e}") except Exception as e: logger.error(f"Background index update failed for album '{album_key}': {e}") progress_tracker.set_error(album_key, str(e)) diff --git a/photomap/backend/routers/search.py b/photomap/backend/routers/search.py index a55a736a..a823ab9a 100644 --- a/photomap/backend/routers/search.py +++ b/photomap/backend/routers/search.py @@ -6,6 +6,7 @@ """ import base64 +import hashlib import json import re import zipfile @@ -218,16 +219,21 @@ async def serve_thumbnail( thumb_dir.mkdir(exist_ok=True) relative_path = config_manager.get_relative_path(str(image_path), album_key) - assert relative_path is not None, "Relative path should not be None" - safe_rel_path = relative_path.replace("/", "_").replace("\\", "_") - thumb_path = thumb_dir / f"{Path(safe_rel_path).stem}_{size}.png" - - # If color is specified, add it to the thumbnail filename to cache separately - if color: - color_hex = color.replace("#", "") - thumb_path = ( - thumb_dir / f"{Path(safe_rel_path).stem}_{size}_{color_hex}_r{radius}.png" - ) + if relative_path is None: + # ``get_relative_path`` returns ``None`` only when the image falls + # outside every configured ``image_paths`` entry — i.e. an album + # mis-configuration, not a user-supplied bad input. + raise HTTPException(status_code=500, detail="Image path is not inside the album") + + # Hash the full relative_path (including extension) so structurally + # different paths can't collapse to the same cache filename. The prior + # implementation ran ``.replace("/", "_")`` + ``Path(...).stem``, which + # collided ``/a/b.jpg`` with ``/a_b.jpg`` (same mangled name) and + # ``a.png`` with ``a.jpg`` (same stem) — both observable cache-poisoning + # bugs. blake2b-128 makes collisions effectively impossible. + rel_hash = hashlib.blake2b(relative_path.encode("utf-8"), digest_size=16).hexdigest() + suffix = f"_{size}.png" if not color else f"_{size}_{color.lstrip('#')}_r{radius}.png" + thumb_path = thumb_dir / f"{rel_hash}{suffix}" # Generate thumbnail if not cached or outdated if (