Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions photomap/backend/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from .metadata_extraction import MetadataExtractor
from .metadata_formatting import format_metadata
from .metadata_modules import SlideSummary
from .progress import progress_tracker
from .progress import IndexingCancelled, progress_tracker
from .util import atomic_savez

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -701,9 +701,19 @@ async def _process_images_batch_async(

Serialized by :data:`_indexing_semaphore` so two concurrent album
indexes don't both hold an encoder in VRAM at the same time.

The progress callback also enforces cooperative cancellation: when
``progress_tracker.is_cancel_requested(album_key)`` becomes True the
callback raises :class:`IndexingCancelled`, which the
``_process_images_batch`` worker propagates back out so the rest of
the dataset isn't encoded and no partial index is written. The
per-image callback boundary is fine-grained enough to keep
cancel-to-stop latency well below a second on a typical GPU.
"""

def progress_cb(i: int, total: int, message: str) -> None:
if progress_tracker.is_cancel_requested(album_key):
raise IndexingCancelled("Indexing cancelled by user")
progress_tracker.update_progress(album_key, i, message)

async with _get_indexing_semaphore():
Expand Down Expand Up @@ -1008,9 +1018,11 @@ def update_index(
num_workers: int = DEFAULT_NUM_WORKERS,
) -> IndexResult | None:
"""Update existing embeddings with new images."""
assert (
self.embeddings_path.exists()
), f"Embeddings file {self.embeddings_path} does not exist. Please create an index first."
if not self.embeddings_path.exists():
raise FileNotFoundError(
f"Embeddings file {self.embeddings_path} does not exist. "
"Please create an index first."
)

try:
existing = self._load_existing_index_arrays()
Expand Down Expand Up @@ -1084,9 +1096,11 @@ async def update_index_async(
num_workers: int = DEFAULT_NUM_WORKERS,
) -> IndexResult | None:
"""Asynchronously update existing embeddings with new images."""
assert (
self.embeddings_path.exists()
), f"Embeddings file {self.embeddings_path} does not exist. Please create an index first."
if not self.embeddings_path.exists():
raise FileNotFoundError(
f"Embeddings file {self.embeddings_path} does not exist. "
"Please create an index first."
)

try:
existing = self._load_existing_index_arrays()
Expand Down Expand Up @@ -1403,9 +1417,11 @@ def find_duplicate_clusters(self, similarity_threshold=0.995):
# Normalize embeddings. ``_l2_normalize`` carries an epsilon guard so
# an all-zero row can't produce NaN here.
norm_embeddings = _l2_normalize(embeddings, axis=-1)
assert isinstance(
norm_embeddings, np.ndarray
), "Normalization failed, expected np.ndarray"
if not isinstance(norm_embeddings, np.ndarray):
raise TypeError(
f"_l2_normalize returned {type(norm_embeddings).__name__}, "
"expected np.ndarray"
)

# Use NearestNeighbors with cosine metric
nn = NearestNeighbors(metric="cosine", algorithm="brute")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,13 @@ def _control_adapter_to_tuple(ca: ControlAdapter) -> ControlLayerTuple:

def _v5_control_layer_to_tuple(layer: V5ControlLayer) -> ControlLayerTuple:
ca = layer.control_adapter
assert ca is not None # caller filters this
if ca is None:
# Belt-and-braces guard — ``_V5Strategy.control_layers`` already
# filters layers whose ``control_adapter`` is None before calling
# in, so reaching this branch means the contract was broken
# upstream. Raise instead of asserting so the check survives
# ``python -O``.
raise ValueError("V5 control layer has no control_adapter")
return ControlLayerTuple(
model_name=ca.model.name if ca.model else "",
image_name=_image_name(ca.image),
Expand Down
32 changes: 32 additions & 0 deletions photomap/backend/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ class IndexStatus(Enum):
ERROR = "error"


class IndexingCancelled(Exception):
"""Raised by ``_process_images_batch`` when a cancel was requested via
:meth:`ProgressTracker.request_cancel`. The async indexing wrappers
catch it and finish cleanly instead of writing a partial index."""


@dataclass
class ProgressInfo:
album_key: str
Expand Down Expand Up @@ -64,6 +70,12 @@ class ProgressTracker:

def __init__(self):
self._progress: dict[str, ProgressInfo] = {}
# Album keys with an outstanding cancellation request. The indexing
# loop polls this each batch via ``is_cancel_requested`` and raises
# ``IndexingCancelled`` so the work stops before the next forward
# pass, instead of running to completion only to have the status
# flipped to ERROR after.
self._cancel_requested: set[str] = set()
self._lock = threading.Lock()

def start_operation(self, album_key: str, total_images: int, operation_type: str):
Expand All @@ -77,6 +89,9 @@ def start_operation(self, album_key: str, total_images: int, operation_type: str
total_images=total_images,
start_time=time.time(),
)
# A new run clears any stale cancel flag from a previous job
# that finished or errored before the user pressed cancel.
self._cancel_requested.discard(album_key)

def update_total_images(self, album_key: str, total_images: int):
"""Update the total number of images for an operation."""
Expand Down Expand Up @@ -116,6 +131,23 @@ def remove_progress(self, album_key: str):
"""Remove progress tracking for an album."""
with self._lock:
self._progress.pop(album_key, None)
self._cancel_requested.discard(album_key)

def request_cancel(self, album_key: str) -> None:
"""Signal the indexing loop to stop on its next batch boundary.

The actual cancellation is cooperative — see ``IndexingCancelled``.
Set even if no operation is currently running (e.g. cancel hits just
as indexing finishes); ``start_operation`` clears the flag the next
time the album is indexed.
"""
with self._lock:
self._cancel_requested.add(album_key)

def is_cancel_requested(self, album_key: str) -> bool:
"""Return True if a cancel was requested for ``album_key``."""
with self._lock:
return album_key in self._cancel_requested

def is_running(self, album_key: str) -> bool:
"""Check if an operation is currently running for an album."""
Expand Down
8 changes: 7 additions & 1 deletion photomap/backend/routers/filetree.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,13 @@ async def get_directories(path: str = "", show_hidden: bool = False):
else:
dir_path = Path(path)
else:
assert ROOT_DIR is not None
# On non-Windows ROOT_DIR is set via the module-level fallback
# (``os.environ.get("PHOTOMAP_ALBUM_ROOT", "/")``), so it's
# guaranteed truthy here. Make that contract explicit so a
# future env-handling change can't silently slip through under
# ``python -O`` where ``assert`` is stripped.
if not ROOT_DIR:
raise RuntimeError("PHOTOMAP_ALBUM_ROOT must be set on this platform")
if not path:
dir_path = Path(ROOT_DIR)
else:
Expand Down
20 changes: 17 additions & 3 deletions photomap/backend/routers/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from ..config import get_config_manager
from ..embeddings import Embeddings, peek_encoder_spec
from ..progress import progress_tracker
from ..progress import IndexingCancelled, progress_tracker
from .album import (
AlbumDep,
EmbeddingsDep,
Expand Down Expand Up @@ -196,15 +196,23 @@ async def get_index_progress(

@index_router.delete("/cancel_index/{album_key}", tags=["Index"])
async def cancel_index_operation(album_key: str, album_config: AlbumDep) -> dict:
"""Cancel an ongoing index operation."""
"""Cancel an ongoing index operation.

Sets the cooperative-cancel flag in ``progress_tracker``; the indexing
loop polls it between batches via the per-image progress callback and
raises :class:`IndexingCancelled`, which the background task catches
cleanly. The status text is flipped here too so the frontend's poll
sees the cancel immediately, even before the next batch boundary.
"""
del album_config # See get_index_progress above.
try:
if not progress_tracker.is_running(album_key):
raise HTTPException(
status_code=404, detail=f"No active operation for album '{album_key}'"
)

progress_tracker.set_error(album_key, "Operation cancelled by user")
progress_tracker.request_cancel(album_key)
progress_tracker.set_error(album_key, "Indexing cancelled by user")

return {
"success": True,
Expand Down Expand Up @@ -509,6 +517,12 @@ async def _update_index_background_async(album_key: str, album_config):

logger.info(f"Index update completed for album '{album_key}'")

except IndexingCancelled as e:
# User-requested cancellation isn't a failure — the inner layers
# already called ``set_error`` with the friendly message; log at
# info level so the background task summary doesn't look like a
# crash.
logger.info(f"Index update cancelled for album '{album_key}': {e}")
except Exception as e:
logger.error(f"Background index update failed for album '{album_key}': {e}")
progress_tracker.set_error(album_key, str(e))
26 changes: 16 additions & 10 deletions photomap/backend/routers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import base64
import hashlib
import json
import re
import zipfile
Expand Down Expand Up @@ -218,16 +219,21 @@ async def serve_thumbnail(
thumb_dir.mkdir(exist_ok=True)

relative_path = config_manager.get_relative_path(str(image_path), album_key)
assert relative_path is not None, "Relative path should not be None"
safe_rel_path = relative_path.replace("/", "_").replace("\\", "_")
thumb_path = thumb_dir / f"{Path(safe_rel_path).stem}_{size}.png"

# If color is specified, add it to the thumbnail filename to cache separately
if color:
color_hex = color.replace("#", "")
thumb_path = (
thumb_dir / f"{Path(safe_rel_path).stem}_{size}_{color_hex}_r{radius}.png"
)
if relative_path is None:
# ``get_relative_path`` returns ``None`` only when the image falls
# outside every configured ``image_paths`` entry — i.e. an album
# mis-configuration, not a user-supplied bad input.
raise HTTPException(status_code=500, detail="Image path is not inside the album")

# Hash the full relative_path (including extension) so structurally
# different paths can't collapse to the same cache filename. The prior
# implementation ran ``.replace("/", "_")`` + ``Path(...).stem``, which
# collided ``/a/b.jpg`` with ``/a_b.jpg`` (same mangled name) and
# ``a.png`` with ``a.jpg`` (same stem) — both observable cache-poisoning
# bugs. blake2b-128 makes collisions effectively impossible.
rel_hash = hashlib.blake2b(relative_path.encode("utf-8"), digest_size=16).hexdigest()
suffix = f"_{size}.png" if not color else f"_{size}_{color.lstrip('#')}_r{radius}.png"
thumb_path = thumb_dir / f"{rel_hash}{suffix}"

# Generate thumbnail if not cached or outdated
if (
Expand Down
Loading