diff --git a/Cargo.lock b/Cargo.lock index 31a74b4..7ea542b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1380,6 +1380,7 @@ dependencies = [ "dashmap", "flate2", "futures-util", + "glob", "image", "indexmap", "metrics", @@ -1403,6 +1404,7 @@ dependencies = [ "tracing", "tracing-subscriber", "uuid", + "zstd", ] [[package]] @@ -3000,6 +3002,34 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea" +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.5.0" diff --git a/server/Cargo.toml b/server/Cargo.toml index bf9766e..0cc090c 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -54,6 +54,12 @@ rstar = "0.12" # Zlib decompression for tissue overlay data flate2 = "1.0" +# Zstd decompression for v2 overlay format +zstd = "0.13" + +# Glob pattern matching for overlay discovery +glob = "0.3" + [build-dependencies] prost-build = "0.13" diff --git a/server/src/config.rs b/server/src/config.rs index 06596bb..1a1c501 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -123,12 +123,16 @@ pub struct SlideConfig { pub struct OverlayConfig { /// Directory containing overlay files pub overlays_dir: PathBuf, + /// Optional glob pattern for finding overlay files (e.g. "/overlays/**/*/cell_masks.bin"). + /// The parent directory of each match is used as the slide name. + pub overlay_pattern: Option, } impl Default for OverlayConfig { fn default() -> Self { Self { overlays_dir: PathBuf::from("./data/overlays"), + overlay_pattern: None, } } } @@ -283,6 +287,11 @@ impl Config { if let Ok(path) = env::var("OVERLAY_DIR") { config.overlay.overlays_dir = PathBuf::from(path); } + if let Ok(pattern) = env::var("OVERLAY_PATTERN") { + if !pattern.is_empty() { + config.overlay.overlay_pattern = Some(pattern); + } + } // Static files config if let Ok(path) = env::var("STATIC_FILES_DIR") { diff --git a/server/src/overlay/local.rs b/server/src/overlay/local.rs index 5e537be..26cfcb7 100644 --- a/server/src/overlay/local.rs +++ b/server/src/overlay/local.rs @@ -32,6 +32,9 @@ enum OverlayCacheState { /// Local overlay service that reads overlay files from disk pub struct LocalOverlayService { overlays_dir: PathBuf, + /// Pre-built index from glob pattern: slide_name → overlay file path. + /// When `Some`, used for lookup instead of directory scanning. + glob_index: Option>, reader: CompositeReader, cache: Arc>, } @@ -51,22 +54,75 @@ impl LocalOverlayService { pub fn new(config: &OverlayConfig) -> Result { let overlays_dir = config.overlays_dir.clone(); - // Create directory if it doesn't exist - if !overlays_dir.exists() { - std::fs::create_dir_all(&overlays_dir)?; - info!("Created overlays directory: {:?}", overlays_dir); - } + // Build glob index if pattern is set + let glob_index = if let Some(ref pattern) = config.overlay_pattern { + let index = Self::build_glob_index(pattern); + info!( + "OVERLAY_PATTERN='{}' matched {} overlay files", + pattern, + index.len() + ); + Some(index) + } else { + // Create directory if it doesn't exist (only in non-glob mode) + if !overlays_dir.exists() { + std::fs::create_dir_all(&overlays_dir)?; + info!("Created overlays directory: {:?}", overlays_dir); + } + None + }; Ok(Self { overlays_dir, + glob_index, reader: CompositeReader::new(), cache: Arc::new(DashMap::new()), }) } + /// Build a slide_name → file_path index from a glob pattern. + /// The immediate parent directory of each matched file is used as the slide name. + /// Known slide extensions (.svs, .ndpi, etc.) are stripped to match slide ID generation. + fn build_glob_index(pattern: &str) -> HashMap { + let mut index = HashMap::new(); + + match glob::glob(pattern) { + Ok(paths) => { + for entry in paths { + match entry { + Ok(path) => { + if let Some(slide_name) = path + .parent() + .and_then(|p| p.file_name()) + .and_then(|s| s.to_str()) + { + let slide_id = strip_slide_extension_and_sanitize(slide_name); + debug!("Glob matched overlay: {} -> {:?}", slide_id, path); + index.insert(slide_id, path); + } + } + Err(e) => { + warn!("Glob error for pattern '{}': {}", pattern, e); + } + } + } + } + Err(e) => { + warn!("Invalid glob pattern '{}': {}", pattern, e); + } + } + + index + } + /// Find overlay file for a given slide ID fn find_overlay_file(&self, slide_id: &str) -> Option { - // Try common extensions directly in overlays dir + // If glob index is available, use it for lookup + if let Some(ref index) = self.glob_index { + return index.get(slide_id).cloned(); + } + + // Fallback: try common extensions directly in overlays dir for ext in &["bin", "pb"] { let path = self.overlays_dir.join(format!("{}.{}", slide_id, ext)); if path.exists() { @@ -252,6 +308,14 @@ impl LocalOverlayService { /// List all available overlay files fn list_overlay_files(&self) -> Vec { + // If glob index is available, return its keys + if let Some(ref index) = self.glob_index { + let mut slide_ids: Vec = index.keys().cloned().collect(); + slide_ids.sort(); + return slide_ids; + } + + // Fallback: scan overlays directory let mut slide_ids = Vec::new(); if let Ok(entries) = std::fs::read_dir(&self.overlays_dir) { @@ -507,3 +571,26 @@ impl OverlayService for LocalOverlayService { LocalOverlayService::initiate_load(self, slide_id) } } + +/// Known slide file extensions, matching those in `slide/local.rs`. +const SLIDE_EXTENSIONS: &[&str] = &["svs", "ndpi", "tiff", "tif", "vms", "vmu", "scn", "mrxs"]; + +/// Strip a known slide extension (e.g. `.svs`) from a directory name and sanitize. +/// This mirrors how slide IDs are generated: `file_stem()` + `sanitize_id()`. +/// Example: "TCGA-AB-1234.svs" → "TCGA-AB-1234" +fn strip_slide_extension_and_sanitize(name: &str) -> String { + let stem = SLIDE_EXTENSIONS + .iter() + .find_map(|ext| name.strip_suffix(&format!(".{}", ext))) + .unwrap_or(name); + + stem.chars() + .map(|c| { + if c.is_alphanumeric() || c == '-' || c == '_' || c == '.' { + c + } else { + '_' + } + }) + .collect() +}