def _browse_img_dir():
    """Button callback: pick a folder and store it as ``manual_img_dir``.

    Session state is left untouched when ``browse_folder()`` returns a falsy
    value.
    """
    chosen = browse_folder()
    if not chosen:
        return
    st.session_state.manual_img_dir = chosen


def _browse_ann_file():
    """Button callback: pick a ``.json`` file and store it as ``manual_ann_file``.

    Session state is left untouched when ``browse_file()`` returns a falsy
    value.
    """
    chosen = browse_file(filetypes=[".json"])
    if not chosen:
        return
    st.session_state.manual_ann_file = chosen


def _browse_manual_dataset_dir():
    """Button callback: pick a folder and store it as ``manual_dataset_dir``.

    Session state is left untouched when ``browse_folder()`` returns a falsy
    value.
    """
    chosen = browse_folder()
    if not chosen:
        return
    st.session_state.manual_dataset_dir = chosen


# Session-state keys that hold the manually supplied dataset paths.
_MANUAL_PATH_KEYS = ("manual_img_dir", "manual_ann_file", "manual_dataset_dir")
def find_yaml_and_dataset_dir(dataset_path: str, split: str) -> Tuple[str, str]:
    """
    Find a YAML config file and validate the dataset root for a YOLO dataset.

    Searches *dataset_path* (non-recursively) for ``*.yaml`` / ``*.yml`` files.
    Any filename is accepted (e.g. ``data.yaml``, ``coco128.yaml``).
    ``data.yaml`` is preferred when present; otherwise the alphabetically
    first match is used so the choice is reproducible across runs.

    :param dataset_path: Root of the YOLO dataset (contains a *.yaml, images/, labels/)
    :type dataset_path: str
    :param split: Dataset split name (e.g., "train", "val", "test") — used only
        to surface a clearer error when the YAML lacks that split key.
    :type split: str
    :return: Tuple of (yaml_path, dataset_path)
    :rtype: Tuple[str, str]
    :raises FileNotFoundError: If *dataset_path* is not a directory, if no YAML
        file exists in it, or if the requested split key is missing/null in
        the YAML (a YAML whose top level is not a mapping counts as missing).
    """
    if not os.path.isdir(dataset_path):
        raise FileNotFoundError(f"Dataset root not found: {dataset_path}")

    # glob() yields matches in arbitrary order; sort so the fallback below
    # picks the same file on every machine.
    yaml_candidates = sorted(
        glob(os.path.join(dataset_path, "*.yaml"))
        + glob(os.path.join(dataset_path, "*.yml"))
    )
    if not yaml_candidates:
        raise FileNotFoundError(
            f"No YAML config file found in {dataset_path}. "
            "Expected a *.yaml or *.yml file at the dataset root."
        )

    # Prefer data.yaml; fall back to the first (sorted) match.
    preferred = os.path.join(dataset_path, "data.yaml")
    yaml_path = preferred if preferred in yaml_candidates else yaml_candidates[0]

    dataset_info = uio.read_yaml(yaml_path)
    # NOTE(review): read_yaml is not visible here; if it can return a
    # non-mapping (e.g. None for an empty file), report that as a missing
    # split rather than letting an AttributeError escape to callers that only
    # catch FileNotFoundError.
    split_path = dataset_info.get(split) if isinstance(dataset_info, dict) else None
    if not split_path:
        raise FileNotFoundError(
            f"Split '{split}' is missing or null in {yaml_path}."
        )

    return yaml_path, dataset_path
+ + :param filetypes: List of file extensions to filter (e.g. [".json", ".yaml"]). + Pass None or empty list to allow all files. + :type filetypes: list[str] | None + :return: Selected file path or None if cancelled. + :rtype: str | None + """ + try: + is_windows = sys.platform.startswith("win") + is_wsl_env = is_wsl() + if is_windows or is_wsl_env: + # Build a PowerShell filter string like "JSON files (*.json)|*.json|All files (*.*)|*.*" + if filetypes: + parts = [] + for ext in filetypes: + ext_clean = ext.lstrip(".") + parts.append(f"{ext_clean.upper()} files (*.{ext_clean})|*.{ext_clean}") + parts.append("All files (*.*)|*.*") + filter_str = "|".join(parts) + else: + filter_str = "All files (*.*)|*.*" + + script = ( + "Add-Type -AssemblyName System.windows.forms;" + "$f=New-Object System.Windows.Forms.OpenFileDialog;" + f'$f.Filter="{filter_str}";' + 'if($f.ShowDialog() -eq "OK"){Write-Output $f.FileName}' + ) + result = subprocess.run( + ["powershell.exe", "-NoProfile", "-Command", script], + capture_output=True, + text=True, + timeout=30, + ) + fpath = result.stdout.strip() + if fpath and is_wsl_env: + result = subprocess.run( + ["wslpath", "-u", fpath], + capture_output=True, + text=True, + timeout=30, + ) + fpath = result.stdout.strip() + return fpath if fpath else None + elif sys.platform == "darwin": + if filetypes: + type_list = ", ".join(f'"{e.lstrip(".")}"' for e in filetypes) + script = f'POSIX path of (choose file with prompt "Select file:" of type {{{type_list}}})' + else: + script = 'POSIX path of (choose file with prompt "Select file:")' + result = subprocess.run( + ["osascript", "-e", script], capture_output=True, text=True, timeout=30 + ) + fpath = result.stdout.strip() + return fpath if fpath else None + else: + # Linux: try zenity, then kdialog + for tool in ["zenity", "kdialog"]: + try: + if tool == "zenity": + cmd = ["zenity", "--file-selection", "--title=Select file"] + if filetypes: + for ext in filetypes: + cmd += ["--file-filter", 
f"*{ext}"] + else: + cmd = ["kdialog", "--getopenfilename", "--title", "Select file"] + result = subprocess.run( + cmd, capture_output=True, text=True, timeout=30 + ) + if result.returncode in (0, 1): + fpath = result.stdout.strip() + return fpath if fpath else None + except subprocess.TimeoutExpired: + return None + except (FileNotFoundError, Exception): + continue + return None except Exception: return None \ No newline at end of file diff --git a/tabs/dataset_viewer.py b/tabs/dataset_viewer.py index 85077129..ce21768f 100644 --- a/tabs/dataset_viewer.py +++ b/tabs/dataset_viewer.py @@ -3,6 +3,7 @@ from streamlit_image_select import image_select from perceptionmetrics.datasets.coco import find_img_dir_and_ann_file +from perceptionmetrics.datasets.yolo import find_yaml_and_dataset_dir def dataset_viewer_tab(): @@ -19,33 +20,99 @@ def dataset_viewer_tab(): dataset_path = st.session_state.get("dataset_path", "") dataset_type = st.session_state.get("dataset_type", "COCO").lower() split = st.session_state.get("split", "val") + manual_paths_enabled = st.session_state.get("manual_paths_enabled", False) + # COCO manual paths + manual_img_dir = st.session_state.get("manual_img_dir", "").strip() + manual_ann_file = st.session_state.get("manual_ann_file", "").strip() + using_manual_paths = manual_paths_enabled and bool(manual_img_dir) and bool(manual_ann_file) + # YOLO manual paths (only dataset root dir — YAML is auto-discovered) + manual_dataset_dir = st.session_state.get("manual_dataset_dir", "").strip() + using_yolo_manual_paths = manual_paths_enabled and bool(manual_dataset_dir) # Header row only st.header("Dataset Viewer") - if not dataset_path or not os.path.isdir(dataset_path): + if ( + not using_manual_paths + and not using_yolo_manual_paths + and (not dataset_path or not os.path.isdir(dataset_path)) + ): st.warning("⚠️ Please select a valid dataset folder.") return # Setup paths and pagination if dataset_type == "coco": - try: - img_dir, ann_file = 
find_img_dir_and_ann_file( - dataset_path=dataset_path, split=split - ) - except FileNotFoundError: - st.warning("Dataset files not found. Check path and split.") - return + if using_manual_paths: + img_dir, ann_file = manual_img_dir, manual_ann_file + else: + try: + img_dir, ann_file = find_img_dir_and_ann_file( + dataset_path=dataset_path, split=split + ) + except FileNotFoundError: + st.warning( + "Dataset files not found. Check path and split, or tick " + "**Use manual paths** in the sidebar.\n\n" + "**Expected auto-detection structure:**\n" + "```\n" + "dataset_root/\n" + f"├── images/{split}/\n" + f"└── annotations/instances_{split}.json\n" + "```" + ) + return elif dataset_type == "yolo": - dataset_config_file = st.session_state.get("dataset_config_file", None) - img_dir = os.path.join(dataset_path, f"images/{split}") + if using_yolo_manual_paths: + # Manual path: auto-discover YAML inside the provided root + dataset_config_file = None + try: + yolo_yaml_path, yolo_dataset_dir = find_yaml_and_dataset_dir( + dataset_path=manual_dataset_dir, split=split + ) + except FileNotFoundError as exc: + st.warning( + f"YOLO dataset not found in manual path: {exc}\n\n" + "Ensure your dataset folder has the standard structure:\n" + "```\n" + "dataset_root/\n" + "├── *.yaml (e.g. 
data.yaml)\n" + f"├── images/{split}/\n" + f"└── labels/{split}/\n" + "```" + ) + return + img_dir = os.path.join(yolo_dataset_dir, f"images/{split}") + else: + dataset_config_file = st.session_state.get("dataset_config_file", None) + try: + yolo_yaml_path, yolo_dataset_dir = find_yaml_and_dataset_dir( + dataset_path=dataset_path, split=split + ) + img_dir = os.path.join(yolo_dataset_dir, f"images/{split}") + except FileNotFoundError as exc: + if dataset_config_file is not None: + # fall back to uploaded YAML path + yolo_yaml_path = None # will be resolved from uploaded file + yolo_dataset_dir = dataset_path + img_dir = os.path.join(dataset_path, f"images/{split}") + else: + st.warning( + f"YOLO dataset not found: {exc}\n\n" + "Tick **Use manual paths** in the sidebar, or ensure your " + "dataset folder has the standard structure:\n" + "```\n" + "dataset_root/\n" + "├── *.yaml (e.g. data.yaml)\n" + f"├── images/{split}/\n" + f"└── labels/{split}/\n" + "```" + ) + return + if not os.path.isdir(img_dir): st.warning("Image directory not found.") return - if dataset_config_file is None: - st.warning("Dataset configuration file not found. Please upload it.") - return else: st.error("Unsupported dataset type.") return @@ -81,7 +148,12 @@ def dataset_viewer_tab(): st.markdown("
", unsafe_allow_html=True) # Load dataset - dataset_key = f"{dataset_path}_{split}" + if using_manual_paths: + dataset_key = f"manual_coco_{manual_img_dir}_{manual_ann_file}_{split}" + elif using_yolo_manual_paths: + dataset_key = f"manual_yolo_{manual_dataset_dir}_{split}" + else: + dataset_key = f"{dataset_path}_{split}" if dataset_key not in st.session_state: try: if dataset_type == "coco": @@ -91,36 +163,49 @@ def dataset_viewer_tab(): split=split, ) elif dataset_type == "yolo": - if dataset_config_file is not None: - # Save uploaded config file to a temporary location + if using_yolo_manual_paths: + # Manual paths: use yaml + dir directly, no temp file needed + yolo_dataset = YOLODataset(yolo_yaml_path, yolo_dataset_dir) + elif dataset_config_file is not None: + # Uploaded YAML: persist to a temp file for YOLODataset with tempfile.NamedTemporaryFile( delete=False, suffix=".yaml" ) as tmp: tmp.write(dataset_config_file.read()) tmp_path = tmp.name - - # Load YOLO dataset - yolo_dataset = YOLODataset(tmp_path, dataset_path) - st.session_state["full_dataset_df"] = yolo_dataset.dataset - - # Filter dataset for the selected split - yolo_dataset.dataset = yolo_dataset.dataset[ - yolo_dataset.dataset["split"] == split - ].reset_index(drop=True) - st.session_state[dataset_key] = yolo_dataset - + yolo_dataset = YOLODataset(tmp_path, yolo_dataset_dir) os.unlink(tmp_path) # Clean up temp file else: st.warning( - "Dataset configuration file not found. Please upload it." + "Dataset configuration file not found. Please upload it " + "or use **Use manual paths** in the sidebar." 
) return + + st.session_state["full_dataset_df"] = yolo_dataset.dataset + # Filter dataset for the selected split + yolo_dataset.dataset = yolo_dataset.dataset[ + yolo_dataset.dataset["split"] == split + ].reset_index(drop=True) + st.session_state[dataset_key] = yolo_dataset else: st.error("Unsupported dataset type.") return except Exception as e: - st.error(f"Failed to load dataset: {e}") + if using_manual_paths or using_yolo_manual_paths: + source = "manual paths" + else: + source = "auto-detected paths" + st.error( + f"❌ Failed to load dataset ({source}): {e}\n\n" + "**Expected COCO structure:**\n" + "```\n" + "dataset_root/\n" + f"├── images/{split}/\n" + f"└── annotations/instances_{split}.json\n" + "```" + ) return else: # Ensure cached dataset has the correct split; if not, rebuild it diff --git a/tabs/evaluator.py b/tabs/evaluator.py index 2e5fcac8..2a59c9f3 100644 --- a/tabs/evaluator.py +++ b/tabs/evaluator.py @@ -29,21 +29,32 @@ def evaluator_tab(): dataset_path = st.session_state.get("dataset_path", "") dataset_type = st.session_state.get("dataset_type", "Coco") split = st.session_state.get("split", "val") + manual_paths_enabled = st.session_state.get("manual_paths_enabled", False) + manual_img_dir = st.session_state.get("manual_img_dir", "").strip() + manual_ann_file = st.session_state.get("manual_ann_file", "").strip() + using_manual_paths = manual_paths_enabled and bool(manual_img_dir) and bool(manual_ann_file) # Try to get existing dataset from session state first - dataset_key = f"{dataset_path}_{split}" + dataset_key = ( + f"manual_{manual_img_dir}_{manual_ann_file}_{split}" + if using_manual_paths + else f"{dataset_path}_{split}" + ) if dataset_key in st.session_state: dataset = st.session_state[dataset_key] dataset_available = True st.success( - f"✅ Dataset loaded: {dataset_path} ({split} split) - {len(dataset.dataset)} samples" + f"✅ Dataset loaded: {dataset_path or 'manual paths'} ({split} split) - {len(dataset.dataset)} samples" ) - elif 
dataset_path and os.path.isdir(dataset_path): + elif using_manual_paths or (dataset_path and os.path.isdir(dataset_path)): try: if dataset_type.lower() == "coco": - img_dir, ann_file = find_img_dir_and_ann_file( - dataset_path=dataset_path, split=split - ) + if using_manual_paths: + img_dir, ann_file = manual_img_dir, manual_ann_file + else: + img_dir, ann_file = find_img_dir_and_ann_file( + dataset_path=dataset_path, split=split + ) if os.path.isdir(img_dir) and os.path.isfile(ann_file): st.session_state[dataset_key] = CocoDataset( @@ -58,7 +69,14 @@ def evaluator_tab(): ) else: st.warning( - "⚠️ Dataset files not found. Please check the dataset path and split in the sidebar." + "⚠️ Dataset files not found. Check the dataset path and split in the sidebar, " + "or use **Manual Path Override**.\n\n" + "**Expected auto-detection structure:**\n" + "```\n" + "dataset_root/\n" + f"├── images/{split}/\n" + f"└── annotations/instances_{split}.json\n" + "```" ) else: st.warning( @@ -66,6 +84,7 @@ def evaluator_tab(): ) except Exception as e: st.error(f"❌ Error loading dataset: {e}") + else: st.warning( "⚠️ No dataset path provided. Please set the dataset path in the sidebar." 
def test_find_yaml_and_dataset_dir_valid():
    """A root holding ``data.yaml`` with the requested split resolves cleanly."""
    root = "/fake/dataset"
    expected_yaml = os.path.join(root, "data.yaml")
    parsed = {"train": "images/train", "names": {0: "cat"}}

    def _glob_stub(pattern):
        # Only the ``*.yaml`` pattern yields a hit; ``*.yml`` comes back empty.
        if "*.yaml" in pattern:
            return [expected_yaml]
        return []

    isdir_patch = patch("os.path.isdir", return_value=True)
    glob_patch = patch(
        "perceptionmetrics.datasets.yolo.glob", side_effect=_glob_stub
    )
    read_patch = patch(
        "perceptionmetrics.datasets.yolo.uio.read_yaml", return_value=parsed
    )
    with isdir_patch, glob_patch, read_patch:
        yaml_path, dataset_dir = find_yaml_and_dataset_dir(root, "train")

    assert yaml_path == expected_yaml
    assert dataset_dir == root
def test_find_yaml_and_dataset_dir_missing_or_null_split():
    """A split that is absent from, or null in, the YAML raises FileNotFoundError."""
    import pytest

    root = "/fake/dataset"
    yaml_file = os.path.join(root, "data.yaml")

    def _glob_stub(pattern):
        # Only the ``*.yaml`` pattern yields a hit; ``*.yml`` comes back empty.
        return [yaml_file] if "*.yaml" in pattern else []

    yaml_without_val = {"train": "images/train", "names": {0: "cat"}}
    yaml_with_null_val = {"train": "images/train", "val": None, "names": {0: "cat"}}

    for parsed in (yaml_without_val, yaml_with_null_val):
        with patch("os.path.isdir", return_value=True):
            with patch(
                "perceptionmetrics.datasets.yolo.glob", side_effect=_glob_stub
            ):
                with patch(
                    "perceptionmetrics.datasets.yolo.uio.read_yaml",
                    return_value=parsed,
                ):
                    with pytest.raises(FileNotFoundError, match="val"):
                        find_yaml_and_dataset_dir(root, "val")