diff --git a/tests/engines/test_engine_abc.py b/tests/engines/test_engine_abc.py index 3e9f01cc7..939b2de97 100644 --- a/tests/engines/test_engine_abc.py +++ b/tests/engines/test_engine_abc.py @@ -66,7 +66,7 @@ def test_incorrect_output_type() -> None: with pytest.raises( TypeError, - match=r".*output_type must be 'dict' or 'zarr' or 'annotationstore*", + match=r".*output_type must be 'dict' or 'zarr', 'qupath' or 'annotationstore*", ): _ = eng.run( images=np.zeros((10, 224, 224, 3), dtype=np.uint8), @@ -401,48 +401,6 @@ def test_patch_pred_zarr_store(track_tmp_path: pytest.TempPathFactory) -> None: ) assert Path.exists(out), "Zarr output file does not exist" - eng = TestEngineABC(model="alexnet-kather100k") - with pytest.raises( - ValueError, - match=r".*Patch output must contain coordinates.", - ): - _ = eng.run( - images=np.zeros((10, 224, 224, 3), dtype=np.uint8), - labels=list(range(10)), - device=device, - save_dir=save_dir, - overwrite=True, - output_type="AnnotationStore", - ) - - with pytest.raises( - ValueError, - match=r".*Patch output must contain coordinates.", - ): - _ = eng.run( - images=np.zeros((10, 224, 224, 3), dtype=np.uint8), - labels=list(range(10)), - device=device, - save_dir=save_dir, - overwrite=True, - output_type="AnnotationStore", - class_dict={0: "class0", 1: "class1"}, - ) - - with pytest.raises( - ValueError, - match=r".*Patch output must contain coordinates.", - ): - _ = eng.run( - images=np.zeros((10, 224, 224, 3), dtype=np.uint8), - labels=list(range(10)), - device=device, - save_dir=save_dir, - overwrite=True, - output_type="AnnotationStore", - scale_factor=(2.0, 2.0), - ) - def test_get_dataloader(sample_svs: Path) -> None: """Test the get_dataloader function.""" diff --git a/tests/engines/test_multi_task_segmentor.py b/tests/engines/test_multi_task_segmentor.py index ed275e2ed..de47de5b1 100644 --- a/tests/engines/test_multi_task_segmentor.py +++ b/tests/engines/test_multi_task_segmentor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import shutil from pathlib import Path from typing import TYPE_CHECKING, Any, Final @@ -19,6 +20,7 @@ from tiatoolbox.annotation import SQLiteStore from tiatoolbox.models.architecture import fetch_pretrained_weights from tiatoolbox.models.engine.multi_task_segmentor import ( + DaskDelayedJSONStore, MultiTaskSegmentor, _clear_zarr, _get_sel_indices_margin_lines, @@ -140,9 +142,10 @@ def test_mtsegmentor_patches(remote_sample: Callable, track_tmp_path: Path) -> N assert len(output_ann) == 6 + fields_nuclei = ["box", "centroid", "contours", "prob", "type"] + fields_layer = ["contours", "type"] + for task_name in mtsegmentor.tasks: - fields_nuclei = ["box", "centroid", "contours", "prob", "type"] - fields_layer = ["contours", "type"] fields = fields_nuclei if task_name == "nuclei_segmentation" else fields_layer output_ann_ = [p for p in output_ann if p.name.endswith(f"{task_name}.db")] expected_counts = ( @@ -161,6 +164,37 @@ def test_mtsegmentor_patches(remote_sample: Callable, track_tmp_path: Path) -> N class_dict=mtsegmentor.model.class_dict, ) + # QuPath JSON does not have fields + fields_nuclei = ["contours", "prob", "type"] + # QuPath output comparison + output_json = mtsegmentor.run( + images=patches, + patch_mode=True, + device=device, + output_type="QuPath", + save_dir=track_tmp_path / "patch_output_qupath", + ) + + assert len(output_json) == 6 + + for task_name in mtsegmentor.tasks: + fields = fields_nuclei if task_name == "nuclei_segmentation" else fields_layer + output_json_ = [p for p in 
output_json if p.name.endswith(f"{task_name}.json")] + expected_counts = ( + expected_counts_nuclei + if task_name == "nuclei_segmentation" + else expected_counts_layer + ) + assert_qupath_json_patch_output( + inputs=patches, + output_json=output_json_, + output_dict=output_dict[task_name], + track_tmp_path=track_tmp_path, + fields=fields, + expected_counts=expected_counts, + task_name=task_name, + ) + def test_single_task_mtsegmentor( remote_sample: Callable, @@ -278,7 +312,49 @@ def test_single_task_mtsegmentor( for field in fields: assert field not in zarr_group - assert "Probability maps cannot be saved as AnnotationStore" in caplog.text + assert "Probability maps cannot be saved as AnnotationStore or JSON" in caplog.text + + # QuPath output comparison + + # Reinitialize to check for probabilities in output. + mtsegmentor.drop_keys = [] + output_json = mtsegmentor.run( + images=inputs, + patch_mode=True, + device=device, + output_type="QuPath", + save_dir=track_tmp_path / "patch_output_qupath", + return_probabilities=True, + ) + + assert len(output_json) == 3 + + assert_qupath_json_patch_output( + inputs=inputs, + output_json=output_json, + output_dict=output_dict, + track_tmp_path=track_tmp_path, + fields=["box", "centroid", "contours", "prob", "type"], + expected_counts=expected_counts_nuclei, + task_name=None, + ) + + zarr_file = track_tmp_path / "patch_output_qupath" / "output.zarr" + + assert zarr_file.exists() + + zarr_group = zarr.open( + str(zarr_file), + mode="r", + ) + + assert "probabilities" in zarr_group + + fields = ["box", "centroid", "contours", "prob", "type", "predictions"] + for field in fields: + assert field not in zarr_group + + assert "Probability maps cannot be saved as AnnotationStore or JSON" in caplog.text def test_wsi_mtsegmentor_correct_nonsquare_shape( @@ -509,6 +585,44 @@ def test_wsi_segmentor_annotationstore( assert store_file_path.exists() assert store_file_path == output[wsi4_512_512_svs][0] + +def test_wsi_segmentor_qupath(remote_sample: Callable, track_tmp_path: Path) -> None: + """Test MultiTaskSegmentor for WSIs with AnnotationStore output.""" + wsi4_512_512_svs = remote_sample("wsi4_512_512_svs") + # testing different configuration for hovernet. + # kumar only has two probability maps + # Need to test Null values in JSON output. + model_name = "hovernet_original-kumar" + mtsegmentor = MultiTaskSegmentor( + model=model_name, + batch_size=32, + verbose=False, + ) + + class_dict = mtsegmentor.model.class_dict + + # Return Probabilities is False + output = mtsegmentor.run( + images=[wsi4_512_512_svs], + return_probabilities=False, + device=device, + patch_mode=False, + save_dir=track_tmp_path / "wsi_out_check", + verbose=True, + output_type="qupath", + class_dict=class_dict, + memory_threshold=0, + ) + + for output_ in output[wsi4_512_512_svs]: + assert output_.suffix != ".zarr" + + json_file_name = f"{wsi4_512_512_svs.stem}.json" + json_file_name = track_tmp_path / "wsi_out_check" / json_file_name + assert json_file_name.exists() + assert json_file_name == output[wsi4_512_512_svs][0] + + # Weights not used after this test weights_path = Path(fetch_pretrained_weights(model_name=model_name)) weights_path.unlink() @@ -535,7 +649,7 @@ def test_wsi_segmentor_annotationstore_probabilities( output_type="annotationstore", ) - assert "Probability maps cannot be saved as AnnotationStore." in caplog.text + assert "Probability maps cannot be saved as AnnotationStore or JSON." 
in caplog.text zarr_group = zarr.open(output[wsi4_512_512_svs][0], mode="r") assert "probabilities" in zarr_group @@ -681,6 +795,121 @@ class FakeVM: assert np.array_equal(new_zarr[idx][:], np.array([[1, 2, 3]])) +def test_qupath_feature_class_dict_lookup_fails() -> None: + """Test qupath_feature_class_dict lookup fails.""" + qupath_json = DaskDelayedJSONStore.__new__(DaskDelayedJSONStore) + qupath_json._contours = [np.array([[0, 0], [1, 0], [1, 1]])] + qupath_json._processed_predictions = {"type": np.array([5], dtype=object)} + + class_dict = {0: "A", 1: "B"} # does NOT contain 5 + class_colors = {0: [255, 0, 0], 1: [0, 255, 0]} # also does NOT contain 5 + + feat = qupath_json._build_single_qupath_feature( + i=0, + class_dict=class_dict, + origin=(0, 0), + scale_factor=(1, 1), + class_colors=class_colors, + ) + + # type should fall back to raw value (5) + assert feat["properties"]["type"] == 5 + # classification block should NOT appear + assert "classification" not in feat["properties"] + + +def test_qupath_feature_classification_block_skipped() -> None: + """Test qupath_feature_classification_block_skipped fails.""" + qupath_json = DaskDelayedJSONStore.__new__(DaskDelayedJSONStore) + qupath_json._contours = [np.array([[0, 0], [1, 0], [1, 1]])] + qupath_json._processed_predictions = {"type": np.array([1], dtype=object)} + + class_dict = {1: "Tumor"} + class_colors = {0: [255, 0, 0]} # does NOT contain 1 + + feat = qupath_json._build_single_qupath_feature( + i=0, + class_dict=class_dict, + origin=(0, 0), + scale_factor=(1, 1), + class_colors=class_colors, + ) + + assert feat["properties"]["type"] == "Tumor" + assert "classification" not in feat["properties"] + + +def test_compute_qupath_json_valid_ids_not_empty(track_tmp_path: Path) -> None: + """Test compute_qupath_json valid ids not empty.""" + store = DaskDelayedJSONStore.__new__(DaskDelayedJSONStore) + + # One simple contour + store._contours = [np.array([[0, 0], [10, 0], [10, 10], [0, 10]])] + + # Mixed type array → valid_ids = [1, 2] + store._processed_predictions = {"type": np.array([1, None, 2], dtype=object)} + + out_path = track_tmp_path / "out.json" + result_path = store.compute_qupath_json( + class_dict=None, + save_path=out_path, + verbose=False, + ) + + # Load JSON + data = json.loads(Path(result_path).read_text()) + props = data["features"][0]["properties"] + + # 1. class_dict should have been inferred as {0:0, 1:1, 2:2} + assert props["type"] in (1, 2) + + # 2. type must NOT be null + assert props["type"] is not None + + # 3. classification block should exist only if class_value in class_colours + assert "null" not in json.dumps(data) + + +def test_compute_qupath_json_string_class_names(track_tmp_path: Path) -> None: + """Test compute_qupath_json string class names not empty and str.""" + store = DaskDelayedJSONStore.__new__(DaskDelayedJSONStore) + + # One simple contour + store._contours = [np.array([[0, 0], [10, 0], [10, 10], [0, 10]])] + + # String class names → triggers the "already class names" branch + store._processed_predictions = { + "type": np.array(["Tumor", None, "Stroma"], dtype=object) + } + + # Run compute_qupath_json with class_dict=None + out_path = track_tmp_path / "out.json" + result_path = store.compute_qupath_json( + class_dict=None, + save_path=out_path, + verbose=False, + ) + + # Load JSON + data = json.loads(Path(result_path).read_text()) + props = data["features"][0]["properties"] + + # --- Assertions --- + + # 1. 
type must be one of the string class names + assert props["type"] in ("Tumor", "Stroma") + + # 2. type must NOT be null + assert props["type"] is not None + + # 3. class_dict should have been inferred as identity mapping + # "Stroma": "Stroma", "Tumor": "Tumor" + # So classification block should exist only if class_colours + # contains the key, but we don't enforce that here — just + # ensure no nulls + assert "null" not in json.dumps(data) + + # HELPER functions def assert_output_lengths( output: OutputType, expected_counts: Sequence[int], fields: list[str] @@ -807,6 +1036,108 @@ def assert_annotation_store_patch_output( assert annotations_list == [] +def assert_qupath_json_patch_output( # skipcq: PY-R1000 + inputs: list | np.ndarray, + output_json: list[Path], + task_name: str | None, + track_tmp_path: Path, + expected_counts: Sequence[int], + output_dict: dict, + fields: list[str], +) -> None: + """Helper function to test QuPath JSON output.""" + for patch_idx, json_path in enumerate(output_json): + # --- 1. Verify filename matches expected pattern --- + if isinstance(inputs[patch_idx], Path): + file_name = ( + f"{inputs[patch_idx].stem}.json" + if task_name is None + else f"{inputs[patch_idx].stem}_{task_name}.json" + ) + else: + file_name = ( + f"{patch_idx}.json" + if task_name is None + else f"{patch_idx}_{task_name}.json" + ) + + assert json_path == track_tmp_path / "patch_output_qupath" / file_name + + # --- 2. Load JSON --- + with Path.open(json_path, "r") as f: + qupath_json = json.load(f) + + features = qupath_json.get("features", []) + assert isinstance(features, list) + + # --- 3. Zero-object case --- + if expected_counts[patch_idx] == 0: + assert len(features) == 0 + continue + + # --- 4. Non-zero case --- + assert len(features) == expected_counts[patch_idx] + + # Extract results from JSON + result = {field: [] for field in fields} + + for feat in features: + props = feat.get("properties", {}) + + # non-geometric fields (box, centroid, prob, type, etc.) + for field in fields: + if field == "contours": + continue + if field in props: + result[field].append(props[field]) + + # contours from geometry + if "contours" in fields: + geom = feat["geometry"] + coords = geom["coordinates"][0] # exterior ring + coords = [(int(x), int(y)) for x, y in coords] + result["contours"].append(coords) + + # Wrap for compatibility with assert_output_lengths + result_wrapped = {field: [result[field]] for field in fields} + + # --- 5. Length check --- + assert_output_lengths( + result_wrapped, + expected_counts=[expected_counts[patch_idx]], + fields=fields, + ) + + # --- 6. Equality check for non-contour fields --- + fields_no_contours = fields.copy() + if "contours" in fields_no_contours: + fields_no_contours.remove("contours") + + assert_output_equal( + result_wrapped, + output_dict, + fields=fields_no_contours, + indices_a=[0], + indices_b=[patch_idx], + ) + + # --- 7. 
Contour comparison --- + if "contours" in fields: + matches = [] + for a, b in zip( + result["contours"], + output_dict["contours"][patch_idx], + strict=False, + ): + # Discard last point (closed polygon) + a_arr = np.array(a[:-1], dtype=int) + b_arr = np.array(b, dtype=int) + matches.append(np.array_equal(a_arr, b_arr)) + + # Allow small geometric differences + assert sum(matches) / len(matches) >= 0.95 + + # ------------------------------------------------------------------------------------- # Command Line Interface # ------------------------------------------------------------------------------------- diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index b5588dfa9..33c2b1a8d 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -1,5 +1,6 @@ """Tests for NucleusDetector.""" +import json from collections.abc import Callable from pathlib import Path @@ -13,6 +14,9 @@ from tiatoolbox.annotation.storage import SQLiteStore from tiatoolbox.models.engine.nucleus_detector import ( NucleusDetector, + _write_detection_arrays_to_store, + save_detection_arrays_to_qupath_json, + save_detection_arrays_to_store, ) from tiatoolbox.utils import env_detection as toolbox_env from tiatoolbox.utils.misc import imwrite @@ -50,7 +54,7 @@ def test_write_detection_arrays_to_store() -> None: "probabilities": np.array([1.0, 0.5], dtype=np.float32), } - store = NucleusDetector.save_detection_arrays_to_store(detection_arrays) + store = save_detection_arrays_to_store(detection_arrays) assert len(store.values()) == 2 detection_arrays = { @@ -63,7 +67,32 @@ def test_write_detection_arrays_to_store() -> None: ValueError, match=r"Detection record lengths are misaligned.", ): - _ = NucleusDetector.save_detection_arrays_to_store(detection_arrays) + _ = save_detection_arrays_to_store(detection_arrays) + + +def test_write_detection_arrays_to_qupath() -> None: + """Test writing detection arrays to QuPath JSON.""" + detection_arrays = { + "x": np.array([1, 3], dtype=np.uint32), + "y": np.array([1, 2], dtype=np.uint32), + "classes": np.array([0, 1], dtype=np.uint32), + "probabilities": np.array([1.0, 0.5], dtype=np.float32), + } + + json_ = save_detection_arrays_to_qupath_json(detection_arrays) + assert len(json_.values()) == 2 + + detection_arrays = { + "x": np.array([1], dtype=np.uint32), + "y": np.array([1, 2], dtype=np.uint32), + "classes": np.array([0], dtype=np.uint32), + "probabilities": np.array([1.0, 0.5], dtype=np.float32), + } + with pytest.raises( + ValueError, + match=r"Detection record lengths are misaligned.", + ): + _ = save_detection_arrays_to_store(detection_arrays) def test_write_detection_records_to_store_no_class_dict() -> None: @@ -71,7 +100,7 @@ def test_write_detection_records_to_store_no_class_dict() -> None: detection_records = (np.array([1]), np.array([2]), np.array([0]), np.array([1.0])) dummy_store = SQLiteStore() - total = NucleusDetector._write_detection_arrays_to_store( + total = _write_detection_arrays_to_store( detection_records, store=dummy_store, scale_factor=(1.0, 1.0), class_dict=None ) assert len(dummy_store.values()) == 1 @@ -142,6 +171,27 @@ def test_nucleus_detector_patch_annotation_store_output( assert len(store_2.values()) == 0 store_2.close() + _ = nucleus_detector.run( + patch_mode=True, + device=device, + output_type="qupath", + memory_threshold=50, + images=[image_dir / "patch_0.png", image_dir / "patch_1.png"], + save_dir=save_dir, + overwrite=True, + ) + + 
with Path.open(save_dir / "patch_0.json", "r") as f: + data_1 = json.load(f) + features_1 = data_1.get("features", []) + assert len(features_1) == 1 + + with Path.open(save_dir / "patch_1.json", "r") as f: + data_2 = json.load(f) + features_2 = data_2.get("features", []) + + assert len(features_2) == 0 + rm_dir(save_dir) @@ -266,6 +316,30 @@ def test_nucleus_detector_wsi( assert annotation.properties["type"] == "test_nucleus" store.close() + # QuPath + nucleus_detector.drop_keys = [] + _ = nucleus_detector.run( + patch_mode=False, + device=device, + output_type="qupath", + memory_threshold=50, + images=[mini_wsi_svs], + save_dir=save_dir, + overwrite=True, + batch_size=8, + class_dict={0: "test_nucleus"}, + min_distance=5, + postproc_tile_shape=(2048, 2048), + ) + + with Path.open(save_dir / "wsi4_512_512.json", "r") as f: + qupath_json = json.load(f) + features: list[dict] = qupath_json.get("features", []) + assert 245 <= len(features) <= 255 + first = features[0] + # Classification name + assert first["properties"]["classification"]["name"] == "test_nucleus" + # Check cached centroid maps are removed temp_zarr_files = save_dir / "wsi4_512_512.zarr" assert not temp_zarr_files.exists() diff --git a/tests/engines/test_patch_predictor.py b/tests/engines/test_patch_predictor.py index b346f94de..7746cd346 100644 --- a/tests/engines/test_patch_predictor.py +++ b/tests/engines/test_patch_predictor.py @@ -199,7 +199,7 @@ def test_patch_predictor_api( processed_predictions["coordinates"] = np.asarray( [[0, 0, 224, 224], [0, 0, 224, 224]] ) - + (track_tmp_path / "patch_out_check").mkdir() output_ = predictor.save_predictions( processed_predictions=processed_predictions, output_type="annotationstore", @@ -212,47 +212,6 @@ def test_patch_predictor_api( assert np.all(np.array(output_ann["probabilities"]) >= 0) -def test_patch_predictor_patch_mode_no_probabilities( - sample_patch1: Path, - sample_patch2: Path, - track_tmp_path: Path, -) -> None: - """Test the output of patch classification models on Kather100K dataset.""" - inputs = [Path(sample_patch1), Path(sample_patch2)] - - predictor = PatchPredictor( - model="alexnet-kather100k", - batch_size=32, - verbose=False, - ) - - output = predictor.run( - images=inputs, - return_probabilities=False, - return_labels=False, - device=device, - patch_mode=True, - ) - - assert "probabilities" not in output - - processed_predictions = {k: v for k, v in output.items() if k != "labels"} - processed_predictions["coordinates"] = np.asarray( - [[0, 0, 224, 224], [0, 0, 224, 224]] - ) - - output_ = predictor.save_predictions( - processed_predictions=processed_predictions, - output_type="annotationstore", - save_path=track_tmp_path / "patch_out_check" / "output.db", - ) - - assert output_.exists() - output_ann = _extract_probabilities_from_annotation_store(output_) - assert np.all(output_ann["predictions"] == [6, 3]) - assert "probabilities" not in output - - def test_wsi_predictor_api( sample_wsi_dict: dict, track_tmp_path: Path, @@ -391,6 +350,92 @@ def test_wsi_predictor_zarr( assert "Output file saved at " in caplog.text +def test_patch_predictor_patch_mode_annotation_store( + sample_patch1: Path, + sample_patch2: Path, + track_tmp_path: Path, +) -> None: + """Test the output of patch classification models on Kather100K dataset.""" + inputs = [Path(sample_patch1), Path(sample_patch2)] + + predictor = PatchPredictor( + model="alexnet-kather100k", + batch_size=32, + verbose=False, + ) + # don't run test on GPU + output = predictor.run( + images=inputs, + 
return_probabilities=True, + return_labels=False, + device=device, + patch_mode=True, + save_dir=track_tmp_path / "patch_out_check", + output_type="annotationstore", + ) + + assert output.exists() + output = _extract_probabilities_from_annotation_store(output) + assert np.all(output["predictions"] == [6, 3]) + assert np.all(np.array(output["probabilities"]) <= 1) + assert np.all(np.array(output["probabilities"]) >= 0) + + +def test_patch_predictor_patch_mode_no_probabilities( + sample_patch1: Path, + sample_patch2: Path, + track_tmp_path: Path, +) -> None: + """Test the output of patch classification models on Kather100K dataset.""" + inputs = [Path(sample_patch1), Path(sample_patch2)] + + predictor = PatchPredictor( + model="alexnet-kather100k", + batch_size=32, + verbose=False, + ) + + output = predictor.run( + images=inputs, + return_probabilities=False, + return_labels=False, + device=device, + patch_mode=True, + ) + + assert "probabilities" not in output + + processed_predictions = {k: v for k, v in output.items() if k != "labels"} + processed_predictions["coordinates"] = np.asarray( + [[0, 0, 224, 224], [0, 0, 224, 224]] + ) + + (track_tmp_path / "patch_out_check").mkdir() + + output_ = predictor.save_predictions( + processed_predictions=processed_predictions, + output_type="annotationstore", + save_path=track_tmp_path / "patch_out_check" / "output.db", + ) + + assert output_.exists() + output_ann = _extract_probabilities_from_annotation_store(output_) + assert np.all(output_ann["predictions"] == [6, 3]) + assert "probabilities" not in output + + # QuPath Output + output_ = predictor.save_predictions( + processed_predictions=processed_predictions, + output_type="qupath", + save_path=track_tmp_path / "patch_out_check" / "output.json", + ) + + assert output_.exists() + output_ann = _extract_from_qupath_json(output_) + assert np.all(output_ann["predictions"] == [6, 3]) + assert "probabilities" not in output + + def test_engine_run_wsi_annotation_store( sample_wsi_dict: dict, track_tmp_path: Path, @@ -439,6 +484,54 @@ def test_engine_run_wsi_annotation_store( shutil.rmtree(save_dir) +def test_engine_run_wsi_qupath( + sample_wsi_dict: dict, + track_tmp_path: Path, + caplog: pytest.LogCaptureFixture, +) -> None: + """Test the engine run for Whole slide images.""" + # convert to pathlib Path to prevent wsireader complaint + mini_wsi_svs = Path(sample_wsi_dict["wsi2_4k_4k_svs"]) + mini_wsi_msk = Path(sample_wsi_dict["wsi2_4k_4k_msk"]) + + eng = PatchPredictor(model="alexnet-kather100k") + + patch_size = np.array([224, 224]) + save_dir = f"{track_tmp_path}/model_wsi_output" + + kwargs = { + "patch_input_shape": patch_size, + "stride_shape": patch_size, + "resolution": 0.5, + "save_dir": save_dir, + "units": "mpp", + "scale_factor": (2.0, 2.0), + } + + output = eng.run( + images=[mini_wsi_svs], + masks=[mini_wsi_msk], + patch_mode=False, + output_type="QuPath", + batch_size=4, + **kwargs, + ) + + output_ = output[mini_wsi_svs] + + assert output_.exists() + assert output_.suffix == ".json" + output_ = _extract_from_qupath_json(output_) + + # prediction for each patch + assert np.array(output_["predictions"]).shape == (69,) + assert _validate_probabilities(output_) + + assert "Output file saved at " in caplog.text + + shutil.rmtree(save_dir) + + # -------------------------------------------------------------------------------------- # torch.compile # -------------------------------------------------------------------------------------- @@ -659,6 +752,28 @@ def 
_extract_probabilities_from_annotation_store(dbfile: str | Path) -> dict: return output +def _extract_from_qupath_json(json_file: str) -> dict: + """Extract predictions (and optionally coordinates) from QuPath GeoJSON.""" + with Path.open(json_file, "r") as f: + data = json.load(f) + + output = {"predictions": [], "coordinates": []} + + for feature in data.get("features", []): + props = feature.get("properties", {}) + cls = props.get("classification", {}) + + # prediction - class name + output["predictions"].append(cls.get("name")) + + # geometry - polygon + geom = feature.get("geometry", {}) + coords = geom.get("coordinates", [[]])[0] # first ring of polygon + output["coordinates"].append(coords) + + return output + + def _validate_probabilities(output: list | dict | zarr.group) -> bool: """Helper function to test if the probabilities value are valid.""" probabilities = np.array([0.5]) diff --git a/tests/engines/test_semantic_segmentor.py b/tests/engines/test_semantic_segmentor.py index c81194621..30c898202 100644 --- a/tests/engines/test_semantic_segmentor.py +++ b/tests/engines/test_semantic_segmentor.py @@ -131,6 +131,39 @@ def test_semantic_segmentor_patches( _test_store_output_patch(output_seg[0]) +def _test_qupath_output_patch(output: Path) -> None: + """Helper function to test QuPath JSON output for a patch.""" + with Path.open(output) as f: + data = json.load(f) + + assert "features" in data + features = data["features"] + assert len(features) > 0 + + geometry_types = [] + class_values = set() + + for feat in features: + # geometry type + geom = feat.get("geometry", {}) + geometry_types.append(geom.get("type")) + + # class index (you stored this as class_value) + class_val = feat.get("class_value") + if class_val is not None: + class_values.add(class_val) + + # Check geometry type + assert "Polygon" in geometry_types + + # When class_dict is None, types are assigned as 0, 1, ... + assert 0 in class_values + assert 1 in class_values + + # Basic sanity check + assert features is not None + + def test_semantic_segmentor_tiles(track_tmp_path: Path) -> None: """Tests SemanticSegmentor on image tiles with no mpp metadata.""" segmentor = SemanticSegmentor( @@ -165,6 +198,33 @@ def test_semantic_segmentor_tiles(track_tmp_path: Path) -> None: sample_image.unlink() +def test_save_qupath_json(remote_sample: Callable, track_tmp_path: Path) -> None: + """Test for saving output as annotation store.""" + segmentor = SemanticSegmentor( + model="fcn-tissue_mask", batch_size=32, verbose=False, device=device + ) + + # Test str input + sample_image = remote_sample("thumbnail-1k-1k") + + inputs = [Path(sample_image)] + + output = segmentor.run( + images=inputs, + return_probabilities=False, + return_labels=False, + device=device, + patch_mode=True, + save_dir=track_tmp_path / "output1", + output_type="qupath", + verbose=True, + ) + + assert output[0] == track_tmp_path / "output1" / (sample_image.stem + ".json") + assert len(output) == 1 + _test_qupath_output_patch(output[0]) + + def test_save_annotation_store_nparray( remote_sample: Callable, track_tmp_path: Path, caplog: pytest.LogCaptureFixture ) -> None: @@ -196,7 +256,7 @@ def test_save_annotation_store_nparray( zarr_group = zarr.open(str(track_tmp_path / "output1" / "output.zarr"), mode="r") assert "probabilities" in zarr_group - assert "Probability maps cannot be saved as AnnotationStore." 
in caplog.text + assert "Probability maps cannot be saved as AnnotationStore" in caplog.text _test_store_output_patch(output[0]) _test_store_output_patch(output[1]) @@ -460,7 +520,7 @@ def test_wsi_segmentor_zarr( assert 0.48 < np.mean(output_["probabilities"][:]) < 0.52 -def test_wsi_segmentor_annotationstore( +def test_wsi_segmentor_annotationstore_qupath( remote_sample: Callable, track_tmp_path: Path, caplog: pytest.CaptureFixture ) -> None: """Test SemanticSegmentor for WSIs with AnnotationStore output.""" @@ -493,6 +553,7 @@ def test_wsi_segmentor_annotationstore( verbose=False, ) # Return Probabilities is True + # Check QuPath output output = segmentor.run( images=[wsi4_512_512_svs], return_probabilities=True, @@ -501,17 +562,17 @@ def test_wsi_segmentor_annotationstore( patch_mode=False, save_dir=track_tmp_path / "wsi_prob_out_check", verbose=True, - output_type="annotationstore", + output_type="QuPath", ) assert output[wsi4_512_512_svs] == track_tmp_path / "wsi_prob_out_check" / ( - wsi4_512_512_svs.stem + ".db" + wsi4_512_512_svs.stem + ".json" ) assert output[wsi4_512_512_svs].with_suffix(".zarr").exists() zarr_group = zarr.open(output[wsi4_512_512_svs].with_suffix(".zarr"), mode="r") assert "probabilities" in zarr_group - assert "Probability maps cannot be saved as AnnotationStore." in caplog.text + assert "Probability maps cannot be saved as AnnotationStore or JSON." in caplog.text def test_prepare_full_batch_low_memory(track_tmp_path: Path) -> None: diff --git a/tests/test_utils.py b/tests/test_utils.py index 04e91405c..8fbc44f5e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -36,7 +36,13 @@ ) from tiatoolbox.utils import misc from tiatoolbox.utils.exceptions import FileNotSupportedError -from tiatoolbox.utils.misc import cast_to_min_dtype, create_smart_array +from tiatoolbox.utils.misc import ( + _semantic_segmentations_as_qupath_json, + _tiles, + cast_to_min_dtype, + create_smart_array, + dict_to_store_patch_predictions, +) from tiatoolbox.utils.transforms import locsize2bounds if TYPE_CHECKING: @@ -1780,6 +1786,7 @@ def test_patch_pred_store_persist(track_tmp_path: pytest.TempPathFactory) -> Non "labels": [1, 0, 1], } save_path = track_tmp_path / "patch_output" / "output.db" + save_path.parent.mkdir() store_path = misc.dict_to_store_patch_predictions( patch_output, (1.0, 1.0), save_path=save_path @@ -1816,6 +1823,7 @@ def test_patch_pred_store_persist_ext(track_tmp_path: pytest.TempPathFactory) -> # sends the path of a jpeg source image, expects .db file in the same directory save_path = track_tmp_path / "patch_output" / "output.jpeg" + save_path.parent.mkdir() store_path = misc.dict_to_store_patch_predictions( patch_output, (1.0, 1.0), save_path=save_path @@ -1886,6 +1894,7 @@ def test_dict_to_store_semantic_segment() -> None: scale_factor=(1.0, 1.0), class_dict=None, save_path=None, + output_type="annotationstore", ) assert len(store_) == 1 for annotation in store_.values(): @@ -1899,6 +1908,7 @@ def test_dict_to_store_semantic_segment() -> None: scale_factor=(1.0, 1.0), class_dict=None, save_path=None, + output_type="annotationstore", ) assert len(store_) == 2 @@ -1918,6 +1928,7 @@ def test_dict_to_store_semantic_segment() -> None: scale_factor=(1.0, 1.0), class_dict=None, save_path=None, + output_type="annotationstore", ) assert len(store_) == 3 @@ -1938,6 +1949,7 @@ def test_dict_to_store_semantic_segment() -> None: scale_factor=(1.0, 1.0), class_dict=None, save_path=None, + output_type="annotationstore", ) assert len(store_) == 4 annotations_ = 
store_.values() @@ -1974,6 +1986,7 @@ def test_dict_to_store_semantic_segment_holes(track_tmp_path: Path) -> None: scale_factor=(1.0, 1.0), class_dict={0: "background", 1: "object"}, save_path=save_dir_path, + output_type="annotationstore", ) assert save_dir_path.exists() @@ -1983,6 +1996,7 @@ def test_dict_to_store_semantic_segment_holes(track_tmp_path: Path) -> None: scale_factor=(1.0, 1.0), class_dict={0: "background", 1: "object"}, save_path=None, + output_type="annotationstore", ) # outer contour and inner contour/hole are now within the same geometry @@ -2033,6 +2047,7 @@ def test_dict_to_store_semantic_segment_multiple_holes() -> None: scale_factor=(1.0, 1.0), class_dict={0: "background", 1: "object"}, save_path=None, + output_type="annotationstore", ) # outer contour and inner contour/hole are now within the same geometry @@ -2081,6 +2096,7 @@ def test_dict_to_store_semantic_segment_no_holes() -> None: scale_factor=(1.0, 1.0), class_dict={0: "background", 1: "object"}, save_path=None, + output_type="annotationstore", ) # outer contour and inner contour/hole are now within the same geometry @@ -2333,3 +2349,110 @@ class FakeVM: assert isinstance(arr, np.ndarray) assert arr.shape == shape assert arr.dtype == dtype + + +def test_tiles_zero_iterations() -> None: + """Test helper function with zero iterations.""" + in_img = np.zeros((0, 0), dtype=np.uint8) + + tile_size = (32, 32) # larger than the image + colormap = 2 # arbitrary valid OpenCV colormap + + tile_iter = _tiles(in_img, tile_size, colormap=colormap, level=0) + + tiles = list(tile_iter) + + assert tiles == [] # no tiles generated + + +def test_semantic_segmentation_returns_json_dict() -> None: + """Test for semantic_segmentation QuPath JSON dict.""" + # Fake 4 x 4 prediction map with two classes: 0 and 1 + preds_np = np.array( + [ + [0, 0, 1, 1], + [0, 0, 1, 1], + [0, 0, 1, 1], + [0, 0, 1, 1], + ], + dtype=np.uint8, + ) + + preds = da.from_array(preds_np, chunks=(4, 4)) + + layer_list = [0, 1] # two classes + scale_factor = (1.0, 1.0) + class_dict = {0: "Background", 1: "Tumor"} + + qupath_json = _semantic_segmentations_as_qupath_json( + layer_list=layer_list, + preds=preds, + scale_factor=scale_factor, + class_dict=class_dict, + save_path=None, + verbose=False, + ) + + # --- Assert --- + assert isinstance(qupath_json, dict) + assert "type" in qupath_json + assert qupath_json["type"] == "FeatureCollection" + assert "features" in qupath_json + assert isinstance(qupath_json["features"], list) + assert len(qupath_json["features"]) > 0 + + for feature in qupath_json["features"]: + assert feature["properties"]["classification"]["name"] in class_dict.values() + assert feature["properties"]["classification"]["color"] is not None + assert feature["name"] in class_dict.values() + assert feature["class_value"] in class_dict + + +def test_dict_to_store_patch_predictions_returns_qupath_json() -> None: + """Test for dict_to_store_patch_predictions QuPath JSON dict.""" + # Fake patch output + patch_output = { + "predictions": np.array([0, 1, 0, 1], dtype=np.uint8), + "coordinates": np.array( + [ + [0, 0, 10, 10], + [10, 0, 20, 10], + [0, 10, 10, 20], + [10, 10, 20, 20], + ] + ), + "labels": np.array([0, 1, 0, 1]), + } + + scale_factor = (1.0, 1.0) + class_dict = {0: "Background", 1: "Tumor"} + + result = dict_to_store_patch_predictions( + patch_output=patch_output, + scale_factor=scale_factor, + class_dict=class_dict, + save_path=None, + output_type="qupath", + verbose=False, + ) + + assert isinstance(result, dict) + + assert "type" in 
result + assert result["type"] == "FeatureCollection" + assert "features" in result + assert isinstance(result["features"], list) + + assert len(result["features"]) > 0 + + for feature in result["features"]: + assert feature["type"] == "Feature" + assert "geometry" in feature + assert "properties" in feature + assert "classification" in feature["properties"] + assert "name" in feature + assert "class_value" in feature + + assert feature["class_value"] in class_dict + assert feature["properties"]["classification"]["name"] in class_dict.values() + assert feature["properties"]["classification"]["color"] is not None diff --git a/tiatoolbox/models/engine/engine_abc.py b/tiatoolbox/models/engine/engine_abc.py index e107ac9c7..47de20caf 100644 --- a/tiatoolbox/models/engine/engine_abc.py +++ b/tiatoolbox/models/engine/engine_abc.py @@ -234,7 +234,7 @@ class EngineABC(ABC): # noqa: B024 drop_keys (list): Keys to exclude from model output. output_type (Any): - Format of output ("dict", "zarr", "AnnotationStore"). + Format of output ("dict", "zarr", "qupath", "AnnotationStore"). verbose (bool): Whether to enable verbose logging. @@ -649,7 +649,7 @@ def save_predictions( Dictionary containing processed model predictions. output_type (str): Desired output format. - Supported values are "dict", "zarr", and "annotationstore". + Supported values are "dict", "zarr", "qupath" and "annotationstore". save_path (Path | None): Path to save the output file. Required for "zarr" and "annotationstore" formats. @@ -694,6 +694,8 @@ def save_predictions( dict | AnnotationStore | Path: - If output_type is "dict": returns predictions as a dictionary. - If output_type is "zarr": returns path to saved zarr file. + - If output_type is "qupath": returns a QuPath JSON + or path to .json file. - If output_type is "annotationstore": returns an AnnotationStore or path to .db file. @@ -724,8 +726,11 @@ def save_predictions( if output_type.lower() == "dict": return processed_predictions - if output_type.lower() == "annotationstore": - save_path = Path(kwargs.get("output_file", save_path.parent / "output.db")) + if output_type.lower() in ["qupath", "annotationstore"]: + suffix = ".json" if output_type.lower() == "qupath" else ".db" + save_path = Path( + kwargs.get("output_file", save_path.parent / ("output" + suffix)) + ) # scale_factor set from kwargs scale_factor = kwargs.get("scale_factor", (1.0, 1.0)) @@ -737,6 +742,7 @@ def save_predictions( scale_factor, class_dict, save_path, + output_type=output_type, verbose=self.verbose, ) @@ -1216,7 +1222,7 @@ def _update_run_params( ioconfig (ModelIOConfigABC | None): IO configuration for patch extraction and resolution settings. output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". + Desired output format: "dict", "zarr", "qupath" or "annotationstore". overwrite (bool): Whether to overwrite existing output files. Default is False. patch_mode (bool): @@ -1268,7 +1274,7 @@ def _update_run_params( ValueError: If required configuration or input parameters are missing. ValueError: - If save_dir is not provided and output_type is "zarr" + If save_dir is not provided and output_type is "zarr", "qupath" or "annotationstore". """ @@ -1292,8 +1298,8 @@ def _update_run_params( self.patch_mode = patch_mode self._validate_input_numbers(images=images, masks=masks, labels=self.labels) - if output_type.lower() not in ["dict", "zarr", "annotationstore"]: - msg = "output_type must be 'dict' or 'zarr' or 'annotationstore'." 
+ if output_type.lower() not in ["dict", "zarr", "qupath", "annotationstore"]: + msg = "output_type must be 'dict' or 'zarr', 'qupath' or 'annotationstore'." raise TypeError(msg) self.output_type = output_type @@ -1301,6 +1307,7 @@ def _update_run_params( if save_dir is not None and output_type.lower() not in [ "zarr", "annotationstore", + "qupath", ]: self.output_type = "zarr" msg = ( @@ -1310,7 +1317,11 @@ def _update_run_params( ) logger.info(msg) - if save_dir is None and output_type.lower() in ["zarr", "annotationstore"]: + if save_dir is None and output_type.lower() in [ + "zarr", + "qupath", + "annotationstore", + ]: msg = f"Please provide save_dir for output_type={output_type}" raise ValueError(msg) @@ -1349,7 +1360,7 @@ def _run_patch_mode( Args: output_type (str): Desired output format. Supported values are "dict", "zarr", - and "annotationstore". + "qupath" and "annotationstore". save_dir (Path): Directory to save the output files. **kwargs (EngineABCRunParams): @@ -1393,6 +1404,8 @@ def _run_patch_mode( dict | AnnotationStore | Path: - If output_type is "dict": returns predictions as a dictionary. - If output_type is "zarr": returns path to saved zarr file. + - If output_type is "qupath": returns a QuPath JSON + or path to .json file. - If output_type is "annotationstore": returns an AnnotationStore or path to .db file. @@ -1414,7 +1427,7 @@ def _run_patch_mode( ) raw_predictions = self.infer_patches( dataloader=self.dataloader, - return_coordinates=output_type == "annotationstore", + return_coordinates=output_type.lower() in ["annotationstore", "qupath"], ) processed_predictions = self.post_process_patches( @@ -1502,7 +1515,7 @@ def _run_wsi_mode( Args: output_type (str): Desired output format. Supported values are "dict", "zarr", - and "annotationstore". + "qupath" and "annotationstore". save_dir (Path): Directory to save the output files. **kwargs (EngineABCRunParams): @@ -1549,8 +1562,10 @@ def _run_wsi_mode( """ suffix = ".zarr" - if output_type == "AnnotationStore": + if output_type.lower() == "annotationstore": suffix = ".db" + if output_type.lower() == "qupath": + suffix = ".json" def get_path(image: Path | WSIReader) -> Path: """Return path to output file.""" @@ -1660,7 +1675,7 @@ def run( overwrite (bool): Whether to overwrite existing output files. Default is False. output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". + Desired output format: "dict", "zarr", "qupath", or "annotationstore". **kwargs (EngineABCRunParams): Additional runtime parameters to update engine attributes. 
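Note: with this change, output_type="qupath" follows the same save path as "annotationstore" but writes a GeoJSON file instead of a .db store. A minimal sketch of the engine-level call, assuming the run() API exercised in the tests above (the model name, dummy images and save directory are illustrative):

    from pathlib import Path

    import numpy as np

    from tiatoolbox.models.engine.patch_predictor import PatchPredictor

    predictor = PatchPredictor(model="alexnet-kather100k", batch_size=32)
    output = predictor.run(
        images=np.zeros((2, 224, 224, 3), dtype=np.uint8),  # illustrative dummy patches
        patch_mode=True,
        output_type="qupath",          # accepted alongside "dict", "zarr" and "annotationstore"
        save_dir=Path("qupath_out"),   # required: "qupath" is a file-backed output
    )
    # In patch mode the predictions are written to <save_dir>/output.json
    # (a GeoJSON FeatureCollection) unless an explicit output_file kwarg is given.
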
diff --git a/tiatoolbox/models/engine/multi_task_segmentor.py b/tiatoolbox/models/engine/multi_task_segmentor.py index c0885edac..935dee4e0 100644 --- a/tiatoolbox/models/engine/multi_task_segmentor.py +++ b/tiatoolbox/models/engine/multi_task_segmentor.py @@ -130,6 +130,9 @@ import torch import zarr from dask import delayed +from matplotlib import pyplot as plt +from shapely import Polygon +from shapely.geometry import mapping from shapely.geometry import shape as feature2geometry from shapely.strtree import STRtree from tqdm.auto import tqdm @@ -142,6 +145,7 @@ from tiatoolbox.utils.misc import ( create_smart_array, make_valid_poly, + save_qupath_json, tqdm_dask_progress_bar, update_tqdm_desc, ) @@ -1614,11 +1618,12 @@ def _save_predictions_as_dict_zarr( ) return save_path - def _save_predictions_as_annotationstore( + def _save_predictions_as_json_store( self: MultiTaskSegmentor, processed_predictions: dict, task_name: str | None = None, save_path: Path | None = None, + output_type: str = "annotationstore", **kwargs: Unpack[MultiTaskSegmentorRunParams], ) -> dict | AnnotationStore | Path | list[Path]: """Helper function to save predictions as annotationstore.""" @@ -1656,13 +1661,14 @@ def _save_predictions_as_annotationstore( if self.patch_mode: for idx, curr_image in enumerate(self.images): values = [processed_predictions[key][idx] for key in keys_to_compute] - output_path = _save_annotation_store( + predictions = dict(zip(keys_to_compute, values, strict=False)) + output_path = _save_annotation_json_store( curr_image=curr_image, - keys_to_compute=keys_to_compute, - values=values, + predictions=predictions, task_name=task_name, idx=idx, save_path=save_path, + output_type=output_type, class_dict=class_dict, scale_factor=scale_factor, num_workers=num_workers, @@ -1672,14 +1678,15 @@ def _save_predictions_as_annotationstore( else: values = [processed_predictions[key] for key in keys_to_compute] + predictions = dict(zip(keys_to_compute, values, strict=False)) save_paths = [ - _save_annotation_store( + _save_annotation_json_store( curr_image=save_path, - keys_to_compute=keys_to_compute, - values=values, + predictions=predictions, task_name=task_name, idx=0, save_path=save_path, + output_type=output_type, class_dict=class_dict, scale_factor=scale_factor, num_workers=num_workers, @@ -1693,7 +1700,7 @@ def _save_predictions_as_annotationstore( return_probabilities = kwargs.get("return_probabilities", False) if return_probabilities: msg = ( - f"Probability maps cannot be saved as AnnotationStore. " + f"Probability maps cannot be saved as AnnotationStore or JSON. " f"To visualise heatmaps in TIAToolbox Visualization tool," f"convert heatmaps in {save_path} to ome.tiff using" f"tiatoolbox.utils.misc.write_probability_heatmap_as_ome_tiff." 
@@ -1868,10 +1875,11 @@ def save_predictions( **processed_predictions[task_name], "coordinates": processed_predictions["coordinates"], } - out_path = self._save_predictions_as_annotationstore( + out_path = self._save_predictions_as_json_store( processed_predictions=dict_for_store, task_name=task_name, save_path=save_path, + output_type=output_type, **kwargs, ) save_paths += out_path @@ -1880,10 +1888,11 @@ def save_predictions( return save_paths - return self._save_predictions_as_annotationstore( + return self._save_predictions_as_json_store( processed_predictions=processed_predictions, task_name=None, save_path=save_path, + output_type=output_type, **kwargs, ) @@ -2643,20 +2652,20 @@ def _check_and_update_for_memory_overload( return canvas, count, canvas_zarr, count_zarr, tqdm_loop -def _save_annotation_store( +def _save_annotation_json_store( curr_image: Path | None, - keys_to_compute: list[str], - values: list[da.Array | list[da.Array]], + predictions: dict[str, da.Array | list[da.Array]], task_name: str, idx: int, save_path: Path, + output_type: str, class_dict: dict, scale_factor: tuple[float, float], num_workers: int, *, verbose: bool = True, ) -> Path: - """Helper function to save to annotation store.""" + """Helper function to save to QuPath JSON or Annotation store.""" if isinstance(curr_image, Path): store_file_name = ( f"{curr_image.stem}.db" @@ -2665,27 +2674,22 @@ def _save_annotation_store( ) else: store_file_name = f"{idx}.db" if task_name is None else f"{idx}_{task_name}.db" - predictions_ = dict(zip(keys_to_compute, values, strict=False)) - output_path = save_path.parent / store_file_name + suffix = ".json" if output_type.lower() == "qupath" else ".db" + output_path = (save_path.parent / store_file_name).with_suffix(suffix) # Patch mode indexes the "coordinates" while calculating "values" variable. origin = (0.0, 0.0) - _ = predictions_.pop("coordinates") - store = SQLiteStore() - store = dict_to_store( - store=store, - processed_predictions=predictions_, + _ = predictions.pop("coordinates") + return dict_to_json_store( + processed_predictions=predictions, class_dict=class_dict, scale_factor=scale_factor, origin=origin, num_workers=num_workers, verbose=verbose, + output_path=output_path, + output_type=output_type, ) - store.commit() - store.dump(output_path) - - return output_path - def _process_instance_predictions( inst_dict: dict, @@ -3119,17 +3123,18 @@ def _compute_info_dict_for_merge( ) -def dict_to_store( - store: SQLiteStore, +def dict_to_json_store( processed_predictions: dict, + output_path: Path, + output_type: str, class_dict: dict | None = None, origin: tuple[float, float] = (0, 0), scale_factor: tuple[float, float] = (1, 1), num_workers: int = multiprocessing.cpu_count(), *, verbose: bool = True, -) -> AnnotationStore: - """Write polygonal multitask predictions into an SQLite-backed AnnotationStore. +) -> Path: + """Write polygonal multitask predictions into an QuPath JSON or AnnotationStore. Converts a task dictionary (with per-object fields) into `Annotation` records, applying coordinate scaling and translation to move predictions into the slide's @@ -3148,12 +3153,14 @@ def dict_to_store( with list-like values aligned to `contours` length. Args: - store (SQLiteStore): - Target annotation store that will receive the converted annotations. processed_predictions (dict): Dictionary containing per-object fields. Must include `"contours"`; may include `"geom_type"` and any number of additional fields to be written as properties. 
+ output_path (Path): + Path to save the output. + output_type (str): + Desired output format: "qupath" or "annotationstore". class_dict (dict | None): Optional mapping for the `"type"` field. When provided and when `"type"` is present in `processed_predictions`, each `"type"` value is @@ -3194,12 +3201,24 @@ def dict_to_store( for key, arr in processed_predictions.items() } contours = processed_predictions.pop("contours") - delayed_tasks = DaskDelayedAnnotationStore( + delayed_tasks = DaskDelayedJSONStore( contours=contours, processed_predictions=processed_predictions, ) - return delayed_tasks.compute_annotations( + if output_type.lower() == "qupath": + return delayed_tasks.compute_qupath_json( + class_dict=class_dict, + origin=origin, + scale_factor=scale_factor, + batch_size=100, + num_workers=num_workers, + verbose=verbose, + save_path=output_path.with_suffix(".json"), + ) + + store = SQLiteStore() + store = delayed_tasks.compute_annotations( store=store, class_dict=class_dict, origin=origin, @@ -3209,8 +3228,13 @@ def dict_to_store( verbose=verbose, ) + store.commit() + store.dump(output_path) + + return output_path + -class DaskDelayedAnnotationStore: +class DaskDelayedJSONStore: """Compute and write TIAToolbox annotations using batched Dask Delayed tasks. This class parallelizes annotation construction using Dask Delayed while @@ -3221,7 +3245,7 @@ class DaskDelayedAnnotationStore: """ def __init__( - self: DaskDelayedAnnotationStore, + self: DaskDelayedJSONStore, contours: np.ndarray, processed_predictions: dict, ) -> None: @@ -3244,7 +3268,7 @@ def __init__( self._processed_predictions = processed_predictions def _build_single_annotation( - self: DaskDelayedAnnotationStore, + self: DaskDelayedJSONStore, i: int, class_dict: dict[int, str] | None, origin: tuple[float, float], @@ -3298,8 +3322,102 @@ def _build_single_annotation( return Annotation(geom, properties) + def _build_single_qupath_feature( + self: DaskDelayedJSONStore, + i: int, + class_dict: dict | None, + origin: tuple[float, float], + scale_factor: tuple[float, float], + class_colors: dict, + ) -> dict: + """Build a single feature for index ``i``. + + This method performs: + - geometry creation + - coordinate scaling and translation + - per-object property extraction + - optional class label mapping + + Args: + i (int): + Index of the object to convert into an annotation. + + class_dict (dict[int, str] | None): + Optional mapping from integer class IDs to string labels. + If ``None``, raw integer class IDs are used. + + origin (tuple[float, float]): + Translation offset ``(x, y)`` applied after scaling. + + scale_factor (tuple[float, float]): + Scaling factors ``(sx, sy)`` applied to contour coordinates. + + class_colors (dict): + Maps classes to specific colors. + + Returns: + dict: + A fully constructed Feature dictionary instance for writing + to QuPath JSON. 
+ + """ + contour = np.array(self._contours[i], dtype=float) + contour[:, 0] = contour[:, 0] * scale_factor[0] + origin[0] + contour[:, 1] = contour[:, 1] * scale_factor[1] + origin[1] + + poly = Polygon(contour) + poly_geo = mapping(poly) + + props = {} + class_value = None + class_name = None + + for key, arr in self._processed_predictions.items(): + value = arr[i].tolist() if hasattr(arr[i], "tolist") else arr[i] + + if key == "type": + # Handle None class name + if value is None: + # Assign default class 0 + class_value = 0 + class_name = class_dict.get(0, 0) + props["type"] = class_name + continue + + # Safe class lookup + if class_dict is not None and value in class_dict: + class_name = class_dict[value] + else: + # Already a name or no mapping available + class_name = value + + props["type"] = class_name + class_value = value + else: + if value is None: + continue + props[key] = np.array(value).tolist() + + # Classification block + if class_name is not None and class_value in class_colors: + color = class_colors[class_value] + props["classification"] = { + "name": class_name, + "color": color, + } + props["class_value"] = class_value + + return { + "type": "Feature", + "id": f"object_{i}", + "geometry": poly_geo, + "properties": props, + "objectType": "annotation", + "name": class_name if class_name is not None else "object", + } + def compute_annotations( - self: DaskDelayedAnnotationStore, + self: DaskDelayedJSONStore, store: SQLiteStore, class_dict: dict[int, str] | None, origin: tuple[float, float] = (0, 0), @@ -3377,3 +3495,86 @@ def compute_annotations( ) ) return store + + def compute_qupath_json( + self: DaskDelayedJSONStore, + class_dict: dict[int, str] | None, + origin: tuple[float, float] = (0, 0), + scale_factor: tuple[float, float] = (1, 1), + save_path: Path | None = None, + batch_size: int = 100, + num_workers: int = 0, + *, + verbose: bool = True, + ) -> Path: + """Compute annotations in batches and return/save QuPath JSON.""" + num_contours = len(self._contours) + features: list[dict] = [] + + if class_dict is None: + type_arr = self._processed_predictions.get("type") + + # Extract only valid class IDs/names + valid_ids = [v for v in type_arr if v is not None] + + if len(valid_ids) == 0: + # No class info at all → fallback + class_dict = {0: 0} + # Numeric class IDs + elif all(isinstance(v, (int, np.integer)) for v in valid_ids): + max_class = int(max(valid_ids)) + class_dict = {i: i for i in range(max_class + 1)} + else: + # Already class names + unique_names = sorted(set(valid_ids)) + class_dict = {name: name for name in unique_names} + + # Enumerate class_dict keys to assign stable integer color indices + class_keys = list(class_dict.keys()) + num_classes = len(class_keys) + cmap = plt.cm.get_cmap("tab20", num_classes) + + class_colors = { + key: [ + int(cmap(i)[0] * 255), + int(cmap(i)[1] * 255), + int(cmap(i)[2] * 255), + ] + for i, key in enumerate(class_keys) + } + + # Batch processing (mirrors compute_annotations) + for batch_id in tqdm( + range(0, num_contours, batch_size), + leave=False, + desc="Calculating QuPath features in batches.", + disable=not verbose, + ): + delayed_tasks = [ + delayed(self._build_single_qupath_feature)( + i, + class_dict, + origin, + scale_factor, + class_colors, + ) + for i in tqdm( + range(batch_id, min(batch_id + batch_size, num_contours)), + leave=False, + desc="Creating delayed tasks for QuPath JSON", + disable=not verbose, + ) + ] + + # Compute batch immediately + batch_features = tqdm_dask_progress_bar( + 
write_tasks=delayed_tasks, + desc="Computing QuPath features", + verbose=verbose, + num_workers=num_workers, + ) + features.extend(batch_features) + + qupath_json = {"type": "FeatureCollection", "features": features} + + return save_qupath_json(save_path=save_path, qupath_json=qupath_json) diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 65395482f..75afe3b36 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -53,6 +53,7 @@ import dask.array as da import numpy as np import zarr +from matplotlib import pyplot as plt from shapely.geometry import Point from tqdm.auto import tqdm @@ -62,7 +63,11 @@ SemanticSegmentor, SemanticSegmentorRunParams, ) -from tiatoolbox.utils.misc import tqdm_dask_progress_bar +from tiatoolbox.utils.misc import ( + save_annotations, + save_qupath_json, + tqdm_dask_progress_bar, +) if TYPE_CHECKING: # pragma: no cover import os @@ -212,7 +217,8 @@ class NucleusDetector(SemanticSegmentor): drop_keys (list): Keys to exclude from model output when saving results. output_type (str): - Output format (``"dict"``, ``"zarr"``, or ``"annotationstore"``). + Output format (``"dict"``, ``"zarr"``, ``"qupath"``, + or ``"annotationstore"``). Examples: >>> from tiatoolbox.models.engine.nucleus_detector import NucleusDetector @@ -520,7 +526,8 @@ class IDs. - ``"probabilities"`` (da.Array): detection probabilities. output_type (str): - Desired output format: ``"dict"``, ``"zarr"``, or ``"annotationstore"``. + Desired output format: ``"dict"``, ``"zarr"``, ``"qupath"`` + or ``"annotationstore"``. save_path (Path | None): Path at which to save the output file(s). Required for file outputs (e.g., Zarr or SQLite .db). If ``None`` and ``output_type="dict"``, @@ -576,6 +583,10 @@ class names. returns a Python dictionary of predictions. - If ``output_type="zarr"``: returns the path to the saved ``.zarr`` group. + - If ``output_type="qupath"``: + returns QuPath JSON or the path(s) to saved + ``.json`` file(s). In patch mode, a list of per-image paths + may be returned. - If ``output_type="annotationstore"``: returns an AnnotationStore handle or the path(s) to saved ``.db`` file(s). In patch mode, a list of per-image paths @@ -587,7 +598,7 @@ class names. TIAToolbox engines. """ - if output_type.lower() != "annotationstore": + if output_type.lower() not in ["qupath", "annotationstore"]: out = super().save_predictions( processed_predictions, output_type, @@ -602,11 +613,12 @@ class names. if class_dict is None: class_dict = self.model.output_class_dict - out = self._save_predictions_annotation_store( + out = self._save_predictions_qupath_json_annotations_db( processed_predictions, save_path=save_path, scale_factor=scale_factor, class_dict=class_dict, + output_type=output_type, ) # Remove cached centroid maps if wsi mode @@ -619,12 +631,13 @@ class names. return out - def _save_predictions_annotation_store( + def _save_predictions_qupath_json_annotations_db( self: NucleusDetector, processed_predictions: dict, save_path: Path | None = None, scale_factor: tuple[float, float] = (1.0, 1.0), class_dict: dict | None = None, + output_type: str = "annotationstore", ) -> AnnotationStore | Path | list[Path]: """Save nucleus detections to an AnnotationStore (.db). @@ -664,6 +677,8 @@ def _save_predictions_annotation_store( Scaling factors applied to x and y coordinates prior to writing. Typically corresponds to ``model_mpp / slide_mpp``. Defaults to ``(1.0, 1.0)``. 
+ output_type (str): + Desired output format: ``"qupath"`` or ``"annotationstore"``. class_dict (dict or None): Optional mapping from original class IDs to class names or remapped IDs. If ``None``, an identity mapping based on present classes is used. @@ -686,11 +701,12 @@ def _save_predictions_annotation_store( save_paths = [] num_patches = len(processed_predictions["x"]) + suffix = ".json" if output_type == "qupath" else ".db" for i in range(num_patches): if isinstance(self.images[i], Path): - output_path = save_path.parent / (self.images[i].stem + ".db") + output_path = save_path.parent / (self.images[i].stem + suffix) else: - output_path = save_path.parent / (str(i) + ".db") + output_path = save_path.parent / (str(i) + suffix) detection_arrays = { "x": processed_predictions["x"][i], @@ -699,17 +715,34 @@ def _save_predictions_annotation_store( "probabilities": processed_predictions["probabilities"][i], } - out_file = self.save_detection_arrays_to_store( - detection_arrays=detection_arrays, - scale_factor=scale_factor, - class_dict=class_dict, - save_path=output_path, + out_file = ( + save_detection_arrays_to_qupath_json( + detection_arrays=detection_arrays, + scale_factor=scale_factor, + class_dict=class_dict, + save_path=output_path, + ) + if output_type == "qupath" + else save_detection_arrays_to_store( + detection_arrays=detection_arrays, + scale_factor=scale_factor, + class_dict=class_dict, + save_path=output_path, + ) ) save_paths.append(out_file) return save_paths - return self.save_detection_arrays_to_store( + if output_type == "qupath": + return save_detection_arrays_to_qupath_json( + detection_arrays=processed_predictions, + scale_factor=scale_factor, + save_path=save_path, + class_dict=class_dict, + ) + + return save_detection_arrays_to_store( detection_arrays=processed_predictions, scale_factor=scale_factor, save_path=save_path, @@ -825,186 +858,6 @@ class IDs for each detection (``np.uint32``). "probabilities": da.from_array(probs, chunks="auto"), } - @staticmethod - def _write_detection_arrays_to_store( - detection_arrays: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], - store: SQLiteStore, - scale_factor: tuple[float, float], - class_dict: dict[int, str | int] | None, - batch_size: int = 5000, - *, - verbose: bool = True, - ) -> int: - """Write detection arrays to an AnnotationStore in batches. - - Converts coordinate, class, and probability arrays into `Annotation` - objects and appends them to an SQLite-backed store in configurable - batch sizes. Coordinates are scaled to baseline slide resolution using - the provided `scale_factor`, and optional class-ID remapping is applied - via `class_dict`. - - Args: - detection_arrays (tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]): - Tuple of arrays in the order: - `(x_coords, y_coords, class_ids, probabilities)`. - Each element must be a 1-D NumPy array of equal length. - store (SQLiteStore): - Target `AnnotationStore` instance to receive the detections. - scale_factor (tuple[float, float]): - Factors applied to `(x, y)` coordinates prior to writing, - typically `(model_mpp / slide_mpp)`. The scaled coordinates are - rounded to `np.uint32`. - class_dict (dict[int, str | int] | None): - Optional mapping from original class IDs to names or remapped IDs. - If `None`, an identity mapping is used for the set of present classes. - batch_size (int): - Number of records to write per batch. Default is `5000`. - verbose (bool): - Whether to display logs and progress bar. 
- - Returns: - int: - Total number of detection records written to the store. - - Notes: - - Coordinates are scaled and rounded to integers to ensure consistent - geometry creation for `Annotation` points. - - Class mapping is applied per-record; unmapped IDs fall back to their - original values. - - Writing in batches reduces memory pressure and improves throughput - on large number of detections. - - """ - xs, ys, classes, probs = detection_arrays - n = len(xs) - if n == 0: - return 0 # nothing to write - - # scale coordinates - xs = np.rint(xs * scale_factor[0]).astype(np.uint32, copy=False) - ys = np.rint(ys * scale_factor[1]).astype(np.uint32, copy=False) - - # class mapping - if class_dict is None: - # identity over actually-present types - uniq = np.unique(classes) - class_dict = {int(k): int(k) for k in uniq} - labels = np.array( - [class_dict.get(int(k), int(k)) for k in classes], dtype=object - ) - - def make_points(xs_batch: np.ndarray, ys_batch: np.ndarray) -> list[Point]: - """Create Shapely Point geometries from coordinate arrays in batches.""" - return [ - Point(int(xx), int(yy)) - for xx, yy in zip(xs_batch, ys_batch, strict=True) - ] - - tqdm_loop = tqdm( - range(0, n, batch_size), - leave=False, - desc="Writing detections to store", - disable=not verbose, - ) - written = 0 - for i in tqdm_loop: - j = min(i + batch_size, n) - pts = make_points(xs[i:j], ys[i:j]) - - anns = [ - Annotation( - geometry=pt, properties={"type": lbl, "probability": float(pp)} - ) - for pt, lbl, pp in zip(pts, labels[i:j], probs[i:j], strict=True) - ] - store.append_many(anns) - written += j - i - return written - - @staticmethod - def save_detection_arrays_to_store( - detection_arrays: dict[str, da.Array | np.ndarray], - scale_factor: tuple[float, float] = (1.0, 1.0), - class_dict: dict | None = None, - save_path: Path | None = None, - batch_size: int = 5000, - ) -> Path | SQLiteStore: - """Write nucleus detection arrays to an SQLite-backed AnnotationStore. - - Converts the detection arrays into NumPy form, applies coordinate scaling - and optional class-ID remapping, and writes the results into an in-memory - SQLiteStore. If `save_path` is provided, the store is committed and saved - to disk as a `.db` file. This method provides a unified interface for - converting Dask-based detection outputs into persistent annotation storage. - - Args: - detection_arrays (dict[str, da.Array]): - A dictionary containing the detection fields: - - ``"x"``: dask array of x coordinates (``np.uint32``). - - ``"y"``: dask array of y coordinates (``np.uint32``). - - ``"classes"``: dask array of class IDs (``np.uint32``). - - ``"probabilities"``: dask array of detection scores (``np.float32``). - scale_factor (tuple[float, float], optional): - Multiplicative factors applied to the x and y coordinates before - saving. The scaled coordinates are rounded to integer pixel - locations. Defaults to ``(1.0, 1.0)``. - class_dict (dict or None): - Optional mapping of class IDs to class names or remapped IDs. - If ``None``, an identity mapping is used based on the detected - class IDs. - save_path (Path or None): - Destination path for saving the `.db` file. If ``None``, the - resulting SQLiteStore is returned in memory. If provided, the - parent directory is created if needed, and the final store is - written as ``save_path.with_suffix(".db")``. - batch_size (int): - Number of detection records to write per batch. Defaults to ``5000``. 
- - Returns: - Path or SQLiteStore: - - If `save_path` is provided: the path to the saved `.db` file. - - If `save_path` is ``None``: an in-memory `SQLiteStore` containing - all detections. - - Notes: - - The heavy lifting is delegated to - :meth:`NucleusDetector._write_detection_arrays_to_store`, - which performs coordinate scaling, class mapping, and batch writing. - - """ - xs = detection_arrays["x"] - ys = detection_arrays["y"] - classes = detection_arrays["classes"] - probs = detection_arrays["probabilities"] - - xs = np.atleast_1d(np.asarray(xs)) - ys = np.atleast_1d(np.asarray(ys)) - classes = np.atleast_1d(np.asarray(classes)) - probs = np.atleast_1d(np.asarray(probs)) - - if not len(xs) == len(ys) == len(classes) == len(probs): - msg = "Detection record lengths are misaligned." - raise ValueError(msg) - - store = SQLiteStore() - total_written = NucleusDetector._write_detection_arrays_to_store( - (xs, ys, classes, probs), - store, - scale_factor, - class_dict, - batch_size, - ) - logger.info("Total detections written to store: %s", total_written) - - if save_path: - save_path.parent.absolute().mkdir(parents=True, exist_ok=True) - save_path = save_path.parent.absolute() / (save_path.stem + ".db") - store.commit() - store.dump(save_path) - return save_path - - return store - def run( self: NucleusDetector, images: list[os.PathLike | Path | WSIReader | np.ndarray] | np.ndarray, @@ -1140,3 +993,285 @@ class names. output_type=output_type, **kwargs, ) + + +def save_detection_arrays_to_qupath_json( + detection_arrays: dict[str, da.Array], + scale_factor: tuple[float, float] = (1.0, 1.0), + class_dict: dict | None = None, + save_path: Path | None = None, +) -> dict | Path: + """Write nucleus detection arrays to QuPath JSON. + + Produces a FeatureCollection where each detection is represented as a + Point geometry with classification metadata and probability score. + + Args: + detection_arrays (dict[str, da.Array]): + A dictionary containing the detection fields: + - ``"x"``: dask array of x coordinates (``np.uint32``). + - ``"y"``: dask array of y coordinates (``np.uint32``). + - ``"classes"``: dask array of class IDs (``np.uint32``). + - ``"probabilities"``: dask array of detection scores (``np.float32``). + scale_factor (tuple[float, float], optional): + Multiplicative factors applied to the x and y coordinates before + saving. The scaled coordinates are rounded to integer pixel + locations. Defaults to ``(1.0, 1.0)``. + class_dict (dict or None): + Optional mapping of class IDs to class names or remapped IDs. + If ``None``, an identity mapping is used based on the detected + class IDs. + save_path (Path or None): + Destination path for saving the QuPath-compatible ``.json`` file. + If ``None``, an in-memory JSON-compatible representation of all + detections is returned instead of writing to disk. + + Returns: + Path or QuPath: + - If ``save_path`` is provided: the path to the saved ``.json`` file. + - If ``save_path`` is ``None``: an in-memory dict representing + QuPath JSON containing all detections. 
+ + """ + xs, ys, classes, probs = _validate_detections_for_saving_to_json( + detection_arrays=detection_arrays, + ) + + # Determine class dictionary + unique_classes = np.unique(classes).tolist() + if class_dict is None: + class_dict = {int(i): int(i) for i in unique_classes} + + # Color map for classes + num_classes = len(class_dict) + cmap = plt.cm.get_cmap("tab20", num_classes) + class_colors = { + class_idx: [ + int(cmap(class_idx)[0] * 255), + int(cmap(class_idx)[1] * 255), + int(cmap(class_idx)[2] * 255), + ] + for class_idx in class_dict + } + + features: list[dict] = [] + + for i, _ in enumerate(xs): + # Scale coordinates + x = float(xs[i]) * scale_factor[0] + y = float(ys[i]) * scale_factor[1] + + class_id = int(classes[i]) + class_label = class_dict.get(class_id, class_id) + prob = float(probs[i]) + + # QuPath point geometry + point_geo = { + "type": "Point", + "coordinates": [x, y], + } + + feature = { + "type": "Feature", + "id": f"detection_{i}", + "geometry": point_geo, + "properties": { + "classification": { + "name": class_label, + "color": class_colors[class_id], + }, + "probability": prob, + }, + "objectType": "detection", + "name": class_label, + "class_value": class_id, + } + + features.append(feature) + + qupath_json = {"type": "FeatureCollection", "features": features} + + if save_path: + return save_qupath_json(save_path=save_path, qupath_json=qupath_json) + + return qupath_json + + +def save_detection_arrays_to_store( + detection_arrays: dict[str, da.Array], + scale_factor: tuple[float, float] = (1.0, 1.0), + class_dict: dict | None = None, + save_path: Path | None = None, + batch_size: int = 5000, +) -> Path | SQLiteStore: + """Write nucleus detection arrays to an SQLite-backed AnnotationStore. + + Converts the detection arrays into NumPy form, applies coordinate scaling + and optional class-ID remapping, and writes the results into an in-memory + SQLiteStore. If `save_path` is provided, the store is committed and saved + to disk as a `.db` file. This method provides a unified interface for + converting Dask-based detection outputs into persistent annotation storage. + + Args: + detection_arrays (dict[str, da.Array]): + A dictionary containing the detection fields: + - ``"x"``: dask array of x coordinates (``np.uint32``). + - ``"y"``: dask array of y coordinates (``np.uint32``). + - ``"classes"``: dask array of class IDs (``np.uint32``). + - ``"probabilities"``: dask array of detection scores (``np.float32``). + scale_factor (tuple[float, float], optional): + Multiplicative factors applied to the x and y coordinates before + saving. The scaled coordinates are rounded to integer pixel + locations. Defaults to ``(1.0, 1.0)``. + class_dict (dict or None): + Optional mapping of class IDs to class names or remapped IDs. + If ``None``, an identity mapping is used based on the detected + class IDs. + save_path (Path or None): + Destination path for saving the `.db` file. If ``None``, the + resulting SQLiteStore is returned in memory. If provided, the + parent directory is created if needed, and the final store is + written as ``save_path.with_suffix(".db")``. + batch_size (int): + Number of detection records to write per batch. Defaults to ``5000``. + + Returns: + Path or SQLiteStore: + - If `save_path` is provided: the path to the saved `.db` file. + - If `save_path` is ``None``: an in-memory `SQLiteStore` containing + all detections. 
+ + Notes: + - The heavy lifting is delegated to + :meth:`_write_detection_arrays_to_store`, + which performs coordinate scaling, class mapping, and batch writing. + + """ + xs, ys, classes, probs = _validate_detections_for_saving_to_json( + detection_arrays=detection_arrays, + ) + + store = SQLiteStore() + total_written = _write_detection_arrays_to_store( + detection_arrays=(xs, ys, classes, probs), + store=store, + scale_factor=scale_factor, + class_dict=class_dict, + batch_size=batch_size, + ) + logger.info("Total detections written to store: %s", total_written) + + if save_path: + return save_annotations( + save_path=save_path, + store=store, + ) + + return store + + +def _validate_detections_for_saving_to_json( + detection_arrays: dict[str, da.Array], +) -> tuple: + """Validates x, y, classes and probs for writing to QuPath or AnnotationStore.""" + xs = np.atleast_1d(np.asarray(detection_arrays["x"])) + ys = np.atleast_1d(np.asarray(detection_arrays["y"])) + classes = np.atleast_1d(np.asarray(detection_arrays["classes"])) + probs = np.atleast_1d(np.asarray(detection_arrays["probabilities"])) + + if not len(xs) == len(ys) == len(classes) == len(probs): + msg = "Detection record lengths are misaligned." + raise ValueError(msg) + + return xs, ys, classes, probs + + +def _write_detection_arrays_to_store( + detection_arrays: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], + store: SQLiteStore, + scale_factor: tuple[float, float], + class_dict: dict[int, str | int] | None, + batch_size: int = 5000, + *, + verbose: bool = True, +) -> int: + """Write detection arrays to an AnnotationStore in batches. + + Converts coordinate, class, and probability arrays into `Annotation` + objects and appends them to an SQLite-backed store in configurable + batch sizes. Coordinates are scaled to baseline slide resolution using + the provided `scale_factor`, and optional class-ID remapping is applied + via `class_dict`. + + Args: + detection_arrays (tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]): + Tuple of arrays in the order: + `(x_coords, y_coords, class_ids, probabilities)`. + Each element must be a 1-D NumPy array of equal length. + store (SQLiteStore): + Target `AnnotationStore` instance to receive the detections. + scale_factor (tuple[float, float]): + Factors applied to `(x, y)` coordinates prior to writing, + typically `(model_mpp / slide_mpp)`. The scaled coordinates are + rounded to `np.uint32`. + class_dict (dict[int, str | int] | None): + Optional mapping from original class IDs to names or remapped IDs. + If `None`, an identity mapping is used for the set of present classes. + batch_size (int): + Number of records to write per batch. Default is `5000`. + verbose (bool): + Whether to display logs and progress bar. + + Returns: + int: + Total number of detection records written to the store. + + Notes: + - Coordinates are scaled and rounded to integers to ensure consistent + geometry creation for `Annotation` points. + - Class mapping is applied per-record; unmapped IDs fall back to their + original values. + - Writing in batches reduces memory pressure and improves throughput + on large number of detections. 
+ + """ + xs, ys, classes, probs = detection_arrays + n = len(xs) + if n == 0: + return 0 # nothing to write + + # scale coordinates + xs = np.rint(xs * scale_factor[0]).astype(np.uint32, copy=False) + ys = np.rint(ys * scale_factor[1]).astype(np.uint32, copy=False) + + # class mapping + if class_dict is None: + # identity over actually-present types + uniq = np.unique(classes) + class_dict = {int(k): int(k) for k in uniq} + labels = np.array([class_dict.get(int(k), int(k)) for k in classes], dtype=object) + + def make_points(xs_batch: np.ndarray, ys_batch: np.ndarray) -> list[Point]: + """Create Shapely Point geometries from coordinate arrays in batches.""" + return [ + Point(int(xx), int(yy)) for xx, yy in zip(xs_batch, ys_batch, strict=True) + ] + + tqdm_loop = tqdm( + range(0, n, batch_size), + leave=False, + desc="Writing detections to store", + disable=not verbose, + ) + written = 0 + for i in tqdm_loop: + j = min(i + batch_size, n) + pts = make_points(xs[i:j], ys[i:j]) + + anns = [ + Annotation(geometry=pt, properties={"type": lbl, "probability": float(pp)}) + for pt, lbl, pp in zip(pts, labels[i:j], probs[i:j], strict=True) + ] + store.append_many(anns) + written += j - i + return written diff --git a/tiatoolbox/models/engine/nucleus_instance_segmentor.py b/tiatoolbox/models/engine/nucleus_instance_segmentor.py index 4b2d7bb63..9369707c5 100644 --- a/tiatoolbox/models/engine/nucleus_instance_segmentor.py +++ b/tiatoolbox/models/engine/nucleus_instance_segmentor.py @@ -124,7 +124,7 @@ class NucleusInstanceSegmentor(MultiTaskSegmentor): """ def __init__( - self: MultiTaskSegmentor, + self: NucleusInstanceSegmentor, model: str | ModelABC, batch_size: int = 8, num_workers: int = 0, diff --git a/tiatoolbox/models/engine/patch_predictor.py b/tiatoolbox/models/engine/patch_predictor.py index c2e71325d..a64a07945 100644 --- a/tiatoolbox/models/engine/patch_predictor.py +++ b/tiatoolbox/models/engine/patch_predictor.py @@ -241,7 +241,7 @@ class PatchPredictor(EngineABC): drop_keys (list): Keys to exclude from model output. output_type (str): - Format of output ("dict", "zarr", "annotationstore"). + Format of output ("dict", "zarr", "qupath", "annotationstore"). Example: >>> # list of 2 image patches as input @@ -479,7 +479,8 @@ def _update_run_params( ioconfig (IOPatchPredictorConfig | None): IO configuration for patch extraction and resolution. output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". + Desired output format: "dict", "zarr", "qupath" + or "annotationstore". overwrite (bool): Whether to overwrite existing output files. Default is False. patch_mode (bool): @@ -589,8 +590,8 @@ def run( overwrite (bool): Whether to overwrite existing output files. Default is False. output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". - Default value is "zarr". + Desired output format: "dict", "zarr", "qupath" + or "annotationstore". Default value is "zarr". **kwargs (PredictorRunParams): Additional runtime parameters to configure prediction. diff --git a/tiatoolbox/models/engine/semantic_segmentor.py b/tiatoolbox/models/engine/semantic_segmentor.py index 56cbc4c2c..02f9d4fe6 100644 --- a/tiatoolbox/models/engine/semantic_segmentor.py +++ b/tiatoolbox/models/engine/semantic_segmentor.py @@ -232,7 +232,7 @@ class SemanticSegmentor(PatchPredictor): drop_keys (list): Keys to exclude from model output. output_type (str): - Format of output ("dict", "zarr", "annotationstore"). + Format of output ("dict", "zarr", "qupath", "annotationstore"). 
output_locations (list | None): Coordinates of output patches used during WSI processing. @@ -620,9 +620,10 @@ def save_predictions( processed_predictions (dict): Dictionary containing processed model predictions. output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". + Desired output format: "dict", "zarr", "qupath" or "annotationstore". save_path (Path | None): - Path to save the output file. Required for "zarr" and "annotationstore". + Path to save the output file. Required for "zarr", "qupath" + and "annotationstore". **kwargs (SemanticSegmentorRunParams): Additional runtime parameters to configure segmentation. @@ -666,12 +667,14 @@ def save_predictions( dict | AnnotationStore | Path | list[Path]: - If output_type is "dict": returns predictions as a dictionary. - If output_type is "zarr": returns path to saved Zarr file. + - If output_type is "qupath": returns QuPath JSON + or path or list of paths to .json file. - If output_type is "annotationstore": returns AnnotationStore or path or list of paths to .db file. """ # Conversion to annotationstore uses a different function for SemanticSegmentor - if output_type.lower() != "annotationstore": + if output_type.lower() not in ["qupath", "annotationstore"]: return super().save_predictions( processed_predictions, output_type, save_path=save_path, **kwargs ) @@ -701,17 +704,20 @@ def save_predictions( # Need to add support for zarr conversion. save_paths = [] - logger.info("Saving predictions as AnnotationStore.") + suffix = ".json" if output_type.lower() == "qupath" else ".db" + msg = f"Saving predictions as {output_type} in {suffix} format." + logger.info(msg) if self.patch_mode: for i, predictions in enumerate(processed_predictions["predictions"]): if isinstance(self.images[i], Path): - output_path = save_path.parent / (self.images[i].stem + ".db") + output_path = save_path.parent / (self.images[i].stem + suffix) else: - output_path = save_path.parent / (str(i) + ".db") + output_path = save_path.parent / (str(i) + suffix) out_file = dict_to_store_semantic_segmentor( patch_output={"predictions": predictions}, scale_factor=scale_factor, + output_type=output_type, class_dict=class_dict, save_path=output_path, verbose=self.verbose, @@ -722,15 +728,16 @@ def save_predictions( out_file = dict_to_store_semantic_segmentor( patch_output=processed_predictions, scale_factor=scale_factor, + output_type=output_type, class_dict=class_dict, - save_path=save_path.with_suffix(".db"), + save_path=save_path.with_suffix(suffix), verbose=self.verbose, ) save_paths = out_file if return_probabilities: msg = ( - f"Probability maps cannot be saved as AnnotationStore. " + f"Probability maps cannot be saved as AnnotationStore or JSON. " f"To visualise heatmaps in TIAToolbox Visualization tool," f"convert heatmaps in {save_path} to ome.tiff using" f"tiatoolbox.utils.misc.write_probability_heatmap_as_ome_tiff." @@ -779,7 +786,8 @@ def _update_run_params( ioconfig (ModelIOConfigABC | None): IO configuration for patch extraction and resolution. output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". + Desired output format: "dict", "zarr", "qupath", + or "annotationstore". overwrite (bool): Whether to overwrite existing output files. Default is False. patch_mode (bool): @@ -895,8 +903,8 @@ def run( overwrite (bool): Whether to overwrite existing output files. Default is False. output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". Default - is "dict". 
+ Desired output format: "dict", "zarr", "qupath", + or "annotationstore". Default is "dict". **kwargs (SemanticSegmentorRunParams): Additional runtime parameters to configure segmentation. diff --git a/tiatoolbox/utils/misc.py b/tiatoolbox/utils/misc.py index 71d0cf3af..22b6f3e4b 100644 --- a/tiatoolbox/utils/misc.py +++ b/tiatoolbox/utils/misc.py @@ -14,6 +14,7 @@ import cv2 import dask.array as da import joblib +import matplotlib.pyplot as plt import numpy as np import pandas as pd import psutil @@ -24,11 +25,10 @@ from dask import compute from filelock import FileLock from shapely.affinity import translate -from shapely.geometry import Polygon +from shapely.geometry import Polygon, mapping from shapely.geometry import shape as feature2geometry from skimage import exposure -from tqdm import trange -from tqdm.auto import tqdm +from tqdm.auto import tqdm, trange from tqdm.dask import TqdmCallback from tiatoolbox import logger @@ -1216,7 +1216,7 @@ def patch_predictions_as_annotations( keys: list, class_dict: dict, class_probs: list | np.ndarray, - patch_coords: list, + patch_coords: list | np.ndarray, classes_predicted: list, labels: list, *, @@ -1247,6 +1247,60 @@ def patch_predictions_as_annotations( return annotations +def patch_predictions_as_qupath_json( + preds: list | np.ndarray, + class_dict: dict, + patch_coords: list | np.ndarray, + *, + verbose: bool = True, +) -> dict: + """Helper function to generate QuPath JSON per patch predictions.""" + features = [] + # pick a color for each class based on the class index, using a colormap + num_classes = len(class_dict) + cmap = plt.cm.get_cmap("tab20", num_classes) + class_colours = { + class_idx: [ + int(cmap(class_idx)[0] * 255), + int(cmap(class_idx)[1] * 255), + int(cmap(class_idx)[2] * 255), + ] + for class_idx in class_dict + } + + tqdm_loop = tqdm( + range(np.asarray(patch_coords).shape[0]), + leave=False, + desc="Converting outputs to QuPath JSON.", + disable=not verbose, + ) + + for i in tqdm_loop: + class_idx = int(preds[i]) + class_name = class_dict[class_idx] + polygon_geo = Polygon.from_bounds(*patch_coords[i]) + polygon_feat = mapping(polygon_geo) + + feature = { + "type": "Feature", + "id": f"patch_{i}", + "geometry": polygon_feat, + "properties": { + "classification": { + "name": class_name, + "color": class_colours[class_idx], + } + }, + "objectType": "annotation", + "name": class_name, + "class_value": class_idx, + } + + features.append(feature) + + return {"type": "FeatureCollection", "features": features} + + def get_zarr_array(zarr_array: zarr.core.Array | np.ndarray | list) -> np.ndarray: """Converts a zarr array into a numpy array.""" if isinstance(zarr_array, zarr.core.Array): @@ -1363,11 +1417,12 @@ def process_contours( def dict_to_store_semantic_segmentor( patch_output: dict | zarr.Group, scale_factor: tuple[float, float], + output_type: str, class_dict: dict | None = None, save_path: Path | None = None, *, verbose: bool = True, -) -> AnnotationStore | Path: +) -> AnnotationStore | dict | Path: """Converts output of TIAToolbox SemanticSegmentor engine to AnnotationStore. Args: @@ -1378,6 +1433,9 @@ def dict_to_store_semantic_segmentor( annotations. All coordinates will be multiplied by this factor to allow conversion of annotations saved at non-baseline resolution to baseline. Should be model_mpp/slide_mpp. + output_type (str): + "annotationstore" → return AnnotationStore + "qupath" → return QuPath JSON dict class_dict (dict): Optional dictionary mapping class indices to class names. 
save_path (str or Path): @@ -1398,11 +1456,121 @@ def dict_to_store_semantic_segmentor( # Get the number of unique predictions layer_list = da.unique(preds).compute() - store = SQLiteStore() - if class_dict is None: class_dict = {int(i): int(i) for i in layer_list.tolist()} + if output_type.lower() == "qupath": + return _semantic_segmentations_as_qupath_json( + layer_list=layer_list, + preds=preds, + scale_factor=scale_factor, + class_dict=class_dict, + save_path=save_path, + verbose=verbose, + ) + + return _semantic_segmentations_as_annotations( + layer_list=layer_list, + preds=preds, + scale_factor=scale_factor, + class_dict=class_dict, + save_path=save_path, + verbose=verbose, + ) + + +def _semantic_segmentations_as_qupath_json( + layer_list: list, + preds: da.Array, + scale_factor: tuple[float, float], + class_dict: dict, + save_path: Path | None = None, + *, + verbose: bool = True, +) -> dict | Path: + """Helper function to save semantic segmentation as QuPath json.""" + features: list = [] + + # color map for classes + num_classes = len(class_dict) + cmap = plt.cm.get_cmap("tab20", num_classes) + class_colours = { + class_idx: [ + int(cmap(class_idx)[0] * 255), + int(cmap(class_idx)[1] * 255), + int(cmap(class_idx)[2] * 255), + ] + for class_idx in class_dict + } + + tqdm_loop = tqdm( + layer_list, + leave=False, + desc="Converting outputs to QuPath JSON.", + disable=not verbose, + ) + + for type_class in tqdm_loop: + class_id = int(type_class) + class_label = class_dict[class_id] + + # binary mask for this class + layer = da.where(preds == type_class, 1, 0).astype("uint8").compute() + + contours, _ = cv2.findContours(layer, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) + + contours = cast("list[np.ndarray]", contours) + + # Convert contours to polygons + for cnt in contours: + if cnt.shape[0] < 3: # noqa: PLR2004 + continue + + # scale coordinates + cnt_scaled: np.ndarray = cnt.squeeze(1).astype(float) + cnt_scaled[:, 0] *= scale_factor[0] + cnt_scaled[:, 1] *= scale_factor[1] + + poly = Polygon(cnt_scaled) + poly_geo = mapping(poly) + + feature = { + "type": "Feature", + "geometry": poly_geo, + "id": f"class_{class_id}_{len(features)}", + "properties": { + "classification": { + "name": class_label, + "color": class_colours[class_id], + } + }, + "objectType": "annotation", + "name": class_label, + "class_value": class_id, + } + + features.append(feature) + + qupath_json = {"type": "FeatureCollection", "features": features} + + # if a save directory is provided, then dump JSON into a file + if save_path: + return save_qupath_json(save_path=save_path, qupath_json=qupath_json) + + return qupath_json + + +def _semantic_segmentations_as_annotations( + layer_list: list, + preds: da.Array, + scale_factor: tuple[float, float], + class_dict: dict, + save_path: Path | None = None, + *, + verbose: bool = True, +) -> AnnotationStore | Path: + """Helper function to save semantic segmentation as annotations.""" + store = SQLiteStore() annotations_list: list[Annotation] = [] tqdm_loop = tqdm( @@ -1433,28 +1601,46 @@ def dict_to_store_semantic_segmentor( annotations_list, [str(i) for i in range(len(annotations_list))] ) - # # if a save director is provided, then dump store into a file + # # if a save directory is provided, then dump store into a file if save_path: - # ensure parent directory exists - save_path.parent.absolute().mkdir(parents=True, exist_ok=True) - # ensure proper db extension - save_path = save_path.parent.absolute() / (save_path.stem + ".db") - store.commit() - store.dump(save_path) - 
return save_path + return save_annotations( + save_path=save_path, + store=store, + ) return store +def save_annotations( + save_path: Path, + store: AnnotationStore, +) -> Path: + """Saves Annotation Store to disk.""" + # ensure proper db extension + save_path = save_path.parent.absolute() / (save_path.stem + ".db") + store.commit() + store.dump(save_path) + return save_path + + +def save_qupath_json(save_path: Path, qupath_json: dict) -> Path: + """Saves QuPath JSON to disk.""" + save_path = save_path.with_suffix(".json") + with Path.open(save_path, "w") as f: + json.dump(qupath_json, f, indent=2) + return save_path + + def dict_to_store_patch_predictions( patch_output: dict | zarr.group, scale_factor: tuple[float, float], class_dict: dict | None = None, save_path: Path | None = None, + output_type: str = "AnnotationStore", *, verbose: bool = True, -) -> AnnotationStore | Path: - """Converts output of TIAToolbox PatchPredictor engine to AnnotationStore. +) -> AnnotationStore | dict | Path: + """Converts output of the PatchPredictor engine to AnnotationStore or QuPath json. Args: patch_output (dict | zarr.Group): @@ -1470,6 +1656,9 @@ def dict_to_store_patch_predictions( save_path (str or Path): Optional Output directory to save the Annotation Store results. + output_type (str): + "AnnotationStore" → return AnnotationStore + "QuPath" → return QuPath JSON dict verbose (bool): Whether to display logs and progress bar. @@ -1488,13 +1677,15 @@ def dict_to_store_patch_predictions( # get relevant keys class_probs = get_zarr_array(patch_output.get("probabilities", [])) preds = get_zarr_array(patch_output.get("predictions", [])) - patch_coords = np.array(patch_output.get("coordinates", [])) + + # Scale coordinates if not np.all(np.array(scale_factor) == 1): patch_coords = patch_coords * (np.tile(scale_factor, 2)) # to baseline mpp labels = patch_output.get("labels", []) - # get classes to consider + + # Determine classes if len(class_probs) == 0: classes_predicted = np.unique(preds).tolist() else: @@ -1507,12 +1698,25 @@ def dict_to_store_patch_predictions( else: class_dict = {i: i for i in range(len(class_probs[0]))} - # find what keys we need to save + # Keys to save keys = ["predictions"] keys = keys + [key for key in ["probabilities", "labels"] if key in patch_output] + if output_type.lower() == "qupath": + qupath_json = patch_predictions_as_qupath_json( + preds=preds, + class_dict=class_dict, + patch_coords=patch_coords, + verbose=verbose, + ) + + if save_path: + return save_qupath_json(save_path=save_path, qupath_json=qupath_json) + + return qupath_json + # put patch predictions into a store - annotations = patch_predictions_as_annotations( + annotations_ = patch_predictions_as_annotations( preds.astype(float), keys, class_dict, @@ -1524,16 +1728,14 @@ def dict_to_store_patch_predictions( ) store = SQLiteStore() - _ = store.append_many(annotations, [str(i) for i in range(len(annotations))]) + _ = store.append_many(annotations_, [str(i) for i in range(len(annotations_))]) # if a save director is provided, then dump store into a file if save_path: - # ensure parent directory exists - save_path.parent.absolute().mkdir(parents=True, exist_ok=True) - # ensure proper db extension - save_path = save_path.parent.absolute() / (save_path.stem + ".db") - store.dump(save_path) - return save_path + return save_annotations( + save_path=save_path, + store=store, + ) return store
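For reference, the QuPath output introduced in this diff is a GeoJSON-style FeatureCollection. Following the feature construction in `save_detection_arrays_to_qupath_json`, a single-detection result has roughly the shape below; the coordinates, class name, colour, and probability are illustrative only.

qupath_json = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "id": "detection_0",
            "geometry": {"type": "Point", "coordinates": [128.0, 64.0]},
            "properties": {
                "classification": {"name": "nucleus", "color": [31, 119, 180]},
                "probability": 0.97,
            },
            "objectType": "detection",
            "name": "nucleus",
            "class_value": 1,
        },
        # ... one Feature per detection ...
    ],
}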
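A minimal sketch of calling the new module-level helper directly; the arrays, class names, and output path here are invented for illustration, and `scale_factor` plays the same `model_mpp / slide_mpp` role documented above.

from pathlib import Path

import numpy as np

from tiatoolbox.models.engine.nucleus_detector import (
    save_detection_arrays_to_qupath_json,
)

detections = {
    "x": np.array([10, 250, 400], dtype=np.uint32),
    "y": np.array([20, 300, 180], dtype=np.uint32),
    "classes": np.array([0, 1, 1], dtype=np.uint32),
    "probabilities": np.array([0.91, 0.87, 0.99], dtype=np.float32),
}
class_dict = {0: "background", 1: "nucleus"}

# With save_path=None an in-memory FeatureCollection dict is returned.
collection = save_detection_arrays_to_qupath_json(
    detection_arrays=detections,
    scale_factor=(2.0, 2.0),
    class_dict=class_dict,
)
assert collection["type"] == "FeatureCollection"
assert len(collection["features"]) == 3

# With a save_path the suffix is normalised to ".json" and the path returned.
out_path = save_detection_arrays_to_qupath_json(
    detection_arrays=detections,
    class_dict=class_dict,
    save_path=Path("detections_out"),
)
assert out_path.suffix == ".json"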
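The same pattern applies to patch-level predictions via the new `patch_predictions_as_qupath_json` helper in `tiatoolbox.utils.misc`, which wraps each patch's bounding box in a rectangular Polygon feature; the predictions and coordinates below are again invented.

import numpy as np

from tiatoolbox.utils.misc import patch_predictions_as_qupath_json

preds = np.array([0, 1, 1])
# One (x_min, y_min, x_max, y_max) bound per patch at baseline resolution.
patch_coords = np.array(
    [
        [0, 0, 224, 224],
        [224, 0, 448, 224],
        [0, 224, 224, 448],
    ]
)

geojson = patch_predictions_as_qupath_json(
    preds=preds,
    class_dict={0: "non-tumour", 1: "tumour"},
    patch_coords=patch_coords,
    verbose=False,
)

# Each patch becomes an "annotation" Feature carrying its class name and a
# colour drawn from the "tab20" colormap.
assert len(geojson["features"]) == len(preds)
assert geojson["features"][0]["geometry"]["type"] == "Polygon"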
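At engine level the new format is requested with output_type="qupath"; a hypothetical end-to-end call follows, in which the model name, slide path, and save directory are placeholders rather than values taken from this diff.

from pathlib import Path

from tiatoolbox.models.engine.nucleus_detector import NucleusDetector

detector = NucleusDetector(model="a-pretrained-detection-model")  # placeholder
output = detector.run(
    images=[Path("slide.svs")],  # placeholder WSI path
    patch_mode=False,
    output_type="qupath",
    save_dir=Path("qupath_out"),
)
# Per the save_predictions docstring above, the result points at the saved
# ``.json`` file(s) (keyed per input image in WSI mode), ready for QuPath.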