Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 104 additions & 100 deletions examples/11-import-foundation-models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"id": "MR3BWBayNg_K",
"tags": [
"remove-cell"
"hide-output"
]
},
"outputs": [],
Expand Down Expand Up @@ -119,19 +119,16 @@
"import warnings\n",
"from pathlib import Path\n",
"\n",
"import dask.array as da\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import umap\n",
"from huggingface_hub import notebook_login\n",
"import zarr\n",
"from huggingface_hub import hf_hub_download, notebook_login\n",
"\n",
"from tiatoolbox import logger\n",
"from tiatoolbox.models.architecture.vanilla import TimmBackbone\n",
"from tiatoolbox.models.engine.semantic_segmentor import (\n",
" DeepFeatureExtractor,\n",
" IOSegmentorConfig,\n",
")\n",
"from tiatoolbox.utils.misc import download_data\n",
"from tiatoolbox.models.engine.deep_feature_extractor import DeepFeatureExtractor\n",
"from tiatoolbox.models.engine.io_config import IOPatchPredictorConfig\n",
"from tiatoolbox.wsicore.wsireader import WSIReader\n",
"\n",
"# Configure logging and warnings\n",
Expand Down Expand Up @@ -182,9 +179,20 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "kd_rQ3f_ZrxG"
"id": "kd_rQ3f_ZrxG",
"tags": [
"hide-output"
]
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"deleting tmp directory\n"
]
}
],
"source": [
"![ -d tmp ] && ( echo \"deleting tmp directory\"; rm -rf tmp )"
]
Expand All @@ -207,39 +215,40 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BjGYV4HO-vQe",
"outputId": "caaeee6c-b455-4a01-ef8c-ed60824f78d3"
"tags": [
"hide-output"
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|2024-12-02|14:52:58.760| [INFO] Download has started. Please wait...\n",
"|2024-12-02|14:52:58.763| [INFO] Download is complete.\n"
]
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "82e5cd5df7484f0f8d4bde918bdc1a30",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"sample_wsis/TCGA-3L-AA1B-01Z-00-DX1.8923(…): 0%| | 0.00/1.23G [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"global_save_dir = Path(\"tmp/\")\n",
"\n",
"# File name of WSI\n",
"wsi_path = global_save_dir / \"sample_wsi.svs\"\n",
"\n",
"logger.info(\"Download has started. Please wait...\")\n",
"\n",
"# Downloading and unzip a sample whole-slide image\n",
"download_data(\n",
" \"https://tiatoolbox.dcs.warwick.ac.uk/sample_wsis/TCGA-3L-AA1B-01Z-00-DX1.8923A151-A690-40B7-9E5A-FCBEDFC2394F.svs\",\n",
" wsi_path,\n",
")\n",
"\n",
"logger.info(\"Download is complete.\")"
"global_save_dir = \"./tmp\"\n",
"if not Path(global_save_dir).exists():\n",
" Path(global_save_dir).mkdir(exist_ok=True)\n",
"\n",
"# Downloading sample image tile\n",
"wsi_path = hf_hub_download(\n",
" repo_id=\"TIACentre/TIAToolBox_Remote_Samples\",\n",
" filename=\"sample_wsis/TCGA-3L-AA1B-01Z-00-DX1.8923A151-A690-40B7-9E5A-FCBEDFC2394F.svs\",\n",
" repo_type=\"dataset\",\n",
" local_dir=global_save_dir,\n",
")"
]
},
{
Expand All @@ -258,31 +267,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "46ae7f18dc944c92bd04632b5369844a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Token has not been saved to git credential helper.\n"
]
}
],
"outputs": [],
"source": [
"notebook_login()"
]
Expand All @@ -291,80 +278,96 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we create the model using pre-trained network architectures. FFor other models available in the `timm` library, such as computational pathology-specific foundation models, use `TimmBackbone` (e.g. EfficientNet, UNI, Prov-GigaPath, H-optimus-0). For standard CNN model architectures available in PyTorch (e.g., AlexNet, ResNet, DenseNet, Inception), use `CNNBackbone`.\n",
"Next, we create the model using pre-trained network architectures. For other models available in the `timm` library, such as computational pathology-specific foundation models, use `TimmBackbone` (e.g., `\"EfficientNet\"`, `\"UNI\"`, `\"Prov-GigaPath\"`, `\"H-optimus-0\"`). For standard CNN model architectures available in PyTorch (e.g., `\"AlexNet\"`, `\"ResNet\"`, `\"DenseNet\"`, `\"Inception\"`), use `CNNBackbone`.\n",
"\n",
"In the example below, we use the `UNI` model. However, this can be changed to other computational pathology-specific foundation models by modifying the `backbone` argument to `prov-gigapath` or `H-optimus-0`. When using foundation models, please ensure to cite the corresponding paper and follow the specific access requirements. Certain models require users to link their GitHub and HuggingFace accounts and have their model access request accepted, subject to certain conditions, such as for [UNI](https://huggingface.co/MahmoodLab/UNI) and [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath). Other models, such as [H-optimius-0](https://huggingface.co/bioptimus/H-optimus-0), have no such requirements.\n",
"In the example below, we use the `UNI` model. However, this can be changed to other computational pathology-specific foundation models by modifying the `model` argument to `prov-gigapath` or `H-optimus-0`. When using foundation models, please ensure to cite the corresponding paper and follow the specific access requirements. Certain models require users to link their GitHub and HuggingFace accounts and have their model access request accepted, subject to certain conditions, such as for [UNI](https://huggingface.co/MahmoodLab/UNI) and [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath). Other models, such as [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0), have no such requirements.\n",
"\n",
"We also provide an `IOSegmentorConfig` specifying the input/output patch shape and resolution for processing and saving the output.\n",
"We also provide an `IOPatchPredictorConfig` specifying the input/output patch shape and resolution for processing and saving the output.\n",
"\n",
"Finally, we use the `DeepFeatureExtractor` to extract these deep features, per patch, from the WSI. A mask is automatically generated to guide the patch extraction process and ignore the background.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"execution_count": 7,
"metadata": {
"tags": [
"hide-output"
]
},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"|2024-12-02|14:53:32.136| [INFO] Loading pretrained weights from Hugging Face hub (MahmoodLab/UNI)\n"
"GPU is not compatible with torch.compile. Compatible GPUs include NVIDIA V100, A100, and H100. Speedup numbers may be lower than expected.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dbe5c169afeb4958a023678cd579a804",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing WSIs: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"|2024-12-02|14:53:32.984| [WARNING] GPU is not compatible with torch.compile. Compatible GPUs include NVIDIA V100, A100, and H100. Speedup numbers may be lower than expected.\n",
"|2024-12-02|14:53:33.419| [WARNING] Read: Scale > 1.This means that the desired resolution is higher than the WSI baseline (maximum encoded resolution). Interpolation of read regions may occur.\n",
"Process Batch: 100%|##########################| 630/630 [05:18<00:00, 1.98it/s]"
"Read: Scale > 1.This means that the desired resolution is higher than the WSI baseline (maximum encoded resolution). Interpolation of read regions may occur.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"|2024-12-02|14:58:52.845| [INFO] Finish: 0\n",
"|2024-12-02|14:58:52.846| [INFO] --Input: tmp/sample_wsi.svs\n",
"|2024-12-02|14:58:52.847| [INFO] --Output: /newdata/u1973415/TIAToolbox/tiatoolbox/examples/tmp/wsi_features/0\n"
]
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4de0cad982a246fa916a36b4d5556070",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Inferring patches: 0%| | 0/629 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
"[########################################] | 100% Completed | 1.64 sms\n"
]
}
],
"source": [
"model = TimmBackbone(backbone=\"UNI\", pretrained=True)\n",
"\n",
"wsi_ioconfig = IOSegmentorConfig(\n",
"wsi_ioconfig = IOPatchPredictorConfig(\n",
" input_resolutions=[{\"units\": \"mpp\", \"resolution\": 0.5}],\n",
" patch_input_shape=[224, 224],\n",
" output_resolutions=[{\"units\": \"mpp\", \"resolution\": 0.5}],\n",
" patch_output_shape=[224, 224],\n",
" stride_shape=[224, 224],\n",
")\n",
"\n",
"# create the feature extractor and run it on the WSI\n",
"extractor = DeepFeatureExtractor(\n",
" model=model,\n",
" auto_generate_mask=True,\n",
" model=\"UNI\",\n",
" batch_size=32,\n",
" num_loader_workers=4,\n",
" num_postproc_workers=4,\n",
" num_workers=4,\n",
")\n",
"\n",
"out = extractor.predict(\n",
" imgs=[wsi_path],\n",
" mode=\"wsi\",\n",
"out = extractor.run(\n",
" images=[wsi_path],\n",
" ioconfig=wsi_ioconfig,\n",
" save_dir=global_save_dir / \"wsi_features\",\n",
" patch_mode=False,\n",
" save_dir=Path(global_save_dir) / \"wsi_features\",\n",
" device=device,\n",
" output_type=\"zarr\",\n",
")"
]
},
Expand All @@ -382,7 +385,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand All @@ -391,7 +394,7 @@
"Text(0.5, 1.0, 'UMAP feature embedding')"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
Expand Down Expand Up @@ -434,8 +437,9 @@
"\n",
"\n",
"# load the features output by our feature extractor\n",
"pos = np.load(global_save_dir / \"wsi_features\" / \"0.position.npy\")\n",
"feats = np.load(global_save_dir / \"wsi_features\" / \"0.features.0.npy\")\n",
"zarr_features = zarr.open(out[Path(wsi_path)])\n",
"pos = da.from_zarr(zarr_features[\"coordinates\"])\n",
"feats = da.from_zarr(zarr_features[\"features\"])\n",
"pos = pos / 8 # as we extracted at 0.5mpp, and we are overlaying on a thumbnail at 4mpp\n",
"\n",
"# reduce the features into 3 dimensional (rgb) space\n",
Expand Down Expand Up @@ -475,7 +479,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "tiatoolbox-demo-py39",
"display_name": "tiatoolbox-dev",
"language": "python",
"name": "python3"
},
Expand All @@ -489,7 +493,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
"version": "3.10.18"
}
},
"nbformat": 4,
Expand Down