Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions monailabel/datastore/cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import tempfile
import time
import urllib.parse
from typing import Any, Dict

import numpy as np
import requests
Expand Down Expand Up @@ -318,6 +319,22 @@ def download_from_cvat(self, max_retry_count=5, retry_wait_time=10):
retry_count += 1
return None

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """Not Implemented: the CVAT datastore does not support directory-based samples."""
    raise NotImplementedError("This datastore does not support adding directories")

def get_is_multichannel(self) -> bool:
    """
    Returns whether the application's studies are directed at multichannel (4D) data.

    Not supported by this datastore; always raises NotImplementedError.
    """
    raise NotImplementedError("This datastore does not support multichannel imaging")

def get_is_multi_file(self) -> bool:
    """
    Returns whether the application's studies are directed at directories containing
    multiple images per sample.

    Not supported by this datastore; always raises NotImplementedError.
    """
    # Fix: error message previously read "does not support support".
    raise NotImplementedError("This datastore does not support multi-volume imaging")


"""
def main():
Expand Down
16 changes: 16 additions & 0 deletions monailabel/datastore/dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,19 @@ def _download_labeled_data(self):
def datalist(self, full_path=True) -> List[Dict[str, Any]]:
    """
    Sync labeled data via ``_download_labeled_data`` before delegating to the
    parent class's datalist.

    :param full_path: forwarded to the parent implementation
    :return: list of image/label dictionaries from the parent datastore
    """
    self._download_labeled_data()
    return super().datalist(full_path)

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """Not Implemented: the DICOM datastore does not support directory-based samples."""
    raise NotImplementedError("This datastore does not support adding directories")

def get_is_multichannel(self) -> bool:
    """
    Returns whether the application's studies are directed at multichannel (4D) data.

    Not supported by this datastore; always raises NotImplementedError.
    """
    raise NotImplementedError("This datastore does not support multichannel imaging")

def get_is_multi_file(self) -> bool:
    """
    Returns whether the application's studies are directed at directories containing
    multiple images per sample.

    Not supported by this datastore; always raises NotImplementedError.
    """
    # Fix: error message previously read "does not support support".
    raise NotImplementedError("This datastore does not support multi-volume imaging")
16 changes: 16 additions & 0 deletions monailabel/datastore/dsa.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,22 @@ def status(self) -> Dict[str, Any]:
def json(self):
return self.datalist()

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """Not Implemented: the DSA datastore does not support directory-based samples."""
    raise NotImplementedError("This datastore does not support adding directories")

def get_is_multichannel(self) -> bool:
    """
    Returns whether the application's studies are directed at multichannel (4D) data.

    Not supported by this datastore; always raises NotImplementedError.
    """
    raise NotImplementedError("This datastore does not support multichannel imaging")

def get_is_multi_file(self) -> bool:
    """
    Returns whether the application's studies are directed at directories containing
    multiple images per sample.

    Not supported by this datastore; always raises NotImplementedError.
    """
    # Fix: error message previously read "does not support support".
    raise NotImplementedError("This datastore does not support multi-volume imaging")


"""
def main():
Expand Down
73 changes: 57 additions & 16 deletions monailabel/datastore/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,11 @@ def __init__(
images_dir: str = ".",
labels_dir: str = "labels",
datastore_config: str = "datastore_v2.json",
extensions=("*.nii.gz", "*.nii"),
extensions=("*.nii.gz", "*.nii", "*.nrrd"),
auto_reload=False,
read_only=False,
multichannel: bool = False,
multi_file: bool = False,
):
"""
Creates a `LocalDataset` object
Expand All @@ -124,6 +126,8 @@ def __init__(
self._ignore_event_config = False
self._config_ts = 0
self._auto_reload = auto_reload
self._multichannel: bool = multichannel
self._multi_file: bool = multi_file

logging.getLogger("filelock").setLevel(logging.ERROR)

Expand Down Expand Up @@ -256,6 +260,12 @@ def datalist(self, full_path=True) -> List[Dict[str, Any]]:
ds = json.loads(json.dumps(ds).replace(f"{self._datastore_path.rstrip(os.pathsep)}{os.pathsep}", ""))
return ds

def get_is_multichannel(self) -> bool:
    """Return True when this datastore is configured for multichannel (4D) images."""
    return self._multichannel

def get_is_multi_file(self) -> bool:
    """Return True when each sample is a directory holding multiple image files."""
    return self._multi_file

def get_image(self, image_id: str, params=None) -> Any:
"""
Retrieve image object based on image id
Expand Down Expand Up @@ -431,6 +441,29 @@ def refresh(self):
"""
self._reconcile_datastore()

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """
    Register a new directory-based sample in the datastore.

    :param directory_id: id to store the sample under; falls back to the
        basename of ``filename`` when empty
    :param filename: path of the uploaded data to copy into the datastore's
        image folder
    :param info: additional metadata persisted with the entry; ``ts`` and
        ``name`` keys are added (NOTE: the caller's dict is mutated when non-empty)
    :return: the id the sample was stored under
    """
    # Renamed from `id` to avoid shadowing the builtin.
    base_name = os.path.basename(filename)
    if not directory_id:
        directory_id = base_name

    logger.info(f"Adding Image: {directory_id} => (unknown)")
    name = directory_id
    dest = os.path.realpath(os.path.join(self._datastore.image_path(), name))

    with FileLock(self._lock_file):
        logger.debug("Acquired the lock!")
        # NOTE(review): shutil.copy raises for an actual directory path;
        # presumably callers pass a single uploaded file — confirm, otherwise
        # this should dispatch to shutil.copytree for directories.
        shutil.copy(filename, dest)

        info = info if info else {}
        info["ts"] = int(time.time())
        info["name"] = name

        # images = get_directory_contents(filename)
        # Directory samples are stored with an empty extension.
        self._datastore.objects[directory_id] = ImageLabelModel(image=DataModel(info=info, ext=""))
        self._update_datastore_file(lock=False)  # lock already held above
        logger.debug("Released the lock!")
    return directory_id

def add_image(self, image_id: str, image_filename: str, image_info: Dict[str, Any]) -> str:
id, image_ext = self._to_id(os.path.basename(image_filename))
if not image_id:
Expand Down Expand Up @@ -552,10 +585,15 @@ def _list_files(self, path, patterns):
files = os.listdir(path)

filtered = dict()
for pattern in patterns:
matching = fnmatch.filter(files, pattern)
for file in matching:
filtered[os.path.basename(file)] = file
if not self._multi_file:
for pattern in patterns:
matching = fnmatch.filter(files, pattern)
for file in matching:
filtered[os.path.basename(file)] = file
else:
for file in files:
if file.lower() not in ["labels", ".lock", "datastore_v2.json"]:
filtered[os.path.basename(file)] = file
return filtered

def _reconcile_datastore(self):
Expand Down Expand Up @@ -585,23 +623,26 @@ def _add_non_existing_images(self) -> int:
invalidate = 0
self._init_from_datastore_file()

local_images = self._list_files(self._datastore.image_path(), self._extensions)
local_files = self._list_files(self._datastore.image_path(), self._extensions)

image_ids = list(self._datastore.objects.keys())
for image_file in local_images:
image_id, image_ext = self._to_id(image_file)
if image_id not in image_ids:
logger.info(f"Adding New Image: {image_id} => {image_file}")
ids = list(self._datastore.objects.keys())
for file in local_files:
if self._multi_file:
# Directories have no extension — use the name as-is
file_id = file
file_ext_str = ""
else:
file_id, file_ext_str = self._to_id(file)

name = self._filename(image_id, image_ext)
image_info = {
if file_id not in ids:
logger.info(f"Adding New Image: {file_id} => {file}")
name = self._filename(file_id, file_ext_str)
file_info = {
"ts": int(time.time()),
# "checksum": file_checksum(os.path.join(self._datastore.image_path(), name)),
"name": name,
}

invalidate += 1
self._datastore.objects[image_id] = ImageLabelModel(image=DataModel(info=image_info, ext=image_ext))
self._datastore.objects[file_id] = ImageLabelModel(name=DataModel(info=file_info, ext=file_ext_str))

return invalidate

Expand Down
16 changes: 16 additions & 0 deletions monailabel/datastore/xnat.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,22 @@ def __upload_assessment(self, aiaa_model_name, image_id, file_path, type):

self._request_put(url, data, type=type)

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """Not Implemented: the XNAT datastore does not support directory-based samples."""
    raise NotImplementedError("This datastore does not support adding directories")

def get_is_multichannel(self) -> bool:
    """
    Returns whether the application's studies are directed at multichannel (4D) data.

    Not supported by this datastore; always raises NotImplementedError.
    """
    raise NotImplementedError("This datastore does not support multichannel imaging")

def get_is_multi_file(self) -> bool:
    """
    Returns whether the application's studies are directed at directories containing
    multiple images per sample.

    Not supported by this datastore; always raises NotImplementedError.
    """
    # Fix: error message previously read "does not support support".
    raise NotImplementedError("This datastore does not support multi-volume imaging")


"""
def main():
Expand Down
10 changes: 7 additions & 3 deletions monailabel/endpoints/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def add_image(
logger.info(f"Image: {image}; File: {file}; params: {params}")
file_ext = "".join(pathlib.Path(file.filename).suffixes) if file.filename else ".nii.gz"

image_id = image if image else os.path.basename(file.filename).replace(file_ext, "")
id = image if image else os.path.basename(file.filename).replace(file_ext, "")
image_file = tempfile.NamedTemporaryFile(suffix=file_ext).name

with open(image_file, "wb") as buffer:
Expand All @@ -79,8 +79,12 @@ def add_image(
save_params: Dict[str, Any] = json.loads(params) if params else {}
if user:
save_params["user"] = user
image_id = instance.datastore().add_image(image_id, image_file, save_params)
return {"image": image_id}
if not instance.datastore().get_is_multi_file():
image_id = instance.datastore().add_image(id, image_file, save_params)
return {"image": image_id}
else:
directory_id = instance.datastore().add_directory(id, image_file, save_params)
return {"image": directory_id}


def remove_image(id: str, user: Optional[str] = None):
Expand Down
17 changes: 15 additions & 2 deletions monailabel/interfaces/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ def __init__(
self.app_dir = app_dir
self.studies = studies
self.conf = conf if conf else {}

self.multichannel: bool = strtobool(conf.get("multichannel", False))
self.multi_file: bool = strtobool(conf.get("multi_file", False))
self.input_channels = conf.get("input_channels", False)
self.name = name
self.description = description
self.version = version
Expand Down Expand Up @@ -146,6 +148,8 @@ def init_datastore(self) -> Datastore:
extensions=settings.MONAI_LABEL_DATASTORE_FILE_EXT,
auto_reload=settings.MONAI_LABEL_DATASTORE_AUTO_RELOAD,
read_only=settings.MONAI_LABEL_DATASTORE_READ_ONLY,
multichannel=self.multichannel,
multi_file=self.multi_file,
)

def init_remote_datastore(self) -> Datastore:
Expand Down Expand Up @@ -281,6 +285,10 @@ def infer(self, request, datastore=None):
f"Inference Task is not Initialized. There is no model '{model}' available",
)

request["multi_file"] = self.multi_file
request["multichannel"] = self.multichannel
request["input_channels"] = self.input_channels

request = copy.deepcopy(request)
request["description"] = task.description

Expand All @@ -292,7 +300,7 @@ def infer(self, request, datastore=None):
else:
request["image"] = datastore.get_image_uri(request["image"])

if os.path.isdir(request["image"]):
if os.path.isdir(request["image"]) and not self.multi_file:
logger.info("Input is a Directory; Consider it as DICOM")

logger.debug(f"Image => {request['image']}")
Expand Down Expand Up @@ -430,6 +438,11 @@ def train(self, request):
f"Train Task is not Initialized. There is no model '{model}' available; {request}",
)

# 4D image support, send train task information regarding data
request["multi_file"] = self.multi_file
request["multichannel"] = self.multichannel
request["input_channels"] = self.input_channels

request = copy.deepcopy(request)
result = task(request, self.datastore())

Expand Down
26 changes: 26 additions & 0 deletions monailabel/interfaces/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,18 @@ def refresh(self) -> None:
"""
pass

@abstractmethod
def add_directory(self, id: str, filename: str, info: Dict[str, Any]) -> str:
    """
    Save a directory-based sample under the given id and return the id it was saved as.

    :param id: the id for the directory; if None then the base filename will be used
    :param filename: the path to the directory
    :param info: additional info for the directory
    :return: the id of the saved directory
    """
    pass

@abstractmethod
def add_image(self, image_id: str, image_filename: str, image_info: Dict[str, Any]) -> str:
"""
Expand Down Expand Up @@ -279,3 +291,17 @@ def json(self):
Return json representation of datastore
"""
pass

@abstractmethod
def get_is_multichannel(self) -> bool:
    """
    Returns whether the application's studies are directed at multichannel (4D) data
    """
    pass

@abstractmethod
def get_is_multi_file(self) -> bool:
    """
    Returns whether the application's studies are directed at directories containing
    multiple images per sample
    """
    pass
8 changes: 8 additions & 0 deletions monailabel/tasks/activelearning/first.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,13 @@ def __call__(self, request, datastore: Datastore):
images.sort()
image = images[0]

# If the datastore contains 4d images send the multichannel flag to ensure images are loaded as sequences
if datastore.get_is_multichannel():
return {"id": image, "multichannel": True}

# If the datastore is multi_file, each sample has a directory with multiple images
if datastore.get_is_multi_file():
return {"id": image, "multi_file": True}

logger.info(f"First: Selected Image: {image}")
return {"id": image}
13 changes: 13 additions & 0 deletions monailabel/tasks/activelearning/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,17 @@ def __call__(self, request, datastore: Datastore):
image = random.choices(images, weights=weights)[0]
logger.debug(f"Random: Images: {images}; Weight: {weights}")
logger.info(f"Random: Selected Image: {image}; Weight: {weights[0]}")

# If the datastore contains 4d images send the multichannel flag to ensure images are loaded as sequences
if datastore.get_is_multichannel():
return {"id": image, "weight": weights[0], "multichannel": True}

# If the datastore is multi_file, each sample has a directory with multiple images
if datastore.get_is_multi_file():
return {
"id": image,
"weight": weights[0],
"multi_file": True,
} # this will send the directory and we will walk it later on

return {"id": image, "weight": weights[0]}
5 changes: 5 additions & 0 deletions monailabel/tasks/train/basic_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ def __init__(self):
self.multi_gpu = False # multi gpu enabled
self.local_rank = 0 # local rank in case of multi gpu
self.world_size = 0 # world size in case of multi gpu
self.input_channels = 1
self.multi_file = False

self.request = None
self.trainer = None
Expand Down Expand Up @@ -490,6 +492,9 @@ def train(self, rank, world_size, request, datalist):

context.run_id = request["run_id"]
context.multi_gpu = request["multi_gpu"]
context.multi_file = request.get("multi_file", False)
context.input_channels = request.get("input_channels", 1)

if context.multi_gpu:
os.environ["LOCAL_RANK"] = str(context.local_rank)

Expand Down
Loading