-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprepare_mimic.py
More file actions
90 lines (73 loc) · 3.19 KB
/
prepare_mimic.py
File metadata and controls
90 lines (73 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
from getpass import getpass
import pandas as pd
import requests
from getpass import getpass
import os
from bs4 import BeautifulSoup
import numpy as np
from PIL import Image
from utils.PhysioNetClient import PhysioNetClient
from urllib.parse import urljoin
# Base URLs of the PhysioNet-hosted datasets used by this script.
# NOTE(review): MIMIC_CXR is not referenced in the visible part of the script.
MIMIC_CXR = "https://physionet.org/files/mimic-cxr/2.1.0"
MIMIC_JPG = "https://physionet.org/files/mimic-cxr-jpg/2.1.0"
CHEXMASK = "https://physionet.org/files/chexmask-cxr-segmentation-data/1.0.0"
# Root directory for everything the script writes: the cached index/CSV
# downloads plus the images/ and labels/ subfolders.
output_path = '/local/scratch/clmn1/data/xray/MIMIC_new/'
# Target size handed to PIL's resize for both images and masks, e.g. (64, 64);
# set to the string 'original' to skip resizing.
output_size = (256, 256)
# Cut-off that the processing loop compares the (sorted) row index against.
num_images = 10
# PhysioNet credentials are prompted for interactively at start-up; the
# password is read with getpass.
username = input("PhysioNet Username: ")
password = getpass("PhysioNet Password: ")
def find_path_to_img(dicom_id, image_filenames):
    """Return the first entry of ``image_filenames`` that contains ``dicom_id``.

    Args:
        dicom_id: String searched for as a substring of each entry.
        image_filenames: Iterable of text lines (for example the result of
            ``readlines()``), scanned in order.

    Returns:
        The first matching line with surrounding whitespace stripped, or
        ``None`` when no line contains ``dicom_id``.
    """
    # Lazy generator: scanning stops at the first hit, just like an early return.
    matches = (entry.strip() for entry in image_filenames if dicom_id in entry)
    return next(matches, None)
def get_mask_from_RLE(rle, height, width):
    """Decode a run-length-encoded string into a binary ``(height, width)`` mask.

    ``rle`` is read as whitespace-separated integers forming consecutive
    ``start length`` pairs, where ``start`` is a 1-based position in the
    flattened (row-major) mask. Pixels covered by a run are set to 1; all
    other pixels stay 0.

    Args:
        rle: The encoded string, or a missing value (anything ``pd.isna``
            reports as NA), which yields an all-zero mask.
        height: Number of rows of the mask.
        width: Number of columns of the mask.

    Returns:
        A ``uint8`` NumPy array of shape ``(height, width)``.
    """
    # Allocated as one flat buffer so an invalid size fails the same way as before.
    decoded = np.zeros(height * width, dtype=np.uint8).reshape((height, width))
    if pd.isna(rle):
        return decoded

    # Flat view onto the same buffer: writes here show up in ``decoded``.
    flat = decoded.reshape(-1)
    values = list(map(int, rle.split()))
    for pos in range(0, len(values), 2):
        begin = values[pos] - 1  # starts are 1-based in the encoding
        flat[begin:begin + values[pos + 1]] = 1
    return decoded
# Main script body: log in to PhysioNet, fetch the image index and the CheXmask
# segmentation table, then download the best-segmented images and write one
# image file and one 3-channel mask PNG per selected row.

client = PhysioNetClient(username, password)
client.login()

# Create the output root before anything is saved into it. Previously only the
# images/ and labels/ subfolders were created, and only after the downloads
# below; download_file is not shown here, so do not rely on it to create
# missing directories.
os.makedirs(output_path, exist_ok=True)

# Index of image paths in MIMIC-CXR-JPG; kept on disk and reused between runs.
image_filenames_file = os.path.join(output_path, "image_filenames.txt")
if not os.path.exists(image_filenames_file):
    client.download_file(MIMIC_JPG + "/IMAGE_FILENAMES", save_path=image_filenames_file)
with open(image_filenames_file, 'r') as file:
    image_filenames = file.readlines()

# CheXmask segmentation table for MIMIC-CXR-JPG; also reused between runs.
mimic_segmentation_file = os.path.join(output_path, "MIMIC-CXR-JPG.csv")
if not os.path.exists(mimic_segmentation_file):
    client.download_file(CHEXMASK + "/OriginalResolution/MIMIC-CXR-JPG.csv", save_path=mimic_segmentation_file)
df = pd.read_csv(mimic_segmentation_file)

# Highest 'Dice RCA (Mean)' first; the index is reset so that the loop index
# below equals the rank of the row.
df = df.sort_values(by='Dice RCA (Mean)', ascending=False).reset_index(drop=True)

images_out_path = os.path.join(output_path, "images/")
masks_out_path = os.path.join(output_path, "labels/")
os.makedirs(images_out_path, exist_ok=True)
os.makedirs(masks_out_path, exist_ok=True)

for i, row in df.iterrows():
    # Stop after exactly num_images rows (the old `i > num_images` test let
    # one extra row through).
    if i >= num_images:
        break

    # Explicit check instead of `assert`, which is removed under `python -O`.
    # Written as `not (x >= 0.7)` so that a NaN score is rejected as well.
    dice_rca = row["Dice RCA (Mean)"]
    if not (dice_rca >= 0.7):
        raise ValueError(
            f"Row {i}: 'Dice RCA (Mean)' is {dice_rca}, expected at least 0.7"
        )

    dicom_id = row['dicom_id']
    img_path = find_path_to_img(dicom_id, image_filenames)
    if img_path is None:
        # Fail with a clear message rather than handing None to urljoin and
        # downloading something that is not the requested image.
        raise ValueError(f"No entry for dicom_id {dicom_id!r} in {image_filenames_file}")

    jpg_path = os.path.join(images_out_path, f"{dicom_id}.jpg")
    client.download_file(
        url=urljoin(MIMIC_JPG + "/", img_path),
        save_path=jpg_path
    )

    if output_size != 'original':
        # Close the source file before it is deleted; the resized copy is an
        # independent in-memory image.
        with Image.open(jpg_path) as img:
            resized_img = img.resize(output_size, resample=Image.BILINEAR)
        resized_img.save(os.path.join(images_out_path, f"{dicom_id}.png"))
        os.remove(jpg_path)

    # One binary mask per anatomy, stacked as the channels of a single image.
    masks = [
        get_mask_from_RLE(row[anatomy], int(row['Height']), int(row['Width']))
        for anatomy in ['Left Lung', 'Right Lung', 'Heart']
    ]
    mask = np.stack(masks, axis=-1)
    mask_img = Image.fromarray((mask * 255).astype(np.uint8))
    if output_size != 'original':
        # Nearest-neighbour keeps the mask values at exactly 0 or 255.
        mask_img = mask_img.resize(output_size, resample=Image.NEAREST)

    # save as PNG
    mask_img.save(os.path.join(masks_out_path, f"{dicom_id}.png"))