Skip to content

Commit 89a3c22

Browse files
committed
Remove need to specify scanner strength
1 parent 971e29e commit 89a3c22

8 files changed

Lines changed: 45 additions & 31 deletions

File tree

docs/FAQs.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ Solution:
5050
5151
## **Issues & questions with pipeline use**
5252
53+
### **I have an issue with a FLAIR feature that does not exist**
54+
55+
If you are running a subject with only a T1 scan and no FLAIR scan, but you receive an error like:
56+
```bash
57+
KeyError: "Unable to open object (object '.on_lh.gm_FLAIR_0.25.sm3.mgh' doesn't exist)"
58+
exit status 1
59+
```
60+
You are likely having this issue because you might have previously run this same subject ID with a FLAIR scan, in which case the FreeSurfer segmentation was done using the FLAIR scan. Therefore, even if you remove the FLAIR scan from the input data and run the command again, the intermediate FreeSurfer outputs for that subject still contain FLAIR information, which makes the pipeline look for the FLAIR feature and fail to find it.
61+
62+
To avoid this in the future, if you want to run the same subject with and without FLAIR, you should create two separate input folders with two different subject IDs, such as `sub-0001noflair` and `sub-0001flair`.
63+
5364
### **I have an issue during the harmonisation**
5465
5566
If your issue looks like :
817 Bytes
Loading

docs/prepare_data.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ You can copy the *demographics_file.csv* that you can find in your <meld_data_fo
6363
- Group: 'patient' if the subject is a patient or 'control' if the subject is a control
6464
- Age at preoperative: The age of the subject at the time of the preoperative T1 scan (in years)
6565
- Sex: 'male' if the subject is a male or 'female' if the subject is a female
66-
- Scanner: the scanner strenght associated with the MRI data ('3T' for 3 Tesla or '15T' for 1.5 Tesla)
6766

6867
### Warning
6968
- please ensure the column names are unchanged and completed with the appropriate values, otherwise the pipeline will fail.

meld_graph/data_preprocessing.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,6 @@ def make_boundary_zones(self, smoothing=0, boundary_feature_name=".on_lh.boundar
563563

564564
def load_covars(self, subject_ids=None, demographic_file=DEMOGRAPHIC_FEATURES_FILE):
565565
# if not os.path.isfile(demographic_file):
566-
# demographic_file = os.path.join(self.data_dir,demographic_file)
567566
if subject_ids is None:
568567
subject_ids = self.subject_ids
569568
covars = pd.DataFrame()
@@ -584,7 +583,7 @@ def load_covars(self, subject_ids=None, demographic_file=DEMOGRAPHIC_FEATURES_FI
584583
else:
585584
print(f'ERROR: There is an issue with the coded sex of subject {subject}')
586585
group.append(subj.is_patient)
587-
sites_scanners.append(subj.site_code + "_" + subj.scanner)
586+
sites_scanners.append(subj.site_code) # just site code now
588587

589588
covars["ages"] = ages
590589
covars["sex"] = sex
@@ -652,7 +651,7 @@ def shrink_combat_estimates(self, estimates):
652651
def unshrink_combat_estimates(self, estimates):
653652
""" unshrink combat estimates to use as input in neuroCombatFromTraining"""
654653
num_subjects = estimates['num_subjects'][0]
655-
mod_mean = np.zeros((len(estimates['stand.mean']),num_subjects ))
654+
mod_mean = np.zeros((len(estimates['stand.mean']),num_subjects))
656655
estimates['mod.mean'] = mod_mean
657656
estimates['stand.mean'] = np.tile(estimates['stand.mean'], (num_subjects,1)).T
658657
return estimates
@@ -765,7 +764,7 @@ def get_combat_new_site_parameters(
765764
return
766765
# load in covariates - age, sex, group, site and scanner unless provided
767766
new_site_covars = self.load_covars(subject_ids=np.array(listids)[np.array(combat_subject_include)], demographic_file=demographic_file).copy()
768-
#check site_scanner codes are the same for all subjects
767+
# check site_scanner codes are the same for all subjects
769768
if len(new_site_covars['site_scanner'].unique())==1:
770769
site_scanner = new_site_covars['site_scanner'].unique()[0]
771770
else:
@@ -854,12 +853,13 @@ def combat_new_subject(self, feature_name, combat_params_file):
854853
rh = subj.load_feature_values(feature_name, hemi="rh")[self.cohort.cortex_mask]
855854
combined_hemis = np.hstack([lh, rh])
856855
precombat_features.append(combined_hemis)
857-
site_scanner.append(subj.site_code + "_" + subj.scanner)
856+
site_scanner.append(subj.site_code) # just site code now
858857
subjects_included.append(subject)
859858
#if matrix empty, pass
860859
if precombat_features:
861860
combat_estimates = self.read_norm_combat_parameters(feature_name, combat_params_file)
862861
combat_estimates = self.unshrink_combat_estimates(combat_estimates)
862+
combat_estimates["batches"] = [x.split('_')[0] for x in combat_estimates["batches"]] # remove scanner strenght from the batch code if exist
863863
precombat_features = np.array(precombat_features)
864864
site_scanner = np.array(site_scanner)
865865
dict_combat = neuroCombatFromTraining(dat=precombat_features.T, batch=site_scanner, estimates=combat_estimates)
@@ -1224,7 +1224,7 @@ def intra_inter_subject(self, feature, cohort_for_norm=None, params_norm=None):
12241224
else:
12251225
included_subjects[k] = False
12261226
controls_subjects[k] = False
1227-
print(f"INFO: exlude subjects {np.array(self.subject_ids)[~included_subjects]}")
1227+
print(f"INFO: exclude subjects {np.array(self.subject_ids)[~included_subjects]}")
12281228
if vals_array:
12291229
vals_array = np.array(vals_array)
12301230
# remove exclude subjects
@@ -1287,7 +1287,7 @@ def asymmetry_subject(self, feature, cohort_for_norm=None, params_norm=None):
12871287
else:
12881288
included_subjects[k] = False
12891289
controls_subjects[k] = False
1290-
print(f"INFO: exlude subjects {np.array(self.subject_ids)[~included_subjects]}")
1290+
print(f"INFO: exclude subjects {np.array(self.subject_ids)[~included_subjects]}")
12911291
if vals_asym_array :
12921292
vals_asym_array = np.array(vals_asym_array)
12931293
# remove exclude subjects

meld_graph/download_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def download_test_data():
2020
"""
2121
Download test data from figshare
2222
"""
23-
url = "https://figshare.com/ndownloader/files/50432751?private_link=3b790cfb027f4036f19a"
23+
url = "https://figshare.com/ndownloader/files/53523443?private_link=413bc45083e67c7e7a11"
2424
test_data_dir = MELD_DATA_PATH
2525
os.makedirs(test_data_dir, exist_ok=True)
2626
print('downloading test data to '+ test_data_dir)

meld_graph/meld_cohort.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def get_subject_ids(self, **kwargs):
240240
if isinstance(site_codes, str):
241241
site_codes = [site_codes]
242242
# get scanners
243-
scanners = kwargs.get("scanners", ["3T", "15T"])
243+
scanners = kwargs.get("scanners", ["3T", "15T", "XT"])
244244
if not isinstance(scanners, list):
245245
scanners = [scanners]
246246

@@ -387,16 +387,16 @@ def __init__(self, subject_id, cohort):
387387

388388
@property
389389
def scanner(self):
390+
# Note: no need to specify scanner strength with MELD Graph pipeline, but still need it to be compatible with previous MELD FCD dataset
390391
scanner = self.get_demographic_features('Scanner')
392+
if scanner is None:
393+
scanner="XT" #no need to specify
391394
if scanner in ("15T" , "1.5T" , "15t" , "1.5t" ):
392-
scanner="15T"
395+
scanner="15T" # to be compatible with old way
393396
elif scanner in ("3T" , "3t" ):
394-
scanner="3T"
397+
scanner="3T" # to be compatible with old way
395398
else:
396-
print(
397-
f"Error: incorrect scanner for {self.subject_id}. Unable to determine if scanner 15T or 3T "
398-
)
399-
sys.exit()
399+
scanner="XT" #no need to specify
400400
return scanner
401401

402402
@property
@@ -418,6 +418,12 @@ def surf_dir_path(self, hemi):
418418
"""return path to features dir (surf_dir)"""
419419
return os.path.join(self.site_code, self.scanner, self.group, self.subject_id, hemi)
420420

421+
def find_path(self, name):
422+
""" Find the first object with the subject id in the hdf5"""
423+
if self.subject_id in name:
424+
return name
425+
426+
421427
@property
422428
def is_patient(self):
423429
return self.group == "patient"
@@ -437,10 +443,10 @@ def get_lesion_hemisphere(self):
437443
return None
438444

439445
with self.cohort._site_hdf5(self.site_code, self.group) as f:
440-
surf_dir_lh = f.require_group(self.surf_dir_path("lh"))
446+
surf_dir_lh = f[os.path.join(self.site_code, f[self.site_code].visit(self.find_path), "lh")]
441447
if ".on_lh.lesion.mgh" in surf_dir_lh.keys():
442448
return "lh"
443-
surf_dir_rh = f.require_group(self.surf_dir_path("rh"))
449+
surf_dir_rh = f[os.path.join(self.site_code, f[self.site_code].visit(self.find_path), "rh")]
444450
if ".on_lh.lesion.mgh" in surf_dir_rh.keys():
445451
return "rh"
446452
return None
@@ -452,7 +458,8 @@ def has_features(self, features):
452458
def get_feature_list(self, hemi="lh"):
453459
"""Outputs a list of the features a participant has for each hemisphere"""
454460
with self.cohort._site_hdf5(self.site_code, self.group) as f:
455-
keys = list(f[self.surf_dir_path(hemi)].keys())
461+
surf_dir_path = os.path.join(self.site_code, f[self.site_code].visit(self.find_path), hemi)
462+
keys = list(f[surf_dir_path].keys())
456463
# remove lesion and boundaries from list of features
457464
if ".on_lh.lesion.mgh" in keys:
458465
keys.remove(".on_lh.lesion.mgh")
@@ -514,6 +521,8 @@ def get_demographic_features(
514521

515522
if "urfer" in desired_name:
516523
matched_name = "Freesurfer_nul"
524+
elif "Scanner" in desired_name:
525+
return None
517526
else:
518527
self.log.warning(f"Unable to find column matching {desired_name}, please double check for typos")
519528
return None
@@ -551,7 +560,7 @@ def load_feature_values(self, feature, hemi="lh"):
551560
feature_values = np.zeros(NVERT, dtype=np.float32)
552561
# read data from hdf5
553562
with self.cohort._site_hdf5(self.site_code, self.group) as f:
554-
surf_dir = f[self.surf_dir_path(hemi)]
563+
surf_dir = f[os.path.join(self.site_code, f[self.site_code].visit(self.find_path), hemi)]
555564
if feature in surf_dir.keys():
556565
feature_values[:] = surf_dir[feature][:]
557566
else:

meld_graph/tools_pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def create_demographic_file(subjects_ids, save_file, harmo_code='noHarmo'):
107107
df['ID']=subjects_ids.astype(str)
108108
df['Harmo code']=[str(harmo_code) for subject in subjects_ids]
109109
df['Group']=['patient' for subject in subjects_ids]
110-
df['Scanner']=['3T' for subject in subjects_ids]
110+
df['Scanner']=['XT' for subject in subjects_ids]
111111
df.to_csv(save_file)
112112

113113
def create_dataset_file(subjects_ids, save_file):

scripts/data_preparation/extract_features/io_meld.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@ def load_subject_features(fs_id,features,subject_number,medial_wall,subjects_dir
7878

7979
def get_group_site(fs_id, csv_path):
8080
"""
81-
Read demographic features from csv file and extract group, sex and scanner
81+
Read demographic features from csv file and extract harmo code and group
8282
"""
83-
features_name=["Harmo code", "Group", "Scanner"]
83+
features_name=["Harmo code", "Group"]
8484
df = pd.read_csv(csv_path, header=0, encoding="latin")
8585
# get index column
8686
id_col = None
@@ -124,14 +124,9 @@ def save_subject(fs_id,features,medial_wall,subject_dir, demographic_file, outp
124124
failed=False
125125
n_vert=163842
126126
#get subject info from id
127-
site_code, c_p, scanner = get_group_site(fs_id, demographic_file)
128-
if scanner in ("15T" , "1.5T" , "15t" , "1.5t" ):
129-
scanner="15T"
130-
elif scanner in ("3T" , "3t" ):
131-
scanner="3T"
132-
else:
133-
print('scanner for subject '+ fs_id + ' cannot be identified as either 1.5T or 3T...')
134-
scanner='false'
127+
site_code, c_p = get_group_site(fs_id, demographic_file)
128+
print('scanner for subject '+ fs_id + 'is set as default XT')
129+
scanner='XT'
135130
#skip subject if info not available
136131
if 'false' in (c_p, scanner, site_code):
137132
print("Skipping subject " + fs_id)

0 commit comments

Comments
 (0)