-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmeso_data_pipe.py
More file actions
114 lines (102 loc) · 4.18 KB
/
meso_data_pipe.py
File metadata and controls
114 lines (102 loc) · 4.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import pandas as pd
from tiatoolbox.annotation.storage import SQLiteStore, Annotation
from pathlib import Path
import json
from shapely.geometry import Polygon
import math
from shapely.affinity import translate
"""processes raw detections from qupath and save them in an
AnnotationStore for use with MesoGraph"""
def to_core_space(core_db: SQLiteStore, top_left):
    """Shift all annotations in ``core_db`` by the negated ``top_left`` offset.

    After the shift, annotation coordinates are relative to the top-left
    corner of the core image. The store is committed before returning.

    Args:
        core_db: Annotation store whose contents are translated in place.
        top_left: Indexable pair ``(x, y)`` giving the core image's top-left
            corner; only elements 0 and 1 are read.
    """
    shift_x = -top_left[0]
    shift_y = -top_left[1]
    core_db.translate_db(shift_x, shift_y)
    core_db.commit()
def _feature_to_annotation(feature, top_left):
    """Convert one GeoJSON detection feature into a core-space ``Annotation``.

    Args:
        feature: Parsed GeoJSON feature holding ``properties.measurements``
            (a list of ``{"name", "value"}`` pairs) and a ``nucleusGeometry``
            whose first coordinate ring is the nucleus outline.
        top_left: Indexable ``(x, y)`` offset of the core image's top-left
            corner, subtracted from the polygon coordinates.

    Returns:
        An ``Annotation`` whose geometry is the translated nucleus polygon and
        whose properties are the measurements plus the untranslated centroid.
    """
    # NaN measurement values are stored as 0.
    props = {
        pair["name"]: 0 if math.isnan(pair["value"]) else pair["value"]
        for pair in feature["properties"]["measurements"]
    }
    nucleus = Polygon(feature["nucleusGeometry"]["coordinates"][0])
    # Centroid is taken before translation, i.e. kept in slide space, so it
    # can be used later to extract ResNet features etc.
    # NOTE(review): the keys say "µm" but the values are read straight from
    # the polygon coordinates - confirm the intended units.
    props["Centroid X µm"] = nucleus.centroid.x
    props["Centroid Y µm"] = nucleus.centroid.y
    # Make the contour points relative to the core image top left for vis.
    core_space = translate(nucleus, -top_left[0], -top_left[1])
    return Annotation(core_space, props)


def mk_dbs_from_geojson(dataset="meso"):
    """Build one annotation store per TMA core from QuPath GeoJSON detections.

    For every ``*.geojson`` file found one directory level below the dataset's
    detections folder, the detections are read, shifted so that they are
    relative to the core image's top-left corner, and dumped to
    ``<detections>/stores/<core stem>_<label code>.db``. Cores that have no
    entry in the labels CSV or in the core-centroid CSV are skipped.

    Args:
        dataset: ``"meso"`` selects the Meso TMA paths and geometry; any other
            value selects the Mesobank TMA ones.

    Raises:
        KeyError: If a core's label is not one of epithelioid, biphasic,
            sarcomatoid or desmoplastic (case-insensitive).
    """
    if dataset == "meso":
        labels_path = Path(r"D:\Meso\core_labels_AIME.csv")
        dets_path = Path(r"D:\QuPath_Projects\Meso_TMA\detections")
        cents = pd.read_csv(r"D:\QuPath_Projects\Meso_TMA\core_cents.csv")
        um_per_pix = 0.4415
        core_width = 2854
    else:
        labels_path = Path(r"D:\Mesobank_TMA\Mesobank_labels.csv")
        dets_path = Path(r"D:\Mesobank_TMA\mesobank_proj\detections")
        cents = pd.read_csv(r"D:\Mesobank_TMA\mesobank_cents.csv")
        um_per_pix = 0.5034
        # NOTE(review): an older note in this file said 1462 for mesobank;
        # confirm which core width is correct.
        core_width = 1566
    # Index centroids by core name for BOTH datasets; without this the
    # membership test below can never match a core name.
    cents.set_index("Name", inplace=True)

    # Forward slash works on every platform and avoids the invalid "\*" escape.
    dets_list = list(dets_path.glob("*/*.geojson"))

    labels_df = pd.read_csv(labels_path)
    labels_df.set_index("Core", inplace=True)

    label_codes = {
        "epithelioid": "E",
        "biphasic": "B",
        "sarcomatoid": "S",
        "desmoplastic": "D",
    }

    # The dump target directory must exist before a store can be written.
    stores_dir = dets_path / "stores"
    stores_dir.mkdir(parents=True, exist_ok=True)

    for core in dets_list:
        # Centroids are looked up by the part of the stem before the first
        # underscore, so the guard has to test that same key.
        core_name = core.stem.split("_")[0]
        if core.stem not in labels_df.index or core_name not in cents.index:
            continue
        label = label_codes[labels_df.loc[core.stem]["labels"].lower()]

        # Core centroid divided by um_per_pix, minus half the core width,
        # gives the top-left corner of the core image.
        top_left = (
            cents.loc[core_name][["Centroid X µm", "Centroid Y µm"]].values
            / um_per_pix
            - core_width / 2
        )

        # Explicit UTF-8: the platform default encoding is not reliable for
        # files that may contain non-ASCII text such as "µ".
        with open(core, "r", encoding="utf-8") as f:
            anns = json.load(f)

        store = SQLiteStore()
        for ann in anns["features"]:
            store.append(_feature_to_annotation(ann, top_left))
        store.commit()
        store.dump(str(stores_dir / f"{core.stem}_{label}.db"))
if __name__ == "__main__":
    # Script entry point: build the per-core stores for the chosen dataset.
    # Any value other than "meso" selects the Mesobank paths.
    dataset = "mesobank"
    mk_dbs_from_geojson(dataset=dataset)
# if dataset == "mesobank":
# dets_path = Path(r"D:\Mesobank_TMA\mesobank_proj\detections\stores")
# cents = pd.read_csv(r"D:\Mesobank_TMA\core_cents.csv")
# um_per_pix = 0.5034
# core_width = 1462
# else:
# dets_path = Path(r"D:\QuPath_Projects\Meso_TMA\detections\stores")
# cents = pd.read_csv(r"D:\QuPath_Projects\Meso_TMA\core_cents.csv")
# um_per_pix = 0.4415
# core_width = 2854
# # dets_list = list(dets_path.glob('*.geojson'))
# dets_list = list(dets_path.glob("*.db"))
# cents.set_index("Name", inplace=True)
# for core in dets_list:
# if core.stem.split("_")[0] not in cents.index:
# continue
# print(f"processing core {core.stem}")
# SQ = SQLiteStore(core)
# top_left = (
# cents.loc[core.stem.split("_")[0]][
# ["Centroid X µm", "Centroid Y µm"]
# ].values
# / um_per_pix
# - core_width / 2
# )
# to_core_space(SQ, top_left)