Skip to content

Commit 70f41c5

Browse files
authored
Adding pairwise analysis for control consensus and replicate quality of treatments assessment (#33)
* updated pairwise compare and pre-commit hooks * updated pairwise compare analysis: consensus and replicate scoring * added updates
1 parent 122ffc7 commit 70f41c5

17 files changed

Lines changed: 7718 additions & 6153 deletions

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
repos:
55
# Keep pre-commit itself up to date
66
- repo: https://gitlab.com/vojko.pribudic.foss/pre-commit-update
7-
rev: v0.6.1
7+
rev: v0.7.0
88
hooks:
99
- id: pre-commit-update
1010
args: ["--verbose"]
1111

1212
# Ruff for linting Python files
1313
- repo: https://github.com/charliermarsh/ruff-pre-commit
14-
rev: v0.11.5
14+
rev: v0.11.8
1515
hooks:
1616
- id: ruff
1717
args: ["--fix"]

notebooks/exploratory-analysis/pairwise-analysis/dmso-consensus-profiles.ipynb

Lines changed: 390 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env python
2+
3+
# # Generating DMSO consensus profiles
4+
5+
# In[1]:
6+
7+
8+
import pathlib
9+
import sys
10+
11+
import pandas as pd
12+
from pycytominer import consensus
13+
14+
sys.path.append("../../../")
15+
from utils import data_utils
16+
17+
# In[2]:
18+
19+
20+
# setting profile path
21+
concat_profile_path = pathlib.Path("../UMAP-aggregated-fs-profiles/results/concat_data/batch_1_concat_agg_fs.csv").resolve(strict=True)
22+
23+
# setting output path
24+
# output_path = pathlib.Path("results/").resolve(strict=True)
25+
26+
27+
# In[3]:
28+
29+
30+
# load in aggregate profiles
31+
agg_df = pd.read_csv(concat_profile_path)
32+
33+
# subset the aggregate profiles to the control wells only (control type "positive" or "negative"), i.e. the DMSO-treated wells
34+
dmso_agg_df = agg_df.loc[
35+
(agg_df["Metadata_control_type"] == "positive")
36+
| (agg_df["Metadata_control_type"] == "negative")]
37+
38+
# split the metadata and morphology features
39+
dmso_agg_meta, dmso_agg_feats = data_utils.split_meta_and_features(dmso_agg_df)
40+
41+
# display
42+
print("Shape: ", dmso_agg_df.shape)
43+
dmso_agg_df.head()
44+
45+
46+
# In[4]:
47+
48+
49+
consensus_df = consensus(profiles = dmso_agg_df,
50+
replicate_columns=["Metadata_plate_barcode", "Metadata_plate_name", "Metadata_treatment"],
51+
operation="median",
52+
features=dmso_agg_feats,
53+
)

notebooks/exploratory-analysis/pairwise-analysis/nbconverted/pairwise-compare.py

Lines changed: 141 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#!/usr/bin/env python
22

3+
# # Comparing controls and treatments using pairwise compare
4+
#
5+
# This notebook employs pairwise comparison to quantify the similarity between cellular profiles. In this section, we assess the consistency of experimental replicates and evaluate the similarity between treated wells (containing failing cardiac fibroblast cells) and control wells.
6+
37
# In[1]:
48

59

@@ -9,6 +13,7 @@
913
import pandas as pd
1014
from comparators.PearsonsCorrelation import PearsonsCorrelation
1115
from comparison_tools.PairwiseCompareManager import PairwiseCompareManager
16+
from pycytominer import consensus
1217
from pycytominer.cyto_utils import load_profiles
1318

1419
# loading project utils
@@ -37,7 +42,7 @@
3742
# split the features:
3843
metadata, features = split_meta_and_features(agg_profile)
3944

40-
# now only select DMSO profiles that are DMSO-positive and DMSO-negative
45+
# now select only the DMSO-positive and DMSO-negative control profiles
4146
dmso_profiles = agg_profile.loc[
4247
(agg_profile["Metadata_treatment"] == "DMSO-positive")
4348
| (agg_profile["Metadata_treatment"] == "DMSO-negative")
@@ -125,14 +130,144 @@
125130
)
126131

127132

133+
# ## Calculating pairwise correlations across DMSO consensus profiles
134+
135+
# In[7]:
136+
137+
138+
consensus_dmso_df = consensus(
139+
profiles=dmso_profiles,
140+
replicate_columns=[
141+
"Metadata_plate_barcode",
142+
"Metadata_plate_name",
143+
"Metadata_treatment",
144+
],
145+
operation="median",
146+
features=features,
147+
)
148+
149+
# split to positive and negative controls
150+
consensus_dmso_pos_df = consensus_dmso_df.loc[
151+
consensus_dmso_df["Metadata_treatment"] == "DMSO-positive"
152+
]
153+
consensus_dmso_neg_df = consensus_dmso_df.loc[
154+
consensus_dmso_df["Metadata_treatment"] == "DMSO-negative"
155+
]
156+
157+
158+
# In[8]:
159+
160+
161+
# comparing the consensus profiles of the positive controls
162+
consensus_dmso_pos_cntrl_comparer = PairwiseCompareManager(
163+
_df=consensus_dmso_pos_df,
164+
_feat_cols=features,
165+
_different_columns=["Metadata_plate_name"],
166+
_same_columns=["Metadata_treatment"],
167+
_comparator=PearsonsCorrelation(),
168+
)
169+
170+
# comparing the consensus profiles of the negative controls
171+
consensus_dmso_neg_cntrl_comparer = PairwiseCompareManager(
172+
_df=consensus_dmso_neg_df,
173+
_feat_cols=features,
174+
_different_columns=["Metadata_plate_name"],
175+
_same_columns=["Metadata_treatment"],
176+
_comparator=PearsonsCorrelation(),
177+
)
178+
179+
# collecting all pairwise scores
180+
consensus_pos_cntrl_pairwise_scores = consensus_dmso_pos_cntrl_comparer()
181+
consensus_neg_cntrl_pairwise_scores = consensus_dmso_neg_cntrl_comparer()
182+
183+
184+
# In[9]:
185+
186+
187+
# selecting only relevant columns
188+
consensus_pos_cntrl_scores = consensus_pos_cntrl_pairwise_scores[
189+
[
190+
"pearsons_correlation",
191+
"Metadata_treatment__antehoc_group0",
192+
"Metadata_plate_name__posthoc_group0",
193+
"Metadata_plate_name__posthoc_group1",
194+
]
195+
]
196+
consensus_neg_cntrl_scores = consensus_neg_cntrl_pairwise_scores[
197+
[
198+
"pearsons_correlation",
199+
"Metadata_treatment__antehoc_group0",
200+
"Metadata_plate_name__posthoc_group0",
201+
"Metadata_plate_name__posthoc_group1",
202+
]
203+
]
204+
205+
# combining the positive and negative control consensus scores into a single table
206+
final_consensus_pairwise_scores = pd.concat(
207+
[
208+
consensus_pos_cntrl_scores,
209+
consensus_neg_cntrl_scores,
210+
]
211+
).rename(columns={"Metadata_treatment__antehoc_group0": "Metadata_treatment"}).reset_index(drop=True)
212+
213+
# saving the final consensus pairwise scores
214+
final_consensus_pairwise_scores.to_csv(output_path / "final_dmso_consensus_pairwise_scores.csv", index=False)
215+
216+
217+
# ## Calculating pairwise comparisons within replicates
218+
#
219+
# In this section, we compute pairwise Pearson correlations between replicates of the same treatment. This helps identify poorly performing technical replicates—those with low correlation values—while high correlations indicate consistent and reliable measurements across replicates.
220+
221+
# In[10]:
222+
223+
224+
# selecting only the treated wells without the DMSO profiles
225+
treated_wells_only_df = agg_profile.loc[
226+
(agg_profile["Metadata_treatment"] != "DMSO-positive") & (agg_profile["Metadata_treatment"] != "DMSO-negative")
227+
].copy()
228+
229+
# keeping only the metadata columns needed for the comparison (plate name and treatment) plus the features
230+
treated_wells_only_df = treated_wells_only_df[["Metadata_plate_name", "Metadata_treatment"] + features]
231+
232+
233+
# In[ ]:
234+
235+
236+
# calculating the pairwise scores between replicates
237+
replicate_pairwise_comparer = PairwiseCompareManager(
238+
_df=treated_wells_only_df,
239+
_feat_cols=features,
240+
_different_columns=["Metadata_plate_name"],
241+
_same_columns=["Metadata_treatment"],
242+
_comparator=PearsonsCorrelation(),
243+
)
244+
245+
# collecting all pairwise scores
246+
replicate_pairwise_scores = replicate_pairwise_comparer()
247+
248+
249+
# In[12]:
250+
251+
252+
# selecting only relevant columns
253+
replicate_pairwise_scores = replicate_pairwise_scores[["pearsons_correlation", "Metadata_treatment__antehoc_group0", "Metadata_plate_name__posthoc_group0", "Metadata_plate_name__posthoc_group1"]]
254+
255+
# renaming the columns
256+
replicate_pairwise_scores.columns = ["pearsons_correlation", "Metadata_treatment", "plate_name_0", "plate_name_1"]
257+
258+
# saving the final pairwise scores
259+
replicate_pairwise_scores.to_csv(output_path / "final_replicate_pairwise_scores.csv", index=False)
260+
261+
262+
128263
# ## Calculating pairwise correlations across treatments
129264
#
130265
# In this section of the notebook, we conduct pairwise comparisons across all treatments and specific controls. Two data frames are created:
131266
#
132267
# - **healthy_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the healthy reference.
133268
# - **failing_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the failing reference.
134269

135-
# In[7]:
270+
# In[13]:
136271

137272

138273
# calculating pairwise correlation between healthy control and treated failing wells
@@ -159,25 +294,25 @@
159294
failing_ref_trt_pairwise_scores = failing_ref_trt_pairwise_comparer()
160295

161296

162-
# In[8]:
297+
# In[14]:
163298

164299

165300
# Select only the relevant columns and add a reference column for healthy controls
166301
health_ref_pairwise_scores = healthy_ref_trt_pairwise_scores[
167-
["pearsons_correlation", "Metadata_treatment__antehoc_group1"]
302+
["pearsons_correlation", "Metadata_treatment__posthoc_group1"]
168303
].copy()
169304
health_ref_pairwise_scores["reference"] = "Healthy"
170305

171306
# Select only the relevant columns and add a reference column for failing controls
172307
failing_ref_pairwise_scores = failing_ref_trt_pairwise_scores[
173-
["pearsons_correlation", "Metadata_treatment__antehoc_group1"]
308+
["pearsons_correlation", "Metadata_treatment__posthoc_group1"]
174309
].copy()
175310
failing_ref_pairwise_scores["reference"] = "Failing"
176311

177312
# Combine the healthy and failing control dataframes into a single dataframe
178313
final_trt_pairwise_scores = (
179314
pd.concat([health_ref_pairwise_scores, failing_ref_pairwise_scores])
180-
.rename(columns={"Metadata_treatment__antehoc_group1": "Metadata_treatment"})
315+
.rename(columns={"Metadata_treatment__posthoc_group1": "Metadata_treatment"})
181316
.reset_index(drop=True)
182317
)
183318

0 commit comments

Comments
 (0)