|
1 | 1 | #!/usr/bin/env python |
2 | 2 |
|
| 3 | +# # Comparing controls and treatments using pairwise compare |
| 4 | +# |
| 5 | +# This notebook employs pairwise comparison to quantify the similarity between cellular profiles. In this section, we assess the consistency of experimental replicates and evaluate the similarity between treated wells (containing failing cardiac fibroblast cells) and control wells. |
| 6 | + |
3 | 7 | # In[1]: |
4 | 8 |
|
5 | 9 |
|
|
9 | 13 | import pandas as pd |
10 | 14 | from comparators.PearsonsCorrelation import PearsonsCorrelation |
11 | 15 | from comparison_tools.PairwiseCompareManager import PairwiseCompareManager |
| 16 | +from pycytominer import consensus |
12 | 17 | from pycytominer.cyto_utils import load_profiles |
13 | 18 |
|
14 | 19 | # loading project utils |
|
37 | 42 | # split the features: |
38 | 43 | metadata, features = split_meta_and_features(agg_profile) |
39 | 44 |
|
40 | | -# now only select DMSO profiles that are DMSO-positive and DMSO-negative |
| 45 | +# now only select DMSO profiles that are DMSO-positive or DMSO-negative |
41 | 46 | dmso_profiles = agg_profile.loc[ |
42 | 47 | (agg_profile["Metadata_treatment"] == "DMSO-positive") |
43 | 48 | | (agg_profile["Metadata_treatment"] == "DMSO-negative") |
|
125 | 130 | ) |
126 | 131 |
|
127 | 132 |
|
| 133 | +# ## Calculate pairwise comparisons across DMSO consensus profiles |
| 134 | + |
| 135 | +# In[7]: |
| 136 | + |
| 137 | + |
| 138 | +consensus_dmso_df = consensus( |
| 139 | + profiles=dmso_profiles, |
| 140 | + replicate_columns=[ |
| 141 | + "Metadata_plate_barcode", |
| 142 | + "Metadata_plate_name", |
| 143 | + "Metadata_treatment", |
| 144 | + ], |
| 145 | + operation="median", |
| 146 | + features=features, |
| 147 | +) |
| 148 | + |
| 149 | +# split to positive and negative controls |
| 150 | +consensus_dmso_pos_df = consensus_dmso_df.loc[ |
| 151 | + consensus_dmso_df["Metadata_treatment"] == "DMSO-positive" |
| 152 | +] |
| 153 | +consensus_dmso_neg_df = consensus_dmso_df.loc[ |
| 154 | + consensus_dmso_df["Metadata_treatment"] == "DMSO-negative" |
| 155 | +] |
| 156 | + |
| 157 | + |
| 158 | +# In[8]: |
| 159 | + |
| 160 | + |
| 161 | +# comparing the consensus profiles of the positive controls |
| 162 | +consensus_dmso_pos_cntrl_comparer = PairwiseCompareManager( |
| 163 | + _df=consensus_dmso_pos_df, |
| 164 | + _feat_cols=features, |
| 165 | + _different_columns=["Metadata_plate_name"], |
| 166 | + _same_columns=["Metadata_treatment"], |
| 167 | + _comparator=PearsonsCorrelation(), |
| 168 | +) |
| 169 | + |
| 170 | +# comparing the consensus profiles of the negative controls |
| 171 | +consensus_dmso_neg_cntrl_comparer = PairwiseCompareManager( |
| 172 | + _df=consensus_dmso_neg_df, |
| 173 | + _feat_cols=features, |
| 174 | + _different_columns=["Metadata_plate_name"], |
| 175 | + _same_columns=["Metadata_treatment"], |
| 176 | + _comparator=PearsonsCorrelation(), |
| 177 | +) |
| 178 | + |
| 179 | +# collecting all pairwise scores |
| 180 | +consensus_pos_cntrl_pairwise_scores = consensus_dmso_pos_cntrl_comparer() |
| 181 | +consensus_neg_cntrl_pairwise_scores = consensus_dmso_neg_cntrl_comparer() |
| 182 | + |
| 183 | + |
| 184 | +# In[9]: |
| 185 | + |
| 186 | + |
| 187 | +# selecting only relevant columns |
| 188 | +consensus_pos_cntrl_scores = consensus_pos_cntrl_pairwise_scores[ |
| 189 | + [ |
| 190 | + "pearsons_correlation", |
| 191 | + "Metadata_treatment__antehoc_group0", |
| 192 | + "Metadata_plate_name__posthoc_group0", |
| 193 | + "Metadata_plate_name__posthoc_group1", |
| 194 | + ] |
| 195 | +] |
| 196 | +consensus_neg_cntrl_scores = consensus_neg_cntrl_pairwise_scores[ |
| 197 | + [ |
| 198 | + "pearsons_correlation", |
| 199 | + "Metadata_treatment__antehoc_group0", |
| 200 | + "Metadata_plate_name__posthoc_group0", |
| 201 | + "Metadata_plate_name__posthoc_group1", |
| 202 | + ] |
| 203 | +] |
| 204 | + |
| 205 | +# combine the positive and negative control scores and rename the treatment column |
| 206 | +final_consensus_pairwise_scores = pd.concat( |
| 207 | + [ |
| 208 | + consensus_pos_cntrl_scores, |
| 209 | + consensus_neg_cntrl_scores, |
| 210 | + ] |
| 211 | +).rename(columns={"Metadata_treatment__antehoc_group0": "Metadata_treatment"}).reset_index(drop=True) |
| 212 | + |
| 213 | +# saving the final consensus pairwise scores |
| 214 | +final_consensus_pairwise_scores.to_csv(output_path / "final_dmso_consensus_pairwise_scores.csv", index=False) |
| 215 | + |
| 216 | + |
| 217 | +# ## Calculating pairwise comparisons within replicates |
| 218 | +# |
| 219 | +# In this section, we compute pairwise Pearson correlations between replicates of the same treatment. This helps identify poorly performing technical replicates—those with low correlation values—while high correlations indicate consistent and reliable measurements across replicates. |
| 220 | + |
| 221 | +# In[10]: |
| 222 | + |
| 223 | + |
| 224 | +# selecting only the treated wells without the DMSO profiles |
| 225 | +treated_wells_only_df = agg_profile.loc[ |
| 226 | + (agg_profile["Metadata_treatment"] != "DMSO-positive") & (agg_profile["Metadata_treatment"] != "DMSO-negative") |
| 227 | +].copy() |
| 228 | + |
| 229 | +# reducing the metadata to only the relevant ones |
| 230 | +treated_wells_only_df = treated_wells_only_df[["Metadata_plate_name", "Metadata_treatment"] + features] |
| 231 | + |
| 232 | + |
| 233 | +# In[ ]: |
| 234 | + |
| 235 | + |
| 236 | +# calculating the pairwise scores between replicates |
| 237 | +replicate_pairwise_comparer = PairwiseCompareManager( |
| 238 | + _df=treated_wells_only_df, |
| 239 | + _feat_cols=features, |
| 240 | + _different_columns=["Metadata_plate_name"], |
| 241 | + _same_columns=["Metadata_treatment"], |
| 242 | + _comparator=PearsonsCorrelation(), |
| 243 | +) |
| 244 | + |
| 245 | +# collecting all pairwise scores |
| 246 | +replicate_pairwise_scores = replicate_pairwise_comparer() |
| 247 | + |
| 248 | + |
| 249 | +# In[12]: |
| 250 | + |
| 251 | + |
| 252 | +# selecting only relevant columns |
| 253 | +replicate_pairwise_scores = replicate_pairwise_scores[["pearsons_correlation", "Metadata_treatment__antehoc_group0", "Metadata_plate_name__posthoc_group0", "Metadata_plate_name__posthoc_group1"]] |
| 254 | + |
| 255 | +# renaming the columns |
| 256 | +replicate_pairwise_scores.columns = ["pearsons_correlation", "Metadata_treatment", "plate_name_0", "plate_name_1"] |
| 257 | + |
| 258 | +# saving the final pairwise scores |
| 259 | +replicate_pairwise_scores.to_csv(output_path / "final_replicate_pairwise_scores.csv", index=False) |
| 260 | + |
| 261 | + |
| 262 | + |
128 | 263 | # ## Calculating pairwise comparisons across treatments |
129 | 264 | # |
130 | 265 | # In this section of the notebook, we conduct pairwise comparisons across all treatments and specific controls. Two data frames are created: |
131 | 266 | # |
132 | 267 | # - **healthy_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the healthy reference. |
133 | 268 | # - **failing_ref**: This dataset contains pairwise calculations comparing all treated failing cells to the failing reference. |
134 | 269 |
|
135 | | -# In[7]: |
| 270 | +# In[13]: |
136 | 271 |
|
137 | 272 |
|
138 | 273 | # calculating pairwise correlation between healthy control and treated failing wells |
|
159 | 294 | failing_ref_trt_pairwise_scores = failing_ref_trt_pairwise_comparer() |
160 | 295 |
|
161 | 296 |
|
162 | | -# In[8]: |
| 297 | +# In[14]: |
163 | 298 |
|
164 | 299 |
|
165 | 300 | # Select only the relevant columns and add a reference column for healthy controls |
166 | 301 | health_ref_pairwise_scores = healthy_ref_trt_pairwise_scores[ |
167 | | - ["pearsons_correlation", "Metadata_treatment__antehoc_group1"] |
| 302 | + ["pearsons_correlation", "Metadata_treatment__posthoc_group1"] |
168 | 303 | ].copy() |
169 | 304 | health_ref_pairwise_scores["reference"] = "Healthy" |
170 | 305 |
|
171 | 306 | # Select only the relevant columns and add a reference column for failing controls |
172 | 307 | failing_ref_pairwise_scores = failing_ref_trt_pairwise_scores[ |
173 | | - ["pearsons_correlation", "Metadata_treatment__antehoc_group1"] |
| 308 | + ["pearsons_correlation", "Metadata_treatment__posthoc_group1"] |
174 | 309 | ].copy() |
175 | 310 | failing_ref_pairwise_scores["reference"] = "Failing" |
176 | 311 |
|
177 | 312 | # Combine the healthy and failing control dataframes into a single dataframe |
178 | 313 | final_trt_pairwise_scores = ( |
179 | 314 | pd.concat([health_ref_pairwise_scores, failing_ref_pairwise_scores]) |
180 | | - .rename(columns={"Metadata_treatment__antehoc_group1": "Metadata_treatment"}) |
| 315 | + .rename(columns={"Metadata_treatment__posthoc_group1": "Metadata_treatment"}) |
181 | 316 | .reset_index(drop=True) |
182 | 317 | ) |
183 | 318 |
|
|
0 commit comments