diff --git a/examples/custom-code-AD/README.md b/examples/custom-code-AD/README.md index 6d7fbc8..c634c55 100644 --- a/examples/custom-code-AD/README.md +++ b/examples/custom-code-AD/README.md @@ -1,5 +1,29 @@ # Custom Code in Automation Designer +## 5-parameter-logistic + +Example snippet fits a 5-Parameter Logistic (5PL) curve to dose-response data to extract EC50, Hill slope, and asymmetry, with AIC comparison against a standard 4PL fit and an interactive plot with residuals subplot. + +- **[Input File](./snippets/5-parameter-logistic/docs/input/dose_response_5pl_data.xlsx)** +- **[Custom Code Block](./snippets/5-parameter-logistic/dose_response_5pl.py)** +- **[Output Files](./snippets/5-parameter-logistic/docs/output)** + +## kinetic-timecourse + +Example snippet fits one-phase association kinetic curves per compound to extract parameters (Emax, kobs, t½, AUC, R²), with t-test comparison at the inflection point and a multi-compound time-course plot with 95% CI ribbons. + +- **[Input File](./snippets/kinetic-timecourse/docs/input/timecourse_data.xlsx)** +- **[Custom Code Block](./snippets/kinetic-timecourse/kinetics_timecourse.py)** +- **[Output Files](./snippets/kinetic-timecourse/docs/output)** + +## multi-group-anova + +Example snippet performs one-way ANOVA to test for significant differences across treatment groups, followed by Tukey HSD post-hoc analysis for pairwise comparisons, with descriptive statistics and an annotated bar chart output. + +- **[Input File](./snippets/multi-group-anova/docs/input/multigroup_efficacy_data.xlsx)** +- **[Custom Code Block](./snippets/multi-group-anova/multigroup_anova.py)** +- **[Output Files](./snippets/multi-group-anova/docs/output)** + ## plot-chromatogram Example snippet demonstrates plotting a multi-axis HPLC chromatogram (retention volume X Absorbance (mAU), Temperature (degC), pH) with custom code. diff --git a/examples/custom-code-AD/requirements.txt b/examples/custom-code-AD/requirements.txt index 03bd3ee..28e7008 100644 --- a/examples/custom-code-AD/requirements.txt +++ b/examples/custom-code-AD/requirements.txt @@ -1,12 +1,15 @@ allotropy==0.1.105 biopython==1.86 +flowkit==0.1.0 lmfit==1.3.4 +matplotlib==3.10.3 numpy==2.2.4 openpyxl==3.1.5 pandas==2.2.3 plotly==5.22.0 pyarrow==19.0.1 -pydantic==2.12.5 +pydantic==1.10.21 +seaborn==0.13.2 scikit-learn==1.6.1 scipy==1.15.2 statsmodels==0.14.4 \ No newline at end of file diff --git a/examples/custom-code-AD/snippets/5-parameter-logistic/docs/input/dose_response_5pl_data.xlsx b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/input/dose_response_5pl_data.xlsx new file mode 100644 index 0000000..9804737 Binary files /dev/null and b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/input/dose_response_5pl_data.xlsx differ diff --git a/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/5pl_parameters.csv b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/5pl_parameters.csv new file mode 100644 index 0000000..7d2e650 --- /dev/null +++ b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/5pl_parameters.csv @@ -0,0 +1,9 @@ +Parameter,Value,Std_Error,Note +Bottom (%),1.37,0.873, +Top (%),99.235,0.9498, +EC50 (µM),0.7516,0.2311, +Hill Slope,1.623,0.2467, +Asymmetry (F),0.775,0.2657,asym=1.0 → standard 4PL +R²,0.9994,, +AIC (5PL),11.69,, +ΔAIC vs 4PL,-1.31,,Positive = 5PL preferred diff --git a/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/aggregated_data.csv b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/aggregated_data.csv new file mode 100644 index 0000000..91ff285 --- /dev/null +++ b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/aggregated_data.csv @@ -0,0 +1,12 @@ +Concentration_uM,Mean_Inhibition,SD,N,SEM +0.001,2.4375,2.027,4,1.0135 +0.003,0.7925,0.9151,4,0.4576 +0.01,2.34,1.6329,4,0.8164 +0.03,2.2925,1.7193,4,0.8596 +0.1,8.165,2.0677,4,1.0338 +0.3,28.765,2.5523,4,1.2761 +1.0,67.31,2.2448,4,1.1224 +3.0,93.6675,2.1078,4,1.0539 +10.0,96.17,2.5479,4,1.2739 +30.0,99.835,3.1382,4,1.5691 +100.0,99.25,1.5948,4,0.7974 diff --git a/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/dose_response_curve.png b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/dose_response_curve.png new file mode 100644 index 0000000..23bf866 Binary files /dev/null and b/examples/custom-code-AD/snippets/5-parameter-logistic/docs/output/dose_response_curve.png differ diff --git a/examples/custom-code-AD/snippets/5-parameter-logistic/dose_response_5pl.py b/examples/custom-code-AD/snippets/5-parameter-logistic/dose_response_5pl.py new file mode 100644 index 0000000..a836971 --- /dev/null +++ b/examples/custom-code-AD/snippets/5-parameter-logistic/dose_response_5pl.py @@ -0,0 +1,261 @@ +""" +Benchling Custom Code Demo 5: Dose-Response 5PL Curve Fitting +============================================================= +5-Parameter Logistic (5PL) model adds an asymmetry parameter to the +standard 4PL, giving a better fit for asymmetric sigmoidal curves — +common in immunoassays and ELISA-based dose-response experiments. + +INPUTS: + inputs[0]: pd.DataFrame with columns: + - Concentration_uM (float) + - Replicate (int) + - Signal_%Inhibition (float) + +OUTPUTS: + - "5pl_parameters" pd.DataFrame — EC50, Hill slope, Asymmetry, R², AIC, ± SE + - "aggregated_data" pd.DataFrame — Mean ± SEM per concentration + - "dose_response_curve" go.Figure — Interactive 5PL plot with residuals subplot + +Supported packages: +allotropy, biopython, lmfit, numpy, openpyxl, pandas, plotly, +pyarrow, pydantic, scikit-learn, scipy, statsmodels +""" + +from io import BytesIO +import numpy as np +import pandas as pd +import plotly.graph_objects as go +from plotly.subplots import make_subplots +from typing import NamedTuple +from lmfit import Model + + +class IOData(NamedTuple): + name: str + data: BytesIO | pd.DataFrame | go.Figure + + +# --------------------------------------------------------------------------- +# 5PL model +# --------------------------------------------------------------------------- +def five_pl(x, bottom, top, ec50, hill, asym): + """ + 5-Parameter Logistic model. + Extends 4PL with 'asym' (asymmetry / F parameter) which allows the + upper and lower plateaus to be approached at different rates. + asym=1.0 reduces exactly to the standard 4PL model. + """ + return bottom + (top - bottom) / ((1.0 + (ec50 / np.clip(x, 1e-12, None)) ** hill) ** asym) + + +def fit_5pl(concentrations: np.ndarray, responses: np.ndarray): + """ + Fit 5PL using lmfit. Returns (result, r_squared, aic). + Also fits 4PL (asym fixed=1) so AIC can flag if 5PL is warranted. + """ + model = Model(five_pl) + + # --- 5PL fit --- + params_5pl = model.make_params( + bottom=dict(value=float(responses.min()), min=0, max=20), + top= dict(value=float(responses.max()), min=80, max=110), + ec50= dict(value=float(np.median(concentrations)), min=1e-6, max=1e6), + hill= dict(value=1.5, min=0.1, max=10), + asym= dict(value=1.0, min=0.05, max=10), + ) + result_5pl = model.fit(responses, params_5pl, x=concentrations) + + # --- 4PL fit (asym locked to 1) for AIC comparison --- + params_4pl = model.make_params( + bottom=dict(value=float(responses.min()), min=0, max=20), + top= dict(value=float(responses.max()), min=80, max=110), + ec50= dict(value=float(np.median(concentrations)), min=1e-6, max=1e6), + hill= dict(value=1.5, min=0.1, max=10), + asym= dict(value=1.0, vary=False), + ) + result_4pl = model.fit(responses, params_4pl, x=concentrations) + + # R² + fitted = result_5pl.best_fit + ss_res = float(np.sum((responses - fitted) ** 2)) + ss_tot = float(np.sum((responses - responses.mean()) ** 2)) + r2 = 1.0 - ss_res / ss_tot if ss_tot > 0 else 0.0 + + return result_5pl, result_4pl, r2 + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- +def custom_code(inputs: list[IOData], **kwargs) -> list[IOData]: + # --- Load input --- + df = None + for i in inputs: + if isinstance(i.data, pd.DataFrame): + df = i.data + break + elif isinstance(i.data, BytesIO): + df = pd.read_excel(i.data) + break + if df is None: + raise ValueError("No DataFrame or Excel file found in inputs") + + df["Concentration_uM"] = df["Concentration_uM"].astype(float) + df["Signal_%Inhibition"] = df["Signal_%Inhibition"].astype(float) + + # --- Aggregate replicates --- + agg = ( + df.groupby("Concentration_uM")["Signal_%Inhibition"] + .agg(Mean="mean", SD="std", N="count") + .reset_index() + ) + agg.columns = ["Concentration_uM", "Mean_Inhibition", "SD", "N"] + agg["SEM"] = agg["SD"] / np.sqrt(agg["N"]) + + conc = agg["Concentration_uM"].values + resp = agg["Mean_Inhibition"].values + + # --- Fit 5PL (and 4PL for comparison) --- + result_5pl, result_4pl, r2 = fit_5pl(conc, resp) + p = result_5pl.params + + def _se(param): + return round(float(param.stderr), 4) if param.stderr else None + + # Delta AIC: positive = 5PL is better fit + delta_aic = result_4pl.aic - result_5pl.aic + + summary = pd.DataFrame({ + "Parameter": ["Bottom (%)", "Top (%)", "EC50 (µM)", "Hill Slope", "Asymmetry (F)", "R²", "AIC (5PL)", "ΔAIC vs 4PL"], + "Value": [ + round(float(p["bottom"].value), 3), + round(float(p["top"].value), 3), + round(float(p["ec50"].value), 4), + round(float(p["hill"].value), 3), + round(float(p["asym"].value), 3), + round(r2, 4), + round(float(result_5pl.aic), 2), + round(float(delta_aic), 2), + ], + "Std_Error": [ + _se(p["bottom"]), _se(p["top"]), _se(p["ec50"]), + _se(p["hill"]), _se(p["asym"]), None, None, None, + ], + "Note": [ + "", "", "", + "", "asym=1.0 → standard 4PL", + "", "", + "Positive = 5PL preferred", + ], + }) + + # --- Smooth fit lines --- + x_fit = np.logspace(np.log10(conc.min()), np.log10(conc.max()), 400) + y_5pl = five_pl(x_fit, p["bottom"].value, p["top"].value, + p["ec50"].value, p["hill"].value, p["asym"].value) + p4 = result_4pl.params + y_4pl = five_pl(x_fit, p4["bottom"].value, p4["top"].value, + p4["ec50"].value, p4["hill"].value, 1.0) + + # Residuals (5PL) + fitted_at_conc = five_pl(conc, p["bottom"].value, p["top"].value, + p["ec50"].value, p["hill"].value, p["asym"].value) + residuals = resp - fitted_at_conc + ec50_val = float(p["ec50"].value) + + # --- Build figure: main curve + residuals subplot --- + fig = make_subplots( + rows=2, cols=1, + row_heights=[0.72, 0.28], + shared_xaxes=False, + vertical_spacing=0.12, + subplot_titles=("5PL Dose-Response Fit", "Residuals"), + ) + + # Individual replicates + fig.add_trace(go.Scatter( + x=df["Concentration_uM"], y=df["Signal_%Inhibition"], + mode="markers", + marker=dict(symbol="circle-open", size=6, color="#7FBBDA", opacity=0.7), + name="Replicates", + showlegend=True, + ), row=1, col=1) + + # Mean ± SEM + fig.add_trace(go.Scatter( + x=agg["Concentration_uM"], y=agg["Mean_Inhibition"], + error_y=dict(array=agg["SEM"].tolist(), visible=True, color="#1B6CA8"), + mode="markers", + marker=dict(size=10, color="#1B6CA8"), + name="Mean ± SEM", + ), row=1, col=1) + + # 4PL fit (reference) + fig.add_trace(go.Scatter( + x=x_fit, y=y_4pl, + mode="lines", + line=dict(color="#AAAAAA", width=1.8, dash="dash"), + name=f"4PL fit (ΔAIC={delta_aic:+.1f})", + ), row=1, col=1) + + # 5PL fit + fig.add_trace(go.Scatter( + x=x_fit, y=y_5pl, + mode="lines", + line=dict(color="#E84545", width=2.5), + name=f"5PL fit R²={r2:.4f}", + ), row=1, col=1) + + # EC50 dashed vertical line using paper-relative x coords on row 1 axes + fig.add_shape( + type="line", + xref="x", yref="y", + x0=ec50_val, x1=ec50_val, + y0=-5, y1=110, + line=dict(color="#888888", width=1.2, dash="dot"), + row=1, col=1, + ) + fig.add_annotation( + x=np.log10(ec50_val), y=55, + xref="x", yref="y", + text=f"EC50={ec50_val:.3f} µM", + showarrow=False, + font=dict(size=11, color="#555555"), + bgcolor="rgba(255,255,255,0.7)", + xanchor="left", + row=1, col=1, + ) + + # Residuals + fig.add_trace(go.Scatter( + x=conc, y=residuals, + mode="markers+lines", + marker=dict(size=8, color="#E84545"), + line=dict(color="#E84545", width=1, dash="dot"), + name="Residuals", + showlegend=False, + ), row=2, col=1) + fig.add_trace(go.Scatter( + x=[conc.min(), conc.max()], y=[0, 0], + mode="lines", line=dict(color="#AAAAAA", width=1), + showlegend=False, hoverinfo="skip", + ), row=2, col=1) + + # Set log scale explicitly on both subplots + fig.update_xaxes(type="log", row=1, col=1) + fig.update_xaxes(title_text="Concentration (µM)", type="log", row=2, col=1) + fig.update_yaxes(title_text="% Inhibition", range=[-5, 110], row=1, col=1) + fig.update_yaxes(title_text="Residual", zeroline=True, row=2, col=1) + + fig.update_layout( + template="plotly_white", + legend=dict(x=1.02, y=0.95, xanchor="left"), + width=860, height=640, + margin=dict(t=60, r=160), + ) + + return [ + IOData(name="5pl_parameters", data=summary), + IOData(name="aggregated_data", data=agg.round(4)), + IOData(name="dose_response_curve", data=fig), + ] diff --git a/examples/custom-code-AD/snippets/kinetic-timecourse/docs/input/timecourse_data.xlsx b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/input/timecourse_data.xlsx new file mode 100644 index 0000000..6f637c0 Binary files /dev/null and b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/input/timecourse_data.xlsx differ diff --git a/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/aggregated_kinetics.csv b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/aggregated_kinetics.csv new file mode 100644 index 0000000..80662cb --- /dev/null +++ b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/aggregated_kinetics.csv @@ -0,0 +1,31 @@ +Compound,Time_hr,Mean,SD,N,SEM,CI95_lower,CI95_upper +Compound_A,0.0,-0.2787,1.3743,3,0.7935,-1.8338,1.2765 +Compound_A,0.5,1.277,0.7716,3,0.4455,0.4039,2.1501 +Compound_A,1.0,1.8333,1.0417,3,0.6014,0.6546,3.0121 +Compound_A,2.0,3.54,0.6152,3,0.3552,2.8438,4.2362 +Compound_A,4.0,6.349,0.2737,3,0.158,6.0393,6.6587 +Compound_A,8.0,11.643,0.5953,3,0.3437,10.9693,12.3167 +Compound_A,12.0,13.6943,1.7458,3,1.008,11.7188,15.6699 +Compound_A,24.0,18.3767,0.2974,3,0.1717,18.0401,18.7132 +Compound_A,48.0,19.4823,0.1698,3,0.098,19.2902,19.6745 +Compound_A,72.0,20.15,0.585,3,0.3377,19.4881,20.8119 +Compound_B,0.0,0.1373,0.6439,3,0.3718,-0.5913,0.866 +Compound_B,0.5,0.6577,0.7196,3,0.4155,-0.1567,1.472 +Compound_B,1.0,1.188,0.575,3,0.332,0.5373,1.8387 +Compound_B,2.0,2.2697,0.5702,3,0.3292,1.6244,2.9149 +Compound_B,4.0,3.5597,0.463,3,0.2673,3.0358,4.0836 +Compound_B,8.0,7.0487,0.5562,3,0.3211,6.4193,7.6781 +Compound_B,12.0,9.5947,0.5182,3,0.2992,9.0083,10.1811 +Compound_B,24.0,14.1617,0.402,3,0.2321,13.7067,14.6166 +Compound_B,48.0,18.3357,0.2967,3,0.1713,17.9999,18.6715 +Compound_B,72.0,19.525,1.3894,3,0.8022,17.9527,21.0973 +Vehicle,0.0,0.196,1.0212,3,0.5896,-0.9596,1.3516 +Vehicle,0.5,0.3307,0.295,3,0.1703,-0.0032,0.6645 +Vehicle,1.0,-0.3997,0.4085,3,0.2359,-0.862,0.0626 +Vehicle,2.0,0.0007,0.7273,3,0.4199,-0.8223,0.8237 +Vehicle,4.0,-0.126,0.4066,3,0.2348,-0.5862,0.3342 +Vehicle,8.0,-0.089,0.4928,3,0.2845,-0.6466,0.4686 +Vehicle,12.0,-0.199,0.3337,3,0.1927,-0.5767,0.1787 +Vehicle,24.0,-0.953,0.3277,3,0.1892,-1.3239,-0.5821 +Vehicle,48.0,0.4193,0.6914,3,0.3992,-0.3631,1.2017 +Vehicle,72.0,0.181,0.441,3,0.2546,-0.318,0.68 diff --git a/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/kinetic_curves.png b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/kinetic_curves.png new file mode 100644 index 0000000..d50afd3 Binary files /dev/null and b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/kinetic_curves.png differ diff --git a/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/kinetic_params.csv b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/kinetic_params.csv new file mode 100644 index 0000000..af3ec3d --- /dev/null +++ b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/kinetic_params.csv @@ -0,0 +1,4 @@ +Compound,Emax_AU,kobs_1_hr,t_half_hr,AUC_AU_hr,R2 +Compound_A,19.949,0.10149,6.83,1222.58,0.9985 +Compound_B,19.867,0.05363,12.92,1049.56,0.9994 +Vehicle,7.749,0.0001,6931.36,-7.33,-0.0248 diff --git a/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/ttest_summary.csv b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/ttest_summary.csv new file mode 100644 index 0000000..e9b02c7 --- /dev/null +++ b/examples/custom-code-AD/snippets/kinetic-timecourse/docs/output/ttest_summary.csv @@ -0,0 +1,2 @@ +Comparison,Timepoint_hr,Timepoint_basis,t_statistic,p_value,Significance +Compound_A vs Compound_B,12.0,1/kobs inflection (target=14.2h),3.8992,0.01755,* diff --git a/examples/custom-code-AD/snippets/kinetic-timecourse/kinetics_timecourse.py b/examples/custom-code-AD/snippets/kinetic-timecourse/kinetics_timecourse.py new file mode 100644 index 0000000..7b24ceb --- /dev/null +++ b/examples/custom-code-AD/snippets/kinetic-timecourse/kinetics_timecourse.py @@ -0,0 +1,215 @@ +""" +Benchling Custom Code Demo 4: Kinetic Time-Course Analysis +=========================================================== +Automated Excel → Benchling publication-ready kinetic graphs, +replacing manual Prism curve generation for compound comparisons. + +INPUTS: + inputs[0]: pd.DataFrame — long format with columns: + Time_hr | Compound | Replicate | Response_AU + +OUTPUTS: + - "kinetic_params" pd.DataFrame — Emax, kobs, t½, AUC, R² per compound + - "ttest_summary" pd.DataFrame — t-test between compounds at final timepoint + - "aggregated_kinetics" pd.DataFrame — Mean ± SEM per compound × timepoint + - "kinetic_curves" go.Figure — Multi-compound curves with 95% CI ribbons + +Supported packages: +allotropy, biopython, lmfit, numpy, openpyxl, pandas, plotly, +pyarrow, pydantic, scikit-learn, scipy, statsmodels +""" + +from io import BytesIO +import numpy as np +import pandas as pd +import plotly.graph_objects as go +from typing import NamedTuple +from lmfit import Model +from scipy.stats import ttest_ind + + +class IOData(NamedTuple): + name: str + data: BytesIO | pd.DataFrame | go.Figure + + +COMPOUND_COLORS = ["#E84545", "#1B6CA8", "#AAAAAA", "#2ECC71", "#F39C12"] + + +# --------------------------------------------------------------------------- +# One-phase association model via lmfit +# --------------------------------------------------------------------------- +def one_phase_assoc(t, emax, kobs): + """One-phase association: y = Emax * (1 - exp(-kobs * t))""" + return emax * (1.0 - np.exp(-kobs * np.clip(t, 0, None))) + + +def fit_kinetics(times: np.ndarray, responses: np.ndarray): + """Returns (emax, kobs, t_half, r2) or nans on failure.""" + try: + model = Model(one_phase_assoc) + params = model.make_params( + emax=dict(value=float(responses.max()), min=0), + kobs=dict(value=0.05, min=1e-4, max=10), + ) + result = model.fit(responses, params, t=times) + emax = result.params["emax"].value + kobs = result.params["kobs"].value + t_half = np.log(2) / kobs if kobs > 0 else np.nan + ss_res = np.sum((responses - result.best_fit) ** 2) + ss_tot = np.sum((responses - responses.mean()) ** 2) + r2 = 1.0 - ss_res / ss_tot if ss_tot > 0 else 0.0 + return float(emax), float(kobs), float(t_half), float(r2) + except Exception: + return np.nan, np.nan, np.nan, np.nan + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- +def custom_code(inputs: list[IOData], **kwargs) -> list[IOData]: + # --- Load input --- + df = None + for i in inputs: + if isinstance(i.data, pd.DataFrame): + df = i.data + break + elif isinstance(i.data, BytesIO): + df = pd.read_excel(i.data) + break + if df is None: + raise ValueError("No DataFrame or Excel file found in inputs") + df["Time_hr"] = df["Time_hr"].astype(float) + df["Response_AU"] = df["Response_AU"].astype(float) + + compounds = df["Compound"].unique().tolist() + + # --- Aggregate: mean ± SEM per compound × timepoint --- + agg = ( + df.groupby(["Compound", "Time_hr"])["Response_AU"] + .agg(Mean="mean", SD="std", N="count") + .reset_index() + ) + agg["SEM"] = agg["SD"] / np.sqrt(agg["N"]) + agg["CI95_lower"] = agg["Mean"] - 1.96 * agg["SEM"] + agg["CI95_upper"] = agg["Mean"] + 1.96 * agg["SEM"] + + # --- Fit kinetics per compound --- + param_rows = [] + for cpd in compounds: + sub = agg[agg["Compound"] == cpd].sort_values("Time_hr") + times_arr = sub["Time_hr"].values + means_arr = sub["Mean"].values + emax, kobs, t_half, r2 = fit_kinetics(times_arr, means_arr) + + # AUC via trapezoidal integration + auc = float(np.trapezoid(means_arr, times_arr)) if len(times_arr) > 1 else np.nan + + param_rows.append({ + "Compound": cpd, + "Emax_AU": round(emax, 3) if not np.isnan(emax) else None, + "kobs_1_hr": round(kobs, 5) if not np.isnan(kobs) else None, + "t_half_hr": round(t_half, 2) if not np.isnan(t_half) else None, + "AUC_AU_hr": round(auc, 2) if not np.isnan(auc) else None, + "R2": round(r2, 4) if not np.isnan(r2) else None, + }) + + params_df = pd.DataFrame(param_rows) + + # --- T-test at the inflection point (t = 1/kobs), where curves are most differentiated --- + # For one-phase association, dy/dt is maximised at t=0 but compounds diverge most + # visibly around t = 1/kobs (the time constant). We average 1/kobs across active + # compounds and snap to the nearest measured timepoint. + active = [c for c in compounds if "vehicle" not in c.lower() and "control" not in c.lower()] + available_timepoints = sorted(df["Time_hr"].unique()) + stats_rows = [] + + if len(active) >= 2: + # Compute t_inflection = 1/kobs for each active compound + t_inflections = [] + for cpd in active: + sub = agg[agg["Compound"] == cpd].sort_values("Time_hr") + _, kobs, _, _ = fit_kinetics(sub["Time_hr"].values, sub["Mean"].values) + if not np.isnan(kobs) and kobs > 0: + t_inflections.append(1.0 / kobs) + + if t_inflections: + t_target = float(np.mean(t_inflections)) + # Snap to nearest actual measured timepoint + t_compare = float(min(available_timepoints, key=lambda t: abs(t - t_target))) + else: + # Fallback: midpoint of timecourse + t_compare = float(available_timepoints[len(available_timepoints) // 2]) + + a_vals = df[(df["Compound"] == active[0]) & (df["Time_hr"] == t_compare)]["Response_AU"].values + b_vals = df[(df["Compound"] == active[1]) & (df["Time_hr"] == t_compare)]["Response_AU"].values + if len(a_vals) > 1 and len(b_vals) > 1: + t_stat, p_val = ttest_ind(a_vals, b_vals) + sig = "****" if p_val < 0.0001 else "***" if p_val < 0.001 else "**" if p_val < 0.01 else "*" if p_val < 0.05 else "ns" + stats_rows.append({ + "Comparison": f"{active[0]} vs {active[1]}", + "Timepoint_hr": t_compare, + "Timepoint_basis": f"1/kobs inflection (target={t_target:.1f}h)", + "t_statistic": round(float(t_stat), 4), + "p_value": round(float(p_val), 5), + "Significance": sig, + }) + ttest_df = pd.DataFrame(stats_rows) if stats_rows else pd.DataFrame( + columns=["Comparison", "Timepoint_hr", "Timepoint_basis", "t_statistic", "p_value", "Significance"]) + + # --- Multi-compound kinetics plot --- + t_smooth = np.linspace(0, float(df["Time_hr"].max()), 300) + fig = go.Figure() + + for idx, cpd in enumerate(compounds): + color = COMPOUND_COLORS[idx % len(COMPOUND_COLORS)] + sub = agg[agg["Compound"] == cpd].sort_values("Time_hr") + times_m = sub["Time_hr"].values + means_m = sub["Mean"].values + + # Smooth fitted line + emax, kobs, _, _ = fit_kinetics(times_m, means_m) + if not np.isnan(emax): + y_smooth = one_phase_assoc(t_smooth, emax, kobs) + fig.add_trace(go.Scatter( + x=t_smooth, y=y_smooth, + mode="lines", line=dict(color=color, width=2.5), + name=cpd, legendgroup=cpd, + )) + + # 95% CI ribbon + x_ribbon = np.concatenate([sub["Time_hr"], sub["Time_hr"].values[::-1]]) + y_ribbon = np.concatenate([sub["CI95_upper"], sub["CI95_lower"].values[::-1]]) + fig.add_trace(go.Scatter( + x=x_ribbon, y=y_ribbon, + fill="toself", fillcolor=color, opacity=0.15, + line=dict(width=0), showlegend=False, legendgroup=cpd, hoverinfo="skip", + )) + + # Mean ± SEM markers + fig.add_trace(go.Scatter( + x=sub["Time_hr"], y=sub["Mean"], + error_y=dict(array=sub["SEM"].tolist(), visible=True, color=color), + mode="markers", + marker=dict(color=color, size=9, line=dict(color="white", width=1.5)), + showlegend=False, legendgroup=cpd, + hovertemplate=f"{cpd} t=%{{x}}h mean=%{{y:.2f}}", + )) + + fig.update_layout( + title=dict(text="Kinetic Time-Course: One-Phase Association Fit", font_size=18), + xaxis=dict(title="Time (hours)"), + yaxis=dict(title="Response (AU)", rangemode="tozero"), + legend=dict(title="Compound", x=0.02, y=0.98), + template="plotly_white", + width=860, height=540, + ) + + return [ + IOData(name="kinetic_params", data=params_df), + IOData(name="ttest_summary", data=ttest_df), + IOData(name="aggregated_kinetics", data=agg.round(4)), + IOData(name="kinetic_curves", data=fig), + ] + + diff --git a/examples/custom-code-AD/snippets/multi-group-anova/docs/input/multigroup_efficacy_data.xlsx b/examples/custom-code-AD/snippets/multi-group-anova/docs/input/multigroup_efficacy_data.xlsx new file mode 100644 index 0000000..8aedf62 Binary files /dev/null and b/examples/custom-code-AD/snippets/multi-group-anova/docs/input/multigroup_efficacy_data.xlsx differ diff --git a/examples/custom-code-AD/snippets/multi-group-anova/docs/output/anova_bar_chart.png b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/anova_bar_chart.png new file mode 100644 index 0000000..5ae513d Binary files /dev/null and b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/anova_bar_chart.png differ diff --git a/examples/custom-code-AD/snippets/multi-group-anova/docs/output/anova_summary.csv b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/anova_summary.csv new file mode 100644 index 0000000..e6d2148 --- /dev/null +++ b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/anova_summary.csv @@ -0,0 +1,2 @@ +Test,F_statistic,p_value,Result +One-Way ANOVA,170.2789,<0.0001,Significant (p<0.05) diff --git a/examples/custom-code-AD/snippets/multi-group-anova/docs/output/descriptive_stats.csv b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/descriptive_stats.csv new file mode 100644 index 0000000..83eb5fe --- /dev/null +++ b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/descriptive_stats.csv @@ -0,0 +1,6 @@ +Group,N,Mean,SD,SEM +Vehicle_Control,8,9.46,1.275,0.451 +Treatment_1mg,8,14.218,1.134,0.401 +Treatment_5mg,8,18.828,1.768,0.625 +Treatment_10mg,8,22.211,1.616,0.571 +Positive_Control,8,25.628,0.986,0.349 diff --git a/examples/custom-code-AD/snippets/multi-group-anova/docs/output/tukey_pairwise.csv b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/tukey_pairwise.csv new file mode 100644 index 0000000..190400d --- /dev/null +++ b/examples/custom-code-AD/snippets/multi-group-anova/docs/output/tukey_pairwise.csv @@ -0,0 +1,11 @@ +Group1,Group2,Mean_Diff,p_adj,CI_Lower,CI_Upper,Reject_H0,Significance +Positive_Control,Treatment_10mg,-3.4175,2.00e-04,-5.4115,-1.4235,True,*** +Positive_Control,Treatment_1mg,-11.4104,<0.0001,-13.4044,-9.4163,True,**** +Positive_Control,Treatment_5mg,-6.8008,<0.0001,-8.7948,-4.8067,True,**** +Positive_Control,Vehicle_Control,-16.1684,<0.0001,-18.1624,-14.1743,True,**** +Treatment_10mg,Treatment_1mg,-7.9929,<0.0001,-9.9869,-5.9988,True,**** +Treatment_10mg,Treatment_5mg,-3.3833,2.00e-04,-5.3773,-1.3892,True,*** +Treatment_10mg,Vehicle_Control,-12.7509,<0.0001,-14.7449,-10.7568,True,**** +Treatment_1mg,Treatment_5mg,4.6096,<0.0001,2.6156,6.6037,True,**** +Treatment_1mg,Vehicle_Control,-4.758,<0.0001,-6.752,-2.764,True,**** +Treatment_5mg,Vehicle_Control,-9.3676,<0.0001,-11.3617,-7.3736,True,**** diff --git a/examples/custom-code-AD/snippets/multi-group-anova/multigroup_anova.py b/examples/custom-code-AD/snippets/multi-group-anova/multigroup_anova.py new file mode 100644 index 0000000..d766eb0 --- /dev/null +++ b/examples/custom-code-AD/snippets/multi-group-anova/multigroup_anova.py @@ -0,0 +1,184 @@ +""" +Benchling Custom Code Demo 2: Multi-Group ANOVA + Tukey Post-Hoc +================================================================ +Replaces manual copy-paste into Prism for group significance testing. + +INPUTS: + inputs[0]: pd.DataFrame — wide format, one column per group, e.g.: + Vehicle_Control | Treatment_1mg | Treatment_5mg | Treatment_10mg | Positive_Control + +OUTPUTS: + - "anova_summary" pd.DataFrame — F-statistic, p-value, result + - "tukey_pairwise" pd.DataFrame — All pairwise comparisons + significance stars + - "descriptive_stats" pd.DataFrame — N, Mean, SD, SEM per group + - "anova_bar_chart" go.Figure — Bar chart with jitter and significance annotations + +Supported packages: +allotropy, biopython, lmfit, numpy, openpyxl, pandas, plotly, +pyarrow, pydantic, scikit-learn, scipy, statsmodels +""" + +from io import BytesIO +import numpy as np +import pandas as pd +import plotly.graph_objects as go +from typing import NamedTuple +from scipy import stats +from statsmodels.stats.multicomp import pairwise_tukeyhsd + + +class IOData(NamedTuple): + name: str + data: BytesIO | pd.DataFrame | go.Figure + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- +def custom_code(inputs: list[IOData], **kwargs) -> list[IOData]: + # --- Load input --- + df = None + for i in inputs: + if isinstance(i.data, pd.DataFrame): + df = i.data + break + elif isinstance(i.data, BytesIO): + df = pd.read_excel(i.data) + break + if df is None: + raise ValueError("No DataFrame or Excel file found in inputs") + + # Drop any index column that may come from Excel + if df.columns[0] in ("Sample_ID", "Unnamed: 0"): + df = df.iloc[:, 1:] + + group_names = list(df.columns) + groups = {col: df[col].dropna().astype(float).values for col in group_names} + + # --- One-Way ANOVA --- + f_stat, p_value = stats.f_oneway(*groups.values()) + anova_summary = pd.DataFrame([{ + "Test": "One-Way ANOVA", + "F_statistic": round(float(f_stat), 4), + "p_value": "<0.0001" if p_value < 0.0001 else (f"{p_value:.2e}" if p_value < 0.001 else round(float(p_value), 4)), + "Result": "Significant (p<0.05)" if p_value < 0.05 else "Not Significant", + }]) + + # --- Tukey HSD via statsmodels --- + all_values = np.concatenate(list(groups.values())) + all_labels = np.concatenate([[g] * len(v) for g, v in groups.items()]) + tukey = pairwise_tukeyhsd(all_values, all_labels, alpha=0.05) + tukey_df = pd.DataFrame(data=tukey._results_table.data[1:], + columns=tukey._results_table.data[0]) + tukey_df.columns = ["Group1", "Group2", "Mean_Diff", "p_adj", "CI_Lower", "CI_Upper", "Reject_H0"] + tukey_df["Mean_Diff"] = tukey_df["Mean_Diff"].astype(float).round(4) + def sig_stars(p): + if p < 0.0001: return "****" + if p < 0.001: return "***" + if p < 0.01: return "**" + if p < 0.05: return "*" + return "ns" + + tukey_df["p_adj_float"] = tukey_df["p_adj"].astype(float) + tukey_df["Significance"] = tukey_df["p_adj_float"].apply(sig_stars) + tukey_df["p_adj"] = tukey_df["p_adj_float"].apply( + lambda p: "<0.0001" if p < 0.0001 else (f"{p:.2e}" if p < 0.001 else round(p, 4)) + ) + tukey_df.drop(columns=["p_adj_float"], inplace=True) + + # --- Descriptive stats --- + desc = pd.DataFrame({ + "Group": group_names, + "N": [len(groups[g]) for g in group_names], + "Mean": [round(float(groups[g].mean()), 3) for g in group_names], + "SD": [round(float(groups[g].std(ddof=1)), 3) for g in group_names], + "SEM": [round(float(groups[g].std(ddof=1) / + np.sqrt(len(groups[g]))), 3) for g in group_names], + }) + + # --- Bar chart --- + colors = ["#AAAAAA", "#4DAADF", "#1B6CA8", "#0D3F6E", "#E84545"] + fig = go.Figure() + np.random.seed(0) + + for i, g in enumerate(group_names): + sem = float(groups[g].std(ddof=1) / np.sqrt(len(groups[g]))) + mean = float(groups[g].mean()) + color = colors[i % len(colors)] + + # Jittered individual points + x_jitter = np.random.uniform(-0.25, 0.25, len(groups[g])) + i + fig.add_trace(go.Scatter( + x=x_jitter.tolist(), y=groups[g].tolist(), + mode="markers", marker=dict(color=color, size=6, opacity=0.55), + showlegend=False, hovertemplate=f"{g}: %{{y:.2f}}", + )) + # Bar + fig.add_trace(go.Bar( + x=[i], y=[mean], + error_y=dict(type="data", array=[sem], visible=True), + marker_color=color, marker_line_color="black", marker_line_width=1, + name=g, width=0.5, + )) + + # Significance brackets vs Vehicle (group 0) + # Stack brackets above the tallest bar, spaced by a fixed step + data_max = max(float(v.mean() + v.std()) for v in groups.values()) + step = data_max * 0.08 + bracket_level = data_max * 1.12 + annotations = [dict( + x=0.99, y=0.99, xref="paper", yref="paper", + text=f"ANOVA: F={f_stat:.2f}, p<0.0001" if p_value < 0.0001 else (f"ANOVA: F={f_stat:.2f}, p={p_value:.2e}" if p_value < 0.001 else f"ANOVA: F={f_stat:.2f}, p={p_value:.4f}"), + showarrow=False, bgcolor="white", bordercolor="black", borderwidth=1, + )] + + bracket_idx = 0 + for i, g in enumerate(group_names[1:], 1): + row = tukey_df[ + ((tukey_df["Group1"] == group_names[0]) & (tukey_df["Group2"] == g)) | + ((tukey_df["Group1"] == g) & (tukey_df["Group2"] == group_names[0])) + ] + if not row.empty and row.iloc[0]["Significance"] != "ns": + sig = row.iloc[0]["Significance"] + y_br = bracket_level + bracket_idx * step + # Horizontal bar + fig.add_shape(type="line", x0=0, x1=i, y0=y_br, y1=y_br, + line=dict(color="black", width=1.2)) + # Left tick + fig.add_shape(type="line", x0=0, x1=0, y0=y_br - step * 0.2, y1=y_br, + line=dict(color="black", width=1.2)) + # Right tick + fig.add_shape(type="line", x0=i, x1=i, y0=y_br - step * 0.2, y1=y_br, + line=dict(color="black", width=1.2)) + annotations.append(dict( + x=i / 2, y=y_br + step * 0.15, text=sig, + showarrow=False, font=dict(size=13), + )) + bracket_idx += 1 + + y_axis_max = bracket_level + (bracket_idx + 1) * step + + fig.update_layout( + title=dict(text="Multi-Group Efficacy: One-Way ANOVA + Tukey HSD", font_size=18), + xaxis=dict( + tickmode="array", + tickvals=list(range(len(group_names))), + ticktext=[g.replace("_", "
") for g in group_names], + title="Treatment Group", + ), + yaxis=dict(title="Response (AU)", zeroline=True, range=[0, y_axis_max]), + template="plotly_white", + barmode="overlay", + showlegend=False, + width=860, height=580, + annotations=annotations, + ) + + return [ + IOData(name="anova_summary", data=anova_summary), + IOData(name="tukey_pairwise", data=tukey_df), + IOData(name="descriptive_stats", data=desc), + IOData(name="anova_bar_chart", data=fig), + ] + +