diff --git a/app.py b/app.py index 6c276f0..194d857 100644 --- a/app.py +++ b/app.py @@ -27,6 +27,7 @@ st.Page(Path("content", "results_pca.py"), title="PCA", icon="๐Ÿ“Š"), st.Page(Path("content", "results_heatmap.py"), title="Heatmap", icon="๐Ÿ”ฅ"), st.Page(Path("content", "results_library.py"), title="Spectral Library", icon="๐Ÿ“š"), + st.Page(Path("content", "results_proteomicslfq.py"), title="Proteomics LFQ", icon="๐Ÿงช"), ], } diff --git a/content/results_proteomicslfq.py b/content/results_proteomicslfq.py new file mode 100644 index 0000000..77eb332 --- /dev/null +++ b/content/results_proteomicslfq.py @@ -0,0 +1,101 @@ +from pathlib import Path +import streamlit as st +import pandas as pd +import numpy as np +import plotly.express as px + +from src.common.common import page_setup +from src.common.results_helpers import get_abundance_data + +# ================================ +# Page setup +# ================================ +params = page_setup() +st.title("ProteomicsLFQ Results") + +# ================================ +# Workspace check +# ================================ +if "workspace" not in st.session_state: + st.warning("Please initialize your workspace first.") + st.stop() + +# ================================ +# Load abundance data +# ================================ +res = get_abundance_data(st.session_state["workspace"]) +if res is None: + st.info( + "Abundance data not available or incomplete. " + "Please run the workflow and configure sample groups first." 
+ ) + st.stop() + +pivot_df, expr_df, group_map = res + +# ================================ +# Tabs +# ================================ +protein_tab, = st.tabs(["๐Ÿงฌ Protein Table"]) + +# ================================ +# Protein-level results +# ================================ +with protein_tab: + st.markdown("### ๐Ÿงฌ Protein-Level Abundance Table") + st.info( + "This protein-level table is generated by grouping all PSMs that map to the " + "same protein and aggregating their intensities across samples.\n\n" + "Additionally, log2 fold change and p-values are calculated between sample groups." + ) + + if pivot_df.empty: + st.info("No protein-level data available.") + else: + st.session_state["pivot_df"] = pivot_df + st.dataframe(pivot_df.sort_values("p-value"), use_container_width=True) + +# ====================================================== +# GO Enrichment Results +# ====================================================== +st.markdown("---") +st.subheader("๐Ÿงฌ GO Enrichment Analysis") + +results_dir = Path(st.session_state["workspace"]) / "topp-workflow" / "results" / "go-terms" +go_json_file = results_dir / "go_results.json" + +if not go_json_file.exists(): + st.info("GO Enrichment results are not available yet. 
Please run the analysis first.") +else: + import json + import plotly.io as pio + + with open(go_json_file, "r") as f: + go_data = json.load(f) + + bp_tab, cc_tab, mf_tab = st.tabs([ + "๐Ÿงฌ Biological Process", + "๐Ÿ  Cellular Component", + "โš™๏ธ Molecular Function", + ]) + + for tab, go_type in zip([bp_tab, cc_tab, mf_tab], ["BP", "CC", "MF"]): + with tab: + if go_type not in go_data: + st.info(f"No enriched {go_type} terms found.") + continue + + fig_json = go_data[go_type]["fig_json"] + df_dict = go_data[go_type]["df_dict"] + + fig = pio.from_json(fig_json) + + df_go = pd.DataFrame(df_dict) + + if df_go.empty: + st.info(f"No enriched {go_type} terms found.") + else: + st.plotly_chart(fig, use_container_width=True) + + st.markdown(f"#### {go_type} Enrichment Results") + st.dataframe(df_go, use_container_width=True) \ No newline at end of file diff --git a/content/workflow_run.py b/content/workflow_run.py index eadb5a5..6853470 100644 --- a/content/workflow_run.py +++ b/content/workflow_run.py @@ -5,5 +5,4 @@ params = page_setup() wf = WorkflowTest() - wf.show_execution_section() diff --git a/requirements.txt b/requirements.txt index 510c539..2f5fd30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -145,7 +145,7 @@ polars>=1.0.0 cython easypqp>=0.1.34 pyprophet>=2.2.0 - +mygene # Redis Queue dependencies (for online mode) redis>=5.0.0 rq>=1.16.0 diff --git a/src/WorkflowTest.py b/src/WorkflowTest.py index d94a37d..4abbf92 100644 --- a/src/WorkflowTest.py +++ b/src/WorkflowTest.py @@ -6,12 +6,17 @@ from pyopenms import IdXMLFile from scipy.stats import ttest_ind import numpy as np +import mygene +from collections import defaultdict +from scipy.stats import fisher_exact from src.workflow.WorkflowManager import WorkflowManager +from src.common.common import page_setup +from src.common.results_helpers import get_abundance_data from src.common.results_helpers import parse_idxml, build_spectra_cache from openms_insight import Table, Heatmap, LinePlot, 
SequenceView - +# params = page_setup() class WorkflowTest(WorkflowManager): def __init__(self) -> None: @@ -354,16 +359,14 @@ def execution(self) -> bool: filter_dir = results_dir / "filter_results" quant_dir = results_dir / "quant_results" - results_dir = Path(self.workflow_dir, "input-files") - for d in [comet_dir, perc_dir, filter_dir, quant_dir]: d.mkdir(parents=True, exist_ok=True) self.logger.log("๐Ÿ“ Output directories created") - # ================================ - # 2๏ธโƒฃ File path definitions (per sample) - # ================================ + # # ================================ + # # 2๏ธโƒฃ File path definitions (per sample) + # # ================================ comet_results = [] percolator_results = [] filter_results = [] @@ -499,10 +502,6 @@ def execution(self) -> bool: self.logger.log("โœ… Peptide search complete") - # if not Path(comet_results).exists(): - # st.error(f"CometAdapter failed for {stem}") - # st.stop() - # --- PercolatorAdapter --- self.logger.log("๐Ÿ“Š Running rescoring...") with st.spinner(f"PercolatorAdapter ({stem})"): @@ -764,9 +763,9 @@ def execution(self) -> bool: st.success(f"โœ“ {stem} identification completed") - # # ================================ - # # 4๏ธโƒฃ ProteomicsLFQ (cross-sample) - # # ================================ + # ================================ + # 4๏ธโƒฃ ProteomicsLFQ (cross-sample) + # ================================ self.logger.log("๐Ÿ“ˆ Running cross-sample quantification...") st.info("Running ProteomicsLFQ (cross-sample quantification)") @@ -813,10 +812,17 @@ def execution(self) -> bool: return False self.logger.log("โœ… Quantification complete") - # if not Path(quant_mztab).exists(): - # st.error("ProteomicsLFQ failed: mzTab not created") - # st.stop() - + # ====================================================== + # โš ๏ธ 5๏ธโƒฃ GO Enrichment Analysis (INLINE IN EXECUTION) + # ====================================================== + workspace_path = Path(self.workflow_dir).parent + res = 
get_abundance_data(workspace_path) + if res is not None: + pivot_df, _, _ = res + self.logger.log("โœ… pivot_df loaded, starting GO enrichment...") + self._run_go_enrichment(pivot_df, results_dir) + else: + st.warning("GO enrichment skipped: abundance data not available.") # ================================ # 5๏ธโƒฃ Final report @@ -825,13 +831,158 @@ def execution(self) -> bool: st.write("๐Ÿ“ Results directory:") st.code(str(results_dir)) + return True + + def _run_go_enrichment(self, pivot_df: pd.DataFrame, results_dir: Path): + p_cutoff = 0.05 + fc_cutoff = 1.0 - st.write("๐Ÿ“„ Generated files:") - st.write(f"- mzTab: {quant_mztab}") - st.write(f"- consensusXML: {quant_cxml}") - st.write(f"- MSstats CSV: {quant_msstats}") + analysis_df = pivot_df.dropna(subset=["p-value", "log2FC"]).copy() - return True + if analysis_df.empty: + st.error("No valid statistical data found for GO enrichment.") + self.logger.log("โ— analysis_df is empty") + else: + with st.spinner("Fetching GO terms from MyGene.info API..."): + mg = mygene.MyGeneInfo() + + def get_clean_uniprot(name): + parts = str(name).split("|") + return parts[1] if len(parts) >= 2 else parts[0] + + analysis_df["UniProt"] = analysis_df["ProteinName"].apply(get_clean_uniprot) + + bg_ids = analysis_df["UniProt"].dropna().astype(str).unique().tolist() + fg_ids = analysis_df[ + (analysis_df["p-value"] < p_cutoff) & + (analysis_df["log2FC"].abs() >= fc_cutoff) + ]["UniProt"].dropna().astype(str).unique().tolist() + self.logger.log("โœ… get_clean_uniprot applied") + + if len(fg_ids) < 3: + st.warning( + f"Not enough significant proteins " + f"(p < {p_cutoff}, |log2FC| โ‰ฅ {fc_cutoff}). 
" + f"Found: {len(fg_ids)}" + ) + self.logger.log("โ— Not enough significant proteins") + else: + res_list = mg.querymany( + bg_ids, scopes="uniprot", fields="go", as_dataframe=False + ) + res_go = pd.DataFrame(res_list) + if "notfound" in res_go.columns: + res_go = res_go[res_go["notfound"] != True] + + def extract_go_terms(go_data, go_type): + if not isinstance(go_data, dict) or go_type not in go_data: + return [] + terms = go_data[go_type] + if isinstance(terms, dict): + terms = [terms] + return list({t.get("term") for t in terms if "term" in t}) + + for go_type in ["BP", "CC", "MF"]: + res_go[f"{go_type}_terms"] = res_go["go"].apply( + lambda x: extract_go_terms(x, go_type) + ) + + annotated_ids = set(res_go["query"].astype(str)) + fg_set = annotated_ids.intersection(fg_ids) + bg_set = annotated_ids + self.logger.log(f"โœ… fg_set bg_set are set") + + def run_go(go_type): + go2fg = defaultdict(set) + go2bg = defaultdict(set) + + for _, row in res_go.iterrows(): + uid = str(row["query"]) + for term in row[f"{go_type}_terms"]: + go2bg[term].add(uid) + if uid in fg_set: + go2fg[term].add(uid) + + records = [] + N_fg = len(fg_set) + N_bg = len(bg_set) + + for term, fg_genes in go2fg.items(): + a = len(fg_genes) + if a == 0: + continue + b = N_fg - a + c = len(go2bg[term]) - a + d = N_bg - (a + b + c) + + _, p = fisher_exact([[a, b], [c, d]], alternative="greater") + records.append({ + "GO_Term": term, + "Count": a, + "GeneRatio": f"{a}/{N_fg}", + "p_value": p, + }) + + df = pd.DataFrame(records) + if df.empty: + return None, None + + df["-log10(p)"] = -np.log10(df["p_value"].replace(0, 1e-10)) + df = df.sort_values("p_value").head(20) + + # โœ… Plotly Figure + fig = px.bar( + df, + x="-log10(p)", + y="GO_Term", + orientation="h", + title=f"GO Enrichment ({go_type})", + ) + + self.logger.log(f"โœ… Plotly Figure generated") + + fig.update_layout( + yaxis=dict(autorange="reversed"), + height=500, + margin=dict(l=10, r=10, t=40, b=10), + ) + + return fig, df + + 
go_results = {} + + for go_type in ["BP", "CC", "MF"]: + fig, df_go = run_go(go_type) + if fig is not None: + go_results[go_type] = { + "fig": fig, + "df": df_go + } + self.logger.log(f"✅ {go_type} generated") + + go_dir = results_dir / "go-terms" + go_dir.mkdir(parents=True, exist_ok=True) + + import json + go_data = {} + + for go_type in ["BP", "CC", "MF"]: + if go_type in go_results: + fig = go_results[go_type]["fig"] + df = go_results[go_type]["df"] + + go_data[go_type] = { + "fig_json": fig.to_json(), # Figure → JSON string + "df_dict": df.to_dict(orient="records") # DataFrame → list of dicts + } + + go_json_file = go_dir / "go_results.json" + with open(go_json_file, "w") as f: + json.dump(go_data, f) + st.session_state["go_results"] = go_results + st.session_state["go_ready"] = bool(go_data) + self.logger.log("✅ GO enrichment analysis complete") + @st.fragment def results(self) -> None: diff --git a/src/workflow/WorkflowManager.py b/src/workflow/WorkflowManager.py index a87fb33..c3343dc 100644 --- a/src/workflow/WorkflowManager.py +++ b/src/workflow/WorkflowManager.py @@ -191,9 +191,10 @@ def stop_workflow(self) -> bool: return self._stop_local_workflow() def _stop_local_workflow(self) -> bool: - """Stop locally running workflow process""" + """Stop locally running workflow process - Windows Compatible""" import os import signal + import platform pid_dir = self.executor.pid_dir if not pid_dir.exists(): @@ -203,11 +204,18 @@ for pid_file in pid_dir.iterdir(): try: pid = int(pid_file.name) - os.kill(pid, signal.SIGTERM) + # Windows + if platform.system() == "Windows": + os.system(f"taskkill /F /T /PID {pid}") + else: + # Linux/macOS + os.kill(pid, signal.SIGTERM) + pid_file.unlink() stopped = True - except (ValueError, ProcessLookupError, PermissionError): - pid_file.unlink() # Clean up stale PID file + except (ValueError, ProcessLookupError, PermissionError, OSError): + if pid_file.exists(): + 
pid_file.unlink() # Clean up the pid directory shutil.rmtree(pid_dir, ignore_errors=True)