Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 30 additions & 8 deletions quantmsutils/diann/diann2msstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,8 @@ def diann2msstats(
out_msstats = out_msstats[out_msstats["Intensity"] != 0]

out_msstats["PeptideSequence"] = out_msstats["PeptideSequence"].apply(_sanitize_sequence)
out_msstats.loc[:, "PeptideSequence"] = out_msstats.apply(
lambda x: (
AASequence.fromString(x["PeptideSequence"]).toString()
if "^" not in x["PeptideSequence"]
else "^" + AASequence.fromString(x["PeptideSequence"].replace("^", "")).toString()
),
axis=1,
)
seq_map = {s: _to_openms_sequence(s) for s in out_msstats["PeptideSequence"].unique()}
out_msstats["PeptideSequence"] = out_msstats["PeptideSequence"].map(seq_map)
out_msstats["FragmentIon"] = "NA"
out_msstats["ProductCharge"] = "0"

Expand Down Expand Up @@ -267,3 +261,31 @@ def load_report(report_path, qvalue_threshold: float) -> pd.DataFrame:
def _sanitize_sequence(seq):
seq = seq.replace("(SILAC)", "")
return seq


def _to_openms_sequence(seq: str) -> str:
    """Round-trip a peptide string through pyopenms, keeping the ``^`` anchor.

    Every ``^`` character is stripped from *seq* before parsing, because the
    anchor is not handed to ``AASequence.fromString``. If the input contained
    a ``^`` anywhere, exactly one ``^`` is prepended to the result.

    If ``AASequence.fromString(...).toString()`` raises ``RuntimeError``, the
    stripped input is used as-is instead of the canonical form, so conversion
    can continue. A warning is emitted on every such failure; the message
    names a missing OpenMS share directory (``OPENMS_DATA_PATH``) as the
    likely cause when many peptides are affected. This function does not
    de-duplicate warnings itself, so callers wanting one warning per distinct
    peptide should pass each distinct string only once.

    Args:
        seq: Peptide sequence with optional modifications and ``^`` anchor.

    Returns:
        The pyopenms string form of the sequence, or the anchor-stripped
        input when parsing fails, prefixed with ``^`` if *seq* contained one.
    """
    anchored = "^" in seq
    # str.replace is a no-op when there is no anchor, so no branch is needed.
    stripped = seq.replace("^", "")

    try:
        result = AASequence.fromString(stripped).toString()
    except RuntimeError as err:
        logger.warning(
            "pyopenms could not parse peptide %r (%s); keeping the raw "
            "DIA-NN sequence. If this affects many peptides, the runtime "
            "container is likely missing the OpenMS share directory "
            "(OPENMS_DATA_PATH).",
            stripped,
            err,
        )
        # Best-effort fallback: hand back what we were given (minus anchor).
        result = stripped

    if anchored:
        return "^" + result
    return result
29 changes: 29 additions & 0 deletions tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,35 @@ def test_dianncfg_example(self):

assert result.exit_code == 0

def test_to_openms_sequence_falls_back_on_unknown_mod(self):
    """A modification pyopenms cannot resolve must not abort conversion.

    ``_to_openms_sequence`` is expected to hand back the input string
    unchanged when parsing fails, rather than propagating the
    ``RuntimeError``, so that the downstream MSstats conversion can
    still run.
    """
    from quantmsutils.diann.diann2msstats import _to_openms_sequence

    # The modification name is deliberately nonsensical so parsing fails.
    raw_sequence = "M(NoSuchModXYZ)PEPTIDE"
    converted = _to_openms_sequence(raw_sequence)

    assert converted == raw_sequence

def test_to_openms_sequence_preserves_n_term_anchor(self):
    """The ``^`` anchor must be present in the output on both code paths.

    Covers the successful pyopenms round-trip as well as the fallback
    taken when the sequence cannot be parsed.
    """
    from quantmsutils.diann.diann2msstats import _to_openms_sequence

    # Modification expected to be parseable: only the anchor is checked,
    # not the exact canonical spelling produced by pyopenms.
    converted = _to_openms_sequence("^M(Oxidation)PEPTIDE")
    assert converted.startswith("^")

    # Unparseable modification: the whole input, anchor included, comes
    # back unchanged.
    unknown_mod_sequence = "^M(NoSuchModXYZ)PEPTIDE"
    assert _to_openms_sequence(unknown_mod_sequence) == unknown_mod_sequence


class TestSamplesheetCommands:
"""Test class for samplesheet related commands"""
Expand Down