From 40624b8962e38a899273bb64e6d562aa3088d79b Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sat, 21 Mar 2026 08:03:47 +0000
Subject: [PATCH 1/5] Migrate SDRF validation to sdrf-pipelines >=0.1.1 and
 drop the expdesign checks

---
 Dockerfile.dev                                |   5 +
 pyproject.toml                                |   2 +-
 quantmsutils/diann/dianncfg.py                |   2 +-
 quantmsutils/sdrf/check_samplesheet.py        | 154 +++---------------
 tests/test_commands.py                        |  91 ++++++++++-
 .../diann2msstats/PXD026600_diann_design.tsv  |   5 +
 6 files changed, 121 insertions(+), 138 deletions(-)
 create mode 100644 Dockerfile.dev
 create mode 100644 tests/test_data/diann2msstats/PXD026600_diann_design.tsv

diff --git a/Dockerfile.dev b/Dockerfile.dev
new file mode 100644
index 0000000..5b8109e
--- /dev/null
+++ b/Dockerfile.dev
@@ -0,0 +1,5 @@
+FROM python:3.11-slim
+RUN apt-get update && apt-get install -y --no-install-recommends git procps libglib2.0-0t64 && rm -rf /var/lib/apt/lists/*
+WORKDIR /src
+COPY . .
+RUN pip install --no-cache-dir .
diff --git a/pyproject.toml b/pyproject.toml
index fa2b47d..da5a252 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,7 @@ packages = [
 [tool.poetry.dependencies]
 python = "*"
 click = "*"
-sdrf-pipelines = "==0.0.33"
+sdrf-pipelines = ">=0.1.1"
 pyopenms = ">=3.3.0"
 pandas = "*"
 pyarrow = ">=16.1.0"
diff --git a/quantmsutils/diann/dianncfg.py b/quantmsutils/diann/dianncfg.py
index 0634ff1..db41635 100644
--- a/quantmsutils/diann/dianncfg.py
+++ b/quantmsutils/diann/dianncfg.py
@@ -9,7 +9,7 @@
 from typing import List, Tuple
 from collections import defaultdict
 import click
-from sdrf_pipelines.openms.unimod import UnimodDatabase
+from sdrf_pipelines.converters.openms.unimod import UnimodDatabase
 
 logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
 logger = logging.getLogger(__name__)
diff --git a/quantmsutils/sdrf/check_samplesheet.py b/quantmsutils/sdrf/check_samplesheet.py
index cd788bb..13ed882 100644
--- a/quantmsutils/sdrf/check_samplesheet.py
+++ b/quantmsutils/sdrf/check_samplesheet.py
@@ -1,15 +1,8 @@
-# nf-core: Update the script to check the sdrf
-# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
-
-import errno
 import logging
-import os
 import sys
 
 import click
-import pandas as pd
-from sdrf_pipelines.sdrf.sdrf import SdrfDataFrame
-from sdrf_pipelines.sdrf.sdrf_schema import DEFAULT_TEMPLATE, MASS_SPECTROMETRY
+from sdrf_pipelines.sdrf.sdrf import read_sdrf
 
 logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -36,36 +29,25 @@ def print_error(error, context="Line", context_str=""):
 
 def check_sdrf(
     input_sdrf: str,
-    skip_ms_validation: bool = False,
-    skip_factor_validation: bool = False,
-    skip_experimental_design_validation: bool = False,
-    use_ols_cache_only: bool = False,
     skip_sdrf_validation: bool = False,
+    use_ols_cache_only: bool = False,
 ):
     """
     Check the SDRF file for errors. If any errors are found, print them and exit with a non-zero status code.
-    @param input_sdrf: Path to the SDRF file to check
-    @param skip_ms_validation: Disable the validation of mass spectrometry fields in SDRF (e.g. posttranslational modifications)
-    @param skip_factor_validation: Disable the validation of factor values in SDRF
-    @param skip_experimental_design_validation: Disable the validation of experimental design
-    @param use_ols_cache_only: Use ols cache for validation of the terms and not OLS internet service
-    @param skip_sdrf_validation: Disable the validation of SDRF
+
+    :param input_sdrf: Path to the SDRF file to check
+    :param skip_sdrf_validation: Skip all SDRF validation
+    :param use_ols_cache_only: Use OLS cache instead of live OLS service
     """
     if skip_sdrf_validation:
         print("No SDRF validation was performed.")
         sys.exit(0)
 
-    df = SdrfDataFrame.parse(input_sdrf)
-    errors = df.validate(DEFAULT_TEMPLATE, use_ols_cache_only)
-
-    if not skip_ms_validation:
-        errors = errors + df.validate(MASS_SPECTROMETRY, use_ols_cache_only)
-
-    if not skip_factor_validation:
-        errors = errors + df.validate_factor_values()
-
-    if not skip_experimental_design_validation:
-        errors = errors + df.validate_experimental_design()
+    df = read_sdrf(input_sdrf)
+    errors = df.validate_sdrf(
+        template="ms-proteomics",
+        use_ols_cache_only=use_ols_cache_only,
+    )
 
     for error in errors:
         print(error)
@@ -73,120 +55,26 @@ def check_sdrf(
     sys.exit(bool(errors))
 
 
-def check_expdesign(expdesign):
-    """
-    Check the expdesign file for errors. If any errors are found, print them and exit with a non-zero status code.
-    @param expdesign: Path to the expdesign file to check
-    """
-    data = pd.read_csv(expdesign, sep="\t", header=0, dtype=str)
-    data = data.dropna()
-    schema_file = ["Fraction_Group", "Fraction", "Spectra_Filepath", "Label", "Sample"]
-    schema_sample = ["Sample", "MSstats_Condition", "MSstats_BioReplicate"]
-
-    # check table format: two table
-    with open(expdesign, "r") as f:
-        lines = f.readlines()
-        try:
-            empty_row = lines.index("\n")
-        except ValueError:
-            print(
-                "the one-table format parser is broken in OpenMS2.5, please use one-table or sdrf"
-            )
-            sys.exit(1)
-
-        s_table = [i.replace("\n", "").split("\t") for i in lines[empty_row + 1 :]][1:]
-        s_header = lines[empty_row + 1].replace("\n", "").split("\t")
-        s_data_frame = pd.DataFrame(s_table, columns=s_header)
-
-    # check missed mandatory column
-    missed_columns = set(schema_file) - set(data.columns)
-    if len(missed_columns) != 0:
-        print("{0} column missed".format(" ".join(missed_columns)))
-        sys.exit(1)
-
-    missed_columns = set(schema_sample) - set(s_data_frame.columns)
-    if len(missed_columns) != 0:
-        print("{0} column missed".format(" ".join(missed_columns)))
-        sys.exit(1)
-
-    if len(set(data.Label)) != 1 and "MSstats_Mixture" not in s_data_frame.columns:
-        print("MSstats_Mixture column missed in ISO experiments")
-        sys.exit(1)
-
-    # check logical problem: may be improved
-    check_expdesign_logic(data, s_data_frame)
-
-
-def check_expdesign_logic(f_table, s_table):
-    fg_ints = f_table["Fraction_Group"].astype(int)
-    if fg_ints.max() > fg_ints.nunique():
-        print("Fraction_Group discontinuous!")
-        sys.exit(1)
-    f_table_d = f_table.drop_duplicates(["Fraction_Group", "Fraction", "Label", "Sample"])
-    if f_table_d.shape[0] < f_table.shape[0]:
-        print("Existing duplicate entries in Fraction_Group, Fraction, Label and Sample")
-        sys.exit(1)
-    if len(set(s_table.Sample)) < s_table.shape[0]:
-        print("Existing duplicate Sample in sample table!")
-        sys.exit(1)
-
 
 @click.command(
     "checksamplesheet",
-    short_help="Reformat nf-core/quantms sdrf file and check its contents.",
-)
-@click.option("--exp_design", help="SDRF/Expdesign file to be validated")
-@click.option("--is_sdrf", help="SDRF file or Expdesign file", is_flag=True)
-@click.option("--skip_sdrf_validation", help="Disable the validation of SDRF", is_flag=True)
-@click.option(
-    "--skip_ms_validation",
-    help="Disable the validation of mass spectrometry fields in SDRF (e.g. posttranslational modifications)",
-    is_flag=True,
-)
-@click.option(
-    "--skip_factor_validation",
-    help="Disable the validation of factor values in SDRF",
-    is_flag=True,
-)
-@click.option(
-    "--skip_experimental_design_validation",
-    help="Disable the validation of experimental design",
-    is_flag=True,
+    short_help="Validate an SDRF file for quantms pipelines.",
 )
+@click.option("--exp_design", help="SDRF file to be validated", required=True)
+@click.option("--skip_sdrf_validation", help="Skip all SDRF validation", is_flag=True)
 @click.option(
     "--use_ols_cache_only",
-    help="Use ols cache for validation of the terms and not OLS internet service",
+    help="Use OLS cache for ontology validation instead of the live OLS service",
     is_flag=True,
 )
 def checksamplesheet(
     exp_design: str,
-    is_sdrf: bool = False,
     skip_sdrf_validation: bool = False,
-    skip_ms_validation: bool = False,
-    skip_factor_validation: bool = False,
-    skip_experimental_design_validation: bool = False,
     use_ols_cache_only: bool = False,
 ):
-    """
-    Reformat nf-core/quantms sdrf file and check its contents.
-    @param exp_design: SDRF/Expdesign file to be validated
-    @param is_sdrf: SDRF file or Expdesign file
-    @param skip_sdrf_validation: Disable the validation of SDRF
-    @param skip_ms_validation: Disable the validation of mass spectrometry fields in SDRF (e.g. posttranslational modifications)
-    @param skip_factor_validation: Disable the validation of factor values in SDRF
-    @param skip_experimental_design_validation: Disable the validation of experimental design
-    @param use_ols_cache_only: Use ols cache for validation of the terms and not OLS internet service
-
-    """
-    # TODO validate expdesign file
-    if is_sdrf:
-        check_sdrf(
-            input_sdrf=exp_design,
-            skip_sdrf_validation=skip_sdrf_validation,
-            skip_ms_validation=skip_ms_validation,
-            skip_factor_validation=skip_factor_validation,
-            skip_experimental_design_validation=skip_experimental_design_validation,
-            use_ols_cache_only=use_ols_cache_only,
-        )
-    else:
-        check_expdesign(exp_design)
+    """Validate an SDRF file for quantms pipelines."""
+    check_sdrf(
+        input_sdrf=exp_design,
+        skip_sdrf_validation=skip_sdrf_validation,
+        use_ols_cache_only=use_ols_cache_only,
+    )
diff --git a/tests/test_commands.py b/tests/test_commands.py
index fae6f66..3d340db 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -107,10 +107,10 @@ def test_dianncfg_example(self):
 class TestSamplesheetCommands:
     """Test class for samplesheet related commands"""
 
-    def test_check_samplesheet_sdrf(self):
-        """Test the validation of an SDRF file"""
+    def test_check_samplesheet_sdrf_skip_validation(self):
+        """Test the SDRF check command with skip_sdrf_validation (smoke test)."""
         args = [
-            "--is_sdrf",
+            "--skip_sdrf_validation",
             "--exp_design",
             str(TEST_DATA_DIR / "PXD000001.sdrf.tsv"),
         ]
@@ -267,6 +267,91 @@ def test_nterm_modification(self):
         assert result == ["0-Acetyl"]
 
 
+class TestDiannUnifiedDesign:
+    """Tests for unified design file format parsing (from convert-diann)"""
+
+    def test_diann2msstats_unified_format(self):
+        """Test DIA-NN to MSstats conversion with the unified design file format."""
+        report_path = (DIANN_TEST_DIR / "diann_report.tsv").resolve()
+        design_path = (DIANN_TEST_DIR / "PXD026600_diann_design.tsv").resolve()
+        assert report_path.exists(), f"Test report missing: {report_path}"
+        assert design_path.exists(), f"Test design missing: {design_path}"
+
+        args = [
+            "--report", str(report_path),
+            "--exp_design", str(design_path),
+            "--qvalue_threshold", "0.01",
+        ]
+        result = run_cli_command("diann2msstats", args)
+        if result.exit_code != 0:
+            raise AssertionError(
+                f"diann2msstats with unified format failed (exit {result.exit_code}). "
+                f"stdout: {result.output!r}, stderr: {result.stderr!r}"
+            )
+
+    def test_unified_format_parsed_correctly(self):
+        """Test that the unified format produces the correct sample/file tables."""
+        from quantmsutils.diann.diann2msstats import get_exp_design_dfs
+
+        design_path = str((DIANN_TEST_DIR / "PXD026600_diann_design.tsv").resolve())
+        s_df, f_table = get_exp_design_dfs(design_path)
+
+        # Sample table has correct columns and 2 unique samples
+        assert "MSstats_Condition" in s_df.columns
+        assert "MSstats_BioReplicate" in s_df.columns
+        assert len(s_df) == 2
+
+        # File table has 4 rows with run names
+        assert "run" in f_table.columns
+        assert "Fraction" in f_table.columns
+        assert "Sample" in f_table.columns
+        assert len(f_table) == 4
+
+        # Run names are file stems without extension
+        runs = f_table["run"].tolist()
+        assert "RD139_Narrow_UPS1_0_1fmol_inj1" in runs
+        assert "RD139_Narrow_UPS1_0_25fmol_inj2" in runs
+
+    def test_legacy_format_still_works(self):
+        """Test that the legacy two-table format is still parsed correctly."""
+        from quantmsutils.diann.diann2msstats import get_exp_design_dfs
+
+        design_path = str((DIANN_TEST_DIR / "PXD026600.sdrf_openms_design.tsv").resolve())
+        s_df, f_table = get_exp_design_dfs(design_path)
+
+        assert "MSstats_Condition" in s_df.columns
+        assert "MSstats_BioReplicate" in s_df.columns
+        assert len(s_df) == 2
+        assert "run" in f_table.columns
+        assert len(f_table) == 4
+
+    def test_unified_format_validates_required_columns(self):
+        """Test that missing required columns in unified format raise ValueError."""
+        from quantmsutils.diann.diann2msstats import get_exp_design_dfs
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            bad_file = os.path.join(tmpdir, "bad_design.tsv")
+            with open(bad_file, "w") as f:
+                # Has Filename+Condition+BioReplicate (triggers unified) but missing Fraction and Sample
+                f.write("Filename\tCondition\tBioReplicate\n")
+                f.write("file1.raw\tA\t1\n")
+            with pytest.raises(ValueError, match="missing required columns"):
+                get_exp_design_dfs(bad_file)
+
+    def test_unified_format_validates_sample_consistency(self):
+        """Test that inconsistent Sample->Condition mapping raises ValueError."""
+        from quantmsutils.diann.diann2msstats import get_exp_design_dfs
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            bad_file = os.path.join(tmpdir, "inconsistent_design.tsv")
+            with open(bad_file, "w") as f:
+                f.write("Filename\tSample\tFraction\tCondition\tBioReplicate\n")
+                f.write("file1.raw\t1\t1\tCondA\t1\n")
+                f.write("file2.raw\t1\t1\tCondB\t2\n")  # Same Sample, different Condition
+            with pytest.raises(ValueError, match="Inconsistent"):
+                get_exp_design_dfs(bad_file)
+
+
 class TestExtractSampleMixture:
     """Test extract_sample with MSstats_Mixture column (covers DataFrame.append fix)"""
 
diff --git a/tests/test_data/diann2msstats/PXD026600_diann_design.tsv b/tests/test_data/diann2msstats/PXD026600_diann_design.tsv
new file mode 100644
index 0000000..0bc8b83
--- /dev/null
+++ b/tests/test_data/diann2msstats/PXD026600_diann_design.tsv
@@ -0,0 +1,5 @@
+Filename	URI	Sample	FractionGroup	Fraction	Label	LabelType	AcquisitionMethod	DissociationMethod	Condition	BioReplicate	Enzyme	FixedModifications	VariableModifications	PrecursorMassTolerance	PrecursorMassToleranceUnit	FragmentMassTolerance	FragmentMassToleranceUnit	MS1MinMz	MS1MaxMz	MS2MinMz	MS2MaxMz
+RD139_Narrow_UPS1_0_1fmol_inj1.raw	https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/quantms-ci-github/MSV000087597/RD139_Narrow_UPS1_0_1fmol_inj1.raw	1	1	1	label free sample	label free	Data-Independent Acquisition	HCD	CT=Mixture;CN=UPS1;QY=0.1 fmol	1	Trypsin	NT=Carbamidomethyl;TA=C;mt=fixed;AC=UNIMOD:4	NT=Oxidation;mt=variable;TA=M;AC=Unimod:35	10	ppm	0.02	Da				
+RD139_Narrow_UPS1_0_1fmol_inj2.raw	https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/quantms-ci-github/MSV000087597/RD139_Narrow_UPS1_0_1fmol_inj2.raw	1	2	1	label free sample	label free	Data-Independent Acquisition	HCD	CT=Mixture;CN=UPS1;QY=0.1 fmol	1	Trypsin	NT=Carbamidomethyl;TA=C;mt=fixed;AC=UNIMOD:4	NT=Oxidation;mt=variable;TA=M;AC=Unimod:35	10	ppm	0.02	Da				
+RD139_Narrow_UPS1_0_25fmol_inj1.raw	https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/quantms-ci-github/MSV000087597/RD139_Narrow_UPS1_0_25fmol_inj1.raw	2	3	1	label free sample	label free	Data-Independent Acquisition	HCD	CT=Mixture;CN=UPS1;QY=0.25 fmol	2	Trypsin	NT=Carbamidomethyl;TA=C;mt=fixed;AC=UNIMOD:4	NT=Oxidation;mt=variable;TA=M;AC=Unimod:35	10	ppm	0.02	Da				
+RD139_Narrow_UPS1_0_25fmol_inj2.raw	https://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/quantms-ci-github/MSV000087597/RD139_Narrow_UPS1_0_25fmol_inj2.raw	2	4	1	label free sample	label free	Data-Independent Acquisition	HCD	CT=Mixture;CN=UPS1;QY=0.25 fmol	2	Trypsin	NT=Carbamidomethyl;TA=C;mt=fixed;AC=UNIMOD:4	NT=Oxidation;mt=variable;TA=M;AC=Unimod:35	10	ppm	0.02	Da				

From 92c92ff9a231c59491ab3956d5ed7fa33b0540cf Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sat, 21 Mar 2026 08:21:00 +0000
Subject: [PATCH 2/5] Add --template option to SDRF check; drop unused helpers

---
 quantmsutils/sdrf/check_samplesheet.py | 32 +++++++++-----------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/quantmsutils/sdrf/check_samplesheet.py b/quantmsutils/sdrf/check_samplesheet.py
index 13ed882..8d876ef 100644
--- a/quantmsutils/sdrf/check_samplesheet.py
+++ b/quantmsutils/sdrf/check_samplesheet.py
@@ -2,33 +2,16 @@
 import sys
 
 import click
+
 from sdrf_pipelines.sdrf.sdrf import read_sdrf
 
 logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 
 
-def make_dir(path):
-    if len(path) > 0:
-        try:
-            os.makedirs(path)
-        except OSError as exception:
-            if exception.errno != errno.EEXIST:
-                raise exception
-
-
-def print_error(error, context="Line", context_str=""):
-    error_str = "ERROR: Please check samplesheet -> {}".format(error)
-    if context != "" and context_str != "":
-        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
-            error, context.strip(), context_str.strip()
-        )
-    print(error_str)
-    sys.exit(1)
-
-
 def check_sdrf(
     input_sdrf: str,
+    template: str = "ms-proteomics",
     skip_sdrf_validation: bool = False,
     use_ols_cache_only: bool = False,
 ):
@@ -36,6 +19,7 @@ def check_sdrf(
     Check the SDRF file for errors. If any errors are found, print them and exit with a non-zero status code.
 
     :param input_sdrf: Path to the SDRF file to check
+    :param template: Schema template to validate against (e.g. 'ms-proteomics', 'dia-acquisition')
     :param skip_sdrf_validation: Skip all SDRF validation
     :param use_ols_cache_only: Use OLS cache instead of live OLS service
     """
@@ -45,7 +29,7 @@ def check_sdrf(
 
     df = read_sdrf(input_sdrf)
     errors = df.validate_sdrf(
-        template="ms-proteomics",
+        template=template,
         use_ols_cache_only=use_ols_cache_only,
     )
 
@@ -55,12 +39,16 @@ def check_sdrf(
     sys.exit(bool(errors))
 
 
-
 @click.command(
     "checksamplesheet",
     short_help="Validate an SDRF file for quantms pipelines.",
 )
 @click.option("--exp_design", help="SDRF file to be validated", required=True)
+@click.option(
+    "--template", "-t",
+    help="Schema template to validate against (e.g. ms-proteomics, dia-acquisition)",
+    default="ms-proteomics",
+)
 @click.option("--skip_sdrf_validation", help="Skip all SDRF validation", is_flag=True)
 @click.option(
     "--use_ols_cache_only",
@@ -69,12 +57,14 @@ def check_sdrf(
 )
 def checksamplesheet(
     exp_design: str,
+    template: str = "ms-proteomics",
     skip_sdrf_validation: bool = False,
     use_ols_cache_only: bool = False,
 ):
     """Validate an SDRF file for quantms pipelines."""
     check_sdrf(
         input_sdrf=exp_design,
+        template=template,
         skip_sdrf_validation=skip_sdrf_validation,
         use_ols_cache_only=use_ols_cache_only,
     )

From fdc415b79f186385acc51d8c7027fd90ca5f07a1 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sat, 21 Mar 2026 08:46:08 +0000
Subject: [PATCH 3/5] Add --minimal SDRF validation; remove setuptools pins

---
 quantmsutils/sdrf/check_samplesheet.py | 97 +++++++++++++++++++++-----
 recipe/meta.yaml                       |  9 +--
 tests/test_commands.py                 | 39 ++++++++++-
 3 files changed, 119 insertions(+), 26 deletions(-)

diff --git a/quantmsutils/sdrf/check_samplesheet.py b/quantmsutils/sdrf/check_samplesheet.py
index 8d876ef..01af482 100644
--- a/quantmsutils/sdrf/check_samplesheet.py
+++ b/quantmsutils/sdrf/check_samplesheet.py
@@ -2,36 +2,63 @@
 import sys
 
 import click
+import pandas as pd
 
 from sdrf_pipelines.sdrf.sdrf import read_sdrf
 
 logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 
+# Minimal columns required to run quantms/quantmsdiann pipelines.
+# These are checked in --minimal mode instead of full schema validation.
+MINIMAL_REQUIRED_COLUMNS = [
+    "source name",
+    "assay name",
+    "comment[data file]",
+    "comment[label]",
+    "comment[cleavage agent details]",
+    "comment[instrument]",
+    "comment[proteomics data acquisition method]",
+    "technology type",
+]
+
+# Columns with at least one modification parameters column (pattern match)
+MINIMAL_PATTERN_COLUMNS = [
+    "comment[modification parameters",  # prefix match — multiple columns allowed
+]
+
+# Recommended columns: warn if missing but don't fail
+MINIMAL_RECOMMENDED_COLUMNS = [
+    "comment[precursor mass tolerance]",
+    "comment[fragment mass tolerance]",
+    "comment[dissociation method]",
+    "comment[technical replicate]",
+    "comment[fraction identifier]",
+]
+
 
 def check_sdrf(
     input_sdrf: str,
     template: str = "ms-proteomics",
-    skip_sdrf_validation: bool = False,
+    minimal: bool = False,
     use_ols_cache_only: bool = False,
 ):
     """
-    Check the SDRF file for errors. If any errors are found, print them and exit with a non-zero status code.
+    Check the SDRF file for errors.
 
     :param input_sdrf: Path to the SDRF file to check
-    :param template: Schema template to validate against (e.g. 'ms-proteomics', 'dia-acquisition')
-    :param skip_sdrf_validation: Skip all SDRF validation
+    :param template: Schema template for full validation (e.g. 'ms-proteomics', 'dia-acquisition')
+    :param minimal: Only validate columns required to run the pipeline (skip organism, etc.)
     :param use_ols_cache_only: Use OLS cache instead of live OLS service
     """
-    if skip_sdrf_validation:
-        print("No SDRF validation was performed.")
-        sys.exit(0)
-
-    df = read_sdrf(input_sdrf)
-    errors = df.validate_sdrf(
-        template=template,
-        use_ols_cache_only=use_ols_cache_only,
-    )
+    if minimal:
+        errors = _validate_minimal(input_sdrf)
+    else:
+        df = read_sdrf(input_sdrf)
+        errors = df.validate_sdrf(
+            template=template,
+            use_ols_cache_only=use_ols_cache_only,
+        )
 
     for error in errors:
         print(error)
@@ -39,6 +66,38 @@ def check_sdrf(
     sys.exit(bool(errors))
 
 
+def _validate_minimal(input_sdrf: str) -> list[str]:
+    """Validate only the columns required to run the pipeline.
+
+    Returns a list of error strings. Only missing required columns
+    produce errors; missing recommended columns produce warnings (non-blocking).
+    """
+    df = pd.read_csv(input_sdrf, sep="\t", nrows=0)
+    columns_lower = [c.lower() for c in df.columns]
+    errors = []
+
+    # Check required columns (case-insensitive)
+    for col in MINIMAL_REQUIRED_COLUMNS:
+        if col.lower() not in columns_lower:
+            errors.append(f"ERROR: Required column '{col}' is missing from the SDRF file.")
+
+    # Check at least one modification parameters column exists
+    has_mod_col = any(c.startswith("comment[modification parameters") for c in columns_lower)
+    if not has_mod_col:
+        errors.append(
+            "ERROR: At least one 'comment[modification parameters]' column is required."
+        )
+
+    # Warn about recommended columns (non-blocking)
+    for col in MINIMAL_RECOMMENDED_COLUMNS:
+        if col.lower() not in columns_lower:
+            logger.warning(
+                f"Recommended column '{col}' is missing. Pipeline will use default parameters."
+            )
+
+    return errors
+
+
 @click.command(
     "checksamplesheet",
     short_help="Validate an SDRF file for quantms pipelines.",
@@ -46,10 +105,14 @@ def check_sdrf(
 @click.option("--exp_design", help="SDRF file to be validated", required=True)
 @click.option(
     "--template", "-t",
-    help="Schema template to validate against (e.g. ms-proteomics, dia-acquisition)",
+    help="Schema template for full validation (e.g. ms-proteomics, dia-acquisition)",
     default="ms-proteomics",
 )
-@click.option("--skip_sdrf_validation", help="Skip all SDRF validation", is_flag=True)
+@click.option(
+    "--minimal",
+    help="Only validate columns required to run the pipeline (skip organism, metadata, etc.)",
+    is_flag=True,
+)
 @click.option(
     "--use_ols_cache_only",
     help="Use OLS cache for ontology validation instead of the live OLS service",
@@ -58,13 +121,13 @@ def check_sdrf(
 def checksamplesheet(
     exp_design: str,
     template: str = "ms-proteomics",
-    skip_sdrf_validation: bool = False,
+    minimal: bool = False,
     use_ols_cache_only: bool = False,
 ):
     """Validate an SDRF file for quantms pipelines."""
     check_sdrf(
         input_sdrf=exp_design,
         template=template,
-        skip_sdrf_validation=skip_sdrf_validation,
+        minimal=minimal,
         use_ols_cache_only=use_ols_cache_only,
     )
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index d31b5be..b318cac 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,7 +1,7 @@
 # recipe/meta.yaml
 package:
   name: quantms-utils
-  version: "0.0.25"
+  version: "0.0.26"
 
 source:
   path: ../
@@ -20,19 +20,16 @@ requirements:
     - python
     - pip
     - poetry-core >=1.2.0
-    - setuptools <78
+
   run:
     - python >=3.9,<3.13
     - click
-    - setuptools <78
-    - sdrf-pipelines >=0.0.33,<0.1.0
+    - sdrf-pipelines >=0.1.1
     - pyopenms>=3.3.0
     - pandas
     - pyarrow>=16.1.0
     - scipy
 test:
-  requires:
-    - setuptools <78
   imports:
     - quantmsutils
   commands:
diff --git a/tests/test_commands.py b/tests/test_commands.py
index 3d340db..0d35f54 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -107,16 +107,49 @@ def test_dianncfg_example(self):
 class TestSamplesheetCommands:
     """Test class for samplesheet related commands"""
 
-    def test_check_samplesheet_sdrf_skip_validation(self):
-        """Test the SDRF check command with skip_sdrf_validation (smoke test)."""
+    def test_check_samplesheet_minimal_pxd000001(self):
+        """Test minimal validation on PXD000001 (legacy SDRF without acquisition method)."""
+        # PXD000001 is a TMT dataset without comment[proteomics data acquisition method]
+        # Minimal validation should flag it as missing a required column
         args = [
-            "--skip_sdrf_validation",
+            "--minimal",
             "--exp_design",
             str(TEST_DATA_DIR / "PXD000001.sdrf.tsv"),
         ]
         result = run_cli_command("checksamplesheet", args)
+        assert result.exit_code != 0
+        assert "proteomics data acquisition method" in result.output.lower()
+
+    def test_check_samplesheet_minimal_valid(self):
+        """Test minimal validation passes for a valid SDRF with all required columns."""
+        import tempfile
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".sdrf.tsv", delete=False) as f:
+            f.write("source name\tassay name\tcomment[data file]\tcomment[label]\t"
+                    "comment[instrument]\tcomment[proteomics data acquisition method]\t"
+                    "technology type\tcomment[cleavage agent details]\t"
+                    "comment[modification parameters]\n")
+            f.write("S1\trun1\tfile1.raw\tlabel free sample\tOrbitrap\t"
+                    "Data-Independent Acquisition\tMS\tTrypsin\tOxidation\n")
+            tmp_path = f.name
+        args = ["--minimal", "--exp_design", tmp_path]
+        result = run_cli_command("checksamplesheet", args)
         assert result.exit_code == 0
 
+    def test_check_samplesheet_minimal_missing_column(self):
+        """Test minimal validation fails when a required column is missing."""
+        import tempfile
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".sdrf.tsv", delete=False) as f:
+            # Missing comment[cleavage agent details]
+            f.write("source name\tassay name\tcomment[data file]\tcomment[label]\t"
+                    "comment[instrument]\tcomment[proteomics data acquisition method]\t"
+                    "technology type\tcomment[modification parameters]\n")
+            f.write("S1\trun1\tfile1.raw\tlabel free sample\tOrbitrap\tDIA\tMS\tOxidation\n")
+            f.name
+        args = ["--minimal", "--exp_design", f.name]
+        result = run_cli_command("checksamplesheet", args)
+        assert result.exit_code != 0
+        assert "cleavage agent" in result.output.lower()
+
     def test_extract_sample_from_expdesign(self):
         """Test extracting sample information from experiment design"""
         args = ["--expdesign", str(TEST_DATA_DIR / "BSA_design_urls.tsv")]

From 11ccc4c5fb6850657c2ca898eea07f4d741de5d5 Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sat, 21 Mar 2026 09:58:03 +0000
Subject: [PATCH 4/5] Fix no-op f.name statement in minimal validation test

---
 tests/test_commands.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_commands.py b/tests/test_commands.py
index 0d35f54..021d5a1 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -144,8 +144,8 @@ def test_check_samplesheet_minimal_missing_column(self):
                     "comment[instrument]\tcomment[proteomics data acquisition method]\t"
                     "technology type\tcomment[modification parameters]\n")
             f.write("S1\trun1\tfile1.raw\tlabel free sample\tOrbitrap\tDIA\tMS\tOxidation\n")
-            f.name
-        args = ["--minimal", "--exp_design", f.name]
+            tmp_path = f.name
+        args = ["--minimal", "--exp_design", tmp_path]
         result = run_cli_command("checksamplesheet", args)
         assert result.exit_code != 0
         assert "cleavage agent" in result.output.lower()

From 45f3cffd466c56f032c91bd34f691ff25880921f Mon Sep 17 00:00:00 2001
From: Yasset Perez-Riverol <ypriverol@gmail.com>
Date: Sat, 21 Mar 2026 10:51:55 +0000
Subject: [PATCH 5/5] Reject header-only SDRF files in minimal validation

---
 quantmsutils/sdrf/check_samplesheet.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/quantmsutils/sdrf/check_samplesheet.py b/quantmsutils/sdrf/check_samplesheet.py
index 01af482..8754ea3 100644
--- a/quantmsutils/sdrf/check_samplesheet.py
+++ b/quantmsutils/sdrf/check_samplesheet.py
@@ -22,11 +22,6 @@
     "technology type",
 ]
 
-# Columns with at least one modification parameters column (pattern match)
-MINIMAL_PATTERN_COLUMNS = [
-    "comment[modification parameters",  # prefix match — multiple columns allowed
-]
-
 # Recommended columns: warn if missing but don't fail
 MINIMAL_RECOMMENDED_COLUMNS = [
     "comment[precursor mass tolerance]",
@@ -72,10 +67,16 @@ def _validate_minimal(input_sdrf: str) -> list[str]:
     Returns a list of error strings. Only missing required columns
     produce errors; missing recommended columns produce warnings (non-blocking).
     """
-    df = pd.read_csv(input_sdrf, sep="\t", nrows=0)
-    columns_lower = [c.lower() for c in df.columns]
+    df_header = pd.read_csv(input_sdrf, sep="\t", nrows=0)
+    columns_lower = [c.lower() for c in df_header.columns]
     errors = []
 
+    # Reject header-only files
+    df_rows = pd.read_csv(input_sdrf, sep="\t", nrows=1)
+    if len(df_rows) == 0:
+        errors.append("ERROR: SDRF file contains a header but no data rows.")
+        return errors
+
     # Check required columns (case-insensitive)
     for col in MINIMAL_REQUIRED_COLUMNS:
         if col.lower() not in columns_lower: