Skip to content

Commit 700025b

Browse files
committed
Apply suggestions from code review
1 parent fb1f2a6 commit 700025b

1 file changed

Lines changed: 57 additions & 9 deletions

File tree

src/topp/IsobaricWorkflow.cpp

Lines changed: 57 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#include <OpenMS/FORMAT/ExperimentalDesignFile.h>
3232
#include <OpenMS/FORMAT/FileHandler.h>
3333
#include <OpenMS/FORMAT/MzMLFile.h>
34+
#include <OpenMS/FORMAT/MzTabFile.h>
3435
#include <OpenMS/KERNEL/MSExperiment.h>
3536
#include <OpenMS/PROCESSING/ID/IDFilter.h>
3637

@@ -190,18 +191,27 @@ class TOPPIsobaricWorkflow :
190191
setValidFormats_("in_id", {"idXML"});
191192
registerInputFile_("exp_design", "<file>", "", "experimental design file (optional). If not given, the design is assumed to be unfractionated.", false);
192193
setValidFormats_("exp_design", {"tsv"});
193-
registerOutputFile_("out", "<file>", "", "output consensusXML or mzTab file with quantitative information");
194-
setValidFormats_("out", {"consensusXML","mzTab"});
194+
registerOutputFile_("out", "<file>", "", "output consensusXML file");
195+
setValidFormats_("out", {"consensusXML"});
196+
registerOutputFile_("out_mzTab", "<file>", "", "output mzTab file with quantitative information");
197+
setValidFormats_("out_mzTab", {"mzTab"});
195198
registerFlag_("calculate_id_purity", "Calculate the purity of the precursor ion based on the MS1 spectrum. Only used for MS3, otherwise it is the same as the quant. precursor purity.");
196199
registerIntOption_("max_parallel_files", "<num>", 1, "Maximum number of files to load in parallel.", false);
197200
registerFlag_("protein_inference", "Infer and group proteins");
198201
registerFlag_("protein_quant", "Quantify proteins from the peptide quantification. Implies protein inference.");
199-
registerDoubleOption_("peptide_score", "<score>", NAN, "The score which should be reached by a peptide hit to be kept. (use 'NAN' to disable this filter)", false);
202+
registerDoubleOption_("psm_score", "<score>", NAN, "The score which should be reached by a peptide hit to be kept. (use 'NAN' to disable this filter)", false);
200203
registerDoubleOption_("protein_score", "<score>", NAN, "The score which should be reached by a protein hit to be kept. All proteins are filtered based on their singleton scores irrespective of grouping. Use in combination with 'delete_unreferenced_peptide_hits' to remove affected peptides. (use 'NAN' to disable this filter)", false);
201204
registerFlag_("delete_unreferenced_peptide_hits", "Peptides not referenced by any protein are deleted in the IDs.");
202205
// registerFlag_("remove_decoys", "Remove decoys according to the information in the user parameters.");
203206
registerStringOption_("inference_method", "<option>", "aggregation", "Methods used for protein inference", false);
204207
setValidStrings_("inference_method", ListUtils::create<String>("aggregation,bayesian"));
208+
registerStringOption_("picked_fdr", "<option>", "false", "Use a picked protein FDR", false, true);
209+
setValidStrings_("picked_fdr", {"true", "false"});
210+
registerStringOption_("picked_decoy_string", "<decoy_string>", "", "If using picked protein FDRs, which decoy string was used? Leave blank for auto-detection.", false, true);
211+
registerStringOption_("picked_decoy_prefix", "<option>", "prefix", "If using picked protein FDRs, was the decoy string a prefix or suffix? Ignored during auto-detection.", false, true);
212+
setValidStrings_("picked_decoy_prefix", {"prefix", "suffix"});
213+
registerStringOption_("FDR_type", "<option>", "PSM", "Sub-protein FDR level. PSM, PSM+peptide (best PSM q-value).", false, true);
214+
setValidStrings_("FDR_type", {"PSM", "PSM+peptide"});
205215
registerDoubleOption_("proteinFDR", "<threshold>", 1.0, "Protein FDR threshold (0.05=5%).", false);
206216
setMinFloat_("proteinFDR", 0.0);
207217
setMaxFloat_("proteinFDR", 1.0);
@@ -521,18 +531,18 @@ class TOPPIsobaricWorkflow :
521531
FileHandler().loadIdentifications(id_file, prot_ids, pep_ids);
522532
// TODO filter by qvalue here?
523533
double pro_score = getDoubleOption_("protein_score");
524-
double pep_score = getDoubleOption_("peptide_score");
534+
double psm_score = getDoubleOption_("psm_score");
525535

526536
if (!std::isnan(pro_score))
527537
{
528538
OPENMS_LOG_INFO << "Filtering by protein score (better than " << pro_score << ")..." << endl;
529539
IDFilter::filterHitsByScore(prot_ids, pro_score);
530540
}
531541

532-
if (!std::isnan(pep_score))
542+
if (! std::isnan(psm_score))
533543
{
534-
OPENMS_LOG_INFO << "Filtering by peptide score (better than " << pep_score << ")..." << endl;
535-
IDFilter::filterHitsByScore(pep_ids, pep_score);
544+
OPENMS_LOG_INFO << "Filtering by PSM score (better than " << psm_score << ")..." << endl;
545+
IDFilter::filterHitsByScore(pep_ids, psm_score);
536546
}
537547

538548
merger.insertRuns(std::move(prot_ids), {}); // pep IDs will be stored in the consensus features
@@ -667,6 +677,8 @@ class TOPPIsobaricWorkflow :
667677
if (!bayesian) {
668678
BasicProteinInferenceAlgorithm prot_inference;
669679
Param bpi_param = getParam_().copy("BasicProteinInference:", true);
680+
bpi_param.setValue("annotate_indistinguishable_groups", groups ? "true" : "false");
681+
bpi_param.setValue("greedy_group_resolution", greedy_group_resolution ? "true" : "false");
670682
writeDebug_("Parameters passed to BasicProteinInference algorithm", bpi_param, 3);
671683
prot_inference.setParameters(bpi_param);
672684
prot_inference.run(cmap, cmap.getProteinIdentifications()[0], false);
@@ -684,8 +696,25 @@ class TOPPIsobaricWorkflow :
684696

685697
FalseDiscoveryRate fdr;
686698
auto& proteins = cmap.getProteinIdentifications()[0];
687-
fdr.applyBasic(proteins);
688-
fdr.applyBasic(cmap, true);
699+
700+
if (getStringOption_("picked_fdr") == "true")
701+
{
702+
fdr.applyPickedProteinFDR(proteins, getStringOption_("picked_decoy_string"), getStringOption_("picked_decoy_prefix") == "prefix");
703+
}
704+
else
705+
{
706+
fdr.applyBasic(proteins);
707+
}
708+
709+
if (getStringOption_("FDR_type") == "PSM+peptide")
710+
{
711+
fdr.applyBasicPeptideLevel(cmap, false);
712+
}
713+
else
714+
{
715+
fdr.applyBasic(cmap, false);
716+
}
717+
689718

690719
bool rm_pep = getFlag_("delete_unreferenced_peptide_hits");
691720
if (rm_pep)
@@ -779,6 +808,25 @@ class TOPPIsobaricWorkflow :
779808

780809
// TODO mzTab is written below when 'out_mzTab' is set; also allow storing parquet
781810
FileHandler().storeConsensusFeatures(out, cmap);
811+
812+
String out_mzTab = getStringOption_("out_mzTab");
813+
if (! out_mzTab.empty())
814+
{
815+
const bool report_unidentified_features(false);
816+
const bool report_unmapped(true);
817+
const bool report_subfeatures(false);
818+
const bool report_unidentified_spectra(false);
819+
const bool report_not_only_best_psm_per_spectrum(false);
820+
821+
MzTabFile().store(out_mzTab,
822+
cmap,
823+
false,
824+
report_unidentified_features,
825+
report_unmapped,
826+
report_subfeatures,
827+
report_unidentified_spectra,
828+
report_not_only_best_psm_per_spectrum);
829+
}
782830

783831
return EXECUTION_OK;
784832
}

0 commit comments

Comments
 (0)