Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conf/modules/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ process {
}

// Result tables from multiple pipelines including LFQ, TMT, DIA, DDA
withName: '.*:PROTEOMICSLFQ|PROTEIN_QUANTIFIER|MSSTATS_CONVERTER|FINAL_QUANTIFICATION|CONVERT_RESULTS' {
withName: '.*:PROTEOMICSLFQ|PROTEIN_QUANTIFIER|MSSTATS_CONVERTER|FINAL_QUANTIFICATION|CONVERT_RESULTS|ISOBARIC_WORKFLOW' {
publishDir = [
path: { "${params.outdir}/quant_tables" },
mode: 'copy',
Expand Down
87 changes: 87 additions & 0 deletions modules/local/openms/isobaric_workflow/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
process ISOBARIC_WORKFLOW {
    tag "${expdes.baseName}"
    label 'process_high'
    label 'openms'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' :
        'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }"

    input:
    val(labelling_type)
    path(mzmls)
    path(id_files)
    path(expdes)

    output:
    path "${expdes.baseName}_openms.mzTab", emit: out_mztab
    path "${expdes.baseName}_openms.consensusXML", emit: out_consensusXML
    path "*.log", emit: log
    path "versions.yml", emit: versions

    script:
    def args = task.ext.args ?: ''

    // Normalize a staged file name down to its sample base name so mzML and
    // idXML files can be matched positionally: strip the .mzML extension, and
    // for idXML files also strip the search-engine / post-processing suffixes
    // added upstream (e.g. "_comet_perc_filter").
    def extractBaseName = { filename ->
        def name = filename.toString()
        name = name.replaceAll(/\.mzML$/, '')

        if (name.endsWith('.idXML')) {
            name = name.replaceAll(/\.idXML$/, '')
            name = name.replaceAll(/_(comet|msgf|sage|consensus)(_perc)?(_filter)?(_fdr)?$/, '')
        }
        return name
    }

    def mzml_sorted = mzmls.collect().sort { a, b ->
        extractBaseName(a.name) <=> extractBaseName(b.name)
    }
    def id_sorted = id_files.collect().sort { a, b ->
        extractBaseName(a.name) <=> extractBaseName(b.name)
    }

    // Fail fast if the positional mzML/idXML pairing drifts. The upstream
    // channels are matched by sample, but this process re-derives the pairing
    // by sorting both lists independently: if a filename falls outside the
    // normalization regex above, the tool would otherwise run and silently
    // quantitate spectra against the wrong identification file.
    def mzml_names = mzml_sorted.collect { extractBaseName(it.name) }
    def id_names   = id_sorted.collect { extractBaseName(it.name) }
    if (mzml_names != id_names) {
        throw new IllegalArgumentException(
            "ISOBARIC_WORKFLOW: mismatched mzML/idXML pairing after name normalization.\n" +
            "  mzML:  ${mzml_names}\n" +
            "  idXML: ${id_names}"
        )
    }

    // Build the isotope correction matrix argument if enabled. The matrix file
    // is '/'-separated with a header row; lines starting with '#' are comments.
    def isotope_correction = ""
    if (params.isotope_correction && params.plex_corr_matrix_file != null) {
        def matrix_lines = new File(params.plex_corr_matrix_file).readLines()
            .findAll { !it.startsWith('#') && it.trim() }
            .drop(1)    // skip the header row
            .collect { line ->
                def values = line.split('/')
                // TMT16/18 channels carry 8 correction values, other plexes 4
                if (labelling_type == 'tmt18plex' || labelling_type == 'tmt16plex') {
                    return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}/${values[5]}/${values[6]}/${values[7]}/${values[8]}\""
                } else {
                    return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}\""
                }
            }
        def correction_matrix = matrix_lines.join(" ")
        isotope_correction = "-quantification:isotope_correction true -${labelling_type}:correction_matrix ${correction_matrix}"
    }

    // NOTE(review): -picked_decoy_prefix forwards params.decoy_string_position so
    // suffix-decoy databases are honored by the picked-FDR step (mirrors the
    // ProteomicsLFQ invocation elsewhere in the pipeline) — confirm the flag name
    // against the IsobaricWorkflow TOPP tool help.
    """
    IsobaricWorkflow \\
        -threads ${task.cpus} \\
        -in ${mzml_sorted.join(' ')} \\
        -in_id ${id_sorted.join(' ')} \\
        -exp_design ${expdes} \\
        -type ${labelling_type} \\
        -inference_method ${params.protein_inference_method} \\
        -protein_quantification ${params.protein_quant} \\
        -psmFDR ${params.psm_level_fdr_cutoff} \\
        -proteinFDR ${params.protein_level_fdr_cutoff} \\
        -picked_fdr ${params.picked_fdr} \\
        -picked_decoy_string ${params.decoy_string} \\
        -picked_decoy_prefix ${params.decoy_string_position} \\
        -extraction:min_precursor_purity ${params.min_precursor_purity} \\
        -extraction:precursor_isotope_deviation ${params.precursor_isotope_deviation} \\
        -extraction:min_reporter_intensity ${params.min_reporter_intensity} \\
        ${isotope_correction} \\
        -out ${expdes.baseName}_openms.consensusXML \\
        -out_mzTab ${expdes.baseName}_openms.mzTab \\
        $args \\
        2>&1 | tee isobaricworkflow.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        IsobaricWorkflow: \$(IsobaricWorkflow 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
    END_VERSIONS
    """
}
42 changes: 42 additions & 0 deletions modules/local/openms/isobaric_workflow/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: isobaric_workflow
description: Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment.
keywords:
- OpenMS
- quantification
tools:
- IsobaricWorkflow:
description: |
Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment.
homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html
documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html
- mzmls:
type: file
description: Input Spectra in mzML format
pattern: "*.mzML"
- id_files:
type: file
description: Identifications in idXML or mzIdentML format with posterior error probabilities as score type.
pattern: "*.idXML"
- expdes:
type: file
description: An experimental design file
pattern: "*.tsv"
output:
- out_mztab:
type: file
description: mzTab file with analysis results
pattern: "*.mzTab"
- out_consensusXML:
type: file
description: ConsensusXML file for visualization and further processing in OpenMS.
pattern: "*.consensusXML"
- log:
type: file
description: log file
pattern: "*.log"
- versions:
type: file
description: File containing software version
pattern: "versions.yml"
authors:
- "@MaLLLiYA"
48 changes: 23 additions & 25 deletions workflows/tmt.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
//
// MODULES: Local to the pipeline
//
include { FILE_MERGE } from '../modules/local/openms/file_merge/main'
include { MSSTATS_TMT } from '../modules/local/msstats/msstats_tmt/main'
include { ISOBARIC_WORKFLOW } from '../modules/local/openms/isobaric_workflow/main'
include { MSSTATS_CONVERTER } from '../modules/local/openms/msstats_converter/main'

//
// SUBWORKFLOWS: Consisting of a mix of local and nf-core/modules
//
include { FEATURE_MAPPER } from '../subworkflows/local/feature_mapper/main'
include { PROTEIN_INFERENCE } from '../subworkflows/local/protein_inference/main'
include { PROTEIN_QUANT } from '../subworkflows/local/protein_quant/main'
include { ID } from '../subworkflows/local/id/main'

/*
Expand All @@ -41,35 +39,35 @@ workflow TMT {
ch_software_versions = ch_software_versions.mix(ID.out.versions)

//
// SUBWORKFLOW: FEATUREMAPPER
// SUBWORKFLOW: ISOBARIC_WORKFLOW
//
FEATURE_MAPPER(ch_file_preparation_results, ID.out.id_results)
ch_software_versions = ch_software_versions.mix(FEATURE_MAPPER.out.versions)
// Extract labelling_type from meta (auto-detected from SDRF)
ch_file_preparation_results.join(ID.out.id_results)
.multiMap { it ->
labelling_type: it[0].labelling_type
mzmls: it[1]
ids: it[2]
}
.set{ ch_iso_workflow }
ISOBARIC_WORKFLOW(ch_iso_workflow.labelling_type.first(),
ch_iso_workflow.mzmls.collect(),
ch_iso_workflow.ids.collect(),
ch_expdesign
)
ch_software_versions = ch_software_versions.mix(ISOBARIC_WORKFLOW.out.versions)

//
// MODULE: FILEMERGE
// MODULE: MSSTATS_CONVERTER
//
FILE_MERGE(FEATURE_MAPPER.out.id_map.collect())
ch_software_versions = ch_software_versions.mix(FILE_MERGE.out.versions)

//
// SUBWORKFLOW: PROTEININFERENCE
//
PROTEIN_INFERENCE(FILE_MERGE.out.id_merge)
ch_software_versions = ch_software_versions.mix(PROTEIN_INFERENCE.out.versions)

//
// SUBWORKFLOW: PROTEINQUANT
//
PROTEIN_QUANT(PROTEIN_INFERENCE.out.epi_idfilter, ch_expdesign)
ch_software_versions = ch_software_versions.mix(PROTEIN_QUANT.out.versions)
MSSTATS_CONVERTER(ISOBARIC_WORKFLOW.out.out_consensusXML, ch_expdesign, "ISO")
ch_software_versions = ch_software_versions.mix(MSSTATS_CONVERTER.out.versions)

//
// MODULE: MSSTATSTMT
//
ch_msstats_out = Channel.empty()
if(!params.skip_post_msstats){
MSSTATS_TMT(PROTEIN_QUANT.out.msstats_csv)
MSSTATS_TMT(MSSTATS_CONVERTER.out.out_msstats)
ch_msstats_out = MSSTATS_TMT.out.msstats_csv
ch_software_versions = ch_software_versions.mix(MSSTATS_TMT.out.versions)
}
Expand All @@ -85,8 +83,8 @@ workflow TMT {
emit:
ch_pmultiqc_ids = ch_pmultiqc_ids
ch_pmultiqc_consensus = ch_pmultiqc_consensus
final_result = PROTEIN_QUANT.out.out_mztab
msstats_in = PROTEIN_QUANT.out.msstats_csv
final_result = ISOBARIC_WORKFLOW.out.out_mztab
msstats_in = MSSTATS_CONVERTER.out.out_msstats
msstats_out = ch_msstats_out
versions = ch_software_versions
}
Loading