-
Notifications
You must be signed in to change notification settings - Fork 54
Major refactoring of the Isobaric workflow, enabling it to work similarly to proteomicsLFQ #692
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
66080bb
3765937
3ec6c72
70c60e4
f369833
2a33101
d560a37
e6c1c41
881a69a
e7d22b1
fc363bb
31bc8a7
6f76db5
69d5c61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| // Runs the OpenMS IsobaricWorkflow tool on a set of mzML spectra files and their | ||
| // identification files, using the given experimental design and labelling type. | ||
| // Emits the mzTab and consensusXML results, the tool log and a versions.yml file. | ||
| process ISOBARIC_WORKFLOW { | ||
| tag "${expdes.baseName}" | ||
| label 'process_high' | ||
| label 'openms' | ||
|
|
||
| container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
| 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : | ||
| 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" | ||
|
|
||
| input: | ||
| val(labelling_type) | ||
| path(mzmls) | ||
| path(id_files) | ||
| path(expdes) | ||
|
|
||
| output: | ||
| path "${expdes.baseName}_openms.mzTab", emit: out_mztab | ||
| path "${expdes.baseName}_openms.consensusXML", emit: out_consensusXML | ||
| path "*.log", emit: log | ||
| path "versions.yml", emit: versions | ||
|
|
||
| script: | ||
| def args = task.ext.args ?: '' | ||
| // Reduces a file name to the key used to pair spectra with identifications: | ||
| // a trailing ".mzML" is removed; for ".idXML" files the extension and the | ||
| // trailing search-engine / post-processing suffixes matched below are removed. | ||
| def extractBaseName = { filename -> | ||
| def name = filename.toString() | ||
| name = name.replaceAll(/\.mzML$/, '') | ||
|
|
||
| if (name.endsWith('.idXML')) { | ||
| name = name.replaceAll(/\.idXML$/, '') | ||
| name = name.replaceAll(/_(comet|msgf|sage|consensus)(_perc)?(_filter)?(_fdr)?$/, '') | ||
| } | ||
| return name | ||
| } | ||
|
|
||
| // Both lists are sorted by the normalised key so that the n-th mzML and the | ||
| // n-th identification file are passed to -in / -in_id in the same position. | ||
| def mzml_sorted = mzmls.collect().sort{ a, b -> | ||
| extractBaseName(a.name) <=> extractBaseName(b.name) | ||
| } | ||
| def id_sorted = id_files.collect().sort{ a, b -> | ||
| extractBaseName(a.name) <=> extractBaseName(b.name) | ||
| } | ||
|
|
||
| // Fail fast if the positional pairing is not exact: a file name that the | ||
| // normalisation above does not cover (or a differing number of files) would | ||
| // otherwise let the tool run against the wrong identification file. | ||
| def mzml_names = mzml_sorted.collect { extractBaseName(it.name) } | ||
| def id_names = id_sorted.collect { extractBaseName(it.name) } | ||
| if (mzml_names != id_names) { | ||
| throw new IllegalArgumentException("Mismatched IsobaricWorkflow inputs: mzML=${mzml_names} idXML=${id_names}") | ||
| } | ||
|
|
||
| // Build isotope correction matrix argument if enabled | ||
| def isotope_correction = "" | ||
| if (params.isotope_correction && params.plex_corr_matrix_file != null) { | ||
| // Comment lines ('#') and blank lines are ignored; the first remaining line | ||
| // is dropped (presumably a header row - confirm against the matrix file format). | ||
| def matrix_lines = new File(params.plex_corr_matrix_file).readLines() | ||
| .findAll { !it.startsWith('#') && it.trim() } | ||
| .drop(1) | ||
| .collect { line -> | ||
| // The first '/'-separated field is skipped; 8 values are kept for | ||
| // tmt16plex/tmt18plex and 4 values for every other labelling type. | ||
| def values = line.split('/') | ||
| if (labelling_type == 'tmt18plex' || labelling_type == 'tmt16plex') { | ||
| return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}/${values[5]}/${values[6]}/${values[7]}/${values[8]}\"" | ||
| } else { | ||
| return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}\"" | ||
| } | ||
| } | ||
| def correction_matrix = matrix_lines.join(" ") | ||
| isotope_correction = "-quantification:isotope_correction true -${labelling_type}:correction_matrix ${correction_matrix}" | ||
| } | ||
|
|
||
| """ | ||
| IsobaricWorkflow \\ | ||
| -threads ${task.cpus} \\ | ||
| -in ${mzml_sorted.join(' ')} \\ | ||
| -in_id ${id_sorted.join(' ')} \\ | ||
| -exp_design ${expdes} \\ | ||
| -type ${labelling_type} \\ | ||
| -inference_method ${params.protein_inference_method} \\ | ||
| -protein_quantification ${params.protein_quant} \\ | ||
| -psmFDR ${params.psm_level_fdr_cutoff} \\ | ||
| -proteinFDR ${params.protein_level_fdr_cutoff} \\ | ||
| -picked_fdr ${params.picked_fdr} \\ | ||
| -picked_decoy_string ${params.decoy_string} \\ | ||
|
Comment on lines
+71
to
+72
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Propagate the decoy-position setting in this new path. The pipeline already exposes 🤖 Prompt for AI Agents |
||
| -extraction:min_precursor_purity ${params.min_precursor_purity} \\ | ||
| -extraction:precursor_isotope_deviation ${params.precursor_isotope_deviation} \\ | ||
| -extraction:min_reporter_intensity ${params.min_reporter_intensity} \\ | ||
| ${isotope_correction} \\ | ||
| -out ${expdes.baseName}_openms.consensusXML \\ | ||
| -out_mzTab ${expdes.baseName}_openms.mzTab \\ | ||
| $args \\ | ||
| 2>&1 | tee isobaricworkflow.log | ||
|
|
||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| IsobaricWorkflow: \$(IsobaricWorkflow 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) | ||
| END_VERSIONS | ||
| """ | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| # Module metadata for the ISOBARIC_WORKFLOW process: tool description, inputs and outputs. | ||
| name: isobaric_workflow | ||
| description: Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. | ||
| keywords: | ||
| - OpenMS | ||
| - quantification | ||
| tools: | ||
| - IsobaricWorkflow: | ||
| description: | | ||
| Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. | ||
| homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html | ||
| documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html | ||
| input: | ||
| - labelling_type: | ||
| type: string | ||
| description: Isobaric labelling type passed to the IsobaricWorkflow -type option (e.g. tmt16plex, tmt18plex) | ||
| - mzmls: | ||
| type: file | ||
| description: Input Spectra in mzML format | ||
| pattern: "*.mzML" | ||
| - id_files: | ||
| type: file | ||
| description: Identifications in idXML or mzIdentML format with posterior error probabilities as score type. | ||
| pattern: "*.idXML" | ||
| - expdes: | ||
| type: file | ||
| description: An experimental design file | ||
| pattern: "*.tsv" | ||
| output: | ||
| - out_mztab: | ||
| type: file | ||
| description: mzTab file with analysis results | ||
| pattern: "*.mzTab" | ||
| - out_consensusXML: | ||
| type: file | ||
| description: ConsensusXML file for visualization and further processing in OpenMS. | ||
| pattern: "*.consensusXML" | ||
| - log: | ||
| type: file | ||
| description: log file | ||
| pattern: "*.log" | ||
| - versions: | ||
| type: file | ||
| description: File containing software version | ||
| pattern: "versions.yml" | ||
| authors: | ||
| - "@MaLLLiYA" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fail fast if the mzML/idXML pairing drifts.
Upstream these files are already matched by sample, but this process rebuilds that pairing by sorting both lists independently and then passing them positionally to `-in`/`-in_id`. If a filename falls outside the current normalization regex, the command still runs but can quantitate against the wrong ID file.

🛠️ Suggested guard
```diff
     def mzml_sorted = mzmls.collect().sort{ a, b ->
         extractBaseName(a.name) <=> extractBaseName(b.name)
     }
     def id_sorted = id_files.collect().sort{ a, b ->
         extractBaseName(a.name) <=> extractBaseName(b.name)
     }
+    def mzml_names = mzml_sorted.collect { extractBaseName(it.name) }
+    def id_names = id_sorted.collect { extractBaseName(it.name) }
+    if (mzml_names != id_names) {
+        throw new IllegalArgumentException("Mismatched IsobaricWorkflow inputs: mzML=${mzml_names} idXML=${id_names}")
+    }
```

🤖 Prompt for AI Agents