comparison 0.4.2/workflows/centriflaken_hy.nf @ 105:52045ea4679d

"planemo upload"
author kkonganti
date Thu, 27 Jun 2024 14:17:26 -0400
parents
children
comparison
equal deleted inserted replaced
104:17890124001d 105:52045ea4679d
1 // Define any required imports for this specific workflow
2 import java.nio.file.Paths
3 import nextflow.file.FileHelper
4
5 // Include any necessary methods
6 include { \
7 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
8 addPadding; wrapUpHelp } from "${params.routines}"
9 include { seqkitrmdupHelp } from "${params.toolshelp}${params.fs}seqkitrmdup"
10 include { kraken2Help } from "${params.toolshelp}${params.fs}kraken2"
11 include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge"
12 include { megahitHelp } from "${params.toolshelp}${params.fs}megahit"
13 include { spadesHelp } from "${params.toolshelp}${params.fs}spades"
14 include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder"
15 include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2"
16 include { mlstHelp } from "${params.toolshelp}${params.fs}mlst"
17 include { abricateHelp } from "${params.toolshelp}${params.fs}abricate"
18
// When help is requested, print the assembled usage text and terminate with a
// zero exit status before any modules or subworkflows are included.
if (params.help) {
    log.info( help() )
    exit( 0 )
}
24
25 // Include any necessary modules and subworkflows
26 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
27 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main"
28 include { SEQKIT_RMDUP } from "${params.modules}${params.fs}seqkit${params.fs}rmdup${params.fs}main"
29 include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main"
30 include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main"
31 include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main"
32 include { MEGAHIT_ASSEMBLE } from "${params.modules}${params.fs}megahit${params.fs}assemble${params.fs}main"
33 include { SPADES_ASSEMBLE } from "${params.modules}${params.fs}spades${params.fs}assemble${params.fs}main"
34 include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main"
35 include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main"
36 include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main"
37 include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main"
38 include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main"
39 include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main"
40 include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main"
41 include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main"
42 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main"
43 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
44
45
46
47 /*
48 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49 INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN-HY WORKFLOW
50 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
51 */
52
// Locations supplied through params, resolved once so they can be validated
// before the workflow is started.
def kraken2_db_dir = file ( "${params.kraken2_db}" )
def centrifuge_x = file ( "${params.centrifuge_x}" )
// Holds `false` instead of a path when params.spades_hmm is not set.
def spades_custom_hmm = (params.spades_hmm ? file ( "${params.spades_hmm}" ) : false)
// Counts how many read inputs were supplied; only --input contributes here.
def reads_platform = 0
// Database names handed to ABRICATE_RUN inside the workflow.
def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ]

reads_platform += (params.input ? 1 : 0)

// Only the parent directory of params.centrifuge_x is checked, not the path
// itself (presumably because it is an index prefix -- TODO confirm).
// The safe-navigation operator makes a path without a parent fail this check
// with the message below instead of raising a NullPointerException.
if (!kraken2_db_dir.exists() || !centrifuge_x.getParent()?.exists()) {
    stopNow("Please check if the following absolute paths are valid:\n" +
        "${params.kraken2_db}\n${params.centrifuge_x}\n" +
        "Cannot proceed further!")
}

// The custom HMM directory is optional, so it is validated only when given.
if (spades_custom_hmm && !spades_custom_hmm.exists()) {
    stopNow("Please check if the following SPAdes' custom HMM directory\n" +
        "path is valid:\n${params.spades_hmm}\nCannot proceed further!")
}

// At least one read input is required.
if (reads_platform < 1) {
    stopNow("Please mention at least one absolute path to input folder which contains\n" +
        "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
}

// Both extraction steps must be configured with the same target.
if (params.centrifuge_extract_bug != params.kraken2_extract_bug) {
    stopNow("Please make sure that the bug to be extracted is same\n" +
        "for both --centrifuge_extract_bug and --kraken2_extract_bug options.")
}
82
83 /*
84 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
85 RUN THE CENTRIFLAKEN-HY WORKFLOW
86 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
87 */
88
// Main CENTRIFLAKEN-HY workflow.
//
// Order of steps as wired below:
//   1. PROCESS_FASTQ -> FASTQC, optionally SEQKIT_RMDUP
//   2. CENTRIFUGE_CLASSIFY -> CENTRIFUGE_PROCESS -> SEQKIT_GREP
//   3. SPADES_ASSEMBLE or MEGAHIT_ASSEMBLE on the SEQKIT_GREP output
//   4. KRAKEN2_CLASSIFY -> KRAKEN2_EXTRACT_CONTIGS on the assembly
//   5. SEROTYPEFINDER, SEQSERO2, MLST and ABRICATE_RUN on the filtered contigs
//   6. ABRICATE_SUMMARY, TABLE_SUMMARY, DUMP_SOFTWARE_VERSIONS and MULTIQC
workflow CENTRIFLAKEN_HY {
    main:
        ch_asm_filtered_contigs = Channel.empty()
        ch_mqc_custom_tbl = Channel.empty()
        // Defined up front so that the assembly channel still exists (as an
        // empty channel) when neither params.spades_run nor
        // params.megahit_run is enabled; otherwise the first use of
        // ch_assembly below would reference an undefined variable.
        ch_assembly = Channel.empty()
        ch_dummy = Channel.fromPath("${params.dummyfile}")
        ch_dummy2 = Channel.fromPath("${params.dummyfile2}")

        log.info summaryOfParams()

        // Record the database locations on each sample's meta map.
        PROCESS_FASTQ()
            .processed_reads
            .map {
                meta, fastq ->
                    meta.centrifuge_x = params.centrifuge_x
                    meta.kraken2_db = params.kraken2_db
                    [meta, fastq]
            }
            .set { ch_processed_reads }

        PROCESS_FASTQ
            .out
            .versions
            .set { software_versions }

        FASTQC ( ch_processed_reads )

        // Optional step: when enabled, the SEQKIT_RMDUP output replaces the
        // processed reads for everything downstream.
        if (params.seqkit_rmdup_run) {
            SEQKIT_RMDUP ( ch_processed_reads )

            SEQKIT_RMDUP
                .out
                .fastx
                .set { ch_processed_reads }

            software_versions
                .mix ( SEQKIT_RMDUP.out.versions.ifEmpty(null) )
                .set { software_versions }
        }

        CENTRIFUGE_CLASSIFY ( ch_processed_reads )

        CENTRIFUGE_PROCESS (
            CENTRIFUGE_CLASSIFY.out.report
                .join( CENTRIFUGE_CLASSIFY.out.output )
        )

        ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted )
            .set { ch_centrifuge_extracted }

        SEQKIT_GREP ( ch_centrifuge_extracted )

        // As of 06/02/2022, upcoming versions of Nextflow are expected to
        // support `allowNull: true` for both inputs and outputs. Until then,
        // dummy files and workarounds have to be used.
        // https://github.com/nextflow-io/nextflow/pull/2893
        if (params.spades_run) {
            SPADES_ASSEMBLE (
                SEQKIT_GREP.out.fastx
                    .combine(ch_dummy)
                    .combine(ch_dummy2)
            )

            SPADES_ASSEMBLE
                .out
                .assembly
                .set { ch_assembly }

            software_versions
                .mix ( SPADES_ASSEMBLE.out.versions.ifEmpty(null) )
                .set { software_versions }
        } else if (params.megahit_run) {
            MEGAHIT_ASSEMBLE (
                SEQKIT_GREP.out.fastx
            )

            MEGAHIT_ASSEMBLE
                .out
                .assembly
                .set { ch_assembly }

            software_versions
                .mix ( MEGAHIT_ASSEMBLE.out.versions.ifEmpty(null) )
                .set { software_versions }
        }

        // Flag every item as an assembly on its meta map.
        ch_assembly
            .map {
                meta, fastq ->
                    meta.is_assembly = true
                    [meta, fastq]
            }
            .set { ch_assembly }

        // NOTE(review): the channel returned by ifEmpty is not assigned or
        // consumed here, so ch_assembly itself is passed on unchanged --
        // confirm whether the fallback value was meant to be used.
        ch_assembly.ifEmpty { [ false, false ] }

        KRAKEN2_CLASSIFY ( ch_assembly )

        KRAKEN2_EXTRACT_CONTIGS (
            ch_assembly
                .join( KRAKEN2_CLASSIFY.out.kraken_output ),
            params.kraken2_extract_bug
        )

        // meta.organism is the first whitespace-separated word of
        // params.kraken2_extract_bug, capitalized.
        KRAKEN2_EXTRACT_CONTIGS
            .out
            .asm_filtered_contigs
            .map {
                meta, fastq ->
                    meta.organism = params.kraken2_extract_bug.split(/\s+/)[0].capitalize()
                    meta.serotypefinder_db = params.serotypefinder_db
                    [meta, fastq]
            }
            .set { ch_asm_filtered_contigs }

        SEROTYPEFINDER ( ch_asm_filtered_contigs )

        SEQSERO2 ( ch_asm_filtered_contigs )

        MLST ( ch_asm_filtered_contigs )

        ABRICATE_RUN (
            ch_asm_filtered_contigs,
            abricate_dbs
        )

        // Key every result by the same database list so that groupTuple
        // gathers the results of all samples into a single item.
        ABRICATE_RUN
            .out
            .abricated
            .map { meta, abres -> [ abricate_dbs, abres ] }
            .groupTuple(by: [0])
            .map { it -> tuple ( it[0], it[1].flatten() ) }
            .set { ch_abricated }

        ABRICATE_SUMMARY ( ch_abricated )

        // Classification reports and FastQC archives, stripped of their meta
        // maps, in the order: centrifuge, kraken2, fastqc.
        CENTRIFUGE_CLASSIFY.out.kreport
            .map { meta, kreport -> [ kreport ] }
            .flatten()
            .concat (
                KRAKEN2_CLASSIFY.out.kraken_report
                    .map { meta, kreport -> [ kreport ] }
                    .flatten(),
                FASTQC.out.zip
                    .map { meta, zip -> [ zip ] }
                    .flatten()
            )
            .set { ch_mqc_classify }

        // Only one serotyping table is collected: serotypefinder takes
        // precedence over seqsero2 when both are enabled.
        if (params.serotypefinder_run) {
            SEROTYPEFINDER
                .out
                .serotyped
                .map { meta, tsv -> [ 'serotypefinder', tsv ] }
                .groupTuple(by: [0])
                .map { it -> tuple ( it[0], it[1].flatten() ) }
                .set { ch_mqc_custom_tbl }
        } else if (params.seqsero2_run) {
            SEQSERO2
                .out
                .serotyped
                .map { meta, tsv -> [ 'seqsero2', tsv ] }
                .groupTuple(by: [0])
                .map { it -> tuple ( it[0], it[1].flatten() ) }
                .set { ch_mqc_custom_tbl }
        }

        // Append one table per abricate database to the serotyping table.
        ch_mqc_custom_tbl
            .concat (
                ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )},
                ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )},
                ABRICATE_SUMMARY.out.megares.map{ it -> tuple ( it[0], it[1] )},
                ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )}
            )
            .groupTuple(by: [0])
            .map { it -> [ it[0], it[1].flatten() ]}
            .set { ch_mqc_custom_tbl }

        TABLE_SUMMARY ( ch_mqc_custom_tbl )

        // Version outputs of steps that may not have run are guarded with
        // ifEmpty(null).
        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    FASTQC.out.versions,
                    CENTRIFUGE_CLASSIFY.out.versions,
                    CENTRIFUGE_PROCESS.out.versions,
                    SEQKIT_GREP.out.versions,
                    KRAKEN2_CLASSIFY.out.versions.ifEmpty(null),
                    KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null),
                    SEROTYPEFINDER.out.versions.ifEmpty(null),
                    SEQSERO2.out.versions.ifEmpty(null),
                    MLST.out.versions.ifEmpty(null),
                    ABRICATE_RUN.out.versions.ifEmpty(null),
                    ABRICATE_SUMMARY.out.versions.ifEmpty(null),
                    TABLE_SUMMARY.out.versions.ifEmpty(null)
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        // Everything MultiQC should see, gathered into a single list item.
        DUMP_SOFTWARE_VERSIONS
            .out
            .mqc_yml
            .concat (
                ch_mqc_classify,
                TABLE_SUMMARY.out.mqc_yml
            )
            .collect()
            .set { ch_multiqc }

        MULTIQC ( ch_multiqc )
}
300
301 /*
302 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
303 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
304 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
305 */
306
// After a successful run, reorganise the output folder and send the
// notification mail:
//   * top-level entries of params.output are moved into
//     "<params.pipeline>-steps", except those whose name matches the
//     exclusion pattern below;
//   * the kraken2_extract_contigs folder is then moved from there into
//     "<params.pipeline>-results".
// Each move happens only when its target folder does not exist yet.
workflow.onComplete {
    if (workflow.success) {
        // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S)
        //
        // Nextflow's .moveTo will error out if directories contain files and it
        // would be complex to include logic to skip directories
        //
        def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps"
        def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results"
        def kraken2_ext_contigs = file( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs", type: 'dir' )
        def final_intermediate = file( final_intermediate_dir, type: 'dir' )
        def final_results = file( final_results_dir, type: 'dir' )

        if ( !final_intermediate.exists() ) {
            final_intermediate.mkdirs()

            FileHelper.visitFiles(Paths.get("${params.output}"), '*') {
                // Entries whose name starts with params.cfsanpipename,
                // "multiqc", ".nextflow", the work directory name or
                // params.pipeline stay where they are. The last alternative
                // also covers the "-steps" folder created just above.
                if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) {
                    FileHelper.movePath(
                        it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" )
                    )
                }
            }
        }

        if ( kraken2_ext_contigs.exists() && !final_results.exists() ) {
            final_results.mkdirs()

            FileHelper.movePath(
                Paths.get( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs" ),
                Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" )
            )
        }

        sendMail()
    }
}

// On failure only the notification mail is sent; no files are moved.
workflow.onError {
    sendMail()
}
349
350 /*
351 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
352 HELPER METHODS FOR CENTRIFLAKEN-HY WORKFLOW
353 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
354 */
355
// Build the usage text for this workflow.
//
// The entry-point help, the `.text` of each per-tool help helper (each called
// with `params`) and the wrap-up help are merged with `+` from left to right,
// copied into a fresh map and passed to addPadding(), whose result is
// returned.
def help() {

    def merged_sections = fastqEntryPointHelp() +
        seqkitrmdupHelp(params).text +
        kraken2Help(params).text +
        centrifugeHelp(params).text +
        megahitHelp(params).text +
        spadesHelp(params).text +
        serotypefinderHelp(params).text +
        seqsero2Help(params).text +
        mlstHelp(params).text +
        abricateHelp(params).text +
        wrapUpHelp()

    Map helptext = [:]
    helptext.putAll ( merged_sections )

    return addPadding(helptext)
}