Mercurial > repos > kkonganti > cfsan_centriflaken
comparison 0.3.0/workflows/centriflaken_hy.nf @ 92:295c2597a475
"planemo upload"
author | kkonganti |
---|---|
date | Tue, 19 Jul 2022 10:07:24 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
91:046e57368d3b | 92:295c2597a475 |
---|---|
1 // Define any required imports for this specific workflow | |
2 import java.nio.file.Paths | |
3 import nextflow.file.FileHelper | |
4 | |
5 // Include any necessary methods | |
6 include { \ | |
7 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \ | |
8 addPadding; wrapUpHelp } from "${params.routines}" | |
9 include { kraken2Help } from "${params.toolshelp}${params.fs}kraken2" | |
10 include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge" | |
11 include { megahitHelp } from "${params.toolshelp}${params.fs}megahit" | |
12 include { spadesHelp } from "${params.toolshelp}${params.fs}spades" | |
13 include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder" | |
14 include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2" | |
15 include { mlstHelp } from "${params.toolshelp}${params.fs}mlst" | |
16 include { abricateHelp } from "${params.toolshelp}${params.fs}abricate" | |
17 | |
18 // Exit if help requested before any subworkflows | |
19 if (params.help) { | |
20 log.info help() | |
21 exit 0 | |
22 } | |
23 | |
24 // Include any necessary modules and subworkflows | |
25 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq" | |
26 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main" | |
27 include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main" | |
28 include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main" | |
29 include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main" | |
30 include { MEGAHIT_ASSEMBLE } from "${params.modules}${params.fs}megahit${params.fs}assemble${params.fs}main" | |
31 include { SPADES_ASSEMBLE } from "${params.modules}${params.fs}spades${params.fs}assemble${params.fs}main" | |
32 include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main" | |
33 include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main" | |
34 include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main" | |
35 include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main" | |
36 include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main" | |
37 include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main" | |
38 include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main" | |
39 include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main" | |
40 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" | |
41 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" | |
42 | |
43 | |
44 | |
45 /* | |
46 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
47 INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN-HY WORKFLOW | |
48 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
49 */ | |
50 | |
51 def kraken2_db_dir = file ( "${params.kraken2_db}" ) | |
52 def centrifuge_x = file ( "${params.centrifuge_x}" ) | |
53 def spades_custom_hmm = (params.spades_hmm ? file ( "${params.spades_hmm}" ) : false) | |
54 def reads_platform = 0 | |
55 def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ] | |
56 | |
57 reads_platform += (params.input ? 1 : 0) | |
58 | |
59 if (!kraken2_db_dir.exists() || !centrifuge_x.getParent().exists()) { | |
60 stopNow("Please check if the following absolute paths are valid:\n" + | |
61 "${params.kraken2_db}\n${params.centrifuge_x}\n" + | |
62 "Cannot proceed further!") | |
63 } | |
64 | |
65 if (spades_custom_hmm && !spades_custom_hmm.exists()) { | |
66 stopNow("Please check if the following SPAdes' custom HMM directory\n" + | |
67 "path is valid:\n${params.spades_hmm}\nCannot proceed further!") | |
68 } | |
69 | |
70 if (reads_platform < 1) { // reads_platform stays 0 unless --input is set | |
71 stopNow("Please mention at least one absolute path to input folder which contains\n" + | |
72 "FASTQ files sequenced using the --input option.\n" + | |
73 "Ex: --input (Illumina or Generic short reads in FASTQ format)") | |
74 } | |
75 | |
76 if (params.centrifuge_extract_bug != params.kraken2_extract_bug) { | |
77 stopNow("Please make sure that the bug to be extracted is same\n" + | |
78 "for both --centrifuge_extract_bug and --kraken2_extract_bug options.") | |
79 } | |
80 | |
81 /* | |
82 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
83 RUN THE CENTRIFLAKEN-HY WORKFLOW | |
84 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
85 */ | |
86 | |
87 workflow CENTRIFLAKEN_HY { | |
88 main: | |
89 ch_asm_filtered_contigs = Channel.empty() | |
90 ch_mqc_custom_tbl = Channel.empty() | |
91 ch_dummy = Channel.fromPath("${params.dummyfile}") | |
92 ch_dummy2 = Channel.fromPath("${params.dummyfile2}") | |
93 | |
94 log.info summaryOfParams() | |
95 | |
96 PROCESS_FASTQ() | |
97 .processed_reads | |
98 .map { | |
99 meta, fastq -> | |
100 meta.centrifuge_x = params.centrifuge_x | |
101 meta.kraken2_db = params.kraken2_db | |
102 [meta, fastq] | |
103 } | |
104 .set { ch_processed_reads } | |
105 | |
106 PROCESS_FASTQ | |
107 .out | |
108 .versions | |
109 .set { software_versions } | |
110 | |
111 FASTQC ( ch_processed_reads ) | |
112 | |
113 CENTRIFUGE_CLASSIFY ( ch_processed_reads ) | |
114 | |
115 CENTRIFUGE_PROCESS ( | |
116 CENTRIFUGE_CLASSIFY.out.report | |
117 .join( CENTRIFUGE_CLASSIFY.out.output ) | |
118 ) | |
119 | |
120 ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted ) | |
121 .set { ch_centrifuge_extracted } | |
122 | |
123 SEQKIT_GREP ( ch_centrifuge_extracted ) | |
124 | |
125 // As of 06/02/2022, upcoming versions of Nextflow are expected to allow | |
126 // allowNull: true for both inputs and outputs; until then, we have to use | |
127 // dummy files and workarounds. | |
128 // https://github.com/nextflow-io/nextflow/pull/2893 | |
129 if (params.spades_run) { | |
130 SPADES_ASSEMBLE ( | |
131 SEQKIT_GREP.out.fastx | |
132 .combine(ch_dummy) | |
133 .combine(ch_dummy2) | |
134 ) | |
135 | |
136 SPADES_ASSEMBLE | |
137 .out | |
138 .assembly | |
139 .set { ch_assembly } | |
140 | |
141 software_versions | |
142 .mix ( SPADES_ASSEMBLE.out.versions.ifEmpty(null) ) | |
143 .set { software_versions } | |
144 } else if (params.megahit_run) { | |
145 MEGAHIT_ASSEMBLE ( | |
146 SEQKIT_GREP.out.fastx | |
147 ) | |
148 | |
149 MEGAHIT_ASSEMBLE | |
150 .out | |
151 .assembly | |
152 .set { ch_assembly } | |
153 | |
154 software_versions | |
155 .mix ( MEGAHIT_ASSEMBLE.out.versions.ifEmpty(null) ) | |
156 .set { software_versions } | |
157 } | |
158 | |
159 ch_assembly | |
160 .map { | |
161 meta, fastq -> | |
162 meta.is_assembly = true | |
163 [meta, fastq] | |
164 } | |
165 .set { ch_assembly } | |
166 | |
167 ch_assembly.ifEmpty { [ false, false ] } | |
168 | |
169 KRAKEN2_CLASSIFY ( ch_assembly ) | |
170 | |
171 KRAKEN2_EXTRACT_CONTIGS ( | |
172 ch_assembly | |
173 .join( KRAKEN2_CLASSIFY.out.kraken_output ), | |
174 params.kraken2_extract_bug | |
175 ) | |
176 | |
177 KRAKEN2_EXTRACT_CONTIGS | |
178 .out | |
179 .asm_filtered_contigs | |
180 .map { | |
181 meta, fastq -> | |
182 meta.organism = params.kraken2_extract_bug.split(/\s+/)[0].capitalize() | |
183 meta.serotypefinder_db = params.serotypefinder_db | |
184 [meta, fastq] | |
185 } | |
186 .set { ch_asm_filtered_contigs } | |
187 | |
188 SEROTYPEFINDER ( ch_asm_filtered_contigs ) | |
189 | |
190 SEQSERO2 ( ch_asm_filtered_contigs ) | |
191 | |
192 MLST ( ch_asm_filtered_contigs ) | |
193 | |
194 ABRICATE_RUN ( | |
195 ch_asm_filtered_contigs, | |
196 abricate_dbs | |
197 ) | |
198 | |
199 ABRICATE_RUN | |
200 .out | |
201 .abricated | |
202 .map { meta, abres -> [ abricate_dbs, abres ] } | |
203 .groupTuple(by: [0]) | |
204 .map { it -> tuple ( it[0], it[1].flatten() ) } | |
205 .set { ch_abricated } | |
206 | |
207 ABRICATE_SUMMARY ( ch_abricated ) | |
208 | |
209 CENTRIFUGE_CLASSIFY.out.kreport | |
210 .map { meta, kreport -> [ kreport ] } | |
211 .flatten() | |
212 .concat ( | |
213 KRAKEN2_CLASSIFY.out.kraken_report | |
214 .map { meta, kreport -> [ kreport ] } | |
215 .flatten(), | |
216 FASTQC.out.zip | |
217 .map { meta, zip -> [ zip ] } | |
218 .flatten() | |
219 ) | |
220 .set { ch_mqc_classify } | |
221 | |
222 if (params.serotypefinder_run) { | |
223 SEROTYPEFINDER | |
224 .out | |
225 .serotyped | |
226 .map { meta, tsv -> [ 'serotypefinder', tsv ] } | |
227 .groupTuple(by: [0]) | |
228 .map { it -> tuple ( it[0], it[1].flatten() ) } | |
229 .set { ch_mqc_custom_tbl } | |
230 } else if (params.seqsero2_run) { | |
231 SEQSERO2 | |
232 .out | |
233 .serotyped | |
234 .map { meta, tsv -> [ 'seqsero2', tsv ] } | |
235 .groupTuple(by: [0]) | |
236 .map { it -> tuple ( it[0], it[1].flatten() ) } | |
237 .set { ch_mqc_custom_tbl } | |
238 } | |
239 | |
240 ch_mqc_custom_tbl | |
241 .concat ( | |
242 ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )}, | |
243 ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )}, | |
244 ABRICATE_SUMMARY.out.megares.map{ it -> tuple ( it[0], it[1] )}, | |
245 ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )}, | |
246 ) | |
247 .groupTuple(by: [0]) | |
248 .map { it -> [ it[0], it[1].flatten() ]} | |
249 .set { ch_mqc_custom_tbl } | |
250 | |
251 TABLE_SUMMARY ( ch_mqc_custom_tbl ) | |
252 | |
253 DUMP_SOFTWARE_VERSIONS ( | |
254 software_versions | |
255 .mix ( | |
256 FASTQC.out.versions, | |
257 CENTRIFUGE_CLASSIFY.out.versions, | |
258 CENTRIFUGE_PROCESS.out.versions, | |
259 SEQKIT_GREP.out.versions, | |
260 KRAKEN2_CLASSIFY.out.versions.ifEmpty(null), | |
261 KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null), | |
262 SEROTYPEFINDER.out.versions.ifEmpty(null), | |
263 SEQSERO2.out.versions.ifEmpty(null), | |
264 MLST.out.versions.ifEmpty(null), | |
265 ABRICATE_RUN.out.versions.ifEmpty(null), | |
266 ABRICATE_SUMMARY.out.versions.ifEmpty(null), | |
267 TABLE_SUMMARY.out.versions.ifEmpty(null) | |
268 ) | |
269 .unique() | |
270 .collectFile(name: 'collected_versions.yml') | |
271 ) | |
272 | |
273 DUMP_SOFTWARE_VERSIONS | |
274 .out | |
275 .mqc_yml | |
276 .concat ( | |
277 ch_mqc_classify, | |
278 TABLE_SUMMARY.out.mqc_yml | |
279 ) | |
280 .collect() | |
281 .set { ch_multiqc } | |
282 | |
283 MULTIQC ( ch_multiqc ) | |
284 } | |
285 | |
286 /* | |
287 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
288 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG | |
289 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
290 */ | |
291 | |
292 workflow.onComplete { | |
293 if (workflow.success) { | |
294 // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S) | |
295 // | |
296 // Nextflow's .moveTo will error out if directories contain files and it | |
297 // would be complex to include logic to skip directories | |
298 // | |
299 def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps" | |
300 def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results" | |
301 def kraken2_ext_contigs = file( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs", type: 'dir' ) | |
302 def final_intermediate = file( final_intermediate_dir, type: 'dir' ) | |
303 def final_results = file( final_results_dir, type: 'dir' ) | |
304 def pipeline_output = file( params.output, type: 'dir' ) | |
305 | |
306 if ( !final_intermediate.exists() ) { | |
307 final_intermediate.mkdirs() | |
308 | |
309 FileHelper.visitFiles(Paths.get("${params.output}"), '*') { | |
310 if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) { | |
311 FileHelper.movePath( | |
312 it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" ) | |
313 ) | |
314 } | |
315 } | |
316 } | |
317 | |
318 if ( kraken2_ext_contigs.exists() && !final_results.exists() ) { | |
319 final_results.mkdirs() | |
320 | |
321 FileHelper.movePath( | |
322 Paths.get( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs" ), | |
323 Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" ) | |
324 ) | |
325 } | |
326 | |
327 sendMail() | |
328 } | |
329 } | |
330 | |
331 workflow.onError { | |
332 sendMail() | |
333 } | |
334 | |
335 /* | |
336 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
337 HELPER METHODS FOR CENTRIFLAKEN-HY WORKFLOW | |
338 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
339 */ | |
340 | |
341 def help() { | |
342 | |
343 Map helptext = [:] | |
344 | |
345 helptext.putAll ( | |
346 fastqEntryPointHelp() + | |
347 kraken2Help(params).text + | |
348 centrifugeHelp(params).text + | |
349 megahitHelp(params).text + | |
350 spadesHelp(params).text + | |
351 serotypefinderHelp(params).text + | |
352 seqsero2Help(params).text + | |
353 mlstHelp(params).text + | |
354 abricateHelp(params).text + | |
355 wrapUpHelp() | |
356 ) | |
357 | |
358 return addPadding(helptext) | |
359 } |