kkonganti@0
|
1 // Define any required imports for this specific workflow
|
kkonganti@0
|
2 import java.nio.file.Paths
|
kkonganti@0
|
3 import nextflow.file.FileHelper
|
kkonganti@0
|
4
|
kkonganti@0
|
5 // Include any necessary methods
|
kkonganti@0
|
6 include { \
|
kkonganti@0
|
7 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
|
kkonganti@0
|
8 addPadding; wrapUpHelp } from "${params.routines}"
|
kkonganti@0
|
9 include { kraken2Help } from "${params.toolshelp}${params.fs}kraken2"
|
kkonganti@0
|
10 include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge"
|
kkonganti@0
|
11 include { flyeHelp } from "${params.toolshelp}${params.fs}flye"
|
kkonganti@0
|
12 include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder"
|
kkonganti@0
|
13 include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2"
|
kkonganti@0
|
14 include { mlstHelp } from "${params.toolshelp}${params.fs}mlst"
|
kkonganti@0
|
15 include { abricateHelp } from "${params.toolshelp}${params.fs}abricate"
|
kkonganti@0
|
16
|
kkonganti@0
|
17 // Exit if help requested before any subworkflows
|
kkonganti@0
|
if (params.help) {
    // Print the text assembled by help() (defined later in this script)
    // and terminate with exit status 0 before anything else is evaluated.
    log.info(help())
    exit(0)
}
|
kkonganti@0
|
22
|
kkonganti@0
|
23 // Include any necessary modules and subworkflows
|
kkonganti@0
|
24 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
|
kkonganti@0
|
25 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main"
|
kkonganti@0
|
26 include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main"
|
kkonganti@0
|
27 include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main"
|
kkonganti@0
|
28 include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main"
|
kkonganti@0
|
29 include { FLYE_ASSEMBLE } from "${params.modules}${params.fs}flye${params.fs}assemble${params.fs}main"
|
kkonganti@0
|
30 include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main"
|
kkonganti@0
|
31 include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main"
|
kkonganti@0
|
32 include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main"
|
kkonganti@0
|
33 include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main"
|
kkonganti@0
|
34 include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main"
|
kkonganti@0
|
35 include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main"
|
kkonganti@0
|
36 include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main"
|
kkonganti@0
|
37 include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main"
|
kkonganti@0
|
38 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main"
|
kkonganti@0
|
39 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
|
kkonganti@0
|
40
|
kkonganti@0
|
41
|
kkonganti@0
|
42
|
kkonganti@0
|
43 /*
|
kkonganti@0
|
44 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
45 INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN WORKFLOW
|
kkonganti@0
|
46 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
47 */
|
kkonganti@0
|
48
|
kkonganti@0
|
// Locations supplied through --kraken2_db and --centrifuge_x.
def kraken2_db_dir = file ( "${params.kraken2_db}" )
def centrifuge_x = file ( "${params.centrifuge_x}" )

// abricate database names handed to ABRICATE_RUN and summarized afterwards.
def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ]

// Count how many of the flye read-platform switches evaluate to true.
def reads_platform = [
    params.flye_nano_raw,
    params.flye_nano_corr,
    params.flye_nano_hq,
    params.flye_pacbio_raw,
    params.flye_pacbio_corr,
    params.flye_pacbio_hifi
].findAll { it }.size()

// The kraken2 path itself must exist; for centrifuge only the parent of the
// given path is checked. The safe-navigation operator treats a path with no
// parent as "missing" instead of raising a NullPointerException.
if (!kraken2_db_dir.exists() || !centrifuge_x.getParent()?.exists()) {
    stopNow("Please check if the following absolute paths are valid:\n" +
        "${params.kraken2_db}\n${params.centrifuge_x}\n" +
        "Cannot proceed further!")
}

// Exactly one read platform switch has to be active.
if (reads_platform != 1) {
    // Declared with `def` so the variable stays local to this block.
    def msg_0 = (reads_platform > 1 ? "only" : "at least")
    stopNow("Please mention ${msg_0} one read platform for use with the flye assembler\n" +
        "using any one of the following options:\n" +
        "--flye_nano_raw\n--flye_nano_corr\n--flye_nano_hq\n" +
        "--flye_pacbio_raw\n--flye_pacbio_corr\n--flye_pacbio_hifi")
}

// Both extraction steps must target the same organism string.
if (params.centrifuge_extract_bug != params.kraken2_extract_bug) {
    stopNow("Please make sure that the bug to be extracted is same\n" +
        "for both --centrifuge_extract_bug and --kraken2_extract_bug options.")
}
|
kkonganti@0
|
79
|
kkonganti@0
|
80 /*
|
kkonganti@0
|
81 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
82 RUN THE CENTRIFLAKEN WORKFLOW
|
kkonganti@0
|
83 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
84 */
|
kkonganti@0
|
85
|
kkonganti@0
|
// Main workflow: read processing, centrifuge classification and read
// extraction, flye assembly, kraken2 classification and contig extraction,
// typing / abricate steps on the extracted contigs, and a final MultiQC run.
workflow CENTRIFLAKEN {
    main:
        // Stays empty unless one of the serotyping branches below replaces it.
        ch_mqc_custom_tbl = Channel.empty()

        log.info summaryOfParams()

        PROCESS_FASTQ()

        // Attach the database locations to each sample's meta map.
        // A new map is created (meta + [...]) instead of assigning into the
        // incoming map: the same meta object is used as a join key further
        // down, and mutating it in place would alter a key other channels
        // may still hold.
        ch_processed_reads = PROCESS_FASTQ.out.processed_reads
            .map { meta, fastq ->
                [
                    meta + [
                        centrifuge_x: params.centrifuge_x,
                        kraken2_db: params.kraken2_db
                    ],
                    fastq
                ]
            }

        software_versions = PROCESS_FASTQ.out.versions

        FASTQC ( ch_processed_reads )

        CENTRIFUGE_CLASSIFY ( ch_processed_reads )

        CENTRIFUGE_PROCESS (
            CENTRIFUGE_CLASSIFY.out.report
                .join( CENTRIFUGE_CLASSIFY.out.output )
        )

        // Pair each sample's reads with what CENTRIFUGE_PROCESS extracted.
        ch_centrifuge_extracted = ch_processed_reads
            .join ( CENTRIFUGE_PROCESS.out.extracted )

        SEQKIT_GREP ( ch_centrifuge_extracted )

        FLYE_ASSEMBLE ( SEQKIT_GREP.out.fastx )

        // NOTE: the previous revision called ifEmpty { [ false, false ] } on
        // this channel and discarded the returned channel, which had no
        // effect on anything downstream; the statement is therefore omitted.
        ch_flye_assembly = FLYE_ASSEMBLE.out.assembly

        KRAKEN2_CLASSIFY ( ch_flye_assembly )

        KRAKEN2_EXTRACT_CONTIGS (
            ch_flye_assembly
                .join( KRAKEN2_CLASSIFY.out.kraken_output ),
            params.kraken2_extract_bug
        )

        // Add the organism (first whitespace-separated word of
        // --kraken2_extract_bug, capitalized) and the serotypefinder database
        // to a fresh copy of the meta map.
        ch_asm_filtered_contigs = KRAKEN2_EXTRACT_CONTIGS.out.asm_filtered_contigs
            .map { meta, contigs ->
                [
                    meta + [
                        organism: params.kraken2_extract_bug.split(/\s+/)[0].capitalize(),
                        serotypefinder_db: params.serotypefinder_db
                    ],
                    contigs
                ]
            }

        SEROTYPEFINDER ( ch_asm_filtered_contigs )

        SEQSERO2 ( ch_asm_filtered_contigs )

        MLST ( ch_asm_filtered_contigs )

        ABRICATE_RUN (
            ch_asm_filtered_contigs,
            abricate_dbs
        )

        // Re-key every abricate result by the full database list so that all
        // results are grouped into a single item for the summary step.
        ch_abricated = ABRICATE_RUN.out.abricated
            .map { meta, abres -> [ abricate_dbs, abres ] }
            .groupTuple(by: [0])
            .map { it -> tuple ( it[0], it[1].flatten() ) }

        ABRICATE_SUMMARY ( ch_abricated )

        // ABRICATE_SUMMARY.out.ecoli_vf.set { ch_abricate_summary_ecoli_vf }
        // ch_abricate_summary_ecoli_vf.ifEmpty { [ false, false ] }

        // Report files (meta dropped) collected for MultiQC: centrifuge
        // kreports first, then kraken2 reports, then FastQC zip archives.
        ch_mqc_classify = CENTRIFUGE_CLASSIFY.out.kreport
            .map { meta, kreport -> [ kreport ] }
            .flatten()
            .concat (
                KRAKEN2_CLASSIFY.out.kraken_report
                    .map { meta, kreport -> [ kreport ] }
                    .flatten(),
                FASTQC.out.zip
                    .map { meta, zip -> [ zip ] }
                    .flatten()
            )

        // Only one serotyping table is used: serotypefinder takes precedence
        // over seqsero2 when both are enabled.
        if (params.serotypefinder_run) {
            ch_mqc_custom_tbl = SEROTYPEFINDER.out.serotyped
                .map { meta, tsv -> [ 'serotypefinder', tsv ] }
                .groupTuple(by: [0])
                .map { it -> tuple ( it[0], it[1].flatten() ) }
        } else if (params.seqsero2_run) {
            ch_mqc_custom_tbl = SEQSERO2.out.serotyped
                .map { meta, tsv -> [ 'seqsero2', tsv ] }
                .groupTuple(by: [0])
                .map { it -> tuple ( it[0], it[1].flatten() ) }
        }

        // Append the four abricate summary outputs and regroup by table name.
        ch_mqc_custom_tbl = ch_mqc_custom_tbl
            .concat (
                ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )},
                ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )},
                ABRICATE_SUMMARY.out.megares.map{ it -> tuple ( it[0], it[1] )},
                ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )}
            )
            .groupTuple(by: [0])
            .map { it -> [ it[0], it[1].flatten() ]}

        TABLE_SUMMARY ( ch_mqc_custom_tbl )

        // Merge the version outputs of every step into one de-duplicated file.
        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    FASTQC.out.versions,
                    CENTRIFUGE_CLASSIFY.out.versions,
                    CENTRIFUGE_PROCESS.out.versions,
                    SEQKIT_GREP.out.versions,
                    FLYE_ASSEMBLE.out.versions.ifEmpty(null),
                    KRAKEN2_CLASSIFY.out.versions.ifEmpty(null),
                    KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null),
                    SEROTYPEFINDER.out.versions.ifEmpty(null),
                    SEQSERO2.out.versions.ifEmpty(null),
                    MLST.out.versions.ifEmpty(null),
                    ABRICATE_RUN.out.versions.ifEmpty(null),
                    ABRICATE_SUMMARY.out.versions.ifEmpty(null),
                    TABLE_SUMMARY.out.versions.ifEmpty(null)
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        // Everything MultiQC consumes, gathered into a single list item.
        ch_multiqc = DUMP_SOFTWARE_VERSIONS.out.mqc_yml
            .concat (
                ch_mqc_classify,
                TABLE_SUMMARY.out.mqc_yml
            )
            .collect()

        MULTIQC ( ch_multiqc )
}
|
kkonganti@0
|
251
|
kkonganti@0
|
252 /*
|
kkonganti@0
|
253 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
254 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
|
kkonganti@0
|
255 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
256 */
|
kkonganti@0
|
257
|
kkonganti@0
|
// Completion handler: on a successful run, reorganize the output directory
// into "<pipeline>-steps" and "<pipeline>-results" and then call sendMail().
// Nothing is done here when the run was not successful.
workflow.onComplete {
    if (workflow.success) {
        // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S)
        //
        // Nextflow's .moveTo will error out if directories contain files and it
        // would be complex to include logic to skip directories
        //
        def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps"
        def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results"
        def kraken2_ext_contigs_dir = "${final_intermediate_dir}${params.fs}kraken2_extract_contigs"
        def kraken2_ext_contigs = file( kraken2_ext_contigs_dir, type: 'dir' )
        def final_intermediate = file( final_intermediate_dir, type: 'dir' )
        def final_results = file( final_results_dir, type: 'dir' )

        // Only reorganize once: skipped entirely if the steps directory
        // already exists.
        if ( !final_intermediate.exists() ) {
            final_intermediate.mkdirs()

            // Move every entry of the output directory into the steps
            // directory unless its name starts with one of the excluded
            // prefixes (pipeline name, multiqc, .nextflow, work dir name, ...).
            FileHelper.visitFiles(Paths.get("${params.output}"), '*') {
                if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) {
                    FileHelper.movePath(
                        it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" )
                    )
                }
            }
        }

        // Promote the extracted contigs from the steps directory to the
        // results directory, but only when the results directory does not
        // exist yet.
        if ( kraken2_ext_contigs.exists() && !final_results.exists() ) {
            final_results.mkdirs()

            FileHelper.movePath(
                Paths.get( kraken2_ext_contigs_dir ),
                Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" )
            )
        }

        sendMail()
    }
}
|
kkonganti@0
|
296
|
kkonganti@0
|
// Error handler: invoke the included sendMail() routine when the run
// terminates with an error.
workflow.onError { sendMail() }
|
kkonganti@0
|
300
|
kkonganti@0
|
301 /*
|
kkonganti@0
|
302 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
303 HELPER METHODS FOR CENTRIFLAKEN WORKFLOW
|
kkonganti@0
|
304 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
305 */
|
kkonganti@0
|
306
|
kkonganti@0
|
// Assemble the help text for this workflow.
//
// Merges, in order, the entries returned by fastqEntryPointHelp(), the
// `.text` member of each tool's help routine (kraken2, centrifuge, flye,
// serotypefinder, seqsero2, mlst, abricate) and wrapUpHelp() into a single
// map; entries from later sources replace earlier ones with the same key.
// Returns whatever addPadding() produces for that merged map.
def help() {

    def sections = [
        fastqEntryPointHelp(),
        kraken2Help(params).text,
        centrifugeHelp(params).text,
        flyeHelp(params).text,
        serotypefinderHelp(params).text,
        seqsero2Help(params).text,
        mlstHelp(params).text,
        abricateHelp(params).text,
        wrapUpHelp()
    ]

    Map helptext = [:]

    for (section in sections) {
        helptext.putAll(section)
    }

    return addPadding(helptext)
}