comparison 0.6.1/workflows/bettercallsal.nf @ 11:749faef1caa9

"planemo upload"
author kkonganti
date Tue, 05 Sep 2023 11:51:40 -0400
parents
children
comparison
equal deleted inserted replaced
10:1b9de878b04a 11:749faef1caa9
1 // Define any required imports for this specific workflow
2 import java.nio.file.Paths
3 import java.util.zip.GZIPInputStream
4 import java.io.FileInputStream
5 import nextflow.file.FileHelper
6
7
8 // Include any necessary methods
9 include { \
10 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; conciseHelp; \
11 addPadding; wrapUpHelp } from "${params.routines}"
12 include { bbmergeHelp } from "${params.toolshelp}${params.fs}bbmerge"
13 include { fastpHelp } from "${params.toolshelp}${params.fs}fastp"
14 include { mashscreenHelp } from "${params.toolshelp}${params.fs}mashscreen"
15 include { tuspyHelp } from "${params.toolshelp}${params.fs}tuspy"
16 include { sourmashsketchHelp } from "${params.toolshelp}${params.fs}sourmashsketch"
17 include { sourmashgatherHelp } from "${params.toolshelp}${params.fs}sourmashgather"
18 include { sourmashsearchHelp } from "${params.toolshelp}${params.fs}sourmashsearch"
19 include { sfhpyHelp } from "${params.toolshelp}${params.fs}sfhpy"
20 include { kmaindexHelp } from "${params.toolshelp}${params.fs}kmaindex"
21 include { kmaalignHelp } from "${params.toolshelp}${params.fs}kmaalign"
22 include { megahitHelp } from "${params.toolshelp}${params.fs}megahit"
23 include { mlstHelp } from "${params.toolshelp}${params.fs}mlst"
24 include { abricateHelp } from "${params.toolshelp}${params.fs}abricate"
25 include { salmonidxHelp } from "${params.toolshelp}${params.fs}salmonidx"
26 include { gsrpyHelp } from "${params.toolshelp}${params.fs}gsrpy"
27
// When --help is given, print the help text and quit before any
// module or subworkflow is included.
if (params.help) {
    log.info(help())
    exit(0)
}
33
34
35 // Include any necessary modules and subworkflows
36 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
37 include { CAT_CAT } from "${params.modules}${params.fs}cat_cat${params.fs}main"
38 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main"
39 include { BBTOOLS_BBMERGE } from "${params.modules}${params.fs}bbtools${params.fs}bbmerge${params.fs}main"
40 include { FASTP } from "${params.modules}${params.fs}fastp${params.fs}main"
41 include { MASH_SCREEN } from "${params.modules}${params.fs}mash${params.fs}screen${params.fs}main"
42 include { TOP_UNIQUE_SEROVARS } from "${params.modules}${params.fs}top_unique_serovars${params.fs}main"
43 include { SOURMASH_SKETCH } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
44 include { SOURMASH_GATHER } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main"
45 include { SOURMASH_SEARCH } from "${params.modules}${params.fs}sourmash${params.fs}search${params.fs}main"
46 include { KMA_INDEX } from "${params.modules}${params.fs}kma${params.fs}index${params.fs}main"
47 include { KMA_ALIGN } from "${params.modules}${params.fs}kma${params.fs}align${params.fs}main"
48 include { OTF_GENOME } from "${params.modules}${params.fs}otf_genome${params.fs}main"
49 include { MEGAHIT_ASSEMBLE } from "${params.modules}${params.fs}megahit${params.fs}assemble${params.fs}main"
50 include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main"
51 include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main"
52 include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main"
53 include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main"
54 include { SALMON_INDEX } from "${params.modules}${params.fs}salmon${params.fs}index${params.fs}main"
55 include { SALMON_QUANT } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main"
56 include { SOURMASH_COMPARE } from "${params.modules}${params.fs}custom${params.fs}sourmash${params.fs}compare${params.fs}main"
57 include { BCS_DISTANCE_MATRIX } from "${params.modules}${params.fs}bcs_distance_matrix${params.fs}main"
58 include { BCS_RESULTS } from "${params.modules}${params.fs}bcs_results${params.fs}main"
59 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
60 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main"
61
62 /*
63 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
64 INPUTS AND ANY CHECKS FOR THE BETTERCALLSAL WORKFLOW
65 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
66 */
67
// Script-level values; `abricate_dbs` is read inside the BETTERCALLSAL workflow.
def salmon_idx_decoys = file ( "${params.salmonidx_decoys}" )
def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ]

// Number of read sources supplied on the command line (only --input is counted).
def reads_platform = (params.input ? 1 : 0)

if (reads_platform < 1) {
    stopNow("Please mention at least one absolute path to input folder which contains\n" +
        "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
}

// Each required metadata file must exist and be non-empty.
[
    [ params.mash_sketch, 'MASH sketch' ],
    [ params.tuspy_ps, 'ACC2SERO pickle' ],
    [ params.gsrpy_snp_clus_metadata, 'PDG reference target cluster metadata' ]
].each { metadata_path, metadata_label ->
    checkMetadataExists(metadata_path, metadata_label)
}
83
84 /*
85 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 RUN THE BETTERCALLSAL WORKFLOW
87 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
88 */
89
/*
 * Main bettercallsal workflow.
 *
 * Wires the reads emitted by PROCESS_FASTQ through optional read merging /
 * QC (BBTOOLS_BBMERGE, FASTP or FASTQC, CAT_CAT), genome pre-filtering
 * (MASH_SCREEN, TOP_UNIQUE_SEROVARS and optionally SOURMASH_*), alignment
 * (KMA_INDEX, KMA_ALIGN, OTF_GENOME), quantification (SALMON_INDEX,
 * SALMON_QUANT) and reporting (BCS_RESULTS, DUMP_SOFTWARE_VERSIONS, MULTIQC).
 *
 * Channels that are re-assigned along the way:
 *   - ch_processed_reads : [ meta, fastq ] tuples handed to the next step
 *   - software_versions  : versions output of every process that ran
 *   - ch_multiqc         : files handed to MULTIQC
 *   - ch_bcs_calls_failed: "failed" outputs collected into BCS_NO_CALLS.txt
 */
workflow BETTERCALLSAL {
    main:
        log.info summaryOfParams()

        // PROCESS_FASTQ supplies the reads and seeds the versions channel.
        PROCESS_FASTQ()

        PROCESS_FASTQ
            .out
            .versions
            .set { software_versions }

        PROCESS_FASTQ
            .out
            .processed_reads
            .set { ch_processed_reads }

        // Optional BBTOOLS_BBMERGE step; skipped for single-end input.
        if (params.bbmerge_run && !params.fq_single_end) {
            ch_processed_reads
                .map { meta, fastq ->
                    // NOTE(review): meta is modified in place; confirm no other
                    // channel relies on the unmodified object.
                    meta.adapters = (params.bbmerge_adapters ?: params.dummyfile)
                    [ meta, fastq ]
                }
                .set { ch_processed_reads }

            BBTOOLS_BBMERGE( ch_processed_reads )

            // Wrap the single output file in a list so the tuple shape stays
            // [ meta, [ files ] ].
            BBTOOLS_BBMERGE
                .out
                .fastq
                .map { meta, fastq -> [ meta, [ fastq ] ] }
                .set { ch_processed_reads }

            software_versions
                .mix ( BBTOOLS_BBMERGE.out.versions )
                .set { software_versions }
        }

        // Read QC: FASTP when enabled, otherwise FASTQC. Either one starts
        // the ch_multiqc channel.
        if (params.fastp_run) {
            FASTP ( ch_processed_reads )

            FASTP
                .out
                .passed_reads
                .set { ch_processed_reads }

            FASTP
                .out
                .json
                .map { meta, json -> [ json ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTP.out.versions )
                .set { software_versions }
        } else {
            FASTQC ( ch_processed_reads )

            FASTQC
                .out
                .zip
                .map { meta, zip -> [ zip ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTQC.out.versions )
                .set { software_versions }
        }

        // Optional CAT_CAT step for paired-end input that was not run
        // through BBTOOLS_BBMERGE.
        if (params.bcs_concat_pe && !params.fq_single_end && !params.bbmerge_run) {
            CAT_CAT ( ch_processed_reads )

            CAT_CAT
                .out
                .concatenated_reads
                .set { ch_processed_reads }

            software_versions
                .mix ( CAT_CAT.out.versions )
                .set { software_versions }
        }

        ch_processed_reads
            .map { meta, fastq ->
                // Settings attached to meta for the downstream processes.
                meta.sequence_sketch = params.mash_sketch
                meta.get_kma_hit_accs = true
                meta.single_end = true
                meta.salmon_decoys = params.dummyfile
                meta.salmon_lib_type = (params.salmonalign_libtype ?: false)
                [ meta, fastq ]
            }
            .filter { meta, fastq ->
                // Keep a sample only if its first FASTQ file yields at least
                // one decompressed byte. The locals are declared with `def`
                // so they are not shared through the script binding, and the
                // streams are closed once the probe byte has been read.
                def fq_files = ( fastq.getClass().toString() =~ /ArrayList/ ? fastq : [ fastq ] )
                new FileInputStream( fq_files[0].toString() ).withStream { raw_stream ->
                    new GZIPInputStream( raw_stream ).withStream { gz_stream ->
                        gz_stream.read() != -1
                    }
                }
            }
            .set { ch_processed_reads }

        MASH_SCREEN ( ch_processed_reads )

        TOP_UNIQUE_SEROVARS ( MASH_SCREEN.out.screened )

        TOP_UNIQUE_SEROVARS.out.genomes_fasta
            .set { ch_genomes_fasta }

        TOP_UNIQUE_SEROVARS.out.failed
            .set { ch_bcs_calls_failed }

        // Optional sourmash steps; each one that runs replaces
        // ch_genomes_fasta with its own genomes_fasta output.
        if (params.sourmashgather_run || params.sourmashsearch_run) {
            SOURMASH_SKETCH (
                ch_processed_reads
                    .join ( ch_genomes_fasta )
            )

            if (params.sourmashgather_run) {
                SOURMASH_GATHER (
                    SOURMASH_SKETCH.out.signatures,
                    [], [], [], []
                )

                SOURMASH_GATHER
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat ( SOURMASH_GATHER.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_GATHER.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }

            if (params.sourmashsearch_run) {
                SOURMASH_SEARCH (
                    SOURMASH_SKETCH.out.signatures,
                    []
                )

                SOURMASH_SEARCH
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat ( SOURMASH_SEARCH.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_SEARCH.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }
        }

        KMA_INDEX ( ch_genomes_fasta )

        KMA_ALIGN (
            ch_processed_reads
                .join ( KMA_INDEX.out.idx )
        )

        OTF_GENOME (
            KMA_ALIGN.out.hits
                .join ( KMA_ALIGN.out.frags )
        )

        // Gather every "failed" output into a single BCS_NO_CALLS.txt file.
        OTF_GENOME.out.failed
            .concat ( ch_bcs_calls_failed )
            .collectFile( name: 'BCS_NO_CALLS.txt' )
            .set { ch_bcs_no_calls }

        SALMON_INDEX ( OTF_GENOME.out.genomes_fasta )

        SALMON_QUANT (
            ch_processed_reads
                .join ( SALMON_INDEX.out.idx )
        )

        // One collected emission holding all SALMON_QUANT results plus the
        // no-calls file, as input for BCS_RESULTS.
        SALMON_QUANT
            .out
            .results
            .groupTuple(by: [0])
            .map { it -> tuple ( it[1].flatten() ) }
            .mix ( ch_bcs_no_calls )
            .collect()
            .set { ch_salmon_res_dirs }

        // NOTE(review): this branch reads SOURMASH_SKETCH.out, but
        // SOURMASH_SKETCH is only invoked above when sourmashgather_run or
        // sourmashsearch_run is set. Confirm that sourmashsketch_run is never
        // enabled on its own.
        if (params.sourmashsketch_run) {
            SOURMASH_SKETCH
                .out
                .signatures
                .groupTuple(by: [0])
                .map { meta, qsigs, dsigs -> [ qsigs ] }
                .collect()
                .flatten()
                .collect()
                .set { ch_query_sigs }

            KMA_ALIGN
                .out
                .hits
                .map { meta, hits -> [ hits ] }
                .collect()
                .flatten()
                .collectFile(name: 'accessions.txt')
                .set { ch_otf_genomes }

            if (params.megahit_run) {

                MEGAHIT_ASSEMBLE ( OTF_GENOME.out.reads_extracted )

                MEGAHIT_ASSEMBLE
                    .out
                    .assembly
                    .set { ch_asm_filtered_contigs }

                MLST ( ch_asm_filtered_contigs )

                // Group all MLST tables under the single key 'mlst'.
                MLST.out.tsv
                    .map { meta, tsv -> [ 'mlst', tsv] }
                    .groupTuple(by: [0])
                    .map { it -> tuple ( it[0], it[1].flatten() ) }
                    .set { ch_mqc_custom_tbl }

                ABRICATE_RUN (
                    ch_asm_filtered_contigs,
                    abricate_dbs
                )

                // Group all ABRICATE_RUN results under the database list.
                ABRICATE_RUN
                    .out
                    .abricated
                    .map { meta, abres -> [ abricate_dbs, abres ] }
                    .groupTuple(by: [0])
                    .map { it -> tuple ( it[0], it[1].flatten() ) }
                    .set { ch_abricated }

                ABRICATE_SUMMARY ( ch_abricated )

                ch_mqc_custom_tbl
                    .concat (
                        ABRICATE_SUMMARY.out.ncbiamrplus.map { it -> tuple ( it[0], it[1] )},
                        ABRICATE_SUMMARY.out.resfinder.map { it -> tuple ( it[0], it[1] )},
                        ABRICATE_SUMMARY.out.megares.map { it -> tuple ( it[0], it[1] )},
                        ABRICATE_SUMMARY.out.argannot.map { it -> tuple ( it[0], it[1] )}
                    )
                    .groupTuple(by: [0])
                    .map { it -> [ it[0], it[1].flatten() ]}
                    .set { ch_mqc_custom_tbl }

                TABLE_SUMMARY ( ch_mqc_custom_tbl )

                ch_multiqc
                    .concat ( TABLE_SUMMARY.out.mqc_yml )
                    .set { ch_multiqc }

                software_versions
                    .mix (
                        MEGAHIT_ASSEMBLE.out.versions.ifEmpty(null),
                        MLST.out.versions.ifEmpty(null),
                        ABRICATE_RUN.out.versions.ifEmpty(null),
                        ABRICATE_SUMMARY.out.versions.ifEmpty(null),
                        TABLE_SUMMARY.out.versions.ifEmpty(null)
                    )
                    .set { software_versions }
            }

            SOURMASH_COMPARE ( ch_query_sigs, ch_otf_genomes )

            BCS_DISTANCE_MATRIX (
                SOURMASH_COMPARE.out.matrix,
                SOURMASH_COMPARE.out.labels
            )

            ch_multiqc
                .concat ( BCS_DISTANCE_MATRIX.out.mqc_yml )
                .set { ch_multiqc }

            software_versions
                .mix (
                    SOURMASH_SKETCH.out.versions.ifEmpty(null),
                    SOURMASH_COMPARE.out.versions.ifEmpty(null),
                    BCS_DISTANCE_MATRIX.out.versions.ifEmpty(null)
                )
                .set { software_versions }
        }

        BCS_RESULTS ( ch_salmon_res_dirs )

        // De-duplicate all version records and merge them into one file.
        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    MASH_SCREEN.out.versions,
                    TOP_UNIQUE_SEROVARS.out.versions,
                    KMA_INDEX.out.versions,
                    KMA_ALIGN.out.versions,
                    OTF_GENOME.out.versions.ifEmpty(null),
                    SALMON_INDEX.out.versions,
                    SALMON_QUANT.out.versions,
                    BCS_RESULTS.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        DUMP_SOFTWARE_VERSIONS
            .out
            .mqc_yml
            .concat (
                ch_multiqc,
                BCS_RESULTS.out.mqc_yml,
                BCS_RESULTS.out.mqc_json
            )
            .collect()
            .set { ch_multiqc }

        MULTIQC ( ch_multiqc )
}
410
411 /*
412 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
413 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
414 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
415 */
416
// The completion handler mails only for successful runs;
// failures are reported from the error handler below.
workflow.onComplete {
    if (!workflow.success) {
        return
    }
    sendMail()
}

workflow.onError {
    sendMail()
}
426
427 /*
428 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
429 METHOD TO CHECK METADATA EXISTENCE
430 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
431 */
432
/**
 * Stop the pipeline unless the given metadata file exists and is non-empty.
 *
 * @param file_path Path of the file to verify; handed to Nextflow's `file()`.
 *                  A null or empty value is reported like a missing file.
 * @param msg       Short label for the file, inserted into the error message.
 */
def checkMetadataExists(file_path, msg) {
    // Declared with `def` so the handle stays local to this method instead of
    // being stored in the script binding.
    def file_path_obj = (file_path == null || file_path.toString().isEmpty()) ?
        null : file( file_path )

    if (file_path_obj == null || !file_path_obj.exists() || file_path_obj.size() == 0) {
        stopNow("Please check if your ${msg} file\n" +
            "[ ${file_path} ]\nexists and is not of size 0.")
    }
}
441
442 /*
443 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
444 HELP TEXT METHODS FOR BETTERCALLSAL WORKFLOW
445 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
446 */
447
/**
 * Build the help text for the bettercallsal workflow.
 *
 * With a bare `--help` (Boolean, or anything that is not a String) the concise
 * help is printed and the list of `--help <tool>` topics is returned.
 * With `--help tool1,tool2,...` every requested tool must match one of the
 * topics in `defaultHelp`; otherwise the pipeline is stopped via `stopNow`.
 * For valid requests the entry-point help, the `--bcs_concat_pe` help and the
 * help of each requested tool are combined.
 *
 * @return the combined help map after being passed through `addPadding`.
 */
def help() {

    Map helptext = [:]
    Map bcsConcatHelp = [:]
    Map fastpAdapterHelp = [:]
    Map nH = [:]
    // Requested tools as one space-separated string; empty unless --help got a String.
    def uHelp = (params.help.getClass().toString() =~ /String/ ? params.help.tokenize(',').join(' ') : '')

    Map defaultHelp = [
        '--help bbmerge' : 'Show bbmerge.sh CLI options',
        '--help fastp' : 'Show fastp CLI options',
        '--help mash' : 'Show mash `screen` CLI options',
        '--help tuspy' : 'Show get_top_unique_mash_hit_genomes.py CLI options',
        '--help sourmashsketch' : 'Show sourmash `sketch` CLI options',
        '--help sourmashgather' : 'Show sourmash `gather` CLI options',
        '--help sourmashsearch' : 'Show sourmash `search` CLI options',
        '--help sfhpy' : 'Show sourmash_filter_hits.py CLI options',
        '--help kmaindex' : 'Show kma `index` CLI options',
        '--help kmaalign' : 'Show kma CLI options',
        '--help megahit' : 'Show megahit CLI options',
        '--help mlst' : 'Show mlst CLI options',
        '--help abricate' : 'Show abricate CLI options',
        '--help salmon' : 'Show salmon `index` CLI options',
        '--help gsrpy' : 'Show gen_salmon_res_table.py CLI options\n'
    ]

    bcsConcatHelp['--bcs_concat_pe'] = "Concatenate paired-end files. " +
        "Default: ${params.bcs_concat_pe}"

    // The key is spelled like the parameter it documents
    // (params.fastp_use_custom_adapters).
    fastpAdapterHelp['--fastp_use_custom_adapters'] = "Use custom adapter FASTA with fastp on top of " +
        "built-in adapter sequence auto-detection. Enabling this option will attempt to find and remove " +
        "all possible Illumina adapter and primer sequences but will make the workflow run slow. " +
        "Default: ${params.fastp_use_custom_adapters}"

    if (params.help.getClass().toString() =~ /Boolean/ || uHelp.size() == 0) {
        println conciseHelp('fastp,mash')
        helptext.putAll(defaultHelp)
    } else {
        params.help.tokenize(',').each { h ->
            // Quote the user-supplied name so regex metacharacters in it are
            // matched literally instead of being interpreted as a pattern.
            def quoted_tool = java.util.regex.Pattern.quote(h)

            if (defaultHelp.keySet().findAll{ it =~ /(?i)\b${quoted_tool}\b/ }.size() == 0) {
                println conciseHelp('fastp,mash')
                stopNow("Tool [ ${h} ] is not a part of ${params.pipeline} pipeline.")
            }
        }

        helptext.putAll(
            fastqEntryPointHelp() +
            bcsConcatHelp +
            (uHelp =~ /(?i)\bbbmerge/ ? bbmergeHelp(params).text : nH) +
            (uHelp =~ /(?i)\bfastp/ ? fastpHelp(params).text + fastpAdapterHelp : nH) +
            (uHelp =~ /(?i)\bmash/ ? mashscreenHelp(params).text : nH) +
            (uHelp =~ /(?i)\btuspy/ ? tuspyHelp(params).text : nH) +
            (uHelp =~ /(?i)\bsourmashsketch/ ? sourmashsketchHelp(params).text : nH) +
            (uHelp =~ /(?i)\bsourmashgather/ ? sourmashgatherHelp(params).text : nH) +
            (uHelp =~ /(?i)\bsourmashsearch/ ? sourmashsearchHelp(params).text : nH) +
            (uHelp =~ /(?i)\bsfhpy/ ? sfhpyHelp(params).text : nH) +
            (uHelp =~ /(?i)\bkmaindex/ ? kmaindexHelp(params).text : nH) +
            (uHelp =~ /(?i)\bkmaalign/ ? kmaalignHelp(params).text : nH) +
            (uHelp =~ /(?i)\bmegahit/ ? megahitHelp(params).text : nH) +
            (uHelp =~ /(?i)\bmlst/ ? mlstHelp(params).text : nH) +
            (uHelp =~ /(?i)\babricate/ ? abricateHelp(params).text : nH) +
            (uHelp =~ /(?i)\bsalmon/ ? salmonidxHelp(params).text : nH) +
            (uHelp =~ /(?i)\bgsrpy/ ? gsrpyHelp(params).text : nH) +
            wrapUpHelp()
        )
    }

    return addPadding(helptext)
}