comparison 1.0.0/workflows/bettercallsal_lr.nf @ 0:801b85b03a17 draft default tip

planemo upload
author galaxytrakr
date Thu, 28 May 2026 20:31:42 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:801b85b03a17
1 // Define any required imports for this specific workflow
2 import java.nio.file.Paths
3 import java.util.zip.GZIPInputStream
4 import java.io.FileInputStream
5 import nextflow.file.FileHelper
6
7
8 // Include any necessary methods
9 include { \
10 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; conciseHelp; \
11 addPadding; wrapUpHelp } from "${params.routines}"
12 include { filtlongHelp } from "${params.toolshelp}${params.fs}filtlong"
13 include { mashscreenHelp } from "${params.toolshelp}${params.fs}mashscreen"
14 include { tuspyHelp } from "${params.toolshelp}${params.fs}tuspy"
15 include { sourmashsketchHelp } from "${params.toolshelp}${params.fs}sourmashsketch"
16 include { sourmashgatherHelp } from "${params.toolshelp}${params.fs}sourmashgather"
17 include { sourmashsearchHelp } from "${params.toolshelp}${params.fs}sourmashsearch"
18 include { sfhpyHelp } from "${params.toolshelp}${params.fs}sfhpy"
19 include { flyeHelp } from "${params.toolshelp}${params.fs}flye"
20 include { mlstHelp } from "${params.toolshelp}${params.fs}mlst"
21 include { abricateHelp } from "${params.toolshelp}${params.fs}abricate"
22 include { gsrpyHelp } from "${params.toolshelp}${params.fs}gsrpy"
23
24 // Exit if help requested before any subworkflows
// When --help is given, print the help text built by help() and stop
// before any module or subworkflow is included.
if (params.help) {
    log.info(help())
    exit(0)
}
29
30
31 // Include any necessary modules and subworkflows
32 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
33 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main"
34 include { FILTLONG } from "${params.modules}${params.fs}filtlong${params.fs}main"
35 include { MASH_SCREEN } from "${params.modules}${params.fs}mash${params.fs}screen${params.fs}main"
36 include { TOP_UNIQUE_SEROVARS } from "${params.modules}${params.fs}top_unique_serovars${params.fs}main"
37 include { SOURMASH_SKETCH } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
38 include { SOURMASH_GATHER } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main"
39 include { SOURMASH_SEARCH } from "${params.modules}${params.fs}sourmash${params.fs}search${params.fs}main"
40 include { GATHER_HITS } from "${params.modules}${params.fs}gather_hits${params.fs}main"
41 include { OTF_GENOME } from "${params.modules}${params.fs}otf_genome${params.fs}main"
42 include { FLYE_ASSEMBLE } from "${params.modules}${params.fs}flye${params.fs}assemble${params.fs}main"
43 include { MINIMAP2_ALIGN } from "${params.modules}${params.fs}minimap2${params.fs}align${params.fs}main"
44 include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main"
45 include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main"
46 include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main"
47 include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main"
48 include { SALMON_QUANT } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main"
49 include { SOURMASH_COMPARE } from "${params.modules}${params.fs}custom${params.fs}sourmash${params.fs}compare${params.fs}main"
50 include { BCS_DISTANCE_MATRIX } from "${params.modules}${params.fs}bcs_distance_matrix${params.fs}main"
51 include { BCS_RESULTS } from "${params.modules}${params.fs}bcs_results${params.fs}main"
52 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
53 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main"
54
55 /*
56 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
57 INPUTS AND ANY CHECKS FOR THE BETTERCALLSAL WORKFLOW
58 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 */
60
// Databases handed to ABRICATE_RUN and used as the grouping key for its results.
def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ]

// Counts how many read inputs were supplied; only --input contributes here.
def reads_platform = (params.input ? 1 : 0)

// Refuse to run without an input folder.
if (reads_platform < 1) {
    stopNow("Please mention at least one absolute path to input folder which contains\n" +
        "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
}

// Each required reference file must exist and be non-empty.
[
    [ params.mash_sketch, 'MASH sketch' ],
    [ params.tuspy_ps, 'ACC2SERO pickle' ],
    [ params.gsrpy_snp_clus_metadata, 'PDG reference target cluster metadata' ]
].each { required_path, label ->
    checkMetadataExists(required_path, label)
}
75
76 /*
77 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
78 RUN THE BETTERCALLSAL_LR WORKFLOW
79 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
80 */
81
// Long-read bettercallsal workflow.
//
// Stages, in the order they are wired below:
//   PROCESS_FASTQ -> FILTLONG -> FASTQC (raw + filtered reads)
//   -> MASH_SCREEN -> TOP_UNIQUE_SEROVARS
//   -> optional SOURMASH_SKETCH / SOURMASH_GATHER / SOURMASH_SEARCH
//   -> GATHER_HITS -> OTF_GENOME -> MINIMAP2_ALIGN -> SALMON_QUANT
//   -> optional FLYE_ASSEMBLE / MLST / ABRICATE / TABLE_SUMMARY
//   -> optional SOURMASH_COMPARE / BCS_DISTANCE_MATRIX
//   -> BCS_RESULTS -> DUMP_SOFTWARE_VERSIONS -> optional MULTIQC
workflow BETTERCALLSAL_LR {
    main:
        log.info summaryOfParams()

        PROCESS_FASTQ()

        PROCESS_FASTQ.out.versions
            .set { software_versions }

        // NOTE(review): the closure below mutates `meta` in place while the
        // same channel is also tapped into ch_fpass_processed_reads; confirm
        // that sharing the mutated map between both branches is intended.
        PROCESS_FASTQ.out.processed_reads
            .tap { ch_fpass_processed_reads }
            .map { meta, fastq ->
                meta.single_end = params.fq_single_end
                [ meta, [], fastq ]
            }
            .set { ch_processed_reads_lr }

        FILTLONG ( ch_processed_reads_lr )

        // Filtered reads get their own meta (id suffixed with '.filtered') for FASTQC.
        FILTLONG.out.filtered_reads
            .map { meta, fastq ->
                def meta2 = [:]
                meta2.id = meta.id.toString() + '.filtered'
                meta2.single_end = meta.single_end
                meta2.strandedness = meta.strandedness
                [ meta2, fastq ]
            }
            .set { ch_processed_reads_fqc }

        FILTLONG.out.log
            .map { meta, log -> [ log ] }
            .collect()
            .set { ch_multiqc }

        // FASTQC runs on the raw reads (id suffixed with '.raw') followed by the filtered reads.
        FASTQC (
            ch_fpass_processed_reads
                .map { meta, fastq ->
                    def meta2 = [:]
                    meta2.id = meta.id.toString() + '.raw'
                    meta2.single_end = meta.single_end
                    meta2.strandedness = meta.strandedness
                    [ meta2, fastq ]
                }
                .concat ( ch_processed_reads_fqc )
        )

        FASTQC.out.zip
            .map { meta, zip -> [ zip ] }
            .collect()
            .set { ch_fqc_mqc }

        // Attach the settings read by downstream modules, then drop samples
        // whose first FASTQ file yields no decompressed bytes.
        FILTLONG.out.filtered_reads
            .map { meta, fastq ->
                meta.sequence_sketch = params.mash_sketch
                meta.single_end = true
                meta.salmon_alignment_mode = true
                meta.salmon_decoys = params.dummyfile
                meta.salmon_lib_type = (params.salmonalign_libtype ?: false)
                [ meta, fastq ]
            }
            .filter { meta, fastq ->
                // `def` keeps these closure-local instead of leaking into the
                // script binding, where concurrent evaluations could collide.
                def fq_file = ( fastq.getClass().toString() =~ /ArrayList/ ? fastq : [ fastq ] )
                // withCloseable closes both streams even when read() throws,
                // so no file handle is left open per sample.
                new FileInputStream( fq_file[0].toString() ).withCloseable { raw_stream ->
                    new GZIPInputStream( raw_stream ).withCloseable { fq_gzip ->
                        fq_gzip.read() != -1
                    }
                }
            }
            .set { ch_processed_reads }

        MASH_SCREEN ( ch_processed_reads )

        TOP_UNIQUE_SEROVARS ( MASH_SCREEN.out.screened )

        TOP_UNIQUE_SEROVARS.out.genomes_fasta
            .set { ch_genomes_fasta }

        TOP_UNIQUE_SEROVARS.out.failed
            .set { ch_bcs_calls_failed }

        if (params.sourmashgather_run || params.sourmashsearch_run) {
            SOURMASH_SKETCH (
                ch_processed_reads
                    .join ( ch_genomes_fasta )
            )

            if (params.sourmashgather_run) {
                SOURMASH_GATHER (
                    SOURMASH_SKETCH.out.signatures,
                    [], [], [], []
                )

                // Gather output replaces the genome list used downstream.
                SOURMASH_GATHER.out.genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat ( SOURMASH_GATHER.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_GATHER.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }

            if (params.sourmashsearch_run) {
                SOURMASH_SEARCH (
                    SOURMASH_SKETCH.out.signatures,
                    []
                )

                // When enabled, search output replaces the genome list again.
                SOURMASH_SEARCH.out.genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat ( SOURMASH_SEARCH.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_SEARCH.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }
        }

        GATHER_HITS ( ch_genomes_fasta )

        OTF_GENOME (
            GATHER_HITS.out.sm_template_hits
                .map { meta, hits ->
                    [meta, hits, []]
                }
        )

        // All failure records end up in a single BCS_NO_CALLS.txt file.
        OTF_GENOME.out.failed
            .concat ( ch_bcs_calls_failed )
            .collectFile( name: 'BCS_NO_CALLS.txt' )
            .set { ch_bcs_no_calls }

        // Pair each on-the-fly genome with its reads, then split into two channels.
        OTF_GENOME.out.genomes_fasta
            .join ( ch_processed_reads )
            .multiMap { meta, genomes, filtered ->
                reads: [meta, filtered]
                assmb: [meta, genomes]
            }
            .set { ch_assemble_these }

        MINIMAP2_ALIGN (
            ch_assemble_these.reads,
            ch_assemble_these.assmb,
            params.mm2_align_bam,
            params.mm2_align_bam_sorted,
            params.mm2_align_cigar_paf,
            params.mm2_align_cigar_bam
        )

        SALMON_QUANT (
            MINIMAP2_ALIGN.out.bam
                .join ( ch_assemble_these.assmb )
        )

        SALMON_QUANT.out.results
            .groupTuple(by: [0])
            .map { it -> tuple ( it[1].flatten() ) }
            .mix ( ch_bcs_no_calls )
            .collect()
            .set { ch_salmon_res_dirs }

        // NOTE(review): SOURMASH_SKETCH is only invoked above when
        // sourmashgather_run or sourmashsearch_run is set, yet its outputs are
        // read here under sourmashsketch_run. Confirm the parameters are kept
        // consistent by the pipeline configuration.
        if (params.sourmashsketch_run) {
            SOURMASH_SKETCH.out.signatures
                .groupTuple(by: [0])
                .map { meta, qsigs, dsigs -> [ qsigs ] }
                .collect()
                .flatten()
                .collect()
                .set { ch_query_sigs }

            GATHER_HITS.out.sm_template_hits
                .map { meta, hits -> [ hits ] }
                .collect()
                .flatten()
                .collectFile(name: 'accessions.txt')
                .set { ch_otf_genomes }

            if (params.flye_run) {

                FLYE_ASSEMBLE ( ch_assemble_these.reads )

                FLYE_ASSEMBLE.out.assembly
                    .set { ch_asm_polished_contigs }

                MLST ( ch_asm_polished_contigs )

                // All MLST tables are grouped under the single key 'mlst'.
                MLST.out.tsv
                    .map { meta, tsv -> [ 'mlst', tsv] }
                    .groupTuple(by: [0])
                    .map { it -> tuple ( it[0], it[1].flatten() ) }
                    .set { ch_mqc_custom_tbl }

                ABRICATE_RUN (
                    ch_asm_polished_contigs,
                    abricate_dbs
                )

                // Results are keyed by the database list so that they group together.
                ABRICATE_RUN.out.abricated
                    .map { meta, abres -> [ abricate_dbs, abres ] }
                    .groupTuple(by: [0])
                    .map { it -> tuple ( it[0], it[1].flatten() ) }
                    .set { ch_abricated }

                ABRICATE_SUMMARY ( ch_abricated )

                ch_mqc_custom_tbl
                    .concat (
                        ABRICATE_SUMMARY.out.ncbiamrplus.map { it -> tuple ( it[0], it[1] )},
                        ABRICATE_SUMMARY.out.resfinder.map { it -> tuple ( it[0], it[1] )},
                        ABRICATE_SUMMARY.out.megares.map { it -> tuple ( it[0], it[1] )},
                        ABRICATE_SUMMARY.out.argannot.map { it -> tuple ( it[0], it[1] )}
                    )
                    .groupTuple(by: [0])
                    .map { it -> [ it[0], it[1].flatten() ]}
                    .set { ch_mqc_custom_tbl }

                TABLE_SUMMARY ( ch_mqc_custom_tbl )

                ch_multiqc
                    .concat ( TABLE_SUMMARY.out.mqc_yml )
                    .set { ch_multiqc }

                software_versions
                    .mix (
                        FLYE_ASSEMBLE.out.versions.ifEmpty(null),
                        MLST.out.versions.ifEmpty(null),
                        ABRICATE_RUN.out.versions.ifEmpty(null),
                        ABRICATE_SUMMARY.out.versions.ifEmpty(null),
                        TABLE_SUMMARY.out.versions.ifEmpty(null)
                    )
                    .set { software_versions }
            }

            SOURMASH_COMPARE ( ch_query_sigs, ch_otf_genomes )

            BCS_DISTANCE_MATRIX (
                SOURMASH_COMPARE.out.matrix,
                SOURMASH_COMPARE.out.labels
            )

            ch_multiqc
                .concat ( BCS_DISTANCE_MATRIX.out.mqc_yml )
                .set { ch_multiqc }

            software_versions
                .mix (
                    SOURMASH_SKETCH.out.versions.ifEmpty(null),
                    SOURMASH_COMPARE.out.versions.ifEmpty(null),
                    BCS_DISTANCE_MATRIX.out.versions.ifEmpty(null)
                )
                .set { software_versions }
        }

        BCS_RESULTS ( ch_salmon_res_dirs )

        // Merge every version record, de-duplicate, and write one YAML file.
        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    FILTLONG.out.versions,
                    FASTQC.out.versions,
                    MASH_SCREEN.out.versions,
                    TOP_UNIQUE_SEROVARS.out.versions,
                    GATHER_HITS.out.versions,
                    OTF_GENOME.out.versions.ifEmpty(null),
                    MINIMAP2_ALIGN.out.versions,
                    SALMON_QUANT.out.versions,
                    BCS_RESULTS.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        if (params.multiqc_run) {
            DUMP_SOFTWARE_VERSIONS.out.mqc_yml
                .concat (
                    ch_multiqc,
                    ch_fqc_mqc,
                    BCS_RESULTS.out.mqc_yml,
                    BCS_RESULTS.out.mqc_json
                )
                .collect()
                .set { ch_multiqc }

            MULTIQC ( ch_multiqc )
        }

}
373
374 /*
375 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
376 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
377 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
378 */
379
// Send the notification mail once the run has finished successfully.
workflow.onComplete {
    if (!workflow.success) {
        return
    }
    sendMail()
}

// Send the notification mail when the run stops on an error.
workflow.onError {
    sendMail()
}
389
390 /*
391 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
392 METHOD TO CHECK METADATA EXISTENCE
393 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
394 */
395
// Stop the pipeline unless the given file exists and is non-empty.
//
// file_path : path to a required input file; may be unset (null or empty).
// msg       : short label for the file, interpolated into the error message.
//
// Calls stopNow() with a descriptive message when the check fails.
def checkMetadataExists(file_path, msg) {
    // An unset path is never passed to file(), which is expected to reject
    // null/empty arguments with a generic error; it is reported through
    // stopNow() instead. `def` keeps the handle local to this method rather
    // than placing it in the script binding.
    def file_path_obj = (file_path ? file( file_path ) : null)

    if (file_path_obj == null || !file_path_obj.exists() || file_path_obj.size() == 0) {
        stopNow("Please check if your ${msg} file\n" +
            "[ ${file_path} ]\nexists and is not of size 0.")
    }
}
404
405 /*
406 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
407 HELP TEXT METHODS FOR BETTERCALLSAL WORKFLOW
408 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
409 */
410
// Build the help text shown for --help.
//
// With a bare --help (Boolean) or an empty value, prints the concise help and
// returns the list of per-tool help switches. With a comma-separated list of
// tool names, checks each name against the known switches (calling stopNow()
// for an unknown one) and returns the FASTQ entry point help, the help of
// every requested tool and the wrap-up help. The result is passed through
// addPadding().
def help() {

    Map helptext = [:]
    Map nH = [:]
    def uHelp = (params.help.getClass().toString() =~ /String/ ? params.help.tokenize(',').join(' ') : '')

    Map defaultHelp = [
        '--help filtlong' : 'Show filtlong CLI options',
        '--help mash' : 'Show mash `screen` CLI options',
        '--help tuspy' : 'Show get_top_unique_mash_hit_genomes.py CLI options',
        '--help sourmashsketch' : 'Show sourmash `sketch` CLI options',
        '--help sourmashgather' : 'Show sourmash `gather` CLI options',
        '--help sourmashsearch' : 'Show sourmash `search` CLI options',
        '--help sfhpy' : 'Show sourmash_filter_hits.py CLI options',
        '--help flye' : 'Show flye CLI options',
        '--help mlst' : 'Show mlst CLI options',
        '--help abricate' : 'Show abricate CLI options',
        '--help gsrpy' : 'Show gen_salmon_res_table.py CLI options\n'
    ]

    // No specific tool requested: show the overview and the available switches.
    boolean overviewOnly = (params.help.getClass().toString() =~ /Boolean/ || uHelp.size() == 0)
    if (overviewOnly) {
        println conciseHelp('fastp,mash')
        helptext.putAll(defaultHelp)
        return addPadding(helptext)
    }

    // Every requested tool name must match one of the known switches.
    for (h in params.help.tokenize(',')) {
        def recognised = defaultHelp.keySet().any { it =~ /(?i)\b${h}\b/ }
        if (!recognised) {
            println conciseHelp('fastp,mash')
            stopNow("Tool [ ${h} ] is not a part of ${params.pipeline} pipeline.")
        }
    }

    // Pattern -> help provider, in the order the sections are appended.
    // Providers are closures so a tool's help is only built when requested.
    def toolSections = [
        [ /(?i)\bfiltlong/, { filtlongHelp(params) } ],
        [ /(?i)\bmash/, { mashscreenHelp(params) } ],
        [ /(?i)\btuspy/, { tuspyHelp(params) } ],
        [ /(?i)\bsourmashsketch/, { sourmashsketchHelp(params) } ],
        [ /(?i)\bsourmashgather/, { sourmashgatherHelp(params) } ],
        [ /(?i)\bsourmashsearch/, { sourmashsearchHelp(params) } ],
        [ /(?i)\bsfhpy/, { sfhpyHelp(params) } ],
        [ /(?i)\bflye/, { flyeHelp(params) } ],
        [ /(?i)\bmlst/, { mlstHelp(params) } ],
        [ /(?i)\babricate/, { abricateHelp(params) } ],
        [ /(?i)\bgsrpy/, { gsrpyHelp(params) } ]
    ]

    def combined = fastqEntryPointHelp()
    for (section in toolSections) {
        def (pattern, provider) = section
        combined = combined + (uHelp =~ pattern ? provider().text : nH)
    }
    helptext.putAll(combined + wrapUpHelp())

    return addPadding(helptext)
}