comparison 0.5.0/workflows/bettercallsal.nf @ 1:365849f031fd

"planemo upload"
author kkonganti
date Mon, 05 Jun 2023 18:48:51 -0400
parents
children
comparison
equal deleted inserted replaced
0:a4b1ee4b68b1 1:365849f031fd
1 // Define any required imports for this specific workflow
2 import java.nio.file.Paths
3 import java.util.zip.GZIPInputStream
4 import java.io.FileInputStream
5 import nextflow.file.FileHelper
6
7
8 // Include any necessary methods
9 include { \
10 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
11 addPadding; wrapUpHelp } from "${params.routines}"
12 include { bbmergeHelp } from "${params.toolshelp}${params.fs}bbmerge"
13 include { fastpHelp } from "${params.toolshelp}${params.fs}fastp"
14 include { mashscreenHelp } from "${params.toolshelp}${params.fs}mashscreen"
15 include { tuspyHelp } from "${params.toolshelp}${params.fs}tuspy"
16 include { sourmashsketchHelp } from "${params.toolshelp}${params.fs}sourmashsketch"
17 include { sourmashgatherHelp } from "${params.toolshelp}${params.fs}sourmashgather"
18 include { sourmashsearchHelp } from "${params.toolshelp}${params.fs}sourmashsearch"
19 include { sfhpyHelp } from "${params.toolshelp}${params.fs}sfhpy"
20 include { kmaindexHelp } from "${params.toolshelp}${params.fs}kmaindex"
21 include { kmaalignHelp } from "${params.toolshelp}${params.fs}kmaalign"
22 include { salmonidxHelp } from "${params.toolshelp}${params.fs}salmonidx"
23 include { gsrpyHelp } from "${params.toolshelp}${params.fs}gsrpy"
24
25 // Exit if help requested before any subworkflows
// Short-circuit: when --help is given, print the assembled help text and
// terminate before any module or subworkflow is included.
if (params.help) {
    log.info(help())
    exit(0)
}
30
31
32 // Include any necessary modules and subworkflows
33 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
34 include { CAT_CAT } from "${params.modules}${params.fs}cat_cat${params.fs}main"
35 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main"
36 include { BBTOOLS_BBMERGE } from "${params.modules}${params.fs}bbtools${params.fs}bbmerge${params.fs}main"
37 include { FASTP } from "${params.modules}${params.fs}fastp${params.fs}main"
38 include { MASH_SCREEN } from "${params.modules}${params.fs}mash${params.fs}screen${params.fs}main"
39 include { TOP_UNIQUE_SEROVARS } from "${params.modules}${params.fs}top_unique_serovars${params.fs}main"
40 include { SOURMASH_SKETCH } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
41 include { SOURMASH_GATHER } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main"
42 include { SOURMASH_SEARCH } from "${params.modules}${params.fs}sourmash${params.fs}search${params.fs}main"
43 include { KMA_INDEX } from "${params.modules}${params.fs}kma${params.fs}index${params.fs}main"
44 include { KMA_ALIGN } from "${params.modules}${params.fs}kma${params.fs}align${params.fs}main"
45 include { OTF_GENOME } from "${params.modules}${params.fs}otf_genome${params.fs}main"
46 include { SALMON_INDEX } from "${params.modules}${params.fs}salmon${params.fs}index${params.fs}main"
47 include { SALMON_QUANT } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main"
48 include { SOURMASH_COMPARE } from "${params.modules}${params.fs}custom${params.fs}sourmash${params.fs}compare${params.fs}main"
49 include { BCS_DISTANCE_MATRIX } from "${params.modules}${params.fs}bcs_distance_matrix${params.fs}main"
50 include { BCS_RESULTS } from "${params.modules}${params.fs}bcs_results${params.fs}main"
51 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
52 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main"
53
54 /*
55 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
56 INPUTS AND ANY CHECKS FOR THE BETTERCALLSAL WORKFLOW
57 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
58 */
59
// Count of supplied read sources; only --input contributes at present.
def reads_platform = (params.input ? 1 : 0)
def salmon_idx_decoys = file ( "${params.salmonidx_decoys}" )

// Abort early when no input folder was supplied.
if (reads_platform < 1) {
    stopNow("Please mention at least one absolute path to input folder which contains\n" +
            "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
}

// Every required database / metadata file must exist and be non-empty.
[
    [ params.mash_sketch, 'MASH sketch' ],
    [ params.tuspy_ps, 'ACC2SERO pickle' ],
    [ params.gsrpy_snp_clus_metadata, 'PDG reference target cluster metadata' ]
].each { required_path, label ->
    checkMetadataExists(required_path, label)
}
74
75 /*
76 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
77 RUN THE BETTERCALLSAL WORKFLOW
78 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
79 */
80
// Main bettercallsal workflow.
//
// Stages, in order: read pre-processing (optional BBMerge, then fastp or
// FastQC, optional paired-end concatenation), MASH screening, selection of top
// unique serovars, optional sourmash gather/search refinement, KMA indexing and
// alignment, on-the-fly genome collection, salmon index/quant, results
// aggregation, software version dump and MultiQC.
workflow BETTERCALLSAL {
    main:
        log.info summaryOfParams()

        PROCESS_FASTQ()

        PROCESS_FASTQ
            .out
            .versions
            .set { software_versions }

        PROCESS_FASTQ
            .out
            .processed_reads
            .set { ch_processed_reads }

        // Optionally merge paired-end reads; merged output is wrapped in a list
        // so downstream steps always see [ meta, [ fastq ] ].
        if (params.bbmerge_run && !params.fq_single_end) {
            ch_processed_reads
                .map { meta, fastq ->
                    meta.adapters = (params.bbmerge_adapters ?: params.dummyfile)
                    [ meta, fastq ]
                }
                .set { ch_processed_reads }

            BBTOOLS_BBMERGE( ch_processed_reads )

            BBTOOLS_BBMERGE
                .out
                .fastq
                .map { meta, fastq ->
                    [ meta, [ fastq ] ]
                }
                .set { ch_processed_reads }

            software_versions
                .mix ( BBTOOLS_BBMERGE.out.versions )
                .set { software_versions }
        }

        // Either fastp (QC + trimming) or FastQC (QC only) seeds the MultiQC inputs.
        if (params.fastp_run) {
            FASTP ( ch_processed_reads )

            FASTP
                .out
                .passed_reads
                .set { ch_processed_reads }

            FASTP
                .out
                .json
                .map { meta, json -> [ json ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTP.out.versions )
                .set { software_versions }
        } else {
            FASTQC ( ch_processed_reads )

            FASTQC
                .out
                .zip
                .map { meta, zip -> [ zip ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTQC.out.versions )
                .set { software_versions }
        }

        // Concatenate paired-end files only when they were not already merged.
        if (params.bcs_concat_pe && !params.fq_single_end && !params.bbmerge_run) {
            CAT_CAT ( ch_processed_reads )

            CAT_CAT
                .out
                .concatenated_reads
                .set { ch_processed_reads }

            software_versions
                .mix ( CAT_CAT.out.versions )
                .set { software_versions }
        }

        ch_processed_reads
            .map { meta, fastq ->
                meta.sequence_sketch = params.mash_sketch
                meta.get_kma_hit_accs = true
                meta.single_end = true
                meta.salmon_decoys = params.dummyfile
                meta.salmon_lib_type = (params.salmonalign_libtype ?: false)
                [ meta, fastq ]
            }
            .filter { meta, fastq ->
                // Keep a sample only if its first FASTQ decompresses to at least
                // one byte. Locals are declared with `def` so they do not leak
                // into the shared script binding, and both streams are closed
                // even when reading fails.
                def fq_files = ( fastq instanceof List ? fastq : [ fastq ] )
                new FileInputStream( fq_files[0].toString() ).withCloseable { raw_stream ->
                    new GZIPInputStream( raw_stream ).withCloseable { gz_stream ->
                        gz_stream.read() != -1
                    }
                }
            }
            .set { ch_processed_reads }

        MASH_SCREEN ( ch_processed_reads )

        TOP_UNIQUE_SEROVARS ( MASH_SCREEN.out.screened )

        TOP_UNIQUE_SEROVARS.out.genomes_fasta
            .set { ch_genomes_fasta }

        TOP_UNIQUE_SEROVARS.out.failed
            .set { ch_bcs_calls_failed }

        // Optional sourmash refinement. When both gather and search run, the
        // genomes FASTA channel from search is the one used downstream.
        if (params.sourmashgather_run || params.sourmashsearch_run) {
            SOURMASH_SKETCH (
                ch_processed_reads
                    .join ( ch_genomes_fasta )
            )

            if (params.sourmashgather_run) {
                SOURMASH_GATHER (
                    SOURMASH_SKETCH.out.signatures,
                    [], [], [], []
                )

                SOURMASH_GATHER
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat( SOURMASH_GATHER.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_GATHER.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }

            if (params.sourmashsearch_run) {
                SOURMASH_SEARCH (
                    SOURMASH_SKETCH.out.signatures,
                    []
                )

                SOURMASH_SEARCH
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat( SOURMASH_SEARCH.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_SEARCH.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }
        }

        KMA_INDEX ( ch_genomes_fasta )

        KMA_ALIGN (
            ch_processed_reads
                .join(KMA_INDEX.out.idx)
        )

        OTF_GENOME ( KMA_ALIGN.out.hits )

        // Gather every "no call" record from all stages into a single file.
        OTF_GENOME.out.failed
            .concat( ch_bcs_calls_failed )
            .collectFile(name: 'BCS_NO_CALLS.txt')
            .set { ch_bcs_no_calls }

        SALMON_INDEX ( OTF_GENOME.out.genomes_fasta )

        SALMON_QUANT (
            ch_processed_reads
                .join(SALMON_INDEX.out.idx)
        )

        SALMON_QUANT
            .out
            .results
            .groupTuple(by: [0])
            .map { it -> tuple ( it[1].flatten() ) }
            .mix ( ch_bcs_no_calls )
            .collect()
            .set { ch_salmon_res_dirs }

        // NOTE(review): this branch reads SOURMASH_SKETCH.out, but SOURMASH_SKETCH
        // is only invoked above when sourmashgather_run or sourmashsearch_run is
        // set. Confirm these flags are always enabled together.
        if (params.sourmashsketch_run) {
            SOURMASH_SKETCH
                .out
                .signatures
                .groupTuple(by: [0])
                .map { meta, qsigs, dsigs ->
                    [ qsigs ]
                }
                .collect()
                .flatten()
                .collect()
                .set { ch_query_sigs }

            KMA_ALIGN
                .out
                .hits
                .map { meta, hits ->
                    [ hits ]
                }
                .collect()
                .flatten()
                .collectFile(name: 'accessions.txt')
                .set { ch_otf_genomes }

            SOURMASH_COMPARE ( ch_query_sigs, ch_otf_genomes )

            BCS_DISTANCE_MATRIX (
                SOURMASH_COMPARE.out.matrix,
                SOURMASH_COMPARE.out.labels
            )

            ch_multiqc
                .concat( BCS_DISTANCE_MATRIX.out.mqc_yml )
                .set { ch_multiqc }

            software_versions
                .mix (
                    SOURMASH_SKETCH.out.versions.ifEmpty(null),
                    SOURMASH_COMPARE.out.versions.ifEmpty(null),
                    BCS_DISTANCE_MATRIX.out.versions.ifEmpty(null)
                )
                .set { software_versions }
        }

        BCS_RESULTS ( ch_salmon_res_dirs )

        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    MASH_SCREEN.out.versions,
                    TOP_UNIQUE_SEROVARS.out.versions,
                    KMA_INDEX.out.versions,
                    KMA_ALIGN.out.versions,
                    OTF_GENOME.out.versions.ifEmpty(null),
                    SALMON_INDEX.out.versions,
                    SALMON_QUANT.out.versions,
                    BCS_RESULTS.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        DUMP_SOFTWARE_VERSIONS
            .out
            .mqc_yml
            .concat (
                ch_multiqc,
                BCS_RESULTS.out.mqc_yml,
                BCS_RESULTS.out.mqc_json
            )
            .collect()
            .set { ch_multiqc }

        MULTIQC ( ch_multiqc )
}
344
345 /*
346 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
347 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
348 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
349 */
350
// Completion handler: calls sendMail() only when the run succeeded.
workflow.onComplete {
    if (!workflow.success) {
        return
    }
    sendMail()
}

// Error handler: calls sendMail() whenever the run errors out.
workflow.onError {
    sendMail()
}
360
361 /*
362 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
363 METHOD TO CHECK METADATA EXISTENCE
364 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
365 */
366
/**
 * Stop the pipeline unless the given file exists and is non-empty.
 *
 * @param file_path path to the required file; a null or empty value is
 *                  treated the same as a missing file
 * @param msg       short human-readable label for the file, used in the
 *                  error message
 */
def checkMetadataExists(file_path, msg) {
    // Declared with `def` so the handle stays local instead of leaking into
    // the script binding. A null or empty path is never passed to file().
    def file_path_obj = (file_path ? file( file_path ) : null)

    if (file_path_obj == null || !file_path_obj.exists() || file_path_obj.size() == 0) {
        stopNow("Please check if your ${msg} file\n" +
            "[ ${file_path} ]\nexists and is not of size 0.")
    }
}
375
376 /*
377 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
378 HELP TEXT METHODS FOR BETTERCALLSAL WORKFLOW
379 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
380 */
381
/**
 * Build the help text for the bettercallsal workflow.
 *
 * Merges the generic FASTQ entry-point help, the workflow-specific options
 * defined here, the per-tool help maps and the wrap-up help into a single map.
 *
 * @return the result of addPadding() applied to the merged help map
 */
def help() {

    Map helptext = [:]
    Map bcsConcatHelp = [:]
    Map fastpAdapterHelp = [:]

    bcsConcatHelp['--bcs_concat_pe'] = "Concatenate paired-end files. " +
        "Default: ${params.bcs_concat_pe}"

    // The key must match the real parameter name (params.fastp_use_custom_adapters);
    // it was previously misspelled as "adapaters".
    fastpAdapterHelp['--fastp_use_custom_adapters'] = "Use custom adapter FASTA with fastp on top of " +
        "built-in adapter sequence auto-detection. Enabling this option will attempt to find and remove " +
        "all possible Illumina adapter and primer sequences but will make the workflow run slow. " +
        "Default: ${params.fastp_use_custom_adapters}"

    helptext.putAll (
        fastqEntryPointHelp() +
        bcsConcatHelp +
        bbmergeHelp(params).text +
        fastpHelp(params).text +
        fastpAdapterHelp +
        mashscreenHelp(params).text +
        tuspyHelp(params).text +
        sourmashsketchHelp(params).text +
        sourmashgatherHelp(params).text +
        sourmashsearchHelp(params).text +
        sfhpyHelp(params).text +
        kmaindexHelp(params).text +
        kmaalignHelp(params).text +
        salmonidxHelp(params).text +
        gsrpyHelp(params).text +
        wrapUpHelp()
    )

    return addPadding(helptext)
}