Mercurial > repos > kkonganti > cfsan_bettercallsal
comparison 0.5.0/workflows/bettercallsal.nf @ 1:365849f031fd
"planemo upload"
author | kkonganti |
---|---|
date | Mon, 05 Jun 2023 18:48:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:a4b1ee4b68b1 | 1:365849f031fd |
---|---|
1 // Define any required imports for this specific workflow | |
2 import java.nio.file.Paths | |
3 import java.util.zip.GZIPInputStream | |
4 import java.io.FileInputStream | |
5 import nextflow.file.FileHelper | |
6 | |
7 | |
8 // Include any necessary methods | |
9 include { \ | |
10 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \ | |
11 addPadding; wrapUpHelp } from "${params.routines}" | |
12 include { bbmergeHelp } from "${params.toolshelp}${params.fs}bbmerge" | |
13 include { fastpHelp } from "${params.toolshelp}${params.fs}fastp" | |
14 include { mashscreenHelp } from "${params.toolshelp}${params.fs}mashscreen" | |
15 include { tuspyHelp } from "${params.toolshelp}${params.fs}tuspy" | |
16 include { sourmashsketchHelp } from "${params.toolshelp}${params.fs}sourmashsketch" | |
17 include { sourmashgatherHelp } from "${params.toolshelp}${params.fs}sourmashgather" | |
18 include { sourmashsearchHelp } from "${params.toolshelp}${params.fs}sourmashsearch" | |
19 include { sfhpyHelp } from "${params.toolshelp}${params.fs}sfhpy" | |
20 include { kmaindexHelp } from "${params.toolshelp}${params.fs}kmaindex" | |
21 include { kmaalignHelp } from "${params.toolshelp}${params.fs}kmaalign" | |
22 include { salmonidxHelp } from "${params.toolshelp}${params.fs}salmonidx" | |
23 include { gsrpyHelp } from "${params.toolshelp}${params.fs}gsrpy" | |
24 | |
// When --help is set, print the assembled help text and stop with exit
// status 0 before any module or subworkflow is included.
if (params.help) {
    log.info( help() )
    exit( 0 )
}
30 | |
31 | |
32 // Include any necessary modules and subworkflows | |
33 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq" | |
34 include { CAT_CAT } from "${params.modules}${params.fs}cat_cat${params.fs}main" | |
35 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main" | |
36 include { BBTOOLS_BBMERGE } from "${params.modules}${params.fs}bbtools${params.fs}bbmerge${params.fs}main" | |
37 include { FASTP } from "${params.modules}${params.fs}fastp${params.fs}main" | |
38 include { MASH_SCREEN } from "${params.modules}${params.fs}mash${params.fs}screen${params.fs}main" | |
39 include { TOP_UNIQUE_SEROVARS } from "${params.modules}${params.fs}top_unique_serovars${params.fs}main" | |
40 include { SOURMASH_SKETCH } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main" | |
41 include { SOURMASH_GATHER } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main" | |
42 include { SOURMASH_SEARCH } from "${params.modules}${params.fs}sourmash${params.fs}search${params.fs}main" | |
43 include { KMA_INDEX } from "${params.modules}${params.fs}kma${params.fs}index${params.fs}main" | |
44 include { KMA_ALIGN } from "${params.modules}${params.fs}kma${params.fs}align${params.fs}main" | |
45 include { OTF_GENOME } from "${params.modules}${params.fs}otf_genome${params.fs}main" | |
46 include { SALMON_INDEX } from "${params.modules}${params.fs}salmon${params.fs}index${params.fs}main" | |
47 include { SALMON_QUANT } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main" | |
48 include { SOURMASH_COMPARE } from "${params.modules}${params.fs}custom${params.fs}sourmash${params.fs}compare${params.fs}main" | |
49 include { BCS_DISTANCE_MATRIX } from "${params.modules}${params.fs}bcs_distance_matrix${params.fs}main" | |
50 include { BCS_RESULTS } from "${params.modules}${params.fs}bcs_results${params.fs}main" | |
51 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" | |
52 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" | |
53 | |
54 /* | |
55 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
56 INPUTS AND ANY CHECKS FOR THE BETTERCALLSAL WORKFLOW | |
57 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
58 */ | |
59 | |
// Resolve the salmon decoys path given by --salmonidx_decoys.
def salmon_idx_decoys = file ( "${params.salmonidx_decoys}" )

// Number of read inputs supplied: 1 when --input is set, otherwise 0.
def reads_platform = (params.input ? 1 : 0)

// Without an input folder there is nothing to process, so stop right away.
if (reads_platform < 1) {
    stopNow(
        "Please mention at least one absolute path to input folder which contains\n" +
        "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)"
    )
}

// Each required database / metadata file must exist and be non-empty.
checkMetadataExists(params.mash_sketch, 'MASH sketch')
checkMetadataExists(params.tuspy_ps, 'ACC2SERO pickle')
checkMetadataExists(params.gsrpy_snp_clus_metadata, 'PDG reference target cluster metadata')
74 | |
75 /* | |
76 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
77 RUN THE BETTERCALLSAL WORKFLOW | |
78 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
79 */ | |
80 | |
// Main bettercallsal workflow.
//
// Stages, in the order they are wired below:
//   PROCESS_FASTQ -> (BBTOOLS_BBMERGE) -> FASTP | FASTQC -> (CAT_CAT)
//   -> MASH_SCREEN -> TOP_UNIQUE_SEROVARS -> (SOURMASH_SKETCH/GATHER/SEARCH)
//   -> KMA_INDEX -> KMA_ALIGN -> OTF_GENOME -> SALMON_INDEX -> SALMON_QUANT
//   -> (SOURMASH_COMPARE -> BCS_DISTANCE_MATRIX) -> BCS_RESULTS
//   -> DUMP_SOFTWARE_VERSIONS -> MULTIQC
// Stages in parentheses only run when the corresponding params are set.
workflow BETTERCALLSAL {
    main:
        log.info summaryOfParams()

        PROCESS_FASTQ()

        PROCESS_FASTQ
            .out
            .versions
            .set { software_versions }

        PROCESS_FASTQ
            .out
            .processed_reads
            .set { ch_processed_reads }

        // Optional merging of paired-end reads with bbmerge.
        if (params.bbmerge_run && !params.fq_single_end) {
            ch_processed_reads
                .map { meta, fastq ->
                    // Build a new meta map instead of mutating the one that is
                    // shared with every other consumer of this channel.
                    // Assumes meta is a java.util.Map -- TODO confirm against
                    // PROCESS_FASTQ.
                    [ meta + [ adapters: (params.bbmerge_adapters ?: params.dummyfile) ], fastq ]
                }
                .set { ch_processed_reads }

            BBTOOLS_BBMERGE( ch_processed_reads )

            BBTOOLS_BBMERGE
                .out
                .fastq
                .map { meta, fastq ->
                    [ meta, [ fastq ] ]
                }
                .set { ch_processed_reads }

            software_versions
                .mix ( BBTOOLS_BBMERGE.out.versions )
                .set { software_versions }
        }

        // Read QC: fastp when enabled, otherwise FastQC. Either way the
        // per-sample reports are collected into ch_multiqc.
        if (params.fastp_run) {
            FASTP ( ch_processed_reads )

            FASTP
                .out
                .passed_reads
                .set { ch_processed_reads }

            FASTP
                .out
                .json
                .map { meta, json -> [ json ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTP.out.versions )
                .set { software_versions }
        } else {
            FASTQC ( ch_processed_reads )

            FASTQC
                .out
                .zip
                .map { meta, zip -> [ zip ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTQC.out.versions )
                .set { software_versions }
        }

        // Optional concatenation of paired-end files (skipped when bbmerge ran).
        if (params.bcs_concat_pe && !params.fq_single_end && !params.bbmerge_run) {
            CAT_CAT ( ch_processed_reads )

            CAT_CAT
                .out
                .concatenated_reads
                .set { ch_processed_reads }

            software_versions
                .mix ( CAT_CAT.out.versions )
                .set { software_versions }
        }

        ch_processed_reads
            .map { meta, fastq ->
                // Copy-and-extend rather than assigning onto `meta`: in the
                // FASTQC branch the very same meta object is concurrently
                // consumed by FASTQC, so in-place writes (notably
                // single_end = true) could leak into that task.
                // Assumes meta is a java.util.Map -- TODO confirm.
                def updated_meta = meta + [
                    sequence_sketch: params.mash_sketch,
                    get_kma_hit_accs: true,
                    single_end: true,
                    salmon_decoys: params.dummyfile,
                    salmon_lib_type: (params.salmonalign_libtype ?: false)
                ]
                [ updated_meta, fastq ]
            }
            .filter { meta, fastq ->
                // Keep only samples whose first FASTQ file decompresses to at
                // least one byte. Locals are declared with `def` so that
                // concurrent invocations of this closure do not share them.
                def fq_files = (fastq instanceof List ? fastq : [ fastq ])
                def first_fq = new File( fq_files[0].toString() )

                // A zero-byte file has no gzip header and would make the
                // GZIPInputStream constructor throw; treat it as empty.
                if (first_fq.length() == 0) {
                    return false
                }

                // withStream closes the streams once the check is done.
                new FileInputStream( first_fq ).withStream { raw ->
                    new GZIPInputStream( raw ).withStream { gz ->
                        gz.read() != -1
                    }
                }
            }
            .set { ch_processed_reads }

        MASH_SCREEN ( ch_processed_reads )

        TOP_UNIQUE_SEROVARS ( MASH_SCREEN.out.screened )

        TOP_UNIQUE_SEROVARS.out.genomes_fasta
            .set { ch_genomes_fasta }

        TOP_UNIQUE_SEROVARS.out.failed
            .set { ch_bcs_calls_failed }

        // Optional sourmash refinement. When both gather and search are
        // enabled, ch_genomes_fasta ends up holding the search output because
        // it is set last.
        if (params.sourmashgather_run || params.sourmashsearch_run) {
            SOURMASH_SKETCH (
                ch_processed_reads
                    .join ( ch_genomes_fasta )
            )

            if (params.sourmashgather_run) {
                SOURMASH_GATHER (
                    SOURMASH_SKETCH.out.signatures,
                    [], [], [], []
                )

                SOURMASH_GATHER
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat( SOURMASH_GATHER.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_GATHER.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }

            if (params.sourmashsearch_run) {
                SOURMASH_SEARCH (
                    SOURMASH_SKETCH.out.signatures,
                    []
                )

                SOURMASH_SEARCH
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat( SOURMASH_SEARCH.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_SEARCH.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }
        }

        KMA_INDEX ( ch_genomes_fasta )

        KMA_ALIGN (
            ch_processed_reads
                .join(KMA_INDEX.out.idx)
        )

        OTF_GENOME ( KMA_ALIGN.out.hits )

        // Gather every "failed" emission into a single BCS_NO_CALLS.txt file.
        OTF_GENOME.out.failed
            .concat( ch_bcs_calls_failed )
            .collectFile(name: 'BCS_NO_CALLS.txt')
            .set { ch_bcs_no_calls }

        SALMON_INDEX ( OTF_GENOME.out.genomes_fasta )

        SALMON_QUANT (
            ch_processed_reads
                .join(SALMON_INDEX.out.idx)
        )

        SALMON_QUANT
            .out
            .results
            .groupTuple(by: [0])
            .map { it -> tuple ( it[1].flatten() ) }
            .mix ( ch_bcs_no_calls )
            .collect()
            .set { ch_salmon_res_dirs }

        // NOTE(review): this branch reads SOURMASH_SKETCH.out, but
        // SOURMASH_SKETCH is only invoked above when sourmashgather_run or
        // sourmashsearch_run is set -- confirm that sourmashsketch_run can
        // never be enabled on its own.
        if (params.sourmashsketch_run) {
            SOURMASH_SKETCH
                .out
                .signatures
                .groupTuple(by: [0])
                .map { meta, qsigs, dsigs ->
                    [ qsigs ]
                }
                .collect()
                .flatten()
                .collect()
                .set { ch_query_sigs }

            KMA_ALIGN
                .out
                .hits
                .map { meta, hits ->
                    [ hits ]
                }
                .collect()
                .flatten()
                .collectFile(name: 'accessions.txt')
                .set { ch_otf_genomes }

            SOURMASH_COMPARE ( ch_query_sigs, ch_otf_genomes )

            BCS_DISTANCE_MATRIX (
                SOURMASH_COMPARE.out.matrix,
                SOURMASH_COMPARE.out.labels
            )

            ch_multiqc
                .concat( BCS_DISTANCE_MATRIX.out.mqc_yml )
                .set { ch_multiqc }

            software_versions
                .mix (
                    SOURMASH_SKETCH.out.versions.ifEmpty(null),
                    SOURMASH_COMPARE.out.versions.ifEmpty(null),
                    BCS_DISTANCE_MATRIX.out.versions.ifEmpty(null)
                )
                .set { software_versions }
        }

        BCS_RESULTS ( ch_salmon_res_dirs )

        // Merge all version emissions, de-duplicate them and write them to a
        // single collected_versions.yml for DUMP_SOFTWARE_VERSIONS.
        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    MASH_SCREEN.out.versions,
                    TOP_UNIQUE_SEROVARS.out.versions,
                    KMA_INDEX.out.versions,
                    KMA_ALIGN.out.versions,
                    OTF_GENOME.out.versions.ifEmpty(null),
                    SALMON_INDEX.out.versions,
                    SALMON_QUANT.out.versions,
                    BCS_RESULTS.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        DUMP_SOFTWARE_VERSIONS
            .out
            .mqc_yml
            .concat (
                ch_multiqc,
                BCS_RESULTS.out.mqc_yml,
                BCS_RESULTS.out.mqc_json
            )
            .collect()
            .set { ch_multiqc }

        MULTIQC ( ch_multiqc )
}
344 | |
345 /* | |
346 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
347 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG | |
348 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
349 */ | |
350 | |
// Completion handler: calls sendMail() only when the run succeeded.
workflow.onComplete {
    if (!workflow.success) {
        return
    }
    sendMail()
}

// Error handler: always calls sendMail().
workflow.onError {
    sendMail()
}
360 | |
361 /* | |
362 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
363 METHOD TO CHECK METADATA EXISTENCE | |
364 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
365 */ | |
366 | |
/**
 * Stop the pipeline unless the given file exists and is non-empty.
 *
 * @param file_path  path to the required file; may be null or empty, which
 *                   is reported the same way as a missing file
 * @param msg        short human-readable label of the file, used in the
 *                   message passed to stopNow()
 */
def checkMetadataExists(file_path, msg) {
    // `def` keeps this variable local instead of leaking it into the script
    // binding. A null/empty path is not handed to file(), so the user gets
    // the message below rather than an error from file() itself.
    def file_path_obj = (file_path ? file( file_path ) : null)

    if (file_path_obj == null || !file_path_obj.exists() || file_path_obj.size() == 0) {
        stopNow("Please check if your ${msg} file\n" +
            "[ ${file_path} ]\nexists and is not of size 0.")
    }
}
375 | |
376 /* | |
377 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
378 HELP TEXT METHODS FOR BETTERCALLSAL WORKFLOW | |
379 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
380 */ | |
381 | |
/**
 * Assemble the help text for the bettercallsal workflow.
 *
 * Merges the entry-point help, the two workflow-specific option maps built
 * here, the `.text` map of every per-tool help method and the wrap-up help
 * into one map.
 *
 * @return the result of addPadding() applied to the merged help map
 */
def help() {

    Map helptext = [:]
    Map bcsConcatHelp = [:]
    Map fastpAdapterHelp = [:]

    bcsConcatHelp['--bcs_concat_pe'] = "Concatenate paired-end files. " +
        "Default: ${params.bcs_concat_pe}"

    // The key was previously misspelled as '--fastp_use_custom_adapaters';
    // it now matches params.fastp_use_custom_adapters, which is read below.
    fastpAdapterHelp['--fastp_use_custom_adapters'] = "Use custom adapter FASTA with fastp on top of " +
        "built-in adapter sequence auto-detection. Enabling this option will attempt to find and remove " +
        "all possible Illumina adapter and primer sequences but will make the workflow run slow. " +
        "Default: ${params.fastp_use_custom_adapters}"

    helptext.putAll (
        fastqEntryPointHelp() +
        bcsConcatHelp +
        bbmergeHelp(params).text +
        fastpHelp(params).text +
        fastpAdapterHelp +
        mashscreenHelp(params).text +
        tuspyHelp(params).text +
        sourmashsketchHelp(params).text +
        sourmashgatherHelp(params).text +
        sourmashsearchHelp(params).text +
        sfhpyHelp(params).text +
        kmaindexHelp(params).text +
        kmaalignHelp(params).text +
        salmonidxHelp(params).text +
        gsrpyHelp(params).text +
        wrapUpHelp()
    )

    return addPadding(helptext)
}