kkonganti@0
|
1 // Define any required imports for this specific workflow
|
kkonganti@0
|
2 import java.nio.file.Paths
|
kkonganti@0
|
3 import java.util.zip.GZIPInputStream
|
kkonganti@0
|
4 import java.io.FileInputStream
|
kkonganti@0
|
5 import nextflow.file.FileHelper
|
kkonganti@0
|
6
|
kkonganti@0
|
7
|
kkonganti@0
|
8 // Include any necessary methods
|
kkonganti@0
|
9 include { \
|
kkonganti@0
|
10 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
|
kkonganti@0
|
11 addPadding; wrapUpHelp } from "${params.routines}"
|
kkonganti@0
|
12 include { fastpHelp } from "${params.toolshelp}${params.fs}fastp"
|
kkonganti@0
|
13 include { kmaalignHelp } from "${params.toolshelp}${params.fs}kmaalign"
|
kkonganti@0
|
14 include { seqkitgrepHelp } from "${params.toolshelp}${params.fs}seqkitgrep"
|
kkonganti@0
|
15 include { salmonidxHelp } from "${params.toolshelp}${params.fs}salmonidx"
|
kkonganti@0
|
16 include { sourmashsketchHelp } from "${params.toolshelp}${params.fs}sourmashsketch"
|
kkonganti@0
|
17 include { sourmashgatherHelp } from "${params.toolshelp}${params.fs}sourmashgather"
|
kkonganti@0
|
18 include { sfhpyHelp } from "${params.toolshelp}${params.fs}sfhpy"
|
kkonganti@0
|
19 include { gsalkronapyHelp } from "${params.toolshelp}${params.fs}gsalkronapy"
|
kkonganti@0
|
20 include { kronaktimporttextHelp } from "${params.toolshelp}${params.fs}kronaktimporttext"
|
kkonganti@0
|
21
|
kkonganti@0
|
22 // Exit if help requested before any subworkflows
|
kkonganti@0
|
// When --help is given, print the assembled help text and quit
// before any subworkflow or module is set up.
if (params.help) {
    log.info(help())
    exit(0)
}
|
kkonganti@0
|
27
|
kkonganti@0
|
28
|
kkonganti@0
|
29 // Include any necessary modules and subworkflows
|
kkonganti@0
|
30 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
|
kkonganti@0
|
31 include { FASTP } from "${params.modules}${params.fs}fastp${params.fs}main"
|
kkonganti@0
|
32 include { KMA_ALIGN } from "${params.modules}${params.fs}kma${params.fs}align${params.fs}main"
|
kkonganti@0
|
33 include { OTF_GENOME } from "${params.modules}${params.fs}otf_genome${params.fs}main"
|
kkonganti@0
|
34 include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main"
|
kkonganti@0
|
35 include { SALMON_INDEX } from "${params.modules}${params.fs}salmon${params.fs}index${params.fs}main"
|
kkonganti@0
|
36 include { SALMON_QUANT } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main"
|
kkonganti@0
|
37 include { SOURMASH_SKETCH } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
|
kkonganti@0
|
38 include { SOURMASH_SKETCH \
|
kkonganti@0
|
39 as REDUCE_DB_IDX } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
|
kkonganti@0
|
40 include { SOURMASH_GATHER } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main"
|
kkonganti@0
|
41 include { NOWAYOUT_RESULTS } from "${params.modules}${params.fs}nowayout_results${params.fs}main"
|
kkonganti@0
|
42 include { KRONA_KTIMPORTTEXT } from "${params.modules}${params.fs}krona${params.fs}ktimporttext${params.fs}main"
|
kkonganti@0
|
43 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
|
kkonganti@0
|
44 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main"
|
kkonganti@0
|
45
|
kkonganti@0
|
46 /*
|
kkonganti@0
|
47 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
48 INPUTS AND ANY CHECKS FOR THE NOWAYOUT WORKFLOW
|
kkonganti@0
|
49 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
50 */
|
kkonganti@0
|
51
|
kkonganti@0
|
// Number of read sources supplied on the command line. Only --input
// contributes here, so the value is either 0 or 1.
def reads_platform = 0
if (params.input) {
    reads_platform += 1
}

// Refuse to continue when no input folder was given.
if (reads_platform < 1) {
    stopNow("Please mention at least one absolute path to input folder which contains\n" +
        "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
}

// The adapter FASTA is optional: validate it only when it was supplied.
if (params.fastp_adapter_fasta) {
    checkMetadataExists(params.fastp_adapter_fasta, 'Adapter sequences FASTA')
}

// These three inputs are always validated.
checkMetadataExists(params.lineages_csv, 'Lineages CSV')
checkMetadataExists(params.kmaalign_idx, 'KMA Indices')
checkMetadataExists(params.ref_fna, 'FASTA reference')

// Deliberately left without `def`: the workflow body below reads this
// value through the script binding.
ch_sourmash_lin = file( params.lineages_csv )
|
kkonganti@0
|
67
|
kkonganti@0
|
68
|
kkonganti@0
|
69 /*
|
kkonganti@0
|
70 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
71 RUN THE NOWAYOUT WORKFLOW
|
kkonganti@0
|
72 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
73 */
|
kkonganti@0
|
74
|
kkonganti@0
|
// Main workflow. Wires the included processes in this order:
// PROCESS_FASTQ -> FASTP -> KMA_ALIGN -> OTF_GENOME / SEQKIT_GREP ->
// SALMON_INDEX + SALMON_QUANT and sourmash sketch + gather ->
// NOWAYOUT_RESULTS -> KRONA_KTIMPORTTEXT, then software-version
// collection and MULTIQC.
workflow NOWAYOUT {
    main:
        log.info summaryOfParams()

        PROCESS_FASTQ()

        PROCESS_FASTQ.out.versions
            .set { software_versions }

        PROCESS_FASTQ.out.processed_reads
            .set { ch_processed_reads }

        // Attach per-sample settings read by the downstream modules and
        // drop samples whose first FASTQ decompresses to nothing.
        // NOTE(review): the incoming meta map is modified in place, exactly
        // as before; switch to a copied map if upstream consumers appear.
        ch_processed_reads
            .map { meta, fastq ->
                meta.get_kma_hit_accs = true
                meta.salmon_decoys = params.dummyfile
                meta.salmon_lib_type = (params.salmonalign_libtype ?: false)
                meta.kma_t_db = params.kmaalign_idx
                [ meta, fastq ]
            }
            .filter { meta, fastq -> hasGzipPayload(fastq) }
            .set { ch_processed_reads }

        FASTP( ch_processed_reads )

        // Keep only the JSON reports (without meta) for MultiQC.
        FASTP.out.json
            .map { meta, json ->
                json
            }
            .collect()
            .set { ch_multiqc }

        KMA_ALIGN(
            FASTP.out.passed_reads
                .map { meta, fastq ->
                    [meta, fastq, []]
                }
        )

        OTF_GENOME(
            KMA_ALIGN.out.hits
                .join(KMA_ALIGN.out.frags)
        )

        // Only samples with a non-empty extracted-reads file go on to
        // quantification and sketching.
        OTF_GENOME.out.reads_extracted
            .filter { meta, fasta -> hasGzipPayload(fasta) }
            .set { ch_mito_aln_reads }

        // Skip samples whose KMA hits file is empty (size 0).
        SEQKIT_GREP(
            KMA_ALIGN.out.hits
                .filter { meta, mapped_refs ->
                    file( mapped_refs ).size() > 0
                }
                .map { meta, mapped_refs ->
                    [meta, params.ref_fna, mapped_refs]
                }
        )

        SALMON_INDEX( SEQKIT_GREP.out.fastx )

        SALMON_QUANT(
            ch_mito_aln_reads
                .join( SALMON_INDEX.out.idx )
        )

        REDUCE_DB_IDX(
            SEQKIT_GREP.out.fastx,
            true,
            false,
            'db'
        )

        SOURMASH_SKETCH(
            ch_mito_aln_reads,
            false,
            false,
            'query'
        )

        SOURMASH_GATHER(
            SOURMASH_SKETCH.out.signatures
                .join( REDUCE_DB_IDX.out.signatures ),
            [], [], [], []
        )

        // SOURMASH_TAX_METAGENOME(
        //     SOURMASH_GATHER.out.result
        //         .groupTuple(by: [0])
        //         .map { meta, csv ->
        //             [ meta, csv, ch_sourmash_lin ]
        //         }
        // )

        // SOURMASH_TAX_METAGENOME.out.csv
        //     .map { meta, csv ->
        //         csv
        //     }
        //     .set { ch_lin_csv }

        // SOURMASH_TAX_METAGENOME.out.tsv
        //     .tap { ch_lin_krona }
        //     .map { meta, tsv ->
        //         tsv
        //     }
        //     .tap { ch_lin_tsv }

        // Gather every per-sample result (and failure marker) into one
        // flat list for the results module.
        SOURMASH_GATHER.out.result
            .groupTuple(by: [0])
            .map { meta, csv ->
                [ csv ]
            }
            .concat(
                SALMON_QUANT.out.results
                    .map { meta, salmon_res ->
                        [ salmon_res ]
                    }
            )
            .concat(
                SOURMASH_GATHER.out.failed
                    .map { meta, failed ->
                        [ failed ]
                    }
            )
            .concat( OTF_GENOME.out.failed )
            .collect()
            .flatten()
            .collect()
            .set { ch_gene_abn }

        NOWAYOUT_RESULTS( ch_gene_abn, ch_sourmash_lin )

        // Select the Krona input tables by file-name suffix and group them
        // under one synthetic meta id.
        NOWAYOUT_RESULTS.out.tsv
            .flatten()
            .filter { tsv -> tsv.toString() =~ /.*${params.krona_res_suffix}$/ }
            .map { tsv ->
                // `def` keeps the map local to this closure call rather
                // than writing it into the shared script binding.
                def krona_meta = [:]
                krona_meta.id = "${params.cfsanpipename}_${params.pipeline}_krona"
                [ krona_meta, tsv ]
            }
            .groupTuple(by: [0])
            .set { ch_lin_krona }

        // ch_lin_tsv
        //     .mix( ch_lin_csv )
        //     .collect()
        //     .set { ch_lin_summary }

        // SOURMASH_TAX_METAGENOME.out.txt
        //     .map { meta, txt ->
        //         txt
        //     }
        //     .collect()
        //     .set { ch_lin_kreport }

        // NOWAYOUT_RESULTS(
        //     ch_lin_summary
        //         .concat( SOURMASH_GATHER.out.failed )
        //         .concat( OTF_GENOME.out.failed )
        //         .collect()
        // )

        KRONA_KTIMPORTTEXT( ch_lin_krona )

        DUMP_SOFTWARE_VERSIONS(
            software_versions
                .mix (
                    FASTP.out.versions,
                    KMA_ALIGN.out.versions,
                    SEQKIT_GREP.out.versions,
                    REDUCE_DB_IDX.out.versions,
                    SOURMASH_SKETCH.out.versions,
                    SOURMASH_GATHER.out.versions,
                    SALMON_INDEX.out.versions,
                    SALMON_QUANT.out.versions,
                    NOWAYOUT_RESULTS.out.versions,
                    KRONA_KTIMPORTTEXT.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        DUMP_SOFTWARE_VERSIONS.out.mqc_yml
            .concat(
                ch_multiqc,
                NOWAYOUT_RESULTS.out.mqc_yml
            )
            .collect()
            .flatten()
            .collect()
            .set { ch_multiqc }

        MULTIQC( ch_multiqc )
}

/**
 * Tell whether a gzip-compressed file yields at least one byte when
 * decompressed.
 *
 * @param fastx a single path-like object, or a List of them; only the
 *              first element of a List is inspected.
 * @return true when the first decompressed byte can be read, false when
 *         the gzip stream ends immediately.
 *
 * Both streams are closed before returning, including when opening or
 * reading fails. I/O errors raised while opening or reading the file are
 * not caught here and propagate to the caller.
 */
def hasGzipPayload(fastx) {
    def first_file = (fastx instanceof List ? fastx[0] : fastx)
    def gz_path = first_file.toAbsolutePath().toString()

    return new FileInputStream( gz_path ).withCloseable { raw ->
        new GZIPInputStream( raw ).withCloseable { gz ->
            gz.read() != -1
        }
    }
}
|
kkonganti@0
|
277
|
kkonganti@0
|
278 /*
|
kkonganti@0
|
279 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
280 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
|
kkonganti@0
|
281 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
282 */
|
kkonganti@0
|
283
|
kkonganti@0
|
// On completion, call sendMail() only when the run succeeded.
workflow.onComplete {
    if (!workflow.success) {
        return
    }
    sendMail()
}

// On error, always call sendMail().
workflow.onError {
    sendMail()
}
|
kkonganti@0
|
293
|
kkonganti@0
|
294 /*
|
kkonganti@0
|
295 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
296 METHOD TO CHECK METADATA EXISTENCE
|
kkonganti@0
|
297 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
298 */
|
kkonganti@0
|
299
|
kkonganti@0
|
/**
 * Validate that a required input exists and is not empty, calling
 * stopNow() with an explanatory message when it is not.
 *
 * When `msg` contains "KMA" (case-insensitive), `file_path` is treated as
 * an index prefix: the check is that its parent directory exists and
 * contains at least one entry. Otherwise `file_path` itself must exist
 * and have a non-zero size.
 *
 * @param file_path path (or index prefix) to validate; may be null or empty
 * @param msg       short label for the input, embedded in the error text
 */
def checkMetadataExists(file_path, msg) {
    def is_kma_idx = msg.toString().find(/(?i)KMA/)
    def kma_err = "Please check if your ${msg}\n" +
        "[ ${file_path} ]\nexists and that the files are not of size 0."
    def file_err = "Please check if your ${msg} file\n" +
        "[ ${file_path} ]\nexists and is not of size 0."

    // An unset or empty parameter would make file() fail with an unrelated
    // error; report it with the same message as a missing input instead.
    if (!file_path) {
        stopNow(is_kma_idx ? kma_err : file_err)
        return
    }

    // `def` keeps the handle local instead of writing to the script binding.
    def file_path_obj = file( file_path )

    if (is_kma_idx) {
        def idx_dir = file_path_obj.parent
        // size() on a directory is the directory entry's own byte size and
        // varies by filesystem, so emptiness is tested by listing entries.
        if (!idx_dir.exists() || !idx_dir.list()) {
            stopNow(kma_err)
        }
    }
    else if (!file_path_obj.exists() || file_path_obj.size() == 0) {
        stopNow(file_err)
    }
}
|
kkonganti@0
|
314
|
kkonganti@0
|
315 /*
|
kkonganti@0
|
316 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
317 HELP TEXT METHODS FOR NOWAYOUT WORKFLOW
|
kkonganti@0
|
318 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
kkonganti@0
|
319 */
|
kkonganti@0
|
320
|
kkonganti@0
|
/**
 * Assemble the help text for this workflow.
 *
 * The entry-point help, each tool's `.text` section and the wrap-up help
 * are merged left to right with `+`, copied into a fresh map and passed
 * to addPadding().
 *
 * @return whatever addPadding() returns for the merged help map
 */
def help() {
    // Merge step by step so the sections are evaluated and combined in
    // the same left-to-right order as one long `+` chain.
    def merged = fastqEntryPointHelp()
    merged = merged + fastpHelp(params).text
    merged = merged + kmaalignHelp(params).text
    merged = merged + seqkitgrepHelp(params).text
    merged = merged + salmonidxHelp(params).text
    merged = merged + sourmashsketchHelp(params).text
    merged = merged + sourmashgatherHelp(params).text
    merged = merged + sfhpyHelp(params).text
    merged = merged + gsalkronapyHelp(params).text
    merged = merged + kronaktimporttextHelp(params).text
    merged = merged + wrapUpHelp()

    Map helptext = [:]
    helptext.putAll(merged)

    return addPadding(helptext)
}
|