Mercurial > repos > kkonganti > hfp_nowayout
comparison 0.5.0/workflows/nowayout.nf @ 0:97cd2f532efe
planemo upload
author | kkonganti |
---|---|
date | Mon, 31 Mar 2025 14:50:40 -0400 |
parents | |
children | 3539fbeb4230 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:97cd2f532efe |
---|---|
1 // Define any required imports for this specific workflow | |
2 import java.nio.file.Paths | |
3 import java.util.zip.GZIPInputStream | |
4 import java.io.FileInputStream | |
5 import nextflow.file.FileHelper | |
6 | |
7 | |
8 // Include any necessary methods | |
9 include { \ | |
10 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \ | |
11 addPadding; wrapUpHelp } from "${params.routines}" | |
12 include { fastpHelp } from "${params.toolshelp}${params.fs}fastp" | |
13 include { kmaalignHelp } from "${params.toolshelp}${params.fs}kmaalign" | |
14 include { seqkitgrepHelp } from "${params.toolshelp}${params.fs}seqkitgrep" | |
15 include { salmonidxHelp } from "${params.toolshelp}${params.fs}salmonidx" | |
16 include { sourmashsketchHelp } from "${params.toolshelp}${params.fs}sourmashsketch" | |
17 include { sourmashgatherHelp } from "${params.toolshelp}${params.fs}sourmashgather" | |
18 include { sfhpyHelp } from "${params.toolshelp}${params.fs}sfhpy" | |
19 include { gsalkronapyHelp } from "${params.toolshelp}${params.fs}gsalkronapy" | |
20 include { kronaktimporttextHelp } from "${params.toolshelp}${params.fs}kronaktimporttext" | |
21 | |
// When --help is requested, print the assembled help text and quit
// before any module or subworkflow gets included.
if (params.help) {
    log.info( help() )
    exit( 0 )
}
27 | |
28 | |
29 // Include any necessary modules and subworkflows | |
30 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq" | |
31 include { FASTP } from "${params.modules}${params.fs}fastp${params.fs}main" | |
32 include { KMA_ALIGN } from "${params.modules}${params.fs}kma${params.fs}align${params.fs}main" | |
33 include { OTF_GENOME } from "${params.modules}${params.fs}otf_genome${params.fs}main" | |
34 include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main" | |
35 include { SALMON_INDEX } from "${params.modules}${params.fs}salmon${params.fs}index${params.fs}main" | |
36 include { SALMON_QUANT } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main" | |
37 include { SOURMASH_SKETCH } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main" | |
38 include { SOURMASH_SKETCH \ | |
39 as REDUCE_DB_IDX } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main" | |
40 include { SOURMASH_GATHER } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main" | |
41 include { NOWAYOUT_RESULTS } from "${params.modules}${params.fs}nowayout_results${params.fs}main" | |
42 include { KRONA_KTIMPORTTEXT } from "${params.modules}${params.fs}krona${params.fs}ktimporttext${params.fs}main" | |
43 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" | |
44 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" | |
45 | |
46 /* | |
47 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
48 INPUTS AND ANY CHECKS FOR THE NOWAYOUT WORKFLOW | |
49 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
50 */ | |
51 | |
// Count how many read sources were supplied; --input is the only one
// this workflow looks at.
def reads_platform = (params.input ? 1 : 0)

if (reads_platform < 1) {
    stopNow("Please mention at least one absolute path to input folder which contains\n" +
        "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
}

// The adapter FASTA is only validated when it has been set.
if (params.fastp_adapter_fasta) {
    checkMetadataExists(params.fastp_adapter_fasta, 'Adapter sequences FASTA')
}

// These three are always validated.
checkMetadataExists(params.lineages_csv, 'Lineages CSV')
checkMetadataExists(params.kmaalign_idx, 'KMA Indices')
checkMetadataExists(params.ref_fna, 'FASTA reference')

// Deliberately left undeclared (no `def`) so that the value stays visible
// to the workflow body further down in this script.
ch_sourmash_lin = file( params.lineages_csv )
67 | |
68 | |
69 /* | |
70 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
71 RUN THE NOWAYOUT WORKFLOW | |
72 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
73 */ | |
74 | |
/**
 * Report whether a gzip-compressed file decompresses to at least one byte.
 *
 * @param reads a single path-like object, or a List of them, in which case
 *              only the first element is inspected.
 * @return true when one byte could be read from the gzip stream, false when
 *         the stream ends immediately.
 *
 * Both streams are closed before returning, so probing many samples does
 * not accumulate open file handles.
 */
def gzipHasContent(reads) {
    def first = (reads instanceof List ? reads[0] : reads)

    new FileInputStream( first.toAbsolutePath().toString() ).withCloseable { raw ->
        new GZIPInputStream( raw ).withCloseable { gz ->
            gz.read() != -1
        }
    }
}

/**
 * Main workflow. Wires together, in order: PROCESS_FASTQ, FASTP, KMA_ALIGN,
 * OTF_GENOME, SEQKIT_GREP, SALMON_INDEX / SALMON_QUANT, the two sourmash
 * sketch steps, SOURMASH_GATHER, NOWAYOUT_RESULTS, KRONA_KTIMPORTTEXT,
 * DUMP_SOFTWARE_VERSIONS and MULTIQC.
 *
 * Every variable created inside an operator closure is declared locally
 * (or avoided altogether) so that nothing is written to the shared script
 * binding while items are being processed.
 */
workflow NOWAYOUT {
    main:
        log.info summaryOfParams()

        PROCESS_FASTQ()

        PROCESS_FASTQ.out.versions
            .set { software_versions }

        PROCESS_FASTQ.out.processed_reads
            .set { ch_processed_reads }

        // Attach the per-sample settings to the meta map, then drop
        // samples whose first FASTQ file decompresses to nothing.
        ch_processed_reads
            .map { meta, fastq ->
                meta.get_kma_hit_accs = true
                meta.salmon_decoys = params.dummyfile
                meta.salmon_lib_type = (params.salmonalign_libtype ?: false)
                meta.kma_t_db = params.kmaalign_idx
                [ meta, fastq ]
            }
            .filter { meta, fastq -> gzipHasContent( fastq ) }
            .set { ch_processed_reads }

        FASTP( ch_processed_reads )

        // Only the JSON reports (without meta) are collected here.
        FASTP.out.json
            .map { meta, json -> json }
            .collect()
            .set { ch_multiqc }

        KMA_ALIGN(
            FASTP.out.passed_reads
                .map { meta, fastq ->
                    [ meta, fastq, [] ]
                }
        )

        OTF_GENOME(
            KMA_ALIGN.out.hits
                .join( KMA_ALIGN.out.frags )
        )

        // Keep only samples for which reads were actually extracted.
        OTF_GENOME.out.reads_extracted
            .filter { meta, fasta -> gzipHasContent( fasta ) }
            .set { ch_mito_aln_reads }

        // Skip samples whose hits file is empty.
        SEQKIT_GREP(
            KMA_ALIGN.out.hits
                .filter { meta, mapped_refs ->
                    file( mapped_refs ).size() > 0
                }
                .map { meta, mapped_refs ->
                    [ meta, params.ref_fna, mapped_refs ]
                }
        )

        SALMON_INDEX( SEQKIT_GREP.out.fastx )

        SALMON_QUANT(
            ch_mito_aln_reads
                .join( SALMON_INDEX.out.idx )
        )

        REDUCE_DB_IDX(
            SEQKIT_GREP.out.fastx,
            true,
            false,
            'db'
        )

        SOURMASH_SKETCH(
            ch_mito_aln_reads,
            false,
            false,
            'query'
        )

        SOURMASH_GATHER(
            SOURMASH_SKETCH.out.signatures
                .join( REDUCE_DB_IDX.out.signatures ),
            [], [], [], []
        )

        // SOURMASH_TAX_METAGENOME(
        //     SOURMASH_GATHER.out.result
        //         .groupTuple(by: [0])
        //         .map { meta, csv ->
        //             [ meta, csv, ch_sourmash_lin ]
        //         }
        // )

        // SOURMASH_TAX_METAGENOME.out.csv
        //     .map { meta, csv ->
        //         csv
        //     }
        //     .set { ch_lin_csv }

        // SOURMASH_TAX_METAGENOME.out.tsv
        //     .tap { ch_lin_krona }
        //     .map { meta, tsv ->
        //         tsv
        //     }
        //     .tap { ch_lin_tsv }

        // Gather every per-sample result (and failure marker) into one
        // flat list for the results module.
        SOURMASH_GATHER.out.result
            .groupTuple(by: [0])
            .map { meta, csv ->
                [ csv ]
            }
            .concat(
                SALMON_QUANT.out.results
                    .map { meta, salmon_res ->
                        [ salmon_res ]
                    }
            )
            .concat(
                SOURMASH_GATHER.out.failed
                    .map { meta, failed ->
                        [ failed ]
                    }
            )
            .concat( OTF_GENOME.out.failed )
            .collect()
            .flatten()
            .collect()
            .set { ch_gene_abn }

        NOWAYOUT_RESULTS( ch_gene_abn, ch_sourmash_lin )

        // Select the TSV files whose name ends with the Krona suffix and
        // group them under one shared meta id. The meta map is built
        // inline so that no shared variable is involved.
        NOWAYOUT_RESULTS.out.tsv
            .flatten()
            .filter { tsv -> tsv.toString() =~ /.*${params.krona_res_suffix}$/ }
            .map { tsv ->
                [ [ id: "${params.cfsanpipename}_${params.pipeline}_krona" ], tsv ]
            }
            .groupTuple(by: [0])
            .set { ch_lin_krona }

        // ch_lin_tsv
        //     .mix( ch_lin_csv )
        //     .collect()
        //     .set { ch_lin_summary }

        // SOURMASH_TAX_METAGENOME.out.txt
        //     .map { meta, txt ->
        //         txt
        //     }
        //     .collect()
        //     .set { ch_lin_kreport }

        // NOWAYOUT_RESULTS(
        //     ch_lin_summary
        //         .concat( SOURMASH_GATHER.out.failed )
        //         .concat( OTF_GENOME.out.failed )
        //         .collect()
        // )

        KRONA_KTIMPORTTEXT( ch_lin_krona )

        DUMP_SOFTWARE_VERSIONS(
            software_versions
                .mix (
                    FASTP.out.versions,
                    KMA_ALIGN.out.versions,
                    SEQKIT_GREP.out.versions,
                    REDUCE_DB_IDX.out.versions,
                    SOURMASH_SKETCH.out.versions,
                    SOURMASH_GATHER.out.versions,
                    SALMON_INDEX.out.versions,
                    SALMON_QUANT.out.versions,
                    NOWAYOUT_RESULTS.out.versions,
                    KRONA_KTIMPORTTEXT.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        // Final MultiQC input: versions YAML, fastp JSON reports and the
        // results module's YAML, flattened into a single list.
        DUMP_SOFTWARE_VERSIONS.out.mqc_yml
            .concat(
                ch_multiqc,
                NOWAYOUT_RESULTS.out.mqc_yml
            )
            .collect()
            .flatten()
            .collect()
            .set { ch_multiqc }

        MULTIQC( ch_multiqc )
}
277 | |
278 /* | |
279 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
280 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG | |
281 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
282 */ | |
283 | |
// Completion hook: sendMail() is called only for a successful run.
workflow.onComplete {
    if (!workflow.success) {
        return
    }
    sendMail()
}

// Error hook: sendMail() is always called.
workflow.onError {
    sendMail()
}
293 | |
294 /* | |
295 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
296 METHOD TO CHECK METADATA EXISTENCE | |
297 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
298 */ | |
299 | |
/**
 * Stop the pipeline with a readable message when a required input is
 * missing or empty.
 *
 * @param file_path path to validate.
 * @param msg       short label for the input, used in the error text. When
 *                  it contains "KMA" (case-insensitive), the parent
 *                  directory of file_path is checked instead of file_path
 *                  itself.
 *
 * NOTE(review): for the KMA case only the parent directory is inspected,
 * presumably because file_path is an index prefix rather than a real file.
 * Calling size() on a directory may not show whether the index files are
 * empty -- confirm.
 */
def checkMetadataExists(file_path, msg) {
    // An unset path would otherwise be rejected by file() with an
    // exception instead of the pipeline's own message.
    if (!file_path) {
        stopNow("Please check if your ${msg}\n" +
            "[ ${file_path} ]\nhas been set. No path was provided.")
        return
    }

    // Declared locally so the value does not end up in the script binding.
    def file_path_obj = file( file_path )

    if (msg.toString().find(/(?i)KMA/)) {
        def idx_dir = file_path_obj.parent

        if (!idx_dir.exists() || idx_dir.size() == 0) {
            stopNow("Please check if your ${msg}\n" +
                "[ ${file_path} ]\nexists and that the files are not of size 0.")
        }
    }
    else if (!file_path_obj.exists() || file_path_obj.size() == 0) {
        stopNow("Please check if your ${msg} file\n" +
            "[ ${file_path} ]\nexists and is not of size 0.")
    }
}
314 | |
315 /* | |
316 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
317 HELP TEXT METHODS FOR NOWAYOUT WORKFLOW | |
318 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
319 */ | |
320 | |
/**
 * Assemble the help text for this workflow.
 *
 * The entry-point help, the `.text` section of each tool's help and the
 * wrap-up help are added together in the order listed, copied into a
 * fresh map and passed to addPadding(), whose result is returned.
 */
def help() {
    def sections = [
        fastqEntryPointHelp(),
        fastpHelp(params).text,
        kmaalignHelp(params).text,
        seqkitgrepHelp(params).text,
        salmonidxHelp(params).text,
        sourmashsketchHelp(params).text,
        sourmashgatherHelp(params).text,
        sfhpyHelp(params).text,
        gsalkronapyHelp(params).text,
        kronaktimporttextHelp(params).text,
        wrapUpHelp()
    ]

    // Left-to-right `+`, the same as the chained form a + b + c + ...
    def combined = sections.inject { merged, section -> merged + section }

    Map helptext = [:]
    helptext.putAll( combined )

    return addPadding( helptext )
}