comparison 0.1.0/workflows/cronology_db.nf @ 0:c8597e9e1a97

"planemo upload"
author kkonganti
date Mon, 27 Nov 2023 12:37:44 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c8597e9e1a97
1 // Define any required imports for this specific workflow
2 import java.nio.file.Paths
3 import nextflow.file.FileHelper
4
5 // Include any necessary methods
6 include { \
7 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
8 addPadding; wrapUpHelp } from "${params.routines}"
9 include { dpubmlstpyHelp } from "${params.toolshelp}${params.fs}dpubmlstpy"
10 include { checkm2predictHelp } from "${params.toolshelp}${params.fs}checkm2predict"
11 include { guncrunHelp } from "${params.toolshelp}${params.fs}guncrun"
12 include { mlstHelp } from "${params.toolshelp}${params.fs}mlst"
13
// When --help is given, print the assembled help text and leave with a
// zero exit status before any module or subworkflow gets included.
if (params.help) {
    log.info( help() )
    exit( 0 )
}
19
20 // Include any necessary modules and subworkflows
21 include { DOWNLOAD_PDG_METADATA } from "${params.modules}${params.fs}download_pdg_metadata${params.fs}main"
22 include { DOWNLOAD_PUBMLST_SCHEME } from "${params.modules}${params.fs}download_pubmlst_scheme${params.fs}main"
23 include { FILTER_PDG_METADATA } from "${params.modules}${params.fs}filter_pdg_metadata${params.fs}main"
24 include { GUNC_RUN } from "${params.modules}${params.fs}gunc${params.fs}run${params.fs}main"
25 include { CHECKM2_PREDICT } from "${params.modules}${params.fs}checkm2${params.fs}predict${params.fs}main"
26 include { QUAL_PASSED_GENOMES } from "${params.modules}${params.fs}custom${params.fs}qual_passed_genomes${params.fs}main"
27 include { SCAFFOLD_GENOMES } from "${params.modules}${params.fs}scaffold_genomes${params.fs}main"
28 include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main"
29 include { INDEX_PDG_METADATA } from "${params.modules}${params.fs}index_pdg_metadata${params.fs}main"
30 include { MASH_SKETCH } from "${params.modules}${params.fs}mash${params.fs}sketch${params.fs}main"
31 include { MASH_PASTE } from "${params.modules}${params.fs}mash${params.fs}paste${params.fs}main"
32 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
33
34 /*
35 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
36 INPUTS AND ANY CHECKS FOR THE CRONOLOGY_DB WORKFLOW
37 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38 */
39
// The workflow cannot run without a destination given via --output.
if (!params.output) {
    def no_output_msg = "Please mention the absolute UNIX path to store the DB flat files\n" +
        "using the --output option.\n" +
        "Ex: --output /path/to/cronology/db_files"

    stopNow(no_output_msg)
}

// Both assembly QC tools read from a database path; verify each one up front.
checkDBPathExists(
    params.guncrun_dbpath,
    'GUNC'
)
checkDBPathExists(
    params.checkm2predict_dbpath,
    'CheckM2'
)
48
49 /*
50 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
51 RUN THE CRONOLOGY_DB WORKFLOW
52 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
53 */
54
// CRONOLOGY_DB: download PDG metadata and a PubMLST scheme, run CheckM2 and
// GUNC on chunks of accessions, scaffold the genomes that pass, type them
// with MLST, index the metadata, build a pasted Mash sketch and finally
// dump the collected software versions.
workflow CRONOLOGY_DB {
    main:
        log.info summaryOfParams()

        // A falsy --pdg_release is handed over as null.
        DOWNLOAD_PDG_METADATA ( params.pdg_release ?: null )

        // Seed channel for the software versions report.
        software_versions = DOWNLOAD_PDG_METADATA.out.versions

        // A falsy --dpubmlstpy_org is handed over as null.
        DOWNLOAD_PUBMLST_SCHEME ( params.dpubmlstpy_org ?: null )

        // Split the accession list into files of params.genomes_chunk lines each.
        ch_acc_chunks = DOWNLOAD_PDG_METADATA.out.accs
            .splitText(by: params.genomes_chunk, file: true)

        FILTER_PDG_METADATA ( ch_acc_chunks )

        // Feed the same accession chunk file to both QC tools; the two
        // branches differ only in the database path they carry.
        ch_run_qual_on_these_accs = FILTER_PDG_METADATA.out.accs_chunk_tbl
            .collectFile(sort: { chunk_tbl -> chunk_tbl.simpleName })
            .multiMap { acc_chunk_file ->
                def meta = [ id: 'AssemblyQC', phone_ncbi: true ]
                gunc: [ meta, params.guncrun_dbpath, acc_chunk_file ]
                checkm2: [ meta, params.checkm2predict_dbpath, acc_chunk_file ]
            }

        CHECKM2_PREDICT ( ch_run_qual_on_these_accs.checkm2 )

        GUNC_RUN ( ch_run_qual_on_these_accs.gunc )

        // Drop the meta map and gather every "passed" report of each tool
        // into a single named file.
        ch_checkm2_passed = CHECKM2_PREDICT.out.quality_report_passed
            .map { meta, report -> [ report ] }
            .collect()
            .flatten()
            .collectFile(name: 'checkm2_quality_passed.txt')

        ch_gunc_passed = GUNC_RUN.out.quality_report_passed
            .map { meta, report -> [ report ] }
            .collect()
            .flatten()
            .collectFile(name: 'gunc_quality_passed.txt')

        QUAL_PASSED_GENOMES ( ch_checkm2_passed, ch_gunc_passed )

        // Scaffold the quality-passed accessions chunk by chunk.
        ch_passed_acc_chunks = QUAL_PASSED_GENOMES.out.accs
            .splitText(by: params.genomes_chunk, file: true)

        SCAFFOLD_GENOMES ( ch_passed_acc_chunks )

        // The scaffolded genomes go to both MLST and Mash under one shared id.
        ch_scaffolded_genomes = SCAFFOLD_GENOMES.out.scaffolded
            .multiMap { scaffolded ->
                def meta = [ id: (params.pdg_release ?: 'NCBI Pathogen Genomes') ]
                mlst: [ meta, scaffolded ]
                mash: [ meta, scaffolded ]
            }

        // Pair every scaffolded item with the downloaded PubMLST directory.
        ch_mlst_input = ch_scaffolded_genomes.mlst
            .combine( DOWNLOAD_PUBMLST_SCHEME.out.pubmlst_dir )

        MLST ( ch_mlst_input )

        // Merge all MLST tables into one TSV (collectFile options:
        // keepHeader with a one-line skip).
        ch_mlst_results = MLST.out.tsv
            .map { meta, mlst_tsv -> mlst_tsv }
            .collectFile(
                name: 'mlst_results.tsv',
                keepHeader: true,
                skip: 1
            )

        INDEX_PDG_METADATA (
            DOWNLOAD_PDG_METADATA.out.pdg_metadata,
            DOWNLOAD_PDG_METADATA.out.snp_cluster_metadata,
            DOWNLOAD_PDG_METADATA.out.accs,
            ch_mlst_results
        )

        // Flatten the second tuple element before sketching.
        ch_mash_input = ch_scaffolded_genomes.mash
            .map { genome -> tuple ( genome[0], genome[1].flatten() ) }

        MASH_SKETCH ( ch_mash_input )

        // Re-key every sketch with the same id so that groupTuple gathers
        // all of them into one tuple for MASH_PASTE.
        ch_sketches = MASH_SKETCH.out.sketch
            .map { meta, msh ->
                [ [id: (params.pdg_release ?: 'NCBI Pathogen Genomes')], msh ]
            }
            .groupTuple(by: [0])

        MASH_PASTE ( ch_sketches )

        // Collect the version records of every step, de-duplicated.
        ch_all_versions = software_versions
            .mix (
                DOWNLOAD_PDG_METADATA.out.versions,
                DOWNLOAD_PUBMLST_SCHEME.out.versions,
                FILTER_PDG_METADATA.out.versions,
                CHECKM2_PREDICT.out.versions,
                GUNC_RUN.out.versions,
                QUAL_PASSED_GENOMES.out.versions,
                SCAFFOLD_GENOMES.out.versions,
                MLST.out.versions,
                INDEX_PDG_METADATA.out.versions,
                MASH_SKETCH.out.versions,
                MASH_PASTE.out.versions
            )
            .unique()
            .collectFile(name: 'collected_versions.yml')

        DUMP_SOFTWARE_VERSIONS ( ch_all_versions )
}
172
173 /*
174 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
175 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
176 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
177 */
178
// Completion handler: sendMail() is called only for a successful run.
workflow.onComplete {
    if (!workflow.success) {
        return
    }

    sendMail()
}

// Error handler: sendMail() is called unconditionally.
workflow.onError {
    sendMail()
}
188
189 /*
190 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
191 METHOD TO CHECK IF DB PATHS EXIST
192 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193 */
194
/**
 * Stop the pipeline via stopNow() unless the given database path exists.
 *
 * @param db_path Database path as supplied through the pipeline parameters.
 *                May be unset (null / empty), which is reported the same way
 *                as a path that does not exist.
 * @param msg     Name of the tool the database belongs to; it is inserted
 *                into the error message.
 */
def checkDBPathExists(db_path, msg) {
    // Guard before calling file(): an unset parameter should produce the
    // message below rather than a generic argument error from file().
    // `def` keeps the handle local instead of leaking it into the script
    // binding as a global.
    def db_path_obj = db_path ? file( db_path ) : null

    if (db_path_obj == null || !db_path_obj.exists()) {
        stopNow("Please check if the database path for ${msg}\n" +
            "[ ${db_path} ]\nexists.")
    }
}

/*
203
204 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
205 HELP TEXT METHODS FOR CRONOLOGY WORKFLOW
206 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
207 */
208
/**
 * Assemble the help text of the CRONOLOGY_DB workflow.
 *
 * The general entry-point help is reduced to the entries whose key matches
 * Required, output, Other, Workflow, Author or Version. The help text of
 * each tool helper included by this file is appended, followed by the
 * wrap-up section.
 *
 * @return The merged help map after it has been passed through addPadding().
 */
def help() {

    Map helptext = [:]

    helptext.putAll (
        fastqEntryPointHelp().findAll {
            it.key =~ /Required|output|Other|Workflow|Author|Version/
        } +
        dpubmlstpyHelp(params).text +
        checkm2predictHelp(params).text +
        guncrunHelp(params).text +
        // mlstHelp is included at the top of this file and the workflow runs
        // the MLST module, so its options belong in the help output as well.
        // NOTE(review): assumes mlstHelp(params) exposes `.text` like the
        // sibling tool helpers - confirm against the mlst help file.
        mlstHelp(params).text +
        wrapUpHelp()
    )

    return addPadding(helptext)
}