Mercurial > repos > galaxytrakr > hfp_centriflaken_awsbatch
comparison 0.4.2/workflows/centriflaken_hy.nf @ 0:082e0091e813 draft default tip
planemo upload
| author | galaxytrakr |
|---|---|
| date | Fri, 29 May 2026 13:27:47 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:082e0091e813 |
|---|---|
| 1 // Define any required imports for this specific workflow | |
| 2 import java.nio.file.Paths | |
| 3 import nextflow.file.FileHelper | |
| 4 | |
| 5 // Include any necessary methods | |
| 6 include { \ | |
| 7 summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \ | |
| 8 addPadding; wrapUpHelp } from "${params.routines}" | |
| 9 include { seqkitrmdupHelp } from "${params.toolshelp}${params.fs}seqkitrmdup" | |
| 10 include { kraken2Help } from "${params.toolshelp}${params.fs}kraken2" | |
| 11 include { centrifugeHelp } from "${params.toolshelp}${params.fs}centrifuge" | |
| 12 include { megahitHelp } from "${params.toolshelp}${params.fs}megahit" | |
| 13 include { spadesHelp } from "${params.toolshelp}${params.fs}spades" | |
| 14 include { serotypefinderHelp } from "${params.toolshelp}${params.fs}serotypefinder" | |
| 15 include { seqsero2Help } from "${params.toolshelp}${params.fs}seqsero2" | |
| 16 include { mlstHelp } from "${params.toolshelp}${params.fs}mlst" | |
| 17 include { abricateHelp } from "${params.toolshelp}${params.fs}abricate" | |
| 18 | |
| 19 // When help is requested, print the help text and quit before any module or subworkflow is included | |
| 20 if (params.help) { | |
| 21 log.info(help()) | |
| 22 exit(0) | |
| 23 } | |
| 24 | |
| 25 // Include any necessary modules and subworkflows | |
| 26 include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq" | |
| 27 include { FASTQC } from "${params.modules}${params.fs}fastqc${params.fs}main" | |
| 28 include { SEQKIT_RMDUP } from "${params.modules}${params.fs}seqkit${params.fs}rmdup${params.fs}main" | |
| 29 include { CENTRIFUGE_CLASSIFY } from "${params.modules}${params.fs}centrifuge${params.fs}classify${params.fs}main" | |
| 30 include { CENTRIFUGE_PROCESS } from "${params.modules}${params.fs}centrifuge${params.fs}process${params.fs}main" | |
| 31 include { SEQKIT_GREP } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main" | |
| 32 include { MEGAHIT_ASSEMBLE } from "${params.modules}${params.fs}megahit${params.fs}assemble${params.fs}main" | |
| 33 include { SPADES_ASSEMBLE } from "${params.modules}${params.fs}spades${params.fs}assemble${params.fs}main" | |
| 34 include { KRAKEN2_CLASSIFY } from "${params.modules}${params.fs}kraken2${params.fs}classify${params.fs}main" | |
| 35 include { KRAKEN2_EXTRACT_CONTIGS } from "${params.modules}${params.fs}kraken2${params.fs}extract_contigs${params.fs}main" | |
| 36 include { SEROTYPEFINDER } from "${params.modules}${params.fs}serotypefinder${params.fs}main" | |
| 37 include { SEQSERO2 } from "${params.modules}${params.fs}seqsero2${params.fs}main" | |
| 38 include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main" | |
| 39 include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main" | |
| 40 include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main" | |
| 41 include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main" | |
| 42 include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" | |
| 43 include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" | |
| 44 | |
| 45 | |
| 46 | |
| 47 /* | |
| 48 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 49 INPUTS AND ANY CHECKS FOR THE CENTRIFLAKEN-HY WORKFLOW | |
| 50 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 51 */ | |
| 52 | |
| 53 def kraken2_db_dir = file ( "${params.kraken2_db}" ) | |
| 54 def centrifuge_x = file ( "${params.centrifuge_x}" ) | |
| 55 def spades_custom_hmm = (params.spades_hmm ? file ( "${params.spades_hmm}" ) : false) | |
| 56 def reads_platform = 0 | |
| 57 def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ] | |
| 58 // Count the read sources given on the command line; only --input is counted here. | |
| 59 reads_platform += (params.input ? 1 : 0) | |
| 60 // The Kraken2 DB path itself and the parent folder of the Centrifuge path must both exist. | |
| 61 if (!kraken2_db_dir.exists() || !centrifuge_x.getParent().exists()) { | |
| 62 stopNow("Please check if the following absolute paths are valid:\n" + | |
| 63 "${params.kraken2_db}\n${params.centrifuge_x}\n" + | |
| 64 "Cannot proceed further!") | |
| 65 } | |
| 66 // A custom SPAdes HMM path is optional, but when given it must exist. | |
| 67 if (spades_custom_hmm && !spades_custom_hmm.exists()) { | |
| 68 stopNow("Please check if the following SPAdes' custom HMM directory\n" + | |
| 69 "path is valid:\n${params.spades_hmm}\nCannot proceed further!") | |
| 70 } | |
| 71 // At least one read source is required. | |
| 72 if (reads_platform < 1) { | |
| 73 stopNow("Please mention at least one absolute path to input folder which contains\n" + | |
| 74 "FASTQ files sequenced using the --input option.\n" + | |
| 75 "Ex: --input (Illumina or Generic short reads in FASTQ format)") | |
| 76 } | |
| 77 // Centrifuge read extraction and Kraken2 contig extraction must name the same bug. | |
| 78 if (params.centrifuge_extract_bug != params.kraken2_extract_bug) { | |
| 79 stopNow("Please make sure that the bug to be extracted is same\n" + | |
| 80 "for both --centrifuge_extract_bug and --kraken2_extract_bug options.") | |
| 81 } | |
| 82 | |
| 83 /* | |
| 84 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 85 RUN THE CENTRIFLAKEN-HY WORKFLOW | |
| 86 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 87 */ | |
| 88 // CENTRIFLAKEN_HY: FastQC, Centrifuge-guided read extraction, assembly, Kraken2 contig filtering, serotyping/MLST/ABRicate and a MultiQC report. | |
| 89 workflow CENTRIFLAKEN_HY { | |
| 90 main: | |
| 91 ch_asm_filtered_contigs = Channel.empty() | |
| 92 ch_mqc_custom_tbl = Channel.empty() | |
| 93 ch_dummy = Channel.fromPath("${params.dummyfile}") | |
| 94 ch_dummy2 = Channel.fromPath("${params.dummyfile2}") | |
| 95 ch_assembly = Channel.empty() // default, so the steps below still have a channel when neither assembler is enabled | |
| 96 log.info summaryOfParams() | |
| 97 // Take the reads emitted by PROCESS_FASTQ and attach the Centrifuge and Kraken2 database paths to each meta map. | |
| 98 PROCESS_FASTQ() | |
| 99 .processed_reads | |
| 100 .map { | |
| 101 meta, fastq -> | |
| 102 meta.centrifuge_x = params.centrifuge_x | |
| 103 meta.kraken2_db = params.kraken2_db | |
| 104 [meta, fastq] | |
| 105 } | |
| 106 .set { ch_processed_reads } | |
| 107 | |
| 108 PROCESS_FASTQ | |
| 109 .out | |
| 110 .versions | |
| 111 .set { software_versions } | |
| 112 | |
| 113 FASTQC ( ch_processed_reads ) | |
| 114 // Optionally replace the reads with SEQKIT_RMDUP's output before classification. | |
| 115 if (params.seqkit_rmdup_run) { | |
| 116 SEQKIT_RMDUP ( ch_processed_reads ) | |
| 117 | |
| 118 SEQKIT_RMDUP | |
| 119 .out | |
| 120 .fastx | |
| 121 .set { ch_processed_reads } | |
| 122 | |
| 123 software_versions | |
| 124 .mix ( SEQKIT_RMDUP.out.versions.ifEmpty(null) ) | |
| 125 .set { software_versions } | |
| 126 } | |
| 127 // Classify the reads with Centrifuge, then join each sample's reads with CENTRIFUGE_PROCESS's "extracted" output for SEQKIT_GREP. | |
| 128 CENTRIFUGE_CLASSIFY ( ch_processed_reads ) | |
| 129 | |
| 130 CENTRIFUGE_PROCESS ( | |
| 131 CENTRIFUGE_CLASSIFY.out.report | |
| 132 .join( CENTRIFUGE_CLASSIFY.out.output ) | |
| 133 ) | |
| 134 | |
| 135 ch_processed_reads.join ( CENTRIFUGE_PROCESS.out.extracted ) | |
| 136 .set { ch_centrifuge_extracted } | |
| 137 | |
| 138 SEQKIT_GREP ( ch_centrifuge_extracted ) | |
| 139 // Assemble the extracted reads: SPAdes wins when both assemblers are enabled; MEGAHIT runs only if spades_run is off. | |
| 140 // As of 06/02/2022: upcoming Nextflow versions are expected to support | |
| 141 // allowNull: true for both inputs and outputs; until then, we have to use dummy files | |
| 142 // and workarounds. | |
| 143 // https://github.com/nextflow-io/nextflow/pull/2893 | |
| 144 if (params.spades_run) { | |
| 145 SPADES_ASSEMBLE ( | |
| 146 SEQKIT_GREP.out.fastx | |
| 147 .combine(ch_dummy) | |
| 148 .combine(ch_dummy2) | |
| 149 ) | |
| 150 | |
| 151 SPADES_ASSEMBLE | |
| 152 .out | |
| 153 .assembly | |
| 154 .set { ch_assembly } | |
| 155 | |
| 156 software_versions | |
| 157 .mix ( SPADES_ASSEMBLE.out.versions.ifEmpty(null) ) | |
| 158 .set { software_versions } | |
| 159 } else if (params.megahit_run) { | |
| 160 MEGAHIT_ASSEMBLE ( | |
| 161 SEQKIT_GREP.out.fastx | |
| 162 ) | |
| 163 | |
| 164 MEGAHIT_ASSEMBLE | |
| 165 .out | |
| 166 .assembly | |
| 167 .set { ch_assembly } | |
| 168 | |
| 169 software_versions | |
| 170 .mix ( MEGAHIT_ASSEMBLE.out.versions.ifEmpty(null) ) | |
| 171 .set { software_versions } | |
| 172 } | |
| 173 // Mark each assembly's meta map with is_assembly = true. | |
| 174 ch_assembly | |
| 175 .map { | |
| 176 meta, fastq -> | |
| 177 meta.is_assembly = true | |
| 178 [meta, fastq] | |
| 179 } | |
| 180 .set { ch_assembly } | |
| 181 | |
| 182 // NOTE: a bare ch_assembly.ifEmpty { [ false, false ] } used to sit here; its result was never assigned, so it was removed as a no-op. | |
| 183 | |
| 184 KRAKEN2_CLASSIFY ( ch_assembly ) | |
| 185 | |
| 186 KRAKEN2_EXTRACT_CONTIGS ( | |
| 187 ch_assembly | |
| 188 .join( KRAKEN2_CLASSIFY.out.kraken_output ), | |
| 189 params.kraken2_extract_bug | |
| 190 ) | |
| 191 // Set meta.organism to the capitalized first word of kraken2_extract_bug and record the SerotypeFinder DB path. | |
| 192 KRAKEN2_EXTRACT_CONTIGS | |
| 193 .out | |
| 194 .asm_filtered_contigs | |
| 195 .map { | |
| 196 meta, fastq -> | |
| 197 meta.organism = params.kraken2_extract_bug.split(/\s+/)[0].capitalize() | |
| 198 meta.serotypefinder_db = params.serotypefinder_db | |
| 199 [meta, fastq] | |
| 200 } | |
| 201 .set { ch_asm_filtered_contigs } | |
| 202 | |
| 203 SEROTYPEFINDER ( ch_asm_filtered_contigs ) | |
| 204 | |
| 205 SEQSERO2 ( ch_asm_filtered_contigs ) | |
| 206 | |
| 207 MLST ( ch_asm_filtered_contigs ) | |
| 208 | |
| 209 ABRICATE_RUN ( | |
| 210 ch_asm_filtered_contigs, | |
| 211 abricate_dbs | |
| 212 ) | |
| 213 // Pool the ABRicate results of all samples under one key (the database list) for a combined summary. | |
| 214 ABRICATE_RUN | |
| 215 .out | |
| 216 .abricated | |
| 217 .map { meta, abres -> [ abricate_dbs, abres ] } | |
| 218 .groupTuple(by: [0]) | |
| 219 .map { it -> tuple ( it[0], it[1].flatten() ) } | |
| 220 .set { ch_abricated } | |
| 221 | |
| 222 ABRICATE_SUMMARY ( ch_abricated ) | |
| 223 // Gather the Centrifuge and Kraken2 reports plus the FastQC zips, stripped of their meta maps, for MultiQC. | |
| 224 CENTRIFUGE_CLASSIFY.out.kreport | |
| 225 .map { meta, kreport -> [ kreport ] } | |
| 226 .flatten() | |
| 227 .concat ( | |
| 228 KRAKEN2_CLASSIFY.out.kraken_report | |
| 229 .map { meta, kreport -> [ kreport ] } | |
| 230 .flatten(), | |
| 231 FASTQC.out.zip | |
| 232 .map { meta, zip -> [ zip ] } | |
| 233 .flatten() | |
| 234 ) | |
| 235 .set { ch_mqc_classify } | |
| 236 // Only one serotyping table is reported: SerotypeFinder's when enabled, otherwise SeqSero2's. | |
| 237 if (params.serotypefinder_run) { | |
| 238 SEROTYPEFINDER | |
| 239 .out | |
| 240 .serotyped | |
| 241 .map { meta, tsv -> [ 'serotypefinder', tsv ] } | |
| 242 .groupTuple(by: [0]) | |
| 243 .map { it -> tuple ( it[0], it[1].flatten() ) } | |
| 244 .set { ch_mqc_custom_tbl } | |
| 245 } else if (params.seqsero2_run) { | |
| 246 SEQSERO2 | |
| 247 .out | |
| 248 .serotyped | |
| 249 .map { meta, tsv -> [ 'seqsero2', tsv ] } | |
| 250 .groupTuple(by: [0]) | |
| 251 .map { it -> tuple ( it[0], it[1].flatten() ) } | |
| 252 .set { ch_mqc_custom_tbl } | |
| 253 } | |
| 254 // Append the four ABRicate summary outputs and regroup everything by the first tuple element. | |
| 255 ch_mqc_custom_tbl | |
| 256 .concat ( | |
| 257 ABRICATE_SUMMARY.out.ncbiamrplus.map{ it -> tuple ( it[0], it[1] )}, | |
| 258 ABRICATE_SUMMARY.out.resfinder.map{ it -> tuple ( it[0], it[1] )}, | |
| 259 ABRICATE_SUMMARY.out.megares.map{ it -> tuple ( it[0], it[1] )}, | |
| 260 ABRICATE_SUMMARY.out.argannot.map{ it -> tuple ( it[0], it[1] )}, | |
| 261 ) | |
| 262 .groupTuple(by: [0]) | |
| 263 .map { it -> [ it[0], it[1].flatten() ]} | |
| 264 .set { ch_mqc_custom_tbl } | |
| 265 | |
| 266 TABLE_SUMMARY ( ch_mqc_custom_tbl ) | |
| 267 // Merge the version outputs of all steps into a single collected_versions.yml. | |
| 268 DUMP_SOFTWARE_VERSIONS ( | |
| 269 software_versions | |
| 270 .mix ( | |
| 271 FASTQC.out.versions, | |
| 272 CENTRIFUGE_CLASSIFY.out.versions, | |
| 273 CENTRIFUGE_PROCESS.out.versions, | |
| 274 SEQKIT_GREP.out.versions, | |
| 275 KRAKEN2_CLASSIFY.out.versions.ifEmpty(null), | |
| 276 KRAKEN2_EXTRACT_CONTIGS.out.versions.ifEmpty(null), | |
| 277 SEROTYPEFINDER.out.versions.ifEmpty(null), | |
| 278 SEQSERO2.out.versions.ifEmpty(null), | |
| 279 MLST.out.versions.ifEmpty(null), | |
| 280 ABRICATE_RUN.out.versions.ifEmpty(null), | |
| 281 ABRICATE_SUMMARY.out.versions.ifEmpty(null), | |
| 282 TABLE_SUMMARY.out.versions.ifEmpty(null) | |
| 283 ) | |
| 284 .unique() | |
| 285 .collectFile(name: 'collected_versions.yml') | |
| 286 ) | |
| 287 // Hand MultiQC one collected list: the versions YAML, the classifier/FastQC files and the summary table YAML. | |
| 288 DUMP_SOFTWARE_VERSIONS | |
| 289 .out | |
| 290 .mqc_yml | |
| 291 .concat ( | |
| 292 ch_mqc_classify, | |
| 293 TABLE_SUMMARY.out.mqc_yml | |
| 294 ) | |
| 295 .collect() | |
| 296 .set { ch_multiqc } | |
| 297 | |
| 298 MULTIQC ( ch_multiqc ) | |
| 299 } | |
| 300 | |
| 301 /* | |
| 302 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 303 ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG | |
| 304 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 305 */ | |
| 306 | |
| 307 workflow.onComplete { | |
| 308 if (workflow.success) { | |
| 309 // CREATE APPROPRIATE DIRECTORIES AND MOVE AS REQUESTED BY STAKEHOLDER(S) | |
| 310 // | |
| 311 // Nextflow's .moveTo will error out if directories contain files and it | |
| 312 // would be complex to include logic to skip directories | |
| 313 // | |
| 314 def final_intermediate_dir = "${params.output}${params.fs}${params.pipeline}-steps" | |
| 315 def final_results_dir = "${params.output}${params.fs}${params.pipeline}-results" | |
| 316 def kraken2_ext_contigs = file( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs", type: 'dir' ) | |
| 317 def final_intermediate = file( final_intermediate_dir, type: 'dir' ) | |
| 318 def final_results = file( final_results_dir, type: 'dir' ) | |
| 319 | |
| 320 // Only when the "-steps" folder does not exist yet: move every entry of the output folder into it, except names matching the exclusion pattern below. | |
| 321 if ( !final_intermediate.exists() ) { | |
| 322 final_intermediate.mkdirs() | |
| 323 | |
| 324 FileHelper.visitFiles(Paths.get("${params.output}"), '*') { | |
| 325 if ( !(it.name ==~ /^(${params.cfsanpipename}|multiqc|\.nextflow|${workflow.workDir.name}|${params.pipeline}).*/) ) { | |
| 326 FileHelper.movePath( | |
| 327 it, Paths.get( "${final_intermediate_dir}${params.fs}${it.name}" ) | |
| 328 ) | |
| 329 } | |
| 330 } | |
| 331 } | |
| 332 // Move the extracted contigs from the "-steps" folder to the "-results" folder; skipped when "-results" already exists. | |
| 333 if ( kraken2_ext_contigs.exists() && !final_results.exists() ) { | |
| 334 final_results.mkdirs() | |
| 335 | |
| 336 FileHelper.movePath( | |
| 337 Paths.get( "${final_intermediate_dir}${params.fs}kraken2_extract_contigs" ), | |
| 338 Paths.get( "${final_results_dir}${params.fs}kraken2_extract_contigs" ) | |
| 339 ) | |
| 340 } | |
| 341 // Call sendMail() once the files are in place. | |
| 342 sendMail() | |
| 343 } | |
| 344 } | |
| 345 // Error handler: calls sendMail() when the run fails. | |
| 346 workflow.onError { | |
| 347 sendMail() | |
| 348 } | |
| 349 | |
| 350 /* | |
| 351 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 352 HELPER METHODS FOR CENTRIFLAKEN-HY WORKFLOW | |
| 353 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 354 */ | |
| 355 | |
| 356 def help() { | |
| 357 // Build the help text: the FASTQ entry-point section, one section per tool, then the wrap-up section; padded by addPadding. | |
| 358 Map allSections = | |
| 359 fastqEntryPointHelp() + | |
| 360 seqkitrmdupHelp(params).text + | |
| 361 kraken2Help(params).text + | |
| 362 centrifugeHelp(params).text + | |
| 363 megahitHelp(params).text + | |
| 364 spadesHelp(params).text + | |
| 365 serotypefinderHelp(params).text + | |
| 366 seqsero2Help(params).text + | |
| 367 mlstHelp(params).text + | |
| 368 abricateHelp(params).text + | |
| 369 wrapUpHelp() | |
| 370 // Copy the merged sections into a fresh map before padding. | |
| 371 Map helptext = [:] | |
| 372 helptext.putAll ( allSections ) | |
| 373 | |
| 374 return addPadding(helptext) | |
| 375 } |
