diff 0.1.0/workflows/cronology.nf @ 0:c8597e9e1a97
"planemo upload"
author | kkonganti
date | Mon, 27 Nov 2023 12:37:44 -0500
parents |
children | 5eaaf749472c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/0.1.0/workflows/cronology.nf	Mon Nov 27 12:37:44 2023 -0500
@@ -0,0 +1,412 @@
+// Define any required imports for this specific workflow
+import java.nio.file.Paths
+import nextflow.file.FileHelper
+
+// Include any necessary methods
+include { \
+    summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; conciseHelp; \
+    addPadding; wrapUpHelp } from "${params.routines}"
+include { dpubmlstpyHelp } from "${params.toolshelp}${params.fs}dpubmlstpy"
+include { fastpHelp } from "${params.toolshelp}${params.fs}fastp"
+include { mashscreenHelp } from "${params.toolshelp}${params.fs}mashscreen"
+include { tuspyHelp } from "${params.toolshelp}${params.fs}tuspy"
+include { spadesHelp } from "${params.toolshelp}${params.fs}spades"
+include { shovillHelp } from "${params.toolshelp}${params.fs}shovill"
+include { polypolishHelp } from "${params.toolshelp}${params.fs}polypolish"
+include { mashtreeHelp } from "${params.toolshelp}${params.fs}mashtree"
+include { quastHelp } from "${params.toolshelp}${params.fs}quast"
+include { prodigalHelp } from "${params.toolshelp}${params.fs}prodigal"
+include { prokkaHelp } from "${params.toolshelp}${params.fs}prokka"
+include { pirateHelp } from "${params.toolshelp}${params.fs}pirate"
+include { mlstHelp } from "${params.toolshelp}${params.fs}mlst"
+include { abricateHelp } from "${params.toolshelp}${params.fs}abricate"
+
+// Exit if help requested before any subworkflows
+if (params.help) {
+    log.info help()
+    exit 0
+}
+
+// Include any necessary modules and subworkflows
+include { PROCESS_FASTQ } from "${params.subworkflows}${params.fs}process_fastq"
+include { PRODKA } from "${params.subworkflows}${params.fs}prodka"
+include { DOWNLOAD_PUBMLST_SCHEME } from "${params.modules}${params.fs}download_pubmlst_scheme${params.fs}main"
+include { DOWNLOAD_REF_GENOME } from "${params.modules}${params.fs}download_ref_genome${params.fs}main"
+include { FASTP } from "${params.modules}${params.fs}fastp${params.fs}main"
+include { MASH_SCREEN } from "${params.modules}${params.fs}mash${params.fs}screen${params.fs}main"
+include { TOP_UNIQUE_SEROVARS } from "${params.modules}${params.fs}top_unique_serovars${params.fs}main"
+include { CAT_UNIQUE } from "${params.modules}${params.fs}cat${params.fs}unique${params.fs}main"
+include { SPADES_ASSEMBLE } from "${params.modules}${params.fs}spades${params.fs}assemble${params.fs}main"
+include { SHOVILL } from "${params.modules}${params.fs}shovill${params.fs}main"
+include { BWA_IDX_MEM } from "${params.modules}${params.fs}custom${params.fs}bwa_idx_mem${params.fs}main"
+include { POLYPOLISH } from "${params.modules}${params.fs}polypolish${params.fs}main"
+include { GUNC_RUN } from "${params.modules}${params.fs}gunc${params.fs}run${params.fs}main"
+include { QUAST } from "${params.modules}${params.fs}quast${params.fs}main"
+include { RMLST_POST } from "${params.modules}${params.fs}rmlst${params.fs}main"
+include { PIRATE } from "${params.modules}${params.fs}pirate${params.fs}main"
+include { MASHTREE } from "${params.modules}${params.fs}mashtree${params.fs}main"
+include { MLST } from "${params.modules}${params.fs}mlst${params.fs}main"
+include { ABRICATE_RUN } from "${params.modules}${params.fs}abricate${params.fs}run${params.fs}main"
+include { ABRICATE_SUMMARY } from "${params.modules}${params.fs}abricate${params.fs}summary${params.fs}main"
+include { TABLE_SUMMARY } from "${params.modules}${params.fs}cat${params.fs}tables${params.fs}main"
"${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main" +include { MULTIQC } from "${params.modules}${params.fs}multiqc${params.fs}main" + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + INPUTS AND ANY CHECKS FOR THE CRONOLOGY WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +def spades_custom_hmm = (params.spades_hmm ? file ( "${params.spades_hmm}" ) : false) +def reads_platform = 0 +def abricate_dbs = [ 'ncbiamrplus', 'resfinder', 'megares', 'argannot' ] + +reads_platform += (params.input ? 1 : 0) + +if (spades_custom_hmm && !spades_custom_hmm.exists()) { + stopNow("Please check if the following SPAdes' custom HMM directory\n" + + "path is valid:\n${params.spades_hmm}\nCannot proceed further!") +} + +if (reads_platform < 1 || reads_platform == 0) { + stopNow("Please mention at least one absolute path to input folder which contains\n" + + "FASTQ files sequenced using the --input option.\n" + + "Ex: --input (Illumina or Generic short reads in FASTQ format)") +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN THE CRONOLOGY WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow CRONOLOGY { + main: + ch_mqc_custom_tbl = Channel.empty() + ch_dummy = Channel.fromPath("${params.dummyfile}") + ch_dummy2 = Channel.fromPath("${params.dummyfile2}") + + log.info summaryOfParams() + + PROCESS_FASTQ() + + PROCESS_FASTQ.out.versions + .set { software_versions } + + PROCESS_FASTQ.out.processed_reads + .map { meta, fastq -> + meta.sequence_sketch = (params.mash_sketch ?: null) + [meta, fastq] + } + .set { ch_processed_reads } + + DOWNLOAD_PUBMLST_SCHEME( params.dpubmlstpy_org ?: null ) + + DOWNLOAD_REF_GENOME( + (params.ref_acc ? 
+
+        FASTP( ch_processed_reads )
+
+        FASTP.out.passed_reads
+            .set { ch_processed_reads }
+
+        FASTP.out.json
+            .map { meta, json -> [ json ] }
+            .collect()
+            .set { ch_multiqc }
+
+        MASH_SCREEN( ch_processed_reads )
+
+        TOP_UNIQUE_SEROVARS( MASH_SCREEN.out.screened )
+
+        TOP_UNIQUE_SEROVARS.out.tsv
+            .map { meta, tsv -> tsv }
+            .collectFile(
+                name: 'iTOL_metadata_w_dups.txt',
+                keepHeader: true,
+                skip: 4,
+                sort: true
+            )
+            .map { file ->
+                def meta = [:]
+                meta.id = 'Unique iTOL Metadata'
+                meta.skip_header = 4
+                [meta, file]
+            }
+            .concat(
+                TOP_UNIQUE_SEROVARS.out.popup
+                    .map { meta, popup -> popup }
+                    .collectFile(
+                        name: 'iTOL_2_NCBI_Pathogens_w_dups.txt',
+                        keepHeader: true,
+                        skip: 3,
+                        sort: true
+                    )
+                    .map { file ->
+                        def meta = [:]
+                        meta.id = 'Unique iTOL Popup'
+                        meta.skip_header = 3
+                        [meta, file]
+                    }
+            )
+            .set { ch_uniq }
+
+        TOP_UNIQUE_SEROVARS.out.accessions
+            .map { meta, acc -> acc }
+            .splitText()
+            .collect()
+            .flatten()
+            .unique()
+            .collectFile(name: 'tree_genomes.txt')
+            .map { genomes -> [ [id: 'hitsTree'], genomes ] }
+            .set { ch_genomes_fofn }
+
+        CAT_UNIQUE( ch_uniq )
+
+        if (params.fq_single_end) {
+            SPADES_ASSEMBLE(
+                ch_processed_reads
+                    .combine(ch_dummy)
+                    .combine(ch_dummy2)
+            )
+
+            SPADES_ASSEMBLE.out.assembly
+                .set { ch_assembly }
+
+            software_versions
+                .mix( SPADES_ASSEMBLE.out.versions.ifEmpty(null) )
+                .set { software_versions }
+        } else {
+            SHOVILL( ch_processed_reads )
+
+            SHOVILL.out.contigs
+                .set { ch_assembly }
+
+            software_versions
+                .mix( SHOVILL.out.versions.ifEmpty(null) )
+                .set { software_versions }
+        }
+
+        if (params.polypolish_run) {
+            BWA_IDX_MEM(
+                ch_assembly
+                    .join( ch_processed_reads )
+            )
+
+            POLYPOLISH(
+                ch_assembly
+                    .join( BWA_IDX_MEM.out.aligned_sam )
+            )
+
+            POLYPOLISH.out.polished
+                .set { ch_assembly }
+
+            software_versions
+                .mix( POLYPOLISH.out.versions )
+        }
+
+        ch_assembly
+            .combine( DOWNLOAD_REF_GENOME.out.fasta )
+            .combine( DOWNLOAD_REF_GENOME.out.gff )
+            .multiMap { meta, consensus, fasta, gff ->
+                sample_fa: consensus
+                polished: [meta, consensus]
+                ref_fasta: [meta, fasta]
+                ref_gff: [meta, gff]
+            }
+            .set { ch_quast }
+
+        MASHTREE(
+            ch_genomes_fofn,
+            DOWNLOAD_REF_GENOME.out.fasta
+                .concat( ch_quast.sample_fa )
+                .collect()
+        )
+
+        PRODKA(
+            ch_quast.ref_fasta,
+            ch_quast.polished
+        )
+
+        RMLST_POST( ch_assembly )
+
+        MLST (
+            ch_assembly
+                .combine( DOWNLOAD_PUBMLST_SCHEME.out.pubmlst_dir )
+        )
+
+        QUAST(
+            ch_quast.polished,
+            ch_quast.ref_fasta,
+            ch_quast.ref_gff
+        )
+
+        if (params.pirate_run) {
+            PIRATE(
+                PRODKA.out.prokka_gff
+                    .map { meta, gff ->
+                        tuple( [id: 'Predicted Genes'], gff )
+                    }
+                    .groupTuple(by: [0])
+            )
+
+            software_versions
+                .mix( PIRATE.out.versions )
+        }
+
+        RMLST_POST.out.tsv
+            .map { meta, tsv -> [ 'rmlst', tsv ] }
+            .groupTuple(by: [0])
+            .map { it -> tuple( it[0], it[1].flatten() ) }
+            .set { ch_mqc_rmlst_tbl }
+
+        MLST.out.tsv
+            .map { meta, tsv -> [ 'mlst', tsv ] }
+            .groupTuple(by: [0])
+            .map { it -> tuple( it[0], it[1].flatten() ) }
+            .set { ch_mqc_custom_tbl }
+
+        ABRICATE_RUN ( ch_assembly, abricate_dbs )
+
+        ABRICATE_RUN.out.abricated
+            .map { meta, abres -> [ abricate_dbs, abres ] }
+            .groupTuple(by: [0])
+            .map { it -> tuple( it[0], it[1].flatten() ) }
+            .set { ch_abricated }
+
+        ABRICATE_SUMMARY ( ch_abricated )
+
+        ch_mqc_custom_tbl
+            .concat (
+                ch_mqc_rmlst_tbl,
+                ABRICATE_SUMMARY.out.ncbiamrplus.map { it -> tuple( it[0], it[1] ) },
+                ABRICATE_SUMMARY.out.resfinder.map { it -> tuple( it[0], it[1] ) },
+                ABRICATE_SUMMARY.out.megares.map { it -> tuple( it[0], it[1] ) },
+                ABRICATE_SUMMARY.out.argannot.map { it -> tuple( it[0], it[1] ) }
+            )
+            .groupTuple(by: [0])
+            .map { it -> [ it[0], it[1].flatten() ] }
+            .set { ch_mqc_custom_tbl }
+
+        TABLE_SUMMARY ( ch_mqc_custom_tbl )
+
+        DUMP_SOFTWARE_VERSIONS (
+            software_versions
+                .mix(
+                    DOWNLOAD_PUBMLST_SCHEME.out.versions,
+                    DOWNLOAD_REF_GENOME.out.versions,
+                    FASTP.out.versions,
+                    MASH_SCREEN.out.versions,
+                    TOP_UNIQUE_SEROVARS.out.versions,
+                    CAT_UNIQUE.out.versions,
+                    MASHTREE.out.versions,
+                    POLYPOLISH.out.versions,
+                    QUAST.out.versions,
+                    PRODKA.out.versions,
+                    RMLST_POST.out.versions,
+                    MLST.out.versions,
+                    ABRICATE_RUN.out.versions,
+                    ABRICATE_SUMMARY.out.versions,
+                    TABLE_SUMMARY.out.versions
+                )
+                .unique()
+                .collectFile(name: 'collected_versions.yml')
+        )
+
+        DUMP_SOFTWARE_VERSIONS.out.mqc_yml
+            .concat (
+                ch_multiqc,
+                TABLE_SUMMARY.out.mqc_yml,
+                PRODKA.out.prokka_txt.map { meta, txt -> txt },
+                QUAST.out.results.map { meta, res -> res }
+            )
+            .collect()
+            .set { ch_multiqc }
+
+        MULTIQC( ch_multiqc )
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow.onComplete {
+    if (workflow.success) {
+        sendMail()
+    }
+}
+
+workflow.onError {
+    sendMail()
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    HELP TEXT METHODS FOR CRONOLOGY WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+def help() {
+
+    Map helptext = [:]
+    Map nH = [:]
+    Map fastpAdapterHelp = [:]
+    def uHelp = (params.help.getClass().toString() =~ /String/ ? params.help.tokenize(',').join(' ') : '')
+
+    Map defaultHelp = [
+        '--help dpubmlstpy' : 'Show dl_pubmlst_profiles_and_schemes.py CLI options',
+        '--help fastp' : 'Show fastp CLI options',
+        '--help spades' : 'Show spades CLI options',
+        '--help shovill' : 'Show shovill CLI options',
+        '--help polypolish' : 'Show polypolish CLI options',
+        '--help quast' : 'Show quast.py CLI options',
+        '--help prodigal' : 'Show prodigal CLI options',
+        '--help prokka' : 'Show prokka CLI options',
+        '--help pirate' : 'Show pirate CLI options',
+        '--help mlst' : 'Show mlst CLI options',
+        '--help mash' : 'Show mash `screen` CLI options',
+        '--help tree' : 'Show mashtree CLI options',
+        '--help abricate' : 'Show abricate CLI options\n'
+    ]
+
+    fastpAdapterHelp['--fastp_use_custom_adapters'] = "Use custom adapter FASTA with fastp on top of " +
+        "built-in adapter sequence auto-detection. Enabling this option will attempt to find and remove " +
+        "all possible Illumina adapter and primer sequences but will make the workflow run slower. " +
+        "Default: ${params.fastp_use_custom_adapters}"
+
+    if (params.help.getClass().toString() =~ /Boolean/ || uHelp.size() == 0) {
+        println conciseHelp('fastp,polypolish')
+        helptext.putAll(defaultHelp)
+    } else {
+        params.help.tokenize(',').each { h ->
+            if (defaultHelp.keySet().findAll { it =~ /(?i)\b${h}\b/ }.size() == 0) {
+                println conciseHelp('fastp,polypolish')
+                stopNow("Tool [ ${h} ] is not a part of ${params.pipeline} pipeline.")
+            }
+        }
+
+        helptext.putAll(
+            fastqEntryPointHelp() +
+            (uHelp =~ /(?i)\bdpubmlstpy/ ? dpubmlstpyHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bfastp/ ? fastpHelp(params).text + fastpAdapterHelp : nH) +
+            (uHelp =~ /(?i)\bmash/ ? mashscreenHelp(params).text : nH) +
+            (uHelp =~ /(?i)\btuspy/ ? tuspyHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bspades/ ? spadesHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bshovill/ ? shovillHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bpolypolish/ ? polypolishHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bquast/ ? quastHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bprodigal/ ? prodigalHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bprokka/ ? prokkaHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bpirate/ ? pirateHelp(params).text : nH) +
+            (uHelp =~ /(?i)\bmlst/ ? mlstHelp(params).text : nH) +
+            (uHelp =~ /(?i)\btree/ ? mashtreeHelp(params).text : nH) +
+            (uHelp =~ /(?i)\babricate/ ? abricateHelp(params).text : nH) +
+            wrapUpHelp()
+        )
+    }
+
+    return addPadding(helptext)
+}