Mercurial > repos > kkonganti > cfsan_cronology
diff 0.1.0/workflows/cronology_db.nf @ 0:c8597e9e1a97
"planemo upload"
author | kkonganti |
---|---|
date | Mon, 27 Nov 2023 12:37:44 -0500 |
parents | |
children |
line wrap: on
line diff
// Java / Nextflow classes imported for this specific workflow
import java.nio.file.Paths
import nextflow.file.FileHelper

// Shared routines (location given by params.routines)
include { \
    summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
    addPadding; wrapUpHelp } from "${params.routines}"

// Per-tool help-text methods (location given by params.toolshelp)
include { dpubmlstpyHelp     } from "${params.toolshelp}${params.fs}dpubmlstpy"
include { checkm2predictHelp } from "${params.toolshelp}${params.fs}checkm2predict"
include { guncrunHelp        } from "${params.toolshelp}${params.fs}guncrun"
include { mlstHelp           } from "${params.toolshelp}${params.fs}mlst"

// When --help is given, print the help text and exit with status 0
// before any module or subworkflow is included.
if (params.help) {
    log.info(help())
    exit(0)
}

// Modules used by the CRONOLOGY_DB workflow (location given by params.modules)
include { DOWNLOAD_PDG_METADATA   } from "${params.modules}${params.fs}download_pdg_metadata${params.fs}main"
include { DOWNLOAD_PUBMLST_SCHEME } from "${params.modules}${params.fs}download_pubmlst_scheme${params.fs}main"
include { FILTER_PDG_METADATA     } from "${params.modules}${params.fs}filter_pdg_metadata${params.fs}main"
include { GUNC_RUN                } from "${params.modules}${params.fs}gunc${params.fs}run${params.fs}main"
include { CHECKM2_PREDICT         } from "${params.modules}${params.fs}checkm2${params.fs}predict${params.fs}main"
include { QUAL_PASSED_GENOMES     } from "${params.modules}${params.fs}custom${params.fs}qual_passed_genomes${params.fs}main"
include { SCAFFOLD_GENOMES        } from "${params.modules}${params.fs}scaffold_genomes${params.fs}main"
include { MLST                    } from "${params.modules}${params.fs}mlst${params.fs}main"
include { INDEX_PDG_METADATA      } from "${params.modules}${params.fs}index_pdg_metadata${params.fs}main"
include { MASH_SKETCH             } from "${params.modules}${params.fs}mash${params.fs}sketch${params.fs}main"
include { MASH_PASTE              } from "${params.modules}${params.fs}mash${params.fs}paste${params.fs}main"
include { DUMP_SOFTWARE_VERSIONS } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    INPUTS AND ANY CHECKS FOR THE CRONOLOGY_DB WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// The --output option is mandatory for this workflow.
if (!params.output) {
    stopNow("Please mention the absolute UNIX path to store the DB flat files\n" +
        "using the --output option.\n" +
        "Ex: --output /path/to/cronology/db_files")
}

// Both quality-control database paths must exist before anything is run.
[
    [ params.guncrun_dbpath, 'GUNC' ],
    [ params.checkm2predict_dbpath, 'CheckM2' ]
].each { db_path, tool_name ->
    checkDBPathExists(db_path, tool_name)
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    RUN THE CRONOLOGY_DB WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow CRONOLOGY_DB {
    main:
        log.info summaryOfParams()

        // Download the metadata (optionally for a specific release) and the
        // PubMLST scheme (optionally for a specific organism).
        DOWNLOAD_PDG_METADATA ( params.pdg_release ?: null )

        software_versions = DOWNLOAD_PDG_METADATA.out.versions

        DOWNLOAD_PUBMLST_SCHEME ( params.dpubmlstpy_org ?: null )

        // Filter the accession list in chunk files of params.genomes_chunk lines each.
        FILTER_PDG_METADATA (
            DOWNLOAD_PDG_METADATA.out.accs
                .splitText(by: params.genomes_chunk, file: true)
        )

        // Fan every filtered chunk table out to both quality tools; each branch
        // carries the same meta map, the tool's DB path and the chunk file.
        ch_run_qual_on_these_accs = FILTER_PDG_METADATA.out.accs_chunk_tbl
            .collectFile(sort: { chunk_tbl -> chunk_tbl.simpleName })
            .multiMap { chunk_tbl ->
                def meta = [id: 'AssemblyQC', phone_ncbi: true]
                gunc: [ meta, params.guncrun_dbpath, chunk_tbl ]
                checkm2: [ meta, params.checkm2predict_dbpath, chunk_tbl ]
            }

        CHECKM2_PREDICT ( ch_run_qual_on_these_accs.checkm2 )

        GUNC_RUN ( ch_run_qual_on_these_accs.gunc )

        // Drop the meta map and concatenate the passed-quality reports of each
        // tool into a single file per tool.
        ch_checkm2_passed = CHECKM2_PREDICT.out.quality_report_passed
            .map { meta, report -> [ report ] }
            .collect()
            .flatten()
            .collectFile(name: 'checkm2_quality_passed.txt')

        ch_gunc_passed = GUNC_RUN.out.quality_report_passed
            .map { meta, report -> [ report ] }
            .collect()
            .flatten()
            .collectFile(name: 'gunc_quality_passed.txt')

        QUAL_PASSED_GENOMES ( ch_checkm2_passed, ch_gunc_passed )

        // Scaffold the accessions that passed, again in chunk files.
        SCAFFOLD_GENOMES (
            QUAL_PASSED_GENOMES.out.accs
                .splitText(by: params.genomes_chunk, file: true)
        )

        // The scaffolded genomes feed both MLST and Mash.
        ch_scaffolded_genomes = SCAFFOLD_GENOMES.out.scaffolded
            .multiMap { genomes ->
                def meta = [id: (params.pdg_release ?: 'NCBI Pathogen Genomes')]
                mlst: [ meta, genomes ]
                mash: [ meta, genomes ]
            }

        MLST (
            ch_scaffolded_genomes.mlst
                .combine( DOWNLOAD_PUBMLST_SCHEME.out.pubmlst_dir )
        )

        // Merge all MLST tables into one TSV, keeping a single header line.
        ch_mlst_results = MLST.out.tsv
            .map { meta, mlst_tsv -> mlst_tsv }
            .collectFile(
                name: 'mlst_results.tsv',
                keepHeader: true,
                skip: 1
            )

        INDEX_PDG_METADATA (
            DOWNLOAD_PDG_METADATA.out.pdg_metadata,
            DOWNLOAD_PDG_METADATA.out.snp_cluster_metadata,
            DOWNLOAD_PDG_METADATA.out.accs,
            ch_mlst_results
        )

        MASH_SKETCH (
            ch_scaffolded_genomes.mash
                .map { entry -> tuple ( entry[0], entry[1].flatten() ) }
        )

        // Re-key every sketch with one common id so that groupTuple gathers
        // all of them into a single tuple for MASH_PASTE.
        MASH_PASTE (
            MASH_SKETCH.out.sketch
                .map { meta, msh ->
                    [ [id: (params.pdg_release ?: 'NCBI Pathogen Genomes')], msh ]
                }
                .groupTuple(by: [0])
        )

        // Gather the versions emitted by every module into one YAML file.
        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    DOWNLOAD_PDG_METADATA.out.versions,
                    DOWNLOAD_PUBMLST_SCHEME.out.versions,
                    FILTER_PDG_METADATA.out.versions,
                    CHECKM2_PREDICT.out.versions,
                    GUNC_RUN.out.versions,
                    QUAL_PASSED_GENOMES.out.versions,
                    SCAFFOLD_GENOMES.out.versions,
                    MLST.out.versions,
                    INDEX_PDG_METADATA.out.versions,
                    MASH_SKETCH.out.versions,
                    MASH_PASTE.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// On completion, mail is sent only when the run succeeded ...
workflow.onComplete {
    if (workflow.success) {
        sendMail()
    }
}

// ... and the error handler sends mail as well.
workflow.onError {
    sendMail()
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    METHOD TO CHECK IF DB PATHS EXIST
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

/**
 * Verify that a database path exists and call stopNow() with a
 * descriptive message when it does not.
 *
 * @param db_path  Path to the database, as given on the command line.
 *                 May be null or empty when the option was not set.
 * @param msg      Name of the tool the database belongs to; it is
 *                 interpolated into the message passed to stopNow().
 */
def checkDBPathExists(db_path, msg) {
    // Nextflow's file() rejects a null or empty argument, so an unset option
    // is treated the same as a missing path and reported via stopNow().
    // `def` keeps the variable local instead of leaking it into the script binding.
    def db_path_obj = db_path ? file( db_path ) : null

    if (!db_path_obj?.exists()) {
        stopNow("Please check if the database path for ${msg}\n" +
            "[ ${db_path} ]\nexists.")
    }
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    HELP TEXT METHODS FOR CRONOLOGY WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

/**
 * Assemble the help text for this workflow.
 *
 * The entries of fastqEntryPointHelp() whose key matches
 * Required|output|Other|Workflow|Author|Version are kept and combined with
 * the `text` of the dpubmlstpy, checkm2predict and guncrun help methods and
 * with wrapUpHelp().
 *
 * @return The result of addPadding() applied to the combined help map.
 */
def help() {

    Map helptext = [:]

    // NOTE(review): mlstHelp is included by this file but is not merged in
    // here, unlike the other three tool help methods - confirm whether the
    // omission is intentional.
    helptext.putAll (
        fastqEntryPointHelp().findAll {
            it.key =~ /Required|output|Other|Workflow|Author|Version/
        } +
        dpubmlstpyHelp(params).text +
        checkm2predictHelp(params).text +
        guncrunHelp(params).text +
        wrapUpHelp()
    )

    return addPadding(helptext)
}