diff 0.5.0/workflows/nowayout.nf @ 0:97cd2f532efe

planemo upload
author kkonganti
date Mon, 31 Mar 2025 14:50:40 -0400
parents
children 3539fbeb4230
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/0.5.0/workflows/nowayout.nf	Mon Mar 31 14:50:40 2025 -0400
@@ -0,0 +1,340 @@
+// Define any required imports for this specific workflow
+import java.nio.file.Paths
+import java.util.zip.GZIPInputStream
+import java.io.FileInputStream
+import nextflow.file.FileHelper
+
+
+// Include any necessary methods
+include { \
+    summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
+    addPadding; wrapUpHelp           } from "${params.routines}"
+include { fastpHelp                  } from "${params.toolshelp}${params.fs}fastp"
+include { kmaalignHelp               } from "${params.toolshelp}${params.fs}kmaalign"
+include { seqkitgrepHelp             } from "${params.toolshelp}${params.fs}seqkitgrep"
+include { salmonidxHelp              } from "${params.toolshelp}${params.fs}salmonidx"
+include { sourmashsketchHelp         } from "${params.toolshelp}${params.fs}sourmashsketch"
+include { sourmashgatherHelp         } from "${params.toolshelp}${params.fs}sourmashgather"
+include { sfhpyHelp                  } from "${params.toolshelp}${params.fs}sfhpy"
+include { gsalkronapyHelp            } from "${params.toolshelp}${params.fs}gsalkronapy"
+include { kronaktimporttextHelp      } from "${params.toolshelp}${params.fs}kronaktimporttext"
+
+// Exit if help requested before any subworkflows
+if (params.help) {
+    log.info help()
+    exit 0
+}
+
+
+// Include any necessary modules and subworkflows
+include { PROCESS_FASTQ           } from "${params.subworkflows}${params.fs}process_fastq"
+include { FASTP                   } from "${params.modules}${params.fs}fastp${params.fs}main"
+include { KMA_ALIGN               } from "${params.modules}${params.fs}kma${params.fs}align${params.fs}main"
+include { OTF_GENOME              } from "${params.modules}${params.fs}otf_genome${params.fs}main"
+include { SEQKIT_GREP             } from "${params.modules}${params.fs}seqkit${params.fs}grep${params.fs}main"
+include { SALMON_INDEX            } from "${params.modules}${params.fs}salmon${params.fs}index${params.fs}main"
+include { SALMON_QUANT            } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main"
+include { SOURMASH_SKETCH         } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
+include { SOURMASH_SKETCH \
+    as REDUCE_DB_IDX              } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
+include { SOURMASH_GATHER         } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main"
+include { NOWAYOUT_RESULTS        } from "${params.modules}${params.fs}nowayout_results${params.fs}main"
+include { KRONA_KTIMPORTTEXT      } from "${params.modules}${params.fs}krona${params.fs}ktimporttext${params.fs}main"
+include { DUMP_SOFTWARE_VERSIONS  } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
+include { MULTIQC                 } from "${params.modules}${params.fs}multiqc${params.fs}main"
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    INPUTS AND ANY CHECKS FOR THE NOWAYOUT WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+def reads_platform = 0
+reads_platform += (params.input ? 1 : 0)
+
+if (reads_platform < 1) {
+    stopNow("Please provide at least one absolute path to an input folder containing\n" +
+            "sequenced FASTQ files using the --input option.\n" +
+        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
+}
+
+params.fastp_adapter_fasta ? checkMetadataExists(params.fastp_adapter_fasta, 'Adapter sequences FASTA') : null
+checkMetadataExists(params.lineages_csv, 'Lineages CSV')
+checkMetadataExists(params.kmaalign_idx, 'KMA Indices')
+checkMetadataExists(params.ref_fna, 'FASTA reference')
+
+ch_sourmash_lin = file( params.lineages_csv )
+
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN THE NOWAYOUT WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow NOWAYOUT {
+    main:
+        log.info summaryOfParams()
+
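+        // Stage the input FASTQ files; PROCESS_FASTQ emits per-sample reads plus tool versions.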
+        PROCESS_FASTQ()
+
+        PROCESS_FASTQ.out.versions
+            .set { software_versions }
+
+        PROCESS_FASTQ.out.processed_reads
+            .set { ch_processed_reads }
+
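+        // Attach per-sample KMA and salmon settings to the metadata map and
+        // drop samples whose first FASTQ file is an empty GZIP archive.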
+        ch_processed_reads
+            .map { meta, fastq ->
+                meta.get_kma_hit_accs = true
+                meta.salmon_decoys = params.dummyfile
+                meta.salmon_lib_type = (params.salmonalign_libtype ?: false)
+                meta.kma_t_db = params.kmaalign_idx
+                [ meta, fastq ]
+            }
+            .filter { meta, fastq ->
+                def fq_file = ( fastq instanceof List ? fastq : [ fastq ] )
+                def fq_gzip = new GZIPInputStream( new FileInputStream( fq_file[0].toAbsolutePath().toString() ) )
+                def has_reads = ( fq_gzip.read() != -1 )
+                fq_gzip.close()
+                has_reads
+            }
+            .set { ch_processed_reads }
+
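+        // Adapter and quality trimming with fastp.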
+        FASTP( ch_processed_reads )
+
+        FASTP.out.json
+            .map { meta, json ->
+                json
+            }
+            .collect()
+            .set { ch_multiqc }
+
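+        // Align the trimmed reads against the KMA indices; the empty list
+        // fills the input tuple's optional third element.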
+        KMA_ALIGN(
+            FASTP.out.passed_reads
+                .map { meta, fastq ->
+                    [meta, fastq, []]
+                }
+        )
+
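+        // From the KMA hit accessions and alignment fragments, extract the reads that mapped (OTF_GENOME).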
+        OTF_GENOME(
+            KMA_ALIGN.out.hits
+                .join(KMA_ALIGN.out.frags)
+        )
+
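+        // Keep only samples whose extracted read set is non-empty.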
+        OTF_GENOME.out.reads_extracted
+            .filter { meta, fasta ->
+                def fa_file = ( fasta instanceof List ? fasta : [ fasta ] )
+                def fa_gzip = new GZIPInputStream( new FileInputStream( fa_file[0].toAbsolutePath().toString() ) )
+                def has_seqs = ( fa_gzip.read() != -1 )
+                fa_gzip.close()
+                has_seqs
+            }
+            .set { ch_mito_aln_reads }
+
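+        // For samples with at least one hit, pull the mapped reference sequences out of the reference FASTA.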
+        SEQKIT_GREP(
+            KMA_ALIGN.out.hits
+                .filter { meta, mapped_refs ->
+                    def patterns = file( mapped_refs )
+                    patterns.size() > 0
+                }
+                .map { meta, mapped_refs ->
+                    [meta, params.ref_fna, mapped_refs]
+                }
+        )
+
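+        // Build a salmon index from each sample's reference subset and quantify the aligned reads against it.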
+        SALMON_INDEX( SEQKIT_GREP.out.fastx )
+
+        SALMON_QUANT(
+            ch_mito_aln_reads
+                .join( SALMON_INDEX.out.idx )
+        )
+
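+        // Sketch the reduced reference set ('db') and the aligned reads ('query') with sourmash,
+        // then run gather on the paired signatures.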
+        REDUCE_DB_IDX(
+            SEQKIT_GREP.out.fastx,
+            true,
+            false,
+            'db'
+        )
+
+        SOURMASH_SKETCH(
+            ch_mito_aln_reads,
+            false,
+            false,
+            'query'
+        )
+
+        SOURMASH_GATHER(
+            SOURMASH_SKETCH.out.signatures
+                .join( REDUCE_DB_IDX.out.signatures ),
+                [], [], [], []
+        )
+
+        // SOURMASH_TAX_METAGENOME(
+        //     SOURMASH_GATHER.out.result
+        //         .groupTuple(by: [0])
+        //         .map { meta, csv ->
+        //             [ meta, csv, ch_sourmash_lin ]
+        //         }
+        // )
+
+        // SOURMASH_TAX_METAGENOME.out.csv
+        //     .map { meta, csv ->
+        //         csv
+        //     }
+        //     .set { ch_lin_csv }
+
+        // SOURMASH_TAX_METAGENOME.out.tsv
+        //     .tap { ch_lin_krona }
+        //     .map { meta, tsv ->
+        //         tsv
+        //     }
+        //     .tap { ch_lin_tsv }
+
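+        // Collect the sourmash gather results, salmon quantification results and the
+        // failed-sample outputs into one flat file list for NOWAYOUT_RESULTS.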
+        SOURMASH_GATHER.out.result
+            .groupTuple(by: [0])
+            .map { meta, csv ->
+                [ csv ]
+            }
+            .concat(
+                SALMON_QUANT.out.results
+                    .map { meta, salmon_res ->
+                        [ salmon_res ]
+                    }
+            )
+            .concat(
+                SOURMASH_GATHER.out.failed
+                    .map { meta, failed ->
+                        [ failed ]
+                    }
+            )
+            .concat( OTF_GENOME.out.failed )
+            .collect()
+            .flatten()
+            .collect()
+            .set { ch_gene_abn }
+        
+        NOWAYOUT_RESULTS( ch_gene_abn, ch_sourmash_lin )
+
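+        // Pick out the Krona-formatted TSVs and group them under a single run-level meta for ktImportText.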
+        NOWAYOUT_RESULTS.out.tsv
+            .flatten()
+            .filter { tsv -> tsv.toString() =~ /.*${params.krona_res_suffix}$/ }
+            .map { tsv ->
+                def meta = [:]
+                meta.id = "${params.cfsanpipename}_${params.pipeline}_krona"
+                [ meta, tsv ]
+            }
+            .groupTuple(by: [0])
+            .set { ch_lin_krona }
+
+        // ch_lin_tsv
+        //     .mix( ch_lin_csv )
+        //     .collect()
+        //     .set { ch_lin_summary }
+
+        // SOURMASH_TAX_METAGENOME.out.txt
+        //     .map { meta, txt ->
+        //         txt
+        //     }
+        //     .collect()
+        //     .set { ch_lin_kreport }
+
+        // NOWAYOUT_RESULTS(
+        //     ch_lin_summary
+        //         .concat( SOURMASH_GATHER.out.failed )
+        //         .concat( OTF_GENOME.out.failed )
+        //         .collect()
+        // )
+
+        KRONA_KTIMPORTTEXT( ch_lin_krona )
+        
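+        // Aggregate the per-module version YAMLs into a single file of software versions.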
+        DUMP_SOFTWARE_VERSIONS(
+            software_versions
+                .mix (
+                    FASTP.out.versions,
+                    KMA_ALIGN.out.versions,
+                    SEQKIT_GREP.out.versions,
+                    REDUCE_DB_IDX.out.versions,
+                    SOURMASH_SKETCH.out.versions,
+                    SOURMASH_GATHER.out.versions,
+                    SALMON_INDEX.out.versions,
+                    SALMON_QUANT.out.versions,
+                    NOWAYOUT_RESULTS.out.versions,
+                    KRONA_KTIMPORTTEXT.out.versions
+                )
+                .unique()
+                .collectFile(name: 'collected_versions.yml')
+        )
+
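+        // Assemble the MultiQC inputs: software versions, fastp JSON reports and the nowayout results YAML.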
+        DUMP_SOFTWARE_VERSIONS.out.mqc_yml
+            .concat(
+                ch_multiqc,
+                NOWAYOUT_RESULTS.out.mqc_yml
+            )
+            .collect()
+            .flatten()
+            .collect()
+            .set { ch_multiqc }
+
+        MULTIQC( ch_multiqc )
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    ON COMPLETE OR ON ERROR, SEND MAIL WITH GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+workflow.onComplete {
+    if (workflow.success) {
+        sendMail()
+    }
+}
+
+workflow.onError {
+    sendMail()
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    METHOD TO CHECK METADATA EXISTENCE
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+def checkMetadataExists(file_path, msg) {
+    def file_path_obj = file( file_path )
+
+    if (msg.toString().find(/(?i)KMA/)) {
+        if (!file_path_obj.parent.exists() || file_path_obj.parent.size() == 0) {
+            stopNow("Please check if your ${msg}\n" +
+                "[ ${file_path} ]\nexists and that the files are not of size 0.")
+        }
+    }
+    else if (!file_path_obj.exists() || file_path_obj.size() == 0) {
+        stopNow("Please check if your ${msg} file\n" +
+            "[ ${file_path} ]\nexists and is not of size 0.")
+    }
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    HELP TEXT METHODS FOR NOWAYOUT WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+def help() {
+
+    Map helptext = [:]
+
+    helptext.putAll (
+        fastqEntryPointHelp() +
+        fastpHelp(params).text +
+        kmaalignHelp(params).text +
+        seqkitgrepHelp(params).text +
+        salmonidxHelp(params).text +
+        sourmashsketchHelp(params).text +
+        sourmashgatherHelp(params).text +
+        sfhpyHelp(params).text +
+        gsalkronapyHelp(params).text +
+        kronaktimporttextHelp(params).text +
+        wrapUpHelp()
+    )
+
+    return addPadding(helptext)
+}