view 0.5.0/workflows/bettercallsal.nf @ 11:749faef1caa9

"planemo upload"
author kkonganti
date Tue, 05 Sep 2023 11:51:40 -0400
parents 365849f031fd
children
line wrap: on
line source
// Define any required imports for this specific workflow
import java.nio.file.Paths
import java.util.zip.GZIPInputStream
import java.io.FileInputStream
import nextflow.file.FileHelper


// Include any necessary methods
include { \
    summaryOfParams; stopNow; fastqEntryPointHelp; sendMail; \
    addPadding; wrapUpHelp    } from "${params.routines}"
include { bbmergeHelp         } from "${params.toolshelp}${params.fs}bbmerge"
include { fastpHelp           } from "${params.toolshelp}${params.fs}fastp"
include { mashscreenHelp      } from "${params.toolshelp}${params.fs}mashscreen"
include { tuspyHelp           } from "${params.toolshelp}${params.fs}tuspy"
include { sourmashsketchHelp  } from "${params.toolshelp}${params.fs}sourmashsketch"
include { sourmashgatherHelp  } from "${params.toolshelp}${params.fs}sourmashgather"
include { sourmashsearchHelp  } from "${params.toolshelp}${params.fs}sourmashsearch"
include { sfhpyHelp           } from "${params.toolshelp}${params.fs}sfhpy"
include { kmaindexHelp        } from "${params.toolshelp}${params.fs}kmaindex"
include { kmaalignHelp        } from "${params.toolshelp}${params.fs}kmaalign"
include { salmonidxHelp       } from "${params.toolshelp}${params.fs}salmonidx"
include { gsrpyHelp           } from "${params.toolshelp}${params.fs}gsrpy"

// Exit if help requested before any subworkflows
if (params.help) {
    log.info help()
    exit 0
}


// Include any necessary modules and subworkflows
include { PROCESS_FASTQ           } from "${params.subworkflows}${params.fs}process_fastq"
include { CAT_CAT                 } from "${params.modules}${params.fs}cat_cat${params.fs}main"
include { FASTQC                  } from "${params.modules}${params.fs}fastqc${params.fs}main"
include { BBTOOLS_BBMERGE         } from "${params.modules}${params.fs}bbtools${params.fs}bbmerge${params.fs}main"
include { FASTP                   } from "${params.modules}${params.fs}fastp${params.fs}main"
include { MASH_SCREEN             } from "${params.modules}${params.fs}mash${params.fs}screen${params.fs}main"
include { TOP_UNIQUE_SEROVARS     } from "${params.modules}${params.fs}top_unique_serovars${params.fs}main"
include { SOURMASH_SKETCH         } from "${params.modules}${params.fs}sourmash${params.fs}sketch${params.fs}main"
include { SOURMASH_GATHER         } from "${params.modules}${params.fs}sourmash${params.fs}gather${params.fs}main"
include { SOURMASH_SEARCH         } from "${params.modules}${params.fs}sourmash${params.fs}search${params.fs}main"
include { KMA_INDEX               } from "${params.modules}${params.fs}kma${params.fs}index${params.fs}main"
include { KMA_ALIGN               } from "${params.modules}${params.fs}kma${params.fs}align${params.fs}main"
include { OTF_GENOME              } from "${params.modules}${params.fs}otf_genome${params.fs}main"
include { SALMON_INDEX            } from "${params.modules}${params.fs}salmon${params.fs}index${params.fs}main"
include { SALMON_QUANT            } from "${params.modules}${params.fs}salmon${params.fs}quant${params.fs}main"
include { SOURMASH_COMPARE        } from "${params.modules}${params.fs}custom${params.fs}sourmash${params.fs}compare${params.fs}main"
include { BCS_DISTANCE_MATRIX     } from "${params.modules}${params.fs}bcs_distance_matrix${params.fs}main"
include { BCS_RESULTS             } from "${params.modules}${params.fs}bcs_results${params.fs}main"
include { DUMP_SOFTWARE_VERSIONS  } from "${params.modules}${params.fs}custom${params.fs}dump_software_versions${params.fs}main"
include { MULTIQC                 } from "${params.modules}${params.fs}multiqc${params.fs}main"

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    INPUTS AND ANY CHECKS FOR THE BETTERCALLSAL WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

def reads_platform = 0
def salmon_idx_decoys = file ( "${params.salmonidx_decoys}" )

reads_platform += (params.input ? 1 : 0)

if (reads_platform < 1 || reads_platform == 0) {
    stopNow("Please mention at least one absolute path to input folder which contains\n" +
            "FASTQ files sequenced using the --input option.\n" +
        "Ex: --input (Illumina or Generic short reads in FASTQ format)")
}

checkMetadataExists(params.mash_sketch, 'MASH sketch')
checkMetadataExists(params.tuspy_ps, 'ACC2SERO pickle')
checkMetadataExists(params.gsrpy_snp_clus_metadata, 'PDG reference target cluster metadata')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    RUN THE BETTERCALLSAL WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow BETTERCALLSAL {
    main:
        log.info summaryOfParams()

        PROCESS_FASTQ()

        PROCESS_FASTQ
            .out
            .versions
            .set { software_versions }

        PROCESS_FASTQ
            .out
            .processed_reads
            .set { ch_processed_reads }

        if (params.bbmerge_run && !params.fq_single_end) {
            ch_processed_reads
                .map { meta, fastq ->
                    meta.adapters = (params.bbmerge_adapters ?: params.dummyfile)
                    [ meta, fastq ]
                }
                .set { ch_processed_reads }
            
            BBTOOLS_BBMERGE( ch_processed_reads )

            BBTOOLS_BBMERGE
                .out
                .fastq
                .map { meta, fastq ->
                    [ meta, [ fastq ] ]
                }
                .set { ch_processed_reads }

            software_versions
                .mix ( BBTOOLS_BBMERGE.out.versions )
                .set { software_versions }
        }

        if (params.fastp_run) {
            FASTP ( ch_processed_reads )

            FASTP
                .out
                .passed_reads
                .set { ch_processed_reads }

            FASTP
                .out
                .json
                .map { meta, json -> [ json ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTP.out.versions )
                .set { software_versions }
        } else {
            FASTQC ( ch_processed_reads )

            FASTQC
                .out
                .zip
                .map { meta, zip -> [ zip ] }
                .collect()
                .set { ch_multiqc }

            software_versions
                .mix ( FASTQC.out.versions )
                .set { software_versions }
        }

        if (params.bcs_concat_pe && !params.fq_single_end && !params.bbmerge_run) {
            CAT_CAT ( ch_processed_reads )

            CAT_CAT
                .out
                .concatenated_reads
                .set { ch_processed_reads }

            software_versions
                .mix ( CAT_CAT.out.versions )
                .set { software_versions }
        }

        ch_processed_reads
            .map { meta, fastq ->
                meta.sequence_sketch = params.mash_sketch
                meta.get_kma_hit_accs = true
                meta.single_end = true
                meta.salmon_decoys = params.dummyfile
                meta.salmon_lib_type = (params.salmonalign_libtype ?: false)
                [ meta, fastq ]
            }
            .filter { meta, fastq ->
                fq_file = ( fastq.getClass().toString() =~ /ArrayList/ ? fastq : [ fastq ] )
                fq_gzip = new GZIPInputStream( new FileInputStream( fq_file[0].toString() ) )
                fq_gzip.read() != -1
            }
            .set { ch_processed_reads }

        MASH_SCREEN ( ch_processed_reads )

        TOP_UNIQUE_SEROVARS ( MASH_SCREEN.out.screened )
        
        TOP_UNIQUE_SEROVARS.out.genomes_fasta
            .set { ch_genomes_fasta }

        TOP_UNIQUE_SEROVARS.out.failed
            .set { ch_bcs_calls_failed }

        if (params.sourmashgather_run || params.sourmashsearch_run) {
            SOURMASH_SKETCH (
                ch_processed_reads
                    .join ( ch_genomes_fasta )
            )

            if (params.sourmashgather_run) {
                SOURMASH_GATHER (
                    SOURMASH_SKETCH.out.signatures,
                    [], [], [], []
                )

                SOURMASH_GATHER
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat( SOURMASH_GATHER.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_GATHER.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }

            if (params.sourmashsearch_run) {
                SOURMASH_SEARCH (
                    SOURMASH_SKETCH.out.signatures,
                    []
                )

                SOURMASH_SEARCH
                    .out
                    .genomes_fasta
                    .set { ch_genomes_fasta }

                ch_bcs_calls_failed
                    .concat( SOURMASH_SEARCH.out.failed )
                    .set { ch_bcs_calls_failed }

                software_versions
                    .mix ( SOURMASH_SEARCH.out.versions.ifEmpty(null) )
                    .set { software_versions }
            }
        }

        KMA_INDEX ( ch_genomes_fasta )

        KMA_ALIGN ( 
            ch_processed_reads
                .join(KMA_INDEX.out.idx)
        )

        OTF_GENOME ( KMA_ALIGN.out.hits )

        OTF_GENOME.out.failed
            .concat( ch_bcs_calls_failed )
            .collectFile(name: 'BCS_NO_CALLS.txt')
            .set { ch_bcs_no_calls }

        SALMON_INDEX ( OTF_GENOME.out.genomes_fasta )

        SALMON_QUANT (
            ch_processed_reads
                .join(SALMON_INDEX.out.idx)
        )

        SALMON_QUANT
            .out
            .results
            .groupTuple(by: [0])
            .map { it -> tuple ( it[1].flatten() ) }
            .mix ( ch_bcs_no_calls )
            .collect()
            .set { ch_salmon_res_dirs }

        if (params.sourmashsketch_run) {
            SOURMASH_SKETCH
                .out
                .signatures
                .groupTuple(by: [0])
                .map { meta, qsigs, dsigs ->
                    [ qsigs ]
                }
                .collect()
                .flatten()
                .collect()
                .set { ch_query_sigs }

            KMA_ALIGN
                .out
                .hits
                .map { meta, hits ->
                    [ hits ]
                }
                .collect()
                .flatten()
                .collectFile(name: 'accessions.txt')
                .set { ch_otf_genomes }

            SOURMASH_COMPARE ( ch_query_sigs, ch_otf_genomes )

            BCS_DISTANCE_MATRIX (
                SOURMASH_COMPARE.out.matrix,
                SOURMASH_COMPARE.out.labels
            )

            ch_multiqc
                .concat( BCS_DISTANCE_MATRIX.out.mqc_yml )
                .set { ch_multiqc }

            software_versions
                .mix (
                    SOURMASH_SKETCH.out.versions.ifEmpty(null),
                    SOURMASH_COMPARE.out.versions.ifEmpty(null),
                    BCS_DISTANCE_MATRIX.out.versions.ifEmpty(null),
                )
                .set { software_versions }
        }

        BCS_RESULTS ( ch_salmon_res_dirs )

        DUMP_SOFTWARE_VERSIONS (
            software_versions
                .mix (
                    MASH_SCREEN.out.versions,
                    TOP_UNIQUE_SEROVARS.out.versions,
                    KMA_INDEX.out.versions,
                    KMA_ALIGN.out.versions,
                    OTF_GENOME.out.versions.ifEmpty(null),
                    SALMON_INDEX.out.versions,
                    SALMON_QUANT.out.versions,
                    BCS_RESULTS.out.versions
                )
                .unique()
                .collectFile(name: 'collected_versions.yml')
        )

        DUMP_SOFTWARE_VERSIONS
            .out
            .mqc_yml
            .concat (
                ch_multiqc,
                BCS_RESULTS.out.mqc_yml,
                BCS_RESULTS.out.mqc_json
            )
            .collect()
            .set { ch_multiqc }

        MULTIQC ( ch_multiqc )
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ON COMPLETE, SHOW GORY DETAILS OF ALL PARAMS WHICH WILL BE HELPFUL TO DEBUG
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow.onComplete {
    if (workflow.success) {
        sendMail()
    }
}

workflow.onError {
    sendMail()
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    METHOD TO CHECK METADATA EXISTENCE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

def checkMetadataExists(file_path, msg) {
    file_path_obj = file( file_path )

    if (!file_path_obj.exists() || file_path_obj.size() == 0) {
        stopNow("Please check if your ${msg} file\n" +
            "[ ${file_path} ]\nexists and is not of size 0.")
    }
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    HELP TEXT METHODS FOR BETTERCALLSAL WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

def help() {

    Map helptext = [:]
    Map bcsConcatHelp = [:]
    Map fastpAdapterHelp = [:]

    bcsConcatHelp['--bcs_concat_pe'] = "Concatenate paired-end files. " +
        "Default: ${params.bcs_concat_pe}"

    fastpAdapterHelp['--fastp_use_custom_adapaters'] = "Use custom adapter FASTA with fastp on top of " +
        "built-in adapter sequence auto-detection. Enabling this option will attempt to find and remove " +
        "all possible Illumina adapter and primer sequences but will make the workflow run slow. " +
        "Default: ${params.fastp_use_custom_adapters}"

    helptext.putAll (
        fastqEntryPointHelp() +
        bcsConcatHelp +
        bbmergeHelp(params).text +
        fastpHelp(params).text +
        fastpAdapterHelp +
        mashscreenHelp(params).text +
        tuspyHelp(params).text +
        sourmashsketchHelp(params).text +
        sourmashgatherHelp(params).text +
        sourmashsearchHelp(params).text +
        sfhpyHelp(params).text +
        kmaindexHelp(params).text +
        kmaalignHelp(params).text +
        salmonidxHelp(params).text +
        gsrpyHelp(params).text +
        wrapUpHelp()
    )

    return addPadding(helptext)
}