annotate 0.4.2/subworkflows/process_fastq.nf @ 130:04f6ac8ca13c

planemo upload
author kkonganti
date Wed, 03 Jul 2024 15:16:39 -0400
parents 52045ea4679d
children
rev   line source
kkonganti@105 1 // Include any necessary methods and modules
kkonganti@105 2 include { stopNow; validateParamsForFASTQ } from "${params.routines}"
kkonganti@105 3 include { GEN_SAMPLESHEET } from "${params.modules}${params.fs}gen_samplesheet${params.fs}main"
kkonganti@105 4 include { SAMPLESHEET_CHECK } from "${params.modules}${params.fs}samplesheet_check${params.fs}main"
kkonganti@105 5 include { CAT_FASTQ } from "${params.modules}${params.fs}cat${params.fs}fastq${params.fs}main"
kkonganti@105 6 include { SEQKIT_SEQ } from "${params.modules}${params.fs}seqkit${params.fs}seq${params.fs}main"
kkonganti@105 7
kkonganti@105 8 // Validate 4 required workflow parameters if
kkonganti@105 9 // FASTQ files are the input for the
kkonganti@105 10 // entry point.
kkonganti@105 11 validateParamsForFASTQ()
kkonganti@105 12
// Subworkflow PROCESS_FASTQ
//
// Turns either a directory of FASTQ files (--input) or a metadata CSV
// (--metadata) into a channel of [ meta, [ fastq, ... ] ] entries.
//
// Steps, in order:
//   1. With --input: check the --fq_* options, make sure at least one file
//      under the directory ends with --fq_suffix, then run GEN_SAMPLESHEET.
//      With --metadata: make sure the CSV exists and is not empty.
//   2. SAMPLESHEET_CHECK the sample sheet and turn each CSV row into a
//      [ meta, fastq files ] entry via create_fastq_channel().
//   3. Group entries by meta; groups with more than one entry go through
//      CAT_FASTQ, groups with exactly one entry are passed through as-is.
//   4. If --fq_filter_by_len is greater than 0, run SEQKIT_SEQ on the reads.
//
// Emits:
//   processed_reads : the reads channel after steps 3 and 4
//   versions        : the mixed `versions` outputs of the processes used
workflow PROCESS_FASTQ {
    main:
        versions = Channel.empty()
        input_ch = Channel.empty()
        reads = Channel.empty()

        // --input takes precedence over --metadata when both are given
        def input = file( (params.input ?: params.metadata) )

        if (params.input) {
            if (params.fq_suffix == null) {
                stopNow("We need to know what suffix the FASTQ files ends with inside the\n" +
                    "directory. Please use the --fq_suffix option to indicate the file\n" +
                    "suffix by which the files are to be collected to run the pipeline on.")
            }

            if (params.fq_strandedness == null) {
                stopNow("We need to know if the FASTQ files inside the directory\n" +
                    "are sequenced using stranded or non-stranded sequencing. This is generally\n" +
                    "required if the sequencing experiment is RNA-SEQ. For almost all of the other\n" +
                    "cases, you can probably use the --fq_strandedness unstranded option to indicate\n" +
                    "that the reads are unstranded.")
            }

            if (params.fq_filename_delim == null || params.fq_filename_delim_idx == null) {
                stopNow("We need to know the delimiter of the filename of the FASTQ files.\n" +
                    "By default the filename delimiter is _ (underscore). This delimiter character\n" +
                    "is used to split and assign a group name. The group name can be controlled by\n" +
                    "using the --fq_filename_delim_idx option (1-based). For example, if the FASTQ\n" +
                    "filename is WT_REP1_001.fastq, then to create a group WT, use the following\n" +
                    "options: --fq_filename_delim _ --fq_filename_delim_idx 1")
            }

            if (!input.exists()) {
                stopNow("The input directory,\n${params.input}\ndoes not exist!")
            }

            // Walk the directory tree and keep only the entries whose name
            // ends with the requested suffix (no placeholder entries for
            // files that do not match).
            def fastq_files = []

            input.eachFileRecurse {
                if (it.name.endsWith("${params.fq_suffix}")) {
                    fastq_files << it
                }
            }

            if (fastq_files.isEmpty()) {
                stopNow("The input directory,\n${params.input}\nis empty! or does not " +
                    "have FASTQ files ending with the suffix: ${params.fq_suffix}")
            }

            GEN_SAMPLESHEET( Channel.fromPath(params.input, type: 'dir') )
            GEN_SAMPLESHEET.out.csv.set{ input_ch }
            versions.mix( GEN_SAMPLESHEET.out.versions )
                .set { versions }
        } else if (params.metadata) {
            if (!input.exists()) {
                stopNow("The metadata CSV file,\n${params.metadata}\ndoes not exist!")
            }

            if (input.size() <= 0) {
                stopNow("The metadata CSV file,\n${params.metadata}\nis empty!")
            }

            Channel.fromPath(params.metadata, type: 'file')
                .set { input_ch }
        }

        // One entry per CSV row, grouped by meta and split by group size:
        // `multiple` groups need concatenation, `single` groups do not.
        SAMPLESHEET_CHECK( input_ch )
            .csv
            .splitCsv( header: true, sep: ',')
            .map { create_fastq_channel(it) }
            .groupTuple(by: [0])
            .branch {
                meta, fastq ->
                    single   : fastq.size() == 1
                        return [ meta, fastq.flatten() ]
                    multiple : fastq.size() > 1
                        return [ meta, fastq.flatten() ]
            }
            .set { reads }

        CAT_FASTQ( reads.multiple )
            .catted_reads
            .mix( reads.single )
            .set { processed_reads }

        // An unset --fq_filter_by_len is treated as 0 (no length filtering)
        // instead of failing on a null value.
        if ((params.fq_filter_by_len ?: 0).toInteger() > 0) {
            SEQKIT_SEQ( processed_reads )
                .fastx
                .set { processed_reads }

            versions.mix( SEQKIT_SEQ.out.versions.first().ifEmpty(null) )
                .set { versions }
        }

        versions.mix(
            SAMPLESHEET_CHECK.out.versions,
            CAT_FASTQ.out.versions.first().ifEmpty(null)
        )
        .set { versions }

    emit:
        processed_reads
        versions
}
kkonganti@105 116
// Build one [ meta, [ fq1 ] ] or [ meta, [ fq1, fq2 ] ] entry from a
// sample sheet row.
//
// row : one CSV row as a map. The keys read here are `sample`,
//       `single_end`, `strandedness`, `fq1` and, for rows that are not
//       single-end, `fq2`.
//
// meta.id is derived from `row.sample`: the name is split on
// params.fq_filename_delim, the first params.fq_filename_delim_idx
// fields are kept and re-joined with the same delimiter, and anything
// from the first '.' onwards is dropped.
//
// Returns a two-element list: the meta map (id, single_end, strandedness)
// and the list of FASTQ file objects. Calls stopNow() when a required
// FASTQ path is missing from the row or does not exist.
def create_fastq_channel(LinkedHashMap row) {

    def meta = [:]
    meta.id = row.sample
    meta.single_end = row.single_end.toBoolean()
    meta.strandedness = row.strandedness

    // String.split() interprets its argument as a regular expression, so
    // the delimiter is quoted to be matched literally (e.g. '.' or '|'),
    // consistent with its literal use in join() below.
    def delim = params.fq_filename_delim.toString()
    def id_parts = meta.id.split(java.util.regex.Pattern.quote(delim))

    if (id_parts.size() > 0) {
        // Never ask for more fields than the sample name actually has;
        // in that case the whole name is used as the group id.
        def last_idx = Math.min(params.fq_filename_delim_idx.toInteger(), id_parts.size()) - 1
        meta.id = id_parts[0..last_idx].join(delim)
    }

    // Drop everything from the first '.' onwards (e.g. a file extension)
    meta.id = (meta.id =~ /\./ ? meta.id.take(meta.id.indexOf('.')) : meta.id)

    def array = []

    // An empty cell is reported with the same message as a missing file,
    // rather than being handed to file().
    if (!row.fq1 || !file(row.fq1).exists()) {
        stopNow("Please check input metadata CSV. The following Read 1 FASTQ file does not exist!" +
            "\n${row.fq1}")
    }
    if (meta.single_end) {
        array = [ meta, [ file(row.fq1) ] ]
    } else {
        if (!row.fq2 || !file(row.fq2).exists()) {
            stopNow("Please check input metadata CSV. The following Read 2 FASTQ file does not exist!" +
                "\n${row.fq2}")
        }
        array = [ meta, [ file(row.fq1), file(row.fq2) ] ]
    }
    return array
}