Mercurial > repos > galaxytrakr > hfp_bettercallsal_konda
comparison 1.0.0/subworkflows/process_fastq.nf @ 0:0a8dda29956e draft default tip
planemo upload
| author | galaxytrakr |
|---|---|
| date | Thu, 28 May 2026 20:41:10 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0a8dda29956e |
|---|---|
| 1 // Include any necessary methods and modules | |
| 2 include { stopNow; validateParamsForFASTQ } from "${params.routines}" | |
| 3 include { GEN_SAMPLESHEET } from "${params.modules}${params.fs}gen_samplesheet${params.fs}main" | |
| 4 include { SAMPLESHEET_CHECK } from "${params.modules}${params.fs}samplesheet_check${params.fs}main" | |
| 5 include { CAT_FASTQ } from "${params.modules}${params.fs}cat${params.fs}fastq${params.fs}main" | |
| 6 include { SEQKIT_SEQ } from "${params.modules}${params.fs}seqkit${params.fs}seq${params.fs}main" | |
| 7 | |
| 8 // Validate 4 required workflow parameters if | |
| 9 // FASTQ files are the input for the | |
| 10 // entry point. | |
| 11 validateParamsForFASTQ() | |
| 12 | |
// Subworkflow PROCESS_FASTQ
//
// Turns either a directory of FASTQ files (--input) or a metadata CSV
// (--metadata) into a channel of [ meta, [ fastq files ] ] entries.
//
// Steps:
//   1. --input given    : check the FASTQ related options, make sure the
//                         directory holds at least one file ending with
//                         --fq_suffix, then run GEN_SAMPLESHEET on it.
//      --metadata given : make sure the CSV exists and is not empty.
//   2. SAMPLESHEET_CHECK the sheet, split it into rows and convert every
//      row with create_fastq_channel().
//   3. Group rows by meta. Groups with more than one row go through
//      CAT_FASTQ; groups with exactly one row are passed on untouched.
//   4. If --fq_filter_by_len is greater than 0, run SEQKIT_SEQ on the reads.
//
// Emits:
//   processed_reads : reads after step 3 (and step 4 when enabled)
//   versions        : versions outputs of the modules that were invoked
workflow PROCESS_FASTQ {
    main:
        versions = Channel.empty()
        input_ch = Channel.empty()

        // --input takes precedence over --metadata when both are given.
        def input = file( (params.input ?: params.metadata) )

        if (params.input) {
            def fastq_files = []

            if (params.fq_suffix == null) {
                stopNow("We need to know what suffix the FASTQ files ends with inside the\n" +
                    "directory. Please use the --fq_suffix option to indicate the file\n" +
                    "suffix by which the files are to be collected to run the pipeline on.")
            }

            if (params.fq_strandedness == null) {
                stopNow("We need to know if the FASTQ files inside the directory\n" +
                    "are sequenced using stranded or non-stranded sequencing. This is generally\n" +
                    "required if the sequencing experiment is RNA-SEQ. For almost all of the other\n" +
                    "cases, you can probably use the --fq_strandedness unstranded option to indicate\n" +
                    "that the reads are unstranded.")
            }

            if (params.fq_filename_delim == null || params.fq_filename_delim_idx == null) {
                stopNow("We need to know the delimiter of the filename of the FASTQ files.\n" +
                    "By default the filename delimiter is _ (underscore). This delimiter character\n" +
                    "is used to split and assign a group name. The group name can be controlled by\n" +
                    "using the --fq_filename_delim_idx option (1-based). For example, if the FASTQ\n" +
                    "filename is WT_REP1_001.fastq, then to create a group WT, use the following\n" +
                    "options: --fq_filename_delim _ --fq_filename_delim_idx 1")
            }

            if (!input.exists()) {
                stopNow("The input directory,\n${params.input}\ndoes not exist!")
            }

            // Collect only the files that carry the requested suffix; nothing
            // is recorded for files that do not match.
            input.eachFileRecurse {
                if (it.name.endsWith("${params.fq_suffix}")) {
                    fastq_files << it
                }
            }

            if (fastq_files.isEmpty()) {
                stopNow("The input directory,\n${params.input}\nis empty! or does not " +
                    "have FASTQ files ending with the suffix: ${params.fq_suffix}")
            }

            GEN_SAMPLESHEET( Channel.fromPath(params.input, type: 'dir') )
            input_ch = GEN_SAMPLESHEET.out.csv
            versions = versions.mix( GEN_SAMPLESHEET.out.versions )
        } else if (params.metadata) {
            if (!input.exists()) {
                stopNow("The metadata CSV file,\n${params.metadata}\ndoes not exist!")
            }

            if (input.size() <= 0) {
                stopNow("The metadata CSV file,\n${params.metadata}\nis empty!")
            }

            input_ch = Channel.fromPath(params.metadata, type: 'file')
        }

        // One [ meta, [ fastq files ] ] entry per CSV row, grouped by meta and
        // then routed by the number of rows that share the same meta.
        reads = SAMPLESHEET_CHECK( input_ch )
            .csv
            .splitCsv( header: true, sep: ',')
            .map { create_fastq_channel(it) }
            .groupTuple(by: [0])
            .branch {
                meta, fastq ->
                    single   : fastq.size() == 1
                        return [ meta, fastq.flatten() ]
                    multiple : fastq.size() > 1
                        return [ meta, fastq.flatten() ]
            }

        processed_reads = CAT_FASTQ( reads.multiple )
            .catted_reads
            .mix( reads.single )

        // An unset --fq_filter_by_len is treated as 0, i.e. no length filtering,
        // instead of failing on a null value.
        if ((params.fq_filter_by_len ?: 0).toInteger() > 0) {
            processed_reads = SEQKIT_SEQ( processed_reads ).fastx

            versions = versions.mix( SEQKIT_SEQ.out.versions.first().ifEmpty(null) )
        }

        versions = versions.mix(
            SAMPLESHEET_CHECK.out.versions,
            CAT_FASTQ.out.versions.first().ifEmpty(null)
        )

    emit:
        processed_reads
        versions
}
| 116 | |
// Convert one samplesheet row into a [ meta, [ fastq files ] ] entry.
//
// row : map with the keys sample, single_end, strandedness, fq1 and,
//       for paired-end rows, fq2.
//
// Returns [ meta, [ fq1 ] ] when single_end is true, otherwise
// [ meta, [ fq1, fq2 ] ]. meta carries id, single_end and strandedness.
//
// meta.id is derived from row.sample: the first --fq_filename_delim_idx
// fields (split on --fq_filename_delim) are joined back with the delimiter
// and anything from the first '.' onwards is removed.
//
// Calls stopNow() when a required FASTQ path is missing from the row or
// does not exist on disk.
def create_fastq_channel(LinkedHashMap row) {

    def meta = [:]
    meta.id = row.sample
    meta.single_end = row.single_end.toBoolean()
    meta.strandedness = row.strandedness

    // String.split() interprets its argument as a regular expression, so the
    // delimiter is quoted to make characters such as '.', '|' or '+' split
    // literally.
    def delim = params.fq_filename_delim.toString()
    def fields = meta.id.split(java.util.regex.Pattern.quote(delim))

    if (fields.size() > 0) {
        // Clamp the requested field count to what the name actually has so a
        // short sample name yields the whole name instead of an index error.
        def last_idx = Math.min(params.fq_filename_delim_idx.toInteger(), fields.size()) - 1
        meta.id = fields[0..last_idx].join(delim)
    }

    // Drop everything from the first '.' onwards (e.g. a file extension).
    def dot_at = meta.id.indexOf('.')
    if (dot_at >= 0) {
        meta.id = meta.id.take(dot_at)
    }

    // An empty cell is reported the same way as a non-existent file, before
    // it can reach file().
    if (!row.fq1 || !file(row.fq1).exists()) {
        stopNow("Please check input metadata CSV. The following Read 1 FASTQ file does not exist!" +
            "\n${row.fq1}")
    }

    def fastqs = [ file(row.fq1) ]

    if (!meta.single_end) {
        if (!row.fq2 || !file(row.fq2).exists()) {
            stopNow("Please check input metadata CSV. The following Read 2 FASTQ file does not exist!" +
                "\n${row.fq2}")
        }
        fastqs << file(row.fq2)
    }

    return [ meta, fastqs ]
}
