annotate 0.5.0/subworkflows/process_fastq.nf @ 0:3c767f9cfd88 draft default tip

planemo upload
author galaxytrakr
date Fri, 29 May 2026 13:37:56 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
1 // Include any necessary methods and modules
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
2 include { stopNow; validateParamsForFASTQ } from "${params.routines}"
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
3 include { GEN_SAMPLESHEET } from "${params.modules}${params.fs}gen_samplesheet${params.fs}main"
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
4 include { SAMPLESHEET_CHECK } from "${params.modules}${params.fs}samplesheet_check${params.fs}main"
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
5 include { CAT_FASTQ } from "${params.modules}${params.fs}cat${params.fs}fastq${params.fs}main"
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
6 include { SEQKIT_SEQ } from "${params.modules}${params.fs}seqkit${params.fs}seq${params.fs}main"
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
7
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
8 // Validate 4 required workflow parameters if
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
9 // FASTQ files are the input for the
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
10 // entry point.
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
11 validateParamsForFASTQ()
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
12
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
13 // Start the subworkflow
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
/*
 * PROCESS_FASTQ
 *
 * Turns either a directory of FASTQ files (--input) or a metadata CSV
 * (--metadata) into a channel of [ meta, [ fastq, ... ] ] items.
 *
 * Steps, in order:
 *   1. With --input: check the FASTQ-related parameters, make sure the
 *      directory holds at least one file ending in --fq_suffix, and run
 *      GEN_SAMPLESHEET on the directory to obtain a CSV.
 *      With --metadata: check that the CSV exists and is not empty.
 *   2. Run SAMPLESHEET_CHECK on the CSV and convert each row with
 *      create_fastq_channel().
 *   3. Group rows by meta; groups holding more than one row go through
 *      CAT_FASTQ, groups holding exactly one row are passed along as is.
 *   4. If --fq_filter_by_len is greater than 0, run SEQKIT_SEQ on the reads.
 *
 * Emits:
 *   processed_reads - channel of reads after steps 3 and 4
 *   versions        - mix of the `versions` outputs of the modules that ran
 */
workflow PROCESS_FASTQ {
    main:
        versions = Channel.empty()
        input_ch = Channel.empty()
        reads    = Channel.empty()

        // --input takes precedence over --metadata when both are given.
        def input = file( (params.input ?: params.metadata) )

        if (params.input) {
            if (params.fq_suffix == null) {
                stopNow("We need to know what suffix the FASTQ files ends with inside the\n" +
                    "directory. Please use the --fq_suffix option to indicate the file\n" +
                    "suffix by which the files are to be collected to run the pipeline on.")
            }

            if (params.fq_strandedness == null) {
                stopNow("We need to know if the FASTQ files inside the directory\n" +
                    "are sequenced using stranded or non-stranded sequencing. This is generally\n" +
                    "required if the sequencing experiment is RNA-SEQ. For almost all of the other\n" +
                    "cases, you can probably use the --fq_strandedness unstranded option to indicate\n" +
                    "that the reads are unstranded.")
            }

            if (params.fq_filename_delim == null || params.fq_filename_delim_idx == null) {
                stopNow("We need to know the delimiter of the filename of the FASTQ files.\n" +
                    "By default the filename delimiter is _ (underscore). This delimiter character\n" +
                    "is used to split and assign a group name. The group name can be controlled by\n" +
                    "using the --fq_filename_delim_idx option (1-based). For example, if the FASTQ\n" +
                    "filename is WT_REP1_001.fastq, then to create a group WT, use the following\n" +
                    "options: --fq_filename_delim _ --fq_filename_delim_idx 1")
            }

            if (!input.exists()) {
                stopNow("The input directory,\n${params.input}\ndoes not exist!")
            }

            // eachFileRecurse only works on directories; without this guard a
            // regular file passed as --input fails with a raw exception.
            if (!input.isDirectory()) {
                stopNow("The input path,\n${params.input}\nis not a directory!")
            }

            // Collect only the files that carry the requested suffix.
            def fastq_files = []
            input.eachFileRecurse { candidate ->
                if (candidate.name.endsWith("${params.fq_suffix}")) {
                    fastq_files << candidate
                }
            }

            if (fastq_files.isEmpty()) {
                stopNow("The input directory,\n${params.input}\nis empty! or does not " +
                    "have FASTQ files ending with the suffix: ${params.fq_suffix}")
            }

            GEN_SAMPLESHEET( Channel.fromPath(params.input, type: 'dir') )
            GEN_SAMPLESHEET.out.csv.set{ input_ch }
            versions.mix( GEN_SAMPLESHEET.out.versions )
                .set { versions }
        } else if (params.metadata) {
            if (!input.exists()) {
                stopNow("The metadata CSV file,\n${params.metadata}\ndoes not exist!")
            }

            if (input.size() <= 0) {
                stopNow("The metadata CSV file,\n${params.metadata}\nis empty!")
            }

            Channel.fromPath(params.metadata, type: 'file')
                .set { input_ch }
        }

        // One [ meta, [ files ] ] item per CSV row, grouped on meta (index 0),
        // then routed by how many rows ended up in each group.
        SAMPLESHEET_CHECK( input_ch )
            .csv
            .splitCsv( header: true, sep: ',')
            .map { create_fastq_channel(it) }
            .groupTuple(by: [0])
            .branch {
                meta, fastq ->
                    single   : fastq.size() == 1
                        return [ meta, fastq.flatten() ]
                    multiple : fastq.size() > 1
                        return [ meta, fastq.flatten() ]
            }
            .set { reads }

        // Only multi-row groups are concatenated; single-row groups are
        // mixed back in untouched.
        CAT_FASTQ( reads.multiple )
            .catted_reads
            .mix( reads.single )
            .set { processed_reads }

        if (params.fq_filter_by_len.toInteger() > 0) {
            SEQKIT_SEQ( processed_reads )
                .fastx
                .set { processed_reads }

            versions.mix( SEQKIT_SEQ.out.versions.first().ifEmpty(null) )
                .set { versions }
        }

        versions.mix(
            SAMPLESHEET_CHECK.out.versions,
            CAT_FASTQ.out.versions.first().ifEmpty(null)
        )
        .set { versions }

    emit:
        processed_reads
        versions
}
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
116
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
117 // Function to get list of [ meta, [ fq1, fq2 ] ]
3c767f9cfd88 planemo upload
galaxytrakr
parents:
diff changeset
/*
 * Build one channel item from a single samplesheet row.
 *
 * The row is read through the keys `sample`, `single_end`, `strandedness`,
 * `fq1` and, for paired-end rows, `fq2`.
 *
 * meta.id is derived from `sample`: the name is split on
 * params.fq_filename_delim, the first params.fq_filename_delim_idx fields
 * are kept and re-joined with the same delimiter, and anything from the
 * first '.' onwards is then dropped.
 *
 * Returns [ meta, [ fq1 ] ] when single_end is true,
 * otherwise [ meta, [ fq1, fq2 ] ].
 *
 * Calls stopNow() when a FASTQ file named in the row does not exist.
 */
def create_fastq_channel(LinkedHashMap row) {

    def meta = [:]
    meta.id           = row.sample
    meta.single_end   = row.single_end.toBoolean()
    meta.strandedness = row.strandedness

    // String.split() takes a regular expression while join() below uses the
    // delimiter literally, so quote it to make both sides agree. Without
    // this, delimiters such as '.' or '|' do not split as intended.
    def delim  = params.fq_filename_delim.toString()
    def fields = meta.id.split(java.util.regex.Pattern.quote(delim))

    if (fields.size() > 0) {
        // Clamp the requested field count to what the name actually has,
        // otherwise the range below runs past the end of the array.
        def keep = Math.min(params.fq_filename_delim_idx.toInteger(), fields.size())
        meta.id = fields[0..(keep - 1)].join(delim)
    }

    // Strip everything from the first '.' onwards (e.g. a file extension).
    if (meta.id.contains('.')) {
        meta.id = meta.id.take(meta.id.indexOf('.'))
    }

    if (!file(row.fq1).exists()) {
        stopNow("Please check input metadata CSV. The following Read 1 FASTQ file does not exist!" +
            "\n${row.fq1}")
    }

    if (meta.single_end) {
        return [ meta, [ file(row.fq1) ] ]
    }

    if (!file(row.fq2).exists()) {
        stopNow("Please check input metadata CSV. The following Read 2 FASTQ file does not exist!" +
            "\n${row.fq2}")
    }

    return [ meta, [ file(row.fq1), file(row.fq2) ] ]
}