kkonganti@1
|
1 // Include any necessary methods and modules
|
kkonganti@1
|
2 include { stopNow; validateParamsForFASTQ } from "${params.routines}"
|
kkonganti@1
|
3 include { GEN_SAMPLESHEET } from "${params.modules}${params.fs}gen_samplesheet${params.fs}main"
|
kkonganti@1
|
4 include { SAMPLESHEET_CHECK } from "${params.modules}${params.fs}samplesheet_check${params.fs}main"
|
kkonganti@1
|
5 include { CAT_FASTQ } from "${params.modules}${params.fs}cat${params.fs}fastq${params.fs}main"
|
kkonganti@1
|
6 include { SEQKIT_SEQ } from "${params.modules}${params.fs}seqkit${params.fs}seq${params.fs}main"
|
kkonganti@1
|
7
|
kkonganti@1
|
8 // Validate 4 required workflow parameters if
|
kkonganti@1
|
9 // FASTQ files are the input for the
|
kkonganti@1
|
10 // entry point.
|
kkonganti@1
|
11 validateParamsForFASTQ()
|
kkonganti@1
|
12
|
kkonganti@1
|
// Subworkflow PROCESS_FASTQ
//
// Input is taken from exactly one of:
//   --input     a directory that is scanned recursively for files whose
//               name ends with --fq_suffix; GEN_SAMPLESHEET is run on it
//   --metadata  an existing, non-empty CSV file used as the samplesheet
//
// The samplesheet is passed through SAMPLESHEET_CHECK, each CSV row is turned
// into [ meta, [ fastq... ] ] by create_fastq_channel(), rows are grouped by
// meta, groups with more than one FASTQ entry go through CAT_FASTQ, and the
// result is run through SEQKIT_SEQ when params.fq_filter_by_len > 0.
//
// Emits:
//   processed_reads  channel of [ meta, reads ]
//   versions         mixed `versions` outputs of the processes that were run
workflow PROCESS_FASTQ {
    main:
        versions = Channel.empty()
        input_ch = Channel.empty()
        reads = Channel.empty()

        // Whichever of the two inputs was given; --input takes precedence.
        def input = file( (params.input ?: params.metadata) )

        if (params.input) {
            // Number of entries under the input directory matching the suffix.
            def n_matching = 0

            if (params.fq_suffix == null) {
                stopNow("We need to know what suffix the FASTQ files ends with inside the\n" +
                    "directory. Please use the --fq_suffix option to indicate the file\n" +
                    "suffix by which the files are to be collected to run the pipeline on.")
            }

            if (params.fq_strandedness == null) {
                stopNow("We need to know if the FASTQ files inside the directory\n" +
                    "are sequenced using stranded or non-stranded sequencing. This is generally\n" +
                    "required if the sequencing experiment is RNA-SEQ. For almost all of the other\n" +
                    "cases, you can probably use the --fq_strandedness unstranded option to indicate\n" +
                    "that the reads are unstranded.")
            }

            if ([ params.fq_filename_delim, params.fq_filename_delim_idx ].any { it == null }) {
                stopNow("We need to know the delimiter of the filename of the FASTQ files.\n" +
                    "By default the filename delimiter is _ (underscore). This delimiter character\n" +
                    "is used to split and assign a group name. The group name can be controlled by\n" +
                    "using the --fq_filename_delim_idx option (1-based). For example, if the FASTQ\n" +
                    "filename is WT_REP1_001.fastq, then to create a group WT, use the following\n" +
                    "options: --fq_filename_delim _ --fq_filename_delim_idx 1")
            }

            if (!input.exists()) {
                stopNow("The input directory,\n${params.input}\ndoes not exist!")
            }

            // Only the count matters below, so tally matches instead of collecting them.
            input.eachFileRecurse { entry ->
                if (entry.name.endsWith("${params.fq_suffix}")) {
                    n_matching++
                }
            }

            if (n_matching == 0) {
                stopNow("The input directory,\n${params.input}\nis empty! or does not " +
                    "have FASTQ files ending with the suffix: ${params.fq_suffix}")
            }

            GEN_SAMPLESHEET( Channel.fromPath(params.input, type: 'dir') )
            input_ch = GEN_SAMPLESHEET.out.csv
            versions = versions.mix( GEN_SAMPLESHEET.out.versions )
        } else if (params.metadata) {
            if (!input.exists()) {
                stopNow("The metadata CSV file,\n${params.metadata}\ndoes not exist!")
            }

            if (input.size() <= 0) {
                stopNow("The metadata CSV file,\n${params.metadata}\nis empty!")
            }

            input_ch = Channel.fromPath(params.metadata, type: 'file')
        }

        // One [ meta, [ fastq... ] ] per CSV row, grouped on meta, then split by
        // how many rows contributed to each group.
        reads = SAMPLESHEET_CHECK( input_ch )
            .csv
            .splitCsv( header: true, sep: ',')
            .map { csv_row -> create_fastq_channel(csv_row) }
            .groupTuple(by: [0])
            .branch {
                sample_meta, fq_groups ->
                    single   : fq_groups.size() == 1
                        return [ sample_meta, fq_groups.flatten() ]
                    multiple : fq_groups.size() > 1
                        return [ sample_meta, fq_groups.flatten() ]
            }

        // Groups with several entries go through CAT_FASTQ; single-entry groups
        // are mixed back in unchanged.
        processed_reads = CAT_FASTQ( reads.multiple )
            .catted_reads
            .mix( reads.single )

        if (params.fq_filter_by_len.toInteger() > 0) {
            processed_reads = SEQKIT_SEQ( processed_reads ).fastx

            versions = versions.mix( SEQKIT_SEQ.out.versions.first().ifEmpty(null) )
        }

        versions = versions.mix(
            SAMPLESHEET_CHECK.out.versions,
            CAT_FASTQ.out.versions.first().ifEmpty(null)
        )

    emit:
        processed_reads
        versions
}
|
kkonganti@1
|
116
|
kkonganti@1
|
// Build one [ meta, [ fq1 (, fq2) ] ] entry from a samplesheet CSV row.
//
// row: map read from the samplesheet; the keys used here are `sample`,
//      `single_end`, `strandedness`, `fq1` and, for paired-end rows, `fq2`.
//
// meta.id is derived from row.sample by keeping the first
// params.fq_filename_delim_idx (1-based) fields when the name is split on
// params.fq_filename_delim, and then dropping everything from the first '.'
// onwards.
//
// Returns [ meta, [ fq1 ] ] for single-end rows and [ meta, [ fq1, fq2 ] ]
// otherwise. stopNow() is called when a referenced FASTQ file does not exist.
def create_fastq_channel(LinkedHashMap row) {

    def meta = [:]
    meta.id = row.sample
    meta.single_end = row.single_end.toBoolean()
    meta.strandedness = row.strandedness

    // String.split() interprets its argument as a regular expression, so the
    // delimiter is quoted to make characters such as '.', '|' or '+' split
    // literally.
    def delim = params.fq_filename_delim.toString()
    def fields = meta.id.split(java.util.regex.Pattern.quote(delim))

    if (fields.size() > 0) {
        // Clamp the requested field count to what the name actually has so an
        // over-large --fq_filename_delim_idx keeps the whole name instead of
        // raising an index-out-of-bounds error.
        def last = Math.min(params.fq_filename_delim_idx.toInteger(), fields.size()) - 1
        meta.id = fields[0..last].join(delim)
    }

    // Strip everything from the first '.' onwards, if there is one.
    def dot = meta.id.indexOf('.')
    if (dot >= 0) {
        meta.id = meta.id.take(dot)
    }

    def array = []

    def fq1 = file(row.fq1)
    if (!fq1.exists()) {
        stopNow("Please check input metadata CSV. The following Read 1 FASTQ file does not exist!" +
            "\n${row.fq1}")
    }
    if (meta.single_end) {
        array = [ meta, [ fq1 ] ]
    } else {
        def fq2 = file(row.fq2)
        if (!fq2.exists()) {
            stopNow("Please check input metadata CSV. The following Read 2 FASTQ file does not exist!" +
                "\n${row.fq2}")
        }
        array = [ meta, [ fq1, fq2 ] ]
    }
    return array
}