rliterman@0
|
1 #! /usr/bin/env nextflow
|
rliterman@0
|
2 nextflow.enable.dsl=2
|
rliterman@0
|
3
|
rliterman@0
|
4 // CSP2 Main Script
|
rliterman@0
|
5 // Params are read in from command line or from nextflow.config and/or conf/profiles.config
|
rliterman@0
|
6
|
rliterman@0
|
7 // Check if help flag was passed
|
rliterman@0
|
8 help1 = "${params.help}" == "nohelp" ? "nohelp" : "help"
|
rliterman@0
|
9 help2 = "${params.h}" == "nohelp" ? "nohelp" : "help"
|
rliterman@0
|
10
|
rliterman@0
|
11 def printHelp() {
|
rliterman@0
|
12 println """
|
rliterman@0
|
13 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
rliterman@0
|
14 CSP2
|
rliterman@0
|
15 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
rliterman@0
|
16
|
rliterman@0
|
17 Global default params:
|
rliterman@0
|
18
|
rliterman@0
|
19 --out Set name for output folder/file prefixes (Default: CSP2_<timestamp>)
|
rliterman@0
|
20 --outroot Set output parent directory (Default: CWD; Useful to hardset in nextflow.config if
|
rliterman@0
|
21 you want all output go to the same parent folder, with unique IDs set by --out)
|
rliterman@0
|
22 --tmp_dir Manually specify a TMP directory for pybedtools output
|
rliterman@0
|
23 --help/--h Display this help menu
|
rliterman@0
|
24
|
rliterman@0
|
25
|
rliterman@0
|
26 CSP2 can run in the following run modes:
|
rliterman@0
|
27
|
rliterman@0
|
28 --runmode Run mode for CSP2:
|
rliterman@0
|
29
|
rliterman@0
|
30 - assemble: Assemble read data (--reads/--ref_reads) into FASTA using SKESA
|
rliterman@0
|
31
|
rliterman@0
|
32 - align: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta),
|
rliterman@0
|
33 run MUMmer alignment analysis for each query/ref combination
|
rliterman@0
|
34
|
rliterman@0
|
35 - screen: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta)
|
rliterman@0
|
36 and/or MUMmer output (.snpdiffs), create a report for raw SNP
|
rliterman@0
|
37 distances between each query and reference assembly
|
rliterman@0
|
38
|
rliterman@0
|
39 - snp: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta)
|
rliterman@0
|
40 and/or MUMmer output (.snpdiffs), generate alignments and pairwise
|
rliterman@0
|
41 distances for all queries based on each reference dataset
|
rliterman@0
|
42
|
rliterman@0
|
43 Input Data:
|
rliterman@0
|
44
|
rliterman@0
|
45 --fasta Location for query isolate assembly data (.fasta/.fa/.fna). Can be a list of files, a path
|
rliterman@0
|
46 to a single FASTA, or a path to a directory with assemblies.
|
rliterman@0
|
47 --ref_fasta Location for reference isolate assembly data (.fasta/.fa/.fna). Can be a list of files, a
|
rliterman@0
|
48 path to a single FASTA, or a path to a directory with assemblies.
|
rliterman@0
|
49
|
rliterman@0
|
50 --reads Directory or list of directories containing query isolate read data
|
rliterman@0
|
51 --readext Read file extension (Default: fastq.gz)
|
rliterman@0
|
52 --forward Forward read file suffix (Default: _1.fastq.gz)
|
rliterman@0
|
53 --reverse Reverse read file suffix (Default: _2.fastq.gz)
|
rliterman@0
|
54
|
rliterman@0
|
55 --ref_reads Directory or list of directories containing reference isolate read data
|
rliterman@0
|
56 --ref_readext Reference read file extension (Default: fastq.gz)
|
rliterman@0
|
57 --ref_forward Reference forward read file suffix (Default: _1.fastq.gz)
|
rliterman@0
|
58 --ref_reverse Reference reverse read file suffix (Default: _2.fastq.gz)
|
rliterman@0
|
59
|
rliterman@0
|
60 --snpdiffs Location for pre-generated snpdiffs files (List of snpdiffs files, directory with snpdiffs)
|
rliterman@0
|
61
|
rliterman@0
|
62 --ref_id IDs to specify reference sequences (Comma-separated list; e.g., Sample_A,Sample_B,Sample_C)
|
rliterman@0
|
63
|
rliterman@0
|
64 --trim_name A common string to remove from all sample IDs (Default: ''; Useful if all assemblies end in
|
rliterman@0
|
65 something like "_contigs_skesa.fasta")
|
rliterman@0
|
66
|
rliterman@0
|
67 --n_ref If running in --runmode snp, the number of reference genomes for CSP2 to select if none are provided (Default: 1)
|
rliterman@0
|
68
|
rliterman@0
|
69 --exclude A comma-separated list of IDs to remove prior to analysis (Useful for removing low quality
|
rliterman@0
|
70 isolates in combination with --snpdiffs)
|
rliterman@0
|
71
|
rliterman@0
|
72 QC variables:
|
rliterman@0
|
73
|
rliterman@0
|
74 --min_cov Only consider queries if the reference genome is covered by at least <min_cov>% (Default: 85)
|
rliterman@0
|
75 --min_len Only consider SNPs from contig alignments longer than <min_len> bp (Default: 500)
|
rliterman@0
|
76 --min_iden Only consider SNPs from alignments with at least <min_iden> percent identity (Default: 99)
|
rliterman@0
|
77 --dwin A comma-separated set of window sizes for SNP density filters (Default: 1000,125,15; Set --dwin 0 to disable density filtering)
|
rliterman@0
|
78 --wsnps A comma-separated set of maximum SNP counts per window above (Default: 3,2,1)
|
rliterman@0
|
79 --max_missing If running in --runmode snp, mask SNPs where data is missing or purged from <max_missing>% of isolates (Default: 50)
|
rliterman@0
|
80
|
rliterman@0
|
81 Edge Trimming:
|
rliterman@0
|
82
|
rliterman@0
|
83 --ref_edge Don't include SNPs that fall within <ref_edge>bp of a reference contig edge (Default: 150)
|
rliterman@0
|
84 --query_edge Don't include SNPs that fall within <query_edge>bp of a query contig edge (Default: 150)
|
rliterman@0
|
85 --rescue If flagged (Default: not flagged), sites that were filtered out due solely to query edge proximity are rescued if
|
rliterman@0
|
86 the same reference position is covered more centrally by another query
|
rliterman@0
|
87 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
rliterman@0
|
88
|
rliterman@0
|
89 Example Commands:
|
rliterman@0
|
90
|
rliterman@0
|
91 1) Run CSP2 in SNP Pipeline mode using all the FASTA from /my/data/dir, and choose 3 references
|
rliterman@0
|
92
|
rliterman@0
|
93 nextflow run CSP2.nf --runmode snp --fasta /my/data/dir --n_ref 3
|
rliterman@0
|
94
|
rliterman@0
|
95 2) Screen all the paired-end .fastq files from /my/read/dir against the reference isolate in /my/reference/isolates.txt
|
rliterman@0
|
96
|
rliterman@0
|
97 nextflow run CSP2.nf --runmode screen --ref_fasta /my/reference/isolates.txt --reads /my/read/dir --readext .fastq --forward _1.fastq --reverse _2.fastq
|
rliterman@0
|
98
|
rliterman@0
|
99 3) Re-run the SNP pipeline using old snpdiffs files after changing the density filters and removing a bad sample
|
rliterman@0
|
100
|
rliterman@0
|
101 nextflow run CSP2.nf --runmode snp --snpdiffs /my/old/analysis/snpdiffs --dwin 5000,2500,1000 --wsnps 6,4,2 --ref_id Sample_A --exclude Sample_Q --out HQ_Density
|
rliterman@0
|
102
|
rliterman@0
|
103 4) Run in assembly mode and use HPC modules specified in profiles.config (NOTE: Setting the profile in nextflow uses a single hyphen (-) as compared to other arguments (--))
|
rliterman@0
|
104
|
rliterman@0
|
105 nextflow run CSP2.nf -profile myHPC --runmode assemble --reads /my/read/dir --out Assemblies
|
rliterman@0
|
106
|
rliterman@0
|
107 5) Run in SNP pipeline mode using SLURM and use the built in conda environment (NOTE: For local jobs using conda, use -profile standard_conda)
|
rliterman@0
|
108
|
rliterman@0
|
109 nextflow run CSP2.nf -profile slurm_conda --runmode snp --fasta /my/data/dir --out CSP2_Conda
|
rliterman@0
|
110
|
rliterman@0
|
111 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
rliterman@0
|
112 """
|
rliterman@0
|
113 System.exit(0)
|
rliterman@0
|
114 }
|
rliterman@0
|
115
|
rliterman@0
|
116 if (help1 == "help") {
|
rliterman@0
|
117 printHelp()
|
rliterman@0
|
118 } else if(help2 =="help"){
|
rliterman@0
|
119 printHelp()
|
rliterman@0
|
120 }
|
rliterman@0
|
121
|
rliterman@0
|
122 // Assess run mode
|
rliterman@0
|
123 if (params.runmode == "") {
|
rliterman@0
|
124 error "--runmode must be specified..."
|
rliterman@0
|
125 } else if (!['align','assemble', 'screen', 'snp','conda_init'].contains(params.runmode)){
|
rliterman@0
|
126 error "--runmode must be 'align','assemble', 'screen', or 'snp', not ${params.runmode}..."
|
rliterman@0
|
127 }
|
rliterman@0
|
128
|
rliterman@0
|
129 // Unless runmode is conda_init (which only launches a local dummy process to spur generation of the conda environment), validate inputs and set up output directories
|
rliterman@0
|
130 if (params.runmode != "conda_init") {
|
rliterman@0
|
131
|
rliterman@0
|
132 // Ensure necessary data is provided given the run mode
|
rliterman@0
|
133 // Runmode 'assemble'
|
rliterman@0
|
134 // - Requires: --reads/--ref_reads
|
rliterman@0
|
135 // - Runs SKESA and summarizes output FASTA
|
rliterman@0
|
136 if (params.runmode == "assemble"){
|
rliterman@0
|
137 if((params.reads == "") && (params.ref_reads == "")){
|
rliterman@0
|
138 error "Runmode is --assemble but no read data provided via --reads/--ref_reads"
|
rliterman@0
|
139 }
|
rliterman@0
|
140 }
|
rliterman@0
|
141
|
rliterman@0
|
142 // Runmode 'align'
|
rliterman@0
|
143 // - Requires: --reads/--fasta/--snpdiffs
|
rliterman@0
|
144 // - Optional: --ref_reads/--ref_fasta/--ref_id
|
rliterman@0
|
145 // - Runs MUMmer, generates .snpdiffs, and alignment summary.
|
rliterman@0
|
146 // - If references are provided via --ref_reads/--ref_fasta/--ref_id, non-reference samples are aligned to each reference
|
rliterman@0
|
147 // - If no references are provided, alignments are all-vs-all
|
rliterman@0
|
148 // - If --snpdiffs are provided, their FASTAs will be autodetected and, if present, used as queries or references as specified by --ref_reads/--ref_fasta/--ref_id
|
rliterman@0
|
149 // - Does NOT perform QC filtering
|
rliterman@0
|
150
|
rliterman@0
|
151 else if (params.runmode == "align"){
|
rliterman@0
|
152 if((params.fasta == "") && (params.reads == "") && (params.snpdiffs == "")){
|
rliterman@0
|
153 error "Runmode is --align but no query data provided via --fasta/--reads/--snpdiffs"
|
rliterman@0
|
154 }
|
rliterman@0
|
155 }
|
rliterman@0
|
156
|
rliterman@0
|
157 // Runmode 'screen'
|
rliterman@0
|
158 // - Requires: --reads/--fasta/--snpdiffs
|
rliterman@0
|
159 // - Optional: --ref_reads/--ref_fasta/--ref_id
|
rliterman@0
|
160 // - Generates .snpdiffs files (if needed), applies QC, and generates alignment summaries and SNP distance estimates
|
rliterman@0
|
161 // - If references are provided via --ref_reads/--ref_fasta/--ref_id, non-reference samples are aligned to each reference
|
rliterman@0
|
162 // - If no references are provided, alignments are all-vs-all
|
rliterman@0
|
163 // - If --snpdiffs are provided, (1) they will be QC filtered and included in the output report and (2) their FASTAs will be autodetected and, if present, used as queries or references as specified by --ref_reads/--ref_fasta/--ref_id
|
rliterman@0
|
164
|
rliterman@0
|
165 else if (params.runmode == "screen"){
|
rliterman@0
|
166 if((params.fasta == "") && (params.reads == "") && (params.snpdiffs == "")){
|
rliterman@0
|
167 error "Runmode is --screen but no query data provided via --snpdiffs/--reads/--fasta"
|
rliterman@0
|
168 }
|
rliterman@0
|
169 }
|
rliterman@0
|
170
|
rliterman@0
|
171 // Runmode 'snp'
|
rliterman@0
|
172 // - Requires: --reads/--fasta/--snpdiffs
|
rliterman@0
|
173 // - Optional: --ref_reads/--ref_fasta/--ref_id
|
rliterman@0
|
174 // - If references are not provided, runs RefChooser using all FASTAs to choose references (--n_ref sets how many references to choose)
|
rliterman@0
|
175 // - Each query is aligned to each reference, and pairwise SNP distances for all queries are generated based on that reference
|
rliterman@0
|
176 // - Generates .snpdiffs files (if needed), applies QC, and generates SNP distance data between all queries based on their alignment to each reference
|
rliterman@0
|
177 else if (params.runmode == "snp"){
|
rliterman@0
|
178 if((params.snpdiffs == "") && (params.fasta == "") && (params.reads == "")) {
|
rliterman@0
|
179 error "Runmode is --snp but no query data provided via --snpdiffs/--reads/--fasta"
|
rliterman@0
|
180 }
|
rliterman@0
|
181 }
|
rliterman@0
|
182
|
rliterman@0
|
183 // Set directory structure
|
rliterman@0
|
184 if (params.outroot == "") {
|
rliterman@0
|
185 output_directory = file(params.out)
|
rliterman@0
|
186 } else {
|
rliterman@0
|
187 out_root = file(params.outroot)
|
rliterman@0
|
188 output_directory = file("${out_root}/${params.out}")
|
rliterman@0
|
189 }
|
rliterman@0
|
190
|
rliterman@0
|
191 // The parent of the output directory must already exist. If the output directory itself exists, create a new subdirectory inside it with the default output name ("CSP2_<TIME>")
|
rliterman@0
|
192 if(!output_directory.getParent().isDirectory()){
|
rliterman@0
|
193 error "Parent directory for output (--outroot) is not a valid directory [${output_directory.getParent()}]..."
|
rliterman@0
|
194 } else if(output_directory.isDirectory()){
|
rliterman@0
|
195 output_directory = file("${output_directory}/CSP2_${new java.util.Date().getTime()}")
|
rliterman@0
|
196 output_directory.mkdirs()
|
rliterman@0
|
197 } else{
|
rliterman@0
|
198 output_directory.mkdirs()
|
rliterman@0
|
199 }
|
rliterman@0
|
200
|
rliterman@0
|
201 if(params.tmp_dir != ""){
|
rliterman@0
|
202 tempdir = file(params.tmp_dir)
|
rliterman@0
|
203
|
rliterman@0
|
204 if(tempdir.isDirectory()){
|
rliterman@0
|
205 temp_dir = file("${tempdir}/CSP2_${new java.util.Date().getTime()}_tmp")
|
rliterman@0
|
206 temp_dir.mkdirs()
|
rliterman@0
|
207 params.temp_dir = file(temp_dir)
|
rliterman@0
|
208
|
rliterman@0
|
209 } else if(tempdir.getParent().isDirectory()){
|
rliterman@0
|
210 tempdir.mkdirs()
|
rliterman@0
|
211 params.temp_dir = tempdir
|
rliterman@0
|
212 } else{
|
rliterman@0
|
213 error "Parent directory for temp directory --tmp_dir (${params.tmp_dir}) is not a valid directory..."
|
rliterman@0
|
214 }
|
rliterman@0
|
215 } else{
|
rliterman@0
|
216 params.temp_dir = ""
|
rliterman@0
|
217 }
|
rliterman@0
|
218
|
rliterman@0
|
219 // Set MUMmer and SNP directories
|
rliterman@0
|
220 mummer_directory = file("${output_directory}/MUMmer_Output")
|
rliterman@0
|
221 snpdiffs_directory = file("${output_directory}/snpdiffs")
|
rliterman@0
|
222 snp_directory = file("${output_directory}/SNP_Analysis")
|
rliterman@0
|
223
|
rliterman@0
|
224 // Set paths for output files
|
rliterman@0
|
225 isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
|
rliterman@0
|
226 screening_results_file = file("${output_directory}/Screening_Results.tsv")
|
rliterman@0
|
227
|
rliterman@0
|
228 // In --runmode assemble, results are saved directly to output_directory
|
rliterman@0
|
229 if(params.runmode == "assemble"){
|
rliterman@0
|
230 ref_mode = false
|
rliterman@0
|
231
|
rliterman@0
|
232 log_directory = file("${output_directory}")
|
rliterman@0
|
233 assembly_directory = file("${output_directory}")
|
rliterman@0
|
234
|
rliterman@0
|
235 // Set dummy paths for log files
|
rliterman@0
|
236 screen_log_dir = file("${log_directory}/Screening_Logs")
|
rliterman@0
|
237 snp_log_dir = file("${log_directory}/SNP_Logs")
|
rliterman@0
|
238 ref_id_file = file("${log_directory}/Reference_IDs.txt")
|
rliterman@0
|
239 mummer_log_directory = file("${log_directory}/MUMmer_Logs")
|
rliterman@0
|
240 mash_dir = file("${log_directory}/sketch_dir")
|
rliterman@0
|
241
|
rliterman@0
|
242 } else{
|
rliterman@0
|
243
|
rliterman@0
|
244 // Get reference mode
|
rliterman@0
|
245 if(params.ref_reads == "" && params.ref_fasta == "" && params.ref_id == ""){
|
rliterman@0
|
246 ref_mode = false
|
rliterman@0
|
247 } else{
|
rliterman@0
|
248 ref_mode = true
|
rliterman@0
|
249 }
|
rliterman@0
|
250
|
rliterman@0
|
251 // Set directories
|
rliterman@0
|
252 log_directory = file("${output_directory}/logs")
|
rliterman@0
|
253 assembly_directory = file("${output_directory}/Assemblies")
|
rliterman@0
|
254 mummer_log_directory = file("${log_directory}/MUMmer_Logs")
|
rliterman@0
|
255 ref_id_file = file("${log_directory}/Reference_IDs.txt")
|
rliterman@0
|
256
|
rliterman@0
|
257 // Create directories
|
rliterman@0
|
258 log_directory.mkdirs()
|
rliterman@0
|
259 mummer_directory.mkdirs()
|
rliterman@0
|
260 mummer_log_directory.mkdirs()
|
rliterman@0
|
261 snpdiffs_directory.mkdirs()
|
rliterman@0
|
262
|
rliterman@0
|
263 // Touch Reference_IDs.txt to establish it
|
rliterman@0
|
264 file(ref_id_file).text = ''
|
rliterman@0
|
265
|
rliterman@0
|
266 // Set paths for log subdirectories
|
rliterman@0
|
267 screen_log_dir = file("${log_directory}/Screening_Logs")
|
rliterman@0
|
268 snp_log_dir = file("${log_directory}/SNP_Logs")
|
rliterman@0
|
269 mash_dir = file("${log_directory}/sketch_dir")
|
rliterman@0
|
270
|
rliterman@0
|
271 // If --reads/--ref_reads are provided, prepare a directory for assemblies
|
rliterman@0
|
272 if((params.reads != "") || (params.ref_reads != "")){
|
rliterman@0
|
273 assembly_directory.mkdirs()
|
rliterman@0
|
274 }
|
rliterman@0
|
275
|
rliterman@0
|
276 // If runmode is snp, prepare a directory for SNP analysis + logs
|
rliterman@0
|
277 if(params.runmode == "snp"){
|
rliterman@0
|
278 snp_directory.mkdirs()
|
rliterman@0
|
279 snp_log_dir.mkdirs()
|
rliterman@0
|
280 if(!ref_mode){
|
rliterman@0
|
281 mash_dir.mkdirs()
|
rliterman@0
|
282 }
|
rliterman@0
|
283 }
|
rliterman@0
|
284
|
rliterman@0
|
285 // If runmode is screen, prepare a directory for screening logs
|
rliterman@0
|
286 if(params.runmode == "screen"){
|
rliterman@0
|
287 screen_log_dir.mkdirs()
|
rliterman@0
|
288 }
|
rliterman@0
|
289 }
|
rliterman@0
|
290
|
rliterman@0
|
291 // Parameterize variables to pass between scripts
|
rliterman@0
|
292 params.output_directory = file(output_directory)
|
rliterman@0
|
293 params.log_directory = file(log_directory)
|
rliterman@0
|
294 params.screen_log_dir = file(screen_log_dir)
|
rliterman@0
|
295 params.snp_log_dir = file(snp_log_dir)
|
rliterman@0
|
296 params.assembly_directory = file(assembly_directory)
|
rliterman@0
|
297 params.mummer_directory = file(mummer_directory)
|
rliterman@0
|
298 params.mummer_log_directory = file(mummer_log_directory)
|
rliterman@0
|
299 params.snpdiffs_directory = file(snpdiffs_directory)
|
rliterman@0
|
300 params.snp_directory = file(snp_directory)
|
rliterman@0
|
301 params.ref_id_file = file(ref_id_file)
|
rliterman@0
|
302 params.mash_directory = file(mash_dir)
|
rliterman@0
|
303
|
rliterman@0
|
304 params.ref_mode = ref_mode
|
rliterman@0
|
305
|
rliterman@0
|
306 // Set up modules if needed
|
rliterman@0
|
307 params.load_python_module = params.python_module == "" ? "" : "module load -s ${params.python_module}"
|
rliterman@0
|
308 params.load_skesa_module = params.skesa_module == "" ? "" : "module load -s ${params.skesa_module}"
|
rliterman@0
|
309 params.load_bedtools_module = params.bedtools_module == "" ? "" : "module load -s ${params.bedtools_module}"
|
rliterman@0
|
310 params.load_bbtools_module = params.bbtools_module == "" ? "" : "module load -s ${params.bbtools_module}"
|
rliterman@0
|
311 params.load_mummer_module = params.mummer_module == "" ? "" : "module load -s ${params.mummer_module}"
|
rliterman@0
|
312 params.load_mash_module = params.mash_module == "" ? "" : "module load -s ${params.mash_module}"
|
rliterman@0
|
313
|
rliterman@0
|
314 // Save params to log file
|
rliterman@0
|
315 params.each { key, value ->
|
rliterman@0
|
316 file("${log_directory}/CSP2_Params.txt") << "$key = $value\n"
|
rliterman@0
|
317 }
|
rliterman@0
|
318 } else{
|
rliterman@0
|
319 params.output_directory = "./"
|
rliterman@0
|
320 params.log_directory = "./"
|
rliterman@0
|
321 params.screen_log_dir = "./"
|
rliterman@0
|
322 params.snp_log_dir = "./"
|
rliterman@0
|
323 params.assembly_directory = "./"
|
rliterman@0
|
324 params.mummer_directory = "./"
|
rliterman@0
|
325 params.mummer_log_directory = "./"
|
rliterman@0
|
326 params.snpdiffs_directory = "./"
|
rliterman@0
|
327 params.snp_directory = "./"
|
rliterman@0
|
328 params.ref_id_file = "./"
|
rliterman@0
|
329 params.mash_directory = "./"
|
rliterman@0
|
330 params.ref_mode = false
|
rliterman@0
|
331 params.load_python_module = ""
|
rliterman@0
|
332 params.load_skesa_module = ""
|
rliterman@0
|
333 params.load_bedtools_module = ""
|
rliterman@0
|
334 params.load_bbtools_module = ""
|
rliterman@0
|
335 params.load_mummer_module = ""
|
rliterman@0
|
336 params.load_mash_module = ""
|
rliterman@0
|
337 }
|
rliterman@0
|
338
|
rliterman@0
|
339 //////////////////////////////////////////////////////////////////////////////////////////
|
rliterman@0
|
340
|
rliterman@0
|
341 // Import modules
|
rliterman@0
|
342 include {fetchData} from "./subworkflows/fetchData/main.nf"
|
rliterman@0
|
343 include {alignGenomes} from "./subworkflows/alignData/main.nf"
|
rliterman@0
|
344 include {runScreen;runSNPPipeline} from "./subworkflows/snpdiffs/main.nf"
|
rliterman@0
|
345 include {runRefChooser} from "./subworkflows/refchooser/main.nf"
|
rliterman@0
|
346
|
rliterman@0
|
347 workflow{
|
rliterman@0
|
348
|
rliterman@0
|
349 if(params.runmode == "conda_init"){
|
rliterman@0
|
350 conda_init()
|
rliterman@0
|
351 } else{
|
rliterman@0
|
352 // Read in data
|
rliterman@0
|
353 input_data = fetchData()
|
rliterman@0
|
354
|
rliterman@0
|
355 query_data = input_data.query_data
|
rliterman@0
|
356 reference_data = input_data.reference_data
|
rliterman@0
|
357 snpdiffs_data = input_data.snpdiff_data
|
rliterman@0
|
358
|
rliterman@0
|
359 // Create channel for pre-aligned data [(Query_1,Query_2),SNPDiffs_File]
|
rliterman@0
|
360 prealigned = snpdiffs_data
|
rliterman@0
|
361 .map { it -> tuple([it[0], it[1]].sort().join(',').toString(), it[2]) }
|
rliterman@0
|
362 .unique{it -> it[0]}
|
rliterman@0
|
363
|
rliterman@0
|
364 // If run mode is 'assemble', tasks are complete; only 'align', 'screen', and 'snp' continue on to alignment
|
rliterman@0
|
365 if((params.runmode == "align") || (params.runmode == "screen") || (params.runmode == "snp")){
|
rliterman@0
|
366
|
rliterman@0
|
367 // If there is no reference data, align all query_data against each other
|
rliterman@0
|
368 if(!ref_mode){
|
rliterman@0
|
369
|
rliterman@0
|
370 if((params.runmode == "align") || (params.runmode == "screen")){
|
rliterman@0
|
371 seen_combinations = []
|
rliterman@0
|
372
|
rliterman@0
|
373 to_align = query_data.combine(query_data) // Self-combine query data
|
rliterman@0
|
374 .collect().flatten().collate(4)
|
rliterman@0
|
375 .filter{it -> (it[1].toString() != "null") && (it[3].toString() != "null")} // Can't align without FASTA
|
rliterman@0
|
376 .filter{ it -> // Get unique combinations
|
rliterman@0
|
377
|
rliterman@0
|
378 combination = ["${it[0]}", "${it[2]}"].sort()
|
rliterman@0
|
379
|
rliterman@0
|
380 if(combination in seen_combinations) {
|
rliterman@0
|
381 return false
|
rliterman@0
|
382 } else {
|
rliterman@0
|
383 seen_combinations << combination
|
rliterman@0
|
384 return true
|
rliterman@0
|
385 }}
|
rliterman@0
|
386 }
|
rliterman@0
|
387
|
rliterman@0
|
388 // If running SNP pipeline without references, run RefChooser to choose references
|
rliterman@0
|
389 else if(params.runmode == "snp"){
|
rliterman@0
|
390 reference_data = runRefChooser(query_data)
|
rliterman@0
|
391
|
rliterman@0
|
392 to_align = query_data
|
rliterman@0
|
393 .combine(reference_data)
|
rliterman@0
|
394 .filter{it -> (it[1].toString() != "null") && (it[3].toString() != "null")} // Can't align without FASTA
|
rliterman@0
|
395 }
|
rliterman@0
|
396 }
|
rliterman@0
|
397
|
rliterman@0
|
398 // If references are provided, align all queries against all references
|
rliterman@0
|
399 else{
|
rliterman@0
|
400 to_align = query_data
|
rliterman@0
|
401 .combine(reference_data)
|
rliterman@0
|
402 .filter{it -> (it[1].toString() != "null") && (it[3].toString() != "null")} // Can't align without FASTA
|
rliterman@0
|
403 }
|
rliterman@0
|
404
|
rliterman@0
|
405 // Don't align things that are already aligned via --snpdiffs
|
rliterman@0
|
406 unaligned = to_align
|
rliterman@0
|
407 .map { it -> tuple([it[0], it[2]].sort().join(',').toString(),it[0], it[1], it[2], it[3]) }
|
rliterman@0
|
408 .unique{it -> it[0]}
|
rliterman@0
|
409 .join(prealigned, by:0, remainder:true)
|
rliterman@0
|
410 .filter{it -> it[5].toString() == "null"}
|
rliterman@0
|
411 .map{it -> [it[1], it[2], it[3], it[4]]}
|
rliterman@0
|
412 | collect | flatten | collate(4)
|
rliterman@0
|
413
|
rliterman@0
|
414 all_snpdiffs = alignGenomes(unaligned,snpdiffs_data)
|
rliterman@0
|
415 .ifEmpty { error "No .snpdiffs to process..." }
|
rliterman@0
|
416 .collect().flatten().collate(3)
|
rliterman@0
|
417
|
rliterman@0
|
418 if(params.runmode == "screen"){
|
rliterman@0
|
419 runScreen(all_snpdiffs)
|
rliterman@0
|
420 } else if(params.runmode == "snp"){
|
rliterman@0
|
421 runSNPPipeline(all_snpdiffs,reference_data)
|
rliterman@0
|
422 }
|
rliterman@0
|
423 }
|
rliterman@0
|
424 }
|
rliterman@0
|
425 }
|
rliterman@0
|
426
|
rliterman@0
|
427 // Dummy process to stimulate conda env generation
|
rliterman@0
|
428 process conda_init { // no inputs/outputs: invoked by the workflow only when --runmode is conda_init
|
rliterman@0
|
429 executor 'local' // process directives use `name value` syntax (no `=`); use the local executor
|
rliterman@0
|
430 cpus 1 // request a single CPU
|
rliterman@0
|
431 maxForks 1 // allow at most one instance of this process at a time
|
rliterman@0
|
432
|
rliterman@0
|
433 script: // intentionally empty command: the task exists only so the process environment gets set up
|
rliterman@0
|
434 """
|
rliterman@0
|
435 """
|
rliterman@0
|
436 } |