annotate CSP2/CSP2.nf @ 1:9df7494b1398

"planemo upload"
author rliterman
date Mon, 02 Dec 2024 11:24:02 -0500
parents 01431fa12065
children
rev   line source
rliterman@0 1 #! /usr/bin/env nextflow
rliterman@0 2 nextflow.enable.dsl=2
rliterman@0 3
rliterman@0 4 // CSP2 Main Script
rliterman@0 5 // Params are read in from command line or from nextflow.config and/or conf/profiles.config
rliterman@0 6
rliterman@0 7 // Resolve both help flags (--help / --h): any value other than the string "nohelp" counts as a help request
rliterman@0 8 help1 = "${params.help}" != "nohelp" ? "help" : "nohelp"
rliterman@0 9 help2 = "${params.h}" != "nohelp" ? "help" : "nohelp"
rliterman@0 10
rliterman@0 11 def printHelp() { // Print the CSP2 usage/help text, then terminate the run with exit status 0
rliterman@0 12 println """
rliterman@0 13 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
rliterman@0 14 CSP2
rliterman@0 15 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
rliterman@0 16
rliterman@0 17 Global default params:
rliterman@0 18
rliterman@0 19 --out Set name for output folder/file prefixes (Default: CSP2_<timestamp>)
rliterman@0 20 --outroot Set output parent directory (Default: CWD; Useful to hardset in nextflow.config if
rliterman@0 21 you want all output go to the same parent folder, with unique IDs set by --out)
rliterman@0 22 --tmp_dir Manually specify a TMP directory for pybedtools output
rliterman@0 23 --help/--h Display this help menu
rliterman@0 24
rliterman@0 25
rliterman@0 26 CSP2 can run in the following run modes:
rliterman@0 27
rliterman@0 28 --runmode Run mode for CSP2:
rliterman@0 29
rliterman@0 30 - assemble: Assemble read data (--reads/--ref_reads) into FASTA using SKESA
rliterman@0 31
rliterman@0 32 - align: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta),
rliterman@0 33 run MUMmer alignment analysis for each query/ref combination
rliterman@0 34
rliterman@0 35 - screen: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta)
rliterman@0 36 and/or MUMmer output (.snpdiffs), create a report for raw SNP
rliterman@0 37 distances between each query and reference assembly
rliterman@0 38
rliterman@0 39 - snp: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta)
rliterman@0 40 and/or MUMmer output (.snpdiffs), generate alignments and pairwise
rliterman@0 41 distances for all queries based on each reference dataset
rliterman@0 42
rliterman@0 43 Input Data:
rliterman@0 44
rliterman@0 45 --fasta Location for query isolate assembly data (.fasta/.fa/.fna). Can be a list of files, a path
rliterman@0 46 to a single FASTA, or a path to a directory with assemblies.
rliterman@0 47 --ref_fasta Location for reference isolate assembly data (.fasta/.fa/.fna). Can be a list of files, a
rliterman@0 48 path to a single FASTA, or a path to a directory with assemblies.
rliterman@0 49
rliterman@0 50 --reads Directory or list of directories containing query isolate read data
rliterman@0 51 --readext Read file extension (Default: fastq.gz)
rliterman@0 52 --forward Forward read file suffix (Default: _1.fastq.gz)
rliterman@0 53 --reverse Reverse read file suffix (Default: _2.fastq.gz)
rliterman@0 54
rliterman@0 55 --ref_reads Directory or list of directories containing reference isolate read data
rliterman@0 56 --ref_readext Reference read file extension (Default: fastq.gz)
rliterman@0 57 --ref_forward Reference forward read file suffix (Default: _1.fastq.gz)
rliterman@0 58 --ref_reverse Reference reverse read file suffix (Default: _2.fastq.gz)
rliterman@0 59
rliterman@0 60 --snpdiffs Location for pre-generated snpdiffs files (List of snpdiffs files, directory with snpdiffs)
rliterman@0 61
rliterman@0 62 --ref_id IDs to specify reference sequences (Comma-separated list; e.g., Sample_A,Sample_B,Sample_C)
rliterman@0 63
rliterman@0 64 --trim_name A common string to remove from all sample IDs (Default: ''; Useful if all assemblies end in
rliterman@0 65 something like "_contigs_skesa.fasta")
rliterman@0 66
rliterman@0 67 --n_ref If running in --runmode snp, the number of reference genomes for CSP2 to select if none are provided (Default: 1)
rliterman@0 68
rliterman@0 69 --exclude A comma-separated list of IDs to remove prior to analysis (Useful for removing low quality
rliterman@0 70 isolates in combination with --snpdiffs)
rliterman@0 71
rliterman@0 72 QC variables:
rliterman@0 73
rliterman@0 74 --min_cov Only consider queries if the reference genome is covered by at least <min_cov>% (Default: 85)
rliterman@0 75 --min_len Only consider SNPs from contig alignments longer than <min_len> bp (Default: 500)
rliterman@0 76 --min_iden Only consider SNPs from alignments with at least <min_iden> percent identity (Default: 99)
rliterman@0 77 --dwin A comma-separated set of window sizes for SNP density filters (Default: 1000,125,15; Set --dwin 0 to disable density filtering)
rliterman@0 78 --wsnps A comma-separated set of maximum SNP counts per window above (Default: 3,2,1)
rliterman@0 79 --max_missing If running in --runmode snp, mask SNPs where data is missing or purged from <max_missing>% of isolates (Default: 50)
rliterman@0 80
rliterman@0 81 Edge Trimming:
rliterman@0 82
rliterman@0 83 --ref_edge Don't include SNPs that fall within <ref_edge>bp of a reference contig edge (Default: 150)
rliterman@0 84 --query_edge Don't include SNPs that fall within <query_edge>bp of a query contig edge (Default: 150)
rliterman@0 85 --rescue If flagged (Default: not flagged), sites that were filtered out due solely to query edge proximity are rescued if
rliterman@0 86 the same reference position is covered more centrally by another query
rliterman@0 87 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
rliterman@0 88
rliterman@0 89 Example Commands:
rliterman@0 90
rliterman@0 91 1) Run CSP2 in SNP Pipeline mode using all the FASTA from /my/data/dir, and choose 3 references
rliterman@0 92
rliterman@0 93 nextflow run CSP2.nf --runmode snp --fasta /my/data/dir --n_ref 3
rliterman@0 94
rliterman@0 95 2) Screen all the paired-end .fastq files from /my/read/dir against the reference isolate in /my/reference/isolates.txt
rliterman@0 96
rliterman@0 97 nextflow run CSP2.nf --runmode screen --ref_fasta /my/reference/isolates.txt --reads /my/read/dir --readext .fastq --forward _1.fastq --reverse _2.fastq
rliterman@0 98
rliterman@0 99 3) Re-run the SNP pipeline using old snpdiffs files after changing the density filters and removing a bad sample
rliterman@0 100
rliterman@0 101 nextflow run CSP2.nf --runmode snp --snpdiffs /my/old/analysis/snpdiffs --dwin 5000,2500,1000 --wsnps 6,4,2 --ref_id Sample_A --exclude Sample_Q --out HQ_Density
rliterman@0 102
rliterman@0 103 4) Run in assembly mode and use HPC modules specified in profiles.config (NOTE: Setting the profile in nextflow uses a single hyphen (-) as compared to other arguments (--))
rliterman@0 104
rliterman@0 105 nextflow run CSP2.nf -profile myHPC --runmode assemble --reads /my/read/dir --out Assemblies
rliterman@0 106
rliterman@0 107 5) Run in SNP pipeline mode using SLURM and use the built in conda environment (NOTE: For local jobs using conda, use -profile standard_conda)
rliterman@0 108
rliterman@0 109 nextflow run CSP2.nf -profile slurm_conda --runmode snp --fasta /my/data/dir --out CSP2_Conda
rliterman@0 110
rliterman@0 111 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
rliterman@0 112 """
rliterman@0 113 System.exit(0) // showing help is terminal: stop before any of the setup code below runs
rliterman@0 114 }
rliterman@0 115
rliterman@0 116 // Show the help menu (which exits) when either help flag was supplied
rliterman@0 117 if (help1 == "help" || help2 == "help") {
rliterman@0 118 printHelp()
rliterman@0 119 }
rliterman@0 120
rliterman@0 121
rliterman@0 122 // Validate --runmode: it must be non-empty and one of the accepted modes
rliterman@0 123 if (params.runmode == "") {
rliterman@0 124 error "--runmode must be specified..."
rliterman@0 125 }
rliterman@0 126 if (!(params.runmode in ['align','assemble', 'screen', 'snp','conda_init'])){
rliterman@0 127 error "--runmode must be 'align','assemble', 'screen', or 'snp', not ${params.runmode}..."
rliterman@0 128 }
rliterman@0 129 // Unless runmode is conda_init (which only launches a local dummy process to spur creation of the conda environment), validate inputs and set up output directories
rliterman@0 130 if (params.runmode != "conda_init") {
rliterman@0 131
rliterman@0 132 // Ensure necessary data is provided given the run mode
rliterman@0 133 // Runmode 'assemble'
rliterman@0 134 // - Requires: --reads/--ref_reads
rliterman@0 135 // - Runs SKESA and summarizes output FASTA
rliterman@0 136 if (params.runmode == "assemble"){
rliterman@0 137 if((params.reads == "") && (params.ref_reads == "")){
rliterman@0 138 error "Runmode is --assemble but no read data provided via --reads/--ref_reads"
rliterman@0 139 }
rliterman@0 140 }
rliterman@0 141
rliterman@0 142 // Runmode 'align'
rliterman@0 143 // - Requires: --reads/--fasta/--snpdiffs
rliterman@0 144 // - Optional: --ref_reads/--ref_fasta/--ref_id
rliterman@0 145 // - Runs MUMmer, generates .snpdiffs, and alignment summary.
rliterman@0 146 // - If references are provided via --ref_reads/--ref_fasta/--ref_id, non-reference samples are aligned to each reference
rliterman@0 147 // - If no references are provided, alignments are all-vs-all
rliterman@0 148 // - If --snpdiffs are provided, their FASTAs will be autodetected and, if present, used as queries or references as specified by --ref_reads/--ref_fasta/--ref_id
rliterman@0 149 // - Does NOT perform QC filtering
rliterman@0 150
rliterman@0 151 else if (params.runmode == "align"){
rliterman@0 152 if((params.fasta == "") && (params.reads == "") && (params.snpdiffs == "")){
rliterman@0 153 error "Runmode is --align but no query data provided via --fasta/--reads/--snpdiffs"
rliterman@0 154 }
rliterman@0 155 }
rliterman@0 156
rliterman@0 157 // Runmode 'screen'
rliterman@0 158 // - Requires: --reads/--fasta/--snpdiffs
rliterman@0 159 // - Optional: --ref_reads/--ref_fasta/--ref_id
rliterman@0 160 // - Generates .snpdiffs files (if needed), applies QC, and generates alignment summaries and SNP distance estimates
rliterman@0 161 // - If references are provided via --ref_reads/--ref_fasta/--ref_id, non-reference samples are aligned to each reference
rliterman@0 162 // - If no references are provided, alignments are all-vs-all
rliterman@0 163 // - If --snpdiffs are provided, (1) they will be QC filtered and included in the output report and (2) their FASTAs will be autodetected and, if present, used as queries or references as specified by --ref_reads/--ref_fasta/--ref_id
rliterman@0 164
rliterman@0 165 else if (params.runmode == "screen"){
rliterman@0 166 if((params.fasta == "") && (params.reads == "") && (params.snpdiffs == "")){
rliterman@0 167 error "Runmode is --screen but no query data provided via --snpdiffs/--reads/--fasta"
rliterman@0 168 }
rliterman@0 169 }
rliterman@0 170
rliterman@0 171 // Runmode 'snp'
rliterman@0 172 // - Requires: --reads/--fasta/--snpdiffs
rliterman@0 173 // - Optional: --ref_reads/--ref_fasta/--ref_id
rliterman@0 174 // - If references are not provided, runs RefChooser using all FASTAs to choose references (--n_ref sets how many references to choose)
rliterman@0 175 // - Each query is aligned to each reference, and pairwise SNP distances for all queries are generated based on that reference
rliterman@0 176 // - Generates .snpdiffs files (if needed), applies QC, and generates SNP distance data between all queries based on their alignment to each reference
rliterman@0 177 else if (params.runmode == "snp"){
rliterman@0 178 if((params.snpdiffs == "") && (params.fasta == "") && (params.reads == "")) {
rliterman@0 179 error "Runmode is --snp but no query data provided via --snpdiffs/--reads/--fasta"
rliterman@0 180 }
rliterman@0 181 }
rliterman@0 182
rliterman@0 183 // Set directory structure
rliterman@0 184 if (params.outroot == "") {
rliterman@0 185 output_directory = file(params.out)
rliterman@0 186 } else {
rliterman@0 187 out_root = file(params.outroot)
rliterman@0 188 output_directory = file("${out_root}/${params.out}")
rliterman@0 189 }
rliterman@0 190
rliterman@0 191 // If the output directory exists, create a new subdirectory with the default output name ("CSP2_<TIME>")
rliterman@0 192 if(!output_directory.getParent().isDirectory()){
rliterman@0 193 error "Parent directory for output (--outroot) is not a valid directory [${output_directory.getParent()}]..."
rliterman@0 194 } else if(output_directory.isDirectory()){
rliterman@0 195 output_directory = file("${output_directory}/CSP2_${new java.util.Date().getTime()}")
rliterman@0 196 output_directory.mkdirs()
rliterman@0 197 } else{
rliterman@0 198 output_directory.mkdirs()
rliterman@0 199 }
rliterman@0 200
rliterman@0 201 if(params.tmp_dir != ""){
rliterman@0 202 tempdir = file(params.tmp_dir)
rliterman@0 203
rliterman@0 204 if(tempdir.isDirectory()){
rliterman@0 205 temp_dir = file("${tempdir}/CSP2_${new java.util.Date().getTime()}_tmp")
rliterman@0 206 temp_dir.mkdirs()
rliterman@0 207 params.temp_dir = file(temp_dir)
rliterman@0 208
rliterman@0 209 } else if(tempdir.getParent().isDirectory()){
rliterman@0 210 tempdir.mkdirs()
rliterman@0 211 params.temp_dir = tempdir
rliterman@0 212 } else{
rliterman@0 213 error "Parent directory for temp directory --tmp_dir (${params.tmp_dir}) is not a valid directory..."
rliterman@0 214 }
rliterman@0 215 } else{
rliterman@0 216 params.temp_dir = ""
rliterman@0 217 }
rliterman@0 218
rliterman@0 219 // Set MUMmer and SNP directories
rliterman@0 220 mummer_directory = file("${output_directory}/MUMmer_Output")
rliterman@0 221 snpdiffs_directory = file("${output_directory}/snpdiffs")
rliterman@0 222 snp_directory = file("${output_directory}/SNP_Analysis")
rliterman@0 223
rliterman@0 224 // Set paths for output files
rliterman@0 225 isolate_data_file = file("${output_directory}/Isolate_Data.tsv")
rliterman@0 226 screening_results_file = file("${output_directory}/Screening_Results.tsv")
rliterman@0 227
rliterman@0 228 // In --runmode assemble, results are saved directly to output_directory
rliterman@0 229 if(params.runmode == "assemble"){
rliterman@0 230 ref_mode = false
rliterman@0 231
rliterman@0 232 log_directory = file("${output_directory}")
rliterman@0 233 assembly_directory = file("${output_directory}")
rliterman@0 234
rliterman@0 235 // Set dummy paths for log files
rliterman@0 236 screen_log_dir = file("${log_directory}/Screening_Logs")
rliterman@0 237 snp_log_dir = file("${log_directory}/SNP_Logs")
rliterman@0 238 ref_id_file = file("${log_directory}/Reference_IDs.txt")
rliterman@0 239 mummer_log_directory = file("${log_directory}/MUMmer_Logs")
rliterman@0 240 mash_dir = file("${log_directory}/sketch_dir")
rliterman@0 241
rliterman@0 242 } else{
rliterman@0 243
rliterman@0 244 // Get reference mode
rliterman@0 245 if(params.ref_reads == "" && params.ref_fasta == "" && params.ref_id == ""){
rliterman@0 246 ref_mode = false
rliterman@0 247 } else{
rliterman@0 248 ref_mode = true
rliterman@0 249 }
rliterman@0 250
rliterman@0 251 // Set directories
rliterman@0 252 log_directory = file("${output_directory}/logs")
rliterman@0 253 assembly_directory = file("${output_directory}/Assemblies")
rliterman@0 254 mummer_log_directory = file("${log_directory}/MUMmer_Logs")
rliterman@0 255 ref_id_file = file("${log_directory}/Reference_IDs.txt")
rliterman@0 256
rliterman@0 257 // Create directories
rliterman@0 258 log_directory.mkdirs()
rliterman@0 259 mummer_directory.mkdirs()
rliterman@0 260 mummer_log_directory.mkdirs()
rliterman@0 261 snpdiffs_directory.mkdirs()
rliterman@0 262
rliterman@0 263 // Touch Reference_IDs.txt to establish it
rliterman@0 264 file(ref_id_file).text = ''
rliterman@0 265
rliterman@0 266 // Set paths for log subdirectories
rliterman@0 267 screen_log_dir = file("${log_directory}/Screening_Logs")
rliterman@0 268 snp_log_dir = file("${log_directory}/SNP_Logs")
rliterman@0 269 mash_dir = file("${log_directory}/sketch_dir")
rliterman@0 270
rliterman@0 271 // If --reads/--ref_reads are provided, prepare a directory for assemblies
rliterman@0 272 if((params.reads != "") || (params.ref_reads != "")){
rliterman@0 273 assembly_directory.mkdirs()
rliterman@0 274 }
rliterman@0 275
rliterman@0 276 // If runmode is snp, prepare a directory for SNP analysis + logs
rliterman@0 277 if(params.runmode == "snp"){
rliterman@0 278 snp_directory.mkdirs()
rliterman@0 279 snp_log_dir.mkdirs()
rliterman@0 280 if(!ref_mode){
rliterman@0 281 mash_dir.mkdirs()
rliterman@0 282 }
rliterman@0 283 }
rliterman@0 284
rliterman@0 285 // If runmode is screen, prepare a directory for screening logs
rliterman@0 286 if(params.runmode == "screen"){
rliterman@0 287 screen_log_dir.mkdirs()
rliterman@0 288 }
rliterman@0 289 }
rliterman@0 290
rliterman@0 291 // Parameterize variables to pass between scripts
rliterman@0 292 params.output_directory = file(output_directory)
rliterman@0 293 params.log_directory = file(log_directory)
rliterman@0 294 params.screen_log_dir = file(screen_log_dir)
rliterman@0 295 params.snp_log_dir = file(snp_log_dir)
rliterman@0 296 params.assembly_directory = file(assembly_directory)
rliterman@0 297 params.mummer_directory = file(mummer_directory)
rliterman@0 298 params.mummer_log_directory = file(mummer_log_directory)
rliterman@0 299 params.snpdiffs_directory = file(snpdiffs_directory)
rliterman@0 300 params.snp_directory = file(snp_directory)
rliterman@0 301 params.ref_id_file = file(ref_id_file)
rliterman@0 302 params.mash_directory = file(mash_dir)
rliterman@0 303
rliterman@0 304 params.ref_mode = ref_mode
rliterman@0 305
rliterman@0 306 // Set up modules if needed
rliterman@0 307 params.load_python_module = params.python_module == "" ? "" : "module load -s ${params.python_module}"
rliterman@0 308 params.load_skesa_module = params.skesa_module == "" ? "" : "module load -s ${params.skesa_module}"
rliterman@0 309 params.load_bedtools_module = params.bedtools_module == "" ? "" : "module load -s ${params.bedtools_module}"
rliterman@0 310 params.load_bbtools_module = params.bbtools_module == "" ? "" : "module load -s ${params.bbtools_module}"
rliterman@0 311 params.load_mummer_module = params.mummer_module == "" ? "" : "module load -s ${params.mummer_module}"
rliterman@0 312 params.load_mash_module = params.mash_module == "" ? "" : "module load -s ${params.mash_module}"
rliterman@0 313
rliterman@0 314 // Save params to log file
rliterman@0 315 params.each { key, value ->
rliterman@0 316 file("${log_directory}/CSP2_Params.txt") << "$key = $value\n"
rliterman@0 317 }
rliterman@0 318 } else{
rliterman@0 319 params.output_directory = "./"
rliterman@0 320 params.log_directory = "./"
rliterman@0 321 params.screen_log_dir = "./"
rliterman@0 322 params.snp_log_dir = "./"
rliterman@0 323 params.assembly_directory = "./"
rliterman@0 324 params.mummer_directory = "./"
rliterman@0 325 params.mummer_log_directory = "./"
rliterman@0 326 params.snpdiffs_directory = "./"
rliterman@0 327 params.snp_directory = "./"
rliterman@0 328 params.ref_id_file = "./"
rliterman@0 329 params.mash_directory = "./"
rliterman@0 330 params.ref_mode = false
rliterman@0 331 params.load_python_module = ""
rliterman@0 332 params.load_skesa_module = ""
rliterman@0 333 params.load_bedtools_module = ""
rliterman@0 334 params.load_bbtools_module = ""
rliterman@0 335 params.load_mummer_module = ""
rliterman@0 336 params.load_mash_module = ""
rliterman@0 337 }
rliterman@0 338
rliterman@0 339 //////////////////////////////////////////////////////////////////////////////////////////
rliterman@0 340
rliterman@0 341 // Import modules
rliterman@0 342 include {fetchData} from "./subworkflows/fetchData/main.nf"
rliterman@0 343 include {alignGenomes} from "./subworkflows/alignData/main.nf"
rliterman@0 344 include {runScreen;runSNPPipeline} from "./subworkflows/snpdiffs/main.nf"
rliterman@0 345 include {runRefChooser} from "./subworkflows/refchooser/main.nf"
rliterman@0 346
rliterman@0 347 workflow{ // Entry workflow: dispatches on params.runmode
rliterman@0 348
rliterman@0 349 if(params.runmode == "conda_init"){
rliterman@0 350 conda_init()
rliterman@0 351 } else{
rliterman@0 352 // Read in data; the fetchData output exposes query_data, reference_data, and snpdiff_data
rliterman@0 353 input_data = fetchData()
rliterman@0 354
rliterman@0 355 query_data = input_data.query_data
rliterman@0 356 reference_data = input_data.reference_data
rliterman@0 357 snpdiffs_data = input_data.snpdiff_data
rliterman@0 358
rliterman@0 359 // Key each pre-aligned entry by its sorted, comma-joined ID pair -> ["ID_1,ID_2", third tuple element]; keep one entry per key
rliterman@0 360 prealigned = snpdiffs_data
rliterman@0 361 .map { it -> tuple([it[0], it[1]].sort().join(',').toString(), it[2]) }
rliterman@0 362 .unique{it -> it[0]}
rliterman@0 363
rliterman@0 364 // Only 'align', 'screen', and 'snp' continue past data fetching; in 'assemble' mode nothing further is run here
rliterman@0 365 if((params.runmode == "align") || (params.runmode == "screen") || (params.runmode == "snp")){
rliterman@0 366
rliterman@0 367 // If there is no reference data, align all query_data against each other
rliterman@0 368 if(!ref_mode){
rliterman@0 369
rliterman@0 370 if((params.runmode == "align") || (params.runmode == "screen")){
rliterman@0 371 seen_combinations = [] // ID pairs already emitted; appended to by the filter closure below
rliterman@0 372
rliterman@0 373 to_align = query_data.combine(query_data) // Self-combine query data
rliterman@0 374 .collect().flatten().collate(4)
rliterman@0 375 .filter{it -> (it[1].toString() != "null") && (it[3].toString() != "null")} // Can't align without FASTA
rliterman@0 376 .filter{ it -> // Keep only the first occurrence of each unordered ID pair
rliterman@0 377
rliterman@0 378 combination = ["${it[0]}", "${it[2]}"].sort()
rliterman@0 379
rliterman@0 380 if(combination in seen_combinations) {
rliterman@0 381 return false
rliterman@0 382 } else {
rliterman@0 383 seen_combinations << combination
rliterman@0 384 return true
rliterman@0 385 }}
rliterman@0 386 }
rliterman@0 387
rliterman@0 388 // If running SNP pipeline without references, run RefChooser to choose references
rliterman@0 389 else if(params.runmode == "snp"){
rliterman@0 390 reference_data = runRefChooser(query_data)
rliterman@0 391
rliterman@0 392 to_align = query_data
rliterman@0 393 .combine(reference_data)
rliterman@0 394 .filter{it -> (it[1].toString() != "null") && (it[3].toString() != "null")} // Can't align without FASTA
rliterman@0 395 }
rliterman@0 396 }
rliterman@0 397
rliterman@0 398 // If references are provided, align all queries against all references
rliterman@0 399 else{
rliterman@0 400 to_align = query_data
rliterman@0 401 .combine(reference_data)
rliterman@0 402 .filter{it -> (it[1].toString() != "null") && (it[3].toString() != "null")} // Can't align without FASTA
rliterman@0 403 }
rliterman@0 404
rliterman@0 405 // Don't align things that are already aligned via --snpdiffs: key each pair the same way as prealigned, join, and drop matches
rliterman@0 406 unaligned = to_align
rliterman@0 407 .map { it -> tuple([it[0], it[2]].sort().join(',').toString(),it[0], it[1], it[2], it[3]) }
rliterman@0 408 .unique{it -> it[0]}
rliterman@0 409 .join(prealigned, by:0, remainder:true)
rliterman@0 410 .filter{it -> it[5].toString() == "null"} // keep rows whose sixth element (the value joined in from prealigned) is null
rliterman@0 411 .map{it -> [it[1], it[2], it[3], it[4]]}
rliterman@0 412 | collect | flatten | collate(4)
rliterman@0 413
rliterman@0 414 all_snpdiffs = alignGenomes(unaligned,snpdiffs_data)
rliterman@0 415 .ifEmpty { error "No .snpdiffs to process..." }
rliterman@0 416 .collect().flatten().collate(3)
rliterman@0 417
rliterman@0 418 if(params.runmode == "screen"){
rliterman@0 419 runScreen(all_snpdiffs)
rliterman@0 420 } else if(params.runmode == "snp"){
rliterman@0 421 runSNPPipeline(all_snpdiffs,reference_data)
rliterman@0 422 }
rliterman@0 423 }
rliterman@0 424 }
rliterman@0 425 }
rliterman@0 426
rliterman@0 427 // Dummy process with an empty script; running it only serves to trigger conda env generation
rliterman@0 428 process conda_init {
rliterman@0 429 executor 'local' // directives use the `name value` form (no '=')
rliterman@0 430 cpus 1
rliterman@0 431 maxForks 1
rliterman@0 432
rliterman@0 433 script:
rliterman@0 434 """
rliterman@0 435 """
rliterman@0 436 }