annotate CSP2/nextflow.config @ 29:b6ec322b1f05

"planemo upload"
author rliterman
date Wed, 04 Dec 2024 16:02:07 -0500
parents 893a6993efe3
children 93393808f415
rev   line source
rliterman@0 1 /*
rliterman@0 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
rliterman@0 3 CSP2 Nextflow config file
rliterman@0 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
rliterman@0 5 Default config options for all compute environments
rliterman@0 6 ----------------------------------------------------------------------------------------
rliterman@0 7 */
rliterman@0 8
rliterman@0 9 // Enable Conda environment support for process execution
rliterman@0 10 conda.enabled = true
rliterman@0 11
rliterman@0 12 // Include the profile definitions stored in conf/profiles.config
rliterman@0 13 includeConfig "conf/profiles.config"
rliterman@0 14
rliterman@15 15 // Per-label process settings: task name and CPU count (the memory limits below are currently commented out)
rliterman@15 16 process {
rliterman@15 17 withLabel: 'mummerMem' {
rliterman@28 18 task_name = 'CSP2-MUMmer' // NOTE(review): task_name does not look like a core Nextflow process directive; presumably read by the hosting environment -- confirm
rliterman@22 19 cpus = 1
rliterman@28 20 //memory = '4 GB'
rliterman@15 21 }
rliterman@28 22 withLabel: 'skesaMem' {
rliterman@28 23 task_name = 'CSP2-SKESA'
rliterman@28 24 //memory = '12 GB'
rliterman@28 25 }
rliterman@15 26 }
rliterman@15 27
rliterman@0 28 // Global default params (each can be overridden on the command line as --<name>)
rliterman@0 29 params {
rliterman@0 30
rliterman@0 31 // Output naming and location
rliterman@0 32
rliterman@0 33 // Set name for output folder/file prefixes [Default: "CSP2_" followed by the current time in epoch milliseconds]
rliterman@0 34 out = "CSP2_${new java.util.Date().getTime()}"
rliterman@0 35
rliterman@0 36 // Set output parent directory [Default: CWD; Set this to have all output go to the same parent folder, with unique IDs set by --out]
rliterman@0 37 outroot = ""
rliterman@0 38
rliterman@0 39 // CSP2 can run in the following run-modes (selected via --runmode):
rliterman@0 40
rliterman@0 41 // assemble: Assemble read data (--reads/--ref_reads) into FASTA via SKESA (ignores --fasta/--ref_fasta/--snpdiffs)
rliterman@0 42 // align: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta), run MUMmer alignment analysis for each query/ref combination (ignores --snpdiffs)
rliterman@0 43 // screen: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta) and/or MUMmer output (--snpdiffs), create a report for raw SNP distances between each query and reference assembly
rliterman@0 44 // snp: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta) and/or MUMmer output (--snpdiffs), generate alignments and pairwise distances for all queries based on each reference dataset
rliterman@0 45
rliterman@0 46 runmode = ""
rliterman@0 47
rliterman@0 48 // Location for isolate (query) sequence data
rliterman@0 49 reads = ""
rliterman@0 50 fasta = ""
rliterman@0 51
rliterman@0 52 // Location for reference sequence data
rliterman@0 53 ref_reads = ""
rliterman@0 54 ref_fasta = ""
rliterman@0 55
rliterman@0 56 // IDs for reference sequences (Comma-separated list)
rliterman@0 57 ref_id = ""
rliterman@0 58
rliterman@0 59 // Location for snpdiffs files
rliterman@0 60 snpdiffs = ""
rliterman@0 61
rliterman@0 62 // Read file naming: file extension plus forward/reverse suffixes (the ref_* settings below presumably apply to --ref_reads; confirm)
rliterman@0 63 readext = "fastq.gz"
rliterman@0 64 forward = "_1.fastq.gz"
rliterman@0 65 reverse = "_2.fastq.gz"
rliterman@0 66
rliterman@0 67 ref_readext = "fastq.gz"
rliterman@0 68 ref_forward = "_1.fastq.gz"
rliterman@0 69 ref_reverse = "_2.fastq.gz"
rliterman@0 70
rliterman@0 71 // Analytical variables
rliterman@0 72
rliterman@0 73 // Only consider queries if the reference genome is covered by at least <min_cov>% [Default: 85]
rliterman@0 74 min_cov = 85
rliterman@0 75
rliterman@0 76 // Only consider SNPs from contig alignments longer than <min_len> bp [Default: 500]
rliterman@0 77 min_len = 500
rliterman@0 78
rliterman@0 79 // Only consider SNPs from contig alignments with <min_iden>% identity [Default: 99]
rliterman@0 80 min_iden = 99
rliterman@0 81
rliterman@0 82 // Remove SNPs that occur within <ref_edge> bp from the end of the reference contig [Default: 150]
rliterman@0 83 ref_edge = 150
rliterman@0 84
rliterman@0 85 // Remove SNPs that occur within <query_edge> bp from the end of the query contig [Default: 150]
rliterman@0 86 query_edge = 150
rliterman@0 87
rliterman@0 88 // SNP density filters: Given density windows provided by dwin, purge windows where more than the allowable window SNPs (wsnps) are found
rliterman@0 89 // Default: 3 max per 1000bp, 2 max per 125bp, 1 max per 15bp, filtered from biggest window to smallest (dwin and wsnps are paired by position)
rliterman@0 90 // Set --dwin 0 to disable density filtering
rliterman@0 91 dwin = "1000,125,15"
rliterman@0 92 wsnps = "3,2,1"
rliterman@0 93
rliterman@0 94 // If running refchooser in snp mode, compare queries to the top <n_ref> references [Default: 1]
rliterman@0 95 n_ref = 1
rliterman@0 96
rliterman@0 97 // If the assembly file contains the string <trim_name>, remove it from the sample name (e.g. '_contigs_skesa')
rliterman@0 98 trim_name = '""' // NOTE(review): the default is the literal two-character string "" (a quoted empty string), not an empty value -- presumably deliberate; confirm
rliterman@0 99
rliterman@0 100 // If running SNP pipeline, set the maximum percent of isolates with missing data allowed in the final alignment/distances [Default: 50]
rliterman@0 101 max_missing = 50
rliterman@0 102
rliterman@0 103 // Alternate directory for pybedtools tmp files [Default: "" (system default)]
rliterman@0 104 tmp_dir = ""
rliterman@0 105
rliterman@0 106 // Set IDs for isolates to exclude from analysis (Comma-separated list)
rliterman@0 107 exclude = ""
rliterman@0 108
rliterman@0 109 // By default, do not perform edge-filtered SNP rescuing [Default: "norescue"]
rliterman@0 110 rescue = "norescue"
rliterman@0 111
rliterman@0 112 // Help flags: both default to the sentinel "nohelp" (presumably replaced when --help or --h is passed; confirm against the workflow script)
rliterman@0 113 help = "nohelp"
rliterman@0 114 h = "nohelp"
rliterman@16 115 }