rliterman@0
|
1 /*
|
rliterman@0
|
2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
rliterman@0
|
3 CSP2 Nextflow config file
|
rliterman@0
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
rliterman@0
|
5 Default config options for all compute environments
|
rliterman@0
|
6 ----------------------------------------------------------------------------------------
|
rliterman@0
|
7 */
|
rliterman@0
|
8
|
rliterman@0
|
9 // Enable conda
|
rliterman@0
|
10 conda.enabled = true
|
rliterman@0
|
11
|
rliterman@0
|
12 // Import profile settings
|
rliterman@0
|
13 includeConfig "conf/profiles.config"
|
rliterman@0
|
14
|
rliterman@15
|
15 // CPU/memory settings, applied to processes by label
|
rliterman@15
|
16 process {
|
rliterman@15
|
17 withLabel: 'mummerMem' {
|
rliterman@22
|
18 cpus = 1
|
rliterman@22
|
19 memory = '4 GB'
|
rliterman@15
|
20 }
|
rliterman@22
|
21 // withLabel: 'skesaMem' {
|
rliterman@22
|
22 // memory = '12 GB'
|
rliterman@22
|
23 // }
|
rliterman@15
|
24 }
|
rliterman@15
|
25
|
rliterman@0
|
26 // Global default params
|
rliterman@0
|
27 params {
|
rliterman@0
|
28
|
rliterman@0
|
29 // Setting output directory
|
rliterman@0
|
30
|
rliterman@0
|
31 // Set name for output folder/file prefixes
|
rliterman@0
|
32 out = "CSP2_${new java.util.Date().getTime()}"
|
rliterman@0
|
33
|
rliterman@0
|
34 // Set output parent directory [Default: CWD; Set this to have all output go to the same parent folder, with unique IDs set by --out]
|
rliterman@0
|
35 outroot = ""
|
rliterman@0
|
36
|
rliterman@0
|
37 // CSP2 can run in the following run-modes:
|
rliterman@0
|
38
|
rliterman@0
|
39 // assemble: Assemble read data (--reads/--ref_reads) into FASTA via SKESA (ignores --fasta/--ref_fasta/--snpdiffs)
|
rliterman@0
|
40 // align: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta), run MUMmer alignment analysis for each query/ref combination (ignores --snpdiffs)
|
rliterman@0
|
41 // screen: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta) and/or MUMmer output (.snpdiffs), create a report for raw SNP distances between each query and reference assembly
|
rliterman@0
|
42 // snp: Given query data (--reads/--fasta) and reference data (--ref_reads/--ref_fasta) and/or MUMmer output (.snpdiffs), generate alignments and pairwise distances for all queries based on each reference dataset
|
rliterman@0
|
43
|
rliterman@0
|
44 runmode = ""
|
rliterman@0
|
45
|
rliterman@0
|
46 // Location for isolate sequence data
|
rliterman@0
|
47 reads = ""
|
rliterman@0
|
48 fasta = ""
|
rliterman@0
|
49
|
rliterman@0
|
50 // Location for reference sequence data
|
rliterman@0
|
51 ref_reads = ""
|
rliterman@0
|
52 ref_fasta = ""
|
rliterman@0
|
53
|
rliterman@0
|
54 // IDs for reference sequences (Comma-separated list)
|
rliterman@0
|
55 ref_id = ""
|
rliterman@0
|
56
|
rliterman@0
|
57 // Location for snpdiffs files
|
rliterman@0
|
58 snpdiffs = ""
|
rliterman@0
|
59
|
rliterman@0
|
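60 // Read file info: extension and forward/reverse suffixes for query reads (--reads) and reference reads (--ref_reads)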
|
rliterman@0
|
61 readext = "fastq.gz"
|
rliterman@0
|
62 forward = "_1.fastq.gz"
|
rliterman@0
|
63 reverse = "_2.fastq.gz"
|
rliterman@0
|
64
|
rliterman@0
|
65 ref_readext = "fastq.gz"
|
rliterman@0
|
66 ref_forward = "_1.fastq.gz"
|
rliterman@0
|
67 ref_reverse = "_2.fastq.gz"
|
rliterman@0
|
68
|
rliterman@0
|
69 // Analytical variables
|
rliterman@0
|
70
|
rliterman@0
|
71 // Only consider queries if the reference genome is covered by at least <min_cov>% [Default: 85]
|
rliterman@0
|
72 min_cov = 85
|
rliterman@0
|
73
|
rliterman@0
|
74 // Only consider SNPs from contig alignments longer than <min_len> bp [Default: 500]
|
rliterman@0
|
75 min_len = 500
|
rliterman@0
|
76
|
rliterman@0
|
77 // Only consider SNPs from contig alignments with at least <min_iden>% identity [Default: 99]
|
rliterman@0
|
78 min_iden = 99
|
rliterman@0
|
79
|
rliterman@0
|
80 // Remove SNPs that occur within <ref_edge>bp from the end of the reference contig [Default: 150]
|
rliterman@0
|
81 ref_edge = 150
|
rliterman@0
|
82
|
rliterman@0
|
83 // Remove SNPs that occur within <query_edge>bp from the end of the query contig [Default: 150]
|
rliterman@0
|
84 query_edge = 150
|
rliterman@0
|
85
|
rliterman@0
|
86 // SNP density filters: For each window size (bp) listed in dwin, purge windows that contain more than the corresponding maximum number of SNPs listed in wsnps
|
rliterman@0
|
87 // Default: 3 max per 1000bp, 2 max per 125bp, 1 max per 15bp, filtered from biggest window to smallest
|
rliterman@0
|
88 // Set --dwin 0 to disable density filtering
|
rliterman@0
|
89 dwin = "1000,125,15"
|
rliterman@0
|
90 wsnps = "3,2,1"
|
rliterman@0
|
91
|
rliterman@0
|
92 // If running refchooser in snp mode, compare queries to the top <n_ref> references [Default: 1]
|
rliterman@0
|
93 n_ref = 1
|
rliterman@0
|
94
|
rliterman@0
|
95 // If the assembly file contains the string <trim_name>, remove it from the sample name (e.g. '_contigs_skesa')
|
rliterman@0
|
96 trim_name = '""'
|
rliterman@0
|
97
|
rliterman@0
|
98 // If running SNP pipeline, set the maximum percent of isolates with missing data allowed in the final alignment/distances [Default: 50]
|
rliterman@0
|
99 max_missing = 50
|
rliterman@0
|
100
|
rliterman@0
|
101 // Alternate directory for pybedtools tmp files [Default: "" (system default)]
|
rliterman@0
|
102 tmp_dir = ""
|
rliterman@0
|
103
|
rliterman@0
|
104 // Set IDs for isolates to exclude from analysis (Comma-separated list)
|
rliterman@0
|
105 exclude = ""
|
rliterman@0
|
106
|
rliterman@0
|
107 // By default, do not perform edge-filtered SNP rescuing
|
rliterman@0
|
108 rescue = "norescue"
|
rliterman@0
|
109
|
rliterman@0
|
110 // Help parameters (--help/--h); both default to "nohelp"
|
rliterman@0
|
111 help = "nohelp"
|
rliterman@0
|
112 h = "nohelp"
|
rliterman@16
|
113 }
|