Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/calctruequality.sh @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!/bin/bash | |
2 | |
3 usage(){ | |
4 echo " | |
5 Written by Brian Bushnell | |
6 Last modified March 21, 2019 | |
7 | |
8 Description: Calculates observed quality scores from mapped sam/bam files. | |
9 Generates matrices for use in recalibrating quality scores. By default, | |
10 the matrices are written to /ref/qual/ in the current directory. | |
11 | |
12 If you have multiple sam/bam files demultiplexed from a single sequencing run, | |
13 it is recommended to use all of them as input for increased statistical power. | |
14 Once the matrices are generated, recalibration can be done on mapped or | |
15 unmapped reads; you may get better results by recalibrating the fastq and | |
16 remapping the calibrated reads. | |
17 | |
18 Note! Diploid organisms with a high heterozygousity rate will induce | |
19 inaccurate recalibration at the high end of the quality scale unless SNP | |
20 locations are masked or variations are called. For example, recalibrating | |
21 human reads mapped to an unmasked human reference would generate an | |
22 expected maximal Q-score of roughly 30 due to the human 1/1000 SNP rate. | |
23 Variations can be ignored by using the callvars flag or providing | |
24 a file of variations. | |
25 | |
26 Usage: | |
27 | |
28 Step 1. Generate matrices (from mapped sam or bam files): | |
29 calctruequality.sh in=<file,file,...file> path=<directory> | |
30 | |
31 Step 2. Recalibrate reads (any kind of files): | |
32 bbduk.sh in=<file> out=<file> recalibrate | |
33 | |
34 | |
35 Parameters (and their defaults) | |
36 | |
37 Input parameters: | |
38 in=<file,file> Sam file or comma-delimited list of files. Alignments | |
39 must use = and X cigar symbols, or have MD tags, or | |
40 ref must be specified. | |
41 reads=-1 Stop after processing this many reads (if positive). | |
42 samstreamer=t (ss) Load reads multithreaded to increase speed. | |
43 unpigz=t Use pigz to decompress. | |
44 | |
45 Output parameters: | |
46 overwrite=t (ow) Set to true to allow overwriting of existing files. | |
47 path=. Directory to write quality matrices (within /ref subdir). | |
48 write=t Write matrices. | |
49 showstats=t Print a summary. | |
50 pigz=f Use pigz to compress. | |
51 | |
52 Other parameters: | |
53 t=auto Number of worker threads. | |
54 passes=2 Recalibration passes, 1 or 2. 2 is slower but gives more | |
55 accurate quality scores. | |
56 recalqmax=42 Adjust max quality scores tracked. The actual highest | |
57 quality score allowed is recalqmax-1. | |
58 trackall=f Track all available quality metrics and produce all | |
59 matrices, including the ones that are not selected for | |
60 quality adjustment. Reduces speed, but allows testing the | |
61 effects of different recalibration matrices. | |
62 indels=t Include indels in quality calculations. | |
63 | |
64 Variation calling: | |
65 varfile=<file> Use the variants in this var file, instead of calling | |
66 variants. The format can be produced by CallVariants. | |
67 vcf=<file> Use the variants in this VCF file, instead of | |
68 calling variants. | |
69 callvars=f Call SNPs, and do not count them as errors. | |
70 ploidy=1 Set the organism's ploidy. | |
71 ref= Required for variation-calling. | |
72 | |
73 *** 'Variant-Calling Cutoffs' flags in callvariants.sh are also supported *** | |
74 | |
75 Selecting matrices: | |
76 loadq102= For each recalibration matrix, enable or disable that matrix with t/f. | |
77 You can specify pass1 or pass2 like this: loadq102_p1=f loadq102_p2=t. | |
78 The default is loadqbp_p1=t loadqbp_p2=t loadqb123_p=t. | |
79 clearmatrices=f If true, clear all the existing matrix selections. For example: | |
80 'clearmatrices loadqbp_p1' | |
81 This would ignore defaults and select only qbp for the first pass. | |
82 | |
83 Available matrices: | |
84 q102 Quality, leading quality, trailing quality. | |
85 qap Quality, average quality, position. | |
86 qbp Quality, current base, position. | |
87 q10 Quality, leading quality. | |
88 q12 Quality, trailing quality. | |
89 qb12 Quality, leading base, current base. | |
90 qb012 Quality, two leading bases, current base. | |
91 qb123 Quality, leading base, current base, trailing base. | |
92 qb234 Quality, current base, two trailing bases. | |
93 q12b12 Quality, trailing quality, leading base, current base. | |
94 qp Quality, position. | |
95 q Current quality score only. | |
96 | |
97 | |
98 Java Parameters: | |
99 -Xmx This will set Java's memory usage, overriding autodetection. | |
100 -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs. | |
101 The max is typically 85% of physical memory. | |
102 -eoom This flag will cause the process to exit if an | |
103 out-of-memory exception occurs. Requires Java 8u92+. | |
104 -da Disable assertions. | |
105 | |
106 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. | |
107 " | |
108 } | |
109 | |
110 #This block allows symlinked shellscripts to correctly set classpath. | |
111 pushd . > /dev/null | |
112 DIR="${BASH_SOURCE[0]}" | |
113 while [ -h "$DIR" ]; do | |
114 cd "$(dirname "$DIR")" | |
115 DIR="$(readlink "$(basename "$DIR")")" | |
116 done | |
117 cd "$(dirname "$DIR")" | |
118 DIR="$(pwd)/" | |
119 popd > /dev/null | |
120 | |
121 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" | |
122 CP="$DIR""current/" | |
123 | |
124 z="-Xmx2g" | |
125 z2="-Xms2g" | |
126 set=0 | |
127 | |
128 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then | |
129 usage | |
130 exit | |
131 fi | |
132 | |
133 calcXmx () { | |
134 source "$DIR""/calcmem.sh" | |
135 setEnvironment | |
136 parseXmx "$@" | |
137 if [[ $set == 1 ]]; then | |
138 return | |
139 fi | |
140 freeRam 3200m 84 | |
141 z="-Xmx${RAM}m" | |
142 z2="-Xms${RAM}m" | |
143 } | |
144 calcXmx "$@" | |
145 | |
146 calctruequality() { | |
147 local CMD="java $EA $EOOM $z $z2 -cp $CP jgi.CalcTrueQuality $@" | |
148 echo $CMD >&2 | |
149 eval $CMD | |
150 } | |
151 | |
152 calctruequality "$@" |