comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/calctruequality.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/bin/bash
2
3 usage(){
4 echo "
5 Written by Brian Bushnell
6 Last modified March 21, 2019
7
8 Description: Calculates observed quality scores from mapped sam/bam files.
9 Generates matrices for use in recalibrating quality scores. By default,
10 the matrices are written to /ref/qual/ in the current directory.
11
12 If you have multiple sam/bam files demultiplexed from a single sequencing run,
13 it is recommended to use all of them as input for increased statistical power.
14 Once the matrices are generated, recalibration can be done on mapped or
15 unmapped reads; you may get better results by recalibrating the fastq and
16 remapping the calibrated reads.
17
18 Note! Diploid organisms with a high heterozygousity rate will induce
19 inaccurate recalibration at the high end of the quality scale unless SNP
20 locations are masked or variations are called. For example, recalibrating
21 human reads mapped to an unmasked human reference would generate an
22 expected maximal Q-score of roughly 30 due to the human 1/1000 SNP rate.
23 Variations can be ignored by using the callvars flag or providing
24 a file of variations.
25
26 Usage:
27
28 Step 1. Generate matrices (from mapped sam or bam files):
29 calctruequality.sh in=<file,file,...file> path=<directory>
30
31 Step 2. Recalibrate reads (any kind of files):
32 bbduk.sh in=<file> out=<file> recalibrate
33
34
35 Parameters (and their defaults)
36
37 Input parameters:
38 in=<file,file> Sam file or comma-delimited list of files. Alignments
39 must use = and X cigar symbols, or have MD tags, or
40 ref must be specified.
41 reads=-1 Stop after processing this many reads (if positive).
42 samstreamer=t (ss) Load reads multithreaded to increase speed.
43 unpigz=t Use pigz to decompress.
44
45 Output parameters:
46 overwrite=t (ow) Set to true to allow overwriting of existing files.
47 path=. Directory to write quality matrices (within /ref subdir).
48 write=t Write matrices.
49 showstats=t Print a summary.
50 pigz=f Use pigz to compress.
51
52 Other parameters:
53 t=auto Number of worker threads.
54 passes=2 Recalibration passes, 1 or 2. 2 is slower but gives more
55 accurate quality scores.
56 recalqmax=42 Adjust max quality scores tracked. The actual highest
57 quality score allowed is recalqmax-1.
58 trackall=f Track all available quality metrics and produce all
59 matrices, including the ones that are not selected for
60 quality adjustment. Reduces speed, but allows testing the
61 effects of different recalibration matrices.
62 indels=t Include indels in quality calculations.
63
64 Variation calling:
65 varfile=<file> Use the variants in this var file, instead of calling
66 variants. The format can be produced by CallVariants.
67 vcf=<file> Use the variants in this VCF file, instead of
68 calling variants.
69 callvars=f Call SNPs, and do not count them as errors.
70 ploidy=1 Set the organism's ploidy.
71 ref= Required for variation-calling.
72
73 *** 'Variant-Calling Cutoffs' flags in callvariants.sh are also supported ***
74
75 Selecting matrices:
76 loadq102= For each recalibration matrix, enable or disable that matrix with t/f.
77 You can specify pass1 or pass2 like this: loadq102_p1=f loadq102_p2=t.
78 The default is loadqbp_p1=t loadqbp_p2=t loadqb123_p=t.
79 clearmatrices=f If true, clear all the existing matrix selections. For example:
80 'clearmatrices loadqbp_p1'
81 This would ignore defaults and select only qbp for the first pass.
82
83 Available matrices:
84 q102 Quality, leading quality, trailing quality.
85 qap Quality, average quality, position.
86 qbp Quality, current base, position.
87 q10 Quality, leading quality.
88 q12 Quality, trailing quality.
89 qb12 Quality, leading base, current base.
90 qb012 Quality, two leading bases, current base.
91 qb123 Quality, leading base, current base, trailing base.
92 qb234 Quality, current base, two trailing bases.
93 q12b12 Quality, trailing quality, leading base, current base.
94 qp Quality, position.
95 q Current quality score only.
96
97
98 Java Parameters:
99 -Xmx This will set Java's memory usage, overriding autodetection.
100 -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
101 The max is typically 85% of physical memory.
102 -eoom This flag will cause the process to exit if an
103 out-of-memory exception occurs. Requires Java 8u92+.
104 -da Disable assertions.
105
106 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
107 "
108 }
109
110 #This block allows symlinked shellscripts to correctly set classpath.
111 pushd . > /dev/null
112 DIR="${BASH_SOURCE[0]}"
113 while [ -h "$DIR" ]; do
114 cd "$(dirname "$DIR")"
115 DIR="$(readlink "$(basename "$DIR")")"
116 done
117 cd "$(dirname "$DIR")"
118 DIR="$(pwd)/"
119 popd > /dev/null
120
121 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
122 CP="$DIR""current/"
123
124 z="-Xmx2g"
125 z2="-Xms2g"
126 set=0
127
128 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
129 usage
130 exit
131 fi
132
133 calcXmx () {
134 source "$DIR""/calcmem.sh"
135 setEnvironment
136 parseXmx "$@"
137 if [[ $set == 1 ]]; then
138 return
139 fi
140 freeRam 3200m 84
141 z="-Xmx${RAM}m"
142 z2="-Xms${RAM}m"
143 }
144 calcXmx "$@"
145
146 calctruequality() {
147 local CMD="java $EA $EOOM $z $z2 -cp $CP jgi.CalcTrueQuality $@"
148 echo $CMD >&2
149 eval $CMD
150 }
151
152 calctruequality "$@"