jpayne@69
|
1 #!/bin/bash
|
jpayne@69
|
2
|
jpayne@69
|
#######################################
# Prints the help text for this script.
# Arguments: none
# Outputs:   the help text on stdout, preceded and followed by one blank line
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'

Written by Brian Bushnell
Last modified May 23, 2014

*** DEPRECATED: This should still work but is no longer maintained. ***

Description: Annotates reads with their kmer depth.

Usage: kmercoverage in=<input> out=<read output> hist=<histogram output>

Input parameters:
in2=null Second input file for paired reads
extra=null Additional files to use for input (generating hash table) but not for output
fastareadlen=2^31 Break up FASTA reads longer than this. Can be useful when processing scaffolded genomes
tablereads=-1 Use at most this many reads when building the hashtable (-1 means all)
kmersample=1 Process every nth kmer, and skip the rest
readsample=1 Process every nth read, and skip the rest

Output parameters:
hist=null Specify a file to output the depth histogram
histlen=10000 Max depth displayed on histogram
reads=-1 Only process this number of reads, then quit (-1 means all)
sampleoutput=t Use sampling on output as well as input (not used if sample rates are 1)
printcoverage=f Only print coverage information instead of reads
useheader=f Append coverage info to the read's header
minmedian=0 Don't output reads with median coverage below this
minaverage=0 Don't output reads with average coverage below this
zerobin=f Set to true if you want kmers with a count of 0 to go in the 0 bin instead of the 1 bin in histograms.
Default is false, to prevent confusion about how there can be 0-count kmers.
The reason is that based on the 'minq' and 'minprob' settings, some kmers may be excluded from the bloom filter.

Hashing parameters:
k=31 Kmer length (values under 32 are most efficient, but arbitrarily high values are supported)
cbits=8 Bits per cell in bloom filter; must be 2, 4, 8, 16, or 32. Maximum kmer depth recorded is 2^cbits.
Large values decrease accuracy for a fixed amount of memory.
hashes=4 Number of times a kmer is hashed. Higher is slower.
Higher is MORE accurate if there is enough memory, and LESS accurate if there is not enough memory.
prefilter=f True is slower, but generally more accurate; filters out low-depth kmers from the main hashtable.
prehashes=2 Number of hashes for prefilter.
passes=1 More passes can sometimes increase accuracy by iteratively removing low-depth kmers
minq=7 Ignore kmers containing bases with quality below this
minprob=0.5 Ignore kmers with overall probability of correctness below this
threads=X Spawn exactly X hashing threads (default is number of logical processors). Total active threads may exceed X by up to 4.

Java Parameters:
-Xmx This will set Java's memory usage, overriding autodetection.
-Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
The max is typically 85% of physical memory.
-eoom This flag will cause the process to exit if an
out-of-memory exception occurs. Requires Java 8u92+.
-da Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}
|
jpayne@69
|
59
|
jpayne@69
|
# This block allows symlinked shellscripts to correctly set classpath.
# It follows the chain of symlinks starting at this script's own path and
# leaves DIR holding the absolute directory of the final target, with a
# trailing slash. The caller's working directory is saved with pushd and
# restored with popd.
pushd . > /dev/null || { echo "Cannot save the current directory." >&2; exit 1; }
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # A failed cd would make the readlink below resolve against the wrong
  # directory, so stop instead of computing a bogus classpath.
  cd "$(dirname "$DIR")" || { echo "Cannot enter directory of $DIR." >&2; exit 1; }
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || { echo "Cannot enter directory of $DIR." >&2; exit 1; }
DIR="$(pwd)/"
popd > /dev/null || { echo "Cannot restore the previous directory." >&2; exit 1; }

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Java classpath: the "current/" directory next to this script.
CP="${DIR}current/"

# Default JVM heap flags; calcXmx may overwrite them later.
z="-Xmx1g"
z2="-Xms1g"
# Tested by calcXmx after parseXmx runs; when it is 1 the automatic sizing is
# skipped. NOTE(review): presumably parseXmx sets it when the user passes an
# explicit -Xmx flag - confirm against calcmem.sh.
set=0
|
jpayne@69
|
77
|
jpayne@69
|
# Print the help text and stop when the script is run without arguments or
# when the first argument is -h or --help.
case "${1:-}" in
  '' | -h | --help)
    usage
    exit
    ;;
esac
|
jpayne@69
|
82
|
jpayne@69
|
#######################################
# Chooses the JVM heap flags for this run.
# Sources calcmem.sh from the script directory, then calls its
# setEnvironment, parseXmx and freeRam functions. Unless parseXmx leaves
# set=1, z and z2 are rebuilt from the RAM value left by freeRam.
# Globals:   DIR (read), set (read), RAM (read), z and z2 (written)
# Arguments: the script's command-line arguments, forwarded to parseXmx
# Outputs:   a warning on stderr when the defaults have to be kept
# Returns:   0 on success; 1 if calcmem.sh is missing or RAM ends up empty,
#            in which case z and z2 are left unchanged
#######################################
calcXmx() {
  local memlib="${DIR}/calcmem.sh"

  # Without the helper library none of the calls below exist, and the
  # assignments at the end would produce the invalid flag "-Xmxm".
  if [[ ! -f "$memlib" || ! -r "$memlib" ]]; then
    echo "Warning: cannot read $memlib; keeping current memory settings." >&2
    return 1
  fi
  # shellcheck source=/dev/null
  source "$memlib"

  setEnvironment
  parseXmx "$@"
  # NOTE(review): presumably parseXmx sets set=1 when the user supplied an
  # explicit -Xmx value - confirm in calcmem.sh.
  if [[ $set == 1 ]]; then
    return 0
  fi

  # NOTE(review): the meaning of "3200m" and "84" is defined by freeRam in
  # calcmem.sh; the only result used here is the RAM variable.
  freeRam 3200m 84
  if [[ -z "${RAM:-}" ]]; then
    echo "Warning: freeRam did not set RAM; keeping current memory settings." >&2
    return 1
  fi
  z="-Xmx${RAM}m"
  z2="-Xms${RAM}m"
}
calcXmx "$@"
|
jpayne@69
|
95
|
jpayne@69
|
#######################################
# Runs jgi.KmerCoverage under java with this script's fixed options
# (prefilter=true bits=16 interleaved=false) followed by the caller's
# arguments.
# Globals:   EA, EOOM, z, CP (read)
# Arguments: program arguments, passed to java unchanged
# Outputs:   the command line on stderr before it runs
# Returns:   the exit status of java
#######################################
kmercoverage() {
  local -a cmd
  # EA and EOOM are deliberately unquoted so that an empty value adds no
  # argument. NOTE(review): they are not assigned in this file; presumably
  # the sourced calcmem.sh sets them - confirm.
  # shellcheck disable=SC2206
  cmd=(java $EA $EOOM "$z" -cp "$CP" jgi.KmerCoverage
    prefilter=true bits=16 interleaved=false "$@")
  printf '%s\n' "${cmd[*]}" >&2
  # Run the array directly instead of eval-ing a string, so arguments that
  # contain spaces or shell metacharacters reach java intact.
  "${cmd[@]}"
}
|
jpayne@69
|
101
|
jpayne@69
|
# Entry point: run the tool with the script's own command-line arguments.
kmercoverage "$@"
|