annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/kmercoverage.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #!/bin/bash
jpayne@69 2
jpayne@69 3 usage(){
jpayne@69 4 echo "
jpayne@69 5 Written by Brian Bushnell
jpayne@69 6 Last modified May 23, 2014
jpayne@69 7
jpayne@69 8 *** DEPRECATED: This should still work but is no longer maintained. ***
jpayne@69 9
jpayne@69 10 Description: Annotates reads with their kmer depth.
jpayne@69 11
jpayne@69 12 Usage: kmercoverage in=<input> out=<read output> hist=<histogram output>
jpayne@69 13
jpayne@69 14 Input parameters:
jpayne@69 15 in2=null Second input file for paired reads
jpayne@69 16 extra=null Additional files to use for input (generating hash table) but not for output
jpayne@69 17 fastareadlen=2^31 Break up FASTA reads longer than this. Can be useful when processing scaffolded genomes
jpayne@69 18 tablereads=-1 Use at most this many reads when building the hashtable (-1 means all)
jpayne@69 19 kmersample=1 Process every nth kmer, and skip the rest
jpayne@69 20 readsample=1 Process every nth read, and skip the rest
jpayne@69 21
jpayne@69 22 Output parameters:
jpayne@69 23 hist=null Specify a file to output the depth histogram
jpayne@69 24 histlen=10000 Max depth displayed on histogram
jpayne@69 25 reads=-1 Only process this number of reads, then quit (-1 means all)
jpayne@69 26 sampleoutput=t Use sampling on output as well as input (not used if sample rates are 1)
jpayne@69 27 printcoverage=f Only print coverage information instead of reads
jpayne@69 28 useheader=f Append coverage info to the read's header
jpayne@69 29 minmedian=0 Don't output reads with median coverage below this
jpayne@69 30 minaverage=0 Don't output reads with average coverage below this
jpayne@69 31 zerobin=f Set to true if you want kmers with a count of 0 to go in the 0 bin instead of the 1 bin in histograms.
jpayne@69 32 Default is false, to prevent confusion about how there can be 0-count kmers.
jpayne@69 33 The reason is that based on the 'minq' and 'minprob' settings, some kmers may be excluded from the bloom filter.
jpayne@69 34
jpayne@69 35 Hashing parameters:
jpayne@69 36 k=31 Kmer length (values under 32 are most efficient, but arbitrarily high values are supported)
jpayne@69 37 cbits=8 Bits per cell in bloom filter; must be 2, 4, 8, 16, or 32. Maximum kmer depth recorded is 2^cbits.
jpayne@69 38 Large values decrease accuracy for a fixed amount of memory.
jpayne@69 39 hashes=4 Number of times a kmer is hashed. Higher is slower.
jpayne@69 40 Higher is MORE accurate if there is enough memory, and LESS accurate if there is not enough memory.
jpayne@69 41 prefilter=f True is slower, but generally more accurate; filters out low-depth kmers from the main hashtable.
jpayne@69 42 prehashes=2 Number of hashes for prefilter.
jpayne@69 43 passes=1 More passes can sometimes increase accuracy by iteratively removing low-depth kmers
jpayne@69 44 minq=7 Ignore kmers containing bases with quality below this
jpayne@69 45 minprob=0.5 Ignore kmers with overall probability of correctness below this
jpayne@69 46 threads=X Spawn exactly X hashing threads (default is number of logical processors). Total active threads may exceed X by up to 4.
jpayne@69 47
jpayne@69 48 Java Parameters:
jpayne@69 49 -Xmx This will set Java's memory usage, overriding autodetection.
jpayne@69 50 -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
jpayne@69 51 The max is typically 85% of physical memory.
jpayne@69 52 -eoom This flag will cause the process to exit if an
jpayne@69 53 out-of-memory exception occurs. Requires Java 8u92+.
jpayne@69 54 -da Disable assertions.
jpayne@69 55
jpayne@69 56 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
jpayne@69 57 "
jpayne@69 58 }
jpayne@69 59
# Work out the directory that really contains this script so the classpath is
# correct even when the script is started through a symlink. Each link in the
# chain is read from inside its own directory, which makes relative link
# targets resolve correctly. The caller's working directory is saved with
# pushd and restored with popd once DIR has been captured.
# (A plain `cd "$(dirname "${BASH_SOURCE[0]}")" && pwd` would stop at the
# directory of the symlink itself.)
pushd . > /dev/null
link_target="${BASH_SOURCE[0]}"
until [[ ! -L "$link_target" ]]; do
  cd "$(dirname "$link_target")"
  link_target="$(readlink "$(basename "$link_target")")"
done
cd "$(dirname "$link_target")"
DIR="$(pwd)/"
unset link_target
popd > /dev/null

# Java classpath: the "current/" directory located beside the script.
CP="${DIR}current/"
jpayne@69 73
# Starting JVM heap flags and the "memory was chosen explicitly" marker.
# calcXmx may overwrite z/z2 later; it skips its own sizing when set is 1.
z="-Xmx1g"
z2="-Xms1g"
set=0

# With no first argument (or an empty one), or with -h / --help, print the
# help text and stop. A bare `exit` passes on the status of `usage`.
case "$1" in
  '' | -h | --help)
    usage
    exit
    ;;
esac
jpayne@69 82
# Decide the JVM heap flags for this run.
# Globals:   DIR (read), set (read), RAM (read), z and z2 (written)
# Arguments: the script's own command-line arguments, forwarded to parseXmx
# The helpers setEnvironment, parseXmx and freeRam come from calcmem.sh, which
# is sourced from the script's directory; their exact behaviour is defined
# there, not here.
calcXmx () {
  source "${DIR}/calcmem.sh"
  setEnvironment
  parseXmx "$@"

  # NOTE(review): parseXmx presumably sets set=1 (and z/z2) when the caller
  # supplied an explicit -Xmx value -- confirm in calcmem.sh. In that case the
  # automatic sizing below is skipped and z/z2 are left as they are.
  if [[ $set != 1 ]]; then
    # NOTE(review): freeRam appears to publish its result in RAM, in megabytes;
    # the meaning of "3200m" and "84" is defined by calcmem.sh -- confirm there.
    freeRam 3200m 84
    z="-Xmx${RAM}m"
    z2="-Xms${RAM}m"
  fi
}
jpayne@69 94 calcXmx "$@"
jpayne@69 95
# Launch jgi.KmerCoverage with the fixed options prefilter=true bits=16
# interleaved=false, followed by the caller's arguments.
# Globals:   EA, EOOM, z (read; JVM flags, each may be empty or hold several
#            space-separated flags), CP (read; classpath)
# Arguments: passed through to jgi.KmerCoverage unchanged, one word each
# Outputs:   the command line is logged to stderr before it runs
# Returns:   the exit status of java
#
# The command is built as an array and executed directly rather than being
# flattened into a string and run through eval. With eval every user argument
# was parsed by the shell a second time, so a file name containing a space was
# split in two and characters such as ';', '$(...)' or '*' were executed or
# expanded instead of being handed to Java.
kmercoverage() {
  local -a jvm_flags cmd

  # EA/EOOM/z are plain strings that are meant to be word-split (and dropped
  # when empty). `read -a` splits them on whitespace without glob expansion.
  read -r -a jvm_flags <<< "$EA $EOOM $z"

  # Only $z (-Xmx) is passed; z2 (-Xms) is deliberately left as in the original.
  cmd=(java "${jvm_flags[@]}" -cp "$CP" jgi.KmerCoverage
    prefilter=true bits=16 interleaved=false "$@")

  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}
jpayne@69 101
jpayne@69 102 kmercoverage "$@"