#!/bin/bash

# Print the help text for kmercountmulti.sh to stdout.
# The output starts and ends with an empty line. A quoted heredoc is used so
# that nothing inside the text is ever expanded by the shell.
usage(){
cat <<'EOF'

Written by Brian Bushnell
Last modified February 20, 2020

Description: Estimates cardinality of unique kmers in sequence data.
Processes multiple kmer lengths simultaneously to produce a histogram.

Usage: kmercountmulti.sh in=<file> sweep=<20,100,8> out=<histogram output>

Parameters:
in=<file>           (in1) Input file, or comma-delimited list of files.
in2=<file>          Optional second file for paired reads.
out=<file>          Histogram output. Default is stdout.
k=                  Comma-delimited list of kmer lengths to use.
sweep=min,max,incr  Use incremented kmer values from min to max. For example,
                    sweep=20,26,2 is equivalent to k=20,22,24,26.
buckets=2048        Use this many buckets for counting; higher decreases
                    variance, for large datasets. Must be a power of 2.
seed=-1             Use this seed for hash functions.
                    A negative number forces a random seed.
minprob=0           Set to a value between 0 and 1 to exclude kmers with a
                    lower probability of being correct.
hashes=1            Use this many hash functions. More hashes yield greater
                    accuracy, but H hashes takes H times as long.
stdev=f             Print standard deviations.

Shortcuts:
The # symbol will be substituted for 1 and 2.
For example:
kmercountmulti.sh in=read#.fq
...is equivalent to:
kmercountmulti.sh in1=read1.fq in2=read2.fq

Java Parameters:
-Xmx                This will set Java's memory usage, overriding autodetection.
                    -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
                    The max is typically 85% of physical memory.
-eoom               This flag will cause the process to exit if an
                    out-of-memory exception occurs. Requires Java 8u92+.
-da                 Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}
# This block allows symlinked shellscripts to correctly set classpath.
# It follows the chain of symlinks from the invoked path to the real script,
# then stores that script's directory (with a trailing "/") in DIR.
# pushd/popd restore the caller's working directory afterwards.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # readlink may return a path relative to the link's own directory, so move
  # there first. Abort on failure: continuing would compute DIR from the wrong
  # directory and silently produce a bad classpath.
  cd "$(dirname "$DIR")" || exit 1
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null
# Simpler alternative that does not follow symlinks, kept for reference:
#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"

# Java classpath: the "current/" directory under DIR.
CP="${DIR}current/"

# Default JVM heap flag handed to java when nothing else changes it.
z="-Xmx500m"
# NOTE(review): 'set' is not read anywhere in this script; presumably it is
# consumed by calcmem.sh -- confirm there.
set=0
# Show the help text and stop when the script is called without arguments
# or with -h / --help as the first argument. 'exit' without an argument
# propagates the status of 'usage'.
if [[ -z "$1" || "$1" == "-h" || "$1" == "--help" ]]; then
  usage
  exit
fi
# Load the shared helper script and let it process the command line.
# Globals:   DIR (read) - directory containing calcmem.sh.
#            NOTE(review): setEnvironment/parseXmx come from calcmem.sh and
#            presumably set the JVM variables (z, EA, EOOM) used when java is
#            launched -- confirm against calcmem.sh.
# Arguments: the script's command-line arguments, forwarded to parseXmx.
# Returns:   1 if calcmem.sh cannot be sourced, otherwise parseXmx's status.
calcXmx () {
  # Without the helper file neither function below exists; report that once
  # instead of failing three times. The caller keeps its default settings.
  if ! source "$DIR""/calcmem.sh"; then
    echo "Warning: could not load ${DIR}/calcmem.sh; using default JVM settings." >&2
    return 1
  fi
  setEnvironment
  parseXmx "$@"
}
calcXmx "$@"
# Launch the Java program jgi.KmerCountMulti.
# Globals:   EA, EOOM, z (read) - JVM flags, each may be empty;
#            CP (read)          - Java classpath.
# Arguments: all script arguments, passed through unchanged to the program.
# Outputs:   echoes the command line to stderr before running it.
# Returns:   the exit status of java.
kmercountmulti() {
  local -a cmd
  # The command is kept as an array and executed directly rather than being
  # flattened into a string and re-parsed with eval: this way arguments that
  # contain spaces or shell metacharacters reach java exactly as given.
  # EA, EOOM and z are deliberately unquoted so that an empty value vanishes
  # and a multi-flag value splits into separate words.
  # shellcheck disable=SC2086
  cmd=(java $EA $EOOM $z -cp "$CP" jgi.KmerCountMulti "$@")
  echo "${cmd[*]}" >&2
  "${cmd[@]}"
}
# Entry point: run the tool with the script's original arguments.
kmercountmulti "$@"