#!/bin/bash

# Print the help text for loglog.sh to stdout.
# Takes no arguments; the caller decides whether to exit afterwards.
usage(){
echo "
Written by Brian Bushnell
Last modified March 24, 2020

Description: Estimates cardinality of unique kmers in sequence data.
See also kmercountmulti.sh.

Usage: loglog.sh in=<file> k=<31>

Parameters:
in=<file>       (in1) Input file, or comma-delimited list of files.
in2=<file>      Optional second file for paired reads.
k=31            Use this kmer length for counting.
buckets=2048    Use this many buckets for counting; higher decreases
                variance, for large datasets. Must be a power of 2.
seed=-1         Use this seed for hash functions. A negative number forces
                a random seed.
minprob=0       Set to a value between 0 and 1 to exclude kmers with a lower
                probability of being correct.


Shortcuts:
The # symbol will be substituted for 1 and 2.
For example:
loglog.sh in=read#.fq
...is equivalent to:
loglog.sh in1=read1.fq in2=read2.fq

Java Parameters:
-Xmx            This will set Java's memory usage, overriding autodetection.
                -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
                The max is typically 85% of physical memory.
-eoom           This flag will cause the process to exit if an out-of-memory
                exception occurs. Requires Java 8u92+.
-da             Disable assertions.

Supported input formats are fastq, fasta, scarf, sam, and bam.
Supported compression formats are gzip and bz2.
To read from stdin, set 'in=stdin'. The format should be specified with an extension, like 'in=stdin.fq.gz'

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}

# This block allows symlinked shellscripts to correctly set classpath.
# Each symlink hop is followed from the directory that contains the link, so
# relative link targets resolve correctly. The walk happens inside a command
# substitution (a subshell), so the caller's working directory and directory
# stack are never modified, and a failing cd aborts instead of silently
# yielding the wrong directory.
DIR="$(
  src="${BASH_SOURCE[0]}"
  while [ -h "$src" ]; do
    # cd output is discarded so a CDPATH hit cannot pollute the captured path.
    cd "$(dirname "$src")" > /dev/null || exit 1
    src="$(readlink "$(basename "$src")")"
  done
  cd "$(dirname "$src")" > /dev/null || exit 1
  pwd
)" || {
  echo "Error: unable to resolve the directory of ${BASH_SOURCE[0]}" >&2
  exit 1
}
# Trailing slash is kept so later code can append relative names directly.
DIR="$DIR/"

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Java classpath root: the "current/" directory next to this script.
CP="${DIR}current/"

# Defaults set before the memory helpers run.
# NOTE(review): 'set' and 'z' are presumably read or overwritten by the
# functions in calcmem.sh (not visible here) - confirm against that file.
set=0
z="-Xmx200m"

# With no first argument, or an explicit help flag, show the help text and stop.
case "$1" in
  ''|-h|--help)
    usage
    exit
    ;;
esac

# Load the shared memory helpers and run them against the command line.
# Globals:   DIR (read) - directory containing calcmem.sh
# Arguments: all script arguments, forwarded unchanged to parseXmx
# Returns:   1 if calcmem.sh cannot be read; otherwise the status of parseXmx
# NOTE(review): setEnvironment and parseXmx are expected to be defined by
# calcmem.sh, which is not visible here; presumably they adjust the JVM flag
# variables used when launching Java - confirm against that file.
calcXmx () {
  # DIR normally ends with "/", so strip it before joining to avoid "//".
  local calcmem_path="${DIR%/}/calcmem.sh"
  if [[ ! -r "$calcmem_path" ]]; then
    echo "Error: cannot read $calcmem_path" >&2
    return 1
  fi
  # shellcheck source=/dev/null
  source "$calcmem_path"
  setEnvironment
  parseXmx "$@"
}
calcXmx "$@"

# Launch cardinality.LogLogWrapper in a JVM with the configured flags.
# Globals:   EA, EOOM, z (read) - JVM flags; CP (read) - Java classpath
# Arguments: all script arguments, passed to the Java program unchanged
# Outputs:   the command being run, on stderr
# Returns:   the exit status of java
#
# The command is kept as an argv array instead of a string run through eval,
# so arguments and classpaths containing spaces stay intact and shell
# metacharacters in arguments are never re-interpreted.
# NOTE(review): callers that relied on eval's second round of parsing (for
# example passing escaped quotes) will now have those characters passed to
# Java literally.
loglog() {
  local -a cmd
  # EA, EOOM and z are left unquoted on purpose: an empty flag must vanish
  # and a variable holding several flags must split into separate words.
  # shellcheck disable=SC2206
  cmd=(java $EA $EOOM $z -cp "$CP" cardinality.LogLogWrapper "$@")
  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}

# Entry point: forward every command-line argument to the Java launcher.
loglog "$@"