jpayne@69
|
1 #!/bin/bash
|
jpayne@69
|
2
|
jpayne@69
|
# Print the help text for kcompress.sh to stdout.
# Arguments: none.
# Outputs:   the help text, preceded and followed by one blank line.
# NOTE(review): the two-column alignment below was reconstructed (descriptions
# start at column 20); the reviewed copy of this file had its whitespace
# collapsed, so confirm the spacing against the upstream script.
usage(){
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'

Written by Brian Bushnell
Last modified July 16, 2018

Description: Compresses sequence data into a fasta file containing each kmer
exactly once. Allows arbitrary kmer set operations via multiple passes.

Usage: kcompress.sh in=<reads> out=<contigs> min=<1> max=<2147483647>

Input parameters:
in=<file>           Primary input file for reads to use as kmer data.
in2=<file>          Second input file for paired data.
reads=-1            Only process this number of reads, then quit (-1 means all).

Output parameters:
out=<file>          Write contigs (in contig mode).
showstats=t         Print assembly statistics after writing contigs.
fuse=0              Fuse output sequences into chunks at least this long,
                    padded with 1 N between sequences.

Prefiltering parameters:
prefilter=0         If set to a positive integer, use a countmin sketch
                    to ignore kmers with depth of that value or lower.
prehashes=2         Number of hashes for prefilter.
prefiltersize=0.2   (pff) Fraction of memory to use for prefilter.
minprobprefilter=t  (mpp) Use minprob for the prefilter.
prepasses=1         Use this many prefiltering passes; higher may be more thorough
                    if the filter is very full. Set to 'auto' to iteratively
                    prefilter until the remaining kmers will fit in memory.

Hashing parameters:
k=31                Kmer length (1 to 31).
prealloc=t          Pre-allocate memory rather than dynamically growing;
                    faster and more memory-efficient. A float fraction (0-1)
                    may be specified; default is 1.
minprob=0.5         Ignore kmers with overall probability of correctness below this.
minprobmain=t       (mpm) Use minprob for the primary kmer counts.
threads=X           Spawn X threads (default is number of logical processors).

Assembly parameters:
mincount=1          (min) Only retain kmers that occur at least this many times.
maxcount=BIG        (max) Only retain kmers that occur at most this many times.
requiresamecount    (rsc) Only build contigs from kmers with exactly the same count.
rcomp=t             Store forward and reverse kmers together. Setting this to
                    false will only use forward kmers.


Java Parameters:
-Xmx                This will set Java's memory usage, overriding autodetection.
                    -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
                    The max is typically 85% of physical memory.
-eoom               This flag will cause the process to exit if an
                    out-of-memory exception occurs. Requires Java 8u92+.
-da                 Disable assertions.

EOF
}
|
jpayne@69
|
60
|
jpayne@69
|
# Work out the directory that holds this script and store it, with a trailing
# slash, in DIR. When the script was invoked through a symlink, each link is
# followed in turn so that DIR ends up next to the link's final target.
# The caller's working directory is saved on the directory stack first and
# restored at the end.
pushd . >/dev/null
DIR="${BASH_SOURCE[0]}"
until [[ ! -L "$DIR" ]]; do
  # Step into the link's own directory so a relative link target resolves.
  cd "$(dirname "$DIR")"
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")"
DIR="$(pwd)/"
popd >/dev/null
|
jpayne@69
|
71
|
jpayne@69
|
# One-line variant of the directory lookup, without the readlink loop
# (disabled; kept for reference):
#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"

# Java classpath: the "current/" directory next to this script.
CP="${DIR}current/"

# Default JVM heap flags (maximum and initial heap); calcXmx may replace them.
z="-Xmx14g"
z2="-Xms14g"

# Flag that calcXmx compares against 1 after calling parseXmx; when it is 1
# the z/z2 values in effect at that point are kept.
set=0
|
jpayne@69
|
78
|
jpayne@69
|
# Show the help text and stop when the script is run without a first
# argument, or when the first argument is -h or --help.
case "$1" in
  "" | -h | --help)
    usage
    exit
    ;;
esac
|
jpayne@69
|
83
|
jpayne@69
|
# Choose the JVM heap flags z (-Xmx) and z2 (-Xms).
# Globals:   DIR (read); set, RAM (read after the helper calls);
#            z, z2 (written unless set is 1).
# Arguments: the script's command-line arguments, handed to parseXmx.
# The helpers setEnvironment, parseXmx and freeRam come from calcmem.sh, which
# is not part of this file; presumably parseXmx sets set=1 (and z/z2) when the
# user supplied an explicit -Xmx, and freeRam stores a size in megabytes in
# RAM -- TODO confirm against calcmem.sh.
calcXmx () {
  source "${DIR}/calcmem.sh"
  setEnvironment
  parseXmx "$@"
  # Only fall back to the helper's memory estimate when set was not raised.
  if [[ $set != 1 ]]; then
    freeRam 15000m 84
    z="-Xmx${RAM}m"
    z2="-Xms${RAM}m"
  fi
}
|
jpayne@69
|
95 calcXmx "$@"
|
jpayne@69
|
96
|
jpayne@69
|
# Run assemble.KmerCompressor in a JVM.
# Globals:   EA, EOOM (read; optional JVM flags that are not assigned in this
#            file -- presumably provided by calcmem.sh, TODO confirm),
#            z, z2 (read; heap flags), CP (read; classpath).
# Arguments: every argument is forwarded to the Java program as-is.
# Outputs:   the command line is echoed to stderr before it is run.
# Returns:   the exit status of java.
kcompress() {
  # Build the command as an argv array and execute it directly instead of
  # flattening it into a string for eval: eval re-parses the string, so an
  # argument containing spaces or shell metacharacters would be split or
  # interpreted by the shell rather than reaching Java intact.
  # The flag variables stay unquoted on purpose so that an empty value
  # disappears and a value holding several flags splits into words.
  # shellcheck disable=SC2086,SC2206
  local -a cmd=(java $EA $EOOM $z $z2 -cp "$CP" assemble.KmerCompressor "$@")
  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}
|
jpayne@69
|
102
|
jpayne@69
|
103 kcompress "$@"
|