jpayne@69
|
1 #!/bin/bash
|
jpayne@69
|
2
|
jpayne@69
|
# Prints the help text for decontaminate.sh to stdout.
# Globals:   none
# Arguments: none
# Outputs:   the help text, preceded and followed by one blank line.
# The here-document delimiter is quoted so that the text is emitted literally;
# in particular "$TMPDIR" is shown as written instead of being expanded.
usage(){
cat <<'EOF'

Written by Brian Bushnell.
Last modified June 28, 2016

Description: Decontaminates multiplexed assemblies via normalization and mapping.

Usage: decontaminate.sh reads=<file,file> ref=<file,file> out=<directory>
or
decontaminate.sh readnamefile=<file> refnamefile=<file> out=<directory>

Input Parameters:
reads=<file,file>   Input reads, one file per library.
ref=<file,file>     Input assemblies, one file per library.
readnamefile=<file> List of input reads, one line per library.
refnamefile=<file>  List of input assemblies, one line per library.

interleaved=auto    True forces paired/interleaved input; false forces single-ended mapping.
                    If not specified, interleaved status will be autodetected from read names.
unpigz=t            Spawn a pigz (parallel gzip) process for faster decompression. Requires pigz to be installed.
touppercase=t       (tuc) Convert lowercase letters in reads to upper case (otherwise they will not match the reference).

Output Parameters:
pigz=f              Spawn a pigz (parallel gzip) process for faster compression. Requires pigz to be installed.
tmpdir=.            Write temp files here. By default it uses the system's $TMPDIR or current directory.
outdir=.            Write output files here.

Mapping Parameters:
kfilter=55          Set to a positive number N to require minimum N contiguous matches for a mapped read.
ambig=random        Determines how coverage will be calculated for ambiguously-mapped reads.
                    first: Add coverage only at first genomic mapping location.
                    random: Add coverage at a random best-scoring location.
                    all: Add coverage at all best-scoring locations.
                    toss: Discard ambiguously-mapped reads without adding coverage.

Filtering Parameters:
minc=3.5            Min average coverage to retain scaffold.
minp=20             Min percent coverage to retain scaffold.
minr=18             Min mapped reads to retain scaffold.
minl=500            Min length to retain scaffold.
ratio=1.2           Contigs will not be removed by minc unless the coverage changed by at least this factor. 0 disables this filter.
mapraw=t            Set true to map the unnormalized reads. Required to filter by 'ratio'.
basesundermin=-1    If positive, removes contigs with at least this many bases in low-coverage windows.
window=500          Sliding window size
windowcov=5         Average coverage below this will be classified as low.

Tadpole Parameters:
ecct=f              Error-correct with Tadpole before normalization.
kt=42               Kmer length for Tadpole.
aggressive=f        Do aggressive error correction.
conservative=f      Do conservative error correction.
tadpoleprefilter=1  (tadpre) Ignore kmers under this depth to save memory.

Normalization Parameters:
mindepth=2          Min depth of reads to keep.
target=20           Target normalization depth.
hashes=4            Number of hashes in Bloom filter.
passes=1            Normalization passes.
minprob=0.5         Min probability of correctness to add a kmer.
dp=0.75             (depthpercentile) Percentile to use for depth proxy (0.5 means median).
prefilter=t         Prefilter, for large datasets.
filterbits=32       (fbits) Bits per cell in primary filter.
prefilterbits=2     (pbits) Bits per cell in prefilter.
k=31                Kmer length for normalization. Longer is more precise but less sensitive.

Other parameters:
opfn=0              (onlyprocessfirstn) Set to a positive number to only process that many datasets. This is for internal testing of specificity.

Java Parameters:
-Xmx                This will set Java's memory usage, overriding autodetection.
                    -Xmx20g will specify 20 gigs of RAM, and -Xmx800m will specify 800 megs.
                    The max is typically 85% of physical memory.
-eoom               This flag will cause the process to exit if an
                    out-of-memory exception occurs. Requires Java 8u92+.
-da                 Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.

EOF
}
|
jpayne@69
|
82
|
jpayne@69
|
#This block allows symlinked shellscripts to correctly set classpath.
#It follows ${BASH_SOURCE[0]} through any chain of symlinks and stores the
#directory of the final target in DIR (absolute path with a trailing slash).
#pushd/popd restore the caller's working directory afterwards.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
	#cd first so a relative link target is resolved against the link's own directory
	cd -- "$(dirname "$DIR")" || { echo "Cannot enter directory of $DIR" >&2; exit 1; }
	DIR="$(readlink "$(basename "$DIR")")"
done
#Abort rather than continue with a DIR that points at the wrong place
cd -- "$(dirname "$DIR")" || { echo "Cannot enter directory of $DIR" >&2; exit 1; }
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
CP="$DIR""current/"
#JNI was assigned "-Djava.library.path=${DIR}jni/" and then immediately cleared,
#so its effective value has always been empty; only the final value is kept.
JNI=""

#Default JVM heap flags; calcXmx may overwrite them.
z="-Xmx1g"
z2="-Xms1g"
#Compared against 1 in calcXmx to detect an explicitly supplied heap size
#(presumably assigned by parseXmx in calcmem.sh -- confirm there).
set=0
|
jpayne@69
|
102
|
jpayne@69
|
#Show the help text and stop when the first argument is missing or empty,
#or when help was requested with -h / --help.
case "$1" in
	''|-h|--help)
		usage
		exit
		;;
esac
|
jpayne@69
|
107
|
jpayne@69
|
# Chooses the JVM heap flags for this run.
# Globals:   DIR (read); set, RAM (read -- expected to be assigned by the
#            helpers loaded from calcmem.sh, which is not visible here; confirm);
#            z, z2 (written)
# Arguments: the script's command-line arguments, forwarded to parseXmx.
# Returns:   0 on success; 1 if calcmem.sh cannot be loaded or no RAM value
#            was produced. In both failure cases z and z2 keep their
#            current values instead of becoming an invalid "-Xmxm".
calcXmx () {
	# ${DIR%/}/ works whether or not DIR carries its trailing slash.
	if ! source "${DIR%/}/calcmem.sh"; then
		echo "calcXmx: could not load ${DIR%/}/calcmem.sh; keeping $z $z2" >&2
		return 1
	fi
	setEnvironment
	parseXmx "$@"
	# set == 1: a heap size was already decided, so leave z/z2 untouched.
	if [[ $set == 1 ]]; then
		return
	fi
	freeRam 15000m 84
	if [[ -z "$RAM" ]]; then
		echo "calcXmx: freeRam produced no RAM value; keeping $z $z2" >&2
		return 1
	fi
	z="-Xmx${RAM}m"
	z2="-Xms${RAM}m"
}
|
jpayne@69
|
#Pick the JVM heap flags (z, z2) before the launcher function runs.
calcXmx "$@"
|
jpayne@69
|
120
|
jpayne@69
|
121
|
jpayne@69
|
# Runs jgi.DecontaminateByNormalization in a JVM with the configured flags.
# Globals:   JNI, EA, EOOM, z, z2, CP (read). EA and EOOM are not assigned in
#            this file -- presumably provided by calcmem.sh; confirm.
# Arguments: the script's command-line arguments, passed through to the Java
#            program unchanged (no re-parsing by the shell).
# Outputs:   the command line, on stderr, before it is executed.
# Returns:   the exit status of java.
decontaminate() {
	# JNI, EA and EOOM are optional flag strings: they are left unquoted on
	# purpose so that an empty value contributes no argument at all.
	# Everything else is quoted, so paths and user arguments containing spaces
	# or shell metacharacters reach java exactly as given.
	local -a CMD=(java $JNI $EA $EOOM "$z" "$z2" -cp "$CP" jgi.DecontaminateByNormalization "$@")
	echo "${CMD[*]}" >&2
	"${CMD[@]}"
}
|
jpayne@69
|
127
|
jpayne@69
|
#Entry point: hand every command-line argument to the launcher; being the last
#command, its exit status becomes the script's exit status.
decontaminate "$@"
|