jpayne@69
|
1 #!/bin/bash
|
jpayne@69
|
2
|
jpayne@69
|
3 usage(){
|
jpayne@69
|
4 echo "
|
jpayne@69
|
5 Written by Brian Bushnell
|
jpayne@69
|
6 Last modified May 6, 2020
|
jpayne@69
|
7
|
jpayne@69
|
8 Description: Finds PacBio reads containing inverted repeats.
|
jpayne@69
|
9 These are candidate triangle reads (ice cream cones).
|
jpayne@69
|
10 Either ice cream cones only, or all inverted repeats, can be filtered.
|
jpayne@69
|
11
|
jpayne@69
|
12 Usage: icecreamfinder.sh in=<input file> out=<output file> outb=<bad reads>
|
jpayne@69
|
13
|
jpayne@69
|
14 File I/O parameters:
|
jpayne@69
|
15 in=<file> Primary input.
|
jpayne@69
|
16 out=<file> (outgood) Output for good reads.
|
jpayne@69
|
17 outa=<file> (outambig) Output for with inverted repeats, but it is unclear
|
jpayne@69
|
18 whether that is natural or artifactual.
|
jpayne@69
|
19 outb=<file> (outbad) Output for reads suspected as chimeric.
|
jpayne@69
|
20 outj=<file> (outjunction) Output for junctions in inverted repeat reads.
|
jpayne@69
|
21 stats=<file> Print screen output here instead of to the screen.
|
jpayne@69
|
22 json=f Print stats as json.
|
jpayne@69
|
23 asrhist=<file> Adapter alignment score ratio histogram.
|
jpayne@69
|
24 irsist=<file> Inverted repeat alignment score ratio histogram.
|
jpayne@69
|
25 ambig= Determine where ambiguous reads are sent. They will ALWAYS
|
jpayne@69
|
26 be sent to outa if specified. If not, they will be sent
|
jpayne@69
|
27 to outg (good) unless overridden by this flag. Options:
|
jpayne@69
|
28 ambig=good: Send ambiguous reads to outg.
|
jpayne@69
|
29 ambig=bad: Send ambiguous reads to outb.
|
jpayne@69
|
30 ambig=good,bad: Send ambiguous reads to outg and outb.
|
jpayne@69
|
31 ambig=null: Do not send to outg or outb.
|
jpayne@69
|
32 overwrite=f (ow) Set to false to force the program to abort rather than
|
jpayne@69
|
33 overwrite an existing file.
|
jpayne@69
|
34 ziplevel=2 (zl) Set to 1 (lowest) through 9 (max) to change compression
|
jpayne@69
|
35 level; lower compression is faster.
|
jpayne@69
|
36
|
jpayne@69
|
37 Processing parameters:
|
jpayne@69
|
38 alignrc=t Align the reverse-complement of the read to itself to look
|
jpayne@69
|
39 for inverted repeats.
|
jpayne@69
|
40 alignadapter=t Align adapter sequence to reads.
|
jpayne@69
|
41 adapter= default: ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT
|
jpayne@69
|
42 icecreamonly=t (ico) Only remove suspected triangle reads. Otherwise, all
|
jpayne@69
|
43 inverted repeats are removed.
|
jpayne@69
|
44 ksr=t (keepshortreads) Keep non-triangle reads from triangle ZMWs.
|
jpayne@69
|
45 kzt=f (keepzmwstogether) Send all reads from a ZMW to the same file.
|
jpayne@69
|
46 targetqlen=352 (qlen) Make queries of this length from a read tip.
|
jpayne@69
|
47 qlenfraction=0.15 Try to make queries at most this fraction of read length.
|
jpayne@69
|
48 For short reads this will override targetqlen.
|
jpayne@69
|
49 minlen=40 Do not output reads shorter than this, after trimming.
|
jpayne@69
|
50 minqlen=100 Do not make queries shorter than this. For very short
|
jpayne@69
|
51 reads this will override qlenfraction.
|
jpayne@69
|
52 shortfraction=0.4 Only declare a read to be a triangle if the short half
|
jpayne@69
|
53 of the repeat is at least this fraction of read length.
|
jpayne@69
|
54 ccs=f Input reads are CCS, meaning they are all full-pass.
|
jpayne@69
|
55 In this case you should increase minratio.
|
jpayne@69
|
56 trim=t Trim adapter sequence from read tips.
|
jpayne@69
|
57 trimpolya=f Trim terminal poly-A and poly-T sequences, for some isoseq
|
jpayne@69
|
58 libraries.
|
jpayne@69
|
59 minpolymer=5 Don't trim poly-A sequence shorter than this.
|
jpayne@69
|
60 polyerror=0.2 Max error rate for trimming poly-A.
|
jpayne@69
|
61
|
jpayne@69
|
62
|
jpayne@69
|
63 Speed and sensitivity:
|
jpayne@69
|
64 jni=f Enable C code for higher speed and identical results.
|
jpayne@69
|
65 minratio= Fraction of maximal alignment score to consider as matching.
|
jpayne@69
|
66 Higher is more stringent; lower allows more sequencing errors.
|
jpayne@69
|
67 This is VERY SENSITIVE. For error-corrected reads it should
|
jpayne@69
|
68 be set higher. It is roughly the expected identity of one
|
jpayne@69
|
69 read to another (double the per-read error rate).
|
jpayne@69
|
70 minratio1=0.59 Set minratio for the first alignment pass only.
|
jpayne@69
|
71 minratio2=0.64 Set minratio for the second alignment pass only.
|
jpayne@69
|
72 adapterratio=0.18 Initial adapter detection sensitivity; affects speed.
|
jpayne@69
|
73 adapterratio2=.325 Final adapter detection sensitivity.
|
jpayne@69
|
74 minscore=-800 Exit alignment early if score drops below this.
|
jpayne@69
|
75
|
jpayne@69
|
76 Entropy parameters (recommended setting is 'entropy=t'):
|
jpayne@69
|
77 minentropy=-1 Set to 0.4 or above to remove low-entropy reads;
|
jpayne@69
|
78 range is 0-1, recommended value is 0.55. 0.7 is too high.
|
jpayne@69
|
79 Negative numbers disable this function.
|
jpayne@69
|
80 entropyk=3 Kmer length for entropy calculation.
|
jpayne@69
|
81 entropylen=350 Reads with entropy below cutoff for at least this many
|
jpayne@69
|
82 consecutive bases will be removed.
|
jpayne@69
|
83 entropyfraction=0.5 Alternative minimum length for short reads; the shorter
|
jpayne@69
|
84 of entropylen and entfraction*readlength will be used.
|
jpayne@69
|
85 entropywindow=50 Window size used for entropy calculation.
|
jpayne@69
|
86 maxmonomerfraction=0.74 (mmf) Also require this fraction of bases in each
|
jpayne@69
|
87 window to be the same base.
|
jpayne@69
|
88
|
jpayne@69
|
89 Java Parameters:
|
jpayne@69
|
90 -Xmx This will set Java's memory usage, overriding autodetection.
|
jpayne@69
|
91 -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
|
jpayne@69
|
92 specify 200 megs. The max is typically 85% of physical memory.
|
jpayne@69
|
93 -eoom This flag will cause the process to exit if an out-of-memory
|
jpayne@69
|
94 exception occurs. Requires Java 8u92+.
|
jpayne@69
|
95 -da Disable assertions.
|
jpayne@69
|
96
|
jpayne@69
|
97 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
|
jpayne@69
|
98 "
|
jpayne@69
|
99 }
|
jpayne@69
|
100
|
jpayne@69
|
101 #This block allows symlinked shellscripts to correctly set classpath.
|
jpayne@69
|
102 pushd . > /dev/null
|
jpayne@69
|
103 DIR="${BASH_SOURCE[0]}"
|
jpayne@69
|
104 while [ -h "$DIR" ]; do
|
jpayne@69
|
105 cd "$(dirname "$DIR")"
|
jpayne@69
|
106 DIR="$(readlink "$(basename "$DIR")")"
|
jpayne@69
|
107 done
|
jpayne@69
|
108 cd "$(dirname "$DIR")"
|
jpayne@69
|
109 DIR="$(pwd)/"
|
jpayne@69
|
110 popd > /dev/null
|
jpayne@69
|
111
|
jpayne@69
|
112 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
|
jpayne@69
|
113 CP="$DIR""current/"
|
jpayne@69
|
114 JNI="-Djava.library.path=""$DIR""jni/"
|
jpayne@69
|
115 #JNI=""
|
jpayne@69
|
116
|
jpayne@69
|
117 z="-Xmx2g"
|
jpayne@69
|
118 z2="-Xms2g"
|
jpayne@69
|
119 z3="-Xss16m"
|
jpayne@69
|
120 set=0
|
jpayne@69
|
121
|
jpayne@69
|
122 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
|
jpayne@69
|
123 usage
|
jpayne@69
|
124 exit
|
jpayne@69
|
125 fi
|
jpayne@69
|
126
|
jpayne@69
|
127 calcXmx () {
|
jpayne@69
|
128 source "$DIR""/calcmem.sh"
|
jpayne@69
|
129 setEnvironment
|
jpayne@69
|
130 parseXmx "$@"
|
jpayne@69
|
131 if [[ $set == 1 ]]; then
|
jpayne@69
|
132 return
|
jpayne@69
|
133 fi
|
jpayne@69
|
134 freeRam 2000m 42
|
jpayne@69
|
135 z="-Xmx${RAM}m"
|
jpayne@69
|
136 z2="-Xms${RAM}m"
|
jpayne@69
|
137 }
|
jpayne@69
|
138 calcXmx "$@"
|
jpayne@69
|
139
|
jpayne@69
|
140 icecream() {
|
jpayne@69
|
141 local CMD="java $EA $EOOM $z $z2 $z3 $JNI -cp $CP icecream.IceCreamFinder $@"
|
jpayne@69
|
142 if [[ $silent != 1 ]]; then
|
jpayne@69
|
143 echo $CMD >&2
|
jpayne@69
|
144 fi
|
jpayne@69
|
145 eval $CMD
|
jpayne@69
|
146 }
|
jpayne@69
|
147
|
jpayne@69
|
148 icecream "$@"
|