comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/icecreamfinder.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/bin/bash
2
#Prints the help text for icecreamfinder.sh to stdout.
#Takes no arguments. The text is a single double-quoted literal, so it must
#not contain unescaped double quotes, dollar signs or backticks.
usage(){
echo "
Written by Brian Bushnell
Last modified May 6, 2020

Description: Finds PacBio reads containing inverted repeats.
These are candidate triangle reads (ice cream cones).
Either ice cream cones only, or all inverted repeats, can be filtered.

Usage: icecreamfinder.sh in=<input file> out=<output file> outb=<bad reads>

File I/O parameters:
in=<file>               Primary input.
out=<file>              (outgood) Output for good reads.
outa=<file>             (outambig) Output for reads with inverted repeats, but it is unclear
                        whether that is natural or artifactual.
outb=<file>             (outbad) Output for reads suspected as chimeric.
outj=<file>             (outjunction) Output for junctions in inverted repeat reads.
stats=<file>            Print screen output here instead of to the screen.
json=f                  Print stats as json.
asrhist=<file>          Adapter alignment score ratio histogram.
irsist=<file>           Inverted repeat alignment score ratio histogram.
ambig=                  Determine where ambiguous reads are sent. They will ALWAYS
                        be sent to outa if specified. If not, they will be sent
                        to outg (good) unless overridden by this flag. Options:
                           ambig=good: Send ambiguous reads to outg.
                           ambig=bad: Send ambiguous reads to outb.
                           ambig=good,bad: Send ambiguous reads to outg and outb.
                           ambig=null: Do not send to outg or outb.
overwrite=f             (ow) Set to false to force the program to abort rather than
                        overwrite an existing file.
ziplevel=2              (zl) Set to 1 (lowest) through 9 (max) to change compression
                        level; lower compression is faster.

Processing parameters:
alignrc=t               Align the reverse-complement of the read to itself to look
                        for inverted repeats.
alignadapter=t          Align adapter sequence to reads.
adapter=                default: ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT
icecreamonly=t          (ico) Only remove suspected triangle reads. Otherwise, all
                        inverted repeats are removed.
ksr=t                   (keepshortreads) Keep non-triangle reads from triangle ZMWs.
kzt=f                   (keepzmwstogether) Send all reads from a ZMW to the same file.
targetqlen=352          (qlen) Make queries of this length from a read tip.
qlenfraction=0.15       Try to make queries at most this fraction of read length.
                        For short reads this will override targetqlen.
minlen=40               Do not output reads shorter than this, after trimming.
minqlen=100             Do not make queries shorter than this. For very short
                        reads this will override qlenfraction.
shortfraction=0.4       Only declare a read to be a triangle if the short half
                        of the repeat is at least this fraction of read length.
ccs=f                   Input reads are CCS, meaning they are all full-pass.
                        In this case you should increase minratio.
trim=t                  Trim adapter sequence from read tips.
trimpolya=f             Trim terminal poly-A and poly-T sequences, for some isoseq
                        libraries.
minpolymer=5            Don't trim poly-A sequence shorter than this.
polyerror=0.2           Max error rate for trimming poly-A.


Speed and sensitivity:
jni=f                   Enable C code for higher speed and identical results.
minratio=               Fraction of maximal alignment score to consider as matching.
                        Higher is more stringent; lower allows more sequencing errors.
                        This is VERY SENSITIVE. For error-corrected reads it should
                        be set higher. It is roughly the expected identity of one
                        read to another (double the per-read error rate).
minratio1=0.59          Set minratio for the first alignment pass only.
minratio2=0.64          Set minratio for the second alignment pass only.
adapterratio=0.18       Initial adapter detection sensitivity; affects speed.
adapterratio2=.325      Final adapter detection sensitivity.
minscore=-800           Exit alignment early if score drops below this.

Entropy parameters (recommended setting is 'entropy=t'):
minentropy=-1           Set to 0.4 or above to remove low-entropy reads;
                        range is 0-1, recommended value is 0.55. 0.7 is too high.
                        Negative numbers disable this function.
entropyk=3              Kmer length for entropy calculation.
entropylen=350          Reads with entropy below cutoff for at least this many
                        consecutive bases will be removed.
entropyfraction=0.5     Alternative minimum length for short reads; the shorter
                        of entropylen and entfraction*readlength will be used.
entropywindow=50        Window size used for entropy calculation.
maxmonomerfraction=0.74 (mmf) Also require this fraction of bases in each
                        window to be the same base.

Java Parameters:
-Xmx                    This will set Java's memory usage, overriding autodetection.
                        -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
                        specify 200 megs. The max is typically 85% of physical memory.
-eoom                   This flag will cause the process to exit if an out-of-memory
                        exception occurs. Requires Java 8u92+.
-da                     Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}
100
#This block allows symlinked shellscripts to correctly set classpath.
#It follows the chain of symlinks starting at this script and leaves DIR set
#to the directory of the final target, with a trailing slash. The caller's
#working directory is saved with pushd and restored with popd.
#Every directory change is checked: continuing after a failed cd would leave
#DIR pointing at the wrong place and break the classpath silently.
pushd . > /dev/null || exit 1
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
	cd "$(dirname "$DIR")" || exit 1
	#readlink may return a relative path; it is resolved against the
	#directory entered just above.
	DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null || exit 1

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
#Classpath and native-library path, both relative to the script directory.
CP="$DIR""current/"
JNI="-Djava.library.path=""$DIR""jni/"
#JNI=""

#Default JVM flags: max heap, initial heap and thread stack size.
z="-Xmx2g"
z2="-Xms2g"
z3="-Xss16m"
#calcXmx tests this for 1 after parseXmx has run and, if so, keeps the
#current z/z2 instead of autodetecting memory.
set=0
121
#Show the help text and stop when the script is run without arguments or
#when the first argument is -h or --help.
case "$1" in
	""|-h|--help)
		usage
		exit
		;;
esac
126
#Chooses the JVM heap flags.
#Sources calcmem.sh from the script directory, then calls its setEnvironment,
#parseXmx and freeRam functions. If the global 'set' equals 1 after parseXmx,
#z and z2 are left as they are; otherwise both are rebuilt from RAM, which is
#expected to be filled in by freeRam (defined in calcmem.sh, not shown here).
#Globals:   DIR, set, RAM (read); z, z2 (written)
#Arguments: the script's command-line arguments, forwarded to parseXmx
#Exits with status 1 if calcmem.sh cannot be loaded, because none of the
#helper functions would exist and z/z2 would become the invalid "-Xmxm".
calcXmx () {
	if ! source "$DIR""/calcmem.sh"; then
		echo "Error: could not load ${DIR}/calcmem.sh" >&2
		exit 1
	fi
	setEnvironment
	parseXmx "$@"
	if [[ $set == 1 ]]; then
		return
	fi
	freeRam 2000m 42
	z="-Xmx${RAM}m"
	z2="-Xms${RAM}m"
}
138 calcXmx "$@"
139
#Runs icecream.IceCreamFinder in a JVM, forwarding the script's arguments.
#Globals read: EA, EOOM, z, z2, z3 (JVM flags), JNI, CP, silent
#Arguments:    passed to the Java program unchanged, one word per argument
#Outputs:      unless silent is 1, the command line is echoed to stderr
#Returns:      the exit status of java
icecream() {
	#The command is built as an array and executed directly rather than
	#through eval, so arguments and install paths containing spaces or shell
	#metacharacters reach java intact and are never re-evaluated by the shell.
	local -a jvm_flags
	#The flag variables may be empty; read -a splits them on whitespace
	#without glob expansion and drops the empty ones.
	read -r -a jvm_flags <<< "$EA $EOOM $z $z2 $z3"
	#JNI is omitted when empty so java never receives an empty argument.
	local -a cmd=(java "${jvm_flags[@]}" ${JNI:+"$JNI"} -cp "$CP" icecream.IceCreamFinder "$@")
	if [[ $silent != 1 ]]; then
		printf '%s\n' "${cmd[*]}" >&2
	fi
	"${cmd[@]}"
}
147
148 icecream "$@"