Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/icecreamfinder.sh @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!/bin/bash | |
2 | |
# usage: print the help text for icecreamfinder.sh to stdout.
# Takes no arguments; the text is a single multi-line string, so the output
# begins and ends with a blank line.
# NOTE(review): the 'irsist=' flag name below looks like a typo, but the correct
# spelling cannot be confirmed from this script -- verify against the Java
# argument parser before changing it.
usage(){
echo "
Written by Brian Bushnell
Last modified May 6, 2020

Description: Finds PacBio reads containing inverted repeats.
These are candidate triangle reads (ice cream cones).
Either ice cream cones only, or all inverted repeats, can be filtered.

Usage: icecreamfinder.sh in=<input file> out=<output file> outb=<bad reads>

File I/O parameters:
in=<file>           Primary input.
out=<file>          (outgood) Output for good reads.
outa=<file>         (outambig) Output for reads with inverted repeats, but it is unclear
                    whether that is natural or artifactual.
outb=<file>         (outbad) Output for reads suspected as chimeric.
outj=<file>         (outjunction) Output for junctions in inverted repeat reads.
stats=<file>        Print screen output here instead of to the screen.
json=f              Print stats as json.
asrhist=<file>      Adapter alignment score ratio histogram.
irsist=<file>       Inverted repeat alignment score ratio histogram.
ambig=              Determine where ambiguous reads are sent. They will ALWAYS
                    be sent to outa if specified. If not, they will be sent
                    to outg (good) unless overridden by this flag. Options:
                       ambig=good: Send ambiguous reads to outg.
                       ambig=bad: Send ambiguous reads to outb.
                       ambig=good,bad: Send ambiguous reads to outg and outb.
                       ambig=null: Do not send to outg or outb.
overwrite=f         (ow) Set to false to force the program to abort rather than
                    overwrite an existing file.
ziplevel=2          (zl) Set to 1 (lowest) through 9 (max) to change compression
                    level; lower compression is faster.

Processing parameters:
alignrc=t           Align the reverse-complement of the read to itself to look
                    for inverted repeats.
alignadapter=t      Align adapter sequence to reads.
adapter=            default: ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT
icecreamonly=t      (ico) Only remove suspected triangle reads. Otherwise, all
                    inverted repeats are removed.
ksr=t               (keepshortreads) Keep non-triangle reads from triangle ZMWs.
kzt=f               (keepzmwstogether) Send all reads from a ZMW to the same file.
targetqlen=352      (qlen) Make queries of this length from a read tip.
qlenfraction=0.15   Try to make queries at most this fraction of read length.
                    For short reads this will override targetqlen.
minlen=40           Do not output reads shorter than this, after trimming.
minqlen=100         Do not make queries shorter than this. For very short
                    reads this will override qlenfraction.
shortfraction=0.4   Only declare a read to be a triangle if the short half
                    of the repeat is at least this fraction of read length.
ccs=f               Input reads are CCS, meaning they are all full-pass.
                    In this case you should increase minratio.
trim=t              Trim adapter sequence from read tips.
trimpolya=f         Trim terminal poly-A and poly-T sequences, for some isoseq
                    libraries.
minpolymer=5        Don't trim poly-A sequence shorter than this.
polyerror=0.2       Max error rate for trimming poly-A.


Speed and sensitivity:
jni=f               Enable C code for higher speed and identical results.
minratio=           Fraction of maximal alignment score to consider as matching.
                    Higher is more stringent; lower allows more sequencing errors.
                    This is VERY SENSITIVE. For error-corrected reads it should
                    be set higher. It is roughly the expected identity of one
                    read to another (double the per-read error rate).
minratio1=0.59      Set minratio for the first alignment pass only.
minratio2=0.64      Set minratio for the second alignment pass only.
adapterratio=0.18   Initial adapter detection sensitivity; affects speed.
adapterratio2=.325  Final adapter detection sensitivity.
minscore=-800       Exit alignment early if score drops below this.

Entropy parameters (recommended setting is 'entropy=t'):
minentropy=-1       Set to 0.4 or above to remove low-entropy reads;
                    range is 0-1, recommended value is 0.55. 0.7 is too high.
                    Negative numbers disable this function.
entropyk=3          Kmer length for entropy calculation.
entropylen=350      Reads with entropy below cutoff for at least this many
                    consecutive bases will be removed.
entropyfraction=0.5 Alternative minimum length for short reads; the shorter
                    of entropylen and entropyfraction*readlength will be used.
entropywindow=50    Window size used for entropy calculation.
maxmonomerfraction=0.74 (mmf) Also require this fraction of bases in each
                    window to be the same base.

Java Parameters:
-Xmx                This will set Java's memory usage, overriding autodetection.
                    -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
                    specify 200 megs. The max is typically 85% of physical memory.
-eoom               This flag will cause the process to exit if an out-of-memory
                    exception occurs. Requires Java 8u92+.
-da                 Disable assertions.

Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
"
}
100 | |
#This block allows symlinked shellscripts to correctly set classpath.
# It follows the chain of symlinks starting at this script's own path and
# leaves DIR holding the absolute directory of the final target, with a
# trailing slash. pushd/popd restore the caller's working directory.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
	# Enter the link's directory first so a relative link target resolves.
	# Abort on failure: continuing would resolve against the wrong directory.
	cd "$(dirname "$DIR")" || { echo "Cannot enter directory of $DIR" >&2; exit 1; }
	DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || { echo "Cannot enter directory of $DIR" >&2; exit 1; }
DIR="$(pwd)/"
popd > /dev/null
111 | |
# Classpath and native-library option, both built from DIR.
# (Alternative way to obtain DIR, kept for reference:
#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# )
CP="${DIR}current/"
JNI="-Djava.library.path=${DIR}jni/"
# To drop the native-library option, use instead:
#JNI=""

# Default JVM flags placed on the java command line: thread stack size,
# initial heap, maximum heap. calcXmx may replace z and z2.
z3="-Xss16m"
z2="-Xms2g"
z="-Xmx2g"
# calcXmx leaves z/z2 untouched when this is 1
# (presumably set by parseXmx in calcmem.sh -- confirm there).
set=0
121 | |
# Print the help text and stop when the first argument is missing/empty
# or is one of the help flags.
case "$1" in
	""|-h|--help)
		usage
		exit
		;;
esac
126 | |
# calcXmx [script arguments...]
# Chooses the JVM heap flags stored in the globals z (-Xmx) and z2 (-Xms).
# Loads calcmem.sh from DIR, then calls its setEnvironment and parseXmx
# helpers with the script arguments. When 'set' is 1 afterwards, z and z2 are
# left as they are; otherwise freeRam is called and both flags are rebuilt
# from RAM (presumably a megabyte count set by freeRam -- confirm in
# calcmem.sh).
calcXmx () {
	source "${DIR}/calcmem.sh"
	setEnvironment
	parseXmx "$@"
	if [[ $set != 1 ]]; then
		freeRam 2000m 42
		z="-Xmx${RAM}m"
		z2="-Xms${RAM}m"
	fi
}
138 calcXmx "$@" | |
139 | |
# icecream [program arguments...]
# Runs icecream.IceCreamFinder under java with the flags in the globals
# EA, EOOM, z, z2, z3 and JNI, and the classpath CP. Unless 'silent' is 1, the
# command line is first echoed to stderr. Returns java's exit status.
#
# The command is kept as an argv array and run directly rather than being
# flattened into a string and passed to eval: with eval, an argument or an
# install path containing whitespace was re-split, and shell metacharacters in
# an argument (e.g. \$(...), ;, >) were executed by the shell.
icecream() {
	# EA/EOOM/z/z2/z3 are deliberately unquoted so that an empty or unset
	# value contributes no argument, matching the previous behaviour.
	# shellcheck disable=SC2086,SC2206
	local -a cmd=(java $EA $EOOM $z $z2 $z3)
	# JNI and CP embed the install directory, which may contain spaces.
	if [[ -n $JNI ]]; then
		cmd+=("$JNI")
	fi
	cmd+=(-cp "$CP" icecream.IceCreamFinder "$@")
	if [[ $silent != 1 ]]; then
		printf '%s\n' "${cmd[*]}" >&2
	fi
	"${cmd[@]}"
}
147 | |
148 icecream "$@" |