annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/icecreamfinder.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #!/bin/bash
jpayne@69 2
jpayne@69 3 usage(){
jpayne@69 4 echo "
jpayne@69 5 Written by Brian Bushnell
jpayne@69 6 Last modified May 6, 2020
jpayne@69 7
jpayne@69 8 Description: Finds PacBio reads containing inverted repeats.
jpayne@69 9 These are candidate triangle reads (ice cream cones).
jpayne@69 10 Either ice cream cones only, or all inverted repeats, can be filtered.
jpayne@69 11
jpayne@69 12 Usage: icecreamfinder.sh in=<input file> out=<output file> outb=<bad reads>
jpayne@69 13
jpayne@69 14 File I/O parameters:
jpayne@69 15 in=<file> Primary input.
jpayne@69 16 out=<file> (outgood) Output for good reads.
jpayne@69 17 outa=<file> (outambig) Output for with inverted repeats, but it is unclear
jpayne@69 18 whether that is natural or artifactual.
jpayne@69 19 outb=<file> (outbad) Output for reads suspected as chimeric.
jpayne@69 20 outj=<file> (outjunction) Output for junctions in inverted repeat reads.
jpayne@69 21 stats=<file> Print screen output here instead of to the screen.
jpayne@69 22 json=f Print stats as json.
jpayne@69 23 asrhist=<file> Adapter alignment score ratio histogram.
jpayne@69 24 irsist=<file> Inverted repeat alignment score ratio histogram.
jpayne@69 25 ambig= Determine where ambiguous reads are sent. They will ALWAYS
jpayne@69 26 be sent to outa if specified. If not, they will be sent
jpayne@69 27 to outg (good) unless overridden by this flag. Options:
jpayne@69 28 ambig=good: Send ambiguous reads to outg.
jpayne@69 29 ambig=bad: Send ambiguous reads to outb.
jpayne@69 30 ambig=good,bad: Send ambiguous reads to outg and outb.
jpayne@69 31 ambig=null: Do not send to outg or outb.
jpayne@69 32 overwrite=f (ow) Set to false to force the program to abort rather than
jpayne@69 33 overwrite an existing file.
jpayne@69 34 ziplevel=2 (zl) Set to 1 (lowest) through 9 (max) to change compression
jpayne@69 35 level; lower compression is faster.
jpayne@69 36
jpayne@69 37 Processing parameters:
jpayne@69 38 alignrc=t Align the reverse-complement of the read to itself to look
jpayne@69 39 for inverted repeats.
jpayne@69 40 alignadapter=t Align adapter sequence to reads.
jpayne@69 41 adapter= default: ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT
jpayne@69 42 icecreamonly=t (ico) Only remove suspected triangle reads. Otherwise, all
jpayne@69 43 inverted repeats are removed.
jpayne@69 44 ksr=t (keepshortreads) Keep non-triangle reads from triangle ZMWs.
jpayne@69 45 kzt=f (keepzmwstogether) Send all reads from a ZMW to the same file.
jpayne@69 46 targetqlen=352 (qlen) Make queries of this length from a read tip.
jpayne@69 47 qlenfraction=0.15 Try to make queries at most this fraction of read length.
jpayne@69 48 For short reads this will override targetqlen.
jpayne@69 49 minlen=40 Do not output reads shorter than this, after trimming.
jpayne@69 50 minqlen=100 Do not make queries shorter than this. For very short
jpayne@69 51 reads this will override qlenfraction.
jpayne@69 52 shortfraction=0.4 Only declare a read to be a triangle if the short half
jpayne@69 53 of the repeat is at least this fraction of read length.
jpayne@69 54 ccs=f Input reads are CCS, meaning they are all full-pass.
jpayne@69 55 In this case you should increase minratio.
jpayne@69 56 trim=t Trim adapter sequence from read tips.
jpayne@69 57 trimpolya=f Trim terminal poly-A and poly-T sequences, for some isoseq
jpayne@69 58 libraries.
jpayne@69 59 minpolymer=5 Don't trim poly-A sequence shorter than this.
jpayne@69 60 polyerror=0.2 Max error rate for trimming poly-A.
jpayne@69 61
jpayne@69 62
jpayne@69 63 Speed and sensitivity:
jpayne@69 64 jni=f Enable C code for higher speed and identical results.
jpayne@69 65 minratio= Fraction of maximal alignment score to consider as matching.
jpayne@69 66 Higher is more stringent; lower allows more sequencing errors.
jpayne@69 67 This is VERY SENSITIVE. For error-corrected reads it should
jpayne@69 68 be set higher. It is roughly the expected identity of one
jpayne@69 69 read to another (double the per-read error rate).
jpayne@69 70 minratio1=0.59 Set minratio for the first alignment pass only.
jpayne@69 71 minratio2=0.64 Set minratio for the second alignment pass only.
jpayne@69 72 adapterratio=0.18 Initial adapter detection sensitivity; affects speed.
jpayne@69 73 adapterratio2=.325 Final adapter detection sensitivity.
jpayne@69 74 minscore=-800 Exit alignment early if score drops below this.
jpayne@69 75
jpayne@69 76 Entropy parameters (recommended setting is 'entropy=t'):
jpayne@69 77 minentropy=-1 Set to 0.4 or above to remove low-entropy reads;
jpayne@69 78 range is 0-1, recommended value is 0.55. 0.7 is too high.
jpayne@69 79 Negative numbers disable this function.
jpayne@69 80 entropyk=3 Kmer length for entropy calculation.
jpayne@69 81 entropylen=350 Reads with entropy below cutoff for at least this many
jpayne@69 82 consecutive bases will be removed.
jpayne@69 83 entropyfraction=0.5 Alternative minimum length for short reads; the shorter
jpayne@69 84 of entropylen and entfraction*readlength will be used.
jpayne@69 85 entropywindow=50 Window size used for entropy calculation.
jpayne@69 86 maxmonomerfraction=0.74 (mmf) Also require this fraction of bases in each
jpayne@69 87 window to be the same base.
jpayne@69 88
jpayne@69 89 Java Parameters:
jpayne@69 90 -Xmx This will set Java's memory usage, overriding autodetection.
jpayne@69 91 -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will
jpayne@69 92 specify 200 megs. The max is typically 85% of physical memory.
jpayne@69 93 -eoom This flag will cause the process to exit if an out-of-memory
jpayne@69 94 exception occurs. Requires Java 8u92+.
jpayne@69 95 -da Disable assertions.
jpayne@69 96
jpayne@69 97 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
jpayne@69 98 "
jpayne@69 99 }
jpayne@69 100
jpayne@69 101 #This block allows symlinked shellscripts to correctly set classpath.
jpayne@69 102 pushd . > /dev/null
jpayne@69 103 DIR="${BASH_SOURCE[0]}"
jpayne@69 104 while [ -h "$DIR" ]; do
jpayne@69 105 cd "$(dirname "$DIR")"
jpayne@69 106 DIR="$(readlink "$(basename "$DIR")")"
jpayne@69 107 done
jpayne@69 108 cd "$(dirname "$DIR")"
jpayne@69 109 DIR="$(pwd)/"
jpayne@69 110 popd > /dev/null
jpayne@69 111
jpayne@69 112 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
jpayne@69 113 CP="$DIR""current/"
jpayne@69 114 JNI="-Djava.library.path=""$DIR""jni/"
jpayne@69 115 #JNI=""
jpayne@69 116
jpayne@69 117 z="-Xmx2g"
jpayne@69 118 z2="-Xms2g"
jpayne@69 119 z3="-Xss16m"
jpayne@69 120 set=0
jpayne@69 121
jpayne@69 122 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
jpayne@69 123 usage
jpayne@69 124 exit
jpayne@69 125 fi
jpayne@69 126
jpayne@69 127 calcXmx () {
jpayne@69 128 source "$DIR""/calcmem.sh"
jpayne@69 129 setEnvironment
jpayne@69 130 parseXmx "$@"
jpayne@69 131 if [[ $set == 1 ]]; then
jpayne@69 132 return
jpayne@69 133 fi
jpayne@69 134 freeRam 2000m 42
jpayne@69 135 z="-Xmx${RAM}m"
jpayne@69 136 z2="-Xms${RAM}m"
jpayne@69 137 }
jpayne@69 138 calcXmx "$@"
jpayne@69 139
jpayne@69 140 icecream() {
jpayne@69 141 local CMD="java $EA $EOOM $z $z2 $z3 $JNI -cp $CP icecream.IceCreamFinder $@"
jpayne@69 142 if [[ $silent != 1 ]]; then
jpayne@69 143 echo $CMD >&2
jpayne@69 144 fi
jpayne@69 145 eval $CMD
jpayne@69 146 }
jpayne@69 147
jpayne@69 148 icecream "$@"