csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/bbmap.sh annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/bbmap.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children

rev	line source
jpayne@69	1 #!/bin/bash
jpayne@69	2
jpayne@69	3 usage(){
jpayne@69	4 echo "
jpayne@69	5 BBMap
jpayne@69	6 Written by Brian Bushnell, from Dec. 2010 - present
jpayne@69	7 Last modified September 15, 2022
jpayne@69	8
jpayne@69	9 Description: Fast and accurate splice-aware read aligner.
jpayne@69	10 Please read bbmap/docs/guides/BBMapGuide.txt for more information.
jpayne@69	11
jpayne@69	12 To index: bbmap.sh ref=<reference fasta>
jpayne@69	13 To map: bbmap.sh in=<reads> out=<output sam>
jpayne@69	14 To map without writing an index:
jpayne@69	15 bbmap.sh ref=<reference fasta> in=<reads> out=<output sam> nodisk
jpayne@69	16
jpayne@69	17 in=stdin will accept reads from standard in, and out=stdout will write to
jpayne@69	18 standard out, but file extensions are still needed to specify the format of the
jpayne@69	19 input and output files e.g. in=stdin.fa.gz will read gzipped fasta from
jpayne@69	20 standard in; out=stdout.sam.gz will write gzipped sam.
jpayne@69	21
jpayne@69	22 Indexing Parameters (required when building the index):
jpayne@69	23 nodisk=f Set to true to build index in memory and write nothing
jpayne@69	24 to disk except output.
jpayne@69	25 ref=<file> Specify the reference sequence. Only do this ONCE,
jpayne@69	26 when building the index (unless using 'nodisk').
jpayne@69	27 build=1 If multiple references are indexed in the same directory,
jpayne@69	28 each needs a unique numeric ID (unless using 'nodisk').
jpayne@69	29 k=13 Kmer length, range 8-15. Longer is faster but uses
jpayne@69	30 more memory. Shorter is more sensitive.
jpayne@69	31 If indexing and mapping are done in two steps, K should
jpayne@69	32 be specified each time.
jpayne@69	33 path=<.> Specify the location to write the index, if you don't
jpayne@69	34 want it in the current working directory.
jpayne@69	35 usemodulo=f Throw away ~80% of kmers based on remainder modulo a
jpayne@69	36 number (reduces RAM by 50% and sensitivity slightly).
jpayne@69	37 Should be enabled both when building the index AND
jpayne@69	38 when mapping.
jpayne@69	39 rebuild=f Force a rebuild of the index (ref= should be set).
jpayne@69	40
jpayne@69	41 Input Parameters:
jpayne@69	42 build=1 Designate index to use. Corresponds to the number
jpayne@69	43 specified when building the index.
jpayne@69	44 in=<file> Primary reads input; required parameter.
jpayne@69	45 in2=<file> For paired reads in two files.
jpayne@69	46 interleaved=auto True forces paired/interleaved input; false forces
jpayne@69	47 single-ended mapping. If not specified, interleaved
jpayne@69	48 status will be autodetected from read names.
jpayne@69	49 fastareadlen=500 Break up FASTA reads longer than this. Max is 500 for
jpayne@69	50 BBMap and 6000 for BBMapPacBio. Only works for FASTA
jpayne@69	51 input (use 'maxlen' for FASTQ input). The default for
jpayne@69	52 bbmap.sh is 500, and for mapPacBio.sh is 6000.
jpayne@69	53 unpigz=f Spawn a pigz (parallel gzip) process for faster
jpayne@69	54 decompression than using Java.
jpayne@69	55 Requires pigz to be installed.
jpayne@69	56 touppercase=t (tuc) Convert lowercase letters in reads to upper case
jpayne@69	57 (otherwise they will not match the reference).
jpayne@69	58
jpayne@69	59 Sampling Parameters:
jpayne@69	60
jpayne@69	61 reads=-1 Set to a positive number N to only process the first N
jpayne@69	62 reads (or pairs), then quit. -1 means use all reads.
jpayne@69	63 samplerate=1 Set to a number from 0 to 1 to randomly select that
jpayne@69	64 fraction of reads for mapping. 1 uses all reads.
jpayne@69	65 skipreads=0 Set to a number N to skip the first N reads (or pairs),
jpayne@69	66 then map the rest.
jpayne@69	67
jpayne@69	68 Mapping Parameters:
jpayne@69	69 fast=f This flag is a macro which sets other paramters to run
jpayne@69	70 faster, at reduced sensitivity. Bad for RNA-seq.
jpayne@69	71 slow=f This flag is a macro which sets other paramters to run
jpayne@69	72 slower, at greater sensitivity. 'vslow' is even slower.
jpayne@69	73 maxindel=16000 Don't look for indels longer than this. Lower is faster.
jpayne@69	74 Set to >=100k for RNAseq with long introns like mammals.
jpayne@69	75 strictmaxindel=f When enabled, do not allow indels longer than 'maxindel'.
jpayne@69	76 By default these are not sought, but may be found anyway.
jpayne@69	77 tipsearch=100 Look this far for read-end deletions with anchors
jpayne@69	78 shorter than K, using brute force.
jpayne@69	79 minid=0.76 Approximate minimum alignment identity to look for.
jpayne@69	80 Higher is faster and less sensitive.
jpayne@69	81 minhits=1 Minimum number of seed hits required for candidate sites.
jpayne@69	82 Higher is faster.
jpayne@69	83 local=f Set to true to use local, rather than global, alignments.
jpayne@69	84 This will soft-clip ugly ends of poor alignments.
jpayne@69	85 perfectmode=f Allow only perfect mappings when set to true (very fast).
jpayne@69	86 semiperfectmode=f Allow only perfect and semiperfect (perfect except for
jpayne@69	87 N's in the reference) mappings.
jpayne@69	88 threads=auto (t) Set to number of threads desired. By default, uses
jpayne@69	89 all cores available.
jpayne@69	90 ambiguous=best (ambig) Set behavior on ambiguously-mapped reads (with
jpayne@69	91 multiple top-scoring mapping locations).
jpayne@69	92 best (use the first best site)
jpayne@69	93 toss (consider unmapped)
jpayne@69	94 random (select one top-scoring site randomly)
jpayne@69	95 all (retain all top-scoring sites)
jpayne@69	96 samestrandpairs=f (ssp) Specify whether paired reads should map to the
jpayne@69	97 same strand or opposite strands.
jpayne@69	98 requirecorrectstrand=t (rcs) Forbid pairing of reads without correct strand
jpayne@69	99 orientation. Set to false for long-mate-pair libraries.
jpayne@69	100 killbadpairs=f (kbp) If a read pair is mapped with an inappropriate
jpayne@69	101 insert size or orientation, the read with the lower
jpayne@69	102 mapping quality is marked unmapped.
jpayne@69	103 pairedonly=f (po) Treat unpaired reads as unmapped. Thus they will
jpayne@69	104 be sent to 'outu' but not 'outm'.
jpayne@69	105 rcomp=f Reverse complement both reads prior to mapping (for LMP
jpayne@69	106 outward-facing libraries).
jpayne@69	107 rcompmate=f Reverse complement read2 prior to mapping.
jpayne@69	108 pairlen=32000 Set max allowed distance between paired reads.
jpayne@69	109 (insert size)=(pairlen)+(read1 length)+(read2 length)
jpayne@69	110 rescuedist=1200 Don't try to rescue paired reads if avg. insert size
jpayne@69	111 greater than this. Lower is faster.
jpayne@69	112 rescuemismatches=32 Maximum mismatches allowed in a rescued read. Lower
jpayne@69	113 is faster.
jpayne@69	114 averagepairdist=100 (apd) Initial average distance between paired reads.
jpayne@69	115 Varies dynamically; does not need to be specified.
jpayne@69	116 deterministic=f Run in deterministic mode. In this case it is good
jpayne@69	117 to set averagepairdist. BBMap is deterministic
jpayne@69	118 without this flag if using single-ended reads,
jpayne@69	119 or run singlethreaded.
jpayne@69	120 bandwidthratio=0 (bwr) If above zero, restrict alignment band to this
jpayne@69	121 fraction of read length. Faster but less accurate.
jpayne@69	122 bandwidth=0 (bw) Set the bandwidth directly.
jpayne@69	123 fraction of read length. Faster but less accurate.
jpayne@69	124 usejni=f (jni) Do alignments faster, in C code. Requires
jpayne@69	125 compiling the C code; details are in /jni/README.txt.
jpayne@69	126 maxsites2=800 Don't analyze (or print) more than this many alignments
jpayne@69	127 per read.
jpayne@69	128 ignorefrequentkmers=t (ifk) Discard low-information kmers that occur often.
jpayne@69	129 excludefraction=0.03 (ef) Fraction of kmers to ignore. For example, 0.03
jpayne@69	130 will ignore the most common 3% of kmers.
jpayne@69	131 greedy=t Use a greedy algorithm to discard the least-useful
jpayne@69	132 kmers on a per-read basis.
jpayne@69	133 kfilter=0 If positive, potential mapping sites must have at
jpayne@69	134 least this many consecutive exact matches.
jpayne@69	135
jpayne@69	136
jpayne@69	137 Quality and Trimming Parameters:
jpayne@69	138 qin=auto Set to 33 or 64 to specify input quality value ASCII
jpayne@69	139 offset. 33 is Sanger, 64 is old Solexa.
jpayne@69	140 qout=auto Set to 33 or 64 to specify output quality value ASCII
jpayne@69	141 offset (only if output format is fastq).
jpayne@69	142 qtrim=f Quality-trim ends before mapping. Options are:
jpayne@69	143 'f' (false), 'l' (left), 'r' (right), and 'lr' (both).
jpayne@69	144 untrim=f Undo trimming after mapping. Untrimmed bases will be
jpayne@69	145 soft-clipped in cigar strings.
jpayne@69	146 trimq=6 Trim regions with average quality below this
jpayne@69	147 (phred algorithm).
jpayne@69	148 mintrimlength=60 (mintl) Don't trim reads to be shorter than this.
jpayne@69	149 fakefastaquality=-1 (ffq) Set to a positive number 1-50 to generate fake
jpayne@69	150 quality strings for fasta input reads.
jpayne@69	151 ignorebadquality=f (ibq) Keep going, rather than crashing, if a read has
jpayne@69	152 out-of-range quality values.
jpayne@69	153 usequality=t Use quality scores when determining which read kmers
jpayne@69	154 to use as seeds.
jpayne@69	155 minaveragequality=0 (maq) Do not map reads with average quality below this.
jpayne@69	156 maqb=0 If positive, calculate maq from this many initial bases.
jpayne@69	157
jpayne@69	158 Output Parameters:
jpayne@69	159 out=<file> Write all reads to this file.
jpayne@69	160 outu=<file> Write only unmapped reads to this file. Does not
jpayne@69	161 include unmapped paired reads with a mapped mate.
jpayne@69	162 outm=<file> Write only mapped reads to this file. Includes
jpayne@69	163 unmapped paired reads with a mapped mate.
jpayne@69	164 mappedonly=f If true, treats 'out' like 'outm'.
jpayne@69	165 bamscript=<file> (bs) Write a shell script to <file> that will turn
jpayne@69	166 the sam output into a sorted, indexed bam file.
jpayne@69	167 ordered=f Set to true to output reads in same order as input.
jpayne@69	168 Slower and uses more memory.
jpayne@69	169 overwrite=f (ow) Allow process to overwrite existing files.
jpayne@69	170 secondary=f Print secondary alignments.
jpayne@69	171 sssr=0.95 (secondarysitescoreratio) Print only secondary alignments
jpayne@69	172 with score of at least this fraction of primary.
jpayne@69	173 ssao=f (secondarysiteasambiguousonly) Only print secondary
jpayne@69	174 alignments for ambiguously-mapped reads.
jpayne@69	175 maxsites=5 Maximum number of total alignments to print per read.
jpayne@69	176 Only relevant when secondary=t.
jpayne@69	177 quickmatch=f Generate cigar strings more quickly.
jpayne@69	178 trimreaddescriptions=f (trd) Truncate read and ref names at the first whitespace,
jpayne@69	179 assuming that the remainder is a comment or description.
jpayne@69	180 ziplevel=2 (zl) Compression level for zip or gzip output.
jpayne@69	181 pigz=f Spawn a pigz (parallel gzip) process for faster
jpayne@69	182 compression than Java. Requires pigz to be installed.
jpayne@69	183 machineout=f Set to true to output statistics in machine-friendly
jpayne@69	184 'key=value' format.
jpayne@69	185 printunmappedcount=f Print the total number of unmapped reads and bases.
jpayne@69	186 If input is paired, the number will be of pairs
jpayne@69	187 for which both reads are unmapped.
jpayne@69	188 showprogress=0 If positive, print a '.' every X reads.
jpayne@69	189 showprogress2=0 If positive, print the number of seconds since the
jpayne@69	190 last progress update (instead of a '.').
jpayne@69	191 renamebyinsert=f Renames reads based on their mapped insert size.
jpayne@69	192
jpayne@69	193 Bloom-Filtering Parameters (bloomfilter.sh is the standalone version).
jpayne@69	194 bloom=f Use a Bloom filter to ignore reads not sharing kmers
jpayne@69	195 with the reference. This uses more memory, but speeds
jpayne@69	196 mapping when most reads don't match the reference.
jpayne@69	197 bloomhashes=2 Number of hash functions.
jpayne@69	198 bloomminhits=3 Number of consecutive hits to be considered matched.
jpayne@69	199 bloomk=31 Bloom filter kmer length.
jpayne@69	200 bloomserial=t Use the serialized Bloom filter for greater loading
jpayne@69	201 speed, if available. If not, generate and write one.
jpayne@69	202
jpayne@69	203 Post-Filtering Parameters:
jpayne@69	204 idfilter=0 Independant of minid; sets exact minimum identity
jpayne@69	205 allowed for alignments to be printed. Range 0 to 1.
jpayne@69	206 subfilter=-1 Ban alignments with more than this many substitutions.
jpayne@69	207 insfilter=-1 Ban alignments with more than this many insertions.
jpayne@69	208 delfilter=-1 Ban alignments with more than this many deletions.
jpayne@69	209 indelfilter=-1 Ban alignments with more than this many indels.
jpayne@69	210 editfilter=-1 Ban alignments with more than this many edits.
jpayne@69	211 inslenfilter=-1 Ban alignments with an insertion longer than this.
jpayne@69	212 dellenfilter=-1 Ban alignments with a deletion longer than this.
jpayne@69	213 nfilter=-1 Ban alignments with more than this many ns. This
jpayne@69	214 includes nocall, noref, and off scaffold ends.
jpayne@69	215
jpayne@69	216 Sam flags and settings:
jpayne@69	217 noheader=f Disable generation of header lines.
jpayne@69	218 sam=1.4 Set to 1.4 to write Sam version 1.4 cigar strings,
jpayne@69	219 with = and X, or 1.3 to use M.
jpayne@69	220 saa=t (secondaryalignmentasterisks) Use asterisks instead of
jpayne@69	221 bases for sam secondary alignments.
jpayne@69	222 cigar=t Set to 'f' to skip generation of cigar strings (faster).
jpayne@69	223 keepnames=f Keep original names of paired reads, rather than
jpayne@69	224 ensuring both reads have the same name.
jpayne@69	225 intronlen=999999999 Set to a lower number like 10 to change 'D' to 'N' in
jpayne@69	226 cigar strings for deletions of at least that length.
jpayne@69	227 rgid= Set readgroup ID. All other readgroup fields
jpayne@69	228 can be set similarly, with the flag rgXX=
jpayne@69	229 If you set a readgroup flag to the word 'filename',
jpayne@69	230 e.g. rgid=filename, the input file name will be used.
jpayne@69	231 mdtag=f Write MD tags.
jpayne@69	232 nhtag=f Write NH tags.
jpayne@69	233 xmtag=f Write XM tags (may only work correctly with ambig=all).
jpayne@69	234 amtag=f Write AM tags.
jpayne@69	235 nmtag=f Write NM tags.
jpayne@69	236 xstag=f Set to 'xs=fs', 'xs=ss', or 'xs=us' to write XS tags
jpayne@69	237 for RNAseq using firststrand, secondstrand, or
jpayne@69	238 unstranded libraries. Needed by Cufflinks.
jpayne@69	239 JGI mainly uses 'firststrand'.
jpayne@69	240 stoptag=f Write a tag indicating read stop location, prefixed by YS:i:
jpayne@69	241 lengthtag=f Write a tag indicating (query,ref) alignment lengths,
jpayne@69	242 prefixed by YL:Z:
jpayne@69	243 idtag=f Write a tag indicating percent identity, prefixed by YI:f:
jpayne@69	244 inserttag=f Write a tag indicating insert size, prefixed by X8:Z:
jpayne@69	245 scoretag=f Write a tag indicating BBMap's raw score, prefixed by YR:i:
jpayne@69	246 timetag=f Write a tag indicating this read's mapping time, prefixed by X0:i:
jpayne@69	247 boundstag=f Write a tag indicating whether either read in the pair
jpayne@69	248 goes off the end of the reference, prefixed by XB:Z:
jpayne@69	249 notags=f Turn off all optional tags.
jpayne@69	250
jpayne@69	251 Histogram and statistics output parameters:
jpayne@69	252 scafstats=<file> Statistics on how many reads mapped to which scaffold.
jpayne@69	253 refstats=<file> Statistics on how many reads mapped to which reference
jpayne@69	254 file; only for BBSplit.
jpayne@69	255 sortscafs=t Sort scaffolds or references by read count.
jpayne@69	256 bhist=<file> Base composition histogram by position.
jpayne@69	257 qhist=<file> Quality histogram by position.
jpayne@69	258 aqhist=<file> Histogram of average read quality.
jpayne@69	259 bqhist=<file> Quality histogram designed for box plots.
jpayne@69	260 lhist=<file> Read length histogram.
jpayne@69	261 ihist=<file> Write histogram of insert sizes (for paired reads).
jpayne@69	262 ehist=<file> Errors-per-read histogram.
jpayne@69	263 qahist=<file> Quality accuracy histogram of error rates versus
jpayne@69	264 quality score.
jpayne@69	265 indelhist=<file> Indel length histogram.
jpayne@69	266 mhist=<file> Histogram of match, sub, del, and ins rates by
jpayne@69	267 read location.
jpayne@69	268 gchist=<file> Read GC content histogram.
jpayne@69	269 gcbins=100 Number gchist bins. Set to 'auto' to use read length.
jpayne@69	270 gcpairs=t Use average GC of paired reads.
jpayne@69	271 idhist=<file> Histogram of read count versus percent identity.
jpayne@69	272 idbins=100 Number idhist bins. Set to 'auto' to use read length.
jpayne@69	273 statsfile=stderr Mapping statistics are printed here.
jpayne@69	274
jpayne@69	275 Coverage output parameters (these may reduce speed and use more RAM):
jpayne@69	276 covstats=<file> Per-scaffold coverage info.
jpayne@69	277 rpkm=<file> Per-scaffold RPKM/FPKM counts.
jpayne@69	278 covhist=<file> Histogram of # occurrences of each depth level.
jpayne@69	279 basecov=<file> Coverage per base location.
jpayne@69	280 bincov=<file> Print binned coverage per location (one line per X bases).
jpayne@69	281 covbinsize=1000 Set the binsize for binned coverage output.
jpayne@69	282 nzo=t Only print scaffolds with nonzero coverage.
jpayne@69	283 twocolumn=f Change to true to print only ID and Avg_fold instead of
jpayne@69	284 all 6 columns to the 'out=' file.
jpayne@69	285 32bit=f Set to true if you need per-base coverage over 64k.
jpayne@69	286 strandedcov=f Track coverage for plus and minus strand independently.
jpayne@69	287 startcov=f Only track start positions of reads.
jpayne@69	288 secondarycov=t Include coverage of secondary alignments.
jpayne@69	289 physcov=f Calculate physical coverage for paired reads.
jpayne@69	290 This includes the unsequenced bases.
jpayne@69	291 delcoverage=t (delcov) Count bases covered by deletions as covered.
jpayne@69	292 True is faster than false.
jpayne@69	293 covk=0 If positive, calculate kmer coverage statistics.
jpayne@69	294
jpayne@69	295 Java Parameters:
jpayne@69	296 -Xmx This will set Java's memory usage,
jpayne@69	297 overriding autodetection.
jpayne@69	298 -Xmx20g will specify 20 gigs of RAM, and -Xmx800m
jpayne@69	299 will specify 800 megs. The max is typically 85% of
jpayne@69	300 physical memory. The human genome requires around 24g,
jpayne@69	301 or 12g with the 'usemodulo' flag. The index uses
jpayne@69	302 roughly 6 bytes per reference base.
jpayne@69	303 -eoom This flag will cause the process to exit if an
jpayne@69	304 out-of-memory exception occurs. Requires Java 8u92+.
jpayne@69	305 -da Disable assertions.
jpayne@69	306
jpayne@69	307 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter
jpayne@69	308 any problems, or post at: http://seqanswers.com/forums/showthread.php?t=41057
jpayne@69	309 "
jpayne@69	310 }
jpayne@69	311
jpayne@69	312 #This block allows symlinked shellscripts to correctly set classpath.
jpayne@69	313 pushd . > /dev/null
jpayne@69	314 DIR="${BASH_SOURCE[0]}"
jpayne@69	315 while [ -h "$DIR" ]; do
jpayne@69	316 cd "$(dirname "$DIR")"
jpayne@69	317 DIR="$(readlink "$(basename "$DIR")")"
jpayne@69	318 done
jpayne@69	319 cd "$(dirname "$DIR")"
jpayne@69	320 DIR="$(pwd)/"
jpayne@69	321 popd > /dev/null
jpayne@69	322
jpayne@69	323 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
jpayne@69	324 CP="$DIR""current/"
jpayne@69	325 JNI="-Djava.library.path=""$DIR""jni/"
jpayne@69	326 JNI=""
jpayne@69	327
jpayne@69	328 z="-Xmx1g"
jpayne@69	329 z2="-Xms1g"
jpayne@69	330 set=0
jpayne@69	331
jpayne@69	332 if [ -z "$1" ] \|\| [[ $1 == -h ]] \|\| [[ $1 == --help ]]; then
jpayne@69	333 usage
jpayne@69	334 exit
jpayne@69	335 fi
jpayne@69	336
jpayne@69	337 calcXmx () {
jpayne@69	338 source "$DIR""/calcmem.sh"
jpayne@69	339 setEnvironment
jpayne@69	340 parseXmx "$@"
jpayne@69	341 if [[ $set == 1 ]]; then
jpayne@69	342 return
jpayne@69	343 fi
jpayne@69	344 freeRam 3200m 84
jpayne@69	345 z="-Xmx${RAM}m"
jpayne@69	346 z2="-Xms${RAM}m"
jpayne@69	347 }
jpayne@69	348 calcXmx "$@"
jpayne@69	349
jpayne@69	350
jpayne@69	351 bbmap() {
jpayne@69	352 local CMD="java $EA $EOOM $z $z2 $JNI -cp $CP align2.BBMap build=1 overwrite=true fastareadlen=500 $@"
jpayne@69	353 echo $CMD >&2
jpayne@69	354 eval $CMD
jpayne@69	355 }
jpayne@69	356
jpayne@69	357 bbmap "$@"

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/bbmap.sh @ 69:33d812a61356