annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/randomreads.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #!/bin/bash
jpayne@69 2
jpayne@69 3 usage(){
jpayne@69 4 echo "
jpayne@69 5 Written by Brian Bushnell
jpayne@69 6 Last modified April 1, 2019
jpayne@69 7
jpayne@69 8 Description: Generates random synthetic reads from a reference genome. Read names indicate their genomic origin.
jpayne@69 9 Allows precise customization of things like insert size and synthetic mutation type, sizes, and rates.
jpayne@69 10 Read names generated by this program are used by MakeRocCure (samtoroc.sh) and GradeSamFile (gradesam.sh).
jpayne@69 11 They can also be used by BBMap (bbmap.sh) and BBMerge (bbmerge.sh) to automatically calculate
jpayne@69 12 true and false positive rates, if the flag 'parsecustom' is used.
jpayne@69 13
jpayne@69 14 Usage: randomreads.sh ref=<file> out=<file> length=<number> reads=<number>
jpayne@69 15
jpayne@69 16 Basic parameters:
jpayne@69 17 out=null Output file. If reads are paired and a single file name is
jpayne@69 18 given, output will be interleaved. For paired reads in twin
jpayne@69 19 files, set out1= and out2=
jpayne@69 20 ref=null Reference file. Not needed if the reference is already indexed.
jpayne@69 21 build=1 If multiple references are indexed in the same directory,
jpayne@69 22 each needs a unique build ID.
jpayne@69 23 midpad=300 Specifies space between scaffolds in packed index.
jpayne@69 24 reads=0 Generate this many reads (or pairs).
jpayne@69 25 coverage=-1 If positive, generate enough reads to hit this coverage
jpayne@69 26 target, based on the genome size.
jpayne@69 27 overwrite=t Set to false to disallow overwriting of existing files.
jpayne@69 28 replacenoref=f Set to true to replace Ns in the reference sequence
jpayne@69 29 with random letters.
jpayne@69 30 simplenames=f Set to true to generate read names that clearly indicate
jpayne@69 31 genomic origin, without BBMap internal coordinates.
jpayne@69 32 illuminanames=f Set to true to have matching names for paired reads,
jpayne@69 33 rather than naming by location.
jpayne@69 34 renamebyinsert=f Insert the insert size into the name.
jpayne@69 35 addpairnum=f Set true to add ' 1:' and ' 2:' to the end of read names.
jpayne@69 36 addslash=f Set true to add '/1' and '/2' to the end of read names.
jpayne@69 37 spaceslash=f Set true to add a space before slash read pairnum.
jpayne@69 38 prefix=null Generated reads will start with this prefix,
jpayne@69 39 rather than naming by location.
jpayne@69 40 seed=0 Use this to set the random number generator seed;
jpayne@69 41 use -1 for a random seed.
jpayne@69 42
jpayne@69 43 Length Parameters - normally only minlength and maxlength are needed.
jpayne@69 44 minlength=150 Generate reads of up to this length.
jpayne@69 45 maxlength=150 Generate reads of at least this length.
jpayne@69 46 gaussianlength=f Use a gaussian length distribution (for PacBio).
jpayne@69 47 Otherwise, the distribution is linear.
jpayne@69 48 midlength=-1 Gaussian curve peaks at this point. Must be between
jpayne@69 49 minlength and maxlength, in Gaussian mode.
jpayne@69 50 readlengthsd=-1 Standard deviation of the Gaussian curve. Note that the
jpayne@69 51 final curve is a sum of multiple curves, but this will affect
jpayne@69 52 overall curve width. By default this is set to 1/4 of range.
jpayne@69 53
jpayne@69 54 Pairing parameters:
jpayne@69 55 paired=f Set to true for paired reads.
jpayne@69 56 mininsert= Controls minimum insert length. Default depends on read length.
jpayne@69 57 maxinsert= Controls maximum insert length. Default depends on read length.
jpayne@69 58 triangle=f Make a triangular insert size distribution.
jpayne@69 59 flat=f Make a roughly flat insert size distribution..
jpayne@69 60 superflat=f Make a perfectly flat insert size distribution.
jpayne@69 61 gaussian=t Make a bell-shaped insert size distribution, with
jpayne@69 62 standard deviation of (maxinsert-mininsert)/6.
jpayne@69 63 samestrand=f Generate paired reads on the same strand.
jpayne@69 64
jpayne@69 65 Mutation parameters:
jpayne@69 66 snprate=0 Add snps to reads with this probability (0-1).
jpayne@69 67 insrate=0 Add insertions to reads with this probability (0-1).
jpayne@69 68 delrate=0 Add deletions to reads with this probability (0-1).
jpayne@69 69 subrate=0 Add contiguous substitutions to reads with this probability (0-1).
jpayne@69 70 nrate=0 Add nocalls to reads with this probability (0-1).
jpayne@69 71
jpayne@69 72 Note: With a 'rate' of X, each read has an X chance of getting at least
jpayne@69 73 1 mutation, X^2 chance of 2+ mutations, X^3 chance of 3+ mutations,
jpayne@69 74 and so forth up to the maximum allowed number of mutations of that type.
jpayne@69 75
jpayne@69 76 maxsnps=3 Add at most this many snps per read.
jpayne@69 77 maxinss=2 Add at most this many deletions per read.
jpayne@69 78 maxdels=2 Add at most this many insertions per read.
jpayne@69 79 maxsubs=2 Add at most this many contiguous substitutions per read.
jpayne@69 80 maxns=0 Add at most this many blocks of Ns per read.
jpayne@69 81
jpayne@69 82 maxinslen=12 Max length of insertions.
jpayne@69 83 maxdellen=400 Max length of deletions.
jpayne@69 84 maxsublen=12 Max length of contiguous substitutions.
jpayne@69 85 maxnlen=1 Min length of N blocks.
jpayne@69 86
jpayne@69 87 mininslen=1 Min length of insertions.
jpayne@69 88 mindellen=1 Min length of deletions.
jpayne@69 89 minsublen=2 Min length of contiguous substitutions.
jpayne@69 90 minnlen=1 Min length of N blocks.
jpayne@69 91
jpayne@69 92 Illumina quality parameters:
jpayne@69 93 maxq=36 Upper bound of quality values.
jpayne@69 94 midq=28 Approximate average of quality values.
jpayne@69 95 minq=20 Lower bound of quality values.
jpayne@69 96 q= Sets maxq, midq, and minq to the same value.
jpayne@69 97 adderrors=t Add substitution errors based on quality values,
jpayne@69 98 after mutations.
jpayne@69 99 qv=4 Vary the base quality of reads by up to this much
jpayne@69 100 to simulate tile effects.
jpayne@69 101
jpayne@69 102 PacBio quality parameters:
jpayne@69 103 pacbio=f Use a PacBio error model, rather than Illumina
jpayne@69 104 error model, and add PacBio errors after mutations.
jpayne@69 105 pbmin=0.13 Minimum rate of PacBio errors for a read.
jpayne@69 106 pbmax=0.17 Maximum rate of PacBio errors for a read.
jpayne@69 107
jpayne@69 108 Other Parameters:
jpayne@69 109 overlap=1 Require reads to overlap scaffold end by at least this much.
jpayne@69 110 banns=f Do not generate reads over reference Ns.
jpayne@69 111 metagenome=f Assign scaffolds a random exponential coverage level,
jpayne@69 112 to simulate a metagenomic or RNA coverage distribution.
jpayne@69 113 randomscaffold=f Choose random scaffolds without respect to length.
jpayne@69 114 amp=1 Simulate highly-amplified MDA single-cell data by
jpayne@69 115 setting this to a higher number like 1000.
jpayne@69 116 replacenoref=f Replace intra- and inter-scaffold Ns with random bases.
jpayne@69 117 pbadapter= Add adapter sequence to some reads using this literal string.
jpayne@69 118 fragadapter= Add this sequence to paired reads with insert size
jpayne@69 119 shorter than read length.
jpayne@69 120 fragadapter2= Use this sequence for read 2.
jpayne@69 121
jpayne@69 122 Java Parameters:
jpayne@69 123 -Xmx This will set Java's memory usage, overriding the
jpayne@69 124 program's automatic memory detection.
jpayne@69 125 -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify
jpayne@69 126 200 megs.
jpayne@69 127 The max is typically 85% of physical memory.
jpayne@69 128 -eoom This flag will cause the process to exit if an out-of-memory
jpayne@69 129 exception occurs. Requires Java 8u92+.
jpayne@69 130 -da Disable assertions.
jpayne@69 131 "
jpayne@69 132 }
jpayne@69 133
jpayne@69 134 #This block allows symlinked shellscripts to correctly set classpath.
jpayne@69 135 pushd . > /dev/null
jpayne@69 136 DIR="${BASH_SOURCE[0]}"
jpayne@69 137 while [ -h "$DIR" ]; do
jpayne@69 138 cd "$(dirname "$DIR")"
jpayne@69 139 DIR="$(readlink "$(basename "$DIR")")"
jpayne@69 140 done
jpayne@69 141 cd "$(dirname "$DIR")"
jpayne@69 142 DIR="$(pwd)/"
jpayne@69 143 popd > /dev/null
jpayne@69 144
jpayne@69 145 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
jpayne@69 146 CP="$DIR""current/"
jpayne@69 147
jpayne@69 148 z="-Xmx1g"
jpayne@69 149 z2="-Xms1g"
jpayne@69 150 set=0
jpayne@69 151
jpayne@69 152 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
jpayne@69 153 usage
jpayne@69 154 exit
jpayne@69 155 fi
jpayne@69 156
jpayne@69 157 calcXmx () {
jpayne@69 158 source "$DIR""/calcmem.sh"
jpayne@69 159 setEnvironment
jpayne@69 160 parseXmx "$@"
jpayne@69 161 if [[ $set == 1 ]]; then
jpayne@69 162 return
jpayne@69 163 fi
jpayne@69 164 freeRam 3200m 84
jpayne@69 165 z="-Xmx${RAM}m"
jpayne@69 166 z2="-Xms${RAM}m"
jpayne@69 167 }
jpayne@69 168 calcXmx "$@"
jpayne@69 169
jpayne@69 170 randomreads() {
jpayne@69 171 local CMD="java $EA $EOOM $z -cp $CP align2.RandomReads3 build=1 $@"
jpayne@69 172 echo $CMD >&2
jpayne@69 173 eval $CMD
jpayne@69 174 }
jpayne@69 175
jpayne@69 176 randomreads "$@"