comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/randomreads.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/bin/bash
2
# Print the help text for randomreads.sh to stdout.
# Takes no arguments. The text is emitted from a quoted here-document so that
# nothing inside it is subject to parameter or command expansion; the blank
# first and last lines of the here-document are part of the output.
usage(){
cat <<'EOF'

Written by Brian Bushnell
Last modified April 1, 2019

Description: Generates random synthetic reads from a reference genome. Read names indicate their genomic origin.
Allows precise customization of things like insert size and synthetic mutation type, sizes, and rates.
Read names generated by this program are used by MakeRocCurve (samtoroc.sh) and GradeSamFile (gradesam.sh).
They can also be used by BBMap (bbmap.sh) and BBMerge (bbmerge.sh) to automatically calculate
true and false positive rates, if the flag 'parsecustom' is used.

Usage: randomreads.sh ref=<file> out=<file> length=<number> reads=<number>

Basic parameters:
out=null            Output file. If reads are paired and a single file name is
                    given, output will be interleaved. For paired reads in twin
                    files, set out1= and out2=
ref=null            Reference file. Not needed if the reference is already indexed.
build=1             If multiple references are indexed in the same directory,
                    each needs a unique build ID.
midpad=300          Specifies space between scaffolds in packed index.
reads=0             Generate this many reads (or pairs).
coverage=-1         If positive, generate enough reads to hit this coverage
                    target, based on the genome size.
overwrite=t         Set to false to disallow overwriting of existing files.
replacenoref=f      Set to true to replace Ns in the reference sequence
                    with random letters.
simplenames=f       Set to true to generate read names that clearly indicate
                    genomic origin, without BBMap internal coordinates.
illuminanames=f     Set to true to have matching names for paired reads,
                    rather than naming by location.
renamebyinsert=f    Insert the insert size into the name.
addpairnum=f        Set true to add ' 1:' and ' 2:' to the end of read names.
addslash=f          Set true to add '/1' and '/2' to the end of read names.
spaceslash=f        Set true to add a space before slash read pairnum.
prefix=null         Generated reads will start with this prefix,
                    rather than naming by location.
seed=0              Use this to set the random number generator seed;
                    use -1 for a random seed.

Length Parameters - normally only minlength and maxlength are needed.
minlength=150       Generate reads of at least this length.
maxlength=150       Generate reads of up to this length.
gaussianlength=f    Use a gaussian length distribution (for PacBio).
                    Otherwise, the distribution is linear.
midlength=-1        Gaussian curve peaks at this point. Must be between
                    minlength and maxlength, in Gaussian mode.
readlengthsd=-1     Standard deviation of the Gaussian curve. Note that the
                    final curve is a sum of multiple curves, but this will affect
                    overall curve width. By default this is set to 1/4 of range.

Pairing parameters:
paired=f            Set to true for paired reads.
mininsert=          Controls minimum insert length. Default depends on read length.
maxinsert=          Controls maximum insert length. Default depends on read length.
triangle=f          Make a triangular insert size distribution.
flat=f              Make a roughly flat insert size distribution.
superflat=f         Make a perfectly flat insert size distribution.
gaussian=t          Make a bell-shaped insert size distribution, with
                    standard deviation of (maxinsert-mininsert)/6.
samestrand=f        Generate paired reads on the same strand.

Mutation parameters:
snprate=0           Add snps to reads with this probability (0-1).
insrate=0           Add insertions to reads with this probability (0-1).
delrate=0           Add deletions to reads with this probability (0-1).
subrate=0           Add contiguous substitutions to reads with this probability (0-1).
nrate=0             Add nocalls to reads with this probability (0-1).

Note: With a 'rate' of X, each read has an X chance of getting at least
1 mutation, X^2 chance of 2+ mutations, X^3 chance of 3+ mutations,
and so forth up to the maximum allowed number of mutations of that type.

maxsnps=3           Add at most this many snps per read.
maxinss=2           Add at most this many insertions per read.
maxdels=2           Add at most this many deletions per read.
maxsubs=2           Add at most this many contiguous substitutions per read.
maxns=0             Add at most this many blocks of Ns per read.

maxinslen=12        Max length of insertions.
maxdellen=400       Max length of deletions.
maxsublen=12        Max length of contiguous substitutions.
maxnlen=1           Max length of N blocks.

mininslen=1         Min length of insertions.
mindellen=1         Min length of deletions.
minsublen=2         Min length of contiguous substitutions.
minnlen=1           Min length of N blocks.

Illumina quality parameters:
maxq=36             Upper bound of quality values.
midq=28             Approximate average of quality values.
minq=20             Lower bound of quality values.
q=                  Sets maxq, midq, and minq to the same value.
adderrors=t         Add substitution errors based on quality values,
                    after mutations.
qv=4                Vary the base quality of reads by up to this much
                    to simulate tile effects.

PacBio quality parameters:
pacbio=f            Use a PacBio error model, rather than Illumina
                    error model, and add PacBio errors after mutations.
pbmin=0.13          Minimum rate of PacBio errors for a read.
pbmax=0.17          Maximum rate of PacBio errors for a read.

Other Parameters:
overlap=1           Require reads to overlap scaffold end by at least this much.
banns=f             Do not generate reads over reference Ns.
metagenome=f        Assign scaffolds a random exponential coverage level,
                    to simulate a metagenomic or RNA coverage distribution.
randomscaffold=f    Choose random scaffolds without respect to length.
amp=1               Simulate highly-amplified MDA single-cell data by
                    setting this to a higher number like 1000.
replacenoref=f      Replace intra- and inter-scaffold Ns with random bases.
pbadapter=          Add adapter sequence to some reads using this literal string.
fragadapter=        Add this sequence to paired reads with insert size
                    shorter than read length.
fragadapter2=       Use this sequence for read 2.

Java Parameters:
-Xmx                This will set Java's memory usage, overriding the
                    program's automatic memory detection.
                    -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify
                    200 megs.
                    The max is typically 85% of physical memory.
-eoom               This flag will cause the process to exit if an out-of-memory
                    exception occurs. Requires Java 8u92+.
-da                 Disable assertions.

EOF
}
133
#This block allows symlinked shellscripts to correctly set classpath.
# Starting from the path this script was invoked as, follow symbolic links one
# hop at a time until a real file is reached, then record that file's
# directory (with a trailing slash) in DIR. The caller's working directory is
# saved with pushd and restored with popd. Any failed directory change aborts
# the script, because continuing would build the classpath from the wrong place.
pushd . > /dev/null || exit 1
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  # Enter the link's own directory first so that a relative link target
  # returned by readlink is resolved from the right place on the next pass.
  cd "$(dirname "$DIR")" || exit 1
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")" || exit 1
DIR="$(pwd)/"
popd > /dev/null || exit 1

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
# Classpath handed to java -cp: the "current/" directory beside this script.
CP="${DIR}current/"
147
# Default JVM heap flags; calcXmx overwrites both when it auto-sizes memory.
z="-Xmx1g"
z2="-Xms1g"
# Flag read by calcXmx: when it equals 1, the automatic sizing step is skipped.
# NOTE(review): presumably set to 1 by parseXmx in calcmem.sh - confirm.
set=0

# With no first argument, or with -h / --help, print the help text and stop.
case "$1" in
  ""|-h|--help)
    usage
    exit
    ;;
esac
156
# Decide the JVM heap flags for this run.
# Arguments: the script's command-line arguments, forwarded to parseXmx.
# Globals:   DIR (read), set (read), RAM (read), z and z2 (written).
# Loads calcmem.sh from the script directory and calls its setEnvironment,
# parseXmx and freeRam helpers. Those helpers are not visible in this file;
# presumably parseXmx sets 'set' to 1 when the user supplied a memory flag and
# freeRam leaves a megabyte count in RAM - confirm against calcmem.sh.
calcXmx () {
  source "${DIR}/calcmem.sh"
  setEnvironment
  parseXmx "$@"
  # Auto-size only when 'set' was not flipped to 1 by the steps above.
  if [[ $set != 1 ]]; then
    freeRam 3200m 84
    z="-Xmx${RAM}m"
    z2="-Xms${RAM}m"
  fi
}
168 calcXmx "$@"
169
# Launch align2.RandomReads3 under java with the computed heap flag and
# classpath, forwarding every script argument unchanged.
# Arguments: the script's command-line arguments.
# Globals:   EA, EOOM, z, CP (all read).
# Outputs:   echoes the command line to stderr before running it.
# Returns:   the exit status of java.
randomreads() {
  # Build the command as an argv array rather than a string passed to eval,
  # so arguments containing spaces or shell metacharacters reach java intact
  # and are never re-parsed by the shell.
  # EA and EOOM are deliberately unquoted: an empty value must contribute no
  # argument at all.
  # shellcheck disable=SC2206
  local -a cmd=(java $EA $EOOM "$z" -cp "$CP" align2.RandomReads3 build=1 "$@")
  printf '%s\n' "${cmd[*]}" >&2
  "${cmd[@]}"
}
175
176 randomreads "$@"