Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/randomreads.sh @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!/bin/bash | |
2 | |
3 usage(){ | |
4 echo " | |
5 Written by Brian Bushnell | |
6 Last modified April 1, 2019 | |
7 | |
8 Description: Generates random synthetic reads from a reference genome. Read names indicate their genomic origin. | |
9 Allows precise customization of things like insert size and synthetic mutation type, sizes, and rates. | |
10 Read names generated by this program are used by MakeRocCurve (samtoroc.sh) and GradeSamFile (gradesam.sh). | |
11 They can also be used by BBMap (bbmap.sh) and BBMerge (bbmerge.sh) to automatically calculate | |
12 true and false positive rates, if the flag 'parsecustom' is used. | |
13 | |
14 Usage: randomreads.sh ref=<file> out=<file> length=<number> reads=<number> | |
15 | |
16 Basic parameters: | |
17 out=null Output file. If reads are paired and a single file name is | |
18 given, output will be interleaved. For paired reads in twin | |
19 files, set out1= and out2= | |
20 ref=null Reference file. Not needed if the reference is already indexed. | |
21 build=1 If multiple references are indexed in the same directory, | |
22 each needs a unique build ID. | |
23 midpad=300 Specifies space between scaffolds in packed index. | |
24 reads=0 Generate this many reads (or pairs). | |
25 coverage=-1 If positive, generate enough reads to hit this coverage | |
26 target, based on the genome size. | |
27 overwrite=t Set to false to disallow overwriting of existing files. | |
28 replacenoref=f Set to true to replace Ns in the reference sequence | |
29 with random letters. | |
30 simplenames=f Set to true to generate read names that clearly indicate | |
31 genomic origin, without BBMap internal coordinates. | |
32 illuminanames=f Set to true to have matching names for paired reads, | |
33 rather than naming by location. | |
34 renamebyinsert=f Insert the insert size into the name. | |
35 addpairnum=f Set true to add ' 1:' and ' 2:' to the end of read names. | |
36 addslash=f Set true to add '/1' and '/2' to the end of read names. | |
37 spaceslash=f Set true to add a space before slash read pairnum. | |
38 prefix=null Generated reads will start with this prefix, | |
39 rather than naming by location. | |
40 seed=0 Use this to set the random number generator seed; | |
41 use -1 for a random seed. | |
42 | |
43 Length Parameters - normally only minlength and maxlength are needed. | |
44 minlength=150 Generate reads of at least this length. | |
45 maxlength=150 Generate reads of up to this length. | |
46 gaussianlength=f Use a gaussian length distribution (for PacBio). | |
47 Otherwise, the distribution is linear. | |
48 midlength=-1 Gaussian curve peaks at this point. Must be between | |
49 minlength and maxlength, in Gaussian mode. | |
50 readlengthsd=-1 Standard deviation of the Gaussian curve. Note that the | |
51 final curve is a sum of multiple curves, but this will affect | |
52 overall curve width. By default this is set to 1/4 of range. | |
53 | |
54 Pairing parameters: | |
55 paired=f Set to true for paired reads. | |
56 mininsert= Controls minimum insert length. Default depends on read length. | |
57 maxinsert= Controls maximum insert length. Default depends on read length. | |
58 triangle=f Make a triangular insert size distribution. | |
59 flat=f Make a roughly flat insert size distribution. | |
60 superflat=f Make a perfectly flat insert size distribution. | |
61 gaussian=t Make a bell-shaped insert size distribution, with | |
62 standard deviation of (maxinsert-mininsert)/6. | |
63 samestrand=f Generate paired reads on the same strand. | |
64 | |
65 Mutation parameters: | |
66 snprate=0 Add snps to reads with this probability (0-1). | |
67 insrate=0 Add insertions to reads with this probability (0-1). | |
68 delrate=0 Add deletions to reads with this probability (0-1). | |
69 subrate=0 Add contiguous substitutions to reads with this probability (0-1). | |
70 nrate=0 Add nocalls to reads with this probability (0-1). | |
71 | |
72 Note: With a 'rate' of X, each read has an X chance of getting at least | |
73 1 mutation, X^2 chance of 2+ mutations, X^3 chance of 3+ mutations, | |
74 and so forth up to the maximum allowed number of mutations of that type. | |
75 | |
76 maxsnps=3 Add at most this many snps per read. | |
77 maxinss=2 Add at most this many insertions per read. | |
78 maxdels=2 Add at most this many deletions per read. | |
79 maxsubs=2 Add at most this many contiguous substitutions per read. | |
80 maxns=0 Add at most this many blocks of Ns per read. | |
81 | |
82 maxinslen=12 Max length of insertions. | |
83 maxdellen=400 Max length of deletions. | |
84 maxsublen=12 Max length of contiguous substitutions. | |
85 maxnlen=1 Max length of N blocks. | |
86 | |
87 mininslen=1 Min length of insertions. | |
88 mindellen=1 Min length of deletions. | |
89 minsublen=2 Min length of contiguous substitutions. | |
90 minnlen=1 Min length of N blocks. | |
91 | |
92 Illumina quality parameters: | |
93 maxq=36 Upper bound of quality values. | |
94 midq=28 Approximate average of quality values. | |
95 minq=20 Lower bound of quality values. | |
96 q= Sets maxq, midq, and minq to the same value. | |
97 adderrors=t Add substitution errors based on quality values, | |
98 after mutations. | |
99 qv=4 Vary the base quality of reads by up to this much | |
100 to simulate tile effects. | |
101 | |
102 PacBio quality parameters: | |
103 pacbio=f Use a PacBio error model, rather than Illumina | |
104 error model, and add PacBio errors after mutations. | |
105 pbmin=0.13 Minimum rate of PacBio errors for a read. | |
106 pbmax=0.17 Maximum rate of PacBio errors for a read. | |
107 | |
108 Other Parameters: | |
109 overlap=1 Require reads to overlap scaffold end by at least this much. | |
110 banns=f Do not generate reads over reference Ns. | |
111 metagenome=f Assign scaffolds a random exponential coverage level, | |
112 to simulate a metagenomic or RNA coverage distribution. | |
113 randomscaffold=f Choose random scaffolds without respect to length. | |
114 amp=1 Simulate highly-amplified MDA single-cell data by | |
115 setting this to a higher number like 1000. | |
116 replacenoref=f Replace intra- and inter-scaffold Ns with random bases. | |
117 pbadapter= Add adapter sequence to some reads using this literal string. | |
118 fragadapter= Add this sequence to paired reads with insert size | |
119 shorter than read length. | |
120 fragadapter2= Use this sequence for read 2. | |
121 | |
122 Java Parameters: | |
123 -Xmx This will set Java's memory usage, overriding the | |
124 program's automatic memory detection. | |
125 -Xmx20g will specify 20 gigs of RAM, and -Xmx200m will specify | |
126 200 megs. | |
127 The max is typically 85% of physical memory. | |
128 -eoom This flag will cause the process to exit if an out-of-memory | |
129 exception occurs. Requires Java 8u92+. | |
130 -da Disable assertions. | |
131 " | |
132 } | |
133 | |
134 #This block allows symlinked shellscripts to correctly set classpath. | |
135 pushd . > /dev/null | |
136 DIR="${BASH_SOURCE[0]}" | |
137 while [ -h "$DIR" ]; do | |
138 cd "$(dirname "$DIR")" | |
139 DIR="$(readlink "$(basename "$DIR")")" | |
140 done | |
141 cd "$(dirname "$DIR")" | |
142 DIR="$(pwd)/" | |
143 popd > /dev/null | |
144 | |
145 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" | |
146 CP="$DIR""current/" | |
147 | |
148 z="-Xmx1g" | |
149 z2="-Xms1g" | |
150 set=0 | |
151 | |
152 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then | |
153 usage | |
154 exit | |
155 fi | |
156 | |
157 calcXmx () { | |
158 source "$DIR""/calcmem.sh" | |
159 setEnvironment | |
160 parseXmx "$@" | |
161 if [[ $set == 1 ]]; then | |
162 return | |
163 fi | |
164 freeRam 3200m 84 | |
165 z="-Xmx${RAM}m" | |
166 z2="-Xms${RAM}m" | |
167 } | |
168 calcXmx "$@" | |
169 | |
170 randomreads() { | |
171 local CMD="java $EA $EOOM $z -cp $CP align2.RandomReads3 build=1 $@" | |
172 echo $CMD >&2 | |
173 eval $CMD | |
174 } | |
175 | |
176 randomreads "$@" |