jpayne@68
|
1 package icecream;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.PrintStream;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.Arrays;
|
jpayne@68
|
6 import java.util.Random;
|
jpayne@68
|
7 import java.util.concurrent.atomic.AtomicLong;
|
jpayne@68
|
8
|
jpayne@68
|
9 import dna.AminoAcid;
|
jpayne@68
|
10 import fileIO.ByteFile;
|
jpayne@68
|
11 import fileIO.ByteStreamWriter;
|
jpayne@68
|
12 import fileIO.FileFormat;
|
jpayne@68
|
13 import fileIO.ReadWrite;
|
jpayne@68
|
14 import shared.Parse;
|
jpayne@68
|
15 import shared.Parser;
|
jpayne@68
|
16 import shared.PreParser;
|
jpayne@68
|
17 import shared.ReadStats;
|
jpayne@68
|
18 import shared.Shared;
|
jpayne@68
|
19 import shared.Timer;
|
jpayne@68
|
20 import shared.Tools;
|
jpayne@68
|
21 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
22 import stream.ConcurrentReadOutputStream;
|
jpayne@68
|
23 import stream.FASTQ;
|
jpayne@68
|
24 import stream.FastaReadInputStream;
|
jpayne@68
|
25 import stream.Read;
|
jpayne@68
|
26 import stream.SamLine;
|
jpayne@68
|
27 import structures.ByteBuilder;
|
jpayne@68
|
28 import structures.IntList;
|
jpayne@68
|
29 import structures.ListNum;
|
jpayne@68
|
30
|
jpayne@68
|
31 /**
|
jpayne@68
|
32 * Generates chimeric PacBio reads containing inverted repeats
|
jpayne@68
|
33 * due to missing adapters.
|
jpayne@68
|
34 *
|
jpayne@68
|
35 * @author Brian Bushnell
|
jpayne@68
|
36 * @date June 8, 2019
|
jpayne@68
|
37 *
|
jpayne@68
|
38 */
|
jpayne@68
|
39 public class IceCreamMaker {
|
jpayne@68
|
40
|
jpayne@68
|
41 /*--------------------------------------------------------------*/
|
jpayne@68
|
42 /*---------------- Initialization ----------------*/
|
jpayne@68
|
43 /*--------------------------------------------------------------*/
|
jpayne@68
|
44
|
jpayne@68
|
45 /**
|
jpayne@68
|
46 * Code entrance from the command line.
|
jpayne@68
|
47 * @param args Command line arguments
|
jpayne@68
|
48 */
|
jpayne@68
|
49 public static void main(String[] args){
|
jpayne@68
|
50 //Start a timer immediately upon code entrance.
|
jpayne@68
|
51 Timer t=new Timer();
|
jpayne@68
|
52
|
jpayne@68
|
53 //Create an instance of this class
|
jpayne@68
|
54 IceCreamMaker x=new IceCreamMaker(args);
|
jpayne@68
|
55
|
jpayne@68
|
56 //Run the object
|
jpayne@68
|
57 x.process(t);
|
jpayne@68
|
58
|
jpayne@68
|
59 //Close the print stream if it was redirected
|
jpayne@68
|
60 Shared.closeStream(x.outstream);
|
jpayne@68
|
61 }
|
jpayne@68
|
62
|
jpayne@68
|
63 /**
|
jpayne@68
|
64 * Constructor.
|
jpayne@68
|
65 * @param args Command line arguments
|
jpayne@68
|
66 */
|
jpayne@68
|
67 public IceCreamMaker(String[] args){
|
jpayne@68
|
68
|
jpayne@68
|
69 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
70 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
71 args=pp.args;
|
jpayne@68
|
72 outstream=pp.outstream;
|
jpayne@68
|
73 }
|
jpayne@68
|
74
|
jpayne@68
|
75 //Set shared static variables prior to parsing
|
jpayne@68
|
76 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
77 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
78 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
|
jpayne@68
|
79 Shared.FAKE_QUAL=8;
|
jpayne@68
|
80 // Shared.FASTA_WRAP=511;
|
jpayne@68
|
81 SamLine.SET_FROM_OK=true;
|
jpayne@68
|
82
|
jpayne@68
|
83 {//Parse the arguments
|
jpayne@68
|
84 final Parser parser=parse(args);
|
jpayne@68
|
85 Parser.processQuality();
|
jpayne@68
|
86
|
jpayne@68
|
87 maxReads=parser.maxReads;
|
jpayne@68
|
88 overwrite=ReadStats.overwrite=parser.overwrite;
|
jpayne@68
|
89 append=ReadStats.append=parser.append;
|
jpayne@68
|
90
|
jpayne@68
|
91 in1=parser.in1;
|
jpayne@68
|
92 extin=parser.extin;
|
jpayne@68
|
93
|
jpayne@68
|
94 out1=parser.out1;
|
jpayne@68
|
95 qfout1=parser.qfout1;
|
jpayne@68
|
96 extout=parser.extout;
|
jpayne@68
|
97 }
|
jpayne@68
|
98
|
jpayne@68
|
99 validateParams();
|
jpayne@68
|
100 fixExtensions(); //Add or remove .gz or .bz2 as needed
|
jpayne@68
|
101 checkFileExistence(); //Ensure files can be read and written
|
jpayne@68
|
102 checkStatics(); //Adjust file-related static fields as needed for this program
|
jpayne@68
|
103
|
jpayne@68
|
104 //Create output FileFormat objects
|
jpayne@68
|
105 ffout1=FileFormat.testOutput(out1, FileFormat.FASTQ, extout, true, overwrite, append, false);
|
jpayne@68
|
106
|
jpayne@68
|
107 //Create input FileFormat objects
|
jpayne@68
|
108 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
|
jpayne@68
|
109
|
jpayne@68
|
110 ffIdHist=FileFormat.testOutput(outIdHist, FileFormat.TXT, null, false, overwrite, append, false);
|
jpayne@68
|
111
|
jpayne@68
|
112 insThresh=insFraction;
|
jpayne@68
|
113 delThresh=delFraction+insThresh;
|
jpayne@68
|
114 }
|
jpayne@68
|
115
|
jpayne@68
|
116 /*--------------------------------------------------------------*/
|
jpayne@68
|
117 /*---------------- Initialization Helpers ----------------*/
|
jpayne@68
|
118 /*--------------------------------------------------------------*/
|
jpayne@68
|
119
|
jpayne@68
|
120 /** Parse arguments from the command line */
|
jpayne@68
|
121 private Parser parse(String[] args){
|
jpayne@68
|
122
|
jpayne@68
|
123 //Create a parser object
|
jpayne@68
|
124 Parser parser=new Parser();
|
jpayne@68
|
125
|
jpayne@68
|
126 //Set any necessary Parser defaults here
|
jpayne@68
|
127 //parser.foo=bar;
|
jpayne@68
|
128
|
jpayne@68
|
129 //Parse each argument
|
jpayne@68
|
130 for(int i=0; i<args.length; i++){
|
jpayne@68
|
131 String arg=args[i];
|
jpayne@68
|
132
|
jpayne@68
|
133 //Break arguments into their constituent parts, in the form of "a=b"
|
jpayne@68
|
134 String[] split=arg.split("=");
|
jpayne@68
|
135 String a=split[0].toLowerCase();
|
jpayne@68
|
136 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
137 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
|
jpayne@68
|
138
|
jpayne@68
|
139 if(a.equals("verbose")){
|
jpayne@68
|
140 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
141 }
|
jpayne@68
|
142
|
jpayne@68
|
143 else if(a.equals("idhist")){
|
jpayne@68
|
144 outIdHist=b;
|
jpayne@68
|
145 }
|
jpayne@68
|
146
|
jpayne@68
|
147 else if(a.equals("minlength") || a.equals("minlen")){
|
jpayne@68
|
148 minMoleculeLength=Parse.parseIntKMG(b);
|
jpayne@68
|
149 }else if(a.equals("maxlength") || a.equals("maxlen")){
|
jpayne@68
|
150 maxMoleculeLength=Parse.parseIntKMG(b);
|
jpayne@68
|
151 }else if(a.equals("length") || a.equals("len")){
|
jpayne@68
|
152 minMoleculeLength=maxMoleculeLength=Parse.parseIntKMG(b);
|
jpayne@68
|
153 }
|
jpayne@68
|
154
|
jpayne@68
|
155 else if(a.equals("minmovie") || a.equals("minmov")){
|
jpayne@68
|
156 minMovie=Parse.parseIntKMG(b);
|
jpayne@68
|
157 }else if(a.equals("maxmovie") || a.equals("maxmov")){
|
jpayne@68
|
158 maxMovie=Parse.parseIntKMG(b);
|
jpayne@68
|
159 }else if(a.equals("movie") || a.equals("mov")){
|
jpayne@68
|
160 minMovie=maxMovie=Parse.parseIntKMG(b);
|
jpayne@68
|
161 }
|
jpayne@68
|
162
|
jpayne@68
|
163 else if(a.equals("missingrate") || a.equals("missing")){
|
jpayne@68
|
164 missingRate=Double.parseDouble(b);
|
jpayne@68
|
165 assert(missingRate<=1);
|
jpayne@68
|
166 }else if(a.equals("hiddenrate") || a.equals("hidden")){
|
jpayne@68
|
167 hiddenRate=Double.parseDouble(b);
|
jpayne@68
|
168 assert(hiddenRate<=1);
|
jpayne@68
|
169 }else if(a.equals("bothends")){
|
jpayne@68
|
170 allowBothEndsBad=Parse.parseBoolean(b);
|
jpayne@68
|
171 assert(false) : "TODO";
|
jpayne@68
|
172 }
|
jpayne@68
|
173
|
jpayne@68
|
174 else if(a.equals("gc")){
|
jpayne@68
|
175 genomeGC=(float)Double.parseDouble(b);
|
jpayne@68
|
176 assert(genomeGC>=0 && genomeGC<=1);
|
jpayne@68
|
177 }else if(a.equals("genomesize")){
|
jpayne@68
|
178 genomeSize=Parse.parseKMG(b);
|
jpayne@68
|
179 }else if(a.equals("addns") || a.equals("ns")){
|
jpayne@68
|
180 addNs=Parse.parseBoolean(b);
|
jpayne@68
|
181 }else if(a.equals("seed")){
|
jpayne@68
|
182 seed=Long.parseLong(b);
|
jpayne@68
|
183 }else if(a.equals("zmws") || a.equals("maxzmws") || a.equals("reads")){
|
jpayne@68
|
184 maxZMWs=Parse.parseIntKMG(b);
|
jpayne@68
|
185 }else if(a.equals("ccs")){
|
jpayne@68
|
186 makeCCS=Parse.parseBoolean(b);
|
jpayne@68
|
187 }
|
jpayne@68
|
188
|
jpayne@68
|
189 else if(a.equals("invertedrepeatrate") || a.equals("invertrepeatrate") || a.equals("irrate")){
|
jpayne@68
|
190 invertedRepeatRate=Double.parseDouble(b);
|
jpayne@68
|
191 assert(invertedRepeatRate>=0);
|
jpayne@68
|
192 }else if(a.equals("invertedrepeatminlen") || a.equals("invertrepeatminlen") || a.equals("irminlen")){
|
jpayne@68
|
193 invertedRepeatMinLength=Parse.parseIntKMG(b);
|
jpayne@68
|
194 }else if(a.equals("invertedrepeatmaxlen") || a.equals("invertrepeatmaxlen") || a.equals("irmaxlen")){
|
jpayne@68
|
195 invertedRepeatMaxLength=Parse.parseIntKMG(b);
|
jpayne@68
|
196 }else if(a.equals("invertedrepeatlen") || a.equals("invertrepeatlen") || a.equals("irlen")){
|
jpayne@68
|
197 invertedRepeatMinLength=invertedRepeatMaxLength=Parse.parseIntKMG(b);
|
jpayne@68
|
198 }
|
jpayne@68
|
199
|
jpayne@68
|
200 else if(a.equals("miner") || a.equals("minerrorrate")){
|
jpayne@68
|
201 minErrorRate=(float)Double.parseDouble(b);
|
jpayne@68
|
202 assert(minErrorRate>=0 && minErrorRate<=1);
|
jpayne@68
|
203 }else if(a.equals("maxer") || a.equals("maxerrorrate")){
|
jpayne@68
|
204 maxErrorRate=(float)Double.parseDouble(b);
|
jpayne@68
|
205 assert(maxErrorRate>=0 && maxErrorRate<=1);
|
jpayne@68
|
206 }else if(a.equals("er") || a.equals("errorrate")){
|
jpayne@68
|
207 minErrorRate=maxErrorRate=(float)Double.parseDouble(b);
|
jpayne@68
|
208 assert(minErrorRate>=0 && minErrorRate<=1);
|
jpayne@68
|
209 }
|
jpayne@68
|
210
|
jpayne@68
|
211 else if(a.equals("minid") || a.equals("minidentity")){
|
jpayne@68
|
212 maxErrorRate=1-(float)Double.parseDouble(b);
|
jpayne@68
|
213 assert(maxErrorRate>=0 && maxErrorRate<=1);
|
jpayne@68
|
214 }else if(a.equals("maxid") || a.equals("maxidentity")){
|
jpayne@68
|
215 minErrorRate=1-(float)Double.parseDouble(b);
|
jpayne@68
|
216 assert(minErrorRate>=0 && minErrorRate<=1);
|
jpayne@68
|
217 }else if(a.equals("id") || a.equals("identity")){
|
jpayne@68
|
218 minErrorRate=maxErrorRate=1-(float)Double.parseDouble(b);
|
jpayne@68
|
219 assert(minErrorRate>=0 && minErrorRate<=1);
|
jpayne@68
|
220 }else if(a.equals("adderrors")){
|
jpayne@68
|
221 addErrors=Parse.parseBoolean(b);
|
jpayne@68
|
222 }else if(a.equals("ref")){
|
jpayne@68
|
223 parser.in1=b;
|
jpayne@68
|
224 }
|
jpayne@68
|
225
|
jpayne@68
|
226 else if(a.equals("parse_flag_goes_here")){
|
jpayne@68
|
227 long fake_variable=Parse.parseKMG(b);
|
jpayne@68
|
228 //Set a variable here
|
jpayne@68
|
229 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
|
jpayne@68
|
230 //do nothing
|
jpayne@68
|
231 }else{
|
jpayne@68
|
232 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
233 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
234 }
|
jpayne@68
|
235 }
|
jpayne@68
|
236
|
jpayne@68
|
237 return parser;
|
jpayne@68
|
238 }
|
jpayne@68
|
239
|
jpayne@68
|
240 /** Add or remove .gz or .bz2 as needed */
|
jpayne@68
|
241 private void fixExtensions(){
|
jpayne@68
|
242 in1=Tools.fixExtension(in1);
|
jpayne@68
|
243 }
|
jpayne@68
|
244
|
jpayne@68
|
245 /** Ensure files can be read and written */
|
jpayne@68
|
246 private void checkFileExistence(){
|
jpayne@68
|
247 //Ensure output files can be written
|
jpayne@68
|
248 if(!Tools.testOutputFiles(overwrite, append, false, out1, outIdHist)){
|
jpayne@68
|
249 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n");
|
jpayne@68
|
250 }
|
jpayne@68
|
251
|
jpayne@68
|
252 //Ensure input files can be read
|
jpayne@68
|
253 if(!Tools.testInputFiles(false, true, in1)){
|
jpayne@68
|
254 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
255 }
|
jpayne@68
|
256
|
jpayne@68
|
257 //Ensure that no file was specified multiple times
|
jpayne@68
|
258 if(!Tools.testForDuplicateFiles(true, in1, out1, outIdHist)){
|
jpayne@68
|
259 throw new RuntimeException("\nSome file names were specified multiple times.\n");
|
jpayne@68
|
260 }
|
jpayne@68
|
261 }
|
jpayne@68
|
262
|
jpayne@68
|
263 /** Adjust file-related static fields as needed for this program */
|
jpayne@68
|
264 private static void checkStatics(){
|
jpayne@68
|
265 //Adjust the number of threads for input file reading
|
jpayne@68
|
266 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
267 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
268 }
|
jpayne@68
|
269
|
jpayne@68
|
270 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
271 }
|
jpayne@68
|
272
|
jpayne@68
|
273 /** Ensure parameter ranges are within bounds and required parameters are set */
|
jpayne@68
|
274 private boolean validateParams(){
|
jpayne@68
|
275 assert(minMoleculeLength>0 && minMoleculeLength<=maxMoleculeLength) : minMoleculeLength+", "+maxMoleculeLength;
|
jpayne@68
|
276 assert(minMovie>0 && minMovie<=maxMovie) : minMovie+", "+maxMovie;
|
jpayne@68
|
277
|
jpayne@68
|
278 assert(missingRate>=0 && missingRate<=1) : missingRate;
|
jpayne@68
|
279 assert(hiddenRate>=0 && hiddenRate<=1) : hiddenRate;
|
jpayne@68
|
280 assert(genomeGC>=0 && genomeGC<=1) : genomeGC;
|
jpayne@68
|
281 assert(in1!=null || genomeSize>maxMoleculeLength) : genomeSize;
|
jpayne@68
|
282 assert(in1!=null || invertedRepeatMaxLength*2<genomeSize) : genomeSize;
|
jpayne@68
|
283 assert(maxZMWs>0) : "zmsw="+maxZMWs+"; please set to a positive number.";
|
jpayne@68
|
284
|
jpayne@68
|
285 assert(invertedRepeatRate>=0) : invertedRepeatRate;
|
jpayne@68
|
286 assert(invertedRepeatMinLength>0 && invertedRepeatMinLength<=invertedRepeatMaxLength) : invertedRepeatMinLength+", "+invertedRepeatMaxLength;
|
jpayne@68
|
287
|
jpayne@68
|
288 assert(minErrorRate>=0 && minErrorRate<=maxErrorRate) : minErrorRate+", "+maxErrorRate;
|
jpayne@68
|
289 return true;
|
jpayne@68
|
290 }
|
jpayne@68
|
291
|
jpayne@68
|
292 /*--------------------------------------------------------------*/
|
jpayne@68
|
293 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
294 /*--------------------------------------------------------------*/
|
jpayne@68
|
295
|
jpayne@68
|
296 /** Create read streams and process all data */
|
jpayne@68
|
297 void process(Timer t){
|
jpayne@68
|
298
|
jpayne@68
|
299 //Turn off read validation in the input threads to increase speed
|
jpayne@68
|
300 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
|
jpayne@68
|
301 Read.VALIDATE_IN_CONSTRUCTOR=true;
|
jpayne@68
|
302
|
jpayne@68
|
303 //Create a read input stream
|
jpayne@68
|
304 final ConcurrentReadInputStream cris=makeCris();
|
jpayne@68
|
305
|
jpayne@68
|
306 //Optionally create a read output stream
|
jpayne@68
|
307 final ConcurrentReadOutputStream ros=makeCros(false);
|
jpayne@68
|
308
|
jpayne@68
|
309 //Reset counters
|
jpayne@68
|
310 readsProcessed=readsOut=0;
|
jpayne@68
|
311 basesProcessed=basesOut=0;
|
jpayne@68
|
312
|
jpayne@68
|
313 Random randy=Shared.threadLocalRandom(seed);
|
jpayne@68
|
314 if(cris==null){
|
jpayne@68
|
315 ref=genSynthGenome(randy);
|
jpayne@68
|
316 }else{
|
jpayne@68
|
317 ref=loadData(cris, randy);
|
jpayne@68
|
318 }
|
jpayne@68
|
319
|
jpayne@68
|
320 if(invertedRepeatRate>0){
|
jpayne@68
|
321 addInvertedRepeats(ref, randy);
|
jpayne@68
|
322 }
|
jpayne@68
|
323
|
jpayne@68
|
324 //Process the reads in separate threads
|
jpayne@68
|
325 spawnThreads(cris, ros);
|
jpayne@68
|
326
|
jpayne@68
|
327 if(verbose){outstream.println("Finished; closing streams.");}
|
jpayne@68
|
328
|
jpayne@68
|
329 //Write anything that was accumulated by ReadStats
|
jpayne@68
|
330 errorState|=ReadStats.writeAll();
|
jpayne@68
|
331 //Close the read streams
|
jpayne@68
|
332 errorState|=ReadWrite.closeStreams(cris, ros);
|
jpayne@68
|
333
|
jpayne@68
|
334 writeIdHist();
|
jpayne@68
|
335
|
jpayne@68
|
336 //Reset read validation
|
jpayne@68
|
337 Read.VALIDATE_IN_CONSTRUCTOR=vic;
|
jpayne@68
|
338
|
jpayne@68
|
339 //Report timing and results
|
jpayne@68
|
340 t.stop();
|
jpayne@68
|
341 outstream.println(Tools.timeReadsBasesProcessed(t, readsOut, basesOut, 8));
|
jpayne@68
|
342 // outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
|
jpayne@68
|
343
|
jpayne@68
|
344 //Throw an exception of there was an error in a thread
|
jpayne@68
|
345 if(errorState){
|
jpayne@68
|
346 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
347 }
|
jpayne@68
|
348 }
|
jpayne@68
|
349
|
jpayne@68
|
350 private void writeIdHist(){
|
jpayne@68
|
351 if(ffIdHist==null){return;}
|
jpayne@68
|
352 ByteStreamWriter bsw=new ByteStreamWriter(ffIdHist);
|
jpayne@68
|
353 bsw.start();
|
jpayne@68
|
354 bsw.print("#Identity\tCount\n".getBytes());
|
jpayne@68
|
355 for(int i=0; i<idHist.length; i++){
|
jpayne@68
|
356 bsw.print(i*100.0/(ID_BINS-1), 3).print('\t').println(idHist[i]);
|
jpayne@68
|
357 }
|
jpayne@68
|
358 errorState|=bsw.poisonAndWait();
|
jpayne@68
|
359 }
|
jpayne@68
|
360
|
jpayne@68
|
361 /** Create a Read Input Stream */
|
jpayne@68
|
362 private ConcurrentReadInputStream makeCris(){
|
jpayne@68
|
363 if(ffin1==null){return null;}
|
jpayne@68
|
364 ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null);
|
jpayne@68
|
365 cris.start(); //Start the stream
|
jpayne@68
|
366 if(verbose){outstream.println("Started cris");}
|
jpayne@68
|
367 return cris;
|
jpayne@68
|
368 }
|
jpayne@68
|
369
|
jpayne@68
|
370 /** Create a Read Output Stream */
|
jpayne@68
|
371 private ConcurrentReadOutputStream makeCros(boolean pairedInput){
|
jpayne@68
|
372 if(ffout1==null){return null;}
|
jpayne@68
|
373
|
jpayne@68
|
374 //Set output buffer size
|
jpayne@68
|
375 final int buff=4;
|
jpayne@68
|
376
|
jpayne@68
|
377 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(
|
jpayne@68
|
378 ffout1, null, qfout1, null, buff, null, false);
|
jpayne@68
|
379 ros.start(); //Start the stream
|
jpayne@68
|
380 return ros;
|
jpayne@68
|
381 }
|
jpayne@68
|
382
|
jpayne@68
|
383 /** Spawn process threads */
|
jpayne@68
|
384 private void spawnThreads(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros){
|
jpayne@68
|
385
|
jpayne@68
|
386 //Do anything necessary prior to processing
|
jpayne@68
|
387
|
jpayne@68
|
388 //Determine how many threads may be used
|
jpayne@68
|
389 final int threads=Shared.threads();
|
jpayne@68
|
390
|
jpayne@68
|
391 //Fill a list with ProcessThreads
|
jpayne@68
|
392 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
|
jpayne@68
|
393 for(int i=0; i<threads; i++){
|
jpayne@68
|
394 alpt.add(new ProcessThread(ros, i, nextZmwID, seed));
|
jpayne@68
|
395 }
|
jpayne@68
|
396
|
jpayne@68
|
397 //Start the threads
|
jpayne@68
|
398 for(ProcessThread pt : alpt){
|
jpayne@68
|
399 pt.start();
|
jpayne@68
|
400 }
|
jpayne@68
|
401
|
jpayne@68
|
402 //Wait for threads to finish
|
jpayne@68
|
403 waitForThreads(alpt);
|
jpayne@68
|
404
|
jpayne@68
|
405 //Do anything necessary after processing
|
jpayne@68
|
406
|
jpayne@68
|
407 }
|
jpayne@68
|
408
|
jpayne@68
|
409 /** Wait until all worker threads are finished, then return */
|
jpayne@68
|
410 private void waitForThreads(ArrayList<ProcessThread> alpt){
|
jpayne@68
|
411
|
jpayne@68
|
412 //Wait for completion of all threads
|
jpayne@68
|
413 boolean success=true;
|
jpayne@68
|
414 for(ProcessThread pt : alpt){
|
jpayne@68
|
415
|
jpayne@68
|
416 //Wait until this thread has terminated
|
jpayne@68
|
417 while(pt.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
418 try {
|
jpayne@68
|
419 //Attempt a join operation
|
jpayne@68
|
420 pt.join();
|
jpayne@68
|
421 } catch (InterruptedException e) {
|
jpayne@68
|
422 //Potentially handle this, if it is expected to occur
|
jpayne@68
|
423 e.printStackTrace();
|
jpayne@68
|
424 }
|
jpayne@68
|
425 }
|
jpayne@68
|
426
|
jpayne@68
|
427 //Accumulate per-thread statistics
|
jpayne@68
|
428 readsOut+=pt.readsOutT;
|
jpayne@68
|
429 basesOut+=pt.basesOutT;
|
jpayne@68
|
430 Tools.add(idHist, pt.idHistT);
|
jpayne@68
|
431
|
jpayne@68
|
432 success&=pt.success;
|
jpayne@68
|
433 }
|
jpayne@68
|
434
|
jpayne@68
|
435 //Track whether any threads failed
|
jpayne@68
|
436 if(!success){errorState=true;}
|
jpayne@68
|
437 }
|
jpayne@68
|
438
|
jpayne@68
|
439 /*--------------------------------------------------------------*/
|
jpayne@68
|
440 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
441 /*--------------------------------------------------------------*/
|
jpayne@68
|
442
|
jpayne@68
|
443 private byte randomBase(Random randy) {
|
jpayne@68
|
444 float rGC=randy.nextFloat();
|
jpayne@68
|
445 if(rGC>=genomeGC){//AT
|
jpayne@68
|
446 return (byte)(randy.nextBoolean() ? 'A' : 'T');
|
jpayne@68
|
447 }else{//GC
|
jpayne@68
|
448 return (byte)(randy.nextBoolean() ? 'G' : 'C');
|
jpayne@68
|
449 }
|
jpayne@68
|
450 }
|
jpayne@68
|
451
|
jpayne@68
|
452 private static int randomLength(int min, int max, Random randy) {
|
jpayne@68
|
453 if(min==max){return min;}
|
jpayne@68
|
454 int range=max-min+1;
|
jpayne@68
|
455 int x=min+Tools.min(randy.nextInt(range), randy.nextInt(range));
|
jpayne@68
|
456 // System.err.println(x+", "+min+", "+max);
|
jpayne@68
|
457 // new Exception().printStackTrace();
|
jpayne@68
|
458 // System.err.println(randy.getClass());
|
jpayne@68
|
459 // System.err.println(randy.nextLong());
|
jpayne@68
|
460 return x;
|
jpayne@68
|
461 }
|
jpayne@68
|
462
|
jpayne@68
|
463 private static float randomRate(float min, float max, Random randy) {
|
jpayne@68
|
464 if(min==max){return min;}
|
jpayne@68
|
465 float range=max-min;
|
jpayne@68
|
466 // float a=(randy.nextFloat()+randy.nextFloat());
|
jpayne@68
|
467 float b=(randy.nextFloat()+randy.nextFloat());
|
jpayne@68
|
468 float c=(1.6f*randy.nextFloat()+0.4f*randy.nextFloat());
|
jpayne@68
|
469 float x=min+range*0.5f*Tools.min(b, c);
|
jpayne@68
|
470 // assert(false) : "x="+x+", a="+a+", b="+b+", c="+c+", min="+min+", max="+max;
|
jpayne@68
|
471 // System.err.println(x+", "+min+", "+max);
|
jpayne@68
|
472 return x;
|
jpayne@68
|
473 }
|
jpayne@68
|
474
|
jpayne@68
|
475 private byte[] genSynthGenome(Random randy){
|
jpayne@68
|
476 assert(genomeSize<=MAX_GENOME_LENGTH) : genomeSize;
|
jpayne@68
|
477 byte[] array=new byte[(int)genomeSize];
|
jpayne@68
|
478 for(int i=0; i<genomeSize; i++){
|
jpayne@68
|
479 array[i]=randomBase(randy);
|
jpayne@68
|
480 }
|
jpayne@68
|
481 return array;
|
jpayne@68
|
482 }
|
jpayne@68
|
483
|
jpayne@68
|
484 private byte[] loadData(ConcurrentReadInputStream cris, Random randy){
|
jpayne@68
|
485
|
jpayne@68
|
486 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
487
|
jpayne@68
|
488 //Grab the first ListNum of reads
|
jpayne@68
|
489 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
490
|
jpayne@68
|
491 //As long as there is a nonempty read list...
|
jpayne@68
|
492 while(ln!=null && ln.size()>0){
|
jpayne@68
|
493 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
|
jpayne@68
|
494
|
jpayne@68
|
495 for(Read r : ln){
|
jpayne@68
|
496
|
jpayne@68
|
497 // System.err.println("Fetched read len="+r.length());//123
|
jpayne@68
|
498
|
jpayne@68
|
499 //Increment counters
|
jpayne@68
|
500 readsProcessed+=r.pairCount();
|
jpayne@68
|
501 basesProcessed+=r.pairLength();
|
jpayne@68
|
502
|
jpayne@68
|
503
|
jpayne@68
|
504
|
jpayne@68
|
505 // System.err.println("Filter: addNs="+addNs+", (r.length()>maxMoleculeLength && (invertedRepeatRate<=0 || r.length()>2*invertedRepeatMaxLength);//123
|
jpayne@68
|
506
|
jpayne@68
|
507 //Filter disabled; it causes short sequences to be ignored.
|
jpayne@68
|
508 //Optional filter criteria
|
jpayne@68
|
509 // if(!addNs || (r.length()>maxMoleculeLength && (invertedRepeatRate<=0 || r.length()>2*invertedRepeatMaxLength))){
|
jpayne@68
|
510 final byte[] bases=r.bases;
|
jpayne@68
|
511
|
jpayne@68
|
512 if(addNs){
|
jpayne@68
|
513 if(bb.length()>0){bb.append('N');}
|
jpayne@68
|
514 }else{
|
jpayne@68
|
515 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
516 if(!AminoAcid.isFullyDefined(bases[i])){
|
jpayne@68
|
517 bases[i]=randomBase(randy);
|
jpayne@68
|
518 }
|
jpayne@68
|
519 }
|
jpayne@68
|
520 }
|
jpayne@68
|
521
|
jpayne@68
|
522 if(bb.length()+bases.length<=MAX_GENOME_LENGTH){
|
jpayne@68
|
523 bb.append(bases);
|
jpayne@68
|
524 // System.err.println("Appended "+r.length());//123
|
jpayne@68
|
525 }else{
|
jpayne@68
|
526 // System.err.println("Appending partial.");//123
|
jpayne@68
|
527 for(byte b : bases){
|
jpayne@68
|
528 bb.append(b);
|
jpayne@68
|
529 if(bb.length>=MAX_GENOME_LENGTH){
|
jpayne@68
|
530 cris.returnList(ln.id, true);
|
jpayne@68
|
531 // System.err.println("Returning partial "+bb.length);//123
|
jpayne@68
|
532 return bb.toBytes();
|
jpayne@68
|
533 }
|
jpayne@68
|
534 }
|
jpayne@68
|
535 }
|
jpayne@68
|
536 // }
|
jpayne@68
|
537 }
|
jpayne@68
|
538
|
jpayne@68
|
539 //Notify the input stream that the list was used
|
jpayne@68
|
540 cris.returnList(ln);
|
jpayne@68
|
541 // if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access
|
jpayne@68
|
542
|
jpayne@68
|
543 //Fetch a new list
|
jpayne@68
|
544 ln=cris.nextList();
|
jpayne@68
|
545 }
|
jpayne@68
|
546
|
jpayne@68
|
547 //Notify the input stream that the final list was used
|
jpayne@68
|
548 if(ln!=null){
|
jpayne@68
|
549 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
|
jpayne@68
|
550 }
|
jpayne@68
|
551
|
jpayne@68
|
552 // System.err.println("Returning full "+bb.length);//123
|
jpayne@68
|
553 return bb.toBytes();
|
jpayne@68
|
554 }
|
jpayne@68
|
555
|
jpayne@68
|
556 private void addInvertedRepeats(byte[] bases, Random randy){
|
jpayne@68
|
557
|
jpayne@68
|
558 long added=0;
|
jpayne@68
|
559 long toAdd=(long) (bases.length*invertedRepeatRate);
|
jpayne@68
|
560 while(added<toAdd){
|
jpayne@68
|
561 int len=randomLength(invertedRepeatMinLength, invertedRepeatMaxLength, randy);
|
jpayne@68
|
562 int start=randy.nextInt(bases.length-2*len);
|
jpayne@68
|
563 int stop=start+len;
|
jpayne@68
|
564 boolean OK=true;
|
jpayne@68
|
565 for(int i=0; i<len && OK; i++){
|
jpayne@68
|
566 OK&=(bases[start+i]!='N' && bases[stop+i]!='N');
|
jpayne@68
|
567 }
|
jpayne@68
|
568 if(OK){
|
jpayne@68
|
569 for(int i=0; i<len; i++){
|
jpayne@68
|
570 byte b=bases[stop-i-1];
|
jpayne@68
|
571 bases[stop+i]=AminoAcid.baseToComplementExtended[b];
|
jpayne@68
|
572 }
|
jpayne@68
|
573 added+=2*len;
|
jpayne@68
|
574 // System.err.println("added="+added+"/"+toAdd);
|
jpayne@68
|
575 }else{
|
jpayne@68
|
576 //
|
jpayne@68
|
577 }
|
jpayne@68
|
578 }
|
jpayne@68
|
579
|
jpayne@68
|
580 }
|
jpayne@68
|
581
|
jpayne@68
|
582 /*--------------------------------------------------------------*/
|
jpayne@68
|
583 /*---------------- Inner Classes ----------------*/
|
jpayne@68
|
584 /*--------------------------------------------------------------*/
|
jpayne@68
|
585
|
jpayne@68
|
586 /** */
|
jpayne@68
|
587 private class ProcessThread extends Thread {
|
jpayne@68
|
588
|
jpayne@68
|
589 //Constructor
|
jpayne@68
|
590 ProcessThread(final ConcurrentReadOutputStream ros_, final int tid_,
|
jpayne@68
|
591 final AtomicLong nextZmwID_, final long seed){
|
jpayne@68
|
592 ros=ros_;
|
jpayne@68
|
593 tid=tid_;
|
jpayne@68
|
594 atomicZmwID=nextZmwID_;
|
jpayne@68
|
595 // assert(false) : randy.getClass()+", "+randy.nextLong();
|
jpayne@68
|
596 }
|
jpayne@68
|
597
|
jpayne@68
|
598 //Called by start()
|
jpayne@68
|
599 @Override
|
jpayne@68
|
600 public void run(){
|
jpayne@68
|
601 //Do anything necessary prior to processing
|
jpayne@68
|
602 randy=Shared.threadLocalRandom(seed<0 ? seed : seed+(tid+1)*tid*1000L);
|
jpayne@68
|
603
|
jpayne@68
|
604 //Process the reads
|
jpayne@68
|
605 processInner();
|
jpayne@68
|
606
|
jpayne@68
|
607 //Do anything necessary after processing
|
jpayne@68
|
608
|
jpayne@68
|
609 //Indicate successful exit status
|
jpayne@68
|
610 success=true;
|
jpayne@68
|
611 }
|
jpayne@68
|
612
|
jpayne@68
|
613 /** Iterate through the reads */
|
jpayne@68
|
614 void processInner(){
|
jpayne@68
|
615
|
jpayne@68
|
616 //As long as there is a nonempty read list...
|
jpayne@68
|
617 for(long generated=atomicZmwID.getAndAdd(readsPerList); generated<maxZMWs;
|
jpayne@68
|
618 generated=atomicZmwID.getAndAdd(readsPerList)){
|
jpayne@68
|
619 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
|
jpayne@68
|
620
|
jpayne@68
|
621 long toGenerate=Tools.min(readsPerList, maxZMWs-generated);
|
jpayne@68
|
622
|
jpayne@68
|
623 ArrayList<Read> reads=generateList((int)toGenerate, generated);
|
jpayne@68
|
624
|
jpayne@68
|
625 //Output reads to the output stream
|
jpayne@68
|
626 if(ros!=null){ros.add(reads, 0);}
|
jpayne@68
|
627 }
|
jpayne@68
|
628 }
|
jpayne@68
|
629
|
jpayne@68
|
630 /** Generate the next list of reads */
|
jpayne@68
|
631 private ArrayList<Read> generateList(int toGenerate, long nextID){
|
jpayne@68
|
632
|
jpayne@68
|
633 //Grab the actual read list from the ListNum
|
jpayne@68
|
634 final ArrayList<Read> reads=new ArrayList<Read>(toGenerate);
|
jpayne@68
|
635
|
jpayne@68
|
636 //Loop through each read in the list
|
jpayne@68
|
637 for(int i=0; i<toGenerate; i++, nextID++){
|
jpayne@68
|
638 ArrayList<Read> zmw=generateZMW(nextID);
|
jpayne@68
|
639 if(zmw==null){
|
jpayne@68
|
640 i--;
|
jpayne@68
|
641 nextID--;
|
jpayne@68
|
642 }else{reads.addAll(zmw);}
|
jpayne@68
|
643 }
|
jpayne@68
|
644
|
jpayne@68
|
645 return reads;
|
jpayne@68
|
646 }
|
jpayne@68
|
647
|
jpayne@68
|
648 private ReadBuilder median(ArrayList<ReadBuilder> list){
|
jpayne@68
|
649 if(list.size()<3){return null;}
|
jpayne@68
|
650 IntList lengths=new IntList(list.size()-2);
|
jpayne@68
|
651
|
jpayne@68
|
652 for(int i=1; i<list.size()-1; i++){
|
jpayne@68
|
653 lengths.add(list.get(i).length());
|
jpayne@68
|
654 }
|
jpayne@68
|
655 lengths.sort();
|
jpayne@68
|
656 int median=lengths.get((lengths.size-1)/2);
|
jpayne@68
|
657
|
jpayne@68
|
658 for(int i=1; i<list.size()-1; i++){
|
jpayne@68
|
659 ReadBuilder rb=list.get(i);
|
jpayne@68
|
660 if(rb.length()==median){
|
jpayne@68
|
661 return rb;
|
jpayne@68
|
662 }
|
jpayne@68
|
663 }
|
jpayne@68
|
664
|
jpayne@68
|
665 assert(false);
|
jpayne@68
|
666 return null;
|
jpayne@68
|
667 }
|
jpayne@68
|
668
|
jpayne@68
|
669 /**
|
jpayne@68
|
670 * Generate a single read.
|
jpayne@68
|
671 */
|
jpayne@68
|
672 private ArrayList<Read> generateZMW(final long nextID){
|
jpayne@68
|
673
|
jpayne@68
|
674 final int movieLength=randomLength(minMovie, maxMovie, randy);
|
jpayne@68
|
675 final float errorRate=randomRate(minErrorRate, maxErrorRate, randy);
|
jpayne@68
|
676
|
jpayne@68
|
677 ArrayList<ReadBuilder> baseCalls=baseCallAllPasses(movieLength, errorRate, nextID);
|
jpayne@68
|
678 if(baseCalls==null){
|
jpayne@68
|
679 // System.err.println(movieLength+", "+errorRate+", "+nextID);//123
|
jpayne@68
|
680 return null;
|
jpayne@68
|
681 }
|
jpayne@68
|
682
|
jpayne@68
|
683 final boolean missing=randy.nextFloat()<missingRate;
|
jpayne@68
|
684 if(missing){
|
jpayne@68
|
685 final int missingMod=randy.nextInt(2);
|
jpayne@68
|
686 ArrayList<ReadBuilder> temp=new ArrayList<ReadBuilder>();
|
jpayne@68
|
687
|
jpayne@68
|
688 ReadBuilder current=null;
|
jpayne@68
|
689 for(int i=0; i<baseCalls.size(); i++){
|
jpayne@68
|
690 ReadBuilder rb=baseCalls.get(i);
|
jpayne@68
|
691 assert(rb.subreads==1);
|
jpayne@68
|
692 assert(rb.missing==0);
|
jpayne@68
|
693 assert(rb.adapters==0);
|
jpayne@68
|
694 if(current!=null && (i&1)==missingMod){
|
jpayne@68
|
695 current.add(rb);
|
jpayne@68
|
696 current.missing++;
|
jpayne@68
|
697 assert(current.subreads>1);
|
jpayne@68
|
698 assert(current.missing>0);
|
jpayne@68
|
699 temp.add(current);
|
jpayne@68
|
700 current=null;
|
jpayne@68
|
701 }else{
|
jpayne@68
|
702 if(current!=null){temp.add(current);}
|
jpayne@68
|
703 current=rb;
|
jpayne@68
|
704 }
|
jpayne@68
|
705 }
|
jpayne@68
|
706 if(current!=null){temp.add(current);}
|
jpayne@68
|
707 baseCalls=temp;
|
jpayne@68
|
708 }
|
jpayne@68
|
709
|
jpayne@68
|
710 if(makeCCS){
|
jpayne@68
|
711 ReadBuilder median=median(baseCalls);
|
jpayne@68
|
712 if(median==null){return null;}
|
jpayne@68
|
713 baseCalls.clear();
|
jpayne@68
|
714 baseCalls.add(median);
|
jpayne@68
|
715 }else if(hiddenRate>0){
|
jpayne@68
|
716 ArrayList<ReadBuilder> temp=new ArrayList<ReadBuilder>();
|
jpayne@68
|
717
|
jpayne@68
|
718 ReadBuilder current=null;
|
jpayne@68
|
719 for(int i=0; i<baseCalls.size(); i++){
|
jpayne@68
|
720 ReadBuilder rb=baseCalls.get(i);
|
jpayne@68
|
721 assert(rb.adapters==0);
|
jpayne@68
|
722 if(current!=null && randy.nextFloat()<hiddenRate){
|
jpayne@68
|
723 current.add(baseCallAdapter(errorRate));
|
jpayne@68
|
724 current.add(rb);
|
jpayne@68
|
725 assert(current.adapters>0);
|
jpayne@68
|
726 assert(current.subreads>1);
|
jpayne@68
|
727 }else{
|
jpayne@68
|
728 if(current!=null){temp.add(current);}
|
jpayne@68
|
729 current=rb;
|
jpayne@68
|
730 assert(current.adapters==0);
|
jpayne@68
|
731 }
|
jpayne@68
|
732 }
|
jpayne@68
|
733 if(current!=null){temp.add(current);}
|
jpayne@68
|
734 baseCalls=temp;
|
jpayne@68
|
735 }
|
jpayne@68
|
736
|
jpayne@68
|
737
|
jpayne@68
|
738 ArrayList<Read> reads=new ArrayList<Read>();
|
jpayne@68
|
739 for(ReadBuilder rb : baseCalls){
|
jpayne@68
|
740 Read r=rb.toRead();
|
jpayne@68
|
741 readsOutT+=r.pairCount();
|
jpayne@68
|
742 basesOutT+=r.length();
|
jpayne@68
|
743 reads.add(r);
|
jpayne@68
|
744 }
|
jpayne@68
|
745
|
jpayne@68
|
746 idHistT[(int)((1-errorRate)*(ID_BINS-1)+0.5f)]++;
|
jpayne@68
|
747 return reads;
|
jpayne@68
|
748 }
|
jpayne@68
|
749
|
jpayne@68
|
750 private ArrayList<ReadBuilder> baseCallAllPasses(final int movieLength, final float errorRate, long zmw){
|
jpayne@68
|
751 byte[] frag=null;
|
jpayne@68
|
752
|
jpayne@68
|
753 for(int i=0; i<10 && frag==null; i++){//retry several times
|
jpayne@68
|
754 frag=fetchBases(ref);
|
jpayne@68
|
755 }
|
jpayne@68
|
756
|
jpayne@68
|
757 if(frag==null){
|
jpayne@68
|
758 // System.err.println("Failed baseCallAllPasses("+movieLength+", "+errorRate+", "+zmw);//123
|
jpayne@68
|
759 return null;
|
jpayne@68
|
760 }
|
jpayne@68
|
761
|
jpayne@68
|
762 ArrayList<ReadBuilder> list=new ArrayList<ReadBuilder>();
|
jpayne@68
|
763 int movieRemaining=movieLength;
|
jpayne@68
|
764 int moviePos=0;
|
jpayne@68
|
765 assert(frag.length>0) : frag.length;
|
jpayne@68
|
766 int start=randy.nextInt(frag.length);
|
jpayne@68
|
767 while(movieRemaining>0){
|
jpayne@68
|
768 ReadBuilder rb=baseCallOnePass(frag, errorRate, start, moviePos, movieRemaining, zmw);
|
jpayne@68
|
769 list.add(rb);
|
jpayne@68
|
770 start=0;
|
jpayne@68
|
771 int elapsed=rb.length()+adapterLen;
|
jpayne@68
|
772 moviePos+=elapsed;
|
jpayne@68
|
773 movieRemaining-=elapsed;
|
jpayne@68
|
774 AminoAcid.reverseComplementBasesInPlace(frag);
|
jpayne@68
|
775 }
|
jpayne@68
|
776 return list;
|
jpayne@68
|
777 }
|
jpayne@68
|
778
|
jpayne@68
|
779 /** Call bases for one pass */
|
jpayne@68
|
780 private ReadBuilder baseCallOnePass(final byte[] frag, final float errorRate, final int start, final int movieStart, int movieRemaining, long zmw){
|
jpayne@68
|
781 final float mult=1/(1-errorRate);
|
jpayne@68
|
782 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
783 int fpos=start;
|
jpayne@68
|
784 for(; fpos<frag.length && movieRemaining>0; fpos++){
|
jpayne@68
|
785 float f=randy.nextFloat();
|
jpayne@68
|
786 byte b=frag[fpos];
|
jpayne@68
|
787 if(f>=errorRate){
|
jpayne@68
|
788 bb.append(b);
|
jpayne@68
|
789 movieRemaining--;
|
jpayne@68
|
790 }else{
|
jpayne@68
|
791 f=mult*(1-f);
|
jpayne@68
|
792 if(f<insThresh){//ins
|
jpayne@68
|
793 b=AminoAcid.numberToBase[randy.nextInt(4)];
|
jpayne@68
|
794 bb.append(b);
|
jpayne@68
|
795 fpos--;
|
jpayne@68
|
796 movieRemaining--;
|
jpayne@68
|
797 }else if(f<delThresh){//del
|
jpayne@68
|
798
|
jpayne@68
|
799 }else{//sub
|
jpayne@68
|
800 int x=AminoAcid.baseToNumber[b]+randy.nextInt(3)+1;
|
jpayne@68
|
801 b=AminoAcid.numberToBase[x&3];
|
jpayne@68
|
802 bb.append(b);
|
jpayne@68
|
803 movieRemaining--;
|
jpayne@68
|
804 }
|
jpayne@68
|
805 }
|
jpayne@68
|
806 }
|
jpayne@68
|
807
|
jpayne@68
|
808 float passes=(fpos-start)*1.0f/frag.length;
|
jpayne@68
|
809 ReadBuilder rb=new ReadBuilder(bb, passes, movieStart, zmw);
|
jpayne@68
|
810 rb.errorRate=errorRate;
|
jpayne@68
|
811 return rb;
|
jpayne@68
|
812 }
|
jpayne@68
|
813
|
jpayne@68
|
814 /** Call bases for an adapter sequence pass */
|
jpayne@68
|
815 private ReadBuilder baseCallAdapter(final float errorRate){
|
jpayne@68
|
816 ReadBuilder rb=baseCallOnePass(pacbioAdapter, errorRate, 0, 0, 999999, 0);
|
jpayne@68
|
817 rb.passes=0;
|
jpayne@68
|
818 rb.fullPasses=0;
|
jpayne@68
|
819 rb.subreads=0;
|
jpayne@68
|
820 rb.adapters=1;
|
jpayne@68
|
821 return rb;
|
jpayne@68
|
822 }
|
jpayne@68
|
823
|
jpayne@68
|
824 private byte[] fetchBases(byte[] source){
|
jpayne@68
|
825
|
jpayne@68
|
826 final int len=randomLength(minMoleculeLength, maxMoleculeLength, randy);
|
jpayne@68
|
827 int start=len>=source.length ? 0 : randy.nextInt(source.length-len);
|
jpayne@68
|
828 int stop=Tools.min(start+len, source.length);
|
jpayne@68
|
829
|
jpayne@68
|
830 // System.err.println("fetchBases(len="+len+", slen="+source.length+", start="+start+", stop="+stop);//123
|
jpayne@68
|
831
|
jpayne@68
|
832 for(int i=start; i<stop; i++){
|
jpayne@68
|
833 if(!AminoAcid.isFullyDefined(source[i])){
|
jpayne@68
|
834 return null;
|
jpayne@68
|
835 }
|
jpayne@68
|
836 }
|
jpayne@68
|
837 if(stop-start<1){return null;}
|
jpayne@68
|
838 byte[] frag=Arrays.copyOfRange(source, start, stop);
|
jpayne@68
|
839 if(randy.nextBoolean()){AminoAcid.reverseComplementBasesInPlace(frag);}
|
jpayne@68
|
840 return frag;
|
jpayne@68
|
841 }
|
jpayne@68
|
842
|
jpayne@68
|
843 /** Number of reads retained by this thread */
|
jpayne@68
|
844 protected long readsOutT=0;
|
jpayne@68
|
845 /** Number of bases retained by this thread */
|
jpayne@68
|
846 protected long basesOutT=0;
|
jpayne@68
|
847
|
jpayne@68
|
848 /** True only if this thread has completed successfully */
|
jpayne@68
|
849 boolean success=false;
|
jpayne@68
|
850
|
jpayne@68
|
851 private final AtomicLong atomicZmwID;
|
jpayne@68
|
852 private final int readsPerList=Shared.bufferLen();
|
jpayne@68
|
853
|
jpayne@68
|
854 /** Random number source */
|
jpayne@68
|
855 private Random randy;
|
jpayne@68
|
856
|
jpayne@68
|
857 /** Random number source */
|
jpayne@68
|
858 private final long[] idHistT=new long[ID_BINS];
|
jpayne@68
|
859
|
jpayne@68
|
860 /** Shared output stream */
|
jpayne@68
|
861 private final ConcurrentReadOutputStream ros;
|
jpayne@68
|
862 /** Thread ID */
|
jpayne@68
|
863 final int tid;
|
jpayne@68
|
864 }
|
jpayne@68
|
865
|
jpayne@68
|
866 /*--------------------------------------------------------------*/
|
jpayne@68
|
867
|
jpayne@68
|
868
|
jpayne@68
|
869
|
jpayne@68
|
870 /*--------------------------------------------------------------*/
|
jpayne@68
|
871 /*---------------- Fields ----------------*/
|
jpayne@68
|
872 /*--------------------------------------------------------------*/
|
jpayne@68
|
873
|
jpayne@68
|
874 /** Primary input file path */
|
jpayne@68
|
875 private String in1=null;
|
jpayne@68
|
876
|
jpayne@68
|
877 /** Primary output file path */
|
jpayne@68
|
878 private String out1=null;
|
jpayne@68
|
879
|
jpayne@68
|
880 /** Primary output file path */
|
jpayne@68
|
881 private String outIdHist=null;
|
jpayne@68
|
882
|
jpayne@68
|
883 private String qfout1=null;
|
jpayne@68
|
884
|
jpayne@68
|
885 /** Override input file extension */
|
jpayne@68
|
886 private String extin=null;
|
jpayne@68
|
887 /** Override output file extension */
|
jpayne@68
|
888 private String extout=null;
|
jpayne@68
|
889
|
jpayne@68
|
890 /*--------------------------------------------------------------*/
|
jpayne@68
|
891
|
jpayne@68
|
892 /** */
|
jpayne@68
|
893 private int minMoleculeLength=500;
|
jpayne@68
|
894 /** */
|
jpayne@68
|
895 private int maxMoleculeLength=10000;
|
jpayne@68
|
896 /** */
|
jpayne@68
|
897 private int minMovie=500;
|
jpayne@68
|
898 /** */
|
jpayne@68
|
899 private int maxMovie=40000;
|
jpayne@68
|
900
|
jpayne@68
|
901 /** */
|
jpayne@68
|
902 private double missingRate=0.0;
|
jpayne@68
|
903 /** */
|
jpayne@68
|
904 private double hiddenRate=0.0;
|
jpayne@68
|
905 /** */
|
jpayne@68
|
906 private boolean allowBothEndsBad=false;
|
jpayne@68
|
907
|
jpayne@68
|
908 /** */
|
jpayne@68
|
909 private float genomeGC=0.6f;
|
jpayne@68
|
910 /** */
|
jpayne@68
|
911 private long genomeSize=10000000;
|
jpayne@68
|
912 /** */
|
jpayne@68
|
913 private boolean addNs=true;
|
jpayne@68
|
914
|
jpayne@68
|
915 /** */
|
jpayne@68
|
916 private double invertedRepeatRate=0.0;
|
jpayne@68
|
917 /** */
|
jpayne@68
|
918 private int invertedRepeatMinLength=100;
|
jpayne@68
|
919 /** */
|
jpayne@68
|
920 private int invertedRepeatMaxLength=10000;
|
jpayne@68
|
921
|
jpayne@68
|
922 /** */
|
jpayne@68
|
923 private float minErrorRate=0.05f;
|
jpayne@68
|
924 /** */
|
jpayne@68
|
925 private float maxErrorRate=0.25f;
|
jpayne@68
|
926 /** */
|
jpayne@68
|
927 private boolean addErrors=true;
|
jpayne@68
|
928 /** One read per ZMW */
|
jpayne@68
|
929 private boolean makeCCS=false;
|
jpayne@68
|
930
|
jpayne@68
|
931 /** */
|
jpayne@68
|
932 private long seed=-1;
|
jpayne@68
|
933
|
jpayne@68
|
934 private long[] idHist=new long[ID_BINS];
|
jpayne@68
|
935
|
jpayne@68
|
936 //These should add to 1
|
jpayne@68
|
937 private float insFraction=0.40f;
|
jpayne@68
|
938 private float delFraction=0.35f;
|
jpayne@68
|
939 private float subFraction=0.25f;
|
jpayne@68
|
940
|
jpayne@68
|
941 private final float insThresh;
|
jpayne@68
|
942 private final float delThresh;
|
jpayne@68
|
943
|
jpayne@68
|
944 /*--------------------------------------------------------------*/
|
jpayne@68
|
945
|
jpayne@68
|
946 /** Number of reads processed */
|
jpayne@68
|
947 protected long readsProcessed=0;
|
jpayne@68
|
948 /** Number of bases processed */
|
jpayne@68
|
949 protected long basesProcessed=0;
|
jpayne@68
|
950
|
jpayne@68
|
951 /** Number of reads retained */
|
jpayne@68
|
952 protected long readsOut=0;
|
jpayne@68
|
953 /** Number of bases retained */
|
jpayne@68
|
954 protected long basesOut=0;
|
jpayne@68
|
955
|
jpayne@68
|
956 /** Quit after processing this many INPUT reads */
|
jpayne@68
|
957 private long maxReads=-1;
|
jpayne@68
|
958
|
jpayne@68
|
959 /** Quit after generating this many OUTPUT zmws */
|
jpayne@68
|
960 private long maxZMWs=-1;
|
jpayne@68
|
961
|
jpayne@68
|
962 /** Reference genome, max 2Gbp */
|
jpayne@68
|
963 private byte[] ref;
|
jpayne@68
|
964
|
jpayne@68
|
965 private AtomicLong nextZmwID=new AtomicLong(0);
|
jpayne@68
|
966
|
jpayne@68
|
967 /*--------------------------------------------------------------*/
|
jpayne@68
|
968 /*---------------- Final Fields ----------------*/
|
jpayne@68
|
969 /*--------------------------------------------------------------*/
|
jpayne@68
|
970
|
jpayne@68
|
971 /** Primary input file */
|
jpayne@68
|
972 private final FileFormat ffin1;
|
jpayne@68
|
973
|
jpayne@68
|
974 /** Primary output file */
|
jpayne@68
|
975 private final FileFormat ffout1;
|
jpayne@68
|
976
|
jpayne@68
|
977 /** Primary output file */
|
jpayne@68
|
978 private final FileFormat ffIdHist;
|
jpayne@68
|
979
|
jpayne@68
|
980 /*--------------------------------------------------------------*/
|
jpayne@68
|
981 /*---------------- Constants ----------------*/
|
jpayne@68
|
982 /*--------------------------------------------------------------*/
|
jpayne@68
|
983
|
jpayne@68
|
984 private static final int ID_BINS=201;
|
jpayne@68
|
985
|
jpayne@68
|
986 private static final long MAX_GENOME_LENGTH=2000000000;
|
jpayne@68
|
987
|
jpayne@68
|
988 public static final byte[] pacbioAdapter="ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT".getBytes();
|
jpayne@68
|
989 public static final int adapterLen=pacbioAdapter.length;
|
jpayne@68
|
990
|
jpayne@68
|
991 /*--------------------------------------------------------------*/
|
jpayne@68
|
992 /*---------------- Common Fields ----------------*/
|
jpayne@68
|
993 /*--------------------------------------------------------------*/
|
jpayne@68
|
994
|
jpayne@68
|
995 /** Print status messages to this output stream */
|
jpayne@68
|
996 private PrintStream outstream=System.err;
|
jpayne@68
|
997 /** Print verbose messages */
|
jpayne@68
|
998 public static boolean verbose=false;
|
jpayne@68
|
999 /** True if an error was encountered */
|
jpayne@68
|
1000 public boolean errorState=false;
|
jpayne@68
|
1001 /** Overwrite existing output files */
|
jpayne@68
|
1002 private boolean overwrite=false;
|
jpayne@68
|
1003 /** Append to existing output files */
|
jpayne@68
|
1004 private boolean append=false;
|
jpayne@68
|
1005
|
jpayne@68
|
1006 }
|