annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/icecream/IceCreamMaker.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package icecream;
jpayne@68 2
jpayne@68 3 import java.io.PrintStream;
jpayne@68 4 import java.util.ArrayList;
jpayne@68 5 import java.util.Arrays;
jpayne@68 6 import java.util.Random;
jpayne@68 7 import java.util.concurrent.atomic.AtomicLong;
jpayne@68 8
jpayne@68 9 import dna.AminoAcid;
jpayne@68 10 import fileIO.ByteFile;
jpayne@68 11 import fileIO.ByteStreamWriter;
jpayne@68 12 import fileIO.FileFormat;
jpayne@68 13 import fileIO.ReadWrite;
jpayne@68 14 import shared.Parse;
jpayne@68 15 import shared.Parser;
jpayne@68 16 import shared.PreParser;
jpayne@68 17 import shared.ReadStats;
jpayne@68 18 import shared.Shared;
jpayne@68 19 import shared.Timer;
jpayne@68 20 import shared.Tools;
jpayne@68 21 import stream.ConcurrentReadInputStream;
jpayne@68 22 import stream.ConcurrentReadOutputStream;
jpayne@68 23 import stream.FASTQ;
jpayne@68 24 import stream.FastaReadInputStream;
jpayne@68 25 import stream.Read;
jpayne@68 26 import stream.SamLine;
jpayne@68 27 import structures.ByteBuilder;
jpayne@68 28 import structures.IntList;
jpayne@68 29 import structures.ListNum;
jpayne@68 30
jpayne@68 31 /**
jpayne@68 32 * Generates chimeric PacBio reads containing inverted repeats
jpayne@68 33 * due to missing adapters.
jpayne@68 34 *
jpayne@68 35 * @author Brian Bushnell
jpayne@68 36 * @date June 8, 2019
jpayne@68 37 *
jpayne@68 38 */
jpayne@68 39 public class IceCreamMaker {
jpayne@68 40
jpayne@68 41 /*--------------------------------------------------------------*/
jpayne@68 42 /*---------------- Initialization ----------------*/
jpayne@68 43 /*--------------------------------------------------------------*/
jpayne@68 44
jpayne@68 45 /**
jpayne@68 46 * Code entrance from the command line.
jpayne@68 47 * @param args Command line arguments
jpayne@68 48 */
jpayne@68 49 public static void main(String[] args){
jpayne@68 50 //Start a timer immediately upon code entrance.
jpayne@68 51 Timer t=new Timer();
jpayne@68 52
jpayne@68 53 //Create an instance of this class
jpayne@68 54 IceCreamMaker x=new IceCreamMaker(args);
jpayne@68 55
jpayne@68 56 //Run the object
jpayne@68 57 x.process(t);
jpayne@68 58
jpayne@68 59 //Close the print stream if it was redirected
jpayne@68 60 Shared.closeStream(x.outstream);
jpayne@68 61 }
jpayne@68 62
jpayne@68 63 /**
jpayne@68 64 * Constructor.
jpayne@68 65 * @param args Command line arguments
jpayne@68 66 */
jpayne@68 67 public IceCreamMaker(String[] args){
jpayne@68 68
jpayne@68 69 {//Preparse block for help, config files, and outstream
jpayne@68 70 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 71 args=pp.args;
jpayne@68 72 outstream=pp.outstream;
jpayne@68 73 }
jpayne@68 74
jpayne@68 75 //Set shared static variables prior to parsing
jpayne@68 76 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 77 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 78 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
jpayne@68 79 Shared.FAKE_QUAL=8;
jpayne@68 80 // Shared.FASTA_WRAP=511;
jpayne@68 81 SamLine.SET_FROM_OK=true;
jpayne@68 82
jpayne@68 83 {//Parse the arguments
jpayne@68 84 final Parser parser=parse(args);
jpayne@68 85 Parser.processQuality();
jpayne@68 86
jpayne@68 87 maxReads=parser.maxReads;
jpayne@68 88 overwrite=ReadStats.overwrite=parser.overwrite;
jpayne@68 89 append=ReadStats.append=parser.append;
jpayne@68 90
jpayne@68 91 in1=parser.in1;
jpayne@68 92 extin=parser.extin;
jpayne@68 93
jpayne@68 94 out1=parser.out1;
jpayne@68 95 qfout1=parser.qfout1;
jpayne@68 96 extout=parser.extout;
jpayne@68 97 }
jpayne@68 98
jpayne@68 99 validateParams();
jpayne@68 100 fixExtensions(); //Add or remove .gz or .bz2 as needed
jpayne@68 101 checkFileExistence(); //Ensure files can be read and written
jpayne@68 102 checkStatics(); //Adjust file-related static fields as needed for this program
jpayne@68 103
jpayne@68 104 //Create output FileFormat objects
jpayne@68 105 ffout1=FileFormat.testOutput(out1, FileFormat.FASTQ, extout, true, overwrite, append, false);
jpayne@68 106
jpayne@68 107 //Create input FileFormat objects
jpayne@68 108 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
jpayne@68 109
jpayne@68 110 ffIdHist=FileFormat.testOutput(outIdHist, FileFormat.TXT, null, false, overwrite, append, false);
jpayne@68 111
jpayne@68 112 insThresh=insFraction;
jpayne@68 113 delThresh=delFraction+insThresh;
jpayne@68 114 }
jpayne@68 115
jpayne@68 116 /*--------------------------------------------------------------*/
jpayne@68 117 /*---------------- Initialization Helpers ----------------*/
jpayne@68 118 /*--------------------------------------------------------------*/
jpayne@68 119
jpayne@68 120 /** Parse arguments from the command line */
jpayne@68 121 private Parser parse(String[] args){
jpayne@68 122
jpayne@68 123 //Create a parser object
jpayne@68 124 Parser parser=new Parser();
jpayne@68 125
jpayne@68 126 //Set any necessary Parser defaults here
jpayne@68 127 //parser.foo=bar;
jpayne@68 128
jpayne@68 129 //Parse each argument
jpayne@68 130 for(int i=0; i<args.length; i++){
jpayne@68 131 String arg=args[i];
jpayne@68 132
jpayne@68 133 //Break arguments into their constituent parts, in the form of "a=b"
jpayne@68 134 String[] split=arg.split("=");
jpayne@68 135 String a=split[0].toLowerCase();
jpayne@68 136 String b=split.length>1 ? split[1] : null;
jpayne@68 137 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
jpayne@68 138
jpayne@68 139 if(a.equals("verbose")){
jpayne@68 140 verbose=Parse.parseBoolean(b);
jpayne@68 141 }
jpayne@68 142
jpayne@68 143 else if(a.equals("idhist")){
jpayne@68 144 outIdHist=b;
jpayne@68 145 }
jpayne@68 146
jpayne@68 147 else if(a.equals("minlength") || a.equals("minlen")){
jpayne@68 148 minMoleculeLength=Parse.parseIntKMG(b);
jpayne@68 149 }else if(a.equals("maxlength") || a.equals("maxlen")){
jpayne@68 150 maxMoleculeLength=Parse.parseIntKMG(b);
jpayne@68 151 }else if(a.equals("length") || a.equals("len")){
jpayne@68 152 minMoleculeLength=maxMoleculeLength=Parse.parseIntKMG(b);
jpayne@68 153 }
jpayne@68 154
jpayne@68 155 else if(a.equals("minmovie") || a.equals("minmov")){
jpayne@68 156 minMovie=Parse.parseIntKMG(b);
jpayne@68 157 }else if(a.equals("maxmovie") || a.equals("maxmov")){
jpayne@68 158 maxMovie=Parse.parseIntKMG(b);
jpayne@68 159 }else if(a.equals("movie") || a.equals("mov")){
jpayne@68 160 minMovie=maxMovie=Parse.parseIntKMG(b);
jpayne@68 161 }
jpayne@68 162
jpayne@68 163 else if(a.equals("missingrate") || a.equals("missing")){
jpayne@68 164 missingRate=Double.parseDouble(b);
jpayne@68 165 assert(missingRate<=1);
jpayne@68 166 }else if(a.equals("hiddenrate") || a.equals("hidden")){
jpayne@68 167 hiddenRate=Double.parseDouble(b);
jpayne@68 168 assert(hiddenRate<=1);
jpayne@68 169 }else if(a.equals("bothends")){
jpayne@68 170 allowBothEndsBad=Parse.parseBoolean(b);
jpayne@68 171 assert(false) : "TODO";
jpayne@68 172 }
jpayne@68 173
jpayne@68 174 else if(a.equals("gc")){
jpayne@68 175 genomeGC=(float)Double.parseDouble(b);
jpayne@68 176 assert(genomeGC>=0 && genomeGC<=1);
jpayne@68 177 }else if(a.equals("genomesize")){
jpayne@68 178 genomeSize=Parse.parseKMG(b);
jpayne@68 179 }else if(a.equals("addns") || a.equals("ns")){
jpayne@68 180 addNs=Parse.parseBoolean(b);
jpayne@68 181 }else if(a.equals("seed")){
jpayne@68 182 seed=Long.parseLong(b);
jpayne@68 183 }else if(a.equals("zmws") || a.equals("maxzmws") || a.equals("reads")){
jpayne@68 184 maxZMWs=Parse.parseIntKMG(b);
jpayne@68 185 }else if(a.equals("ccs")){
jpayne@68 186 makeCCS=Parse.parseBoolean(b);
jpayne@68 187 }
jpayne@68 188
jpayne@68 189 else if(a.equals("invertedrepeatrate") || a.equals("invertrepeatrate") || a.equals("irrate")){
jpayne@68 190 invertedRepeatRate=Double.parseDouble(b);
jpayne@68 191 assert(invertedRepeatRate>=0);
jpayne@68 192 }else if(a.equals("invertedrepeatminlen") || a.equals("invertrepeatminlen") || a.equals("irminlen")){
jpayne@68 193 invertedRepeatMinLength=Parse.parseIntKMG(b);
jpayne@68 194 }else if(a.equals("invertedrepeatmaxlen") || a.equals("invertrepeatmaxlen") || a.equals("irmaxlen")){
jpayne@68 195 invertedRepeatMaxLength=Parse.parseIntKMG(b);
jpayne@68 196 }else if(a.equals("invertedrepeatlen") || a.equals("invertrepeatlen") || a.equals("irlen")){
jpayne@68 197 invertedRepeatMinLength=invertedRepeatMaxLength=Parse.parseIntKMG(b);
jpayne@68 198 }
jpayne@68 199
jpayne@68 200 else if(a.equals("miner") || a.equals("minerrorrate")){
jpayne@68 201 minErrorRate=(float)Double.parseDouble(b);
jpayne@68 202 assert(minErrorRate>=0 && minErrorRate<=1);
jpayne@68 203 }else if(a.equals("maxer") || a.equals("maxerrorrate")){
jpayne@68 204 maxErrorRate=(float)Double.parseDouble(b);
jpayne@68 205 assert(maxErrorRate>=0 && maxErrorRate<=1);
jpayne@68 206 }else if(a.equals("er") || a.equals("errorrate")){
jpayne@68 207 minErrorRate=maxErrorRate=(float)Double.parseDouble(b);
jpayne@68 208 assert(minErrorRate>=0 && minErrorRate<=1);
jpayne@68 209 }
jpayne@68 210
jpayne@68 211 else if(a.equals("minid") || a.equals("minidentity")){
jpayne@68 212 maxErrorRate=1-(float)Double.parseDouble(b);
jpayne@68 213 assert(maxErrorRate>=0 && maxErrorRate<=1);
jpayne@68 214 }else if(a.equals("maxid") || a.equals("maxidentity")){
jpayne@68 215 minErrorRate=1-(float)Double.parseDouble(b);
jpayne@68 216 assert(minErrorRate>=0 && minErrorRate<=1);
jpayne@68 217 }else if(a.equals("id") || a.equals("identity")){
jpayne@68 218 minErrorRate=maxErrorRate=1-(float)Double.parseDouble(b);
jpayne@68 219 assert(minErrorRate>=0 && minErrorRate<=1);
jpayne@68 220 }else if(a.equals("adderrors")){
jpayne@68 221 addErrors=Parse.parseBoolean(b);
jpayne@68 222 }else if(a.equals("ref")){
jpayne@68 223 parser.in1=b;
jpayne@68 224 }
jpayne@68 225
jpayne@68 226 else if(a.equals("parse_flag_goes_here")){
jpayne@68 227 long fake_variable=Parse.parseKMG(b);
jpayne@68 228 //Set a variable here
jpayne@68 229 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
jpayne@68 230 //do nothing
jpayne@68 231 }else{
jpayne@68 232 outstream.println("Unknown parameter "+args[i]);
jpayne@68 233 assert(false) : "Unknown parameter "+args[i];
jpayne@68 234 }
jpayne@68 235 }
jpayne@68 236
jpayne@68 237 return parser;
jpayne@68 238 }
jpayne@68 239
jpayne@68 240 /** Add or remove .gz or .bz2 as needed */
jpayne@68 241 private void fixExtensions(){
jpayne@68 242 in1=Tools.fixExtension(in1);
jpayne@68 243 }
jpayne@68 244
jpayne@68 245 /** Ensure files can be read and written */
jpayne@68 246 private void checkFileExistence(){
jpayne@68 247 //Ensure output files can be written
jpayne@68 248 if(!Tools.testOutputFiles(overwrite, append, false, out1, outIdHist)){
jpayne@68 249 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n");
jpayne@68 250 }
jpayne@68 251
jpayne@68 252 //Ensure input files can be read
jpayne@68 253 if(!Tools.testInputFiles(false, true, in1)){
jpayne@68 254 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 255 }
jpayne@68 256
jpayne@68 257 //Ensure that no file was specified multiple times
jpayne@68 258 if(!Tools.testForDuplicateFiles(true, in1, out1, outIdHist)){
jpayne@68 259 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 260 }
jpayne@68 261 }
jpayne@68 262
jpayne@68 263 /** Adjust file-related static fields as needed for this program */
jpayne@68 264 private static void checkStatics(){
jpayne@68 265 //Adjust the number of threads for input file reading
jpayne@68 266 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 267 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 268 }
jpayne@68 269
jpayne@68 270 assert(FastaReadInputStream.settingsOK());
jpayne@68 271 }
jpayne@68 272
jpayne@68 273 /** Ensure parameter ranges are within bounds and required parameters are set */
jpayne@68 274 private boolean validateParams(){
jpayne@68 275 assert(minMoleculeLength>0 && minMoleculeLength<=maxMoleculeLength) : minMoleculeLength+", "+maxMoleculeLength;
jpayne@68 276 assert(minMovie>0 && minMovie<=maxMovie) : minMovie+", "+maxMovie;
jpayne@68 277
jpayne@68 278 assert(missingRate>=0 && missingRate<=1) : missingRate;
jpayne@68 279 assert(hiddenRate>=0 && hiddenRate<=1) : hiddenRate;
jpayne@68 280 assert(genomeGC>=0 && genomeGC<=1) : genomeGC;
jpayne@68 281 assert(in1!=null || genomeSize>maxMoleculeLength) : genomeSize;
jpayne@68 282 assert(in1!=null || invertedRepeatMaxLength*2<genomeSize) : genomeSize;
jpayne@68 283 assert(maxZMWs>0) : "zmsw="+maxZMWs+"; please set to a positive number.";
jpayne@68 284
jpayne@68 285 assert(invertedRepeatRate>=0) : invertedRepeatRate;
jpayne@68 286 assert(invertedRepeatMinLength>0 && invertedRepeatMinLength<=invertedRepeatMaxLength) : invertedRepeatMinLength+", "+invertedRepeatMaxLength;
jpayne@68 287
jpayne@68 288 assert(minErrorRate>=0 && minErrorRate<=maxErrorRate) : minErrorRate+", "+maxErrorRate;
jpayne@68 289 return true;
jpayne@68 290 }
jpayne@68 291
jpayne@68 292 /*--------------------------------------------------------------*/
jpayne@68 293 /*---------------- Outer Methods ----------------*/
jpayne@68 294 /*--------------------------------------------------------------*/
jpayne@68 295
jpayne@68 296 /** Create read streams and process all data */
jpayne@68 297 void process(Timer t){
jpayne@68 298
jpayne@68 299 //Turn off read validation in the input threads to increase speed
jpayne@68 300 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
jpayne@68 301 Read.VALIDATE_IN_CONSTRUCTOR=true;
jpayne@68 302
jpayne@68 303 //Create a read input stream
jpayne@68 304 final ConcurrentReadInputStream cris=makeCris();
jpayne@68 305
jpayne@68 306 //Optionally create a read output stream
jpayne@68 307 final ConcurrentReadOutputStream ros=makeCros(false);
jpayne@68 308
jpayne@68 309 //Reset counters
jpayne@68 310 readsProcessed=readsOut=0;
jpayne@68 311 basesProcessed=basesOut=0;
jpayne@68 312
jpayne@68 313 Random randy=Shared.threadLocalRandom(seed);
jpayne@68 314 if(cris==null){
jpayne@68 315 ref=genSynthGenome(randy);
jpayne@68 316 }else{
jpayne@68 317 ref=loadData(cris, randy);
jpayne@68 318 }
jpayne@68 319
jpayne@68 320 if(invertedRepeatRate>0){
jpayne@68 321 addInvertedRepeats(ref, randy);
jpayne@68 322 }
jpayne@68 323
jpayne@68 324 //Process the reads in separate threads
jpayne@68 325 spawnThreads(cris, ros);
jpayne@68 326
jpayne@68 327 if(verbose){outstream.println("Finished; closing streams.");}
jpayne@68 328
jpayne@68 329 //Write anything that was accumulated by ReadStats
jpayne@68 330 errorState|=ReadStats.writeAll();
jpayne@68 331 //Close the read streams
jpayne@68 332 errorState|=ReadWrite.closeStreams(cris, ros);
jpayne@68 333
jpayne@68 334 writeIdHist();
jpayne@68 335
jpayne@68 336 //Reset read validation
jpayne@68 337 Read.VALIDATE_IN_CONSTRUCTOR=vic;
jpayne@68 338
jpayne@68 339 //Report timing and results
jpayne@68 340 t.stop();
jpayne@68 341 outstream.println(Tools.timeReadsBasesProcessed(t, readsOut, basesOut, 8));
jpayne@68 342 // outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
jpayne@68 343
jpayne@68 344 //Throw an exception of there was an error in a thread
jpayne@68 345 if(errorState){
jpayne@68 346 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 347 }
jpayne@68 348 }
jpayne@68 349
jpayne@68 350 private void writeIdHist(){
jpayne@68 351 if(ffIdHist==null){return;}
jpayne@68 352 ByteStreamWriter bsw=new ByteStreamWriter(ffIdHist);
jpayne@68 353 bsw.start();
jpayne@68 354 bsw.print("#Identity\tCount\n".getBytes());
jpayne@68 355 for(int i=0; i<idHist.length; i++){
jpayne@68 356 bsw.print(i*100.0/(ID_BINS-1), 3).print('\t').println(idHist[i]);
jpayne@68 357 }
jpayne@68 358 errorState|=bsw.poisonAndWait();
jpayne@68 359 }
jpayne@68 360
jpayne@68 361 /** Create a Read Input Stream */
jpayne@68 362 private ConcurrentReadInputStream makeCris(){
jpayne@68 363 if(ffin1==null){return null;}
jpayne@68 364 ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null);
jpayne@68 365 cris.start(); //Start the stream
jpayne@68 366 if(verbose){outstream.println("Started cris");}
jpayne@68 367 return cris;
jpayne@68 368 }
jpayne@68 369
jpayne@68 370 /** Create a Read Output Stream */
jpayne@68 371 private ConcurrentReadOutputStream makeCros(boolean pairedInput){
jpayne@68 372 if(ffout1==null){return null;}
jpayne@68 373
jpayne@68 374 //Set output buffer size
jpayne@68 375 final int buff=4;
jpayne@68 376
jpayne@68 377 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(
jpayne@68 378 ffout1, null, qfout1, null, buff, null, false);
jpayne@68 379 ros.start(); //Start the stream
jpayne@68 380 return ros;
jpayne@68 381 }
jpayne@68 382
jpayne@68 383 /** Spawn process threads */
jpayne@68 384 private void spawnThreads(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros){
jpayne@68 385
jpayne@68 386 //Do anything necessary prior to processing
jpayne@68 387
jpayne@68 388 //Determine how many threads may be used
jpayne@68 389 final int threads=Shared.threads();
jpayne@68 390
jpayne@68 391 //Fill a list with ProcessThreads
jpayne@68 392 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
jpayne@68 393 for(int i=0; i<threads; i++){
jpayne@68 394 alpt.add(new ProcessThread(ros, i, nextZmwID, seed));
jpayne@68 395 }
jpayne@68 396
jpayne@68 397 //Start the threads
jpayne@68 398 for(ProcessThread pt : alpt){
jpayne@68 399 pt.start();
jpayne@68 400 }
jpayne@68 401
jpayne@68 402 //Wait for threads to finish
jpayne@68 403 waitForThreads(alpt);
jpayne@68 404
jpayne@68 405 //Do anything necessary after processing
jpayne@68 406
jpayne@68 407 }
jpayne@68 408
jpayne@68 409 /** Wait until all worker threads are finished, then return */
jpayne@68 410 private void waitForThreads(ArrayList<ProcessThread> alpt){
jpayne@68 411
jpayne@68 412 //Wait for completion of all threads
jpayne@68 413 boolean success=true;
jpayne@68 414 for(ProcessThread pt : alpt){
jpayne@68 415
jpayne@68 416 //Wait until this thread has terminated
jpayne@68 417 while(pt.getState()!=Thread.State.TERMINATED){
jpayne@68 418 try {
jpayne@68 419 //Attempt a join operation
jpayne@68 420 pt.join();
jpayne@68 421 } catch (InterruptedException e) {
jpayne@68 422 //Potentially handle this, if it is expected to occur
jpayne@68 423 e.printStackTrace();
jpayne@68 424 }
jpayne@68 425 }
jpayne@68 426
jpayne@68 427 //Accumulate per-thread statistics
jpayne@68 428 readsOut+=pt.readsOutT;
jpayne@68 429 basesOut+=pt.basesOutT;
jpayne@68 430 Tools.add(idHist, pt.idHistT);
jpayne@68 431
jpayne@68 432 success&=pt.success;
jpayne@68 433 }
jpayne@68 434
jpayne@68 435 //Track whether any threads failed
jpayne@68 436 if(!success){errorState=true;}
jpayne@68 437 }
jpayne@68 438
jpayne@68 439 /*--------------------------------------------------------------*/
jpayne@68 440 /*---------------- Inner Methods ----------------*/
jpayne@68 441 /*--------------------------------------------------------------*/
jpayne@68 442
jpayne@68 443 private byte randomBase(Random randy) {
jpayne@68 444 float rGC=randy.nextFloat();
jpayne@68 445 if(rGC>=genomeGC){//AT
jpayne@68 446 return (byte)(randy.nextBoolean() ? 'A' : 'T');
jpayne@68 447 }else{//GC
jpayne@68 448 return (byte)(randy.nextBoolean() ? 'G' : 'C');
jpayne@68 449 }
jpayne@68 450 }
jpayne@68 451
jpayne@68 452 private static int randomLength(int min, int max, Random randy) {
jpayne@68 453 if(min==max){return min;}
jpayne@68 454 int range=max-min+1;
jpayne@68 455 int x=min+Tools.min(randy.nextInt(range), randy.nextInt(range));
jpayne@68 456 // System.err.println(x+", "+min+", "+max);
jpayne@68 457 // new Exception().printStackTrace();
jpayne@68 458 // System.err.println(randy.getClass());
jpayne@68 459 // System.err.println(randy.nextLong());
jpayne@68 460 return x;
jpayne@68 461 }
jpayne@68 462
jpayne@68 463 private static float randomRate(float min, float max, Random randy) {
jpayne@68 464 if(min==max){return min;}
jpayne@68 465 float range=max-min;
jpayne@68 466 // float a=(randy.nextFloat()+randy.nextFloat());
jpayne@68 467 float b=(randy.nextFloat()+randy.nextFloat());
jpayne@68 468 float c=(1.6f*randy.nextFloat()+0.4f*randy.nextFloat());
jpayne@68 469 float x=min+range*0.5f*Tools.min(b, c);
jpayne@68 470 // assert(false) : "x="+x+", a="+a+", b="+b+", c="+c+", min="+min+", max="+max;
jpayne@68 471 // System.err.println(x+", "+min+", "+max);
jpayne@68 472 return x;
jpayne@68 473 }
jpayne@68 474
jpayne@68 475 private byte[] genSynthGenome(Random randy){
jpayne@68 476 assert(genomeSize<=MAX_GENOME_LENGTH) : genomeSize;
jpayne@68 477 byte[] array=new byte[(int)genomeSize];
jpayne@68 478 for(int i=0; i<genomeSize; i++){
jpayne@68 479 array[i]=randomBase(randy);
jpayne@68 480 }
jpayne@68 481 return array;
jpayne@68 482 }
jpayne@68 483
jpayne@68 484 private byte[] loadData(ConcurrentReadInputStream cris, Random randy){
jpayne@68 485
jpayne@68 486 ByteBuilder bb=new ByteBuilder();
jpayne@68 487
jpayne@68 488 //Grab the first ListNum of reads
jpayne@68 489 ListNum<Read> ln=cris.nextList();
jpayne@68 490
jpayne@68 491 //As long as there is a nonempty read list...
jpayne@68 492 while(ln!=null && ln.size()>0){
jpayne@68 493 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
jpayne@68 494
jpayne@68 495 for(Read r : ln){
jpayne@68 496
jpayne@68 497 // System.err.println("Fetched read len="+r.length());//123
jpayne@68 498
jpayne@68 499 //Increment counters
jpayne@68 500 readsProcessed+=r.pairCount();
jpayne@68 501 basesProcessed+=r.pairLength();
jpayne@68 502
jpayne@68 503
jpayne@68 504
jpayne@68 505 // System.err.println("Filter: addNs="+addNs+", (r.length()>maxMoleculeLength && (invertedRepeatRate<=0 || r.length()>2*invertedRepeatMaxLength);//123
jpayne@68 506
jpayne@68 507 //Filter disabled; it causes short sequences to be ignored.
jpayne@68 508 //Optional filter criteria
jpayne@68 509 // if(!addNs || (r.length()>maxMoleculeLength && (invertedRepeatRate<=0 || r.length()>2*invertedRepeatMaxLength))){
jpayne@68 510 final byte[] bases=r.bases;
jpayne@68 511
jpayne@68 512 if(addNs){
jpayne@68 513 if(bb.length()>0){bb.append('N');}
jpayne@68 514 }else{
jpayne@68 515 for(int i=0; i<bases.length; i++){
jpayne@68 516 if(!AminoAcid.isFullyDefined(bases[i])){
jpayne@68 517 bases[i]=randomBase(randy);
jpayne@68 518 }
jpayne@68 519 }
jpayne@68 520 }
jpayne@68 521
jpayne@68 522 if(bb.length()+bases.length<=MAX_GENOME_LENGTH){
jpayne@68 523 bb.append(bases);
jpayne@68 524 // System.err.println("Appended "+r.length());//123
jpayne@68 525 }else{
jpayne@68 526 // System.err.println("Appending partial.");//123
jpayne@68 527 for(byte b : bases){
jpayne@68 528 bb.append(b);
jpayne@68 529 if(bb.length>=MAX_GENOME_LENGTH){
jpayne@68 530 cris.returnList(ln.id, true);
jpayne@68 531 // System.err.println("Returning partial "+bb.length);//123
jpayne@68 532 return bb.toBytes();
jpayne@68 533 }
jpayne@68 534 }
jpayne@68 535 }
jpayne@68 536 // }
jpayne@68 537 }
jpayne@68 538
jpayne@68 539 //Notify the input stream that the list was used
jpayne@68 540 cris.returnList(ln);
jpayne@68 541 // if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access
jpayne@68 542
jpayne@68 543 //Fetch a new list
jpayne@68 544 ln=cris.nextList();
jpayne@68 545 }
jpayne@68 546
jpayne@68 547 //Notify the input stream that the final list was used
jpayne@68 548 if(ln!=null){
jpayne@68 549 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
jpayne@68 550 }
jpayne@68 551
jpayne@68 552 // System.err.println("Returning full "+bb.length);//123
jpayne@68 553 return bb.toBytes();
jpayne@68 554 }
jpayne@68 555
jpayne@68 556 private void addInvertedRepeats(byte[] bases, Random randy){
jpayne@68 557
jpayne@68 558 long added=0;
jpayne@68 559 long toAdd=(long) (bases.length*invertedRepeatRate);
jpayne@68 560 while(added<toAdd){
jpayne@68 561 int len=randomLength(invertedRepeatMinLength, invertedRepeatMaxLength, randy);
jpayne@68 562 int start=randy.nextInt(bases.length-2*len);
jpayne@68 563 int stop=start+len;
jpayne@68 564 boolean OK=true;
jpayne@68 565 for(int i=0; i<len && OK; i++){
jpayne@68 566 OK&=(bases[start+i]!='N' && bases[stop+i]!='N');
jpayne@68 567 }
jpayne@68 568 if(OK){
jpayne@68 569 for(int i=0; i<len; i++){
jpayne@68 570 byte b=bases[stop-i-1];
jpayne@68 571 bases[stop+i]=AminoAcid.baseToComplementExtended[b];
jpayne@68 572 }
jpayne@68 573 added+=2*len;
jpayne@68 574 // System.err.println("added="+added+"/"+toAdd);
jpayne@68 575 }else{
jpayne@68 576 //
jpayne@68 577 }
jpayne@68 578 }
jpayne@68 579
jpayne@68 580 }
jpayne@68 581
jpayne@68 582 /*--------------------------------------------------------------*/
jpayne@68 583 /*---------------- Inner Classes ----------------*/
jpayne@68 584 /*--------------------------------------------------------------*/
jpayne@68 585
jpayne@68 586 /** */
jpayne@68 587 private class ProcessThread extends Thread {
jpayne@68 588
jpayne@68 589 //Constructor
jpayne@68 590 ProcessThread(final ConcurrentReadOutputStream ros_, final int tid_,
jpayne@68 591 final AtomicLong nextZmwID_, final long seed){
jpayne@68 592 ros=ros_;
jpayne@68 593 tid=tid_;
jpayne@68 594 atomicZmwID=nextZmwID_;
jpayne@68 595 // assert(false) : randy.getClass()+", "+randy.nextLong();
jpayne@68 596 }
jpayne@68 597
jpayne@68 598 //Called by start()
jpayne@68 599 @Override
jpayne@68 600 public void run(){
jpayne@68 601 //Do anything necessary prior to processing
jpayne@68 602 randy=Shared.threadLocalRandom(seed<0 ? seed : seed+(tid+1)*tid*1000L);
jpayne@68 603
jpayne@68 604 //Process the reads
jpayne@68 605 processInner();
jpayne@68 606
jpayne@68 607 //Do anything necessary after processing
jpayne@68 608
jpayne@68 609 //Indicate successful exit status
jpayne@68 610 success=true;
jpayne@68 611 }
jpayne@68 612
jpayne@68 613 /** Iterate through the reads */
jpayne@68 614 void processInner(){
jpayne@68 615
jpayne@68 616 //As long as there is a nonempty read list...
jpayne@68 617 for(long generated=atomicZmwID.getAndAdd(readsPerList); generated<maxZMWs;
jpayne@68 618 generated=atomicZmwID.getAndAdd(readsPerList)){
jpayne@68 619 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
jpayne@68 620
jpayne@68 621 long toGenerate=Tools.min(readsPerList, maxZMWs-generated);
jpayne@68 622
jpayne@68 623 ArrayList<Read> reads=generateList((int)toGenerate, generated);
jpayne@68 624
jpayne@68 625 //Output reads to the output stream
jpayne@68 626 if(ros!=null){ros.add(reads, 0);}
jpayne@68 627 }
jpayne@68 628 }
jpayne@68 629
jpayne@68 630 /** Generate the next list of reads */
jpayne@68 631 private ArrayList<Read> generateList(int toGenerate, long nextID){
jpayne@68 632
jpayne@68 633 //Grab the actual read list from the ListNum
jpayne@68 634 final ArrayList<Read> reads=new ArrayList<Read>(toGenerate);
jpayne@68 635
jpayne@68 636 //Loop through each read in the list
jpayne@68 637 for(int i=0; i<toGenerate; i++, nextID++){
jpayne@68 638 ArrayList<Read> zmw=generateZMW(nextID);
jpayne@68 639 if(zmw==null){
jpayne@68 640 i--;
jpayne@68 641 nextID--;
jpayne@68 642 }else{reads.addAll(zmw);}
jpayne@68 643 }
jpayne@68 644
jpayne@68 645 return reads;
jpayne@68 646 }
jpayne@68 647
jpayne@68 648 private ReadBuilder median(ArrayList<ReadBuilder> list){
jpayne@68 649 if(list.size()<3){return null;}
jpayne@68 650 IntList lengths=new IntList(list.size()-2);
jpayne@68 651
jpayne@68 652 for(int i=1; i<list.size()-1; i++){
jpayne@68 653 lengths.add(list.get(i).length());
jpayne@68 654 }
jpayne@68 655 lengths.sort();
jpayne@68 656 int median=lengths.get((lengths.size-1)/2);
jpayne@68 657
jpayne@68 658 for(int i=1; i<list.size()-1; i++){
jpayne@68 659 ReadBuilder rb=list.get(i);
jpayne@68 660 if(rb.length()==median){
jpayne@68 661 return rb;
jpayne@68 662 }
jpayne@68 663 }
jpayne@68 664
jpayne@68 665 assert(false);
jpayne@68 666 return null;
jpayne@68 667 }
jpayne@68 668
jpayne@68 669 /**
jpayne@68 670 * Generate a single read.
jpayne@68 671 */
jpayne@68 672 private ArrayList<Read> generateZMW(final long nextID){
jpayne@68 673
jpayne@68 674 final int movieLength=randomLength(minMovie, maxMovie, randy);
jpayne@68 675 final float errorRate=randomRate(minErrorRate, maxErrorRate, randy);
jpayne@68 676
jpayne@68 677 ArrayList<ReadBuilder> baseCalls=baseCallAllPasses(movieLength, errorRate, nextID);
jpayne@68 678 if(baseCalls==null){
jpayne@68 679 // System.err.println(movieLength+", "+errorRate+", "+nextID);//123
jpayne@68 680 return null;
jpayne@68 681 }
jpayne@68 682
jpayne@68 683 final boolean missing=randy.nextFloat()<missingRate;
jpayne@68 684 if(missing){
jpayne@68 685 final int missingMod=randy.nextInt(2);
jpayne@68 686 ArrayList<ReadBuilder> temp=new ArrayList<ReadBuilder>();
jpayne@68 687
jpayne@68 688 ReadBuilder current=null;
jpayne@68 689 for(int i=0; i<baseCalls.size(); i++){
jpayne@68 690 ReadBuilder rb=baseCalls.get(i);
jpayne@68 691 assert(rb.subreads==1);
jpayne@68 692 assert(rb.missing==0);
jpayne@68 693 assert(rb.adapters==0);
jpayne@68 694 if(current!=null && (i&1)==missingMod){
jpayne@68 695 current.add(rb);
jpayne@68 696 current.missing++;
jpayne@68 697 assert(current.subreads>1);
jpayne@68 698 assert(current.missing>0);
jpayne@68 699 temp.add(current);
jpayne@68 700 current=null;
jpayne@68 701 }else{
jpayne@68 702 if(current!=null){temp.add(current);}
jpayne@68 703 current=rb;
jpayne@68 704 }
jpayne@68 705 }
jpayne@68 706 if(current!=null){temp.add(current);}
jpayne@68 707 baseCalls=temp;
jpayne@68 708 }
jpayne@68 709
jpayne@68 710 if(makeCCS){
jpayne@68 711 ReadBuilder median=median(baseCalls);
jpayne@68 712 if(median==null){return null;}
jpayne@68 713 baseCalls.clear();
jpayne@68 714 baseCalls.add(median);
jpayne@68 715 }else if(hiddenRate>0){
jpayne@68 716 ArrayList<ReadBuilder> temp=new ArrayList<ReadBuilder>();
jpayne@68 717
jpayne@68 718 ReadBuilder current=null;
jpayne@68 719 for(int i=0; i<baseCalls.size(); i++){
jpayne@68 720 ReadBuilder rb=baseCalls.get(i);
jpayne@68 721 assert(rb.adapters==0);
jpayne@68 722 if(current!=null && randy.nextFloat()<hiddenRate){
jpayne@68 723 current.add(baseCallAdapter(errorRate));
jpayne@68 724 current.add(rb);
jpayne@68 725 assert(current.adapters>0);
jpayne@68 726 assert(current.subreads>1);
jpayne@68 727 }else{
jpayne@68 728 if(current!=null){temp.add(current);}
jpayne@68 729 current=rb;
jpayne@68 730 assert(current.adapters==0);
jpayne@68 731 }
jpayne@68 732 }
jpayne@68 733 if(current!=null){temp.add(current);}
jpayne@68 734 baseCalls=temp;
jpayne@68 735 }
jpayne@68 736
jpayne@68 737
jpayne@68 738 ArrayList<Read> reads=new ArrayList<Read>();
jpayne@68 739 for(ReadBuilder rb : baseCalls){
jpayne@68 740 Read r=rb.toRead();
jpayne@68 741 readsOutT+=r.pairCount();
jpayne@68 742 basesOutT+=r.length();
jpayne@68 743 reads.add(r);
jpayne@68 744 }
jpayne@68 745
jpayne@68 746 idHistT[(int)((1-errorRate)*(ID_BINS-1)+0.5f)]++;
jpayne@68 747 return reads;
jpayne@68 748 }
jpayne@68 749
jpayne@68 750 private ArrayList<ReadBuilder> baseCallAllPasses(final int movieLength, final float errorRate, long zmw){
jpayne@68 751 byte[] frag=null;
jpayne@68 752
jpayne@68 753 for(int i=0; i<10 && frag==null; i++){//retry several times
jpayne@68 754 frag=fetchBases(ref);
jpayne@68 755 }
jpayne@68 756
jpayne@68 757 if(frag==null){
jpayne@68 758 // System.err.println("Failed baseCallAllPasses("+movieLength+", "+errorRate+", "+zmw);//123
jpayne@68 759 return null;
jpayne@68 760 }
jpayne@68 761
jpayne@68 762 ArrayList<ReadBuilder> list=new ArrayList<ReadBuilder>();
jpayne@68 763 int movieRemaining=movieLength;
jpayne@68 764 int moviePos=0;
jpayne@68 765 assert(frag.length>0) : frag.length;
jpayne@68 766 int start=randy.nextInt(frag.length);
jpayne@68 767 while(movieRemaining>0){
jpayne@68 768 ReadBuilder rb=baseCallOnePass(frag, errorRate, start, moviePos, movieRemaining, zmw);
jpayne@68 769 list.add(rb);
jpayne@68 770 start=0;
jpayne@68 771 int elapsed=rb.length()+adapterLen;
jpayne@68 772 moviePos+=elapsed;
jpayne@68 773 movieRemaining-=elapsed;
jpayne@68 774 AminoAcid.reverseComplementBasesInPlace(frag);
jpayne@68 775 }
jpayne@68 776 return list;
jpayne@68 777 }
jpayne@68 778
jpayne@68 779 /** Call bases for one pass */
jpayne@68 780 private ReadBuilder baseCallOnePass(final byte[] frag, final float errorRate, final int start, final int movieStart, int movieRemaining, long zmw){
jpayne@68 781 final float mult=1/(1-errorRate);
jpayne@68 782 ByteBuilder bb=new ByteBuilder();
jpayne@68 783 int fpos=start;
jpayne@68 784 for(; fpos<frag.length && movieRemaining>0; fpos++){
jpayne@68 785 float f=randy.nextFloat();
jpayne@68 786 byte b=frag[fpos];
jpayne@68 787 if(f>=errorRate){
jpayne@68 788 bb.append(b);
jpayne@68 789 movieRemaining--;
jpayne@68 790 }else{
jpayne@68 791 f=mult*(1-f);
jpayne@68 792 if(f<insThresh){//ins
jpayne@68 793 b=AminoAcid.numberToBase[randy.nextInt(4)];
jpayne@68 794 bb.append(b);
jpayne@68 795 fpos--;
jpayne@68 796 movieRemaining--;
jpayne@68 797 }else if(f<delThresh){//del
jpayne@68 798
jpayne@68 799 }else{//sub
jpayne@68 800 int x=AminoAcid.baseToNumber[b]+randy.nextInt(3)+1;
jpayne@68 801 b=AminoAcid.numberToBase[x&3];
jpayne@68 802 bb.append(b);
jpayne@68 803 movieRemaining--;
jpayne@68 804 }
jpayne@68 805 }
jpayne@68 806 }
jpayne@68 807
jpayne@68 808 float passes=(fpos-start)*1.0f/frag.length;
jpayne@68 809 ReadBuilder rb=new ReadBuilder(bb, passes, movieStart, zmw);
jpayne@68 810 rb.errorRate=errorRate;
jpayne@68 811 return rb;
jpayne@68 812 }
jpayne@68 813
jpayne@68 814 /** Call bases for an adapter sequence pass */
jpayne@68 815 private ReadBuilder baseCallAdapter(final float errorRate){
jpayne@68 816 ReadBuilder rb=baseCallOnePass(pacbioAdapter, errorRate, 0, 0, 999999, 0);
jpayne@68 817 rb.passes=0;
jpayne@68 818 rb.fullPasses=0;
jpayne@68 819 rb.subreads=0;
jpayne@68 820 rb.adapters=1;
jpayne@68 821 return rb;
jpayne@68 822 }
jpayne@68 823
jpayne@68 824 private byte[] fetchBases(byte[] source){
jpayne@68 825
jpayne@68 826 final int len=randomLength(minMoleculeLength, maxMoleculeLength, randy);
jpayne@68 827 int start=len>=source.length ? 0 : randy.nextInt(source.length-len);
jpayne@68 828 int stop=Tools.min(start+len, source.length);
jpayne@68 829
jpayne@68 830 // System.err.println("fetchBases(len="+len+", slen="+source.length+", start="+start+", stop="+stop);//123
jpayne@68 831
jpayne@68 832 for(int i=start; i<stop; i++){
jpayne@68 833 if(!AminoAcid.isFullyDefined(source[i])){
jpayne@68 834 return null;
jpayne@68 835 }
jpayne@68 836 }
jpayne@68 837 if(stop-start<1){return null;}
jpayne@68 838 byte[] frag=Arrays.copyOfRange(source, start, stop);
jpayne@68 839 if(randy.nextBoolean()){AminoAcid.reverseComplementBasesInPlace(frag);}
jpayne@68 840 return frag;
jpayne@68 841 }
jpayne@68 842
jpayne@68 843 /** Number of reads retained by this thread */
jpayne@68 844 protected long readsOutT=0;
jpayne@68 845 /** Number of bases retained by this thread */
jpayne@68 846 protected long basesOutT=0;
jpayne@68 847
jpayne@68 848 /** True only if this thread has completed successfully */
jpayne@68 849 boolean success=false;
jpayne@68 850
jpayne@68 851 private final AtomicLong atomicZmwID;
jpayne@68 852 private final int readsPerList=Shared.bufferLen();
jpayne@68 853
jpayne@68 854 /** Random number source */
jpayne@68 855 private Random randy;
jpayne@68 856
jpayne@68 857 /** Random number source */
jpayne@68 858 private final long[] idHistT=new long[ID_BINS];
jpayne@68 859
jpayne@68 860 /** Shared output stream */
jpayne@68 861 private final ConcurrentReadOutputStream ros;
jpayne@68 862 /** Thread ID */
jpayne@68 863 final int tid;
jpayne@68 864 }
jpayne@68 865
jpayne@68 866 /*--------------------------------------------------------------*/
jpayne@68 867
jpayne@68 868
jpayne@68 869
jpayne@68 870 /*--------------------------------------------------------------*/
jpayne@68 871 /*---------------- Fields ----------------*/
jpayne@68 872 /*--------------------------------------------------------------*/
jpayne@68 873
jpayne@68 874 /** Primary input file path */
jpayne@68 875 private String in1=null;
jpayne@68 876
jpayne@68 877 /** Primary output file path */
jpayne@68 878 private String out1=null;
jpayne@68 879
jpayne@68 880 /** Primary output file path */
jpayne@68 881 private String outIdHist=null;
jpayne@68 882
jpayne@68 883 private String qfout1=null;
jpayne@68 884
jpayne@68 885 /** Override input file extension */
jpayne@68 886 private String extin=null;
jpayne@68 887 /** Override output file extension */
jpayne@68 888 private String extout=null;
jpayne@68 889
jpayne@68 890 /*--------------------------------------------------------------*/
jpayne@68 891
jpayne@68 892 /** */
jpayne@68 893 private int minMoleculeLength=500;
jpayne@68 894 /** */
jpayne@68 895 private int maxMoleculeLength=10000;
jpayne@68 896 /** */
jpayne@68 897 private int minMovie=500;
jpayne@68 898 /** */
jpayne@68 899 private int maxMovie=40000;
jpayne@68 900
jpayne@68 901 /** */
jpayne@68 902 private double missingRate=0.0;
jpayne@68 903 /** */
jpayne@68 904 private double hiddenRate=0.0;
jpayne@68 905 /** */
jpayne@68 906 private boolean allowBothEndsBad=false;
jpayne@68 907
jpayne@68 908 /** */
jpayne@68 909 private float genomeGC=0.6f;
jpayne@68 910 /** */
jpayne@68 911 private long genomeSize=10000000;
jpayne@68 912 /** */
jpayne@68 913 private boolean addNs=true;
jpayne@68 914
jpayne@68 915 /** */
jpayne@68 916 private double invertedRepeatRate=0.0;
jpayne@68 917 /** */
jpayne@68 918 private int invertedRepeatMinLength=100;
jpayne@68 919 /** */
jpayne@68 920 private int invertedRepeatMaxLength=10000;
jpayne@68 921
jpayne@68 922 /** */
jpayne@68 923 private float minErrorRate=0.05f;
jpayne@68 924 /** */
jpayne@68 925 private float maxErrorRate=0.25f;
jpayne@68 926 /** */
jpayne@68 927 private boolean addErrors=true;
jpayne@68 928 /** One read per ZMW */
jpayne@68 929 private boolean makeCCS=false;
jpayne@68 930
jpayne@68 931 /** */
jpayne@68 932 private long seed=-1;
jpayne@68 933
jpayne@68 934 private long[] idHist=new long[ID_BINS];
jpayne@68 935
jpayne@68 936 //These should add to 1
jpayne@68 937 private float insFraction=0.40f;
jpayne@68 938 private float delFraction=0.35f;
jpayne@68 939 private float subFraction=0.25f;
jpayne@68 940
jpayne@68 941 private final float insThresh;
jpayne@68 942 private final float delThresh;
jpayne@68 943
jpayne@68 944 /*--------------------------------------------------------------*/
jpayne@68 945
jpayne@68 946 /** Number of reads processed */
jpayne@68 947 protected long readsProcessed=0;
jpayne@68 948 /** Number of bases processed */
jpayne@68 949 protected long basesProcessed=0;
jpayne@68 950
jpayne@68 951 /** Number of reads retained */
jpayne@68 952 protected long readsOut=0;
jpayne@68 953 /** Number of bases retained */
jpayne@68 954 protected long basesOut=0;
jpayne@68 955
jpayne@68 956 /** Quit after processing this many INPUT reads */
jpayne@68 957 private long maxReads=-1;
jpayne@68 958
jpayne@68 959 /** Quit after generating this many OUTPUT zmws */
jpayne@68 960 private long maxZMWs=-1;
jpayne@68 961
jpayne@68 962 /** Reference genome, max 2Gbp */
jpayne@68 963 private byte[] ref;
jpayne@68 964
jpayne@68 965 private AtomicLong nextZmwID=new AtomicLong(0);
jpayne@68 966
jpayne@68 967 /*--------------------------------------------------------------*/
jpayne@68 968 /*---------------- Final Fields ----------------*/
jpayne@68 969 /*--------------------------------------------------------------*/
jpayne@68 970
jpayne@68 971 /** Primary input file */
jpayne@68 972 private final FileFormat ffin1;
jpayne@68 973
jpayne@68 974 /** Primary output file */
jpayne@68 975 private final FileFormat ffout1;
jpayne@68 976
jpayne@68 977 /** Primary output file */
jpayne@68 978 private final FileFormat ffIdHist;
jpayne@68 979
jpayne@68 980 /*--------------------------------------------------------------*/
jpayne@68 981 /*---------------- Constants ----------------*/
jpayne@68 982 /*--------------------------------------------------------------*/
jpayne@68 983
jpayne@68 984 private static final int ID_BINS=201;
jpayne@68 985
jpayne@68 986 private static final long MAX_GENOME_LENGTH=2000000000;
jpayne@68 987
jpayne@68 988 public static final byte[] pacbioAdapter="ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT".getBytes();
jpayne@68 989 public static final int adapterLen=pacbioAdapter.length;
jpayne@68 990
jpayne@68 991 /*--------------------------------------------------------------*/
jpayne@68 992 /*---------------- Common Fields ----------------*/
jpayne@68 993 /*--------------------------------------------------------------*/
jpayne@68 994
jpayne@68 995 /** Print status messages to this output stream */
jpayne@68 996 private PrintStream outstream=System.err;
jpayne@68 997 /** Print verbose messages */
jpayne@68 998 public static boolean verbose=false;
jpayne@68 999 /** True if an error was encountered */
jpayne@68 1000 public boolean errorState=false;
jpayne@68 1001 /** Overwrite existing output files */
jpayne@68 1002 private boolean overwrite=false;
jpayne@68 1003 /** Append to existing output files */
jpayne@68 1004 private boolean append=false;
jpayne@68 1005
jpayne@68 1006 }