view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/icecream/IceCreamMaker.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package icecream;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.atomic.AtomicLong;

import dna.AminoAcid;
import fileIO.ByteFile;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.ReadStats;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import stream.ConcurrentReadInputStream;
import stream.ConcurrentReadOutputStream;
import stream.FASTQ;
import stream.FastaReadInputStream;
import stream.Read;
import stream.SamLine;
import structures.ByteBuilder;
import structures.IntList;
import structures.ListNum;

/**
 * Generates chimeric PacBio reads containing inverted repeats
 * due to missing adapters.
 * 
 * @author Brian Bushnell
 * @date June 8, 2019
 *
 */
public class IceCreamMaker {
	
	/*--------------------------------------------------------------*/
	/*----------------        Initialization        ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Code entrance from the command line.
	 * @param args Command line arguments
	 */
	public static void main(String[] args){
		//Start a timer immediately upon code entrance.
		Timer t=new Timer();
		
		//Create an instance of this class
		IceCreamMaker x=new IceCreamMaker(args);
		
		//Run the object
		x.process(t);
		
		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}
	
	/**
	 * Constructor.
	 * @param args Command line arguments
	 */
	public IceCreamMaker(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}
		
		//Set shared static variables prior to parsing
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
		FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
		Shared.FAKE_QUAL=8;
//		Shared.FASTA_WRAP=511;
		SamLine.SET_FROM_OK=true;
		
		{//Parse the arguments
			final Parser parser=parse(args);
			Parser.processQuality();
			
			maxReads=parser.maxReads;
			overwrite=ReadStats.overwrite=parser.overwrite;
			append=ReadStats.append=parser.append;
			
			in1=parser.in1;
			extin=parser.extin;

			out1=parser.out1;
			qfout1=parser.qfout1;
			extout=parser.extout;
		}

		validateParams();
		fixExtensions(); //Add or remove .gz or .bz2 as needed
		checkFileExistence(); //Ensure files can be read and written
		checkStatics(); //Adjust file-related static fields as needed for this program 
		
		//Create output FileFormat objects
		ffout1=FileFormat.testOutput(out1, FileFormat.FASTQ, extout, true, overwrite, append, false);

		//Create input FileFormat objects
		ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
		
		ffIdHist=FileFormat.testOutput(outIdHist, FileFormat.TXT, null, false, overwrite, append, false);
		
		insThresh=insFraction;
		delThresh=delFraction+insThresh;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------    Initialization Helpers    ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Parse arguments from the command line */
	private Parser parse(String[] args){
		
		//Create a parser object
		Parser parser=new Parser();
		
		//Set any necessary Parser defaults here
		//parser.foo=bar;
		
		//Parse each argument
		for(int i=0; i<args.length; i++){
			String arg=args[i];
			
			//Break arguments into their constituent parts, in the form of "a=b"
			String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;
			if(b!=null && b.equalsIgnoreCase("null")){b=null;}
			
			if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
			}
			
			else if(a.equals("idhist")){
				outIdHist=b;
			}
			
			else if(a.equals("minlength") || a.equals("minlen")){
				minMoleculeLength=Parse.parseIntKMG(b);
			}else if(a.equals("maxlength") || a.equals("maxlen")){
				maxMoleculeLength=Parse.parseIntKMG(b);
			}else if(a.equals("length") || a.equals("len")){
				minMoleculeLength=maxMoleculeLength=Parse.parseIntKMG(b);
			}
			
			else if(a.equals("minmovie") || a.equals("minmov")){
				minMovie=Parse.parseIntKMG(b);
			}else if(a.equals("maxmovie") || a.equals("maxmov")){
				maxMovie=Parse.parseIntKMG(b);
			}else if(a.equals("movie") || a.equals("mov")){
				minMovie=maxMovie=Parse.parseIntKMG(b);
			}
			
			else if(a.equals("missingrate") || a.equals("missing")){
				missingRate=Double.parseDouble(b);
				assert(missingRate<=1);
			}else if(a.equals("hiddenrate") || a.equals("hidden")){
				hiddenRate=Double.parseDouble(b);
				assert(hiddenRate<=1);
			}else if(a.equals("bothends")){
				allowBothEndsBad=Parse.parseBoolean(b);
				assert(false) : "TODO";
			}
			
			else if(a.equals("gc")){
				genomeGC=(float)Double.parseDouble(b);
				assert(genomeGC>=0 && genomeGC<=1);
			}else if(a.equals("genomesize")){
				genomeSize=Parse.parseKMG(b);
			}else if(a.equals("addns") || a.equals("ns")){
				addNs=Parse.parseBoolean(b);
			}else if(a.equals("seed")){
				seed=Long.parseLong(b);
			}else if(a.equals("zmws") || a.equals("maxzmws") || a.equals("reads")){
				maxZMWs=Parse.parseIntKMG(b);
			}else if(a.equals("ccs")){
				makeCCS=Parse.parseBoolean(b);
			}
			
			else if(a.equals("invertedrepeatrate") || a.equals("invertrepeatrate") || a.equals("irrate")){
				invertedRepeatRate=Double.parseDouble(b);
				assert(invertedRepeatRate>=0);
			}else if(a.equals("invertedrepeatminlen") || a.equals("invertrepeatminlen") || a.equals("irminlen")){
				invertedRepeatMinLength=Parse.parseIntKMG(b);
			}else if(a.equals("invertedrepeatmaxlen") || a.equals("invertrepeatmaxlen") || a.equals("irmaxlen")){
				invertedRepeatMaxLength=Parse.parseIntKMG(b);
			}else if(a.equals("invertedrepeatlen") || a.equals("invertrepeatlen") || a.equals("irlen")){
				invertedRepeatMinLength=invertedRepeatMaxLength=Parse.parseIntKMG(b);
			}
			
			else if(a.equals("miner") || a.equals("minerrorrate")){
				minErrorRate=(float)Double.parseDouble(b);
				assert(minErrorRate>=0 && minErrorRate<=1);
			}else if(a.equals("maxer") || a.equals("maxerrorrate")){
				maxErrorRate=(float)Double.parseDouble(b);
				assert(maxErrorRate>=0 && maxErrorRate<=1);
			}else if(a.equals("er") || a.equals("errorrate")){
				minErrorRate=maxErrorRate=(float)Double.parseDouble(b);
				assert(minErrorRate>=0 && minErrorRate<=1);
			}
			
			else if(a.equals("minid") || a.equals("minidentity")){
				maxErrorRate=1-(float)Double.parseDouble(b);
				assert(maxErrorRate>=0 && maxErrorRate<=1);
			}else if(a.equals("maxid") || a.equals("maxidentity")){
				minErrorRate=1-(float)Double.parseDouble(b);
				assert(minErrorRate>=0 && minErrorRate<=1);
			}else if(a.equals("id") || a.equals("identity")){
				minErrorRate=maxErrorRate=1-(float)Double.parseDouble(b);
				assert(minErrorRate>=0 && minErrorRate<=1);
			}else if(a.equals("adderrors")){
				addErrors=Parse.parseBoolean(b);
			}else if(a.equals("ref")){
				parser.in1=b;
			}
			
			else if(a.equals("parse_flag_goes_here")){
				long fake_variable=Parse.parseKMG(b);
				//Set a variable here
			}else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
				//do nothing
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}
		
		return parser;
	}
	
	/** Add or remove .gz or .bz2 as needed */
	private void fixExtensions(){
		in1=Tools.fixExtension(in1);
	}
	
	/** Ensure files can be read and written */
	private void checkFileExistence(){
		//Ensure output files can be written
		if(!Tools.testOutputFiles(overwrite, append, false, out1, outIdHist)){
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n");
		}
		
		//Ensure input files can be read
		if(!Tools.testInputFiles(false, true, in1)){
			throw new RuntimeException("\nCan't read some input files.\n");  
		}
		
		//Ensure that no file was specified multiple times
		if(!Tools.testForDuplicateFiles(true, in1, out1, outIdHist)){
			throw new RuntimeException("\nSome file names were specified multiple times.\n");
		}
	}
	
	/** Adjust file-related static fields as needed for this program */
	private static void checkStatics(){
		//Adjust the number of threads for input file reading
		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
			ByteFile.FORCE_MODE_BF2=true;
		}
		
		assert(FastaReadInputStream.settingsOK());
	}
	
	/** Ensure parameter ranges are within bounds and required parameters are set */
	private boolean validateParams(){
		assert(minMoleculeLength>0 && minMoleculeLength<=maxMoleculeLength) : minMoleculeLength+", "+maxMoleculeLength;
		assert(minMovie>0 && minMovie<=maxMovie) : minMovie+", "+maxMovie;

		assert(missingRate>=0 && missingRate<=1) : missingRate;
		assert(hiddenRate>=0 && hiddenRate<=1) : hiddenRate;
		assert(genomeGC>=0 && genomeGC<=1) : genomeGC;
		assert(in1!=null || genomeSize>maxMoleculeLength) : genomeSize;
		assert(in1!=null || invertedRepeatMaxLength*2<genomeSize) : genomeSize;
		assert(maxZMWs>0) : "zmsw="+maxZMWs+"; please set to a positive number.";

		assert(invertedRepeatRate>=0) : invertedRepeatRate;
		assert(invertedRepeatMinLength>0 && invertedRepeatMinLength<=invertedRepeatMaxLength) : invertedRepeatMinLength+", "+invertedRepeatMaxLength;

		assert(minErrorRate>=0 && minErrorRate<=maxErrorRate) : minErrorRate+", "+maxErrorRate;
		return true;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------         Outer Methods        ----------------*/
	/*--------------------------------------------------------------*/

	/** Create read streams and process all data */
	void process(Timer t){
		
		//Turn off read validation in the input threads to increase speed
		final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
		Read.VALIDATE_IN_CONSTRUCTOR=true;
		
		//Create a read input stream
		final ConcurrentReadInputStream cris=makeCris();
		
		//Optionally create a read output stream
		final ConcurrentReadOutputStream ros=makeCros(false);
		
		//Reset counters
		readsProcessed=readsOut=0;
		basesProcessed=basesOut=0;
		
		Random randy=Shared.threadLocalRandom(seed);
		if(cris==null){
			ref=genSynthGenome(randy);
		}else{
			ref=loadData(cris, randy);
		}
		
		if(invertedRepeatRate>0){
			addInvertedRepeats(ref, randy);
		}
		
		//Process the reads in separate threads
		spawnThreads(cris, ros);
		
		if(verbose){outstream.println("Finished; closing streams.");}
		
		//Write anything that was accumulated by ReadStats
		errorState|=ReadStats.writeAll();
		//Close the read streams
		errorState|=ReadWrite.closeStreams(cris, ros);
		
		writeIdHist();
		
		//Reset read validation
		Read.VALIDATE_IN_CONSTRUCTOR=vic;
		
		//Report timing and results
		t.stop();
		outstream.println(Tools.timeReadsBasesProcessed(t, readsOut, basesOut, 8));
//		outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
		
		//Throw an exception of there was an error in a thread
		if(errorState){
			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
		}
	}
	
	private void writeIdHist(){
		if(ffIdHist==null){return;}
		ByteStreamWriter bsw=new ByteStreamWriter(ffIdHist);
		bsw.start();
		bsw.print("#Identity\tCount\n".getBytes());
		for(int i=0; i<idHist.length; i++){
			bsw.print(i*100.0/(ID_BINS-1), 3).print('\t').println(idHist[i]);
		}
		errorState|=bsw.poisonAndWait();
	}
	
	/** Create a Read Input Stream */
	private ConcurrentReadInputStream makeCris(){
		if(ffin1==null){return null;}
		ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null);
		cris.start(); //Start the stream
		if(verbose){outstream.println("Started cris");}
		return cris;
	}
	
	/** Create a Read Output Stream */
	private ConcurrentReadOutputStream makeCros(boolean pairedInput){
		if(ffout1==null){return null;}

		//Set output buffer size
		final int buff=4;

		final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(
				ffout1, null, qfout1, null, buff, null, false);
		ros.start(); //Start the stream
		return ros;
	}
	
	/** Spawn process threads */
	private void spawnThreads(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros){
		
		//Do anything necessary prior to processing
		
		//Determine how many threads may be used
		final int threads=Shared.threads();
		
		//Fill a list with ProcessThreads
		ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
		for(int i=0; i<threads; i++){
			alpt.add(new ProcessThread(ros, i, nextZmwID, seed));
		}
		
		//Start the threads
		for(ProcessThread pt : alpt){
			pt.start();
		}
		
		//Wait for threads to finish
		waitForThreads(alpt);
		
		//Do anything necessary after processing
		
	}
	
	/** Wait until all worker threads are finished, then return */
	private void waitForThreads(ArrayList<ProcessThread> alpt){
		
		//Wait for completion of all threads
		boolean success=true;
		for(ProcessThread pt : alpt){
			
			//Wait until this thread has terminated
			while(pt.getState()!=Thread.State.TERMINATED){
				try {
					//Attempt a join operation
					pt.join();
				} catch (InterruptedException e) {
					//Potentially handle this, if it is expected to occur
					e.printStackTrace();
				}
			}
			
			//Accumulate per-thread statistics
			readsOut+=pt.readsOutT;
			basesOut+=pt.basesOutT;
			Tools.add(idHist, pt.idHistT);
			
			success&=pt.success;
		}
		
		//Track whether any threads failed
		if(!success){errorState=true;}
	}
	
	/*--------------------------------------------------------------*/
	/*----------------         Inner Methods        ----------------*/
	/*--------------------------------------------------------------*/
	
	private byte randomBase(Random randy) {
		float rGC=randy.nextFloat();
		if(rGC>=genomeGC){//AT
			return (byte)(randy.nextBoolean() ? 'A' : 'T');
		}else{//GC
			return (byte)(randy.nextBoolean() ? 'G' : 'C');
		}
	}
	
	private static int randomLength(int min, int max, Random randy) {
		if(min==max){return min;}
		int range=max-min+1;
		int x=min+Tools.min(randy.nextInt(range), randy.nextInt(range));
//		System.err.println(x+", "+min+", "+max);
//		new Exception().printStackTrace();
//		System.err.println(randy.getClass());
//		System.err.println(randy.nextLong());
		return x;
	}
	
	private static float randomRate(float min, float max, Random randy) {
		if(min==max){return min;}
		float range=max-min;
//		float a=(randy.nextFloat()+randy.nextFloat());
		float b=(randy.nextFloat()+randy.nextFloat());
		float c=(1.6f*randy.nextFloat()+0.4f*randy.nextFloat());
		float x=min+range*0.5f*Tools.min(b, c);
//		assert(false) : "x="+x+", a="+a+", b="+b+", c="+c+", min="+min+", max="+max;
//		System.err.println(x+", "+min+", "+max);
		return x;
	}
	
	private byte[] genSynthGenome(Random randy){
		assert(genomeSize<=MAX_GENOME_LENGTH) : genomeSize;
		byte[] array=new byte[(int)genomeSize];
		for(int i=0; i<genomeSize; i++){
			array[i]=randomBase(randy);
		}
		return array;
	}
	
	private byte[] loadData(ConcurrentReadInputStream cris, Random randy){

		ByteBuilder bb=new ByteBuilder();
		
		//Grab the first ListNum of reads
		ListNum<Read> ln=cris.nextList();

		//As long as there is a nonempty read list...
		while(ln!=null && ln.size()>0){
//			if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
			
			for(Read r : ln){
				
//				System.err.println("Fetched read len="+r.length());//123
				
				//Increment counters
				readsProcessed+=r.pairCount();
				basesProcessed+=r.pairLength();
				
				

//				System.err.println("Filter: addNs="+addNs+", (r.length()>maxMoleculeLength && (invertedRepeatRate<=0 || r.length()>2*invertedRepeatMaxLength);//123
				
				//Filter disabled; it causes short sequences to be ignored.
				//Optional filter criteria
//				if(!addNs || (r.length()>maxMoleculeLength && (invertedRepeatRate<=0 || r.length()>2*invertedRepeatMaxLength))){
					final byte[] bases=r.bases;
					
					if(addNs){
						if(bb.length()>0){bb.append('N');}
					}else{
						for(int i=0; i<bases.length; i++){
							if(!AminoAcid.isFullyDefined(bases[i])){
								bases[i]=randomBase(randy);
							}
						}
					}
					
					if(bb.length()+bases.length<=MAX_GENOME_LENGTH){
						bb.append(bases);
//						System.err.println("Appended "+r.length());//123
					}else{
//						System.err.println("Appending partial.");//123
						for(byte b : bases){
							bb.append(b);
							if(bb.length>=MAX_GENOME_LENGTH){
								cris.returnList(ln.id, true);
//								System.err.println("Returning partial "+bb.length);//123
								return bb.toBytes();
							}
						}
					}
//				}
			}

			//Notify the input stream that the list was used
			cris.returnList(ln);
//			if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access
			
			//Fetch a new list
			ln=cris.nextList();
		}

		//Notify the input stream that the final list was used
		if(ln!=null){
			cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
		}

//		System.err.println("Returning full "+bb.length);//123
		return bb.toBytes();
	}
	
	private void addInvertedRepeats(byte[] bases, Random randy){
		
		long added=0;
		long toAdd=(long) (bases.length*invertedRepeatRate);
		while(added<toAdd){
			int len=randomLength(invertedRepeatMinLength, invertedRepeatMaxLength, randy);
			int start=randy.nextInt(bases.length-2*len);
			int stop=start+len;
			boolean OK=true;
			for(int i=0; i<len && OK; i++){
				OK&=(bases[start+i]!='N' && bases[stop+i]!='N');
			}
			if(OK){
				for(int i=0; i<len; i++){
					byte b=bases[stop-i-1];
					bases[stop+i]=AminoAcid.baseToComplementExtended[b];
				}
				added+=2*len;
//				System.err.println("added="+added+"/"+toAdd);
			}else{
//				
			}
		}
		
	}
	
	/*--------------------------------------------------------------*/
	/*----------------         Inner Classes        ----------------*/
	/*--------------------------------------------------------------*/
	
	/** */
	private class ProcessThread extends Thread {
		
		//Constructor
		ProcessThread(final ConcurrentReadOutputStream ros_, final int tid_, 
				final AtomicLong nextZmwID_, final long seed){
			ros=ros_;
			tid=tid_;
			atomicZmwID=nextZmwID_;
//			assert(false) : randy.getClass()+", "+randy.nextLong();
		}
		
		//Called by start()
		@Override
		public void run(){
			//Do anything necessary prior to processing
			randy=Shared.threadLocalRandom(seed<0 ? seed : seed+(tid+1)*tid*1000L);
			
			//Process the reads
			processInner();
			
			//Do anything necessary after processing
			
			//Indicate successful exit status
			success=true;
		}
		
		/** Iterate through the reads */
		void processInner(){

			//As long as there is a nonempty read list...
			for(long generated=atomicZmwID.getAndAdd(readsPerList); generated<maxZMWs; 
					generated=atomicZmwID.getAndAdd(readsPerList)){
//				if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access

				long toGenerate=Tools.min(readsPerList, maxZMWs-generated);
				
				ArrayList<Read> reads=generateList((int)toGenerate, generated);

				//Output reads to the output stream
				if(ros!=null){ros.add(reads, 0);}
			}
		}
		
		/** Generate the next list of reads */
		private ArrayList<Read> generateList(int toGenerate, long nextID){

			//Grab the actual read list from the ListNum
			final ArrayList<Read> reads=new ArrayList<Read>(toGenerate);
			
			//Loop through each read in the list
			for(int i=0; i<toGenerate; i++, nextID++){
				ArrayList<Read> zmw=generateZMW(nextID);
				if(zmw==null){
					i--;
					nextID--;
				}else{reads.addAll(zmw);}
			}

			return reads;
		}
		
		private ReadBuilder median(ArrayList<ReadBuilder> list){
			if(list.size()<3){return null;}
			IntList lengths=new IntList(list.size()-2);
			
			for(int i=1; i<list.size()-1; i++){
				lengths.add(list.get(i).length());
			}
			lengths.sort();
			int median=lengths.get((lengths.size-1)/2);
			
			for(int i=1; i<list.size()-1; i++){
				ReadBuilder rb=list.get(i);
				if(rb.length()==median){
					return rb;
				}
			}
			
			assert(false);
			return null;
		}
		
		/**
		 * Generate a single read.
		 */
		private ArrayList<Read> generateZMW(final long nextID){
			
			final int movieLength=randomLength(minMovie, maxMovie, randy);
			final float errorRate=randomRate(minErrorRate, maxErrorRate, randy);
			
			ArrayList<ReadBuilder> baseCalls=baseCallAllPasses(movieLength, errorRate, nextID);
			if(baseCalls==null){
//				System.err.println(movieLength+", "+errorRate+", "+nextID);//123
				return null;
			}
			
			final boolean missing=randy.nextFloat()<missingRate;
			if(missing){
				final int missingMod=randy.nextInt(2);
				ArrayList<ReadBuilder> temp=new ArrayList<ReadBuilder>();
				
				ReadBuilder current=null;
				for(int i=0; i<baseCalls.size(); i++){
					ReadBuilder rb=baseCalls.get(i);
					assert(rb.subreads==1);
					assert(rb.missing==0);
					assert(rb.adapters==0);
					if(current!=null && (i&1)==missingMod){
						current.add(rb);
						current.missing++;
						assert(current.subreads>1);
						assert(current.missing>0);
						temp.add(current);
						current=null;
					}else{
						if(current!=null){temp.add(current);}
						current=rb;
					}
				}
				if(current!=null){temp.add(current);}
				baseCalls=temp;
			}
			
			if(makeCCS){
				ReadBuilder median=median(baseCalls);
				if(median==null){return null;}
				baseCalls.clear();
				baseCalls.add(median);
			}else if(hiddenRate>0){
				ArrayList<ReadBuilder> temp=new ArrayList<ReadBuilder>();
				
				ReadBuilder current=null;
				for(int i=0; i<baseCalls.size(); i++){
					ReadBuilder rb=baseCalls.get(i);
					assert(rb.adapters==0);
					if(current!=null && randy.nextFloat()<hiddenRate){
						current.add(baseCallAdapter(errorRate));
						current.add(rb);
						assert(current.adapters>0);
						assert(current.subreads>1);
					}else{
						if(current!=null){temp.add(current);}
						current=rb;
						assert(current.adapters==0);
					}
				}
				if(current!=null){temp.add(current);}
				baseCalls=temp;
			}
			
			
			ArrayList<Read> reads=new ArrayList<Read>();
			for(ReadBuilder rb : baseCalls){
				Read r=rb.toRead();
				readsOutT+=r.pairCount();
				basesOutT+=r.length();
				reads.add(r);
			}
			
			idHistT[(int)((1-errorRate)*(ID_BINS-1)+0.5f)]++;
			return reads;
		}
		
		private ArrayList<ReadBuilder> baseCallAllPasses(final int movieLength, final float errorRate, long zmw){
			byte[] frag=null;
			
			for(int i=0; i<10 && frag==null; i++){//retry several times
				frag=fetchBases(ref);
			}
			
			if(frag==null){
//				System.err.println("Failed baseCallAllPasses("+movieLength+", "+errorRate+", "+zmw);//123
				return null;
			}
			
			ArrayList<ReadBuilder> list=new ArrayList<ReadBuilder>();
			int movieRemaining=movieLength;
			int moviePos=0;
			assert(frag.length>0) : frag.length;
			int start=randy.nextInt(frag.length);
			while(movieRemaining>0){
				ReadBuilder rb=baseCallOnePass(frag, errorRate, start, moviePos, movieRemaining, zmw);
				list.add(rb);
				start=0;
				int elapsed=rb.length()+adapterLen;
				moviePos+=elapsed;
				movieRemaining-=elapsed;
				AminoAcid.reverseComplementBasesInPlace(frag);
			}
			return list;
		}
		
		/** Call bases for one pass */
		private ReadBuilder baseCallOnePass(final byte[] frag, final float errorRate, final int start, final int movieStart, int movieRemaining, long zmw){
			final float mult=1/(1-errorRate);
			ByteBuilder bb=new ByteBuilder();
			int fpos=start;
			for(; fpos<frag.length && movieRemaining>0; fpos++){
				float f=randy.nextFloat();
				byte b=frag[fpos];
				if(f>=errorRate){
					bb.append(b);
					movieRemaining--;
				}else{
					f=mult*(1-f);
					if(f<insThresh){//ins
						b=AminoAcid.numberToBase[randy.nextInt(4)];
						bb.append(b);
						fpos--;
						movieRemaining--;
					}else if(f<delThresh){//del
						
					}else{//sub
						int x=AminoAcid.baseToNumber[b]+randy.nextInt(3)+1;
						b=AminoAcid.numberToBase[x&3];
						bb.append(b);
						movieRemaining--;
					}
				}
			}
			
			float passes=(fpos-start)*1.0f/frag.length;
			ReadBuilder rb=new ReadBuilder(bb, passes, movieStart, zmw);
			rb.errorRate=errorRate;
			return rb;
		}
		
		/** Call bases for an adapter sequence pass */
		private ReadBuilder baseCallAdapter(final float errorRate){
			ReadBuilder rb=baseCallOnePass(pacbioAdapter, errorRate, 0, 0, 999999, 0);
			rb.passes=0;
			rb.fullPasses=0;
			rb.subreads=0;
			rb.adapters=1;
			return rb;
		}
		
		private byte[] fetchBases(byte[] source){
			
			final int len=randomLength(minMoleculeLength, maxMoleculeLength, randy);
			int start=len>=source.length ? 0 : randy.nextInt(source.length-len);
			int stop=Tools.min(start+len, source.length);
			
//			System.err.println("fetchBases(len="+len+", slen="+source.length+", start="+start+", stop="+stop);//123
			
			for(int i=start; i<stop; i++){
				if(!AminoAcid.isFullyDefined(source[i])){
					return null;
				}
			}
			if(stop-start<1){return null;}
			byte[] frag=Arrays.copyOfRange(source, start, stop);
			if(randy.nextBoolean()){AminoAcid.reverseComplementBasesInPlace(frag);}
			return frag;
		}
		
		/** Number of reads retained by this thread */
		protected long readsOutT=0;
		/** Number of bases retained by this thread */
		protected long basesOutT=0;
		
		/** True only if this thread has completed successfully */
		boolean success=false;
		
		private final AtomicLong atomicZmwID;
		private final int readsPerList=Shared.bufferLen();
		
		/** Random number source */
		private Random randy;
		
		/** Random number source */
		private final long[] idHistT=new long[ID_BINS];
		
		/** Shared output stream */
		private final ConcurrentReadOutputStream ros;
		/** Thread ID */
		final int tid;
	}
	
	/*--------------------------------------------------------------*/
	
	
	
	/*--------------------------------------------------------------*/
	/*----------------            Fields            ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Primary input file path */
	private String in1=null;

	/** Primary output file path */
	private String out1=null;

	/** Primary output file path */
	private String outIdHist=null;

	private String qfout1=null;
	
	/** Override input file extension */
	private String extin=null;
	/** Override output file extension */
	private String extout=null;
	
	/*--------------------------------------------------------------*/

	/**  */
	private int minMoleculeLength=500;
	/**  */
	private int maxMoleculeLength=10000;
	/**  */
	private int minMovie=500;
	/**  */
	private int maxMovie=40000;
	
	/**  */
	private double missingRate=0.0;
	/**  */
	private double hiddenRate=0.0;
	/**  */
	private boolean allowBothEndsBad=false;
	
	/**  */
	private float genomeGC=0.6f;
	/**  */
	private long genomeSize=10000000;
	/**  */
	private boolean addNs=true;
	
	/**  */
	private double invertedRepeatRate=0.0;
	/**  */
	private int invertedRepeatMinLength=100;
	/**  */
	private int invertedRepeatMaxLength=10000;
	
	/**  */
	private float minErrorRate=0.05f;
	/**  */
	private float maxErrorRate=0.25f;
	/**  */
	private boolean addErrors=true;
	/** One read per ZMW */
	private boolean makeCCS=false;
	
	/** */
	private long seed=-1;
	
	private long[] idHist=new long[ID_BINS]; 
	
	//These should add to 1
	private float insFraction=0.40f;
	private float delFraction=0.35f;
	private float subFraction=0.25f;

	private final float insThresh;
	private final float delThresh;
	
	/*--------------------------------------------------------------*/

	/** Number of reads processed */
	protected long readsProcessed=0;
	/** Number of bases processed */
	protected long basesProcessed=0;

	/** Number of reads retained */
	protected long readsOut=0;
	/** Number of bases retained */
	protected long basesOut=0;

	/** Quit after processing this many INPUT reads */
	private long maxReads=-1;
	
	/** Quit after generating this many OUTPUT zmws */
	private long maxZMWs=-1;
	
	/** Reference genome, max 2Gbp */
	private byte[] ref;
	
	private AtomicLong nextZmwID=new AtomicLong(0);
	
	/*--------------------------------------------------------------*/
	/*----------------         Final Fields         ----------------*/
	/*--------------------------------------------------------------*/

	/** Primary input file */
	private final FileFormat ffin1;
	
	/** Primary output file */
	private final FileFormat ffout1;
	
	/** Primary output file */
	private final FileFormat ffIdHist;
	
	/*--------------------------------------------------------------*/
	/*----------------          Constants           ----------------*/
	/*--------------------------------------------------------------*/
	
	private static final int ID_BINS=201;
	
	private static final long MAX_GENOME_LENGTH=2000000000;

	public static final byte[] pacbioAdapter="ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT".getBytes();
	public static final int adapterLen=pacbioAdapter.length;
	
	/*--------------------------------------------------------------*/
	/*----------------        Common Fields         ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Print status messages to this output stream */
	private PrintStream outstream=System.err;
	/** Print verbose messages */
	public static boolean verbose=false;
	/** True if an error was encountered */
	public boolean errorState=false;
	/** Overwrite existing output files */
	private boolean overwrite=false;
	/** Append to existing output files */
	private boolean append=false;
	
}