diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/KmerLimit2.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/KmerLimit2.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,971 @@
+package sketch;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Locale;
+import java.util.Random;
+
+import dna.AminoAcid;
+import fileIO.ByteFile;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.ReadStats;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import stream.ConcurrentReadInputStream;
+import stream.ConcurrentReadOutputStream;
+import stream.FASTQ;
+import stream.FastaReadInputStream;
+import stream.Read;
+import structures.IntMap;
+import structures.ListNum;
+
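+/**
+ * Subsamples reads so that the output is expected to contain roughly a
+ * target number of unique kmers.  A first pass over the input builds a
+ * sketch that is used to estimate kmer cardinality and to choose a read
+ * sampling rate; a second pass re-reads the input at that rate and writes
+ * the retained reads.
+ * 
+ * @author Brian Bushnell
+ * @date July 30, 2018
+ *
+ */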
+public class KmerLimit2 extends SketchObject {
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Initialization        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/**
+	 * Command-line entry point: builds a KmerLimit2 from the arguments and runs it.
+	 * @param args Command line arguments
+	 */
+	public static void main(String[] args){
+		//The timer is created first so that it also covers argument parsing.
+		final Timer timer=new Timer();
+		
+		//Parse the arguments, then execute the workflow.
+		final KmerLimit2 instance=new KmerLimit2(args);
+		instance.process(timer);
+		
+		//Close the print stream if it was redirected
+		Shared.closeStream(instance.outstream);
+	}
+	
+	/**
+	 * Constructor.
+	 * Parses the command line, resolves and validates the input/output file names,
+	 * derives the kmer bit masks, and allocates the shared sketch heap.
+	 * Several static settings (ReadWrite, FASTQ, ByteFile, SketchObject) are modified
+	 * as a side effect.
+	 * @param args Command line arguments
+	 * @throws RuntimeException If no input is given, out2 is given without out1,
+	 * or the files cannot be read/written or are specified more than once
+	 */
+	public KmerLimit2(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, getClass(), false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		boolean setInterleaved=false; //Whether interleaved was explicitly set.
+		
+		//Set shared static variables; these are assigned before any flag is parsed
+		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
+		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
+		SketchObject.setKeyFraction(0.1);
+		defaultParams.minEntropy=0;
+		defaultParams.minProb=0.2f;
+		
+		boolean setHeapSize=false;
+		int heapSize_=8091; //Used unless a size flag is given or mincount>1 (see below)
+		long targetKmers_=0;
+		int k_=32; //NOTE(review): never reassigned by the parsing loop, so k is always set to 32 below - confirm intended
+		int minCount_=1;
+		
+		//Create a parser object
+		Parser parser=new Parser();
+		parser.overwrite=true;
+		
+		//Parse each argument
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			
+			//Break arguments into their constituent parts, in the form of "a=b"
+			String[] split=arg.split("="); //NOTE(review): a value that itself contains '=' is cut at its second '='
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+			if(b!=null && b.equalsIgnoreCase("null")){b=null;}
+			
+			if(a.equals("verbose")){
+				verbose=Parse.parseBoolean(b);
+			}else if(a.equals("ordered")){
+				ordered=Parse.parseBoolean(b);
+			}else if(a.equals("size") || a.equals("heapsize")){
+				heapSize_=Parse.parseIntKMG(b);
+				setHeapSize=true;
+			}else if(a.equals("kmers") || a.equals("target") || a.equals("limit")){
+				targetKmers_=Parse.parseKMG(b);
+			}else if(a.equals("mincount")){
+				minCount_=Parse.parseIntKMG(b);
+			}else if(a.equals("maxexpandedlength") || a.equals("maxlength") || a.equals("maxlen")){
+				maxExpandedLength=Parse.parseIntKMG(b);
+			}else if(a.equals("seed")){
+				seed=Parse.parseKMG(b);
+			}else if(a.equals("trials")){
+				trials=Parse.parseIntKMG(b);
+			}else if(parseSketchFlags(arg, a, b)){
+				parser.parse(arg, a, b); //Flag accepted by the sketch flag parser is also offered to the standard parser
+			}else if(defaultParams.parse(arg, a, b)){
+				parser.parse(arg, a, b); //Same for flags accepted by defaultParams
+			}else if(a.equals("parse_flag_goes_here")){
+				long fake_variable=Parse.parseKMG(b); //Template placeholder; the parsed value is discarded
+				//Set a variable here
+			}else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
+				//do nothing
+			}else{
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i]; //Only fatal when assertions are enabled
+			}
+		}
+		
+		if(!setHeapSize && minCount_>1){heapSize_=32000;} //Larger default heap when mincount>1 and no explicit size
+		heapSize=heapSize_;
+		targetKmers=targetKmers_;
+		k=k_;
+		minCount=minCount_;
+		assert(targetKmers>0) : "Must set a kmer limit."; //These three checks only run when assertions are enabled
+		assert(heapSize>0) : "Heap size must be positive.";
+		assert(k>0 && k<=32) : "0<k<33; k="+k;
+		postParse();
+		
+//		if(minCount>1){
+//			Shared.setBufferLen(800);
+//		}
+		
+		{//Process parser fields
+			Parser.processQuality();
+			
+			maxReads=parser.maxReads;
+			
+			overwrite=ReadStats.overwrite=parser.overwrite;
+			append=ReadStats.append=parser.append;
+			setInterleaved=parser.setInterleaved;
+			
+			in1=parser.in1;
+			in2=parser.in2;
+			qfin1=parser.qfin1;
+			qfin2=parser.qfin2;
+
+			out1=parser.out1;
+			out2=parser.out2;
+			qfout1=parser.qfout1;
+			qfout2=parser.qfout2;
+			
+			extin=parser.extin;
+			extout=parser.extout;
+		}
+		
+		//Do input file # replacement: "#" expands to "1" and "2" unless a file with the literal name exists
+		if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
+			in2=in1.replace("#", "2");
+			in1=in1.replace("#", "1");
+		}
+		
+		//Do output file # replacement
+		if(out1!=null && out2==null && out1.indexOf('#')>-1){
+			out2=out1.replace("#", "2");
+			out1=out1.replace("#", "1");
+		}
+		
+		//Adjust interleaved detection based on the number of input files
+		if(in2!=null){
+			if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");}
+			FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
+		}
+		
+		assert(FastaReadInputStream.settingsOK());
+		
+		//Ensure there is an input file
+		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
+		
+		//Adjust the number of threads for input file reading
+		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
+			ByteFile.FORCE_MODE_BF2=true;
+		}
+		
+		//Ensure out2 is not set without out1
+		if(out1==null && out2!=null){throw new RuntimeException("Error - cannot define out2 without defining out1.");}
+		
+		//Adjust interleaved settings based on number of output files
+		if(!setInterleaved){
+			assert(in1!=null && (out1!=null || out2==null)) : "\nin1="+in1+"\nin2="+in2+"\nout1="+out1+"\nout2="+out2+"\n";
+			if(in2!=null){ //If there are 2 input streams.
+				FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
+				outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
+			}else{ //There is one input stream.
+				if(out2!=null){
+					FASTQ.FORCE_INTERLEAVED=true;
+					FASTQ.TEST_INTERLEAVED=false;
+					outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
+				}
+			}
+		}
+		
+		//Ensure output files can be written
+		if(!Tools.testOutputFiles(overwrite, append, false, out1, out2)){
+			outstream.println((out1==null)+", "+(out2==null)+", "+out1+", "+out2);
+			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+", "+out2+"\n");
+		}
+		
+		//Ensure input files can be read
+		if(!Tools.testInputFiles(false, true, in1, in2)){
+			throw new RuntimeException("\nCan't read some input files.\n");  
+		}
+		
+		//Ensure that no file was specified multiple times
+		if(!Tools.testForDuplicateFiles(true, in1, in2, out1, out2)){
+			throw new RuntimeException("\nSome file names were specified multiple times.\n");
+		}
+		
+		//Create output FileFormat objects
+		ffout1=FileFormat.testOutput(out1, FileFormat.FASTQ, extout, true, overwrite, append, ordered);
+		ffout2=FileFormat.testOutput(out2, FileFormat.FASTQ, extout, true, overwrite, append, ordered);
+
+		//Create input FileFormat objects
+		ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
+		ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true);
+
+		minProb=defaultParams.minProb;
+		minQual=defaultParams.minQual;
+		
+		shift=2*k; //Two bits per base
+		shift2=shift-2;
+		mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32
+		sharedHeap=new SketchHeap(heapSize, 0, true);
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Outer Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+
+	/**
+	 * Create read streams and process all data.
+	 * The first pass sketches every input read; from that sketch a target read count and
+	 * sampling rate are computed and reported.  The second pass re-reads the input at that
+	 * rate, writes the retained reads, and reports the resulting statistics.
+	 * @param t Timer started by the caller; it is stopped and restarted here so that each
+	 * pass is timed and reported separately
+	 * @throws RuntimeException If errorState was set by a worker thread or while closing streams
+	 */
+	void process(Timer t){
+		
+		//Validate reads in the constructor only when fewer than 4 threads are available;
+		//otherwise the worker threads validate them (see ProcessThread.processInner)
+		final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
+		Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
+		
+		//Reset counters
+		readsProcessed=readsOut=0;
+		basesProcessed=basesOut=0;
+		
+		try{
+			//First pass: sketch the reads in separate threads
+			spawnThreads0();
+		}finally{
+			//Reset read validation, even if the first pass threw, so the static flag is never left modified
+			Read.VALIDATE_IN_CONSTRUCTOR=vic;
+		}
+		
+		//Turn the merged heap into a sketch and derive the sampling rate from it
+		Sketch sketch=new Sketch(sharedHeap, true, true, null);
+		sketch=capLengthAtCountSum(sketch, maxExpandedLength);
+		final long reads=Tools.max(1, sketch.genomeSequences);
+		final long targetReads=calcTargetReads(sketch, targetKmers, minCount, trials, seed);
+		final double targetRate=Tools.min(1, targetReads/(double)reads);
+		final String targetRateS=String.format(Locale.ROOT, "%.4f%%",targetRate*100);
+		
+		//Report timing and results of the first pass
+		t.stop();
+		outstream.println("Finished counting kmers.");
+		outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
+		
+		String kstring0=Tools.padKM(sketch.genomeSizeEstimate(minCount), 8);
+		String rstring0=Tools.padKM(targetReads, 8);
+		outstream.println("Unique Kmers:       "+kstring0);
+		outstream.println("Target Reads:       "+rstring0+"\t"+targetRateS);
+		
+		//Second pass: subsample and write reads; sharedHeap is cleared and refilled inside
+		t.start();
+		outstream.println("\nSubsampling reads.");
+		spawnThreads2(targetRate);
+		t.stop();
+		
+		//readsProcessed/basesProcessed still hold the first-pass totals here;
+		//only readsOut/basesOut are updated by the second pass
+		outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
+		
+		outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
+		String kstring=Tools.padKM(sharedHeap.genomeSizeEstimate(minCount), 8);
+		outstream.println("Unique Kmers Out:   "+kstring);
+		
+		//Throw an exception if there was an error in a thread
+		if(errorState){
+			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+		}
+	}
+	
+	/**
+	 * First pass: streams the input through worker threads that have no output stream,
+	 * then folds the per-thread counters and success flags into this object.
+	 */
+	private void spawnThreads0(){
+		
+		//Open and start the input stream
+		final ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, qfin1, qfin2);
+		cris.start();
+		if(verbose){outstream.println("Started cris");}
+		
+		//Remember the pairing mode and report it for non-sam/bam input
+		paired=cris.paired();
+		if(!ffin1.samOrBam()){
+			outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));
+		}
+		
+		//No more than 10 workers are created
+		final int workerCount=Tools.min(10, Shared.threads());
+		
+		//Build every worker before starting any of them; null means no output stream
+		final ArrayList<ProcessThread> workers=new ArrayList<ProcessThread>(workerCount);
+		for(int tid=0; tid<workerCount; tid++){
+			workers.add(new ProcessThread(cris, null, tid, heapSize));
+		}
+		for(ProcessThread worker : workers){
+			worker.start();
+		}
+		
+		boolean allSucceeded=true;
+		for(ProcessThread worker : workers){
+			
+			//join() can be interrupted, so keep retrying until the worker has terminated
+			while(worker.getState()!=Thread.State.TERMINATED){
+				try {
+					worker.join();
+				} catch (InterruptedException e) {
+					e.printStackTrace();
+				}
+			}
+			
+			//Fold this worker's statistics into the totals
+			readsProcessed+=worker.readsProcessedT;
+			basesProcessed+=worker.basesProcessedT;
+			readsOut+=worker.readsOutT;
+			basesOut+=worker.basesOutT;
+			allSucceeded&=worker.success;
+		}
+		
+		//A single failed worker puts the whole run into the error state
+		if(!allSucceeded){errorState=true;}
+		
+		//Close the input stream and record whether closing reported an error
+		errorState|=ReadWrite.closeStreams(cris);
+		
+	}
+	
+	/** Spawn process threads */
+	private void spawnThreads2(double rate){
+		
+		//Create a read input stream
+		final ConcurrentReadInputStream cris;
+		{
+			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, qfin1, qfin2);
+			cris.setSampleRate((float)rate, seed);
+			cris.start(); //Start the stream
+			if(verbose){outstream.println("Started cris");}
+		}
+//		paired=cris.paired();
+//		if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));}
+		
+		//Optionally create a read output stream
+		final ConcurrentReadOutputStream ros;
+		if(ffout1!=null){
+			//Select output buffer size based on whether it needs to be ordered
+			final int buff=(ordered ? Tools.mid(16, 128, (Shared.threads()*2)/3) : 8);
+			
+			//Notify user of output mode
+			if(cris.paired() && out2==null && (in1!=null && !ffin1.samOrBam() && !ffout1.samOrBam())){
+				outstream.println("Writing interleaved.");
+			}
+			
+			ros=ConcurrentReadOutputStream.getStream(ffout1, ffout2, qfout1, qfout2, buff, null, false);
+			ros.start(); //Start the stream
+		}else{ros=null;}
+		
+		//Determine how many threads may be used
+		final int threads=Tools.min(10, Shared.threads());
+		
+		sharedHeap.clear();			
+//		readsProcessed=0;
+//		basesProcessed=0;
+		readsOut=0;
+		basesOut=0;
+		
+		//Fill a list with ProcessThreads
+		ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
+		for(int i=0; i<threads; i++){
+			alpt.add(new ProcessThread(cris, ros, i, heapSize));
+		}
+		
+		//Start the threads
+		for(ProcessThread pt : alpt){
+			pt.start();
+		}
+		
+		//Wait for completion of all threads
+		boolean success=true;
+		for(ProcessThread pt : alpt){
+			
+			//Wait until this thread has terminated
+			while(pt.getState()!=Thread.State.TERMINATED){
+				try {
+					//Attempt a join operation
+					pt.join();
+				} catch (InterruptedException e) {
+					//Potentially handle this, if it is expected to occur
+					e.printStackTrace();
+				}
+			}
+			
+			//Accumulate per-thread statistics
+//			readsProcessed+=pt.readsProcessedT;
+//			basesProcessed+=pt.basesProcessedT;
+			readsOut+=pt.readsOutT;
+			basesOut+=pt.basesOutT;
+			success&=pt.success;
+		}
+		
+		//Track whether any threads failed
+		if(!success){errorState=true;}
+		
+		//Do anything necessary after processing
+		
+		//Write anything that was accumulated by ReadStats
+		errorState|=ReadStats.writeAll();
+		//Close the read streams
+		errorState|=ReadWrite.closeStreams(cris, ros);
+		
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Inner Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+
+	/**
+	 * Truncates a sketch to the longest prefix of keys whose counts sum to at most max.
+	 * @param sketch0 Sketch to examine; it is not modified
+	 * @param max Largest allowed sum of the retained key counts
+	 * @return sketch0 itself when the prefix is not shorter than sketch0.length(); otherwise
+	 * a new Sketch holding copies of the leading keys and counts plus sketch0's genome statistics
+	 */
+	public static Sketch capLengthAtCountSum(Sketch sketch0, int max) {
+		final int[] allCounts=sketch0.keyCounts;
+		
+		//Advance until adding the next count would push the running total above max
+		int keep=0;
+		long runningTotal=0;
+		while(keep<allCounts.length){
+			runningTotal+=allCounts[keep];
+			if(runningTotal>max){break;}
+			keep++;
+		}
+		
+		//Nothing to cut off
+		if(keep>=sketch0.length()){return sketch0;}
+		
+		final long[] keptKeys=Arrays.copyOf(sketch0.keys, keep);
+		final int[] keptCounts=Arrays.copyOf(allCounts, keep);
+		
+		//Only the arrays and the genome statistics are carried over; all other arguments are null or -1
+		return new Sketch(keptKeys, keptCounts, null, null, null, -1, -1, 
+				sketch0.genomeSizeBases, sketch0.genomeSizeKmers, sketch0.genomeSequences, sketch0.probCorrect,
+				null, null, null, null);
+	}
+	
+	/**
+	 * Estimates how many reads to keep so that about targetKmers unique kmers remain.
+	 * The sketch's key counts are repeatedly reduced at random (see reduceRoundsIM) until only
+	 * the target fraction of keys still has a count of at least minCount; the average number of
+	 * removals over all trials gives the fraction of counts, and thus of reads, to retain.
+	 * @param sketch Sketch with key counts, genome size estimate and sequence count
+	 * @param targetKmers Desired number of unique kmers
+	 * @param minCount Minimum count for a key to be considered present
+	 * @param trials Number of simulations to average; values below 1 are treated as 1
+	 * @param seed Seed passed to Shared.threadLocalRandom
+	 * @return Target number of reads; all reads when the target is not below the estimate
+	 */
+	public static long calcTargetReads(Sketch sketch, long targetKmers, int minCount, int trials, long seed){
+		final int[] counts0=sketch.keyCounts;
+		final long size=sketch.genomeSizeEstimate(minCount);
+		final long reads=sketch.genomeSequences;
+		final double targetKmerFraction=targetKmers/(double)size;
+		if(targetKmerFraction>=1){return reads;}
+		
+		final int targetKeys=(int)(targetKmerFraction*counts0.length);
+		final long countSum=Tools.sum(counts0);
+		assert(countSum<Shared.MAX_ARRAY_LEN) : countSum;
+		
+		//Scratch structures reused by every trial; reduceRoundsIM refills both from counts0
+		final IntMap map=new IntMap(0, counts0.length);
+		final int[] expanded=new int[(int)countSum];
+		
+		//With zero trials the average below would be 0/0 (NaN), which casts to a target of 0 reads
+		final int effectiveTrials=Math.max(1, trials);
+		
+		long roundSum=0;
+		final Random randy=Shared.threadLocalRandom(seed);
+		for(int i=0; i<effectiveTrials; i++){
+			roundSum+=reduceRoundsIM(counts0, expanded, minCount, targetKeys, randy, map);
+		}
+		final double avgRounds=roundSum/(double)effectiveTrials;
+		final double targetCountFraction=1-(avgRounds/countSum);
+		return (long)(targetCountFraction*reads);
+	}
+	
+//	public static int reduceRoundsOld(final int[] counts, final int minCount, final int targetKeys, final Random randy){
+//		assert(minCount>=0) : minCount;
+//		int rounds=0;
+//		int valid=0;
+//		for(int x : counts){
+//			if(x>=minCount){valid++;}
+//		}
+//		
+//		int len=counts.length;
+//		System.err.println(targetKeys+", "+counts.length+", "+valid+", "+len+", "+rounds+", "+Tools.sum(counts)+", "+Arrays.toString(counts));
+//		for(; valid>targetKeys; rounds++){
+//			int pos=randy.nextInt(len);
+////			assert(counts[pos]>0) : pos+"/"+len+": "+targetKeys+", "+counts.length+", "+valid+", "+len+", "+rounds+", "+Arrays.toString(counts);
+//			if(counts[pos]==minCount){valid--;}
+//			counts[pos]--;
+//			if(counts[pos]==0){
+//				len--;//shrink the array
+//				System.err.println("len="+len+", counts[len]="+counts[len]);
+//				System.err.println("pos="+pos+", counts[pos]="+counts[pos]);
+//				counts[pos]=counts[len];//move the last element to the empty slot 
+//				counts[len]=0;
+//				if(pos!=len && len>0){
+//					assert(counts[pos]>0) : pos+"/"+len+": "+targetKeys+", "+counts.length+", "+valid+", "+len+", "+rounds+", "+Arrays.toString(counts);
+//				}
+//			}
+//			System.err.println(len+", "+pos+": "+Arrays.toString(counts));
+//		}
+//		
+//		System.err.println(targetKeys+", "+counts.length+", "+valid+", "+len+", "+rounds+", "+Tools.sum(counts));
+//		
+//		return rounds;
+//	}
+	
+	//This can be done faster with bins.
+	//Each bin contains all kmers with count x.  When a bin is hit, one kmer moves to the next bin lower.
+	//Alternately, expand the array into one physical kmer per count.  Store the current counts in an IntMap. Remove key each time.
+	/**
+	 * Simulates random removal of single kmer observations until at most targetKeys keys
+	 * still have a count of at least minCount.  Each round picks a key with probability
+	 * proportional to its current count and decrements it.
+	 * @param counts0 Original counts; not used by this implementation, kept for signature compatibility
+	 * @param counts Working copy of the counts; modified in place
+	 * @param minCount Minimum count for a key to be considered valid
+	 * @param targetKeys Stop once no more than this many keys are valid
+	 * @param randy Random number source
+	 * @return Number of rounds (decrements) performed
+	 */
+	public static long reduceRounds(final int[] counts0, final int[] counts, final int minCount, final int targetKeys, final Random randy){
+		assert(minCount>=0) : minCount;
+		long rounds=0;
+		int valid=0;
+		for(int x : counts){
+			if(x>=minCount){valid++;}
+		}
+		
+		//Total of all remaining counts; kept in step with the decrements below
+		long sum=Tools.sum(counts);
+		
+		//sum>0 prevents a modulo by zero when valid can never reach targetKeys (e.g. minCount==0)
+		for(; valid>targetKeys && sum>0; rounds++){
+			//Uniform position in [0, sum)
+			final long posNum=(Long.MAX_VALUE&randy.nextLong())%sum;
+			long sum2=0;
+			int pos=0;
+			
+			//Select the key whose cumulative range contains posNum.  The comparison must be strict:
+			//key i owns positions [sum2-x, sum2), so ">=" would over-select the first key
+			//and under-select the last one.
+			for(int i=0; i<counts.length; i++){
+				int x=counts[i];
+				if(x>0){
+					sum2+=x;
+					if(sum2>posNum){
+						pos=i;
+						break;
+					}
+				}
+			}
+			
+			sum--;
+			
+			assert(counts[pos]>0) : pos+"/"+counts.length+": "+targetKeys+", "+valid+", "+rounds+", "+Arrays.toString(counts);
+			//A key sitting exactly at minCount drops out of the valid set when decremented
+			if(counts[pos]==minCount){valid--;}
+			counts[pos]--;
+		}
+		
+		return rounds;
+	}
+	
+	//This can be done faster with bins.
+	//Each bin contains all kmers with count x.  When a bin is hit, one kmer moves to the next bin lower.
+	//Alternately, expand the array into one physical kmer per count.  Store the current counts in an IntMap. Remove key each time.
+	/**
+	 * Simulates random removal of single kmer observations until at most targetKeys keys
+	 * still have a count of at least minCount.  The counts are expanded into one array slot
+	 * per observation so that a uniformly random slot selects a key in proportion to its count.
+	 * @param counts0 Original counts; read only
+	 * @param expanded Scratch array with one slot per observation; overwritten
+	 * @param minCount Minimum count for a key to be considered valid
+	 * @param targetKeys Stop once no more than this many keys are valid
+	 * @param randy Random number source
+	 * @param map Scratch map from key index to current count; cleared and refilled here
+	 * @return Number of rounds (removed observations)
+	 */
+	public static long reduceRoundsIM(final int[] counts0, final int[] expanded, final int minCount, final int targetKeys, final Random randy, final IntMap map){
+		assert(minCount>=0) : minCount;
+		long rounds=0;
+		int valid=0;
+		map.clear();
+		//"next" is the write cursor into expanded (named to avoid shadowing the inherited k field)
+		for(int i=0, next=0; i<counts0.length; i++){
+			int x=counts0[i];
+			if(x>=minCount){valid++;}
+			map.put(i, x);
+			for(int j=0; j<x; j++, next++){
+				expanded[next]=i;
+			}
+		}
+		assert(expanded.length==Tools.sum(counts0));
+		
+		//Number of live slots at the front of expanded
+		int len=expanded.length;
+		
+		//len>0 prevents Random.nextInt(0), which throws, when valid can never reach targetKeys (e.g. minCount==0)
+		for(; valid>targetKeys && len>0; rounds++){
+			final int pos=randy.nextInt(len);
+			final int key=expanded[pos];
+			final int x=map.get(key);
+			assert(x>0);
+			
+			//A key sitting exactly at minCount drops out of the valid set when decremented
+			if(x==minCount){valid--;}
+			map.put(key, x-1);
+			
+			len--;//shrink the array
+			expanded[pos]=expanded[len];//move the last element to the empty slot 
+			expanded[len]=0;
+		}
+		
+		return rounds;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Inner Classes        ----------------*/
+	/*--------------------------------------------------------------*/
+	
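+	/** Worker thread that sketches reads from the shared input stream and optionally forwards them to the output stream.
+	 * This is a non-static inner class: it reads the enclosing instance's fields and merges its result into sharedHeap. */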
+	private class ProcessThread extends Thread {
+		
+		/**
+		 * Stores the shared streams and creates this thread's private heap.
+		 * @param cris_ Shared input stream
+		 * @param ros_ Shared output stream; null when nothing should be written
+		 * @param tid_ Thread ID
+		 * @param size Size argument for this thread's local SketchHeap
+		 */
+		ProcessThread(final ConcurrentReadInputStream cris_, final ConcurrentReadOutputStream ros_, final int tid_, final int size){
+			tid=tid_;
+			localHeap=new SketchHeap(size, 0, true);
+			cris=cris_;
+			ros=ros_;
+		}
+		
+		/**
+		 * Thread body, called by start(): consumes reads, merges the local heap into the
+		 * shared heap, and only then marks the thread as successful.  If either step throws,
+		 * success stays false.
+		 */
+		@Override
+		public void run(){
+			//Consume read lists until the input stream is exhausted
+			processInner();
+			
+			//Publish this thread's sketch
+			dumpHeap();
+			
+			//Reached only when both steps completed normally
+			success=true;
+		}
+		
+		/**
+		 * Pulls read lists from the input stream until an empty or null list arrives.
+		 * Every read (and its mate) is validated if necessary, counted and sketched; when an
+		 * output stream exists the whole list is counted as output and handed to it.
+		 */
+		void processInner(){
+			
+			//Fetch the first batch; the list inside may be null
+			ListNum<Read> ln=cris.nextList();
+			ArrayList<Read> reads=(ln==null ? null : ln.list);
+
+			//Keep going while batches contain reads
+			while(ln!=null && reads!=null && !reads.isEmpty()){//ln!=null prevents a compiler potential null access warning
+
+				for(final Read r1 : reads){
+					final Read r2=r1.mate;
+					
+					//Validation happens here when it was skipped at construction time
+					if(!r1.validated()){r1.validate(true);}
+					if(r2!=null && !r2.validated()){r2.validate(true);}
+
+					//Count the pair before it is sketched
+					readsProcessedT+=r1.pairCount();
+					basesProcessedT+=r1.length()+r1.mateLength();
+					
+					processReadPair(r1, r2);
+				}
+				
+				//Everything in the batch is passed on unchanged
+				if(ros!=null){
+					for(Read r1 : reads){
+						readsOutT+=r1.pairCount();
+						basesOutT+=r1.pairLength();
+					}
+					ros.add(reads, ln.id);
+				}
+
+				//Hand the batch back to the input stream, then fetch the next one
+				cris.returnList(ln);
+				ln=cris.nextList();
+				reads=(ln==null ? null : ln.list);
+			}
+
+			//Return the terminating (empty) batch as well
+			if(ln!=null){
+				if(ln.list!=null){ln.list.clear();}
+				cris.returnList(ln.id, true);
+			}
+		}
+		
+		/**
+		 * Sketches a read and, when present, its mate.
+		 * @param r1 Read 1
+		 * @param r2 Read 2 (may be null)
+		 */
+		void processReadPair(final Read r1, final Read r2){
+			processReadNucleotide(r1);
+			if(r2==null){return;}
+			processReadNucleotide(r2);
+		}
+		
+		/**
+		 * Hashes every valid kmer of a nucleotide read and offers it to the local heap.
+		 * Forward and reverse-complement kmers are maintained as rolling 2-bit encodings.
+		 * Without quality scores (or with quality filtering effectively off) every kmer of k
+		 * defined bases is used; otherwise a kmer is used only when all of its bases have
+		 * quality of at least minQual and the product of their PROB_CORRECT values is at
+		 * least minProb.
+		 * @param r Read to process; must not be amino acid
+		 */
+		void processReadNucleotide(final Read r){
+			final byte[] bases=r.bases;
+			final byte[] quals=r.quality;
+			long kmer=0;
+			long rkmer=0;
+			int len=0; //Number of consecutive usable bases ending at the current position
+			assert(!r.aminoacid());
+			
+			final long min=minHashValue;
+			localHeap.genomeSizeBases+=r.length();
+			localHeap.genomeSequences++;
+			
+			if(quals==null || (minProb<=0 && minQual<2)){
+				for(int i=0; i<bases.length; i++){
+					byte b=bases[i];
+					long x=AminoAcid.baseToNumber[b];
+					long x2=AminoAcid.baseToComplementNumber[b];
+					
+					//Roll the new base into both encodings
+					kmer=((kmer<<2)|x)&mask;
+					rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
+					
+					//A negative code marks an undefined base, which restarts the run
+					if(x<0){len=0; rkmer=0;}else{len++;}
+					if(len>=k){
+						localHeap.genomeSizeKmers++;
+						final long hashcode=hash(kmer, rkmer);
+						if(hashcode>min){localHeap.add(hashcode);}
+					}
+				}
+			}else{
+				//Running product of the correctness probabilities of the bases in the current window
+				float prob=1;
+				for(int i=0; i<bases.length; i++){
+					final byte b=bases[i];
+					final long x=AminoAcid.baseToNumber[b];
+					final long x2=AminoAcid.baseToComplementNumber[b];
+					
+					{//Quality-related stuff
+						final byte q=quals[i];
+						assert(q>=0) : Arrays.toString(quals)+"\n"+minProb+", "+minQual;
+						prob=prob*align2.QualityTools.PROB_CORRECT[q];
+						//len is the run length before this base.  Once it has reached k the window
+						//already holds k bases, so the oldest one (i-k) must leave now; testing
+						//len>k would leave the first base's factor in the product for the whole run.
+						if(len>=k){
+							byte oldq=quals[i-k];
+							prob=prob*align2.QualityTools.PROB_CORRECT_INVERSE[oldq];
+						}
+						if(x<0 || q<minQual){
+							//Undefined or low-quality base: restart the run
+							len=0;
+							kmer=rkmer=0;
+							prob=1;
+						}else{
+							len++;
+						}
+					}
+					
+					//Roll the new base into both encodings
+					kmer=((kmer<<2)|x)&mask;
+					rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
+					
+					if(len>=k && prob>=minProb){
+						localHeap.genomeSizeKmers++;
+						localHeap.probSum+=prob;
+						final long hashcode=hash(kmer, rkmer);
+						if(hashcode>min){localHeap.checkAndAdd(hashcode);}
+					}
+				}
+			}
+		}
+		
+		private void dumpHeap(){
+			synchronized(sharedHeap){
+				sharedHeap.add(localHeap);
+			}
+		}
+
+		/** Number of reads processed by this thread (mates are included via pairCount()) */
+		protected long readsProcessedT=0;
+		/** Number of bases processed by this thread, counting both mates */
+		protected long basesProcessedT=0;
+		
+		/** Number of reads this thread passed to the output stream; stays 0 when there is no output stream */
+		protected long readsOutT=0;
+		/** Number of bases this thread passed to the output stream; stays 0 when there is no output stream */
+		protected long basesOutT=0;
+		
+		/** True only if this thread has completed successfully; set as the last statement of run() */
+		boolean success=false;
+
+		/** Shared input stream */
+		private final ConcurrentReadInputStream cris;
+		/** Shared output stream; null when nothing is written */
+		private final ConcurrentReadOutputStream ros;
+		/** Thread ID */
+		final int tid;
+		/** Heap private to this thread; filled while processing reads and merged into sharedHeap by dumpHeap() */
+		final SketchHeap localHeap;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------            Fields            ----------------*/
+	/*--------------------------------------------------------------*/
+
+	/** Primary input file path */
+	private String in1=null;
+	/** Secondary input file path */
+	private String in2=null;
+	/** Values of parser.qfin1/qfin2, handed to the input stream (presumably quality-file paths - confirm) */
+	private String qfin1=null;
+	private String qfin2=null;
+
+	/** Primary output file path */
+	private String out1=null;
+	/** Secondary output file path */
+	private String out2=null;
+
+	private String qfout1=null; //Value of parser.qfout1, handed to the output stream
+	private String qfout2=null; //Value of parser.qfout2, handed to the output stream
+	
+	/** Override input file extension */
+	private String extin=null;
+	/** Override output file extension */
+	private String extout=null;
+	
+	/*--------------------------------------------------------------*/
+
+	/** Number of reads processed; accumulated by the first pass only */
+	protected long readsProcessed=0;
+	/** Number of bases processed; accumulated by the first pass only */
+	protected long basesProcessed=0;
+
+	/** Number of reads retained; reset and accumulated again by the second pass */
+	protected long readsOut=0;
+	/** Number of bases retained; reset and accumulated again by the second pass */
+	protected long basesOut=0;
+
+	/** Quit after processing this many input reads; -1 means no limit */
+	private long maxReads=-1;
+	
+	private boolean paired=false; //Set from cris.paired() in spawnThreads0
+	private int trials=25; //Number of simulations averaged by calcTargetReads; flag "trials"
+	private long seed=-1; //Passed to calcTargetReads and to the subsampling input stream; flag "seed"
+	private int maxExpandedLength=50000000; //Limit given to capLengthAtCountSum; flags "maxexpandedlength", "maxlength", "maxlen"
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Final Fields         ----------------*/
+	/*--------------------------------------------------------------*/
+
+	/** Primary input file */
+	private final FileFormat ffin1;
+	/** Secondary input file */
+	private final FileFormat ffin2;
+	
+	/** Primary output file */
+	private final FileFormat ffout1;
+	/** Secondary output file */
+	private final FileFormat ffout2;
+	
+	private final SketchHeap sharedHeap; //Heap that every worker merges into; cleared before the second pass
+	private final int heapSize; //Size used for the shared heap and for each worker's local heap
+	private final long targetKmers; //Desired number of unique kmers; flags "kmers", "target", "limit"
+	private final int minCount; //Minimum count passed to genomeSizeEstimate and calcTargetReads; flag "mincount"
+
+	final int shift; //2*k
+	final int shift2; //shift-2; left shift applied to the complement base code in the reverse kmer
+	final long mask; //~((-1L)<<shift), or -1L when shift exceeds 63
+	
+	final float minProb; //Copy of defaultParams.minProb; kmers whose probability product is lower are skipped
+	final byte minQual; //Copy of defaultParams.minQual; a base with lower quality restarts the kmer
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Common Fields         ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** Print status messages to this output stream */
+	private PrintStream outstream=System.err;
+	/** Print verbose messages */
+	public static boolean verbose=false;
+	/** True if an error was encountered */
+	public boolean errorState=false;
+	/** Overwrite existing output files */
+	private boolean overwrite=true;
+	/** Append to existing output files */
+	private boolean append=false;
+	/** Reads are output in input order (not enabled) */
+	private boolean ordered=true;
+	
+	
+}