view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/ReadCounter.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package bloom;

import java.io.File;
import java.lang.Thread.State;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Locale;
import dna.AminoAcid;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import jgi.BBMerge;
import kmer.KmerTableSet;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import sketch.SketchObject;
import stream.ConcurrentReadInputStream;
import stream.FastaReadInputStream;
import stream.Read;
import structures.ListNum;
import structures.LongList;
import ukmer.Kmer;

/**
 * @author Brian Bushnell
 * @date Jul 5, 2012
 *
 */
public class ReadCounter extends KmerCountAbstract {
	
	public static void main(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, new Object() { }.getClass().getEnclosingClass(), false);
			args=pp.args;
//			outstream=pp.outstream;
		}
		
		Timer t=new Timer();
		
		String fname1=args[0];
		String fname2=(args.length>1 ? args[1] : null);
		int k=14;
		int cbits=16;
		int matrixbits=-1;
		int hashes=1;
		
		for(int i=2; i<args.length; i++){
			final String arg=args[i];
			final String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;
			
			if(a.equals("k") || a.equals("kmer")){
				k=Integer.parseInt(b);
			}else if(a.startsWith("cbits") || a.startsWith("cellbits")){
				cbits=Integer.parseInt(b);
			}else if(a.startsWith("reads") || a.startsWith("maxreads")){
				maxReads=Parse.parseKMG(b);
			}else if(a.startsWith("matrixbits")){
				matrixbits=Integer.parseInt(b);
			}else if(a.startsWith("hashes")){
				hashes=Integer.parseInt(b);
			}else if(a.equals("canonical")){
				CANONICAL=Parse.parseBoolean(b);
			}else{
				throw new RuntimeException("Unknown parameter "+args[i]);
			}
		}
		
		{//Process parser fields
			Parser.processQuality();
		}
		
		int kbits=Tools.min(2*k, 62);
		if(matrixbits<0){
			matrixbits=kbits;
		}
		matrixbits=Tools.min(kbits, matrixbits);
		
		if(fileIO.FileFormat.hasFastaExtension(fname1)){
			assert(!FastaReadInputStream.SPLIT_READS);
			FastaReadInputStream.MIN_READ_LEN=k;
		}
		
		KCountArray counts=KCountArray.makeNew(1L<<matrixbits, cbits, hashes);
		ReadCounter rc=new ReadCounter(k, true, false, false, false);
		try {
			rc.count(fname1, fname2, counts);
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		counts.shutdown();
		
//		verbose=true;
		
		t.stop();
		System.out.println("Finished counting; time = "+t);
		
		rc.printStatistics(counts);
		
	}
	
	/** Defaults for nucleotides. */
	public ReadCounter(final int k_){this(k_, true, false, false, false);}
	
	public ReadCounter(final int k_, final boolean rcomp_, 
			final boolean ecco_, final boolean merge_, final boolean amino_){
		k=k_;
		rcomp=rcomp_;
		ecco=ecco_;
		merge=merge_;
		amino=amino_;

		final int bitsPerChar=(amino ? AminoAcid.AMINO_SHIFT : 2);
		aminoShift=AminoAcid.AMINO_SHIFT;
		shift=bitsPerChar*k;
		shift2=shift-bitsPerChar;
		mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32
		
		assert(!amino || k*bitsPerChar<64);
		assert(!amino || !rcomp);
		assert(k>0);
	}

	public void printStatistics(KCountArray counts){
		long[] freq=counts.transformToFrequency();

//		System.out.println(count+"\n");
//		System.out.println(Arrays.toString(freq)+"\n");
		
		long sum=sum(freq);
		System.out.println("Kmer fraction:");
		int lim1=8, lim2=16;
		for(int i=0; i<lim1; i++){
			String prefix=i+"";
			while(prefix.length()<8){prefix=prefix+" ";}
			System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%%   ",(100l*freq[i]/(double)sum))+"\t"+freq[i]);
		}
		while(lim1<=freq.length){
			int x=0;
			for(int i=lim1; i<lim2; i++){
				x+=freq[i];
			}
			String prefix=lim1+"-"+(lim2-1);
			if(lim2>=freq.length){prefix=lim1+"+";}
			while(prefix.length()<8){prefix=prefix+" ";}
			System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%%   ",(100l*x/(double)sum))+"\t"+x);
			lim1*=2;
			lim2=min(lim2*2, freq.length);
		}
		
		long sum2=sum-freq[0];
		long x=freq[1];
		System.out.println();
		System.out.println("Keys Counted:  \t         \t"+keysCounted);
		System.out.println("Unique:        \t         \t"+sum2);
		System.out.println("Avg Sites/Key: \t         \t"+String.format(Locale.ROOT, "%.3f    ",(keysCounted*1d/sum2)));
		System.out.println();
		System.out.println("Singleton:     \t"+String.format(Locale.ROOT, "%.3f%%   ",(100l*x/(double)sum2))+"\t"+x);
		x=sum2-x;
		System.out.println("Useful:        \t"+String.format(Locale.ROOT, "%.3f%%   ",(100l*x/(double)sum2))+"\t"+x);
	}
	
	public KCountArray makeKca(String fname1, String fname2, Iterable<String> extraFiles, int cbits){
		return makeKca(fname1, fname2, extraFiles, cbits, Tools.min(2*k, 35), 1, minQuality, 
				maxReads, 1, 1, 1, 2);
	}
	
	public KCountArray makeKca(String fname1, String fname2, Iterable<String> extraFiles,
			int cbits, int matrixbits, int hashes, int minqual, long maxreads){
		assert(matrixbits<63);
		return makeKca(fname1, fname2, extraFiles, cbits, matrixbits, hashes, minqual, 
				maxreads, 1, 1, 1, 2);
	}
	
	public KCountArray makeKca(String fname1, String fname2, Iterable<String> extraFiles,
			int cbits, int matrixbits, int hashes, int minqual,
			long maxreads, int passes, int stepsize, int thresh1, int thresh2){
		assert(matrixbits<63);
		return makeKca(fname1, fname2, extraFiles,
				cbits, 1L<<matrixbits, hashes, minqual,
				maxreads, passes, stepsize, thresh1, thresh2, null, 0);
	}
	
	public KCountArray makeKca(String fname1, String fname2, Iterable<String> extraFiles,
			int cbits, long cells, int hashes, int minqual,
			long maxreads, int passes, int stepsize, int thresh1, int thresh2){
		return makeKca(fname1, fname2, extraFiles,
				cbits, cells, hashes, minqual,
				maxreads, passes, stepsize, thresh1, thresh2, null, 0);
	}
	
	public KCountArray makeKca_als(ArrayList<String> fname1, ArrayList<String> fname2, Iterable<String> extraFiles,
			int cbits, long cells, int hashes, int minqual,
			long maxreads, int passes, int stepsize, int thresh1, int thresh2, 
			KCountArray prefilter, int prefilterLimit_){
		String a=null, b=null;
		ArrayList<String> list=new ArrayList<String>();
		if(fname1!=null){
			for(int i=0; i<fname1.size(); i++){
				if(i==0){a=fname1.get(i);}
				else{list.add(fname1.get(i));}
			}
		}
		if(fname2!=null){
			for(int i=0; i<fname2.size(); i++){
				if(i==0){b=fname2.get(i);}
				else{list.add(fname2.get(i));}
			}
		}
		if(extraFiles!=null){
			for(String s : extraFiles){
				list.add(s);
			}
		}
		return makeKca(a, b, list.isEmpty() ? null : list, cbits, cells, hashes, minqual, 
				maxreads, passes, stepsize, thresh1, thresh2,
				prefilter, prefilterLimit_);
	}
	
	public KCountArray makeKca(String fname1, String fname2, Iterable<String> extraFiles,
			int cbits, long cells, int hashes, int minqual,
			long maxreads, int passes, int stepsize, int thresh1, int thresh2,
			KCountArray prefilter, int prefilterLimit_){
//		verbose=true;
		if(verbose){System.err.println("Making kca from ("+fname1+", "+fname2+")\nk="+k+", cells="+Tools.toKMG(cells)+", cbits="+cbits);}
		
		if(fname1==null && fname2==null && extraFiles==null){
			return KCountArray.makeNew(cells, cbits, hashes, prefilter, prefilterLimit_);
		}
		
		boolean oldsplit=FastaReadInputStream.SPLIT_READS;
		long oldmax=maxReads;
		byte oldq=minQuality;
		maxReads=maxreads;
		minQuality=(byte)minqual;
		//	System.out.println("kbits="+(kbits)+" -> "+(1L<<kbits)+", matrixbits="+(matrixbits)+" -> "+(1L<<matrixbits)+", cbits="+cbits+", gap="+gap+", hashes="+hashes);
		KCountArray kca=KCountArray.makeNew(cells, cbits, hashes, prefilter, prefilterLimit_);
		
//		System.out.println("a");
		{//For processing input lists
			ArrayList<String> extra2=null;
			if(fname1!=null && fname1.contains(",")){
				String[] s=fname1.split(",");
				if(extra2==null){extra2=new ArrayList<String>();}
				for(int i=1; i<s.length; i++){extra2.add(s[i]);}
				fname1=s[0];
			}
			if(fname2!=null && fname2.contains(",")){
				String[] s=fname2.split(",");
				if(extra2==null){extra2=new ArrayList<String>();}
				for(int i=1; i<s.length; i++){extra2.add(s[i]);}
				fname2=s[0];
			}
			if(extra2!=null){
				if(extraFiles!=null){
					for(String s : extraFiles){
						extra2.add(s);
					}
				}
				extraFiles=extra2;
			}
		}
//		System.out.println("b");
		
		if(extraFiles!=null){
			for(String s : extraFiles){
				if(fileIO.FileFormat.hasFastaExtension(s)){
					assert(!FastaReadInputStream.SPLIT_READS);
				}
			}
		}
		
//		System.out.println("c");
		
		if(passes==1){
//			System.out.println("c1");
			if(fname1!=null){
				try {
					count(fname1, fname2, kca);
				} catch (Exception e) {
					// TODO Auto-generated catch block
					e.printStackTrace();
				}
			}
			if(extraFiles!=null){
				maxReads=-1;
				for(String s : extraFiles){
					try {
						count(s, null, kca);
					} catch (Exception e) {
						// TODO Auto-generated catch block
						e.printStackTrace();
					}
				}
			}
			kca.shutdown();

		}else{
//			System.out.println("c2");
			assert(passes>1);
			KCountArray trusted=null;
			for(int i=1; i<passes; i++){
				boolean conservative=i>2;// /*or, alternately, (trusted==null || trusted.capacity()>0.3)
				int step=(stepsize==1 ? 1 : stepsize+i%2);
				//			if(!conservative){step=(step+3)/4;}
				if(!conservative){step=Tools.min(3, (step+3)/4);}

				try {
					count(fname1, fname2, cbits, kca, trusted, maxreads, thresh1, step, conservative);
				} catch (Exception e) {
					// TODO Auto-generated catch block
					e.printStackTrace();
				}
				if(extraFiles!=null){
					maxReads=-1;
					for(String s : extraFiles){
						try {
							count(s, null, cbits, kca, trusted, maxreads, thresh1, step, conservative);
						} catch (Exception e) {
							// TODO Auto-generated catch block
							e.printStackTrace();
						}
					}
				}
				kca.shutdown();
				
				System.out.println("Trusted:   \t"+kca.toShortString());
				trusted=kca;
				kca=KCountArray.makeNew(cells, cbits, hashes, prefilter, prefilterLimit_);

			}

			try {
				count(fname1, fname2, cbits, kca, trusted, maxreads, thresh2, stepsize, true);
			} catch (Exception e) {
				// TODO Auto-generated catch block
				e.printStackTrace();
			}
			if(extraFiles!=null){
				maxReads=-1;
				for(String s : extraFiles){
					try {
						count(s, null, cbits, kca, trusted, maxreads, thresh2, stepsize, true);
					} catch (Exception e) {
						// TODO Auto-generated catch block
						e.printStackTrace();
					}
				}
			}
			kca.shutdown();
		}
//		System.out.println("d");
		minQuality=oldq;
		maxReads=oldmax;
		FastaReadInputStream.SPLIT_READS=oldsplit;
		
		
		return kca;
	}
	
	public KCountArray count(String reads1, String reads2, KCountArray counts) throws Exception{
//		System.err.println("countFastq...  making a new cris");
		assert(counts!=null);
		
		{
			int pound=reads1.lastIndexOf('#');
			if(pound>=0 && reads2==null && !new File(reads1).exists()){
				String a=reads1.substring(0, pound);
				String b=reads1.substring(pound+1);
				reads1=a+1+b;
				reads2=a+2+b;
			}
		}
		
		final ConcurrentReadInputStream cris;
		{
			FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true);
			FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true);
			if(ff2==null){ff1.preferShreds=true;}
//			if(ff2!=null){ //TODO - interleaved flag
			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2);
			cris.start(); //4567
		}
		
		assert(cris!=null) : reads1;
		if(verbose){System.err.println("Started cris");}
		boolean paired=cris.paired();
		if(verbose){System.err.println("Paired: "+paired);}
		
//		countFastq(cris, count);
//		assert(false) : THREADS;
		CountThread[] cta=new CountThread[Shared.threads()];
		for(int i=0; i<cta.length; i++){
			cta[i]=new CountThread(cris, counts);
			cta[i].start();
		}
//		System.out.println("~1");
		for(int i=0; i<cta.length; i++){
//			System.out.println("~2");
			CountThread ct=cta[i];
			synchronized(ct){
//				System.out.println("~3");
				while(ct.getState()!=State.TERMINATED){
//					System.out.println("~4");
					try {
						ct.join(2000);
					} catch (InterruptedException e) {
						// TODO Auto-generated catch block
						e.printStackTrace();
					}
//					System.out.println("~5");
				}
			}
		}
//		System.out.println("~6");
		
		ReadWrite.closeStream(cris);
		if(verbose){System.err.println("Closed stream");}
		if(verbose){System.err.println("Processed "+readsProcessed+" reads.");}

		
		return counts;
	}
	
	public KCountArray count(String reads1, String reads2, final int cbits, 
			KCountArray counts, final KCountArray trusted, final long maxReads, final int thresh, 
			final int detectStepsize, final boolean conservative)
			throws Exception{
		
		assert(counts!=null);
		
		{
			int pound=reads1.lastIndexOf('#');
			if(pound>=0 && reads2==null && !new File(reads1).exists()){
				String a=reads1.substring(0, pound);
				String b=reads1.substring(pound+1);
				reads1=a+1+b;
				reads2=a+2+b;
			}
		}
		
		final ConcurrentReadInputStream cris;
		{
			FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true);
			FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true);
			if(ff2==null){ff1.preferShreds=true;}
			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2);
			cris.start(); //4567
		}
		
		assert(cris!=null) : reads1;
		if(verbose){System.err.println("Started cris");}
		boolean paired=cris.paired();
		if(verbose){System.err.println("Paired: "+paired);}
		

//		countFastq(cris, count, trusted, thresh, detectStepsize, conservative);

//		assert(false) : THREADS;
		CountThread[] cta=new CountThread[Shared.threads()];
		for(int i=0; i<cta.length; i++){
			cta[i]=new CountThread(cris, counts, trusted, thresh, detectStepsize, conservative);
			cta[i].start();
		}
		
		for(int i=0; i<cta.length; i++){
			CountThread ct=cta[i];
			synchronized(ct){
				while(ct.isAlive()){
					try {
						ct.join(1000);
					} catch (InterruptedException e) {
						// TODO Auto-generated catch block
						e.printStackTrace();
					}
				}
			}
		}
		
		cris.close();
		if(verbose){System.err.println("Closed stream");}
		
//		System.out.println("*** after ***");
//		System.out.println("\ntrusted=\n"+trusted);
//		System.out.println("\ncount=\n"+count);
		
		return counts;
	}
	
	private final int findOverlap(Read r1, Read r2, boolean ecc){
		if(vstrict){
			return BBMerge.findOverlapVStrict(r1, r2, ecc);
		}else{
			return BBMerge.findOverlapStrict(r1, r2, ecc);
		}
	}
	
	private class CountThread extends Thread{
		
		CountThread(final ConcurrentReadInputStream cris_, final KCountArray counts_){
			this(cris_, counts_, null, 2, 1, true);
		}
		
		CountThread(final ConcurrentReadInputStream cris_,
				final KCountArray counts_, final KCountArray trusted_, final int thresh_,
				final int detectStepsize_, final boolean conservative_){
			cris=cris_;
			counts=counts_;
			trusted=trusted_;
			thresh=thresh_;
			detectStepsize=detectStepsize_;
			conservative=conservative_;
		}
		
		@Override
		public void run(){
//			System.out.println("Running");
			if(trusted==null){
				count(cris);
			}else{
				count(cris, thresh, detectStepsize,  conservative);
			}
//			System.out.println("Finished: "+readsProcessedLocal);
			
			if(BUFFERED){dumpBuffer();}
			
			synchronized(getClass()){
				keysCounted+=keysCountedLocal;
				increments+=incrementsLocal;
				readsProcessed+=readsProcessedLocal;

				if(verbose){System.err.println(keysCounted+", "+keysCountedLocal);}
				if(verbose){System.err.println(readsProcessed+", "+readsProcessedLocal);}
			}
		}
		
		private void increment(final long key){
			if(BUFFERED){
				buffer.add(key);
				if(buffer.size>=BUFFERLEN){
					dumpBuffer();
				}
			}else{
				incrementByAmount(key, 1);
			}
		}
		
		
		//TODO: All this 'sum' nonsense is just to count kmers added.  Could be derived from buffer length.
		private void dumpBuffer(){
			final int lim=buffer.size;
			if(lim<1){return;}
			final long[] array=buffer.array;
//			if(SORT_SERIAL){
//				buffer.sortSerial();
//			}else{
//				buffer.sort();
//			}
			buffer.sort();//Can be disabled via parallelsort flag but only affects arrays>10k
			long kmer=array[0]-1;//Ensures a nonmatch
			int streak=0;
			for(int i=0; i<lim; i++){
				long x=array[i];
				if(x==kmer){streak++;}
				else{
					if(streak>0){incrementByAmount(kmer, streak);}
					kmer=x;
					streak=1;
				}
			}
			assert(streak>0);
			incrementByAmount(kmer, streak);
			buffer.clear();
		}
		
		private void incrementByAmount(final long key, int amt){
			if(SKETCH_MODE){
				final long code=SketchObject.hash(key);
				if(code<minHashValue){return;}
				counts.increment(STORE_HASHED ? code : key, amt);
			}else{
				counts.increment(key, amt);
			}
			keysCountedLocal+=amt;
			incrementsLocal++;
		}
		
		private final void count(ConcurrentReadInputStream cris){
			assert(k>=1 && counts!=null);
//			System.out.println("Waiting for list");
			ListNum<Read> ln=cris.nextList();
			ArrayList<Read> reads=(ln!=null ? ln.list : null);
//			System.out.println("Got list: "+(ln==null ? "null" : ln.id)+", "+(ln==null || ln.list==null ? "null" : ln.list.size()));

			long[] array=null;
			final Kmer kmer=new Kmer(k);

			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
				//System.err.println("reads.size()="+reads.size());
				for(Read r1 : reads){

					Read r2=r1.mate;
					if((ecco || merge) && r2!=null){
						if(merge){
							final int insert=findOverlap(r1, r2, false);
							if(insert>0){
								r2.reverseComplement();
								r1=r1.joinRead(insert);
								r2=null;
							}
						}else if(ecco){
							findOverlap(r1, r2, true);
						}
					}
					readsProcessedLocal++;

					if(k<=maxShortKmerLength){
						array=addRead_Advanced(r1, array);
					}else{
						addReadBig(r1, kmer);
						addReadBig(r1.mate, kmer);
					}
					//						System.out.println(r);
					//						System.out.println("kmers hashed: "+keysCountedLocal);
				}
				//System.err.println("returning list");
				cris.returnList(ln);
				//System.err.println("fetching list");
				ln=cris.nextList();
				reads=(ln!=null ? ln.list : null);
			}

			
			if(verbose){System.err.println("Finished reading");}
			cris.returnList(ln);
			if(verbose){System.err.println("Returned list");}
		}
		
		


		private final void count(final ConcurrentReadInputStream cris, final int thresh,
				final int detectStepsize, final boolean conservative){
			
			ListNum<Read> ln=cris.nextList();
			ArrayList<Read> reads=(ln!=null ? ln.list : null);
			
			long[] array=null;
			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
				//System.err.println("reads.size()="+reads.size());
				for(Read r1 : reads){
					
					Read r2=r1.mate;
					if((ecco || merge) && r2!=null){
						if(merge){
							final int insert=findOverlap(r1, r2, false);
							if(insert>0){
								r2.reverseComplement();
								r1=r1.joinRead(insert);
								r2=null;
							}
						}else if(ecco){
							findOverlap(r1, r2, true);
						}
					}
					{
						if(trusted!=null){
							BitSet bs=(conservative ? ErrorCorrect.detectErrorsBulk(r1, trusted, k, thresh, detectStepsize) :
								ErrorCorrect.detectTrusted(r1, trusted, k, thresh, detectStepsize));
//							System.out.println("\n"+toString(bs, r.length()));
//							System.out.println(new String(r.bases));
							if(bs!=null){
								for(int i=bs.nextClearBit(0); i<r1.length(); i=bs.nextClearBit(i+1)){
									r1.bases[i]='N';
									if(r1.quality!=null){r1.quality[i]=0;}
								}
							}
//							System.out.println(new String(r.bases));
//							System.out.println("used = "+String.format(Locale.ROOT, "%.3f%%",count.usedFraction()*100));
//							System.out.println("used = "+((KCountArray4)count).cellsUsed());
//							if(bs.length()<r.length()){r=null;}
						}
//						if(r!=null){addRead(r, count, rcomp);}
					}
					if(r2!=null){
						if(trusted!=null){
							BitSet bs=(conservative ? ErrorCorrect.detectErrorsBulk(r2, trusted, k, thresh, detectStepsize) :
								ErrorCorrect.detectTrusted(r2, trusted, k, thresh, detectStepsize));
							if(bs!=null){
								for(int i=bs.nextClearBit(0); i<r2.length(); i=bs.nextClearBit(i+1)){
									r2.bases[i]='N';
									if(r2.quality!=null){r2.quality[i]=0;}
								}
							}
						}
					}
					array=addRead_Advanced(r1, array);

				}
				//System.err.println("returning list");
				cris.returnList(ln);
				//System.err.println("fetching list");
				ln=cris.nextList();
				reads=(ln!=null ? ln.list : null);
			}
			
			if(verbose){System.err.println("Finished reading");}
			cris.returnList(ln);
			if(verbose){System.err.println("Returned list");}
		}
		
		
		
		/**
		 * Hash a read's kmers into the KCountArray.
		 * Advanced mode processes paired reads together and sorts kmers to eliminate spurious duplicates.
		 * @param r1
		 * @param counts
		 * @param k
		 * @param mask
		 * @param rcomp
		 */
		private final long[] addRead_Advanced(Read r1, long[] array){
			if(PREJOIN && r1.mate!=null && r1.insert()>0){
				r1.mate.reverseComplement();
				r1=r1.joinRead();
			}
			Read r2=r1.mate;
			final int len1=Tools.max(0, r1.length()-k+1);
			final int len2=(r2==null || r2.bases==null) ? 0 : Tools.max(0, r2.length()-k+1);
			final int len=len1+len2;
			if(len<1){return array;}
			
			if(!KEEP_DUPLICATE_KMERS){
				if(array==null || array.length!=len){array=new long[len];}
				Arrays.fill(array, -1);
				fillKmerArray(r1, array, 0, len1);
				if(r2!=null){fillKmerArray(r2, array, len1, len);}
				
				Arrays.sort(array);
				long prev=-1;
				for(int i=0; i<array.length; i++){
					long kmer=array[i];
					if(kmer!=prev){
						increment(kmer);
						prev=kmer;
					}
				}
			}else{
				if(len1>0){addRead(r1);}
				if(len2>0){addRead(r2);}
			}
			return array;
		}
		
		private final void addReadBig(Read r, Kmer kmer){
			if(r==null || r.bases==null){return;}
			final byte[] bases=r.bases;
			final byte[] quals=r.quality;
			int len=0;
			
			if(bases==null || bases.length<k){return;}
			kmer.clear();
			
			/* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
			float prob=1;
			for(int i=0; i<bases.length; i++){
				final byte b=bases[i];
				final long x=AminoAcid.baseToNumber[b];

				//Update kmers
				kmer.addRight(b);
				
				if(minProb>0 && quals!=null){//Update probability
					prob=prob*KmerTableSet.PROB_CORRECT[quals[i]];
					if(len>k){
						byte oldq=quals[i-k];
						prob=prob*KmerTableSet.PROB_CORRECT_INVERSE[oldq];
					}
				}

				//Handle Ns
				if(x<0){
					len=0;
					prob=1;
				}else{len++;}
				
				assert(len==kmer.len);
				
				if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-k), Tools.min(i+1, k)));}
				if(len>=k && prob>=minProb){
//					System.err.println("Incrementing xor()="+kmer.xor());
					increment(kmer.xor());
//					counts.incrementAndReturnUnincremented(kmer.xor(), 1);
//					keysCountedLocal++;
				}
			}
		}
		
		private final void fillKmerArray(Read r, final long[] array, final int start, final int stop){
			if(amino){
				fillKmerArrayAmino(r, array, start, stop);
				return;
			}
			if(k>maxShortKmerLength){
				fillKmerArrayLong(r, array, start, stop);
				return;
			}
			assert(k<=maxShortKmerLength);
			assert(!PREJOIN || r.mate==null);
			assert(CANONICAL);
			assert(array!=null);
			
			final byte[] bases=r.bases;
			final byte[] quals=r.quality;
			
			if(bases==null || bases.length<k){return;}
			
			final int passes=(rcomp ? 2 : 1);
			for(int pass=0; pass<passes; pass++){
				int len=0;
				int idx=(pass==0 ? start-k+1 : stop+k-2);
				long kmer=0;
				float prob=1;
				for(int i=0; i<bases.length; i++){
					byte b=bases[i];
					assert(b>=0) : r.id+", "+bases.length+", "+i+", "+b+"\n"+(bases.length<20000 ? r.toFasta().toString() : "");
					int x=AminoAcid.baseToNumber[b];
					
//					int x=AminoAcid.baseToNumber[b<0 ? 'N' : b];

					byte q;
					if(quals==null){
						q=50;
					}else{
						q=quals[i];
						prob=prob*align2.QualityTools.PROB_CORRECT[q];
						if(len>k){
							byte oldq=quals[i-k];
							prob=prob*align2.QualityTools.PROB_CORRECT_INVERSE[oldq];
						}
					}

					if(x<0 || q<minQuality){
						len=0;
						kmer=0;
						prob=1;
					}else{
						kmer=((kmer<<2)|x)&mask;
						len++;
						if(len>=k && prob>=minProb){
							array[idx]=Tools.max(array[idx], kmer);
						}
					}
					if(pass==0){idx++;}else{idx--;}
				}
//				System.out.println(Arrays.toString(array));
				r.reverseComplement();
			}
		}
		
		private final void fillKmerArrayAmino(Read r, final long[] array, final int start, final int stop){
			assert(false) : "TODO"; //TODO
			if(k>maxShortKmerLength){
				fillKmerArrayLong(r, array, start, stop);
				return;
			}
			assert(k<=maxShortKmerLength);
			assert(!PREJOIN || r.mate==null);
			assert(CANONICAL);
			assert(array!=null);
			
			final byte[] bases=r.bases;
			final byte[] quals=r.quality;
			
			if(bases==null || bases.length<k){return;}
			
			for(int pass=0; pass<2; pass++){
				int len=0;
				int idx=(pass==0 ? start-k+1 : stop+k-2);
				long kmer=0;
				float prob=1;
				for(int i=0; i<bases.length; i++){
					byte b=bases[i];
					assert(b>=0) : r.id+", "+bases.length+", "+i+", "+b+"\n"+(bases.length<20000 ? r.toFasta().toString() : "");
					int x=AminoAcid.baseToNumber[b];
					
//					int x=AminoAcid.baseToNumber[b<0 ? 'N' : b];

					byte q;
					if(quals==null){
						q=50;
					}else{
						q=quals[i];
						prob=prob*align2.QualityTools.PROB_CORRECT[q];
						if(len>k){
							byte oldq=quals[i-k];
							prob=prob*align2.QualityTools.PROB_CORRECT_INVERSE[oldq];
						}
					}

					if(x<0 || q<minQuality){
						len=0;
						kmer=0;
						prob=1;
					}else{
						kmer=((kmer<<2)|x)&mask;
						len++;
						if(len>=k && prob>=minProb){
							array[idx]=Tools.max(array[idx], kmer);
						}
					}
					if(pass==0){idx++;}else{idx--;}
				}
//				System.out.println(Arrays.toString(array));
				r.reverseComplement();
			}
		}
		
		private final void addRead(Read r){
			if(amino){
				addReadAmino(r);
				return;
			}
			assert(k<=maxShortKmerLength);
			assert(!PREJOIN || r.mate==null);
			assert(CANONICAL);

			final byte[] bases=r.bases;
			final byte[] quals=r.quality;

			if(bases==null || bases.length<k){return;}
			
			long kmer=0;
			long rkmer=0;
			int len=0;
			float prob=1;

			for(int i=0; i<bases.length; i++){
				final byte b=bases[i];
				long x=AminoAcid.baseToNumber[b];
				long x2=AminoAcid.baseToComplementNumber[b];
				kmer=((kmer<<2)|x)&mask;
				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;

				final byte q;
				if(quals==null){
					q=50;
				}else{
					q=quals[i];
					prob=prob*align2.QualityTools.PROB_CORRECT[q];
					if(len>k){
						byte oldq=quals[i-k];
						prob=prob*align2.QualityTools.PROB_CORRECT_INVERSE[oldq];
					}
				}

				if(x<0 || q<minQuality){
					len=0;
					kmer=rkmer=0;
					prob=1;
				}else{
					len++;
					if(len>=k && prob>=minProb){
						long key=(rcomp ? Tools.max(kmer, rkmer) : kmer);
						increment(key);
					}
				}
			}
		}
		
		private final void addReadAmino(Read r){
			assert(!PREJOIN || r.mate==null);

			final byte[] bases=r.bases;

			if(bases==null || bases.length<k){return;}
			
			long kmer=0;
			int len=0;

			for(int i=0; i<bases.length; i++){
				final byte b=bases[i];
				long x=AminoAcid.acidToNumber[b];
				kmer=((kmer<<aminoShift)|x)&mask;

				if(x<0){
					len=0;
					kmer=0;
				}else{
					len++;
					if(len>=k){
						increment(kmer);
					}
				}
			}
		}
		
		private final void fillKmerArrayLong(Read r, final long[] array, final int start, final int stop){
			assert(k>maxShortKmerLength) : k;
			assert(!PREJOIN || r.mate==null);
			assert(CANONICAL);
			assert(array!=null);
			Kmer kmer=new Kmer(k);
			
			float prob=1;
			byte[] bases=r.bases;
			byte[] quals=r.quality;
			
			kmer.clear();
			
			for(int i=0, idx=start-k+1; i<bases.length; i++, idx++){
				byte b=bases[i];
				kmer.addRight(b);
				
				byte q;
				if(quals==null){
					q=50;
				}else{
					q=quals[i];
					prob=prob*align2.QualityTools.PROB_CORRECT[q];
					if(kmer.len>k){
						byte oldq=quals[i-k];
						prob=prob*align2.QualityTools.PROB_CORRECT_INVERSE[oldq];
					}
				}
				
				if(!AminoAcid.isFullyDefined(b) || q<minQuality){
					kmer.clear();
					prob=1;
				}
				if(kmer.len>=k && prob>=minProb){
					array[idx]=kmer.xor();
				}
			}
		}
		
		private final ConcurrentReadInputStream cris;
		
		private final KCountArray counts;
		private final KCountArray trusted;
		private final int thresh;
		private final int detectStepsize;
		private final boolean conservative;
		private long keysCountedLocal=0;
		private long incrementsLocal=0;
		private long readsProcessedLocal=0;
		private final long minHashValue=SketchObject.minHashValue;
		private final LongList buffer=(BUFFERED ? new LongList(BUFFERLEN) : null);
	}
	
	public static  boolean vstrict=false;
	
	private final int k;
	private final int aminoShift;
	private final int shift;
	private final int shift2;
	private final long mask;
	private final boolean rcomp;
	private final boolean ecco;
	private final boolean merge;
	private final boolean amino;
	
}