view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/Clump.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package clump;

import java.util.ArrayList;

import dna.AminoAcid;
import shared.KillSwitch;
import shared.Parse;
import shared.Tools;
import stream.Read;
import structures.ByteBuilder;

/**
 * A list of reads sharing a kmer.
 * @author Brian Bushnell
 * @date Nov 7, 2015
 *
 */
public class Clump extends ArrayList<Read> implements Comparable<Clump> {
	
	public static Clump makeClump(long kmer){
		try {
			return new Clump(kmer);
		} catch (OutOfMemoryError e) {
			KillSwitch.memKill(e);
			throw new RuntimeException();
		}
	}
	
	private Clump(long kmer_){
		this(kmer_, 4);
	}

	private Clump(long kmer_, int size){
		super(size);
		kmer=kmer_;
	}

	@Override
	public boolean add(Read r){
		ReadKey key=(ReadKey) r.obj;
		assert(key.kmer==kmer);
		key.clump=this;
		return super.add(r);
	}
	
	private void setMaxima(){
		maxLeft=-1;
		maxRight=-1;
		width=-1;
		for(Read r : this){
			ReadKey key=(ReadKey) r.obj;
			final int pos=key.position;
			maxLeft=Tools.max(maxLeft, pos);
			maxRight=Tools.max(maxRight, r.length()-pos);
		}
		width=maxLeft+maxRight;
	}
	
	/** This will create counts of bases, or sums of qualities, at each position in the cluster. */
	private int[][] count(final boolean qualityScores){
		if(width<0){setMaxima();}
		
//		System.err.println("\n\n");
		final int[][] counts=new int[4][width];
		for(Read r : this){
			ReadKey key=(ReadKey) r.obj;
			final int pos=key.position;
			byte[] bases=r.bases, quals=r.quality;
			if(quals==null){useQuality=false;}
			
//			System.err.println("pos="+pos+", maxLeft="+maxLeft);
			for(int cloc=0, rloc=maxLeft-pos; cloc<bases.length; cloc++, rloc++){
//				System.err.println("cloc="+cloc+"/"+bases.length+", rloc="+rloc+"/"+width);
				int x=AminoAcid.baseToNumber[bases[cloc]];
				if(x>-1){
					final int q;
					if(qualityScores){q=(quals==null ? 20 : quals[cloc]);}
					else{q=1;}
					counts[x][rloc]+=q;
				}
			}
		}
		return counts;
	}
	
	/*--------------------------------------------------------------*/
	
	public Read makeSimpleConsensus(){
//		assert(Splitter.findBestPivot(this)<0) : Splitter.findBestPivot(this); //TODO: Slow
		if(size()==1){
			Read r=get(0);
			if(renameConsensus){r.id=r.numericID+" size=1";}
			return r;
		}
		final int[][] bcounts=baseCounts();
		final int[][] qcounts=qualityCounts();
		
		final byte[] bases=new byte[width], quals=new byte[width];
		for(int i=0; i<width; i++){
			int x=getConsensusAtPosition(qcounts, i);
			int y=getSecondHighest(qcounts, i);
			if(x>=0 && qcounts[x][i]==qcounts[y][i]){//Tie-breaker
				x=getConsensusAtPosition(bcounts, i);
				y=getSecondHighest(bcounts, i);
			}
			
			
			if(x<0){
//				System.err.println("q="+0+", x="+x+"; A="+counts[0][i]+", C="+counts[1][i]+", G="+counts[2][i]+", T="+counts[3][i]);
				bases[i]='N';
				quals[i]=0;
				assert(getSumAtPosition(qcounts, i)==0) : "\n"+bcounts[0][i]+", "+bcounts[1][i]+", "+bcounts[2][i]+", "+bcounts[3][i]+
														  "\n"+qcounts[0][i]+", "+qcounts[1][i]+", "+qcounts[2][i]+", "+qcounts[3][i]+
														  "\nwidth="+width+", i="+i+", size="+size()+"\n"+new String(bases, 0, i)+"\n"+get(0)+"\n"+get(1)+"\n";
//				assert(getSumAtPosition(bcounts, i)==0) : "\n"+bcounts[0][i]+", "+bcounts[1][i]+", "+bcounts[2][i]+", "+bcounts[3][i]+
//														  "\n"+qcounts[0][i]+", "+qcounts[1][i]+", "+qcounts[2][i]+", "+qcounts[3][i]+
//														  "\nwidth="+width+", i="+i+", size="+size()+"\n"+new String(bases, 0, i)+"\n"+get(0)+"\n"+get(1)+"\n";
			}else{
				final long bsum=getSumAtPosition(bcounts, i);
				final long bmajor=bcounts[x][i];
				final long bminor=bsum-bcounts[x][i];
				final long bsecond=bcounts[y][i];

				final long qsum=getSumAtPosition(qcounts, i);
				final long qmajor=qcounts[x][i];
				final long qminor=qsum-qcounts[x][i];
				final long qsecond=qcounts[y][i];
				
				float bratio=bminor/(float)(bmajor+bminor);
				double phred=(bminor==0 ? Read.MAX_CALLED_QUALITY() : -10*Math.log10(bratio));
				phred=Tools.min(phred, qmajor-qminor);
//				if(phred<0 || phred>127){
//					assert(false) :  i+","+x+","+bsum+","+qsum+","+bmajor+","+bminor+","+bratio+","+phred+"\n"+
//							bcounts[0][i]+","+bcounts[1][i]+","+bcounts[2][i]+","+bcounts[3][i]+"\n"+
//							qcounts[0][i]+","+qcounts[1][i]+","+qcounts[2][i]+","+qcounts[3][i]+"\n"+
//							this.toStringStaggered()+"\n";
//				}
//				assert(phred>=0 && phred<=127) : phred+","+x+","+i+","+bratio+","+bcounts[x][i]+","+bcounts[0][i]+
//					","+bcounts[1][i]+","+bcounts[2][i]+","+bcounts[3][i];
//				assert(phred>=0 && phred<=127) : bmajor+", "+bminor+", "+phred+", "+Read.MAX_CALLED_QUALITY;
				byte q=Read.capQuality((long)Math.round(phred));
				bases[i]=AminoAcid.numberToBase[x];
				quals[i]=q;
				assert(q>0);
				assert(x>-1);
				assert(bases[i]!='N');
			}
		}
		Read leftmost=this.get(0);//TODO:  Actually rightmost!
		Read r=new Read(bases, quals, leftmost.numericID+" size="+size(), 0);
		//TODO: Attach the long pair, and make sure the kmer location is correct.
//		assert(false) : "\n"+r.toFastq()+"\nCheck kmer location.";
//		assert(size()==1) : "\n"+r.toFastq()+"\n"+get(0).toFastq()+"\n"+get(size()-1).toFastq()+"\n";
		return r;
	}
	
	/*--------------------------------------------------------------*/
	
	public int removeDuplicates(){
		assert(KmerComparator.compareSequence);
		if(size()<2){return 0;}
		
		int removedTotal=0, removed=0;
		
		final boolean sortXY=(forceSortXY || sortYEarly() || (opticalOnly && (sortX || sortY) && size()>=sortXYSize));
		
		final int maxDiscarded;
		final int scan;
		
		if(opticalOnly && sortXY){
			scan=Tools.max(scanLimit, 200);
			maxDiscarded=scan+10;
		}else if(!sortXY && ((maxSubstitutions<1 && dupeSubRate<=0) || scanLimit<0)){
			scan=0;
			maxDiscarded=0;
		}else{
			scan=scanLimit;
			maxDiscarded=scan+10;
		}
		
		if(sortXY){
			assert(sortX || sortY);

			if(sortY){
				if(!sortYEarly()){
					this.sort(KmerComparatorY.comparator);
				}else{
//					for(int i=1; i<this.size(); i++){
//						Read a=get(i-1);
//						Read b=get(i);
//						assert(KmerComparatorY.comparator.compare(a, b)<=0) : a.obj+" vs "+b.obj;
//					}
//					//Otherwise, it was already Y-sorted.
				}
//				assert(false) : sortY();
				removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
				removedTotal+=removed;
//				System.err.println("RemovedY: "+removed);
				while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){
					removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
					removedTotal+=removed;
//					System.err.println("RemovedY: "+removed);
				}
			}
			if(sortX && (ReadKey.spanTilesX || !sortY)){
				this.sort(KmerComparatorX.comparator);
				removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
				removedTotal+=removed;
//				System.err.println("RemovedX: "+removed);
				while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){
					removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
					removedTotal+=removed;
//					System.err.println("RemovedX: "+removed);
				}
			}
		}else{
			removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, false, markOnly, markAll, renameByCount, maxOpticalDistance);
			removedTotal+=removed;
			while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){
				removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, false, markOnly, markAll, renameByCount, maxOpticalDistance);
				removedTotal+=removed;
			}
		}
		
		return removedTotal;
	}
	
	private int removeDuplicates_inner(final int maxSubs, final float subRate, final int scanLimit, final int maxDiscarded,
			final boolean optical, final boolean xySorted, final boolean mark, final boolean markAll, final boolean rename, final float dist){
		final int size=size();
		if(size<2){return 0;}
		int dupePairs=0;
		int dupeReads=0;
		
//		final boolean breakOnTile=(optical && !FlowcellCoordinate.spanTiles);
		
//		final long start=System.nanoTime();
		
		for(int i=0, lim=size-1; i<lim; i++){
			final Read a=get(i);
			if(!a.discarded()){
				final ReadKey keyA=(ReadKey) a.obj;
				final int aLen=a.length();
				int unequals=0;
				int discarded=0;
				for(int j=i+1; j<size && unequals<=scanLimit && discarded<=maxDiscarded && (!a.discarded() || markAll); j++){
					final Read b=get(j);
					if(b.discarded()){
						discarded++;
					}else{
						final int bLen=b.length();
						final ReadKey keyB=(ReadKey) b.obj;
						if(!containment && !keyA.equals(keyB)){break;}
//						if(containment && affix && !keyA.physicalAffix(keyB, aLen, bLen)){break;}
//						if(optical && keyA.lane!=keyB.lane){break;} //Already in equals method
//						if(breakOnTile && keyA.tile!=keyB.tile){break;} //Already in equals method
						if(optical && xySorted && !keyA.nearXY(keyB, dist)){break;}
//						if(System.nanoTime()-start>200000000000L){
//							TextStreamWriter tsw=new TextStreamWriter("foo.fq", true, false, false);
//							tsw.start();
//							for(Read x : this){
//								tsw.println(x.toFastq());
//							}
//							tsw.poisonAndWait();
//							assert(false) : "kmer="+kmer+", size="+size();
//						}
						if(equals(a, b, maxSubs, subRate)){
							if(!optical || keyA.near(keyB, dist)){
								if(printDuplicates){
									System.out.println(a.toFasta());
									System.out.println(b.toFasta());
								}
								float errA=a.expectedErrorsIncludingMate(true);
								float errB=b.expectedErrorsIncludingMate(true);
								if(markAll){
									b.setDiscarded(true);
									assert(!a.discarded() || markAll);
									dupePairs++;
									dupeReads+=1+b.mateCount();
									unequals=0;
									if(!a.discarded()){
										a.setDiscarded(true);
										dupePairs++;
										dupeReads+=1+a.mateCount();
									}
								}else if(containment || errB>=errA){
									b.setDiscarded(true);
									assert(!a.discarded() || markAll);
									a.copies+=b.copies+parseExtraCopies(b);
									dupePairs++;
									dupeReads+=1+b.mateCount();
									unequals=0;
								}else{
									a.setDiscarded(true);
									assert(!b.discarded() || markAll);
									b.copies+=a.copies+parseExtraCopies(a);
									dupePairs++;
									dupeReads+=1+a.mateCount();
								}
							}
						}else{
							unequals++;
						}
					}
				}
			}
		}
		if(dupeReads>0){
			for(int i=0; i<size; i++){
				Read a=get(i);
				if(a.discarded()){
					if(mark){
						if(!a.id.endsWith(" duplicate")){
							a.id+=" duplicate";
							if(a.mate!=null){a.mate.id+=" duplicate";}
						}
					}else{
						set(i, null);
					}
				}else if(rename && a.copies>1){
					renameFromCount(a);
				}
				a.copies=1;
			}
			if(!mark){
				int x=Tools.condenseStrict(this);
				assert(x==dupePairs) : size()+", "+size+", "+dupePairs+", "+x;
				assert((size()>0 || markAll) && size()==size-dupePairs) : size()+", "+size+", "+dupePairs;
			}
		}
		
		if(containment){
			dupeReads+=removeDuplicates_backwards(maxSubs, subRate, scanLimit, maxDiscarded, optical, xySorted, mark, markAll, rename, dist);
		}
		
		return dupeReads;
	}
	
	/** Only for containments */
	private int removeDuplicates_backwards(final int maxSubs, final float subRate, final int scanLimit, final int maxDiscarded,
			final boolean optical, final boolean xySorted, final boolean mark, final boolean markAll, final boolean rename, final float dist){
		final int size=size();
		if(size<2){return 0;}
		int dupePairs=0;
		int dupeReads=0;
		
//		final boolean breakOnTile=(optical && !FlowcellCoordinate.spanTiles);
		
//		final long start=System.nanoTime();
		
		for(int i=size-1; i>0; i--){
			final Read a=get(i);
			if(!a.discarded()){
				final ReadKey keyA=(ReadKey) a.obj;
				final int aLen=a.length();
				int unequals=0;
				int discarded=0;
				for(int j=i-1; j>=0 && unequals<=scanLimit && discarded<=maxDiscarded && (!a.discarded() || markAll); j--){
					final Read b=get(j);
					if(b.discarded()){
						discarded++;
					}else{
						final int bLen=b.length();
						final ReadKey keyB=(ReadKey) b.obj;
						if(!containment && !keyA.equals(keyB)){break;}
//						if(containment && affix && !keyA.physicalAffix(keyB, aLen, bLen)){break;}
//						if(optical && keyA.lane!=keyB.lane){break;} //Already in equals method
//						if(breakOnTile && keyA.tile!=keyB.tile){break;} //Already in equals method
						if(optical && xySorted && !keyA.nearXY(keyB, dist)){break;}
//						if(System.nanoTime()-start>200000000000L){
//							TextStreamWriter tsw=new TextStreamWriter("foo.fq", true, false, false);
//							tsw.start();
//							for(Read x : this){
//								tsw.println(x.toFastq());
//							}
//							tsw.poisonAndWait();
//							assert(false) : "kmer="+kmer+", size="+size();
//						}
						if(equals(a, b, maxSubs, subRate)){
							if(!optical || keyA.near(keyB, dist)){
								if(printDuplicates){
									System.out.println(a.toFasta());
									System.out.println(b.toFasta());
								}
								float errA=a.expectedErrorsIncludingMate(true);
								float errB=b.expectedErrorsIncludingMate(true);
								if(markAll){
									b.setDiscarded(true);
									assert(!a.discarded() || markAll);
									dupePairs++;
									dupeReads+=1+b.mateCount();
									unequals=0;
									if(!a.discarded()){
										a.setDiscarded(true);
										dupePairs++;
										dupeReads+=1+a.mateCount();
									}
								}else if(containment || errB>=errA){
									b.setDiscarded(true);
									assert(!a.discarded() || markAll);
									a.copies+=b.copies+parseExtraCopies(b);
									dupePairs++;
									dupeReads+=1+b.mateCount();
									unequals=0;
								}else{
									a.setDiscarded(true);
									assert(!b.discarded() || markAll);
									b.copies+=a.copies+parseExtraCopies(a);
									dupePairs++;
									dupeReads+=1+a.mateCount();
								}
							}
						}else{
							unequals++;
						}
					}
				}
			}
		}
		if(dupeReads>0){
			for(int i=0; i<size; i++){
				Read a=get(i);
				if(a.discarded()){
					if(mark){
						if(!a.id.endsWith(" duplicate")){
							a.id+=" duplicate";
							if(a.mate!=null){a.mate.id+=" duplicate";}
						}
					}else{
						set(i, null);
					}
				}else if(rename && a.copies>1){
					renameFromCount(a);
				}
				a.copies=1;
			}
			if(!mark){
				int x=Tools.condenseStrict(this);
				assert(x==dupePairs) : size()+", "+size+", "+dupePairs+", "+x;
				assert((size()>0 || markAll) && size()==size-dupePairs) : size()+", "+size+", "+dupePairs;
			}
		}
		return dupeReads;
	}
	
	public int parseExtraCopies(final Read a){
		final String id=a.id;
		final int space=id.lastIndexOf(' ');
		int extraCopies=0;
		if(space<0){return 0;}
		if(space>=0 && Tools.contains(id, "copies=", space+1)){
			extraCopies=Integer.parseInt(id.substring(space+8))-1;
		}
		return extraCopies;
	}
	
	private void renameFromCount(final Read a){
		final int newExtraCopies=a.copies-1;
		assert(newExtraCopies>0) : newExtraCopies;
		final int oldExtraCopies=parseExtraCopies(a);
		final int total=1+newExtraCopies+oldExtraCopies;
		renameToTotal(a, total);
		if(a.pairnum()==0 && a.mate!=null){
			assert(a.mate.pairnum()==1);
			renameToTotal(a.mate, total);
		}
	}
	
	private static void renameToTotal(final Read a, final int total){
		final String id=a.id;
		final int space=id.lastIndexOf(' ');
		if(space>=0 && Tools.contains(id, "copies=", space+1)){
			a.id=a.id.substring(0, space);
		}
		a.id=a.id+" copies="+total;
	}
	
//	public boolean nearby(Read a, Read b, FlowcellCoordinate fca, FlowcellCoordinate fcb, float maxDist){
////		fca.setFrom(a.id);
//		fcb.setFrom(b.id);
//		float dist=fca.distance(fcb);
//		return dist<=maxDist;
//	}
	
//	public boolean nearby(ReadKey ka, ReadKey kb, float maxDist){
//		float dist=ka.distance(kb);
//		return dist<=maxDist;
//	}
	
	public static boolean equals(Read a, Read b, int maxSubs, float dupeSubRate){
		if(a.numericID==b.numericID){return false;}
		if(dupeSubRate>0){maxSubs=Tools.max(maxSubs, (int)(dupeSubRate*Tools.min(a.length(), b.length())));}
		if(containment){
			return contains(a, b, maxSubs);
		}
		if(!equals(a.bases, b.bases, maxSubs)){return false;}
		if(a.mate!=null && !equals(a.mate.bases, b.mate.bases, maxSubs)){return false;}
		return true;
	}
	
	public static boolean equals(byte[] a, byte[] b, int maxSubs){
		if(a==b){return false;}//Nothing should subsume itself
		if(a==null || b==null){return false;}
		if(a.length!=b.length){return false;}
		int subs=0;
		if(allowNs){
			for(int i=0; i<a.length; i++){
				if(a[i]!=b[i] && (a[i]!='N' && b[i]!='N')){
					subs++;
					if(subs>maxSubs){return false;}
				}
			}
		}else{
			for(int i=0; i<a.length; i++){
				if(a[i]!=b[i]){
					subs++;
					if(subs>maxSubs){return false;}
				}
			}
		}
		return true;
	}
	
	public static boolean contains(Read a, Read b, int maxSubs){
		if(a.numericID==b.numericID){return false;}
		boolean ok=contains_inner(a, b, maxSubs);
		if(!ok || a.mate==null){return ok;}
		ok=contains_inner(a.mate, b.mate, maxSubs);
		if(!ok){return false;}
		ReadKey rka1=(ReadKey)a.obj;
		ReadKey rkb1=(ReadKey)b.obj;
		ReadKey rka2=(ReadKey)a.mate.obj; //TODO: In containment mode, mates need to always get keys.
		ReadKey rkb2=(ReadKey)b.mate.obj;
		return ((rka1.kmerMinusStrand==rkb1.kmerMinusStrand) && (rka2.kmerMinusStrand==rkb2.kmerMinusStrand)); //Ensures that both reads have the same directionality.
	}
	
	public static boolean contains_inner(Read a, Read b, int maxSubs){
//		if(a.length()==b.length()){return equals(a.bases, b.bases, maxSubs);}
		ReadKey rka=(ReadKey)a.obj;
		ReadKey rkb=(ReadKey)b.obj;
		if(affix ? !rka.physicalAffix(rkb, a.length(), b.length()) : !rka.physicallyContains(rkb, a.length(), b.length())){return false;}
		boolean flipped=false;
//		if(a.mate!=null){//In paired mode, need synchronization if the reads are in difference clumps.  But...  just don't do that.
//			Read max, min;
//			if(a.numericID>b.numericID){max=a; min=b;}//Protects against deadlocks.
//			else{max=b; min=a;}
//			synchronized(min){
//				synchronized(max){
//					if(rka.kmerMinusStrand!=rkb.kmerMinusStrand){
//						rkb.flip(b, k);
//						flipped=true;
//					}
//					boolean ok=contains(a.bases, b.bases, rka.position, rkb.position, maxSubs);
//					if(flipped){rkb.flip(b, k);}
//					return ok;
//				}
//			}
//		}
		if(rka.kmerMinusStrand!=rkb.kmerMinusStrand){
			rkb.flip(b, k);
			flipped=true;
		}
		boolean ok=contains(a.bases, b.bases, rka.position, rkb.position, maxSubs);
		if(flipped){rkb.flip(b, k);}
		return ok;
	}
	
	public static boolean contains(byte[] a, byte[] b, int posA, int posB, int maxSubs){
		if(a==null || b==null){return false;}
		assert(a.length>=b.length);
		if(a==b){return false;} //Nothing should contain itself
		int subs=0;
		
		int ai, bi;
		final int dif=posA-posB;
		if(dif>0){
			ai=0;
			bi=-dif;
		}else{
			ai=dif;
			bi=0;
		}
		
		if(allowNs){
			for(; ai<a.length && bi<b.length; ai++, bi++){
				if(ai>=0 && bi>=0){
					final byte na=a[ai], nb=b[bi];
					if(na!=nb && na!='N' && nb!='N'){
						subs++;
						if(subs>maxSubs){return false;}
					}
				}
			}
		}else{
			for(; ai<a.length && bi<b.length; ai++, bi++){
				if(ai>=0 && bi>=0 && a[ai]!=b[bi]){
					subs++;
					if(subs>maxSubs){return false;}
				}
			}
		}
		return true;
	}
	
	/*--------------------------------------------------------------*/
	
	public long splitAndErrorCorrect(){
		if(size()<Splitter.minSizeSplit){
			return errorCorrect();
		}
		long sum=0;
		ArrayList<Clump> list=Splitter.splitOnPivot(this);
		for(Clump c : list){
			sum+=c.errorCorrect();
		}
		return sum;
	}
	
	public long errorCorrect(){
		if(size()<=minCountCorrect){return 0;}
//		assert(Splitter.findBestPivot(this)<0); //TODO: Slow
		Read consen=makeSimpleConsensus();
		long sum=0;
		final int[] rvector=KillSwitch.allocInt1D(2);
		for(Read r : this){
			sum+=errorCorrect(r, consen, rvector);
		}
		return sum;
	}
	
	private int errorCorrect(Read call, Read ref, int[] rvector){

//		assert(call.validated());
//		assert(call.checkQuality());
//		assert(ref.checkQuality());
		
		final float identity=identity(call, ref.bases, rvector);
		if((identity<minIdentity && (rvector[1]>0 || rvector[0]<50)) || (identity==1 && !call.containsNocalls()/* && !ref.containsNocalls()*/)){
			//TODO: Strange, the identity==1 clause causes different behavior, though it does give a speedup.
			return 0;
		}
		final byte[] cbases=call.bases, cquals=call.quality;
		final byte[] rbases=ref.bases, rquals=ref.quality;
		
		ReadKey key=(ReadKey) call.obj;
		final int pos=key.position;
		
		final int[][] bcounts=baseCounts();
		final int[][] qcounts=qualityCounts();
		final float[][] qAvgs=qualityAverages();
		
		final int minDepth=(int)Tools.max(minCountCorrect, minSizeFractionCorrect*size());
		
		int corrections=0;
		
		final int cStart=0, rStart=maxLeft-pos, max=cbases.length;
		for(int cloc=cStart, rloc=rStart; cloc<max; cloc++, rloc++){
			//Called base, ref base
			final byte cb=cbases[cloc], rb=rbases[rloc];
			//Called quality, ref quality
			final byte cq=(cquals==null ? 20 : cquals[cloc]), rq=rquals[rloc];
			//Called number
			final byte cx=AminoAcid.baseToNumber[cb];
			//Ref number
			final byte rx=AminoAcid.baseToNumber[rb];
			
//			assert((cb=='N') == (cquals[cloc]==0));
			
			final byte b, q;
			if(cx<0){
				b=rb;
				q=(byte)Tools.min(20, rq);
			}else if(cb==rb){
				b=cb;
				q=(byte)Tools.mid(cq, cq+1, rq);//Adjust upward
				assert(q>=cq && (q<=rq || q<=cq));
			}else{
				final int cCount=bcounts[cx][rloc];
				final int rCount=bcounts[rx][rloc];
				final int altQSum=qcounts[cx][rloc];
				final int rQSum=qcounts[rx][rloc];
				final float rQAvg=qAvgs[rx][rloc];
				if(cCount<=maxCIncorrect && cq<=maxQIncorrect && cq*minQRatio<rQSum && cq*minAQRatio<8+rQAvg){
					final byte pminor=getSecondHighest(bcounts, rloc);

					assert(rx>=0 && rx<bcounts.length) : rx+", "+rloc+", "+bcounts.length+"\n"+call.toFastq()+"\n"+ref.toFastq();
					assert(rloc>=0 && rloc<bcounts[rx].length) : rx+", "+rloc+", "+bcounts[rloc].length+"\n"+call.toFastq()+"\n"+ref.toFastq();
					final int minorCount=bcounts[pminor][rloc];

					final long sum=getSumAtPosition(bcounts, rloc);
					//				final long altCount=sum-rCount;

					boolean change=false;
					if(rCount>=minCountCorrect && sum>=minDepth){
						final float ratioNeeded=Tools.min(minRatio, minRatioMult*minorCount+minRatioOffset+minRatioQMult*cq);
//						final float qratioNeeded=Tools.min(minRatio, minRatioMult*altQSum+minRatioOffset+minRatioQMult*cq); //altQSum is slightly different than minorQCount
						if(minorCount*ratioNeeded<=rCount && altQSum*ratioNeeded<=rQSum){
							change=true;
						}
						
//						else if(minorCount*ratioNeeded<=rCount){
//							assert(false) : "\n"+altQSum+", "+rQSum+", "+qratioNeeded+"\n"+cCount+", "+rCount+", "+sum+", "+ratioNeeded+"\n"+(altQSum*qratioNeeded);
//						}
					}
					if(change){
						corrections++;
						b=rb;
						q=(byte)Tools.min(cq+1, 25, rq);
						//					assert(q==25 || (q<=rq && q>=cq)) : q+", "+cq+", "+rq;
					}else{
						b=cb;
						q=(byte)Tools.mid(cq, cq-1, 6);//Adjust downward
						assert(q<=cq || q>=6) : q+","+cq;
					}
				}else{
					b=cb;
					q=cq;
				}
			}
			cbases[cloc]=b;
			if(cquals!=null){
				byte q2=(byte)Tools.mid(q, cq+maxQAdjust, cq-maxQAdjust);
				if(q2==0 && AminoAcid.isFullyDefined(b)){
					assert(!AminoAcid.isFullyDefined(cb));
					q2=(byte)Tools.mid(2, 25, (rq+25)/2);
				}else if(!AminoAcid.isFullyDefined(b)){
					q2=0;
				}
				cquals[cloc]=q2;
				assert((b=='N') == (cquals[cloc]==0)) : "b="+(char)b+", cb="+(char)cb+", rb="+(char)rb+", cx="+cx+", "
						+ "new="+cquals[cloc]+", q="+q+", old="+cq+", rq="+rq+", loc="+rloc+"\n"+call.toFastq()+"\n"+ref.toFastq();
			}
		}
		return corrections;
	}
	
	/*--------------------------------------------------------------*/
	
	//Only used by condense mode.
	public ArrayList<Read> makeConsensus(){
		if(size()==1){
			Read r=get(0);
			r.id=r.numericID+" size=1";
			return this;
		}
		
		ArrayList<Clump> clumps=Splitter.splitOnPivot(this);//TODO: Really, this should return null if there is no pivot

		ArrayList<Read> list=new ArrayList<Read>(clumps.size());
		for(Clump c : clumps){
			Read temp=c.makeSimpleConsensus();
			list.add(temp);
		}
		return list;
	}
	
	/*--------------------------------------------------------------*/
	
	private float identity(Read call, byte[] rbases, int[] rvector){
		ReadKey key=(ReadKey) call.obj;
		final int pos=key.position;
		byte[] cbases=call.bases, quals=call.quality;
		int good=0, bad=0;
		
		final int cStart=0, rStart=maxLeft-pos, max=cbases.length;
		for(int cloc=cStart, rloc=rStart; cloc<max; cloc++, rloc++){
			final byte cb=cbases[cloc], rb=rbases[rloc];
			if(AminoAcid.isFullyDefined(cb) && AminoAcid.isFullyDefined(rb)){
				if(cb==rb){good++;}
				else{bad++;}
			}
		}
		rvector[0]=good;
		rvector[1]=bad;
		return good==0 ? 0 : good/(float)(good+bad);
	}
	
	/*--------------------------------------------------------------*/
	
	long getSumAtPosition(int[][] counts, int pos){
		long sum=0;
		for(int x=0; x<4; x++){
			sum+=counts[x][pos];
		}
		return sum;
	}
	
	byte getConsensusAtPosition(int[][] counts, int pos){
		byte xMax=0;
		for(byte x=1; x<4; x++){
//			System.err.println("x="+x+", max="+max+", Checking "+counts[x][pos]+" vs "+counts[x][max]);
			if(counts[x][pos]>counts[xMax][pos]){xMax=x;}
		}
//		assert(counts[max][pos]>=counts[0][pos]);
//		assert(counts[max][pos]>=counts[1][pos]);
//		assert(counts[max][pos]>=counts[2][pos]) : max+", "+counts[max][pos]+", ["+counts[0][pos]+", "+counts[1][pos]+", "+counts[2][pos]+", "+counts[3][pos]+"]";
//		assert(counts[max][pos]>=counts[3][pos]);
		return (counts[xMax][pos]>0 ? xMax : -1);
	}
	
	byte getSecondHighest(int[][] counts, int pos){
		byte first=0;
		byte second=1;
		if(counts[first][pos]<counts[second][pos]){
			first=1; second=0;
		}
		for(byte x=2; x<4; x++){
//			System.err.println("x="+x+", max="+max+", Checking "+counts[x][pos]+" vs "+counts[x][max]);
			if(counts[x][pos]>counts[first][pos]){
				second=first;
				first=x;
			}else if(counts[x][pos]>counts[second][pos]){
				second=x;
			}
		}
//		assert(counts[max][pos]>=counts[0][pos]);
//		assert(counts[max][pos]>=counts[1][pos]);
//		assert(counts[max][pos]>=counts[2][pos]) : max+", "+counts[max][pos]+", ["+counts[0][pos]+", "+counts[1][pos]+", "+counts[2][pos]+", "+counts[3][pos]+"]";
//		assert(counts[max][pos]>=counts[3][pos]);
		
		return second; //may be actually 0.
		//return (counts[second][pos]>0 ? second : -1);
	}
	
	/*--------------------------------------------------------------*/
	
	public String toStringStaggered(){
		ByteBuilder sb=new ByteBuilder();
		for(Read r : this){
			ReadKey key=(ReadKey) r.obj;
			final int pos=key.position;
			byte[] bases=r.bases, quals=r.quality;
			int rloc=0, cloc=rloc+pos-maxLeft;
			while(cloc<0){sb.append(' '); cloc++;}
			sb.append(bases);
			sb.append('\n');
		}
		return sb.toString();
	}
	
	public Read consensusRead(){
		if(consensusRead==null){
			consensusRead=makeSimpleConsensus();
		}
		return consensusRead;
	}
	
	public int width(){
		assert(width>=0) : width;
		return width;
	}
	
	public int maxLeft(){
		assert(maxLeft>=0);
		return maxLeft;
	}
	
	public int maxRight(){
		assert(maxRight>=0);
		return maxRight;
	}
	
	/*--------------------------------------------------------------*/
	
	int[][] baseCounts(){
		if(baseCounts==null){
			baseCounts=count(false);
			int len=baseCounts[0].length;
			assert(width==-1 || width==len);
		}
		return baseCounts;
	}
	
	int[][] qualityCounts(){
		if(qualityCounts==null){
			qualityCounts=count(true);
			int len=qualityCounts[0].length;
			assert(width==-1 || width==len);
		}
		return qualityCounts;
	}
	
	float[][] qualityAverages(){
		if(qualityAverages==null){
			qualityAverages=new float[4][width];
			for(int i=0; i<4; i++){
				for(int j=0; j<width; j++){
					int b=baseCounts[i][j];
					int q=qualityCounts[i][j];
					qualityAverages[i][j]=(b==0 ? 0 : q/(float)b);
				}
			}
		}
		return qualityAverages;
	}

	void clearCounts(){
		baseCounts=qualityCounts=null;
		qualityAverages=null;
	}
	
	private void clearConsensus(){
		consensusRead=null;
	}
	
	/*--------------------------------------------------------------*/
	
	@Override
	public boolean equals(Object b){
		return this==b;
	}
	
	@Override
	public int hashCode(){
		return Long.hashCode(kmer);
	}

	@Override
	public int compareTo(Clump o) {
		int x=Long.compare(kmer, o.kmer);
		return x!=0 ? x : o.size()-size();
	}
	
	/*--------------------------------------------------------------*/
	
	public final long kmer;
	
	private int width=-1;
	private int maxLeft=-1;
	private int maxRight=-1;
	
	private int[][] baseCounts;
	private int[][] qualityCounts;
	private float[][] qualityAverages;
	
	private Read consensusRead;
	
	boolean useQuality(){return useQuality;}
	private boolean useQuality=true;
	
	boolean added=false;
	
	public static final int overhead=overhead();
	private static int overhead(){
		return 16+ //self
				16+ //Backing array
				4+ //Backing array size
				4*(8)+ //Backing array initial capacity
				1*(8)+ //kmer
				3*(4)+ //int fields
				4*(8)+ //pointers
				2*(4); //booleans
	}
	
	/*--------------------------------------------------------------*/
	
	public static boolean parseStatic(String arg, String a, String b){
		if(a.equals("mincountcorrect") || a.equals("mincc")){
			minCountCorrect=Integer.parseInt(b);
		}else if(a.equals("minsizesplit") || a.equals("minss")){
			Splitter.minSizeSplit=Integer.parseInt(b);
		}else if(a.equals("minsizefractionsplit") || a.equals("minsfs")){
			Splitter.minSizeFractionSplit=Float.parseFloat(b);
		}else if(a.equals("minsizefractioncorrect") || a.equals("minsfc")){
			minSizeFractionCorrect=Float.parseFloat(b);
		}else if(a.equals("minratio") || a.equals("minr")){
			minRatio=Float.parseFloat(b);
		}else if(a.equals("minqratio") || a.equals("minqr")){
			minQRatio=Float.parseFloat(b);
		}else if(a.equals("minaqratio") || a.equals("minaqr")){
			minAQRatio=Float.parseFloat(b);
		}else if(a.equals("minratiooffset") || a.equals("minro")){
			minRatioOffset=Float.parseFloat(b);
		}else if(a.equals("minratiomult") || a.equals("minrm")){
			minRatioMult=Float.parseFloat(b);
		}else if(a.equals("minratioqmult") || a.equals("minrqm")){
			minRatioQMult=Float.parseFloat(b);
		}else if(a.equals("minidentity") || a.equals("minid")){
			minIdentity=Float.parseFloat(b);
		}else if(a.equals("maxqadjust")){
			maxQAdjust=(byte)Integer.parseInt(b);
		}else if(a.equals("maxqi") || a.equals("maxqincorrect") || a.equals("maxqualityincorrect")){
			maxQIncorrect=Integer.parseInt(b);
			if(maxCIncorrect<0){maxQIncorrect=Integer.MAX_VALUE;}
		}else if(a.equals("maxci") || a.equals("maxcincorrect") || a.equals("maxcountincorrect")){
			maxCIncorrect=Integer.parseInt(b);
			if(maxCIncorrect<0){maxCIncorrect=Integer.MAX_VALUE;}
		}else if(a.equals("border")){
			KmerComparator.defaultBorder=Integer.parseInt(b);
		}else if(a.equals("conservative")){
			conservativeFlag=Parse.parseBoolean(b);
		}else if(a.equals("aggressive")){
			aggressiveFlag=Parse.parseBoolean(b);
		}else if(a.equals("forceprocess")){
			forceProcess=Parse.parseBoolean(b);
		}else if(a.equals("mergefirst")){
			KmerComparator.mergeFirst=Parse.parseBoolean(b);
		}else if(a.equals("findcorrelations")){
			Splitter.FIND_CORRELATIONS=Parse.parseBoolean(b);
		}else if(a.equals("maxcorrelations")){
			Splitter.MAX_CORRELATIONS=Integer.parseInt(b);
		}
		
		else if(a.equals("sortx")){
			sortX=Parse.parseBoolean(b);
		}else if(a.equals("sorty")){
			sortY=Parse.parseBoolean(b);
		}else if(a.equals("sortxy")){
			sortX=sortY=Parse.parseBoolean(b);
		}else if(a.equals("forcesortxy") || a.equals("forcexy")){
			forceSortXY=Parse.parseBoolean(b);
		}else if(a.equals("sortxysize") || a.equals("xysize")){
			sortXYSize=Integer.parseInt(b);
		}
		
		else if(a.equals("removeallduplicates") || a.equals("allduplicates")){
			markAll=Parse.parseBoolean(b);
		}else if(a.equals("addcount") || a.equals("renamebycount")){
			renameByCount=Parse.parseBoolean(b);
		}else if(a.equals("optical") || a.equals("opticalonly")){
			opticalOnly=Parse.parseBoolean(b);
		}else if(a.equals("dupesubs") || a.equals("duplicatesubs") || a.equals("dsubs") || a.equals("subs") || a.equals("s")){
			maxSubstitutions=Integer.parseInt(b);
		}else if(a.equals("dupedist") || a.equals("duplicatedistance") || a.equals("ddist") || a.equals("dist") || a.equals("opticaldist") || a.equals("distance")){
			maxOpticalDistance=Float.parseFloat(b);
			opticalOnly=maxOpticalDistance>=0;
		}else if(a.equals("scanlimit") || a.equals("scan")){
			scanLimit=Integer.parseInt(b);
		}else if(a.equals("allowns")){
			allowNs=Parse.parseBoolean(b);
		}else if(a.equals("containment") || a.equals("absorbcontainment") || a.equals("ac") || a.equals("contains")){
			containment=Parse.parseBoolean(b);
		}else if(a.equalsIgnoreCase("prefixOrSuffix") || a.equalsIgnoreCase("suffixOrPrefix") || a.equals("affix") || a.equals("pos")){
			affix=Parse.parseBoolean(b);
		}else if(a.equals("printduplicates")){
			printDuplicates=Parse.parseBoolean(b);
		}else if(a.equals("dupeidentity")){
			float dupeIdentity=Float.parseFloat(b);
			if(dupeIdentity>1){dupeIdentity=dupeIdentity/100;}
			assert(dupeIdentity<=1);
			dupeSubRate=1-dupeIdentity;
		}else if(a.equals("dupesubrate") || a.equals("dsr") || a.equals("subrate")){
			dupeSubRate=Float.parseFloat(b);
			if(dupeSubRate>1){dupeSubRate=dupeSubRate/100;}
			assert(dupeSubRate<=1);
		}
		
		else if(a.equals("minsizesplit")){
			Splitter.minSizeSplit=Integer.parseInt(b);
		}else if(a.equals("minsizefractionsplit")){
			Splitter.minSizeFractionSplit=Float.parseFloat(b);
		}else{
			return false;
		}
		
		return true;
	}
	
	static void setConservative(boolean newState){
		
		if(aggressiveFlag){return;}
		if(newState==conservativeMode){return;}
		conservativeMode=newState;
		
		Splitter.conservative=conservativeMode;
		
		if(conservativeMode){
			minCountCorrect++;
			minSizeFractionCorrect*=1.5f;
			minRatio*=1.25f;
			minQRatio*=1.5f;
			minAQRatio*=1.4f;
			minRatioOffset*=1.25f;
			minRatioQMult*=1.50f;
			minRatioMult*=1.4f;
			minIdentity=1-((1-minIdentity)*0.7f);
			if(maxQIncorrect==Integer.MAX_VALUE){maxQIncorrect=35;}
		}else{
			minCountCorrect--;
			minSizeFractionCorrect/=1.5f;
			minRatio/=1.25f;
			minQRatio/=1.5f;
			minAQRatio/=1.4f;
			minRatioOffset/=1.25f;
			minRatioQMult/=1.50f;
			minRatioMult/=1.4f;
			minIdentity=1-((1-minIdentity)/0.7f);
			if(maxQIncorrect==35){maxQIncorrect=Integer.MAX_VALUE;}
		}
	}
	
	/*--------------------------------------------------------------*/

	public static void setXY() {
		if(ReadKey.spanTilesX){sortX=true;}
		if(ReadKey.spanTilesY){sortY=true;}
	}

	static boolean allowNs=true;
	static boolean markAll=false;
	static boolean markOnly=false;
	static boolean opticalOnly=false;
	static boolean containment=false;
	static boolean affix=false;
	static boolean printDuplicates=false; //For debugging
	
	private static boolean renameByCount=false;
	private static int scanLimit=5;
	private static int maxSubstitutions=2;
	private static float dupeSubRate=0;
	private static float maxOpticalDistance=40;
	
	static boolean forceProcess=false;
	static boolean conservativeFlag=false;
	static boolean aggressiveFlag=false;
	static boolean conservativeMode=false;
	static boolean renameConsensus=false;
	static int minCountCorrect=4; //mcc=4 was slightly better than 3
	static float minSizeFractionCorrect=0.20f; //0.11 is very slightly better than 0.14
	static float minRatio=30.0f;
	static float minQRatio=2.8f; //Does nothing?
	static float minAQRatio=0.7f;
	static float minRatioOffset=1.9f; //3 is far worse than 2; 1 is better
	static float minRatioQMult=0.08f;
	static float minRatioMult=1.80f; //2.5 is far worse than 2; 1.5 is better
	static float minIdentity=0.97f; //0.98 is slightly better than 0.96; 0.94 is substantially worse
	static byte maxQAdjust=0;
	static int maxQIncorrect=Integer.MAX_VALUE;
	static int maxCIncorrect=Integer.MAX_VALUE;
	
	static boolean sortX=false; //Not needed for NextSeq
	static boolean sortY=true;
	static boolean forceSortXY=false; //Mainly for testing
	static int sortXYSize=6;
	
	/** May slightly increase speed for optical dedupe.  Can be safely disabled. */
	static boolean sortYEarly(){return sortY && (forceSortXY || opticalOnly);}
	
//	private static final boolean countQuality=false;
	public static int k=31;
	private static final long serialVersionUID = 1L;
	
}