Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,460 @@ +package bloom; + +import java.io.Serializable; +import java.util.Locale; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicIntegerArray; + +import dna.AminoAcid; +import shared.Shared; +import shared.Tools; +import structures.ByteBuilder; + +/** + * @author Brian Bushnell + * @date Jul 5, 2012 + */ +public abstract class KCountArray implements Serializable { + + /** + * + */ + private static final long serialVersionUID = 1590374813059942002L; + + public static KCountArray makeNew(long cells_, int cbits_){ + return makeNew(cells_, cbits_, 1); + } + + public static KCountArray makeNew(long cells_, int cbits_, int hashes_){ + return makeNew(cells_, cbits_, hashes_, null, 0); + } + + //TODO: Get rid of keys_ arg. + public static KCountArray makeNew(long cells_, int cbits_, int hashes_, KCountArray prefilter, int prefilterLimit_){ + KCountArray kca=new KCountArray7MTA(cells_, cbits_, hashes_, prefilter, prefilterLimit_); + kca.initialize(); + return kca; + } + + protected KCountArray(final long cells_, int cbits_){ + assert(cbits_<=32); + assert(Integer.bitCount(cbits_)==1); + assert(Long.bitCount(cells_)==1) || this.getClass()==KCountArray7MT.class : this.getClass(); + + numArrays=64; + arrayBits=31-Integer.numberOfLeadingZeros(numArrays); + arrayMask=numArrays-1; + + while(cbits_*cells_<32*numArrays){ + assert(false) : cells_+", "+cbits_+", "+numArrays+", "+(cbits_*cells_)+"<"+(32*numArrays); + cbits_*=2; + } //Increases bits per cell so that at minimum each array is size 1 + + assert(cbits_<=32); + + cells=cells_; + cellBits=cbits_; + valueMask=(cellBits==32 ? Integer.MAX_VALUE : ~((-1)<<cellBits)); + maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31))); + cellsPerWord=32/cellBits; + indexShift=Integer.numberOfTrailingZeros(cellsPerWord); + cellMask=cellsPerWord-1; + + if(verbose){ + System.out.println(description()); + } + } + + protected KCountArray(final long cells_, int cbits_, int arrays_){ + assert(cbits_<=32); + assert(Integer.bitCount(cbits_)==1); + assert(Long.bitCount(cells_)==1) || this.getClass()==KCountArray7MT.class || this.getClass()==KCountArray7MTA.class || this.getClass()==KCountArray8MT.class; + + numArrays=arrays_; + assert(Integer.bitCount(numArrays)==1) : numArrays+", "+cells_+", "+cbits_; + arrayBits=31-Integer.numberOfLeadingZeros(numArrays); + arrayMask=numArrays-1; + + while(cbits_*cells_<32*numArrays){ + assert(false) : cells_+", "+cbits_+", "+numArrays+", "+(cbits_*cells_)+"<"+(32*numArrays); + cbits_*=2; + } //Increases bits per cell so that at minimum each array is size 1 + + assert(cbits_<=32) : "Why?"; + + cells=cells_; + cellBits=cbits_; + valueMask=(cellBits==32 ? Integer.MAX_VALUE : ~((-1)<<cellBits)); + maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31))); + cellsPerWord=32/cellBits; + indexShift=Integer.numberOfTrailingZeros(cellsPerWord); + cellMask=cellsPerWord-1; + + if(verbose){ + System.out.println(description()); + } + } + + public abstract int read(long key); + public int read(long keys[]){throw new RuntimeException("Unimplemented.");} + public final int read(long key, int k, boolean makeCanonical){return read(makeCanonical ? makeCanonical2(key, k) : key);} + + public abstract void write(long key, int value); + + //TODO: Consider adding a boolean for return old value. + public final void increment(long key){increment(key, 1);} + public final void decrement(long key){decrement(key, 1);} + + /** Returns nothing for simplicity. */ + public abstract void increment(long key, int incr); + + /** Returns unincremented value */ + public abstract int incrementAndReturnUnincremented(long key, int incr); + +// /** Returns unincremented value */ +// public final int incrementAndReturnUnincremented(Kmer kmer, int incr){ +// return incrementAndReturnUnincremented(kmer.xor(), incr); +// } + + //For long kmers. + public int incrementAndReturnUnincremented(long[] keys, int incr){ + throw new RuntimeException("Unimplemented."); + } + + /** Optional method. */ + public void decrement(long key, int incr){ + throw new RuntimeException("This class "+getClass().getName()+" does not support decrement."); + } + + public final int readPrecise(long key, int k, boolean makeCanonical){ + assert(k<=32); + int b=read(makeCanonical ? makeCanonical2(key, k) : key); + if(b<1){return b;} + int a=readLeft(key, k, makeCanonical); + if(a>=b){return b;} + int c=readRight(key, k, makeCanonical); + if(c>=b){return b;} + return (int)(((long)a+(long)c)/2); +// return max(a, c); +// int mid=Tools.min(a, b, c); +// System.out.println("a="+a+", b="+b+", c="+c+" -> "+mid); +// return mid; + } + + public final int readPreciseMin(long key, int k, boolean makeCanonical){ + assert(k<=32); + int b=read(makeCanonical ? makeCanonical2(key, k) : key); + if(b<1){return b;} + int a=readLeft(key, k, makeCanonical); + if(a<1){return a;} + int c=readRight(key, k, makeCanonical); + return Tools.min(a, b, c); + } + + /** + * @param key Kmer to evaluate + * @return Sum of counts of all 4 possible left-adjacent kmers + */ + public int readLeft(long key, int k, boolean makeCanonical){throw new RuntimeException("Unsupported.");} + /** + * @param key Kmer to evaluate + * @return Sum of counts of all 4 possible right-adjacent kmers + */ + public int readRight(long key, int k, boolean makeCanonical){throw new RuntimeException("Unsupported.");} + /** + * @param key Kmer to evaluate + * @return Array of counts of all 4 possible left-adjacent kmers + */ + public int[] readAllLeft(final long key, final int k, boolean makeCanonical, int[] rvec){throw new RuntimeException("Unsupported.");} + /** + * @param key Kmer to evaluate + * @return Array of counts of all 4 possible right-adjacent kmers + */ + public int[] readAllRight(final long key, final int k, boolean makeCanonical, int[] rvec){throw new RuntimeException("Unsupported.");} + + public void increment(long[] keys){ + synchronized(this){ + for(long key : keys){ + increment(key); + } + } + } + + public abstract long[] transformToFrequency(); + public final long[] transformToFrequency(int[][] matrix){ + long[] freq=new long[100000]; + int maxFreq=freq.length-1; + + if(cellBits!=32){ + assert(cellBits>0); + for(int[] array : matrix){ + for(int i=0; i<array.length; i++){ + int word=array[i]; + int j=cellsPerWord; + // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); + for(; word!=0; j--){ + int x=word&valueMask; + int x2=(int)min(x, maxFreq); + freq[x2]++; + word=(word>>>cellBits); + // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); + } + freq[0]+=j; + } + } + }else{ + for(int[] array : matrix){ + for(int i=0; i<array.length; i++){ + int word=array[i]; + int x2=(int)min(word, maxFreq); + freq[x2]++; + } + } + } + return freq; + } + + public final long[] transformToFrequency(AtomicIntegerArray[] matrix){ + long[] freq=new long[100000]; + int maxFreq=freq.length-1; + + if(cellBits!=32){ + assert(cellBits>0); + for(AtomicIntegerArray array : matrix){ + for(int i=0; i<array.length(); i++){ + int word=array.get(i); + int j=cellsPerWord; + // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); + for(; word!=0; j--){ + int x=word&valueMask; + int x2=(int)min(x, maxFreq); + freq[x2]++; + word=(word>>>cellBits); + // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); + } + freq[0]+=j; + } + } + }else{ + for(AtomicIntegerArray array : matrix){ + for(int i=0; i<array.length(); i++){ + int word=array.get(i); + int x2=(int)min(word, maxFreq); + freq[x2]++; + } + } + } + return freq; + } + + public final ByteBuilder description(){ + ByteBuilder sb=new ByteBuilder(); + long words=cells/cellsPerWord; + int wordsPerArray=(int)(words/numArrays); + sb.append("cells: \t"+cells).append('\n'); + sb.append("cellBits:\t"+cellBits).append('\n'); + sb.append("valueMask:\t"+Long.toHexString(valueMask)).append('\n'); + sb.append("maxValue:\t"+maxValue).append('\n'); + sb.append("cellsPerWord:\t"+cellsPerWord).append('\n'); + sb.append("indexShift:\t"+indexShift).append('\n'); + sb.append("words: \t"+words).append('\n'); + sb.append("wordsPerArray:\t"+wordsPerArray).append('\n'); + sb.append("numArrays:\t"+numArrays).append('\n'); + sb.append("Memory: \t"+mem()).append('\n'); + sb.append("Usage: \t"+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100)); + return sb; + } + + public final String toShortString(){ + return "mem = "+mem()+" \tcells = "+toKMG(cells)+" \tused = "+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100); + } + + public final String toShortString(int hashes){ + return ("hashes = "+hashes+" \t ")+ + "mem = "+mem()+" \tcells = "+toKMG(cells)+" \tused = "+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100); + } + + @Override + public final String toString(){ + return description().toString(); + } + + public abstract CharSequence toContentsString(); + + public abstract double usedFraction(); + + public abstract double usedFraction(int mindepth); + + public abstract long cellsUsed(int mindepth); + + public final double estimateUniqueKmers(int hashes){ + double f=usedFraction(); + double f2=(1-Math.pow(1-f, 1.0/hashes)); + double n=(-cells)*Math.log(1-f2); + return n; + } + + public final double estimateUniqueKmers(int hashes, int mindepth){ +// assert(false) : this.getClass().getName(); + double f=usedFraction(mindepth); + double f2=(1-Math.pow(1-f, 1.0/hashes)); + double n=(-cells)*Math.log(1-f2); + return n; + } + + public final double estimateUniqueKmersFromUsedFraction(int hashes, double usedFraction){ + double f=usedFraction; + double f2=(1-Math.pow(1-f, 1.0/hashes)); + double n=(-cells)*Math.log(1-f2); + return n; + } + + public final String mem(){ + long mem=(cells*cellBits)/8; + if(mem<(1<<20)){ + return (String.format(Locale.ROOT, "%.2f KB", mem*1d/(1<<10))); + }else if(mem<(1<<30)){ + return (String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20))); + }else{ + return (String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30))); + } + } + + public static String toKMG(long x){ + double div=1; + String ext=""; + if(x>10000000000L){ + div=1000000000L; + ext="B"; + }else if(x>10000000){ + div=1000000; + ext="M"; + }else if(x>100000){ + div=1000; + ext="K"; + } + return String.format(Locale.ROOT, "%.2f", x/div)+ext; + } + + static final AtomicIntegerArray[] allocMatrix(final int numArrays, final int wordsPerArray){ + final AtomicIntegerArray[] matrix=new AtomicIntegerArray[numArrays]; + final AllocThread[] array=new AllocThread[Tools.min(Tools.max(Shared.threads()/2, 1), numArrays)]; + final AtomicInteger next=new AtomicInteger(0); + for(int i=0; i<array.length; i++){ + array[i]=new AllocThread(matrix, next, wordsPerArray); + } + for(int i=0; i<array.length; i++){array[i].start();} + for(AllocThread at : array){ + while(at.getState()!=Thread.State.TERMINATED){ + try { + at.join(); + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } + return matrix; + } + + private static class AllocThread extends Thread{ + + AllocThread(AtomicIntegerArray[] matrix_, AtomicInteger next_, int wordsPerArray_){ + matrix=matrix_; + next=next_; + wordsPerArray=wordsPerArray_; + } + + @Override + public void run(){ + int x=next.getAndIncrement(); + while(x<matrix.length){ + matrix[x]=new AtomicIntegerArray(wordsPerArray); + x=next.getAndIncrement(); + } + } + + private final AtomicIntegerArray[] matrix; + private final AtomicInteger next; + private final int wordsPerArray; + + } + + +// long hash(long x, int y){throw new RuntimeException("Not supported.");} + abstract long hash(long x, int y); + + public static final int min(int x, int y){return x<y ? x : y;} + public static final int max(int x, int y){return x>y ? x : y;} + public static final long min(long x, long y){return x<y ? x : y;} + public static final long max(long x, long y){return x>y ? x : y;} + + /** Any necessary initialization. */ + public void initialize(){} + + /** Any necessary shutdown steps. */ + public void shutdown(){} + + public final long cells; + public final int cellBits; + /** Originally this was different than valueMask in the case that valueMask was negative, but now they are the same. */ + public final int maxValue; + + protected final int cellsPerWord; + protected final int indexShift; + protected final int cellMask; + protected final int valueMask; + + protected static int minArrays=calcMinArrays(); + protected final int arrayBits; + protected final int numArrays; + protected final int arrayMask; + + public static boolean verbose=false; + + private static final int calcMinArrays(){ + int x=Tools.max(Shared.threads(), 2); + while(Integer.bitCount(x)!=1){x++;} + return x; + } + + public static final boolean isCanonical(long key, int k){ + assert(k>3 && k<=32); + long b=AminoAcid.reverseComplementBinaryFast(key, k); + return key>=b; + } + + /** Assumes that the key is not canonical */ + public static final long makeCanonical(final long key, final int k){ + assert(k>3 && k<=32); +// assert(!isCanonical(key, k)); + final long r=AminoAcid.reverseComplementBinaryFast(key, k); + assert(r>=key); +// assert(isCanonical(r, k)); +// assert(AminoAcid.reverseComplementBinaryFast(r, k)==key); + return r; + } + + + public static final long makeCanonical2(final long key, final int k){ + assert(k>3 && k<=32); + if(isCanonical(key, k)){return key;} + long r=AminoAcid.reverseComplementBinaryFast(key, k); +// assert(isCanonical(r, k)) : k+"\n"+Long.toBinaryString(key)+"\n"+Long.toBinaryString(r)+"\n"+Long.toBinaryString(AminoAcid.reverseComplementBinaryFast(r, k)); +// assert(AminoAcid.reverseComplementBinaryFast(r, k)==key) : k+"\n"+Long.toBinaryString(key)+"\n"+Long.toBinaryString(r)+"\n"+Long.toBinaryString(AminoAcid.reverseComplementBinaryFast(r, k)); + return r; + } + + public KCountArray prefilter(){ + throw new RuntimeException("TODO: Override"); + } + + public void purgeFilter(){ + throw new RuntimeException("TODO: Override"); + } + + /** Increases accuracy of overloaded multi-bit tables */ + public static boolean LOCKED_INCREMENT=false; + public static boolean SET_LOCKED_INCREMENT=false; + +}