comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package bloom;
2
3 import java.io.Serializable;
4 import java.util.Locale;
5 import java.util.concurrent.atomic.AtomicInteger;
6 import java.util.concurrent.atomic.AtomicIntegerArray;
7
8 import dna.AminoAcid;
9 import shared.Shared;
10 import shared.Tools;
11 import structures.ByteBuilder;
12
13 /**
14 * @author Brian Bushnell
15 * @date Jul 5, 2012
16 */
17 public abstract class KCountArray implements Serializable {
18
19 /**
20 *
21 */
22 private static final long serialVersionUID = 1590374813059942002L;
23
24 public static KCountArray makeNew(long cells_, int cbits_){
25 return makeNew(cells_, cbits_, 1);
26 }
27
28 public static KCountArray makeNew(long cells_, int cbits_, int hashes_){
29 return makeNew(cells_, cbits_, hashes_, null, 0);
30 }
31
32 //TODO: Get rid of keys_ arg.
33 public static KCountArray makeNew(long cells_, int cbits_, int hashes_, KCountArray prefilter, int prefilterLimit_){
34 KCountArray kca=new KCountArray7MTA(cells_, cbits_, hashes_, prefilter, prefilterLimit_);
35 kca.initialize();
36 return kca;
37 }
38
39 protected KCountArray(final long cells_, int cbits_){
40 assert(cbits_<=32);
41 assert(Integer.bitCount(cbits_)==1);
42 assert(Long.bitCount(cells_)==1) || this.getClass()==KCountArray7MT.class : this.getClass();
43
44 numArrays=64;
45 arrayBits=31-Integer.numberOfLeadingZeros(numArrays);
46 arrayMask=numArrays-1;
47
48 while(cbits_*cells_<32*numArrays){
49 assert(false) : cells_+", "+cbits_+", "+numArrays+", "+(cbits_*cells_)+"<"+(32*numArrays);
50 cbits_*=2;
51 } //Increases bits per cell so that at minimum each array is size 1
52
53 assert(cbits_<=32);
54
55 cells=cells_;
56 cellBits=cbits_;
57 valueMask=(cellBits==32 ? Integer.MAX_VALUE : ~((-1)<<cellBits));
58 maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31)));
59 cellsPerWord=32/cellBits;
60 indexShift=Integer.numberOfTrailingZeros(cellsPerWord);
61 cellMask=cellsPerWord-1;
62
63 if(verbose){
64 System.out.println(description());
65 }
66 }
67
68 protected KCountArray(final long cells_, int cbits_, int arrays_){
69 assert(cbits_<=32);
70 assert(Integer.bitCount(cbits_)==1);
71 assert(Long.bitCount(cells_)==1) || this.getClass()==KCountArray7MT.class || this.getClass()==KCountArray7MTA.class || this.getClass()==KCountArray8MT.class;
72
73 numArrays=arrays_;
74 assert(Integer.bitCount(numArrays)==1) : numArrays+", "+cells_+", "+cbits_;
75 arrayBits=31-Integer.numberOfLeadingZeros(numArrays);
76 arrayMask=numArrays-1;
77
78 while(cbits_*cells_<32*numArrays){
79 assert(false) : cells_+", "+cbits_+", "+numArrays+", "+(cbits_*cells_)+"<"+(32*numArrays);
80 cbits_*=2;
81 } //Increases bits per cell so that at minimum each array is size 1
82
83 assert(cbits_<=32) : "Why?";
84
85 cells=cells_;
86 cellBits=cbits_;
87 valueMask=(cellBits==32 ? Integer.MAX_VALUE : ~((-1)<<cellBits));
88 maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31)));
89 cellsPerWord=32/cellBits;
90 indexShift=Integer.numberOfTrailingZeros(cellsPerWord);
91 cellMask=cellsPerWord-1;
92
93 if(verbose){
94 System.out.println(description());
95 }
96 }
97
98 public abstract int read(long key);
99 public int read(long keys[]){throw new RuntimeException("Unimplemented.");}
100 public final int read(long key, int k, boolean makeCanonical){return read(makeCanonical ? makeCanonical2(key, k) : key);}
101
102 public abstract void write(long key, int value);
103
104 //TODO: Consider adding a boolean for return old value.
105 public final void increment(long key){increment(key, 1);}
106 public final void decrement(long key){decrement(key, 1);}
107
108 /** Returns nothing for simplicity. */
109 public abstract void increment(long key, int incr);
110
111 /** Returns unincremented value */
112 public abstract int incrementAndReturnUnincremented(long key, int incr);
113
114 // /** Returns unincremented value */
115 // public final int incrementAndReturnUnincremented(Kmer kmer, int incr){
116 // return incrementAndReturnUnincremented(kmer.xor(), incr);
117 // }
118
119 //For long kmers.
120 public int incrementAndReturnUnincremented(long[] keys, int incr){
121 throw new RuntimeException("Unimplemented.");
122 }
123
124 /** Optional method. */
125 public void decrement(long key, int incr){
126 throw new RuntimeException("This class "+getClass().getName()+" does not support decrement.");
127 }
128
129 public final int readPrecise(long key, int k, boolean makeCanonical){
130 assert(k<=32);
131 int b=read(makeCanonical ? makeCanonical2(key, k) : key);
132 if(b<1){return b;}
133 int a=readLeft(key, k, makeCanonical);
134 if(a>=b){return b;}
135 int c=readRight(key, k, makeCanonical);
136 if(c>=b){return b;}
137 return (int)(((long)a+(long)c)/2);
138 // return max(a, c);
139 // int mid=Tools.min(a, b, c);
140 // System.out.println("a="+a+", b="+b+", c="+c+" -> "+mid);
141 // return mid;
142 }
143
144 public final int readPreciseMin(long key, int k, boolean makeCanonical){
145 assert(k<=32);
146 int b=read(makeCanonical ? makeCanonical2(key, k) : key);
147 if(b<1){return b;}
148 int a=readLeft(key, k, makeCanonical);
149 if(a<1){return a;}
150 int c=readRight(key, k, makeCanonical);
151 return Tools.min(a, b, c);
152 }
153
154 /**
155 * @param key Kmer to evaluate
156 * @return Sum of counts of all 4 possible left-adjacent kmers
157 */
158 public int readLeft(long key, int k, boolean makeCanonical){throw new RuntimeException("Unsupported.");}
159 /**
160 * @param key Kmer to evaluate
161 * @return Sum of counts of all 4 possible right-adjacent kmers
162 */
163 public int readRight(long key, int k, boolean makeCanonical){throw new RuntimeException("Unsupported.");}
164 /**
165 * @param key Kmer to evaluate
166 * @return Array of counts of all 4 possible left-adjacent kmers
167 */
168 public int[] readAllLeft(final long key, final int k, boolean makeCanonical, int[] rvec){throw new RuntimeException("Unsupported.");}
169 /**
170 * @param key Kmer to evaluate
171 * @return Array of counts of all 4 possible right-adjacent kmers
172 */
173 public int[] readAllRight(final long key, final int k, boolean makeCanonical, int[] rvec){throw new RuntimeException("Unsupported.");}
174
175 public void increment(long[] keys){
176 synchronized(this){
177 for(long key : keys){
178 increment(key);
179 }
180 }
181 }
182
183 public abstract long[] transformToFrequency();
184 public final long[] transformToFrequency(int[][] matrix){
185 long[] freq=new long[100000];
186 int maxFreq=freq.length-1;
187
188 if(cellBits!=32){
189 assert(cellBits>0);
190 for(int[] array : matrix){
191 for(int i=0; i<array.length; i++){
192 int word=array[i];
193 int j=cellsPerWord;
194 // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
195 for(; word!=0; j--){
196 int x=word&valueMask;
197 int x2=(int)min(x, maxFreq);
198 freq[x2]++;
199 word=(word>>>cellBits);
200 // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
201 }
202 freq[0]+=j;
203 }
204 }
205 }else{
206 for(int[] array : matrix){
207 for(int i=0; i<array.length; i++){
208 int word=array[i];
209 int x2=(int)min(word, maxFreq);
210 freq[x2]++;
211 }
212 }
213 }
214 return freq;
215 }
216
217 public final long[] transformToFrequency(AtomicIntegerArray[] matrix){
218 long[] freq=new long[100000];
219 int maxFreq=freq.length-1;
220
221 if(cellBits!=32){
222 assert(cellBits>0);
223 for(AtomicIntegerArray array : matrix){
224 for(int i=0; i<array.length(); i++){
225 int word=array.get(i);
226 int j=cellsPerWord;
227 // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
228 for(; word!=0; j--){
229 int x=word&valueMask;
230 int x2=(int)min(x, maxFreq);
231 freq[x2]++;
232 word=(word>>>cellBits);
233 // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
234 }
235 freq[0]+=j;
236 }
237 }
238 }else{
239 for(AtomicIntegerArray array : matrix){
240 for(int i=0; i<array.length(); i++){
241 int word=array.get(i);
242 int x2=(int)min(word, maxFreq);
243 freq[x2]++;
244 }
245 }
246 }
247 return freq;
248 }
249
250 public final ByteBuilder description(){
251 ByteBuilder sb=new ByteBuilder();
252 long words=cells/cellsPerWord;
253 int wordsPerArray=(int)(words/numArrays);
254 sb.append("cells: \t"+cells).append('\n');
255 sb.append("cellBits:\t"+cellBits).append('\n');
256 sb.append("valueMask:\t"+Long.toHexString(valueMask)).append('\n');
257 sb.append("maxValue:\t"+maxValue).append('\n');
258 sb.append("cellsPerWord:\t"+cellsPerWord).append('\n');
259 sb.append("indexShift:\t"+indexShift).append('\n');
260 sb.append("words: \t"+words).append('\n');
261 sb.append("wordsPerArray:\t"+wordsPerArray).append('\n');
262 sb.append("numArrays:\t"+numArrays).append('\n');
263 sb.append("Memory: \t"+mem()).append('\n');
264 sb.append("Usage: \t"+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100));
265 return sb;
266 }
267
268 public final String toShortString(){
269 return "mem = "+mem()+" \tcells = "+toKMG(cells)+" \tused = "+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100);
270 }
271
272 public final String toShortString(int hashes){
273 return ("hashes = "+hashes+" \t ")+
274 "mem = "+mem()+" \tcells = "+toKMG(cells)+" \tused = "+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100);
275 }
276
277 @Override
278 public final String toString(){
279 return description().toString();
280 }
281
282 public abstract CharSequence toContentsString();
283
284 public abstract double usedFraction();
285
286 public abstract double usedFraction(int mindepth);
287
288 public abstract long cellsUsed(int mindepth);
289
290 public final double estimateUniqueKmers(int hashes){
291 double f=usedFraction();
292 double f2=(1-Math.pow(1-f, 1.0/hashes));
293 double n=(-cells)*Math.log(1-f2);
294 return n;
295 }
296
297 public final double estimateUniqueKmers(int hashes, int mindepth){
298 // assert(false) : this.getClass().getName();
299 double f=usedFraction(mindepth);
300 double f2=(1-Math.pow(1-f, 1.0/hashes));
301 double n=(-cells)*Math.log(1-f2);
302 return n;
303 }
304
305 public final double estimateUniqueKmersFromUsedFraction(int hashes, double usedFraction){
306 double f=usedFraction;
307 double f2=(1-Math.pow(1-f, 1.0/hashes));
308 double n=(-cells)*Math.log(1-f2);
309 return n;
310 }
311
312 public final String mem(){
313 long mem=(cells*cellBits)/8;
314 if(mem<(1<<20)){
315 return (String.format(Locale.ROOT, "%.2f KB", mem*1d/(1<<10)));
316 }else if(mem<(1<<30)){
317 return (String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20)));
318 }else{
319 return (String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30)));
320 }
321 }
322
323 public static String toKMG(long x){
324 double div=1;
325 String ext="";
326 if(x>10000000000L){
327 div=1000000000L;
328 ext="B";
329 }else if(x>10000000){
330 div=1000000;
331 ext="M";
332 }else if(x>100000){
333 div=1000;
334 ext="K";
335 }
336 return String.format(Locale.ROOT, "%.2f", x/div)+ext;
337 }
338
339 static final AtomicIntegerArray[] allocMatrix(final int numArrays, final int wordsPerArray){
340 final AtomicIntegerArray[] matrix=new AtomicIntegerArray[numArrays];
341 final AllocThread[] array=new AllocThread[Tools.min(Tools.max(Shared.threads()/2, 1), numArrays)];
342 final AtomicInteger next=new AtomicInteger(0);
343 for(int i=0; i<array.length; i++){
344 array[i]=new AllocThread(matrix, next, wordsPerArray);
345 }
346 for(int i=0; i<array.length; i++){array[i].start();}
347 for(AllocThread at : array){
348 while(at.getState()!=Thread.State.TERMINATED){
349 try {
350 at.join();
351 } catch (InterruptedException e) {
352 // TODO Auto-generated catch block
353 e.printStackTrace();
354 }
355 }
356 }
357 return matrix;
358 }
359
360 private static class AllocThread extends Thread{
361
362 AllocThread(AtomicIntegerArray[] matrix_, AtomicInteger next_, int wordsPerArray_){
363 matrix=matrix_;
364 next=next_;
365 wordsPerArray=wordsPerArray_;
366 }
367
368 @Override
369 public void run(){
370 int x=next.getAndIncrement();
371 while(x<matrix.length){
372 matrix[x]=new AtomicIntegerArray(wordsPerArray);
373 x=next.getAndIncrement();
374 }
375 }
376
377 private final AtomicIntegerArray[] matrix;
378 private final AtomicInteger next;
379 private final int wordsPerArray;
380
381 }
382
383
384 // long hash(long x, int y){throw new RuntimeException("Not supported.");}
385 abstract long hash(long x, int y);
386
387 public static final int min(int x, int y){return x<y ? x : y;}
388 public static final int max(int x, int y){return x>y ? x : y;}
389 public static final long min(long x, long y){return x<y ? x : y;}
390 public static final long max(long x, long y){return x>y ? x : y;}
391
392 /** Any necessary initialization. */
393 public void initialize(){}
394
395 /** Any necessary shutdown steps. */
396 public void shutdown(){}
397
398 public final long cells;
399 public final int cellBits;
400 /** Originally this was different than valueMask in the case that valueMask was negative, but now they are the same. */
401 public final int maxValue;
402
403 protected final int cellsPerWord;
404 protected final int indexShift;
405 protected final int cellMask;
406 protected final int valueMask;
407
408 protected static int minArrays=calcMinArrays();
409 protected final int arrayBits;
410 protected final int numArrays;
411 protected final int arrayMask;
412
413 public static boolean verbose=false;
414
415 private static final int calcMinArrays(){
416 int x=Tools.max(Shared.threads(), 2);
417 while(Integer.bitCount(x)!=1){x++;}
418 return x;
419 }
420
421 public static final boolean isCanonical(long key, int k){
422 assert(k>3 && k<=32);
423 long b=AminoAcid.reverseComplementBinaryFast(key, k);
424 return key>=b;
425 }
426
427 /** Assumes that the key is not canonical */
428 public static final long makeCanonical(final long key, final int k){
429 assert(k>3 && k<=32);
430 // assert(!isCanonical(key, k));
431 final long r=AminoAcid.reverseComplementBinaryFast(key, k);
432 assert(r>=key);
433 // assert(isCanonical(r, k));
434 // assert(AminoAcid.reverseComplementBinaryFast(r, k)==key);
435 return r;
436 }
437
438
439 public static final long makeCanonical2(final long key, final int k){
440 assert(k>3 && k<=32);
441 if(isCanonical(key, k)){return key;}
442 long r=AminoAcid.reverseComplementBinaryFast(key, k);
443 // assert(isCanonical(r, k)) : k+"\n"+Long.toBinaryString(key)+"\n"+Long.toBinaryString(r)+"\n"+Long.toBinaryString(AminoAcid.reverseComplementBinaryFast(r, k));
444 // assert(AminoAcid.reverseComplementBinaryFast(r, k)==key) : k+"\n"+Long.toBinaryString(key)+"\n"+Long.toBinaryString(r)+"\n"+Long.toBinaryString(AminoAcid.reverseComplementBinaryFast(r, k));
445 return r;
446 }
447
448 public KCountArray prefilter(){
449 throw new RuntimeException("TODO: Override");
450 }
451
452 public void purgeFilter(){
453 throw new RuntimeException("TODO: Override");
454 }
455
456 /** Increases accuracy of overloaded multi-bit tables */
457 public static boolean LOCKED_INCREMENT=false;
458 public static boolean SET_LOCKED_INCREMENT=false;
459
460 }