Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package bloom; | |
2 | |
3 import java.io.Serializable; | |
4 import java.util.Locale; | |
5 import java.util.concurrent.atomic.AtomicInteger; | |
6 import java.util.concurrent.atomic.AtomicIntegerArray; | |
7 | |
8 import dna.AminoAcid; | |
9 import shared.Shared; | |
10 import shared.Tools; | |
11 import structures.ByteBuilder; | |
12 | |
13 /** | |
14 * @author Brian Bushnell | |
15 * @date Jul 5, 2012 | |
16 */ | |
17 public abstract class KCountArray implements Serializable { | |
18 | |
19 /** | |
20 * | |
21 */ | |
22 private static final long serialVersionUID = 1590374813059942002L; | |
23 | |
24 public static KCountArray makeNew(long cells_, int cbits_){ | |
25 return makeNew(cells_, cbits_, 1); | |
26 } | |
27 | |
28 public static KCountArray makeNew(long cells_, int cbits_, int hashes_){ | |
29 return makeNew(cells_, cbits_, hashes_, null, 0); | |
30 } | |
31 | |
32 //TODO: Get rid of keys_ arg. | |
33 public static KCountArray makeNew(long cells_, int cbits_, int hashes_, KCountArray prefilter, int prefilterLimit_){ | |
34 KCountArray kca=new KCountArray7MTA(cells_, cbits_, hashes_, prefilter, prefilterLimit_); | |
35 kca.initialize(); | |
36 return kca; | |
37 } | |
38 | |
39 protected KCountArray(final long cells_, int cbits_){ | |
40 assert(cbits_<=32); | |
41 assert(Integer.bitCount(cbits_)==1); | |
42 assert(Long.bitCount(cells_)==1) || this.getClass()==KCountArray7MT.class : this.getClass(); | |
43 | |
44 numArrays=64; | |
45 arrayBits=31-Integer.numberOfLeadingZeros(numArrays); | |
46 arrayMask=numArrays-1; | |
47 | |
48 while(cbits_*cells_<32*numArrays){ | |
49 assert(false) : cells_+", "+cbits_+", "+numArrays+", "+(cbits_*cells_)+"<"+(32*numArrays); | |
50 cbits_*=2; | |
51 } //Increases bits per cell so that at minimum each array is size 1 | |
52 | |
53 assert(cbits_<=32); | |
54 | |
55 cells=cells_; | |
56 cellBits=cbits_; | |
57 valueMask=(cellBits==32 ? Integer.MAX_VALUE : ~((-1)<<cellBits)); | |
58 maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31))); | |
59 cellsPerWord=32/cellBits; | |
60 indexShift=Integer.numberOfTrailingZeros(cellsPerWord); | |
61 cellMask=cellsPerWord-1; | |
62 | |
63 if(verbose){ | |
64 System.out.println(description()); | |
65 } | |
66 } | |
67 | |
68 protected KCountArray(final long cells_, int cbits_, int arrays_){ | |
69 assert(cbits_<=32); | |
70 assert(Integer.bitCount(cbits_)==1); | |
71 assert(Long.bitCount(cells_)==1) || this.getClass()==KCountArray7MT.class || this.getClass()==KCountArray7MTA.class || this.getClass()==KCountArray8MT.class; | |
72 | |
73 numArrays=arrays_; | |
74 assert(Integer.bitCount(numArrays)==1) : numArrays+", "+cells_+", "+cbits_; | |
75 arrayBits=31-Integer.numberOfLeadingZeros(numArrays); | |
76 arrayMask=numArrays-1; | |
77 | |
78 while(cbits_*cells_<32*numArrays){ | |
79 assert(false) : cells_+", "+cbits_+", "+numArrays+", "+(cbits_*cells_)+"<"+(32*numArrays); | |
80 cbits_*=2; | |
81 } //Increases bits per cell so that at minimum each array is size 1 | |
82 | |
83 assert(cbits_<=32) : "Why?"; | |
84 | |
85 cells=cells_; | |
86 cellBits=cbits_; | |
87 valueMask=(cellBits==32 ? Integer.MAX_VALUE : ~((-1)<<cellBits)); | |
88 maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31))); | |
89 cellsPerWord=32/cellBits; | |
90 indexShift=Integer.numberOfTrailingZeros(cellsPerWord); | |
91 cellMask=cellsPerWord-1; | |
92 | |
93 if(verbose){ | |
94 System.out.println(description()); | |
95 } | |
96 } | |
97 | |
98 public abstract int read(long key); | |
99 public int read(long keys[]){throw new RuntimeException("Unimplemented.");} | |
100 public final int read(long key, int k, boolean makeCanonical){return read(makeCanonical ? makeCanonical2(key, k) : key);} | |
101 | |
102 public abstract void write(long key, int value); | |
103 | |
104 //TODO: Consider adding a boolean for return old value. | |
105 public final void increment(long key){increment(key, 1);} | |
106 public final void decrement(long key){decrement(key, 1);} | |
107 | |
108 /** Returns nothing for simplicity. */ | |
109 public abstract void increment(long key, int incr); | |
110 | |
111 /** Returns unincremented value */ | |
112 public abstract int incrementAndReturnUnincremented(long key, int incr); | |
113 | |
114 // /** Returns unincremented value */ | |
115 // public final int incrementAndReturnUnincremented(Kmer kmer, int incr){ | |
116 // return incrementAndReturnUnincremented(kmer.xor(), incr); | |
117 // } | |
118 | |
119 //For long kmers. | |
120 public int incrementAndReturnUnincremented(long[] keys, int incr){ | |
121 throw new RuntimeException("Unimplemented."); | |
122 } | |
123 | |
124 /** Optional method. */ | |
125 public void decrement(long key, int incr){ | |
126 throw new RuntimeException("This class "+getClass().getName()+" does not support decrement."); | |
127 } | |
128 | |
129 public final int readPrecise(long key, int k, boolean makeCanonical){ | |
130 assert(k<=32); | |
131 int b=read(makeCanonical ? makeCanonical2(key, k) : key); | |
132 if(b<1){return b;} | |
133 int a=readLeft(key, k, makeCanonical); | |
134 if(a>=b){return b;} | |
135 int c=readRight(key, k, makeCanonical); | |
136 if(c>=b){return b;} | |
137 return (int)(((long)a+(long)c)/2); | |
138 // return max(a, c); | |
139 // int mid=Tools.min(a, b, c); | |
140 // System.out.println("a="+a+", b="+b+", c="+c+" -> "+mid); | |
141 // return mid; | |
142 } | |
143 | |
144 public final int readPreciseMin(long key, int k, boolean makeCanonical){ | |
145 assert(k<=32); | |
146 int b=read(makeCanonical ? makeCanonical2(key, k) : key); | |
147 if(b<1){return b;} | |
148 int a=readLeft(key, k, makeCanonical); | |
149 if(a<1){return a;} | |
150 int c=readRight(key, k, makeCanonical); | |
151 return Tools.min(a, b, c); | |
152 } | |
153 | |
154 /** | |
155 * @param key Kmer to evaluate | |
156 * @return Sum of counts of all 4 possible left-adjacent kmers | |
157 */ | |
158 public int readLeft(long key, int k, boolean makeCanonical){throw new RuntimeException("Unsupported.");} | |
159 /** | |
160 * @param key Kmer to evaluate | |
161 * @return Sum of counts of all 4 possible right-adjacent kmers | |
162 */ | |
163 public int readRight(long key, int k, boolean makeCanonical){throw new RuntimeException("Unsupported.");} | |
164 /** | |
165 * @param key Kmer to evaluate | |
166 * @return Array of counts of all 4 possible left-adjacent kmers | |
167 */ | |
168 public int[] readAllLeft(final long key, final int k, boolean makeCanonical, int[] rvec){throw new RuntimeException("Unsupported.");} | |
169 /** | |
170 * @param key Kmer to evaluate | |
171 * @return Array of counts of all 4 possible right-adjacent kmers | |
172 */ | |
173 public int[] readAllRight(final long key, final int k, boolean makeCanonical, int[] rvec){throw new RuntimeException("Unsupported.");} | |
174 | |
175 public void increment(long[] keys){ | |
176 synchronized(this){ | |
177 for(long key : keys){ | |
178 increment(key); | |
179 } | |
180 } | |
181 } | |
182 | |
183 public abstract long[] transformToFrequency(); | |
184 public final long[] transformToFrequency(int[][] matrix){ | |
185 long[] freq=new long[100000]; | |
186 int maxFreq=freq.length-1; | |
187 | |
188 if(cellBits!=32){ | |
189 assert(cellBits>0); | |
190 for(int[] array : matrix){ | |
191 for(int i=0; i<array.length; i++){ | |
192 int word=array[i]; | |
193 int j=cellsPerWord; | |
194 // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); | |
195 for(; word!=0; j--){ | |
196 int x=word&valueMask; | |
197 int x2=(int)min(x, maxFreq); | |
198 freq[x2]++; | |
199 word=(word>>>cellBits); | |
200 // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); | |
201 } | |
202 freq[0]+=j; | |
203 } | |
204 } | |
205 }else{ | |
206 for(int[] array : matrix){ | |
207 for(int i=0; i<array.length; i++){ | |
208 int word=array[i]; | |
209 int x2=(int)min(word, maxFreq); | |
210 freq[x2]++; | |
211 } | |
212 } | |
213 } | |
214 return freq; | |
215 } | |
216 | |
217 public final long[] transformToFrequency(AtomicIntegerArray[] matrix){ | |
218 long[] freq=new long[100000]; | |
219 int maxFreq=freq.length-1; | |
220 | |
221 if(cellBits!=32){ | |
222 assert(cellBits>0); | |
223 for(AtomicIntegerArray array : matrix){ | |
224 for(int i=0; i<array.length(); i++){ | |
225 int word=array.get(i); | |
226 int j=cellsPerWord; | |
227 // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); | |
228 for(; word!=0; j--){ | |
229 int x=word&valueMask; | |
230 int x2=(int)min(x, maxFreq); | |
231 freq[x2]++; | |
232 word=(word>>>cellBits); | |
233 // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits); | |
234 } | |
235 freq[0]+=j; | |
236 } | |
237 } | |
238 }else{ | |
239 for(AtomicIntegerArray array : matrix){ | |
240 for(int i=0; i<array.length(); i++){ | |
241 int word=array.get(i); | |
242 int x2=(int)min(word, maxFreq); | |
243 freq[x2]++; | |
244 } | |
245 } | |
246 } | |
247 return freq; | |
248 } | |
249 | |
250 public final ByteBuilder description(){ | |
251 ByteBuilder sb=new ByteBuilder(); | |
252 long words=cells/cellsPerWord; | |
253 int wordsPerArray=(int)(words/numArrays); | |
254 sb.append("cells: \t"+cells).append('\n'); | |
255 sb.append("cellBits:\t"+cellBits).append('\n'); | |
256 sb.append("valueMask:\t"+Long.toHexString(valueMask)).append('\n'); | |
257 sb.append("maxValue:\t"+maxValue).append('\n'); | |
258 sb.append("cellsPerWord:\t"+cellsPerWord).append('\n'); | |
259 sb.append("indexShift:\t"+indexShift).append('\n'); | |
260 sb.append("words: \t"+words).append('\n'); | |
261 sb.append("wordsPerArray:\t"+wordsPerArray).append('\n'); | |
262 sb.append("numArrays:\t"+numArrays).append('\n'); | |
263 sb.append("Memory: \t"+mem()).append('\n'); | |
264 sb.append("Usage: \t"+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100)); | |
265 return sb; | |
266 } | |
267 | |
268 public final String toShortString(){ | |
269 return "mem = "+mem()+" \tcells = "+toKMG(cells)+" \tused = "+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100); | |
270 } | |
271 | |
272 public final String toShortString(int hashes){ | |
273 return ("hashes = "+hashes+" \t ")+ | |
274 "mem = "+mem()+" \tcells = "+toKMG(cells)+" \tused = "+String.format(Locale.ROOT, "%.3f%%",usedFraction()*100); | |
275 } | |
276 | |
277 @Override | |
278 public final String toString(){ | |
279 return description().toString(); | |
280 } | |
281 | |
282 public abstract CharSequence toContentsString(); | |
283 | |
284 public abstract double usedFraction(); | |
285 | |
286 public abstract double usedFraction(int mindepth); | |
287 | |
288 public abstract long cellsUsed(int mindepth); | |
289 | |
290 public final double estimateUniqueKmers(int hashes){ | |
291 double f=usedFraction(); | |
292 double f2=(1-Math.pow(1-f, 1.0/hashes)); | |
293 double n=(-cells)*Math.log(1-f2); | |
294 return n; | |
295 } | |
296 | |
297 public final double estimateUniqueKmers(int hashes, int mindepth){ | |
298 // assert(false) : this.getClass().getName(); | |
299 double f=usedFraction(mindepth); | |
300 double f2=(1-Math.pow(1-f, 1.0/hashes)); | |
301 double n=(-cells)*Math.log(1-f2); | |
302 return n; | |
303 } | |
304 | |
305 public final double estimateUniqueKmersFromUsedFraction(int hashes, double usedFraction){ | |
306 double f=usedFraction; | |
307 double f2=(1-Math.pow(1-f, 1.0/hashes)); | |
308 double n=(-cells)*Math.log(1-f2); | |
309 return n; | |
310 } | |
311 | |
312 public final String mem(){ | |
313 long mem=(cells*cellBits)/8; | |
314 if(mem<(1<<20)){ | |
315 return (String.format(Locale.ROOT, "%.2f KB", mem*1d/(1<<10))); | |
316 }else if(mem<(1<<30)){ | |
317 return (String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20))); | |
318 }else{ | |
319 return (String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30))); | |
320 } | |
321 } | |
322 | |
323 public static String toKMG(long x){ | |
324 double div=1; | |
325 String ext=""; | |
326 if(x>10000000000L){ | |
327 div=1000000000L; | |
328 ext="B"; | |
329 }else if(x>10000000){ | |
330 div=1000000; | |
331 ext="M"; | |
332 }else if(x>100000){ | |
333 div=1000; | |
334 ext="K"; | |
335 } | |
336 return String.format(Locale.ROOT, "%.2f", x/div)+ext; | |
337 } | |
338 | |
339 static final AtomicIntegerArray[] allocMatrix(final int numArrays, final int wordsPerArray){ | |
340 final AtomicIntegerArray[] matrix=new AtomicIntegerArray[numArrays]; | |
341 final AllocThread[] array=new AllocThread[Tools.min(Tools.max(Shared.threads()/2, 1), numArrays)]; | |
342 final AtomicInteger next=new AtomicInteger(0); | |
343 for(int i=0; i<array.length; i++){ | |
344 array[i]=new AllocThread(matrix, next, wordsPerArray); | |
345 } | |
346 for(int i=0; i<array.length; i++){array[i].start();} | |
347 for(AllocThread at : array){ | |
348 while(at.getState()!=Thread.State.TERMINATED){ | |
349 try { | |
350 at.join(); | |
351 } catch (InterruptedException e) { | |
352 // TODO Auto-generated catch block | |
353 e.printStackTrace(); | |
354 } | |
355 } | |
356 } | |
357 return matrix; | |
358 } | |
359 | |
360 private static class AllocThread extends Thread{ | |
361 | |
362 AllocThread(AtomicIntegerArray[] matrix_, AtomicInteger next_, int wordsPerArray_){ | |
363 matrix=matrix_; | |
364 next=next_; | |
365 wordsPerArray=wordsPerArray_; | |
366 } | |
367 | |
368 @Override | |
369 public void run(){ | |
370 int x=next.getAndIncrement(); | |
371 while(x<matrix.length){ | |
372 matrix[x]=new AtomicIntegerArray(wordsPerArray); | |
373 x=next.getAndIncrement(); | |
374 } | |
375 } | |
376 | |
377 private final AtomicIntegerArray[] matrix; | |
378 private final AtomicInteger next; | |
379 private final int wordsPerArray; | |
380 | |
381 } | |
382 | |
383 | |
384 // long hash(long x, int y){throw new RuntimeException("Not supported.");} | |
385 abstract long hash(long x, int y); | |
386 | |
387 public static final int min(int x, int y){return x<y ? x : y;} | |
388 public static final int max(int x, int y){return x>y ? x : y;} | |
389 public static final long min(long x, long y){return x<y ? x : y;} | |
390 public static final long max(long x, long y){return x>y ? x : y;} | |
391 | |
392 /** Any necessary initialization. */ | |
393 public void initialize(){} | |
394 | |
395 /** Any necessary shutdown steps. */ | |
396 public void shutdown(){} | |
397 | |
398 public final long cells; | |
399 public final int cellBits; | |
400 /** Originally this was different than valueMask in the case that valueMask was negative, but now they are the same. */ | |
401 public final int maxValue; | |
402 | |
403 protected final int cellsPerWord; | |
404 protected final int indexShift; | |
405 protected final int cellMask; | |
406 protected final int valueMask; | |
407 | |
408 protected static int minArrays=calcMinArrays(); | |
409 protected final int arrayBits; | |
410 protected final int numArrays; | |
411 protected final int arrayMask; | |
412 | |
413 public static boolean verbose=false; | |
414 | |
415 private static final int calcMinArrays(){ | |
416 int x=Tools.max(Shared.threads(), 2); | |
417 while(Integer.bitCount(x)!=1){x++;} | |
418 return x; | |
419 } | |
420 | |
421 public static final boolean isCanonical(long key, int k){ | |
422 assert(k>3 && k<=32); | |
423 long b=AminoAcid.reverseComplementBinaryFast(key, k); | |
424 return key>=b; | |
425 } | |
426 | |
427 /** Assumes that the key is not canonical */ | |
428 public static final long makeCanonical(final long key, final int k){ | |
429 assert(k>3 && k<=32); | |
430 // assert(!isCanonical(key, k)); | |
431 final long r=AminoAcid.reverseComplementBinaryFast(key, k); | |
432 assert(r>=key); | |
433 // assert(isCanonical(r, k)); | |
434 // assert(AminoAcid.reverseComplementBinaryFast(r, k)==key); | |
435 return r; | |
436 } | |
437 | |
438 | |
439 public static final long makeCanonical2(final long key, final int k){ | |
440 assert(k>3 && k<=32); | |
441 if(isCanonical(key, k)){return key;} | |
442 long r=AminoAcid.reverseComplementBinaryFast(key, k); | |
443 // assert(isCanonical(r, k)) : k+"\n"+Long.toBinaryString(key)+"\n"+Long.toBinaryString(r)+"\n"+Long.toBinaryString(AminoAcid.reverseComplementBinaryFast(r, k)); | |
444 // assert(AminoAcid.reverseComplementBinaryFast(r, k)==key) : k+"\n"+Long.toBinaryString(key)+"\n"+Long.toBinaryString(r)+"\n"+Long.toBinaryString(AminoAcid.reverseComplementBinaryFast(r, k)); | |
445 return r; | |
446 } | |
447 | |
448 public KCountArray prefilter(){ | |
449 throw new RuntimeException("TODO: Override"); | |
450 } | |
451 | |
452 public void purgeFilter(){ | |
453 throw new RuntimeException("TODO: Override"); | |
454 } | |
455 | |
456 /** Increases accuracy of overloaded multi-bit tables */ | |
457 public static boolean LOCKED_INCREMENT=false; | |
458 public static boolean SET_LOCKED_INCREMENT=false; | |
459 | |
460 } |