Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray6MT.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package bloom; | |
2 | |
3 import java.util.Arrays; | |
4 import java.util.Random; | |
5 import java.util.concurrent.ArrayBlockingQueue; | |
6 | |
7 import shared.Shared; | |
8 import shared.Timer; | |
9 import structures.ByteBuilder; | |
10 | |
11 | |
12 /** | |
13 * | |
14 * Uses hashing rather than direct-mapping to support longer kmers. | |
15 * | |
16 * @author Brian Bushnell | |
17 * @date Aug 17, 2012 | |
18 * | |
19 */ | |
20 public class KCountArray6MT extends KCountArray { | |
21 | |
22 /** | |
23 * | |
24 */ | |
25 private static final long serialVersionUID = -1524266549200637631L; | |
26 | |
27 public static void main(String[] args){ | |
28 long cells=Long.parseLong(args[0]); | |
29 int bits=Integer.parseInt(args[1]); | |
30 int gap=Integer.parseInt(args[2]); | |
31 int hashes=Integer.parseInt(args[3]); | |
32 | |
33 verbose=false; | |
34 | |
35 KCountArray6MT kca=new KCountArray6MT(cells, bits, gap, hashes); | |
36 | |
37 System.out.println(kca.read(0)); | |
38 kca.increment(0); | |
39 System.out.println(kca.read(0)); | |
40 kca.increment(0); | |
41 System.out.println(kca.read(0)); | |
42 System.out.println(); | |
43 | |
44 System.out.println(kca.read(1)); | |
45 kca.increment(1); | |
46 System.out.println(kca.read(1)); | |
47 kca.increment(1); | |
48 System.out.println(kca.read(1)); | |
49 System.out.println(); | |
50 | |
51 System.out.println(kca.read(100)); | |
52 kca.increment(100); | |
53 System.out.println(kca.read(100)); | |
54 kca.increment(100); | |
55 System.out.println(kca.read(100)); | |
56 kca.increment(100); | |
57 System.out.println(kca.read(100)); | |
58 System.out.println(); | |
59 | |
60 | |
61 System.out.println(kca.read(150)); | |
62 kca.increment(150); | |
63 System.out.println(kca.read(150)); | |
64 System.out.println(); | |
65 | |
66 } | |
67 | |
68 public KCountArray6MT(long cells_, int bits_, int gap_, int hashes_){ | |
69 super(cells_, bits_); | |
70 // verbose=false; | |
71 long words=cells/cellsPerWord; | |
72 assert(words/numArrays<=Integer.MAX_VALUE); | |
73 wordsPerArray=(int)(words/numArrays); | |
74 cellsPerArray=cells/numArrays; | |
75 cellMod=cellsPerArray-1; | |
76 hashes=hashes_; | |
77 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes); | |
78 // assert(false); | |
79 matrix=new int[numArrays][]; | |
80 assert(hashes>0 && hashes<=hashMasks.length); | |
81 } | |
82 | |
83 @Override | |
84 public int read(final long rawKey){ | |
85 assert(finished); | |
86 if(verbose){System.err.println("Reading raw key "+rawKey);} | |
87 | |
88 long key1=hash(rawKey, 3); | |
89 int arrayNum=(int)(key1&arrayMask); | |
90 long key2=hash(rawKey, 0); | |
91 | |
92 int min=readHashed(key2, arrayNum); | |
93 for(int i=1; i<hashes && min>0; i++){ | |
94 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);} | |
95 key2=Long.rotateRight(key2, hashBits); | |
96 key2=hash(key2, i); | |
97 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);} | |
98 min=min(min, readHashed(key2, arrayNum)); | |
99 } | |
100 return min; | |
101 } | |
102 | |
103 int readHashed(long key, int arrayNum){ | |
104 if(verbose){System.err.print("Reading hashed key "+key);} | |
105 // System.out.println("key="+key); | |
106 // int arrayNum=(int)(key&arrayMask); | |
107 key=(key&Long.MAX_VALUE)%(cellMod); | |
108 // key=(key>>>(arrayBits+1))%(cellMod); | |
109 // System.out.println("array="+arrayNum); | |
110 // System.out.println("key2="+key); | |
111 int[] array=matrix[arrayNum]; | |
112 int index=(int)(key>>>indexShift); | |
113 // assert(false) : indexShift; | |
114 // System.out.println("index="+index); | |
115 int word=array[index]; | |
116 // System.out.println("word="+Integer.toHexString(word)); | |
117 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask))); | |
118 // int cellShift=(int)(cellBits*(key&cellMask)); | |
119 int cellShift=(int)(cellBits*key); | |
120 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));} | |
121 // System.out.println("cellShift="+cellShift); | |
122 return (int)((word>>>cellShift)&valueMask); | |
123 } | |
124 | |
125 @Override | |
126 public void write(final long key, int value){ | |
127 throw new RuntimeException("Not allowed for this class."); | |
128 } | |
129 | |
130 //Slow | |
131 @Override | |
132 public void increment(final long rawKey, int amt){ | |
133 for(int i=0; i<amt; i++){increment0(rawKey);} | |
134 } | |
135 | |
136 public void increment0(final long rawKey){ | |
137 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");} | |
138 | |
139 long key1=hash(rawKey, 3); | |
140 | |
141 if(verbose){System.err.println("key2="+key1+", value="+read(rawKey));} | |
142 | |
143 int bnum=(int)(key1&arrayMask); | |
144 long[] array=buffers[bnum]; | |
145 int loc=bufferlen[bnum]; | |
146 | |
147 // key2=Long.rotateRight(key2, hashBits); | |
148 // array[loc]=key2; | |
149 | |
150 array[loc]=rawKey; | |
151 bufferlen[bnum]++; | |
152 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);} | |
153 if(bufferlen[bnum]>=array.length){ | |
154 | |
155 if(verbose){System.err.println("Moving array.");} | |
156 bufferlen[bnum]=0; | |
157 buffers[bnum]=new long[array.length]; | |
158 | |
159 writers[bnum].add(array); | |
160 if(verbose){System.err.println("Moved.");} | |
161 } | |
162 } | |
163 | |
164 /** Returns unincremented value */ | |
165 @Override | |
166 public int incrementAndReturnUnincremented(long key, int incr){ | |
167 throw new RuntimeException("Operation not supported."); | |
168 } | |
169 | |
170 @Override | |
171 public long[] transformToFrequency(){ | |
172 return transformToFrequency(matrix); | |
173 } | |
174 | |
175 @Override | |
176 public ByteBuilder toContentsString(){ | |
177 ByteBuilder sb=new ByteBuilder(); | |
178 sb.append('['); | |
179 String comma=""; | |
180 for(int[] array : matrix){ | |
181 for(int i=0; i<array.length; i++){ | |
182 int word=array[i]; | |
183 for(int j=0; j<cellsPerWord; j++){ | |
184 int x=word&valueMask; | |
185 sb.append(comma); | |
186 sb.append(x); | |
187 word>>>=cellBits; | |
188 comma=", "; | |
189 } | |
190 } | |
191 } | |
192 sb.append(']'); | |
193 return sb; | |
194 } | |
195 | |
196 @Override | |
197 public double usedFraction(){return cellsUsed/(double)cells;} | |
198 | |
199 @Override | |
200 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;} | |
201 | |
202 @Override | |
203 public long cellsUsed(int mindepth){ | |
204 long count=0; | |
205 for(int[] array : matrix){ | |
206 if(array!=null){ | |
207 for(int word : array){ | |
208 while(word>0){ | |
209 int x=word&valueMask; | |
210 if(x>=mindepth){count++;} | |
211 word>>>=cellBits; | |
212 } | |
213 } | |
214 } | |
215 } | |
216 return count; | |
217 } | |
218 | |
219 | |
220 @Override | |
221 final long hash(long key, int row){ | |
222 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
223 // int cell=(int)(hashCellMask&(key)); | |
224 | |
225 if(row==0){//Doublehash only first time | |
226 key=key^hashMasks[(row+4)%hashMasks.length][cell]; | |
227 cell=(int)(hashCellMask&(key>>4)); | |
228 // cell=(int)(hashCellMask&(key>>hashBits)); | |
229 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
230 } | |
231 | |
232 return key^hashMasks[row][cell]; | |
233 } | |
234 | |
235 /** | |
236 * @param i | |
237 * @param j | |
238 * @return | |
239 */ | |
240 private static long[][] makeMasks(int rows, int cols) { | |
241 | |
242 long seed; | |
243 synchronized(KCountArray6MT.class){ | |
244 seed=counter; | |
245 counter++; | |
246 } | |
247 | |
248 Timer t=new Timer(); | |
249 long[][] r=new long[rows][cols]; | |
250 Random randy=Shared.threadLocalRandom(seed); | |
251 for(int i=0; i<r.length; i++){ | |
252 fillMasks(r[i], randy); | |
253 } | |
254 t.stop(); | |
255 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);} | |
256 return r; | |
257 } | |
258 | |
259 private static void fillMasks(long[] r, Random randy) { | |
260 // for(int i=0; i<r.length; i++){ | |
261 // long x=0; | |
262 // while(Long.bitCount(x&0xFFFFFFFF)!=16){ | |
263 // x=randy.nextLong(); | |
264 // } | |
265 // r[i]=(x&Long.MAX_VALUE); | |
266 // } | |
267 | |
268 final int hlen=(1<<hashBits); | |
269 assert(r.length==hlen); | |
270 int[] count1=new int[hlen]; | |
271 int[] count2=new int[hlen]; | |
272 final long mask=hlen-1; | |
273 | |
274 for(int i=0; i<r.length; i++){ | |
275 long x=0; | |
276 int y=0; | |
277 int z=0; | |
278 while(Long.bitCount(x&0xFFFFFFFFL)!=16){ | |
279 x=randy.nextLong(); | |
280 while(Long.bitCount(x&0xFFFFFFFFL)<16){ | |
281 x|=(1L<<randy.nextInt(32)); | |
282 } | |
283 while(Long.bitCount(x&0xFFFFFFFFL)>16){ | |
284 x&=(~(1L<<randy.nextInt(32))); | |
285 } | |
286 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){ | |
287 x|=(1L<<(randy.nextInt(32)+32)); | |
288 } | |
289 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){ | |
290 x&=(~(1L<<(randy.nextInt(32)+32))); | |
291 } | |
292 | |
293 // System.out.print("."); | |
294 // y=(((int)(x&mask))^i); | |
295 y=(((int)(x&mask))); | |
296 z=(int)((x>>hashBits)&mask); | |
297 if(count1[y]>0 || count2[z]>0){ | |
298 x=0; | |
299 } | |
300 } | |
301 // System.out.println(Long.toBinaryString(x)); | |
302 r[i]=(x&Long.MAX_VALUE); | |
303 count1[y]++; | |
304 count2[z]++; | |
305 } | |
306 | |
307 } | |
308 | |
309 | |
310 @Override | |
311 public void initialize(){ | |
312 for(int i=0; i<writers.length; i++){ | |
313 writers[i]=new WriteThread(i); | |
314 writers[i].start(); | |
315 | |
316 // while(!writers[i].isAlive()){ | |
317 // System.out.print("."); | |
318 // } | |
319 } | |
320 } | |
321 | |
322 @Override | |
323 public void shutdown(){ | |
324 if(finished){return;} | |
325 synchronized(this){ | |
326 if(finished){return;} | |
327 | |
328 //Clear buffers | |
329 for(int i=0; i<numArrays; i++){ | |
330 long[] array=buffers[i]; | |
331 int len=bufferlen[i]; | |
332 buffers[i]=null; | |
333 bufferlen[i]=0; | |
334 | |
335 if(len<array.length){ | |
336 array=Arrays.copyOf(array, len); | |
337 } | |
338 | |
339 if(array.length>0){ | |
340 writers[i].add(array); | |
341 } | |
342 } | |
343 | |
344 //Add poison | |
345 for(WriteThread wt : writers){ | |
346 wt.add(poison); | |
347 } | |
348 | |
349 //Wait for termination | |
350 for(WriteThread wt : writers){ | |
351 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive()); | |
352 while(wt.isAlive()){ | |
353 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive()); | |
354 try { | |
355 wt.join(10000); | |
356 } catch (InterruptedException e) { | |
357 // TODO Auto-generated catch block | |
358 e.printStackTrace(); | |
359 } | |
360 if(wt.isAlive()){System.err.println(wt.getClass().getCanonicalName()+" is taking a long time to die.");} | |
361 } | |
362 cellsUsed+=wt.cellsUsedPersonal; | |
363 // System.out.println("cellsUsed="+cellsUsed); | |
364 } | |
365 | |
366 assert(!finished); | |
367 finished=true; | |
368 } | |
369 } | |
370 | |
371 private class WriteThread extends Thread{ | |
372 | |
373 public WriteThread(int tnum){ | |
374 num=tnum; | |
375 } | |
376 | |
377 @Override | |
378 public void run(){ | |
379 assert(matrix[num]==null); | |
380 array=new int[wordsPerArray]; //Makes NUMA systems use local memory. | |
381 | |
382 matrix[num]=array; | |
383 | |
384 long[] keys=null; | |
385 while(!shutdown){ | |
386 | |
387 if(verbose){System.err.println(" - Reading keys for wt"+num+".");} | |
388 while(keys==null){ | |
389 try { | |
390 keys=writeQueue.take(); | |
391 } catch (InterruptedException e) { | |
392 // TODO Auto-generated catch block | |
393 e.printStackTrace(); | |
394 } | |
395 } | |
396 if(keys==poison){ | |
397 // assert(false); | |
398 shutdown=true; | |
399 }else{ | |
400 for(long key : keys){ | |
401 incrementRawLocal(key); | |
402 } | |
403 } | |
404 // System.out.println(" -- Read keys for wt"+num+". poison="+(keys==poison)+", len="+keys.length); | |
405 if(verbose){System.err.println(" -- Read keys for wt"+num+". (success)");} | |
406 keys=null; | |
407 if(verbose){System.err.println("shutdown="+shutdown);} | |
408 } | |
409 | |
410 // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+"."); | |
411 // assert(false) : ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+"."; | |
412 | |
413 array=null; | |
414 } | |
415 | |
416 void add(long[] keys){ | |
417 // assert(isAlive()); | |
418 assert(!shutdown); | |
419 if(shutdown){return;} | |
420 // assert(keys!=poison); | |
421 if(verbose){System.err.println(" + Adding keys to wt"+num+".");} | |
422 boolean success=false; | |
423 while(!success){ | |
424 try { | |
425 writeQueue.put(keys); | |
426 success=true; | |
427 } catch (InterruptedException e) { | |
428 // TODO Auto-generated catch block | |
429 e.printStackTrace(); | |
430 } | |
431 } | |
432 if(verbose){System.err.println(" ++ Added keys to wt"+num+". (success)");} | |
433 } | |
434 | |
435 private int incrementRawLocal(long rawKey){ | |
436 // verbose=(rawKey==32662670693L); | |
437 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");} | |
438 // verbose=true; | |
439 assert(1>0); | |
440 | |
441 long key2=rawKey; | |
442 if(hashes==1){ | |
443 key2=hash(key2, 0); | |
444 // int x=incrementHashedIfAtMost(key2, 1, maxValue-1); | |
445 int x=incrementHashedLocal(key2); | |
446 assert(x>=1) : "original=?, new should be >="+(1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
447 return x; | |
448 } | |
449 | |
450 int min=0; | |
451 // final int min=read(rawKey); | |
452 // if(min>=maxValue){return maxValue;} | |
453 | |
454 assert(key2==rawKey); | |
455 for(int i=0; i<hashes; i++){ | |
456 key2=hash(key2, i); | |
457 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2, num));} | |
458 // int x=incrementHashedIfAtMost(key2, 1, min); | |
459 int x=incrementHashedLocal(key2); | |
460 assert(x>=min+1) : "i="+i+", original="+min+", new should be <="+(min+1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
461 if(verbose){System.err.println("postIncr value="+readHashed(key2, num));} | |
462 // assert(read(rawKey)<=min+1) : "i="+i+", original="+min+", new should be <="+(min+1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
463 // assert(readHashed(key2)>=min+1) : "i="+i+", original="+min+", new should be <="+(min+1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
464 key2=Long.rotateRight(key2, hashBits); | |
465 } | |
466 // assert(read(rawKey)==min+1) : "original="+min+", new should be "+(min+1)+", new="+read(rawKey)+", max="+maxValue; | |
467 // assert(false); | |
468 return min(min+1, maxValue); | |
469 } | |
470 | |
471 private int incrementHashedLocal(long key){ | |
472 // assert((key&arrayMask)==num); | |
473 key=(key&Long.MAX_VALUE)%(cellMod); | |
474 // key=(key>>>(arrayBits+1))%(cellMod); | |
475 int index=(int)(key>>>indexShift); | |
476 int word=array[index]; | |
477 int cellShift=(int)(cellBits*key); | |
478 int value=((word>>>cellShift)&valueMask); | |
479 if(value==0){cellsUsedPersonal++;} | |
480 value=min(value+1, maxValue); | |
481 word=(value<<cellShift)|(word&~((valueMask)<<cellShift)); | |
482 array[index]=word; | |
483 return value; | |
484 } | |
485 | |
486 private int[] array; | |
487 private final int num; | |
488 public long cellsUsedPersonal=0; | |
489 | |
490 public ArrayBlockingQueue<long[]> writeQueue=new ArrayBlockingQueue<long[]>(16); | |
491 public boolean shutdown=false; | |
492 | |
493 } | |
494 | |
495 | |
496 public long cellsUsed(){return cellsUsed;} | |
497 | |
498 private boolean finished=false; | |
499 | |
500 private long cellsUsed; | |
501 final int[][] matrix; | |
502 private final WriteThread[] writers=new WriteThread[numArrays]; | |
503 final int hashes; | |
504 final int wordsPerArray; | |
505 private final long cellsPerArray; | |
506 final long cellMod; | |
507 private final long[][] hashMasks=makeMasks(8, hashArrayLength); | |
508 | |
509 private final long[][] buffers=new long[numArrays][1000]; | |
510 private final int[] bufferlen=new int[numArrays]; | |
511 | |
512 private static final int hashBits=6; | |
513 private static final int hashArrayLength=1<<hashBits; | |
514 private static final int hashCellMask=hashArrayLength-1; | |
515 static final long[] poison=new long[0]; | |
516 | |
517 private static long counter=0; | |
518 | |
519 } |