Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray7MT.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package bloom; | |
2 | |
3 import java.util.Arrays; | |
4 import java.util.Random; | |
5 import java.util.concurrent.ArrayBlockingQueue; | |
6 | |
7 import shared.Primes; | |
8 import shared.Shared; | |
9 import shared.Timer; | |
10 import shared.Tools; | |
11 import structures.ByteBuilder; | |
12 | |
13 | |
14 /** | |
15 * | |
16 * Uses prime numbers for array lengths. | |
17 * | |
18 * @author Brian Bushnell | |
19 * @date Aug 17, 2012 | |
20 * | |
21 */ | |
22 public class KCountArray7MT extends KCountArray { | |
23 | |
24 /** | |
25 * | |
26 */ | |
27 private static final long serialVersionUID = -8767643111803866913L; | |
28 | |
29 public static void main(String[] args){ | |
30 long cells=Long.parseLong(args[0]); | |
31 int bits=Integer.parseInt(args[1]); | |
32 int hashes=Integer.parseInt(args[2]); | |
33 | |
34 verbose=false; | |
35 | |
36 KCountArray7MT kca=new KCountArray7MT(cells, bits, hashes); | |
37 | |
38 System.out.println(kca.read(0)); | |
39 kca.increment(0); | |
40 System.out.println(kca.read(0)); | |
41 kca.increment(0); | |
42 System.out.println(kca.read(0)); | |
43 System.out.println(); | |
44 | |
45 System.out.println(kca.read(1)); | |
46 kca.increment(1); | |
47 System.out.println(kca.read(1)); | |
48 kca.increment(1); | |
49 System.out.println(kca.read(1)); | |
50 System.out.println(); | |
51 | |
52 System.out.println(kca.read(100)); | |
53 kca.increment(100); | |
54 System.out.println(kca.read(100)); | |
55 kca.increment(100); | |
56 System.out.println(kca.read(100)); | |
57 kca.increment(100); | |
58 System.out.println(kca.read(100)); | |
59 System.out.println(); | |
60 | |
61 | |
62 System.out.println(kca.read(150)); | |
63 kca.increment(150); | |
64 System.out.println(kca.read(150)); | |
65 System.out.println(); | |
66 | |
67 } | |
68 | |
69 public KCountArray7MT(long cells_, int bits_, int hashes_){ | |
70 super(getPrimeCells(cells_, bits_), bits_, getDesiredArrays(cells_, bits_)); | |
71 // verbose=false; | |
72 // assert(false); | |
73 // System.out.println(cells); | |
74 cellsPerArray=cells/numArrays; | |
75 wordsPerArray=(int)((cellsPerArray%cellsPerWord)==0 ? (cellsPerArray/cellsPerWord) : (cellsPerArray/cellsPerWord+1)); | |
76 cellMod=cellsPerArray; | |
77 hashes=hashes_; | |
78 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes); | |
79 // assert(false); | |
80 matrix=new int[numArrays][]; | |
81 assert(hashes>0 && hashes<=hashMasks.length); | |
82 } | |
83 | |
84 private static int getDesiredArrays(long desiredCells, int bits){ | |
85 | |
86 long words=Tools.max((desiredCells*bits+31)/32, minArrays); | |
87 int arrays=minArrays; | |
88 while(words/arrays>=Integer.MAX_VALUE){ | |
89 arrays*=2; | |
90 } | |
91 // assert(false) : arrays; | |
92 return arrays; | |
93 } | |
94 | |
95 private static long getPrimeCells(long desiredCells, int bits){ | |
96 | |
97 int arrays=getDesiredArrays(desiredCells, bits); | |
98 | |
99 long x=(desiredCells+arrays-1)/arrays; | |
100 long x2=Primes.primeAtMost(x); | |
101 return x2*arrays; | |
102 } | |
103 | |
104 @Override | |
105 public int read(final long rawKey){ | |
106 assert(finished); | |
107 if(verbose){System.err.println("Reading raw key "+rawKey);} | |
108 long key2=hash(rawKey, 0); | |
109 int min=readHashed(key2); | |
110 for(int i=1; i<hashes && min>0; i++){ | |
111 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);} | |
112 key2=Long.rotateRight(key2, hashBits); | |
113 key2=hash(key2, i); | |
114 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);} | |
115 min=min(min, readHashed(key2)); | |
116 } | |
117 return min; | |
118 } | |
119 | |
120 private int readHashed(long key){ | |
121 if(verbose){System.err.print("Reading hashed key "+key);} | |
122 // System.out.println("key="+key); | |
123 int arrayNum=(int)(key&arrayMask); | |
124 key=(key>>>arrayBits)%(cellMod); | |
125 // key=(key>>>(arrayBits+1))%(cellMod); | |
126 // System.out.println("array="+arrayNum); | |
127 // System.out.println("key2="+key); | |
128 int[] array=matrix[arrayNum]; | |
129 int index=(int)(key>>>indexShift); | |
130 // assert(false) : indexShift; | |
131 // System.out.println("index="+index); | |
132 int word=array[index]; | |
133 // System.out.println("word="+Integer.toHexString(word)); | |
134 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask))); | |
135 // int cellShift=(int)(cellBits*(key&cellMask)); | |
136 int cellShift=(int)(cellBits*key); | |
137 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));} | |
138 // System.out.println("cellShift="+cellShift); | |
139 return (int)((word>>>cellShift)&valueMask); | |
140 } | |
141 | |
142 @Override | |
143 public void write(final long key, int value){ | |
144 throw new RuntimeException("Not allowed for this class."); | |
145 } | |
146 | |
147 @Override | |
148 /** This should increase speed by doing the first hash outside the critical section, but it does not seem to help. */ | |
149 public void increment(long[] keys){ | |
150 for(int i=0; i<keys.length; i++){ | |
151 keys[i]=hash(keys[i], 0); | |
152 } | |
153 synchronized(buffers){ | |
154 for(long key : keys){ | |
155 incrementPartiallyHashed(key); | |
156 } | |
157 } | |
158 } | |
159 | |
160 //Slow | |
161 @Override | |
162 public void increment(final long rawKey, int amt){ | |
163 for(int i=0; i<amt; i++){increment0(rawKey);} | |
164 } | |
165 | |
166 public void increment0(final long rawKey){ | |
167 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");} | |
168 | |
169 long key2=rawKey; | |
170 for(int i=0; i<hashes; i++){ | |
171 key2=hash(key2, i); | |
172 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));} | |
173 // assert(readHashed(key2)==0); | |
174 | |
175 int bnum=(int)(key2&arrayMask); | |
176 long[] array=buffers[bnum]; | |
177 int loc=bufferlen[bnum]; | |
178 array[loc]=key2; | |
179 bufferlen[bnum]++; | |
180 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);} | |
181 if(bufferlen[bnum]>=array.length){ | |
182 | |
183 if(verbose){System.err.println("Moving array.");} | |
184 bufferlen[bnum]=0; | |
185 buffers[bnum]=new long[array.length]; | |
186 | |
187 writers[bnum].add(array); | |
188 if(verbose){System.err.println("Moved.");} | |
189 } | |
190 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
191 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
192 key2=Long.rotateRight(key2, hashBits); | |
193 } | |
194 } | |
195 | |
196 private void incrementPartiallyHashed(final long pKey){ | |
197 if(verbose){System.err.println("\n*** Incrementing key "+pKey+" ***");} | |
198 | |
199 long key2=pKey; | |
200 | |
201 { | |
202 int bnum=(int)(key2&arrayMask); | |
203 long[] array=buffers[bnum]; | |
204 int loc=bufferlen[bnum]; | |
205 array[loc]=key2; | |
206 bufferlen[bnum]++; | |
207 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);} | |
208 if(bufferlen[bnum]>=array.length){ | |
209 | |
210 if(verbose){System.err.println("Moving array.");} | |
211 bufferlen[bnum]=0; | |
212 buffers[bnum]=new long[array.length]; | |
213 | |
214 writers[bnum].add(array); | |
215 if(verbose){System.err.println("Moved.");} | |
216 } | |
217 } | |
218 | |
219 for(int i=1; i<hashes; i++){ | |
220 key2=Long.rotateRight(key2, hashBits); | |
221 key2=hash(key2, i); | |
222 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));} | |
223 // assert(readHashed(key2)==0); | |
224 | |
225 int bnum=(int)(key2&arrayMask); | |
226 long[] array=buffers[bnum]; | |
227 int loc=bufferlen[bnum]; | |
228 array[loc]=key2; | |
229 bufferlen[bnum]++; | |
230 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);} | |
231 if(bufferlen[bnum]>=array.length){ | |
232 | |
233 if(verbose){System.err.println("Moving array.");} | |
234 bufferlen[bnum]=0; | |
235 buffers[bnum]=new long[array.length]; | |
236 | |
237 writers[bnum].add(array); | |
238 if(verbose){System.err.println("Moved.");} | |
239 } | |
240 } | |
241 } | |
242 | |
243 /** Returns unincremented value */ | |
244 @Override | |
245 public int incrementAndReturnUnincremented(long key, int incr){ | |
246 throw new RuntimeException("Operation not supported."); | |
247 } | |
248 | |
249 @Override | |
250 public long[] transformToFrequency(){ | |
251 return transformToFrequency(matrix); | |
252 } | |
253 | |
254 @Override | |
255 public ByteBuilder toContentsString(){ | |
256 ByteBuilder sb=new ByteBuilder(); | |
257 sb.append("["); | |
258 String comma=""; | |
259 for(int[] array : matrix){ | |
260 for(int i=0; i<array.length; i++){ | |
261 int word=array[i]; | |
262 for(int j=0; j<cellsPerWord; j++){ | |
263 int x=word&valueMask; | |
264 sb.append(comma); | |
265 sb.append(x); | |
266 word>>>=cellBits; | |
267 comma=", "; | |
268 } | |
269 } | |
270 } | |
271 sb.append("]"); | |
272 return sb; | |
273 } | |
274 | |
275 @Override | |
276 public double usedFraction(){return cellsUsed/(double)cells;} | |
277 | |
278 @Override | |
279 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;} | |
280 | |
281 @Override | |
282 public long cellsUsed(int mindepth){ | |
283 long count=0; | |
284 // System.out.println("A"); | |
285 for(int[] array : matrix){ | |
286 // System.out.println("B"); | |
287 if(array!=null){ | |
288 // System.out.println("C"); | |
289 for(int word : array){ | |
290 // System.out.println("D: "+Integer.toHexString(word)); | |
291 while(word>0){ | |
292 int x=word&valueMask; | |
293 // System.out.println("E: "+x+", "+mindepth); | |
294 if(x>=mindepth){count++;} | |
295 word>>>=cellBits; | |
296 } | |
297 } | |
298 } | |
299 } | |
300 return count; | |
301 } | |
302 | |
303 | |
304 @Override | |
305 final long hash(long key, int row){ | |
306 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
307 // int cell=(int)(hashCellMask&(key)); | |
308 | |
309 if(row==0){//Doublehash only first time | |
310 key=key^hashMasks[(row+4)%hashMasks.length][cell]; | |
311 cell=(int)(hashCellMask&(key>>5)); | |
312 // cell=(int)(hashCellMask&(key>>hashBits)); | |
313 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
314 } | |
315 | |
316 return key^hashMasks[row][cell]; | |
317 } | |
318 | |
319 /** | |
320 * @param i | |
321 * @param j | |
322 * @return | |
323 */ | |
324 private static long[][] makeMasks(int rows, int cols) { | |
325 | |
326 long seed; | |
327 synchronized(KCountArray7MT.class){ | |
328 seed=counter; | |
329 counter++; | |
330 } | |
331 | |
332 Timer t=new Timer(); | |
333 long[][] r=new long[rows][cols]; | |
334 Random randy=Shared.threadLocalRandom(seed); | |
335 for(int i=0; i<r.length; i++){ | |
336 fillMasks(r[i], randy); | |
337 } | |
338 t.stop(); | |
339 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);} | |
340 return r; | |
341 } | |
342 | |
343 private static void fillMasks(long[] r, Random randy) { | |
344 // for(int i=0; i<r.length; i++){ | |
345 // long x=0; | |
346 // while(Long.bitCount(x&0xFFFFFFFF)!=16){ | |
347 // x=randy.nextLong(); | |
348 // } | |
349 // r[i]=(x&Long.MAX_VALUE); | |
350 // } | |
351 | |
352 final int hlen=(1<<hashBits); | |
353 assert(r.length==hlen); | |
354 int[] count1=new int[hlen]; | |
355 int[] count2=new int[hlen]; | |
356 final long mask=hlen-1; | |
357 | |
358 for(int i=0; i<r.length; i++){ | |
359 long x=0; | |
360 int y=0; | |
361 int z=0; | |
362 while(Long.bitCount(x&0xFFFFFFFFL)!=16){ | |
363 x=randy.nextLong(); | |
364 while(Long.bitCount(x&0xFFFFFFFFL)<16){ | |
365 x|=(1L<<randy.nextInt(32)); | |
366 } | |
367 while(Long.bitCount(x&0xFFFFFFFFL)>16){ | |
368 x&=(~(1L<<randy.nextInt(32))); | |
369 } | |
370 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){ | |
371 x|=(1L<<(randy.nextInt(32)+32)); | |
372 } | |
373 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){ | |
374 x&=(~(1L<<(randy.nextInt(32)+32))); | |
375 } | |
376 | |
377 // System.out.print("."); | |
378 // y=(((int)(x&mask))^i); | |
379 y=(((int)(x&mask))); | |
380 z=(int)((x>>hashBits)&mask); | |
381 if(count1[y]>0 || count2[z]>0){ | |
382 x=0; | |
383 } | |
384 } | |
385 // System.out.println(Long.toBinaryString(x)); | |
386 r[i]=(x&Long.MAX_VALUE); | |
387 count1[y]++; | |
388 count2[z]++; | |
389 } | |
390 | |
391 } | |
392 | |
393 | |
394 @Override | |
395 public void initialize(){ | |
396 for(int i=0; i<writers.length; i++){ | |
397 writers[i]=new WriteThread(i); | |
398 writers[i].start(); | |
399 | |
400 // while(!writers[i].isAlive()){ | |
401 // System.out.print("."); | |
402 // } | |
403 } | |
404 // assert(false) : writers.length; | |
405 } | |
406 | |
407 @Override | |
408 public void shutdown(){ | |
409 if(finished){return;} | |
410 synchronized(this){ | |
411 if(finished){return;} | |
412 | |
413 //Clear buffers | |
414 for(int i=0; i<numArrays; i++){ | |
415 long[] array=buffers[i]; | |
416 int len=bufferlen[i]; | |
417 buffers[i]=null; | |
418 bufferlen[i]=0; | |
419 | |
420 if(len<array.length){ | |
421 array=Arrays.copyOf(array, len); | |
422 } | |
423 | |
424 if(array.length>0){ | |
425 writers[i].add(array); | |
426 } | |
427 } | |
428 | |
429 //Add poison | |
430 for(WriteThread wt : writers){ | |
431 wt.add(poison); | |
432 } | |
433 | |
434 //Wait for termination | |
435 for(WriteThread wt : writers){ | |
436 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive()); | |
437 while(wt.isAlive()){ | |
438 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive()); | |
439 try { | |
440 wt.join(10000); | |
441 } catch (InterruptedException e) { | |
442 // TODO Auto-generated catch block | |
443 e.printStackTrace(); | |
444 } | |
445 if(wt.isAlive()){System.err.println(wt.getClass().getCanonicalName()+" is taking a long time to die.");} | |
446 } | |
447 cellsUsed+=wt.cellsUsedPersonal; | |
448 // System.out.println("cellsUsed="+cellsUsed); | |
449 // System.err.println("wt.cellsUsedPersonal="+wt.cellsUsedPersonal); | |
450 } | |
451 | |
452 assert(!finished); | |
453 finished=true; | |
454 } | |
455 } | |
456 | |
457 private class WriteThread extends Thread{ | |
458 | |
459 public WriteThread(int tnum){ | |
460 num=tnum; | |
461 } | |
462 | |
463 @Override | |
464 public void run(){ | |
465 assert(matrix[num]==null); | |
466 array=new int[wordsPerArray]; //Makes NUMA systems use local memory. | |
467 | |
468 matrix[num]=array; | |
469 | |
470 long[] keys=null; | |
471 while(!shutdown){ | |
472 | |
473 if(verbose){System.err.println(" - Reading keys for wt"+num+".");} | |
474 while(keys==null){ | |
475 try { | |
476 keys=writeQueue.take(); | |
477 } catch (InterruptedException e) { | |
478 // TODO Auto-generated catch block | |
479 e.printStackTrace(); | |
480 } | |
481 } | |
482 if(keys==poison){ | |
483 // assert(false); | |
484 shutdown=true; | |
485 }else{ | |
486 for(long key : keys){ | |
487 incrementHashedLocal(key); | |
488 } | |
489 } | |
490 // System.out.println(" -- Read keys for wt"+num+". poison="+(keys==poison)+", len="+keys.length); | |
491 if(verbose){System.err.println(" -- Read keys for wt"+num+". (success)");} | |
492 keys=null; | |
493 if(verbose){System.err.println("shutdown="+shutdown);} | |
494 } | |
495 | |
496 // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+"."); | |
497 // assert(false) : ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+"."; | |
498 | |
499 array=null; | |
500 } | |
501 | |
502 void add(long[] keys){ | |
503 // assert(isAlive()); | |
504 assert(!shutdown); | |
505 if(shutdown){return;} | |
506 // assert(keys!=poison); | |
507 if(verbose){System.err.println(" + Adding keys to wt"+num+".");} | |
508 boolean success=false; | |
509 while(!success){ | |
510 try { | |
511 writeQueue.put(keys); | |
512 success=true; | |
513 } catch (InterruptedException e) { | |
514 // TODO Auto-generated catch block | |
515 e.printStackTrace(); | |
516 } | |
517 } | |
518 if(verbose){System.err.println(" ++ Added keys to wt"+num+". (success)");} | |
519 } | |
520 | |
521 private int incrementHashedLocal(long key){ | |
522 assert((key&arrayMask)==num); | |
523 key=(key>>>arrayBits)%(cellMod); | |
524 // key=(key>>>(arrayBits+1))%(cellMod); | |
525 int index=(int)(key>>>indexShift); | |
526 int word=array[index]; | |
527 int cellShift=(int)(cellBits*key); | |
528 int value=((word>>>cellShift)&valueMask); | |
529 if(value==0){cellsUsedPersonal++;} | |
530 value=min(value+1, maxValue); | |
531 word=(value<<cellShift)|(word&~((valueMask)<<cellShift)); | |
532 array[index]=word; | |
533 return value; | |
534 } | |
535 | |
536 private int[] array; | |
537 private final int num; | |
538 public long cellsUsedPersonal=0; | |
539 | |
540 public ArrayBlockingQueue<long[]> writeQueue=new ArrayBlockingQueue<long[]>(16); | |
541 public boolean shutdown=false; | |
542 | |
543 } | |
544 | |
545 | |
546 public long cellsUsed(){return cellsUsed;} | |
547 | |
548 private boolean finished=false; | |
549 | |
550 private long cellsUsed; | |
551 final int[][] matrix; | |
552 private final WriteThread[] writers=new WriteThread[numArrays]; | |
553 private final int hashes; | |
554 final int wordsPerArray; | |
555 private final long cellsPerArray; | |
556 final long cellMod; | |
557 private final long[][] hashMasks=makeMasks(8, hashArrayLength); | |
558 | |
559 private final long[][] buffers=new long[numArrays][500]; | |
560 private final int[] bufferlen=new int[numArrays]; | |
561 | |
562 private static final int hashBits=6; | |
563 private static final int hashArrayLength=1<<hashBits; | |
564 private static final int hashCellMask=hashArrayLength-1; | |
565 static final long[] poison=new long[0]; | |
566 | |
567 private static long counter=0; | |
568 | |
569 } |