comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray7MT.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package bloom;
2
3 import java.util.Arrays;
4 import java.util.Random;
5 import java.util.concurrent.ArrayBlockingQueue;
6
7 import shared.Primes;
8 import shared.Shared;
9 import shared.Timer;
10 import shared.Tools;
11 import structures.ByteBuilder;
12
13
14 /**
15 *
16 * Uses prime numbers for array lengths.
17 *
18 * @author Brian Bushnell
19 * @date Aug 17, 2012
20 *
21 */
22 public class KCountArray7MT extends KCountArray {
23
24 /**
25 *
26 */
27 private static final long serialVersionUID = -8767643111803866913L;
28
29 public static void main(String[] args){
30 long cells=Long.parseLong(args[0]);
31 int bits=Integer.parseInt(args[1]);
32 int hashes=Integer.parseInt(args[2]);
33
34 verbose=false;
35
36 KCountArray7MT kca=new KCountArray7MT(cells, bits, hashes);
37
38 System.out.println(kca.read(0));
39 kca.increment(0);
40 System.out.println(kca.read(0));
41 kca.increment(0);
42 System.out.println(kca.read(0));
43 System.out.println();
44
45 System.out.println(kca.read(1));
46 kca.increment(1);
47 System.out.println(kca.read(1));
48 kca.increment(1);
49 System.out.println(kca.read(1));
50 System.out.println();
51
52 System.out.println(kca.read(100));
53 kca.increment(100);
54 System.out.println(kca.read(100));
55 kca.increment(100);
56 System.out.println(kca.read(100));
57 kca.increment(100);
58 System.out.println(kca.read(100));
59 System.out.println();
60
61
62 System.out.println(kca.read(150));
63 kca.increment(150);
64 System.out.println(kca.read(150));
65 System.out.println();
66
67 }
68
69 public KCountArray7MT(long cells_, int bits_, int hashes_){
70 super(getPrimeCells(cells_, bits_), bits_, getDesiredArrays(cells_, bits_));
71 // verbose=false;
72 // assert(false);
73 // System.out.println(cells);
74 cellsPerArray=cells/numArrays;
75 wordsPerArray=(int)((cellsPerArray%cellsPerWord)==0 ? (cellsPerArray/cellsPerWord) : (cellsPerArray/cellsPerWord+1));
76 cellMod=cellsPerArray;
77 hashes=hashes_;
78 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
79 // assert(false);
80 matrix=new int[numArrays][];
81 assert(hashes>0 && hashes<=hashMasks.length);
82 }
83
84 private static int getDesiredArrays(long desiredCells, int bits){
85
86 long words=Tools.max((desiredCells*bits+31)/32, minArrays);
87 int arrays=minArrays;
88 while(words/arrays>=Integer.MAX_VALUE){
89 arrays*=2;
90 }
91 // assert(false) : arrays;
92 return arrays;
93 }
94
95 private static long getPrimeCells(long desiredCells, int bits){
96
97 int arrays=getDesiredArrays(desiredCells, bits);
98
99 long x=(desiredCells+arrays-1)/arrays;
100 long x2=Primes.primeAtMost(x);
101 return x2*arrays;
102 }
103
104 @Override
105 public int read(final long rawKey){
106 assert(finished);
107 if(verbose){System.err.println("Reading raw key "+rawKey);}
108 long key2=hash(rawKey, 0);
109 int min=readHashed(key2);
110 for(int i=1; i<hashes && min>0; i++){
111 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);}
112 key2=Long.rotateRight(key2, hashBits);
113 key2=hash(key2, i);
114 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);}
115 min=min(min, readHashed(key2));
116 }
117 return min;
118 }
119
120 private int readHashed(long key){
121 if(verbose){System.err.print("Reading hashed key "+key);}
122 // System.out.println("key="+key);
123 int arrayNum=(int)(key&arrayMask);
124 key=(key>>>arrayBits)%(cellMod);
125 // key=(key>>>(arrayBits+1))%(cellMod);
126 // System.out.println("array="+arrayNum);
127 // System.out.println("key2="+key);
128 int[] array=matrix[arrayNum];
129 int index=(int)(key>>>indexShift);
130 // assert(false) : indexShift;
131 // System.out.println("index="+index);
132 int word=array[index];
133 // System.out.println("word="+Integer.toHexString(word));
134 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask)));
135 // int cellShift=(int)(cellBits*(key&cellMask));
136 int cellShift=(int)(cellBits*key);
137 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));}
138 // System.out.println("cellShift="+cellShift);
139 return (int)((word>>>cellShift)&valueMask);
140 }
141
142 @Override
143 public void write(final long key, int value){
144 throw new RuntimeException("Not allowed for this class.");
145 }
146
147 @Override
148 /** This should increase speed by doing the first hash outside the critical section, but it does not seem to help. */
149 public void increment(long[] keys){
150 for(int i=0; i<keys.length; i++){
151 keys[i]=hash(keys[i], 0);
152 }
153 synchronized(buffers){
154 for(long key : keys){
155 incrementPartiallyHashed(key);
156 }
157 }
158 }
159
160 //Slow
161 @Override
162 public void increment(final long rawKey, int amt){
163 for(int i=0; i<amt; i++){increment0(rawKey);}
164 }
165
166 public void increment0(final long rawKey){
167 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
168
169 long key2=rawKey;
170 for(int i=0; i<hashes; i++){
171 key2=hash(key2, i);
172 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
173 // assert(readHashed(key2)==0);
174
175 int bnum=(int)(key2&arrayMask);
176 long[] array=buffers[bnum];
177 int loc=bufferlen[bnum];
178 array[loc]=key2;
179 bufferlen[bnum]++;
180 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);}
181 if(bufferlen[bnum]>=array.length){
182
183 if(verbose){System.err.println("Moving array.");}
184 bufferlen[bnum]=0;
185 buffers[bnum]=new long[array.length];
186
187 writers[bnum].add(array);
188 if(verbose){System.err.println("Moved.");}
189 }
190 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
191 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
192 key2=Long.rotateRight(key2, hashBits);
193 }
194 }
195
196 private void incrementPartiallyHashed(final long pKey){
197 if(verbose){System.err.println("\n*** Incrementing key "+pKey+" ***");}
198
199 long key2=pKey;
200
201 {
202 int bnum=(int)(key2&arrayMask);
203 long[] array=buffers[bnum];
204 int loc=bufferlen[bnum];
205 array[loc]=key2;
206 bufferlen[bnum]++;
207 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);}
208 if(bufferlen[bnum]>=array.length){
209
210 if(verbose){System.err.println("Moving array.");}
211 bufferlen[bnum]=0;
212 buffers[bnum]=new long[array.length];
213
214 writers[bnum].add(array);
215 if(verbose){System.err.println("Moved.");}
216 }
217 }
218
219 for(int i=1; i<hashes; i++){
220 key2=Long.rotateRight(key2, hashBits);
221 key2=hash(key2, i);
222 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
223 // assert(readHashed(key2)==0);
224
225 int bnum=(int)(key2&arrayMask);
226 long[] array=buffers[bnum];
227 int loc=bufferlen[bnum];
228 array[loc]=key2;
229 bufferlen[bnum]++;
230 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);}
231 if(bufferlen[bnum]>=array.length){
232
233 if(verbose){System.err.println("Moving array.");}
234 bufferlen[bnum]=0;
235 buffers[bnum]=new long[array.length];
236
237 writers[bnum].add(array);
238 if(verbose){System.err.println("Moved.");}
239 }
240 }
241 }
242
243 /** Returns unincremented value */
244 @Override
245 public int incrementAndReturnUnincremented(long key, int incr){
246 throw new RuntimeException("Operation not supported.");
247 }
248
249 @Override
250 public long[] transformToFrequency(){
251 return transformToFrequency(matrix);
252 }
253
254 @Override
255 public ByteBuilder toContentsString(){
256 ByteBuilder sb=new ByteBuilder();
257 sb.append("[");
258 String comma="";
259 for(int[] array : matrix){
260 for(int i=0; i<array.length; i++){
261 int word=array[i];
262 for(int j=0; j<cellsPerWord; j++){
263 int x=word&valueMask;
264 sb.append(comma);
265 sb.append(x);
266 word>>>=cellBits;
267 comma=", ";
268 }
269 }
270 }
271 sb.append("]");
272 return sb;
273 }
274
275 @Override
276 public double usedFraction(){return cellsUsed/(double)cells;}
277
278 @Override
279 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
280
281 @Override
282 public long cellsUsed(int mindepth){
283 long count=0;
284 // System.out.println("A");
285 for(int[] array : matrix){
286 // System.out.println("B");
287 if(array!=null){
288 // System.out.println("C");
289 for(int word : array){
290 // System.out.println("D: "+Integer.toHexString(word));
291 while(word>0){
292 int x=word&valueMask;
293 // System.out.println("E: "+x+", "+mindepth);
294 if(x>=mindepth){count++;}
295 word>>>=cellBits;
296 }
297 }
298 }
299 }
300 return count;
301 }
302
303
304 @Override
305 final long hash(long key, int row){
306 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
307 // int cell=(int)(hashCellMask&(key));
308
309 if(row==0){//Doublehash only first time
310 key=key^hashMasks[(row+4)%hashMasks.length][cell];
311 cell=(int)(hashCellMask&(key>>5));
312 // cell=(int)(hashCellMask&(key>>hashBits));
313 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
314 }
315
316 return key^hashMasks[row][cell];
317 }
318
319 /**
320 * @param i
321 * @param j
322 * @return
323 */
324 private static long[][] makeMasks(int rows, int cols) {
325
326 long seed;
327 synchronized(KCountArray7MT.class){
328 seed=counter;
329 counter++;
330 }
331
332 Timer t=new Timer();
333 long[][] r=new long[rows][cols];
334 Random randy=Shared.threadLocalRandom(seed);
335 for(int i=0; i<r.length; i++){
336 fillMasks(r[i], randy);
337 }
338 t.stop();
339 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);}
340 return r;
341 }
342
343 private static void fillMasks(long[] r, Random randy) {
344 // for(int i=0; i<r.length; i++){
345 // long x=0;
346 // while(Long.bitCount(x&0xFFFFFFFF)!=16){
347 // x=randy.nextLong();
348 // }
349 // r[i]=(x&Long.MAX_VALUE);
350 // }
351
352 final int hlen=(1<<hashBits);
353 assert(r.length==hlen);
354 int[] count1=new int[hlen];
355 int[] count2=new int[hlen];
356 final long mask=hlen-1;
357
358 for(int i=0; i<r.length; i++){
359 long x=0;
360 int y=0;
361 int z=0;
362 while(Long.bitCount(x&0xFFFFFFFFL)!=16){
363 x=randy.nextLong();
364 while(Long.bitCount(x&0xFFFFFFFFL)<16){
365 x|=(1L<<randy.nextInt(32));
366 }
367 while(Long.bitCount(x&0xFFFFFFFFL)>16){
368 x&=(~(1L<<randy.nextInt(32)));
369 }
370 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){
371 x|=(1L<<(randy.nextInt(32)+32));
372 }
373 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){
374 x&=(~(1L<<(randy.nextInt(32)+32)));
375 }
376
377 // System.out.print(".");
378 // y=(((int)(x&mask))^i);
379 y=(((int)(x&mask)));
380 z=(int)((x>>hashBits)&mask);
381 if(count1[y]>0 || count2[z]>0){
382 x=0;
383 }
384 }
385 // System.out.println(Long.toBinaryString(x));
386 r[i]=(x&Long.MAX_VALUE);
387 count1[y]++;
388 count2[z]++;
389 }
390
391 }
392
393
394 @Override
395 public void initialize(){
396 for(int i=0; i<writers.length; i++){
397 writers[i]=new WriteThread(i);
398 writers[i].start();
399
400 // while(!writers[i].isAlive()){
401 // System.out.print(".");
402 // }
403 }
404 // assert(false) : writers.length;
405 }
406
407 @Override
408 public void shutdown(){
409 if(finished){return;}
410 synchronized(this){
411 if(finished){return;}
412
413 //Clear buffers
414 for(int i=0; i<numArrays; i++){
415 long[] array=buffers[i];
416 int len=bufferlen[i];
417 buffers[i]=null;
418 bufferlen[i]=0;
419
420 if(len<array.length){
421 array=Arrays.copyOf(array, len);
422 }
423
424 if(array.length>0){
425 writers[i].add(array);
426 }
427 }
428
429 //Add poison
430 for(WriteThread wt : writers){
431 wt.add(poison);
432 }
433
434 //Wait for termination
435 for(WriteThread wt : writers){
436 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive());
437 while(wt.isAlive()){
438 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive());
439 try {
440 wt.join(10000);
441 } catch (InterruptedException e) {
442 // TODO Auto-generated catch block
443 e.printStackTrace();
444 }
445 if(wt.isAlive()){System.err.println(wt.getClass().getCanonicalName()+" is taking a long time to die.");}
446 }
447 cellsUsed+=wt.cellsUsedPersonal;
448 // System.out.println("cellsUsed="+cellsUsed);
449 // System.err.println("wt.cellsUsedPersonal="+wt.cellsUsedPersonal);
450 }
451
452 assert(!finished);
453 finished=true;
454 }
455 }
456
457 private class WriteThread extends Thread{
458
459 public WriteThread(int tnum){
460 num=tnum;
461 }
462
463 @Override
464 public void run(){
465 assert(matrix[num]==null);
466 array=new int[wordsPerArray]; //Makes NUMA systems use local memory.
467
468 matrix[num]=array;
469
470 long[] keys=null;
471 while(!shutdown){
472
473 if(verbose){System.err.println(" - Reading keys for wt"+num+".");}
474 while(keys==null){
475 try {
476 keys=writeQueue.take();
477 } catch (InterruptedException e) {
478 // TODO Auto-generated catch block
479 e.printStackTrace();
480 }
481 }
482 if(keys==poison){
483 // assert(false);
484 shutdown=true;
485 }else{
486 for(long key : keys){
487 incrementHashedLocal(key);
488 }
489 }
490 // System.out.println(" -- Read keys for wt"+num+". poison="+(keys==poison)+", len="+keys.length);
491 if(verbose){System.err.println(" -- Read keys for wt"+num+". (success)");}
492 keys=null;
493 if(verbose){System.err.println("shutdown="+shutdown);}
494 }
495
496 // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+".");
497 // assert(false) : ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+".";
498
499 array=null;
500 }
501
502 void add(long[] keys){
503 // assert(isAlive());
504 assert(!shutdown);
505 if(shutdown){return;}
506 // assert(keys!=poison);
507 if(verbose){System.err.println(" + Adding keys to wt"+num+".");}
508 boolean success=false;
509 while(!success){
510 try {
511 writeQueue.put(keys);
512 success=true;
513 } catch (InterruptedException e) {
514 // TODO Auto-generated catch block
515 e.printStackTrace();
516 }
517 }
518 if(verbose){System.err.println(" ++ Added keys to wt"+num+". (success)");}
519 }
520
521 private int incrementHashedLocal(long key){
522 assert((key&arrayMask)==num);
523 key=(key>>>arrayBits)%(cellMod);
524 // key=(key>>>(arrayBits+1))%(cellMod);
525 int index=(int)(key>>>indexShift);
526 int word=array[index];
527 int cellShift=(int)(cellBits*key);
528 int value=((word>>>cellShift)&valueMask);
529 if(value==0){cellsUsedPersonal++;}
530 value=min(value+1, maxValue);
531 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
532 array[index]=word;
533 return value;
534 }
535
536 private int[] array;
537 private final int num;
538 public long cellsUsedPersonal=0;
539
540 public ArrayBlockingQueue<long[]> writeQueue=new ArrayBlockingQueue<long[]>(16);
541 public boolean shutdown=false;
542
543 }
544
545
546 public long cellsUsed(){return cellsUsed;}
547
548 private boolean finished=false;
549
550 private long cellsUsed;
551 final int[][] matrix;
552 private final WriteThread[] writers=new WriteThread[numArrays];
553 private final int hashes;
554 final int wordsPerArray;
555 private final long cellsPerArray;
556 final long cellMod;
557 private final long[][] hashMasks=makeMasks(8, hashArrayLength);
558
559 private final long[][] buffers=new long[numArrays][500];
560 private final int[] bufferlen=new int[numArrays];
561
562 private static final int hashBits=6;
563 private static final int hashArrayLength=1<<hashBits;
564 private static final int hashCellMask=hashArrayLength-1;
565 static final long[] poison=new long[0];
566
567 private static long counter=0;
568
569 }