comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray5MT.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package bloom;
2
3 import java.util.Arrays;
4 import java.util.Random;
5 import java.util.concurrent.ArrayBlockingQueue;
6
7 import shared.Shared;
8 import shared.Timer;
9
10
11 /**
12 *
13 * Uses hashing rather than direct-mapping to support longer kmers.
14 *
15 * @author Brian Bushnell
16 * @date Aug 17, 2012
17 *
18 */
19 public class KCountArray5MT extends KCountArray {
20
21 /**
22 *
23 */
24 private static final long serialVersionUID = -5456926900022701212L;
25
26 public static void main(String[] args){
27 long cells=Long.parseLong(args[0]);
28 int bits=Integer.parseInt(args[1]);
29 int gap=Integer.parseInt(args[2]);
30 int hashes=Integer.parseInt(args[3]);
31
32 verbose=false;
33
34 KCountArray5MT kca=new KCountArray5MT(cells, bits, gap, hashes);
35
36 System.out.println(kca.read(0));
37 kca.increment(0);
38 System.out.println(kca.read(0));
39 kca.increment(0);
40 System.out.println(kca.read(0));
41 System.out.println();
42
43 System.out.println(kca.read(1));
44 kca.increment(1);
45 System.out.println(kca.read(1));
46 kca.increment(1);
47 System.out.println(kca.read(1));
48 System.out.println();
49
50 System.out.println(kca.read(100));
51 kca.increment(100);
52 System.out.println(kca.read(100));
53 kca.increment(100);
54 System.out.println(kca.read(100));
55 kca.increment(100);
56 System.out.println(kca.read(100));
57 System.out.println();
58
59
60 System.out.println(kca.read(150));
61 kca.increment(150);
62 System.out.println(kca.read(150));
63 System.out.println();
64
65 }
66
67 public KCountArray5MT(long cells_, int bits_, int gap_, int hashes_){
68 super(cells_, bits_);
69 // verbose=false;
70 long words=cells/cellsPerWord;
71 assert(words/numArrays<=Integer.MAX_VALUE);
72 wordsPerArray=(int)(words/numArrays);
73 cellsPerArray=cells/numArrays;
74 cellMod=cellsPerArray-1;
75 hashes=hashes_;
76 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
77 // assert(false);
78 matrix=new int[numArrays][];
79 assert(hashes>0 && hashes<=hashMasks.length);
80 }
81
82 @Override
83 public int read(final long rawKey){
84 assert(finished);
85 if(verbose){System.err.println("Reading raw key "+rawKey);}
86 long key2=hash(rawKey, 0);
87 int min=readHashed(key2);
88 for(int i=1; i<hashes && min>0; i++){
89 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);}
90 key2=Long.rotateRight(key2, hashBits);
91 key2=hash(key2, i);
92 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);}
93 min=min(min, readHashed(key2));
94 }
95 return min;
96 }
97
98 int readHashed(long key){
99 assert(finished);
100 if(verbose){System.err.print("Reading hashed key "+key);}
101 // System.out.println("key="+key);
102 int arrayNum=(int)(key&arrayMask);
103 key=(key>>>arrayBits)%(cellMod);
104 // System.out.println("array="+arrayNum);
105 // System.out.println("key2="+key);
106 int[] array=matrix[arrayNum];
107 int index=(int)(key>>>indexShift);
108 // assert(false) : indexShift;
109 // System.out.println("index="+index);
110 int word=array[index];
111 // System.out.println("word="+Integer.toHexString(word));
112 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask)));
113 // int cellShift=(int)(cellBits*(key&cellMask));
114 int cellShift=(int)(cellBits*key);
115 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));}
116 // System.out.println("cellShift="+cellShift);
117 return (int)((word>>>cellShift)&valueMask);
118 }
119
120 @Override
121 public void write(final long key, int value){
122 throw new RuntimeException("Not allowed for this class.");
123 }
124
125 //Slow
126 @Override
127 public void increment(final long rawKey, int amt){
128 for(int i=0; i<amt; i++){increment0(rawKey);}
129 }
130
131 public void increment0(final long rawKey){
132 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
133
134 buffer[bufferlen]=hash(rawKey, 0);
135 bufferlen++;
136
137 if(bufferlen>=buffer.length){
138
139 if(verbose){System.err.println("Moving array.");}
140
141 for(int w=0; w<writers.length; w++){
142 writers[w].add(buffer);
143 }
144 bufferlen=0;
145 buffer=new long[buffer.length];
146 if(verbose){System.err.println("Moved.");}
147 }
148
149 }
150
151
152 @Override
153 public synchronized void increment(long[] keys){
154 for(int i=0; i<keys.length; i++){
155 keys[i]=hash(keys[i],0);
156 }
157 for(int w=0; w<writers.length; w++){
158 writers[w].add(keys);
159 }
160 }
161
162 /** Returns unincremented value */
163 @Override
164 public int incrementAndReturnUnincremented(long key, int incr){
165 throw new RuntimeException("Operation not supported.");
166 }
167
168 @Override
169 public long[] transformToFrequency(){
170 return transformToFrequency(matrix);
171 }
172
173 @Override
174 public String toContentsString(){
175 StringBuilder sb=new StringBuilder();
176 sb.append("[");
177 String comma="";
178 for(int[] array : matrix){
179 for(int i=0; i<array.length; i++){
180 int word=array[i];
181 for(int j=0; j<cellsPerWord; j++){
182 int x=word&valueMask;
183 sb.append(comma);
184 sb.append(x);
185 word>>>=cellBits;
186 comma=", ";
187 }
188 }
189 }
190 sb.append("]");
191 return sb.toString();
192 }
193
194 @Override
195 public double usedFraction(){return cellsUsed/(double)cells;}
196
197 @Override
198 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
199
200 @Override
201 public long cellsUsed(int mindepth){
202 long count=0;
203 for(int[] array : matrix){
204 if(array!=null){
205 for(int word : array){
206 while(word>0){
207 int x=word&valueMask;
208 if(x>=mindepth){count++;}
209 word>>>=cellBits;
210 }
211 }
212 }
213 }
214 return count;
215 }
216
217
218 @Override
219 final long hash(long key, int row){
220 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
221 // int cell=(int)(hashCellMask&(key));
222
223 if(row==0){//Doublehash only first time
224 key=key^hashMasks[(row+4)%hashMasks.length][cell];
225 cell=(int)(hashCellMask&(key>>4));
226 // cell=(int)(hashCellMask&(key>>hashBits));
227 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
228 }
229
230 return key^hashMasks[row][cell];
231 }
232
233 /**
234 * @param i
235 * @param j
236 * @return
237 */
238 private static long[][] makeMasks(int rows, int cols) {
239
240 long seed;
241 synchronized(KCountArray5MT.class){
242 seed=counter;
243 counter++;
244 }
245
246 Timer t=new Timer();
247 long[][] r=new long[rows][cols];
248 Random randy=Shared.threadLocalRandom(seed);
249 for(int i=0; i<r.length; i++){
250 fillMasks(r[i], randy);
251 }
252 t.stop();
253 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);}
254 return r;
255 }
256
257 private static void fillMasks(long[] r, Random randy) {
258 // for(int i=0; i<r.length; i++){
259 // long x=0;
260 // while(Long.bitCount(x&0xFFFFFFFF)!=16){
261 // x=randy.nextLong();
262 // }
263 // r[i]=(x&Long.MAX_VALUE);
264 // }
265
266 final int hlen=(1<<hashBits);
267 assert(r.length==hlen);
268 int[] count1=new int[hlen];
269 int[] count2=new int[hlen];
270 final long mask=hlen-1;
271
272 for(int i=0; i<r.length; i++){
273 long x=0;
274 int y=0;
275 int z=0;
276 while(Long.bitCount(x&0xFFFFFFFFL)!=16){
277 x=randy.nextLong();
278 while(Long.bitCount(x&0xFFFFFFFFL)<16){
279 x|=(1L<<randy.nextInt(32));
280 }
281 while(Long.bitCount(x&0xFFFFFFFFL)>16){
282 x&=(~(1L<<randy.nextInt(32)));
283 }
284 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){
285 x|=(1L<<(randy.nextInt(32)+32));
286 }
287 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){
288 x&=(~(1L<<(randy.nextInt(32)+32)));
289 }
290
291 // System.out.print(".");
292 // y=(((int)(x&mask))^i);
293 y=(((int)(x&mask)));
294 z=(int)((x>>hashBits)&mask);
295 if(count1[y]>0 || count2[z]>0){
296 x=0;
297 }
298 }
299 // System.out.println(Long.toBinaryString(x));
300 r[i]=(x&Long.MAX_VALUE);
301 count1[y]++;
302 count2[z]++;
303 }
304
305 }
306
307
308 @Override
309 public void initialize(){
310 for(int i=0; i<writers.length; i++){
311 writers[i]=new WriteThread(i);
312 writers[i].start();
313
314 while(!writers[i].isAlive()){
315 System.out.print(".");
316 }
317 }
318 }
319
320 @Override
321 public void shutdown(){
322 if(finished){return;}
323 synchronized(this){
324 if(finished){return;}
325
326 //Clear buffer
327 if(bufferlen<buffer.length){
328 buffer=Arrays.copyOf(buffer, bufferlen);
329 }
330
331 if(buffer.length>0){
332 for(int i=0; i<writers.length; i++)
333 writers[i].add(buffer);
334 }
335 buffer=null;
336 bufferlen=0;
337
338
339 //Add poison
340 for(WriteThread wt : writers){
341 wt.add(poison);
342 }
343
344 //Wait for termination
345 for(WriteThread wt : writers){
346 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive());
347 while(wt.isAlive()){
348 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive());
349 try {
350 wt.join(10000);
351 } catch (InterruptedException e) {
352 // TODO Auto-generated catch block
353 e.printStackTrace();
354 }
355 if(wt.isAlive()){System.err.println(wt.getClass().getCanonicalName()+" is taking a long time to die.");}
356 }
357 cellsUsed+=wt.cellsUsedPersonal;
358 // System.out.println("cellsUsed="+cellsUsed);
359 }
360
361 assert(!finished);
362 finished=true;
363 }
364 }
365
366 private class WriteThread extends Thread{
367
368 public WriteThread(int tnum){
369 num=tnum;
370 }
371
372 @Override
373 public void run(){
374 assert(matrix[num]==null);
375 array=new int[wordsPerArray]; //Makes NUMA systems use local memory.
376 // assert(false);
377 matrix[num]=array;
378
379 // assert(num==1);
380
381 long[] keys=null;
382 while(!shutdown){
383 // assert(false);
384
385 if(verbose){System.err.println(" - Reading keys for wt"+num+".");}
386 while(keys==null){
387 // System.out.println("Searching for keys.");
388 try {
389 keys=writeQueue.take();
390 } catch (InterruptedException e) {
391 // TODO Auto-generated catch block
392 e.printStackTrace();
393 }
394 // System.out.println("*******************************************Found keys: "+keys.length);
395 // assert(false);
396 }
397 if(keys==poison){
398 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~0 ";
399 shutdown=true;
400 }else{
401 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~1 ";
402 for(long rawKey : keys){
403 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~2 ";
404 if(verbose){System.err.println("Writer "+num+" considering raw key "+rawKey);}
405 long key2=rawKey;
406 // int y=read(rawKey);
407 if((key2&arrayMask)==num){
408 int x=incrementHashedLocal(key2);
409 assert(x>=0) : "i="+0+", original=?, new should be >=0, new="+readHashed(key2)+", max="+maxValue+", key="+rawKey;
410 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
411
412 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~4 ";
413 }
414
415 for(int i=1; i<hashes; i++){
416 key2=Long.rotateRight(key2, hashBits);
417 key2=hash(key2, i);
418 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~3 ";
419 if(verbose){System.err.println("rawKey="+rawKey+", i="+i+", key2="+key2+", value="+readHashed(key2));}
420 if((key2&arrayMask)==num){
421 int x=incrementHashedLocal(key2);
422 assert(x>=0) : "i="+i+", original=?, new should be >=0, new="+readHashed(key2)+", max="+maxValue+", key="+rawKey;
423 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
424
425 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~4 ";
426 }
427
428 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~5 ";
429 }
430 // int z=read(rawKey);
431 // assert(hashes!=1 || !b || z==maxValue || z==y+1) : "b="+b+", y="+y+", z="+z+", rawKey="+rawKey+", num="+num;
432 }
433 }
434 // System.out.println(" -- Read keys for wt"+num+". poison="+(keys==poison)+", len="+keys.length);
435 if(verbose){System.err.println(" -- Read keys for wt"+num+". (success)");}
436 keys=null;
437 if(verbose){System.err.println("shutdown="+shutdown);}
438 }
439
440 // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+".");
441 // assert(false) : ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+".";
442
443 array=null;
444 }
445
446 void add(long[] keys){
447 // assert(isAlive());
448
449 // assert(!shutdown);
450 // if(shutdown){return;}
451
452 if(verbose){System.err.println(" + Adding keys to wt"+num+".");}
453 boolean success=false;
454 while(!success){
455 try {
456 writeQueue.put(keys);
457 success=true;
458 } catch (InterruptedException e) {
459 // TODO Auto-generated catch block
460 e.printStackTrace();
461 }
462 }
463 if(verbose){System.err.println(" ++ Added keys to wt"+num+". (success)");}
464 }
465
466 private int incrementHashedLocal(final long key_){
467 if(verbose){System.err.println("\n*** wt"+num+" incrementing hashed key "+key_+" ***");}
468 assert((key_&arrayMask)==num);
469 long key=(key_>>>arrayBits)%(cellMod);
470 int index=(int)(key>>>indexShift);
471 int word=array[index];
472 int cellShift=(int)(cellBits*key);
473 int value=((word>>>cellShift)&valueMask);
474 if(value==0){cellsUsedPersonal++;}
475 value=min(value+1, maxValue);
476 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
477 array[index]=word;
478 if(verbose){System.err.println("\n*** wt"+num+" Incremented hashed key "+key_+". Value = "+readHashed(key_)+" ***");}
479 return value;
480 }
481
482 private int[] array;
483 private final int num;
484 public long cellsUsedPersonal=0;
485
486 public ArrayBlockingQueue<long[]> writeQueue=new ArrayBlockingQueue<long[]>(8);
487 public boolean shutdown=false;
488
489 }
490
491
492 public long cellsUsed(){return cellsUsed;}
493
494 private boolean finished=false;
495
496 private long cellsUsed;
497 final int[][] matrix;
498 private final WriteThread[] writers=new WriteThread[numArrays];
499 final int hashes;
500 final int wordsPerArray;
501 private final long cellsPerArray;
502 final long cellMod;
503 private final long[][] hashMasks=makeMasks(8, hashArrayLength);
504
505 private long[] buffer=new long[2000];
506 private int bufferlen=0;
507
508 private static final int hashBits=6;
509 private static final int hashArrayLength=1<<hashBits;
510 private static final int hashCellMask=hashArrayLength-1;
511 static final long[] poison=new long[0];
512
513 private static long counter=0;
514
515 }