comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray6MT.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package bloom;
2
3 import java.util.Arrays;
4 import java.util.Random;
5 import java.util.concurrent.ArrayBlockingQueue;
6
7 import shared.Shared;
8 import shared.Timer;
9 import structures.ByteBuilder;
10
11
12 /**
13 *
14 * Uses hashing rather than direct-mapping to support longer kmers.
15 *
16 * @author Brian Bushnell
17 * @date Aug 17, 2012
18 *
19 */
20 public class KCountArray6MT extends KCountArray {
21
22 /**
23 *
24 */
25 private static final long serialVersionUID = -1524266549200637631L;
26
27 public static void main(String[] args){
28 long cells=Long.parseLong(args[0]);
29 int bits=Integer.parseInt(args[1]);
30 int gap=Integer.parseInt(args[2]);
31 int hashes=Integer.parseInt(args[3]);
32
33 verbose=false;
34
35 KCountArray6MT kca=new KCountArray6MT(cells, bits, gap, hashes);
36
37 System.out.println(kca.read(0));
38 kca.increment(0);
39 System.out.println(kca.read(0));
40 kca.increment(0);
41 System.out.println(kca.read(0));
42 System.out.println();
43
44 System.out.println(kca.read(1));
45 kca.increment(1);
46 System.out.println(kca.read(1));
47 kca.increment(1);
48 System.out.println(kca.read(1));
49 System.out.println();
50
51 System.out.println(kca.read(100));
52 kca.increment(100);
53 System.out.println(kca.read(100));
54 kca.increment(100);
55 System.out.println(kca.read(100));
56 kca.increment(100);
57 System.out.println(kca.read(100));
58 System.out.println();
59
60
61 System.out.println(kca.read(150));
62 kca.increment(150);
63 System.out.println(kca.read(150));
64 System.out.println();
65
66 }
67
68 public KCountArray6MT(long cells_, int bits_, int gap_, int hashes_){
69 super(cells_, bits_);
70 // verbose=false;
71 long words=cells/cellsPerWord;
72 assert(words/numArrays<=Integer.MAX_VALUE);
73 wordsPerArray=(int)(words/numArrays);
74 cellsPerArray=cells/numArrays;
75 cellMod=cellsPerArray-1;
76 hashes=hashes_;
77 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
78 // assert(false);
79 matrix=new int[numArrays][];
80 assert(hashes>0 && hashes<=hashMasks.length);
81 }
82
83 @Override
84 public int read(final long rawKey){
85 assert(finished);
86 if(verbose){System.err.println("Reading raw key "+rawKey);}
87
88 long key1=hash(rawKey, 3);
89 int arrayNum=(int)(key1&arrayMask);
90 long key2=hash(rawKey, 0);
91
92 int min=readHashed(key2, arrayNum);
93 for(int i=1; i<hashes && min>0; i++){
94 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);}
95 key2=Long.rotateRight(key2, hashBits);
96 key2=hash(key2, i);
97 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);}
98 min=min(min, readHashed(key2, arrayNum));
99 }
100 return min;
101 }
102
103 int readHashed(long key, int arrayNum){
104 if(verbose){System.err.print("Reading hashed key "+key);}
105 // System.out.println("key="+key);
106 // int arrayNum=(int)(key&arrayMask);
107 key=(key&Long.MAX_VALUE)%(cellMod);
108 // key=(key>>>(arrayBits+1))%(cellMod);
109 // System.out.println("array="+arrayNum);
110 // System.out.println("key2="+key);
111 int[] array=matrix[arrayNum];
112 int index=(int)(key>>>indexShift);
113 // assert(false) : indexShift;
114 // System.out.println("index="+index);
115 int word=array[index];
116 // System.out.println("word="+Integer.toHexString(word));
117 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask)));
118 // int cellShift=(int)(cellBits*(key&cellMask));
119 int cellShift=(int)(cellBits*key);
120 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));}
121 // System.out.println("cellShift="+cellShift);
122 return (int)((word>>>cellShift)&valueMask);
123 }
124
125 @Override
126 public void write(final long key, int value){
127 throw new RuntimeException("Not allowed for this class.");
128 }
129
130 //Slow
131 @Override
132 public void increment(final long rawKey, int amt){
133 for(int i=0; i<amt; i++){increment0(rawKey);}
134 }
135
136 public void increment0(final long rawKey){
137 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
138
139 long key1=hash(rawKey, 3);
140
141 if(verbose){System.err.println("key2="+key1+", value="+read(rawKey));}
142
143 int bnum=(int)(key1&arrayMask);
144 long[] array=buffers[bnum];
145 int loc=bufferlen[bnum];
146
147 // key2=Long.rotateRight(key2, hashBits);
148 // array[loc]=key2;
149
150 array[loc]=rawKey;
151 bufferlen[bnum]++;
152 if(verbose){System.err.println("bufferlen["+bnum+"] = "+bufferlen[bnum]);}
153 if(bufferlen[bnum]>=array.length){
154
155 if(verbose){System.err.println("Moving array.");}
156 bufferlen[bnum]=0;
157 buffers[bnum]=new long[array.length];
158
159 writers[bnum].add(array);
160 if(verbose){System.err.println("Moved.");}
161 }
162 }
163
164 /** Returns unincremented value */
165 @Override
166 public int incrementAndReturnUnincremented(long key, int incr){
167 throw new RuntimeException("Operation not supported.");
168 }
169
170 @Override
171 public long[] transformToFrequency(){
172 return transformToFrequency(matrix);
173 }
174
175 @Override
176 public ByteBuilder toContentsString(){
177 ByteBuilder sb=new ByteBuilder();
178 sb.append('[');
179 String comma="";
180 for(int[] array : matrix){
181 for(int i=0; i<array.length; i++){
182 int word=array[i];
183 for(int j=0; j<cellsPerWord; j++){
184 int x=word&valueMask;
185 sb.append(comma);
186 sb.append(x);
187 word>>>=cellBits;
188 comma=", ";
189 }
190 }
191 }
192 sb.append(']');
193 return sb;
194 }
195
196 @Override
197 public double usedFraction(){return cellsUsed/(double)cells;}
198
199 @Override
200 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
201
202 @Override
203 public long cellsUsed(int mindepth){
204 long count=0;
205 for(int[] array : matrix){
206 if(array!=null){
207 for(int word : array){
208 while(word>0){
209 int x=word&valueMask;
210 if(x>=mindepth){count++;}
211 word>>>=cellBits;
212 }
213 }
214 }
215 }
216 return count;
217 }
218
219
220 @Override
221 final long hash(long key, int row){
222 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
223 // int cell=(int)(hashCellMask&(key));
224
225 if(row==0){//Doublehash only first time
226 key=key^hashMasks[(row+4)%hashMasks.length][cell];
227 cell=(int)(hashCellMask&(key>>4));
228 // cell=(int)(hashCellMask&(key>>hashBits));
229 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
230 }
231
232 return key^hashMasks[row][cell];
233 }
234
235 /**
236 * @param i
237 * @param j
238 * @return
239 */
240 private static long[][] makeMasks(int rows, int cols) {
241
242 long seed;
243 synchronized(KCountArray6MT.class){
244 seed=counter;
245 counter++;
246 }
247
248 Timer t=new Timer();
249 long[][] r=new long[rows][cols];
250 Random randy=Shared.threadLocalRandom(seed);
251 for(int i=0; i<r.length; i++){
252 fillMasks(r[i], randy);
253 }
254 t.stop();
255 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);}
256 return r;
257 }
258
259 private static void fillMasks(long[] r, Random randy) {
260 // for(int i=0; i<r.length; i++){
261 // long x=0;
262 // while(Long.bitCount(x&0xFFFFFFFF)!=16){
263 // x=randy.nextLong();
264 // }
265 // r[i]=(x&Long.MAX_VALUE);
266 // }
267
268 final int hlen=(1<<hashBits);
269 assert(r.length==hlen);
270 int[] count1=new int[hlen];
271 int[] count2=new int[hlen];
272 final long mask=hlen-1;
273
274 for(int i=0; i<r.length; i++){
275 long x=0;
276 int y=0;
277 int z=0;
278 while(Long.bitCount(x&0xFFFFFFFFL)!=16){
279 x=randy.nextLong();
280 while(Long.bitCount(x&0xFFFFFFFFL)<16){
281 x|=(1L<<randy.nextInt(32));
282 }
283 while(Long.bitCount(x&0xFFFFFFFFL)>16){
284 x&=(~(1L<<randy.nextInt(32)));
285 }
286 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){
287 x|=(1L<<(randy.nextInt(32)+32));
288 }
289 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){
290 x&=(~(1L<<(randy.nextInt(32)+32)));
291 }
292
293 // System.out.print(".");
294 // y=(((int)(x&mask))^i);
295 y=(((int)(x&mask)));
296 z=(int)((x>>hashBits)&mask);
297 if(count1[y]>0 || count2[z]>0){
298 x=0;
299 }
300 }
301 // System.out.println(Long.toBinaryString(x));
302 r[i]=(x&Long.MAX_VALUE);
303 count1[y]++;
304 count2[z]++;
305 }
306
307 }
308
309
310 @Override
311 public void initialize(){
312 for(int i=0; i<writers.length; i++){
313 writers[i]=new WriteThread(i);
314 writers[i].start();
315
316 // while(!writers[i].isAlive()){
317 // System.out.print(".");
318 // }
319 }
320 }
321
322 @Override
323 public void shutdown(){
324 if(finished){return;}
325 synchronized(this){
326 if(finished){return;}
327
328 //Clear buffers
329 for(int i=0; i<numArrays; i++){
330 long[] array=buffers[i];
331 int len=bufferlen[i];
332 buffers[i]=null;
333 bufferlen[i]=0;
334
335 if(len<array.length){
336 array=Arrays.copyOf(array, len);
337 }
338
339 if(array.length>0){
340 writers[i].add(array);
341 }
342 }
343
344 //Add poison
345 for(WriteThread wt : writers){
346 wt.add(poison);
347 }
348
349 //Wait for termination
350 for(WriteThread wt : writers){
351 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive());
352 while(wt.isAlive()){
353 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive());
354 try {
355 wt.join(10000);
356 } catch (InterruptedException e) {
357 // TODO Auto-generated catch block
358 e.printStackTrace();
359 }
360 if(wt.isAlive()){System.err.println(wt.getClass().getCanonicalName()+" is taking a long time to die.");}
361 }
362 cellsUsed+=wt.cellsUsedPersonal;
363 // System.out.println("cellsUsed="+cellsUsed);
364 }
365
366 assert(!finished);
367 finished=true;
368 }
369 }
370
371 private class WriteThread extends Thread{
372
373 public WriteThread(int tnum){
374 num=tnum;
375 }
376
377 @Override
378 public void run(){
379 assert(matrix[num]==null);
380 array=new int[wordsPerArray]; //Makes NUMA systems use local memory.
381
382 matrix[num]=array;
383
384 long[] keys=null;
385 while(!shutdown){
386
387 if(verbose){System.err.println(" - Reading keys for wt"+num+".");}
388 while(keys==null){
389 try {
390 keys=writeQueue.take();
391 } catch (InterruptedException e) {
392 // TODO Auto-generated catch block
393 e.printStackTrace();
394 }
395 }
396 if(keys==poison){
397 // assert(false);
398 shutdown=true;
399 }else{
400 for(long key : keys){
401 incrementRawLocal(key);
402 }
403 }
404 // System.out.println(" -- Read keys for wt"+num+". poison="+(keys==poison)+", len="+keys.length);
405 if(verbose){System.err.println(" -- Read keys for wt"+num+". (success)");}
406 keys=null;
407 if(verbose){System.err.println("shutdown="+shutdown);}
408 }
409
410 // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+".");
411 // assert(false) : ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+".";
412
413 array=null;
414 }
415
416 void add(long[] keys){
417 // assert(isAlive());
418 assert(!shutdown);
419 if(shutdown){return;}
420 // assert(keys!=poison);
421 if(verbose){System.err.println(" + Adding keys to wt"+num+".");}
422 boolean success=false;
423 while(!success){
424 try {
425 writeQueue.put(keys);
426 success=true;
427 } catch (InterruptedException e) {
428 // TODO Auto-generated catch block
429 e.printStackTrace();
430 }
431 }
432 if(verbose){System.err.println(" ++ Added keys to wt"+num+". (success)");}
433 }
434
435 private int incrementRawLocal(long rawKey){
436 // verbose=(rawKey==32662670693L);
437 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
438 // verbose=true;
439 assert(1>0);
440
441 long key2=rawKey;
442 if(hashes==1){
443 key2=hash(key2, 0);
444 // int x=incrementHashedIfAtMost(key2, 1, maxValue-1);
445 int x=incrementHashedLocal(key2);
446 assert(x>=1) : "original=?, new should be >="+(1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
447 return x;
448 }
449
450 int min=0;
451 // final int min=read(rawKey);
452 // if(min>=maxValue){return maxValue;}
453
454 assert(key2==rawKey);
455 for(int i=0; i<hashes; i++){
456 key2=hash(key2, i);
457 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2, num));}
458 // int x=incrementHashedIfAtMost(key2, 1, min);
459 int x=incrementHashedLocal(key2);
460 assert(x>=min+1) : "i="+i+", original="+min+", new should be <="+(min+1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
461 if(verbose){System.err.println("postIncr value="+readHashed(key2, num));}
462 // assert(read(rawKey)<=min+1) : "i="+i+", original="+min+", new should be <="+(min+1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
463 // assert(readHashed(key2)>=min+1) : "i="+i+", original="+min+", new should be <="+(min+1)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
464 key2=Long.rotateRight(key2, hashBits);
465 }
466 // assert(read(rawKey)==min+1) : "original="+min+", new should be "+(min+1)+", new="+read(rawKey)+", max="+maxValue;
467 // assert(false);
468 return min(min+1, maxValue);
469 }
470
471 private int incrementHashedLocal(long key){
472 // assert((key&arrayMask)==num);
473 key=(key&Long.MAX_VALUE)%(cellMod);
474 // key=(key>>>(arrayBits+1))%(cellMod);
475 int index=(int)(key>>>indexShift);
476 int word=array[index];
477 int cellShift=(int)(cellBits*key);
478 int value=((word>>>cellShift)&valueMask);
479 if(value==0){cellsUsedPersonal++;}
480 value=min(value+1, maxValue);
481 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
482 array[index]=word;
483 return value;
484 }
485
486 private int[] array;
487 private final int num;
488 public long cellsUsedPersonal=0;
489
490 public ArrayBlockingQueue<long[]> writeQueue=new ArrayBlockingQueue<long[]>(16);
491 public boolean shutdown=false;
492
493 }
494
495
496 public long cellsUsed(){return cellsUsed;}
497
498 private boolean finished=false;
499
500 private long cellsUsed;
501 final int[][] matrix;
502 private final WriteThread[] writers=new WriteThread[numArrays];
503 final int hashes;
504 final int wordsPerArray;
505 private final long cellsPerArray;
506 final long cellMod;
507 private final long[][] hashMasks=makeMasks(8, hashArrayLength);
508
509 private final long[][] buffers=new long[numArrays][1000];
510 private final int[] bufferlen=new int[numArrays];
511
512 private static final int hashBits=6;
513 private static final int hashArrayLength=1<<hashBits;
514 private static final int hashCellMask=hashArrayLength-1;
515 static final long[] poison=new long[0];
516
517 private static long counter=0;
518
519 }