Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray5MT.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package bloom; | |
2 | |
3 import java.util.Arrays; | |
4 import java.util.Random; | |
5 import java.util.concurrent.ArrayBlockingQueue; | |
6 | |
7 import shared.Shared; | |
8 import shared.Timer; | |
9 | |
10 | |
11 /** | |
12 * | |
13 * Uses hashing rather than direct-mapping to support longer kmers. | |
14 * | |
15 * @author Brian Bushnell | |
16 * @date Aug 17, 2012 | |
17 * | |
18 */ | |
19 public class KCountArray5MT extends KCountArray { | |
20 | |
21 /** | |
22 * | |
23 */ | |
24 private static final long serialVersionUID = -5456926900022701212L; | |
25 | |
26 public static void main(String[] args){ | |
27 long cells=Long.parseLong(args[0]); | |
28 int bits=Integer.parseInt(args[1]); | |
29 int gap=Integer.parseInt(args[2]); | |
30 int hashes=Integer.parseInt(args[3]); | |
31 | |
32 verbose=false; | |
33 | |
34 KCountArray5MT kca=new KCountArray5MT(cells, bits, gap, hashes); | |
35 | |
36 System.out.println(kca.read(0)); | |
37 kca.increment(0); | |
38 System.out.println(kca.read(0)); | |
39 kca.increment(0); | |
40 System.out.println(kca.read(0)); | |
41 System.out.println(); | |
42 | |
43 System.out.println(kca.read(1)); | |
44 kca.increment(1); | |
45 System.out.println(kca.read(1)); | |
46 kca.increment(1); | |
47 System.out.println(kca.read(1)); | |
48 System.out.println(); | |
49 | |
50 System.out.println(kca.read(100)); | |
51 kca.increment(100); | |
52 System.out.println(kca.read(100)); | |
53 kca.increment(100); | |
54 System.out.println(kca.read(100)); | |
55 kca.increment(100); | |
56 System.out.println(kca.read(100)); | |
57 System.out.println(); | |
58 | |
59 | |
60 System.out.println(kca.read(150)); | |
61 kca.increment(150); | |
62 System.out.println(kca.read(150)); | |
63 System.out.println(); | |
64 | |
65 } | |
66 | |
67 public KCountArray5MT(long cells_, int bits_, int gap_, int hashes_){ | |
68 super(cells_, bits_); | |
69 // verbose=false; | |
70 long words=cells/cellsPerWord; | |
71 assert(words/numArrays<=Integer.MAX_VALUE); | |
72 wordsPerArray=(int)(words/numArrays); | |
73 cellsPerArray=cells/numArrays; | |
74 cellMod=cellsPerArray-1; | |
75 hashes=hashes_; | |
76 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes); | |
77 // assert(false); | |
78 matrix=new int[numArrays][]; | |
79 assert(hashes>0 && hashes<=hashMasks.length); | |
80 } | |
81 | |
82 @Override | |
83 public int read(final long rawKey){ | |
84 assert(finished); | |
85 if(verbose){System.err.println("Reading raw key "+rawKey);} | |
86 long key2=hash(rawKey, 0); | |
87 int min=readHashed(key2); | |
88 for(int i=1; i<hashes && min>0; i++){ | |
89 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);} | |
90 key2=Long.rotateRight(key2, hashBits); | |
91 key2=hash(key2, i); | |
92 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);} | |
93 min=min(min, readHashed(key2)); | |
94 } | |
95 return min; | |
96 } | |
97 | |
98 int readHashed(long key){ | |
99 assert(finished); | |
100 if(verbose){System.err.print("Reading hashed key "+key);} | |
101 // System.out.println("key="+key); | |
102 int arrayNum=(int)(key&arrayMask); | |
103 key=(key>>>arrayBits)%(cellMod); | |
104 // System.out.println("array="+arrayNum); | |
105 // System.out.println("key2="+key); | |
106 int[] array=matrix[arrayNum]; | |
107 int index=(int)(key>>>indexShift); | |
108 // assert(false) : indexShift; | |
109 // System.out.println("index="+index); | |
110 int word=array[index]; | |
111 // System.out.println("word="+Integer.toHexString(word)); | |
112 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask))); | |
113 // int cellShift=(int)(cellBits*(key&cellMask)); | |
114 int cellShift=(int)(cellBits*key); | |
115 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));} | |
116 // System.out.println("cellShift="+cellShift); | |
117 return (int)((word>>>cellShift)&valueMask); | |
118 } | |
119 | |
120 @Override | |
121 public void write(final long key, int value){ | |
122 throw new RuntimeException("Not allowed for this class."); | |
123 } | |
124 | |
125 //Slow | |
126 @Override | |
127 public void increment(final long rawKey, int amt){ | |
128 for(int i=0; i<amt; i++){increment0(rawKey);} | |
129 } | |
130 | |
131 public void increment0(final long rawKey){ | |
132 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");} | |
133 | |
134 buffer[bufferlen]=hash(rawKey, 0); | |
135 bufferlen++; | |
136 | |
137 if(bufferlen>=buffer.length){ | |
138 | |
139 if(verbose){System.err.println("Moving array.");} | |
140 | |
141 for(int w=0; w<writers.length; w++){ | |
142 writers[w].add(buffer); | |
143 } | |
144 bufferlen=0; | |
145 buffer=new long[buffer.length]; | |
146 if(verbose){System.err.println("Moved.");} | |
147 } | |
148 | |
149 } | |
150 | |
151 | |
152 @Override | |
153 public synchronized void increment(long[] keys){ | |
154 for(int i=0; i<keys.length; i++){ | |
155 keys[i]=hash(keys[i],0); | |
156 } | |
157 for(int w=0; w<writers.length; w++){ | |
158 writers[w].add(keys); | |
159 } | |
160 } | |
161 | |
162 /** Returns unincremented value */ | |
163 @Override | |
164 public int incrementAndReturnUnincremented(long key, int incr){ | |
165 throw new RuntimeException("Operation not supported."); | |
166 } | |
167 | |
168 @Override | |
169 public long[] transformToFrequency(){ | |
170 return transformToFrequency(matrix); | |
171 } | |
172 | |
173 @Override | |
174 public String toContentsString(){ | |
175 StringBuilder sb=new StringBuilder(); | |
176 sb.append("["); | |
177 String comma=""; | |
178 for(int[] array : matrix){ | |
179 for(int i=0; i<array.length; i++){ | |
180 int word=array[i]; | |
181 for(int j=0; j<cellsPerWord; j++){ | |
182 int x=word&valueMask; | |
183 sb.append(comma); | |
184 sb.append(x); | |
185 word>>>=cellBits; | |
186 comma=", "; | |
187 } | |
188 } | |
189 } | |
190 sb.append("]"); | |
191 return sb.toString(); | |
192 } | |
193 | |
194 @Override | |
195 public double usedFraction(){return cellsUsed/(double)cells;} | |
196 | |
197 @Override | |
198 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;} | |
199 | |
200 @Override | |
201 public long cellsUsed(int mindepth){ | |
202 long count=0; | |
203 for(int[] array : matrix){ | |
204 if(array!=null){ | |
205 for(int word : array){ | |
206 while(word>0){ | |
207 int x=word&valueMask; | |
208 if(x>=mindepth){count++;} | |
209 word>>>=cellBits; | |
210 } | |
211 } | |
212 } | |
213 } | |
214 return count; | |
215 } | |
216 | |
217 | |
218 @Override | |
219 final long hash(long key, int row){ | |
220 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
221 // int cell=(int)(hashCellMask&(key)); | |
222 | |
223 if(row==0){//Doublehash only first time | |
224 key=key^hashMasks[(row+4)%hashMasks.length][cell]; | |
225 cell=(int)(hashCellMask&(key>>4)); | |
226 // cell=(int)(hashCellMask&(key>>hashBits)); | |
227 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
228 } | |
229 | |
230 return key^hashMasks[row][cell]; | |
231 } | |
232 | |
233 /** | |
234 * @param i | |
235 * @param j | |
236 * @return | |
237 */ | |
238 private static long[][] makeMasks(int rows, int cols) { | |
239 | |
240 long seed; | |
241 synchronized(KCountArray5MT.class){ | |
242 seed=counter; | |
243 counter++; | |
244 } | |
245 | |
246 Timer t=new Timer(); | |
247 long[][] r=new long[rows][cols]; | |
248 Random randy=Shared.threadLocalRandom(seed); | |
249 for(int i=0; i<r.length; i++){ | |
250 fillMasks(r[i], randy); | |
251 } | |
252 t.stop(); | |
253 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);} | |
254 return r; | |
255 } | |
256 | |
257 private static void fillMasks(long[] r, Random randy) { | |
258 // for(int i=0; i<r.length; i++){ | |
259 // long x=0; | |
260 // while(Long.bitCount(x&0xFFFFFFFF)!=16){ | |
261 // x=randy.nextLong(); | |
262 // } | |
263 // r[i]=(x&Long.MAX_VALUE); | |
264 // } | |
265 | |
266 final int hlen=(1<<hashBits); | |
267 assert(r.length==hlen); | |
268 int[] count1=new int[hlen]; | |
269 int[] count2=new int[hlen]; | |
270 final long mask=hlen-1; | |
271 | |
272 for(int i=0; i<r.length; i++){ | |
273 long x=0; | |
274 int y=0; | |
275 int z=0; | |
276 while(Long.bitCount(x&0xFFFFFFFFL)!=16){ | |
277 x=randy.nextLong(); | |
278 while(Long.bitCount(x&0xFFFFFFFFL)<16){ | |
279 x|=(1L<<randy.nextInt(32)); | |
280 } | |
281 while(Long.bitCount(x&0xFFFFFFFFL)>16){ | |
282 x&=(~(1L<<randy.nextInt(32))); | |
283 } | |
284 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){ | |
285 x|=(1L<<(randy.nextInt(32)+32)); | |
286 } | |
287 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){ | |
288 x&=(~(1L<<(randy.nextInt(32)+32))); | |
289 } | |
290 | |
291 // System.out.print("."); | |
292 // y=(((int)(x&mask))^i); | |
293 y=(((int)(x&mask))); | |
294 z=(int)((x>>hashBits)&mask); | |
295 if(count1[y]>0 || count2[z]>0){ | |
296 x=0; | |
297 } | |
298 } | |
299 // System.out.println(Long.toBinaryString(x)); | |
300 r[i]=(x&Long.MAX_VALUE); | |
301 count1[y]++; | |
302 count2[z]++; | |
303 } | |
304 | |
305 } | |
306 | |
307 | |
308 @Override | |
309 public void initialize(){ | |
310 for(int i=0; i<writers.length; i++){ | |
311 writers[i]=new WriteThread(i); | |
312 writers[i].start(); | |
313 | |
314 while(!writers[i].isAlive()){ | |
315 System.out.print("."); | |
316 } | |
317 } | |
318 } | |
319 | |
320 @Override | |
321 public void shutdown(){ | |
322 if(finished){return;} | |
323 synchronized(this){ | |
324 if(finished){return;} | |
325 | |
326 //Clear buffer | |
327 if(bufferlen<buffer.length){ | |
328 buffer=Arrays.copyOf(buffer, bufferlen); | |
329 } | |
330 | |
331 if(buffer.length>0){ | |
332 for(int i=0; i<writers.length; i++) | |
333 writers[i].add(buffer); | |
334 } | |
335 buffer=null; | |
336 bufferlen=0; | |
337 | |
338 | |
339 //Add poison | |
340 for(WriteThread wt : writers){ | |
341 wt.add(poison); | |
342 } | |
343 | |
344 //Wait for termination | |
345 for(WriteThread wt : writers){ | |
346 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive()); | |
347 while(wt.isAlive()){ | |
348 // System.out.println("wt"+wt.num+" is alive: "+wt.isAlive()); | |
349 try { | |
350 wt.join(10000); | |
351 } catch (InterruptedException e) { | |
352 // TODO Auto-generated catch block | |
353 e.printStackTrace(); | |
354 } | |
355 if(wt.isAlive()){System.err.println(wt.getClass().getCanonicalName()+" is taking a long time to die.");} | |
356 } | |
357 cellsUsed+=wt.cellsUsedPersonal; | |
358 // System.out.println("cellsUsed="+cellsUsed); | |
359 } | |
360 | |
361 assert(!finished); | |
362 finished=true; | |
363 } | |
364 } | |
365 | |
366 private class WriteThread extends Thread{ | |
367 | |
368 public WriteThread(int tnum){ | |
369 num=tnum; | |
370 } | |
371 | |
372 @Override | |
373 public void run(){ | |
374 assert(matrix[num]==null); | |
375 array=new int[wordsPerArray]; //Makes NUMA systems use local memory. | |
376 // assert(false); | |
377 matrix[num]=array; | |
378 | |
379 // assert(num==1); | |
380 | |
381 long[] keys=null; | |
382 while(!shutdown){ | |
383 // assert(false); | |
384 | |
385 if(verbose){System.err.println(" - Reading keys for wt"+num+".");} | |
386 while(keys==null){ | |
387 // System.out.println("Searching for keys."); | |
388 try { | |
389 keys=writeQueue.take(); | |
390 } catch (InterruptedException e) { | |
391 // TODO Auto-generated catch block | |
392 e.printStackTrace(); | |
393 } | |
394 // System.out.println("*******************************************Found keys: "+keys.length); | |
395 // assert(false); | |
396 } | |
397 if(keys==poison){ | |
398 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~0 "; | |
399 shutdown=true; | |
400 }else{ | |
401 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~1 "; | |
402 for(long rawKey : keys){ | |
403 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~2 "; | |
404 if(verbose){System.err.println("Writer "+num+" considering raw key "+rawKey);} | |
405 long key2=rawKey; | |
406 // int y=read(rawKey); | |
407 if((key2&arrayMask)==num){ | |
408 int x=incrementHashedLocal(key2); | |
409 assert(x>=0) : "i="+0+", original=?, new should be >=0, new="+readHashed(key2)+", max="+maxValue+", key="+rawKey; | |
410 if(verbose){System.err.println("postIncr value="+readHashed(key2));} | |
411 | |
412 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~4 "; | |
413 } | |
414 | |
415 for(int i=1; i<hashes; i++){ | |
416 key2=Long.rotateRight(key2, hashBits); | |
417 key2=hash(key2, i); | |
418 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~3 "; | |
419 if(verbose){System.err.println("rawKey="+rawKey+", i="+i+", key2="+key2+", value="+readHashed(key2));} | |
420 if((key2&arrayMask)==num){ | |
421 int x=incrementHashedLocal(key2); | |
422 assert(x>=0) : "i="+i+", original=?, new should be >=0, new="+readHashed(key2)+", max="+maxValue+", key="+rawKey; | |
423 if(verbose){System.err.println("postIncr value="+readHashed(key2));} | |
424 | |
425 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~4 "; | |
426 } | |
427 | |
428 // assert(false) : " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~5 "; | |
429 } | |
430 // int z=read(rawKey); | |
431 // assert(hashes!=1 || !b || z==maxValue || z==y+1) : "b="+b+", y="+y+", z="+z+", rawKey="+rawKey+", num="+num; | |
432 } | |
433 } | |
434 // System.out.println(" -- Read keys for wt"+num+". poison="+(keys==poison)+", len="+keys.length); | |
435 if(verbose){System.err.println(" -- Read keys for wt"+num+". (success)");} | |
436 keys=null; | |
437 if(verbose){System.err.println("shutdown="+shutdown);} | |
438 } | |
439 | |
440 // System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+"."); | |
441 // assert(false) : ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I died: "+shutdown+", "+(keys==null)+"."; | |
442 | |
443 array=null; | |
444 } | |
445 | |
446 void add(long[] keys){ | |
447 // assert(isAlive()); | |
448 | |
449 // assert(!shutdown); | |
450 // if(shutdown){return;} | |
451 | |
452 if(verbose){System.err.println(" + Adding keys to wt"+num+".");} | |
453 boolean success=false; | |
454 while(!success){ | |
455 try { | |
456 writeQueue.put(keys); | |
457 success=true; | |
458 } catch (InterruptedException e) { | |
459 // TODO Auto-generated catch block | |
460 e.printStackTrace(); | |
461 } | |
462 } | |
463 if(verbose){System.err.println(" ++ Added keys to wt"+num+". (success)");} | |
464 } | |
465 | |
466 private int incrementHashedLocal(final long key_){ | |
467 if(verbose){System.err.println("\n*** wt"+num+" incrementing hashed key "+key_+" ***");} | |
468 assert((key_&arrayMask)==num); | |
469 long key=(key_>>>arrayBits)%(cellMod); | |
470 int index=(int)(key>>>indexShift); | |
471 int word=array[index]; | |
472 int cellShift=(int)(cellBits*key); | |
473 int value=((word>>>cellShift)&valueMask); | |
474 if(value==0){cellsUsedPersonal++;} | |
475 value=min(value+1, maxValue); | |
476 word=(value<<cellShift)|(word&~((valueMask)<<cellShift)); | |
477 array[index]=word; | |
478 if(verbose){System.err.println("\n*** wt"+num+" Incremented hashed key "+key_+". Value = "+readHashed(key_)+" ***");} | |
479 return value; | |
480 } | |
481 | |
482 private int[] array; | |
483 private final int num; | |
484 public long cellsUsedPersonal=0; | |
485 | |
486 public ArrayBlockingQueue<long[]> writeQueue=new ArrayBlockingQueue<long[]>(8); | |
487 public boolean shutdown=false; | |
488 | |
489 } | |
490 | |
491 | |
492 public long cellsUsed(){return cellsUsed;} | |
493 | |
494 private boolean finished=false; | |
495 | |
496 private long cellsUsed; | |
497 final int[][] matrix; | |
498 private final WriteThread[] writers=new WriteThread[numArrays]; | |
499 final int hashes; | |
500 final int wordsPerArray; | |
501 private final long cellsPerArray; | |
502 final long cellMod; | |
503 private final long[][] hashMasks=makeMasks(8, hashArrayLength); | |
504 | |
505 private long[] buffer=new long[2000]; | |
506 private int bufferlen=0; | |
507 | |
508 private static final int hashBits=6; | |
509 private static final int hashArrayLength=1<<hashBits; | |
510 private static final int hashCellMask=hashArrayLength-1; | |
511 static final long[] poison=new long[0]; | |
512 | |
513 private static long counter=0; | |
514 | |
515 } |