comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray4.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package bloom;
2
3 import java.util.Random;
4
5 import shared.Shared;
6 import shared.Timer;
7
8
9 /**
10 *
11 * Uses hashing rather than direct-mapping to support longer kmers.
12 *
13 * @author Brian Bushnell
14 * @date Aug 17, 2012
15 *
16 */
17 public class KCountArray4 extends KCountArray {
18
19 /**
20 *
21 */
22 private static final long serialVersionUID = -1418539960644885681L;
23
24 public static void main(String[] args){
25 long cells=Long.parseLong(args[0]);
26 int bits=Integer.parseInt(args[1]);
27 int gap=Integer.parseInt(args[2]);
28 int hashes=Integer.parseInt(args[3]);
29
30 verbose=false;
31
32 KCountArray4 kca=new KCountArray4(cells, bits, gap, hashes);
33
34 System.out.println(kca.read(0));
35 kca.increment(0);
36 System.out.println(kca.read(0));
37 kca.increment(0);
38 System.out.println(kca.read(0));
39 System.out.println();
40
41 System.out.println(kca.read(1));
42 kca.increment(1);
43 System.out.println(kca.read(1));
44 kca.increment(1);
45 System.out.println(kca.read(1));
46 System.out.println();
47
48 System.out.println(kca.read(100));
49 kca.increment(100);
50 System.out.println(kca.read(100));
51 kca.increment(100);
52 System.out.println(kca.read(100));
53 kca.increment(100);
54 System.out.println(kca.read(100));
55 System.out.println();
56
57
58 System.out.println(kca.read(150));
59 kca.increment(150);
60 System.out.println(kca.read(150));
61 System.out.println();
62
63 }
64
65 public KCountArray4(long cells_, int bits_, int gap_, int hashes_){
66 super(cells_, bits_);
67 long words=cells/cellsPerWord;
68 assert(words/numArrays<=Integer.MAX_VALUE);
69 int wordsPerArray=(int)(words/numArrays);
70 hashes=hashes_;
71 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
72 // assert(false);
73 matrix=new int[numArrays][wordsPerArray];
74 assert(hashes>0 && hashes<=hashMasks.length);
75 }
76
77 @Override
78 public int read(final long rawKey){
79 if(verbose){System.err.println("Reading raw key "+rawKey);}
80 long key2=hash(rawKey, 0);
81 int min=readHashed(key2);
82 for(int i=1; i<hashes && min>0; i++){
83 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);}
84 key2=Long.rotateRight(key2, hashBits);
85 key2=hash(key2, i);
86 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);}
87 min=min(min, readHashed(key2));
88 }
89 return min;
90 }
91
92 private int readHashed(long key){
93 if(verbose){System.err.print("Reading hashed key "+key);}
94 key=((key&Long.MAX_VALUE)%(cells-1));
95 // System.out.println("key="+key);
96 int arrayNum=(int)(key&arrayMask);
97 // System.out.println("array="+arrayNum);
98 key>>>=arrayBits;
99 // System.out.println("key2="+key);
100 int[] array=matrix[arrayNum];
101 int index=(int)(key>>>indexShift);
102 // assert(false) : indexShift;
103 // System.out.println("index="+index);
104 int word=array[index];
105 // System.out.println("word="+Integer.toHexString(word));
106 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask)));
107 // int cellShift=(int)(cellBits*(key&cellMask));
108 int cellShift=(int)(cellBits*key);
109 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));}
110 // System.out.println("cellShift="+cellShift);
111 return (int)((word>>>cellShift)&valueMask);
112 }
113
114 @Override
115 public void write(final long key, int value){
116 throw new RuntimeException("Not allowed for this class.");
117 }
118
119 @Override
120 public void increment(final long rawKey, int incr){
121 // verbose=(rawKey==32662670693L);
122 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
123 // verbose=true;
124 assert(incr>0);
125
126 long key2=rawKey;
127 if(hashes==1){
128 key2=hash(key2, 0);
129 int x=incrementHashedIfAtMost(key2, incr, maxValue-1);
130 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
131 //return x;
132 }
133
134 final int min=read(rawKey);
135 if(min>=maxValue){return /*maxValue*/;}
136
137 assert(key2==rawKey);
138 for(int i=0; i<hashes; i++){
139 key2=hash(key2, i);
140 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
141 int x=incrementHashedIfAtMost(key2, incr, min);
142 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
143 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
144 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
145 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
146 key2=Long.rotateRight(key2, hashBits);
147 }
148 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue;
149 // assert(false);
150 //return min(min+incr, maxValue);
151 }
152
153 /** Returns unincremented value */
154 @Override
155 public int incrementAndReturnUnincremented(long rawKey, int incr){
156 // verbose=(rawKey==32662670693L);
157 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
158 // verbose=true;
159 assert(incr>0);
160
161 long key2=rawKey;
162 if(hashes==1){
163 key2=hash(key2, 0);
164 int x=incrementHashedIfAtMost(key2, incr, maxValue-1);
165 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
166 return x;
167 }
168
169 final int min=read(rawKey);
170 if(min>=maxValue){return maxValue;}
171
172 assert(key2==rawKey);
173 for(int i=0; i<hashes; i++){
174 key2=hash(key2, i);
175 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
176 int x=incrementHashedIfAtMost(key2, incr, min);
177 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
178 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
179 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
180 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
181 key2=Long.rotateRight(key2, hashBits);
182 }
183 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue;
184 // assert(false);
185 return min;
186 }
187
188 private int incrementHashedIfAtMost(long key, int incr, int lim){
189 if(verbose){System.err.print("incrementing hashed key "+key);}
190 key=((key&Long.MAX_VALUE)%(cells-1));
191 int arrayNum=(int)(key&arrayMask);
192 key>>>=arrayBits;
193 int[] array=matrix[arrayNum];
194 int index=(int)(key>>>indexShift);
195 int word=array[index];
196 int cellShift=(int)(cellBits*key);
197 int value=((word>>>cellShift)&valueMask);
198 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+value+", limit="+lim);}
199 if(value>lim){return value;}
200 if(value==0 && incr>0){cellsUsed++;}
201 value=min(value+incr, maxValue);
202 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
203 array[index]=word;
204 return value;
205 }
206
207 private int incrementHashed(long key, int incr){
208 assert(incr>0);
209 int arrayNum=(int)(key&arrayMask);
210 key>>>=arrayBits;
211 int[] array=matrix[arrayNum];
212 int index=(int)(key>>>indexShift);
213 int word=array[index];
214 int cellShift=(int)(cellBits*key);
215 int value=((word>>>cellShift)&valueMask);
216 if(value==0 && incr>0){cellsUsed++;}
217 value=min(value+incr, maxValue);
218 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
219 array[index]=word;
220 return value;
221 }
222
223 @Override
224 public long[] transformToFrequency(){
225 return transformToFrequency(matrix);
226 }
227
228 @Override
229 public String toContentsString(){
230 StringBuilder sb=new StringBuilder();
231 sb.append("[");
232 String comma="";
233 for(int[] array : matrix){
234 for(int i=0; i<array.length; i++){
235 int word=array[i];
236 for(int j=0; j<cellsPerWord; j++){
237 int x=word&valueMask;
238 sb.append(comma);
239 sb.append(x);
240 word>>>=cellBits;
241 comma=", ";
242 }
243 }
244 }
245 sb.append("]");
246 return sb.toString();
247 }
248
249 @Override
250 public double usedFraction(){return cellsUsed/(double)cells;}
251
252 @Override
253 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
254
255 @Override
256 public long cellsUsed(int mindepth){
257 long count=0;
258 for(int[] array : matrix){
259 if(array!=null){
260 for(int word : array){
261 while(word>0){
262 int x=word&valueMask;
263 if(x>=mindepth){count++;}
264 word>>>=cellBits;
265 }
266 }
267 }
268 }
269 return count;
270 }
271
272
273 @Override
274 final long hash(long key, int row){
275 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
276 // int cell=(int)(hashCellMask&(key));
277
278 if(row==0){//Doublehash only first time
279 key=key^hashMasks[(row+4)%hashMasks.length][cell];
280 cell=(int)(hashCellMask&(key>>4));
281 // cell=(int)(hashCellMask&(key>>hashBits));
282 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
283 }
284
285 return key^hashMasks[row][cell];
286 }
287
288 /**
289 * @param i
290 * @param j
291 * @return
292 */
293 private static long[][] makeMasks(int rows, int cols) {
294
295 long seed;
296 synchronized(KCountArray4.class){
297 seed=counter;
298 counter++;
299 }
300
301 Timer t=new Timer();
302 long[][] r=new long[rows][cols];
303 Random randy=Shared.threadLocalRandom(seed);
304 for(int i=0; i<r.length; i++){
305 fillMasks(r[i], randy);
306 }
307 t.stop();
308 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);}
309 return r;
310 }
311
312 private static void fillMasks(long[] r, Random randy) {
313 // for(int i=0; i<r.length; i++){
314 // long x=0;
315 // while(Long.bitCount(x&0xFFFFFFFF)!=16){
316 // x=randy.nextLong();
317 // }
318 // r[i]=(x&Long.MAX_VALUE);
319 // }
320
321 final int hlen=(1<<hashBits);
322 assert(r.length==hlen);
323 int[] count1=new int[hlen];
324 int[] count2=new int[hlen];
325 final long mask=hlen-1;
326
327 for(int i=0; i<r.length; i++){
328 long x=0;
329 int y=0;
330 int z=0;
331 while(Long.bitCount(x&0xFFFFFFFFL)!=16){
332 x=randy.nextLong();
333 while(Long.bitCount(x&0xFFFFFFFFL)<16){
334 x|=(1L<<randy.nextInt(32));
335 }
336 while(Long.bitCount(x&0xFFFFFFFFL)>16){
337 x&=(~(1L<<randy.nextInt(32)));
338 }
339 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){
340 x|=(1L<<(randy.nextInt(32)+32));
341 }
342 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){
343 x&=(~(1L<<(randy.nextInt(32)+32)));
344 }
345
346 // System.out.print(".");
347 // y=(((int)(x&mask))^i);
348 y=(((int)(x&mask)));
349 z=(int)((x>>hashBits)&mask);
350 if(count1[y]>0 || count2[z]>0){
351 x=0;
352 }
353 }
354 // System.out.println(Long.toBinaryString(x));
355 r[i]=(x&Long.MAX_VALUE);
356 count1[y]++;
357 count2[z]++;
358 }
359
360 }
361
362 public long cellsUsed(){return cellsUsed;}
363
364 private long cellsUsed;
365 private final int[][] matrix;
366 private final int hashes;
367
368
369 private static final int hashBits=6;
370 private static final int hashArrayLength=1<<hashBits;
371 private static final int hashCellMask=hashArrayLength-1;
372 private final long[][] hashMasks=makeMasks(8, hashArrayLength);
373
374 private static long counter=0;
375
376 }