Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray4.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package bloom; | |
2 | |
3 import java.util.Random; | |
4 | |
5 import shared.Shared; | |
6 import shared.Timer; | |
7 | |
8 | |
9 /** | |
10 * | |
11 * Uses hashing rather than direct-mapping to support longer kmers. | |
12 * | |
13 * @author Brian Bushnell | |
14 * @date Aug 17, 2012 | |
15 * | |
16 */ | |
17 public class KCountArray4 extends KCountArray { | |
18 | |
19 /** | |
20 * | |
21 */ | |
22 private static final long serialVersionUID = -1418539960644885681L; | |
23 | |
24 public static void main(String[] args){ | |
25 long cells=Long.parseLong(args[0]); | |
26 int bits=Integer.parseInt(args[1]); | |
27 int gap=Integer.parseInt(args[2]); | |
28 int hashes=Integer.parseInt(args[3]); | |
29 | |
30 verbose=false; | |
31 | |
32 KCountArray4 kca=new KCountArray4(cells, bits, gap, hashes); | |
33 | |
34 System.out.println(kca.read(0)); | |
35 kca.increment(0); | |
36 System.out.println(kca.read(0)); | |
37 kca.increment(0); | |
38 System.out.println(kca.read(0)); | |
39 System.out.println(); | |
40 | |
41 System.out.println(kca.read(1)); | |
42 kca.increment(1); | |
43 System.out.println(kca.read(1)); | |
44 kca.increment(1); | |
45 System.out.println(kca.read(1)); | |
46 System.out.println(); | |
47 | |
48 System.out.println(kca.read(100)); | |
49 kca.increment(100); | |
50 System.out.println(kca.read(100)); | |
51 kca.increment(100); | |
52 System.out.println(kca.read(100)); | |
53 kca.increment(100); | |
54 System.out.println(kca.read(100)); | |
55 System.out.println(); | |
56 | |
57 | |
58 System.out.println(kca.read(150)); | |
59 kca.increment(150); | |
60 System.out.println(kca.read(150)); | |
61 System.out.println(); | |
62 | |
63 } | |
64 | |
65 public KCountArray4(long cells_, int bits_, int gap_, int hashes_){ | |
66 super(cells_, bits_); | |
67 long words=cells/cellsPerWord; | |
68 assert(words/numArrays<=Integer.MAX_VALUE); | |
69 int wordsPerArray=(int)(words/numArrays); | |
70 hashes=hashes_; | |
71 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes); | |
72 // assert(false); | |
73 matrix=new int[numArrays][wordsPerArray]; | |
74 assert(hashes>0 && hashes<=hashMasks.length); | |
75 } | |
76 | |
77 @Override | |
78 public int read(final long rawKey){ | |
79 if(verbose){System.err.println("Reading raw key "+rawKey);} | |
80 long key2=hash(rawKey, 0); | |
81 int min=readHashed(key2); | |
82 for(int i=1; i<hashes && min>0; i++){ | |
83 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);} | |
84 key2=Long.rotateRight(key2, hashBits); | |
85 key2=hash(key2, i); | |
86 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);} | |
87 min=min(min, readHashed(key2)); | |
88 } | |
89 return min; | |
90 } | |
91 | |
92 private int readHashed(long key){ | |
93 if(verbose){System.err.print("Reading hashed key "+key);} | |
94 key=((key&Long.MAX_VALUE)%(cells-1)); | |
95 // System.out.println("key="+key); | |
96 int arrayNum=(int)(key&arrayMask); | |
97 // System.out.println("array="+arrayNum); | |
98 key>>>=arrayBits; | |
99 // System.out.println("key2="+key); | |
100 int[] array=matrix[arrayNum]; | |
101 int index=(int)(key>>>indexShift); | |
102 // assert(false) : indexShift; | |
103 // System.out.println("index="+index); | |
104 int word=array[index]; | |
105 // System.out.println("word="+Integer.toHexString(word)); | |
106 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask))); | |
107 // int cellShift=(int)(cellBits*(key&cellMask)); | |
108 int cellShift=(int)(cellBits*key); | |
109 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));} | |
110 // System.out.println("cellShift="+cellShift); | |
111 return (int)((word>>>cellShift)&valueMask); | |
112 } | |
113 | |
114 @Override | |
115 public void write(final long key, int value){ | |
116 throw new RuntimeException("Not allowed for this class."); | |
117 } | |
118 | |
119 @Override | |
120 public void increment(final long rawKey, int incr){ | |
121 // verbose=(rawKey==32662670693L); | |
122 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");} | |
123 // verbose=true; | |
124 assert(incr>0); | |
125 | |
126 long key2=rawKey; | |
127 if(hashes==1){ | |
128 key2=hash(key2, 0); | |
129 int x=incrementHashedIfAtMost(key2, incr, maxValue-1); | |
130 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
131 //return x; | |
132 } | |
133 | |
134 final int min=read(rawKey); | |
135 if(min>=maxValue){return /*maxValue*/;} | |
136 | |
137 assert(key2==rawKey); | |
138 for(int i=0; i<hashes; i++){ | |
139 key2=hash(key2, i); | |
140 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));} | |
141 int x=incrementHashedIfAtMost(key2, incr, min); | |
142 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
143 if(verbose){System.err.println("postIncr value="+readHashed(key2));} | |
144 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
145 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
146 key2=Long.rotateRight(key2, hashBits); | |
147 } | |
148 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue; | |
149 // assert(false); | |
150 //return min(min+incr, maxValue); | |
151 } | |
152 | |
153 /** Returns unincremented value */ | |
154 @Override | |
155 public int incrementAndReturnUnincremented(long rawKey, int incr){ | |
156 // verbose=(rawKey==32662670693L); | |
157 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");} | |
158 // verbose=true; | |
159 assert(incr>0); | |
160 | |
161 long key2=rawKey; | |
162 if(hashes==1){ | |
163 key2=hash(key2, 0); | |
164 int x=incrementHashedIfAtMost(key2, incr, maxValue-1); | |
165 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
166 return x; | |
167 } | |
168 | |
169 final int min=read(rawKey); | |
170 if(min>=maxValue){return maxValue;} | |
171 | |
172 assert(key2==rawKey); | |
173 for(int i=0; i<hashes; i++){ | |
174 key2=hash(key2, i); | |
175 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));} | |
176 int x=incrementHashedIfAtMost(key2, incr, min); | |
177 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
178 if(verbose){System.err.println("postIncr value="+readHashed(key2));} | |
179 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
180 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey; | |
181 key2=Long.rotateRight(key2, hashBits); | |
182 } | |
183 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue; | |
184 // assert(false); | |
185 return min; | |
186 } | |
187 | |
188 private int incrementHashedIfAtMost(long key, int incr, int lim){ | |
189 if(verbose){System.err.print("incrementing hashed key "+key);} | |
190 key=((key&Long.MAX_VALUE)%(cells-1)); | |
191 int arrayNum=(int)(key&arrayMask); | |
192 key>>>=arrayBits; | |
193 int[] array=matrix[arrayNum]; | |
194 int index=(int)(key>>>indexShift); | |
195 int word=array[index]; | |
196 int cellShift=(int)(cellBits*key); | |
197 int value=((word>>>cellShift)&valueMask); | |
198 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+value+", limit="+lim);} | |
199 if(value>lim){return value;} | |
200 if(value==0 && incr>0){cellsUsed++;} | |
201 value=min(value+incr, maxValue); | |
202 word=(value<<cellShift)|(word&~((valueMask)<<cellShift)); | |
203 array[index]=word; | |
204 return value; | |
205 } | |
206 | |
207 private int incrementHashed(long key, int incr){ | |
208 assert(incr>0); | |
209 int arrayNum=(int)(key&arrayMask); | |
210 key>>>=arrayBits; | |
211 int[] array=matrix[arrayNum]; | |
212 int index=(int)(key>>>indexShift); | |
213 int word=array[index]; | |
214 int cellShift=(int)(cellBits*key); | |
215 int value=((word>>>cellShift)&valueMask); | |
216 if(value==0 && incr>0){cellsUsed++;} | |
217 value=min(value+incr, maxValue); | |
218 word=(value<<cellShift)|(word&~((valueMask)<<cellShift)); | |
219 array[index]=word; | |
220 return value; | |
221 } | |
222 | |
223 @Override | |
224 public long[] transformToFrequency(){ | |
225 return transformToFrequency(matrix); | |
226 } | |
227 | |
228 @Override | |
229 public String toContentsString(){ | |
230 StringBuilder sb=new StringBuilder(); | |
231 sb.append("["); | |
232 String comma=""; | |
233 for(int[] array : matrix){ | |
234 for(int i=0; i<array.length; i++){ | |
235 int word=array[i]; | |
236 for(int j=0; j<cellsPerWord; j++){ | |
237 int x=word&valueMask; | |
238 sb.append(comma); | |
239 sb.append(x); | |
240 word>>>=cellBits; | |
241 comma=", "; | |
242 } | |
243 } | |
244 } | |
245 sb.append("]"); | |
246 return sb.toString(); | |
247 } | |
248 | |
249 @Override | |
250 public double usedFraction(){return cellsUsed/(double)cells;} | |
251 | |
252 @Override | |
253 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;} | |
254 | |
255 @Override | |
256 public long cellsUsed(int mindepth){ | |
257 long count=0; | |
258 for(int[] array : matrix){ | |
259 if(array!=null){ | |
260 for(int word : array){ | |
261 while(word>0){ | |
262 int x=word&valueMask; | |
263 if(x>=mindepth){count++;} | |
264 word>>>=cellBits; | |
265 } | |
266 } | |
267 } | |
268 } | |
269 return count; | |
270 } | |
271 | |
272 | |
273 @Override | |
274 final long hash(long key, int row){ | |
275 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
276 // int cell=(int)(hashCellMask&(key)); | |
277 | |
278 if(row==0){//Doublehash only first time | |
279 key=key^hashMasks[(row+4)%hashMasks.length][cell]; | |
280 cell=(int)(hashCellMask&(key>>4)); | |
281 // cell=(int)(hashCellMask&(key>>hashBits)); | |
282 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1)); | |
283 } | |
284 | |
285 return key^hashMasks[row][cell]; | |
286 } | |
287 | |
288 /** | |
289 * @param i | |
290 * @param j | |
291 * @return | |
292 */ | |
293 private static long[][] makeMasks(int rows, int cols) { | |
294 | |
295 long seed; | |
296 synchronized(KCountArray4.class){ | |
297 seed=counter; | |
298 counter++; | |
299 } | |
300 | |
301 Timer t=new Timer(); | |
302 long[][] r=new long[rows][cols]; | |
303 Random randy=Shared.threadLocalRandom(seed); | |
304 for(int i=0; i<r.length; i++){ | |
305 fillMasks(r[i], randy); | |
306 } | |
307 t.stop(); | |
308 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);} | |
309 return r; | |
310 } | |
311 | |
312 private static void fillMasks(long[] r, Random randy) { | |
313 // for(int i=0; i<r.length; i++){ | |
314 // long x=0; | |
315 // while(Long.bitCount(x&0xFFFFFFFF)!=16){ | |
316 // x=randy.nextLong(); | |
317 // } | |
318 // r[i]=(x&Long.MAX_VALUE); | |
319 // } | |
320 | |
321 final int hlen=(1<<hashBits); | |
322 assert(r.length==hlen); | |
323 int[] count1=new int[hlen]; | |
324 int[] count2=new int[hlen]; | |
325 final long mask=hlen-1; | |
326 | |
327 for(int i=0; i<r.length; i++){ | |
328 long x=0; | |
329 int y=0; | |
330 int z=0; | |
331 while(Long.bitCount(x&0xFFFFFFFFL)!=16){ | |
332 x=randy.nextLong(); | |
333 while(Long.bitCount(x&0xFFFFFFFFL)<16){ | |
334 x|=(1L<<randy.nextInt(32)); | |
335 } | |
336 while(Long.bitCount(x&0xFFFFFFFFL)>16){ | |
337 x&=(~(1L<<randy.nextInt(32))); | |
338 } | |
339 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){ | |
340 x|=(1L<<(randy.nextInt(32)+32)); | |
341 } | |
342 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){ | |
343 x&=(~(1L<<(randy.nextInt(32)+32))); | |
344 } | |
345 | |
346 // System.out.print("."); | |
347 // y=(((int)(x&mask))^i); | |
348 y=(((int)(x&mask))); | |
349 z=(int)((x>>hashBits)&mask); | |
350 if(count1[y]>0 || count2[z]>0){ | |
351 x=0; | |
352 } | |
353 } | |
354 // System.out.println(Long.toBinaryString(x)); | |
355 r[i]=(x&Long.MAX_VALUE); | |
356 count1[y]++; | |
357 count2[z]++; | |
358 } | |
359 | |
360 } | |
361 | |
362 public long cellsUsed(){return cellsUsed;} | |
363 | |
364 private long cellsUsed; | |
365 private final int[][] matrix; | |
366 private final int hashes; | |
367 | |
368 | |
369 private static final int hashBits=6; | |
370 private static final int hashArrayLength=1<<hashBits; | |
371 private static final int hashCellMask=hashArrayLength-1; | |
372 private final long[][] hashMasks=makeMasks(8, hashArrayLength); | |
373 | |
374 private static long counter=0; | |
375 | |
376 } |