jpayne@68
|
1 package bloom;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.Random;
|
jpayne@68
|
4
|
jpayne@68
|
5 import shared.Shared;
|
jpayne@68
|
6 import shared.Timer;
|
jpayne@68
|
7
|
jpayne@68
|
8
|
jpayne@68
|
9 /**
|
jpayne@68
|
10 *
|
jpayne@68
|
11 * Uses hashing rather than direct-mapping to support longer kmers.
|
jpayne@68
|
12 *
|
jpayne@68
|
13 * @author Brian Bushnell
|
jpayne@68
|
14 * @date Aug 17, 2012
|
jpayne@68
|
15 *
|
jpayne@68
|
16 */
|
jpayne@68
|
17 public class KCountArray4 extends KCountArray {
|
jpayne@68
|
18
|
jpayne@68
|
19 /**
|
jpayne@68
|
20 *
|
jpayne@68
|
21 */
|
jpayne@68
|
22 private static final long serialVersionUID = -1418539960644885681L;
|
jpayne@68
|
23
|
jpayne@68
|
24 public static void main(String[] args){
|
jpayne@68
|
25 long cells=Long.parseLong(args[0]);
|
jpayne@68
|
26 int bits=Integer.parseInt(args[1]);
|
jpayne@68
|
27 int gap=Integer.parseInt(args[2]);
|
jpayne@68
|
28 int hashes=Integer.parseInt(args[3]);
|
jpayne@68
|
29
|
jpayne@68
|
30 verbose=false;
|
jpayne@68
|
31
|
jpayne@68
|
32 KCountArray4 kca=new KCountArray4(cells, bits, gap, hashes);
|
jpayne@68
|
33
|
jpayne@68
|
34 System.out.println(kca.read(0));
|
jpayne@68
|
35 kca.increment(0);
|
jpayne@68
|
36 System.out.println(kca.read(0));
|
jpayne@68
|
37 kca.increment(0);
|
jpayne@68
|
38 System.out.println(kca.read(0));
|
jpayne@68
|
39 System.out.println();
|
jpayne@68
|
40
|
jpayne@68
|
41 System.out.println(kca.read(1));
|
jpayne@68
|
42 kca.increment(1);
|
jpayne@68
|
43 System.out.println(kca.read(1));
|
jpayne@68
|
44 kca.increment(1);
|
jpayne@68
|
45 System.out.println(kca.read(1));
|
jpayne@68
|
46 System.out.println();
|
jpayne@68
|
47
|
jpayne@68
|
48 System.out.println(kca.read(100));
|
jpayne@68
|
49 kca.increment(100);
|
jpayne@68
|
50 System.out.println(kca.read(100));
|
jpayne@68
|
51 kca.increment(100);
|
jpayne@68
|
52 System.out.println(kca.read(100));
|
jpayne@68
|
53 kca.increment(100);
|
jpayne@68
|
54 System.out.println(kca.read(100));
|
jpayne@68
|
55 System.out.println();
|
jpayne@68
|
56
|
jpayne@68
|
57
|
jpayne@68
|
58 System.out.println(kca.read(150));
|
jpayne@68
|
59 kca.increment(150);
|
jpayne@68
|
60 System.out.println(kca.read(150));
|
jpayne@68
|
61 System.out.println();
|
jpayne@68
|
62
|
jpayne@68
|
63 }
|
jpayne@68
|
64
|
jpayne@68
|
65 public KCountArray4(long cells_, int bits_, int gap_, int hashes_){
|
jpayne@68
|
66 super(cells_, bits_);
|
jpayne@68
|
67 long words=cells/cellsPerWord;
|
jpayne@68
|
68 assert(words/numArrays<=Integer.MAX_VALUE);
|
jpayne@68
|
69 int wordsPerArray=(int)(words/numArrays);
|
jpayne@68
|
70 hashes=hashes_;
|
jpayne@68
|
71 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
|
jpayne@68
|
72 // assert(false);
|
jpayne@68
|
73 matrix=new int[numArrays][wordsPerArray];
|
jpayne@68
|
74 assert(hashes>0 && hashes<=hashMasks.length);
|
jpayne@68
|
75 }
|
jpayne@68
|
76
|
jpayne@68
|
77 @Override
|
jpayne@68
|
78 public int read(final long rawKey){
|
jpayne@68
|
79 if(verbose){System.err.println("Reading raw key "+rawKey);}
|
jpayne@68
|
80 long key2=hash(rawKey, 0);
|
jpayne@68
|
81 int min=readHashed(key2);
|
jpayne@68
|
82 for(int i=1; i<hashes && min>0; i++){
|
jpayne@68
|
83 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);}
|
jpayne@68
|
84 key2=Long.rotateRight(key2, hashBits);
|
jpayne@68
|
85 key2=hash(key2, i);
|
jpayne@68
|
86 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);}
|
jpayne@68
|
87 min=min(min, readHashed(key2));
|
jpayne@68
|
88 }
|
jpayne@68
|
89 return min;
|
jpayne@68
|
90 }
|
jpayne@68
|
91
|
jpayne@68
|
92 private int readHashed(long key){
|
jpayne@68
|
93 if(verbose){System.err.print("Reading hashed key "+key);}
|
jpayne@68
|
94 key=((key&Long.MAX_VALUE)%(cells-1));
|
jpayne@68
|
95 // System.out.println("key="+key);
|
jpayne@68
|
96 int arrayNum=(int)(key&arrayMask);
|
jpayne@68
|
97 // System.out.println("array="+arrayNum);
|
jpayne@68
|
98 key>>>=arrayBits;
|
jpayne@68
|
99 // System.out.println("key2="+key);
|
jpayne@68
|
100 int[] array=matrix[arrayNum];
|
jpayne@68
|
101 int index=(int)(key>>>indexShift);
|
jpayne@68
|
102 // assert(false) : indexShift;
|
jpayne@68
|
103 // System.out.println("index="+index);
|
jpayne@68
|
104 int word=array[index];
|
jpayne@68
|
105 // System.out.println("word="+Integer.toHexString(word));
|
jpayne@68
|
106 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask)));
|
jpayne@68
|
107 // int cellShift=(int)(cellBits*(key&cellMask));
|
jpayne@68
|
108 int cellShift=(int)(cellBits*key);
|
jpayne@68
|
109 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));}
|
jpayne@68
|
110 // System.out.println("cellShift="+cellShift);
|
jpayne@68
|
111 return (int)((word>>>cellShift)&valueMask);
|
jpayne@68
|
112 }
|
jpayne@68
|
113
|
jpayne@68
|
114 @Override
|
jpayne@68
|
115 public void write(final long key, int value){
|
jpayne@68
|
116 throw new RuntimeException("Not allowed for this class.");
|
jpayne@68
|
117 }
|
jpayne@68
|
118
|
jpayne@68
|
119 @Override
|
jpayne@68
|
120 public void increment(final long rawKey, int incr){
|
jpayne@68
|
121 // verbose=(rawKey==32662670693L);
|
jpayne@68
|
122 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
|
jpayne@68
|
123 // verbose=true;
|
jpayne@68
|
124 assert(incr>0);
|
jpayne@68
|
125
|
jpayne@68
|
126 long key2=rawKey;
|
jpayne@68
|
127 if(hashes==1){
|
jpayne@68
|
128 key2=hash(key2, 0);
|
jpayne@68
|
129 int x=incrementHashedIfAtMost(key2, incr, maxValue-1);
|
jpayne@68
|
130 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
131 //return x;
|
jpayne@68
|
132 }
|
jpayne@68
|
133
|
jpayne@68
|
134 final int min=read(rawKey);
|
jpayne@68
|
135 if(min>=maxValue){return /*maxValue*/;}
|
jpayne@68
|
136
|
jpayne@68
|
137 assert(key2==rawKey);
|
jpayne@68
|
138 for(int i=0; i<hashes; i++){
|
jpayne@68
|
139 key2=hash(key2, i);
|
jpayne@68
|
140 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
|
jpayne@68
|
141 int x=incrementHashedIfAtMost(key2, incr, min);
|
jpayne@68
|
142 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
143 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
|
jpayne@68
|
144 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
145 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
146 key2=Long.rotateRight(key2, hashBits);
|
jpayne@68
|
147 }
|
jpayne@68
|
148 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue;
|
jpayne@68
|
149 // assert(false);
|
jpayne@68
|
150 //return min(min+incr, maxValue);
|
jpayne@68
|
151 }
|
jpayne@68
|
152
|
jpayne@68
|
153 /** Returns unincremented value */
|
jpayne@68
|
154 @Override
|
jpayne@68
|
155 public int incrementAndReturnUnincremented(long rawKey, int incr){
|
jpayne@68
|
156 // verbose=(rawKey==32662670693L);
|
jpayne@68
|
157 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
|
jpayne@68
|
158 // verbose=true;
|
jpayne@68
|
159 assert(incr>0);
|
jpayne@68
|
160
|
jpayne@68
|
161 long key2=rawKey;
|
jpayne@68
|
162 if(hashes==1){
|
jpayne@68
|
163 key2=hash(key2, 0);
|
jpayne@68
|
164 int x=incrementHashedIfAtMost(key2, incr, maxValue-1);
|
jpayne@68
|
165 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
166 return x;
|
jpayne@68
|
167 }
|
jpayne@68
|
168
|
jpayne@68
|
169 final int min=read(rawKey);
|
jpayne@68
|
170 if(min>=maxValue){return maxValue;}
|
jpayne@68
|
171
|
jpayne@68
|
172 assert(key2==rawKey);
|
jpayne@68
|
173 for(int i=0; i<hashes; i++){
|
jpayne@68
|
174 key2=hash(key2, i);
|
jpayne@68
|
175 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
|
jpayne@68
|
176 int x=incrementHashedIfAtMost(key2, incr, min);
|
jpayne@68
|
177 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
178 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
|
jpayne@68
|
179 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
180 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
|
jpayne@68
|
181 key2=Long.rotateRight(key2, hashBits);
|
jpayne@68
|
182 }
|
jpayne@68
|
183 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue;
|
jpayne@68
|
184 // assert(false);
|
jpayne@68
|
185 return min;
|
jpayne@68
|
186 }
|
jpayne@68
|
187
|
jpayne@68
|
188 private int incrementHashedIfAtMost(long key, int incr, int lim){
|
jpayne@68
|
189 if(verbose){System.err.print("incrementing hashed key "+key);}
|
jpayne@68
|
190 key=((key&Long.MAX_VALUE)%(cells-1));
|
jpayne@68
|
191 int arrayNum=(int)(key&arrayMask);
|
jpayne@68
|
192 key>>>=arrayBits;
|
jpayne@68
|
193 int[] array=matrix[arrayNum];
|
jpayne@68
|
194 int index=(int)(key>>>indexShift);
|
jpayne@68
|
195 int word=array[index];
|
jpayne@68
|
196 int cellShift=(int)(cellBits*key);
|
jpayne@68
|
197 int value=((word>>>cellShift)&valueMask);
|
jpayne@68
|
198 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+value+", limit="+lim);}
|
jpayne@68
|
199 if(value>lim){return value;}
|
jpayne@68
|
200 if(value==0 && incr>0){cellsUsed++;}
|
jpayne@68
|
201 value=min(value+incr, maxValue);
|
jpayne@68
|
202 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
|
jpayne@68
|
203 array[index]=word;
|
jpayne@68
|
204 return value;
|
jpayne@68
|
205 }
|
jpayne@68
|
206
|
jpayne@68
|
207 private int incrementHashed(long key, int incr){
|
jpayne@68
|
208 assert(incr>0);
|
jpayne@68
|
209 int arrayNum=(int)(key&arrayMask);
|
jpayne@68
|
210 key>>>=arrayBits;
|
jpayne@68
|
211 int[] array=matrix[arrayNum];
|
jpayne@68
|
212 int index=(int)(key>>>indexShift);
|
jpayne@68
|
213 int word=array[index];
|
jpayne@68
|
214 int cellShift=(int)(cellBits*key);
|
jpayne@68
|
215 int value=((word>>>cellShift)&valueMask);
|
jpayne@68
|
216 if(value==0 && incr>0){cellsUsed++;}
|
jpayne@68
|
217 value=min(value+incr, maxValue);
|
jpayne@68
|
218 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
|
jpayne@68
|
219 array[index]=word;
|
jpayne@68
|
220 return value;
|
jpayne@68
|
221 }
|
jpayne@68
|
222
|
jpayne@68
|
223 @Override
|
jpayne@68
|
224 public long[] transformToFrequency(){
|
jpayne@68
|
225 return transformToFrequency(matrix);
|
jpayne@68
|
226 }
|
jpayne@68
|
227
|
jpayne@68
|
228 @Override
|
jpayne@68
|
229 public String toContentsString(){
|
jpayne@68
|
230 StringBuilder sb=new StringBuilder();
|
jpayne@68
|
231 sb.append("[");
|
jpayne@68
|
232 String comma="";
|
jpayne@68
|
233 for(int[] array : matrix){
|
jpayne@68
|
234 for(int i=0; i<array.length; i++){
|
jpayne@68
|
235 int word=array[i];
|
jpayne@68
|
236 for(int j=0; j<cellsPerWord; j++){
|
jpayne@68
|
237 int x=word&valueMask;
|
jpayne@68
|
238 sb.append(comma);
|
jpayne@68
|
239 sb.append(x);
|
jpayne@68
|
240 word>>>=cellBits;
|
jpayne@68
|
241 comma=", ";
|
jpayne@68
|
242 }
|
jpayne@68
|
243 }
|
jpayne@68
|
244 }
|
jpayne@68
|
245 sb.append("]");
|
jpayne@68
|
246 return sb.toString();
|
jpayne@68
|
247 }
|
jpayne@68
|
248
|
jpayne@68
|
249 @Override
|
jpayne@68
|
250 public double usedFraction(){return cellsUsed/(double)cells;}
|
jpayne@68
|
251
|
jpayne@68
|
252 @Override
|
jpayne@68
|
253 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
|
jpayne@68
|
254
|
jpayne@68
|
255 @Override
|
jpayne@68
|
256 public long cellsUsed(int mindepth){
|
jpayne@68
|
257 long count=0;
|
jpayne@68
|
258 for(int[] array : matrix){
|
jpayne@68
|
259 if(array!=null){
|
jpayne@68
|
260 for(int word : array){
|
jpayne@68
|
261 while(word>0){
|
jpayne@68
|
262 int x=word&valueMask;
|
jpayne@68
|
263 if(x>=mindepth){count++;}
|
jpayne@68
|
264 word>>>=cellBits;
|
jpayne@68
|
265 }
|
jpayne@68
|
266 }
|
jpayne@68
|
267 }
|
jpayne@68
|
268 }
|
jpayne@68
|
269 return count;
|
jpayne@68
|
270 }
|
jpayne@68
|
271
|
jpayne@68
|
272
|
jpayne@68
|
273 @Override
|
jpayne@68
|
274 final long hash(long key, int row){
|
jpayne@68
|
275 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
|
jpayne@68
|
276 // int cell=(int)(hashCellMask&(key));
|
jpayne@68
|
277
|
jpayne@68
|
278 if(row==0){//Doublehash only first time
|
jpayne@68
|
279 key=key^hashMasks[(row+4)%hashMasks.length][cell];
|
jpayne@68
|
280 cell=(int)(hashCellMask&(key>>4));
|
jpayne@68
|
281 // cell=(int)(hashCellMask&(key>>hashBits));
|
jpayne@68
|
282 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
|
jpayne@68
|
283 }
|
jpayne@68
|
284
|
jpayne@68
|
285 return key^hashMasks[row][cell];
|
jpayne@68
|
286 }
|
jpayne@68
|
287
|
jpayne@68
|
288 /**
|
jpayne@68
|
289 * @param i
|
jpayne@68
|
290 * @param j
|
jpayne@68
|
291 * @return
|
jpayne@68
|
292 */
|
jpayne@68
|
293 private static long[][] makeMasks(int rows, int cols) {
|
jpayne@68
|
294
|
jpayne@68
|
295 long seed;
|
jpayne@68
|
296 synchronized(KCountArray4.class){
|
jpayne@68
|
297 seed=counter;
|
jpayne@68
|
298 counter++;
|
jpayne@68
|
299 }
|
jpayne@68
|
300
|
jpayne@68
|
301 Timer t=new Timer();
|
jpayne@68
|
302 long[][] r=new long[rows][cols];
|
jpayne@68
|
303 Random randy=Shared.threadLocalRandom(seed);
|
jpayne@68
|
304 for(int i=0; i<r.length; i++){
|
jpayne@68
|
305 fillMasks(r[i], randy);
|
jpayne@68
|
306 }
|
jpayne@68
|
307 t.stop();
|
jpayne@68
|
308 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);}
|
jpayne@68
|
309 return r;
|
jpayne@68
|
310 }
|
jpayne@68
|
311
|
jpayne@68
|
312 private static void fillMasks(long[] r, Random randy) {
|
jpayne@68
|
313 // for(int i=0; i<r.length; i++){
|
jpayne@68
|
314 // long x=0;
|
jpayne@68
|
315 // while(Long.bitCount(x&0xFFFFFFFF)!=16){
|
jpayne@68
|
316 // x=randy.nextLong();
|
jpayne@68
|
317 // }
|
jpayne@68
|
318 // r[i]=(x&Long.MAX_VALUE);
|
jpayne@68
|
319 // }
|
jpayne@68
|
320
|
jpayne@68
|
321 final int hlen=(1<<hashBits);
|
jpayne@68
|
322 assert(r.length==hlen);
|
jpayne@68
|
323 int[] count1=new int[hlen];
|
jpayne@68
|
324 int[] count2=new int[hlen];
|
jpayne@68
|
325 final long mask=hlen-1;
|
jpayne@68
|
326
|
jpayne@68
|
327 for(int i=0; i<r.length; i++){
|
jpayne@68
|
328 long x=0;
|
jpayne@68
|
329 int y=0;
|
jpayne@68
|
330 int z=0;
|
jpayne@68
|
331 while(Long.bitCount(x&0xFFFFFFFFL)!=16){
|
jpayne@68
|
332 x=randy.nextLong();
|
jpayne@68
|
333 while(Long.bitCount(x&0xFFFFFFFFL)<16){
|
jpayne@68
|
334 x|=(1L<<randy.nextInt(32));
|
jpayne@68
|
335 }
|
jpayne@68
|
336 while(Long.bitCount(x&0xFFFFFFFFL)>16){
|
jpayne@68
|
337 x&=(~(1L<<randy.nextInt(32)));
|
jpayne@68
|
338 }
|
jpayne@68
|
339 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){
|
jpayne@68
|
340 x|=(1L<<(randy.nextInt(32)+32));
|
jpayne@68
|
341 }
|
jpayne@68
|
342 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){
|
jpayne@68
|
343 x&=(~(1L<<(randy.nextInt(32)+32)));
|
jpayne@68
|
344 }
|
jpayne@68
|
345
|
jpayne@68
|
346 // System.out.print(".");
|
jpayne@68
|
347 // y=(((int)(x&mask))^i);
|
jpayne@68
|
348 y=(((int)(x&mask)));
|
jpayne@68
|
349 z=(int)((x>>hashBits)&mask);
|
jpayne@68
|
350 if(count1[y]>0 || count2[z]>0){
|
jpayne@68
|
351 x=0;
|
jpayne@68
|
352 }
|
jpayne@68
|
353 }
|
jpayne@68
|
354 // System.out.println(Long.toBinaryString(x));
|
jpayne@68
|
355 r[i]=(x&Long.MAX_VALUE);
|
jpayne@68
|
356 count1[y]++;
|
jpayne@68
|
357 count2[z]++;
|
jpayne@68
|
358 }
|
jpayne@68
|
359
|
jpayne@68
|
360 }
|
jpayne@68
|
361
|
jpayne@68
|
362 public long cellsUsed(){return cellsUsed;}
|
jpayne@68
|
363
|
jpayne@68
|
364 private long cellsUsed;
|
jpayne@68
|
365 private final int[][] matrix;
|
jpayne@68
|
366 private final int hashes;
|
jpayne@68
|
367
|
jpayne@68
|
368
|
jpayne@68
|
369 private static final int hashBits=6;
|
jpayne@68
|
370 private static final int hashArrayLength=1<<hashBits;
|
jpayne@68
|
371 private static final int hashCellMask=hashArrayLength-1;
|
jpayne@68
|
372 private final long[][] hashMasks=makeMasks(8, hashArrayLength);
|
jpayne@68
|
373
|
jpayne@68
|
374 private static long counter=0;
|
jpayne@68
|
375
|
jpayne@68
|
376 }
|