jpayne@68
|
1 package bloom;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.Locale;
|
jpayne@68
|
4
|
jpayne@68
|
5 /**
|
jpayne@68
|
6 * @author Brian Bushnell
|
jpayne@68
|
7 * @date Jul 5, 2012
|
jpayne@68
|
8 */
|
jpayne@68
|
9 public class KCountArray2 {
|
jpayne@68
|
10
|
jpayne@68
|
11 public static void main(String[] args){
|
jpayne@68
|
12 KCountArray2 kca=new KCountArray2(1024, 16);
|
jpayne@68
|
13 }
|
jpayne@68
|
14
|
jpayne@68
|
15 public KCountArray2(long cells_, int bits_){
|
jpayne@68
|
16 this(cells_, bits_, 0);
|
jpayne@68
|
17 }
|
jpayne@68
|
18
|
jpayne@68
|
19 public KCountArray2(long cells_, int bits_, int gap_){
|
jpayne@68
|
20 gap=gap_;
|
jpayne@68
|
21 assert(bits_<=32);
|
jpayne@68
|
22 assert(Integer.bitCount(bits_)==1);
|
jpayne@68
|
23 assert(Long.bitCount(cells_)==1);
|
jpayne@68
|
24
|
jpayne@68
|
25 while(bits_*cells_<32*numArrays){
|
jpayne@68
|
26 assert(false);
|
jpayne@68
|
27 bits_*=2;
|
jpayne@68
|
28 } //Increases bits per cell so that at minimum each array is size 1
|
jpayne@68
|
29
|
jpayne@68
|
30 assert(bits_!=32);
|
jpayne@68
|
31
|
jpayne@68
|
32 cells=cells_;
|
jpayne@68
|
33 cellBits=bits_;
|
jpayne@68
|
34 valueMask=~((-1)<<cellBits);
|
jpayne@68
|
35 maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31)));
|
jpayne@68
|
36 cellsPerWord=32/cellBits;
|
jpayne@68
|
37 indexShift=Integer.numberOfTrailingZeros(cellsPerWord);
|
jpayne@68
|
38 long words=cells/cellsPerWord;
|
jpayne@68
|
39 int wordsPerArray=(int)(words/numArrays);
|
jpayne@68
|
40 matrix=new int[numArrays][wordsPerArray];
|
jpayne@68
|
41
|
jpayne@68
|
42 if(verbose){
|
jpayne@68
|
43 System.out.println("cells: \t"+cells);
|
jpayne@68
|
44 System.out.println("cellBits:\t"+cellBits);
|
jpayne@68
|
45 System.out.println("valueMask:\t"+Long.toHexString(valueMask));
|
jpayne@68
|
46 System.out.println("maxValue:\t"+maxValue);
|
jpayne@68
|
47 System.out.println("cellsPerWord:\t"+cellsPerWord);
|
jpayne@68
|
48 System.out.println("indexShift:\t"+indexShift);
|
jpayne@68
|
49 System.out.println("words: \t"+words);
|
jpayne@68
|
50 System.out.println("wordsPerArray:\t"+wordsPerArray);
|
jpayne@68
|
51 System.out.println("numArrays:\t"+numArrays);
|
jpayne@68
|
52
|
jpayne@68
|
53
|
jpayne@68
|
54 long mem=words*4;
|
jpayne@68
|
55 if(mem<(1<<30)){
|
jpayne@68
|
56 System.out.println("memory: \t"+String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20)));
|
jpayne@68
|
57 }else{
|
jpayne@68
|
58 System.out.println("memory: \t"+String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30)));
|
jpayne@68
|
59 }
|
jpayne@68
|
60 }
|
jpayne@68
|
61 }
|
jpayne@68
|
62
|
jpayne@68
|
63 public int read(long key){
|
jpayne@68
|
64 // System.out.println("key="+key);
|
jpayne@68
|
65 int arrayNum=(int)(key&arrayMask);
|
jpayne@68
|
66 // System.out.println("array="+arrayNum);
|
jpayne@68
|
67 key>>>=arrayBits;
|
jpayne@68
|
68 // System.out.println("key2="+key);
|
jpayne@68
|
69 int[] array=matrix[arrayNum];
|
jpayne@68
|
70 int index=(int)(key>>>indexShift);
|
jpayne@68
|
71 // System.out.println("index="+index);
|
jpayne@68
|
72 int word=array[index];
|
jpayne@68
|
73 // System.out.println("word="+Integer.toHexString(word));
|
jpayne@68
|
74 int cellShift=(int)(cellBits*key);
|
jpayne@68
|
75 // System.out.println("cellShift="+cellShift);
|
jpayne@68
|
76 return (int)((word>>>cellShift)&valueMask);
|
jpayne@68
|
77 }
|
jpayne@68
|
78
|
jpayne@68
|
79 public void write(long key, int value){
|
jpayne@68
|
80 int arrayNum=(int)(key&arrayMask);
|
jpayne@68
|
81 key>>>=arrayBits;
|
jpayne@68
|
82 int[] array=matrix[arrayNum];
|
jpayne@68
|
83 int index=(int)(key>>>indexShift);
|
jpayne@68
|
84 int word=array[index];
|
jpayne@68
|
85 int cellShift=(int)(cellBits*key);
|
jpayne@68
|
86 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
|
jpayne@68
|
87 array[index]=word;
|
jpayne@68
|
88 }
|
jpayne@68
|
89
|
jpayne@68
|
90 public int increment(long key, int incr){
|
jpayne@68
|
91 int arrayNum=(int)(key&arrayMask);
|
jpayne@68
|
92 key>>>=arrayBits;
|
jpayne@68
|
93 int[] array=matrix[arrayNum];
|
jpayne@68
|
94 int index=(int)(key>>>indexShift);
|
jpayne@68
|
95 int word=array[index];
|
jpayne@68
|
96 int cellShift=(int)(cellBits*key);
|
jpayne@68
|
97 int value=((word>>>cellShift)&valueMask);
|
jpayne@68
|
98 if(value==0 && incr>0){cellsUsed++;}
|
jpayne@68
|
99 else if(incr<0 && value+incr==0){cellsUsed--;}
|
jpayne@68
|
100 value=min(value+incr, maxValue);
|
jpayne@68
|
101 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
|
jpayne@68
|
102 array[index]=word;
|
jpayne@68
|
103 return (int)value;
|
jpayne@68
|
104 }
|
jpayne@68
|
105
|
jpayne@68
|
106 /** Returns unincremented value */
|
jpayne@68
|
107 public int increment2(long key, int incr){
|
jpayne@68
|
108 int arrayNum=(int)(key&arrayMask);
|
jpayne@68
|
109 key>>>=arrayBits;
|
jpayne@68
|
110 int[] array=matrix[arrayNum];
|
jpayne@68
|
111 int index=(int)(key>>>indexShift);
|
jpayne@68
|
112 int word=array[index];
|
jpayne@68
|
113 int cellShift=(int)(cellBits*key);
|
jpayne@68
|
114 final int value=((word>>>cellShift)&valueMask);
|
jpayne@68
|
115 final int value2=min(value+incr, maxValue);
|
jpayne@68
|
116 word=(value2<<cellShift)|(word&~((valueMask)<<cellShift));
|
jpayne@68
|
117 array[index]=word;
|
jpayne@68
|
118 return value;
|
jpayne@68
|
119 }
|
jpayne@68
|
120
|
jpayne@68
|
121 public long[] transformToFrequency(){
|
jpayne@68
|
122 long[] freq=new long[100000];
|
jpayne@68
|
123 int maxFreq=freq.length-1;
|
jpayne@68
|
124
|
jpayne@68
|
125 if(cellBits!=32){
|
jpayne@68
|
126 assert(cellBits>0);
|
jpayne@68
|
127 for(int[] array : matrix){
|
jpayne@68
|
128 for(int i=0; i<array.length; i++){
|
jpayne@68
|
129 int word=array[i];
|
jpayne@68
|
130 int j=cellsPerWord;
|
jpayne@68
|
131 // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
|
jpayne@68
|
132 for(; word!=0; j--){
|
jpayne@68
|
133 int x=word&valueMask;
|
jpayne@68
|
134 int x2=(int)min(x, maxFreq);
|
jpayne@68
|
135 freq[x2]++;
|
jpayne@68
|
136 word=(word>>>cellBits);
|
jpayne@68
|
137 // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
|
jpayne@68
|
138 }
|
jpayne@68
|
139 freq[0]+=j;
|
jpayne@68
|
140 }
|
jpayne@68
|
141 }
|
jpayne@68
|
142 }else{
|
jpayne@68
|
143 for(int[] array : matrix){
|
jpayne@68
|
144 for(int i=0; i<array.length; i++){
|
jpayne@68
|
145 int word=array[i];
|
jpayne@68
|
146 int x2=(int)min(word, maxFreq);
|
jpayne@68
|
147 freq[x2]++;
|
jpayne@68
|
148 }
|
jpayne@68
|
149 }
|
jpayne@68
|
150 }
|
jpayne@68
|
151 return freq;
|
jpayne@68
|
152 }
|
jpayne@68
|
153
|
jpayne@68
|
154 @Override
|
jpayne@68
|
155 public String toString(){
|
jpayne@68
|
156 StringBuilder sb=new StringBuilder();
|
jpayne@68
|
157 sb.append("[");
|
jpayne@68
|
158 String comma="";
|
jpayne@68
|
159 for(int[] array : matrix){
|
jpayne@68
|
160 for(int i=0; i<array.length; i++){
|
jpayne@68
|
161 int word=array[i];
|
jpayne@68
|
162 for(int j=0; j<cellsPerWord; j++){
|
jpayne@68
|
163 int x=word&valueMask;
|
jpayne@68
|
164 sb.append(comma);
|
jpayne@68
|
165 sb.append(x);
|
jpayne@68
|
166 word>>>=cellBits;
|
jpayne@68
|
167 comma=", ";
|
jpayne@68
|
168 }
|
jpayne@68
|
169 }
|
jpayne@68
|
170 }
|
jpayne@68
|
171 sb.append("]");
|
jpayne@68
|
172 return sb.toString();
|
jpayne@68
|
173 }
|
jpayne@68
|
174
|
jpayne@68
|
175 public double usedFraction(){return cellsUsed/(double)cells;}
|
jpayne@68
|
176
|
jpayne@68
|
177 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
|
jpayne@68
|
178
|
jpayne@68
|
179 public long cellsUsed(int mindepth){
|
jpayne@68
|
180 long count=0;
|
jpayne@68
|
181 for(int[] array : matrix){
|
jpayne@68
|
182 if(array!=null){
|
jpayne@68
|
183 for(int word : array){
|
jpayne@68
|
184 while(word>0){
|
jpayne@68
|
185 int x=word&valueMask;
|
jpayne@68
|
186 if(x>=mindepth){count++;}
|
jpayne@68
|
187 word>>>=cellBits;
|
jpayne@68
|
188 }
|
jpayne@68
|
189 }
|
jpayne@68
|
190 }
|
jpayne@68
|
191 }
|
jpayne@68
|
192 return count;
|
jpayne@68
|
193 }
|
jpayne@68
|
194
|
jpayne@68
|
195 public String mem(){
|
jpayne@68
|
196 long mem=(cells*cellBits)/8;
|
jpayne@68
|
197 if(mem<(1<<20)){
|
jpayne@68
|
198 return (String.format(Locale.ROOT, "%.2f KB", mem*1d/(1<<10)));
|
jpayne@68
|
199 }else if(mem<(1<<30)){
|
jpayne@68
|
200 return (String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20)));
|
jpayne@68
|
201 }else{
|
jpayne@68
|
202 return (String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30)));
|
jpayne@68
|
203 }
|
jpayne@68
|
204 }
|
jpayne@68
|
205
|
jpayne@68
|
206 public static final int min(int x, int y){return x<y ? x : y;}
|
jpayne@68
|
207 public static final int max(int x, int y){return x>y ? x : y;}
|
jpayne@68
|
208 public static final long min(long x, long y){return x<y ? x : y;}
|
jpayne@68
|
209 public static final long max(long x, long y){return x>y ? x : y;}
|
jpayne@68
|
210
|
jpayne@68
|
211 private long cellsUsed;
|
jpayne@68
|
212
|
jpayne@68
|
213 public final long cells;
|
jpayne@68
|
214 public final int cellBits;
|
jpayne@68
|
215 public final int maxValue;
|
jpayne@68
|
216 public final int gap; //Set this for convenience on gapped tables to make sure you're using the right table.
|
jpayne@68
|
217
|
jpayne@68
|
218 private final int cellsPerWord;
|
jpayne@68
|
219 private final int indexShift;
|
jpayne@68
|
220 private final int valueMask;
|
jpayne@68
|
221 private final int[][] matrix;
|
jpayne@68
|
222
|
jpayne@68
|
223 private static final int arrayBits=2;
|
jpayne@68
|
224 private static final int numArrays=1<<arrayBits;
|
jpayne@68
|
225 private static final int arrayMask=numArrays-1;
|
jpayne@68
|
226
|
jpayne@68
|
227 public static boolean verbose=false;
|
jpayne@68
|
228
|
jpayne@68
|
229
|
jpayne@68
|
230 }
|