comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray2.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package bloom;
2
3 import java.util.Locale;
4
5 /**
6 * @author Brian Bushnell
7 * @date Jul 5, 2012
8 */
9 public class KCountArray2 {
10
11 public static void main(String[] args){
12 KCountArray2 kca=new KCountArray2(1024, 16);
13 }
14
15 public KCountArray2(long cells_, int bits_){
16 this(cells_, bits_, 0);
17 }
18
19 public KCountArray2(long cells_, int bits_, int gap_){
20 gap=gap_;
21 assert(bits_<=32);
22 assert(Integer.bitCount(bits_)==1);
23 assert(Long.bitCount(cells_)==1);
24
25 while(bits_*cells_<32*numArrays){
26 assert(false);
27 bits_*=2;
28 } //Increases bits per cell so that at minimum each array is size 1
29
30 assert(bits_!=32);
31
32 cells=cells_;
33 cellBits=bits_;
34 valueMask=~((-1)<<cellBits);
35 maxValue=min(Integer.MAX_VALUE, ~((-1)<<min(cellBits,31)));
36 cellsPerWord=32/cellBits;
37 indexShift=Integer.numberOfTrailingZeros(cellsPerWord);
38 long words=cells/cellsPerWord;
39 int wordsPerArray=(int)(words/numArrays);
40 matrix=new int[numArrays][wordsPerArray];
41
42 if(verbose){
43 System.out.println("cells: \t"+cells);
44 System.out.println("cellBits:\t"+cellBits);
45 System.out.println("valueMask:\t"+Long.toHexString(valueMask));
46 System.out.println("maxValue:\t"+maxValue);
47 System.out.println("cellsPerWord:\t"+cellsPerWord);
48 System.out.println("indexShift:\t"+indexShift);
49 System.out.println("words: \t"+words);
50 System.out.println("wordsPerArray:\t"+wordsPerArray);
51 System.out.println("numArrays:\t"+numArrays);
52
53
54 long mem=words*4;
55 if(mem<(1<<30)){
56 System.out.println("memory: \t"+String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20)));
57 }else{
58 System.out.println("memory: \t"+String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30)));
59 }
60 }
61 }
62
63 public int read(long key){
64 // System.out.println("key="+key);
65 int arrayNum=(int)(key&arrayMask);
66 // System.out.println("array="+arrayNum);
67 key>>>=arrayBits;
68 // System.out.println("key2="+key);
69 int[] array=matrix[arrayNum];
70 int index=(int)(key>>>indexShift);
71 // System.out.println("index="+index);
72 int word=array[index];
73 // System.out.println("word="+Integer.toHexString(word));
74 int cellShift=(int)(cellBits*key);
75 // System.out.println("cellShift="+cellShift);
76 return (int)((word>>>cellShift)&valueMask);
77 }
78
79 public void write(long key, int value){
80 int arrayNum=(int)(key&arrayMask);
81 key>>>=arrayBits;
82 int[] array=matrix[arrayNum];
83 int index=(int)(key>>>indexShift);
84 int word=array[index];
85 int cellShift=(int)(cellBits*key);
86 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
87 array[index]=word;
88 }
89
90 public int increment(long key, int incr){
91 int arrayNum=(int)(key&arrayMask);
92 key>>>=arrayBits;
93 int[] array=matrix[arrayNum];
94 int index=(int)(key>>>indexShift);
95 int word=array[index];
96 int cellShift=(int)(cellBits*key);
97 int value=((word>>>cellShift)&valueMask);
98 if(value==0 && incr>0){cellsUsed++;}
99 else if(incr<0 && value+incr==0){cellsUsed--;}
100 value=min(value+incr, maxValue);
101 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
102 array[index]=word;
103 return (int)value;
104 }
105
106 /** Returns unincremented value */
107 public int increment2(long key, int incr){
108 int arrayNum=(int)(key&arrayMask);
109 key>>>=arrayBits;
110 int[] array=matrix[arrayNum];
111 int index=(int)(key>>>indexShift);
112 int word=array[index];
113 int cellShift=(int)(cellBits*key);
114 final int value=((word>>>cellShift)&valueMask);
115 final int value2=min(value+incr, maxValue);
116 word=(value2<<cellShift)|(word&~((valueMask)<<cellShift));
117 array[index]=word;
118 return value;
119 }
120
121 public long[] transformToFrequency(){
122 long[] freq=new long[100000];
123 int maxFreq=freq.length-1;
124
125 if(cellBits!=32){
126 assert(cellBits>0);
127 for(int[] array : matrix){
128 for(int i=0; i<array.length; i++){
129 int word=array[i];
130 int j=cellsPerWord;
131 // System.out.println("initial: word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
132 for(; word!=0; j--){
133 int x=word&valueMask;
134 int x2=(int)min(x, maxFreq);
135 freq[x2]++;
136 word=(word>>>cellBits);
137 // System.out.println("word = "+word+", j = "+Integer.toHexString(j)+", cellbits="+cellBits);
138 }
139 freq[0]+=j;
140 }
141 }
142 }else{
143 for(int[] array : matrix){
144 for(int i=0; i<array.length; i++){
145 int word=array[i];
146 int x2=(int)min(word, maxFreq);
147 freq[x2]++;
148 }
149 }
150 }
151 return freq;
152 }
153
154 @Override
155 public String toString(){
156 StringBuilder sb=new StringBuilder();
157 sb.append("[");
158 String comma="";
159 for(int[] array : matrix){
160 for(int i=0; i<array.length; i++){
161 int word=array[i];
162 for(int j=0; j<cellsPerWord; j++){
163 int x=word&valueMask;
164 sb.append(comma);
165 sb.append(x);
166 word>>>=cellBits;
167 comma=", ";
168 }
169 }
170 }
171 sb.append("]");
172 return sb.toString();
173 }
174
175 public double usedFraction(){return cellsUsed/(double)cells;}
176
177 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
178
179 public long cellsUsed(int mindepth){
180 long count=0;
181 for(int[] array : matrix){
182 if(array!=null){
183 for(int word : array){
184 while(word>0){
185 int x=word&valueMask;
186 if(x>=mindepth){count++;}
187 word>>>=cellBits;
188 }
189 }
190 }
191 }
192 return count;
193 }
194
195 public String mem(){
196 long mem=(cells*cellBits)/8;
197 if(mem<(1<<20)){
198 return (String.format(Locale.ROOT, "%.2f KB", mem*1d/(1<<10)));
199 }else if(mem<(1<<30)){
200 return (String.format(Locale.ROOT, "%.2f MB", mem*1d/(1<<20)));
201 }else{
202 return (String.format(Locale.ROOT, "%.2f GB", mem*1d/(1<<30)));
203 }
204 }
205
206 public static final int min(int x, int y){return x<y ? x : y;}
207 public static final int max(int x, int y){return x>y ? x : y;}
208 public static final long min(long x, long y){return x<y ? x : y;}
209 public static final long max(long x, long y){return x>y ? x : y;}
210
211 private long cellsUsed;
212
213 public final long cells;
214 public final int cellBits;
215 public final int maxValue;
216 public final int gap; //Set this for convenience on gapped tables to make sure you're using the right table.
217
218 private final int cellsPerWord;
219 private final int indexShift;
220 private final int valueMask;
221 private final int[][] matrix;
222
223 private static final int arrayBits=2;
224 private static final int numArrays=1<<arrayBits;
225 private static final int arrayMask=numArrays-1;
226
227 public static boolean verbose=false;
228
229
230 }