annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/bloom/KCountArray4.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package bloom;
jpayne@68 2
jpayne@68 3 import java.util.Random;
jpayne@68 4
jpayne@68 5 import shared.Shared;
jpayne@68 6 import shared.Timer;
jpayne@68 7
jpayne@68 8
jpayne@68 9 /**
jpayne@68 10 *
jpayne@68 11 * Uses hashing rather than direct-mapping to support longer kmers.
jpayne@68 12 *
jpayne@68 13 * @author Brian Bushnell
jpayne@68 14 * @date Aug 17, 2012
jpayne@68 15 *
jpayne@68 16 */
jpayne@68 17 public class KCountArray4 extends KCountArray {
jpayne@68 18
jpayne@68 19 /**
jpayne@68 20 *
jpayne@68 21 */
jpayne@68 22 private static final long serialVersionUID = -1418539960644885681L;
jpayne@68 23
jpayne@68 24 public static void main(String[] args){
jpayne@68 25 long cells=Long.parseLong(args[0]);
jpayne@68 26 int bits=Integer.parseInt(args[1]);
jpayne@68 27 int gap=Integer.parseInt(args[2]);
jpayne@68 28 int hashes=Integer.parseInt(args[3]);
jpayne@68 29
jpayne@68 30 verbose=false;
jpayne@68 31
jpayne@68 32 KCountArray4 kca=new KCountArray4(cells, bits, gap, hashes);
jpayne@68 33
jpayne@68 34 System.out.println(kca.read(0));
jpayne@68 35 kca.increment(0);
jpayne@68 36 System.out.println(kca.read(0));
jpayne@68 37 kca.increment(0);
jpayne@68 38 System.out.println(kca.read(0));
jpayne@68 39 System.out.println();
jpayne@68 40
jpayne@68 41 System.out.println(kca.read(1));
jpayne@68 42 kca.increment(1);
jpayne@68 43 System.out.println(kca.read(1));
jpayne@68 44 kca.increment(1);
jpayne@68 45 System.out.println(kca.read(1));
jpayne@68 46 System.out.println();
jpayne@68 47
jpayne@68 48 System.out.println(kca.read(100));
jpayne@68 49 kca.increment(100);
jpayne@68 50 System.out.println(kca.read(100));
jpayne@68 51 kca.increment(100);
jpayne@68 52 System.out.println(kca.read(100));
jpayne@68 53 kca.increment(100);
jpayne@68 54 System.out.println(kca.read(100));
jpayne@68 55 System.out.println();
jpayne@68 56
jpayne@68 57
jpayne@68 58 System.out.println(kca.read(150));
jpayne@68 59 kca.increment(150);
jpayne@68 60 System.out.println(kca.read(150));
jpayne@68 61 System.out.println();
jpayne@68 62
jpayne@68 63 }
jpayne@68 64
jpayne@68 65 public KCountArray4(long cells_, int bits_, int gap_, int hashes_){
jpayne@68 66 super(cells_, bits_);
jpayne@68 67 long words=cells/cellsPerWord;
jpayne@68 68 assert(words/numArrays<=Integer.MAX_VALUE);
jpayne@68 69 int wordsPerArray=(int)(words/numArrays);
jpayne@68 70 hashes=hashes_;
jpayne@68 71 // System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
jpayne@68 72 // assert(false);
jpayne@68 73 matrix=new int[numArrays][wordsPerArray];
jpayne@68 74 assert(hashes>0 && hashes<=hashMasks.length);
jpayne@68 75 }
jpayne@68 76
jpayne@68 77 @Override
jpayne@68 78 public int read(final long rawKey){
jpayne@68 79 if(verbose){System.err.println("Reading raw key "+rawKey);}
jpayne@68 80 long key2=hash(rawKey, 0);
jpayne@68 81 int min=readHashed(key2);
jpayne@68 82 for(int i=1; i<hashes && min>0; i++){
jpayne@68 83 if(verbose){System.err.println("Reading. i="+i+", key2="+key2);}
jpayne@68 84 key2=Long.rotateRight(key2, hashBits);
jpayne@68 85 key2=hash(key2, i);
jpayne@68 86 if(verbose){System.err.println("Rot/hash. i="+i+", key2="+key2);}
jpayne@68 87 min=min(min, readHashed(key2));
jpayne@68 88 }
jpayne@68 89 return min;
jpayne@68 90 }
jpayne@68 91
jpayne@68 92 private int readHashed(long key){
jpayne@68 93 if(verbose){System.err.print("Reading hashed key "+key);}
jpayne@68 94 key=((key&Long.MAX_VALUE)%(cells-1));
jpayne@68 95 // System.out.println("key="+key);
jpayne@68 96 int arrayNum=(int)(key&arrayMask);
jpayne@68 97 // System.out.println("array="+arrayNum);
jpayne@68 98 key>>>=arrayBits;
jpayne@68 99 // System.out.println("key2="+key);
jpayne@68 100 int[] array=matrix[arrayNum];
jpayne@68 101 int index=(int)(key>>>indexShift);
jpayne@68 102 // assert(false) : indexShift;
jpayne@68 103 // System.out.println("index="+index);
jpayne@68 104 int word=array[index];
jpayne@68 105 // System.out.println("word="+Integer.toHexString(word));
jpayne@68 106 assert(word>>>(cellBits*key) == word>>>(cellBits*(key&cellMask)));
jpayne@68 107 // int cellShift=(int)(cellBits*(key&cellMask));
jpayne@68 108 int cellShift=(int)(cellBits*key);
jpayne@68 109 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+((int)((word>>>cellShift)&valueMask)));}
jpayne@68 110 // System.out.println("cellShift="+cellShift);
jpayne@68 111 return (int)((word>>>cellShift)&valueMask);
jpayne@68 112 }
jpayne@68 113
jpayne@68 114 @Override
jpayne@68 115 public void write(final long key, int value){
jpayne@68 116 throw new RuntimeException("Not allowed for this class.");
jpayne@68 117 }
jpayne@68 118
jpayne@68 119 @Override
jpayne@68 120 public void increment(final long rawKey, int incr){
jpayne@68 121 // verbose=(rawKey==32662670693L);
jpayne@68 122 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
jpayne@68 123 // verbose=true;
jpayne@68 124 assert(incr>0);
jpayne@68 125
jpayne@68 126 long key2=rawKey;
jpayne@68 127 if(hashes==1){
jpayne@68 128 key2=hash(key2, 0);
jpayne@68 129 int x=incrementHashedIfAtMost(key2, incr, maxValue-1);
jpayne@68 130 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 131 //return x;
jpayne@68 132 }
jpayne@68 133
jpayne@68 134 final int min=read(rawKey);
jpayne@68 135 if(min>=maxValue){return /*maxValue*/;}
jpayne@68 136
jpayne@68 137 assert(key2==rawKey);
jpayne@68 138 for(int i=0; i<hashes; i++){
jpayne@68 139 key2=hash(key2, i);
jpayne@68 140 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
jpayne@68 141 int x=incrementHashedIfAtMost(key2, incr, min);
jpayne@68 142 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 143 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
jpayne@68 144 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 145 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 146 key2=Long.rotateRight(key2, hashBits);
jpayne@68 147 }
jpayne@68 148 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue;
jpayne@68 149 // assert(false);
jpayne@68 150 //return min(min+incr, maxValue);
jpayne@68 151 }
jpayne@68 152
jpayne@68 153 /** Returns unincremented value */
jpayne@68 154 @Override
jpayne@68 155 public int incrementAndReturnUnincremented(long rawKey, int incr){
jpayne@68 156 // verbose=(rawKey==32662670693L);
jpayne@68 157 if(verbose){System.err.println("\n*** Incrementing raw key "+rawKey+" ***");}
jpayne@68 158 // verbose=true;
jpayne@68 159 assert(incr>0);
jpayne@68 160
jpayne@68 161 long key2=rawKey;
jpayne@68 162 if(hashes==1){
jpayne@68 163 key2=hash(key2, 0);
jpayne@68 164 int x=incrementHashedIfAtMost(key2, incr, maxValue-1);
jpayne@68 165 assert(x>=incr) : "original=?, new should be >="+(incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 166 return x;
jpayne@68 167 }
jpayne@68 168
jpayne@68 169 final int min=read(rawKey);
jpayne@68 170 if(min>=maxValue){return maxValue;}
jpayne@68 171
jpayne@68 172 assert(key2==rawKey);
jpayne@68 173 for(int i=0; i<hashes; i++){
jpayne@68 174 key2=hash(key2, i);
jpayne@68 175 if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
jpayne@68 176 int x=incrementHashedIfAtMost(key2, incr, min);
jpayne@68 177 assert(x>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 178 if(verbose){System.err.println("postIncr value="+readHashed(key2));}
jpayne@68 179 // assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 180 // assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
jpayne@68 181 key2=Long.rotateRight(key2, hashBits);
jpayne@68 182 }
jpayne@68 183 // assert(read(rawKey)==min+incr) : "original="+min+", new should be "+(min+incr)+", new="+read(rawKey)+", max="+maxValue;
jpayne@68 184 // assert(false);
jpayne@68 185 return min;
jpayne@68 186 }
jpayne@68 187
jpayne@68 188 private int incrementHashedIfAtMost(long key, int incr, int lim){
jpayne@68 189 if(verbose){System.err.print("incrementing hashed key "+key);}
jpayne@68 190 key=((key&Long.MAX_VALUE)%(cells-1));
jpayne@68 191 int arrayNum=(int)(key&arrayMask);
jpayne@68 192 key>>>=arrayBits;
jpayne@68 193 int[] array=matrix[arrayNum];
jpayne@68 194 int index=(int)(key>>>indexShift);
jpayne@68 195 int word=array[index];
jpayne@68 196 int cellShift=(int)(cellBits*key);
jpayne@68 197 int value=((word>>>cellShift)&valueMask);
jpayne@68 198 if(verbose){System.err.println(", array="+arrayNum+", index="+index+", cellShift="+(cellShift%32)+", value="+value+", limit="+lim);}
jpayne@68 199 if(value>lim){return value;}
jpayne@68 200 if(value==0 && incr>0){cellsUsed++;}
jpayne@68 201 value=min(value+incr, maxValue);
jpayne@68 202 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
jpayne@68 203 array[index]=word;
jpayne@68 204 return value;
jpayne@68 205 }
jpayne@68 206
jpayne@68 207 private int incrementHashed(long key, int incr){
jpayne@68 208 assert(incr>0);
jpayne@68 209 int arrayNum=(int)(key&arrayMask);
jpayne@68 210 key>>>=arrayBits;
jpayne@68 211 int[] array=matrix[arrayNum];
jpayne@68 212 int index=(int)(key>>>indexShift);
jpayne@68 213 int word=array[index];
jpayne@68 214 int cellShift=(int)(cellBits*key);
jpayne@68 215 int value=((word>>>cellShift)&valueMask);
jpayne@68 216 if(value==0 && incr>0){cellsUsed++;}
jpayne@68 217 value=min(value+incr, maxValue);
jpayne@68 218 word=(value<<cellShift)|(word&~((valueMask)<<cellShift));
jpayne@68 219 array[index]=word;
jpayne@68 220 return value;
jpayne@68 221 }
jpayne@68 222
jpayne@68 223 @Override
jpayne@68 224 public long[] transformToFrequency(){
jpayne@68 225 return transformToFrequency(matrix);
jpayne@68 226 }
jpayne@68 227
jpayne@68 228 @Override
jpayne@68 229 public String toContentsString(){
jpayne@68 230 StringBuilder sb=new StringBuilder();
jpayne@68 231 sb.append("[");
jpayne@68 232 String comma="";
jpayne@68 233 for(int[] array : matrix){
jpayne@68 234 for(int i=0; i<array.length; i++){
jpayne@68 235 int word=array[i];
jpayne@68 236 for(int j=0; j<cellsPerWord; j++){
jpayne@68 237 int x=word&valueMask;
jpayne@68 238 sb.append(comma);
jpayne@68 239 sb.append(x);
jpayne@68 240 word>>>=cellBits;
jpayne@68 241 comma=", ";
jpayne@68 242 }
jpayne@68 243 }
jpayne@68 244 }
jpayne@68 245 sb.append("]");
jpayne@68 246 return sb.toString();
jpayne@68 247 }
jpayne@68 248
jpayne@68 249 @Override
jpayne@68 250 public double usedFraction(){return cellsUsed/(double)cells;}
jpayne@68 251
jpayne@68 252 @Override
jpayne@68 253 public double usedFraction(int mindepth){return cellsUsed(mindepth)/(double)cells;}
jpayne@68 254
jpayne@68 255 @Override
jpayne@68 256 public long cellsUsed(int mindepth){
jpayne@68 257 long count=0;
jpayne@68 258 for(int[] array : matrix){
jpayne@68 259 if(array!=null){
jpayne@68 260 for(int word : array){
jpayne@68 261 while(word>0){
jpayne@68 262 int x=word&valueMask;
jpayne@68 263 if(x>=mindepth){count++;}
jpayne@68 264 word>>>=cellBits;
jpayne@68 265 }
jpayne@68 266 }
jpayne@68 267 }
jpayne@68 268 }
jpayne@68 269 return count;
jpayne@68 270 }
jpayne@68 271
jpayne@68 272
jpayne@68 273 @Override
jpayne@68 274 final long hash(long key, int row){
jpayne@68 275 int cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
jpayne@68 276 // int cell=(int)(hashCellMask&(key));
jpayne@68 277
jpayne@68 278 if(row==0){//Doublehash only first time
jpayne@68 279 key=key^hashMasks[(row+4)%hashMasks.length][cell];
jpayne@68 280 cell=(int)(hashCellMask&(key>>4));
jpayne@68 281 // cell=(int)(hashCellMask&(key>>hashBits));
jpayne@68 282 // cell=(int)((Long.MAX_VALUE&key)%(hashArrayLength-1));
jpayne@68 283 }
jpayne@68 284
jpayne@68 285 return key^hashMasks[row][cell];
jpayne@68 286 }
jpayne@68 287
jpayne@68 288 /**
jpayne@68 289 * @param i
jpayne@68 290 * @param j
jpayne@68 291 * @return
jpayne@68 292 */
jpayne@68 293 private static long[][] makeMasks(int rows, int cols) {
jpayne@68 294
jpayne@68 295 long seed;
jpayne@68 296 synchronized(KCountArray4.class){
jpayne@68 297 seed=counter;
jpayne@68 298 counter++;
jpayne@68 299 }
jpayne@68 300
jpayne@68 301 Timer t=new Timer();
jpayne@68 302 long[][] r=new long[rows][cols];
jpayne@68 303 Random randy=Shared.threadLocalRandom(seed);
jpayne@68 304 for(int i=0; i<r.length; i++){
jpayne@68 305 fillMasks(r[i], randy);
jpayne@68 306 }
jpayne@68 307 t.stop();
jpayne@68 308 if(t.elapsed>200000000L){System.out.println("Mask-creation time: "+t);}
jpayne@68 309 return r;
jpayne@68 310 }
jpayne@68 311
jpayne@68 312 private static void fillMasks(long[] r, Random randy) {
jpayne@68 313 // for(int i=0; i<r.length; i++){
jpayne@68 314 // long x=0;
jpayne@68 315 // while(Long.bitCount(x&0xFFFFFFFF)!=16){
jpayne@68 316 // x=randy.nextLong();
jpayne@68 317 // }
jpayne@68 318 // r[i]=(x&Long.MAX_VALUE);
jpayne@68 319 // }
jpayne@68 320
jpayne@68 321 final int hlen=(1<<hashBits);
jpayne@68 322 assert(r.length==hlen);
jpayne@68 323 int[] count1=new int[hlen];
jpayne@68 324 int[] count2=new int[hlen];
jpayne@68 325 final long mask=hlen-1;
jpayne@68 326
jpayne@68 327 for(int i=0; i<r.length; i++){
jpayne@68 328 long x=0;
jpayne@68 329 int y=0;
jpayne@68 330 int z=0;
jpayne@68 331 while(Long.bitCount(x&0xFFFFFFFFL)!=16){
jpayne@68 332 x=randy.nextLong();
jpayne@68 333 while(Long.bitCount(x&0xFFFFFFFFL)<16){
jpayne@68 334 x|=(1L<<randy.nextInt(32));
jpayne@68 335 }
jpayne@68 336 while(Long.bitCount(x&0xFFFFFFFFL)>16){
jpayne@68 337 x&=(~(1L<<randy.nextInt(32)));
jpayne@68 338 }
jpayne@68 339 while(Long.bitCount(x&0xFFFFFFFF00000000L)<16){
jpayne@68 340 x|=(1L<<(randy.nextInt(32)+32));
jpayne@68 341 }
jpayne@68 342 while(Long.bitCount(x&0xFFFFFFFF00000000L)>16){
jpayne@68 343 x&=(~(1L<<(randy.nextInt(32)+32)));
jpayne@68 344 }
jpayne@68 345
jpayne@68 346 // System.out.print(".");
jpayne@68 347 // y=(((int)(x&mask))^i);
jpayne@68 348 y=(((int)(x&mask)));
jpayne@68 349 z=(int)((x>>hashBits)&mask);
jpayne@68 350 if(count1[y]>0 || count2[z]>0){
jpayne@68 351 x=0;
jpayne@68 352 }
jpayne@68 353 }
jpayne@68 354 // System.out.println(Long.toBinaryString(x));
jpayne@68 355 r[i]=(x&Long.MAX_VALUE);
jpayne@68 356 count1[y]++;
jpayne@68 357 count2[z]++;
jpayne@68 358 }
jpayne@68 359
jpayne@68 360 }
jpayne@68 361
jpayne@68 362 public long cellsUsed(){return cellsUsed;}
jpayne@68 363
jpayne@68 364 private long cellsUsed;
jpayne@68 365 private final int[][] matrix;
jpayne@68 366 private final int hashes;
jpayne@68 367
jpayne@68 368
jpayne@68 369 private static final int hashBits=6;
jpayne@68 370 private static final int hashArrayLength=1<<hashBits;
jpayne@68 371 private static final int hashCellMask=hashArrayLength-1;
jpayne@68 372 private final long[][] hashMasks=makeMasks(8, hashArrayLength);
jpayne@68 373
jpayne@68 374 private static long counter=0;
jpayne@68 375
jpayne@68 376 }