annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTable.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package kmer;
jpayne@68 2
jpayne@68 3 import java.util.ArrayList;
jpayne@68 4 import java.util.concurrent.atomic.AtomicLong;
jpayne@68 5 import java.util.concurrent.locks.Lock;
jpayne@68 6 import java.util.concurrent.locks.ReentrantLock;
jpayne@68 7
jpayne@68 8 import fileIO.ByteStreamWriter;
jpayne@68 9 import fileIO.TextStreamWriter;
jpayne@68 10 import shared.Primes;
jpayne@68 11 import shared.Tools;
jpayne@68 12 import structures.ByteBuilder;
jpayne@68 13 import structures.SuperLongList;
jpayne@68 14
jpayne@68 15 /**
jpayne@68 16 * @author Brian Bushnell
jpayne@68 17 * @date Oct 23, 2013
jpayne@68 18 *
jpayne@68 19 */
jpayne@68 20 public final class KmerTable extends AbstractKmerTable {
jpayne@68 21
jpayne@68 22 /*--------------------------------------------------------------*/
jpayne@68 23 /*---------------- Initialization ----------------*/
jpayne@68 24 /*--------------------------------------------------------------*/
jpayne@68 25
jpayne@68 26 public KmerTable(int initialSize, boolean autoResize_){
jpayne@68 27 if(initialSize>1){
jpayne@68 28 initialSize=(int)Tools.min(maxPrime, Primes.primeAtLeast(initialSize));
jpayne@68 29 }else{
jpayne@68 30 initialSize=1;
jpayne@68 31 }
jpayne@68 32 prime=initialSize;
jpayne@68 33 sizeLimit=(long) (initialSize*resizeMult);
jpayne@68 34 array=new KmerLink[prime];
jpayne@68 35 autoResize=autoResize_;
jpayne@68 36 }
jpayne@68 37
jpayne@68 38 /*--------------------------------------------------------------*/
jpayne@68 39 /*---------------- Public Methods ----------------*/
jpayne@68 40 /*--------------------------------------------------------------*/
jpayne@68 41
jpayne@68 42 @Override
jpayne@68 43 public int increment(final long kmer, final int incr){
jpayne@68 44 final int cell=(int)(kmer%prime);
jpayne@68 45 KmerLink n=array[cell], prev=null;
jpayne@68 46 while(n!=null && n.pivot!=kmer){
jpayne@68 47 prev=n;
jpayne@68 48 n=n.next;
jpayne@68 49 }
jpayne@68 50 if(n==null){
jpayne@68 51 n=new KmerLink(kmer, incr);
jpayne@68 52 size++;
jpayne@68 53 if(prev==null){
jpayne@68 54 array[cell]=n;
jpayne@68 55 }else{
jpayne@68 56 prev.next=n;
jpayne@68 57 }
jpayne@68 58 if(autoResize && size>sizeLimit){resize();}
jpayne@68 59 }else{
jpayne@68 60 n.value+=incr;
jpayne@68 61 if(n.value<0){n.value=Integer.MAX_VALUE;}
jpayne@68 62 }
jpayne@68 63 return n.value;
jpayne@68 64 }
jpayne@68 65
jpayne@68 66 @Override
jpayne@68 67 public int incrementAndReturnNumCreated(final long kmer, final int incr){
jpayne@68 68 final int cell=(int)(kmer%prime);
jpayne@68 69 KmerLink n=array[cell], prev=null;
jpayne@68 70 while(n!=null && n.pivot!=kmer){
jpayne@68 71 prev=n;
jpayne@68 72 n=n.next;
jpayne@68 73 }
jpayne@68 74 if(n==null){
jpayne@68 75 n=new KmerLink(kmer, incr);
jpayne@68 76 size++;
jpayne@68 77 if(prev==null){
jpayne@68 78 array[cell]=n;
jpayne@68 79 }else{
jpayne@68 80 prev.next=n;
jpayne@68 81 }
jpayne@68 82 if(autoResize && size>sizeLimit){resize();}
jpayne@68 83 return 1;
jpayne@68 84 }else{
jpayne@68 85 n.value+=incr;
jpayne@68 86 if(n.value<0){n.value=Integer.MAX_VALUE;}
jpayne@68 87 return 0;
jpayne@68 88 }
jpayne@68 89 }
jpayne@68 90
jpayne@68 91 @Override
jpayne@68 92 public int set(long kmer, int value){
jpayne@68 93 int x=1, cell=(int)(kmer%prime);
jpayne@68 94 final KmerLink n=array[cell];
jpayne@68 95 if(n==null){
jpayne@68 96 array[cell]=new KmerLink(kmer, value);
jpayne@68 97 }else{
jpayne@68 98 x=n.set(kmer, value);
jpayne@68 99 }
jpayne@68 100 size+=x;
jpayne@68 101 if(autoResize && size>sizeLimit){resize();}
jpayne@68 102 return x;
jpayne@68 103 }
jpayne@68 104
jpayne@68 105 @Override
jpayne@68 106 public int set(long kmer, int[] vals, int vlen) {
jpayne@68 107 throw new RuntimeException("Unimplemented.");
jpayne@68 108 }
jpayne@68 109
jpayne@68 110 @Override
jpayne@68 111 public int setIfNotPresent(long kmer, int value){
jpayne@68 112 int x=1, cell=(int)(kmer%prime);
jpayne@68 113 final KmerLink n=array[cell];
jpayne@68 114 if(n==null){
jpayne@68 115 array[cell]=new KmerLink(kmer, value);
jpayne@68 116 }else{
jpayne@68 117 x=n.setIfNotPresent(kmer, value);
jpayne@68 118 }
jpayne@68 119 size+=x;
jpayne@68 120 if(autoResize && size>sizeLimit){resize();}
jpayne@68 121 return x;
jpayne@68 122 }
jpayne@68 123
jpayne@68 124 @Override
jpayne@68 125 public int getValue(long kmer){
jpayne@68 126 int cell=(int)(kmer%prime);
jpayne@68 127 KmerLink n=array[cell];
jpayne@68 128 while(n!=null && n.pivot!=kmer){n=n.next;}
jpayne@68 129 return n==null ? 0 : n.value;
jpayne@68 130 }
jpayne@68 131
jpayne@68 132 @Override
jpayne@68 133 public int[] getValues(long kmer, int[] singleton){
jpayne@68 134 assert(array.length==0);
jpayne@68 135 singleton[0]=getValue(kmer);
jpayne@68 136 return singleton;
jpayne@68 137 }
jpayne@68 138
jpayne@68 139 @Override
jpayne@68 140 public boolean contains(long kmer){
jpayne@68 141 KmerLink node=get(kmer);
jpayne@68 142 return node!=null;
jpayne@68 143 }
jpayne@68 144
jpayne@68 145 /*--------------------------------------------------------------*/
jpayne@68 146 /*---------------- Ownership ----------------*/
jpayne@68 147 /*--------------------------------------------------------------*/
jpayne@68 148
jpayne@68 149 @Override
jpayne@68 150 public final void initializeOwnership(){
jpayne@68 151 for(KmerLink n : array){
jpayne@68 152 if(n!=null){n.initializeOwnership();}
jpayne@68 153 }
jpayne@68 154 }
jpayne@68 155
jpayne@68 156 @Override
jpayne@68 157 public final void clearOwnership(){
jpayne@68 158 for(KmerLink n : array){
jpayne@68 159 if(n!=null){n.clearOwnership();}
jpayne@68 160 }
jpayne@68 161 }
jpayne@68 162
jpayne@68 163 @Override
jpayne@68 164 public final int setOwner(final long kmer, final int newOwner){
jpayne@68 165 final int cell=(int)(kmer%prime);
jpayne@68 166 KmerLink n=array[cell];
jpayne@68 167 assert(n!=null);
jpayne@68 168 return n.setOwner(kmer, newOwner);
jpayne@68 169 }
jpayne@68 170
jpayne@68 171 @Override
jpayne@68 172 public final boolean clearOwner(final long kmer, final int owner){
jpayne@68 173 final int cell=(int)(kmer%prime);
jpayne@68 174 KmerLink n=array[cell];
jpayne@68 175 assert(n!=null);
jpayne@68 176 return n.clearOwner(kmer, owner);
jpayne@68 177 }
jpayne@68 178
jpayne@68 179 @Override
jpayne@68 180 public final int getOwner(final long kmer){
jpayne@68 181 final int cell=(int)(kmer%prime);
jpayne@68 182 KmerLink n=array[cell];
jpayne@68 183 assert(n!=null);
jpayne@68 184 return n.getOwner(kmer);
jpayne@68 185 }
jpayne@68 186
jpayne@68 187 /*--------------------------------------------------------------*/
jpayne@68 188 /*---------------- Nonpublic Methods ----------------*/
jpayne@68 189 /*--------------------------------------------------------------*/
jpayne@68 190
jpayne@68 191 @Override
jpayne@68 192 KmerLink get(long kmer){
jpayne@68 193 int cell=(int)(kmer%prime);
jpayne@68 194 KmerLink n=array[cell];
jpayne@68 195 while(n!=null && n.pivot!=kmer){n=n.next;}
jpayne@68 196 return n;
jpayne@68 197 }
jpayne@68 198
jpayne@68 199 boolean insert(KmerLink n){
jpayne@68 200 n.next=null;
jpayne@68 201 int cell=(int)(n.pivot%prime);
jpayne@68 202 if(array[cell]==null){
jpayne@68 203 array[cell]=n;
jpayne@68 204 return true;
jpayne@68 205 }
jpayne@68 206 return array[cell].insert(n);
jpayne@68 207 }
jpayne@68 208
jpayne@68 209 /*--------------------------------------------------------------*/
jpayne@68 210 /*---------------- Private Methods ----------------*/
jpayne@68 211 /*--------------------------------------------------------------*/
jpayne@68 212
jpayne@68 213 /*--------------------------------------------------------------*/
jpayne@68 214 /*---------------- Invalid Methods ----------------*/
jpayne@68 215 /*--------------------------------------------------------------*/
jpayne@68 216
jpayne@68 217 /*--------------------------------------------------------------*/
jpayne@68 218 /*---------------- Resizing and Rebalancing ----------------*/
jpayne@68 219 /*--------------------------------------------------------------*/
jpayne@68 220
jpayne@68 221 @Override
jpayne@68 222 boolean canResize() {return true;}
jpayne@68 223
jpayne@68 224 @Override
jpayne@68 225 public boolean canRebalance() {return false;}
jpayne@68 226
jpayne@68 227 @Override
jpayne@68 228 public long size() {return size;}
jpayne@68 229
jpayne@68 230 @Override
jpayne@68 231 public int arrayLength() {return array.length;}
jpayne@68 232
jpayne@68 233 @Override
jpayne@68 234 synchronized void resize(){
jpayne@68 235 // assert(false);
jpayne@68 236 // System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
jpayne@68 237 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
jpayne@68 238
jpayne@68 239 final long maxAllowedByLoadFactor=(long)(size*minLoadMult);
jpayne@68 240 final long minAllowedByLoadFactor=(long)(size*maxLoadMult);
jpayne@68 241 assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
jpayne@68 242 if(maxAllowedByLoadFactor<prime){return;}
jpayne@68 243
jpayne@68 244 long x=10+(long)(prime*resizeMult);
jpayne@68 245 x=Tools.max(x, minAllowedByLoadFactor);
jpayne@68 246 x=Tools.min(x, maxAllowedByLoadFactor);
jpayne@68 247
jpayne@68 248 int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
jpayne@68 249
jpayne@68 250 if(prime2<=prime){return;}
jpayne@68 251
jpayne@68 252 prime=prime2;
jpayne@68 253 // System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
jpayne@68 254 KmerLink[] old=array;
jpayne@68 255 array=new KmerLink[prime2];
jpayne@68 256 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
jpayne@68 257 for(int i=0; i<old.length; i++){
jpayne@68 258 if(old[i]!=null){
jpayne@68 259 old[i].traverseInfix(list);
jpayne@68 260 for(KmerLink n : list){insert(n);}
jpayne@68 261 list.clear();
jpayne@68 262 }
jpayne@68 263 }
jpayne@68 264 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
jpayne@68 265 }
jpayne@68 266
jpayne@68 267 @Override
jpayne@68 268 public void rebalance(){
jpayne@68 269 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
jpayne@68 270 for(int i=0; i<array.length; i++){
jpayne@68 271 if(array[i]!=null){array[i]=array[i].rebalance(list);}
jpayne@68 272 }
jpayne@68 273 }
jpayne@68 274
jpayne@68 275 /*--------------------------------------------------------------*/
jpayne@68 276 /*---------------- Info Dumping ----------------*/
jpayne@68 277 /*--------------------------------------------------------------*/
jpayne@68 278
jpayne@68 279 @Deprecated
jpayne@68 280 @Override
jpayne@68 281 public boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount){
jpayne@68 282 throw new RuntimeException("TODO");
jpayne@68 283 }
jpayne@68 284
jpayne@68 285 @Override
jpayne@68 286 public boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining){
jpayne@68 287 for(int i=0; i<array.length; i++){
jpayne@68 288 KmerLink node=array[i];
jpayne@68 289 if(node!=null && node.value>=mincount){
jpayne@68 290 node.dumpKmersAsBytes_MT(bsw, bb, k, mincount, maxcount, remaining);
jpayne@68 291 }
jpayne@68 292 }
jpayne@68 293 return true;
jpayne@68 294 }
jpayne@68 295
jpayne@68 296 @Deprecated
jpayne@68 297 @Override
jpayne@68 298 public boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining){
jpayne@68 299 throw new RuntimeException("TODO");
jpayne@68 300 }
jpayne@68 301
jpayne@68 302 @Deprecated
jpayne@68 303 @Override
jpayne@68 304 public void fillHistogram(long[] ca, int max){
jpayne@68 305 throw new RuntimeException("TODO");
jpayne@68 306 }
jpayne@68 307
jpayne@68 308 @Deprecated
jpayne@68 309 @Override
jpayne@68 310 public void fillHistogram(SuperLongList sll){
jpayne@68 311 throw new RuntimeException("TODO");
jpayne@68 312 }
jpayne@68 313
jpayne@68 314 @Deprecated
jpayne@68 315 @Override
jpayne@68 316 public void countGC(long[] gcCounts, int max){
jpayne@68 317 throw new RuntimeException("TODO");
jpayne@68 318 }
jpayne@68 319
jpayne@68 320 @Deprecated
jpayne@68 321 @Override
jpayne@68 322 public long regenerate(final int limit){
jpayne@68 323 throw new RuntimeException("TODO - remove zero-value links.");
jpayne@68 324 }
jpayne@68 325
jpayne@68 326 /*--------------------------------------------------------------*/
jpayne@68 327 /*---------------- Fields ----------------*/
jpayne@68 328 /*--------------------------------------------------------------*/
jpayne@68 329
jpayne@68 330 KmerLink[] array;
jpayne@68 331 int prime;
jpayne@68 332 long size=0;
jpayne@68 333 long sizeLimit;
jpayne@68 334 final boolean autoResize;
jpayne@68 335 private final Lock lock=new ReentrantLock();
jpayne@68 336
jpayne@68 337 @Override
jpayne@68 338 final Lock getLock(){return lock;}
jpayne@68 339
jpayne@68 340 /*--------------------------------------------------------------*/
jpayne@68 341 /*---------------- Static Fields ----------------*/
jpayne@68 342 /*--------------------------------------------------------------*/
jpayne@68 343
jpayne@68 344 final static int maxPrime=(int)Primes.primeAtMost(Integer.MAX_VALUE);
jpayne@68 345 final static float resizeMult=2f; //Resize by a minimum of this much
jpayne@68 346 final static float minLoadFactor=0.5f; //Resize by enough to get the load above this factor
jpayne@68 347 final static float maxLoadFactor=0.98f; //Resize by enough to get the load under this factor
jpayne@68 348 final static float minLoadMult=1/minLoadFactor;
jpayne@68 349 final static float maxLoadMult=1/maxLoadFactor;
jpayne@68 350
jpayne@68 351 }