Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/HashArray1D.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package kmer; import java.util.ArrayList; import java.util.Arrays; import shared.KillSwitch; import shared.Primes; import shared.Tools; import structures.SuperLongList; /** * Stores kmers in a long[] and counts in an int[], with a victim cache. * @author Brian Bushnell * @date Oct 25, 2013 * */ public final class HashArray1D extends HashArray { /*--------------------------------------------------------------*/ /*---------------- Initialization ----------------*/ /*--------------------------------------------------------------*/ public HashArray1D(int[] schedule_, long coreMask_){ super(schedule_, coreMask_, false); values=allocInt1D(prime+extra); } public HashArray1D(int initialSize, long coreMask, boolean autoResize_){ super(initialSize, coreMask, autoResize_, false); values=allocInt1D(prime+extra); } /*--------------------------------------------------------------*/ /*---------------- Public Methods ----------------*/ /*--------------------------------------------------------------*/ @Override public final int increment(final long kmer, final int incr){ int cell=kmerToCell(kmer); for(final int max=cell+extra; cell<max; cell++){ long n=array[cell]; if(n==kmer){ values[cell]+=incr; if(values[cell]<0){values[cell]=Integer.MAX_VALUE;} return values[cell]; }else if(n==NOT_PRESENT){ array[cell]=kmer; size++; values[cell]=incr; if(autoResize && size+victims.size>sizeLimit){resize();} return 1; } } int x=victims.increment(kmer, incr); if(autoResize && size+victims.size>sizeLimit){resize();} return x; } @Override public final int incrementAndReturnNumCreated(final long kmer, final int incr){ int cell=kmerToCell(kmer); for(final int max=cell+extra; cell<max; cell++){ long n=array[cell]; if(n==kmer){ values[cell]+=incr; if(values[cell]<0){values[cell]=Integer.MAX_VALUE;} return 0; }else if(n==NOT_PRESENT){ array[cell]=kmer; size++; values[cell]=incr; if(autoResize && size+victims.size>sizeLimit){resize();} return 1; } } return victims.incrementAndReturnNumCreated(kmer, incr); } @Override public void fillHistogram(SuperLongList sll){ for(int i=0; i<values.length; i++){ int count=values[i]; if(count>0){sll.add(count);} } if(victims!=null){ victims.fillHistogram(sll); } } /*--------------------------------------------------------------*/ /*---------------- Nonpublic Methods ----------------*/ /*--------------------------------------------------------------*/ @Override public final int readCellValue(int cell) { return values[cell]; } @Override protected final int[] readCellValues(int cell, int[] singleton) { singleton[0]=values[cell]; return singleton; } @Override protected final void insertValue(long kmer, int v, int cell) { assert(array[cell]==kmer); values[cell]=v; } @Override protected final void insertValue(long kmer, int[] vals, int cell, int vlen) { assert(array[cell]==kmer); assert(vals.length==1); values[cell]=vals[0]; } /*--------------------------------------------------------------*/ /*---------------- Resizing and Rebalancing ----------------*/ /*--------------------------------------------------------------*/ @Override public final boolean canRebalance() {return false;} // @Override // protected synchronized void resize_old(){ //// assert(false); //// System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); // if(prime>=maxPrime){ // sizeLimit=0xFFFFFFFFFFFFL; // return; // } // // final long oldSize=size, oldVSize=victims.size; // final long totalSize=oldSize+oldVSize; // // final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult); // final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult); // //// sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime); // // assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); // if(maxAllowedByLoadFactor<prime){ // sizeLimit=(long)(maxLoadFactor*prime); // return; // } // // long x=10+(long)(prime*resizeMult); // x=Tools.max(x, minAllowedByLoadFactor); // x=Tools.min(x, maxAllowedByLoadFactor); // // int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); // // if(prime2<=prime){ // sizeLimit=(long)(maxLoadFactor*prime); // assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x; // return; // } // // prime=prime2; //// System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); // long[] oldk=array; // int[] oldc=values; // KmerNode[] oldv=victims.array; // array=allocLong1D(prime2+extra); // Arrays.fill(array, NOT_PRESENT); // values=allocInt1D(prime2+extra); // ArrayList<KmerNode> list=victims.toList(); // Arrays.fill(oldv, null); // victims.size=0; // size=0; // sizeLimit=Long.MAX_VALUE; // // if(TWO_PASS_RESIZE){ // for(int i=0; i<oldk.length; i++){ // if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);} // } // for(KmerNode n : list){ // if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());} // } // for(int i=0; i<oldk.length; i++){ // if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);} // } // for(KmerNode n : list){ // if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());} // } // }else{ // for(int i=0; i<oldk.length; i++){ // if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);} // } // for(KmerNode n : list){ // if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());} // } // } // // assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size; // // sizeLimit=(long)(maxLoadFactor*prime); // } @Override protected synchronized void resize(){ // assert(false); // System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); if(prime>=maxPrime){ // sizeLimit=0xFFFFFFFFFFFFL; KillSwitch.memKill(new OutOfMemoryError()); } final long oldSize=size, oldVSize=victims.size; if(schedule!=null){ final long oldPrime=prime; prime=nextScheduleSize(); if(prime<=oldPrime){KillSwitch.memKill(new OutOfMemoryError());} sizeLimit=(long)((atMaxSize() ? maxLoadFactorFinal : maxLoadFactor)*prime); }else{//Old method final long totalSize=oldSize+oldVSize; final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult); final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult); // sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime); assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); if(maxAllowedByLoadFactor<prime){ sizeLimit=(long)(maxLoadFactor*prime); return; } long x=10+(long)(prime*resizeMult); x=Tools.max(x, minAllowedByLoadFactor); x=Tools.min(x, maxAllowedByLoadFactor); int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); if(prime2<=prime){ sizeLimit=(long)(maxLoadFactor*prime); assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x; return; } prime=prime2; sizeLimit=(long)(maxLoadFactor*prime); } // System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); long[] oldk=array; int[] oldc=values; KmerNode[] oldv=victims.array; array=allocLong1D(prime+extra); Arrays.fill(array, NOT_PRESENT); // System.err.print(prime+" ");//123 values=allocInt1D(prime+extra); ArrayList<KmerNode> list=victims.toList(); Arrays.fill(oldv, null); victims.size=0; size=0; if(TWO_PASS_RESIZE){ for(int i=0; i<oldk.length; i++){ if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);} } for(KmerNode n : list){ if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());} } for(int i=0; i<oldk.length; i++){ if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);} } for(KmerNode n : list){ if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());} } }else{ for(int i=0; i<oldk.length; i++){ if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);} } for(KmerNode n : list){ if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());} } } assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size; } @Deprecated @Override public void rebalance(){ throw new RuntimeException("Unimplemented."); } @Override public long regenerate(final int limit){ long sum=0; assert(owners==null) : "Clear ownership before regeneration."; for(int pos=0; pos<values.length; pos++){ final long key=array[pos]; if(key>=0){ final int value=values[pos]; values[pos]=NOT_PRESENT; array[pos]=NOT_PRESENT; size--; if(value>limit){ set(key, value); }else{ sum++; } } } ArrayList<KmerNode> nodes=victims.toList(); victims.clear(); for(KmerNode node : nodes){ int value=node.value(); if(value<=limit){ sum++; }else{ set(node.pivot, node.value()); } } return sum; } @Override public String toString(){ return Arrays.toString(array); } public Walker walk(){ return new Walker1D(); } /*--------------------------------------------------------------*/ /*---------------- Fields ----------------*/ /*--------------------------------------------------------------*/ private int[] values; public int[] values(){return values;} /*--------------------------------------------------------------*/ /*---------------- Walker ----------------*/ /*--------------------------------------------------------------*/ public class Walker1D extends Walker { Walker1D(){ ha=HashArray1D.this; victims=ha.victims().toList(); } /** * Fills this object with the next key and value. * @return True if successful. */ public boolean next(){ while(i<values.length && values[i]==NOT_XPRESENT){i++;} if(i<values.length){ kmer=array[i]; value=values[i]; assert(value!=NOT_XPRESENT); i++; return true; } if(i2<victims.size()){ KmerNode kn=victims.get(i2); kmer=kn.pivot; value=kn.value(); assert(value!=NOT_XPRESENT); i2++; return true; } kmer=-1; value=NOT_XPRESENT; return false; } public long kmer(){return kmer;} public int value(){return value;} /** Hash map over which this is walking */ private HashArray1D ha; /** Victim list of the hash map */ private ArrayList<KmerNode> victims; private long kmer; private int value; /** Potential next kmer cell; may point to an empty cell */ private int i=0; /** Next victim in list */ private int i2=0; } //TODO: Remove after fixing array initialization private static final int NOT_XPRESENT=0; public long calcMem() { long mem=0; mem+=(array.length*8); mem+=(values.length*4); mem+=(owners==null ? 0 : owners.length()*4); for(KmerNode kn : victims.array){ mem+=8; if(kn!=null){mem+=kn.calcMem();} } // TODO Auto-generated method stub return mem; } }