Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/HashArray1D.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/HashArray1D.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,412 @@ +package kmer; + +import java.util.ArrayList; +import java.util.Arrays; + +import shared.KillSwitch; +import shared.Primes; +import shared.Tools; +import structures.SuperLongList; + +/** + * Stores kmers in a long[] and counts in an int[], with a victim cache. + * @author Brian Bushnell + * @date Oct 25, 2013 + * + */ +public final class HashArray1D extends HashArray { + + /*--------------------------------------------------------------*/ + /*---------------- Initialization ----------------*/ + /*--------------------------------------------------------------*/ + + public HashArray1D(int[] schedule_, long coreMask_){ + super(schedule_, coreMask_, false); + values=allocInt1D(prime+extra); + } + + public HashArray1D(int initialSize, long coreMask, boolean autoResize_){ + super(initialSize, coreMask, autoResize_, false); + values=allocInt1D(prime+extra); + } + + /*--------------------------------------------------------------*/ + /*---------------- Public Methods ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public final int increment(final long kmer, final int incr){ + int cell=kmerToCell(kmer); + + for(final int max=cell+extra; cell<max; cell++){ + long n=array[cell]; + if(n==kmer){ + values[cell]+=incr; + if(values[cell]<0){values[cell]=Integer.MAX_VALUE;} + return values[cell]; + }else if(n==NOT_PRESENT){ + array[cell]=kmer; + size++; + values[cell]=incr; + if(autoResize && size+victims.size>sizeLimit){resize();} + return 1; + } + } + int x=victims.increment(kmer, incr); + if(autoResize && size+victims.size>sizeLimit){resize();} + return x; + } + + @Override + public final int incrementAndReturnNumCreated(final long kmer, final int incr){ + int cell=kmerToCell(kmer); + + for(final int max=cell+extra; cell<max; cell++){ + long n=array[cell]; + if(n==kmer){ + values[cell]+=incr; + if(values[cell]<0){values[cell]=Integer.MAX_VALUE;} + return 0; + }else if(n==NOT_PRESENT){ + array[cell]=kmer; + size++; + values[cell]=incr; + if(autoResize && size+victims.size>sizeLimit){resize();} + return 1; + } + } + return victims.incrementAndReturnNumCreated(kmer, incr); + } + + @Override + public void fillHistogram(SuperLongList sll){ + for(int i=0; i<values.length; i++){ + int count=values[i]; + if(count>0){sll.add(count);} + } + if(victims!=null){ + victims.fillHistogram(sll); + } + } + + /*--------------------------------------------------------------*/ + /*---------------- Nonpublic Methods ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public final int readCellValue(int cell) { + return values[cell]; + } + + @Override + protected final int[] readCellValues(int cell, int[] singleton) { + singleton[0]=values[cell]; + return singleton; + } + + @Override + protected final void insertValue(long kmer, int v, int cell) { + assert(array[cell]==kmer); + values[cell]=v; + } + + @Override + protected final void insertValue(long kmer, int[] vals, int cell, int vlen) { + assert(array[cell]==kmer); + assert(vals.length==1); + values[cell]=vals[0]; + } + + /*--------------------------------------------------------------*/ + /*---------------- Resizing and Rebalancing ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public final boolean canRebalance() {return false;} + +// @Override +// protected synchronized void resize_old(){ +//// assert(false); +//// System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); +// if(prime>=maxPrime){ +// sizeLimit=0xFFFFFFFFFFFFL; +// return; +// } +// +// final long oldSize=size, oldVSize=victims.size; +// final long totalSize=oldSize+oldVSize; +// +// final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult); +// final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult); +// +//// sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime); +// +// assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); +// if(maxAllowedByLoadFactor<prime){ +// sizeLimit=(long)(maxLoadFactor*prime); +// return; +// } +// +// long x=10+(long)(prime*resizeMult); +// x=Tools.max(x, minAllowedByLoadFactor); +// x=Tools.min(x, maxAllowedByLoadFactor); +// +// int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); +// +// if(prime2<=prime){ +// sizeLimit=(long)(maxLoadFactor*prime); +// assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x; +// return; +// } +// +// prime=prime2; +//// System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); +// long[] oldk=array; +// int[] oldc=values; +// KmerNode[] oldv=victims.array; +// array=allocLong1D(prime2+extra); +// Arrays.fill(array, NOT_PRESENT); +// values=allocInt1D(prime2+extra); +// ArrayList<KmerNode> list=victims.toList(); +// Arrays.fill(oldv, null); +// victims.size=0; +// size=0; +// sizeLimit=Long.MAX_VALUE; +// +// if(TWO_PASS_RESIZE){ +// for(int i=0; i<oldk.length; i++){ +// if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);} +// } +// for(KmerNode n : list){ +// if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());} +// } +// for(int i=0; i<oldk.length; i++){ +// if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);} +// } +// for(KmerNode n : list){ +// if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());} +// } +// }else{ +// for(int i=0; i<oldk.length; i++){ +// if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);} +// } +// for(KmerNode n : list){ +// if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());} +// } +// } +// +// assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size; +// +// sizeLimit=(long)(maxLoadFactor*prime); +// } + + @Override + protected synchronized void resize(){ +// assert(false); +// System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); + if(prime>=maxPrime){ +// sizeLimit=0xFFFFFFFFFFFFL; + KillSwitch.memKill(new OutOfMemoryError()); + } + + final long oldSize=size, oldVSize=victims.size; + if(schedule!=null){ + final long oldPrime=prime; + prime=nextScheduleSize(); + if(prime<=oldPrime){KillSwitch.memKill(new OutOfMemoryError());} + sizeLimit=(long)((atMaxSize() ? maxLoadFactorFinal : maxLoadFactor)*prime); + }else{//Old method + final long totalSize=oldSize+oldVSize; + + final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult); + final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult); + + // sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime); + + assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); + if(maxAllowedByLoadFactor<prime){ + sizeLimit=(long)(maxLoadFactor*prime); + return; + } + + long x=10+(long)(prime*resizeMult); + x=Tools.max(x, minAllowedByLoadFactor); + x=Tools.min(x, maxAllowedByLoadFactor); + + int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); + + if(prime2<=prime){ + sizeLimit=(long)(maxLoadFactor*prime); + assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x; + return; + } + + prime=prime2; + sizeLimit=(long)(maxLoadFactor*prime); + } +// System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); + long[] oldk=array; + int[] oldc=values; + KmerNode[] oldv=victims.array; + array=allocLong1D(prime+extra); + Arrays.fill(array, NOT_PRESENT); +// System.err.print(prime+" ");//123 + values=allocInt1D(prime+extra); + ArrayList<KmerNode> list=victims.toList(); + Arrays.fill(oldv, null); + victims.size=0; + size=0; + + if(TWO_PASS_RESIZE){ + for(int i=0; i<oldk.length; i++){ + if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);} + } + for(KmerNode n : list){ + if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());} + } + for(int i=0; i<oldk.length; i++){ + if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);} + } + for(KmerNode n : list){ + if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());} + } + }else{ + for(int i=0; i<oldk.length; i++){ + if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);} + } + for(KmerNode n : list){ + if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());} + } + } + + assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size; + } + + @Deprecated + @Override + public void rebalance(){ + throw new RuntimeException("Unimplemented."); + } + + @Override + public long regenerate(final int limit){ + long sum=0; + assert(owners==null) : "Clear ownership before regeneration."; + for(int pos=0; pos<values.length; pos++){ + final long key=array[pos]; + if(key>=0){ + final int value=values[pos]; + values[pos]=NOT_PRESENT; + array[pos]=NOT_PRESENT; + size--; + if(value>limit){ + set(key, value); + }else{ + sum++; + } + } + } + + ArrayList<KmerNode> nodes=victims.toList(); + victims.clear(); + for(KmerNode node : nodes){ + int value=node.value(); + if(value<=limit){ + sum++; + }else{ + set(node.pivot, node.value()); + } + } + + return sum; + } + + @Override + public String toString(){ + return Arrays.toString(array); + } + + public Walker walk(){ + return new Walker1D(); + } + + /*--------------------------------------------------------------*/ + /*---------------- Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private int[] values; + + public int[] values(){return values;} + + /*--------------------------------------------------------------*/ + /*---------------- Walker ----------------*/ + /*--------------------------------------------------------------*/ + + public class Walker1D extends Walker { + + Walker1D(){ + ha=HashArray1D.this; + victims=ha.victims().toList(); + } + + /** + * Fills this object with the next key and value. + * @return True if successful. + */ + public boolean next(){ + while(i<values.length && values[i]==NOT_XPRESENT){i++;} + if(i<values.length){ + kmer=array[i]; + value=values[i]; + assert(value!=NOT_XPRESENT); + i++; + return true; + } + if(i2<victims.size()){ + KmerNode kn=victims.get(i2); + kmer=kn.pivot; + value=kn.value(); + assert(value!=NOT_XPRESENT); + i2++; + return true; + } + kmer=-1; + value=NOT_XPRESENT; + return false; + } + + public long kmer(){return kmer;} + public int value(){return value;} + + /** Hash map over which this is walking */ + private HashArray1D ha; + /** Victim list of the hash map */ + private ArrayList<KmerNode> victims; + + private long kmer; + private int value; + + /** Potential next kmer cell; may point to an empty cell */ + private int i=0; + /** Next victim in list */ + private int i2=0; + } + + //TODO: Remove after fixing array initialization + private static final int NOT_XPRESENT=0; + + public long calcMem() { + long mem=0; + mem+=(array.length*8); + mem+=(values.length*4); + mem+=(owners==null ? 0 : owners.length()*4); + for(KmerNode kn : victims.array){ + mem+=8; + if(kn!=null){mem+=kn.calcMem();} + } + // TODO Auto-generated method stub + return mem; + } + + +}