diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/HashArray1D.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/HashArray1D.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,412 @@
+package kmer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import shared.KillSwitch;
+import shared.Primes;
+import shared.Tools;
+import structures.SuperLongList;
+
+/**
+ * Stores kmers in a long[] and counts in an int[], with a victim cache.
+ * @author Brian Bushnell
+ * @date Oct 25, 2013
+ *
+ */
+public final class HashArray1D extends HashArray {
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Initialization        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	public HashArray1D(int[] schedule_, long coreMask_){
+		super(schedule_, coreMask_, false);
+		values=allocInt1D(prime+extra);
+	}
+	
+	public HashArray1D(int initialSize, long coreMask, boolean autoResize_){
+		super(initialSize, coreMask, autoResize_, false);
+		values=allocInt1D(prime+extra);
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Public Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	@Override
+	public final int increment(final long kmer, final int incr){
+		int cell=kmerToCell(kmer);
+		
+		for(final int max=cell+extra; cell<max; cell++){
+			long n=array[cell];
+			if(n==kmer){
+				values[cell]+=incr;
+				if(values[cell]<0){values[cell]=Integer.MAX_VALUE;}
+				return values[cell];
+			}else if(n==NOT_PRESENT){
+				array[cell]=kmer;
+				size++;
+				values[cell]=incr;
+				if(autoResize && size+victims.size>sizeLimit){resize();}
+				return 1;
+			}
+		}
+		int x=victims.increment(kmer, incr);
+		if(autoResize && size+victims.size>sizeLimit){resize();}
+		return x;
+	}
+	
+	@Override
+	public final int incrementAndReturnNumCreated(final long kmer, final int incr){
+		int cell=kmerToCell(kmer);
+		
+		for(final int max=cell+extra; cell<max; cell++){
+			long n=array[cell];
+			if(n==kmer){
+				values[cell]+=incr;
+				if(values[cell]<0){values[cell]=Integer.MAX_VALUE;}
+				return 0;
+			}else if(n==NOT_PRESENT){
+				array[cell]=kmer;
+				size++;
+				values[cell]=incr;
+				if(autoResize && size+victims.size>sizeLimit){resize();}
+				return 1;
+			}
+		}
+		return victims.incrementAndReturnNumCreated(kmer, incr);
+	}
+	
+	@Override
+	public void fillHistogram(SuperLongList sll){
+		for(int i=0; i<values.length; i++){
+			int count=values[i];
+			if(count>0){sll.add(count);}
+		}
+		if(victims!=null){
+			victims.fillHistogram(sll);
+		}
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------      Nonpublic Methods       ----------------*/
+	/*--------------------------------------------------------------*/
+
+	@Override
+	public final int readCellValue(int cell) {
+		return values[cell];
+	}
+	
+	@Override
+	protected final int[] readCellValues(int cell, int[] singleton) {
+		singleton[0]=values[cell];
+		return singleton;
+	}
+	
+	@Override
+	protected final void insertValue(long kmer, int v, int cell) {
+		assert(array[cell]==kmer);
+		values[cell]=v;
+	}
+	
+	@Override
+	protected final void insertValue(long kmer, int[] vals, int cell, int vlen) {
+		assert(array[cell]==kmer);
+		assert(vals.length==1);
+		values[cell]=vals[0];
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------   Resizing and Rebalancing   ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	@Override
+	public final boolean canRebalance() {return false;}
+	
+//	@Override
+//	protected synchronized void resize_old(){
+////		assert(false);
+////		System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
+//		if(prime>=maxPrime){
+//			sizeLimit=0xFFFFFFFFFFFFL;
+//			return;
+//		}
+//		
+//		final long oldSize=size, oldVSize=victims.size;
+//		final long totalSize=oldSize+oldVSize;
+//		
+//		final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult);
+//		final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult);
+//
+////		sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime);
+//		
+//		assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
+//		if(maxAllowedByLoadFactor<prime){
+//			sizeLimit=(long)(maxLoadFactor*prime);
+//			return;
+//		}
+//		
+//		long x=10+(long)(prime*resizeMult);
+//		x=Tools.max(x, minAllowedByLoadFactor);
+//		x=Tools.min(x, maxAllowedByLoadFactor);
+//		
+//		int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
+//		
+//		if(prime2<=prime){
+//			sizeLimit=(long)(maxLoadFactor*prime);
+//			assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x;
+//			return;
+//		}
+//		
+//		prime=prime2;
+////		System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
+//		long[] oldk=array;
+//		int[] oldc=values;
+//		KmerNode[] oldv=victims.array;
+//		array=allocLong1D(prime2+extra);
+//		Arrays.fill(array, NOT_PRESENT);
+//		values=allocInt1D(prime2+extra);
+//		ArrayList<KmerNode> list=victims.toList();
+//		Arrays.fill(oldv, null);
+//		victims.size=0;
+//		size=0;
+//		sizeLimit=Long.MAX_VALUE;
+//		
+//		if(TWO_PASS_RESIZE){
+//			for(int i=0; i<oldk.length; i++){
+//				if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);}
+//			}
+//			for(KmerNode n : list){
+//				if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());}
+//			}
+//			for(int i=0; i<oldk.length; i++){
+//				if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);}
+//			}
+//			for(KmerNode n : list){
+//				if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());}
+//			}
+//		}else{
+//			for(int i=0; i<oldk.length; i++){
+//				if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);}
+//			}
+//			for(KmerNode n : list){
+//				if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());}
+//			}
+//		}
+//		
+//		assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size;
+//		
+//		sizeLimit=(long)(maxLoadFactor*prime);
+//	}
+	
+	@Override
+	protected synchronized void resize(){
+//		assert(false);
+//		System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
+		if(prime>=maxPrime){
+//			sizeLimit=0xFFFFFFFFFFFFL;
+			KillSwitch.memKill(new OutOfMemoryError());
+		}
+
+		final long oldSize=size, oldVSize=victims.size;
+		if(schedule!=null){
+			final long oldPrime=prime;
+			prime=nextScheduleSize();
+			if(prime<=oldPrime){KillSwitch.memKill(new OutOfMemoryError());}
+			sizeLimit=(long)((atMaxSize() ? maxLoadFactorFinal : maxLoadFactor)*prime);
+		}else{//Old method
+			final long totalSize=oldSize+oldVSize;
+
+			final long maxAllowedByLoadFactor=(long)(totalSize*minLoadMult);
+			final long minAllowedByLoadFactor=(long)(totalSize*maxLoadMult);
+
+			//		sizeLimit=Tools.min((long)(maxLoadFactor*prime), maxPrime);
+
+			assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
+			if(maxAllowedByLoadFactor<prime){
+				sizeLimit=(long)(maxLoadFactor*prime);
+				return;
+			}
+
+			long x=10+(long)(prime*resizeMult);
+			x=Tools.max(x, minAllowedByLoadFactor);
+			x=Tools.min(x, maxAllowedByLoadFactor);
+
+			int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
+
+			if(prime2<=prime){
+				sizeLimit=(long)(maxLoadFactor*prime);
+				assert(prime2==prime) : "Resizing to smaller array? "+totalSize+", "+prime+", "+x;
+				return;
+			}
+
+			prime=prime2;
+			sizeLimit=(long)(maxLoadFactor*prime);
+		}
+//		System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
+		long[] oldk=array;
+		int[] oldc=values;
+		KmerNode[] oldv=victims.array;
+		array=allocLong1D(prime+extra);
+		Arrays.fill(array, NOT_PRESENT);
+//		System.err.print(prime+" ");//123
+		values=allocInt1D(prime+extra);
+		ArrayList<KmerNode> list=victims.toList();
+		Arrays.fill(oldv, null);
+		victims.size=0;
+		size=0;
+		
+		if(TWO_PASS_RESIZE){
+			for(int i=0; i<oldk.length; i++){
+				if(oldk[i]>NOT_PRESENT && oldc[i]>1){set(oldk[i], oldc[i]);}
+			}
+			for(KmerNode n : list){
+				if(n.pivot>NOT_PRESENT && n.value()>1){set(n.pivot, n.value());}
+			}
+			for(int i=0; i<oldk.length; i++){
+				if(oldk[i]>NOT_PRESENT && oldc[i]<=1){set(oldk[i], oldc[i]);}
+			}
+			for(KmerNode n : list){
+				if(n.pivot>NOT_PRESENT && n.value()<=1){set(n.pivot, n.value());}
+			}
+		}else{
+			for(int i=0; i<oldk.length; i++){
+				if(oldk[i]>NOT_PRESENT){set(oldk[i], oldc[i]);}
+			}
+			for(KmerNode n : list){
+				if(n.pivot>NOT_PRESENT){set(n.pivot, n.value());}
+			}
+		}
+		
+		assert(oldSize+oldVSize==size+victims.size) : oldSize+", "+oldVSize+" -> "+size+", "+victims.size;
+	}
+	
+	@Deprecated
+	@Override
+	public void rebalance(){
+		throw new RuntimeException("Unimplemented.");
+	}
+	
+	@Override
+	public long regenerate(final int limit){
+		long sum=0;
+		assert(owners==null) : "Clear ownership before regeneration.";
+		for(int pos=0; pos<values.length; pos++){
+			final long key=array[pos];
+			if(key>=0){
+				final int value=values[pos];
+				values[pos]=NOT_PRESENT;
+				array[pos]=NOT_PRESENT;
+				size--;
+				if(value>limit){
+					set(key, value);
+				}else{
+					sum++;
+				}
+			}
+		}
+		
+		ArrayList<KmerNode> nodes=victims.toList();
+		victims.clear();
+		for(KmerNode node : nodes){
+			int value=node.value();
+			if(value<=limit){
+				sum++;
+			}else{
+				set(node.pivot, node.value());
+			}
+		}
+		
+		return sum;
+	}
+	
+	@Override
+	public String toString(){
+		return Arrays.toString(array);
+	}
+	
+	public Walker walk(){
+		return new Walker1D();
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------            Fields            ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	private int[] values;
+	
+	public int[] values(){return values;}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------            Walker            ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	public class Walker1D extends Walker {
+		
+		Walker1D(){
+			ha=HashArray1D.this;
+			victims=ha.victims().toList();
+		}
+		
+		/** 
+		 * Fills this object with the next key and value.
+		 * @return True if successful.
+		 */
+		public boolean next(){
+			while(i<values.length && values[i]==NOT_XPRESENT){i++;}
+			if(i<values.length){
+				kmer=array[i];
+				value=values[i];
+				assert(value!=NOT_XPRESENT);
+				i++;
+				return true;
+			}
+			if(i2<victims.size()){
+				KmerNode kn=victims.get(i2);
+				kmer=kn.pivot;
+				value=kn.value();
+				assert(value!=NOT_XPRESENT);
+				i2++;
+				return true;
+			}
+			kmer=-1;
+			value=NOT_XPRESENT;
+			return false;
+		}
+		
+		public long kmer(){return kmer;}
+		public int value(){return value;}
+		
+		/** Hash map over which this is walking */
+		private HashArray1D ha;
+		/** Victim list of the hash map */
+		private ArrayList<KmerNode> victims;
+		
+		private long kmer;
+		private int value;
+
+		/** Potential next kmer cell; may point to an empty cell */
+		private int i=0;
+		/** Next victim in list */
+		private int i2=0;
+	}
+	
+	//TODO: Remove after fixing array initialization
+	private static final int NOT_XPRESENT=0;
+
+	public long calcMem() {
+		long mem=0;
+		mem+=(array.length*8);
+		mem+=(values.length*4);
+		mem+=(owners==null ? 0 : owners.length()*4);
+		for(KmerNode kn : victims.array){
+			mem+=8;
+			if(kn!=null){mem+=kn.calcMem();}
+		}
+		// TODO Auto-generated method stub
+		return mem;
+	}
+
+	
+}