view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SketchIndex.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package sketch;

import java.util.ArrayList;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import kmer.AbstractKmerTable;
import kmer.HashBuffer;
import kmer.KmerTableSet;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import structures.AbstractBitSet;
import structures.IntHashMap;
import structures.IntHashSetList;
import structures.IntList;

public class SketchIndex extends SketchObject {
	
	public SketchIndex(ArrayList<Sketch> refs){
		refSketches=refs;
		tables=new KmerTableSet(new String[] {"ways="+WAYS, "tabletype="+AbstractKmerTable.ARRAYHF, "prealloc="+(prealloc>0 ? ""+prealloc : "f")}, 
				20+(defaultParams.trackCounts() ? 4 : 0)+18);//An extra 18 bytes per kmer because a lot of them occur multiple times
		tables.allocateTables();
		tableArray=tables.tables();
	}
	
	public void load(){
		spawnIndexThreads();
		if(useWhitelist){
			assert(!Whitelist.exists());
			Whitelist.initialize(tableArray);
		}
	}
	
	/** Spawn index threads */
	private void spawnIndexThreads(){
		
		//Do anything necessary prior to processing
		
		//Determine how many threads may be used
		final int threads=Shared.threads();
		ArrayList<IndexThread> alht=new ArrayList<IndexThread>(threads);
		AtomicInteger ai=new AtomicInteger(0);
		AtomicLong totalKeys=new AtomicLong(0);
		AtomicLong uniqueKeys=new AtomicLong(0);
		for(int i=0; i<threads; i++){
			alht.add(new IndexThread(ai, totalKeys, uniqueKeys));
		}
		
		//Start the threads
		for(IndexThread pt : alht){
			pt.start();
		}
		
		//Wait for completion of all threads
		boolean success=true;
		long codesProcessed=0;
		for(IndexThread pt : alht){
			
			//Wait until this thread has terminated
			while(pt.getState()!=Thread.State.TERMINATED){
				try {
					//Attempt a join operation
					pt.join();
					synchronized(pt){
						codesProcessed+=pt.codesProcessedT;
					}
				} catch (InterruptedException e) {
					//Potentially handle this, if it is expected to occur
					e.printStackTrace();
				}
			}
			success&=pt.success;
		}
		
		//Track whether any threads failed
		if(!success){errorState=true;}
		
		System.err.println("Indexed "+uniqueKeys+" unique and "+totalKeys+" total hashcodes."); //For some reason codesProcessed is nondeterministic.
		
		//Do anything necessary after processing
//		System.gc();
	}
	
	/*--------------------------------------------------------------*/
	
	public SketchResults getSketches(Sketch a, DisplayParams params){
		if(useIntMap){
			return getSketchesMap(a, params);
		}else{
			return getSketchesList(a, params);
		}
	}
	
	/** Return true if added. */
	private boolean addToTaxSet(int sketchID, IntHashSetList taxSet, int taxLevelExtended){
		Sketch sk=refSketches.get(sketchID);
		int taxID=sk.taxID;
		if(taxID<0 || taxID>=minFakeID){return false;}
		taxID=taxtree.getIdAtLevelExtended(taxID, taxLevelExtended);
		return taxSet.add(taxID);
	}
	
	public SketchResults getSketchesList(final Sketch a, DisplayParams params){
		final int minHits=params.minHits, contamLevel=params.contamLevel();
		final boolean countContamHits=params.needContamCounts();//, metaFilter=params.hasMetaFilters(), taxFilter=params.hasTaxFilters();
		final Timer t=(printTime ? new Timer() : null);
		
		final int[] singleton=new int[1];
		final IntList idList=new IntList(Tools.min(targetSketchSize, indexLimit, 1000));
		AbstractBitSet abs=a.indexBitSet();
		assert((abs==null)!=countContamHits);
		
		final IntHashSetList taxSet;
		final int[][] taxHits;
		if(contamLevel>=0){
			taxSet=new IntHashSetList(31);
			taxHits=new int[a.length()][];
			assert(taxtree!=null) : "A TaxTree is required for this operation.";
		}else{
			taxSet=null;
			taxHits=null;
		}
		
		for(int i=0; i<a.keys.length; i++){
			long key=a.keys[i];
			AbstractKmerTable set=tableArray[(int)(key%WAYS)];
//			System.err.println(set.getValue(key));
			final int[] ids=set.getValues(key, singleton);
//			System.err.println(Arrays.toString(ids));
			if(ids!=null && ids[0]>=0){
				int incr=0;
				for(int id : ids){
					if(id>=0){
						final int trueID=id-1;//Minimum id is 1, indicating sketch 0.
						idList.add(trueID);//Minimum id is 1, indicating sketch 0.
						incr++;
						if(taxSet!=null){addToTaxSet(trueID, taxSet, contamLevel);}
					}
				}
				if(countContamHits && incr>0){abs.increment(i, incr);}
				if(taxSet!=null && taxSet.size()>0){
					taxHits[i]=taxSet.toArray();
					taxSet.clear();
				}
			}
			
		}
		
//		assert(abs!=null);
//		assert(false) : abs.cardinality();
		
		if(printTime){
			t.stop("\nTime for searching index: \t");
			t.start();
		}
		
//		System.err.println("idList.size:"+idList.size);
		if(idList.size<minHits){return new SketchResults(a);}//null breaks some things
		idList.sort();
		
		if(printTime){
			t.stop("Time for sorting "+idList.size()+" hits:\t");
			t.start();
		}
		
		ArrayList<Sketch> list=new ArrayList<Sketch>(Tools.min(8, idList.size));

		int last=-1;
		int hits=0;
		for(int i=0; i<idList.size; i++){
			int id=idList.get(i);
			if(id==last){
//				System.err.println("A: "+last+", "+id+", "+count+", "+minHits);
				hits++;
			}else{
//				System.err.println("B: "+last+", "+id+", "+count+", "+minHits);
				if(last>-1 && (hits>=minHits)){
					final Sketch ref=refSketches.get(last);
					list.add(ref);
				}
				last=id;
				hits=0;
			}
		}
		if(last>-1 && (hits>=minHits)){
			final Sketch ref=refSketches.get(last);
//			if((!metaFilter || ref.passesMeta(params)) && (!taxFilter || params.passesFilter(ref))){list.add(ref);}
			list.add(ref);
		}
		if(printTime){
			t.stop("Time for fetching sketches: \t");
		}
		return list.isEmpty() ? new SketchResults(a) : new SketchResults(a, list, taxHits);
	}
	
//	static ThreadLocal<IntHashMap> intMapHolder=new ThreadLocal<IntHashMap>();
	
	public final int[] getSketchIdsMap(long key, int[] singleton){
		AbstractKmerTable set=tableArray[(int)(key%WAYS)];
		final int[] ids=set.getValues(key, singleton);
		return ids;
	}
	
	public SketchResults getSketchesMap(final Sketch a, DisplayParams params){
		final int minHits=params.minHits, contamLevel=params.contamLevel();
		final boolean countContamHits=params.needContamCounts();//, metaFilter=params.hasMetaFilters(), taxFilter=params.hasTaxFilters();
		final Timer t=(printTime ? new Timer() : null);
		final int[] singleton=new int[1];
		
		final IntHashMap idMap=new IntHashMap(Tools.min(targetSketchSize, indexLimit, intMapSize), 0.7f);
		
		AbstractBitSet abs=a.indexBitSet();
		assert((abs==null)!=countContamHits);
		
		final IntHashSetList taxSet;
		final int[][] taxHits;
		if(contamLevel>=0){
			taxSet=new IntHashSetList(31);
			taxHits=new int[a.length()][];
			assert(taxtree!=null) : "A TaxTree is required for this operation.";
		}else{
			taxSet=null;
			taxHits=null;
		}
//		assert(false) : (taxHits==null)+", "+contamLevel;
		
		if(printTime){
			t.stop("\nTime for allocation:      \t");
			t.start();
		}
		
		int[] refHitCounts;
		if(params.printRefHits){
			refHitCounts=new int[a.keys.length];
			a.setRefHitCounts(refHitCounts);
		}else{refHitCounts=null;}
		
		for(int i=0; i<a.keys.length; i++){
			long key=a.keys[i];
			final int[] ids=getSketchIdsMap(key, singleton);
			
			if(ids!=null && ids[0]>=0){
				int incr=0;
				for(int id : ids){
					if(id>=0){
						final int trueID=id-1;//Minimum id is 1, indicating sketch 0.
						idMap.increment(trueID);
						if(!allToAll || compareSelf){incr++;}
						if(taxSet!=null){addToTaxSet(trueID, taxSet, contamLevel);}
						if(refHitCounts!=null && trueID!=a.sketchID){refHitCounts[i]++;}
					}
				}
				if(countContamHits && incr>0){abs.increment(i, incr);}
				if(taxSet!=null && taxSet.size()>0){
					taxHits[i]=taxSet.toArray();
					taxSet.clear();
				}
			}
		}
		
		if(printTime){
			t.stop("Time for searching index: \t");
			System.err.println("Size:                   \t"+idMap.size());
			t.start();
		}
		
//		System.err.println("idList.size:"+idList.size);
		final int size=idMap.size();
		if(size==0){return new SketchResults(a);}//null breaks some things
		
		ArrayList<Sketch> list=new ArrayList<Sketch>(Tools.min(8, size));

		final int[] keys=idMap.keys();
		final int[] values=idMap.values();
		
		for(int i=0; i<keys.length; i++){
			int value=values[i];
			if(value>=minHits){
				int id=keys[i];
				final Sketch ref=refSketches.get(id);
//				if((!metaFilter || ref.passesMeta(params)) && (!taxFilter || params.passesFilter(ref))){list.add(ref);}
				list.add(ref);
			}
		}
		if(printTime){
			t.stop("Time for fetching sketches: \t");
		}
		return list.isEmpty() ? new SketchResults(a) : new SketchResults(a, list, taxHits);
	}
	
	/*--------------------------------------------------------------*/
	
	public class IndexThread extends Thread {
		
		public IndexThread(AtomicInteger nextIndex_, AtomicLong keyCount_, AtomicLong uniqueKeyCount_){
			buffer=new HashBuffer(tableArray, 1000, 31, true, false);
			nextIndex=nextIndex_;
			keyCount=keyCount_;
			uniqueKeyCount=uniqueKeyCount_;
		}
		
		@Override
		public void run(){
//			System.err.println("Thread running.");
			int id=nextIndex.getAndIncrement();
			final int numSketches=refSketches.size();
			final int limit0=Tools.min((AUTOSIZE || AUTOSIZE_LINEAR ? Integer.MAX_VALUE : targetSketchSize), indexLimit);
//			System.err.println("numSketches="+numSketches);
			while(id<numSketches){
				final Sketch sk=refSketches.get(id);
				final long[] array=sk.keys;
				final int limit=Tools.min(array.length, limit0);
//				System.err.println("limit="+limit);
				for(int i=0; i<limit; i++){
					long key=array[i];
					buffer.set(key, id+1);//Must be greater than zero
					codesProcessedT++;
				}
				id=nextIndex.getAndIncrement();
			}
			long temp=buffer.flush();
			
			synchronized(this){
				codesProcessedT+=0;
				success=true;
				keyCount.getAndAdd(codesProcessedT);
				uniqueKeyCount.getAndAdd(buffer.uniqueAdded);
//				if(codesProcessedT>0){System.err.println(codesProcessedT);}
			}
		}
		
		AtomicInteger nextIndex;
		AtomicLong keyCount;
		AtomicLong uniqueKeyCount;
		long codesProcessedT=0;
		HashBuffer buffer;
		boolean success=false;
		
	}
	
	/*--------------------------------------------------------------*/
	
	public final KmerTableSet tables;
	public final AbstractKmerTable[] tableArray;
	public final ArrayList<Sketch> refSketches;
	
	public boolean errorState=false;

	private static final boolean printTime=false;
	public static boolean useIntMap=true;
//	public static boolean useIntMapBinary=false;
	public static int intMapSize=1000;
	public static int indexLimit=Integer.MAX_VALUE;
	public static final int WAYS=31;
	
}