annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SketchIndex.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.util.ArrayList;
jpayne@68 4 import java.util.concurrent.atomic.AtomicInteger;
jpayne@68 5 import java.util.concurrent.atomic.AtomicLong;
jpayne@68 6
jpayne@68 7 import kmer.AbstractKmerTable;
jpayne@68 8 import kmer.HashBuffer;
jpayne@68 9 import kmer.KmerTableSet;
jpayne@68 10 import shared.Shared;
jpayne@68 11 import shared.Timer;
jpayne@68 12 import shared.Tools;
jpayne@68 13 import structures.AbstractBitSet;
jpayne@68 14 import structures.IntHashMap;
jpayne@68 15 import structures.IntHashSetList;
jpayne@68 16 import structures.IntList;
jpayne@68 17
jpayne@68 18 public class SketchIndex extends SketchObject {
jpayne@68 19
jpayne@68 20 public SketchIndex(ArrayList<Sketch> refs){
jpayne@68 21 refSketches=refs;
jpayne@68 22 tables=new KmerTableSet(new String[] {"ways="+WAYS, "tabletype="+AbstractKmerTable.ARRAYHF, "prealloc="+(prealloc>0 ? ""+prealloc : "f")},
jpayne@68 23 20+(defaultParams.trackCounts() ? 4 : 0)+18);//An extra 18 bytes per kmer because a lot of them occur multiple times
jpayne@68 24 tables.allocateTables();
jpayne@68 25 tableArray=tables.tables();
jpayne@68 26 }
jpayne@68 27
jpayne@68 28 public void load(){
jpayne@68 29 spawnIndexThreads();
jpayne@68 30 if(useWhitelist){
jpayne@68 31 assert(!Whitelist.exists());
jpayne@68 32 Whitelist.initialize(tableArray);
jpayne@68 33 }
jpayne@68 34 }
jpayne@68 35
jpayne@68 36 /** Spawn index threads */
jpayne@68 37 private void spawnIndexThreads(){
jpayne@68 38
jpayne@68 39 //Do anything necessary prior to processing
jpayne@68 40
jpayne@68 41 //Determine how many threads may be used
jpayne@68 42 final int threads=Shared.threads();
jpayne@68 43 ArrayList<IndexThread> alht=new ArrayList<IndexThread>(threads);
jpayne@68 44 AtomicInteger ai=new AtomicInteger(0);
jpayne@68 45 AtomicLong totalKeys=new AtomicLong(0);
jpayne@68 46 AtomicLong uniqueKeys=new AtomicLong(0);
jpayne@68 47 for(int i=0; i<threads; i++){
jpayne@68 48 alht.add(new IndexThread(ai, totalKeys, uniqueKeys));
jpayne@68 49 }
jpayne@68 50
jpayne@68 51 //Start the threads
jpayne@68 52 for(IndexThread pt : alht){
jpayne@68 53 pt.start();
jpayne@68 54 }
jpayne@68 55
jpayne@68 56 //Wait for completion of all threads
jpayne@68 57 boolean success=true;
jpayne@68 58 long codesProcessed=0;
jpayne@68 59 for(IndexThread pt : alht){
jpayne@68 60
jpayne@68 61 //Wait until this thread has terminated
jpayne@68 62 while(pt.getState()!=Thread.State.TERMINATED){
jpayne@68 63 try {
jpayne@68 64 //Attempt a join operation
jpayne@68 65 pt.join();
jpayne@68 66 synchronized(pt){
jpayne@68 67 codesProcessed+=pt.codesProcessedT;
jpayne@68 68 }
jpayne@68 69 } catch (InterruptedException e) {
jpayne@68 70 //Potentially handle this, if it is expected to occur
jpayne@68 71 e.printStackTrace();
jpayne@68 72 }
jpayne@68 73 }
jpayne@68 74 success&=pt.success;
jpayne@68 75 }
jpayne@68 76
jpayne@68 77 //Track whether any threads failed
jpayne@68 78 if(!success){errorState=true;}
jpayne@68 79
jpayne@68 80 System.err.println("Indexed "+uniqueKeys+" unique and "+totalKeys+" total hashcodes."); //For some reason codesProcessed is nondeterministic.
jpayne@68 81
jpayne@68 82 //Do anything necessary after processing
jpayne@68 83 // System.gc();
jpayne@68 84 }
jpayne@68 85
jpayne@68 86 /*--------------------------------------------------------------*/
jpayne@68 87
jpayne@68 88 public SketchResults getSketches(Sketch a, DisplayParams params){
jpayne@68 89 if(useIntMap){
jpayne@68 90 return getSketchesMap(a, params);
jpayne@68 91 }else{
jpayne@68 92 return getSketchesList(a, params);
jpayne@68 93 }
jpayne@68 94 }
jpayne@68 95
jpayne@68 96 /** Return true if added. */
jpayne@68 97 private boolean addToTaxSet(int sketchID, IntHashSetList taxSet, int taxLevelExtended){
jpayne@68 98 Sketch sk=refSketches.get(sketchID);
jpayne@68 99 int taxID=sk.taxID;
jpayne@68 100 if(taxID<0 || taxID>=minFakeID){return false;}
jpayne@68 101 taxID=taxtree.getIdAtLevelExtended(taxID, taxLevelExtended);
jpayne@68 102 return taxSet.add(taxID);
jpayne@68 103 }
jpayne@68 104
jpayne@68 105 public SketchResults getSketchesList(final Sketch a, DisplayParams params){
jpayne@68 106 final int minHits=params.minHits, contamLevel=params.contamLevel();
jpayne@68 107 final boolean countContamHits=params.needContamCounts();//, metaFilter=params.hasMetaFilters(), taxFilter=params.hasTaxFilters();
jpayne@68 108 final Timer t=(printTime ? new Timer() : null);
jpayne@68 109
jpayne@68 110 final int[] singleton=new int[1];
jpayne@68 111 final IntList idList=new IntList(Tools.min(targetSketchSize, indexLimit, 1000));
jpayne@68 112 AbstractBitSet abs=a.indexBitSet();
jpayne@68 113 assert((abs==null)!=countContamHits);
jpayne@68 114
jpayne@68 115 final IntHashSetList taxSet;
jpayne@68 116 final int[][] taxHits;
jpayne@68 117 if(contamLevel>=0){
jpayne@68 118 taxSet=new IntHashSetList(31);
jpayne@68 119 taxHits=new int[a.length()][];
jpayne@68 120 assert(taxtree!=null) : "A TaxTree is required for this operation.";
jpayne@68 121 }else{
jpayne@68 122 taxSet=null;
jpayne@68 123 taxHits=null;
jpayne@68 124 }
jpayne@68 125
jpayne@68 126 for(int i=0; i<a.keys.length; i++){
jpayne@68 127 long key=a.keys[i];
jpayne@68 128 AbstractKmerTable set=tableArray[(int)(key%WAYS)];
jpayne@68 129 // System.err.println(set.getValue(key));
jpayne@68 130 final int[] ids=set.getValues(key, singleton);
jpayne@68 131 // System.err.println(Arrays.toString(ids));
jpayne@68 132 if(ids!=null && ids[0]>=0){
jpayne@68 133 int incr=0;
jpayne@68 134 for(int id : ids){
jpayne@68 135 if(id>=0){
jpayne@68 136 final int trueID=id-1;//Minimum id is 1, indicating sketch 0.
jpayne@68 137 idList.add(trueID);//Minimum id is 1, indicating sketch 0.
jpayne@68 138 incr++;
jpayne@68 139 if(taxSet!=null){addToTaxSet(trueID, taxSet, contamLevel);}
jpayne@68 140 }
jpayne@68 141 }
jpayne@68 142 if(countContamHits && incr>0){abs.increment(i, incr);}
jpayne@68 143 if(taxSet!=null && taxSet.size()>0){
jpayne@68 144 taxHits[i]=taxSet.toArray();
jpayne@68 145 taxSet.clear();
jpayne@68 146 }
jpayne@68 147 }
jpayne@68 148
jpayne@68 149 }
jpayne@68 150
jpayne@68 151 // assert(abs!=null);
jpayne@68 152 // assert(false) : abs.cardinality();
jpayne@68 153
jpayne@68 154 if(printTime){
jpayne@68 155 t.stop("\nTime for searching index: \t");
jpayne@68 156 t.start();
jpayne@68 157 }
jpayne@68 158
jpayne@68 159 // System.err.println("idList.size:"+idList.size);
jpayne@68 160 if(idList.size<minHits){return new SketchResults(a);}//null breaks some things
jpayne@68 161 idList.sort();
jpayne@68 162
jpayne@68 163 if(printTime){
jpayne@68 164 t.stop("Time for sorting "+idList.size()+" hits:\t");
jpayne@68 165 t.start();
jpayne@68 166 }
jpayne@68 167
jpayne@68 168 ArrayList<Sketch> list=new ArrayList<Sketch>(Tools.min(8, idList.size));
jpayne@68 169
jpayne@68 170 int last=-1;
jpayne@68 171 int hits=0;
jpayne@68 172 for(int i=0; i<idList.size; i++){
jpayne@68 173 int id=idList.get(i);
jpayne@68 174 if(id==last){
jpayne@68 175 // System.err.println("A: "+last+", "+id+", "+count+", "+minHits);
jpayne@68 176 hits++;
jpayne@68 177 }else{
jpayne@68 178 // System.err.println("B: "+last+", "+id+", "+count+", "+minHits);
jpayne@68 179 if(last>-1 && (hits>=minHits)){
jpayne@68 180 final Sketch ref=refSketches.get(last);
jpayne@68 181 list.add(ref);
jpayne@68 182 }
jpayne@68 183 last=id;
jpayne@68 184 hits=0;
jpayne@68 185 }
jpayne@68 186 }
jpayne@68 187 if(last>-1 && (hits>=minHits)){
jpayne@68 188 final Sketch ref=refSketches.get(last);
jpayne@68 189 // if((!metaFilter || ref.passesMeta(params)) && (!taxFilter || params.passesFilter(ref))){list.add(ref);}
jpayne@68 190 list.add(ref);
jpayne@68 191 }
jpayne@68 192 if(printTime){
jpayne@68 193 t.stop("Time for fetching sketches: \t");
jpayne@68 194 }
jpayne@68 195 return list.isEmpty() ? new SketchResults(a) : new SketchResults(a, list, taxHits);
jpayne@68 196 }
jpayne@68 197
jpayne@68 198 // static ThreadLocal<IntHashMap> intMapHolder=new ThreadLocal<IntHashMap>();
jpayne@68 199
jpayne@68 200 public final int[] getSketchIdsMap(long key, int[] singleton){
jpayne@68 201 AbstractKmerTable set=tableArray[(int)(key%WAYS)];
jpayne@68 202 final int[] ids=set.getValues(key, singleton);
jpayne@68 203 return ids;
jpayne@68 204 }
jpayne@68 205
jpayne@68 206 public SketchResults getSketchesMap(final Sketch a, DisplayParams params){
jpayne@68 207 final int minHits=params.minHits, contamLevel=params.contamLevel();
jpayne@68 208 final boolean countContamHits=params.needContamCounts();//, metaFilter=params.hasMetaFilters(), taxFilter=params.hasTaxFilters();
jpayne@68 209 final Timer t=(printTime ? new Timer() : null);
jpayne@68 210 final int[] singleton=new int[1];
jpayne@68 211
jpayne@68 212 final IntHashMap idMap=new IntHashMap(Tools.min(targetSketchSize, indexLimit, intMapSize), 0.7f);
jpayne@68 213
jpayne@68 214 AbstractBitSet abs=a.indexBitSet();
jpayne@68 215 assert((abs==null)!=countContamHits);
jpayne@68 216
jpayne@68 217 final IntHashSetList taxSet;
jpayne@68 218 final int[][] taxHits;
jpayne@68 219 if(contamLevel>=0){
jpayne@68 220 taxSet=new IntHashSetList(31);
jpayne@68 221 taxHits=new int[a.length()][];
jpayne@68 222 assert(taxtree!=null) : "A TaxTree is required for this operation.";
jpayne@68 223 }else{
jpayne@68 224 taxSet=null;
jpayne@68 225 taxHits=null;
jpayne@68 226 }
jpayne@68 227 // assert(false) : (taxHits==null)+", "+contamLevel;
jpayne@68 228
jpayne@68 229 if(printTime){
jpayne@68 230 t.stop("\nTime for allocation: \t");
jpayne@68 231 t.start();
jpayne@68 232 }
jpayne@68 233
jpayne@68 234 int[] refHitCounts;
jpayne@68 235 if(params.printRefHits){
jpayne@68 236 refHitCounts=new int[a.keys.length];
jpayne@68 237 a.setRefHitCounts(refHitCounts);
jpayne@68 238 }else{refHitCounts=null;}
jpayne@68 239
jpayne@68 240 for(int i=0; i<a.keys.length; i++){
jpayne@68 241 long key=a.keys[i];
jpayne@68 242 final int[] ids=getSketchIdsMap(key, singleton);
jpayne@68 243
jpayne@68 244 if(ids!=null && ids[0]>=0){
jpayne@68 245 int incr=0;
jpayne@68 246 for(int id : ids){
jpayne@68 247 if(id>=0){
jpayne@68 248 final int trueID=id-1;//Minimum id is 1, indicating sketch 0.
jpayne@68 249 idMap.increment(trueID);
jpayne@68 250 if(!allToAll || compareSelf){incr++;}
jpayne@68 251 if(taxSet!=null){addToTaxSet(trueID, taxSet, contamLevel);}
jpayne@68 252 if(refHitCounts!=null && trueID!=a.sketchID){refHitCounts[i]++;}
jpayne@68 253 }
jpayne@68 254 }
jpayne@68 255 if(countContamHits && incr>0){abs.increment(i, incr);}
jpayne@68 256 if(taxSet!=null && taxSet.size()>0){
jpayne@68 257 taxHits[i]=taxSet.toArray();
jpayne@68 258 taxSet.clear();
jpayne@68 259 }
jpayne@68 260 }
jpayne@68 261 }
jpayne@68 262
jpayne@68 263 if(printTime){
jpayne@68 264 t.stop("Time for searching index: \t");
jpayne@68 265 System.err.println("Size: \t"+idMap.size());
jpayne@68 266 t.start();
jpayne@68 267 }
jpayne@68 268
jpayne@68 269 // System.err.println("idList.size:"+idList.size);
jpayne@68 270 final int size=idMap.size();
jpayne@68 271 if(size==0){return new SketchResults(a);}//null breaks some things
jpayne@68 272
jpayne@68 273 ArrayList<Sketch> list=new ArrayList<Sketch>(Tools.min(8, size));
jpayne@68 274
jpayne@68 275 final int[] keys=idMap.keys();
jpayne@68 276 final int[] values=idMap.values();
jpayne@68 277
jpayne@68 278 for(int i=0; i<keys.length; i++){
jpayne@68 279 int value=values[i];
jpayne@68 280 if(value>=minHits){
jpayne@68 281 int id=keys[i];
jpayne@68 282 final Sketch ref=refSketches.get(id);
jpayne@68 283 // if((!metaFilter || ref.passesMeta(params)) && (!taxFilter || params.passesFilter(ref))){list.add(ref);}
jpayne@68 284 list.add(ref);
jpayne@68 285 }
jpayne@68 286 }
jpayne@68 287 if(printTime){
jpayne@68 288 t.stop("Time for fetching sketches: \t");
jpayne@68 289 }
jpayne@68 290 return list.isEmpty() ? new SketchResults(a) : new SketchResults(a, list, taxHits);
jpayne@68 291 }
jpayne@68 292
jpayne@68 293 /*--------------------------------------------------------------*/
jpayne@68 294
jpayne@68 295 public class IndexThread extends Thread {
jpayne@68 296
jpayne@68 297 public IndexThread(AtomicInteger nextIndex_, AtomicLong keyCount_, AtomicLong uniqueKeyCount_){
jpayne@68 298 buffer=new HashBuffer(tableArray, 1000, 31, true, false);
jpayne@68 299 nextIndex=nextIndex_;
jpayne@68 300 keyCount=keyCount_;
jpayne@68 301 uniqueKeyCount=uniqueKeyCount_;
jpayne@68 302 }
jpayne@68 303
jpayne@68 304 @Override
jpayne@68 305 public void run(){
jpayne@68 306 // System.err.println("Thread running.");
jpayne@68 307 int id=nextIndex.getAndIncrement();
jpayne@68 308 final int numSketches=refSketches.size();
jpayne@68 309 final int limit0=Tools.min((AUTOSIZE || AUTOSIZE_LINEAR ? Integer.MAX_VALUE : targetSketchSize), indexLimit);
jpayne@68 310 // System.err.println("numSketches="+numSketches);
jpayne@68 311 while(id<numSketches){
jpayne@68 312 final Sketch sk=refSketches.get(id);
jpayne@68 313 final long[] array=sk.keys;
jpayne@68 314 final int limit=Tools.min(array.length, limit0);
jpayne@68 315 // System.err.println("limit="+limit);
jpayne@68 316 for(int i=0; i<limit; i++){
jpayne@68 317 long key=array[i];
jpayne@68 318 buffer.set(key, id+1);//Must be greater than zero
jpayne@68 319 codesProcessedT++;
jpayne@68 320 }
jpayne@68 321 id=nextIndex.getAndIncrement();
jpayne@68 322 }
jpayne@68 323 long temp=buffer.flush();
jpayne@68 324
jpayne@68 325 synchronized(this){
jpayne@68 326 codesProcessedT+=0;
jpayne@68 327 success=true;
jpayne@68 328 keyCount.getAndAdd(codesProcessedT);
jpayne@68 329 uniqueKeyCount.getAndAdd(buffer.uniqueAdded);
jpayne@68 330 // if(codesProcessedT>0){System.err.println(codesProcessedT);}
jpayne@68 331 }
jpayne@68 332 }
jpayne@68 333
jpayne@68 334 AtomicInteger nextIndex;
jpayne@68 335 AtomicLong keyCount;
jpayne@68 336 AtomicLong uniqueKeyCount;
jpayne@68 337 long codesProcessedT=0;
jpayne@68 338 HashBuffer buffer;
jpayne@68 339 boolean success=false;
jpayne@68 340
jpayne@68 341 }
jpayne@68 342
jpayne@68 343 /*--------------------------------------------------------------*/
jpayne@68 344
jpayne@68 345 public final KmerTableSet tables;
jpayne@68 346 public final AbstractKmerTable[] tableArray;
jpayne@68 347 public final ArrayList<Sketch> refSketches;
jpayne@68 348
jpayne@68 349 public boolean errorState=false;
jpayne@68 350
jpayne@68 351 private static final boolean printTime=false;
jpayne@68 352 public static boolean useIntMap=true;
jpayne@68 353 // public static boolean useIntMapBinary=false;
jpayne@68 354 public static int intMapSize=1000;
jpayne@68 355 public static int indexLimit=Integer.MAX_VALUE;
jpayne@68 356 public static final int WAYS=31;
jpayne@68 357
jpayne@68 358 }