annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SketchSearcher.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.util.ArrayList;
jpayne@68 5 import java.util.Collections;
jpayne@68 6 import java.util.LinkedHashSet;
jpayne@68 7 import java.util.Set;
jpayne@68 8 import java.util.concurrent.ConcurrentHashMap;
jpayne@68 9 import java.util.concurrent.atomic.AtomicInteger;
jpayne@68 10 import java.util.concurrent.atomic.AtomicLong;
jpayne@68 11
jpayne@68 12 import shared.Parse;
jpayne@68 13 import shared.Shared;
jpayne@68 14 import shared.Tools;
jpayne@68 15 import structures.AbstractBitSet;
jpayne@68 16 import structures.ByteBuilder;
jpayne@68 17 import structures.Heap;
jpayne@68 18 import structures.IntHashMap;
jpayne@68 19 import tax.TaxNode;
jpayne@68 20 import tax.TaxTree;
jpayne@68 21
jpayne@68 22 public class SketchSearcher extends SketchObject {
jpayne@68 23
jpayne@68 24 public SketchSearcher(){
jpayne@68 25
jpayne@68 26 }
jpayne@68 27
jpayne@68 28 public boolean parse(String arg, String a, String b, boolean addFileIfNotFound){
jpayne@68 29
jpayne@68 30 // System.err.println("Parsing "+arg+"; ref="+refFiles); //123
jpayne@68 31
jpayne@68 32 if(parseSketchFlags(arg, a, b)){
jpayne@68 33 //Do nothing
jpayne@68 34 }else if(defaultParams.parse(arg, a, b)){
jpayne@68 35 //Do nothing
jpayne@68 36 }else if(a.equals("verbose")){
jpayne@68 37 verbose=Parse.parseBoolean(b);
jpayne@68 38 }else if(a.equals("ref")){
jpayne@68 39 addRefFiles(b);
jpayne@68 40 }else if(arg.equalsIgnoreCase("nt") || arg.equalsIgnoreCase("RefSeq") || arg.equalsIgnoreCase("refseqbig") || arg.equalsIgnoreCase("nr")
jpayne@68 41 || arg.equalsIgnoreCase("img") || arg.equalsIgnoreCase("silva") || arg.equalsIgnoreCase("ribo")
jpayne@68 42 || arg.equalsIgnoreCase("mito") || arg.equalsIgnoreCase("fungi")
jpayne@68 43 || arg.equalsIgnoreCase("prokprot") || arg.equalsIgnoreCase("prokprotbig") || arg.equalsIgnoreCase("protein") ||
jpayne@68 44 arg.equalsIgnoreCase("protien") || a.equalsIgnoreCase("prot")){
jpayne@68 45 addRefFiles(arg);
jpayne@68 46 }else if(a.equals("threads") || a.equals("sketchthreads") || a.equals("t")){
jpayne@68 47 threads=Integer.parseInt(b);
jpayne@68 48 }
jpayne@68 49
jpayne@68 50 else if(a.equalsIgnoreCase("minLevelExtended") || a.equalsIgnoreCase("minLevel")){
jpayne@68 51 minLevelExtended=TaxTree.parseLevelExtended(b);
jpayne@68 52 }else if(a.equals("index") || a.equals("makeindex")){
jpayne@68 53 if(b!=null && "auto".equalsIgnoreCase(b)){
jpayne@68 54 autoIndex=true;
jpayne@68 55 makeIndex=true;
jpayne@68 56 }else{
jpayne@68 57 autoIndex=false;
jpayne@68 58 makeIndex=Parse.parseBoolean(b);
jpayne@68 59 }
jpayne@68 60 }else if(a.equals("indexsize") || a.equals("indexlimit")){
jpayne@68 61 SketchIndex.indexLimit=Integer.parseInt(b);
jpayne@68 62 }
jpayne@68 63
jpayne@68 64 else if(b==null && arg.indexOf('=')<0 && addFileIfNotFound && (arg.indexOf(',')>=0 || new File(arg).exists())){
jpayne@68 65 addRefFiles(arg);
jpayne@68 66 }else{
jpayne@68 67 return false;
jpayne@68 68 }
jpayne@68 69 // System.err.println("Parsed "+arg+"; ref="+refFiles); //123
jpayne@68 70 return true;
jpayne@68 71 }
jpayne@68 72
jpayne@68 73 public boolean compare(ArrayList<Sketch> querySketches, ByteBuilder sb, DisplayParams params, int maxThreads){
jpayne@68 74 assert(params.postParsed);
jpayne@68 75 final boolean json=params.json();
jpayne@68 76 ConcurrentHashMap<Integer, Comparison> map=new ConcurrentHashMap<Integer, Comparison>();
jpayne@68 77
jpayne@68 78 SketchResults[] alca=new SketchResults[querySketches.size()];
jpayne@68 79
jpayne@68 80 if(verbose2){System.err.println("At compare.");}
jpayne@68 81
jpayne@68 82 boolean success=true;
jpayne@68 83 final CompareBuffer buffer=new CompareBuffer(false);
jpayne@68 84 AtomicInteger fakeID=new AtomicInteger(minFakeID);
jpayne@68 85 for(int i=0; i<querySketches.size(); i++){
jpayne@68 86 fakeID.set(minFakeID);
jpayne@68 87 Sketch a=querySketches.get(i);
jpayne@68 88
jpayne@68 89 SketchResults results=processSketch(a, buffer, fakeID, map, params, maxThreads);
jpayne@68 90 a.clearRefHitCounts();
jpayne@68 91 alca[i]=results;
jpayne@68 92 // System.out.println(a.present);
jpayne@68 93 }
jpayne@68 94
jpayne@68 95 if(verbose2){System.err.println("Made results.");}
jpayne@68 96
jpayne@68 97 for(int i=0; i<alca.length; i++){
jpayne@68 98 // Sketch s=sketches.get(i);
jpayne@68 99 SketchResults results=alca[i];
jpayne@68 100
jpayne@68 101 if(json && alca.length>1 && i==0){
jpayne@68 102 sb.append('[');
jpayne@68 103 }
jpayne@68 104
jpayne@68 105 sb.append(results.toText(params));
jpayne@68 106
jpayne@68 107 if(json && alca.length>1){
jpayne@68 108 if(i<alca.length-1){
jpayne@68 109 sb.append(',');
jpayne@68 110 }else{
jpayne@68 111 sb.append(']');
jpayne@68 112 }
jpayne@68 113 }
jpayne@68 114 }
jpayne@68 115 return success;
jpayne@68 116 }
jpayne@68 117
jpayne@68 118 private class CompareThread extends Thread {
jpayne@68 119
jpayne@68 120 CompareThread(Sketch a_, ArrayList<Sketch> localRefSketches_, int pid_, int incr_,
jpayne@68 121 AtomicInteger fakeID_, ConcurrentHashMap<Integer, Comparison> map_, DisplayParams params_){
jpayne@68 122 a=a_;
jpayne@68 123 pid=pid_;
jpayne@68 124 incr=incr_;
jpayne@68 125 fakeID=fakeID_;
jpayne@68 126 map=map_;
jpayne@68 127 params=params_;
jpayne@68 128 localRefSketches=localRefSketches_;
jpayne@68 129 buffer=new CompareBuffer(params.needContamCounts());
jpayne@68 130 if(buffer.cbs!=null){buffer.cbs.setCapacity(a.length(), 0);}
jpayne@68 131 }
jpayne@68 132
jpayne@68 133 @Override
jpayne@68 134 public void run(){
jpayne@68 135 if(a.length()<1 || a.length()<params.minHits || (params.requireSSU && !a.hasSSU())){return;}//TODO: Change to 'require16S'
jpayne@68 136 assert(a.compareBitSet()==null || buffer.cbs!=null) : (a.compareBitSet()==null)+", "+(buffer.cbs==null); //Unsafe to use a.cbs multithreaded unless atomic
jpayne@68 137 final AbstractBitSet cbs=(buffer.cbs==null ? a.compareBitSet() : buffer.cbs);
jpayne@68 138 for(int i=pid; i<localRefSketches.size(); i+=incr){
jpayne@68 139 Sketch b=localRefSketches.get(i);
jpayne@68 140 if(params.passesFilter(b)){
jpayne@68 141 processPair(a, b, buffer, cbs, fakeID, map, params);
jpayne@68 142 localComparisons++;
jpayne@68 143 }
jpayne@68 144 }
jpayne@68 145 comparisons.getAndAdd(localComparisons);
jpayne@68 146 }
jpayne@68 147
jpayne@68 148 final AtomicInteger fakeID;
jpayne@68 149 final ConcurrentHashMap<Integer, Comparison> map;
jpayne@68 150 final CompareBuffer buffer;
jpayne@68 151 final int incr;
jpayne@68 152 final int pid;
jpayne@68 153 final Sketch a;
jpayne@68 154 final DisplayParams params;
jpayne@68 155 final ArrayList<Sketch> localRefSketches;
jpayne@68 156 long localComparisons=0;
jpayne@68 157
jpayne@68 158 }
jpayne@68 159
jpayne@68 160 public SketchResults processSketch(Sketch a, CompareBuffer buffer, AtomicInteger fakeID,
jpayne@68 161 ConcurrentHashMap<Integer, Comparison> map, DisplayParams params, int maxThreads){
jpayne@68 162 if(a.length()<1 || a.length()<params.minHits || (params.requireSSU && !a.hasSSU())){return new SketchResults(a);}
jpayne@68 163 // Timer t=new Timer();
jpayne@68 164 // t.start("Began query.");
jpayne@68 165 assert(a.compareBitSet()==null);
jpayne@68 166 assert(a.indexBitSet()==null);
jpayne@68 167
jpayne@68 168 if(verbose2){System.err.println("At processSketch 1");} //123
jpayne@68 169
jpayne@68 170 a.makeBitSets(params.needContamCounts(), index!=null);
jpayne@68 171
jpayne@68 172 final SketchResults sr;
jpayne@68 173 if(index!=null){
jpayne@68 174 sr=index.getSketches(a, params);
jpayne@68 175 }else{
jpayne@68 176 sr=new SketchResults(a, refSketches, null);
jpayne@68 177 }
jpayne@68 178
jpayne@68 179 if(verbose2){System.err.println("At processSketch 2");} //123
jpayne@68 180
jpayne@68 181 if(sr==null || sr.refSketchList==null || sr.refSketchList.isEmpty()){
jpayne@68 182 if(verbose2){System.err.println("At processSketch 2.0");} //123
jpayne@68 183 return sr;
jpayne@68 184 }
jpayne@68 185
jpayne@68 186 if(verbose2){System.err.println("At processSketch 2.1");} //123
jpayne@68 187
jpayne@68 188 if(verbose2){System.err.println("At processSketch 2.2");} //123
jpayne@68 189
jpayne@68 190 if(maxThreads>1 && Shared.threads()>1 && sr.refSketchList.size()>31){
jpayne@68 191 if(verbose2){System.err.println("At processSketch 2.3");} //123
jpayne@68 192 assert((buffer.cbs==null)==(params.needContamCounts()));
jpayne@68 193 spawnThreads(a, sr.refSketchList, fakeID, map, params, maxThreads);
jpayne@68 194 if(verbose2){System.err.println("At processSketch 2.4");} //123
jpayne@68 195 }else{
jpayne@68 196 if(verbose2){System.err.println("At processSketch 2.5");} //123
jpayne@68 197 assert(buffer.cbs==null);
jpayne@68 198 long comp=0;
jpayne@68 199 for(Sketch b : sr.refSketchList){
jpayne@68 200 if(params.passesFilter(b)){
jpayne@68 201 comp++;
jpayne@68 202 processPair(a, b, buffer, a.compareBitSet(), /*sr.taxHits,*/ fakeID, map, params);
jpayne@68 203 }
jpayne@68 204 }
jpayne@68 205 comparisons.getAndAdd(comp);
jpayne@68 206 if(verbose2){System.err.println("At processSketch 2.6");} //123
jpayne@68 207 }
jpayne@68 208 if(verbose2){System.err.println("At processSketch 3");} //123
jpayne@68 209
jpayne@68 210 sr.addMap(map, params, buffer);
jpayne@68 211
jpayne@68 212 fakeID.set(minFakeID);
jpayne@68 213 map.clear();
jpayne@68 214 if(verbose2){System.err.println("At processSketch 4");} //123
jpayne@68 215 a.clearRefHitCounts();
jpayne@68 216
jpayne@68 217 return sr;
jpayne@68 218 }
jpayne@68 219
jpayne@68 220 //For remote homology
jpayne@68 221 boolean passesTax(Sketch q, Sketch ref){
jpayne@68 222 assert(minLevelExtended>=0);
jpayne@68 223 final int qid=q.taxID;
jpayne@68 224 if(qid<0 || qid>=minFakeID){return false;}
jpayne@68 225 TaxNode qtn=taxtree.getNode(qid);
jpayne@68 226 if(qtn==null){return false;}
jpayne@68 227 if(qtn.levelExtended>minLevelExtended){return false;}
jpayne@68 228 final int rid=(ref==null ? -1 : ref.taxID);
jpayne@68 229 if(rid>=0 && rid<minFakeID){
jpayne@68 230 TaxNode rtn=taxtree.getNode(rid);
jpayne@68 231 if(rtn!=null && rtn.levelExtended<=minLevelExtended){
jpayne@68 232 TaxNode ancestor=taxtree.commonAncestor(qtn, rtn);
jpayne@68 233 if(ancestor!=null && ancestor.levelExtended>=minLevelExtended){
jpayne@68 234 return true;
jpayne@68 235 }
jpayne@68 236 }
jpayne@68 237 }
jpayne@68 238 return false;
jpayne@68 239 }
jpayne@68 240
jpayne@68 241 private void spawnThreads(Sketch a, ArrayList<Sketch> refs, AtomicInteger fakeID,
jpayne@68 242 ConcurrentHashMap<Integer, Comparison> map, DisplayParams params, int maxThreads){
jpayne@68 243 final int toSpawn=Tools.max(1, Tools.min((refs.size()+7)/8, threads, maxThreads, Shared.threads()));
jpayne@68 244 ArrayList<CompareThread> alct=new ArrayList<CompareThread>(toSpawn);
jpayne@68 245 if(verbose2){System.err.println("At spawnThreads");} //123
jpayne@68 246 for(int t=0; t<toSpawn; t++){
jpayne@68 247 alct.add(new CompareThread(a, refs, t, toSpawn, fakeID, map, params));
jpayne@68 248 }
jpayne@68 249 for(CompareThread ct : alct){ct.start();}
jpayne@68 250 for(CompareThread ct : alct){
jpayne@68 251
jpayne@68 252 //Wait until this thread has terminated
jpayne@68 253 while(ct.getState()!=Thread.State.TERMINATED){
jpayne@68 254 try {
jpayne@68 255 //Attempt a join operation
jpayne@68 256 ct.join();
jpayne@68 257 } catch (InterruptedException e) {
jpayne@68 258 e.printStackTrace();
jpayne@68 259 }
jpayne@68 260 }
jpayne@68 261 }
jpayne@68 262 if(params.needContamCounts()){
jpayne@68 263 for(CompareThread ct : alct){
jpayne@68 264 if(ct.buffer.cbs==null){
jpayne@68 265 assert((AUTOSIZE || AUTOSIZE_LINEAR) && index!=null);//Not really what this does
jpayne@68 266 break;
jpayne@68 267 }
jpayne@68 268 a.addToBitSet(ct.buffer.cbs);
jpayne@68 269 }
jpayne@68 270 }
jpayne@68 271 a.clearRefHitCounts();
jpayne@68 272 alct=null;
jpayne@68 273 }
jpayne@68 274
jpayne@68 275 // private void writeResults(ArrayList<Comparison> al, Sketch s, StringBuilder sb){
jpayne@68 276 // sb.append("\nResults for "+s.name()+":\n\n");
jpayne@68 277 //
jpayne@68 278 // ArrayList<TaxNode> tnl=new ArrayList<TaxNode>();
jpayne@68 279 // for(Comparison c : al){
jpayne@68 280 // formatComparison(c, format, sb, printTax);
jpayne@68 281 // }
jpayne@68 282 // }
jpayne@68 283
jpayne@68 284 boolean processPair(Sketch a, Sketch b, CompareBuffer buffer, AbstractBitSet abs,
jpayne@68 285 AtomicInteger fakeID, ConcurrentHashMap<Integer, Comparison> map, DisplayParams params){
jpayne@68 286 // System.err.println("Comparing "+a.name()+" and "+b.name());
jpayne@68 287 assert(!params.printRefHits || a.refHitCounts()!=null || !SketchObject.makeIndex);
jpayne@68 288
jpayne@68 289
jpayne@68 290 if(b.genomeSizeBases<params.minBases){return false;}
jpayne@68 291 if(minLevelExtended>-1 && !passesTax(a, b)){return false;}
jpayne@68 292 if(params.minSizeRatio>0){
jpayne@68 293 long sea=a.genomeSizeEstimate();
jpayne@68 294 long seb=b.genomeSizeEstimate();
jpayne@68 295 if(Tools.min(sea, seb)<params.minSizeRatio*Tools.max(sea, seb)){return false;}
jpayne@68 296 }
jpayne@68 297 Comparison c=compareOneToOne(a, b, buffer, abs, /*taxHits, params.contamLevel(),*/ params.minHits, params.minWKID, params.minANI, params.requireSSU, null);
jpayne@68 298 if(c==null){return false;}
jpayne@68 299 if(c.taxID()<1){c.taxID=fakeID.getAndIncrement();}
jpayne@68 300
jpayne@68 301 // System.err.println("TID: "+c.taxID()+", "+fakeID);
jpayne@68 302
jpayne@68 303 TaxNode tn=(taxtree==null ? null : taxtree.getNode(b.taxID));
jpayne@68 304 if(tn!=null){
jpayne@68 305 c.taxName=tn.name;
jpayne@68 306 if(tn.level<params.taxLevel){
jpayne@68 307 TaxNode tn2=taxtree.getNodeAtLevel(b.taxID, params.taxLevel);
jpayne@68 308 tn=tn2;
jpayne@68 309 }
jpayne@68 310 }
jpayne@68 311 Integer key=(tn==null ? c.taxID : tn.id);
jpayne@68 312
jpayne@68 313 Comparison old=map.get(key);
jpayne@68 314 // System.err.println("A. Old: "+(old==null ? 0 : old.hits)+", new: "+c.hits);
jpayne@68 315 if(old!=null && params.compare(old, c)>0){return false;}
jpayne@68 316
jpayne@68 317 old=map.put(key, c);
jpayne@68 318 while(old!=null && params.compare(old, c)>0){
jpayne@68 319 // System.err.println("B. Old: "+(old==null ? 0 : old.hits)+", new: "+c.hits);
jpayne@68 320 c=old;
jpayne@68 321 old=map.put(key, c);
jpayne@68 322 }
jpayne@68 323 return true;
jpayne@68 324 }
jpayne@68 325
jpayne@68 326 // //TODO: Interestingly, the heap never seems to be created by anything... not sure what it's for.
jpayne@68 327 // private static Comparison compareOneToOne(final Sketch a, final Sketch b, CompareBuffer buffer, AbstractBitSet abs,
jpayne@68 328 // int minHits, float minWKID, float minANI, boolean aniFromWKID, Heap<Comparison> heap){
jpayne@68 329 //// assert(heap!=null); //Optional, for testing.
jpayne@68 330 // if(a==b && !compareSelf){return null;}
jpayne@68 331 // final int matches=a.countMatches(b, buffer, abs, true/*!makeIndex || !AUTOSIZE*/, null, -1);
jpayne@68 332 // assert(matches==buffer.hits());
jpayne@68 333 // if(matches<minHits){return null;}
jpayne@68 334 //// asdf //TODO: handle k1 and k2 WKIDs here.
jpayne@68 335 // {
jpayne@68 336 //// final int div=aniFromWKID ? buffer.minDivisor() : buffer.maxDivisor();
jpayne@68 337 //// final float xkid=matches/(float)div;//This could be kid or wkid at this point...
jpayne@68 338 //// if(xkid<minWKID){return null;}
jpayne@68 339 //
jpayne@68 340 // final int div=aniFromWKID ? buffer.minDivisor() : buffer.maxDivisor();
jpayne@68 341 // final float xkid=matches/(float)div;//This could be kid or wkid at this point...
jpayne@68 342 // if(xkid<minWKID){return null;}
jpayne@68 343 //
jpayne@68 344 // //TODO (?) This is only necessary because of the order of setting minwkid and minani.
jpayne@68 345 // //minWKID can be deterministically determined from minANI so if it is set correctly this can be skipped.
jpayne@68 346 // if(minANI>0){
jpayne@68 347 // final float ani=wkidToAni(xkid, a.k1Fraction());
jpayne@68 348 // if(ani<minANI){return null;}
jpayne@68 349 // }
jpayne@68 350 // }
jpayne@68 351 //
jpayne@68 352 // if(heap!=null && !heap.hasRoom() && heap.peek().hits()>matches){return null;} //TODO: Should be based on score
jpayne@68 353 //
jpayne@68 354 //// System.err.print("*");
jpayne@68 355 // Comparison c=new Comparison(buffer, a, b);
jpayne@68 356 // if(heap==null || heap.add(c)){return c;}
jpayne@68 357 // return null;
jpayne@68 358 // }
jpayne@68 359
jpayne@68 360 //TODO: Interestingly, the heap never seems to be created by anything... not sure what it's for.
jpayne@68 361 private static Comparison compareOneToOne(final Sketch a, final Sketch b, CompareBuffer buffer, AbstractBitSet abs,
jpayne@68 362 int minHits, float minWKID, float minANI, boolean requireSSU, Heap<Comparison> heap){
jpayne@68 363 // assert(heap!=null); //Optional, for testing.
jpayne@68 364 // assert(a.refHitCounts!=null);
jpayne@68 365 if(a==b && !compareSelf){return null;}
jpayne@68 366 if(requireSSU && !a.sharesSSU(b)){return null;}
jpayne@68 367 final int matches=a.countMatches(b, buffer, abs, true/*!makeIndex || !AUTOSIZE*/, null, -1);
jpayne@68 368 assert(matches==buffer.hits());
jpayne@68 369 if(matches<minHits){return null;}
jpayne@68 370
jpayne@68 371 {
jpayne@68 372 final float wkid=buffer.wkid();
jpayne@68 373 if(wkid<minWKID){return null;}
jpayne@68 374
jpayne@68 375 if(minANI>0){
jpayne@68 376 final float ani=buffer.ani();
jpayne@68 377 if(ani<minANI){return null;}
jpayne@68 378 }
jpayne@68 379 }
jpayne@68 380
jpayne@68 381 if(heap!=null && !heap.hasRoom() && heap.peek().hits()>matches){return null;} //TODO: Should be based on score
jpayne@68 382
jpayne@68 383 // System.err.print("*");
jpayne@68 384 Comparison c=new Comparison(buffer, a, b);
jpayne@68 385 if(heap==null || heap.add(c)){return c;}
jpayne@68 386 return null;
jpayne@68 387 }
jpayne@68 388
jpayne@68 389 public void addRefFiles(String a){
jpayne@68 390 if(a.equalsIgnoreCase("nr")){
jpayne@68 391 addRefFiles(NR_PATH());
jpayne@68 392 if(blacklist==null){blacklist=Blacklist.nrBlacklist();}
jpayne@68 393 if(defaultParams.dbName==null){defaultParams.dbName="nr";}
jpayne@68 394 if(!setK){k=defaultKAmino; k2=defaultK2Amino;}
jpayne@68 395 }else if(a.equalsIgnoreCase("nt")){
jpayne@68 396 addRefFiles(NT_PATH());
jpayne@68 397 if(blacklist==null){blacklist=Blacklist.ntBlacklist();}
jpayne@68 398 if(defaultParams.dbName==null){defaultParams.dbName="nt";}
jpayne@68 399 if(!setK){k=defaultK; k2=defaultK2;}
jpayne@68 400 }else if(a.equalsIgnoreCase("refseq")){
jpayne@68 401 addRefFiles(REFSEQ_PATH());
jpayne@68 402 if(blacklist==null){blacklist=Blacklist.refseqBlacklist();}
jpayne@68 403 if(defaultParams.dbName==null){defaultParams.dbName="RefSeq";}
jpayne@68 404 if(!setK){k=defaultK; k2=defaultK2;}
jpayne@68 405 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=2.0f;}
jpayne@68 406 }else if(a.equalsIgnoreCase("refseqbig")){
jpayne@68 407 addRefFiles(REFSEQ_PATH_BIG());
jpayne@68 408 if(blacklist==null){blacklist=Blacklist.refseqBlacklist();}
jpayne@68 409 if(defaultParams.dbName==null){defaultParams.dbName="RefSeq";}
jpayne@68 410 if(!setK){k=defaultK; k2=defaultK2;}
jpayne@68 411 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=4.5f;}
jpayne@68 412 }else if(a.equalsIgnoreCase("silva")){
jpayne@68 413 // TaxTree.SILVA_MODE=Parse.parseBoolean(b);
jpayne@68 414 addRefFiles(SILVA_PATH());
jpayne@68 415 if(blacklist==null){blacklist=Blacklist.silvaBlacklist();}
jpayne@68 416 if(defaultParams.dbName==null){defaultParams.dbName="Silva";}
jpayne@68 417 if(!setK){k=defaultK; k2=defaultK2;}
jpayne@68 418 }else if(a.equalsIgnoreCase("img")){
jpayne@68 419 addRefFiles(IMG_PATH());
jpayne@68 420 if(blacklist==null){blacklist=Blacklist.imgBlacklist();}
jpayne@68 421 if(defaultParams.dbName==null){defaultParams.dbName="IMG";}
jpayne@68 422 if(!setK){k=defaultK; k2=defaultK2;}
jpayne@68 423 }else if(a.equalsIgnoreCase("prokprot") || a.equalsIgnoreCase("protein")){
jpayne@68 424 addRefFiles(PROKPROT_PATH());
jpayne@68 425 if(blacklist==null){blacklist=Blacklist.prokProtBlacklist();}
jpayne@68 426 if(defaultParams.dbName==null){defaultParams.dbName="ProkProt";}
jpayne@68 427 if(!setK){k=defaultKAmino; k2=defaultK2Amino;}
jpayne@68 428 if(!amino && !translate) {
jpayne@68 429 translate=true;
jpayne@68 430 System.err.println("Setting translate to true because a protein dataset is being used.");
jpayne@68 431 }
jpayne@68 432 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=3.0f;}
jpayne@68 433 }else if(a.equalsIgnoreCase("prokprotbig") || a.equalsIgnoreCase("proteinbig")){
jpayne@68 434 addRefFiles(PROKPROT_PATH_BIG());
jpayne@68 435 if(blacklist==null){blacklist=Blacklist.prokProtBlacklist();}
jpayne@68 436 if(defaultParams.dbName==null){defaultParams.dbName="ProkProt";}
jpayne@68 437 if(!setK){k=defaultKAmino; k2=defaultK2Amino;}
jpayne@68 438 if(!amino && !translate) {
jpayne@68 439 translate=true;
jpayne@68 440 System.err.println("Setting translate to true because a protein dataset is being used.");
jpayne@68 441 }
jpayne@68 442 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=7.5f;}
jpayne@68 443 }else if(a.equalsIgnoreCase("mito") || a.equalsIgnoreCase("refseqmito")){
jpayne@68 444 addRefFiles(MITO_PATH());
jpayne@68 445 if(blacklist==null){blacklist=Blacklist.mitoBlacklist();}
jpayne@68 446 if(defaultParams.dbName==null){defaultParams.dbName="RefSeqMito";}
jpayne@68 447 if(!setK){k=defaultK; k2=defaultK2;}
jpayne@68 448 }else if(a.equalsIgnoreCase("fungi") || a.equalsIgnoreCase("refseqfungi")){
jpayne@68 449 addRefFiles(FUNGI_PATH());
jpayne@68 450 if(blacklist==null){blacklist=Blacklist.fungiBlacklist();}
jpayne@68 451 if(defaultParams.dbName==null){defaultParams.dbName="RefSeqFungi";}
jpayne@68 452 if(!setK){k=defaultK; k2=defaultK2;}
jpayne@68 453 }else{
jpayne@68 454 addFiles(a, refFiles);
jpayne@68 455 }
jpayne@68 456 }
jpayne@68 457
jpayne@68 458 static void addFiles(String a, Set<String> list){
jpayne@68 459 if(a==null){return;}
jpayne@68 460 File f=new File(a);
jpayne@68 461 assert(!list.contains(a)) : "Duplicate file "+a;
jpayne@68 462
jpayne@68 463 if(f.exists()){
jpayne@68 464 list.add(a);
jpayne@68 465 }else if(a.indexOf(',')>0){
jpayne@68 466 for(String s : a.split(",")){addFiles(s, list);}
jpayne@68 467 }else if(a.indexOf('#')>=0 && new File(a.replaceFirst("#", "0")).exists()){
jpayne@68 468 for(int i=0; true; i++){
jpayne@68 469 String temp=a.replaceFirst("#", ""+i);
jpayne@68 470 if(!new File(temp).exists()){break;}
jpayne@68 471 list.add(temp);
jpayne@68 472 }
jpayne@68 473 }else{
jpayne@68 474 list.add(a);
jpayne@68 475 }
jpayne@68 476 }
jpayne@68 477
jpayne@68 478 public void makeIndex(){
jpayne@68 479 assert(index==null);
jpayne@68 480 index=new SketchIndex(refSketches);
jpayne@68 481 index.load();
jpayne@68 482 }
jpayne@68 483
jpayne@68 484 public void loadReferences(int mode_, DisplayParams params){
jpayne@68 485 loadReferences(mode_, params.minKeyOccuranceCount, params.minEntropy, params.minProb, params.minQual);
jpayne@68 486 }
jpayne@68 487
jpayne@68 488 public void loadReferences(int mode_, int minKeyOccuranceCount, float minEntropy, float minProb, byte minQual) {
jpayne@68 489 makeTool(minKeyOccuranceCount, false, false);
jpayne@68 490 refSketches=tool.loadSketches_MT(mode_, 1f, -1, minEntropy, minProb, minQual, refFiles);
jpayne@68 491 assert(refSketches!=null) : refFiles;
jpayne@68 492 if(mode_==PER_FILE){
jpayne@68 493 Collections.sort(refSketches, SketchIdComparator.comparator);
jpayne@68 494 }
jpayne@68 495 taxIDToSketchIDMap=new IntHashMap(Tools.max(3, (int)(refSketches.size()*1.2f)));
jpayne@68 496 for(int i=0; i<refSketches.size(); i++){
jpayne@68 497 Sketch sk=refSketches.get(i);
jpayne@68 498 if(sk!=null && sk.taxID>0){
jpayne@68 499 taxIDToSketchIDMap.set(sk.taxID, i);
jpayne@68 500 }
jpayne@68 501 }
jpayne@68 502 // System.err.println("Sketches: "+refSketches.get(0).name());
jpayne@68 503 if(makeIndex){
jpayne@68 504 makeIndex();
jpayne@68 505 }
jpayne@68 506 }
jpayne@68 507
jpayne@68 508 public void makeTool(int minKeyOccuranceCount, boolean trackCounts, boolean mergePairs){
jpayne@68 509 if(tool==null){
jpayne@68 510 tool=new SketchTool(targetSketchSize, minKeyOccuranceCount, trackCounts, mergePairs);
jpayne@68 511 }
jpayne@68 512 }
jpayne@68 513
jpayne@68 514 public ArrayList<Sketch> loadSketchesFromString(String sketchString){
jpayne@68 515 return tool.loadSketchesFromString(sketchString);
jpayne@68 516 }
jpayne@68 517
jpayne@68 518 public int refFileCount(){return refFiles==null ? 0 : refFiles.size();}
jpayne@68 519 public int refSketchCount(){return refSketches==null ? 0 : refSketches.size();}
jpayne@68 520
jpayne@68 521 public Sketch findReferenceSketch(int taxID){
jpayne@68 522 if(taxID<1){return null;}
jpayne@68 523 int skid=taxIDToSketchIDMap.get(taxID);
jpayne@68 524 return skid<0 ? null : refSketches.get(skid);
jpayne@68 525 }
jpayne@68 526
jpayne@68 527 /*--------------------------------------------------------------*/
jpayne@68 528
jpayne@68 529 public SketchIndex index=null;
jpayne@68 530 public boolean autoIndex=true;
jpayne@68 531
jpayne@68 532 public SketchTool tool=null;
jpayne@68 533 public ArrayList<Sketch> refSketches;
jpayne@68 534 LinkedHashSet<String> refFiles=new LinkedHashSet<String>();
jpayne@68 535 /** For ref sketch lookups by TaxID */
jpayne@68 536 private IntHashMap taxIDToSketchIDMap;
jpayne@68 537 public int threads=Shared.threads();
jpayne@68 538 boolean verbose;
jpayne@68 539 boolean errorState=false;
jpayne@68 540 AtomicLong comparisons=new AtomicLong(0);
jpayne@68 541
jpayne@68 542 int minLevelExtended=-1;
jpayne@68 543
jpayne@68 544 }