Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SketchSearcher.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package sketch; | |
2 | |
3 import java.io.File; | |
4 import java.util.ArrayList; | |
5 import java.util.Collections; | |
6 import java.util.LinkedHashSet; | |
7 import java.util.Set; | |
8 import java.util.concurrent.ConcurrentHashMap; | |
9 import java.util.concurrent.atomic.AtomicInteger; | |
10 import java.util.concurrent.atomic.AtomicLong; | |
11 | |
12 import shared.Parse; | |
13 import shared.Shared; | |
14 import shared.Tools; | |
15 import structures.AbstractBitSet; | |
16 import structures.ByteBuilder; | |
17 import structures.Heap; | |
18 import structures.IntHashMap; | |
19 import tax.TaxNode; | |
20 import tax.TaxTree; | |
21 | |
22 public class SketchSearcher extends SketchObject { | |
23 | |
24 public SketchSearcher(){ | |
25 | |
26 } | |
27 | |
28 public boolean parse(String arg, String a, String b, boolean addFileIfNotFound){ | |
29 | |
30 // System.err.println("Parsing "+arg+"; ref="+refFiles); //123 | |
31 | |
32 if(parseSketchFlags(arg, a, b)){ | |
33 //Do nothing | |
34 }else if(defaultParams.parse(arg, a, b)){ | |
35 //Do nothing | |
36 }else if(a.equals("verbose")){ | |
37 verbose=Parse.parseBoolean(b); | |
38 }else if(a.equals("ref")){ | |
39 addRefFiles(b); | |
40 }else if(arg.equalsIgnoreCase("nt") || arg.equalsIgnoreCase("RefSeq") || arg.equalsIgnoreCase("refseqbig") || arg.equalsIgnoreCase("nr") | |
41 || arg.equalsIgnoreCase("img") || arg.equalsIgnoreCase("silva") || arg.equalsIgnoreCase("ribo") | |
42 || arg.equalsIgnoreCase("mito") || arg.equalsIgnoreCase("fungi") | |
43 || arg.equalsIgnoreCase("prokprot") || arg.equalsIgnoreCase("prokprotbig") || arg.equalsIgnoreCase("protein") || | |
44 arg.equalsIgnoreCase("protien") || a.equalsIgnoreCase("prot")){ | |
45 addRefFiles(arg); | |
46 }else if(a.equals("threads") || a.equals("sketchthreads") || a.equals("t")){ | |
47 threads=Integer.parseInt(b); | |
48 } | |
49 | |
50 else if(a.equalsIgnoreCase("minLevelExtended") || a.equalsIgnoreCase("minLevel")){ | |
51 minLevelExtended=TaxTree.parseLevelExtended(b); | |
52 }else if(a.equals("index") || a.equals("makeindex")){ | |
53 if(b!=null && "auto".equalsIgnoreCase(b)){ | |
54 autoIndex=true; | |
55 makeIndex=true; | |
56 }else{ | |
57 autoIndex=false; | |
58 makeIndex=Parse.parseBoolean(b); | |
59 } | |
60 }else if(a.equals("indexsize") || a.equals("indexlimit")){ | |
61 SketchIndex.indexLimit=Integer.parseInt(b); | |
62 } | |
63 | |
64 else if(b==null && arg.indexOf('=')<0 && addFileIfNotFound && (arg.indexOf(',')>=0 || new File(arg).exists())){ | |
65 addRefFiles(arg); | |
66 }else{ | |
67 return false; | |
68 } | |
69 // System.err.println("Parsed "+arg+"; ref="+refFiles); //123 | |
70 return true; | |
71 } | |
72 | |
73 public boolean compare(ArrayList<Sketch> querySketches, ByteBuilder sb, DisplayParams params, int maxThreads){ | |
74 assert(params.postParsed); | |
75 final boolean json=params.json(); | |
76 ConcurrentHashMap<Integer, Comparison> map=new ConcurrentHashMap<Integer, Comparison>(); | |
77 | |
78 SketchResults[] alca=new SketchResults[querySketches.size()]; | |
79 | |
80 if(verbose2){System.err.println("At compare.");} | |
81 | |
82 boolean success=true; | |
83 final CompareBuffer buffer=new CompareBuffer(false); | |
84 AtomicInteger fakeID=new AtomicInteger(minFakeID); | |
85 for(int i=0; i<querySketches.size(); i++){ | |
86 fakeID.set(minFakeID); | |
87 Sketch a=querySketches.get(i); | |
88 | |
89 SketchResults results=processSketch(a, buffer, fakeID, map, params, maxThreads); | |
90 a.clearRefHitCounts(); | |
91 alca[i]=results; | |
92 // System.out.println(a.present); | |
93 } | |
94 | |
95 if(verbose2){System.err.println("Made results.");} | |
96 | |
97 for(int i=0; i<alca.length; i++){ | |
98 // Sketch s=sketches.get(i); | |
99 SketchResults results=alca[i]; | |
100 | |
101 if(json && alca.length>1 && i==0){ | |
102 sb.append('['); | |
103 } | |
104 | |
105 sb.append(results.toText(params)); | |
106 | |
107 if(json && alca.length>1){ | |
108 if(i<alca.length-1){ | |
109 sb.append(','); | |
110 }else{ | |
111 sb.append(']'); | |
112 } | |
113 } | |
114 } | |
115 return success; | |
116 } | |
117 | |
118 private class CompareThread extends Thread { | |
119 | |
120 CompareThread(Sketch a_, ArrayList<Sketch> localRefSketches_, int pid_, int incr_, | |
121 AtomicInteger fakeID_, ConcurrentHashMap<Integer, Comparison> map_, DisplayParams params_){ | |
122 a=a_; | |
123 pid=pid_; | |
124 incr=incr_; | |
125 fakeID=fakeID_; | |
126 map=map_; | |
127 params=params_; | |
128 localRefSketches=localRefSketches_; | |
129 buffer=new CompareBuffer(params.needContamCounts()); | |
130 if(buffer.cbs!=null){buffer.cbs.setCapacity(a.length(), 0);} | |
131 } | |
132 | |
133 @Override | |
134 public void run(){ | |
135 if(a.length()<1 || a.length()<params.minHits || (params.requireSSU && !a.hasSSU())){return;}//TODO: Change to 'require16S' | |
136 assert(a.compareBitSet()==null || buffer.cbs!=null) : (a.compareBitSet()==null)+", "+(buffer.cbs==null); //Unsafe to use a.cbs multithreaded unless atomic | |
137 final AbstractBitSet cbs=(buffer.cbs==null ? a.compareBitSet() : buffer.cbs); | |
138 for(int i=pid; i<localRefSketches.size(); i+=incr){ | |
139 Sketch b=localRefSketches.get(i); | |
140 if(params.passesFilter(b)){ | |
141 processPair(a, b, buffer, cbs, fakeID, map, params); | |
142 localComparisons++; | |
143 } | |
144 } | |
145 comparisons.getAndAdd(localComparisons); | |
146 } | |
147 | |
148 final AtomicInteger fakeID; | |
149 final ConcurrentHashMap<Integer, Comparison> map; | |
150 final CompareBuffer buffer; | |
151 final int incr; | |
152 final int pid; | |
153 final Sketch a; | |
154 final DisplayParams params; | |
155 final ArrayList<Sketch> localRefSketches; | |
156 long localComparisons=0; | |
157 | |
158 } | |
159 | |
160 public SketchResults processSketch(Sketch a, CompareBuffer buffer, AtomicInteger fakeID, | |
161 ConcurrentHashMap<Integer, Comparison> map, DisplayParams params, int maxThreads){ | |
162 if(a.length()<1 || a.length()<params.minHits || (params.requireSSU && !a.hasSSU())){return new SketchResults(a);} | |
163 // Timer t=new Timer(); | |
164 // t.start("Began query."); | |
165 assert(a.compareBitSet()==null); | |
166 assert(a.indexBitSet()==null); | |
167 | |
168 if(verbose2){System.err.println("At processSketch 1");} //123 | |
169 | |
170 a.makeBitSets(params.needContamCounts(), index!=null); | |
171 | |
172 final SketchResults sr; | |
173 if(index!=null){ | |
174 sr=index.getSketches(a, params); | |
175 }else{ | |
176 sr=new SketchResults(a, refSketches, null); | |
177 } | |
178 | |
179 if(verbose2){System.err.println("At processSketch 2");} //123 | |
180 | |
181 if(sr==null || sr.refSketchList==null || sr.refSketchList.isEmpty()){ | |
182 if(verbose2){System.err.println("At processSketch 2.0");} //123 | |
183 return sr; | |
184 } | |
185 | |
186 if(verbose2){System.err.println("At processSketch 2.1");} //123 | |
187 | |
188 if(verbose2){System.err.println("At processSketch 2.2");} //123 | |
189 | |
190 if(maxThreads>1 && Shared.threads()>1 && sr.refSketchList.size()>31){ | |
191 if(verbose2){System.err.println("At processSketch 2.3");} //123 | |
192 assert((buffer.cbs==null)==(params.needContamCounts())); | |
193 spawnThreads(a, sr.refSketchList, fakeID, map, params, maxThreads); | |
194 if(verbose2){System.err.println("At processSketch 2.4");} //123 | |
195 }else{ | |
196 if(verbose2){System.err.println("At processSketch 2.5");} //123 | |
197 assert(buffer.cbs==null); | |
198 long comp=0; | |
199 for(Sketch b : sr.refSketchList){ | |
200 if(params.passesFilter(b)){ | |
201 comp++; | |
202 processPair(a, b, buffer, a.compareBitSet(), /*sr.taxHits,*/ fakeID, map, params); | |
203 } | |
204 } | |
205 comparisons.getAndAdd(comp); | |
206 if(verbose2){System.err.println("At processSketch 2.6");} //123 | |
207 } | |
208 if(verbose2){System.err.println("At processSketch 3");} //123 | |
209 | |
210 sr.addMap(map, params, buffer); | |
211 | |
212 fakeID.set(minFakeID); | |
213 map.clear(); | |
214 if(verbose2){System.err.println("At processSketch 4");} //123 | |
215 a.clearRefHitCounts(); | |
216 | |
217 return sr; | |
218 } | |
219 | |
220 //For remote homology | |
221 boolean passesTax(Sketch q, Sketch ref){ | |
222 assert(minLevelExtended>=0); | |
223 final int qid=q.taxID; | |
224 if(qid<0 || qid>=minFakeID){return false;} | |
225 TaxNode qtn=taxtree.getNode(qid); | |
226 if(qtn==null){return false;} | |
227 if(qtn.levelExtended>minLevelExtended){return false;} | |
228 final int rid=(ref==null ? -1 : ref.taxID); | |
229 if(rid>=0 && rid<minFakeID){ | |
230 TaxNode rtn=taxtree.getNode(rid); | |
231 if(rtn!=null && rtn.levelExtended<=minLevelExtended){ | |
232 TaxNode ancestor=taxtree.commonAncestor(qtn, rtn); | |
233 if(ancestor!=null && ancestor.levelExtended>=minLevelExtended){ | |
234 return true; | |
235 } | |
236 } | |
237 } | |
238 return false; | |
239 } | |
240 | |
241 private void spawnThreads(Sketch a, ArrayList<Sketch> refs, AtomicInteger fakeID, | |
242 ConcurrentHashMap<Integer, Comparison> map, DisplayParams params, int maxThreads){ | |
243 final int toSpawn=Tools.max(1, Tools.min((refs.size()+7)/8, threads, maxThreads, Shared.threads())); | |
244 ArrayList<CompareThread> alct=new ArrayList<CompareThread>(toSpawn); | |
245 if(verbose2){System.err.println("At spawnThreads");} //123 | |
246 for(int t=0; t<toSpawn; t++){ | |
247 alct.add(new CompareThread(a, refs, t, toSpawn, fakeID, map, params)); | |
248 } | |
249 for(CompareThread ct : alct){ct.start();} | |
250 for(CompareThread ct : alct){ | |
251 | |
252 //Wait until this thread has terminated | |
253 while(ct.getState()!=Thread.State.TERMINATED){ | |
254 try { | |
255 //Attempt a join operation | |
256 ct.join(); | |
257 } catch (InterruptedException e) { | |
258 e.printStackTrace(); | |
259 } | |
260 } | |
261 } | |
262 if(params.needContamCounts()){ | |
263 for(CompareThread ct : alct){ | |
264 if(ct.buffer.cbs==null){ | |
265 assert((AUTOSIZE || AUTOSIZE_LINEAR) && index!=null);//Not really what this does | |
266 break; | |
267 } | |
268 a.addToBitSet(ct.buffer.cbs); | |
269 } | |
270 } | |
271 a.clearRefHitCounts(); | |
272 alct=null; | |
273 } | |
274 | |
275 // private void writeResults(ArrayList<Comparison> al, Sketch s, StringBuilder sb){ | |
276 // sb.append("\nResults for "+s.name()+":\n\n"); | |
277 // | |
278 // ArrayList<TaxNode> tnl=new ArrayList<TaxNode>(); | |
279 // for(Comparison c : al){ | |
280 // formatComparison(c, format, sb, printTax); | |
281 // } | |
282 // } | |
283 | |
284 boolean processPair(Sketch a, Sketch b, CompareBuffer buffer, AbstractBitSet abs, | |
285 AtomicInteger fakeID, ConcurrentHashMap<Integer, Comparison> map, DisplayParams params){ | |
286 // System.err.println("Comparing "+a.name()+" and "+b.name()); | |
287 assert(!params.printRefHits || a.refHitCounts()!=null || !SketchObject.makeIndex); | |
288 | |
289 | |
290 if(b.genomeSizeBases<params.minBases){return false;} | |
291 if(minLevelExtended>-1 && !passesTax(a, b)){return false;} | |
292 if(params.minSizeRatio>0){ | |
293 long sea=a.genomeSizeEstimate(); | |
294 long seb=b.genomeSizeEstimate(); | |
295 if(Tools.min(sea, seb)<params.minSizeRatio*Tools.max(sea, seb)){return false;} | |
296 } | |
297 Comparison c=compareOneToOne(a, b, buffer, abs, /*taxHits, params.contamLevel(),*/ params.minHits, params.minWKID, params.minANI, params.requireSSU, null); | |
298 if(c==null){return false;} | |
299 if(c.taxID()<1){c.taxID=fakeID.getAndIncrement();} | |
300 | |
301 // System.err.println("TID: "+c.taxID()+", "+fakeID); | |
302 | |
303 TaxNode tn=(taxtree==null ? null : taxtree.getNode(b.taxID)); | |
304 if(tn!=null){ | |
305 c.taxName=tn.name; | |
306 if(tn.level<params.taxLevel){ | |
307 TaxNode tn2=taxtree.getNodeAtLevel(b.taxID, params.taxLevel); | |
308 tn=tn2; | |
309 } | |
310 } | |
311 Integer key=(tn==null ? c.taxID : tn.id); | |
312 | |
313 Comparison old=map.get(key); | |
314 // System.err.println("A. Old: "+(old==null ? 0 : old.hits)+", new: "+c.hits); | |
315 if(old!=null && params.compare(old, c)>0){return false;} | |
316 | |
317 old=map.put(key, c); | |
318 while(old!=null && params.compare(old, c)>0){ | |
319 // System.err.println("B. Old: "+(old==null ? 0 : old.hits)+", new: "+c.hits); | |
320 c=old; | |
321 old=map.put(key, c); | |
322 } | |
323 return true; | |
324 } | |
325 | |
326 // //TODO: Interestingly, the heap never seems to be created by anything... not sure what it's for. | |
327 // private static Comparison compareOneToOne(final Sketch a, final Sketch b, CompareBuffer buffer, AbstractBitSet abs, | |
328 // int minHits, float minWKID, float minANI, boolean aniFromWKID, Heap<Comparison> heap){ | |
329 //// assert(heap!=null); //Optional, for testing. | |
330 // if(a==b && !compareSelf){return null;} | |
331 // final int matches=a.countMatches(b, buffer, abs, true/*!makeIndex || !AUTOSIZE*/, null, -1); | |
332 // assert(matches==buffer.hits()); | |
333 // if(matches<minHits){return null;} | |
334 //// asdf //TODO: handle k1 and k2 WKIDs here. | |
335 // { | |
336 //// final int div=aniFromWKID ? buffer.minDivisor() : buffer.maxDivisor(); | |
337 //// final float xkid=matches/(float)div;//This could be kid or wkid at this point... | |
338 //// if(xkid<minWKID){return null;} | |
339 // | |
340 // final int div=aniFromWKID ? buffer.minDivisor() : buffer.maxDivisor(); | |
341 // final float xkid=matches/(float)div;//This could be kid or wkid at this point... | |
342 // if(xkid<minWKID){return null;} | |
343 // | |
344 // //TODO (?) This is only necessary because of the order of setting minwkid and minani. | |
345 // //minWKID can be deterministically determined from minANI so if it is set correctly this can be skipped. | |
346 // if(minANI>0){ | |
347 // final float ani=wkidToAni(xkid, a.k1Fraction()); | |
348 // if(ani<minANI){return null;} | |
349 // } | |
350 // } | |
351 // | |
352 // if(heap!=null && !heap.hasRoom() && heap.peek().hits()>matches){return null;} //TODO: Should be based on score | |
353 // | |
354 //// System.err.print("*"); | |
355 // Comparison c=new Comparison(buffer, a, b); | |
356 // if(heap==null || heap.add(c)){return c;} | |
357 // return null; | |
358 // } | |
359 | |
360 //TODO: Interestingly, the heap never seems to be created by anything... not sure what it's for. | |
361 private static Comparison compareOneToOne(final Sketch a, final Sketch b, CompareBuffer buffer, AbstractBitSet abs, | |
362 int minHits, float minWKID, float minANI, boolean requireSSU, Heap<Comparison> heap){ | |
363 // assert(heap!=null); //Optional, for testing. | |
364 // assert(a.refHitCounts!=null); | |
365 if(a==b && !compareSelf){return null;} | |
366 if(requireSSU && !a.sharesSSU(b)){return null;} | |
367 final int matches=a.countMatches(b, buffer, abs, true/*!makeIndex || !AUTOSIZE*/, null, -1); | |
368 assert(matches==buffer.hits()); | |
369 if(matches<minHits){return null;} | |
370 | |
371 { | |
372 final float wkid=buffer.wkid(); | |
373 if(wkid<minWKID){return null;} | |
374 | |
375 if(minANI>0){ | |
376 final float ani=buffer.ani(); | |
377 if(ani<minANI){return null;} | |
378 } | |
379 } | |
380 | |
381 if(heap!=null && !heap.hasRoom() && heap.peek().hits()>matches){return null;} //TODO: Should be based on score | |
382 | |
383 // System.err.print("*"); | |
384 Comparison c=new Comparison(buffer, a, b); | |
385 if(heap==null || heap.add(c)){return c;} | |
386 return null; | |
387 } | |
388 | |
389 public void addRefFiles(String a){ | |
390 if(a.equalsIgnoreCase("nr")){ | |
391 addRefFiles(NR_PATH()); | |
392 if(blacklist==null){blacklist=Blacklist.nrBlacklist();} | |
393 if(defaultParams.dbName==null){defaultParams.dbName="nr";} | |
394 if(!setK){k=defaultKAmino; k2=defaultK2Amino;} | |
395 }else if(a.equalsIgnoreCase("nt")){ | |
396 addRefFiles(NT_PATH()); | |
397 if(blacklist==null){blacklist=Blacklist.ntBlacklist();} | |
398 if(defaultParams.dbName==null){defaultParams.dbName="nt";} | |
399 if(!setK){k=defaultK; k2=defaultK2;} | |
400 }else if(a.equalsIgnoreCase("refseq")){ | |
401 addRefFiles(REFSEQ_PATH()); | |
402 if(blacklist==null){blacklist=Blacklist.refseqBlacklist();} | |
403 if(defaultParams.dbName==null){defaultParams.dbName="RefSeq";} | |
404 if(!setK){k=defaultK; k2=defaultK2;} | |
405 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=2.0f;} | |
406 }else if(a.equalsIgnoreCase("refseqbig")){ | |
407 addRefFiles(REFSEQ_PATH_BIG()); | |
408 if(blacklist==null){blacklist=Blacklist.refseqBlacklist();} | |
409 if(defaultParams.dbName==null){defaultParams.dbName="RefSeq";} | |
410 if(!setK){k=defaultK; k2=defaultK2;} | |
411 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=4.5f;} | |
412 }else if(a.equalsIgnoreCase("silva")){ | |
413 // TaxTree.SILVA_MODE=Parse.parseBoolean(b); | |
414 addRefFiles(SILVA_PATH()); | |
415 if(blacklist==null){blacklist=Blacklist.silvaBlacklist();} | |
416 if(defaultParams.dbName==null){defaultParams.dbName="Silva";} | |
417 if(!setK){k=defaultK; k2=defaultK2;} | |
418 }else if(a.equalsIgnoreCase("img")){ | |
419 addRefFiles(IMG_PATH()); | |
420 if(blacklist==null){blacklist=Blacklist.imgBlacklist();} | |
421 if(defaultParams.dbName==null){defaultParams.dbName="IMG";} | |
422 if(!setK){k=defaultK; k2=defaultK2;} | |
423 }else if(a.equalsIgnoreCase("prokprot") || a.equalsIgnoreCase("protein")){ | |
424 addRefFiles(PROKPROT_PATH()); | |
425 if(blacklist==null){blacklist=Blacklist.prokProtBlacklist();} | |
426 if(defaultParams.dbName==null){defaultParams.dbName="ProkProt";} | |
427 if(!setK){k=defaultKAmino; k2=defaultK2Amino;} | |
428 if(!amino && !translate) { | |
429 translate=true; | |
430 System.err.println("Setting translate to true because a protein dataset is being used."); | |
431 } | |
432 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=3.0f;} | |
433 }else if(a.equalsIgnoreCase("prokprotbig") || a.equalsIgnoreCase("proteinbig")){ | |
434 addRefFiles(PROKPROT_PATH_BIG()); | |
435 if(blacklist==null){blacklist=Blacklist.prokProtBlacklist();} | |
436 if(defaultParams.dbName==null){defaultParams.dbName="ProkProt";} | |
437 if(!setK){k=defaultKAmino; k2=defaultK2Amino;} | |
438 if(!amino && !translate) { | |
439 translate=true; | |
440 System.err.println("Setting translate to true because a protein dataset is being used."); | |
441 } | |
442 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=7.5f;} | |
443 }else if(a.equalsIgnoreCase("mito") || a.equalsIgnoreCase("refseqmito")){ | |
444 addRefFiles(MITO_PATH()); | |
445 if(blacklist==null){blacklist=Blacklist.mitoBlacklist();} | |
446 if(defaultParams.dbName==null){defaultParams.dbName="RefSeqMito";} | |
447 if(!setK){k=defaultK; k2=defaultK2;} | |
448 }else if(a.equalsIgnoreCase("fungi") || a.equalsIgnoreCase("refseqfungi")){ | |
449 addRefFiles(FUNGI_PATH()); | |
450 if(blacklist==null){blacklist=Blacklist.fungiBlacklist();} | |
451 if(defaultParams.dbName==null){defaultParams.dbName="RefSeqFungi";} | |
452 if(!setK){k=defaultK; k2=defaultK2;} | |
453 }else{ | |
454 addFiles(a, refFiles); | |
455 } | |
456 } | |
457 | |
458 static void addFiles(String a, Set<String> list){ | |
459 if(a==null){return;} | |
460 File f=new File(a); | |
461 assert(!list.contains(a)) : "Duplicate file "+a; | |
462 | |
463 if(f.exists()){ | |
464 list.add(a); | |
465 }else if(a.indexOf(',')>0){ | |
466 for(String s : a.split(",")){addFiles(s, list);} | |
467 }else if(a.indexOf('#')>=0 && new File(a.replaceFirst("#", "0")).exists()){ | |
468 for(int i=0; true; i++){ | |
469 String temp=a.replaceFirst("#", ""+i); | |
470 if(!new File(temp).exists()){break;} | |
471 list.add(temp); | |
472 } | |
473 }else{ | |
474 list.add(a); | |
475 } | |
476 } | |
477 | |
478 public void makeIndex(){ | |
479 assert(index==null); | |
480 index=new SketchIndex(refSketches); | |
481 index.load(); | |
482 } | |
483 | |
484 public void loadReferences(int mode_, DisplayParams params){ | |
485 loadReferences(mode_, params.minKeyOccuranceCount, params.minEntropy, params.minProb, params.minQual); | |
486 } | |
487 | |
488 public void loadReferences(int mode_, int minKeyOccuranceCount, float minEntropy, float minProb, byte minQual) { | |
489 makeTool(minKeyOccuranceCount, false, false); | |
490 refSketches=tool.loadSketches_MT(mode_, 1f, -1, minEntropy, minProb, minQual, refFiles); | |
491 assert(refSketches!=null) : refFiles; | |
492 if(mode_==PER_FILE){ | |
493 Collections.sort(refSketches, SketchIdComparator.comparator); | |
494 } | |
495 taxIDToSketchIDMap=new IntHashMap(Tools.max(3, (int)(refSketches.size()*1.2f))); | |
496 for(int i=0; i<refSketches.size(); i++){ | |
497 Sketch sk=refSketches.get(i); | |
498 if(sk!=null && sk.taxID>0){ | |
499 taxIDToSketchIDMap.set(sk.taxID, i); | |
500 } | |
501 } | |
502 // System.err.println("Sketches: "+refSketches.get(0).name()); | |
503 if(makeIndex){ | |
504 makeIndex(); | |
505 } | |
506 } | |
507 | |
508 public void makeTool(int minKeyOccuranceCount, boolean trackCounts, boolean mergePairs){ | |
509 if(tool==null){ | |
510 tool=new SketchTool(targetSketchSize, minKeyOccuranceCount, trackCounts, mergePairs); | |
511 } | |
512 } | |
513 | |
514 public ArrayList<Sketch> loadSketchesFromString(String sketchString){ | |
515 return tool.loadSketchesFromString(sketchString); | |
516 } | |
517 | |
518 public int refFileCount(){return refFiles==null ? 0 : refFiles.size();} | |
519 public int refSketchCount(){return refSketches==null ? 0 : refSketches.size();} | |
520 | |
521 public Sketch findReferenceSketch(int taxID){ | |
522 if(taxID<1){return null;} | |
523 int skid=taxIDToSketchIDMap.get(taxID); | |
524 return skid<0 ? null : refSketches.get(skid); | |
525 } | |
526 | |
527 /*--------------------------------------------------------------*/ | |
528 | |
529 public SketchIndex index=null; | |
530 public boolean autoIndex=true; | |
531 | |
532 public SketchTool tool=null; | |
533 public ArrayList<Sketch> refSketches; | |
534 LinkedHashSet<String> refFiles=new LinkedHashSet<String>(); | |
535 /** For ref sketch lookups by TaxID */ | |
536 private IntHashMap taxIDToSketchIDMap; | |
537 public int threads=Shared.threads(); | |
538 boolean verbose; | |
539 boolean errorState=false; | |
540 AtomicLong comparisons=new AtomicLong(0); | |
541 | |
542 int minLevelExtended=-1; | |
543 | |
544 } |