jpayne@68
|
1 package sketch;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.Collections;
|
jpayne@68
|
6 import java.util.LinkedHashSet;
|
jpayne@68
|
7 import java.util.Set;
|
jpayne@68
|
8 import java.util.concurrent.ConcurrentHashMap;
|
jpayne@68
|
9 import java.util.concurrent.atomic.AtomicInteger;
|
jpayne@68
|
10 import java.util.concurrent.atomic.AtomicLong;
|
jpayne@68
|
11
|
jpayne@68
|
12 import shared.Parse;
|
jpayne@68
|
13 import shared.Shared;
|
jpayne@68
|
14 import shared.Tools;
|
jpayne@68
|
15 import structures.AbstractBitSet;
|
jpayne@68
|
16 import structures.ByteBuilder;
|
jpayne@68
|
17 import structures.Heap;
|
jpayne@68
|
18 import structures.IntHashMap;
|
jpayne@68
|
19 import tax.TaxNode;
|
jpayne@68
|
20 import tax.TaxTree;
|
jpayne@68
|
21
|
jpayne@68
|
22 public class SketchSearcher extends SketchObject {
|
jpayne@68
|
23
|
jpayne@68
|
/** Creates a searcher; every field keeps its declared initial value. */
public SketchSearcher(){
    super();
}
|
jpayne@68
|
27
|
jpayne@68
|
28 public boolean parse(String arg, String a, String b, boolean addFileIfNotFound){
|
jpayne@68
|
29
|
jpayne@68
|
30 // System.err.println("Parsing "+arg+"; ref="+refFiles); //123
|
jpayne@68
|
31
|
jpayne@68
|
32 if(parseSketchFlags(arg, a, b)){
|
jpayne@68
|
33 //Do nothing
|
jpayne@68
|
34 }else if(defaultParams.parse(arg, a, b)){
|
jpayne@68
|
35 //Do nothing
|
jpayne@68
|
36 }else if(a.equals("verbose")){
|
jpayne@68
|
37 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
38 }else if(a.equals("ref")){
|
jpayne@68
|
39 addRefFiles(b);
|
jpayne@68
|
40 }else if(arg.equalsIgnoreCase("nt") || arg.equalsIgnoreCase("RefSeq") || arg.equalsIgnoreCase("refseqbig") || arg.equalsIgnoreCase("nr")
|
jpayne@68
|
41 || arg.equalsIgnoreCase("img") || arg.equalsIgnoreCase("silva") || arg.equalsIgnoreCase("ribo")
|
jpayne@68
|
42 || arg.equalsIgnoreCase("mito") || arg.equalsIgnoreCase("fungi")
|
jpayne@68
|
43 || arg.equalsIgnoreCase("prokprot") || arg.equalsIgnoreCase("prokprotbig") || arg.equalsIgnoreCase("protein") ||
|
jpayne@68
|
44 arg.equalsIgnoreCase("protien") || a.equalsIgnoreCase("prot")){
|
jpayne@68
|
45 addRefFiles(arg);
|
jpayne@68
|
46 }else if(a.equals("threads") || a.equals("sketchthreads") || a.equals("t")){
|
jpayne@68
|
47 threads=Integer.parseInt(b);
|
jpayne@68
|
48 }
|
jpayne@68
|
49
|
jpayne@68
|
50 else if(a.equalsIgnoreCase("minLevelExtended") || a.equalsIgnoreCase("minLevel")){
|
jpayne@68
|
51 minLevelExtended=TaxTree.parseLevelExtended(b);
|
jpayne@68
|
52 }else if(a.equals("index") || a.equals("makeindex")){
|
jpayne@68
|
53 if(b!=null && "auto".equalsIgnoreCase(b)){
|
jpayne@68
|
54 autoIndex=true;
|
jpayne@68
|
55 makeIndex=true;
|
jpayne@68
|
56 }else{
|
jpayne@68
|
57 autoIndex=false;
|
jpayne@68
|
58 makeIndex=Parse.parseBoolean(b);
|
jpayne@68
|
59 }
|
jpayne@68
|
60 }else if(a.equals("indexsize") || a.equals("indexlimit")){
|
jpayne@68
|
61 SketchIndex.indexLimit=Integer.parseInt(b);
|
jpayne@68
|
62 }
|
jpayne@68
|
63
|
jpayne@68
|
64 else if(b==null && arg.indexOf('=')<0 && addFileIfNotFound && (arg.indexOf(',')>=0 || new File(arg).exists())){
|
jpayne@68
|
65 addRefFiles(arg);
|
jpayne@68
|
66 }else{
|
jpayne@68
|
67 return false;
|
jpayne@68
|
68 }
|
jpayne@68
|
69 // System.err.println("Parsed "+arg+"; ref="+refFiles); //123
|
jpayne@68
|
70 return true;
|
jpayne@68
|
71 }
|
jpayne@68
|
72
|
jpayne@68
|
73 public boolean compare(ArrayList<Sketch> querySketches, ByteBuilder sb, DisplayParams params, int maxThreads){
|
jpayne@68
|
74 assert(params.postParsed);
|
jpayne@68
|
75 final boolean json=params.json();
|
jpayne@68
|
76 ConcurrentHashMap<Integer, Comparison> map=new ConcurrentHashMap<Integer, Comparison>();
|
jpayne@68
|
77
|
jpayne@68
|
78 SketchResults[] alca=new SketchResults[querySketches.size()];
|
jpayne@68
|
79
|
jpayne@68
|
80 if(verbose2){System.err.println("At compare.");}
|
jpayne@68
|
81
|
jpayne@68
|
82 boolean success=true;
|
jpayne@68
|
83 final CompareBuffer buffer=new CompareBuffer(false);
|
jpayne@68
|
84 AtomicInteger fakeID=new AtomicInteger(minFakeID);
|
jpayne@68
|
85 for(int i=0; i<querySketches.size(); i++){
|
jpayne@68
|
86 fakeID.set(minFakeID);
|
jpayne@68
|
87 Sketch a=querySketches.get(i);
|
jpayne@68
|
88
|
jpayne@68
|
89 SketchResults results=processSketch(a, buffer, fakeID, map, params, maxThreads);
|
jpayne@68
|
90 a.clearRefHitCounts();
|
jpayne@68
|
91 alca[i]=results;
|
jpayne@68
|
92 // System.out.println(a.present);
|
jpayne@68
|
93 }
|
jpayne@68
|
94
|
jpayne@68
|
95 if(verbose2){System.err.println("Made results.");}
|
jpayne@68
|
96
|
jpayne@68
|
97 for(int i=0; i<alca.length; i++){
|
jpayne@68
|
98 // Sketch s=sketches.get(i);
|
jpayne@68
|
99 SketchResults results=alca[i];
|
jpayne@68
|
100
|
jpayne@68
|
101 if(json && alca.length>1 && i==0){
|
jpayne@68
|
102 sb.append('[');
|
jpayne@68
|
103 }
|
jpayne@68
|
104
|
jpayne@68
|
105 sb.append(results.toText(params));
|
jpayne@68
|
106
|
jpayne@68
|
107 if(json && alca.length>1){
|
jpayne@68
|
108 if(i<alca.length-1){
|
jpayne@68
|
109 sb.append(',');
|
jpayne@68
|
110 }else{
|
jpayne@68
|
111 sb.append(']');
|
jpayne@68
|
112 }
|
jpayne@68
|
113 }
|
jpayne@68
|
114 }
|
jpayne@68
|
115 return success;
|
jpayne@68
|
116 }
|
jpayne@68
|
117
|
jpayne@68
|
118 private class CompareThread extends Thread {
|
jpayne@68
|
119
|
jpayne@68
|
120 CompareThread(Sketch a_, ArrayList<Sketch> localRefSketches_, int pid_, int incr_,
|
jpayne@68
|
121 AtomicInteger fakeID_, ConcurrentHashMap<Integer, Comparison> map_, DisplayParams params_){
|
jpayne@68
|
122 a=a_;
|
jpayne@68
|
123 pid=pid_;
|
jpayne@68
|
124 incr=incr_;
|
jpayne@68
|
125 fakeID=fakeID_;
|
jpayne@68
|
126 map=map_;
|
jpayne@68
|
127 params=params_;
|
jpayne@68
|
128 localRefSketches=localRefSketches_;
|
jpayne@68
|
129 buffer=new CompareBuffer(params.needContamCounts());
|
jpayne@68
|
130 if(buffer.cbs!=null){buffer.cbs.setCapacity(a.length(), 0);}
|
jpayne@68
|
131 }
|
jpayne@68
|
132
|
jpayne@68
|
133 @Override
|
jpayne@68
|
134 public void run(){
|
jpayne@68
|
135 if(a.length()<1 || a.length()<params.minHits || (params.requireSSU && !a.hasSSU())){return;}//TODO: Change to 'require16S'
|
jpayne@68
|
136 assert(a.compareBitSet()==null || buffer.cbs!=null) : (a.compareBitSet()==null)+", "+(buffer.cbs==null); //Unsafe to use a.cbs multithreaded unless atomic
|
jpayne@68
|
137 final AbstractBitSet cbs=(buffer.cbs==null ? a.compareBitSet() : buffer.cbs);
|
jpayne@68
|
138 for(int i=pid; i<localRefSketches.size(); i+=incr){
|
jpayne@68
|
139 Sketch b=localRefSketches.get(i);
|
jpayne@68
|
140 if(params.passesFilter(b)){
|
jpayne@68
|
141 processPair(a, b, buffer, cbs, fakeID, map, params);
|
jpayne@68
|
142 localComparisons++;
|
jpayne@68
|
143 }
|
jpayne@68
|
144 }
|
jpayne@68
|
145 comparisons.getAndAdd(localComparisons);
|
jpayne@68
|
146 }
|
jpayne@68
|
147
|
jpayne@68
|
148 final AtomicInteger fakeID;
|
jpayne@68
|
149 final ConcurrentHashMap<Integer, Comparison> map;
|
jpayne@68
|
150 final CompareBuffer buffer;
|
jpayne@68
|
151 final int incr;
|
jpayne@68
|
152 final int pid;
|
jpayne@68
|
153 final Sketch a;
|
jpayne@68
|
154 final DisplayParams params;
|
jpayne@68
|
155 final ArrayList<Sketch> localRefSketches;
|
jpayne@68
|
156 long localComparisons=0;
|
jpayne@68
|
157
|
jpayne@68
|
158 }
|
jpayne@68
|
159
|
jpayne@68
|
160 public SketchResults processSketch(Sketch a, CompareBuffer buffer, AtomicInteger fakeID,
|
jpayne@68
|
161 ConcurrentHashMap<Integer, Comparison> map, DisplayParams params, int maxThreads){
|
jpayne@68
|
162 if(a.length()<1 || a.length()<params.minHits || (params.requireSSU && !a.hasSSU())){return new SketchResults(a);}
|
jpayne@68
|
163 // Timer t=new Timer();
|
jpayne@68
|
164 // t.start("Began query.");
|
jpayne@68
|
165 assert(a.compareBitSet()==null);
|
jpayne@68
|
166 assert(a.indexBitSet()==null);
|
jpayne@68
|
167
|
jpayne@68
|
168 if(verbose2){System.err.println("At processSketch 1");} //123
|
jpayne@68
|
169
|
jpayne@68
|
170 a.makeBitSets(params.needContamCounts(), index!=null);
|
jpayne@68
|
171
|
jpayne@68
|
172 final SketchResults sr;
|
jpayne@68
|
173 if(index!=null){
|
jpayne@68
|
174 sr=index.getSketches(a, params);
|
jpayne@68
|
175 }else{
|
jpayne@68
|
176 sr=new SketchResults(a, refSketches, null);
|
jpayne@68
|
177 }
|
jpayne@68
|
178
|
jpayne@68
|
179 if(verbose2){System.err.println("At processSketch 2");} //123
|
jpayne@68
|
180
|
jpayne@68
|
181 if(sr==null || sr.refSketchList==null || sr.refSketchList.isEmpty()){
|
jpayne@68
|
182 if(verbose2){System.err.println("At processSketch 2.0");} //123
|
jpayne@68
|
183 return sr;
|
jpayne@68
|
184 }
|
jpayne@68
|
185
|
jpayne@68
|
186 if(verbose2){System.err.println("At processSketch 2.1");} //123
|
jpayne@68
|
187
|
jpayne@68
|
188 if(verbose2){System.err.println("At processSketch 2.2");} //123
|
jpayne@68
|
189
|
jpayne@68
|
190 if(maxThreads>1 && Shared.threads()>1 && sr.refSketchList.size()>31){
|
jpayne@68
|
191 if(verbose2){System.err.println("At processSketch 2.3");} //123
|
jpayne@68
|
192 assert((buffer.cbs==null)==(params.needContamCounts()));
|
jpayne@68
|
193 spawnThreads(a, sr.refSketchList, fakeID, map, params, maxThreads);
|
jpayne@68
|
194 if(verbose2){System.err.println("At processSketch 2.4");} //123
|
jpayne@68
|
195 }else{
|
jpayne@68
|
196 if(verbose2){System.err.println("At processSketch 2.5");} //123
|
jpayne@68
|
197 assert(buffer.cbs==null);
|
jpayne@68
|
198 long comp=0;
|
jpayne@68
|
199 for(Sketch b : sr.refSketchList){
|
jpayne@68
|
200 if(params.passesFilter(b)){
|
jpayne@68
|
201 comp++;
|
jpayne@68
|
202 processPair(a, b, buffer, a.compareBitSet(), /*sr.taxHits,*/ fakeID, map, params);
|
jpayne@68
|
203 }
|
jpayne@68
|
204 }
|
jpayne@68
|
205 comparisons.getAndAdd(comp);
|
jpayne@68
|
206 if(verbose2){System.err.println("At processSketch 2.6");} //123
|
jpayne@68
|
207 }
|
jpayne@68
|
208 if(verbose2){System.err.println("At processSketch 3");} //123
|
jpayne@68
|
209
|
jpayne@68
|
210 sr.addMap(map, params, buffer);
|
jpayne@68
|
211
|
jpayne@68
|
212 fakeID.set(minFakeID);
|
jpayne@68
|
213 map.clear();
|
jpayne@68
|
214 if(verbose2){System.err.println("At processSketch 4");} //123
|
jpayne@68
|
215 a.clearRefHitCounts();
|
jpayne@68
|
216
|
jpayne@68
|
217 return sr;
|
jpayne@68
|
218 }
|
jpayne@68
|
219
|
jpayne@68
|
220 //For remote homology
|
jpayne@68
|
221 boolean passesTax(Sketch q, Sketch ref){
|
jpayne@68
|
222 assert(minLevelExtended>=0);
|
jpayne@68
|
223 final int qid=q.taxID;
|
jpayne@68
|
224 if(qid<0 || qid>=minFakeID){return false;}
|
jpayne@68
|
225 TaxNode qtn=taxtree.getNode(qid);
|
jpayne@68
|
226 if(qtn==null){return false;}
|
jpayne@68
|
227 if(qtn.levelExtended>minLevelExtended){return false;}
|
jpayne@68
|
228 final int rid=(ref==null ? -1 : ref.taxID);
|
jpayne@68
|
229 if(rid>=0 && rid<minFakeID){
|
jpayne@68
|
230 TaxNode rtn=taxtree.getNode(rid);
|
jpayne@68
|
231 if(rtn!=null && rtn.levelExtended<=minLevelExtended){
|
jpayne@68
|
232 TaxNode ancestor=taxtree.commonAncestor(qtn, rtn);
|
jpayne@68
|
233 if(ancestor!=null && ancestor.levelExtended>=minLevelExtended){
|
jpayne@68
|
234 return true;
|
jpayne@68
|
235 }
|
jpayne@68
|
236 }
|
jpayne@68
|
237 }
|
jpayne@68
|
238 return false;
|
jpayne@68
|
239 }
|
jpayne@68
|
240
|
jpayne@68
|
241 private void spawnThreads(Sketch a, ArrayList<Sketch> refs, AtomicInteger fakeID,
|
jpayne@68
|
242 ConcurrentHashMap<Integer, Comparison> map, DisplayParams params, int maxThreads){
|
jpayne@68
|
243 final int toSpawn=Tools.max(1, Tools.min((refs.size()+7)/8, threads, maxThreads, Shared.threads()));
|
jpayne@68
|
244 ArrayList<CompareThread> alct=new ArrayList<CompareThread>(toSpawn);
|
jpayne@68
|
245 if(verbose2){System.err.println("At spawnThreads");} //123
|
jpayne@68
|
246 for(int t=0; t<toSpawn; t++){
|
jpayne@68
|
247 alct.add(new CompareThread(a, refs, t, toSpawn, fakeID, map, params));
|
jpayne@68
|
248 }
|
jpayne@68
|
249 for(CompareThread ct : alct){ct.start();}
|
jpayne@68
|
250 for(CompareThread ct : alct){
|
jpayne@68
|
251
|
jpayne@68
|
252 //Wait until this thread has terminated
|
jpayne@68
|
253 while(ct.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
254 try {
|
jpayne@68
|
255 //Attempt a join operation
|
jpayne@68
|
256 ct.join();
|
jpayne@68
|
257 } catch (InterruptedException e) {
|
jpayne@68
|
258 e.printStackTrace();
|
jpayne@68
|
259 }
|
jpayne@68
|
260 }
|
jpayne@68
|
261 }
|
jpayne@68
|
262 if(params.needContamCounts()){
|
jpayne@68
|
263 for(CompareThread ct : alct){
|
jpayne@68
|
264 if(ct.buffer.cbs==null){
|
jpayne@68
|
265 assert((AUTOSIZE || AUTOSIZE_LINEAR) && index!=null);//Not really what this does
|
jpayne@68
|
266 break;
|
jpayne@68
|
267 }
|
jpayne@68
|
268 a.addToBitSet(ct.buffer.cbs);
|
jpayne@68
|
269 }
|
jpayne@68
|
270 }
|
jpayne@68
|
271 a.clearRefHitCounts();
|
jpayne@68
|
272 alct=null;
|
jpayne@68
|
273 }
|
jpayne@68
|
274
|
jpayne@68
|
275 // private void writeResults(ArrayList<Comparison> al, Sketch s, StringBuilder sb){
|
jpayne@68
|
276 // sb.append("\nResults for "+s.name()+":\n\n");
|
jpayne@68
|
277 //
|
jpayne@68
|
278 // ArrayList<TaxNode> tnl=new ArrayList<TaxNode>();
|
jpayne@68
|
279 // for(Comparison c : al){
|
jpayne@68
|
280 // formatComparison(c, format, sb, printTax);
|
jpayne@68
|
281 // }
|
jpayne@68
|
282 // }
|
jpayne@68
|
283
|
jpayne@68
|
284 boolean processPair(Sketch a, Sketch b, CompareBuffer buffer, AbstractBitSet abs,
|
jpayne@68
|
285 AtomicInteger fakeID, ConcurrentHashMap<Integer, Comparison> map, DisplayParams params){
|
jpayne@68
|
286 // System.err.println("Comparing "+a.name()+" and "+b.name());
|
jpayne@68
|
287 assert(!params.printRefHits || a.refHitCounts()!=null || !SketchObject.makeIndex);
|
jpayne@68
|
288
|
jpayne@68
|
289
|
jpayne@68
|
290 if(b.genomeSizeBases<params.minBases){return false;}
|
jpayne@68
|
291 if(minLevelExtended>-1 && !passesTax(a, b)){return false;}
|
jpayne@68
|
292 if(params.minSizeRatio>0){
|
jpayne@68
|
293 long sea=a.genomeSizeEstimate();
|
jpayne@68
|
294 long seb=b.genomeSizeEstimate();
|
jpayne@68
|
295 if(Tools.min(sea, seb)<params.minSizeRatio*Tools.max(sea, seb)){return false;}
|
jpayne@68
|
296 }
|
jpayne@68
|
297 Comparison c=compareOneToOne(a, b, buffer, abs, /*taxHits, params.contamLevel(),*/ params.minHits, params.minWKID, params.minANI, params.requireSSU, null);
|
jpayne@68
|
298 if(c==null){return false;}
|
jpayne@68
|
299 if(c.taxID()<1){c.taxID=fakeID.getAndIncrement();}
|
jpayne@68
|
300
|
jpayne@68
|
301 // System.err.println("TID: "+c.taxID()+", "+fakeID);
|
jpayne@68
|
302
|
jpayne@68
|
303 TaxNode tn=(taxtree==null ? null : taxtree.getNode(b.taxID));
|
jpayne@68
|
304 if(tn!=null){
|
jpayne@68
|
305 c.taxName=tn.name;
|
jpayne@68
|
306 if(tn.level<params.taxLevel){
|
jpayne@68
|
307 TaxNode tn2=taxtree.getNodeAtLevel(b.taxID, params.taxLevel);
|
jpayne@68
|
308 tn=tn2;
|
jpayne@68
|
309 }
|
jpayne@68
|
310 }
|
jpayne@68
|
311 Integer key=(tn==null ? c.taxID : tn.id);
|
jpayne@68
|
312
|
jpayne@68
|
313 Comparison old=map.get(key);
|
jpayne@68
|
314 // System.err.println("A. Old: "+(old==null ? 0 : old.hits)+", new: "+c.hits);
|
jpayne@68
|
315 if(old!=null && params.compare(old, c)>0){return false;}
|
jpayne@68
|
316
|
jpayne@68
|
317 old=map.put(key, c);
|
jpayne@68
|
318 while(old!=null && params.compare(old, c)>0){
|
jpayne@68
|
319 // System.err.println("B. Old: "+(old==null ? 0 : old.hits)+", new: "+c.hits);
|
jpayne@68
|
320 c=old;
|
jpayne@68
|
321 old=map.put(key, c);
|
jpayne@68
|
322 }
|
jpayne@68
|
323 return true;
|
jpayne@68
|
324 }
|
jpayne@68
|
325
|
jpayne@68
|
326 // //TODO: Interestingly, the heap never seems to be created by anything... not sure what it's for.
|
jpayne@68
|
327 // private static Comparison compareOneToOne(final Sketch a, final Sketch b, CompareBuffer buffer, AbstractBitSet abs,
|
jpayne@68
|
328 // int minHits, float minWKID, float minANI, boolean aniFromWKID, Heap<Comparison> heap){
|
jpayne@68
|
329 //// assert(heap!=null); //Optional, for testing.
|
jpayne@68
|
330 // if(a==b && !compareSelf){return null;}
|
jpayne@68
|
331 // final int matches=a.countMatches(b, buffer, abs, true/*!makeIndex || !AUTOSIZE*/, null, -1);
|
jpayne@68
|
332 // assert(matches==buffer.hits());
|
jpayne@68
|
333 // if(matches<minHits){return null;}
|
jpayne@68
|
334 //// asdf //TODO: handle k1 and k2 WKIDs here.
|
jpayne@68
|
335 // {
|
jpayne@68
|
336 //// final int div=aniFromWKID ? buffer.minDivisor() : buffer.maxDivisor();
|
jpayne@68
|
337 //// final float xkid=matches/(float)div;//This could be kid or wkid at this point...
|
jpayne@68
|
338 //// if(xkid<minWKID){return null;}
|
jpayne@68
|
339 //
|
jpayne@68
|
340 // final int div=aniFromWKID ? buffer.minDivisor() : buffer.maxDivisor();
|
jpayne@68
|
341 // final float xkid=matches/(float)div;//This could be kid or wkid at this point...
|
jpayne@68
|
342 // if(xkid<minWKID){return null;}
|
jpayne@68
|
343 //
|
jpayne@68
|
344 // //TODO (?) This is only necessary because of the order of setting minwkid and minani.
|
jpayne@68
|
345 // //minWKID can be deterministically determined from minANI so if it is set correctly this can be skipped.
|
jpayne@68
|
346 // if(minANI>0){
|
jpayne@68
|
347 // final float ani=wkidToAni(xkid, a.k1Fraction());
|
jpayne@68
|
348 // if(ani<minANI){return null;}
|
jpayne@68
|
349 // }
|
jpayne@68
|
350 // }
|
jpayne@68
|
351 //
|
jpayne@68
|
352 // if(heap!=null && !heap.hasRoom() && heap.peek().hits()>matches){return null;} //TODO: Should be based on score
|
jpayne@68
|
353 //
|
jpayne@68
|
354 //// System.err.print("*");
|
jpayne@68
|
355 // Comparison c=new Comparison(buffer, a, b);
|
jpayne@68
|
356 // if(heap==null || heap.add(c)){return c;}
|
jpayne@68
|
357 // return null;
|
jpayne@68
|
358 // }
|
jpayne@68
|
359
|
jpayne@68
|
360 //TODO: Interestingly, the heap never seems to be created by anything... not sure what it's for.
|
jpayne@68
|
361 private static Comparison compareOneToOne(final Sketch a, final Sketch b, CompareBuffer buffer, AbstractBitSet abs,
|
jpayne@68
|
362 int minHits, float minWKID, float minANI, boolean requireSSU, Heap<Comparison> heap){
|
jpayne@68
|
363 // assert(heap!=null); //Optional, for testing.
|
jpayne@68
|
364 // assert(a.refHitCounts!=null);
|
jpayne@68
|
365 if(a==b && !compareSelf){return null;}
|
jpayne@68
|
366 if(requireSSU && !a.sharesSSU(b)){return null;}
|
jpayne@68
|
367 final int matches=a.countMatches(b, buffer, abs, true/*!makeIndex || !AUTOSIZE*/, null, -1);
|
jpayne@68
|
368 assert(matches==buffer.hits());
|
jpayne@68
|
369 if(matches<minHits){return null;}
|
jpayne@68
|
370
|
jpayne@68
|
371 {
|
jpayne@68
|
372 final float wkid=buffer.wkid();
|
jpayne@68
|
373 if(wkid<minWKID){return null;}
|
jpayne@68
|
374
|
jpayne@68
|
375 if(minANI>0){
|
jpayne@68
|
376 final float ani=buffer.ani();
|
jpayne@68
|
377 if(ani<minANI){return null;}
|
jpayne@68
|
378 }
|
jpayne@68
|
379 }
|
jpayne@68
|
380
|
jpayne@68
|
381 if(heap!=null && !heap.hasRoom() && heap.peek().hits()>matches){return null;} //TODO: Should be based on score
|
jpayne@68
|
382
|
jpayne@68
|
383 // System.err.print("*");
|
jpayne@68
|
384 Comparison c=new Comparison(buffer, a, b);
|
jpayne@68
|
385 if(heap==null || heap.add(c)){return c;}
|
jpayne@68
|
386 return null;
|
jpayne@68
|
387 }
|
jpayne@68
|
388
|
jpayne@68
|
389 public void addRefFiles(String a){
|
jpayne@68
|
390 if(a.equalsIgnoreCase("nr")){
|
jpayne@68
|
391 addRefFiles(NR_PATH());
|
jpayne@68
|
392 if(blacklist==null){blacklist=Blacklist.nrBlacklist();}
|
jpayne@68
|
393 if(defaultParams.dbName==null){defaultParams.dbName="nr";}
|
jpayne@68
|
394 if(!setK){k=defaultKAmino; k2=defaultK2Amino;}
|
jpayne@68
|
395 }else if(a.equalsIgnoreCase("nt")){
|
jpayne@68
|
396 addRefFiles(NT_PATH());
|
jpayne@68
|
397 if(blacklist==null){blacklist=Blacklist.ntBlacklist();}
|
jpayne@68
|
398 if(defaultParams.dbName==null){defaultParams.dbName="nt";}
|
jpayne@68
|
399 if(!setK){k=defaultK; k2=defaultK2;}
|
jpayne@68
|
400 }else if(a.equalsIgnoreCase("refseq")){
|
jpayne@68
|
401 addRefFiles(REFSEQ_PATH());
|
jpayne@68
|
402 if(blacklist==null){blacklist=Blacklist.refseqBlacklist();}
|
jpayne@68
|
403 if(defaultParams.dbName==null){defaultParams.dbName="RefSeq";}
|
jpayne@68
|
404 if(!setK){k=defaultK; k2=defaultK2;}
|
jpayne@68
|
405 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=2.0f;}
|
jpayne@68
|
406 }else if(a.equalsIgnoreCase("refseqbig")){
|
jpayne@68
|
407 addRefFiles(REFSEQ_PATH_BIG());
|
jpayne@68
|
408 if(blacklist==null){blacklist=Blacklist.refseqBlacklist();}
|
jpayne@68
|
409 if(defaultParams.dbName==null){defaultParams.dbName="RefSeq";}
|
jpayne@68
|
410 if(!setK){k=defaultK; k2=defaultK2;}
|
jpayne@68
|
411 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=4.5f;}
|
jpayne@68
|
412 }else if(a.equalsIgnoreCase("silva")){
|
jpayne@68
|
413 // TaxTree.SILVA_MODE=Parse.parseBoolean(b);
|
jpayne@68
|
414 addRefFiles(SILVA_PATH());
|
jpayne@68
|
415 if(blacklist==null){blacklist=Blacklist.silvaBlacklist();}
|
jpayne@68
|
416 if(defaultParams.dbName==null){defaultParams.dbName="Silva";}
|
jpayne@68
|
417 if(!setK){k=defaultK; k2=defaultK2;}
|
jpayne@68
|
418 }else if(a.equalsIgnoreCase("img")){
|
jpayne@68
|
419 addRefFiles(IMG_PATH());
|
jpayne@68
|
420 if(blacklist==null){blacklist=Blacklist.imgBlacklist();}
|
jpayne@68
|
421 if(defaultParams.dbName==null){defaultParams.dbName="IMG";}
|
jpayne@68
|
422 if(!setK){k=defaultK; k2=defaultK2;}
|
jpayne@68
|
423 }else if(a.equalsIgnoreCase("prokprot") || a.equalsIgnoreCase("protein")){
|
jpayne@68
|
424 addRefFiles(PROKPROT_PATH());
|
jpayne@68
|
425 if(blacklist==null){blacklist=Blacklist.prokProtBlacklist();}
|
jpayne@68
|
426 if(defaultParams.dbName==null){defaultParams.dbName="ProkProt";}
|
jpayne@68
|
427 if(!setK){k=defaultKAmino; k2=defaultK2Amino;}
|
jpayne@68
|
428 if(!amino && !translate) {
|
jpayne@68
|
429 translate=true;
|
jpayne@68
|
430 System.err.println("Setting translate to true because a protein dataset is being used.");
|
jpayne@68
|
431 }
|
jpayne@68
|
432 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=3.0f;}
|
jpayne@68
|
433 }else if(a.equalsIgnoreCase("prokprotbig") || a.equalsIgnoreCase("proteinbig")){
|
jpayne@68
|
434 addRefFiles(PROKPROT_PATH_BIG());
|
jpayne@68
|
435 if(blacklist==null){blacklist=Blacklist.prokProtBlacklist();}
|
jpayne@68
|
436 if(defaultParams.dbName==null){defaultParams.dbName="ProkProt";}
|
jpayne@68
|
437 if(!setK){k=defaultKAmino; k2=defaultK2Amino;}
|
jpayne@68
|
438 if(!amino && !translate) {
|
jpayne@68
|
439 translate=true;
|
jpayne@68
|
440 System.err.println("Setting translate to true because a protein dataset is being used.");
|
jpayne@68
|
441 }
|
jpayne@68
|
442 if(!SET_AUTOSIZE_FACTOR){AUTOSIZE_FACTOR=7.5f;}
|
jpayne@68
|
443 }else if(a.equalsIgnoreCase("mito") || a.equalsIgnoreCase("refseqmito")){
|
jpayne@68
|
444 addRefFiles(MITO_PATH());
|
jpayne@68
|
445 if(blacklist==null){blacklist=Blacklist.mitoBlacklist();}
|
jpayne@68
|
446 if(defaultParams.dbName==null){defaultParams.dbName="RefSeqMito";}
|
jpayne@68
|
447 if(!setK){k=defaultK; k2=defaultK2;}
|
jpayne@68
|
448 }else if(a.equalsIgnoreCase("fungi") || a.equalsIgnoreCase("refseqfungi")){
|
jpayne@68
|
449 addRefFiles(FUNGI_PATH());
|
jpayne@68
|
450 if(blacklist==null){blacklist=Blacklist.fungiBlacklist();}
|
jpayne@68
|
451 if(defaultParams.dbName==null){defaultParams.dbName="RefSeqFungi";}
|
jpayne@68
|
452 if(!setK){k=defaultK; k2=defaultK2;}
|
jpayne@68
|
453 }else{
|
jpayne@68
|
454 addFiles(a, refFiles);
|
jpayne@68
|
455 }
|
jpayne@68
|
456 }
|
jpayne@68
|
457
|
jpayne@68
|
458 static void addFiles(String a, Set<String> list){
|
jpayne@68
|
459 if(a==null){return;}
|
jpayne@68
|
460 File f=new File(a);
|
jpayne@68
|
461 assert(!list.contains(a)) : "Duplicate file "+a;
|
jpayne@68
|
462
|
jpayne@68
|
463 if(f.exists()){
|
jpayne@68
|
464 list.add(a);
|
jpayne@68
|
465 }else if(a.indexOf(',')>0){
|
jpayne@68
|
466 for(String s : a.split(",")){addFiles(s, list);}
|
jpayne@68
|
467 }else if(a.indexOf('#')>=0 && new File(a.replaceFirst("#", "0")).exists()){
|
jpayne@68
|
468 for(int i=0; true; i++){
|
jpayne@68
|
469 String temp=a.replaceFirst("#", ""+i);
|
jpayne@68
|
470 if(!new File(temp).exists()){break;}
|
jpayne@68
|
471 list.add(temp);
|
jpayne@68
|
472 }
|
jpayne@68
|
473 }else{
|
jpayne@68
|
474 list.add(a);
|
jpayne@68
|
475 }
|
jpayne@68
|
476 }
|
jpayne@68
|
477
|
jpayne@68
|
478 public void makeIndex(){
|
jpayne@68
|
479 assert(index==null);
|
jpayne@68
|
480 index=new SketchIndex(refSketches);
|
jpayne@68
|
481 index.load();
|
jpayne@68
|
482 }
|
jpayne@68
|
483
|
jpayne@68
|
484 public void loadReferences(int mode_, DisplayParams params){
|
jpayne@68
|
485 loadReferences(mode_, params.minKeyOccuranceCount, params.minEntropy, params.minProb, params.minQual);
|
jpayne@68
|
486 }
|
jpayne@68
|
487
|
jpayne@68
|
488 public void loadReferences(int mode_, int minKeyOccuranceCount, float minEntropy, float minProb, byte minQual) {
|
jpayne@68
|
489 makeTool(minKeyOccuranceCount, false, false);
|
jpayne@68
|
490 refSketches=tool.loadSketches_MT(mode_, 1f, -1, minEntropy, minProb, minQual, refFiles);
|
jpayne@68
|
491 assert(refSketches!=null) : refFiles;
|
jpayne@68
|
492 if(mode_==PER_FILE){
|
jpayne@68
|
493 Collections.sort(refSketches, SketchIdComparator.comparator);
|
jpayne@68
|
494 }
|
jpayne@68
|
495 taxIDToSketchIDMap=new IntHashMap(Tools.max(3, (int)(refSketches.size()*1.2f)));
|
jpayne@68
|
496 for(int i=0; i<refSketches.size(); i++){
|
jpayne@68
|
497 Sketch sk=refSketches.get(i);
|
jpayne@68
|
498 if(sk!=null && sk.taxID>0){
|
jpayne@68
|
499 taxIDToSketchIDMap.set(sk.taxID, i);
|
jpayne@68
|
500 }
|
jpayne@68
|
501 }
|
jpayne@68
|
502 // System.err.println("Sketches: "+refSketches.get(0).name());
|
jpayne@68
|
503 if(makeIndex){
|
jpayne@68
|
504 makeIndex();
|
jpayne@68
|
505 }
|
jpayne@68
|
506 }
|
jpayne@68
|
507
|
jpayne@68
|
508 public void makeTool(int minKeyOccuranceCount, boolean trackCounts, boolean mergePairs){
|
jpayne@68
|
509 if(tool==null){
|
jpayne@68
|
510 tool=new SketchTool(targetSketchSize, minKeyOccuranceCount, trackCounts, mergePairs);
|
jpayne@68
|
511 }
|
jpayne@68
|
512 }
|
jpayne@68
|
513
|
jpayne@68
|
514 public ArrayList<Sketch> loadSketchesFromString(String sketchString){
|
jpayne@68
|
515 return tool.loadSketchesFromString(sketchString);
|
jpayne@68
|
516 }
|
jpayne@68
|
517
|
jpayne@68
|
518 public int refFileCount(){return refFiles==null ? 0 : refFiles.size();}
|
jpayne@68
|
519 public int refSketchCount(){return refSketches==null ? 0 : refSketches.size();}
|
jpayne@68
|
520
|
jpayne@68
|
521 public Sketch findReferenceSketch(int taxID){
|
jpayne@68
|
522 if(taxID<1){return null;}
|
jpayne@68
|
523 int skid=taxIDToSketchIDMap.get(taxID);
|
jpayne@68
|
524 return skid<0 ? null : refSketches.get(skid);
|
jpayne@68
|
525 }
|
jpayne@68
|
526
|
jpayne@68
|
527 /*--------------------------------------------------------------*/
|
jpayne@68
|
528
|
jpayne@68
|
529 public SketchIndex index=null;
|
jpayne@68
|
530 public boolean autoIndex=true;
|
jpayne@68
|
531
|
jpayne@68
|
532 public SketchTool tool=null;
|
jpayne@68
|
533 public ArrayList<Sketch> refSketches;
|
jpayne@68
|
534 LinkedHashSet<String> refFiles=new LinkedHashSet<String>();
|
jpayne@68
|
535 /** For ref sketch lookups by TaxID */
|
jpayne@68
|
536 private IntHashMap taxIDToSketchIDMap;
|
jpayne@68
|
537 public int threads=Shared.threads();
|
jpayne@68
|
538 boolean verbose;
|
jpayne@68
|
539 boolean errorState=false;
|
jpayne@68
|
540 AtomicLong comparisons=new AtomicLong(0);
|
jpayne@68
|
541
|
jpayne@68
|
542 int minLevelExtended=-1;
|
jpayne@68
|
543
|
jpayne@68
|
544 }
|