jpayne@68
|
1 package sketch;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.concurrent.atomic.AtomicInteger;
|
jpayne@68
|
5 import java.util.concurrent.atomic.AtomicLong;
|
jpayne@68
|
6
|
jpayne@68
|
7 import kmer.AbstractKmerTable;
|
jpayne@68
|
8 import kmer.HashBuffer;
|
jpayne@68
|
9 import kmer.KmerTableSet;
|
jpayne@68
|
10 import shared.Shared;
|
jpayne@68
|
11 import shared.Timer;
|
jpayne@68
|
12 import shared.Tools;
|
jpayne@68
|
13 import structures.AbstractBitSet;
|
jpayne@68
|
14 import structures.IntHashMap;
|
jpayne@68
|
15 import structures.IntHashSetList;
|
jpayne@68
|
16 import structures.IntList;
|
jpayne@68
|
17
|
jpayne@68
|
18 public class SketchIndex extends SketchObject {
|
jpayne@68
|
19
|
jpayne@68
|
20 public SketchIndex(ArrayList<Sketch> refs){
|
jpayne@68
|
21 refSketches=refs;
|
jpayne@68
|
22 tables=new KmerTableSet(new String[] {"ways="+WAYS, "tabletype="+AbstractKmerTable.ARRAYHF, "prealloc="+(prealloc>0 ? ""+prealloc : "f")},
|
jpayne@68
|
23 20+(defaultParams.trackCounts() ? 4 : 0)+18);//An extra 18 bytes per kmer because a lot of them occur multiple times
|
jpayne@68
|
24 tables.allocateTables();
|
jpayne@68
|
25 tableArray=tables.tables();
|
jpayne@68
|
26 }
|
jpayne@68
|
27
|
jpayne@68
|
28 public void load(){
|
jpayne@68
|
29 spawnIndexThreads();
|
jpayne@68
|
30 if(useWhitelist){
|
jpayne@68
|
31 assert(!Whitelist.exists());
|
jpayne@68
|
32 Whitelist.initialize(tableArray);
|
jpayne@68
|
33 }
|
jpayne@68
|
34 }
|
jpayne@68
|
35
|
jpayne@68
|
36 /** Spawn index threads */
|
jpayne@68
|
37 private void spawnIndexThreads(){
|
jpayne@68
|
38
|
jpayne@68
|
39 //Do anything necessary prior to processing
|
jpayne@68
|
40
|
jpayne@68
|
41 //Determine how many threads may be used
|
jpayne@68
|
42 final int threads=Shared.threads();
|
jpayne@68
|
43 ArrayList<IndexThread> alht=new ArrayList<IndexThread>(threads);
|
jpayne@68
|
44 AtomicInteger ai=new AtomicInteger(0);
|
jpayne@68
|
45 AtomicLong totalKeys=new AtomicLong(0);
|
jpayne@68
|
46 AtomicLong uniqueKeys=new AtomicLong(0);
|
jpayne@68
|
47 for(int i=0; i<threads; i++){
|
jpayne@68
|
48 alht.add(new IndexThread(ai, totalKeys, uniqueKeys));
|
jpayne@68
|
49 }
|
jpayne@68
|
50
|
jpayne@68
|
51 //Start the threads
|
jpayne@68
|
52 for(IndexThread pt : alht){
|
jpayne@68
|
53 pt.start();
|
jpayne@68
|
54 }
|
jpayne@68
|
55
|
jpayne@68
|
56 //Wait for completion of all threads
|
jpayne@68
|
57 boolean success=true;
|
jpayne@68
|
58 long codesProcessed=0;
|
jpayne@68
|
59 for(IndexThread pt : alht){
|
jpayne@68
|
60
|
jpayne@68
|
61 //Wait until this thread has terminated
|
jpayne@68
|
62 while(pt.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
63 try {
|
jpayne@68
|
64 //Attempt a join operation
|
jpayne@68
|
65 pt.join();
|
jpayne@68
|
66 synchronized(pt){
|
jpayne@68
|
67 codesProcessed+=pt.codesProcessedT;
|
jpayne@68
|
68 }
|
jpayne@68
|
69 } catch (InterruptedException e) {
|
jpayne@68
|
70 //Potentially handle this, if it is expected to occur
|
jpayne@68
|
71 e.printStackTrace();
|
jpayne@68
|
72 }
|
jpayne@68
|
73 }
|
jpayne@68
|
74 success&=pt.success;
|
jpayne@68
|
75 }
|
jpayne@68
|
76
|
jpayne@68
|
77 //Track whether any threads failed
|
jpayne@68
|
78 if(!success){errorState=true;}
|
jpayne@68
|
79
|
jpayne@68
|
80 System.err.println("Indexed "+uniqueKeys+" unique and "+totalKeys+" total hashcodes."); //For some reason codesProcessed is nondeterministic.
|
jpayne@68
|
81
|
jpayne@68
|
82 //Do anything necessary after processing
|
jpayne@68
|
83 // System.gc();
|
jpayne@68
|
84 }
|
jpayne@68
|
85
|
jpayne@68
|
86 /*--------------------------------------------------------------*/
|
jpayne@68
|
87
|
jpayne@68
|
88 public SketchResults getSketches(Sketch a, DisplayParams params){
|
jpayne@68
|
89 if(useIntMap){
|
jpayne@68
|
90 return getSketchesMap(a, params);
|
jpayne@68
|
91 }else{
|
jpayne@68
|
92 return getSketchesList(a, params);
|
jpayne@68
|
93 }
|
jpayne@68
|
94 }
|
jpayne@68
|
95
|
jpayne@68
|
96 /** Return true if added. */
|
jpayne@68
|
97 private boolean addToTaxSet(int sketchID, IntHashSetList taxSet, int taxLevelExtended){
|
jpayne@68
|
98 Sketch sk=refSketches.get(sketchID);
|
jpayne@68
|
99 int taxID=sk.taxID;
|
jpayne@68
|
100 if(taxID<0 || taxID>=minFakeID){return false;}
|
jpayne@68
|
101 taxID=taxtree.getIdAtLevelExtended(taxID, taxLevelExtended);
|
jpayne@68
|
102 return taxSet.add(taxID);
|
jpayne@68
|
103 }
|
jpayne@68
|
104
|
jpayne@68
|
105 public SketchResults getSketchesList(final Sketch a, DisplayParams params){
|
jpayne@68
|
106 final int minHits=params.minHits, contamLevel=params.contamLevel();
|
jpayne@68
|
107 final boolean countContamHits=params.needContamCounts();//, metaFilter=params.hasMetaFilters(), taxFilter=params.hasTaxFilters();
|
jpayne@68
|
108 final Timer t=(printTime ? new Timer() : null);
|
jpayne@68
|
109
|
jpayne@68
|
110 final int[] singleton=new int[1];
|
jpayne@68
|
111 final IntList idList=new IntList(Tools.min(targetSketchSize, indexLimit, 1000));
|
jpayne@68
|
112 AbstractBitSet abs=a.indexBitSet();
|
jpayne@68
|
113 assert((abs==null)!=countContamHits);
|
jpayne@68
|
114
|
jpayne@68
|
115 final IntHashSetList taxSet;
|
jpayne@68
|
116 final int[][] taxHits;
|
jpayne@68
|
117 if(contamLevel>=0){
|
jpayne@68
|
118 taxSet=new IntHashSetList(31);
|
jpayne@68
|
119 taxHits=new int[a.length()][];
|
jpayne@68
|
120 assert(taxtree!=null) : "A TaxTree is required for this operation.";
|
jpayne@68
|
121 }else{
|
jpayne@68
|
122 taxSet=null;
|
jpayne@68
|
123 taxHits=null;
|
jpayne@68
|
124 }
|
jpayne@68
|
125
|
jpayne@68
|
126 for(int i=0; i<a.keys.length; i++){
|
jpayne@68
|
127 long key=a.keys[i];
|
jpayne@68
|
128 AbstractKmerTable set=tableArray[(int)(key%WAYS)];
|
jpayne@68
|
129 // System.err.println(set.getValue(key));
|
jpayne@68
|
130 final int[] ids=set.getValues(key, singleton);
|
jpayne@68
|
131 // System.err.println(Arrays.toString(ids));
|
jpayne@68
|
132 if(ids!=null && ids[0]>=0){
|
jpayne@68
|
133 int incr=0;
|
jpayne@68
|
134 for(int id : ids){
|
jpayne@68
|
135 if(id>=0){
|
jpayne@68
|
136 final int trueID=id-1;//Minimum id is 1, indicating sketch 0.
|
jpayne@68
|
137 idList.add(trueID);//Minimum id is 1, indicating sketch 0.
|
jpayne@68
|
138 incr++;
|
jpayne@68
|
139 if(taxSet!=null){addToTaxSet(trueID, taxSet, contamLevel);}
|
jpayne@68
|
140 }
|
jpayne@68
|
141 }
|
jpayne@68
|
142 if(countContamHits && incr>0){abs.increment(i, incr);}
|
jpayne@68
|
143 if(taxSet!=null && taxSet.size()>0){
|
jpayne@68
|
144 taxHits[i]=taxSet.toArray();
|
jpayne@68
|
145 taxSet.clear();
|
jpayne@68
|
146 }
|
jpayne@68
|
147 }
|
jpayne@68
|
148
|
jpayne@68
|
149 }
|
jpayne@68
|
150
|
jpayne@68
|
151 // assert(abs!=null);
|
jpayne@68
|
152 // assert(false) : abs.cardinality();
|
jpayne@68
|
153
|
jpayne@68
|
154 if(printTime){
|
jpayne@68
|
155 t.stop("\nTime for searching index: \t");
|
jpayne@68
|
156 t.start();
|
jpayne@68
|
157 }
|
jpayne@68
|
158
|
jpayne@68
|
159 // System.err.println("idList.size:"+idList.size);
|
jpayne@68
|
160 if(idList.size<minHits){return new SketchResults(a);}//null breaks some things
|
jpayne@68
|
161 idList.sort();
|
jpayne@68
|
162
|
jpayne@68
|
163 if(printTime){
|
jpayne@68
|
164 t.stop("Time for sorting "+idList.size()+" hits:\t");
|
jpayne@68
|
165 t.start();
|
jpayne@68
|
166 }
|
jpayne@68
|
167
|
jpayne@68
|
168 ArrayList<Sketch> list=new ArrayList<Sketch>(Tools.min(8, idList.size));
|
jpayne@68
|
169
|
jpayne@68
|
170 int last=-1;
|
jpayne@68
|
171 int hits=0;
|
jpayne@68
|
172 for(int i=0; i<idList.size; i++){
|
jpayne@68
|
173 int id=idList.get(i);
|
jpayne@68
|
174 if(id==last){
|
jpayne@68
|
175 // System.err.println("A: "+last+", "+id+", "+count+", "+minHits);
|
jpayne@68
|
176 hits++;
|
jpayne@68
|
177 }else{
|
jpayne@68
|
178 // System.err.println("B: "+last+", "+id+", "+count+", "+minHits);
|
jpayne@68
|
179 if(last>-1 && (hits>=minHits)){
|
jpayne@68
|
180 final Sketch ref=refSketches.get(last);
|
jpayne@68
|
181 list.add(ref);
|
jpayne@68
|
182 }
|
jpayne@68
|
183 last=id;
|
jpayne@68
|
184 hits=0;
|
jpayne@68
|
185 }
|
jpayne@68
|
186 }
|
jpayne@68
|
187 if(last>-1 && (hits>=minHits)){
|
jpayne@68
|
188 final Sketch ref=refSketches.get(last);
|
jpayne@68
|
189 // if((!metaFilter || ref.passesMeta(params)) && (!taxFilter || params.passesFilter(ref))){list.add(ref);}
|
jpayne@68
|
190 list.add(ref);
|
jpayne@68
|
191 }
|
jpayne@68
|
192 if(printTime){
|
jpayne@68
|
193 t.stop("Time for fetching sketches: \t");
|
jpayne@68
|
194 }
|
jpayne@68
|
195 return list.isEmpty() ? new SketchResults(a) : new SketchResults(a, list, taxHits);
|
jpayne@68
|
196 }
|
jpayne@68
|
197
|
jpayne@68
|
198 // static ThreadLocal<IntHashMap> intMapHolder=new ThreadLocal<IntHashMap>();
|
jpayne@68
|
199
|
jpayne@68
|
200 public final int[] getSketchIdsMap(long key, int[] singleton){
|
jpayne@68
|
201 AbstractKmerTable set=tableArray[(int)(key%WAYS)];
|
jpayne@68
|
202 final int[] ids=set.getValues(key, singleton);
|
jpayne@68
|
203 return ids;
|
jpayne@68
|
204 }
|
jpayne@68
|
205
|
jpayne@68
|
206 public SketchResults getSketchesMap(final Sketch a, DisplayParams params){
|
jpayne@68
|
207 final int minHits=params.minHits, contamLevel=params.contamLevel();
|
jpayne@68
|
208 final boolean countContamHits=params.needContamCounts();//, metaFilter=params.hasMetaFilters(), taxFilter=params.hasTaxFilters();
|
jpayne@68
|
209 final Timer t=(printTime ? new Timer() : null);
|
jpayne@68
|
210 final int[] singleton=new int[1];
|
jpayne@68
|
211
|
jpayne@68
|
212 final IntHashMap idMap=new IntHashMap(Tools.min(targetSketchSize, indexLimit, intMapSize), 0.7f);
|
jpayne@68
|
213
|
jpayne@68
|
214 AbstractBitSet abs=a.indexBitSet();
|
jpayne@68
|
215 assert((abs==null)!=countContamHits);
|
jpayne@68
|
216
|
jpayne@68
|
217 final IntHashSetList taxSet;
|
jpayne@68
|
218 final int[][] taxHits;
|
jpayne@68
|
219 if(contamLevel>=0){
|
jpayne@68
|
220 taxSet=new IntHashSetList(31);
|
jpayne@68
|
221 taxHits=new int[a.length()][];
|
jpayne@68
|
222 assert(taxtree!=null) : "A TaxTree is required for this operation.";
|
jpayne@68
|
223 }else{
|
jpayne@68
|
224 taxSet=null;
|
jpayne@68
|
225 taxHits=null;
|
jpayne@68
|
226 }
|
jpayne@68
|
227 // assert(false) : (taxHits==null)+", "+contamLevel;
|
jpayne@68
|
228
|
jpayne@68
|
229 if(printTime){
|
jpayne@68
|
230 t.stop("\nTime for allocation: \t");
|
jpayne@68
|
231 t.start();
|
jpayne@68
|
232 }
|
jpayne@68
|
233
|
jpayne@68
|
234 int[] refHitCounts;
|
jpayne@68
|
235 if(params.printRefHits){
|
jpayne@68
|
236 refHitCounts=new int[a.keys.length];
|
jpayne@68
|
237 a.setRefHitCounts(refHitCounts);
|
jpayne@68
|
238 }else{refHitCounts=null;}
|
jpayne@68
|
239
|
jpayne@68
|
240 for(int i=0; i<a.keys.length; i++){
|
jpayne@68
|
241 long key=a.keys[i];
|
jpayne@68
|
242 final int[] ids=getSketchIdsMap(key, singleton);
|
jpayne@68
|
243
|
jpayne@68
|
244 if(ids!=null && ids[0]>=0){
|
jpayne@68
|
245 int incr=0;
|
jpayne@68
|
246 for(int id : ids){
|
jpayne@68
|
247 if(id>=0){
|
jpayne@68
|
248 final int trueID=id-1;//Minimum id is 1, indicating sketch 0.
|
jpayne@68
|
249 idMap.increment(trueID);
|
jpayne@68
|
250 if(!allToAll || compareSelf){incr++;}
|
jpayne@68
|
251 if(taxSet!=null){addToTaxSet(trueID, taxSet, contamLevel);}
|
jpayne@68
|
252 if(refHitCounts!=null && trueID!=a.sketchID){refHitCounts[i]++;}
|
jpayne@68
|
253 }
|
jpayne@68
|
254 }
|
jpayne@68
|
255 if(countContamHits && incr>0){abs.increment(i, incr);}
|
jpayne@68
|
256 if(taxSet!=null && taxSet.size()>0){
|
jpayne@68
|
257 taxHits[i]=taxSet.toArray();
|
jpayne@68
|
258 taxSet.clear();
|
jpayne@68
|
259 }
|
jpayne@68
|
260 }
|
jpayne@68
|
261 }
|
jpayne@68
|
262
|
jpayne@68
|
263 if(printTime){
|
jpayne@68
|
264 t.stop("Time for searching index: \t");
|
jpayne@68
|
265 System.err.println("Size: \t"+idMap.size());
|
jpayne@68
|
266 t.start();
|
jpayne@68
|
267 }
|
jpayne@68
|
268
|
jpayne@68
|
269 // System.err.println("idList.size:"+idList.size);
|
jpayne@68
|
270 final int size=idMap.size();
|
jpayne@68
|
271 if(size==0){return new SketchResults(a);}//null breaks some things
|
jpayne@68
|
272
|
jpayne@68
|
273 ArrayList<Sketch> list=new ArrayList<Sketch>(Tools.min(8, size));
|
jpayne@68
|
274
|
jpayne@68
|
275 final int[] keys=idMap.keys();
|
jpayne@68
|
276 final int[] values=idMap.values();
|
jpayne@68
|
277
|
jpayne@68
|
278 for(int i=0; i<keys.length; i++){
|
jpayne@68
|
279 int value=values[i];
|
jpayne@68
|
280 if(value>=minHits){
|
jpayne@68
|
281 int id=keys[i];
|
jpayne@68
|
282 final Sketch ref=refSketches.get(id);
|
jpayne@68
|
283 // if((!metaFilter || ref.passesMeta(params)) && (!taxFilter || params.passesFilter(ref))){list.add(ref);}
|
jpayne@68
|
284 list.add(ref);
|
jpayne@68
|
285 }
|
jpayne@68
|
286 }
|
jpayne@68
|
287 if(printTime){
|
jpayne@68
|
288 t.stop("Time for fetching sketches: \t");
|
jpayne@68
|
289 }
|
jpayne@68
|
290 return list.isEmpty() ? new SketchResults(a) : new SketchResults(a, list, taxHits);
|
jpayne@68
|
291 }
|
jpayne@68
|
292
|
jpayne@68
|
293 /*--------------------------------------------------------------*/
|
jpayne@68
|
294
|
jpayne@68
|
295 public class IndexThread extends Thread {
|
jpayne@68
|
296
|
jpayne@68
|
297 public IndexThread(AtomicInteger nextIndex_, AtomicLong keyCount_, AtomicLong uniqueKeyCount_){
|
jpayne@68
|
298 buffer=new HashBuffer(tableArray, 1000, 31, true, false);
|
jpayne@68
|
299 nextIndex=nextIndex_;
|
jpayne@68
|
300 keyCount=keyCount_;
|
jpayne@68
|
301 uniqueKeyCount=uniqueKeyCount_;
|
jpayne@68
|
302 }
|
jpayne@68
|
303
|
jpayne@68
|
304 @Override
|
jpayne@68
|
305 public void run(){
|
jpayne@68
|
306 // System.err.println("Thread running.");
|
jpayne@68
|
307 int id=nextIndex.getAndIncrement();
|
jpayne@68
|
308 final int numSketches=refSketches.size();
|
jpayne@68
|
309 final int limit0=Tools.min((AUTOSIZE || AUTOSIZE_LINEAR ? Integer.MAX_VALUE : targetSketchSize), indexLimit);
|
jpayne@68
|
310 // System.err.println("numSketches="+numSketches);
|
jpayne@68
|
311 while(id<numSketches){
|
jpayne@68
|
312 final Sketch sk=refSketches.get(id);
|
jpayne@68
|
313 final long[] array=sk.keys;
|
jpayne@68
|
314 final int limit=Tools.min(array.length, limit0);
|
jpayne@68
|
315 // System.err.println("limit="+limit);
|
jpayne@68
|
316 for(int i=0; i<limit; i++){
|
jpayne@68
|
317 long key=array[i];
|
jpayne@68
|
318 buffer.set(key, id+1);//Must be greater than zero
|
jpayne@68
|
319 codesProcessedT++;
|
jpayne@68
|
320 }
|
jpayne@68
|
321 id=nextIndex.getAndIncrement();
|
jpayne@68
|
322 }
|
jpayne@68
|
323 long temp=buffer.flush();
|
jpayne@68
|
324
|
jpayne@68
|
325 synchronized(this){
|
jpayne@68
|
326 codesProcessedT+=0;
|
jpayne@68
|
327 success=true;
|
jpayne@68
|
328 keyCount.getAndAdd(codesProcessedT);
|
jpayne@68
|
329 uniqueKeyCount.getAndAdd(buffer.uniqueAdded);
|
jpayne@68
|
330 // if(codesProcessedT>0){System.err.println(codesProcessedT);}
|
jpayne@68
|
331 }
|
jpayne@68
|
332 }
|
jpayne@68
|
333
|
jpayne@68
|
334 AtomicInteger nextIndex;
|
jpayne@68
|
335 AtomicLong keyCount;
|
jpayne@68
|
336 AtomicLong uniqueKeyCount;
|
jpayne@68
|
337 long codesProcessedT=0;
|
jpayne@68
|
338 HashBuffer buffer;
|
jpayne@68
|
339 boolean success=false;
|
jpayne@68
|
340
|
jpayne@68
|
341 }
|
jpayne@68
|
342
|
jpayne@68
|
343 /*--------------------------------------------------------------*/
|
jpayne@68
|
344
|
jpayne@68
|
345 public final KmerTableSet tables;
|
jpayne@68
|
346 public final AbstractKmerTable[] tableArray;
|
jpayne@68
|
347 public final ArrayList<Sketch> refSketches;
|
jpayne@68
|
348
|
jpayne@68
|
349 public boolean errorState=false;
|
jpayne@68
|
350
|
jpayne@68
|
351 private static final boolean printTime=false;
|
jpayne@68
|
352 public static boolean useIntMap=true;
|
jpayne@68
|
353 // public static boolean useIntMapBinary=false;
|
jpayne@68
|
354 public static int intMapSize=1000;
|
jpayne@68
|
355 public static int indexLimit=Integer.MAX_VALUE;
|
jpayne@68
|
356 public static final int WAYS=31;
|
jpayne@68
|
357
|
jpayne@68
|
358 }
|