jpayne@68
|
1 package kmer;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.Arrays;
|
jpayne@68
|
4 import java.util.concurrent.atomic.AtomicIntegerArray;
|
jpayne@68
|
5 import java.util.concurrent.atomic.AtomicLong;
|
jpayne@68
|
6 import java.util.concurrent.locks.Lock;
|
jpayne@68
|
7 import java.util.concurrent.locks.ReentrantLock;
|
jpayne@68
|
8
|
jpayne@68
|
9 import fileIO.ByteStreamWriter;
|
jpayne@68
|
10 import fileIO.TextStreamWriter;
|
jpayne@68
|
11 import shared.Primes;
|
jpayne@68
|
12 import shared.Tools;
|
jpayne@68
|
13 import structures.ByteBuilder;
|
jpayne@68
|
14 import structures.SuperLongList;
|
jpayne@68
|
15
|
jpayne@68
|
16 /**
|
jpayne@68
|
17 * Stores kmers in a long[] and values in an int[][], with a victim cache.
|
jpayne@68
|
18 * @author Brian Bushnell
|
jpayne@68
|
19 * @date Nov 7, 2014
|
jpayne@68
|
20 *
|
jpayne@68
|
21 */
|
jpayne@68
|
22 public abstract class HashArray extends AbstractKmerTable {
|
jpayne@68
|
23
|
jpayne@68
|
24 /*--------------------------------------------------------------*/
|
jpayne@68
|
25 /*---------------- Initialization ----------------*/
|
jpayne@68
|
26 /*--------------------------------------------------------------*/
|
jpayne@68
|
27
|
jpayne@68
|
28 HashArray(int[] schedule_, long coreMask_, boolean twod_){
|
jpayne@68
|
29 schedule=schedule_;
|
jpayne@68
|
30 autoResize=schedule.length>1;
|
jpayne@68
|
31 prime=schedule[0];
|
jpayne@68
|
32
|
jpayne@68
|
33 sizeLimit=(long)((schedule.length==1 ? maxLoadFactorFinal : maxLoadFactor)*prime);
|
jpayne@68
|
34 array=allocLong1D(prime+extra);
|
jpayne@68
|
35 victims=new HashForest(Tools.max(10, prime/victimRatio), autoResize, twod_);
|
jpayne@68
|
36 Arrays.fill(array, NOT_PRESENT);
|
jpayne@68
|
37 twoD=twod_;
|
jpayne@68
|
38 coreMask=coreMask_;
|
jpayne@68
|
39 // coreMask2=coreMask_|3;
|
jpayne@68
|
40 coreMask2=coreMask_; //Simplifies fast fill
|
jpayne@68
|
41 }
|
jpayne@68
|
42
|
jpayne@68
|
43 HashArray(int initialSize, long coreMask_, boolean autoResize_, boolean twod_){
|
jpayne@68
|
44 schedule=null;
|
jpayne@68
|
45 prime=initialSize;
|
jpayne@68
|
46 sizeLimit=(long)(maxLoadFactor*prime);
|
jpayne@68
|
47 array=allocLong1D(prime+extra);
|
jpayne@68
|
48 victims=new HashForest(Tools.max(10, initialSize/victimRatio), autoResize_, twod_);
|
jpayne@68
|
49 Arrays.fill(array, NOT_PRESENT);
|
jpayne@68
|
50 autoResize=autoResize_;
|
jpayne@68
|
51 twoD=twod_;
|
jpayne@68
|
52 coreMask=coreMask_;
|
jpayne@68
|
53 // coreMask2=coreMask_|3;
|
jpayne@68
|
54 coreMask2=coreMask_; //Simplifies fast fill
|
jpayne@68
|
55 }
|
jpayne@68
|
56
|
jpayne@68
|
57 /*--------------------------------------------------------------*/
|
jpayne@68
|
58 /*---------------- Public Methods ----------------*/
|
jpayne@68
|
59 /*--------------------------------------------------------------*/
|
jpayne@68
|
60
|
jpayne@68
|
61 /*--------------------------------------------------------------*/
|
jpayne@68
|
62 /*---------------- Test Methods ----------------*/
|
jpayne@68
|
63 /*--------------------------------------------------------------*/
|
jpayne@68
|
64
|
jpayne@68
|
65 // public final int set_Test(final long kmer, final int v){
|
jpayne@68
|
66 // assert(TESTMODE);
|
jpayne@68
|
67 // final int x;
|
jpayne@68
|
68 // if(TWOD){
|
jpayne@68
|
69 // int[] old=getValues(kmer, new int[1]);
|
jpayne@68
|
70 // assert(old==null || contains(kmer, old));
|
jpayne@68
|
71 // if(verbose){System.err.println("Fetched "+Arrays.toString(old));}
|
jpayne@68
|
72 // x=set0(kmer, v);
|
jpayne@68
|
73 // assert(old==null || contains(kmer, old)) : "old="+Arrays.toString(old)+", v="+v+", kmer="+kmer+
|
jpayne@68
|
74 // ", get(kmer)="+(Arrays.toString(getValues(kmer, new int[1])));
|
jpayne@68
|
75 // assert(contains(kmer, v));
|
jpayne@68
|
76 // }else{
|
jpayne@68
|
77 // int old=getValue(kmer);
|
jpayne@68
|
78 // assert(old==0 || old==-1 || contains(kmer, old));
|
jpayne@68
|
79 // x=set0(kmer, v);
|
jpayne@68
|
80 // assert(contains(kmer, v)) : "old="+old+", v="+v+", kmer="+kmer+", get(kmer)="+getValue(kmer);
|
jpayne@68
|
81 // assert(v==old || !contains(kmer, old));
|
jpayne@68
|
82 // }
|
jpayne@68
|
83 // return x;
|
jpayne@68
|
84 // }
|
jpayne@68
|
85 //
|
jpayne@68
|
86 // public final int set_Test(final long kmer, final int v[]){
|
jpayne@68
|
87 // assert(TESTMODE);
|
jpayne@68
|
88 // final int x;
|
jpayne@68
|
89 // if(TWOD){
|
jpayne@68
|
90 // final int[] singleton=new int[1];
|
jpayne@68
|
91 // int[] old=getValues(kmer, singleton);
|
jpayne@68
|
92 // assert(old==null || contains(kmer, old));
|
jpayne@68
|
93 // if(verbose){System.err.println("Before: old="+Arrays.toString(old)+", v="+Arrays.toString(v));}
|
jpayne@68
|
94 // x=set0(kmer, v);
|
jpayne@68
|
95 // if(verbose){System.err.println("After: old="+Arrays.toString(old)+", v="+Arrays.toString(v)+", get()="+Arrays.toString(getValues(kmer, singleton)));}
|
jpayne@68
|
96 // assert(old==null || contains(kmer, old)) : "old="+Arrays.toString(old)+", v="+Arrays.toString(v)+", kmer="+kmer+
|
jpayne@68
|
97 // ", get(kmer)="+(Arrays.toString(getValues(kmer, new int[1])));
|
jpayne@68
|
98 // assert(contains(kmer, v)) : "old="+Arrays.toString(old)+", v="+Arrays.toString(v)+", kmer="+kmer+
|
jpayne@68
|
99 // ", get(kmer)="+(Arrays.toString(getValues(kmer, new int[1])));
|
jpayne@68
|
100 // }else{
|
jpayne@68
|
101 // int old=getValue(kmer);
|
jpayne@68
|
102 // assert(old==0 || old==-1 || contains(kmer, old));
|
jpayne@68
|
103 // x=set0(kmer, v);
|
jpayne@68
|
104 // assert(contains(kmer, v)) : "old="+old+", v="+v+", kmer="+kmer+", get(kmer)="+getValue(kmer);
|
jpayne@68
|
105 // assert(v[0]==old || !contains(kmer, old));
|
jpayne@68
|
106 // }
|
jpayne@68
|
107 // return x;
|
jpayne@68
|
108 // }
|
jpayne@68
|
109 //
|
jpayne@68
|
110 // public final int setIfNotPresent_Test(long kmer, int v){
|
jpayne@68
|
111 // assert(TESTMODE);
|
jpayne@68
|
112 // final int x;
|
jpayne@68
|
113 // if(TWOD){
|
jpayne@68
|
114 //// int[] vals=getValues(kmer, null);
|
jpayne@68
|
115 //// assert(vals==null || contains(kmer, vals));
|
jpayne@68
|
116 //// x=setIfNotPresent(kmer, v);
|
jpayne@68
|
117 //// assert(contains(kmer, vals));
|
jpayne@68
|
118 //// assert(contains(kmer, v));
|
jpayne@68
|
119 // x=0;
|
jpayne@68
|
120 // assert(false);
|
jpayne@68
|
121 // }else{
|
jpayne@68
|
122 // int old=getValue(kmer);
|
jpayne@68
|
123 // assert(old==0 || old==-1 || contains(kmer, old));
|
jpayne@68
|
124 // x=setIfNotPresent0(kmer, v);
|
jpayne@68
|
125 // assert((old<1 && contains(kmer, v)) || (old>0 && contains(kmer, old))) : kmer+", "+old+", "+v;
|
jpayne@68
|
126 // }
|
jpayne@68
|
127 // return x;
|
jpayne@68
|
128 // }
|
jpayne@68
|
129
|
jpayne@68
|
130 /*--------------------------------------------------------------*/
|
jpayne@68
|
131
|
jpayne@68
|
132 public final int kmerToCell(long kmer){
|
jpayne@68
|
133 final int cell=(int)((kmer&coreMask)%prime);
|
jpayne@68
|
134 return cell;
|
jpayne@68
|
135 }
|
jpayne@68
|
136
|
jpayne@68
|
137 @Override
|
jpayne@68
|
138 public final int set(final long kmer, final int[] v, final int vlen){
|
jpayne@68
|
139 int cell=kmerToCell(kmer);
|
jpayne@68
|
140
|
jpayne@68
|
141 for(final int max=cell+extra; cell<max; cell++){
|
jpayne@68
|
142 long n=array[cell];
|
jpayne@68
|
143 if(n==kmer){
|
jpayne@68
|
144 if(verbose){System.err.println("A2: Adding "+kmer+", "+Arrays.toString(v)+", "+cell);}
|
jpayne@68
|
145 insertValue(kmer, v, cell, vlen);
|
jpayne@68
|
146 if(verbose){System.err.println("A2: getValues("+kmer+") = "+Arrays.toString(getValues(kmer, new int[1])));}
|
jpayne@68
|
147 return 0;
|
jpayne@68
|
148 }else if(n==NOT_PRESENT){
|
jpayne@68
|
149 if(verbose){System.err.println("B2: Adding "+kmer+", "+Arrays.toString(v)+", "+cell);}
|
jpayne@68
|
150 array[cell]=kmer;
|
jpayne@68
|
151 insertValue(kmer, v, cell, vlen);
|
jpayne@68
|
152 if(verbose){System.err.println("B2: getValues("+kmer+") = "+Arrays.toString(getValues(kmer, new int[1])));}
|
jpayne@68
|
153 size++;
|
jpayne@68
|
154 if(autoResize && size+victims.size>sizeLimit){resize();}
|
jpayne@68
|
155 return 1;
|
jpayne@68
|
156 }
|
jpayne@68
|
157 }
|
jpayne@68
|
158 if(verbose){System.err.println("C2: Adding "+kmer+", "+v+", "+cell);}
|
jpayne@68
|
159 final int x=victims.set(kmer, v, vlen);
|
jpayne@68
|
160 if(autoResize && size+victims.size>sizeLimit){resize();}
|
jpayne@68
|
161 if(verbose){System.err.println("C2: getValues("+kmer+") = "+Arrays.toString(getValues(kmer, new int[1])));}
|
jpayne@68
|
162 return x;
|
jpayne@68
|
163 }
|
jpayne@68
|
164
|
jpayne@68
|
165 @Override
|
jpayne@68
|
166 public final int set(final long kmer, final int v){
|
jpayne@68
|
167 int cell=kmerToCell(kmer);
|
jpayne@68
|
168
|
jpayne@68
|
169 // assert(TESTMODE);
|
jpayne@68
|
170 // ll.add(kmer);
|
jpayne@68
|
171 // il.add(v);
|
jpayne@68
|
172
|
jpayne@68
|
173 for(final int max=cell+extra; cell<max; cell++){
|
jpayne@68
|
174 long n=array[cell];
|
jpayne@68
|
175 if(n==kmer){
|
jpayne@68
|
176 if(verbose){System.err.println("A1: Adding "+kmer+", "+v+", "+cell);}
|
jpayne@68
|
177 insertValue(kmer, v, cell);
|
jpayne@68
|
178 if(verbose){System.err.println("A1: getValues("+kmer+") = "+Arrays.toString(getValues(kmer, new int[1])));}
|
jpayne@68
|
179 return 0;
|
jpayne@68
|
180 }else if(n==NOT_PRESENT){
|
jpayne@68
|
181 if(verbose){System.err.println("B1: Adding "+kmer+", "+v+", "+cell);}
|
jpayne@68
|
182 array[cell]=kmer;
|
jpayne@68
|
183 insertValue(kmer, v, cell);
|
jpayne@68
|
184 if(verbose){System.err.println("B1: getValues("+kmer+") = "+Arrays.toString(getValues(kmer, new int[1])));}
|
jpayne@68
|
185 size++;
|
jpayne@68
|
186 if(autoResize && size+victims.size>sizeLimit){resize();}
|
jpayne@68
|
187 return 1;
|
jpayne@68
|
188 }
|
jpayne@68
|
189 }
|
jpayne@68
|
190 if(verbose){System.err.println("C1: Adding "+kmer+", "+v+", "+cell+
|
jpayne@68
|
191 "; victims.get(kmer)="+Arrays.toString(victims.getValues(kmer, new int[1])));}
|
jpayne@68
|
192 final int x=victims.set(kmer, v);
|
jpayne@68
|
193 if(autoResize && size+victims.size>sizeLimit){resize();}
|
jpayne@68
|
194 if(verbose){System.err.println("C1: getValues("+kmer+") = "+Arrays.toString(getValues(kmer, new int[1]))+
|
jpayne@68
|
195 "; victims.get(kmer)="+Arrays.toString(victims.getValues(kmer, new int[1])));}
|
jpayne@68
|
196 return x;
|
jpayne@68
|
197 }
|
jpayne@68
|
198
|
jpayne@68
|
199 @Override
|
jpayne@68
|
200 public final int setIfNotPresent(long kmer, int value){
|
jpayne@68
|
201 int cell=kmerToCell(kmer);
|
jpayne@68
|
202 for(final int max=cell+extra; cell<max; cell++){
|
jpayne@68
|
203 long n=array[cell];
|
jpayne@68
|
204 if(n==kmer){
|
jpayne@68
|
205 return 0;
|
jpayne@68
|
206 }else if(n==NOT_PRESENT){
|
jpayne@68
|
207 array[cell]=kmer;
|
jpayne@68
|
208 insertValue(kmer, value, cell);
|
jpayne@68
|
209 size++;
|
jpayne@68
|
210 if(autoResize && size+victims.size>sizeLimit){resize();}
|
jpayne@68
|
211 return 1;
|
jpayne@68
|
212 }
|
jpayne@68
|
213 }
|
jpayne@68
|
214 // System.err.println("size="+size+", prime="+prime+", limit="+sizeLimit);
|
jpayne@68
|
215 int x=victims.setIfNotPresent(kmer, value);
|
jpayne@68
|
216 if(autoResize && size+victims.size>sizeLimit){resize();}
|
jpayne@68
|
217 return x;
|
jpayne@68
|
218 }
|
jpayne@68
|
219
|
jpayne@68
|
220 @Override
|
jpayne@68
|
221 public final int getValue(long kmer){
|
jpayne@68
|
222 int cell=findKmer(kmer);
|
jpayne@68
|
223 if(cell==NOT_PRESENT){return NOT_PRESENT;}
|
jpayne@68
|
224 if(cell==HASH_COLLISION){return victims.getValue(kmer);}
|
jpayne@68
|
225 return readCellValue(cell);
|
jpayne@68
|
226 }
|
jpayne@68
|
227
|
jpayne@68
|
228 public final int getValue(long kmer, int startCell){
|
jpayne@68
|
229 int cell=findKmer(kmer, startCell);
|
jpayne@68
|
230 if(cell==NOT_PRESENT){return NOT_PRESENT;}
|
jpayne@68
|
231 if(cell==HASH_COLLISION){return victims.getValue(kmer);}
|
jpayne@68
|
232 return readCellValue(cell);
|
jpayne@68
|
233 }
|
jpayne@68
|
234
|
jpayne@68
|
235 @Override
|
jpayne@68
|
236 public final int[] getValues(long kmer, int[] singleton){
|
jpayne@68
|
237 int cell=findKmer(kmer);
|
jpayne@68
|
238 if(cell==NOT_PRESENT){
|
jpayne@68
|
239 singleton[0]=NOT_PRESENT;
|
jpayne@68
|
240 return singleton;
|
jpayne@68
|
241 }
|
jpayne@68
|
242 if(cell==HASH_COLLISION){return victims.getValues(kmer, singleton);}
|
jpayne@68
|
243 return readCellValues(cell, singleton);
|
jpayne@68
|
244 }
|
jpayne@68
|
245
|
jpayne@68
|
246 @Override
|
jpayne@68
|
247 public final boolean contains(long kmer){
|
jpayne@68
|
248 int cell=findKmer(kmer);
|
jpayne@68
|
249 if(cell==NOT_PRESENT){return false;}
|
jpayne@68
|
250 if(cell==HASH_COLLISION){return victims.contains(kmer);}
|
jpayne@68
|
251 return true;
|
jpayne@68
|
252 }
|
jpayne@68
|
253
|
jpayne@68
|
254 public final long getKmer(int cell) {
|
jpayne@68
|
255 return array[cell];
|
jpayne@68
|
256 }
|
jpayne@68
|
257
|
jpayne@68
|
258 /*--------------------------------------------------------------*/
|
jpayne@68
|
259 /*---------------- Ownership ----------------*/
|
jpayne@68
|
260 /*--------------------------------------------------------------*/
|
jpayne@68
|
261
|
jpayne@68
|
262 @Override
|
jpayne@68
|
263 public final void initializeOwnership(){
|
jpayne@68
|
264 assert(owners==null);
|
jpayne@68
|
265 owners=allocAtomicInt(array.length);
|
jpayne@68
|
266 for(int i=0; i<array.length; i++){
|
jpayne@68
|
267 owners.set(i, NO_OWNER);
|
jpayne@68
|
268 }
|
jpayne@68
|
269 victims.initializeOwnership();
|
jpayne@68
|
270 }
|
jpayne@68
|
271
|
jpayne@68
|
272 @Override
|
jpayne@68
|
273 public final void clearOwnership(){
|
jpayne@68
|
274 owners=null;
|
jpayne@68
|
275 victims.clearOwnership();
|
jpayne@68
|
276 }
|
jpayne@68
|
277
|
jpayne@68
|
278 @Override
|
jpayne@68
|
279 public final int setOwner(final long kmer, final int newOwner){
|
jpayne@68
|
280 final int cell=findKmer(kmer);
|
jpayne@68
|
281 assert(cell!=NOT_PRESENT);
|
jpayne@68
|
282 if(cell==HASH_COLLISION){return victims.setOwner(kmer, newOwner);}
|
jpayne@68
|
283 return setOwner(kmer, newOwner, cell);
|
jpayne@68
|
284 }
|
jpayne@68
|
285
|
jpayne@68
|
286 public final int setOwner(final long kmer, final int newOwner, final int cell){
|
jpayne@68
|
287 assert(array[cell]==kmer);
|
jpayne@68
|
288 final int original=owners.get(cell);
|
jpayne@68
|
289 int current=original;
|
jpayne@68
|
290 while(current<newOwner){
|
jpayne@68
|
291 boolean success=owners.compareAndSet(cell, current, newOwner);
|
jpayne@68
|
292 if(!success){current=owners.get(cell);}
|
jpayne@68
|
293 else{current=newOwner;}
|
jpayne@68
|
294 }
|
jpayne@68
|
295 assert(current>=original) : "original="+original+", current="+current+", newOwner="+newOwner+", re-read="+owners.get(cell);
|
jpayne@68
|
296 return current;
|
jpayne@68
|
297 }
|
jpayne@68
|
298
|
jpayne@68
|
299 @Override
|
jpayne@68
|
300 public final boolean clearOwner(final long kmer, final int owner){
|
jpayne@68
|
301 final int cell=findKmer(kmer);
|
jpayne@68
|
302 assert(cell!=NOT_PRESENT);
|
jpayne@68
|
303 if(cell==HASH_COLLISION){return victims.clearOwner(kmer, owner);}
|
jpayne@68
|
304 return clearOwner(kmer, owner, cell);
|
jpayne@68
|
305 }
|
jpayne@68
|
306
|
jpayne@68
|
307 public final boolean clearOwner(final long kmer, final int owner, final int cell){
|
jpayne@68
|
308 assert(array[cell]==kmer);
|
jpayne@68
|
309 boolean success=owners.compareAndSet(cell, owner, NO_OWNER);
|
jpayne@68
|
310 return success;
|
jpayne@68
|
311 }
|
jpayne@68
|
312
|
jpayne@68
|
313 @Override
|
jpayne@68
|
314 public final int getOwner(final long kmer){
|
jpayne@68
|
315 final int cell=findKmer(kmer);
|
jpayne@68
|
316 assert(cell!=NOT_PRESENT);
|
jpayne@68
|
317 if(cell==HASH_COLLISION){return victims.getOwner(kmer);}
|
jpayne@68
|
318 return getCellOwner(cell);
|
jpayne@68
|
319 }
|
jpayne@68
|
320
|
jpayne@68
|
321 public final int getCellOwner(final int cell){
|
jpayne@68
|
322 return owners.get(cell);
|
jpayne@68
|
323 }
|
jpayne@68
|
324
|
jpayne@68
|
325 /*--------------------------------------------------------------*/
|
jpayne@68
|
326 /*---------------- Nonpublic Methods ----------------*/
|
jpayne@68
|
327 /*--------------------------------------------------------------*/
|
jpayne@68
|
328
|
jpayne@68
|
329 protected abstract void insertValue(final long kmer, final int v, final int cell);
|
jpayne@68
|
330
|
jpayne@68
|
331 // protected abstract void insertValue(final long kmer, final int[] vals, final int cell);
|
jpayne@68
|
332
|
jpayne@68
|
333 /** This is for IntList3 support with HashArrayHybridFast */
|
jpayne@68
|
334 protected abstract void insertValue(final long kmer, final int[] vals, final int cell, final int vlen);
|
jpayne@68
|
335
|
jpayne@68
|
336 protected abstract int readCellValue(int cell);
|
jpayne@68
|
337 protected abstract int[] readCellValues(int cell, int[] singleton);
|
jpayne@68
|
338
|
jpayne@68
|
339 @Override
|
jpayne@68
|
340 final Object get(long kmer){
|
jpayne@68
|
341 throw new RuntimeException("Unimplemented.");
|
jpayne@68
|
342 }
|
jpayne@68
|
343
|
jpayne@68
|
344 final int findKmer(long kmer){
|
jpayne@68
|
345 return findKmer(kmer, kmerToCell(kmer));
|
jpayne@68
|
346 }
|
jpayne@68
|
347
|
jpayne@68
|
348 final int findKmer(final long kmer, final int startCell){
|
jpayne@68
|
349 int cell=startCell;
|
jpayne@68
|
350 for(final int max=cell+extra; cell<max; cell++){
|
jpayne@68
|
351 final long n=array[cell];
|
jpayne@68
|
352 if(n==kmer){return cell;}
|
jpayne@68
|
353 else if(n==NOT_PRESENT){return NOT_PRESENT;}
|
jpayne@68
|
354 }
|
jpayne@68
|
355 return HASH_COLLISION;
|
jpayne@68
|
356 }
|
jpayne@68
|
357
|
jpayne@68
|
358 final int findKmerOrEmpty(long kmer){
|
jpayne@68
|
359 int cell=kmerToCell(kmer);
|
jpayne@68
|
360 for(final int max=cell+extra; cell<max; cell++){
|
jpayne@68
|
361 final long n=array[cell];
|
jpayne@68
|
362 if(n==kmer || n==NOT_PRESENT){return cell;}
|
jpayne@68
|
363 }
|
jpayne@68
|
364 return HASH_COLLISION;
|
jpayne@68
|
365 }
|
jpayne@68
|
366
|
jpayne@68
|
367 /*--------------------------------------------------------------*/
|
jpayne@68
|
368 /*---------------- Resizing and Rebalancing ----------------*/
|
jpayne@68
|
369 /*--------------------------------------------------------------*/
|
jpayne@68
|
370
|
jpayne@68
|
371 @Override
|
jpayne@68
|
372 final boolean canResize() {return true;}
|
jpayne@68
|
373
|
jpayne@68
|
374 @Override
|
jpayne@68
|
375 final public long size() {return size;}
|
jpayne@68
|
376
|
jpayne@68
|
377 @Override
|
jpayne@68
|
378 final public int arrayLength() {return array.length;}
|
jpayne@68
|
379
|
jpayne@68
|
380 @Override
|
jpayne@68
|
381 protected abstract void resize();
|
jpayne@68
|
382
|
jpayne@68
|
383 /*--------------------------------------------------------------*/
|
jpayne@68
|
384 /*---------------- Info Dumping ----------------*/
|
jpayne@68
|
385 /*--------------------------------------------------------------*/
|
jpayne@68
|
386
|
jpayne@68
|
387 @Override
|
jpayne@68
|
388 public final boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount){
|
jpayne@68
|
389 if(twoD){
|
jpayne@68
|
390 final int[] singleton=new int[1];
|
jpayne@68
|
391 for(int i=0; i<array.length; i++){
|
jpayne@68
|
392 long kmer=array[i];
|
jpayne@68
|
393 if(kmer!=NOT_PRESENT){
|
jpayne@68
|
394 tsw.print(toText(kmer, readCellValues(i, singleton), k).append('\n'));
|
jpayne@68
|
395 }
|
jpayne@68
|
396 }
|
jpayne@68
|
397 }else{
|
jpayne@68
|
398 for(int i=0; i<array.length; i++){
|
jpayne@68
|
399 long kmer=array[i];
|
jpayne@68
|
400 if(kmer!=NOT_PRESENT && (mincount<2 || readCellValue(i)>=mincount)){
|
jpayne@68
|
401 tsw.print(toText(kmer, readCellValue(i), k).append('\n'));
|
jpayne@68
|
402 }
|
jpayne@68
|
403 }
|
jpayne@68
|
404 }
|
jpayne@68
|
405 if(victims!=null){
|
jpayne@68
|
406 victims.dumpKmersAsText(tsw, k, mincount, maxcount);
|
jpayne@68
|
407 }
|
jpayne@68
|
408 return true;
|
jpayne@68
|
409 }
|
jpayne@68
|
410
|
jpayne@68
|
411 @Override
|
jpayne@68
|
412 public final boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining){
|
jpayne@68
|
413 if(twoD){
|
jpayne@68
|
414 final int[] singleton=new int[1];
|
jpayne@68
|
415 for(int i=0; i<array.length; i++){
|
jpayne@68
|
416 long kmer=array[i];
|
jpayne@68
|
417 if(kmer!=NOT_PRESENT){
|
jpayne@68
|
418 if(remaining!=null && remaining.decrementAndGet()<0){return true;}
|
jpayne@68
|
419 bsw.printlnKmer(kmer, readCellValues(i, singleton), k);
|
jpayne@68
|
420 }
|
jpayne@68
|
421 }
|
jpayne@68
|
422 }else{
|
jpayne@68
|
423 for(int i=0; i<array.length; i++){
|
jpayne@68
|
424 long kmer=array[i];
|
jpayne@68
|
425 if(kmer!=NOT_PRESENT && (mincount<2 || readCellValue(i)>=mincount)){
|
jpayne@68
|
426 if(remaining!=null && remaining.decrementAndGet()<0){return true;}
|
jpayne@68
|
427 bsw.printlnKmer(kmer, readCellValue(i), k);
|
jpayne@68
|
428 }
|
jpayne@68
|
429 }
|
jpayne@68
|
430 }
|
jpayne@68
|
431 if(victims!=null){
|
jpayne@68
|
432 victims.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining);
|
jpayne@68
|
433 }
|
jpayne@68
|
434 return true;
|
jpayne@68
|
435 }
|
jpayne@68
|
436
|
jpayne@68
|
437 @Override
|
jpayne@68
|
438 public final boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining){
|
jpayne@68
|
439 if(twoD){
|
jpayne@68
|
440 final int[] singleton=new int[1];
|
jpayne@68
|
441 for(int i=0; i<array.length; i++){
|
jpayne@68
|
442 long kmer=array[i];
|
jpayne@68
|
443 if(kmer!=NOT_PRESENT){
|
jpayne@68
|
444 if(remaining!=null && remaining.decrementAndGet()<0){return true;}
|
jpayne@68
|
445 toBytes(kmer, readCellValues(i, singleton), k, bb);
|
jpayne@68
|
446 bb.nl();
|
jpayne@68
|
447 if(bb.length()>=16000){
|
jpayne@68
|
448 ByteBuilder bb2=new ByteBuilder(bb);
|
jpayne@68
|
449 synchronized(bsw){bsw.addJob(bb2);}
|
jpayne@68
|
450 bb.clear();
|
jpayne@68
|
451 }
|
jpayne@68
|
452 }
|
jpayne@68
|
453 }
|
jpayne@68
|
454 }else{
|
jpayne@68
|
455 for(int i=0; i<array.length; i++){
|
jpayne@68
|
456 long kmer=array[i];
|
jpayne@68
|
457 if(kmer!=NOT_PRESENT && (mincount<2 || readCellValue(i)>=mincount)){
|
jpayne@68
|
458 if(remaining!=null && remaining.decrementAndGet()<0){return true;}
|
jpayne@68
|
459 toBytes(kmer, readCellValue(i), k, bb);
|
jpayne@68
|
460 bb.nl();
|
jpayne@68
|
461 if(bb.length()>=16000){
|
jpayne@68
|
462 ByteBuilder bb2=new ByteBuilder(bb);
|
jpayne@68
|
463 synchronized(bsw){bsw.addJob(bb2);}
|
jpayne@68
|
464 bb.clear();
|
jpayne@68
|
465 }
|
jpayne@68
|
466 }
|
jpayne@68
|
467 }
|
jpayne@68
|
468 }
|
jpayne@68
|
469 if(victims!=null){
|
jpayne@68
|
470 victims.dumpKmersAsBytes_MT(bsw, bb, k, mincount, maxcount, remaining);
|
jpayne@68
|
471 }
|
jpayne@68
|
472 return true;
|
jpayne@68
|
473 }
|
jpayne@68
|
474
|
jpayne@68
|
475 @Override
|
jpayne@68
|
476 public final void fillHistogram(long[] ca, int max){
|
jpayne@68
|
477 for(int i=0; i<array.length; i++){
|
jpayne@68
|
478 long kmer=array[i];
|
jpayne@68
|
479 if(kmer!=NOT_PRESENT){
|
jpayne@68
|
480 int count=Tools.min(readCellValue(i), max);
|
jpayne@68
|
481 ca[count]++;
|
jpayne@68
|
482 }
|
jpayne@68
|
483 }
|
jpayne@68
|
484 if(victims!=null){
|
jpayne@68
|
485 victims.fillHistogram(ca, max);
|
jpayne@68
|
486 }
|
jpayne@68
|
487 }
|
jpayne@68
|
488
|
jpayne@68
|
489 @Override
|
jpayne@68
|
490 public void fillHistogram(SuperLongList sll){
|
jpayne@68
|
491 for(int i=0; i<array.length; i++){
|
jpayne@68
|
492 long kmer=array[i];
|
jpayne@68
|
493 if(kmer!=NOT_PRESENT){
|
jpayne@68
|
494 int count=readCellValue(i);
|
jpayne@68
|
495 sll.add(count);
|
jpayne@68
|
496 }
|
jpayne@68
|
497 }
|
jpayne@68
|
498 if(victims!=null){
|
jpayne@68
|
499 victims.fillHistogram(sll);
|
jpayne@68
|
500 }
|
jpayne@68
|
501 }
|
jpayne@68
|
502
|
jpayne@68
|
503 @Override
|
jpayne@68
|
504 public final void countGC(long[] gcCounts, int max){
|
jpayne@68
|
505 for(int i=0; i<array.length; i++){
|
jpayne@68
|
506 long kmer=array[i];
|
jpayne@68
|
507 if(kmer!=NOT_PRESENT){
|
jpayne@68
|
508 int count=readCellValue(i);
|
jpayne@68
|
509 int index=Tools.min(count, max);
|
jpayne@68
|
510 gcCounts[index]+=gc(kmer);
|
jpayne@68
|
511 }
|
jpayne@68
|
512 }
|
jpayne@68
|
513 if(victims!=null){
|
jpayne@68
|
514 victims.countGC(gcCounts, max);
|
jpayne@68
|
515 }
|
jpayne@68
|
516 }
|
jpayne@68
|
517
|
jpayne@68
|
518 public HashForest victims(){
|
jpayne@68
|
519 return victims;
|
jpayne@68
|
520 }
|
jpayne@68
|
521
|
jpayne@68
|
522 /*--------------------------------------------------------------*/
|
jpayne@68
|
523 /*---------------- Fields ----------------*/
|
jpayne@68
|
524 /*--------------------------------------------------------------*/
|
jpayne@68
|
525
|
jpayne@68
|
526 AtomicIntegerArray owners;
|
jpayne@68
|
527 long[] array;
|
jpayne@68
|
528 int prime;
|
jpayne@68
|
529 long size=0;
|
jpayne@68
|
530 long sizeLimit;
|
jpayne@68
|
531 final HashForest victims;
|
jpayne@68
|
532 final boolean autoResize;
|
jpayne@68
|
533 public final boolean twoD;
|
jpayne@68
|
534 private final Lock lock=new ReentrantLock();
|
jpayne@68
|
535 private final long coreMask;//for ways
|
jpayne@68
|
536 private final long coreMask2;//for cells
|
jpayne@68
|
537
|
jpayne@68
|
538 protected int nextScheduleSize(){
|
jpayne@68
|
539 if(schedulePos<schedule.length-1){schedulePos++;}
|
jpayne@68
|
540 return schedule[schedulePos];
|
jpayne@68
|
541 }
|
jpayne@68
|
542
|
jpayne@68
|
543 protected boolean atMaxSize(){
|
jpayne@68
|
544 return schedulePos>=schedule.length-1;
|
jpayne@68
|
545 }
|
jpayne@68
|
546
|
jpayne@68
|
547 protected final int[] schedule;
|
jpayne@68
|
548 private int schedulePos=0;
|
jpayne@68
|
549
|
jpayne@68
|
550 public long[] array(){return array;}
|
jpayne@68
|
551
|
jpayne@68
|
552 public AtomicIntegerArray owners() {return owners;}
|
jpayne@68
|
553 @Override
|
jpayne@68
|
554 final Lock getLock(){return lock;}
|
jpayne@68
|
555
|
jpayne@68
|
556 /*--------------------------------------------------------------*/
|
jpayne@68
|
557 /*---------------- Static Fields ----------------*/
|
jpayne@68
|
558 /*--------------------------------------------------------------*/
|
jpayne@68
|
559
|
jpayne@68
|
560 final static int victimRatio=16; //Initial divisor for victim cache size; it self-resizes.
|
jpayne@68
|
561 final static int extra=60; //Amazingly, increasing this gave increasing returns past 300. Old default was 21. Could allow higher maxLoadFactorFinal and smaller victim cache.
|
jpayne@68
|
562 final static int maxPrime=Primes.primeAtMost(Integer.MAX_VALUE-extra-20);
|
jpayne@68
|
563 final static float resizeMult=2f; //Resize by a minimum of this much; not needed for schedule
|
jpayne@68
|
564 final static float minLoadFactor=0.58f; //Resize by enough to get the load above this factor; not needed for schedule
|
jpayne@68
|
565 final static float maxLoadFactor=0.88f; //Reaching this load triggers resizing
|
jpayne@68
|
566 final static float maxLoadFactorFinal=0.95f; //Reaching this load triggers killing
|
jpayne@68
|
567 final static float minLoadMult=1/minLoadFactor;
|
jpayne@68
|
568 final static float maxLoadMult=1/maxLoadFactor;
|
jpayne@68
|
569
|
jpayne@68
|
570 }
|