jpayne@68
|
1 package kmer;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.concurrent.atomic.AtomicLong;
|
jpayne@68
|
5 import java.util.concurrent.locks.Lock;
|
jpayne@68
|
6 import java.util.concurrent.locks.ReentrantLock;
|
jpayne@68
|
7
|
jpayne@68
|
8 import fileIO.ByteStreamWriter;
|
jpayne@68
|
9 import fileIO.TextStreamWriter;
|
jpayne@68
|
10 import shared.Primes;
|
jpayne@68
|
11 import shared.Tools;
|
jpayne@68
|
12 import structures.ByteBuilder;
|
jpayne@68
|
13 import structures.SuperLongList;
|
jpayne@68
|
14
|
jpayne@68
|
15 /**
|
jpayne@68
|
16 * @author Brian Bushnell
|
jpayne@68
|
17 * @date Oct 23, 2013
|
jpayne@68
|
18 *
|
jpayne@68
|
19 */
|
jpayne@68
|
20 public final class KmerTable extends AbstractKmerTable {
|
jpayne@68
|
21
|
jpayne@68
|
22 /*--------------------------------------------------------------*/
|
jpayne@68
|
23 /*---------------- Initialization ----------------*/
|
jpayne@68
|
24 /*--------------------------------------------------------------*/
|
jpayne@68
|
25
|
jpayne@68
|
26 public KmerTable(int initialSize, boolean autoResize_){
|
jpayne@68
|
27 if(initialSize>1){
|
jpayne@68
|
28 initialSize=(int)Tools.min(maxPrime, Primes.primeAtLeast(initialSize));
|
jpayne@68
|
29 }else{
|
jpayne@68
|
30 initialSize=1;
|
jpayne@68
|
31 }
|
jpayne@68
|
32 prime=initialSize;
|
jpayne@68
|
33 sizeLimit=(long) (initialSize*resizeMult);
|
jpayne@68
|
34 array=new KmerLink[prime];
|
jpayne@68
|
35 autoResize=autoResize_;
|
jpayne@68
|
36 }
|
jpayne@68
|
37
|
jpayne@68
|
38 /*--------------------------------------------------------------*/
|
jpayne@68
|
39 /*---------------- Public Methods ----------------*/
|
jpayne@68
|
40 /*--------------------------------------------------------------*/
|
jpayne@68
|
41
|
jpayne@68
|
42 @Override
|
jpayne@68
|
43 public int increment(final long kmer, final int incr){
|
jpayne@68
|
44 final int cell=(int)(kmer%prime);
|
jpayne@68
|
45 KmerLink n=array[cell], prev=null;
|
jpayne@68
|
46 while(n!=null && n.pivot!=kmer){
|
jpayne@68
|
47 prev=n;
|
jpayne@68
|
48 n=n.next;
|
jpayne@68
|
49 }
|
jpayne@68
|
50 if(n==null){
|
jpayne@68
|
51 n=new KmerLink(kmer, incr);
|
jpayne@68
|
52 size++;
|
jpayne@68
|
53 if(prev==null){
|
jpayne@68
|
54 array[cell]=n;
|
jpayne@68
|
55 }else{
|
jpayne@68
|
56 prev.next=n;
|
jpayne@68
|
57 }
|
jpayne@68
|
58 if(autoResize && size>sizeLimit){resize();}
|
jpayne@68
|
59 }else{
|
jpayne@68
|
60 n.value+=incr;
|
jpayne@68
|
61 if(n.value<0){n.value=Integer.MAX_VALUE;}
|
jpayne@68
|
62 }
|
jpayne@68
|
63 return n.value;
|
jpayne@68
|
64 }
|
jpayne@68
|
65
|
jpayne@68
|
66 @Override
|
jpayne@68
|
67 public int incrementAndReturnNumCreated(final long kmer, final int incr){
|
jpayne@68
|
68 final int cell=(int)(kmer%prime);
|
jpayne@68
|
69 KmerLink n=array[cell], prev=null;
|
jpayne@68
|
70 while(n!=null && n.pivot!=kmer){
|
jpayne@68
|
71 prev=n;
|
jpayne@68
|
72 n=n.next;
|
jpayne@68
|
73 }
|
jpayne@68
|
74 if(n==null){
|
jpayne@68
|
75 n=new KmerLink(kmer, incr);
|
jpayne@68
|
76 size++;
|
jpayne@68
|
77 if(prev==null){
|
jpayne@68
|
78 array[cell]=n;
|
jpayne@68
|
79 }else{
|
jpayne@68
|
80 prev.next=n;
|
jpayne@68
|
81 }
|
jpayne@68
|
82 if(autoResize && size>sizeLimit){resize();}
|
jpayne@68
|
83 return 1;
|
jpayne@68
|
84 }else{
|
jpayne@68
|
85 n.value+=incr;
|
jpayne@68
|
86 if(n.value<0){n.value=Integer.MAX_VALUE;}
|
jpayne@68
|
87 return 0;
|
jpayne@68
|
88 }
|
jpayne@68
|
89 }
|
jpayne@68
|
90
|
jpayne@68
|
91 @Override
|
jpayne@68
|
92 public int set(long kmer, int value){
|
jpayne@68
|
93 int x=1, cell=(int)(kmer%prime);
|
jpayne@68
|
94 final KmerLink n=array[cell];
|
jpayne@68
|
95 if(n==null){
|
jpayne@68
|
96 array[cell]=new KmerLink(kmer, value);
|
jpayne@68
|
97 }else{
|
jpayne@68
|
98 x=n.set(kmer, value);
|
jpayne@68
|
99 }
|
jpayne@68
|
100 size+=x;
|
jpayne@68
|
101 if(autoResize && size>sizeLimit){resize();}
|
jpayne@68
|
102 return x;
|
jpayne@68
|
103 }
|
jpayne@68
|
104
|
jpayne@68
|
105 @Override
|
jpayne@68
|
106 public int set(long kmer, int[] vals, int vlen) {
|
jpayne@68
|
107 throw new RuntimeException("Unimplemented.");
|
jpayne@68
|
108 }
|
jpayne@68
|
109
|
jpayne@68
|
110 @Override
|
jpayne@68
|
111 public int setIfNotPresent(long kmer, int value){
|
jpayne@68
|
112 int x=1, cell=(int)(kmer%prime);
|
jpayne@68
|
113 final KmerLink n=array[cell];
|
jpayne@68
|
114 if(n==null){
|
jpayne@68
|
115 array[cell]=new KmerLink(kmer, value);
|
jpayne@68
|
116 }else{
|
jpayne@68
|
117 x=n.setIfNotPresent(kmer, value);
|
jpayne@68
|
118 }
|
jpayne@68
|
119 size+=x;
|
jpayne@68
|
120 if(autoResize && size>sizeLimit){resize();}
|
jpayne@68
|
121 return x;
|
jpayne@68
|
122 }
|
jpayne@68
|
123
|
jpayne@68
|
124 @Override
|
jpayne@68
|
125 public int getValue(long kmer){
|
jpayne@68
|
126 int cell=(int)(kmer%prime);
|
jpayne@68
|
127 KmerLink n=array[cell];
|
jpayne@68
|
128 while(n!=null && n.pivot!=kmer){n=n.next;}
|
jpayne@68
|
129 return n==null ? 0 : n.value;
|
jpayne@68
|
130 }
|
jpayne@68
|
131
|
jpayne@68
|
132 @Override
|
jpayne@68
|
133 public int[] getValues(long kmer, int[] singleton){
|
jpayne@68
|
134 assert(array.length==0);
|
jpayne@68
|
135 singleton[0]=getValue(kmer);
|
jpayne@68
|
136 return singleton;
|
jpayne@68
|
137 }
|
jpayne@68
|
138
|
jpayne@68
|
139 @Override
|
jpayne@68
|
140 public boolean contains(long kmer){
|
jpayne@68
|
141 KmerLink node=get(kmer);
|
jpayne@68
|
142 return node!=null;
|
jpayne@68
|
143 }
|
jpayne@68
|
144
|
jpayne@68
|
145 /*--------------------------------------------------------------*/
|
jpayne@68
|
146 /*---------------- Ownership ----------------*/
|
jpayne@68
|
147 /*--------------------------------------------------------------*/
|
jpayne@68
|
148
|
jpayne@68
|
149 @Override
|
jpayne@68
|
150 public final void initializeOwnership(){
|
jpayne@68
|
151 for(KmerLink n : array){
|
jpayne@68
|
152 if(n!=null){n.initializeOwnership();}
|
jpayne@68
|
153 }
|
jpayne@68
|
154 }
|
jpayne@68
|
155
|
jpayne@68
|
156 @Override
|
jpayne@68
|
157 public final void clearOwnership(){
|
jpayne@68
|
158 for(KmerLink n : array){
|
jpayne@68
|
159 if(n!=null){n.clearOwnership();}
|
jpayne@68
|
160 }
|
jpayne@68
|
161 }
|
jpayne@68
|
162
|
jpayne@68
|
163 @Override
|
jpayne@68
|
164 public final int setOwner(final long kmer, final int newOwner){
|
jpayne@68
|
165 final int cell=(int)(kmer%prime);
|
jpayne@68
|
166 KmerLink n=array[cell];
|
jpayne@68
|
167 assert(n!=null);
|
jpayne@68
|
168 return n.setOwner(kmer, newOwner);
|
jpayne@68
|
169 }
|
jpayne@68
|
170
|
jpayne@68
|
171 @Override
|
jpayne@68
|
172 public final boolean clearOwner(final long kmer, final int owner){
|
jpayne@68
|
173 final int cell=(int)(kmer%prime);
|
jpayne@68
|
174 KmerLink n=array[cell];
|
jpayne@68
|
175 assert(n!=null);
|
jpayne@68
|
176 return n.clearOwner(kmer, owner);
|
jpayne@68
|
177 }
|
jpayne@68
|
178
|
jpayne@68
|
179 @Override
|
jpayne@68
|
180 public final int getOwner(final long kmer){
|
jpayne@68
|
181 final int cell=(int)(kmer%prime);
|
jpayne@68
|
182 KmerLink n=array[cell];
|
jpayne@68
|
183 assert(n!=null);
|
jpayne@68
|
184 return n.getOwner(kmer);
|
jpayne@68
|
185 }
|
jpayne@68
|
186
|
jpayne@68
|
187 /*--------------------------------------------------------------*/
|
jpayne@68
|
188 /*---------------- Nonpublic Methods ----------------*/
|
jpayne@68
|
189 /*--------------------------------------------------------------*/
|
jpayne@68
|
190
|
jpayne@68
|
191 @Override
|
jpayne@68
|
192 KmerLink get(long kmer){
|
jpayne@68
|
193 int cell=(int)(kmer%prime);
|
jpayne@68
|
194 KmerLink n=array[cell];
|
jpayne@68
|
195 while(n!=null && n.pivot!=kmer){n=n.next;}
|
jpayne@68
|
196 return n;
|
jpayne@68
|
197 }
|
jpayne@68
|
198
|
jpayne@68
|
199 boolean insert(KmerLink n){
|
jpayne@68
|
200 n.next=null;
|
jpayne@68
|
201 int cell=(int)(n.pivot%prime);
|
jpayne@68
|
202 if(array[cell]==null){
|
jpayne@68
|
203 array[cell]=n;
|
jpayne@68
|
204 return true;
|
jpayne@68
|
205 }
|
jpayne@68
|
206 return array[cell].insert(n);
|
jpayne@68
|
207 }
|
jpayne@68
|
208
|
jpayne@68
|
209 /*--------------------------------------------------------------*/
|
jpayne@68
|
210 /*---------------- Private Methods ----------------*/
|
jpayne@68
|
211 /*--------------------------------------------------------------*/
|
jpayne@68
|
212
|
jpayne@68
|
213 /*--------------------------------------------------------------*/
|
jpayne@68
|
214 /*---------------- Invalid Methods ----------------*/
|
jpayne@68
|
215 /*--------------------------------------------------------------*/
|
jpayne@68
|
216
|
jpayne@68
|
217 /*--------------------------------------------------------------*/
|
jpayne@68
|
218 /*---------------- Resizing and Rebalancing ----------------*/
|
jpayne@68
|
219 /*--------------------------------------------------------------*/
|
jpayne@68
|
220
|
jpayne@68
|
221 @Override
|
jpayne@68
|
222 boolean canResize() {return true;}
|
jpayne@68
|
223
|
jpayne@68
|
224 @Override
|
jpayne@68
|
225 public boolean canRebalance() {return false;}
|
jpayne@68
|
226
|
jpayne@68
|
227 @Override
|
jpayne@68
|
228 public long size() {return size;}
|
jpayne@68
|
229
|
jpayne@68
|
230 @Override
|
jpayne@68
|
231 public int arrayLength() {return array.length;}
|
jpayne@68
|
232
|
jpayne@68
|
233 @Override
|
jpayne@68
|
234 synchronized void resize(){
|
jpayne@68
|
235 // assert(false);
|
jpayne@68
|
236 // System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
|
jpayne@68
|
237 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
|
jpayne@68
|
238
|
jpayne@68
|
239 final long maxAllowedByLoadFactor=(long)(size*minLoadMult);
|
jpayne@68
|
240 final long minAllowedByLoadFactor=(long)(size*maxLoadMult);
|
jpayne@68
|
241 assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
|
jpayne@68
|
242 if(maxAllowedByLoadFactor<prime){return;}
|
jpayne@68
|
243
|
jpayne@68
|
244 long x=10+(long)(prime*resizeMult);
|
jpayne@68
|
245 x=Tools.max(x, minAllowedByLoadFactor);
|
jpayne@68
|
246 x=Tools.min(x, maxAllowedByLoadFactor);
|
jpayne@68
|
247
|
jpayne@68
|
248 int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
|
jpayne@68
|
249
|
jpayne@68
|
250 if(prime2<=prime){return;}
|
jpayne@68
|
251
|
jpayne@68
|
252 prime=prime2;
|
jpayne@68
|
253 // System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
|
jpayne@68
|
254 KmerLink[] old=array;
|
jpayne@68
|
255 array=new KmerLink[prime2];
|
jpayne@68
|
256 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
|
jpayne@68
|
257 for(int i=0; i<old.length; i++){
|
jpayne@68
|
258 if(old[i]!=null){
|
jpayne@68
|
259 old[i].traverseInfix(list);
|
jpayne@68
|
260 for(KmerLink n : list){insert(n);}
|
jpayne@68
|
261 list.clear();
|
jpayne@68
|
262 }
|
jpayne@68
|
263 }
|
jpayne@68
|
264 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
|
jpayne@68
|
265 }
|
jpayne@68
|
266
|
jpayne@68
|
267 @Override
|
jpayne@68
|
268 public void rebalance(){
|
jpayne@68
|
269 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
|
jpayne@68
|
270 for(int i=0; i<array.length; i++){
|
jpayne@68
|
271 if(array[i]!=null){array[i]=array[i].rebalance(list);}
|
jpayne@68
|
272 }
|
jpayne@68
|
273 }
|
jpayne@68
|
274
|
jpayne@68
|
275 /*--------------------------------------------------------------*/
|
jpayne@68
|
276 /*---------------- Info Dumping ----------------*/
|
jpayne@68
|
277 /*--------------------------------------------------------------*/
|
jpayne@68
|
278
|
jpayne@68
|
279 @Deprecated
|
jpayne@68
|
280 @Override
|
jpayne@68
|
281 public boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount){
|
jpayne@68
|
282 throw new RuntimeException("TODO");
|
jpayne@68
|
283 }
|
jpayne@68
|
284
|
jpayne@68
|
285 @Override
|
jpayne@68
|
286 public boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining){
|
jpayne@68
|
287 for(int i=0; i<array.length; i++){
|
jpayne@68
|
288 KmerLink node=array[i];
|
jpayne@68
|
289 if(node!=null && node.value>=mincount){
|
jpayne@68
|
290 node.dumpKmersAsBytes_MT(bsw, bb, k, mincount, maxcount, remaining);
|
jpayne@68
|
291 }
|
jpayne@68
|
292 }
|
jpayne@68
|
293 return true;
|
jpayne@68
|
294 }
|
jpayne@68
|
295
|
jpayne@68
|
296 @Deprecated
|
jpayne@68
|
297 @Override
|
jpayne@68
|
298 public boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining){
|
jpayne@68
|
299 throw new RuntimeException("TODO");
|
jpayne@68
|
300 }
|
jpayne@68
|
301
|
jpayne@68
|
302 @Deprecated
|
jpayne@68
|
303 @Override
|
jpayne@68
|
304 public void fillHistogram(long[] ca, int max){
|
jpayne@68
|
305 throw new RuntimeException("TODO");
|
jpayne@68
|
306 }
|
jpayne@68
|
307
|
jpayne@68
|
308 @Deprecated
|
jpayne@68
|
309 @Override
|
jpayne@68
|
310 public void fillHistogram(SuperLongList sll){
|
jpayne@68
|
311 throw new RuntimeException("TODO");
|
jpayne@68
|
312 }
|
jpayne@68
|
313
|
jpayne@68
|
314 @Deprecated
|
jpayne@68
|
315 @Override
|
jpayne@68
|
316 public void countGC(long[] gcCounts, int max){
|
jpayne@68
|
317 throw new RuntimeException("TODO");
|
jpayne@68
|
318 }
|
jpayne@68
|
319
|
jpayne@68
|
320 @Deprecated
|
jpayne@68
|
321 @Override
|
jpayne@68
|
322 public long regenerate(final int limit){
|
jpayne@68
|
323 throw new RuntimeException("TODO - remove zero-value links.");
|
jpayne@68
|
324 }
|
jpayne@68
|
325
|
jpayne@68
|
326 /*--------------------------------------------------------------*/
|
jpayne@68
|
327 /*---------------- Fields ----------------*/
|
jpayne@68
|
328 /*--------------------------------------------------------------*/
|
jpayne@68
|
329
|
jpayne@68
|
330 KmerLink[] array;
|
jpayne@68
|
331 int prime;
|
jpayne@68
|
332 long size=0;
|
jpayne@68
|
333 long sizeLimit;
|
jpayne@68
|
334 final boolean autoResize;
|
jpayne@68
|
335 private final Lock lock=new ReentrantLock();
|
jpayne@68
|
336
|
jpayne@68
|
337 @Override
|
jpayne@68
|
338 final Lock getLock(){return lock;}
|
jpayne@68
|
339
|
jpayne@68
|
340 /*--------------------------------------------------------------*/
|
jpayne@68
|
341 /*---------------- Static Fields ----------------*/
|
jpayne@68
|
342 /*--------------------------------------------------------------*/
|
jpayne@68
|
343
|
jpayne@68
|
344 final static int maxPrime=(int)Primes.primeAtMost(Integer.MAX_VALUE);
|
jpayne@68
|
345 final static float resizeMult=2f; //Resize by a minimum of this much
|
jpayne@68
|
346 final static float minLoadFactor=0.5f; //Resize by enough to get the load above this factor
|
jpayne@68
|
347 final static float maxLoadFactor=0.98f; //Resize by enough to get the load under this factor
|
jpayne@68
|
348 final static float minLoadMult=1/minLoadFactor;
|
jpayne@68
|
349 final static float maxLoadMult=1/maxLoadFactor;
|
jpayne@68
|
350
|
jpayne@68
|
351 }
|