Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTable.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package kmer; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.concurrent.atomic.AtomicLong; | |
5 import java.util.concurrent.locks.Lock; | |
6 import java.util.concurrent.locks.ReentrantLock; | |
7 | |
8 import fileIO.ByteStreamWriter; | |
9 import fileIO.TextStreamWriter; | |
10 import shared.Primes; | |
11 import shared.Tools; | |
12 import structures.ByteBuilder; | |
13 import structures.SuperLongList; | |
14 | |
15 /** | |
16 * @author Brian Bushnell | |
17 * @date Oct 23, 2013 | |
18 * | |
19 */ | |
20 public final class KmerTable extends AbstractKmerTable { | |
21 | |
22 /*--------------------------------------------------------------*/ | |
23 /*---------------- Initialization ----------------*/ | |
24 /*--------------------------------------------------------------*/ | |
25 | |
26 public KmerTable(int initialSize, boolean autoResize_){ | |
27 if(initialSize>1){ | |
28 initialSize=(int)Tools.min(maxPrime, Primes.primeAtLeast(initialSize)); | |
29 }else{ | |
30 initialSize=1; | |
31 } | |
32 prime=initialSize; | |
33 sizeLimit=(long) (initialSize*resizeMult); | |
34 array=new KmerLink[prime]; | |
35 autoResize=autoResize_; | |
36 } | |
37 | |
38 /*--------------------------------------------------------------*/ | |
39 /*---------------- Public Methods ----------------*/ | |
40 /*--------------------------------------------------------------*/ | |
41 | |
42 @Override | |
43 public int increment(final long kmer, final int incr){ | |
44 final int cell=(int)(kmer%prime); | |
45 KmerLink n=array[cell], prev=null; | |
46 while(n!=null && n.pivot!=kmer){ | |
47 prev=n; | |
48 n=n.next; | |
49 } | |
50 if(n==null){ | |
51 n=new KmerLink(kmer, incr); | |
52 size++; | |
53 if(prev==null){ | |
54 array[cell]=n; | |
55 }else{ | |
56 prev.next=n; | |
57 } | |
58 if(autoResize && size>sizeLimit){resize();} | |
59 }else{ | |
60 n.value+=incr; | |
61 if(n.value<0){n.value=Integer.MAX_VALUE;} | |
62 } | |
63 return n.value; | |
64 } | |
65 | |
66 @Override | |
67 public int incrementAndReturnNumCreated(final long kmer, final int incr){ | |
68 final int cell=(int)(kmer%prime); | |
69 KmerLink n=array[cell], prev=null; | |
70 while(n!=null && n.pivot!=kmer){ | |
71 prev=n; | |
72 n=n.next; | |
73 } | |
74 if(n==null){ | |
75 n=new KmerLink(kmer, incr); | |
76 size++; | |
77 if(prev==null){ | |
78 array[cell]=n; | |
79 }else{ | |
80 prev.next=n; | |
81 } | |
82 if(autoResize && size>sizeLimit){resize();} | |
83 return 1; | |
84 }else{ | |
85 n.value+=incr; | |
86 if(n.value<0){n.value=Integer.MAX_VALUE;} | |
87 return 0; | |
88 } | |
89 } | |
90 | |
91 @Override | |
92 public int set(long kmer, int value){ | |
93 int x=1, cell=(int)(kmer%prime); | |
94 final KmerLink n=array[cell]; | |
95 if(n==null){ | |
96 array[cell]=new KmerLink(kmer, value); | |
97 }else{ | |
98 x=n.set(kmer, value); | |
99 } | |
100 size+=x; | |
101 if(autoResize && size>sizeLimit){resize();} | |
102 return x; | |
103 } | |
104 | |
105 @Override | |
106 public int set(long kmer, int[] vals, int vlen) { | |
107 throw new RuntimeException("Unimplemented."); | |
108 } | |
109 | |
110 @Override | |
111 public int setIfNotPresent(long kmer, int value){ | |
112 int x=1, cell=(int)(kmer%prime); | |
113 final KmerLink n=array[cell]; | |
114 if(n==null){ | |
115 array[cell]=new KmerLink(kmer, value); | |
116 }else{ | |
117 x=n.setIfNotPresent(kmer, value); | |
118 } | |
119 size+=x; | |
120 if(autoResize && size>sizeLimit){resize();} | |
121 return x; | |
122 } | |
123 | |
124 @Override | |
125 public int getValue(long kmer){ | |
126 int cell=(int)(kmer%prime); | |
127 KmerLink n=array[cell]; | |
128 while(n!=null && n.pivot!=kmer){n=n.next;} | |
129 return n==null ? 0 : n.value; | |
130 } | |
131 | |
132 @Override | |
133 public int[] getValues(long kmer, int[] singleton){ | |
134 assert(array.length==0); | |
135 singleton[0]=getValue(kmer); | |
136 return singleton; | |
137 } | |
138 | |
139 @Override | |
140 public boolean contains(long kmer){ | |
141 KmerLink node=get(kmer); | |
142 return node!=null; | |
143 } | |
144 | |
145 /*--------------------------------------------------------------*/ | |
146 /*---------------- Ownership ----------------*/ | |
147 /*--------------------------------------------------------------*/ | |
148 | |
149 @Override | |
150 public final void initializeOwnership(){ | |
151 for(KmerLink n : array){ | |
152 if(n!=null){n.initializeOwnership();} | |
153 } | |
154 } | |
155 | |
156 @Override | |
157 public final void clearOwnership(){ | |
158 for(KmerLink n : array){ | |
159 if(n!=null){n.clearOwnership();} | |
160 } | |
161 } | |
162 | |
163 @Override | |
164 public final int setOwner(final long kmer, final int newOwner){ | |
165 final int cell=(int)(kmer%prime); | |
166 KmerLink n=array[cell]; | |
167 assert(n!=null); | |
168 return n.setOwner(kmer, newOwner); | |
169 } | |
170 | |
171 @Override | |
172 public final boolean clearOwner(final long kmer, final int owner){ | |
173 final int cell=(int)(kmer%prime); | |
174 KmerLink n=array[cell]; | |
175 assert(n!=null); | |
176 return n.clearOwner(kmer, owner); | |
177 } | |
178 | |
179 @Override | |
180 public final int getOwner(final long kmer){ | |
181 final int cell=(int)(kmer%prime); | |
182 KmerLink n=array[cell]; | |
183 assert(n!=null); | |
184 return n.getOwner(kmer); | |
185 } | |
186 | |
187 /*--------------------------------------------------------------*/ | |
188 /*---------------- Nonpublic Methods ----------------*/ | |
189 /*--------------------------------------------------------------*/ | |
190 | |
191 @Override | |
192 KmerLink get(long kmer){ | |
193 int cell=(int)(kmer%prime); | |
194 KmerLink n=array[cell]; | |
195 while(n!=null && n.pivot!=kmer){n=n.next;} | |
196 return n; | |
197 } | |
198 | |
199 boolean insert(KmerLink n){ | |
200 n.next=null; | |
201 int cell=(int)(n.pivot%prime); | |
202 if(array[cell]==null){ | |
203 array[cell]=n; | |
204 return true; | |
205 } | |
206 return array[cell].insert(n); | |
207 } | |
208 | |
209 /*--------------------------------------------------------------*/ | |
210 /*---------------- Private Methods ----------------*/ | |
211 /*--------------------------------------------------------------*/ | |
212 | |
213 /*--------------------------------------------------------------*/ | |
214 /*---------------- Invalid Methods ----------------*/ | |
215 /*--------------------------------------------------------------*/ | |
216 | |
217 /*--------------------------------------------------------------*/ | |
218 /*---------------- Resizing and Rebalancing ----------------*/ | |
219 /*--------------------------------------------------------------*/ | |
220 | |
221 @Override | |
222 boolean canResize() {return true;} | |
223 | |
224 @Override | |
225 public boolean canRebalance() {return false;} | |
226 | |
227 @Override | |
228 public long size() {return size;} | |
229 | |
230 @Override | |
231 public int arrayLength() {return array.length;} | |
232 | |
233 @Override | |
234 synchronized void resize(){ | |
235 // assert(false); | |
236 // System.err.println("Resizing from "+prime+"; load="+(size*1f/prime)); | |
237 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime)); | |
238 | |
239 final long maxAllowedByLoadFactor=(long)(size*minLoadMult); | |
240 final long minAllowedByLoadFactor=(long)(size*maxLoadMult); | |
241 assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor); | |
242 if(maxAllowedByLoadFactor<prime){return;} | |
243 | |
244 long x=10+(long)(prime*resizeMult); | |
245 x=Tools.max(x, minAllowedByLoadFactor); | |
246 x=Tools.min(x, maxAllowedByLoadFactor); | |
247 | |
248 int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x)); | |
249 | |
250 if(prime2<=prime){return;} | |
251 | |
252 prime=prime2; | |
253 // System.err.println("Resized to "+prime+"; load="+(size*1f/prime)); | |
254 KmerLink[] old=array; | |
255 array=new KmerLink[prime2]; | |
256 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000); | |
257 for(int i=0; i<old.length; i++){ | |
258 if(old[i]!=null){ | |
259 old[i].traverseInfix(list); | |
260 for(KmerLink n : list){insert(n);} | |
261 list.clear(); | |
262 } | |
263 } | |
264 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime)); | |
265 } | |
266 | |
267 @Override | |
268 public void rebalance(){ | |
269 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000); | |
270 for(int i=0; i<array.length; i++){ | |
271 if(array[i]!=null){array[i]=array[i].rebalance(list);} | |
272 } | |
273 } | |
274 | |
275 /*--------------------------------------------------------------*/ | |
276 /*---------------- Info Dumping ----------------*/ | |
277 /*--------------------------------------------------------------*/ | |
278 | |
279 @Deprecated | |
280 @Override | |
281 public boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount){ | |
282 throw new RuntimeException("TODO"); | |
283 } | |
284 | |
285 @Override | |
286 public boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining){ | |
287 for(int i=0; i<array.length; i++){ | |
288 KmerLink node=array[i]; | |
289 if(node!=null && node.value>=mincount){ | |
290 node.dumpKmersAsBytes_MT(bsw, bb, k, mincount, maxcount, remaining); | |
291 } | |
292 } | |
293 return true; | |
294 } | |
295 | |
296 @Deprecated | |
297 @Override | |
298 public boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining){ | |
299 throw new RuntimeException("TODO"); | |
300 } | |
301 | |
302 @Deprecated | |
303 @Override | |
304 public void fillHistogram(long[] ca, int max){ | |
305 throw new RuntimeException("TODO"); | |
306 } | |
307 | |
308 @Deprecated | |
309 @Override | |
310 public void fillHistogram(SuperLongList sll){ | |
311 throw new RuntimeException("TODO"); | |
312 } | |
313 | |
314 @Deprecated | |
315 @Override | |
316 public void countGC(long[] gcCounts, int max){ | |
317 throw new RuntimeException("TODO"); | |
318 } | |
319 | |
320 @Deprecated | |
321 @Override | |
322 public long regenerate(final int limit){ | |
323 throw new RuntimeException("TODO - remove zero-value links."); | |
324 } | |
325 | |
326 /*--------------------------------------------------------------*/ | |
327 /*---------------- Fields ----------------*/ | |
328 /*--------------------------------------------------------------*/ | |
329 | |
330 KmerLink[] array; | |
331 int prime; | |
332 long size=0; | |
333 long sizeLimit; | |
334 final boolean autoResize; | |
335 private final Lock lock=new ReentrantLock(); | |
336 | |
337 @Override | |
338 final Lock getLock(){return lock;} | |
339 | |
340 /*--------------------------------------------------------------*/ | |
341 /*---------------- Static Fields ----------------*/ | |
342 /*--------------------------------------------------------------*/ | |
343 | |
344 final static int maxPrime=(int)Primes.primeAtMost(Integer.MAX_VALUE); | |
345 final static float resizeMult=2f; //Resize by a minimum of this much | |
346 final static float minLoadFactor=0.5f; //Resize by enough to get the load above this factor | |
347 final static float maxLoadFactor=0.98f; //Resize by enough to get the load under this factor | |
348 final static float minLoadMult=1/minLoadFactor; | |
349 final static float maxLoadMult=1/maxLoadFactor; | |
350 | |
351 } |