comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTable.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package kmer;
2
3 import java.util.ArrayList;
4 import java.util.concurrent.atomic.AtomicLong;
5 import java.util.concurrent.locks.Lock;
6 import java.util.concurrent.locks.ReentrantLock;
7
8 import fileIO.ByteStreamWriter;
9 import fileIO.TextStreamWriter;
10 import shared.Primes;
11 import shared.Tools;
12 import structures.ByteBuilder;
13 import structures.SuperLongList;
14
15 /**
16 * @author Brian Bushnell
17 * @date Oct 23, 2013
18 *
19 */
20 public final class KmerTable extends AbstractKmerTable {
21
22 /*--------------------------------------------------------------*/
23 /*---------------- Initialization ----------------*/
24 /*--------------------------------------------------------------*/
25
26 public KmerTable(int initialSize, boolean autoResize_){
27 if(initialSize>1){
28 initialSize=(int)Tools.min(maxPrime, Primes.primeAtLeast(initialSize));
29 }else{
30 initialSize=1;
31 }
32 prime=initialSize;
33 sizeLimit=(long) (initialSize*resizeMult);
34 array=new KmerLink[prime];
35 autoResize=autoResize_;
36 }
37
38 /*--------------------------------------------------------------*/
39 /*---------------- Public Methods ----------------*/
40 /*--------------------------------------------------------------*/
41
42 @Override
43 public int increment(final long kmer, final int incr){
44 final int cell=(int)(kmer%prime);
45 KmerLink n=array[cell], prev=null;
46 while(n!=null && n.pivot!=kmer){
47 prev=n;
48 n=n.next;
49 }
50 if(n==null){
51 n=new KmerLink(kmer, incr);
52 size++;
53 if(prev==null){
54 array[cell]=n;
55 }else{
56 prev.next=n;
57 }
58 if(autoResize && size>sizeLimit){resize();}
59 }else{
60 n.value+=incr;
61 if(n.value<0){n.value=Integer.MAX_VALUE;}
62 }
63 return n.value;
64 }
65
66 @Override
67 public int incrementAndReturnNumCreated(final long kmer, final int incr){
68 final int cell=(int)(kmer%prime);
69 KmerLink n=array[cell], prev=null;
70 while(n!=null && n.pivot!=kmer){
71 prev=n;
72 n=n.next;
73 }
74 if(n==null){
75 n=new KmerLink(kmer, incr);
76 size++;
77 if(prev==null){
78 array[cell]=n;
79 }else{
80 prev.next=n;
81 }
82 if(autoResize && size>sizeLimit){resize();}
83 return 1;
84 }else{
85 n.value+=incr;
86 if(n.value<0){n.value=Integer.MAX_VALUE;}
87 return 0;
88 }
89 }
90
91 @Override
92 public int set(long kmer, int value){
93 int x=1, cell=(int)(kmer%prime);
94 final KmerLink n=array[cell];
95 if(n==null){
96 array[cell]=new KmerLink(kmer, value);
97 }else{
98 x=n.set(kmer, value);
99 }
100 size+=x;
101 if(autoResize && size>sizeLimit){resize();}
102 return x;
103 }
104
105 @Override
106 public int set(long kmer, int[] vals, int vlen) {
107 throw new RuntimeException("Unimplemented.");
108 }
109
110 @Override
111 public int setIfNotPresent(long kmer, int value){
112 int x=1, cell=(int)(kmer%prime);
113 final KmerLink n=array[cell];
114 if(n==null){
115 array[cell]=new KmerLink(kmer, value);
116 }else{
117 x=n.setIfNotPresent(kmer, value);
118 }
119 size+=x;
120 if(autoResize && size>sizeLimit){resize();}
121 return x;
122 }
123
124 @Override
125 public int getValue(long kmer){
126 int cell=(int)(kmer%prime);
127 KmerLink n=array[cell];
128 while(n!=null && n.pivot!=kmer){n=n.next;}
129 return n==null ? 0 : n.value;
130 }
131
132 @Override
133 public int[] getValues(long kmer, int[] singleton){
134 assert(array.length==0);
135 singleton[0]=getValue(kmer);
136 return singleton;
137 }
138
139 @Override
140 public boolean contains(long kmer){
141 KmerLink node=get(kmer);
142 return node!=null;
143 }
144
145 /*--------------------------------------------------------------*/
146 /*---------------- Ownership ----------------*/
147 /*--------------------------------------------------------------*/
148
149 @Override
150 public final void initializeOwnership(){
151 for(KmerLink n : array){
152 if(n!=null){n.initializeOwnership();}
153 }
154 }
155
156 @Override
157 public final void clearOwnership(){
158 for(KmerLink n : array){
159 if(n!=null){n.clearOwnership();}
160 }
161 }
162
163 @Override
164 public final int setOwner(final long kmer, final int newOwner){
165 final int cell=(int)(kmer%prime);
166 KmerLink n=array[cell];
167 assert(n!=null);
168 return n.setOwner(kmer, newOwner);
169 }
170
171 @Override
172 public final boolean clearOwner(final long kmer, final int owner){
173 final int cell=(int)(kmer%prime);
174 KmerLink n=array[cell];
175 assert(n!=null);
176 return n.clearOwner(kmer, owner);
177 }
178
179 @Override
180 public final int getOwner(final long kmer){
181 final int cell=(int)(kmer%prime);
182 KmerLink n=array[cell];
183 assert(n!=null);
184 return n.getOwner(kmer);
185 }
186
187 /*--------------------------------------------------------------*/
188 /*---------------- Nonpublic Methods ----------------*/
189 /*--------------------------------------------------------------*/
190
191 @Override
192 KmerLink get(long kmer){
193 int cell=(int)(kmer%prime);
194 KmerLink n=array[cell];
195 while(n!=null && n.pivot!=kmer){n=n.next;}
196 return n;
197 }
198
199 boolean insert(KmerLink n){
200 n.next=null;
201 int cell=(int)(n.pivot%prime);
202 if(array[cell]==null){
203 array[cell]=n;
204 return true;
205 }
206 return array[cell].insert(n);
207 }
208
209 /*--------------------------------------------------------------*/
210 /*---------------- Private Methods ----------------*/
211 /*--------------------------------------------------------------*/
212
213 /*--------------------------------------------------------------*/
214 /*---------------- Invalid Methods ----------------*/
215 /*--------------------------------------------------------------*/
216
217 /*--------------------------------------------------------------*/
218 /*---------------- Resizing and Rebalancing ----------------*/
219 /*--------------------------------------------------------------*/
220
221 @Override
222 boolean canResize() {return true;}
223
224 @Override
225 public boolean canRebalance() {return false;}
226
227 @Override
228 public long size() {return size;}
229
230 @Override
231 public int arrayLength() {return array.length;}
232
233 @Override
234 synchronized void resize(){
235 // assert(false);
236 // System.err.println("Resizing from "+prime+"; load="+(size*1f/prime));
237 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
238
239 final long maxAllowedByLoadFactor=(long)(size*minLoadMult);
240 final long minAllowedByLoadFactor=(long)(size*maxLoadMult);
241 assert(maxAllowedByLoadFactor>=minAllowedByLoadFactor);
242 if(maxAllowedByLoadFactor<prime){return;}
243
244 long x=10+(long)(prime*resizeMult);
245 x=Tools.max(x, minAllowedByLoadFactor);
246 x=Tools.min(x, maxAllowedByLoadFactor);
247
248 int prime2=(int)Tools.min(maxPrime, Primes.primeAtLeast(x));
249
250 if(prime2<=prime){return;}
251
252 prime=prime2;
253 // System.err.println("Resized to "+prime+"; load="+(size*1f/prime));
254 KmerLink[] old=array;
255 array=new KmerLink[prime2];
256 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
257 for(int i=0; i<old.length; i++){
258 if(old[i]!=null){
259 old[i].traverseInfix(list);
260 for(KmerLink n : list){insert(n);}
261 list.clear();
262 }
263 }
264 sizeLimit=Tools.max((long)(size*1.4), (long)(maxLoadFactor*prime));
265 }
266
267 @Override
268 public void rebalance(){
269 ArrayList<KmerLink> list=new ArrayList<KmerLink>(1000);
270 for(int i=0; i<array.length; i++){
271 if(array[i]!=null){array[i]=array[i].rebalance(list);}
272 }
273 }
274
275 /*--------------------------------------------------------------*/
276 /*---------------- Info Dumping ----------------*/
277 /*--------------------------------------------------------------*/
278
279 @Deprecated
280 @Override
281 public boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount){
282 throw new RuntimeException("TODO");
283 }
284
285 @Override
286 public boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining){
287 for(int i=0; i<array.length; i++){
288 KmerLink node=array[i];
289 if(node!=null && node.value>=mincount){
290 node.dumpKmersAsBytes_MT(bsw, bb, k, mincount, maxcount, remaining);
291 }
292 }
293 return true;
294 }
295
296 @Deprecated
297 @Override
298 public boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining){
299 throw new RuntimeException("TODO");
300 }
301
302 @Deprecated
303 @Override
304 public void fillHistogram(long[] ca, int max){
305 throw new RuntimeException("TODO");
306 }
307
308 @Deprecated
309 @Override
310 public void fillHistogram(SuperLongList sll){
311 throw new RuntimeException("TODO");
312 }
313
314 @Deprecated
315 @Override
316 public void countGC(long[] gcCounts, int max){
317 throw new RuntimeException("TODO");
318 }
319
320 @Deprecated
321 @Override
322 public long regenerate(final int limit){
323 throw new RuntimeException("TODO - remove zero-value links.");
324 }
325
326 /*--------------------------------------------------------------*/
327 /*---------------- Fields ----------------*/
328 /*--------------------------------------------------------------*/
329
330 KmerLink[] array;
331 int prime;
332 long size=0;
333 long sizeLimit;
334 final boolean autoResize;
335 private final Lock lock=new ReentrantLock();
336
337 @Override
338 final Lock getLock(){return lock;}
339
340 /*--------------------------------------------------------------*/
341 /*---------------- Static Fields ----------------*/
342 /*--------------------------------------------------------------*/
343
344 final static int maxPrime=(int)Primes.primeAtMost(Integer.MAX_VALUE);
345 final static float resizeMult=2f; //Resize by a minimum of this much
346 final static float minLoadFactor=0.5f; //Resize by enough to get the load above this factor
347 final static float maxLoadFactor=0.98f; //Resize by enough to get the load under this factor
348 final static float minLoadMult=1/minLoadFactor;
349 final static float maxLoadMult=1/maxLoadFactor;
350
351 }