Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/AbstractKmerTable.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package kmer; | |
2 | |
3 import java.util.concurrent.atomic.AtomicIntegerArray; | |
4 import java.util.concurrent.atomic.AtomicLong; | |
5 import java.util.concurrent.locks.Lock; | |
6 | |
7 import dna.AminoAcid; | |
8 import fileIO.ByteStreamWriter; | |
9 import fileIO.TextStreamWriter; | |
10 import shared.KillSwitch; | |
11 import shared.Shared; | |
12 import shared.Tools; | |
13 import structures.ByteBuilder; | |
14 import structures.SuperLongList; | |
15 | |
16 | |
17 /** | |
18 * @author Brian Bushnell | |
19 * @date Oct 23, 2013 | |
20 * | |
21 */ | |
22 public abstract class AbstractKmerTable { | |
23 | |
24 /*--------------------------------------------------------------*/ | |
25 /*---------------- Abstract Methods ----------------*/ | |
26 /*--------------------------------------------------------------*/ | |
27 | |
28 // /** Returns count */ | |
29 // public final int increment(long kmer){return increment(kmer, 1);} | |
30 | |
31 /** Returns count */ | |
32 public abstract int increment(final long kmer, final int incr); | |
33 | |
34 // /** Returns number of entries created */ | |
35 // public final int incrementAndReturnNumCreated(final long kmer){return incrementAndReturnNumCreated(kmer, 1);} | |
36 | |
37 /** Returns number of entries created. Incr must be positive. */ | |
38 public abstract int incrementAndReturnNumCreated(final long kmer, final int incr); | |
39 | |
40 public abstract int set(long kmer, int value); | |
41 | |
42 // public abstract int set(long kmer, int[] vals); | |
43 | |
44 /** This is for IntList3 support with HashArrayHybridFast */ | |
45 public abstract int set(long kmer, int[] vals, int vlen); | |
46 | |
47 /** Returns number of kmers added */ | |
48 public abstract int setIfNotPresent(long kmer, int value); | |
49 | |
50 /** | |
51 * Fetch the value associated with a kmer. | |
52 * @param kmer | |
53 * @return A value. -1 means the kmer was not present. | |
54 */ | |
55 public abstract int getValue(long kmer); | |
56 | |
57 /** | |
58 * Fetch the values associated with a kmer. | |
59 * @param kmer | |
60 * @param singleton A blank array of length 1. | |
61 * @return An array filled with values. Values of -1 are invalid. | |
62 */ | |
63 public abstract int[] getValues(long kmer, int[] singleton); | |
64 | |
65 public abstract boolean contains(long kmer); | |
66 | |
67 public final boolean contains(long kmer, int v){ | |
68 assert(TESTMODE); | |
69 int[] set=getValues(kmer, new int[] {-1}); | |
70 if(set==null){return false;} | |
71 for(int s : set){ | |
72 if(s==-1){break;} | |
73 if(s==v){return true;} | |
74 } | |
75 return false; | |
76 } | |
77 | |
78 public final boolean contains(long kmer, int[] vals){ | |
79 assert(TESTMODE); | |
80 int[] set=getValues(kmer, new int[] {-1}); | |
81 if(set==null){return false;} | |
82 boolean success=true; | |
83 for(int v : vals){ | |
84 if(v==-1){break;} | |
85 success=false; | |
86 for(int s : set){ | |
87 if(s==v){ | |
88 success=true; | |
89 break; | |
90 } | |
91 } | |
92 if(!success){break;} | |
93 } | |
94 return success; | |
95 } | |
96 | |
97 public abstract void rebalance(); | |
98 | |
99 public abstract long size(); | |
100 public abstract int arrayLength(); | |
101 public abstract boolean canRebalance(); | |
102 | |
103 public abstract boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount); | |
104 public abstract boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining); | |
105 public abstract boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining); | |
106 | |
107 public abstract void fillHistogram(long[] ca, int max); | |
108 public abstract void fillHistogram(SuperLongList sll); | |
109 public abstract void countGC(long[] gcCounts, int max); | |
110 | |
111 public static final int gc(long kmer){ | |
112 int gc=0; | |
113 while(kmer>0){ | |
114 long x=kmer&3; | |
115 kmer>>>=2; | |
116 if(x==1 || x==2){gc++;} | |
117 } | |
118 return gc; | |
119 } | |
120 | |
121 abstract Object get(long kmer); | |
122 abstract void resize(); | |
123 abstract boolean canResize(); | |
124 | |
125 | |
126 | |
127 /** | |
128 * Removes entries with a value of the limit or less. | |
129 * Rehashes the remainder. | |
130 * @return Number removed. | |
131 */ | |
132 abstract long regenerate(int limit); | |
133 | |
134 final void lock(){getLock().lock();} | |
135 final void unlock(){getLock().unlock();} | |
136 final boolean tryLock(){return getLock().tryLock();} | |
137 Lock getLock(){ | |
138 throw new RuntimeException("Unimplemented."); | |
139 } | |
140 | |
141 /*--------------------------------------------------------------*/ | |
142 /*--------------- Allocation Methods ----------------*/ | |
143 /*--------------------------------------------------------------*/ | |
144 | |
145 final static AtomicIntegerArray allocAtomicInt(int len){ | |
146 return KillSwitch.allocAtomicInt(len); | |
147 } | |
148 | |
149 final static long[] allocLong1D(int len){ | |
150 return KillSwitch.allocLong1D(len); | |
151 } | |
152 | |
153 final static long[][] allocLong2D(int mult, int len){ | |
154 return KillSwitch.allocLong2D(mult, len); | |
155 } | |
156 | |
157 final static int[] allocInt1D(int len){ | |
158 return KillSwitch.allocInt1D(len); | |
159 } | |
160 | |
161 final static int[][] allocInt2D(int len){ | |
162 return KillSwitch.allocInt2D(len); | |
163 } | |
164 | |
165 final static KmerNode[] allocKmerNodeArray(int len){ | |
166 KmerNode[] ret=null; | |
167 try { | |
168 ret=new KmerNode[len]; | |
169 } catch (OutOfMemoryError e) { | |
170 synchronized(killMessage){ | |
171 e.printStackTrace(); | |
172 System.err.println(killMessage); | |
173 // Shared.printMemory(); | |
174 KillSwitch.killSilent(); | |
175 } | |
176 } | |
177 return ret; | |
178 } | |
179 | |
180 /*--------------------------------------------------------------*/ | |
181 /*--------------- Ownership Methods ----------------*/ | |
182 /*--------------------------------------------------------------*/ | |
183 | |
184 /** Set the thread owning this kmer. Return the new owner. | |
185 * Will only change the owner if newOwner is greater than current owner. */ | |
186 public abstract int setOwner(long kmer, int newOwner); | |
187 | |
188 /** Reset owner to -1 if this is the current owner. */ | |
189 public abstract boolean clearOwner(long kmer, int owner); | |
190 | |
191 /** Return the thread ID owning this kmer, or -1. */ | |
192 public abstract int getOwner(long kmer); | |
193 | |
194 /** Create data structures needed for ownership representation */ | |
195 public abstract void initializeOwnership(); | |
196 | |
197 /** Eliminate ownership data structures or set them to -1. */ | |
198 public abstract void clearOwnership(); | |
199 | |
200 /*--------------------------------------------------------------*/ | |
201 /*---------------- Methods ----------------*/ | |
202 /*--------------------------------------------------------------*/ | |
203 | |
204 public static final StringBuilder toText(long kmer, int k){ | |
205 byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); | |
206 int bits=(Shared.AMINO_IN ? 5 : 2); | |
207 int mask=(Shared.AMINO_IN ? 31 : 3); | |
208 StringBuilder sb=new StringBuilder(k); | |
209 for(int i=k-1; i>=0; i--){ | |
210 int x=(int)((kmer>>(bits*i))&mask); | |
211 sb.append((char)lookup[x]); | |
212 } | |
213 return sb; | |
214 } | |
215 | |
216 static final StringBuilder toText(long kmer, int count, int k){ | |
217 StringBuilder sb=new StringBuilder(k+10); | |
218 return toText(kmer, count, k, sb); | |
219 } | |
220 | |
221 static final ByteBuilder toBytes(long kmer, int count, int k){ | |
222 ByteBuilder bb=new ByteBuilder(k+10); | |
223 return toBytes(kmer, count, k, bb); | |
224 } | |
225 | |
226 static final StringBuilder toText(long kmer, int[] values, int k){ | |
227 StringBuilder sb=new StringBuilder(k+10); | |
228 return toText(kmer, values, k, sb); | |
229 } | |
230 | |
231 static final ByteBuilder toBytes(long kmer, int[] values, int k){ | |
232 ByteBuilder bb=new ByteBuilder(k+10); | |
233 return toBytes(kmer, values, k, bb); | |
234 } | |
235 | |
236 static final StringBuilder toText(long kmer, int count, int k, StringBuilder sb){ | |
237 byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); | |
238 int bits=(Shared.AMINO_IN ? 5 : 2); | |
239 int mask=(Shared.AMINO_IN ? 31 : 3); | |
240 if(FASTA_DUMP){ | |
241 sb.append('>'); | |
242 sb.append(count); | |
243 sb.append('\n'); | |
244 for(int i=k-1; i>=0; i--){ | |
245 int x=(int)((kmer>>(bits*i))&mask); | |
246 sb.append((char)lookup[x]); | |
247 } | |
248 }else{ | |
249 for(int i=k-1; i>=0; i--){ | |
250 int x=(int)((kmer>>(bits*i))&mask); | |
251 sb.append((char)lookup[x]); | |
252 } | |
253 sb.append('\t'); | |
254 sb.append(count); | |
255 } | |
256 return sb; | |
257 } | |
258 | |
259 static final StringBuilder toText(long kmer, int[] values, int k, StringBuilder sb){ | |
260 byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); | |
261 int bits=(Shared.AMINO_IN ? 5 : 2); | |
262 int mask=(Shared.AMINO_IN ? 31 : 3); | |
263 if(FASTA_DUMP){ | |
264 sb.append('>'); | |
265 for(int i=0; i<values.length; i++){ | |
266 int x=values[i]; | |
267 if(x==-1){break;} | |
268 if(i>0){sb.append(',');} | |
269 sb.append(x); | |
270 } | |
271 sb.append('\n'); | |
272 for(int i=k-1; i>=0; i--){ | |
273 int x=(int)((kmer>>(bits*i))&mask); | |
274 sb.append((char)lookup[x]); | |
275 } | |
276 }else{ | |
277 for(int i=k-1; i>=0; i--){ | |
278 int x=(int)((kmer>>(bits*i))&mask); | |
279 sb.append((char)lookup[x]); | |
280 } | |
281 sb.append('\t'); | |
282 for(int i=0; i<values.length; i++){ | |
283 int x=values[i]; | |
284 if(x==-1){break;} | |
285 if(i>0){sb.append(',');} | |
286 sb.append(x); | |
287 } | |
288 } | |
289 return sb; | |
290 } | |
291 | |
292 public static final ByteBuilder toBytes(long kmer, long count, int k, ByteBuilder bb){ | |
293 byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); | |
294 int bits=(Shared.AMINO_IN ? 5 : 2); | |
295 int mask=(Shared.AMINO_IN ? 31 : 3); | |
296 if(FASTA_DUMP){ | |
297 bb.append('>'); | |
298 bb.append(count); | |
299 bb.nl(); | |
300 for(int i=k-1; i>=0; i--){ | |
301 int x=(int)((kmer>>(bits*i))&mask); | |
302 bb.append(lookup[x]); | |
303 } | |
304 // assert(false) : kmer+"->\n"+bb+"\n"+AminoAcid.kmerToStringAA(kmer, k); | |
305 }else if(NUMERIC_DUMP){ | |
306 bb.append(Long.toHexString(kmer)); | |
307 bb.tab(); | |
308 bb.append(count); | |
309 }else{ | |
310 for(int i=k-1; i>=0; i--){ | |
311 int x=(int)((kmer>>(bits*i))&mask); | |
312 bb.append(lookup[x]); | |
313 } | |
314 bb.tab(); | |
315 bb.append(count); | |
316 } | |
317 return bb; | |
318 } | |
319 | |
320 public static final ByteBuilder toBytes(long kmer, int[] values, int k, ByteBuilder bb){ | |
321 byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); | |
322 int bits=(Shared.AMINO_IN ? 5 : 2); | |
323 int mask=(Shared.AMINO_IN ? 31 : 3); | |
324 if(FASTA_DUMP){ | |
325 bb.append('>'); | |
326 for(int i=0; i<values.length; i++){ | |
327 int x=values[i]; | |
328 if(x==-1){break;} | |
329 if(i>0){bb.append(',');} | |
330 bb.append(x); | |
331 } | |
332 bb.nl(); | |
333 for(int i=k-1; i>=0; i--){ | |
334 int x=(int)((kmer>>(bits*i))&mask); | |
335 bb.append(lookup[x]); | |
336 } | |
337 }else if(NUMERIC_DUMP){ | |
338 bb.append(Long.toHexString(kmer)); | |
339 bb.tab(); | |
340 for(int i=0; i<values.length; i++){ | |
341 int x=values[i]; | |
342 if(x==-1){break;} | |
343 if(i>0){bb.append(',');} | |
344 bb.append(x); | |
345 } | |
346 }else{ | |
347 for(int i=k-1; i>=0; i--){ | |
348 int x=(int)((kmer>>(bits*i))&mask); | |
349 bb.append(lookup[x]); | |
350 } | |
351 bb.tab(); | |
352 for(int i=0; i<values.length; i++){ | |
353 int x=values[i]; | |
354 if(x==-1){break;} | |
355 if(i>0){bb.append(',');} | |
356 bb.append(x); | |
357 } | |
358 } | |
359 return bb; | |
360 } | |
361 | |
362 // static void appendKmerText(long kmer, int count, int k, StringBuilder sb){ | |
363 // sb.setLength(0); | |
364 // toText(kmer, count, k, sb); | |
365 // sb.append('\n'); | |
366 // } | |
367 | |
368 static void appendKmerText(long kmer, int count, int k, ByteBuilder bb){ | |
369 bb.setLength(0); | |
370 toBytes(kmer, count, k, bb); | |
371 bb.nl(); | |
372 } | |
373 | |
374 | |
375 /** For buffered tables. */ | |
376 long flush(){ | |
377 throw new RuntimeException("Unsupported."); | |
378 } | |
379 | |
380 /** | |
381 * This allocates the data structures in multiple threads. Unfortunately, it does not lead to any speedup, at least for ARRAY type. | |
382 * @param ways | |
383 * @param tableType | |
384 * @param schedule | |
385 * @param mask | |
386 * @return The preallocated table | |
387 */ | |
388 public static final AbstractKmerTable[] preallocate(int ways, int tableType, int[] schedule, long mask){ | |
389 | |
390 final AbstractKmerTable[] tables=new AbstractKmerTable[ways]; | |
391 | |
392 { | |
393 shared.Timer tm=new shared.Timer(); | |
394 final int t=Tools.max(1, Tools.min(Shared.threads(), 2, ways)); //More than 2 still improves allocation time, but only slightly; ~25% faster at t=4. | |
395 final AllocThread[] allocators=new AllocThread[t]; | |
396 for(int i=0; i<t; i++){ | |
397 allocators[i]=new AllocThread(tableType, schedule, i, t, mask, tables); | |
398 } | |
399 for(AllocThread at : allocators){at.start();} | |
400 for(AllocThread at : allocators){ | |
401 while(at.getState()!=Thread.State.TERMINATED){ | |
402 try { | |
403 at.join(); | |
404 } catch (InterruptedException e) { | |
405 // TODO Auto-generated catch block | |
406 e.printStackTrace(); | |
407 } | |
408 } | |
409 } | |
410 tm.stop(); | |
411 if(AbstractKmerTableSet.DISPLAY_PROGRESS){System.err.println(tm);} | |
412 } | |
413 | |
414 synchronized(tables){ | |
415 for(int i=0; i<tables.length; i++){ | |
416 final AbstractKmerTable akt=tables[i]; | |
417 if(akt==null){ | |
418 throw new RuntimeException("KmerTable allocation failed, probably due to lack of RAM: "+i+", "+tables.length); | |
419 } | |
420 } | |
421 } | |
422 | |
423 return tables; | |
424 } | |
425 | |
426 /*--------------------------------------------------------------*/ | |
427 /*---------------- Nested Classes ----------------*/ | |
428 /*--------------------------------------------------------------*/ | |
429 | |
430 private static class AllocThread extends Thread{ | |
431 | |
432 AllocThread(int type_, int[] schedule_, int mod_, int div_, | |
433 long mask_, AbstractKmerTable[] tables_){ | |
434 type=type_; | |
435 schedule=schedule_; | |
436 size=schedule[0]; | |
437 mod=mod_; | |
438 div=div_; | |
439 mask=mask_; | |
440 growable=schedule.length>1; | |
441 tables=tables_; | |
442 } | |
443 | |
444 @Override | |
445 public void run(){ | |
446 //Initialize tables | |
447 long sum=0; | |
448 | |
449 // Shared.printMemory();} | |
450 for(int i=mod; i<tables.length; i+=div){ | |
451 // System.err.println("T"+i+" allocating "+i); | |
452 final AbstractKmerTable akt; | |
453 if(type==FOREST1D){ | |
454 akt=new HashForest(size, growable, false); | |
455 }else if(type==TABLE){ | |
456 akt=new KmerTable(size, growable); | |
457 }else if(type==ARRAY1D){ | |
458 akt=new HashArray1D(schedule, mask); | |
459 // long len=((HashArray1D)akt).arrayLength(); | |
460 // long mem=((HashArray1D)akt).calcMem(); | |
461 // sum+=mem; | |
462 // akt=new HashArray1D(size, -1, mask, growable);//TODO: Set maxSize | |
463 }else if(type==NODE1D){ | |
464 throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); | |
465 // akt=new KmerNode2(-1, 0); | |
466 }else if(type==FOREST2D){ | |
467 akt=new HashForest(size, growable, true); | |
468 }else if(type==TABLE2D){ | |
469 throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); | |
470 }else if(type==ARRAY2D){ | |
471 akt=new HashArray2D(schedule, mask); | |
472 // akt=new HashArray2D(size, -1, mask, growable);//TODO: Set maxSize | |
473 }else if(type==NODE2D){ | |
474 throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); | |
475 // akt=new KmerNode(-1, 0); | |
476 }else if(type==ARRAYH){ | |
477 akt=new HashArrayHybrid(schedule, mask); | |
478 // akt=new HashArrayHybrid(size, -1, mask, growable);//TODO: Set maxSize | |
479 }else if(type==ARRAYHF){ | |
480 akt=new HashArrayHybridFast(schedule, mask); | |
481 // akt=new HashArrayHybrid(size, -1, mask, growable);//TODO: Set maxSize | |
482 }else{ | |
483 throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); | |
484 } | |
485 synchronized(tables){ | |
486 tables[i]=akt; | |
487 } | |
488 // System.err.println("T"+i+" allocated "+i); | |
489 | |
490 // if(i%100==0){ | |
491 // System.err.println("Allocated "+(sum/1000000)+"MB"); | |
492 // Shared.printMemory(); | |
493 // } | |
494 } | |
495 // Shared.printMemory(); | |
496 // assert(false) : ("Allocated "+(sum/1000000)+"MB"); | |
497 } | |
498 | |
499 private final int type; | |
500 private final int[] schedule; | |
501 private final int size; | |
502 private final int mod; | |
503 private final int div; | |
504 private final long mask; | |
505 private final boolean growable; | |
506 final AbstractKmerTable[] tables; | |
507 | |
508 } | |
509 | |
510 public Walker walk() { | |
511 throw new RuntimeException("Unimplemented"); | |
512 } | |
513 | |
514 /*--------------------------------------------------------------*/ | |
515 /*---------------- Fields ----------------*/ | |
516 /*--------------------------------------------------------------*/ | |
517 | |
518 public static boolean FASTA_DUMP=true; | |
519 public static boolean NUMERIC_DUMP=false; | |
520 public static boolean TWO_PASS_RESIZE=false; | |
521 | |
522 public static final boolean verbose=false; | |
523 public static final boolean TESTMODE=false; //123 SLOW! | |
524 | |
525 public static final int UNKNOWN=0, ARRAY1D=1, FOREST1D=2, TABLE=3, NODE1D=4, ARRAY2D=5, FOREST2D=6, TABLE2D=7, NODE2D=8, ARRAYH=9, ARRAYHF=10; | |
526 | |
527 public static final int NOT_PRESENT=-1, HASH_COLLISION=-2; | |
528 public static final int NO_OWNER=-1; | |
529 | |
530 private static final String killMessage=new String("\nThis program ran out of memory. Try increasing the -Xmx flag and setting prealloc."); | |
531 | |
532 } |