Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTableSet.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package kmer; | |
2 | |
3 import java.io.File; | |
4 import java.util.ArrayList; | |
5 import java.util.Arrays; | |
6 import java.util.BitSet; | |
7 import java.util.concurrent.atomic.AtomicLong; | |
8 | |
9 import assemble.Contig; | |
10 import bloom.KmerCountAbstract; | |
11 import dna.AminoAcid; | |
12 import fileIO.ByteStreamWriter; | |
13 import fileIO.FileFormat; | |
14 import fileIO.ReadWrite; | |
15 import jgi.BBMerge; | |
16 import shared.Parse; | |
17 import shared.Parser; | |
18 import shared.PreParser; | |
19 import shared.Primes; | |
20 import shared.ReadStats; | |
21 import shared.Shared; | |
22 import shared.Timer; | |
23 import shared.Tools; | |
24 import shared.TrimRead; | |
25 import stream.ConcurrentReadInputStream; | |
26 import stream.FastaReadInputStream; | |
27 import stream.Read; | |
28 import structures.ByteBuilder; | |
29 import structures.IntList; | |
30 import structures.ListNum; | |
31 import structures.LongList; | |
32 import ukmer.Kmer; | |
33 | |
34 | |
35 /** | |
36 * Loads and holds kmers for Tadpole | |
37 * @author Brian Bushnell | |
38 * @date Jun 22, 2015 | |
39 * | |
40 */ | |
41 public class KmerTableSet extends AbstractKmerTableSet { | |
42 | |
43 /** | |
44 * Code entrance from the command line. | |
45 * @param args Command line arguments | |
46 */ | |
47 public static void main(String[] args){ | |
48 Timer t=new Timer(), t2=new Timer(); | |
49 t.start(); | |
50 t2.start(); | |
51 KmerTableSet set=new KmerTableSet(args); | |
52 t2.stop(); | |
53 outstream.println("Initialization Time: \t"+t2); | |
54 | |
55 ///And run it | |
56 set.process(t); | |
57 | |
58 //Close the print stream if it was redirected | |
59 Shared.closeStream(outstream); | |
60 } | |
61 | |
62 | |
63 /** | |
64 * Constructor. | |
65 * @param args Command line arguments | |
66 */ | |
67 private KmerTableSet(String[] args){ | |
68 this(args, 12);//+5 if using ownership and building contigs | |
69 } | |
70 | |
71 | |
72 /** | |
73 * Constructor. | |
74 * @param args Command line arguments | |
75 */ | |
76 public KmerTableSet(String[] args, final int bytesPerKmer_){ | |
77 {//Preparse block for help, config files, and outstream | |
78 PreParser pp=new PreParser(args, getClass(), false); | |
79 args=pp.args; | |
80 outstream=pp.outstream; | |
81 }//TODO - no easy way to close outstream | |
82 | |
83 /* Initialize local variables with defaults */ | |
84 Parser parser=new Parser(); | |
85 boolean prealloc_=false; | |
86 int k_=31; | |
87 int ways_=-1; | |
88 int filterMax_=2; | |
89 boolean ecco_=false, merge_=false; | |
90 boolean rcomp_=true; | |
91 double minProb_=defaultMinprob; | |
92 int tableType_=AbstractKmerTable.ARRAY1D; | |
93 /* Parse arguments */ | |
94 for(int i=0; i<args.length; i++){ | |
95 | |
96 final String arg=args[i]; | |
97 String[] split=arg.split("="); | |
98 String a=split[0].toLowerCase(); | |
99 String b=split.length>1 ? split[1] : null; | |
100 | |
101 if(Parser.parseCommonStatic(arg, a, b)){ | |
102 //do nothing | |
103 }else if(Parser.parseZip(arg, a, b)){ | |
104 //do nothing | |
105 }else if(Parser.parseQuality(arg, a, b)){ | |
106 //do nothing | |
107 }else if(Parser.parseFasta(arg, a, b)){ | |
108 //do nothing | |
109 }else if(parser.parseInterleaved(arg, a, b)){ | |
110 //do nothing | |
111 }else if(parser.parseTrim(arg, a, b)){ | |
112 //do nothing | |
113 }else if(a.equals("in") || a.equals("in1")){ | |
114 in1.clear(); | |
115 if(b!=null){ | |
116 String[] s=b.split(","); | |
117 for(String ss : s){ | |
118 in1.add(ss); | |
119 } | |
120 } | |
121 }else if(a.equals("in2")){ | |
122 in2.clear(); | |
123 if(b!=null){ | |
124 String[] s=b.split(","); | |
125 for(String ss : s){ | |
126 in2.add(ss); | |
127 } | |
128 } | |
129 }else if(a.equals("extra")){ | |
130 extra.clear(); | |
131 if(b!=null){ | |
132 String[] s=b.split(","); | |
133 for(String ss : s){ | |
134 extra.add(ss); | |
135 } | |
136 } | |
137 }else if(a.equals("append") || a.equals("app")){ | |
138 append=ReadStats.append=Parse.parseBoolean(b); | |
139 }else if(a.equals("overwrite") || a.equals("ow")){ | |
140 overwrite=Parse.parseBoolean(b); | |
141 }else if(a.equals("initialsize")){ | |
142 initialSize=Parse.parseIntKMG(b); | |
143 }else if(a.equals("showstats") || a.equals("stats")){ | |
144 showStats=Parse.parseBoolean(b); | |
145 }else if(a.equals("ways")){ | |
146 ways_=Parse.parseIntKMG(b); | |
147 }else if(a.equals("buflen") || a.equals("bufflen") || a.equals("bufferlength")){ | |
148 buflen=Parse.parseIntKMG(b); | |
149 }else if(a.equals("k")){ | |
150 assert(b!=null) : "\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n"; | |
151 k_=Parse.parseIntKMG(b); | |
152 }else if(a.equals("threads") || a.equals("t")){ | |
153 THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b)); | |
154 }else if(a.equals("showspeed") || a.equals("ss")){ | |
155 showSpeed=Parse.parseBoolean(b); | |
156 }else if(a.equals("ecco")){ | |
157 ecco_=Parse.parseBoolean(b); | |
158 }else if(a.equals("merge")){ | |
159 merge_=Parse.parseBoolean(b); | |
160 }else if(a.equals("verbose")){ | |
161 // assert(false) : "Verbose flag is currently static final; must be recompiled to change."; | |
162 verbose=Parse.parseBoolean(b); | |
163 }else if(a.equals("verbose2")){ | |
164 // assert(false) : "Verbose flag is currently static final; must be recompiled to change."; | |
165 verbose2=Parse.parseBoolean(b); | |
166 }else if(a.equals("minprob")){ | |
167 minProb_=Double.parseDouble(b); | |
168 }else if(a.equals("minprobprefilter") || a.equals("mpp")){ | |
169 minProbPrefilter=Parse.parseBoolean(b); | |
170 }else if(a.equals("minprobmain") || a.equals("mpm")){ | |
171 minProbMain=Parse.parseBoolean(b); | |
172 }else if(a.equals("reads") || a.startsWith("maxreads")){ | |
173 maxReads=Parse.parseKMG(b); | |
174 }else if(a.equals("prealloc") || a.equals("preallocate")){ | |
175 if(b==null || b.length()<1 || Character.isLetter(b.charAt(0))){ | |
176 prealloc_=Parse.parseBoolean(b); | |
177 }else{ | |
178 preallocFraction=Tools.max(0, Double.parseDouble(b)); | |
179 prealloc_=(preallocFraction>0); | |
180 } | |
181 }else if(a.equals("prefilter")){ | |
182 if(b==null || b.length()<1 || !Tools.isDigit(b.charAt(0))){ | |
183 prefilter=Parse.parseBoolean(b); | |
184 }else{ | |
185 filterMax_=Parse.parseIntKMG(b); | |
186 prefilter=filterMax_>0; | |
187 } | |
188 }else if(a.equals("prefiltersize") || a.equals("prefilterfraction") || a.equals("pff")){ | |
189 prefilterFraction=Tools.max(0, Double.parseDouble(b)); | |
190 assert(prefilterFraction<=1) : "prefiltersize must be 0-1, a fraction of total memory."; | |
191 prefilter=prefilterFraction>0; | |
192 }else if(a.equals("prehashes") || a.equals("hashes")){ | |
193 prehashes=Parse.parseIntKMG(b); | |
194 }else if(a.equals("prefilterpasses") || a.equals("prepasses")){ | |
195 assert(b!=null) : "Bad parameter: "+arg; | |
196 if(b.equalsIgnoreCase("auto")){ | |
197 prepasses=-1; | |
198 }else{ | |
199 prepasses=Parse.parseIntKMG(b); | |
200 } | |
201 }else if(a.equals("onepass")){ | |
202 onePass=Parse.parseBoolean(b); | |
203 }else if(a.equals("passes")){ | |
204 int passes=Parse.parseIntKMG(b); | |
205 onePass=(passes<2); | |
206 }else if(a.equals("rcomp")){ | |
207 rcomp_=Parse.parseBoolean(b); | |
208 }else if(a.equals("tabletype")){ | |
209 tableType_=Integer.parseInt(b); | |
210 } | |
211 | |
212 else if(a.equalsIgnoreCase("filterMemoryOverride") || a.equalsIgnoreCase("filterMemory") || | |
213 a.equalsIgnoreCase("prefilterMemory") || a.equalsIgnoreCase("filtermem")){ | |
214 filterMemoryOverride=Parse.parseKMG(b); | |
215 } | |
216 | |
217 else if(IGNORE_UNKNOWN_ARGS){ | |
218 //Do nothing | |
219 }else{ | |
220 throw new RuntimeException("Unknown parameter "+args[i]); | |
221 } | |
222 } | |
223 | |
224 {//Process parser fields | |
225 Parser.processQuality(); | |
226 | |
227 qtrimLeft=parser.qtrimLeft; | |
228 qtrimRight=parser.qtrimRight; | |
229 trimq=parser.trimq; | |
230 trimE=parser.trimE(); | |
231 | |
232 minAvgQuality=parser.minAvgQuality; | |
233 minAvgQualityBases=parser.minAvgQualityBases; | |
234 amino=Shared.AMINO_IN; | |
235 if(amino){k_=Tools.min(k_, 12);} | |
236 } | |
237 | |
238 if(prepasses==0 || !prefilter){ | |
239 prepasses=0; | |
240 prefilter=false; | |
241 } | |
242 | |
243 | |
244 // assert(false) : prepasses+", "+onePass+", "+prefilter; | |
245 | |
246 { | |
247 long memory=Runtime.getRuntime().maxMemory(); | |
248 double xmsRatio=Shared.xmsRatio(); | |
249 // long tmemory=Runtime.getRuntime().totalMemory(); | |
250 usableMemory=(long)Tools.max(((memory-96000000)*(xmsRatio>0.97 ? 0.82 : 0.72)), memory*0.45); | |
251 if(prepasses==0 || !prefilter){ | |
252 filterMemory0=filterMemory1=0; | |
253 }else if(filterMemoryOverride>0){ | |
254 filterMemory0=filterMemory1=filterMemoryOverride; | |
255 }else{ | |
256 double low=Tools.min(prefilterFraction, 1-prefilterFraction); | |
257 double high=1-low; | |
258 if(prepasses<0 || (prepasses&1)==1){//odd passes | |
259 filterMemory0=(long)(usableMemory*low); | |
260 filterMemory1=(long)(usableMemory*high); | |
261 }else{//even passes | |
262 filterMemory0=(long)(usableMemory*high); | |
263 filterMemory1=(long)(usableMemory*low); | |
264 } | |
265 } | |
266 tableMemory=(long)(usableMemory*.95-filterMemory0); | |
267 } | |
268 | |
269 tableType=tableType_; | |
270 prealloc=prealloc_; | |
271 bytesPerKmer=bytesPerKmer_; | |
272 if(ways_<1){ | |
273 long maxKmers=(2*tableMemory)/bytesPerKmer; | |
274 long minWays=Tools.min(10000, maxKmers/Integer.MAX_VALUE); | |
275 ways_=(int)Tools.max(31, (int)(Tools.min(96, Shared.threads())*2.5), minWays); | |
276 ways_=(int)Primes.primeAtLeast(ways_); | |
277 assert(ways_>0); | |
278 // System.err.println("ways="+ways_); | |
279 } | |
280 | |
281 /* Set final variables; post-process and validate argument combinations */ | |
282 | |
283 onePass=onePass&prefilter; | |
284 ways=ways_; | |
285 filterMax=Tools.min(filterMax_, 0x7FFFFFFF); | |
286 ecco=ecco_; | |
287 merge=merge_; | |
288 minProb=(float)minProb_; | |
289 rcomp=rcomp_; | |
290 // assert(false) : tableMemory+", "+bytesPerKmer+", "+prealloc+", "+preallocFraction; | |
291 estimatedKmerCapacity=(long)((tableMemory*1.0/bytesPerKmer)*((prealloc ? preallocFraction : 0.81))*(HashArray.maxLoadFactorFinal*0.97)); | |
292 | |
293 // System.err.println("tableMemory="+tableMemory+", bytesPerKmer="+bytesPerKmer+", estimatedKmerCapacity="+estimatedKmerCapacity); | |
294 KmerCountAbstract.minProb=(minProbPrefilter ? minProb : 0); | |
295 k=k_; | |
296 k2=k-1; | |
297 int bitsPerBase=(amino ? 5 : 2); | |
298 int mask=(amino ? 31 : 3); | |
299 coreMask=(AbstractKmerTableSet.MASK_CORE ? ~(((-1L)<<(bitsPerBase*(k_-1)))|mask) : -1L); | |
300 | |
301 if(k<1 || k>31){throw new RuntimeException("\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n");} | |
302 | |
303 if(initialSize<1){ | |
304 final long memOverWays=tableMemory/(bytesPerKmer*ways); | |
305 final double mem2=(prealloc ? preallocFraction : 1)*tableMemory; | |
306 initialSize=(prealloc || memOverWays<initialSizeDefault ? (int)Tools.min(2142000000, (long)(mem2/(bytesPerKmer*ways))) : initialSizeDefault); | |
307 if(initialSize!=initialSizeDefault){ | |
308 System.err.println("Initial size set to "+initialSize); | |
309 } | |
310 } | |
311 | |
312 /* Adjust I/O settings and filenames */ | |
313 | |
314 assert(FastaReadInputStream.settingsOK()); | |
315 | |
316 if(in1.isEmpty()){ | |
317 //throw new RuntimeException("Error - at least one input file is required."); | |
318 } | |
319 | |
320 for(int i=0; i<in1.size(); i++){ | |
321 String s=in1.get(i); | |
322 if(s!=null && s.contains("#") && !new File(s).exists()){ | |
323 int pound=s.lastIndexOf('#'); | |
324 String a=s.substring(0, pound); | |
325 String b=s.substring(pound+1); | |
326 in1.set(i, a+1+b); | |
327 in2.add(a+2+b); | |
328 } | |
329 } | |
330 | |
331 if(!extra.isEmpty()){ | |
332 ArrayList<String> temp=(ArrayList<String>) extra.clone(); | |
333 extra.clear(); | |
334 for(String s : temp){ | |
335 if(s!=null && s.contains("#") && !new File(s).exists()){ | |
336 int pound=s.lastIndexOf('#'); | |
337 String a=s.substring(0, pound); | |
338 String b=s.substring(pound+1); | |
339 extra.add(a); | |
340 extra.add(b); | |
341 }else{ | |
342 extra.add(s); | |
343 } | |
344 } | |
345 } | |
346 | |
347 { | |
348 boolean allowDuplicates=true; | |
349 if(!Tools.testInputFiles(allowDuplicates, true, in1, in2, extra)){ | |
350 throw new RuntimeException("\nCan't read some input files.\n"); | |
351 } | |
352 } | |
353 assert(THREADS>0); | |
354 | |
355 if(DISPLAY_PROGRESS){ | |
356 // assert(false); | |
357 outstream.println("Initial:"); | |
358 outstream.println("Ways="+ways+", initialSize="+initialSize+", prefilter="+(prefilter ? "t" : "f")+", prealloc="+(prealloc ? (""+preallocFraction) : "f")); | |
359 Shared.printMemory(); | |
360 outstream.println(); | |
361 } | |
362 } | |
363 | |
364 /*--------------------------------------------------------------*/ | |
365 /*---------------- Outer Methods ----------------*/ | |
366 /*--------------------------------------------------------------*/ | |
367 | |
368 @Override | |
369 public void clear(){ | |
370 tables=null; | |
371 } | |
372 | |
373 /*--------------------------------------------------------------*/ | |
374 /*---------------- Inner Methods ----------------*/ | |
375 /*--------------------------------------------------------------*/ | |
376 | |
377 @Override | |
378 public void allocateTables(){ | |
379 assert(tables==null); | |
380 tables=null; | |
381 final long coreMask=(MASK_CORE ? ~(((-1L)<<(2*(k-1)))|3) : -1L); | |
382 | |
383 ScheduleMaker scheduleMaker=new ScheduleMaker(ways, bytesPerKmer, prealloc, | |
384 (prealloc ? preallocFraction : 1.0), -1, (prefilter ? prepasses : 0), prefilterFraction, filterMemoryOverride); | |
385 int[] schedule=scheduleMaker.makeSchedule(); | |
386 //assert(false) : preallocFraction+", "+ways+", "+Arrays.toString(schedule); | |
387 // System.err.println("DEBUG "+preallocFraction+", "+ways+", "+Arrays.toString(schedule)); | |
388 tables=AbstractKmerTable.preallocate(ways, tableType, schedule, coreMask); | |
389 | |
390 // tables=AbstractKmerTable.preallocate(ways, tableType, initialSize, coreMask, (!prealloc || preallocFraction<1)); | |
391 } | |
392 | |
393 /** | |
394 * Load reads into tables, using multiple LoadThread. | |
395 */ | |
396 @Override | |
397 public long loadKmers(String fname1, String fname2){ | |
398 | |
399 /* Create read input stream */ | |
400 final ConcurrentReadInputStream cris; | |
401 { | |
402 FileFormat ff1=FileFormat.testInput(fname1, FileFormat.FASTQ, null, true, true); | |
403 FileFormat ff2=FileFormat.testInput(fname2, FileFormat.FASTQ, null, true, true); | |
404 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ff1, ff2); | |
405 cris.start(); //4567 | |
406 } | |
407 | |
408 // /* Optionally skip the first reads, since initial reads may have lower quality */ | |
409 // if(skipreads>0){ | |
410 // long skipped=0; | |
411 // | |
412 // ListNum<Read> ln=cris.nextList(); | |
413 // ArrayList<Read> reads=(ln!=null ? ln.list : null); | |
414 // | |
415 // while(skipped<skipreads && reads!=null && reads.size()>0){ | |
416 // skipped+=reads.size(); | |
417 // | |
418 // cris.returnList(ln); | |
419 // ln=cris.nextList(); | |
420 // reads=(ln!=null ? ln.list : null); | |
421 // } | |
422 // cris.returnList(ln); | |
423 // if(reads==null || reads.isEmpty()){ | |
424 // ReadWrite.closeStreams(cris); | |
425 // System.err.println("Skipped all of the reads."); | |
426 // System.exit(0); | |
427 // } | |
428 // } | |
429 | |
430 /* Create ProcessThreads */ | |
431 ArrayList<LoadThread> alpt=new ArrayList<LoadThread>(THREADS); | |
432 for(int i=0; i<THREADS; i++){alpt.add(new LoadThread(cris));} | |
433 for(LoadThread pt : alpt){pt.start();} | |
434 | |
435 long added=0; | |
436 | |
437 /* Wait for threads to die, and gather statistics */ | |
438 for(LoadThread pt : alpt){ | |
439 while(pt.getState()!=Thread.State.TERMINATED){ | |
440 try { | |
441 pt.join(); | |
442 } catch (InterruptedException e) { | |
443 // TODO Auto-generated catch block | |
444 e.printStackTrace(); | |
445 } | |
446 } | |
447 added+=pt.added; | |
448 | |
449 readsIn+=pt.readsInT; | |
450 basesIn+=pt.basesInT; | |
451 lowqReads+=pt.lowqReadsT; | |
452 lowqBases+=pt.lowqBasesT; | |
453 readsTrimmed+=pt.readsTrimmedT; | |
454 basesTrimmed+=pt.basesTrimmedT; | |
455 kmersIn+=pt.kmersInT; | |
456 } | |
457 | |
458 /* Shut down I/O streams; capture error status */ | |
459 errorState|=ReadWrite.closeStreams(cris); | |
460 return added; | |
461 } | |
462 | |
463 /*--------------------------------------------------------------*/ | |
464 /*---------------- Inner Classes ----------------*/ | |
465 /*--------------------------------------------------------------*/ | |
466 | |
467 /** | |
468 * Loads kmers. | |
469 */ | |
470 private class LoadThread extends Thread{ | |
471 | |
472 /** | |
473 * Constructor | |
474 * @param cris_ Read input stream | |
475 */ | |
476 public LoadThread(ConcurrentReadInputStream cris_){ | |
477 cris=cris_; | |
478 table=new HashBuffer(tables, buflen, k, false, true); | |
479 } | |
480 | |
481 @Override | |
482 public void run(){ | |
483 ListNum<Read> ln=cris.nextList(); | |
484 ArrayList<Read> reads=(ln!=null ? ln.list : null); | |
485 | |
486 //While there are more reads lists... | |
487 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning | |
488 | |
489 //For each read (or pair) in the list... | |
490 for(int i=0; i<reads.size(); i++){ | |
491 Read r1=reads.get(i); | |
492 Read r2=r1.mate; | |
493 | |
494 if(!r1.validated()){r1.validate(true);} | |
495 if(r2!=null && !r2.validated()){r2.validate(true);} | |
496 | |
497 if(verbose){System.err.println("Considering read "+r1.id+" "+new String(r1.bases));} | |
498 | |
499 readsInT++; | |
500 basesInT+=r1.length(); | |
501 if(r2!=null){ | |
502 readsInT++; | |
503 basesInT+=r2.length(); | |
504 } | |
505 | |
506 if(maxNs<Integer.MAX_VALUE){ | |
507 if(r1!=null && r1.countUndefined()>maxNs){r1.setDiscarded(true);} | |
508 if(r2!=null && r2.countUndefined()>maxNs){r2.setDiscarded(true);} | |
509 } | |
510 | |
511 //Determine whether to discard the reads based on average quality | |
512 if(minAvgQuality>0){ | |
513 if(r1!=null && r1.quality!=null && r1.avgQuality(false, minAvgQualityBases)<minAvgQuality){r1.setDiscarded(true);} | |
514 if(r2!=null && r2.quality!=null && r2.avgQuality(false, minAvgQualityBases)<minAvgQuality){r2.setDiscarded(true);} | |
515 } | |
516 | |
517 if(r1!=null){ | |
518 if(qtrimLeft || qtrimRight){ | |
519 int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1, false); | |
520 basesTrimmedT+=x; | |
521 readsTrimmedT+=(x>0 ? 1 : 0); | |
522 } | |
523 if(r1.length()<k){r1.setDiscarded(true);} | |
524 } | |
525 if(r2!=null){ | |
526 if(qtrimLeft || qtrimRight){ | |
527 int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1, false); | |
528 basesTrimmedT+=x; | |
529 readsTrimmedT+=(x>0 ? 1 : 0); | |
530 } | |
531 if(r2.length()<k){r2.setDiscarded(true);} | |
532 } | |
533 | |
534 if((ecco || merge) && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){ | |
535 if(merge){ | |
536 final int insert=BBMerge.findOverlapStrict(r1, r2, false); | |
537 if(insert>0){ | |
538 r2.reverseComplement(); | |
539 r1=r1.joinRead(insert); | |
540 r2=null; | |
541 } | |
542 }else if(ecco){ | |
543 BBMerge.findOverlapStrict(r1, r2, true); | |
544 } | |
545 } | |
546 | |
547 if(minLen>0){ | |
548 if(r1!=null && r1.length()<minLen){r1.setDiscarded(true);} | |
549 if(r2!=null && r2.length()<minLen){r2.setDiscarded(true);} | |
550 } | |
551 | |
552 if(r1!=null){ | |
553 if(r1.discarded()){ | |
554 lowqBasesT+=r1.length(); | |
555 lowqReadsT++; | |
556 }else{ | |
557 long temp=addKmersToTable(r1); | |
558 added+=temp; | |
559 if(verbose){System.err.println("A: Added "+temp);} | |
560 } | |
561 } | |
562 if(r2!=null){ | |
563 if(r2.discarded()){ | |
564 lowqBasesT+=r2.length(); | |
565 lowqReadsT++; | |
566 }else{ | |
567 long temp=addKmersToTable(r2); | |
568 added+=temp; | |
569 if(verbose){System.err.println("B: Added "+temp);} | |
570 } | |
571 } | |
572 } | |
573 | |
574 //Fetch a new read list | |
575 cris.returnList(ln); | |
576 ln=cris.nextList(); | |
577 reads=(ln!=null ? ln.list : null); | |
578 } | |
579 cris.returnList(ln); | |
580 long temp=table.flush(); | |
581 if(verbose){System.err.println("Flush: Added "+temp);} | |
582 added+=temp; | |
583 } | |
584 | |
585 private final int addKmersToTableAA(final Read r){ | |
586 if(r==null || r.bases==null){return 0;} | |
587 final float minProb2=(minProbMain ? minProb : 0); | |
588 final byte[] bases=r.bases; | |
589 final byte[] quals=r.quality; | |
590 final int shift=5*k; | |
591 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
592 long kmer=0; | |
593 int created=0; | |
594 int len=0; | |
595 | |
596 if(bases==null || bases.length<k){return -1;} | |
597 | |
598 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
599 float prob=1; | |
600 for(int i=0; i<bases.length; i++){ | |
601 final byte b=bases[i]; | |
602 final long x=AminoAcid.acidToNumber[b]; | |
603 | |
604 //Update kmers | |
605 kmer=((kmer<<5)|x)&mask; | |
606 | |
607 if(minProb2>0 && quals!=null){//Update probability | |
608 prob=prob*PROB_CORRECT[quals[i]]; | |
609 if(len>k){ | |
610 byte oldq=quals[i-k]; | |
611 prob=prob*PROB_CORRECT_INVERSE[oldq]; | |
612 } | |
613 } | |
614 | |
615 //Handle Ns | |
616 if(x<0){ | |
617 len=0; | |
618 kmer=0; | |
619 prob=1; | |
620 }else{len++;} | |
621 | |
622 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
623 if(len>=k && prob>=minProb2){ | |
624 kmersInT++; | |
625 final long key=kmer; | |
626 if(!prefilter || prefilterArray.read(key)>filterMax2){ | |
627 int temp=table.incrementAndReturnNumCreated(key, 1); | |
628 created+=temp; | |
629 if(verbose){System.err.println("C: Added "+temp);} | |
630 } | |
631 } | |
632 } | |
633 | |
634 return created; | |
635 } | |
636 | |
637 private final int addKmersToTable(final Read r){ | |
638 if(amino){return addKmersToTableAA(r);} | |
639 if(onePass){return addKmersToTable_onePass(r);} | |
640 if(r==null || r.bases==null){return 0;} | |
641 final float minProb2=(minProbMain ? minProb : 0); | |
642 final byte[] bases=r.bases; | |
643 final byte[] quals=r.quality; | |
644 final int shift=2*k; | |
645 final int shift2=shift-2; | |
646 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
647 long kmer=0; | |
648 long rkmer=0; | |
649 int created=0; | |
650 int len=0; | |
651 | |
652 if(bases==null || bases.length<k){return -1;} | |
653 | |
654 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
655 float prob=1; | |
656 for(int i=0; i<bases.length; i++){ | |
657 final byte b=bases[i]; | |
658 final long x=AminoAcid.baseToNumber[b]; | |
659 final long x2=AminoAcid.baseToComplementNumber[b]; | |
660 | |
661 //Update kmers | |
662 kmer=((kmer<<2)|x)&mask; | |
663 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
664 | |
665 if(minProb2>0 && quals!=null){//Update probability | |
666 prob=prob*PROB_CORRECT[quals[i]]; | |
667 if(len>k){ | |
668 byte oldq=quals[i-k]; | |
669 prob=prob*PROB_CORRECT_INVERSE[oldq]; | |
670 } | |
671 } | |
672 | |
673 //Handle Ns | |
674 if(x<0){ | |
675 len=0; | |
676 kmer=rkmer=0; | |
677 prob=1; | |
678 }else{len++;} | |
679 | |
680 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
681 if(len>=k && prob>=minProb2){ | |
682 kmersInT++; | |
683 final long key=toValue(kmer, rkmer); | |
684 if(!prefilter || prefilterArray.read(key)>filterMax2){ | |
685 int temp=table.incrementAndReturnNumCreated(key, 1); | |
686 created+=temp; | |
687 if(verbose){System.err.println("C: Added "+temp);} | |
688 } | |
689 } | |
690 } | |
691 | |
692 return created; | |
693 } | |
694 | |
695 | |
696 private final int addKmersToTable_onePass(final Read r){ | |
697 assert(prefilter); | |
698 if(r==null || r.bases==null){return 0;} | |
699 final byte[] bases=r.bases; | |
700 final byte[] quals=r.quality; | |
701 final int shift=2*k; | |
702 final int shift2=shift-2; | |
703 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
704 long kmer=0; | |
705 long rkmer=0; | |
706 int created=0; | |
707 int len=0; | |
708 | |
709 if(bases==null || bases.length<k){return -1;} | |
710 | |
711 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
712 float prob=1; | |
713 for(int i=0; i<bases.length; i++){ | |
714 final byte b=bases[i]; | |
715 final long x=AminoAcid.baseToNumber[b]; | |
716 final long x2=AminoAcid.baseToComplementNumber[b]; | |
717 | |
718 //Update kmers | |
719 kmer=((kmer<<2)|x)&mask; | |
720 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
721 | |
722 if(minProb>0 && quals!=null){//Update probability | |
723 prob=prob*PROB_CORRECT[quals[i]]; | |
724 if(len>k){ | |
725 byte oldq=quals[i-k]; | |
726 prob=prob*PROB_CORRECT_INVERSE[oldq]; | |
727 } | |
728 } | |
729 | |
730 //Handle Ns | |
731 if(x<0){ | |
732 len=0; | |
733 kmer=rkmer=0; | |
734 prob=1; | |
735 }else{len++;} | |
736 | |
737 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
738 if(len>=k){ | |
739 final long key=toValue(kmer, rkmer); | |
740 int count=prefilterArray.incrementAndReturnUnincremented(key, 1); | |
741 if(count>=filterMax2){ | |
742 int temp=table.incrementAndReturnNumCreated(key, 1); | |
743 created+=temp; | |
744 if(verbose){System.err.println("D: Added "+temp);} | |
745 } | |
746 } | |
747 } | |
748 return created; | |
749 } | |
750 | |
751 /*--------------------------------------------------------------*/ | |
752 | |
753 /** Input read stream */ | |
754 private final ConcurrentReadInputStream cris; | |
755 | |
756 private final HashBuffer table; | |
757 | |
758 public long added=0; | |
759 | |
760 private long readsInT=0; | |
761 private long basesInT=0; | |
762 private long lowqReadsT=0; | |
763 private long lowqBasesT=0; | |
764 private long readsTrimmedT=0; | |
765 private long basesTrimmedT=0; | |
766 private long kmersInT=0; | |
767 | |
768 } | |
769 | |
770 | |
771 /*--------------------------------------------------------------*/ | |
772 /*---------------- Convenience ----------------*/ | |
773 /*--------------------------------------------------------------*/ | |
774 | |
775 public void regenerateKmers(byte[] bases, LongList kmers, IntList counts, final int a){ | |
776 final int loc=a+k; | |
777 final int lim=Tools.min(counts.size, a+k+1); | |
778 final int shift=2*k; | |
779 final int shift2=shift-2; | |
780 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
781 long kmer=kmers.get(a); | |
782 long rkmer=rcomp(kmer); | |
783 int len=k; | |
784 | |
785 // assert(false) : a+", "+loc+", "+lim; | |
786 | |
787 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
788 for(int i=loc, j=a+1; j<lim; i++, j++){ | |
789 final byte b=bases[i]; | |
790 final long x=AminoAcid.baseToNumber[b]; | |
791 final long x2=AminoAcid.baseToComplementNumber[b]; | |
792 kmer=((kmer<<2)|x)&mask; | |
793 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
794 if(x<0){ | |
795 len=0; | |
796 kmer=rkmer=0; | |
797 }else{len++;} | |
798 | |
799 if(len>=k){ | |
800 assert(kmers.get(j)!=kmer); | |
801 kmers.set(j, kmer); | |
802 int count=getCount(kmer, rkmer); | |
803 counts.set(j, count); | |
804 }else{ | |
805 kmers.set(j, -1); | |
806 counts.set(j, 0); | |
807 } | |
808 } | |
809 } | |
810 | |
811 @Override | |
812 public int regenerateCounts(byte[] bases, IntList counts, final Kmer dummy, BitSet changed){ | |
813 assert(!changed.isEmpty()); | |
814 final int firstBase=changed.nextSetBit(0), lastBase=changed.length()-1; | |
815 final int ca=firstBase-k; | |
816 // final int b=changed.nextSetBit(0);ca+kbig; //first base changed | |
817 final int firstCount=Tools.max(firstBase-k+1, 0), lastCount=Tools.min(counts.size-1, lastBase); //count limit | |
818 // System.err.println("ca="+ca+", b="+b+", lim="+lim); | |
819 // System.err.println("Regen from count "+(ca+1)+"-"+lim); | |
820 | |
821 final int shift=2*k; | |
822 final int shift2=shift-2; | |
823 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
824 long kmer=0, rkmer=0; | |
825 int len=0; | |
826 int valid=0; | |
827 | |
828 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts); | |
829 | |
830 for(int i=Tools.max(0, firstBase-k+1), lim=Tools.min(lastBase+k-1, bases.length-1); i<=lim; i++){ | |
831 final byte base=bases[i]; | |
832 final long x=AminoAcid.baseToNumber[base]; | |
833 final long x2=AminoAcid.baseToComplementNumber[base]; | |
834 kmer=((kmer<<2)|x)&mask; | |
835 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
836 | |
837 if(x<0){ | |
838 len=0; | |
839 kmer=rkmer=0; | |
840 }else{ | |
841 len++; | |
842 } | |
843 | |
844 final int c=i-k+1; | |
845 if(i>=firstBase){ | |
846 if(len>=k){ | |
847 valid++; | |
848 int count=getCount(kmer, rkmer); | |
849 counts.set(c, count); | |
850 }else if(c>=0){ | |
851 counts.set(c, 0); | |
852 } | |
853 } | |
854 } | |
855 | |
856 return valid; | |
857 } | |
858 | |
859 @Override | |
860 public int fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer dummy){ | |
861 final int b=k-1; | |
862 final int lim=(positions==null ? bases.length : Tools.min(bases.length, positions.length()+k-1)); | |
863 final int shift=2*k; | |
864 final int shift2=shift-2; | |
865 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
866 long kmer=0, rkmer=0; | |
867 int len=0; | |
868 int valid=0; | |
869 | |
870 counts.clear(); | |
871 | |
872 for(int i=0, j=-b; i<lim; i++, j++){ | |
873 final byte base=bases[i]; | |
874 final long x=AminoAcid.baseToNumber[base]; | |
875 final long x2=AminoAcid.baseToComplementNumber[base]; | |
876 kmer=((kmer<<2)|x)&mask; | |
877 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
878 | |
879 if(x<0){ | |
880 len=0; | |
881 kmer=rkmer=0; | |
882 }else{ | |
883 len++; | |
884 } | |
885 | |
886 if(j>=0){ | |
887 if(len>=k && (positions==null || positions.get(j))){ | |
888 valid++; | |
889 int count=getCount(kmer, rkmer); | |
890 assert(i-k+1==counts.size()) : "j="+j+", counts="+counts.size()+", b="+(b)+", (i-k+1)="+(i-k+1); | |
891 assert(j==counts.size()); | |
892 counts.add(Tools.max(count, 0)); | |
893 // counts.set(i-k+1, count); | |
894 }else{ | |
895 counts.add(0); | |
896 // counts.set(i-k+1, 0); | |
897 } | |
898 } | |
899 } | |
900 | |
901 // assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size; | |
902 assert((counts.size==0 && bases.length<k) || counts.size==lim-k+1) : bases.length+", "+k+", "+counts.size; | |
903 | |
904 return valid; | |
905 } | |
906 | |
907 public int regenerateCounts(byte[] bases, IntList counts, final int ca){ | |
908 final int b=ca+k-1; | |
909 final int lim=Tools.min(bases.length, b+k+1); | |
910 final int shift=2*k; | |
911 final int shift2=shift-2; | |
912 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
913 long kmer=0, rkmer=0; | |
914 int len=0; | |
915 int valid=0; | |
916 | |
917 final int clen=counts.size; | |
918 | |
919 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts. | |
920 * i is an index in the base array, j is an index in the count array. */ | |
921 for(int i=b-k+1; i<lim; i++){ | |
922 final byte base=bases[i]; | |
923 final long x=AminoAcid.baseToNumber[base]; | |
924 final long x2=AminoAcid.baseToComplementNumber[base]; | |
925 kmer=((kmer<<2)|x)&mask; | |
926 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
927 | |
928 if(x<0){ | |
929 len=0; | |
930 kmer=rkmer=0; | |
931 }else{ | |
932 len++; | |
933 } | |
934 | |
935 if(i>=b){ | |
936 if(len>=k){ | |
937 valid++; | |
938 int count=getCount(kmer, rkmer); | |
939 counts.set(i-k+1, count); | |
940 }else{ | |
941 counts.set(i-k+1, 0); | |
942 } | |
943 } | |
944 } | |
945 | |
946 assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size; | |
947 assert(clen==counts.size); | |
948 | |
949 return valid; | |
950 } | |
951 | |
952 /** Returns number of valid kmers */ | |
953 public int fillKmers(byte[] bases, LongList kmers){ | |
954 final int blen=bases.length; | |
955 if(blen<k){return 0;} | |
956 final int min=k-1; | |
957 final int shift=2*k; | |
958 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
959 long kmer=0; | |
960 int len=0; | |
961 int valid=0; | |
962 | |
963 kmers.clear(); | |
964 | |
965 /* Loop through the bases, maintaining a forward kmer via bitshifts */ | |
966 for(int i=0; i<blen; i++){ | |
967 final byte b=bases[i]; | |
968 assert(b>=0) : Arrays.toString(bases); | |
969 final long x=AminoAcid.baseToNumber[b]; | |
970 kmer=((kmer<<2)|x)&mask; | |
971 if(x<0){ | |
972 len=0; | |
973 kmer=0; | |
974 }else{len++;} | |
975 if(i>=min){ | |
976 if(len>=k){ | |
977 kmers.add(kmer); | |
978 valid++; | |
979 }else{ | |
980 kmers.add(-1); | |
981 } | |
982 } | |
983 } | |
984 return valid; | |
985 } | |
986 | |
987 public void fillCounts(LongList kmers, IntList counts){ | |
988 counts.clear(); | |
989 for(int i=0; i<kmers.size; i++){ | |
990 long kmer=kmers.get(i); | |
991 if(kmer>=0){ | |
992 long rkmer=rcomp(kmer); | |
993 int count=getCount(kmer, rkmer); | |
994 counts.add(count); | |
995 }else{ | |
996 counts.add(0); | |
997 } | |
998 } | |
999 } | |
1000 | |
1001 | |
1002 /*--------------------------------------------------------------*/ | |
1003 /*---------------- Helper Methods ----------------*/ | |
1004 /*--------------------------------------------------------------*/ | |
1005 | |
1006 @Override | |
1007 public long regenerate(final int limit){ | |
1008 long sum=0; | |
1009 for(AbstractKmerTable akt : tables){ | |
1010 sum+=akt.regenerate(limit); | |
1011 } | |
1012 return sum; | |
1013 } | |
1014 | |
1015 public HashArray1D getTableForKey(long key){ | |
1016 return (HashArray1D) tables[kmerToWay(key)]; | |
1017 } | |
1018 | |
1019 @Override | |
1020 public HashArray1D getTable(int tnum){ | |
1021 return (HashArray1D) tables[tnum]; | |
1022 } | |
1023 | |
1024 @Override | |
1025 public long[] fillHistogram(int histMax) { | |
1026 return HistogramMaker.fillHistogram(tables, histMax); | |
1027 } | |
1028 | |
1029 @Override | |
1030 public void countGC(long[] gcCounts, int max) { | |
1031 for(AbstractKmerTable set : tables){ | |
1032 set.countGC(gcCounts, max); | |
1033 } | |
1034 } | |
1035 | |
1036 @Override | |
1037 public void initializeOwnership(){ | |
1038 OwnershipThread.initialize(tables); | |
1039 } | |
1040 | |
1041 @Override | |
1042 public void clearOwnership(){ | |
1043 OwnershipThread.clear(tables); | |
1044 } | |
1045 | |
1046 public long rightmostKmer(final ByteBuilder bb){ | |
1047 return rightmostKmer(bb.array, bb.length()); | |
1048 } | |
1049 | |
1050 public long rightmostKmer(final byte[] bases, final int blen){ | |
1051 if(blen<k){return -1;} | |
1052 final int shift=2*k; | |
1053 final int shift2=shift-2; | |
1054 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
1055 long kmer=0; | |
1056 long rkmer=0; | |
1057 int len=0; | |
1058 | |
1059 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */ | |
1060 { | |
1061 for(int i=blen-k; i<blen; i++){ | |
1062 final byte b=bases[i]; | |
1063 final long x=AminoAcid.baseToNumber[b]; | |
1064 final long x2=AminoAcid.baseToComplementNumber[b]; | |
1065 kmer=((kmer<<2)|x)&mask; | |
1066 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
1067 if(x<0){ | |
1068 len=0; | |
1069 kmer=rkmer=0; | |
1070 }else{len++;} | |
1071 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
1072 } | |
1073 } | |
1074 | |
1075 if(len<k){return -1;} | |
1076 else{assert(len==k);} | |
1077 return kmer; | |
1078 } | |
1079 | |
1080 public long leftmostKmer(final ByteBuilder bb){ | |
1081 return leftmostKmer(bb.array, bb.length()); | |
1082 } | |
1083 | |
1084 public long leftmostKmer(final byte[] bases, final int blen){ | |
1085 if(blen<k){return -1;} | |
1086 final int shift=2*k; | |
1087 final int shift2=shift-2; | |
1088 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
1089 long kmer=0; | |
1090 long rkmer=0; | |
1091 int len=0; | |
1092 | |
1093 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */ | |
1094 { | |
1095 for(int i=0; i<k; i++){ | |
1096 final byte b=bases[i]; | |
1097 final long x=AminoAcid.baseToNumber[b]; | |
1098 final long x2=AminoAcid.baseToComplementNumber[b]; | |
1099 kmer=((kmer<<2)|x)&mask; | |
1100 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
1101 if(x<0){ | |
1102 len=0; | |
1103 kmer=rkmer=0; | |
1104 }else{len++;} | |
1105 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
1106 } | |
1107 } | |
1108 | |
1109 if(len<k){return -1;} | |
1110 else{assert(len==k);} | |
1111 return kmer; | |
1112 } | |
1113 | |
1114 public boolean doubleClaim(final ByteBuilder bb, final int id){ | |
1115 return doubleClaim(bb.array, bb.length(), id); | |
1116 } | |
1117 | |
1118 /** Ensures there can be only one owner. */ | |
1119 public boolean doubleClaim(final byte[] bases, final int blength, final int id){ | |
1120 boolean success=claim(bases, blength, id, true); | |
1121 if(verbose){outstream.println("success1="+success+", id="+id+", blength="+blength);} | |
1122 if(!success){return false;} | |
1123 success=claim(bases, blength, id+CLAIM_OFFSET, true); | |
1124 if(verbose){outstream.println("success2="+success+", id="+id+", blength="+blength);} | |
1125 return success; | |
1126 } | |
1127 | |
1128 public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly){ | |
1129 return claim(bb.array, bb.length(), id, exitEarly); | |
1130 } | |
1131 | |
1132 public float calcCoverage(final byte[] bases, final int blength){ | |
1133 final int shift=2*k; | |
1134 final int shift2=shift-2; | |
1135 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
1136 long kmer=0, rkmer=0; | |
1137 int len=0; | |
1138 long sum=0, max=0; | |
1139 int kmers=0; | |
1140 | |
1141 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
1142 for(int i=0; i<blength; i++){ | |
1143 final byte b=bases[i]; | |
1144 final long x=AminoAcid.baseToNumber[b]; | |
1145 final long x2=AminoAcid.baseToComplementNumber[b]; | |
1146 kmer=((kmer<<2)|x)&mask; | |
1147 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
1148 if(x<0){ | |
1149 len=0; | |
1150 kmer=rkmer=0; | |
1151 }else{len++;} | |
1152 if(len>=k){ | |
1153 int count=getCount(kmer, rkmer); | |
1154 sum+=count; | |
1155 max=Tools.max(count, max); | |
1156 kmers++; | |
1157 } | |
1158 } | |
1159 return sum==0 ? 0 : sum/(float)kmers; | |
1160 } | |
1161 | |
1162 public float calcCoverage(final Contig contig){ | |
1163 final byte[] bases=contig.bases; | |
1164 if(bases.length<k){return 0;} | |
1165 final int shift=2*k; | |
1166 final int shift2=shift-2; | |
1167 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
1168 long kmer=0, rkmer=0; | |
1169 int len=0; | |
1170 long sum=0; | |
1171 int max=0, min=Integer.MAX_VALUE; | |
1172 int kmers=0; | |
1173 | |
1174 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
1175 for(int i=0; i<bases.length; i++){ | |
1176 final byte b=bases[i]; | |
1177 final long x=AminoAcid.baseToNumber[b]; | |
1178 final long x2=AminoAcid.baseToComplementNumber[b]; | |
1179 kmer=((kmer<<2)|x)&mask; | |
1180 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
1181 if(x<0){ | |
1182 len=0; | |
1183 kmer=rkmer=0; | |
1184 }else{len++;} | |
1185 if(len>=k){ | |
1186 int count=getCount(kmer, rkmer); | |
1187 sum+=count; | |
1188 max=Tools.max(count, max); | |
1189 min=Tools.min(count, min); | |
1190 kmers++; | |
1191 } | |
1192 } | |
1193 contig.coverage=sum==0 ? 0 : sum/(float)kmers; | |
1194 contig.maxCov=max; | |
1195 contig.minCov=sum==0 ? 0 : min; | |
1196 return contig.coverage; | |
1197 } | |
1198 | |
1199 public boolean claim(final byte[] bases, final int blength, final int id, boolean exitEarly){ | |
1200 if(verbose){outstream.println("Thread "+id+" claim start.");} | |
1201 final int shift=2*k; | |
1202 final int shift2=shift-2; | |
1203 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
1204 long kmer=0, rkmer=0; | |
1205 int len=0; | |
1206 boolean success=true; | |
1207 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
1208 for(int i=0; i<blength && success; i++){ | |
1209 final byte b=bases[i]; | |
1210 final long x=AminoAcid.baseToNumber[b]; | |
1211 final long x2=AminoAcid.baseToComplementNumber[b]; | |
1212 kmer=((kmer<<2)|x)&mask; | |
1213 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
1214 if(x<0){ | |
1215 len=0; | |
1216 kmer=rkmer=0; | |
1217 }else{len++;} | |
1218 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
1219 if(len>=k){ | |
1220 success=claim(kmer, rkmer, id/*, rid, i*/); | |
1221 success=(success || !exitEarly); | |
1222 } | |
1223 } | |
1224 return success; | |
1225 } | |
1226 | |
1227 public boolean claim(final long kmer, final long rkmer, final int id/*, final long rid, final int pos*/){ | |
1228 //TODO: rid and pos are just for debugging. | |
1229 final long key=toValue(kmer, rkmer); | |
1230 final int way=kmerToWay(key); | |
1231 final AbstractKmerTable table=tables[way]; | |
1232 final int count=table.getValue(key); | |
1233 assert(count==-1 || count>0) : count; | |
1234 // if(verbose /*|| true*/){outstream.println("Count="+count+".");} | |
1235 if(count<0){return true;} | |
1236 assert(count>0) : count; | |
1237 final int owner=table.setOwner(key, id); | |
1238 if(verbose){outstream.println("owner="+owner+".");} | |
1239 // assert(owner==id) : id+", "+owner+", "+rid+", "+pos; | |
1240 return owner==id; | |
1241 } | |
1242 | |
1243 public void release(ByteBuilder bb, final int id){ | |
1244 release(bb.array, bb.length(), id); | |
1245 } | |
1246 | |
1247 public void release(final byte[] bases, final int blength, final int id){ | |
1248 if(verbose /*|| true*/){outstream.println("*Thread "+id+" release start.");} | |
1249 final int shift=2*k; | |
1250 final int shift2=shift-2; | |
1251 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
1252 long kmer=0, rkmer=0; | |
1253 int len=0; | |
1254 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
1255 for(int i=0; i<blength; i++){ | |
1256 final byte b=bases[i]; | |
1257 final long x=AminoAcid.baseToNumber[b]; | |
1258 final long x2=AminoAcid.baseToComplementNumber[b]; | |
1259 kmer=((kmer<<2)|x)&mask; | |
1260 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
1261 if(x<0){ | |
1262 len=0; | |
1263 kmer=rkmer=0; | |
1264 }else{len++;} | |
1265 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
1266 if(len>=k){ | |
1267 release(kmer, rkmer, id); | |
1268 } | |
1269 } | |
1270 } | |
1271 | |
1272 public boolean release(final long kmer, final long rkmer, final int id){ | |
1273 return release(toValue(kmer, rkmer), id); | |
1274 } | |
1275 | |
1276 public boolean release(final long key, final int id){ | |
1277 final int way=kmerToWay(key); | |
1278 final AbstractKmerTable table=tables[way]; | |
1279 final int count=table.getValue(key); | |
1280 // if(verbose /*|| true*/){outstream.println("Count="+count+".");} | |
1281 if(count<1){return true;} | |
1282 return table.clearOwner(key, id); | |
1283 } | |
1284 | |
1285 public int findOwner(ByteBuilder bb, final int id){ | |
1286 return findOwner(bb.array, bb.length(), id); | |
1287 } | |
1288 | |
1289 public int findOwner(final byte[] bases, final int blength, final int id){ | |
1290 final int shift=2*k; | |
1291 final int shift2=shift-2; | |
1292 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
1293 long kmer=0, rkmer=0; | |
1294 int len=0; | |
1295 int maxOwner=-1; | |
1296 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
1297 for(int i=0; i<blength; i++){ | |
1298 final byte b=bases[i]; | |
1299 final long x=AminoAcid.baseToNumber[b]; | |
1300 final long x2=AminoAcid.baseToComplementNumber[b]; | |
1301 kmer=((kmer<<2)|x)&mask; | |
1302 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
1303 if(x<0){ | |
1304 len=0; | |
1305 kmer=rkmer=0; | |
1306 }else{len++;} | |
1307 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
1308 if(len>=k){ | |
1309 int owner=findOwner(kmer, rkmer); | |
1310 maxOwner=Tools.max(owner, maxOwner); | |
1311 if(maxOwner>id){break;} | |
1312 } | |
1313 } | |
1314 return maxOwner; | |
1315 } | |
1316 | |
1317 public int findOwner(final long kmer){ | |
1318 return findOwner(kmer, rcomp(kmer)); | |
1319 } | |
1320 | |
1321 public int findOwner(final long kmer, final long rkmer){ | |
1322 final long key=toValue(kmer, rkmer); | |
1323 final int way=kmerToWay(key); | |
1324 final AbstractKmerTable table=tables[way]; | |
1325 final int count=table.getValue(key); | |
1326 if(count<0){return -1;} | |
1327 final int owner=table.getOwner(key); | |
1328 return owner; | |
1329 } | |
1330 | |
1331 public int getCount(long kmer, long rkmer){ | |
1332 long key=toValue(kmer, rkmer); | |
1333 int way=kmerToWay(key); | |
1334 return tables[way].getValue(key); | |
1335 } | |
1336 | |
1337 public int getCount(long key){ | |
1338 int way=kmerToWay(key); | |
1339 return tables[way].getValue(key); | |
1340 } | |
1341 | |
1342 /*--------------------------------------------------------------*/ | |
1343 /*---------------- Fill Counts ----------------*/ | |
1344 /*--------------------------------------------------------------*/ | |
1345 | |
1346 public int fillRightCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){ | |
1347 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){ | |
1348 return fillRightCounts_fast(kmer, rkmer, counts, mask, shift2); | |
1349 }else{ | |
1350 return fillRightCounts_safe(kmer, rkmer, counts, mask, shift2); | |
1351 } | |
1352 } | |
1353 | |
1354 public int fillRightCounts_fast(final long kmer0, final long rkmer0, int[] counts, | |
1355 long mask, int shift2){ | |
1356 // assert((k&1)==1) : k; | |
1357 assert(MASK_CORE); | |
1358 final long kmer=(kmer0<<2)&mask; | |
1359 final long rkmer=(rkmer0>>>2); | |
1360 final long a=kmer&coreMask, b=rkmer&coreMask; | |
1361 | |
1362 int max=-1, maxPos=0; | |
1363 if(a==b){ | |
1364 return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2); | |
1365 }else if(a>b){ | |
1366 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2); | |
1367 final long core=a; | |
1368 final int way=kmerToWay(core); | |
1369 // final AbstractKmerTable table=tables[way]; | |
1370 final HashArray table=(HashArray) tables[way]; | |
1371 final int cell=table.kmerToCell(kmer); | |
1372 for(int i=0; i<=3; i++){ | |
1373 final long key=kmer|i; | |
1374 final int count=Tools.max(0, table.getValue(key, cell)); | |
1375 // final int count=Tools.max(0, table.getValue(key)); | |
1376 counts[i]=count; | |
1377 if(count>max){ | |
1378 max=count; | |
1379 maxPos=i; | |
1380 } | |
1381 } | |
1382 }else{ | |
1383 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2); | |
1384 //Use a higher increment for the left bits | |
1385 final long core=b; | |
1386 final int way=kmerToWay(core); | |
1387 // final AbstractKmerTable table=tables[way]; | |
1388 final HashArray table=(HashArray) tables[way]; | |
1389 final int cell=table.kmerToCell(rkmer); | |
1390 final long incr=1L<<(2*(k-1)); | |
1391 long j=3*incr; | |
1392 for(int i=0; i<=3; i++, j-=incr){ | |
1393 final long key=rkmer|j; | |
1394 final int count=Tools.max(0, table.getValue(key, cell)); | |
1395 // final int count=Tools.max(0, table.getValue(key)); | |
1396 counts[i]=count; | |
1397 if(count>max){ | |
1398 max=count; | |
1399 maxPos=i; | |
1400 } | |
1401 } | |
1402 } | |
1403 return maxPos; | |
1404 } | |
1405 | |
1406 //TODO: Change this to take advantage of coreMask | |
1407 //Requires special handling of core palindromes. | |
1408 //Thus it would be easiest to just handle odd kmers, and K is normally 31 anyway. | |
1409 public int fillRightCounts_safe(long kmer, long rkmer, int[] counts, long mask, int shift2){ | |
1410 assert(kmer==rcomp(rkmer)); | |
1411 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));} | |
1412 kmer=(kmer<<2)&mask; | |
1413 rkmer=(rkmer>>>2); | |
1414 int max=-1, maxPos=0; | |
1415 | |
1416 for(int i=0; i<=3; i++){ | |
1417 long kmer2=kmer|((long)i); | |
1418 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2); | |
1419 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));} | |
1420 assert(kmer2==(kmer2&mask)); | |
1421 assert(rkmer2==(rkmer2&mask)); | |
1422 assert(kmer2==rcomp(rkmer2)); | |
1423 long key=toValue(kmer2, rkmer2); | |
1424 int way=kmerToWay(key); | |
1425 int count=tables[way].getValue(key); | |
1426 assert(count==NOT_PRESENT || count>=0); | |
1427 count=Tools.max(count, 0); | |
1428 counts[i]=count; | |
1429 if(count>max){ | |
1430 max=count; | |
1431 maxPos=i; | |
1432 } | |
1433 } | |
1434 return maxPos; | |
1435 } | |
1436 | |
1437 /** For KmerCompressor. */ | |
1438 public int fillRightCountsRcompOnly(long kmer, long rkmer, int[] counts, long mask, int shift2){ | |
1439 assert(kmer==rcomp(rkmer)); | |
1440 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));} | |
1441 kmer=(kmer<<2)&mask; | |
1442 rkmer=(rkmer>>>2); | |
1443 int max=-1, maxPos=0; | |
1444 | |
1445 for(int i=0; i<=3; i++){ | |
1446 long kmer2=kmer|((long)i); | |
1447 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2); | |
1448 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));} | |
1449 assert(kmer2==(kmer2&mask)); | |
1450 assert(rkmer2==(rkmer2&mask)); | |
1451 assert(kmer2==rcomp(rkmer2)); | |
1452 long key=rkmer2; | |
1453 int way=kmerToWay(key); | |
1454 int count=tables[way].getValue(key); | |
1455 assert(count==NOT_PRESENT || count>=0); | |
1456 count=Tools.max(count, 0); | |
1457 counts[i]=count; | |
1458 if(count>max){ | |
1459 max=count; | |
1460 maxPos=i; | |
1461 } | |
1462 } | |
1463 return maxPos; | |
1464 } | |
1465 | |
1466 public int fillLeftCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){ | |
1467 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){ | |
1468 return fillLeftCounts_fast(kmer, rkmer, counts, mask, shift2); | |
1469 }else{ | |
1470 return fillLeftCounts_safe(kmer, rkmer, counts, mask, shift2); | |
1471 } | |
1472 } | |
1473 | |
1474 public int fillLeftCounts_fast(final long kmer0, final long rkmer0, int[] counts, | |
1475 long mask, int shift2){ | |
1476 // assert((k&1)==1) : k; | |
1477 assert(MASK_CORE); | |
1478 final long rkmer=(rkmer0<<2)&mask; | |
1479 final long kmer=(kmer0>>>2); | |
1480 final long a=kmer&coreMask, b=rkmer&coreMask; | |
1481 | |
1482 int max=-1, maxPos=0; | |
1483 if(a==b){ | |
1484 return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2); | |
1485 }else if(a>b){ | |
1486 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2); | |
1487 | |
1488 final long core=a; | |
1489 final int way=kmerToWay(core); | |
1490 // final AbstractKmerTable table=tables[way]; | |
1491 final HashArray table=(HashArray) tables[way]; | |
1492 final int cell=table.kmerToCell(kmer); | |
1493 final long incr=1L<<(2*(k-1)); | |
1494 long j=0;//Must be declared as a long, outside of loop | |
1495 for(int i=0; i<=3; i++, j+=incr){ | |
1496 // final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]); | |
1497 // final long kmer2=kmer|(((long)i)<<shift2); | |
1498 // final long key2=toValue(rkmer2, kmer2); | |
1499 // int way2=kmerToWay(key2); | |
1500 // int count2=tables[way2].getValue(key2); | |
1501 // count2=Tools.max(count2, 0); | |
1502 | |
1503 final long key=kmer|j; | |
1504 final int count=Tools.max(0, table.getValue(key, cell)); | |
1505 // int count=Tools.max(0, table.getValue(key)); | |
1506 | |
1507 // assert(way==way2) : i+", "+way+", "+way2; | |
1508 // assert(key==key2) : i+", "+Long.toBinaryString(kmer)+", "+ | |
1509 // Long.toBinaryString(key)+", "+Long.toBinaryString(key2)+ | |
1510 // ", "+Long.toBinaryString(j)+", "+Long.toBinaryString(incr); | |
1511 // assert(count==count2) : i+", "+count+", "+count2; | |
1512 | |
1513 counts[i]=count; | |
1514 if(count>max){ | |
1515 max=count; | |
1516 maxPos=i; | |
1517 } | |
1518 } | |
1519 return maxPos; | |
1520 }else{ | |
1521 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2); | |
1522 final long core=b; | |
1523 final int way=kmerToWay(core); | |
1524 // final AbstractKmerTable table=tables[way]; | |
1525 final HashArray table=(HashArray) tables[way]; | |
1526 final int cell=table.kmerToCell(rkmer); | |
1527 for(int i=0, j=3; i<=3; i++, j--){ | |
1528 final long key=rkmer|j; | |
1529 final int count=Tools.max(0, table.getValue(key, cell)); | |
1530 // final int count=Tools.max(0, table.getValue(key)); | |
1531 | |
1532 counts[i]=count; | |
1533 if(count>max){ | |
1534 max=count; | |
1535 maxPos=i; | |
1536 } | |
1537 } | |
1538 return maxPos; | |
1539 } | |
1540 } | |
1541 | |
1542 public int fillLeftCounts_safe(final long kmer0, final long rkmer0, int[] counts, long mask, int shift2){ | |
1543 assert(kmer0==rcomp(rkmer0)); | |
1544 if(verbose){outstream.println("fillLeftCounts: "+toText(kmer0)+", "+toText(rkmer0));} | |
1545 long rkmer=(rkmer0<<2)&mask; | |
1546 long kmer=(kmer0>>>2); | |
1547 int max=-1, maxPos=0; | |
1548 // assert(false) : shift2+", "+k; | |
1549 for(int i=0; i<=3; i++){ | |
1550 final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]); | |
1551 final long kmer2=kmer|(((long)i)<<shift2); | |
1552 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));} | |
1553 assert(kmer2==(kmer2&mask)); | |
1554 assert(rkmer2==(rkmer2&mask)); | |
1555 assert(kmer2==rcomp(rkmer2)) : "\n"+"kmer: \t"+toText(rcomp(rkmer2))+", "+toText(rcomp(kmer2)); | |
1556 long key=toValue(rkmer2, kmer2); | |
1557 int way=kmerToWay(key); | |
1558 int count=tables[way].getValue(key); | |
1559 assert(count==NOT_PRESENT || count>=0); | |
1560 count=Tools.max(count, 0); | |
1561 counts[i]=count; | |
1562 if(count>max){ | |
1563 max=count; | |
1564 maxPos=i; | |
1565 } | |
1566 } | |
1567 return maxPos; | |
1568 } | |
1569 | |
1570 /*--------------------------------------------------------------*/ | |
1571 /*---------------- Printing Methods ----------------*/ | |
1572 /*--------------------------------------------------------------*/ | |
1573 | |
1574 @Override | |
1575 public boolean dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){ | |
1576 if(fname==null){return false;} | |
1577 Timer t=new Timer(); | |
1578 | |
1579 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true); | |
1580 bsw.start(); | |
1581 for(AbstractKmerTable set : tables){ | |
1582 set.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining); | |
1583 } | |
1584 bsw.poisonAndWait(); | |
1585 | |
1586 t.stop(); | |
1587 if(printTime){outstream.println("Kmer Dump Time: \t"+t);} | |
1588 return bsw.errorState; | |
1589 } | |
1590 | |
1591 @Override | |
1592 public boolean dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){ | |
1593 final int threads=Tools.min(Shared.threads(), tables.length); | |
1594 if(threads<3 || DumpThread.NUM_THREADS==1){return dumpKmersAsBytes(fname, mincount, maxcount, printTime, remaining);} | |
1595 | |
1596 if(fname==null){return false;} | |
1597 Timer t=new Timer(); | |
1598 | |
1599 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true); | |
1600 bsw.start(); | |
1601 DumpThread.dump(k, mincount, maxcount, tables, bsw, remaining); | |
1602 bsw.poisonAndWait(); | |
1603 | |
1604 t.stop(); | |
1605 if(printTime){outstream.println("Kmer Dump Time: \t"+t);} | |
1606 return bsw.errorState; | |
1607 } | |
1608 | |
1609 /*--------------------------------------------------------------*/ | |
1610 /*---------------- Recall Methods ----------------*/ | |
1611 /*--------------------------------------------------------------*/ | |
1612 | |
1613 private final long rcomp(long kmer){return AminoAcid.reverseComplementBinaryFast(kmer, k);} | |
1614 private final StringBuilder toText(long kmer){return AbstractKmerTable.toText(kmer, k);} | |
1615 | |
1616 /*--------------------------------------------------------------*/ | |
1617 /*---------------- Static Methods ----------------*/ | |
1618 /*--------------------------------------------------------------*/ | |
1619 | |
1620 /** | |
1621 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined. | |
1622 * @param kmer Forward kmer | |
1623 * @param rkmer Reverse kmer | |
1624 * @return Canonical value | |
1625 */ | |
1626 public final long toValue(long kmer, long rkmer){ | |
1627 // long value=(rcomp ? Tools.max(kmer, rkmer) : kmer); | |
1628 // return value; | |
1629 if(!rcomp){return kmer;} | |
1630 final long a=kmer&coreMask, b=rkmer&coreMask; | |
1631 return a>b ? kmer : a<b ? rkmer : Tools.max(kmer, rkmer); | |
1632 } | |
1633 | |
1634 /*--------------------------------------------------------------*/ | |
1635 /*---------------- Final Primitives ----------------*/ | |
1636 /*--------------------------------------------------------------*/ | |
1637 | |
1638 @Override | |
1639 public int kbig(){return k;} | |
1640 @Override | |
1641 public long filterMemory(int pass){return ((pass&1)==0) ? filterMemory0 : filterMemory1;} | |
1642 @Override | |
1643 public boolean ecco(){return ecco;} | |
1644 @Override | |
1645 public boolean qtrimLeft(){return qtrimLeft;} | |
1646 @Override | |
1647 public boolean qtrimRight(){return qtrimRight;} | |
1648 @Override | |
1649 public float minAvgQuality(){return minAvgQuality;} | |
1650 @Override | |
1651 public long tableMemory(){return tableMemory;} | |
1652 @Override | |
1653 public long estimatedKmerCapacity(){return estimatedKmerCapacity;} | |
1654 @Override | |
1655 public int ways(){return ways;} | |
1656 @Override | |
1657 public boolean rcomp(){return rcomp;} | |
1658 | |
1659 public final int kmerToWay(final long kmer){ | |
1660 final int way=(int)((kmer&coreMask)%ways); | |
1661 return way; | |
1662 } | |
1663 | |
1664 /** Hold kmers. A kmer X such that X%WAYS=Y will be stored in tables[Y] */ | |
1665 private AbstractKmerTable[] tables; | |
1666 | |
1667 public AbstractKmerTable[] tables(){return tables;} | |
1668 | |
1669 public long filterMemoryOverride=0; | |
1670 | |
1671 public final int tableType; //AbstractKmerTable.ARRAY1D; | |
1672 | |
1673 private final int bytesPerKmer; | |
1674 | |
1675 private final long usableMemory; | |
1676 private final long filterMemory0; | |
1677 private final long filterMemory1; | |
1678 private final long tableMemory; | |
1679 private final long estimatedKmerCapacity; | |
1680 | |
1681 /** Number of tables (and threads, during loading) */ | |
1682 private final boolean prealloc; | |
1683 | |
1684 /** Number of tables (and threads, during loading) */ | |
1685 public final int ways; | |
1686 | |
1687 /** Normal kmer length */ | |
1688 public final int k; | |
1689 /** k-1; used in some expressions */ | |
1690 public final int k2; | |
1691 | |
1692 public final long coreMask; | |
1693 | |
1694 /** Look for reverse-complements as well as forward kmers. Default: true */ | |
1695 private final boolean rcomp; | |
1696 | |
1697 /** Quality-trim the left side */ | |
1698 public final boolean qtrimLeft; | |
1699 /** Quality-trim the right side */ | |
1700 public final boolean qtrimRight; | |
1701 /** Trim bases at this quality or below. Default: 4 */ | |
1702 public final float trimq; | |
1703 /** Error rate for trimming (derived from trimq) */ | |
1704 private final float trimE; | |
1705 | |
1706 /** Throw away reads below this average quality after trimming. Default: 0 */ | |
1707 public final float minAvgQuality; | |
1708 /** If positive, calculate average quality from the first X bases only. Default: 0 */ | |
1709 public final int minAvgQualityBases; | |
1710 | |
1711 /** Ignore kmers with probability of correctness less than this */ | |
1712 public final float minProb; | |
1713 | |
1714 /** Correct via overlap */ | |
1715 private final boolean ecco; | |
1716 | |
1717 /** Attempt to merge via overlap prior to counting kmers */ | |
1718 private final boolean merge; | |
1719 | |
1720 /*--------------------------------------------------------------*/ | |
1721 /*---------------- Walker ----------------*/ | |
1722 /*--------------------------------------------------------------*/ | |
1723 | |
1724 public Walker walk(){ | |
1725 return new WalkerKST(); | |
1726 } | |
1727 | |
1728 public class WalkerKST extends Walker { | |
1729 | |
1730 WalkerKST(){ | |
1731 w=tables[0].walk(); | |
1732 } | |
1733 | |
1734 /** | |
1735 * Fills this object with the next key and value. | |
1736 * @return True if successful. | |
1737 */ | |
1738 public boolean next(){ | |
1739 if(w==null){return false;} | |
1740 if(w.next()){return true;} | |
1741 if(tnum<tables.length){tnum++;} | |
1742 w=(tnum<tables.length ? tables[tnum].walk() : null); | |
1743 return w==null ? false : w.next(); | |
1744 } | |
1745 | |
1746 public long kmer(){return w.kmer();} | |
1747 public int value(){return w.value();} | |
1748 | |
1749 private Walker w=null; | |
1750 | |
1751 /** current table number */ | |
1752 private int tnum=0; | |
1753 } | |
1754 | |
1755 } |