comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTableSet.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package kmer;
2
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.BitSet;
7 import java.util.concurrent.atomic.AtomicLong;
8
9 import assemble.Contig;
10 import bloom.KmerCountAbstract;
11 import dna.AminoAcid;
12 import fileIO.ByteStreamWriter;
13 import fileIO.FileFormat;
14 import fileIO.ReadWrite;
15 import jgi.BBMerge;
16 import shared.Parse;
17 import shared.Parser;
18 import shared.PreParser;
19 import shared.Primes;
20 import shared.ReadStats;
21 import shared.Shared;
22 import shared.Timer;
23 import shared.Tools;
24 import shared.TrimRead;
25 import stream.ConcurrentReadInputStream;
26 import stream.FastaReadInputStream;
27 import stream.Read;
28 import structures.ByteBuilder;
29 import structures.IntList;
30 import structures.ListNum;
31 import structures.LongList;
32 import ukmer.Kmer;
33
34
35 /**
36 * Loads and holds kmers for Tadpole
37 * @author Brian Bushnell
38 * @date Jun 22, 2015
39 *
40 */
41 public class KmerTableSet extends AbstractKmerTableSet {
42
43 /**
44 * Code entrance from the command line.
45 * @param args Command line arguments
46 */
47 public static void main(String[] args){
48 Timer t=new Timer(), t2=new Timer();
49 t.start();
50 t2.start();
51 KmerTableSet set=new KmerTableSet(args);
52 t2.stop();
53 outstream.println("Initialization Time: \t"+t2);
54
55 ///And run it
56 set.process(t);
57
58 //Close the print stream if it was redirected
59 Shared.closeStream(outstream);
60 }
61
62
63 /**
64 * Constructor.
65 * @param args Command line arguments
66 */
67 private KmerTableSet(String[] args){
68 this(args, 12);//+5 if using ownership and building contigs
69 }
70
71
72 /**
73 * Constructor.
74 * @param args Command line arguments
75 */
76 public KmerTableSet(String[] args, final int bytesPerKmer_){
77 {//Preparse block for help, config files, and outstream
78 PreParser pp=new PreParser(args, getClass(), false);
79 args=pp.args;
80 outstream=pp.outstream;
81 }//TODO - no easy way to close outstream
82
83 /* Initialize local variables with defaults */
84 Parser parser=new Parser();
85 boolean prealloc_=false;
86 int k_=31;
87 int ways_=-1;
88 int filterMax_=2;
89 boolean ecco_=false, merge_=false;
90 boolean rcomp_=true;
91 double minProb_=defaultMinprob;
92 int tableType_=AbstractKmerTable.ARRAY1D;
93 /* Parse arguments */
94 for(int i=0; i<args.length; i++){
95
96 final String arg=args[i];
97 String[] split=arg.split("=");
98 String a=split[0].toLowerCase();
99 String b=split.length>1 ? split[1] : null;
100
101 if(Parser.parseCommonStatic(arg, a, b)){
102 //do nothing
103 }else if(Parser.parseZip(arg, a, b)){
104 //do nothing
105 }else if(Parser.parseQuality(arg, a, b)){
106 //do nothing
107 }else if(Parser.parseFasta(arg, a, b)){
108 //do nothing
109 }else if(parser.parseInterleaved(arg, a, b)){
110 //do nothing
111 }else if(parser.parseTrim(arg, a, b)){
112 //do nothing
113 }else if(a.equals("in") || a.equals("in1")){
114 in1.clear();
115 if(b!=null){
116 String[] s=b.split(",");
117 for(String ss : s){
118 in1.add(ss);
119 }
120 }
121 }else if(a.equals("in2")){
122 in2.clear();
123 if(b!=null){
124 String[] s=b.split(",");
125 for(String ss : s){
126 in2.add(ss);
127 }
128 }
129 }else if(a.equals("extra")){
130 extra.clear();
131 if(b!=null){
132 String[] s=b.split(",");
133 for(String ss : s){
134 extra.add(ss);
135 }
136 }
137 }else if(a.equals("append") || a.equals("app")){
138 append=ReadStats.append=Parse.parseBoolean(b);
139 }else if(a.equals("overwrite") || a.equals("ow")){
140 overwrite=Parse.parseBoolean(b);
141 }else if(a.equals("initialsize")){
142 initialSize=Parse.parseIntKMG(b);
143 }else if(a.equals("showstats") || a.equals("stats")){
144 showStats=Parse.parseBoolean(b);
145 }else if(a.equals("ways")){
146 ways_=Parse.parseIntKMG(b);
147 }else if(a.equals("buflen") || a.equals("bufflen") || a.equals("bufferlength")){
148 buflen=Parse.parseIntKMG(b);
149 }else if(a.equals("k")){
150 assert(b!=null) : "\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n";
151 k_=Parse.parseIntKMG(b);
152 }else if(a.equals("threads") || a.equals("t")){
153 THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b));
154 }else if(a.equals("showspeed") || a.equals("ss")){
155 showSpeed=Parse.parseBoolean(b);
156 }else if(a.equals("ecco")){
157 ecco_=Parse.parseBoolean(b);
158 }else if(a.equals("merge")){
159 merge_=Parse.parseBoolean(b);
160 }else if(a.equals("verbose")){
161 // assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
162 verbose=Parse.parseBoolean(b);
163 }else if(a.equals("verbose2")){
164 // assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
165 verbose2=Parse.parseBoolean(b);
166 }else if(a.equals("minprob")){
167 minProb_=Double.parseDouble(b);
168 }else if(a.equals("minprobprefilter") || a.equals("mpp")){
169 minProbPrefilter=Parse.parseBoolean(b);
170 }else if(a.equals("minprobmain") || a.equals("mpm")){
171 minProbMain=Parse.parseBoolean(b);
172 }else if(a.equals("reads") || a.startsWith("maxreads")){
173 maxReads=Parse.parseKMG(b);
174 }else if(a.equals("prealloc") || a.equals("preallocate")){
175 if(b==null || b.length()<1 || Character.isLetter(b.charAt(0))){
176 prealloc_=Parse.parseBoolean(b);
177 }else{
178 preallocFraction=Tools.max(0, Double.parseDouble(b));
179 prealloc_=(preallocFraction>0);
180 }
181 }else if(a.equals("prefilter")){
182 if(b==null || b.length()<1 || !Tools.isDigit(b.charAt(0))){
183 prefilter=Parse.parseBoolean(b);
184 }else{
185 filterMax_=Parse.parseIntKMG(b);
186 prefilter=filterMax_>0;
187 }
188 }else if(a.equals("prefiltersize") || a.equals("prefilterfraction") || a.equals("pff")){
189 prefilterFraction=Tools.max(0, Double.parseDouble(b));
190 assert(prefilterFraction<=1) : "prefiltersize must be 0-1, a fraction of total memory.";
191 prefilter=prefilterFraction>0;
192 }else if(a.equals("prehashes") || a.equals("hashes")){
193 prehashes=Parse.parseIntKMG(b);
194 }else if(a.equals("prefilterpasses") || a.equals("prepasses")){
195 assert(b!=null) : "Bad parameter: "+arg;
196 if(b.equalsIgnoreCase("auto")){
197 prepasses=-1;
198 }else{
199 prepasses=Parse.parseIntKMG(b);
200 }
201 }else if(a.equals("onepass")){
202 onePass=Parse.parseBoolean(b);
203 }else if(a.equals("passes")){
204 int passes=Parse.parseIntKMG(b);
205 onePass=(passes<2);
206 }else if(a.equals("rcomp")){
207 rcomp_=Parse.parseBoolean(b);
208 }else if(a.equals("tabletype")){
209 tableType_=Integer.parseInt(b);
210 }
211
212 else if(a.equalsIgnoreCase("filterMemoryOverride") || a.equalsIgnoreCase("filterMemory") ||
213 a.equalsIgnoreCase("prefilterMemory") || a.equalsIgnoreCase("filtermem")){
214 filterMemoryOverride=Parse.parseKMG(b);
215 }
216
217 else if(IGNORE_UNKNOWN_ARGS){
218 //Do nothing
219 }else{
220 throw new RuntimeException("Unknown parameter "+args[i]);
221 }
222 }
223
224 {//Process parser fields
225 Parser.processQuality();
226
227 qtrimLeft=parser.qtrimLeft;
228 qtrimRight=parser.qtrimRight;
229 trimq=parser.trimq;
230 trimE=parser.trimE();
231
232 minAvgQuality=parser.minAvgQuality;
233 minAvgQualityBases=parser.minAvgQualityBases;
234 amino=Shared.AMINO_IN;
235 if(amino){k_=Tools.min(k_, 12);}
236 }
237
238 if(prepasses==0 || !prefilter){
239 prepasses=0;
240 prefilter=false;
241 }
242
243
244 // assert(false) : prepasses+", "+onePass+", "+prefilter;
245
246 {
247 long memory=Runtime.getRuntime().maxMemory();
248 double xmsRatio=Shared.xmsRatio();
249 // long tmemory=Runtime.getRuntime().totalMemory();
250 usableMemory=(long)Tools.max(((memory-96000000)*(xmsRatio>0.97 ? 0.82 : 0.72)), memory*0.45);
251 if(prepasses==0 || !prefilter){
252 filterMemory0=filterMemory1=0;
253 }else if(filterMemoryOverride>0){
254 filterMemory0=filterMemory1=filterMemoryOverride;
255 }else{
256 double low=Tools.min(prefilterFraction, 1-prefilterFraction);
257 double high=1-low;
258 if(prepasses<0 || (prepasses&1)==1){//odd passes
259 filterMemory0=(long)(usableMemory*low);
260 filterMemory1=(long)(usableMemory*high);
261 }else{//even passes
262 filterMemory0=(long)(usableMemory*high);
263 filterMemory1=(long)(usableMemory*low);
264 }
265 }
266 tableMemory=(long)(usableMemory*.95-filterMemory0);
267 }
268
269 tableType=tableType_;
270 prealloc=prealloc_;
271 bytesPerKmer=bytesPerKmer_;
272 if(ways_<1){
273 long maxKmers=(2*tableMemory)/bytesPerKmer;
274 long minWays=Tools.min(10000, maxKmers/Integer.MAX_VALUE);
275 ways_=(int)Tools.max(31, (int)(Tools.min(96, Shared.threads())*2.5), minWays);
276 ways_=(int)Primes.primeAtLeast(ways_);
277 assert(ways_>0);
278 // System.err.println("ways="+ways_);
279 }
280
281 /* Set final variables; post-process and validate argument combinations */
282
283 onePass=onePass&prefilter;
284 ways=ways_;
285 filterMax=Tools.min(filterMax_, 0x7FFFFFFF);
286 ecco=ecco_;
287 merge=merge_;
288 minProb=(float)minProb_;
289 rcomp=rcomp_;
290 // assert(false) : tableMemory+", "+bytesPerKmer+", "+prealloc+", "+preallocFraction;
291 estimatedKmerCapacity=(long)((tableMemory*1.0/bytesPerKmer)*((prealloc ? preallocFraction : 0.81))*(HashArray.maxLoadFactorFinal*0.97));
292
293 // System.err.println("tableMemory="+tableMemory+", bytesPerKmer="+bytesPerKmer+", estimatedKmerCapacity="+estimatedKmerCapacity);
294 KmerCountAbstract.minProb=(minProbPrefilter ? minProb : 0);
295 k=k_;
296 k2=k-1;
297 int bitsPerBase=(amino ? 5 : 2);
298 int mask=(amino ? 31 : 3);
299 coreMask=(AbstractKmerTableSet.MASK_CORE ? ~(((-1L)<<(bitsPerBase*(k_-1)))|mask) : -1L);
300
301 if(k<1 || k>31){throw new RuntimeException("\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n");}
302
303 if(initialSize<1){
304 final long memOverWays=tableMemory/(bytesPerKmer*ways);
305 final double mem2=(prealloc ? preallocFraction : 1)*tableMemory;
306 initialSize=(prealloc || memOverWays<initialSizeDefault ? (int)Tools.min(2142000000, (long)(mem2/(bytesPerKmer*ways))) : initialSizeDefault);
307 if(initialSize!=initialSizeDefault){
308 System.err.println("Initial size set to "+initialSize);
309 }
310 }
311
312 /* Adjust I/O settings and filenames */
313
314 assert(FastaReadInputStream.settingsOK());
315
316 if(in1.isEmpty()){
317 //throw new RuntimeException("Error - at least one input file is required.");
318 }
319
320 for(int i=0; i<in1.size(); i++){
321 String s=in1.get(i);
322 if(s!=null && s.contains("#") && !new File(s).exists()){
323 int pound=s.lastIndexOf('#');
324 String a=s.substring(0, pound);
325 String b=s.substring(pound+1);
326 in1.set(i, a+1+b);
327 in2.add(a+2+b);
328 }
329 }
330
331 if(!extra.isEmpty()){
332 ArrayList<String> temp=(ArrayList<String>) extra.clone();
333 extra.clear();
334 for(String s : temp){
335 if(s!=null && s.contains("#") && !new File(s).exists()){
336 int pound=s.lastIndexOf('#');
337 String a=s.substring(0, pound);
338 String b=s.substring(pound+1);
339 extra.add(a);
340 extra.add(b);
341 }else{
342 extra.add(s);
343 }
344 }
345 }
346
347 {
348 boolean allowDuplicates=true;
349 if(!Tools.testInputFiles(allowDuplicates, true, in1, in2, extra)){
350 throw new RuntimeException("\nCan't read some input files.\n");
351 }
352 }
353 assert(THREADS>0);
354
355 if(DISPLAY_PROGRESS){
356 // assert(false);
357 outstream.println("Initial:");
358 outstream.println("Ways="+ways+", initialSize="+initialSize+", prefilter="+(prefilter ? "t" : "f")+", prealloc="+(prealloc ? (""+preallocFraction) : "f"));
359 Shared.printMemory();
360 outstream.println();
361 }
362 }
363
364 /*--------------------------------------------------------------*/
365 /*---------------- Outer Methods ----------------*/
366 /*--------------------------------------------------------------*/
367
368 @Override
369 public void clear(){
370 tables=null;
371 }
372
373 /*--------------------------------------------------------------*/
374 /*---------------- Inner Methods ----------------*/
375 /*--------------------------------------------------------------*/
376
377 @Override
378 public void allocateTables(){
379 assert(tables==null);
380 tables=null;
381 final long coreMask=(MASK_CORE ? ~(((-1L)<<(2*(k-1)))|3) : -1L);
382
383 ScheduleMaker scheduleMaker=new ScheduleMaker(ways, bytesPerKmer, prealloc,
384 (prealloc ? preallocFraction : 1.0), -1, (prefilter ? prepasses : 0), prefilterFraction, filterMemoryOverride);
385 int[] schedule=scheduleMaker.makeSchedule();
386 //assert(false) : preallocFraction+", "+ways+", "+Arrays.toString(schedule);
387 // System.err.println("DEBUG "+preallocFraction+", "+ways+", "+Arrays.toString(schedule));
388 tables=AbstractKmerTable.preallocate(ways, tableType, schedule, coreMask);
389
390 // tables=AbstractKmerTable.preallocate(ways, tableType, initialSize, coreMask, (!prealloc || preallocFraction<1));
391 }
392
393 /**
394 * Load reads into tables, using multiple LoadThread.
395 */
396 @Override
397 public long loadKmers(String fname1, String fname2){
398
399 /* Create read input stream */
400 final ConcurrentReadInputStream cris;
401 {
402 FileFormat ff1=FileFormat.testInput(fname1, FileFormat.FASTQ, null, true, true);
403 FileFormat ff2=FileFormat.testInput(fname2, FileFormat.FASTQ, null, true, true);
404 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ff1, ff2);
405 cris.start(); //4567
406 }
407
408 // /* Optionally skip the first reads, since initial reads may have lower quality */
409 // if(skipreads>0){
410 // long skipped=0;
411 //
412 // ListNum<Read> ln=cris.nextList();
413 // ArrayList<Read> reads=(ln!=null ? ln.list : null);
414 //
415 // while(skipped<skipreads && reads!=null && reads.size()>0){
416 // skipped+=reads.size();
417 //
418 // cris.returnList(ln);
419 // ln=cris.nextList();
420 // reads=(ln!=null ? ln.list : null);
421 // }
422 // cris.returnList(ln);
423 // if(reads==null || reads.isEmpty()){
424 // ReadWrite.closeStreams(cris);
425 // System.err.println("Skipped all of the reads.");
426 // System.exit(0);
427 // }
428 // }
429
430 /* Create ProcessThreads */
431 ArrayList<LoadThread> alpt=new ArrayList<LoadThread>(THREADS);
432 for(int i=0; i<THREADS; i++){alpt.add(new LoadThread(cris));}
433 for(LoadThread pt : alpt){pt.start();}
434
435 long added=0;
436
437 /* Wait for threads to die, and gather statistics */
438 for(LoadThread pt : alpt){
439 while(pt.getState()!=Thread.State.TERMINATED){
440 try {
441 pt.join();
442 } catch (InterruptedException e) {
443 // TODO Auto-generated catch block
444 e.printStackTrace();
445 }
446 }
447 added+=pt.added;
448
449 readsIn+=pt.readsInT;
450 basesIn+=pt.basesInT;
451 lowqReads+=pt.lowqReadsT;
452 lowqBases+=pt.lowqBasesT;
453 readsTrimmed+=pt.readsTrimmedT;
454 basesTrimmed+=pt.basesTrimmedT;
455 kmersIn+=pt.kmersInT;
456 }
457
458 /* Shut down I/O streams; capture error status */
459 errorState|=ReadWrite.closeStreams(cris);
460 return added;
461 }
462
463 /*--------------------------------------------------------------*/
464 /*---------------- Inner Classes ----------------*/
465 /*--------------------------------------------------------------*/
466
467 /**
468 * Loads kmers.
469 */
470 private class LoadThread extends Thread{
471
472 /**
473 * Constructor
474 * @param cris_ Read input stream
475 */
476 public LoadThread(ConcurrentReadInputStream cris_){
477 cris=cris_;
478 table=new HashBuffer(tables, buflen, k, false, true);
479 }
480
481 @Override
482 public void run(){
483 ListNum<Read> ln=cris.nextList();
484 ArrayList<Read> reads=(ln!=null ? ln.list : null);
485
486 //While there are more reads lists...
487 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
488
489 //For each read (or pair) in the list...
490 for(int i=0; i<reads.size(); i++){
491 Read r1=reads.get(i);
492 Read r2=r1.mate;
493
494 if(!r1.validated()){r1.validate(true);}
495 if(r2!=null && !r2.validated()){r2.validate(true);}
496
497 if(verbose){System.err.println("Considering read "+r1.id+" "+new String(r1.bases));}
498
499 readsInT++;
500 basesInT+=r1.length();
501 if(r2!=null){
502 readsInT++;
503 basesInT+=r2.length();
504 }
505
506 if(maxNs<Integer.MAX_VALUE){
507 if(r1!=null && r1.countUndefined()>maxNs){r1.setDiscarded(true);}
508 if(r2!=null && r2.countUndefined()>maxNs){r2.setDiscarded(true);}
509 }
510
511 //Determine whether to discard the reads based on average quality
512 if(minAvgQuality>0){
513 if(r1!=null && r1.quality!=null && r1.avgQuality(false, minAvgQualityBases)<minAvgQuality){r1.setDiscarded(true);}
514 if(r2!=null && r2.quality!=null && r2.avgQuality(false, minAvgQualityBases)<minAvgQuality){r2.setDiscarded(true);}
515 }
516
517 if(r1!=null){
518 if(qtrimLeft || qtrimRight){
519 int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1, false);
520 basesTrimmedT+=x;
521 readsTrimmedT+=(x>0 ? 1 : 0);
522 }
523 if(r1.length()<k){r1.setDiscarded(true);}
524 }
525 if(r2!=null){
526 if(qtrimLeft || qtrimRight){
527 int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1, false);
528 basesTrimmedT+=x;
529 readsTrimmedT+=(x>0 ? 1 : 0);
530 }
531 if(r2.length()<k){r2.setDiscarded(true);}
532 }
533
534 if((ecco || merge) && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){
535 if(merge){
536 final int insert=BBMerge.findOverlapStrict(r1, r2, false);
537 if(insert>0){
538 r2.reverseComplement();
539 r1=r1.joinRead(insert);
540 r2=null;
541 }
542 }else if(ecco){
543 BBMerge.findOverlapStrict(r1, r2, true);
544 }
545 }
546
547 if(minLen>0){
548 if(r1!=null && r1.length()<minLen){r1.setDiscarded(true);}
549 if(r2!=null && r2.length()<minLen){r2.setDiscarded(true);}
550 }
551
552 if(r1!=null){
553 if(r1.discarded()){
554 lowqBasesT+=r1.length();
555 lowqReadsT++;
556 }else{
557 long temp=addKmersToTable(r1);
558 added+=temp;
559 if(verbose){System.err.println("A: Added "+temp);}
560 }
561 }
562 if(r2!=null){
563 if(r2.discarded()){
564 lowqBasesT+=r2.length();
565 lowqReadsT++;
566 }else{
567 long temp=addKmersToTable(r2);
568 added+=temp;
569 if(verbose){System.err.println("B: Added "+temp);}
570 }
571 }
572 }
573
574 //Fetch a new read list
575 cris.returnList(ln);
576 ln=cris.nextList();
577 reads=(ln!=null ? ln.list : null);
578 }
579 cris.returnList(ln);
580 long temp=table.flush();
581 if(verbose){System.err.println("Flush: Added "+temp);}
582 added+=temp;
583 }
584
585 private final int addKmersToTableAA(final Read r){
586 if(r==null || r.bases==null){return 0;}
587 final float minProb2=(minProbMain ? minProb : 0);
588 final byte[] bases=r.bases;
589 final byte[] quals=r.quality;
590 final int shift=5*k;
591 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
592 long kmer=0;
593 int created=0;
594 int len=0;
595
596 if(bases==null || bases.length<k){return -1;}
597
598 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
599 float prob=1;
600 for(int i=0; i<bases.length; i++){
601 final byte b=bases[i];
602 final long x=AminoAcid.acidToNumber[b];
603
604 //Update kmers
605 kmer=((kmer<<5)|x)&mask;
606
607 if(minProb2>0 && quals!=null){//Update probability
608 prob=prob*PROB_CORRECT[quals[i]];
609 if(len>k){
610 byte oldq=quals[i-k];
611 prob=prob*PROB_CORRECT_INVERSE[oldq];
612 }
613 }
614
615 //Handle Ns
616 if(x<0){
617 len=0;
618 kmer=0;
619 prob=1;
620 }else{len++;}
621
622 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
623 if(len>=k && prob>=minProb2){
624 kmersInT++;
625 final long key=kmer;
626 if(!prefilter || prefilterArray.read(key)>filterMax2){
627 int temp=table.incrementAndReturnNumCreated(key, 1);
628 created+=temp;
629 if(verbose){System.err.println("C: Added "+temp);}
630 }
631 }
632 }
633
634 return created;
635 }
636
637 private final int addKmersToTable(final Read r){
638 if(amino){return addKmersToTableAA(r);}
639 if(onePass){return addKmersToTable_onePass(r);}
640 if(r==null || r.bases==null){return 0;}
641 final float minProb2=(minProbMain ? minProb : 0);
642 final byte[] bases=r.bases;
643 final byte[] quals=r.quality;
644 final int shift=2*k;
645 final int shift2=shift-2;
646 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
647 long kmer=0;
648 long rkmer=0;
649 int created=0;
650 int len=0;
651
652 if(bases==null || bases.length<k){return -1;}
653
654 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
655 float prob=1;
656 for(int i=0; i<bases.length; i++){
657 final byte b=bases[i];
658 final long x=AminoAcid.baseToNumber[b];
659 final long x2=AminoAcid.baseToComplementNumber[b];
660
661 //Update kmers
662 kmer=((kmer<<2)|x)&mask;
663 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
664
665 if(minProb2>0 && quals!=null){//Update probability
666 prob=prob*PROB_CORRECT[quals[i]];
667 if(len>k){
668 byte oldq=quals[i-k];
669 prob=prob*PROB_CORRECT_INVERSE[oldq];
670 }
671 }
672
673 //Handle Ns
674 if(x<0){
675 len=0;
676 kmer=rkmer=0;
677 prob=1;
678 }else{len++;}
679
680 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
681 if(len>=k && prob>=minProb2){
682 kmersInT++;
683 final long key=toValue(kmer, rkmer);
684 if(!prefilter || prefilterArray.read(key)>filterMax2){
685 int temp=table.incrementAndReturnNumCreated(key, 1);
686 created+=temp;
687 if(verbose){System.err.println("C: Added "+temp);}
688 }
689 }
690 }
691
692 return created;
693 }
694
695
696 private final int addKmersToTable_onePass(final Read r){
697 assert(prefilter);
698 if(r==null || r.bases==null){return 0;}
699 final byte[] bases=r.bases;
700 final byte[] quals=r.quality;
701 final int shift=2*k;
702 final int shift2=shift-2;
703 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
704 long kmer=0;
705 long rkmer=0;
706 int created=0;
707 int len=0;
708
709 if(bases==null || bases.length<k){return -1;}
710
711 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
712 float prob=1;
713 for(int i=0; i<bases.length; i++){
714 final byte b=bases[i];
715 final long x=AminoAcid.baseToNumber[b];
716 final long x2=AminoAcid.baseToComplementNumber[b];
717
718 //Update kmers
719 kmer=((kmer<<2)|x)&mask;
720 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
721
722 if(minProb>0 && quals!=null){//Update probability
723 prob=prob*PROB_CORRECT[quals[i]];
724 if(len>k){
725 byte oldq=quals[i-k];
726 prob=prob*PROB_CORRECT_INVERSE[oldq];
727 }
728 }
729
730 //Handle Ns
731 if(x<0){
732 len=0;
733 kmer=rkmer=0;
734 prob=1;
735 }else{len++;}
736
737 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
738 if(len>=k){
739 final long key=toValue(kmer, rkmer);
740 int count=prefilterArray.incrementAndReturnUnincremented(key, 1);
741 if(count>=filterMax2){
742 int temp=table.incrementAndReturnNumCreated(key, 1);
743 created+=temp;
744 if(verbose){System.err.println("D: Added "+temp);}
745 }
746 }
747 }
748 return created;
749 }
750
751 /*--------------------------------------------------------------*/
752
753 /** Input read stream */
754 private final ConcurrentReadInputStream cris;
755
756 private final HashBuffer table;
757
758 public long added=0;
759
760 private long readsInT=0;
761 private long basesInT=0;
762 private long lowqReadsT=0;
763 private long lowqBasesT=0;
764 private long readsTrimmedT=0;
765 private long basesTrimmedT=0;
766 private long kmersInT=0;
767
768 }
769
770
771 /*--------------------------------------------------------------*/
772 /*---------------- Convenience ----------------*/
773 /*--------------------------------------------------------------*/
774
775 public void regenerateKmers(byte[] bases, LongList kmers, IntList counts, final int a){
776 final int loc=a+k;
777 final int lim=Tools.min(counts.size, a+k+1);
778 final int shift=2*k;
779 final int shift2=shift-2;
780 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
781 long kmer=kmers.get(a);
782 long rkmer=rcomp(kmer);
783 int len=k;
784
785 // assert(false) : a+", "+loc+", "+lim;
786
787 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
788 for(int i=loc, j=a+1; j<lim; i++, j++){
789 final byte b=bases[i];
790 final long x=AminoAcid.baseToNumber[b];
791 final long x2=AminoAcid.baseToComplementNumber[b];
792 kmer=((kmer<<2)|x)&mask;
793 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
794 if(x<0){
795 len=0;
796 kmer=rkmer=0;
797 }else{len++;}
798
799 if(len>=k){
800 assert(kmers.get(j)!=kmer);
801 kmers.set(j, kmer);
802 int count=getCount(kmer, rkmer);
803 counts.set(j, count);
804 }else{
805 kmers.set(j, -1);
806 counts.set(j, 0);
807 }
808 }
809 }
810
811 @Override
812 public int regenerateCounts(byte[] bases, IntList counts, final Kmer dummy, BitSet changed){
813 assert(!changed.isEmpty());
814 final int firstBase=changed.nextSetBit(0), lastBase=changed.length()-1;
815 final int ca=firstBase-k;
816 // final int b=changed.nextSetBit(0);ca+kbig; //first base changed
817 final int firstCount=Tools.max(firstBase-k+1, 0), lastCount=Tools.min(counts.size-1, lastBase); //count limit
818 // System.err.println("ca="+ca+", b="+b+", lim="+lim);
819 // System.err.println("Regen from count "+(ca+1)+"-"+lim);
820
821 final int shift=2*k;
822 final int shift2=shift-2;
823 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
824 long kmer=0, rkmer=0;
825 int len=0;
826 int valid=0;
827
828 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts);
829
830 for(int i=Tools.max(0, firstBase-k+1), lim=Tools.min(lastBase+k-1, bases.length-1); i<=lim; i++){
831 final byte base=bases[i];
832 final long x=AminoAcid.baseToNumber[base];
833 final long x2=AminoAcid.baseToComplementNumber[base];
834 kmer=((kmer<<2)|x)&mask;
835 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
836
837 if(x<0){
838 len=0;
839 kmer=rkmer=0;
840 }else{
841 len++;
842 }
843
844 final int c=i-k+1;
845 if(i>=firstBase){
846 if(len>=k){
847 valid++;
848 int count=getCount(kmer, rkmer);
849 counts.set(c, count);
850 }else if(c>=0){
851 counts.set(c, 0);
852 }
853 }
854 }
855
856 return valid;
857 }
858
859 @Override
860 public int fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer dummy){
861 final int b=k-1;
862 final int lim=(positions==null ? bases.length : Tools.min(bases.length, positions.length()+k-1));
863 final int shift=2*k;
864 final int shift2=shift-2;
865 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
866 long kmer=0, rkmer=0;
867 int len=0;
868 int valid=0;
869
870 counts.clear();
871
872 for(int i=0, j=-b; i<lim; i++, j++){
873 final byte base=bases[i];
874 final long x=AminoAcid.baseToNumber[base];
875 final long x2=AminoAcid.baseToComplementNumber[base];
876 kmer=((kmer<<2)|x)&mask;
877 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
878
879 if(x<0){
880 len=0;
881 kmer=rkmer=0;
882 }else{
883 len++;
884 }
885
886 if(j>=0){
887 if(len>=k && (positions==null || positions.get(j))){
888 valid++;
889 int count=getCount(kmer, rkmer);
890 assert(i-k+1==counts.size()) : "j="+j+", counts="+counts.size()+", b="+(b)+", (i-k+1)="+(i-k+1);
891 assert(j==counts.size());
892 counts.add(Tools.max(count, 0));
893 // counts.set(i-k+1, count);
894 }else{
895 counts.add(0);
896 // counts.set(i-k+1, 0);
897 }
898 }
899 }
900
901 // assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size;
902 assert((counts.size==0 && bases.length<k) || counts.size==lim-k+1) : bases.length+", "+k+", "+counts.size;
903
904 return valid;
905 }
906
907 public int regenerateCounts(byte[] bases, IntList counts, final int ca){
908 final int b=ca+k-1;
909 final int lim=Tools.min(bases.length, b+k+1);
910 final int shift=2*k;
911 final int shift2=shift-2;
912 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
913 long kmer=0, rkmer=0;
914 int len=0;
915 int valid=0;
916
917 final int clen=counts.size;
918
919 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts.
920 * i is an index in the base array, j is an index in the count array. */
921 for(int i=b-k+1; i<lim; i++){
922 final byte base=bases[i];
923 final long x=AminoAcid.baseToNumber[base];
924 final long x2=AminoAcid.baseToComplementNumber[base];
925 kmer=((kmer<<2)|x)&mask;
926 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
927
928 if(x<0){
929 len=0;
930 kmer=rkmer=0;
931 }else{
932 len++;
933 }
934
935 if(i>=b){
936 if(len>=k){
937 valid++;
938 int count=getCount(kmer, rkmer);
939 counts.set(i-k+1, count);
940 }else{
941 counts.set(i-k+1, 0);
942 }
943 }
944 }
945
946 assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size;
947 assert(clen==counts.size);
948
949 return valid;
950 }
951
952 /** Returns number of valid kmers */
953 public int fillKmers(byte[] bases, LongList kmers){
954 final int blen=bases.length;
955 if(blen<k){return 0;}
956 final int min=k-1;
957 final int shift=2*k;
958 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
959 long kmer=0;
960 int len=0;
961 int valid=0;
962
963 kmers.clear();
964
965 /* Loop through the bases, maintaining a forward kmer via bitshifts */
966 for(int i=0; i<blen; i++){
967 final byte b=bases[i];
968 assert(b>=0) : Arrays.toString(bases);
969 final long x=AminoAcid.baseToNumber[b];
970 kmer=((kmer<<2)|x)&mask;
971 if(x<0){
972 len=0;
973 kmer=0;
974 }else{len++;}
975 if(i>=min){
976 if(len>=k){
977 kmers.add(kmer);
978 valid++;
979 }else{
980 kmers.add(-1);
981 }
982 }
983 }
984 return valid;
985 }
986
987 public void fillCounts(LongList kmers, IntList counts){
988 counts.clear();
989 for(int i=0; i<kmers.size; i++){
990 long kmer=kmers.get(i);
991 if(kmer>=0){
992 long rkmer=rcomp(kmer);
993 int count=getCount(kmer, rkmer);
994 counts.add(count);
995 }else{
996 counts.add(0);
997 }
998 }
999 }
1000
1001
1002 /*--------------------------------------------------------------*/
1003 /*---------------- Helper Methods ----------------*/
1004 /*--------------------------------------------------------------*/
1005
1006 @Override
1007 public long regenerate(final int limit){
1008 long sum=0;
1009 for(AbstractKmerTable akt : tables){
1010 sum+=akt.regenerate(limit);
1011 }
1012 return sum;
1013 }
1014
1015 public HashArray1D getTableForKey(long key){
1016 return (HashArray1D) tables[kmerToWay(key)];
1017 }
1018
1019 @Override
1020 public HashArray1D getTable(int tnum){
1021 return (HashArray1D) tables[tnum];
1022 }
1023
1024 @Override
1025 public long[] fillHistogram(int histMax) {
1026 return HistogramMaker.fillHistogram(tables, histMax);
1027 }
1028
1029 @Override
1030 public void countGC(long[] gcCounts, int max) {
1031 for(AbstractKmerTable set : tables){
1032 set.countGC(gcCounts, max);
1033 }
1034 }
1035
1036 @Override
1037 public void initializeOwnership(){
1038 OwnershipThread.initialize(tables);
1039 }
1040
1041 @Override
1042 public void clearOwnership(){
1043 OwnershipThread.clear(tables);
1044 }
1045
1046 public long rightmostKmer(final ByteBuilder bb){
1047 return rightmostKmer(bb.array, bb.length());
1048 }
1049
1050 public long rightmostKmer(final byte[] bases, final int blen){
1051 if(blen<k){return -1;}
1052 final int shift=2*k;
1053 final int shift2=shift-2;
1054 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
1055 long kmer=0;
1056 long rkmer=0;
1057 int len=0;
1058
1059 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
1060 {
1061 for(int i=blen-k; i<blen; i++){
1062 final byte b=bases[i];
1063 final long x=AminoAcid.baseToNumber[b];
1064 final long x2=AminoAcid.baseToComplementNumber[b];
1065 kmer=((kmer<<2)|x)&mask;
1066 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1067 if(x<0){
1068 len=0;
1069 kmer=rkmer=0;
1070 }else{len++;}
1071 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1072 }
1073 }
1074
1075 if(len<k){return -1;}
1076 else{assert(len==k);}
1077 return kmer;
1078 }
1079
1080 public long leftmostKmer(final ByteBuilder bb){
1081 return leftmostKmer(bb.array, bb.length());
1082 }
1083
1084 public long leftmostKmer(final byte[] bases, final int blen){
1085 if(blen<k){return -1;}
1086 final int shift=2*k;
1087 final int shift2=shift-2;
1088 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
1089 long kmer=0;
1090 long rkmer=0;
1091 int len=0;
1092
1093 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */
1094 {
1095 for(int i=0; i<k; i++){
1096 final byte b=bases[i];
1097 final long x=AminoAcid.baseToNumber[b];
1098 final long x2=AminoAcid.baseToComplementNumber[b];
1099 kmer=((kmer<<2)|x)&mask;
1100 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1101 if(x<0){
1102 len=0;
1103 kmer=rkmer=0;
1104 }else{len++;}
1105 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1106 }
1107 }
1108
1109 if(len<k){return -1;}
1110 else{assert(len==k);}
1111 return kmer;
1112 }
1113
1114 public boolean doubleClaim(final ByteBuilder bb, final int id){
1115 return doubleClaim(bb.array, bb.length(), id);
1116 }
1117
1118 /** Ensures there can be only one owner. */
1119 public boolean doubleClaim(final byte[] bases, final int blength, final int id){
1120 boolean success=claim(bases, blength, id, true);
1121 if(verbose){outstream.println("success1="+success+", id="+id+", blength="+blength);}
1122 if(!success){return false;}
1123 success=claim(bases, blength, id+CLAIM_OFFSET, true);
1124 if(verbose){outstream.println("success2="+success+", id="+id+", blength="+blength);}
1125 return success;
1126 }
1127
1128 public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly){
1129 return claim(bb.array, bb.length(), id, exitEarly);
1130 }
1131
1132 public float calcCoverage(final byte[] bases, final int blength){
1133 final int shift=2*k;
1134 final int shift2=shift-2;
1135 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
1136 long kmer=0, rkmer=0;
1137 int len=0;
1138 long sum=0, max=0;
1139 int kmers=0;
1140
1141 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1142 for(int i=0; i<blength; i++){
1143 final byte b=bases[i];
1144 final long x=AminoAcid.baseToNumber[b];
1145 final long x2=AminoAcid.baseToComplementNumber[b];
1146 kmer=((kmer<<2)|x)&mask;
1147 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1148 if(x<0){
1149 len=0;
1150 kmer=rkmer=0;
1151 }else{len++;}
1152 if(len>=k){
1153 int count=getCount(kmer, rkmer);
1154 sum+=count;
1155 max=Tools.max(count, max);
1156 kmers++;
1157 }
1158 }
1159 return sum==0 ? 0 : sum/(float)kmers;
1160 }
1161
1162 public float calcCoverage(final Contig contig){
1163 final byte[] bases=contig.bases;
1164 if(bases.length<k){return 0;}
1165 final int shift=2*k;
1166 final int shift2=shift-2;
1167 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
1168 long kmer=0, rkmer=0;
1169 int len=0;
1170 long sum=0;
1171 int max=0, min=Integer.MAX_VALUE;
1172 int kmers=0;
1173
1174 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1175 for(int i=0; i<bases.length; i++){
1176 final byte b=bases[i];
1177 final long x=AminoAcid.baseToNumber[b];
1178 final long x2=AminoAcid.baseToComplementNumber[b];
1179 kmer=((kmer<<2)|x)&mask;
1180 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1181 if(x<0){
1182 len=0;
1183 kmer=rkmer=0;
1184 }else{len++;}
1185 if(len>=k){
1186 int count=getCount(kmer, rkmer);
1187 sum+=count;
1188 max=Tools.max(count, max);
1189 min=Tools.min(count, min);
1190 kmers++;
1191 }
1192 }
1193 contig.coverage=sum==0 ? 0 : sum/(float)kmers;
1194 contig.maxCov=max;
1195 contig.minCov=sum==0 ? 0 : min;
1196 return contig.coverage;
1197 }
1198
1199 public boolean claim(final byte[] bases, final int blength, final int id, boolean exitEarly){
1200 if(verbose){outstream.println("Thread "+id+" claim start.");}
1201 final int shift=2*k;
1202 final int shift2=shift-2;
1203 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
1204 long kmer=0, rkmer=0;
1205 int len=0;
1206 boolean success=true;
1207 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1208 for(int i=0; i<blength && success; i++){
1209 final byte b=bases[i];
1210 final long x=AminoAcid.baseToNumber[b];
1211 final long x2=AminoAcid.baseToComplementNumber[b];
1212 kmer=((kmer<<2)|x)&mask;
1213 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1214 if(x<0){
1215 len=0;
1216 kmer=rkmer=0;
1217 }else{len++;}
1218 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1219 if(len>=k){
1220 success=claim(kmer, rkmer, id/*, rid, i*/);
1221 success=(success || !exitEarly);
1222 }
1223 }
1224 return success;
1225 }
1226
1227 public boolean claim(final long kmer, final long rkmer, final int id/*, final long rid, final int pos*/){
1228 //TODO: rid and pos are just for debugging.
1229 final long key=toValue(kmer, rkmer);
1230 final int way=kmerToWay(key);
1231 final AbstractKmerTable table=tables[way];
1232 final int count=table.getValue(key);
1233 assert(count==-1 || count>0) : count;
1234 // if(verbose /*|| true*/){outstream.println("Count="+count+".");}
1235 if(count<0){return true;}
1236 assert(count>0) : count;
1237 final int owner=table.setOwner(key, id);
1238 if(verbose){outstream.println("owner="+owner+".");}
1239 // assert(owner==id) : id+", "+owner+", "+rid+", "+pos;
1240 return owner==id;
1241 }
1242
1243 public void release(ByteBuilder bb, final int id){
1244 release(bb.array, bb.length(), id);
1245 }
1246
1247 public void release(final byte[] bases, final int blength, final int id){
1248 if(verbose /*|| true*/){outstream.println("*Thread "+id+" release start.");}
1249 final int shift=2*k;
1250 final int shift2=shift-2;
1251 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
1252 long kmer=0, rkmer=0;
1253 int len=0;
1254 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1255 for(int i=0; i<blength; i++){
1256 final byte b=bases[i];
1257 final long x=AminoAcid.baseToNumber[b];
1258 final long x2=AminoAcid.baseToComplementNumber[b];
1259 kmer=((kmer<<2)|x)&mask;
1260 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1261 if(x<0){
1262 len=0;
1263 kmer=rkmer=0;
1264 }else{len++;}
1265 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1266 if(len>=k){
1267 release(kmer, rkmer, id);
1268 }
1269 }
1270 }
1271
1272 public boolean release(final long kmer, final long rkmer, final int id){
1273 return release(toValue(kmer, rkmer), id);
1274 }
1275
1276 public boolean release(final long key, final int id){
1277 final int way=kmerToWay(key);
1278 final AbstractKmerTable table=tables[way];
1279 final int count=table.getValue(key);
1280 // if(verbose /*|| true*/){outstream.println("Count="+count+".");}
1281 if(count<1){return true;}
1282 return table.clearOwner(key, id);
1283 }
1284
1285 public int findOwner(ByteBuilder bb, final int id){
1286 return findOwner(bb.array, bb.length(), id);
1287 }
1288
1289 public int findOwner(final byte[] bases, final int blength, final int id){
1290 final int shift=2*k;
1291 final int shift2=shift-2;
1292 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
1293 long kmer=0, rkmer=0;
1294 int len=0;
1295 int maxOwner=-1;
1296 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
1297 for(int i=0; i<blength; i++){
1298 final byte b=bases[i];
1299 final long x=AminoAcid.baseToNumber[b];
1300 final long x2=AminoAcid.baseToComplementNumber[b];
1301 kmer=((kmer<<2)|x)&mask;
1302 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
1303 if(x<0){
1304 len=0;
1305 kmer=rkmer=0;
1306 }else{len++;}
1307 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
1308 if(len>=k){
1309 int owner=findOwner(kmer, rkmer);
1310 maxOwner=Tools.max(owner, maxOwner);
1311 if(maxOwner>id){break;}
1312 }
1313 }
1314 return maxOwner;
1315 }
1316
1317 public int findOwner(final long kmer){
1318 return findOwner(kmer, rcomp(kmer));
1319 }
1320
1321 public int findOwner(final long kmer, final long rkmer){
1322 final long key=toValue(kmer, rkmer);
1323 final int way=kmerToWay(key);
1324 final AbstractKmerTable table=tables[way];
1325 final int count=table.getValue(key);
1326 if(count<0){return -1;}
1327 final int owner=table.getOwner(key);
1328 return owner;
1329 }
1330
1331 public int getCount(long kmer, long rkmer){
1332 long key=toValue(kmer, rkmer);
1333 int way=kmerToWay(key);
1334 return tables[way].getValue(key);
1335 }
1336
1337 public int getCount(long key){
1338 int way=kmerToWay(key);
1339 return tables[way].getValue(key);
1340 }
1341
1342 /*--------------------------------------------------------------*/
1343 /*---------------- Fill Counts ----------------*/
1344 /*--------------------------------------------------------------*/
1345
1346 public int fillRightCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){
1347 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){
1348 return fillRightCounts_fast(kmer, rkmer, counts, mask, shift2);
1349 }else{
1350 return fillRightCounts_safe(kmer, rkmer, counts, mask, shift2);
1351 }
1352 }
1353
1354 public int fillRightCounts_fast(final long kmer0, final long rkmer0, int[] counts,
1355 long mask, int shift2){
1356 // assert((k&1)==1) : k;
1357 assert(MASK_CORE);
1358 final long kmer=(kmer0<<2)&mask;
1359 final long rkmer=(rkmer0>>>2);
1360 final long a=kmer&coreMask, b=rkmer&coreMask;
1361
1362 int max=-1, maxPos=0;
1363 if(a==b){
1364 return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
1365 }else if(a>b){
1366 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
1367 final long core=a;
1368 final int way=kmerToWay(core);
1369 // final AbstractKmerTable table=tables[way];
1370 final HashArray table=(HashArray) tables[way];
1371 final int cell=table.kmerToCell(kmer);
1372 for(int i=0; i<=3; i++){
1373 final long key=kmer|i;
1374 final int count=Tools.max(0, table.getValue(key, cell));
1375 // final int count=Tools.max(0, table.getValue(key));
1376 counts[i]=count;
1377 if(count>max){
1378 max=count;
1379 maxPos=i;
1380 }
1381 }
1382 }else{
1383 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
1384 //Use a higher increment for the left bits
1385 final long core=b;
1386 final int way=kmerToWay(core);
1387 // final AbstractKmerTable table=tables[way];
1388 final HashArray table=(HashArray) tables[way];
1389 final int cell=table.kmerToCell(rkmer);
1390 final long incr=1L<<(2*(k-1));
1391 long j=3*incr;
1392 for(int i=0; i<=3; i++, j-=incr){
1393 final long key=rkmer|j;
1394 final int count=Tools.max(0, table.getValue(key, cell));
1395 // final int count=Tools.max(0, table.getValue(key));
1396 counts[i]=count;
1397 if(count>max){
1398 max=count;
1399 maxPos=i;
1400 }
1401 }
1402 }
1403 return maxPos;
1404 }
1405
1406 //TODO: Change this to take advantage of coreMask
1407 //Requires special handling of core palindromes.
1408 //Thus it would be easiest to just handle odd kmers, and K is normally 31 anyway.
1409 public int fillRightCounts_safe(long kmer, long rkmer, int[] counts, long mask, int shift2){
1410 assert(kmer==rcomp(rkmer));
1411 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));}
1412 kmer=(kmer<<2)&mask;
1413 rkmer=(rkmer>>>2);
1414 int max=-1, maxPos=0;
1415
1416 for(int i=0; i<=3; i++){
1417 long kmer2=kmer|((long)i);
1418 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2);
1419 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
1420 assert(kmer2==(kmer2&mask));
1421 assert(rkmer2==(rkmer2&mask));
1422 assert(kmer2==rcomp(rkmer2));
1423 long key=toValue(kmer2, rkmer2);
1424 int way=kmerToWay(key);
1425 int count=tables[way].getValue(key);
1426 assert(count==NOT_PRESENT || count>=0);
1427 count=Tools.max(count, 0);
1428 counts[i]=count;
1429 if(count>max){
1430 max=count;
1431 maxPos=i;
1432 }
1433 }
1434 return maxPos;
1435 }
1436
1437 /** For KmerCompressor. */
1438 public int fillRightCountsRcompOnly(long kmer, long rkmer, int[] counts, long mask, int shift2){
1439 assert(kmer==rcomp(rkmer));
1440 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));}
1441 kmer=(kmer<<2)&mask;
1442 rkmer=(rkmer>>>2);
1443 int max=-1, maxPos=0;
1444
1445 for(int i=0; i<=3; i++){
1446 long kmer2=kmer|((long)i);
1447 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2);
1448 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
1449 assert(kmer2==(kmer2&mask));
1450 assert(rkmer2==(rkmer2&mask));
1451 assert(kmer2==rcomp(rkmer2));
1452 long key=rkmer2;
1453 int way=kmerToWay(key);
1454 int count=tables[way].getValue(key);
1455 assert(count==NOT_PRESENT || count>=0);
1456 count=Tools.max(count, 0);
1457 counts[i]=count;
1458 if(count>max){
1459 max=count;
1460 maxPos=i;
1461 }
1462 }
1463 return maxPos;
1464 }
1465
1466 public int fillLeftCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){
1467 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){
1468 return fillLeftCounts_fast(kmer, rkmer, counts, mask, shift2);
1469 }else{
1470 return fillLeftCounts_safe(kmer, rkmer, counts, mask, shift2);
1471 }
1472 }
1473
1474 public int fillLeftCounts_fast(final long kmer0, final long rkmer0, int[] counts,
1475 long mask, int shift2){
1476 // assert((k&1)==1) : k;
1477 assert(MASK_CORE);
1478 final long rkmer=(rkmer0<<2)&mask;
1479 final long kmer=(kmer0>>>2);
1480 final long a=kmer&coreMask, b=rkmer&coreMask;
1481
1482 int max=-1, maxPos=0;
1483 if(a==b){
1484 return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
1485 }else if(a>b){
1486 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
1487
1488 final long core=a;
1489 final int way=kmerToWay(core);
1490 // final AbstractKmerTable table=tables[way];
1491 final HashArray table=(HashArray) tables[way];
1492 final int cell=table.kmerToCell(kmer);
1493 final long incr=1L<<(2*(k-1));
1494 long j=0;//Must be declared as a long, outside of loop
1495 for(int i=0; i<=3; i++, j+=incr){
1496 // final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]);
1497 // final long kmer2=kmer|(((long)i)<<shift2);
1498 // final long key2=toValue(rkmer2, kmer2);
1499 // int way2=kmerToWay(key2);
1500 // int count2=tables[way2].getValue(key2);
1501 // count2=Tools.max(count2, 0);
1502
1503 final long key=kmer|j;
1504 final int count=Tools.max(0, table.getValue(key, cell));
1505 // int count=Tools.max(0, table.getValue(key));
1506
1507 // assert(way==way2) : i+", "+way+", "+way2;
1508 // assert(key==key2) : i+", "+Long.toBinaryString(kmer)+", "+
1509 // Long.toBinaryString(key)+", "+Long.toBinaryString(key2)+
1510 // ", "+Long.toBinaryString(j)+", "+Long.toBinaryString(incr);
1511 // assert(count==count2) : i+", "+count+", "+count2;
1512
1513 counts[i]=count;
1514 if(count>max){
1515 max=count;
1516 maxPos=i;
1517 }
1518 }
1519 return maxPos;
1520 }else{
1521 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
1522 final long core=b;
1523 final int way=kmerToWay(core);
1524 // final AbstractKmerTable table=tables[way];
1525 final HashArray table=(HashArray) tables[way];
1526 final int cell=table.kmerToCell(rkmer);
1527 for(int i=0, j=3; i<=3; i++, j--){
1528 final long key=rkmer|j;
1529 final int count=Tools.max(0, table.getValue(key, cell));
1530 // final int count=Tools.max(0, table.getValue(key));
1531
1532 counts[i]=count;
1533 if(count>max){
1534 max=count;
1535 maxPos=i;
1536 }
1537 }
1538 return maxPos;
1539 }
1540 }
1541
1542 public int fillLeftCounts_safe(final long kmer0, final long rkmer0, int[] counts, long mask, int shift2){
1543 assert(kmer0==rcomp(rkmer0));
1544 if(verbose){outstream.println("fillLeftCounts: "+toText(kmer0)+", "+toText(rkmer0));}
1545 long rkmer=(rkmer0<<2)&mask;
1546 long kmer=(kmer0>>>2);
1547 int max=-1, maxPos=0;
1548 // assert(false) : shift2+", "+k;
1549 for(int i=0; i<=3; i++){
1550 final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]);
1551 final long kmer2=kmer|(((long)i)<<shift2);
1552 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
1553 assert(kmer2==(kmer2&mask));
1554 assert(rkmer2==(rkmer2&mask));
1555 assert(kmer2==rcomp(rkmer2)) : "\n"+"kmer: \t"+toText(rcomp(rkmer2))+", "+toText(rcomp(kmer2));
1556 long key=toValue(rkmer2, kmer2);
1557 int way=kmerToWay(key);
1558 int count=tables[way].getValue(key);
1559 assert(count==NOT_PRESENT || count>=0);
1560 count=Tools.max(count, 0);
1561 counts[i]=count;
1562 if(count>max){
1563 max=count;
1564 maxPos=i;
1565 }
1566 }
1567 return maxPos;
1568 }
1569
1570 /*--------------------------------------------------------------*/
1571 /*---------------- Printing Methods ----------------*/
1572 /*--------------------------------------------------------------*/
1573
1574 @Override
1575 public boolean dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
1576 if(fname==null){return false;}
1577 Timer t=new Timer();
1578
1579 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
1580 bsw.start();
1581 for(AbstractKmerTable set : tables){
1582 set.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining);
1583 }
1584 bsw.poisonAndWait();
1585
1586 t.stop();
1587 if(printTime){outstream.println("Kmer Dump Time: \t"+t);}
1588 return bsw.errorState;
1589 }
1590
1591 @Override
1592 public boolean dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
1593 final int threads=Tools.min(Shared.threads(), tables.length);
1594 if(threads<3 || DumpThread.NUM_THREADS==1){return dumpKmersAsBytes(fname, mincount, maxcount, printTime, remaining);}
1595
1596 if(fname==null){return false;}
1597 Timer t=new Timer();
1598
1599 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
1600 bsw.start();
1601 DumpThread.dump(k, mincount, maxcount, tables, bsw, remaining);
1602 bsw.poisonAndWait();
1603
1604 t.stop();
1605 if(printTime){outstream.println("Kmer Dump Time: \t"+t);}
1606 return bsw.errorState;
1607 }
1608
1609 /*--------------------------------------------------------------*/
1610 /*---------------- Recall Methods ----------------*/
1611 /*--------------------------------------------------------------*/
1612
1613 private final long rcomp(long kmer){return AminoAcid.reverseComplementBinaryFast(kmer, k);}
1614 private final StringBuilder toText(long kmer){return AbstractKmerTable.toText(kmer, k);}
1615
1616 /*--------------------------------------------------------------*/
1617 /*---------------- Static Methods ----------------*/
1618 /*--------------------------------------------------------------*/
1619
1620 /**
1621 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined.
1622 * @param kmer Forward kmer
1623 * @param rkmer Reverse kmer
1624 * @return Canonical value
1625 */
1626 public final long toValue(long kmer, long rkmer){
1627 // long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
1628 // return value;
1629 if(!rcomp){return kmer;}
1630 final long a=kmer&coreMask, b=rkmer&coreMask;
1631 return a>b ? kmer : a<b ? rkmer : Tools.max(kmer, rkmer);
1632 }
1633
1634 /*--------------------------------------------------------------*/
1635 /*---------------- Final Primitives ----------------*/
1636 /*--------------------------------------------------------------*/
1637
1638 @Override
1639 public int kbig(){return k;}
1640 @Override
1641 public long filterMemory(int pass){return ((pass&1)==0) ? filterMemory0 : filterMemory1;}
1642 @Override
1643 public boolean ecco(){return ecco;}
1644 @Override
1645 public boolean qtrimLeft(){return qtrimLeft;}
1646 @Override
1647 public boolean qtrimRight(){return qtrimRight;}
1648 @Override
1649 public float minAvgQuality(){return minAvgQuality;}
1650 @Override
1651 public long tableMemory(){return tableMemory;}
1652 @Override
1653 public long estimatedKmerCapacity(){return estimatedKmerCapacity;}
1654 @Override
1655 public int ways(){return ways;}
1656 @Override
1657 public boolean rcomp(){return rcomp;}
1658
1659 public final int kmerToWay(final long kmer){
1660 final int way=(int)((kmer&coreMask)%ways);
1661 return way;
1662 }
1663
1664 /** Hold kmers. A kmer X such that X%WAYS=Y will be stored in tables[Y] */
1665 private AbstractKmerTable[] tables;
1666
1667 public AbstractKmerTable[] tables(){return tables;}
1668
1669 public long filterMemoryOverride=0;
1670
1671 public final int tableType; //AbstractKmerTable.ARRAY1D;
1672
1673 private final int bytesPerKmer;
1674
1675 private final long usableMemory;
1676 private final long filterMemory0;
1677 private final long filterMemory1;
1678 private final long tableMemory;
1679 private final long estimatedKmerCapacity;
1680
1681 /** Number of tables (and threads, during loading) */
1682 private final boolean prealloc;
1683
1684 /** Number of tables (and threads, during loading) */
1685 public final int ways;
1686
1687 /** Normal kmer length */
1688 public final int k;
1689 /** k-1; used in some expressions */
1690 public final int k2;
1691
1692 public final long coreMask;
1693
1694 /** Look for reverse-complements as well as forward kmers. Default: true */
1695 private final boolean rcomp;
1696
1697 /** Quality-trim the left side */
1698 public final boolean qtrimLeft;
1699 /** Quality-trim the right side */
1700 public final boolean qtrimRight;
1701 /** Trim bases at this quality or below. Default: 4 */
1702 public final float trimq;
1703 /** Error rate for trimming (derived from trimq) */
1704 private final float trimE;
1705
1706 /** Throw away reads below this average quality after trimming. Default: 0 */
1707 public final float minAvgQuality;
1708 /** If positive, calculate average quality from the first X bases only. Default: 0 */
1709 public final int minAvgQualityBases;
1710
1711 /** Ignore kmers with probability of correctness less than this */
1712 public final float minProb;
1713
1714 /** Correct via overlap */
1715 private final boolean ecco;
1716
1717 /** Attempt to merge via overlap prior to counting kmers */
1718 private final boolean merge;
1719
1720 /*--------------------------------------------------------------*/
1721 /*---------------- Walker ----------------*/
1722 /*--------------------------------------------------------------*/
1723
1724 public Walker walk(){
1725 return new WalkerKST();
1726 }
1727
1728 public class WalkerKST extends Walker {
1729
1730 WalkerKST(){
1731 w=tables[0].walk();
1732 }
1733
1734 /**
1735 * Fills this object with the next key and value.
1736 * @return True if successful.
1737 */
1738 public boolean next(){
1739 if(w==null){return false;}
1740 if(w.next()){return true;}
1741 if(tnum<tables.length){tnum++;}
1742 w=(tnum<tables.length ? tables[tnum].walk() : null);
1743 return w==null ? false : w.next();
1744 }
1745
1746 public long kmer(){return w.kmer();}
1747 public int value(){return w.value();}
1748
1749 private Walker w=null;
1750
1751 /** current table number */
1752 private int tnum=0;
1753 }
1754
1755 }