Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTableSet.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTableSet.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,1755 @@ +package kmer; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.concurrent.atomic.AtomicLong; + +import assemble.Contig; +import bloom.KmerCountAbstract; +import dna.AminoAcid; +import fileIO.ByteStreamWriter; +import fileIO.FileFormat; +import fileIO.ReadWrite; +import jgi.BBMerge; +import shared.Parse; +import shared.Parser; +import shared.PreParser; +import shared.Primes; +import shared.ReadStats; +import shared.Shared; +import shared.Timer; +import shared.Tools; +import shared.TrimRead; +import stream.ConcurrentReadInputStream; +import stream.FastaReadInputStream; +import stream.Read; +import structures.ByteBuilder; +import structures.IntList; +import structures.ListNum; +import structures.LongList; +import ukmer.Kmer; + + +/** + * Loads and holds kmers for Tadpole + * @author Brian Bushnell + * @date Jun 22, 2015 + * + */ +public class KmerTableSet extends AbstractKmerTableSet { + + /** + * Code entrance from the command line. + * @param args Command line arguments + */ + public static void main(String[] args){ + Timer t=new Timer(), t2=new Timer(); + t.start(); + t2.start(); + KmerTableSet set=new KmerTableSet(args); + t2.stop(); + outstream.println("Initialization Time: \t"+t2); + + ///And run it + set.process(t); + + //Close the print stream if it was redirected + Shared.closeStream(outstream); + } + + + /** + * Constructor. + * @param args Command line arguments + */ + private KmerTableSet(String[] args){ + this(args, 12);//+5 if using ownership and building contigs + } + + + /** + * Constructor. + * @param args Command line arguments + */ + public KmerTableSet(String[] args, final int bytesPerKmer_){ + {//Preparse block for help, config files, and outstream + PreParser pp=new PreParser(args, getClass(), false); + args=pp.args; + outstream=pp.outstream; + }//TODO - no easy way to close outstream + + /* Initialize local variables with defaults */ + Parser parser=new Parser(); + boolean prealloc_=false; + int k_=31; + int ways_=-1; + int filterMax_=2; + boolean ecco_=false, merge_=false; + boolean rcomp_=true; + double minProb_=defaultMinprob; + int tableType_=AbstractKmerTable.ARRAY1D; + /* Parse arguments */ + for(int i=0; i<args.length; i++){ + + final String arg=args[i]; + String[] split=arg.split("="); + String a=split[0].toLowerCase(); + String b=split.length>1 ? split[1] : null; + + if(Parser.parseCommonStatic(arg, a, b)){ + //do nothing + }else if(Parser.parseZip(arg, a, b)){ + //do nothing + }else if(Parser.parseQuality(arg, a, b)){ + //do nothing + }else if(Parser.parseFasta(arg, a, b)){ + //do nothing + }else if(parser.parseInterleaved(arg, a, b)){ + //do nothing + }else if(parser.parseTrim(arg, a, b)){ + //do nothing + }else if(a.equals("in") || a.equals("in1")){ + in1.clear(); + if(b!=null){ + String[] s=b.split(","); + for(String ss : s){ + in1.add(ss); + } + } + }else if(a.equals("in2")){ + in2.clear(); + if(b!=null){ + String[] s=b.split(","); + for(String ss : s){ + in2.add(ss); + } + } + }else if(a.equals("extra")){ + extra.clear(); + if(b!=null){ + String[] s=b.split(","); + for(String ss : s){ + extra.add(ss); + } + } + }else if(a.equals("append") || a.equals("app")){ + append=ReadStats.append=Parse.parseBoolean(b); + }else if(a.equals("overwrite") || a.equals("ow")){ + overwrite=Parse.parseBoolean(b); + }else if(a.equals("initialsize")){ + initialSize=Parse.parseIntKMG(b); + }else if(a.equals("showstats") || a.equals("stats")){ + showStats=Parse.parseBoolean(b); + }else if(a.equals("ways")){ + ways_=Parse.parseIntKMG(b); + }else if(a.equals("buflen") || a.equals("bufflen") || a.equals("bufferlength")){ + buflen=Parse.parseIntKMG(b); + }else if(a.equals("k")){ + assert(b!=null) : "\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n"; + k_=Parse.parseIntKMG(b); + }else if(a.equals("threads") || a.equals("t")){ + THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b)); + }else if(a.equals("showspeed") || a.equals("ss")){ + showSpeed=Parse.parseBoolean(b); + }else if(a.equals("ecco")){ + ecco_=Parse.parseBoolean(b); + }else if(a.equals("merge")){ + merge_=Parse.parseBoolean(b); + }else if(a.equals("verbose")){ +// assert(false) : "Verbose flag is currently static final; must be recompiled to change."; + verbose=Parse.parseBoolean(b); + }else if(a.equals("verbose2")){ +// assert(false) : "Verbose flag is currently static final; must be recompiled to change."; + verbose2=Parse.parseBoolean(b); + }else if(a.equals("minprob")){ + minProb_=Double.parseDouble(b); + }else if(a.equals("minprobprefilter") || a.equals("mpp")){ + minProbPrefilter=Parse.parseBoolean(b); + }else if(a.equals("minprobmain") || a.equals("mpm")){ + minProbMain=Parse.parseBoolean(b); + }else if(a.equals("reads") || a.startsWith("maxreads")){ + maxReads=Parse.parseKMG(b); + }else if(a.equals("prealloc") || a.equals("preallocate")){ + if(b==null || b.length()<1 || Character.isLetter(b.charAt(0))){ + prealloc_=Parse.parseBoolean(b); + }else{ + preallocFraction=Tools.max(0, Double.parseDouble(b)); + prealloc_=(preallocFraction>0); + } + }else if(a.equals("prefilter")){ + if(b==null || b.length()<1 || !Tools.isDigit(b.charAt(0))){ + prefilter=Parse.parseBoolean(b); + }else{ + filterMax_=Parse.parseIntKMG(b); + prefilter=filterMax_>0; + } + }else if(a.equals("prefiltersize") || a.equals("prefilterfraction") || a.equals("pff")){ + prefilterFraction=Tools.max(0, Double.parseDouble(b)); + assert(prefilterFraction<=1) : "prefiltersize must be 0-1, a fraction of total memory."; + prefilter=prefilterFraction>0; + }else if(a.equals("prehashes") || a.equals("hashes")){ + prehashes=Parse.parseIntKMG(b); + }else if(a.equals("prefilterpasses") || a.equals("prepasses")){ + assert(b!=null) : "Bad parameter: "+arg; + if(b.equalsIgnoreCase("auto")){ + prepasses=-1; + }else{ + prepasses=Parse.parseIntKMG(b); + } + }else if(a.equals("onepass")){ + onePass=Parse.parseBoolean(b); + }else if(a.equals("passes")){ + int passes=Parse.parseIntKMG(b); + onePass=(passes<2); + }else if(a.equals("rcomp")){ + rcomp_=Parse.parseBoolean(b); + }else if(a.equals("tabletype")){ + tableType_=Integer.parseInt(b); + } + + else if(a.equalsIgnoreCase("filterMemoryOverride") || a.equalsIgnoreCase("filterMemory") || + a.equalsIgnoreCase("prefilterMemory") || a.equalsIgnoreCase("filtermem")){ + filterMemoryOverride=Parse.parseKMG(b); + } + + else if(IGNORE_UNKNOWN_ARGS){ + //Do nothing + }else{ + throw new RuntimeException("Unknown parameter "+args[i]); + } + } + + {//Process parser fields + Parser.processQuality(); + + qtrimLeft=parser.qtrimLeft; + qtrimRight=parser.qtrimRight; + trimq=parser.trimq; + trimE=parser.trimE(); + + minAvgQuality=parser.minAvgQuality; + minAvgQualityBases=parser.minAvgQualityBases; + amino=Shared.AMINO_IN; + if(amino){k_=Tools.min(k_, 12);} + } + + if(prepasses==0 || !prefilter){ + prepasses=0; + prefilter=false; + } + + +// assert(false) : prepasses+", "+onePass+", "+prefilter; + + { + long memory=Runtime.getRuntime().maxMemory(); + double xmsRatio=Shared.xmsRatio(); +// long tmemory=Runtime.getRuntime().totalMemory(); + usableMemory=(long)Tools.max(((memory-96000000)*(xmsRatio>0.97 ? 0.82 : 0.72)), memory*0.45); + if(prepasses==0 || !prefilter){ + filterMemory0=filterMemory1=0; + }else if(filterMemoryOverride>0){ + filterMemory0=filterMemory1=filterMemoryOverride; + }else{ + double low=Tools.min(prefilterFraction, 1-prefilterFraction); + double high=1-low; + if(prepasses<0 || (prepasses&1)==1){//odd passes + filterMemory0=(long)(usableMemory*low); + filterMemory1=(long)(usableMemory*high); + }else{//even passes + filterMemory0=(long)(usableMemory*high); + filterMemory1=(long)(usableMemory*low); + } + } + tableMemory=(long)(usableMemory*.95-filterMemory0); + } + + tableType=tableType_; + prealloc=prealloc_; + bytesPerKmer=bytesPerKmer_; + if(ways_<1){ + long maxKmers=(2*tableMemory)/bytesPerKmer; + long minWays=Tools.min(10000, maxKmers/Integer.MAX_VALUE); + ways_=(int)Tools.max(31, (int)(Tools.min(96, Shared.threads())*2.5), minWays); + ways_=(int)Primes.primeAtLeast(ways_); + assert(ways_>0); +// System.err.println("ways="+ways_); + } + + /* Set final variables; post-process and validate argument combinations */ + + onePass=onePass&prefilter; + ways=ways_; + filterMax=Tools.min(filterMax_, 0x7FFFFFFF); + ecco=ecco_; + merge=merge_; + minProb=(float)minProb_; + rcomp=rcomp_; +// assert(false) : tableMemory+", "+bytesPerKmer+", "+prealloc+", "+preallocFraction; + estimatedKmerCapacity=(long)((tableMemory*1.0/bytesPerKmer)*((prealloc ? preallocFraction : 0.81))*(HashArray.maxLoadFactorFinal*0.97)); + +// System.err.println("tableMemory="+tableMemory+", bytesPerKmer="+bytesPerKmer+", estimatedKmerCapacity="+estimatedKmerCapacity); + KmerCountAbstract.minProb=(minProbPrefilter ? minProb : 0); + k=k_; + k2=k-1; + int bitsPerBase=(amino ? 5 : 2); + int mask=(amino ? 31 : 3); + coreMask=(AbstractKmerTableSet.MASK_CORE ? ~(((-1L)<<(bitsPerBase*(k_-1)))|mask) : -1L); + + if(k<1 || k>31){throw new RuntimeException("\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n");} + + if(initialSize<1){ + final long memOverWays=tableMemory/(bytesPerKmer*ways); + final double mem2=(prealloc ? preallocFraction : 1)*tableMemory; + initialSize=(prealloc || memOverWays<initialSizeDefault ? (int)Tools.min(2142000000, (long)(mem2/(bytesPerKmer*ways))) : initialSizeDefault); + if(initialSize!=initialSizeDefault){ + System.err.println("Initial size set to "+initialSize); + } + } + + /* Adjust I/O settings and filenames */ + + assert(FastaReadInputStream.settingsOK()); + + if(in1.isEmpty()){ + //throw new RuntimeException("Error - at least one input file is required."); + } + + for(int i=0; i<in1.size(); i++){ + String s=in1.get(i); + if(s!=null && s.contains("#") && !new File(s).exists()){ + int pound=s.lastIndexOf('#'); + String a=s.substring(0, pound); + String b=s.substring(pound+1); + in1.set(i, a+1+b); + in2.add(a+2+b); + } + } + + if(!extra.isEmpty()){ + ArrayList<String> temp=(ArrayList<String>) extra.clone(); + extra.clear(); + for(String s : temp){ + if(s!=null && s.contains("#") && !new File(s).exists()){ + int pound=s.lastIndexOf('#'); + String a=s.substring(0, pound); + String b=s.substring(pound+1); + extra.add(a); + extra.add(b); + }else{ + extra.add(s); + } + } + } + + { + boolean allowDuplicates=true; + if(!Tools.testInputFiles(allowDuplicates, true, in1, in2, extra)){ + throw new RuntimeException("\nCan't read some input files.\n"); + } + } + assert(THREADS>0); + + if(DISPLAY_PROGRESS){ +// assert(false); + outstream.println("Initial:"); + outstream.println("Ways="+ways+", initialSize="+initialSize+", prefilter="+(prefilter ? "t" : "f")+", prealloc="+(prealloc ? (""+preallocFraction) : "f")); + Shared.printMemory(); + outstream.println(); + } + } + + /*--------------------------------------------------------------*/ + /*---------------- Outer Methods ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public void clear(){ + tables=null; + } + + /*--------------------------------------------------------------*/ + /*---------------- Inner Methods ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public void allocateTables(){ + assert(tables==null); + tables=null; + final long coreMask=(MASK_CORE ? ~(((-1L)<<(2*(k-1)))|3) : -1L); + + ScheduleMaker scheduleMaker=new ScheduleMaker(ways, bytesPerKmer, prealloc, + (prealloc ? preallocFraction : 1.0), -1, (prefilter ? prepasses : 0), prefilterFraction, filterMemoryOverride); + int[] schedule=scheduleMaker.makeSchedule(); + //assert(false) : preallocFraction+", "+ways+", "+Arrays.toString(schedule); +// System.err.println("DEBUG "+preallocFraction+", "+ways+", "+Arrays.toString(schedule)); + tables=AbstractKmerTable.preallocate(ways, tableType, schedule, coreMask); + +// tables=AbstractKmerTable.preallocate(ways, tableType, initialSize, coreMask, (!prealloc || preallocFraction<1)); + } + + /** + * Load reads into tables, using multiple LoadThread. + */ + @Override + public long loadKmers(String fname1, String fname2){ + + /* Create read input stream */ + final ConcurrentReadInputStream cris; + { + FileFormat ff1=FileFormat.testInput(fname1, FileFormat.FASTQ, null, true, true); + FileFormat ff2=FileFormat.testInput(fname2, FileFormat.FASTQ, null, true, true); + cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ff1, ff2); + cris.start(); //4567 + } + +// /* Optionally skip the first reads, since initial reads may have lower quality */ +// if(skipreads>0){ +// long skipped=0; +// +// ListNum<Read> ln=cris.nextList(); +// ArrayList<Read> reads=(ln!=null ? ln.list : null); +// +// while(skipped<skipreads && reads!=null && reads.size()>0){ +// skipped+=reads.size(); +// +// cris.returnList(ln); +// ln=cris.nextList(); +// reads=(ln!=null ? ln.list : null); +// } +// cris.returnList(ln); +// if(reads==null || reads.isEmpty()){ +// ReadWrite.closeStreams(cris); +// System.err.println("Skipped all of the reads."); +// System.exit(0); +// } +// } + + /* Create ProcessThreads */ + ArrayList<LoadThread> alpt=new ArrayList<LoadThread>(THREADS); + for(int i=0; i<THREADS; i++){alpt.add(new LoadThread(cris));} + for(LoadThread pt : alpt){pt.start();} + + long added=0; + + /* Wait for threads to die, and gather statistics */ + for(LoadThread pt : alpt){ + while(pt.getState()!=Thread.State.TERMINATED){ + try { + pt.join(); + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + added+=pt.added; + + readsIn+=pt.readsInT; + basesIn+=pt.basesInT; + lowqReads+=pt.lowqReadsT; + lowqBases+=pt.lowqBasesT; + readsTrimmed+=pt.readsTrimmedT; + basesTrimmed+=pt.basesTrimmedT; + kmersIn+=pt.kmersInT; + } + + /* Shut down I/O streams; capture error status */ + errorState|=ReadWrite.closeStreams(cris); + return added; + } + + /*--------------------------------------------------------------*/ + /*---------------- Inner Classes ----------------*/ + /*--------------------------------------------------------------*/ + + /** + * Loads kmers. + */ + private class LoadThread extends Thread{ + + /** + * Constructor + * @param cris_ Read input stream + */ + public LoadThread(ConcurrentReadInputStream cris_){ + cris=cris_; + table=new HashBuffer(tables, buflen, k, false, true); + } + + @Override + public void run(){ + ListNum<Read> ln=cris.nextList(); + ArrayList<Read> reads=(ln!=null ? ln.list : null); + + //While there are more reads lists... + while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning + + //For each read (or pair) in the list... + for(int i=0; i<reads.size(); i++){ + Read r1=reads.get(i); + Read r2=r1.mate; + + if(!r1.validated()){r1.validate(true);} + if(r2!=null && !r2.validated()){r2.validate(true);} + + if(verbose){System.err.println("Considering read "+r1.id+" "+new String(r1.bases));} + + readsInT++; + basesInT+=r1.length(); + if(r2!=null){ + readsInT++; + basesInT+=r2.length(); + } + + if(maxNs<Integer.MAX_VALUE){ + if(r1!=null && r1.countUndefined()>maxNs){r1.setDiscarded(true);} + if(r2!=null && r2.countUndefined()>maxNs){r2.setDiscarded(true);} + } + + //Determine whether to discard the reads based on average quality + if(minAvgQuality>0){ + if(r1!=null && r1.quality!=null && r1.avgQuality(false, minAvgQualityBases)<minAvgQuality){r1.setDiscarded(true);} + if(r2!=null && r2.quality!=null && r2.avgQuality(false, minAvgQualityBases)<minAvgQuality){r2.setDiscarded(true);} + } + + if(r1!=null){ + if(qtrimLeft || qtrimRight){ + int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1, false); + basesTrimmedT+=x; + readsTrimmedT+=(x>0 ? 1 : 0); + } + if(r1.length()<k){r1.setDiscarded(true);} + } + if(r2!=null){ + if(qtrimLeft || qtrimRight){ + int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1, false); + basesTrimmedT+=x; + readsTrimmedT+=(x>0 ? 1 : 0); + } + if(r2.length()<k){r2.setDiscarded(true);} + } + + if((ecco || merge) && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){ + if(merge){ + final int insert=BBMerge.findOverlapStrict(r1, r2, false); + if(insert>0){ + r2.reverseComplement(); + r1=r1.joinRead(insert); + r2=null; + } + }else if(ecco){ + BBMerge.findOverlapStrict(r1, r2, true); + } + } + + if(minLen>0){ + if(r1!=null && r1.length()<minLen){r1.setDiscarded(true);} + if(r2!=null && r2.length()<minLen){r2.setDiscarded(true);} + } + + if(r1!=null){ + if(r1.discarded()){ + lowqBasesT+=r1.length(); + lowqReadsT++; + }else{ + long temp=addKmersToTable(r1); + added+=temp; + if(verbose){System.err.println("A: Added "+temp);} + } + } + if(r2!=null){ + if(r2.discarded()){ + lowqBasesT+=r2.length(); + lowqReadsT++; + }else{ + long temp=addKmersToTable(r2); + added+=temp; + if(verbose){System.err.println("B: Added "+temp);} + } + } + } + + //Fetch a new read list + cris.returnList(ln); + ln=cris.nextList(); + reads=(ln!=null ? ln.list : null); + } + cris.returnList(ln); + long temp=table.flush(); + if(verbose){System.err.println("Flush: Added "+temp);} + added+=temp; + } + + private final int addKmersToTableAA(final Read r){ + if(r==null || r.bases==null){return 0;} + final float minProb2=(minProbMain ? minProb : 0); + final byte[] bases=r.bases; + final byte[] quals=r.quality; + final int shift=5*k; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0; + int created=0; + int len=0; + + if(bases==null || bases.length<k){return -1;} + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + float prob=1; + for(int i=0; i<bases.length; i++){ + final byte b=bases[i]; + final long x=AminoAcid.acidToNumber[b]; + + //Update kmers + kmer=((kmer<<5)|x)&mask; + + if(minProb2>0 && quals!=null){//Update probability + prob=prob*PROB_CORRECT[quals[i]]; + if(len>k){ + byte oldq=quals[i-k]; + prob=prob*PROB_CORRECT_INVERSE[oldq]; + } + } + + //Handle Ns + if(x<0){ + len=0; + kmer=0; + prob=1; + }else{len++;} + + if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + if(len>=k && prob>=minProb2){ + kmersInT++; + final long key=kmer; + if(!prefilter || prefilterArray.read(key)>filterMax2){ + int temp=table.incrementAndReturnNumCreated(key, 1); + created+=temp; + if(verbose){System.err.println("C: Added "+temp);} + } + } + } + + return created; + } + + private final int addKmersToTable(final Read r){ + if(amino){return addKmersToTableAA(r);} + if(onePass){return addKmersToTable_onePass(r);} + if(r==null || r.bases==null){return 0;} + final float minProb2=(minProbMain ? minProb : 0); + final byte[] bases=r.bases; + final byte[] quals=r.quality; + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0; + long rkmer=0; + int created=0; + int len=0; + + if(bases==null || bases.length<k){return -1;} + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + float prob=1; + for(int i=0; i<bases.length; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + + //Update kmers + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + + if(minProb2>0 && quals!=null){//Update probability + prob=prob*PROB_CORRECT[quals[i]]; + if(len>k){ + byte oldq=quals[i-k]; + prob=prob*PROB_CORRECT_INVERSE[oldq]; + } + } + + //Handle Ns + if(x<0){ + len=0; + kmer=rkmer=0; + prob=1; + }else{len++;} + + if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + if(len>=k && prob>=minProb2){ + kmersInT++; + final long key=toValue(kmer, rkmer); + if(!prefilter || prefilterArray.read(key)>filterMax2){ + int temp=table.incrementAndReturnNumCreated(key, 1); + created+=temp; + if(verbose){System.err.println("C: Added "+temp);} + } + } + } + + return created; + } + + + private final int addKmersToTable_onePass(final Read r){ + assert(prefilter); + if(r==null || r.bases==null){return 0;} + final byte[] bases=r.bases; + final byte[] quals=r.quality; + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0; + long rkmer=0; + int created=0; + int len=0; + + if(bases==null || bases.length<k){return -1;} + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + float prob=1; + for(int i=0; i<bases.length; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + + //Update kmers + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + + if(minProb>0 && quals!=null){//Update probability + prob=prob*PROB_CORRECT[quals[i]]; + if(len>k){ + byte oldq=quals[i-k]; + prob=prob*PROB_CORRECT_INVERSE[oldq]; + } + } + + //Handle Ns + if(x<0){ + len=0; + kmer=rkmer=0; + prob=1; + }else{len++;} + + if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + if(len>=k){ + final long key=toValue(kmer, rkmer); + int count=prefilterArray.incrementAndReturnUnincremented(key, 1); + if(count>=filterMax2){ + int temp=table.incrementAndReturnNumCreated(key, 1); + created+=temp; + if(verbose){System.err.println("D: Added "+temp);} + } + } + } + return created; + } + + /*--------------------------------------------------------------*/ + + /** Input read stream */ + private final ConcurrentReadInputStream cris; + + private final HashBuffer table; + + public long added=0; + + private long readsInT=0; + private long basesInT=0; + private long lowqReadsT=0; + private long lowqBasesT=0; + private long readsTrimmedT=0; + private long basesTrimmedT=0; + private long kmersInT=0; + + } + + + /*--------------------------------------------------------------*/ + /*---------------- Convenience ----------------*/ + /*--------------------------------------------------------------*/ + + public void regenerateKmers(byte[] bases, LongList kmers, IntList counts, final int a){ + final int loc=a+k; + final int lim=Tools.min(counts.size, a+k+1); + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=kmers.get(a); + long rkmer=rcomp(kmer); + int len=k; + +// assert(false) : a+", "+loc+", "+lim; + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + for(int i=loc, j=a+1; j<lim; i++, j++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + + if(len>=k){ + assert(kmers.get(j)!=kmer); + kmers.set(j, kmer); + int count=getCount(kmer, rkmer); + counts.set(j, count); + }else{ + kmers.set(j, -1); + counts.set(j, 0); + } + } + } + + @Override + public int regenerateCounts(byte[] bases, IntList counts, final Kmer dummy, BitSet changed){ + assert(!changed.isEmpty()); + final int firstBase=changed.nextSetBit(0), lastBase=changed.length()-1; + final int ca=firstBase-k; +// final int b=changed.nextSetBit(0);ca+kbig; //first base changed + final int firstCount=Tools.max(firstBase-k+1, 0), lastCount=Tools.min(counts.size-1, lastBase); //count limit +// System.err.println("ca="+ca+", b="+b+", lim="+lim); +// System.err.println("Regen from count "+(ca+1)+"-"+lim); + + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + int valid=0; + +// System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts); + + for(int i=Tools.max(0, firstBase-k+1), lim=Tools.min(lastBase+k-1, bases.length-1); i<=lim; i++){ + final byte base=bases[i]; + final long x=AminoAcid.baseToNumber[base]; + final long x2=AminoAcid.baseToComplementNumber[base]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + + if(x<0){ + len=0; + kmer=rkmer=0; + }else{ + len++; + } + + final int c=i-k+1; + if(i>=firstBase){ + if(len>=k){ + valid++; + int count=getCount(kmer, rkmer); + counts.set(c, count); + }else if(c>=0){ + counts.set(c, 0); + } + } + } + + return valid; + } + + @Override + public int fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer dummy){ + final int b=k-1; + final int lim=(positions==null ? bases.length : Tools.min(bases.length, positions.length()+k-1)); + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + int valid=0; + + counts.clear(); + + for(int i=0, j=-b; i<lim; i++, j++){ + final byte base=bases[i]; + final long x=AminoAcid.baseToNumber[base]; + final long x2=AminoAcid.baseToComplementNumber[base]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + + if(x<0){ + len=0; + kmer=rkmer=0; + }else{ + len++; + } + + if(j>=0){ + if(len>=k && (positions==null || positions.get(j))){ + valid++; + int count=getCount(kmer, rkmer); + assert(i-k+1==counts.size()) : "j="+j+", counts="+counts.size()+", b="+(b)+", (i-k+1)="+(i-k+1); + assert(j==counts.size()); + counts.add(Tools.max(count, 0)); +// counts.set(i-k+1, count); + }else{ + counts.add(0); +// counts.set(i-k+1, 0); + } + } + } + +// assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size; + assert((counts.size==0 && bases.length<k) || counts.size==lim-k+1) : bases.length+", "+k+", "+counts.size; + + return valid; + } + + public int regenerateCounts(byte[] bases, IntList counts, final int ca){ + final int b=ca+k-1; + final int lim=Tools.min(bases.length, b+k+1); + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + int valid=0; + + final int clen=counts.size; + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts. + * i is an index in the base array, j is an index in the count array. */ + for(int i=b-k+1; i<lim; i++){ + final byte base=bases[i]; + final long x=AminoAcid.baseToNumber[base]; + final long x2=AminoAcid.baseToComplementNumber[base]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + + if(x<0){ + len=0; + kmer=rkmer=0; + }else{ + len++; + } + + if(i>=b){ + if(len>=k){ + valid++; + int count=getCount(kmer, rkmer); + counts.set(i-k+1, count); + }else{ + counts.set(i-k+1, 0); + } + } + } + + assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size; + assert(clen==counts.size); + + return valid; + } + + /** Returns number of valid kmers */ + public int fillKmers(byte[] bases, LongList kmers){ + final int blen=bases.length; + if(blen<k){return 0;} + final int min=k-1; + final int shift=2*k; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0; + int len=0; + int valid=0; + + kmers.clear(); + + /* Loop through the bases, maintaining a forward kmer via bitshifts */ + for(int i=0; i<blen; i++){ + final byte b=bases[i]; + assert(b>=0) : Arrays.toString(bases); + final long x=AminoAcid.baseToNumber[b]; + kmer=((kmer<<2)|x)&mask; + if(x<0){ + len=0; + kmer=0; + }else{len++;} + if(i>=min){ + if(len>=k){ + kmers.add(kmer); + valid++; + }else{ + kmers.add(-1); + } + } + } + return valid; + } + + public void fillCounts(LongList kmers, IntList counts){ + counts.clear(); + for(int i=0; i<kmers.size; i++){ + long kmer=kmers.get(i); + if(kmer>=0){ + long rkmer=rcomp(kmer); + int count=getCount(kmer, rkmer); + counts.add(count); + }else{ + counts.add(0); + } + } + } + + + /*--------------------------------------------------------------*/ + /*---------------- Helper Methods ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public long regenerate(final int limit){ + long sum=0; + for(AbstractKmerTable akt : tables){ + sum+=akt.regenerate(limit); + } + return sum; + } + + public HashArray1D getTableForKey(long key){ + return (HashArray1D) tables[kmerToWay(key)]; + } + + @Override + public HashArray1D getTable(int tnum){ + return (HashArray1D) tables[tnum]; + } + + @Override + public long[] fillHistogram(int histMax) { + return HistogramMaker.fillHistogram(tables, histMax); + } + + @Override + public void countGC(long[] gcCounts, int max) { + for(AbstractKmerTable set : tables){ + set.countGC(gcCounts, max); + } + } + + @Override + public void initializeOwnership(){ + OwnershipThread.initialize(tables); + } + + @Override + public void clearOwnership(){ + OwnershipThread.clear(tables); + } + + public long rightmostKmer(final ByteBuilder bb){ + return rightmostKmer(bb.array, bb.length()); + } + + public long rightmostKmer(final byte[] bases, final int blen){ + if(blen<k){return -1;} + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0; + long rkmer=0; + int len=0; + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */ + { + for(int i=blen-k; i<blen; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + } + } + + if(len<k){return -1;} + else{assert(len==k);} + return kmer; + } + + public long leftmostKmer(final ByteBuilder bb){ + return leftmostKmer(bb.array, bb.length()); + } + + public long leftmostKmer(final byte[] bases, final int blen){ + if(blen<k){return -1;} + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0; + long rkmer=0; + int len=0; + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */ + { + for(int i=0; i<k; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + } + } + + if(len<k){return -1;} + else{assert(len==k);} + return kmer; + } + + public boolean doubleClaim(final ByteBuilder bb, final int id){ + return doubleClaim(bb.array, bb.length(), id); + } + + /** Ensures there can be only one owner. */ + public boolean doubleClaim(final byte[] bases, final int blength, final int id){ + boolean success=claim(bases, blength, id, true); + if(verbose){outstream.println("success1="+success+", id="+id+", blength="+blength);} + if(!success){return false;} + success=claim(bases, blength, id+CLAIM_OFFSET, true); + if(verbose){outstream.println("success2="+success+", id="+id+", blength="+blength);} + return success; + } + + public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly){ + return claim(bb.array, bb.length(), id, exitEarly); + } + + public float calcCoverage(final byte[] bases, final int blength){ + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + long sum=0, max=0; + int kmers=0; + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + for(int i=0; i<blength; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + if(len>=k){ + int count=getCount(kmer, rkmer); + sum+=count; + max=Tools.max(count, max); + kmers++; + } + } + return sum==0 ? 0 : sum/(float)kmers; + } + + public float calcCoverage(final Contig contig){ + final byte[] bases=contig.bases; + if(bases.length<k){return 0;} + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + long sum=0; + int max=0, min=Integer.MAX_VALUE; + int kmers=0; + + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + for(int i=0; i<bases.length; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + if(len>=k){ + int count=getCount(kmer, rkmer); + sum+=count; + max=Tools.max(count, max); + min=Tools.min(count, min); + kmers++; + } + } + contig.coverage=sum==0 ? 0 : sum/(float)kmers; + contig.maxCov=max; + contig.minCov=sum==0 ? 0 : min; + return contig.coverage; + } + + public boolean claim(final byte[] bases, final int blength, final int id, boolean exitEarly){ + if(verbose){outstream.println("Thread "+id+" claim start.");} + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + boolean success=true; + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + for(int i=0; i<blength && success; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + if(len>=k){ + success=claim(kmer, rkmer, id/*, rid, i*/); + success=(success || !exitEarly); + } + } + return success; + } + + public boolean claim(final long kmer, final long rkmer, final int id/*, final long rid, final int pos*/){ + //TODO: rid and pos are just for debugging. + final long key=toValue(kmer, rkmer); + final int way=kmerToWay(key); + final AbstractKmerTable table=tables[way]; + final int count=table.getValue(key); + assert(count==-1 || count>0) : count; +// if(verbose /*|| true*/){outstream.println("Count="+count+".");} + if(count<0){return true;} + assert(count>0) : count; + final int owner=table.setOwner(key, id); + if(verbose){outstream.println("owner="+owner+".");} +// assert(owner==id) : id+", "+owner+", "+rid+", "+pos; + return owner==id; + } + + public void release(ByteBuilder bb, final int id){ + release(bb.array, bb.length(), id); + } + + public void release(final byte[] bases, final int blength, final int id){ + if(verbose /*|| true*/){outstream.println("*Thread "+id+" release start.");} + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + for(int i=0; i<blength; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + if(len>=k){ + release(kmer, rkmer, id); + } + } + } + + public boolean release(final long kmer, final long rkmer, final int id){ + return release(toValue(kmer, rkmer), id); + } + + public boolean release(final long key, final int id){ + final int way=kmerToWay(key); + final AbstractKmerTable table=tables[way]; + final int count=table.getValue(key); +// if(verbose /*|| true*/){outstream.println("Count="+count+".");} + if(count<1){return true;} + return table.clearOwner(key, id); + } + + public int findOwner(ByteBuilder bb, final int id){ + return findOwner(bb.array, bb.length(), id); + } + + public int findOwner(final byte[] bases, final int blength, final int id){ + final int shift=2*k; + final int shift2=shift-2; + final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); + long kmer=0, rkmer=0; + int len=0; + int maxOwner=-1; + /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ + for(int i=0; i<blength; i++){ + final byte b=bases[i]; + final long x=AminoAcid.baseToNumber[b]; + final long x2=AminoAcid.baseToComplementNumber[b]; + kmer=((kmer<<2)|x)&mask; + rkmer=((rkmer>>>2)|(x2<<shift2))&mask; + if(x<0){ + len=0; + kmer=rkmer=0; + }else{len++;} + if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} + if(len>=k){ + int owner=findOwner(kmer, rkmer); + maxOwner=Tools.max(owner, maxOwner); + if(maxOwner>id){break;} + } + } + return maxOwner; + } + + public int findOwner(final long kmer){ + return findOwner(kmer, rcomp(kmer)); + } + + public int findOwner(final long kmer, final long rkmer){ + final long key=toValue(kmer, rkmer); + final int way=kmerToWay(key); + final AbstractKmerTable table=tables[way]; + final int count=table.getValue(key); + if(count<0){return -1;} + final int owner=table.getOwner(key); + return owner; + } + + public int getCount(long kmer, long rkmer){ + long key=toValue(kmer, rkmer); + int way=kmerToWay(key); + return tables[way].getValue(key); + } + + public int getCount(long key){ + int way=kmerToWay(key); + return tables[way].getValue(key); + } + + /*--------------------------------------------------------------*/ + /*---------------- Fill Counts ----------------*/ + /*--------------------------------------------------------------*/ + + public int fillRightCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){ + if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){ + return fillRightCounts_fast(kmer, rkmer, counts, mask, shift2); + }else{ + return fillRightCounts_safe(kmer, rkmer, counts, mask, shift2); + } + } + + public int fillRightCounts_fast(final long kmer0, final long rkmer0, int[] counts, + long mask, int shift2){ +// assert((k&1)==1) : k; + assert(MASK_CORE); + final long kmer=(kmer0<<2)&mask; + final long rkmer=(rkmer0>>>2); + final long a=kmer&coreMask, b=rkmer&coreMask; + + int max=-1, maxPos=0; + if(a==b){ + return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2); + }else if(a>b){ +// return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2); + final long core=a; + final int way=kmerToWay(core); +// final AbstractKmerTable table=tables[way]; + final HashArray table=(HashArray) tables[way]; + final int cell=table.kmerToCell(kmer); + for(int i=0; i<=3; i++){ + final long key=kmer|i; + final int count=Tools.max(0, table.getValue(key, cell)); +// final int count=Tools.max(0, table.getValue(key)); + counts[i]=count; + if(count>max){ + max=count; + maxPos=i; + } + } + }else{ +// return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2); + //Use a higher increment for the left bits + final long core=b; + final int way=kmerToWay(core); +// final AbstractKmerTable table=tables[way]; + final HashArray table=(HashArray) tables[way]; + final int cell=table.kmerToCell(rkmer); + final long incr=1L<<(2*(k-1)); + long j=3*incr; + for(int i=0; i<=3; i++, j-=incr){ + final long key=rkmer|j; + final int count=Tools.max(0, table.getValue(key, cell)); +// final int count=Tools.max(0, table.getValue(key)); + counts[i]=count; + if(count>max){ + max=count; + maxPos=i; + } + } + } + return maxPos; + } + + //TODO: Change this to take advantage of coreMask + //Requires special handling of core palindromes. + //Thus it would be easiest to just handle odd kmers, and K is normally 31 anyway. + public int fillRightCounts_safe(long kmer, long rkmer, int[] counts, long mask, int shift2){ + assert(kmer==rcomp(rkmer)); + if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));} + kmer=(kmer<<2)&mask; + rkmer=(rkmer>>>2); + int max=-1, maxPos=0; + + for(int i=0; i<=3; i++){ + long kmer2=kmer|((long)i); + long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2); + if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));} + assert(kmer2==(kmer2&mask)); + assert(rkmer2==(rkmer2&mask)); + assert(kmer2==rcomp(rkmer2)); + long key=toValue(kmer2, rkmer2); + int way=kmerToWay(key); + int count=tables[way].getValue(key); + assert(count==NOT_PRESENT || count>=0); + count=Tools.max(count, 0); + counts[i]=count; + if(count>max){ + max=count; + maxPos=i; + } + } + return maxPos; + } + + /** For KmerCompressor. */ + public int fillRightCountsRcompOnly(long kmer, long rkmer, int[] counts, long mask, int shift2){ + assert(kmer==rcomp(rkmer)); + if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));} + kmer=(kmer<<2)&mask; + rkmer=(rkmer>>>2); + int max=-1, maxPos=0; + + for(int i=0; i<=3; i++){ + long kmer2=kmer|((long)i); + long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2); + if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));} + assert(kmer2==(kmer2&mask)); + assert(rkmer2==(rkmer2&mask)); + assert(kmer2==rcomp(rkmer2)); + long key=rkmer2; + int way=kmerToWay(key); + int count=tables[way].getValue(key); + assert(count==NOT_PRESENT || count>=0); + count=Tools.max(count, 0); + counts[i]=count; + if(count>max){ + max=count; + maxPos=i; + } + } + return maxPos; + } + + public int fillLeftCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){ + if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){ + return fillLeftCounts_fast(kmer, rkmer, counts, mask, shift2); + }else{ + return fillLeftCounts_safe(kmer, rkmer, counts, mask, shift2); + } + } + + public int fillLeftCounts_fast(final long kmer0, final long rkmer0, int[] counts, + long mask, int shift2){ +// assert((k&1)==1) : k; + assert(MASK_CORE); + final long rkmer=(rkmer0<<2)&mask; + final long kmer=(kmer0>>>2); + final long a=kmer&coreMask, b=rkmer&coreMask; + + int max=-1, maxPos=0; + if(a==b){ + return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2); + }else if(a>b){ +// return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2); + + final long core=a; + final int way=kmerToWay(core); +// final AbstractKmerTable table=tables[way]; + final HashArray table=(HashArray) tables[way]; + final int cell=table.kmerToCell(kmer); + final long incr=1L<<(2*(k-1)); + long j=0;//Must be declared as a long, outside of loop + for(int i=0; i<=3; i++, j+=incr){ +// final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]); +// final long kmer2=kmer|(((long)i)<<shift2); +// final long key2=toValue(rkmer2, kmer2); +// int way2=kmerToWay(key2); +// int count2=tables[way2].getValue(key2); +// count2=Tools.max(count2, 0); + + final long key=kmer|j; + final int count=Tools.max(0, table.getValue(key, cell)); +// int count=Tools.max(0, table.getValue(key)); + +// assert(way==way2) : i+", "+way+", "+way2; +// assert(key==key2) : i+", "+Long.toBinaryString(kmer)+", "+ +// Long.toBinaryString(key)+", "+Long.toBinaryString(key2)+ +// ", "+Long.toBinaryString(j)+", "+Long.toBinaryString(incr); +// assert(count==count2) : i+", "+count+", "+count2; + + counts[i]=count; + if(count>max){ + max=count; + maxPos=i; + } + } + return maxPos; + }else{ +// return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2); + final long core=b; + final int way=kmerToWay(core); +// final AbstractKmerTable table=tables[way]; + final HashArray table=(HashArray) tables[way]; + final int cell=table.kmerToCell(rkmer); + for(int i=0, j=3; i<=3; i++, j--){ + final long key=rkmer|j; + final int count=Tools.max(0, table.getValue(key, cell)); +// final int count=Tools.max(0, table.getValue(key)); + + counts[i]=count; + if(count>max){ + max=count; + maxPos=i; + } + } + return maxPos; + } + } + + public int fillLeftCounts_safe(final long kmer0, final long rkmer0, int[] counts, long mask, int shift2){ + assert(kmer0==rcomp(rkmer0)); + if(verbose){outstream.println("fillLeftCounts: "+toText(kmer0)+", "+toText(rkmer0));} + long rkmer=(rkmer0<<2)&mask; + long kmer=(kmer0>>>2); + int max=-1, maxPos=0; +// assert(false) : shift2+", "+k; + for(int i=0; i<=3; i++){ + final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]); + final long kmer2=kmer|(((long)i)<<shift2); + if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));} + assert(kmer2==(kmer2&mask)); + assert(rkmer2==(rkmer2&mask)); + assert(kmer2==rcomp(rkmer2)) : "\n"+"kmer: \t"+toText(rcomp(rkmer2))+", "+toText(rcomp(kmer2)); + long key=toValue(rkmer2, kmer2); + int way=kmerToWay(key); + int count=tables[way].getValue(key); + assert(count==NOT_PRESENT || count>=0); + count=Tools.max(count, 0); + counts[i]=count; + if(count>max){ + max=count; + maxPos=i; + } + } + return maxPos; + } + + /*--------------------------------------------------------------*/ + /*---------------- Printing Methods ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public boolean dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){ + if(fname==null){return false;} + Timer t=new Timer(); + + ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true); + bsw.start(); + for(AbstractKmerTable set : tables){ + set.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining); + } + bsw.poisonAndWait(); + + t.stop(); + if(printTime){outstream.println("Kmer Dump Time: \t"+t);} + return bsw.errorState; + } + + @Override + public boolean dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){ + final int threads=Tools.min(Shared.threads(), tables.length); + if(threads<3 || DumpThread.NUM_THREADS==1){return dumpKmersAsBytes(fname, mincount, maxcount, printTime, remaining);} + + if(fname==null){return false;} + Timer t=new Timer(); + + ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true); + bsw.start(); + DumpThread.dump(k, mincount, maxcount, tables, bsw, remaining); + bsw.poisonAndWait(); + + t.stop(); + if(printTime){outstream.println("Kmer Dump Time: \t"+t);} + return bsw.errorState; + } + + /*--------------------------------------------------------------*/ + /*---------------- Recall Methods ----------------*/ + /*--------------------------------------------------------------*/ + + private final long rcomp(long kmer){return AminoAcid.reverseComplementBinaryFast(kmer, k);} + private final StringBuilder toText(long kmer){return AbstractKmerTable.toText(kmer, k);} + + /*--------------------------------------------------------------*/ + /*---------------- Static Methods ----------------*/ + /*--------------------------------------------------------------*/ + + /** + * Transforms a kmer into a canonical value stored in the table. Expected to be inlined. + * @param kmer Forward kmer + * @param rkmer Reverse kmer + * @return Canonical value + */ + public final long toValue(long kmer, long rkmer){ +// long value=(rcomp ? Tools.max(kmer, rkmer) : kmer); +// return value; + if(!rcomp){return kmer;} + final long a=kmer&coreMask, b=rkmer&coreMask; + return a>b ? kmer : a<b ? rkmer : Tools.max(kmer, rkmer); + } + + /*--------------------------------------------------------------*/ + /*---------------- Final Primitives ----------------*/ + /*--------------------------------------------------------------*/ + + @Override + public int kbig(){return k;} + @Override + public long filterMemory(int pass){return ((pass&1)==0) ? filterMemory0 : filterMemory1;} + @Override + public boolean ecco(){return ecco;} + @Override + public boolean qtrimLeft(){return qtrimLeft;} + @Override + public boolean qtrimRight(){return qtrimRight;} + @Override + public float minAvgQuality(){return minAvgQuality;} + @Override + public long tableMemory(){return tableMemory;} + @Override + public long estimatedKmerCapacity(){return estimatedKmerCapacity;} + @Override + public int ways(){return ways;} + @Override + public boolean rcomp(){return rcomp;} + + public final int kmerToWay(final long kmer){ + final int way=(int)((kmer&coreMask)%ways); + return way; + } + + /** Hold kmers. A kmer X such that X%WAYS=Y will be stored in tables[Y] */ + private AbstractKmerTable[] tables; + + public AbstractKmerTable[] tables(){return tables;} + + public long filterMemoryOverride=0; + + public final int tableType; //AbstractKmerTable.ARRAY1D; + + private final int bytesPerKmer; + + private final long usableMemory; + private final long filterMemory0; + private final long filterMemory1; + private final long tableMemory; + private final long estimatedKmerCapacity; + + /** Number of tables (and threads, during loading) */ + private final boolean prealloc; + + /** Number of tables (and threads, during loading) */ + public final int ways; + + /** Normal kmer length */ + public final int k; + /** k-1; used in some expressions */ + public final int k2; + + public final long coreMask; + + /** Look for reverse-complements as well as forward kmers. Default: true */ + private final boolean rcomp; + + /** Quality-trim the left side */ + public final boolean qtrimLeft; + /** Quality-trim the right side */ + public final boolean qtrimRight; + /** Trim bases at this quality or below. Default: 4 */ + public final float trimq; + /** Error rate for trimming (derived from trimq) */ + private final float trimE; + + /** Throw away reads below this average quality after trimming. Default: 0 */ + public final float minAvgQuality; + /** If positive, calculate average quality from the first X bases only. Default: 0 */ + public final int minAvgQualityBases; + + /** Ignore kmers with probability of correctness less than this */ + public final float minProb; + + /** Correct via overlap */ + private final boolean ecco; + + /** Attempt to merge via overlap prior to counting kmers */ + private final boolean merge; + + /*--------------------------------------------------------------*/ + /*---------------- Walker ----------------*/ + /*--------------------------------------------------------------*/ + + public Walker walk(){ + return new WalkerKST(); + } + + public class WalkerKST extends Walker { + + WalkerKST(){ + w=tables[0].walk(); + } + + /** + * Fills this object with the next key and value. + * @return True if successful. + */ + public boolean next(){ + if(w==null){return false;} + if(w.next()){return true;} + if(tnum<tables.length){tnum++;} + w=(tnum<tables.length ? tables[tnum].walk() : null); + return w==null ? false : w.next(); + } + + public long kmer(){return w.kmer();} + public int value(){return w.value();} + + private Walker w=null; + + /** current table number */ + private int tnum=0; + } + +}