jpayne@68: package bloom; jpayne@68: jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.BitSet; jpayne@68: import java.util.Locale; jpayne@68: jpayne@68: import dna.AminoAcid; jpayne@68: import fileIO.FileFormat; jpayne@68: import shared.Timer; jpayne@68: import stream.ConcurrentReadInputStream; jpayne@68: import stream.FastaReadInputStream; jpayne@68: import stream.Read; jpayne@68: import structures.ListNum; jpayne@68: jpayne@68: /** jpayne@68: * @author Brian Bushnell jpayne@68: * @date Jul 5, 2012 jpayne@68: * jpayne@68: */ jpayne@68: public class KmerCount5 extends KmerCountAbstract { jpayne@68: jpayne@68: public static void main(String[] args){ jpayne@68: jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: String fname1=args[0]; jpayne@68: String fname2=(args.length>1 ? args[1] : null); jpayne@68: int k=14; jpayne@68: int cbits=16; jpayne@68: int gap=0; jpayne@68: jpayne@68: for(int i=2; i1 ? split[1] : null; jpayne@68: jpayne@68: if(a.equals("k") || a.equals("kmer")){ jpayne@68: k=Integer.parseInt(b); jpayne@68: }else if(a.startsWith("cbits") || a.startsWith("cellbits")){ jpayne@68: cbits=Integer.parseInt(b); jpayne@68: }else if(a.startsWith("gap")){ jpayne@68: gap=Integer.parseInt(b); jpayne@68: }else{ jpayne@68: throw new RuntimeException("Unknown parameter "+args[i]); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: KCountArray count=null; jpayne@68: jpayne@68: if(fileIO.FileFormat.hasFastaExtension(fname1)){ jpayne@68: assert(!FastaReadInputStream.SPLIT_READS); jpayne@68: FastaReadInputStream.MIN_READ_LEN=k; jpayne@68: } jpayne@68: jpayne@68: if(gap==0){ jpayne@68: count=count(fname1, fname2, k, cbits, true); jpayne@68: }else{ jpayne@68: count=countFastqSplit(fname1, fname2, (k+1)/2, k/2, gap, cbits, true, null); jpayne@68: } jpayne@68: jpayne@68: jpayne@68: t.stop(); jpayne@68: System.out.println("Finished counting; time = "+t); jpayne@68: jpayne@68: printStatistics(count); jpayne@68: jpayne@68: } jpayne@68: jpayne@68: public static void printStatistics(KCountArray count){ jpayne@68: long[] freq=count.transformToFrequency(); jpayne@68: jpayne@68: // System.out.println(count+"\n"); jpayne@68: // System.out.println(Arrays.toString(freq)+"\n"); jpayne@68: jpayne@68: long sum=sum(freq); jpayne@68: System.out.println("Kmer fraction:"); jpayne@68: int lim1=8, lim2=16; jpayne@68: for(int i=0; i=freq.length){prefix=lim1+"+";} jpayne@68: while(prefix.length()<8){prefix=prefix+" ";} jpayne@68: System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum))+"\t"+x); jpayne@68: lim1*=2; jpayne@68: lim2=min(lim2*2, freq.length); jpayne@68: } jpayne@68: jpayne@68: long sum2=sum-freq[0]; jpayne@68: long x=freq[1]; jpayne@68: System.out.println(); jpayne@68: System.out.println("Keys Counted: \t \t"+keysCounted); jpayne@68: System.out.println("Unique: \t \t"+sum2); jpayne@68: System.out.println("Avg Sites/Key: \t \t"+String.format(Locale.ROOT, "%.3f ",(keysCounted*1d/sum2))); jpayne@68: System.out.println(); jpayne@68: System.out.println("Singleton: \t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum2))+"\t"+x); jpayne@68: x=sum2-x; jpayne@68: System.out.println("Useful: \t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum2))+"\t"+x); jpayne@68: } jpayne@68: jpayne@68: public static KCountArray count(String reads1, String reads2, int k, int cbits, boolean rcomp){ jpayne@68: return count(reads1, reads2, k, cbits, rcomp, null); jpayne@68: } jpayne@68: jpayne@68: public static KCountArray count(String reads1, String reads2, int k, int cbits, boolean rcomp, KCountArray count){ jpayne@68: assert(k<32 && k>=1 && (count!=null || k<20)); jpayne@68: final int kbits=2*k; jpayne@68: final long mask=(kbits>63 ? -1L : ~((-1L)< ln=cris.nextList(); jpayne@68: ArrayList reads=(ln!=null ? ln.list : null); jpayne@68: jpayne@68: if(reads!=null && !reads.isEmpty()){ jpayne@68: Read r=reads.get(0); jpayne@68: assert(paired==(r.mate!=null)); jpayne@68: } jpayne@68: jpayne@68: while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning jpayne@68: //System.err.println("reads.size()="+reads.size()); jpayne@68: for(Read r : reads){ jpayne@68: readsProcessed++; jpayne@68: jpayne@68: addRead(r, count, k, mask, rcomp); jpayne@68: if(r.mate!=null){ jpayne@68: addRead(r.mate, count, k, mask, rcomp); jpayne@68: } jpayne@68: jpayne@68: } jpayne@68: //System.err.println("returning list"); jpayne@68: cris.returnList(ln); jpayne@68: //System.err.println("fetching list"); jpayne@68: ln=cris.nextList(); jpayne@68: reads=(ln!=null ? ln.list : null); jpayne@68: } jpayne@68: if(verbose){System.err.println("Finished reading");} jpayne@68: cris.returnList(ln); jpayne@68: if(verbose){System.err.println("Returned list");} jpayne@68: cris.close(); jpayne@68: if(verbose){System.err.println("Closed stream");} jpayne@68: if(verbose){System.err.println("Processed "+readsProcessed+" reads.");} jpayne@68: jpayne@68: jpayne@68: return count; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: jpayne@68: jpayne@68: public static KCountArray count(final String reads1, final String reads2, final int k, final int cbits, final boolean rcomp, jpayne@68: KCountArray counts, final KCountArray trusted, final long maxReads, final int thresh, final int detectStepsize, final boolean conservative){ jpayne@68: jpayne@68: assert(k<32 && k>=1 && (counts!=null || k<20)); jpayne@68: final int kbits=2*k; jpayne@68: final long mask=(kbits>63 ? -1L : ~((-1L)< ln=cris.nextList(); jpayne@68: ArrayList reads=(ln!=null ? ln.list : null); jpayne@68: jpayne@68: if(reads!=null && !reads.isEmpty()){ jpayne@68: Read r=reads.get(0); jpayne@68: assert(paired==(r.mate!=null)); jpayne@68: } jpayne@68: jpayne@68: jpayne@68: while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning jpayne@68: //System.err.println("reads.size()="+reads.size()); jpayne@68: for(Read r : reads){ jpayne@68: jpayne@68: Read r2=r.mate; jpayne@68: { jpayne@68: if(trusted!=null){ jpayne@68: BitSet bs=(conservative ? ErrorCorrect.detectErrorsBulk(r, trusted, k, thresh, detectStepsize) : jpayne@68: ErrorCorrect.detectTrusted(r, trusted, k, thresh, detectStepsize)); jpayne@68: // System.out.println("\n"+toString(bs, r.length())); jpayne@68: // System.out.println(new String(r.bases)); jpayne@68: for(int i=bs.nextClearBit(0); i=1 && (counts!=null || k<20)); jpayne@68: assert(gap>=0); jpayne@68: final int kbits1=2*k1; jpayne@68: final int kbits2=2*k2; jpayne@68: final long mask1=~((-1L)<<(kbits1)); jpayne@68: final long mask2=~((-1L)<<(kbits2)); jpayne@68: jpayne@68: if(counts==null){ jpayne@68: final long cells=1L<<(kbits1+kbits2); jpayne@68: if(verbose){System.err.println("k1="+k1+", k2="+k2+", kbits1="+kbits1+", kbits2="+kbits2+", cells="+cells+ jpayne@68: ", mask1="+Long.toHexString(mask1)+", mask2="+Long.toHexString(mask2));} jpayne@68: counts=KCountArray.makeNew(cells, cbits); jpayne@68: } jpayne@68: jpayne@68: final ConcurrentReadInputStream cris; jpayne@68: { jpayne@68: FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true); jpayne@68: FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true); jpayne@68: cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2); jpayne@68: cris.start(); //4567 jpayne@68: } jpayne@68: jpayne@68: assert(cris!=null) : reads1; jpayne@68: System.err.println("Started cris"); jpayne@68: boolean paired=cris.paired(); jpayne@68: if(verbose){System.err.println("Paired: "+paired);} jpayne@68: jpayne@68: ListNum ln=cris.nextList(); jpayne@68: ArrayList reads=(ln!=null ? ln.list : null); jpayne@68: jpayne@68: if(reads!=null && !reads.isEmpty()){ jpayne@68: Read r=reads.get(0); jpayne@68: assert(paired==(r.mate!=null)); jpayne@68: } jpayne@68: jpayne@68: while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning jpayne@68: //System.err.println("reads.size()="+reads.size()); jpayne@68: for(Read r : reads){ jpayne@68: readsProcessed++; jpayne@68: jpayne@68: addReadSplit(r, counts, k1, k2, mask1, mask2, gap, rcomp); jpayne@68: if(r.mate!=null){ jpayne@68: addReadSplit(r.mate, counts, k1, k2, mask1, mask2, gap, rcomp); jpayne@68: } jpayne@68: jpayne@68: } jpayne@68: //System.err.println("returning list"); jpayne@68: cris.returnList(ln); jpayne@68: //System.err.println("fetching list"); jpayne@68: ln=cris.nextList(); jpayne@68: reads=(ln!=null ? ln.list : null); jpayne@68: } jpayne@68: if(verbose){System.err.println("Finished reading");} jpayne@68: cris.returnList(ln); jpayne@68: if(verbose){System.err.println("Returned list");} jpayne@68: cris.close(); jpayne@68: if(verbose){System.err.println("Closed stream");} jpayne@68: if(verbose){System.err.println("Processed "+readsProcessed+" reads.");} jpayne@68: jpayne@68: jpayne@68: return counts; jpayne@68: } jpayne@68: jpayne@68: public static void addRead(final Read r, final KCountArray count, final int k, final long mask, boolean rcomp){ jpayne@68: int len=0; jpayne@68: long kmer=0; jpayne@68: byte[] bases=r.bases; jpayne@68: byte[] quals=r.quality; jpayne@68: for(int i=0; i=k){ jpayne@68: keysCounted++; jpayne@68: // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer)); jpayne@68: count.increment(kmer); jpayne@68: // System.out.println(" -> "+count.read(kmer)); jpayne@68: // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]); jpayne@68: // array[(int)kmer]++; jpayne@68: // System.out.println(" -> "+array[(int)kmer]+"\n"); jpayne@68: // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: if(rcomp){ jpayne@68: r.reverseComplement(); jpayne@68: addRead(r, count, k, mask, false); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: public static void addReadSplit(final Read r, final KCountArray count, final int k1, final int k2, final long mask1, final long mask2, final int gap, boolean rcomp){ jpayne@68: int len=0; jpayne@68: int shift=k2*2; jpayne@68: long kmer1=0; jpayne@68: long kmer2=0; jpayne@68: byte[] bases=r.bases; jpayne@68: byte[] quals=r.quality; jpayne@68: jpayne@68: assert(kmer1>=kmer2); jpayne@68: jpayne@68: // assert(false) : k1+", "+k2+", "+mask1+", "+mask2+", "+gap; jpayne@68: jpayne@68: for(int i=0, j=i+k1+gap; j=k1){ jpayne@68: keysCounted++; jpayne@68: // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer)); jpayne@68: jpayne@68: long key=(kmer1< "+count.read(kmer)); jpayne@68: // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]); jpayne@68: // array[(int)kmer]++; jpayne@68: // System.out.println(" -> "+array[(int)kmer]+"\n"); jpayne@68: // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: if(rcomp){ jpayne@68: r.reverseComplement(); jpayne@68: addReadSplit(r, count, k1, k2, mask1, mask2, gap, false); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: public static void addReadSplit(final byte[] bases, final KCountArray count, final int k1, final int k2, final long mask1, final long mask2, final int gap, boolean rcomp){ jpayne@68: int len=0; jpayne@68: int shift=k2*2; jpayne@68: long kmer1=0; jpayne@68: long kmer2=0; jpayne@68: byte[] quals=null; jpayne@68: jpayne@68: assert(kmer1>=kmer2); jpayne@68: jpayne@68: // assert(false) : k1+", "+k2+", "+mask1+", "+mask2+", "+gap; jpayne@68: jpayne@68: for(int i=0, j=i+k1+gap; j=k1){ jpayne@68: keysCounted++; jpayne@68: // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer)); jpayne@68: jpayne@68: long key=(kmer1< "+count.read(kmer)); jpayne@68: // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]); jpayne@68: // array[(int)kmer]++; jpayne@68: // System.out.println(" -> "+array[(int)kmer]+"\n"); jpayne@68: // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: if(rcomp){ jpayne@68: AminoAcid.reverseComplementBasesInPlace(bases); jpayne@68: addReadSplit(bases, count, k1, k2, mask1, mask2, gap, false); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: }