jpayne@68
|
1 package bloom;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.Locale;
|
jpayne@68
|
5
|
jpayne@68
|
6 import dna.AminoAcid;
|
jpayne@68
|
7 import fileIO.FileFormat;
|
jpayne@68
|
8 import fileIO.ReadWrite;
|
jpayne@68
|
9 import shared.Timer;
|
jpayne@68
|
10 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
11 import stream.FastaReadInputStream;
|
jpayne@68
|
12 import stream.Read;
|
jpayne@68
|
13 import structures.ListNum;
|
jpayne@68
|
14
|
jpayne@68
|
15 /**
|
jpayne@68
|
16 * @author Brian Bushnell
|
jpayne@68
|
17 * @date Jul 5, 2012
|
jpayne@68
|
18 *
|
jpayne@68
|
19 */
|
jpayne@68
|
20 public class KmerCount3 extends KmerCountAbstract {
|
jpayne@68
|
21
|
jpayne@68
|
22 public static void main(String[] args){
|
jpayne@68
|
23
|
jpayne@68
|
24 Timer t=new Timer();
|
jpayne@68
|
25
|
jpayne@68
|
26 String fname1=args[0];
|
jpayne@68
|
27 String fname2=(args.length>3 || args[1].contains(".") ? args[1] : null);
|
jpayne@68
|
28 int k=Integer.parseInt(args[args.length-2]);
|
jpayne@68
|
29 int cbits=Integer.parseInt(args[args.length-1]);
|
jpayne@68
|
30
|
jpayne@68
|
31 KCountArray2 count=null;
|
jpayne@68
|
32
|
jpayne@68
|
33 if(fileIO.FileFormat.hasFastaExtension(fname1)){
|
jpayne@68
|
34 FastaReadInputStream.MIN_READ_LEN=k;
|
jpayne@68
|
35 }
|
jpayne@68
|
36 count=countFastq(fname1, fname2, k, cbits);
|
jpayne@68
|
37
|
jpayne@68
|
38
|
jpayne@68
|
39 t.stop();
|
jpayne@68
|
40 System.out.println("Finished counting; time = "+t);
|
jpayne@68
|
41
|
jpayne@68
|
42 long[] freq=count.transformToFrequency();
|
jpayne@68
|
43
|
jpayne@68
|
44 // System.out.println(count+"\n");
|
jpayne@68
|
45 // System.out.println(Arrays.toString(freq)+"\n");
|
jpayne@68
|
46
|
jpayne@68
|
47 long sum=sum(freq);
|
jpayne@68
|
48 System.out.println("Kmer fraction:");
|
jpayne@68
|
49 int lim1=8, lim2=16;
|
jpayne@68
|
50 for(int i=0; i<lim1; i++){
|
jpayne@68
|
51 String prefix=i+"";
|
jpayne@68
|
52 while(prefix.length()<8){prefix=prefix+" ";}
|
jpayne@68
|
53 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*freq[i]/(double)sum))+"\t"+freq[i]);
|
jpayne@68
|
54 }
|
jpayne@68
|
55 while(lim1<=freq.length){
|
jpayne@68
|
56 int x=0;
|
jpayne@68
|
57 for(int i=lim1; i<lim2; i++){
|
jpayne@68
|
58 x+=freq[i];
|
jpayne@68
|
59 }
|
jpayne@68
|
60 String prefix=lim1+"-"+(lim2-1);
|
jpayne@68
|
61 if(lim2>=freq.length){prefix=lim1+"+";}
|
jpayne@68
|
62 while(prefix.length()<8){prefix=prefix+" ";}
|
jpayne@68
|
63 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum))+"\t"+x);
|
jpayne@68
|
64 lim1*=2;
|
jpayne@68
|
65 lim2=min(lim2*2, freq.length);
|
jpayne@68
|
66 }
|
jpayne@68
|
67 }
|
jpayne@68
|
68
|
jpayne@68
|
69 public static KCountArray2 countFastq(String reads1, String reads2, int k, int cbits){
|
jpayne@68
|
70 assert(k>=1 && k<20);
|
jpayne@68
|
71 final int kbits=2*k;
|
jpayne@68
|
72 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
|
jpayne@68
|
73 final long cells=mask+1;
|
jpayne@68
|
74 if(verbose){System.err.println("k="+k+", kbits="+kbits+", cells="+cells+", mask="+Long.toHexString(mask));}
|
jpayne@68
|
75 final KCountArray2 count=new KCountArray2(cells, cbits);
|
jpayne@68
|
76
|
jpayne@68
|
77 final ConcurrentReadInputStream cris;
|
jpayne@68
|
78 {
|
jpayne@68
|
79 FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
80 FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
81 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2);
|
jpayne@68
|
82 cris.start(); //4567
|
jpayne@68
|
83 }
|
jpayne@68
|
84
|
jpayne@68
|
85 assert(cris!=null) : reads1;
|
jpayne@68
|
86 System.err.println("Started cris");
|
jpayne@68
|
87 boolean paired=cris.paired();
|
jpayne@68
|
88 System.err.println("Paired: "+paired);
|
jpayne@68
|
89
|
jpayne@68
|
90 long kmer=0; //current kmer
|
jpayne@68
|
91 int len=0; //distance since last contig start or ambiguous base
|
jpayne@68
|
92
|
jpayne@68
|
93 {
|
jpayne@68
|
94 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
95 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
96
|
jpayne@68
|
97 if(reads!=null && !reads.isEmpty()){
|
jpayne@68
|
98 Read r=reads.get(0);
|
jpayne@68
|
99 assert(paired==(r.mate!=null));
|
jpayne@68
|
100 }
|
jpayne@68
|
101
|
jpayne@68
|
102 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
103 //System.err.println("reads.size()="+reads.size());
|
jpayne@68
|
104 for(Read r : reads){
|
jpayne@68
|
105 readsProcessed++;
|
jpayne@68
|
106
|
jpayne@68
|
107 len=0;
|
jpayne@68
|
108 kmer=0;
|
jpayne@68
|
109 byte[] bases=r.bases;
|
jpayne@68
|
110 byte[] quals=r.quality;
|
jpayne@68
|
111 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
112 byte b=bases[i];
|
jpayne@68
|
113 int x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
114 if(x<0 || quals[i]<minQuality){
|
jpayne@68
|
115 len=0;
|
jpayne@68
|
116 kmer=0;
|
jpayne@68
|
117 }else{
|
jpayne@68
|
118 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
119 len++;
|
jpayne@68
|
120 if(len>=k){
|
jpayne@68
|
121 // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer));
|
jpayne@68
|
122 count.increment(kmer, 1);
|
jpayne@68
|
123 // System.out.println(" -> "+count.read(kmer));
|
jpayne@68
|
124 // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]);
|
jpayne@68
|
125 // array[(int)kmer]++;
|
jpayne@68
|
126 // System.out.println(" -> "+array[(int)kmer]+"\n");
|
jpayne@68
|
127 // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3);
|
jpayne@68
|
128 }
|
jpayne@68
|
129 }
|
jpayne@68
|
130 }
|
jpayne@68
|
131
|
jpayne@68
|
132 if(r.mate!=null){
|
jpayne@68
|
133 len=0;
|
jpayne@68
|
134 kmer=0;
|
jpayne@68
|
135 bases=r.mate.bases;
|
jpayne@68
|
136 quals=r.mate.quality;
|
jpayne@68
|
137 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
138 byte b=bases[i];
|
jpayne@68
|
139 int x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
140 if(x<0 || quals[i]<minQuality){
|
jpayne@68
|
141 len=0;
|
jpayne@68
|
142 kmer=0;
|
jpayne@68
|
143 }else{
|
jpayne@68
|
144 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
145 len++;
|
jpayne@68
|
146 if(len>=k){
|
jpayne@68
|
147 count.increment(kmer, 1);
|
jpayne@68
|
148 }
|
jpayne@68
|
149 }
|
jpayne@68
|
150 }
|
jpayne@68
|
151 }
|
jpayne@68
|
152
|
jpayne@68
|
153 }
|
jpayne@68
|
154 //System.err.println("returning list");
|
jpayne@68
|
155 cris.returnList(ln);
|
jpayne@68
|
156 //System.err.println("fetching list");
|
jpayne@68
|
157 ln=cris.nextList();
|
jpayne@68
|
158 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
159 }
|
jpayne@68
|
160 System.err.println("Finished reading");
|
jpayne@68
|
161 cris.returnList(ln);
|
jpayne@68
|
162 System.err.println("Returned list");
|
jpayne@68
|
163 ReadWrite.closeStream(cris);
|
jpayne@68
|
164 System.err.println("Closed stream");
|
jpayne@68
|
165 System.err.println("Processed "+readsProcessed+" reads.");
|
jpayne@68
|
166 }
|
jpayne@68
|
167
|
jpayne@68
|
168 return count;
|
jpayne@68
|
169 }
|
jpayne@68
|
170
|
jpayne@68
|
171 }
|