jpayne@68
|
1 package bloom;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.BitSet;
|
jpayne@68
|
5 import java.util.Locale;
|
jpayne@68
|
6
|
jpayne@68
|
7 import dna.AminoAcid;
|
jpayne@68
|
8 import fileIO.FileFormat;
|
jpayne@68
|
9 import shared.Timer;
|
jpayne@68
|
10 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
11 import stream.FastaReadInputStream;
|
jpayne@68
|
12 import stream.Read;
|
jpayne@68
|
13 import structures.ListNum;
|
jpayne@68
|
14
|
jpayne@68
|
15 /**
|
jpayne@68
|
16 * @author Brian Bushnell
|
jpayne@68
|
17 * @date Jul 5, 2012
|
jpayne@68
|
18 *
|
jpayne@68
|
19 */
|
jpayne@68
|
20 public class KmerCount5 extends KmerCountAbstract {
|
jpayne@68
|
21
|
jpayne@68
|
22 public static void main(String[] args){
|
jpayne@68
|
23
|
jpayne@68
|
24 Timer t=new Timer();
|
jpayne@68
|
25
|
jpayne@68
|
26 String fname1=args[0];
|
jpayne@68
|
27 String fname2=(args.length>1 ? args[1] : null);
|
jpayne@68
|
28 int k=14;
|
jpayne@68
|
29 int cbits=16;
|
jpayne@68
|
30 int gap=0;
|
jpayne@68
|
31
|
jpayne@68
|
32 for(int i=2; i<args.length; i++){
|
jpayne@68
|
33 final String arg=args[i];
|
jpayne@68
|
34 final String[] split=arg.split("=");
|
jpayne@68
|
35 String a=split[0].toLowerCase();
|
jpayne@68
|
36 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
37
|
jpayne@68
|
38 if(a.equals("k") || a.equals("kmer")){
|
jpayne@68
|
39 k=Integer.parseInt(b);
|
jpayne@68
|
40 }else if(a.startsWith("cbits") || a.startsWith("cellbits")){
|
jpayne@68
|
41 cbits=Integer.parseInt(b);
|
jpayne@68
|
42 }else if(a.startsWith("gap")){
|
jpayne@68
|
43 gap=Integer.parseInt(b);
|
jpayne@68
|
44 }else{
|
jpayne@68
|
45 throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
46 }
|
jpayne@68
|
47 }
|
jpayne@68
|
48
|
jpayne@68
|
49 KCountArray count=null;
|
jpayne@68
|
50
|
jpayne@68
|
51 if(fileIO.FileFormat.hasFastaExtension(fname1)){
|
jpayne@68
|
52 assert(!FastaReadInputStream.SPLIT_READS);
|
jpayne@68
|
53 FastaReadInputStream.MIN_READ_LEN=k;
|
jpayne@68
|
54 }
|
jpayne@68
|
55
|
jpayne@68
|
56 if(gap==0){
|
jpayne@68
|
57 count=count(fname1, fname2, k, cbits, true);
|
jpayne@68
|
58 }else{
|
jpayne@68
|
59 count=countFastqSplit(fname1, fname2, (k+1)/2, k/2, gap, cbits, true, null);
|
jpayne@68
|
60 }
|
jpayne@68
|
61
|
jpayne@68
|
62
|
jpayne@68
|
63 t.stop();
|
jpayne@68
|
64 System.out.println("Finished counting; time = "+t);
|
jpayne@68
|
65
|
jpayne@68
|
66 printStatistics(count);
|
jpayne@68
|
67
|
jpayne@68
|
68 }
|
jpayne@68
|
69
|
jpayne@68
|
70 public static void printStatistics(KCountArray count){
|
jpayne@68
|
71 long[] freq=count.transformToFrequency();
|
jpayne@68
|
72
|
jpayne@68
|
73 // System.out.println(count+"\n");
|
jpayne@68
|
74 // System.out.println(Arrays.toString(freq)+"\n");
|
jpayne@68
|
75
|
jpayne@68
|
76 long sum=sum(freq);
|
jpayne@68
|
77 System.out.println("Kmer fraction:");
|
jpayne@68
|
78 int lim1=8, lim2=16;
|
jpayne@68
|
79 for(int i=0; i<lim1; i++){
|
jpayne@68
|
80 String prefix=i+"";
|
jpayne@68
|
81 while(prefix.length()<8){prefix=prefix+" ";}
|
jpayne@68
|
82 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*freq[i]/(double)sum))+"\t"+freq[i]);
|
jpayne@68
|
83 }
|
jpayne@68
|
84 while(lim1<=freq.length){
|
jpayne@68
|
85 int x=0;
|
jpayne@68
|
86 for(int i=lim1; i<lim2; i++){
|
jpayne@68
|
87 x+=freq[i];
|
jpayne@68
|
88 }
|
jpayne@68
|
89 String prefix=lim1+"-"+(lim2-1);
|
jpayne@68
|
90 if(lim2>=freq.length){prefix=lim1+"+";}
|
jpayne@68
|
91 while(prefix.length()<8){prefix=prefix+" ";}
|
jpayne@68
|
92 System.out.println(prefix+"\t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum))+"\t"+x);
|
jpayne@68
|
93 lim1*=2;
|
jpayne@68
|
94 lim2=min(lim2*2, freq.length);
|
jpayne@68
|
95 }
|
jpayne@68
|
96
|
jpayne@68
|
97 long sum2=sum-freq[0];
|
jpayne@68
|
98 long x=freq[1];
|
jpayne@68
|
99 System.out.println();
|
jpayne@68
|
100 System.out.println("Keys Counted: \t \t"+keysCounted);
|
jpayne@68
|
101 System.out.println("Unique: \t \t"+sum2);
|
jpayne@68
|
102 System.out.println("Avg Sites/Key: \t \t"+String.format(Locale.ROOT, "%.3f ",(keysCounted*1d/sum2)));
|
jpayne@68
|
103 System.out.println();
|
jpayne@68
|
104 System.out.println("Singleton: \t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum2))+"\t"+x);
|
jpayne@68
|
105 x=sum2-x;
|
jpayne@68
|
106 System.out.println("Useful: \t"+String.format(Locale.ROOT, "%.3f%% ",(100l*x/(double)sum2))+"\t"+x);
|
jpayne@68
|
107 }
|
jpayne@68
|
108
|
jpayne@68
|
109 public static KCountArray count(String reads1, String reads2, int k, int cbits, boolean rcomp){
|
jpayne@68
|
110 return count(reads1, reads2, k, cbits, rcomp, null);
|
jpayne@68
|
111 }
|
jpayne@68
|
112
|
jpayne@68
|
113 public static KCountArray count(String reads1, String reads2, int k, int cbits, boolean rcomp, KCountArray count){
|
jpayne@68
|
114 assert(k<32 && k>=1 && (count!=null || k<20));
|
jpayne@68
|
115 final int kbits=2*k;
|
jpayne@68
|
116 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
|
jpayne@68
|
117
|
jpayne@68
|
118 if(count==null){
|
jpayne@68
|
119 final long cells=1L<<kbits;
|
jpayne@68
|
120 if(verbose){System.err.println("k="+k+", kbits="+kbits+", cells="+cells+", mask="+Long.toHexString(mask));}
|
jpayne@68
|
121 count=KCountArray.makeNew(cells, cbits);
|
jpayne@68
|
122 }
|
jpayne@68
|
123
|
jpayne@68
|
124 final ConcurrentReadInputStream cris;
|
jpayne@68
|
125 {
|
jpayne@68
|
126 FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
127 FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
128 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2);
|
jpayne@68
|
129 cris.start(); //4567
|
jpayne@68
|
130 }
|
jpayne@68
|
131
|
jpayne@68
|
132 assert(cris!=null) : reads1;
|
jpayne@68
|
133 System.err.println("Started cris");
|
jpayne@68
|
134 boolean paired=cris.paired();
|
jpayne@68
|
135 if(verbose){System.err.println("Paired: "+paired);}
|
jpayne@68
|
136
|
jpayne@68
|
137 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
138 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
139
|
jpayne@68
|
140 if(reads!=null && !reads.isEmpty()){
|
jpayne@68
|
141 Read r=reads.get(0);
|
jpayne@68
|
142 assert(paired==(r.mate!=null));
|
jpayne@68
|
143 }
|
jpayne@68
|
144
|
jpayne@68
|
145 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
146 //System.err.println("reads.size()="+reads.size());
|
jpayne@68
|
147 for(Read r : reads){
|
jpayne@68
|
148 readsProcessed++;
|
jpayne@68
|
149
|
jpayne@68
|
150 addRead(r, count, k, mask, rcomp);
|
jpayne@68
|
151 if(r.mate!=null){
|
jpayne@68
|
152 addRead(r.mate, count, k, mask, rcomp);
|
jpayne@68
|
153 }
|
jpayne@68
|
154
|
jpayne@68
|
155 }
|
jpayne@68
|
156 //System.err.println("returning list");
|
jpayne@68
|
157 cris.returnList(ln);
|
jpayne@68
|
158 //System.err.println("fetching list");
|
jpayne@68
|
159 ln=cris.nextList();
|
jpayne@68
|
160 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
161 }
|
jpayne@68
|
162 if(verbose){System.err.println("Finished reading");}
|
jpayne@68
|
163 cris.returnList(ln);
|
jpayne@68
|
164 if(verbose){System.err.println("Returned list");}
|
jpayne@68
|
165 cris.close();
|
jpayne@68
|
166 if(verbose){System.err.println("Closed stream");}
|
jpayne@68
|
167 if(verbose){System.err.println("Processed "+readsProcessed+" reads.");}
|
jpayne@68
|
168
|
jpayne@68
|
169
|
jpayne@68
|
170 return count;
|
jpayne@68
|
171 }
|
jpayne@68
|
172
|
jpayne@68
|
173
|
jpayne@68
|
174
|
jpayne@68
|
175
|
jpayne@68
|
176 public static KCountArray count(final String reads1, final String reads2, final int k, final int cbits, final boolean rcomp,
|
jpayne@68
|
177 KCountArray counts, final KCountArray trusted, final long maxReads, final int thresh, final int detectStepsize, final boolean conservative){
|
jpayne@68
|
178
|
jpayne@68
|
179 assert(k<32 && k>=1 && (counts!=null || k<20));
|
jpayne@68
|
180 final int kbits=2*k;
|
jpayne@68
|
181 final long mask=(kbits>63 ? -1L : ~((-1L)<<kbits));
|
jpayne@68
|
182
|
jpayne@68
|
183 // System.out.println("k="+k+", kbits="+kbits+", mask="+Long.toHexString(mask)+", thresh="+thresh);
|
jpayne@68
|
184 // System.out.println("\ntrusted=\n"+trusted);
|
jpayne@68
|
185 // System.out.println("\ncount=\n"+count);
|
jpayne@68
|
186
|
jpayne@68
|
187 if(counts==null){
|
jpayne@68
|
188 final long cells=1L<<kbits;
|
jpayne@68
|
189 if(verbose){System.err.println("k="+k+", kbits="+kbits+", cells="+cells+", mask="+Long.toHexString(mask));}
|
jpayne@68
|
190 counts=KCountArray.makeNew(cells, cbits);
|
jpayne@68
|
191 }
|
jpayne@68
|
192
|
jpayne@68
|
193 final ConcurrentReadInputStream cris;
|
jpayne@68
|
194 {
|
jpayne@68
|
195 FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
196 FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
197 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2);
|
jpayne@68
|
198 cris.start(); //4567
|
jpayne@68
|
199 }
|
jpayne@68
|
200
|
jpayne@68
|
201 assert(cris!=null) : reads1;
|
jpayne@68
|
202 System.err.println("Started cris");
|
jpayne@68
|
203 boolean paired=cris.paired();
|
jpayne@68
|
204 if(verbose){System.err.println("Paired: "+paired);}
|
jpayne@68
|
205
|
jpayne@68
|
206 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
207 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
208
|
jpayne@68
|
209 if(reads!=null && !reads.isEmpty()){
|
jpayne@68
|
210 Read r=reads.get(0);
|
jpayne@68
|
211 assert(paired==(r.mate!=null));
|
jpayne@68
|
212 }
|
jpayne@68
|
213
|
jpayne@68
|
214
|
jpayne@68
|
215 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
216 //System.err.println("reads.size()="+reads.size());
|
jpayne@68
|
217 for(Read r : reads){
|
jpayne@68
|
218
|
jpayne@68
|
219 Read r2=r.mate;
|
jpayne@68
|
220 {
|
jpayne@68
|
221 if(trusted!=null){
|
jpayne@68
|
222 BitSet bs=(conservative ? ErrorCorrect.detectErrorsBulk(r, trusted, k, thresh, detectStepsize) :
|
jpayne@68
|
223 ErrorCorrect.detectTrusted(r, trusted, k, thresh, detectStepsize));
|
jpayne@68
|
224 // System.out.println("\n"+toString(bs, r.length()));
|
jpayne@68
|
225 // System.out.println(new String(r.bases));
|
jpayne@68
|
226 for(int i=bs.nextClearBit(0); i<r.length(); i=bs.nextClearBit(i+1)){
|
jpayne@68
|
227 r.bases[i]='N';
|
jpayne@68
|
228 r.quality[i]=0;
|
jpayne@68
|
229 }
|
jpayne@68
|
230 // System.out.println(new String(r.bases));
|
jpayne@68
|
231 // System.out.println("used = "+String.format(Locale.ROOT, "%.3f%%",count.usedFraction()*100));
|
jpayne@68
|
232 // System.out.println("used = "+((KCountArray4)count).cellsUsed());
|
jpayne@68
|
233 // if(bs.length()<r.length()){r=null;}
|
jpayne@68
|
234 }
|
jpayne@68
|
235 // if(r!=null){KmerCount5.addRead(r, count, k, mask, rcomp);}
|
jpayne@68
|
236 KmerCount5.addRead(r, counts, k, mask, rcomp);
|
jpayne@68
|
237 }
|
jpayne@68
|
238 if(r2!=null){
|
jpayne@68
|
239 if(trusted!=null){
|
jpayne@68
|
240 BitSet bs=(conservative ? ErrorCorrect.detectErrorsBulk(r2, trusted, k, thresh, detectStepsize) :
|
jpayne@68
|
241 ErrorCorrect.detectTrusted(r2, trusted, k, thresh, detectStepsize));
|
jpayne@68
|
242 for(int i=bs.nextClearBit(0); i<r2.length(); i=bs.nextClearBit(i+1)){
|
jpayne@68
|
243 r2.bases[i]='N';
|
jpayne@68
|
244 r2.quality[i]=0;
|
jpayne@68
|
245 }
|
jpayne@68
|
246 }
|
jpayne@68
|
247 KmerCount5.addRead(r2, counts, k, mask, rcomp);
|
jpayne@68
|
248 }
|
jpayne@68
|
249
|
jpayne@68
|
250 }
|
jpayne@68
|
251 //System.err.println("returning list");
|
jpayne@68
|
252 cris.returnList(ln);
|
jpayne@68
|
253 //System.err.println("fetching list");
|
jpayne@68
|
254 ln=cris.nextList();
|
jpayne@68
|
255 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
256 }
|
jpayne@68
|
257
|
jpayne@68
|
258 if(verbose){System.err.println("Finished reading");}
|
jpayne@68
|
259 cris.returnList(ln);
|
jpayne@68
|
260 if(verbose){System.err.println("Returned list");}
|
jpayne@68
|
261 cris.close();
|
jpayne@68
|
262 if(verbose){System.err.println("Closed stream");}
|
jpayne@68
|
263
|
jpayne@68
|
264 // System.out.println("*** after ***");
|
jpayne@68
|
265 // System.out.println("\ntrusted=\n"+trusted);
|
jpayne@68
|
266 // System.out.println("\ncount=\n"+count);
|
jpayne@68
|
267
|
jpayne@68
|
268 return counts;
|
jpayne@68
|
269 }
|
jpayne@68
|
270
|
jpayne@68
|
271
|
jpayne@68
|
272 public static KCountArray countFastqSplit(String reads1, String reads2, int k1, int k2, int gap, int cbits, boolean rcomp, KCountArray counts){
|
jpayne@68
|
273 int k=k1+k2;
|
jpayne@68
|
274 assert(k<32 && k>=1 && (counts!=null || k<20));
|
jpayne@68
|
275 assert(gap>=0);
|
jpayne@68
|
276 final int kbits1=2*k1;
|
jpayne@68
|
277 final int kbits2=2*k2;
|
jpayne@68
|
278 final long mask1=~((-1L)<<(kbits1));
|
jpayne@68
|
279 final long mask2=~((-1L)<<(kbits2));
|
jpayne@68
|
280
|
jpayne@68
|
281 if(counts==null){
|
jpayne@68
|
282 final long cells=1L<<(kbits1+kbits2);
|
jpayne@68
|
283 if(verbose){System.err.println("k1="+k1+", k2="+k2+", kbits1="+kbits1+", kbits2="+kbits2+", cells="+cells+
|
jpayne@68
|
284 ", mask1="+Long.toHexString(mask1)+", mask2="+Long.toHexString(mask2));}
|
jpayne@68
|
285 counts=KCountArray.makeNew(cells, cbits);
|
jpayne@68
|
286 }
|
jpayne@68
|
287
|
jpayne@68
|
288 final ConcurrentReadInputStream cris;
|
jpayne@68
|
289 {
|
jpayne@68
|
290 FileFormat ff1=FileFormat.testInput(reads1, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
291 FileFormat ff2=FileFormat.testInput(reads2, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
292 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ff1, ff2);
|
jpayne@68
|
293 cris.start(); //4567
|
jpayne@68
|
294 }
|
jpayne@68
|
295
|
jpayne@68
|
296 assert(cris!=null) : reads1;
|
jpayne@68
|
297 System.err.println("Started cris");
|
jpayne@68
|
298 boolean paired=cris.paired();
|
jpayne@68
|
299 if(verbose){System.err.println("Paired: "+paired);}
|
jpayne@68
|
300
|
jpayne@68
|
301 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
302 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
303
|
jpayne@68
|
304 if(reads!=null && !reads.isEmpty()){
|
jpayne@68
|
305 Read r=reads.get(0);
|
jpayne@68
|
306 assert(paired==(r.mate!=null));
|
jpayne@68
|
307 }
|
jpayne@68
|
308
|
jpayne@68
|
309 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
310 //System.err.println("reads.size()="+reads.size());
|
jpayne@68
|
311 for(Read r : reads){
|
jpayne@68
|
312 readsProcessed++;
|
jpayne@68
|
313
|
jpayne@68
|
314 addReadSplit(r, counts, k1, k2, mask1, mask2, gap, rcomp);
|
jpayne@68
|
315 if(r.mate!=null){
|
jpayne@68
|
316 addReadSplit(r.mate, counts, k1, k2, mask1, mask2, gap, rcomp);
|
jpayne@68
|
317 }
|
jpayne@68
|
318
|
jpayne@68
|
319 }
|
jpayne@68
|
320 //System.err.println("returning list");
|
jpayne@68
|
321 cris.returnList(ln);
|
jpayne@68
|
322 //System.err.println("fetching list");
|
jpayne@68
|
323 ln=cris.nextList();
|
jpayne@68
|
324 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
325 }
|
jpayne@68
|
326 if(verbose){System.err.println("Finished reading");}
|
jpayne@68
|
327 cris.returnList(ln);
|
jpayne@68
|
328 if(verbose){System.err.println("Returned list");}
|
jpayne@68
|
329 cris.close();
|
jpayne@68
|
330 if(verbose){System.err.println("Closed stream");}
|
jpayne@68
|
331 if(verbose){System.err.println("Processed "+readsProcessed+" reads.");}
|
jpayne@68
|
332
|
jpayne@68
|
333
|
jpayne@68
|
334 return counts;
|
jpayne@68
|
335 }
|
jpayne@68
|
336
|
jpayne@68
|
337 public static void addRead(final Read r, final KCountArray count, final int k, final long mask, boolean rcomp){
|
jpayne@68
|
338 int len=0;
|
jpayne@68
|
339 long kmer=0;
|
jpayne@68
|
340 byte[] bases=r.bases;
|
jpayne@68
|
341 byte[] quals=r.quality;
|
jpayne@68
|
342 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
343 byte b=bases[i];
|
jpayne@68
|
344 int x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
345 if(x<0 || (quals!=null && quals[i]<minQuality)){
|
jpayne@68
|
346 len=0;
|
jpayne@68
|
347 kmer=0;
|
jpayne@68
|
348 }else{
|
jpayne@68
|
349 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
350 len++;
|
jpayne@68
|
351 if(len>=k){
|
jpayne@68
|
352 keysCounted++;
|
jpayne@68
|
353 // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer));
|
jpayne@68
|
354 count.increment(kmer);
|
jpayne@68
|
355 // System.out.println(" -> "+count.read(kmer));
|
jpayne@68
|
356 // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]);
|
jpayne@68
|
357 // array[(int)kmer]++;
|
jpayne@68
|
358 // System.out.println(" -> "+array[(int)kmer]+"\n");
|
jpayne@68
|
359 // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3);
|
jpayne@68
|
360 }
|
jpayne@68
|
361 }
|
jpayne@68
|
362 }
|
jpayne@68
|
363 if(rcomp){
|
jpayne@68
|
364 r.reverseComplement();
|
jpayne@68
|
365 addRead(r, count, k, mask, false);
|
jpayne@68
|
366 }
|
jpayne@68
|
367 }
|
jpayne@68
|
368
|
jpayne@68
|
369 public static void addReadSplit(final Read r, final KCountArray count, final int k1, final int k2, final long mask1, final long mask2, final int gap, boolean rcomp){
|
jpayne@68
|
370 int len=0;
|
jpayne@68
|
371 int shift=k2*2;
|
jpayne@68
|
372 long kmer1=0;
|
jpayne@68
|
373 long kmer2=0;
|
jpayne@68
|
374 byte[] bases=r.bases;
|
jpayne@68
|
375 byte[] quals=r.quality;
|
jpayne@68
|
376
|
jpayne@68
|
377 assert(kmer1>=kmer2);
|
jpayne@68
|
378
|
jpayne@68
|
379 // assert(false) : k1+", "+k2+", "+mask1+", "+mask2+", "+gap;
|
jpayne@68
|
380
|
jpayne@68
|
381 for(int i=0, j=i+k1+gap; j<bases.length; i++, j++){
|
jpayne@68
|
382 int x1=AminoAcid.baseToNumber[bases[i]];
|
jpayne@68
|
383 int x2=AminoAcid.baseToNumber[bases[j]];
|
jpayne@68
|
384 if(x1<0 || x2<0 || (quals!=null && (quals[i]<minQuality || quals[j]<minQuality))){
|
jpayne@68
|
385 len=0;
|
jpayne@68
|
386 kmer1=0;
|
jpayne@68
|
387 kmer2=0;
|
jpayne@68
|
388 }else{
|
jpayne@68
|
389 kmer1=((kmer1<<2)|x1)&mask1;
|
jpayne@68
|
390 kmer2=((kmer2<<2)|x2)&mask2;
|
jpayne@68
|
391 len++;
|
jpayne@68
|
392 if(len>=k1){
|
jpayne@68
|
393 keysCounted++;
|
jpayne@68
|
394 // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer));
|
jpayne@68
|
395
|
jpayne@68
|
396 long key=(kmer1<<shift)|kmer2;
|
jpayne@68
|
397 // System.err.println(Long.toHexString(key));
|
jpayne@68
|
398 count.increment(key);
|
jpayne@68
|
399 // System.out.println(" -> "+count.read(kmer));
|
jpayne@68
|
400 // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]);
|
jpayne@68
|
401 // array[(int)kmer]++;
|
jpayne@68
|
402 // System.out.println(" -> "+array[(int)kmer]+"\n");
|
jpayne@68
|
403 // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3);
|
jpayne@68
|
404 }
|
jpayne@68
|
405 }
|
jpayne@68
|
406 }
|
jpayne@68
|
407 if(rcomp){
|
jpayne@68
|
408 r.reverseComplement();
|
jpayne@68
|
409 addReadSplit(r, count, k1, k2, mask1, mask2, gap, false);
|
jpayne@68
|
410 }
|
jpayne@68
|
411 }
|
jpayne@68
|
412
|
jpayne@68
|
413 public static void addReadSplit(final byte[] bases, final KCountArray count, final int k1, final int k2, final long mask1, final long mask2, final int gap, boolean rcomp){
|
jpayne@68
|
414 int len=0;
|
jpayne@68
|
415 int shift=k2*2;
|
jpayne@68
|
416 long kmer1=0;
|
jpayne@68
|
417 long kmer2=0;
|
jpayne@68
|
418 byte[] quals=null;
|
jpayne@68
|
419
|
jpayne@68
|
420 assert(kmer1>=kmer2);
|
jpayne@68
|
421
|
jpayne@68
|
422 // assert(false) : k1+", "+k2+", "+mask1+", "+mask2+", "+gap;
|
jpayne@68
|
423
|
jpayne@68
|
424 for(int i=0, j=i+k1+gap; j<bases.length; i++, j++){
|
jpayne@68
|
425 int x1=AminoAcid.baseToNumber[bases[i]];
|
jpayne@68
|
426 int x2=AminoAcid.baseToNumber[bases[j]];
|
jpayne@68
|
427 if(x1<0 || x2<0 || (quals!=null && (quals[i]<minQuality || quals[j]<minQuality))){
|
jpayne@68
|
428 len=0;
|
jpayne@68
|
429 kmer1=0;
|
jpayne@68
|
430 kmer2=0;
|
jpayne@68
|
431 }else{
|
jpayne@68
|
432 kmer1=((kmer1<<2)|x1)&mask1;
|
jpayne@68
|
433 kmer2=((kmer2<<2)|x2)&mask2;
|
jpayne@68
|
434 len++;
|
jpayne@68
|
435 if(len>=k1){
|
jpayne@68
|
436 keysCounted++;
|
jpayne@68
|
437 // System.out.print("Incrementing "+Long.toHexString(kmer)+": "+count.read(kmer));
|
jpayne@68
|
438
|
jpayne@68
|
439 long key=(kmer1<<shift)|kmer2;
|
jpayne@68
|
440 System.out.println(Long.toHexString(kmer1));
|
jpayne@68
|
441 System.out.println(Long.toHexString(kmer2));
|
jpayne@68
|
442 System.out.println(Long.toHexString(key));
|
jpayne@68
|
443 count.increment(key);
|
jpayne@68
|
444 // System.out.println(" -> "+count.read(kmer));
|
jpayne@68
|
445 // System.out.print("Incrementing array for "+Long.toHexString(kmer)+": "+array[(int)kmer]);
|
jpayne@68
|
446 // array[(int)kmer]++;
|
jpayne@68
|
447 // System.out.println(" -> "+array[(int)kmer]+"\n");
|
jpayne@68
|
448 // assert(array[(int)kmer]==count.read(kmer) || array[(int)kmer]>3);
|
jpayne@68
|
449 }
|
jpayne@68
|
450 }
|
jpayne@68
|
451 }
|
jpayne@68
|
452 if(rcomp){
|
jpayne@68
|
453 AminoAcid.reverseComplementBasesInPlace(bases);
|
jpayne@68
|
454 addReadSplit(bases, count, k1, k2, mask1, mask2, gap, false);
|
jpayne@68
|
455 }
|
jpayne@68
|
456 }
|
jpayne@68
|
457
|
jpayne@68
|
458 }
|