Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/shared/ReadStats.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package shared; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Arrays; | |
5 import java.util.Locale; | |
6 | |
7 import align2.QualityTools; | |
8 import dna.AminoAcid; | |
9 import fileIO.ByteStreamWriter; | |
10 import fileIO.TextStreamWriter; | |
11 import stream.Read; | |
12 import stream.SamLine; | |
13 import structures.ByteBuilder; | |
14 import structures.EntropyTracker; | |
15 import structures.LongList; | |
16 import structures.SuperLongList; | |
17 | |
18 | |
19 /** | |
20 * @author Brian Bushnell | |
21 * @date Mar 18, 2013 | |
22 * | |
23 */ | |
24 public class ReadStats { | |
25 | |
26 public ReadStats(){this(true);} | |
27 | |
28 public ReadStats(boolean addToList){ | |
29 if(addToList){ | |
30 synchronized(ReadStats.class){ | |
31 objectList.add(this); | |
32 } | |
33 } | |
34 | |
35 if(COLLECT_QUALITY_STATS){ | |
36 aqualArray=new long[2][127]; | |
37 qualLength=new long[2][MAXLEN]; | |
38 qualSum=new long[2][MAXLEN]; | |
39 qualSumDouble=new double[2][MAXLEN]; | |
40 bqualHistOverall=new long[127]; | |
41 }else{ | |
42 aqualArray=null; | |
43 qualLength=null; | |
44 qualSum=null; | |
45 qualSumDouble=null; | |
46 bqualHistOverall=null; | |
47 } | |
48 | |
49 if(BQUAL_HIST_FILE!=null){ | |
50 bqualHist=new long[2][MAXLEN][127]; | |
51 }else{ | |
52 bqualHist=null; | |
53 } | |
54 | |
55 if(QUAL_COUNT_HIST_FILE!=null){ | |
56 qcountHist=new long[2][127]; | |
57 }else{ | |
58 qcountHist=null; | |
59 } | |
60 | |
61 if(COLLECT_MATCH_STATS){ | |
62 matchSum=new long[2][MAXLEN]; | |
63 delSum=new long[2][MAXLEN]; | |
64 insSum=new long[2][MAXLEN]; | |
65 subSum=new long[2][MAXLEN]; | |
66 nSum=new long[2][MAXLEN]; | |
67 clipSum=new long[2][MAXLEN]; | |
68 otherSum=new long[2][MAXLEN]; | |
69 }else{ | |
70 matchSum=null; | |
71 delSum=null; | |
72 insSum=null; | |
73 subSum=null; | |
74 nSum=null; | |
75 clipSum=null; | |
76 otherSum=null; | |
77 } | |
78 | |
79 if(COLLECT_QUALITY_ACCURACY){ | |
80 qualMatch=new long[99]; | |
81 qualSub=new long[99]; | |
82 qualIns=new long[99]; | |
83 qualDel=new long[99]; | |
84 }else{ | |
85 qualMatch=null; | |
86 qualSub=null; | |
87 qualIns=null; | |
88 qualDel=null; | |
89 } | |
90 | |
91 if(COLLECT_INSERT_STATS){ | |
92 insertHist=new LongList(MAXLEN); | |
93 }else{ | |
94 insertHist=null; | |
95 } | |
96 | |
97 if(COLLECT_BASE_STATS){ | |
98 baseHist=new LongList[2][5]; | |
99 for(int i=0; i<baseHist.length; i++){ | |
100 for(int j=0; j<baseHist[i].length; j++){ | |
101 baseHist[i][j]=new LongList(400); | |
102 } | |
103 } | |
104 }else{ | |
105 baseHist=null; | |
106 } | |
107 | |
108 | |
109 if(COLLECT_INDEL_STATS){ | |
110 insHist=new LongList(100); | |
111 delHist=new LongList(100); | |
112 delHist2=new LongList(100); | |
113 }else{ | |
114 insHist=null; | |
115 delHist=null; | |
116 delHist2=null; | |
117 } | |
118 | |
119 if(COLLECT_GC_STATS){ | |
120 gcHist=new long[GC_BINS+1]; | |
121 }else{ | |
122 gcHist=null; | |
123 } | |
124 | |
125 if(COLLECT_ENTROPY_STATS){ | |
126 entropyHist=new long[ENTROPY_BINS+1]; | |
127 eTracker=new EntropyTracker(Shared.AMINO_IN, 0, true); | |
128 }else{ | |
129 entropyHist=null; | |
130 eTracker=null; | |
131 } | |
132 | |
133 if(COLLECT_ERROR_STATS){ | |
134 errorHist=new LongList(100); | |
135 }else{ | |
136 errorHist=null; | |
137 } | |
138 | |
139 if(COLLECT_LENGTH_STATS){ | |
140 lengthHist=new SuperLongList(20000); | |
141 }else{ | |
142 lengthHist=null; | |
143 } | |
144 | |
145 if(COLLECT_IDENTITY_STATS){ | |
146 idHist=new long[ID_BINS+1]; | |
147 idBaseHist=new long[ID_BINS+1]; | |
148 }else{ | |
149 idHist=null; | |
150 idBaseHist=null; | |
151 } | |
152 | |
153 if(COLLECT_TIME_STATS){ | |
154 timeHist=new LongList(1001); | |
155 }else{ | |
156 timeHist=null; | |
157 } | |
158 | |
159 } | |
160 | |
161 public static ReadStats mergeAll(){ | |
162 if(objectList==null || objectList.isEmpty()){return merged=null;} | |
163 if(objectList.size()==1){return merged=objectList.get(0);} | |
164 | |
165 ReadStats x=new ReadStats(false); | |
166 for(ReadStats rs : objectList){ | |
167 x.read2Count+=rs.read2Count; | |
168 if(COLLECT_QUALITY_STATS){ | |
169 for(int i=0; i<MAXLEN; i++){ | |
170 x.qualLength[0][i]+=rs.qualLength[0][i]; | |
171 x.qualLength[1][i]+=rs.qualLength[1][i]; | |
172 x.qualSum[0][i]+=rs.qualSum[0][i]; | |
173 x.qualSum[1][i]+=rs.qualSum[1][i]; | |
174 x.qualSumDouble[0][i]+=rs.qualSumDouble[0][i]; | |
175 x.qualSumDouble[1][i]+=rs.qualSumDouble[1][i]; | |
176 } | |
177 for(int i=0; i<x.aqualArray[0].length; i++){ | |
178 x.aqualArray[0][i]+=rs.aqualArray[0][i]; | |
179 x.aqualArray[1][i]+=rs.aqualArray[1][i]; | |
180 } | |
181 for(int i=0; i<x.bqualHistOverall.length; i++){ | |
182 x.bqualHistOverall[i]+=rs.bqualHistOverall[i]; | |
183 } | |
184 if(BQUAL_HIST_FILE!=null){ | |
185 for(int i=0; i<x.bqualHist.length; i++){ | |
186 for(int j=0; j<x.bqualHist[i].length; j++){ | |
187 for(int k=0; k<x.bqualHist[i][j].length; k++){ | |
188 x.bqualHist[i][j][k]+=rs.bqualHist[i][j][k]; | |
189 } | |
190 } | |
191 } | |
192 } | |
193 if(QUAL_COUNT_HIST_FILE!=null){ | |
194 for(int i=0; i<x.qcountHist.length; i++){ | |
195 for(int j=0; j<x.qcountHist[i].length; j++){ | |
196 x.qcountHist[i][j]+=rs.qcountHist[i][j]; | |
197 } | |
198 } | |
199 } | |
200 } | |
201 | |
202 if(COLLECT_MATCH_STATS){ | |
203 for(int i=0; i<MAXLEN; i++){ | |
204 x.matchSum[0][i]+=rs.matchSum[0][i]; | |
205 x.matchSum[1][i]+=rs.matchSum[1][i]; | |
206 x.delSum[0][i]+=rs.delSum[0][i]; | |
207 x.delSum[1][i]+=rs.delSum[1][i]; | |
208 x.insSum[0][i]+=rs.insSum[0][i]; | |
209 x.insSum[1][i]+=rs.insSum[1][i]; | |
210 x.subSum[0][i]+=rs.subSum[0][i]; | |
211 x.subSum[1][i]+=rs.subSum[1][i]; | |
212 x.nSum[0][i]+=rs.nSum[0][i]; | |
213 x.nSum[1][i]+=rs.nSum[1][i]; | |
214 x.clipSum[0][i]+=rs.clipSum[0][i]; | |
215 x.clipSum[1][i]+=rs.clipSum[1][i]; | |
216 x.otherSum[0][i]+=rs.otherSum[0][i]; | |
217 x.otherSum[1][i]+=rs.otherSum[1][i]; | |
218 } | |
219 } | |
220 if(COLLECT_INSERT_STATS){ | |
221 x.insertHist.incrementBy(rs.insertHist); | |
222 x.pairedCount+=rs.pairedCount; | |
223 x.unpairedCount+=rs.unpairedCount; | |
224 } | |
225 if(COLLECT_BASE_STATS){ | |
226 for(int i=0; i<rs.baseHist.length; i++){ | |
227 for(int j=0; j<rs.baseHist[i].length; j++){ | |
228 x.baseHist[i][j].incrementBy(rs.baseHist[i][j]); | |
229 } | |
230 } | |
231 } | |
232 if(COLLECT_QUALITY_ACCURACY){ | |
233 for(int i=0; i<x.qualMatch.length; i++){ | |
234 x.qualMatch[i]+=rs.qualMatch[i]; | |
235 x.qualSub[i]+=rs.qualSub[i]; | |
236 x.qualIns[i]+=rs.qualIns[i]; | |
237 x.qualDel[i]+=rs.qualDel[i]; | |
238 } | |
239 } | |
240 | |
241 | |
242 if(COLLECT_INDEL_STATS){ | |
243 x.delHist.incrementBy(rs.delHist); | |
244 x.delHist2.incrementBy(rs.delHist2); | |
245 x.insHist.incrementBy(rs.insHist); | |
246 } | |
247 | |
248 if(COLLECT_LENGTH_STATS){ | |
249 x.lengthHist.incrementBy(rs.lengthHist); | |
250 } | |
251 | |
252 | |
253 if(COLLECT_ERROR_STATS){ | |
254 x.errorHist.incrementBy(rs.errorHist); | |
255 } | |
256 | |
257 if(COLLECT_GC_STATS){ | |
258 for(int i=0; i<rs.gcHist.length; i++){ | |
259 x.gcHist[i]+=rs.gcHist[i]; | |
260 } | |
261 } | |
262 | |
263 if(COLLECT_ENTROPY_STATS){ | |
264 for(int i=0; i<rs.entropyHist.length; i++){ | |
265 x.entropyHist[i]+=rs.entropyHist[i]; | |
266 } | |
267 } | |
268 | |
269 if(COLLECT_IDENTITY_STATS){ | |
270 for(int i=0; i<rs.idHist.length; i++){ | |
271 x.idHist[i]+=rs.idHist[i]; | |
272 x.idBaseHist[i]+=rs.idBaseHist[i]; | |
273 } | |
274 } | |
275 | |
276 if(COLLECT_TIME_STATS){ | |
277 x.timeHist.incrementBy(rs.timeHist); | |
278 } | |
279 | |
280 x.gcMaxReadLen=Tools.max(x.gcMaxReadLen, rs.gcMaxReadLen); | |
281 x.idMaxReadLen=Tools.max(x.idMaxReadLen, rs.idMaxReadLen); | |
282 } | |
283 | |
284 merged=x; | |
285 return x; | |
286 } | |
287 | |
288 public void addToQualityHistogram(final Read r){ | |
289 if(r==null){return;} | |
290 addToQualityHistogram2(r); | |
291 if(r.mate!=null){addToQualityHistogram2(r.mate);} | |
292 } | |
293 | |
294 private void addToQualityHistogram2(final Read r){ | |
295 final int pairnum=r.samline==null ? r.pairnum() : r.samline.pairnum(); | |
296 if(r==null || r.quality==null || r.quality.length<1){return;} | |
297 byte[] quals=r.quality, bases=r.bases; | |
298 final Object obj=r.obj; | |
299 if(obj!=null && obj.getClass()==TrimRead.class){ | |
300 quals=(pairnum==0 ? ((TrimRead)obj).qual1 : ((TrimRead)obj).qual2); | |
301 bases=(pairnum==0 ? ((TrimRead)obj).bases1 : ((TrimRead)obj).bases2); | |
302 } | |
303 if(pairnum==1){read2Count++;} | |
304 addToQualityHistogram(quals, pairnum); | |
305 int x=Read.avgQualityByProbabilityInt(bases, quals, true, 0); | |
306 aqualArray[pairnum][x]++; | |
307 if(BQUAL_HIST_FILE!=null){ | |
308 addToBQualityHistogram(quals, pairnum); | |
309 } | |
310 if(QUAL_COUNT_HIST_FILE!=null){ | |
311 addToQCountHistogram(quals, pairnum); | |
312 } | |
313 } | |
314 | |
315 public void addToQualityHistogram(byte[] qual, int pairnum){ | |
316 if(qual==null || qual.length<1){return;} | |
317 final int limit=Tools.min(qual.length, MAXLEN); | |
318 final long[] ql=qualLength[pairnum], qs=qualSum[pairnum]; | |
319 final double[] qsd=qualSumDouble[pairnum]; | |
320 ql[limit-1]++; | |
321 for(int i=0; i<limit; i++){ | |
322 qs[i]+=qual[i]; | |
323 qsd[i]+=QualityTools.PROB_ERROR[qual[i]]; | |
324 } | |
325 for(byte q : qual){ | |
326 bqualHistOverall[q]++; | |
327 } | |
328 } | |
329 | |
330 private void addToBQualityHistogram(byte[] qual, int pairnum){ | |
331 if(qual==null || qual.length<1){return;} | |
332 final int limit=Tools.min(qual.length, MAXLEN); | |
333 final long[][] bqh=bqualHist[pairnum]; | |
334 for(int i=0; i<limit; i++){ | |
335 bqh[i][qual[i]]++; | |
336 } | |
337 } | |
338 | |
339 private void addToQCountHistogram(byte[] qual, int pairnum){ | |
340 if(qual==null || qual.length<1){return;} | |
341 final long[] qch=qcountHist[pairnum]; | |
342 for(byte q : qual){ | |
343 qch[q]++; | |
344 } | |
345 } | |
346 | |
347 public void addToQualityAccuracy(final Read r){ | |
348 if(r==null){return;} | |
349 addToQualityAccuracy(r, 0); | |
350 if(r.mate!=null){addToQualityAccuracy(r.mate, 1);} | |
351 } | |
352 | |
353 public void addToQualityAccuracy(final Read r, int pairnum){ | |
354 if(r==null || r.quality==null || r.quality.length<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;} | |
355 final byte[] bases=r.bases; | |
356 final byte[] qual=r.quality; | |
357 byte[] match=r.match; | |
358 | |
359 if(r.shortmatch()){match=Read.toLongMatchString(match);} | |
360 | |
361 final boolean plus=(r.strand()==0); | |
362 int bpos=0; | |
363 byte lastm='A'; | |
364 for(int mpos=0; mpos<match.length/* && rpos<limit*/; mpos++){ | |
365 byte b=bases[bpos]; | |
366 byte q=qual[bpos]; | |
367 byte m=match[plus ? mpos : match.length-mpos-1]; | |
368 | |
369 { | |
370 if(m=='m'){ | |
371 qualMatch[q]++; | |
372 }else if(m=='S'){ | |
373 qualSub[q]++; | |
374 }else if(m=='I'){ | |
375 if(AminoAcid.isFullyDefined(b)){qualIns[q]++;} | |
376 }else if(m=='N'){ | |
377 //do nothing | |
378 }else if(m=='C'){ | |
379 //do nothing | |
380 }else if(m=='V' || m=='i'){ | |
381 //do nothing | |
382 }else if(m=='D'){ | |
383 if(lastm!=m){ | |
384 int x=bpos, y=bpos-1; | |
385 if(x<qual.length){ | |
386 if(AminoAcid.isFullyDefined(bases[x])){ | |
387 qualDel[qual[x]]++; | |
388 } | |
389 } | |
390 if(y>=0){ | |
391 if(AminoAcid.isFullyDefined(bases[y])){ | |
392 qualDel[qual[y]]++; | |
393 } | |
394 } | |
395 } | |
396 bpos--; | |
397 }else if(m=='d'){ | |
398 bpos--; | |
399 }else{ | |
400 assert(!Tools.isDigit(m)) : ((char)m); | |
401 } | |
402 } | |
403 | |
404 bpos++; | |
405 lastm=m; | |
406 } | |
407 | |
408 } | |
409 | |
410 public void addToErrorHistogram(final Read r){ | |
411 if(r==null){return;} | |
412 addToErrorHistogram(r, 0); | |
413 if(r.mate!=null){addToErrorHistogram(r.mate, 1);} | |
414 } | |
415 | |
416 private void addToErrorHistogram(final Read r, int pairnum){ | |
417 if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;} | |
418 r.toLongMatchString(false); | |
419 int x=r.countSubs(); | |
420 errorHist.increment(x, 1); | |
421 } | |
422 | |
423 public void addToLengthHistogram(final Read r){ | |
424 if(r==null){return;} | |
425 addToLengthHistogram(r, 0); | |
426 if(r.mate!=null){addToLengthHistogram(r.mate, 1);} | |
427 } | |
428 | |
429 private void addToLengthHistogram(final Read r, int pairnum){ | |
430 if(r==null || r.bases==null){return;} | |
431 int x=r.length();//Tools.min(r.length(), MAXLENGTHLEN); Old style before SLL | |
432 lengthHist.increment(x, 1); | |
433 } | |
434 | |
435 public void addToGCHistogram(final Read r1){ | |
436 if(r1==null){return;} | |
437 final Read r2=r1.mate; | |
438 final int len1=r1.length(), len2=r1.mateLength(); | |
439 | |
440 final float gc1=(len1>0 ? r1.gc() : -1); | |
441 final float gc2=(len2>0 ? r2.gc() : -1); | |
442 if(usePairGC){ | |
443 final float gc; | |
444 if(r2==null){ | |
445 gc=gc1; | |
446 }else{ | |
447 gc=(gc1*len1+gc2*len2)/(len1+len2); | |
448 } | |
449 addToGCHistogram(gc, len1+len2); | |
450 }else{ | |
451 addToGCHistogram(gc1, len1); | |
452 addToGCHistogram(gc2, len2); | |
453 } | |
454 } | |
455 | |
456 private void addToGCHistogram(final float gc, final int len){ | |
457 if(gc<0 || len<1){return;} | |
458 gcHist[Tools.min(GC_BINS, (int)(gc*(GC_BINS+1)))]++; | |
459 gcMaxReadLen=Tools.max(len, gcMaxReadLen); | |
460 } | |
461 | |
462 public void addToEntropyHistogram(final Read r1){ | |
463 if(r1==null){return;} | |
464 final Read r2=r1.mate; | |
465 final int len1=r1.length(), len2=r1.mateLength(); | |
466 | |
467 final float entropy1=(len1>0 ? eTracker.averageEntropy(r1.bases, allowEntropyNs) : -1); | |
468 final float entropy2=(len2>0 ? eTracker.averageEntropy(r2.bases, allowEntropyNs) : -1); | |
469 if(/* usePairEntropy */ false){ | |
470 final float entropy; | |
471 if(r2==null){ | |
472 entropy=entropy1; | |
473 }else{ | |
474 entropy=(entropy1*len1+entropy2*len2)/(len1+len2); | |
475 } | |
476 addToEntropyHistogram(entropy, len1+len2); | |
477 }else{ | |
478 addToEntropyHistogram(entropy1, len1); | |
479 addToEntropyHistogram(entropy2, len2); | |
480 } | |
481 } | |
482 | |
483 private void addToEntropyHistogram(final float entropy, final int len){ | |
484 if(entropy<0 || len<1){return;} | |
485 entropyHist[Tools.min(ENTROPY_BINS, (int)(entropy*(ENTROPY_BINS+1)))]++; | |
486 } | |
487 | |
488 public void addToIdentityHistogram(final Read r){ | |
489 if(r==null){return;} | |
490 addToIdentityHistogram(r, 0); | |
491 if(r.mate!=null){addToIdentityHistogram(r.mate, 1);} | |
492 } | |
493 | |
494 private void addToIdentityHistogram(final Read r, int pairnum){ | |
495 if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;} | |
496 float id=r.identity(); | |
497 idHist[(int)(id*ID_BINS)]++; | |
498 idBaseHist[(int)(id*ID_BINS)]+=r.length(); | |
499 idMaxReadLen=Tools.max(r.length(), idMaxReadLen); | |
500 } | |
501 | |
502 public void addToTimeHistogram(final Read r){ | |
503 if(r==null){return;} | |
504 addToTimeHistogram(r, 0);//Time for pairs is the same. | |
505 } | |
506 | |
507 private void addToTimeHistogram(final Read r, int pairnum){ | |
508 if(r==null){return;} | |
509 assert(r.obj!=null && r.obj.getClass()==Long.class); | |
510 int x=(int)Tools.min(((Long)r.obj).longValue(), MAXTIMELEN); | |
511 timeHist.increment(x, 1); | |
512 } | |
513 | |
514 public boolean addToIndelHistogram(final Read r){ | |
515 if(r==null){return false;} | |
516 boolean success=addToIndelHistogram(r, 0); | |
517 if(r.mate!=null){success=addToIndelHistogram(r.mate, 1)|success;} | |
518 return success; | |
519 } | |
520 | |
521 /** Handles short match, long match, and reads with attached SamLines */ | |
522 private boolean addToIndelHistogram(final Read r, int pairnum){ | |
523 if(r==null || !r.mapped()){return false;} | |
524 if(r.samline!=null){ | |
525 boolean success=addToIndelHistogram(r.samline); | |
526 if(success){return true;} | |
527 } | |
528 if(r.match==null/* || r.discarded()*/){return false;} | |
529 final byte[] match=r.match; | |
530 | |
531 byte lastLetter='?'; | |
532 boolean digit=false; | |
533 int streak=0; | |
534 for(int mpos=0; mpos<match.length; mpos++){ | |
535 final byte m=match[mpos]; | |
536 | |
537 if(Tools.isDigit(m)){ | |
538 streak=streak*10+m-'0'; | |
539 digit=true; | |
540 }else{ | |
541 if(lastLetter==m){ | |
542 streak++; | |
543 }else{ | |
544 // assert(streak>0 || (streak==0 && lastm=='?')); | |
545 if(!digit){streak++;} | |
546 digit=false; | |
547 if(lastLetter=='D'){ | |
548 streak=Tools.min(streak, MAXDELLEN2); | |
549 if(streak<MAXDELLEN){delHist.increment(streak, 1);} | |
550 delHist2.increment(streak/100, 1); | |
551 // System.err.println("A. Del: "+streak+", "+MAXDELLEN+", "+MAXDELLEN2); | |
552 }else if(lastLetter=='I'){ | |
553 streak=Tools.min(streak, MAXINSLEN); | |
554 insHist.increment(streak, 1); | |
555 // System.err.println("A. Ins: "+streak+", "+MAXINSLEN); | |
556 } | |
557 streak=0; | |
558 } | |
559 lastLetter=m; | |
560 } | |
561 } | |
562 | |
563 {//Final symbol | |
564 if(!digit){streak++;} | |
565 digit=false; | |
566 if(lastLetter=='D'){ | |
567 streak=Tools.min(streak, MAXDELLEN2); | |
568 if(streak<MAXDELLEN){delHist.increment(streak, 1);} | |
569 delHist2.increment(streak/100, 1); | |
570 // System.err.println("A. Del: "+streak+", "+MAXDELLEN+", "+MAXDELLEN2); | |
571 }else if(lastLetter=='I'){ | |
572 streak=Tools.min(streak, MAXINSLEN); | |
573 insHist.increment(streak, 1); | |
574 // System.err.println("A. Ins: "+streak+", "+MAXINSLEN); | |
575 } | |
576 streak=0; | |
577 } | |
578 return true; | |
579 } | |
580 | |
581 private boolean addToIndelHistogram(SamLine sl){ | |
582 if(sl==null || sl.cigar==null || !sl.mapped()){ | |
583 return false; | |
584 } | |
585 final String cigar=sl.cigar; | |
586 // final int pairnum=sl.pairnum(); | |
587 | |
588 int count=0; | |
589 for(int cpos=0; cpos<cigar.length(); cpos++){ | |
590 final char c=cigar.charAt(cpos); | |
591 | |
592 if(Tools.isDigit(c)){ | |
593 count=count*10+c-'0'; | |
594 }else{ | |
595 if(c=='I'){ | |
596 int streak=Tools.min(count, MAXINSLEN); | |
597 insHist.increment(streak, 1); | |
598 // System.err.println("A. Ins: "+streak+", "+MAXINSLEN); | |
599 }else if(c=='D'){ | |
600 int streak=Tools.min(count, MAXDELLEN2); | |
601 if(streak<MAXDELLEN){delHist.increment(streak, 1);} | |
602 delHist2.increment(streak/100, 1); | |
603 // System.err.println("A. Del: "+streak+", "+MAXDELLEN+", "+MAXDELLEN2); | |
604 }else{ | |
605 //Ignore | |
606 } | |
607 count=0; | |
608 } | |
609 } | |
610 assert(count==0) : count; | |
611 return true; | |
612 } | |
613 | |
614 public void addToMatchHistogram(final Read r){ | |
615 if(r==null){return;} | |
616 addToMatchHistogram2(r); | |
617 if(r.mate!=null){addToMatchHistogram2(r.mate);} | |
618 } | |
619 | |
620 private void addToMatchHistogram2(final Read r){ | |
621 if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;} | |
622 final int pairnum=r.samline==null ? r.pairnum() : r.samline.pairnum(); | |
623 if(pairnum==1){read2Count++;} | |
624 final byte[] bases=r.bases; | |
625 final int limit=Tools.min(bases.length, MAXLEN); | |
626 final long[] ms=matchSum[pairnum], ds=delSum[pairnum], is=insSum[pairnum], | |
627 ss=subSum[pairnum], ns=nSum[pairnum], cs=clipSum[pairnum], os=otherSum[pairnum]; | |
628 | |
629 byte[] match=r.match; | |
630 if(r.shortmatch() && match!=null){match=Read.toLongMatchString(match);} | |
631 | |
632 if(match==null){ | |
633 for(int i=0; i<limit; i++){ | |
634 byte b=bases[i]; | |
635 if(b=='N'){ns[i]++;} | |
636 else{os[i]++;} | |
637 } | |
638 }else{ | |
639 final boolean plus=(r.strand()==0); | |
640 int bpos=0; | |
641 byte lastm='A'; | |
642 for(int mpos=0; mpos<match.length && bpos<limit; mpos++){ | |
643 byte b=bases[bpos];//bases[plus ? rpos : bases.length-rpos-1]; | |
644 byte m=match[plus ? mpos : match.length-mpos-1];//match[mpos]; | |
645 if(b=='N'){ | |
646 if(m=='D'){ | |
647 if(lastm!=m){ds[bpos]++;} | |
648 bpos--; | |
649 }else{ns[bpos]++;} | |
650 }else{ | |
651 if(m=='m'){ | |
652 ms[bpos]++; | |
653 }else if(m=='S'){ | |
654 ss[bpos]++; | |
655 }else if(m=='I'){ | |
656 is[bpos]++; | |
657 }else if(m=='N' || m=='V'){ | |
658 // assert(false) : "\n"+r+"\n"+new String(Data.getChromosome(r.chrom).getBytes(r.start, r.stop))+"\nrpos="+rpos+", mpos="+mpos; | |
659 os[bpos]++; | |
660 }else if(m=='C'){ | |
661 // assert(false) : r; | |
662 cs[bpos]++; | |
663 }else if(m=='D'){ | |
664 if(lastm!=m){ds[bpos]++;} | |
665 bpos--; | |
666 }else if(m=='i'){ | |
667 os[bpos]++; | |
668 }else if(m=='d'){ | |
669 bpos--; | |
670 }else{ | |
671 os[bpos]++; | |
672 assert(false) : "For read "+r.numericID+", unknown symbol in match string: ASCII "+m+" = "+(char)m; | |
673 } | |
674 } | |
675 bpos++; | |
676 lastm=m; | |
677 } | |
678 } | |
679 } | |
680 | |
681 public void addToInsertHistogram(final Read r, boolean ignoreMappingStrand){ | |
682 if(verbose){ | |
683 System.err.print(r.numericID); | |
684 if(r==null || r.mate==null || !r.mapped() || !r.mate.mapped() || !r.paired()){ | |
685 System.err.println("\n"); | |
686 }else{ | |
687 System.err.println("\t"+r.strand()+"\t"+r.insertSizeMapped(ignoreMappingStrand)+"\t"+r.mate.insertSizeMapped(ignoreMappingStrand)); | |
688 } | |
689 } | |
690 if(r==null || r.mate==null || !r.mapped() || !r.mate.mapped() || !r.paired()){ | |
691 unpairedCount++; | |
692 return; | |
693 } | |
694 int x=Tools.min(MAXINSERTLEN, r.insertSizeMapped(ignoreMappingStrand)); | |
695 if(x>0){ | |
696 insertHist.increment(x, 1); | |
697 pairedCount++; | |
698 }else{ | |
699 unpairedCount++; | |
700 } | |
701 // assert(x!=1) : "\n"+r+"\n\n"+r.mate+"\n"; | |
702 // System.out.println("Incrementing "+x); | |
703 } | |
704 | |
705 public void addToInsertHistogram(final SamLine r1){ | |
706 int x=r1.tlen; | |
707 if(x<0) {x=-x;} | |
708 x=Tools.min(MAXINSERTLEN, x); | |
709 if(r1.pairedOnSameChrom() && x>0){ | |
710 pairedCount++; | |
711 insertHist.increment(x, 1); | |
712 }else { | |
713 unpairedCount++; | |
714 } | |
715 } | |
716 | |
717 public void addToInsertHistogram(final SamLine r1, final SamLine r2){ | |
718 if(r1==null){return;} | |
719 int x=insertSizeMapped(r1, r2, REQUIRE_PROPER_PAIR); | |
720 if(verbose){ | |
721 System.err.println(r1.qname+"\t"+x); | |
722 } | |
723 x=Tools.min(MAXINSERTLEN, x); | |
724 if(x>0){ | |
725 insertHist.increment(x, 1); | |
726 pairedCount++; | |
727 }else{ | |
728 unpairedCount++; | |
729 } | |
730 } | |
731 | |
732 /** This is untested and only gives approximate answers when overlapping reads contain indels. | |
733 * It may give incorrect answers for same-strange pairs that are shorter than read length. | |
734 * It might give negative answers but that would be a bug. */ | |
735 public static int insertSizeMapped(SamLine r1, SamLine r2, boolean requireProperPair){ | |
736 if(r2==null){return r1.length();} | |
737 if(!r1.mapped() || !r2.mapped() || !r1.pairedOnSameChrom() || (requireProperPair && !r1.properPair())){ | |
738 return -1; | |
739 } | |
740 | |
741 int a1=r1.start(true, false); | |
742 int a2=r2.start(true, false); | |
743 | |
744 if(r1.strand()!=r2.strand()){ | |
745 if(r1.strand()==1){return insertSizeMapped(r2, r1, requireProperPair);} | |
746 }else if(a1>a2){ | |
747 return insertSizeMapped(r2, r1, requireProperPair); | |
748 } | |
749 | |
750 int b1=r1.stop(a1, true, false); | |
751 int b2=r2.stop(a2, true, false); | |
752 | |
753 int clen1=r1.calcCigarLength(true, false); | |
754 int clen2=r2.calcCigarLength(true, false); | |
755 | |
756 int mlen1=b1-a1+1; | |
757 int mlen2=b2-a2+1; | |
758 | |
759 int dif1=mlen1-clen1; | |
760 int dif2=mlen2-clen2; | |
761 | |
762 int mlen12=b2-a1+1; | |
763 | |
764 if(Tools.overlap(a1, b1, a2, b2)){//hard case | |
765 return mlen12-Tools.max(dif1, dif2); //Approximate | |
766 }else{//easy case | |
767 return mlen12-dif1-dif2; | |
768 } | |
769 } | |
770 | |
771 public void addToBaseHistogram(final Read r){ | |
772 addToBaseHistogram2(r); | |
773 if(r.mate!=null){addToBaseHistogram2(r.mate);} | |
774 } | |
775 | |
776 public void addToBaseHistogram2(final Read r){ | |
777 if(r==null || r.bases==null){return;} | |
778 final int pairnum=r.samline==null ? r.pairnum() : r.samline.pairnum(); | |
779 if(pairnum==1){read2Count++;} | |
780 final byte[] bases=r.bases; | |
781 final LongList[] lists=baseHist[pairnum]; | |
782 for(int i=0; i<bases.length; i++){ | |
783 byte b=bases[i]; | |
784 int x=AminoAcid.baseToNumber[b]+1; | |
785 lists[x].increment(i, 1); | |
786 } | |
787 } | |
788 | |
789 public static boolean testFiles(boolean allowDuplicates){ | |
790 return Tools.testOutputFiles(overwrite, append, allowDuplicates, | |
791 AVG_QUAL_HIST_FILE, QUAL_HIST_FILE, BQUAL_HIST_FILE, BQUAL_HIST_OVERALL_FILE, QUAL_COUNT_HIST_FILE, | |
792 MATCH_HIST_FILE, INSERT_HIST_FILE, BASE_HIST_FILE, QUAL_ACCURACY_FILE, INDEL_HIST_FILE, ERROR_HIST_FILE, LENGTH_HIST_FILE, | |
793 GC_HIST_FILE, ENTROPY_HIST_FILE, IDENTITY_HIST_FILE, TIME_HIST_FILE); | |
794 } | |
795 | |
796 public static boolean writeAll(){ | |
797 if(collectingStats()){ | |
798 ReadStats rs=mergeAll(); | |
799 boolean paired=rs.read2Count>0; | |
800 | |
801 if(AVG_QUAL_HIST_FILE!=null){rs.writeAverageQualityToFile(AVG_QUAL_HIST_FILE, paired);} | |
802 if(QUAL_HIST_FILE!=null){rs.writeQualityToFile(QUAL_HIST_FILE, paired);} | |
803 if(BQUAL_HIST_FILE!=null){rs.writeBQualityToFile(BQUAL_HIST_FILE, paired);} | |
804 if(BQUAL_HIST_OVERALL_FILE!=null){rs.writeBQualityOverallToFile(BQUAL_HIST_OVERALL_FILE);} | |
805 if(QUAL_COUNT_HIST_FILE!=null){rs.writeQCountToFile(QUAL_COUNT_HIST_FILE, paired);} | |
806 if(MATCH_HIST_FILE!=null){rs.writeMatchToFile(MATCH_HIST_FILE, paired);} | |
807 if(INSERT_HIST_FILE!=null){rs.writeInsertToFile(INSERT_HIST_FILE);} | |
808 if(BASE_HIST_FILE!=null){rs.writeBaseContentToFile(BASE_HIST_FILE, paired);} | |
809 if(QUAL_ACCURACY_FILE!=null){rs.writeQualityAccuracyToFile(QUAL_ACCURACY_FILE);} | |
810 | |
811 if(INDEL_HIST_FILE!=null){rs.writeIndelToFile(INDEL_HIST_FILE);} | |
812 if(ERROR_HIST_FILE!=null){rs.writeErrorToFile(ERROR_HIST_FILE);} | |
813 if(LENGTH_HIST_FILE!=null){rs.writeLengthToFile(LENGTH_HIST_FILE);} | |
814 if(GC_HIST_FILE!=null){rs.writeGCToFile(GC_HIST_FILE, true);} | |
815 if(ENTROPY_HIST_FILE!=null){rs.writeEntropyToFile(ENTROPY_HIST_FILE, true);} | |
816 if(IDENTITY_HIST_FILE!=null){rs.writeIdentityToFile(IDENTITY_HIST_FILE, true);} | |
817 if(TIME_HIST_FILE!=null){rs.writeTimeToFile(TIME_HIST_FILE);} | |
818 | |
819 boolean b=rs.errorState; | |
820 clear(); | |
821 return b; | |
822 } | |
823 clear(); | |
824 return false; | |
825 } | |
826 | |
827 public void writeAverageQualityToFile(String fname, boolean writePaired){ | |
828 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); | |
829 tsw.start(); | |
830 tsw.print("#Quality\tcount1\tfraction1"+(writePaired ? "\tcount2\tfraction2" : "")+"\n"); | |
831 | |
832 long sum1=Tools.sum(aqualArray[0]); | |
833 long sum2=Tools.sum(aqualArray[1]); | |
834 double mult1=1.0/Tools.max(1, sum1); | |
835 double mult2=1.0/Tools.max(1, sum2); | |
836 | |
837 long y=sum1+sum2; | |
838 for(int i=0; i<aqualArray[0].length; i++){ | |
839 long x1=aqualArray[0][i]; | |
840 long x2=aqualArray[1][i]; | |
841 y-=x1; | |
842 y-=x2; | |
843 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%.5f", i, x1, x1*mult1)); | |
844 if(writePaired){ | |
845 tsw.print(String.format(Locale.ROOT, "\t%d\t%.5f", x2, x2*mult2)); | |
846 } | |
847 tsw.print("\n"); | |
848 if(y<=0){break;} | |
849 } | |
850 tsw.poisonAndWait(); | |
851 errorState|=tsw.errorState; | |
852 } | |
853 | |
854 public void writeQCountToFile(String fname, boolean writePaired){ | |
855 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); | |
856 tsw.start(); | |
857 tsw.print("#Quality\tcount1\tfraction1"+(writePaired ? "\tcount2\tfraction2" : "")+"\n"); | |
858 | |
859 long sum1=Tools.sum(qcountHist[0]); | |
860 long sum2=Tools.sum(qcountHist[1]); | |
861 double mult1=1.0/Tools.max(1, sum1); | |
862 double mult2=1.0/Tools.max(1, sum2); | |
863 | |
864 long y=sum1+sum2; | |
865 for(int i=0; i<qcountHist[0].length; i++){ | |
866 long x1=qcountHist[0][i]; | |
867 long x2=qcountHist[1][i]; | |
868 y-=x1; | |
869 y-=x2; | |
870 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%.5f", i, x1, x1*mult1)); | |
871 if(writePaired){ | |
872 tsw.print(String.format(Locale.ROOT, "\t%d\t%.5f", x2, x2*mult2)); | |
873 } | |
874 tsw.print("\n"); | |
875 if(y<=0){break;} | |
876 } | |
877 tsw.poisonAndWait(); | |
878 errorState|=tsw.errorState; | |
879 } | |
880 | |
881 public void writeQualityToFile(String fname, boolean writePaired){ | |
882 StringBuilder sb=new StringBuilder(); | |
883 final boolean measure=(matchSum!=null); | |
884 if(measure){ | |
885 if(writePaired){ | |
886 sb.append("#BaseNum\tRead1_linear\tRead1_log\tRead1_measured\tRead2_linear\tRead2_log\tRead2_measured\n"); | |
887 }else{ | |
888 sb.append("#BaseNum\tRead1_linear\tRead1_log\tRead1_measured\n"); | |
889 } | |
890 }else{ | |
891 sb.append("#BaseNum\tRead1_linear\tRead1_log"+(writePaired ? "\tRead2_linear\tRead2_log" : "")+"\n"); | |
892 } | |
893 | |
894 final long[] qs1=qualSum[0], qs2=qualSum[1], ql1=qualLength[0], ql2=qualLength[1]; | |
895 final double[] qsd1=qualSumDouble[0], qsd2=qualSumDouble[1]; | |
896 | |
897 for(int i=MAXLEN-2; i>=0; i--){ | |
898 ql1[i]+=ql1[i+1]; | |
899 ql2[i]+=ql2[i+1]; | |
900 } | |
901 | |
902 double div1sum=0; | |
903 double div2sum=0; | |
904 double deviation1sum=0; | |
905 double deviation2sum=0; | |
906 | |
907 if(writePaired){ | |
908 for(int i=0; i<MAXLEN && (ql1[i]>0 || ql2[i]>0); i++){ | |
909 final int a=i+1; | |
910 double blin, clin, blog, clog; | |
911 final double div1=(double)Tools.max(1, ql1[i]); | |
912 final double div2=(double)Tools.max(1, ql2[i]); | |
913 | |
914 blin=qs1[i]/div1; | |
915 clin=qs2[i]/div2; | |
916 blog=qsd1[i]/div1; | |
917 clog=qsd2[i]/div2; | |
918 blog=QualityTools.probErrorToPhredDouble(blog); | |
919 clog=QualityTools.probErrorToPhredDouble(clog); | |
920 if(measure){ | |
921 double bcalc=calcQualityAtPosition(i, 0); | |
922 double ccalc=calcQualityAtPosition(i, 1); | |
923 | |
924 div1sum+=div1; | |
925 div2sum+=div2; | |
926 deviation1sum+=Math.abs(blog-bcalc)*div1; | |
927 deviation2sum+=Math.abs(clog-ccalc)*div2; | |
928 | |
929 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n", a, blin, blog, bcalc, clin, clog, ccalc)); | |
930 }else{ | |
931 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\t%.3f\n", a, blin, blog, clin, clog)); | |
932 } | |
933 } | |
934 }else{ | |
935 for(int i=0; i<MAXLEN && ql1[i]>0; i++){ | |
936 final int a=i+1; | |
937 double blin, blog; | |
938 final double div1=(double)Tools.max(1, ql1[i]); | |
939 | |
940 blin=qs1[i]/div1; | |
941 blog=qsd1[i]/div1; | |
942 blog=QualityTools.probErrorToPhredDouble(blog); | |
943 if(measure){ | |
944 double bcalc=calcQualityAtPosition(i, 0); | |
945 | |
946 div1sum+=div1; | |
947 deviation1sum+=Math.abs(blog-bcalc)*div1; | |
948 | |
949 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\n", a, blin, blog, bcalc)); | |
950 }else{ | |
951 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\n", a, blin, blog)); | |
952 } | |
953 } | |
954 } | |
955 | |
956 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); | |
957 tsw.start(); | |
958 if(measure){ | |
959 if(writePaired){ | |
960 tsw.print(String.format(Locale.ROOT, "#Deviation\t%.4f\t%.4f\n", deviation1sum/div1sum, deviation2sum/div2sum)); | |
961 }else{ | |
962 tsw.print(String.format(Locale.ROOT, "#Deviation\t%.4f\n", deviation1sum/div1sum)); | |
963 } | |
964 } | |
965 tsw.print(sb); | |
966 tsw.poisonAndWait(); | |
967 errorState|=tsw.errorState; | |
968 } | |
969 | |
970 private double calcQualityAtPosition(int pos, int pairnum){ | |
971 boolean includeNs=true; | |
972 long m=matchSum[pairnum][pos]; | |
973 long d=delSum[pairnum][pos]; //left-adjacent deletion | |
974 long d2=delSum[pairnum][Tools.min(pos, delSum[pairnum].length-1)]; //right-adjacent deletion | |
975 long i=insSum[pairnum][pos]; | |
976 long s=subSum[pairnum][pos]; | |
977 long n=(includeNs ? nSum[pairnum][pos] : 0); //This only tracks no-calls, not no-refs. | |
978 long good=Tools.max(0, m*2-d-d2+n/2); | |
979 long total=Tools.max(0, m*2+i*2+s*2+n*2); //not d | |
980 long bad=total-good; | |
981 if(total<1){return 0;} | |
982 double error=bad/(double)total; | |
983 return QualityTools.probErrorToPhredDouble(error); | |
984 } | |
985 | |
986 public void writeBQualityOverallToFile(String fname){ | |
987 final long[] cp30=Arrays.copyOf(bqualHistOverall, bqualHistOverall.length); | |
988 for(int i=0; i<30; i++){cp30[i]=0;} | |
989 | |
990 final long sum=Tools.sum(bqualHistOverall); | |
991 final long median=Tools.percentileHistogram(bqualHistOverall, 0.5); | |
992 final double mean=Tools.averageHistogram(bqualHistOverall); | |
993 final double stdev=Tools.standardDeviationHistogram(bqualHistOverall); | |
994 final double mean30=Tools.averageHistogram(cp30); | |
995 final double stdev30=Tools.standardDeviationHistogram(cp30); | |
996 final double mult=1.0/Tools.max(1, sum); | |
997 long y=sum; | |
998 | |
999 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); | |
1000 tsw.start(); | |
1001 tsw.print("#Median\t"+median+"\n"); | |
1002 tsw.print("#Mean\t"+String.format(Locale.ROOT, "%.3f", mean)+"\n"); | |
1003 tsw.print("#STDev\t"+String.format(Locale.ROOT, "%.3f", stdev)+"\n"); | |
1004 tsw.print("#Mean_30\t"+String.format(Locale.ROOT, "%.3f", mean30)+"\n"); | |
1005 tsw.print("#STDev_30\t"+String.format(Locale.ROOT, "%.3f", stdev30)+"\n"); | |
1006 tsw.print("#Quality\tbases\tfraction\n"); | |
1007 | |
1008 for(int i=0; i<bqualHistOverall.length; i++){ | |
1009 long x=bqualHistOverall[i]; | |
1010 y-=x; | |
1011 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%.5f", i, x, x*mult)); | |
1012 tsw.print("\n"); | |
1013 if(y<=0){break;} | |
1014 } | |
1015 tsw.poisonAndWait(); | |
1016 errorState|=tsw.errorState; | |
1017 } | |
1018 | |
1019 public void writeBQualityToFile(String fname, boolean writePaired){ | |
1020 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); | |
1021 tsw.start(); | |
1022 tsw.print("#BaseNum\tcount_1\tmin_1\tmax_1\tmean_1\tQ1_1\tmed_1\tQ3_1\tLW_1\tRW_1"); | |
1023 if(writePaired){tsw.print("\tcount_2\tmin_2\tmax_2\tmean_2\tQ1_2\tmed_2\tQ3_2\tLW_2\tRW_2");} | |
1024 tsw.print("\n"); | |
1025 | |
1026 for(int i=0; i<MAXLEN; i++){ | |
1027 final long[] a1=bqualHist[0][i], a2=bqualHist[1][i]; | |
1028 final long sum1=Tools.sum(a1), sum2=Tools.sum(a2); | |
1029 if(sum1<1 && sum2<1){break;} | |
1030 | |
1031 { | |
1032 final long a[]=a1, sum=sum1; | |
1033 | |
1034 final long weightedSum=Tools.sumHistogram(a); | |
1035 final long med=Tools.medianHistogram(a), min=Tools.minHistogram(a), max=Tools.maxHistogram(a); | |
1036 final long firstQuart=Tools.percentileHistogram(a, 0.25); | |
1037 final long thirdQuart=Tools.percentileHistogram(a, 0.75); | |
1038 final long leftWhisker=Tools.percentileHistogram(a, 0.02); | |
1039 final long rightWhisker=Tools.percentileHistogram(a, 0.98); | |
1040 final double mean=weightedSum*1.0/Tools.max(sum, 0); | |
1041 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%d\t%d", i, sum, min, max, mean, firstQuart, med, thirdQuart, leftWhisker, rightWhisker)); | |
1042 } | |
1043 | |
1044 if(writePaired){ | |
1045 final long a[]=a2, sum=sum2; | |
1046 | |
1047 final long weightedSum=Tools.sumHistogram(a); | |
1048 final long med=Tools.medianHistogram(a), min=Tools.minHistogram(a), max=Tools.maxHistogram(a); | |
1049 final long firstQuart=Tools.percentileHistogram(a, 0.25); | |
1050 final long thirdQuart=Tools.percentileHistogram(a, 0.75); | |
1051 final long leftWhisker=Tools.percentileHistogram(a, 0.02); | |
1052 final long rightWhisker=Tools.percentileHistogram(a, 0.98); | |
1053 final double mean=weightedSum*1.0/Tools.max(sum, 0); | |
1054 tsw.print(String.format(Locale.ROOT, "\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%d\t%d", sum, min, max, mean, firstQuart, med, thirdQuart, leftWhisker, rightWhisker)); | |
1055 } | |
1056 tsw.print("\n"); | |
1057 } | |
1058 tsw.poisonAndWait(); | |
1059 errorState|=tsw.errorState; | |
1060 } | |
1061 | |
1062 public void writeQualityAccuracyToFile(String fname){ | |
1063 | |
1064 int max=qualMatch.length; | |
1065 for(int i=max-1; i>=0; i--){ | |
1066 if(qualMatch[i]+qualSub[i]+qualIns[i]+qualDel[i]>0){break;} | |
1067 max=i; | |
1068 } | |
1069 | |
1070 final int qMin=Read.MIN_CALLED_QUALITY(), qMax=Read.MAX_CALLED_QUALITY(); | |
1071 | |
1072 double devsum=0; | |
1073 double devsumSub=0; | |
1074 long observations=0; | |
1075 for(int i=0; i<max; i++){ | |
1076 long qm=qualMatch[i]*2; | |
1077 long qs=qualSub[i]*2; | |
1078 long qi=qualIns[i]*2; | |
1079 long qd=qualDel[i]; | |
1080 | |
1081 double phred=-1; | |
1082 double phredSub=-1; | |
1083 | |
1084 long sum=qm+qs+qi+qd; | |
1085 if(sum>0){ | |
1086 double mult=1.0/sum; | |
1087 double subRate=(qs)*mult; | |
1088 double errorRate=(qs+qi+qd)*mult; | |
1089 | |
1090 phredSub=QualityTools.probErrorToPhredDouble(subRate); | |
1091 phred=QualityTools.probErrorToPhredDouble(errorRate); | |
1092 double deviation=phred-i; | |
1093 double deviationSub=phredSub-i; | |
1094 if(i==qMin && deviation<0){deviation=0;} | |
1095 else if(i==qMax && max==qMax+1 && deviation>0){deviation=0;} | |
1096 if(i==qMin && deviationSub<0){deviationSub=0;} | |
1097 else if(i==qMax && max==qMax+1 && deviationSub>0){deviationSub=0;} | |
1098 devsum+=(Math.abs(deviation)*sum); | |
1099 devsumSub+=(Math.abs(deviationSub)*sum); | |
1100 observations+=sum; | |
1101 } | |
1102 } | |
1103 | |
1104 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); | |
1105 tsw.start(); | |
1106 tsw.print(String.format(Locale.ROOT, "#Deviation\t%.3f\n", devsum/observations)); | |
1107 tsw.print(String.format(Locale.ROOT, "#DeviationSub\t%.3f\n", devsumSub/observations)); | |
1108 tsw.print("#Quality\tMatch\tSub\tIns\tDel\tTrueQuality\tTrueQualitySub\n"); | |
1109 for(int i=0; i<max; i++){ | |
1110 long qm=qualMatch[i]*2; | |
1111 long qs=qualSub[i]*2; | |
1112 long qi=qualIns[i]*2; | |
1113 long qd=qualDel[i]; | |
1114 | |
1115 double phred=-1; | |
1116 double phredSub=-1; | |
1117 | |
1118 long sum=qm+qs+qi+qd; | |
1119 if(sum>0){ | |
1120 double mult=1.0/sum; | |
1121 double subRate=(qs)*mult; | |
1122 double errorRate=(qs+qi+qd)*mult; | |
1123 | |
1124 phredSub=QualityTools.probErrorToPhredDouble(subRate); | |
1125 phred=QualityTools.probErrorToPhredDouble(errorRate); | |
1126 | |
1127 // System.err.println("sub: "+qs+"/"+sum+" -> "+subRate+" -> "+phredSub); | |
1128 } | |
1129 | |
1130 tsw.print(i+"\t"+qm+"\t"+qs+"\t"+qi+"\t"+qd); | |
1131 tsw.print(phred>=0 ? String.format(Locale.ROOT, "\t%.2f", phred) : "\t"); | |
1132 tsw.print(phredSub>=0 ? String.format(Locale.ROOT, "\t%.2f\n", phredSub) : "\t\n"); | |
1133 | |
1134 // System.err.println(qm+"\t"+qs+"\t"+qi+"\t"+qd); | |
1135 } | |
1136 | |
1137 tsw.poisonAndWait(); | |
1138 errorState|=tsw.errorState; | |
1139 } | |
1140 | |
1141 public void writeMatchToFile(String fname, boolean writePaired){ | |
1142 if(!writePaired){ | |
1143 writeMatchToFileUnpaired(fname); | |
1144 return; | |
1145 } | |
1146 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, false, false); | |
1147 tsw.start(); | |
1148 tsw.print("#BaseNum\tMatch1\tSub1\tDel1\tIns1\tN1\tOther1\tMatch2\tSub2\tDel2\tIns2\tN2\tOther2\n"); | |
1149 | |
1150 final long[] ms1=matchSum[0], ds1=delSum[0], is1=insSum[0], | |
1151 ss1=subSum[0], ns1=nSum[0], cs1=clipSum[0], os1=otherSum[0]; | |
1152 final long[] ms2=matchSum[1], ds2=delSum[1], is2=insSum[1], | |
1153 ss2=subSum[1], ns2=nSum[1], cs2=clipSum[1], os2=otherSum[1]; | |
1154 | |
1155 for(int i=0; i<MAXLEN; i++){ | |
1156 int a=i+1; | |
1157 long sum1=ms1[i]+is1[i]+ss1[i]+ns1[i]+cs1[i]+os1[i]; //no deletions | |
1158 long sum2=ms2[i]+is2[i]+ss2[i]+ns2[i]+cs2[i]+os2[i]; //no deletions | |
1159 if(sum1==0 && sum2==0){break;} | |
1160 double inv1=1.0/(double)Tools.max(1, sum1); | |
1161 double inv2=1.0/(double)Tools.max(1, sum2); | |
1162 | |
1163 tsw.print(String.format(Locale.ROOT, "%d", a)); | |
1164 tsw.print(String.format(Locale.ROOT, "\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f", | |
1165 ms1[i]*inv1, ss1[i]*inv1, ds1[i]*inv1, is1[i]*inv1, ns1[i]*inv1, (os1[i]+cs1[i])*inv1)); | |
1166 tsw.print(String.format(Locale.ROOT, "\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f", | |
1167 ms2[i]*inv2, ss2[i]*inv2, ds2[i]*inv2, is2[i]*inv2, ns2[i]*inv2, (os2[i]+cs2[i])*inv2) | |
1168 // +", "+ms2[i]+", "+is2[i]+", "+ss2[i]+", "+ns2[i]+", "+cs2[i]+", "+os2[i] | |
1169 ); | |
1170 tsw.print("\n"); | |
1171 } | |
1172 tsw.poisonAndWait(); | |
1173 errorState|=tsw.errorState; | |
1174 } | |
1175 | |
1176 public void writeMatchToFileUnpaired(String fname){ | |
1177 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, false, false); | |
1178 tsw.start(); | |
1179 tsw.print("#BaseNum\tMatch1\tSub1\tDel1\tIns1\tN1\tOther1\n"); | |
1180 | |
1181 final long[] ms1=matchSum[0], ds1=delSum[0], is1=insSum[0], | |
1182 ss1=subSum[0], ns1=nSum[0], cs1=clipSum[0], os1=otherSum[0]; | |
1183 | |
1184 for(int i=0; i<MAXLEN; i++){ | |
1185 int a=i+1; | |
1186 long sum1=ms1[i]+is1[i]+ss1[i]+ns1[i]+cs1[i]+os1[i]; //no deletions | |
1187 if(sum1==0){break;} | |
1188 double inv1=1.0/(double)Tools.max(1, sum1); | |
1189 | |
1190 tsw.print(String.format(Locale.ROOT, "%d", a)); | |
1191 tsw.print(String.format(Locale.ROOT, "\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f", | |
1192 ms1[i]*inv1, ss1[i]*inv1, ds1[i]*inv1, is1[i]*inv1, ns1[i]*inv1, (os1[i]+cs1[i])*inv1) | |
1193 // +", "+ms1[i]+", "+is1[i]+", "+ss1[i]+", "+ns1[i]+", "+cs1[i]+", "+os1[i] | |
1194 ); | |
1195 tsw.print("\n"); | |
1196 } | |
1197 tsw.poisonAndWait(); | |
1198 errorState|=tsw.errorState; | |
1199 } | |
1200 | |
1201 public void writeInsertToFile(String fname){ | |
1202 StringBuilder sb=new StringBuilder(); | |
1203 sb.append("#Mean\t"+String.format(Locale.ROOT, "%.3f", Tools.averageHistogram(insertHist.array))+"\n"); | |
1204 sb.append("#Median\t"+Tools.percentileHistogram(insertHist.array, 0.5)+"\n"); | |
1205 sb.append("#Mode\t"+Tools.calcModeHistogram(insertHist.array)+"\n"); | |
1206 sb.append("#STDev\t"+String.format(Locale.ROOT, "%.3f", Tools.standardDeviationHistogram(insertHist.array))+"\n"); | |
1207 double percent=pairedCount*100.0/(pairedCount+unpairedCount); | |
1208 sb.append("#PercentOfPairs\t"+String.format(Locale.ROOT, "%.3f", percent)+"\n"); | |
1209 // sb.append("#PercentOfPairs\t"+String.format(Locale.ROOT, "%.3f", matedPercent)+"\n"); | |
1210 sb.append("#InsertSize\tCount\n"); | |
1211 writeHistogramToFile(fname, sb.toString(), insertHist, !skipZeroInsertCount); | |
1212 } | |
1213 | |
1214 public void writeBaseContentToFile(String fname, boolean paired){ | |
1215 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); | |
1216 bsw.start(); | |
1217 bsw.print("#Pos\tA\tC\tG\tT\tN\n"); | |
1218 | |
1219 int max=writeBaseContentToFile2(bsw, baseHist[0], 0); | |
1220 if(paired){ | |
1221 writeBaseContentToFile2(bsw, baseHist[1], max); | |
1222 } | |
1223 | |
1224 bsw.poisonAndWait(); | |
1225 errorState|=bsw.errorState; | |
1226 } | |
1227 | |
1228 private static int writeBaseContentToFile2(ByteStreamWriter bsw, LongList[] lists, int offset){ | |
1229 int max=0; | |
1230 ByteBuilder sb=new ByteBuilder(100); | |
1231 for(LongList ll : lists){max=Tools.max(max, ll.size);} | |
1232 for(int i=0; i<max; i++){ | |
1233 long a=lists[1].get(i); | |
1234 long c=lists[2].get(i); | |
1235 long g=lists[3].get(i); | |
1236 long t=lists[4].get(i); | |
1237 long n=lists[0].get(i); | |
1238 double mult=1.0/(a+c+g+t+n); | |
1239 | |
1240 sb.setLength(0); | |
1241 sb.append(i+offset).tab(); | |
1242 sb.append(a*mult, 5).tab(); | |
1243 sb.append(c*mult, 5).tab(); | |
1244 sb.append(g*mult, 5).tab(); | |
1245 sb.append(t*mult, 5).tab(); | |
1246 sb.append(n*mult, 5); | |
1247 sb.nl(); | |
1248 | |
1249 bsw.print(sb); | |
1250 } | |
1251 return max; | |
1252 } | |
1253 | |
1254 public void writeIndelToFile(String fname){ | |
1255 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); | |
1256 bsw.start(); | |
1257 bsw.print("#Length\tDeletions\tInsertions\n"); | |
1258 | |
1259 int max=Tools.max(insHist.size, delHist.size); | |
1260 | |
1261 ByteBuilder bb=new ByteBuilder(100); | |
1262 for(int i=0; i<max; i++){ | |
1263 long x=delHist.get(i); | |
1264 long y=insHist.get(i); | |
1265 if(x>0 || y>0 || !skipZeroIndel){ | |
1266 bb.clear(); | |
1267 bb.append(i).tab().append(x).tab().append(y).nl(); | |
1268 bsw.print(bb); | |
1269 } | |
1270 } | |
1271 | |
1272 //TODO: Disabled because it was irritating when graphing. Should write to a different file. | |
1273 // tsw.print("#Length_bin\tDeletions\n"); | |
1274 // max=delHist2.size; | |
1275 // for(int i=0; i<max; i++){ | |
1276 // long x=delHist2.get(i); | |
1277 // if(x>0 || !skipZeroIndel){ | |
1278 // tsw.print((i*DEL_BIN)+"\t"+x+"\n"); | |
1279 // } | |
1280 // } | |
1281 | |
1282 bsw.poisonAndWait(); | |
1283 errorState|=bsw.errorState; | |
1284 } | |
1285 | |
1286 public void writeErrorToFile(String fname){ | |
1287 writeHistogramToFile(fname, "#Errors\tCount\n", errorHist, false); | |
1288 } | |
1289 | |
1290 public void writeLengthToFile(String fname){ | |
1291 writeHistogramToFile(fname, "#Length\tCount\n", lengthHist, false); | |
1292 } | |
1293 | |
1294 public void writeTimeToFile(String fname){ | |
1295 writeHistogramToFile(fname, "#Time\tCount\n", timeHist, false); | |
1296 } | |
1297 | |
1298 public void writeHistogramToFile(String fname, String header, LongList hist, boolean printZeros){ | |
1299 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); | |
1300 bsw.start(); | |
1301 bsw.print(header); | |
1302 | |
1303 int max=hist.size; | |
1304 | |
1305 ByteBuilder bb=new ByteBuilder(40); | |
1306 for(int i=0; i<max; i++){ | |
1307 long x=hist.get(i); | |
1308 if(x>0 || printZeros){ | |
1309 bb.clear().append(i).tab().append(x).nl(); | |
1310 bsw.print(bb); | |
1311 } | |
1312 } | |
1313 bsw.poisonAndWait(); | |
1314 errorState|=bsw.errorState; | |
1315 } | |
1316 | |
1317 public void writeHistogramToFile(String fname, String header, SuperLongList hist, boolean printZeros){ | |
1318 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); | |
1319 bsw.start(); | |
1320 bsw.print(header); | |
1321 | |
1322 hist.sort(); | |
1323 long max=hist.max(); | |
1324 long[] array=hist.array(); | |
1325 LongList list=hist.list(); | |
1326 | |
1327 ByteBuilder bb=new ByteBuilder(40); | |
1328 for(int i=0; i<array.length; i++){ | |
1329 long x=array[i]; | |
1330 if(x>0 || printZeros){ | |
1331 bb.clear().append(i).tab().append(x).nl(); | |
1332 bsw.print(bb); | |
1333 } | |
1334 } | |
1335 {//TODO: This part does not bother printing zeros regardless of the flag. | |
1336 long prev=-1; | |
1337 int streak=0; | |
1338 for(int i=0; i<list.size(); i++){ | |
1339 long x=list.get(i); | |
1340 if(x==prev){streak++;} | |
1341 else{ | |
1342 if(streak>0){//Print previous key | |
1343 bb.clear().append(prev).tab().append(streak).nl(); | |
1344 bsw.print(bb); | |
1345 } | |
1346 streak=1; | |
1347 prev=x; | |
1348 } | |
1349 } | |
1350 if(streak>0){//Print final key | |
1351 bb.clear().append(prev).tab().append(streak).nl(); | |
1352 bsw.print(bb); | |
1353 } | |
1354 } | |
1355 bsw.poisonAndWait(); | |
1356 errorState|=bsw.errorState; | |
1357 } | |
1358 | |
1359 public void writeGCToFile(String fname, boolean printZeros){ | |
1360 final long[] hist; | |
1361 if(GC_BINS_AUTO && gcMaxReadLen+1<gcHist.length){ | |
1362 hist=Tools.downsample(gcHist, gcMaxReadLen+1); | |
1363 }else{ | |
1364 hist=gcHist; | |
1365 } | |
1366 final int bins=hist.length; | |
1367 final double gcMult=100.0/Tools.max(1, bins-1); | |
1368 final long total=Tools.sum(hist); | |
1369 final long max=Tools.max(hist); | |
1370 final double countsPerX=Tools.max(1, ((max*1000.0)/40)); | |
1371 final double fractionMult=1.0/Tools.max(1, total); | |
1372 long sum=0; | |
1373 | |
1374 GCMean=Tools.averageHistogram(hist)*gcMult; | |
1375 GCMedian=Tools.percentileHistogram(hist, 0.5)*gcMult; | |
1376 GCMode=Tools.calcModeHistogram(hist)*gcMult; | |
1377 GCSTDev=Tools.standardDeviationHistogram(hist)*gcMult; | |
1378 | |
1379 ByteBuilder bb=new ByteBuilder(256); | |
1380 bb.append("#Mean\t").append(GCMean, 3).nl(); | |
1381 bb.append("#Median\t").append(GCMedian, 3).nl(); | |
1382 bb.append("#Mode\t").append(GCMode, 3).nl(); | |
1383 bb.append("#STDev\t").append(GCSTDev, 3).nl(); | |
1384 if(GC_PLOT_X){ | |
1385 bb.append("#GC\tCount\tCumulative\tPlot\n"); | |
1386 }else{ | |
1387 bb.append("#GC\tCount\n"); | |
1388 } | |
1389 | |
1390 | |
1391 // bsw.print("#Mean\t"+String.format(Locale.ROOT, "%.3f", GCMean)+"\n"); | |
1392 // bsw.print("#Median\t"+String.format(Locale.ROOT, "%.3f", GCMedian)+"\n"); | |
1393 // bsw.print("#Mode\t"+String.format(Locale.ROOT, "%.3f", GCMode)+"\n"); | |
1394 // bsw.print("#STDev\t"+String.format(Locale.ROOT, "%.3f", GCSTDev)+"\n"); | |
1395 // if(GC_PLOT_X){ | |
1396 // bsw.print("#GC\tCount\tCumulative\tPlot\n"); | |
1397 // }else{ | |
1398 // bsw.print("#GC\tCount\n"); | |
1399 // } | |
1400 | |
1401 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); | |
1402 bsw.start(); | |
1403 bsw.print(bb); | |
1404 | |
1405 for(int i=0; i<bins; i++){ | |
1406 long x=hist[i]; | |
1407 sum+=x; | |
1408 if(x>0 || printZeros){ | |
1409 //This assumes GC_BINS==100 | |
1410 // tsw.print(i+"\t"+x+"\n"); | |
1411 if(GC_PLOT_X){ | |
1412 bb.clear(); | |
1413 bb.append(i*gcMult, 1).tab().append(x).tab(); | |
1414 bb.append(sum*fractionMult, 3).tab(); | |
1415 | |
1416 int len=(int)((x*1000)/countsPerX); | |
1417 for(int j=0; j<len; j++){bb.append('X');} | |
1418 if(len<1 && x>0){ | |
1419 if((x*1000f)/countsPerX>0.1f){bb.append('x');} | |
1420 else{bb.append('.');} | |
1421 } | |
1422 | |
1423 bb.append('\n'); | |
1424 bsw.print(bb); | |
1425 }else{ | |
1426 bb.clear().append(i*gcMult, 1).tab().append(x).nl(); | |
1427 bsw.print(bb); | |
1428 } | |
1429 } | |
1430 } | |
1431 bsw.poisonAndWait(); | |
1432 errorState|=bsw.errorState; | |
1433 } | |
1434 | |
1435 public void writeEntropyToFile(String fname, boolean printZeros){ | |
1436 final long[] hist=entropyHist; | |
1437 | |
1438 final int bins=hist.length; | |
1439 final double mult=1.0/Tools.max(1, bins-1); | |
1440 final long total=Tools.sum(hist); | |
1441 final long max=Tools.max(hist); | |
1442 final double countsPerX=Tools.max(1, ((max*1000.0)/40)); | |
1443 final double fractionMult=1.0/Tools.max(1, total); | |
1444 long sum=0; | |
1445 | |
1446 double mean=Tools.averageHistogram(hist)*mult; | |
1447 double median=Tools.percentileHistogram(hist, 0.5)*mult; | |
1448 double mode=Tools.calcModeHistogram(hist)*mult; | |
1449 double stdev=Tools.standardDeviationHistogram(hist)*mult; | |
1450 | |
1451 ByteBuilder bb=new ByteBuilder(256); | |
1452 bb.append("#Mean\t").append(mean, 6).nl(); | |
1453 bb.append("#Median\t").append(median, 6).nl(); | |
1454 bb.append("#Mode\t").append(mode, 6).nl(); | |
1455 bb.append("#STDev\t").append(stdev, 6).nl(); | |
1456 bb.append("#Value\tCount\n"); | |
1457 | |
1458 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); | |
1459 bsw.start(); | |
1460 bsw.print(bb); | |
1461 | |
1462 for(int i=0; i<bins; i++){ | |
1463 long x=hist[i]; | |
1464 sum+=x; | |
1465 if(x>0 || printZeros){ | |
1466 bb.clear().append(i*mult, 4).tab().append(x).nl(); | |
1467 bsw.print(bb); | |
1468 } | |
1469 } | |
1470 bsw.poisonAndWait(); | |
1471 errorState|=bsw.errorState; | |
1472 } | |
1473 | |
1474 public void writeIdentityToFile(String fname, boolean printZeros){ | |
1475 final long[] hist, histb; | |
1476 if(ID_BINS_AUTO && idMaxReadLen+1<idHist.length){ | |
1477 hist=Tools.downsample(idHist, idMaxReadLen+1); | |
1478 histb=Tools.downsample(idBaseHist, idMaxReadLen+1); | |
1479 }else{ | |
1480 hist=idHist; | |
1481 histb=idBaseHist; | |
1482 } | |
1483 final int max=hist.length; | |
1484 final double mult=100.0/(max-1); | |
1485 | |
1486 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, false, false); | |
1487 tsw.start(); | |
1488 | |
1489 | |
1490 tsw.print("#Mean_reads\t"+String.format(Locale.ROOT, "%.3f", (Tools.averageHistogram(hist)*mult))+"\n"); | |
1491 tsw.print("#Mean_bases\t"+(String.format(Locale.ROOT, "%.3f", Tools.averageHistogram(histb)*mult))+"\n"); | |
1492 tsw.print("#Median_reads\t"+(int)Math.round(Tools.percentileHistogram(hist, 0.5)*mult)+"\n"); | |
1493 tsw.print("#Median_bases\t"+(int)Math.round(Tools.percentileHistogram(histb, 0.5)*mult)+"\n"); | |
1494 tsw.print("#Mode_reads\t"+(int)Math.round(Tools.calcModeHistogram(hist)*mult)+"\n"); | |
1495 tsw.print("#Mode_bases\t"+(int)Math.round(Tools.calcModeHistogram(histb)*mult)+"\n"); | |
1496 tsw.print("#STDev_reads\t"+String.format(Locale.ROOT, "%.3f", (Tools.standardDeviationHistogram(hist)*mult))+"\n"); | |
1497 tsw.print("#STDev_bases\t"+String.format(Locale.ROOT, "%.3f", (Tools.standardDeviationHistogram(histb)*mult))+"\n"); | |
1498 tsw.print("#Identity\tReads\tBases\n"); | |
1499 | |
1500 for(int i=0; i<max; i++){ | |
1501 long x=hist[i], x2=histb[i]; | |
1502 if(x>0 || printZeros){ | |
1503 tsw.print(String.format(Locale.ROOT, "%.1f", i*mult)+"\t"+x+"\t"+x2+"\n"); | |
1504 } | |
1505 } | |
1506 tsw.poisonAndWait(); | |
1507 errorState|=tsw.errorState; | |
1508 } | |
1509 | |
1510 //Tracks to see if read2s have been encountered, for displaying stats. | |
1511 private long read2Count=0; | |
1512 | |
1513 public long pairedCount=0; | |
1514 public long unpairedCount=0; | |
1515 | |
1516 public final long[][] aqualArray; | |
1517 public final long[][] qualLength; | |
1518 public final long[][] qualSum; | |
1519 | |
1520 public final long[][][] bqualHist; | |
1521 public final long[] bqualHistOverall; | |
1522 | |
1523 public final long[][] qcountHist; | |
1524 | |
1525 public final double[][] qualSumDouble; | |
1526 | |
1527 public final long[][] matchSum; | |
1528 public final long[][] delSum; | |
1529 public final long[][] insSum; | |
1530 public final long[][] subSum; | |
1531 public final long[][] nSum; | |
1532 public final long[][] clipSum; | |
1533 public final long[][] otherSum; | |
1534 | |
1535 public final long[] qualMatch; | |
1536 public final long[] qualSub; | |
1537 public final long[] qualIns; | |
1538 public final long[] qualDel; | |
1539 | |
1540 public final long[] gcHist; | |
1541 public final long[] entropyHist; | |
1542 public final EntropyTracker eTracker; | |
1543 public final long[] idHist; | |
1544 public final long[] idBaseHist; | |
1545 private int gcMaxReadLen=1; | |
1546 private int idMaxReadLen=1; | |
1547 | |
1548 public final LongList[][] baseHist; | |
1549 | |
1550 /** Insert size */ | |
1551 public final LongList insertHist; | |
1552 /** Read length */ | |
1553 public final SuperLongList lengthHist; | |
1554 /** Number errors per read */ | |
1555 public final LongList errorHist; | |
1556 /** Insertion length */ | |
1557 public final LongList insHist; | |
1558 /** Deletion length */ | |
1559 public final LongList delHist; | |
1560 /** Deletion length, binned */ | |
1561 public final LongList delHist2; | |
1562 /** Time */ | |
1563 public final LongList timeHist; | |
1564 | |
1565 public static boolean REQUIRE_PROPER_PAIR=true; | |
1566 public static int MAXLEN=6000; | |
1567 public static int MAXINSERTLEN=80000; | |
1568 @Deprecated | |
1569 public static int MAXLENGTHLEN=80000; | |
1570 public static final int MAXTIMELEN=80000; | |
1571 public static final int MAXINSLEN=1000; | |
1572 public static final int MAXDELLEN=1000; | |
1573 public static final int MAXDELLEN2=1000000; | |
1574 public static final int DEL_BIN=100; | |
1575 public static int ID_BINS=100; | |
1576 public static boolean ID_BINS_AUTO=false; | |
1577 public static int GC_BINS=100; | |
1578 public static boolean GC_BINS_AUTO=false; | |
1579 public static boolean GC_PLOT_X=false; | |
1580 public static int ENTROPY_BINS=1000; | |
1581 | |
1582 public static double GCMean; | |
1583 public static double GCMedian; | |
1584 public static double GCMode; | |
1585 public static double GCSTDev; | |
1586 | |
1587 // public static double entropyMean; | |
1588 // public static double entropyMedian; | |
1589 // public static double entropyMode; | |
1590 // public static double entropySTDev; | |
1591 | |
1592 public boolean errorState=false; | |
1593 | |
1594 public static ReadStats merged=null; | |
1595 | |
1596 // public static double matedPercent=0; | |
1597 | |
1598 public static void clear(){ | |
1599 COLLECT_QUALITY_STATS=false; | |
1600 COLLECT_QUALITY_ACCURACY=false; | |
1601 COLLECT_MATCH_STATS=false; | |
1602 COLLECT_INSERT_STATS=false; | |
1603 COLLECT_BASE_STATS=false; | |
1604 COLLECT_INDEL_STATS=false; | |
1605 COLLECT_GC_STATS=false; | |
1606 COLLECT_ENTROPY_STATS=false; | |
1607 COLLECT_ERROR_STATS=false; | |
1608 COLLECT_LENGTH_STATS=false; | |
1609 COLLECT_IDENTITY_STATS=false; | |
1610 COLLECT_TIME_STATS=false; | |
1611 | |
1612 AVG_QUAL_HIST_FILE=null; | |
1613 QUAL_HIST_FILE=null; | |
1614 BQUAL_HIST_FILE=null; | |
1615 QUAL_COUNT_HIST_FILE=null; | |
1616 BQUAL_HIST_OVERALL_FILE=null; | |
1617 QUAL_ACCURACY_FILE=null; | |
1618 MATCH_HIST_FILE=null; | |
1619 INSERT_HIST_FILE=null; | |
1620 BASE_HIST_FILE=null; | |
1621 INDEL_HIST_FILE=null; | |
1622 ERROR_HIST_FILE=null; | |
1623 LENGTH_HIST_FILE=null; | |
1624 GC_HIST_FILE=null; | |
1625 ENTROPY_HIST_FILE=null; | |
1626 IDENTITY_HIST_FILE=null; | |
1627 TIME_HIST_FILE=null; | |
1628 } | |
1629 | |
1630 public static ArrayList<ReadStats> objectList=new ArrayList<ReadStats>(); | |
1631 public static boolean COLLECT_QUALITY_STATS=false; | |
1632 public static boolean COLLECT_QUALITY_ACCURACY=false; | |
1633 public static boolean COLLECT_MATCH_STATS=false; | |
1634 public static boolean COLLECT_INSERT_STATS=false; | |
1635 public static boolean COLLECT_BASE_STATS=false; | |
1636 public static boolean COLLECT_INDEL_STATS=false; | |
1637 public static boolean COLLECT_GC_STATS=false; | |
1638 public static boolean COLLECT_ENTROPY_STATS=false; | |
1639 public static boolean COLLECT_ERROR_STATS=false; | |
1640 public static boolean COLLECT_LENGTH_STATS=false; | |
1641 public static boolean COLLECT_IDENTITY_STATS=false; | |
1642 public static boolean COLLECT_TIME_STATS=false; | |
1643 | |
1644 public static boolean collectingStats(){ | |
1645 return COLLECT_QUALITY_STATS || COLLECT_QUALITY_ACCURACY || COLLECT_MATCH_STATS || COLLECT_INSERT_STATS | |
1646 || COLLECT_BASE_STATS || COLLECT_INDEL_STATS || COLLECT_GC_STATS || COLLECT_ENTROPY_STATS | |
1647 || COLLECT_ERROR_STATS || COLLECT_LENGTH_STATS || COLLECT_IDENTITY_STATS || COLLECT_TIME_STATS; | |
1648 } | |
1649 | |
1650 public static boolean usePairGC=true; | |
1651 public static boolean allowEntropyNs=true; | |
1652 | |
1653 public static String AVG_QUAL_HIST_FILE=null; | |
1654 public static String QUAL_HIST_FILE=null; | |
1655 public static String BQUAL_HIST_FILE=null; | |
1656 public static String QUAL_COUNT_HIST_FILE=null; | |
1657 public static String BQUAL_HIST_OVERALL_FILE=null; | |
1658 public static String QUAL_ACCURACY_FILE=null; | |
1659 public static String MATCH_HIST_FILE=null; | |
1660 public static String INSERT_HIST_FILE=null; | |
1661 public static String BASE_HIST_FILE=null; | |
1662 public static String INDEL_HIST_FILE=null; | |
1663 public static String ERROR_HIST_FILE=null; | |
1664 public static String LENGTH_HIST_FILE=null; | |
1665 public static String GC_HIST_FILE=null; | |
1666 public static String ENTROPY_HIST_FILE=null; | |
1667 public static String IDENTITY_HIST_FILE=null; | |
1668 public static String TIME_HIST_FILE=null; | |
1669 | |
1670 public static boolean overwrite=false; | |
1671 public static boolean append=false; | |
1672 public static final boolean verbose=false; | |
1673 | |
1674 public static boolean skipZeroInsertCount=true; | |
1675 public static boolean skipZeroIndel=true; | |
1676 | |
1677 } |