jpayne@68
|
1 package shared;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.Arrays;
|
jpayne@68
|
5 import java.util.Locale;
|
jpayne@68
|
6
|
jpayne@68
|
7 import align2.QualityTools;
|
jpayne@68
|
8 import dna.AminoAcid;
|
jpayne@68
|
9 import fileIO.ByteStreamWriter;
|
jpayne@68
|
10 import fileIO.TextStreamWriter;
|
jpayne@68
|
11 import stream.Read;
|
jpayne@68
|
12 import stream.SamLine;
|
jpayne@68
|
13 import structures.ByteBuilder;
|
jpayne@68
|
14 import structures.EntropyTracker;
|
jpayne@68
|
15 import structures.LongList;
|
jpayne@68
|
16 import structures.SuperLongList;
|
jpayne@68
|
17
|
jpayne@68
|
18
|
jpayne@68
|
19 /**
|
jpayne@68
|
20 * @author Brian Bushnell
|
jpayne@68
|
21 * @date Mar 18, 2013
|
jpayne@68
|
22 *
|
jpayne@68
|
23 */
|
jpayne@68
|
24 public class ReadStats {
|
jpayne@68
|
25
|
jpayne@68
|
26 public ReadStats(){this(true);}
|
jpayne@68
|
27
|
jpayne@68
|
28 public ReadStats(boolean addToList){
|
jpayne@68
|
29 if(addToList){
|
jpayne@68
|
30 synchronized(ReadStats.class){
|
jpayne@68
|
31 objectList.add(this);
|
jpayne@68
|
32 }
|
jpayne@68
|
33 }
|
jpayne@68
|
34
|
jpayne@68
|
35 if(COLLECT_QUALITY_STATS){
|
jpayne@68
|
36 aqualArray=new long[2][127];
|
jpayne@68
|
37 qualLength=new long[2][MAXLEN];
|
jpayne@68
|
38 qualSum=new long[2][MAXLEN];
|
jpayne@68
|
39 qualSumDouble=new double[2][MAXLEN];
|
jpayne@68
|
40 bqualHistOverall=new long[127];
|
jpayne@68
|
41 }else{
|
jpayne@68
|
42 aqualArray=null;
|
jpayne@68
|
43 qualLength=null;
|
jpayne@68
|
44 qualSum=null;
|
jpayne@68
|
45 qualSumDouble=null;
|
jpayne@68
|
46 bqualHistOverall=null;
|
jpayne@68
|
47 }
|
jpayne@68
|
48
|
jpayne@68
|
49 if(BQUAL_HIST_FILE!=null){
|
jpayne@68
|
50 bqualHist=new long[2][MAXLEN][127];
|
jpayne@68
|
51 }else{
|
jpayne@68
|
52 bqualHist=null;
|
jpayne@68
|
53 }
|
jpayne@68
|
54
|
jpayne@68
|
55 if(QUAL_COUNT_HIST_FILE!=null){
|
jpayne@68
|
56 qcountHist=new long[2][127];
|
jpayne@68
|
57 }else{
|
jpayne@68
|
58 qcountHist=null;
|
jpayne@68
|
59 }
|
jpayne@68
|
60
|
jpayne@68
|
61 if(COLLECT_MATCH_STATS){
|
jpayne@68
|
62 matchSum=new long[2][MAXLEN];
|
jpayne@68
|
63 delSum=new long[2][MAXLEN];
|
jpayne@68
|
64 insSum=new long[2][MAXLEN];
|
jpayne@68
|
65 subSum=new long[2][MAXLEN];
|
jpayne@68
|
66 nSum=new long[2][MAXLEN];
|
jpayne@68
|
67 clipSum=new long[2][MAXLEN];
|
jpayne@68
|
68 otherSum=new long[2][MAXLEN];
|
jpayne@68
|
69 }else{
|
jpayne@68
|
70 matchSum=null;
|
jpayne@68
|
71 delSum=null;
|
jpayne@68
|
72 insSum=null;
|
jpayne@68
|
73 subSum=null;
|
jpayne@68
|
74 nSum=null;
|
jpayne@68
|
75 clipSum=null;
|
jpayne@68
|
76 otherSum=null;
|
jpayne@68
|
77 }
|
jpayne@68
|
78
|
jpayne@68
|
79 if(COLLECT_QUALITY_ACCURACY){
|
jpayne@68
|
80 qualMatch=new long[99];
|
jpayne@68
|
81 qualSub=new long[99];
|
jpayne@68
|
82 qualIns=new long[99];
|
jpayne@68
|
83 qualDel=new long[99];
|
jpayne@68
|
84 }else{
|
jpayne@68
|
85 qualMatch=null;
|
jpayne@68
|
86 qualSub=null;
|
jpayne@68
|
87 qualIns=null;
|
jpayne@68
|
88 qualDel=null;
|
jpayne@68
|
89 }
|
jpayne@68
|
90
|
jpayne@68
|
91 if(COLLECT_INSERT_STATS){
|
jpayne@68
|
92 insertHist=new LongList(MAXLEN);
|
jpayne@68
|
93 }else{
|
jpayne@68
|
94 insertHist=null;
|
jpayne@68
|
95 }
|
jpayne@68
|
96
|
jpayne@68
|
97 if(COLLECT_BASE_STATS){
|
jpayne@68
|
98 baseHist=new LongList[2][5];
|
jpayne@68
|
99 for(int i=0; i<baseHist.length; i++){
|
jpayne@68
|
100 for(int j=0; j<baseHist[i].length; j++){
|
jpayne@68
|
101 baseHist[i][j]=new LongList(400);
|
jpayne@68
|
102 }
|
jpayne@68
|
103 }
|
jpayne@68
|
104 }else{
|
jpayne@68
|
105 baseHist=null;
|
jpayne@68
|
106 }
|
jpayne@68
|
107
|
jpayne@68
|
108
|
jpayne@68
|
109 if(COLLECT_INDEL_STATS){
|
jpayne@68
|
110 insHist=new LongList(100);
|
jpayne@68
|
111 delHist=new LongList(100);
|
jpayne@68
|
112 delHist2=new LongList(100);
|
jpayne@68
|
113 }else{
|
jpayne@68
|
114 insHist=null;
|
jpayne@68
|
115 delHist=null;
|
jpayne@68
|
116 delHist2=null;
|
jpayne@68
|
117 }
|
jpayne@68
|
118
|
jpayne@68
|
119 if(COLLECT_GC_STATS){
|
jpayne@68
|
120 gcHist=new long[GC_BINS+1];
|
jpayne@68
|
121 }else{
|
jpayne@68
|
122 gcHist=null;
|
jpayne@68
|
123 }
|
jpayne@68
|
124
|
jpayne@68
|
125 if(COLLECT_ENTROPY_STATS){
|
jpayne@68
|
126 entropyHist=new long[ENTROPY_BINS+1];
|
jpayne@68
|
127 eTracker=new EntropyTracker(Shared.AMINO_IN, 0, true);
|
jpayne@68
|
128 }else{
|
jpayne@68
|
129 entropyHist=null;
|
jpayne@68
|
130 eTracker=null;
|
jpayne@68
|
131 }
|
jpayne@68
|
132
|
jpayne@68
|
133 if(COLLECT_ERROR_STATS){
|
jpayne@68
|
134 errorHist=new LongList(100);
|
jpayne@68
|
135 }else{
|
jpayne@68
|
136 errorHist=null;
|
jpayne@68
|
137 }
|
jpayne@68
|
138
|
jpayne@68
|
139 if(COLLECT_LENGTH_STATS){
|
jpayne@68
|
140 lengthHist=new SuperLongList(20000);
|
jpayne@68
|
141 }else{
|
jpayne@68
|
142 lengthHist=null;
|
jpayne@68
|
143 }
|
jpayne@68
|
144
|
jpayne@68
|
145 if(COLLECT_IDENTITY_STATS){
|
jpayne@68
|
146 idHist=new long[ID_BINS+1];
|
jpayne@68
|
147 idBaseHist=new long[ID_BINS+1];
|
jpayne@68
|
148 }else{
|
jpayne@68
|
149 idHist=null;
|
jpayne@68
|
150 idBaseHist=null;
|
jpayne@68
|
151 }
|
jpayne@68
|
152
|
jpayne@68
|
153 if(COLLECT_TIME_STATS){
|
jpayne@68
|
154 timeHist=new LongList(1001);
|
jpayne@68
|
155 }else{
|
jpayne@68
|
156 timeHist=null;
|
jpayne@68
|
157 }
|
jpayne@68
|
158
|
jpayne@68
|
159 }
|
jpayne@68
|
160
|
jpayne@68
|
161 public static ReadStats mergeAll(){
|
jpayne@68
|
162 if(objectList==null || objectList.isEmpty()){return merged=null;}
|
jpayne@68
|
163 if(objectList.size()==1){return merged=objectList.get(0);}
|
jpayne@68
|
164
|
jpayne@68
|
165 ReadStats x=new ReadStats(false);
|
jpayne@68
|
166 for(ReadStats rs : objectList){
|
jpayne@68
|
167 x.read2Count+=rs.read2Count;
|
jpayne@68
|
168 if(COLLECT_QUALITY_STATS){
|
jpayne@68
|
169 for(int i=0; i<MAXLEN; i++){
|
jpayne@68
|
170 x.qualLength[0][i]+=rs.qualLength[0][i];
|
jpayne@68
|
171 x.qualLength[1][i]+=rs.qualLength[1][i];
|
jpayne@68
|
172 x.qualSum[0][i]+=rs.qualSum[0][i];
|
jpayne@68
|
173 x.qualSum[1][i]+=rs.qualSum[1][i];
|
jpayne@68
|
174 x.qualSumDouble[0][i]+=rs.qualSumDouble[0][i];
|
jpayne@68
|
175 x.qualSumDouble[1][i]+=rs.qualSumDouble[1][i];
|
jpayne@68
|
176 }
|
jpayne@68
|
177 for(int i=0; i<x.aqualArray[0].length; i++){
|
jpayne@68
|
178 x.aqualArray[0][i]+=rs.aqualArray[0][i];
|
jpayne@68
|
179 x.aqualArray[1][i]+=rs.aqualArray[1][i];
|
jpayne@68
|
180 }
|
jpayne@68
|
181 for(int i=0; i<x.bqualHistOverall.length; i++){
|
jpayne@68
|
182 x.bqualHistOverall[i]+=rs.bqualHistOverall[i];
|
jpayne@68
|
183 }
|
jpayne@68
|
184 if(BQUAL_HIST_FILE!=null){
|
jpayne@68
|
185 for(int i=0; i<x.bqualHist.length; i++){
|
jpayne@68
|
186 for(int j=0; j<x.bqualHist[i].length; j++){
|
jpayne@68
|
187 for(int k=0; k<x.bqualHist[i][j].length; k++){
|
jpayne@68
|
188 x.bqualHist[i][j][k]+=rs.bqualHist[i][j][k];
|
jpayne@68
|
189 }
|
jpayne@68
|
190 }
|
jpayne@68
|
191 }
|
jpayne@68
|
192 }
|
jpayne@68
|
193 if(QUAL_COUNT_HIST_FILE!=null){
|
jpayne@68
|
194 for(int i=0; i<x.qcountHist.length; i++){
|
jpayne@68
|
195 for(int j=0; j<x.qcountHist[i].length; j++){
|
jpayne@68
|
196 x.qcountHist[i][j]+=rs.qcountHist[i][j];
|
jpayne@68
|
197 }
|
jpayne@68
|
198 }
|
jpayne@68
|
199 }
|
jpayne@68
|
200 }
|
jpayne@68
|
201
|
jpayne@68
|
202 if(COLLECT_MATCH_STATS){
|
jpayne@68
|
203 for(int i=0; i<MAXLEN; i++){
|
jpayne@68
|
204 x.matchSum[0][i]+=rs.matchSum[0][i];
|
jpayne@68
|
205 x.matchSum[1][i]+=rs.matchSum[1][i];
|
jpayne@68
|
206 x.delSum[0][i]+=rs.delSum[0][i];
|
jpayne@68
|
207 x.delSum[1][i]+=rs.delSum[1][i];
|
jpayne@68
|
208 x.insSum[0][i]+=rs.insSum[0][i];
|
jpayne@68
|
209 x.insSum[1][i]+=rs.insSum[1][i];
|
jpayne@68
|
210 x.subSum[0][i]+=rs.subSum[0][i];
|
jpayne@68
|
211 x.subSum[1][i]+=rs.subSum[1][i];
|
jpayne@68
|
212 x.nSum[0][i]+=rs.nSum[0][i];
|
jpayne@68
|
213 x.nSum[1][i]+=rs.nSum[1][i];
|
jpayne@68
|
214 x.clipSum[0][i]+=rs.clipSum[0][i];
|
jpayne@68
|
215 x.clipSum[1][i]+=rs.clipSum[1][i];
|
jpayne@68
|
216 x.otherSum[0][i]+=rs.otherSum[0][i];
|
jpayne@68
|
217 x.otherSum[1][i]+=rs.otherSum[1][i];
|
jpayne@68
|
218 }
|
jpayne@68
|
219 }
|
jpayne@68
|
220 if(COLLECT_INSERT_STATS){
|
jpayne@68
|
221 x.insertHist.incrementBy(rs.insertHist);
|
jpayne@68
|
222 x.pairedCount+=rs.pairedCount;
|
jpayne@68
|
223 x.unpairedCount+=rs.unpairedCount;
|
jpayne@68
|
224 }
|
jpayne@68
|
225 if(COLLECT_BASE_STATS){
|
jpayne@68
|
226 for(int i=0; i<rs.baseHist.length; i++){
|
jpayne@68
|
227 for(int j=0; j<rs.baseHist[i].length; j++){
|
jpayne@68
|
228 x.baseHist[i][j].incrementBy(rs.baseHist[i][j]);
|
jpayne@68
|
229 }
|
jpayne@68
|
230 }
|
jpayne@68
|
231 }
|
jpayne@68
|
232 if(COLLECT_QUALITY_ACCURACY){
|
jpayne@68
|
233 for(int i=0; i<x.qualMatch.length; i++){
|
jpayne@68
|
234 x.qualMatch[i]+=rs.qualMatch[i];
|
jpayne@68
|
235 x.qualSub[i]+=rs.qualSub[i];
|
jpayne@68
|
236 x.qualIns[i]+=rs.qualIns[i];
|
jpayne@68
|
237 x.qualDel[i]+=rs.qualDel[i];
|
jpayne@68
|
238 }
|
jpayne@68
|
239 }
|
jpayne@68
|
240
|
jpayne@68
|
241
|
jpayne@68
|
242 if(COLLECT_INDEL_STATS){
|
jpayne@68
|
243 x.delHist.incrementBy(rs.delHist);
|
jpayne@68
|
244 x.delHist2.incrementBy(rs.delHist2);
|
jpayne@68
|
245 x.insHist.incrementBy(rs.insHist);
|
jpayne@68
|
246 }
|
jpayne@68
|
247
|
jpayne@68
|
248 if(COLLECT_LENGTH_STATS){
|
jpayne@68
|
249 x.lengthHist.incrementBy(rs.lengthHist);
|
jpayne@68
|
250 }
|
jpayne@68
|
251
|
jpayne@68
|
252
|
jpayne@68
|
253 if(COLLECT_ERROR_STATS){
|
jpayne@68
|
254 x.errorHist.incrementBy(rs.errorHist);
|
jpayne@68
|
255 }
|
jpayne@68
|
256
|
jpayne@68
|
257 if(COLLECT_GC_STATS){
|
jpayne@68
|
258 for(int i=0; i<rs.gcHist.length; i++){
|
jpayne@68
|
259 x.gcHist[i]+=rs.gcHist[i];
|
jpayne@68
|
260 }
|
jpayne@68
|
261 }
|
jpayne@68
|
262
|
jpayne@68
|
263 if(COLLECT_ENTROPY_STATS){
|
jpayne@68
|
264 for(int i=0; i<rs.entropyHist.length; i++){
|
jpayne@68
|
265 x.entropyHist[i]+=rs.entropyHist[i];
|
jpayne@68
|
266 }
|
jpayne@68
|
267 }
|
jpayne@68
|
268
|
jpayne@68
|
269 if(COLLECT_IDENTITY_STATS){
|
jpayne@68
|
270 for(int i=0; i<rs.idHist.length; i++){
|
jpayne@68
|
271 x.idHist[i]+=rs.idHist[i];
|
jpayne@68
|
272 x.idBaseHist[i]+=rs.idBaseHist[i];
|
jpayne@68
|
273 }
|
jpayne@68
|
274 }
|
jpayne@68
|
275
|
jpayne@68
|
276 if(COLLECT_TIME_STATS){
|
jpayne@68
|
277 x.timeHist.incrementBy(rs.timeHist);
|
jpayne@68
|
278 }
|
jpayne@68
|
279
|
jpayne@68
|
280 x.gcMaxReadLen=Tools.max(x.gcMaxReadLen, rs.gcMaxReadLen);
|
jpayne@68
|
281 x.idMaxReadLen=Tools.max(x.idMaxReadLen, rs.idMaxReadLen);
|
jpayne@68
|
282 }
|
jpayne@68
|
283
|
jpayne@68
|
284 merged=x;
|
jpayne@68
|
285 return x;
|
jpayne@68
|
286 }
|
jpayne@68
|
287
|
jpayne@68
|
288 public void addToQualityHistogram(final Read r){
|
jpayne@68
|
289 if(r==null){return;}
|
jpayne@68
|
290 addToQualityHistogram2(r);
|
jpayne@68
|
291 if(r.mate!=null){addToQualityHistogram2(r.mate);}
|
jpayne@68
|
292 }
|
jpayne@68
|
293
|
jpayne@68
|
294 private void addToQualityHistogram2(final Read r){
|
jpayne@68
|
295 final int pairnum=r.samline==null ? r.pairnum() : r.samline.pairnum();
|
jpayne@68
|
296 if(r==null || r.quality==null || r.quality.length<1){return;}
|
jpayne@68
|
297 byte[] quals=r.quality, bases=r.bases;
|
jpayne@68
|
298 final Object obj=r.obj;
|
jpayne@68
|
299 if(obj!=null && obj.getClass()==TrimRead.class){
|
jpayne@68
|
300 quals=(pairnum==0 ? ((TrimRead)obj).qual1 : ((TrimRead)obj).qual2);
|
jpayne@68
|
301 bases=(pairnum==0 ? ((TrimRead)obj).bases1 : ((TrimRead)obj).bases2);
|
jpayne@68
|
302 }
|
jpayne@68
|
303 if(pairnum==1){read2Count++;}
|
jpayne@68
|
304 addToQualityHistogram(quals, pairnum);
|
jpayne@68
|
305 int x=Read.avgQualityByProbabilityInt(bases, quals, true, 0);
|
jpayne@68
|
306 aqualArray[pairnum][x]++;
|
jpayne@68
|
307 if(BQUAL_HIST_FILE!=null){
|
jpayne@68
|
308 addToBQualityHistogram(quals, pairnum);
|
jpayne@68
|
309 }
|
jpayne@68
|
310 if(QUAL_COUNT_HIST_FILE!=null){
|
jpayne@68
|
311 addToQCountHistogram(quals, pairnum);
|
jpayne@68
|
312 }
|
jpayne@68
|
313 }
|
jpayne@68
|
314
|
jpayne@68
|
315 public void addToQualityHistogram(byte[] qual, int pairnum){
|
jpayne@68
|
316 if(qual==null || qual.length<1){return;}
|
jpayne@68
|
317 final int limit=Tools.min(qual.length, MAXLEN);
|
jpayne@68
|
318 final long[] ql=qualLength[pairnum], qs=qualSum[pairnum];
|
jpayne@68
|
319 final double[] qsd=qualSumDouble[pairnum];
|
jpayne@68
|
320 ql[limit-1]++;
|
jpayne@68
|
321 for(int i=0; i<limit; i++){
|
jpayne@68
|
322 qs[i]+=qual[i];
|
jpayne@68
|
323 qsd[i]+=QualityTools.PROB_ERROR[qual[i]];
|
jpayne@68
|
324 }
|
jpayne@68
|
325 for(byte q : qual){
|
jpayne@68
|
326 bqualHistOverall[q]++;
|
jpayne@68
|
327 }
|
jpayne@68
|
328 }
|
jpayne@68
|
329
|
jpayne@68
|
330 private void addToBQualityHistogram(byte[] qual, int pairnum){
|
jpayne@68
|
331 if(qual==null || qual.length<1){return;}
|
jpayne@68
|
332 final int limit=Tools.min(qual.length, MAXLEN);
|
jpayne@68
|
333 final long[][] bqh=bqualHist[pairnum];
|
jpayne@68
|
334 for(int i=0; i<limit; i++){
|
jpayne@68
|
335 bqh[i][qual[i]]++;
|
jpayne@68
|
336 }
|
jpayne@68
|
337 }
|
jpayne@68
|
338
|
jpayne@68
|
339 private void addToQCountHistogram(byte[] qual, int pairnum){
|
jpayne@68
|
340 if(qual==null || qual.length<1){return;}
|
jpayne@68
|
341 final long[] qch=qcountHist[pairnum];
|
jpayne@68
|
342 for(byte q : qual){
|
jpayne@68
|
343 qch[q]++;
|
jpayne@68
|
344 }
|
jpayne@68
|
345 }
|
jpayne@68
|
346
|
jpayne@68
|
347 public void addToQualityAccuracy(final Read r){
|
jpayne@68
|
348 if(r==null){return;}
|
jpayne@68
|
349 addToQualityAccuracy(r, 0);
|
jpayne@68
|
350 if(r.mate!=null){addToQualityAccuracy(r.mate, 1);}
|
jpayne@68
|
351 }
|
jpayne@68
|
352
|
jpayne@68
|
353 public void addToQualityAccuracy(final Read r, int pairnum){
|
jpayne@68
|
354 if(r==null || r.quality==null || r.quality.length<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;}
|
jpayne@68
|
355 final byte[] bases=r.bases;
|
jpayne@68
|
356 final byte[] qual=r.quality;
|
jpayne@68
|
357 byte[] match=r.match;
|
jpayne@68
|
358
|
jpayne@68
|
359 if(r.shortmatch()){match=Read.toLongMatchString(match);}
|
jpayne@68
|
360
|
jpayne@68
|
361 final boolean plus=(r.strand()==0);
|
jpayne@68
|
362 int bpos=0;
|
jpayne@68
|
363 byte lastm='A';
|
jpayne@68
|
364 for(int mpos=0; mpos<match.length/* && rpos<limit*/; mpos++){
|
jpayne@68
|
365 byte b=bases[bpos];
|
jpayne@68
|
366 byte q=qual[bpos];
|
jpayne@68
|
367 byte m=match[plus ? mpos : match.length-mpos-1];
|
jpayne@68
|
368
|
jpayne@68
|
369 {
|
jpayne@68
|
370 if(m=='m'){
|
jpayne@68
|
371 qualMatch[q]++;
|
jpayne@68
|
372 }else if(m=='S'){
|
jpayne@68
|
373 qualSub[q]++;
|
jpayne@68
|
374 }else if(m=='I'){
|
jpayne@68
|
375 if(AminoAcid.isFullyDefined(b)){qualIns[q]++;}
|
jpayne@68
|
376 }else if(m=='N'){
|
jpayne@68
|
377 //do nothing
|
jpayne@68
|
378 }else if(m=='C'){
|
jpayne@68
|
379 //do nothing
|
jpayne@68
|
380 }else if(m=='V' || m=='i'){
|
jpayne@68
|
381 //do nothing
|
jpayne@68
|
382 }else if(m=='D'){
|
jpayne@68
|
383 if(lastm!=m){
|
jpayne@68
|
384 int x=bpos, y=bpos-1;
|
jpayne@68
|
385 if(x<qual.length){
|
jpayne@68
|
386 if(AminoAcid.isFullyDefined(bases[x])){
|
jpayne@68
|
387 qualDel[qual[x]]++;
|
jpayne@68
|
388 }
|
jpayne@68
|
389 }
|
jpayne@68
|
390 if(y>=0){
|
jpayne@68
|
391 if(AminoAcid.isFullyDefined(bases[y])){
|
jpayne@68
|
392 qualDel[qual[y]]++;
|
jpayne@68
|
393 }
|
jpayne@68
|
394 }
|
jpayne@68
|
395 }
|
jpayne@68
|
396 bpos--;
|
jpayne@68
|
397 }else if(m=='d'){
|
jpayne@68
|
398 bpos--;
|
jpayne@68
|
399 }else{
|
jpayne@68
|
400 assert(!Tools.isDigit(m)) : ((char)m);
|
jpayne@68
|
401 }
|
jpayne@68
|
402 }
|
jpayne@68
|
403
|
jpayne@68
|
404 bpos++;
|
jpayne@68
|
405 lastm=m;
|
jpayne@68
|
406 }
|
jpayne@68
|
407
|
jpayne@68
|
408 }
|
jpayne@68
|
409
|
jpayne@68
|
410 public void addToErrorHistogram(final Read r){
|
jpayne@68
|
411 if(r==null){return;}
|
jpayne@68
|
412 addToErrorHistogram(r, 0);
|
jpayne@68
|
413 if(r.mate!=null){addToErrorHistogram(r.mate, 1);}
|
jpayne@68
|
414 }
|
jpayne@68
|
415
|
jpayne@68
|
416 private void addToErrorHistogram(final Read r, int pairnum){
|
jpayne@68
|
417 if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;}
|
jpayne@68
|
418 r.toLongMatchString(false);
|
jpayne@68
|
419 int x=r.countSubs();
|
jpayne@68
|
420 errorHist.increment(x, 1);
|
jpayne@68
|
421 }
|
jpayne@68
|
422
|
jpayne@68
|
423 public void addToLengthHistogram(final Read r){
|
jpayne@68
|
424 if(r==null){return;}
|
jpayne@68
|
425 addToLengthHistogram(r, 0);
|
jpayne@68
|
426 if(r.mate!=null){addToLengthHistogram(r.mate, 1);}
|
jpayne@68
|
427 }
|
jpayne@68
|
428
|
jpayne@68
|
429 private void addToLengthHistogram(final Read r, int pairnum){
|
jpayne@68
|
430 if(r==null || r.bases==null){return;}
|
jpayne@68
|
431 int x=r.length();//Tools.min(r.length(), MAXLENGTHLEN); Old style before SLL
|
jpayne@68
|
432 lengthHist.increment(x, 1);
|
jpayne@68
|
433 }
|
jpayne@68
|
434
|
jpayne@68
|
435 public void addToGCHistogram(final Read r1){
|
jpayne@68
|
436 if(r1==null){return;}
|
jpayne@68
|
437 final Read r2=r1.mate;
|
jpayne@68
|
438 final int len1=r1.length(), len2=r1.mateLength();
|
jpayne@68
|
439
|
jpayne@68
|
440 final float gc1=(len1>0 ? r1.gc() : -1);
|
jpayne@68
|
441 final float gc2=(len2>0 ? r2.gc() : -1);
|
jpayne@68
|
442 if(usePairGC){
|
jpayne@68
|
443 final float gc;
|
jpayne@68
|
444 if(r2==null){
|
jpayne@68
|
445 gc=gc1;
|
jpayne@68
|
446 }else{
|
jpayne@68
|
447 gc=(gc1*len1+gc2*len2)/(len1+len2);
|
jpayne@68
|
448 }
|
jpayne@68
|
449 addToGCHistogram(gc, len1+len2);
|
jpayne@68
|
450 }else{
|
jpayne@68
|
451 addToGCHistogram(gc1, len1);
|
jpayne@68
|
452 addToGCHistogram(gc2, len2);
|
jpayne@68
|
453 }
|
jpayne@68
|
454 }
|
jpayne@68
|
455
|
jpayne@68
|
456 private void addToGCHistogram(final float gc, final int len){
|
jpayne@68
|
457 if(gc<0 || len<1){return;}
|
jpayne@68
|
458 gcHist[Tools.min(GC_BINS, (int)(gc*(GC_BINS+1)))]++;
|
jpayne@68
|
459 gcMaxReadLen=Tools.max(len, gcMaxReadLen);
|
jpayne@68
|
460 }
|
jpayne@68
|
461
|
jpayne@68
|
462 public void addToEntropyHistogram(final Read r1){
|
jpayne@68
|
463 if(r1==null){return;}
|
jpayne@68
|
464 final Read r2=r1.mate;
|
jpayne@68
|
465 final int len1=r1.length(), len2=r1.mateLength();
|
jpayne@68
|
466
|
jpayne@68
|
467 final float entropy1=(len1>0 ? eTracker.averageEntropy(r1.bases, allowEntropyNs) : -1);
|
jpayne@68
|
468 final float entropy2=(len2>0 ? eTracker.averageEntropy(r2.bases, allowEntropyNs) : -1);
|
jpayne@68
|
469 if(/* usePairEntropy */ false){
|
jpayne@68
|
470 final float entropy;
|
jpayne@68
|
471 if(r2==null){
|
jpayne@68
|
472 entropy=entropy1;
|
jpayne@68
|
473 }else{
|
jpayne@68
|
474 entropy=(entropy1*len1+entropy2*len2)/(len1+len2);
|
jpayne@68
|
475 }
|
jpayne@68
|
476 addToEntropyHistogram(entropy, len1+len2);
|
jpayne@68
|
477 }else{
|
jpayne@68
|
478 addToEntropyHistogram(entropy1, len1);
|
jpayne@68
|
479 addToEntropyHistogram(entropy2, len2);
|
jpayne@68
|
480 }
|
jpayne@68
|
481 }
|
jpayne@68
|
482
|
jpayne@68
|
483 private void addToEntropyHistogram(final float entropy, final int len){
|
jpayne@68
|
484 if(entropy<0 || len<1){return;}
|
jpayne@68
|
485 entropyHist[Tools.min(ENTROPY_BINS, (int)(entropy*(ENTROPY_BINS+1)))]++;
|
jpayne@68
|
486 }
|
jpayne@68
|
487
|
jpayne@68
|
488 public void addToIdentityHistogram(final Read r){
|
jpayne@68
|
489 if(r==null){return;}
|
jpayne@68
|
490 addToIdentityHistogram(r, 0);
|
jpayne@68
|
491 if(r.mate!=null){addToIdentityHistogram(r.mate, 1);}
|
jpayne@68
|
492 }
|
jpayne@68
|
493
|
jpayne@68
|
494 private void addToIdentityHistogram(final Read r, int pairnum){
|
jpayne@68
|
495 if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;}
|
jpayne@68
|
496 float id=r.identity();
|
jpayne@68
|
497 idHist[(int)(id*ID_BINS)]++;
|
jpayne@68
|
498 idBaseHist[(int)(id*ID_BINS)]+=r.length();
|
jpayne@68
|
499 idMaxReadLen=Tools.max(r.length(), idMaxReadLen);
|
jpayne@68
|
500 }
|
jpayne@68
|
501
|
jpayne@68
|
502 public void addToTimeHistogram(final Read r){
|
jpayne@68
|
503 if(r==null){return;}
|
jpayne@68
|
504 addToTimeHistogram(r, 0);//Time for pairs is the same.
|
jpayne@68
|
505 }
|
jpayne@68
|
506
|
jpayne@68
|
507 private void addToTimeHistogram(final Read r, int pairnum){
|
jpayne@68
|
508 if(r==null){return;}
|
jpayne@68
|
509 assert(r.obj!=null && r.obj.getClass()==Long.class);
|
jpayne@68
|
510 int x=(int)Tools.min(((Long)r.obj).longValue(), MAXTIMELEN);
|
jpayne@68
|
511 timeHist.increment(x, 1);
|
jpayne@68
|
512 }
|
jpayne@68
|
513
|
jpayne@68
|
514 public boolean addToIndelHistogram(final Read r){
|
jpayne@68
|
515 if(r==null){return false;}
|
jpayne@68
|
516 boolean success=addToIndelHistogram(r, 0);
|
jpayne@68
|
517 if(r.mate!=null){success=addToIndelHistogram(r.mate, 1)|success;}
|
jpayne@68
|
518 return success;
|
jpayne@68
|
519 }
|
jpayne@68
|
520
|
jpayne@68
|
521 /** Handles short match, long match, and reads with attached SamLines */
|
jpayne@68
|
522 private boolean addToIndelHistogram(final Read r, int pairnum){
|
jpayne@68
|
523 if(r==null || !r.mapped()){return false;}
|
jpayne@68
|
524 if(r.samline!=null){
|
jpayne@68
|
525 boolean success=addToIndelHistogram(r.samline);
|
jpayne@68
|
526 if(success){return true;}
|
jpayne@68
|
527 }
|
jpayne@68
|
528 if(r.match==null/* || r.discarded()*/){return false;}
|
jpayne@68
|
529 final byte[] match=r.match;
|
jpayne@68
|
530
|
jpayne@68
|
531 byte lastLetter='?';
|
jpayne@68
|
532 boolean digit=false;
|
jpayne@68
|
533 int streak=0;
|
jpayne@68
|
534 for(int mpos=0; mpos<match.length; mpos++){
|
jpayne@68
|
535 final byte m=match[mpos];
|
jpayne@68
|
536
|
jpayne@68
|
537 if(Tools.isDigit(m)){
|
jpayne@68
|
538 streak=streak*10+m-'0';
|
jpayne@68
|
539 digit=true;
|
jpayne@68
|
540 }else{
|
jpayne@68
|
541 if(lastLetter==m){
|
jpayne@68
|
542 streak++;
|
jpayne@68
|
543 }else{
|
jpayne@68
|
544 // assert(streak>0 || (streak==0 && lastm=='?'));
|
jpayne@68
|
545 if(!digit){streak++;}
|
jpayne@68
|
546 digit=false;
|
jpayne@68
|
547 if(lastLetter=='D'){
|
jpayne@68
|
548 streak=Tools.min(streak, MAXDELLEN2);
|
jpayne@68
|
549 if(streak<MAXDELLEN){delHist.increment(streak, 1);}
|
jpayne@68
|
550 delHist2.increment(streak/100, 1);
|
jpayne@68
|
551 // System.err.println("A. Del: "+streak+", "+MAXDELLEN+", "+MAXDELLEN2);
|
jpayne@68
|
552 }else if(lastLetter=='I'){
|
jpayne@68
|
553 streak=Tools.min(streak, MAXINSLEN);
|
jpayne@68
|
554 insHist.increment(streak, 1);
|
jpayne@68
|
555 // System.err.println("A. Ins: "+streak+", "+MAXINSLEN);
|
jpayne@68
|
556 }
|
jpayne@68
|
557 streak=0;
|
jpayne@68
|
558 }
|
jpayne@68
|
559 lastLetter=m;
|
jpayne@68
|
560 }
|
jpayne@68
|
561 }
|
jpayne@68
|
562
|
jpayne@68
|
563 {//Final symbol
|
jpayne@68
|
564 if(!digit){streak++;}
|
jpayne@68
|
565 digit=false;
|
jpayne@68
|
566 if(lastLetter=='D'){
|
jpayne@68
|
567 streak=Tools.min(streak, MAXDELLEN2);
|
jpayne@68
|
568 if(streak<MAXDELLEN){delHist.increment(streak, 1);}
|
jpayne@68
|
569 delHist2.increment(streak/100, 1);
|
jpayne@68
|
570 // System.err.println("A. Del: "+streak+", "+MAXDELLEN+", "+MAXDELLEN2);
|
jpayne@68
|
571 }else if(lastLetter=='I'){
|
jpayne@68
|
572 streak=Tools.min(streak, MAXINSLEN);
|
jpayne@68
|
573 insHist.increment(streak, 1);
|
jpayne@68
|
574 // System.err.println("A. Ins: "+streak+", "+MAXINSLEN);
|
jpayne@68
|
575 }
|
jpayne@68
|
576 streak=0;
|
jpayne@68
|
577 }
|
jpayne@68
|
578 return true;
|
jpayne@68
|
579 }
|
jpayne@68
|
580
|
jpayne@68
|
581 private boolean addToIndelHistogram(SamLine sl){
|
jpayne@68
|
582 if(sl==null || sl.cigar==null || !sl.mapped()){
|
jpayne@68
|
583 return false;
|
jpayne@68
|
584 }
|
jpayne@68
|
585 final String cigar=sl.cigar;
|
jpayne@68
|
586 // final int pairnum=sl.pairnum();
|
jpayne@68
|
587
|
jpayne@68
|
588 int count=0;
|
jpayne@68
|
589 for(int cpos=0; cpos<cigar.length(); cpos++){
|
jpayne@68
|
590 final char c=cigar.charAt(cpos);
|
jpayne@68
|
591
|
jpayne@68
|
592 if(Tools.isDigit(c)){
|
jpayne@68
|
593 count=count*10+c-'0';
|
jpayne@68
|
594 }else{
|
jpayne@68
|
595 if(c=='I'){
|
jpayne@68
|
596 int streak=Tools.min(count, MAXINSLEN);
|
jpayne@68
|
597 insHist.increment(streak, 1);
|
jpayne@68
|
598 // System.err.println("A. Ins: "+streak+", "+MAXINSLEN);
|
jpayne@68
|
599 }else if(c=='D'){
|
jpayne@68
|
600 int streak=Tools.min(count, MAXDELLEN2);
|
jpayne@68
|
601 if(streak<MAXDELLEN){delHist.increment(streak, 1);}
|
jpayne@68
|
602 delHist2.increment(streak/100, 1);
|
jpayne@68
|
603 // System.err.println("A. Del: "+streak+", "+MAXDELLEN+", "+MAXDELLEN2);
|
jpayne@68
|
604 }else{
|
jpayne@68
|
605 //Ignore
|
jpayne@68
|
606 }
|
jpayne@68
|
607 count=0;
|
jpayne@68
|
608 }
|
jpayne@68
|
609 }
|
jpayne@68
|
610 assert(count==0) : count;
|
jpayne@68
|
611 return true;
|
jpayne@68
|
612 }
|
jpayne@68
|
613
|
jpayne@68
|
614 public void addToMatchHistogram(final Read r){
|
jpayne@68
|
615 if(r==null){return;}
|
jpayne@68
|
616 addToMatchHistogram2(r);
|
jpayne@68
|
617 if(r.mate!=null){addToMatchHistogram2(r.mate);}
|
jpayne@68
|
618 }
|
jpayne@68
|
619
|
jpayne@68
|
620 private void addToMatchHistogram2(final Read r){
|
jpayne@68
|
621 if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;}
|
jpayne@68
|
622 final int pairnum=r.samline==null ? r.pairnum() : r.samline.pairnum();
|
jpayne@68
|
623 if(pairnum==1){read2Count++;}
|
jpayne@68
|
624 final byte[] bases=r.bases;
|
jpayne@68
|
625 final int limit=Tools.min(bases.length, MAXLEN);
|
jpayne@68
|
626 final long[] ms=matchSum[pairnum], ds=delSum[pairnum], is=insSum[pairnum],
|
jpayne@68
|
627 ss=subSum[pairnum], ns=nSum[pairnum], cs=clipSum[pairnum], os=otherSum[pairnum];
|
jpayne@68
|
628
|
jpayne@68
|
629 byte[] match=r.match;
|
jpayne@68
|
630 if(r.shortmatch() && match!=null){match=Read.toLongMatchString(match);}
|
jpayne@68
|
631
|
jpayne@68
|
632 if(match==null){
|
jpayne@68
|
633 for(int i=0; i<limit; i++){
|
jpayne@68
|
634 byte b=bases[i];
|
jpayne@68
|
635 if(b=='N'){ns[i]++;}
|
jpayne@68
|
636 else{os[i]++;}
|
jpayne@68
|
637 }
|
jpayne@68
|
638 }else{
|
jpayne@68
|
639 final boolean plus=(r.strand()==0);
|
jpayne@68
|
640 int bpos=0;
|
jpayne@68
|
641 byte lastm='A';
|
jpayne@68
|
642 for(int mpos=0; mpos<match.length && bpos<limit; mpos++){
|
jpayne@68
|
643 byte b=bases[bpos];//bases[plus ? rpos : bases.length-rpos-1];
|
jpayne@68
|
644 byte m=match[plus ? mpos : match.length-mpos-1];//match[mpos];
|
jpayne@68
|
645 if(b=='N'){
|
jpayne@68
|
646 if(m=='D'){
|
jpayne@68
|
647 if(lastm!=m){ds[bpos]++;}
|
jpayne@68
|
648 bpos--;
|
jpayne@68
|
649 }else{ns[bpos]++;}
|
jpayne@68
|
650 }else{
|
jpayne@68
|
651 if(m=='m'){
|
jpayne@68
|
652 ms[bpos]++;
|
jpayne@68
|
653 }else if(m=='S'){
|
jpayne@68
|
654 ss[bpos]++;
|
jpayne@68
|
655 }else if(m=='I'){
|
jpayne@68
|
656 is[bpos]++;
|
jpayne@68
|
657 }else if(m=='N' || m=='V'){
|
jpayne@68
|
658 // assert(false) : "\n"+r+"\n"+new String(Data.getChromosome(r.chrom).getBytes(r.start, r.stop))+"\nrpos="+rpos+", mpos="+mpos;
|
jpayne@68
|
659 os[bpos]++;
|
jpayne@68
|
660 }else if(m=='C'){
|
jpayne@68
|
661 // assert(false) : r;
|
jpayne@68
|
662 cs[bpos]++;
|
jpayne@68
|
663 }else if(m=='D'){
|
jpayne@68
|
664 if(lastm!=m){ds[bpos]++;}
|
jpayne@68
|
665 bpos--;
|
jpayne@68
|
666 }else if(m=='i'){
|
jpayne@68
|
667 os[bpos]++;
|
jpayne@68
|
668 }else if(m=='d'){
|
jpayne@68
|
669 bpos--;
|
jpayne@68
|
670 }else{
|
jpayne@68
|
671 os[bpos]++;
|
jpayne@68
|
672 assert(false) : "For read "+r.numericID+", unknown symbol in match string: ASCII "+m+" = "+(char)m;
|
jpayne@68
|
673 }
|
jpayne@68
|
674 }
|
jpayne@68
|
675 bpos++;
|
jpayne@68
|
676 lastm=m;
|
jpayne@68
|
677 }
|
jpayne@68
|
678 }
|
jpayne@68
|
679 }
|
jpayne@68
|
680
|
jpayne@68
|
681 public void addToInsertHistogram(final Read r, boolean ignoreMappingStrand){
|
jpayne@68
|
682 if(verbose){
|
jpayne@68
|
683 System.err.print(r.numericID);
|
jpayne@68
|
684 if(r==null || r.mate==null || !r.mapped() || !r.mate.mapped() || !r.paired()){
|
jpayne@68
|
685 System.err.println("\n");
|
jpayne@68
|
686 }else{
|
jpayne@68
|
687 System.err.println("\t"+r.strand()+"\t"+r.insertSizeMapped(ignoreMappingStrand)+"\t"+r.mate.insertSizeMapped(ignoreMappingStrand));
|
jpayne@68
|
688 }
|
jpayne@68
|
689 }
|
jpayne@68
|
690 if(r==null || r.mate==null || !r.mapped() || !r.mate.mapped() || !r.paired()){
|
jpayne@68
|
691 unpairedCount++;
|
jpayne@68
|
692 return;
|
jpayne@68
|
693 }
|
jpayne@68
|
694 int x=Tools.min(MAXINSERTLEN, r.insertSizeMapped(ignoreMappingStrand));
|
jpayne@68
|
695 if(x>0){
|
jpayne@68
|
696 insertHist.increment(x, 1);
|
jpayne@68
|
697 pairedCount++;
|
jpayne@68
|
698 }else{
|
jpayne@68
|
699 unpairedCount++;
|
jpayne@68
|
700 }
|
jpayne@68
|
701 // assert(x!=1) : "\n"+r+"\n\n"+r.mate+"\n";
|
jpayne@68
|
702 // System.out.println("Incrementing "+x);
|
jpayne@68
|
703 }
|
jpayne@68
|
704
|
jpayne@68
|
705 public void addToInsertHistogram(final SamLine r1){
|
jpayne@68
|
706 int x=r1.tlen;
|
jpayne@68
|
707 if(x<0) {x=-x;}
|
jpayne@68
|
708 x=Tools.min(MAXINSERTLEN, x);
|
jpayne@68
|
709 if(r1.pairedOnSameChrom() && x>0){
|
jpayne@68
|
710 pairedCount++;
|
jpayne@68
|
711 insertHist.increment(x, 1);
|
jpayne@68
|
712 }else {
|
jpayne@68
|
713 unpairedCount++;
|
jpayne@68
|
714 }
|
jpayne@68
|
715 }
|
jpayne@68
|
716
|
jpayne@68
|
717 public void addToInsertHistogram(final SamLine r1, final SamLine r2){
|
jpayne@68
|
718 if(r1==null){return;}
|
jpayne@68
|
719 int x=insertSizeMapped(r1, r2, REQUIRE_PROPER_PAIR);
|
jpayne@68
|
720 if(verbose){
|
jpayne@68
|
721 System.err.println(r1.qname+"\t"+x);
|
jpayne@68
|
722 }
|
jpayne@68
|
723 x=Tools.min(MAXINSERTLEN, x);
|
jpayne@68
|
724 if(x>0){
|
jpayne@68
|
725 insertHist.increment(x, 1);
|
jpayne@68
|
726 pairedCount++;
|
jpayne@68
|
727 }else{
|
jpayne@68
|
728 unpairedCount++;
|
jpayne@68
|
729 }
|
jpayne@68
|
730 }
|
jpayne@68
|
731
|
jpayne@68
|
732 /** This is untested and only gives approximate answers when overlapping reads contain indels.
|
jpayne@68
|
733 * It may give incorrect answers for same-strange pairs that are shorter than read length.
|
jpayne@68
|
734 * It might give negative answers but that would be a bug. */
|
jpayne@68
|
735 public static int insertSizeMapped(SamLine r1, SamLine r2, boolean requireProperPair){
|
jpayne@68
|
736 if(r2==null){return r1.length();}
|
jpayne@68
|
737 if(!r1.mapped() || !r2.mapped() || !r1.pairedOnSameChrom() || (requireProperPair && !r1.properPair())){
|
jpayne@68
|
738 return -1;
|
jpayne@68
|
739 }
|
jpayne@68
|
740
|
jpayne@68
|
741 int a1=r1.start(true, false);
|
jpayne@68
|
742 int a2=r2.start(true, false);
|
jpayne@68
|
743
|
jpayne@68
|
744 if(r1.strand()!=r2.strand()){
|
jpayne@68
|
745 if(r1.strand()==1){return insertSizeMapped(r2, r1, requireProperPair);}
|
jpayne@68
|
746 }else if(a1>a2){
|
jpayne@68
|
747 return insertSizeMapped(r2, r1, requireProperPair);
|
jpayne@68
|
748 }
|
jpayne@68
|
749
|
jpayne@68
|
750 int b1=r1.stop(a1, true, false);
|
jpayne@68
|
751 int b2=r2.stop(a2, true, false);
|
jpayne@68
|
752
|
jpayne@68
|
753 int clen1=r1.calcCigarLength(true, false);
|
jpayne@68
|
754 int clen2=r2.calcCigarLength(true, false);
|
jpayne@68
|
755
|
jpayne@68
|
756 int mlen1=b1-a1+1;
|
jpayne@68
|
757 int mlen2=b2-a2+1;
|
jpayne@68
|
758
|
jpayne@68
|
759 int dif1=mlen1-clen1;
|
jpayne@68
|
760 int dif2=mlen2-clen2;
|
jpayne@68
|
761
|
jpayne@68
|
762 int mlen12=b2-a1+1;
|
jpayne@68
|
763
|
jpayne@68
|
764 if(Tools.overlap(a1, b1, a2, b2)){//hard case
|
jpayne@68
|
765 return mlen12-Tools.max(dif1, dif2); //Approximate
|
jpayne@68
|
766 }else{//easy case
|
jpayne@68
|
767 return mlen12-dif1-dif2;
|
jpayne@68
|
768 }
|
jpayne@68
|
769 }
|
jpayne@68
|
770
|
jpayne@68
|
771 public void addToBaseHistogram(final Read r){
|
jpayne@68
|
772 addToBaseHistogram2(r);
|
jpayne@68
|
773 if(r.mate!=null){addToBaseHistogram2(r.mate);}
|
jpayne@68
|
774 }
|
jpayne@68
|
775
|
jpayne@68
|
776 public void addToBaseHistogram2(final Read r){
|
jpayne@68
|
777 if(r==null || r.bases==null){return;}
|
jpayne@68
|
778 final int pairnum=r.samline==null ? r.pairnum() : r.samline.pairnum();
|
jpayne@68
|
779 if(pairnum==1){read2Count++;}
|
jpayne@68
|
780 final byte[] bases=r.bases;
|
jpayne@68
|
781 final LongList[] lists=baseHist[pairnum];
|
jpayne@68
|
782 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
783 byte b=bases[i];
|
jpayne@68
|
784 int x=AminoAcid.baseToNumber[b]+1;
|
jpayne@68
|
785 lists[x].increment(i, 1);
|
jpayne@68
|
786 }
|
jpayne@68
|
787 }
|
jpayne@68
|
788
|
jpayne@68
|
789 public static boolean testFiles(boolean allowDuplicates){
|
jpayne@68
|
790 return Tools.testOutputFiles(overwrite, append, allowDuplicates,
|
jpayne@68
|
791 AVG_QUAL_HIST_FILE, QUAL_HIST_FILE, BQUAL_HIST_FILE, BQUAL_HIST_OVERALL_FILE, QUAL_COUNT_HIST_FILE,
|
jpayne@68
|
792 MATCH_HIST_FILE, INSERT_HIST_FILE, BASE_HIST_FILE, QUAL_ACCURACY_FILE, INDEL_HIST_FILE, ERROR_HIST_FILE, LENGTH_HIST_FILE,
|
jpayne@68
|
793 GC_HIST_FILE, ENTROPY_HIST_FILE, IDENTITY_HIST_FILE, TIME_HIST_FILE);
|
jpayne@68
|
794 }
|
jpayne@68
|
795
|
jpayne@68
|
796 public static boolean writeAll(){
|
jpayne@68
|
797 if(collectingStats()){
|
jpayne@68
|
798 ReadStats rs=mergeAll();
|
jpayne@68
|
799 boolean paired=rs.read2Count>0;
|
jpayne@68
|
800
|
jpayne@68
|
801 if(AVG_QUAL_HIST_FILE!=null){rs.writeAverageQualityToFile(AVG_QUAL_HIST_FILE, paired);}
|
jpayne@68
|
802 if(QUAL_HIST_FILE!=null){rs.writeQualityToFile(QUAL_HIST_FILE, paired);}
|
jpayne@68
|
803 if(BQUAL_HIST_FILE!=null){rs.writeBQualityToFile(BQUAL_HIST_FILE, paired);}
|
jpayne@68
|
804 if(BQUAL_HIST_OVERALL_FILE!=null){rs.writeBQualityOverallToFile(BQUAL_HIST_OVERALL_FILE);}
|
jpayne@68
|
805 if(QUAL_COUNT_HIST_FILE!=null){rs.writeQCountToFile(QUAL_COUNT_HIST_FILE, paired);}
|
jpayne@68
|
806 if(MATCH_HIST_FILE!=null){rs.writeMatchToFile(MATCH_HIST_FILE, paired);}
|
jpayne@68
|
807 if(INSERT_HIST_FILE!=null){rs.writeInsertToFile(INSERT_HIST_FILE);}
|
jpayne@68
|
808 if(BASE_HIST_FILE!=null){rs.writeBaseContentToFile(BASE_HIST_FILE, paired);}
|
jpayne@68
|
809 if(QUAL_ACCURACY_FILE!=null){rs.writeQualityAccuracyToFile(QUAL_ACCURACY_FILE);}
|
jpayne@68
|
810
|
jpayne@68
|
811 if(INDEL_HIST_FILE!=null){rs.writeIndelToFile(INDEL_HIST_FILE);}
|
jpayne@68
|
812 if(ERROR_HIST_FILE!=null){rs.writeErrorToFile(ERROR_HIST_FILE);}
|
jpayne@68
|
813 if(LENGTH_HIST_FILE!=null){rs.writeLengthToFile(LENGTH_HIST_FILE);}
|
jpayne@68
|
814 if(GC_HIST_FILE!=null){rs.writeGCToFile(GC_HIST_FILE, true);}
|
jpayne@68
|
815 if(ENTROPY_HIST_FILE!=null){rs.writeEntropyToFile(ENTROPY_HIST_FILE, true);}
|
jpayne@68
|
816 if(IDENTITY_HIST_FILE!=null){rs.writeIdentityToFile(IDENTITY_HIST_FILE, true);}
|
jpayne@68
|
817 if(TIME_HIST_FILE!=null){rs.writeTimeToFile(TIME_HIST_FILE);}
|
jpayne@68
|
818
|
jpayne@68
|
819 boolean b=rs.errorState;
|
jpayne@68
|
820 clear();
|
jpayne@68
|
821 return b;
|
jpayne@68
|
822 }
|
jpayne@68
|
823 clear();
|
jpayne@68
|
824 return false;
|
jpayne@68
|
825 }
|
jpayne@68
|
826
|
jpayne@68
|
827 public void writeAverageQualityToFile(String fname, boolean writePaired){
|
jpayne@68
|
828 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false);
|
jpayne@68
|
829 tsw.start();
|
jpayne@68
|
830 tsw.print("#Quality\tcount1\tfraction1"+(writePaired ? "\tcount2\tfraction2" : "")+"\n");
|
jpayne@68
|
831
|
jpayne@68
|
832 long sum1=Tools.sum(aqualArray[0]);
|
jpayne@68
|
833 long sum2=Tools.sum(aqualArray[1]);
|
jpayne@68
|
834 double mult1=1.0/Tools.max(1, sum1);
|
jpayne@68
|
835 double mult2=1.0/Tools.max(1, sum2);
|
jpayne@68
|
836
|
jpayne@68
|
837 long y=sum1+sum2;
|
jpayne@68
|
838 for(int i=0; i<aqualArray[0].length; i++){
|
jpayne@68
|
839 long x1=aqualArray[0][i];
|
jpayne@68
|
840 long x2=aqualArray[1][i];
|
jpayne@68
|
841 y-=x1;
|
jpayne@68
|
842 y-=x2;
|
jpayne@68
|
843 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%.5f", i, x1, x1*mult1));
|
jpayne@68
|
844 if(writePaired){
|
jpayne@68
|
845 tsw.print(String.format(Locale.ROOT, "\t%d\t%.5f", x2, x2*mult2));
|
jpayne@68
|
846 }
|
jpayne@68
|
847 tsw.print("\n");
|
jpayne@68
|
848 if(y<=0){break;}
|
jpayne@68
|
849 }
|
jpayne@68
|
850 tsw.poisonAndWait();
|
jpayne@68
|
851 errorState|=tsw.errorState;
|
jpayne@68
|
852 }
|
jpayne@68
|
853
|
jpayne@68
|
854 public void writeQCountToFile(String fname, boolean writePaired){
|
jpayne@68
|
855 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false);
|
jpayne@68
|
856 tsw.start();
|
jpayne@68
|
857 tsw.print("#Quality\tcount1\tfraction1"+(writePaired ? "\tcount2\tfraction2" : "")+"\n");
|
jpayne@68
|
858
|
jpayne@68
|
859 long sum1=Tools.sum(qcountHist[0]);
|
jpayne@68
|
860 long sum2=Tools.sum(qcountHist[1]);
|
jpayne@68
|
861 double mult1=1.0/Tools.max(1, sum1);
|
jpayne@68
|
862 double mult2=1.0/Tools.max(1, sum2);
|
jpayne@68
|
863
|
jpayne@68
|
864 long y=sum1+sum2;
|
jpayne@68
|
865 for(int i=0; i<qcountHist[0].length; i++){
|
jpayne@68
|
866 long x1=qcountHist[0][i];
|
jpayne@68
|
867 long x2=qcountHist[1][i];
|
jpayne@68
|
868 y-=x1;
|
jpayne@68
|
869 y-=x2;
|
jpayne@68
|
870 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%.5f", i, x1, x1*mult1));
|
jpayne@68
|
871 if(writePaired){
|
jpayne@68
|
872 tsw.print(String.format(Locale.ROOT, "\t%d\t%.5f", x2, x2*mult2));
|
jpayne@68
|
873 }
|
jpayne@68
|
874 tsw.print("\n");
|
jpayne@68
|
875 if(y<=0){break;}
|
jpayne@68
|
876 }
|
jpayne@68
|
877 tsw.poisonAndWait();
|
jpayne@68
|
878 errorState|=tsw.errorState;
|
jpayne@68
|
879 }
|
jpayne@68
|
880
|
jpayne@68
|
881 public void writeQualityToFile(String fname, boolean writePaired){
|
jpayne@68
|
882 StringBuilder sb=new StringBuilder();
|
jpayne@68
|
883 final boolean measure=(matchSum!=null);
|
jpayne@68
|
884 if(measure){
|
jpayne@68
|
885 if(writePaired){
|
jpayne@68
|
886 sb.append("#BaseNum\tRead1_linear\tRead1_log\tRead1_measured\tRead2_linear\tRead2_log\tRead2_measured\n");
|
jpayne@68
|
887 }else{
|
jpayne@68
|
888 sb.append("#BaseNum\tRead1_linear\tRead1_log\tRead1_measured\n");
|
jpayne@68
|
889 }
|
jpayne@68
|
890 }else{
|
jpayne@68
|
891 sb.append("#BaseNum\tRead1_linear\tRead1_log"+(writePaired ? "\tRead2_linear\tRead2_log" : "")+"\n");
|
jpayne@68
|
892 }
|
jpayne@68
|
893
|
jpayne@68
|
894 final long[] qs1=qualSum[0], qs2=qualSum[1], ql1=qualLength[0], ql2=qualLength[1];
|
jpayne@68
|
895 final double[] qsd1=qualSumDouble[0], qsd2=qualSumDouble[1];
|
jpayne@68
|
896
|
jpayne@68
|
897 for(int i=MAXLEN-2; i>=0; i--){
|
jpayne@68
|
898 ql1[i]+=ql1[i+1];
|
jpayne@68
|
899 ql2[i]+=ql2[i+1];
|
jpayne@68
|
900 }
|
jpayne@68
|
901
|
jpayne@68
|
902 double div1sum=0;
|
jpayne@68
|
903 double div2sum=0;
|
jpayne@68
|
904 double deviation1sum=0;
|
jpayne@68
|
905 double deviation2sum=0;
|
jpayne@68
|
906
|
jpayne@68
|
907 if(writePaired){
|
jpayne@68
|
908 for(int i=0; i<MAXLEN && (ql1[i]>0 || ql2[i]>0); i++){
|
jpayne@68
|
909 final int a=i+1;
|
jpayne@68
|
910 double blin, clin, blog, clog;
|
jpayne@68
|
911 final double div1=(double)Tools.max(1, ql1[i]);
|
jpayne@68
|
912 final double div2=(double)Tools.max(1, ql2[i]);
|
jpayne@68
|
913
|
jpayne@68
|
914 blin=qs1[i]/div1;
|
jpayne@68
|
915 clin=qs2[i]/div2;
|
jpayne@68
|
916 blog=qsd1[i]/div1;
|
jpayne@68
|
917 clog=qsd2[i]/div2;
|
jpayne@68
|
918 blog=QualityTools.probErrorToPhredDouble(blog);
|
jpayne@68
|
919 clog=QualityTools.probErrorToPhredDouble(clog);
|
jpayne@68
|
920 if(measure){
|
jpayne@68
|
921 double bcalc=calcQualityAtPosition(i, 0);
|
jpayne@68
|
922 double ccalc=calcQualityAtPosition(i, 1);
|
jpayne@68
|
923
|
jpayne@68
|
924 div1sum+=div1;
|
jpayne@68
|
925 div2sum+=div2;
|
jpayne@68
|
926 deviation1sum+=Math.abs(blog-bcalc)*div1;
|
jpayne@68
|
927 deviation2sum+=Math.abs(clog-ccalc)*div2;
|
jpayne@68
|
928
|
jpayne@68
|
929 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n", a, blin, blog, bcalc, clin, clog, ccalc));
|
jpayne@68
|
930 }else{
|
jpayne@68
|
931 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\t%.3f\n", a, blin, blog, clin, clog));
|
jpayne@68
|
932 }
|
jpayne@68
|
933 }
|
jpayne@68
|
934 }else{
|
jpayne@68
|
935 for(int i=0; i<MAXLEN && ql1[i]>0; i++){
|
jpayne@68
|
936 final int a=i+1;
|
jpayne@68
|
937 double blin, blog;
|
jpayne@68
|
938 final double div1=(double)Tools.max(1, ql1[i]);
|
jpayne@68
|
939
|
jpayne@68
|
940 blin=qs1[i]/div1;
|
jpayne@68
|
941 blog=qsd1[i]/div1;
|
jpayne@68
|
942 blog=QualityTools.probErrorToPhredDouble(blog);
|
jpayne@68
|
943 if(measure){
|
jpayne@68
|
944 double bcalc=calcQualityAtPosition(i, 0);
|
jpayne@68
|
945
|
jpayne@68
|
946 div1sum+=div1;
|
jpayne@68
|
947 deviation1sum+=Math.abs(blog-bcalc)*div1;
|
jpayne@68
|
948
|
jpayne@68
|
949 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\n", a, blin, blog, bcalc));
|
jpayne@68
|
950 }else{
|
jpayne@68
|
951 sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\n", a, blin, blog));
|
jpayne@68
|
952 }
|
jpayne@68
|
953 }
|
jpayne@68
|
954 }
|
jpayne@68
|
955
|
jpayne@68
|
956 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false);
|
jpayne@68
|
957 tsw.start();
|
jpayne@68
|
958 if(measure){
|
jpayne@68
|
959 if(writePaired){
|
jpayne@68
|
960 tsw.print(String.format(Locale.ROOT, "#Deviation\t%.4f\t%.4f\n", deviation1sum/div1sum, deviation2sum/div2sum));
|
jpayne@68
|
961 }else{
|
jpayne@68
|
962 tsw.print(String.format(Locale.ROOT, "#Deviation\t%.4f\n", deviation1sum/div1sum));
|
jpayne@68
|
963 }
|
jpayne@68
|
964 }
|
jpayne@68
|
965 tsw.print(sb);
|
jpayne@68
|
966 tsw.poisonAndWait();
|
jpayne@68
|
967 errorState|=tsw.errorState;
|
jpayne@68
|
968 }
|
jpayne@68
|
969
|
jpayne@68
|
970 private double calcQualityAtPosition(int pos, int pairnum){
|
jpayne@68
|
971 boolean includeNs=true;
|
jpayne@68
|
972 long m=matchSum[pairnum][pos];
|
jpayne@68
|
973 long d=delSum[pairnum][pos]; //left-adjacent deletion
|
jpayne@68
|
974 long d2=delSum[pairnum][Tools.min(pos, delSum[pairnum].length-1)]; //right-adjacent deletion
|
jpayne@68
|
975 long i=insSum[pairnum][pos];
|
jpayne@68
|
976 long s=subSum[pairnum][pos];
|
jpayne@68
|
977 long n=(includeNs ? nSum[pairnum][pos] : 0); //This only tracks no-calls, not no-refs.
|
jpayne@68
|
978 long good=Tools.max(0, m*2-d-d2+n/2);
|
jpayne@68
|
979 long total=Tools.max(0, m*2+i*2+s*2+n*2); //not d
|
jpayne@68
|
980 long bad=total-good;
|
jpayne@68
|
981 if(total<1){return 0;}
|
jpayne@68
|
982 double error=bad/(double)total;
|
jpayne@68
|
983 return QualityTools.probErrorToPhredDouble(error);
|
jpayne@68
|
984 }
|
jpayne@68
|
985
|
jpayne@68
|
986 public void writeBQualityOverallToFile(String fname){
|
jpayne@68
|
987 final long[] cp30=Arrays.copyOf(bqualHistOverall, bqualHistOverall.length);
|
jpayne@68
|
988 for(int i=0; i<30; i++){cp30[i]=0;}
|
jpayne@68
|
989
|
jpayne@68
|
990 final long sum=Tools.sum(bqualHistOverall);
|
jpayne@68
|
991 final long median=Tools.percentileHistogram(bqualHistOverall, 0.5);
|
jpayne@68
|
992 final double mean=Tools.averageHistogram(bqualHistOverall);
|
jpayne@68
|
993 final double stdev=Tools.standardDeviationHistogram(bqualHistOverall);
|
jpayne@68
|
994 final double mean30=Tools.averageHistogram(cp30);
|
jpayne@68
|
995 final double stdev30=Tools.standardDeviationHistogram(cp30);
|
jpayne@68
|
996 final double mult=1.0/Tools.max(1, sum);
|
jpayne@68
|
997 long y=sum;
|
jpayne@68
|
998
|
jpayne@68
|
999 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false);
|
jpayne@68
|
1000 tsw.start();
|
jpayne@68
|
1001 tsw.print("#Median\t"+median+"\n");
|
jpayne@68
|
1002 tsw.print("#Mean\t"+String.format(Locale.ROOT, "%.3f", mean)+"\n");
|
jpayne@68
|
1003 tsw.print("#STDev\t"+String.format(Locale.ROOT, "%.3f", stdev)+"\n");
|
jpayne@68
|
1004 tsw.print("#Mean_30\t"+String.format(Locale.ROOT, "%.3f", mean30)+"\n");
|
jpayne@68
|
1005 tsw.print("#STDev_30\t"+String.format(Locale.ROOT, "%.3f", stdev30)+"\n");
|
jpayne@68
|
1006 tsw.print("#Quality\tbases\tfraction\n");
|
jpayne@68
|
1007
|
jpayne@68
|
1008 for(int i=0; i<bqualHistOverall.length; i++){
|
jpayne@68
|
1009 long x=bqualHistOverall[i];
|
jpayne@68
|
1010 y-=x;
|
jpayne@68
|
1011 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%.5f", i, x, x*mult));
|
jpayne@68
|
1012 tsw.print("\n");
|
jpayne@68
|
1013 if(y<=0){break;}
|
jpayne@68
|
1014 }
|
jpayne@68
|
1015 tsw.poisonAndWait();
|
jpayne@68
|
1016 errorState|=tsw.errorState;
|
jpayne@68
|
1017 }
|
jpayne@68
|
1018
|
jpayne@68
|
1019 public void writeBQualityToFile(String fname, boolean writePaired){
|
jpayne@68
|
1020 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false);
|
jpayne@68
|
1021 tsw.start();
|
jpayne@68
|
1022 tsw.print("#BaseNum\tcount_1\tmin_1\tmax_1\tmean_1\tQ1_1\tmed_1\tQ3_1\tLW_1\tRW_1");
|
jpayne@68
|
1023 if(writePaired){tsw.print("\tcount_2\tmin_2\tmax_2\tmean_2\tQ1_2\tmed_2\tQ3_2\tLW_2\tRW_2");}
|
jpayne@68
|
1024 tsw.print("\n");
|
jpayne@68
|
1025
|
jpayne@68
|
1026 for(int i=0; i<MAXLEN; i++){
|
jpayne@68
|
1027 final long[] a1=bqualHist[0][i], a2=bqualHist[1][i];
|
jpayne@68
|
1028 final long sum1=Tools.sum(a1), sum2=Tools.sum(a2);
|
jpayne@68
|
1029 if(sum1<1 && sum2<1){break;}
|
jpayne@68
|
1030
|
jpayne@68
|
1031 {
|
jpayne@68
|
1032 final long a[]=a1, sum=sum1;
|
jpayne@68
|
1033
|
jpayne@68
|
1034 final long weightedSum=Tools.sumHistogram(a);
|
jpayne@68
|
1035 final long med=Tools.medianHistogram(a), min=Tools.minHistogram(a), max=Tools.maxHistogram(a);
|
jpayne@68
|
1036 final long firstQuart=Tools.percentileHistogram(a, 0.25);
|
jpayne@68
|
1037 final long thirdQuart=Tools.percentileHistogram(a, 0.75);
|
jpayne@68
|
1038 final long leftWhisker=Tools.percentileHistogram(a, 0.02);
|
jpayne@68
|
1039 final long rightWhisker=Tools.percentileHistogram(a, 0.98);
|
jpayne@68
|
1040 final double mean=weightedSum*1.0/Tools.max(sum, 0);
|
jpayne@68
|
1041 tsw.print(String.format(Locale.ROOT, "%d\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%d\t%d", i, sum, min, max, mean, firstQuart, med, thirdQuart, leftWhisker, rightWhisker));
|
jpayne@68
|
1042 }
|
jpayne@68
|
1043
|
jpayne@68
|
1044 if(writePaired){
|
jpayne@68
|
1045 final long a[]=a2, sum=sum2;
|
jpayne@68
|
1046
|
jpayne@68
|
1047 final long weightedSum=Tools.sumHistogram(a);
|
jpayne@68
|
1048 final long med=Tools.medianHistogram(a), min=Tools.minHistogram(a), max=Tools.maxHistogram(a);
|
jpayne@68
|
1049 final long firstQuart=Tools.percentileHistogram(a, 0.25);
|
jpayne@68
|
1050 final long thirdQuart=Tools.percentileHistogram(a, 0.75);
|
jpayne@68
|
1051 final long leftWhisker=Tools.percentileHistogram(a, 0.02);
|
jpayne@68
|
1052 final long rightWhisker=Tools.percentileHistogram(a, 0.98);
|
jpayne@68
|
1053 final double mean=weightedSum*1.0/Tools.max(sum, 0);
|
jpayne@68
|
1054 tsw.print(String.format(Locale.ROOT, "\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%d\t%d", sum, min, max, mean, firstQuart, med, thirdQuart, leftWhisker, rightWhisker));
|
jpayne@68
|
1055 }
|
jpayne@68
|
1056 tsw.print("\n");
|
jpayne@68
|
1057 }
|
jpayne@68
|
1058 tsw.poisonAndWait();
|
jpayne@68
|
1059 errorState|=tsw.errorState;
|
jpayne@68
|
1060 }
|
jpayne@68
|
1061
|
jpayne@68
|
1062 public void writeQualityAccuracyToFile(String fname){
|
jpayne@68
|
1063
|
jpayne@68
|
1064 int max=qualMatch.length;
|
jpayne@68
|
1065 for(int i=max-1; i>=0; i--){
|
jpayne@68
|
1066 if(qualMatch[i]+qualSub[i]+qualIns[i]+qualDel[i]>0){break;}
|
jpayne@68
|
1067 max=i;
|
jpayne@68
|
1068 }
|
jpayne@68
|
1069
|
jpayne@68
|
1070 final int qMin=Read.MIN_CALLED_QUALITY(), qMax=Read.MAX_CALLED_QUALITY();
|
jpayne@68
|
1071
|
jpayne@68
|
1072 double devsum=0;
|
jpayne@68
|
1073 double devsumSub=0;
|
jpayne@68
|
1074 long observations=0;
|
jpayne@68
|
1075 for(int i=0; i<max; i++){
|
jpayne@68
|
1076 long qm=qualMatch[i]*2;
|
jpayne@68
|
1077 long qs=qualSub[i]*2;
|
jpayne@68
|
1078 long qi=qualIns[i]*2;
|
jpayne@68
|
1079 long qd=qualDel[i];
|
jpayne@68
|
1080
|
jpayne@68
|
1081 double phred=-1;
|
jpayne@68
|
1082 double phredSub=-1;
|
jpayne@68
|
1083
|
jpayne@68
|
1084 long sum=qm+qs+qi+qd;
|
jpayne@68
|
1085 if(sum>0){
|
jpayne@68
|
1086 double mult=1.0/sum;
|
jpayne@68
|
1087 double subRate=(qs)*mult;
|
jpayne@68
|
1088 double errorRate=(qs+qi+qd)*mult;
|
jpayne@68
|
1089
|
jpayne@68
|
1090 phredSub=QualityTools.probErrorToPhredDouble(subRate);
|
jpayne@68
|
1091 phred=QualityTools.probErrorToPhredDouble(errorRate);
|
jpayne@68
|
1092 double deviation=phred-i;
|
jpayne@68
|
1093 double deviationSub=phredSub-i;
|
jpayne@68
|
1094 if(i==qMin && deviation<0){deviation=0;}
|
jpayne@68
|
1095 else if(i==qMax && max==qMax+1 && deviation>0){deviation=0;}
|
jpayne@68
|
1096 if(i==qMin && deviationSub<0){deviationSub=0;}
|
jpayne@68
|
1097 else if(i==qMax && max==qMax+1 && deviationSub>0){deviationSub=0;}
|
jpayne@68
|
1098 devsum+=(Math.abs(deviation)*sum);
|
jpayne@68
|
1099 devsumSub+=(Math.abs(deviationSub)*sum);
|
jpayne@68
|
1100 observations+=sum;
|
jpayne@68
|
1101 }
|
jpayne@68
|
1102 }
|
jpayne@68
|
1103
|
jpayne@68
|
1104 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false);
|
jpayne@68
|
1105 tsw.start();
|
jpayne@68
|
1106 tsw.print(String.format(Locale.ROOT, "#Deviation\t%.3f\n", devsum/observations));
|
jpayne@68
|
1107 tsw.print(String.format(Locale.ROOT, "#DeviationSub\t%.3f\n", devsumSub/observations));
|
jpayne@68
|
1108 tsw.print("#Quality\tMatch\tSub\tIns\tDel\tTrueQuality\tTrueQualitySub\n");
|
jpayne@68
|
1109 for(int i=0; i<max; i++){
|
jpayne@68
|
1110 long qm=qualMatch[i]*2;
|
jpayne@68
|
1111 long qs=qualSub[i]*2;
|
jpayne@68
|
1112 long qi=qualIns[i]*2;
|
jpayne@68
|
1113 long qd=qualDel[i];
|
jpayne@68
|
1114
|
jpayne@68
|
1115 double phred=-1;
|
jpayne@68
|
1116 double phredSub=-1;
|
jpayne@68
|
1117
|
jpayne@68
|
1118 long sum=qm+qs+qi+qd;
|
jpayne@68
|
1119 if(sum>0){
|
jpayne@68
|
1120 double mult=1.0/sum;
|
jpayne@68
|
1121 double subRate=(qs)*mult;
|
jpayne@68
|
1122 double errorRate=(qs+qi+qd)*mult;
|
jpayne@68
|
1123
|
jpayne@68
|
1124 phredSub=QualityTools.probErrorToPhredDouble(subRate);
|
jpayne@68
|
1125 phred=QualityTools.probErrorToPhredDouble(errorRate);
|
jpayne@68
|
1126
|
jpayne@68
|
1127 // System.err.println("sub: "+qs+"/"+sum+" -> "+subRate+" -> "+phredSub);
|
jpayne@68
|
1128 }
|
jpayne@68
|
1129
|
jpayne@68
|
1130 tsw.print(i+"\t"+qm+"\t"+qs+"\t"+qi+"\t"+qd);
|
jpayne@68
|
1131 tsw.print(phred>=0 ? String.format(Locale.ROOT, "\t%.2f", phred) : "\t");
|
jpayne@68
|
1132 tsw.print(phredSub>=0 ? String.format(Locale.ROOT, "\t%.2f\n", phredSub) : "\t\n");
|
jpayne@68
|
1133
|
jpayne@68
|
1134 // System.err.println(qm+"\t"+qs+"\t"+qi+"\t"+qd);
|
jpayne@68
|
1135 }
|
jpayne@68
|
1136
|
jpayne@68
|
1137 tsw.poisonAndWait();
|
jpayne@68
|
1138 errorState|=tsw.errorState;
|
jpayne@68
|
1139 }
|
jpayne@68
|
1140
|
jpayne@68
|
1141 public void writeMatchToFile(String fname, boolean writePaired){
|
jpayne@68
|
1142 if(!writePaired){
|
jpayne@68
|
1143 writeMatchToFileUnpaired(fname);
|
jpayne@68
|
1144 return;
|
jpayne@68
|
1145 }
|
jpayne@68
|
1146 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1147 tsw.start();
|
jpayne@68
|
1148 tsw.print("#BaseNum\tMatch1\tSub1\tDel1\tIns1\tN1\tOther1\tMatch2\tSub2\tDel2\tIns2\tN2\tOther2\n");
|
jpayne@68
|
1149
|
jpayne@68
|
1150 final long[] ms1=matchSum[0], ds1=delSum[0], is1=insSum[0],
|
jpayne@68
|
1151 ss1=subSum[0], ns1=nSum[0], cs1=clipSum[0], os1=otherSum[0];
|
jpayne@68
|
1152 final long[] ms2=matchSum[1], ds2=delSum[1], is2=insSum[1],
|
jpayne@68
|
1153 ss2=subSum[1], ns2=nSum[1], cs2=clipSum[1], os2=otherSum[1];
|
jpayne@68
|
1154
|
jpayne@68
|
1155 for(int i=0; i<MAXLEN; i++){
|
jpayne@68
|
1156 int a=i+1;
|
jpayne@68
|
1157 long sum1=ms1[i]+is1[i]+ss1[i]+ns1[i]+cs1[i]+os1[i]; //no deletions
|
jpayne@68
|
1158 long sum2=ms2[i]+is2[i]+ss2[i]+ns2[i]+cs2[i]+os2[i]; //no deletions
|
jpayne@68
|
1159 if(sum1==0 && sum2==0){break;}
|
jpayne@68
|
1160 double inv1=1.0/(double)Tools.max(1, sum1);
|
jpayne@68
|
1161 double inv2=1.0/(double)Tools.max(1, sum2);
|
jpayne@68
|
1162
|
jpayne@68
|
1163 tsw.print(String.format(Locale.ROOT, "%d", a));
|
jpayne@68
|
1164 tsw.print(String.format(Locale.ROOT, "\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f",
|
jpayne@68
|
1165 ms1[i]*inv1, ss1[i]*inv1, ds1[i]*inv1, is1[i]*inv1, ns1[i]*inv1, (os1[i]+cs1[i])*inv1));
|
jpayne@68
|
1166 tsw.print(String.format(Locale.ROOT, "\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f",
|
jpayne@68
|
1167 ms2[i]*inv2, ss2[i]*inv2, ds2[i]*inv2, is2[i]*inv2, ns2[i]*inv2, (os2[i]+cs2[i])*inv2)
|
jpayne@68
|
1168 // +", "+ms2[i]+", "+is2[i]+", "+ss2[i]+", "+ns2[i]+", "+cs2[i]+", "+os2[i]
|
jpayne@68
|
1169 );
|
jpayne@68
|
1170 tsw.print("\n");
|
jpayne@68
|
1171 }
|
jpayne@68
|
1172 tsw.poisonAndWait();
|
jpayne@68
|
1173 errorState|=tsw.errorState;
|
jpayne@68
|
1174 }
|
jpayne@68
|
1175
|
jpayne@68
|
1176 public void writeMatchToFileUnpaired(String fname){
|
jpayne@68
|
1177 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1178 tsw.start();
|
jpayne@68
|
1179 tsw.print("#BaseNum\tMatch1\tSub1\tDel1\tIns1\tN1\tOther1\n");
|
jpayne@68
|
1180
|
jpayne@68
|
1181 final long[] ms1=matchSum[0], ds1=delSum[0], is1=insSum[0],
|
jpayne@68
|
1182 ss1=subSum[0], ns1=nSum[0], cs1=clipSum[0], os1=otherSum[0];
|
jpayne@68
|
1183
|
jpayne@68
|
1184 for(int i=0; i<MAXLEN; i++){
|
jpayne@68
|
1185 int a=i+1;
|
jpayne@68
|
1186 long sum1=ms1[i]+is1[i]+ss1[i]+ns1[i]+cs1[i]+os1[i]; //no deletions
|
jpayne@68
|
1187 if(sum1==0){break;}
|
jpayne@68
|
1188 double inv1=1.0/(double)Tools.max(1, sum1);
|
jpayne@68
|
1189
|
jpayne@68
|
1190 tsw.print(String.format(Locale.ROOT, "%d", a));
|
jpayne@68
|
1191 tsw.print(String.format(Locale.ROOT, "\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f\t%.5f",
|
jpayne@68
|
1192 ms1[i]*inv1, ss1[i]*inv1, ds1[i]*inv1, is1[i]*inv1, ns1[i]*inv1, (os1[i]+cs1[i])*inv1)
|
jpayne@68
|
1193 // +", "+ms1[i]+", "+is1[i]+", "+ss1[i]+", "+ns1[i]+", "+cs1[i]+", "+os1[i]
|
jpayne@68
|
1194 );
|
jpayne@68
|
1195 tsw.print("\n");
|
jpayne@68
|
1196 }
|
jpayne@68
|
1197 tsw.poisonAndWait();
|
jpayne@68
|
1198 errorState|=tsw.errorState;
|
jpayne@68
|
1199 }
|
jpayne@68
|
1200
|
jpayne@68
|
1201 public void writeInsertToFile(String fname){
|
jpayne@68
|
1202 StringBuilder sb=new StringBuilder();
|
jpayne@68
|
1203 sb.append("#Mean\t"+String.format(Locale.ROOT, "%.3f", Tools.averageHistogram(insertHist.array))+"\n");
|
jpayne@68
|
1204 sb.append("#Median\t"+Tools.percentileHistogram(insertHist.array, 0.5)+"\n");
|
jpayne@68
|
1205 sb.append("#Mode\t"+Tools.calcModeHistogram(insertHist.array)+"\n");
|
jpayne@68
|
1206 sb.append("#STDev\t"+String.format(Locale.ROOT, "%.3f", Tools.standardDeviationHistogram(insertHist.array))+"\n");
|
jpayne@68
|
1207 double percent=pairedCount*100.0/(pairedCount+unpairedCount);
|
jpayne@68
|
1208 sb.append("#PercentOfPairs\t"+String.format(Locale.ROOT, "%.3f", percent)+"\n");
|
jpayne@68
|
1209 // sb.append("#PercentOfPairs\t"+String.format(Locale.ROOT, "%.3f", matedPercent)+"\n");
|
jpayne@68
|
1210 sb.append("#InsertSize\tCount\n");
|
jpayne@68
|
1211 writeHistogramToFile(fname, sb.toString(), insertHist, !skipZeroInsertCount);
|
jpayne@68
|
1212 }
|
jpayne@68
|
1213
|
jpayne@68
|
1214 public void writeBaseContentToFile(String fname, boolean paired){
|
jpayne@68
|
1215 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1216 bsw.start();
|
jpayne@68
|
1217 bsw.print("#Pos\tA\tC\tG\tT\tN\n");
|
jpayne@68
|
1218
|
jpayne@68
|
1219 int max=writeBaseContentToFile2(bsw, baseHist[0], 0);
|
jpayne@68
|
1220 if(paired){
|
jpayne@68
|
1221 writeBaseContentToFile2(bsw, baseHist[1], max);
|
jpayne@68
|
1222 }
|
jpayne@68
|
1223
|
jpayne@68
|
1224 bsw.poisonAndWait();
|
jpayne@68
|
1225 errorState|=bsw.errorState;
|
jpayne@68
|
1226 }
|
jpayne@68
|
1227
|
jpayne@68
|
1228 private static int writeBaseContentToFile2(ByteStreamWriter bsw, LongList[] lists, int offset){
|
jpayne@68
|
1229 int max=0;
|
jpayne@68
|
1230 ByteBuilder sb=new ByteBuilder(100);
|
jpayne@68
|
1231 for(LongList ll : lists){max=Tools.max(max, ll.size);}
|
jpayne@68
|
1232 for(int i=0; i<max; i++){
|
jpayne@68
|
1233 long a=lists[1].get(i);
|
jpayne@68
|
1234 long c=lists[2].get(i);
|
jpayne@68
|
1235 long g=lists[3].get(i);
|
jpayne@68
|
1236 long t=lists[4].get(i);
|
jpayne@68
|
1237 long n=lists[0].get(i);
|
jpayne@68
|
1238 double mult=1.0/(a+c+g+t+n);
|
jpayne@68
|
1239
|
jpayne@68
|
1240 sb.setLength(0);
|
jpayne@68
|
1241 sb.append(i+offset).tab();
|
jpayne@68
|
1242 sb.append(a*mult, 5).tab();
|
jpayne@68
|
1243 sb.append(c*mult, 5).tab();
|
jpayne@68
|
1244 sb.append(g*mult, 5).tab();
|
jpayne@68
|
1245 sb.append(t*mult, 5).tab();
|
jpayne@68
|
1246 sb.append(n*mult, 5);
|
jpayne@68
|
1247 sb.nl();
|
jpayne@68
|
1248
|
jpayne@68
|
1249 bsw.print(sb);
|
jpayne@68
|
1250 }
|
jpayne@68
|
1251 return max;
|
jpayne@68
|
1252 }
|
jpayne@68
|
1253
|
jpayne@68
|
1254 public void writeIndelToFile(String fname){
|
jpayne@68
|
1255 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1256 bsw.start();
|
jpayne@68
|
1257 bsw.print("#Length\tDeletions\tInsertions\n");
|
jpayne@68
|
1258
|
jpayne@68
|
1259 int max=Tools.max(insHist.size, delHist.size);
|
jpayne@68
|
1260
|
jpayne@68
|
1261 ByteBuilder bb=new ByteBuilder(100);
|
jpayne@68
|
1262 for(int i=0; i<max; i++){
|
jpayne@68
|
1263 long x=delHist.get(i);
|
jpayne@68
|
1264 long y=insHist.get(i);
|
jpayne@68
|
1265 if(x>0 || y>0 || !skipZeroIndel){
|
jpayne@68
|
1266 bb.clear();
|
jpayne@68
|
1267 bb.append(i).tab().append(x).tab().append(y).nl();
|
jpayne@68
|
1268 bsw.print(bb);
|
jpayne@68
|
1269 }
|
jpayne@68
|
1270 }
|
jpayne@68
|
1271
|
jpayne@68
|
1272 //TODO: Disabled because it was irritating when graphing. Should write to a different file.
|
jpayne@68
|
1273 // tsw.print("#Length_bin\tDeletions\n");
|
jpayne@68
|
1274 // max=delHist2.size;
|
jpayne@68
|
1275 // for(int i=0; i<max; i++){
|
jpayne@68
|
1276 // long x=delHist2.get(i);
|
jpayne@68
|
1277 // if(x>0 || !skipZeroIndel){
|
jpayne@68
|
1278 // tsw.print((i*DEL_BIN)+"\t"+x+"\n");
|
jpayne@68
|
1279 // }
|
jpayne@68
|
1280 // }
|
jpayne@68
|
1281
|
jpayne@68
|
1282 bsw.poisonAndWait();
|
jpayne@68
|
1283 errorState|=bsw.errorState;
|
jpayne@68
|
1284 }
|
jpayne@68
|
1285
|
jpayne@68
|
1286 public void writeErrorToFile(String fname){
|
jpayne@68
|
1287 writeHistogramToFile(fname, "#Errors\tCount\n", errorHist, false);
|
jpayne@68
|
1288 }
|
jpayne@68
|
1289
|
jpayne@68
|
1290 public void writeLengthToFile(String fname){
|
jpayne@68
|
1291 writeHistogramToFile(fname, "#Length\tCount\n", lengthHist, false);
|
jpayne@68
|
1292 }
|
jpayne@68
|
1293
|
jpayne@68
|
1294 public void writeTimeToFile(String fname){
|
jpayne@68
|
1295 writeHistogramToFile(fname, "#Time\tCount\n", timeHist, false);
|
jpayne@68
|
1296 }
|
jpayne@68
|
1297
|
jpayne@68
|
1298 public void writeHistogramToFile(String fname, String header, LongList hist, boolean printZeros){
|
jpayne@68
|
1299 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1300 bsw.start();
|
jpayne@68
|
1301 bsw.print(header);
|
jpayne@68
|
1302
|
jpayne@68
|
1303 int max=hist.size;
|
jpayne@68
|
1304
|
jpayne@68
|
1305 ByteBuilder bb=new ByteBuilder(40);
|
jpayne@68
|
1306 for(int i=0; i<max; i++){
|
jpayne@68
|
1307 long x=hist.get(i);
|
jpayne@68
|
1308 if(x>0 || printZeros){
|
jpayne@68
|
1309 bb.clear().append(i).tab().append(x).nl();
|
jpayne@68
|
1310 bsw.print(bb);
|
jpayne@68
|
1311 }
|
jpayne@68
|
1312 }
|
jpayne@68
|
1313 bsw.poisonAndWait();
|
jpayne@68
|
1314 errorState|=bsw.errorState;
|
jpayne@68
|
1315 }
|
jpayne@68
|
1316
|
jpayne@68
|
1317 public void writeHistogramToFile(String fname, String header, SuperLongList hist, boolean printZeros){
|
jpayne@68
|
1318 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1319 bsw.start();
|
jpayne@68
|
1320 bsw.print(header);
|
jpayne@68
|
1321
|
jpayne@68
|
1322 hist.sort();
|
jpayne@68
|
1323 long max=hist.max();
|
jpayne@68
|
1324 long[] array=hist.array();
|
jpayne@68
|
1325 LongList list=hist.list();
|
jpayne@68
|
1326
|
jpayne@68
|
1327 ByteBuilder bb=new ByteBuilder(40);
|
jpayne@68
|
1328 for(int i=0; i<array.length; i++){
|
jpayne@68
|
1329 long x=array[i];
|
jpayne@68
|
1330 if(x>0 || printZeros){
|
jpayne@68
|
1331 bb.clear().append(i).tab().append(x).nl();
|
jpayne@68
|
1332 bsw.print(bb);
|
jpayne@68
|
1333 }
|
jpayne@68
|
1334 }
|
jpayne@68
|
1335 {//TODO: This part does not bother printing zeros regardless of the flag.
|
jpayne@68
|
1336 long prev=-1;
|
jpayne@68
|
1337 int streak=0;
|
jpayne@68
|
1338 for(int i=0; i<list.size(); i++){
|
jpayne@68
|
1339 long x=list.get(i);
|
jpayne@68
|
1340 if(x==prev){streak++;}
|
jpayne@68
|
1341 else{
|
jpayne@68
|
1342 if(streak>0){//Print previous key
|
jpayne@68
|
1343 bb.clear().append(prev).tab().append(streak).nl();
|
jpayne@68
|
1344 bsw.print(bb);
|
jpayne@68
|
1345 }
|
jpayne@68
|
1346 streak=1;
|
jpayne@68
|
1347 prev=x;
|
jpayne@68
|
1348 }
|
jpayne@68
|
1349 }
|
jpayne@68
|
1350 if(streak>0){//Print final key
|
jpayne@68
|
1351 bb.clear().append(prev).tab().append(streak).nl();
|
jpayne@68
|
1352 bsw.print(bb);
|
jpayne@68
|
1353 }
|
jpayne@68
|
1354 }
|
jpayne@68
|
1355 bsw.poisonAndWait();
|
jpayne@68
|
1356 errorState|=bsw.errorState;
|
jpayne@68
|
1357 }
|
jpayne@68
|
1358
|
jpayne@68
|
1359 public void writeGCToFile(String fname, boolean printZeros){
|
jpayne@68
|
1360 final long[] hist;
|
jpayne@68
|
1361 if(GC_BINS_AUTO && gcMaxReadLen+1<gcHist.length){
|
jpayne@68
|
1362 hist=Tools.downsample(gcHist, gcMaxReadLen+1);
|
jpayne@68
|
1363 }else{
|
jpayne@68
|
1364 hist=gcHist;
|
jpayne@68
|
1365 }
|
jpayne@68
|
1366 final int bins=hist.length;
|
jpayne@68
|
1367 final double gcMult=100.0/Tools.max(1, bins-1);
|
jpayne@68
|
1368 final long total=Tools.sum(hist);
|
jpayne@68
|
1369 final long max=Tools.max(hist);
|
jpayne@68
|
1370 final double countsPerX=Tools.max(1, ((max*1000.0)/40));
|
jpayne@68
|
1371 final double fractionMult=1.0/Tools.max(1, total);
|
jpayne@68
|
1372 long sum=0;
|
jpayne@68
|
1373
|
jpayne@68
|
1374 GCMean=Tools.averageHistogram(hist)*gcMult;
|
jpayne@68
|
1375 GCMedian=Tools.percentileHistogram(hist, 0.5)*gcMult;
|
jpayne@68
|
1376 GCMode=Tools.calcModeHistogram(hist)*gcMult;
|
jpayne@68
|
1377 GCSTDev=Tools.standardDeviationHistogram(hist)*gcMult;
|
jpayne@68
|
1378
|
jpayne@68
|
1379 ByteBuilder bb=new ByteBuilder(256);
|
jpayne@68
|
1380 bb.append("#Mean\t").append(GCMean, 3).nl();
|
jpayne@68
|
1381 bb.append("#Median\t").append(GCMedian, 3).nl();
|
jpayne@68
|
1382 bb.append("#Mode\t").append(GCMode, 3).nl();
|
jpayne@68
|
1383 bb.append("#STDev\t").append(GCSTDev, 3).nl();
|
jpayne@68
|
1384 if(GC_PLOT_X){
|
jpayne@68
|
1385 bb.append("#GC\tCount\tCumulative\tPlot\n");
|
jpayne@68
|
1386 }else{
|
jpayne@68
|
1387 bb.append("#GC\tCount\n");
|
jpayne@68
|
1388 }
|
jpayne@68
|
1389
|
jpayne@68
|
1390
|
jpayne@68
|
1391 // bsw.print("#Mean\t"+String.format(Locale.ROOT, "%.3f", GCMean)+"\n");
|
jpayne@68
|
1392 // bsw.print("#Median\t"+String.format(Locale.ROOT, "%.3f", GCMedian)+"\n");
|
jpayne@68
|
1393 // bsw.print("#Mode\t"+String.format(Locale.ROOT, "%.3f", GCMode)+"\n");
|
jpayne@68
|
1394 // bsw.print("#STDev\t"+String.format(Locale.ROOT, "%.3f", GCSTDev)+"\n");
|
jpayne@68
|
1395 // if(GC_PLOT_X){
|
jpayne@68
|
1396 // bsw.print("#GC\tCount\tCumulative\tPlot\n");
|
jpayne@68
|
1397 // }else{
|
jpayne@68
|
1398 // bsw.print("#GC\tCount\n");
|
jpayne@68
|
1399 // }
|
jpayne@68
|
1400
|
jpayne@68
|
1401 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1402 bsw.start();
|
jpayne@68
|
1403 bsw.print(bb);
|
jpayne@68
|
1404
|
jpayne@68
|
1405 for(int i=0; i<bins; i++){
|
jpayne@68
|
1406 long x=hist[i];
|
jpayne@68
|
1407 sum+=x;
|
jpayne@68
|
1408 if(x>0 || printZeros){
|
jpayne@68
|
1409 //This assumes GC_BINS==100
|
jpayne@68
|
1410 // tsw.print(i+"\t"+x+"\n");
|
jpayne@68
|
1411 if(GC_PLOT_X){
|
jpayne@68
|
1412 bb.clear();
|
jpayne@68
|
1413 bb.append(i*gcMult, 1).tab().append(x).tab();
|
jpayne@68
|
1414 bb.append(sum*fractionMult, 3).tab();
|
jpayne@68
|
1415
|
jpayne@68
|
1416 int len=(int)((x*1000)/countsPerX);
|
jpayne@68
|
1417 for(int j=0; j<len; j++){bb.append('X');}
|
jpayne@68
|
1418 if(len<1 && x>0){
|
jpayne@68
|
1419 if((x*1000f)/countsPerX>0.1f){bb.append('x');}
|
jpayne@68
|
1420 else{bb.append('.');}
|
jpayne@68
|
1421 }
|
jpayne@68
|
1422
|
jpayne@68
|
1423 bb.append('\n');
|
jpayne@68
|
1424 bsw.print(bb);
|
jpayne@68
|
1425 }else{
|
jpayne@68
|
1426 bb.clear().append(i*gcMult, 1).tab().append(x).nl();
|
jpayne@68
|
1427 bsw.print(bb);
|
jpayne@68
|
1428 }
|
jpayne@68
|
1429 }
|
jpayne@68
|
1430 }
|
jpayne@68
|
1431 bsw.poisonAndWait();
|
jpayne@68
|
1432 errorState|=bsw.errorState;
|
jpayne@68
|
1433 }
|
jpayne@68
|
1434
|
jpayne@68
|
1435 public void writeEntropyToFile(String fname, boolean printZeros){
|
jpayne@68
|
1436 final long[] hist=entropyHist;
|
jpayne@68
|
1437
|
jpayne@68
|
1438 final int bins=hist.length;
|
jpayne@68
|
1439 final double mult=1.0/Tools.max(1, bins-1);
|
jpayne@68
|
1440 final long total=Tools.sum(hist);
|
jpayne@68
|
1441 final long max=Tools.max(hist);
|
jpayne@68
|
1442 final double countsPerX=Tools.max(1, ((max*1000.0)/40));
|
jpayne@68
|
1443 final double fractionMult=1.0/Tools.max(1, total);
|
jpayne@68
|
1444 long sum=0;
|
jpayne@68
|
1445
|
jpayne@68
|
1446 double mean=Tools.averageHistogram(hist)*mult;
|
jpayne@68
|
1447 double median=Tools.percentileHistogram(hist, 0.5)*mult;
|
jpayne@68
|
1448 double mode=Tools.calcModeHistogram(hist)*mult;
|
jpayne@68
|
1449 double stdev=Tools.standardDeviationHistogram(hist)*mult;
|
jpayne@68
|
1450
|
jpayne@68
|
1451 ByteBuilder bb=new ByteBuilder(256);
|
jpayne@68
|
1452 bb.append("#Mean\t").append(mean, 6).nl();
|
jpayne@68
|
1453 bb.append("#Median\t").append(median, 6).nl();
|
jpayne@68
|
1454 bb.append("#Mode\t").append(mode, 6).nl();
|
jpayne@68
|
1455 bb.append("#STDev\t").append(stdev, 6).nl();
|
jpayne@68
|
1456 bb.append("#Value\tCount\n");
|
jpayne@68
|
1457
|
jpayne@68
|
1458 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1459 bsw.start();
|
jpayne@68
|
1460 bsw.print(bb);
|
jpayne@68
|
1461
|
jpayne@68
|
1462 for(int i=0; i<bins; i++){
|
jpayne@68
|
1463 long x=hist[i];
|
jpayne@68
|
1464 sum+=x;
|
jpayne@68
|
1465 if(x>0 || printZeros){
|
jpayne@68
|
1466 bb.clear().append(i*mult, 4).tab().append(x).nl();
|
jpayne@68
|
1467 bsw.print(bb);
|
jpayne@68
|
1468 }
|
jpayne@68
|
1469 }
|
jpayne@68
|
1470 bsw.poisonAndWait();
|
jpayne@68
|
1471 errorState|=bsw.errorState;
|
jpayne@68
|
1472 }
|
jpayne@68
|
1473
|
jpayne@68
|
1474 public void writeIdentityToFile(String fname, boolean printZeros){
|
jpayne@68
|
1475 final long[] hist, histb;
|
jpayne@68
|
1476 if(ID_BINS_AUTO && idMaxReadLen+1<idHist.length){
|
jpayne@68
|
1477 hist=Tools.downsample(idHist, idMaxReadLen+1);
|
jpayne@68
|
1478 histb=Tools.downsample(idBaseHist, idMaxReadLen+1);
|
jpayne@68
|
1479 }else{
|
jpayne@68
|
1480 hist=idHist;
|
jpayne@68
|
1481 histb=idBaseHist;
|
jpayne@68
|
1482 }
|
jpayne@68
|
1483 final int max=hist.length;
|
jpayne@68
|
1484 final double mult=100.0/(max-1);
|
jpayne@68
|
1485
|
jpayne@68
|
1486 TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, false, false);
|
jpayne@68
|
1487 tsw.start();
|
jpayne@68
|
1488
|
jpayne@68
|
1489
|
jpayne@68
|
1490 tsw.print("#Mean_reads\t"+String.format(Locale.ROOT, "%.3f", (Tools.averageHistogram(hist)*mult))+"\n");
|
jpayne@68
|
1491 tsw.print("#Mean_bases\t"+(String.format(Locale.ROOT, "%.3f", Tools.averageHistogram(histb)*mult))+"\n");
|
jpayne@68
|
1492 tsw.print("#Median_reads\t"+(int)Math.round(Tools.percentileHistogram(hist, 0.5)*mult)+"\n");
|
jpayne@68
|
1493 tsw.print("#Median_bases\t"+(int)Math.round(Tools.percentileHistogram(histb, 0.5)*mult)+"\n");
|
jpayne@68
|
1494 tsw.print("#Mode_reads\t"+(int)Math.round(Tools.calcModeHistogram(hist)*mult)+"\n");
|
jpayne@68
|
1495 tsw.print("#Mode_bases\t"+(int)Math.round(Tools.calcModeHistogram(histb)*mult)+"\n");
|
jpayne@68
|
1496 tsw.print("#STDev_reads\t"+String.format(Locale.ROOT, "%.3f", (Tools.standardDeviationHistogram(hist)*mult))+"\n");
|
jpayne@68
|
1497 tsw.print("#STDev_bases\t"+String.format(Locale.ROOT, "%.3f", (Tools.standardDeviationHistogram(histb)*mult))+"\n");
|
jpayne@68
|
1498 tsw.print("#Identity\tReads\tBases\n");
|
jpayne@68
|
1499
|
jpayne@68
|
1500 for(int i=0; i<max; i++){
|
jpayne@68
|
1501 long x=hist[i], x2=histb[i];
|
jpayne@68
|
1502 if(x>0 || printZeros){
|
jpayne@68
|
1503 tsw.print(String.format(Locale.ROOT, "%.1f", i*mult)+"\t"+x+"\t"+x2+"\n");
|
jpayne@68
|
1504 }
|
jpayne@68
|
1505 }
|
jpayne@68
|
1506 tsw.poisonAndWait();
|
jpayne@68
|
1507 errorState|=tsw.errorState;
|
jpayne@68
|
1508 }
|
jpayne@68
|
1509
|
jpayne@68
|
1510 //Tracks to see if read2s have been encountered, for displaying stats.
|
jpayne@68
|
1511 private long read2Count=0;
|
jpayne@68
|
1512
|
jpayne@68
|
1513 public long pairedCount=0;
|
jpayne@68
|
1514 public long unpairedCount=0;
|
jpayne@68
|
1515
|
jpayne@68
|
1516 public final long[][] aqualArray;
|
jpayne@68
|
1517 public final long[][] qualLength;
|
jpayne@68
|
1518 public final long[][] qualSum;
|
jpayne@68
|
1519
|
jpayne@68
|
1520 public final long[][][] bqualHist;
|
jpayne@68
|
1521 public final long[] bqualHistOverall;
|
jpayne@68
|
1522
|
jpayne@68
|
1523 public final long[][] qcountHist;
|
jpayne@68
|
1524
|
jpayne@68
|
1525 public final double[][] qualSumDouble;
|
jpayne@68
|
1526
|
jpayne@68
|
1527 public final long[][] matchSum;
|
jpayne@68
|
1528 public final long[][] delSum;
|
jpayne@68
|
1529 public final long[][] insSum;
|
jpayne@68
|
1530 public final long[][] subSum;
|
jpayne@68
|
1531 public final long[][] nSum;
|
jpayne@68
|
1532 public final long[][] clipSum;
|
jpayne@68
|
1533 public final long[][] otherSum;
|
jpayne@68
|
1534
|
jpayne@68
|
1535 public final long[] qualMatch;
|
jpayne@68
|
1536 public final long[] qualSub;
|
jpayne@68
|
1537 public final long[] qualIns;
|
jpayne@68
|
1538 public final long[] qualDel;
|
jpayne@68
|
1539
|
jpayne@68
|
1540 public final long[] gcHist;
|
jpayne@68
|
1541 public final long[] entropyHist;
|
jpayne@68
|
1542 public final EntropyTracker eTracker;
|
jpayne@68
|
1543 public final long[] idHist;
|
jpayne@68
|
1544 public final long[] idBaseHist;
|
jpayne@68
|
1545 private int gcMaxReadLen=1;
|
jpayne@68
|
1546 private int idMaxReadLen=1;
|
jpayne@68
|
1547
|
jpayne@68
|
1548 public final LongList[][] baseHist;
|
jpayne@68
|
1549
|
jpayne@68
|
1550 /** Insert size */
|
jpayne@68
|
1551 public final LongList insertHist;
|
jpayne@68
|
1552 /** Read length */
|
jpayne@68
|
1553 public final SuperLongList lengthHist;
|
jpayne@68
|
1554 /** Number errors per read */
|
jpayne@68
|
1555 public final LongList errorHist;
|
jpayne@68
|
1556 /** Insertion length */
|
jpayne@68
|
1557 public final LongList insHist;
|
jpayne@68
|
1558 /** Deletion length */
|
jpayne@68
|
1559 public final LongList delHist;
|
jpayne@68
|
1560 /** Deletion length, binned */
|
jpayne@68
|
1561 public final LongList delHist2;
|
jpayne@68
|
1562 /** Time */
|
jpayne@68
|
1563 public final LongList timeHist;
|
jpayne@68
|
1564
|
jpayne@68
|
1565 public static boolean REQUIRE_PROPER_PAIR=true;
|
jpayne@68
|
1566 public static int MAXLEN=6000;
|
jpayne@68
|
1567 public static int MAXINSERTLEN=80000;
|
jpayne@68
|
1568 @Deprecated
|
jpayne@68
|
1569 public static int MAXLENGTHLEN=80000;
|
jpayne@68
|
1570 public static final int MAXTIMELEN=80000;
|
jpayne@68
|
1571 public static final int MAXINSLEN=1000;
|
jpayne@68
|
1572 public static final int MAXDELLEN=1000;
|
jpayne@68
|
1573 public static final int MAXDELLEN2=1000000;
|
jpayne@68
|
1574 public static final int DEL_BIN=100;
|
jpayne@68
|
1575 public static int ID_BINS=100;
|
jpayne@68
|
1576 public static boolean ID_BINS_AUTO=false;
|
jpayne@68
|
1577 public static int GC_BINS=100;
|
jpayne@68
|
1578 public static boolean GC_BINS_AUTO=false;
|
jpayne@68
|
1579 public static boolean GC_PLOT_X=false;
|
jpayne@68
|
1580 public static int ENTROPY_BINS=1000;
|
jpayne@68
|
1581
|
jpayne@68
|
1582 public static double GCMean;
|
jpayne@68
|
1583 public static double GCMedian;
|
jpayne@68
|
1584 public static double GCMode;
|
jpayne@68
|
1585 public static double GCSTDev;
|
jpayne@68
|
1586
|
jpayne@68
|
1587 // public static double entropyMean;
|
jpayne@68
|
1588 // public static double entropyMedian;
|
jpayne@68
|
1589 // public static double entropyMode;
|
jpayne@68
|
1590 // public static double entropySTDev;
|
jpayne@68
|
1591
|
jpayne@68
|
1592 public boolean errorState=false;
|
jpayne@68
|
1593
|
jpayne@68
|
1594 public static ReadStats merged=null;
|
jpayne@68
|
1595
|
jpayne@68
|
1596 // public static double matedPercent=0;
|
jpayne@68
|
1597
|
jpayne@68
|
1598 public static void clear(){
|
jpayne@68
|
1599 COLLECT_QUALITY_STATS=false;
|
jpayne@68
|
1600 COLLECT_QUALITY_ACCURACY=false;
|
jpayne@68
|
1601 COLLECT_MATCH_STATS=false;
|
jpayne@68
|
1602 COLLECT_INSERT_STATS=false;
|
jpayne@68
|
1603 COLLECT_BASE_STATS=false;
|
jpayne@68
|
1604 COLLECT_INDEL_STATS=false;
|
jpayne@68
|
1605 COLLECT_GC_STATS=false;
|
jpayne@68
|
1606 COLLECT_ENTROPY_STATS=false;
|
jpayne@68
|
1607 COLLECT_ERROR_STATS=false;
|
jpayne@68
|
1608 COLLECT_LENGTH_STATS=false;
|
jpayne@68
|
1609 COLLECT_IDENTITY_STATS=false;
|
jpayne@68
|
1610 COLLECT_TIME_STATS=false;
|
jpayne@68
|
1611
|
jpayne@68
|
1612 AVG_QUAL_HIST_FILE=null;
|
jpayne@68
|
1613 QUAL_HIST_FILE=null;
|
jpayne@68
|
1614 BQUAL_HIST_FILE=null;
|
jpayne@68
|
1615 QUAL_COUNT_HIST_FILE=null;
|
jpayne@68
|
1616 BQUAL_HIST_OVERALL_FILE=null;
|
jpayne@68
|
1617 QUAL_ACCURACY_FILE=null;
|
jpayne@68
|
1618 MATCH_HIST_FILE=null;
|
jpayne@68
|
1619 INSERT_HIST_FILE=null;
|
jpayne@68
|
1620 BASE_HIST_FILE=null;
|
jpayne@68
|
1621 INDEL_HIST_FILE=null;
|
jpayne@68
|
1622 ERROR_HIST_FILE=null;
|
jpayne@68
|
1623 LENGTH_HIST_FILE=null;
|
jpayne@68
|
1624 GC_HIST_FILE=null;
|
jpayne@68
|
1625 ENTROPY_HIST_FILE=null;
|
jpayne@68
|
1626 IDENTITY_HIST_FILE=null;
|
jpayne@68
|
1627 TIME_HIST_FILE=null;
|
jpayne@68
|
1628 }
|
jpayne@68
|
1629
|
jpayne@68
|
1630 public static ArrayList<ReadStats> objectList=new ArrayList<ReadStats>();
|
jpayne@68
|
1631 public static boolean COLLECT_QUALITY_STATS=false;
|
jpayne@68
|
1632 public static boolean COLLECT_QUALITY_ACCURACY=false;
|
jpayne@68
|
1633 public static boolean COLLECT_MATCH_STATS=false;
|
jpayne@68
|
1634 public static boolean COLLECT_INSERT_STATS=false;
|
jpayne@68
|
1635 public static boolean COLLECT_BASE_STATS=false;
|
jpayne@68
|
1636 public static boolean COLLECT_INDEL_STATS=false;
|
jpayne@68
|
1637 public static boolean COLLECT_GC_STATS=false;
|
jpayne@68
|
1638 public static boolean COLLECT_ENTROPY_STATS=false;
|
jpayne@68
|
1639 public static boolean COLLECT_ERROR_STATS=false;
|
jpayne@68
|
1640 public static boolean COLLECT_LENGTH_STATS=false;
|
jpayne@68
|
1641 public static boolean COLLECT_IDENTITY_STATS=false;
|
jpayne@68
|
1642 public static boolean COLLECT_TIME_STATS=false;
|
jpayne@68
|
1643
|
jpayne@68
|
1644 public static boolean collectingStats(){
|
jpayne@68
|
1645 return COLLECT_QUALITY_STATS || COLLECT_QUALITY_ACCURACY || COLLECT_MATCH_STATS || COLLECT_INSERT_STATS
|
jpayne@68
|
1646 || COLLECT_BASE_STATS || COLLECT_INDEL_STATS || COLLECT_GC_STATS || COLLECT_ENTROPY_STATS
|
jpayne@68
|
1647 || COLLECT_ERROR_STATS || COLLECT_LENGTH_STATS || COLLECT_IDENTITY_STATS || COLLECT_TIME_STATS;
|
jpayne@68
|
1648 }
|
jpayne@68
|
1649
|
jpayne@68
|
1650 public static boolean usePairGC=true;
|
jpayne@68
|
1651 public static boolean allowEntropyNs=true;
|
jpayne@68
|
1652
|
jpayne@68
|
1653 public static String AVG_QUAL_HIST_FILE=null;
|
jpayne@68
|
1654 public static String QUAL_HIST_FILE=null;
|
jpayne@68
|
1655 public static String BQUAL_HIST_FILE=null;
|
jpayne@68
|
1656 public static String QUAL_COUNT_HIST_FILE=null;
|
jpayne@68
|
1657 public static String BQUAL_HIST_OVERALL_FILE=null;
|
jpayne@68
|
1658 public static String QUAL_ACCURACY_FILE=null;
|
jpayne@68
|
1659 public static String MATCH_HIST_FILE=null;
|
jpayne@68
|
1660 public static String INSERT_HIST_FILE=null;
|
jpayne@68
|
1661 public static String BASE_HIST_FILE=null;
|
jpayne@68
|
1662 public static String INDEL_HIST_FILE=null;
|
jpayne@68
|
1663 public static String ERROR_HIST_FILE=null;
|
jpayne@68
|
1664 public static String LENGTH_HIST_FILE=null;
|
jpayne@68
|
1665 public static String GC_HIST_FILE=null;
|
jpayne@68
|
1666 public static String ENTROPY_HIST_FILE=null;
|
jpayne@68
|
1667 public static String IDENTITY_HIST_FILE=null;
|
jpayne@68
|
1668 public static String TIME_HIST_FILE=null;
|
jpayne@68
|
1669
|
jpayne@68
|
1670 public static boolean overwrite=false;
|
jpayne@68
|
1671 public static boolean append=false;
|
jpayne@68
|
1672 public static final boolean verbose=false;
|
jpayne@68
|
1673
|
jpayne@68
|
1674 public static boolean skipZeroInsertCount=true;
|
jpayne@68
|
1675 public static boolean skipZeroIndel=true;
|
jpayne@68
|
1676
|
jpayne@68
|
1677 }
|