jpayne@68: package shared; jpayne@68: jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Arrays; jpayne@68: import java.util.Locale; jpayne@68: jpayne@68: import align2.QualityTools; jpayne@68: import dna.AminoAcid; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.TextStreamWriter; jpayne@68: import stream.Read; jpayne@68: import stream.SamLine; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.EntropyTracker; jpayne@68: import structures.LongList; jpayne@68: import structures.SuperLongList; jpayne@68: jpayne@68: jpayne@68: /** jpayne@68: * @author Brian Bushnell jpayne@68: * @date Mar 18, 2013 jpayne@68: * jpayne@68: */ jpayne@68: public class ReadStats { jpayne@68: jpayne@68: public ReadStats(){this(true);} jpayne@68: jpayne@68: public ReadStats(boolean addToList){ jpayne@68: if(addToList){ jpayne@68: synchronized(ReadStats.class){ jpayne@68: objectList.add(this); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(COLLECT_QUALITY_STATS){ jpayne@68: aqualArray=new long[2][127]; jpayne@68: qualLength=new long[2][MAXLEN]; jpayne@68: qualSum=new long[2][MAXLEN]; jpayne@68: qualSumDouble=new double[2][MAXLEN]; jpayne@68: bqualHistOverall=new long[127]; jpayne@68: }else{ jpayne@68: aqualArray=null; jpayne@68: qualLength=null; jpayne@68: qualSum=null; jpayne@68: qualSumDouble=null; jpayne@68: bqualHistOverall=null; jpayne@68: } jpayne@68: jpayne@68: if(BQUAL_HIST_FILE!=null){ jpayne@68: bqualHist=new long[2][MAXLEN][127]; jpayne@68: }else{ jpayne@68: bqualHist=null; jpayne@68: } jpayne@68: jpayne@68: if(QUAL_COUNT_HIST_FILE!=null){ jpayne@68: qcountHist=new long[2][127]; jpayne@68: }else{ jpayne@68: qcountHist=null; jpayne@68: } jpayne@68: jpayne@68: if(COLLECT_MATCH_STATS){ jpayne@68: matchSum=new long[2][MAXLEN]; jpayne@68: delSum=new long[2][MAXLEN]; jpayne@68: insSum=new long[2][MAXLEN]; jpayne@68: subSum=new long[2][MAXLEN]; jpayne@68: nSum=new long[2][MAXLEN]; jpayne@68: clipSum=new long[2][MAXLEN]; jpayne@68: otherSum=new long[2][MAXLEN]; jpayne@68: }else{ jpayne@68: matchSum=null; jpayne@68: delSum=null; jpayne@68: insSum=null; jpayne@68: subSum=null; jpayne@68: nSum=null; jpayne@68: clipSum=null; jpayne@68: otherSum=null; jpayne@68: } jpayne@68: jpayne@68: if(COLLECT_QUALITY_ACCURACY){ jpayne@68: qualMatch=new long[99]; jpayne@68: qualSub=new long[99]; jpayne@68: qualIns=new long[99]; jpayne@68: qualDel=new long[99]; jpayne@68: }else{ jpayne@68: qualMatch=null; jpayne@68: qualSub=null; jpayne@68: qualIns=null; jpayne@68: qualDel=null; jpayne@68: } jpayne@68: jpayne@68: if(COLLECT_INSERT_STATS){ jpayne@68: insertHist=new LongList(MAXLEN); jpayne@68: }else{ jpayne@68: insertHist=null; jpayne@68: } jpayne@68: jpayne@68: if(COLLECT_BASE_STATS){ jpayne@68: baseHist=new LongList[2][5]; jpayne@68: for(int i=0; i=0){ jpayne@68: if(AminoAcid.isFullyDefined(bases[y])){ jpayne@68: qualDel[qual[y]]++; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: bpos--; jpayne@68: }else if(m=='d'){ jpayne@68: bpos--; jpayne@68: }else{ jpayne@68: assert(!Tools.isDigit(m)) : ((char)m); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: bpos++; jpayne@68: lastm=m; jpayne@68: } jpayne@68: jpayne@68: } jpayne@68: jpayne@68: public void addToErrorHistogram(final Read r){ jpayne@68: if(r==null){return;} jpayne@68: addToErrorHistogram(r, 0); jpayne@68: if(r.mate!=null){addToErrorHistogram(r.mate, 1);} jpayne@68: } jpayne@68: jpayne@68: private void addToErrorHistogram(final Read r, int pairnum){ jpayne@68: if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;} jpayne@68: r.toLongMatchString(false); jpayne@68: int x=r.countSubs(); jpayne@68: errorHist.increment(x, 1); jpayne@68: } jpayne@68: jpayne@68: public void addToLengthHistogram(final Read r){ jpayne@68: if(r==null){return;} jpayne@68: addToLengthHistogram(r, 0); jpayne@68: if(r.mate!=null){addToLengthHistogram(r.mate, 1);} jpayne@68: } jpayne@68: jpayne@68: private void addToLengthHistogram(final Read r, int pairnum){ jpayne@68: if(r==null || r.bases==null){return;} jpayne@68: int x=r.length();//Tools.min(r.length(), MAXLENGTHLEN); Old style before SLL jpayne@68: lengthHist.increment(x, 1); jpayne@68: } jpayne@68: jpayne@68: public void addToGCHistogram(final Read r1){ jpayne@68: if(r1==null){return;} jpayne@68: final Read r2=r1.mate; jpayne@68: final int len1=r1.length(), len2=r1.mateLength(); jpayne@68: jpayne@68: final float gc1=(len1>0 ? r1.gc() : -1); jpayne@68: final float gc2=(len2>0 ? r2.gc() : -1); jpayne@68: if(usePairGC){ jpayne@68: final float gc; jpayne@68: if(r2==null){ jpayne@68: gc=gc1; jpayne@68: }else{ jpayne@68: gc=(gc1*len1+gc2*len2)/(len1+len2); jpayne@68: } jpayne@68: addToGCHistogram(gc, len1+len2); jpayne@68: }else{ jpayne@68: addToGCHistogram(gc1, len1); jpayne@68: addToGCHistogram(gc2, len2); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private void addToGCHistogram(final float gc, final int len){ jpayne@68: if(gc<0 || len<1){return;} jpayne@68: gcHist[Tools.min(GC_BINS, (int)(gc*(GC_BINS+1)))]++; jpayne@68: gcMaxReadLen=Tools.max(len, gcMaxReadLen); jpayne@68: } jpayne@68: jpayne@68: public void addToEntropyHistogram(final Read r1){ jpayne@68: if(r1==null){return;} jpayne@68: final Read r2=r1.mate; jpayne@68: final int len1=r1.length(), len2=r1.mateLength(); jpayne@68: jpayne@68: final float entropy1=(len1>0 ? eTracker.averageEntropy(r1.bases, allowEntropyNs) : -1); jpayne@68: final float entropy2=(len2>0 ? eTracker.averageEntropy(r2.bases, allowEntropyNs) : -1); jpayne@68: if(/* usePairEntropy */ false){ jpayne@68: final float entropy; jpayne@68: if(r2==null){ jpayne@68: entropy=entropy1; jpayne@68: }else{ jpayne@68: entropy=(entropy1*len1+entropy2*len2)/(len1+len2); jpayne@68: } jpayne@68: addToEntropyHistogram(entropy, len1+len2); jpayne@68: }else{ jpayne@68: addToEntropyHistogram(entropy1, len1); jpayne@68: addToEntropyHistogram(entropy2, len2); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private void addToEntropyHistogram(final float entropy, final int len){ jpayne@68: if(entropy<0 || len<1){return;} jpayne@68: entropyHist[Tools.min(ENTROPY_BINS, (int)(entropy*(ENTROPY_BINS+1)))]++; jpayne@68: } jpayne@68: jpayne@68: public void addToIdentityHistogram(final Read r){ jpayne@68: if(r==null){return;} jpayne@68: addToIdentityHistogram(r, 0); jpayne@68: if(r.mate!=null){addToIdentityHistogram(r.mate, 1);} jpayne@68: } jpayne@68: jpayne@68: private void addToIdentityHistogram(final Read r, int pairnum){ jpayne@68: if(r==null || r.bases==null || r.length()<1 || !r.mapped() || r.match==null/* || r.discarded()*/){return;} jpayne@68: float id=r.identity(); jpayne@68: idHist[(int)(id*ID_BINS)]++; jpayne@68: idBaseHist[(int)(id*ID_BINS)]+=r.length(); jpayne@68: idMaxReadLen=Tools.max(r.length(), idMaxReadLen); jpayne@68: } jpayne@68: jpayne@68: public void addToTimeHistogram(final Read r){ jpayne@68: if(r==null){return;} jpayne@68: addToTimeHistogram(r, 0);//Time for pairs is the same. jpayne@68: } jpayne@68: jpayne@68: private void addToTimeHistogram(final Read r, int pairnum){ jpayne@68: if(r==null){return;} jpayne@68: assert(r.obj!=null && r.obj.getClass()==Long.class); jpayne@68: int x=(int)Tools.min(((Long)r.obj).longValue(), MAXTIMELEN); jpayne@68: timeHist.increment(x, 1); jpayne@68: } jpayne@68: jpayne@68: public boolean addToIndelHistogram(final Read r){ jpayne@68: if(r==null){return false;} jpayne@68: boolean success=addToIndelHistogram(r, 0); jpayne@68: if(r.mate!=null){success=addToIndelHistogram(r.mate, 1)|success;} jpayne@68: return success; jpayne@68: } jpayne@68: jpayne@68: /** Handles short match, long match, and reads with attached SamLines */ jpayne@68: private boolean addToIndelHistogram(final Read r, int pairnum){ jpayne@68: if(r==null || !r.mapped()){return false;} jpayne@68: if(r.samline!=null){ jpayne@68: boolean success=addToIndelHistogram(r.samline); jpayne@68: if(success){return true;} jpayne@68: } jpayne@68: if(r.match==null/* || r.discarded()*/){return false;} jpayne@68: final byte[] match=r.match; jpayne@68: jpayne@68: byte lastLetter='?'; jpayne@68: boolean digit=false; jpayne@68: int streak=0; jpayne@68: for(int mpos=0; mpos0 || (streak==0 && lastm=='?')); jpayne@68: if(!digit){streak++;} jpayne@68: digit=false; jpayne@68: if(lastLetter=='D'){ jpayne@68: streak=Tools.min(streak, MAXDELLEN2); jpayne@68: if(streak0){ jpayne@68: insertHist.increment(x, 1); jpayne@68: pairedCount++; jpayne@68: }else{ jpayne@68: unpairedCount++; jpayne@68: } jpayne@68: // assert(x!=1) : "\n"+r+"\n\n"+r.mate+"\n"; jpayne@68: // System.out.println("Incrementing "+x); jpayne@68: } jpayne@68: jpayne@68: public void addToInsertHistogram(final SamLine r1){ jpayne@68: int x=r1.tlen; jpayne@68: if(x<0) {x=-x;} jpayne@68: x=Tools.min(MAXINSERTLEN, x); jpayne@68: if(r1.pairedOnSameChrom() && x>0){ jpayne@68: pairedCount++; jpayne@68: insertHist.increment(x, 1); jpayne@68: }else { jpayne@68: unpairedCount++; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: public void addToInsertHistogram(final SamLine r1, final SamLine r2){ jpayne@68: if(r1==null){return;} jpayne@68: int x=insertSizeMapped(r1, r2, REQUIRE_PROPER_PAIR); jpayne@68: if(verbose){ jpayne@68: System.err.println(r1.qname+"\t"+x); jpayne@68: } jpayne@68: x=Tools.min(MAXINSERTLEN, x); jpayne@68: if(x>0){ jpayne@68: insertHist.increment(x, 1); jpayne@68: pairedCount++; jpayne@68: }else{ jpayne@68: unpairedCount++; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** This is untested and only gives approximate answers when overlapping reads contain indels. jpayne@68: * It may give incorrect answers for same-strange pairs that are shorter than read length. jpayne@68: * It might give negative answers but that would be a bug. */ jpayne@68: public static int insertSizeMapped(SamLine r1, SamLine r2, boolean requireProperPair){ jpayne@68: if(r2==null){return r1.length();} jpayne@68: if(!r1.mapped() || !r2.mapped() || !r1.pairedOnSameChrom() || (requireProperPair && !r1.properPair())){ jpayne@68: return -1; jpayne@68: } jpayne@68: jpayne@68: int a1=r1.start(true, false); jpayne@68: int a2=r2.start(true, false); jpayne@68: jpayne@68: if(r1.strand()!=r2.strand()){ jpayne@68: if(r1.strand()==1){return insertSizeMapped(r2, r1, requireProperPair);} jpayne@68: }else if(a1>a2){ jpayne@68: return insertSizeMapped(r2, r1, requireProperPair); jpayne@68: } jpayne@68: jpayne@68: int b1=r1.stop(a1, true, false); jpayne@68: int b2=r2.stop(a2, true, false); jpayne@68: jpayne@68: int clen1=r1.calcCigarLength(true, false); jpayne@68: int clen2=r2.calcCigarLength(true, false); jpayne@68: jpayne@68: int mlen1=b1-a1+1; jpayne@68: int mlen2=b2-a2+1; jpayne@68: jpayne@68: int dif1=mlen1-clen1; jpayne@68: int dif2=mlen2-clen2; jpayne@68: jpayne@68: int mlen12=b2-a1+1; jpayne@68: jpayne@68: if(Tools.overlap(a1, b1, a2, b2)){//hard case jpayne@68: return mlen12-Tools.max(dif1, dif2); //Approximate jpayne@68: }else{//easy case jpayne@68: return mlen12-dif1-dif2; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: public void addToBaseHistogram(final Read r){ jpayne@68: addToBaseHistogram2(r); jpayne@68: if(r.mate!=null){addToBaseHistogram2(r.mate);} jpayne@68: } jpayne@68: jpayne@68: public void addToBaseHistogram2(final Read r){ jpayne@68: if(r==null || r.bases==null){return;} jpayne@68: final int pairnum=r.samline==null ? r.pairnum() : r.samline.pairnum(); jpayne@68: if(pairnum==1){read2Count++;} jpayne@68: final byte[] bases=r.bases; jpayne@68: final LongList[] lists=baseHist[pairnum]; jpayne@68: for(int i=0; i0; jpayne@68: jpayne@68: if(AVG_QUAL_HIST_FILE!=null){rs.writeAverageQualityToFile(AVG_QUAL_HIST_FILE, paired);} jpayne@68: if(QUAL_HIST_FILE!=null){rs.writeQualityToFile(QUAL_HIST_FILE, paired);} jpayne@68: if(BQUAL_HIST_FILE!=null){rs.writeBQualityToFile(BQUAL_HIST_FILE, paired);} jpayne@68: if(BQUAL_HIST_OVERALL_FILE!=null){rs.writeBQualityOverallToFile(BQUAL_HIST_OVERALL_FILE);} jpayne@68: if(QUAL_COUNT_HIST_FILE!=null){rs.writeQCountToFile(QUAL_COUNT_HIST_FILE, paired);} jpayne@68: if(MATCH_HIST_FILE!=null){rs.writeMatchToFile(MATCH_HIST_FILE, paired);} jpayne@68: if(INSERT_HIST_FILE!=null){rs.writeInsertToFile(INSERT_HIST_FILE);} jpayne@68: if(BASE_HIST_FILE!=null){rs.writeBaseContentToFile(BASE_HIST_FILE, paired);} jpayne@68: if(QUAL_ACCURACY_FILE!=null){rs.writeQualityAccuracyToFile(QUAL_ACCURACY_FILE);} jpayne@68: jpayne@68: if(INDEL_HIST_FILE!=null){rs.writeIndelToFile(INDEL_HIST_FILE);} jpayne@68: if(ERROR_HIST_FILE!=null){rs.writeErrorToFile(ERROR_HIST_FILE);} jpayne@68: if(LENGTH_HIST_FILE!=null){rs.writeLengthToFile(LENGTH_HIST_FILE);} jpayne@68: if(GC_HIST_FILE!=null){rs.writeGCToFile(GC_HIST_FILE, true);} jpayne@68: if(ENTROPY_HIST_FILE!=null){rs.writeEntropyToFile(ENTROPY_HIST_FILE, true);} jpayne@68: if(IDENTITY_HIST_FILE!=null){rs.writeIdentityToFile(IDENTITY_HIST_FILE, true);} jpayne@68: if(TIME_HIST_FILE!=null){rs.writeTimeToFile(TIME_HIST_FILE);} jpayne@68: jpayne@68: boolean b=rs.errorState; jpayne@68: clear(); jpayne@68: return b; jpayne@68: } jpayne@68: clear(); jpayne@68: return false; jpayne@68: } jpayne@68: jpayne@68: public void writeAverageQualityToFile(String fname, boolean writePaired){ jpayne@68: TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); jpayne@68: tsw.start(); jpayne@68: tsw.print("#Quality\tcount1\tfraction1"+(writePaired ? "\tcount2\tfraction2" : "")+"\n"); jpayne@68: jpayne@68: long sum1=Tools.sum(aqualArray[0]); jpayne@68: long sum2=Tools.sum(aqualArray[1]); jpayne@68: double mult1=1.0/Tools.max(1, sum1); jpayne@68: double mult2=1.0/Tools.max(1, sum2); jpayne@68: jpayne@68: long y=sum1+sum2; jpayne@68: for(int i=0; i=0; i--){ jpayne@68: ql1[i]+=ql1[i+1]; jpayne@68: ql2[i]+=ql2[i+1]; jpayne@68: } jpayne@68: jpayne@68: double div1sum=0; jpayne@68: double div2sum=0; jpayne@68: double deviation1sum=0; jpayne@68: double deviation2sum=0; jpayne@68: jpayne@68: if(writePaired){ jpayne@68: for(int i=0; i0 || ql2[i]>0); i++){ jpayne@68: final int a=i+1; jpayne@68: double blin, clin, blog, clog; jpayne@68: final double div1=(double)Tools.max(1, ql1[i]); jpayne@68: final double div2=(double)Tools.max(1, ql2[i]); jpayne@68: jpayne@68: blin=qs1[i]/div1; jpayne@68: clin=qs2[i]/div2; jpayne@68: blog=qsd1[i]/div1; jpayne@68: clog=qsd2[i]/div2; jpayne@68: blog=QualityTools.probErrorToPhredDouble(blog); jpayne@68: clog=QualityTools.probErrorToPhredDouble(clog); jpayne@68: if(measure){ jpayne@68: double bcalc=calcQualityAtPosition(i, 0); jpayne@68: double ccalc=calcQualityAtPosition(i, 1); jpayne@68: jpayne@68: div1sum+=div1; jpayne@68: div2sum+=div2; jpayne@68: deviation1sum+=Math.abs(blog-bcalc)*div1; jpayne@68: deviation2sum+=Math.abs(clog-ccalc)*div2; jpayne@68: jpayne@68: sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n", a, blin, blog, bcalc, clin, clog, ccalc)); jpayne@68: }else{ jpayne@68: sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\t%.3f\n", a, blin, blog, clin, clog)); jpayne@68: } jpayne@68: } jpayne@68: }else{ jpayne@68: for(int i=0; i0; i++){ jpayne@68: final int a=i+1; jpayne@68: double blin, blog; jpayne@68: final double div1=(double)Tools.max(1, ql1[i]); jpayne@68: jpayne@68: blin=qs1[i]/div1; jpayne@68: blog=qsd1[i]/div1; jpayne@68: blog=QualityTools.probErrorToPhredDouble(blog); jpayne@68: if(measure){ jpayne@68: double bcalc=calcQualityAtPosition(i, 0); jpayne@68: jpayne@68: div1sum+=div1; jpayne@68: deviation1sum+=Math.abs(blog-bcalc)*div1; jpayne@68: jpayne@68: sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\t%.3f\n", a, blin, blog, bcalc)); jpayne@68: }else{ jpayne@68: sb.append(String.format(Locale.ROOT, "%d\t%.3f\t%.3f\n", a, blin, blog)); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); jpayne@68: tsw.start(); jpayne@68: if(measure){ jpayne@68: if(writePaired){ jpayne@68: tsw.print(String.format(Locale.ROOT, "#Deviation\t%.4f\t%.4f\n", deviation1sum/div1sum, deviation2sum/div2sum)); jpayne@68: }else{ jpayne@68: tsw.print(String.format(Locale.ROOT, "#Deviation\t%.4f\n", deviation1sum/div1sum)); jpayne@68: } jpayne@68: } jpayne@68: tsw.print(sb); jpayne@68: tsw.poisonAndWait(); jpayne@68: errorState|=tsw.errorState; jpayne@68: } jpayne@68: jpayne@68: private double calcQualityAtPosition(int pos, int pairnum){ jpayne@68: boolean includeNs=true; jpayne@68: long m=matchSum[pairnum][pos]; jpayne@68: long d=delSum[pairnum][pos]; //left-adjacent deletion jpayne@68: long d2=delSum[pairnum][Tools.min(pos, delSum[pairnum].length-1)]; //right-adjacent deletion jpayne@68: long i=insSum[pairnum][pos]; jpayne@68: long s=subSum[pairnum][pos]; jpayne@68: long n=(includeNs ? nSum[pairnum][pos] : 0); //This only tracks no-calls, not no-refs. jpayne@68: long good=Tools.max(0, m*2-d-d2+n/2); jpayne@68: long total=Tools.max(0, m*2+i*2+s*2+n*2); //not d jpayne@68: long bad=total-good; jpayne@68: if(total<1){return 0;} jpayne@68: double error=bad/(double)total; jpayne@68: return QualityTools.probErrorToPhredDouble(error); jpayne@68: } jpayne@68: jpayne@68: public void writeBQualityOverallToFile(String fname){ jpayne@68: final long[] cp30=Arrays.copyOf(bqualHistOverall, bqualHistOverall.length); jpayne@68: for(int i=0; i<30; i++){cp30[i]=0;} jpayne@68: jpayne@68: final long sum=Tools.sum(bqualHistOverall); jpayne@68: final long median=Tools.percentileHistogram(bqualHistOverall, 0.5); jpayne@68: final double mean=Tools.averageHistogram(bqualHistOverall); jpayne@68: final double stdev=Tools.standardDeviationHistogram(bqualHistOverall); jpayne@68: final double mean30=Tools.averageHistogram(cp30); jpayne@68: final double stdev30=Tools.standardDeviationHistogram(cp30); jpayne@68: final double mult=1.0/Tools.max(1, sum); jpayne@68: long y=sum; jpayne@68: jpayne@68: TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); jpayne@68: tsw.start(); jpayne@68: tsw.print("#Median\t"+median+"\n"); jpayne@68: tsw.print("#Mean\t"+String.format(Locale.ROOT, "%.3f", mean)+"\n"); jpayne@68: tsw.print("#STDev\t"+String.format(Locale.ROOT, "%.3f", stdev)+"\n"); jpayne@68: tsw.print("#Mean_30\t"+String.format(Locale.ROOT, "%.3f", mean30)+"\n"); jpayne@68: tsw.print("#STDev_30\t"+String.format(Locale.ROOT, "%.3f", stdev30)+"\n"); jpayne@68: tsw.print("#Quality\tbases\tfraction\n"); jpayne@68: jpayne@68: for(int i=0; i=0; i--){ jpayne@68: if(qualMatch[i]+qualSub[i]+qualIns[i]+qualDel[i]>0){break;} jpayne@68: max=i; jpayne@68: } jpayne@68: jpayne@68: final int qMin=Read.MIN_CALLED_QUALITY(), qMax=Read.MAX_CALLED_QUALITY(); jpayne@68: jpayne@68: double devsum=0; jpayne@68: double devsumSub=0; jpayne@68: long observations=0; jpayne@68: for(int i=0; i0){ jpayne@68: double mult=1.0/sum; jpayne@68: double subRate=(qs)*mult; jpayne@68: double errorRate=(qs+qi+qd)*mult; jpayne@68: jpayne@68: phredSub=QualityTools.probErrorToPhredDouble(subRate); jpayne@68: phred=QualityTools.probErrorToPhredDouble(errorRate); jpayne@68: double deviation=phred-i; jpayne@68: double deviationSub=phredSub-i; jpayne@68: if(i==qMin && deviation<0){deviation=0;} jpayne@68: else if(i==qMax && max==qMax+1 && deviation>0){deviation=0;} jpayne@68: if(i==qMin && deviationSub<0){deviationSub=0;} jpayne@68: else if(i==qMax && max==qMax+1 && deviationSub>0){deviationSub=0;} jpayne@68: devsum+=(Math.abs(deviation)*sum); jpayne@68: devsumSub+=(Math.abs(deviationSub)*sum); jpayne@68: observations+=sum; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, append, false); jpayne@68: tsw.start(); jpayne@68: tsw.print(String.format(Locale.ROOT, "#Deviation\t%.3f\n", devsum/observations)); jpayne@68: tsw.print(String.format(Locale.ROOT, "#DeviationSub\t%.3f\n", devsumSub/observations)); jpayne@68: tsw.print("#Quality\tMatch\tSub\tIns\tDel\tTrueQuality\tTrueQualitySub\n"); jpayne@68: for(int i=0; i0){ jpayne@68: double mult=1.0/sum; jpayne@68: double subRate=(qs)*mult; jpayne@68: double errorRate=(qs+qi+qd)*mult; jpayne@68: jpayne@68: phredSub=QualityTools.probErrorToPhredDouble(subRate); jpayne@68: phred=QualityTools.probErrorToPhredDouble(errorRate); jpayne@68: jpayne@68: // System.err.println("sub: "+qs+"/"+sum+" -> "+subRate+" -> "+phredSub); jpayne@68: } jpayne@68: jpayne@68: tsw.print(i+"\t"+qm+"\t"+qs+"\t"+qi+"\t"+qd); jpayne@68: tsw.print(phred>=0 ? String.format(Locale.ROOT, "\t%.2f", phred) : "\t"); jpayne@68: tsw.print(phredSub>=0 ? String.format(Locale.ROOT, "\t%.2f\n", phredSub) : "\t\n"); jpayne@68: jpayne@68: // System.err.println(qm+"\t"+qs+"\t"+qi+"\t"+qd); jpayne@68: } jpayne@68: jpayne@68: tsw.poisonAndWait(); jpayne@68: errorState|=tsw.errorState; jpayne@68: } jpayne@68: jpayne@68: public void writeMatchToFile(String fname, boolean writePaired){ jpayne@68: if(!writePaired){ jpayne@68: writeMatchToFileUnpaired(fname); jpayne@68: return; jpayne@68: } jpayne@68: TextStreamWriter tsw=new TextStreamWriter(fname, overwrite, false, false); jpayne@68: tsw.start(); jpayne@68: tsw.print("#BaseNum\tMatch1\tSub1\tDel1\tIns1\tN1\tOther1\tMatch2\tSub2\tDel2\tIns2\tN2\tOther2\n"); jpayne@68: jpayne@68: final long[] ms1=matchSum[0], ds1=delSum[0], is1=insSum[0], jpayne@68: ss1=subSum[0], ns1=nSum[0], cs1=clipSum[0], os1=otherSum[0]; jpayne@68: final long[] ms2=matchSum[1], ds2=delSum[1], is2=insSum[1], jpayne@68: ss2=subSum[1], ns2=nSum[1], cs2=clipSum[1], os2=otherSum[1]; jpayne@68: jpayne@68: for(int i=0; i0 || y>0 || !skipZeroIndel){ jpayne@68: bb.clear(); jpayne@68: bb.append(i).tab().append(x).tab().append(y).nl(); jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //TODO: Disabled because it was irritating when graphing. Should write to a different file. jpayne@68: // tsw.print("#Length_bin\tDeletions\n"); jpayne@68: // max=delHist2.size; jpayne@68: // for(int i=0; i0 || !skipZeroIndel){ jpayne@68: // tsw.print((i*DEL_BIN)+"\t"+x+"\n"); jpayne@68: // } jpayne@68: // } jpayne@68: jpayne@68: bsw.poisonAndWait(); jpayne@68: errorState|=bsw.errorState; jpayne@68: } jpayne@68: jpayne@68: public void writeErrorToFile(String fname){ jpayne@68: writeHistogramToFile(fname, "#Errors\tCount\n", errorHist, false); jpayne@68: } jpayne@68: jpayne@68: public void writeLengthToFile(String fname){ jpayne@68: writeHistogramToFile(fname, "#Length\tCount\n", lengthHist, false); jpayne@68: } jpayne@68: jpayne@68: public void writeTimeToFile(String fname){ jpayne@68: writeHistogramToFile(fname, "#Time\tCount\n", timeHist, false); jpayne@68: } jpayne@68: jpayne@68: public void writeHistogramToFile(String fname, String header, LongList hist, boolean printZeros){ jpayne@68: ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); jpayne@68: bsw.start(); jpayne@68: bsw.print(header); jpayne@68: jpayne@68: int max=hist.size; jpayne@68: jpayne@68: ByteBuilder bb=new ByteBuilder(40); jpayne@68: for(int i=0; i0 || printZeros){ jpayne@68: bb.clear().append(i).tab().append(x).nl(); jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: } jpayne@68: bsw.poisonAndWait(); jpayne@68: errorState|=bsw.errorState; jpayne@68: } jpayne@68: jpayne@68: public void writeHistogramToFile(String fname, String header, SuperLongList hist, boolean printZeros){ jpayne@68: ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); jpayne@68: bsw.start(); jpayne@68: bsw.print(header); jpayne@68: jpayne@68: hist.sort(); jpayne@68: long max=hist.max(); jpayne@68: long[] array=hist.array(); jpayne@68: LongList list=hist.list(); jpayne@68: jpayne@68: ByteBuilder bb=new ByteBuilder(40); jpayne@68: for(int i=0; i0 || printZeros){ jpayne@68: bb.clear().append(i).tab().append(x).nl(); jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: } jpayne@68: {//TODO: This part does not bother printing zeros regardless of the flag. jpayne@68: long prev=-1; jpayne@68: int streak=0; jpayne@68: for(int i=0; i0){//Print previous key jpayne@68: bb.clear().append(prev).tab().append(streak).nl(); jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: streak=1; jpayne@68: prev=x; jpayne@68: } jpayne@68: } jpayne@68: if(streak>0){//Print final key jpayne@68: bb.clear().append(prev).tab().append(streak).nl(); jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: } jpayne@68: bsw.poisonAndWait(); jpayne@68: errorState|=bsw.errorState; jpayne@68: } jpayne@68: jpayne@68: public void writeGCToFile(String fname, boolean printZeros){ jpayne@68: final long[] hist; jpayne@68: if(GC_BINS_AUTO && gcMaxReadLen+10 || printZeros){ jpayne@68: //This assumes GC_BINS==100 jpayne@68: // tsw.print(i+"\t"+x+"\n"); jpayne@68: if(GC_PLOT_X){ jpayne@68: bb.clear(); jpayne@68: bb.append(i*gcMult, 1).tab().append(x).tab(); jpayne@68: bb.append(sum*fractionMult, 3).tab(); jpayne@68: jpayne@68: int len=(int)((x*1000)/countsPerX); jpayne@68: for(int j=0; j0){ jpayne@68: if((x*1000f)/countsPerX>0.1f){bb.append('x');} jpayne@68: else{bb.append('.');} jpayne@68: } jpayne@68: jpayne@68: bb.append('\n'); jpayne@68: bsw.print(bb); jpayne@68: }else{ jpayne@68: bb.clear().append(i*gcMult, 1).tab().append(x).nl(); jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: bsw.poisonAndWait(); jpayne@68: errorState|=bsw.errorState; jpayne@68: } jpayne@68: jpayne@68: public void writeEntropyToFile(String fname, boolean printZeros){ jpayne@68: final long[] hist=entropyHist; jpayne@68: jpayne@68: final int bins=hist.length; jpayne@68: final double mult=1.0/Tools.max(1, bins-1); jpayne@68: final long total=Tools.sum(hist); jpayne@68: final long max=Tools.max(hist); jpayne@68: final double countsPerX=Tools.max(1, ((max*1000.0)/40)); jpayne@68: final double fractionMult=1.0/Tools.max(1, total); jpayne@68: long sum=0; jpayne@68: jpayne@68: double mean=Tools.averageHistogram(hist)*mult; jpayne@68: double median=Tools.percentileHistogram(hist, 0.5)*mult; jpayne@68: double mode=Tools.calcModeHistogram(hist)*mult; jpayne@68: double stdev=Tools.standardDeviationHistogram(hist)*mult; jpayne@68: jpayne@68: ByteBuilder bb=new ByteBuilder(256); jpayne@68: bb.append("#Mean\t").append(mean, 6).nl(); jpayne@68: bb.append("#Median\t").append(median, 6).nl(); jpayne@68: bb.append("#Mode\t").append(mode, 6).nl(); jpayne@68: bb.append("#STDev\t").append(stdev, 6).nl(); jpayne@68: bb.append("#Value\tCount\n"); jpayne@68: jpayne@68: ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, false, false); jpayne@68: bsw.start(); jpayne@68: bsw.print(bb); jpayne@68: jpayne@68: for(int i=0; i0 || printZeros){ jpayne@68: bb.clear().append(i*mult, 4).tab().append(x).nl(); jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: } jpayne@68: bsw.poisonAndWait(); jpayne@68: errorState|=bsw.errorState; jpayne@68: } jpayne@68: jpayne@68: public void writeIdentityToFile(String fname, boolean printZeros){ jpayne@68: final long[] hist, histb; jpayne@68: if(ID_BINS_AUTO && idMaxReadLen+10 || printZeros){ jpayne@68: tsw.print(String.format(Locale.ROOT, "%.1f", i*mult)+"\t"+x+"\t"+x2+"\n"); jpayne@68: } jpayne@68: } jpayne@68: tsw.poisonAndWait(); jpayne@68: errorState|=tsw.errorState; jpayne@68: } jpayne@68: jpayne@68: //Tracks to see if read2s have been encountered, for displaying stats. jpayne@68: private long read2Count=0; jpayne@68: jpayne@68: public long pairedCount=0; jpayne@68: public long unpairedCount=0; jpayne@68: jpayne@68: public final long[][] aqualArray; jpayne@68: public final long[][] qualLength; jpayne@68: public final long[][] qualSum; jpayne@68: jpayne@68: public final long[][][] bqualHist; jpayne@68: public final long[] bqualHistOverall; jpayne@68: jpayne@68: public final long[][] qcountHist; jpayne@68: jpayne@68: public final double[][] qualSumDouble; jpayne@68: jpayne@68: public final long[][] matchSum; jpayne@68: public final long[][] delSum; jpayne@68: public final long[][] insSum; jpayne@68: public final long[][] subSum; jpayne@68: public final long[][] nSum; jpayne@68: public final long[][] clipSum; jpayne@68: public final long[][] otherSum; jpayne@68: jpayne@68: public final long[] qualMatch; jpayne@68: public final long[] qualSub; jpayne@68: public final long[] qualIns; jpayne@68: public final long[] qualDel; jpayne@68: jpayne@68: public final long[] gcHist; jpayne@68: public final long[] entropyHist; jpayne@68: public final EntropyTracker eTracker; jpayne@68: public final long[] idHist; jpayne@68: public final long[] idBaseHist; jpayne@68: private int gcMaxReadLen=1; jpayne@68: private int idMaxReadLen=1; jpayne@68: jpayne@68: public final LongList[][] baseHist; jpayne@68: jpayne@68: /** Insert size */ jpayne@68: public final LongList insertHist; jpayne@68: /** Read length */ jpayne@68: public final SuperLongList lengthHist; jpayne@68: /** Number errors per read */ jpayne@68: public final LongList errorHist; jpayne@68: /** Insertion length */ jpayne@68: public final LongList insHist; jpayne@68: /** Deletion length */ jpayne@68: public final LongList delHist; jpayne@68: /** Deletion length, binned */ jpayne@68: public final LongList delHist2; jpayne@68: /** Time */ jpayne@68: public final LongList timeHist; jpayne@68: jpayne@68: public static boolean REQUIRE_PROPER_PAIR=true; jpayne@68: public static int MAXLEN=6000; jpayne@68: public static int MAXINSERTLEN=80000; jpayne@68: @Deprecated jpayne@68: public static int MAXLENGTHLEN=80000; jpayne@68: public static final int MAXTIMELEN=80000; jpayne@68: public static final int MAXINSLEN=1000; jpayne@68: public static final int MAXDELLEN=1000; jpayne@68: public static final int MAXDELLEN2=1000000; jpayne@68: public static final int DEL_BIN=100; jpayne@68: public static int ID_BINS=100; jpayne@68: public static boolean ID_BINS_AUTO=false; jpayne@68: public static int GC_BINS=100; jpayne@68: public static boolean GC_BINS_AUTO=false; jpayne@68: public static boolean GC_PLOT_X=false; jpayne@68: public static int ENTROPY_BINS=1000; jpayne@68: jpayne@68: public static double GCMean; jpayne@68: public static double GCMedian; jpayne@68: public static double GCMode; jpayne@68: public static double GCSTDev; jpayne@68: jpayne@68: // public static double entropyMean; jpayne@68: // public static double entropyMedian; jpayne@68: // public static double entropyMode; jpayne@68: // public static double entropySTDev; jpayne@68: jpayne@68: public boolean errorState=false; jpayne@68: jpayne@68: public static ReadStats merged=null; jpayne@68: jpayne@68: // public static double matedPercent=0; jpayne@68: jpayne@68: public static void clear(){ jpayne@68: COLLECT_QUALITY_STATS=false; jpayne@68: COLLECT_QUALITY_ACCURACY=false; jpayne@68: COLLECT_MATCH_STATS=false; jpayne@68: COLLECT_INSERT_STATS=false; jpayne@68: COLLECT_BASE_STATS=false; jpayne@68: COLLECT_INDEL_STATS=false; jpayne@68: COLLECT_GC_STATS=false; jpayne@68: COLLECT_ENTROPY_STATS=false; jpayne@68: COLLECT_ERROR_STATS=false; jpayne@68: COLLECT_LENGTH_STATS=false; jpayne@68: COLLECT_IDENTITY_STATS=false; jpayne@68: COLLECT_TIME_STATS=false; jpayne@68: jpayne@68: AVG_QUAL_HIST_FILE=null; jpayne@68: QUAL_HIST_FILE=null; jpayne@68: BQUAL_HIST_FILE=null; jpayne@68: QUAL_COUNT_HIST_FILE=null; jpayne@68: BQUAL_HIST_OVERALL_FILE=null; jpayne@68: QUAL_ACCURACY_FILE=null; jpayne@68: MATCH_HIST_FILE=null; jpayne@68: INSERT_HIST_FILE=null; jpayne@68: BASE_HIST_FILE=null; jpayne@68: INDEL_HIST_FILE=null; jpayne@68: ERROR_HIST_FILE=null; jpayne@68: LENGTH_HIST_FILE=null; jpayne@68: GC_HIST_FILE=null; jpayne@68: ENTROPY_HIST_FILE=null; jpayne@68: IDENTITY_HIST_FILE=null; jpayne@68: TIME_HIST_FILE=null; jpayne@68: } jpayne@68: jpayne@68: public static ArrayList objectList=new ArrayList(); jpayne@68: public static boolean COLLECT_QUALITY_STATS=false; jpayne@68: public static boolean COLLECT_QUALITY_ACCURACY=false; jpayne@68: public static boolean COLLECT_MATCH_STATS=false; jpayne@68: public static boolean COLLECT_INSERT_STATS=false; jpayne@68: public static boolean COLLECT_BASE_STATS=false; jpayne@68: public static boolean COLLECT_INDEL_STATS=false; jpayne@68: public static boolean COLLECT_GC_STATS=false; jpayne@68: public static boolean COLLECT_ENTROPY_STATS=false; jpayne@68: public static boolean COLLECT_ERROR_STATS=false; jpayne@68: public static boolean COLLECT_LENGTH_STATS=false; jpayne@68: public static boolean COLLECT_IDENTITY_STATS=false; jpayne@68: public static boolean COLLECT_TIME_STATS=false; jpayne@68: jpayne@68: public static boolean collectingStats(){ jpayne@68: return COLLECT_QUALITY_STATS || COLLECT_QUALITY_ACCURACY || COLLECT_MATCH_STATS || COLLECT_INSERT_STATS jpayne@68: || COLLECT_BASE_STATS || COLLECT_INDEL_STATS || COLLECT_GC_STATS || COLLECT_ENTROPY_STATS jpayne@68: || COLLECT_ERROR_STATS || COLLECT_LENGTH_STATS || COLLECT_IDENTITY_STATS || COLLECT_TIME_STATS; jpayne@68: } jpayne@68: jpayne@68: public static boolean usePairGC=true; jpayne@68: public static boolean allowEntropyNs=true; jpayne@68: jpayne@68: public static String AVG_QUAL_HIST_FILE=null; jpayne@68: public static String QUAL_HIST_FILE=null; jpayne@68: public static String BQUAL_HIST_FILE=null; jpayne@68: public static String QUAL_COUNT_HIST_FILE=null; jpayne@68: public static String BQUAL_HIST_OVERALL_FILE=null; jpayne@68: public static String QUAL_ACCURACY_FILE=null; jpayne@68: public static String MATCH_HIST_FILE=null; jpayne@68: public static String INSERT_HIST_FILE=null; jpayne@68: public static String BASE_HIST_FILE=null; jpayne@68: public static String INDEL_HIST_FILE=null; jpayne@68: public static String ERROR_HIST_FILE=null; jpayne@68: public static String LENGTH_HIST_FILE=null; jpayne@68: public static String GC_HIST_FILE=null; jpayne@68: public static String ENTROPY_HIST_FILE=null; jpayne@68: public static String IDENTITY_HIST_FILE=null; jpayne@68: public static String TIME_HIST_FILE=null; jpayne@68: jpayne@68: public static boolean overwrite=false; jpayne@68: public static boolean append=false; jpayne@68: public static final boolean verbose=false; jpayne@68: jpayne@68: public static boolean skipZeroInsertCount=true; jpayne@68: public static boolean skipZeroIndel=true; jpayne@68: jpayne@68: }