jpayne@68: package aligner; jpayne@68: jpayne@68: import java.util.Arrays; jpayne@68: jpayne@68: import dna.AminoAcid; jpayne@68: import dna.ChromosomeArray; jpayne@68: import dna.Data; jpayne@68: import shared.Tools; jpayne@68: import stream.Read; jpayne@68: import stream.SiteScore; jpayne@68: jpayne@68: /** jpayne@68: * Based on MSA9ts, with transform scores tweaked for PacBio. */ jpayne@68: public final class MultiStateAligner9PacBioAdapter3 { jpayne@68: jpayne@68: jpayne@68: public MultiStateAligner9PacBioAdapter3(int maxRows_, int maxColumns_){ jpayne@68: // assert(maxColumns_>=200); jpayne@68: // assert(maxRows_>=200); jpayne@68: maxRows=maxRows_; jpayne@68: maxColumns=maxColumns_; jpayne@68: packed=new int[3][maxRows+1][]; jpayne@68: jpayne@68: for(int i=0; i<3; i++){ jpayne@68: packed[i][0]=new int[maxColumns+1]; jpayne@68: packed[i][1]=new int[maxColumns+1]; jpayne@68: packed[i][2]=new int[maxColumns+1]; jpayne@68: for(int j=3; j0); jpayne@68: rows=read.length; jpayne@68: columns=refEndLoc-refStartLoc+1; jpayne@68: jpayne@68: if(/*read.length<40 || */false || minScore<=0 || columns>read.length+Tools.min(100, read.length)){ jpayne@68: // assert(false) : minScore; jpayne@68: return fillUnlimited(read, ref, refStartLoc, refEndLoc); jpayne@68: } jpayne@68: jpayne@68: minScore-=100; //Increases quality trivially jpayne@68: jpayne@68: assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows+"\n"+ jpayne@68: refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n"; jpayne@68: assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns+"\n"+ jpayne@68: refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n"; jpayne@68: jpayne@68: assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc+"\n"+ jpayne@68: refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n"; jpayne@68: assert(refEndLocBADoff); //TODO: Actually, it needs to be substantially more. jpayne@68: assert(subfloor=0; i--){ jpayne@68: vertLimit[i]=Tools.max(vertLimit[i+1]-POINTSoff_MATCH2, floor); jpayne@68: } jpayne@68: jpayne@68: horizLimit[columns]=minScore_off; jpayne@68: for(int i=columns-1; i>=0; i--){ jpayne@68: horizLimit[i]=Tools.max(horizLimit[i+1]-POINTSoff_MATCH2, floor); jpayne@68: } jpayne@68: jpayne@68: for(int row=1; row<=rows; row++){ jpayne@68: jpayne@68: { jpayne@68: int score=calcInsScoreOffset(row); jpayne@68: packed[0][row][0]=score; jpayne@68: packed[1][row][0]=score; jpayne@68: packed[2][row][0]=score; jpayne@68: } jpayne@68: jpayne@68: int colStart=minGoodCol; jpayne@68: int colStop=maxGoodCol; jpayne@68: jpayne@68: minGoodCol=-1; jpayne@68: maxGoodCol=-2; jpayne@68: jpayne@68: final int vlimit=vertLimit[row]; jpayne@68: jpayne@68: if(verbose2){ jpayne@68: System.out.println(); jpayne@68: System.out.println("row="+row); jpayne@68: System.out.println("colStart="+colStart); jpayne@68: System.out.println("colStop="+colStop); jpayne@68: System.out.println("vlimit="+(vlimit>>SCOREOFFSET)+"\t"+(vlimit)); jpayne@68: } jpayne@68: jpayne@68: if(colStart<0 || colStop1){ jpayne@68: assert(row>0); jpayne@68: packed[MODE_MS][row][colStart-1]=subfloor; jpayne@68: packed[MODE_INS][row][colStart-1]=subfloor; jpayne@68: packed[MODE_DEL][row][colStart-1]=subfloor; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: for(int col=colStart; col<=columns; col++){ jpayne@68: jpayne@68: jpayne@68: if(verbose2){ jpayne@68: System.out.println("\ncol "+col); jpayne@68: } jpayne@68: jpayne@68: final byte call0=(row<2 ? (byte)'?' : read[row-2]); jpayne@68: final byte call1=read[row-1]; jpayne@68: final byte ref0=(col<2 ? (byte)'!' : ref[refStartLoc+col-2]); jpayne@68: final byte ref1=ref[refStartLoc+col-1]; jpayne@68: jpayne@68: // final boolean match=(read[row-1]==ref[refStartLoc+col-1]); jpayne@68: // final boolean prevMatch=(row<2 || col<2 ? false : read[row-2]==ref[refStartLoc+col-2]); jpayne@68: final boolean match=(call1==ref1); jpayne@68: final boolean prevMatch=(call0==ref0); jpayne@68: jpayne@68: // System.err.println("") jpayne@68: jpayne@68: iterationsLimited++; jpayne@68: final int limit=Tools.max(vlimit, horizLimit[col]); jpayne@68: final int limit3=Tools.max(floor, (match ? limit-POINTSoff_MATCH2 : limit-POINTSoff_SUB3)); jpayne@68: jpayne@68: final int delNeeded=Tools.max(0, row-col-1); jpayne@68: final int insNeeded=Tools.max(0, (rows-row)-(columns-col)-1); jpayne@68: jpayne@68: final int delPenalty=calcDelScoreOffset(delNeeded); jpayne@68: final int insPenalty=calcInsScoreOffset(insNeeded); jpayne@68: jpayne@68: jpayne@68: final int scoreFromDiag_MS=packed[MODE_MS][row-1][col-1]&SCOREMASK; jpayne@68: final int scoreFromDel_MS=packed[MODE_DEL][row-1][col-1]&SCOREMASK; jpayne@68: final int scoreFromIns_MS=packed[MODE_INS][row-1][col-1]&SCOREMASK; jpayne@68: jpayne@68: final int scoreFromDiag_DEL=packed[MODE_MS][row][col-1]&SCOREMASK; jpayne@68: final int scoreFromDel_DEL=packed[MODE_DEL][row][col-1]&SCOREMASK; jpayne@68: jpayne@68: final int scoreFromDiag_INS=packed[MODE_MS][row-1][col]&SCOREMASK; jpayne@68: final int scoreFromIns_INS=packed[MODE_INS][row-1][col]&SCOREMASK; jpayne@68: jpayne@68: // if(scoreFromDiag_MS=scoreD && scoreMS>=scoreI){ jpayne@68: score=scoreMS; jpayne@68: time=(prevMatch ? streak+1 : 1); jpayne@68: prevState=MODE_MS; jpayne@68: }else if(scoreD>=scoreI){ jpayne@68: score=scoreD; jpayne@68: time=1; jpayne@68: prevState=MODE_DEL; jpayne@68: }else{ jpayne@68: score=scoreI; jpayne@68: time=1; jpayne@68: prevState=MODE_INS; jpayne@68: } jpayne@68: jpayne@68: }else{ jpayne@68: jpayne@68: int scoreMS; jpayne@68: if(ref1!='N' && call1!='N'){ jpayne@68: scoreMS=scoreFromDiag_MS+(prevMatch ? (streak<=1 ? POINTSoff_SUBR : POINTSoff_SUB) : subScoreArray[streak]); jpayne@68: }else{ jpayne@68: scoreMS=scoreFromDiag_MS+POINTSoff_NOCALL; jpayne@68: } jpayne@68: jpayne@68: int scoreD=scoreFromDel_MS+POINTSoff_SUB; //+2 to move it as close as possible to the deletion / insertion jpayne@68: int scoreI=scoreFromIns_MS+POINTSoff_SUB; jpayne@68: jpayne@68: if(scoreMS>=scoreD && scoreMS>=scoreI){ jpayne@68: score=scoreMS; jpayne@68: time=(prevMatch ? 1 : streak+1); jpayne@68: // time=(prevMatch ? (streak==1 ? 3 : 1) : streak+1); jpayne@68: prevState=MODE_MS; jpayne@68: }else if(scoreD>=scoreI){ jpayne@68: score=scoreD; jpayne@68: time=1; jpayne@68: prevState=MODE_DEL; jpayne@68: }else{ jpayne@68: score=scoreI; jpayne@68: time=1; jpayne@68: prevState=MODE_INS; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: final int limit2; jpayne@68: if(delNeeded>0){ jpayne@68: limit2=limit-delPenalty; jpayne@68: }else if(insNeeded>0){ jpayne@68: limit2=limit-insPenalty; jpayne@68: }else{ jpayne@68: limit2=limit; jpayne@68: } jpayne@68: assert(limit2>=limit); jpayne@68: jpayne@68: if(verbose2){System.err.println("MS: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));} jpayne@68: jpayne@68: if(score>=limit2){ jpayne@68: maxGoodCol=col; jpayne@68: if(minGoodCol<0){minGoodCol=col;} jpayne@68: }else{ jpayne@68: score=subfloor; jpayne@68: } jpayne@68: jpayne@68: assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;} jpayne@68: assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead"; jpayne@68: assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: // packed[MODE_MS][row][col]=(score|prevState|time); jpayne@68: packed[MODE_MS][row][col]=(score|time); jpayne@68: assert((score&SCOREMASK)==score); jpayne@68: // assert((prevState&MODEMASK)==prevState); jpayne@68: assert((time&TIMEMASK)==time); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if((scoreFromDiag_DEL<=limit && scoreFromDel_DEL<=limit)){ jpayne@68: // assert((scoreFromDiag_DEL<=limit && scoreFromDel_DEL<=limit)) : scoreFromDiag_DEL+", "+row; jpayne@68: packed[MODE_DEL][row][col]=subfloor; jpayne@68: }else{//Calculate DEL score jpayne@68: jpayne@68: final int streak=packed[MODE_DEL][row][col-1]&TIMEMASK; jpayne@68: jpayne@68: int scoreMS=scoreFromDiag_DEL+POINTSoff_DEL; jpayne@68: int scoreD=scoreFromDel_DEL+delScoreArray[streak]; jpayne@68: // int scoreI=scoreFromIns+POINTSoff_DEL; jpayne@68: jpayne@68: jpayne@68: if(ref1=='N'){ jpayne@68: scoreMS+=POINTSoff_DEL_REF_N; jpayne@68: scoreD+=POINTSoff_DEL_REF_N; jpayne@68: } jpayne@68: jpayne@68: //if(match){scoreMS=subfloor;} jpayne@68: jpayne@68: int score; jpayne@68: int time; jpayne@68: byte prevState; jpayne@68: if(scoreMS>=scoreD){ jpayne@68: score=scoreMS; jpayne@68: time=1; jpayne@68: prevState=MODE_MS; jpayne@68: }else{ jpayne@68: score=scoreD; jpayne@68: time=streak+1; jpayne@68: prevState=MODE_DEL; jpayne@68: } jpayne@68: jpayne@68: final int limit2; jpayne@68: if(insNeeded>0){ jpayne@68: limit2=limit-insPenalty; jpayne@68: }else if(delNeeded>0){ jpayne@68: limit2=limit-calcDelScoreOffset(time+delNeeded)+calcDelScoreOffset(time); jpayne@68: }else{ jpayne@68: limit2=limit; jpayne@68: } jpayne@68: assert(limit2>=limit); jpayne@68: if(verbose2){System.err.println("DEL: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));} jpayne@68: jpayne@68: if(score>=limit2){ jpayne@68: maxGoodCol=col; jpayne@68: if(minGoodCol<0){minGoodCol=col;} jpayne@68: }else{ jpayne@68: score=subfloor; jpayne@68: } jpayne@68: jpayne@68: assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;} jpayne@68: assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead"; jpayne@68: assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: // packed[MODE_DEL][row][col]=(score|prevState|time); jpayne@68: packed[MODE_DEL][row][col]=(score|time); jpayne@68: assert((score&SCOREMASK)==score); jpayne@68: // assert((prevState&MODEMASK)==prevState); jpayne@68: assert((time&TIMEMASK)==time); jpayne@68: } jpayne@68: jpayne@68: if(scoreFromDiag_INS<=limit && scoreFromIns_INS<=limit){ jpayne@68: packed[MODE_INS][row][col]=subfloor; jpayne@68: }else{//Calculate INS score jpayne@68: jpayne@68: final int streak=packed[MODE_INS][row-1][col]&TIMEMASK; jpayne@68: jpayne@68: int scoreMS=scoreFromDiag_INS+POINTSoff_INS; jpayne@68: // int scoreD=scoreFromDel+POINTSoff_INS; jpayne@68: int scoreI=scoreFromIns_INS+insScoreArray[streak]; jpayne@68: jpayne@68: jpayne@68: // System.err.println("("+row+","+col+")\t"+scoreFromDiag+"+"+POINTSoff_INS+"="+scoreM+", "+ jpayne@68: // scoreFromSub+"+"+POINTSoff_INS+"="+scoreS+", " jpayne@68: // +scoreD+", "+scoreFromIns+"+"+ jpayne@68: // (streak==0 ? POINTSoff_INS : streak=colStop){ jpayne@68: if(col>colStop && maxGoodCol1){ jpayne@68: packed[MODE_MS][row-1][col+1]=subfloor; jpayne@68: packed[MODE_INS][row-1][col+1]=subfloor; jpayne@68: packed[MODE_DEL][row-1][col+1]=subfloor; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: jpayne@68: int maxCol=-1; jpayne@68: int maxState=-1; jpayne@68: int maxScore=Integer.MIN_VALUE; jpayne@68: jpayne@68: for(int state=0; statemaxScore){ jpayne@68: maxScore=x; jpayne@68: maxCol=col; jpayne@68: maxState=state; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: assert(maxScore>=BADoff); jpayne@68: // if(maxScore==BADoff){ jpayne@68: // return null; jpayne@68: // } jpayne@68: // if(maxScore>=SCOREOFFSET; jpayne@68: jpayne@68: // System.err.println("Returning "+rows+", "+maxCol+", "+maxState+", "+maxScore); jpayne@68: return new int[] {rows, maxCol, maxState, maxScore}; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /** return new int[] {rows, maxC, maxS, max}; jpayne@68: * Does not require a min score (ie, same as old method) */ jpayne@68: private final int[] fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc){ jpayne@68: rows=read.length; jpayne@68: columns=refEndLoc-refStartLoc+1; jpayne@68: jpayne@68: final int maxGain=(read.length-1)*POINTSoff_MATCH2+POINTSoff_MATCH; jpayne@68: final int subfloor=0-2*maxGain; jpayne@68: assert(subfloor>BADoff && subfloor*2>BADoff); //TODO: Actually, it needs to be substantially more. jpayne@68: jpayne@68: //temporary, for finding a bug jpayne@68: if(rows>maxRows || columns>maxColumns){ jpayne@68: throw new RuntimeException("rows="+rows+", maxRows="+maxRows+", cols="+columns+", maxCols="+maxColumns+"\n"+new String(read)+"\n"); jpayne@68: } jpayne@68: jpayne@68: assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows; jpayne@68: assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns; jpayne@68: jpayne@68: assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc; jpayne@68: assert(refEndLoc=scoreD && scoreMS>=scoreI){ jpayne@68: score=scoreMS; jpayne@68: time=(prevMatch ? streak+1 : 1); jpayne@68: // prevState=MODE_MS; jpayne@68: }else if(scoreD>=scoreI){ jpayne@68: score=scoreD; jpayne@68: time=1; jpayne@68: // prevState=MODE_DEL; jpayne@68: }else{ jpayne@68: score=scoreI; jpayne@68: time=1; jpayne@68: // prevState=MODE_INS; jpayne@68: } jpayne@68: jpayne@68: assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;} jpayne@68: assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: // packed[MODE_MS][row][col]=(score|prevState|time); jpayne@68: packed[MODE_MS][row][col]=(score|time); jpayne@68: assert((score&SCOREMASK)==score); jpayne@68: // assert((prevState&MODEMASK)==prevState); jpayne@68: assert((time&TIMEMASK)==time); jpayne@68: jpayne@68: }else{ jpayne@68: jpayne@68: int scoreMS; jpayne@68: if(ref1!='N' && call1!='N'){ jpayne@68: scoreMS=scoreFromDiag+(prevMatch ? (streak<=1 ? POINTSoff_SUBR : POINTSoff_SUB) : subScoreArray[streak]); jpayne@68: }else{ jpayne@68: scoreMS=scoreFromDiag+POINTSoff_NOCALL; jpayne@68: } jpayne@68: jpayne@68: int scoreD=scoreFromDel+POINTSoff_SUB; //+2 to move it as close as possible to the deletion / insertion jpayne@68: int scoreI=scoreFromIns+POINTSoff_SUB; jpayne@68: jpayne@68: int score; jpayne@68: int time; jpayne@68: byte prevState; jpayne@68: if(scoreMS>=scoreD && scoreMS>=scoreI){ jpayne@68: score=scoreMS; jpayne@68: time=(prevMatch ? 1 : streak+1); jpayne@68: // time=(prevMatch ? (streak==1 ? 3 : 1) : streak+1); jpayne@68: prevState=MODE_MS; jpayne@68: }else if(scoreD>=scoreI){ jpayne@68: score=scoreD; jpayne@68: time=1; jpayne@68: prevState=MODE_DEL; jpayne@68: }else{ jpayne@68: score=scoreI; jpayne@68: time=1; jpayne@68: prevState=MODE_INS; jpayne@68: } jpayne@68: jpayne@68: assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;} jpayne@68: assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: // packed[MODE_MS][row][col]=(score|prevState|time); jpayne@68: packed[MODE_MS][row][col]=(score|time); jpayne@68: assert((score&SCOREMASK)==score); jpayne@68: // assert((prevState&MODEMASK)==prevState); jpayne@68: assert((time&TIMEMASK)==time); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: {//Calculate DEL score jpayne@68: jpayne@68: final int streak=packed[MODE_DEL][row][col-1]&TIMEMASK; jpayne@68: jpayne@68: final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK; jpayne@68: final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK; jpayne@68: jpayne@68: int scoreMS=scoreFromDiag+POINTSoff_DEL; jpayne@68: int scoreD=scoreFromDel+delScoreArray[streak]; jpayne@68: // int scoreI=scoreFromIns+POINTSoff_DEL; jpayne@68: jpayne@68: if(ref1=='N'){ jpayne@68: scoreMS+=POINTSoff_DEL_REF_N; jpayne@68: scoreD+=POINTSoff_DEL_REF_N; jpayne@68: } jpayne@68: jpayne@68: //if(match){scoreMS=subfloor;} jpayne@68: jpayne@68: int score; jpayne@68: int time; jpayne@68: byte prevState; jpayne@68: if(scoreMS>=scoreD){ jpayne@68: score=scoreMS; jpayne@68: time=1; jpayne@68: prevState=MODE_MS; jpayne@68: }else{ jpayne@68: score=scoreD; jpayne@68: time=streak+1; jpayne@68: prevState=MODE_DEL; jpayne@68: } jpayne@68: jpayne@68: assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;} jpayne@68: assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead"; jpayne@68: // packed[MODE_DEL][row][col]=(score|prevState|time); jpayne@68: packed[MODE_DEL][row][col]=(score|time); jpayne@68: assert((score&SCOREMASK)==score); jpayne@68: // assert((prevState&MODEMASK)==prevState); jpayne@68: assert((time&TIMEMASK)==time); jpayne@68: } jpayne@68: jpayne@68: {//Calculate INS score jpayne@68: jpayne@68: final int streak=packed[MODE_INS][row-1][col]&TIMEMASK; jpayne@68: jpayne@68: final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK; jpayne@68: final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK; jpayne@68: jpayne@68: int scoreMS=scoreFromDiag+POINTSoff_INS; jpayne@68: // int scoreD=scoreFromDel+POINTSoff_INS; jpayne@68: int scoreI=scoreFromIns+insScoreArray[streak]; jpayne@68: jpayne@68: // System.err.println("("+row+","+col+")\t"+scoreFromDiag+"+"+POINTSoff_INS+"="+scoreM+", "+ jpayne@68: // scoreFromSub+"+"+POINTSoff_INS+"="+scoreS+", " jpayne@68: // +scoreD+", "+scoreFromIns+"+"+ jpayne@68: // (streak==0 ? POINTSoff_INS : streakmaxScore){ jpayne@68: maxScore=x; jpayne@68: maxCol=col; jpayne@68: maxState=state; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: maxScore>>=SCOREOFFSET; jpayne@68: jpayne@68: // System.err.println("Returning "+rows+", "+maxCol+", "+maxState+", "+maxScore); jpayne@68: return new int[] {rows, maxCol, maxState, maxScore}; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /** Generates the match string */ jpayne@68: public final byte[] traceback(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state){ jpayne@68: // assert(false); jpayne@68: assert(refStartLoc<=refEndLoc) : refStartLoc+", "+refEndLoc; jpayne@68: assert(row==rows); jpayne@68: jpayne@68: byte[] out=new byte[row+col-1]; //TODO if an out of bound crash occurs, try removing the "-1". jpayne@68: int outPos=0; jpayne@68: jpayne@68: if(state==MODE_INS){ jpayne@68: //TODO ? Maybe not needed. jpayne@68: } jpayne@68: jpayne@68: while(row>0 && col>0){ jpayne@68: jpayne@68: // byte prev0=(byte)(packed[state][row][col]&MODEMASK); jpayne@68: jpayne@68: final int time=packed[state][row][col]&TIMEMASK; jpayne@68: final byte prev; jpayne@68: jpayne@68: // System.err.println("state="+state+", prev="+prev+", row="+row+", col="+col+", score="+scores[state][row][col]); jpayne@68: jpayne@68: if(state==MODE_MS){ jpayne@68: if(time>1){prev=(byte)state;} jpayne@68: else{ jpayne@68: final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK; jpayne@68: final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK; jpayne@68: final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK; jpayne@68: if(scoreFromDiag>=scoreFromDel && scoreFromDiag>=scoreFromIns){prev=MODE_MS;} jpayne@68: else if(scoreFromDel>=scoreFromIns){prev=MODE_DEL;} jpayne@68: else{prev=MODE_INS;} jpayne@68: } jpayne@68: jpayne@68: byte c=read[row-1]; jpayne@68: byte r=ref[refStartLoc+col-1]; jpayne@68: if(c==r){ jpayne@68: out[outPos]='m'; jpayne@68: }else{ jpayne@68: if(!AminoAcid.isFullyDefined(c)){ jpayne@68: out[outPos]='N'; jpayne@68: }else if(!AminoAcid.isFullyDefined(r)){ jpayne@68: // out[outPos]='X'; jpayne@68: out[outPos]='N'; jpayne@68: }else{ jpayne@68: out[outPos]='S'; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: row--; jpayne@68: col--; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: if(time>1){prev=(byte)state;} jpayne@68: else{ jpayne@68: final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK; jpayne@68: final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK; jpayne@68: if(scoreFromDiag>=scoreFromDel){prev=MODE_MS;} jpayne@68: else{prev=MODE_DEL;} jpayne@68: } jpayne@68: jpayne@68: byte r=ref[refStartLoc+col-1]; jpayne@68: out[outPos]='D'; jpayne@68: jpayne@68: col--; jpayne@68: }else{ jpayne@68: if(time>1){prev=(byte)state;} jpayne@68: else{ jpayne@68: final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK; jpayne@68: final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK; jpayne@68: if(scoreFromDiag>=scoreFromIns){prev=MODE_MS;} jpayne@68: else{prev=MODE_INS;} jpayne@68: } jpayne@68: jpayne@68: assert(state==MODE_INS) : state; jpayne@68: if(col==0){ jpayne@68: out[outPos]='X'; jpayne@68: }else if(col>=columns){ jpayne@68: out[outPos]='Y'; jpayne@68: }else{ jpayne@68: out[outPos]='I'; jpayne@68: } jpayne@68: row--; jpayne@68: } jpayne@68: jpayne@68: // assert(prev==prev0); jpayne@68: state=prev; jpayne@68: outPos++; jpayne@68: } jpayne@68: jpayne@68: assert(row==0 || col==0); jpayne@68: if(col!=row){ jpayne@68: while(row>0){ jpayne@68: out[outPos]='X'; jpayne@68: outPos++; jpayne@68: row--; jpayne@68: col--; jpayne@68: } jpayne@68: if(col>0){ jpayne@68: //do nothing jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: jpayne@68: //Shrink and reverse the string jpayne@68: byte[] out2=new byte[outPos]; jpayne@68: for(int i=0; i=0 && maxState=0 && maxRow=0 && maxColdifC){ jpayne@68: score+=POINTSoff_NOREF; jpayne@68: difR--; jpayne@68: } jpayne@68: jpayne@68: row+=difR; jpayne@68: col+=difR; jpayne@68: jpayne@68: } jpayne@68: jpayne@68: assert(refStartLoc<=refEndLoc); jpayne@68: assert(row==rows); jpayne@68: jpayne@68: jpayne@68: final int bestRefStop=refStartLoc+col-1; jpayne@68: jpayne@68: while(row>0 && col>0){ jpayne@68: // System.err.println("state="+state+", row="+row+", col="+col); jpayne@68: jpayne@68: jpayne@68: jpayne@68: // byte prev0=(byte)(packed[state][row][col]&MODEMASK); jpayne@68: jpayne@68: final int time=packed[state][row][col]&TIMEMASK; jpayne@68: final byte prev; jpayne@68: jpayne@68: if(state==MODE_MS){ jpayne@68: if(time>1){prev=(byte)state;} jpayne@68: else{ jpayne@68: final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK; jpayne@68: final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK; jpayne@68: final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK; jpayne@68: if(scoreFromDiag>=scoreFromDel && scoreFromDiag>=scoreFromIns){prev=MODE_MS;} jpayne@68: else if(scoreFromDel>=scoreFromIns){prev=MODE_DEL;} jpayne@68: else{prev=MODE_INS;} jpayne@68: } jpayne@68: row--; jpayne@68: col--; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: if(time>1){prev=(byte)state;} jpayne@68: else{ jpayne@68: final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK; jpayne@68: final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK; jpayne@68: if(scoreFromDiag>=scoreFromDel){prev=MODE_MS;} jpayne@68: else{prev=MODE_DEL;} jpayne@68: } jpayne@68: col--; jpayne@68: }else{ jpayne@68: assert(state==MODE_INS); jpayne@68: if(time>1){prev=(byte)state;} jpayne@68: else{ jpayne@68: final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK; jpayne@68: final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK; jpayne@68: if(scoreFromDiag>=scoreFromIns){prev=MODE_MS;} jpayne@68: else{prev=MODE_INS;} jpayne@68: } jpayne@68: row--; jpayne@68: } jpayne@68: jpayne@68: if(col<0){ jpayne@68: System.err.println(row); jpayne@68: break; //prevents an out of bounds access jpayne@68: jpayne@68: } jpayne@68: jpayne@68: // assert(prev==prev0); jpayne@68: state=prev; jpayne@68: jpayne@68: // System.err.println("state2="+state+", row2="+row+", col2="+col+"\n"); jpayne@68: } jpayne@68: // assert(false) : row+", "+col; jpayne@68: if(row>col){ jpayne@68: col-=row; jpayne@68: } jpayne@68: jpayne@68: final int bestRefStart=refStartLoc+col; jpayne@68: jpayne@68: score>>=SCOREOFFSET; jpayne@68: int[] rvec; jpayne@68: if(bestRefStartrefEndLoc){ //Suggest extra padding in cases of overflow jpayne@68: int padLeft=Tools.max(0, refStartLoc-bestRefStart); jpayne@68: int padRight=Tools.max(0, bestRefStop-refEndLoc); jpayne@68: rvec=new int[] {score, bestRefStart, bestRefStop, padLeft, padRight}; jpayne@68: }else{ jpayne@68: rvec=new int[] {score, bestRefStart, bestRefStop}; jpayne@68: } jpayne@68: return rvec; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /** Will not fill areas that cannot match minScore. jpayne@68: * @return {score, bestRefStart, bestRefStop} */ jpayne@68: public final int[] fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){ jpayne@68: int a=Tools.max(0, refStartLoc); jpayne@68: int b=Tools.min(ref.length-1, refEndLoc); jpayne@68: assert(b>=a); jpayne@68: jpayne@68: int[] score; jpayne@68: jpayne@68: if(b-a>=maxColumns){ jpayne@68: System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns); jpayne@68: assert(false) : refStartLoc+", "+refEndLoc; jpayne@68: b=Tools.min(ref.length-1, a+maxColumns-1); jpayne@68: } jpayne@68: int[] max=fillLimited(read, ref, a, b, minScore); jpayne@68: score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2])); jpayne@68: jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: jpayne@68: public final static int scoreNoIndels(byte[] read, SiteScore ss){ jpayne@68: ChromosomeArray cha=Data.getChromosome(ss.chrom); jpayne@68: return scoreNoIndels(read, cha.array, ss.start, ss); jpayne@68: } jpayne@68: jpayne@68: public final static int scoreNoIndels(byte[] read, final int chrom, final int refStart){ jpayne@68: ChromosomeArray cha=Data.getChromosome(chrom); jpayne@68: return scoreNoIndels(read, cha.array, refStart, null); jpayne@68: } jpayne@68: jpayne@68: public final static int scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores){ jpayne@68: ChromosomeArray cha=Data.getChromosome(ss.chrom); jpayne@68: return scoreNoIndels(read, cha.array, baseScores, ss.start, ss); jpayne@68: } jpayne@68: jpayne@68: public final static int scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores){ jpayne@68: ChromosomeArray cha=Data.getChromosome(chrom); jpayne@68: return scoreNoIndels(read, cha.array, baseScores, refStart, null); jpayne@68: } jpayne@68: jpayne@68: public final static int scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss){ jpayne@68: jpayne@68: int score=0; jpayne@68: int mode=-1; jpayne@68: int timeInMode=0; jpayne@68: jpayne@68: //This block handles cases where the read runs outside the reference jpayne@68: //Of course, padding the reference with 'N' would be better, but... jpayne@68: int readStart=0; jpayne@68: int readStop=read.length; jpayne@68: final int refStop=refStart+read.length; jpayne@68: boolean semiperfect=true; jpayne@68: int norefs=0; jpayne@68: jpayne@68: if(refStart<0){ jpayne@68: readStart=0-refStart; jpayne@68: score+=POINTS_NOREF*readStart; jpayne@68: norefs+=readStart; jpayne@68: } jpayne@68: if(refStop>ref.length){ jpayne@68: int dif=(refStop-ref.length); jpayne@68: readStop-=dif; jpayne@68: score+=POINTS_NOREF*dif; jpayne@68: norefs+=dif; jpayne@68: } jpayne@68: jpayne@68: // if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed. jpayne@68: jpayne@68: for(int i=readStart; iref.length){ jpayne@68: int dif=(refStop-ref.length); jpayne@68: readStop-=dif; jpayne@68: score+=POINTS_NOREF*dif; jpayne@68: norefs+=dif; jpayne@68: } jpayne@68: jpayne@68: // if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed. jpayne@68: jpayne@68: for(int i=readStart; iref.length){ jpayne@68: int dif=(refStop-ref.length); jpayne@68: System.err.println("dif="+dif+", ref.length="+ref.length+", refStop="+refStop); jpayne@68: readStop-=dif; jpayne@68: score+=POINTS_NOREF*dif; jpayne@68: } jpayne@68: assert(refStart+readStop<=ref.length) : "readStart="+readStart+", readStop="+readStop+ jpayne@68: ", refStart="+refStart+", refStop="+refStop+", ref.length="+ref.length+", read.length="+read.length; jpayne@68: jpayne@68: assert(matchReturn!=null); jpayne@68: assert(matchReturn.length==1); jpayne@68: if(matchReturn[0]==null || matchReturn[0].length!=read.length){ jpayne@68: assert(matchReturn[0]==null || matchReturn[0].lengthref.length){ jpayne@68: int dif=(refStop-ref.length); jpayne@68: System.err.println("dif="+dif+", ref.length="+ref.length+", refStop="+refStop); jpayne@68: readStop-=dif; jpayne@68: score+=POINTS_NOREF*dif; jpayne@68: } jpayne@68: assert(refStart+readStop<=ref.length) : "readStart="+readStart+", readStop="+readStop+ jpayne@68: ", refStart="+refStart+", refStop="+refStop+", ref.length="+ref.length+", read.length="+read.length; jpayne@68: jpayne@68: assert(matchReturn!=null); jpayne@68: assert(matchReturn.length==1); jpayne@68: if(matchReturn[0]==null || matchReturn[0].length!=read.length){ jpayne@68: assert(matchReturn[0]==null || matchReturn[0].length=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces; jpayne@68: for(int i=0; i=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces; jpayne@68: for(int i=0; i>SCOREOFFSET; jpayne@68: String s=" "+num; jpayne@68: if(s.length()>width){s=num>0 ? maxString : minString;} jpayne@68: int spaces=width-s.length(); jpayne@68: assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces; jpayne@68: for(int i=0; i=0); jpayne@68: for(int i=0; iLIMIT_FOR_COST_4){ jpayne@68: score+=(len-LIMIT_FOR_COST_4)*POINTS_DEL4; jpayne@68: len=LIMIT_FOR_COST_4; jpayne@68: } jpayne@68: if(len>LIMIT_FOR_COST_3){ jpayne@68: score+=(len-LIMIT_FOR_COST_3)*POINTS_DEL3; jpayne@68: len=LIMIT_FOR_COST_3; jpayne@68: } jpayne@68: if(len>1){ jpayne@68: score+=(len-1)*POINTS_DEL2; jpayne@68: } jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: private static int calcDelScoreOffset(int len){ jpayne@68: if(len<=0){return 0;} jpayne@68: int score=POINTSoff_DEL; jpayne@68: jpayne@68: if(len>LIMIT_FOR_COST_4){ jpayne@68: score+=(len-LIMIT_FOR_COST_4)*POINTSoff_DEL4; jpayne@68: len=LIMIT_FOR_COST_4; jpayne@68: } jpayne@68: if(len>LIMIT_FOR_COST_3){ jpayne@68: score+=(len-LIMIT_FOR_COST_3)*POINTSoff_DEL3; jpayne@68: len=LIMIT_FOR_COST_3; jpayne@68: } jpayne@68: if(len>1){ jpayne@68: score+=(len-1)*POINTSoff_DEL2; jpayne@68: } jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: private static int calcMatchScoreOffset(int len){ jpayne@68: if(len<=0){return 0;} jpayne@68: int score=POINTSoff_MATCH; jpayne@68: jpayne@68: if(len>1){ jpayne@68: score+=(len-1)*POINTSoff_MATCH2; jpayne@68: } jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: private static int calcSubScoreOffset(int len){ jpayne@68: if(len<=0){return 0;} jpayne@68: int score=POINTSoff_SUB; jpayne@68: jpayne@68: if(len>LIMIT_FOR_COST_3){ jpayne@68: score+=(len-LIMIT_FOR_COST_3)*POINTSoff_SUB3; jpayne@68: len=LIMIT_FOR_COST_3; jpayne@68: } jpayne@68: if(len>1){ jpayne@68: score+=(len-1)*POINTSoff_SUB2; jpayne@68: } jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: public static int calcInsScore(int len){ jpayne@68: if(len<=0){return 0;} jpayne@68: int score=POINTS_INS; jpayne@68: jpayne@68: if(len>LIMIT_FOR_COST_4){ jpayne@68: score+=(len-LIMIT_FOR_COST_4)*POINTS_INS4; jpayne@68: len=LIMIT_FOR_COST_4; jpayne@68: } jpayne@68: if(len>LIMIT_FOR_COST_3){ jpayne@68: score+=(len-LIMIT_FOR_COST_3)*POINTS_INS3; jpayne@68: len=LIMIT_FOR_COST_3; jpayne@68: } jpayne@68: if(len>1){ jpayne@68: score+=(len-1)*POINTS_INS2; jpayne@68: } jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: private static int calcInsScoreOffset(int len){ jpayne@68: if(len<=0){return 0;} jpayne@68: int score=POINTSoff_INS; jpayne@68: if(len>LIMIT_FOR_COST_4){ jpayne@68: score+=(len-LIMIT_FOR_COST_4)*POINTSoff_INS4; jpayne@68: len=LIMIT_FOR_COST_4; jpayne@68: } jpayne@68: if(len>LIMIT_FOR_COST_3){ jpayne@68: score+=(len-LIMIT_FOR_COST_3)*POINTSoff_INS3; jpayne@68: len=LIMIT_FOR_COST_3; jpayne@68: } jpayne@68: if(len>1){ jpayne@68: score+=(len-1)*POINTSoff_INS2; jpayne@68: } jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: private final int maxRows; jpayne@68: private final int maxColumns; jpayne@68: jpayne@68: private final int[][][] packed; jpayne@68: jpayne@68: private final int[] vertLimit; jpayne@68: private final int[] horizLimit; jpayne@68: jpayne@68: private final int[] insScoreArray; jpayne@68: private final int[] delScoreArray; jpayne@68: private final int[] matchScoreArray; jpayne@68: private final int[] subScoreArray; jpayne@68: jpayne@68: CharSequence showVertLimit(){ jpayne@68: StringBuilder sb=new StringBuilder(); jpayne@68: for(int i=0; i<=rows; i++){sb.append(vertLimit[i]>>SCOREOFFSET).append(",");} jpayne@68: return sb; jpayne@68: } jpayne@68: CharSequence showHorizLimit(){ jpayne@68: StringBuilder sb=new StringBuilder(); jpayne@68: for(int i=0; i<=columns; i++){sb.append(horizLimit[i]>>SCOREOFFSET).append(",");} jpayne@68: return sb; jpayne@68: } jpayne@68: jpayne@68: // public static final int MODEBITS=2; jpayne@68: public static final int TIMEBITS=12; jpayne@68: public static final int SCOREBITS=32-TIMEBITS; jpayne@68: public static final int MAX_TIME=((1<