jpayne@68: package aligner; jpayne@68: jpayne@68: import dna.AminoAcid; jpayne@68: import shared.KillSwitch; jpayne@68: import shared.Tools; jpayne@68: jpayne@68: /** jpayne@68: * Based on MSA9PBA, but reduced to a single matrix. */ jpayne@68: public final class SingleStateAlignerFlat implements Aligner { jpayne@68: jpayne@68: jpayne@68: public SingleStateAlignerFlat(){} jpayne@68: jpayne@68: private void prefillTopRow(){ jpayne@68: final int[] header=packed[0]; jpayne@68: final int qlen=rows; jpayne@68: for(int i=0; i<=columns; i++){ jpayne@68: int x=columns-i+1; jpayne@68: int qbases=qlen-x; jpayne@68: jpayne@68: //Minimal points to prefer a leftmost alignment jpayne@68: header[i]=qbases<=0 ? 0 : ((-qbases)<maxColumns0){//Make a new matrix jpayne@68: packed=KillSwitch.allocInt2D(maxRows+1, maxColumns+1); jpayne@68: prefillLeftColumnStartingAt(1); jpayne@68: }else{//Copy old rows jpayne@68: assert(maxRows0>0 && maxColumns0>0); jpayne@68: assert(maxRows>maxRows0 && maxColumns<=maxColumns0); jpayne@68: packed=KillSwitch.allocInt2D(maxRows+1); jpayne@68: for(int i=0; imaxRows || columns>maxColumns){ jpayne@68: throw new RuntimeException("rows="+rows+", maxRows="+maxRows+", cols="+columns+", maxCols="+maxColumns+"\n"+new String(read)+"\n"); jpayne@68: } jpayne@68: jpayne@68: assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows; jpayne@68: assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns; jpayne@68: jpayne@68: assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc; jpayne@68: assert(refEndLoc=delScore ? diagScore : delScore; jpayne@68: score=score>=insScore ? score : insScore; jpayne@68: jpayne@68: packed[row][col]=score; jpayne@68: } jpayne@68: // iterationsUnlimited+=columns; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: int maxCol=-1; jpayne@68: int maxState=-1; jpayne@68: int maxStart=-1; jpayne@68: int maxScore=Integer.MIN_VALUE; jpayne@68: jpayne@68: for(int col=1; col<=columns; col++){ jpayne@68: int x=packed[rows][col]; jpayne@68: if((x&SCOREMASK)>maxScore){ jpayne@68: maxScore=x; jpayne@68: maxCol=col; jpayne@68: maxState=x&MODEMASK; jpayne@68: maxStart=x&STARTMASK; jpayne@68: } jpayne@68: } jpayne@68: maxScore>>=SCOREOFFSET; jpayne@68: jpayne@68: // System.err.println("Returning "+rows+", "+maxCol+", "+maxState+", "+maxScore+"; minScore="+minScore); jpayne@68: return maxScore0 && col>0){ jpayne@68: byte q=query[row-1]; jpayne@68: byte r=ref[refStartLoc+col-1]; jpayne@68: boolean defined=(AminoAcid.isFullyDefined(q) && AminoAcid.isFullyDefined(r)); jpayne@68: state=(packed[row][col]&MODEMASK); jpayne@68: if(state==MODE_MATCH){ jpayne@68: col--; jpayne@68: row--; jpayne@68: out[outPos]=defined ? (byte)'m' : (byte)'N'; jpayne@68: }else if(state==MODE_SUB){ jpayne@68: col--; jpayne@68: row--; jpayne@68: out[outPos]=defined ? (byte)'S' : (byte)'N'; jpayne@68: }else if(state==MODE_N){ jpayne@68: col--; jpayne@68: row--; jpayne@68: out[outPos]='N'; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: col--; jpayne@68: out[outPos]='D'; jpayne@68: }else if(state==MODE_INS){ jpayne@68: row--; jpayne@68: // out[outPos]='I'; jpayne@68: if(col>=0 && col0){ jpayne@68: out[outPos]='C'; jpayne@68: outPos++; jpayne@68: row--; jpayne@68: col--; jpayne@68: } jpayne@68: if(col>0){ jpayne@68: //do nothing jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Shrink and reverse the string jpayne@68: byte[] out2=new byte[outPos]; jpayne@68: for(int i=0; i0 && col>0){ jpayne@68: byte q=query[row-1]; jpayne@68: byte r=ref[refStartLoc+col-1]; jpayne@68: boolean defined=(AminoAcid.isFullyDefined(q) && AminoAcid.isFullyDefined(r)); jpayne@68: state=(packed[row][col]&MODEMASK); jpayne@68: if(state==MODE_MATCH){ jpayne@68: col--; jpayne@68: row--; jpayne@68: match+=(defined ? 1 : 0); jpayne@68: noref+=(defined ? 0 : 1); jpayne@68: }else if(state==MODE_SUB){ jpayne@68: col--; jpayne@68: row--; jpayne@68: sub+=(defined ? 1 : 0); jpayne@68: noref+=(defined ? 0 : 1); jpayne@68: }else if(state==MODE_N){//Can't currently happen jpayne@68: col--; jpayne@68: row--; jpayne@68: noref++; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: col--; jpayne@68: del++; jpayne@68: }else if(state==MODE_INS){ jpayne@68: row--; jpayne@68: boolean edge=(col<=1 || col>=columns); jpayne@68: ins+=(edge ? 0 : 1); jpayne@68: clip+=(edge ? 1 : 0); jpayne@68: }else{ jpayne@68: assert(false) : state; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: assert(row==0 || col==0); jpayne@68: if(col!=row){//Not sure what this is doing jpayne@68: while(row>0){ jpayne@68: clip++; jpayne@68: row--; jpayne@68: col--; jpayne@68: } jpayne@68: if(col>0){ jpayne@68: //do nothing jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(extra!=null){ jpayne@68: assert(extra.length==5); jpayne@68: extra[0]=match; jpayne@68: extra[1]=sub; jpayne@68: extra[2]=del; jpayne@68: extra[3]=ins; jpayne@68: extra[4]=noref; jpayne@68: extra[5]=clip; jpayne@68: } jpayne@68: jpayne@68: float len=match+sub+ins+del+noref*0.1f; jpayne@68: float id=match/Tools.max(1.0f, len); jpayne@68: return id; jpayne@68: } jpayne@68: jpayne@68: /** @return {score, bestRefStart, bestRefStop} */ jpayne@68: @Override jpayne@68: public final int[] score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, jpayne@68: final int maxRow, final int maxCol, final int maxState/*, final int maxScore, final int maxStart*/){ jpayne@68: jpayne@68: int row=maxRow; jpayne@68: int col=maxCol; jpayne@68: int state=maxState; jpayne@68: jpayne@68: assert(maxState>=0 && maxState=0 && maxRow=0 && maxColdifC){ jpayne@68: score+=POINTSoff_NOREF; jpayne@68: difR--; jpayne@68: } jpayne@68: jpayne@68: row+=difR; jpayne@68: col+=difR; jpayne@68: jpayne@68: } jpayne@68: jpayne@68: assert(refStartLoc<=refEndLoc); jpayne@68: assert(row==rows); jpayne@68: jpayne@68: jpayne@68: final int bestRefStop=refStartLoc+col-1; jpayne@68: jpayne@68: while(row>0 && col>0){ jpayne@68: // final byte c=read[row-1]; jpayne@68: // final byte r=ref[refStartLoc+col-1]; jpayne@68: // final boolean defined=(AminoAcid.isFullyDefined(c) && AminoAcid.isFullyDefined(r)); jpayne@68: state=(packed[row][col]&MODEMASK); jpayne@68: if(state==MODE_MATCH){ jpayne@68: col--; jpayne@68: row--; jpayne@68: }else if(state==MODE_SUB){ jpayne@68: col--; jpayne@68: row--; jpayne@68: }else if(state==MODE_N){ jpayne@68: col--; jpayne@68: row--; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: col--; jpayne@68: }else if(state==MODE_INS){ jpayne@68: row--; jpayne@68: }else{ jpayne@68: assert(false) : state; jpayne@68: } jpayne@68: } jpayne@68: // assert(false) : row+", "+col; jpayne@68: if(row>col){ jpayne@68: col-=row; jpayne@68: } jpayne@68: jpayne@68: final int bestRefStart=refStartLoc+col; jpayne@68: jpayne@68: score>>=SCOREOFFSET; jpayne@68: // System.err.println("t2\t"+score+", "+maxScore+", "+maxStart+", "+bestRefStart); jpayne@68: int[] rvec; jpayne@68: if(bestRefStartrefEndLoc){ //Suggest extra padding in cases of overflow jpayne@68: int padLeft=Tools.max(0, refStartLoc-bestRefStart); jpayne@68: int padRight=Tools.max(0, bestRefStop-refEndLoc); jpayne@68: rvec=new int[] {score, bestRefStart, bestRefStop, padLeft, padRight}; jpayne@68: }else{ jpayne@68: rvec=new int[] {score, bestRefStart, bestRefStop}; jpayne@68: } jpayne@68: return rvec; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /** Will not fill areas that cannot match minScore. jpayne@68: * @return {score, bestRefStart, bestRefStop} */ jpayne@68: @Override jpayne@68: public final int[] fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){ jpayne@68: int a=Tools.max(0, refStartLoc); jpayne@68: int b=Tools.min(ref.length-1, refEndLoc); jpayne@68: assert(b>=a); jpayne@68: jpayne@68: if(b-a>=maxColumns){ jpayne@68: System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns); jpayne@68: assert(false) : refStartLoc+", "+refEndLoc; jpayne@68: b=Tools.min(ref.length-1, a+maxColumns-1); jpayne@68: } jpayne@68: int[] max=fillLimited(read, ref, a, b, minScore); jpayne@68: // return max==null ? null : new int[] {max[3], 0, max[1]}; jpayne@68: jpayne@68: int[] score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2]/*, max[3], max[4]*/)); jpayne@68: jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: public static final String toString(byte[] ref, int startLoc, int stopLoc){ jpayne@68: StringBuilder sb=new StringBuilder(stopLoc-startLoc+1); jpayne@68: for(int i=startLoc; i<=stopLoc; i++){sb.append((char)ref[i]);} jpayne@68: return sb.toString(); jpayne@68: } jpayne@68: jpayne@68: // public int maxScoreByIdentity(int len, float identity){ jpayne@68: // assert(identity>=0 && identity<=1); jpayne@68: // return (int)(len*(identity*POINTS_MATCH+(1-identity)*POINTS_SUB)); jpayne@68: // } jpayne@68: jpayne@68: @Override jpayne@68: public int minScoreByIdentity(int len, float identity){ jpayne@68: assert(identity>=0 && identity<=1); jpayne@68: jpayne@68: int a=(int)(len*(identity*POINTS_MATCH+(1-identity)*POINTS_SUB)); jpayne@68: int b=(int)(len*(identity*POINTS_MATCH+(1-identity)*POINTS_INS)); jpayne@68: int c=(int)(len*(1*POINTS_MATCH+((1/(Tools.max(identity, 0.000001f)))-1)*POINTS_DEL)); jpayne@68: return Tools.min(a, b, c); jpayne@68: } jpayne@68: jpayne@68: private static int calcDelScoreOffset(int len){ jpayne@68: if(len<=0){return 0;} jpayne@68: int score=POINTSoff_DEL*len; jpayne@68: return score; jpayne@68: } jpayne@68: // jpayne@68: // public static int calcInsScore(int len){ jpayne@68: // if(len<=0){return 0;} jpayne@68: // int score=POINTS_INS; jpayne@68: // jpayne@68: // if(len>1){ jpayne@68: // score+=(len-1)*POINTS_INS2; jpayne@68: // } jpayne@68: // return score; jpayne@68: // } jpayne@68: // jpayne@68: // private static int calcInsScoreOffset(int len){ jpayne@68: // if(len<=0){return 0;} jpayne@68: // int score=POINTSoff_INS; jpayne@68: // jpayne@68: // if(len>1){ jpayne@68: // score+=(len-1)*POINTSoff_INS2; jpayne@68: // } jpayne@68: // return score; jpayne@68: // } jpayne@68: jpayne@68: @Override jpayne@68: public int rows(){return rows;} jpayne@68: @Override jpayne@68: public int columns(){return columns;} jpayne@68: jpayne@68: jpayne@68: private int maxRows; jpayne@68: private int maxColumns; jpayne@68: jpayne@68: private int[][] packed; jpayne@68: jpayne@68: public static final int MODEBITS=3; jpayne@68: public static final int STARTBITS=9; jpayne@68: public static final int LOWBITS=MODEBITS+STARTBITS; jpayne@68: public static final int SCOREBITS=32-STARTBITS; jpayne@68: public static final int MAX_START=((1<