jpayne@68: package aligner; jpayne@68: jpayne@68: import dna.AminoAcid; jpayne@68: import shared.KillSwitch; jpayne@68: import shared.Tools; jpayne@68: jpayne@68: /** jpayne@68: * Based on SSAFlat, but with previous state pointers removed. */ jpayne@68: public final class SingleStateAlignerFlat2Amino implements Aligner { jpayne@68: jpayne@68: jpayne@68: public SingleStateAlignerFlat2Amino(){} jpayne@68: jpayne@68: private void prefillTopRow(){ jpayne@68: final int[] header=packed[0]; jpayne@68: final int qlen=rows; jpayne@68: for(int i=0; i<=columns; i++){ jpayne@68: int x=columns-i+1; jpayne@68: int qbases=qlen-x; jpayne@68: jpayne@68: //Minimal points to prefer a leftmost alignment jpayne@68: header[i]=qbases<=0 ? 0 : -qbases; jpayne@68: jpayne@68: //Forces consumption of query, but does not allow for insertions... jpayne@68: // header[i]=qbases<=0 ? 0 : calcDelScoreOffset(qbases); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private void prefillLeftColumnStartingAt(int i){ jpayne@68: packed[0][0]=MODE_MATCH; jpayne@68: i=Tools.max(1, i); jpayne@68: for(int score=MODE_INS+(POINTS_INS*i); i<=maxRows; i++){//Fill column 0 with insertions jpayne@68: score+=POINTS_INS; jpayne@68: packed[i][0]=score; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private void initialize(int rows_, int columns_){ jpayne@68: rows=rows_; jpayne@68: columns=columns_; jpayne@68: if(rows<=maxRows && columns<=maxColumns){ jpayne@68: prefillTopRow(); jpayne@68: // prefillLeftColumn(); jpayne@68: return; jpayne@68: } jpayne@68: jpayne@68: final int maxRows0=maxRows; jpayne@68: final int maxColumns0=maxColumns; jpayne@68: final int[][] packed0=packed; jpayne@68: jpayne@68: //Monotonic increase jpayne@68: maxRows=Tools.max(maxRows, rows+10); jpayne@68: maxColumns=Tools.max(maxColumns, columns+10); jpayne@68: jpayne@68: if(packed==null || maxColumns>maxColumns0){//Make a new matrix jpayne@68: packed=KillSwitch.allocInt2D(maxRows+1, maxColumns+1); jpayne@68: prefillLeftColumnStartingAt(1); jpayne@68: }else{//Copy old rows jpayne@68: assert(maxRows0>0 && maxColumns0>0); jpayne@68: assert(maxRows>maxRows0 && maxColumns<=maxColumns0); jpayne@68: packed=KillSwitch.allocInt2D(maxRows+1); jpayne@68: for(int i=0; imaxRows || columns>maxColumns){ jpayne@68: throw new RuntimeException("rows="+rows+", maxRows="+maxRows+", cols="+columns+", maxCols="+maxColumns+"\n"+new String(read)+"\n"); jpayne@68: } jpayne@68: jpayne@68: assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows; jpayne@68: assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns; jpayne@68: jpayne@68: assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc; jpayne@68: assert(refEndLoc=delScore ? diagScore : delScore; jpayne@68: score=score>=insScore ? score : insScore; jpayne@68: jpayne@68: packed[row][col]=score; jpayne@68: } jpayne@68: //iterationsUnlimited+=columns; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: int maxCol=-1; jpayne@68: int maxState=-1; jpayne@68: int maxStart=-1; jpayne@68: int maxScore=Integer.MIN_VALUE; jpayne@68: jpayne@68: for(int col=1; col<=columns; col++){ jpayne@68: int x=packed[rows][col]; jpayne@68: if(x>maxScore){ jpayne@68: maxScore=x; jpayne@68: maxCol=col; jpayne@68: jpayne@68: // assert(rows-1=delScore && diagScore>=insScore){ jpayne@68: return defined ? match ? MODE_MATCH : MODE_SUB : MODE_N; jpayne@68: }else if(delScore>=insScore){ jpayne@68: return MODE_DEL; jpayne@68: } jpayne@68: return MODE_INS; jpayne@68: } jpayne@68: jpayne@68: /** Generates the match string */ jpayne@68: @Override jpayne@68: public final byte[] traceback(byte[] query, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state){ jpayne@68: // assert(false); jpayne@68: assert(refStartLoc<=refEndLoc) : refStartLoc+", "+refEndLoc; jpayne@68: assert(row==rows); jpayne@68: jpayne@68: byte[] out=new byte[row+col-1]; //TODO if an out of bound crash occurs, try removing the "-1". jpayne@68: int outPos=0; jpayne@68: jpayne@68: // assert(state==(packed[row][col]&MODEMASK)); jpayne@68: jpayne@68: while(row>0 && col>0){ jpayne@68: byte q=query[row-1]; jpayne@68: byte r=ref[refStartLoc+col-1]; jpayne@68: boolean defined=(AminoAcid.isFullyDefinedAA(q) && AminoAcid.isFullyDefinedAA(r)); jpayne@68: state=getState(row, col, q, r); jpayne@68: // assert(defined) : state+", "+(int)q+", "+(int)r+", "+new String(query); jpayne@68: // assert(state!=MODE_N) : state+", "+Character.toString(q)+", "+Character.toString(r)+", "+new String(query); jpayne@68: if(state==MODE_MATCH){ jpayne@68: col--; jpayne@68: row--; jpayne@68: out[outPos]=defined ? (byte)'m' : (byte)'N'; jpayne@68: }else if(state==MODE_SUB){ jpayne@68: col--; jpayne@68: row--; jpayne@68: out[outPos]=defined ? (byte)'S' : (byte)'N'; jpayne@68: }else if(state==MODE_N){ jpayne@68: col--; jpayne@68: row--; jpayne@68: out[outPos]='N'; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: col--; jpayne@68: out[outPos]='D'; jpayne@68: }else if(state==MODE_INS){ jpayne@68: row--; jpayne@68: // out[outPos]='I'; jpayne@68: if(col>=0 && col0){ jpayne@68: out[outPos]='C'; jpayne@68: outPos++; jpayne@68: row--; jpayne@68: col--; jpayne@68: } jpayne@68: if(col>0){ jpayne@68: //do nothing jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Shrink and reverse the string jpayne@68: byte[] out2=new byte[outPos]; jpayne@68: for(int i=0; i0 && col>0){ jpayne@68: byte q=query[row-1]; jpayne@68: byte r=ref[refStartLoc+col-1]; jpayne@68: boolean defined=(AminoAcid.isFullyDefinedAA(q) && AminoAcid.isFullyDefinedAA(r)); jpayne@68: state=getState(row, col, q, r); jpayne@68: if(state==MODE_MATCH){ jpayne@68: col--; jpayne@68: row--; jpayne@68: match+=(defined ? 1 : 0); jpayne@68: noref+=(defined ? 0 : 1); jpayne@68: }else if(state==MODE_SUB){ jpayne@68: col--; jpayne@68: row--; jpayne@68: sub+=(defined ? 1 : 0); jpayne@68: noref+=(defined ? 0 : 1); jpayne@68: }else if(state==MODE_N){ jpayne@68: col--; jpayne@68: row--; jpayne@68: noref++; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: col--; jpayne@68: del++; jpayne@68: }else if(state==MODE_INS){ jpayne@68: row--; jpayne@68: boolean edge=(col<=1 || col>=columns); jpayne@68: ins+=(edge ? 0 : 1); jpayne@68: clip+=(edge ? 1 : 0); jpayne@68: }else{ jpayne@68: assert(false) : state; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: assert(row==0 || col==0); jpayne@68: if(col!=row){//Not sure what this is doing jpayne@68: while(row>0){ jpayne@68: clip++; jpayne@68: row--; jpayne@68: col--; jpayne@68: } jpayne@68: if(col>0){ jpayne@68: //do nothing jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(extra!=null){ jpayne@68: assert(extra.length==5); jpayne@68: extra[0]=match; jpayne@68: extra[1]=sub; jpayne@68: extra[2]=del; jpayne@68: extra[3]=ins; jpayne@68: extra[4]=noref; jpayne@68: extra[5]=clip; jpayne@68: } jpayne@68: jpayne@68: float len=match+sub+ins+del+noref*0.1f; jpayne@68: float id=match/Tools.max(1.0f, len); jpayne@68: return id; jpayne@68: } jpayne@68: jpayne@68: /** Generates identity; jpayne@68: * fills 'extra' with {match, sub, del, ins, N, clip} if present */ jpayne@68: public float tracebackIdentityAmino(byte[] query, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state, int[] extra){ jpayne@68: jpayne@68: // assert(false); jpayne@68: assert(refStartLoc<=refEndLoc) : refStartLoc+", "+refEndLoc; jpayne@68: assert(row==rows); jpayne@68: jpayne@68: // assert(state==(packed[row][col]&MODEMASK)); jpayne@68: int match=0, sub=0, del=0, ins=0, noref=0, clip=0; jpayne@68: jpayne@68: while(row>0 && col>0){ jpayne@68: byte q=query[row-1]; jpayne@68: byte r=ref[refStartLoc+col-1]; jpayne@68: boolean defined=(AminoAcid.isFullyDefinedAA(q) && AminoAcid.isFullyDefinedAA(r)); jpayne@68: state=getState(row, col, q, r); jpayne@68: if(state==MODE_MATCH){ jpayne@68: col--; jpayne@68: row--; jpayne@68: match+=(defined ? 1 : 0); jpayne@68: noref+=(defined ? 0 : 1); jpayne@68: }else if(state==MODE_SUB){ jpayne@68: col--; jpayne@68: row--; jpayne@68: sub+=(defined ? 1 : 0); jpayne@68: noref+=(defined ? 0 : 1); jpayne@68: }else if(state==MODE_N){ jpayne@68: col--; jpayne@68: row--; jpayne@68: noref++; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: col--; jpayne@68: del++; jpayne@68: }else if(state==MODE_INS){ jpayne@68: row--; jpayne@68: boolean edge=(col<=1 || col>=columns); jpayne@68: ins+=(edge ? 0 : 1); jpayne@68: clip+=(edge ? 1 : 0); jpayne@68: }else{ jpayne@68: assert(false) : state; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: assert(row==0 || col==0); jpayne@68: if(col!=row){//Not sure what this is doing jpayne@68: while(row>0){ jpayne@68: clip++; jpayne@68: row--; jpayne@68: col--; jpayne@68: } jpayne@68: if(col>0){ jpayne@68: //do nothing jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(extra!=null){ jpayne@68: assert(extra.length==5); jpayne@68: extra[0]=match; jpayne@68: extra[1]=sub; jpayne@68: extra[2]=del; jpayne@68: extra[3]=ins; jpayne@68: extra[4]=noref; jpayne@68: extra[5]=clip; jpayne@68: } jpayne@68: jpayne@68: float len=match+sub+ins+del+noref*0.1f; jpayne@68: float id=match/Tools.max(1.0f, len); jpayne@68: return id; jpayne@68: } jpayne@68: jpayne@68: /** @return {score, bestRefStart, bestRefStop} */ jpayne@68: @Override jpayne@68: public final int[] score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc, jpayne@68: final int maxRow, final int maxCol, final int maxState/*, final int maxScore, final int maxStart*/){ jpayne@68: jpayne@68: int row=maxRow; jpayne@68: int col=maxCol; jpayne@68: int state=maxState; jpayne@68: jpayne@68: assert(maxState>=0 && maxState=0 && maxRow=0 && maxColdifC){ jpayne@68: score+=POINTS_NOREF; jpayne@68: difR--; jpayne@68: } jpayne@68: jpayne@68: row+=difR; jpayne@68: col+=difR; jpayne@68: jpayne@68: } jpayne@68: jpayne@68: assert(refStartLoc<=refEndLoc); jpayne@68: assert(row==rows); jpayne@68: jpayne@68: jpayne@68: final int bestRefStop=refStartLoc+col-1; jpayne@68: jpayne@68: while(row>0 && col>0){ jpayne@68: final byte q=read[row-1]; jpayne@68: final byte r=ref[refStartLoc+col-1]; jpayne@68: // final boolean defined=(AminoAcid.isFullyDefinedAA(q) && AminoAcid.isFullyDefinedAA(r)); jpayne@68: state=getState(row, col, q, r); jpayne@68: if(state==MODE_MATCH){ jpayne@68: col--; jpayne@68: row--; jpayne@68: }else if(state==MODE_SUB){ jpayne@68: col--; jpayne@68: row--; jpayne@68: }else if(state==MODE_N){ jpayne@68: col--; jpayne@68: row--; jpayne@68: }else if(state==MODE_DEL){ jpayne@68: col--; jpayne@68: }else if(state==MODE_INS){ jpayne@68: row--; jpayne@68: }else{ jpayne@68: assert(false) : state; jpayne@68: } jpayne@68: } jpayne@68: // assert(false) : row+", "+col; jpayne@68: if(row>col){ jpayne@68: col-=row; jpayne@68: } jpayne@68: jpayne@68: final int bestRefStart=refStartLoc+col; jpayne@68: jpayne@68: // System.err.println("t2\t"+score+", "+maxScore+", "+maxStart+", "+bestRefStart); jpayne@68: int[] rvec; jpayne@68: if(bestRefStartrefEndLoc){ //Suggest extra padding in cases of overflow jpayne@68: int padLeft=Tools.max(0, refStartLoc-bestRefStart); jpayne@68: int padRight=Tools.max(0, bestRefStop-refEndLoc); jpayne@68: rvec=new int[] {score, bestRefStart, bestRefStop, padLeft, padRight}; jpayne@68: }else{ jpayne@68: rvec=new int[] {score, bestRefStart, bestRefStop}; jpayne@68: } jpayne@68: return rvec; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /** Will not fill areas that cannot match minScore. jpayne@68: * @return {score, bestRefStart, bestRefStop} */ jpayne@68: @Override jpayne@68: public final int[] fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){ jpayne@68: int a=Tools.max(0, refStartLoc); jpayne@68: int b=Tools.min(ref.length-1, refEndLoc); jpayne@68: assert(b>=a); jpayne@68: jpayne@68: if(b-a>=maxColumns){ jpayne@68: System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns); jpayne@68: assert(false) : refStartLoc+", "+refEndLoc; jpayne@68: b=Tools.min(ref.length-1, a+maxColumns-1); jpayne@68: } jpayne@68: int[] max=fillLimited(read, ref, a, b, minScore); jpayne@68: // return max==null ? null : new int[] {max[3], 0, max[1]}; jpayne@68: jpayne@68: int[] score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2]/*, max[3], max[4]*/)); jpayne@68: jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: public static final String toString(byte[] ref, int startLoc, int stopLoc){ jpayne@68: StringBuilder sb=new StringBuilder(stopLoc-startLoc+1); jpayne@68: for(int i=startLoc; i<=stopLoc; i++){sb.append((char)ref[i]);} jpayne@68: return sb.toString(); jpayne@68: } jpayne@68: jpayne@68: // public static int calcDelScore(int len){ jpayne@68: // if(len<=0){return 0;} jpayne@68: // int score=POINTS_DEL; jpayne@68: // if(len>1){ jpayne@68: // score+=(len-1)*POINTS_DEL2; jpayne@68: // } jpayne@68: // return score; jpayne@68: // } jpayne@68: jpayne@68: // public int maxScoreByIdentity(int len, float identity){ jpayne@68: // assert(identity>=0 && identity<=1); jpayne@68: // return (int)(len*(identity*POINTS_MATCH+(1-identity)*POINTS_SUB)); jpayne@68: // } jpayne@68: jpayne@68: @Override jpayne@68: public int minScoreByIdentity(int len, float identity){ jpayne@68: assert(identity>=0 && identity<=1); jpayne@68: jpayne@68: int a=(int)(len*(identity*POINTS_MATCH+(1-identity)*POINTS_SUB)); jpayne@68: int b=(int)(len*(identity*POINTS_MATCH+(1-identity)*POINTS_INS)); jpayne@68: int c=(int)(len*(1*POINTS_MATCH+((1/(Tools.max(identity, 0.000001f)))-1)*POINTS_DEL)); jpayne@68: return Tools.min(a, b, c); jpayne@68: } jpayne@68: jpayne@68: private static int calcDelScore(int len){ jpayne@68: if(len<=0){return 0;} jpayne@68: int score=POINTS_DEL*len; jpayne@68: return score; jpayne@68: } jpayne@68: jpayne@68: @Override jpayne@68: public int rows(){return rows;} jpayne@68: @Override jpayne@68: public int columns(){return columns;} jpayne@68: jpayne@68: jpayne@68: private int maxRows; jpayne@68: private int maxColumns; jpayne@68: jpayne@68: private int[][] packed; jpayne@68: jpayne@68: public static final int MAX_SCORE=Integer.MAX_VALUE-2000; jpayne@68: public static final int MIN_SCORE=0-MAX_SCORE; //Keeps it 1 point above "BAD". jpayne@68: jpayne@68: //For some reason changing MODE_DEL from 1 to 0 breaks everything jpayne@68: private static final byte MODE_DEL=1; jpayne@68: private static final byte MODE_INS=2; jpayne@68: private static final byte MODE_SUB=3; jpayne@68: private static final byte MODE_MATCH=4; jpayne@68: private static final byte MODE_N=5; jpayne@68: jpayne@68: public static final int POINTS_NOREF=-15; jpayne@68: public static final int POINTS_MATCH=100; jpayne@68: public static final int POINTS_SUB=-50; jpayne@68: public static final int POINTS_INS=-121; jpayne@68: public static final int POINTS_DEL=-111; jpayne@68: jpayne@68: // public static final int POINTS_NOREF=-100000; jpayne@68: // public static final int POINTS_MATCH=100; jpayne@68: // public static final int POINTS_SUB=-100; jpayne@68: // public static final int POINTS_INS=-100; jpayne@68: // public static final int POINTS_DEL=-100; jpayne@68: jpayne@68: public static final int BAD=MIN_SCORE-1; jpayne@68: jpayne@68: private int rows; jpayne@68: private int columns; jpayne@68: jpayne@68: // public long iterationsLimited=0; jpayne@68: // public long iterationsUnlimited=0; jpayne@68: jpayne@68: public boolean verbose=false; jpayne@68: public boolean verbose2=false; jpayne@68: jpayne@68: }