jpayne@68: package prok; jpayne@68: jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Arrays; jpayne@68: import java.util.Collections; jpayne@68: jpayne@68: import aligner.SingleStateAlignerFlat2; jpayne@68: import aligner.SingleStateAlignerFlat3; jpayne@68: import aligner.SingleStateAlignerFlatFloat; jpayne@68: import dna.AminoAcid; jpayne@68: import shared.KillSwitch; jpayne@68: import shared.Tools; jpayne@68: import stream.Read; jpayne@68: import structures.FloatList; jpayne@68: import structures.IntList; jpayne@68: import structures.LongHashSet; jpayne@68: jpayne@68: jpayne@68: /** jpayne@68: * This class calls genes within a single thread. jpayne@68: * @author Brian Bushnell jpayne@68: * @date Sep 24, 2018 jpayne@68: * jpayne@68: */ jpayne@68: public class GeneCaller extends ProkObject { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Init ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: GeneCaller(int minLen_, int maxOverlapSameStrand_, int maxOverlapOppositeStrand_, jpayne@68: float minStartScore_, float minStopScore_, float minInnerScore_, jpayne@68: float minOrfScore_, float minAvgScore_, GeneModel pgm_){ jpayne@68: minLen=minLen_; jpayne@68: maxOverlapSameStrand=maxOverlapSameStrand_; jpayne@68: maxOverlapOppositeStrand=maxOverlapOppositeStrand_; jpayne@68: pgm=pgm_; jpayne@68: jpayne@68: minStartScore=minStartScore_; jpayne@68: minStopScore=minStopScore_; jpayne@68: minInnerScore=minInnerScore_; jpayne@68: minOrfScore=minOrfScore_; jpayne@68: minAvgScore=minAvgScore_; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public ArrayList callGenes(Read r){ jpayne@68: return callGenes(r, pgm); jpayne@68: } jpayne@68: jpayne@68: public ArrayList callGenes(Read r, GeneModel pgm_){ jpayne@68: pgm=pgm_; jpayne@68: jpayne@68: final String name=r.id; jpayne@68: final byte[] bases=r.bases; jpayne@68: jpayne@68: //Lists of all longest orfs per frame jpayne@68: ArrayList[] frameLists=makeOrfs(name, bases, minLen); jpayne@68: //Lists of all high-scoring orfs per frame, with potentially multiple orfs sharing stops. jpayne@68: ArrayList[] brokenLists=breakOrfs(frameLists, bases); jpayne@68: jpayne@68: ArrayList[] rnaLists=null; jpayne@68: final int rlen=r.length(); jpayne@68: if(calltRNA || (call16S && rlen>800) || (call23S && rlen>1500) || call5S || (call18S && rlen>1000)){ jpayne@68: rnaLists=makeRnas(name, bases); jpayne@68: jpayne@68: brokenLists[0].addAll(rnaLists[0]); jpayne@68: brokenLists[3].addAll(rnaLists[1]); jpayne@68: Collections.sort(brokenLists[0]); jpayne@68: Collections.sort(brokenLists[3]); jpayne@68: } jpayne@68: jpayne@68: boolean printAllOrfs=false; jpayne@68: boolean printRnas=false; jpayne@68: if(printAllOrfs){ jpayne@68: ArrayList temp=new ArrayList(); jpayne@68: for(ArrayList broken : brokenLists){ jpayne@68: temp.addAll(broken); jpayne@68: } jpayne@68: Collections.sort(temp); jpayne@68: return temp; jpayne@68: } jpayne@68: jpayne@68: if(printRnas && rnaLists!=null){ jpayne@68: ArrayList temp=new ArrayList(); jpayne@68: for(ArrayList list : rnaLists){ jpayne@68: temp.addAll(list); jpayne@68: } jpayne@68: Collections.sort(temp); jpayne@68: return temp; jpayne@68: } jpayne@68: jpayne@68: stCds2.add(brokenLists); jpayne@68: jpayne@68: //Find the optimal path through Orfs jpayne@68: ArrayList path=findPath(brokenLists, bases); jpayne@68: // geneStartsOut+=path.size(); jpayne@68: jpayne@68: if(callCDS){stCdsPass.add(path);} jpayne@68: if(calltRNA){sttRNA.add(path);} jpayne@68: if(call16S){st16s.add(path);} jpayne@68: if(call23S){st23s.add(path);} jpayne@68: if(call5S){st5s.add(path);} jpayne@68: if(call18S){st18s.add(path);} jpayne@68: jpayne@68: return path; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Generates lists of all max-length non-overlapping Orfs per frame. jpayne@68: * There IS overlap between frames. jpayne@68: * All Orfs come out flipped to + orientation. jpayne@68: * */ jpayne@68: ArrayList[] makeRnas(String name, byte[] bases){ jpayne@68: @SuppressWarnings("unchecked") jpayne@68: ArrayList[] array=new ArrayList[2]; jpayne@68: array[0]=new ArrayList(); jpayne@68: array[1]=new ArrayList(); jpayne@68: final float[] scores=new float[bases.length]; jpayne@68: final int[] kmersSeen=(lsuKmers==null && ssuKmers==null && trnaKmers==null && r5SKmers==null) ? null : new int[bases.length]; jpayne@68: for(int strand=0; strand<2; strand++){ jpayne@68: for(StatsContainer sc : pgm.rnaContainers){ jpayne@68: if(ProkObject.callType(sc.type)){ jpayne@68: ArrayList list=makeRnasForStrand(name, bases, strand, sc, scores, (sc.kmerSet()==null ? null : kmersSeen), false, -1);//TODO: Make this loop through all RNA types jpayne@68: if(strand==1 && list!=null){ jpayne@68: for(Orf orf : list){ jpayne@68: assert(orf.strand==strand); jpayne@68: orf.flip(); jpayne@68: } jpayne@68: } jpayne@68: if(list!=null){array[strand].addAll(list);} jpayne@68: } jpayne@68: } jpayne@68: Collections.sort(array[strand]); jpayne@68: AminoAcid.reverseComplementBasesInPlace(bases); jpayne@68: } jpayne@68: return array; jpayne@68: } jpayne@68: jpayne@68: /** Designed for quickly calling a single SSU */ jpayne@68: public Orf makeRna(String name, byte[] bases, int type){ jpayne@68: final float[] scores=new float[bases.length];//TODO: Big and slow; make a FloatList? jpayne@68: StatsContainer sc=pgm.allContainers[type]; jpayne@68: final int[] kmersSeen=(sc.kmerSet()==null ? null : new int[bases.length]);//TODO: IntList? jpayne@68: jpayne@68: int strand=0; jpayne@68: ArrayList list=makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, true, -1); jpayne@68: final Orf best1=pickBest(list); jpayne@68: assert(best1==null || best1.start>=0 && best1.stop-999){return best1;} jpayne@68: jpayne@68: strand++; jpayne@68: AminoAcid.reverseComplementBasesInPlace(bases); jpayne@68: list=makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, true, -1); jpayne@68: AminoAcid.reverseComplementBasesInPlace(bases); jpayne@68: if(strand==1 && list!=null){ jpayne@68: for(Orf orf : list){ jpayne@68: assert(orf.strand==strand); jpayne@68: orf.flip(); jpayne@68: } jpayne@68: } jpayne@68: final Orf best2=pickBest(list); jpayne@68: assert(best2==null || best2.start>=0 && best2.stop-999){return best2;} jpayne@68: return best1!=null ? best1 : best2; jpayne@68: } jpayne@68: jpayne@68: final Orf pickBest(ArrayList list){ jpayne@68: if(list==null){return null;} jpayne@68: Orf best=null; jpayne@68: for(Orf orf : list){ jpayne@68: if(best==null || orf.orfScore>best.orfScore){ jpayne@68: best=orf; jpayne@68: } jpayne@68: } jpayne@68: return best; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Generates lists of all max-length non-overlapping Orfs per frame. jpayne@68: * There IS overlap between frames. jpayne@68: * All Orfs come out flipped to + orientation. jpayne@68: * */ jpayne@68: static ArrayList[] makeOrfs(String name, byte[] bases, int minlen){ jpayne@68: @SuppressWarnings("unchecked") jpayne@68: ArrayList[] array=new ArrayList[6]; jpayne@68: for(int strand=0; strand<2; strand++){ jpayne@68: for(int frame=0; frame<3; frame++){ jpayne@68: ArrayList list=makeOrfsForFrame(name, bases, frame, strand, minlen); jpayne@68: array[frame+3*strand]=list; jpayne@68: if(strand==1 && list!=null){ jpayne@68: for(Orf orf : list){ jpayne@68: assert(orf.frame==frame); jpayne@68: assert(orf.strand==strand); jpayne@68: orf.flip(); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: AminoAcid.reverseComplementBasesInPlace(bases); jpayne@68: } jpayne@68: return array; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Dynamic programming phase. jpayne@68: * @param frameLists jpayne@68: * @param bases jpayne@68: * @return jpayne@68: */ jpayne@68: private ArrayList findPath(ArrayList[] frameLists, byte[] bases){ jpayne@68: ArrayList all=new ArrayList(); jpayne@68: for(ArrayList list : frameLists){all.addAll(list);} jpayne@68: if(all.isEmpty()){return all;} jpayne@68: Collections.sort(all); jpayne@68: jpayne@68: for(Orf orf : all){ jpayne@68: orf.pathScorePlus=-999999; jpayne@68: orf.pathScoreMinus=-999999; jpayne@68: } jpayne@68: jpayne@68: int[] lastPositionScored=KillSwitch.allocInt1D(6); jpayne@68: Arrays.fill(lastPositionScored, -1); jpayne@68: jpayne@68: //Index of highest-scoring ORF in this frame, with prev on the plus strand jpayne@68: int[] bestIndexPlus=KillSwitch.allocInt1D(6); jpayne@68: //Index of highest-scoring ORF in this frame, with prev on the minus strand jpayne@68: int[] bestIndexMinus=KillSwitch.allocInt1D(6); jpayne@68: //Highest-scoring ORF in this frame, with prev on the plus strand jpayne@68: Orf[] bestOrfPlus=new Orf[6]; jpayne@68: //Highest-scoring ORF in this frame, with prev on the minus strand jpayne@68: Orf[] bestOrfMinus=new Orf[6]; jpayne@68: jpayne@68: int[][] bestIndex=new int[][] {bestIndexPlus, bestIndexMinus}; jpayne@68: Orf[][] bestOrf=new Orf[][] {bestOrfPlus, bestOrfMinus}; jpayne@68: jpayne@68: for(Orf orf : all){ jpayne@68: final int myListNum=3*orf.strand+orf.frame; jpayne@68: calcPathScore(orf, frameLists, lastPositionScored, bestIndex); jpayne@68: if(bestOrfPlus[myListNum]==null || orf.pathScorePlus>=bestOrfPlus[myListNum].pathScorePlus){ jpayne@68: bestOrfPlus[myListNum]=orf; jpayne@68: bestIndexPlus[myListNum]=lastPositionScored[myListNum]; jpayne@68: assert(frameLists[myListNum].get(lastPositionScored[myListNum])==orf); jpayne@68: } jpayne@68: if(bestOrfMinus[myListNum]==null || orf.pathScoreMinus>=bestOrfMinus[myListNum].pathScoreMinus){ jpayne@68: bestOrfMinus[myListNum]=orf; jpayne@68: bestIndexMinus[myListNum]=lastPositionScored[myListNum]; jpayne@68: assert(frameLists[myListNum].get(lastPositionScored[myListNum])==orf); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: Orf best=bestOrf[0][0]; jpayne@68: for(Orf[] array : bestOrf){ jpayne@68: for(Orf orf : array){ jpayne@68: if(best==null || (orf!=null && orf.pathScore()>best.pathScore())){ jpayne@68: best=orf; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: ArrayList bestPath=new ArrayList(); jpayne@68: for(Orf orf=best; orf!=null; orf=orf.prev()){ jpayne@68: bestPath.add(orf); jpayne@68: if(orf.type==CDS){geneStartsOut++;} jpayne@68: else if(orf.type==tRNA){tRNAOut++;} jpayne@68: else if(orf.type==r16S){r16SOut++;} jpayne@68: else if(orf.type==r23S){r23SOut++;} jpayne@68: else if(orf.type==r5S){r5SOut++;} jpayne@68: else if(orf.type==r18S){r18SOut++;} jpayne@68: } jpayne@68: Collections.sort(bestPath); jpayne@68: return bestPath; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Calculate the best path to this ORF. jpayne@68: * @param orf jpayne@68: * @param frameLists jpayne@68: * @param lastPositionScored jpayne@68: * @param bestIndex jpayne@68: */ jpayne@68: private void calcPathScore(Orf orf, ArrayList[] frameLists, int[] lastPositionScored, int[][] bestIndex){ jpayne@68: final int myListNum=3*orf.strand+orf.frame; jpayne@68: jpayne@68: // System.err.println("* "+orf); jpayne@68: // System.err.println("* "+Arrays.toString(lastPositionScored)); jpayne@68: // System.err.println(); jpayne@68: jpayne@68: for(int listStrand=0; listStrand<2; listStrand++){ jpayne@68: for(int listFrame=0; listFrame<3; listFrame++){ jpayne@68: int listNum=listFrame+3*listStrand; jpayne@68: ArrayList list=frameLists[listNum]; jpayne@68: int lastPos=lastPositionScored[listNum]; jpayne@68: int bestPos=bestIndex[listStrand][listNum]; jpayne@68: if(listStrand==0){ jpayne@68: calcPathScorePlus(orf, list, listStrand, lastPos, bestPos); jpayne@68: }else{ jpayne@68: calcPathScoreMinus(orf, list, listStrand, lastPos, bestPos); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: // System.err.println(myListNum+", "+Arrays.toString(lastPositionScored)+", "+frameLists[myListNum].size()); jpayne@68: jpayne@68: lastPositionScored[myListNum]++; jpayne@68: assert(frameLists[myListNum].get(lastPositionScored[myListNum])==orf) : myListNum+"\n"+orf+"\n"+frameLists[myListNum].get(lastPositionScored[myListNum])+"\n" jpayne@68: +Arrays.toString(lastPositionScored)+"\n"+frameLists[myListNum].get(lastPositionScored[myListNum]+1); jpayne@68: jpayne@68: //These are sanity checks to make sure that the path did not break in the middle. jpayne@68: //Safe to disable. jpayne@68: // assert(orf.prevPlus!=null || orf.stop<100000); jpayne@68: // assert(orf.prevMinus!=null || orf.stop<100000); jpayne@68: // assert(orf.pathScore>-10) : orf.pathScore+"\n"+orf+"\n"+orf.prev+"\n"; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Calculate the best path to this ORF from a plus-strand previous ORF. jpayne@68: * @param orf jpayne@68: * @param list jpayne@68: * @param listStrand jpayne@68: * @param lastPos jpayne@68: * @param bestPos jpayne@68: */ jpayne@68: private void calcPathScorePlus(final Orf orf, final ArrayList list, final int listStrand, final int lastPos, final int bestPos){ jpayne@68: assert(listStrand==0); jpayne@68: if(lastPos<0){ jpayne@68: if(orf.prevPlus==null){ jpayne@68: orf.pathScorePlus=orf.orfScore; jpayne@68: orf.pathLengthPlus=1; jpayne@68: } jpayne@68: return; jpayne@68: } jpayne@68: if(list.isEmpty()){return;} jpayne@68: jpayne@68: // System.err.println("\nExamining \t"+orf+"\nlastPos="+lastPos+", bestPos="+bestPos+", sameFrame="+sameFrame); jpayne@68: boolean found=false; jpayne@68: final boolean sameStrand=(orf.strand==listStrand); jpayne@68: final int maxOverlap=(sameStrand ? maxOverlapSameStrand : maxOverlapOppositeStrand); jpayne@68: for(int i=lastPos, min=Tools.max(0, bestPos-lookbackPlus); i>=min || (i>0 && !found); i--){ jpayne@68: Orf prev=list.get(i); jpayne@68: assert(prev!=orf) : prev; jpayne@68: // System.err.println("Comparing to \t"+prev); jpayne@68: if(orf.isValidPrev(prev, maxOverlap)){ jpayne@68: int overlap=Tools.max(0, prev.stop-orf.start+1); jpayne@68: float orfScore=overlap==0 ? orf.orfScore : orf.calcOrfScore(overlap); jpayne@68: jpayne@68: final float prevScore=prev.pathScore(); jpayne@68: final int prevLength=prev.pathLength(); jpayne@68: jpayne@68: float pathScore; jpayne@68: final int pathLength; jpayne@68: if(sameStrand){ jpayne@68: pathLength=prevLength+1; jpayne@68: pathScore=prevScore+orfScore; jpayne@68: pathScore+=p0+p1*(Tools.mid(p5*(p2+pathLength), p6*(p3-pathLength), p4)); jpayne@68: }else{ jpayne@68: pathLength=1; jpayne@68: pathScore=prev.pathScore()+orfScore; jpayne@68: pathScore+=q1+Tools.mid(q2*prevLength, q3+q4*prevLength, q5); jpayne@68: } jpayne@68: jpayne@68: if(overlap<1 && prevScore>0){found=true;} jpayne@68: if(pathScore>=orf.pathScorePlus){ jpayne@68: orf.pathScorePlus=pathScore; jpayne@68: orf.prevPlus=prev; jpayne@68: orf.pathLengthPlus=pathLength; jpayne@68: // System.err.println("Set as best"); jpayne@68: } jpayne@68: } jpayne@68: if(found && prev.stop list, final int listStrand, final int lastPos, final int bestPos){ jpayne@68: assert(listStrand==1); jpayne@68: if(lastPos<0){ jpayne@68: if(orf.prevMinus==null){ jpayne@68: orf.pathScoreMinus=orf.orfScore; jpayne@68: orf.pathLengthMinus=1; jpayne@68: } jpayne@68: return; jpayne@68: } jpayne@68: if(list.isEmpty()){return;} jpayne@68: jpayne@68: // System.err.println("\nExamining \t"+orf+"\nlastPos="+lastPos+", bestPos="+bestPos+", sameFrame="+sameFrame); jpayne@68: boolean found=false; jpayne@68: final boolean sameStrand=(orf.strand==listStrand); jpayne@68: final int maxOverlap=(sameStrand ? maxOverlapSameStrand : maxOverlapOppositeStrand); jpayne@68: for(int i=lastPos, min=Tools.max(0, bestPos-lookbackMinus); i>=min || (i>0 && !found); i--){ jpayne@68: Orf prev=list.get(i); jpayne@68: assert(prev!=orf) : prev; jpayne@68: // System.err.println("Comparing to \t"+prev); jpayne@68: if(orf.isValidPrev(prev, maxOverlap)){ jpayne@68: int overlap=Tools.max(0, prev.stop-orf.start+1); jpayne@68: float orfScore=overlap==0 ? orf.orfScore : orf.calcOrfScore(overlap); jpayne@68: jpayne@68: final float prevScore=prev.pathScore(); jpayne@68: final int prevLength=prev.pathLength(); jpayne@68: jpayne@68: float pathScore; jpayne@68: final int pathLength; jpayne@68: if(sameStrand){ jpayne@68: pathLength=prevLength+1; jpayne@68: pathScore=prevScore+orfScore; jpayne@68: pathScore+=p0+p1*(Tools.mid(p5*(p2+pathLength), p6*(p3-pathLength), p4)); jpayne@68: }else{ jpayne@68: pathLength=1; jpayne@68: pathScore=prev.pathScore()+orfScore; jpayne@68: pathScore+=q1+Tools.mid(q2*prevLength, q3+q4*prevLength, q5); jpayne@68: } jpayne@68: if(overlap<1 && prevScore>0){found=true;} jpayne@68: if(pathScore>=orf.pathScoreMinus){ jpayne@68: orf.pathScoreMinus=pathScore; jpayne@68: orf.prevMinus=prev; jpayne@68: orf.pathLengthMinus=pathLength; jpayne@68: // System.err.println("Set as best"); jpayne@68: } jpayne@68: } jpayne@68: if(found && prev.stop makeOrfsForFrame(String name, byte[] bases, int startFrame, int strand, int minlen){ jpayne@68: // assert(false) : "TODO"; jpayne@68: assert(minlen>=3); jpayne@68: if(bases==null || bases.length orfs=new ArrayList(); jpayne@68: if(!ProkObject.callCDS){return orfs;} jpayne@68: // int mask=63; jpayne@68: int code=0; jpayne@68: int start=-2; jpayne@68: int frame=0; jpayne@68: int pos=startFrame; jpayne@68: jpayne@68: jpayne@68: for(; pos=0){ jpayne@68: if(GeneModel.isStopCodon(code) || code<0){//NOTE: This adds a stop codon wherever there are Ns. jpayne@68: int len=pos-start+1; jpayne@68: if(len>=minlen){ jpayne@68: Orf f=new Orf(name, start, pos, strand, startFrame, bases, true, CDS); jpayne@68: orfs.add(f); jpayne@68: } jpayne@68: start=-1; jpayne@68: } jpayne@68: }else{ jpayne@68: if(start==-2 || (start<0 && GeneModel.isStartCodon(code))){ jpayne@68: start=pos-2; jpayne@68: } jpayne@68: } jpayne@68: code=0; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Add a stop codon at the sequence end. jpayne@68: if(start>=0){ jpayne@68: pos--; jpayne@68: while(frame!=3 && frame!=-1){ jpayne@68: pos--; jpayne@68: frame--; jpayne@68: } jpayne@68: int len=pos-start+1; jpayne@68: if(len>=minlen){ jpayne@68: assert(pos makeRnasForStrand(String name, byte[] bases, int strand, StatsContainer sc, float[] scores, int[] kmersSeen, boolean quitEarly, float bias){ jpayne@68: final int window=sc.lengthAvg; jpayne@68: if(bases==null || bases.length*2 orfs=new ArrayList(sc.type==tRNA ? 32 : 8); jpayne@68: jpayne@68: final FrameStats inner=sc.inner; jpayne@68: // final FrameStats start=sc.start; jpayne@68: // final FrameStats stop=sc.stop; jpayne@68: jpayne@68: final int k=inner.k; jpayne@68: final int mask=inner.mask; jpayne@68: // final float invLen=sc.invLengthAvg; jpayne@68: final int halfWindow=window/2; jpayne@68: final int maxWindow=(int)(window*1.5f); jpayne@68: final int maxWindow2=(int)(window*2.5f); jpayne@68: // final int slop=Tools.max(50, window/8); jpayne@68: int len=0; jpayne@68: int kmer=0; jpayne@68: float currentScore=0; jpayne@68: float currentScoreAbs=0; jpayne@68: bias=(bias>-1 ? bias : biases[sc.type]); jpayne@68: final float maxBias=biases[sc.type]*1.45f; jpayne@68: jpayne@68: float thresh=cutoff1[sc.type]; jpayne@68: float prevScore=0; jpayne@68: int lastStart=0; jpayne@68: jpayne@68: float max=0; jpayne@68: int maxPos=0; jpayne@68: jpayne@68: for(int pos=0; pos=0 && b<128) : "Invalid base b="+((int)b)+"; pos="+pos+"\n"+new String(bases)+"\n"; jpayne@68: final int x=AminoAcid.baseToNumber[b]; jpayne@68: kmer=((kmer<<2)|x)&mask; jpayne@68: jpayne@68: if(x>=0){ jpayne@68: len++; jpayne@68: if(len>=k){ jpayne@68: float prob=inner.probs[0][kmer]; jpayne@68: float dif=prob-bias;//Prob above 1 is more likely than average jpayne@68: currentScoreAbs+=prob; jpayne@68: currentScore=Tools.max(0, currentScore+dif); jpayne@68: } jpayne@68: jpayne@68: if(currentScore>0){ jpayne@68: if(currentScore>max){ jpayne@68: max=currentScore; jpayne@68: maxPos=pos; jpayne@68: } jpayne@68: if(prevScore<=0){ jpayne@68: lastStart=pos; jpayne@68: } jpayne@68: }else{ jpayne@68: int rnaLen=maxPos-lastStart; jpayne@68: if(max>thresh && rnaLen>=halfWindow){ jpayne@68: if(rnaLen>maxWindow){ jpayne@68: if(bias<=maxBias){ jpayne@68: orfs=null; jpayne@68: float biasMult=(rnaLen>8*window ? 1.2f : rnaLen>4*window ? 1.1f : 1.05f); jpayne@68: return makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, quitEarly, bias*biasMult); jpayne@68: } jpayne@68: } jpayne@68: if(rnaLen<=maxWindow2){ jpayne@68: Orf orf=new Orf(name, lastStart, maxPos, strand, 0, bases, false, sc.type); jpayne@68: orfs.add(orf); jpayne@68: orf.orfScore=max; jpayne@68: if(verbose){ jpayne@68: final int start2=(strand==0 ? lastStart : bases.length-maxPos-1); jpayne@68: final int stop2=(strand==0 ? maxPos : bases.length-lastStart-1); jpayne@68: System.err.println("Made Orf "+start2+"\t"+stop2+"\t"+max); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: max=0; jpayne@68: lastStart=pos; jpayne@68: } jpayne@68: // System.err.println("i="+i+"\tscore="+score+"\tmax="+max+"\tmaxPos="+maxPos+"\tprevScore="+prevScore+"\tlastStart="+lastStart); jpayne@68: prevScore=currentScore; jpayne@68: jpayne@68: // if(pos>=223000 && pos<232000){ jpayne@68: //// System.err.println("i="+i+"\tscore="+score+"\tmax="+max+"\tmaxPos="+maxPos+"\tprevScore="+prevScore+"\tlastStart="+lastStart); jpayne@68: // System.out.println(pos+"\t"+currentScore); jpayne@68: // } jpayne@68: jpayne@68: }else{ jpayne@68: len=0; jpayne@68: kmer=0; jpayne@68: } jpayne@68: scores[pos]=currentScoreAbs; jpayne@68: } jpayne@68: jpayne@68: // System.err.println("size="+orfs.size()+", type="+Orf.typeStrings[sc.type]); jpayne@68: jpayne@68: jpayne@68: { jpayne@68: int rnaLen=maxPos-lastStart; jpayne@68: if(max>thresh && rnaLen>=halfWindow){ jpayne@68: if(rnaLen>maxWindow){ jpayne@68: if(bias<=maxBias){ jpayne@68: orfs=null; jpayne@68: float biasMult=(rnaLen>8*window ? 1.2f : rnaLen>4*window ? 1.1f : 1.05f); jpayne@68: return makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, quitEarly, bias*biasMult); jpayne@68: } jpayne@68: } jpayne@68: if(rnaLen<=maxWindow2){ jpayne@68: Orf orf=new Orf(name, lastStart, maxPos, strand, 0, bases, false, sc.type); jpayne@68: orfs.add(orf); jpayne@68: orf.orfScore=max; jpayne@68: if(verbose){ jpayne@68: final int start2=(strand==0 ? lastStart : bases.length-maxPos-1); jpayne@68: final int stop2=(strand==0 ? maxPos : bases.length-lastStart-1); jpayne@68: System.err.println(start2+"\t"+stop2+"\t"+max); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(kmersSeen!=null && orfs.size()>0 && sc.kmerSet()!=null){ jpayne@68: fillKmersSeen(bases, kmersSeen, sc.kmerSet(), sc.kLongLen()); jpayne@68: } jpayne@68: jpayne@68: float cutoff=cutoff2[sc.type]; jpayne@68: jpayne@68: for(int i=0; i=0){ jpayne@68: // len++; jpayne@68: // if(len>=k){ jpayne@68: // float prob=inner.probs[0][kmer]; jpayne@68: // float dif=prob-1.2f;//Prob above 1 is more likely than average jpayne@68: // currentScore=Tools.max(0, currentScore+dif); jpayne@68: // if(currentScore>0){ jpayne@68: // currentStreak++; jpayne@68: // }else{ jpayne@68: // currentStreak=0; jpayne@68: // } jpayne@68: // if(currentScore>200 && currentStreak>1500 && currentStreak<1700){ jpayne@68: // Orf orf=new Orf(name, pos-currentStreak-1, pos, strand, 0, bases, false); jpayne@68: // orfs.add(orf); jpayne@68: // orf.orfScore=currentScore; jpayne@68: // orf.startScore=start.scorePoint(orf.start, bases); jpayne@68: // orf.stopScore=stop.scorePoint(orf.stop, bases); jpayne@68: // currentStreak=0; jpayne@68: // currentScore=0; jpayne@68: // } jpayne@68: // } jpayne@68: // }else{ jpayne@68: // len=0; jpayne@68: // kmer=0; jpayne@68: // } jpayne@68: // } jpayne@68: jpayne@68: return orfs; jpayne@68: } jpayne@68: jpayne@68: void fillKmersSeen(byte[] bases, int[] kmersSeen, LongHashSet set, int k){ jpayne@68: final long mask=~((-1L)<<(2*k)); jpayne@68: long kmer=0; jpayne@68: int len=0; jpayne@68: int seen=0; jpayne@68: for(int i=0; i=0){ jpayne@68: len++; jpayne@68: kmer=((kmer<<2)|num)&mask; jpayne@68: if(len>=k && set.contains(kmer)){seen++;} jpayne@68: }else{ jpayne@68: len=0; jpayne@68: } jpayne@68: kmersSeen[i]=seen; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: boolean refineRna(Orf orf, byte[] bases, int strand, StatsContainer sc, float[] scores, int[] kmersSeen){ jpayne@68: if(orf==null){return false;} jpayne@68: if(verbose){System.err.println("REFINE: "+orf.toStringFlipped());} jpayne@68: final int window=sc.lengthAvg; jpayne@68: // final int halfWindow=window/2; jpayne@68: // final int maxWindow=(int)(window*1.5f); jpayne@68: final int slop=Tools.max(60, 10+window/8); jpayne@68: final float invWindow=sc.invLengthAvg; jpayne@68: final float oldScore=orf.orfScore; jpayne@68: IntList starts=new IntList(); jpayne@68: IntList stops=new IntList(); jpayne@68: FloatList startScores=new FloatList(); jpayne@68: FloatList stopScores=new FloatList(); jpayne@68: jpayne@68: final int leftmost=Tools.max(0, orf.start-slop); jpayne@68: final int rightmost=Tools.min(bases.length-1, orf.stop+slop); jpayne@68: if(kmersSeen!=null){ jpayne@68: if(kmersSeen[leftmost]>=kmersSeen[rightmost]){ jpayne@68: // System.err.println("Bad: "+oldScore); jpayne@68: orf.orfScore=-999; jpayne@68: return false; jpayne@68: }else{ jpayne@68: // System.err.println("Good: "+oldScore); jpayne@68: } jpayne@68: } jpayne@68: if(verbose){System.err.println("REFINE2");} jpayne@68: jpayne@68: {//starts jpayne@68: final int left=leftmost; jpayne@68: final int right=Tools.min(bases.length-1, orf.stop+slop-window); jpayne@68: final float thresh=cutoff3[sc.type]; jpayne@68: fillPoints(left, right, bases, sc.start, thresh, starts, startScores); jpayne@68: } jpayne@68: if(verbose){System.err.println("starts: "+starts.size);} jpayne@68: // if((orf.start+"").startsWith("146") || true){System.err.println(starts);} jpayne@68: if(verbose){System.err.println(startScores);} jpayne@68: jpayne@68: {//stops jpayne@68: final int left=Tools.max(0, orf.start-slop+window); jpayne@68: final int right=rightmost; jpayne@68: final float thresh=cutoff4[sc.type]; jpayne@68: fillPoints(left, right, bases, sc.stop, thresh, stops, stopScores); jpayne@68: } jpayne@68: if(verbose){System.err.println("stops: "+stops.size);} jpayne@68: // if((orf.start+"").startsWith("146") || true){System.err.println(stops);} jpayne@68: if(verbose){System.err.println(stopScores);} jpayne@68: jpayne@68: final float innerThresh=cutoff5[sc.type]; jpayne@68: jpayne@68: final int minlen=Tools.max(window/2, window-slop); jpayne@68: final int maxlen=window+slop; jpayne@68: jpayne@68: orf.orfScore=0; jpayne@68: int lastStop=0; jpayne@68: for(int i=0; iorf.orfScore){ jpayne@68: orf.start=start; jpayne@68: orf.stop=stop; jpayne@68: orf.kmerScore=innerScore*len; jpayne@68: // if(verbose){ jpayne@68: // final int start2=(strand==0 ? start : bases.length-stop-1); jpayne@68: // final int stop2=(strand==0 ? stop : bases.length-start-1); jpayne@68: // System.err.println(start2+"-"+stop2+", "+startScore+", "+stopScore+", "+innerScore+", "+(score*scoreMult[sc.type])+", "+oldScore); jpayne@68: // } jpayne@68: orf.startScore=startScore; jpayne@68: orf.stopScore=stopScore; jpayne@68: orf.orfScore=score; jpayne@68: } jpayne@68: }else{ jpayne@68: assert(true); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: orf.orfScore*=scoreMult[sc.type]; jpayne@68: jpayne@68: if(starts.isEmpty() || stops.isEmpty()){ jpayne@68: if(verbose){System.err.println("No starts or stops.");} jpayne@68: orf.orfScore=Tools.min(-999, orf.orfScore-9999); jpayne@68: return false; jpayne@68: } jpayne@68: return refineByAlignment(orf, bases, strand, sc);//Returns true if no consensus is present jpayne@68: } jpayne@68: jpayne@68: boolean refineByAlignment(Orf orf, byte[] bases, int strand, StatsContainer sc){ jpayne@68: if(verbose){System.err.println("ALIGN");} jpayne@68: Read[] consensus=ProkObject.consensusReads(sc.type); jpayne@68: if(consensus==null || consensus.length==0){return true;} jpayne@68: boolean refined=false; jpayne@68: // System.err.println("Initial: "+orf.start+", "+orf.stop); jpayne@68: for(Read r : consensus){ jpayne@68: // refined=refineByAlignment(orf, bases, strand, sc, r.bases, 15, 15, 2); jpayne@68: refined=refineByAlignment(orf, bases, strand, sc, r.bases, sc.startSlop(), sc.stopSlop(), 2); jpayne@68: if(refined || sc.type==r18S || true){break;} jpayne@68: } jpayne@68: if(refined){ jpayne@68: if(verbose){System.err.println("Aligned to: "+orf.start+", "+orf.stop);} jpayne@68: }else{ jpayne@68: if(verbose){System.err.println("Alignment failed.");} jpayne@68: orf.orfScore=Tools.min(-999, orf.orfScore-9999); jpayne@68: } jpayne@68: return refined; jpayne@68: } jpayne@68: jpayne@68: boolean refineByAlignment(Orf orf, byte[] bases, int strand, StatsContainer sc, byte[] consensus, jpayne@68: final int startSlop, final int stopSlop, int recurLimit){ jpayne@68: final int start0=orf.start; jpayne@68: final int stop0=orf.stop; jpayne@68: jpayne@68: assert(start0>=0 && start0=0 && stop010*sc.lengthAvg && reflen>20000){ jpayne@68: System.err.println("Skipped reflen "+reflen+"/"+sc.lengthAvg+" for " jpayne@68: + "seqlen="+bases.length+", orf="+orf.toString()); jpayne@68: assert(false); jpayne@68: //TODO: Possibly change return to -1, 0, 1 ("can't align") jpayne@68: //Should be a limit on window size... jpayne@68: //Also consider shrinking matrix after jumbo alignments jpayne@68: return false; jpayne@68: } jpayne@68: assert(a>=0 && b=0 && rstart3 && recurLimit>0){ jpayne@68: final int padLeft=score[3]; jpayne@68: final int padRight=score[4]; jpayne@68: //TODO: This takes extra memory. It may be better to cap the width or ignore start0/stop0 here. jpayne@68: int rstart2=Tools.max(0, Tools.min(start0, rstart)-10); jpayne@68: int rstop2=Tools.min(bases.length-1, Tools.max(stop0, rstop)+10); jpayne@68: assert(rstart2>=0 && rstart2=0 && rstop2b){ jpayne@68: boolean ret=refineByAlignment(orf, bases, strand, sc, consensus, 0, 0, recurLimit-1); jpayne@68: if(ret){return true;} jpayne@68: } jpayne@68: orf.start=start0; jpayne@68: orf.stop=stop0; jpayne@68: // assert(false) : "Don't traceback after recur."; jpayne@68: } jpayne@68: if(verbose){ jpayne@68: System.err.println("Identity: "+id); jpayne@68: } jpayne@68: // assert(score.length==3) : "TODO: Handle padding requests."; jpayne@68: jpayne@68: // System.err.println("Identity: "+String.format("%.2f", 100*id)+"; location: "+rstart+"-"+rstop); jpayne@68: if(idstartSlop){orf.start=rstart;} jpayne@68: if(Tools.absdif(rstop, stop0)>stopSlop){orf.stop=rstop;} jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: void fillPoints(final int left, final int right, final byte[] bases, final FrameStats fs, float thresh, final IntList points, final FloatList scores){ jpayne@68: points.clear(); jpayne@68: scores.clear(); jpayne@68: final float minThresh=thresh;//thresh*0.05f; jpayne@68: // System.err.println(left+", "+right+", "+thresh+", "+minThresh); jpayne@68: while(points.size()<8 && thresh>=minThresh){ jpayne@68: // System.err.println("Running with thresh="+thresh); jpayne@68: points.clear(); jpayne@68: scores.clear(); jpayne@68: for(int i=left; i=thresh){ jpayne@68: points.add(i); jpayne@68: scores.add(score); jpayne@68: } jpayne@68: } jpayne@68: thresh=thresh*0.75f; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Generate all possible genes from each Orf, and return them in a new set of lists. jpayne@68: * @param frameLists jpayne@68: * @param bases jpayne@68: * @return Lists of orfs. jpayne@68: */ jpayne@68: private ArrayList[] breakOrfs(ArrayList[] frameLists, byte[] bases){ jpayne@68: jpayne@68: @SuppressWarnings("unchecked") jpayne@68: ArrayList[] brokenLists=new ArrayList[6]; jpayne@68: for(int strand=0; strand<2; strand++){ jpayne@68: for(int frame=0; frame<3; frame++){ jpayne@68: int fnum=frame+3*strand; jpayne@68: ArrayList longest=frameLists[fnum]; //Longest Orf per stop jpayne@68: ArrayList broken=new ArrayList(); //All high scoring Orfs, including multiple per stop, with different starts jpayne@68: if(longest!=null) { jpayne@68: for(Orf orf : longest){ jpayne@68: assert(orf.frame==frame); jpayne@68: assert(orf.strand==strand); jpayne@68: ArrayList temp=breakOrf(orf, bases); jpayne@68: if(temp!=null){ jpayne@68: broken.addAll(temp); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: Collections.sort(broken); jpayne@68: brokenLists[fnum]=broken; jpayne@68: } jpayne@68: //Reverse-complement bases after processing each strand jpayne@68: AminoAcid.reverseComplementBasesInPlace(bases); jpayne@68: } jpayne@68: return brokenLists; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Generate an Orf for each possible start codon. jpayne@68: * Retain only the high-scoring ones. jpayne@68: * @param longest Longest open reading frame for a given stop. jpayne@68: * @param bases Bases, oriented for this Orf. jpayne@68: * @return List of Orfs. jpayne@68: */ jpayne@68: private ArrayList breakOrf(Orf longest, byte[] bases){ jpayne@68: assert(longest.start0) : pgm.statsCDS.inner; jpayne@68: jpayne@68: final int k=innerStats.k; jpayne@68: final int mask=~((-1)<<(2*k)); jpayne@68: jpayne@68: final float stopScore=stopStats.scorePoint(longest.stop, bases); jpayne@68: stCds.geneStopScoreSum+=stopScore; jpayne@68: stCds.geneStopScoreCount++; jpayne@68: jpayne@68: ArrayList broken=new ArrayList(); jpayne@68: int created=0; jpayne@68: jpayne@68: int codon=0; jpayne@68: int kmer=0; jpayne@68: int len=0; jpayne@68: int numKmers=0; jpayne@68: float currentScore=0; jpayne@68: for(int pos=start, currentFrame=0; pos<=stop; pos++){ jpayne@68: final byte b=bases[pos]; jpayne@68: final int x=AminoAcid.baseToNumber[b]; jpayne@68: codon=((codon<<2)|x); jpayne@68: kmer=((kmer<<2)|x)&mask; jpayne@68: jpayne@68: if(x>=0){ jpayne@68: len++; jpayne@68: if(len>=k){ jpayne@68: float prob=innerStats.probs[currentFrame][kmer]; jpayne@68: float dif=prob-0.99f;//Prob above 1 is more likely than average jpayne@68: currentScore+=dif; jpayne@68: // outstream.println("pos="+pos+"\tdif="+String.format(Locale.ROOT, "%.4f", dif)+",\tscore="+String.format(Locale.ROOT, "%.4f", currentScore)+ jpayne@68: // "\tasStart="+String.format(Locale.ROOT, "%.4f", pgm.calcStartScore(pos-2, bases))+"\tasStop="+String.format(Locale.ROOT, "%.4f", stopStats.scorePoint(pos, bases))+ jpayne@68: // "\tcodon="+AminoAcid.kmerToString(kmer, 3)+" frame="+(currentFrame)); jpayne@68: }else{ jpayne@68: // outstream.println("pos="+pos+"\tdif="+String.format(Locale.ROOT, "%.4f", 0.0)+",\tscore="+String.format(Locale.ROOT, "%.4f", 0.0)+ jpayne@68: // "\tasStart="+String.format(Locale.ROOT, "%.4f", pgm.calcStartScore(pos-2, bases))+"\tasStop="+String.format(Locale.ROOT, "%.4f", stopStats.scorePoint(pos, bases))+ jpayne@68: // "\tcodon="+AminoAcid.kmerToString(kmer, 3)+" frame="+(currentFrame)); jpayne@68: } jpayne@68: }else{ jpayne@68: len=0; jpayne@68: kmer=0; jpayne@68: } jpayne@68: jpayne@68: currentFrame++; jpayne@68: // outstream.println("pos="+pos+", codon="+AminoAcid.kmerToString(kmer, 3)+", frame="+currentFrame+", start="+start+", isStartCodon="+pgm.isStartCodon(codon)); jpayne@68: if(currentFrame>2){ jpayne@68: currentFrame=0; jpayne@68: if(pos=minLen) : "glen="+glen+", minLen="+minLen+", pos="+pos+", max="+max+", start="+start; jpayne@68: jpayne@68: int oStart=pos-2; jpayne@68: float startScore=startStats.scorePoint(oStart, bases); jpayne@68: jpayne@68: stCds.geneStartScoreSum+=startScore; jpayne@68: stCds.geneStartScoreCount++; jpayne@68: jpayne@68: stCds.lengthSum+=(stop-(pos-2)+1); jpayne@68: stCds.lengthCount++; jpayne@68: jpayne@68: if((startScore>=minStartScore || pos<6) /* && stopScore>=minStopScore /*|| broken.isEmpty()*/){ jpayne@68: Orf orf=new Orf(name, pos-2, stop, strand, longest.frame, bases, false, longest.type); jpayne@68: jpayne@68: geneStartsMade++; jpayne@68: orf.kmerScore=currentScore; jpayne@68: orf.startScore=startScore; jpayne@68: orf.stopScore=stopScore; jpayne@68: jpayne@68: assert(orf.frame==longest.frame); jpayne@68: assert(orf.strand==strand); jpayne@68: jpayne@68: if(strand==1){orf.flip();} jpayne@68: broken.add(orf); jpayne@68: created++; jpayne@68: } jpayne@68: } jpayne@68: codon=0; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: final int size=broken.size(); jpayne@68: final int sizeCutoff=Tools.max(5, size/2); jpayne@68: if(size<1){return broken;} jpayne@68: Orf best=broken.get(0); jpayne@68: Orf bestStart=broken.get(0); jpayne@68: for(int i=0; i=best.orfScore){best=orf;} jpayne@68: if(orf.startScore>=bestStart.startScore){bestStart=orf;} jpayne@68: jpayne@68: stCds.geneInnerScoreSum+=orf.averageKmerScore(); jpayne@68: stCds.geneInnerScoreCount++; jpayne@68: } jpayne@68: jpayne@68: //Sort to by score descending to eliminate low-scoring copies. jpayne@68: if(broken.size()>1){Collections.sort(broken, Feature.featureComparatorScore);} jpayne@68: jpayne@68: int removed=0; jpayne@68: for(int i=0; isizeCutoff){ jpayne@68: broken.set(i, null); jpayne@68: removed++; jpayne@68: } jpayne@68: } jpayne@68: if(removed>0){ jpayne@68: Tools.condenseStrict(broken); jpayne@68: } jpayne@68: jpayne@68: geneStartsRetained+=broken.size(); jpayne@68: geneStopsRetained+=(broken.size()>0 ? 1 : 0); jpayne@68: jpayne@68: if(flipped==1){longest.flip();} jpayne@68: return broken; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Current gene model. jpayne@68: * TODO: Dynamically swap this as needed for contigs with varying GC. jpayne@68: */ jpayne@68: GeneModel pgm; jpayne@68: jpayne@68: //Gene-calling cutoffs jpayne@68: final int minLen; jpayne@68: final int maxOverlapSameStrand; jpayne@68: final int maxOverlapOppositeStrand; jpayne@68: final float minStartScore; jpayne@68: final float minStopScore; jpayne@68: final float minInnerScore; jpayne@68: final float minOrfScore; jpayne@68: final float minAvgScore; jpayne@68: jpayne@68: // static float[] cutoff1=new float[] {0, 40f, 200f, 150f, 45f}; jpayne@68: // static float[] cutoff2=new float[] {0, 44f, 300f, 150f, 60f}; jpayne@68: // static float[] cutoff3=new float[] {0, 1.7f, 1.5f, 1.4f, 1.8f}; jpayne@68: // static float[] cutoff4=new float[] {0, 1.6f, 1.5f, 0.6f, 1.1f}; jpayne@68: // static float[] cutoff5=new float[] {0, 1.0f, 1.0f, 1.0f, 1.55f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 8f, 200f, 175f, 12f}; jpayne@68: // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.15f}; jpayne@68: jpayne@68: // static float[] cutoff1=new float[] {0, 40f, 140f, 150f, 45f}; jpayne@68: // static float[] cutoff2=new float[] {0, 44f, 300f, 150f, 45f}; jpayne@68: // static float[] cutoff3=new float[] {0, 1.7f, 1.1f, 1.3f, 1.8f}; jpayne@68: // static float[] cutoff4=new float[] {0, 1.6f, 1.3f, 0.5f, 1.1f}; jpayne@68: // static float[] cutoff5=new float[] {0, 1.0f, 1.0f, 1.0f, 1.3f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 8f, 200f, 175f, 12f}; jpayne@68: // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.15f}; jpayne@68: jpayne@68: //for k=4,2,2 jpayne@68: // static float[] cutoff1=new float[] {0, 40f, 140f, 150f, 40f}; jpayne@68: // static float[] cutoff2=new float[] {0, 44f, 300f, 150f, 45f}; jpayne@68: // static float[] cutoff3=new float[] {0, 1.7f, 1.1f, 1.1f, 1.8f}; jpayne@68: // static float[] cutoff4=new float[] {0, 1.6f, 1.3f, 0.45f, 1.1f}; jpayne@68: // static float[] cutoff5=new float[] {0, 1.0f, 1.0f, 1.0f, 1.3f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 8f, 200f, 175f, 12f}; jpayne@68: // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.22f}; jpayne@68: jpayne@68: // //for k=5,3,3 jpayne@68: // static float[] cutoff1=new float[] {0, 40f, 300f, 300f, 135f};//sum score jpayne@68: // static float[] cutoff2=new float[] {0, 44f, 300f, 400f, 100f};//orf score jpayne@68: // static float[] cutoff3=new float[] {0, 1.7f, 1.5f, 1.5f, 3.5f};//start score jpayne@68: // static float[] cutoff4=new float[] {0, 1.6f, 1.5f, 1.4f, 1.8f};//stop score jpayne@68: // static float[] cutoff5=new float[] {0, 1.0f, 1.1f, 1.15f, 1.4f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 8f, 80f, 120f, 5f};//score mult jpayne@68: // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.22f};//bias for sum score jpayne@68: jpayne@68: //for k=6,3,3 - this has low scores for correct 23s stops; may try k=2 or k=4 for that end jpayne@68: //Also, 5S seems to score low very for archaea jpayne@68: // public static int CDS=0, tRNA=1, /*r16S=2,*/ r23S=3, r5S=4, r18S=5, r28S=6, RNA=7; jpayne@68: // static float[] cutoff1=new float[] {0, 100f, 600f, 400f, 300f};//sum score jpayne@68: // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 32f};//orf score jpayne@68: // static float[] cutoff3=new float[] {0, 3.0f, 1.8f, 1.6f, 4.0f};//start score jpayne@68: // static float[] cutoff4=new float[] {0, 2.8f, 2.0f, 0.90f, 1.9f};//stop score jpayne@68: // static float[] cutoff5=new float[] {0, 2.8f, 1.1f, 1.55f, 1.4f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.0f};//score mult jpayne@68: // static float[] biases=new float[] {1f, 1.25f, 1.30f, 1.30f, 1.22f};//16S bias for sum score: 1.25 best for archs, 1.4 best for bacts jpayne@68: jpayne@68: //// {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"}; jpayne@68: // static float[] cutoff1=new float[] {0, 90f, 300f, 400f, 270f, 300f};//sum score jpayne@68: // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 32f, 300f};//orf score jpayne@68: // static float[] cutoff3=new float[] {0, 2.8f, 1.65f, 1.6f, 3.8f, 1.65f};//start score jpayne@68: // static float[] cutoff4=new float[] {0, 2.6f, 1.70f, 0.90f, 1.8f, 1.70f};//stop score jpayne@68: // static float[] cutoff5=new float[] {0, 2.8f, 1.70f, 1.55f, 1.4f, 1.70f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.0f, 35f};//score mult jpayne@68: // static float[] biases=new float[] {1f, 1.50f, 1.30f, 1.30f, 1.30f, 1.30f}; jpayne@68: jpayne@68: //// {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"}; jpayne@68: // static float[] cutoff1=new float[] {0, 90f, 300f, 400f, 90f, 300f};//sum score jpayne@68: // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 24f, 300f};//orf score jpayne@68: // static float[] cutoff3=new float[] {0, 2.8f, 1.65f, 1.6f, 2.0f, 1.65f};//start score jpayne@68: // static float[] cutoff4=new float[] {0, 2.6f, 1.70f, 0.90f, 1.0f, 1.70f};//stop score jpayne@68: // static float[] cutoff5=new float[] {0, 2.8f, 1.70f, 1.55f, 2.6f, 1.70f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.25f, 35f};//score mult jpayne@68: // static float[] biases=new float[] {1f, 1.50f, 1.30f, 1.30f, 1.50f, 1.30f}; jpayne@68: jpayne@68: //// {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"}; jpayne@68: // static float[] cutoff1=new float[] {0, 90f, 300f, 400f, 100f, 300f};//sum score jpayne@68: // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 32f, 300f};//orf score jpayne@68: // static float[] cutoff3=new float[] {0, 2.8f, 1.65f, 1.6f, 1.8f, 1.65f};//start score jpayne@68: // static float[] cutoff4=new float[] {0, 2.6f, 1.70f, 0.90f, 1.0f, 1.70f};//stop score jpayne@68: // static float[] cutoff5=new float[] {0, 2.8f, 1.70f, 1.55f, 2.6f, 1.70f};//inner score jpayne@68: // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.25f, 35f};//score mult jpayne@68: // static float[] biases=new float[] {1f, 1.50f, 1.30f, 1.30f, 1.55f, 1.30f}; jpayne@68: jpayne@68: // {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"}; jpayne@68: static float[] cutoff1=new float[] {0, 20f, 300f, 400f, 100f, 400f};//sum score jpayne@68: static float[] cutoff2=new float[] {0, 36f, 300f, 300f, 32f, 300f};//orf score //tRNA is very sensitive here jpayne@68: static float[] cutoff3=new float[] {0, 2.4f, 1.65f, 1.6f, 1.8f, 1.5f};//start score jpayne@68: static float[] cutoff4=new float[] {0, 1.5f, 1.70f, 0.90f, 1.0f, 1.1f};//stop score jpayne@68: static float[] cutoff5=new float[] {0, 2.2f, 1.70f, 1.55f, 2.6f, 1.5f};//inner score jpayne@68: static float[] scoreMult=new float[] {1f, 1.0f, 35f, 80f, 1.25f, 35f};//score mult jpayne@68: static float[] biases=new float[] {1f, 1.45f, 1.30f, 1.30f, 1.55f, 1.50f}; jpayne@68: jpayne@68: long geneStopsMade=0; jpayne@68: long geneStartsMade=0; jpayne@68: long geneStartsRetained=0; jpayne@68: long geneStopsRetained=0; jpayne@68: long geneStartsOut=0; jpayne@68: jpayne@68: long tRNAOut=0; jpayne@68: long r16SOut=0; jpayne@68: long r23SOut=0; jpayne@68: long r5SOut=0; jpayne@68: long r18SOut=0; jpayne@68: jpayne@68: ScoreTracker stCds=new ScoreTracker(CDS); jpayne@68: ScoreTracker stCds2=new ScoreTracker(CDS); jpayne@68: ScoreTracker stCdsPass=new ScoreTracker(CDS); jpayne@68: ScoreTracker sttRNA=new ScoreTracker(tRNA); jpayne@68: ScoreTracker st16s=new ScoreTracker(r16S); jpayne@68: ScoreTracker st23s=new ScoreTracker(r23S); jpayne@68: ScoreTracker st5s=new ScoreTracker(r5S); jpayne@68: ScoreTracker st18s=new ScoreTracker(r18S); jpayne@68: jpayne@68: ScoreTracker[] trackers=new ScoreTracker[] {stCdsPass, sttRNA, st16s, st23s, st5s, st18s}; jpayne@68: jpayne@68: public static SingleStateAlignerFlat2 getSSA(){ jpayne@68: SingleStateAlignerFlat2 ssa=localSSA.get(); jpayne@68: if(ssa==null){ jpayne@68: synchronized(localSSA){ jpayne@68: ssa=localSSA.get(); jpayne@68: if(ssa==null){ jpayne@68: ssa=new SingleStateAlignerFlat2(); jpayne@68: localSSA.set(ssa); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: return ssa; jpayne@68: } jpayne@68: jpayne@68: public static SingleStateAlignerFlat3 getSSA3(){ jpayne@68: SingleStateAlignerFlat3 ssa=localSSA3.get(); jpayne@68: if(ssa==null){ jpayne@68: synchronized(localSSA3){ jpayne@68: ssa=localSSA3.get(); jpayne@68: if(ssa==null){ jpayne@68: ssa=new SingleStateAlignerFlat3(); jpayne@68: localSSA3.set(ssa); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: return ssa; jpayne@68: } jpayne@68: jpayne@68: public static SingleStateAlignerFlatFloat getSSAF(){ jpayne@68: SingleStateAlignerFlatFloat ssa=localSSAF.get(); jpayne@68: if(ssa==null){ jpayne@68: synchronized(localSSAF){ jpayne@68: ssa=localSSAF.get(); jpayne@68: if(ssa==null){ jpayne@68: ssa=new SingleStateAlignerFlatFloat(); jpayne@68: localSSAF.set(ssa); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: return ssa; jpayne@68: } jpayne@68: jpayne@68: private static ThreadLocal localSSA=new ThreadLocal(); jpayne@68: private static ThreadLocal localSSA3=new ThreadLocal(); jpayne@68: private static ThreadLocal localSSAF=new ThreadLocal(); jpayne@68: // public static int maxAlignmentEndpointDifference=15; jpayne@68: public static int alignmentPadding=300; jpayne@68: jpayne@68: public static int breakLimit=12; jpayne@68: public static int lookbackPlus=70; jpayne@68: public static int lookbackMinus=25; jpayne@68: jpayne@68: // pathScore+=p0+p1*(Tools.mid(p5*(p2+pathLength), p6*(p3-pathLength), p4)); jpayne@68: jpayne@68: //Operon length modifiers for same strand jpayne@68: public static float p0=-30f; jpayne@68: public static float p1=-0.35f; //Sensitive - higher decreases FP and TP jpayne@68: public static float p2=4.0f;//Insensitive - higher positive decreases FP and TP jpayne@68: public static float p3=12f; //Higher decreases FP (substantially) and TP (slightly) jpayne@68: public static float p4=-10f; //Lower decreases FP (weakly) and TP (greatly) jpayne@68: public static float p5=2.0f; //Insensitive - lower increases FP and TP jpayne@68: public static float p6=2f; //Greater increases FP and TP jpayne@68: jpayne@68: // pathScore+=q1+Tools.mid(q2*prevLength, q3+q4*prevLength, q5); jpayne@68: jpayne@68: //Operon length modifiers for opposite strand jpayne@68: public static float q1=-36f; jpayne@68: public static float q2=-1.6f; //q2 and q4 must have opposite signs jpayne@68: public static float q3=-12f; jpayne@68: public static float q4=3.0f; //(Lower [even negative] decreases FP and TP) jpayne@68: public static float q5=-40f; jpayne@68: jpayne@68: private static PrintStream outstream=System.err; jpayne@68: static boolean verbose; jpayne@68: jpayne@68: }