annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/GeneCaller.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package prok;
jpayne@68 2
jpayne@68 3 import java.io.PrintStream;
jpayne@68 4 import java.util.ArrayList;
jpayne@68 5 import java.util.Arrays;
jpayne@68 6 import java.util.Collections;
jpayne@68 7
jpayne@68 8 import aligner.SingleStateAlignerFlat2;
jpayne@68 9 import aligner.SingleStateAlignerFlat3;
jpayne@68 10 import aligner.SingleStateAlignerFlatFloat;
jpayne@68 11 import dna.AminoAcid;
jpayne@68 12 import shared.KillSwitch;
jpayne@68 13 import shared.Tools;
jpayne@68 14 import stream.Read;
jpayne@68 15 import structures.FloatList;
jpayne@68 16 import structures.IntList;
jpayne@68 17 import structures.LongHashSet;
jpayne@68 18
jpayne@68 19
jpayne@68 20 /**
jpayne@68 21 * This class calls genes within a single thread.
jpayne@68 22 * @author Brian Bushnell
jpayne@68 23 * @date Sep 24, 2018
jpayne@68 24 *
jpayne@68 25 */
jpayne@68 26 public class GeneCaller extends ProkObject {
jpayne@68 27
jpayne@68 28 /*--------------------------------------------------------------*/
jpayne@68 29 /*---------------- Init ----------------*/
jpayne@68 30 /*--------------------------------------------------------------*/
jpayne@68 31
jpayne@68 32 GeneCaller(int minLen_, int maxOverlapSameStrand_, int maxOverlapOppositeStrand_,
jpayne@68 33 float minStartScore_, float minStopScore_, float minInnerScore_,
jpayne@68 34 float minOrfScore_, float minAvgScore_, GeneModel pgm_){
jpayne@68 35 minLen=minLen_;
jpayne@68 36 maxOverlapSameStrand=maxOverlapSameStrand_;
jpayne@68 37 maxOverlapOppositeStrand=maxOverlapOppositeStrand_;
jpayne@68 38 pgm=pgm_;
jpayne@68 39
jpayne@68 40 minStartScore=minStartScore_;
jpayne@68 41 minStopScore=minStopScore_;
jpayne@68 42 minInnerScore=minInnerScore_;
jpayne@68 43 minOrfScore=minOrfScore_;
jpayne@68 44 minAvgScore=minAvgScore_;
jpayne@68 45 }
jpayne@68 46
jpayne@68 47 /*--------------------------------------------------------------*/
jpayne@68 48 /*---------------- Outer Methods ----------------*/
jpayne@68 49 /*--------------------------------------------------------------*/
jpayne@68 50
jpayne@68 51 public ArrayList<Orf> callGenes(Read r){
jpayne@68 52 return callGenes(r, pgm);
jpayne@68 53 }
jpayne@68 54
jpayne@68 55 public ArrayList<Orf> callGenes(Read r, GeneModel pgm_){
jpayne@68 56 pgm=pgm_;
jpayne@68 57
jpayne@68 58 final String name=r.id;
jpayne@68 59 final byte[] bases=r.bases;
jpayne@68 60
jpayne@68 61 //Lists of all longest orfs per frame
jpayne@68 62 ArrayList<Orf>[] frameLists=makeOrfs(name, bases, minLen);
jpayne@68 63 //Lists of all high-scoring orfs per frame, with potentially multiple orfs sharing stops.
jpayne@68 64 ArrayList<Orf>[] brokenLists=breakOrfs(frameLists, bases);
jpayne@68 65
jpayne@68 66 ArrayList<Orf>[] rnaLists=null;
jpayne@68 67 final int rlen=r.length();
jpayne@68 68 if(calltRNA || (call16S && rlen>800) || (call23S && rlen>1500) || call5S || (call18S && rlen>1000)){
jpayne@68 69 rnaLists=makeRnas(name, bases);
jpayne@68 70
jpayne@68 71 brokenLists[0].addAll(rnaLists[0]);
jpayne@68 72 brokenLists[3].addAll(rnaLists[1]);
jpayne@68 73 Collections.sort(brokenLists[0]);
jpayne@68 74 Collections.sort(brokenLists[3]);
jpayne@68 75 }
jpayne@68 76
jpayne@68 77 boolean printAllOrfs=false;
jpayne@68 78 boolean printRnas=false;
jpayne@68 79 if(printAllOrfs){
jpayne@68 80 ArrayList<Orf> temp=new ArrayList<Orf>();
jpayne@68 81 for(ArrayList<Orf> broken : brokenLists){
jpayne@68 82 temp.addAll(broken);
jpayne@68 83 }
jpayne@68 84 Collections.sort(temp);
jpayne@68 85 return temp;
jpayne@68 86 }
jpayne@68 87
jpayne@68 88 if(printRnas && rnaLists!=null){
jpayne@68 89 ArrayList<Orf> temp=new ArrayList<Orf>();
jpayne@68 90 for(ArrayList<Orf> list : rnaLists){
jpayne@68 91 temp.addAll(list);
jpayne@68 92 }
jpayne@68 93 Collections.sort(temp);
jpayne@68 94 return temp;
jpayne@68 95 }
jpayne@68 96
jpayne@68 97 stCds2.add(brokenLists);
jpayne@68 98
jpayne@68 99 //Find the optimal path through Orfs
jpayne@68 100 ArrayList<Orf> path=findPath(brokenLists, bases);
jpayne@68 101 // geneStartsOut+=path.size();
jpayne@68 102
jpayne@68 103 if(callCDS){stCdsPass.add(path);}
jpayne@68 104 if(calltRNA){sttRNA.add(path);}
jpayne@68 105 if(call16S){st16s.add(path);}
jpayne@68 106 if(call23S){st23s.add(path);}
jpayne@68 107 if(call5S){st5s.add(path);}
jpayne@68 108 if(call18S){st18s.add(path);}
jpayne@68 109
jpayne@68 110 return path;
jpayne@68 111 }
jpayne@68 112
jpayne@68 113 /*--------------------------------------------------------------*/
jpayne@68 114 /*---------------- Inner Methods ----------------*/
jpayne@68 115 /*--------------------------------------------------------------*/
jpayne@68 116
jpayne@68 117 /**
jpayne@68 118 * Generates lists of all max-length non-overlapping Orfs per frame.
jpayne@68 119 * There IS overlap between frames.
jpayne@68 120 * All Orfs come out flipped to + orientation.
jpayne@68 121 * */
jpayne@68 122 ArrayList<Orf>[] makeRnas(String name, byte[] bases){
jpayne@68 123 @SuppressWarnings("unchecked")
jpayne@68 124 ArrayList<Orf>[] array=new ArrayList[2];
jpayne@68 125 array[0]=new ArrayList<Orf>();
jpayne@68 126 array[1]=new ArrayList<Orf>();
jpayne@68 127 final float[] scores=new float[bases.length];
jpayne@68 128 final int[] kmersSeen=(lsuKmers==null && ssuKmers==null && trnaKmers==null && r5SKmers==null) ? null : new int[bases.length];
jpayne@68 129 for(int strand=0; strand<2; strand++){
jpayne@68 130 for(StatsContainer sc : pgm.rnaContainers){
jpayne@68 131 if(ProkObject.callType(sc.type)){
jpayne@68 132 ArrayList<Orf> list=makeRnasForStrand(name, bases, strand, sc, scores, (sc.kmerSet()==null ? null : kmersSeen), false, -1);//TODO: Make this loop through all RNA types
jpayne@68 133 if(strand==1 && list!=null){
jpayne@68 134 for(Orf orf : list){
jpayne@68 135 assert(orf.strand==strand);
jpayne@68 136 orf.flip();
jpayne@68 137 }
jpayne@68 138 }
jpayne@68 139 if(list!=null){array[strand].addAll(list);}
jpayne@68 140 }
jpayne@68 141 }
jpayne@68 142 Collections.sort(array[strand]);
jpayne@68 143 AminoAcid.reverseComplementBasesInPlace(bases);
jpayne@68 144 }
jpayne@68 145 return array;
jpayne@68 146 }
jpayne@68 147
jpayne@68 148 /** Designed for quickly calling a single SSU */
jpayne@68 149 public Orf makeRna(String name, byte[] bases, int type){
jpayne@68 150 final float[] scores=new float[bases.length];//TODO: Big and slow; make a FloatList?
jpayne@68 151 StatsContainer sc=pgm.allContainers[type];
jpayne@68 152 final int[] kmersSeen=(sc.kmerSet()==null ? null : new int[bases.length]);//TODO: IntList?
jpayne@68 153
jpayne@68 154 int strand=0;
jpayne@68 155 ArrayList<Orf> list=makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, true, -1);
jpayne@68 156 final Orf best1=pickBest(list);
jpayne@68 157 assert(best1==null || best1.start>=0 && best1.stop<bases.length) : bases.length+"\n"+best1;
jpayne@68 158 if(best1!=null && best1.orfScore>-999){return best1;}
jpayne@68 159
jpayne@68 160 strand++;
jpayne@68 161 AminoAcid.reverseComplementBasesInPlace(bases);
jpayne@68 162 list=makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, true, -1);
jpayne@68 163 AminoAcid.reverseComplementBasesInPlace(bases);
jpayne@68 164 if(strand==1 && list!=null){
jpayne@68 165 for(Orf orf : list){
jpayne@68 166 assert(orf.strand==strand);
jpayne@68 167 orf.flip();
jpayne@68 168 }
jpayne@68 169 }
jpayne@68 170 final Orf best2=pickBest(list);
jpayne@68 171 assert(best2==null || best2.start>=0 && best2.stop<bases.length) : bases.length+"\n"+best2;
jpayne@68 172 if(best2!=null && best2.orfScore>-999){return best2;}
jpayne@68 173 return best1!=null ? best1 : best2;
jpayne@68 174 }
jpayne@68 175
jpayne@68 176 final Orf pickBest(ArrayList<Orf> list){
jpayne@68 177 if(list==null){return null;}
jpayne@68 178 Orf best=null;
jpayne@68 179 for(Orf orf : list){
jpayne@68 180 if(best==null || orf.orfScore>best.orfScore){
jpayne@68 181 best=orf;
jpayne@68 182 }
jpayne@68 183 }
jpayne@68 184 return best;
jpayne@68 185 }
jpayne@68 186
jpayne@68 187 /**
jpayne@68 188 * Generates lists of all max-length non-overlapping Orfs per frame.
jpayne@68 189 * There IS overlap between frames.
jpayne@68 190 * All Orfs come out flipped to + orientation.
jpayne@68 191 * */
jpayne@68 192 static ArrayList<Orf>[] makeOrfs(String name, byte[] bases, int minlen){
jpayne@68 193 @SuppressWarnings("unchecked")
jpayne@68 194 ArrayList<Orf>[] array=new ArrayList[6];
jpayne@68 195 for(int strand=0; strand<2; strand++){
jpayne@68 196 for(int frame=0; frame<3; frame++){
jpayne@68 197 ArrayList<Orf> list=makeOrfsForFrame(name, bases, frame, strand, minlen);
jpayne@68 198 array[frame+3*strand]=list;
jpayne@68 199 if(strand==1 && list!=null){
jpayne@68 200 for(Orf orf : list){
jpayne@68 201 assert(orf.frame==frame);
jpayne@68 202 assert(orf.strand==strand);
jpayne@68 203 orf.flip();
jpayne@68 204 }
jpayne@68 205 }
jpayne@68 206 }
jpayne@68 207 AminoAcid.reverseComplementBasesInPlace(bases);
jpayne@68 208 }
jpayne@68 209 return array;
jpayne@68 210 }
jpayne@68 211
jpayne@68 212 /**
jpayne@68 213 * Dynamic programming phase.
jpayne@68 214 * @param frameLists
jpayne@68 215 * @param bases
jpayne@68 216 * @return
jpayne@68 217 */
jpayne@68 218 private ArrayList<Orf> findPath(ArrayList<Orf>[] frameLists, byte[] bases){
jpayne@68 219 ArrayList<Orf> all=new ArrayList<Orf>();
jpayne@68 220 for(ArrayList<Orf> list : frameLists){all.addAll(list);}
jpayne@68 221 if(all.isEmpty()){return all;}
jpayne@68 222 Collections.sort(all);
jpayne@68 223
jpayne@68 224 for(Orf orf : all){
jpayne@68 225 orf.pathScorePlus=-999999;
jpayne@68 226 orf.pathScoreMinus=-999999;
jpayne@68 227 }
jpayne@68 228
jpayne@68 229 int[] lastPositionScored=KillSwitch.allocInt1D(6);
jpayne@68 230 Arrays.fill(lastPositionScored, -1);
jpayne@68 231
jpayne@68 232 //Index of highest-scoring ORF in this frame, with prev on the plus strand
jpayne@68 233 int[] bestIndexPlus=KillSwitch.allocInt1D(6);
jpayne@68 234 //Index of highest-scoring ORF in this frame, with prev on the minus strand
jpayne@68 235 int[] bestIndexMinus=KillSwitch.allocInt1D(6);
jpayne@68 236 //Highest-scoring ORF in this frame, with prev on the plus strand
jpayne@68 237 Orf[] bestOrfPlus=new Orf[6];
jpayne@68 238 //Highest-scoring ORF in this frame, with prev on the minus strand
jpayne@68 239 Orf[] bestOrfMinus=new Orf[6];
jpayne@68 240
jpayne@68 241 int[][] bestIndex=new int[][] {bestIndexPlus, bestIndexMinus};
jpayne@68 242 Orf[][] bestOrf=new Orf[][] {bestOrfPlus, bestOrfMinus};
jpayne@68 243
jpayne@68 244 for(Orf orf : all){
jpayne@68 245 final int myListNum=3*orf.strand+orf.frame;
jpayne@68 246 calcPathScore(orf, frameLists, lastPositionScored, bestIndex);
jpayne@68 247 if(bestOrfPlus[myListNum]==null || orf.pathScorePlus>=bestOrfPlus[myListNum].pathScorePlus){
jpayne@68 248 bestOrfPlus[myListNum]=orf;
jpayne@68 249 bestIndexPlus[myListNum]=lastPositionScored[myListNum];
jpayne@68 250 assert(frameLists[myListNum].get(lastPositionScored[myListNum])==orf);
jpayne@68 251 }
jpayne@68 252 if(bestOrfMinus[myListNum]==null || orf.pathScoreMinus>=bestOrfMinus[myListNum].pathScoreMinus){
jpayne@68 253 bestOrfMinus[myListNum]=orf;
jpayne@68 254 bestIndexMinus[myListNum]=lastPositionScored[myListNum];
jpayne@68 255 assert(frameLists[myListNum].get(lastPositionScored[myListNum])==orf);
jpayne@68 256 }
jpayne@68 257 }
jpayne@68 258
jpayne@68 259 Orf best=bestOrf[0][0];
jpayne@68 260 for(Orf[] array : bestOrf){
jpayne@68 261 for(Orf orf : array){
jpayne@68 262 if(best==null || (orf!=null && orf.pathScore()>best.pathScore())){
jpayne@68 263 best=orf;
jpayne@68 264 }
jpayne@68 265 }
jpayne@68 266 }
jpayne@68 267 ArrayList<Orf> bestPath=new ArrayList<Orf>();
jpayne@68 268 for(Orf orf=best; orf!=null; orf=orf.prev()){
jpayne@68 269 bestPath.add(orf);
jpayne@68 270 if(orf.type==CDS){geneStartsOut++;}
jpayne@68 271 else if(orf.type==tRNA){tRNAOut++;}
jpayne@68 272 else if(orf.type==r16S){r16SOut++;}
jpayne@68 273 else if(orf.type==r23S){r23SOut++;}
jpayne@68 274 else if(orf.type==r5S){r5SOut++;}
jpayne@68 275 else if(orf.type==r18S){r18SOut++;}
jpayne@68 276 }
jpayne@68 277 Collections.sort(bestPath);
jpayne@68 278 return bestPath;
jpayne@68 279 }
jpayne@68 280
jpayne@68 281 /**
jpayne@68 282 * Calculate the best path to this ORF.
jpayne@68 283 * @param orf
jpayne@68 284 * @param frameLists
jpayne@68 285 * @param lastPositionScored
jpayne@68 286 * @param bestIndex
jpayne@68 287 */
jpayne@68 288 private void calcPathScore(Orf orf, ArrayList<Orf>[] frameLists, int[] lastPositionScored, int[][] bestIndex){
jpayne@68 289 final int myListNum=3*orf.strand+orf.frame;
jpayne@68 290
jpayne@68 291 // System.err.println("* "+orf);
jpayne@68 292 // System.err.println("* "+Arrays.toString(lastPositionScored));
jpayne@68 293 // System.err.println();
jpayne@68 294
jpayne@68 295 for(int listStrand=0; listStrand<2; listStrand++){
jpayne@68 296 for(int listFrame=0; listFrame<3; listFrame++){
jpayne@68 297 int listNum=listFrame+3*listStrand;
jpayne@68 298 ArrayList<Orf> list=frameLists[listNum];
jpayne@68 299 int lastPos=lastPositionScored[listNum];
jpayne@68 300 int bestPos=bestIndex[listStrand][listNum];
jpayne@68 301 if(listStrand==0){
jpayne@68 302 calcPathScorePlus(orf, list, listStrand, lastPos, bestPos);
jpayne@68 303 }else{
jpayne@68 304 calcPathScoreMinus(orf, list, listStrand, lastPos, bestPos);
jpayne@68 305 }
jpayne@68 306 }
jpayne@68 307 }
jpayne@68 308
jpayne@68 309 // System.err.println(myListNum+", "+Arrays.toString(lastPositionScored)+", "+frameLists[myListNum].size());
jpayne@68 310
jpayne@68 311 lastPositionScored[myListNum]++;
jpayne@68 312 assert(frameLists[myListNum].get(lastPositionScored[myListNum])==orf) : myListNum+"\n"+orf+"\n"+frameLists[myListNum].get(lastPositionScored[myListNum])+"\n"
jpayne@68 313 +Arrays.toString(lastPositionScored)+"\n"+frameLists[myListNum].get(lastPositionScored[myListNum]+1);
jpayne@68 314
jpayne@68 315 //These are sanity checks to make sure that the path did not break in the middle.
jpayne@68 316 //Safe to disable.
jpayne@68 317 // assert(orf.prevPlus!=null || orf.stop<100000);
jpayne@68 318 // assert(orf.prevMinus!=null || orf.stop<100000);
jpayne@68 319 // assert(orf.pathScore>-10) : orf.pathScore+"\n"+orf+"\n"+orf.prev+"\n";
jpayne@68 320 }
jpayne@68 321
jpayne@68 322 /**
jpayne@68 323 * Calculate the best path to this ORF from a plus-strand previous ORF.
jpayne@68 324 * @param orf
jpayne@68 325 * @param list
jpayne@68 326 * @param listStrand
jpayne@68 327 * @param lastPos
jpayne@68 328 * @param bestPos
jpayne@68 329 */
jpayne@68 330 private void calcPathScorePlus(final Orf orf, final ArrayList<Orf> list, final int listStrand, final int lastPos, final int bestPos){
jpayne@68 331 assert(listStrand==0);
jpayne@68 332 if(lastPos<0){
jpayne@68 333 if(orf.prevPlus==null){
jpayne@68 334 orf.pathScorePlus=orf.orfScore;
jpayne@68 335 orf.pathLengthPlus=1;
jpayne@68 336 }
jpayne@68 337 return;
jpayne@68 338 }
jpayne@68 339 if(list.isEmpty()){return;}
jpayne@68 340
jpayne@68 341 // System.err.println("\nExamining \t"+orf+"\nlastPos="+lastPos+", bestPos="+bestPos+", sameFrame="+sameFrame);
jpayne@68 342 boolean found=false;
jpayne@68 343 final boolean sameStrand=(orf.strand==listStrand);
jpayne@68 344 final int maxOverlap=(sameStrand ? maxOverlapSameStrand : maxOverlapOppositeStrand);
jpayne@68 345 for(int i=lastPos, min=Tools.max(0, bestPos-lookbackPlus); i>=min || (i>0 && !found); i--){
jpayne@68 346 Orf prev=list.get(i);
jpayne@68 347 assert(prev!=orf) : prev;
jpayne@68 348 // System.err.println("Comparing to \t"+prev);
jpayne@68 349 if(orf.isValidPrev(prev, maxOverlap)){
jpayne@68 350 int overlap=Tools.max(0, prev.stop-orf.start+1);
jpayne@68 351 float orfScore=overlap==0 ? orf.orfScore : orf.calcOrfScore(overlap);
jpayne@68 352
jpayne@68 353 final float prevScore=prev.pathScore();
jpayne@68 354 final int prevLength=prev.pathLength();
jpayne@68 355
jpayne@68 356 float pathScore;
jpayne@68 357 final int pathLength;
jpayne@68 358 if(sameStrand){
jpayne@68 359 pathLength=prevLength+1;
jpayne@68 360 pathScore=prevScore+orfScore;
jpayne@68 361 pathScore+=p0+p1*(Tools.mid(p5*(p2+pathLength), p6*(p3-pathLength), p4));
jpayne@68 362 }else{
jpayne@68 363 pathLength=1;
jpayne@68 364 pathScore=prev.pathScore()+orfScore;
jpayne@68 365 pathScore+=q1+Tools.mid(q2*prevLength, q3+q4*prevLength, q5);
jpayne@68 366 }
jpayne@68 367
jpayne@68 368 if(overlap<1 && prevScore>0){found=true;}
jpayne@68 369 if(pathScore>=orf.pathScorePlus){
jpayne@68 370 orf.pathScorePlus=pathScore;
jpayne@68 371 orf.prevPlus=prev;
jpayne@68 372 orf.pathLengthPlus=pathLength;
jpayne@68 373 // System.err.println("Set as best");
jpayne@68 374 }
jpayne@68 375 }
jpayne@68 376 if(found && prev.stop<maxOverlap-2000 && orf.prevPlus!=null){
jpayne@68 377 System.err.println("Breaking");
jpayne@68 378 break;
jpayne@68 379 }
jpayne@68 380 }
jpayne@68 381 }
jpayne@68 382
jpayne@68 383 /**
jpayne@68 384 * Calculate the best path to this ORF from a minus-strand previous ORF.
jpayne@68 385 * @param orf
jpayne@68 386 * @param list
jpayne@68 387 * @param listStrand
jpayne@68 388 * @param lastPos
jpayne@68 389 * @param bestPos
jpayne@68 390 */
jpayne@68 391 private void calcPathScoreMinus(final Orf orf, final ArrayList<Orf> list, final int listStrand, final int lastPos, final int bestPos){
jpayne@68 392 assert(listStrand==1);
jpayne@68 393 if(lastPos<0){
jpayne@68 394 if(orf.prevMinus==null){
jpayne@68 395 orf.pathScoreMinus=orf.orfScore;
jpayne@68 396 orf.pathLengthMinus=1;
jpayne@68 397 }
jpayne@68 398 return;
jpayne@68 399 }
jpayne@68 400 if(list.isEmpty()){return;}
jpayne@68 401
jpayne@68 402 // System.err.println("\nExamining \t"+orf+"\nlastPos="+lastPos+", bestPos="+bestPos+", sameFrame="+sameFrame);
jpayne@68 403 boolean found=false;
jpayne@68 404 final boolean sameStrand=(orf.strand==listStrand);
jpayne@68 405 final int maxOverlap=(sameStrand ? maxOverlapSameStrand : maxOverlapOppositeStrand);
jpayne@68 406 for(int i=lastPos, min=Tools.max(0, bestPos-lookbackMinus); i>=min || (i>0 && !found); i--){
jpayne@68 407 Orf prev=list.get(i);
jpayne@68 408 assert(prev!=orf) : prev;
jpayne@68 409 // System.err.println("Comparing to \t"+prev);
jpayne@68 410 if(orf.isValidPrev(prev, maxOverlap)){
jpayne@68 411 int overlap=Tools.max(0, prev.stop-orf.start+1);
jpayne@68 412 float orfScore=overlap==0 ? orf.orfScore : orf.calcOrfScore(overlap);
jpayne@68 413
jpayne@68 414 final float prevScore=prev.pathScore();
jpayne@68 415 final int prevLength=prev.pathLength();
jpayne@68 416
jpayne@68 417 float pathScore;
jpayne@68 418 final int pathLength;
jpayne@68 419 if(sameStrand){
jpayne@68 420 pathLength=prevLength+1;
jpayne@68 421 pathScore=prevScore+orfScore;
jpayne@68 422 pathScore+=p0+p1*(Tools.mid(p5*(p2+pathLength), p6*(p3-pathLength), p4));
jpayne@68 423 }else{
jpayne@68 424 pathLength=1;
jpayne@68 425 pathScore=prev.pathScore()+orfScore;
jpayne@68 426 pathScore+=q1+Tools.mid(q2*prevLength, q3+q4*prevLength, q5);
jpayne@68 427 }
jpayne@68 428 if(overlap<1 && prevScore>0){found=true;}
jpayne@68 429 if(pathScore>=orf.pathScoreMinus){
jpayne@68 430 orf.pathScoreMinus=pathScore;
jpayne@68 431 orf.prevMinus=prev;
jpayne@68 432 orf.pathLengthMinus=pathLength;
jpayne@68 433 // System.err.println("Set as best");
jpayne@68 434 }
jpayne@68 435 }
jpayne@68 436 if(found && prev.stop<maxOverlap-2000 && orf.prevMinus!=null){
jpayne@68 437 System.err.println("Breaking");
jpayne@68 438 break;
jpayne@68 439 }
jpayne@68 440 }
jpayne@68 441 }
jpayne@68 442
jpayne@68 443 /**
jpayne@68 444 * Generates a list of maximal-length Orfs only (non-overlapping).
jpayne@68 445 * All Orfs come out in native orientation (unflipped).
jpayne@68 446 * */
jpayne@68 447 static ArrayList<Orf> makeOrfsForFrame(String name, byte[] bases, int startFrame, int strand, int minlen){
jpayne@68 448 // assert(false) : "TODO";
jpayne@68 449 assert(minlen>=3);
jpayne@68 450 if(bases==null || bases.length<minlen){return null;}
jpayne@68 451 ArrayList<Orf> orfs=new ArrayList<Orf>();
jpayne@68 452 if(!ProkObject.callCDS){return orfs;}
jpayne@68 453 // int mask=63;
jpayne@68 454 int code=0;
jpayne@68 455 int start=-2;
jpayne@68 456 int frame=0;
jpayne@68 457 int pos=startFrame;
jpayne@68 458
jpayne@68 459
jpayne@68 460 for(; pos<bases.length; pos++){
jpayne@68 461 byte b=bases[pos];
jpayne@68 462 int x=AminoAcid.baseToNumber[b];
jpayne@68 463 // code=((code<<2)|x)&mask;
jpayne@68 464 code=((code<<2)|x);
jpayne@68 465 frame++;
jpayne@68 466 if(frame==3){
jpayne@68 467 frame=0;
jpayne@68 468 if(start>=0){
jpayne@68 469 if(GeneModel.isStopCodon(code) || code<0){//NOTE: This adds a stop codon wherever there are Ns.
jpayne@68 470 int len=pos-start+1;
jpayne@68 471 if(len>=minlen){
jpayne@68 472 Orf f=new Orf(name, start, pos, strand, startFrame, bases, true, CDS);
jpayne@68 473 orfs.add(f);
jpayne@68 474 }
jpayne@68 475 start=-1;
jpayne@68 476 }
jpayne@68 477 }else{
jpayne@68 478 if(start==-2 || (start<0 && GeneModel.isStartCodon(code))){
jpayne@68 479 start=pos-2;
jpayne@68 480 }
jpayne@68 481 }
jpayne@68 482 code=0;
jpayne@68 483 }
jpayne@68 484 }
jpayne@68 485
jpayne@68 486 //Add a stop codon at the sequence end.
jpayne@68 487 if(start>=0){
jpayne@68 488 pos--;
jpayne@68 489 while(frame!=3 && frame!=-1){
jpayne@68 490 pos--;
jpayne@68 491 frame--;
jpayne@68 492 }
jpayne@68 493 int len=pos-start+1;
jpayne@68 494 if(len>=minlen){
jpayne@68 495 assert(pos<bases.length) : start+", "+pos+", "+bases.length;
jpayne@68 496 Orf f=new Orf(name, start, pos, strand, startFrame, bases, true, CDS);
jpayne@68 497 orfs.add(f);
jpayne@68 498 }
jpayne@68 499 }
jpayne@68 500
jpayne@68 501 return orfs;
jpayne@68 502 }
jpayne@68 503
jpayne@68 504 /**
jpayne@68 505 * Generates a list of maximal-length RNAs (non-overlapping).
jpayne@68 506 * All RNAs come out in native orientation (unflipped).
jpayne@68 507 * */
jpayne@68 508 ArrayList<Orf> makeRnasForStrand(String name, byte[] bases, int strand, StatsContainer sc, float[] scores, int[] kmersSeen, boolean quitEarly, float bias){
jpayne@68 509 final int window=sc.lengthAvg;
jpayne@68 510 if(bases==null || bases.length*2<window){return null;}
jpayne@68 511 ArrayList<Orf> orfs=new ArrayList<Orf>(sc.type==tRNA ? 32 : 8);
jpayne@68 512
jpayne@68 513 final FrameStats inner=sc.inner;
jpayne@68 514 // final FrameStats start=sc.start;
jpayne@68 515 // final FrameStats stop=sc.stop;
jpayne@68 516
jpayne@68 517 final int k=inner.k;
jpayne@68 518 final int mask=inner.mask;
jpayne@68 519 // final float invLen=sc.invLengthAvg;
jpayne@68 520 final int halfWindow=window/2;
jpayne@68 521 final int maxWindow=(int)(window*1.5f);
jpayne@68 522 final int maxWindow2=(int)(window*2.5f);
jpayne@68 523 // final int slop=Tools.max(50, window/8);
jpayne@68 524 int len=0;
jpayne@68 525 int kmer=0;
jpayne@68 526 float currentScore=0;
jpayne@68 527 float currentScoreAbs=0;
jpayne@68 528 bias=(bias>-1 ? bias : biases[sc.type]);
jpayne@68 529 final float maxBias=biases[sc.type]*1.45f;
jpayne@68 530
jpayne@68 531 float thresh=cutoff1[sc.type];
jpayne@68 532 float prevScore=0;
jpayne@68 533 int lastStart=0;
jpayne@68 534
jpayne@68 535 float max=0;
jpayne@68 536 int maxPos=0;
jpayne@68 537
jpayne@68 538 for(int pos=0; pos<bases.length; pos++){
jpayne@68 539 final byte b=bases[pos];
jpayne@68 540 assert(b>=0 && b<128) : "Invalid base b="+((int)b)+"; pos="+pos+"\n"+new String(bases)+"\n";
jpayne@68 541 final int x=AminoAcid.baseToNumber[b];
jpayne@68 542 kmer=((kmer<<2)|x)&mask;
jpayne@68 543
jpayne@68 544 if(x>=0){
jpayne@68 545 len++;
jpayne@68 546 if(len>=k){
jpayne@68 547 float prob=inner.probs[0][kmer];
jpayne@68 548 float dif=prob-bias;//Prob above 1 is more likely than average
jpayne@68 549 currentScoreAbs+=prob;
jpayne@68 550 currentScore=Tools.max(0, currentScore+dif);
jpayne@68 551 }
jpayne@68 552
jpayne@68 553 if(currentScore>0){
jpayne@68 554 if(currentScore>max){
jpayne@68 555 max=currentScore;
jpayne@68 556 maxPos=pos;
jpayne@68 557 }
jpayne@68 558 if(prevScore<=0){
jpayne@68 559 lastStart=pos;
jpayne@68 560 }
jpayne@68 561 }else{
jpayne@68 562 int rnaLen=maxPos-lastStart;
jpayne@68 563 if(max>thresh && rnaLen>=halfWindow){
jpayne@68 564 if(rnaLen>maxWindow){
jpayne@68 565 if(bias<=maxBias){
jpayne@68 566 orfs=null;
jpayne@68 567 float biasMult=(rnaLen>8*window ? 1.2f : rnaLen>4*window ? 1.1f : 1.05f);
jpayne@68 568 return makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, quitEarly, bias*biasMult);
jpayne@68 569 }
jpayne@68 570 }
jpayne@68 571 if(rnaLen<=maxWindow2){
jpayne@68 572 Orf orf=new Orf(name, lastStart, maxPos, strand, 0, bases, false, sc.type);
jpayne@68 573 orfs.add(orf);
jpayne@68 574 orf.orfScore=max;
jpayne@68 575 if(verbose){
jpayne@68 576 final int start2=(strand==0 ? lastStart : bases.length-maxPos-1);
jpayne@68 577 final int stop2=(strand==0 ? maxPos : bases.length-lastStart-1);
jpayne@68 578 System.err.println("Made Orf "+start2+"\t"+stop2+"\t"+max);
jpayne@68 579 }
jpayne@68 580 }
jpayne@68 581 }
jpayne@68 582 max=0;
jpayne@68 583 lastStart=pos;
jpayne@68 584 }
jpayne@68 585 // System.err.println("i="+i+"\tscore="+score+"\tmax="+max+"\tmaxPos="+maxPos+"\tprevScore="+prevScore+"\tlastStart="+lastStart);
jpayne@68 586 prevScore=currentScore;
jpayne@68 587
jpayne@68 588 // if(pos>=223000 && pos<232000){
jpayne@68 589 //// System.err.println("i="+i+"\tscore="+score+"\tmax="+max+"\tmaxPos="+maxPos+"\tprevScore="+prevScore+"\tlastStart="+lastStart);
jpayne@68 590 // System.out.println(pos+"\t"+currentScore);
jpayne@68 591 // }
jpayne@68 592
jpayne@68 593 }else{
jpayne@68 594 len=0;
jpayne@68 595 kmer=0;
jpayne@68 596 }
jpayne@68 597 scores[pos]=currentScoreAbs;
jpayne@68 598 }
jpayne@68 599
jpayne@68 600 // System.err.println("size="+orfs.size()+", type="+Orf.typeStrings[sc.type]);
jpayne@68 601
jpayne@68 602
jpayne@68 603 {
jpayne@68 604 int rnaLen=maxPos-lastStart;
jpayne@68 605 if(max>thresh && rnaLen>=halfWindow){
jpayne@68 606 if(rnaLen>maxWindow){
jpayne@68 607 if(bias<=maxBias){
jpayne@68 608 orfs=null;
jpayne@68 609 float biasMult=(rnaLen>8*window ? 1.2f : rnaLen>4*window ? 1.1f : 1.05f);
jpayne@68 610 return makeRnasForStrand(name, bases, strand, sc, scores, kmersSeen, quitEarly, bias*biasMult);
jpayne@68 611 }
jpayne@68 612 }
jpayne@68 613 if(rnaLen<=maxWindow2){
jpayne@68 614 Orf orf=new Orf(name, lastStart, maxPos, strand, 0, bases, false, sc.type);
jpayne@68 615 orfs.add(orf);
jpayne@68 616 orf.orfScore=max;
jpayne@68 617 if(verbose){
jpayne@68 618 final int start2=(strand==0 ? lastStart : bases.length-maxPos-1);
jpayne@68 619 final int stop2=(strand==0 ? maxPos : bases.length-lastStart-1);
jpayne@68 620 System.err.println(start2+"\t"+stop2+"\t"+max);
jpayne@68 621 }
jpayne@68 622 }
jpayne@68 623 }
jpayne@68 624 }
jpayne@68 625
jpayne@68 626 if(kmersSeen!=null && orfs.size()>0 && sc.kmerSet()!=null){
jpayne@68 627 fillKmersSeen(bases, kmersSeen, sc.kmerSet(), sc.kLongLen());
jpayne@68 628 }
jpayne@68 629
jpayne@68 630 float cutoff=cutoff2[sc.type];
jpayne@68 631
jpayne@68 632 for(int i=0; i<orfs.size(); i++){
jpayne@68 633 Orf orf=orfs.get(i);
jpayne@68 634 // System.err.println(orf.orfScore);
jpayne@68 635 boolean good=refineRna(orf, bases, strand, sc, scores, kmersSeen);
jpayne@68 636 if(orf.orfScore<cutoff || !good){
jpayne@68 637 if(verbose){System.err.println("REJECT: "+orf.toStringFlipped());}
jpayne@68 638 orfs.set(i, null);
jpayne@68 639 }else{
jpayne@68 640 if(verbose){System.err.println("ACCEPT: "+orf.toStringFlipped());}
jpayne@68 641 if(quitEarly){
jpayne@68 642 orfs.clear();
jpayne@68 643 orfs.add(orf);
jpayne@68 644 return orfs;
jpayne@68 645 }
jpayne@68 646 }
jpayne@68 647 }
jpayne@68 648 Tools.condenseStrict(orfs);
jpayne@68 649
jpayne@68 650 // assert(false);
jpayne@68 651
jpayne@68 652 // for(int pos=0; pos<bases.length; pos++){
jpayne@68 653 // final byte b=bases[pos];
jpayne@68 654 // final int x=AminoAcid.baseToNumber[b];
jpayne@68 655 // kmer=((kmer<<2)|x)&mask;
jpayne@68 656 //
jpayne@68 657 // if(x>=0){
jpayne@68 658 // len++;
jpayne@68 659 // if(len>=k){
jpayne@68 660 // float prob=inner.probs[0][kmer];
jpayne@68 661 // float dif=prob-1.2f;//Prob above 1 is more likely than average
jpayne@68 662 // currentScore=Tools.max(0, currentScore+dif);
jpayne@68 663 // if(currentScore>0){
jpayne@68 664 // currentStreak++;
jpayne@68 665 // }else{
jpayne@68 666 // currentStreak=0;
jpayne@68 667 // }
jpayne@68 668 // if(currentScore>200 && currentStreak>1500 && currentStreak<1700){
jpayne@68 669 // Orf orf=new Orf(name, pos-currentStreak-1, pos, strand, 0, bases, false);
jpayne@68 670 // orfs.add(orf);
jpayne@68 671 // orf.orfScore=currentScore;
jpayne@68 672 // orf.startScore=start.scorePoint(orf.start, bases);
jpayne@68 673 // orf.stopScore=stop.scorePoint(orf.stop, bases);
jpayne@68 674 // currentStreak=0;
jpayne@68 675 // currentScore=0;
jpayne@68 676 // }
jpayne@68 677 // }
jpayne@68 678 // }else{
jpayne@68 679 // len=0;
jpayne@68 680 // kmer=0;
jpayne@68 681 // }
jpayne@68 682 // }
jpayne@68 683
jpayne@68 684 return orfs;
jpayne@68 685 }
jpayne@68 686
jpayne@68 687 void fillKmersSeen(byte[] bases, int[] kmersSeen, LongHashSet set, int k){
jpayne@68 688 final long mask=~((-1L)<<(2*k));
jpayne@68 689 long kmer=0;
jpayne@68 690 int len=0;
jpayne@68 691 int seen=0;
jpayne@68 692 for(int i=0; i<bases.length; i++){
jpayne@68 693 final byte b=bases[i];
jpayne@68 694 final int num=AminoAcid.baseToNumber[b];
jpayne@68 695 if(num>=0){
jpayne@68 696 len++;
jpayne@68 697 kmer=((kmer<<2)|num)&mask;
jpayne@68 698 if(len>=k && set.contains(kmer)){seen++;}
jpayne@68 699 }else{
jpayne@68 700 len=0;
jpayne@68 701 }
jpayne@68 702 kmersSeen[i]=seen;
jpayne@68 703 }
jpayne@68 704 }
jpayne@68 705
jpayne@68 706 boolean refineRna(Orf orf, byte[] bases, int strand, StatsContainer sc, float[] scores, int[] kmersSeen){
jpayne@68 707 if(orf==null){return false;}
jpayne@68 708 if(verbose){System.err.println("REFINE: "+orf.toStringFlipped());}
jpayne@68 709 final int window=sc.lengthAvg;
jpayne@68 710 // final int halfWindow=window/2;
jpayne@68 711 // final int maxWindow=(int)(window*1.5f);
jpayne@68 712 final int slop=Tools.max(60, 10+window/8);
jpayne@68 713 final float invWindow=sc.invLengthAvg;
jpayne@68 714 final float oldScore=orf.orfScore;
jpayne@68 715 IntList starts=new IntList();
jpayne@68 716 IntList stops=new IntList();
jpayne@68 717 FloatList startScores=new FloatList();
jpayne@68 718 FloatList stopScores=new FloatList();
jpayne@68 719
jpayne@68 720 final int leftmost=Tools.max(0, orf.start-slop);
jpayne@68 721 final int rightmost=Tools.min(bases.length-1, orf.stop+slop);
jpayne@68 722 if(kmersSeen!=null){
jpayne@68 723 if(kmersSeen[leftmost]>=kmersSeen[rightmost]){
jpayne@68 724 // System.err.println("Bad: "+oldScore);
jpayne@68 725 orf.orfScore=-999;
jpayne@68 726 return false;
jpayne@68 727 }else{
jpayne@68 728 // System.err.println("Good: "+oldScore);
jpayne@68 729 }
jpayne@68 730 }
jpayne@68 731 if(verbose){System.err.println("REFINE2");}
jpayne@68 732
jpayne@68 733 {//starts
jpayne@68 734 final int left=leftmost;
jpayne@68 735 final int right=Tools.min(bases.length-1, orf.stop+slop-window);
jpayne@68 736 final float thresh=cutoff3[sc.type];
jpayne@68 737 fillPoints(left, right, bases, sc.start, thresh, starts, startScores);
jpayne@68 738 }
jpayne@68 739 if(verbose){System.err.println("starts: "+starts.size);}
jpayne@68 740 // if((orf.start+"").startsWith("146") || true){System.err.println(starts);}
jpayne@68 741 if(verbose){System.err.println(startScores);}
jpayne@68 742
jpayne@68 743 {//stops
jpayne@68 744 final int left=Tools.max(0, orf.start-slop+window);
jpayne@68 745 final int right=rightmost;
jpayne@68 746 final float thresh=cutoff4[sc.type];
jpayne@68 747 fillPoints(left, right, bases, sc.stop, thresh, stops, stopScores);
jpayne@68 748 }
jpayne@68 749 if(verbose){System.err.println("stops: "+stops.size);}
jpayne@68 750 // if((orf.start+"").startsWith("146") || true){System.err.println(stops);}
jpayne@68 751 if(verbose){System.err.println(stopScores);}
jpayne@68 752
jpayne@68 753 final float innerThresh=cutoff5[sc.type];
jpayne@68 754
jpayne@68 755 final int minlen=Tools.max(window/2, window-slop);
jpayne@68 756 final int maxlen=window+slop;
jpayne@68 757
jpayne@68 758 orf.orfScore=0;
jpayne@68 759 int lastStop=0;
jpayne@68 760 for(int i=0; i<starts.size; i++){
jpayne@68 761 final int start=starts.get(i);
jpayne@68 762 final int startSeen=kmersSeen==null ? 0 : kmersSeen[start];
jpayne@68 763 final float startScore=startScores.get(i);
jpayne@68 764 // System.err.println("start="+start);
jpayne@68 765 for(int j=lastStop; j<stops.size; j++){
jpayne@68 766 final int stop=stops.get(j);
jpayne@68 767 final int rnalen=stop-start+1;
jpayne@68 768 // System.err.println("stop="+stop);
jpayne@68 769 if(rnalen<minlen){
jpayne@68 770 lastStop=j+1;
jpayne@68 771 }else if(rnalen>maxlen){
jpayne@68 772 // System.err.println("broke");
jpayne@68 773 break;
jpayne@68 774 }else if(kmersSeen!=null && kmersSeen[stop]<=startSeen){//TODO: Test this
jpayne@68 775 // //skip
jpayne@68 776 }else{
jpayne@68 777 final int len=stop-start+1;
jpayne@68 778 final float stopScore=stopScores.get(j);
jpayne@68 779 final float innerScore=(scores[stop]-scores[start])/len;
jpayne@68 780 // System.err.println("innerScore="+innerScore);
jpayne@68 781 assert(rnalen<=maxlen) : "start="+start+", stop="+stop+", rnalen="+rnalen+", minlen="+minlen+", maxlen="+maxlen;
jpayne@68 782 if(innerScore>=innerThresh){
jpayne@68 783 final float a=Tools.max(startScore+0.25f, 0.25f);
jpayne@68 784 final float b=Tools.max(stopScore+0.25f, 0.25f);
jpayne@68 785 final float c=innerScore*innerScore;
jpayne@68 786 final float d=(window-(2.4f*Tools.absdif(len, window)))*invWindow;
jpayne@68 787 final float score=c*d*(float)Math.sqrt(a*b);
jpayne@68 788 if(verbose && score>=0.2f*orf.orfScore){
jpayne@68 789 final int start2=(strand==0 ? start : bases.length-stop-1);
jpayne@68 790 final int stop2=(strand==0 ? stop : bases.length-start-1);
jpayne@68 791 System.err.println(start2+"-"+stop2+", "+startScore+", "+stopScore+", "+innerScore+", "+(score*scoreMult[sc.type])+", "+oldScore);
jpayne@68 792 }
jpayne@68 793 if(score>orf.orfScore){
jpayne@68 794 orf.start=start;
jpayne@68 795 orf.stop=stop;
jpayne@68 796 orf.kmerScore=innerScore*len;
jpayne@68 797 // if(verbose){
jpayne@68 798 // final int start2=(strand==0 ? start : bases.length-stop-1);
jpayne@68 799 // final int stop2=(strand==0 ? stop : bases.length-start-1);
jpayne@68 800 // System.err.println(start2+"-"+stop2+", "+startScore+", "+stopScore+", "+innerScore+", "+(score*scoreMult[sc.type])+", "+oldScore);
jpayne@68 801 // }
jpayne@68 802 orf.startScore=startScore;
jpayne@68 803 orf.stopScore=stopScore;
jpayne@68 804 orf.orfScore=score;
jpayne@68 805 }
jpayne@68 806 }else{
jpayne@68 807 assert(true);
jpayne@68 808 }
jpayne@68 809 }
jpayne@68 810 }
jpayne@68 811 }
jpayne@68 812 orf.orfScore*=scoreMult[sc.type];
jpayne@68 813
jpayne@68 814 if(starts.isEmpty() || stops.isEmpty()){
jpayne@68 815 if(verbose){System.err.println("No starts or stops.");}
jpayne@68 816 orf.orfScore=Tools.min(-999, orf.orfScore-9999);
jpayne@68 817 return false;
jpayne@68 818 }
jpayne@68 819 return refineByAlignment(orf, bases, strand, sc);//Returns true if no consensus is present
jpayne@68 820 }
jpayne@68 821
jpayne@68 822 boolean refineByAlignment(Orf orf, byte[] bases, int strand, StatsContainer sc){
jpayne@68 823 if(verbose){System.err.println("ALIGN");}
jpayne@68 824 Read[] consensus=ProkObject.consensusReads(sc.type);
jpayne@68 825 if(consensus==null || consensus.length==0){return true;}
jpayne@68 826 boolean refined=false;
jpayne@68 827 // System.err.println("Initial: "+orf.start+", "+orf.stop);
jpayne@68 828 for(Read r : consensus){
jpayne@68 829 // refined=refineByAlignment(orf, bases, strand, sc, r.bases, 15, 15, 2);
jpayne@68 830 refined=refineByAlignment(orf, bases, strand, sc, r.bases, sc.startSlop(), sc.stopSlop(), 2);
jpayne@68 831 if(refined || sc.type==r18S || true){break;}
jpayne@68 832 }
jpayne@68 833 if(refined){
jpayne@68 834 if(verbose){System.err.println("Aligned to: "+orf.start+", "+orf.stop);}
jpayne@68 835 }else{
jpayne@68 836 if(verbose){System.err.println("Alignment failed.");}
jpayne@68 837 orf.orfScore=Tools.min(-999, orf.orfScore-9999);
jpayne@68 838 }
jpayne@68 839 return refined;
jpayne@68 840 }
jpayne@68 841
jpayne@68 842 boolean refineByAlignment(Orf orf, byte[] bases, int strand, StatsContainer sc, byte[] consensus,
jpayne@68 843 final int startSlop, final int stopSlop, int recurLimit){
jpayne@68 844 final int start0=orf.start;
jpayne@68 845 final int stop0=orf.stop;
jpayne@68 846
jpayne@68 847 assert(start0>=0 && start0<bases.length) : start0+", "+stop0;
jpayne@68 848 assert(stop0>=0 && stop0<bases.length) : start0+", "+stop0;
jpayne@68 849
jpayne@68 850 final float minID=sc.minIdentity();
jpayne@68 851 final int padding=Tools.min(alignmentPadding, 30+sc.lengthAvg/4);
jpayne@68 852 final int a=Tools.max(0, orf.start-padding);
jpayne@68 853 final int b=Tools.min(bases.length-1, orf.stop+padding);
jpayne@68 854 final int reflen=b-a+1;
jpayne@68 855 if(reflen>10*sc.lengthAvg && reflen>20000){
jpayne@68 856 System.err.println("Skipped reflen "+reflen+"/"+sc.lengthAvg+" for "
jpayne@68 857 + "seqlen="+bases.length+", orf="+orf.toString());
jpayne@68 858 assert(false);
jpayne@68 859 //TODO: Possibly change return to -1, 0, 1 ("can't align")
jpayne@68 860 //Should be a limit on window size...
jpayne@68 861 //Also consider shrinking matrix after jumbo alignments
jpayne@68 862 return false;
jpayne@68 863 }
jpayne@68 864 assert(a>=0 && b<bases.length) : a+", "+b;
jpayne@68 865 SingleStateAlignerFlat2 ssa=getSSA();
jpayne@68 866 final int minScore=ssa.minScoreByIdentity(consensus.length, minID);
jpayne@68 867 int[] max=ssa.fillUnlimited(consensus, bases, a, b, minScore);
jpayne@68 868 if(max==null){return false;}
jpayne@68 869
jpayne@68 870 final int rows=max[0];
jpayne@68 871 final int maxCol=max[1];
jpayne@68 872 final int maxState=max[2];
jpayne@68 873 // final int maxScore=max[3];
jpayne@68 874 // final int maxStart=max[4];
jpayne@68 875
jpayne@68 876 //returns {score, bestRefStart, bestRefStop}
jpayne@68 877 //padded: {score, bestRefStart, bestRefStop, padLeft, padRight};
jpayne@68 878 final int[] score=ssa.score(consensus, bases, a, b, rows, maxCol, maxState);
jpayne@68 879 final int rstart=Tools.max(score[1], 0);
jpayne@68 880 final int rstop=Tools.min(score[2], bases.length-1);
jpayne@68 881 final float id=ssa.tracebackIdentity(consensus, bases, a, b, rows, maxCol, maxState, null);
jpayne@68 882
jpayne@68 883 assert(rstart>=0 && rstart<bases.length) : "bases="+bases.length+
jpayne@68 884 ", rstart="+rstart+", rstop="+rstop+", a="+a+", b="+b+"\n"+"score="+Arrays.toString(score)+", id="+id;
jpayne@68 885 assert(rstop>=0 && rstop<bases.length) : rstart+", "+rstop;
jpayne@68 886
jpayne@68 887 if(score.length>3 && recurLimit>0){
jpayne@68 888 final int padLeft=score[3];
jpayne@68 889 final int padRight=score[4];
jpayne@68 890 //TODO: This takes extra memory. It may be better to cap the width or ignore start0/stop0 here.
jpayne@68 891 int rstart2=Tools.max(0, Tools.min(start0, rstart)-10);
jpayne@68 892 int rstop2=Tools.min(bases.length-1, Tools.max(stop0, rstop)+10);
jpayne@68 893 assert(rstart2>=0 && rstart2<bases.length) : rstart2+", "+rstop2;
jpayne@68 894 assert(rstop2>=0 && rstop2<bases.length) : rstart2+", "+rstop2;
jpayne@68 895 orf.start=rstart2;
jpayne@68 896 orf.stop=rstop2;
jpayne@68 897 if(orf.start<a || orf.stop>b){
jpayne@68 898 boolean ret=refineByAlignment(orf, bases, strand, sc, consensus, 0, 0, recurLimit-1);
jpayne@68 899 if(ret){return true;}
jpayne@68 900 }
jpayne@68 901 orf.start=start0;
jpayne@68 902 orf.stop=stop0;
jpayne@68 903 // assert(false) : "Don't traceback after recur.";
jpayne@68 904 }
jpayne@68 905 if(verbose){
jpayne@68 906 System.err.println("Identity: "+id);
jpayne@68 907 }
jpayne@68 908 // assert(score.length==3) : "TODO: Handle padding requests.";
jpayne@68 909
jpayne@68 910 // System.err.println("Identity: "+String.format("%.2f", 100*id)+"; location: "+rstart+"-"+rstop);
jpayne@68 911 if(id<minID){return false;}
jpayne@68 912
jpayne@68 913
jpayne@68 914 if(Tools.absdif(rstart, start0)>startSlop){orf.start=rstart;}
jpayne@68 915 if(Tools.absdif(rstop, stop0)>stopSlop){orf.stop=rstop;}
jpayne@68 916 return true;
jpayne@68 917 }
jpayne@68 918
jpayne@68 919 void fillPoints(final int left, final int right, final byte[] bases, final FrameStats fs, float thresh, final IntList points, final FloatList scores){
jpayne@68 920 points.clear();
jpayne@68 921 scores.clear();
jpayne@68 922 final float minThresh=thresh;//thresh*0.05f;
jpayne@68 923 // System.err.println(left+", "+right+", "+thresh+", "+minThresh);
jpayne@68 924 while(points.size()<8 && thresh>=minThresh){
jpayne@68 925 // System.err.println("Running with thresh="+thresh);
jpayne@68 926 points.clear();
jpayne@68 927 scores.clear();
jpayne@68 928 for(int i=left; i<right; i++){
jpayne@68 929 float score=fs.scorePoint(i, bases);
jpayne@68 930 // System.err.println(i+", "+score);
jpayne@68 931 if(score>=thresh){
jpayne@68 932 points.add(i);
jpayne@68 933 scores.add(score);
jpayne@68 934 }
jpayne@68 935 }
jpayne@68 936 thresh=thresh*0.75f;
jpayne@68 937 }
jpayne@68 938 }
jpayne@68 939
jpayne@68 940 /**
jpayne@68 941 * Generate all possible genes from each Orf, and return them in a new set of lists.
jpayne@68 942 * @param frameLists
jpayne@68 943 * @param bases
jpayne@68 944 * @return Lists of orfs.
jpayne@68 945 */
jpayne@68 946 private ArrayList<Orf>[] breakOrfs(ArrayList<Orf>[] frameLists, byte[] bases){
jpayne@68 947
jpayne@68 948 @SuppressWarnings("unchecked")
jpayne@68 949 ArrayList<Orf>[] brokenLists=new ArrayList[6];
jpayne@68 950 for(int strand=0; strand<2; strand++){
jpayne@68 951 for(int frame=0; frame<3; frame++){
jpayne@68 952 int fnum=frame+3*strand;
jpayne@68 953 ArrayList<Orf> longest=frameLists[fnum]; //Longest Orf per stop
jpayne@68 954 ArrayList<Orf> broken=new ArrayList<Orf>(); //All high scoring Orfs, including multiple per stop, with different starts
jpayne@68 955 if(longest!=null) {
jpayne@68 956 for(Orf orf : longest){
jpayne@68 957 assert(orf.frame==frame);
jpayne@68 958 assert(orf.strand==strand);
jpayne@68 959 ArrayList<Orf> temp=breakOrf(orf, bases);
jpayne@68 960 if(temp!=null){
jpayne@68 961 broken.addAll(temp);
jpayne@68 962 }
jpayne@68 963 }
jpayne@68 964 }
jpayne@68 965 Collections.sort(broken);
jpayne@68 966 brokenLists[fnum]=broken;
jpayne@68 967 }
jpayne@68 968 //Reverse-complement bases after processing each strand
jpayne@68 969 AminoAcid.reverseComplementBasesInPlace(bases);
jpayne@68 970 }
jpayne@68 971 return brokenLists;
jpayne@68 972 }
jpayne@68 973
jpayne@68 974 /**
jpayne@68 975 * Generate an Orf for each possible start codon.
jpayne@68 976 * Retain only the high-scoring ones.
jpayne@68 977 * @param longest Longest open reading frame for a given stop.
jpayne@68 978 * @param bases Bases, oriented for this Orf.
jpayne@68 979 * @return List of Orfs.
jpayne@68 980 */
jpayne@68 981 private ArrayList<Orf> breakOrf(Orf longest, byte[] bases){
jpayne@68 982 assert(longest.start<longest.stop);
jpayne@68 983 final int flipped=longest.flipped();
jpayne@68 984 if(flipped==1){longest.flip();}//Now the orf is aligned to its native strand
jpayne@68 985
jpayne@68 986 geneStopsMade++;
jpayne@68 987
jpayne@68 988 final FrameStats innerStats=pgm.statsCDS.inner;
jpayne@68 989 final FrameStats startStats=pgm.statsCDS.start;
jpayne@68 990 final FrameStats stopStats=pgm.statsCDS.stop;
jpayne@68 991
jpayne@68 992 final String name=longest.scafName;
jpayne@68 993 final int start=longest.start;
jpayne@68 994 final int stop=longest.stop;
jpayne@68 995 final int strand=longest.strand;
jpayne@68 996 final int max=Tools.min(longest.stop-2, longest.stop-minLen+4);
jpayne@68 997
jpayne@68 998 assert(pgm!=null) : pgm;
jpayne@68 999 assert(pgm.statsCDS!=null) : pgm;
jpayne@68 1000 assert(pgm.statsCDS.inner!=null) : pgm.statsCDS;
jpayne@68 1001 assert(pgm.statsCDS.inner.k>0) : pgm.statsCDS.inner;
jpayne@68 1002
jpayne@68 1003 final int k=innerStats.k;
jpayne@68 1004 final int mask=~((-1)<<(2*k));
jpayne@68 1005
jpayne@68 1006 final float stopScore=stopStats.scorePoint(longest.stop, bases);
jpayne@68 1007 stCds.geneStopScoreSum+=stopScore;
jpayne@68 1008 stCds.geneStopScoreCount++;
jpayne@68 1009
jpayne@68 1010 ArrayList<Orf> broken=new ArrayList<Orf>();
jpayne@68 1011 int created=0;
jpayne@68 1012
jpayne@68 1013 int codon=0;
jpayne@68 1014 int kmer=0;
jpayne@68 1015 int len=0;
jpayne@68 1016 int numKmers=0;
jpayne@68 1017 float currentScore=0;
jpayne@68 1018 for(int pos=start, currentFrame=0; pos<=stop; pos++){
jpayne@68 1019 final byte b=bases[pos];
jpayne@68 1020 final int x=AminoAcid.baseToNumber[b];
jpayne@68 1021 codon=((codon<<2)|x);
jpayne@68 1022 kmer=((kmer<<2)|x)&mask;
jpayne@68 1023
jpayne@68 1024 if(x>=0){
jpayne@68 1025 len++;
jpayne@68 1026 if(len>=k){
jpayne@68 1027 float prob=innerStats.probs[currentFrame][kmer];
jpayne@68 1028 float dif=prob-0.99f;//Prob above 1 is more likely than average
jpayne@68 1029 currentScore+=dif;
jpayne@68 1030 // outstream.println("pos="+pos+"\tdif="+String.format(Locale.ROOT, "%.4f", dif)+",\tscore="+String.format(Locale.ROOT, "%.4f", currentScore)+
jpayne@68 1031 // "\tasStart="+String.format(Locale.ROOT, "%.4f", pgm.calcStartScore(pos-2, bases))+"\tasStop="+String.format(Locale.ROOT, "%.4f", stopStats.scorePoint(pos, bases))+
jpayne@68 1032 // "\tcodon="+AminoAcid.kmerToString(kmer, 3)+" frame="+(currentFrame));
jpayne@68 1033 }else{
jpayne@68 1034 // outstream.println("pos="+pos+"\tdif="+String.format(Locale.ROOT, "%.4f", 0.0)+",\tscore="+String.format(Locale.ROOT, "%.4f", 0.0)+
jpayne@68 1035 // "\tasStart="+String.format(Locale.ROOT, "%.4f", pgm.calcStartScore(pos-2, bases))+"\tasStop="+String.format(Locale.ROOT, "%.4f", stopStats.scorePoint(pos, bases))+
jpayne@68 1036 // "\tcodon="+AminoAcid.kmerToString(kmer, 3)+" frame="+(currentFrame));
jpayne@68 1037 }
jpayne@68 1038 }else{
jpayne@68 1039 len=0;
jpayne@68 1040 kmer=0;
jpayne@68 1041 }
jpayne@68 1042
jpayne@68 1043 currentFrame++;
jpayne@68 1044 // outstream.println("pos="+pos+", codon="+AminoAcid.kmerToString(kmer, 3)+", frame="+currentFrame+", start="+start+", isStartCodon="+pgm.isStartCodon(codon));
jpayne@68 1045 if(currentFrame>2){
jpayne@68 1046 currentFrame=0;
jpayne@68 1047 if(pos<max && created<breakLimit && (pos==start+2 || pgm.isStartCodon(codon))){
jpayne@68 1048 // outstream.println(x);
jpayne@68 1049 int glen=stop-pos+3;
jpayne@68 1050 assert(glen>=minLen) : "glen="+glen+", minLen="+minLen+", pos="+pos+", max="+max+", start="+start;
jpayne@68 1051
jpayne@68 1052 int oStart=pos-2;
jpayne@68 1053 float startScore=startStats.scorePoint(oStart, bases);
jpayne@68 1054
jpayne@68 1055 stCds.geneStartScoreSum+=startScore;
jpayne@68 1056 stCds.geneStartScoreCount++;
jpayne@68 1057
jpayne@68 1058 stCds.lengthSum+=(stop-(pos-2)+1);
jpayne@68 1059 stCds.lengthCount++;
jpayne@68 1060
jpayne@68 1061 if((startScore>=minStartScore || pos<6) /* && stopScore>=minStopScore /*|| broken.isEmpty()*/){
jpayne@68 1062 Orf orf=new Orf(name, pos-2, stop, strand, longest.frame, bases, false, longest.type);
jpayne@68 1063
jpayne@68 1064 geneStartsMade++;
jpayne@68 1065 orf.kmerScore=currentScore;
jpayne@68 1066 orf.startScore=startScore;
jpayne@68 1067 orf.stopScore=stopScore;
jpayne@68 1068
jpayne@68 1069 assert(orf.frame==longest.frame);
jpayne@68 1070 assert(orf.strand==strand);
jpayne@68 1071
jpayne@68 1072 if(strand==1){orf.flip();}
jpayne@68 1073 broken.add(orf);
jpayne@68 1074 created++;
jpayne@68 1075 }
jpayne@68 1076 }
jpayne@68 1077 codon=0;
jpayne@68 1078 }
jpayne@68 1079 }
jpayne@68 1080
jpayne@68 1081 final int size=broken.size();
jpayne@68 1082 final int sizeCutoff=Tools.max(5, size/2);
jpayne@68 1083 if(size<1){return broken;}
jpayne@68 1084 Orf best=broken.get(0);
jpayne@68 1085 Orf bestStart=broken.get(0);
jpayne@68 1086 for(int i=0; i<size; i++){
jpayne@68 1087 Orf orf=broken.get(i);
jpayne@68 1088
jpayne@68 1089 //This fixes scores because they were generated together, from start to stop, to make this O(N) instead of O(N^2).
jpayne@68 1090 orf.kmerScore=currentScore-orf.kmerScore;
jpayne@68 1091 orf.orfScore=orf.calcOrfScore();
jpayne@68 1092 if(orf.orfScore>=best.orfScore){best=orf;}
jpayne@68 1093 if(orf.startScore>=bestStart.startScore){bestStart=orf;}
jpayne@68 1094
jpayne@68 1095 stCds.geneInnerScoreSum+=orf.averageKmerScore();
jpayne@68 1096 stCds.geneInnerScoreCount++;
jpayne@68 1097 }
jpayne@68 1098
jpayne@68 1099 //Sort to by score descending to eliminate low-scoring copies.
jpayne@68 1100 if(broken.size()>1){Collections.sort(broken, Feature.featureComparatorScore);}
jpayne@68 1101
jpayne@68 1102 int removed=0;
jpayne@68 1103 for(int i=0; i<size; i++){//Fix scores because they were generated together, from start to stop, to make this O(N) instead of O(N^2).
jpayne@68 1104 Orf orf=broken.get(i);
jpayne@68 1105 if(orf.averageKmerScore()<minInnerScore || orf.orfScore<minOrfScore ||
jpayne@68 1106 orf.orfScore/orf.length()<minAvgScore ||
jpayne@68 1107 orf.orfScore<0.5f*best.orfScore-10 || (orf.startScore<bestStart.startScore-0.55f && orf.kmerScore<best.kmerScore*1.1f && orf!=best)){
jpayne@68 1108 broken.set(i, null);
jpayne@68 1109 removed++;
jpayne@68 1110 }else if(i>sizeCutoff){
jpayne@68 1111 broken.set(i, null);
jpayne@68 1112 removed++;
jpayne@68 1113 }
jpayne@68 1114 }
jpayne@68 1115 if(removed>0){
jpayne@68 1116 Tools.condenseStrict(broken);
jpayne@68 1117 }
jpayne@68 1118
jpayne@68 1119 geneStartsRetained+=broken.size();
jpayne@68 1120 geneStopsRetained+=(broken.size()>0 ? 1 : 0);
jpayne@68 1121
jpayne@68 1122 if(flipped==1){longest.flip();}
jpayne@68 1123 return broken;
jpayne@68 1124 }
jpayne@68 1125
jpayne@68 1126 /*--------------------------------------------------------------*/
jpayne@68 1127 /*---------------- Fields ----------------*/
jpayne@68 1128 /*--------------------------------------------------------------*/
jpayne@68 1129
jpayne@68 1130 /**
jpayne@68 1131 * Current gene model.
jpayne@68 1132 * TODO: Dynamically swap this as needed for contigs with varying GC.
jpayne@68 1133 */
jpayne@68 1134 GeneModel pgm;
jpayne@68 1135
jpayne@68 1136 //Gene-calling cutoffs
jpayne@68 1137 final int minLen;
jpayne@68 1138 final int maxOverlapSameStrand;
jpayne@68 1139 final int maxOverlapOppositeStrand;
jpayne@68 1140 final float minStartScore;
jpayne@68 1141 final float minStopScore;
jpayne@68 1142 final float minInnerScore;
jpayne@68 1143 final float minOrfScore;
jpayne@68 1144 final float minAvgScore;
jpayne@68 1145
jpayne@68 1146 // static float[] cutoff1=new float[] {0, 40f, 200f, 150f, 45f};
jpayne@68 1147 // static float[] cutoff2=new float[] {0, 44f, 300f, 150f, 60f};
jpayne@68 1148 // static float[] cutoff3=new float[] {0, 1.7f, 1.5f, 1.4f, 1.8f};
jpayne@68 1149 // static float[] cutoff4=new float[] {0, 1.6f, 1.5f, 0.6f, 1.1f};
jpayne@68 1150 // static float[] cutoff5=new float[] {0, 1.0f, 1.0f, 1.0f, 1.55f};//inner score
jpayne@68 1151 // static float[] scoreMult=new float[] {1f, 8f, 200f, 175f, 12f};
jpayne@68 1152 // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.15f};
jpayne@68 1153
jpayne@68 1154 // static float[] cutoff1=new float[] {0, 40f, 140f, 150f, 45f};
jpayne@68 1155 // static float[] cutoff2=new float[] {0, 44f, 300f, 150f, 45f};
jpayne@68 1156 // static float[] cutoff3=new float[] {0, 1.7f, 1.1f, 1.3f, 1.8f};
jpayne@68 1157 // static float[] cutoff4=new float[] {0, 1.6f, 1.3f, 0.5f, 1.1f};
jpayne@68 1158 // static float[] cutoff5=new float[] {0, 1.0f, 1.0f, 1.0f, 1.3f};//inner score
jpayne@68 1159 // static float[] scoreMult=new float[] {1f, 8f, 200f, 175f, 12f};
jpayne@68 1160 // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.15f};
jpayne@68 1161
jpayne@68 1162 //for k=4,2,2
jpayne@68 1163 // static float[] cutoff1=new float[] {0, 40f, 140f, 150f, 40f};
jpayne@68 1164 // static float[] cutoff2=new float[] {0, 44f, 300f, 150f, 45f};
jpayne@68 1165 // static float[] cutoff3=new float[] {0, 1.7f, 1.1f, 1.1f, 1.8f};
jpayne@68 1166 // static float[] cutoff4=new float[] {0, 1.6f, 1.3f, 0.45f, 1.1f};
jpayne@68 1167 // static float[] cutoff5=new float[] {0, 1.0f, 1.0f, 1.0f, 1.3f};//inner score
jpayne@68 1168 // static float[] scoreMult=new float[] {1f, 8f, 200f, 175f, 12f};
jpayne@68 1169 // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.22f};
jpayne@68 1170
jpayne@68 1171 // //for k=5,3,3
jpayne@68 1172 // static float[] cutoff1=new float[] {0, 40f, 300f, 300f, 135f};//sum score
jpayne@68 1173 // static float[] cutoff2=new float[] {0, 44f, 300f, 400f, 100f};//orf score
jpayne@68 1174 // static float[] cutoff3=new float[] {0, 1.7f, 1.5f, 1.5f, 3.5f};//start score
jpayne@68 1175 // static float[] cutoff4=new float[] {0, 1.6f, 1.5f, 1.4f, 1.8f};//stop score
jpayne@68 1176 // static float[] cutoff5=new float[] {0, 1.0f, 1.1f, 1.15f, 1.4f};//inner score
jpayne@68 1177 // static float[] scoreMult=new float[] {1f, 8f, 80f, 120f, 5f};//score mult
jpayne@68 1178 // static float[] biases=new float[] {1f, 1.17f, 1.2f, 1.2f, 1.22f};//bias for sum score
jpayne@68 1179
jpayne@68 1180 //for k=6,3,3 - this has low scores for correct 23s stops; may try k=2 or k=4 for that end
jpayne@68 1181 //Also, 5S seems to score low very for archaea
jpayne@68 1182 // public static int CDS=0, tRNA=1, /*r16S=2,*/ r23S=3, r5S=4, r18S=5, r28S=6, RNA=7;
jpayne@68 1183 // static float[] cutoff1=new float[] {0, 100f, 600f, 400f, 300f};//sum score
jpayne@68 1184 // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 32f};//orf score
jpayne@68 1185 // static float[] cutoff3=new float[] {0, 3.0f, 1.8f, 1.6f, 4.0f};//start score
jpayne@68 1186 // static float[] cutoff4=new float[] {0, 2.8f, 2.0f, 0.90f, 1.9f};//stop score
jpayne@68 1187 // static float[] cutoff5=new float[] {0, 2.8f, 1.1f, 1.55f, 1.4f};//inner score
jpayne@68 1188 // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.0f};//score mult
jpayne@68 1189 // static float[] biases=new float[] {1f, 1.25f, 1.30f, 1.30f, 1.22f};//16S bias for sum score: 1.25 best for archs, 1.4 best for bacts
jpayne@68 1190
jpayne@68 1191 //// {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"};
jpayne@68 1192 // static float[] cutoff1=new float[] {0, 90f, 300f, 400f, 270f, 300f};//sum score
jpayne@68 1193 // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 32f, 300f};//orf score
jpayne@68 1194 // static float[] cutoff3=new float[] {0, 2.8f, 1.65f, 1.6f, 3.8f, 1.65f};//start score
jpayne@68 1195 // static float[] cutoff4=new float[] {0, 2.6f, 1.70f, 0.90f, 1.8f, 1.70f};//stop score
jpayne@68 1196 // static float[] cutoff5=new float[] {0, 2.8f, 1.70f, 1.55f, 1.4f, 1.70f};//inner score
jpayne@68 1197 // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.0f, 35f};//score mult
jpayne@68 1198 // static float[] biases=new float[] {1f, 1.50f, 1.30f, 1.30f, 1.30f, 1.30f};
jpayne@68 1199
jpayne@68 1200 //// {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"};
jpayne@68 1201 // static float[] cutoff1=new float[] {0, 90f, 300f, 400f, 90f, 300f};//sum score
jpayne@68 1202 // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 24f, 300f};//orf score
jpayne@68 1203 // static float[] cutoff3=new float[] {0, 2.8f, 1.65f, 1.6f, 2.0f, 1.65f};//start score
jpayne@68 1204 // static float[] cutoff4=new float[] {0, 2.6f, 1.70f, 0.90f, 1.0f, 1.70f};//stop score
jpayne@68 1205 // static float[] cutoff5=new float[] {0, 2.8f, 1.70f, 1.55f, 2.6f, 1.70f};//inner score
jpayne@68 1206 // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.25f, 35f};//score mult
jpayne@68 1207 // static float[] biases=new float[] {1f, 1.50f, 1.30f, 1.30f, 1.50f, 1.30f};
jpayne@68 1208
jpayne@68 1209 //// {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"};
jpayne@68 1210 // static float[] cutoff1=new float[] {0, 90f, 300f, 400f, 100f, 300f};//sum score
jpayne@68 1211 // static float[] cutoff2=new float[] {0, 48f, 300f, 300f, 32f, 300f};//orf score
jpayne@68 1212 // static float[] cutoff3=new float[] {0, 2.8f, 1.65f, 1.6f, 1.8f, 1.65f};//start score
jpayne@68 1213 // static float[] cutoff4=new float[] {0, 2.6f, 1.70f, 0.90f, 1.0f, 1.70f};//stop score
jpayne@68 1214 // static float[] cutoff5=new float[] {0, 2.8f, 1.70f, 1.55f, 2.6f, 1.70f};//inner score
jpayne@68 1215 // static float[] scoreMult=new float[] {1f, 1f, 35f, 80f, 1.25f, 35f};//score mult
jpayne@68 1216 // static float[] biases=new float[] {1f, 1.50f, 1.30f, 1.30f, 1.55f, 1.30f};
jpayne@68 1217
jpayne@68 1218 // {"CDS", "tRNA", "16S", "23S", "5S", "18S", "28S", "RNA"};
jpayne@68 1219 static float[] cutoff1=new float[] {0, 20f, 300f, 400f, 100f, 400f};//sum score
jpayne@68 1220 static float[] cutoff2=new float[] {0, 36f, 300f, 300f, 32f, 300f};//orf score //tRNA is very sensitive here
jpayne@68 1221 static float[] cutoff3=new float[] {0, 2.4f, 1.65f, 1.6f, 1.8f, 1.5f};//start score
jpayne@68 1222 static float[] cutoff4=new float[] {0, 1.5f, 1.70f, 0.90f, 1.0f, 1.1f};//stop score
jpayne@68 1223 static float[] cutoff5=new float[] {0, 2.2f, 1.70f, 1.55f, 2.6f, 1.5f};//inner score
jpayne@68 1224 static float[] scoreMult=new float[] {1f, 1.0f, 35f, 80f, 1.25f, 35f};//score mult
jpayne@68 1225 static float[] biases=new float[] {1f, 1.45f, 1.30f, 1.30f, 1.55f, 1.50f};
jpayne@68 1226
jpayne@68 1227 long geneStopsMade=0;
jpayne@68 1228 long geneStartsMade=0;
jpayne@68 1229 long geneStartsRetained=0;
jpayne@68 1230 long geneStopsRetained=0;
jpayne@68 1231 long geneStartsOut=0;
jpayne@68 1232
jpayne@68 1233 long tRNAOut=0;
jpayne@68 1234 long r16SOut=0;
jpayne@68 1235 long r23SOut=0;
jpayne@68 1236 long r5SOut=0;
jpayne@68 1237 long r18SOut=0;
jpayne@68 1238
jpayne@68 1239 ScoreTracker stCds=new ScoreTracker(CDS);
jpayne@68 1240 ScoreTracker stCds2=new ScoreTracker(CDS);
jpayne@68 1241 ScoreTracker stCdsPass=new ScoreTracker(CDS);
jpayne@68 1242 ScoreTracker sttRNA=new ScoreTracker(tRNA);
jpayne@68 1243 ScoreTracker st16s=new ScoreTracker(r16S);
jpayne@68 1244 ScoreTracker st23s=new ScoreTracker(r23S);
jpayne@68 1245 ScoreTracker st5s=new ScoreTracker(r5S);
jpayne@68 1246 ScoreTracker st18s=new ScoreTracker(r18S);
jpayne@68 1247
jpayne@68 1248 ScoreTracker[] trackers=new ScoreTracker[] {stCdsPass, sttRNA, st16s, st23s, st5s, st18s};
jpayne@68 1249
jpayne@68 1250 public static SingleStateAlignerFlat2 getSSA(){
jpayne@68 1251 SingleStateAlignerFlat2 ssa=localSSA.get();
jpayne@68 1252 if(ssa==null){
jpayne@68 1253 synchronized(localSSA){
jpayne@68 1254 ssa=localSSA.get();
jpayne@68 1255 if(ssa==null){
jpayne@68 1256 ssa=new SingleStateAlignerFlat2();
jpayne@68 1257 localSSA.set(ssa);
jpayne@68 1258 }
jpayne@68 1259 }
jpayne@68 1260 }
jpayne@68 1261 return ssa;
jpayne@68 1262 }
jpayne@68 1263
jpayne@68 1264 public static SingleStateAlignerFlat3 getSSA3(){
jpayne@68 1265 SingleStateAlignerFlat3 ssa=localSSA3.get();
jpayne@68 1266 if(ssa==null){
jpayne@68 1267 synchronized(localSSA3){
jpayne@68 1268 ssa=localSSA3.get();
jpayne@68 1269 if(ssa==null){
jpayne@68 1270 ssa=new SingleStateAlignerFlat3();
jpayne@68 1271 localSSA3.set(ssa);
jpayne@68 1272 }
jpayne@68 1273 }
jpayne@68 1274 }
jpayne@68 1275 return ssa;
jpayne@68 1276 }
jpayne@68 1277
jpayne@68 1278 public static SingleStateAlignerFlatFloat getSSAF(){
jpayne@68 1279 SingleStateAlignerFlatFloat ssa=localSSAF.get();
jpayne@68 1280 if(ssa==null){
jpayne@68 1281 synchronized(localSSAF){
jpayne@68 1282 ssa=localSSAF.get();
jpayne@68 1283 if(ssa==null){
jpayne@68 1284 ssa=new SingleStateAlignerFlatFloat();
jpayne@68 1285 localSSAF.set(ssa);
jpayne@68 1286 }
jpayne@68 1287 }
jpayne@68 1288 }
jpayne@68 1289 return ssa;
jpayne@68 1290 }
jpayne@68 1291
jpayne@68 1292 private static ThreadLocal<SingleStateAlignerFlat2> localSSA=new ThreadLocal<SingleStateAlignerFlat2>();
jpayne@68 1293 private static ThreadLocal<SingleStateAlignerFlat3> localSSA3=new ThreadLocal<SingleStateAlignerFlat3>();
jpayne@68 1294 private static ThreadLocal<SingleStateAlignerFlatFloat> localSSAF=new ThreadLocal<SingleStateAlignerFlatFloat>();
jpayne@68 1295 // public static int maxAlignmentEndpointDifference=15;
jpayne@68 1296 public static int alignmentPadding=300;
jpayne@68 1297
jpayne@68 1298 public static int breakLimit=12;
jpayne@68 1299 public static int lookbackPlus=70;
jpayne@68 1300 public static int lookbackMinus=25;
jpayne@68 1301
jpayne@68 1302 // pathScore+=p0+p1*(Tools.mid(p5*(p2+pathLength), p6*(p3-pathLength), p4));
jpayne@68 1303
jpayne@68 1304 //Operon length modifiers for same strand
jpayne@68 1305 public static float p0=-30f;
jpayne@68 1306 public static float p1=-0.35f; //Sensitive - higher decreases FP and TP
jpayne@68 1307 public static float p2=4.0f;//Insensitive - higher positive decreases FP and TP
jpayne@68 1308 public static float p3=12f; //Higher decreases FP (substantially) and TP (slightly)
jpayne@68 1309 public static float p4=-10f; //Lower decreases FP (weakly) and TP (greatly)
jpayne@68 1310 public static float p5=2.0f; //Insensitive - lower increases FP and TP
jpayne@68 1311 public static float p6=2f; //Greater increases FP and TP
jpayne@68 1312
jpayne@68 1313 // pathScore+=q1+Tools.mid(q2*prevLength, q3+q4*prevLength, q5);
jpayne@68 1314
jpayne@68 1315 //Operon length modifiers for opposite strand
jpayne@68 1316 public static float q1=-36f;
jpayne@68 1317 public static float q2=-1.6f; //q2 and q4 must have opposite signs
jpayne@68 1318 public static float q3=-12f;
jpayne@68 1319 public static float q4=3.0f; //(Lower [even negative] decreases FP and TP)
jpayne@68 1320 public static float q5=-40f;
jpayne@68 1321
jpayne@68 1322 private static PrintStream outstream=System.err;
jpayne@68 1323 static boolean verbose;
jpayne@68 1324
jpayne@68 1325 }