annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/Orf.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package prok;
jpayne@68 2
jpayne@68 3 import dna.AminoAcid;
jpayne@68 4 import shared.Shared;
jpayne@68 5 import shared.Tools;
jpayne@68 6 import structures.ByteBuilder;
jpayne@68 7
jpayne@68 8 /**
jpayne@68 9 * ORF means Open Reading Frame.
jpayne@68 10 * It starts at the first base of a start codon and ends at the last base of a stop codon.
jpayne@68 11 * The length is divisible by 3.
jpayne@68 12 * @author Brian Bushnell
jpayne@68 13 * @date Sep 20, 2018
jpayne@68 14 *
jpayne@68 15 */
jpayne@68 16 public class Orf extends Feature {
jpayne@68 17
jpayne@68 18 /*--------------------------------------------------------------*/
jpayne@68 19 /*---------------- Initialization ----------------*/
jpayne@68 20 /*--------------------------------------------------------------*/
jpayne@68 21
jpayne@68 22 /**
jpayne@68 23 * Bases and coordinates are assumed to be the correct strand.
jpayne@68 24 * Minus-strand ORFs can be flipped at the end of the constructor.
jpayne@68 25 * @param scafName_
jpayne@68 26 * @param start_
jpayne@68 27 * @param stop_
jpayne@68 28 * @param strand_
jpayne@68 29 * @param frame_
jpayne@68 30 * @param bases
jpayne@68 31 * @param flip
jpayne@68 32 */
jpayne@68 33 public Orf(String scafName_, int start_, int stop_, int strand_, int frame_, byte[] bases, boolean flip, int type_) {
jpayne@68 34 super(scafName_, start_, stop_, strand_, bases.length);
jpayne@68 35 frame=frame_;
jpayne@68 36 startCodon=getCodon(start, bases);
jpayne@68 37 stopCodon=getCodon(stop-2, bases);
jpayne@68 38 type=type_;
jpayne@68 39
jpayne@68 40 if(flip && strand==Shared.MINUS){flip();}
jpayne@68 41 }
jpayne@68 42
jpayne@68 43 /*--------------------------------------------------------------*/
jpayne@68 44 /*---------------- Init Helpers ----------------*/
jpayne@68 45 /*--------------------------------------------------------------*/
jpayne@68 46
jpayne@68 47 /**
jpayne@68 48 * Grab the codon starting at from.
jpayne@68 49 * Assumes bases are in the correct strand
jpayne@68 50 * @param from
jpayne@68 51 * @param bases
jpayne@68 52 * @return
jpayne@68 53 */
jpayne@68 54 private static int getCodon(int from, byte[] bases){
jpayne@68 55 int codon=0;
jpayne@68 56 for(int i=0; i<3; i++){
jpayne@68 57 // assert(i+from<bases.length) : i+", "+from+", "+bases.length;
jpayne@68 58 byte b=bases[i+from];
jpayne@68 59 int x=AminoAcid.baseToNumber[b];
jpayne@68 60 codon=(codon<<2)|x;
jpayne@68 61 }
jpayne@68 62 return codon;
jpayne@68 63 }
jpayne@68 64
jpayne@68 65 public float calcOrfScore(){
jpayne@68 66 return calcOrfScore(0);
jpayne@68 67 }
jpayne@68 68
jpayne@68 69 /**
jpayne@68 70 * The score of an ORF alone is a factor of the length, start score, stop score, and kmer score.
jpayne@68 71 * The score of an ORF in the context of an overlapping gene also includes a penalty for the overlap length.
jpayne@68 72 * @param overlap
jpayne@68 73 * @return Calculated score
jpayne@68 74 */
jpayne@68 75 public float calcOrfScore(int overlap){
jpayne@68 76 double a=Math.sqrt(Tools.max(f1, e1+startScore));
jpayne@68 77 // double b=Math.sqrt(f2/*Tools.max(f2, e2+stopScore)*/);//This is better, ignoring stopscore completely
jpayne@68 78 double b=Math.sqrt(Tools.max(f2, e2+0.35f*stopScore));
jpayne@68 79 double c=Tools.max(f3, e3+averageKmerScore());
jpayne@68 80 assert(a!=Double.NaN);
jpayne@68 81 assert(b!=Double.NaN);
jpayne@68 82 assert(c!=Double.NaN);
jpayne@68 83 c=4*Math.pow(c, 2.2);
jpayne@68 84 double d=(0.1*a*b*c*(Math.pow(length()-overlap, 2.5)-(overlap<1 ? 0 : Math.pow(overlap+50, 2))));//TODO: Adjust these constants
jpayne@68 85 if(d>0){d=Math.sqrt(d);}
jpayne@68 86 assert(d!=Double.NaN);
jpayne@68 87 return (float)d;
jpayne@68 88 }
jpayne@68 89
jpayne@68 90 public float averageKmerScore(){
jpayne@68 91 return kmerScore/(length()-GeneModel.kInnerCDS-2); //This slightly affects score if kInnerCDS is changed
jpayne@68 92 }
jpayne@68 93
jpayne@68 94 /*--------------------------------------------------------------*/
jpayne@68 95 /*---------------- Public Methods ----------------*/
jpayne@68 96 /*--------------------------------------------------------------*/
jpayne@68 97
jpayne@68 98 public boolean isValidPrev(Orf prev, int maxOverlap){
jpayne@68 99 if(prev.stop>=stop || prev.stop>=start+maxOverlap || prev.start>=start){return false;}
jpayne@68 100 if(prev.frame==frame && prev.strand==strand && prev.stop>=start){return false;}
jpayne@68 101 return true;
jpayne@68 102 }
jpayne@68 103
jpayne@68 104 public float pathScore() {return Tools.max(pathScorePlus, pathScoreMinus);}
jpayne@68 105 public float pathScore(int prevStrand) {return prevStrand==0 ? pathScorePlus : pathScoreMinus;}
jpayne@68 106
jpayne@68 107 public Orf prev(){return pathScorePlus>=pathScoreMinus ? prevPlus : prevMinus;}
jpayne@68 108 public Orf prev(int prevStrand){return prevStrand==0 ? prevPlus : prevMinus;}
jpayne@68 109
jpayne@68 110 public int pathLength(int prevStrand){return prevStrand==0 ? pathLengthPlus : pathLengthMinus;}
jpayne@68 111 public int pathLength(){return pathScorePlus>=pathScoreMinus ? pathLengthPlus : pathLengthMinus;}
jpayne@68 112
jpayne@68 113 /*--------------------------------------------------------------*/
jpayne@68 114 /*---------------- ToString ----------------*/
jpayne@68 115 /*--------------------------------------------------------------*/
jpayne@68 116
jpayne@68 117 public String toStringFlipped(){
jpayne@68 118 if(strand==flipped()){
jpayne@68 119 return toString();
jpayne@68 120 }
jpayne@68 121 flip();
jpayne@68 122 String s=toString();
jpayne@68 123 flip();
jpayne@68 124 return s;
jpayne@68 125 }
jpayne@68 126
jpayne@68 127 @Override
jpayne@68 128 public String toString(){
jpayne@68 129 return toGff();
jpayne@68 130 }
jpayne@68 131
jpayne@68 132 public String toGff(){
jpayne@68 133 ByteBuilder bb=new ByteBuilder();
jpayne@68 134 appendGff(bb);
jpayne@68 135 return bb.toString();
jpayne@68 136 }
jpayne@68 137
jpayne@68 138 public ByteBuilder appendGff(ByteBuilder bb){
jpayne@68 139 if(scafName==null){
jpayne@68 140 bb.append('.').tab();
jpayne@68 141 }else{
jpayne@68 142 for(int i=0, max=scafName.length(); i<max; i++){
jpayne@68 143 char c=scafName.charAt(i);
jpayne@68 144 if(c==' ' || c=='\t'){break;}
jpayne@68 145 bb.append(c);
jpayne@68 146 }
jpayne@68 147 bb.tab();
jpayne@68 148 }
jpayne@68 149 bb.append("BBTools").append('\t');
jpayne@68 150 bb.append(typeStrings2[type]).append('\t');
jpayne@68 151 bb.append(start+1).append('\t');
jpayne@68 152 bb.append(stop+1).append('\t');
jpayne@68 153
jpayne@68 154 if(orfScore<0){bb.append('.').append('\t');}
jpayne@68 155 else{bb.append(orfScore, 2).append('\t');}
jpayne@68 156
jpayne@68 157 bb.append(strand<0 ? '.' : Shared.strandCodes2[strand]).append('\t');
jpayne@68 158
jpayne@68 159 bb.append('0').append('\t');
jpayne@68 160
jpayne@68 161 //bb.append('.');
jpayne@68 162 bb.append(typeStrings[type]).append(',');
jpayne@68 163 if(type==0){
jpayne@68 164 bb.append("fr").append(frame).append(',');
jpayne@68 165 }
jpayne@68 166 // bb.append(startCodon).append(',');
jpayne@68 167 // bb.append(stopCodon).append(',');
jpayne@68 168 bb.append("startScr:").append(startScore, 3).append(',');
jpayne@68 169 bb.append("stopScr:").append(stopScore, 3).append(',');
jpayne@68 170 bb.append("innerScr:").append(averageKmerScore(), 3).append(',');
jpayne@68 171 bb.append("len:").append(length());
jpayne@68 172 if(type==0){
jpayne@68 173 bb.append(',');
jpayne@68 174 bb.append("start:").append(AminoAcid.codonToString(startCodon)).append(',');
jpayne@68 175 bb.append("stop:").append(AminoAcid.codonToString(stopCodon));
jpayne@68 176 }
jpayne@68 177 return bb;
jpayne@68 178 }
jpayne@68 179
jpayne@68 180 public boolean isSSU(){
jpayne@68 181 return type==r16S || type==r18S;
jpayne@68 182 }
jpayne@68 183
jpayne@68 184 public boolean is16S(){
jpayne@68 185 return type==r16S;
jpayne@68 186 }
jpayne@68 187
jpayne@68 188 public boolean is18S(){
jpayne@68 189 return type==r18S;
jpayne@68 190 }
jpayne@68 191
jpayne@68 192 /*--------------------------------------------------------------*/
jpayne@68 193 /*---------------- Overrides ----------------*/
jpayne@68 194 /*--------------------------------------------------------------*/
jpayne@68 195
jpayne@68 196 @Override
jpayne@68 197 public float score() {
jpayne@68 198 return orfScore;
jpayne@68 199 }
jpayne@68 200
jpayne@68 201 /*--------------------------------------------------------------*/
jpayne@68 202 /*---------------- Fields ----------------*/
jpayne@68 203 /*--------------------------------------------------------------*/
jpayne@68 204
jpayne@68 205 public final int frame;
jpayne@68 206
jpayne@68 207 //These are not needed but nice for printing
jpayne@68 208 public final int startCodon;
jpayne@68 209 public final int stopCodon;
jpayne@68 210
jpayne@68 211 public float startScore;
jpayne@68 212 public float stopScore;
jpayne@68 213 public float kmerScore;
jpayne@68 214
jpayne@68 215 public float orfScore;
jpayne@68 216
jpayne@68 217 //Path scores are for pathfinding phase
jpayne@68 218
jpayne@68 219 public float pathScorePlus;
jpayne@68 220 public int pathLengthPlus=1;
jpayne@68 221 public Orf prevPlus;
jpayne@68 222
jpayne@68 223 public float pathScoreMinus;
jpayne@68 224 public int pathLengthMinus=1;
jpayne@68 225 public Orf prevMinus;
jpayne@68 226
jpayne@68 227 public final int type;
jpayne@68 228
jpayne@68 229 /*--------------------------------------------------------------*/
jpayne@68 230 /*---------------- Static Fields ----------------*/
jpayne@68 231 /*--------------------------------------------------------------*/
jpayne@68 232
jpayne@68 233 /* for kinnercds=6 */
jpayne@68 234 // static float e1=0.1f;
jpayne@68 235 // static float e2=-0.04f;
jpayne@68 236 // static float e3=0.01f;//Decreasing this decreases TP but increases SNR
jpayne@68 237 //
jpayne@68 238 // static float f1=0.08f;
jpayne@68 239 // static float f2=0.06f;
jpayne@68 240 // static float f3=0.09f;
jpayne@68 241
jpayne@68 242 /* for kinnercds=7 */
jpayne@68 243 static float e1=0.35f;
jpayne@68 244 static float e2=-0.1f;
jpayne@68 245 static float e3=-0.01f;//Decreasing this decreases TP but increases SNR
jpayne@68 246
jpayne@68 247 static float f1=0.08f;
jpayne@68 248 static float f2=0.02f;
jpayne@68 249 static float f3=0.09f;
jpayne@68 250
jpayne@68 251 }