jpayne@68
|
1 package prok;
|
jpayne@68
|
2
|
jpayne@68
|
3 import dna.AminoAcid;
|
jpayne@68
|
4 import shared.Shared;
|
jpayne@68
|
5 import shared.Tools;
|
jpayne@68
|
6 import structures.ByteBuilder;
|
jpayne@68
|
7
|
jpayne@68
|
8 /**
|
jpayne@68
|
9 * ORF means Open Reading Frame.
|
jpayne@68
|
10 * It starts at the first base of a start codon and ends at the last base of a stop codon.
|
jpayne@68
|
11 * The length is divisible by 3.
|
jpayne@68
|
12 * @author Brian Bushnell
|
jpayne@68
|
13 * @date Sep 20, 2018
|
jpayne@68
|
14 *
|
jpayne@68
|
15 */
|
jpayne@68
|
16 public class Orf extends Feature {
|
jpayne@68
|
17
|
jpayne@68
|
18 /*--------------------------------------------------------------*/
|
jpayne@68
|
19 /*---------------- Initialization ----------------*/
|
jpayne@68
|
20 /*--------------------------------------------------------------*/
|
jpayne@68
|
21
|
jpayne@68
|
22 /**
|
jpayne@68
|
23 * Bases and coordinates are assumed to be the correct strand.
|
jpayne@68
|
24 * Minus-strand ORFs can be flipped at the end of the constructor.
|
jpayne@68
|
25 * @param scafName_
|
jpayne@68
|
26 * @param start_
|
jpayne@68
|
27 * @param stop_
|
jpayne@68
|
28 * @param strand_
|
jpayne@68
|
29 * @param frame_
|
jpayne@68
|
30 * @param bases
|
jpayne@68
|
31 * @param flip
|
jpayne@68
|
32 */
|
jpayne@68
|
33 public Orf(String scafName_, int start_, int stop_, int strand_, int frame_, byte[] bases, boolean flip, int type_) {
|
jpayne@68
|
34 super(scafName_, start_, stop_, strand_, bases.length);
|
jpayne@68
|
35 frame=frame_;
|
jpayne@68
|
36 startCodon=getCodon(start, bases);
|
jpayne@68
|
37 stopCodon=getCodon(stop-2, bases);
|
jpayne@68
|
38 type=type_;
|
jpayne@68
|
39
|
jpayne@68
|
40 if(flip && strand==Shared.MINUS){flip();}
|
jpayne@68
|
41 }
|
jpayne@68
|
42
|
jpayne@68
|
43 /*--------------------------------------------------------------*/
|
jpayne@68
|
44 /*---------------- Init Helpers ----------------*/
|
jpayne@68
|
45 /*--------------------------------------------------------------*/
|
jpayne@68
|
46
|
jpayne@68
|
47 /**
|
jpayne@68
|
48 * Grab the codon starting at from.
|
jpayne@68
|
49 * Assumes bases are in the correct strand
|
jpayne@68
|
50 * @param from
|
jpayne@68
|
51 * @param bases
|
jpayne@68
|
52 * @return
|
jpayne@68
|
53 */
|
jpayne@68
|
54 private static int getCodon(int from, byte[] bases){
|
jpayne@68
|
55 int codon=0;
|
jpayne@68
|
56 for(int i=0; i<3; i++){
|
jpayne@68
|
57 // assert(i+from<bases.length) : i+", "+from+", "+bases.length;
|
jpayne@68
|
58 byte b=bases[i+from];
|
jpayne@68
|
59 int x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
60 codon=(codon<<2)|x;
|
jpayne@68
|
61 }
|
jpayne@68
|
62 return codon;
|
jpayne@68
|
63 }
|
jpayne@68
|
64
|
jpayne@68
|
65 public float calcOrfScore(){
|
jpayne@68
|
66 return calcOrfScore(0);
|
jpayne@68
|
67 }
|
jpayne@68
|
68
|
jpayne@68
|
69 /**
|
jpayne@68
|
70 * The score of an ORF alone is a factor of the length, start score, stop score, and kmer score.
|
jpayne@68
|
71 * The score of an ORF in the context of an overlapping gene also includes a penalty for the overlap length.
|
jpayne@68
|
72 * @param overlap
|
jpayne@68
|
73 * @return Calculated score
|
jpayne@68
|
74 */
|
jpayne@68
|
75 public float calcOrfScore(int overlap){
|
jpayne@68
|
76 double a=Math.sqrt(Tools.max(f1, e1+startScore));
|
jpayne@68
|
77 // double b=Math.sqrt(f2/*Tools.max(f2, e2+stopScore)*/);//This is better, ignoring stopscore completely
|
jpayne@68
|
78 double b=Math.sqrt(Tools.max(f2, e2+0.35f*stopScore));
|
jpayne@68
|
79 double c=Tools.max(f3, e3+averageKmerScore());
|
jpayne@68
|
80 assert(a!=Double.NaN);
|
jpayne@68
|
81 assert(b!=Double.NaN);
|
jpayne@68
|
82 assert(c!=Double.NaN);
|
jpayne@68
|
83 c=4*Math.pow(c, 2.2);
|
jpayne@68
|
84 double d=(0.1*a*b*c*(Math.pow(length()-overlap, 2.5)-(overlap<1 ? 0 : Math.pow(overlap+50, 2))));//TODO: Adjust these constants
|
jpayne@68
|
85 if(d>0){d=Math.sqrt(d);}
|
jpayne@68
|
86 assert(d!=Double.NaN);
|
jpayne@68
|
87 return (float)d;
|
jpayne@68
|
88 }
|
jpayne@68
|
89
|
jpayne@68
|
90 public float averageKmerScore(){
|
jpayne@68
|
91 return kmerScore/(length()-GeneModel.kInnerCDS-2); //This slightly affects score if kInnerCDS is changed
|
jpayne@68
|
92 }
|
jpayne@68
|
93
|
jpayne@68
|
94 /*--------------------------------------------------------------*/
|
jpayne@68
|
95 /*---------------- Public Methods ----------------*/
|
jpayne@68
|
96 /*--------------------------------------------------------------*/
|
jpayne@68
|
97
|
jpayne@68
|
98 public boolean isValidPrev(Orf prev, int maxOverlap){
|
jpayne@68
|
99 if(prev.stop>=stop || prev.stop>=start+maxOverlap || prev.start>=start){return false;}
|
jpayne@68
|
100 if(prev.frame==frame && prev.strand==strand && prev.stop>=start){return false;}
|
jpayne@68
|
101 return true;
|
jpayne@68
|
102 }
|
jpayne@68
|
103
|
jpayne@68
|
104 public float pathScore() {return Tools.max(pathScorePlus, pathScoreMinus);}
|
jpayne@68
|
105 public float pathScore(int prevStrand) {return prevStrand==0 ? pathScorePlus : pathScoreMinus;}
|
jpayne@68
|
106
|
jpayne@68
|
107 public Orf prev(){return pathScorePlus>=pathScoreMinus ? prevPlus : prevMinus;}
|
jpayne@68
|
108 public Orf prev(int prevStrand){return prevStrand==0 ? prevPlus : prevMinus;}
|
jpayne@68
|
109
|
jpayne@68
|
110 public int pathLength(int prevStrand){return prevStrand==0 ? pathLengthPlus : pathLengthMinus;}
|
jpayne@68
|
111 public int pathLength(){return pathScorePlus>=pathScoreMinus ? pathLengthPlus : pathLengthMinus;}
|
jpayne@68
|
112
|
jpayne@68
|
113 /*--------------------------------------------------------------*/
|
jpayne@68
|
114 /*---------------- ToString ----------------*/
|
jpayne@68
|
115 /*--------------------------------------------------------------*/
|
jpayne@68
|
116
|
jpayne@68
|
117 public String toStringFlipped(){
|
jpayne@68
|
118 if(strand==flipped()){
|
jpayne@68
|
119 return toString();
|
jpayne@68
|
120 }
|
jpayne@68
|
121 flip();
|
jpayne@68
|
122 String s=toString();
|
jpayne@68
|
123 flip();
|
jpayne@68
|
124 return s;
|
jpayne@68
|
125 }
|
jpayne@68
|
126
|
jpayne@68
|
127 @Override
|
jpayne@68
|
128 public String toString(){
|
jpayne@68
|
129 return toGff();
|
jpayne@68
|
130 }
|
jpayne@68
|
131
|
jpayne@68
|
132 public String toGff(){
|
jpayne@68
|
133 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
134 appendGff(bb);
|
jpayne@68
|
135 return bb.toString();
|
jpayne@68
|
136 }
|
jpayne@68
|
137
|
jpayne@68
|
138 public ByteBuilder appendGff(ByteBuilder bb){
|
jpayne@68
|
139 if(scafName==null){
|
jpayne@68
|
140 bb.append('.').tab();
|
jpayne@68
|
141 }else{
|
jpayne@68
|
142 for(int i=0, max=scafName.length(); i<max; i++){
|
jpayne@68
|
143 char c=scafName.charAt(i);
|
jpayne@68
|
144 if(c==' ' || c=='\t'){break;}
|
jpayne@68
|
145 bb.append(c);
|
jpayne@68
|
146 }
|
jpayne@68
|
147 bb.tab();
|
jpayne@68
|
148 }
|
jpayne@68
|
149 bb.append("BBTools").append('\t');
|
jpayne@68
|
150 bb.append(typeStrings2[type]).append('\t');
|
jpayne@68
|
151 bb.append(start+1).append('\t');
|
jpayne@68
|
152 bb.append(stop+1).append('\t');
|
jpayne@68
|
153
|
jpayne@68
|
154 if(orfScore<0){bb.append('.').append('\t');}
|
jpayne@68
|
155 else{bb.append(orfScore, 2).append('\t');}
|
jpayne@68
|
156
|
jpayne@68
|
157 bb.append(strand<0 ? '.' : Shared.strandCodes2[strand]).append('\t');
|
jpayne@68
|
158
|
jpayne@68
|
159 bb.append('0').append('\t');
|
jpayne@68
|
160
|
jpayne@68
|
161 //bb.append('.');
|
jpayne@68
|
162 bb.append(typeStrings[type]).append(',');
|
jpayne@68
|
163 if(type==0){
|
jpayne@68
|
164 bb.append("fr").append(frame).append(',');
|
jpayne@68
|
165 }
|
jpayne@68
|
166 // bb.append(startCodon).append(',');
|
jpayne@68
|
167 // bb.append(stopCodon).append(',');
|
jpayne@68
|
168 bb.append("startScr:").append(startScore, 3).append(',');
|
jpayne@68
|
169 bb.append("stopScr:").append(stopScore, 3).append(',');
|
jpayne@68
|
170 bb.append("innerScr:").append(averageKmerScore(), 3).append(',');
|
jpayne@68
|
171 bb.append("len:").append(length());
|
jpayne@68
|
172 if(type==0){
|
jpayne@68
|
173 bb.append(',');
|
jpayne@68
|
174 bb.append("start:").append(AminoAcid.codonToString(startCodon)).append(',');
|
jpayne@68
|
175 bb.append("stop:").append(AminoAcid.codonToString(stopCodon));
|
jpayne@68
|
176 }
|
jpayne@68
|
177 return bb;
|
jpayne@68
|
178 }
|
jpayne@68
|
179
|
jpayne@68
|
180 public boolean isSSU(){
|
jpayne@68
|
181 return type==r16S || type==r18S;
|
jpayne@68
|
182 }
|
jpayne@68
|
183
|
jpayne@68
|
184 public boolean is16S(){
|
jpayne@68
|
185 return type==r16S;
|
jpayne@68
|
186 }
|
jpayne@68
|
187
|
jpayne@68
|
188 public boolean is18S(){
|
jpayne@68
|
189 return type==r18S;
|
jpayne@68
|
190 }
|
jpayne@68
|
191
|
jpayne@68
|
192 /*--------------------------------------------------------------*/
|
jpayne@68
|
193 /*---------------- Overrides ----------------*/
|
jpayne@68
|
194 /*--------------------------------------------------------------*/
|
jpayne@68
|
195
|
jpayne@68
|
196 @Override
|
jpayne@68
|
197 public float score() {
|
jpayne@68
|
198 return orfScore;
|
jpayne@68
|
199 }
|
jpayne@68
|
200
|
jpayne@68
|
201 /*--------------------------------------------------------------*/
|
jpayne@68
|
202 /*---------------- Fields ----------------*/
|
jpayne@68
|
203 /*--------------------------------------------------------------*/
|
jpayne@68
|
204
|
jpayne@68
|
205 public final int frame;
|
jpayne@68
|
206
|
jpayne@68
|
207 //These are not needed but nice for printing
|
jpayne@68
|
208 public final int startCodon;
|
jpayne@68
|
209 public final int stopCodon;
|
jpayne@68
|
210
|
jpayne@68
|
211 public float startScore;
|
jpayne@68
|
212 public float stopScore;
|
jpayne@68
|
213 public float kmerScore;
|
jpayne@68
|
214
|
jpayne@68
|
215 public float orfScore;
|
jpayne@68
|
216
|
jpayne@68
|
217 //Path scores are for pathfinding phase
|
jpayne@68
|
218
|
jpayne@68
|
219 public float pathScorePlus;
|
jpayne@68
|
220 public int pathLengthPlus=1;
|
jpayne@68
|
221 public Orf prevPlus;
|
jpayne@68
|
222
|
jpayne@68
|
223 public float pathScoreMinus;
|
jpayne@68
|
224 public int pathLengthMinus=1;
|
jpayne@68
|
225 public Orf prevMinus;
|
jpayne@68
|
226
|
jpayne@68
|
227 public final int type;
|
jpayne@68
|
228
|
jpayne@68
|
229 /*--------------------------------------------------------------*/
|
jpayne@68
|
230 /*---------------- Static Fields ----------------*/
|
jpayne@68
|
231 /*--------------------------------------------------------------*/
|
jpayne@68
|
232
|
jpayne@68
|
233 /* for kinnercds=6 */
|
jpayne@68
|
234 // static float e1=0.1f;
|
jpayne@68
|
235 // static float e2=-0.04f;
|
jpayne@68
|
236 // static float e3=0.01f;//Decreasing this decreases TP but increases SNR
|
jpayne@68
|
237 //
|
jpayne@68
|
238 // static float f1=0.08f;
|
jpayne@68
|
239 // static float f2=0.06f;
|
jpayne@68
|
240 // static float f3=0.09f;
|
jpayne@68
|
241
|
jpayne@68
|
242 /* for kinnercds=7 */
|
jpayne@68
|
243 static float e1=0.35f;
|
jpayne@68
|
244 static float e2=-0.1f;
|
jpayne@68
|
245 static float e3=-0.01f;//Decreasing this decreases TP but increases SNR
|
jpayne@68
|
246
|
jpayne@68
|
247 static float f1=0.08f;
|
jpayne@68
|
248 static float f2=0.02f;
|
jpayne@68
|
249 static float f3=0.09f;
|
jpayne@68
|
250
|
jpayne@68
|
251 }
|