comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/Orf.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package prok;
2
3 import dna.AminoAcid;
4 import shared.Shared;
5 import shared.Tools;
6 import structures.ByteBuilder;
7
8 /**
9 * ORF means Open Reading Frame.
10 * It starts at the first base of a start codon and ends at the last base of a stop codon.
11 * The length is divisible by 3.
12 * @author Brian Bushnell
13 * @date Sep 20, 2018
14 *
15 */
16 public class Orf extends Feature {
17
18 /*--------------------------------------------------------------*/
19 /*---------------- Initialization ----------------*/
20 /*--------------------------------------------------------------*/
21
22 /**
23 * Bases and coordinates are assumed to be the correct strand.
24 * Minus-strand ORFs can be flipped at the end of the constructor.
25 * @param scafName_
26 * @param start_
27 * @param stop_
28 * @param strand_
29 * @param frame_
30 * @param bases
31 * @param flip
32 */
33 public Orf(String scafName_, int start_, int stop_, int strand_, int frame_, byte[] bases, boolean flip, int type_) {
34 super(scafName_, start_, stop_, strand_, bases.length);
35 frame=frame_;
36 startCodon=getCodon(start, bases);
37 stopCodon=getCodon(stop-2, bases);
38 type=type_;
39
40 if(flip && strand==Shared.MINUS){flip();}
41 }
42
43 /*--------------------------------------------------------------*/
44 /*---------------- Init Helpers ----------------*/
45 /*--------------------------------------------------------------*/
46
47 /**
48 * Grab the codon starting at from.
49 * Assumes bases are in the correct strand
50 * @param from
51 * @param bases
52 * @return
53 */
54 private static int getCodon(int from, byte[] bases){
55 int codon=0;
56 for(int i=0; i<3; i++){
57 // assert(i+from<bases.length) : i+", "+from+", "+bases.length;
58 byte b=bases[i+from];
59 int x=AminoAcid.baseToNumber[b];
60 codon=(codon<<2)|x;
61 }
62 return codon;
63 }
64
65 public float calcOrfScore(){
66 return calcOrfScore(0);
67 }
68
69 /**
70 * The score of an ORF alone is a factor of the length, start score, stop score, and kmer score.
71 * The score of an ORF in the context of an overlapping gene also includes a penalty for the overlap length.
72 * @param overlap
73 * @return Calculated score
74 */
75 public float calcOrfScore(int overlap){
76 double a=Math.sqrt(Tools.max(f1, e1+startScore));
77 // double b=Math.sqrt(f2/*Tools.max(f2, e2+stopScore)*/);//This is better, ignoring stopscore completely
78 double b=Math.sqrt(Tools.max(f2, e2+0.35f*stopScore));
79 double c=Tools.max(f3, e3+averageKmerScore());
80 assert(a!=Double.NaN);
81 assert(b!=Double.NaN);
82 assert(c!=Double.NaN);
83 c=4*Math.pow(c, 2.2);
84 double d=(0.1*a*b*c*(Math.pow(length()-overlap, 2.5)-(overlap<1 ? 0 : Math.pow(overlap+50, 2))));//TODO: Adjust these constants
85 if(d>0){d=Math.sqrt(d);}
86 assert(d!=Double.NaN);
87 return (float)d;
88 }
89
90 public float averageKmerScore(){
91 return kmerScore/(length()-GeneModel.kInnerCDS-2); //This slightly affects score if kInnerCDS is changed
92 }
93
94 /*--------------------------------------------------------------*/
95 /*---------------- Public Methods ----------------*/
96 /*--------------------------------------------------------------*/
97
98 public boolean isValidPrev(Orf prev, int maxOverlap){
99 if(prev.stop>=stop || prev.stop>=start+maxOverlap || prev.start>=start){return false;}
100 if(prev.frame==frame && prev.strand==strand && prev.stop>=start){return false;}
101 return true;
102 }
103
104 public float pathScore() {return Tools.max(pathScorePlus, pathScoreMinus);}
105 public float pathScore(int prevStrand) {return prevStrand==0 ? pathScorePlus : pathScoreMinus;}
106
107 public Orf prev(){return pathScorePlus>=pathScoreMinus ? prevPlus : prevMinus;}
108 public Orf prev(int prevStrand){return prevStrand==0 ? prevPlus : prevMinus;}
109
110 public int pathLength(int prevStrand){return prevStrand==0 ? pathLengthPlus : pathLengthMinus;}
111 public int pathLength(){return pathScorePlus>=pathScoreMinus ? pathLengthPlus : pathLengthMinus;}
112
113 /*--------------------------------------------------------------*/
114 /*---------------- ToString ----------------*/
115 /*--------------------------------------------------------------*/
116
117 public String toStringFlipped(){
118 if(strand==flipped()){
119 return toString();
120 }
121 flip();
122 String s=toString();
123 flip();
124 return s;
125 }
126
127 @Override
128 public String toString(){
129 return toGff();
130 }
131
132 public String toGff(){
133 ByteBuilder bb=new ByteBuilder();
134 appendGff(bb);
135 return bb.toString();
136 }
137
138 public ByteBuilder appendGff(ByteBuilder bb){
139 if(scafName==null){
140 bb.append('.').tab();
141 }else{
142 for(int i=0, max=scafName.length(); i<max; i++){
143 char c=scafName.charAt(i);
144 if(c==' ' || c=='\t'){break;}
145 bb.append(c);
146 }
147 bb.tab();
148 }
149 bb.append("BBTools").append('\t');
150 bb.append(typeStrings2[type]).append('\t');
151 bb.append(start+1).append('\t');
152 bb.append(stop+1).append('\t');
153
154 if(orfScore<0){bb.append('.').append('\t');}
155 else{bb.append(orfScore, 2).append('\t');}
156
157 bb.append(strand<0 ? '.' : Shared.strandCodes2[strand]).append('\t');
158
159 bb.append('0').append('\t');
160
161 //bb.append('.');
162 bb.append(typeStrings[type]).append(',');
163 if(type==0){
164 bb.append("fr").append(frame).append(',');
165 }
166 // bb.append(startCodon).append(',');
167 // bb.append(stopCodon).append(',');
168 bb.append("startScr:").append(startScore, 3).append(',');
169 bb.append("stopScr:").append(stopScore, 3).append(',');
170 bb.append("innerScr:").append(averageKmerScore(), 3).append(',');
171 bb.append("len:").append(length());
172 if(type==0){
173 bb.append(',');
174 bb.append("start:").append(AminoAcid.codonToString(startCodon)).append(',');
175 bb.append("stop:").append(AminoAcid.codonToString(stopCodon));
176 }
177 return bb;
178 }
179
180 public boolean isSSU(){
181 return type==r16S || type==r18S;
182 }
183
184 public boolean is16S(){
185 return type==r16S;
186 }
187
188 public boolean is18S(){
189 return type==r18S;
190 }
191
192 /*--------------------------------------------------------------*/
193 /*---------------- Overrides ----------------*/
194 /*--------------------------------------------------------------*/
195
196 @Override
197 public float score() {
198 return orfScore;
199 }
200
201 /*--------------------------------------------------------------*/
202 /*---------------- Fields ----------------*/
203 /*--------------------------------------------------------------*/
204
205 public final int frame;
206
207 //These are not needed but nice for printing
208 public final int startCodon;
209 public final int stopCodon;
210
211 public float startScore;
212 public float stopScore;
213 public float kmerScore;
214
215 public float orfScore;
216
217 //Path scores are for pathfinding phase
218
219 public float pathScorePlus;
220 public int pathLengthPlus=1;
221 public Orf prevPlus;
222
223 public float pathScoreMinus;
224 public int pathLengthMinus=1;
225 public Orf prevMinus;
226
227 public final int type;
228
229 /*--------------------------------------------------------------*/
230 /*---------------- Static Fields ----------------*/
231 /*--------------------------------------------------------------*/
232
233 /* for kinnercds=6 */
234 // static float e1=0.1f;
235 // static float e2=-0.04f;
236 // static float e3=0.01f;//Decreasing this decreases TP but increases SNR
237 //
238 // static float f1=0.08f;
239 // static float f2=0.06f;
240 // static float f3=0.09f;
241
242 /* for kinnercds=7 */
243 static float e1=0.35f;
244 static float e2=-0.1f;
245 static float e3=-0.01f;//Decreasing this decreases TP but increases SNR
246
247 static float f1=0.08f;
248 static float f2=0.02f;
249 static float f3=0.09f;
250
251 }