Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/Orf.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package prok; | |
2 | |
3 import dna.AminoAcid; | |
4 import shared.Shared; | |
5 import shared.Tools; | |
6 import structures.ByteBuilder; | |
7 | |
8 /** | |
9 * ORF means Open Reading Frame. | |
10 * It starts at the first base of a start codon and ends at the last base of a stop codon. | |
11 * The length is divisible by 3. | |
12 * @author Brian Bushnell | |
13 * @date Sep 20, 2018 | |
14 * | |
15 */ | |
16 public class Orf extends Feature { | |
17 | |
18 /*--------------------------------------------------------------*/ | |
19 /*---------------- Initialization ----------------*/ | |
20 /*--------------------------------------------------------------*/ | |
21 | |
22 /** | |
23 * Bases and coordinates are assumed to be the correct strand. | |
24 * Minus-strand ORFs can be flipped at the end of the constructor. | |
25 * @param scafName_ | |
26 * @param start_ | |
27 * @param stop_ | |
28 * @param strand_ | |
29 * @param frame_ | |
30 * @param bases | |
31 * @param flip | |
32 */ | |
33 public Orf(String scafName_, int start_, int stop_, int strand_, int frame_, byte[] bases, boolean flip, int type_) { | |
34 super(scafName_, start_, stop_, strand_, bases.length); | |
35 frame=frame_; | |
36 startCodon=getCodon(start, bases); | |
37 stopCodon=getCodon(stop-2, bases); | |
38 type=type_; | |
39 | |
40 if(flip && strand==Shared.MINUS){flip();} | |
41 } | |
42 | |
43 /*--------------------------------------------------------------*/ | |
44 /*---------------- Init Helpers ----------------*/ | |
45 /*--------------------------------------------------------------*/ | |
46 | |
47 /** | |
48 * Grab the codon starting at from. | |
49 * Assumes bases are in the correct strand | |
50 * @param from | |
51 * @param bases | |
52 * @return | |
53 */ | |
54 private static int getCodon(int from, byte[] bases){ | |
55 int codon=0; | |
56 for(int i=0; i<3; i++){ | |
57 // assert(i+from<bases.length) : i+", "+from+", "+bases.length; | |
58 byte b=bases[i+from]; | |
59 int x=AminoAcid.baseToNumber[b]; | |
60 codon=(codon<<2)|x; | |
61 } | |
62 return codon; | |
63 } | |
64 | |
65 public float calcOrfScore(){ | |
66 return calcOrfScore(0); | |
67 } | |
68 | |
69 /** | |
70 * The score of an ORF alone is a factor of the length, start score, stop score, and kmer score. | |
71 * The score of an ORF in the context of an overlapping gene also includes a penalty for the overlap length. | |
72 * @param overlap | |
73 * @return Calculated score | |
74 */ | |
75 public float calcOrfScore(int overlap){ | |
76 double a=Math.sqrt(Tools.max(f1, e1+startScore)); | |
77 // double b=Math.sqrt(f2/*Tools.max(f2, e2+stopScore)*/);//This is better, ignoring stopscore completely | |
78 double b=Math.sqrt(Tools.max(f2, e2+0.35f*stopScore)); | |
79 double c=Tools.max(f3, e3+averageKmerScore()); | |
80 assert(a!=Double.NaN); | |
81 assert(b!=Double.NaN); | |
82 assert(c!=Double.NaN); | |
83 c=4*Math.pow(c, 2.2); | |
84 double d=(0.1*a*b*c*(Math.pow(length()-overlap, 2.5)-(overlap<1 ? 0 : Math.pow(overlap+50, 2))));//TODO: Adjust these constants | |
85 if(d>0){d=Math.sqrt(d);} | |
86 assert(d!=Double.NaN); | |
87 return (float)d; | |
88 } | |
89 | |
90 public float averageKmerScore(){ | |
91 return kmerScore/(length()-GeneModel.kInnerCDS-2); //This slightly affects score if kInnerCDS is changed | |
92 } | |
93 | |
94 /*--------------------------------------------------------------*/ | |
95 /*---------------- Public Methods ----------------*/ | |
96 /*--------------------------------------------------------------*/ | |
97 | |
98 public boolean isValidPrev(Orf prev, int maxOverlap){ | |
99 if(prev.stop>=stop || prev.stop>=start+maxOverlap || prev.start>=start){return false;} | |
100 if(prev.frame==frame && prev.strand==strand && prev.stop>=start){return false;} | |
101 return true; | |
102 } | |
103 | |
104 public float pathScore() {return Tools.max(pathScorePlus, pathScoreMinus);} | |
105 public float pathScore(int prevStrand) {return prevStrand==0 ? pathScorePlus : pathScoreMinus;} | |
106 | |
107 public Orf prev(){return pathScorePlus>=pathScoreMinus ? prevPlus : prevMinus;} | |
108 public Orf prev(int prevStrand){return prevStrand==0 ? prevPlus : prevMinus;} | |
109 | |
110 public int pathLength(int prevStrand){return prevStrand==0 ? pathLengthPlus : pathLengthMinus;} | |
111 public int pathLength(){return pathScorePlus>=pathScoreMinus ? pathLengthPlus : pathLengthMinus;} | |
112 | |
113 /*--------------------------------------------------------------*/ | |
114 /*---------------- ToString ----------------*/ | |
115 /*--------------------------------------------------------------*/ | |
116 | |
117 public String toStringFlipped(){ | |
118 if(strand==flipped()){ | |
119 return toString(); | |
120 } | |
121 flip(); | |
122 String s=toString(); | |
123 flip(); | |
124 return s; | |
125 } | |
126 | |
127 @Override | |
128 public String toString(){ | |
129 return toGff(); | |
130 } | |
131 | |
132 public String toGff(){ | |
133 ByteBuilder bb=new ByteBuilder(); | |
134 appendGff(bb); | |
135 return bb.toString(); | |
136 } | |
137 | |
138 public ByteBuilder appendGff(ByteBuilder bb){ | |
139 if(scafName==null){ | |
140 bb.append('.').tab(); | |
141 }else{ | |
142 for(int i=0, max=scafName.length(); i<max; i++){ | |
143 char c=scafName.charAt(i); | |
144 if(c==' ' || c=='\t'){break;} | |
145 bb.append(c); | |
146 } | |
147 bb.tab(); | |
148 } | |
149 bb.append("BBTools").append('\t'); | |
150 bb.append(typeStrings2[type]).append('\t'); | |
151 bb.append(start+1).append('\t'); | |
152 bb.append(stop+1).append('\t'); | |
153 | |
154 if(orfScore<0){bb.append('.').append('\t');} | |
155 else{bb.append(orfScore, 2).append('\t');} | |
156 | |
157 bb.append(strand<0 ? '.' : Shared.strandCodes2[strand]).append('\t'); | |
158 | |
159 bb.append('0').append('\t'); | |
160 | |
161 //bb.append('.'); | |
162 bb.append(typeStrings[type]).append(','); | |
163 if(type==0){ | |
164 bb.append("fr").append(frame).append(','); | |
165 } | |
166 // bb.append(startCodon).append(','); | |
167 // bb.append(stopCodon).append(','); | |
168 bb.append("startScr:").append(startScore, 3).append(','); | |
169 bb.append("stopScr:").append(stopScore, 3).append(','); | |
170 bb.append("innerScr:").append(averageKmerScore(), 3).append(','); | |
171 bb.append("len:").append(length()); | |
172 if(type==0){ | |
173 bb.append(','); | |
174 bb.append("start:").append(AminoAcid.codonToString(startCodon)).append(','); | |
175 bb.append("stop:").append(AminoAcid.codonToString(stopCodon)); | |
176 } | |
177 return bb; | |
178 } | |
179 | |
180 public boolean isSSU(){ | |
181 return type==r16S || type==r18S; | |
182 } | |
183 | |
184 public boolean is16S(){ | |
185 return type==r16S; | |
186 } | |
187 | |
188 public boolean is18S(){ | |
189 return type==r18S; | |
190 } | |
191 | |
192 /*--------------------------------------------------------------*/ | |
193 /*---------------- Overrides ----------------*/ | |
194 /*--------------------------------------------------------------*/ | |
195 | |
196 @Override | |
197 public float score() { | |
198 return orfScore; | |
199 } | |
200 | |
201 /*--------------------------------------------------------------*/ | |
202 /*---------------- Fields ----------------*/ | |
203 /*--------------------------------------------------------------*/ | |
204 | |
205 public final int frame; | |
206 | |
207 //These are not needed but nice for printing | |
208 public final int startCodon; | |
209 public final int stopCodon; | |
210 | |
211 public float startScore; | |
212 public float stopScore; | |
213 public float kmerScore; | |
214 | |
215 public float orfScore; | |
216 | |
217 //Path scores are for pathfinding phase | |
218 | |
219 public float pathScorePlus; | |
220 public int pathLengthPlus=1; | |
221 public Orf prevPlus; | |
222 | |
223 public float pathScoreMinus; | |
224 public int pathLengthMinus=1; | |
225 public Orf prevMinus; | |
226 | |
227 public final int type; | |
228 | |
229 /*--------------------------------------------------------------*/ | |
230 /*---------------- Static Fields ----------------*/ | |
231 /*--------------------------------------------------------------*/ | |
232 | |
233 /* for kinnercds=6 */ | |
234 // static float e1=0.1f; | |
235 // static float e2=-0.04f; | |
236 // static float e3=0.01f;//Decreasing this decreases TP but increases SNR | |
237 // | |
238 // static float f1=0.08f; | |
239 // static float f2=0.06f; | |
240 // static float f3=0.09f; | |
241 | |
242 /* for kinnercds=7 */ | |
243 static float e1=0.35f; | |
244 static float e2=-0.1f; | |
245 static float e3=-0.01f;//Decreasing this decreases TP but increases SNR | |
246 | |
247 static float f1=0.08f; | |
248 static float f2=0.02f; | |
249 static float f3=0.09f; | |
250 | |
251 } |