annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/FrameStats.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package prok;
jpayne@68 2
jpayne@68 3 import java.util.Arrays;
jpayne@68 4
jpayne@68 5 import dna.AminoAcid;
jpayne@68 6 import shared.KillSwitch;
jpayne@68 7 import shared.Parse;
jpayne@68 8 import shared.Tools;
jpayne@68 9 import structures.ByteBuilder;
jpayne@68 10
jpayne@68 11 /**
jpayne@68 12 * Stores frame-relative kmer counts for a type of genomic feature, such as a coding start site.
jpayne@68 13 * @author Brian Bushnell
jpayne@68 14 * @date Sep 24, 2018
jpayne@68 15 */
jpayne@68 16 public class FrameStats {
jpayne@68 17
jpayne@68 18 /*--------------------------------------------------------------*/
jpayne@68 19 /*---------------- Initialization ----------------*/
jpayne@68 20 /*--------------------------------------------------------------*/
jpayne@68 21
jpayne@68 22 public FrameStats(String name_, int k_, int frames_, int leftOffset_){
jpayne@68 23 name=name_;
jpayne@68 24 k=k_;
jpayne@68 25 mask=~((-1)<<(2*k));
jpayne@68 26 frames=frames_;
jpayne@68 27 kMax=1<<(2*k);
jpayne@68 28 invFrames=1.0f/frames;
jpayne@68 29 leftOffset=leftOffset_;
jpayne@68 30
jpayne@68 31 probs=new float[frames][kMax];
jpayne@68 32 countsTrue=new long[frames][kMax];
jpayne@68 33 countsFalse=new long[frames][kMax];
jpayne@68 34 counts=new long[][][] {countsFalse, countsTrue};
jpayne@68 35 }
jpayne@68 36
jpayne@68 37 /*--------------------------------------------------------------*/
jpayne@68 38 /*---------------- Methods ----------------*/
jpayne@68 39 /*--------------------------------------------------------------*/
jpayne@68 40
jpayne@68 41 public void add(int kmer, int frame, int valid){
jpayne@68 42 counts[valid][frame][kmer]++;
jpayne@68 43 validSums[valid]++;
jpayne@68 44 }
jpayne@68 45
jpayne@68 46 public boolean compatibleWith(FrameStats fs) {
jpayne@68 47 return fs.name.equals(name) && fs.leftOffset==leftOffset && fs.k==k && fs.frames==frames;
jpayne@68 48 }
jpayne@68 49
jpayne@68 50 public void clear() {
jpayne@68 51 Tools.fill(counts, 0);
jpayne@68 52 Arrays.fill(validSums, 0);
jpayne@68 53 }
jpayne@68 54
jpayne@68 55 public void setFrom(FrameStats fs) {
jpayne@68 56 assert(compatibleWith(fs)) : name+", "+frames+", "+fs.name+", "+fs.frames;
jpayne@68 57 clear();
jpayne@68 58 add(fs);
jpayne@68 59 }
jpayne@68 60
jpayne@68 61 public void add(FrameStats fs){
jpayne@68 62 assert(fs.name.equals(name));
jpayne@68 63 assert(fs.leftOffset==leftOffset);
jpayne@68 64 assert(fs.k==k);
jpayne@68 65 assert(fs.frames==frames) : name+", "+frames+", "+fs.name+", "+fs.frames;
jpayne@68 66 // for(int x=0; x<counts.length; x++) {
jpayne@68 67 // for(int y=0; y<counts[x].length; y++) {
jpayne@68 68 // for(int z=0; z<counts[x][y].length; z++) {
jpayne@68 69 // assert(fs.counts[x][y][z]>=0) : counts[x][y][z]+", "+fs.counts[x][y][z]+", "+fs.name;
jpayne@68 70 // assert(counts[x][y][z]>=0) : counts[x][y][z]+", "+fs.counts[x][y][z];
jpayne@68 71 // }
jpayne@68 72 // }
jpayne@68 73 // }
jpayne@68 74
jpayne@68 75 Tools.add(counts, fs.counts);
jpayne@68 76 Tools.add(validSums, fs.validSums);
jpayne@68 77 // for(int x=0; x<counts.length; x++) {
jpayne@68 78 // for(int y=0; y<counts[x].length; y++) {
jpayne@68 79 // for(int z=0; z<counts[x][y].length; z++) {
jpayne@68 80 // assert(fs.counts[x][y][z]>=0) : counts[x][y][z]+", "+fs.counts[x][y][z];
jpayne@68 81 // assert(counts[x][y][z]>=0) : counts[x][y][z]+", "+fs.counts[x][y][z];
jpayne@68 82 // }
jpayne@68 83 // }
jpayne@68 84 // }
jpayne@68 85 // calculate();
jpayne@68 86 }
jpayne@68 87
jpayne@68 88 public void multiplyBy(double mult) {
jpayne@68 89 Tools.multiplyBy(counts, mult);
jpayne@68 90 Tools.multiplyBy(validSums, mult);
jpayne@68 91 }
jpayne@68 92
jpayne@68 93 void calculate(){
jpayne@68 94 average=(float)((validSums[1]+1.0)/(validSums[0]+validSums[1]+1.0));
jpayne@68 95 invAvg=1.0f/average;
jpayne@68 96
jpayne@68 97 for(int a=0; a<frames; a++){
jpayne@68 98 for(int b=0; b<kMax; b++){
jpayne@68 99 long t=countsTrue[a][b];
jpayne@68 100 long f=countsFalse[a][b];
jpayne@68 101 probs[a][b]=(float)(t/(t+f+1.0))*invAvg;
jpayne@68 102 }
jpayne@68 103 }
jpayne@68 104 }
jpayne@68 105
jpayne@68 106 public float scorePoint(int point, byte[] bases){
jpayne@68 107 final int mask=~((-1)<<(2*k));
jpayne@68 108
jpayne@68 109 int kmer=0;
jpayne@68 110 int len=0;
jpayne@68 111 float score=0;
jpayne@68 112
jpayne@68 113 // outstream.println("k="+k);
jpayne@68 114 // outstream.println("mask="+mask);
jpayne@68 115
jpayne@68 116 int start=point-leftOffset;
jpayne@68 117 for(int i=start, frame=0-k+1; i<bases.length && frame<frames; i++, frame++){
jpayne@68 118 byte b=(i>=0 ? bases[i] : (byte)'A');
jpayne@68 119 int x=AminoAcid.baseToNumber[b];
jpayne@68 120 kmer=((kmer<<2)|x)&mask;
jpayne@68 121
jpayne@68 122 // outstream.println("b="+(char)b+", kmer="+kmer+", len="+(len+1)+", frame="+frame);
jpayne@68 123
jpayne@68 124 if(x>=0){
jpayne@68 125 len++;
jpayne@68 126 if(len>=k){
jpayne@68 127 float prob=probs[frame][kmer];
jpayne@68 128 float dif=prob-0.99f;
jpayne@68 129 score+=dif;
jpayne@68 130
jpayne@68 131 // if(name.equals("startStats")){
jpayne@68 132 // System.err.println("frame="+frame+" kmer="+AminoAcid.kmerToString(kmer, k)+
jpayne@68 133 // String.format(Locale.ROOT, " prob=%.4f\tdif=%.4f\tscore=%.4f", prob, dif, score)+
jpayne@68 134 // "\tvalid="+counts[1][frame][kmer]+"\tinvalid="+counts[0][frame][kmer]);
jpayne@68 135 // }
jpayne@68 136 }
jpayne@68 137 }else{len=0;}
jpayne@68 138 }
jpayne@68 139
jpayne@68 140 return score*invFrames;
jpayne@68 141 }
jpayne@68 142
jpayne@68 143 void processCDSFrames(byte[] bases, byte[] validFrames){
jpayne@68 144 if(!ProkObject.callCDS){return;}
jpayne@68 145 int kmer=0;
jpayne@68 146 int len=0;
jpayne@68 147
jpayne@68 148 for(int i=0; i<bases.length; i++){
jpayne@68 149 byte b=bases[i];
jpayne@68 150 int x=AminoAcid.baseToNumber[b];
jpayne@68 151 kmer=((kmer<<2)|x)&mask;
jpayne@68 152 if(x>=0){
jpayne@68 153 len++;
jpayne@68 154 if(len>=k){
jpayne@68 155 int vf=validFrames[i];
jpayne@68 156 for(int frame=0; frame<frames; frame++){
jpayne@68 157 int valid=vf&1;
jpayne@68 158 add(kmer, frame, valid);
jpayne@68 159 //For CDS start (0-based) of 189, i=192, j=189, vf=1, frame=0 - all as expected.
jpayne@68 160 // assert(valid==0) : "vf="+vf+", frame="+frame+", len="+len+", kmer="+AminoAcid.kmerToString(kmer, k)+", i="+i+", j="+j;
jpayne@68 161 vf=(vf>>1);
jpayne@68 162 }
jpayne@68 163 }
jpayne@68 164 }else{len=0;}
jpayne@68 165 }
jpayne@68 166 }
jpayne@68 167
jpayne@68 168 void processPoint(byte[] bases, int point, int valid){
jpayne@68 169
jpayne@68 170 //Degenerate cases where the point is at the end, possibly from a truncated gene.
jpayne@68 171 if(point<3){return;}
jpayne@68 172 if(point>=bases.length-3){return;}
jpayne@68 173
jpayne@68 174 int kmer=0;
jpayne@68 175 int len=0;
jpayne@68 176
jpayne@68 177 // outstream.println("k="+k);
jpayne@68 178 // outstream.println("mask="+mask);
jpayne@68 179
jpayne@68 180 int start=point-leftOffset;
jpayne@68 181
jpayne@68 182 int i=start, frame=0-k+1;
jpayne@68 183 while(i<0){i++; frame++;}
jpayne@68 184 for(; i<bases.length && frame<frames; i++, frame++){
jpayne@68 185 byte b=bases[i];
jpayne@68 186 int x=AminoAcid.baseToNumber[b];
jpayne@68 187 kmer=((kmer<<2)|x)&mask;
jpayne@68 188
jpayne@68 189 // outstream.println("b="+(char)b+", kmer="+kmer+", len="+(len+1)+", frame="+frame);
jpayne@68 190
jpayne@68 191 if(x>=0){
jpayne@68 192 len++;
jpayne@68 193 if(len>=k){
jpayne@68 194 add(kmer, frame, valid);
jpayne@68 195 }
jpayne@68 196 }else{len=0;}
jpayne@68 197 }
jpayne@68 198 }
jpayne@68 199
jpayne@68 200 /*--------------------------------------------------------------*/
jpayne@68 201 /*---------------- Text Methods ----------------*/
jpayne@68 202 /*--------------------------------------------------------------*/
jpayne@68 203
jpayne@68 204 public void parseData(byte[] line) {
jpayne@68 205 int a=0, b=0;
jpayne@68 206 final int valid, frame;
jpayne@68 207
jpayne@68 208 while(b<line.length && line[b]!='\t'){b++;}
jpayne@68 209 assert(b>a) : "Missing field 0: "+new String(line);
jpayne@68 210 valid=Parse.parseInt(line, a, b);
jpayne@68 211 b++;
jpayne@68 212 a=b;
jpayne@68 213
jpayne@68 214 while(b<line.length && line[b]!='\t'){b++;}
jpayne@68 215 assert(b>a) : "Missing field 1: "+new String(line);
jpayne@68 216 frame=Parse.parseInt(line, a, b);
jpayne@68 217 b++;
jpayne@68 218 a=b;
jpayne@68 219
jpayne@68 220 assert(valid==0 || valid==1);
jpayne@68 221 assert(frame>=0 && frame<frames);
jpayne@68 222 try {
jpayne@68 223 final long[] row=counts[valid][frame];
jpayne@68 224 long sum=0;
jpayne@68 225 for(int kmer=0; kmer<row.length; kmer++){
jpayne@68 226 while(b<line.length && line[b]!='\t'){b++;}
jpayne@68 227 assert(b>a) : "Missing field 1: "+new String(line);
jpayne@68 228 long count=Parse.parseLong(line, a, b);
jpayne@68 229 b++;
jpayne@68 230 a=b;
jpayne@68 231 row[kmer]=count;
jpayne@68 232 sum+=count;
jpayne@68 233 }
jpayne@68 234 validSums[valid]+=sum;
jpayne@68 235 } catch (Exception e) {
jpayne@68 236 System.err.println(new String(line)+"\n"+name);
jpayne@68 237 assert(false) : e;
jpayne@68 238 }
jpayne@68 239 }
jpayne@68 240
jpayne@68 241 @Override
jpayne@68 242 public String toString(){
jpayne@68 243 return appendTo(new ByteBuilder()).toString();
jpayne@68 244 }
jpayne@68 245
jpayne@68 246 public ByteBuilder appendTo(ByteBuilder bb){
jpayne@68 247 bb.append("#name\t").append(name).nl();
jpayne@68 248 bb.append("#k\t").append(k).nl();
jpayne@68 249 bb.append("#frames\t").append(frames).nl();
jpayne@68 250 bb.append("#offset\t").append(leftOffset).nl();
jpayne@68 251 bb.append("#valid\tframe");
jpayne@68 252 for(int i=0; i<kMax; i++){bb.tab().append(AminoAcid.kmerToString(i, k));}
jpayne@68 253 bb.nl();
jpayne@68 254 for(int a=0; a<2; a++){//valid
jpayne@68 255 for(int b=0; b<frames; b++){
jpayne@68 256 bb.append(a);
jpayne@68 257 bb.tab().append(b);
jpayne@68 258 for(int c=0; c<kMax; c++){
jpayne@68 259 bb.tab().append(counts[a][b][c]);
jpayne@68 260 }
jpayne@68 261 bb.nl();
jpayne@68 262 }
jpayne@68 263 }
jpayne@68 264 return bb;
jpayne@68 265 }
jpayne@68 266
jpayne@68 267 public ByteBuilder append0(ByteBuilder bb){
jpayne@68 268 bb.append("#name\t").append(name).nl();
jpayne@68 269 bb.append("#k\t").append(k).nl();
jpayne@68 270 bb.append("#frames\t").append(frames).nl();
jpayne@68 271 bb.append("#offset\t").append(leftOffset).nl();
jpayne@68 272 bb.append("#valid\tframe");
jpayne@68 273 for(int i=0; i<kMax; i++){bb.tab().append(AminoAcid.kmerToString(i, k));}
jpayne@68 274 bb.nl();
jpayne@68 275 for(int a=0; a<2; a++){//valid
jpayne@68 276 for(int b=0; b<frames; b++){
jpayne@68 277 bb.append(a);
jpayne@68 278 bb.tab().append(b);
jpayne@68 279 for(int c=0; c<kMax; c++){
jpayne@68 280 bb.tab().append(0);
jpayne@68 281 }
jpayne@68 282 bb.nl();
jpayne@68 283 }
jpayne@68 284 }
jpayne@68 285 return bb;
jpayne@68 286 }
jpayne@68 287
jpayne@68 288 /*--------------------------------------------------------------*/
jpayne@68 289 /*---------------- Fields ----------------*/
jpayne@68 290 /*--------------------------------------------------------------*/
jpayne@68 291
jpayne@68 292 public final String name;
jpayne@68 293 public final int k;
jpayne@68 294 public final int mask;
jpayne@68 295 public final int frames;
jpayne@68 296 public final int kMax;
jpayne@68 297 public final float invFrames;
jpayne@68 298 public final int leftOffset;
jpayne@68 299 public int rightOffset() {return frames-leftOffset-1;}
jpayne@68 300
jpayne@68 301 public final float[][] probs;
jpayne@68 302 public final long[][] countsTrue;
jpayne@68 303 public final long[][] countsFalse;
jpayne@68 304 public final long[][][] counts;
jpayne@68 305
jpayne@68 306 public final long[] validSums=KillSwitch.allocLong1D(2);
jpayne@68 307 private float average=-1;
jpayne@68 308 private float invAvg=-1;
jpayne@68 309
jpayne@68 310 }