comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/aligner/MultiStateAligner9PacBioAdapter3.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package aligner;
2
3 import java.util.Arrays;
4
5 import dna.AminoAcid;
6 import dna.ChromosomeArray;
7 import dna.Data;
8 import shared.Tools;
9 import stream.Read;
10 import stream.SiteScore;
11
12 /**
13 * Based on MSA9ts, with transform scores tweaked for PacBio. */
14 public final class MultiStateAligner9PacBioAdapter3 {
15
16
17 public MultiStateAligner9PacBioAdapter3(int maxRows_, int maxColumns_){
18 // assert(maxColumns_>=200);
19 // assert(maxRows_>=200);
20 maxRows=maxRows_;
21 maxColumns=maxColumns_;
22 packed=new int[3][maxRows+1][];
23
24 for(int i=0; i<3; i++){
25 packed[i][0]=new int[maxColumns+1];
26 packed[i][1]=new int[maxColumns+1];
27 packed[i][2]=new int[maxColumns+1];
28 for(int j=3; j<maxRows+1; j++){
29 packed[i][j]=packed[i][j-2];
30 }
31 }
32
33 insScoreArray=new int[maxRows+1];
34 delScoreArray=new int[maxColumns+1];
35 matchScoreArray=new int[maxRows+1];
36 subScoreArray=new int[maxRows+1];
37 for(int i=0; i<insScoreArray.length; i++){
38 insScoreArray[i]=(i==0 ? POINTSoff_INS :
39 i<LIMIT_FOR_COST_3 ? POINTSoff_INS2 :
40 i<LIMIT_FOR_COST_4 ? POINTSoff_INS3 : POINTSoff_INS4);
41 }
42 for(int i=0; i<delScoreArray.length; i++){
43 delScoreArray[i]=(i==0 ? POINTSoff_DEL :
44 i<LIMIT_FOR_COST_3 ? POINTSoff_DEL2 :
45 i<LIMIT_FOR_COST_4 ? POINTSoff_DEL3 : POINTSoff_DEL4);
46 }
47 for(int i=0; i<matchScoreArray.length; i++){
48 matchScoreArray[i]=(i==0 ? POINTSoff_MATCH2 : POINTSoff_MATCH);
49 }
50 for(int i=0; i<subScoreArray.length; i++){
51 subScoreArray[i]=(i==0 ? POINTSoff_SUB : i<LIMIT_FOR_COST_3 ? POINTSoff_SUB2 : POINTSoff_SUB3);
52 }
53
54 vertLimit=new int[maxRows+1];
55 horizLimit=new int[maxColumns+1];
56 Arrays.fill(vertLimit, BADoff);
57 Arrays.fill(horizLimit, BADoff);
58
59 for(int matrix=0; matrix<packed.length; matrix++){
60 for(int i=1; i<=3; i++){
61 for(int j=0; j<packed[matrix][i].length; j++){
62 packed[matrix][i][j]|=BADoff;
63 }
64 }
65
66 //Initializes the rows; not needed.
67 for(int i=1; i<=2; i++){
68
69 int score=calcInsScoreOffset(i);
70 packed[matrix][i][0]=score;
71 }
72 }
73 }
74
75
76 /** return new int[] {rows, maxC, maxS, max};
77 * Will not fill areas that cannot match minScore */
78 public final int[] fillLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){
79 return fillLimitedX(read, ref, refStartLoc, refEndLoc, minScore);
80 }
81
82
83 /** return new int[] {rows, maxC, maxS, max};
84 * Will not fill areas that cannot match minScore */
85 private final int[] fillLimitedX(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){
86 // minScore=0;
87 // assert(minScore>0);
88 rows=read.length;
89 columns=refEndLoc-refStartLoc+1;
90
91 if(/*read.length<40 || */false || minScore<=0 || columns>read.length+Tools.min(100, read.length)){
92 // assert(false) : minScore;
93 return fillUnlimited(read, ref, refStartLoc, refEndLoc);
94 }
95
96 minScore-=100; //Increases quality trivially
97
98 assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows+"\n"+
99 refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
100 assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns+"\n"+
101 refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
102
103 assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc+"\n"+
104 refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
105 assert(refEndLoc<ref.length) : "Check that values are in-bounds before calling this function: "+refEndLoc+", "+ref.length+"\n"+
106 refStartLoc+", "+refEndLoc+", "+rows+", "+maxRows+", "+columns+", "+maxColumns+"\n"+new String(read)+"\n";
107
108 // for(int x=0; x<packed.length; x++){
109 // for(int y=1; y<rows+1; y++){
110 // Arrays.fill(packed[x][y], 1, columns+1, BADoff);
111 // }
112 // }
113 for(int x=0; x<packed.length; x++){
114
115 // Arrays.fill(packed[x][1], 1, columns+1, BADoff);
116 Arrays.fill(packed[x][rows], 1, columns+1, BADoff);
117 // for(int y=1; y<rows+1; y++){
118 // Arrays.fill(packed[x][y], 1, columns+1, BADoff);
119 // }
120 }
121
122 int minGoodCol=1;
123 int maxGoodCol=columns;
124
125 final int minScore_off=(minScore<<SCOREOFFSET);
126 final int maxGain=(read.length-1)*POINTSoff_MATCH2+POINTSoff_MATCH;
127 final int floor=minScore_off-maxGain;
128 // final int subfloor=Tools.max(BADoff, floor-200*POINTSoff_MATCH2);
129 final int subfloor=floor-5*POINTSoff_MATCH2;
130 assert(subfloor>BADoff); //TODO: Actually, it needs to be substantially more.
131 assert(subfloor<minScore_off) : minScore_off+", "+floor+", "+BADoff+", "+subfloor;
132
133 if(verbose2){
134 System.out.println();
135 System.out.println("minScore="+minScore+"\t"+minScore_off);
136 System.out.println("maxGain="+(maxGain>>SCOREOFFSET)+"\t"+(maxGain));
137 System.out.println("floor="+(floor>>SCOREOFFSET)+"\t"+(floor));
138 System.out.println("subfloor="+(subfloor>>SCOREOFFSET)+"\t"+(subfloor));
139 System.out.println("BADoff="+(BADoff>>SCOREOFFSET)+"\t"+(BADoff));
140 System.out.println("maxGain="+(maxGain>>SCOREOFFSET)+"\t"+(maxGain));
141 System.out.println();
142 }
143
144 vertLimit[rows]=minScore_off;
145 for(int i=rows-1; i>=0; i--){
146 vertLimit[i]=Tools.max(vertLimit[i+1]-POINTSoff_MATCH2, floor);
147 }
148
149 horizLimit[columns]=minScore_off;
150 for(int i=columns-1; i>=0; i--){
151 horizLimit[i]=Tools.max(horizLimit[i+1]-POINTSoff_MATCH2, floor);
152 }
153
154 for(int row=1; row<=rows; row++){
155
156 {
157 int score=calcInsScoreOffset(row);
158 packed[0][row][0]=score;
159 packed[1][row][0]=score;
160 packed[2][row][0]=score;
161 }
162
163 int colStart=minGoodCol;
164 int colStop=maxGoodCol;
165
166 minGoodCol=-1;
167 maxGoodCol=-2;
168
169 final int vlimit=vertLimit[row];
170
171 if(verbose2){
172 System.out.println();
173 System.out.println("row="+row);
174 System.out.println("colStart="+colStart);
175 System.out.println("colStop="+colStop);
176 System.out.println("vlimit="+(vlimit>>SCOREOFFSET)+"\t"+(vlimit));
177 }
178
179 if(colStart<0 || colStop<colStart){break;}
180
181
182 if(colStart>1){
183 assert(row>0);
184 packed[MODE_MS][row][colStart-1]=subfloor;
185 packed[MODE_INS][row][colStart-1]=subfloor;
186 packed[MODE_DEL][row][colStart-1]=subfloor;
187 }
188
189
190 for(int col=colStart; col<=columns; col++){
191
192
193 if(verbose2){
194 System.out.println("\ncol "+col);
195 }
196
197 final byte call0=(row<2 ? (byte)'?' : read[row-2]);
198 final byte call1=read[row-1];
199 final byte ref0=(col<2 ? (byte)'!' : ref[refStartLoc+col-2]);
200 final byte ref1=ref[refStartLoc+col-1];
201
202 // final boolean match=(read[row-1]==ref[refStartLoc+col-1]);
203 // final boolean prevMatch=(row<2 || col<2 ? false : read[row-2]==ref[refStartLoc+col-2]);
204 final boolean match=(call1==ref1);
205 final boolean prevMatch=(call0==ref0);
206
207 // System.err.println("")
208
209 iterationsLimited++;
210 final int limit=Tools.max(vlimit, horizLimit[col]);
211 final int limit3=Tools.max(floor, (match ? limit-POINTSoff_MATCH2 : limit-POINTSoff_SUB3));
212
213 final int delNeeded=Tools.max(0, row-col-1);
214 final int insNeeded=Tools.max(0, (rows-row)-(columns-col)-1);
215
216 final int delPenalty=calcDelScoreOffset(delNeeded);
217 final int insPenalty=calcInsScoreOffset(insNeeded);
218
219
220 final int scoreFromDiag_MS=packed[MODE_MS][row-1][col-1]&SCOREMASK;
221 final int scoreFromDel_MS=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
222 final int scoreFromIns_MS=packed[MODE_INS][row-1][col-1]&SCOREMASK;
223
224 final int scoreFromDiag_DEL=packed[MODE_MS][row][col-1]&SCOREMASK;
225 final int scoreFromDel_DEL=packed[MODE_DEL][row][col-1]&SCOREMASK;
226
227 final int scoreFromDiag_INS=packed[MODE_MS][row-1][col]&SCOREMASK;
228 final int scoreFromIns_INS=packed[MODE_INS][row-1][col]&SCOREMASK;
229
230 // if(scoreFromDiag_MS<limit3 && scoreFromDel_MS<limit3 && scoreFromIns_MS<limit3
231 // && scoreFromDiag_DEL<limit && scoreFromDel_DEL<limit && scoreFromDiag_INS<limit && scoreFromIns_INS<limit){
232 // iterationsLimited--; //A "fast" iteration
233 // }
234
235 if((scoreFromDiag_MS<=limit3 && scoreFromDel_MS<=limit3 && scoreFromIns_MS<=limit3)){
236 packed[MODE_MS][row][col]=subfloor;
237 }else{//Calculate match and sub scores
238 final int streak=(packed[MODE_MS][row-1][col-1]&TIMEMASK);
239
240 {//Calculate match/sub score
241
242 int score;
243 int time;
244 byte prevState;
245
246 if(match){
247
248 int scoreMS=scoreFromDiag_MS+(prevMatch ? POINTSoff_MATCH2 : POINTSoff_MATCH);
249 int scoreD=scoreFromDel_MS+POINTSoff_MATCH;
250 int scoreI=scoreFromIns_MS+POINTSoff_MATCH;
251
252 // byte prevState;
253 if(scoreMS>=scoreD && scoreMS>=scoreI){
254 score=scoreMS;
255 time=(prevMatch ? streak+1 : 1);
256 prevState=MODE_MS;
257 }else if(scoreD>=scoreI){
258 score=scoreD;
259 time=1;
260 prevState=MODE_DEL;
261 }else{
262 score=scoreI;
263 time=1;
264 prevState=MODE_INS;
265 }
266
267 }else{
268
269 int scoreMS;
270 if(ref1!='N' && call1!='N'){
271 scoreMS=scoreFromDiag_MS+(prevMatch ? (streak<=1 ? POINTSoff_SUBR : POINTSoff_SUB) : subScoreArray[streak]);
272 }else{
273 scoreMS=scoreFromDiag_MS+POINTSoff_NOCALL;
274 }
275
276 int scoreD=scoreFromDel_MS+POINTSoff_SUB; //+2 to move it as close as possible to the deletion / insertion
277 int scoreI=scoreFromIns_MS+POINTSoff_SUB;
278
279 if(scoreMS>=scoreD && scoreMS>=scoreI){
280 score=scoreMS;
281 time=(prevMatch ? 1 : streak+1);
282 // time=(prevMatch ? (streak==1 ? 3 : 1) : streak+1);
283 prevState=MODE_MS;
284 }else if(scoreD>=scoreI){
285 score=scoreD;
286 time=1;
287 prevState=MODE_DEL;
288 }else{
289 score=scoreI;
290 time=1;
291 prevState=MODE_INS;
292 }
293 }
294
295 final int limit2;
296 if(delNeeded>0){
297 limit2=limit-delPenalty;
298 }else if(insNeeded>0){
299 limit2=limit-insPenalty;
300 }else{
301 limit2=limit;
302 }
303 assert(limit2>=limit);
304
305 if(verbose2){System.err.println("MS: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));}
306
307 if(score>=limit2){
308 maxGoodCol=col;
309 if(minGoodCol<0){minGoodCol=col;}
310 }else{
311 score=subfloor;
312 }
313
314 assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
315 assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead";
316 assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
317 // packed[MODE_MS][row][col]=(score|prevState|time);
318 packed[MODE_MS][row][col]=(score|time);
319 assert((score&SCOREMASK)==score);
320 // assert((prevState&MODEMASK)==prevState);
321 assert((time&TIMEMASK)==time);
322 }
323 }
324
325 if((scoreFromDiag_DEL<=limit && scoreFromDel_DEL<=limit)){
326 // assert((scoreFromDiag_DEL<=limit && scoreFromDel_DEL<=limit)) : scoreFromDiag_DEL+", "+row;
327 packed[MODE_DEL][row][col]=subfloor;
328 }else{//Calculate DEL score
329
330 final int streak=packed[MODE_DEL][row][col-1]&TIMEMASK;
331
332 int scoreMS=scoreFromDiag_DEL+POINTSoff_DEL;
333 int scoreD=scoreFromDel_DEL+delScoreArray[streak];
334 // int scoreI=scoreFromIns+POINTSoff_DEL;
335
336
337 if(ref1=='N'){
338 scoreMS+=POINTSoff_DEL_REF_N;
339 scoreD+=POINTSoff_DEL_REF_N;
340 }
341
342 //if(match){scoreMS=subfloor;}
343
344 int score;
345 int time;
346 byte prevState;
347 if(scoreMS>=scoreD){
348 score=scoreMS;
349 time=1;
350 prevState=MODE_MS;
351 }else{
352 score=scoreD;
353 time=streak+1;
354 prevState=MODE_DEL;
355 }
356
357 final int limit2;
358 if(insNeeded>0){
359 limit2=limit-insPenalty;
360 }else if(delNeeded>0){
361 limit2=limit-calcDelScoreOffset(time+delNeeded)+calcDelScoreOffset(time);
362 }else{
363 limit2=limit;
364 }
365 assert(limit2>=limit);
366 if(verbose2){System.err.println("DEL: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));}
367
368 if(score>=limit2){
369 maxGoodCol=col;
370 if(minGoodCol<0){minGoodCol=col;}
371 }else{
372 score=subfloor;
373 }
374
375 assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
376 assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead";
377 assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
378 // packed[MODE_DEL][row][col]=(score|prevState|time);
379 packed[MODE_DEL][row][col]=(score|time);
380 assert((score&SCOREMASK)==score);
381 // assert((prevState&MODEMASK)==prevState);
382 assert((time&TIMEMASK)==time);
383 }
384
385 if(scoreFromDiag_INS<=limit && scoreFromIns_INS<=limit){
386 packed[MODE_INS][row][col]=subfloor;
387 }else{//Calculate INS score
388
389 final int streak=packed[MODE_INS][row-1][col]&TIMEMASK;
390
391 int scoreMS=scoreFromDiag_INS+POINTSoff_INS;
392 // int scoreD=scoreFromDel+POINTSoff_INS;
393 int scoreI=scoreFromIns_INS+insScoreArray[streak];
394
395
396 // System.err.println("("+row+","+col+")\t"+scoreFromDiag+"+"+POINTSoff_INS+"="+scoreM+", "+
397 // scoreFromSub+"+"+POINTSoff_INS+"="+scoreS+", "
398 // +scoreD+", "+scoreFromIns+"+"+
399 // (streak==0 ? POINTSoff_INS : streak<LIMIT_FOR_COST_3 ? POINTSoff_INS2 : POINTSoff_INS3)+"="+scoreI);
400
401 //if(match){scoreMS=subfloor;}
402
403 int score;
404 int time;
405 byte prevState;
406 if(scoreMS>=scoreI){
407 score=scoreMS;
408 time=1;
409 prevState=MODE_MS;
410 }else{
411 score=scoreI;
412 time=streak+1;
413 prevState=MODE_INS;
414 }
415
416 final int limit2;
417 if(delNeeded>0){
418 limit2=limit-delPenalty;
419 }else if(insNeeded>0){
420 limit2=limit-calcInsScoreOffset(time+insNeeded)+calcInsScoreOffset(time);
421 }else{
422 limit2=limit;
423 }
424 assert(limit2>=limit);
425
426 if(verbose2){System.err.println("INS: \tlimit2="+(limit2>>SCOREOFFSET)+"\t, score="+(score>>SCOREOFFSET));}
427 if(score>=limit2){
428 maxGoodCol=col;
429 if(minGoodCol<0){minGoodCol=col;}
430 }else{
431 score=subfloor;
432 }
433
434 assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
435 assert(score>=MINoff_SCORE || score==BADoff) : "Score overflow - use MSA2 instead";
436 assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
437 // packed[MODE_INS][row][col]=(score|prevState|time);
438 packed[MODE_INS][row][col]=(score|time);
439 assert((score&SCOREMASK)==score);
440 // assert((prevState&MODEMASK)==prevState);
441 assert((time&TIMEMASK)==time);
442 }
443
444
445 if(col>=colStop){
446 if(col>colStop && maxGoodCol<col){break;}
447 if(row>1){
448 packed[MODE_MS][row-1][col+1]=subfloor;
449 packed[MODE_INS][row-1][col+1]=subfloor;
450 packed[MODE_DEL][row-1][col+1]=subfloor;
451 }
452 }
453 }
454 }
455
456
457 int maxCol=-1;
458 int maxState=-1;
459 int maxScore=Integer.MIN_VALUE;
460
461 for(int state=0; state<packed.length; state++){
462 for(int col=1; col<=columns; col++){
463 int x=packed[state][rows][col]&SCOREMASK;
464 if(x>maxScore){
465 maxScore=x;
466 maxCol=col;
467 maxState=state;
468 }
469 }
470 }
471
472 assert(maxScore>=BADoff);
473 // if(maxScore==BADoff){
474 // return null;
475 // }
476 // if(maxScore<floor){
477 // return null;
478 // }
479 if(maxScore<minScore_off){
480 return null;
481 }
482
483 maxScore>>=SCOREOFFSET;
484
485 // System.err.println("Returning "+rows+", "+maxCol+", "+maxState+", "+maxScore);
486 return new int[] {rows, maxCol, maxState, maxScore};
487 }
488
489
490 /** return new int[] {rows, maxC, maxS, max};
491 * Does not require a min score (ie, same as old method) */
492 private final int[] fillUnlimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc){
493 rows=read.length;
494 columns=refEndLoc-refStartLoc+1;
495
496 final int maxGain=(read.length-1)*POINTSoff_MATCH2+POINTSoff_MATCH;
497 final int subfloor=0-2*maxGain;
498 assert(subfloor>BADoff && subfloor*2>BADoff); //TODO: Actually, it needs to be substantially more.
499
500 //temporary, for finding a bug
501 if(rows>maxRows || columns>maxColumns){
502 throw new RuntimeException("rows="+rows+", maxRows="+maxRows+", cols="+columns+", maxCols="+maxColumns+"\n"+new String(read)+"\n");
503 }
504
505 assert(rows<=maxRows) : "Check that values are in-bounds before calling this function: "+rows+", "+maxRows;
506 assert(columns<=maxColumns) : "Check that values are in-bounds before calling this function: "+columns+", "+maxColumns;
507
508 assert(refStartLoc>=0) : "Check that values are in-bounds before calling this function: "+refStartLoc;
509 assert(refEndLoc<ref.length) : "Check that values are in-bounds before calling this function: "+refEndLoc+", "+ref.length;
510
511 for(int row=1; row<=rows; row++){
512
513 {
514 int score=calcInsScoreOffset(row);
515 packed[0][row][0]=score;
516 packed[1][row][0]=score;
517 packed[2][row][0]=score;
518 }
519
520 // int minc=max(1, row-20);
521 // int maxc=min(columns, row+20);
522
523 for(int col=1; col<=columns; col++){
524 iterationsUnlimited++;
525
526 // final boolean match=(read[row-1]==ref[refStartLoc+col-1]);
527 // final boolean prevMatch=(row<2 || col<2 ? false : read[row-2]==ref[refStartLoc+col-2]);
528
529 final byte call0=(row<2 ? (byte)'?' : read[row-2]);
530 final byte call1=read[row-1];
531 final byte ref0=(col<2 ? (byte)'!' : ref[refStartLoc+col-2]);
532 final byte ref1=ref[refStartLoc+col-1];
533
534 final boolean match=(call1==ref1);
535 final boolean prevMatch=(call0==ref0);
536
537 {//Calculate match and sub scores
538
539 final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK;
540 final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
541 final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK;
542 final int streak=(packed[MODE_MS][row-1][col-1]&TIMEMASK);
543
544 {//Calculate match/sub score
545
546 if(match){
547
548 int scoreMS=scoreFromDiag+(prevMatch ? POINTSoff_MATCH2 : POINTSoff_MATCH);
549 int scoreD=scoreFromDel+POINTSoff_MATCH;
550 int scoreI=scoreFromIns+POINTSoff_MATCH;
551
552 int score;
553 int time;
554 // byte prevState;
555 if(scoreMS>=scoreD && scoreMS>=scoreI){
556 score=scoreMS;
557 time=(prevMatch ? streak+1 : 1);
558 // prevState=MODE_MS;
559 }else if(scoreD>=scoreI){
560 score=scoreD;
561 time=1;
562 // prevState=MODE_DEL;
563 }else{
564 score=scoreI;
565 time=1;
566 // prevState=MODE_INS;
567 }
568
569 assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
570 assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
571 assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
572 // packed[MODE_MS][row][col]=(score|prevState|time);
573 packed[MODE_MS][row][col]=(score|time);
574 assert((score&SCOREMASK)==score);
575 // assert((prevState&MODEMASK)==prevState);
576 assert((time&TIMEMASK)==time);
577
578 }else{
579
580 int scoreMS;
581 if(ref1!='N' && call1!='N'){
582 scoreMS=scoreFromDiag+(prevMatch ? (streak<=1 ? POINTSoff_SUBR : POINTSoff_SUB) : subScoreArray[streak]);
583 }else{
584 scoreMS=scoreFromDiag+POINTSoff_NOCALL;
585 }
586
587 int scoreD=scoreFromDel+POINTSoff_SUB; //+2 to move it as close as possible to the deletion / insertion
588 int scoreI=scoreFromIns+POINTSoff_SUB;
589
590 int score;
591 int time;
592 byte prevState;
593 if(scoreMS>=scoreD && scoreMS>=scoreI){
594 score=scoreMS;
595 time=(prevMatch ? 1 : streak+1);
596 // time=(prevMatch ? (streak==1 ? 3 : 1) : streak+1);
597 prevState=MODE_MS;
598 }else if(scoreD>=scoreI){
599 score=scoreD;
600 time=1;
601 prevState=MODE_DEL;
602 }else{
603 score=scoreI;
604 time=1;
605 prevState=MODE_INS;
606 }
607
608 assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
609 assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
610 assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
611 // packed[MODE_MS][row][col]=(score|prevState|time);
612 packed[MODE_MS][row][col]=(score|time);
613 assert((score&SCOREMASK)==score);
614 // assert((prevState&MODEMASK)==prevState);
615 assert((time&TIMEMASK)==time);
616 }
617 }
618 }
619
620 {//Calculate DEL score
621
622 final int streak=packed[MODE_DEL][row][col-1]&TIMEMASK;
623
624 final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK;
625 final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK;
626
627 int scoreMS=scoreFromDiag+POINTSoff_DEL;
628 int scoreD=scoreFromDel+delScoreArray[streak];
629 // int scoreI=scoreFromIns+POINTSoff_DEL;
630
631 if(ref1=='N'){
632 scoreMS+=POINTSoff_DEL_REF_N;
633 scoreD+=POINTSoff_DEL_REF_N;
634 }
635
636 //if(match){scoreMS=subfloor;}
637
638 int score;
639 int time;
640 byte prevState;
641 if(scoreMS>=scoreD){
642 score=scoreMS;
643 time=1;
644 prevState=MODE_MS;
645 }else{
646 score=scoreD;
647 time=streak+1;
648 prevState=MODE_DEL;
649 }
650
651 assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
652 assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
653 assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
654 // packed[MODE_DEL][row][col]=(score|prevState|time);
655 packed[MODE_DEL][row][col]=(score|time);
656 assert((score&SCOREMASK)==score);
657 // assert((prevState&MODEMASK)==prevState);
658 assert((time&TIMEMASK)==time);
659 }
660
661 {//Calculate INS score
662
663 final int streak=packed[MODE_INS][row-1][col]&TIMEMASK;
664
665 final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK;
666 final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK;
667
668 int scoreMS=scoreFromDiag+POINTSoff_INS;
669 // int scoreD=scoreFromDel+POINTSoff_INS;
670 int scoreI=scoreFromIns+insScoreArray[streak];
671
672 // System.err.println("("+row+","+col+")\t"+scoreFromDiag+"+"+POINTSoff_INS+"="+scoreM+", "+
673 // scoreFromSub+"+"+POINTSoff_INS+"="+scoreS+", "
674 // +scoreD+", "+scoreFromIns+"+"+
675 // (streak==0 ? POINTSoff_INS : streak<LIMIT_FOR_COST_3 ? POINTSoff_INS2 : POINTSoff_INS3)+"="+scoreI);
676
677 //if(match){scoreMS=subfloor;}
678
679 int score;
680 int time;
681 byte prevState;
682 if(scoreMS>=scoreI){
683 score=scoreMS;
684 time=1;
685 prevState=MODE_MS;
686 }else{
687 score=scoreI;
688 time=streak+1;
689 prevState=MODE_INS;
690 }
691
692 assert(time<=MAX_TIME);//if(time>MAX_TIME){time=MAX_TIME-3;}
693 assert(score>=MINoff_SCORE) : "Score overflow - use MSA2 instead";
694 assert(score<=MAXoff_SCORE) : "Score overflow - use MSA2 instead";
695 // packed[MODE_INS][row][col]=(score|prevState|time);
696 packed[MODE_INS][row][col]=(score|time);
697 assert((score&SCOREMASK)==score);
698 // assert((prevState&MODEMASK)==prevState);
699 assert((time&TIMEMASK)==time);
700 }
701 }
702 }
703
704
705 int maxCol=-1;
706 int maxState=-1;
707 int maxScore=Integer.MIN_VALUE;
708
709 for(int state=0; state<packed.length; state++){
710 for(int col=1; col<=columns; col++){
711 int x=packed[state][rows][col]&SCOREMASK;
712 if(x>maxScore){
713 maxScore=x;
714 maxCol=col;
715 maxState=state;
716 }
717 }
718 }
719 maxScore>>=SCOREOFFSET;
720
721 // System.err.println("Returning "+rows+", "+maxCol+", "+maxState+", "+maxScore);
722 return new int[] {rows, maxCol, maxState, maxScore};
723 }
724
725
726 /** Generates the match string */
727 public final byte[] traceback(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int row, int col, int state){
728 // assert(false);
729 assert(refStartLoc<=refEndLoc) : refStartLoc+", "+refEndLoc;
730 assert(row==rows);
731
732 byte[] out=new byte[row+col-1]; //TODO if an out of bound crash occurs, try removing the "-1".
733 int outPos=0;
734
735 if(state==MODE_INS){
736 //TODO ? Maybe not needed.
737 }
738
739 while(row>0 && col>0){
740
741 // byte prev0=(byte)(packed[state][row][col]&MODEMASK);
742
743 final int time=packed[state][row][col]&TIMEMASK;
744 final byte prev;
745
746 // System.err.println("state="+state+", prev="+prev+", row="+row+", col="+col+", score="+scores[state][row][col]);
747
748 if(state==MODE_MS){
749 if(time>1){prev=(byte)state;}
750 else{
751 final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK;
752 final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
753 final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK;
754 if(scoreFromDiag>=scoreFromDel && scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
755 else if(scoreFromDel>=scoreFromIns){prev=MODE_DEL;}
756 else{prev=MODE_INS;}
757 }
758
759 byte c=read[row-1];
760 byte r=ref[refStartLoc+col-1];
761 if(c==r){
762 out[outPos]='m';
763 }else{
764 if(!AminoAcid.isFullyDefined(c)){
765 out[outPos]='N';
766 }else if(!AminoAcid.isFullyDefined(r)){
767 // out[outPos]='X';
768 out[outPos]='N';
769 }else{
770 out[outPos]='S';
771 }
772 }
773
774 row--;
775 col--;
776 }else if(state==MODE_DEL){
777 if(time>1){prev=(byte)state;}
778 else{
779 final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK;
780 final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK;
781 if(scoreFromDiag>=scoreFromDel){prev=MODE_MS;}
782 else{prev=MODE_DEL;}
783 }
784
785 byte r=ref[refStartLoc+col-1];
786 out[outPos]='D';
787
788 col--;
789 }else{
790 if(time>1){prev=(byte)state;}
791 else{
792 final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK;
793 final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK;
794 if(scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
795 else{prev=MODE_INS;}
796 }
797
798 assert(state==MODE_INS) : state;
799 if(col==0){
800 out[outPos]='X';
801 }else if(col>=columns){
802 out[outPos]='Y';
803 }else{
804 out[outPos]='I';
805 }
806 row--;
807 }
808
809 // assert(prev==prev0);
810 state=prev;
811 outPos++;
812 }
813
814 assert(row==0 || col==0);
815 if(col!=row){
816 while(row>0){
817 out[outPos]='X';
818 outPos++;
819 row--;
820 col--;
821 }
822 if(col>0){
823 //do nothing
824 }
825 }
826
827
828 //Shrink and reverse the string
829 byte[] out2=new byte[outPos];
830 for(int i=0; i<outPos; i++){
831 out2[i]=out[outPos-i-1];
832 }
833 out=null;
834
835 return out2;
836 }
837
838 /** @return {score, bestRefStart, bestRefStop} */
839 public final int[] score(final byte[] read, final byte[] ref, final int refStartLoc, final int refEndLoc,
840 final int maxRow, final int maxCol, final int maxState){
841
842 int row=maxRow;
843 int col=maxCol;
844 int state=maxState;
845
846 assert(maxState>=0 && maxState<packed.length) :
847 maxState+", "+maxRow+", "+maxCol+"\n"+new String(read)+"\n"+toString(ref, refStartLoc, refEndLoc);
848 assert(maxRow>=0 && maxRow<packed[0].length) :
849 maxState+", "+maxRow+", "+maxCol+"\n"+new String(read)+"\n"+toString(ref, refStartLoc, refEndLoc);
850 assert(maxCol>=0 && maxCol<packed[0][0].length) :
851 maxState+", "+maxRow+", "+maxCol+"\n"+new String(read)+"\n"+toString(ref, refStartLoc, refEndLoc);
852
853 int score=packed[maxState][maxRow][maxCol]&SCOREMASK; //Or zero, if it is to be recalculated
854
855 if(row<rows){
856 int difR=rows-row;
857 int difC=columns-col;
858
859 while(difR>difC){
860 score+=POINTSoff_NOREF;
861 difR--;
862 }
863
864 row+=difR;
865 col+=difR;
866
867 }
868
869 assert(refStartLoc<=refEndLoc);
870 assert(row==rows);
871
872
873 final int bestRefStop=refStartLoc+col-1;
874
875 while(row>0 && col>0){
876 // System.err.println("state="+state+", row="+row+", col="+col);
877
878
879
880 // byte prev0=(byte)(packed[state][row][col]&MODEMASK);
881
882 final int time=packed[state][row][col]&TIMEMASK;
883 final byte prev;
884
885 if(state==MODE_MS){
886 if(time>1){prev=(byte)state;}
887 else{
888 final int scoreFromDiag=packed[MODE_MS][row-1][col-1]&SCOREMASK;
889 final int scoreFromDel=packed[MODE_DEL][row-1][col-1]&SCOREMASK;
890 final int scoreFromIns=packed[MODE_INS][row-1][col-1]&SCOREMASK;
891 if(scoreFromDiag>=scoreFromDel && scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
892 else if(scoreFromDel>=scoreFromIns){prev=MODE_DEL;}
893 else{prev=MODE_INS;}
894 }
895 row--;
896 col--;
897 }else if(state==MODE_DEL){
898 if(time>1){prev=(byte)state;}
899 else{
900 final int scoreFromDiag=packed[MODE_MS][row][col-1]&SCOREMASK;
901 final int scoreFromDel=packed[MODE_DEL][row][col-1]&SCOREMASK;
902 if(scoreFromDiag>=scoreFromDel){prev=MODE_MS;}
903 else{prev=MODE_DEL;}
904 }
905 col--;
906 }else{
907 assert(state==MODE_INS);
908 if(time>1){prev=(byte)state;}
909 else{
910 final int scoreFromDiag=packed[MODE_MS][row-1][col]&SCOREMASK;
911 final int scoreFromIns=packed[MODE_INS][row-1][col]&SCOREMASK;
912 if(scoreFromDiag>=scoreFromIns){prev=MODE_MS;}
913 else{prev=MODE_INS;}
914 }
915 row--;
916 }
917
918 if(col<0){
919 System.err.println(row);
920 break; //prevents an out of bounds access
921
922 }
923
924 // assert(prev==prev0);
925 state=prev;
926
927 // System.err.println("state2="+state+", row2="+row+", col2="+col+"\n");
928 }
929 // assert(false) : row+", "+col;
930 if(row>col){
931 col-=row;
932 }
933
934 final int bestRefStart=refStartLoc+col;
935
936 score>>=SCOREOFFSET;
937 int[] rvec;
938 if(bestRefStart<refStartLoc || bestRefStop>refEndLoc){ //Suggest extra padding in cases of overflow
939 int padLeft=Tools.max(0, refStartLoc-bestRefStart);
940 int padRight=Tools.max(0, bestRefStop-refEndLoc);
941 rvec=new int[] {score, bestRefStart, bestRefStop, padLeft, padRight};
942 }else{
943 rvec=new int[] {score, bestRefStart, bestRefStop};
944 }
945 return rvec;
946 }
947
948
949 /** Will not fill areas that cannot match minScore.
950 * @return {score, bestRefStart, bestRefStop} */
951 public final int[] fillAndScoreLimited(byte[] read, byte[] ref, int refStartLoc, int refEndLoc, int minScore){
952 int a=Tools.max(0, refStartLoc);
953 int b=Tools.min(ref.length-1, refEndLoc);
954 assert(b>=a);
955
956 int[] score;
957
958 if(b-a>=maxColumns){
959 System.err.println("Warning: Max alignment columns exceeded; restricting range. "+(b-a+1)+" > "+maxColumns);
960 assert(false) : refStartLoc+", "+refEndLoc;
961 b=Tools.min(ref.length-1, a+maxColumns-1);
962 }
963 int[] max=fillLimited(read, ref, a, b, minScore);
964 score=(max==null ? null : score(read, ref, a, b, max[0], max[1], max[2]));
965
966 return score;
967 }
968
969
970
971 public final static int scoreNoIndels(byte[] read, SiteScore ss){
972 ChromosomeArray cha=Data.getChromosome(ss.chrom);
973 return scoreNoIndels(read, cha.array, ss.start, ss);
974 }
975
976 public final static int scoreNoIndels(byte[] read, final int chrom, final int refStart){
977 ChromosomeArray cha=Data.getChromosome(chrom);
978 return scoreNoIndels(read, cha.array, refStart, null);
979 }
980
981 public final static int scoreNoIndels(byte[] read, SiteScore ss, byte[] baseScores){
982 ChromosomeArray cha=Data.getChromosome(ss.chrom);
983 return scoreNoIndels(read, cha.array, baseScores, ss.start, ss);
984 }
985
986 public final static int scoreNoIndels(byte[] read, final int chrom, final int refStart, byte[] baseScores){
987 ChromosomeArray cha=Data.getChromosome(chrom);
988 return scoreNoIndels(read, cha.array, baseScores, refStart, null);
989 }
990
991 public final static int scoreNoIndels(byte[] read, byte[] ref, final int refStart, final SiteScore ss){
992
993 int score=0;
994 int mode=-1;
995 int timeInMode=0;
996
997 //This block handles cases where the read runs outside the reference
998 //Of course, padding the reference with 'N' would be better, but...
999 int readStart=0;
1000 int readStop=read.length;
1001 final int refStop=refStart+read.length;
1002 boolean semiperfect=true;
1003 int norefs=0;
1004
1005 if(refStart<0){
1006 readStart=0-refStart;
1007 score+=POINTS_NOREF*readStart;
1008 norefs+=readStart;
1009 }
1010 if(refStop>ref.length){
1011 int dif=(refStop-ref.length);
1012 readStop-=dif;
1013 score+=POINTS_NOREF*dif;
1014 norefs+=dif;
1015 }
1016
1017 // if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1018
1019 for(int i=readStart; i<readStop; i++){
1020 byte c=read[i];
1021 byte r=ref[refStart+i];
1022
1023 if(c==r && c!='N'){
1024 if(mode==MODE_MS){
1025 timeInMode++;
1026 score+=POINTS_MATCH2;
1027 }else{
1028 timeInMode=0;
1029 score+=POINTS_MATCH;
1030 }
1031 mode=MODE_MS;
1032 }else if(c<0 || c=='N'){
1033 score+=POINTS_NOCALL;
1034 semiperfect=false;
1035 }else if(r<0 || r=='N'){
1036 score+=POINTS_NOREF;
1037 norefs++;
1038 }else{
1039 if(mode==MODE_SUB){timeInMode++;}
1040 else{timeInMode=0;}
1041
1042 if(timeInMode==0){score+=POINTS_SUB;}
1043 else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1044 else{score+=POINTS_SUB3;}
1045 mode=MODE_SUB;
1046 semiperfect=false;
1047 }
1048 }
1049
1050 if(semiperfect && ss!=null){ss.semiperfect=((ss.stop==ss.start+read.length-1) && (norefs<=read.length/2));}
1051
1052 return score;
1053 }
1054
1055
1056 public final static int scoreNoIndels(byte[] read, byte[] ref, byte[] baseScores, final int refStart, SiteScore ss){
1057
1058 int score=0;
1059 int mode=-1;
1060 int timeInMode=0;
1061 int norefs=0;
1062
1063 //This block handles cases where the read runs outside the reference
1064 //Of course, padding the reference with 'N' would be better, but...
1065 int readStart=0;
1066 int readStop=read.length;
1067 final int refStop=refStart+read.length;
1068 boolean semiperfect=true;
1069
1070 if(refStart<0){
1071 readStart=0-refStart;
1072 score+=POINTS_NOREF*readStart;
1073 norefs+=readStart;
1074 }
1075 if(refStop>ref.length){
1076 int dif=(refStop-ref.length);
1077 readStop-=dif;
1078 score+=POINTS_NOREF*dif;
1079 norefs+=dif;
1080 }
1081
1082 // if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1083
1084 for(int i=readStart; i<readStop; i++){
1085 byte c=read[i];
1086 byte r=ref[refStart+i];
1087
1088 if(c==r && c!='N'){
1089 if(mode==MODE_MS){
1090 timeInMode++;
1091 score+=POINTS_MATCH2;
1092 }else{
1093 timeInMode=0;
1094 score+=POINTS_MATCH;
1095 }
1096 score+=baseScores[i];
1097 mode=MODE_MS;
1098 }else if(c<0 || c=='N'){
1099 score+=POINTS_NOCALL;
1100 semiperfect=false;
1101 }else if(r<0 || r=='N'){
1102 score+=POINTS_NOREF;
1103 norefs++;
1104 }else{
1105 if(mode==MODE_SUB){timeInMode++;}
1106 else{timeInMode=0;}
1107
1108 if(timeInMode==0){score+=POINTS_SUB;}
1109 else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1110 else{score+=POINTS_SUB3;}
1111 mode=MODE_SUB;
1112 semiperfect=false;
1113 }
1114 }
1115
1116 if(semiperfect && ss!=null){ss.semiperfect=((ss.stop==ss.start+read.length-1) && (norefs<=read.length/2));}
1117 assert(Read.CHECKSITE(ss, read, -1));
1118
1119 return score;
1120 }
1121
1122
1123 public final static int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, byte[] baseScores, final int refStart, byte[][] matchReturn){
1124 int score=0;
1125 int mode=-1;
1126 int timeInMode=0;
1127
1128 assert(refStart<=ref.length) : refStart+", "+ref.length;
1129
1130 //This block handles cases where the read runs outside the reference
1131 //Of course, padding the reference with 'N' would be better, but...
1132 int readStart=0;
1133 int readStop=read.length;
1134 final int refStop=refStart+read.length;
1135 if(refStart<0){
1136 readStart=0-refStart;
1137 score+=POINTS_NOREF*readStart;
1138 }
1139 if(refStop>ref.length){
1140 int dif=(refStop-ref.length);
1141 System.err.println("dif="+dif+", ref.length="+ref.length+", refStop="+refStop);
1142 readStop-=dif;
1143 score+=POINTS_NOREF*dif;
1144 }
1145 assert(refStart+readStop<=ref.length) : "readStart="+readStart+", readStop="+readStop+
1146 ", refStart="+refStart+", refStop="+refStop+", ref.length="+ref.length+", read.length="+read.length;
1147
1148 assert(matchReturn!=null);
1149 assert(matchReturn.length==1);
1150 if(matchReturn[0]==null || matchReturn[0].length!=read.length){
1151 assert(matchReturn[0]==null || matchReturn[0].length<read.length) : matchReturn[0].length+"!="+read.length;
1152 matchReturn[0]=new byte[read.length];
1153 }
1154 final byte[] match=matchReturn[0];
1155
1156 // if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1157
1158 for(int i=readStart; i<readStop; i++){
1159 byte c=read[i];
1160 byte r=ref[refStart+i];
1161
1162 assert(r!='.' && c!='.');
1163
1164 if(c==r && c!='N'){
1165 if(mode==MODE_MS){
1166 timeInMode++;
1167 score+=POINTS_MATCH2;
1168 }else{
1169 timeInMode=0;
1170 score+=POINTS_MATCH;
1171 }
1172 score+=baseScores[i];
1173 match[i]='m';
1174 mode=MODE_MS;
1175 }else if(c<0 || c=='N'){
1176 score+=POINTS_NOCALL;
1177 match[i]='N';
1178 }else if(r<0 || r=='N'){
1179 score+=POINTS_NOREF;
1180 // match[i]='m';
1181 match[i]='N';
1182 }else{
1183 match[i]='S';
1184 if(mode==MODE_SUB){timeInMode++;}
1185 else{timeInMode=0;}
1186
1187 if(timeInMode==0){score+=POINTS_SUB;}
1188 else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1189 else{score+=POINTS_SUB3;}
1190 mode=MODE_SUB;
1191 }
1192 }
1193
1194 return score;
1195 }
1196
1197
1198 public final static int scoreNoIndelsAndMakeMatchString(byte[] read, byte[] ref, final int refStart, byte[][] matchReturn){
1199 int score=0;
1200 int mode=-1;
1201 int timeInMode=0;
1202
1203 assert(refStart<=ref.length) : refStart+", "+ref.length;
1204
1205 //This block handles cases where the read runs outside the reference
1206 //Of course, padding the reference with 'N' would be better, but...
1207 int readStart=0;
1208 int readStop=read.length;
1209 final int refStop=refStart+read.length;
1210 if(refStart<0){
1211 readStart=0-refStart;
1212 score+=POINTS_NOREF*readStart;
1213 }
1214 if(refStop>ref.length){
1215 int dif=(refStop-ref.length);
1216 System.err.println("dif="+dif+", ref.length="+ref.length+", refStop="+refStop);
1217 readStop-=dif;
1218 score+=POINTS_NOREF*dif;
1219 }
1220 assert(refStart+readStop<=ref.length) : "readStart="+readStart+", readStop="+readStop+
1221 ", refStart="+refStart+", refStop="+refStop+", ref.length="+ref.length+", read.length="+read.length;
1222
1223 assert(matchReturn!=null);
1224 assert(matchReturn.length==1);
1225 if(matchReturn[0]==null || matchReturn[0].length!=read.length){
1226 assert(matchReturn[0]==null || matchReturn[0].length<read.length) : matchReturn[0].length+"!="+read.length;
1227 matchReturn[0]=new byte[read.length];
1228 }
1229 final byte[] match=matchReturn[0];
1230
1231 // if(refStart<0 || refStart+read.length>ref.length){return -99999;} //No longer needed.
1232
1233 for(int i=readStart; i<readStop; i++){
1234 byte c=read[i];
1235 byte r=ref[refStart+i];
1236
1237 assert(r!='.' && c!='.');
1238
1239 if(c==r && c!='N'){
1240 if(mode==MODE_MS){
1241 timeInMode++;
1242 score+=POINTS_MATCH2;
1243 }else{
1244 timeInMode=0;
1245 score+=POINTS_MATCH;
1246 }
1247 match[i]='m';
1248 mode=MODE_MS;
1249 }else if(c<0 || c=='N'){
1250 score+=POINTS_NOCALL;
1251 match[i]='N';
1252 }else if(r<0 || r=='N'){
1253 score+=POINTS_NOREF;
1254 // match[i]='m';
1255 match[i]='N';
1256 }else{
1257 match[i]='S';
1258 if(mode==MODE_SUB){timeInMode++;}
1259 else{timeInMode=0;}
1260
1261 if(timeInMode==0){score+=POINTS_SUB;}
1262 else if(timeInMode<LIMIT_FOR_COST_3){score+=POINTS_SUB2;}
1263 else{score+=POINTS_SUB3;}
1264 mode=MODE_SUB;
1265 }
1266 }
1267
1268 return score;
1269 }
1270
1271 public static final int maxQuality(int numBases){
1272 return POINTS_MATCH+(numBases-1)*(POINTS_MATCH2);
1273 }
1274
1275 public static final int maxQuality(byte[] baseScores){
1276 return POINTS_MATCH+(baseScores.length-1)*(POINTS_MATCH2)+Tools.sumInt(baseScores);
1277 }
1278
1279 public static final int maxImperfectScore(int numBases){
1280 // int maxQ=maxQuality(numBases);
1281 //// maxImperfectSwScore=maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB);
1282 // int maxI=maxQ+POINTS_DEL;
1283 // maxI=Tools.max(maxI, maxQ+POINTS_INS-POINTS_MATCH2);
1284 // maxI=Tools.min(maxI, maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB));
1285
1286 int maxQ=maxQuality(numBases);
1287 int maxI=maxQ+Tools.min(POINTS_DEL, POINTS_INS-POINTS_MATCH2);
1288 assert(maxI<(maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB)));
1289 return maxI;
1290 }
1291
1292 public static final int maxImperfectScore(byte[] baseScores){
1293 // int maxQ=maxQuality(numBases);
1294 //// maxImperfectSwScore=maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB);
1295 // int maxI=maxQ+POINTS_DEL;
1296 // maxI=Tools.max(maxI, maxQ+POINTS_INS-POINTS_MATCH2);
1297 // maxI=Tools.min(maxI, maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB));
1298
1299 int maxQ=maxQuality(baseScores);
1300 int maxI=maxQ+Tools.min(POINTS_DEL, POINTS_INS-POINTS_MATCH2);
1301 assert(maxI<(maxQ-(POINTS_MATCH2+POINTS_MATCH2)+(POINTS_MATCH+POINTS_SUB)));
1302 return maxI;
1303 }
1304
1305 public static final String toString(int[] a){
1306
1307 int width=7;
1308
1309 StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1310 for(int num : a){
1311 String s=" "+num;
1312 int spaces=width-s.length();
1313 assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
1314 for(int i=0; i<spaces; i++){sb.append(' ');}
1315 sb.append(s);
1316 }
1317
1318 return sb.toString();
1319 }
1320
1321 public static final String toTimePacked(int[] a){
1322 int width=7;
1323
1324 StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1325 for(int num_ : a){
1326 int num=num_&TIMEMASK;
1327 String s=" "+num;
1328 int spaces=width-s.length();
1329 assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
1330 for(int i=0; i<spaces; i++){sb.append(' ');}
1331 sb.append(s);
1332 }
1333
1334 return sb.toString();
1335 }
1336
1337 public static final String toScorePacked(int[] a){
1338 int width=7;
1339
1340 String minString=" -";
1341 String maxString=" ";
1342 while(minString.length()<width){minString+='9';}
1343 while(maxString.length()<width){maxString+='9';}
1344
1345 StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1346 for(int num_ : a){
1347 int num=num_>>SCOREOFFSET;
1348 String s=" "+num;
1349 if(s.length()>width){s=num>0 ? maxString : minString;}
1350 int spaces=width-s.length();
1351 assert(spaces>=0) : width+", "+s.length()+", "+s+", "+num+", "+spaces;
1352 for(int i=0; i<spaces; i++){sb.append(' ');}
1353 sb.append(s);
1354 }
1355
1356 return sb.toString();
1357 }
1358
1359 public static final String toString(byte[] a){
1360
1361 int width=6;
1362
1363 StringBuilder sb=new StringBuilder((a.length+1)*width+2);
1364 for(int num : a){
1365 String s=" "+num;
1366 int spaces=width-s.length();
1367 assert(spaces>=0);
1368 for(int i=0; i<spaces; i++){sb.append(' ');}
1369 sb.append(s);
1370 }
1371
1372 return sb.toString();
1373 }
1374
1375 public static final String toString(byte[] ref, int startLoc, int stopLoc){
1376 StringBuilder sb=new StringBuilder(stopLoc-startLoc+1);
1377 for(int i=startLoc; i<=stopLoc; i++){sb.append((char)ref[i]);}
1378 return sb.toString();
1379 }
1380
1381 public static int calcDelScore(int len){
1382 if(len<=0){return 0;}
1383 int score=POINTS_DEL;
1384
1385 if(len>LIMIT_FOR_COST_4){
1386 score+=(len-LIMIT_FOR_COST_4)*POINTS_DEL4;
1387 len=LIMIT_FOR_COST_4;
1388 }
1389 if(len>LIMIT_FOR_COST_3){
1390 score+=(len-LIMIT_FOR_COST_3)*POINTS_DEL3;
1391 len=LIMIT_FOR_COST_3;
1392 }
1393 if(len>1){
1394 score+=(len-1)*POINTS_DEL2;
1395 }
1396 return score;
1397 }
1398
1399 private static int calcDelScoreOffset(int len){
1400 if(len<=0){return 0;}
1401 int score=POINTSoff_DEL;
1402
1403 if(len>LIMIT_FOR_COST_4){
1404 score+=(len-LIMIT_FOR_COST_4)*POINTSoff_DEL4;
1405 len=LIMIT_FOR_COST_4;
1406 }
1407 if(len>LIMIT_FOR_COST_3){
1408 score+=(len-LIMIT_FOR_COST_3)*POINTSoff_DEL3;
1409 len=LIMIT_FOR_COST_3;
1410 }
1411 if(len>1){
1412 score+=(len-1)*POINTSoff_DEL2;
1413 }
1414 return score;
1415 }
1416
1417 private static int calcMatchScoreOffset(int len){
1418 if(len<=0){return 0;}
1419 int score=POINTSoff_MATCH;
1420
1421 if(len>1){
1422 score+=(len-1)*POINTSoff_MATCH2;
1423 }
1424 return score;
1425 }
1426
1427 private static int calcSubScoreOffset(int len){
1428 if(len<=0){return 0;}
1429 int score=POINTSoff_SUB;
1430
1431 if(len>LIMIT_FOR_COST_3){
1432 score+=(len-LIMIT_FOR_COST_3)*POINTSoff_SUB3;
1433 len=LIMIT_FOR_COST_3;
1434 }
1435 if(len>1){
1436 score+=(len-1)*POINTSoff_SUB2;
1437 }
1438 return score;
1439 }
1440
1441 public static int calcInsScore(int len){
1442 if(len<=0){return 0;}
1443 int score=POINTS_INS;
1444
1445 if(len>LIMIT_FOR_COST_4){
1446 score+=(len-LIMIT_FOR_COST_4)*POINTS_INS4;
1447 len=LIMIT_FOR_COST_4;
1448 }
1449 if(len>LIMIT_FOR_COST_3){
1450 score+=(len-LIMIT_FOR_COST_3)*POINTS_INS3;
1451 len=LIMIT_FOR_COST_3;
1452 }
1453 if(len>1){
1454 score+=(len-1)*POINTS_INS2;
1455 }
1456 return score;
1457 }
1458
1459 private static int calcInsScoreOffset(int len){
1460 if(len<=0){return 0;}
1461 int score=POINTSoff_INS;
1462 if(len>LIMIT_FOR_COST_4){
1463 score+=(len-LIMIT_FOR_COST_4)*POINTSoff_INS4;
1464 len=LIMIT_FOR_COST_4;
1465 }
1466 if(len>LIMIT_FOR_COST_3){
1467 score+=(len-LIMIT_FOR_COST_3)*POINTSoff_INS3;
1468 len=LIMIT_FOR_COST_3;
1469 }
1470 if(len>1){
1471 score+=(len-1)*POINTSoff_INS2;
1472 }
1473 return score;
1474 }
1475
1476
1477 private final int maxRows;
1478 private final int maxColumns;
1479
1480 private final int[][][] packed;
1481
1482 private final int[] vertLimit;
1483 private final int[] horizLimit;
1484
1485 private final int[] insScoreArray;
1486 private final int[] delScoreArray;
1487 private final int[] matchScoreArray;
1488 private final int[] subScoreArray;
1489
1490 CharSequence showVertLimit(){
1491 StringBuilder sb=new StringBuilder();
1492 for(int i=0; i<=rows; i++){sb.append(vertLimit[i]>>SCOREOFFSET).append(",");}
1493 return sb;
1494 }
1495 CharSequence showHorizLimit(){
1496 StringBuilder sb=new StringBuilder();
1497 for(int i=0; i<=columns; i++){sb.append(horizLimit[i]>>SCOREOFFSET).append(",");}
1498 return sb;
1499 }
1500
1501 // public static final int MODEBITS=2;
1502 public static final int TIMEBITS=12;
1503 public static final int SCOREBITS=32-TIMEBITS;
1504 public static final int MAX_TIME=((1<<TIMEBITS)-1);
1505 public static final int MAX_SCORE=((1<<(SCOREBITS-1))-1)-2000;
1506 public static final int MIN_SCORE=0-MAX_SCORE; //Keeps it 1 point above "BAD".
1507
1508 // public static final int MODEOFFSET=0; //Always zero.
1509 // public static final int TIMEOFFSET=0;
1510 public static final int SCOREOFFSET=TIMEBITS;
1511
1512 // public static final int MODEMASK=~((-1)<<MODEBITS);
1513 // public static final int TIMEMASK=(~((-1)<<TIMEBITS))<<TIMEOFFSET;
1514 public static final int TIMEMASK=~((-1)<<TIMEBITS);
1515 public static final int SCOREMASK=(~((-1)<<SCOREBITS))<<SCOREOFFSET;
1516
1517 private static final byte MODE_MS=0;
1518 private static final byte MODE_DEL=1;
1519 private static final byte MODE_INS=2;
1520 private static final byte MODE_SUB=3;
1521
1522 public static final int POINTS_NOREF=-10;
1523 public static final int POINTS_NOCALL=-10;
1524 public static final int POINTS_MATCH=90;
1525 public static final int POINTS_MATCH2=100; //Note: Changing to 90 substantially reduces false positives
1526 public static final int POINTS_COMPATIBLE=50;
1527 public static final int POINTS_SUB=-143;
1528 public static final int POINTS_SUBR=-161; //increased penalty if prior match streak was at most 1
1529 public static final int POINTS_SUB2=-54;
1530 public static final int POINTS_SUB3=-35;
1531 public static final int POINTS_MATCHSUB=-10;
1532 public static final int POINTS_INS=-207;
1533 public static final int POINTS_INS2=-51;
1534 public static final int POINTS_INS3=-37;
1535 public static final int POINTS_INS4=-15;
1536 public static final int POINTS_DEL=-273;
1537 public static final int POINTS_DEL2=-38;
1538 public static final int POINTS_DEL3=-27;
1539 public static final int POINTS_DEL4=-15;
1540 public static final int POINTS_DEL_REF_N=-10;
1541
1542
1543 public static final int LIMIT_FOR_COST_3=5;
1544 public static final int LIMIT_FOR_COST_4=30;
1545
1546 public static final int BAD=MIN_SCORE-1;
1547
1548
1549 public static final int POINTSoff_NOREF=(POINTS_NOREF<<SCOREOFFSET);
1550 public static final int POINTSoff_NOCALL=(POINTS_NOCALL<<SCOREOFFSET);
1551 public static final int POINTSoff_MATCH=(POINTS_MATCH<<SCOREOFFSET);
1552 public static final int POINTSoff_MATCH2=(POINTS_MATCH2<<SCOREOFFSET);
1553 public static final int POINTSoff_COMPATIBLE=(POINTS_COMPATIBLE<<SCOREOFFSET);
1554 public static final int POINTSoff_SUB=(POINTS_SUB<<SCOREOFFSET);
1555 public static final int POINTSoff_SUBR=(POINTS_SUBR<<SCOREOFFSET);
1556 public static final int POINTSoff_SUB2=(POINTS_SUB2<<SCOREOFFSET);
1557 public static final int POINTSoff_SUB3=(POINTS_SUB3<<SCOREOFFSET);
1558 public static final int POINTSoff_MATCHSUB=(POINTS_MATCHSUB<<SCOREOFFSET);
1559 public static final int POINTSoff_INS=(POINTS_INS<<SCOREOFFSET);
1560 public static final int POINTSoff_INS2=(POINTS_INS2<<SCOREOFFSET);
1561 public static final int POINTSoff_INS3=(POINTS_INS3<<SCOREOFFSET);
1562 public static final int POINTSoff_INS4=(POINTS_INS4<<SCOREOFFSET);
1563 public static final int POINTSoff_DEL=(POINTS_DEL<<SCOREOFFSET);
1564 public static final int POINTSoff_DEL2=(POINTS_DEL2<<SCOREOFFSET);
1565 public static final int POINTSoff_DEL3=(POINTS_DEL3<<SCOREOFFSET);
1566 public static final int POINTSoff_DEL4=(POINTS_DEL4<<SCOREOFFSET);
1567 public static final int POINTSoff_DEL_REF_N=(POINTS_DEL_REF_N<<SCOREOFFSET);
1568 public static final int BADoff=(BAD<<SCOREOFFSET);
1569 public static final int MAXoff_SCORE=MAX_SCORE<<SCOREOFFSET;
1570 public static final int MINoff_SCORE=MIN_SCORE<<SCOREOFFSET;
1571
1572 private int rows;
1573 private int columns;
1574
1575 public long iterationsLimited=0;
1576 public long iterationsUnlimited=0;
1577
1578 public boolean verbose=false;
1579 public boolean verbose2=false;
1580
1581 }