jpayne@68
|
1 package shared;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.Serializable;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.Arrays;
|
jpayne@68
|
6
|
jpayne@68
|
7 import align2.QualityTools;
|
jpayne@68
|
8 import dna.AminoAcid;
|
jpayne@68
|
9 import stream.Read;
|
jpayne@68
|
10 import stream.SamLine;
|
jpayne@68
|
11 import stream.SiteScore;
|
jpayne@68
|
12 import structures.ByteBuilder;
|
jpayne@68
|
13
|
jpayne@68
|
14 /**
|
jpayne@68
|
15 * Helper class for processes that do inline quality trimming.
|
jpayne@68
|
16 * @author Brian Bushnell
|
jpayne@68
|
17 * @date Mar 15, 2013
|
jpayne@68
|
18 *
|
jpayne@68
|
19 */
|
jpayne@68
|
20 public final class TrimRead implements Serializable {
|
jpayne@68
|
21
|
jpayne@68
|
22 /**
|
jpayne@68
|
23 *
|
jpayne@68
|
24 */
|
jpayne@68
|
25 private static final long serialVersionUID = 8791743639124592480L;
|
jpayne@68
|
26
|
jpayne@68
|
27 public static void main(String[] args){
|
jpayne@68
|
28 byte[] bases=args[0].getBytes();
|
jpayne@68
|
29 byte[] quals=(args.length<2 ? null : args[1].getBytes());
|
jpayne@68
|
30 if(quals!=null){
|
jpayne@68
|
31 for(int i=0; i<quals.length; i++){quals[i]-=32;}
|
jpayne@68
|
32 }
|
jpayne@68
|
33 byte[] match=(args.length<3 ? null : args[2].getBytes());
|
jpayne@68
|
34 float minq=(args.length<4 ? 5 : Float.parseFloat(args[3]));
|
jpayne@68
|
35 float minE=(float)QualityTools.phredToProbError(minq);
|
jpayne@68
|
36 Read r=new Read(bases, quals, 1);
|
jpayne@68
|
37 r.match=match;
|
jpayne@68
|
38 System.out.println("Before trim:\n"+r.toFastq()+(r.match==null ? "" : "\n"+new String(r.match)));
|
jpayne@68
|
39 System.out.println(Arrays.toString(r.quality));
|
jpayne@68
|
40 TrimRead tr=trim(r, true, true, minq, minE, 1);
|
jpayne@68
|
41 System.out.println("\nAfter trim:\n"+r.toFastq()+(r.match==null ? "" : "\n"+new String(r.match)));
|
jpayne@68
|
42 if(r.match==null){
|
jpayne@68
|
43 r.match=new byte[r.length()];
|
jpayne@68
|
44 for(int i=0; i<r.length(); i++){r.match[i]='m';}
|
jpayne@68
|
45 }
|
jpayne@68
|
46 tr.untrim();
|
jpayne@68
|
47 System.out.println("\nAfter untrim:\n"+r.toFastq()+(r.match==null ? "" : "\n"+new String(r.match)));
|
jpayne@68
|
48 }
|
jpayne@68
|
49
|
jpayne@68
|
50 // public static TrimRead trim(Read r, boolean trimLeft, boolean trimRight, float trimq, int minlen){
|
jpayne@68
|
51 // if(r==null || r.bases==null){return null;}
|
jpayne@68
|
52 //
|
jpayne@68
|
53 // final int a, b;
|
jpayne@68
|
54 // if(optimalMode){
|
jpayne@68
|
55 // long packed=testOptimal(r.bases, r.quality, QualityTools.PROB_ERROR[trimq]);
|
jpayne@68
|
56 // a=trimLeft ? (int)((packed>>32)&0xFFFFFFFFL) : 0;
|
jpayne@68
|
57 // b=trimRight ? (int)((packed)&0xFFFFFFFFL) : 0;
|
jpayne@68
|
58 // }else if(windowMode){
|
jpayne@68
|
59 // a=0;
|
jpayne@68
|
60 // b=(trimRight ? testRightWindow(r.bases, r.quality, (byte)trimq, windowLength) : 0);
|
jpayne@68
|
61 // }else{
|
jpayne@68
|
62 // a=(trimLeft ? testLeft(r.bases, r.quality, (byte)trimq) : 0);
|
jpayne@68
|
63 // b=(trimRight ? testRight(r.bases, r.quality, (byte)trimq) : 0);
|
jpayne@68
|
64 // }
|
jpayne@68
|
65 // return (a+b==0 ? null : new TrimRead(r, a, b, trimq, minlen));
|
jpayne@68
|
66 // }
|
jpayne@68
|
67
|
jpayne@68
|
68 public static TrimRead trim(Read r, boolean trimLeft, boolean trimRight,
|
jpayne@68
|
69 float trimq, float avgErrorRate, int minlen){
|
jpayne@68
|
70 if(r==null || r.bases==null){return null;}
|
jpayne@68
|
71
|
jpayne@68
|
72 final int a, b;
|
jpayne@68
|
73 if(optimalMode){
|
jpayne@68
|
74 long packed=testOptimal(r.bases, r.quality, avgErrorRate);
|
jpayne@68
|
75 a=trimLeft ? (int)((packed>>32)&0xFFFFFFFFL) : 0;
|
jpayne@68
|
76 b=trimRight ? (int)((packed)&0xFFFFFFFFL) : 0;
|
jpayne@68
|
77 }else if(windowMode){
|
jpayne@68
|
78 a=0;
|
jpayne@68
|
79 b=(trimRight ? testRightWindow(r.bases, r.quality, (byte)trimq, windowLength) : 0);
|
jpayne@68
|
80 }else{
|
jpayne@68
|
81 a=(trimLeft ? testLeft(r.bases, r.quality, (byte)trimq) : 0);
|
jpayne@68
|
82 b=(trimRight ? testRight(r.bases, r.quality, (byte)trimq) : 0);
|
jpayne@68
|
83 }
|
jpayne@68
|
84 return (a+b==0 ? null : new TrimRead(r, a, b, trimq, minlen));
|
jpayne@68
|
85 }
|
jpayne@68
|
86
|
jpayne@68
|
87 /**
|
jpayne@68
|
88 * @param r Read to trim
|
jpayne@68
|
89 * @param trimLeft Trim left side
|
jpayne@68
|
90 * @param trimRight Trim right side
|
jpayne@68
|
91 * @param trimq Maximum quality to trim
|
jpayne@68
|
92 * @param minResult Ensure trimmed read is at least this long
|
jpayne@68
|
93 * @return Number of bases trimmed
|
jpayne@68
|
94 */
|
jpayne@68
|
95 public static int trimFast(Read r, boolean trimLeft, boolean trimRight,
|
jpayne@68
|
96 float trimq, float avgErrorRate, int minResult){
|
jpayne@68
|
97 return trimFast(r, trimLeft, trimRight, trimq, avgErrorRate, minResult, false);
|
jpayne@68
|
98 }
|
jpayne@68
|
99
|
jpayne@68
|
100 /**
|
jpayne@68
|
101 * @param r Read to trim
|
jpayne@68
|
102 * @param trimLeft Trim left side
|
jpayne@68
|
103 * @param trimRight Trim right side
|
jpayne@68
|
104 * @param trimq Maximum quality to trim
|
jpayne@68
|
105 * @param minResult Ensure trimmed read is at least this long
|
jpayne@68
|
106 * @return Number of bases trimmed
|
jpayne@68
|
107 */
|
jpayne@68
|
108 public static int trimFast(Read r, boolean trimLeft, boolean trimRight,
|
jpayne@68
|
109 float trimq, float avgErrorRate, int minResult, boolean trimClip){
|
jpayne@68
|
110 return trimFast(r, trimLeft, trimRight, trimq, avgErrorRate, minResult, 0, trimClip);
|
jpayne@68
|
111 }
|
jpayne@68
|
112
|
jpayne@68
|
113 /**
|
jpayne@68
|
114 * This method allows a "discardUnder" parameter, so that qtrim=r will still discard
|
jpayne@68
|
115 * reads that if left-trimmed, would be shorter than the desired minimum length.
|
jpayne@68
|
116 * It is not presently used.
|
jpayne@68
|
117 * @param r Read to trim
|
jpayne@68
|
118 * @param trimLeft Trim left side
|
jpayne@68
|
119 * @param trimRight Trim right side
|
jpayne@68
|
120 * @param trimq Maximum quality to trim
|
jpayne@68
|
121 * @param minResult Ensure trimmed read is at least this long
|
jpayne@68
|
122 * @param discardUnder Resulting reads shorter than this are not wanted
|
jpayne@68
|
123 * @return Number of bases trimmed
|
jpayne@68
|
124 */
|
jpayne@68
|
125 public static int trimFast(Read r, boolean trimLeft, boolean trimRight,
|
jpayne@68
|
126 float trimq, float avgErrorRate, int minResult, int discardUnder, boolean trimClip){
|
jpayne@68
|
127 // assert(avgErrorRate==(float)QualityTools.phredToProbError(trimq)) : trimq+", "+avgErrorRate+", "+(float)QualityTools.phredToProbError(trimq);
|
jpayne@68
|
128 final byte[] bases=r.bases, qual=r.quality;
|
jpayne@68
|
129 if(bases==null || bases.length<1){return 0;}
|
jpayne@68
|
130
|
jpayne@68
|
131 // assert(false) : trimLeft+", "+trimRight+", "+trimq+", "+minResult+", "+discardUnder;
|
jpayne@68
|
132
|
jpayne@68
|
133 final int len=r.length();
|
jpayne@68
|
134 final int a0, b0;
|
jpayne@68
|
135 final int a, b;
|
jpayne@68
|
136 if(optimalMode){
|
jpayne@68
|
137 long packed=testOptimal(bases, qual, avgErrorRate);
|
jpayne@68
|
138 a0=(int)((packed>>32)&0xFFFFFFFFL);
|
jpayne@68
|
139 b0=(int)((packed)&0xFFFFFFFFL);
|
jpayne@68
|
140 if(trimLeft!=trimRight && discardUnder>0 && len-a0-b0<discardUnder){
|
jpayne@68
|
141 a=trimLeft ? len : 0;
|
jpayne@68
|
142 b=trimRight ? len : 0;
|
jpayne@68
|
143 }else{
|
jpayne@68
|
144 a=trimLeft ? a0 : 0;
|
jpayne@68
|
145 b=trimRight ? b0 : 0;
|
jpayne@68
|
146 }
|
jpayne@68
|
147 // assert(false) : a0+", "+b0+" -> "+a+", "+b;
|
jpayne@68
|
148 }else if(windowMode){
|
jpayne@68
|
149 a=0;
|
jpayne@68
|
150 b=(trimRight ? testRightWindow(bases, qual, (byte)trimq, windowLength) : 0);
|
jpayne@68
|
151 }else{
|
jpayne@68
|
152 a=(trimLeft ? testLeft(bases, qual, (byte)trimq) : 0);
|
jpayne@68
|
153 b=(trimRight ? testRight(bases, qual, (byte)trimq) : 0);
|
jpayne@68
|
154 }
|
jpayne@68
|
155 return trimByAmount(r, a, b, minResult, trimClip);
|
jpayne@68
|
156 }
|
jpayne@68
|
157
|
jpayne@68
|
158 public static boolean untrim(Read r){
|
jpayne@68
|
159 if(r==null || r.obj==null){return false;}
|
jpayne@68
|
160 if(r.obj.getClass()!=TrimRead.class){return false;}
|
jpayne@68
|
161 TrimRead tr=(TrimRead)r.obj;
|
jpayne@68
|
162 return tr.untrim();
|
jpayne@68
|
163 }
|
jpayne@68
|
164
|
jpayne@68
|
165 // public TrimRead(Read r_, boolean trimLeft, boolean trimRight, float trimq_, int minlen_){
|
jpayne@68
|
166 // this(r_, (trimLeft ? testLeft(r_.bases, r_.quality, (byte)trimq_) : 0), (trimRight ? testRight(r_.bases, r_.quality, (byte)trimq_) : 0), trimq_, minlen_);
|
jpayne@68
|
167 // }
|
jpayne@68
|
168
|
jpayne@68
|
169 public TrimRead(Read r_, int trimLeft, int trimRight, float trimq_, int minlen_){
|
jpayne@68
|
170 minlen_=Tools.max(minlen_, 0);
|
jpayne@68
|
171 r=r_;
|
jpayne@68
|
172 bases1=r.bases;
|
jpayne@68
|
173 qual1=r.quality;
|
jpayne@68
|
174 trimq=(byte)trimq_;
|
jpayne@68
|
175 assert(bases1!=null || qual1==null) : "\n"+new String(bases1)+"\n"+new String(qual1)+"\n";
|
jpayne@68
|
176 assert(bases1==null || qual1==null || bases1.length==qual1.length) : "\n"+new String(bases1)+"\n"+new String(qual1)+"\n";
|
jpayne@68
|
177 int trimmed=trim(trimLeft, trimRight, minlen_);
|
jpayne@68
|
178 if(trimmed>0){
|
jpayne@68
|
179 assert(bases2==null || bases2.length>=minlen_ || bases1.length<minlen_) : bases1.length+", "+bases2.length+", "+minlen_+", "+trimLeft+", "+trimRight;
|
jpayne@68
|
180 r.bases=bases2;
|
jpayne@68
|
181 r.quality=qual2;
|
jpayne@68
|
182 r.obj=this;
|
jpayne@68
|
183 trimMatch(r);
|
jpayne@68
|
184 }
|
jpayne@68
|
185 }
|
jpayne@68
|
186
|
jpayne@68
|
187 // /** Trim the left end of the read, from left to right */
|
jpayne@68
|
188 // private int trim(final boolean trimLeft, final boolean trimRight, final int minlen){
|
jpayne@68
|
189 // final int a, b;
|
jpayne@68
|
190 // if(optimalMode){
|
jpayne@68
|
191 // long packed=testOptimal(bases1, qual1, QualityTools.PROB_ERROR[trimq]);
|
jpayne@68
|
192 // a=trimLeft ? (int)((packed>>32)&0xFFFFFFFFL) : 0;
|
jpayne@68
|
193 // b=trimRight ? (int)((packed)&0xFFFFFFFFL) : 0;
|
jpayne@68
|
194 // }else{
|
jpayne@68
|
195 // a=(trimLeft ? testLeft(bases1, qual1, (byte)trimq) : 0);
|
jpayne@68
|
196 // b=(trimRight ? testRight(bases1, qual1, (byte)trimq) : 0);
|
jpayne@68
|
197 // }
|
jpayne@68
|
198 // return trim(a, b, minlen);
|
jpayne@68
|
199 // }
|
jpayne@68
|
200
|
jpayne@68
|
201 /** Trim the left end of the read, from left to right */
|
jpayne@68
|
202 private int trim(int trimLeft, int trimRight, final int minlen){
|
jpayne@68
|
203 assert(trimLeft>=0 && trimRight>=0) : "trimLeft="+trimLeft+", trimRight="+trimRight+", minlen="+minlen+", len="+bases1.length;
|
jpayne@68
|
204 assert(trimLeft>0 || trimRight>0) : "trimLeft="+trimLeft+", trimRight="+trimRight+", minlen="+minlen+", len="+bases1.length;
|
jpayne@68
|
205 final int maxTrim=Tools.min(bases1.length, bases1.length-minlen);
|
jpayne@68
|
206 if(trimLeft+trimRight>maxTrim){
|
jpayne@68
|
207 int excess=trimLeft+trimRight-maxTrim;
|
jpayne@68
|
208 if(trimLeft>0 && excess>0){
|
jpayne@68
|
209 trimLeft=Tools.max(0, trimLeft-excess);
|
jpayne@68
|
210 excess=trimLeft+trimRight-maxTrim;
|
jpayne@68
|
211 }
|
jpayne@68
|
212 if(trimRight>0 && excess>0){
|
jpayne@68
|
213 trimRight=Tools.max(0, trimRight-excess);
|
jpayne@68
|
214 excess=trimLeft+trimRight-maxTrim;
|
jpayne@68
|
215 }
|
jpayne@68
|
216
|
jpayne@68
|
217 }
|
jpayne@68
|
218
|
jpayne@68
|
219 leftTrimmed=trimLeft;
|
jpayne@68
|
220 rightTrimmed=trimRight;
|
jpayne@68
|
221 final int sum=leftTrimmed+rightTrimmed;
|
jpayne@68
|
222
|
jpayne@68
|
223 if(verbose){
|
jpayne@68
|
224 System.err.println("leftTrimmed="+leftTrimmed+", rightTrimmed="+rightTrimmed+", sum="+sum);
|
jpayne@68
|
225 }
|
jpayne@68
|
226
|
jpayne@68
|
227 if(sum==0){
|
jpayne@68
|
228 bases2=bases1;
|
jpayne@68
|
229 qual2=qual1;
|
jpayne@68
|
230 }else{
|
jpayne@68
|
231 bases2=KillSwitch.copyOfRange(bases1, trimLeft, bases1.length-trimRight);
|
jpayne@68
|
232 qual2=((qual1==null || (trimLeft+trimRight>=qual1.length)) ? null : KillSwitch.copyOfRange(qual1, trimLeft, qual1.length-trimRight));
|
jpayne@68
|
233 }
|
jpayne@68
|
234 return sum;
|
jpayne@68
|
235 }
|
jpayne@68
|
236
|
jpayne@68
|
237 /** Trim bases outside of leftLoc and rightLoc, excluding leftLoc and rightLoc */
|
jpayne@68
|
238 public static int trimToPosition(Read r, int leftLoc, int rightLoc, int minResultingLength){
|
jpayne@68
|
239 final int len=r.length();
|
jpayne@68
|
240 return trimByAmount(r, leftLoc, len-rightLoc-1, minResultingLength, false);
|
jpayne@68
|
241 }
|
jpayne@68
|
242
|
jpayne@68
|
243 /** Remove non-genetic-code from reads */
|
jpayne@68
|
244 public static int trimBadSequence(Read r){
|
jpayne@68
|
245 final byte[] bases=r.bases, quals=r.quality;
|
jpayne@68
|
246 if(bases==null){return 0;}
|
jpayne@68
|
247 final int minGenetic=20;
|
jpayne@68
|
248 int lastNon=-1;
|
jpayne@68
|
249 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
250 byte b=bases[i];
|
jpayne@68
|
251 if(!AminoAcid.isACGTN(b)){lastNon=i;}
|
jpayne@68
|
252 if(i-lastNon>minGenetic){break;}
|
jpayne@68
|
253 }
|
jpayne@68
|
254 if(lastNon>=0){
|
jpayne@68
|
255 r.bases=KillSwitch.copyOfRange(bases, lastNon+1, bases.length);
|
jpayne@68
|
256 if(quals!=null){
|
jpayne@68
|
257 r.quality=KillSwitch.copyOfRange(quals, lastNon+1, quals.length);
|
jpayne@68
|
258 }
|
jpayne@68
|
259 }
|
jpayne@68
|
260 return lastNon+1;
|
jpayne@68
|
261 }
|
jpayne@68
|
262
|
jpayne@68
|
263 /** Trim this many bases from each end */
|
jpayne@68
|
264 public static int trimByAmount(Read r, int leftTrimAmount, int rightTrimAmount, int minResultingLength){
|
jpayne@68
|
265 return trimByAmount(r, leftTrimAmount, rightTrimAmount, minResultingLength, false);
|
jpayne@68
|
266 }
|
jpayne@68
|
267
|
jpayne@68
|
268 /** Trim this many bases from each end */
|
jpayne@68
|
269 public static int trimByAmount(Read r, int leftTrimAmount, int rightTrimAmount, int minResultingLength, boolean trimClip){
|
jpayne@68
|
270
|
jpayne@68
|
271 leftTrimAmount=Tools.max(leftTrimAmount, 0);
|
jpayne@68
|
272 rightTrimAmount=Tools.max(rightTrimAmount, 0);
|
jpayne@68
|
273
|
jpayne@68
|
274 //These assertions are unnecessary if the mapping information will never be used or output.
|
jpayne@68
|
275 // assert(r.match==null) : "TODO: Handle trimming of reads with match strings.";
|
jpayne@68
|
276 assert(r.sites==null) : "TODO: Handle trimming of reads with SiteScores.";
|
jpayne@68
|
277
|
jpayne@68
|
278 if(r.match!=null){
|
jpayne@68
|
279 return trimReadWithMatch(r, r.samline, leftTrimAmount, rightTrimAmount, minResultingLength, Integer.MAX_VALUE, trimClip);
|
jpayne@68
|
280 }else if(r.samline!=null && r.samline.cigar!=null && r.samline.cigarContainsOnlyME()){
|
jpayne@68
|
281 return trimReadWithMatchFast(r, r.samline, leftTrimAmount, rightTrimAmount, minResultingLength);
|
jpayne@68
|
282 }
|
jpayne@68
|
283
|
jpayne@68
|
284 final byte[] bases=r.bases, qual=r.quality;
|
jpayne@68
|
285 final int len=(bases==null ? 0 : bases.length), qlen=(qual==null ? 0 : qual.length);
|
jpayne@68
|
286 if(len<1){return 0;}
|
jpayne@68
|
287 minResultingLength=Tools.min(len, Tools.max(minResultingLength, 0));
|
jpayne@68
|
288 if(leftTrimAmount+rightTrimAmount+minResultingLength>len){
|
jpayne@68
|
289 rightTrimAmount=Tools.max(1, len-minResultingLength);
|
jpayne@68
|
290 leftTrimAmount=0;
|
jpayne@68
|
291 }
|
jpayne@68
|
292
|
jpayne@68
|
293 final int total=leftTrimAmount+rightTrimAmount;
|
jpayne@68
|
294 // System.err.println("D: L="+leftTrimAmount+", R="+rightTrimAmount+", len="+r.length()+", tot="+total);
|
jpayne@68
|
295 if(total>0){
|
jpayne@68
|
296 r.bases=KillSwitch.copyOfRange(bases, leftTrimAmount, len-rightTrimAmount);
|
jpayne@68
|
297 r.quality=(leftTrimAmount+rightTrimAmount>=qlen ? null : KillSwitch.copyOfRange(qual, leftTrimAmount, qlen-rightTrimAmount));
|
jpayne@68
|
298 trimMatch(r);
|
jpayne@68
|
299 if(r.stop>r.start){ //TODO: Fixing mapped coordinates needs more work.
|
jpayne@68
|
300 r.start+=leftTrimAmount;
|
jpayne@68
|
301 r.stop-=rightTrimAmount;
|
jpayne@68
|
302 }
|
jpayne@68
|
303 }
|
jpayne@68
|
304
|
jpayne@68
|
305 if(verbose){
|
jpayne@68
|
306 System.err.println("leftTrimmed="+leftTrimAmount+", rightTrimmed="+rightTrimAmount+
|
jpayne@68
|
307 ", sum="+total+", final length="+r.length());
|
jpayne@68
|
308 }
|
jpayne@68
|
309 return total;
|
jpayne@68
|
310 }
|
jpayne@68
|
311
|
jpayne@68
|
312 /** Count number of bases that need trimming on each side, and pack into a long */
|
jpayne@68
|
313 public static long testOptimal(byte[] bases, byte[] qual, float avgErrorRate){
|
jpayne@68
|
314 if(optimalBias>=0){avgErrorRate=optimalBias;}//Override
|
jpayne@68
|
315 assert(avgErrorRate>0 && avgErrorRate<=1) : "Average error rate ("+avgErrorRate+") must be between 0 (exclusive) and 1 (inclusive)";
|
jpayne@68
|
316 if(bases==null || bases.length==0){return 0;}
|
jpayne@68
|
317 if(qual==null){return avgErrorRate>=1 ? 0 : ((((long)testLeftN(bases))<<32) | (((long)testRightN(bases))&0xFFFFFFFFL));}
|
jpayne@68
|
318
|
jpayne@68
|
319 float maxScore=0;
|
jpayne@68
|
320 float score=0;
|
jpayne@68
|
321 int maxLoc=-1;
|
jpayne@68
|
322 int maxCount=-1;
|
jpayne@68
|
323 int count=0;
|
jpayne@68
|
324
|
jpayne@68
|
325 final float nprob=Tools.max(Tools.min(avgErrorRate*1.1f, 1), NPROB);
|
jpayne@68
|
326
|
jpayne@68
|
327 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
328 final byte b=bases[i];
|
jpayne@68
|
329 byte q=qual[i];
|
jpayne@68
|
330 // float probError=(b=='N' ? nprob : ADJUST_QUALITY ? CalcTrueQuality.estimateErrorProbAvg(qual, bases, i) : QualityTools.PROB_ERROR[q]);
|
jpayne@68
|
331 // float probError=(b=='N' ? nprob : ADJUST_QUALITY ? CalcTrueQuality.estimateErrorProbGeoAvg(qual, bases, i) : QualityTools.PROB_ERROR[q]);
|
jpayne@68
|
332 // float probError=(b=='N' ? nprob : ADJUST_QUALITY ? CalcTrueQuality.estimateErrorProb2(qual, bases, i) : QualityTools.PROB_ERROR[q]);
|
jpayne@68
|
333
|
jpayne@68
|
334 // float probError=(b=='N' ? nprob : q==1 ? PROB1 : QualityTools.PROB_ERROR[q]);
|
jpayne@68
|
335 // float probError=(b=='N' ? nprob : QualityTools.PROB_ERROR[q]);
|
jpayne@68
|
336 float probError=((b=='N' || q<1) ? nprob : QualityTools.PROB_ERROR[q]);
|
jpayne@68
|
337
|
jpayne@68
|
338 // assert(q>0 || b=='N') : "index "+i+": q="+q+", b="+(char)b+"\n"+new String(bases)+"\n"+Arrays.toString(qual)+"\n";
|
jpayne@68
|
339
|
jpayne@68
|
340 float delta=avgErrorRate-probError;
|
jpayne@68
|
341 score=score+delta;
|
jpayne@68
|
342 if(score>0){
|
jpayne@68
|
343 count++;
|
jpayne@68
|
344 if(score>maxScore || (score==maxScore && count>maxCount)){
|
jpayne@68
|
345 maxScore=score;
|
jpayne@68
|
346 maxCount=count;
|
jpayne@68
|
347 maxLoc=i;
|
jpayne@68
|
348 }
|
jpayne@68
|
349 }else{
|
jpayne@68
|
350 score=0;
|
jpayne@68
|
351 count=0;
|
jpayne@68
|
352 }
|
jpayne@68
|
353 }
|
jpayne@68
|
354
|
jpayne@68
|
355 final int left, right;
|
jpayne@68
|
356 if(maxScore>0){
|
jpayne@68
|
357 assert(maxLoc>=0);
|
jpayne@68
|
358 assert(maxCount>0);
|
jpayne@68
|
359 left=maxLoc-maxCount+1;
|
jpayne@68
|
360 assert(left>=0 && left<=bases.length);
|
jpayne@68
|
361 right=bases.length-maxLoc-1;
|
jpayne@68
|
362 }else{
|
jpayne@68
|
363 left=0;
|
jpayne@68
|
364 right=bases.length;
|
jpayne@68
|
365 }
|
jpayne@68
|
366 final long packed=((((long)left)<<32) | (((long)right)&0xFFFFFFFFL));
|
jpayne@68
|
367
|
jpayne@68
|
368 if(verbose){
|
jpayne@68
|
369 System.err.println(Arrays.toString(qual));
|
jpayne@68
|
370 System.err.println("After testLocal: maxScore="+maxScore+", maxLoc="+maxLoc+", maxCount="+maxCount+
|
jpayne@68
|
371 ", left="+left+", right="+right+", returning "+Long.toHexString(packed));
|
jpayne@68
|
372 }
|
jpayne@68
|
373 return packed;
|
jpayne@68
|
374 }
|
jpayne@68
|
375
|
jpayne@68
|
376 /** Count number of bases that need trimming on left side */
|
jpayne@68
|
377 private static int testLeft(byte[] bases, byte[] qual, final byte trimq){
|
jpayne@68
|
378 if(bases==null || bases.length==0){return 0;}
|
jpayne@68
|
379 if(qual==null){return trimq<0 ? 0 : testLeftN(bases);}
|
jpayne@68
|
380 int good=0;
|
jpayne@68
|
381 int lastBad=-1;
|
jpayne@68
|
382 int i=0;
|
jpayne@68
|
383 for(; i<bases.length && good<minGoodInterval; i++){
|
jpayne@68
|
384 final byte q=qual[i];
|
jpayne@68
|
385 final byte b=bases[i];
|
jpayne@68
|
386 assert(q>0 || b=='N') : "index "+i+": q="+q+", b="+(char)b+"\n"+new String(bases)+"\n"+Arrays.toString(qual)+"\n";
|
jpayne@68
|
387 if(q>trimq){good++;}
|
jpayne@68
|
388 else{good=0; lastBad=i;}
|
jpayne@68
|
389 }
|
jpayne@68
|
390 if(verbose){
|
jpayne@68
|
391 // System.err.println(Arrays.toString(qual));
|
jpayne@68
|
392 System.err.println("After testLeft: good="+good+", lastBad="+lastBad+", i="+i+", returning "+(lastBad+1));
|
jpayne@68
|
393 // assert(false);
|
jpayne@68
|
394 }
|
jpayne@68
|
395 return lastBad+1;
|
jpayne@68
|
396 }
|
jpayne@68
|
397
|
jpayne@68
|
398 /** Count number of bases that need trimming on right side using a sliding window */
|
jpayne@68
|
399 private static int testRightWindow(byte[] bases, byte[] qual, final byte trimq, final int window){
|
jpayne@68
|
400 if(bases==null || bases.length==0){return 0;}
|
jpayne@68
|
401 if(qual==null || qual.length<window){return trimq>0 ? 0 : testRightN(bases);}
|
jpayne@68
|
402 final int thresh=Tools.max(window*trimq, 1);
|
jpayne@68
|
403 int sum=0;
|
jpayne@68
|
404 for(int i=0, j=-window; i<qual.length; i++, j++){
|
jpayne@68
|
405 final byte q=qual[i];
|
jpayne@68
|
406 sum+=q;
|
jpayne@68
|
407 if(j>=-1){
|
jpayne@68
|
408 if(j>=0){sum-=qual[j];}
|
jpayne@68
|
409 if(sum<thresh){
|
jpayne@68
|
410 return qual.length-j-1;
|
jpayne@68
|
411 }
|
jpayne@68
|
412 }
|
jpayne@68
|
413 }
|
jpayne@68
|
414 return 0;
|
jpayne@68
|
415 }
|
jpayne@68
|
416
|
jpayne@68
|
417 /** Count number of bases that need trimming on right side */
|
jpayne@68
|
418 private static int testRight(byte[] bases, byte[] qual, final byte trimq){
|
jpayne@68
|
419 if(bases==null || bases.length==0){return 0;}
|
jpayne@68
|
420 if(qual==null){return trimq<0 ? 0 : testRightN(bases);}
|
jpayne@68
|
421 int good=0;
|
jpayne@68
|
422 int lastBad=bases.length;
|
jpayne@68
|
423 int i=bases.length-1;
|
jpayne@68
|
424 for(; i>=0 && good<minGoodInterval; i--){
|
jpayne@68
|
425 final byte q=qual[i];
|
jpayne@68
|
426 final byte b=bases[i];
|
jpayne@68
|
427 assert(q>0 || b=='N') : "index "+i+": q="+q+", b="+(char)b+"\n"+new String(bases)+"\n"+Arrays.toString(qual)+"\n";
|
jpayne@68
|
428 if(q>trimq){good++;}
|
jpayne@68
|
429 else{good=0; lastBad=i;}
|
jpayne@68
|
430 }
|
jpayne@68
|
431 if(verbose){
|
jpayne@68
|
432 System.err.println("After trimLeft: good="+good+", lastBad="+lastBad+", i="+i+", returning "+(bases.length-lastBad));
|
jpayne@68
|
433 }
|
jpayne@68
|
434 return bases.length-lastBad;
|
jpayne@68
|
435 }
|
jpayne@68
|
436
|
jpayne@68
|
437 /** Count number of bases that need trimming on left side, considering only N as bad */
|
jpayne@68
|
438 public static int testLeftN(byte[] bases){
|
jpayne@68
|
439 if(bases==null || bases.length==0){return 0;}
|
jpayne@68
|
440 int good=0;
|
jpayne@68
|
441 int lastBad=-1;
|
jpayne@68
|
442 for(int i=0; i<bases.length && good<minGoodInterval; i++){
|
jpayne@68
|
443 final byte b=bases[i];
|
jpayne@68
|
444 //if(dna.AminoAcid.isFullyDefined(b)){good++;}
|
jpayne@68
|
445 if(b!=((byte)'N')){good++;}
|
jpayne@68
|
446 else{good=0; lastBad=i;}
|
jpayne@68
|
447 }
|
jpayne@68
|
448 return lastBad+1;
|
jpayne@68
|
449 }
|
jpayne@68
|
450
|
jpayne@68
|
451 /** Count number of bases that need trimming on right side, considering only N as bad */
|
jpayne@68
|
452 public static int testRightN(byte[] bases){
|
jpayne@68
|
453 if(bases==null || bases.length==0){return 0;}
|
jpayne@68
|
454 int good=0;
|
jpayne@68
|
455 int lastBad=bases.length;
|
jpayne@68
|
456 for(int i=bases.length-1; i>=0 && good<minGoodInterval; i--){
|
jpayne@68
|
457 final byte b=bases[i];
|
jpayne@68
|
458 //if(dna.AminoAcid.isFullyDefined(b)){good++;}
|
jpayne@68
|
459 if(b!=((byte)'N')){good++;}
|
jpayne@68
|
460 else{good=0; lastBad=i;}
|
jpayne@68
|
461 }
|
jpayne@68
|
462 return bases.length-lastBad;
|
jpayne@68
|
463 }
|
jpayne@68
|
464
|
jpayne@68
|
465 public boolean untrim(){
|
jpayne@68
|
466 if(leftTrimmed==0 && rightTrimmed==0){return false;}
|
jpayne@68
|
467 r.setPerfect(false);
|
jpayne@68
|
468
|
jpayne@68
|
469 final int lt, rt;
|
jpayne@68
|
470 if(r.strand()==Shared.PLUS){
|
jpayne@68
|
471 lt=leftTrimmed;
|
jpayne@68
|
472 rt=rightTrimmed;
|
jpayne@68
|
473 }else{
|
jpayne@68
|
474 lt=rightTrimmed;
|
jpayne@68
|
475 rt=leftTrimmed;
|
jpayne@68
|
476 }
|
jpayne@68
|
477
|
jpayne@68
|
478 boolean returnToShort=false;
|
jpayne@68
|
479 if(verbose){System.err.println("Untrimming");}
|
jpayne@68
|
480 if(r.match!=null){
|
jpayne@68
|
481 if(r.shortmatch()){
|
jpayne@68
|
482 r.toLongMatchString(false);
|
jpayne@68
|
483 returnToShort=true;
|
jpayne@68
|
484 }
|
jpayne@68
|
485 byte[] match2=new byte[r.match.length+lt+rt];
|
jpayne@68
|
486 int i=0;
|
jpayne@68
|
487 for(; i<lt; i++){
|
jpayne@68
|
488 match2[i]='C';
|
jpayne@68
|
489 }
|
jpayne@68
|
490 for(int j=0; j<r.match.length; i++, j++){
|
jpayne@68
|
491 match2[i]=r.match[j];
|
jpayne@68
|
492 }
|
jpayne@68
|
493 for(; i<match2.length; i++){
|
jpayne@68
|
494 match2[i]='C';
|
jpayne@68
|
495 }
|
jpayne@68
|
496 r.match=match2;
|
jpayne@68
|
497 }
|
jpayne@68
|
498 r.bases=bases1;
|
jpayne@68
|
499 r.quality=qual1;
|
jpayne@68
|
500 r.start-=lt;
|
jpayne@68
|
501 r.stop+=rt;
|
jpayne@68
|
502 if(returnToShort){
|
jpayne@68
|
503 r.toShortMatchString(true);
|
jpayne@68
|
504 }
|
jpayne@68
|
505
|
jpayne@68
|
506 if(r.sites!=null){
|
jpayne@68
|
507 for(SiteScore ss : r.sites){untrim(ss);}
|
jpayne@68
|
508 }
|
jpayne@68
|
509
|
jpayne@68
|
510 return true;
|
jpayne@68
|
511 }
|
jpayne@68
|
512
|
jpayne@68
|
513 private boolean untrim(SiteScore ss){
|
jpayne@68
|
514 if(ss==null){return false;}
|
jpayne@68
|
515 if(leftTrimmed==0 && rightTrimmed==0){return false;}
|
jpayne@68
|
516 ss.perfect=ss.semiperfect=false;
|
jpayne@68
|
517
|
jpayne@68
|
518 final int lt, rt;
|
jpayne@68
|
519 if(ss.strand==Shared.PLUS){
|
jpayne@68
|
520 lt=leftTrimmed;
|
jpayne@68
|
521 rt=rightTrimmed;
|
jpayne@68
|
522 }else{
|
jpayne@68
|
523 lt=rightTrimmed;
|
jpayne@68
|
524 rt=leftTrimmed;
|
jpayne@68
|
525 }
|
jpayne@68
|
526
|
jpayne@68
|
527 boolean returnToShort=false;
|
jpayne@68
|
528 if(verbose){System.err.println("Untrimming ss "+ss);}
|
jpayne@68
|
529 if(ss.match!=null){
|
jpayne@68
|
530
|
jpayne@68
|
531 boolean shortmatch=false;
|
jpayne@68
|
532 for(byte b : ss.match){
|
jpayne@68
|
533 if(Tools.isDigit(b)){shortmatch=true; break;}
|
jpayne@68
|
534 }
|
jpayne@68
|
535
|
jpayne@68
|
536 if(shortmatch){
|
jpayne@68
|
537 ss.match=Read.toLongMatchString(ss.match);
|
jpayne@68
|
538 returnToShort=true;
|
jpayne@68
|
539 }
|
jpayne@68
|
540 byte[] match2=new byte[ss.match.length+lt+rt];
|
jpayne@68
|
541 int i=0;
|
jpayne@68
|
542 for(; i<lt; i++){
|
jpayne@68
|
543 match2[i]='C';
|
jpayne@68
|
544 }
|
jpayne@68
|
545 for(int j=0; j<ss.match.length; i++, j++){
|
jpayne@68
|
546 match2[i]=ss.match[j];
|
jpayne@68
|
547 }
|
jpayne@68
|
548 for(; i<match2.length; i++){
|
jpayne@68
|
549 match2[i]='C';
|
jpayne@68
|
550 }
|
jpayne@68
|
551 ss.match=match2;
|
jpayne@68
|
552 }
|
jpayne@68
|
553 ss.setLimits(ss.start-lt, ss.stop+rt);
|
jpayne@68
|
554 if(returnToShort){ss.match=Read.toShortMatchString(ss.match);}
|
jpayne@68
|
555 return true;
|
jpayne@68
|
556 }
|
jpayne@68
|
557
|
jpayne@68
|
558 private static boolean trimMatch(Read r){
|
jpayne@68
|
559 if(r.match==null && r.sites==null){return false;}
|
jpayne@68
|
560
|
jpayne@68
|
561 //Easy mode!
|
jpayne@68
|
562 r.match=null;
|
jpayne@68
|
563 if(r.sites!=null){
|
jpayne@68
|
564 for(SiteScore ss : r.sites){
|
jpayne@68
|
565 if(ss!=null){ss.match=null;}
|
jpayne@68
|
566 }
|
jpayne@68
|
567 }
|
jpayne@68
|
568 return true;
|
jpayne@68
|
569
|
jpayne@68
|
570 //TODO - need to adjust read start and stop based on this information. Also check strand!
|
jpayne@68
|
571 // byte[] match=r.match;
|
jpayne@68
|
572 // if(r.shortmatch()){match=Read.toLongMatchString(match);}
|
jpayne@68
|
573 // byte[] match2=new byte[match.length-leftTrimmed-rightTrimmed];
|
jpayne@68
|
574 // for(int mpos=0, bpos=0; bpos<leftTrimmed; mpos++){
|
jpayne@68
|
575 // byte m=match[mpos];
|
jpayne@68
|
576 // if(m=='D'){
|
jpayne@68
|
577 // //do nothing
|
jpayne@68
|
578 // }else{
|
jpayne@68
|
579 // bpos++;
|
jpayne@68
|
580 // }
|
jpayne@68
|
581 // }
|
jpayne@68
|
582 }
|
jpayne@68
|
583
|
jpayne@68
|
584 /** Special case of 100% match, or no match string */
|
jpayne@68
|
585 public static int trimReadWithMatchFast(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength){
|
jpayne@68
|
586 assert(r.match!=null || sl!=null);
|
jpayne@68
|
587 if(r.bases==null){return 0;}
|
jpayne@68
|
588 if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
|
jpayne@68
|
589 if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
|
jpayne@68
|
590
|
jpayne@68
|
591 final boolean shortmatch=r.shortmatch();
|
jpayne@68
|
592 final byte[] oldMatch=r.match;
|
jpayne@68
|
593 r.match=null;
|
jpayne@68
|
594 r.samline=null;
|
jpayne@68
|
595 final int trimmed;
|
jpayne@68
|
596 // System.err.println(rightTrimAmount+", "+leftTrimAmount);
|
jpayne@68
|
597 if(sl!=null && sl.strand()==Shared.MINUS){
|
jpayne@68
|
598 trimmed=trimByAmount(r, rightTrimAmount, leftTrimAmount, minFinalLength, false);
|
jpayne@68
|
599 }else{
|
jpayne@68
|
600 trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
|
jpayne@68
|
601 }
|
jpayne@68
|
602 r.samline=sl;
|
jpayne@68
|
603 if(trimmed<1){
|
jpayne@68
|
604 r.match=oldMatch;
|
jpayne@68
|
605 return 0;
|
jpayne@68
|
606 }
|
jpayne@68
|
607
|
jpayne@68
|
608 final int len=r.length();
|
jpayne@68
|
609 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
610 if(oldMatch!=null){
|
jpayne@68
|
611 if(shortmatch){
|
jpayne@68
|
612 bb.append((byte)'m');
|
jpayne@68
|
613 if(len>1){bb.append(len);}
|
jpayne@68
|
614 }else{
|
jpayne@68
|
615 for(int i=0; i<len; i++){bb.append((byte)'m');}
|
jpayne@68
|
616 }
|
jpayne@68
|
617 r.match=bb.toBytes();
|
jpayne@68
|
618 bb.clear();
|
jpayne@68
|
619 }
|
jpayne@68
|
620
|
jpayne@68
|
621 if(sl!=null){
|
jpayne@68
|
622 sl.pos+=leftTrimAmount;
|
jpayne@68
|
623 if(sl.cigar!=null && sl.cigar.length()>0){
|
jpayne@68
|
624 char c=sl.cigar.charAt(sl.cigar.length()-1);
|
jpayne@68
|
625 assert(c=='M' || c=='=') : c+"; "+sl.cigar+"\n"+sl;
|
jpayne@68
|
626 bb.append(len);
|
jpayne@68
|
627 bb.append(SamLine.VERSION>1.3 ? '=' : 'M');
|
jpayne@68
|
628 sl.cigar=bb.toString();
|
jpayne@68
|
629 }
|
jpayne@68
|
630 sl.seq=r.bases;
|
jpayne@68
|
631 sl.qual=r.quality;
|
jpayne@68
|
632 if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
|
jpayne@68
|
633 ArrayList<String> list=new ArrayList<String>(2);
|
jpayne@68
|
634 for(int i=0; i<sl.optional.size(); i++){
|
jpayne@68
|
635 String s=sl.optional.get(i);
|
jpayne@68
|
636 if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
|
jpayne@68
|
637 }
|
jpayne@68
|
638 sl.optional.clear();
|
jpayne@68
|
639 sl.optional.addAll(list);
|
jpayne@68
|
640 }
|
jpayne@68
|
641 }
|
jpayne@68
|
642 return trimmed;
|
jpayne@68
|
643 }
|
jpayne@68
|
644
|
jpayne@68
|
645 // //Should be unneeded, just use the above function
|
jpayne@68
|
646 // public static int trimReadWithoutMatch(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength){
|
jpayne@68
|
647 // if(r.bases==null){return 0;}
|
jpayne@68
|
648 // if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
|
jpayne@68
|
649 // if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
|
jpayne@68
|
650 //
|
jpayne@68
|
651 // assert(r.match==null);
|
jpayne@68
|
652 // final int trimmed;
|
jpayne@68
|
653 // if(sl!=null && sl.strand()==Shared.MINUS){
|
jpayne@68
|
654 // trimmed=trimByAmount(r, rightTrimAmount, leftTrimAmount, minFinalLength, false);
|
jpayne@68
|
655 // }else{
|
jpayne@68
|
656 // trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
|
jpayne@68
|
657 // }
|
jpayne@68
|
658 // if(trimmed<1){return 0;}
|
jpayne@68
|
659 //
|
jpayne@68
|
660 // if(sl!=null){
|
jpayne@68
|
661 // sl.pos+=leftTrimAmount;
|
jpayne@68
|
662 // assert(sl.cigar==null);
|
jpayne@68
|
663 // sl.seq=r.bases;
|
jpayne@68
|
664 // sl.qual=r.quality;
|
jpayne@68
|
665 // if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
|
jpayne@68
|
666 // ArrayList<String> list=new ArrayList<String>(2);
|
jpayne@68
|
667 // for(int i=0; i<sl.optional.size(); i++){
|
jpayne@68
|
668 // String s=sl.optional.get(i);
|
jpayne@68
|
669 // if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
|
jpayne@68
|
670 // }
|
jpayne@68
|
671 // sl.optional.clear();
|
jpayne@68
|
672 // sl.optional.addAll(list);
|
jpayne@68
|
673 // }
|
jpayne@68
|
674 // }
|
jpayne@68
|
675 // return trimmed;
|
jpayne@68
|
676 // }
|
jpayne@68
|
677
|
jpayne@68
|
678 //TODO: This is slow
|
jpayne@68
|
679 //TODO: Note, returns a negative number if the whole read is supposed to be trimmed
|
jpayne@68
|
680 public static int trimReadWithMatch(final Read r, final SamLine sl,
|
jpayne@68
|
681 int leftTrimAmount, int rightTrimAmount, int minFinalLength, long scafLen, boolean trimClip){
|
jpayne@68
|
682 if(r.bases==null || (leftTrimAmount<1 && rightTrimAmount<1 && !trimClip)){return 0;}
|
jpayne@68
|
683 if(!r.containsNonM() && !trimClip){//TODO: H not handled
|
jpayne@68
|
684 return trimReadWithMatchFast(r, sl, leftTrimAmount, rightTrimAmount, minFinalLength);
|
jpayne@68
|
685 }
|
jpayne@68
|
686
|
jpayne@68
|
687 // if(r.match==null){
|
jpayne@68
|
688 // assert(sl.cigar==null);
|
jpayne@68
|
689 // int trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
|
jpayne@68
|
690 // if(sl!=null){
|
jpayne@68
|
691 // assert(sl.cigar==null);
|
jpayne@68
|
692 // int start=sl.pos-1;
|
jpayne@68
|
693 // int stop=start+sl.seq.length;
|
jpayne@68
|
694 // sl.seq=r.bases;
|
jpayne@68
|
695 // sl.qual=r.quality;
|
jpayne@68
|
696 // if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
|
jpayne@68
|
697 // ArrayList<String> list=new ArrayList<String>(2);
|
jpayne@68
|
698 // for(int i=0; i<sl.optional.size(); i++){
|
jpayne@68
|
699 // String s=sl.optional.get(i);
|
jpayne@68
|
700 // if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
|
jpayne@68
|
701 // }
|
jpayne@68
|
702 // sl.optional.clear();
|
jpayne@68
|
703 // sl.optional.addAll(list);
|
jpayne@68
|
704 // }
|
jpayne@68
|
705 // }
|
jpayne@68
|
706 // }
|
jpayne@68
|
707
|
jpayne@68
|
708 if(trimClip){
|
jpayne@68
|
709 r.toLongMatchString(false);
|
jpayne@68
|
710 byte[] match=r.match;
|
jpayne@68
|
711 int leftClip=0, rightClip=0;
|
jpayne@68
|
712 for(int i=0; i<match.length; i++){
|
jpayne@68
|
713 if(match[i]=='C'){leftClip++;}
|
jpayne@68
|
714 else{break;}
|
jpayne@68
|
715 }
|
jpayne@68
|
716 for(int i=match.length-1; i>=0; i--){
|
jpayne@68
|
717 if(match[i]=='C'){rightClip++;}
|
jpayne@68
|
718 else{break;}
|
jpayne@68
|
719 }
|
jpayne@68
|
720 leftTrimAmount=Tools.max(leftTrimAmount, leftClip);
|
jpayne@68
|
721 rightTrimAmount=Tools.max(rightTrimAmount, rightClip);
|
jpayne@68
|
722 }
|
jpayne@68
|
723
|
jpayne@68
|
724 if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
|
jpayne@68
|
725 if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
|
jpayne@68
|
726
|
jpayne@68
|
727 final int oldPos=(sl==null ? 1 : sl.pos);
|
jpayne@68
|
728
|
jpayne@68
|
729 r.toLongMatchString(false);
|
jpayne@68
|
730 byte[] match=r.match;
|
jpayne@68
|
731
|
jpayne@68
|
732 assert(!r.shortmatch());
|
jpayne@68
|
733
|
jpayne@68
|
734 // System.err.println("Q: "+sl);
|
jpayne@68
|
735 // System.err.println(new String(match));
|
jpayne@68
|
736
|
jpayne@68
|
737 ByteBuilder bb=new ByteBuilder(match.length);
|
jpayne@68
|
738 // System.err.println("leftTrimAmount="+leftTrimAmount+", rightTrimAmount="+rightTrimAmount);
|
jpayne@68
|
739
|
jpayne@68
|
740 int leftTrim=leftTrimAmount>0 ? r.length() : 0, rightTrim=rightTrimAmount>0 ? r.length() : 0;
|
jpayne@68
|
741 // System.err.println("leftTrim="+leftTrim+", rightTrim="+rightTrim);
|
jpayne@68
|
742 {
|
jpayne@68
|
743 final int mlen=match.length;
|
jpayne@68
|
744 boolean keep=leftTrimAmount<1;
|
jpayne@68
|
745 int rpos=(sl==null ? 1 : sl.pos);
|
jpayne@68
|
746 for(int mpos=0, cpos=0; mpos<mlen; mpos++){
|
jpayne@68
|
747 final byte m=match[mpos];
|
jpayne@68
|
748 if(m=='m' || m=='S' || m=='V' || m=='N'){
|
jpayne@68
|
749 cpos++;
|
jpayne@68
|
750 rpos++;
|
jpayne@68
|
751 }else if(m=='D'){
|
jpayne@68
|
752 rpos++;
|
jpayne@68
|
753 }else if(m=='I' || m=='C'){
|
jpayne@68
|
754 cpos++;
|
jpayne@68
|
755 }else if(m=='d'){
|
jpayne@68
|
756 rpos++;
|
jpayne@68
|
757 }else if(m=='i'){
|
jpayne@68
|
758 cpos++;
|
jpayne@68
|
759 }else{
|
jpayne@68
|
760 assert(false) : "Unknown symbol "+(char)m;
|
jpayne@68
|
761 }
|
jpayne@68
|
762
|
jpayne@68
|
763 if(keep){
|
jpayne@68
|
764 bb.append(m);
|
jpayne@68
|
765 }else if(cpos>=leftTrimAmount){
|
jpayne@68
|
766 byte next=(mpos<mlen-1 ? match[mpos+1] : (byte)'m');
|
jpayne@68
|
767 if(m=='m' || m=='S' || m=='V' || m=='N' || next=='m' || next=='S' || next=='V' || next=='N'){
|
jpayne@68
|
768 keep=true;
|
jpayne@68
|
769 if(sl!=null){sl.pos=rpos;}
|
jpayne@68
|
770 leftTrim=cpos;
|
jpayne@68
|
771 }
|
jpayne@68
|
772 }
|
jpayne@68
|
773 }
|
jpayne@68
|
774 }
|
jpayne@68
|
775 // System.err.println("R: "+sl);
|
jpayne@68
|
776 match=bb.toBytes();
|
jpayne@68
|
777 Tools.reverseInPlace(match);
|
jpayne@68
|
778 bb.clear();
|
jpayne@68
|
779 {
|
jpayne@68
|
780 final int mlen=match.length;
|
jpayne@68
|
781 boolean keep=rightTrimAmount<1;
|
jpayne@68
|
782 for(int mpos=0, cpos=0; mpos<mlen; mpos++){
|
jpayne@68
|
783 final byte m=match[mpos];
|
jpayne@68
|
784 if(m=='m' || m=='S' || m=='V' || m=='N' || m=='I' || m=='C'){
|
jpayne@68
|
785 cpos++;
|
jpayne@68
|
786 }else if(m=='D'){
|
jpayne@68
|
787 }else if(m=='d'){
|
jpayne@68
|
788 }else if(m=='i'){
|
jpayne@68
|
789 cpos++;
|
jpayne@68
|
790 }else{
|
jpayne@68
|
791 assert(false) : "Unknown symbol "+(char)m;
|
jpayne@68
|
792 }
|
jpayne@68
|
793
|
jpayne@68
|
794 if(keep){
|
jpayne@68
|
795 bb.append(m);
|
jpayne@68
|
796 }else if(cpos>=rightTrimAmount){
|
jpayne@68
|
797 byte next=(mpos<mlen-1 ? match[mpos+1] : (byte)'m');
|
jpayne@68
|
798 // if(m=='m' || m=='S' || m=='V' || m=='N' || next=='m' || next=='S' || next=='V' || next=='N'){
|
jpayne@68
|
799 if(next!='I' && next!='D' && next!='i' && next!='d'){
|
jpayne@68
|
800 keep=true;
|
jpayne@68
|
801 rightTrim=cpos;
|
jpayne@68
|
802 }
|
jpayne@68
|
803 // }
|
jpayne@68
|
804 }
|
jpayne@68
|
805 }
|
jpayne@68
|
806 }
|
jpayne@68
|
807 // System.err.println("S: "+sl);
|
jpayne@68
|
808 match=bb.toBytes();
|
jpayne@68
|
809 Tools.reverseInPlace(match);
|
jpayne@68
|
810
|
jpayne@68
|
811 // System.err.println("leftTrim="+leftTrim+", rightTrim="+rightTrim);
|
jpayne@68
|
812
|
jpayne@68
|
813 if(leftTrim+rightTrim>=r.length()){
|
jpayne@68
|
814 if(sl!=null){sl.pos=oldPos;}
|
jpayne@68
|
815 return -leftTrim-rightTrim;
|
jpayne@68
|
816 }
|
jpayne@68
|
817
|
jpayne@68
|
818 // System.err.println("T: "+sl);
|
jpayne@68
|
819 r.match=null;
|
jpayne@68
|
820 if(sl!=null && sl.strand()==Shared.MINUS){
|
jpayne@68
|
821 int temp=leftTrim;
|
jpayne@68
|
822 leftTrim=rightTrim;
|
jpayne@68
|
823 rightTrim=temp;
|
jpayne@68
|
824 }
|
jpayne@68
|
825 final int trimmed=trimByAmount(r, leftTrim, rightTrim, minFinalLength, false);
|
jpayne@68
|
826 // System.err.println("tba: "+leftTrim+", "+rightTrim+", "+minFinalLength);
|
jpayne@68
|
827 r.match=match;
|
jpayne@68
|
828
|
jpayne@68
|
829 if(sl!=null){
|
jpayne@68
|
830 int start=sl.pos-1;
|
jpayne@68
|
831 int stop=start+Read.calcMatchLength(match)-1;
|
jpayne@68
|
832 if(SamLine.VERSION>1.3){
|
jpayne@68
|
833 sl.cigar=SamLine.toCigar14(match, start, stop, scafLen, r.bases);
|
jpayne@68
|
834 }else{
|
jpayne@68
|
835 sl.cigar=SamLine.toCigar13(match, start, stop, scafLen, r.bases);
|
jpayne@68
|
836 }
|
jpayne@68
|
837 sl.seq=r.bases;
|
jpayne@68
|
838 sl.qual=r.quality;
|
jpayne@68
|
839 if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
|
jpayne@68
|
840 ArrayList<String> list=new ArrayList<String>(2);
|
jpayne@68
|
841 for(int i=0; i<sl.optional.size(); i++){
|
jpayne@68
|
842 String s=sl.optional.get(i);
|
jpayne@68
|
843 if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
|
jpayne@68
|
844 }
|
jpayne@68
|
845 sl.optional.clear();
|
jpayne@68
|
846 sl.optional.addAll(list);
|
jpayne@68
|
847 }
|
jpayne@68
|
848 }
|
jpayne@68
|
849 return trimmed;
|
jpayne@68
|
850 }
|
jpayne@68
|
851
|
jpayne@68
|
852 public int trimmed() {
|
jpayne@68
|
853 return leftTrimmed+rightTrimmed;
|
jpayne@68
|
854 }
|
jpayne@68
|
855
|
jpayne@68
|
856 public final Read r;
|
jpayne@68
|
857
|
jpayne@68
|
858 /** untrimmed bases */
|
jpayne@68
|
859 public final byte[] bases1;
|
jpayne@68
|
860 /** untrimmed qualities */
|
jpayne@68
|
861 public final byte[] qual1;
|
jpayne@68
|
862 /** trimmed bases */
|
jpayne@68
|
863 public byte[] bases2;
|
jpayne@68
|
864 /** trimmed qualities */
|
jpayne@68
|
865 public byte[] qual2;
|
jpayne@68
|
866
|
jpayne@68
|
867
|
jpayne@68
|
868 public final float trimq;
|
jpayne@68
|
869 public int leftTrimmed;
|
jpayne@68
|
870 public int rightTrimmed;
|
jpayne@68
|
871
|
jpayne@68
|
872 /** Require this many consecutive good bases to stop trimming. Minimum is 1.
|
jpayne@68
|
873 * This is for the old trimming mode and not really used anymore */
|
jpayne@68
|
874 public static int minGoodInterval=2;
|
jpayne@68
|
875
|
jpayne@68
|
876 public static boolean verbose=false;
|
jpayne@68
|
877 public static boolean optimalMode=true;
|
jpayne@68
|
878 public static boolean windowMode=false;
|
jpayne@68
|
879 public static float optimalBias=-1f;
|
jpayne@68
|
880
|
jpayne@68
|
881 public static int windowLength=4;
|
jpayne@68
|
882
|
jpayne@68
|
883 private static final float NPROB=0.75f;
|
jpayne@68
|
884 // public static float PROB1=QualityTools.PROB_ERROR[1];
|
jpayne@68
|
885
|
jpayne@68
|
886
|
jpayne@68
|
887 }
|