jpayne@68
|
1 package clump;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4
|
jpayne@68
|
5 import dna.AminoAcid;
|
jpayne@68
|
6 import shared.KillSwitch;
|
jpayne@68
|
7 import shared.Parse;
|
jpayne@68
|
8 import shared.Tools;
|
jpayne@68
|
9 import stream.Read;
|
jpayne@68
|
10 import structures.ByteBuilder;
|
jpayne@68
|
11
|
jpayne@68
|
12 /**
|
jpayne@68
|
13 * A list of reads sharing a kmer.
|
jpayne@68
|
14 * @author Brian Bushnell
|
jpayne@68
|
15 * @date Nov 7, 2015
|
jpayne@68
|
16 *
|
jpayne@68
|
17 */
|
jpayne@68
|
18 public class Clump extends ArrayList<Read> implements Comparable<Clump> {
|
jpayne@68
|
19
|
jpayne@68
|
20 public static Clump makeClump(long kmer){
|
jpayne@68
|
21 try {
|
jpayne@68
|
22 return new Clump(kmer);
|
jpayne@68
|
23 } catch (OutOfMemoryError e) {
|
jpayne@68
|
24 KillSwitch.memKill(e);
|
jpayne@68
|
25 throw new RuntimeException();
|
jpayne@68
|
26 }
|
jpayne@68
|
27 }
|
jpayne@68
|
28
|
jpayne@68
|
29 private Clump(long kmer_){
|
jpayne@68
|
30 this(kmer_, 4);
|
jpayne@68
|
31 }
|
jpayne@68
|
32
|
jpayne@68
|
33 private Clump(long kmer_, int size){
|
jpayne@68
|
34 super(size);
|
jpayne@68
|
35 kmer=kmer_;
|
jpayne@68
|
36 }
|
jpayne@68
|
37
|
jpayne@68
|
38 @Override
|
jpayne@68
|
39 public boolean add(Read r){
|
jpayne@68
|
40 ReadKey key=(ReadKey) r.obj;
|
jpayne@68
|
41 assert(key.kmer==kmer);
|
jpayne@68
|
42 key.clump=this;
|
jpayne@68
|
43 return super.add(r);
|
jpayne@68
|
44 }
|
jpayne@68
|
45
|
jpayne@68
|
46 private void setMaxima(){
|
jpayne@68
|
47 maxLeft=-1;
|
jpayne@68
|
48 maxRight=-1;
|
jpayne@68
|
49 width=-1;
|
jpayne@68
|
50 for(Read r : this){
|
jpayne@68
|
51 ReadKey key=(ReadKey) r.obj;
|
jpayne@68
|
52 final int pos=key.position;
|
jpayne@68
|
53 maxLeft=Tools.max(maxLeft, pos);
|
jpayne@68
|
54 maxRight=Tools.max(maxRight, r.length()-pos);
|
jpayne@68
|
55 }
|
jpayne@68
|
56 width=maxLeft+maxRight;
|
jpayne@68
|
57 }
|
jpayne@68
|
58
|
jpayne@68
|
59 /** This will create counts of bases, or sums of qualities, at each position in the cluster. */
|
jpayne@68
|
60 private int[][] count(final boolean qualityScores){
|
jpayne@68
|
61 if(width<0){setMaxima();}
|
jpayne@68
|
62
|
jpayne@68
|
63 // System.err.println("\n\n");
|
jpayne@68
|
64 final int[][] counts=new int[4][width];
|
jpayne@68
|
65 for(Read r : this){
|
jpayne@68
|
66 ReadKey key=(ReadKey) r.obj;
|
jpayne@68
|
67 final int pos=key.position;
|
jpayne@68
|
68 byte[] bases=r.bases, quals=r.quality;
|
jpayne@68
|
69 if(quals==null){useQuality=false;}
|
jpayne@68
|
70
|
jpayne@68
|
71 // System.err.println("pos="+pos+", maxLeft="+maxLeft);
|
jpayne@68
|
72 for(int cloc=0, rloc=maxLeft-pos; cloc<bases.length; cloc++, rloc++){
|
jpayne@68
|
73 // System.err.println("cloc="+cloc+"/"+bases.length+", rloc="+rloc+"/"+width);
|
jpayne@68
|
74 int x=AminoAcid.baseToNumber[bases[cloc]];
|
jpayne@68
|
75 if(x>-1){
|
jpayne@68
|
76 final int q;
|
jpayne@68
|
77 if(qualityScores){q=(quals==null ? 20 : quals[cloc]);}
|
jpayne@68
|
78 else{q=1;}
|
jpayne@68
|
79 counts[x][rloc]+=q;
|
jpayne@68
|
80 }
|
jpayne@68
|
81 }
|
jpayne@68
|
82 }
|
jpayne@68
|
83 return counts;
|
jpayne@68
|
84 }
|
jpayne@68
|
85
|
jpayne@68
|
86 /*--------------------------------------------------------------*/
|
jpayne@68
|
87
|
jpayne@68
|
88 public Read makeSimpleConsensus(){
|
jpayne@68
|
89 // assert(Splitter.findBestPivot(this)<0) : Splitter.findBestPivot(this); //TODO: Slow
|
jpayne@68
|
90 if(size()==1){
|
jpayne@68
|
91 Read r=get(0);
|
jpayne@68
|
92 if(renameConsensus){r.id=r.numericID+" size=1";}
|
jpayne@68
|
93 return r;
|
jpayne@68
|
94 }
|
jpayne@68
|
95 final int[][] bcounts=baseCounts();
|
jpayne@68
|
96 final int[][] qcounts=qualityCounts();
|
jpayne@68
|
97
|
jpayne@68
|
98 final byte[] bases=new byte[width], quals=new byte[width];
|
jpayne@68
|
99 for(int i=0; i<width; i++){
|
jpayne@68
|
100 int x=getConsensusAtPosition(qcounts, i);
|
jpayne@68
|
101 int y=getSecondHighest(qcounts, i);
|
jpayne@68
|
102 if(x>=0 && qcounts[x][i]==qcounts[y][i]){//Tie-breaker
|
jpayne@68
|
103 x=getConsensusAtPosition(bcounts, i);
|
jpayne@68
|
104 y=getSecondHighest(bcounts, i);
|
jpayne@68
|
105 }
|
jpayne@68
|
106
|
jpayne@68
|
107
|
jpayne@68
|
108 if(x<0){
|
jpayne@68
|
109 // System.err.println("q="+0+", x="+x+"; A="+counts[0][i]+", C="+counts[1][i]+", G="+counts[2][i]+", T="+counts[3][i]);
|
jpayne@68
|
110 bases[i]='N';
|
jpayne@68
|
111 quals[i]=0;
|
jpayne@68
|
112 assert(getSumAtPosition(qcounts, i)==0) : "\n"+bcounts[0][i]+", "+bcounts[1][i]+", "+bcounts[2][i]+", "+bcounts[3][i]+
|
jpayne@68
|
113 "\n"+qcounts[0][i]+", "+qcounts[1][i]+", "+qcounts[2][i]+", "+qcounts[3][i]+
|
jpayne@68
|
114 "\nwidth="+width+", i="+i+", size="+size()+"\n"+new String(bases, 0, i)+"\n"+get(0)+"\n"+get(1)+"\n";
|
jpayne@68
|
115 // assert(getSumAtPosition(bcounts, i)==0) : "\n"+bcounts[0][i]+", "+bcounts[1][i]+", "+bcounts[2][i]+", "+bcounts[3][i]+
|
jpayne@68
|
116 // "\n"+qcounts[0][i]+", "+qcounts[1][i]+", "+qcounts[2][i]+", "+qcounts[3][i]+
|
jpayne@68
|
117 // "\nwidth="+width+", i="+i+", size="+size()+"\n"+new String(bases, 0, i)+"\n"+get(0)+"\n"+get(1)+"\n";
|
jpayne@68
|
118 }else{
|
jpayne@68
|
119 final long bsum=getSumAtPosition(bcounts, i);
|
jpayne@68
|
120 final long bmajor=bcounts[x][i];
|
jpayne@68
|
121 final long bminor=bsum-bcounts[x][i];
|
jpayne@68
|
122 final long bsecond=bcounts[y][i];
|
jpayne@68
|
123
|
jpayne@68
|
124 final long qsum=getSumAtPosition(qcounts, i);
|
jpayne@68
|
125 final long qmajor=qcounts[x][i];
|
jpayne@68
|
126 final long qminor=qsum-qcounts[x][i];
|
jpayne@68
|
127 final long qsecond=qcounts[y][i];
|
jpayne@68
|
128
|
jpayne@68
|
129 float bratio=bminor/(float)(bmajor+bminor);
|
jpayne@68
|
130 double phred=(bminor==0 ? Read.MAX_CALLED_QUALITY() : -10*Math.log10(bratio));
|
jpayne@68
|
131 phred=Tools.min(phred, qmajor-qminor);
|
jpayne@68
|
132 // if(phred<0 || phred>127){
|
jpayne@68
|
133 // assert(false) : i+","+x+","+bsum+","+qsum+","+bmajor+","+bminor+","+bratio+","+phred+"\n"+
|
jpayne@68
|
134 // bcounts[0][i]+","+bcounts[1][i]+","+bcounts[2][i]+","+bcounts[3][i]+"\n"+
|
jpayne@68
|
135 // qcounts[0][i]+","+qcounts[1][i]+","+qcounts[2][i]+","+qcounts[3][i]+"\n"+
|
jpayne@68
|
136 // this.toStringStaggered()+"\n";
|
jpayne@68
|
137 // }
|
jpayne@68
|
138 // assert(phred>=0 && phred<=127) : phred+","+x+","+i+","+bratio+","+bcounts[x][i]+","+bcounts[0][i]+
|
jpayne@68
|
139 // ","+bcounts[1][i]+","+bcounts[2][i]+","+bcounts[3][i];
|
jpayne@68
|
140 // assert(phred>=0 && phred<=127) : bmajor+", "+bminor+", "+phred+", "+Read.MAX_CALLED_QUALITY;
|
jpayne@68
|
141 byte q=Read.capQuality((long)Math.round(phred));
|
jpayne@68
|
142 bases[i]=AminoAcid.numberToBase[x];
|
jpayne@68
|
143 quals[i]=q;
|
jpayne@68
|
144 assert(q>0);
|
jpayne@68
|
145 assert(x>-1);
|
jpayne@68
|
146 assert(bases[i]!='N');
|
jpayne@68
|
147 }
|
jpayne@68
|
148 }
|
jpayne@68
|
149 Read leftmost=this.get(0);//TODO: Actually rightmost!
|
jpayne@68
|
150 Read r=new Read(bases, quals, leftmost.numericID+" size="+size(), 0);
|
jpayne@68
|
151 //TODO: Attach the long pair, and make sure the kmer location is correct.
|
jpayne@68
|
152 // assert(false) : "\n"+r.toFastq()+"\nCheck kmer location.";
|
jpayne@68
|
153 // assert(size()==1) : "\n"+r.toFastq()+"\n"+get(0).toFastq()+"\n"+get(size()-1).toFastq()+"\n";
|
jpayne@68
|
154 return r;
|
jpayne@68
|
155 }
|
jpayne@68
|
156
|
jpayne@68
|
157 /*--------------------------------------------------------------*/
|
jpayne@68
|
158
|
jpayne@68
|
159 public int removeDuplicates(){
|
jpayne@68
|
160 assert(KmerComparator.compareSequence);
|
jpayne@68
|
161 if(size()<2){return 0;}
|
jpayne@68
|
162
|
jpayne@68
|
163 int removedTotal=0, removed=0;
|
jpayne@68
|
164
|
jpayne@68
|
165 final boolean sortXY=(forceSortXY || sortYEarly() || (opticalOnly && (sortX || sortY) && size()>=sortXYSize));
|
jpayne@68
|
166
|
jpayne@68
|
167 final int maxDiscarded;
|
jpayne@68
|
168 final int scan;
|
jpayne@68
|
169
|
jpayne@68
|
170 if(opticalOnly && sortXY){
|
jpayne@68
|
171 scan=Tools.max(scanLimit, 200);
|
jpayne@68
|
172 maxDiscarded=scan+10;
|
jpayne@68
|
173 }else if(!sortXY && ((maxSubstitutions<1 && dupeSubRate<=0) || scanLimit<0)){
|
jpayne@68
|
174 scan=0;
|
jpayne@68
|
175 maxDiscarded=0;
|
jpayne@68
|
176 }else{
|
jpayne@68
|
177 scan=scanLimit;
|
jpayne@68
|
178 maxDiscarded=scan+10;
|
jpayne@68
|
179 }
|
jpayne@68
|
180
|
jpayne@68
|
181 if(sortXY){
|
jpayne@68
|
182 assert(sortX || sortY);
|
jpayne@68
|
183
|
jpayne@68
|
184 if(sortY){
|
jpayne@68
|
185 if(!sortYEarly()){
|
jpayne@68
|
186 this.sort(KmerComparatorY.comparator);
|
jpayne@68
|
187 }else{
|
jpayne@68
|
188 // for(int i=1; i<this.size(); i++){
|
jpayne@68
|
189 // Read a=get(i-1);
|
jpayne@68
|
190 // Read b=get(i);
|
jpayne@68
|
191 // assert(KmerComparatorY.comparator.compare(a, b)<=0) : a.obj+" vs "+b.obj;
|
jpayne@68
|
192 // }
|
jpayne@68
|
193 // //Otherwise, it was already Y-sorted.
|
jpayne@68
|
194 }
|
jpayne@68
|
195 // assert(false) : sortY();
|
jpayne@68
|
196 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
|
jpayne@68
|
197 removedTotal+=removed;
|
jpayne@68
|
198 // System.err.println("RemovedY: "+removed);
|
jpayne@68
|
199 while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){
|
jpayne@68
|
200 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
|
jpayne@68
|
201 removedTotal+=removed;
|
jpayne@68
|
202 // System.err.println("RemovedY: "+removed);
|
jpayne@68
|
203 }
|
jpayne@68
|
204 }
|
jpayne@68
|
205 if(sortX && (ReadKey.spanTilesX || !sortY)){
|
jpayne@68
|
206 this.sort(KmerComparatorX.comparator);
|
jpayne@68
|
207 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
|
jpayne@68
|
208 removedTotal+=removed;
|
jpayne@68
|
209 // System.err.println("RemovedX: "+removed);
|
jpayne@68
|
210 while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){
|
jpayne@68
|
211 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance);
|
jpayne@68
|
212 removedTotal+=removed;
|
jpayne@68
|
213 // System.err.println("RemovedX: "+removed);
|
jpayne@68
|
214 }
|
jpayne@68
|
215 }
|
jpayne@68
|
216 }else{
|
jpayne@68
|
217 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, false, markOnly, markAll, renameByCount, maxOpticalDistance);
|
jpayne@68
|
218 removedTotal+=removed;
|
jpayne@68
|
219 while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){
|
jpayne@68
|
220 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, false, markOnly, markAll, renameByCount, maxOpticalDistance);
|
jpayne@68
|
221 removedTotal+=removed;
|
jpayne@68
|
222 }
|
jpayne@68
|
223 }
|
jpayne@68
|
224
|
jpayne@68
|
225 return removedTotal;
|
jpayne@68
|
226 }
|
jpayne@68
|
227
|
jpayne@68
|
228 private int removeDuplicates_inner(final int maxSubs, final float subRate, final int scanLimit, final int maxDiscarded,
|
jpayne@68
|
229 final boolean optical, final boolean xySorted, final boolean mark, final boolean markAll, final boolean rename, final float dist){
|
jpayne@68
|
230 final int size=size();
|
jpayne@68
|
231 if(size<2){return 0;}
|
jpayne@68
|
232 int dupePairs=0;
|
jpayne@68
|
233 int dupeReads=0;
|
jpayne@68
|
234
|
jpayne@68
|
235 // final boolean breakOnTile=(optical && !FlowcellCoordinate.spanTiles);
|
jpayne@68
|
236
|
jpayne@68
|
237 // final long start=System.nanoTime();
|
jpayne@68
|
238
|
jpayne@68
|
239 for(int i=0, lim=size-1; i<lim; i++){
|
jpayne@68
|
240 final Read a=get(i);
|
jpayne@68
|
241 if(!a.discarded()){
|
jpayne@68
|
242 final ReadKey keyA=(ReadKey) a.obj;
|
jpayne@68
|
243 final int aLen=a.length();
|
jpayne@68
|
244 int unequals=0;
|
jpayne@68
|
245 int discarded=0;
|
jpayne@68
|
246 for(int j=i+1; j<size && unequals<=scanLimit && discarded<=maxDiscarded && (!a.discarded() || markAll); j++){
|
jpayne@68
|
247 final Read b=get(j);
|
jpayne@68
|
248 if(b.discarded()){
|
jpayne@68
|
249 discarded++;
|
jpayne@68
|
250 }else{
|
jpayne@68
|
251 final int bLen=b.length();
|
jpayne@68
|
252 final ReadKey keyB=(ReadKey) b.obj;
|
jpayne@68
|
253 if(!containment && !keyA.equals(keyB)){break;}
|
jpayne@68
|
254 // if(containment && affix && !keyA.physicalAffix(keyB, aLen, bLen)){break;}
|
jpayne@68
|
255 // if(optical && keyA.lane!=keyB.lane){break;} //Already in equals method
|
jpayne@68
|
256 // if(breakOnTile && keyA.tile!=keyB.tile){break;} //Already in equals method
|
jpayne@68
|
257 if(optical && xySorted && !keyA.nearXY(keyB, dist)){break;}
|
jpayne@68
|
258 // if(System.nanoTime()-start>200000000000L){
|
jpayne@68
|
259 // TextStreamWriter tsw=new TextStreamWriter("foo.fq", true, false, false);
|
jpayne@68
|
260 // tsw.start();
|
jpayne@68
|
261 // for(Read x : this){
|
jpayne@68
|
262 // tsw.println(x.toFastq());
|
jpayne@68
|
263 // }
|
jpayne@68
|
264 // tsw.poisonAndWait();
|
jpayne@68
|
265 // assert(false) : "kmer="+kmer+", size="+size();
|
jpayne@68
|
266 // }
|
jpayne@68
|
267 if(equals(a, b, maxSubs, subRate)){
|
jpayne@68
|
268 if(!optical || keyA.near(keyB, dist)){
|
jpayne@68
|
269 if(printDuplicates){
|
jpayne@68
|
270 System.out.println(a.toFasta());
|
jpayne@68
|
271 System.out.println(b.toFasta());
|
jpayne@68
|
272 }
|
jpayne@68
|
273 float errA=a.expectedErrorsIncludingMate(true);
|
jpayne@68
|
274 float errB=b.expectedErrorsIncludingMate(true);
|
jpayne@68
|
275 if(markAll){
|
jpayne@68
|
276 b.setDiscarded(true);
|
jpayne@68
|
277 assert(!a.discarded() || markAll);
|
jpayne@68
|
278 dupePairs++;
|
jpayne@68
|
279 dupeReads+=1+b.mateCount();
|
jpayne@68
|
280 unequals=0;
|
jpayne@68
|
281 if(!a.discarded()){
|
jpayne@68
|
282 a.setDiscarded(true);
|
jpayne@68
|
283 dupePairs++;
|
jpayne@68
|
284 dupeReads+=1+a.mateCount();
|
jpayne@68
|
285 }
|
jpayne@68
|
286 }else if(containment || errB>=errA){
|
jpayne@68
|
287 b.setDiscarded(true);
|
jpayne@68
|
288 assert(!a.discarded() || markAll);
|
jpayne@68
|
289 a.copies+=b.copies+parseExtraCopies(b);
|
jpayne@68
|
290 dupePairs++;
|
jpayne@68
|
291 dupeReads+=1+b.mateCount();
|
jpayne@68
|
292 unequals=0;
|
jpayne@68
|
293 }else{
|
jpayne@68
|
294 a.setDiscarded(true);
|
jpayne@68
|
295 assert(!b.discarded() || markAll);
|
jpayne@68
|
296 b.copies+=a.copies+parseExtraCopies(a);
|
jpayne@68
|
297 dupePairs++;
|
jpayne@68
|
298 dupeReads+=1+a.mateCount();
|
jpayne@68
|
299 }
|
jpayne@68
|
300 }
|
jpayne@68
|
301 }else{
|
jpayne@68
|
302 unequals++;
|
jpayne@68
|
303 }
|
jpayne@68
|
304 }
|
jpayne@68
|
305 }
|
jpayne@68
|
306 }
|
jpayne@68
|
307 }
|
jpayne@68
|
308 if(dupeReads>0){
|
jpayne@68
|
309 for(int i=0; i<size; i++){
|
jpayne@68
|
310 Read a=get(i);
|
jpayne@68
|
311 if(a.discarded()){
|
jpayne@68
|
312 if(mark){
|
jpayne@68
|
313 if(!a.id.endsWith(" duplicate")){
|
jpayne@68
|
314 a.id+=" duplicate";
|
jpayne@68
|
315 if(a.mate!=null){a.mate.id+=" duplicate";}
|
jpayne@68
|
316 }
|
jpayne@68
|
317 }else{
|
jpayne@68
|
318 set(i, null);
|
jpayne@68
|
319 }
|
jpayne@68
|
320 }else if(rename && a.copies>1){
|
jpayne@68
|
321 renameFromCount(a);
|
jpayne@68
|
322 }
|
jpayne@68
|
323 a.copies=1;
|
jpayne@68
|
324 }
|
jpayne@68
|
325 if(!mark){
|
jpayne@68
|
326 int x=Tools.condenseStrict(this);
|
jpayne@68
|
327 assert(x==dupePairs) : size()+", "+size+", "+dupePairs+", "+x;
|
jpayne@68
|
328 assert((size()>0 || markAll) && size()==size-dupePairs) : size()+", "+size+", "+dupePairs;
|
jpayne@68
|
329 }
|
jpayne@68
|
330 }
|
jpayne@68
|
331
|
jpayne@68
|
332 if(containment){
|
jpayne@68
|
333 dupeReads+=removeDuplicates_backwards(maxSubs, subRate, scanLimit, maxDiscarded, optical, xySorted, mark, markAll, rename, dist);
|
jpayne@68
|
334 }
|
jpayne@68
|
335
|
jpayne@68
|
336 return dupeReads;
|
jpayne@68
|
337 }
|
jpayne@68
|
338
|
jpayne@68
|
339 /** Only for containments */
|
jpayne@68
|
340 private int removeDuplicates_backwards(final int maxSubs, final float subRate, final int scanLimit, final int maxDiscarded,
|
jpayne@68
|
341 final boolean optical, final boolean xySorted, final boolean mark, final boolean markAll, final boolean rename, final float dist){
|
jpayne@68
|
342 final int size=size();
|
jpayne@68
|
343 if(size<2){return 0;}
|
jpayne@68
|
344 int dupePairs=0;
|
jpayne@68
|
345 int dupeReads=0;
|
jpayne@68
|
346
|
jpayne@68
|
347 // final boolean breakOnTile=(optical && !FlowcellCoordinate.spanTiles);
|
jpayne@68
|
348
|
jpayne@68
|
349 // final long start=System.nanoTime();
|
jpayne@68
|
350
|
jpayne@68
|
351 for(int i=size-1; i>0; i--){
|
jpayne@68
|
352 final Read a=get(i);
|
jpayne@68
|
353 if(!a.discarded()){
|
jpayne@68
|
354 final ReadKey keyA=(ReadKey) a.obj;
|
jpayne@68
|
355 final int aLen=a.length();
|
jpayne@68
|
356 int unequals=0;
|
jpayne@68
|
357 int discarded=0;
|
jpayne@68
|
358 for(int j=i-1; j>=0 && unequals<=scanLimit && discarded<=maxDiscarded && (!a.discarded() || markAll); j--){
|
jpayne@68
|
359 final Read b=get(j);
|
jpayne@68
|
360 if(b.discarded()){
|
jpayne@68
|
361 discarded++;
|
jpayne@68
|
362 }else{
|
jpayne@68
|
363 final int bLen=b.length();
|
jpayne@68
|
364 final ReadKey keyB=(ReadKey) b.obj;
|
jpayne@68
|
365 if(!containment && !keyA.equals(keyB)){break;}
|
jpayne@68
|
366 // if(containment && affix && !keyA.physicalAffix(keyB, aLen, bLen)){break;}
|
jpayne@68
|
367 // if(optical && keyA.lane!=keyB.lane){break;} //Already in equals method
|
jpayne@68
|
368 // if(breakOnTile && keyA.tile!=keyB.tile){break;} //Already in equals method
|
jpayne@68
|
369 if(optical && xySorted && !keyA.nearXY(keyB, dist)){break;}
|
jpayne@68
|
370 // if(System.nanoTime()-start>200000000000L){
|
jpayne@68
|
371 // TextStreamWriter tsw=new TextStreamWriter("foo.fq", true, false, false);
|
jpayne@68
|
372 // tsw.start();
|
jpayne@68
|
373 // for(Read x : this){
|
jpayne@68
|
374 // tsw.println(x.toFastq());
|
jpayne@68
|
375 // }
|
jpayne@68
|
376 // tsw.poisonAndWait();
|
jpayne@68
|
377 // assert(false) : "kmer="+kmer+", size="+size();
|
jpayne@68
|
378 // }
|
jpayne@68
|
379 if(equals(a, b, maxSubs, subRate)){
|
jpayne@68
|
380 if(!optical || keyA.near(keyB, dist)){
|
jpayne@68
|
381 if(printDuplicates){
|
jpayne@68
|
382 System.out.println(a.toFasta());
|
jpayne@68
|
383 System.out.println(b.toFasta());
|
jpayne@68
|
384 }
|
jpayne@68
|
385 float errA=a.expectedErrorsIncludingMate(true);
|
jpayne@68
|
386 float errB=b.expectedErrorsIncludingMate(true);
|
jpayne@68
|
387 if(markAll){
|
jpayne@68
|
388 b.setDiscarded(true);
|
jpayne@68
|
389 assert(!a.discarded() || markAll);
|
jpayne@68
|
390 dupePairs++;
|
jpayne@68
|
391 dupeReads+=1+b.mateCount();
|
jpayne@68
|
392 unequals=0;
|
jpayne@68
|
393 if(!a.discarded()){
|
jpayne@68
|
394 a.setDiscarded(true);
|
jpayne@68
|
395 dupePairs++;
|
jpayne@68
|
396 dupeReads+=1+a.mateCount();
|
jpayne@68
|
397 }
|
jpayne@68
|
398 }else if(containment || errB>=errA){
|
jpayne@68
|
399 b.setDiscarded(true);
|
jpayne@68
|
400 assert(!a.discarded() || markAll);
|
jpayne@68
|
401 a.copies+=b.copies+parseExtraCopies(b);
|
jpayne@68
|
402 dupePairs++;
|
jpayne@68
|
403 dupeReads+=1+b.mateCount();
|
jpayne@68
|
404 unequals=0;
|
jpayne@68
|
405 }else{
|
jpayne@68
|
406 a.setDiscarded(true);
|
jpayne@68
|
407 assert(!b.discarded() || markAll);
|
jpayne@68
|
408 b.copies+=a.copies+parseExtraCopies(a);
|
jpayne@68
|
409 dupePairs++;
|
jpayne@68
|
410 dupeReads+=1+a.mateCount();
|
jpayne@68
|
411 }
|
jpayne@68
|
412 }
|
jpayne@68
|
413 }else{
|
jpayne@68
|
414 unequals++;
|
jpayne@68
|
415 }
|
jpayne@68
|
416 }
|
jpayne@68
|
417 }
|
jpayne@68
|
418 }
|
jpayne@68
|
419 }
|
jpayne@68
|
420 if(dupeReads>0){
|
jpayne@68
|
421 for(int i=0; i<size; i++){
|
jpayne@68
|
422 Read a=get(i);
|
jpayne@68
|
423 if(a.discarded()){
|
jpayne@68
|
424 if(mark){
|
jpayne@68
|
425 if(!a.id.endsWith(" duplicate")){
|
jpayne@68
|
426 a.id+=" duplicate";
|
jpayne@68
|
427 if(a.mate!=null){a.mate.id+=" duplicate";}
|
jpayne@68
|
428 }
|
jpayne@68
|
429 }else{
|
jpayne@68
|
430 set(i, null);
|
jpayne@68
|
431 }
|
jpayne@68
|
432 }else if(rename && a.copies>1){
|
jpayne@68
|
433 renameFromCount(a);
|
jpayne@68
|
434 }
|
jpayne@68
|
435 a.copies=1;
|
jpayne@68
|
436 }
|
jpayne@68
|
437 if(!mark){
|
jpayne@68
|
438 int x=Tools.condenseStrict(this);
|
jpayne@68
|
439 assert(x==dupePairs) : size()+", "+size+", "+dupePairs+", "+x;
|
jpayne@68
|
440 assert((size()>0 || markAll) && size()==size-dupePairs) : size()+", "+size+", "+dupePairs;
|
jpayne@68
|
441 }
|
jpayne@68
|
442 }
|
jpayne@68
|
443 return dupeReads;
|
jpayne@68
|
444 }
|
jpayne@68
|
445
|
jpayne@68
|
446 public int parseExtraCopies(final Read a){
|
jpayne@68
|
447 final String id=a.id;
|
jpayne@68
|
448 final int space=id.lastIndexOf(' ');
|
jpayne@68
|
449 int extraCopies=0;
|
jpayne@68
|
450 if(space<0){return 0;}
|
jpayne@68
|
451 if(space>=0 && Tools.contains(id, "copies=", space+1)){
|
jpayne@68
|
452 extraCopies=Integer.parseInt(id.substring(space+8))-1;
|
jpayne@68
|
453 }
|
jpayne@68
|
454 return extraCopies;
|
jpayne@68
|
455 }
|
jpayne@68
|
456
|
jpayne@68
|
457 private void renameFromCount(final Read a){
|
jpayne@68
|
458 final int newExtraCopies=a.copies-1;
|
jpayne@68
|
459 assert(newExtraCopies>0) : newExtraCopies;
|
jpayne@68
|
460 final int oldExtraCopies=parseExtraCopies(a);
|
jpayne@68
|
461 final int total=1+newExtraCopies+oldExtraCopies;
|
jpayne@68
|
462 renameToTotal(a, total);
|
jpayne@68
|
463 if(a.pairnum()==0 && a.mate!=null){
|
jpayne@68
|
464 assert(a.mate.pairnum()==1);
|
jpayne@68
|
465 renameToTotal(a.mate, total);
|
jpayne@68
|
466 }
|
jpayne@68
|
467 }
|
jpayne@68
|
468
|
jpayne@68
|
469 private static void renameToTotal(final Read a, final int total){
|
jpayne@68
|
470 final String id=a.id;
|
jpayne@68
|
471 final int space=id.lastIndexOf(' ');
|
jpayne@68
|
472 if(space>=0 && Tools.contains(id, "copies=", space+1)){
|
jpayne@68
|
473 a.id=a.id.substring(0, space);
|
jpayne@68
|
474 }
|
jpayne@68
|
475 a.id=a.id+" copies="+total;
|
jpayne@68
|
476 }
|
jpayne@68
|
477
|
jpayne@68
|
478 // public boolean nearby(Read a, Read b, FlowcellCoordinate fca, FlowcellCoordinate fcb, float maxDist){
|
jpayne@68
|
479 //// fca.setFrom(a.id);
|
jpayne@68
|
480 // fcb.setFrom(b.id);
|
jpayne@68
|
481 // float dist=fca.distance(fcb);
|
jpayne@68
|
482 // return dist<=maxDist;
|
jpayne@68
|
483 // }
|
jpayne@68
|
484
|
jpayne@68
|
485 // public boolean nearby(ReadKey ka, ReadKey kb, float maxDist){
|
jpayne@68
|
486 // float dist=ka.distance(kb);
|
jpayne@68
|
487 // return dist<=maxDist;
|
jpayne@68
|
488 // }
|
jpayne@68
|
489
|
jpayne@68
|
490 public static boolean equals(Read a, Read b, int maxSubs, float dupeSubRate){
|
jpayne@68
|
491 if(a.numericID==b.numericID){return false;}
|
jpayne@68
|
492 if(dupeSubRate>0){maxSubs=Tools.max(maxSubs, (int)(dupeSubRate*Tools.min(a.length(), b.length())));}
|
jpayne@68
|
493 if(containment){
|
jpayne@68
|
494 return contains(a, b, maxSubs);
|
jpayne@68
|
495 }
|
jpayne@68
|
496 if(!equals(a.bases, b.bases, maxSubs)){return false;}
|
jpayne@68
|
497 if(a.mate!=null && !equals(a.mate.bases, b.mate.bases, maxSubs)){return false;}
|
jpayne@68
|
498 return true;
|
jpayne@68
|
499 }
|
jpayne@68
|
500
|
jpayne@68
|
501 public static boolean equals(byte[] a, byte[] b, int maxSubs){
|
jpayne@68
|
502 if(a==b){return false;}//Nothing should subsume itself
|
jpayne@68
|
503 if(a==null || b==null){return false;}
|
jpayne@68
|
504 if(a.length!=b.length){return false;}
|
jpayne@68
|
505 int subs=0;
|
jpayne@68
|
506 if(allowNs){
|
jpayne@68
|
507 for(int i=0; i<a.length; i++){
|
jpayne@68
|
508 if(a[i]!=b[i] && (a[i]!='N' && b[i]!='N')){
|
jpayne@68
|
509 subs++;
|
jpayne@68
|
510 if(subs>maxSubs){return false;}
|
jpayne@68
|
511 }
|
jpayne@68
|
512 }
|
jpayne@68
|
513 }else{
|
jpayne@68
|
514 for(int i=0; i<a.length; i++){
|
jpayne@68
|
515 if(a[i]!=b[i]){
|
jpayne@68
|
516 subs++;
|
jpayne@68
|
517 if(subs>maxSubs){return false;}
|
jpayne@68
|
518 }
|
jpayne@68
|
519 }
|
jpayne@68
|
520 }
|
jpayne@68
|
521 return true;
|
jpayne@68
|
522 }
|
jpayne@68
|
523
|
jpayne@68
|
524 public static boolean contains(Read a, Read b, int maxSubs){
|
jpayne@68
|
525 if(a.numericID==b.numericID){return false;}
|
jpayne@68
|
526 boolean ok=contains_inner(a, b, maxSubs);
|
jpayne@68
|
527 if(!ok || a.mate==null){return ok;}
|
jpayne@68
|
528 ok=contains_inner(a.mate, b.mate, maxSubs);
|
jpayne@68
|
529 if(!ok){return false;}
|
jpayne@68
|
530 ReadKey rka1=(ReadKey)a.obj;
|
jpayne@68
|
531 ReadKey rkb1=(ReadKey)b.obj;
|
jpayne@68
|
532 ReadKey rka2=(ReadKey)a.mate.obj; //TODO: In containment mode, mates need to always get keys.
|
jpayne@68
|
533 ReadKey rkb2=(ReadKey)b.mate.obj;
|
jpayne@68
|
534 return ((rka1.kmerMinusStrand==rkb1.kmerMinusStrand) && (rka2.kmerMinusStrand==rkb2.kmerMinusStrand)); //Ensures that both reads have the same directionality.
|
jpayne@68
|
535 }
|
jpayne@68
|
536
|
jpayne@68
|
537 public static boolean contains_inner(Read a, Read b, int maxSubs){
|
jpayne@68
|
538 // if(a.length()==b.length()){return equals(a.bases, b.bases, maxSubs);}
|
jpayne@68
|
539 ReadKey rka=(ReadKey)a.obj;
|
jpayne@68
|
540 ReadKey rkb=(ReadKey)b.obj;
|
jpayne@68
|
541 if(affix ? !rka.physicalAffix(rkb, a.length(), b.length()) : !rka.physicallyContains(rkb, a.length(), b.length())){return false;}
|
jpayne@68
|
542 boolean flipped=false;
|
jpayne@68
|
543 // if(a.mate!=null){//In paired mode, need synchronization if the reads are in difference clumps. But... just don't do that.
|
jpayne@68
|
544 // Read max, min;
|
jpayne@68
|
545 // if(a.numericID>b.numericID){max=a; min=b;}//Protects against deadlocks.
|
jpayne@68
|
546 // else{max=b; min=a;}
|
jpayne@68
|
547 // synchronized(min){
|
jpayne@68
|
548 // synchronized(max){
|
jpayne@68
|
549 // if(rka.kmerMinusStrand!=rkb.kmerMinusStrand){
|
jpayne@68
|
550 // rkb.flip(b, k);
|
jpayne@68
|
551 // flipped=true;
|
jpayne@68
|
552 // }
|
jpayne@68
|
553 // boolean ok=contains(a.bases, b.bases, rka.position, rkb.position, maxSubs);
|
jpayne@68
|
554 // if(flipped){rkb.flip(b, k);}
|
jpayne@68
|
555 // return ok;
|
jpayne@68
|
556 // }
|
jpayne@68
|
557 // }
|
jpayne@68
|
558 // }
|
jpayne@68
|
559 if(rka.kmerMinusStrand!=rkb.kmerMinusStrand){
|
jpayne@68
|
560 rkb.flip(b, k);
|
jpayne@68
|
561 flipped=true;
|
jpayne@68
|
562 }
|
jpayne@68
|
563 boolean ok=contains(a.bases, b.bases, rka.position, rkb.position, maxSubs);
|
jpayne@68
|
564 if(flipped){rkb.flip(b, k);}
|
jpayne@68
|
565 return ok;
|
jpayne@68
|
566 }
|
jpayne@68
|
567
|
jpayne@68
|
568 public static boolean contains(byte[] a, byte[] b, int posA, int posB, int maxSubs){
|
jpayne@68
|
569 if(a==null || b==null){return false;}
|
jpayne@68
|
570 assert(a.length>=b.length);
|
jpayne@68
|
571 if(a==b){return false;} //Nothing should contain itself
|
jpayne@68
|
572 int subs=0;
|
jpayne@68
|
573
|
jpayne@68
|
574 int ai, bi;
|
jpayne@68
|
575 final int dif=posA-posB;
|
jpayne@68
|
576 if(dif>0){
|
jpayne@68
|
577 ai=0;
|
jpayne@68
|
578 bi=-dif;
|
jpayne@68
|
579 }else{
|
jpayne@68
|
580 ai=dif;
|
jpayne@68
|
581 bi=0;
|
jpayne@68
|
582 }
|
jpayne@68
|
583
|
jpayne@68
|
584 if(allowNs){
|
jpayne@68
|
585 for(; ai<a.length && bi<b.length; ai++, bi++){
|
jpayne@68
|
586 if(ai>=0 && bi>=0){
|
jpayne@68
|
587 final byte na=a[ai], nb=b[bi];
|
jpayne@68
|
588 if(na!=nb && na!='N' && nb!='N'){
|
jpayne@68
|
589 subs++;
|
jpayne@68
|
590 if(subs>maxSubs){return false;}
|
jpayne@68
|
591 }
|
jpayne@68
|
592 }
|
jpayne@68
|
593 }
|
jpayne@68
|
594 }else{
|
jpayne@68
|
595 for(; ai<a.length && bi<b.length; ai++, bi++){
|
jpayne@68
|
596 if(ai>=0 && bi>=0 && a[ai]!=b[bi]){
|
jpayne@68
|
597 subs++;
|
jpayne@68
|
598 if(subs>maxSubs){return false;}
|
jpayne@68
|
599 }
|
jpayne@68
|
600 }
|
jpayne@68
|
601 }
|
jpayne@68
|
602 return true;
|
jpayne@68
|
603 }
|
jpayne@68
|
604
|
jpayne@68
|
605 /*--------------------------------------------------------------*/
|
jpayne@68
|
606
|
jpayne@68
|
607 public long splitAndErrorCorrect(){
|
jpayne@68
|
608 if(size()<Splitter.minSizeSplit){
|
jpayne@68
|
609 return errorCorrect();
|
jpayne@68
|
610 }
|
jpayne@68
|
611 long sum=0;
|
jpayne@68
|
612 ArrayList<Clump> list=Splitter.splitOnPivot(this);
|
jpayne@68
|
613 for(Clump c : list){
|
jpayne@68
|
614 sum+=c.errorCorrect();
|
jpayne@68
|
615 }
|
jpayne@68
|
616 return sum;
|
jpayne@68
|
617 }
|
jpayne@68
|
618
|
jpayne@68
|
619 public long errorCorrect(){
|
jpayne@68
|
620 if(size()<=minCountCorrect){return 0;}
|
jpayne@68
|
621 // assert(Splitter.findBestPivot(this)<0); //TODO: Slow
|
jpayne@68
|
622 Read consen=makeSimpleConsensus();
|
jpayne@68
|
623 long sum=0;
|
jpayne@68
|
624 final int[] rvector=KillSwitch.allocInt1D(2);
|
jpayne@68
|
625 for(Read r : this){
|
jpayne@68
|
626 sum+=errorCorrect(r, consen, rvector);
|
jpayne@68
|
627 }
|
jpayne@68
|
628 return sum;
|
jpayne@68
|
629 }
|
jpayne@68
|
630
|
jpayne@68
|
631 private int errorCorrect(Read call, Read ref, int[] rvector){
|
jpayne@68
|
632
|
jpayne@68
|
633 // assert(call.validated());
|
jpayne@68
|
634 // assert(call.checkQuality());
|
jpayne@68
|
635 // assert(ref.checkQuality());
|
jpayne@68
|
636
|
jpayne@68
|
637 final float identity=identity(call, ref.bases, rvector);
|
jpayne@68
|
638 if((identity<minIdentity && (rvector[1]>0 || rvector[0]<50)) || (identity==1 && !call.containsNocalls()/* && !ref.containsNocalls()*/)){
|
jpayne@68
|
639 //TODO: Strange, the identity==1 clause causes different behavior, though it does give a speedup.
|
jpayne@68
|
640 return 0;
|
jpayne@68
|
641 }
|
jpayne@68
|
642 final byte[] cbases=call.bases, cquals=call.quality;
|
jpayne@68
|
643 final byte[] rbases=ref.bases, rquals=ref.quality;
|
jpayne@68
|
644
|
jpayne@68
|
645 ReadKey key=(ReadKey) call.obj;
|
jpayne@68
|
646 final int pos=key.position;
|
jpayne@68
|
647
|
jpayne@68
|
648 final int[][] bcounts=baseCounts();
|
jpayne@68
|
649 final int[][] qcounts=qualityCounts();
|
jpayne@68
|
650 final float[][] qAvgs=qualityAverages();
|
jpayne@68
|
651
|
jpayne@68
|
652 final int minDepth=(int)Tools.max(minCountCorrect, minSizeFractionCorrect*size());
|
jpayne@68
|
653
|
jpayne@68
|
654 int corrections=0;
|
jpayne@68
|
655
|
jpayne@68
|
656 final int cStart=0, rStart=maxLeft-pos, max=cbases.length;
|
jpayne@68
|
657 for(int cloc=cStart, rloc=rStart; cloc<max; cloc++, rloc++){
|
jpayne@68
|
658 //Called base, ref base
|
jpayne@68
|
659 final byte cb=cbases[cloc], rb=rbases[rloc];
|
jpayne@68
|
660 //Called quality, ref quality
|
jpayne@68
|
661 final byte cq=(cquals==null ? 20 : cquals[cloc]), rq=rquals[rloc];
|
jpayne@68
|
662 //Called number
|
jpayne@68
|
663 final byte cx=AminoAcid.baseToNumber[cb];
|
jpayne@68
|
664 //Ref number
|
jpayne@68
|
665 final byte rx=AminoAcid.baseToNumber[rb];
|
jpayne@68
|
666
|
jpayne@68
|
667 // assert((cb=='N') == (cquals[cloc]==0));
|
jpayne@68
|
668
|
jpayne@68
|
669 final byte b, q;
|
jpayne@68
|
670 if(cx<0){
|
jpayne@68
|
671 b=rb;
|
jpayne@68
|
672 q=(byte)Tools.min(20, rq);
|
jpayne@68
|
673 }else if(cb==rb){
|
jpayne@68
|
674 b=cb;
|
jpayne@68
|
675 q=(byte)Tools.mid(cq, cq+1, rq);//Adjust upward
|
jpayne@68
|
676 assert(q>=cq && (q<=rq || q<=cq));
|
jpayne@68
|
677 }else{
|
jpayne@68
|
678 final int cCount=bcounts[cx][rloc];
|
jpayne@68
|
679 final int rCount=bcounts[rx][rloc];
|
jpayne@68
|
680 final int altQSum=qcounts[cx][rloc];
|
jpayne@68
|
681 final int rQSum=qcounts[rx][rloc];
|
jpayne@68
|
682 final float rQAvg=qAvgs[rx][rloc];
|
jpayne@68
|
683 if(cCount<=maxCIncorrect && cq<=maxQIncorrect && cq*minQRatio<rQSum && cq*minAQRatio<8+rQAvg){
|
jpayne@68
|
684 final byte pminor=getSecondHighest(bcounts, rloc);
|
jpayne@68
|
685
|
jpayne@68
|
686 assert(rx>=0 && rx<bcounts.length) : rx+", "+rloc+", "+bcounts.length+"\n"+call.toFastq()+"\n"+ref.toFastq();
|
jpayne@68
|
687 assert(rloc>=0 && rloc<bcounts[rx].length) : rx+", "+rloc+", "+bcounts[rloc].length+"\n"+call.toFastq()+"\n"+ref.toFastq();
|
jpayne@68
|
688 final int minorCount=bcounts[pminor][rloc];
|
jpayne@68
|
689
|
jpayne@68
|
690 final long sum=getSumAtPosition(bcounts, rloc);
|
jpayne@68
|
691 // final long altCount=sum-rCount;
|
jpayne@68
|
692
|
jpayne@68
|
693 boolean change=false;
|
jpayne@68
|
694 if(rCount>=minCountCorrect && sum>=minDepth){
|
jpayne@68
|
695 final float ratioNeeded=Tools.min(minRatio, minRatioMult*minorCount+minRatioOffset+minRatioQMult*cq);
|
jpayne@68
|
696 // final float qratioNeeded=Tools.min(minRatio, minRatioMult*altQSum+minRatioOffset+minRatioQMult*cq); //altQSum is slightly different than minorQCount
|
jpayne@68
|
697 if(minorCount*ratioNeeded<=rCount && altQSum*ratioNeeded<=rQSum){
|
jpayne@68
|
698 change=true;
|
jpayne@68
|
699 }
|
jpayne@68
|
700
|
jpayne@68
|
701 // else if(minorCount*ratioNeeded<=rCount){
|
jpayne@68
|
702 // assert(false) : "\n"+altQSum+", "+rQSum+", "+qratioNeeded+"\n"+cCount+", "+rCount+", "+sum+", "+ratioNeeded+"\n"+(altQSum*qratioNeeded);
|
jpayne@68
|
703 // }
|
jpayne@68
|
704 }
|
jpayne@68
|
705 if(change){
|
jpayne@68
|
706 corrections++;
|
jpayne@68
|
707 b=rb;
|
jpayne@68
|
708 q=(byte)Tools.min(cq+1, 25, rq);
|
jpayne@68
|
709 // assert(q==25 || (q<=rq && q>=cq)) : q+", "+cq+", "+rq;
|
jpayne@68
|
710 }else{
|
jpayne@68
|
711 b=cb;
|
jpayne@68
|
712 q=(byte)Tools.mid(cq, cq-1, 6);//Adjust downward
|
jpayne@68
|
713 assert(q<=cq || q>=6) : q+","+cq;
|
jpayne@68
|
714 }
|
jpayne@68
|
715 }else{
|
jpayne@68
|
716 b=cb;
|
jpayne@68
|
717 q=cq;
|
jpayne@68
|
718 }
|
jpayne@68
|
719 }
|
jpayne@68
|
720 cbases[cloc]=b;
|
jpayne@68
|
721 if(cquals!=null){
|
jpayne@68
|
722 byte q2=(byte)Tools.mid(q, cq+maxQAdjust, cq-maxQAdjust);
|
jpayne@68
|
723 if(q2==0 && AminoAcid.isFullyDefined(b)){
|
jpayne@68
|
724 assert(!AminoAcid.isFullyDefined(cb));
|
jpayne@68
|
725 q2=(byte)Tools.mid(2, 25, (rq+25)/2);
|
jpayne@68
|
726 }else if(!AminoAcid.isFullyDefined(b)){
|
jpayne@68
|
727 q2=0;
|
jpayne@68
|
728 }
|
jpayne@68
|
729 cquals[cloc]=q2;
|
jpayne@68
|
730 assert((b=='N') == (cquals[cloc]==0)) : "b="+(char)b+", cb="+(char)cb+", rb="+(char)rb+", cx="+cx+", "
|
jpayne@68
|
731 + "new="+cquals[cloc]+", q="+q+", old="+cq+", rq="+rq+", loc="+rloc+"\n"+call.toFastq()+"\n"+ref.toFastq();
|
jpayne@68
|
732 }
|
jpayne@68
|
733 }
|
jpayne@68
|
734 return corrections;
|
jpayne@68
|
735 }
|
jpayne@68
|
736
|
jpayne@68
|
737 /*--------------------------------------------------------------*/
|
jpayne@68
|
738
|
jpayne@68
|
739 //Only used by condense mode.
|
jpayne@68
|
740 public ArrayList<Read> makeConsensus(){
|
jpayne@68
|
741 if(size()==1){
|
jpayne@68
|
742 Read r=get(0);
|
jpayne@68
|
743 r.id=r.numericID+" size=1";
|
jpayne@68
|
744 return this;
|
jpayne@68
|
745 }
|
jpayne@68
|
746
|
jpayne@68
|
747 ArrayList<Clump> clumps=Splitter.splitOnPivot(this);//TODO: Really, this should return null if there is no pivot
|
jpayne@68
|
748
|
jpayne@68
|
749 ArrayList<Read> list=new ArrayList<Read>(clumps.size());
|
jpayne@68
|
750 for(Clump c : clumps){
|
jpayne@68
|
751 Read temp=c.makeSimpleConsensus();
|
jpayne@68
|
752 list.add(temp);
|
jpayne@68
|
753 }
|
jpayne@68
|
754 return list;
|
jpayne@68
|
755 }
|
jpayne@68
|
756
|
jpayne@68
|
757 /*--------------------------------------------------------------*/
|
jpayne@68
|
758
|
jpayne@68
|
759 private float identity(Read call, byte[] rbases, int[] rvector){
|
jpayne@68
|
760 ReadKey key=(ReadKey) call.obj;
|
jpayne@68
|
761 final int pos=key.position;
|
jpayne@68
|
762 byte[] cbases=call.bases, quals=call.quality;
|
jpayne@68
|
763 int good=0, bad=0;
|
jpayne@68
|
764
|
jpayne@68
|
765 final int cStart=0, rStart=maxLeft-pos, max=cbases.length;
|
jpayne@68
|
766 for(int cloc=cStart, rloc=rStart; cloc<max; cloc++, rloc++){
|
jpayne@68
|
767 final byte cb=cbases[cloc], rb=rbases[rloc];
|
jpayne@68
|
768 if(AminoAcid.isFullyDefined(cb) && AminoAcid.isFullyDefined(rb)){
|
jpayne@68
|
769 if(cb==rb){good++;}
|
jpayne@68
|
770 else{bad++;}
|
jpayne@68
|
771 }
|
jpayne@68
|
772 }
|
jpayne@68
|
773 rvector[0]=good;
|
jpayne@68
|
774 rvector[1]=bad;
|
jpayne@68
|
775 return good==0 ? 0 : good/(float)(good+bad);
|
jpayne@68
|
776 }
|
jpayne@68
|
777
|
jpayne@68
|
778 /*--------------------------------------------------------------*/
|
jpayne@68
|
779
|
jpayne@68
|
780 long getSumAtPosition(int[][] counts, int pos){
|
jpayne@68
|
781 long sum=0;
|
jpayne@68
|
782 for(int x=0; x<4; x++){
|
jpayne@68
|
783 sum+=counts[x][pos];
|
jpayne@68
|
784 }
|
jpayne@68
|
785 return sum;
|
jpayne@68
|
786 }
|
jpayne@68
|
787
|
jpayne@68
|
788 byte getConsensusAtPosition(int[][] counts, int pos){
|
jpayne@68
|
789 byte xMax=0;
|
jpayne@68
|
790 for(byte x=1; x<4; x++){
|
jpayne@68
|
791 // System.err.println("x="+x+", max="+max+", Checking "+counts[x][pos]+" vs "+counts[x][max]);
|
jpayne@68
|
792 if(counts[x][pos]>counts[xMax][pos]){xMax=x;}
|
jpayne@68
|
793 }
|
jpayne@68
|
794 // assert(counts[max][pos]>=counts[0][pos]);
|
jpayne@68
|
795 // assert(counts[max][pos]>=counts[1][pos]);
|
jpayne@68
|
796 // assert(counts[max][pos]>=counts[2][pos]) : max+", "+counts[max][pos]+", ["+counts[0][pos]+", "+counts[1][pos]+", "+counts[2][pos]+", "+counts[3][pos]+"]";
|
jpayne@68
|
797 // assert(counts[max][pos]>=counts[3][pos]);
|
jpayne@68
|
798 return (counts[xMax][pos]>0 ? xMax : -1);
|
jpayne@68
|
799 }
|
jpayne@68
|
800
|
jpayne@68
|
801 byte getSecondHighest(int[][] counts, int pos){
|
jpayne@68
|
802 byte first=0;
|
jpayne@68
|
803 byte second=1;
|
jpayne@68
|
804 if(counts[first][pos]<counts[second][pos]){
|
jpayne@68
|
805 first=1; second=0;
|
jpayne@68
|
806 }
|
jpayne@68
|
807 for(byte x=2; x<4; x++){
|
jpayne@68
|
808 // System.err.println("x="+x+", max="+max+", Checking "+counts[x][pos]+" vs "+counts[x][max]);
|
jpayne@68
|
809 if(counts[x][pos]>counts[first][pos]){
|
jpayne@68
|
810 second=first;
|
jpayne@68
|
811 first=x;
|
jpayne@68
|
812 }else if(counts[x][pos]>counts[second][pos]){
|
jpayne@68
|
813 second=x;
|
jpayne@68
|
814 }
|
jpayne@68
|
815 }
|
jpayne@68
|
816 // assert(counts[max][pos]>=counts[0][pos]);
|
jpayne@68
|
817 // assert(counts[max][pos]>=counts[1][pos]);
|
jpayne@68
|
818 // assert(counts[max][pos]>=counts[2][pos]) : max+", "+counts[max][pos]+", ["+counts[0][pos]+", "+counts[1][pos]+", "+counts[2][pos]+", "+counts[3][pos]+"]";
|
jpayne@68
|
819 // assert(counts[max][pos]>=counts[3][pos]);
|
jpayne@68
|
820
|
jpayne@68
|
821 return second; //may be actually 0.
|
jpayne@68
|
822 //return (counts[second][pos]>0 ? second : -1);
|
jpayne@68
|
823 }
|
jpayne@68
|
824
|
jpayne@68
|
825 /*--------------------------------------------------------------*/
|
jpayne@68
|
826
|
jpayne@68
|
827 public String toStringStaggered(){
|
jpayne@68
|
828 ByteBuilder sb=new ByteBuilder();
|
jpayne@68
|
829 for(Read r : this){
|
jpayne@68
|
830 ReadKey key=(ReadKey) r.obj;
|
jpayne@68
|
831 final int pos=key.position;
|
jpayne@68
|
832 byte[] bases=r.bases, quals=r.quality;
|
jpayne@68
|
833 int rloc=0, cloc=rloc+pos-maxLeft;
|
jpayne@68
|
834 while(cloc<0){sb.append(' '); cloc++;}
|
jpayne@68
|
835 sb.append(bases);
|
jpayne@68
|
836 sb.append('\n');
|
jpayne@68
|
837 }
|
jpayne@68
|
838 return sb.toString();
|
jpayne@68
|
839 }
|
jpayne@68
|
840
|
jpayne@68
|
841 public Read consensusRead(){
|
jpayne@68
|
842 if(consensusRead==null){
|
jpayne@68
|
843 consensusRead=makeSimpleConsensus();
|
jpayne@68
|
844 }
|
jpayne@68
|
845 return consensusRead;
|
jpayne@68
|
846 }
|
jpayne@68
|
847
|
jpayne@68
|
848 public int width(){
|
jpayne@68
|
849 assert(width>=0) : width;
|
jpayne@68
|
850 return width;
|
jpayne@68
|
851 }
|
jpayne@68
|
852
|
jpayne@68
|
853 public int maxLeft(){
|
jpayne@68
|
854 assert(maxLeft>=0);
|
jpayne@68
|
855 return maxLeft;
|
jpayne@68
|
856 }
|
jpayne@68
|
857
|
jpayne@68
|
858 public int maxRight(){
|
jpayne@68
|
859 assert(maxRight>=0);
|
jpayne@68
|
860 return maxRight;
|
jpayne@68
|
861 }
|
jpayne@68
|
862
|
jpayne@68
|
863 /*--------------------------------------------------------------*/
|
jpayne@68
|
864
|
jpayne@68
|
865 int[][] baseCounts(){
|
jpayne@68
|
866 if(baseCounts==null){
|
jpayne@68
|
867 baseCounts=count(false);
|
jpayne@68
|
868 int len=baseCounts[0].length;
|
jpayne@68
|
869 assert(width==-1 || width==len);
|
jpayne@68
|
870 }
|
jpayne@68
|
871 return baseCounts;
|
jpayne@68
|
872 }
|
jpayne@68
|
873
|
jpayne@68
|
874 int[][] qualityCounts(){
|
jpayne@68
|
875 if(qualityCounts==null){
|
jpayne@68
|
876 qualityCounts=count(true);
|
jpayne@68
|
877 int len=qualityCounts[0].length;
|
jpayne@68
|
878 assert(width==-1 || width==len);
|
jpayne@68
|
879 }
|
jpayne@68
|
880 return qualityCounts;
|
jpayne@68
|
881 }
|
jpayne@68
|
882
|
jpayne@68
|
883 float[][] qualityAverages(){
|
jpayne@68
|
884 if(qualityAverages==null){
|
jpayne@68
|
885 qualityAverages=new float[4][width];
|
jpayne@68
|
886 for(int i=0; i<4; i++){
|
jpayne@68
|
887 for(int j=0; j<width; j++){
|
jpayne@68
|
888 int b=baseCounts[i][j];
|
jpayne@68
|
889 int q=qualityCounts[i][j];
|
jpayne@68
|
890 qualityAverages[i][j]=(b==0 ? 0 : q/(float)b);
|
jpayne@68
|
891 }
|
jpayne@68
|
892 }
|
jpayne@68
|
893 }
|
jpayne@68
|
894 return qualityAverages;
|
jpayne@68
|
895 }
|
jpayne@68
|
896
|
jpayne@68
|
897 void clearCounts(){
|
jpayne@68
|
898 baseCounts=qualityCounts=null;
|
jpayne@68
|
899 qualityAverages=null;
|
jpayne@68
|
900 }
|
jpayne@68
|
901
|
jpayne@68
|
902 private void clearConsensus(){
|
jpayne@68
|
903 consensusRead=null;
|
jpayne@68
|
904 }
|
jpayne@68
|
905
|
jpayne@68
|
906 /*--------------------------------------------------------------*/
|
jpayne@68
|
907
|
jpayne@68
|
908 @Override
|
jpayne@68
|
909 public boolean equals(Object b){
|
jpayne@68
|
910 return this==b;
|
jpayne@68
|
911 }
|
jpayne@68
|
912
|
jpayne@68
|
913 @Override
|
jpayne@68
|
914 public int hashCode(){
|
jpayne@68
|
915 return Long.hashCode(kmer);
|
jpayne@68
|
916 }
|
jpayne@68
|
917
|
jpayne@68
|
918 @Override
|
jpayne@68
|
919 public int compareTo(Clump o) {
|
jpayne@68
|
920 int x=Long.compare(kmer, o.kmer);
|
jpayne@68
|
921 return x!=0 ? x : o.size()-size();
|
jpayne@68
|
922 }
|
jpayne@68
|
923
|
jpayne@68
|
924 /*--------------------------------------------------------------*/
|
jpayne@68
|
925
|
jpayne@68
|
926 public final long kmer;
|
jpayne@68
|
927
|
jpayne@68
|
928 private int width=-1;
|
jpayne@68
|
929 private int maxLeft=-1;
|
jpayne@68
|
930 private int maxRight=-1;
|
jpayne@68
|
931
|
jpayne@68
|
932 private int[][] baseCounts;
|
jpayne@68
|
933 private int[][] qualityCounts;
|
jpayne@68
|
934 private float[][] qualityAverages;
|
jpayne@68
|
935
|
jpayne@68
|
936 private Read consensusRead;
|
jpayne@68
|
937
|
jpayne@68
|
938 boolean useQuality(){return useQuality;}
|
jpayne@68
|
939 private boolean useQuality=true;
|
jpayne@68
|
940
|
jpayne@68
|
941 boolean added=false;
|
jpayne@68
|
942
|
jpayne@68
|
943 public static final int overhead=overhead();
|
jpayne@68
|
944 private static int overhead(){
|
jpayne@68
|
945 return 16+ //self
|
jpayne@68
|
946 16+ //Backing array
|
jpayne@68
|
947 4+ //Backing array size
|
jpayne@68
|
948 4*(8)+ //Backing array initial capacity
|
jpayne@68
|
949 1*(8)+ //kmer
|
jpayne@68
|
950 3*(4)+ //int fields
|
jpayne@68
|
951 4*(8)+ //pointers
|
jpayne@68
|
952 2*(4); //booleans
|
jpayne@68
|
953 }
|
jpayne@68
|
954
|
jpayne@68
|
955 /*--------------------------------------------------------------*/
|
jpayne@68
|
956
|
jpayne@68
|
957 public static boolean parseStatic(String arg, String a, String b){
|
jpayne@68
|
958 if(a.equals("mincountcorrect") || a.equals("mincc")){
|
jpayne@68
|
959 minCountCorrect=Integer.parseInt(b);
|
jpayne@68
|
960 }else if(a.equals("minsizesplit") || a.equals("minss")){
|
jpayne@68
|
961 Splitter.minSizeSplit=Integer.parseInt(b);
|
jpayne@68
|
962 }else if(a.equals("minsizefractionsplit") || a.equals("minsfs")){
|
jpayne@68
|
963 Splitter.minSizeFractionSplit=Float.parseFloat(b);
|
jpayne@68
|
964 }else if(a.equals("minsizefractioncorrect") || a.equals("minsfc")){
|
jpayne@68
|
965 minSizeFractionCorrect=Float.parseFloat(b);
|
jpayne@68
|
966 }else if(a.equals("minratio") || a.equals("minr")){
|
jpayne@68
|
967 minRatio=Float.parseFloat(b);
|
jpayne@68
|
968 }else if(a.equals("minqratio") || a.equals("minqr")){
|
jpayne@68
|
969 minQRatio=Float.parseFloat(b);
|
jpayne@68
|
970 }else if(a.equals("minaqratio") || a.equals("minaqr")){
|
jpayne@68
|
971 minAQRatio=Float.parseFloat(b);
|
jpayne@68
|
972 }else if(a.equals("minratiooffset") || a.equals("minro")){
|
jpayne@68
|
973 minRatioOffset=Float.parseFloat(b);
|
jpayne@68
|
974 }else if(a.equals("minratiomult") || a.equals("minrm")){
|
jpayne@68
|
975 minRatioMult=Float.parseFloat(b);
|
jpayne@68
|
976 }else if(a.equals("minratioqmult") || a.equals("minrqm")){
|
jpayne@68
|
977 minRatioQMult=Float.parseFloat(b);
|
jpayne@68
|
978 }else if(a.equals("minidentity") || a.equals("minid")){
|
jpayne@68
|
979 minIdentity=Float.parseFloat(b);
|
jpayne@68
|
980 }else if(a.equals("maxqadjust")){
|
jpayne@68
|
981 maxQAdjust=(byte)Integer.parseInt(b);
|
jpayne@68
|
982 }else if(a.equals("maxqi") || a.equals("maxqincorrect") || a.equals("maxqualityincorrect")){
|
jpayne@68
|
983 maxQIncorrect=Integer.parseInt(b);
|
jpayne@68
|
984 if(maxCIncorrect<0){maxQIncorrect=Integer.MAX_VALUE;}
|
jpayne@68
|
985 }else if(a.equals("maxci") || a.equals("maxcincorrect") || a.equals("maxcountincorrect")){
|
jpayne@68
|
986 maxCIncorrect=Integer.parseInt(b);
|
jpayne@68
|
987 if(maxCIncorrect<0){maxCIncorrect=Integer.MAX_VALUE;}
|
jpayne@68
|
988 }else if(a.equals("border")){
|
jpayne@68
|
989 KmerComparator.defaultBorder=Integer.parseInt(b);
|
jpayne@68
|
990 }else if(a.equals("conservative")){
|
jpayne@68
|
991 conservativeFlag=Parse.parseBoolean(b);
|
jpayne@68
|
992 }else if(a.equals("aggressive")){
|
jpayne@68
|
993 aggressiveFlag=Parse.parseBoolean(b);
|
jpayne@68
|
994 }else if(a.equals("forceprocess")){
|
jpayne@68
|
995 forceProcess=Parse.parseBoolean(b);
|
jpayne@68
|
996 }else if(a.equals("mergefirst")){
|
jpayne@68
|
997 KmerComparator.mergeFirst=Parse.parseBoolean(b);
|
jpayne@68
|
998 }else if(a.equals("findcorrelations")){
|
jpayne@68
|
999 Splitter.FIND_CORRELATIONS=Parse.parseBoolean(b);
|
jpayne@68
|
1000 }else if(a.equals("maxcorrelations")){
|
jpayne@68
|
1001 Splitter.MAX_CORRELATIONS=Integer.parseInt(b);
|
jpayne@68
|
1002 }
|
jpayne@68
|
1003
|
jpayne@68
|
1004 else if(a.equals("sortx")){
|
jpayne@68
|
1005 sortX=Parse.parseBoolean(b);
|
jpayne@68
|
1006 }else if(a.equals("sorty")){
|
jpayne@68
|
1007 sortY=Parse.parseBoolean(b);
|
jpayne@68
|
1008 }else if(a.equals("sortxy")){
|
jpayne@68
|
1009 sortX=sortY=Parse.parseBoolean(b);
|
jpayne@68
|
1010 }else if(a.equals("forcesortxy") || a.equals("forcexy")){
|
jpayne@68
|
1011 forceSortXY=Parse.parseBoolean(b);
|
jpayne@68
|
1012 }else if(a.equals("sortxysize") || a.equals("xysize")){
|
jpayne@68
|
1013 sortXYSize=Integer.parseInt(b);
|
jpayne@68
|
1014 }
|
jpayne@68
|
1015
|
jpayne@68
|
1016 else if(a.equals("removeallduplicates") || a.equals("allduplicates")){
|
jpayne@68
|
1017 markAll=Parse.parseBoolean(b);
|
jpayne@68
|
1018 }else if(a.equals("addcount") || a.equals("renamebycount")){
|
jpayne@68
|
1019 renameByCount=Parse.parseBoolean(b);
|
jpayne@68
|
1020 }else if(a.equals("optical") || a.equals("opticalonly")){
|
jpayne@68
|
1021 opticalOnly=Parse.parseBoolean(b);
|
jpayne@68
|
1022 }else if(a.equals("dupesubs") || a.equals("duplicatesubs") || a.equals("dsubs") || a.equals("subs") || a.equals("s")){
|
jpayne@68
|
1023 maxSubstitutions=Integer.parseInt(b);
|
jpayne@68
|
1024 }else if(a.equals("dupedist") || a.equals("duplicatedistance") || a.equals("ddist") || a.equals("dist") || a.equals("opticaldist") || a.equals("distance")){
|
jpayne@68
|
1025 maxOpticalDistance=Float.parseFloat(b);
|
jpayne@68
|
1026 opticalOnly=maxOpticalDistance>=0;
|
jpayne@68
|
1027 }else if(a.equals("scanlimit") || a.equals("scan")){
|
jpayne@68
|
1028 scanLimit=Integer.parseInt(b);
|
jpayne@68
|
1029 }else if(a.equals("allowns")){
|
jpayne@68
|
1030 allowNs=Parse.parseBoolean(b);
|
jpayne@68
|
1031 }else if(a.equals("containment") || a.equals("absorbcontainment") || a.equals("ac") || a.equals("contains")){
|
jpayne@68
|
1032 containment=Parse.parseBoolean(b);
|
jpayne@68
|
1033 }else if(a.equalsIgnoreCase("prefixOrSuffix") || a.equalsIgnoreCase("suffixOrPrefix") || a.equals("affix") || a.equals("pos")){
|
jpayne@68
|
1034 affix=Parse.parseBoolean(b);
|
jpayne@68
|
1035 }else if(a.equals("printduplicates")){
|
jpayne@68
|
1036 printDuplicates=Parse.parseBoolean(b);
|
jpayne@68
|
1037 }else if(a.equals("dupeidentity")){
|
jpayne@68
|
1038 float dupeIdentity=Float.parseFloat(b);
|
jpayne@68
|
1039 if(dupeIdentity>1){dupeIdentity=dupeIdentity/100;}
|
jpayne@68
|
1040 assert(dupeIdentity<=1);
|
jpayne@68
|
1041 dupeSubRate=1-dupeIdentity;
|
jpayne@68
|
1042 }else if(a.equals("dupesubrate") || a.equals("dsr") || a.equals("subrate")){
|
jpayne@68
|
1043 dupeSubRate=Float.parseFloat(b);
|
jpayne@68
|
1044 if(dupeSubRate>1){dupeSubRate=dupeSubRate/100;}
|
jpayne@68
|
1045 assert(dupeSubRate<=1);
|
jpayne@68
|
1046 }
|
jpayne@68
|
1047
|
jpayne@68
|
1048 else if(a.equals("minsizesplit")){
|
jpayne@68
|
1049 Splitter.minSizeSplit=Integer.parseInt(b);
|
jpayne@68
|
1050 }else if(a.equals("minsizefractionsplit")){
|
jpayne@68
|
1051 Splitter.minSizeFractionSplit=Float.parseFloat(b);
|
jpayne@68
|
1052 }else{
|
jpayne@68
|
1053 return false;
|
jpayne@68
|
1054 }
|
jpayne@68
|
1055
|
jpayne@68
|
1056 return true;
|
jpayne@68
|
1057 }
|
jpayne@68
|
1058
|
jpayne@68
|
1059 static void setConservative(boolean newState){
|
jpayne@68
|
1060
|
jpayne@68
|
1061 if(aggressiveFlag){return;}
|
jpayne@68
|
1062 if(newState==conservativeMode){return;}
|
jpayne@68
|
1063 conservativeMode=newState;
|
jpayne@68
|
1064
|
jpayne@68
|
1065 Splitter.conservative=conservativeMode;
|
jpayne@68
|
1066
|
jpayne@68
|
1067 if(conservativeMode){
|
jpayne@68
|
1068 minCountCorrect++;
|
jpayne@68
|
1069 minSizeFractionCorrect*=1.5f;
|
jpayne@68
|
1070 minRatio*=1.25f;
|
jpayne@68
|
1071 minQRatio*=1.5f;
|
jpayne@68
|
1072 minAQRatio*=1.4f;
|
jpayne@68
|
1073 minRatioOffset*=1.25f;
|
jpayne@68
|
1074 minRatioQMult*=1.50f;
|
jpayne@68
|
1075 minRatioMult*=1.4f;
|
jpayne@68
|
1076 minIdentity=1-((1-minIdentity)*0.7f);
|
jpayne@68
|
1077 if(maxQIncorrect==Integer.MAX_VALUE){maxQIncorrect=35;}
|
jpayne@68
|
1078 }else{
|
jpayne@68
|
1079 minCountCorrect--;
|
jpayne@68
|
1080 minSizeFractionCorrect/=1.5f;
|
jpayne@68
|
1081 minRatio/=1.25f;
|
jpayne@68
|
1082 minQRatio/=1.5f;
|
jpayne@68
|
1083 minAQRatio/=1.4f;
|
jpayne@68
|
1084 minRatioOffset/=1.25f;
|
jpayne@68
|
1085 minRatioQMult/=1.50f;
|
jpayne@68
|
1086 minRatioMult/=1.4f;
|
jpayne@68
|
1087 minIdentity=1-((1-minIdentity)/0.7f);
|
jpayne@68
|
1088 if(maxQIncorrect==35){maxQIncorrect=Integer.MAX_VALUE;}
|
jpayne@68
|
1089 }
|
jpayne@68
|
1090 }
|
jpayne@68
|
1091
|
jpayne@68
|
1092 /*--------------------------------------------------------------*/
|
jpayne@68
|
1093
|
jpayne@68
|
1094 public static void setXY() {
|
jpayne@68
|
1095 if(ReadKey.spanTilesX){sortX=true;}
|
jpayne@68
|
1096 if(ReadKey.spanTilesY){sortY=true;}
|
jpayne@68
|
1097 }
|
jpayne@68
|
1098
|
jpayne@68
|
1099 static boolean allowNs=true;
|
jpayne@68
|
1100 static boolean markAll=false;
|
jpayne@68
|
1101 static boolean markOnly=false;
|
jpayne@68
|
1102 static boolean opticalOnly=false;
|
jpayne@68
|
1103 static boolean containment=false;
|
jpayne@68
|
1104 static boolean affix=false;
|
jpayne@68
|
1105 static boolean printDuplicates=false; //For debugging
|
jpayne@68
|
1106
|
jpayne@68
|
1107 private static boolean renameByCount=false;
|
jpayne@68
|
1108 private static int scanLimit=5;
|
jpayne@68
|
1109 private static int maxSubstitutions=2;
|
jpayne@68
|
1110 private static float dupeSubRate=0;
|
jpayne@68
|
1111 private static float maxOpticalDistance=40;
|
jpayne@68
|
1112
|
jpayne@68
|
1113 static boolean forceProcess=false;
|
jpayne@68
|
1114 static boolean conservativeFlag=false;
|
jpayne@68
|
1115 static boolean aggressiveFlag=false;
|
jpayne@68
|
1116 static boolean conservativeMode=false;
|
jpayne@68
|
1117 static boolean renameConsensus=false;
|
jpayne@68
|
1118 static int minCountCorrect=4; //mcc=4 was slightly better than 3
|
jpayne@68
|
1119 static float minSizeFractionCorrect=0.20f; //0.11 is very slightly better than 0.14
|
jpayne@68
|
1120 static float minRatio=30.0f;
|
jpayne@68
|
1121 static float minQRatio=2.8f; //Does nothing?
|
jpayne@68
|
1122 static float minAQRatio=0.7f;
|
jpayne@68
|
1123 static float minRatioOffset=1.9f; //3 is far worse than 2; 1 is better
|
jpayne@68
|
1124 static float minRatioQMult=0.08f;
|
jpayne@68
|
1125 static float minRatioMult=1.80f; //2.5 is far worse than 2; 1.5 is better
|
jpayne@68
|
1126 static float minIdentity=0.97f; //0.98 is slightly better than 0.96; 0.94 is substantially worse
|
jpayne@68
|
1127 static byte maxQAdjust=0;
|
jpayne@68
|
1128 static int maxQIncorrect=Integer.MAX_VALUE;
|
jpayne@68
|
1129 static int maxCIncorrect=Integer.MAX_VALUE;
|
jpayne@68
|
1130
|
jpayne@68
|
1131 static boolean sortX=false; //Not needed for NextSeq
|
jpayne@68
|
1132 static boolean sortY=true;
|
jpayne@68
|
1133 static boolean forceSortXY=false; //Mainly for testing
|
jpayne@68
|
1134 static int sortXYSize=6;
|
jpayne@68
|
1135
|
jpayne@68
|
1136 /** May slightly increase speed for optical dedupe. Can be safely disabled. */
|
jpayne@68
|
1137 static boolean sortYEarly(){return sortY && (forceSortXY || opticalOnly);}
|
jpayne@68
|
1138
|
jpayne@68
|
1139 // private static final boolean countQuality=false;
|
jpayne@68
|
1140 public static int k=31;
|
jpayne@68
|
1141 private static final long serialVersionUID = 1L;
|
jpayne@68
|
1142
|
jpayne@68
|
1143 }
|