Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/Clump.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package clump; | |
2 | |
3 import java.util.ArrayList; | |
4 | |
5 import dna.AminoAcid; | |
6 import shared.KillSwitch; | |
7 import shared.Parse; | |
8 import shared.Tools; | |
9 import stream.Read; | |
10 import structures.ByteBuilder; | |
11 | |
12 /** | |
13 * A list of reads sharing a kmer. | |
14 * @author Brian Bushnell | |
15 * @date Nov 7, 2015 | |
16 * | |
17 */ | |
18 public class Clump extends ArrayList<Read> implements Comparable<Clump> { | |
19 | |
20 public static Clump makeClump(long kmer){ | |
21 try { | |
22 return new Clump(kmer); | |
23 } catch (OutOfMemoryError e) { | |
24 KillSwitch.memKill(e); | |
25 throw new RuntimeException(); | |
26 } | |
27 } | |
28 | |
29 private Clump(long kmer_){ | |
30 this(kmer_, 4); | |
31 } | |
32 | |
33 private Clump(long kmer_, int size){ | |
34 super(size); | |
35 kmer=kmer_; | |
36 } | |
37 | |
38 @Override | |
39 public boolean add(Read r){ | |
40 ReadKey key=(ReadKey) r.obj; | |
41 assert(key.kmer==kmer); | |
42 key.clump=this; | |
43 return super.add(r); | |
44 } | |
45 | |
46 private void setMaxima(){ | |
47 maxLeft=-1; | |
48 maxRight=-1; | |
49 width=-1; | |
50 for(Read r : this){ | |
51 ReadKey key=(ReadKey) r.obj; | |
52 final int pos=key.position; | |
53 maxLeft=Tools.max(maxLeft, pos); | |
54 maxRight=Tools.max(maxRight, r.length()-pos); | |
55 } | |
56 width=maxLeft+maxRight; | |
57 } | |
58 | |
59 /** This will create counts of bases, or sums of qualities, at each position in the cluster. */ | |
60 private int[][] count(final boolean qualityScores){ | |
61 if(width<0){setMaxima();} | |
62 | |
63 // System.err.println("\n\n"); | |
64 final int[][] counts=new int[4][width]; | |
65 for(Read r : this){ | |
66 ReadKey key=(ReadKey) r.obj; | |
67 final int pos=key.position; | |
68 byte[] bases=r.bases, quals=r.quality; | |
69 if(quals==null){useQuality=false;} | |
70 | |
71 // System.err.println("pos="+pos+", maxLeft="+maxLeft); | |
72 for(int cloc=0, rloc=maxLeft-pos; cloc<bases.length; cloc++, rloc++){ | |
73 // System.err.println("cloc="+cloc+"/"+bases.length+", rloc="+rloc+"/"+width); | |
74 int x=AminoAcid.baseToNumber[bases[cloc]]; | |
75 if(x>-1){ | |
76 final int q; | |
77 if(qualityScores){q=(quals==null ? 20 : quals[cloc]);} | |
78 else{q=1;} | |
79 counts[x][rloc]+=q; | |
80 } | |
81 } | |
82 } | |
83 return counts; | |
84 } | |
85 | |
86 /*--------------------------------------------------------------*/ | |
87 | |
88 public Read makeSimpleConsensus(){ | |
89 // assert(Splitter.findBestPivot(this)<0) : Splitter.findBestPivot(this); //TODO: Slow | |
90 if(size()==1){ | |
91 Read r=get(0); | |
92 if(renameConsensus){r.id=r.numericID+" size=1";} | |
93 return r; | |
94 } | |
95 final int[][] bcounts=baseCounts(); | |
96 final int[][] qcounts=qualityCounts(); | |
97 | |
98 final byte[] bases=new byte[width], quals=new byte[width]; | |
99 for(int i=0; i<width; i++){ | |
100 int x=getConsensusAtPosition(qcounts, i); | |
101 int y=getSecondHighest(qcounts, i); | |
102 if(x>=0 && qcounts[x][i]==qcounts[y][i]){//Tie-breaker | |
103 x=getConsensusAtPosition(bcounts, i); | |
104 y=getSecondHighest(bcounts, i); | |
105 } | |
106 | |
107 | |
108 if(x<0){ | |
109 // System.err.println("q="+0+", x="+x+"; A="+counts[0][i]+", C="+counts[1][i]+", G="+counts[2][i]+", T="+counts[3][i]); | |
110 bases[i]='N'; | |
111 quals[i]=0; | |
112 assert(getSumAtPosition(qcounts, i)==0) : "\n"+bcounts[0][i]+", "+bcounts[1][i]+", "+bcounts[2][i]+", "+bcounts[3][i]+ | |
113 "\n"+qcounts[0][i]+", "+qcounts[1][i]+", "+qcounts[2][i]+", "+qcounts[3][i]+ | |
114 "\nwidth="+width+", i="+i+", size="+size()+"\n"+new String(bases, 0, i)+"\n"+get(0)+"\n"+get(1)+"\n"; | |
115 // assert(getSumAtPosition(bcounts, i)==0) : "\n"+bcounts[0][i]+", "+bcounts[1][i]+", "+bcounts[2][i]+", "+bcounts[3][i]+ | |
116 // "\n"+qcounts[0][i]+", "+qcounts[1][i]+", "+qcounts[2][i]+", "+qcounts[3][i]+ | |
117 // "\nwidth="+width+", i="+i+", size="+size()+"\n"+new String(bases, 0, i)+"\n"+get(0)+"\n"+get(1)+"\n"; | |
118 }else{ | |
119 final long bsum=getSumAtPosition(bcounts, i); | |
120 final long bmajor=bcounts[x][i]; | |
121 final long bminor=bsum-bcounts[x][i]; | |
122 final long bsecond=bcounts[y][i]; | |
123 | |
124 final long qsum=getSumAtPosition(qcounts, i); | |
125 final long qmajor=qcounts[x][i]; | |
126 final long qminor=qsum-qcounts[x][i]; | |
127 final long qsecond=qcounts[y][i]; | |
128 | |
129 float bratio=bminor/(float)(bmajor+bminor); | |
130 double phred=(bminor==0 ? Read.MAX_CALLED_QUALITY() : -10*Math.log10(bratio)); | |
131 phred=Tools.min(phred, qmajor-qminor); | |
132 // if(phred<0 || phred>127){ | |
133 // assert(false) : i+","+x+","+bsum+","+qsum+","+bmajor+","+bminor+","+bratio+","+phred+"\n"+ | |
134 // bcounts[0][i]+","+bcounts[1][i]+","+bcounts[2][i]+","+bcounts[3][i]+"\n"+ | |
135 // qcounts[0][i]+","+qcounts[1][i]+","+qcounts[2][i]+","+qcounts[3][i]+"\n"+ | |
136 // this.toStringStaggered()+"\n"; | |
137 // } | |
138 // assert(phred>=0 && phred<=127) : phred+","+x+","+i+","+bratio+","+bcounts[x][i]+","+bcounts[0][i]+ | |
139 // ","+bcounts[1][i]+","+bcounts[2][i]+","+bcounts[3][i]; | |
140 // assert(phred>=0 && phred<=127) : bmajor+", "+bminor+", "+phred+", "+Read.MAX_CALLED_QUALITY; | |
141 byte q=Read.capQuality((long)Math.round(phred)); | |
142 bases[i]=AminoAcid.numberToBase[x]; | |
143 quals[i]=q; | |
144 assert(q>0); | |
145 assert(x>-1); | |
146 assert(bases[i]!='N'); | |
147 } | |
148 } | |
149 Read leftmost=this.get(0);//TODO: Actually rightmost! | |
150 Read r=new Read(bases, quals, leftmost.numericID+" size="+size(), 0); | |
151 //TODO: Attach the long pair, and make sure the kmer location is correct. | |
152 // assert(false) : "\n"+r.toFastq()+"\nCheck kmer location."; | |
153 // assert(size()==1) : "\n"+r.toFastq()+"\n"+get(0).toFastq()+"\n"+get(size()-1).toFastq()+"\n"; | |
154 return r; | |
155 } | |
156 | |
157 /*--------------------------------------------------------------*/ | |
158 | |
159 public int removeDuplicates(){ | |
160 assert(KmerComparator.compareSequence); | |
161 if(size()<2){return 0;} | |
162 | |
163 int removedTotal=0, removed=0; | |
164 | |
165 final boolean sortXY=(forceSortXY || sortYEarly() || (opticalOnly && (sortX || sortY) && size()>=sortXYSize)); | |
166 | |
167 final int maxDiscarded; | |
168 final int scan; | |
169 | |
170 if(opticalOnly && sortXY){ | |
171 scan=Tools.max(scanLimit, 200); | |
172 maxDiscarded=scan+10; | |
173 }else if(!sortXY && ((maxSubstitutions<1 && dupeSubRate<=0) || scanLimit<0)){ | |
174 scan=0; | |
175 maxDiscarded=0; | |
176 }else{ | |
177 scan=scanLimit; | |
178 maxDiscarded=scan+10; | |
179 } | |
180 | |
181 if(sortXY){ | |
182 assert(sortX || sortY); | |
183 | |
184 if(sortY){ | |
185 if(!sortYEarly()){ | |
186 this.sort(KmerComparatorY.comparator); | |
187 }else{ | |
188 // for(int i=1; i<this.size(); i++){ | |
189 // Read a=get(i-1); | |
190 // Read b=get(i); | |
191 // assert(KmerComparatorY.comparator.compare(a, b)<=0) : a.obj+" vs "+b.obj; | |
192 // } | |
193 // //Otherwise, it was already Y-sorted. | |
194 } | |
195 // assert(false) : sortY(); | |
196 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance); | |
197 removedTotal+=removed; | |
198 // System.err.println("RemovedY: "+removed); | |
199 while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){ | |
200 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance); | |
201 removedTotal+=removed; | |
202 // System.err.println("RemovedY: "+removed); | |
203 } | |
204 } | |
205 if(sortX && (ReadKey.spanTilesX || !sortY)){ | |
206 this.sort(KmerComparatorX.comparator); | |
207 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance); | |
208 removedTotal+=removed; | |
209 // System.err.println("RemovedX: "+removed); | |
210 while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){ | |
211 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, true, markOnly, markAll, renameByCount, maxOpticalDistance); | |
212 removedTotal+=removed; | |
213 // System.err.println("RemovedX: "+removed); | |
214 } | |
215 } | |
216 }else{ | |
217 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan, maxDiscarded, opticalOnly, false, markOnly, markAll, renameByCount, maxOpticalDistance); | |
218 removedTotal+=removed; | |
219 while((maxSubstitutions>0 || dupeSubRate>0) && scanLimit>0 && removed>maxDiscarded){ | |
220 removed=removeDuplicates_inner(maxSubstitutions, dupeSubRate, scan+10, maxDiscarded*2+20, opticalOnly, false, markOnly, markAll, renameByCount, maxOpticalDistance); | |
221 removedTotal+=removed; | |
222 } | |
223 } | |
224 | |
225 return removedTotal; | |
226 } | |
227 | |
228 private int removeDuplicates_inner(final int maxSubs, final float subRate, final int scanLimit, final int maxDiscarded, | |
229 final boolean optical, final boolean xySorted, final boolean mark, final boolean markAll, final boolean rename, final float dist){ | |
230 final int size=size(); | |
231 if(size<2){return 0;} | |
232 int dupePairs=0; | |
233 int dupeReads=0; | |
234 | |
235 // final boolean breakOnTile=(optical && !FlowcellCoordinate.spanTiles); | |
236 | |
237 // final long start=System.nanoTime(); | |
238 | |
239 for(int i=0, lim=size-1; i<lim; i++){ | |
240 final Read a=get(i); | |
241 if(!a.discarded()){ | |
242 final ReadKey keyA=(ReadKey) a.obj; | |
243 final int aLen=a.length(); | |
244 int unequals=0; | |
245 int discarded=0; | |
246 for(int j=i+1; j<size && unequals<=scanLimit && discarded<=maxDiscarded && (!a.discarded() || markAll); j++){ | |
247 final Read b=get(j); | |
248 if(b.discarded()){ | |
249 discarded++; | |
250 }else{ | |
251 final int bLen=b.length(); | |
252 final ReadKey keyB=(ReadKey) b.obj; | |
253 if(!containment && !keyA.equals(keyB)){break;} | |
254 // if(containment && affix && !keyA.physicalAffix(keyB, aLen, bLen)){break;} | |
255 // if(optical && keyA.lane!=keyB.lane){break;} //Already in equals method | |
256 // if(breakOnTile && keyA.tile!=keyB.tile){break;} //Already in equals method | |
257 if(optical && xySorted && !keyA.nearXY(keyB, dist)){break;} | |
258 // if(System.nanoTime()-start>200000000000L){ | |
259 // TextStreamWriter tsw=new TextStreamWriter("foo.fq", true, false, false); | |
260 // tsw.start(); | |
261 // for(Read x : this){ | |
262 // tsw.println(x.toFastq()); | |
263 // } | |
264 // tsw.poisonAndWait(); | |
265 // assert(false) : "kmer="+kmer+", size="+size(); | |
266 // } | |
267 if(equals(a, b, maxSubs, subRate)){ | |
268 if(!optical || keyA.near(keyB, dist)){ | |
269 if(printDuplicates){ | |
270 System.out.println(a.toFasta()); | |
271 System.out.println(b.toFasta()); | |
272 } | |
273 float errA=a.expectedErrorsIncludingMate(true); | |
274 float errB=b.expectedErrorsIncludingMate(true); | |
275 if(markAll){ | |
276 b.setDiscarded(true); | |
277 assert(!a.discarded() || markAll); | |
278 dupePairs++; | |
279 dupeReads+=1+b.mateCount(); | |
280 unequals=0; | |
281 if(!a.discarded()){ | |
282 a.setDiscarded(true); | |
283 dupePairs++; | |
284 dupeReads+=1+a.mateCount(); | |
285 } | |
286 }else if(containment || errB>=errA){ | |
287 b.setDiscarded(true); | |
288 assert(!a.discarded() || markAll); | |
289 a.copies+=b.copies+parseExtraCopies(b); | |
290 dupePairs++; | |
291 dupeReads+=1+b.mateCount(); | |
292 unequals=0; | |
293 }else{ | |
294 a.setDiscarded(true); | |
295 assert(!b.discarded() || markAll); | |
296 b.copies+=a.copies+parseExtraCopies(a); | |
297 dupePairs++; | |
298 dupeReads+=1+a.mateCount(); | |
299 } | |
300 } | |
301 }else{ | |
302 unequals++; | |
303 } | |
304 } | |
305 } | |
306 } | |
307 } | |
308 if(dupeReads>0){ | |
309 for(int i=0; i<size; i++){ | |
310 Read a=get(i); | |
311 if(a.discarded()){ | |
312 if(mark){ | |
313 if(!a.id.endsWith(" duplicate")){ | |
314 a.id+=" duplicate"; | |
315 if(a.mate!=null){a.mate.id+=" duplicate";} | |
316 } | |
317 }else{ | |
318 set(i, null); | |
319 } | |
320 }else if(rename && a.copies>1){ | |
321 renameFromCount(a); | |
322 } | |
323 a.copies=1; | |
324 } | |
325 if(!mark){ | |
326 int x=Tools.condenseStrict(this); | |
327 assert(x==dupePairs) : size()+", "+size+", "+dupePairs+", "+x; | |
328 assert((size()>0 || markAll) && size()==size-dupePairs) : size()+", "+size+", "+dupePairs; | |
329 } | |
330 } | |
331 | |
332 if(containment){ | |
333 dupeReads+=removeDuplicates_backwards(maxSubs, subRate, scanLimit, maxDiscarded, optical, xySorted, mark, markAll, rename, dist); | |
334 } | |
335 | |
336 return dupeReads; | |
337 } | |
338 | |
339 /** Only for containments */ | |
340 private int removeDuplicates_backwards(final int maxSubs, final float subRate, final int scanLimit, final int maxDiscarded, | |
341 final boolean optical, final boolean xySorted, final boolean mark, final boolean markAll, final boolean rename, final float dist){ | |
342 final int size=size(); | |
343 if(size<2){return 0;} | |
344 int dupePairs=0; | |
345 int dupeReads=0; | |
346 | |
347 // final boolean breakOnTile=(optical && !FlowcellCoordinate.spanTiles); | |
348 | |
349 // final long start=System.nanoTime(); | |
350 | |
351 for(int i=size-1; i>0; i--){ | |
352 final Read a=get(i); | |
353 if(!a.discarded()){ | |
354 final ReadKey keyA=(ReadKey) a.obj; | |
355 final int aLen=a.length(); | |
356 int unequals=0; | |
357 int discarded=0; | |
358 for(int j=i-1; j>=0 && unequals<=scanLimit && discarded<=maxDiscarded && (!a.discarded() || markAll); j--){ | |
359 final Read b=get(j); | |
360 if(b.discarded()){ | |
361 discarded++; | |
362 }else{ | |
363 final int bLen=b.length(); | |
364 final ReadKey keyB=(ReadKey) b.obj; | |
365 if(!containment && !keyA.equals(keyB)){break;} | |
366 // if(containment && affix && !keyA.physicalAffix(keyB, aLen, bLen)){break;} | |
367 // if(optical && keyA.lane!=keyB.lane){break;} //Already in equals method | |
368 // if(breakOnTile && keyA.tile!=keyB.tile){break;} //Already in equals method | |
369 if(optical && xySorted && !keyA.nearXY(keyB, dist)){break;} | |
370 // if(System.nanoTime()-start>200000000000L){ | |
371 // TextStreamWriter tsw=new TextStreamWriter("foo.fq", true, false, false); | |
372 // tsw.start(); | |
373 // for(Read x : this){ | |
374 // tsw.println(x.toFastq()); | |
375 // } | |
376 // tsw.poisonAndWait(); | |
377 // assert(false) : "kmer="+kmer+", size="+size(); | |
378 // } | |
379 if(equals(a, b, maxSubs, subRate)){ | |
380 if(!optical || keyA.near(keyB, dist)){ | |
381 if(printDuplicates){ | |
382 System.out.println(a.toFasta()); | |
383 System.out.println(b.toFasta()); | |
384 } | |
385 float errA=a.expectedErrorsIncludingMate(true); | |
386 float errB=b.expectedErrorsIncludingMate(true); | |
387 if(markAll){ | |
388 b.setDiscarded(true); | |
389 assert(!a.discarded() || markAll); | |
390 dupePairs++; | |
391 dupeReads+=1+b.mateCount(); | |
392 unequals=0; | |
393 if(!a.discarded()){ | |
394 a.setDiscarded(true); | |
395 dupePairs++; | |
396 dupeReads+=1+a.mateCount(); | |
397 } | |
398 }else if(containment || errB>=errA){ | |
399 b.setDiscarded(true); | |
400 assert(!a.discarded() || markAll); | |
401 a.copies+=b.copies+parseExtraCopies(b); | |
402 dupePairs++; | |
403 dupeReads+=1+b.mateCount(); | |
404 unequals=0; | |
405 }else{ | |
406 a.setDiscarded(true); | |
407 assert(!b.discarded() || markAll); | |
408 b.copies+=a.copies+parseExtraCopies(a); | |
409 dupePairs++; | |
410 dupeReads+=1+a.mateCount(); | |
411 } | |
412 } | |
413 }else{ | |
414 unequals++; | |
415 } | |
416 } | |
417 } | |
418 } | |
419 } | |
420 if(dupeReads>0){ | |
421 for(int i=0; i<size; i++){ | |
422 Read a=get(i); | |
423 if(a.discarded()){ | |
424 if(mark){ | |
425 if(!a.id.endsWith(" duplicate")){ | |
426 a.id+=" duplicate"; | |
427 if(a.mate!=null){a.mate.id+=" duplicate";} | |
428 } | |
429 }else{ | |
430 set(i, null); | |
431 } | |
432 }else if(rename && a.copies>1){ | |
433 renameFromCount(a); | |
434 } | |
435 a.copies=1; | |
436 } | |
437 if(!mark){ | |
438 int x=Tools.condenseStrict(this); | |
439 assert(x==dupePairs) : size()+", "+size+", "+dupePairs+", "+x; | |
440 assert((size()>0 || markAll) && size()==size-dupePairs) : size()+", "+size+", "+dupePairs; | |
441 } | |
442 } | |
443 return dupeReads; | |
444 } | |
445 | |
446 public int parseExtraCopies(final Read a){ | |
447 final String id=a.id; | |
448 final int space=id.lastIndexOf(' '); | |
449 int extraCopies=0; | |
450 if(space<0){return 0;} | |
451 if(space>=0 && Tools.contains(id, "copies=", space+1)){ | |
452 extraCopies=Integer.parseInt(id.substring(space+8))-1; | |
453 } | |
454 return extraCopies; | |
455 } | |
456 | |
457 private void renameFromCount(final Read a){ | |
458 final int newExtraCopies=a.copies-1; | |
459 assert(newExtraCopies>0) : newExtraCopies; | |
460 final int oldExtraCopies=parseExtraCopies(a); | |
461 final int total=1+newExtraCopies+oldExtraCopies; | |
462 renameToTotal(a, total); | |
463 if(a.pairnum()==0 && a.mate!=null){ | |
464 assert(a.mate.pairnum()==1); | |
465 renameToTotal(a.mate, total); | |
466 } | |
467 } | |
468 | |
469 private static void renameToTotal(final Read a, final int total){ | |
470 final String id=a.id; | |
471 final int space=id.lastIndexOf(' '); | |
472 if(space>=0 && Tools.contains(id, "copies=", space+1)){ | |
473 a.id=a.id.substring(0, space); | |
474 } | |
475 a.id=a.id+" copies="+total; | |
476 } | |
477 | |
478 // public boolean nearby(Read a, Read b, FlowcellCoordinate fca, FlowcellCoordinate fcb, float maxDist){ | |
479 //// fca.setFrom(a.id); | |
480 // fcb.setFrom(b.id); | |
481 // float dist=fca.distance(fcb); | |
482 // return dist<=maxDist; | |
483 // } | |
484 | |
485 // public boolean nearby(ReadKey ka, ReadKey kb, float maxDist){ | |
486 // float dist=ka.distance(kb); | |
487 // return dist<=maxDist; | |
488 // } | |
489 | |
490 public static boolean equals(Read a, Read b, int maxSubs, float dupeSubRate){ | |
491 if(a.numericID==b.numericID){return false;} | |
492 if(dupeSubRate>0){maxSubs=Tools.max(maxSubs, (int)(dupeSubRate*Tools.min(a.length(), b.length())));} | |
493 if(containment){ | |
494 return contains(a, b, maxSubs); | |
495 } | |
496 if(!equals(a.bases, b.bases, maxSubs)){return false;} | |
497 if(a.mate!=null && !equals(a.mate.bases, b.mate.bases, maxSubs)){return false;} | |
498 return true; | |
499 } | |
500 | |
501 public static boolean equals(byte[] a, byte[] b, int maxSubs){ | |
502 if(a==b){return false;}//Nothing should subsume itself | |
503 if(a==null || b==null){return false;} | |
504 if(a.length!=b.length){return false;} | |
505 int subs=0; | |
506 if(allowNs){ | |
507 for(int i=0; i<a.length; i++){ | |
508 if(a[i]!=b[i] && (a[i]!='N' && b[i]!='N')){ | |
509 subs++; | |
510 if(subs>maxSubs){return false;} | |
511 } | |
512 } | |
513 }else{ | |
514 for(int i=0; i<a.length; i++){ | |
515 if(a[i]!=b[i]){ | |
516 subs++; | |
517 if(subs>maxSubs){return false;} | |
518 } | |
519 } | |
520 } | |
521 return true; | |
522 } | |
523 | |
524 public static boolean contains(Read a, Read b, int maxSubs){ | |
525 if(a.numericID==b.numericID){return false;} | |
526 boolean ok=contains_inner(a, b, maxSubs); | |
527 if(!ok || a.mate==null){return ok;} | |
528 ok=contains_inner(a.mate, b.mate, maxSubs); | |
529 if(!ok){return false;} | |
530 ReadKey rka1=(ReadKey)a.obj; | |
531 ReadKey rkb1=(ReadKey)b.obj; | |
532 ReadKey rka2=(ReadKey)a.mate.obj; //TODO: In containment mode, mates need to always get keys. | |
533 ReadKey rkb2=(ReadKey)b.mate.obj; | |
534 return ((rka1.kmerMinusStrand==rkb1.kmerMinusStrand) && (rka2.kmerMinusStrand==rkb2.kmerMinusStrand)); //Ensures that both reads have the same directionality. | |
535 } | |
536 | |
537 public static boolean contains_inner(Read a, Read b, int maxSubs){ | |
538 // if(a.length()==b.length()){return equals(a.bases, b.bases, maxSubs);} | |
539 ReadKey rka=(ReadKey)a.obj; | |
540 ReadKey rkb=(ReadKey)b.obj; | |
541 if(affix ? !rka.physicalAffix(rkb, a.length(), b.length()) : !rka.physicallyContains(rkb, a.length(), b.length())){return false;} | |
542 boolean flipped=false; | |
543 // if(a.mate!=null){//In paired mode, need synchronization if the reads are in difference clumps. But... just don't do that. | |
544 // Read max, min; | |
545 // if(a.numericID>b.numericID){max=a; min=b;}//Protects against deadlocks. | |
546 // else{max=b; min=a;} | |
547 // synchronized(min){ | |
548 // synchronized(max){ | |
549 // if(rka.kmerMinusStrand!=rkb.kmerMinusStrand){ | |
550 // rkb.flip(b, k); | |
551 // flipped=true; | |
552 // } | |
553 // boolean ok=contains(a.bases, b.bases, rka.position, rkb.position, maxSubs); | |
554 // if(flipped){rkb.flip(b, k);} | |
555 // return ok; | |
556 // } | |
557 // } | |
558 // } | |
559 if(rka.kmerMinusStrand!=rkb.kmerMinusStrand){ | |
560 rkb.flip(b, k); | |
561 flipped=true; | |
562 } | |
563 boolean ok=contains(a.bases, b.bases, rka.position, rkb.position, maxSubs); | |
564 if(flipped){rkb.flip(b, k);} | |
565 return ok; | |
566 } | |
567 | |
568 public static boolean contains(byte[] a, byte[] b, int posA, int posB, int maxSubs){ | |
569 if(a==null || b==null){return false;} | |
570 assert(a.length>=b.length); | |
571 if(a==b){return false;} //Nothing should contain itself | |
572 int subs=0; | |
573 | |
574 int ai, bi; | |
575 final int dif=posA-posB; | |
576 if(dif>0){ | |
577 ai=0; | |
578 bi=-dif; | |
579 }else{ | |
580 ai=dif; | |
581 bi=0; | |
582 } | |
583 | |
584 if(allowNs){ | |
585 for(; ai<a.length && bi<b.length; ai++, bi++){ | |
586 if(ai>=0 && bi>=0){ | |
587 final byte na=a[ai], nb=b[bi]; | |
588 if(na!=nb && na!='N' && nb!='N'){ | |
589 subs++; | |
590 if(subs>maxSubs){return false;} | |
591 } | |
592 } | |
593 } | |
594 }else{ | |
595 for(; ai<a.length && bi<b.length; ai++, bi++){ | |
596 if(ai>=0 && bi>=0 && a[ai]!=b[bi]){ | |
597 subs++; | |
598 if(subs>maxSubs){return false;} | |
599 } | |
600 } | |
601 } | |
602 return true; | |
603 } | |
604 | |
605 /*--------------------------------------------------------------*/ | |
606 | |
607 public long splitAndErrorCorrect(){ | |
608 if(size()<Splitter.minSizeSplit){ | |
609 return errorCorrect(); | |
610 } | |
611 long sum=0; | |
612 ArrayList<Clump> list=Splitter.splitOnPivot(this); | |
613 for(Clump c : list){ | |
614 sum+=c.errorCorrect(); | |
615 } | |
616 return sum; | |
617 } | |
618 | |
619 public long errorCorrect(){ | |
620 if(size()<=minCountCorrect){return 0;} | |
621 // assert(Splitter.findBestPivot(this)<0); //TODO: Slow | |
622 Read consen=makeSimpleConsensus(); | |
623 long sum=0; | |
624 final int[] rvector=KillSwitch.allocInt1D(2); | |
625 for(Read r : this){ | |
626 sum+=errorCorrect(r, consen, rvector); | |
627 } | |
628 return sum; | |
629 } | |
630 | |
631 private int errorCorrect(Read call, Read ref, int[] rvector){ | |
632 | |
633 // assert(call.validated()); | |
634 // assert(call.checkQuality()); | |
635 // assert(ref.checkQuality()); | |
636 | |
637 final float identity=identity(call, ref.bases, rvector); | |
638 if((identity<minIdentity && (rvector[1]>0 || rvector[0]<50)) || (identity==1 && !call.containsNocalls()/* && !ref.containsNocalls()*/)){ | |
639 //TODO: Strange, the identity==1 clause causes different behavior, though it does give a speedup. | |
640 return 0; | |
641 } | |
642 final byte[] cbases=call.bases, cquals=call.quality; | |
643 final byte[] rbases=ref.bases, rquals=ref.quality; | |
644 | |
645 ReadKey key=(ReadKey) call.obj; | |
646 final int pos=key.position; | |
647 | |
648 final int[][] bcounts=baseCounts(); | |
649 final int[][] qcounts=qualityCounts(); | |
650 final float[][] qAvgs=qualityAverages(); | |
651 | |
652 final int minDepth=(int)Tools.max(minCountCorrect, minSizeFractionCorrect*size()); | |
653 | |
654 int corrections=0; | |
655 | |
656 final int cStart=0, rStart=maxLeft-pos, max=cbases.length; | |
657 for(int cloc=cStart, rloc=rStart; cloc<max; cloc++, rloc++){ | |
658 //Called base, ref base | |
659 final byte cb=cbases[cloc], rb=rbases[rloc]; | |
660 //Called quality, ref quality | |
661 final byte cq=(cquals==null ? 20 : cquals[cloc]), rq=rquals[rloc]; | |
662 //Called number | |
663 final byte cx=AminoAcid.baseToNumber[cb]; | |
664 //Ref number | |
665 final byte rx=AminoAcid.baseToNumber[rb]; | |
666 | |
667 // assert((cb=='N') == (cquals[cloc]==0)); | |
668 | |
669 final byte b, q; | |
670 if(cx<0){ | |
671 b=rb; | |
672 q=(byte)Tools.min(20, rq); | |
673 }else if(cb==rb){ | |
674 b=cb; | |
675 q=(byte)Tools.mid(cq, cq+1, rq);//Adjust upward | |
676 assert(q>=cq && (q<=rq || q<=cq)); | |
677 }else{ | |
678 final int cCount=bcounts[cx][rloc]; | |
679 final int rCount=bcounts[rx][rloc]; | |
680 final int altQSum=qcounts[cx][rloc]; | |
681 final int rQSum=qcounts[rx][rloc]; | |
682 final float rQAvg=qAvgs[rx][rloc]; | |
683 if(cCount<=maxCIncorrect && cq<=maxQIncorrect && cq*minQRatio<rQSum && cq*minAQRatio<8+rQAvg){ | |
684 final byte pminor=getSecondHighest(bcounts, rloc); | |
685 | |
686 assert(rx>=0 && rx<bcounts.length) : rx+", "+rloc+", "+bcounts.length+"\n"+call.toFastq()+"\n"+ref.toFastq(); | |
687 assert(rloc>=0 && rloc<bcounts[rx].length) : rx+", "+rloc+", "+bcounts[rloc].length+"\n"+call.toFastq()+"\n"+ref.toFastq(); | |
688 final int minorCount=bcounts[pminor][rloc]; | |
689 | |
690 final long sum=getSumAtPosition(bcounts, rloc); | |
691 // final long altCount=sum-rCount; | |
692 | |
693 boolean change=false; | |
694 if(rCount>=minCountCorrect && sum>=minDepth){ | |
695 final float ratioNeeded=Tools.min(minRatio, minRatioMult*minorCount+minRatioOffset+minRatioQMult*cq); | |
696 // final float qratioNeeded=Tools.min(minRatio, minRatioMult*altQSum+minRatioOffset+minRatioQMult*cq); //altQSum is slightly different than minorQCount | |
697 if(minorCount*ratioNeeded<=rCount && altQSum*ratioNeeded<=rQSum){ | |
698 change=true; | |
699 } | |
700 | |
701 // else if(minorCount*ratioNeeded<=rCount){ | |
702 // assert(false) : "\n"+altQSum+", "+rQSum+", "+qratioNeeded+"\n"+cCount+", "+rCount+", "+sum+", "+ratioNeeded+"\n"+(altQSum*qratioNeeded); | |
703 // } | |
704 } | |
705 if(change){ | |
706 corrections++; | |
707 b=rb; | |
708 q=(byte)Tools.min(cq+1, 25, rq); | |
709 // assert(q==25 || (q<=rq && q>=cq)) : q+", "+cq+", "+rq; | |
710 }else{ | |
711 b=cb; | |
712 q=(byte)Tools.mid(cq, cq-1, 6);//Adjust downward | |
713 assert(q<=cq || q>=6) : q+","+cq; | |
714 } | |
715 }else{ | |
716 b=cb; | |
717 q=cq; | |
718 } | |
719 } | |
720 cbases[cloc]=b; | |
721 if(cquals!=null){ | |
722 byte q2=(byte)Tools.mid(q, cq+maxQAdjust, cq-maxQAdjust); | |
723 if(q2==0 && AminoAcid.isFullyDefined(b)){ | |
724 assert(!AminoAcid.isFullyDefined(cb)); | |
725 q2=(byte)Tools.mid(2, 25, (rq+25)/2); | |
726 }else if(!AminoAcid.isFullyDefined(b)){ | |
727 q2=0; | |
728 } | |
729 cquals[cloc]=q2; | |
730 assert((b=='N') == (cquals[cloc]==0)) : "b="+(char)b+", cb="+(char)cb+", rb="+(char)rb+", cx="+cx+", " | |
731 + "new="+cquals[cloc]+", q="+q+", old="+cq+", rq="+rq+", loc="+rloc+"\n"+call.toFastq()+"\n"+ref.toFastq(); | |
732 } | |
733 } | |
734 return corrections; | |
735 } | |
736 | |
737 /*--------------------------------------------------------------*/ | |
738 | |
739 //Only used by condense mode. | |
740 public ArrayList<Read> makeConsensus(){ | |
741 if(size()==1){ | |
742 Read r=get(0); | |
743 r.id=r.numericID+" size=1"; | |
744 return this; | |
745 } | |
746 | |
747 ArrayList<Clump> clumps=Splitter.splitOnPivot(this);//TODO: Really, this should return null if there is no pivot | |
748 | |
749 ArrayList<Read> list=new ArrayList<Read>(clumps.size()); | |
750 for(Clump c : clumps){ | |
751 Read temp=c.makeSimpleConsensus(); | |
752 list.add(temp); | |
753 } | |
754 return list; | |
755 } | |
756 | |
757 /*--------------------------------------------------------------*/ | |
758 | |
759 private float identity(Read call, byte[] rbases, int[] rvector){ | |
760 ReadKey key=(ReadKey) call.obj; | |
761 final int pos=key.position; | |
762 byte[] cbases=call.bases, quals=call.quality; | |
763 int good=0, bad=0; | |
764 | |
765 final int cStart=0, rStart=maxLeft-pos, max=cbases.length; | |
766 for(int cloc=cStart, rloc=rStart; cloc<max; cloc++, rloc++){ | |
767 final byte cb=cbases[cloc], rb=rbases[rloc]; | |
768 if(AminoAcid.isFullyDefined(cb) && AminoAcid.isFullyDefined(rb)){ | |
769 if(cb==rb){good++;} | |
770 else{bad++;} | |
771 } | |
772 } | |
773 rvector[0]=good; | |
774 rvector[1]=bad; | |
775 return good==0 ? 0 : good/(float)(good+bad); | |
776 } | |
777 | |
778 /*--------------------------------------------------------------*/ | |
779 | |
780 long getSumAtPosition(int[][] counts, int pos){ | |
781 long sum=0; | |
782 for(int x=0; x<4; x++){ | |
783 sum+=counts[x][pos]; | |
784 } | |
785 return sum; | |
786 } | |
787 | |
788 byte getConsensusAtPosition(int[][] counts, int pos){ | |
789 byte xMax=0; | |
790 for(byte x=1; x<4; x++){ | |
791 // System.err.println("x="+x+", max="+max+", Checking "+counts[x][pos]+" vs "+counts[x][max]); | |
792 if(counts[x][pos]>counts[xMax][pos]){xMax=x;} | |
793 } | |
794 // assert(counts[max][pos]>=counts[0][pos]); | |
795 // assert(counts[max][pos]>=counts[1][pos]); | |
796 // assert(counts[max][pos]>=counts[2][pos]) : max+", "+counts[max][pos]+", ["+counts[0][pos]+", "+counts[1][pos]+", "+counts[2][pos]+", "+counts[3][pos]+"]"; | |
797 // assert(counts[max][pos]>=counts[3][pos]); | |
798 return (counts[xMax][pos]>0 ? xMax : -1); | |
799 } | |
800 | |
801 byte getSecondHighest(int[][] counts, int pos){ | |
802 byte first=0; | |
803 byte second=1; | |
804 if(counts[first][pos]<counts[second][pos]){ | |
805 first=1; second=0; | |
806 } | |
807 for(byte x=2; x<4; x++){ | |
808 // System.err.println("x="+x+", max="+max+", Checking "+counts[x][pos]+" vs "+counts[x][max]); | |
809 if(counts[x][pos]>counts[first][pos]){ | |
810 second=first; | |
811 first=x; | |
812 }else if(counts[x][pos]>counts[second][pos]){ | |
813 second=x; | |
814 } | |
815 } | |
816 // assert(counts[max][pos]>=counts[0][pos]); | |
817 // assert(counts[max][pos]>=counts[1][pos]); | |
818 // assert(counts[max][pos]>=counts[2][pos]) : max+", "+counts[max][pos]+", ["+counts[0][pos]+", "+counts[1][pos]+", "+counts[2][pos]+", "+counts[3][pos]+"]"; | |
819 // assert(counts[max][pos]>=counts[3][pos]); | |
820 | |
821 return second; //may be actually 0. | |
822 //return (counts[second][pos]>0 ? second : -1); | |
823 } | |
824 | |
825 /*--------------------------------------------------------------*/ | |
826 | |
827 public String toStringStaggered(){ | |
828 ByteBuilder sb=new ByteBuilder(); | |
829 for(Read r : this){ | |
830 ReadKey key=(ReadKey) r.obj; | |
831 final int pos=key.position; | |
832 byte[] bases=r.bases, quals=r.quality; | |
833 int rloc=0, cloc=rloc+pos-maxLeft; | |
834 while(cloc<0){sb.append(' '); cloc++;} | |
835 sb.append(bases); | |
836 sb.append('\n'); | |
837 } | |
838 return sb.toString(); | |
839 } | |
840 | |
841 public Read consensusRead(){ | |
842 if(consensusRead==null){ | |
843 consensusRead=makeSimpleConsensus(); | |
844 } | |
845 return consensusRead; | |
846 } | |
847 | |
848 public int width(){ | |
849 assert(width>=0) : width; | |
850 return width; | |
851 } | |
852 | |
853 public int maxLeft(){ | |
854 assert(maxLeft>=0); | |
855 return maxLeft; | |
856 } | |
857 | |
858 public int maxRight(){ | |
859 assert(maxRight>=0); | |
860 return maxRight; | |
861 } | |
862 | |
863 /*--------------------------------------------------------------*/ | |
864 | |
865 int[][] baseCounts(){ | |
866 if(baseCounts==null){ | |
867 baseCounts=count(false); | |
868 int len=baseCounts[0].length; | |
869 assert(width==-1 || width==len); | |
870 } | |
871 return baseCounts; | |
872 } | |
873 | |
874 int[][] qualityCounts(){ | |
875 if(qualityCounts==null){ | |
876 qualityCounts=count(true); | |
877 int len=qualityCounts[0].length; | |
878 assert(width==-1 || width==len); | |
879 } | |
880 return qualityCounts; | |
881 } | |
882 | |
883 float[][] qualityAverages(){ | |
884 if(qualityAverages==null){ | |
885 qualityAverages=new float[4][width]; | |
886 for(int i=0; i<4; i++){ | |
887 for(int j=0; j<width; j++){ | |
888 int b=baseCounts[i][j]; | |
889 int q=qualityCounts[i][j]; | |
890 qualityAverages[i][j]=(b==0 ? 0 : q/(float)b); | |
891 } | |
892 } | |
893 } | |
894 return qualityAverages; | |
895 } | |
896 | |
897 void clearCounts(){ | |
898 baseCounts=qualityCounts=null; | |
899 qualityAverages=null; | |
900 } | |
901 | |
902 private void clearConsensus(){ | |
903 consensusRead=null; | |
904 } | |
905 | |
906 /*--------------------------------------------------------------*/ | |
907 | |
908 @Override | |
909 public boolean equals(Object b){ | |
910 return this==b; | |
911 } | |
912 | |
913 @Override | |
914 public int hashCode(){ | |
915 return Long.hashCode(kmer); | |
916 } | |
917 | |
918 @Override | |
919 public int compareTo(Clump o) { | |
920 int x=Long.compare(kmer, o.kmer); | |
921 return x!=0 ? x : o.size()-size(); | |
922 } | |
923 | |
924 /*--------------------------------------------------------------*/ | |
925 | |
926 public final long kmer; | |
927 | |
928 private int width=-1; | |
929 private int maxLeft=-1; | |
930 private int maxRight=-1; | |
931 | |
932 private int[][] baseCounts; | |
933 private int[][] qualityCounts; | |
934 private float[][] qualityAverages; | |
935 | |
936 private Read consensusRead; | |
937 | |
938 boolean useQuality(){return useQuality;} | |
939 private boolean useQuality=true; | |
940 | |
941 boolean added=false; | |
942 | |
943 public static final int overhead=overhead(); | |
944 private static int overhead(){ | |
945 return 16+ //self | |
946 16+ //Backing array | |
947 4+ //Backing array size | |
948 4*(8)+ //Backing array initial capacity | |
949 1*(8)+ //kmer | |
950 3*(4)+ //int fields | |
951 4*(8)+ //pointers | |
952 2*(4); //booleans | |
953 } | |
954 | |
955 /*--------------------------------------------------------------*/ | |
956 | |
957 public static boolean parseStatic(String arg, String a, String b){ | |
958 if(a.equals("mincountcorrect") || a.equals("mincc")){ | |
959 minCountCorrect=Integer.parseInt(b); | |
960 }else if(a.equals("minsizesplit") || a.equals("minss")){ | |
961 Splitter.minSizeSplit=Integer.parseInt(b); | |
962 }else if(a.equals("minsizefractionsplit") || a.equals("minsfs")){ | |
963 Splitter.minSizeFractionSplit=Float.parseFloat(b); | |
964 }else if(a.equals("minsizefractioncorrect") || a.equals("minsfc")){ | |
965 minSizeFractionCorrect=Float.parseFloat(b); | |
966 }else if(a.equals("minratio") || a.equals("minr")){ | |
967 minRatio=Float.parseFloat(b); | |
968 }else if(a.equals("minqratio") || a.equals("minqr")){ | |
969 minQRatio=Float.parseFloat(b); | |
970 }else if(a.equals("minaqratio") || a.equals("minaqr")){ | |
971 minAQRatio=Float.parseFloat(b); | |
972 }else if(a.equals("minratiooffset") || a.equals("minro")){ | |
973 minRatioOffset=Float.parseFloat(b); | |
974 }else if(a.equals("minratiomult") || a.equals("minrm")){ | |
975 minRatioMult=Float.parseFloat(b); | |
976 }else if(a.equals("minratioqmult") || a.equals("minrqm")){ | |
977 minRatioQMult=Float.parseFloat(b); | |
978 }else if(a.equals("minidentity") || a.equals("minid")){ | |
979 minIdentity=Float.parseFloat(b); | |
980 }else if(a.equals("maxqadjust")){ | |
981 maxQAdjust=(byte)Integer.parseInt(b); | |
982 }else if(a.equals("maxqi") || a.equals("maxqincorrect") || a.equals("maxqualityincorrect")){ | |
983 maxQIncorrect=Integer.parseInt(b); | |
984 if(maxCIncorrect<0){maxQIncorrect=Integer.MAX_VALUE;} | |
985 }else if(a.equals("maxci") || a.equals("maxcincorrect") || a.equals("maxcountincorrect")){ | |
986 maxCIncorrect=Integer.parseInt(b); | |
987 if(maxCIncorrect<0){maxCIncorrect=Integer.MAX_VALUE;} | |
988 }else if(a.equals("border")){ | |
989 KmerComparator.defaultBorder=Integer.parseInt(b); | |
990 }else if(a.equals("conservative")){ | |
991 conservativeFlag=Parse.parseBoolean(b); | |
992 }else if(a.equals("aggressive")){ | |
993 aggressiveFlag=Parse.parseBoolean(b); | |
994 }else if(a.equals("forceprocess")){ | |
995 forceProcess=Parse.parseBoolean(b); | |
996 }else if(a.equals("mergefirst")){ | |
997 KmerComparator.mergeFirst=Parse.parseBoolean(b); | |
998 }else if(a.equals("findcorrelations")){ | |
999 Splitter.FIND_CORRELATIONS=Parse.parseBoolean(b); | |
1000 }else if(a.equals("maxcorrelations")){ | |
1001 Splitter.MAX_CORRELATIONS=Integer.parseInt(b); | |
1002 } | |
1003 | |
1004 else if(a.equals("sortx")){ | |
1005 sortX=Parse.parseBoolean(b); | |
1006 }else if(a.equals("sorty")){ | |
1007 sortY=Parse.parseBoolean(b); | |
1008 }else if(a.equals("sortxy")){ | |
1009 sortX=sortY=Parse.parseBoolean(b); | |
1010 }else if(a.equals("forcesortxy") || a.equals("forcexy")){ | |
1011 forceSortXY=Parse.parseBoolean(b); | |
1012 }else if(a.equals("sortxysize") || a.equals("xysize")){ | |
1013 sortXYSize=Integer.parseInt(b); | |
1014 } | |
1015 | |
1016 else if(a.equals("removeallduplicates") || a.equals("allduplicates")){ | |
1017 markAll=Parse.parseBoolean(b); | |
1018 }else if(a.equals("addcount") || a.equals("renamebycount")){ | |
1019 renameByCount=Parse.parseBoolean(b); | |
1020 }else if(a.equals("optical") || a.equals("opticalonly")){ | |
1021 opticalOnly=Parse.parseBoolean(b); | |
1022 }else if(a.equals("dupesubs") || a.equals("duplicatesubs") || a.equals("dsubs") || a.equals("subs") || a.equals("s")){ | |
1023 maxSubstitutions=Integer.parseInt(b); | |
1024 }else if(a.equals("dupedist") || a.equals("duplicatedistance") || a.equals("ddist") || a.equals("dist") || a.equals("opticaldist") || a.equals("distance")){ | |
1025 maxOpticalDistance=Float.parseFloat(b); | |
1026 opticalOnly=maxOpticalDistance>=0; | |
1027 }else if(a.equals("scanlimit") || a.equals("scan")){ | |
1028 scanLimit=Integer.parseInt(b); | |
1029 }else if(a.equals("allowns")){ | |
1030 allowNs=Parse.parseBoolean(b); | |
1031 }else if(a.equals("containment") || a.equals("absorbcontainment") || a.equals("ac") || a.equals("contains")){ | |
1032 containment=Parse.parseBoolean(b); | |
1033 }else if(a.equalsIgnoreCase("prefixOrSuffix") || a.equalsIgnoreCase("suffixOrPrefix") || a.equals("affix") || a.equals("pos")){ | |
1034 affix=Parse.parseBoolean(b); | |
1035 }else if(a.equals("printduplicates")){ | |
1036 printDuplicates=Parse.parseBoolean(b); | |
1037 }else if(a.equals("dupeidentity")){ | |
1038 float dupeIdentity=Float.parseFloat(b); | |
1039 if(dupeIdentity>1){dupeIdentity=dupeIdentity/100;} | |
1040 assert(dupeIdentity<=1); | |
1041 dupeSubRate=1-dupeIdentity; | |
1042 }else if(a.equals("dupesubrate") || a.equals("dsr") || a.equals("subrate")){ | |
1043 dupeSubRate=Float.parseFloat(b); | |
1044 if(dupeSubRate>1){dupeSubRate=dupeSubRate/100;} | |
1045 assert(dupeSubRate<=1); | |
1046 } | |
1047 | |
1048 else if(a.equals("minsizesplit")){ | |
1049 Splitter.minSizeSplit=Integer.parseInt(b); | |
1050 }else if(a.equals("minsizefractionsplit")){ | |
1051 Splitter.minSizeFractionSplit=Float.parseFloat(b); | |
1052 }else{ | |
1053 return false; | |
1054 } | |
1055 | |
1056 return true; | |
1057 } | |
1058 | |
1059 static void setConservative(boolean newState){ | |
1060 | |
1061 if(aggressiveFlag){return;} | |
1062 if(newState==conservativeMode){return;} | |
1063 conservativeMode=newState; | |
1064 | |
1065 Splitter.conservative=conservativeMode; | |
1066 | |
1067 if(conservativeMode){ | |
1068 minCountCorrect++; | |
1069 minSizeFractionCorrect*=1.5f; | |
1070 minRatio*=1.25f; | |
1071 minQRatio*=1.5f; | |
1072 minAQRatio*=1.4f; | |
1073 minRatioOffset*=1.25f; | |
1074 minRatioQMult*=1.50f; | |
1075 minRatioMult*=1.4f; | |
1076 minIdentity=1-((1-minIdentity)*0.7f); | |
1077 if(maxQIncorrect==Integer.MAX_VALUE){maxQIncorrect=35;} | |
1078 }else{ | |
1079 minCountCorrect--; | |
1080 minSizeFractionCorrect/=1.5f; | |
1081 minRatio/=1.25f; | |
1082 minQRatio/=1.5f; | |
1083 minAQRatio/=1.4f; | |
1084 minRatioOffset/=1.25f; | |
1085 minRatioQMult/=1.50f; | |
1086 minRatioMult/=1.4f; | |
1087 minIdentity=1-((1-minIdentity)/0.7f); | |
1088 if(maxQIncorrect==35){maxQIncorrect=Integer.MAX_VALUE;} | |
1089 } | |
1090 } | |
1091 | |
1092 /*--------------------------------------------------------------*/ | |
1093 | |
1094 public static void setXY() { | |
1095 if(ReadKey.spanTilesX){sortX=true;} | |
1096 if(ReadKey.spanTilesY){sortY=true;} | |
1097 } | |
1098 | |
1099 static boolean allowNs=true; | |
1100 static boolean markAll=false; | |
1101 static boolean markOnly=false; | |
1102 static boolean opticalOnly=false; | |
1103 static boolean containment=false; | |
1104 static boolean affix=false; | |
1105 static boolean printDuplicates=false; //For debugging | |
1106 | |
1107 private static boolean renameByCount=false; | |
1108 private static int scanLimit=5; | |
1109 private static int maxSubstitutions=2; | |
1110 private static float dupeSubRate=0; | |
1111 private static float maxOpticalDistance=40; | |
1112 | |
1113 static boolean forceProcess=false; | |
1114 static boolean conservativeFlag=false; | |
1115 static boolean aggressiveFlag=false; | |
1116 static boolean conservativeMode=false; | |
1117 static boolean renameConsensus=false; | |
1118 static int minCountCorrect=4; //mcc=4 was slightly better than 3 | |
1119 static float minSizeFractionCorrect=0.20f; //0.11 is very slightly better than 0.14 | |
1120 static float minRatio=30.0f; | |
1121 static float minQRatio=2.8f; //Does nothing? | |
1122 static float minAQRatio=0.7f; | |
1123 static float minRatioOffset=1.9f; //3 is far worse than 2; 1 is better | |
1124 static float minRatioQMult=0.08f; | |
1125 static float minRatioMult=1.80f; //2.5 is far worse than 2; 1.5 is better | |
1126 static float minIdentity=0.97f; //0.98 is slightly better than 0.96; 0.94 is substantially worse | |
1127 static byte maxQAdjust=0; | |
1128 static int maxQIncorrect=Integer.MAX_VALUE; | |
1129 static int maxCIncorrect=Integer.MAX_VALUE; | |
1130 | |
1131 static boolean sortX=false; //Not needed for NextSeq | |
1132 static boolean sortY=true; | |
1133 static boolean forceSortXY=false; //Mainly for testing | |
1134 static int sortXYSize=6; | |
1135 | |
1136 /** May slightly increase speed for optical dedupe. Can be safely disabled. */ | |
1137 static boolean sortYEarly(){return sortY && (forceSortXY || opticalOnly);} | |
1138 | |
1139 // private static final boolean countQuality=false; | |
1140 public static int k=31; | |
1141 private static final long serialVersionUID = 1L; | |
1142 | |
1143 } |