jpayne@68
|
1 package clump;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.util.ArrayList;
|
jpayne@68
|
4 import java.util.LinkedHashMap;
|
jpayne@68
|
5 import java.util.Map.Entry;
|
jpayne@68
|
6
|
jpayne@68
|
7 import dna.AminoAcid;
|
jpayne@68
|
8 import shared.KillSwitch;
|
jpayne@68
|
9 import shared.Tools;
|
jpayne@68
|
10 import stream.Read;
|
jpayne@68
|
11 import structures.IntList;
|
jpayne@68
|
12 import structures.LongList;
|
jpayne@68
|
13
|
jpayne@68
|
14 /**
|
jpayne@68
|
15 * A tool for splitting clumps by allele.
|
jpayne@68
|
16 * @author Brian Bushnell
|
jpayne@68
|
17 * @date September 26, 2016
|
jpayne@68
|
18 *
|
jpayne@68
|
19 */
|
jpayne@68
|
20 class Splitter {
|
jpayne@68
|
21
|
jpayne@68
|
22 static ArrayList<Clump> splitOnPivot(Clump c){
|
jpayne@68
|
23 ArrayList<Clump> list=new ArrayList<Clump>(3);
|
jpayne@68
|
24 list.add(c);
|
jpayne@68
|
25 if(c.size()<minSizeSplit){
|
jpayne@68
|
26 // assert(findBestPivot(c)<0);
|
jpayne@68
|
27 return list;
|
jpayne@68
|
28 }
|
jpayne@68
|
29 return splitOnPivot(list);
|
jpayne@68
|
30 }
|
jpayne@68
|
31
|
jpayne@68
|
32 static ArrayList<Clump> splitOnPivot(ArrayList<Clump> list){
|
jpayne@68
|
33 ArrayList<Clump> out=new ArrayList<Clump>();
|
jpayne@68
|
34
|
jpayne@68
|
35 final IntList pivots=new IntList(2);
|
jpayne@68
|
36 for(int i=0; i<list.size(); i++){
|
jpayne@68
|
37 Clump clump=list.get(i);
|
jpayne@68
|
38 list.set(i, null);
|
jpayne@68
|
39 int pivot=findBestPivots(clump, FIND_CORRELATIONS, pivots);
|
jpayne@68
|
40 if(pivot<0){
|
jpayne@68
|
41 assert(pivots.size==0);
|
jpayne@68
|
42 out.add(clump);
|
jpayne@68
|
43 }else{
|
jpayne@68
|
44 assert(pivots.size==1 || pivots.size==2) : pivot+", "+pivots.size+", "+pivots;
|
jpayne@68
|
45 assert(pivots.get(0)==pivot);
|
jpayne@68
|
46 splitAndAdd(clump, pivots.get(0), (pivots.size>1 ? pivots.get(1) : -1), list);
|
jpayne@68
|
47 }
|
jpayne@68
|
48 }
|
jpayne@68
|
49 return out;
|
jpayne@68
|
50 }
|
jpayne@68
|
51
|
jpayne@68
|
52 static void splitAndAdd(Clump c, final int var1, final int var2, ArrayList<Clump> out) {
|
jpayne@68
|
53 final int maxLeft=c.maxLeft();
|
jpayne@68
|
54
|
jpayne@68
|
55 final Clump major=Clump.makeClump(c.kmer), minor=Clump.makeClump(c.kmer);
|
jpayne@68
|
56
|
jpayne@68
|
57 for(Read r : c){
|
jpayne@68
|
58 if(containsVar(var1, r, maxLeft) || (var2>=0 && containsVar(var2, r, maxLeft))){
|
jpayne@68
|
59 minor.add(r);
|
jpayne@68
|
60 }else{
|
jpayne@68
|
61 major.add(r);
|
jpayne@68
|
62 }
|
jpayne@68
|
63 }
|
jpayne@68
|
64 // System.err.println(major.size()+"\t"+minor.size());
|
jpayne@68
|
65 out.add(major);
|
jpayne@68
|
66 out.add(minor);
|
jpayne@68
|
67 }
|
jpayne@68
|
68
|
jpayne@68
|
69 //Returns the c
|
jpayne@68
|
70 static int countVariants(Clump c, LongList varCounts){
|
jpayne@68
|
71 varCounts.clear();
|
jpayne@68
|
72 final int[][] bcounts=c.baseCounts();
|
jpayne@68
|
73 final int[][] qcounts=c.qualityCounts();
|
jpayne@68
|
74 final int len=bcounts[0].length;
|
jpayne@68
|
75 for(int i=0; i<len; i++){
|
jpayne@68
|
76 final int major=c.getConsensusAtPosition(qcounts, i);
|
jpayne@68
|
77 for(int x=0; x<4; x++){
|
jpayne@68
|
78 final int bcount=bcounts[x][i];
|
jpayne@68
|
79 if(bcount>1 && x!=major){
|
jpayne@68
|
80 final long var=(((long)bcount)<<32)|((i<<shift)|x);
|
jpayne@68
|
81 varCounts.add(var);
|
jpayne@68
|
82 }
|
jpayne@68
|
83 }
|
jpayne@68
|
84 }
|
jpayne@68
|
85 if(varCounts.size()<1){return 0;}
|
jpayne@68
|
86 varCounts.sort();
|
jpayne@68
|
87 varCounts.reverse();
|
jpayne@68
|
88 return (int)(varCounts.get(0)>>>32);
|
jpayne@68
|
89 }
|
jpayne@68
|
90
|
jpayne@68
|
91 static LinkedHashMap<Integer, ArrayList<Read>> findReadVariants(Clump c, boolean makeMap){
|
jpayne@68
|
92 if(c.size()<5){return null;} //Not really needed with tiny clumps
|
jpayne@68
|
93 // assert(c.size()>3); //TODO
|
jpayne@68
|
94 LinkedHashMap<Integer, ArrayList<Read>> map=null;
|
jpayne@68
|
95 map=(makeMap ? new LinkedHashMap<Integer, ArrayList<Read>>() : null);
|
jpayne@68
|
96 // if(makeMap){
|
jpayne@68
|
97 // map=localMap.get();
|
jpayne@68
|
98 // if(map==null){
|
jpayne@68
|
99 // map=new LinkedHashMap<Integer, ArrayList<Read>>();
|
jpayne@68
|
100 // localMap.set(map);
|
jpayne@68
|
101 // }
|
jpayne@68
|
102 // map.clear();
|
jpayne@68
|
103 // }
|
jpayne@68
|
104
|
jpayne@68
|
105 final int[][] bcounts=c.baseCounts();
|
jpayne@68
|
106 final Read ref=c.consensusRead();
|
jpayne@68
|
107 final byte[] rbases=ref.bases;
|
jpayne@68
|
108 for(Read r : c){
|
jpayne@68
|
109 final byte[] bases=r.bases;
|
jpayne@68
|
110 final ReadKey key=(ReadKey) r.obj;
|
jpayne@68
|
111 IntList list=key.vars;
|
jpayne@68
|
112 if(list!=null){list.clear();}
|
jpayne@68
|
113
|
jpayne@68
|
114
|
jpayne@68
|
115 final int cStart=0, rStart=c.maxLeft()-key.position;
|
jpayne@68
|
116
|
jpayne@68
|
117 for(int i=cStart, j=rStart; i<bases.length; i++, j++){
|
jpayne@68
|
118 final byte cb=bases[i], rb=rbases[j];
|
jpayne@68
|
119 if(cb!=rb){
|
jpayne@68
|
120 byte x=AminoAcid.baseToNumber[cb];
|
jpayne@68
|
121 if(x>=0){
|
jpayne@68
|
122 int count=bcounts[x][j];
|
jpayne@68
|
123 if(count>1){
|
jpayne@68
|
124 int var=(j<<2)|x;
|
jpayne@68
|
125 if(list==null){list=key.vars=new IntList(4);}
|
jpayne@68
|
126 list.add(var);
|
jpayne@68
|
127
|
jpayne@68
|
128 if(map!=null){
|
jpayne@68
|
129 Integer mapkey=var;//mapKeys[var];
|
jpayne@68
|
130 ArrayList<Read> alr=map.get(mapkey);
|
jpayne@68
|
131 if(alr==null){
|
jpayne@68
|
132 alr=new ArrayList<Read>(4);
|
jpayne@68
|
133 map.put(mapkey, alr);
|
jpayne@68
|
134 }
|
jpayne@68
|
135 alr.add(r);
|
jpayne@68
|
136 }
|
jpayne@68
|
137
|
jpayne@68
|
138 }
|
jpayne@68
|
139 }
|
jpayne@68
|
140 }
|
jpayne@68
|
141 }
|
jpayne@68
|
142 }
|
jpayne@68
|
143 return map==null || map.isEmpty() ? null : map;
|
jpayne@68
|
144 }
|
jpayne@68
|
145
|
jpayne@68
|
146 static int findBestPivot_Correlated(Clump c, IntList pivots){
|
jpayne@68
|
147 assert(pivots.size==0);
|
jpayne@68
|
148 LinkedHashMap<Integer, ArrayList<Read>> map=findReadVariants(c, true);
|
jpayne@68
|
149 if(map==null){return -1;}
|
jpayne@68
|
150
|
jpayne@68
|
151 IntList collection=new IntList(32);
|
jpayne@68
|
152 int[] rvector=KillSwitch.allocInt1D(5);
|
jpayne@68
|
153
|
jpayne@68
|
154 int bestVar=-1;
|
jpayne@68
|
155 int bestVarCount=-1;
|
jpayne@68
|
156 int bestVar2=-1;
|
jpayne@68
|
157 int bestVar2Count=-1;
|
jpayne@68
|
158 int bestDifferent=-1;
|
jpayne@68
|
159 float bestCorrelation=-1;
|
jpayne@68
|
160 float bestScore=-1;
|
jpayne@68
|
161
|
jpayne@68
|
162 final float minCorrelation=0.75f;
|
jpayne@68
|
163 final int minVar2Count=2;
|
jpayne@68
|
164
|
jpayne@68
|
165 int max=0;
|
jpayne@68
|
166 for(Entry<Integer, ArrayList<Read>> entry : map.entrySet()){
|
jpayne@68
|
167 max=Tools.max(max, entry.getValue().size());
|
jpayne@68
|
168 }
|
jpayne@68
|
169 if(max<2){return -1;}
|
jpayne@68
|
170 final int thresh=Tools.max(2, max/2);
|
jpayne@68
|
171 final int thresh2=max/4;
|
jpayne@68
|
172 int numOverThresh=0;
|
jpayne@68
|
173 ArrayList<Integer> remove=new ArrayList<Integer>();
|
jpayne@68
|
174 for(Entry<Integer, ArrayList<Read>> entry : map.entrySet()){
|
jpayne@68
|
175 int x=entry.getValue().size();
|
jpayne@68
|
176 if(x>=thresh){numOverThresh++;}
|
jpayne@68
|
177 else if(x<thresh2){remove.add(entry.getKey());}
|
jpayne@68
|
178 }
|
jpayne@68
|
179 for(Integer key : remove){map.remove(key);}
|
jpayne@68
|
180 if(numOverThresh>MAX_CORRELATIONS){return -1;}
|
jpayne@68
|
181
|
jpayne@68
|
182 for(Entry<Integer, ArrayList<Read>> entry : map.entrySet()){
|
jpayne@68
|
183 final ArrayList<Read> rlist=entry.getValue();
|
jpayne@68
|
184 final Integer key=entry.getKey();
|
jpayne@68
|
185 final int var=key;
|
jpayne@68
|
186 if(rlist.size()>=thresh){
|
jpayne@68
|
187
|
jpayne@68
|
188 final int var2=examineVar(var, rlist, collection, rvector, map);
|
jpayne@68
|
189
|
jpayne@68
|
190 if(var2>=0){
|
jpayne@68
|
191
|
jpayne@68
|
192 final int varCount=rvector[1];
|
jpayne@68
|
193 final int var2Count=rvector[3];
|
jpayne@68
|
194 final int different=rvector[4];
|
jpayne@68
|
195
|
jpayne@68
|
196 final int var2reads;
|
jpayne@68
|
197 final int var2ReadsWithoutVar;
|
jpayne@68
|
198 {
|
jpayne@68
|
199 ArrayList<Read> temp=map.get(var2);
|
jpayne@68
|
200 var2reads=(temp==null ? 0 : temp.size());
|
jpayne@68
|
201
|
jpayne@68
|
202 // if(var2reads==var2Count){
|
jpayne@68
|
203 // var2ReadsWithoutVar=0;
|
jpayne@68
|
204 // }else{
|
jpayne@68
|
205 // var2ReadsWithoutVar=countDifferentAlleles(var, temp);
|
jpayne@68
|
206 // }
|
jpayne@68
|
207
|
jpayne@68
|
208 var2ReadsWithoutVar=var2reads-varCount;
|
jpayne@68
|
209
|
jpayne@68
|
210 }
|
jpayne@68
|
211
|
jpayne@68
|
212 // final float correlation=(var2Count-0.05f)/(float)varCount;
|
jpayne@68
|
213 // final float correlation=(varCount-different)/(float)varCount;
|
jpayne@68
|
214 final float correlation=(Tools.max(varCount, var2reads)-different)/(float)Tools.max(varCount, var2reads);
|
jpayne@68
|
215 final int distance=Tools.absdif(var>>2, var2>>2);
|
jpayne@68
|
216
|
jpayne@68
|
217 // final float score=correlation*((var2Count-1)-0.5f*var2ReadsWithoutVar-different+1.0f*(varCount));
|
jpayne@68
|
218
|
jpayne@68
|
219 final float score=correlation*(var2reads/*var2Count*/+varCount-different+0.5f*var2ReadsWithoutVar)*(distance+250);
|
jpayne@68
|
220
|
jpayne@68
|
221 // final float score=correlation*((var2Count-1)-1.0f*var2ReadsWithoutVar+1.0f*(varCount));
|
jpayne@68
|
222 if(correlation>=minCorrelation && var2Count>=minVar2Count){
|
jpayne@68
|
223 if(score>bestScore || (score==bestScore && varCount>bestVarCount)){
|
jpayne@68
|
224 bestVar=var;
|
jpayne@68
|
225 bestVarCount=varCount;
|
jpayne@68
|
226 bestVar2=var2;
|
jpayne@68
|
227 bestVar2Count=var2Count;
|
jpayne@68
|
228 bestCorrelation=correlation;
|
jpayne@68
|
229 bestScore=score;
|
jpayne@68
|
230 bestDifferent=different;
|
jpayne@68
|
231 }
|
jpayne@68
|
232 }
|
jpayne@68
|
233 }
|
jpayne@68
|
234 }
|
jpayne@68
|
235 }
|
jpayne@68
|
236
|
jpayne@68
|
237 if(bestVar2Count>=minVar2Count && bestCorrelation>=minCorrelation){
|
jpayne@68
|
238 pivots.add(bestVar);
|
jpayne@68
|
239 pivots.add(bestVar2);
|
jpayne@68
|
240 return bestVar;
|
jpayne@68
|
241 }
|
jpayne@68
|
242 return -1;
|
jpayne@68
|
243 }
|
jpayne@68
|
244
|
jpayne@68
|
245 static boolean containsVar(final int var, final Read r, final int maxLeft){
|
jpayne@68
|
246 final int varPos=var>>2;
|
jpayne@68
|
247 final int varAllele=var&alleleMask;
|
jpayne@68
|
248 final ReadKey rk=(ReadKey) r.obj;
|
jpayne@68
|
249 final int rloc=toReadLocation(varPos, maxLeft, rk.position);
|
jpayne@68
|
250 if(rloc<0 || rloc>=r.length()){
|
jpayne@68
|
251 return false;
|
jpayne@68
|
252 }
|
jpayne@68
|
253 final int readAllele=AminoAcid.baseToNumber[r.bases[rloc]];
|
jpayne@68
|
254 return readAllele==varAllele;
|
jpayne@68
|
255 }
|
jpayne@68
|
256
|
jpayne@68
|
257 static boolean hasDifferentAllele(final int var, final Read r/*, final Clump c*/){
|
jpayne@68
|
258 final int varPos=var>>2;
|
jpayne@68
|
259 final int varAllele=var&alleleMask;
|
jpayne@68
|
260 final ReadKey rk=(ReadKey) r.obj;
|
jpayne@68
|
261 final IntList vars=rk.vars;
|
jpayne@68
|
262 final Clump c=rk.clump;
|
jpayne@68
|
263 assert(c==rk.clump);
|
jpayne@68
|
264 final int maxLeft=c.maxLeft();
|
jpayne@68
|
265 final int rloc=toReadLocation(varPos, maxLeft, rk.position);
|
jpayne@68
|
266 if(rloc<0 || rloc>=r.length()){
|
jpayne@68
|
267 assert(!vars.contains(var));
|
jpayne@68
|
268 return false;
|
jpayne@68
|
269 }
|
jpayne@68
|
270 final int readAllele=AminoAcid.baseToNumber[r.bases[rloc]];
|
jpayne@68
|
271 assert((readAllele==varAllele)==vars.contains(var));
|
jpayne@68
|
272
|
jpayne@68
|
273 return readAllele!=varAllele;
|
jpayne@68
|
274 }
|
jpayne@68
|
275
|
jpayne@68
|
276 static int countDifferentAlleles(final int var, ArrayList<Read> list){
|
jpayne@68
|
277 if(list==null){return 0;}
|
jpayne@68
|
278 int sum=0;
|
jpayne@68
|
279 for(Read r : list){
|
jpayne@68
|
280 if(hasDifferentAllele(var, r)){sum++;}
|
jpayne@68
|
281 }
|
jpayne@68
|
282 return sum;
|
jpayne@68
|
283 }
|
jpayne@68
|
284
|
jpayne@68
|
285 static int examineVar(final int var, final ArrayList<Read> list, final IntList collection, final int[] rvector, LinkedHashMap<Integer, ArrayList<Read>> map){
|
jpayne@68
|
286 collection.clear();
|
jpayne@68
|
287
|
jpayne@68
|
288 for(Read r : list){
|
jpayne@68
|
289 final ReadKey rk=(ReadKey) r.obj;
|
jpayne@68
|
290 final IntList vars=rk.vars;
|
jpayne@68
|
291
|
jpayne@68
|
292 for(int i=0; i<vars.size; i++){
|
jpayne@68
|
293 final int v2=vars.get(i);
|
jpayne@68
|
294 if(v2!=var){
|
jpayne@68
|
295 collection.add(v2);
|
jpayne@68
|
296 }
|
jpayne@68
|
297 }
|
jpayne@68
|
298 }
|
jpayne@68
|
299 collection.sort();
|
jpayne@68
|
300
|
jpayne@68
|
301 final int varCount=list.size();
|
jpayne@68
|
302
|
jpayne@68
|
303 int lastVar2=-1, bestVar2=-1;
|
jpayne@68
|
304 int sharedCount=0, bestSharedCount=0, bestDifferent=999;
|
jpayne@68
|
305 for(int i=0; i<collection.size; i++){//TODO: Note that not all reads actually cover a given var
|
jpayne@68
|
306 int currentVar2=collection.get(i);
|
jpayne@68
|
307 if(currentVar2==lastVar2){sharedCount++;}
|
jpayne@68
|
308 else{
|
jpayne@68
|
309 if(sharedCount>bestSharedCount){
|
jpayne@68
|
310 final int different1=(sharedCount==varCount ? 0 : countDifferentAlleles(lastVar2, list));
|
jpayne@68
|
311 if(different1*8<varCount){
|
jpayne@68
|
312 ArrayList<Read> list2=map.get(lastVar2);
|
jpayne@68
|
313 final int varCount2=(list2==null ? 0 : list2.size());
|
jpayne@68
|
314 final int different2=(sharedCount==varCount2 ? 0 : countDifferentAlleles(var, list2));
|
jpayne@68
|
315 if(different2*8<varCount2){
|
jpayne@68
|
316 bestVar2=lastVar2;
|
jpayne@68
|
317 bestSharedCount=sharedCount;
|
jpayne@68
|
318 bestDifferent=Tools.max(different1, different2);
|
jpayne@68
|
319 }
|
jpayne@68
|
320 }
|
jpayne@68
|
321 }
|
jpayne@68
|
322 sharedCount=1;
|
jpayne@68
|
323 }
|
jpayne@68
|
324 lastVar2=currentVar2;
|
jpayne@68
|
325 }
|
jpayne@68
|
326 if(sharedCount>bestSharedCount){
|
jpayne@68
|
327 final int different1=(sharedCount==varCount ? 0 : countDifferentAlleles(lastVar2, list));
|
jpayne@68
|
328 if(different1*8<varCount){
|
jpayne@68
|
329 ArrayList<Read> list2=map.get(lastVar2);
|
jpayne@68
|
330 final int varCount2=(list2==null ? 0 : list2.size());
|
jpayne@68
|
331 final int different2=(sharedCount==varCount2 ? 0 : countDifferentAlleles(var, list2));
|
jpayne@68
|
332 if(different2*8<varCount2){
|
jpayne@68
|
333 bestVar2=lastVar2;
|
jpayne@68
|
334 bestSharedCount=sharedCount;
|
jpayne@68
|
335 bestDifferent=Tools.max(different1, different2);
|
jpayne@68
|
336 }
|
jpayne@68
|
337 }
|
jpayne@68
|
338 }
|
jpayne@68
|
339 rvector[0]=var;
|
jpayne@68
|
340 rvector[1]=list.size();
|
jpayne@68
|
341 rvector[2]=bestVar2;
|
jpayne@68
|
342 rvector[3]=sharedCount;
|
jpayne@68
|
343 rvector[4]=bestDifferent;
|
jpayne@68
|
344
|
jpayne@68
|
345 return bestVar2;
|
jpayne@68
|
346 }
|
jpayne@68
|
347
|
jpayne@68
|
348 static final int toReadLocation(final int clumpLocation, final int maxLeft, final int readPos){
|
jpayne@68
|
349 final int readLocation=clumpLocation+readPos-maxLeft;
|
jpayne@68
|
350 return readLocation;
|
jpayne@68
|
351 }
|
jpayne@68
|
352
|
jpayne@68
|
353 static final int toClumpLocation(final int readLocation, final int maxLeft, final int readPos){
|
jpayne@68
|
354 final int clumpLocation=readLocation-readPos+maxLeft;
|
jpayne@68
|
355 assert(readLocation==toReadLocation(clumpLocation, maxLeft, readPos));
|
jpayne@68
|
356 return clumpLocation;
|
jpayne@68
|
357 }
|
jpayne@68
|
358
|
jpayne@68
|
359 static int findBestPivots(Clump c, boolean findCorrelations, IntList pivots){
|
jpayne@68
|
360 pivots.clear();
|
jpayne@68
|
361 final int size=c.size();
|
jpayne@68
|
362 if(size<minSizeSplit){return -1;}
|
jpayne@68
|
363
|
jpayne@68
|
364 if(findCorrelations){
|
jpayne@68
|
365 int x=findBestPivot_Correlated(c, pivots);
|
jpayne@68
|
366 if(x>-1){return x;}
|
jpayne@68
|
367 }
|
jpayne@68
|
368
|
jpayne@68
|
369 final int[][] bcounts=c.baseCounts();
|
jpayne@68
|
370 final int[][] qcounts=c.qualityCounts();
|
jpayne@68
|
371 final float[][] qAvgs=c.qualityAverages();
|
jpayne@68
|
372 final int width=c.width();
|
jpayne@68
|
373
|
jpayne@68
|
374 int bestPosition=-1;
|
jpayne@68
|
375 int bestVar=-1;
|
jpayne@68
|
376 long bestBsecond=0;
|
jpayne@68
|
377 long bestQsecond=0;
|
jpayne@68
|
378
|
jpayne@68
|
379 // final float bmult=8f, bmult2=15f, qmult=(c.useQuality() ? 1.5f : 100f);
|
jpayne@68
|
380 final boolean useQuality=c.useQuality();
|
jpayne@68
|
381 final float bmult=20f, bmult2=20f;
|
jpayne@68
|
382 final float qmult=20f, qamult=1.5f;
|
jpayne@68
|
383
|
jpayne@68
|
384 final int minPivotDepth=Tools.max(4, (int)(minSizeFractionSplit*size));
|
jpayne@68
|
385 // final int minMinorAllele=Tools.max((conservative ? 1 : 2), (int)(0.25f+size/bmult2));
|
jpayne@68
|
386 final int minMinorAllele=Tools.max(2, (int)(size/bmult2));
|
jpayne@68
|
387 final int minMinorAlleleQ=minMinorAllele*10;
|
jpayne@68
|
388
|
jpayne@68
|
389 // assert(false) : size+", "+minSizeSplit+", "+minPivotDepth+", "+minMinorAllele;
|
jpayne@68
|
390
|
jpayne@68
|
391 for(int i=0; i<width; i++){
|
jpayne@68
|
392 final long sum=c.getSumAtPosition(bcounts, i);
|
jpayne@68
|
393 if(sum>=minPivotDepth){
|
jpayne@68
|
394 final int pmajor=c.getConsensusAtPosition(bcounts, i);
|
jpayne@68
|
395 final int pminor=c.getSecondHighest(bcounts, i);
|
jpayne@68
|
396 if(pmajor>=0){
|
jpayne@68
|
397 final long bmajor=bcounts[pmajor][i];
|
jpayne@68
|
398 final long bsecond=bcounts[pminor][i];
|
jpayne@68
|
399
|
jpayne@68
|
400 final long qmajor=qcounts[pmajor][i];
|
jpayne@68
|
401 final long qsecond=qcounts[pminor][i];
|
jpayne@68
|
402
|
jpayne@68
|
403 final float qamajor=qAvgs[pmajor][i];
|
jpayne@68
|
404 final float qasecond=qAvgs[pminor][i];
|
jpayne@68
|
405
|
jpayne@68
|
406 if(bsecond*bmult>=bmajor && bsecond>=minMinorAllele){
|
jpayne@68
|
407 if(!useQuality || (qsecond*qmult>=qmajor && qasecond*qamult>=qamajor && qsecond>=minMinorAlleleQ)){//candidate
|
jpayne@68
|
408 if(bsecond>bestBsecond || (bsecond==bestBsecond && qsecond>bestQsecond)){//Perhaps Qsecond should be more important...?
|
jpayne@68
|
409
|
jpayne@68
|
410 // assert(false) : size+", "+minSizeSplit+", "+minPivotDepth+", "+minMinorAllele+", "+bmajor+", "+bsecond+", "+qmajor+", "+qsecond+", "+minMinorAlleleQ;
|
jpayne@68
|
411
|
jpayne@68
|
412 bestBsecond=bsecond;
|
jpayne@68
|
413 bestQsecond=qsecond;
|
jpayne@68
|
414 bestPosition=i;
|
jpayne@68
|
415 bestVar=(bestPosition<<2)|pminor;
|
jpayne@68
|
416 }
|
jpayne@68
|
417 }
|
jpayne@68
|
418 }
|
jpayne@68
|
419 }
|
jpayne@68
|
420 }
|
jpayne@68
|
421 }
|
jpayne@68
|
422
|
jpayne@68
|
423 // if(bestVar<0 && findCorrelations){
|
jpayne@68
|
424 // bestVar=findBestPivot_Correlated(c);
|
jpayne@68
|
425 // }
|
jpayne@68
|
426
|
jpayne@68
|
427 if(bestVar>=0){pivots.add(bestVar);}
|
jpayne@68
|
428 return bestVar;
|
jpayne@68
|
429 }
|
jpayne@68
|
430
|
jpayne@68
|
431 static int minSizeSplit=4; //5 is actually better than 4 in allele separation tests...?
|
jpayne@68
|
432 static float minSizeFractionSplit=0.17f; //0.2 is substantially worse, 0.14 is a tiny bit better than 0.17
|
jpayne@68
|
433 static boolean conservative=false;
|
jpayne@68
|
434
|
jpayne@68
|
435 private static final int alleleMask=0x3;
|
jpayne@68
|
436 private static final int posMask=~alleleMask;
|
jpayne@68
|
437 private static final int shift=2;
|
jpayne@68
|
438
|
jpayne@68
|
439 static boolean FIND_CORRELATIONS=true;
|
jpayne@68
|
440 static int MAX_CORRELATIONS=12;
|
jpayne@68
|
441
|
jpayne@68
|
442 // private static final ThreadLocal<LinkedHashMap<Integer, ArrayList<Read>>> localMap=new ThreadLocal<LinkedHashMap<Integer, ArrayList<Read>>>();
|
jpayne@68
|
443 // private static final Integer[] mapKeys=new Integer[2000];
|
jpayne@68
|
444 // static{
|
jpayne@68
|
445 // for(int i=0; i<mapKeys.length; i++){mapKeys[i]=i;}
|
jpayne@68
|
446 // }
|
jpayne@68
|
447
|
jpayne@68
|
448 }
|