jpayne@68
|
1 package kmer;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.PrintStream;
|
jpayne@68
|
4 import java.util.BitSet;
|
jpayne@68
|
5
|
jpayne@68
|
6 import dna.AminoAcid;
|
jpayne@68
|
7 import jgi.Dedupe;
|
jpayne@68
|
8 import shared.PreParser;
|
jpayne@68
|
9 import shared.Shared;
|
jpayne@68
|
10 import shared.Timer;
|
jpayne@68
|
11 import shared.Tools;
|
jpayne@68
|
12 import stream.Read;
|
jpayne@68
|
13 import structures.IntList;
|
jpayne@68
|
14
|
jpayne@68
|
15 /**
|
jpayne@68
|
16 * @author Brian Bushnell
|
jpayne@68
|
17 * @date Mar 5, 2015
|
jpayne@68
|
18 *
|
jpayne@68
|
19 */
|
jpayne@68
|
20 public class TableReader {
|
jpayne@68
|
21
|
jpayne@68
|
22 /*--------------------------------------------------------------*/
|
jpayne@68
|
23 /*---------------- Initialization ----------------*/
|
jpayne@68
|
24 /*--------------------------------------------------------------*/
|
jpayne@68
|
25
|
jpayne@68
|
26 /**
|
jpayne@68
|
27 * Code entrance from the command line.
|
jpayne@68
|
28 * @param args Command line arguments
|
jpayne@68
|
29 */
|
jpayne@68
|
30 public static void main(String[] args){
|
jpayne@68
|
31
|
jpayne@68
|
32 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
33 PreParser pp=new PreParser(args, null, false);
|
jpayne@68
|
34 args=pp.args;
|
jpayne@68
|
35 outstream=pp.outstream;
|
jpayne@68
|
36 }
|
jpayne@68
|
37
|
jpayne@68
|
38 Timer t=new Timer();
|
jpayne@68
|
39
|
jpayne@68
|
40 AbstractKmerTable[] tables=TableLoaderLockFree.makeTables(AbstractKmerTable.ARRAY1D, 12, -1L, false, 1.0);
|
jpayne@68
|
41
|
jpayne@68
|
42 int k=31;
|
jpayne@68
|
43 int mink=0;
|
jpayne@68
|
44 int speed=0;
|
jpayne@68
|
45 int hdist=0;
|
jpayne@68
|
46 int edist=0;
|
jpayne@68
|
47 boolean rcomp=true;
|
jpayne@68
|
48 boolean maskMiddle=false;
|
jpayne@68
|
49
|
jpayne@68
|
50 //Create a new Loader instance
|
jpayne@68
|
51 TableLoaderLockFree loader=new TableLoaderLockFree(tables, k, mink, speed, hdist, edist, rcomp, maskMiddle);
|
jpayne@68
|
52 loader.setRefSkip(0);
|
jpayne@68
|
53 loader.hammingDistance2=0;
|
jpayne@68
|
54 loader.editDistance2=0;
|
jpayne@68
|
55 loader.storeMode(TableLoaderLockFree.SET_IF_NOT_PRESENT);
|
jpayne@68
|
56
|
jpayne@68
|
57 ///And run it
|
jpayne@68
|
58 String[] refs=args;
|
jpayne@68
|
59 String[] literals=null;
|
jpayne@68
|
60 boolean keepNames=false;
|
jpayne@68
|
61 boolean useRefNames=false;
|
jpayne@68
|
62 long kmers=loader.processData(refs, literals, keepNames, useRefNames, false);
|
jpayne@68
|
63 t.stop();
|
jpayne@68
|
64
|
jpayne@68
|
65 outstream.println("Load Time:\t"+t);
|
jpayne@68
|
66 outstream.println("Return: \t"+kmers);
|
jpayne@68
|
67 outstream.println("refKmers: \t"+loader.refKmers);
|
jpayne@68
|
68 outstream.println("refBases: \t"+loader.refBases);
|
jpayne@68
|
69 outstream.println("refReads: \t"+loader.refReads);
|
jpayne@68
|
70
|
jpayne@68
|
71 int qskip=0;
|
jpayne@68
|
72 int qhdist=0;
|
jpayne@68
|
73 TableReader tr=new TableReader(k, mink, speed, qskip, qhdist, rcomp, maskMiddle);
|
jpayne@68
|
74
|
jpayne@68
|
75 //TODO: Stuff...
|
jpayne@68
|
76
|
jpayne@68
|
77 //Close the print stream if it was redirected
|
jpayne@68
|
78 Shared.closeStream(outstream);
|
jpayne@68
|
79 }
|
jpayne@68
|
80
|
jpayne@68
|
81 public TableReader(int k_){
|
jpayne@68
|
82 this(k_, 0, 0, 0, 0, true, false);
|
jpayne@68
|
83 }
|
jpayne@68
|
84
|
jpayne@68
|
85 public TableReader(int k_, int mink_, int speed_, int qskip_, int qhdist_, boolean rcomp_, boolean maskMiddle_){
|
jpayne@68
|
86 k=k_;
|
jpayne@68
|
87 k2=k-1;
|
jpayne@68
|
88 mink=mink_;
|
jpayne@68
|
89 rcomp=rcomp_;
|
jpayne@68
|
90 useShortKmers=(mink>0 && mink<k);
|
jpayne@68
|
91 speed=speed_;
|
jpayne@68
|
92 qSkip=qskip_;
|
jpayne@68
|
93 qHammingDistance=qhdist_;
|
jpayne@68
|
94 middleMask=maskMiddle ? ~(3L<<(2*(k/2))) : -1L;
|
jpayne@68
|
95
|
jpayne@68
|
96 noAccel=(speed<1 && qSkip<2);
|
jpayne@68
|
97 accel=!noAccel;
|
jpayne@68
|
98 }
|
jpayne@68
|
99
|
jpayne@68
|
100
|
jpayne@68
|
101 /*--------------------------------------------------------------*/
|
jpayne@68
|
102 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
103 /*--------------------------------------------------------------*/
|
jpayne@68
|
104
|
jpayne@68
|
105
|
jpayne@68
|
106 /**
|
jpayne@68
|
107 * Mask a read to cover matching kmers.
|
jpayne@68
|
108 * @param r Read to process
|
jpayne@68
|
109 * @param sets Kmer tables
|
jpayne@68
|
110 * @return Number of bases masked
|
jpayne@68
|
111 */
|
jpayne@68
|
112 public final int kMask(final Read r, final AbstractKmerTable[] sets){
|
jpayne@68
|
113 if(r==null){return 0;}
|
jpayne@68
|
114 if(verbose){outstream.println("KMasking read "+r.id);}
|
jpayne@68
|
115
|
jpayne@68
|
116 BitSet bs=markBits(r, sets);
|
jpayne@68
|
117 if(verbose){outstream.println("Null bitset.");}
|
jpayne@68
|
118 if(bs==null){return 0;}
|
jpayne@68
|
119
|
jpayne@68
|
120 final byte[] bases=r.bases, quals=r.quality;
|
jpayne@68
|
121 final int cardinality=bs.cardinality();
|
jpayne@68
|
122 assert(cardinality>0);
|
jpayne@68
|
123
|
jpayne@68
|
124 //Replace kmer hit zone with the trim symbol
|
jpayne@68
|
125 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
126 if(bs.get(i)){
|
jpayne@68
|
127 if(kmaskLowercase){
|
jpayne@68
|
128 bases[i]=(byte)Tools.toLowerCase(bases[i]);
|
jpayne@68
|
129 }else{
|
jpayne@68
|
130 bases[i]=trimSymbol;
|
jpayne@68
|
131 if(quals!=null && trimSymbol=='N'){quals[i]=0;}
|
jpayne@68
|
132 }
|
jpayne@68
|
133 }
|
jpayne@68
|
134 }
|
jpayne@68
|
135 return cardinality;
|
jpayne@68
|
136 }
|
jpayne@68
|
137
|
jpayne@68
|
138
|
jpayne@68
|
139 /**
|
jpayne@68
|
140 * Counts the number of kmer hits for a read.
|
jpayne@68
|
141 * @param r Read to process
|
jpayne@68
|
142 * @param sets Kmer tables
|
jpayne@68
|
143 * @return Number of hits
|
jpayne@68
|
144 */
|
jpayne@68
|
145 public final int countKmerHits(final Read r, final AbstractKmerTable[] sets){
|
jpayne@68
|
146 if(r==null || r.length()<k){return 0;}
|
jpayne@68
|
147 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){return 0;}
|
jpayne@68
|
148 final byte[] bases=r.bases;
|
jpayne@68
|
149 final int minlen=k-1;
|
jpayne@68
|
150 final int minlen2=(maskMiddle ? k/2 : k);
|
jpayne@68
|
151 final int shift=2*k;
|
jpayne@68
|
152 final int shift2=shift-2;
|
jpayne@68
|
153 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
154 long kmer=0;
|
jpayne@68
|
155 long rkmer=0;
|
jpayne@68
|
156 int found=0;
|
jpayne@68
|
157 int len=0;
|
jpayne@68
|
158
|
jpayne@68
|
159 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight));
|
jpayne@68
|
160 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft));
|
jpayne@68
|
161
|
jpayne@68
|
162 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
163 for(int i=start; i<stop; i++){
|
jpayne@68
|
164 byte b=bases[i];
|
jpayne@68
|
165 long x=AminoAcid.baseToNumber0[b];
|
jpayne@68
|
166 long x2=AminoAcid.baseToComplementNumber0[b];
|
jpayne@68
|
167 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
168 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
169 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;}
|
jpayne@68
|
170 if(verbose){outstream.println("Scanning6 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
171 if(len>=minlen2 && i>=minlen){
|
jpayne@68
|
172 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets);
|
jpayne@68
|
173 if(verbose){outstream.println("Testing kmer "+kmer+"; id="+id);}
|
jpayne@68
|
174 if(id>0){
|
jpayne@68
|
175 if(verbose){outstream.println("Found = "+(found+1)+"/"+minHits);}
|
jpayne@68
|
176 if(found>=minHits){
|
jpayne@68
|
177 return (found=found+1); //Early exit
|
jpayne@68
|
178 }
|
jpayne@68
|
179 found++;
|
jpayne@68
|
180 }
|
jpayne@68
|
181 }
|
jpayne@68
|
182 }
|
jpayne@68
|
183
|
jpayne@68
|
184 return found;
|
jpayne@68
|
185 }
|
jpayne@68
|
186
|
jpayne@68
|
187 /**
|
jpayne@68
|
188 * Returns the id of the sequence with the most kmer matches to this read, or -1 if none are at least minHits.
|
jpayne@68
|
189 * @param r Read to process
|
jpayne@68
|
190 * @param sets Kmer tables
|
jpayne@68
|
191 * @return id of best match
|
jpayne@68
|
192 */
|
jpayne@68
|
193 public final int findBestMatch(final Read r, final AbstractKmerTable[] sets){
|
jpayne@68
|
194 idList.size=0;
|
jpayne@68
|
195 if(r==null || r.length()<k){return -1;}
|
jpayne@68
|
196 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){return -1;}
|
jpayne@68
|
197 final byte[] bases=r.bases;
|
jpayne@68
|
198 final int minlen=k-1;
|
jpayne@68
|
199 final int minlen2=(maskMiddle ? k/2 : k);
|
jpayne@68
|
200 final int shift=2*k;
|
jpayne@68
|
201 final int shift2=shift-2;
|
jpayne@68
|
202 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
203 long kmer=0;
|
jpayne@68
|
204 long rkmer=0;
|
jpayne@68
|
205 int len=0;
|
jpayne@68
|
206 int found=0;
|
jpayne@68
|
207
|
jpayne@68
|
208 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight));
|
jpayne@68
|
209 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft));
|
jpayne@68
|
210
|
jpayne@68
|
211 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
212 for(int i=start; i<stop; i++){
|
jpayne@68
|
213 byte b=bases[i];
|
jpayne@68
|
214 long x=AminoAcid.baseToNumber0[b];
|
jpayne@68
|
215 long x2=AminoAcid.baseToComplementNumber0[b];
|
jpayne@68
|
216 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
217 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
218 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;}
|
jpayne@68
|
219 if(verbose){outstream.println("Scanning6 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
220 if(len>=minlen2 && i>=minlen){
|
jpayne@68
|
221 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets);
|
jpayne@68
|
222 if(id>0){
|
jpayne@68
|
223 countArray[id]++;
|
jpayne@68
|
224 if(countArray[id]==1){idList.add(id);}
|
jpayne@68
|
225 found++;
|
jpayne@68
|
226 if(verbose){outstream.println("Found = "+found+"/"+minHits);}
|
jpayne@68
|
227 }
|
jpayne@68
|
228 }
|
jpayne@68
|
229 }
|
jpayne@68
|
230
|
jpayne@68
|
231 final int id, max;
|
jpayne@68
|
232 if(found>=minHits){
|
jpayne@68
|
233 max=condenseLoose(countArray, idList, countList);
|
jpayne@68
|
234 int id0=-1;
|
jpayne@68
|
235 for(int i=0; i<countList.size; i++){
|
jpayne@68
|
236 if(countList.get(i)==max){
|
jpayne@68
|
237 id0=idList.get(i); break;
|
jpayne@68
|
238 }
|
jpayne@68
|
239 }
|
jpayne@68
|
240 id=id0;
|
jpayne@68
|
241 }else{
|
jpayne@68
|
242 max=0;
|
jpayne@68
|
243 id=-1;
|
jpayne@68
|
244 }
|
jpayne@68
|
245
|
jpayne@68
|
246 return id;
|
jpayne@68
|
247 }
|
jpayne@68
|
248
|
jpayne@68
|
249
|
jpayne@68
|
250 /**
|
jpayne@68
|
251 * Mask a read to cover matching kmers.
|
jpayne@68
|
252 * @param r Read to process
|
jpayne@68
|
253 * @param sets Kmer tables
|
jpayne@68
|
254 * @return Number of bases masked
|
jpayne@68
|
255 */
|
jpayne@68
|
256 public final BitSet markBits(final Read r, final AbstractKmerTable[] sets){
|
jpayne@68
|
257 if(r==null || r.length()<Tools.max(1, (useShortKmers ? Tools.min(k, mink) : k))){
|
jpayne@68
|
258 if(verbose){outstream.println("Read too short.");}
|
jpayne@68
|
259 return null;
|
jpayne@68
|
260 }
|
jpayne@68
|
261 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){
|
jpayne@68
|
262 if(verbose){outstream.println("Skipping read.");}
|
jpayne@68
|
263 return null;
|
jpayne@68
|
264 }
|
jpayne@68
|
265 if(verbose){outstream.println("Marking bitset for read "+r.id);}
|
jpayne@68
|
266 final byte[] bases=r.bases;
|
jpayne@68
|
267 final int minlen=k-1;
|
jpayne@68
|
268 final int minlen2=(maskMiddle ? k/2 : k);
|
jpayne@68
|
269 final int shift=2*k;
|
jpayne@68
|
270 final int shift2=shift-2;
|
jpayne@68
|
271 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
272 long kmer=0;
|
jpayne@68
|
273 long rkmer=0;
|
jpayne@68
|
274 int found=0;
|
jpayne@68
|
275 int len=0;
|
jpayne@68
|
276 int id0=-1; //ID of first kmer found.
|
jpayne@68
|
277
|
jpayne@68
|
278 BitSet bs=new BitSet(bases.length+trimPad+1);
|
jpayne@68
|
279
|
jpayne@68
|
280 final int minus=k-1-trimPad;
|
jpayne@68
|
281 final int plus=trimPad+1;
|
jpayne@68
|
282
|
jpayne@68
|
283 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight));
|
jpayne@68
|
284 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft));
|
jpayne@68
|
285
|
jpayne@68
|
286 //Scan for normal kmers
|
jpayne@68
|
287 for(int i=start; i<stop; i++){
|
jpayne@68
|
288 byte b=bases[i];
|
jpayne@68
|
289 long x=AminoAcid.baseToNumber0[b];
|
jpayne@68
|
290 long x2=AminoAcid.baseToComplementNumber0[b];
|
jpayne@68
|
291 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
292 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
293 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;}
|
jpayne@68
|
294 if(verbose){outstream.println("Scanning3 i="+i+", kmer="+kmer+", rkmer="+rkmer+", len="+len+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
295 if(len>=minlen2 && i>=minlen){
|
jpayne@68
|
296 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets);
|
jpayne@68
|
297 if(id>0){
|
jpayne@68
|
298 if(id0<0){id0=id;}
|
jpayne@68
|
299 if(verbose){
|
jpayne@68
|
300 outstream.println("a: Found "+kmer);
|
jpayne@68
|
301 outstream.println("Setting "+Tools.max(0, i-minus)+", "+(i+plus));
|
jpayne@68
|
302 outstream.println("i="+i+", minus="+minus+", plus="+plus+", trimpad="+trimPad+", k="+k);
|
jpayne@68
|
303 }
|
jpayne@68
|
304 bs.set(Tools.max(0, i-minus), i+plus);
|
jpayne@68
|
305 found++;
|
jpayne@68
|
306 }
|
jpayne@68
|
307 }
|
jpayne@68
|
308 }
|
jpayne@68
|
309
|
jpayne@68
|
310 //If nothing was found, scan for short kmers.
|
jpayne@68
|
311 if(useShortKmers){
|
jpayne@68
|
312 assert(!maskMiddle && middleMask==-1) : maskMiddle+", "+middleMask+", k="+", mink="+mink;
|
jpayne@68
|
313
|
jpayne@68
|
314 //Look for short kmers on left side
|
jpayne@68
|
315 {
|
jpayne@68
|
316 kmer=0;
|
jpayne@68
|
317 rkmer=0;
|
jpayne@68
|
318 len=0;
|
jpayne@68
|
319 final int lim=Tools.min(k, stop);
|
jpayne@68
|
320 for(int i=start; i<lim; i++){
|
jpayne@68
|
321 byte b=bases[i];
|
jpayne@68
|
322 long x=Dedupe.baseToNumber[b];
|
jpayne@68
|
323 long x2=Dedupe.baseToComplementNumber[b];
|
jpayne@68
|
324 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
325 rkmer=rkmer|(x2<<(2*len));
|
jpayne@68
|
326 len++;
|
jpayne@68
|
327 if(verbose){outstream.println("Scanning4 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
328 if(len>=mink){
|
jpayne@68
|
329
|
jpayne@68
|
330 if(verbose){
|
jpayne@68
|
331 outstream.println("Looking for left kmer "+AminoAcid.kmerToString(kmer, len));
|
jpayne@68
|
332 outstream.println("Looking for left rkmer "+AminoAcid.kmerToString(rkmer, len));
|
jpayne@68
|
333 }
|
jpayne@68
|
334 final int id=getValue(kmer, rkmer, len, qHammingDistance2, i, sets);
|
jpayne@68
|
335 if(id>0){
|
jpayne@68
|
336 if(id0<0){id0=id;}
|
jpayne@68
|
337 if(verbose){
|
jpayne@68
|
338 outstream.println("b: Found "+kmer);
|
jpayne@68
|
339 outstream.println("Setting "+0+", "+(i+plus));
|
jpayne@68
|
340 }
|
jpayne@68
|
341 bs.set(0, i+plus);
|
jpayne@68
|
342 found++;
|
jpayne@68
|
343 }
|
jpayne@68
|
344 }
|
jpayne@68
|
345 }
|
jpayne@68
|
346 }
|
jpayne@68
|
347
|
jpayne@68
|
348 //Look for short kmers on right side
|
jpayne@68
|
349 {
|
jpayne@68
|
350 kmer=0;
|
jpayne@68
|
351 rkmer=0;
|
jpayne@68
|
352 len=0;
|
jpayne@68
|
353 final int lim=Tools.max(-1, stop-k);
|
jpayne@68
|
354 for(int i=stop-1; i>lim; i--){
|
jpayne@68
|
355 byte b=bases[i];
|
jpayne@68
|
356 long x=Dedupe.baseToNumber[b];
|
jpayne@68
|
357 long x2=Dedupe.baseToComplementNumber[b];
|
jpayne@68
|
358 kmer=kmer|(x<<(2*len));
|
jpayne@68
|
359 rkmer=((rkmer<<2)|x2)&mask;
|
jpayne@68
|
360 len++;
|
jpayne@68
|
361 if(verbose){outstream.println("Scanning5 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
362 if(len>=mink){
|
jpayne@68
|
363 if(verbose){
|
jpayne@68
|
364 outstream.println("Looking for right kmer "+
|
jpayne@68
|
365 AminoAcid.kmerToString(kmer&~lengthMasks[len], len)+"; value="+toValue(kmer, rkmer, lengthMasks[len])+"; kmask="+lengthMasks[len]);
|
jpayne@68
|
366 }
|
jpayne@68
|
367 final int id=getValue(kmer, rkmer, len, qHammingDistance2, i, sets);
|
jpayne@68
|
368 if(id>0){
|
jpayne@68
|
369 if(id0<0){id0=id;}
|
jpayne@68
|
370 if(verbose){
|
jpayne@68
|
371 outstream.println("c: Found "+kmer);
|
jpayne@68
|
372 outstream.println("Setting "+Tools.max(0, i-trimPad)+", "+bases.length);
|
jpayne@68
|
373 }
|
jpayne@68
|
374 bs.set(Tools.max(0, i-trimPad), bases.length);
|
jpayne@68
|
375 found++;
|
jpayne@68
|
376 }
|
jpayne@68
|
377 }
|
jpayne@68
|
378 }
|
jpayne@68
|
379 }
|
jpayne@68
|
380 }
|
jpayne@68
|
381
|
jpayne@68
|
382
|
jpayne@68
|
383 if(verbose){outstream.println("found="+found+", bitset="+bs);}
|
jpayne@68
|
384
|
jpayne@68
|
385 if(found==0){return null;}
|
jpayne@68
|
386 assert(found>0) : "Overflow in 'found' variable.";
|
jpayne@68
|
387
|
jpayne@68
|
388 int cardinality=bs.cardinality();
|
jpayne@68
|
389 assert(cardinality>0);
|
jpayne@68
|
390
|
jpayne@68
|
391 return bs;
|
jpayne@68
|
392 }
|
jpayne@68
|
393
|
jpayne@68
|
394
|
jpayne@68
|
395 /*--------------------------------------------------------------*/
|
jpayne@68
|
396 /*---------------- Helper Methods ----------------*/
|
jpayne@68
|
397 /*--------------------------------------------------------------*/
|
jpayne@68
|
398 /**
|
jpayne@68
|
399 * Transforms a kmer into all canonical values for a given Hamming distance.
|
jpayne@68
|
400 * Returns the related id stored in the tables.
|
jpayne@68
|
401 * @param kmer Forward kmer
|
jpayne@68
|
402 * @param rkmer Reverse kmer
|
jpayne@68
|
403 * @param len kmer length
|
jpayne@68
|
404 * @param qHDist Hamming distance
|
jpayne@68
|
405 * @param qPos Position of kmer in query
|
jpayne@68
|
406 * @param sets Kmer hash tables
|
jpayne@68
|
407 * @return Value stored in table, or -1
|
jpayne@68
|
408 */
|
jpayne@68
|
409 public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final int qPos, final AbstractKmerTable[] sets){
|
jpayne@68
|
410 if(qSkip>1 && (qPos%qSkip!=0)){return -1;}
|
jpayne@68
|
411 return qHDist<1 ? getValue(kmer, rkmer, len, sets) : getValue(kmer, rkmer, len, qHDist, sets);
|
jpayne@68
|
412 }
|
jpayne@68
|
413
|
jpayne@68
|
414 /**
|
jpayne@68
|
415 * Transforms a kmer into all canonical values for a given Hamming distance.
|
jpayne@68
|
416 * Returns the related id stored in the tables.
|
jpayne@68
|
417 * @param kmer Forward kmer
|
jpayne@68
|
418 * @param rkmer Reverse kmer
|
jpayne@68
|
419 * @param len kmer length
|
jpayne@68
|
420 * @param qHDist Hamming distance
|
jpayne@68
|
421 * @param sets Kmer hash tables
|
jpayne@68
|
422 * @return Value stored in table, or -1
|
jpayne@68
|
423 */
|
jpayne@68
|
424 public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final AbstractKmerTable[] sets){
|
jpayne@68
|
425 int id=getValue(kmer, rkmer, len, sets);
|
jpayne@68
|
426 if(id<1 && qHDist>0){
|
jpayne@68
|
427 final int qHDistMinusOne=qHDist-1;
|
jpayne@68
|
428
|
jpayne@68
|
429 //Sub
|
jpayne@68
|
430 for(int j=0; j<4 && id<1; j++){
|
jpayne@68
|
431 for(int i=0; i<len && id<1; i++){
|
jpayne@68
|
432 final long temp=(kmer&clearMasks[i])|setMasks[j][i];
|
jpayne@68
|
433 if(temp!=kmer){
|
jpayne@68
|
434 long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
|
jpayne@68
|
435 id=getValue(temp, rtemp, len, qHDistMinusOne, sets);
|
jpayne@68
|
436 }
|
jpayne@68
|
437 }
|
jpayne@68
|
438 }
|
jpayne@68
|
439 }
|
jpayne@68
|
440 return id;
|
jpayne@68
|
441 }
|
jpayne@68
|
442
|
jpayne@68
|
443 /**
|
jpayne@68
|
444 * Transforms a kmer into a canonical value stored in the table and search.
|
jpayne@68
|
445 * @param kmer Forward kmer
|
jpayne@68
|
446 * @param rkmer Reverse kmer
|
jpayne@68
|
447 * @param len kmer length
|
jpayne@68
|
448 * @param sets Kmer hash tables
|
jpayne@68
|
449 * @return Value stored in table
|
jpayne@68
|
450 */
|
jpayne@68
|
451 public final int getValue(final long kmer, final long rkmer, final int len, final AbstractKmerTable[] sets){
|
jpayne@68
|
452 return getValueWithMask(kmer, rkmer, lengthMasks[len], sets);
|
jpayne@68
|
453 }
|
jpayne@68
|
454
|
jpayne@68
|
455 /**
|
jpayne@68
|
456 * Transforms a kmer into a canonical value stored in the table and search.
|
jpayne@68
|
457 * @param kmer Forward kmer
|
jpayne@68
|
458 * @param rkmer Reverse kmer
|
jpayne@68
|
459 * @param lengthMask Bitmask with single '1' set to left of kmer
|
jpayne@68
|
460 * @param sets Kmer hash tables
|
jpayne@68
|
461 * @return Value stored in table
|
jpayne@68
|
462 */
|
jpayne@68
|
463 public final int getValueWithMask(final long kmer, final long rkmer, final long lengthMask, final AbstractKmerTable[] sets){
|
jpayne@68
|
464 assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer;
|
jpayne@68
|
465
|
jpayne@68
|
466 // final long max=(rcomp ? Tools.max(kmer, rkmer) : kmer);
|
jpayne@68
|
467 // final long key=(max&middleMask)|lengthMask;
|
jpayne@68
|
468
|
jpayne@68
|
469 final long key=toValue(kmer, rkmer, lengthMask);
|
jpayne@68
|
470
|
jpayne@68
|
471 if(noAccel || ((key/WAYS)&15)>=speed){
|
jpayne@68
|
472 if(verbose){outstream.println("Testing key "+key);}
|
jpayne@68
|
473 AbstractKmerTable set=sets[(int)(key%WAYS)];
|
jpayne@68
|
474 final int id=set.getValue(key);
|
jpayne@68
|
475 return id;
|
jpayne@68
|
476 }
|
jpayne@68
|
477 return -1;
|
jpayne@68
|
478 }
|
jpayne@68
|
479
|
jpayne@68
|
480
|
jpayne@68
|
481 /**
|
jpayne@68
|
482 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined.
|
jpayne@68
|
483 * @param kmer Forward kmer
|
jpayne@68
|
484 * @param rkmer Reverse kmer
|
jpayne@68
|
485 * @param lengthMask Bitmask with single '1' set to left of kmer
|
jpayne@68
|
486 * @return Canonical value
|
jpayne@68
|
487 */
|
jpayne@68
|
488 private final long toValue(long kmer, long rkmer, long lengthMask){
|
jpayne@68
|
489 assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer;
|
jpayne@68
|
490 long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
|
jpayne@68
|
491 return (value&middleMask)|lengthMask;
|
jpayne@68
|
492 }
|
jpayne@68
|
493
|
jpayne@68
|
494 /**
|
jpayne@68
|
495 * Pack a list of counts from an array to an IntList.
|
jpayne@68
|
496 * @param loose Counter array
|
jpayne@68
|
497 * @param packed Unique values
|
jpayne@68
|
498 * @param counts Counts of values
|
jpayne@68
|
499 * @return Highest observed count
|
jpayne@68
|
500 */
|
jpayne@68
|
501 public static int condenseLoose(int[] loose, IntList packed, IntList counts){
|
jpayne@68
|
502 counts.size=0;
|
jpayne@68
|
503 if(packed.size<1){return 0;}
|
jpayne@68
|
504
|
jpayne@68
|
505 int max=0;
|
jpayne@68
|
506 for(int i=0; i<packed.size; i++){
|
jpayne@68
|
507 final int p=packed.get(i);
|
jpayne@68
|
508 final int c=loose[p];
|
jpayne@68
|
509 counts.add(c);
|
jpayne@68
|
510 loose[p]=0;
|
jpayne@68
|
511 max=Tools.max(max, c);
|
jpayne@68
|
512 }
|
jpayne@68
|
513 return max;
|
jpayne@68
|
514 }
|
jpayne@68
|
515
|
jpayne@68
|
516 public final int kmerToWay(final long kmer){
|
jpayne@68
|
517 // final int way=(int)((kmer&coreMask)%WAYS);
|
jpayne@68
|
518 // return way;
|
jpayne@68
|
519 return (int)(kmer%WAYS);
|
jpayne@68
|
520 }
|
jpayne@68
|
521
|
jpayne@68
|
522 /*--------------------------------------------------------------*/
|
jpayne@68
|
523 /*---------------- Fields ----------------*/
|
jpayne@68
|
524 /*--------------------------------------------------------------*/
|
jpayne@68
|
525
|
jpayne@68
|
526 /** Has this class encountered errors while processing? */
|
jpayne@68
|
527 public boolean errorState=false;
|
jpayne@68
|
528
|
jpayne@68
|
529 /** Make the middle base in a kmer a wildcard to improve sensitivity */
|
jpayne@68
|
530 public final boolean maskMiddle=false;
|
jpayne@68
|
531
|
jpayne@68
|
532 /** Search for query kmers with up to this many substitutions */
|
jpayne@68
|
533 private final int qHammingDistance;
|
jpayne@68
|
534 /** Search for short query kmers with up to this many substitutions */
|
jpayne@68
|
535 public int qHammingDistance2=-1;
|
jpayne@68
|
536
|
jpayne@68
|
537 /** Trim this much extra around matched kmers */
|
jpayne@68
|
538 public int trimPad=0;
|
jpayne@68
|
539
|
jpayne@68
|
540 /** If positive, only look for kmer matches in the leftmost X bases */
|
jpayne@68
|
541 public int restrictLeft=0;
|
jpayne@68
|
542 /** If positive, only look for kmer matches the rightmost X bases */
|
jpayne@68
|
543 public int restrictRight=0;
|
jpayne@68
|
544
|
jpayne@68
|
545 /** Don't allow a read 'N' to match a reference 'A'.
|
jpayne@68
|
546 * Reduces sensitivity when hdist>0 or edist>0. Default: false. */
|
jpayne@68
|
547 public boolean forbidNs=false;
|
jpayne@68
|
548
|
jpayne@68
|
549 /** Replace bases covered by matched kmers with this symbol */
|
jpayne@68
|
550 public byte trimSymbol='N';
|
jpayne@68
|
551
|
jpayne@68
|
552 /** Convert masked bases to lowercase */
|
jpayne@68
|
553 public boolean kmaskLowercase=false;
|
jpayne@68
|
554
|
jpayne@68
|
555 /** Don't look for kmers in read 1 */
|
jpayne@68
|
556 public boolean skipR1=false;
|
jpayne@68
|
557 /** Don't look for kmers in read 2 */
|
jpayne@68
|
558 public boolean skipR2=false;
|
jpayne@68
|
559
|
jpayne@68
|
560 /** A read must contain at least this many kmer hits before being considered a match. Default: 1 */
|
jpayne@68
|
561 public int minHits=1;
|
jpayne@68
|
562
|
jpayne@68
|
563 /*--------------------------------------------------------------*/
|
jpayne@68
|
564 /*---------------- Statistics ----------------*/
|
jpayne@68
|
565 /*--------------------------------------------------------------*/
|
jpayne@68
|
566
|
jpayne@68
|
567 // public long storedKmers=0;
|
jpayne@68
|
568
|
jpayne@68
|
569 /*--------------------------------------------------------------*/
|
jpayne@68
|
570 /*---------------- Per-Thread Fields ----------------*/
|
jpayne@68
|
571 /*--------------------------------------------------------------*/
|
jpayne@68
|
572
|
jpayne@68
|
573 public int[] countArray;
|
jpayne@68
|
574
|
jpayne@68
|
575 private final IntList idList=new IntList();
|
jpayne@68
|
576 private final IntList countList=new IntList();
|
jpayne@68
|
577
|
jpayne@68
|
578 /*--------------------------------------------------------------*/
|
jpayne@68
|
579 /*---------------- Final Primitives ----------------*/
|
jpayne@68
|
580 /*--------------------------------------------------------------*/
|
jpayne@68
|
581
|
jpayne@68
|
582 /** Look for reverse-complements as well as forward kmers. Default: true */
|
jpayne@68
|
583 private final boolean rcomp;
|
jpayne@68
|
584 /** AND bitmask with 0's at the middle base */
|
jpayne@68
|
585 private final long middleMask;
|
jpayne@68
|
586
|
jpayne@68
|
587 /** Normal kmer length */
|
jpayne@68
|
588 private final int k;
|
jpayne@68
|
589 /** k-1; used in some expressions */
|
jpayne@68
|
590 private final int k2;
|
jpayne@68
|
591 /** Shortest kmer to use for trimming */
|
jpayne@68
|
592 private final int mink;
|
jpayne@68
|
593 /** Attempt to match kmers shorter than normal k on read ends when doing kTrimming. */
|
jpayne@68
|
594 private final boolean useShortKmers;
|
jpayne@68
|
595
|
jpayne@68
|
596 /** Fraction of kmers to skip, 0 to 15 out of 16 */
|
jpayne@68
|
597 private final int speed;
|
jpayne@68
|
598
|
jpayne@68
|
599 /** Skip this many kmers when examining the read. Default 1.
|
jpayne@68
|
600 * 1 means every kmer is used, 2 means every other, etc. */
|
jpayne@68
|
601 private final int qSkip;
|
jpayne@68
|
602
|
jpayne@68
|
603 /** noAccel is true if speed and qSkip are disabled, accel is the opposite. */
|
jpayne@68
|
604 private final boolean noAccel, accel;
|
jpayne@68
|
605
|
jpayne@68
|
606 /*--------------------------------------------------------------*/
|
jpayne@68
|
607 /*---------------- Static Fields ----------------*/
|
jpayne@68
|
608 /*--------------------------------------------------------------*/
|
jpayne@68
|
609
|
jpayne@68
|
610 /** Number of tables (and threads, during loading) */
|
jpayne@68
|
611 private static final int WAYS=7; //123
|
jpayne@68
|
612 /** Verbose messages */
|
jpayne@68
|
613 public static final boolean verbose=false; //123
|
jpayne@68
|
614
|
jpayne@68
|
615 /** Print messages to this stream */
|
jpayne@68
|
616 private static PrintStream outstream=System.err;
|
jpayne@68
|
617
|
jpayne@68
|
618 /** x&clearMasks[i] will clear base i */
|
jpayne@68
|
619 private static final long[] clearMasks;
|
jpayne@68
|
620 /** x|setMasks[i][j] will set base i to j */
|
jpayne@68
|
621 private static final long[][] setMasks;
|
jpayne@68
|
622 /** x&leftMasks[i] will clear all bases to the right of i (exclusive) */
|
jpayne@68
|
623 private static final long[] leftMasks;
|
jpayne@68
|
624 /** x&rightMasks[i] will clear all bases to the left of i (inclusive) */
|
jpayne@68
|
625 private static final long[] rightMasks;
|
jpayne@68
|
626 /** x|kMasks[i] will set the bit to the left of the leftmost base */
|
jpayne@68
|
627 private static final long[] lengthMasks;
|
jpayne@68
|
628
|
jpayne@68
|
629 /*--------------------------------------------------------------*/
|
jpayne@68
|
630 /*---------------- Static Initializers ----------------*/
|
jpayne@68
|
631 /*--------------------------------------------------------------*/
|
jpayne@68
|
632
|
jpayne@68
|
633 static{
|
jpayne@68
|
634 clearMasks=new long[32];
|
jpayne@68
|
635 leftMasks=new long[32];
|
jpayne@68
|
636 rightMasks=new long[32];
|
jpayne@68
|
637 lengthMasks=new long[32];
|
jpayne@68
|
638 setMasks=new long[4][32];
|
jpayne@68
|
639 for(int i=0; i<32; i++){
|
jpayne@68
|
640 clearMasks[i]=~(3L<<(2*i));
|
jpayne@68
|
641 }
|
jpayne@68
|
642 for(int i=0; i<32; i++){
|
jpayne@68
|
643 leftMasks[i]=((-1L)<<(2*i));
|
jpayne@68
|
644 }
|
jpayne@68
|
645 for(int i=0; i<32; i++){
|
jpayne@68
|
646 rightMasks[i]=~((-1L)<<(2*i));
|
jpayne@68
|
647 }
|
jpayne@68
|
648 for(int i=0; i<32; i++){
|
jpayne@68
|
649 lengthMasks[i]=((1L)<<(2*i));
|
jpayne@68
|
650 }
|
jpayne@68
|
651 for(int i=0; i<32; i++){
|
jpayne@68
|
652 for(long j=0; j<4; j++){
|
jpayne@68
|
653 setMasks[(int)j][i]=(j<<(2*i));
|
jpayne@68
|
654 }
|
jpayne@68
|
655 }
|
jpayne@68
|
656 }
|
jpayne@68
|
657
|
jpayne@68
|
658 }
|