Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/TableReader.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package kmer; | |
2 | |
3 import java.io.PrintStream; | |
4 import java.util.BitSet; | |
5 | |
6 import dna.AminoAcid; | |
7 import jgi.Dedupe; | |
8 import shared.PreParser; | |
9 import shared.Shared; | |
10 import shared.Timer; | |
11 import shared.Tools; | |
12 import stream.Read; | |
13 import structures.IntList; | |
14 | |
15 /** | |
16 * @author Brian Bushnell | |
17 * @date Mar 5, 2015 | |
18 * | |
19 */ | |
20 public class TableReader { | |
21 | |
22 /*--------------------------------------------------------------*/ | |
23 /*---------------- Initialization ----------------*/ | |
24 /*--------------------------------------------------------------*/ | |
25 | |
26 /** | |
27 * Code entrance from the command line. | |
28 * @param args Command line arguments | |
29 */ | |
30 public static void main(String[] args){ | |
31 | |
32 {//Preparse block for help, config files, and outstream | |
33 PreParser pp=new PreParser(args, null, false); | |
34 args=pp.args; | |
35 outstream=pp.outstream; | |
36 } | |
37 | |
38 Timer t=new Timer(); | |
39 | |
40 AbstractKmerTable[] tables=TableLoaderLockFree.makeTables(AbstractKmerTable.ARRAY1D, 12, -1L, false, 1.0); | |
41 | |
42 int k=31; | |
43 int mink=0; | |
44 int speed=0; | |
45 int hdist=0; | |
46 int edist=0; | |
47 boolean rcomp=true; | |
48 boolean maskMiddle=false; | |
49 | |
50 //Create a new Loader instance | |
51 TableLoaderLockFree loader=new TableLoaderLockFree(tables, k, mink, speed, hdist, edist, rcomp, maskMiddle); | |
52 loader.setRefSkip(0); | |
53 loader.hammingDistance2=0; | |
54 loader.editDistance2=0; | |
55 loader.storeMode(TableLoaderLockFree.SET_IF_NOT_PRESENT); | |
56 | |
57 ///And run it | |
58 String[] refs=args; | |
59 String[] literals=null; | |
60 boolean keepNames=false; | |
61 boolean useRefNames=false; | |
62 long kmers=loader.processData(refs, literals, keepNames, useRefNames, false); | |
63 t.stop(); | |
64 | |
65 outstream.println("Load Time:\t"+t); | |
66 outstream.println("Return: \t"+kmers); | |
67 outstream.println("refKmers: \t"+loader.refKmers); | |
68 outstream.println("refBases: \t"+loader.refBases); | |
69 outstream.println("refReads: \t"+loader.refReads); | |
70 | |
71 int qskip=0; | |
72 int qhdist=0; | |
73 TableReader tr=new TableReader(k, mink, speed, qskip, qhdist, rcomp, maskMiddle); | |
74 | |
75 //TODO: Stuff... | |
76 | |
77 //Close the print stream if it was redirected | |
78 Shared.closeStream(outstream); | |
79 } | |
80 | |
81 public TableReader(int k_){ | |
82 this(k_, 0, 0, 0, 0, true, false); | |
83 } | |
84 | |
85 public TableReader(int k_, int mink_, int speed_, int qskip_, int qhdist_, boolean rcomp_, boolean maskMiddle_){ | |
86 k=k_; | |
87 k2=k-1; | |
88 mink=mink_; | |
89 rcomp=rcomp_; | |
90 useShortKmers=(mink>0 && mink<k); | |
91 speed=speed_; | |
92 qSkip=qskip_; | |
93 qHammingDistance=qhdist_; | |
94 middleMask=maskMiddle ? ~(3L<<(2*(k/2))) : -1L; | |
95 | |
96 noAccel=(speed<1 && qSkip<2); | |
97 accel=!noAccel; | |
98 } | |
99 | |
100 | |
101 /*--------------------------------------------------------------*/ | |
102 /*---------------- Outer Methods ----------------*/ | |
103 /*--------------------------------------------------------------*/ | |
104 | |
105 | |
106 /** | |
107 * Mask a read to cover matching kmers. | |
108 * @param r Read to process | |
109 * @param sets Kmer tables | |
110 * @return Number of bases masked | |
111 */ | |
112 public final int kMask(final Read r, final AbstractKmerTable[] sets){ | |
113 if(r==null){return 0;} | |
114 if(verbose){outstream.println("KMasking read "+r.id);} | |
115 | |
116 BitSet bs=markBits(r, sets); | |
117 if(verbose){outstream.println("Null bitset.");} | |
118 if(bs==null){return 0;} | |
119 | |
120 final byte[] bases=r.bases, quals=r.quality; | |
121 final int cardinality=bs.cardinality(); | |
122 assert(cardinality>0); | |
123 | |
124 //Replace kmer hit zone with the trim symbol | |
125 for(int i=0; i<bases.length; i++){ | |
126 if(bs.get(i)){ | |
127 if(kmaskLowercase){ | |
128 bases[i]=(byte)Tools.toLowerCase(bases[i]); | |
129 }else{ | |
130 bases[i]=trimSymbol; | |
131 if(quals!=null && trimSymbol=='N'){quals[i]=0;} | |
132 } | |
133 } | |
134 } | |
135 return cardinality; | |
136 } | |
137 | |
138 | |
139 /** | |
140 * Counts the number of kmer hits for a read. | |
141 * @param r Read to process | |
142 * @param sets Kmer tables | |
143 * @return Number of hits | |
144 */ | |
145 public final int countKmerHits(final Read r, final AbstractKmerTable[] sets){ | |
146 if(r==null || r.length()<k){return 0;} | |
147 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){return 0;} | |
148 final byte[] bases=r.bases; | |
149 final int minlen=k-1; | |
150 final int minlen2=(maskMiddle ? k/2 : k); | |
151 final int shift=2*k; | |
152 final int shift2=shift-2; | |
153 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
154 long kmer=0; | |
155 long rkmer=0; | |
156 int found=0; | |
157 int len=0; | |
158 | |
159 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight)); | |
160 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft)); | |
161 | |
162 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
163 for(int i=start; i<stop; i++){ | |
164 byte b=bases[i]; | |
165 long x=AminoAcid.baseToNumber0[b]; | |
166 long x2=AminoAcid.baseToComplementNumber0[b]; | |
167 kmer=((kmer<<2)|x)&mask; | |
168 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
169 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;} | |
170 if(verbose){outstream.println("Scanning6 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
171 if(len>=minlen2 && i>=minlen){ | |
172 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets); | |
173 if(verbose){outstream.println("Testing kmer "+kmer+"; id="+id);} | |
174 if(id>0){ | |
175 if(verbose){outstream.println("Found = "+(found+1)+"/"+minHits);} | |
176 if(found>=minHits){ | |
177 return (found=found+1); //Early exit | |
178 } | |
179 found++; | |
180 } | |
181 } | |
182 } | |
183 | |
184 return found; | |
185 } | |
186 | |
187 /** | |
188 * Returns the id of the sequence with the most kmer matches to this read, or -1 if none are at least minHits. | |
189 * @param r Read to process | |
190 * @param sets Kmer tables | |
191 * @return id of best match | |
192 */ | |
193 public final int findBestMatch(final Read r, final AbstractKmerTable[] sets){ | |
194 idList.size=0; | |
195 if(r==null || r.length()<k){return -1;} | |
196 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){return -1;} | |
197 final byte[] bases=r.bases; | |
198 final int minlen=k-1; | |
199 final int minlen2=(maskMiddle ? k/2 : k); | |
200 final int shift=2*k; | |
201 final int shift2=shift-2; | |
202 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
203 long kmer=0; | |
204 long rkmer=0; | |
205 int len=0; | |
206 int found=0; | |
207 | |
208 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight)); | |
209 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft)); | |
210 | |
211 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */ | |
212 for(int i=start; i<stop; i++){ | |
213 byte b=bases[i]; | |
214 long x=AminoAcid.baseToNumber0[b]; | |
215 long x2=AminoAcid.baseToComplementNumber0[b]; | |
216 kmer=((kmer<<2)|x)&mask; | |
217 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
218 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;} | |
219 if(verbose){outstream.println("Scanning6 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
220 if(len>=minlen2 && i>=minlen){ | |
221 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets); | |
222 if(id>0){ | |
223 countArray[id]++; | |
224 if(countArray[id]==1){idList.add(id);} | |
225 found++; | |
226 if(verbose){outstream.println("Found = "+found+"/"+minHits);} | |
227 } | |
228 } | |
229 } | |
230 | |
231 final int id, max; | |
232 if(found>=minHits){ | |
233 max=condenseLoose(countArray, idList, countList); | |
234 int id0=-1; | |
235 for(int i=0; i<countList.size; i++){ | |
236 if(countList.get(i)==max){ | |
237 id0=idList.get(i); break; | |
238 } | |
239 } | |
240 id=id0; | |
241 }else{ | |
242 max=0; | |
243 id=-1; | |
244 } | |
245 | |
246 return id; | |
247 } | |
248 | |
249 | |
250 /** | |
251 * Mask a read to cover matching kmers. | |
252 * @param r Read to process | |
253 * @param sets Kmer tables | |
254 * @return Number of bases masked | |
255 */ | |
256 public final BitSet markBits(final Read r, final AbstractKmerTable[] sets){ | |
257 if(r==null || r.length()<Tools.max(1, (useShortKmers ? Tools.min(k, mink) : k))){ | |
258 if(verbose){outstream.println("Read too short.");} | |
259 return null; | |
260 } | |
261 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){ | |
262 if(verbose){outstream.println("Skipping read.");} | |
263 return null; | |
264 } | |
265 if(verbose){outstream.println("Marking bitset for read "+r.id);} | |
266 final byte[] bases=r.bases; | |
267 final int minlen=k-1; | |
268 final int minlen2=(maskMiddle ? k/2 : k); | |
269 final int shift=2*k; | |
270 final int shift2=shift-2; | |
271 final long mask=(shift>63 ? -1L : ~((-1L)<<shift)); | |
272 long kmer=0; | |
273 long rkmer=0; | |
274 int found=0; | |
275 int len=0; | |
276 int id0=-1; //ID of first kmer found. | |
277 | |
278 BitSet bs=new BitSet(bases.length+trimPad+1); | |
279 | |
280 final int minus=k-1-trimPad; | |
281 final int plus=trimPad+1; | |
282 | |
283 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight)); | |
284 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft)); | |
285 | |
286 //Scan for normal kmers | |
287 for(int i=start; i<stop; i++){ | |
288 byte b=bases[i]; | |
289 long x=AminoAcid.baseToNumber0[b]; | |
290 long x2=AminoAcid.baseToComplementNumber0[b]; | |
291 kmer=((kmer<<2)|x)&mask; | |
292 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
293 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;} | |
294 if(verbose){outstream.println("Scanning3 i="+i+", kmer="+kmer+", rkmer="+rkmer+", len="+len+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
295 if(len>=minlen2 && i>=minlen){ | |
296 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets); | |
297 if(id>0){ | |
298 if(id0<0){id0=id;} | |
299 if(verbose){ | |
300 outstream.println("a: Found "+kmer); | |
301 outstream.println("Setting "+Tools.max(0, i-minus)+", "+(i+plus)); | |
302 outstream.println("i="+i+", minus="+minus+", plus="+plus+", trimpad="+trimPad+", k="+k); | |
303 } | |
304 bs.set(Tools.max(0, i-minus), i+plus); | |
305 found++; | |
306 } | |
307 } | |
308 } | |
309 | |
310 //If nothing was found, scan for short kmers. | |
311 if(useShortKmers){ | |
312 assert(!maskMiddle && middleMask==-1) : maskMiddle+", "+middleMask+", k="+", mink="+mink; | |
313 | |
314 //Look for short kmers on left side | |
315 { | |
316 kmer=0; | |
317 rkmer=0; | |
318 len=0; | |
319 final int lim=Tools.min(k, stop); | |
320 for(int i=start; i<lim; i++){ | |
321 byte b=bases[i]; | |
322 long x=Dedupe.baseToNumber[b]; | |
323 long x2=Dedupe.baseToComplementNumber[b]; | |
324 kmer=((kmer<<2)|x)&mask; | |
325 rkmer=rkmer|(x2<<(2*len)); | |
326 len++; | |
327 if(verbose){outstream.println("Scanning4 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
328 if(len>=mink){ | |
329 | |
330 if(verbose){ | |
331 outstream.println("Looking for left kmer "+AminoAcid.kmerToString(kmer, len)); | |
332 outstream.println("Looking for left rkmer "+AminoAcid.kmerToString(rkmer, len)); | |
333 } | |
334 final int id=getValue(kmer, rkmer, len, qHammingDistance2, i, sets); | |
335 if(id>0){ | |
336 if(id0<0){id0=id;} | |
337 if(verbose){ | |
338 outstream.println("b: Found "+kmer); | |
339 outstream.println("Setting "+0+", "+(i+plus)); | |
340 } | |
341 bs.set(0, i+plus); | |
342 found++; | |
343 } | |
344 } | |
345 } | |
346 } | |
347 | |
348 //Look for short kmers on right side | |
349 { | |
350 kmer=0; | |
351 rkmer=0; | |
352 len=0; | |
353 final int lim=Tools.max(-1, stop-k); | |
354 for(int i=stop-1; i>lim; i--){ | |
355 byte b=bases[i]; | |
356 long x=Dedupe.baseToNumber[b]; | |
357 long x2=Dedupe.baseToComplementNumber[b]; | |
358 kmer=kmer|(x<<(2*len)); | |
359 rkmer=((rkmer<<2)|x2)&mask; | |
360 len++; | |
361 if(verbose){outstream.println("Scanning5 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} | |
362 if(len>=mink){ | |
363 if(verbose){ | |
364 outstream.println("Looking for right kmer "+ | |
365 AminoAcid.kmerToString(kmer&~lengthMasks[len], len)+"; value="+toValue(kmer, rkmer, lengthMasks[len])+"; kmask="+lengthMasks[len]); | |
366 } | |
367 final int id=getValue(kmer, rkmer, len, qHammingDistance2, i, sets); | |
368 if(id>0){ | |
369 if(id0<0){id0=id;} | |
370 if(verbose){ | |
371 outstream.println("c: Found "+kmer); | |
372 outstream.println("Setting "+Tools.max(0, i-trimPad)+", "+bases.length); | |
373 } | |
374 bs.set(Tools.max(0, i-trimPad), bases.length); | |
375 found++; | |
376 } | |
377 } | |
378 } | |
379 } | |
380 } | |
381 | |
382 | |
383 if(verbose){outstream.println("found="+found+", bitset="+bs);} | |
384 | |
385 if(found==0){return null;} | |
386 assert(found>0) : "Overflow in 'found' variable."; | |
387 | |
388 int cardinality=bs.cardinality(); | |
389 assert(cardinality>0); | |
390 | |
391 return bs; | |
392 } | |
393 | |
394 | |
395 /*--------------------------------------------------------------*/ | |
396 /*---------------- Helper Methods ----------------*/ | |
397 /*--------------------------------------------------------------*/ | |
398 /** | |
399 * Transforms a kmer into all canonical values for a given Hamming distance. | |
400 * Returns the related id stored in the tables. | |
401 * @param kmer Forward kmer | |
402 * @param rkmer Reverse kmer | |
403 * @param len kmer length | |
404 * @param qHDist Hamming distance | |
405 * @param qPos Position of kmer in query | |
406 * @param sets Kmer hash tables | |
407 * @return Value stored in table, or -1 | |
408 */ | |
409 public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final int qPos, final AbstractKmerTable[] sets){ | |
410 if(qSkip>1 && (qPos%qSkip!=0)){return -1;} | |
411 return qHDist<1 ? getValue(kmer, rkmer, len, sets) : getValue(kmer, rkmer, len, qHDist, sets); | |
412 } | |
413 | |
414 /** | |
415 * Transforms a kmer into all canonical values for a given Hamming distance. | |
416 * Returns the related id stored in the tables. | |
417 * @param kmer Forward kmer | |
418 * @param rkmer Reverse kmer | |
419 * @param len kmer length | |
420 * @param qHDist Hamming distance | |
421 * @param sets Kmer hash tables | |
422 * @return Value stored in table, or -1 | |
423 */ | |
424 public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final AbstractKmerTable[] sets){ | |
425 int id=getValue(kmer, rkmer, len, sets); | |
426 if(id<1 && qHDist>0){ | |
427 final int qHDistMinusOne=qHDist-1; | |
428 | |
429 //Sub | |
430 for(int j=0; j<4 && id<1; j++){ | |
431 for(int i=0; i<len && id<1; i++){ | |
432 final long temp=(kmer&clearMasks[i])|setMasks[j][i]; | |
433 if(temp!=kmer){ | |
434 long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len); | |
435 id=getValue(temp, rtemp, len, qHDistMinusOne, sets); | |
436 } | |
437 } | |
438 } | |
439 } | |
440 return id; | |
441 } | |
442 | |
443 /** | |
444 * Transforms a kmer into a canonical value stored in the table and search. | |
445 * @param kmer Forward kmer | |
446 * @param rkmer Reverse kmer | |
447 * @param len kmer length | |
448 * @param sets Kmer hash tables | |
449 * @return Value stored in table | |
450 */ | |
451 public final int getValue(final long kmer, final long rkmer, final int len, final AbstractKmerTable[] sets){ | |
452 return getValueWithMask(kmer, rkmer, lengthMasks[len], sets); | |
453 } | |
454 | |
455 /** | |
456 * Transforms a kmer into a canonical value stored in the table and search. | |
457 * @param kmer Forward kmer | |
458 * @param rkmer Reverse kmer | |
459 * @param lengthMask Bitmask with single '1' set to left of kmer | |
460 * @param sets Kmer hash tables | |
461 * @return Value stored in table | |
462 */ | |
463 public final int getValueWithMask(final long kmer, final long rkmer, final long lengthMask, final AbstractKmerTable[] sets){ | |
464 assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer; | |
465 | |
466 // final long max=(rcomp ? Tools.max(kmer, rkmer) : kmer); | |
467 // final long key=(max&middleMask)|lengthMask; | |
468 | |
469 final long key=toValue(kmer, rkmer, lengthMask); | |
470 | |
471 if(noAccel || ((key/WAYS)&15)>=speed){ | |
472 if(verbose){outstream.println("Testing key "+key);} | |
473 AbstractKmerTable set=sets[(int)(key%WAYS)]; | |
474 final int id=set.getValue(key); | |
475 return id; | |
476 } | |
477 return -1; | |
478 } | |
479 | |
480 | |
481 /** | |
482 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined. | |
483 * @param kmer Forward kmer | |
484 * @param rkmer Reverse kmer | |
485 * @param lengthMask Bitmask with single '1' set to left of kmer | |
486 * @return Canonical value | |
487 */ | |
488 private final long toValue(long kmer, long rkmer, long lengthMask){ | |
489 assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer; | |
490 long value=(rcomp ? Tools.max(kmer, rkmer) : kmer); | |
491 return (value&middleMask)|lengthMask; | |
492 } | |
493 | |
494 /** | |
495 * Pack a list of counts from an array to an IntList. | |
496 * @param loose Counter array | |
497 * @param packed Unique values | |
498 * @param counts Counts of values | |
499 * @return Highest observed count | |
500 */ | |
501 public static int condenseLoose(int[] loose, IntList packed, IntList counts){ | |
502 counts.size=0; | |
503 if(packed.size<1){return 0;} | |
504 | |
505 int max=0; | |
506 for(int i=0; i<packed.size; i++){ | |
507 final int p=packed.get(i); | |
508 final int c=loose[p]; | |
509 counts.add(c); | |
510 loose[p]=0; | |
511 max=Tools.max(max, c); | |
512 } | |
513 return max; | |
514 } | |
515 | |
516 public final int kmerToWay(final long kmer){ | |
517 // final int way=(int)((kmer&coreMask)%WAYS); | |
518 // return way; | |
519 return (int)(kmer%WAYS); | |
520 } | |
521 | |
522 /*--------------------------------------------------------------*/ | |
523 /*---------------- Fields ----------------*/ | |
524 /*--------------------------------------------------------------*/ | |
525 | |
526 /** Has this class encountered errors while processing? */ | |
527 public boolean errorState=false; | |
528 | |
529 /** Make the middle base in a kmer a wildcard to improve sensitivity */ | |
530 public final boolean maskMiddle=false; | |
531 | |
532 /** Search for query kmers with up to this many substitutions */ | |
533 private final int qHammingDistance; | |
534 /** Search for short query kmers with up to this many substitutions */ | |
535 public int qHammingDistance2=-1; | |
536 | |
537 /** Trim this much extra around matched kmers */ | |
538 public int trimPad=0; | |
539 | |
540 /** If positive, only look for kmer matches in the leftmost X bases */ | |
541 public int restrictLeft=0; | |
542 /** If positive, only look for kmer matches the rightmost X bases */ | |
543 public int restrictRight=0; | |
544 | |
545 /** Don't allow a read 'N' to match a reference 'A'. | |
546 * Reduces sensitivity when hdist>0 or edist>0. Default: false. */ | |
547 public boolean forbidNs=false; | |
548 | |
549 /** Replace bases covered by matched kmers with this symbol */ | |
550 public byte trimSymbol='N'; | |
551 | |
552 /** Convert masked bases to lowercase */ | |
553 public boolean kmaskLowercase=false; | |
554 | |
555 /** Don't look for kmers in read 1 */ | |
556 public boolean skipR1=false; | |
557 /** Don't look for kmers in read 2 */ | |
558 public boolean skipR2=false; | |
559 | |
560 /** A read must contain at least this many kmer hits before being considered a match. Default: 1 */ | |
561 public int minHits=1; | |
562 | |
563 /*--------------------------------------------------------------*/ | |
564 /*---------------- Statistics ----------------*/ | |
565 /*--------------------------------------------------------------*/ | |
566 | |
567 // public long storedKmers=0; | |
568 | |
569 /*--------------------------------------------------------------*/ | |
570 /*---------------- Per-Thread Fields ----------------*/ | |
571 /*--------------------------------------------------------------*/ | |
572 | |
573 public int[] countArray; | |
574 | |
575 private final IntList idList=new IntList(); | |
576 private final IntList countList=new IntList(); | |
577 | |
578 /*--------------------------------------------------------------*/ | |
579 /*---------------- Final Primitives ----------------*/ | |
580 /*--------------------------------------------------------------*/ | |
581 | |
582 /** Look for reverse-complements as well as forward kmers. Default: true */ | |
583 private final boolean rcomp; | |
584 /** AND bitmask with 0's at the middle base */ | |
585 private final long middleMask; | |
586 | |
587 /** Normal kmer length */ | |
588 private final int k; | |
589 /** k-1; used in some expressions */ | |
590 private final int k2; | |
591 /** Shortest kmer to use for trimming */ | |
592 private final int mink; | |
593 /** Attempt to match kmers shorter than normal k on read ends when doing kTrimming. */ | |
594 private final boolean useShortKmers; | |
595 | |
596 /** Fraction of kmers to skip, 0 to 15 out of 16 */ | |
597 private final int speed; | |
598 | |
599 /** Skip this many kmers when examining the read. Default 1. | |
600 * 1 means every kmer is used, 2 means every other, etc. */ | |
601 private final int qSkip; | |
602 | |
603 /** noAccel is true if speed and qSkip are disabled, accel is the opposite. */ | |
604 private final boolean noAccel, accel; | |
605 | |
606 /*--------------------------------------------------------------*/ | |
607 /*---------------- Static Fields ----------------*/ | |
608 /*--------------------------------------------------------------*/ | |
609 | |
610 /** Number of tables (and threads, during loading) */ | |
611 private static final int WAYS=7; //123 | |
612 /** Verbose messages */ | |
613 public static final boolean verbose=false; //123 | |
614 | |
615 /** Print messages to this stream */ | |
616 private static PrintStream outstream=System.err; | |
617 | |
618 /** x&clearMasks[i] will clear base i */ | |
619 private static final long[] clearMasks; | |
620 /** x|setMasks[i][j] will set base i to j */ | |
621 private static final long[][] setMasks; | |
622 /** x&leftMasks[i] will clear all bases to the right of i (exclusive) */ | |
623 private static final long[] leftMasks; | |
624 /** x&rightMasks[i] will clear all bases to the left of i (inclusive) */ | |
625 private static final long[] rightMasks; | |
626 /** x|kMasks[i] will set the bit to the left of the leftmost base */ | |
627 private static final long[] lengthMasks; | |
628 | |
629 /*--------------------------------------------------------------*/ | |
630 /*---------------- Static Initializers ----------------*/ | |
631 /*--------------------------------------------------------------*/ | |
632 | |
633 static{ | |
634 clearMasks=new long[32]; | |
635 leftMasks=new long[32]; | |
636 rightMasks=new long[32]; | |
637 lengthMasks=new long[32]; | |
638 setMasks=new long[4][32]; | |
639 for(int i=0; i<32; i++){ | |
640 clearMasks[i]=~(3L<<(2*i)); | |
641 } | |
642 for(int i=0; i<32; i++){ | |
643 leftMasks[i]=((-1L)<<(2*i)); | |
644 } | |
645 for(int i=0; i<32; i++){ | |
646 rightMasks[i]=~((-1L)<<(2*i)); | |
647 } | |
648 for(int i=0; i<32; i++){ | |
649 lengthMasks[i]=((1L)<<(2*i)); | |
650 } | |
651 for(int i=0; i<32; i++){ | |
652 for(long j=0; j<4; j++){ | |
653 setMasks[(int)j][i]=(j<<(2*i)); | |
654 } | |
655 } | |
656 } | |
657 | |
658 } |