annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/TableReader.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package kmer;
jpayne@68 2
jpayne@68 3 import java.io.PrintStream;
jpayne@68 4 import java.util.BitSet;
jpayne@68 5
jpayne@68 6 import dna.AminoAcid;
jpayne@68 7 import jgi.Dedupe;
jpayne@68 8 import shared.PreParser;
jpayne@68 9 import shared.Shared;
jpayne@68 10 import shared.Timer;
jpayne@68 11 import shared.Tools;
jpayne@68 12 import stream.Read;
jpayne@68 13 import structures.IntList;
jpayne@68 14
jpayne@68 15 /**
jpayne@68 16 * @author Brian Bushnell
jpayne@68 17 * @date Mar 5, 2015
jpayne@68 18 *
jpayne@68 19 */
jpayne@68 20 public class TableReader {
jpayne@68 21
jpayne@68 22 /*--------------------------------------------------------------*/
jpayne@68 23 /*---------------- Initialization ----------------*/
jpayne@68 24 /*--------------------------------------------------------------*/
jpayne@68 25
jpayne@68 26 /**
jpayne@68 27 * Code entrance from the command line.
jpayne@68 28 * @param args Command line arguments
jpayne@68 29 */
jpayne@68 30 public static void main(String[] args){
jpayne@68 31
jpayne@68 32 {//Preparse block for help, config files, and outstream
jpayne@68 33 PreParser pp=new PreParser(args, null, false);
jpayne@68 34 args=pp.args;
jpayne@68 35 outstream=pp.outstream;
jpayne@68 36 }
jpayne@68 37
jpayne@68 38 Timer t=new Timer();
jpayne@68 39
jpayne@68 40 AbstractKmerTable[] tables=TableLoaderLockFree.makeTables(AbstractKmerTable.ARRAY1D, 12, -1L, false, 1.0);
jpayne@68 41
jpayne@68 42 int k=31;
jpayne@68 43 int mink=0;
jpayne@68 44 int speed=0;
jpayne@68 45 int hdist=0;
jpayne@68 46 int edist=0;
jpayne@68 47 boolean rcomp=true;
jpayne@68 48 boolean maskMiddle=false;
jpayne@68 49
jpayne@68 50 //Create a new Loader instance
jpayne@68 51 TableLoaderLockFree loader=new TableLoaderLockFree(tables, k, mink, speed, hdist, edist, rcomp, maskMiddle);
jpayne@68 52 loader.setRefSkip(0);
jpayne@68 53 loader.hammingDistance2=0;
jpayne@68 54 loader.editDistance2=0;
jpayne@68 55 loader.storeMode(TableLoaderLockFree.SET_IF_NOT_PRESENT);
jpayne@68 56
jpayne@68 57 ///And run it
jpayne@68 58 String[] refs=args;
jpayne@68 59 String[] literals=null;
jpayne@68 60 boolean keepNames=false;
jpayne@68 61 boolean useRefNames=false;
jpayne@68 62 long kmers=loader.processData(refs, literals, keepNames, useRefNames, false);
jpayne@68 63 t.stop();
jpayne@68 64
jpayne@68 65 outstream.println("Load Time:\t"+t);
jpayne@68 66 outstream.println("Return: \t"+kmers);
jpayne@68 67 outstream.println("refKmers: \t"+loader.refKmers);
jpayne@68 68 outstream.println("refBases: \t"+loader.refBases);
jpayne@68 69 outstream.println("refReads: \t"+loader.refReads);
jpayne@68 70
jpayne@68 71 int qskip=0;
jpayne@68 72 int qhdist=0;
jpayne@68 73 TableReader tr=new TableReader(k, mink, speed, qskip, qhdist, rcomp, maskMiddle);
jpayne@68 74
jpayne@68 75 //TODO: Stuff...
jpayne@68 76
jpayne@68 77 //Close the print stream if it was redirected
jpayne@68 78 Shared.closeStream(outstream);
jpayne@68 79 }
jpayne@68 80
jpayne@68 81 public TableReader(int k_){
jpayne@68 82 this(k_, 0, 0, 0, 0, true, false);
jpayne@68 83 }
jpayne@68 84
jpayne@68 85 public TableReader(int k_, int mink_, int speed_, int qskip_, int qhdist_, boolean rcomp_, boolean maskMiddle_){
jpayne@68 86 k=k_;
jpayne@68 87 k2=k-1;
jpayne@68 88 mink=mink_;
jpayne@68 89 rcomp=rcomp_;
jpayne@68 90 useShortKmers=(mink>0 && mink<k);
jpayne@68 91 speed=speed_;
jpayne@68 92 qSkip=qskip_;
jpayne@68 93 qHammingDistance=qhdist_;
jpayne@68 94 middleMask=maskMiddle ? ~(3L<<(2*(k/2))) : -1L;
jpayne@68 95
jpayne@68 96 noAccel=(speed<1 && qSkip<2);
jpayne@68 97 accel=!noAccel;
jpayne@68 98 }
jpayne@68 99
jpayne@68 100
jpayne@68 101 /*--------------------------------------------------------------*/
jpayne@68 102 /*---------------- Outer Methods ----------------*/
jpayne@68 103 /*--------------------------------------------------------------*/
jpayne@68 104
jpayne@68 105
jpayne@68 106 /**
jpayne@68 107 * Mask a read to cover matching kmers.
jpayne@68 108 * @param r Read to process
jpayne@68 109 * @param sets Kmer tables
jpayne@68 110 * @return Number of bases masked
jpayne@68 111 */
jpayne@68 112 public final int kMask(final Read r, final AbstractKmerTable[] sets){
jpayne@68 113 if(r==null){return 0;}
jpayne@68 114 if(verbose){outstream.println("KMasking read "+r.id);}
jpayne@68 115
jpayne@68 116 BitSet bs=markBits(r, sets);
jpayne@68 117 if(verbose){outstream.println("Null bitset.");}
jpayne@68 118 if(bs==null){return 0;}
jpayne@68 119
jpayne@68 120 final byte[] bases=r.bases, quals=r.quality;
jpayne@68 121 final int cardinality=bs.cardinality();
jpayne@68 122 assert(cardinality>0);
jpayne@68 123
jpayne@68 124 //Replace kmer hit zone with the trim symbol
jpayne@68 125 for(int i=0; i<bases.length; i++){
jpayne@68 126 if(bs.get(i)){
jpayne@68 127 if(kmaskLowercase){
jpayne@68 128 bases[i]=(byte)Tools.toLowerCase(bases[i]);
jpayne@68 129 }else{
jpayne@68 130 bases[i]=trimSymbol;
jpayne@68 131 if(quals!=null && trimSymbol=='N'){quals[i]=0;}
jpayne@68 132 }
jpayne@68 133 }
jpayne@68 134 }
jpayne@68 135 return cardinality;
jpayne@68 136 }
jpayne@68 137
jpayne@68 138
jpayne@68 139 /**
jpayne@68 140 * Counts the number of kmer hits for a read.
jpayne@68 141 * @param r Read to process
jpayne@68 142 * @param sets Kmer tables
jpayne@68 143 * @return Number of hits
jpayne@68 144 */
jpayne@68 145 public final int countKmerHits(final Read r, final AbstractKmerTable[] sets){
jpayne@68 146 if(r==null || r.length()<k){return 0;}
jpayne@68 147 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){return 0;}
jpayne@68 148 final byte[] bases=r.bases;
jpayne@68 149 final int minlen=k-1;
jpayne@68 150 final int minlen2=(maskMiddle ? k/2 : k);
jpayne@68 151 final int shift=2*k;
jpayne@68 152 final int shift2=shift-2;
jpayne@68 153 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 154 long kmer=0;
jpayne@68 155 long rkmer=0;
jpayne@68 156 int found=0;
jpayne@68 157 int len=0;
jpayne@68 158
jpayne@68 159 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight));
jpayne@68 160 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft));
jpayne@68 161
jpayne@68 162 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 163 for(int i=start; i<stop; i++){
jpayne@68 164 byte b=bases[i];
jpayne@68 165 long x=AminoAcid.baseToNumber0[b];
jpayne@68 166 long x2=AminoAcid.baseToComplementNumber0[b];
jpayne@68 167 kmer=((kmer<<2)|x)&mask;
jpayne@68 168 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 169 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;}
jpayne@68 170 if(verbose){outstream.println("Scanning6 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 171 if(len>=minlen2 && i>=minlen){
jpayne@68 172 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets);
jpayne@68 173 if(verbose){outstream.println("Testing kmer "+kmer+"; id="+id);}
jpayne@68 174 if(id>0){
jpayne@68 175 if(verbose){outstream.println("Found = "+(found+1)+"/"+minHits);}
jpayne@68 176 if(found>=minHits){
jpayne@68 177 return (found=found+1); //Early exit
jpayne@68 178 }
jpayne@68 179 found++;
jpayne@68 180 }
jpayne@68 181 }
jpayne@68 182 }
jpayne@68 183
jpayne@68 184 return found;
jpayne@68 185 }
jpayne@68 186
jpayne@68 187 /**
jpayne@68 188 * Returns the id of the sequence with the most kmer matches to this read, or -1 if none are at least minHits.
jpayne@68 189 * @param r Read to process
jpayne@68 190 * @param sets Kmer tables
jpayne@68 191 * @return id of best match
jpayne@68 192 */
jpayne@68 193 public final int findBestMatch(final Read r, final AbstractKmerTable[] sets){
jpayne@68 194 idList.size=0;
jpayne@68 195 if(r==null || r.length()<k){return -1;}
jpayne@68 196 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){return -1;}
jpayne@68 197 final byte[] bases=r.bases;
jpayne@68 198 final int minlen=k-1;
jpayne@68 199 final int minlen2=(maskMiddle ? k/2 : k);
jpayne@68 200 final int shift=2*k;
jpayne@68 201 final int shift2=shift-2;
jpayne@68 202 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 203 long kmer=0;
jpayne@68 204 long rkmer=0;
jpayne@68 205 int len=0;
jpayne@68 206 int found=0;
jpayne@68 207
jpayne@68 208 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight));
jpayne@68 209 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft));
jpayne@68 210
jpayne@68 211 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 212 for(int i=start; i<stop; i++){
jpayne@68 213 byte b=bases[i];
jpayne@68 214 long x=AminoAcid.baseToNumber0[b];
jpayne@68 215 long x2=AminoAcid.baseToComplementNumber0[b];
jpayne@68 216 kmer=((kmer<<2)|x)&mask;
jpayne@68 217 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 218 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;}
jpayne@68 219 if(verbose){outstream.println("Scanning6 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 220 if(len>=minlen2 && i>=minlen){
jpayne@68 221 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets);
jpayne@68 222 if(id>0){
jpayne@68 223 countArray[id]++;
jpayne@68 224 if(countArray[id]==1){idList.add(id);}
jpayne@68 225 found++;
jpayne@68 226 if(verbose){outstream.println("Found = "+found+"/"+minHits);}
jpayne@68 227 }
jpayne@68 228 }
jpayne@68 229 }
jpayne@68 230
jpayne@68 231 final int id, max;
jpayne@68 232 if(found>=minHits){
jpayne@68 233 max=condenseLoose(countArray, idList, countList);
jpayne@68 234 int id0=-1;
jpayne@68 235 for(int i=0; i<countList.size; i++){
jpayne@68 236 if(countList.get(i)==max){
jpayne@68 237 id0=idList.get(i); break;
jpayne@68 238 }
jpayne@68 239 }
jpayne@68 240 id=id0;
jpayne@68 241 }else{
jpayne@68 242 max=0;
jpayne@68 243 id=-1;
jpayne@68 244 }
jpayne@68 245
jpayne@68 246 return id;
jpayne@68 247 }
jpayne@68 248
jpayne@68 249
jpayne@68 250 /**
jpayne@68 251 * Mask a read to cover matching kmers.
jpayne@68 252 * @param r Read to process
jpayne@68 253 * @param sets Kmer tables
jpayne@68 254 * @return Number of bases masked
jpayne@68 255 */
jpayne@68 256 public final BitSet markBits(final Read r, final AbstractKmerTable[] sets){
jpayne@68 257 if(r==null || r.length()<Tools.max(1, (useShortKmers ? Tools.min(k, mink) : k))){
jpayne@68 258 if(verbose){outstream.println("Read too short.");}
jpayne@68 259 return null;
jpayne@68 260 }
jpayne@68 261 if((skipR1 && r.pairnum()==0) || (skipR2 && r.pairnum()==1)){
jpayne@68 262 if(verbose){outstream.println("Skipping read.");}
jpayne@68 263 return null;
jpayne@68 264 }
jpayne@68 265 if(verbose){outstream.println("Marking bitset for read "+r.id);}
jpayne@68 266 final byte[] bases=r.bases;
jpayne@68 267 final int minlen=k-1;
jpayne@68 268 final int minlen2=(maskMiddle ? k/2 : k);
jpayne@68 269 final int shift=2*k;
jpayne@68 270 final int shift2=shift-2;
jpayne@68 271 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 272 long kmer=0;
jpayne@68 273 long rkmer=0;
jpayne@68 274 int found=0;
jpayne@68 275 int len=0;
jpayne@68 276 int id0=-1; //ID of first kmer found.
jpayne@68 277
jpayne@68 278 BitSet bs=new BitSet(bases.length+trimPad+1);
jpayne@68 279
jpayne@68 280 final int minus=k-1-trimPad;
jpayne@68 281 final int plus=trimPad+1;
jpayne@68 282
jpayne@68 283 final int start=(restrictRight<1 ? 0 : Tools.max(0, bases.length-restrictRight));
jpayne@68 284 final int stop=(restrictLeft<1 ? bases.length : Tools.min(bases.length, restrictLeft));
jpayne@68 285
jpayne@68 286 //Scan for normal kmers
jpayne@68 287 for(int i=start; i<stop; i++){
jpayne@68 288 byte b=bases[i];
jpayne@68 289 long x=AminoAcid.baseToNumber0[b];
jpayne@68 290 long x2=AminoAcid.baseToComplementNumber0[b];
jpayne@68 291 kmer=((kmer<<2)|x)&mask;
jpayne@68 292 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 293 if(b=='N' && forbidNs){len=0; rkmer=0;}else{len++;}
jpayne@68 294 if(verbose){outstream.println("Scanning3 i="+i+", kmer="+kmer+", rkmer="+rkmer+", len="+len+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 295 if(len>=minlen2 && i>=minlen){
jpayne@68 296 final int id=getValue(kmer, rkmer, k, qHammingDistance, i, sets);
jpayne@68 297 if(id>0){
jpayne@68 298 if(id0<0){id0=id;}
jpayne@68 299 if(verbose){
jpayne@68 300 outstream.println("a: Found "+kmer);
jpayne@68 301 outstream.println("Setting "+Tools.max(0, i-minus)+", "+(i+plus));
jpayne@68 302 outstream.println("i="+i+", minus="+minus+", plus="+plus+", trimpad="+trimPad+", k="+k);
jpayne@68 303 }
jpayne@68 304 bs.set(Tools.max(0, i-minus), i+plus);
jpayne@68 305 found++;
jpayne@68 306 }
jpayne@68 307 }
jpayne@68 308 }
jpayne@68 309
jpayne@68 310 //If nothing was found, scan for short kmers.
jpayne@68 311 if(useShortKmers){
jpayne@68 312 assert(!maskMiddle && middleMask==-1) : maskMiddle+", "+middleMask+", k="+", mink="+mink;
jpayne@68 313
jpayne@68 314 //Look for short kmers on left side
jpayne@68 315 {
jpayne@68 316 kmer=0;
jpayne@68 317 rkmer=0;
jpayne@68 318 len=0;
jpayne@68 319 final int lim=Tools.min(k, stop);
jpayne@68 320 for(int i=start; i<lim; i++){
jpayne@68 321 byte b=bases[i];
jpayne@68 322 long x=Dedupe.baseToNumber[b];
jpayne@68 323 long x2=Dedupe.baseToComplementNumber[b];
jpayne@68 324 kmer=((kmer<<2)|x)&mask;
jpayne@68 325 rkmer=rkmer|(x2<<(2*len));
jpayne@68 326 len++;
jpayne@68 327 if(verbose){outstream.println("Scanning4 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 328 if(len>=mink){
jpayne@68 329
jpayne@68 330 if(verbose){
jpayne@68 331 outstream.println("Looking for left kmer "+AminoAcid.kmerToString(kmer, len));
jpayne@68 332 outstream.println("Looking for left rkmer "+AminoAcid.kmerToString(rkmer, len));
jpayne@68 333 }
jpayne@68 334 final int id=getValue(kmer, rkmer, len, qHammingDistance2, i, sets);
jpayne@68 335 if(id>0){
jpayne@68 336 if(id0<0){id0=id;}
jpayne@68 337 if(verbose){
jpayne@68 338 outstream.println("b: Found "+kmer);
jpayne@68 339 outstream.println("Setting "+0+", "+(i+plus));
jpayne@68 340 }
jpayne@68 341 bs.set(0, i+plus);
jpayne@68 342 found++;
jpayne@68 343 }
jpayne@68 344 }
jpayne@68 345 }
jpayne@68 346 }
jpayne@68 347
jpayne@68 348 //Look for short kmers on right side
jpayne@68 349 {
jpayne@68 350 kmer=0;
jpayne@68 351 rkmer=0;
jpayne@68 352 len=0;
jpayne@68 353 final int lim=Tools.max(-1, stop-k);
jpayne@68 354 for(int i=stop-1; i>lim; i--){
jpayne@68 355 byte b=bases[i];
jpayne@68 356 long x=Dedupe.baseToNumber[b];
jpayne@68 357 long x2=Dedupe.baseToComplementNumber[b];
jpayne@68 358 kmer=kmer|(x<<(2*len));
jpayne@68 359 rkmer=((rkmer<<2)|x2)&mask;
jpayne@68 360 len++;
jpayne@68 361 if(verbose){outstream.println("Scanning5 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 362 if(len>=mink){
jpayne@68 363 if(verbose){
jpayne@68 364 outstream.println("Looking for right kmer "+
jpayne@68 365 AminoAcid.kmerToString(kmer&~lengthMasks[len], len)+"; value="+toValue(kmer, rkmer, lengthMasks[len])+"; kmask="+lengthMasks[len]);
jpayne@68 366 }
jpayne@68 367 final int id=getValue(kmer, rkmer, len, qHammingDistance2, i, sets);
jpayne@68 368 if(id>0){
jpayne@68 369 if(id0<0){id0=id;}
jpayne@68 370 if(verbose){
jpayne@68 371 outstream.println("c: Found "+kmer);
jpayne@68 372 outstream.println("Setting "+Tools.max(0, i-trimPad)+", "+bases.length);
jpayne@68 373 }
jpayne@68 374 bs.set(Tools.max(0, i-trimPad), bases.length);
jpayne@68 375 found++;
jpayne@68 376 }
jpayne@68 377 }
jpayne@68 378 }
jpayne@68 379 }
jpayne@68 380 }
jpayne@68 381
jpayne@68 382
jpayne@68 383 if(verbose){outstream.println("found="+found+", bitset="+bs);}
jpayne@68 384
jpayne@68 385 if(found==0){return null;}
jpayne@68 386 assert(found>0) : "Overflow in 'found' variable.";
jpayne@68 387
jpayne@68 388 int cardinality=bs.cardinality();
jpayne@68 389 assert(cardinality>0);
jpayne@68 390
jpayne@68 391 return bs;
jpayne@68 392 }
jpayne@68 393
jpayne@68 394
jpayne@68 395 /*--------------------------------------------------------------*/
jpayne@68 396 /*---------------- Helper Methods ----------------*/
jpayne@68 397 /*--------------------------------------------------------------*/
jpayne@68 398 /**
jpayne@68 399 * Transforms a kmer into all canonical values for a given Hamming distance.
jpayne@68 400 * Returns the related id stored in the tables.
jpayne@68 401 * @param kmer Forward kmer
jpayne@68 402 * @param rkmer Reverse kmer
jpayne@68 403 * @param len kmer length
jpayne@68 404 * @param qHDist Hamming distance
jpayne@68 405 * @param qPos Position of kmer in query
jpayne@68 406 * @param sets Kmer hash tables
jpayne@68 407 * @return Value stored in table, or -1
jpayne@68 408 */
jpayne@68 409 public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final int qPos, final AbstractKmerTable[] sets){
jpayne@68 410 if(qSkip>1 && (qPos%qSkip!=0)){return -1;}
jpayne@68 411 return qHDist<1 ? getValue(kmer, rkmer, len, sets) : getValue(kmer, rkmer, len, qHDist, sets);
jpayne@68 412 }
jpayne@68 413
jpayne@68 414 /**
jpayne@68 415 * Transforms a kmer into all canonical values for a given Hamming distance.
jpayne@68 416 * Returns the related id stored in the tables.
jpayne@68 417 * @param kmer Forward kmer
jpayne@68 418 * @param rkmer Reverse kmer
jpayne@68 419 * @param len kmer length
jpayne@68 420 * @param qHDist Hamming distance
jpayne@68 421 * @param sets Kmer hash tables
jpayne@68 422 * @return Value stored in table, or -1
jpayne@68 423 */
jpayne@68 424 public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final AbstractKmerTable[] sets){
jpayne@68 425 int id=getValue(kmer, rkmer, len, sets);
jpayne@68 426 if(id<1 && qHDist>0){
jpayne@68 427 final int qHDistMinusOne=qHDist-1;
jpayne@68 428
jpayne@68 429 //Sub
jpayne@68 430 for(int j=0; j<4 && id<1; j++){
jpayne@68 431 for(int i=0; i<len && id<1; i++){
jpayne@68 432 final long temp=(kmer&clearMasks[i])|setMasks[j][i];
jpayne@68 433 if(temp!=kmer){
jpayne@68 434 long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
jpayne@68 435 id=getValue(temp, rtemp, len, qHDistMinusOne, sets);
jpayne@68 436 }
jpayne@68 437 }
jpayne@68 438 }
jpayne@68 439 }
jpayne@68 440 return id;
jpayne@68 441 }
jpayne@68 442
jpayne@68 443 /**
jpayne@68 444 * Transforms a kmer into a canonical value stored in the table and search.
jpayne@68 445 * @param kmer Forward kmer
jpayne@68 446 * @param rkmer Reverse kmer
jpayne@68 447 * @param len kmer length
jpayne@68 448 * @param sets Kmer hash tables
jpayne@68 449 * @return Value stored in table
jpayne@68 450 */
jpayne@68 451 public final int getValue(final long kmer, final long rkmer, final int len, final AbstractKmerTable[] sets){
jpayne@68 452 return getValueWithMask(kmer, rkmer, lengthMasks[len], sets);
jpayne@68 453 }
jpayne@68 454
jpayne@68 455 /**
jpayne@68 456 * Transforms a kmer into a canonical value stored in the table and search.
jpayne@68 457 * @param kmer Forward kmer
jpayne@68 458 * @param rkmer Reverse kmer
jpayne@68 459 * @param lengthMask Bitmask with single '1' set to left of kmer
jpayne@68 460 * @param sets Kmer hash tables
jpayne@68 461 * @return Value stored in table
jpayne@68 462 */
jpayne@68 463 public final int getValueWithMask(final long kmer, final long rkmer, final long lengthMask, final AbstractKmerTable[] sets){
jpayne@68 464 assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer;
jpayne@68 465
jpayne@68 466 // final long max=(rcomp ? Tools.max(kmer, rkmer) : kmer);
jpayne@68 467 // final long key=(max&middleMask)|lengthMask;
jpayne@68 468
jpayne@68 469 final long key=toValue(kmer, rkmer, lengthMask);
jpayne@68 470
jpayne@68 471 if(noAccel || ((key/WAYS)&15)>=speed){
jpayne@68 472 if(verbose){outstream.println("Testing key "+key);}
jpayne@68 473 AbstractKmerTable set=sets[(int)(key%WAYS)];
jpayne@68 474 final int id=set.getValue(key);
jpayne@68 475 return id;
jpayne@68 476 }
jpayne@68 477 return -1;
jpayne@68 478 }
jpayne@68 479
jpayne@68 480
jpayne@68 481 /**
jpayne@68 482 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined.
jpayne@68 483 * @param kmer Forward kmer
jpayne@68 484 * @param rkmer Reverse kmer
jpayne@68 485 * @param lengthMask Bitmask with single '1' set to left of kmer
jpayne@68 486 * @return Canonical value
jpayne@68 487 */
jpayne@68 488 private final long toValue(long kmer, long rkmer, long lengthMask){
jpayne@68 489 assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer;
jpayne@68 490 long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
jpayne@68 491 return (value&middleMask)|lengthMask;
jpayne@68 492 }
jpayne@68 493
jpayne@68 494 /**
jpayne@68 495 * Pack a list of counts from an array to an IntList.
jpayne@68 496 * @param loose Counter array
jpayne@68 497 * @param packed Unique values
jpayne@68 498 * @param counts Counts of values
jpayne@68 499 * @return Highest observed count
jpayne@68 500 */
jpayne@68 501 public static int condenseLoose(int[] loose, IntList packed, IntList counts){
jpayne@68 502 counts.size=0;
jpayne@68 503 if(packed.size<1){return 0;}
jpayne@68 504
jpayne@68 505 int max=0;
jpayne@68 506 for(int i=0; i<packed.size; i++){
jpayne@68 507 final int p=packed.get(i);
jpayne@68 508 final int c=loose[p];
jpayne@68 509 counts.add(c);
jpayne@68 510 loose[p]=0;
jpayne@68 511 max=Tools.max(max, c);
jpayne@68 512 }
jpayne@68 513 return max;
jpayne@68 514 }
jpayne@68 515
jpayne@68 516 public final int kmerToWay(final long kmer){
jpayne@68 517 // final int way=(int)((kmer&coreMask)%WAYS);
jpayne@68 518 // return way;
jpayne@68 519 return (int)(kmer%WAYS);
jpayne@68 520 }
jpayne@68 521
jpayne@68 522 /*--------------------------------------------------------------*/
jpayne@68 523 /*---------------- Fields ----------------*/
jpayne@68 524 /*--------------------------------------------------------------*/
jpayne@68 525
jpayne@68 526 /** Has this class encountered errors while processing? */
jpayne@68 527 public boolean errorState=false;
jpayne@68 528
jpayne@68 529 /** Make the middle base in a kmer a wildcard to improve sensitivity */
jpayne@68 530 public final boolean maskMiddle=false;
jpayne@68 531
jpayne@68 532 /** Search for query kmers with up to this many substitutions */
jpayne@68 533 private final int qHammingDistance;
jpayne@68 534 /** Search for short query kmers with up to this many substitutions */
jpayne@68 535 public int qHammingDistance2=-1;
jpayne@68 536
jpayne@68 537 /** Trim this much extra around matched kmers */
jpayne@68 538 public int trimPad=0;
jpayne@68 539
jpayne@68 540 /** If positive, only look for kmer matches in the leftmost X bases */
jpayne@68 541 public int restrictLeft=0;
jpayne@68 542 /** If positive, only look for kmer matches the rightmost X bases */
jpayne@68 543 public int restrictRight=0;
jpayne@68 544
jpayne@68 545 /** Don't allow a read 'N' to match a reference 'A'.
jpayne@68 546 * Reduces sensitivity when hdist>0 or edist>0. Default: false. */
jpayne@68 547 public boolean forbidNs=false;
jpayne@68 548
jpayne@68 549 /** Replace bases covered by matched kmers with this symbol */
jpayne@68 550 public byte trimSymbol='N';
jpayne@68 551
jpayne@68 552 /** Convert masked bases to lowercase */
jpayne@68 553 public boolean kmaskLowercase=false;
jpayne@68 554
jpayne@68 555 /** Don't look for kmers in read 1 */
jpayne@68 556 public boolean skipR1=false;
jpayne@68 557 /** Don't look for kmers in read 2 */
jpayne@68 558 public boolean skipR2=false;
jpayne@68 559
jpayne@68 560 /** A read must contain at least this many kmer hits before being considered a match. Default: 1 */
jpayne@68 561 public int minHits=1;
jpayne@68 562
jpayne@68 563 /*--------------------------------------------------------------*/
jpayne@68 564 /*---------------- Statistics ----------------*/
jpayne@68 565 /*--------------------------------------------------------------*/
jpayne@68 566
jpayne@68 567 // public long storedKmers=0;
jpayne@68 568
jpayne@68 569 /*--------------------------------------------------------------*/
jpayne@68 570 /*---------------- Per-Thread Fields ----------------*/
jpayne@68 571 /*--------------------------------------------------------------*/
jpayne@68 572
jpayne@68 573 public int[] countArray;
jpayne@68 574
jpayne@68 575 private final IntList idList=new IntList();
jpayne@68 576 private final IntList countList=new IntList();
jpayne@68 577
jpayne@68 578 /*--------------------------------------------------------------*/
jpayne@68 579 /*---------------- Final Primitives ----------------*/
jpayne@68 580 /*--------------------------------------------------------------*/
jpayne@68 581
jpayne@68 582 /** Look for reverse-complements as well as forward kmers. Default: true */
jpayne@68 583 private final boolean rcomp;
jpayne@68 584 /** AND bitmask with 0's at the middle base */
jpayne@68 585 private final long middleMask;
jpayne@68 586
jpayne@68 587 /** Normal kmer length */
jpayne@68 588 private final int k;
jpayne@68 589 /** k-1; used in some expressions */
jpayne@68 590 private final int k2;
jpayne@68 591 /** Shortest kmer to use for trimming */
jpayne@68 592 private final int mink;
jpayne@68 593 /** Attempt to match kmers shorter than normal k on read ends when doing kTrimming. */
jpayne@68 594 private final boolean useShortKmers;
jpayne@68 595
jpayne@68 596 /** Fraction of kmers to skip, 0 to 15 out of 16 */
jpayne@68 597 private final int speed;
jpayne@68 598
jpayne@68 599 /** Skip this many kmers when examining the read. Default 1.
jpayne@68 600 * 1 means every kmer is used, 2 means every other, etc. */
jpayne@68 601 private final int qSkip;
jpayne@68 602
jpayne@68 603 /** noAccel is true if speed and qSkip are disabled, accel is the opposite. */
jpayne@68 604 private final boolean noAccel, accel;
jpayne@68 605
jpayne@68 606 /*--------------------------------------------------------------*/
jpayne@68 607 /*---------------- Static Fields ----------------*/
jpayne@68 608 /*--------------------------------------------------------------*/
jpayne@68 609
jpayne@68 610 /** Number of tables (and threads, during loading) */
jpayne@68 611 private static final int WAYS=7; //123
jpayne@68 612 /** Verbose messages */
jpayne@68 613 public static final boolean verbose=false; //123
jpayne@68 614
jpayne@68 615 /** Print messages to this stream */
jpayne@68 616 private static PrintStream outstream=System.err;
jpayne@68 617
jpayne@68 618 /** x&clearMasks[i] will clear base i */
jpayne@68 619 private static final long[] clearMasks;
jpayne@68 620 /** x|setMasks[i][j] will set base i to j */
jpayne@68 621 private static final long[][] setMasks;
jpayne@68 622 /** x&leftMasks[i] will clear all bases to the right of i (exclusive) */
jpayne@68 623 private static final long[] leftMasks;
jpayne@68 624 /** x&rightMasks[i] will clear all bases to the left of i (inclusive) */
jpayne@68 625 private static final long[] rightMasks;
jpayne@68 626 /** x|kMasks[i] will set the bit to the left of the leftmost base */
jpayne@68 627 private static final long[] lengthMasks;
jpayne@68 628
jpayne@68 629 /*--------------------------------------------------------------*/
jpayne@68 630 /*---------------- Static Initializers ----------------*/
jpayne@68 631 /*--------------------------------------------------------------*/
jpayne@68 632
jpayne@68 633 static{
jpayne@68 634 clearMasks=new long[32];
jpayne@68 635 leftMasks=new long[32];
jpayne@68 636 rightMasks=new long[32];
jpayne@68 637 lengthMasks=new long[32];
jpayne@68 638 setMasks=new long[4][32];
jpayne@68 639 for(int i=0; i<32; i++){
jpayne@68 640 clearMasks[i]=~(3L<<(2*i));
jpayne@68 641 }
jpayne@68 642 for(int i=0; i<32; i++){
jpayne@68 643 leftMasks[i]=((-1L)<<(2*i));
jpayne@68 644 }
jpayne@68 645 for(int i=0; i<32; i++){
jpayne@68 646 rightMasks[i]=~((-1L)<<(2*i));
jpayne@68 647 }
jpayne@68 648 for(int i=0; i<32; i++){
jpayne@68 649 lengthMasks[i]=((1L)<<(2*i));
jpayne@68 650 }
jpayne@68 651 for(int i=0; i<32; i++){
jpayne@68 652 for(long j=0; j<4; j++){
jpayne@68 653 setMasks[(int)j][i]=(j<<(2*i));
jpayne@68 654 }
jpayne@68 655 }
jpayne@68 656 }
jpayne@68 657
jpayne@68 658 }