annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/KmerTableSet.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package kmer;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.util.ArrayList;
jpayne@68 5 import java.util.Arrays;
jpayne@68 6 import java.util.BitSet;
jpayne@68 7 import java.util.concurrent.atomic.AtomicLong;
jpayne@68 8
jpayne@68 9 import assemble.Contig;
jpayne@68 10 import bloom.KmerCountAbstract;
jpayne@68 11 import dna.AminoAcid;
jpayne@68 12 import fileIO.ByteStreamWriter;
jpayne@68 13 import fileIO.FileFormat;
jpayne@68 14 import fileIO.ReadWrite;
jpayne@68 15 import jgi.BBMerge;
jpayne@68 16 import shared.Parse;
jpayne@68 17 import shared.Parser;
jpayne@68 18 import shared.PreParser;
jpayne@68 19 import shared.Primes;
jpayne@68 20 import shared.ReadStats;
jpayne@68 21 import shared.Shared;
jpayne@68 22 import shared.Timer;
jpayne@68 23 import shared.Tools;
jpayne@68 24 import shared.TrimRead;
jpayne@68 25 import stream.ConcurrentReadInputStream;
jpayne@68 26 import stream.FastaReadInputStream;
jpayne@68 27 import stream.Read;
jpayne@68 28 import structures.ByteBuilder;
jpayne@68 29 import structures.IntList;
jpayne@68 30 import structures.ListNum;
jpayne@68 31 import structures.LongList;
jpayne@68 32 import ukmer.Kmer;
jpayne@68 33
jpayne@68 34
jpayne@68 35 /**
jpayne@68 36 * Loads and holds kmers for Tadpole
jpayne@68 37 * @author Brian Bushnell
jpayne@68 38 * @date Jun 22, 2015
jpayne@68 39 *
jpayne@68 40 */
jpayne@68 41 public class KmerTableSet extends AbstractKmerTableSet {
jpayne@68 42
jpayne@68 43 /**
jpayne@68 44 * Code entrance from the command line.
jpayne@68 45 * @param args Command line arguments
jpayne@68 46 */
jpayne@68 47 public static void main(String[] args){
jpayne@68 48 Timer t=new Timer(), t2=new Timer();
jpayne@68 49 t.start();
jpayne@68 50 t2.start();
jpayne@68 51 KmerTableSet set=new KmerTableSet(args);
jpayne@68 52 t2.stop();
jpayne@68 53 outstream.println("Initialization Time: \t"+t2);
jpayne@68 54
jpayne@68 55 ///And run it
jpayne@68 56 set.process(t);
jpayne@68 57
jpayne@68 58 //Close the print stream if it was redirected
jpayne@68 59 Shared.closeStream(outstream);
jpayne@68 60 }
jpayne@68 61
jpayne@68 62
jpayne@68 63 /**
jpayne@68 64 * Constructor.
jpayne@68 65 * @param args Command line arguments
jpayne@68 66 */
jpayne@68 67 private KmerTableSet(String[] args){
jpayne@68 68 this(args, 12);//+5 if using ownership and building contigs
jpayne@68 69 }
jpayne@68 70
jpayne@68 71
jpayne@68 72 /**
jpayne@68 73 * Constructor.
jpayne@68 74 * @param args Command line arguments
jpayne@68 75 */
jpayne@68 76 public KmerTableSet(String[] args, final int bytesPerKmer_){
jpayne@68 77 {//Preparse block for help, config files, and outstream
jpayne@68 78 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 79 args=pp.args;
jpayne@68 80 outstream=pp.outstream;
jpayne@68 81 }//TODO - no easy way to close outstream
jpayne@68 82
jpayne@68 83 /* Initialize local variables with defaults */
jpayne@68 84 Parser parser=new Parser();
jpayne@68 85 boolean prealloc_=false;
jpayne@68 86 int k_=31;
jpayne@68 87 int ways_=-1;
jpayne@68 88 int filterMax_=2;
jpayne@68 89 boolean ecco_=false, merge_=false;
jpayne@68 90 boolean rcomp_=true;
jpayne@68 91 double minProb_=defaultMinprob;
jpayne@68 92 int tableType_=AbstractKmerTable.ARRAY1D;
jpayne@68 93 /* Parse arguments */
jpayne@68 94 for(int i=0; i<args.length; i++){
jpayne@68 95
jpayne@68 96 final String arg=args[i];
jpayne@68 97 String[] split=arg.split("=");
jpayne@68 98 String a=split[0].toLowerCase();
jpayne@68 99 String b=split.length>1 ? split[1] : null;
jpayne@68 100
jpayne@68 101 if(Parser.parseCommonStatic(arg, a, b)){
jpayne@68 102 //do nothing
jpayne@68 103 }else if(Parser.parseZip(arg, a, b)){
jpayne@68 104 //do nothing
jpayne@68 105 }else if(Parser.parseQuality(arg, a, b)){
jpayne@68 106 //do nothing
jpayne@68 107 }else if(Parser.parseFasta(arg, a, b)){
jpayne@68 108 //do nothing
jpayne@68 109 }else if(parser.parseInterleaved(arg, a, b)){
jpayne@68 110 //do nothing
jpayne@68 111 }else if(parser.parseTrim(arg, a, b)){
jpayne@68 112 //do nothing
jpayne@68 113 }else if(a.equals("in") || a.equals("in1")){
jpayne@68 114 in1.clear();
jpayne@68 115 if(b!=null){
jpayne@68 116 String[] s=b.split(",");
jpayne@68 117 for(String ss : s){
jpayne@68 118 in1.add(ss);
jpayne@68 119 }
jpayne@68 120 }
jpayne@68 121 }else if(a.equals("in2")){
jpayne@68 122 in2.clear();
jpayne@68 123 if(b!=null){
jpayne@68 124 String[] s=b.split(",");
jpayne@68 125 for(String ss : s){
jpayne@68 126 in2.add(ss);
jpayne@68 127 }
jpayne@68 128 }
jpayne@68 129 }else if(a.equals("extra")){
jpayne@68 130 extra.clear();
jpayne@68 131 if(b!=null){
jpayne@68 132 String[] s=b.split(",");
jpayne@68 133 for(String ss : s){
jpayne@68 134 extra.add(ss);
jpayne@68 135 }
jpayne@68 136 }
jpayne@68 137 }else if(a.equals("append") || a.equals("app")){
jpayne@68 138 append=ReadStats.append=Parse.parseBoolean(b);
jpayne@68 139 }else if(a.equals("overwrite") || a.equals("ow")){
jpayne@68 140 overwrite=Parse.parseBoolean(b);
jpayne@68 141 }else if(a.equals("initialsize")){
jpayne@68 142 initialSize=Parse.parseIntKMG(b);
jpayne@68 143 }else if(a.equals("showstats") || a.equals("stats")){
jpayne@68 144 showStats=Parse.parseBoolean(b);
jpayne@68 145 }else if(a.equals("ways")){
jpayne@68 146 ways_=Parse.parseIntKMG(b);
jpayne@68 147 }else if(a.equals("buflen") || a.equals("bufflen") || a.equals("bufferlength")){
jpayne@68 148 buflen=Parse.parseIntKMG(b);
jpayne@68 149 }else if(a.equals("k")){
jpayne@68 150 assert(b!=null) : "\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n";
jpayne@68 151 k_=Parse.parseIntKMG(b);
jpayne@68 152 }else if(a.equals("threads") || a.equals("t")){
jpayne@68 153 THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b));
jpayne@68 154 }else if(a.equals("showspeed") || a.equals("ss")){
jpayne@68 155 showSpeed=Parse.parseBoolean(b);
jpayne@68 156 }else if(a.equals("ecco")){
jpayne@68 157 ecco_=Parse.parseBoolean(b);
jpayne@68 158 }else if(a.equals("merge")){
jpayne@68 159 merge_=Parse.parseBoolean(b);
jpayne@68 160 }else if(a.equals("verbose")){
jpayne@68 161 // assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
jpayne@68 162 verbose=Parse.parseBoolean(b);
jpayne@68 163 }else if(a.equals("verbose2")){
jpayne@68 164 // assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
jpayne@68 165 verbose2=Parse.parseBoolean(b);
jpayne@68 166 }else if(a.equals("minprob")){
jpayne@68 167 minProb_=Double.parseDouble(b);
jpayne@68 168 }else if(a.equals("minprobprefilter") || a.equals("mpp")){
jpayne@68 169 minProbPrefilter=Parse.parseBoolean(b);
jpayne@68 170 }else if(a.equals("minprobmain") || a.equals("mpm")){
jpayne@68 171 minProbMain=Parse.parseBoolean(b);
jpayne@68 172 }else if(a.equals("reads") || a.startsWith("maxreads")){
jpayne@68 173 maxReads=Parse.parseKMG(b);
jpayne@68 174 }else if(a.equals("prealloc") || a.equals("preallocate")){
jpayne@68 175 if(b==null || b.length()<1 || Character.isLetter(b.charAt(0))){
jpayne@68 176 prealloc_=Parse.parseBoolean(b);
jpayne@68 177 }else{
jpayne@68 178 preallocFraction=Tools.max(0, Double.parseDouble(b));
jpayne@68 179 prealloc_=(preallocFraction>0);
jpayne@68 180 }
jpayne@68 181 }else if(a.equals("prefilter")){
jpayne@68 182 if(b==null || b.length()<1 || !Tools.isDigit(b.charAt(0))){
jpayne@68 183 prefilter=Parse.parseBoolean(b);
jpayne@68 184 }else{
jpayne@68 185 filterMax_=Parse.parseIntKMG(b);
jpayne@68 186 prefilter=filterMax_>0;
jpayne@68 187 }
jpayne@68 188 }else if(a.equals("prefiltersize") || a.equals("prefilterfraction") || a.equals("pff")){
jpayne@68 189 prefilterFraction=Tools.max(0, Double.parseDouble(b));
jpayne@68 190 assert(prefilterFraction<=1) : "prefiltersize must be 0-1, a fraction of total memory.";
jpayne@68 191 prefilter=prefilterFraction>0;
jpayne@68 192 }else if(a.equals("prehashes") || a.equals("hashes")){
jpayne@68 193 prehashes=Parse.parseIntKMG(b);
jpayne@68 194 }else if(a.equals("prefilterpasses") || a.equals("prepasses")){
jpayne@68 195 assert(b!=null) : "Bad parameter: "+arg;
jpayne@68 196 if(b.equalsIgnoreCase("auto")){
jpayne@68 197 prepasses=-1;
jpayne@68 198 }else{
jpayne@68 199 prepasses=Parse.parseIntKMG(b);
jpayne@68 200 }
jpayne@68 201 }else if(a.equals("onepass")){
jpayne@68 202 onePass=Parse.parseBoolean(b);
jpayne@68 203 }else if(a.equals("passes")){
jpayne@68 204 int passes=Parse.parseIntKMG(b);
jpayne@68 205 onePass=(passes<2);
jpayne@68 206 }else if(a.equals("rcomp")){
jpayne@68 207 rcomp_=Parse.parseBoolean(b);
jpayne@68 208 }else if(a.equals("tabletype")){
jpayne@68 209 tableType_=Integer.parseInt(b);
jpayne@68 210 }
jpayne@68 211
jpayne@68 212 else if(a.equalsIgnoreCase("filterMemoryOverride") || a.equalsIgnoreCase("filterMemory") ||
jpayne@68 213 a.equalsIgnoreCase("prefilterMemory") || a.equalsIgnoreCase("filtermem")){
jpayne@68 214 filterMemoryOverride=Parse.parseKMG(b);
jpayne@68 215 }
jpayne@68 216
jpayne@68 217 else if(IGNORE_UNKNOWN_ARGS){
jpayne@68 218 //Do nothing
jpayne@68 219 }else{
jpayne@68 220 throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 221 }
jpayne@68 222 }
jpayne@68 223
jpayne@68 224 {//Process parser fields
jpayne@68 225 Parser.processQuality();
jpayne@68 226
jpayne@68 227 qtrimLeft=parser.qtrimLeft;
jpayne@68 228 qtrimRight=parser.qtrimRight;
jpayne@68 229 trimq=parser.trimq;
jpayne@68 230 trimE=parser.trimE();
jpayne@68 231
jpayne@68 232 minAvgQuality=parser.minAvgQuality;
jpayne@68 233 minAvgQualityBases=parser.minAvgQualityBases;
jpayne@68 234 amino=Shared.AMINO_IN;
jpayne@68 235 if(amino){k_=Tools.min(k_, 12);}
jpayne@68 236 }
jpayne@68 237
jpayne@68 238 if(prepasses==0 || !prefilter){
jpayne@68 239 prepasses=0;
jpayne@68 240 prefilter=false;
jpayne@68 241 }
jpayne@68 242
jpayne@68 243
jpayne@68 244 // assert(false) : prepasses+", "+onePass+", "+prefilter;
jpayne@68 245
jpayne@68 246 {
jpayne@68 247 long memory=Runtime.getRuntime().maxMemory();
jpayne@68 248 double xmsRatio=Shared.xmsRatio();
jpayne@68 249 // long tmemory=Runtime.getRuntime().totalMemory();
jpayne@68 250 usableMemory=(long)Tools.max(((memory-96000000)*(xmsRatio>0.97 ? 0.82 : 0.72)), memory*0.45);
jpayne@68 251 if(prepasses==0 || !prefilter){
jpayne@68 252 filterMemory0=filterMemory1=0;
jpayne@68 253 }else if(filterMemoryOverride>0){
jpayne@68 254 filterMemory0=filterMemory1=filterMemoryOverride;
jpayne@68 255 }else{
jpayne@68 256 double low=Tools.min(prefilterFraction, 1-prefilterFraction);
jpayne@68 257 double high=1-low;
jpayne@68 258 if(prepasses<0 || (prepasses&1)==1){//odd passes
jpayne@68 259 filterMemory0=(long)(usableMemory*low);
jpayne@68 260 filterMemory1=(long)(usableMemory*high);
jpayne@68 261 }else{//even passes
jpayne@68 262 filterMemory0=(long)(usableMemory*high);
jpayne@68 263 filterMemory1=(long)(usableMemory*low);
jpayne@68 264 }
jpayne@68 265 }
jpayne@68 266 tableMemory=(long)(usableMemory*.95-filterMemory0);
jpayne@68 267 }
jpayne@68 268
jpayne@68 269 tableType=tableType_;
jpayne@68 270 prealloc=prealloc_;
jpayne@68 271 bytesPerKmer=bytesPerKmer_;
jpayne@68 272 if(ways_<1){
jpayne@68 273 long maxKmers=(2*tableMemory)/bytesPerKmer;
jpayne@68 274 long minWays=Tools.min(10000, maxKmers/Integer.MAX_VALUE);
jpayne@68 275 ways_=(int)Tools.max(31, (int)(Tools.min(96, Shared.threads())*2.5), minWays);
jpayne@68 276 ways_=(int)Primes.primeAtLeast(ways_);
jpayne@68 277 assert(ways_>0);
jpayne@68 278 // System.err.println("ways="+ways_);
jpayne@68 279 }
jpayne@68 280
jpayne@68 281 /* Set final variables; post-process and validate argument combinations */
jpayne@68 282
jpayne@68 283 onePass=onePass&prefilter;
jpayne@68 284 ways=ways_;
jpayne@68 285 filterMax=Tools.min(filterMax_, 0x7FFFFFFF);
jpayne@68 286 ecco=ecco_;
jpayne@68 287 merge=merge_;
jpayne@68 288 minProb=(float)minProb_;
jpayne@68 289 rcomp=rcomp_;
jpayne@68 290 // assert(false) : tableMemory+", "+bytesPerKmer+", "+prealloc+", "+preallocFraction;
jpayne@68 291 estimatedKmerCapacity=(long)((tableMemory*1.0/bytesPerKmer)*((prealloc ? preallocFraction : 0.81))*(HashArray.maxLoadFactorFinal*0.97));
jpayne@68 292
jpayne@68 293 // System.err.println("tableMemory="+tableMemory+", bytesPerKmer="+bytesPerKmer+", estimatedKmerCapacity="+estimatedKmerCapacity);
jpayne@68 294 KmerCountAbstract.minProb=(minProbPrefilter ? minProb : 0);
jpayne@68 295 k=k_;
jpayne@68 296 k2=k-1;
jpayne@68 297 int bitsPerBase=(amino ? 5 : 2);
jpayne@68 298 int mask=(amino ? 31 : 3);
jpayne@68 299 coreMask=(AbstractKmerTableSet.MASK_CORE ? ~(((-1L)<<(bitsPerBase*(k_-1)))|mask) : -1L);
jpayne@68 300
jpayne@68 301 if(k<1 || k>31){throw new RuntimeException("\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n");}
jpayne@68 302
jpayne@68 303 if(initialSize<1){
jpayne@68 304 final long memOverWays=tableMemory/(bytesPerKmer*ways);
jpayne@68 305 final double mem2=(prealloc ? preallocFraction : 1)*tableMemory;
jpayne@68 306 initialSize=(prealloc || memOverWays<initialSizeDefault ? (int)Tools.min(2142000000, (long)(mem2/(bytesPerKmer*ways))) : initialSizeDefault);
jpayne@68 307 if(initialSize!=initialSizeDefault){
jpayne@68 308 System.err.println("Initial size set to "+initialSize);
jpayne@68 309 }
jpayne@68 310 }
jpayne@68 311
jpayne@68 312 /* Adjust I/O settings and filenames */
jpayne@68 313
jpayne@68 314 assert(FastaReadInputStream.settingsOK());
jpayne@68 315
jpayne@68 316 if(in1.isEmpty()){
jpayne@68 317 //throw new RuntimeException("Error - at least one input file is required.");
jpayne@68 318 }
jpayne@68 319
jpayne@68 320 for(int i=0; i<in1.size(); i++){
jpayne@68 321 String s=in1.get(i);
jpayne@68 322 if(s!=null && s.contains("#") && !new File(s).exists()){
jpayne@68 323 int pound=s.lastIndexOf('#');
jpayne@68 324 String a=s.substring(0, pound);
jpayne@68 325 String b=s.substring(pound+1);
jpayne@68 326 in1.set(i, a+1+b);
jpayne@68 327 in2.add(a+2+b);
jpayne@68 328 }
jpayne@68 329 }
jpayne@68 330
jpayne@68 331 if(!extra.isEmpty()){
jpayne@68 332 ArrayList<String> temp=(ArrayList<String>) extra.clone();
jpayne@68 333 extra.clear();
jpayne@68 334 for(String s : temp){
jpayne@68 335 if(s!=null && s.contains("#") && !new File(s).exists()){
jpayne@68 336 int pound=s.lastIndexOf('#');
jpayne@68 337 String a=s.substring(0, pound);
jpayne@68 338 String b=s.substring(pound+1);
jpayne@68 339 extra.add(a);
jpayne@68 340 extra.add(b);
jpayne@68 341 }else{
jpayne@68 342 extra.add(s);
jpayne@68 343 }
jpayne@68 344 }
jpayne@68 345 }
jpayne@68 346
jpayne@68 347 {
jpayne@68 348 boolean allowDuplicates=true;
jpayne@68 349 if(!Tools.testInputFiles(allowDuplicates, true, in1, in2, extra)){
jpayne@68 350 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 351 }
jpayne@68 352 }
jpayne@68 353 assert(THREADS>0);
jpayne@68 354
jpayne@68 355 if(DISPLAY_PROGRESS){
jpayne@68 356 // assert(false);
jpayne@68 357 outstream.println("Initial:");
jpayne@68 358 outstream.println("Ways="+ways+", initialSize="+initialSize+", prefilter="+(prefilter ? "t" : "f")+", prealloc="+(prealloc ? (""+preallocFraction) : "f"));
jpayne@68 359 Shared.printMemory();
jpayne@68 360 outstream.println();
jpayne@68 361 }
jpayne@68 362 }
jpayne@68 363
jpayne@68 364 /*--------------------------------------------------------------*/
jpayne@68 365 /*---------------- Outer Methods ----------------*/
jpayne@68 366 /*--------------------------------------------------------------*/
jpayne@68 367
jpayne@68 368 @Override
jpayne@68 369 public void clear(){
jpayne@68 370 tables=null;
jpayne@68 371 }
jpayne@68 372
jpayne@68 373 /*--------------------------------------------------------------*/
jpayne@68 374 /*---------------- Inner Methods ----------------*/
jpayne@68 375 /*--------------------------------------------------------------*/
jpayne@68 376
jpayne@68 377 @Override
jpayne@68 378 public void allocateTables(){
jpayne@68 379 assert(tables==null);
jpayne@68 380 tables=null;
jpayne@68 381 final long coreMask=(MASK_CORE ? ~(((-1L)<<(2*(k-1)))|3) : -1L);
jpayne@68 382
jpayne@68 383 ScheduleMaker scheduleMaker=new ScheduleMaker(ways, bytesPerKmer, prealloc,
jpayne@68 384 (prealloc ? preallocFraction : 1.0), -1, (prefilter ? prepasses : 0), prefilterFraction, filterMemoryOverride);
jpayne@68 385 int[] schedule=scheduleMaker.makeSchedule();
jpayne@68 386 //assert(false) : preallocFraction+", "+ways+", "+Arrays.toString(schedule);
jpayne@68 387 // System.err.println("DEBUG "+preallocFraction+", "+ways+", "+Arrays.toString(schedule));
jpayne@68 388 tables=AbstractKmerTable.preallocate(ways, tableType, schedule, coreMask);
jpayne@68 389
jpayne@68 390 // tables=AbstractKmerTable.preallocate(ways, tableType, initialSize, coreMask, (!prealloc || preallocFraction<1));
jpayne@68 391 }
jpayne@68 392
jpayne@68 393 /**
jpayne@68 394 * Load reads into tables, using multiple LoadThread.
jpayne@68 395 */
jpayne@68 396 @Override
jpayne@68 397 public long loadKmers(String fname1, String fname2){
jpayne@68 398
jpayne@68 399 /* Create read input stream */
jpayne@68 400 final ConcurrentReadInputStream cris;
jpayne@68 401 {
jpayne@68 402 FileFormat ff1=FileFormat.testInput(fname1, FileFormat.FASTQ, null, true, true);
jpayne@68 403 FileFormat ff2=FileFormat.testInput(fname2, FileFormat.FASTQ, null, true, true);
jpayne@68 404 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ff1, ff2);
jpayne@68 405 cris.start(); //4567
jpayne@68 406 }
jpayne@68 407
jpayne@68 408 // /* Optionally skip the first reads, since initial reads may have lower quality */
jpayne@68 409 // if(skipreads>0){
jpayne@68 410 // long skipped=0;
jpayne@68 411 //
jpayne@68 412 // ListNum<Read> ln=cris.nextList();
jpayne@68 413 // ArrayList<Read> reads=(ln!=null ? ln.list : null);
jpayne@68 414 //
jpayne@68 415 // while(skipped<skipreads && reads!=null && reads.size()>0){
jpayne@68 416 // skipped+=reads.size();
jpayne@68 417 //
jpayne@68 418 // cris.returnList(ln);
jpayne@68 419 // ln=cris.nextList();
jpayne@68 420 // reads=(ln!=null ? ln.list : null);
jpayne@68 421 // }
jpayne@68 422 // cris.returnList(ln);
jpayne@68 423 // if(reads==null || reads.isEmpty()){
jpayne@68 424 // ReadWrite.closeStreams(cris);
jpayne@68 425 // System.err.println("Skipped all of the reads.");
jpayne@68 426 // System.exit(0);
jpayne@68 427 // }
jpayne@68 428 // }
jpayne@68 429
jpayne@68 430 /* Create ProcessThreads */
jpayne@68 431 ArrayList<LoadThread> alpt=new ArrayList<LoadThread>(THREADS);
jpayne@68 432 for(int i=0; i<THREADS; i++){alpt.add(new LoadThread(cris));}
jpayne@68 433 for(LoadThread pt : alpt){pt.start();}
jpayne@68 434
jpayne@68 435 long added=0;
jpayne@68 436
jpayne@68 437 /* Wait for threads to die, and gather statistics */
jpayne@68 438 for(LoadThread pt : alpt){
jpayne@68 439 while(pt.getState()!=Thread.State.TERMINATED){
jpayne@68 440 try {
jpayne@68 441 pt.join();
jpayne@68 442 } catch (InterruptedException e) {
jpayne@68 443 // TODO Auto-generated catch block
jpayne@68 444 e.printStackTrace();
jpayne@68 445 }
jpayne@68 446 }
jpayne@68 447 added+=pt.added;
jpayne@68 448
jpayne@68 449 readsIn+=pt.readsInT;
jpayne@68 450 basesIn+=pt.basesInT;
jpayne@68 451 lowqReads+=pt.lowqReadsT;
jpayne@68 452 lowqBases+=pt.lowqBasesT;
jpayne@68 453 readsTrimmed+=pt.readsTrimmedT;
jpayne@68 454 basesTrimmed+=pt.basesTrimmedT;
jpayne@68 455 kmersIn+=pt.kmersInT;
jpayne@68 456 }
jpayne@68 457
jpayne@68 458 /* Shut down I/O streams; capture error status */
jpayne@68 459 errorState|=ReadWrite.closeStreams(cris);
jpayne@68 460 return added;
jpayne@68 461 }
jpayne@68 462
jpayne@68 463 /*--------------------------------------------------------------*/
jpayne@68 464 /*---------------- Inner Classes ----------------*/
jpayne@68 465 /*--------------------------------------------------------------*/
jpayne@68 466
jpayne@68 467 /**
jpayne@68 468 * Loads kmers.
jpayne@68 469 */
jpayne@68 470 private class LoadThread extends Thread{
jpayne@68 471
jpayne@68 472 /**
jpayne@68 473 * Constructor
jpayne@68 474 * @param cris_ Read input stream
jpayne@68 475 */
jpayne@68 476 public LoadThread(ConcurrentReadInputStream cris_){
jpayne@68 477 cris=cris_;
jpayne@68 478 table=new HashBuffer(tables, buflen, k, false, true);
jpayne@68 479 }
jpayne@68 480
jpayne@68 481 @Override
jpayne@68 482 public void run(){
jpayne@68 483 ListNum<Read> ln=cris.nextList();
jpayne@68 484 ArrayList<Read> reads=(ln!=null ? ln.list : null);
jpayne@68 485
jpayne@68 486 //While there are more reads lists...
jpayne@68 487 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
jpayne@68 488
jpayne@68 489 //For each read (or pair) in the list...
jpayne@68 490 for(int i=0; i<reads.size(); i++){
jpayne@68 491 Read r1=reads.get(i);
jpayne@68 492 Read r2=r1.mate;
jpayne@68 493
jpayne@68 494 if(!r1.validated()){r1.validate(true);}
jpayne@68 495 if(r2!=null && !r2.validated()){r2.validate(true);}
jpayne@68 496
jpayne@68 497 if(verbose){System.err.println("Considering read "+r1.id+" "+new String(r1.bases));}
jpayne@68 498
jpayne@68 499 readsInT++;
jpayne@68 500 basesInT+=r1.length();
jpayne@68 501 if(r2!=null){
jpayne@68 502 readsInT++;
jpayne@68 503 basesInT+=r2.length();
jpayne@68 504 }
jpayne@68 505
jpayne@68 506 if(maxNs<Integer.MAX_VALUE){
jpayne@68 507 if(r1!=null && r1.countUndefined()>maxNs){r1.setDiscarded(true);}
jpayne@68 508 if(r2!=null && r2.countUndefined()>maxNs){r2.setDiscarded(true);}
jpayne@68 509 }
jpayne@68 510
jpayne@68 511 //Determine whether to discard the reads based on average quality
jpayne@68 512 if(minAvgQuality>0){
jpayne@68 513 if(r1!=null && r1.quality!=null && r1.avgQuality(false, minAvgQualityBases)<minAvgQuality){r1.setDiscarded(true);}
jpayne@68 514 if(r2!=null && r2.quality!=null && r2.avgQuality(false, minAvgQualityBases)<minAvgQuality){r2.setDiscarded(true);}
jpayne@68 515 }
jpayne@68 516
jpayne@68 517 if(r1!=null){
jpayne@68 518 if(qtrimLeft || qtrimRight){
jpayne@68 519 int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1, false);
jpayne@68 520 basesTrimmedT+=x;
jpayne@68 521 readsTrimmedT+=(x>0 ? 1 : 0);
jpayne@68 522 }
jpayne@68 523 if(r1.length()<k){r1.setDiscarded(true);}
jpayne@68 524 }
jpayne@68 525 if(r2!=null){
jpayne@68 526 if(qtrimLeft || qtrimRight){
jpayne@68 527 int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1, false);
jpayne@68 528 basesTrimmedT+=x;
jpayne@68 529 readsTrimmedT+=(x>0 ? 1 : 0);
jpayne@68 530 }
jpayne@68 531 if(r2.length()<k){r2.setDiscarded(true);}
jpayne@68 532 }
jpayne@68 533
jpayne@68 534 if((ecco || merge) && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){
jpayne@68 535 if(merge){
jpayne@68 536 final int insert=BBMerge.findOverlapStrict(r1, r2, false);
jpayne@68 537 if(insert>0){
jpayne@68 538 r2.reverseComplement();
jpayne@68 539 r1=r1.joinRead(insert);
jpayne@68 540 r2=null;
jpayne@68 541 }
jpayne@68 542 }else if(ecco){
jpayne@68 543 BBMerge.findOverlapStrict(r1, r2, true);
jpayne@68 544 }
jpayne@68 545 }
jpayne@68 546
jpayne@68 547 if(minLen>0){
jpayne@68 548 if(r1!=null && r1.length()<minLen){r1.setDiscarded(true);}
jpayne@68 549 if(r2!=null && r2.length()<minLen){r2.setDiscarded(true);}
jpayne@68 550 }
jpayne@68 551
jpayne@68 552 if(r1!=null){
jpayne@68 553 if(r1.discarded()){
jpayne@68 554 lowqBasesT+=r1.length();
jpayne@68 555 lowqReadsT++;
jpayne@68 556 }else{
jpayne@68 557 long temp=addKmersToTable(r1);
jpayne@68 558 added+=temp;
jpayne@68 559 if(verbose){System.err.println("A: Added "+temp);}
jpayne@68 560 }
jpayne@68 561 }
jpayne@68 562 if(r2!=null){
jpayne@68 563 if(r2.discarded()){
jpayne@68 564 lowqBasesT+=r2.length();
jpayne@68 565 lowqReadsT++;
jpayne@68 566 }else{
jpayne@68 567 long temp=addKmersToTable(r2);
jpayne@68 568 added+=temp;
jpayne@68 569 if(verbose){System.err.println("B: Added "+temp);}
jpayne@68 570 }
jpayne@68 571 }
jpayne@68 572 }
jpayne@68 573
jpayne@68 574 //Fetch a new read list
jpayne@68 575 cris.returnList(ln);
jpayne@68 576 ln=cris.nextList();
jpayne@68 577 reads=(ln!=null ? ln.list : null);
jpayne@68 578 }
jpayne@68 579 cris.returnList(ln);
jpayne@68 580 long temp=table.flush();
jpayne@68 581 if(verbose){System.err.println("Flush: Added "+temp);}
jpayne@68 582 added+=temp;
jpayne@68 583 }
jpayne@68 584
jpayne@68 585 private final int addKmersToTableAA(final Read r){
jpayne@68 586 if(r==null || r.bases==null){return 0;}
jpayne@68 587 final float minProb2=(minProbMain ? minProb : 0);
jpayne@68 588 final byte[] bases=r.bases;
jpayne@68 589 final byte[] quals=r.quality;
jpayne@68 590 final int shift=5*k;
jpayne@68 591 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 592 long kmer=0;
jpayne@68 593 int created=0;
jpayne@68 594 int len=0;
jpayne@68 595
jpayne@68 596 if(bases==null || bases.length<k){return -1;}
jpayne@68 597
jpayne@68 598 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 599 float prob=1;
jpayne@68 600 for(int i=0; i<bases.length; i++){
jpayne@68 601 final byte b=bases[i];
jpayne@68 602 final long x=AminoAcid.acidToNumber[b];
jpayne@68 603
jpayne@68 604 //Update kmers
jpayne@68 605 kmer=((kmer<<5)|x)&mask;
jpayne@68 606
jpayne@68 607 if(minProb2>0 && quals!=null){//Update probability
jpayne@68 608 prob=prob*PROB_CORRECT[quals[i]];
jpayne@68 609 if(len>k){
jpayne@68 610 byte oldq=quals[i-k];
jpayne@68 611 prob=prob*PROB_CORRECT_INVERSE[oldq];
jpayne@68 612 }
jpayne@68 613 }
jpayne@68 614
jpayne@68 615 //Handle Ns
jpayne@68 616 if(x<0){
jpayne@68 617 len=0;
jpayne@68 618 kmer=0;
jpayne@68 619 prob=1;
jpayne@68 620 }else{len++;}
jpayne@68 621
jpayne@68 622 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 623 if(len>=k && prob>=minProb2){
jpayne@68 624 kmersInT++;
jpayne@68 625 final long key=kmer;
jpayne@68 626 if(!prefilter || prefilterArray.read(key)>filterMax2){
jpayne@68 627 int temp=table.incrementAndReturnNumCreated(key, 1);
jpayne@68 628 created+=temp;
jpayne@68 629 if(verbose){System.err.println("C: Added "+temp);}
jpayne@68 630 }
jpayne@68 631 }
jpayne@68 632 }
jpayne@68 633
jpayne@68 634 return created;
jpayne@68 635 }
jpayne@68 636
jpayne@68 637 private final int addKmersToTable(final Read r){
jpayne@68 638 if(amino){return addKmersToTableAA(r);}
jpayne@68 639 if(onePass){return addKmersToTable_onePass(r);}
jpayne@68 640 if(r==null || r.bases==null){return 0;}
jpayne@68 641 final float minProb2=(minProbMain ? minProb : 0);
jpayne@68 642 final byte[] bases=r.bases;
jpayne@68 643 final byte[] quals=r.quality;
jpayne@68 644 final int shift=2*k;
jpayne@68 645 final int shift2=shift-2;
jpayne@68 646 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 647 long kmer=0;
jpayne@68 648 long rkmer=0;
jpayne@68 649 int created=0;
jpayne@68 650 int len=0;
jpayne@68 651
jpayne@68 652 if(bases==null || bases.length<k){return -1;}
jpayne@68 653
jpayne@68 654 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 655 float prob=1;
jpayne@68 656 for(int i=0; i<bases.length; i++){
jpayne@68 657 final byte b=bases[i];
jpayne@68 658 final long x=AminoAcid.baseToNumber[b];
jpayne@68 659 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 660
jpayne@68 661 //Update kmers
jpayne@68 662 kmer=((kmer<<2)|x)&mask;
jpayne@68 663 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 664
jpayne@68 665 if(minProb2>0 && quals!=null){//Update probability
jpayne@68 666 prob=prob*PROB_CORRECT[quals[i]];
jpayne@68 667 if(len>k){
jpayne@68 668 byte oldq=quals[i-k];
jpayne@68 669 prob=prob*PROB_CORRECT_INVERSE[oldq];
jpayne@68 670 }
jpayne@68 671 }
jpayne@68 672
jpayne@68 673 //Handle Ns
jpayne@68 674 if(x<0){
jpayne@68 675 len=0;
jpayne@68 676 kmer=rkmer=0;
jpayne@68 677 prob=1;
jpayne@68 678 }else{len++;}
jpayne@68 679
jpayne@68 680 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 681 if(len>=k && prob>=minProb2){
jpayne@68 682 kmersInT++;
jpayne@68 683 final long key=toValue(kmer, rkmer);
jpayne@68 684 if(!prefilter || prefilterArray.read(key)>filterMax2){
jpayne@68 685 int temp=table.incrementAndReturnNumCreated(key, 1);
jpayne@68 686 created+=temp;
jpayne@68 687 if(verbose){System.err.println("C: Added "+temp);}
jpayne@68 688 }
jpayne@68 689 }
jpayne@68 690 }
jpayne@68 691
jpayne@68 692 return created;
jpayne@68 693 }
jpayne@68 694
jpayne@68 695
jpayne@68 696 private final int addKmersToTable_onePass(final Read r){
jpayne@68 697 assert(prefilter);
jpayne@68 698 if(r==null || r.bases==null){return 0;}
jpayne@68 699 final byte[] bases=r.bases;
jpayne@68 700 final byte[] quals=r.quality;
jpayne@68 701 final int shift=2*k;
jpayne@68 702 final int shift2=shift-2;
jpayne@68 703 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 704 long kmer=0;
jpayne@68 705 long rkmer=0;
jpayne@68 706 int created=0;
jpayne@68 707 int len=0;
jpayne@68 708
jpayne@68 709 if(bases==null || bases.length<k){return -1;}
jpayne@68 710
jpayne@68 711 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 712 float prob=1;
jpayne@68 713 for(int i=0; i<bases.length; i++){
jpayne@68 714 final byte b=bases[i];
jpayne@68 715 final long x=AminoAcid.baseToNumber[b];
jpayne@68 716 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 717
jpayne@68 718 //Update kmers
jpayne@68 719 kmer=((kmer<<2)|x)&mask;
jpayne@68 720 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 721
jpayne@68 722 if(minProb>0 && quals!=null){//Update probability
jpayne@68 723 prob=prob*PROB_CORRECT[quals[i]];
jpayne@68 724 if(len>k){
jpayne@68 725 byte oldq=quals[i-k];
jpayne@68 726 prob=prob*PROB_CORRECT_INVERSE[oldq];
jpayne@68 727 }
jpayne@68 728 }
jpayne@68 729
jpayne@68 730 //Handle Ns
jpayne@68 731 if(x<0){
jpayne@68 732 len=0;
jpayne@68 733 kmer=rkmer=0;
jpayne@68 734 prob=1;
jpayne@68 735 }else{len++;}
jpayne@68 736
jpayne@68 737 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 738 if(len>=k){
jpayne@68 739 final long key=toValue(kmer, rkmer);
jpayne@68 740 int count=prefilterArray.incrementAndReturnUnincremented(key, 1);
jpayne@68 741 if(count>=filterMax2){
jpayne@68 742 int temp=table.incrementAndReturnNumCreated(key, 1);
jpayne@68 743 created+=temp;
jpayne@68 744 if(verbose){System.err.println("D: Added "+temp);}
jpayne@68 745 }
jpayne@68 746 }
jpayne@68 747 }
jpayne@68 748 return created;
jpayne@68 749 }
jpayne@68 750
jpayne@68 751 /*--------------------------------------------------------------*/
jpayne@68 752
jpayne@68 753 /** Input read stream */
jpayne@68 754 private final ConcurrentReadInputStream cris;
jpayne@68 755
jpayne@68 756 private final HashBuffer table;
jpayne@68 757
jpayne@68 758 public long added=0;
jpayne@68 759
jpayne@68 760 private long readsInT=0;
jpayne@68 761 private long basesInT=0;
jpayne@68 762 private long lowqReadsT=0;
jpayne@68 763 private long lowqBasesT=0;
jpayne@68 764 private long readsTrimmedT=0;
jpayne@68 765 private long basesTrimmedT=0;
jpayne@68 766 private long kmersInT=0;
jpayne@68 767
jpayne@68 768 }
jpayne@68 769
jpayne@68 770
jpayne@68 771 /*--------------------------------------------------------------*/
jpayne@68 772 /*---------------- Convenience ----------------*/
jpayne@68 773 /*--------------------------------------------------------------*/
jpayne@68 774
jpayne@68 775 public void regenerateKmers(byte[] bases, LongList kmers, IntList counts, final int a){
jpayne@68 776 final int loc=a+k;
jpayne@68 777 final int lim=Tools.min(counts.size, a+k+1);
jpayne@68 778 final int shift=2*k;
jpayne@68 779 final int shift2=shift-2;
jpayne@68 780 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 781 long kmer=kmers.get(a);
jpayne@68 782 long rkmer=rcomp(kmer);
jpayne@68 783 int len=k;
jpayne@68 784
jpayne@68 785 // assert(false) : a+", "+loc+", "+lim;
jpayne@68 786
jpayne@68 787 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 788 for(int i=loc, j=a+1; j<lim; i++, j++){
jpayne@68 789 final byte b=bases[i];
jpayne@68 790 final long x=AminoAcid.baseToNumber[b];
jpayne@68 791 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 792 kmer=((kmer<<2)|x)&mask;
jpayne@68 793 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 794 if(x<0){
jpayne@68 795 len=0;
jpayne@68 796 kmer=rkmer=0;
jpayne@68 797 }else{len++;}
jpayne@68 798
jpayne@68 799 if(len>=k){
jpayne@68 800 assert(kmers.get(j)!=kmer);
jpayne@68 801 kmers.set(j, kmer);
jpayne@68 802 int count=getCount(kmer, rkmer);
jpayne@68 803 counts.set(j, count);
jpayne@68 804 }else{
jpayne@68 805 kmers.set(j, -1);
jpayne@68 806 counts.set(j, 0);
jpayne@68 807 }
jpayne@68 808 }
jpayne@68 809 }
jpayne@68 810
jpayne@68 811 @Override
jpayne@68 812 public int regenerateCounts(byte[] bases, IntList counts, final Kmer dummy, BitSet changed){
jpayne@68 813 assert(!changed.isEmpty());
jpayne@68 814 final int firstBase=changed.nextSetBit(0), lastBase=changed.length()-1;
jpayne@68 815 final int ca=firstBase-k;
jpayne@68 816 // final int b=changed.nextSetBit(0);ca+kbig; //first base changed
jpayne@68 817 final int firstCount=Tools.max(firstBase-k+1, 0), lastCount=Tools.min(counts.size-1, lastBase); //count limit
jpayne@68 818 // System.err.println("ca="+ca+", b="+b+", lim="+lim);
jpayne@68 819 // System.err.println("Regen from count "+(ca+1)+"-"+lim);
jpayne@68 820
jpayne@68 821 final int shift=2*k;
jpayne@68 822 final int shift2=shift-2;
jpayne@68 823 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 824 long kmer=0, rkmer=0;
jpayne@68 825 int len=0;
jpayne@68 826 int valid=0;
jpayne@68 827
jpayne@68 828 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts);
jpayne@68 829
jpayne@68 830 for(int i=Tools.max(0, firstBase-k+1), lim=Tools.min(lastBase+k-1, bases.length-1); i<=lim; i++){
jpayne@68 831 final byte base=bases[i];
jpayne@68 832 final long x=AminoAcid.baseToNumber[base];
jpayne@68 833 final long x2=AminoAcid.baseToComplementNumber[base];
jpayne@68 834 kmer=((kmer<<2)|x)&mask;
jpayne@68 835 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 836
jpayne@68 837 if(x<0){
jpayne@68 838 len=0;
jpayne@68 839 kmer=rkmer=0;
jpayne@68 840 }else{
jpayne@68 841 len++;
jpayne@68 842 }
jpayne@68 843
jpayne@68 844 final int c=i-k+1;
jpayne@68 845 if(i>=firstBase){
jpayne@68 846 if(len>=k){
jpayne@68 847 valid++;
jpayne@68 848 int count=getCount(kmer, rkmer);
jpayne@68 849 counts.set(c, count);
jpayne@68 850 }else if(c>=0){
jpayne@68 851 counts.set(c, 0);
jpayne@68 852 }
jpayne@68 853 }
jpayne@68 854 }
jpayne@68 855
jpayne@68 856 return valid;
jpayne@68 857 }
jpayne@68 858
jpayne@68 859 @Override
jpayne@68 860 public int fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer dummy){
jpayne@68 861 final int b=k-1;
jpayne@68 862 final int lim=(positions==null ? bases.length : Tools.min(bases.length, positions.length()+k-1));
jpayne@68 863 final int shift=2*k;
jpayne@68 864 final int shift2=shift-2;
jpayne@68 865 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 866 long kmer=0, rkmer=0;
jpayne@68 867 int len=0;
jpayne@68 868 int valid=0;
jpayne@68 869
jpayne@68 870 counts.clear();
jpayne@68 871
jpayne@68 872 for(int i=0, j=-b; i<lim; i++, j++){
jpayne@68 873 final byte base=bases[i];
jpayne@68 874 final long x=AminoAcid.baseToNumber[base];
jpayne@68 875 final long x2=AminoAcid.baseToComplementNumber[base];
jpayne@68 876 kmer=((kmer<<2)|x)&mask;
jpayne@68 877 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 878
jpayne@68 879 if(x<0){
jpayne@68 880 len=0;
jpayne@68 881 kmer=rkmer=0;
jpayne@68 882 }else{
jpayne@68 883 len++;
jpayne@68 884 }
jpayne@68 885
jpayne@68 886 if(j>=0){
jpayne@68 887 if(len>=k && (positions==null || positions.get(j))){
jpayne@68 888 valid++;
jpayne@68 889 int count=getCount(kmer, rkmer);
jpayne@68 890 assert(i-k+1==counts.size()) : "j="+j+", counts="+counts.size()+", b="+(b)+", (i-k+1)="+(i-k+1);
jpayne@68 891 assert(j==counts.size());
jpayne@68 892 counts.add(Tools.max(count, 0));
jpayne@68 893 // counts.set(i-k+1, count);
jpayne@68 894 }else{
jpayne@68 895 counts.add(0);
jpayne@68 896 // counts.set(i-k+1, 0);
jpayne@68 897 }
jpayne@68 898 }
jpayne@68 899 }
jpayne@68 900
jpayne@68 901 // assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size;
jpayne@68 902 assert((counts.size==0 && bases.length<k) || counts.size==lim-k+1) : bases.length+", "+k+", "+counts.size;
jpayne@68 903
jpayne@68 904 return valid;
jpayne@68 905 }
jpayne@68 906
jpayne@68 907 public int regenerateCounts(byte[] bases, IntList counts, final int ca){
jpayne@68 908 final int b=ca+k-1;
jpayne@68 909 final int lim=Tools.min(bases.length, b+k+1);
jpayne@68 910 final int shift=2*k;
jpayne@68 911 final int shift2=shift-2;
jpayne@68 912 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 913 long kmer=0, rkmer=0;
jpayne@68 914 int len=0;
jpayne@68 915 int valid=0;
jpayne@68 916
jpayne@68 917 final int clen=counts.size;
jpayne@68 918
jpayne@68 919 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts.
jpayne@68 920 * i is an index in the base array, j is an index in the count array. */
jpayne@68 921 for(int i=b-k+1; i<lim; i++){
jpayne@68 922 final byte base=bases[i];
jpayne@68 923 final long x=AminoAcid.baseToNumber[base];
jpayne@68 924 final long x2=AminoAcid.baseToComplementNumber[base];
jpayne@68 925 kmer=((kmer<<2)|x)&mask;
jpayne@68 926 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 927
jpayne@68 928 if(x<0){
jpayne@68 929 len=0;
jpayne@68 930 kmer=rkmer=0;
jpayne@68 931 }else{
jpayne@68 932 len++;
jpayne@68 933 }
jpayne@68 934
jpayne@68 935 if(i>=b){
jpayne@68 936 if(len>=k){
jpayne@68 937 valid++;
jpayne@68 938 int count=getCount(kmer, rkmer);
jpayne@68 939 counts.set(i-k+1, count);
jpayne@68 940 }else{
jpayne@68 941 counts.set(i-k+1, 0);
jpayne@68 942 }
jpayne@68 943 }
jpayne@68 944 }
jpayne@68 945
jpayne@68 946 assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size;
jpayne@68 947 assert(clen==counts.size);
jpayne@68 948
jpayne@68 949 return valid;
jpayne@68 950 }
jpayne@68 951
jpayne@68 952 /** Returns number of valid kmers */
jpayne@68 953 public int fillKmers(byte[] bases, LongList kmers){
jpayne@68 954 final int blen=bases.length;
jpayne@68 955 if(blen<k){return 0;}
jpayne@68 956 final int min=k-1;
jpayne@68 957 final int shift=2*k;
jpayne@68 958 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 959 long kmer=0;
jpayne@68 960 int len=0;
jpayne@68 961 int valid=0;
jpayne@68 962
jpayne@68 963 kmers.clear();
jpayne@68 964
jpayne@68 965 /* Loop through the bases, maintaining a forward kmer via bitshifts */
jpayne@68 966 for(int i=0; i<blen; i++){
jpayne@68 967 final byte b=bases[i];
jpayne@68 968 assert(b>=0) : Arrays.toString(bases);
jpayne@68 969 final long x=AminoAcid.baseToNumber[b];
jpayne@68 970 kmer=((kmer<<2)|x)&mask;
jpayne@68 971 if(x<0){
jpayne@68 972 len=0;
jpayne@68 973 kmer=0;
jpayne@68 974 }else{len++;}
jpayne@68 975 if(i>=min){
jpayne@68 976 if(len>=k){
jpayne@68 977 kmers.add(kmer);
jpayne@68 978 valid++;
jpayne@68 979 }else{
jpayne@68 980 kmers.add(-1);
jpayne@68 981 }
jpayne@68 982 }
jpayne@68 983 }
jpayne@68 984 return valid;
jpayne@68 985 }
jpayne@68 986
jpayne@68 987 public void fillCounts(LongList kmers, IntList counts){
jpayne@68 988 counts.clear();
jpayne@68 989 for(int i=0; i<kmers.size; i++){
jpayne@68 990 long kmer=kmers.get(i);
jpayne@68 991 if(kmer>=0){
jpayne@68 992 long rkmer=rcomp(kmer);
jpayne@68 993 int count=getCount(kmer, rkmer);
jpayne@68 994 counts.add(count);
jpayne@68 995 }else{
jpayne@68 996 counts.add(0);
jpayne@68 997 }
jpayne@68 998 }
jpayne@68 999 }
jpayne@68 1000
jpayne@68 1001
jpayne@68 1002 /*--------------------------------------------------------------*/
jpayne@68 1003 /*---------------- Helper Methods ----------------*/
jpayne@68 1004 /*--------------------------------------------------------------*/
jpayne@68 1005
jpayne@68 1006 @Override
jpayne@68 1007 public long regenerate(final int limit){
jpayne@68 1008 long sum=0;
jpayne@68 1009 for(AbstractKmerTable akt : tables){
jpayne@68 1010 sum+=akt.regenerate(limit);
jpayne@68 1011 }
jpayne@68 1012 return sum;
jpayne@68 1013 }
jpayne@68 1014
jpayne@68 1015 public HashArray1D getTableForKey(long key){
jpayne@68 1016 return (HashArray1D) tables[kmerToWay(key)];
jpayne@68 1017 }
jpayne@68 1018
jpayne@68 1019 @Override
jpayne@68 1020 public HashArray1D getTable(int tnum){
jpayne@68 1021 return (HashArray1D) tables[tnum];
jpayne@68 1022 }
jpayne@68 1023
jpayne@68 1024 @Override
jpayne@68 1025 public long[] fillHistogram(int histMax) {
jpayne@68 1026 return HistogramMaker.fillHistogram(tables, histMax);
jpayne@68 1027 }
jpayne@68 1028
jpayne@68 1029 @Override
jpayne@68 1030 public void countGC(long[] gcCounts, int max) {
jpayne@68 1031 for(AbstractKmerTable set : tables){
jpayne@68 1032 set.countGC(gcCounts, max);
jpayne@68 1033 }
jpayne@68 1034 }
jpayne@68 1035
jpayne@68 1036 @Override
jpayne@68 1037 public void initializeOwnership(){
jpayne@68 1038 OwnershipThread.initialize(tables);
jpayne@68 1039 }
jpayne@68 1040
jpayne@68 1041 @Override
jpayne@68 1042 public void clearOwnership(){
jpayne@68 1043 OwnershipThread.clear(tables);
jpayne@68 1044 }
jpayne@68 1045
jpayne@68 1046 public long rightmostKmer(final ByteBuilder bb){
jpayne@68 1047 return rightmostKmer(bb.array, bb.length());
jpayne@68 1048 }
jpayne@68 1049
jpayne@68 1050 public long rightmostKmer(final byte[] bases, final int blen){
jpayne@68 1051 if(blen<k){return -1;}
jpayne@68 1052 final int shift=2*k;
jpayne@68 1053 final int shift2=shift-2;
jpayne@68 1054 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 1055 long kmer=0;
jpayne@68 1056 long rkmer=0;
jpayne@68 1057 int len=0;
jpayne@68 1058
jpayne@68 1059 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
jpayne@68 1060 {
jpayne@68 1061 for(int i=blen-k; i<blen; i++){
jpayne@68 1062 final byte b=bases[i];
jpayne@68 1063 final long x=AminoAcid.baseToNumber[b];
jpayne@68 1064 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 1065 kmer=((kmer<<2)|x)&mask;
jpayne@68 1066 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 1067 if(x<0){
jpayne@68 1068 len=0;
jpayne@68 1069 kmer=rkmer=0;
jpayne@68 1070 }else{len++;}
jpayne@68 1071 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 1072 }
jpayne@68 1073 }
jpayne@68 1074
jpayne@68 1075 if(len<k){return -1;}
jpayne@68 1076 else{assert(len==k);}
jpayne@68 1077 return kmer;
jpayne@68 1078 }
jpayne@68 1079
jpayne@68 1080 public long leftmostKmer(final ByteBuilder bb){
jpayne@68 1081 return leftmostKmer(bb.array, bb.length());
jpayne@68 1082 }
jpayne@68 1083
jpayne@68 1084 public long leftmostKmer(final byte[] bases, final int blen){
jpayne@68 1085 if(blen<k){return -1;}
jpayne@68 1086 final int shift=2*k;
jpayne@68 1087 final int shift2=shift-2;
jpayne@68 1088 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 1089 long kmer=0;
jpayne@68 1090 long rkmer=0;
jpayne@68 1091 int len=0;
jpayne@68 1092
jpayne@68 1093 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */
jpayne@68 1094 {
jpayne@68 1095 for(int i=0; i<k; i++){
jpayne@68 1096 final byte b=bases[i];
jpayne@68 1097 final long x=AminoAcid.baseToNumber[b];
jpayne@68 1098 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 1099 kmer=((kmer<<2)|x)&mask;
jpayne@68 1100 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 1101 if(x<0){
jpayne@68 1102 len=0;
jpayne@68 1103 kmer=rkmer=0;
jpayne@68 1104 }else{len++;}
jpayne@68 1105 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 1106 }
jpayne@68 1107 }
jpayne@68 1108
jpayne@68 1109 if(len<k){return -1;}
jpayne@68 1110 else{assert(len==k);}
jpayne@68 1111 return kmer;
jpayne@68 1112 }
jpayne@68 1113
jpayne@68 1114 public boolean doubleClaim(final ByteBuilder bb, final int id){
jpayne@68 1115 return doubleClaim(bb.array, bb.length(), id);
jpayne@68 1116 }
jpayne@68 1117
jpayne@68 1118 /** Ensures there can be only one owner. */
jpayne@68 1119 public boolean doubleClaim(final byte[] bases, final int blength, final int id){
jpayne@68 1120 boolean success=claim(bases, blength, id, true);
jpayne@68 1121 if(verbose){outstream.println("success1="+success+", id="+id+", blength="+blength);}
jpayne@68 1122 if(!success){return false;}
jpayne@68 1123 success=claim(bases, blength, id+CLAIM_OFFSET, true);
jpayne@68 1124 if(verbose){outstream.println("success2="+success+", id="+id+", blength="+blength);}
jpayne@68 1125 return success;
jpayne@68 1126 }
jpayne@68 1127
jpayne@68 1128 public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly){
jpayne@68 1129 return claim(bb.array, bb.length(), id, exitEarly);
jpayne@68 1130 }
jpayne@68 1131
jpayne@68 1132 public float calcCoverage(final byte[] bases, final int blength){
jpayne@68 1133 final int shift=2*k;
jpayne@68 1134 final int shift2=shift-2;
jpayne@68 1135 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 1136 long kmer=0, rkmer=0;
jpayne@68 1137 int len=0;
jpayne@68 1138 long sum=0, max=0;
jpayne@68 1139 int kmers=0;
jpayne@68 1140
jpayne@68 1141 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 1142 for(int i=0; i<blength; i++){
jpayne@68 1143 final byte b=bases[i];
jpayne@68 1144 final long x=AminoAcid.baseToNumber[b];
jpayne@68 1145 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 1146 kmer=((kmer<<2)|x)&mask;
jpayne@68 1147 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 1148 if(x<0){
jpayne@68 1149 len=0;
jpayne@68 1150 kmer=rkmer=0;
jpayne@68 1151 }else{len++;}
jpayne@68 1152 if(len>=k){
jpayne@68 1153 int count=getCount(kmer, rkmer);
jpayne@68 1154 sum+=count;
jpayne@68 1155 max=Tools.max(count, max);
jpayne@68 1156 kmers++;
jpayne@68 1157 }
jpayne@68 1158 }
jpayne@68 1159 return sum==0 ? 0 : sum/(float)kmers;
jpayne@68 1160 }
jpayne@68 1161
jpayne@68 1162 public float calcCoverage(final Contig contig){
jpayne@68 1163 final byte[] bases=contig.bases;
jpayne@68 1164 if(bases.length<k){return 0;}
jpayne@68 1165 final int shift=2*k;
jpayne@68 1166 final int shift2=shift-2;
jpayne@68 1167 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 1168 long kmer=0, rkmer=0;
jpayne@68 1169 int len=0;
jpayne@68 1170 long sum=0;
jpayne@68 1171 int max=0, min=Integer.MAX_VALUE;
jpayne@68 1172 int kmers=0;
jpayne@68 1173
jpayne@68 1174 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 1175 for(int i=0; i<bases.length; i++){
jpayne@68 1176 final byte b=bases[i];
jpayne@68 1177 final long x=AminoAcid.baseToNumber[b];
jpayne@68 1178 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 1179 kmer=((kmer<<2)|x)&mask;
jpayne@68 1180 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 1181 if(x<0){
jpayne@68 1182 len=0;
jpayne@68 1183 kmer=rkmer=0;
jpayne@68 1184 }else{len++;}
jpayne@68 1185 if(len>=k){
jpayne@68 1186 int count=getCount(kmer, rkmer);
jpayne@68 1187 sum+=count;
jpayne@68 1188 max=Tools.max(count, max);
jpayne@68 1189 min=Tools.min(count, min);
jpayne@68 1190 kmers++;
jpayne@68 1191 }
jpayne@68 1192 }
jpayne@68 1193 contig.coverage=sum==0 ? 0 : sum/(float)kmers;
jpayne@68 1194 contig.maxCov=max;
jpayne@68 1195 contig.minCov=sum==0 ? 0 : min;
jpayne@68 1196 return contig.coverage;
jpayne@68 1197 }
jpayne@68 1198
jpayne@68 1199 public boolean claim(final byte[] bases, final int blength, final int id, boolean exitEarly){
jpayne@68 1200 if(verbose){outstream.println("Thread "+id+" claim start.");}
jpayne@68 1201 final int shift=2*k;
jpayne@68 1202 final int shift2=shift-2;
jpayne@68 1203 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 1204 long kmer=0, rkmer=0;
jpayne@68 1205 int len=0;
jpayne@68 1206 boolean success=true;
jpayne@68 1207 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 1208 for(int i=0; i<blength && success; i++){
jpayne@68 1209 final byte b=bases[i];
jpayne@68 1210 final long x=AminoAcid.baseToNumber[b];
jpayne@68 1211 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 1212 kmer=((kmer<<2)|x)&mask;
jpayne@68 1213 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 1214 if(x<0){
jpayne@68 1215 len=0;
jpayne@68 1216 kmer=rkmer=0;
jpayne@68 1217 }else{len++;}
jpayne@68 1218 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 1219 if(len>=k){
jpayne@68 1220 success=claim(kmer, rkmer, id/*, rid, i*/);
jpayne@68 1221 success=(success || !exitEarly);
jpayne@68 1222 }
jpayne@68 1223 }
jpayne@68 1224 return success;
jpayne@68 1225 }
jpayne@68 1226
jpayne@68 1227 public boolean claim(final long kmer, final long rkmer, final int id/*, final long rid, final int pos*/){
jpayne@68 1228 //TODO: rid and pos are just for debugging.
jpayne@68 1229 final long key=toValue(kmer, rkmer);
jpayne@68 1230 final int way=kmerToWay(key);
jpayne@68 1231 final AbstractKmerTable table=tables[way];
jpayne@68 1232 final int count=table.getValue(key);
jpayne@68 1233 assert(count==-1 || count>0) : count;
jpayne@68 1234 // if(verbose /*|| true*/){outstream.println("Count="+count+".");}
jpayne@68 1235 if(count<0){return true;}
jpayne@68 1236 assert(count>0) : count;
jpayne@68 1237 final int owner=table.setOwner(key, id);
jpayne@68 1238 if(verbose){outstream.println("owner="+owner+".");}
jpayne@68 1239 // assert(owner==id) : id+", "+owner+", "+rid+", "+pos;
jpayne@68 1240 return owner==id;
jpayne@68 1241 }
jpayne@68 1242
jpayne@68 1243 public void release(ByteBuilder bb, final int id){
jpayne@68 1244 release(bb.array, bb.length(), id);
jpayne@68 1245 }
jpayne@68 1246
jpayne@68 1247 public void release(final byte[] bases, final int blength, final int id){
jpayne@68 1248 if(verbose /*|| true*/){outstream.println("*Thread "+id+" release start.");}
jpayne@68 1249 final int shift=2*k;
jpayne@68 1250 final int shift2=shift-2;
jpayne@68 1251 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 1252 long kmer=0, rkmer=0;
jpayne@68 1253 int len=0;
jpayne@68 1254 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 1255 for(int i=0; i<blength; i++){
jpayne@68 1256 final byte b=bases[i];
jpayne@68 1257 final long x=AminoAcid.baseToNumber[b];
jpayne@68 1258 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 1259 kmer=((kmer<<2)|x)&mask;
jpayne@68 1260 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 1261 if(x<0){
jpayne@68 1262 len=0;
jpayne@68 1263 kmer=rkmer=0;
jpayne@68 1264 }else{len++;}
jpayne@68 1265 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 1266 if(len>=k){
jpayne@68 1267 release(kmer, rkmer, id);
jpayne@68 1268 }
jpayne@68 1269 }
jpayne@68 1270 }
jpayne@68 1271
jpayne@68 1272 public boolean release(final long kmer, final long rkmer, final int id){
jpayne@68 1273 return release(toValue(kmer, rkmer), id);
jpayne@68 1274 }
jpayne@68 1275
jpayne@68 1276 public boolean release(final long key, final int id){
jpayne@68 1277 final int way=kmerToWay(key);
jpayne@68 1278 final AbstractKmerTable table=tables[way];
jpayne@68 1279 final int count=table.getValue(key);
jpayne@68 1280 // if(verbose /*|| true*/){outstream.println("Count="+count+".");}
jpayne@68 1281 if(count<1){return true;}
jpayne@68 1282 return table.clearOwner(key, id);
jpayne@68 1283 }
jpayne@68 1284
jpayne@68 1285 public int findOwner(ByteBuilder bb, final int id){
jpayne@68 1286 return findOwner(bb.array, bb.length(), id);
jpayne@68 1287 }
jpayne@68 1288
jpayne@68 1289 public int findOwner(final byte[] bases, final int blength, final int id){
jpayne@68 1290 final int shift=2*k;
jpayne@68 1291 final int shift2=shift-2;
jpayne@68 1292 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
jpayne@68 1293 long kmer=0, rkmer=0;
jpayne@68 1294 int len=0;
jpayne@68 1295 int maxOwner=-1;
jpayne@68 1296 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
jpayne@68 1297 for(int i=0; i<blength; i++){
jpayne@68 1298 final byte b=bases[i];
jpayne@68 1299 final long x=AminoAcid.baseToNumber[b];
jpayne@68 1300 final long x2=AminoAcid.baseToComplementNumber[b];
jpayne@68 1301 kmer=((kmer<<2)|x)&mask;
jpayne@68 1302 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
jpayne@68 1303 if(x<0){
jpayne@68 1304 len=0;
jpayne@68 1305 kmer=rkmer=0;
jpayne@68 1306 }else{len++;}
jpayne@68 1307 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
jpayne@68 1308 if(len>=k){
jpayne@68 1309 int owner=findOwner(kmer, rkmer);
jpayne@68 1310 maxOwner=Tools.max(owner, maxOwner);
jpayne@68 1311 if(maxOwner>id){break;}
jpayne@68 1312 }
jpayne@68 1313 }
jpayne@68 1314 return maxOwner;
jpayne@68 1315 }
jpayne@68 1316
jpayne@68 1317 public int findOwner(final long kmer){
jpayne@68 1318 return findOwner(kmer, rcomp(kmer));
jpayne@68 1319 }
jpayne@68 1320
jpayne@68 1321 public int findOwner(final long kmer, final long rkmer){
jpayne@68 1322 final long key=toValue(kmer, rkmer);
jpayne@68 1323 final int way=kmerToWay(key);
jpayne@68 1324 final AbstractKmerTable table=tables[way];
jpayne@68 1325 final int count=table.getValue(key);
jpayne@68 1326 if(count<0){return -1;}
jpayne@68 1327 final int owner=table.getOwner(key);
jpayne@68 1328 return owner;
jpayne@68 1329 }
jpayne@68 1330
jpayne@68 1331 public int getCount(long kmer, long rkmer){
jpayne@68 1332 long key=toValue(kmer, rkmer);
jpayne@68 1333 int way=kmerToWay(key);
jpayne@68 1334 return tables[way].getValue(key);
jpayne@68 1335 }
jpayne@68 1336
jpayne@68 1337 public int getCount(long key){
jpayne@68 1338 int way=kmerToWay(key);
jpayne@68 1339 return tables[way].getValue(key);
jpayne@68 1340 }
jpayne@68 1341
jpayne@68 1342 /*--------------------------------------------------------------*/
jpayne@68 1343 /*---------------- Fill Counts ----------------*/
jpayne@68 1344 /*--------------------------------------------------------------*/
jpayne@68 1345
jpayne@68 1346 public int fillRightCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){
jpayne@68 1347 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){
jpayne@68 1348 return fillRightCounts_fast(kmer, rkmer, counts, mask, shift2);
jpayne@68 1349 }else{
jpayne@68 1350 return fillRightCounts_safe(kmer, rkmer, counts, mask, shift2);
jpayne@68 1351 }
jpayne@68 1352 }
jpayne@68 1353
jpayne@68 1354 public int fillRightCounts_fast(final long kmer0, final long rkmer0, int[] counts,
jpayne@68 1355 long mask, int shift2){
jpayne@68 1356 // assert((k&1)==1) : k;
jpayne@68 1357 assert(MASK_CORE);
jpayne@68 1358 final long kmer=(kmer0<<2)&mask;
jpayne@68 1359 final long rkmer=(rkmer0>>>2);
jpayne@68 1360 final long a=kmer&coreMask, b=rkmer&coreMask;
jpayne@68 1361
jpayne@68 1362 int max=-1, maxPos=0;
jpayne@68 1363 if(a==b){
jpayne@68 1364 return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
jpayne@68 1365 }else if(a>b){
jpayne@68 1366 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
jpayne@68 1367 final long core=a;
jpayne@68 1368 final int way=kmerToWay(core);
jpayne@68 1369 // final AbstractKmerTable table=tables[way];
jpayne@68 1370 final HashArray table=(HashArray) tables[way];
jpayne@68 1371 final int cell=table.kmerToCell(kmer);
jpayne@68 1372 for(int i=0; i<=3; i++){
jpayne@68 1373 final long key=kmer|i;
jpayne@68 1374 final int count=Tools.max(0, table.getValue(key, cell));
jpayne@68 1375 // final int count=Tools.max(0, table.getValue(key));
jpayne@68 1376 counts[i]=count;
jpayne@68 1377 if(count>max){
jpayne@68 1378 max=count;
jpayne@68 1379 maxPos=i;
jpayne@68 1380 }
jpayne@68 1381 }
jpayne@68 1382 }else{
jpayne@68 1383 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
jpayne@68 1384 //Use a higher increment for the left bits
jpayne@68 1385 final long core=b;
jpayne@68 1386 final int way=kmerToWay(core);
jpayne@68 1387 // final AbstractKmerTable table=tables[way];
jpayne@68 1388 final HashArray table=(HashArray) tables[way];
jpayne@68 1389 final int cell=table.kmerToCell(rkmer);
jpayne@68 1390 final long incr=1L<<(2*(k-1));
jpayne@68 1391 long j=3*incr;
jpayne@68 1392 for(int i=0; i<=3; i++, j-=incr){
jpayne@68 1393 final long key=rkmer|j;
jpayne@68 1394 final int count=Tools.max(0, table.getValue(key, cell));
jpayne@68 1395 // final int count=Tools.max(0, table.getValue(key));
jpayne@68 1396 counts[i]=count;
jpayne@68 1397 if(count>max){
jpayne@68 1398 max=count;
jpayne@68 1399 maxPos=i;
jpayne@68 1400 }
jpayne@68 1401 }
jpayne@68 1402 }
jpayne@68 1403 return maxPos;
jpayne@68 1404 }
jpayne@68 1405
jpayne@68 1406 //TODO: Change this to take advantage of coreMask
jpayne@68 1407 //Requires special handling of core palindromes.
jpayne@68 1408 //Thus it would be easiest to just handle odd kmers, and K is normally 31 anyway.
jpayne@68 1409 public int fillRightCounts_safe(long kmer, long rkmer, int[] counts, long mask, int shift2){
jpayne@68 1410 assert(kmer==rcomp(rkmer));
jpayne@68 1411 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));}
jpayne@68 1412 kmer=(kmer<<2)&mask;
jpayne@68 1413 rkmer=(rkmer>>>2);
jpayne@68 1414 int max=-1, maxPos=0;
jpayne@68 1415
jpayne@68 1416 for(int i=0; i<=3; i++){
jpayne@68 1417 long kmer2=kmer|((long)i);
jpayne@68 1418 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2);
jpayne@68 1419 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
jpayne@68 1420 assert(kmer2==(kmer2&mask));
jpayne@68 1421 assert(rkmer2==(rkmer2&mask));
jpayne@68 1422 assert(kmer2==rcomp(rkmer2));
jpayne@68 1423 long key=toValue(kmer2, rkmer2);
jpayne@68 1424 int way=kmerToWay(key);
jpayne@68 1425 int count=tables[way].getValue(key);
jpayne@68 1426 assert(count==NOT_PRESENT || count>=0);
jpayne@68 1427 count=Tools.max(count, 0);
jpayne@68 1428 counts[i]=count;
jpayne@68 1429 if(count>max){
jpayne@68 1430 max=count;
jpayne@68 1431 maxPos=i;
jpayne@68 1432 }
jpayne@68 1433 }
jpayne@68 1434 return maxPos;
jpayne@68 1435 }
jpayne@68 1436
jpayne@68 1437 /** For KmerCompressor. */
jpayne@68 1438 public int fillRightCountsRcompOnly(long kmer, long rkmer, int[] counts, long mask, int shift2){
jpayne@68 1439 assert(kmer==rcomp(rkmer));
jpayne@68 1440 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));}
jpayne@68 1441 kmer=(kmer<<2)&mask;
jpayne@68 1442 rkmer=(rkmer>>>2);
jpayne@68 1443 int max=-1, maxPos=0;
jpayne@68 1444
jpayne@68 1445 for(int i=0; i<=3; i++){
jpayne@68 1446 long kmer2=kmer|((long)i);
jpayne@68 1447 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2);
jpayne@68 1448 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
jpayne@68 1449 assert(kmer2==(kmer2&mask));
jpayne@68 1450 assert(rkmer2==(rkmer2&mask));
jpayne@68 1451 assert(kmer2==rcomp(rkmer2));
jpayne@68 1452 long key=rkmer2;
jpayne@68 1453 int way=kmerToWay(key);
jpayne@68 1454 int count=tables[way].getValue(key);
jpayne@68 1455 assert(count==NOT_PRESENT || count>=0);
jpayne@68 1456 count=Tools.max(count, 0);
jpayne@68 1457 counts[i]=count;
jpayne@68 1458 if(count>max){
jpayne@68 1459 max=count;
jpayne@68 1460 maxPos=i;
jpayne@68 1461 }
jpayne@68 1462 }
jpayne@68 1463 return maxPos;
jpayne@68 1464 }
jpayne@68 1465
jpayne@68 1466 public int fillLeftCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){
jpayne@68 1467 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){
jpayne@68 1468 return fillLeftCounts_fast(kmer, rkmer, counts, mask, shift2);
jpayne@68 1469 }else{
jpayne@68 1470 return fillLeftCounts_safe(kmer, rkmer, counts, mask, shift2);
jpayne@68 1471 }
jpayne@68 1472 }
jpayne@68 1473
jpayne@68 1474 public int fillLeftCounts_fast(final long kmer0, final long rkmer0, int[] counts,
jpayne@68 1475 long mask, int shift2){
jpayne@68 1476 // assert((k&1)==1) : k;
jpayne@68 1477 assert(MASK_CORE);
jpayne@68 1478 final long rkmer=(rkmer0<<2)&mask;
jpayne@68 1479 final long kmer=(kmer0>>>2);
jpayne@68 1480 final long a=kmer&coreMask, b=rkmer&coreMask;
jpayne@68 1481
jpayne@68 1482 int max=-1, maxPos=0;
jpayne@68 1483 if(a==b){
jpayne@68 1484 return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
jpayne@68 1485 }else if(a>b){
jpayne@68 1486 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
jpayne@68 1487
jpayne@68 1488 final long core=a;
jpayne@68 1489 final int way=kmerToWay(core);
jpayne@68 1490 // final AbstractKmerTable table=tables[way];
jpayne@68 1491 final HashArray table=(HashArray) tables[way];
jpayne@68 1492 final int cell=table.kmerToCell(kmer);
jpayne@68 1493 final long incr=1L<<(2*(k-1));
jpayne@68 1494 long j=0;//Must be declared as a long, outside of loop
jpayne@68 1495 for(int i=0; i<=3; i++, j+=incr){
jpayne@68 1496 // final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]);
jpayne@68 1497 // final long kmer2=kmer|(((long)i)<<shift2);
jpayne@68 1498 // final long key2=toValue(rkmer2, kmer2);
jpayne@68 1499 // int way2=kmerToWay(key2);
jpayne@68 1500 // int count2=tables[way2].getValue(key2);
jpayne@68 1501 // count2=Tools.max(count2, 0);
jpayne@68 1502
jpayne@68 1503 final long key=kmer|j;
jpayne@68 1504 final int count=Tools.max(0, table.getValue(key, cell));
jpayne@68 1505 // int count=Tools.max(0, table.getValue(key));
jpayne@68 1506
jpayne@68 1507 // assert(way==way2) : i+", "+way+", "+way2;
jpayne@68 1508 // assert(key==key2) : i+", "+Long.toBinaryString(kmer)+", "+
jpayne@68 1509 // Long.toBinaryString(key)+", "+Long.toBinaryString(key2)+
jpayne@68 1510 // ", "+Long.toBinaryString(j)+", "+Long.toBinaryString(incr);
jpayne@68 1511 // assert(count==count2) : i+", "+count+", "+count2;
jpayne@68 1512
jpayne@68 1513 counts[i]=count;
jpayne@68 1514 if(count>max){
jpayne@68 1515 max=count;
jpayne@68 1516 maxPos=i;
jpayne@68 1517 }
jpayne@68 1518 }
jpayne@68 1519 return maxPos;
jpayne@68 1520 }else{
jpayne@68 1521 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
jpayne@68 1522 final long core=b;
jpayne@68 1523 final int way=kmerToWay(core);
jpayne@68 1524 // final AbstractKmerTable table=tables[way];
jpayne@68 1525 final HashArray table=(HashArray) tables[way];
jpayne@68 1526 final int cell=table.kmerToCell(rkmer);
jpayne@68 1527 for(int i=0, j=3; i<=3; i++, j--){
jpayne@68 1528 final long key=rkmer|j;
jpayne@68 1529 final int count=Tools.max(0, table.getValue(key, cell));
jpayne@68 1530 // final int count=Tools.max(0, table.getValue(key));
jpayne@68 1531
jpayne@68 1532 counts[i]=count;
jpayne@68 1533 if(count>max){
jpayne@68 1534 max=count;
jpayne@68 1535 maxPos=i;
jpayne@68 1536 }
jpayne@68 1537 }
jpayne@68 1538 return maxPos;
jpayne@68 1539 }
jpayne@68 1540 }
jpayne@68 1541
jpayne@68 1542 public int fillLeftCounts_safe(final long kmer0, final long rkmer0, int[] counts, long mask, int shift2){
jpayne@68 1543 assert(kmer0==rcomp(rkmer0));
jpayne@68 1544 if(verbose){outstream.println("fillLeftCounts: "+toText(kmer0)+", "+toText(rkmer0));}
jpayne@68 1545 long rkmer=(rkmer0<<2)&mask;
jpayne@68 1546 long kmer=(kmer0>>>2);
jpayne@68 1547 int max=-1, maxPos=0;
jpayne@68 1548 // assert(false) : shift2+", "+k;
jpayne@68 1549 for(int i=0; i<=3; i++){
jpayne@68 1550 final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]);
jpayne@68 1551 final long kmer2=kmer|(((long)i)<<shift2);
jpayne@68 1552 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
jpayne@68 1553 assert(kmer2==(kmer2&mask));
jpayne@68 1554 assert(rkmer2==(rkmer2&mask));
jpayne@68 1555 assert(kmer2==rcomp(rkmer2)) : "\n"+"kmer: \t"+toText(rcomp(rkmer2))+", "+toText(rcomp(kmer2));
jpayne@68 1556 long key=toValue(rkmer2, kmer2);
jpayne@68 1557 int way=kmerToWay(key);
jpayne@68 1558 int count=tables[way].getValue(key);
jpayne@68 1559 assert(count==NOT_PRESENT || count>=0);
jpayne@68 1560 count=Tools.max(count, 0);
jpayne@68 1561 counts[i]=count;
jpayne@68 1562 if(count>max){
jpayne@68 1563 max=count;
jpayne@68 1564 maxPos=i;
jpayne@68 1565 }
jpayne@68 1566 }
jpayne@68 1567 return maxPos;
jpayne@68 1568 }
jpayne@68 1569
jpayne@68 1570 /*--------------------------------------------------------------*/
jpayne@68 1571 /*---------------- Printing Methods ----------------*/
jpayne@68 1572 /*--------------------------------------------------------------*/
jpayne@68 1573
jpayne@68 1574 @Override
jpayne@68 1575 public boolean dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
jpayne@68 1576 if(fname==null){return false;}
jpayne@68 1577 Timer t=new Timer();
jpayne@68 1578
jpayne@68 1579 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
jpayne@68 1580 bsw.start();
jpayne@68 1581 for(AbstractKmerTable set : tables){
jpayne@68 1582 set.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining);
jpayne@68 1583 }
jpayne@68 1584 bsw.poisonAndWait();
jpayne@68 1585
jpayne@68 1586 t.stop();
jpayne@68 1587 if(printTime){outstream.println("Kmer Dump Time: \t"+t);}
jpayne@68 1588 return bsw.errorState;
jpayne@68 1589 }
jpayne@68 1590
jpayne@68 1591 @Override
jpayne@68 1592 public boolean dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
jpayne@68 1593 final int threads=Tools.min(Shared.threads(), tables.length);
jpayne@68 1594 if(threads<3 || DumpThread.NUM_THREADS==1){return dumpKmersAsBytes(fname, mincount, maxcount, printTime, remaining);}
jpayne@68 1595
jpayne@68 1596 if(fname==null){return false;}
jpayne@68 1597 Timer t=new Timer();
jpayne@68 1598
jpayne@68 1599 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
jpayne@68 1600 bsw.start();
jpayne@68 1601 DumpThread.dump(k, mincount, maxcount, tables, bsw, remaining);
jpayne@68 1602 bsw.poisonAndWait();
jpayne@68 1603
jpayne@68 1604 t.stop();
jpayne@68 1605 if(printTime){outstream.println("Kmer Dump Time: \t"+t);}
jpayne@68 1606 return bsw.errorState;
jpayne@68 1607 }
jpayne@68 1608
jpayne@68 1609 /*--------------------------------------------------------------*/
jpayne@68 1610 /*---------------- Recall Methods ----------------*/
jpayne@68 1611 /*--------------------------------------------------------------*/
jpayne@68 1612
jpayne@68 1613 private final long rcomp(long kmer){return AminoAcid.reverseComplementBinaryFast(kmer, k);}
jpayne@68 1614 private final StringBuilder toText(long kmer){return AbstractKmerTable.toText(kmer, k);}
jpayne@68 1615
jpayne@68 1616 /*--------------------------------------------------------------*/
jpayne@68 1617 /*---------------- Static Methods ----------------*/
jpayne@68 1618 /*--------------------------------------------------------------*/
jpayne@68 1619
jpayne@68 1620 /**
jpayne@68 1621 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined.
jpayne@68 1622 * @param kmer Forward kmer
jpayne@68 1623 * @param rkmer Reverse kmer
jpayne@68 1624 * @return Canonical value
jpayne@68 1625 */
jpayne@68 1626 public final long toValue(long kmer, long rkmer){
jpayne@68 1627 // long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
jpayne@68 1628 // return value;
jpayne@68 1629 if(!rcomp){return kmer;}
jpayne@68 1630 final long a=kmer&coreMask, b=rkmer&coreMask;
jpayne@68 1631 return a>b ? kmer : a<b ? rkmer : Tools.max(kmer, rkmer);
jpayne@68 1632 }
jpayne@68 1633
jpayne@68 1634 /*--------------------------------------------------------------*/
jpayne@68 1635 /*---------------- Final Primitives ----------------*/
jpayne@68 1636 /*--------------------------------------------------------------*/
jpayne@68 1637
jpayne@68 1638 @Override
jpayne@68 1639 public int kbig(){return k;}
jpayne@68 1640 @Override
jpayne@68 1641 public long filterMemory(int pass){return ((pass&1)==0) ? filterMemory0 : filterMemory1;}
jpayne@68 1642 @Override
jpayne@68 1643 public boolean ecco(){return ecco;}
jpayne@68 1644 @Override
jpayne@68 1645 public boolean qtrimLeft(){return qtrimLeft;}
jpayne@68 1646 @Override
jpayne@68 1647 public boolean qtrimRight(){return qtrimRight;}
jpayne@68 1648 @Override
jpayne@68 1649 public float minAvgQuality(){return minAvgQuality;}
jpayne@68 1650 @Override
jpayne@68 1651 public long tableMemory(){return tableMemory;}
jpayne@68 1652 @Override
jpayne@68 1653 public long estimatedKmerCapacity(){return estimatedKmerCapacity;}
jpayne@68 1654 @Override
jpayne@68 1655 public int ways(){return ways;}
jpayne@68 1656 @Override
jpayne@68 1657 public boolean rcomp(){return rcomp;}
jpayne@68 1658
jpayne@68 1659 public final int kmerToWay(final long kmer){
jpayne@68 1660 final int way=(int)((kmer&coreMask)%ways);
jpayne@68 1661 return way;
jpayne@68 1662 }
jpayne@68 1663
jpayne@68 1664 /** Hold kmers. A kmer X such that X%WAYS=Y will be stored in tables[Y] */
jpayne@68 1665 private AbstractKmerTable[] tables;
jpayne@68 1666
jpayne@68 1667 public AbstractKmerTable[] tables(){return tables;}
jpayne@68 1668
jpayne@68 1669 public long filterMemoryOverride=0;
jpayne@68 1670
jpayne@68 1671 public final int tableType; //AbstractKmerTable.ARRAY1D;
jpayne@68 1672
jpayne@68 1673 private final int bytesPerKmer;
jpayne@68 1674
jpayne@68 1675 private final long usableMemory;
jpayne@68 1676 private final long filterMemory0;
jpayne@68 1677 private final long filterMemory1;
jpayne@68 1678 private final long tableMemory;
jpayne@68 1679 private final long estimatedKmerCapacity;
jpayne@68 1680
jpayne@68 1681 /** Number of tables (and threads, during loading) */
jpayne@68 1682 private final boolean prealloc;
jpayne@68 1683
jpayne@68 1684 /** Number of tables (and threads, during loading) */
jpayne@68 1685 public final int ways;
jpayne@68 1686
jpayne@68 1687 /** Normal kmer length */
jpayne@68 1688 public final int k;
jpayne@68 1689 /** k-1; used in some expressions */
jpayne@68 1690 public final int k2;
jpayne@68 1691
jpayne@68 1692 public final long coreMask;
jpayne@68 1693
jpayne@68 1694 /** Look for reverse-complements as well as forward kmers. Default: true */
jpayne@68 1695 private final boolean rcomp;
jpayne@68 1696
jpayne@68 1697 /** Quality-trim the left side */
jpayne@68 1698 public final boolean qtrimLeft;
jpayne@68 1699 /** Quality-trim the right side */
jpayne@68 1700 public final boolean qtrimRight;
jpayne@68 1701 /** Trim bases at this quality or below. Default: 4 */
jpayne@68 1702 public final float trimq;
jpayne@68 1703 /** Error rate for trimming (derived from trimq) */
jpayne@68 1704 private final float trimE;
jpayne@68 1705
jpayne@68 1706 /** Throw away reads below this average quality after trimming. Default: 0 */
jpayne@68 1707 public final float minAvgQuality;
jpayne@68 1708 /** If positive, calculate average quality from the first X bases only. Default: 0 */
jpayne@68 1709 public final int minAvgQualityBases;
jpayne@68 1710
jpayne@68 1711 /** Ignore kmers with probability of correctness less than this */
jpayne@68 1712 public final float minProb;
jpayne@68 1713
jpayne@68 1714 /** Correct via overlap */
jpayne@68 1715 private final boolean ecco;
jpayne@68 1716
jpayne@68 1717 /** Attempt to merge via overlap prior to counting kmers */
jpayne@68 1718 private final boolean merge;
jpayne@68 1719
jpayne@68 1720 /*--------------------------------------------------------------*/
jpayne@68 1721 /*---------------- Walker ----------------*/
jpayne@68 1722 /*--------------------------------------------------------------*/
jpayne@68 1723
jpayne@68 1724 public Walker walk(){
jpayne@68 1725 return new WalkerKST();
jpayne@68 1726 }
jpayne@68 1727
jpayne@68 1728 public class WalkerKST extends Walker {
jpayne@68 1729
jpayne@68 1730 WalkerKST(){
jpayne@68 1731 w=tables[0].walk();
jpayne@68 1732 }
jpayne@68 1733
jpayne@68 1734 /**
jpayne@68 1735 * Fills this object with the next key and value.
jpayne@68 1736 * @return True if successful.
jpayne@68 1737 */
jpayne@68 1738 public boolean next(){
jpayne@68 1739 if(w==null){return false;}
jpayne@68 1740 if(w.next()){return true;}
jpayne@68 1741 if(tnum<tables.length){tnum++;}
jpayne@68 1742 w=(tnum<tables.length ? tables[tnum].walk() : null);
jpayne@68 1743 return w==null ? false : w.next();
jpayne@68 1744 }
jpayne@68 1745
jpayne@68 1746 public long kmer(){return w.kmer();}
jpayne@68 1747 public int value(){return w.value();}
jpayne@68 1748
jpayne@68 1749 private Walker w=null;
jpayne@68 1750
jpayne@68 1751 /** current table number */
jpayne@68 1752 private int tnum=0;
jpayne@68 1753 }
jpayne@68 1754
jpayne@68 1755 }