comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SketchObject.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.io.PrintStream;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Collections;
7 import java.util.Random;
8
9 import aligner.SingleStateAlignerFlat2;
10 import aligner.SingleStateAlignerFlatFloat;
11 import dna.AminoAcid;
12 import dna.Data;
13 import prok.GeneCaller;
14 import prok.GeneModel;
15 import prok.GeneModelParser;
16 import prok.ProkObject;
17 import shared.Parse;
18 import shared.Shared;
19 import shared.Timer;
20 import shared.Tools;
21 import stream.Read;
22 import structures.EntropyTracker;
23 import structures.IntList3;
24 import tax.TaxTree;
25
26 public class SketchObject {
27
28 /*--------------------------------------------------------------*/
29 /*---------------- Parsing ----------------*/
30 /*--------------------------------------------------------------*/
31
32 public static void main(String[] args){
33 Timer t=new Timer();
34 for(int i=0; i<1000000; i++){
35 wkidToAniExact(0.5, 32, 23, 0.000005);
36 }
37 t.stop();
38 System.out.println(t);
39 }
40
41 public static boolean parseSketchFlags(String arg, String a, String b){
42
43 if(parseCoding(arg, a, b)){
44 //
45 }
46
47 else if(a.equalsIgnoreCase("k")){
48 if(b.indexOf(',')>=0){
49 String[] split=b.split(",");
50 assert(split.length==2) : "Bad argument "+arg;
51 int x=Integer.parseInt(split[0]);
52 int y=Integer.parseInt(split[1]);
53 k=Tools.max(x, y);
54 k2=Tools.min(x, y);
55 if(k==k2){k2=0;}
56 }else{
57 k=Integer.parseInt(b);
58 k2=0;
59 }
60 setK=true;
61 }else if(a.equalsIgnoreCase("rcomp")){
62 rcomp=Parse.parseBoolean(b);
63 }else if(a.equalsIgnoreCase("minfakeid")){
64 minFakeID=Integer.parseInt(b);
65 }else if(a.equalsIgnoreCase("hashNames")){
66 hashNames=Parse.parseBoolean(b);
67 }else if(a.equalsIgnoreCase("preferSSUMap")){
68 preferSSUMap=Parse.parseBoolean(b);
69 }else if(a.equalsIgnoreCase("preferSSUMapForEuks") || a.equalsIgnoreCase("preferSSUMapEuks")){
70 preferSSUMapForEuks=Parse.parseBoolean(b);
71 }else if(a.equalsIgnoreCase("useSSUMapOnly")){
72 useSSUMapOnly=Parse.parseBoolean(b);
73 }else if(a.equalsIgnoreCase("useSSUMapOnlyForEuks") || a.equalsIgnoreCase("SSUMapOnlyEuks")){
74 useSSUMapOnlyForEuks=Parse.parseBoolean(b);
75 }else if(a.equalsIgnoreCase("skipNonCanonical")){
76 skipNonCanonical=Parse.parseBoolean(b);
77 }else if(a.equalsIgnoreCase("useSizeEstimateInScore") || a.equalsIgnoreCase("useSizeEstimate")){
78 useSizeEstimate=Parse.parseBoolean(b);
79 }else if(a.equalsIgnoreCase("blacklist") || a.equalsIgnoreCase("bl")){
80 blacklist=b;
81 }else if(a.equalsIgnoreCase("whitelist") || a.equalsIgnoreCase("usewhitelist")){
82 useWhitelist=Parse.parseBoolean(b);
83 }
84
85 else if(a.equalsIgnoreCase("sampleseed")){
86 sampleseed=Long.parseLong(b);
87 }
88
89 else if(a.equalsIgnoreCase("size") || a.equalsIgnoreCase("sketchsize")){
90 if("auto".equalsIgnoreCase(b)){
91 AUTOSIZE=true;
92 AUTOSIZE_LINEAR=false;
93 SET_AUTOSIZE=true;
94 }else if("linear".equalsIgnoreCase(b)){
95 AUTOSIZE=false;
96 AUTOSIZE_LINEAR=true;
97 SET_AUTOSIZE=true;
98 }else{
99 AUTOSIZE=false;
100 AUTOSIZE_LINEAR=false;
101 targetSketchSize=Parse.parseIntKMG(b);
102 SET_TARGET_SIZE=true;
103 }
104 }else if(a.equalsIgnoreCase("maxfraction") || a.equalsIgnoreCase("maxgenomefraction") || a.equalsIgnoreCase("mgf")){
105 maxGenomeFraction=Float.parseFloat(b);
106 }else if(a.equalsIgnoreCase("smallsketchsize")){
107 smallSketchSize=Integer.parseInt(b);
108 }else if(a.equalsIgnoreCase("minSketchSize")){
109 minSketchSize=Tools.max(1, Integer.parseInt(b));
110 }else if(a.equalsIgnoreCase("autosize")){
111 AUTOSIZE=Parse.parseBoolean(b);
112 if(AUTOSIZE){AUTOSIZE_LINEAR=false;}
113 SET_AUTOSIZE=true;
114 }else if(a.equalsIgnoreCase("autosizefactor") || a.equalsIgnoreCase("autosizefraction") || a.equalsIgnoreCase("autosizemult") || a.equalsIgnoreCase("sizemult")){
115 AUTOSIZE_FACTOR=Float.parseFloat(b);
116 SET_AUTOSIZE_FACTOR=true;
117 AUTOSIZE_LINEAR=false;
118 SET_AUTOSIZE=true;
119 }else if(a.equalsIgnoreCase("linear") || a.equalsIgnoreCase("lineardensity") || a.equalsIgnoreCase("density")){
120 if(Tools.isNumeric(b)){
121 AUTOSIZE_LINEAR_DENSITY=Double.parseDouble(b);
122 AUTOSIZE_LINEAR=true;
123 AUTOSIZE=false;
124 SET_AUTOSIZE=true;
125 }else{
126 AUTOSIZE_LINEAR=Parse.parseBoolean(b);
127 if(AUTOSIZE_LINEAR){AUTOSIZE=false;}
128 SET_AUTOSIZE=true;
129 }
130 }else if(a.equalsIgnoreCase("maxGenomeFractionSmall")){
131 maxGenomeFractionSmall=Float.parseFloat(b);
132 }else if(a.equalsIgnoreCase("keyFraction")){
133 double d=Double.parseDouble(b);
134 setKeyFraction(d);
135 }else if(a.equalsIgnoreCase("prealloc")){
136 if(b==null || Character.isLetter(b.charAt(0))){
137 if(Parse.parseBoolean(b)){
138 prealloc=1.0f;
139 }else{
140 prealloc=0;
141 }
142 }else{
143 prealloc=Float.parseFloat(b);
144 }
145 }
146
147 else if(a.equalsIgnoreCase("intmap")){
148 SketchIndex.useIntMap=Parse.parseBoolean(b);
149 }else if(a.equalsIgnoreCase("intmapsize")){
150 SketchIndex.intMapSize=Parse.parseIntKMG(b);
151 }else if(a.equalsIgnoreCase("bitsetbits")){
152 assert(false) : "bitsetbits should be 2.";
153 // bitSetBits=Integer.parseInt(b);
154 }
155
156 // else if(a.equalsIgnoreCase("minkmercount") || a.equalsIgnoreCase("minkeycount")){
157 // minKeyOccuranceCount=Integer.parseInt(b);
158 // }
159 else if(a.equalsIgnoreCase("sketchHeapFactor")){
160 sketchHeapFactor=Tools.max(1.0, Double.parseDouble(b));
161 }
162
163 else if(a.equalsIgnoreCase("killok")){
164 KILL_OK=Parse.parseBoolean(b);
165 }else if(a.equalsIgnoreCase("ssa") || a.equalsIgnoreCase("usessa")){
166 useSSA=Parse.parseBoolean(b);
167 }else if(a.equalsIgnoreCase("ssa3") || a.equalsIgnoreCase("usessa3")){
168 useSSA3=Parse.parseBoolean(b);
169 }else if(a.equalsIgnoreCase("exactani")){
170 EXACT_ANI=Parse.parseBoolean(b);
171 }else if(a.equalsIgnoreCase("translate") || a.equalsIgnoreCase("callgenes")){
172 translate=Parse.parseBoolean(b);
173 defaultParams.translate=translate;
174 }else if(a.equalsIgnoreCase("sixframes") || a.equalsIgnoreCase("6frames")){
175 sixframes=Parse.parseBoolean(b);
176 defaultParams.sixframes=sixframes;
177 if(sixframes){
178 translate=defaultParams.translate=true;
179 }
180 }else if(a.equalsIgnoreCase("processSSU") || a.equalsIgnoreCase("process16S") || a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("SSU")){
181 processSSU=Parse.parseBoolean(b);
182 }else if(a.equalsIgnoreCase("hashSeed")){
183 hashSeed=Long.parseLong(b);
184 //remakeCodes(hashSeed); //Happens in postParse
185 }
186
187 else if(a.equalsIgnoreCase("forceDisableMultithreadedFastq")){
188 forceDisableMultithreadedFastq=Parse.parseBoolean(b);
189 }
190
191 else if(a.equalsIgnoreCase("verifyentropy")){
192 EntropyTracker.verify=Parse.parseBoolean(b);
193 }else if(a.equalsIgnoreCase("entropyK")){
194 entropyK=Integer.parseInt(b);
195 }else if(a.equalsIgnoreCase("fastentropy") || a.equalsIgnoreCase("fentropy")){
196 if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.FAST;}
197 }else if(a.equalsIgnoreCase("mediumentropy") || a.equalsIgnoreCase("mentropy")){
198 if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.MEDIUM;}
199 }else if(a.equalsIgnoreCase("slowentropy") || a.equalsIgnoreCase("sentropy")){
200 if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.SLOW;}
201 }else if(a.equalsIgnoreCase("superslowentropy") || a.equalsIgnoreCase("ssentropy")){
202 if(Parse.parseBoolean(b)){EntropyTracker.speed=EntropyTracker.SUPERSLOW;}
203 }else if(a.equalsIgnoreCase("verbose2")){
204 verbose2=Parse.parseBoolean(b);
205 }else if(a.equalsIgnoreCase("loadSketchesFromSketchFile2")){
206 LOADER2=Parse.parseBoolean(b);
207 }
208
209 else if(a.equalsIgnoreCase("useToValue2") || a.equalsIgnoreCase("ToValue2")){
210 useToValue2=Parse.parseBoolean(b);
211 }else if(a.equals("ssu") || a.equals("ssufile")){
212 assert(false) : "ssufile is deprecated; please specify 16Sfile and/or 18Sfile independently";
213 }else if(a.equalsIgnoreCase("16Sfile")/* || a.equalsIgnoreCase("ssufile") || a.equalsIgnoreCase("silvafile")*/){
214 SSUMap.r16SFile=b;
215 if("auto".equalsIgnoreCase(SSUMap.r16SFile)){SSUMap.r16SFile=TaxTree.default16SFile();}
216 assert(SSUMap.r16SMap==null);
217 }else if(a.equalsIgnoreCase("18Sfile")){
218 SSUMap.r18SFile=b;
219 if("auto".equalsIgnoreCase(SSUMap.r18SFile)){SSUMap.r18SFile=TaxTree.default18SFile();}
220 assert(SSUMap.r18SMap==null);
221 }
222
223 // else if(a.equalsIgnoreCase("minHashValue")){
224 // minHashValue=Tools.max(1, Long.parseLong(b));
225 // }
226
227 else{
228 return false;
229 }
230 return true;
231 }
232
233 private static boolean parseCoding(String arg, String a, String b){
234 if(a.equalsIgnoreCase("deltaout") || a.equalsIgnoreCase("delta")){
235 deltaOut=Parse.parseBoolean(b);
236 }else if(a.equalsIgnoreCase("a33") || a.equalsIgnoreCase("a48")){
237 boolean x=Parse.parseBoolean(b);
238 if(x){CODING=A48;}
239 else if(CODING==A48){CODING=HEX;}
240 }else if(a.equalsIgnoreCase("hex")){
241 boolean x=Parse.parseBoolean(b);
242 if(x){CODING=HEX;}
243 else if(CODING==HEX){CODING=A48;}
244 }else if(a.equalsIgnoreCase("raw")){
245 boolean x=Parse.parseBoolean(b);
246 if(x){CODING=RAW;}
247 else if(CODING==RAW){CODING=A48;}
248 }else{
249 return false;
250 }
251 return true;
252 }
253
254 static int parseMode(String[] args){
255 int mode=defaultParams.mode;
256 for(String arg : args){
257 String[] split=arg.split("=");
258 String a=split[0].toLowerCase();
259 String b=split.length>1 ? split[1] : null;
260 int x=parseMode(arg, a, b);
261 if(x>-1){mode=x;}
262 }
263 return mode;
264 }
265
266 static int parseMode(String arg, String a, String b){
267 int mode_=-1;
268 if(a.equalsIgnoreCase("mode")){
269 assert(b!=null) : "Bad parameter: "+arg;
270 if(Tools.isDigit(b.charAt(0))){
271 mode_=Integer.parseInt(b);
272 }else if(b.equalsIgnoreCase("single") || b.equalsIgnoreCase("onesketch")){
273 mode_=ONE_SKETCH;
274 }else if(b.equalsIgnoreCase("taxa") || b.equalsIgnoreCase("pertaxa")){
275 mode_=PER_TAXA;
276 }else if(b.equalsIgnoreCase("sequence") || b.equalsIgnoreCase("persequence") || b.equalsIgnoreCase("header") || b.equalsIgnoreCase("perheader")){
277 mode_=PER_SEQUENCE;
278 // }else if(b.equalsIgnoreCase("header") || b.equalsIgnoreCase("perheader")){
279 // mode_=PER_HEADER;
280 }else if(b.equalsIgnoreCase("img")){
281 mode_=PER_IMG;
282 }else if(b.equalsIgnoreCase("perfile") || b.equalsIgnoreCase("file")){
283 mode_=PER_FILE;
284 }else{
285 assert(false) : "Bad parameter: "+arg;
286 }
287 }else if(a.equalsIgnoreCase("single") || a.equalsIgnoreCase("onesketch")){
288 mode_=ONE_SKETCH;
289 }else if(a.equalsIgnoreCase("taxa") || a.equalsIgnoreCase("pertaxa")){
290 mode_=PER_TAXA;
291 }else if(a.equalsIgnoreCase("sequence") || a.equalsIgnoreCase("persequence") || a.equalsIgnoreCase("header") || a.equalsIgnoreCase("perheader")){
292 mode_=PER_SEQUENCE;
293 // }else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("perheader")){
294 // mode_=PER_HEADER;
295 }else if(a.equalsIgnoreCase("perfile") || a.equalsIgnoreCase("file")){
296 mode_=PER_FILE;
297 }
298 return mode_;
299 }
300
301 /*--------------------------------------------------------------*/
302 /*---------------- Initialization ----------------*/
303 /*--------------------------------------------------------------*/
304
305 static synchronized void setTaxtree(String taxTreeFile, PrintStream outstream){
306 if(taxTreeFile==null){
307 taxtree=null;
308 return;
309 }
310 if(treefile!=null){
311 assert(!treefile.equals(taxTreeFile));
312 if(treefile.equals(taxTreeFile)){return;}
313 treefile=taxTreeFile;
314 }
315 taxtree=TaxTree.loadTaxTree(taxTreeFile, outstream, hashNames, false);
316 }
317
318 public static void reset(){
319 postparsed=false;
320 blacklist=null;
321 useWhitelist=false;
322 defaultParams=new DisplayParams();
323 amino=false;
324 translate=false;
325 sixframes=false;
326 }
327
328 public static void postParse(){
329 if(postparsed){return;}
330 postparsed=true;
331 IntList3.defaultMode=IntList3.UNIQUE; //Not really safe, if Seal uses Sketch...
332
333 if(defaultParams.amino()){amino=true;}
334 if(amino){Shared.AMINO_IN=true;}
335
336 if(amino8){
337 AminoAcid.AMINO_SHIFT=3;
338 System.err.println("Set AMINO_SHIFT to "+AminoAcid.AMINO_SHIFT);
339 }
340
341 if(amino || translate){
342 rcomp=false;
343 if(k>12){
344 int oldk=k;
345 int oldk2=k2;
346 // k=Tools.min(k, 63/AminoAcid.AMINO_SHIFT);
347 // k2=Tools.min(k2, k);
348 k=defaultKAmino;
349 k2=defaultK2Amino;
350 if(k==k2){k2=0;}
351 if(k!=oldk || k2!=oldk2){System.err.println("Set k to "+k+(k2>0 ? ","+k2 : ""));}
352 }
353 // AUTOSIZE_FACTOR=(AUTOSIZE_FACTOR*defaultK)/k;
354 // System.err.println("Set AUTOSIZE_FACTOR to "+AUTOSIZE_FACTOR);
355 }
356
357 if(aminoOrTranslate()){
358 bitsPerBase=5;//Maybe it is safe to keep these at 4 and 8 or 5 and 8; need to check.
359 bitsPerCycle=10;
360 }else{
361 bitsPerBase=2;
362 bitsPerCycle=8;
363 }
364 basesPerCycle=bitsPerCycle/bitsPerBase;
365 hashCycles=((k*bitsPerBase)+bitsPerCycle-1)/bitsPerCycle;
366
367 cycleMask=~((-1L)<<bitsPerCycle);
368 maxCycles=(64+bitsPerCycle-1)/bitsPerCycle;
369 codeIncrement=(int)(cycleMask+1);
370 codes=makeCodes(maxCycles, codeIncrement, hashSeed, false);
371 codes1D=makeCodes1D(codes);
372
373 if(k2>0){
374 assert(k2<k) : "k2 must be less than k.";
375 // assert(k2%basesPerCycle==0) : "k2 must be a multiple of "+basesPerCycle; //No longer required! Still recommended for speed though.
376
377 k2mask=~((-1L)<<(bitsPerBase*k2));
378 k2submask=~((-1L)<<(bitsPerBase*(k2%basesPerCycle)));
379 k2shift=(k-k2); //for toValue2
380 k2midmask=(k2mask<<((k2shift*bitsPerBase)/2)); //for toValue2
381 hashCycles2=k2/basesPerCycle;
382 }else{
383 useToValue2=false;
384 }
385 codeMax=hashCycles*codeIncrement;
386 codeMax2=hashCycles2*codeIncrement;
387
388 // hasher=k2<1 ? new Hasher1() : k2submask==0 ? new Hasher2() : new Hasher3();
389
390 if(translate || processSSU){
391 if(pgmFile==null){
392 pgmFile=Data.findPath("?model.pgm");
393 }
394 pgm=GeneModelParser.loadModel(pgmFile);
395 GeneCaller.call16S=processSSU;
396 GeneCaller.call18S=false;//processSSU;
397 GeneCaller.call23S=false;
398 GeneCaller.call5S=false;
399 GeneCaller.calltRNA=false;
400 GeneCaller.callCDS=translate;
401
402 if(GeneCaller.call16S || GeneCaller.call18S || GeneCaller.call23S){
403 ProkObject.loadLongKmers();
404 ProkObject.loadConsensusSequenceFromFile(true, true);
405 }
406 }
407
408 // if(HASH_VERSION<2 && useToValue2){HASH_VERSION=2;}
409 // else if(HASH_VERSION==2 && !useToValue2){HASH_VERSION=1;}
410 // assert(blacklist!=null) : blacklist+", "+(blacklist==null);
411 if(blacklist!=null){Blacklist.addFiles(blacklist);}
412
413 // System.err.println("amino="+amino+", k="+k+", k2="+k2+", bitsPerCycle="+bitsPerCycle+", bitsPerBase="+bitsPerBase+
414 // ", basesPerCycle="+basesPerCycle+", hashCycles="+hashCycles+", k2mask="+k2mask+", k2submask="+k2submask+", hashCycles2="+hashCycles2+
415 // ", codeMax="+codeMax+", codeMax2="+codeMax2);
416 // structures.IntList3.ascending=false;//123 for testing
417
418 alignerPool=new AlignmentThreadPool(Shared.threads());
419 }
420
421 /*--------------------------------------------------------------*/
422 /*---------------- Hashing ----------------*/
423 /*--------------------------------------------------------------*/
424
425 private static void remakeCodes(long hashSeed){
426 long[][] codes2=makeCodes(maxCycles, codeIncrement, hashSeed, false);
427 long[] codes1D2=makeCodes1D(codes2);
428 for(int i=0; i<codes2.length; i++){
429 for(int j=0; j<codes2[i].length; j++){
430 codes[i][j]=codes2[i][j];
431 }
432 }
433 for(int i=0; i<codes1D2.length; i++){
434 codes1D[i]=codes1D2[i];
435 }
436 }
437
438 public static long[][] makeCodes(int symbols, int modes, long seed, boolean positive){
439 Random randy=new Random(seed);
440 long mask=positive ? Long.MAX_VALUE : -1L;
441 long[][] r=new long[symbols][modes];
442 for(int i=0; i<symbols; i++){
443 for(int j=0; j<modes; j++){
444 r[i][j]=randy.nextLong()&mask;
445 }
446 }
447 for(int i=0; i<3; i++) {
448 antialias(r, randy);
449 }
450 return r;
451 }
452
453 private static void antialias(long[][] matrix, Random randy){
454 for(long[] array : matrix){
455 antialias(array, randy);
456 }
457 }
458
459 private static void antialias(long[] array, Random randy){
460 for(int i=0; i<64; i++){
461 antialiasNumbers(array, randy);
462 antialiasBit(array, randy, i);
463 }
464 }
465
466 private static void antialiasBit(long[] array, Random randy, int bit){
467 int half=array.length/2;
468 long ones=0;
469 for(int i=0; i<array.length; i++){
470 ones+=(array[i]>>bit)&1L;
471 }
472 final long orMask=1L<<bit;
473 final long andMask=~orMask;
474 while(ones<half-1){
475 int loc=randy.nextInt(array.length);
476 while((array[loc]&orMask)!=0){
477 loc=randy.nextInt(array.length);
478 }
479 array[loc]|=orMask;
480 ones++;
481 }
482 while(ones>half+1){
483 int loc=randy.nextInt(array.length);
484 while((array[loc]&orMask)==0){
485 loc=randy.nextInt(array.length);
486 }
487 array[loc]&=andMask;
488 ones--;
489 }
490 }
491
492 private static void antialiasNumbers(long[] array, Random randy){
493 for(int i=0; i<array.length; i++){
494 array[i]=antialiasNumber(array[i], randy);
495 }
496 }
497
498 private static long antialiasNumber(long number, Random randy){
499 int ones=Long.bitCount(number);
500 while(Long.bitCount(number)<31){
501 number=number|(1L<<randy.nextInt(64));
502 }
503 while(Long.bitCount(number)>33){
504 number=number&~(1L<<randy.nextInt(64));
505 }
506 return number;
507 }
508
509 // public static long[] makeCodes1D(int symbols, int modes, long seed, boolean positive){
510 // Random randy=Shared.threadLocalRandom(seed);
511 // long mask=positive ? Long.MAX_VALUE : -1L;
512 // long[] r=new long[symbols*modes];
513 // for(int i=0; i<r.length; i++){
514 // r[i]=randy.nextLong()&mask;
515 // }
516 // return r;
517 // }
518
519 public static long[] makeCodes1D(long[][] codes2D){
520 final int len=codes2D.length*codes2D[0].length;
521 long[] codes1D=new long[len];
522 int k=0;
523 for(long[] array : codes2D){
524 for(long x : array){
525 codes1D[k]=x;
526 k++;
527 }
528 }
529 return codes1D;
530 }
531
532 public static final long hash(long kmer){//Avoid using this.
533 return rcomp ? hash(kmer, AminoAcid.reverseComplementBinaryFast(kmer, k)) : hash(kmer, kmer);
534 }
535
536 public static final long hash(long kmer, long rkmer){
537 assert(postparsed);
538 // return hasher.hash_inner(kmer); //Slow
539 // return k2<1 ? hash1(kmer) : hash2(kmer);
540 if(useToValue2){return hashToValue2(kmer, rkmer);}
541 final long key=toValue(kmer, rkmer);
542 return k2<1 ? hash1(key) : k2submask==0 ? hash2(key) : hash3(key);
543 }
544
545 /**
546 * New version.
547 * Generates a hash code from a kmer.
548 * @param kmer Kmer to hash
549 * @return Hash code
550 */
551 private static final long hashToValue2(final long kmer0, final long rkmer0){
552 // return Tools.max(Tools.hash64shift(kmer0), Tools.hash64shift(kmer0&0xFFFFFFFFFFFFL));//Something like this would be around 10% faster; not worth it.
553 long kmer=kmer0, rkmer=rkmer0;
554 final long key;
555 final boolean useK1;
556 if(rcomp){
557 assert(!amino && !translate);
558 final long kmer2=((kmer&k2midmask)>>>k2shift);
559 final long rkmer2=((rkmer&k2midmask)>>>k2shift);
560 final long max2=Tools.max(kmer2, rkmer2);
561 // long cmasked=max2&kmerChoiceMask;
562 // useK1=kmerChoiceBitset.get((int)cmasked);
563 useK1=((max2%4999L)&1L)==0L;
564 key=useK1 ? Tools.max(kmer, rkmer) : max2;
565 // System.err.println("\n"+longer+", "+max2+", "+(max2%4999L)+", "+((max2%4999L)&1L)+
566 // ", "+Long.toHexString(kmer)+", "+Long.toHexString(max2)+", "+Long.toHexString(value));
567 // long value=longer ? (kmer2>rkmer2 ? kmer : rkmer) : max2;
568 // System.err.println("\n"+AminoAcid.kmerToString(kmer, k)+"\n"+AminoAcid.kmerToString(rkmer, k)+
569 // "\n"+AminoAcid.kmerToString(kmer2, k2)+"\n"+AminoAcid.kmerToString(rkmer2, k2)+"\n"+AminoAcid.kmerToString(value, k)+"\n");
570 // assert(kmer2==AminoAcid.reverseComplementBinaryFast(rkmer2, k2));)
571 }else{
572 assert(amino || translate);
573 final long kmer2=(kmer&k2mask);
574 useK1=((kmer2%4999)&1L)==0L;
575 key=useK1 ? kmer : kmer2;
576 }
577 // if(key%5!=0){return -1;}//This makes it a little faster.
578 final long bit=useK1 ? 0 : 1; //Note that this gets reversed later during the subtraction process
579
580 // System.err.println(bit+", "+Long.toHexString(kmer)+", "+Long.toHexString(k2mask));
581 // assert(bit==0) : bit+", "+Long.toHexString(kmer);
582
583 long hashcode=key, data=key;
584 // for(int i=0; i<codeMax; i+=codeIncrement){
585 // int x=(int)(data&cycleMask);
586 // data>>>=bitsPerCycle;
587 // hashcode^=codes1D[i+x];
588 // }
589 // for(int i=0; data!=0; i+=codeIncrement){
590 // int x=(int)(data&cycleMask);
591 // data>>>=bitsPerCycle;
592 // hashcode^=codes1D[i+x];
593 // }
594 {//5% faster than other loops.
595 int i=0;
596 do{
597 int x=(int)(data&cycleMask);
598 data>>>=bitsPerCycle;
599 hashcode^=codes1D[i+x];
600 i+=codeIncrement;
601 }while(data!=0);
602 }
603 hashcode=(hashcode&(-2L))|bit;
604
605 return hashcode;
606 }
607
608 /**
609 * Fastest version!
610 * Generates a hash code from a kmer.
611 * @param kmer Kmer to hash
612 * @return Hash code
613 */
614 private static final long hash1(long kmer){
615 // if(kmer%5!=0){return -1;}
616 long code=kmer;
617 for(int i=0; i<codeMax; i+=codeIncrement){
618 int x=(int)(kmer&cycleMask);
619 kmer>>=bitsPerCycle;
620 code^=codes1D[i+x];
621 }
622
623 return code;
624 }
625
626
627 /**
628 * Generates a hash code from a kmer, using dual kmer lengths.
629 * @param kmer Kmer to hash
630 * @return Hash code
631 */
632 private static final long hash2(final long kmer0){
633 // return Tools.hash64shift(kmer0);
634 long kmer=kmer0;
635 long code=0;
636
637 for(int i=0; i<codeMax2; i+=codeIncrement){
638 int x=(int)(kmer&cycleMask);
639 kmer>>=bitsPerCycle;
640 code^=codes1D[i+x];
641 }
642 final long code2=code^(k2mask&kmer0);
643 for(int i=codeMax2; i<codeMax; i+=codeIncrement){
644 int x=(int)(kmer&cycleMask);
645 kmer>>=bitsPerCycle;
646 code^=codes1D[i+x];
647 }
648 return ((code&1)==1) ? code2 : code^kmer0; //Random; faster than max.
649 }
650
651
652 /**
653 * Generates a hash code from a kmer, using dual kmer lengths, allowing K2 to be a non-multiple-of-4.
654 * Uses one additional and, xor, and lookup.
655 * @param kmer Kmer to hash
656 * @return Hash code
657 */
658 private static final long hash3(final long kmer0){
659 // return Tools.hash64shift(kmer0);
660
661 long kmer=kmer0;
662 long code=0;
663 assert(k2submask!=0);
664
665 int i=0;
666 for(; i<codeMax2; i+=codeIncrement){
667 int x=(int)(kmer&cycleMask);
668 kmer>>=bitsPerCycle;
669 code^=codes1D[i+x];
670 }
671 final int residual=(int)(kmer&k2submask);
672 final long code2=code^(k2mask&kmer0)^codes1D[i+residual];
673 for(; i<codeMax; i+=codeIncrement){
674 int x=(int)(kmer&cycleMask);
675 kmer>>=bitsPerCycle;
676 code^=codes1D[i+x];
677 }
678 return ((code&1)==1) ? code2 : code^kmer0; //Random; faster than max.
679 }
680
681
682 /*--------------------------------------------------------------*/
683 /*---------------- Convenience Methods ----------------*/
684 /*--------------------------------------------------------------*/
685
686 public static final float align(byte[] query, byte[] ref){
687 SingleStateAlignerFlat2 ssa=GeneCaller.getSSA();
688 int a=0, b=ref.length-1;
689 int[] max=ssa.fillUnlimited(query, ref, a, b, -9999);
690 if(max==null){return 0;}
691
692 final int rows=max[0];
693 final int maxCol=max[1];
694 final int maxState=max[2];
695 final float id=ssa.tracebackIdentity(query, ref, a, b, rows, maxCol, maxState, null);
696 return id;
697 }
698
699 public static final float alignAndMakeMatch(Read r, byte[] ref){
700 byte[] query=r.bases;
701 SingleStateAlignerFlat2 ssa=GeneCaller.getSSA();
702 int a=0, b=ref.length-1;
703 int[] max=ssa.fillUnlimited(query, ref, a, b, -9999);
704 if(max==null){return 0;}
705
706 final int rows=max[0];
707 final int maxCol=max[1];
708 final int maxState=max[2];
709 byte[] match=ssa.traceback(query, ref, a, b, rows, maxCol, 0);
710 int[] score=ssa.score(query, ref, a, b, rows, maxCol, 0);
711 final float id=Read.identity(match);
712 r.match=match;
713 r.start=score[1];
714 r.stop=score[2];
715 r.setMapped(true);
716 return id;
717 }
718
719 public static final float alignAndMakeMatch(Read r, byte[] ref, float[] refWeights/*, float[] insWeights, float[] delWeights*/){
720 byte[] query=r.bases;
721 SingleStateAlignerFlatFloat ssa=GeneCaller.getSSAF();
722 // ssa.setWeights(refWeights, insWeights, delWeights);
723 ssa.setWeights(refWeights);
724 int a=0, b=ref.length-1;
725 int[] max=ssa.fillUnlimited(query, ref, a, b, -9999);
726 if(max==null){return 0;}
727
728 final int rows=max[0];
729 final int maxCol=max[1];
730 final int maxState=max[2];
731 byte[] match=ssa.traceback(query, ref, a, b, rows, maxCol, 0);
732 int[] score=ssa.score(query, ref, a, b, rows, maxCol, 0);
733 final float id=Read.identity(match);
734 r.match=match;
735 r.start=score[1];
736 r.stop=score[2];
737 r.setMapped(true);
738 return id;
739 }
740
741 static String fixMeta(String s){
742 if(s==null){return null;}
743 int colon=s.indexOf(':');
744 assert(colon>=0);
745 if(s.length()==colon+5 && s.endsWith(":null")){return null;}
746 return s.replace('\t', ' ');
747 }
748
749 static ArrayList<String> fixMeta(ArrayList<String> list){
750 if(list==null || list.isEmpty()){return null;}
751 for(int i=0; i<list.size(); i++){
752 String s=list.get(i);
753 s=fixMeta(s);
754 if(s==null){
755 list.remove(i);
756 i--;
757 }else{
758 list.set(i, s);
759 }
760 }
761 if(list.isEmpty()){return null;}
762 list.trimToSize();
763 Collections.sort(list);
764 return list;
765 }
766
767 public static final float aniToWkid(final double ani){
768 final float wkid;
769 if(ani<=0){
770 wkid=0;
771 }else if(k2<1){
772 wkid=(float)Math.pow(ani, k);
773 }else{
774 wkid=0.5f*(float)(Math.pow(ani, k)+Math.pow(ani, k2));
775 }
776 return wkid;
777 }
778
779 public static final float wkidToAniExact(final double wkid, final int A, final int B, final double maxDeviation){
780 assert(A>B);
781 assert(maxDeviation>0);
782 final double logWkid=Math.log(wkid);
783 final double invA=1.0/A;
784 final double invB=1.0/B;
785
786 double aniUpper=Math.exp(logWkid*invA);
787 double aniLower=Math.exp(logWkid*invB);
788 assert(aniLower<=aniUpper);
789 double aniEst=(aniLower+aniUpper)*0.5f;
790 // System.err.println(aniEst);
791 // if(B>16){//Fast mode
792 // aniUpper=(aniUpper+3*aniEst)*0.25;
793 // aniLower=(aniLower+3*aniEst)*0.25;
794 // }
795 double wkidEst=aniToWkid(aniEst, A, B);
796 int iters=1;
797 // System.err.println(aniLower+"\t"+aniUpper+"\t"+aniEst+"\t"+aniToWkid(aniEst, A, B)+"\t"+(wkidEst-wkid));
798 while(Math.abs(wkidEst-wkid)>maxDeviation && iters<40){
799 if(wkidEst<wkid){aniLower=aniEst;}
800 else{aniUpper=aniEst;}
801 aniEst=(aniLower+aniUpper)*0.5f;
802 wkidEst=aniToWkid(aniEst, A, B);
803 // System.err.println(aniLower+"\t"+aniUpper+"\t"+aniEst+"\t"+wkidEst+"\t"+(wkidEst-wkid));
804 iters++;
805 }
806 // System.err.println("Iterations: "+iters);
807 return (float)aniEst;
808 }
809
810 public static double aniToWkid(double ani, int A, int B){
811 if(A<B){int C=A; A=B; B=C;}//Swap
812 double aPow=Math.pow(ani, A);
813 double bPow=Math.pow(ani, B);
814 // return 0.5*(aPow+bPow);
815 return useToValue2 ? 0.5*(aPow+bPow) : 0.5*(aPow+(bPow*0.4+aPow*0.6));
816 }
817
818 //From Kayla
819 // public static double aniToWkid(double ANI, int K2, int K1){
820 // if(K2<K1){int C=K2; K2=K1; K1=C;}//Swap
821 //
822 // return 0.5*((1 - (1 - Math.pow(ANI, K2-K1))*Math.pow(ANI, K1))*Math.pow(ANI, K2) +
823 // (1 + (1 - Math.pow(ANI, K2-K1))*Math.pow(ANI, K1))*Math.pow(ANI, K1));
824 // }
825
826 public static double aniToWkid(double ani, int A){
827 return Math.pow(ani, A);
828 }
829
830 public static final float wkidToAniExact(final double wkid, final int k){
831 return (float)Math.exp(Math.log(wkid)/(k));
832 }
833
834 public static final float wkidToAni(final double wkid){
835 final float ani;
836 if(wkid<=0){
837 ani=0;
838 }else if(k2<1){
839 ani=(float)Math.exp(Math.log(wkid)/k);
840 }else{
841 if(EXACT_ANI){return wkidToAniExact(wkid, k, k2, 0.00005);}
842
843 //This is linear interpolation which is not correct. But for some reason it works really well.
844 final double log=Math.log(wkid);
845
846 // double ani1=Math.exp(log/k);
847 // double ani2=Math.exp(log/k2);
848 // ani=(float)(0.5*(ani1+ani2));
849
850 //alternatively... this one seems to work better.
851 // ani=(float)Math.exp(2*log/(k*1.1+k2*0.9));
852 ani=(float)Math.exp(2*log/(1.2*k+.8*k2));
853 }
854 return ani;
855 }
856
857 // public static void kmerArrayToSketchArray(long[] kmers){
858 // for(int i=0; i<kmers.length; i++){
859 // long kmer=kmers[i];
860 // long hash=SketchTool.hash(kmer);
861 // assert(hash>=minHashValue);
862 // hash=Long.MAX_VALUE-hash;
863 // kmers[i]=hash;
864 // }
865 // Shared.sort(kmers);
866 // }
867
868 public static void hashArrayToSketchArray(long[] keys){
869 for(int i=0; i<keys.length; i++){
870 long hash=keys[i];
871 assert(hash>=minHashValue);
872 hash=Long.MAX_VALUE-hash;
873 keys[i]=hash;
874 }
875 Shared.sort(keys);
876 }
877
878 public static final long genomeSizeEstimate(long min, int length) {
879 if(length==0){return 0;}
880 double est=((double)Long.MAX_VALUE)*2*length/(Tools.max(min, 1));
881 // if(k2>0){est*=0.5;} //This is necessary if the hash function uses max, but not random.
882 // System.err.println("max="+Long.MAX_VALUE+", min="+min+", len="+length+", est="+(long)est);
883 // new Exception().printStackTrace(System.err);
884 return (long)Math.ceil(est);
885 }
886
887 public static final int toSketchSize(long genomeSizeBases, long genomeSizeKmers, long genomeSizeEstimate, int maxSketchSize){
888 // assert(false) : genomeSizeBases+", "+genomeSizeKmers+", "+genomeSizeEstimate+", "+maxSketchSize+", "+useSizeEstimate;
889 if(genomeSizeEstimate>0 && useSizeEstimate){
890 return toSketchSizeKmers(genomeSizeEstimate, maxSketchSize);
891 }
892 if(genomeSizeKmers>0){
893 return toSketchSizeKmers(genomeSizeKmers, maxSketchSize);
894 }
895 assert(genomeSizeBases>0) : "BBSketch does not currently support empty files.\n"
896 +genomeSizeBases+", "+genomeSizeKmers+", "+genomeSizeEstimate+", "+maxSketchSize+", "+useSizeEstimate;
897 return toSketchSizeBases(genomeSizeBases, maxSketchSize);
898 }
899
900 private static final int toSketchSizeBases(long genomeSizeBases, int maxSketchSize) {
901 return toSketchSizeKmers(Tools.max(0, genomeSizeBases-k+1), maxSketchSize);
902 }
903
904 private static final int toSketchSizeKmers(long genomeSizeKmers, int maxSketchSize) {
905 // System.err.println(genomeSizeKmers+", "+maxSketchSize);
906 // new Exception().printStackTrace();
907 if(AUTOSIZE){
908 if(aminoOrTranslate()){
909 float linear1=Tools.min(60+0.5f*(float)Math.sqrt(genomeSizeKmers),
910 maxGenomeFractionSmall*genomeSizeKmers*2);
911 float linear2=genomeSizeKmers*maxGenomeFraction*2;
912 float poly=0+1f*(float)Math.sqrt(genomeSizeKmers)+90f*(float)Math.pow(genomeSizeKmers, 0.3);
913 float log=Tools.max(1000, -4000+3500*(float)Math.log(Tools.max(1, genomeSizeKmers)));
914 float min=Tools.min(Tools.max(linear1, linear2), poly, log);
915 assert(min>=Integer.MIN_VALUE && min<=Integer.MAX_VALUE) : min;
916
917 // System.err.println(genomeSizeKmers+" -> "+linear1+", "+linear2+", "+poly+", "+log);
918
919 return (int)Tools.min(genomeSizeKmers*keyFraction2, min*AUTOSIZE_FACTOR);
920 // return (int)Tools.max(1, min*AUTOSIZE_FACTOR);
921 }else{
922 float linear1=Tools.min(smallSketchSize+0.5f*(float)Math.sqrt(genomeSizeKmers),
923 maxGenomeFractionSmall*genomeSizeKmers-10);
924 float linear2=genomeSizeKmers*maxGenomeFraction;
925 float poly=0+1f*(float)Math.sqrt(genomeSizeKmers)+90f*(float)Math.pow(genomeSizeKmers, 0.3);
926 float log=Tools.max(1000, -4000+3500*(float)Math.log(Tools.max(1, genomeSizeKmers))+8f*(float)Math.pow(genomeSizeKmers, 0.3));
927 float min=Tools.min(Tools.max(linear1, linear2), poly, log);
928 assert(min>=Integer.MIN_VALUE && min<=Integer.MAX_VALUE) : min;
929
930 // System.err.println(genomeSizeKmers+" -> "+linear1+", "+linear2+", "+poly+", "+log);
931
932 int result=(int)Tools.min(genomeSizeKmers*keyFraction2, min*AUTOSIZE_FACTOR);
933 // System.err.println(result);
934 return result;
935 // return (int)Tools.max(1, min*AUTOSIZE_FACTOR);
936 }
937 }else if(AUTOSIZE_LINEAR){
938 if(aminoOrTranslate()){
939 return (int)(Tools.min(10000000, Tools.min(maxGenomeFractionSmall, AUTOSIZE_LINEAR_DENSITY)*genomeSizeKmers));
940 }else{
941 return (int)(Tools.min(10000000, Tools.min(maxGenomeFractionSmall, AUTOSIZE_LINEAR_DENSITY)*genomeSizeKmers));
942 }
943 }else{
944 if(aminoOrTranslate()){//kmers are roughly 3x smaller...
945 return Tools.min((int)(2+3*maxGenomeFraction*genomeSizeKmers), maxSketchSize);
946 }else{
947 return Tools.min((int)(2+maxGenomeFraction*genomeSizeKmers), maxSketchSize);
948 }
949 }
950 }
951
952 // static final long toValue2(long kmer, long rkmer){
953 // assert(k2>0 && k2<k) : "k="+k+","+k2;
954 //
955 // final long value;
956 // if(rcomp){
957 // assert(!amino && !translate);
958 //// if(!rcomp){return kmer;}
959 // long kmer2=((kmer&k2midmask)>>>k2shift);
960 // long rkmer2=((rkmer&k2midmask)>>>k2shift);
961 //
962 //// assert(kmer2>=0);
963 //// assert(kmer<0 || kmer2<=kmer);
964 //// assert(rkmer<0 || rkmer2<=kmer);
965 //
966 //// assert(false) : "\n"+Long.toBinaryString(kmer)+
967 //// "\n"+Long.toBinaryString(rkmer)+
968 //// "\n"+Long.toBinaryString(kmer2)+
969 //// "\n"+Long.toBinaryString(rkmer2);
970 //
971 // //TODO: Slow
972 //// assert(kmer==AminoAcid.reverseComplementBinaryFast(rkmer, k)) :
973 //// "\n"+AminoAcid.kmerToString(kmer, k)+"\n"+AminoAcid.kmerToString(rkmer, k)+"\n"+AminoAcid.kmerToString(AminoAcid.reverseComplementBinaryFast(rkmer, k), k)+"\n";
974 //// assert(kmer2==AminoAcid.reverseComplementBinaryFast(rkmer2, k2));
975 // long max2=Tools.max(kmer2, rkmer2);
976 //// long cmasked=max2&kmerChoiceMask;
977 //// boolean longer=kmerChoiceBitset.get((int)cmasked);
978 // boolean longer=((max2%4999L)&1L)==0L;
979 // value=longer ? Tools.max(kmer, rkmer) : max2;
980 //// System.err.println("\n"+longer+", "+max2+", "+(max2%4999L)+", "+((max2%4999L)&1L)+
981 //// ", "+Long.toHexString(kmer)+", "+Long.toHexString(max2)+", "+Long.toHexString(value));
982 //// long value=longer ? (kmer2>rkmer2 ? kmer : rkmer) : max2;
983 //// System.err.println("\n"+AminoAcid.kmerToString(kmer, k)+"\n"+AminoAcid.kmerToString(rkmer, k)+
984 //// "\n"+AminoAcid.kmerToString(kmer2, k2)+"\n"+AminoAcid.kmerToString(rkmer2, k2)+"\n"+AminoAcid.kmerToString(value, k)+"\n");
985 //// assert(kmer2==AminoAcid.reverseComplementBinaryFast(rkmer2, k2));)
986 // }else{
987 // assert(amino || translate);
988 //// long kmer2=(kmer&k2mask);
989 // long kmer2=((kmer&k2midmask)>>>k2shift);
990 // boolean longer=((kmer2%4999)&1)==0;
991 // value=longer ? kmer : kmer2;
992 // }
993 // return value;
994 // }
995
996 private static final long toValue(long kmer, long rkmer){
997 // assert(toValue2(kmer, rkmer)==toValue2(rkmer, kmer));
998 assert(!useToValue2);
999 // if(useToValue2){return toValue2(kmer, rkmer);}
1000 long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
1001 return value;
1002 }
1003
1004 /*--------------------------------------------------------------*/
1005 /*---------------- Constants ----------------*/
1006 /*--------------------------------------------------------------*/
1007
1008 public static final int RAW=0, HEX=1, A48=2;
1009 public static final char[] codingArray={'R', 'H', 'A'};
1010
1011 static final byte[] hexTable=new byte[128];
1012 static {
1013 Arrays.fill(hexTable, (byte)-1);
1014 for(int i='0'; i<='9'; i++){
1015 hexTable[i]=(byte)(i-'0');
1016 }
1017 for(int i='A'; i<='F'; i++){
1018 hexTable[i]=hexTable[i+'a'-'A']=(byte)(i-'A'+10);
1019 }
1020 hexTable['x']=hexTable['X']=hexTable['-']=hexTable['+']=0;
1021 }
1022
1023 public static final int ONE_SKETCH=1, PER_SEQUENCE=2, PER_TAXA=3, PER_IMG=4/*, PER_HEADER=5*/, PER_FILE=6;
1024
1025 /*--------------------------------------------------------------*/
1026 /*---------------- Default Locations ----------------*/
1027 /*--------------------------------------------------------------*/
1028
1029 // public static ArrayList<String> IMG_PATH(){return makePaths(IMG_PATH, 31);}
1030 // public static ArrayList<String> NT_PATH(){return makePaths(NT_PATH, 31);}
1031 // public static ArrayList<String> NR_PATH(){throw new RuntimeException("NR is not currently available.");}
1032 // public static ArrayList<String> REFSEQ_PATH(){return makePaths(REFSEQ_PATH, 31);}
1033 // public static ArrayList<String> SILVA_PATH(){return makePaths(SILVA_PATH, 31);}
1034
1035 private static ArrayList<String> makePaths(String pattern, int files){
1036 ArrayList<String> list=new ArrayList<String>(files);
1037 for(int i=0; i<files; i++){
1038 list.add(pattern.replace("#", ""+i));
1039 }
1040 return list;
1041 }
1042
1043 private static final String IMG_PATH="/global/cfs/cdirs/bbtools/img/current/img#.sketch";
1044 private static final String NT_PATH="/global/cfs/cdirs/bbtools/nt/current/taxa#.sketch";
1045 private static final String NR_PATH="/global/cfs/cdirs/bbtools/nr/current/taxa#.sketch";
1046 private static final String REFSEQ_PATH="/global/cfs/cdirs/bbtools/refseq/current/taxa#.sketch";
1047 private static final String REFSEQ_PATH_BIG="/global/cfs/cdirs/bbtools/refseq/current/big#.sketch";
1048 private static final String SILVA_PATH="/global/cfs/cdirs/bbtools/silva/latest/both_taxa#.sketch";
1049 private static final String PROKPROT_PATH="/global/cfs/cdirs/bbtools/refseq/current/prot/taxa#.sketch";
1050 private static final String PROKPROT_PATH_BIG="/global/cfs/cdirs/bbtools/refseq/current/prot/big#.sketch";
1051 private static final String MITO_PATH="/global/cfs/cdirs/bbtools/mito2/taxa#.sketch";
1052 private static final String FUNGI_PATH="/global/cfs/cdirs/bbtools/mito2/fungi#.sketch";
1053
1054 private static final String IMG_PATH_IGBVM="/data/sketch/img/current/img#.sketch";
1055 private static final String NT_PATH_IGBVM="/data/sketch/nt/current/taxa#.sketch";
1056 private static final String NR_PATH_IGBVM="/data/sketch/nr/current/taxa#.sketch";
1057 private static final String REFSEQ_PATH_IGBVM="/data/sketch/refseq/current/taxa#.sketch";
1058 private static final String REFSEQ_PATH_BIG_IGBVM="/data/sketch/refseq/current/big#.sketch";
1059 private static final String SILVA_PATH_IGBVM="/data/sketch/silva/current/both_taxa#.sketch";
1060 private static final String PROKPROT_PATH_IGBVM="/data/sketch/refseq/current/prot/taxa#.sketch";
1061 private static final String PROKPROT_PATH_BIG_IGBVM="/data/sketch/refseq/current/prot/big#.sketch";
1062 private static final String MITO_PATH_IGBVM="/data/sketch/mito2/taxa#.sketch";
1063 private static final String FUNGI_PATH_IGBVM="/data/sketch/mito2/fungi#.sketch";
1064
1065 private static final String IMG_PATH_AWS=null;
1066 private static final String NT_PATH_AWS="/test1/sketch/latest/nt/taxa#.sketch";
1067 private static final String NR_PATH_AWS=null;
1068 private static final String REFSEQ_PATH_AWS="/test1/sketch/latest/refseq/taxa#.sketch";
1069 private static final String SILVA_PATH_AWS="/test1/sketch/latest/ribo/both_taxa#.sketch";
1070 private static final String PROKPROT_PATH_AWS="/test1/sketch/latest/protein/taxa#.sketch";
1071 private static final String MITO_PATH_AWS=null;
1072 private static final String FUNGI_PATH_AWS=null;
1073
1074 public static final String IMG_PATH(){return Shared.IGBVM ? IMG_PATH_IGBVM : Shared.AWS ? IMG_PATH_AWS : IMG_PATH;}
1075 public static final String NT_PATH(){return Shared.IGBVM ? NT_PATH_IGBVM : Shared.AWS ? NT_PATH_AWS : NT_PATH;}
1076 public static final String NR_PATH(){return Shared.IGBVM ? NR_PATH_IGBVM : Shared.AWS ? NR_PATH_AWS : NR_PATH;}
1077 public static final String REFSEQ_PATH(){return Shared.IGBVM ? REFSEQ_PATH_IGBVM : Shared.AWS ? REFSEQ_PATH_AWS : REFSEQ_PATH;}
1078 public static final String REFSEQ_PATH_BIG(){return Shared.IGBVM ? REFSEQ_PATH_BIG_IGBVM : REFSEQ_PATH_BIG;}
1079 public static final String SILVA_PATH(){return Shared.IGBVM ? SILVA_PATH_IGBVM : Shared.AWS ? SILVA_PATH_AWS : SILVA_PATH;}
1080 public static final String PROKPROT_PATH(){return Shared.IGBVM ? PROKPROT_PATH_IGBVM : Shared.AWS ? PROKPROT_PATH_AWS : PROKPROT_PATH;}
1081 public static final String PROKPROT_PATH_BIG(){return Shared.IGBVM ? PROKPROT_PATH_BIG_IGBVM : PROKPROT_PATH_BIG;}
1082 public static final String MITO_PATH(){return Shared.IGBVM ? MITO_PATH_IGBVM : Shared.AWS ? MITO_PATH_AWS : MITO_PATH;}
1083 public static final String FUNGI_PATH(){return Shared.IGBVM ? FUNGI_PATH_IGBVM : Shared.AWS ? FUNGI_PATH_AWS : FUNGI_PATH;}
1084
1085 /*--------------------------------------------------------------*/
1086 /*---------------- Getters ----------------*/
1087 /*--------------------------------------------------------------*/
1088
1089 public static boolean useWhitelist() {return useWhitelist;}
1090 public static String blacklist() {return blacklist;}
1091
1092 /*--------------------------------------------------------------*/
1093 /*---------------- Static Fields ----------------*/
1094 /*--------------------------------------------------------------*/
1095
1096 public static int CODING=A48;
1097 public static boolean deltaOut=true;
1098 public static boolean rcomp=true;
1099
1100 public static final int defaultK=32;
1101 public static final int defaultK2=24;
1102 public static final int defaultKAmino=12;
1103 public static final int defaultK2Amino=9;
1104 public static int k=defaultK;
1105 public static int k2=defaultK2;
1106 public static int entropyK=3;
1107 public static boolean setK=false;
1108 public static boolean amino=false;
1109 public static boolean amino8=false;
1110 public static boolean translate=false;
1111 public static boolean sixframes=false;
1112 public static boolean processSSU=true;
1113 public static int min_SSU_len=1000;
1114 public static int HASH_VERSION=2;
1115 public static String pgmFile=null;
1116 public static GeneModel pgm=null;
1117
1118 static boolean aminoOrTranslate(){return amino | translate;}
1119
1120 private static int bitsPerCycle=8; //Optimal speed for K=31 is 9bpc (15% faster than 8). 9, 10, and 11 are similar.
1121 private static long cycleMask=~((-1L)<<bitsPerCycle);
1122 private static final long codeOrMask=1L;
1123 private static final long codeAndMask=~1L;
1124 private static int maxCycles=(64+bitsPerCycle-1)/bitsPerCycle;
1125 private static int codeIncrement=(int)(cycleMask+1);
1126 private static int codeMax;
1127 private static int codeMax2;
1128
1129 private static long hashSeed=12345;
1130 private static long[][] codes;//=makeCodes(maxCycles, codeIncrement, hashSeed, false);
1131 private static long[] codes1D;//=makeCodes1D(codes);
1132 static boolean useToValue2=true;
1133
1134 //These are set in postParse()
1135 private static int bitsPerBase=2;
1136 private static int basesPerCycle=bitsPerCycle/bitsPerBase;
1137 private static int hashCycles=64/bitsPerCycle; //Note: Needs to be variable based on k to make k2 codes compatible with k codes
1138 private static int hashCycles2;
1139 private static long k2mask;
1140 private static long k2submask;
1141 //Difference in length of k and k2, in bits
1142 private static int k2shift;
1143 private static long k2midmask;
1144
1145 /**
1146 * Make the SketchHeap size this factor bigger,
1147 * when minKeyOccuranceCount is used
1148 */
1149 public static double sketchHeapFactor=8;
1150 // static int minKeyOccuranceCount=1;
1151
1152 public static int minSketchSize=3;
1153 public static int targetSketchSize=10000;
1154 public static boolean AUTOSIZE=true;
1155 public static float AUTOSIZE_FACTOR=1;
1156 public static boolean SET_AUTOSIZE_FACTOR=false;
1157 public static boolean SET_AUTOSIZE=false;
1158 public static boolean SET_TARGET_SIZE=false;
1159 public static boolean AUTOSIZE_LINEAR=false;
1160 public static double AUTOSIZE_LINEAR_DENSITY=0.001;
1161 public static float maxGenomeFraction=0.04f;//Was 0.015, but that is often too sparse
1162 public static float maxGenomeFractionSmall=0.10f;
1163 public static int smallSketchSize=150;
1164 public static boolean makeIndex=true;
1165 public static float prealloc=0;
1166 public static boolean allToAll=false;
1167 public static boolean compareSelf=false;
1168 public static boolean skipCompare=false;
1169 public static final int bitSetBits=2; //Needs to be 2 for unique counts.
1170
1171 private static double keyFraction=0.16;
1172 private static double keyFraction2=keyFraction*1.2;
1173 public static long minHashValue=setMinHashValue();
1174 public static double keyFraction(){return keyFraction;}
1175 public static void setKeyFraction(double d){
1176 assert(d>0);
1177 keyFraction=Tools.mid(0.0001, d, 0.5);
1178 keyFraction2=Tools.mid(0.0001, keyFraction*1.2, 0.5);
1179 }
1180 public static long setMinHashValue(){
1181 double mult=1-2*keyFraction;
1182 minHashValue=(long)(mult*Long.MAX_VALUE);
1183 assert(minHashValue>=0 && minHashValue<Long.MAX_VALUE);
1184 return minHashValue;
1185 }
1186
1187 public static int minFakeID=1900000000;
1188 static boolean hashNames=false;
1189 static boolean skipNonCanonical=true;
1190 static boolean useSizeEstimate=true;
1191 public static boolean allowMultithreadedFastq=false;
1192 static boolean forceDisableMultithreadedFastq=false;
1193
1194 static boolean preferSSUMap=false;
1195 static boolean preferSSUMapForEuks=true;
1196 static boolean useSSUMapOnly=false;
1197 static boolean useSSUMapOnlyForEuks=false;
1198 static boolean ban18SForProks=true;
1199 static boolean ban16SForEuks=true;
1200
1201 static long sampleseed=-1L;
1202
1203 public static TaxTree taxtree=null;
1204 private static String treefile=null;
1205 static String blacklist=null;
1206 static boolean useWhitelist=false;
1207 private static boolean postparsed=false;
1208 public static boolean KILL_OK=false;
1209 public static boolean EXACT_ANI=true;
1210 public static boolean useSSA=true;
1211 public static boolean useSSA3=false;
1212
1213 //Needs to be last due to dependencies.
1214 public static DisplayParams defaultParams=new DisplayParams();
1215
1216 static AlignmentThreadPool alignerPool=null;
1217
1218 public static boolean verbose2=false;
1219 public static boolean LOADER2=true;
1220
1221 }