comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/DisplayParams.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.Comparator;
6 import java.util.Locale;
7 import java.util.Map.Entry;
8
9 import json.JsonObject;
10 import shared.Colors;
11 import shared.Parse;
12 import shared.Tools;
13 import structures.ByteBuilder;
14 import tax.PrintTaxonomy;
15 import tax.TaxFilter;
16 import tax.TaxNode;
17 import tax.TaxTree;
18
19 public class DisplayParams implements Cloneable {
20
21 @Override
22 public DisplayParams clone(){
23 try {
24 DisplayParams copy=(DisplayParams)super.clone();
25 if(taxFilterWhite!=null){
26 copy.taxFilterWhite=taxFilterWhite.deepCopy();
27 }
28 if(taxFilterBlack!=null){
29 copy.taxFilterBlack=taxFilterBlack.deepCopy();
30 }
31 copy.postParsed=false;
32 return copy;
33 } catch (CloneNotSupportedException e) {
34 // TODO Auto-generated catch block
35 e.printStackTrace();
36 throw new RuntimeException();
37 }
38 }
39
40 public DisplayParams parseDoubleHeader(String s){
41 if(!s.startsWith("##")){return this;}
42 // if(!s.startsWith("##")){return this.clone();}
43 StringBuilder sb=new StringBuilder();
44 for(int i=2; i<s.length(); i++){
45 char c=s.charAt(i);
46 if(c=='\n'){break;}
47 sb.append(c);
48 }
49 return parseDoubleHeaderLine(sb.toString());
50 }
51
52 public DisplayParams parseDoubleHeaderLine(String line) {
53 if(line.startsWith("##")){line=line.substring(2);}
54 else{assert(!line.startsWith("#")) : line;}
55 if(line.length()<1){return this;}
56
57 DisplayParams params=this.clone();
58
59 String[] args=line.split(" ");
60 for(String arg : args){
61 String[] split=arg.split("=");
62 String a=split[0].toLowerCase();
63 String b=split.length>1 ? split[1] : null;
64 if(b==null || b.equalsIgnoreCase("null")){b=null;} //Normally handled by PreParser, but not in this case.
65 while(a.startsWith("-")){a=a.substring(1);} //Strip leading hyphens
66
67 boolean x=params.parse(arg, a, b);
68 // assert(x) : "Unknown parameter "+arg+"\n"+line;
69 if(!x){System.err.println("Warning: Unknown parameter "+arg);}
70 }
71 if(SketchObject.verbose2){System.err.println("Made it to post-parse. taxFilterWhite="+params.taxFilterWhite);}
72 params.postParse(true, true);
73 if(SketchObject.verbose2){System.err.println("Passed post-parse. taxFilterWhite="+params.taxFilterWhite);}
74
75 return params;
76 }
77
78 public boolean parse(String arg, String a, String b){
79
80 if(a.equals("chunk")){
81 chunkNum=Integer.parseInt(b);
82 }else if(a.equals("minhits") || a.equals("hits")){
83 minHits=Integer.parseInt(b);
84 }else if(a.equalsIgnoreCase("minwkid") || a.equalsIgnoreCase("wkid")){
85 minWKID=Float.parseFloat(b);
86 if(minWKID>1){minWKID/=100;}
87 assert(minWKID<=1) : "minWKID should between 0 and 1";
88 }else if(a.equalsIgnoreCase("minid") || a.equalsIgnoreCase("id") || a.equalsIgnoreCase("minani") || a.equalsIgnoreCase("ani")){
89 minANI=Float.parseFloat(b);
90 if(minANI>1){minANI/=100;}
91 assert(minANI<=1) : "minANI should between 0 and 1";
92 if(minANI>0){
93 minWKID=(float)Tools.max(minWKID, Comparison.aniToWkid(minANI, 32));//Lowest possible minWKID for this ANI
94 }
95 }else if(a.equals("minbases")){
96 minBases=Integer.parseInt(b);
97 }else if(a.equals("minsizeratio")){
98 minSizeRatio=Float.parseFloat(b);
99 // assert(minSizeRatio>=0f && minSizeRatio<=1.0f) : "\nminSizeRatio must be between 0 and 1, inclusive.\n";
100 if(minSizeRatio>1){minSizeRatio=1f/minSizeRatio;}
101 }else if(a.equals("records") || a.equals("maxrecords") || a.equals("results")){
102 maxRecords=Integer.parseInt(b);
103 assert(maxRecords>=1) : "Max records must be at least 1.";
104 }else if(a.equals("recordsperlevel")){
105 recordsPerLevel=Integer.parseInt(b);
106 }else if(a.equals("format")){
107 assert(b!=null) : "Invalid format: "+arg;
108 if(b.equalsIgnoreCase("json")){
109 format=FORMAT_JSON;
110 }else if(b.equalsIgnoreCase("jsonarray")){
111 format=FORMAT_JSON;
112 jsonArray=true;
113 }else if(b.equalsIgnoreCase("d3")){
114 format=FORMAT_JSON;
115 printD3=true;
116 }else if(b.equalsIgnoreCase("constellation")){
117 format=FORMAT_CONSTELLATION;
118 }else if(b.equalsIgnoreCase("3column") || b.equalsIgnoreCase("queryrefani")){
119 format=FORMAT_QUERY_REF_ANI;
120 }else if(Tools.isDigit(b.charAt(0))){
121 format=Integer.parseInt(b);
122 }else{
123 assert(false) : "Invalid format: "+arg;
124 }
125 }else if(a.equalsIgnoreCase("json")){
126 if(Parse.parseBoolean(b)){
127 format=FORMAT_JSON;
128 }else{
129 if(format==FORMAT_JSON){format=default_format;}
130 }
131 }else if(a.equalsIgnoreCase("jsonarray") || a.equalsIgnoreCase("jsonarrays")){
132 if(Parse.parseBoolean(b)){
133 format=FORMAT_JSON;
134 jsonArray=true;
135 }else{
136 jsonArray=false;
137 }
138 }else if(a.equalsIgnoreCase("d3") || a.equalsIgnoreCase("printd3")){
139 if(Parse.parseBoolean(b)){
140 format=FORMAT_JSON;
141 printD3=true;
142 }else{
143 printD3=false;
144 }
145 }else if(a.equalsIgnoreCase("jsonarray") || a.equalsIgnoreCase("jsonarrays")){
146 if(Parse.parseBoolean(b)){
147 jsonArray=true;
148 }else{
149 jsonArray=false;
150 }
151 }else if(a.equalsIgnoreCase("d3levelnodes")){
152 D3LevelNodes=Parse.parseBoolean(b);
153 }else if(a.equalsIgnoreCase("d3hitsize")){
154 if(Parse.parseBoolean(b)){D3sizeMode=D3_HIT_SIZE;}
155 }else if(a.equalsIgnoreCase("d3anisize")){
156 if(Parse.parseBoolean(b)){D3sizeMode=D3_ANI_SIZE;}
157 }else if(a.equalsIgnoreCase("d3wkidsize")){
158 if(Parse.parseBoolean(b)){D3sizeMode=D3_WKID_SIZE;}
159 }else if(a.equalsIgnoreCase("d3depthsize")){
160 if(Parse.parseBoolean(b)){
161 D3sizeMode=D3_DEPTH_SIZE;
162 printDepth=true;
163 }
164 }else if(a.equalsIgnoreCase("d3kidsize")){
165 if(Parse.parseBoolean(b)){D3sizeMode=D3_KID_SIZE;}
166 }else if(a.equalsIgnoreCase("D3sizeMode")){
167 D3sizeMode=Integer.parseInt(b);
168 }else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){
169 taxLevel=TaxTree.parseLevel(b);//TODO: Change to extended
170 }
171
172 else if(a.equalsIgnoreCase("requireSSU")){
173 requireSSU=Parse.parseBoolean(b);
174 }
175
176 else if(a.equalsIgnoreCase("minRefSizeEstimate") || a.equalsIgnoreCase("minRefSize")){
177 minRefSizeEstimate=Long.parseLong(b);
178 }else if(a.equalsIgnoreCase("minRefSizeBases")){
179 minRefSizeBases=Long.parseLong(b);
180 }
181
182 else if(a.equalsIgnoreCase("printtax") || a.equalsIgnoreCase("printtaxa")){
183 printTax=Parse.parseBoolean(b);
184 }else if(a.equalsIgnoreCase("printssu") || a.equalsIgnoreCase("print16s") || a.equalsIgnoreCase("ssu")){
185 printSSU=Parse.parseBoolean(b);
186 }else if(a.equalsIgnoreCase("printSSULen") || a.equalsIgnoreCase("print16slen") || a.equalsIgnoreCase("ssulen")){
187 printSSULen=Parse.parseBoolean(b);
188 }else if(a.equalsIgnoreCase("printssusequence") || a.equalsIgnoreCase("print16ssequence")){
189 printSSUSequence=Parse.parseBoolean(b);
190 }else if(a.equalsIgnoreCase("printqueryfilename") || a.equalsIgnoreCase("printqfname") || a.equalsIgnoreCase("printqfile") || a.equalsIgnoreCase("qfname")){
191 printQueryFileName=Parse.parseBoolean(b);
192 }else if(a.equalsIgnoreCase("printreffilename") || a.equalsIgnoreCase("printrfname") || a.equalsIgnoreCase("printrfile") || a.equalsIgnoreCase("rfname")){
193 printRefFileName=Parse.parseBoolean(b);
194 }else if(a.equalsIgnoreCase("printfilename") || a.equalsIgnoreCase("printfname") || a.equalsIgnoreCase("printfile")){
195 printQueryFileName=printRefFileName=Parse.parseBoolean(b);
196 }else if(a.equalsIgnoreCase("printoriginalname") || a.equalsIgnoreCase("printseqname") || a.equalsIgnoreCase("printname0") || a.equals("pn0")){
197 printOriginalName=Parse.parseBoolean(b);
198 }else if(a.equalsIgnoreCase("printimg")){
199 printImg=Parse.parseBoolean(b);
200 }else if(a.equalsIgnoreCase("printcompleteness") || a.equalsIgnoreCase("completeness") || a.equalsIgnoreCase("printcomplt")){
201 printCompleteness=Parse.parseBoolean(b);
202 }else if(a.equalsIgnoreCase("printani") || a.equalsIgnoreCase("ani")){
203 printAni=Parse.parseBoolean(b);
204 }else if(a.equalsIgnoreCase("printkid") || a.equalsIgnoreCase("kid")){
205 printKID=Parse.parseBoolean(b);
206 }else if(a.equalsIgnoreCase("printwkid") || a.equalsIgnoreCase("wkid")){
207 printWKID=Parse.parseBoolean(b);
208 }else if(a.equalsIgnoreCase("printscore") || a.equalsIgnoreCase("score")){
209 printScore=Parse.parseBoolean(b);
210 }else if(a.equalsIgnoreCase("printevalue") || a.equalsIgnoreCase("evalue")){
211 printEValue=Parse.parseBoolean(b);
212 }
213
214 else if(a.equalsIgnoreCase("trackcounts")){
215 trackCounts=Parse.parseBoolean(b);
216 }else if(a.equalsIgnoreCase("printdepth") || a.equalsIgnoreCase("depth")){
217 printDepth=Parse.parseBoolean(b);
218 }else if(a.equalsIgnoreCase("printdepth2") || a.equalsIgnoreCase("depth2")){
219 printDepth2=Parse.parseBoolean(b);
220 }else if(a.equalsIgnoreCase("actualdepth") || a.equalsIgnoreCase("printactualdepth")){
221 printActualDepth=Parse.parseBoolean(b);
222 }else if(a.equalsIgnoreCase("printvolume") || a.equalsIgnoreCase("volume")){
223 printVolume=Parse.parseBoolean(b);
224 }else if(a.equalsIgnoreCase("printavgrefhits") || a.equalsIgnoreCase("printrefhits") || a.equalsIgnoreCase("avgrefhits") || a.equalsIgnoreCase("refhits")){
225 printRefHits=Parse.parseBoolean(b);
226 }
227
228 else if(a.equalsIgnoreCase("sortByDepth")){
229 boolean x=Parse.parseBoolean(b);
230 if(x){comparator=Comparison.depthComparator;}
231 }else if(a.equalsIgnoreCase("sortByDepth2")){
232 boolean x=Parse.parseBoolean(b);
233 if(x){comparator=Comparison.depth2Comparator;}
234 }else if(a.equalsIgnoreCase("sortByVolume")){
235 boolean x=Parse.parseBoolean(b);
236 if(x){comparator=Comparison.volumeComparator;}
237 }else if(a.equalsIgnoreCase("sortByScore")){
238 boolean x=Parse.parseBoolean(b);
239 if(x){comparator=Comparison.scoreComparator;}
240 }
241 else if(a.equalsIgnoreCase("sortByKID")){
242 boolean x=Parse.parseBoolean(b);
243 if(x){comparator=Comparison.KIDComparator;}
244 }else if(a.equalsIgnoreCase("sortByWKID") || a.equalsIgnoreCase("sortByANI")){
245 boolean x=Parse.parseBoolean(b);
246 if(x){comparator=Comparison.WKIDComparator;}
247 }else if(a.equalsIgnoreCase("sortBySSU") || a.equalsIgnoreCase("sortBy16S")){
248 boolean x=Parse.parseBoolean(b);
249 if(x){comparator=Comparison.SSUComparator;}
250 }else if(a.equalsIgnoreCase("sortByHits") || a.equalsIgnoreCase("sortByMatches")){
251 boolean x=Parse.parseBoolean(b);
252 if(x){comparator=Comparison.HitsComparator;}
253 }
254
255 else if(a.equalsIgnoreCase("printUMatches") || a.equalsIgnoreCase("printUHits") || a.equalsIgnoreCase("printUnique")){
256 printUnique=Parse.parseBoolean(b);
257 }else if(a.equalsIgnoreCase("printUMatches2") || a.equalsIgnoreCase("printUnique2") || a.equalsIgnoreCase("unique2")){
258 printUnique2=Parse.parseBoolean(b);
259 }else if(a.equalsIgnoreCase("printUMatches3") || a.equalsIgnoreCase("printUnique3") || a.equalsIgnoreCase("unique3")){
260 printUnique3=Parse.parseBoolean(b);
261 }else if(a.equalsIgnoreCase("printUContam")){
262 printUContam=Parse.parseBoolean(b);
263 }else if(a.equalsIgnoreCase("printNoHit")){
264 printNoHit=Parse.parseBoolean(b);
265 }else if(a.equalsIgnoreCase("contamhits") || a.equalsIgnoreCase("contam") || a.equalsIgnoreCase("printcontam")){
266 printContam=Parse.parseBoolean(b);
267 }else if(a.equalsIgnoreCase("contamhits2") || a.equalsIgnoreCase("contam2") || a.equalsIgnoreCase("printcontam2")){
268 if(b==null || b.length()<1){
269 printContam2=true;
270 }else if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){
271 contamLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b)));
272 printContam2=true;
273 }else if(TaxTree.levelMapExtendedContains(b)){
274 contamLevel=TaxTree.stringToLevelExtended(b);
275 printContam2=true;
276 }else{
277 printContam2=Parse.parseBoolean(b);
278 }
279 }else if(a.equalsIgnoreCase("contamLevel")){
280 if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){
281 contamLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b)));
282 printContam2=true;
283 }else if(TaxTree.levelMapExtendedContains(b)){
284 contamLevel=TaxTree.stringToLevelExtended(b);
285 printContam2=true;
286 }
287 }
288
289 else if(a.equalsIgnoreCase("reportAniOnly") || a.equalsIgnoreCase("AniOnly")){
290 reportAniOnly=Parse.parseBoolean(b);
291 }
292
293 else if(a.equalsIgnoreCase("printMatches")){
294 printMatches=Parse.parseBoolean(b);
295 }else if(a.equalsIgnoreCase("printLength")){
296 printLength=Parse.parseBoolean(b);
297 }else if(a.equalsIgnoreCase("printTaxID")){
298 printTaxID=Parse.parseBoolean(b);
299 }else if(a.equalsIgnoreCase("printGSize")){
300 printGSize=Parse.parseBoolean(b);
301 }else if(a.equalsIgnoreCase("gSizeKMG")){
302 gSizeKMG=Parse.parseBoolean(b);
303 }else if(a.equalsIgnoreCase("printGC")){
304 printGC=Parse.parseBoolean(b);
305 }else if(a.equalsIgnoreCase("printGKmers")){
306 printGKmers=Parse.parseBoolean(b);
307 }else if(a.equalsIgnoreCase("printCommonAncestor") || a.equalsIgnoreCase("printCA")){
308 printCommonAncestor=Parse.parseBoolean(b);
309 }else if(a.equalsIgnoreCase("printCommonAncestorLevel") || a.equalsIgnoreCase("printCAL")){
310 printCommonAncestorLevel=Parse.parseBoolean(b);
311 }else if(a.equalsIgnoreCase("printTaxName")){
312 printTaxName=Parse.parseBoolean(b);
313 }else if(a.equalsIgnoreCase("printGSeqs")){
314 printGSeqs=Parse.parseBoolean(b);
315 }else if(a.equalsIgnoreCase("printGBases")){
316 printGBases=Parse.parseBoolean(b);
317 }
318
319 else if(a.equalsIgnoreCase("minEntropy") || a.equalsIgnoreCase("entropy") || a.equalsIgnoreCase("efilter")){
320 minEntropy=Float.parseFloat(b);
321 }else if(a.equalsIgnoreCase("minprob") || a.equalsIgnoreCase("pfilter")){
322 minProb=(float)Double.parseDouble(b);
323 }else if(a.equalsIgnoreCase("minQual") || a.equalsIgnoreCase("minq")){
324 minQual=Byte.parseByte(b);
325 }
326
327 else if(a.equalsIgnoreCase("printColors") || a.equalsIgnoreCase("colors") || a.equalsIgnoreCase("color")){
328 // System.err.println("Parsing '"+arg+"'"); //123
329 if(b==null || b.length()<1){
330 printColors=true;
331 }else if(b.equalsIgnoreCase("t") || b.equalsIgnoreCase("true")){
332 printColors=true;
333 }else if(b.equalsIgnoreCase("f") || b.equalsIgnoreCase("false")){
334 printColors=false;
335 }else{
336 printColors=true;
337 if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){
338 colorLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b)));
339 }else{
340 colorLevel=TaxTree.stringToLevelExtended(b);
341 }
342 }
343 setColors=true;
344 // System.err.println("Parsed "+arg); //123
345 }else if(a.equalsIgnoreCase("colorLevel")){
346 // System.err.println("Parsing '"+arg+"'"); //123
347 if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){
348 colorLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b)));
349 }else{
350 colorLevel=TaxTree.stringToLevelExtended(b);
351 }
352 // System.err.println("Parsed "+arg); //123
353 }
354
355 else if(a.equalsIgnoreCase("printRefDivisor") || a.equalsIgnoreCase("printRDiv")){
356 printRefDivisor=Parse.parseBoolean(b);
357 }else if(a.equalsIgnoreCase("printQueryDivisor") || a.equalsIgnoreCase("printQDiv")){
358 printQueryDivisor=Parse.parseBoolean(b);
359 }else if(a.equalsIgnoreCase("printRefSize") || a.equalsIgnoreCase("printRSize")){
360 printRefSize=Parse.parseBoolean(b);
361 }else if(a.equalsIgnoreCase("printQuerySize") || a.equalsIgnoreCase("printQSize")){
362 printQuerySize=Parse.parseBoolean(b);
363 }else if(a.equalsIgnoreCase("printContamHits") || a.equalsIgnoreCase("printCHits")){
364 printContamHits=Parse.parseBoolean(b);
365 }
366
367 else if(a.equalsIgnoreCase("printIntersection") || a.equalsIgnoreCase("intersection") || a.equalsIgnoreCase("intersect")){
368 printIntersection=Parse.parseBoolean(b);
369 }else if(a.equalsIgnoreCase("mergePairs") || a.equalsIgnoreCase("merge")){
370 mergePairs=Parse.parseBoolean(b);
371 }
372
373 else if(a.equalsIgnoreCase("printAll")){
374 if(Parse.parseBoolean(b)){
375 setPrintAll();
376 }
377 }
378
379 else if(a.equals("samplerate")){
380 samplerate=Float.parseFloat(b);
381 }else if(a.equals("reads")){
382 maxReads=Parse.parseKMG(b);
383 }else if(a.equals("mode") || a.equalsIgnoreCase("single") || a.equalsIgnoreCase("singlesketch") || a.equalsIgnoreCase("onesketch")
384 || a.equalsIgnoreCase("persequence") || a.equalsIgnoreCase("sequence") || a.equalsIgnoreCase("pertaxa")
385 || a.equalsIgnoreCase("perheader") || a.equalsIgnoreCase("perfile")){
386 mode=SketchObject.parseMode(arg, a, b);
387 }
388
389 //For format 3
390 else if(a.equalsIgnoreCase("useTaxidName") || a.equalsIgnoreCase("useTaxidAsName")){
391 useTaxidName=Parse.parseBoolean(b);
392 }else if(a.equalsIgnoreCase("useImgName") || a.equalsIgnoreCase("useImgAsName")){
393 useImgName=Parse.parseBoolean(b);
394 }else if(a.equalsIgnoreCase("useTaxName") || a.equalsIgnoreCase("useTaxAsName")){
395 useTaxName=Parse.parseBoolean(b);
396 }else if(a.equalsIgnoreCase("useFilePrefixName") || a.equalsIgnoreCase("useFilePrefixAsName")){
397 useFilePrefixName=Parse.parseBoolean(b);
398 }
399
400 else if(a.equalsIgnoreCase("taxfilterincludelevel") || a.equalsIgnoreCase("includelevel")
401 || a.equalsIgnoreCase("taxlevelwhite") || a.equalsIgnoreCase("ilevel") || a.equalsIgnoreCase("whitelevel")){
402 taxLevelWhite=TaxTree.parseLevel(b);//TODO: Change to extended
403 }else if(a.equalsIgnoreCase("taxfilterinclude") || a.equalsIgnoreCase("include") || a.equalsIgnoreCase("taxfilterwhitelist")){
404 taxFilterWhiteList=b;
405 }else if(a.equalsIgnoreCase("taxfilterincludestring") || a.equalsIgnoreCase("includestring")
406 || a.equalsIgnoreCase("taxfilterwhitestring") || a.equalsIgnoreCase("istring")){
407 taxFilterWhiteString=b;
408 }else if(a.equalsIgnoreCase("banUnclassified") || a.equalsIgnoreCase("noUnclassified")){
409 banUnclassified=Parse.parseBoolean(b);
410 }else if(a.equalsIgnoreCase("banVirus") || a.equalsIgnoreCase("noVirus") || a.equalsIgnoreCase("banViruses") || a.equalsIgnoreCase("noViruses")){
411 banVirus=Parse.parseBoolean(b);
412 }
413
414 else if(a.equalsIgnoreCase("taxfilterexcludelevel") || a.equalsIgnoreCase("excludelevel")
415 || a.equalsIgnoreCase("taxlevelblack") || a.equalsIgnoreCase("elevel") || a.equalsIgnoreCase("blacklevel")){
416 taxLevelBlack=TaxTree.parseLevel(b);//TODO: Change to extended
417 }else if(a.equalsIgnoreCase("taxfilterexclude") || a.equalsIgnoreCase("exclude") || a.equalsIgnoreCase("taxfilterblacklist")){
418 taxFilterBlackList=b;
419 }else if(a.equalsIgnoreCase("taxfilterexcludestring") || a.equalsIgnoreCase("excludestring")
420 || a.equalsIgnoreCase("taxfilterblackstring") || a.equalsIgnoreCase("estring")){
421 taxFilterBlackString=b;
422 }
423
424 else if(a.equalsIgnoreCase("minkmercount") || a.equalsIgnoreCase("minkeycount") || a.equalsIgnoreCase("mincount") || a.equalsIgnoreCase("minKeyOccuranceCount")){
425 minKeyOccuranceCount=Tools.max(1, Integer.parseInt(b));
426 }
427
428 //TODO: Eventually remove support for "amino" and "k" and just support "hamino" and "hk"
429 //This stands for "header amino" and "header k".
430
431 //Parameters for compatibility verification
432 else if(a.equalsIgnoreCase("k") || a.equalsIgnoreCase("hk")){
433 // System.err.println("A: k="+k+", k2="+k2+", arg="+arg);
434 if(b.indexOf(',')>=0){
435 String[] split=b.split(",");
436 assert(split.length==2) : "\nBad argument "+arg+"\n"+b+"\n";
437 int x=Integer.parseInt(split[0]);
438 int y=Integer.parseInt(split[1]);
439 k=Tools.max(x, y);
440 k2=Tools.min(x, y);
441 if(k==k2){k2=0;}
442 // System.err.println("B: k="+k+", k2="+k2+", split="+Arrays.toString(split));
443 }else{
444 k=Integer.parseInt(b);
445 // System.err.println("C: k="+k+", k2="+k2);
446 }
447 }else if(a.equalsIgnoreCase("hashversion") || a.equalsIgnoreCase("hv")){
448 hashVersion=Integer.parseInt(b);
449 }else if(a.equalsIgnoreCase("amino") || a.equalsIgnoreCase("hamino")){
450 amino=Parse.parseBoolean(b);
451 if(amino){translate=false;}
452 }else if(a.equalsIgnoreCase("translate")){
453 translate=Parse.parseBoolean(b);
454 if(translate){amino=false;}
455 }else if(a.equalsIgnoreCase("sixframes")){
456 sixframes=Parse.parseBoolean(b);
457 if(sixframes){amino=false; translate=true;}
458 }
459
460 else if(a.equalsIgnoreCase("requiredmeta") || a.equalsIgnoreCase("rmeta")){
461 if(b==null){requiredMeta=null;}
462 else{
463 String[] split2=b.split(",");
464 requiredMeta=new ArrayList<String>(split2.length);
465 for(String mt : split2){
466 assert(mt.indexOf(':')>=0) : "Metadata tags must contain ':' symbol: "+mt;
467 requiredMeta.add(mt);
468 }
469 }
470 }else if(a.equalsIgnoreCase("bannedmeta") || a.equalsIgnoreCase("bmeta")){
471 if(b==null){bannedMeta=null;}
472 else{
473 String[] split2=b.split(",");
474 bannedMeta=new ArrayList<String>(split2.length);
475 for(String mt : split2){
476 assert(mt.indexOf(':')>=0) : "Metadata tags must contain ':' symbol: "+mt;
477 bannedMeta.add(mt);
478 }
479 }
480 }
481
482 // else if(a.equalsIgnoreCase("requiredtaxid") || a.equalsIgnoreCase("rtaxid")){
483 // if(b==null){requiredTaxid=null;}
484 // else{
485 // String[] split2=b.split(",");
486 // requiredTaxid=new IntList(split2.length);
487 // for(String mt : split2){
488 // requiredTaxid.add(Integer.parseInt(mt));
489 // }
490 // if(requiredTaxid.isEmpty()){requiredTaxid=null;}
491 // }
492 // }else if(a.equalsIgnoreCase("bannedtaxid") || a.equalsIgnoreCase("btaxid")){
493 // if(b==null){bannedTaxid=null;}
494 // else{
495 // String[] split2=b.split(",");
496 // bannedTaxid=new IntList(split2.length);
497 // for(String mt : split2){
498 // bannedTaxid.add(Integer.parseInt(mt));
499 // }
500 // if(bannedTaxid.isEmpty()){bannedTaxid=null;}
501 // }
502 // }
503
504 else if(a.equalsIgnoreCase("requiredmetaand") || a.equalsIgnoreCase("rmetaand")){
505 requiredMetaAnd=Parse.parseBoolean(b);
506 }else if(a.equalsIgnoreCase("requiredmetaor") || a.equalsIgnoreCase("rmetaor")){
507 requiredMetaAnd=!Parse.parseBoolean(b);
508 }
509
510 else if(a.equalsIgnoreCase("bbversion")){
511 inputVersion=b;
512 }
513
514 else{
515 return false;
516 }
517 return true;
518 }
519
520 public void postParse(boolean requireTree, boolean makeTaxFilters){
521 assert(!postParsed);
522 synchronized(this){
523 if(postParsed){return;}
524
525 if(makeTaxFilters){
526 if(taxFilterWhiteList!=null || taxFilterWhiteString!=null){
527 taxFilterWhite=new TaxFilter(SketchObject.taxtree, true);
528 taxFilterWhite.setLevel(taxLevelWhite, false);
529 taxFilterWhite.makeSet();
530 taxFilterWhite.addNamesOrNumbers(taxFilterWhiteList, false);
531 taxFilterWhite.setContainsString(taxFilterWhiteString);
532 if(requireTree){
533 assert(SketchObject.taxtree!=null) : "No taxtree loaded.";
534 taxFilterWhite.setTree(SketchObject.taxtree);
535 taxFilterWhite.promote();
536 }
537 }
538
539 if(taxFilterBlackList!=null || taxFilterBlackString!=null){
540 taxFilterBlack=new TaxFilter(SketchObject.taxtree, false);
541 taxFilterBlack.setLevel(taxLevelBlack, false);
542 taxFilterBlack.makeSet();
543 taxFilterBlack.addNamesOrNumbers(taxFilterBlackList, false);
544 taxFilterBlack.setContainsString(taxFilterBlackString);
545 if(requireTree){
546 assert(SketchObject.taxtree!=null) : "No taxtree loaded.";
547 taxFilterBlack.setTree(SketchObject.taxtree);
548 taxFilterBlack.promote();
549 }
550 }
551 }
552
553 noFilters=(!hasMetaFilters() && !hasTaxFilters() && !requireSSU && minRefSizeEstimate<1 && minRefSizeBases<1);
554 postParsed=true;
555 }
556 }
557
558 public boolean postParsed(){return postParsed;}
559
560 @Override
561 public String toString(){
562 return toString(-1);
563 }
564
565 public String toString(int chunkNum){
566 StringBuilder sb=new StringBuilder();
567 sb.append("##");
568 sb.append("hits=").append(minHits);
569 if(chunkNum>=0){sb.append(" chunk=").append(chunkNum);}
570 sb.append(" wkid=").append(String.format(Locale.ROOT, "%.5f",minWKID));
571 if(minANI>0){sb.append(" id=").append(String.format(Locale.ROOT, "%.5f",minANI));}
572 if(minBases>0){sb.append(" minbases=").append(minBases);}
573 if(minSizeRatio>0){sb.append(" minsizeratio=").append(String.format(Locale.ROOT, "%.5f",minSizeRatio));}
574 sb.append(" records=").append(maxRecords);
575 if(recordsPerLevel>0){sb.append(" recordsperlevel=").append(recordsPerLevel);}
576 sb.append(" format=").append(format);
577 sb.append(" level=").append(taxLevel);
578 if(inputVersion!=null){sb.append(" bbversion=").append(inputVersion);}
579
580 if(k!=SketchObject.defaultK || k2!=0 || k!=SketchObject.k || k2!=SketchObject.k2){
581 assert(k>0 && k2>=0 && k2<k) : "Bad values for k: "+k+", "+k2+", "+SketchObject.k+", "+SketchObject.k2;
582 assert(SketchObject.k>0 && SketchObject.k2>=0 && SketchObject.k2<SketchObject.k) : "Bad values for k: "+k+", "+k2+", "+SketchObject.k+", "+SketchObject.k2;
583 sb.append(" hk=").append(SketchObject.k).append(',').append(SketchObject.k2);
584 }
585 if(SketchObject.amino){sb.append(" hamino=").append(SketchObject.amino);} //TODO: This conflicts with Parser flag
586 if(SketchObject.translate){sb.append(" translate=").append(SketchObject.translate);}
587 if(SketchObject.sixframes){sb.append(" sixframes=").append(SketchObject.sixframes);}
588 if(SketchObject.HASH_VERSION>1){sb.append(" hashversion=").append(SketchObject.HASH_VERSION);}
589
590 if(true){sb.append(" printSSU=").append(printSSU());}
591 if(requireSSU){sb.append(" requireSSU=").append(requireSSU);}
592 if(minRefSizeEstimate>0){sb.append(" minRefSizeEstimate=").append(minRefSizeEstimate);}
593 if(minRefSizeBases>0){sb.append(" minRefSizeBases=").append(minRefSizeBases);}
594
595 if(json()){sb.append(" printSSUSequence=").append(printSSUSequence);}
596 if(printSSULen){sb.append(" printSSULen=").append(printSSULen);}
597 if(true || printTax!=default_printTax){sb.append(" printTax=").append(printTax);}
598 // if(true || printFileName!=default_printFileName){sb.append(" printfname=").append(printFileName);}
599 if(true || printQueryFileName!=default_printQueryFileName){sb.append(" printqfname=").append(printQueryFileName);}
600 if(true || printRefFileName!=default_printRefFileName){sb.append(" printrfname=").append(printRefFileName);}
601 if(true || printOriginalName!=default_printOriginalName){sb.append(" pn0=").append(printOriginalName);}
602 if(true || printImg!=default_printImg){sb.append(" printImg=").append(printImg);}
603 if(true || printAni!=default_printAni){sb.append(" printAni=").append(printAni);}
604 if(!printKID){sb.append(" printKID=").append(printKID);}
605 if(!printWKID){sb.append(" printWKID=").append(printWKID);}
606 if(true || printCompleteness!=default_printCompleteness){sb.append(" printCompleteness=").append(printCompleteness);}
607
608 if(true || printUnique!=default_printUnique){sb.append(" printUMatches=").append(printUnique);}
609 if(true || printUnique2!=default_printUnique2){sb.append(" printUnique2=").append(printUnique2);}
610 if(true || printUnique3!=default_printUnique3){sb.append(" printUnique3=").append(printUnique3);}
611 if(true || printUContam!=default_printUContam){sb.append(" printUContam=").append(printUContam);}
612 if(true || printNoHit!=default_printNoHit){sb.append(" printNoHit=").append(printNoHit);}
613 if(true || printContam!=default_printContam){sb.append(" contam=").append(printContam);}
614 if(true){sb.append(" contam2=").append(printContam2 ? TaxTree.extendedToLevel(contamLevel)+"" : "f");}
615
616 if(true || printScore!=default_printScore){sb.append(" printScore=").append(printScore);}
617 if(true || printEValue!=default_printEValue){sb.append(" printEValue=").append(printEValue);}
618
619 if(true || printDepth!=default_printDepth){sb.append(" printDepth=").append(printDepth);}
620 if(true || printDepth2!=default_printDepth2){sb.append(" printDepth2=").append(printDepth2);}
621 if(true || printActualDepth!=default_printActualDepth){sb.append(" printActualDepth=").append(printActualDepth);}
622 if(true || printVolume!=default_printVolume){sb.append(" printVolume=").append(printVolume);}
623 if(true || printRefHits!=default_printRefHits){sb.append(" printRefHits=").append(printRefHits);}
624
625 if(true || printMatches!=default_printMatches){sb.append(" printMatches=").append(printMatches);}
626 if(true || printLength!=default_printLength){sb.append(" printLength=").append(printLength);}
627 if(true || printTaxID!=default_printTaxID){sb.append(" printTaxID=").append(printTaxID);}
628 if(true || printGSize!=default_printGSize){sb.append(" printGSize=").append(printGSize);}
629 if(true || gSizeKMG!=default_gSizeKMG){sb.append(" gSizeKMG=").append(gSizeKMG);}
630 if(true || printGC!=default_printGC){sb.append(" printGC=").append(printGC);}
631 if(true || printGKmers!=default_printGKmers){sb.append(" printGKmers=").append(printGKmers);}
632
633 if(printCommonAncestor){sb.append(" printCommonAncestor=").append(printCommonAncestor);}
634 if(printCommonAncestorLevel){sb.append(" printCommonAncestorLevel=").append(printCommonAncestorLevel);}
635
636 if(true || printTaxName!=default_printTaxName){sb.append(" printTaxName=").append(printTaxName);}
637 if(true || printGSeqs!=default_printGSeqs){sb.append(" printGSeqs=").append(printGSeqs);}
638 if(true || printGBases!=default_printGBases){sb.append(" printGBases=").append(printGBases);}
639 if(true || minEntropy!=default_minEntropy){sb.append(" minEntropy=").append(String.format(Locale.ROOT, "%.4f", minEntropy));}
640 if(true || minProb!=default_minProb){sb.append(" minProb=").append(String.format(Locale.ROOT, "%.4f", minProb));}
641 if(true || minQual!=default_minQual){sb.append(" minQual=").append((int)minQual);}
642 if(jsonArray!=default_jsonArray){sb.append(" jsonArray=").append(jsonArray);}
643 if(printD3!=default_printD3){sb.append(" d3=").append(printD3);}
644 if(printD3){
645 sb.append(" D3sizeMode=").append(D3sizeMode);
646 sb.append(" D3LevelNodes=").append(D3LevelNodes);
647 }
648 if(comparator!=Comparison.scoreComparator){sb.append(" ").append(comparator.toString());}
649
650 if(taxFilterWhiteList!=null || taxFilterWhiteString!=null){
651 if(taxFilterWhiteList!=null){sb.append(" taxfilterwhitelist=").append(taxFilterWhiteList);}
652 if(taxFilterWhiteString!=null){sb.append(" taxfilterwhitestring=").append(taxFilterWhiteString);}
653 sb.append(" taxlevelwhite=").append(taxLevelWhite);
654 }
655 if(taxFilterBlackList!=null || taxFilterBlackString!=null){
656 if(taxFilterBlackList!=null){sb.append(" taxfilterblacklist=").append(taxFilterBlackList);}
657 if(taxFilterBlackString!=null){sb.append(" taxfilterblackstring=").append(taxFilterBlackString);}
658 sb.append(" taxlevelblack=").append(taxLevelBlack);
659 }
660 if(banUnclassified){sb.append(" banunclassified");}
661 if(banVirus){sb.append(" banvirus");}
662
663 if(useTaxidName){sb.append(" useTaxidName=").append(useTaxidName);}
664 if(useImgName){sb.append(" useImgName=").append(useImgName);}
665 if(useTaxName){sb.append(" useTaxName=").append(useTaxName);}
666
667 if(true){sb.append(" colors=").append(printColors ? TaxTree.extendedToLevel(colorLevel)+"" : "f");}
668
669 if(minKeyOccuranceCount!=default_minKeyOccuranceCount){sb.append(" minKeyOccuranceCount=").append(minKeyOccuranceCount);}
670
671 // if(printColors && colorLevel!=default_colorLevel){sb.append(" colorLevel=").append(TaxTree.extendedToLevel(colorLevel));}
672
673
674 if(printRefDivisor){sb.append(" printRefDivisor=").append(printRefDivisor);}
675 if(printQueryDivisor){sb.append(" printQueryDivisor=").append(printQueryDivisor);}
676 if(printRefSize){sb.append(" printRefSize=").append(printRefSize);}
677 if(printQuerySize){sb.append(" printQuerySize=").append(printQuerySize);}
678 if(printContamHits){sb.append(" printContamHits=").append(printContamHits);}
679 if(printIntersection){sb.append(" printIntersection=").append(printIntersection);}
680 if(mergePairs){sb.append(" mergePairs=").append(mergePairs);}
681
682 if(maxReads>-1){sb.append(" reads=").append(maxReads);}
683 if(mode!=default_mode){sb.append(" mode=").append(mode);}
684 if(samplerate!=default_samplerate){sb.append(" samplerate=").append(String.format(Locale.ROOT, "%.4f",samplerate));}
685
686 if(!requiredMetaAnd){sb.append(" requiredmetaand="+requiredMetaAnd);}
687 if(requiredMeta!=null && !requiredMeta.isEmpty()){
688 sb.append(" rmeta=");
689 for(String s : requiredMeta){
690 sb.append(s);
691 sb.append(',');
692 }
693 sb.setLength(sb.length()-1);
694 }
695 if(bannedMeta!=null && !bannedMeta.isEmpty()){
696 sb.append(" bmeta=");
697 for(String s : bannedMeta){
698 sb.append(s);
699 sb.append(',');
700 }
701 sb.setLength(sb.length()-1);
702 }
703 // if(requiredTaxid!=null && !requiredTaxid.isEmpty()){
704 // sb.append(" rtaxid=");
705 // for(int i=0; i<requiredTaxid.size; i++){
706 // sb.append(requiredTaxid.get(i));
707 // sb.append(',');
708 // }
709 // sb.setLength(sb.length()-1);
710 // }
711 // if(bannedTaxid!=null && !bannedTaxid.isEmpty()){
712 // sb.append(" btaxid=");
713 // for(int i=0; i<bannedTaxid.size; i++){
714 // sb.append(bannedTaxid.get(i));
715 // sb.append(',');
716 // }
717 // sb.setLength(sb.length()-1);
718 // }
719
720 sb.append('\n');
721 return sb.toString();
722 }
723
724 public boolean compatible(){
725 return SketchObject.k==k && SketchObject.k2==k2 && SketchObject.aminoOrTranslate()==aminoOrTranslate() && hashVersion==SketchObject.HASH_VERSION;
726 }
727
728 public void setPrintAll(){
729 printSSU=true;
730 printSSULen=true;
731 printSSUSequence=true;
732 printTax=true;
733 printQueryFileName=true;
734 printRefFileName=true;
735 printOriginalName=true;
736 printImg=true;
737 printAni=true;
738 printKID=true;
739 printWKID=true;
740 printCompleteness=true;
741 printScore=true;
742 printEValue=true;
743 printDepth=true;
744 printDepth2=true;
745 printVolume=true;
746 printRefHits=true;
747
748 printMatches=true;
749 printLength=true;
750 printTaxID=true;
751 printGSize=true;
752 printGC=true;
753 printGKmers=true;
754 printTaxName=true;
755 printGSeqs=true;
756 printGBases=true;
757
758 // printColors=true;
759
760 printUnique=true;
761 printUnique2=true;
762 printUnique3=true;
763 printUContam=true;
764 printNoHit=true;
765 printContam=true;
766 printContam2=true;
767
768 printRefDivisor=true;
769 printQueryDivisor=true;
770 printRefSize=true;
771 printQuerySize=true;
772 printContamHits=true;
773 }
774
775 /*--------------------------------------------------------------*/
776 /*---------------- JSON ----------------*/
777 /*--------------------------------------------------------------*/
778
779 public JsonObject toJson(SketchResults sr){
780 JsonObject j=toJson(sr.sketch);
781 if(sr.list!=null){
782 int i=0;
783 for(Comparison c : sr.list){
784 JsonObject jc=toJson(c);
785 j.add(c.name(), jc);
786 i++;
787 if(i>=maxRecords){break;}
788 }
789 }
790
791 if(jsonArray){
792 toJsonArrayForm(j);
793 }
794
795 if(printD3){
796 j.add("D3", toD3(sr));
797 }
798
799 return j;
800 }
801
802 public void toJsonArrayForm(JsonObject j0){
803 if(j0.jmapSize()<1){return;}
804 ArrayList<Object> list1=new ArrayList<Object>(j0.jmapSize());
805 Object[] keys=null;
806 for(Entry<String, JsonObject> e1 : j0.jmap.entrySet()){
807 JsonObject j1=e1.getValue();
808 ArrayList<Object> list2=new ArrayList<Object>(j1.omapSize());
809 for(Entry<String, Object> e2 : j1.omap.entrySet()){
810 Object o2=e2.getValue();
811 list2.add(o2);
812 }
813 list1.add(list2.toArray());
814 if(keys==null){
815 ArrayList<Object> keyList=new ArrayList<Object>(j1.omapSize());
816 for(Entry<String, Object> e2 : j1.omap.entrySet()){
817 Object o2=e2.getKey();
818 keyList.add(o2);
819 }
820 keys=keyList.toArray();
821 }
822 }
823
824 JsonObject title=new JsonObject();
825 for(Entry<String, Object> e : j0.omap.entrySet()){
826 title.add(e.getKey(), e.getValue());
827 }
828
829 j0.clearJson();
830 j0.clearOmap();
831
832 j0.add("title", title);
833 j0.add("header", keys);
834 j0.add("rows", list1.toArray());
835 }
836
837 public JsonObject toJson(Sketch sk){
838 assert(format==FORMAT_JSON);
839
840 JsonObject j=new JsonObject();
841 j.add("Name", sk.name());
842 if(dbName!=null){j.add("DB", dbName);}
843 j.add("SketchLen", sk.length());
844
845 j.add("Seqs", sk.genomeSequences);
846 j.add("Bases", sk.genomeSizeBases);
847 j.add("gSize", sk.genomeSizeEstimate());
848 if(sk.baseCounts!=null){j.addLiteral("GC", sk.gc(), 3);}
849 if(sk.probCorrect<1 && sk.probCorrect>0){j.add("Quality", sk.probCorrect);}
850 if(sk.keyCounts!=null){
851 double d=Tools.averageDouble(sk.keyCounts);
852 j.add("AvgCount", d);
853 j.add("Depth", Tools.observedToActualCoverage(d));
854 }
855
856 if(sk.imgID>0){j.add("IMG", sk.imgID);}
857 if(sk.spid>0){j.add("spid", sk.spid);}
858 if(sk.taxID>0 && sk.taxID<SketchObject.minFakeID){j.add("TaxID", sk.taxID);}
859
860 if((printRefFileName) && sk.fname()!=null){j.add("file", sk.fname());}
861 if(printOriginalName && sk.name0()!=null){j.add("SeqName", sk.name0());}
862
863 if(sk.meta!=null){
864 for(String st : sk.meta){
865 int colon=st.indexOf(':');
866 j.add(st.substring(0, colon), st.substring(colon+1));
867 }
868 }
869
870 if(printSSULen){
871 if(sk.r16SLen()>0){j.add("16SLen", sk.r16SLen());}
872 if(sk.r18SLen()>0){j.add("18SLen", sk.r18SLen());}
873 }
874 if(printSSUSequence){
875 if(sk.r16S()!=null){j.add("16SSequence", new String(sk.r16S()));}
876 if(sk.r18S()!=null){j.add("18SSequence", new String(sk.r18S()));}
877 }
878
879 return j;
880 }
881
882 public JsonObject toJson(Comparison c){
883 final int tid=c.taxID;
884
885 JsonObject j=new JsonObject();
886
887 //Text fields
888 if(printTaxName){j.add("taxName", c.taxName()==null ? "." : c.taxName());}
889
890 if(printCommonAncestor){j.add("commonAncestor", c.commonAncestor());}
891 if(printCommonAncestorLevel){j.add("commonAncestorLevel", c.commonAncestorLevel());}
892
893 if(printRefFileName){j.add("file", c.fname()==null ? "." : c.fname());}
894 if(printOriginalName){j.add("seqName", c.name0()==null ? "." : c.name0());}
895 if(printTax && SketchObject.taxtree!=null){
896 TaxNode tn=null;
897 if(tid>0 && tid<SketchObject.minFakeID){
898 tn=SketchObject.taxtree.getNode(tid);
899 }
900
901 if(tn!=null){
902 j.add("taxonomy", SketchObject.taxtree.toSemicolon(tn, SketchObject.skipNonCanonical, false));
903 }else{
904 j.add("taxonomy", (Object)null);
905 }
906 }
907
908 if(printWKID){j.addLiteral("WKID", 100*c.wkid(), 4);}
909 if(printKID){j.addLiteral("KID", 100*c.kid(), 4);}
910 // if(printSSU() && c.ssuIdentity()>0){j.addLiteral("SSU", 100*c.ssuIdentity(), 3);} //Old
911 if(printSSU() && c.ssuIdentity()>0){
912 j.addLiteral(c.ssuType()==18 ? "18S" : "16S", 100*c.ssuIdentity(), 3);
913 }
914
915 //Primary fields
916 if(printAni){j.addLiteral((aminoOrTranslate() ? "AAI" : "ANI"), 100*c.ani(), 3);}
917 if(printCompleteness){j.addLiteral("Complt", 100*c.completeness(), 3);}
918 if(printContam){j.addLiteral("Contam", 100*c.contamFraction(), 3);}
919 if(printContam2){j.addLiteral("Contam2", 100*c.contam2Fraction(), 3);}
920 if(printUContam){j.addLiteral("uContam", 100*c.uContamFraction(), 3);}
921 if(printScore){j.add("Score", c.score());}
922 if(printEValue){j.add("E-Val", String.format(Locale.ROOT, "%5.2e", c.eValue()));}
923
924 if(printDepth){j.add("Depth", c.depth(printActualDepth));}
925 if(printDepth2){j.add("Depth2", c.depth2(printActualDepth));}
926 if(printVolume){j.add("Volume", c.volume()+0.001);}
927 if(printRefHits){j.add("RefHits", c.avgRefHits());}
928
929 if(printMatches){j.add("Matches", c.hits());}
930 if(printUnique){j.add("Unique", c.uHits());}
931 if(printUnique2){j.add("Unique2", c.unique2());}
932 if(printUnique3){j.add("Unique3", c.unique3());}
933 if(printNoHit){j.add("noHit", c.noHits());}
934 if(printLength){j.add("Length", c.maxDivisor());}
935 if(printTaxID){j.add("TaxID", tid>=SketchObject.minFakeID ? -1 : tid);}
936 if(printImg){j.add("ImgID", c.imgID());}
937 if(printGBases){j.add("gBases", c.genomeSizeBases());}
938 if(printGKmers){j.add("gKmers", c.genomeSizeKmers());}
939 if(printGSize){j.add("gSize", c.genomeSizeEstimate());}
940 if(printGSeqs){j.add("gSeqs", c.genomeSequences());}
941 if(c.hasGC()){j.addLiteral("GC", c.gc(), 3);}
942
943 //Raw fields
944 if(printRefDivisor){j.add("rDiv", c.refDivisor());}
945 if(printQueryDivisor){j.add("qDiv", c.queryDivisor());}
946 if(printRefSize){j.add("rSize", c.refSize());}
947 if(printQuerySize){j.add("qSize", c.querySize());}
948 if(printContamHits){j.add("cHits", c.contamHits());}
949
950
951
952 if(printSSULen){
953 if(c.has18S()){j.add("18SLen", c.b.r18SLen());}
954 /*else*/ if(c.has16S()){j.add("16SLen", c.b.r16SLen());}
955 }
956 if(printSSUSequence){
957 if(c.has18S()){j.add("18SSequence", new String(c.b.r18S()));}
958 /*else*/ if(c.has16S()){j.add("16SSequence", new String(c.b.r16S()));}
959 }
960
961 if(printIntersection){
962 Sketch intersection=Sketch.intersection(c.a, c.b);
963 j.add("intersection", intersection.toString());
964 }
965
966 return j;
967 }
968
969 public boolean json(){return format==FORMAT_JSON;}
970
971 /*--------------------------------------------------------------*/
972 /*---------------- D3 ----------------*/
973 /*--------------------------------------------------------------*/
974
975 public JsonObject toD3(SketchResults sr){
976 if(sr==null || sr.isEmpty()){return new JsonObject("name", "no hits");}
977 JsonObject root=new JsonObject("name", "life");
978 root.add("level", TaxTree.LIFE_E);
979 if(sr.list!=null){
980 int i=0;
981 for(Comparison c : sr.list){
982 ArrayList<JsonObject> tax=toD3List(c);
983 addToLevel(root, tax, 0);
984 i++;
985 if(i>=maxRecords){break;}
986 }
987 }
988 if(D3LevelNodes){
989 root=converToD3ArrayFormat_LevelNode(root);
990 }else{
991 root=converToD3ArrayFormat_SingleNodeRoot(root);
992 }
993 return root;
994 }
995
996 private JsonObject converToD3ArrayFormat_SingleNodeRoot(JsonObject root){
997 JsonObject children=root.removeJson("children");
998 if(children==null){return root;}
999 Object[] array=children.toJmapArray();
1000 root=(JsonObject)array[0];//Life node
1001
1002 assert(root.getString("name").equalsIgnoreCase("Life")) : root;
1003 return converToD3ArrayFormat_SingleNode(root);
1004 }
1005
1006 private JsonObject converToD3ArrayFormat_SingleNode(JsonObject nameNode){
1007 Object[] levelNodes=nameNode.toJmapArray();
1008 if(levelNodes==null){return nameNode;}
1009 nameNode.clearJson();
1010
1011 ArrayList<JsonObject> fixed=new ArrayList<JsonObject>();
1012 for(Object o : levelNodes){
1013 JsonObject levelNode=(JsonObject)o;
1014 String level=levelNode.getString("name");
1015 JsonObject children=levelNode.removeJson("children");
1016 if(children!=null){
1017 Object[] childArray=children.toJmapArray();
1018 for(Object o2 : childArray){
1019 JsonObject child=(JsonObject)o2;//Now a name node
1020 String name=(String)child.removeObject("name");
1021 child.add("name", level+": "+name);
1022 converToD3ArrayFormat_SingleNode(child);
1023 fixed.add(child);
1024 }
1025 }
1026 }
1027 Object[] children=fixed.toArray();
1028 nameNode.add("children", children);
1029 return nameNode;
1030 }
1031
1032 private JsonObject converToD3ArrayFormat_LevelNode(JsonObject levelNode){
1033 JsonObject children=levelNode.removeJson("children");
1034 if(children==null){return levelNode;}
1035
1036 Object[] array=children.toJmapArray();
1037 levelNode.add("children", array);
1038 for(Object o : array){
1039 converToD3ArrayFormat_NameNode((JsonObject)o);
1040 }
1041 return levelNode;
1042 }
1043
1044 private JsonObject converToD3ArrayFormat_NameNode(JsonObject nameNode){
1045 Object[] array=nameNode.toJmapArray();
1046 if(array==null){return nameNode;}
1047
1048 nameNode.clearJson();
1049 nameNode.add("children", array);
1050 for(Object o : array){
1051 converToD3ArrayFormat_LevelNode((JsonObject)o);
1052 }
1053 return nameNode;
1054 }
1055
1056 void addToLevel(JsonObject levelNode, ArrayList<JsonObject> list, int pos){
1057 JsonObject jo=list.get(pos);
1058 int rootLevel=levelNode.getInt("level");
1059 int joLevel=jo.getInt("level");
1060 if(rootLevel==joLevel){
1061 assert(levelNode.getString("name").equalsIgnoreCase(jo.getString("levelname"))) : levelNode+"\n"+jo;
1062 addAsChild(levelNode, list, pos);
1063 }else{
1064 assert(joLevel<rootLevel) : levelNode+"\n"+jo;
1065 assert(false) : levelNode+"\n"+jo;
1066 }
1067 }
1068
1069 void addAsChild(JsonObject levelNode, ArrayList<JsonObject> list, int pos){
1070 JsonObject children=levelNode.getJson("children");
1071 if(children==null){
1072 children=new JsonObject();
1073 levelNode.add("children", children);
1074 }
1075 JsonObject jo=list.get(pos);
1076 String taxName=jo.getString("name");
1077 JsonObject nameNode=children.getJson(taxName);
1078 if(nameNode==null){
1079 nameNode=new JsonObject("name", taxName);
1080 children.add(taxName, nameNode);
1081 }
1082 Number size=jo.getNumber("size");
1083 Number oldSize=nameNode.getNumber("size");
1084 if(size!=null && (oldSize==null || oldSize.doubleValue()<size.doubleValue())){
1085 nameNode.add("size", jo.getNumber("size"));
1086 nameNode.add("kid", jo.getNumber("kid"));
1087 nameNode.add("wkid", jo.getNumber("wkid"));
1088 nameNode.add("ani", jo.getNumber("ani"));
1089 nameNode.add("hits", jo.getNumber("hits"));
1090 nameNode.add("depth", jo.getNumber("depth"));
1091 }
1092
1093 if(pos<list.size()-1){//recur
1094 jo=list.get(pos+1);
1095 String levelName=jo.getString("levelname");
1096 int level=jo.getInt("level");
1097 JsonObject nextLevelNode=nameNode.getJson(levelName);
1098 if(nextLevelNode==null){
1099 nextLevelNode=new JsonObject("name", levelName);
1100 nextLevelNode.add("level", level);
1101 nameNode.add(levelName, nextLevelNode);
1102 }
1103 addAsChild(nextLevelNode, list, pos+1);
1104 }
1105 }
1106
1107 int promote(int levelE) {
1108 if(levelE<0){return levelE;}
1109 while(!TaxTree.isSimple2(levelE) && levelE<TaxTree.LIFE){
1110 levelE++;
1111 }
1112 return levelE;
1113 }
1114
1115 public ArrayList<JsonObject> toD3List(Comparison c){
1116 final ArrayList<TaxNode> nodes=toTNList(c.taxID);
1117 ArrayList<JsonObject> list=new ArrayList<JsonObject>(nodes.size());
1118 for(TaxNode tn : nodes){
1119 JsonObject jo=new JsonObject("name", tn.name);
1120 int levelE=promote(tn.levelExtended);
1121 jo.add("level", levelE);
1122 jo.add("levelname", TaxTree.levelToStringExtended(levelE));
1123 list.add(jo);
1124 }
1125 if(list.size()>0){
1126 JsonObject tail=list.get(list.size()-1);
1127 tail.add("size", toD3Size(c));
1128 tail.add("kid", c.kid());
1129 tail.add("wkid", c.wkid());
1130 tail.add("ani", c.ani());
1131 tail.add("hits", c.hits());
1132 tail.add("depth", c.depth(printActualDepth));
1133 }
1134 return list;
1135 }
1136
1137 private Number toD3Size(Comparison c){
1138 if(D3sizeMode==D3_ANI_SIZE){
1139 return c.ani();
1140 }else if(D3sizeMode==D3_KID_SIZE){
1141 return c.kid();
1142 }else if(D3sizeMode==D3_WKID_SIZE){
1143 return c.wkid();
1144 }else if(D3sizeMode==D3_HIT_SIZE){
1145 return c.hits();
1146 }else if(D3sizeMode==D3_DEPTH_SIZE){
1147 return c.depth(printActualDepth);
1148 }
1149 assert(false) : "Invalid D3sizeMode "+D3sizeMode;
1150 return c.hits();
1151 }
1152
1153 public ArrayList<TaxNode> toTNList(final int tid){
1154 final TaxTree tree=TaxTree.getTree();
1155
1156 final ArrayList<TaxNode> list=new ArrayList<TaxNode>();
1157 int nulls=0;
1158 {
1159 TaxNode tn=tree.getNode(tid);
1160 if(tn.isRanked() && !tn.cellularOrganisms()){list.add(tn);}
1161 while(tn.pid!=tn.id){
1162 tn=tree.getNode(tn.pid);
1163 if(tn.isRanked() && !tn.cellularOrganisms()){list.add(tn);}
1164 }
1165 }
1166 Collections.reverse(list);
1167 int prevLevelE=TaxTree.LIFE;
1168 for(int i=0; i<list.size(); i++){
1169 TaxNode tn=list.get(i);
1170 int levelE=promote(tn.levelExtended);
1171
1172 if(!TaxTree.isSimple2(levelE) || (i>0 && levelE>=prevLevelE)){
1173 list.set(i, null);
1174 nulls++;
1175 }else{prevLevelE=levelE;}
1176 }
1177 if(nulls>0){Tools.condenseStrict(list);}
1178 return list;
1179 }
1180
1181 /*--------------------------------------------------------------*/
1182 /*---------------- Formatting ----------------*/
1183 /*--------------------------------------------------------------*/
1184
1185 ByteBuilder queryHeader(Sketch sk){
1186 ByteBuilder bb=new ByteBuilder();
1187 if(format>2){return bb;}
1188
1189 String color=toColor(sk.taxID);
1190 if(color!=null){bb.append(color);}
1191
1192 bb.append("\nQuery: ").append(sk.name()==null ? "." : sk.name());
1193 if(dbName!=null){bb.append("\tDB: ").append(dbName);}
1194 bb.append("\tSketchLen: ").append(sk.length());
1195 bb.append("\tSeqs: ").append(sk.genomeSequences).append(' ');
1196 bb.append("\t"+(aminoOrTranslate() ? "SeqLen" : "Bases")+": ").append(sk.genomeSizeBases);
1197 bb.append("\tgSize: ").append(sk.genomeSizeEstimate());
1198 if(sk.baseCounts!=null){bb.append("\tGC: ").append(sk.gc(), 3);}
1199 if(sk.probCorrect<1 && sk.probCorrect>0){bb.append("\tQuality: ").append(sk.probCorrect, 4);}
1200 if(sk.keyCounts!=null){
1201 double d=Tools.averageDouble(sk.keyCounts);
1202 bb.append("\tAvgCount: ").append(d, 3);
1203 bb.append("\tDepth: ").append(Tools.observedToActualCoverage(d), 3);
1204 }
1205
1206 if(sk.imgID>0){bb.append("\tIMG: ").append(sk.imgID);}
1207 if(sk.spid>0){bb.append("\tspid: ").append(sk.spid);}
1208 if(sk.taxID>0 && sk.taxID<SketchObject.minFakeID){bb.append("\tTaxID: ").append(sk.taxID);}
1209
1210 if(printQueryFileName && sk.fname()!=null){bb.append("\tFile: "+sk.fname());}
1211 if(printOriginalName && sk.name0()!=null && !sk.name0().equals(sk.name())){bb.append("\tSeqName: "+sk.name0());}
1212
1213 if(sk.meta!=null){
1214 for(String st : sk.meta){
1215 bb.append("\t").append(st.replaceFirst(":", ": "));
1216 }
1217 }
1218
1219 if(color!=null){bb.append(Colors.RESET);}
1220
1221 return bb;
1222 }
1223
1224 int toColorTid(final int taxID){
1225 if(!printColors || SketchObject.taxtree==null || taxID<=0 || taxID>=SketchObject.minFakeID){return 0;}
1226 TaxNode tn=SketchObject.taxtree.getNode(taxID);
1227 while(tn!=null && tn.id!=tn.pid && tn.levelExtended<colorLevel){
1228 tn=SketchObject.taxtree.getNode(tn.pid);
1229 // System.err.println(tn);
1230 }
1231 return tn==null || tn.levelExtended>=TaxTree.LIFE_E || (tn.levelExtended>colorLevel && tn.levelExtended>TaxTree.PHYLUM_E) ? 0 : tn.id;
1232 }
1233
1234 String toColor(final int taxID){
1235 if(!printColors || SketchObject.taxtree==null || taxID<=0 || taxID>=SketchObject.minFakeID){return null;}
1236 TaxNode tn=SketchObject.taxtree.getNode(taxID);
1237 while(tn!=null && tn.id!=tn.pid && tn.levelExtended<colorLevel){
1238 tn=SketchObject.taxtree.getNode(tn.pid);
1239 // System.err.println(tn);
1240 }
1241 if(tn==null){
1242 return null;
1243 }else{
1244 if(tn.levelExtended>=TaxTree.LIFE_E || (tn.levelExtended>colorLevel && tn.levelExtended>TaxTree.PHYLUM_E)){return Colors.WHITE;}
1245 else{
1246 // System.err.println("*"+tn.id+", "+tn.id%Colors.colorArray.length);
1247 return Colors.colorArray[tn.id%Colors.colorArray.length];
1248 }
1249 }
1250 }
1251
1252 String header(){
1253 if(format==FORMAT_JSON){return null;}
1254 final String ani=(aminoOrTranslate() ? "AAI" : "ANI");
1255 if(format==FORMAT_QUERY_REF_ANI || format==FORMAT_CONSTELLATION){
1256 if(reportAniOnly){return "#Query\tRef\t"+ani;}
1257 if(format==FORMAT_QUERY_REF_ANI){
1258 return "#Query\tRef\t"+ani+
1259 "\tQSize\tRefSize\tQBases\tRBases"+
1260 (printTaxID ? "\tQTaxID\tRTaxID" : "")+(printKID ? "\tKID" : "")+(printWKID ? "\tWKID" : "")+
1261 (printSSU() ? "\tSSU" : "")+(printCommonAncestorLevel ? "\tCALevel" : "");
1262 }
1263 if(format==FORMAT_CONSTELLATION){return "#Query\tRef\tKID\tWKID\t"+ani+"\tCmplt\tQSize\tRefSize\tQBases\tRefBases";}
1264 }
1265 return columnwiseHeader();
1266 }
1267
1268 String columnwiseHeader(){
1269 final String ani=(aminoOrTranslate() ? "AAI" : "ANI");
1270
1271 StringBuilder sb=new StringBuilder();
1272
1273 //Numeric fields
1274 if(printKID){sb.append("WKID\t");}
1275 if(printWKID){sb.append("KID\t");}
1276 if(printAni){sb.append(ani+"\t");}
1277 if(printSSU()){sb.append("SSU\t");}
1278 if(printSSULen){sb.append("SSULen\t");}
1279 if(printCompleteness){sb.append("Complt\t");}
1280 if(printContam){sb.append("Contam\t");}
1281 if(printContam2){sb.append("Contam2\t");}
1282 if(printUContam){sb.append("uContam\t");}
1283 if(printScore){sb.append("Score\t");}
1284 if(printEValue){sb.append("E-Val\t");}
1285
1286 if(printDepth){sb.append("Depth\t");}
1287 if(printDepth2){sb.append("Depth2\t");}
1288 if(printVolume){sb.append("Volume\t");}
1289 if(printRefHits){sb.append("RefHits\t");}
1290 if(printMatches){sb.append("Matches\t");}
1291 if(printUnique){sb.append("Unique\t");}
1292 if(printUnique2){sb.append("Unique2\t");}
1293 if(printUnique3){sb.append("Unique3\t");}
1294 if(printNoHit){sb.append("noHit\t");}
1295 if(printLength){sb.append("Length\t");}
1296 if(printTaxID){sb.append("TaxID\t");}
1297 if(printImg){sb.append("ImgID \t");}
1298 if(printGBases){sb.append("gBases\t");}
1299 if(printGKmers){sb.append("gKmers\t");}
1300 if(printGSize){sb.append("gSize\t");}
1301 if(printGSeqs){sb.append("gSeqs\t");}
1302 if(printGC){sb.append("GC\t");}
1303
1304
1305 //Raw fields
1306 if(printRefDivisor){sb.append("rDiv\t");}
1307 if(printQueryDivisor){sb.append("qDiv\t");}
1308 if(printRefSize){sb.append("rSize\t");}
1309 if(printQuerySize){sb.append("qSize\t");}
1310 if(printContamHits){sb.append("cHits\t");}
1311
1312 //Text fields
1313 if(printCommonAncestor){sb.append("CA\t");}
1314 if(printCommonAncestorLevel){sb.append("CALevel\t");}
1315 if(printTaxName){sb.append("taxName\t");}
1316 if(printRefFileName){sb.append("file\t");}
1317 if(printOriginalName){sb.append("seqName\t");}
1318 if(printTax && SketchObject.taxtree!=null){sb.append("taxonomy\t");}
1319
1320 if(sb.length()>1){sb.setLength(sb.length()-1);}//trim trailing tab
1321
1322 return sb.toString();
1323 }
1324
1325 void formatComparisonColumnwise(Comparison c, ByteBuilder bb, int prevTid){
1326 final int tid=c.taxID;
1327 boolean reset=false;
1328
1329 if(printColors){
1330 final int ctid=toColorTid(tid);
1331 final int prevCtid=toColorTid(prevTid);
1332
1333 final int cnum=ctid%Colors.colorArray.length;
1334 final int prevCnum=prevCtid%Colors.colorArray.length;
1335
1336 String color=toColor(tid);
1337 String underline=(printColors && cnum==prevCnum && ctid!=prevCtid && (ctid>1 && prevCtid>1) ? Colors.UNDERLINE : null);
1338
1339 if(color!=null){bb.append(color);}
1340 if(underline!=null){bb.append(underline);}
1341 reset=(color!=null || underline!=null);
1342
1343 // System.err.println((color==null ? "" : color)+(underline==null ? "" : underline)+
1344 // tid+", "+prevTid+"; \t"+ctid+", "+prevCtid+"; \t"+cnum+", "+prevCnum+"; \t"+((underline!=null)+"")+Colors.RESET);
1345 // System.err.println(color==null ? "null" : color.substring(1));
1346 }
1347
1348 // sb.append(String.format(Locale.ROOT, "%.2f%%\t%.2f%%", 100*c.idMinDivisor(), 100*c.idMaxDivisor()));
1349 if(printWKID){bb.append(100*c.wkid(), 2).append('%').tab();}
1350 if(printKID){bb.append(100*c.kid(), 2).append('%');}
1351
1352 // if(printAni){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.ani()));}
1353 // if(printCompleteness){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.completeness()));}
1354 // if(printContam){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.contamFraction()));}
1355 // if(printContam2){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.contam2Fraction()));}
1356 // if(printUContam){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.uContamFraction()));}
1357
1358 if(printAni){bb.tab().append(100*c.ani(), 2).append('%');}
1359 if(printSSU()){
1360 float id=100*c.ssuIdentity();
1361 if(id>0){
1362 bb.tab().append(id, 2).append(c.ssuType()==16 ? '%' : '*'); //This is where 16S and 18S are differentiated
1363 }else{
1364 bb.tab().append('.');
1365 }
1366 }
1367 if(printSSULen){
1368 bb.tab().append(c.ssuLen());
1369 }
1370 if(printCompleteness){bb.tab().append(100*c.completeness(), 2).append('%');}
1371 if(printContam){bb.tab().append(100*c.contamFraction(), 2).append('%');}
1372 if(printContam2){bb.tab().append(100*c.contam2Fraction(), 2).append('%');}
1373 if(printUContam){bb.tab().append(100*c.uContamFraction(), 2).append('%');}
1374 if(printScore){bb.tab().append(c.scoreS());}
1375 if(printEValue){bb.tab().append(String.format(Locale.ROOT, "%5.2e", c.eValue()));}
1376
1377 if(printDepth){bb.tab().append(c.depthS(printActualDepth));}
1378 if(printDepth2){bb.tab().append(c.depth2S(printActualDepth));}
1379 if(printVolume){bb.tab().append(c.volumeS());}
1380 if(printRefHits){bb.tab().append(c.avgRefHitsS());}
1381
1382 if(printMatches){bb.tab().append(c.hits());}
1383 if(printUnique){bb.tab().append(c.uHits());}
1384 if(printUnique2){bb.tab().append(c.unique2());}
1385 if(printUnique3){bb.tab().append(c.unique3());}
1386 if(printNoHit){bb.tab().append(c.noHits());}
1387 if(printLength){bb.tab().append( c.maxDivisor());}
1388 if(printTaxID){bb.tab().append(tid>=SketchObject.minFakeID ? -1 : tid);}
1389 if(printImg){bb.tab().append(c.imgID());}
1390 if(printGBases){appendKMG(c.genomeSizeBases(), bb);}
1391 if(printGKmers){appendKMG(c.genomeSizeKmers(), bb);}
1392 if(printGSize){appendKMG(c.genomeSizeEstimate(), bb);}
1393 if(printGSeqs){appendKMG(c.genomeSequences(), bb);}
1394 if(printGC){bb.tab().append(c.gc(),3);}
1395
1396 //Raw fields
1397 if(printRefDivisor){bb.tab().append(c.refDivisor());}
1398 if(printQueryDivisor){bb.tab().append(c.queryDivisor());}
1399 if(printRefSize){bb.tab().append(c.refSize());}
1400 if(printQuerySize){bb.tab().append(c.querySize());}
1401 if(printContamHits){bb.tab().append(c.contamHits());}
1402
1403 //Text fields
1404 if(printCommonAncestor){bb.tab().append(c.commonAncestor());}
1405 if(printCommonAncestorLevel){bb.tab().append(c.commonAncestorLevel());}
1406 if(printTaxName){bb.tab().append(c.taxName()==null ? "." : c.taxName());}
1407 if(printRefFileName){bb.tab().append(c.fname()==null ? "." : c.fname());}
1408 if(printOriginalName){bb.tab().append(c.name0()==null ? "." : c.name0());}
1409 if(printTax && SketchObject.taxtree!=null){
1410 bb.tab();
1411 TaxNode tn=null;
1412 if(tid>0 && tid<SketchObject.minFakeID){
1413 tn=SketchObject.taxtree.getNode(tid);
1414 }
1415
1416 if(tn!=null){
1417 bb.append(SketchObject.taxtree.toSemicolon(tn, SketchObject.skipNonCanonical, false));
1418 }else{
1419 bb.append('.');
1420 }
1421 }
1422 if(printTaxName && !printOriginalName && !printRefFileName && c.taxName()==null && c.name0()!=null){bb.tab().append(c.name0());} //Extra column
1423
1424 if(reset){bb.append(Colors.RESET);}
1425
1426 bb.append('\n');
1427
1428 if(printIntersection){
1429 Sketch intersection=Sketch.intersection(c.a, c.b);
1430 bb.append(intersection.toString());
1431 bb.append('\n');
1432 }
1433
1434 }
1435
1436 void appendKMG(long value, ByteBuilder bb){
1437 if(gSizeKMG){
1438 bb.tab().append(toKMG(value));
1439 }else{
1440 bb.tab().append(value);
1441 }
1442 }
1443
1444 String toKMG(long value){
1445 if(value<10000000L){return Long.toString(value);}
1446 value+=5;
1447 if(value<1000000000L){return value/1000L+"K";}
1448 if(value<1000000000000L){return value/1000000L+"M";}
1449 if(value<1000000000000000L){return value/1000000000L+"G";}
1450 return value/1000000000000L+"T";
1451 }
1452
1453 void formatComparison3Column(Comparison c, ByteBuilder sb, int prevTid){
1454 Sketch query=c.a;
1455 final long sea=Tools.max(1, c.a.genomeSizeEstimate());
1456 final long seb=Tools.max(1, c.b.genomeSizeEstimate());
1457 final long ba=Tools.max(1, c.a.genomeSizeBases);
1458 final long bb=Tools.max(1, c.b.genomeSizeBases);
1459 final String qName=format==FORMAT_CONSTELLATION ? (useFilePrefixName ? query.filePrefix() : ""+query.sketchID) : useTaxidName ? ""+query.taxID :
1460 useImgName ? ""+query.imgID : useTaxName ? query.taxName() : query.name();
1461 final String rName=format==FORMAT_CONSTELLATION ? (useFilePrefixName ? c.b.filePrefix() : ""+c.b.sketchID) : useTaxidName ? ""+c.taxID() :
1462 useImgName ? ""+c.imgID() : useTaxName ? c.taxName() : c.name();
1463 final int tid=c.taxID;
1464 boolean reset=false;
1465
1466 sb.append(qName).append('\t');
1467 if(printColors){
1468 final int ctid=toColorTid(tid);
1469 final int prevCtid=toColorTid(prevTid);
1470
1471 final int cnum=ctid%Colors.colorArray.length;
1472 final int prevCnum=prevCtid%Colors.colorArray.length;
1473
1474 String color=toColor(tid);
1475 String underline=(printColors && cnum==prevCnum && ctid!=prevCtid && (ctid>1 && prevCtid>1) ? Colors.UNDERLINE : null);
1476
1477 if(color!=null){sb.append(color);}
1478 if(underline!=null){sb.append(underline);}
1479 reset=(color!=null || underline!=null);
1480
1481 // System.err.println((color==null ? "" : color)+(underline==null ? "" : underline)+
1482 // tid+", "+prevTid+"; \t"+ctid+", "+prevCtid+"; \t"+cnum+", "+prevCnum+"; \t"+((underline!=null)+"")+Colors.RESET);
1483 // System.err.println(color==null ? "null" : color.substring(1));
1484 }
1485
1486 // sb.append(rName).append(String.format(Locale.ROOT, "\t%.2f\t%.3f", 100*c.ani(), sea/(float)seb));
1487 // sb.append(rName).append(String.format(Locale.ROOT, "\t%.2f\t%d\t%d\t%d", 100*c.ani(), sea, seb, ba));
1488
1489 //"#Query\tRef\tKID\tWKID\tANI\tCmplt\tQSize\tRefSize\tQBases\tRefBases";
1490
1491 float kid=100*c.kid();
1492 float wkid=100*c.wkid();
1493 float ani=100*c.ani();
1494 float complt=100*c.completeness();
1495 float ssu=printSSU() ? 100*c.ssuIdentity() : 0;
1496
1497 sb.append(rName).append('\t');
1498 if(reportAniOnly){
1499 sb.append(ani, 3).append('\t');
1500 }else if(format==FORMAT_CONSTELLATION){
1501 sb.append(kid, 3).append('\t');
1502 sb.append(wkid, 3).append('\t');
1503 sb.append(ani, 3).append('\t');
1504 sb.append(complt, 3).append('\t');
1505 sb.append(sea).append('\t');
1506 sb.append(seb).append('\t');
1507 // sb.append(ba).append('\t');
1508 // sb.append(bb).append('\t');
1509 }else{
1510 sb.append(ani, 3).append('\t');
1511 sb.append(sea).append('\t');
1512 sb.append(seb).append('\t');
1513 sb.append(ba).append('\t');
1514 sb.append(bb).append('\t');
1515 if(printTaxID){sb.append(c.a.taxID).append('\t');}
1516 if(printTaxID){sb.append(c.b.taxID).append('\t');}
1517 if(printKID){sb.append(kid, 3).append('\t');}
1518 if(printWKID){sb.append(wkid, 3).append('\t');}
1519 if(printSSU()){
1520 if(ssu>0){
1521 sb.append(ssu, 3).append('\t');
1522 }else{
1523 sb.append('.').append('\t');
1524 }
1525 }
1526 if(printCommonAncestorLevel){sb.append(c.commonAncestorLevel()).append('\t');}
1527 }
1528 sb.setLength(sb.length()-1);
1529 if(reset){sb.append(Colors.RESET);}
1530
1531 sb.append('\n');
1532
1533 // System.err.println(sb);
1534 }
1535
1536 void formatComparison(Comparison c, ByteBuilder sb, int prevTaxID){
1537 if(format==FORMAT_MULTICOLUMN){
1538 formatComparisonColumnwise(c, sb, prevTaxID);
1539 return;
1540 }else if(format==FORMAT_QUERY_REF_ANI || format==FORMAT_CONSTELLATION){
1541 formatComparison3Column(c, sb, prevTaxID);
1542 return;
1543 }
1544 String complt=(printCompleteness ? String.format(Locale.ROOT, "\tcomplt %.2f%%%%", 100*c.completeness()) : "");
1545 String contam=(printContam ? String.format(Locale.ROOT, "\tcontam %.2f%%%%", 100*c.contamFraction()) : "");
1546 // String score=(printScore ? String.format(Locale.ROOT, "\tscore %.2f", c.score2()) : "");
1547 String score=(printScore ? "\tscore "+c.scoreS() : "");
1548 String depth=(printDepth ? "\tdepth "+c.depthS(printActualDepth) : "");
1549 String depth2=(printDepth2 ? "\tdepth2 "+c.depth2S(printActualDepth) : "");
1550 String volume=(printVolume ? "\tvolume "+c.volumeS() : "");
1551 String ccs=complt+contam+score;
1552
1553 if(format==FORMAT_OLD){
1554 sb.append(String.format(Locale.ROOT, "WKID %.2f%%\tKID %.2f%%"+ccs+"\tmatches %d\tcompared %d",
1555 100*c.wkid(), 100*c.kid(), c.hits(), c.minDivisor())+"\ttaxID "+c.taxID()+
1556 (printImg ? "\timgID "+c.imgID() : "")+"\tgKmers "+c.genomeSizeKmers()+"\t"+
1557 (c.taxName()==null ? "." : c.taxName())+
1558 ((printOriginalName || (c.taxName()==null && c.name0()!=null)) ? "\t"+(c.name0()==null ? "." : c.name0()) : "")+"\n");
1559 if(printTax && SketchObject.taxtree!=null){
1560 if(c.taxID()>=0 && c.taxID()<SketchObject.minFakeID){
1561 TaxNode tn=SketchObject.taxtree.getNode(c.taxID());
1562 if(tn!=null){
1563 PrintTaxonomy.printTaxonomy(tn, sb, SketchObject.taxtree, TaxTree.DOMAIN, SketchObject.skipNonCanonical);
1564 }
1565 }
1566 sb.append('\n');
1567 }
1568 }else{
1569 ArrayList<TaxNode> tnl=new ArrayList<TaxNode>();
1570 if(SketchObject.taxtree!=null && c.taxID()>=0 && c.taxID()<SketchObject.minFakeID){
1571 TaxNode tn=SketchObject.taxtree.getNode(c.taxID());
1572 while(tn!=null && tn.pid!=tn.id && tn.level<=TaxTree.DOMAIN){
1573 tnl.add(tn);
1574 tn=SketchObject.taxtree.getNode(tn.pid);
1575 }
1576 }
1577
1578 sb.append(String.format(Locale.ROOT, "WKID %.2f%%\tKID %.2f%%"+ccs+"\tmatches %d\tcompared %d\t",
1579 100*c.wkid(), 100*c.kid(), c.hits(), c.minDivisor()));
1580 sb.append("\ttaxID ").append(c.taxID()).append('\t');
1581 if(printImg){sb.append("\timgID ").append(c.imgID()).append('\t');}
1582 sb.append(c.taxName()).append('\t');
1583 if(printRefFileName){sb.append(c.fname()).append('\t');}
1584 if(printOriginalName || (c.taxName()==null && c.name0()!=null && !printRefFileName)){sb.append(c.name0()).append('\t');}
1585
1586 if(printTax){
1587 for(int i=tnl.size()-1; i>=0; i--){
1588 TaxNode tn=tnl.get(i);
1589 sb.append(tn.name);
1590 if(i>0){sb.append(';');}
1591 }
1592 }
1593 sb.append('\n');
1594
1595 tnl.clear();
1596 }
1597 }
1598
1599 /*--------------------------------------------------------------*/
1600 /*---------------- Filtering ----------------*/
1601 /*--------------------------------------------------------------*/
1602
1603 public boolean passesFilter(Sketch sk){
1604 assert(postParsed);
1605 if(noFilters){return true;}
1606 return passesSSUFilter(sk) && passesSizeFilter(sk) && passesTaxFilter(sk) && passesMetaFilter(sk);
1607 }
1608
1609 private boolean passesTaxFilter(Sketch sk){
1610 if(taxFilterWhite==null && taxFilterBlack==null){return true;}
1611 int id=sk.taxID;
1612 if(id>0){
1613 if(banUnclassified && SketchObject.taxtree.isUnclassified(id)){return false;}
1614 if(banVirus && SketchObject.taxtree.isVirus(id)){return false;}
1615 }
1616 String s=sk.name();
1617 return passesTaxFilter(taxFilterWhite, id, s) && passesTaxFilter(taxFilterBlack, id, s);
1618 }
1619
1620 private boolean passesTaxFilter(TaxFilter filter, int id, String s){
1621 if(filter==null){return true;}
1622 if(id>0 && !filter.passesFilter(id)){return false;}
1623 // if(id>0 && !filter.passesFilterFast(id)){return false;}
1624 if(s!=null && !filter.passesFilterByNameOnly(s)){return false;}
1625 return true;
1626 }
1627
1628 private boolean passesMetaFilter(Sketch sk){
1629 if(requiredMeta==null && bannedMeta==null){return true;}
1630 return sk.passesMeta(requiredMeta, bannedMeta, requiredMetaAnd);
1631 }
1632
1633 private boolean passesSSUFilter(Sketch sk){
1634 return !requireSSU || sk.hasSSU();
1635 }
1636
1637 private boolean passesSizeFilter(Sketch sk){
1638 if(minRefSizeEstimate>0 && sk.genomeSizeEstimate()<minRefSizeEstimate){return false;}
1639 return sk.genomeSizeBases>=minRefSizeBases;
1640 }
1641
1642 /*--------------------------------------------------------------*/
1643 /*---------------- Fields ----------------*/
1644 /*--------------------------------------------------------------*/
1645
//These are shared with SketchObject; each is initialized from the SketchObject static of the same meaning when this object is constructed.
//They do not affect anything and are just for the server to validate remote settings.
private int hashVersion=SketchObject.HASH_VERSION;
private int k=SketchObject.k;
private int k2=SketchObject.k2;
boolean amino=SketchObject.amino;
boolean translate=SketchObject.translate;
boolean sixframes=SketchObject.sixframes;
/** @return true if either amino or translate is set. */
private boolean aminoOrTranslate(){return amino | translate;}

/** When true, passesFilter accepts every sketch without running the individual filters. */
boolean noFilters=false;
/** Asserted by passesFilter; clone() resets it to false on the copy. */
boolean postParsed=false;

/** @return the amino flag. */
boolean amino(){return amino;}
1660
//These are unique to DisplayParams (not mirrored from SketchObject).
public int maxRecords=default_maxRecords;
public int recordsPerLevel=0;
public float minANI=0;
public int minBases=0;
public float minSizeRatio=0;
public float minWKID=default_minWKID;
/** Output layout; one of the FORMAT_* constants. formatComparison dispatches on this value. */
public int format=default_format;

/** For tracking unique SendSketch queries */
public int chunkNum=-1;
public int minHits=default_minHits;
public int taxLevel=default_taxLevel;
public int mode=default_mode;
public float samplerate=default_samplerate;
public long maxReads=default_maxReads;
//NOTE(review): "Occurance" is misspelled but is part of the public field name, so it is left unchanged.
public int minKeyOccuranceCount=default_minKeyOccuranceCount;
public String inputVersion=null;

public String dbName=null;
1681
/** @return true if a required or banned metadata list is set. */
boolean hasMetaFilters(){return requiredMeta!=null || bannedMeta!=null/* || requiredTaxid!=null || bannedTaxid!=null*/;}
/** @return true if a taxonomic whitelist or blacklist is set, or unclassified/virus taxa are banned. */
boolean hasTaxFilters(){return taxFilterWhite!=null || taxFilterBlack!=null || banUnclassified || banVirus;}
/** When true, passesSSUFilter rejects sketches for which hasSSU() is false. */
boolean requireSSU=false;
/** Minimum genomeSizeEstimate() a sketch must have; passesSizeFilter ignores it unless positive. */
long minRefSizeEstimate=-1;
/** Minimum genomeSizeBases a sketch must have to pass passesSizeFilter. */
long minRefSizeBases=-1;

/** Passed to Sketch.passesMeta together with the two lists below; presumably selects AND vs OR matching of required tags - confirm in Sketch. */
boolean requiredMetaAnd=true;
/** Metadata terms handed to Sketch.passesMeta as the required list; null when unset. */
ArrayList<String> requiredMeta=null;
/** Metadata terms handed to Sketch.passesMeta as the banned list; null when unset. */
ArrayList<String> bannedMeta=null;
1691
1692 /*--------------------------------------------------------------*/
1693 /*---------------- Print Columns ----------------*/
1694 /*--------------------------------------------------------------*/
1695
/** Whether a KID column is written. */
public boolean printKID=true;
/** Whether a WKID column is written. */
public boolean printWKID=true;
/** Requested SSU output; use printSSU() for the effective setting. */
public boolean printSSU=true;
public boolean printSSULen=false;
/** @return true only if SSU processing is enabled in SketchObject and printSSU is set. */
public boolean printSSU(){return SketchObject.processSSU && printSSU;}
public boolean printSSUSequence=default_printSSUSequence;
1702
//For format 2
//Several of these (printTax, printOriginalName, printRefFileName, printImg, printCompleteness, printScore)
//are also read by formatComparison for the single-line layouts.
public boolean printTax=default_printTax;
public boolean printOriginalName=default_printOriginalName;
public boolean printQueryFileName=default_printQueryFileName;
public boolean printRefFileName=default_printRefFileName;
public boolean printImg=default_printImg;
public boolean printAni=default_printAni;
public boolean printCompleteness=default_printCompleteness;
public boolean printScore=default_printScore;
public boolean printEValue=default_printEValue;

/** Explicit request to track counts; trackCounts() also returns true when depth, volume, D3 or a non-default comparator is in use. */
private boolean trackCounts=default_trackCounts;
public boolean printDepth=default_printDepth;
public boolean printDepth2=default_printDepth2;
public boolean printActualDepth=default_printActualDepth;
public boolean printVolume=default_printVolume;
public boolean printRefHits=default_printRefHits;

public boolean printLength=default_printLength;
public boolean printTaxID=default_printTaxID;
public boolean printGSize=default_printGSize;
public boolean printGC=default_printGC;
public boolean gSizeKMG=default_gSizeKMG;
public boolean printGKmers=default_printGKmers;
public boolean printCommonAncestor=default_printCommonAncestor;
public boolean printCommonAncestorLevel=default_printCommonAncestorLevel;
public boolean printTaxName=default_printTaxName;
public boolean printGSeqs=default_printGSeqs;
public boolean printGBases=default_printGBases;
1732
public boolean jsonArray=default_jsonArray;
/** Setting this also makes trackCounts() return true. */
public boolean printD3=default_printD3;
public boolean D3LevelNodes=false;
/** One of the D3_*_SIZE constants below; defaults to D3_HIT_SIZE. */
public int D3sizeMode=D3_HIT_SIZE;
/** Allowed values for D3sizeMode. */
public static final int D3_HIT_SIZE=0, D3_ANI_SIZE=1, D3_KID_SIZE=2, D3_WKID_SIZE=3, D3_DEPTH_SIZE=4;
1738
1739 public float minEntropy=default_minEntropy;
1740
1741 //For k=32:
1742 //0.000095f is >=Q6 (75%); 0.0008 is >=Q7 (80%); 0.0039 is >=Q8 (84%).
1743 //0.002f is >=Q7.53 (82.3%)
1744 //0.0017f is >=Q7.44 (82.0%)
1745 //0.6f works better for Illumina reads but this is more robust for PacBio.
1746 public float minProb=0.0008f;
1747 public byte minQual=0;
1748
//Any of the following five flags makes needContamCounts() return true;
//printUnique2 and printUnique3 additionally make needIndex() return true.
public boolean printUnique=default_printUnique;
public boolean printUnique2=default_printUnique2;
public boolean printUnique3=default_printUnique3;
public boolean printUContam=default_printUContam;
public boolean printNoHit=default_printNoHit;

public boolean printColors=default_printColors;
public boolean setColors=false;
public int colorLevel=default_colorLevel;

/** TODO: Note this is conflated between printing %contam and calculating things based on contam hits. */
public boolean printContam=default_printContam;
/** Also makes needIndex() return true. */
public boolean printContam2=default_printContam2;
/** Raw setting; contamLevel() returns it only when needIndex() is true, otherwise -1. */
private int contamLevel=default_contamLevel;
1763
/** Raw fields */
public boolean printMatches=default_printMatches;

public boolean printRefDivisor=false;
public boolean printQueryDivisor=false;
public boolean printRefSize=false;
public boolean printQuerySize=false;
/** Also makes needContamCounts() return true. */
public boolean printContamHits=false;

public boolean mergePairs=false;
public boolean printIntersection=false;
1775
//For format 3 or 5 (FORMAT_QUERY_REF_ANI or FORMAT_CONSTELLATION)
public boolean useTaxidName=false;
public boolean useImgName=false;
public boolean useTaxName=false;
public boolean useFilePrefixName=false;
public boolean reportAniOnly=false;

public int taxLevelWhite=0;
public int taxLevelBlack=0;

public String taxFilterWhiteList=null;
public String taxFilterBlackList=null;

public String taxFilterWhiteString=null;
public String taxFilterBlackString=null;

/** Whitelist consulted by passesTaxFilter; null when unset. Deep-copied by clone(). */
public TaxFilter taxFilterWhite=null;
/** Blacklist consulted by passesTaxFilter; null when unset. Deep-copied by clone(). */
public TaxFilter taxFilterBlack=null;

/** When set, passesTaxFilter rejects sketches whose positive taxID the tax tree reports as unclassified. */
public boolean banUnclassified=false;
/** When set, passesTaxFilter rejects sketches whose positive taxID the tax tree reports as a virus. */
public boolean banVirus=false;
1797
1798 /** Make sure the settings are consistent, for CompareSketch.
1799 * This is not yet complete. */
1800 public boolean checkValid(){
1801 if(printUnique2 || printUnique3){
1802 assert(contamLevel()>=TaxTree.SUBSPECIES_E);
1803 assert(needContamCounts());
1804 assert(SketchObject.makeIndex);
1805 assert(SketchObject.taxtree!=null);
1806 }
1807 if(printContam2){
1808 assert(contamLevel()>=TaxTree.SUBSPECIES_E);
1809 assert(needContamCounts());
1810 assert(SketchObject.makeIndex);
1811 assert(SketchObject.taxtree!=null);
1812 }
1813 return true;
1814 }
1815
1816 public boolean trackCounts() {
1817 return trackCounts || printDepth || printDepth2 || printVolume
1818 || comparator!=Comparison.scoreComparator || printD3; //|| minKeyOccuranceCount>1;
1819 }
1820
1821 public boolean needContamCounts() {
1822 return printContam || printContam2 || printContamHits || printUnique || printUnique2 || printUnique3 || printUContam || printNoHit; // || true
1823 }
1824
1825 public boolean needIndex(){
1826 return printContam2 || printUnique2 || printUnique3;
1827 }
1828
1829 public int contamLevel() {
1830 return needIndex() ? contamLevel : -1;
1831 }
1832
1833 public int compare(Comparison a, Comparison b){
1834 return comparator.compare(a, b);
1835 }
1836
/** Ordering used by compare(); any value other than Comparison.scoreComparator makes trackCounts() return true. */
public Comparator<Comparison> comparator=Comparison.scoreComparator;
1838
1839 /*--------------------------------------------------------------*/
1840 /*---------------- Constants ----------------*/
1841 /*--------------------------------------------------------------*/
1842
/** Values for the format field. */
public static final int FORMAT_OLD=0, FORMAT_MULTICOLUMN=2, FORMAT_QUERY_REF_ANI=3, FORMAT_JSON=4, FORMAT_CONSTELLATION=5;
/** Initial value of printD3. */
public static final boolean default_printD3=false;
/** Initial value of jsonArray. */
public static final boolean default_jsonArray=false;
1846
//Each default_X constant below is the initial value of the instance field X.
public static final int default_maxRecords=20;
public static final float default_minWKID=0.0001f;
public static final int default_format=FORMAT_MULTICOLUMN;
public static final boolean default_printSSUSequence=false;
public static final boolean default_printTax=false;
public static final boolean default_printOriginalName=false;
public static final boolean default_printQueryFileName=true;
public static final boolean default_printRefFileName=false;
public static final boolean default_printImg=false;
public static final boolean default_printAni=true;
public static final boolean default_printCompleteness=true;
public static final boolean default_printScore=false;
public static final boolean default_printEValue=false;
1860
//Initial values for the count/depth fields of the same name.
public static final boolean default_trackCounts=false;
public static final boolean default_printDepth=false;
public static final boolean default_printDepth2=false;
public static final boolean default_printActualDepth=true;
public static final boolean default_printVolume=false;
public static final boolean default_printRefHits=false;

//Initial values for printContam and printContam2.
public static final boolean default_printContam=true;
public static final boolean default_printContam2=false;
1870
//Initial values for the column-selection fields of the same name.
public static final boolean default_printMatches=true;
public static final boolean default_printLength=false;
public static final boolean default_printTaxID=true;
public static final boolean default_printGSize=true;
public static final boolean default_printGC=false;
public static final boolean default_gSizeKMG=true;
public static final boolean default_printGKmers=false;
public static final boolean default_printCommonAncestor=false;
public static final boolean default_printCommonAncestorLevel=false;
public static final boolean default_printTaxName=true;
public static final boolean default_printGSeqs=true;
public static final boolean default_printGBases=false;
1883
/** Initial value of minEntropy. */
public static final float default_minEntropy=0.66f;
/** Presumably the minEntropy default used for amino-acid sketches; its use is outside this section - confirm. */
public static final float default_minEntropy_amino=0.70f;
/** Same value as the initial minProb (0.0008f). */
public static final float default_minProb=0.0008f;
/** Same value as the initial minQual (0). */
public static final byte default_minQual=0;
1888
//Initial values for the unique/no-hit fields of the same name.
public static final boolean default_printUnique=true;
public static final boolean default_printUnique2=false;
public static final boolean default_printUnique3=false;
public static final boolean default_printUContam=false;
public static final boolean default_printNoHit=false;

//Initial values for printColors and colorLevel.
public static final boolean default_printColors=true;
public static final int default_colorLevel=TaxTree.FAMILY_E;
1897
/** Initial value of taxLevel. */
public static final int default_taxLevel=TaxTree.SPECIES;
/** Initial value of the raw contamLevel field. */
public static final int default_contamLevel=TaxTree.GENUS_E;

/** Initial value of mode. */
public static final int default_mode=SketchObject.ONE_SKETCH;

/** Initial value of minHits. */
public static final int default_minHits=3;
/** Initial value of samplerate. */
public static final float default_samplerate=1;
/** Initial value of maxReads. */
public static final long default_maxReads=-1;
/** Initial value of minKeyOccuranceCount. */
public static final int default_minKeyOccuranceCount=1;
1907
1908 }