comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SummarizeSketchStats.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Locale;
8
9 import fileIO.TextFile;
10 import fileIO.TextStreamWriter;
11 import shared.Colors;
12 import shared.Parse;
13 import shared.Parser;
14 import shared.PreParser;
15 import shared.Shared;
16 import shared.Tools;
17 import tax.TaxNode;
18 import tax.TaxTree;
19
20 /**
21 * @author Brian Bushnell
22 * @date June 28, 2017
23 *
24 */
25 public class SummarizeSketchStats {
26
27 /**
28 * Code entrance from the command line.
29 * @param args Command line arguments
30 */
31 public static void main(String[] args){
32 //Create a new SummarizeSketchStats instance
33 SummarizeSketchStats x=new SummarizeSketchStats(args);
34
35 ///And run it
36 x.summarize();
37
38 //Close the print stream if it was redirected
39 Shared.closeStream(x.outstream);
40 }
41
/**
 * Parses the command line and resolves the list of input files.
 * Arguments of the form key=value set options; an argument without '='
 * is taken as an input name (an existing path, or a comma-delimited list).
 * @param args Command line arguments
 */
public SummarizeSketchStats(String[] args){

    {//Preparse block for help, config files, and outstream
        final PreParser preparser=new PreParser(args, getClass(), false);
        args=preparser.args;
        outstream=preparser.outstream;
    }

    final Parser parser=new Parser();
    final ArrayList<String> inputNames=new ArrayList<String>();
    String treePath=null;

    /* Parse arguments */
    for(final String arg : args){
        final String[] pair=arg.split("=");
        final String a=pair[0].toLowerCase();
        final String b=(pair.length>1 ? pair[1] : null);

        //Flags inherited from SealStats
        if(a.equals("printtotal") || a.equals("pt")){
            printTotal=Parse.parseBoolean(b);
        }else if(a.equals("ignoresametaxa")){
            ignoreSameTaxa=Parse.parseBoolean(b);
        }else if(a.equals("ignoresamebarcode") || a.equals("ignoresameindex")){
            ignoreSameBarcode=Parse.parseBoolean(b);
        }else if(a.equals("ignoresamelocation") || a.equals("ignoresameloc")){
            ignoreSameLocation=Parse.parseBoolean(b);
        }else if(a.equals("usetotal") || a.equals("totaldenominator") || a.equals("totald") || a.equals("td")){
            totalDenominator=Parse.parseBoolean(b);
        }

        //Taxonomy and output options
        else if(a.equals("taxtree") || a.equals("tree")){
            treePath=b;
        }else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){
            final int parsed=TaxTree.parseLevel(b);
            taxLevel=(parsed>=0 ? TaxTree.levelToExtended(parsed) : parsed);
        }else if(a.equalsIgnoreCase("unique") || a.equalsIgnoreCase("uniquehits")){
            uniqueHitsForSecond=Parse.parseBoolean(b);
        }else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("printheader")){
            printHeader=Parse.parseBoolean(b);
        }

        //Everything else: shared parser flags, bare input names, or an error
        else if(parser.parse(arg, a, b)){
            //Handled by the shared parser
        }else if(!arg.contains("=")){
            inputNames.addAll(Arrays.asList(splitInputNames(arg)));
        }else{
            throw new RuntimeException("Unknown parameter "+arg);
        }
    }
    if("auto".equalsIgnoreCase(treePath)){treePath=TaxTree.defaultTreeFile();}

    //Process parser fields
    out=(parser.out1!=null ? parser.out1 : "stdout");
    if(parser.in1!=null){
        inputNames.addAll(Arrays.asList(splitInputNames(parser.in1)));
    }

    in=new ArrayList<String>();
    for(String name : inputNames){
        Tools.getFileOrFiles(name, in, false, false, false, true);
    }

    if(treePath!=null){setTaxtree(treePath);}
}

/**
 * Interprets an input specification.
 * @param spec A path or a comma-delimited list of names
 * @return The specification itself if a file of that name exists, otherwise its comma-delimited parts
 */
private static String[] splitInputNames(String spec){
    if(new File(spec).exists()){
        return new String[] {spec};
    }
    return spec.split(",");
}
113
114 void setTaxtree(String taxTreeFile){
115 if(taxTreeFile==null){
116 return;
117 }
118 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, false, false);
119 }
120
121 public void summarize(){
122 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
123 for(String fname : in){
124 ArrayList<SketchResultsSummary> ssl=summarize(fname);
125 list.addAll(ssl);
126 }
127
128 TextStreamWriter tsw=new TextStreamWriter(out, true, false, false);
129 tsw.start();
130 if(printHeader){tsw.print(header());}
131 // if(printTotal){
132 // tsw.println(total.toString());
133 // }
134 for(SketchResultsSummary ss : list){
135 tsw.print(ss.toString());
136 }
137 tsw.poisonAndWait();
138 }
139
140 // Query: Troseus_1X_k55.fa Seqs: 121 Bases: 2410606 gSize: 2368581 SketchLen: 8923
141 // WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName
142 // 99.89% 50.73% 100.00% 50.77% 0.02% 5683 5683 5 0 4719674 1 . Troseus
143
144 private ArrayList<SketchResultsSummary> summarize(String fname){
145 TextFile tf=new TextFile(fname);
146 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
147 SketchResultsSummary current=null;
148
149 final String format="WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName";
150 for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){
151 if(line.startsWith("Query:")){
152 if(current!=null){list.add(current);}
153 current=new SketchResultsSummary(line);
154 }else if(line.startsWith("WKID")){
155 assert(line.equals(format)) :
156 "Format should be:\n"+format;
157 }else if(line.length()>0){
158 assert(current!=null) : "No Query Header for line "+line;
159 current.add(line);
160 }
161 }
162 if(current!=null){list.add(current);}
163 tf.close();
164 return list;
165 }
166
167 public static String header(){
168 StringBuilder sb=new StringBuilder();
169
170 sb.append("#query");
171
172 sb.append('\t').append("seqs");
173 sb.append('\t').append("bases");
174 sb.append('\t').append("gSize");
175 sb.append('\t').append("sketchLen");
176
177 sb.append('\t').append("primaryHits");
178 sb.append('\t').append("primaryUnique");
179 sb.append('\t').append("primaryNoHit");
180
181 sb.append('\t').append("WKID");
182 sb.append('\t').append("KID");
183 sb.append('\t').append("ANI");
184 sb.append('\t').append("Complt");
185 sb.append('\t').append("Contam");
186 sb.append('\t').append("TaxID");
187 sb.append('\t').append("TaxName");
188 sb.append('\t').append("topContamID");
189 sb.append('\t').append("topContamName");
190
191 sb.append('\n');
192
193 return sb.toString();
194 }
195
196 private class SketchResultsSummary {
197
/**
 * Creates the summary for one query, initialized from its header line.
 * @param line Query header line
 */
SketchResultsSummary(String line){
    this.parseHeader(line);
}
201
202 void parseHeader(String line){
203 String[] split=line.split("\t");
204 for(String s : split){
205 String[] split2=s.trim().split(": ");
206 assert(split2.length==2) : "\n"+line+"\n"+s+"\n"+Arrays.toString(split2)+"\n";
207 String a=split2[0], b=split2[1];
208 // outstream.println(a+", "+b);
209 if(a.equals("Query")){
210 query=b;
211 }else if(a.equals("Seqs")){
212 seqs=Integer.parseInt(b);
213 }else if(a.equals("Bases")){
214 bases=Long.parseLong(b);
215 }else if(a.equals("gSize")){
216 gSize=Long.parseLong(b);
217 }else if(a.equals("SketchLen")){
218 sketchLen=Integer.parseInt(b);
219 }else if(a.equals("TaxID")){
220 taxID=Integer.parseInt(b);
221 }else if(a.equals("IMG")){
222 img=Long.parseLong(b);
223 }else if(a.equals("File")){
224 sketchLen=Integer.parseInt(b);
225 }
226 }
227 }
228
229 public void add(String line) {
230 SketchResultsLine srl=new SketchResultsLine(line);
231 list.add(srl);
232 }
233
234 @Override
235 public String toString(){
236 StringBuilder sb=new StringBuilder();
237
238 sb.append(query);
239
240 sb.append('\t').append(seqs);
241 sb.append('\t').append(bases);
242 sb.append('\t').append(gSize);
243 sb.append('\t').append(sketchLen);
244
245 int primaryHits=0;
246 int primaryUnique=0;
247 int primaryNoHit=0;
248
249 float WKID=0;
250 float KID=0;
251 float ANI=0;
252 float Complt=0;
253 float Contam=0;
254 int TaxID=0;
255 String TaxName=".";
256 int topContamID=0;
257 String topContamName=".";
258
259 SketchResultsLine first=list.size()>0 ? list.get(0) : null;
260 SketchResultsLine second=list.size()>1 ? list.get(1) : null;
261 for(int i=2; tree!=null && i<list.size() && failsLevelFilter(first.taxID, second.taxID); i++){
262 second=list.get(i);
263 }
264 if(second!=null && failsLevelFilter(first.taxID, second.taxID)){second=list.get(1);}
265
266 if(second!=null && uniqueHitsForSecond){
267 for(int i=1; i<list.size(); i++){
268
269 SketchResultsLine line=list.get(i);
270 if(!failsLevelFilter(first.taxID, line.taxID) && line.unique>second.unique && line.unique>=minUniqueHits){
271 second=line;
272 }
273 }
274 }
275
276 if(first!=null){
277 primaryHits=first.matches;
278 primaryUnique=first.unique;
279 primaryNoHit=first.noHit;
280
281 WKID=first.wkid;
282 KID=first.kid;
283 ANI=first.ani;
284 Complt=first.complt;
285 Contam=first.contam;
286 TaxID=first.taxID;
287 TaxName=first.name;
288 }
289 if(second!=null){
290 topContamID=second.taxID;
291 topContamName=second.name;
292 }
293
294 sb.append('\t').append(primaryHits);
295 sb.append('\t').append(primaryUnique);
296 sb.append('\t').append(primaryNoHit);
297
298 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", WKID));
299 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", KID));
300 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", ANI));
301 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Complt));
302 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Contam));
303 sb.append('\t').append(TaxID);
304 sb.append('\t').append(TaxName);
305 sb.append('\t').append(topContamID);
306 sb.append('\t').append(topContamName);
307
308 sb.append('\n');
309
310 return sb.toString();
311 }
312
313 private boolean failsLevelFilter(int a, int b) {
314 if(a<1 || b<1 || tree==null){return false;}
315 int c=tree.commonAncestor(a, b);
316 TaxNode tn=tree.getNode(c);
317 while(!tn.cellularOrganisms() && tn.levelExtended==TaxTree.NO_RANK_E){tn=tree.getNode(tn.pid);}
318
319 return tn.levelExtended<=taxLevel;
320 }
321
322 String query;
323 String fname;
324 int seqs;
325 long bases;
326 long gSize;
327 int sketchLen;
328 int taxID;
329 long img;
330
331 ArrayList<SketchResultsLine> list=new ArrayList<SketchResultsLine>();
332
333 }
334
335 private class SketchResultsLine{
336
337 SketchResultsLine(String line){
338 //Handle colors
339 if(line.startsWith(Colors.esc)){
340 int first=line.indexOf('m');
341 int last=line.lastIndexOf(Colors.esc);
342 line=line.substring(first+1, last);
343 }
344 String[] split=line.replaceAll("%", "").split("\t");
345 wkid=Float.parseFloat(split[0]);
346 kid=Float.parseFloat(split[1]);
347 ani=Float.parseFloat(split[2]);
348 complt=Float.parseFloat(split[3]);
349 contam=Float.parseFloat(split[4]);
350
351 matches=Integer.parseInt(split[5]);
352 unique=Integer.parseInt(split[6]);
353 noHit=Integer.parseInt(split[7]);
354 taxID=Integer.parseInt(split[8]);
355 gSize=Integer.parseInt(split[9]);
356 gSeqs=Integer.parseInt(split[10]);
357
358 name=split[11];
359 if(name.equals(".") && split.length>11){
360 name=split[12];
361 }
362 }
363
364 float wkid;
365 float kid;
366 float ani;
367 float complt;
368 float contam;
369 int matches;
370 int unique;
371 int noHit;
372 int taxID;
373 int gSize;
374 int gSeqs;
375 String name;
376 }
377
378 final ArrayList<String> in;
379 final String out;
380
381 TaxTree tree=null;
382 int taxLevel=TaxTree.GENUS_E;
383 boolean uniqueHitsForSecond=false;
384 int minUniqueHits=3;
385 boolean printHeader=true;
386
387 /** Legacy code from SealStats */
388 boolean ignoreSameTaxa=false;
389 boolean ignoreSameBarcode=false;
390 boolean ignoreSameLocation=false;
391 boolean totalDenominator=false;
392 boolean printTotal=true;
393
394 PrintStream outstream=System.err;
395
396 }