Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SummarizeSketchStats.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package sketch; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 import java.util.Arrays; | |
7 import java.util.Locale; | |
8 | |
9 import fileIO.TextFile; | |
10 import fileIO.TextStreamWriter; | |
11 import shared.Colors; | |
12 import shared.Parse; | |
13 import shared.Parser; | |
14 import shared.PreParser; | |
15 import shared.Shared; | |
16 import shared.Tools; | |
17 import tax.TaxNode; | |
18 import tax.TaxTree; | |
19 | |
20 /** | |
21 * @author Brian Bushnell | |
22 * @date June 28, 2017 | |
23 * | |
24 */ | |
25 public class SummarizeSketchStats { | |
26 | |
27 /** | |
28 * Code entrance from the command line. | |
29 * @param args Command line arguments | |
30 */ | |
31 public static void main(String[] args){ | |
32 //Create a new SummarizeSketchStats instance | |
33 SummarizeSketchStats x=new SummarizeSketchStats(args); | |
34 | |
35 ///And run it | |
36 x.summarize(); | |
37 | |
38 //Close the print stream if it was redirected | |
39 Shared.closeStream(x.outstream); | |
40 } | |
41 | |
/**
 * Builds a summarizer from command-line arguments.
 * Arguments of the form key=value set flags or are handed to the shared Parser;
 * an argument without '=' is taken as input, either a single existing file or a
 * comma-delimited list of names.  The collected names are then expanded into
 * the input list via Tools.getFileOrFiles, and the taxonomy tree is loaded if
 * one was requested.
 * @param args Command line arguments
 * @throws RuntimeException If an argument containing '=' is not recognized
 */
public SummarizeSketchStats(String[] args){

	{//Preparse block for help, config files, and outstream
		PreParser pp=new PreParser(args, getClass(), false);
		args=pp.args;
		outstream=pp.outstream;
	}

	final Parser parser=new Parser();
	final ArrayList<String> names=new ArrayList<String>();
	String taxTreeFile=null;

	/* Parse arguments */
	for(final String arg : args){

		final String[] split=arg.split("=");
		//Locale.ROOT keeps key matching independent of the JVM default locale
		//(e.g. a Turkish locale would lower-case 'I' to a dotless i).
		final String a=split[0].toLowerCase(Locale.ROOT);
		final String b=split.length>1 ? split[1] : null;

		if(a.equals("printtotal") || a.equals("pt")){
			printTotal=Parse.parseBoolean(b);
		}else if(a.equals("ignoresametaxa")){
			ignoreSameTaxa=Parse.parseBoolean(b);
		}else if(a.equals("ignoresamebarcode") || a.equals("ignoresameindex")){
			ignoreSameBarcode=Parse.parseBoolean(b);
		}else if(a.equals("ignoresamelocation") || a.equals("ignoresameloc")){
			ignoreSameLocation=Parse.parseBoolean(b);
		}else if(a.equals("usetotal") || a.equals("totaldenominator") || a.equals("totald") || a.equals("td")){
			totalDenominator=Parse.parseBoolean(b);
		}

		else if(a.equals("taxtree") || a.equals("tree")){
			taxTreeFile=b;
		}else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){
			taxLevel=TaxTree.parseLevel(b);
			//Only non-negative parsed levels are converted to the extended scale
			if(taxLevel>=0){
				taxLevel=TaxTree.levelToExtended(taxLevel);
			}
		}else if(a.equalsIgnoreCase("unique") || a.equalsIgnoreCase("uniquehits")){
			uniqueHitsForSecond=Parse.parseBoolean(b);
		}else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("printheader")){
			printHeader=Parse.parseBoolean(b);
		}

		else if(parser.parse(arg, a, b)){
			//do nothing
		}else if(!arg.contains("=")){
			//A bare argument is an existing file, or else a comma-delimited list
			final String[] x=(new File(arg).exists() ? new String[] {arg} : arg.split(","));
			for(String x2 : x){names.add(x2);}
		}else{
			throw new RuntimeException("Unknown parameter "+arg);
		}
	}
	if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}

	{//Process parser fields
		out=(parser.out1==null ? "stdout" : parser.out1);
		if(parser.in1!=null){
			final String[] x=(new File(parser.in1).exists() ? new String[] {parser.in1} : parser.in1.split(","));
			for(String x2 : x){names.add(x2);}
		}
	}

	//Expand every collected name into the final input list
	in=new ArrayList<String>();
	for(String s : names){
		Tools.getFileOrFiles(s, in, false, false, false, true);
	}

	if(taxTreeFile!=null){setTaxtree(taxTreeFile);}
}
113 | |
114 void setTaxtree(String taxTreeFile){ | |
115 if(taxTreeFile==null){ | |
116 return; | |
117 } | |
118 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, false, false); | |
119 } | |
120 | |
121 public void summarize(){ | |
122 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>(); | |
123 for(String fname : in){ | |
124 ArrayList<SketchResultsSummary> ssl=summarize(fname); | |
125 list.addAll(ssl); | |
126 } | |
127 | |
128 TextStreamWriter tsw=new TextStreamWriter(out, true, false, false); | |
129 tsw.start(); | |
130 if(printHeader){tsw.print(header());} | |
131 // if(printTotal){ | |
132 // tsw.println(total.toString()); | |
133 // } | |
134 for(SketchResultsSummary ss : list){ | |
135 tsw.print(ss.toString()); | |
136 } | |
137 tsw.poisonAndWait(); | |
138 } | |
139 | |
140 // Query: Troseus_1X_k55.fa Seqs: 121 Bases: 2410606 gSize: 2368581 SketchLen: 8923 | |
141 // WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName | |
142 // 99.89% 50.73% 100.00% 50.77% 0.02% 5683 5683 5 0 4719674 1 . Troseus | |
143 | |
144 private ArrayList<SketchResultsSummary> summarize(String fname){ | |
145 TextFile tf=new TextFile(fname); | |
146 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>(); | |
147 SketchResultsSummary current=null; | |
148 | |
149 final String format="WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName"; | |
150 for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){ | |
151 if(line.startsWith("Query:")){ | |
152 if(current!=null){list.add(current);} | |
153 current=new SketchResultsSummary(line); | |
154 }else if(line.startsWith("WKID")){ | |
155 assert(line.equals(format)) : | |
156 "Format should be:\n"+format; | |
157 }else if(line.length()>0){ | |
158 assert(current!=null) : "No Query Header for line "+line; | |
159 current.add(line); | |
160 } | |
161 } | |
162 if(current!=null){list.add(current);} | |
163 tf.close(); | |
164 return list; | |
165 } | |
166 | |
167 public static String header(){ | |
168 StringBuilder sb=new StringBuilder(); | |
169 | |
170 sb.append("#query"); | |
171 | |
172 sb.append('\t').append("seqs"); | |
173 sb.append('\t').append("bases"); | |
174 sb.append('\t').append("gSize"); | |
175 sb.append('\t').append("sketchLen"); | |
176 | |
177 sb.append('\t').append("primaryHits"); | |
178 sb.append('\t').append("primaryUnique"); | |
179 sb.append('\t').append("primaryNoHit"); | |
180 | |
181 sb.append('\t').append("WKID"); | |
182 sb.append('\t').append("KID"); | |
183 sb.append('\t').append("ANI"); | |
184 sb.append('\t').append("Complt"); | |
185 sb.append('\t').append("Contam"); | |
186 sb.append('\t').append("TaxID"); | |
187 sb.append('\t').append("TaxName"); | |
188 sb.append('\t').append("topContamID"); | |
189 sb.append('\t').append("topContamName"); | |
190 | |
191 sb.append('\n'); | |
192 | |
193 return sb.toString(); | |
194 } | |
195 | |
196 private class SketchResultsSummary { | |
197 | |
198 SketchResultsSummary(String line){ | |
199 parseHeader(line); | |
200 } | |
201 | |
202 void parseHeader(String line){ | |
203 String[] split=line.split("\t"); | |
204 for(String s : split){ | |
205 String[] split2=s.trim().split(": "); | |
206 assert(split2.length==2) : "\n"+line+"\n"+s+"\n"+Arrays.toString(split2)+"\n"; | |
207 String a=split2[0], b=split2[1]; | |
208 // outstream.println(a+", "+b); | |
209 if(a.equals("Query")){ | |
210 query=b; | |
211 }else if(a.equals("Seqs")){ | |
212 seqs=Integer.parseInt(b); | |
213 }else if(a.equals("Bases")){ | |
214 bases=Long.parseLong(b); | |
215 }else if(a.equals("gSize")){ | |
216 gSize=Long.parseLong(b); | |
217 }else if(a.equals("SketchLen")){ | |
218 sketchLen=Integer.parseInt(b); | |
219 }else if(a.equals("TaxID")){ | |
220 taxID=Integer.parseInt(b); | |
221 }else if(a.equals("IMG")){ | |
222 img=Long.parseLong(b); | |
223 }else if(a.equals("File")){ | |
224 sketchLen=Integer.parseInt(b); | |
225 } | |
226 } | |
227 } | |
228 | |
229 public void add(String line) { | |
230 SketchResultsLine srl=new SketchResultsLine(line); | |
231 list.add(srl); | |
232 } | |
233 | |
234 @Override | |
235 public String toString(){ | |
236 StringBuilder sb=new StringBuilder(); | |
237 | |
238 sb.append(query); | |
239 | |
240 sb.append('\t').append(seqs); | |
241 sb.append('\t').append(bases); | |
242 sb.append('\t').append(gSize); | |
243 sb.append('\t').append(sketchLen); | |
244 | |
245 int primaryHits=0; | |
246 int primaryUnique=0; | |
247 int primaryNoHit=0; | |
248 | |
249 float WKID=0; | |
250 float KID=0; | |
251 float ANI=0; | |
252 float Complt=0; | |
253 float Contam=0; | |
254 int TaxID=0; | |
255 String TaxName="."; | |
256 int topContamID=0; | |
257 String topContamName="."; | |
258 | |
259 SketchResultsLine first=list.size()>0 ? list.get(0) : null; | |
260 SketchResultsLine second=list.size()>1 ? list.get(1) : null; | |
261 for(int i=2; tree!=null && i<list.size() && failsLevelFilter(first.taxID, second.taxID); i++){ | |
262 second=list.get(i); | |
263 } | |
264 if(second!=null && failsLevelFilter(first.taxID, second.taxID)){second=list.get(1);} | |
265 | |
266 if(second!=null && uniqueHitsForSecond){ | |
267 for(int i=1; i<list.size(); i++){ | |
268 | |
269 SketchResultsLine line=list.get(i); | |
270 if(!failsLevelFilter(first.taxID, line.taxID) && line.unique>second.unique && line.unique>=minUniqueHits){ | |
271 second=line; | |
272 } | |
273 } | |
274 } | |
275 | |
276 if(first!=null){ | |
277 primaryHits=first.matches; | |
278 primaryUnique=first.unique; | |
279 primaryNoHit=first.noHit; | |
280 | |
281 WKID=first.wkid; | |
282 KID=first.kid; | |
283 ANI=first.ani; | |
284 Complt=first.complt; | |
285 Contam=first.contam; | |
286 TaxID=first.taxID; | |
287 TaxName=first.name; | |
288 } | |
289 if(second!=null){ | |
290 topContamID=second.taxID; | |
291 topContamName=second.name; | |
292 } | |
293 | |
294 sb.append('\t').append(primaryHits); | |
295 sb.append('\t').append(primaryUnique); | |
296 sb.append('\t').append(primaryNoHit); | |
297 | |
298 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", WKID)); | |
299 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", KID)); | |
300 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", ANI)); | |
301 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Complt)); | |
302 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Contam)); | |
303 sb.append('\t').append(TaxID); | |
304 sb.append('\t').append(TaxName); | |
305 sb.append('\t').append(topContamID); | |
306 sb.append('\t').append(topContamName); | |
307 | |
308 sb.append('\n'); | |
309 | |
310 return sb.toString(); | |
311 } | |
312 | |
313 private boolean failsLevelFilter(int a, int b) { | |
314 if(a<1 || b<1 || tree==null){return false;} | |
315 int c=tree.commonAncestor(a, b); | |
316 TaxNode tn=tree.getNode(c); | |
317 while(!tn.cellularOrganisms() && tn.levelExtended==TaxTree.NO_RANK_E){tn=tree.getNode(tn.pid);} | |
318 | |
319 return tn.levelExtended<=taxLevel; | |
320 } | |
321 | |
322 String query; | |
323 String fname; | |
324 int seqs; | |
325 long bases; | |
326 long gSize; | |
327 int sketchLen; | |
328 int taxID; | |
329 long img; | |
330 | |
331 ArrayList<SketchResultsLine> list=new ArrayList<SketchResultsLine>(); | |
332 | |
333 } | |
334 | |
335 private class SketchResultsLine{ | |
336 | |
337 SketchResultsLine(String line){ | |
338 //Handle colors | |
339 if(line.startsWith(Colors.esc)){ | |
340 int first=line.indexOf('m'); | |
341 int last=line.lastIndexOf(Colors.esc); | |
342 line=line.substring(first+1, last); | |
343 } | |
344 String[] split=line.replaceAll("%", "").split("\t"); | |
345 wkid=Float.parseFloat(split[0]); | |
346 kid=Float.parseFloat(split[1]); | |
347 ani=Float.parseFloat(split[2]); | |
348 complt=Float.parseFloat(split[3]); | |
349 contam=Float.parseFloat(split[4]); | |
350 | |
351 matches=Integer.parseInt(split[5]); | |
352 unique=Integer.parseInt(split[6]); | |
353 noHit=Integer.parseInt(split[7]); | |
354 taxID=Integer.parseInt(split[8]); | |
355 gSize=Integer.parseInt(split[9]); | |
356 gSeqs=Integer.parseInt(split[10]); | |
357 | |
358 name=split[11]; | |
359 if(name.equals(".") && split.length>11){ | |
360 name=split[12]; | |
361 } | |
362 } | |
363 | |
364 float wkid; | |
365 float kid; | |
366 float ani; | |
367 float complt; | |
368 float contam; | |
369 int matches; | |
370 int unique; | |
371 int noHit; | |
372 int taxID; | |
373 int gSize; | |
374 int gSeqs; | |
375 String name; | |
376 } | |
377 | |
378 final ArrayList<String> in; | |
379 final String out; | |
380 | |
381 TaxTree tree=null; | |
382 int taxLevel=TaxTree.GENUS_E; | |
383 boolean uniqueHitsForSecond=false; | |
384 int minUniqueHits=3; | |
385 boolean printHeader=true; | |
386 | |
387 /** Legacy code from SealStats */ | |
388 boolean ignoreSameTaxa=false; | |
389 boolean ignoreSameBarcode=false; | |
390 boolean ignoreSameLocation=false; | |
391 boolean totalDenominator=false; | |
392 boolean printTotal=true; | |
393 | |
394 PrintStream outstream=System.err; | |
395 | |
396 } |