jpayne@68
|
1 package sketch;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.ArrayList;
|
jpayne@68
|
6 import java.util.Arrays;
|
jpayne@68
|
7 import java.util.Locale;
|
jpayne@68
|
8
|
jpayne@68
|
9 import fileIO.TextFile;
|
jpayne@68
|
10 import fileIO.TextStreamWriter;
|
jpayne@68
|
11 import shared.Colors;
|
jpayne@68
|
12 import shared.Parse;
|
jpayne@68
|
13 import shared.Parser;
|
jpayne@68
|
14 import shared.PreParser;
|
jpayne@68
|
15 import shared.Shared;
|
jpayne@68
|
16 import shared.Tools;
|
jpayne@68
|
17 import tax.TaxNode;
|
jpayne@68
|
18 import tax.TaxTree;
|
jpayne@68
|
19
|
jpayne@68
|
20 /**
|
jpayne@68
|
21 * @author Brian Bushnell
|
jpayne@68
|
22 * @date June 28, 2017
|
jpayne@68
|
23 *
|
jpayne@68
|
24 */
|
jpayne@68
|
25 public class SummarizeSketchStats {
|
jpayne@68
|
26
|
jpayne@68
|
27 /**
|
jpayne@68
|
28 * Code entrance from the command line.
|
jpayne@68
|
29 * @param args Command line arguments
|
jpayne@68
|
30 */
|
jpayne@68
|
31 public static void main(String[] args){
|
jpayne@68
|
32 //Create a new SummarizeSketchStats instance
|
jpayne@68
|
33 SummarizeSketchStats x=new SummarizeSketchStats(args);
|
jpayne@68
|
34
|
jpayne@68
|
35 ///And run it
|
jpayne@68
|
36 x.summarize();
|
jpayne@68
|
37
|
jpayne@68
|
38 //Close the print stream if it was redirected
|
jpayne@68
|
39 Shared.closeStream(x.outstream);
|
jpayne@68
|
40 }
|
jpayne@68
|
41
|
jpayne@68
|
/**
 * Parses the command line and resolves the list of input files.
 * Flags known to this class are handled directly; anything else is offered
 * to the shared {@code Parser}, and an argument without '=' is treated as an
 * input name (an existing file, or else a comma-delimited list of names).
 * @param args Command line arguments
 * @throws RuntimeException if a key=value argument is not recognized
 */
public SummarizeSketchStats(String[] args){

	//Preparse for help, config files, and outstream
	final PreParser preparser=new PreParser(args, getClass(), false);
	args=preparser.args;
	outstream=preparser.outstream;

	final Parser parser=new Parser();
	final ArrayList<String> names=new ArrayList<String>();
	String treePath=null;

	/* Parse arguments */
	for(final String arg : args){
		final String[] pair=arg.split("=");
		final String a=pair[0].toLowerCase();
		final String b=(pair.length>1 ? pair[1] : null);

		if(a.equals("printtotal") || a.equals("pt")){
			printTotal=Parse.parseBoolean(b);
		}else if(a.equals("ignoresametaxa")){
			ignoreSameTaxa=Parse.parseBoolean(b);
		}else if(a.equals("ignoresamebarcode") || a.equals("ignoresameindex")){
			ignoreSameBarcode=Parse.parseBoolean(b);
		}else if(a.equals("ignoresamelocation") || a.equals("ignoresameloc")){
			ignoreSameLocation=Parse.parseBoolean(b);
		}else if(a.equals("usetotal") || a.equals("totaldenominator") || a.equals("totald") || a.equals("td")){
			totalDenominator=Parse.parseBoolean(b);
		}else if(a.equals("taxtree") || a.equals("tree")){
			treePath=b;
		}else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){
			//Non-negative levels are converted to the extended scale; negative values are stored as-is
			final int level=TaxTree.parseLevel(b);
			taxLevel=(level>=0 ? TaxTree.levelToExtended(level) : level);
		}else if(a.equalsIgnoreCase("unique") || a.equalsIgnoreCase("uniquehits")){
			uniqueHitsForSecond=Parse.parseBoolean(b);
		}else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("printheader")){
			printHeader=Parse.parseBoolean(b);
		}else if(parser.parse(arg, a, b)){
			//Consumed by the shared parser; nothing more to do
		}else if(!arg.contains("=")){
			//A bare argument names an input: one existing file, or a comma-delimited list
			final String[] pieces;
			if(new File(arg).exists()){
				pieces=new String[] {arg};
			}else{
				pieces=arg.split(",");
			}
			for(String piece : pieces){
				names.add(piece);
			}
		}else{
			throw new RuntimeException("Unknown parameter "+arg);
		}
	}
	if("auto".equalsIgnoreCase(treePath)){
		treePath=TaxTree.defaultTreeFile();
	}

	//Process parser fields
	out=(parser.out1!=null ? parser.out1 : "stdout");
	if(parser.in1!=null){
		final String[] pieces;
		if(new File(parser.in1).exists()){
			pieces=new String[] {parser.in1};
		}else{
			pieces=parser.in1.split(",");
		}
		for(String piece : pieces){
			names.add(piece);
		}
	}

	//Resolve every collected name into the final input list
	in=new ArrayList<String>();
	for(String name : names){
		Tools.getFileOrFiles(name, in, false, false, false, true);
	}

	if(treePath!=null){
		setTaxtree(treePath);
	}
}
|
jpayne@68
|
113
|
jpayne@68
|
114 void setTaxtree(String taxTreeFile){
|
jpayne@68
|
115 if(taxTreeFile==null){
|
jpayne@68
|
116 return;
|
jpayne@68
|
117 }
|
jpayne@68
|
118 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, false, false);
|
jpayne@68
|
119 }
|
jpayne@68
|
120
|
jpayne@68
|
121 public void summarize(){
|
jpayne@68
|
122 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
|
jpayne@68
|
123 for(String fname : in){
|
jpayne@68
|
124 ArrayList<SketchResultsSummary> ssl=summarize(fname);
|
jpayne@68
|
125 list.addAll(ssl);
|
jpayne@68
|
126 }
|
jpayne@68
|
127
|
jpayne@68
|
128 TextStreamWriter tsw=new TextStreamWriter(out, true, false, false);
|
jpayne@68
|
129 tsw.start();
|
jpayne@68
|
130 if(printHeader){tsw.print(header());}
|
jpayne@68
|
131 // if(printTotal){
|
jpayne@68
|
132 // tsw.println(total.toString());
|
jpayne@68
|
133 // }
|
jpayne@68
|
134 for(SketchResultsSummary ss : list){
|
jpayne@68
|
135 tsw.print(ss.toString());
|
jpayne@68
|
136 }
|
jpayne@68
|
137 tsw.poisonAndWait();
|
jpayne@68
|
138 }
|
jpayne@68
|
139
|
jpayne@68
|
140 // Query: Troseus_1X_k55.fa Seqs: 121 Bases: 2410606 gSize: 2368581 SketchLen: 8923
|
jpayne@68
|
141 // WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName
|
jpayne@68
|
142 // 99.89% 50.73% 100.00% 50.77% 0.02% 5683 5683 5 0 4719674 1 . Troseus
|
jpayne@68
|
143
|
jpayne@68
|
144 private ArrayList<SketchResultsSummary> summarize(String fname){
|
jpayne@68
|
145 TextFile tf=new TextFile(fname);
|
jpayne@68
|
146 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
|
jpayne@68
|
147 SketchResultsSummary current=null;
|
jpayne@68
|
148
|
jpayne@68
|
149 final String format="WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName";
|
jpayne@68
|
150 for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){
|
jpayne@68
|
151 if(line.startsWith("Query:")){
|
jpayne@68
|
152 if(current!=null){list.add(current);}
|
jpayne@68
|
153 current=new SketchResultsSummary(line);
|
jpayne@68
|
154 }else if(line.startsWith("WKID")){
|
jpayne@68
|
155 assert(line.equals(format)) :
|
jpayne@68
|
156 "Format should be:\n"+format;
|
jpayne@68
|
157 }else if(line.length()>0){
|
jpayne@68
|
158 assert(current!=null) : "No Query Header for line "+line;
|
jpayne@68
|
159 current.add(line);
|
jpayne@68
|
160 }
|
jpayne@68
|
161 }
|
jpayne@68
|
162 if(current!=null){list.add(current);}
|
jpayne@68
|
163 tf.close();
|
jpayne@68
|
164 return list;
|
jpayne@68
|
165 }
|
jpayne@68
|
166
|
jpayne@68
|
167 public static String header(){
|
jpayne@68
|
168 StringBuilder sb=new StringBuilder();
|
jpayne@68
|
169
|
jpayne@68
|
170 sb.append("#query");
|
jpayne@68
|
171
|
jpayne@68
|
172 sb.append('\t').append("seqs");
|
jpayne@68
|
173 sb.append('\t').append("bases");
|
jpayne@68
|
174 sb.append('\t').append("gSize");
|
jpayne@68
|
175 sb.append('\t').append("sketchLen");
|
jpayne@68
|
176
|
jpayne@68
|
177 sb.append('\t').append("primaryHits");
|
jpayne@68
|
178 sb.append('\t').append("primaryUnique");
|
jpayne@68
|
179 sb.append('\t').append("primaryNoHit");
|
jpayne@68
|
180
|
jpayne@68
|
181 sb.append('\t').append("WKID");
|
jpayne@68
|
182 sb.append('\t').append("KID");
|
jpayne@68
|
183 sb.append('\t').append("ANI");
|
jpayne@68
|
184 sb.append('\t').append("Complt");
|
jpayne@68
|
185 sb.append('\t').append("Contam");
|
jpayne@68
|
186 sb.append('\t').append("TaxID");
|
jpayne@68
|
187 sb.append('\t').append("TaxName");
|
jpayne@68
|
188 sb.append('\t').append("topContamID");
|
jpayne@68
|
189 sb.append('\t').append("topContamName");
|
jpayne@68
|
190
|
jpayne@68
|
191 sb.append('\n');
|
jpayne@68
|
192
|
jpayne@68
|
193 return sb.toString();
|
jpayne@68
|
194 }
|
jpayne@68
|
195
|
jpayne@68
|
196 private class SketchResultsSummary {
|
jpayne@68
|
197
|
jpayne@68
|
198 SketchResultsSummary(String line){
|
jpayne@68
|
199 parseHeader(line);
|
jpayne@68
|
200 }
|
jpayne@68
|
201
|
jpayne@68
|
202 void parseHeader(String line){
|
jpayne@68
|
203 String[] split=line.split("\t");
|
jpayne@68
|
204 for(String s : split){
|
jpayne@68
|
205 String[] split2=s.trim().split(": ");
|
jpayne@68
|
206 assert(split2.length==2) : "\n"+line+"\n"+s+"\n"+Arrays.toString(split2)+"\n";
|
jpayne@68
|
207 String a=split2[0], b=split2[1];
|
jpayne@68
|
208 // outstream.println(a+", "+b);
|
jpayne@68
|
209 if(a.equals("Query")){
|
jpayne@68
|
210 query=b;
|
jpayne@68
|
211 }else if(a.equals("Seqs")){
|
jpayne@68
|
212 seqs=Integer.parseInt(b);
|
jpayne@68
|
213 }else if(a.equals("Bases")){
|
jpayne@68
|
214 bases=Long.parseLong(b);
|
jpayne@68
|
215 }else if(a.equals("gSize")){
|
jpayne@68
|
216 gSize=Long.parseLong(b);
|
jpayne@68
|
217 }else if(a.equals("SketchLen")){
|
jpayne@68
|
218 sketchLen=Integer.parseInt(b);
|
jpayne@68
|
219 }else if(a.equals("TaxID")){
|
jpayne@68
|
220 taxID=Integer.parseInt(b);
|
jpayne@68
|
221 }else if(a.equals("IMG")){
|
jpayne@68
|
222 img=Long.parseLong(b);
|
jpayne@68
|
223 }else if(a.equals("File")){
|
jpayne@68
|
224 sketchLen=Integer.parseInt(b);
|
jpayne@68
|
225 }
|
jpayne@68
|
226 }
|
jpayne@68
|
227 }
|
jpayne@68
|
228
|
jpayne@68
|
229 public void add(String line) {
|
jpayne@68
|
230 SketchResultsLine srl=new SketchResultsLine(line);
|
jpayne@68
|
231 list.add(srl);
|
jpayne@68
|
232 }
|
jpayne@68
|
233
|
jpayne@68
|
234 @Override
|
jpayne@68
|
235 public String toString(){
|
jpayne@68
|
236 StringBuilder sb=new StringBuilder();
|
jpayne@68
|
237
|
jpayne@68
|
238 sb.append(query);
|
jpayne@68
|
239
|
jpayne@68
|
240 sb.append('\t').append(seqs);
|
jpayne@68
|
241 sb.append('\t').append(bases);
|
jpayne@68
|
242 sb.append('\t').append(gSize);
|
jpayne@68
|
243 sb.append('\t').append(sketchLen);
|
jpayne@68
|
244
|
jpayne@68
|
245 int primaryHits=0;
|
jpayne@68
|
246 int primaryUnique=0;
|
jpayne@68
|
247 int primaryNoHit=0;
|
jpayne@68
|
248
|
jpayne@68
|
249 float WKID=0;
|
jpayne@68
|
250 float KID=0;
|
jpayne@68
|
251 float ANI=0;
|
jpayne@68
|
252 float Complt=0;
|
jpayne@68
|
253 float Contam=0;
|
jpayne@68
|
254 int TaxID=0;
|
jpayne@68
|
255 String TaxName=".";
|
jpayne@68
|
256 int topContamID=0;
|
jpayne@68
|
257 String topContamName=".";
|
jpayne@68
|
258
|
jpayne@68
|
259 SketchResultsLine first=list.size()>0 ? list.get(0) : null;
|
jpayne@68
|
260 SketchResultsLine second=list.size()>1 ? list.get(1) : null;
|
jpayne@68
|
261 for(int i=2; tree!=null && i<list.size() && failsLevelFilter(first.taxID, second.taxID); i++){
|
jpayne@68
|
262 second=list.get(i);
|
jpayne@68
|
263 }
|
jpayne@68
|
264 if(second!=null && failsLevelFilter(first.taxID, second.taxID)){second=list.get(1);}
|
jpayne@68
|
265
|
jpayne@68
|
266 if(second!=null && uniqueHitsForSecond){
|
jpayne@68
|
267 for(int i=1; i<list.size(); i++){
|
jpayne@68
|
268
|
jpayne@68
|
269 SketchResultsLine line=list.get(i);
|
jpayne@68
|
270 if(!failsLevelFilter(first.taxID, line.taxID) && line.unique>second.unique && line.unique>=minUniqueHits){
|
jpayne@68
|
271 second=line;
|
jpayne@68
|
272 }
|
jpayne@68
|
273 }
|
jpayne@68
|
274 }
|
jpayne@68
|
275
|
jpayne@68
|
276 if(first!=null){
|
jpayne@68
|
277 primaryHits=first.matches;
|
jpayne@68
|
278 primaryUnique=first.unique;
|
jpayne@68
|
279 primaryNoHit=first.noHit;
|
jpayne@68
|
280
|
jpayne@68
|
281 WKID=first.wkid;
|
jpayne@68
|
282 KID=first.kid;
|
jpayne@68
|
283 ANI=first.ani;
|
jpayne@68
|
284 Complt=first.complt;
|
jpayne@68
|
285 Contam=first.contam;
|
jpayne@68
|
286 TaxID=first.taxID;
|
jpayne@68
|
287 TaxName=first.name;
|
jpayne@68
|
288 }
|
jpayne@68
|
289 if(second!=null){
|
jpayne@68
|
290 topContamID=second.taxID;
|
jpayne@68
|
291 topContamName=second.name;
|
jpayne@68
|
292 }
|
jpayne@68
|
293
|
jpayne@68
|
294 sb.append('\t').append(primaryHits);
|
jpayne@68
|
295 sb.append('\t').append(primaryUnique);
|
jpayne@68
|
296 sb.append('\t').append(primaryNoHit);
|
jpayne@68
|
297
|
jpayne@68
|
298 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", WKID));
|
jpayne@68
|
299 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", KID));
|
jpayne@68
|
300 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", ANI));
|
jpayne@68
|
301 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Complt));
|
jpayne@68
|
302 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Contam));
|
jpayne@68
|
303 sb.append('\t').append(TaxID);
|
jpayne@68
|
304 sb.append('\t').append(TaxName);
|
jpayne@68
|
305 sb.append('\t').append(topContamID);
|
jpayne@68
|
306 sb.append('\t').append(topContamName);
|
jpayne@68
|
307
|
jpayne@68
|
308 sb.append('\n');
|
jpayne@68
|
309
|
jpayne@68
|
310 return sb.toString();
|
jpayne@68
|
311 }
|
jpayne@68
|
312
|
jpayne@68
|
313 private boolean failsLevelFilter(int a, int b) {
|
jpayne@68
|
314 if(a<1 || b<1 || tree==null){return false;}
|
jpayne@68
|
315 int c=tree.commonAncestor(a, b);
|
jpayne@68
|
316 TaxNode tn=tree.getNode(c);
|
jpayne@68
|
317 while(!tn.cellularOrganisms() && tn.levelExtended==TaxTree.NO_RANK_E){tn=tree.getNode(tn.pid);}
|
jpayne@68
|
318
|
jpayne@68
|
319 return tn.levelExtended<=taxLevel;
|
jpayne@68
|
320 }
|
jpayne@68
|
321
|
jpayne@68
|
322 String query;
|
jpayne@68
|
323 String fname;
|
jpayne@68
|
324 int seqs;
|
jpayne@68
|
325 long bases;
|
jpayne@68
|
326 long gSize;
|
jpayne@68
|
327 int sketchLen;
|
jpayne@68
|
328 int taxID;
|
jpayne@68
|
329 long img;
|
jpayne@68
|
330
|
jpayne@68
|
331 ArrayList<SketchResultsLine> list=new ArrayList<SketchResultsLine>();
|
jpayne@68
|
332
|
jpayne@68
|
333 }
|
jpayne@68
|
334
|
jpayne@68
|
335 private class SketchResultsLine{
|
jpayne@68
|
336
|
jpayne@68
|
337 SketchResultsLine(String line){
|
jpayne@68
|
338 //Handle colors
|
jpayne@68
|
339 if(line.startsWith(Colors.esc)){
|
jpayne@68
|
340 int first=line.indexOf('m');
|
jpayne@68
|
341 int last=line.lastIndexOf(Colors.esc);
|
jpayne@68
|
342 line=line.substring(first+1, last);
|
jpayne@68
|
343 }
|
jpayne@68
|
344 String[] split=line.replaceAll("%", "").split("\t");
|
jpayne@68
|
345 wkid=Float.parseFloat(split[0]);
|
jpayne@68
|
346 kid=Float.parseFloat(split[1]);
|
jpayne@68
|
347 ani=Float.parseFloat(split[2]);
|
jpayne@68
|
348 complt=Float.parseFloat(split[3]);
|
jpayne@68
|
349 contam=Float.parseFloat(split[4]);
|
jpayne@68
|
350
|
jpayne@68
|
351 matches=Integer.parseInt(split[5]);
|
jpayne@68
|
352 unique=Integer.parseInt(split[6]);
|
jpayne@68
|
353 noHit=Integer.parseInt(split[7]);
|
jpayne@68
|
354 taxID=Integer.parseInt(split[8]);
|
jpayne@68
|
355 gSize=Integer.parseInt(split[9]);
|
jpayne@68
|
356 gSeqs=Integer.parseInt(split[10]);
|
jpayne@68
|
357
|
jpayne@68
|
358 name=split[11];
|
jpayne@68
|
359 if(name.equals(".") && split.length>11){
|
jpayne@68
|
360 name=split[12];
|
jpayne@68
|
361 }
|
jpayne@68
|
362 }
|
jpayne@68
|
363
|
jpayne@68
|
364 float wkid;
|
jpayne@68
|
365 float kid;
|
jpayne@68
|
366 float ani;
|
jpayne@68
|
367 float complt;
|
jpayne@68
|
368 float contam;
|
jpayne@68
|
369 int matches;
|
jpayne@68
|
370 int unique;
|
jpayne@68
|
371 int noHit;
|
jpayne@68
|
372 int taxID;
|
jpayne@68
|
373 int gSize;
|
jpayne@68
|
374 int gSeqs;
|
jpayne@68
|
375 String name;
|
jpayne@68
|
376 }
|
jpayne@68
|
377
|
jpayne@68
|
378 final ArrayList<String> in;
|
jpayne@68
|
379 final String out;
|
jpayne@68
|
380
|
jpayne@68
|
381 TaxTree tree=null;
|
jpayne@68
|
382 int taxLevel=TaxTree.GENUS_E;
|
jpayne@68
|
383 boolean uniqueHitsForSecond=false;
|
jpayne@68
|
384 int minUniqueHits=3;
|
jpayne@68
|
385 boolean printHeader=true;
|
jpayne@68
|
386
|
jpayne@68
|
387 /** Legacy code from SealStats */
|
jpayne@68
|
388 boolean ignoreSameTaxa=false;
|
jpayne@68
|
389 boolean ignoreSameBarcode=false;
|
jpayne@68
|
390 boolean ignoreSameLocation=false;
|
jpayne@68
|
391 boolean totalDenominator=false;
|
jpayne@68
|
392 boolean printTotal=true;
|
jpayne@68
|
393
|
jpayne@68
|
394 PrintStream outstream=System.err;
|
jpayne@68
|
395
|
jpayne@68
|
396 }
|