annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SummarizeSketchStats.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6 import java.util.Arrays;
jpayne@68 7 import java.util.Locale;
jpayne@68 8
jpayne@68 9 import fileIO.TextFile;
jpayne@68 10 import fileIO.TextStreamWriter;
jpayne@68 11 import shared.Colors;
jpayne@68 12 import shared.Parse;
jpayne@68 13 import shared.Parser;
jpayne@68 14 import shared.PreParser;
jpayne@68 15 import shared.Shared;
jpayne@68 16 import shared.Tools;
jpayne@68 17 import tax.TaxNode;
jpayne@68 18 import tax.TaxTree;
jpayne@68 19
jpayne@68 20 /**
jpayne@68 21 * @author Brian Bushnell
jpayne@68 22 * @date June 28, 2017
jpayne@68 23 *
jpayne@68 24 */
jpayne@68 25 public class SummarizeSketchStats {
jpayne@68 26
jpayne@68 27 /**
jpayne@68 28 * Code entrance from the command line.
jpayne@68 29 * @param args Command line arguments
jpayne@68 30 */
jpayne@68 31 public static void main(String[] args){
jpayne@68 32 //Create a new SummarizeSketchStats instance
jpayne@68 33 SummarizeSketchStats x=new SummarizeSketchStats(args);
jpayne@68 34
jpayne@68 35 ///And run it
jpayne@68 36 x.summarize();
jpayne@68 37
jpayne@68 38 //Close the print stream if it was redirected
jpayne@68 39 Shared.closeStream(x.outstream);
jpayne@68 40 }
jpayne@68 41
/**
 * Parses the command line and resolves the list of input files.
 * <p>
 * Arguments of the form {@code key=value} set the corresponding field or are
 * handed to the shared {@link Parser}.  Arguments without {@code '='} are
 * treated as input names: an argument naming an existing file is taken
 * whole, otherwise it is split on commas.  The same rule is applied to the
 * parser's {@code in1} value.  Every collected name is then expanded through
 * {@code Tools.getFileOrFiles} into {@link #in}.  The output defaults to
 * {@code "stdout"} when the parser has no {@code out1}.  When a tax tree file
 * was specified ({@code "auto"} selects {@code TaxTree.defaultTreeFile()}),
 * the tree is loaded before the constructor returns.
 *
 * @param args Command line arguments
 * @throws RuntimeException on a {@code key=value} argument that nothing recognizes
 */
public SummarizeSketchStats(String[] args){

	{//Preparse block for help, config files, and outstream
		PreParser pp=new PreParser(args, getClass(), false);
		args=pp.args;
		outstream=pp.outstream;
	}

	Parser parser=new Parser();
	ArrayList<String> names=new ArrayList<String>();
	String taxTreeFile=null;

	/* Parse arguments */
	for(int i=0; i<args.length; i++){

		final String arg=args[i];
		String[] split=arg.split("=");
		//Use a fixed locale so flag names match regardless of the JVM's default locale
		//(e.g. a Turkish default would lowercase 'I' to a dotless i and break equals()).
		String a=split[0].toLowerCase(Locale.ROOT);
		String b=split.length>1 ? split[1] : null;

		if(a.equals("printtotal") || a.equals("pt")){
			printTotal=Parse.parseBoolean(b);
		}else if(a.equals("ignoresametaxa")){
			ignoreSameTaxa=Parse.parseBoolean(b);
		}else if(a.equals("ignoresamebarcode") || a.equals("ignoresameindex")){
			ignoreSameBarcode=Parse.parseBoolean(b);
		}else if(a.equals("ignoresamelocation") || a.equals("ignoresameloc")){
			ignoreSameLocation=Parse.parseBoolean(b);
		}else if(a.equals("usetotal") || a.equals("totaldenominator") || a.equals("totald") || a.equals("td")){
			totalDenominator=Parse.parseBoolean(b);
		}

		else if(a.equals("taxtree") || a.equals("tree")){
			taxTreeFile=b;
		}else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){
			taxLevel=TaxTree.parseLevel(b);
			//Negative results are stored as-is; only non-negative levels are converted
			if(taxLevel>=0){
				taxLevel=TaxTree.levelToExtended(taxLevel);
			}
		}else if(a.equalsIgnoreCase("unique") || a.equalsIgnoreCase("uniquehits")){
			uniqueHitsForSecond=Parse.parseBoolean(b);
		}else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("printheader")){
			printHeader=Parse.parseBoolean(b);
		}

		else if(parser.parse(arg, a, b)){
			//do nothing
		}else if(!arg.contains("=")){
			//Bare argument: an existing file is one name, anything else is a comma-delimited list
			String[] x=(new File(arg).exists() ? new String[] {arg} : arg.split(","));
			for(String x2 : x){names.add(x2);}
		}else{
			throw new RuntimeException("Unknown parameter "+arg);
		}
	}
	if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}

	{//Process parser fields
		out=(parser.out1==null ? "stdout" : parser.out1);
		if(parser.in1!=null){
			String[] x=(new File(parser.in1).exists() ? new String[] {parser.in1} : parser.in1.split(","));
			for(String x2 : x){names.add(x2);}
		}
	}

	in=new ArrayList<String>();
	for(String s : names){
		Tools.getFileOrFiles(s, in, false, false, false, true);
	}

	if(taxTreeFile!=null){setTaxtree(taxTreeFile);}
}
jpayne@68 113
jpayne@68 114 void setTaxtree(String taxTreeFile){
jpayne@68 115 if(taxTreeFile==null){
jpayne@68 116 return;
jpayne@68 117 }
jpayne@68 118 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, false, false);
jpayne@68 119 }
jpayne@68 120
jpayne@68 121 public void summarize(){
jpayne@68 122 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
jpayne@68 123 for(String fname : in){
jpayne@68 124 ArrayList<SketchResultsSummary> ssl=summarize(fname);
jpayne@68 125 list.addAll(ssl);
jpayne@68 126 }
jpayne@68 127
jpayne@68 128 TextStreamWriter tsw=new TextStreamWriter(out, true, false, false);
jpayne@68 129 tsw.start();
jpayne@68 130 if(printHeader){tsw.print(header());}
jpayne@68 131 // if(printTotal){
jpayne@68 132 // tsw.println(total.toString());
jpayne@68 133 // }
jpayne@68 134 for(SketchResultsSummary ss : list){
jpayne@68 135 tsw.print(ss.toString());
jpayne@68 136 }
jpayne@68 137 tsw.poisonAndWait();
jpayne@68 138 }
jpayne@68 139
jpayne@68 140 // Query: Troseus_1X_k55.fa Seqs: 121 Bases: 2410606 gSize: 2368581 SketchLen: 8923
jpayne@68 141 // WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName
jpayne@68 142 // 99.89% 50.73% 100.00% 50.77% 0.02% 5683 5683 5 0 4719674 1 . Troseus
jpayne@68 143
jpayne@68 144 private ArrayList<SketchResultsSummary> summarize(String fname){
jpayne@68 145 TextFile tf=new TextFile(fname);
jpayne@68 146 ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
jpayne@68 147 SketchResultsSummary current=null;
jpayne@68 148
jpayne@68 149 final String format="WKID KID ANI Complt Contam Matches Unique noHit TaxID gSize gSeqs taxName";
jpayne@68 150 for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){
jpayne@68 151 if(line.startsWith("Query:")){
jpayne@68 152 if(current!=null){list.add(current);}
jpayne@68 153 current=new SketchResultsSummary(line);
jpayne@68 154 }else if(line.startsWith("WKID")){
jpayne@68 155 assert(line.equals(format)) :
jpayne@68 156 "Format should be:\n"+format;
jpayne@68 157 }else if(line.length()>0){
jpayne@68 158 assert(current!=null) : "No Query Header for line "+line;
jpayne@68 159 current.add(line);
jpayne@68 160 }
jpayne@68 161 }
jpayne@68 162 if(current!=null){list.add(current);}
jpayne@68 163 tf.close();
jpayne@68 164 return list;
jpayne@68 165 }
jpayne@68 166
jpayne@68 167 public static String header(){
jpayne@68 168 StringBuilder sb=new StringBuilder();
jpayne@68 169
jpayne@68 170 sb.append("#query");
jpayne@68 171
jpayne@68 172 sb.append('\t').append("seqs");
jpayne@68 173 sb.append('\t').append("bases");
jpayne@68 174 sb.append('\t').append("gSize");
jpayne@68 175 sb.append('\t').append("sketchLen");
jpayne@68 176
jpayne@68 177 sb.append('\t').append("primaryHits");
jpayne@68 178 sb.append('\t').append("primaryUnique");
jpayne@68 179 sb.append('\t').append("primaryNoHit");
jpayne@68 180
jpayne@68 181 sb.append('\t').append("WKID");
jpayne@68 182 sb.append('\t').append("KID");
jpayne@68 183 sb.append('\t').append("ANI");
jpayne@68 184 sb.append('\t').append("Complt");
jpayne@68 185 sb.append('\t').append("Contam");
jpayne@68 186 sb.append('\t').append("TaxID");
jpayne@68 187 sb.append('\t').append("TaxName");
jpayne@68 188 sb.append('\t').append("topContamID");
jpayne@68 189 sb.append('\t').append("topContamName");
jpayne@68 190
jpayne@68 191 sb.append('\n');
jpayne@68 192
jpayne@68 193 return sb.toString();
jpayne@68 194 }
jpayne@68 195
jpayne@68 196 private class SketchResultsSummary {
jpayne@68 197
jpayne@68 198 SketchResultsSummary(String line){
jpayne@68 199 parseHeader(line);
jpayne@68 200 }
jpayne@68 201
jpayne@68 202 void parseHeader(String line){
jpayne@68 203 String[] split=line.split("\t");
jpayne@68 204 for(String s : split){
jpayne@68 205 String[] split2=s.trim().split(": ");
jpayne@68 206 assert(split2.length==2) : "\n"+line+"\n"+s+"\n"+Arrays.toString(split2)+"\n";
jpayne@68 207 String a=split2[0], b=split2[1];
jpayne@68 208 // outstream.println(a+", "+b);
jpayne@68 209 if(a.equals("Query")){
jpayne@68 210 query=b;
jpayne@68 211 }else if(a.equals("Seqs")){
jpayne@68 212 seqs=Integer.parseInt(b);
jpayne@68 213 }else if(a.equals("Bases")){
jpayne@68 214 bases=Long.parseLong(b);
jpayne@68 215 }else if(a.equals("gSize")){
jpayne@68 216 gSize=Long.parseLong(b);
jpayne@68 217 }else if(a.equals("SketchLen")){
jpayne@68 218 sketchLen=Integer.parseInt(b);
jpayne@68 219 }else if(a.equals("TaxID")){
jpayne@68 220 taxID=Integer.parseInt(b);
jpayne@68 221 }else if(a.equals("IMG")){
jpayne@68 222 img=Long.parseLong(b);
jpayne@68 223 }else if(a.equals("File")){
jpayne@68 224 sketchLen=Integer.parseInt(b);
jpayne@68 225 }
jpayne@68 226 }
jpayne@68 227 }
jpayne@68 228
jpayne@68 229 public void add(String line) {
jpayne@68 230 SketchResultsLine srl=new SketchResultsLine(line);
jpayne@68 231 list.add(srl);
jpayne@68 232 }
jpayne@68 233
jpayne@68 234 @Override
jpayne@68 235 public String toString(){
jpayne@68 236 StringBuilder sb=new StringBuilder();
jpayne@68 237
jpayne@68 238 sb.append(query);
jpayne@68 239
jpayne@68 240 sb.append('\t').append(seqs);
jpayne@68 241 sb.append('\t').append(bases);
jpayne@68 242 sb.append('\t').append(gSize);
jpayne@68 243 sb.append('\t').append(sketchLen);
jpayne@68 244
jpayne@68 245 int primaryHits=0;
jpayne@68 246 int primaryUnique=0;
jpayne@68 247 int primaryNoHit=0;
jpayne@68 248
jpayne@68 249 float WKID=0;
jpayne@68 250 float KID=0;
jpayne@68 251 float ANI=0;
jpayne@68 252 float Complt=0;
jpayne@68 253 float Contam=0;
jpayne@68 254 int TaxID=0;
jpayne@68 255 String TaxName=".";
jpayne@68 256 int topContamID=0;
jpayne@68 257 String topContamName=".";
jpayne@68 258
jpayne@68 259 SketchResultsLine first=list.size()>0 ? list.get(0) : null;
jpayne@68 260 SketchResultsLine second=list.size()>1 ? list.get(1) : null;
jpayne@68 261 for(int i=2; tree!=null && i<list.size() && failsLevelFilter(first.taxID, second.taxID); i++){
jpayne@68 262 second=list.get(i);
jpayne@68 263 }
jpayne@68 264 if(second!=null && failsLevelFilter(first.taxID, second.taxID)){second=list.get(1);}
jpayne@68 265
jpayne@68 266 if(second!=null && uniqueHitsForSecond){
jpayne@68 267 for(int i=1; i<list.size(); i++){
jpayne@68 268
jpayne@68 269 SketchResultsLine line=list.get(i);
jpayne@68 270 if(!failsLevelFilter(first.taxID, line.taxID) && line.unique>second.unique && line.unique>=minUniqueHits){
jpayne@68 271 second=line;
jpayne@68 272 }
jpayne@68 273 }
jpayne@68 274 }
jpayne@68 275
jpayne@68 276 if(first!=null){
jpayne@68 277 primaryHits=first.matches;
jpayne@68 278 primaryUnique=first.unique;
jpayne@68 279 primaryNoHit=first.noHit;
jpayne@68 280
jpayne@68 281 WKID=first.wkid;
jpayne@68 282 KID=first.kid;
jpayne@68 283 ANI=first.ani;
jpayne@68 284 Complt=first.complt;
jpayne@68 285 Contam=first.contam;
jpayne@68 286 TaxID=first.taxID;
jpayne@68 287 TaxName=first.name;
jpayne@68 288 }
jpayne@68 289 if(second!=null){
jpayne@68 290 topContamID=second.taxID;
jpayne@68 291 topContamName=second.name;
jpayne@68 292 }
jpayne@68 293
jpayne@68 294 sb.append('\t').append(primaryHits);
jpayne@68 295 sb.append('\t').append(primaryUnique);
jpayne@68 296 sb.append('\t').append(primaryNoHit);
jpayne@68 297
jpayne@68 298 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", WKID));
jpayne@68 299 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", KID));
jpayne@68 300 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", ANI));
jpayne@68 301 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Complt));
jpayne@68 302 sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Contam));
jpayne@68 303 sb.append('\t').append(TaxID);
jpayne@68 304 sb.append('\t').append(TaxName);
jpayne@68 305 sb.append('\t').append(topContamID);
jpayne@68 306 sb.append('\t').append(topContamName);
jpayne@68 307
jpayne@68 308 sb.append('\n');
jpayne@68 309
jpayne@68 310 return sb.toString();
jpayne@68 311 }
jpayne@68 312
jpayne@68 313 private boolean failsLevelFilter(int a, int b) {
jpayne@68 314 if(a<1 || b<1 || tree==null){return false;}
jpayne@68 315 int c=tree.commonAncestor(a, b);
jpayne@68 316 TaxNode tn=tree.getNode(c);
jpayne@68 317 while(!tn.cellularOrganisms() && tn.levelExtended==TaxTree.NO_RANK_E){tn=tree.getNode(tn.pid);}
jpayne@68 318
jpayne@68 319 return tn.levelExtended<=taxLevel;
jpayne@68 320 }
jpayne@68 321
jpayne@68 322 String query;
jpayne@68 323 String fname;
jpayne@68 324 int seqs;
jpayne@68 325 long bases;
jpayne@68 326 long gSize;
jpayne@68 327 int sketchLen;
jpayne@68 328 int taxID;
jpayne@68 329 long img;
jpayne@68 330
jpayne@68 331 ArrayList<SketchResultsLine> list=new ArrayList<SketchResultsLine>();
jpayne@68 332
jpayne@68 333 }
jpayne@68 334
jpayne@68 335 private class SketchResultsLine{
jpayne@68 336
jpayne@68 337 SketchResultsLine(String line){
jpayne@68 338 //Handle colors
jpayne@68 339 if(line.startsWith(Colors.esc)){
jpayne@68 340 int first=line.indexOf('m');
jpayne@68 341 int last=line.lastIndexOf(Colors.esc);
jpayne@68 342 line=line.substring(first+1, last);
jpayne@68 343 }
jpayne@68 344 String[] split=line.replaceAll("%", "").split("\t");
jpayne@68 345 wkid=Float.parseFloat(split[0]);
jpayne@68 346 kid=Float.parseFloat(split[1]);
jpayne@68 347 ani=Float.parseFloat(split[2]);
jpayne@68 348 complt=Float.parseFloat(split[3]);
jpayne@68 349 contam=Float.parseFloat(split[4]);
jpayne@68 350
jpayne@68 351 matches=Integer.parseInt(split[5]);
jpayne@68 352 unique=Integer.parseInt(split[6]);
jpayne@68 353 noHit=Integer.parseInt(split[7]);
jpayne@68 354 taxID=Integer.parseInt(split[8]);
jpayne@68 355 gSize=Integer.parseInt(split[9]);
jpayne@68 356 gSeqs=Integer.parseInt(split[10]);
jpayne@68 357
jpayne@68 358 name=split[11];
jpayne@68 359 if(name.equals(".") && split.length>11){
jpayne@68 360 name=split[12];
jpayne@68 361 }
jpayne@68 362 }
jpayne@68 363
jpayne@68 364 float wkid;
jpayne@68 365 float kid;
jpayne@68 366 float ani;
jpayne@68 367 float complt;
jpayne@68 368 float contam;
jpayne@68 369 int matches;
jpayne@68 370 int unique;
jpayne@68 371 int noHit;
jpayne@68 372 int taxID;
jpayne@68 373 int gSize;
jpayne@68 374 int gSeqs;
jpayne@68 375 String name;
jpayne@68 376 }
jpayne@68 377
jpayne@68 378 final ArrayList<String> in;
jpayne@68 379 final String out;
jpayne@68 380
jpayne@68 381 TaxTree tree=null;
jpayne@68 382 int taxLevel=TaxTree.GENUS_E;
jpayne@68 383 boolean uniqueHitsForSecond=false;
jpayne@68 384 int minUniqueHits=3;
jpayne@68 385 boolean printHeader=true;
jpayne@68 386
jpayne@68 387 /** Legacy code from SealStats */
jpayne@68 388 boolean ignoreSameTaxa=false;
jpayne@68 389 boolean ignoreSameBarcode=false;
jpayne@68 390 boolean ignoreSameLocation=false;
jpayne@68 391 boolean totalDenominator=false;
jpayne@68 392 boolean printTotal=true;
jpayne@68 393
jpayne@68 394 PrintStream outstream=System.err;
jpayne@68 395
jpayne@68 396 }