Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SummarizeSketchStats.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package sketch;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;

import fileIO.TextFile;
import fileIO.TextStreamWriter;
import shared.Colors;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.Shared;
import shared.Tools;
import tax.TaxNode;
import tax.TaxTree;

/**
 * Reads one or more sketch-result text files and writes a tab-delimited
 * summary with one row per "Query:" record.  Each row holds the query header
 * fields, the statistics of the first (top) hit, and the taxID/name of a
 * second hit that is reported as the top contaminant.
 *
 * @author Brian Bushnell
 * @date June 28, 2017
 *
 */
public class SummarizeSketchStats {

    /**
     * Code entrance from the command line.
     * @param args Command line arguments
     */
    public static void main(String[] args){
        //Create a new SummarizeSketchStats instance
        SummarizeSketchStats x=new SummarizeSketchStats(args);

        //And run it
        x.summarize();

        //Close the print stream if it was redirected
        Shared.closeStream(x.outstream);
    }

    /**
     * Parses the command line, expands the input file list, and optionally
     * loads a taxonomy tree.
     * Arguments without '=' are treated as input names; anything with '='
     * that is not recognized here or by the shared Parser is an error.
     * @param args Command line arguments
     * @throws RuntimeException on an unknown key=value parameter
     */
    public SummarizeSketchStats(String[] args){

        {//Preparse block for help, config files, and outstream
            PreParser pp=new PreParser(args, getClass(), false);
            args=pp.args;
            outstream=pp.outstream;
        }

        Parser parser=new Parser();
        ArrayList<String> names=new ArrayList<String>();
        String taxTreeFile=null;

        /* Parse arguments */
        for(int i=0; i<args.length; i++){

            final String arg=args[i];
            //Deliberately split without a limit: "key=" must keep yielding a null value.
            String[] split=arg.split("=");
            String a=split[0].toLowerCase();
            String b=split.length>1 ? split[1] : null;

            if(a.equals("printtotal") || a.equals("pt")){
                printTotal=Parse.parseBoolean(b);
            }else if(a.equals("ignoresametaxa")){
                ignoreSameTaxa=Parse.parseBoolean(b);
            }else if(a.equals("ignoresamebarcode") || a.equals("ignoresameindex")){
                ignoreSameBarcode=Parse.parseBoolean(b);
            }else if(a.equals("ignoresamelocation") || a.equals("ignoresameloc")){
                ignoreSameLocation=Parse.parseBoolean(b);
            }else if(a.equals("usetotal") || a.equals("totaldenominator") || a.equals("totald") || a.equals("td")){
                totalDenominator=Parse.parseBoolean(b);
            }

            else if(a.equals("taxtree") || a.equals("tree")){
                taxTreeFile=b;
            }else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){
                taxLevel=TaxTree.parseLevel(b);
                if(taxLevel>=0){
                    taxLevel=TaxTree.levelToExtended(taxLevel);
                }
            }else if(a.equalsIgnoreCase("unique") || a.equalsIgnoreCase("uniquehits")){
                uniqueHitsForSecond=Parse.parseBoolean(b);
            }else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("printheader")){
                printHeader=Parse.parseBoolean(b);
            }

            else if(parser.parse(arg, a, b)){
                //do nothing
            }else if(!arg.contains("=")){
                addNames(arg, names);
            }else{
                throw new RuntimeException("Unknown parameter "+arg);
            }
        }
        if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}

        {//Process parser fields
            out=(parser.out1==null ? "stdout" : parser.out1);
            if(parser.in1!=null){
                addNames(parser.in1, names);
            }
        }

        //Expand each supplied name into the final list of input files
        in=new ArrayList<String>();
        for(String s : names){
            Tools.getFileOrFiles(s, in, false, false, false, true);
        }

        if(taxTreeFile!=null){setTaxtree(taxTreeFile);}
    }

    /**
     * Adds an input specification to the name list.  If the whole string is
     * an existing path it is added as-is (so file names containing commas
     * work); otherwise it is treated as a comma-delimited list.
     * @param spec A path or comma-delimited list of paths
     * @param names Destination list
     */
    private static void addNames(String spec, ArrayList<String> names){
        String[] parts=(new File(spec).exists() ? new String[] {spec} : spec.split(","));
        for(String part : parts){names.add(part);}
    }

    /**
     * Loads the taxonomy tree used for the level filter.
     * @param taxTreeFile Path of the tree file; null is a no-op
     */
    void setTaxtree(String taxTreeFile){
        if(taxTreeFile==null){
            return;
        }
        tree=TaxTree.loadTaxTree(taxTreeFile, outstream, false, false);
    }

    /**
     * Parses every input file, then writes the optional header followed by
     * one summary row per query to the output destination.
     */
    public void summarize(){
        ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
        for(String fname : in){
            ArrayList<SketchResultsSummary> ssl=summarize(fname);
            list.addAll(ssl);
        }

        TextStreamWriter tsw=new TextStreamWriter(out, true, false, false);
        tsw.start();
        if(printHeader){tsw.print(header());}
        for(SketchResultsSummary ss : list){
            tsw.print(ss.toString());
        }
        tsw.poisonAndWait();
    }

    /*
     * Example of the input this class consumes (fields are tab-separated in
     * the file; data rows are split on tabs by SketchResultsLine):
     *
     * Query: Troseus_1X_k55.fa  Seqs: 121  Bases: 2410606  gSize: 2368581  SketchLen: 8923
     * WKID    KID     ANI      Complt  Contam  Matches  Unique  noHit  TaxID  gSize    gSeqs  taxName
     * 99.89%  50.73%  100.00%  50.77%  0.02%   5683     5683    5      0      4719674  1      .  Troseus
     */

    /**
     * Parses a single results file into one summary per "Query:" record.
     * A line starting with "Query:" opens a new record, a line starting with
     * "WKID" is the column header (checked only by assertion), and any other
     * non-empty line is a hit row appended to the current record.
     * @param fname Results file to read
     * @return Summaries in file order; empty if the file has no query records
     */
    private ArrayList<SketchResultsSummary> summarize(String fname){
        TextFile tf=new TextFile(fname);
        ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
        SketchResultsSummary current=null;
        //Explicit tab escapes, matching the tab-split used for the data rows.
        final String format="WKID\tKID\tANI\tComplt\tContam\tMatches\tUnique\tnoHit\tTaxID\tgSize\tgSeqs\ttaxName";
        for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){
            if(line.startsWith("Query:")){
                if(current!=null){list.add(current);}
                current=new SketchResultsSummary(line);
            }else if(line.startsWith("WKID")){
                assert(line.equals(format)) : "Format should be:\n"+format;
            }else if(line.length()>0){
                assert(current!=null) : "No Query Header for line "+line;
                current.add(line);
            }
        }
        //Flush the last open record
        if(current!=null){list.add(current);}
        tf.close();
        return list;
    }

    /**
     * @return The tab-delimited, newline-terminated column header whose
     * columns correspond to {@link SketchResultsSummary#toString()}.
     */
    public static String header(){
        StringBuilder sb=new StringBuilder();

        sb.append("#query");
        sb.append('\t').append("seqs");
        sb.append('\t').append("bases");
        sb.append('\t').append("gSize");
        sb.append('\t').append("sketchLen");
        sb.append('\t').append("primaryHits");
        sb.append('\t').append("primaryUnique");
        sb.append('\t').append("primaryNoHit");
        sb.append('\t').append("WKID");
        sb.append('\t').append("KID");
        sb.append('\t').append("ANI");
        sb.append('\t').append("Complt");
        sb.append('\t').append("Contam");
        sb.append('\t').append("TaxID");
        sb.append('\t').append("TaxName");
        sb.append('\t').append("topContamID");
        sb.append('\t').append("topContamName");
        sb.append('\n');

        return sb.toString();
    }

    /**
     * One query record: the parsed "Query:" header plus its hit rows in the
     * order they appeared in the file.
     */
    private class SketchResultsSummary {

        /** @param line The "Query:" header line of the record */
        SketchResultsSummary(String line){
            parseHeader(line);
        }

        /**
         * Parses a tab-delimited header of "Key: value" pairs.
         * Recognized keys are Query, Seqs, Bases, gSize, SketchLen, TaxID,
         * IMG and File; other keys are ignored.
         * @param line The header line
         * @throws NumberFormatException if a numeric field is malformed
         */
        void parseHeader(String line){
            String[] split=line.split("\t");
            for(String s : split){
                //Limit of 2 so that values containing ": " (e.g. query names) stay intact
                String[] split2=s.trim().split(": ", 2);
                assert(split2.length==2) : "\n"+line+"\n"+s+"\n"+Arrays.toString(split2)+"\n";
                String a=split2[0], b=split2[1];
                if(a.equals("Query")){
                    query=b;
                }else if(a.equals("Seqs")){
                    seqs=Integer.parseInt(b);
                }else if(a.equals("Bases")){
                    bases=Long.parseLong(b);
                }else if(a.equals("gSize")){
                    gSize=Long.parseLong(b);
                }else if(a.equals("SketchLen")){
                    sketchLen=Integer.parseInt(b);
                }else if(a.equals("TaxID")){
                    taxID=Integer.parseInt(b);
                }else if(a.equals("IMG")){
                    img=Long.parseLong(b);
                }else if(a.equals("File")){
                    //Previously parsed as an int into sketchLen, which failed on any path
                    fname=b;
                }
            }
        }

        /**
         * Parses a hit row and appends it to this record.
         * @param line A tab-delimited hit row
         */
        public void add(String line) {
            SketchResultsLine srl=new SketchResultsLine(line);
            list.add(srl);
        }

        /**
         * Formats this record as one tab-delimited, newline-terminated row.
         * The first hit supplies the primary statistics.  The second hit
         * (top contaminant) starts as row 1; when a tree is loaded, rows that
         * fail the level filter against the first hit are skipped, falling
         * back to row 1 if every candidate fails.  With uniqueHitsForSecond,
         * a passing row with more unique hits (and at least minUniqueHits)
         * replaces it.  Missing hits are reported as 0 / ".".
         */
        @Override
        public String toString(){
            StringBuilder sb=new StringBuilder();

            sb.append(query);
            sb.append('\t').append(seqs);
            sb.append('\t').append(bases);
            sb.append('\t').append(gSize);
            sb.append('\t').append(sketchLen);

            int primaryHits=0;
            int primaryUnique=0;
            int primaryNoHit=0;
            float WKID=0;
            float KID=0;
            float ANI=0;
            float Complt=0;
            float Contam=0;
            int TaxID=0;
            String TaxName=".";
            int topContamID=0;
            String topContamName=".";

            SketchResultsLine first=list.size()>0 ? list.get(0) : null;
            SketchResultsLine second=list.size()>1 ? list.get(1) : null;

            //i<list.size() with i>=2 guarantees first and second are non-null here
            for(int i=2; tree!=null && i<list.size() && failsLevelFilter(first.taxID, second.taxID); i++){
                second=list.get(i);
            }
            //Nothing passed the filter; fall back to the original second row
            if(second!=null && failsLevelFilter(first.taxID, second.taxID)){second=list.get(1);}

            if(second!=null && uniqueHitsForSecond){
                for(int i=1; i<list.size(); i++){
                    SketchResultsLine line=list.get(i);
                    if(!failsLevelFilter(first.taxID, line.taxID) && line.unique>second.unique && line.unique>=minUniqueHits){
                        second=line;
                    }
                }
            }

            if(first!=null){
                primaryHits=first.matches;
                primaryUnique=first.unique;
                primaryNoHit=first.noHit;
                WKID=first.wkid;
                KID=first.kid;
                ANI=first.ani;
                Complt=first.complt;
                Contam=first.contam;
                TaxID=first.taxID;
                TaxName=first.name;
            }
            if(second!=null){
                topContamID=second.taxID;
                topContamName=second.name;
            }

            sb.append('\t').append(primaryHits);
            sb.append('\t').append(primaryUnique);
            sb.append('\t').append(primaryNoHit);
            sb.append('\t').append(String.format(Locale.ROOT, "%.2f", WKID));
            sb.append('\t').append(String.format(Locale.ROOT, "%.2f", KID));
            sb.append('\t').append(String.format(Locale.ROOT, "%.2f", ANI));
            sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Complt));
            sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Contam));
            sb.append('\t').append(TaxID);
            sb.append('\t').append(TaxName);
            sb.append('\t').append(topContamID);
            sb.append('\t').append(topContamName);
            sb.append('\n');

            return sb.toString();
        }

        /**
         * Tests whether two taxa are too closely related to count as distinct.
         * Finds their common ancestor, walks up past unranked nodes, and
         * compares the resulting extended level with taxLevel.
         * @param a First taxID
         * @param b Second taxID
         * @return true if the ranked common ancestor is at or below taxLevel;
         * false if either ID is below 1 or no tree is loaded
         */
        private boolean failsLevelFilter(int a, int b) {
            if(a<1 || b<1 || tree==null){return false;}
            int c=tree.commonAncestor(a, b);
            TaxNode tn=tree.getNode(c);
            //NOTE(review): assumes this walk always reaches a ranked node or cellular organisms;
            //a self-parented unranked root would not terminate - confirm against TaxTree.
            while(!tn.cellularOrganisms() && tn.levelExtended==TaxTree.NO_RANK_E){tn=tree.getNode(tn.pid);}
            return tn.levelExtended<=taxLevel;
        }

        /** Value of the "Query" header field */
        String query;
        /** Value of the "File" header field */
        String fname;
        int seqs;
        long bases;
        long gSize;
        int sketchLen;
        int taxID;
        long img;

        /** Hit rows in file order */
        ArrayList<SketchResultsLine> list=new ArrayList<SketchResultsLine>();

    }

    /** One parsed hit row of a sketch results table. */
    private class SketchResultsLine{

        /**
         * Parses a tab-delimited hit row after stripping a surrounding color
         * escape sequence and all percent signs.
         * @param line The raw row; at least 12 columns are required
         * @throws NumberFormatException if a numeric column is malformed
         */
        SketchResultsLine(String line){

            //Handle colors
            if(line.startsWith(Colors.esc)){
                int first=line.indexOf('m');
                int last=line.lastIndexOf(Colors.esc);
                //Without a trailing reset sequence, keep everything after the opening code
                line=(last>first ? line.substring(first+1, last) : line.substring(first+1));
            }

            String[] split=line.replaceAll("%", "").split("\t");
            wkid=Float.parseFloat(split[0]);
            kid=Float.parseFloat(split[1]);
            ani=Float.parseFloat(split[2]);
            complt=Float.parseFloat(split[3]);
            contam=Float.parseFloat(split[4]);
            matches=Integer.parseInt(split[5]);
            unique=Integer.parseInt(split[6]);
            noHit=Integer.parseInt(split[7]);
            taxID=Integer.parseInt(split[8]);
            //Parsed as long so sizes above Integer.MAX_VALUE do not fail
            gSize=Long.parseLong(split[9]);
            gSeqs=Integer.parseInt(split[10]);
            name=split[11];
            //A "." placeholder may be followed by the real name in a 13th column
            if(name.equals(".") && split.length>12){
                name=split[12];
            }
        }

        float wkid;
        float kid;
        float ani;
        float complt;
        float contam;
        int matches;
        int unique;
        int noHit;
        int taxID;
        long gSize;
        int gSeqs;
        String name;
    }

    /** Expanded list of input result files */
    final ArrayList<String> in;
    /** Output destination; "stdout" when no output was specified */
    final String out;

    /** Optional taxonomy tree; the level filter is disabled while this is null */
    TaxTree tree=null;
    /** Extended taxonomic level at or below which two hits are considered the same */
    int taxLevel=TaxTree.GENUS_E;
    /** Prefer the passing hit with the most unique matches as the top contaminant */
    boolean uniqueHitsForSecond=false;
    /** Minimum unique matches for a hit to be chosen by uniqueHitsForSecond */
    int minUniqueHits=3;
    /** Print the column header before the summary rows */
    boolean printHeader=true;

    /** Legacy code from SealStats; these flags are parsed but not read in this class */
    boolean ignoreSameTaxa=false;
    boolean ignoreSameBarcode=false;
    boolean ignoreSameLocation=false;
    boolean totalDenominator=false;
    boolean printTotal=true;

    /** Destination for status messages */
    PrintStream outstream=System.err;

}