diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SummarizeSketchStats.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SummarizeSketchStats.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,396 @@
+package sketch;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Locale;
+
+import fileIO.TextFile;
+import fileIO.TextStreamWriter;
+import shared.Colors;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.Shared;
+import shared.Tools;
+import tax.TaxNode;
+import tax.TaxTree;
+
+/**
+ * @author Brian Bushnell
+ * @date June 28, 2017
+ *
+ */
+public class SummarizeSketchStats {
+	
+	/**
+	 * Command-line entry point: builds a summarizer from the arguments,
+	 * runs it, and then closes the message stream.
+	 * @param args Command line arguments
+	 */
+	public static void main(String[] args){
+		//Parse the arguments and collect the input files
+		final SummarizeSketchStats summarizer=new SummarizeSketchStats(args);
+		
+		//Read every input and write the summary table
+		summarizer.summarize();
+		
+		//Close the print stream if it was redirected
+		Shared.closeStream(summarizer.outstream);
+	}
+	
+	/**
+	 * Parses the command line, collects the input file list, and loads the
+	 * taxonomy tree when one was requested.
+	 * @param args Command line arguments
+	 */
+	public SummarizeSketchStats(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			final PreParser preParser=new PreParser(args, getClass(), false);
+			args=preParser.args;
+			outstream=preParser.outstream;
+		}
+		
+		final Parser parser=new Parser();
+		final ArrayList<String> names=new ArrayList<String>();
+		String treePath=null;
+		
+		/* Parse arguments */
+		for(final String arg : args){
+			final String[] pair=arg.split("=");
+			final String a=pair[0].toLowerCase();
+			final String b=(pair.length>1 ? pair[1] : null);
+			
+			if(a.equals("printtotal") || a.equals("pt")){
+				printTotal=Parse.parseBoolean(b);
+			}else if(a.equals("ignoresametaxa")){
+				ignoreSameTaxa=Parse.parseBoolean(b);
+			}else if(a.equals("ignoresamebarcode") || a.equals("ignoresameindex")){
+				ignoreSameBarcode=Parse.parseBoolean(b);
+			}else if(a.equals("ignoresamelocation") || a.equals("ignoresameloc")){
+				ignoreSameLocation=Parse.parseBoolean(b);
+			}else if(a.equals("usetotal") || a.equals("totaldenominator") || a.equals("totald") || a.equals("td")){
+				totalDenominator=Parse.parseBoolean(b);
+			}else if(a.equals("taxtree") || a.equals("tree")){
+				treePath=b;
+			}else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){
+				//Non-negative levels are converted to the extended scale; negative values are stored as returned
+				final int level=TaxTree.parseLevel(b);
+				taxLevel=(level>=0 ? TaxTree.levelToExtended(level) : level);
+			}else if(a.equalsIgnoreCase("unique") || a.equalsIgnoreCase("uniquehits")){
+				uniqueHitsForSecond=Parse.parseBoolean(b);
+			}else if(a.equalsIgnoreCase("header") || a.equalsIgnoreCase("printheader")){
+				printHeader=Parse.parseBoolean(b);
+			}else if(parser.parse(arg, a, b)){
+				//Handled by the shared parser
+			}else if(!arg.contains("=")){
+				//A bare term is an input file, or a comma-delimited list of them
+				addFileOrList(arg, names);
+			}else{
+				throw new RuntimeException("Unknown parameter "+arg);
+			}
+		}
+		if("auto".equalsIgnoreCase(treePath)){
+			treePath=TaxTree.defaultTreeFile();
+		}
+		
+		{//Process parser fields
+			if(parser.out1==null){
+				out="stdout";
+			}else{
+				out=parser.out1;
+			}
+			if(parser.in1!=null){
+				addFileOrList(parser.in1, names);
+			}
+		}
+		
+		//Expand each collected name into the final input list
+		in=new ArrayList<String>();
+		for(final String name : names){
+			Tools.getFileOrFiles(name, in, false, false, false, true);
+		}
+		
+		if(treePath!=null){
+			setTaxtree(treePath);
+		}
+	}
+	
+	/**
+	 * Appends term to dest as a single entry when it names an existing file;
+	 * otherwise splits it on commas and appends every piece.
+	 */
+	private static void addFileOrList(String term, ArrayList<String> dest){
+		if(new File(term).exists()){
+			dest.add(term);
+		}else{
+			for(final String piece : term.split(",")){
+				dest.add(piece);
+			}
+		}
+	}
+	
+	/**
+	 * Loads the taxonomy tree from the given file into the tree field.
+	 * A null path leaves the current tree untouched.
+	 * @param taxTreeFile Path of the tree file, or null
+	 */
+	void setTaxtree(String taxTreeFile){
+		if(taxTreeFile!=null){
+			tree=TaxTree.loadTaxTree(taxTreeFile, outstream, false, false);
+		}
+	}
+	
+	/**
+	 * Parses every input file, then writes the optional header followed by
+	 * one summary row per query to the output destination.
+	 */
+	public void summarize(){
+		//Collect the summaries from all inputs before opening the writer
+		final ArrayList<SketchResultsSummary> summaries=new ArrayList<SketchResultsSummary>();
+		for(final String path : in){
+			summaries.addAll(summarize(path));
+		}
+		
+		final TextStreamWriter writer=new TextStreamWriter(out, true, false, false);
+		writer.start();
+		if(printHeader){
+			writer.print(header());
+		}
+		for(int i=0; i<summaries.size(); i++){
+			writer.print(summaries.get(i).toString());
+		}
+		writer.poisonAndWait();
+	}
+	
+//	Query: Troseus_1X_k55.fa	Seqs: 121 	Bases: 2410606	gSize: 2368581	SketchLen: 8923
+//	WKID	KID	ANI	Complt	Contam	Matches	Unique	noHit	TaxID	gSize	gSeqs	taxName
+//	99.89%	50.73%	100.00%	50.77%	0.02%	5683	5683	5	0	4719674	1	.	Troseus
+	
+	/**
+	 * Reads one sketch results file and groups its lines by query.
+	 * A line starting with "Query:" opens a new summary, a line starting with
+	 * "WKID" is the column header and is only validated, and every other
+	 * non-empty line is added to the current summary as a result row.
+	 * @param fname Path of the results file
+	 * @return One summary per "Query:" line, in file order
+	 */
+	private ArrayList<SketchResultsSummary> summarize(String fname){
+		final String format="WKID	KID	ANI	Complt	Contam	Matches	Unique	noHit	TaxID	gSize	gSeqs	taxName";
+		final ArrayList<SketchResultsSummary> list=new ArrayList<SketchResultsSummary>();
+		final TextFile tf=new TextFile(fname);
+		
+		//Close the file even if a malformed line makes parsing throw
+		try{
+			SketchResultsSummary current=null;
+			for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){
+				if(line.startsWith("Query:")){
+					if(current!=null){list.add(current);}
+					current=new SketchResultsSummary(line);
+				}else if(line.startsWith("WKID")){
+					assert(line.equals(format)) :
+						"Format should be:\n"+format;
+				}else if(line.length()>0){
+					assert(current!=null) : "No Query Header for line "+line;
+					current.add(line);
+				}
+			}
+			//The last query has no following "Query:" line to flush it
+			if(current!=null){list.add(current);}
+		}finally{
+			tf.close();
+		}
+		return list;
+	}
+	
+	/**
+	 * Builds the column header for the summary table.
+	 * @return The tab-delimited column names, terminated by a newline
+	 */
+	public static String header(){
+		final String[] columns={
+			//Query description
+			"#query", "seqs", "bases", "gSize", "sketchLen",
+			//Counts taken from the top hit
+			"primaryHits", "primaryUnique", "primaryNoHit",
+			//Scores and identity of the top hit, then the top contaminant
+			"WKID", "KID", "ANI", "Complt", "Contam", "TaxID", "TaxName",
+			"topContamID", "topContamName"
+		};
+		
+		final StringBuilder sb=new StringBuilder();
+		for(int i=0; i<columns.length; i++){
+			if(i>0){sb.append('\t');}
+			sb.append(columns[i]);
+		}
+		return sb.append('\n').toString();
+	}
+	
+	/**
+	 * Summary of one query block in a sketch results file: the values parsed
+	 * from its "Query:" header line plus the result rows that followed it.
+	 */
+	private class SketchResultsSummary {
+		
+		/** @param line The tab-delimited header line that starts with "Query:" */
+		SketchResultsSummary(String line){
+			parseHeader(line);
+		}
+
+		/**
+		 * Splits the header on tabs and each term on ": ", then stores the
+		 * values of recognized keys. Unrecognized keys are ignored.
+		 * @param line Header line made of "Key: value" terms
+		 */
+		void parseHeader(String line){
+			String[] split=line.split("\t");
+			for(String s : split){
+				String[] split2=s.trim().split(": ");
+				assert(split2.length==2) : "\n"+line+"\n"+s+"\n"+Arrays.toString(split2)+"\n";
+				String a=split2[0], b=split2[1];
+//				outstream.println(a+", "+b);
+				if(a.equals("Query")){
+					query=b;
+				}else if(a.equals("Seqs")){
+					seqs=Integer.parseInt(b);
+				}else if(a.equals("Bases")){
+					bases=Long.parseLong(b);
+				}else if(a.equals("gSize")){
+					gSize=Long.parseLong(b);
+				}else if(a.equals("SketchLen")){
+					sketchLen=Integer.parseInt(b);
+				}else if(a.equals("TaxID")){
+					taxID=Integer.parseInt(b);
+				}else if(a.equals("IMG")){
+					img=Long.parseLong(b);
+				}else if(a.equals("File")){
+					//File is a name, so it is stored as text rather than parsed as a number
+					fname=b;
+				}
+			}
+		}
+		
+		/**
+		 * Parses one result row and appends it to this query's hit list.
+		 * @param line Tab-delimited result row
+		 */
+		public void add(String line) {
+			SketchResultsLine srl=new SketchResultsLine(line);
+			list.add(srl);
+		}
+		
+		/**
+		 * Formats this query as one newline-terminated, tab-delimited row in the
+		 * column order of header(). Columns for a missing top hit or
+		 * contaminant keep their defaults (0, 0.00, or ".").
+		 */
+		@Override
+		public String toString(){
+			StringBuilder sb=new StringBuilder();
+			
+			sb.append(query);
+
+			sb.append('\t').append(seqs);
+			sb.append('\t').append(bases);
+			sb.append('\t').append(gSize);
+			sb.append('\t').append(sketchLen);
+			
+			int primaryHits=0;
+			int primaryUnique=0;
+			int primaryNoHit=0;
+
+			float WKID=0;
+			float KID=0;
+			float ANI=0;
+			float Complt=0;
+			float Contam=0;
+			int TaxID=0;
+			String TaxName=".";
+			int topContamID=0;
+			String topContamName=".";
+			
+			//first is the top hit; second is the candidate reported as the top contaminant
+			SketchResultsLine first=list.size()>0 ? list.get(0) : null;
+			SketchResultsLine second=list.size()>1 ? list.get(1) : null;
+			//With a tree, move down the list while the candidate fails the level filter against the top hit
+			for(int i=2; tree!=null && i<list.size() && failsLevelFilter(first.taxID, second.taxID); i++){
+				second=list.get(i);
+			}
+			//If even the last candidate tried fails the filter, fall back to the second row
+			if(second!=null && failsLevelFilter(first.taxID, second.taxID)){second=list.get(1);}
+			
+			//Optionally prefer a row that passes the filter and has more unique hits than the candidate
+			if(second!=null && uniqueHitsForSecond){
+				for(int i=1; i<list.size(); i++){
+					
+					SketchResultsLine line=list.get(i);
+					if(!failsLevelFilter(first.taxID, line.taxID) && line.unique>second.unique && line.unique>=minUniqueHits){
+						second=line;
+					}
+				}
+			}
+			
+			if(first!=null){
+				primaryHits=first.matches;
+				primaryUnique=first.unique;
+				primaryNoHit=first.noHit;
+
+				WKID=first.wkid;
+				KID=first.kid;
+				ANI=first.ani;
+				Complt=first.complt;
+				Contam=first.contam;
+				TaxID=first.taxID;
+				TaxName=first.name;
+			}
+			if(second!=null){
+				topContamID=second.taxID;
+				topContamName=second.name;
+			}
+			
+			sb.append('\t').append(primaryHits);
+			sb.append('\t').append(primaryUnique);
+			sb.append('\t').append(primaryNoHit);
+
+			//Locale.ROOT keeps '.' as the decimal separator regardless of the default locale
+			sb.append('\t').append(String.format(Locale.ROOT, "%.2f", WKID));
+			sb.append('\t').append(String.format(Locale.ROOT, "%.2f", KID));
+			sb.append('\t').append(String.format(Locale.ROOT, "%.2f", ANI));
+			sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Complt));
+			sb.append('\t').append(String.format(Locale.ROOT, "%.2f", Contam));
+			sb.append('\t').append(TaxID);
+			sb.append('\t').append(TaxName);
+			sb.append('\t').append(topContamID);
+			sb.append('\t').append(topContamName);
+			
+			sb.append('\n');
+			
+			return sb.toString();
+		}
+		
+		/**
+		 * Tests two taxa against the configured taxLevel using their common ancestor.
+		 * Starting at the common ancestor, nodes with extended level NO_RANK_E are
+		 * skipped by following parent IDs, stopping at a node for which
+		 * cellularOrganisms() is true.
+		 * @param a First taxonomy ID
+		 * @param b Second taxonomy ID
+		 * @return false when either ID is below 1 or no tree is loaded; otherwise
+		 * true when the resulting node's extended level does not exceed taxLevel
+		 */
+		private boolean failsLevelFilter(int a, int b) {
+			if(a<1 || b<1 || tree==null){return false;}
+			int c=tree.commonAncestor(a, b);
+			TaxNode tn=tree.getNode(c);
+			while(!tn.cellularOrganisms() && tn.levelExtended==TaxTree.NO_RANK_E){tn=tree.getNode(tn.pid);}
+			
+			return tn.levelExtended<=taxLevel;
+		}
+
+		/** Value of the "Query" header term */
+		String query;
+		/** Value of the "File" header term; null when the header has none */
+		String fname;
+		/** Value of the "Seqs" header term */
+		int seqs;
+		/** Value of the "Bases" header term */
+		long bases;
+		/** Value of the "gSize" header term */
+		long gSize;
+		/** Value of the "SketchLen" header term */
+		int sketchLen;
+		/** Value of the "TaxID" header term */
+		int taxID;
+		/** Value of the "IMG" header term */
+		long img;
+		
+		/** Result rows for this query, in file order */
+		ArrayList<SketchResultsLine> list=new ArrayList<SketchResultsLine>();
+		
+	}
+	
+	/** One parsed row of a sketch results table. */
+	private class SketchResultsLine{
+		
+		/**
+		 * Parses a tab-delimited result row with the columns
+		 * WKID, KID, ANI, Complt, Contam, Matches, Unique, noHit, TaxID, gSize, gSeqs, taxName.
+		 * Percent signs are removed before the numbers are parsed.
+		 * @param line Result row, optionally wrapped in a color escape sequence
+		 */
+		SketchResultsLine(String line){
+			//Handle colors: drop the leading color code and the trailing escape sequence
+			if(line.startsWith(Colors.esc)){
+				final int first=line.indexOf('m');
+				final int last=line.lastIndexOf(Colors.esc);
+				//With no trailing escape, last points at the prefix itself; keep the rest of the line
+				final int end=(last>first ? last : line.length());
+				line=line.substring(first+1, end);
+			}
+			final String[] split=line.replace("%", "").split("\t");
+			wkid=Float.parseFloat(split[0]);
+			kid=Float.parseFloat(split[1]);
+			ani=Float.parseFloat(split[2]);
+			complt=Float.parseFloat(split[3]);
+			contam=Float.parseFloat(split[4]);
+			
+			matches=Integer.parseInt(split[5]);
+			unique=Integer.parseInt(split[6]);
+			noHit=Integer.parseInt(split[7]);
+			taxID=Integer.parseInt(split[8]);
+			//Genome sizes can exceed the int range, so this column is parsed as a long
+			gSize=Long.parseLong(split[9]);
+			gSeqs=Integer.parseInt(split[10]);
+			
+			name=split[11];
+			//A "." in the name column means the name is in the following column, when there is one
+			if(name.equals(".") && split.length>12){
+				name=split[12];
+			}
+		}
+		
+		/** WKID column, with the percent sign removed */
+		float wkid;
+		/** KID column, with the percent sign removed */
+		float kid;
+		/** ANI column, with the percent sign removed */
+		float ani;
+		/** Complt column, with the percent sign removed */
+		float complt;
+		/** Contam column, with the percent sign removed */
+		float contam;
+		/** Matches column */
+		int matches;
+		/** Unique column */
+		int unique;
+		/** noHit column */
+		int noHit;
+		/** TaxID column */
+		int taxID;
+		/** gSize column */
+		long gSize;
+		/** gSeqs column */
+		int gSeqs;
+		/** taxName column, or the column after it when taxName is "." */
+		String name;
+	}
+	
+	/** Input files, filled in the constructor by passing each command-line name to Tools.getFileOrFiles */
+	final ArrayList<String> in;
+	/** Output destination; "stdout" when the parser was given no output */
+	final String out;
+	
+	/** Taxonomy tree; stays null unless a tree file was given on the command line */
+	TaxTree tree=null;
+	/** Extended taxonomic level that the level filter compares the common ancestor's level against */
+	int taxLevel=TaxTree.GENUS_E;
+	/** When true, a later hit with more unique matches can replace the reported contaminant */
+	boolean uniqueHitsForSecond=false;
+	/** Minimum unique matches a hit needs to replace the contaminant when uniqueHitsForSecond is set */
+	int minUniqueHits=3;
+	/** Whether summarize() writes the header() line before the rows */
+	boolean printHeader=true;
+	
+	/** Legacy code from SealStats; these flags are set from the command line but not read elsewhere in this file */
+	boolean ignoreSameTaxa=false;
+	boolean ignoreSameBarcode=false;
+	boolean ignoreSameLocation=false;
+	boolean totalDenominator=false;
+	boolean printTotal=true;
+	
+	/** Stream for messages; replaced in the constructor by the PreParser's outstream */
+	PrintStream outstream=System.err;
+	
+}