diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AnalyzeSketchResults.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AnalyzeSketchResults.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,537 @@
+package sketch;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import fileIO.ByteFile;
+import fileIO.ByteStreamWriter;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import structures.ByteBuilder;
+import structures.FloatList;
+import tax.TaxTree;
+import template.ThreadWaiter;
+
+/**
+ * @author Brian Bushnell
+ * @date May 9, 2016
+ *
+ */
+public class AnalyzeSketchResults {
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Initialization        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/**
+	 * Code entrance from the command line.
+	 * @param args Command line arguments
+	 */
+	public static void main(String[] args){
+		//Start a timer immediately upon code entrance.
+		Timer t=new Timer();
+		
+		//Create an instance of this class
+		AnalyzeSketchResults x=new AnalyzeSketchResults(args);
+		
+		//Run the object
+		x.process(t);
+		
+		//Close the print stream if it was redirected
+		Shared.closeStream(x.outstream);
+	}
+	
+	/**
+	 * Constructor.
+	 * @param args Command line arguments
+	 */
+	public AnalyzeSketchResults(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, /*getClass()*/null, false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		//Set shared static variables prior to parsing
+		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
+		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
+		
+		{//Parse the arguments
+			final Parser parser=parse(args);
+			overwrite=parser.overwrite;
+			append=parser.append;
+			
+			in1=parser.in1;
+			in2=parser.in2;
+
+			out1=parser.out1;
+		}
+		
+		fixExtensions(); //Add or remove .gz or .bz2 as needed
+		checkFileExistence(); //Ensure files can be read and written
+		checkStatics(); //Adjust file-related static fields as needed for this program
+
+		ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, overwrite, append, false);
+		ffoutMap=FileFormat.testOutput(outMap, FileFormat.TXT, null, true, overwrite, append, false);
+		ffoutAccuracy=FileFormat.testOutput(outAccuracy, FileFormat.TXT, null, true, overwrite, append, false);
+		ffoutBad=FileFormat.testOutput(outBad, FileFormat.TXT, null, true, overwrite, append, false);
+		ffin1=FileFormat.testInput(in1, FileFormat.TXT, null, true, true);
+		ffin2=FileFormat.testInput(in2, FileFormat.TXT, null, true, true);
+		
+		recordSets=(ffoutAccuracy==null && ffoutBad==null ? null : new ArrayList<RecordSet>());
+		tree=(treeFile==null) ? null : TaxTree.loadTaxTree(treeFile, outstream, true, false);
+		SSUMap.load(outstream);
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------    Initialization Helpers    ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** Parse arguments from the command line */
+	private Parser parse(String[] args){
+		
+		Parser parser=new Parser();
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			String[] split=arg.split("=");
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+			if(b!=null && b.equalsIgnoreCase("null")){b=null;}
+
+			if(a.equals("map") || a.equals("outmap")){
+				outMap=b;
+			}else if(a.equals("accuracy") || a.equals("outacc") || a.equals("outaccuracy")){
+				outAccuracy=b;
+			}else if(a.equals("outbad")){
+				outBad=b;
+			}else if(a.equals("tree")){
+				treeFile=b;
+			}else if(a.equals("shrinkonly")){
+				shrinkOnly=Parse.parseBoolean(b);
+			}else if(a.equals("ssu") || a.equals("ssufile")){
+				assert(false) : "ssu and ssufile are deprecated; please specify 16S or 18S independently";
+			}else if(a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("16Sfile")){
+				SSUMap.r16SFile=b;
+			}else if(a.equalsIgnoreCase("18S") || a.equalsIgnoreCase("18Sfile")){
+				SSUMap.r18SFile=b;
+			}else if(a.equals("mash")){
+				mode=MASH_MODE;
+			}else if(a.equals("sourmash")){
+				mode=SOURMASH_MODE;
+			}else if(a.equals("blast")){
+				mode=BLAST_MODE;
+			}else if(a.equals("bbsketch")){
+				mode=BBSKETCH_MODE;
+			}else if(a.equals("lines")){
+				maxLines=Long.parseLong(b);
+				if(maxLines<0){maxLines=Long.MAX_VALUE;}
+			}else if(a.equals("minsamples")){
+				minSamples=Integer.parseInt(b);
+			}else if(a.equals("verbose")){
+				RecordSet.verbose=Record.verbose=verbose=Parse.parseBoolean(b);
+			}else if(parser.parse(arg, a, b)){
+				//do nothing
+			}else{
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i];
+				//				throw new RuntimeException("Unknown parameter "+args[i]);
+			}
+		}
+		
+		return parser;
+	}
+	
+	/** Add or remove .gz or .bz2 as needed */
+	private void fixExtensions(){
+		in1=Tools.fixExtension(in1);
+		in2=Tools.fixExtension(in2);
+		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
+	}
+	
+	/** Ensure files can be read and written */
+	private void checkFileExistence(){
+		//Ensure output files can be written
+		if(!Tools.testOutputFiles(overwrite, append, false, out1, outMap)){
+			outstream.println((out1==null)+", "+out1);
+			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n");
+		}
+		
+		//Ensure input files can be read
+		if(!Tools.testInputFiles(false, true, in1, in2)){
+			throw new RuntimeException("\nCan't read some input files.\n");  
+		}
+		
+		//Ensure that no file was specified multiple times
+		if(!Tools.testForDuplicateFiles(true, in1, in2, out1, outMap)){
+			throw new RuntimeException("\nSome file names were specified multiple times.\n");
+		}
+	}
+	
+	/** Adjust file-related static fields as needed for this program */
+	private static void checkStatics(){
+		//Adjust the number of threads for input file reading
+		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
+			ByteFile.FORCE_MODE_BF2=true;
+		}
+		
+//		if(!ByteFile.FORCE_MODE_BF2){
+//			ByteFile.FORCE_MODE_BF2=false;
+//			ByteFile.FORCE_MODE_BF1=true;
+//		}
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Outer Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	void process(Timer t){
+		if(verbose){System.err.println("process()");}
+
+		ByteFile bf1=ByteFile.makeByteFile(ffin1);
+		ByteFile bf2=(ffin2==null ? null : ByteFile.makeByteFile(ffin2));
+
+		if(shrinkOnly){
+			runShrinkOnly(bf1);
+			return;
+		}
+		
+		bswBad=makeBSW(ffoutBad);
+		ResultLineParser parser=new ResultLineParser(mode, tree, bswBad, recordSets, false);
+		
+		processInner(bf1, parser, ffin2==null ? null : aniMap);
+		ByteStreamWriter bsw=makeBSW(ffout1);
+		printResults(parser, bsw);
+		if(bsw!=null){errorState|=bsw.poisonAndWait();}
+
+		ByteStreamWriter bswAcc=makeBSW(ffoutAccuracy);
+		if(recordSets!=null){
+			if(SSUMap.hasMap()){
+				processSetsThreaded();
+			}
+			printAccuracy(bswAcc);
+		}
+		if(bswAcc!=null){errorState|=bswAcc.poisonAndWait();}
+		if(bswBad!=null){errorState|=bswBad.poisonAndWait();}
+
+		ByteStreamWriter bswMap=makeBSW(ffoutMap);
+		if(bf2!=null){
+			processInner(bf2, parser, aaiMap);
+			printMap(bswMap);
+		}
+		if(bswMap!=null){errorState|=bswMap.poisonAndWait();}
+		
+		t.stop();
+		
+		outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
+		
+		outstream.println();
+		outstream.println("Lines Out:         \t"+linesOut);
+		outstream.println("Bytes Out:         \t"+bytesOut);
+		
+		if(errorState){
+			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+		}
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Inner Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	private void runShrinkOnly(ByteFile bf){
+		if(verbose){System.err.println("runShrinkOnly("+bf.name()+")");}
+		ResultLineParser parser=new ResultLineParser(mode, tree, null, null, true);
+		ByteStreamWriter bsw=makeBSW(ffout1);
+
+		byte[] line=bf.nextLine();
+
+		while(line!=null){
+			if(line.length>0){
+				if(maxLines>0 && linesProcessed>=maxLines){break;}
+				linesProcessed++;
+				bytesProcessed+=(line.length+1);
+				
+				parser.parse(line);
+				RecordSet rs=parser.processData(null, true);
+				if(rs!=null){
+					rs.sortAndSweep();
+					for(Record r : rs.records){
+						if(bsw!=null){
+							bsw.println(r.text);
+						}
+					}
+				}
+			}
+			line=bf.nextLine();
+		}
+		errorState|=bf.close();
+		
+		RecordSet rs=parser.currentSet;
+		if(rs!=null){
+			rs.sortAndSweep();
+			for(Record r : rs.records){
+				if(bsw!=null){
+					bsw.println(r.text);
+				}
+			}
+		}
+		
+		if(bsw!=null){errorState|=bsw.poisonAndWait();}
+	}
+	
+	private void processInner(ByteFile bf, ResultLineParser parser, HashMap<Long, Float> map){
+		if(verbose){System.err.println("processInner("+bf.name()+")");}
+		byte[] line=bf.nextLine();
+		
+		while(line!=null){
+			if(line.length>0){
+				if(maxLines>0 && linesProcessed>=maxLines){break;}
+				linesProcessed++;
+				bytesProcessed+=(line.length+1);
+				
+				parser.parse(line);
+				parser.processData(map, recordSets!=null);
+			}
+			line=bf.nextLine();
+		}
+		errorState|=bf.close();
+	}
+	
+	private void processSetsThreaded(){
+		if(verbose){System.err.println("processSetsThreaded("+Shared.threads()+")");}
+		final int threads=Shared.threads();
+//		final ThreadWaiter waiter=new ThreadWaiter();
+		final ArrayList<SSUThread> list=new ArrayList<SSUThread>(threads);
+		final AtomicInteger atom=new AtomicInteger(0);
+		for(int i=0; i<threads; i++){
+			list.add(new SSUThread(atom));
+		}
+		boolean success=ThreadWaiter.startAndWait(list);
+		if(!success){errorState=true;}
+	}
+	
+	private void printAccuracy(ByteStreamWriter bswAcc){
+		if(verbose){System.err.println("printAccuracy("+bswAcc+")");}
+		int[][] results=new int[taxLevels][16];
+		for(RecordSet rs : recordSets){
+			if(mode==MASH_MODE){
+				rs.sortAndSweep();
+				rs.processSSU();
+			}
+			int[] statusArray=rs.test(bswBad);
+			for(int level=0; level<statusArray.length; level++){
+				int status=statusArray[level];
+				results[level][status]++;
+				assert(status==NOHIT || status==CORRECT || 
+						status==INCORRECT_TAX_CORRECT_SSU || status==INCORRECT_TAX_INCORRECT_SSU || 
+						status==INCORRECT_TAX_MISSING_SSU) : status;
+			}
+		}
+		ByteBuilder bb=new ByteBuilder();
+		bb.append("#Level          \tCorrect\tbadTaxGoodSSU\tBadTaxNoSSU\tbadTaxBadSSU\tNoHit\n");
+		if(bswAcc!=null){bswAcc.print(bb);}
+		for(String levelName : printLevels){
+			int level=TaxTree.stringToLevelExtended(levelName);
+			bb.clear();
+			bb.append(TaxTree.levelToStringExtended(level));
+			while(bb.length<"species subgroup".length()){bb.append(' ');}
+			bb.tab();
+			bb.append(results[level][CORRECT]).tab();
+			bb.append(results[level][INCORRECT_TAX_CORRECT_SSU]).tab();
+			bb.append(results[level][INCORRECT_TAX_MISSING_SSU]).tab();
+			bb.append(results[level][INCORRECT_TAX_INCORRECT_SSU]).tab();
+			bb.append(results[level][NOHIT]).nl();
+			if(bswAcc!=null){bswAcc.print(bb);}
+		}
+	}
+	
+	private void printMap(ByteStreamWriter bsw){
+		if(verbose){System.err.println("printMap("+bsw+")");}
+		ByteBuilder bb=new ByteBuilder();
+		bb.append("#qID\trID\tANI\tAAI\n");
+		if(bsw!=null){bsw.print(bb);}
+		for(Entry<Long, Float> e : aniMap.entrySet()){
+			long key=e.getKey();
+			int qID=((int)(key>>>32));
+			int rID=(int)(key&Integer.MAX_VALUE);
+			float ani=e.getValue();
+			Float aai=aaiMap.get(key);
+			if(aai!=null){
+				bb.clear();
+				bb.append(qID).tab().append(rID).tab().append(ani, 3).tab().append(aai, 3).nl();
+				if(bsw!=null){bsw.print(bb);}
+				linesOut++;
+				bytesOut+=bb.length();
+			}
+		}
+	}
+	
+	private void printResults(ResultLineParser parser, ByteStreamWriter bsw){
+		if(verbose){System.err.println("printResults("+bsw+")");}
+		ByteBuilder bb=new ByteBuilder();
+		bb.append("#Level    \tRank\tANI_AVG\tSSU_AVG\tANI_STD\tSSU_STD\tSamples");
+		if(bsw!=null){bsw.println(bb);}
+		for(int level=0; level<taxLevels; level++){
+			long aniCount=parser.levelCounts[level];
+			long ssuCount=parser.levelCountsSSU[level];
+			if(aniCount>=minSamples && ((1L<<level)&printLevelsMask)!=0){
+				bb.clear();
+				String name=TaxTree.levelToStringExtended(level);
+				double aniSum=parser.levelAniSums[level];
+				double ssuSum=parser.levelSSUSums[level];
+				FloatList aniList=parser.aniLists[level];
+				FloatList ssuList=parser.ssuLists[level];
+				aniList.sort();
+				ssuList.sort();
+
+				double aniAvg=aniSum/aniCount;
+				double ssuAvg=ssuSum/ssuCount;
+				double aniStd=aniList.stdev();
+				double ssuStd=ssuList.stdev();
+
+				bb.append(name);
+				while(bb.length<9){bb.append(' ');}
+				bb.tab().append(level);
+				bb.tab().append(aniAvg, 3);
+				bb.tab().append(ssuAvg, 3);
+				bb.tab().append(aniStd, 3);
+				bb.tab().append(ssuStd, 3);
+				bb.tab().append(aniCount);
+				if(bsw!=null){bsw.println(bb);}
+				linesOut++;
+				bytesOut+=bb.length();
+			}
+		}
+	}
+	
+	private static ByteStreamWriter makeBSW(FileFormat ff){
+		if(verbose){System.err.println("makeBSW("+ff+")");}
+		if(ff==null){return null;}
+		ByteStreamWriter bsw=new ByteStreamWriter(ff);
+		bsw.start();
+		return bsw;
+	}
+	
+	private static long makePrintLevelsMask(String[] printLevelsArray){
+		long mask=0;
+		for(String s : printLevelsArray){
+			int level=TaxTree.stringToLevelExtended(s);
+			long bit=1L<<level;
+			assert(Long.bitCount(bit)==1);
+			mask|=bit;
+		}
+		return mask;
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	private class SSUThread extends Thread{
+		
+		SSUThread(AtomicInteger atom_){
+			atom=atom_;
+		}
+		
+		@Override
+		public void run(){
+			for(int idx=atom.getAndIncrement(); idx<recordSets.size(); idx=atom.getAndIncrement()){
+				RecordSet rs=recordSets.get(idx);
+				rs.sortAndSweep();
+				rs.processSSU();
+			}
+		}
+		
+		private final AtomicInteger atom;
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	
+	
+	/*--------------------------------------------------------------*/
+	/*----------------            Fields            ----------------*/
+	/*--------------------------------------------------------------*/
+
+	private String in1=null;
+	private String in2=null;
+	private String out1="stdout.txt";
+	private String outMap=null;
+	private String outAccuracy=null;
+	private String outBad=null;
+	private String treeFile=null;
+	
+	private ByteStreamWriter bswBad=null;
+	
+	/*--------------------------------------------------------------*/
+	
+	private long linesProcessed=0;
+	private long linesOut=0;
+	private long bytesProcessed=0;
+	private long bytesOut=0;
+	
+	private boolean shrinkOnly=false;
+	
+	int minSamples=1;
+	private long maxLines=Long.MAX_VALUE;
+	
+	final static int taxLevels=TaxTree.numTaxLevelNamesExtended;
+
+	final HashMap<Long, Float> aniMap=new HashMap<Long, Float>();
+	final HashMap<Long, Float> aaiMap=new HashMap<Long, Float>();
+	
+	final TaxTree tree;
+	final ArrayList<RecordSet> recordSets;
+	
+//	static HashMap<Integer, byte[]> ssuMap;
+	
+	static final String[] printLevels=new String[] {"strain", "species", "genus", "family", "order", "class", "phylum", "superkingdom", "life"};
+	static final long printLevelsMask=makePrintLevelsMask(printLevels);
+	
+	/*--------------------------------------------------------------*/
+
+	static int NOHIT=0;
+	static int CORRECT=1;
+	static int INCORRECT_TAX=2;
+	static int INCORRECT_SSU=4;
+	static int MISSING_SSU=8;
+	private static int INCORRECT_TAX_CORRECT_SSU=INCORRECT_TAX;
+	private static int INCORRECT_TAX_INCORRECT_SSU=INCORRECT_TAX|INCORRECT_SSU;
+	private static int INCORRECT_TAX_MISSING_SSU=INCORRECT_TAX|MISSING_SSU;
+	
+	static final int BBSKETCH_MODE=0;
+	static final int MASH_MODE=1;
+	static final int SOURMASH_MODE=2;
+	static final int BLAST_MODE=3;
+	static int mode=BBSKETCH_MODE;
+	
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Final Fields         ----------------*/
+	/*--------------------------------------------------------------*/
+
+	private final FileFormat ffin1;
+	private final FileFormat ffin2;
+	private final FileFormat ffout1;
+	private final FileFormat ffoutMap;
+	private final FileFormat ffoutAccuracy;
+	private final FileFormat ffoutBad;
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Common Fields         ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	private PrintStream outstream=System.err;
+	public static boolean verbose=false;
+	public boolean errorState=false;
+	private boolean overwrite=false;
+	private boolean append=false;
+	
+}