Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AnalyzeSketchResults.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AnalyzeSketchResults.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,537 @@ +package sketch; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicInteger; + +import fileIO.ByteFile; +import fileIO.ByteStreamWriter; +import fileIO.FileFormat; +import fileIO.ReadWrite; +import shared.Parse; +import shared.Parser; +import shared.PreParser; +import shared.Shared; +import shared.Timer; +import shared.Tools; +import structures.ByteBuilder; +import structures.FloatList; +import tax.TaxTree; +import template.ThreadWaiter; + +/** + * @author Brian Bushnell + * @date May 9, 2016 + * + */ +public class AnalyzeSketchResults { + + /*--------------------------------------------------------------*/ + /*---------------- Initialization ----------------*/ + /*--------------------------------------------------------------*/ + + /** + * Code entrance from the command line. + * @param args Command line arguments + */ + public static void main(String[] args){ + //Start a timer immediately upon code entrance. + Timer t=new Timer(); + + //Create an instance of this class + AnalyzeSketchResults x=new AnalyzeSketchResults(args); + + //Run the object + x.process(t); + + //Close the print stream if it was redirected + Shared.closeStream(x.outstream); + } + + /** + * Constructor. + * @param args Command line arguments + */ + public AnalyzeSketchResults(String[] args){ + + {//Preparse block for help, config files, and outstream + PreParser pp=new PreParser(args, /*getClass()*/null, false); + args=pp.args; + outstream=pp.outstream; + } + + //Set shared static variables prior to parsing + ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; + ReadWrite.MAX_ZIP_THREADS=Shared.threads(); + + {//Parse the arguments + final Parser parser=parse(args); + overwrite=parser.overwrite; + append=parser.append; + + in1=parser.in1; + in2=parser.in2; + + out1=parser.out1; + } + + fixExtensions(); //Add or remove .gz or .bz2 as needed + checkFileExistence(); //Ensure files can be read and written + checkStatics(); //Adjust file-related static fields as needed for this program + + ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, overwrite, append, false); + ffoutMap=FileFormat.testOutput(outMap, FileFormat.TXT, null, true, overwrite, append, false); + ffoutAccuracy=FileFormat.testOutput(outAccuracy, FileFormat.TXT, null, true, overwrite, append, false); + ffoutBad=FileFormat.testOutput(outBad, FileFormat.TXT, null, true, overwrite, append, false); + ffin1=FileFormat.testInput(in1, FileFormat.TXT, null, true, true); + ffin2=FileFormat.testInput(in2, FileFormat.TXT, null, true, true); + + recordSets=(ffoutAccuracy==null && ffoutBad==null ? null : new ArrayList<RecordSet>()); + tree=(treeFile==null) ? null : TaxTree.loadTaxTree(treeFile, outstream, true, false); + SSUMap.load(outstream); + } + + /*--------------------------------------------------------------*/ + /*---------------- Initialization Helpers ----------------*/ + /*--------------------------------------------------------------*/ + + /** Parse arguments from the command line */ + private Parser parse(String[] args){ + + Parser parser=new Parser(); + for(int i=0; i<args.length; i++){ + String arg=args[i]; + String[] split=arg.split("="); + String a=split[0].toLowerCase(); + String b=split.length>1 ? split[1] : null; + if(b!=null && b.equalsIgnoreCase("null")){b=null;} + + if(a.equals("map") || a.equals("outmap")){ + outMap=b; + }else if(a.equals("accuracy") || a.equals("outacc") || a.equals("outaccuracy")){ + outAccuracy=b; + }else if(a.equals("outbad")){ + outBad=b; + }else if(a.equals("tree")){ + treeFile=b; + }else if(a.equals("shrinkonly")){ + shrinkOnly=Parse.parseBoolean(b); + }else if(a.equals("ssu") || a.equals("ssufile")){ + assert(false) : "ssu and ssufile are deprecated; please specify 16S or 18S independently"; + }else if(a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("16Sfile")){ + SSUMap.r16SFile=b; + }else if(a.equalsIgnoreCase("18S") || a.equalsIgnoreCase("18Sfile")){ + SSUMap.r18SFile=b; + }else if(a.equals("mash")){ + mode=MASH_MODE; + }else if(a.equals("sourmash")){ + mode=SOURMASH_MODE; + }else if(a.equals("blast")){ + mode=BLAST_MODE; + }else if(a.equals("bbsketch")){ + mode=BBSKETCH_MODE; + }else if(a.equals("lines")){ + maxLines=Long.parseLong(b); + if(maxLines<0){maxLines=Long.MAX_VALUE;} + }else if(a.equals("minsamples")){ + minSamples=Integer.parseInt(b); + }else if(a.equals("verbose")){ + RecordSet.verbose=Record.verbose=verbose=Parse.parseBoolean(b); + }else if(parser.parse(arg, a, b)){ + //do nothing + }else{ + outstream.println("Unknown parameter "+args[i]); + assert(false) : "Unknown parameter "+args[i]; + // throw new RuntimeException("Unknown parameter "+args[i]); + } + } + + return parser; + } + + /** Add or remove .gz or .bz2 as needed */ + private void fixExtensions(){ + in1=Tools.fixExtension(in1); + in2=Tools.fixExtension(in2); + if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} + } + + /** Ensure files can be read and written */ + private void checkFileExistence(){ + //Ensure output files can be written + if(!Tools.testOutputFiles(overwrite, append, false, out1, outMap)){ + outstream.println((out1==null)+", "+out1); + throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n"); + } + + //Ensure input files can be read + if(!Tools.testInputFiles(false, true, in1, in2)){ + throw new RuntimeException("\nCan't read some input files.\n"); + } + + //Ensure that no file was specified multiple times + if(!Tools.testForDuplicateFiles(true, in1, in2, out1, outMap)){ + throw new RuntimeException("\nSome file names were specified multiple times.\n"); + } + } + + /** Adjust file-related static fields as needed for this program */ + private static void checkStatics(){ + //Adjust the number of threads for input file reading + if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ + ByteFile.FORCE_MODE_BF2=true; + } + +// if(!ByteFile.FORCE_MODE_BF2){ +// ByteFile.FORCE_MODE_BF2=false; +// ByteFile.FORCE_MODE_BF1=true; +// } + } + + /*--------------------------------------------------------------*/ + /*---------------- Outer Methods ----------------*/ + /*--------------------------------------------------------------*/ + + void process(Timer t){ + if(verbose){System.err.println("process()");} + + ByteFile bf1=ByteFile.makeByteFile(ffin1); + ByteFile bf2=(ffin2==null ? null : ByteFile.makeByteFile(ffin2)); + + if(shrinkOnly){ + runShrinkOnly(bf1); + return; + } + + bswBad=makeBSW(ffoutBad); + ResultLineParser parser=new ResultLineParser(mode, tree, bswBad, recordSets, false); + + processInner(bf1, parser, ffin2==null ? null : aniMap); + ByteStreamWriter bsw=makeBSW(ffout1); + printResults(parser, bsw); + if(bsw!=null){errorState|=bsw.poisonAndWait();} + + ByteStreamWriter bswAcc=makeBSW(ffoutAccuracy); + if(recordSets!=null){ + if(SSUMap.hasMap()){ + processSetsThreaded(); + } + printAccuracy(bswAcc); + } + if(bswAcc!=null){errorState|=bswAcc.poisonAndWait();} + if(bswBad!=null){errorState|=bswBad.poisonAndWait();} + + ByteStreamWriter bswMap=makeBSW(ffoutMap); + if(bf2!=null){ + processInner(bf2, parser, aaiMap); + printMap(bswMap); + } + if(bswMap!=null){errorState|=bswMap.poisonAndWait();} + + t.stop(); + + outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8)); + + outstream.println(); + outstream.println("Lines Out: \t"+linesOut); + outstream.println("Bytes Out: \t"+bytesOut); + + if(errorState){ + throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); + } + } + + /*--------------------------------------------------------------*/ + /*---------------- Inner Methods ----------------*/ + /*--------------------------------------------------------------*/ + + private void runShrinkOnly(ByteFile bf){ + if(verbose){System.err.println("runShrinkOnly("+bf.name()+")");} + ResultLineParser parser=new ResultLineParser(mode, tree, null, null, true); + ByteStreamWriter bsw=makeBSW(ffout1); + + byte[] line=bf.nextLine(); + + while(line!=null){ + if(line.length>0){ + if(maxLines>0 && linesProcessed>=maxLines){break;} + linesProcessed++; + bytesProcessed+=(line.length+1); + + parser.parse(line); + RecordSet rs=parser.processData(null, true); + if(rs!=null){ + rs.sortAndSweep(); + for(Record r : rs.records){ + if(bsw!=null){ + bsw.println(r.text); + } + } + } + } + line=bf.nextLine(); + } + errorState|=bf.close(); + + RecordSet rs=parser.currentSet; + if(rs!=null){ + rs.sortAndSweep(); + for(Record r : rs.records){ + if(bsw!=null){ + bsw.println(r.text); + } + } + } + + if(bsw!=null){errorState|=bsw.poisonAndWait();} + } + + private void processInner(ByteFile bf, ResultLineParser parser, HashMap<Long, Float> map){ + if(verbose){System.err.println("processInner("+bf.name()+")");} + byte[] line=bf.nextLine(); + + while(line!=null){ + if(line.length>0){ + if(maxLines>0 && linesProcessed>=maxLines){break;} + linesProcessed++; + bytesProcessed+=(line.length+1); + + parser.parse(line); + parser.processData(map, recordSets!=null); + } + line=bf.nextLine(); + } + errorState|=bf.close(); + } + + private void processSetsThreaded(){ + if(verbose){System.err.println("processSetsThreaded("+Shared.threads()+")");} + final int threads=Shared.threads(); +// final ThreadWaiter waiter=new ThreadWaiter(); + final ArrayList<SSUThread> list=new ArrayList<SSUThread>(threads); + final AtomicInteger atom=new AtomicInteger(0); + for(int i=0; i<threads; i++){ + list.add(new SSUThread(atom)); + } + boolean success=ThreadWaiter.startAndWait(list); + if(!success){errorState=true;} + } + + private void printAccuracy(ByteStreamWriter bswAcc){ + if(verbose){System.err.println("printAccuracy("+bswAcc+")");} + int[][] results=new int[taxLevels][16]; + for(RecordSet rs : recordSets){ + if(mode==MASH_MODE){ + rs.sortAndSweep(); + rs.processSSU(); + } + int[] statusArray=rs.test(bswBad); + for(int level=0; level<statusArray.length; level++){ + int status=statusArray[level]; + results[level][status]++; + assert(status==NOHIT || status==CORRECT || + status==INCORRECT_TAX_CORRECT_SSU || status==INCORRECT_TAX_INCORRECT_SSU || + status==INCORRECT_TAX_MISSING_SSU) : status; + } + } + ByteBuilder bb=new ByteBuilder(); + bb.append("#Level \tCorrect\tbadTaxGoodSSU\tBadTaxNoSSU\tbadTaxBadSSU\tNoHit\n"); + if(bswAcc!=null){bswAcc.print(bb);} + for(String levelName : printLevels){ + int level=TaxTree.stringToLevelExtended(levelName); + bb.clear(); + bb.append(TaxTree.levelToStringExtended(level)); + while(bb.length<"species subgroup".length()){bb.append(' ');} + bb.tab(); + bb.append(results[level][CORRECT]).tab(); + bb.append(results[level][INCORRECT_TAX_CORRECT_SSU]).tab(); + bb.append(results[level][INCORRECT_TAX_MISSING_SSU]).tab(); + bb.append(results[level][INCORRECT_TAX_INCORRECT_SSU]).tab(); + bb.append(results[level][NOHIT]).nl(); + if(bswAcc!=null){bswAcc.print(bb);} + } + } + + private void printMap(ByteStreamWriter bsw){ + if(verbose){System.err.println("printMap("+bsw+")");} + ByteBuilder bb=new ByteBuilder(); + bb.append("#qID\trID\tANI\tAAI\n"); + if(bsw!=null){bsw.print(bb);} + for(Entry<Long, Float> e : aniMap.entrySet()){ + long key=e.getKey(); + int qID=((int)(key>>>32)); + int rID=(int)(key&Integer.MAX_VALUE); + float ani=e.getValue(); + Float aai=aaiMap.get(key); + if(aai!=null){ + bb.clear(); + bb.append(qID).tab().append(rID).tab().append(ani, 3).tab().append(aai, 3).nl(); + if(bsw!=null){bsw.print(bb);} + linesOut++; + bytesOut+=bb.length(); + } + } + } + + private void printResults(ResultLineParser parser, ByteStreamWriter bsw){ + if(verbose){System.err.println("printResults("+bsw+")");} + ByteBuilder bb=new ByteBuilder(); + bb.append("#Level \tRank\tANI_AVG\tSSU_AVG\tANI_STD\tSSU_STD\tSamples"); + if(bsw!=null){bsw.println(bb);} + for(int level=0; level<taxLevels; level++){ + long aniCount=parser.levelCounts[level]; + long ssuCount=parser.levelCountsSSU[level]; + if(aniCount>=minSamples && ((1L<<level)&printLevelsMask)!=0){ + bb.clear(); + String name=TaxTree.levelToStringExtended(level); + double aniSum=parser.levelAniSums[level]; + double ssuSum=parser.levelSSUSums[level]; + FloatList aniList=parser.aniLists[level]; + FloatList ssuList=parser.ssuLists[level]; + aniList.sort(); + ssuList.sort(); + + double aniAvg=aniSum/aniCount; + double ssuAvg=ssuSum/ssuCount; + double aniStd=aniList.stdev(); + double ssuStd=ssuList.stdev(); + + bb.append(name); + while(bb.length<9){bb.append(' ');} + bb.tab().append(level); + bb.tab().append(aniAvg, 3); + bb.tab().append(ssuAvg, 3); + bb.tab().append(aniStd, 3); + bb.tab().append(ssuStd, 3); + bb.tab().append(aniCount); + if(bsw!=null){bsw.println(bb);} + linesOut++; + bytesOut+=bb.length(); + } + } + } + + private static ByteStreamWriter makeBSW(FileFormat ff){ + if(verbose){System.err.println("makeBSW("+ff+")");} + if(ff==null){return null;} + ByteStreamWriter bsw=new ByteStreamWriter(ff); + bsw.start(); + return bsw; + } + + private static long makePrintLevelsMask(String[] printLevelsArray){ + long mask=0; + for(String s : printLevelsArray){ + int level=TaxTree.stringToLevelExtended(s); + long bit=1L<<level; + assert(Long.bitCount(bit)==1); + mask|=bit; + } + return mask; + } + + /*--------------------------------------------------------------*/ + + private class SSUThread extends Thread{ + + SSUThread(AtomicInteger atom_){ + atom=atom_; + } + + @Override + public void run(){ + for(int idx=atom.getAndIncrement(); idx<recordSets.size(); idx=atom.getAndIncrement()){ + RecordSet rs=recordSets.get(idx); + rs.sortAndSweep(); + rs.processSSU(); + } + } + + private final AtomicInteger atom; + } + + /*--------------------------------------------------------------*/ + + + + /*--------------------------------------------------------------*/ + /*---------------- Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private String in1=null; + private String in2=null; + private String out1="stdout.txt"; + private String outMap=null; + private String outAccuracy=null; + private String outBad=null; + private String treeFile=null; + + private ByteStreamWriter bswBad=null; + + /*--------------------------------------------------------------*/ + + private long linesProcessed=0; + private long linesOut=0; + private long bytesProcessed=0; + private long bytesOut=0; + + private boolean shrinkOnly=false; + + int minSamples=1; + private long maxLines=Long.MAX_VALUE; + + final static int taxLevels=TaxTree.numTaxLevelNamesExtended; + + final HashMap<Long, Float> aniMap=new HashMap<Long, Float>(); + final HashMap<Long, Float> aaiMap=new HashMap<Long, Float>(); + + final TaxTree tree; + final ArrayList<RecordSet> recordSets; + +// static HashMap<Integer, byte[]> ssuMap; + + static final String[] printLevels=new String[] {"strain", "species", "genus", "family", "order", "class", "phylum", "superkingdom", "life"}; + static final long printLevelsMask=makePrintLevelsMask(printLevels); + + /*--------------------------------------------------------------*/ + + static int NOHIT=0; + static int CORRECT=1; + static int INCORRECT_TAX=2; + static int INCORRECT_SSU=4; + static int MISSING_SSU=8; + private static int INCORRECT_TAX_CORRECT_SSU=INCORRECT_TAX; + private static int INCORRECT_TAX_INCORRECT_SSU=INCORRECT_TAX|INCORRECT_SSU; + private static int INCORRECT_TAX_MISSING_SSU=INCORRECT_TAX|MISSING_SSU; + + static final int BBSKETCH_MODE=0; + static final int MASH_MODE=1; + static final int SOURMASH_MODE=2; + static final int BLAST_MODE=3; + static int mode=BBSKETCH_MODE; + + + /*--------------------------------------------------------------*/ + /*---------------- Final Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private final FileFormat ffin1; + private final FileFormat ffin2; + private final FileFormat ffout1; + private final FileFormat ffoutMap; + private final FileFormat ffoutAccuracy; + private final FileFormat ffoutBad; + + /*--------------------------------------------------------------*/ + /*---------------- Common Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private PrintStream outstream=System.err; + public static boolean verbose=false; + public boolean errorState=false; + private boolean overwrite=false; + private boolean append=false; + +}