Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AnalyzeSketchResults.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package sketch; import java.io.PrintStream; import java.util.ArrayList; import java.util.HashMap; import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicInteger; import fileIO.ByteFile; import fileIO.ByteStreamWriter; import fileIO.FileFormat; import fileIO.ReadWrite; import shared.Parse; import shared.Parser; import shared.PreParser; import shared.Shared; import shared.Timer; import shared.Tools; import structures.ByteBuilder; import structures.FloatList; import tax.TaxTree; import template.ThreadWaiter; /** * @author Brian Bushnell * @date May 9, 2016 * */ public class AnalyzeSketchResults { /*--------------------------------------------------------------*/ /*---------------- Initialization ----------------*/ /*--------------------------------------------------------------*/ /** * Code entrance from the command line. * @param args Command line arguments */ public static void main(String[] args){ //Start a timer immediately upon code entrance. Timer t=new Timer(); //Create an instance of this class AnalyzeSketchResults x=new AnalyzeSketchResults(args); //Run the object x.process(t); //Close the print stream if it was redirected Shared.closeStream(x.outstream); } /** * Constructor. * @param args Command line arguments */ public AnalyzeSketchResults(String[] args){ {//Preparse block for help, config files, and outstream PreParser pp=new PreParser(args, /*getClass()*/null, false); args=pp.args; outstream=pp.outstream; } //Set shared static variables prior to parsing ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; ReadWrite.MAX_ZIP_THREADS=Shared.threads(); {//Parse the arguments final Parser parser=parse(args); overwrite=parser.overwrite; append=parser.append; in1=parser.in1; in2=parser.in2; out1=parser.out1; } fixExtensions(); //Add or remove .gz or .bz2 as needed checkFileExistence(); //Ensure files can be read and written checkStatics(); //Adjust file-related static fields as needed for this program ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, overwrite, append, false); ffoutMap=FileFormat.testOutput(outMap, FileFormat.TXT, null, true, overwrite, append, false); ffoutAccuracy=FileFormat.testOutput(outAccuracy, FileFormat.TXT, null, true, overwrite, append, false); ffoutBad=FileFormat.testOutput(outBad, FileFormat.TXT, null, true, overwrite, append, false); ffin1=FileFormat.testInput(in1, FileFormat.TXT, null, true, true); ffin2=FileFormat.testInput(in2, FileFormat.TXT, null, true, true); recordSets=(ffoutAccuracy==null && ffoutBad==null ? null : new ArrayList<RecordSet>()); tree=(treeFile==null) ? null : TaxTree.loadTaxTree(treeFile, outstream, true, false); SSUMap.load(outstream); } /*--------------------------------------------------------------*/ /*---------------- Initialization Helpers ----------------*/ /*--------------------------------------------------------------*/ /** Parse arguments from the command line */ private Parser parse(String[] args){ Parser parser=new Parser(); for(int i=0; i<args.length; i++){ String arg=args[i]; String[] split=arg.split("="); String a=split[0].toLowerCase(); String b=split.length>1 ? split[1] : null; if(b!=null && b.equalsIgnoreCase("null")){b=null;} if(a.equals("map") || a.equals("outmap")){ outMap=b; }else if(a.equals("accuracy") || a.equals("outacc") || a.equals("outaccuracy")){ outAccuracy=b; }else if(a.equals("outbad")){ outBad=b; }else if(a.equals("tree")){ treeFile=b; }else if(a.equals("shrinkonly")){ shrinkOnly=Parse.parseBoolean(b); }else if(a.equals("ssu") || a.equals("ssufile")){ assert(false) : "ssu and ssufile are deprecated; please specify 16S or 18S independently"; }else if(a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("16Sfile")){ SSUMap.r16SFile=b; }else if(a.equalsIgnoreCase("18S") || a.equalsIgnoreCase("18Sfile")){ SSUMap.r18SFile=b; }else if(a.equals("mash")){ mode=MASH_MODE; }else if(a.equals("sourmash")){ mode=SOURMASH_MODE; }else if(a.equals("blast")){ mode=BLAST_MODE; }else if(a.equals("bbsketch")){ mode=BBSKETCH_MODE; }else if(a.equals("lines")){ maxLines=Long.parseLong(b); if(maxLines<0){maxLines=Long.MAX_VALUE;} }else if(a.equals("minsamples")){ minSamples=Integer.parseInt(b); }else if(a.equals("verbose")){ RecordSet.verbose=Record.verbose=verbose=Parse.parseBoolean(b); }else if(parser.parse(arg, a, b)){ //do nothing }else{ outstream.println("Unknown parameter "+args[i]); assert(false) : "Unknown parameter "+args[i]; // throw new RuntimeException("Unknown parameter "+args[i]); } } return parser; } /** Add or remove .gz or .bz2 as needed */ private void fixExtensions(){ in1=Tools.fixExtension(in1); in2=Tools.fixExtension(in2); if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} } /** Ensure files can be read and written */ private void checkFileExistence(){ //Ensure output files can be written if(!Tools.testOutputFiles(overwrite, append, false, out1, outMap)){ outstream.println((out1==null)+", "+out1); throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n"); } //Ensure input files can be read if(!Tools.testInputFiles(false, true, in1, in2)){ throw new RuntimeException("\nCan't read some input files.\n"); } //Ensure that no file was specified multiple times if(!Tools.testForDuplicateFiles(true, in1, in2, out1, outMap)){ throw new RuntimeException("\nSome file names were specified multiple times.\n"); } } /** Adjust file-related static fields as needed for this program */ private static void checkStatics(){ //Adjust the number of threads for input file reading if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ ByteFile.FORCE_MODE_BF2=true; } // if(!ByteFile.FORCE_MODE_BF2){ // ByteFile.FORCE_MODE_BF2=false; // ByteFile.FORCE_MODE_BF1=true; // } } /*--------------------------------------------------------------*/ /*---------------- Outer Methods ----------------*/ /*--------------------------------------------------------------*/ void process(Timer t){ if(verbose){System.err.println("process()");} ByteFile bf1=ByteFile.makeByteFile(ffin1); ByteFile bf2=(ffin2==null ? null : ByteFile.makeByteFile(ffin2)); if(shrinkOnly){ runShrinkOnly(bf1); return; } bswBad=makeBSW(ffoutBad); ResultLineParser parser=new ResultLineParser(mode, tree, bswBad, recordSets, false); processInner(bf1, parser, ffin2==null ? null : aniMap); ByteStreamWriter bsw=makeBSW(ffout1); printResults(parser, bsw); if(bsw!=null){errorState|=bsw.poisonAndWait();} ByteStreamWriter bswAcc=makeBSW(ffoutAccuracy); if(recordSets!=null){ if(SSUMap.hasMap()){ processSetsThreaded(); } printAccuracy(bswAcc); } if(bswAcc!=null){errorState|=bswAcc.poisonAndWait();} if(bswBad!=null){errorState|=bswBad.poisonAndWait();} ByteStreamWriter bswMap=makeBSW(ffoutMap); if(bf2!=null){ processInner(bf2, parser, aaiMap); printMap(bswMap); } if(bswMap!=null){errorState|=bswMap.poisonAndWait();} t.stop(); outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8)); outstream.println(); outstream.println("Lines Out: \t"+linesOut); outstream.println("Bytes Out: \t"+bytesOut); if(errorState){ throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); } } /*--------------------------------------------------------------*/ /*---------------- Inner Methods ----------------*/ /*--------------------------------------------------------------*/ private void runShrinkOnly(ByteFile bf){ if(verbose){System.err.println("runShrinkOnly("+bf.name()+")");} ResultLineParser parser=new ResultLineParser(mode, tree, null, null, true); ByteStreamWriter bsw=makeBSW(ffout1); byte[] line=bf.nextLine(); while(line!=null){ if(line.length>0){ if(maxLines>0 && linesProcessed>=maxLines){break;} linesProcessed++; bytesProcessed+=(line.length+1); parser.parse(line); RecordSet rs=parser.processData(null, true); if(rs!=null){ rs.sortAndSweep(); for(Record r : rs.records){ if(bsw!=null){ bsw.println(r.text); } } } } line=bf.nextLine(); } errorState|=bf.close(); RecordSet rs=parser.currentSet; if(rs!=null){ rs.sortAndSweep(); for(Record r : rs.records){ if(bsw!=null){ bsw.println(r.text); } } } if(bsw!=null){errorState|=bsw.poisonAndWait();} } private void processInner(ByteFile bf, ResultLineParser parser, HashMap<Long, Float> map){ if(verbose){System.err.println("processInner("+bf.name()+")");} byte[] line=bf.nextLine(); while(line!=null){ if(line.length>0){ if(maxLines>0 && linesProcessed>=maxLines){break;} linesProcessed++; bytesProcessed+=(line.length+1); parser.parse(line); parser.processData(map, recordSets!=null); } line=bf.nextLine(); } errorState|=bf.close(); } private void processSetsThreaded(){ if(verbose){System.err.println("processSetsThreaded("+Shared.threads()+")");} final int threads=Shared.threads(); // final ThreadWaiter waiter=new ThreadWaiter(); final ArrayList<SSUThread> list=new ArrayList<SSUThread>(threads); final AtomicInteger atom=new AtomicInteger(0); for(int i=0; i<threads; i++){ list.add(new SSUThread(atom)); } boolean success=ThreadWaiter.startAndWait(list); if(!success){errorState=true;} } private void printAccuracy(ByteStreamWriter bswAcc){ if(verbose){System.err.println("printAccuracy("+bswAcc+")");} int[][] results=new int[taxLevels][16]; for(RecordSet rs : recordSets){ if(mode==MASH_MODE){ rs.sortAndSweep(); rs.processSSU(); } int[] statusArray=rs.test(bswBad); for(int level=0; level<statusArray.length; level++){ int status=statusArray[level]; results[level][status]++; assert(status==NOHIT || status==CORRECT || status==INCORRECT_TAX_CORRECT_SSU || status==INCORRECT_TAX_INCORRECT_SSU || status==INCORRECT_TAX_MISSING_SSU) : status; } } ByteBuilder bb=new ByteBuilder(); bb.append("#Level \tCorrect\tbadTaxGoodSSU\tBadTaxNoSSU\tbadTaxBadSSU\tNoHit\n"); if(bswAcc!=null){bswAcc.print(bb);} for(String levelName : printLevels){ int level=TaxTree.stringToLevelExtended(levelName); bb.clear(); bb.append(TaxTree.levelToStringExtended(level)); while(bb.length<"species subgroup".length()){bb.append(' ');} bb.tab(); bb.append(results[level][CORRECT]).tab(); bb.append(results[level][INCORRECT_TAX_CORRECT_SSU]).tab(); bb.append(results[level][INCORRECT_TAX_MISSING_SSU]).tab(); bb.append(results[level][INCORRECT_TAX_INCORRECT_SSU]).tab(); bb.append(results[level][NOHIT]).nl(); if(bswAcc!=null){bswAcc.print(bb);} } } private void printMap(ByteStreamWriter bsw){ if(verbose){System.err.println("printMap("+bsw+")");} ByteBuilder bb=new ByteBuilder(); bb.append("#qID\trID\tANI\tAAI\n"); if(bsw!=null){bsw.print(bb);} for(Entry<Long, Float> e : aniMap.entrySet()){ long key=e.getKey(); int qID=((int)(key>>>32)); int rID=(int)(key&Integer.MAX_VALUE); float ani=e.getValue(); Float aai=aaiMap.get(key); if(aai!=null){ bb.clear(); bb.append(qID).tab().append(rID).tab().append(ani, 3).tab().append(aai, 3).nl(); if(bsw!=null){bsw.print(bb);} linesOut++; bytesOut+=bb.length(); } } } private void printResults(ResultLineParser parser, ByteStreamWriter bsw){ if(verbose){System.err.println("printResults("+bsw+")");} ByteBuilder bb=new ByteBuilder(); bb.append("#Level \tRank\tANI_AVG\tSSU_AVG\tANI_STD\tSSU_STD\tSamples"); if(bsw!=null){bsw.println(bb);} for(int level=0; level<taxLevels; level++){ long aniCount=parser.levelCounts[level]; long ssuCount=parser.levelCountsSSU[level]; if(aniCount>=minSamples && ((1L<<level)&printLevelsMask)!=0){ bb.clear(); String name=TaxTree.levelToStringExtended(level); double aniSum=parser.levelAniSums[level]; double ssuSum=parser.levelSSUSums[level]; FloatList aniList=parser.aniLists[level]; FloatList ssuList=parser.ssuLists[level]; aniList.sort(); ssuList.sort(); double aniAvg=aniSum/aniCount; double ssuAvg=ssuSum/ssuCount; double aniStd=aniList.stdev(); double ssuStd=ssuList.stdev(); bb.append(name); while(bb.length<9){bb.append(' ');} bb.tab().append(level); bb.tab().append(aniAvg, 3); bb.tab().append(ssuAvg, 3); bb.tab().append(aniStd, 3); bb.tab().append(ssuStd, 3); bb.tab().append(aniCount); if(bsw!=null){bsw.println(bb);} linesOut++; bytesOut+=bb.length(); } } } private static ByteStreamWriter makeBSW(FileFormat ff){ if(verbose){System.err.println("makeBSW("+ff+")");} if(ff==null){return null;} ByteStreamWriter bsw=new ByteStreamWriter(ff); bsw.start(); return bsw; } private static long makePrintLevelsMask(String[] printLevelsArray){ long mask=0; for(String s : printLevelsArray){ int level=TaxTree.stringToLevelExtended(s); long bit=1L<<level; assert(Long.bitCount(bit)==1); mask|=bit; } return mask; } /*--------------------------------------------------------------*/ private class SSUThread extends Thread{ SSUThread(AtomicInteger atom_){ atom=atom_; } @Override public void run(){ for(int idx=atom.getAndIncrement(); idx<recordSets.size(); idx=atom.getAndIncrement()){ RecordSet rs=recordSets.get(idx); rs.sortAndSweep(); rs.processSSU(); } } private final AtomicInteger atom; } /*--------------------------------------------------------------*/ /*--------------------------------------------------------------*/ /*---------------- Fields ----------------*/ /*--------------------------------------------------------------*/ private String in1=null; private String in2=null; private String out1="stdout.txt"; private String outMap=null; private String outAccuracy=null; private String outBad=null; private String treeFile=null; private ByteStreamWriter bswBad=null; /*--------------------------------------------------------------*/ private long linesProcessed=0; private long linesOut=0; private long bytesProcessed=0; private long bytesOut=0; private boolean shrinkOnly=false; int minSamples=1; private long maxLines=Long.MAX_VALUE; final static int taxLevels=TaxTree.numTaxLevelNamesExtended; final HashMap<Long, Float> aniMap=new HashMap<Long, Float>(); final HashMap<Long, Float> aaiMap=new HashMap<Long, Float>(); final TaxTree tree; final ArrayList<RecordSet> recordSets; // static HashMap<Integer, byte[]> ssuMap; static final String[] printLevels=new String[] {"strain", "species", "genus", "family", "order", "class", "phylum", "superkingdom", "life"}; static final long printLevelsMask=makePrintLevelsMask(printLevels); /*--------------------------------------------------------------*/ static int NOHIT=0; static int CORRECT=1; static int INCORRECT_TAX=2; static int INCORRECT_SSU=4; static int MISSING_SSU=8; private static int INCORRECT_TAX_CORRECT_SSU=INCORRECT_TAX; private static int INCORRECT_TAX_INCORRECT_SSU=INCORRECT_TAX|INCORRECT_SSU; private static int INCORRECT_TAX_MISSING_SSU=INCORRECT_TAX|MISSING_SSU; static final int BBSKETCH_MODE=0; static final int MASH_MODE=1; static final int SOURMASH_MODE=2; static final int BLAST_MODE=3; static int mode=BBSKETCH_MODE; /*--------------------------------------------------------------*/ /*---------------- Final Fields ----------------*/ /*--------------------------------------------------------------*/ private final FileFormat ffin1; private final FileFormat ffin2; private final FileFormat ffout1; private final FileFormat ffoutMap; private final FileFormat ffoutAccuracy; private final FileFormat ffoutBad; /*--------------------------------------------------------------*/ /*---------------- Common Fields ----------------*/ /*--------------------------------------------------------------*/ private PrintStream outstream=System.err; public static boolean verbose=false; public boolean errorState=false; private boolean overwrite=false; private boolean append=false; }