Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/CompareSketch.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/CompareSketch.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,583 @@ +package sketch; + +import java.io.File; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +import fileIO.ByteFile; +import fileIO.ByteStreamWriter; +import fileIO.FileFormat; +import fileIO.ReadWrite; +import kmer.AbstractKmerTableSet; +import shared.Parse; +import shared.Parser; +import shared.PreParser; +import shared.ReadStats; +import shared.Shared; +import shared.Timer; +import shared.Tools; +import structures.ByteBuilder; +import tax.TaxFilter; +import tax.TaxTree; + +/** + * Compares one or more input sketches to a set of reference sketches. + * + * @author Brian Bushnell + * @date July 29, 2016 + * + */ +public class CompareSketch extends SketchObject { + + + + /*--------------------------------------------------------------*/ + /*---------------- Initialization ----------------*/ + /*--------------------------------------------------------------*/ + + /** + * Code entrance from the command line. + * @param args Command line arguments + */ + public static void main(String[] args){ + + //Start a timer immediately upon code entrance. + Timer t=new Timer(); + + final int oldBufLen=Shared.bufferLen(); + + //Create an instance of this class + CompareSketch x=new CompareSketch(args); + + //Run the object + x.process(t); + + Shared.setBufferLen(oldBufLen); + + //Close the print stream if it was redirected + Shared.closeStream(x.outstream); + + alignerPool.poison(); + } + + /** + * Constructor. + * @param args Command line arguments + */ + public CompareSketch(String[] args){ + + {//Preparse block for help, config files, and outstream + PreParser pp=new PreParser(args, null, false); +// PreParser pp=new PreParser(args, getClass(), false); + args=pp.args; + outstream=pp.outstream; + silent=PreParser.silent; + if(silent){AbstractKmerTableSet.DISPLAY_PROGRESS=false;} + } + + //Set shared static variables + ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; + ReadWrite.MAX_ZIP_THREADS=Shared.threads(); + KILL_OK=true; + TaxFilter.REQUIRE_PRESENT=false; + defaultParams.mode=PER_FILE; + + //Create a parser object + Parser parser=new Parser(); + parser.out1="stdout.txt"; + + //Parse each argument + for(int i=0; i<args.length; i++){ + String arg=args[i]; + + //Break arguments into their constituent parts, in the form of "a=b" + String[] split=arg.split("="); + String a=split[0].toLowerCase(); + String b=split.length>1 ? split[1] : null; + + if(a.equals("verbose")){ + verbose=Parse.parseBoolean(b); + }else if(a.equals("in")){ + addFiles(b, in); + }else if(parseSketchFlags(arg, a, b)){ + //Do nothing + }else if(a.equals("parse_flag_goes_here")){ + long fake_variable=Parse.parseKMG(b); + //Set a variable here + }else if(a.equals("ordered")){ + ordered=Parse.parseBoolean(b); + }else if(a.equals("alltoall") || a.equals("ata")){ + allToAll=Parse.parseBoolean(b); + }else if(a.equals("skipcompare") || a.equals("sketchonly")){ + skipCompare=Parse.parseBoolean(b); + }else if(a.equals("compareself") || a.equals("includeself")){ + compareSelf=Parse.parseBoolean(b); + }else if(a.equals("printmemory")){ + printMemory=Parse.parseBoolean(b); + }else if(a.equals("parsesubunit")){ + SketchMaker.parseSubunit=Parse.parseBoolean(b); + } + + else if(a.equals("taxtree") || a.equals("tree")){ + taxTreeFile=b; + } + + else if(a.equals("name") || a.equals("taxname")){ + outTaxName=b; + }else if(a.equals("name0")){ + outName0=b; + }else if(a.equals("fname")){ + outFname=b; + }else if(a.equals("outsketch") || a.equals("sketchout") || a.equals("outs") || a.equals("sketch")){ + outSketch=b; + }else if(a.equals("files")){ + sketchFiles=Integer.parseInt(b); + }else if(a.equals("taxid") || a.equals("tid")){ + outTaxID=Integer.parseInt(b); + }else if(a.equals("spid")){ + outSpid=Integer.parseInt(b); + }else if(a.equals("imgid")){ + outImgID=Integer.parseInt(b); + }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){ + if(outMeta==null){outMeta=new ArrayList<String>();} + int underscore=a.indexOf('_', 0); + outMeta.add(a.substring(underscore+1)+":"+b); + } + + else if(searcher.parse(arg, a, b, false)){ +// System.err.println("*"+arg); + parser.parse(arg, a, b); //Catches shared flags like "threads" + Blacklist.parseBlacklist(arg, a, b); //Catches flags like "nt" or "refseq" + } + + else if(parser.parse(arg, a, b)){//Parse standard flags in the parser + //do nothing + } + + else if(searcher.parse(arg, a, b, true)){ +// System.err.println("**"+arg); + //do nothing + } + + else{ + outstream.println("Unknown parameter "+args[i]); + assert(false) : "Unknown parameter "+args[i]; + } + } + if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();} + + outMeta=SketchObject.fixMeta(outMeta); + SketchObject.postParse(); + + if(skipCompare){ + allToAll=false; + searcher.autoIndex=false; + makeIndex=false; + in.addAll(searcher.refFiles); + searcher.refFiles.clear(); + }else if(in.isEmpty() && args.length>0 && !allToAll){ //Allows first argument to be used as the input file without in= flag + String x=args[0]; + if(x.indexOf('=')<0 && new File(x).exists() && searcher.refFiles.contains(x)){ + searcher.refFiles.remove(x); + in.add(x); + } + } + + {//Process parser fields + overwrite=ReadStats.overwrite=parser.overwrite; + append=ReadStats.append=parser.append; + + out=parser.out1; + } + +// assert(false) : in+"\n"+searcher.refFiles; + + if(allToAll){ + LinkedHashSet<String> set=new LinkedHashSet<String>(); + set.addAll(in); + set.addAll(searcher.refFiles); + in.clear(); + searcher.refFiles.clear(); + in.addAll(set); + searcher.refFiles.addAll(set); + } + + //Ensure there is an input file + if(in.isEmpty() && !skipCompare){throw new RuntimeException("Error - at least one input file is required.");} + + //Ensure there is an ref file + if(searcher.refFiles.isEmpty() && !skipCompare){ + if(outSketch==null){throw new RuntimeException("Error - at least one reference file is required.");} + } + + //Adjust the number of threads for input file reading + if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ + ByteFile.FORCE_MODE_BF2=true; + } + + ffout=FileFormat.testOutput(out, FileFormat.TEXT, null, false, overwrite, append, ordered); + if(!ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;} + + //Ensure input files can be read + if(!Tools.testInputFiles(false, true, taxTreeFile)){ + throw new RuntimeException("\nCan't read some input files.\n"); + } + if(!Tools.testInputFiles(true, false, in.toArray(new String[0]))){ + if(in.size()==1){ + String s=in.get(0); + String s1=s.replaceFirst("#", "1"), s2=s.replaceFirst("#", "2"); + Tools.testInputFiles(true, false, s1, s2); + }else{ + throw new RuntimeException("\nCan't read some input files.\n"); + } + } + +// assert(makeIndex || defaultParams.printContam2==false) : "Contam2 requires the flag index=t"; + + SSUMap.load(outstream); + if(taxTreeFile!=null){setTaxtree(taxTreeFile, silent ? null : outstream);} + defaultParams.postParse(true, true); + if(!defaultParams.printSSU){processSSU=false;} + allowMultithreadedFastq=in.size()<2 && !allToAll; + if(!allowMultithreadedFastq){Shared.capBufferLen(40);} +// assert(defaultParams.checkValid()); + } + + /*--------------------------------------------------------------*/ + /*---------------- Outer Methods ----------------*/ + /*--------------------------------------------------------------*/ + + public void process(Timer t){ + Timer ttotal=new Timer(); + + t.start(); + + if(!silent){outstream.println("Loading sketches.");} + searcher.makeTool(1, false, defaultParams.mergePairs); + SketchTool tool=new SketchTool(targetSketchSize, defaultParams); + + final int mode2=(defaultParams.mode==PER_FILE ? PER_FILE : PER_TAXA); + if(skipCompare){ + makeIndex=false; + inSketches=tool.loadSketches_MT(defaultParams, in); + }else if(!useWhitelist || allToAll){ + if(allToAll){ + makeIndex=searcher.refFileCount()>0 && (makeIndex || defaultParams.needIndex() || searcher.autoIndex); + searcher.loadReferences(mode2, defaultParams); + inSketches=(ArrayList<Sketch>) searcher.refSketches.clone(); + }else{ + inSketches=tool.loadSketches_MT(defaultParams, in); + + for(Sketch sk : inSketches){ + if(sk.taxID<1 || sk.taxID>=minFakeID || outTaxID>0){sk.taxID=outTaxID;} + if(outSpid>0){sk.spid=outSpid;} + if(outImgID>0){sk.imgID=outImgID;} + if(outTaxName!=null){sk.setTaxName(outTaxName);} + if(outFname!=null){sk.setFname(outFname);} + if(outName0!=null){sk.setName0(outName0);} + if(SketchMaker.parseSubunit && sk.name0()!=null){ + if(outMeta!=null){ + sk.meta=(ArrayList<String>)sk.meta.clone(); + }else if(sk.meta==null){ + if(sk.name0().contains("SSU_")){ + sk.addMeta("subunit:ssu"); + }else if(sk.name0().contains("LSU_")){ + sk.addMeta("subunit:lsu"); + } + } + } + sk.setMeta(outMeta); + if(defaultParams.printSSU()){sk.loadSSU();}//since taxID was just set + } + + if(outTaxID>0){ + for(Sketch sk : inSketches){ + if(sk.taxID<1 || sk.taxID>=minFakeID){sk.taxID=outTaxID;} + } + } + makeIndex=searcher.refFileCount()>0 && ((searcher.autoIndex && inSketches.size()>8) || defaultParams.needIndex() || (makeIndex && !searcher.autoIndex)); + searcher.loadReferences(mode2, defaultParams); + if(mode2==PER_FILE){ + int max=inSketches.size(); + for(int i=0; i<searcher.refSketches.size(); i++){ + searcher.refSketches.get(i).sketchID=max+i+1; + } + } + } + }else{ + //assert(searcher.makeIndex && !searcher.autoIndex) : "whitelist=t requires index=t"; + makeIndex=true; //(searcher.refFileCount()>0); //Index is required in whitelist mode. + searcher.loadReferences(mode2, defaultParams); + inSketches=tool.loadSketches_MT(defaultParams, in); + } + + if(outSketch!=null){ + writeSketches(outSketch, sketchFiles); + } + + final int numLoaded=(inSketches.size()+searcher.refSketchCount())/(allToAll ? 2 : 1); + t.stop(); + if(!silent){outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" in "+t.toString());} + if(printMemory){ + System.gc(); + Shared.printMemory(); + } + + if(skipCompare) { + ttotal.stop("Total Time: \t"); + return; + } + + t.start(); + + + ByteStreamWriter tsw=(ffout==null ? null : new ByteStreamWriter(ffout)); + if(tsw!=null){ + tsw.start(); + if(defaultParams.format==DisplayParams.FORMAT_QUERY_REF_ANI || defaultParams.format==DisplayParams.FORMAT_CONSTELLATION){ + String s=defaultParams.header()+"\n"; + tsw.forcePrint(s.getBytes()); + } + } + + boolean success=true; + final int inSize=inSketches.size(); + if(inSize==1 || Shared.threads()<2 || inSize<4){ + ByteBuilder sb=new ByteBuilder(); + success=searcher.compare(inSketches, sb, defaultParams, Shared.threads()); + success&=(!searcher.errorState); + if(tsw!=null){ + sb.append('\n'); + if(ordered){ + tsw.addJob(sb); + }else{ + tsw.println(sb); + } + } + }else{//More sketches than threads, and more than one thread + final int threads=Tools.min(Shared.threads(), inSize); + + ArrayList<CompareThread> alct=new ArrayList<CompareThread>(threads); + AtomicInteger next=new AtomicInteger(0); + for(int i=0; i<threads; i++){ + alct.add(new CompareThread(i, next, tsw)); + } + for(CompareThread ct : alct){ct.start();} + for(CompareThread ct : alct){ + + //Wait until this thread has terminated + while(ct.getState()!=Thread.State.TERMINATED){ + try { + //Attempt a join operation + ct.join(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + synchronized(ct){ + success&=ct.success; + } + } + alct=null; + } + + //Track whether any threads failed + if(!success){errorState=true;} + if(tsw!=null){errorState|=tsw.poisonAndWait();} + + t.stop(); +// long comparisons=(makeIndex ? searcher.comparisons.get() : +// allToAll ? (inSketches.size()*(long)(inSketches.size()-(compareSelf ? 0 : 1))) +// : inSketches.size()*(long)searcher.refSketchCount()); + long comparisons=searcher.comparisons.get(); + if(!skipCompare && !silent) {outstream.println("\nRan "+comparisons+" comparison"+(comparisons==1 ? "" : "s")+" in "+t);} + ttotal.stop(); + if(!silent){outstream.println("Total Time: \t"+ttotal);} + } + + void writeSketches(String fname, int files){ + if(fname==null){return;} + if(files==1 || fname.indexOf('#')<0){ + writeOneSketchFile(fname); + }else{ + writeManySketchFiles(fname, files); + } + } + + void writeOneSketchFile(String fname){ + if(fname==null){return;} + ByteBuilder bb=new ByteBuilder(); + ByteStreamWriter bsw=new ByteStreamWriter(outSketch, overwrite, append, true, FileFormat.SKETCH); + bsw.start(); + for(Sketch sk : inSketches){ + sk.toBytes(bb); + bsw.print(bb); + bb.clear(); + } + bsw.poisonAndWait(); + errorState|=bsw.errorState; + } + + void writeManySketchFiles(String fname, int files){ + if(fname==null){return;} + assert(fname.indexOf('#')>=0) : fname; + assert(files>0) : files; + + ByteStreamWriter[] bswa=new ByteStreamWriter[files]; + for(int i=0; i<files; i++){ + ByteStreamWriter bsw=new ByteStreamWriter(outSketch.replaceFirst("#", ""+i), overwrite, append, true, FileFormat.SKETCH); + bsw.start(); + bswa[i]=bsw; + } + for(Sketch sk : inSketches){ + ByteBuilder bb=new ByteBuilder(4096); + sk.toBytes(bb); + bswa[sk.sketchID%files].addJob(bb); + } + for(ByteStreamWriter bsw : bswa){ + bsw.poisonAndWait(); + errorState|=bsw.errorState; + } + } + + /*--------------------------------------------------------------*/ + /*---------------- Inner Methods ----------------*/ + /*--------------------------------------------------------------*/ + + private static void addFiles(String a, Collection<String> list){ + if(a==null){return;} + File f=null; + if(a.indexOf(',')>=0){f=new File(a);} + if(f==null || f.exists()){ + list.add(a); + }else{ + for(String s : a.split(",")){list.add(s);} + } + } + + /*--------------------------------------------------------------*/ + /*---------------- Inner Classes ----------------*/ + /*--------------------------------------------------------------*/ + + private class CompareThread extends Thread { + + CompareThread(final int tid_, final AtomicInteger nextSketch_, ByteStreamWriter tsw_){ + tid=tid_; + nextSketch=nextSketch_; + tsw=tsw_; + } + + @Override + public void run(){ + success=false; + final int inLim=inSketches.size(); + final boolean json=defaultParams.json(); + + for(int inNum=nextSketch.getAndIncrement(); inNum<inLim; inNum=nextSketch.getAndIncrement()){ + Sketch a=inSketches.get(inNum); + assert(buffer.cbs==null); //Because this sketch will only be used by one thread at a time, so per-buffer bitsets are not needed. + SketchResults sr=searcher.processSketch(a, buffer, fakeID, map, defaultParams, 1); + a.clearRefHitCounts(); + + if(tsw!=null){ + ByteBuilder sb=sr.toText(defaultParams); + synchronized(tsw){ + if(ordered){ + if(json){ + if(inNum==0){ + sb.insert(0, (byte)'[');//Rare, slow case + } + if(inNum<inLim-1){ + sb.append(','); + }else{ + sb.append(']'); + } + } + tsw.add(sb, inNum); + }else{ + if(json){ + if(resultsPrinted==0){ + tsw.print('['); + }else{ + sb.insert(0, (byte)','); + } + } + tsw.print(sb); + } + resultsPrinted++; + } + } + } + synchronized(this){success=true;} + } + + private final int tid; + private final CompareBuffer buffer=new CompareBuffer(false); + + private final AtomicInteger nextSketch; + private final AtomicInteger fakeID=new AtomicInteger(minFakeID); + private ConcurrentHashMap<Integer, Comparison> map=new ConcurrentHashMap<Integer, Comparison>(101); + final ByteStreamWriter tsw; + + boolean success=false; + + } + + /*--------------------------------------------------------------*/ + /*---------------- Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private ArrayList<String> in=new ArrayList<String>(); + + private String out="stdout.txt"; + + private String taxTreeFile=null; + + private ArrayList<Sketch> inSketches; + + public final SketchSearcher searcher=new SketchSearcher(); + + private boolean printMemory=false; + private boolean silent=false; + + /*Override metadata */ + private String outTaxName=null; + private String outFname=null; + private String outName0=null; + private String outSketch=null; + private int sketchFiles=1; + private int outTaxID=-1; + private long outSpid=-1; + private long outImgID=-1; + private ArrayList<String> outMeta=null; + private long resultsPrinted=0; + + /*--------------------------------------------------------------*/ + /*---------------- Final Fields ----------------*/ + /*--------------------------------------------------------------*/ + + /** Primary output file */ + private final FileFormat ffout; + + /*--------------------------------------------------------------*/ + /*---------------- Common Fields ----------------*/ + /*--------------------------------------------------------------*/ + + /** Print status messages to this output stream */ + private PrintStream outstream=System.err; + /** Print verbose messages */ + public static boolean verbose=false; + /** True if an error was encountered */ + public boolean errorState=false; + /** Overwrite existing output files */ + private boolean overwrite=false; + /** Append to existing output files */ + private boolean append=false; + private boolean ordered=true; + +}