Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/MergeSketch.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/MergeSketch.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,321 @@ +package sketch; + +import java.io.File; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collection; + +import fileIO.ByteFile; +import fileIO.ByteStreamWriter; +import fileIO.FileFormat; +import fileIO.ReadWrite; +import shared.Parse; +import shared.Parser; +import shared.PreParser; +import shared.ReadStats; +import shared.Shared; +import shared.Timer; +import shared.Tools; +import structures.ByteBuilder; + +/** + * Combines multiple sketches into a single sketch. + * + * @author Brian Bushnell + * @date July 23, 2018 + * + */ +public class MergeSketch extends SketchObject { + + /*--------------------------------------------------------------*/ + /*---------------- Initialization ----------------*/ + /*--------------------------------------------------------------*/ + + /** + * Code entrance from the command line. + * @param args Command line arguments + */ + public static void main(String[] args){ + //Start a timer immediately upon code entrance. + Timer t=new Timer(); + + final boolean oldUnpigz=ReadWrite.USE_UNPIGZ; + final int oldBufLen=Shared.bufferLen(); + + //Create an instance of this class + MergeSketch x=new MergeSketch(args); + + //Run the object + x.process(t); + + ReadWrite.USE_UNPIGZ=oldUnpigz; + Shared.setBufferLen(oldBufLen); + + //Close the print stream if it was redirected + Shared.closeStream(x.outstream); + + assert(!x.errorState) : "This program ended in an error state."; + } + + /** + * Constructor. + * @param args Command line arguments + */ + public MergeSketch(String[] args){ + + {//Preparse block for help, config files, and outstream + PreParser pp=new PreParser(args, null, false); + args=pp.args; + outstream=pp.outstream; + } + + //Set shared static variables + ReadWrite.USE_UNPIGZ=true; + KILL_OK=true; + + //Create a parser object + Parser parser=new Parser(); + parser.out1="stdout.txt"; + + defaultParams.printRefFileName=true; + + //Parse each argument + for(int i=0; i<args.length; i++){ + String arg=args[i]; + + //Break arguments into their constituent parts, in the form of "a=b" + String[] split=arg.split("="); + String a=split[0].toLowerCase(); + String b=split.length>1 ? split[1] : null; + + if(a.equals("verbose")){ + verbose=Parse.parseBoolean(b); + }else if(a.equals("in")){ + addFiles(b, in); + }else if(parseSketchFlags(arg, a, b)){ + //Do nothing + }else if(defaultParams.parse(arg, a, b)){ + //Do nothing + } +// else if(a.equals("size")){ +// size=Parse.parseIntKMG(b); +// } + + else if(a.equals("parse_flag_goes_here")){ + long fake_variable=Parse.parseKMG(b); + //Set a variable here + } + + else if(a.equals("name") || a.equals("taxname")){ + outTaxName=b; + }else if(a.equals("name0")){ + outName0=b; + }else if(a.equals("fname")){ + outFname=b; + }else if(a.equals("taxid") || a.equals("tid")){ + outTaxID=Integer.parseInt(b); + }else if(a.equals("spid")){ + outSpid=Integer.parseInt(b); + }else if(a.equals("imgid")){ + outImgID=Integer.parseInt(b); + }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){ + if(outMeta==null){outMeta=new ArrayList<String>();} + int underscore=a.indexOf('_', 0); + outMeta.add(a.substring(underscore+1)+":"+b); + } + + else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){ + outSketch=b; + } + + else if(parser.parse(arg, a, b)){//Parse standard flags in the parser + //do nothing + } + + else if(b==null && new File(arg).exists()){ + in.add(arg); + } + + else{ + outstream.println("Unknown parameter "+args[i]); + assert(false) : "Unknown parameter "+args[i]; + } + } + outMeta=SketchObject.fixMeta(outMeta); + + blacklist=null; + + postParse(); + + {//Process parser fields + overwrite=ReadStats.overwrite=parser.overwrite; + append=ReadStats.append=parser.append; + } + + //Ensure there is an input file + if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} + + //Adjust the number of threads for input file reading + if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ + ByteFile.FORCE_MODE_BF2=true; + } + + ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false); + if(ffout!=null && !ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;} + + if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){ + throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n"); + } + + //Ensure that no file was specified multiple times + if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){ + throw new RuntimeException("\nSome file names were specified multiple times.\n"); + } + + tool=new SketchTool(targetSketchSize, defaultParams); + +// assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION; + if(verbose){ + if(useWhitelist){outstream.println("Using a whitelist.");} + if(blacklist!=null){outstream.println("Using a blacklist.");} + } + + defaultParams.postParse(false, false); + allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2); + if(!allowMultithreadedFastq){Shared.capBufferLen(40);} + } + + /*--------------------------------------------------------------*/ + /*---------------- Outer Methods ----------------*/ + /*--------------------------------------------------------------*/ + + private void process(Timer t){ + Timer ttotal=new Timer(); + + t.start(); + inSketches=tool.loadSketches_MT(defaultParams, in); + final int numLoaded=(inSketches.size()); + long sum=0; + for(Sketch sk : inSketches){ + sum+=sk.length(); + } + t.stop(); + outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t); + t.start(); +// outstream.println(inSketches.get(0)); + + ByteBuilder bb=new ByteBuilder(); + + int sizeOut=(int)(Sketch.AUTOSIZE ? sum : Tools.min(Sketch.targetSketchSize, sum)); + { + Sketch.AUTOSIZE=false; + Sketch.targetSketchSize=sizeOut; + Sketch.maxGenomeFraction=1; + } + SketchHeap heap=new SketchHeap(sizeOut, 0, tool.trackCounts); + for(Sketch sk : inSketches){ + heap.add(sk); + } + heap.genomeSizeKmers=Tools.max(heap.genomeSizeKmers, sizeOut); + ArrayList<String> meta=inSketches.get(0).meta; + if(meta==null){meta=outMeta;} + else if(outMeta!=null){meta.addAll(outMeta);} + Sketch union=new Sketch(heap, false, tool.trackCounts, outMeta); + + if(outTaxName!=null){union.setTaxName(outTaxName);} + if(outFname!=null){union.setFname(outFname);} + if(outName0!=null){union.setName0(outName0);} + + if(outTaxID>=0){union.taxID=(outTaxID);} + if(outSpid>=0){union.spid=(outSpid);} + if(outImgID>=0){union.imgID=(outImgID);} + + if(outSketch!=null){ + ByteStreamWriter bsw=new ByteStreamWriter(outSketch, overwrite, append, true, FileFormat.SKETCH); + bsw.start(); + union.toBytes(bb); + bsw.print(bb); + bb.clear(); + bsw.poisonAndWait(); + errorState|=bsw.errorState; + t.stop(); + outstream.println("Wrote "+1+" sketch of total size "+union.length()+" in \t"+t); + } + + t.stop(); +// outstream.println("\nRan "+(inSketches.size()*refSketches.size())+" comparisons in \t"+t); + ttotal.stop(); + outstream.println("Total Time: \t"+ttotal); + } + + + /*--------------------------------------------------------------*/ + /*---------------- Inner Methods ----------------*/ + /*--------------------------------------------------------------*/ + + private static boolean addFiles(String a, Collection<String> list){ + int initial=list.size(); + if(a==null){return false;} + File f=null; + if(a.indexOf(',')>=0){f=new File(a);} + if(f==null || f.exists()){ + list.add(a); + }else{ + for(String s : a.split(",")){ + list.add(s); + } + } + return list.size()>initial; + } + + /*--------------------------------------------------------------*/ + /*---------------- Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private ArrayList<String> in=new ArrayList<String>(); + + private String outSketch=null; + + private final SketchTool tool; + + private ArrayList<Sketch> inSketches; + + /*Override metadata */ + private String outTaxName=null; + private String outFname=null; + private String outName0=null; + private int outTaxID=-1; + private long outSpid=-1; + private long outImgID=-1; + private ArrayList<String> outMeta=null; + + /*--------------------------------------------------------------*/ + /*---------------- Final Fields ----------------*/ + /*--------------------------------------------------------------*/ + + /** Primary output file */ + private final FileFormat ffout; + + /*--------------------------------------------------------------*/ + /*---------------- Common Fields ----------------*/ + /*--------------------------------------------------------------*/ + + /** Print status messages to this output stream */ + private PrintStream outstream=System.err; + /** Print verbose messages */ + public static boolean verbose=false; + /** True if an error was encountered */ + public boolean errorState=false; + /** Overwrite existing output files */ + private boolean overwrite=false; + /** Append to existing output files */ + private boolean append=false; + + /*--------------------------------------------------------------*/ + /*---------------- Static Fields ----------------*/ + /*--------------------------------------------------------------*/ + + /** Don't print caught exceptions */ + public static boolean suppressErrors=false; + +}