jpayne@68: package sketch; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Collection; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.ReadStats; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import structures.ByteBuilder; jpayne@68: jpayne@68: /** jpayne@68: * Combines multiple sketches into a single sketch. jpayne@68: * jpayne@68: * @author Brian Bushnell jpayne@68: * @date July 23, 2018 jpayne@68: * jpayne@68: */ jpayne@68: public class MergeSketch extends SketchObject { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: final boolean oldUnpigz=ReadWrite.USE_UNPIGZ; jpayne@68: final int oldBufLen=Shared.bufferLen(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: MergeSketch x=new MergeSketch(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: ReadWrite.USE_UNPIGZ=oldUnpigz; jpayne@68: Shared.setBufferLen(oldBufLen); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: jpayne@68: assert(!x.errorState) : "This program ended in an error state."; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public MergeSketch(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, null, false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables jpayne@68: ReadWrite.USE_UNPIGZ=true; jpayne@68: KILL_OK=true; jpayne@68: jpayne@68: //Create a parser object jpayne@68: Parser parser=new Parser(); jpayne@68: parser.out1="stdout.txt"; jpayne@68: jpayne@68: defaultParams.printRefFileName=true; jpayne@68: jpayne@68: //Parse each argument jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("in")){ jpayne@68: addFiles(b, in); jpayne@68: }else if(parseSketchFlags(arg, a, b)){ jpayne@68: //Do nothing jpayne@68: }else if(defaultParams.parse(arg, a, b)){ jpayne@68: //Do nothing jpayne@68: } jpayne@68: // else if(a.equals("size")){ jpayne@68: // size=Parse.parseIntKMG(b); jpayne@68: // } jpayne@68: jpayne@68: else if(a.equals("parse_flag_goes_here")){ jpayne@68: long fake_variable=Parse.parseKMG(b); jpayne@68: //Set a variable here jpayne@68: } jpayne@68: jpayne@68: else if(a.equals("name") || a.equals("taxname")){ jpayne@68: outTaxName=b; jpayne@68: }else if(a.equals("name0")){ jpayne@68: outName0=b; jpayne@68: }else if(a.equals("fname")){ jpayne@68: outFname=b; jpayne@68: }else if(a.equals("taxid") || a.equals("tid")){ jpayne@68: outTaxID=Integer.parseInt(b); jpayne@68: }else if(a.equals("spid")){ jpayne@68: outSpid=Integer.parseInt(b); jpayne@68: }else if(a.equals("imgid")){ jpayne@68: outImgID=Integer.parseInt(b); jpayne@68: }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){ jpayne@68: if(outMeta==null){outMeta=new ArrayList();} jpayne@68: int underscore=a.indexOf('_', 0); jpayne@68: outMeta.add(a.substring(underscore+1)+":"+b); jpayne@68: } jpayne@68: jpayne@68: else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){ jpayne@68: outSketch=b; jpayne@68: } jpayne@68: jpayne@68: else if(parser.parse(arg, a, b)){//Parse standard flags in the parser jpayne@68: //do nothing jpayne@68: } jpayne@68: jpayne@68: else if(b==null && new File(arg).exists()){ jpayne@68: in.add(arg); jpayne@68: } jpayne@68: jpayne@68: else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: } jpayne@68: } jpayne@68: outMeta=SketchObject.fixMeta(outMeta); jpayne@68: jpayne@68: blacklist=null; jpayne@68: jpayne@68: postParse(); jpayne@68: jpayne@68: {//Process parser fields jpayne@68: overwrite=ReadStats.overwrite=parser.overwrite; jpayne@68: append=ReadStats.append=parser.append; jpayne@68: } jpayne@68: jpayne@68: //Ensure there is an input file jpayne@68: if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false); jpayne@68: if(ffout!=null && !ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;} jpayne@68: jpayne@68: if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){ jpayne@68: throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure that no file was specified multiple times jpayne@68: if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){ jpayne@68: throw new RuntimeException("\nSome file names were specified multiple times.\n"); jpayne@68: } jpayne@68: jpayne@68: tool=new SketchTool(targetSketchSize, defaultParams); jpayne@68: jpayne@68: // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION; jpayne@68: if(verbose){ jpayne@68: if(useWhitelist){outstream.println("Using a whitelist.");} jpayne@68: if(blacklist!=null){outstream.println("Using a blacklist.");} jpayne@68: } jpayne@68: jpayne@68: defaultParams.postParse(false, false); jpayne@68: allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2); jpayne@68: if(!allowMultithreadedFastq){Shared.capBufferLen(40);} jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private void process(Timer t){ jpayne@68: Timer ttotal=new Timer(); jpayne@68: jpayne@68: t.start(); jpayne@68: inSketches=tool.loadSketches_MT(defaultParams, in); jpayne@68: final int numLoaded=(inSketches.size()); jpayne@68: long sum=0; jpayne@68: for(Sketch sk : inSketches){ jpayne@68: sum+=sk.length(); jpayne@68: } jpayne@68: t.stop(); jpayne@68: outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t); jpayne@68: t.start(); jpayne@68: // outstream.println(inSketches.get(0)); jpayne@68: jpayne@68: ByteBuilder bb=new ByteBuilder(); jpayne@68: jpayne@68: int sizeOut=(int)(Sketch.AUTOSIZE ? sum : Tools.min(Sketch.targetSketchSize, sum)); jpayne@68: { jpayne@68: Sketch.AUTOSIZE=false; jpayne@68: Sketch.targetSketchSize=sizeOut; jpayne@68: Sketch.maxGenomeFraction=1; jpayne@68: } jpayne@68: SketchHeap heap=new SketchHeap(sizeOut, 0, tool.trackCounts); jpayne@68: for(Sketch sk : inSketches){ jpayne@68: heap.add(sk); jpayne@68: } jpayne@68: heap.genomeSizeKmers=Tools.max(heap.genomeSizeKmers, sizeOut); jpayne@68: ArrayList meta=inSketches.get(0).meta; jpayne@68: if(meta==null){meta=outMeta;} jpayne@68: else if(outMeta!=null){meta.addAll(outMeta);} jpayne@68: Sketch union=new Sketch(heap, false, tool.trackCounts, outMeta); jpayne@68: jpayne@68: if(outTaxName!=null){union.setTaxName(outTaxName);} jpayne@68: if(outFname!=null){union.setFname(outFname);} jpayne@68: if(outName0!=null){union.setName0(outName0);} jpayne@68: jpayne@68: if(outTaxID>=0){union.taxID=(outTaxID);} jpayne@68: if(outSpid>=0){union.spid=(outSpid);} jpayne@68: if(outImgID>=0){union.imgID=(outImgID);} jpayne@68: jpayne@68: if(outSketch!=null){ jpayne@68: ByteStreamWriter bsw=new ByteStreamWriter(outSketch, overwrite, append, true, FileFormat.SKETCH); jpayne@68: bsw.start(); jpayne@68: union.toBytes(bb); jpayne@68: bsw.print(bb); jpayne@68: bb.clear(); jpayne@68: bsw.poisonAndWait(); jpayne@68: errorState|=bsw.errorState; jpayne@68: t.stop(); jpayne@68: outstream.println("Wrote "+1+" sketch of total size "+union.length()+" in \t"+t); jpayne@68: } jpayne@68: jpayne@68: t.stop(); jpayne@68: // outstream.println("\nRan "+(inSketches.size()*refSketches.size())+" comparisons in \t"+t); jpayne@68: ttotal.stop(); jpayne@68: outstream.println("Total Time: \t"+ttotal); jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private static boolean addFiles(String a, Collection list){ jpayne@68: int initial=list.size(); jpayne@68: if(a==null){return false;} jpayne@68: File f=null; jpayne@68: if(a.indexOf(',')>=0){f=new File(a);} jpayne@68: if(f==null || f.exists()){ jpayne@68: list.add(a); jpayne@68: }else{ jpayne@68: for(String s : a.split(",")){ jpayne@68: list.add(s); jpayne@68: } jpayne@68: } jpayne@68: return list.size()>initial; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private ArrayList in=new ArrayList(); jpayne@68: jpayne@68: private String outSketch=null; jpayne@68: jpayne@68: private final SketchTool tool; jpayne@68: jpayne@68: private ArrayList inSketches; jpayne@68: jpayne@68: /*Override metadata */ jpayne@68: private String outTaxName=null; jpayne@68: private String outFname=null; jpayne@68: private String outName0=null; jpayne@68: private int outTaxID=-1; jpayne@68: private long outSpid=-1; jpayne@68: private long outImgID=-1; jpayne@68: private ArrayList outMeta=null; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Primary output file */ jpayne@68: private final FileFormat ffout; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Common Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Print status messages to this output stream */ jpayne@68: private PrintStream outstream=System.err; jpayne@68: /** Print verbose messages */ jpayne@68: public static boolean verbose=false; jpayne@68: /** True if an error was encountered */ jpayne@68: public boolean errorState=false; jpayne@68: /** Overwrite existing output files */ jpayne@68: private boolean overwrite=false; jpayne@68: /** Append to existing output files */ jpayne@68: private boolean append=false; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Static Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Don't print caught exceptions */ jpayne@68: public static boolean suppressErrors=false; jpayne@68: jpayne@68: }