jpayne@68: package sketch; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Collection; jpayne@68: import java.util.LinkedHashSet; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.ReadStats; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import structures.ByteBuilder; jpayne@68: jpayne@68: /** jpayne@68: * Generates smaller sketches from input sketches. jpayne@68: * jpayne@68: * @author Brian Bushnell jpayne@68: * @date July 23, 2018 jpayne@68: * jpayne@68: */ jpayne@68: public class SubSketch extends SketchObject { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: final boolean oldUnpigz=ReadWrite.USE_UNPIGZ; jpayne@68: final int oldBufLen=Shared.bufferLen(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: SubSketch x=new SubSketch(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: ReadWrite.USE_UNPIGZ=oldUnpigz; jpayne@68: Shared.setBufferLen(oldBufLen); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: jpayne@68: assert(!x.errorState) : "This program ended in an error state."; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public SubSketch(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, null, false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables jpayne@68: ReadWrite.USE_UNPIGZ=true; jpayne@68: KILL_OK=true; jpayne@68: jpayne@68: //Create a parser object jpayne@68: Parser parser=new Parser(); jpayne@68: jpayne@68: defaultParams.printRefFileName=true; jpayne@68: jpayne@68: //Parse each argument jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("in")){ jpayne@68: addFiles(b, in); jpayne@68: }else if(a.equals("files")){ jpayne@68: files=Integer.parseInt(b); jpayne@68: }else if(parseSketchFlags(arg, a, b)){ jpayne@68: //Do nothing jpayne@68: }else if(defaultParams.parse(arg, a, b)){ jpayne@68: //Do nothing jpayne@68: } jpayne@68: // else if(a.equals("size")){ jpayne@68: // size=Parse.parseIntKMG(b); jpayne@68: // } jpayne@68: jpayne@68: else if(a.equals("parse_flag_goes_here")){ jpayne@68: long fake_variable=Parse.parseKMG(b); jpayne@68: //Set a variable here jpayne@68: } jpayne@68: jpayne@68: else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){ jpayne@68: outSketch=b; jpayne@68: } jpayne@68: jpayne@68: else if(parser.parse(arg, a, b)){//Parse standard flags in the parser jpayne@68: //do nothing jpayne@68: } jpayne@68: jpayne@68: else if(b==null && new File(arg).exists()){ jpayne@68: in.add(arg); jpayne@68: } jpayne@68: jpayne@68: else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: } jpayne@68: } jpayne@68: assert(targetSketchSize>0) : "Must set size."; jpayne@68: jpayne@68: {//Expand # symbol jpayne@68: LinkedHashSet expanded=new LinkedHashSet(); jpayne@68: for(String s : in){SketchSearcher.addFiles(s, expanded);} jpayne@68: in.clear(); jpayne@68: in.addAll(expanded); jpayne@68: } jpayne@68: jpayne@68: postParse(); jpayne@68: jpayne@68: {//Process parser fields jpayne@68: overwrite=ReadStats.overwrite=parser.overwrite; jpayne@68: append=ReadStats.append=parser.append; jpayne@68: } jpayne@68: jpayne@68: //Ensure there is an input file jpayne@68: if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){ jpayne@68: throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n"); jpayne@68: } jpayne@68: // assert(false) : ffout; jpayne@68: jpayne@68: //Ensure that no file was specified multiple times jpayne@68: if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){ jpayne@68: throw new RuntimeException("\nSome file names were specified multiple times.\n"); jpayne@68: } jpayne@68: jpayne@68: tool=new SketchTool(targetSketchSize, defaultParams); jpayne@68: jpayne@68: // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION; jpayne@68: if(verbose || true){ jpayne@68: if(useWhitelist){outstream.println("Using a whitelist.");} jpayne@68: if(blacklist!=null){outstream.println("Using a blacklist.");} jpayne@68: } jpayne@68: jpayne@68: defaultParams.postParse(false, false); jpayne@68: allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2); jpayne@68: if(!allowMultithreadedFastq){Shared.capBufferLen(40);} jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private void process(Timer t){ jpayne@68: Timer ttotal=new Timer(); jpayne@68: jpayne@68: t.start(); jpayne@68: inSketches=tool.loadSketches_MT(defaultParams, in); jpayne@68: final int numLoaded=(inSketches.size()); jpayne@68: long sum=0; jpayne@68: for(Sketch sk : inSketches){ jpayne@68: sum+=sk.length(); jpayne@68: } jpayne@68: t.stop(); jpayne@68: outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t); jpayne@68: t.start(); jpayne@68: if(verbose && numLoaded>0){ jpayne@68: System.err.println("First sketch:\n"+inSketches.get(0)); jpayne@68: } jpayne@68: // outstream.println(inSketches.get(0)); jpayne@68: jpayne@68: int sizeOut=Sketch.targetSketchSize; jpayne@68: { jpayne@68: if(Sketch.SET_TARGET_SIZE){Sketch.AUTOSIZE=false;} jpayne@68: Sketch.targetSketchSize=sizeOut; jpayne@68: Sketch.maxGenomeFraction=1; jpayne@68: } jpayne@68: jpayne@68: if(outSketch!=null && outSketch.indexOf('#')>=1 && files>1){ jpayne@68: ByteStreamWriter[] bswArray=new ByteStreamWriter[files]; jpayne@68: for(int i=0; i sketches, ByteStreamWriter bsw){ jpayne@68: ByteBuilder bb=new ByteBuilder(); jpayne@68: for(Sketch sk : sketches){ jpayne@68: final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; jpayne@68: // if(!defaultParams.trackCounts()){sk.keyCounts=null;} jpayne@68: if(blacklist!=null){blackKeys+=sk.applyBlacklist();} jpayne@68: if(sk.length()>target){ jpayne@68: sk.resize(target); jpayne@68: if(verbose){System.err.println("Resized to:\n"+sk);} jpayne@68: } jpayne@68: if(sk.length()>=minSketchSize){ jpayne@68: keysOut+=sk.length(); jpayne@68: sketchesOut++; jpayne@68: sk.toBytes(bb); jpayne@68: if(verbose){System.err.println("toBytes:\n"+bb);} jpayne@68: if(bsw!=null){bsw.print(bb);} jpayne@68: bb.clear(); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: void processInner(ArrayList sketches, ByteStreamWriter bswa[]){ jpayne@68: ByteBuilder bb=new ByteBuilder(); jpayne@68: for(Sketch sk : sketches){ jpayne@68: //final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; jpayne@68: // if(!defaultParams.trackCounts()){sk.keyCounts=null;} jpayne@68: if(blacklist!=null){blackKeys+=sk.applyBlacklist();} jpayne@68: jpayne@68: //Calculating target after applying blacklist gives better consistency with actual usage jpayne@68: final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; jpayne@68: jpayne@68: if(sk.length()>target){ jpayne@68: sk.resize(target); jpayne@68: if(verbose){System.err.println("Resized to:\n"+sk);} jpayne@68: } jpayne@68: if(sk.length()>=minSketchSize){ jpayne@68: keysOut+=sk.length(); jpayne@68: sketchesOut++; jpayne@68: jpayne@68: if(bswa!=null){ jpayne@68: ByteStreamWriter bsw=bswa[sk.sketchID%files]; jpayne@68: if(sk.fname()!=null && sk.fname().endsWith(".sketch")){sk.setFname(bsw.fname);} jpayne@68: sk.toBytes(bb);//This is the time-limiting factor; could be multithreaded. jpayne@68: if(verbose){System.err.println("toBytes:\n"+bb);} jpayne@68: bsw.print(bb); jpayne@68: } jpayne@68: bb.clear(); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private static boolean addFiles(String a, Collection list){ jpayne@68: int initial=list.size(); jpayne@68: if(a==null){return false;} jpayne@68: File f=null; jpayne@68: if(a.indexOf(',')>=0){f=new File(a);} jpayne@68: if(f==null || f.exists()){ jpayne@68: list.add(a); jpayne@68: }else{ jpayne@68: for(String s : a.split(",")){ jpayne@68: list.add(s); jpayne@68: } jpayne@68: } jpayne@68: return list.size()>initial; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private LinkedHashSet in=new LinkedHashSet(); jpayne@68: jpayne@68: private String outSketch=null; jpayne@68: jpayne@68: private final SketchTool tool; jpayne@68: jpayne@68: private ArrayList inSketches; jpayne@68: jpayne@68: private long keysOut=0; jpayne@68: private long sketchesOut=0; jpayne@68: private long blackKeys=0; jpayne@68: jpayne@68: private int files=31; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Common Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Print status messages to this output stream */ jpayne@68: private PrintStream outstream=System.err; jpayne@68: /** Print verbose messages */ jpayne@68: public static boolean verbose=false; jpayne@68: /** True if an error was encountered */ jpayne@68: public boolean errorState=false; jpayne@68: /** Overwrite existing output files */ jpayne@68: private boolean overwrite=false; jpayne@68: /** Append to existing output files */ jpayne@68: private boolean append=false; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Static Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Don't print caught exceptions */ jpayne@68: public static boolean suppressErrors=false; jpayne@68: jpayne@68: }