Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SubSketch.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
| author | jpayne |
|---|---|
| date | Tue, 18 Mar 2025 16:23:26 -0400 |
| parents | |
| children | |
line wrap: on
line source
package sketch; import java.io.File; import java.io.PrintStream; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashSet; import fileIO.ByteFile; import fileIO.ByteStreamWriter; import fileIO.FileFormat; import fileIO.ReadWrite; import shared.Parse; import shared.Parser; import shared.PreParser; import shared.ReadStats; import shared.Shared; import shared.Timer; import shared.Tools; import structures.ByteBuilder; /** * Generates smaller sketches from input sketches. * * @author Brian Bushnell * @date July 23, 2018 * */ public class SubSketch extends SketchObject { /*--------------------------------------------------------------*/ /*---------------- Initialization ----------------*/ /*--------------------------------------------------------------*/ /** * Code entrance from the command line. * @param args Command line arguments */ public static void main(String[] args){ //Start a timer immediately upon code entrance. Timer t=new Timer(); final boolean oldUnpigz=ReadWrite.USE_UNPIGZ; final int oldBufLen=Shared.bufferLen(); //Create an instance of this class SubSketch x=new SubSketch(args); //Run the object x.process(t); ReadWrite.USE_UNPIGZ=oldUnpigz; Shared.setBufferLen(oldBufLen); //Close the print stream if it was redirected Shared.closeStream(x.outstream); assert(!x.errorState) : "This program ended in an error state."; } /** * Constructor. 
* @param args Command line arguments */ public SubSketch(String[] args){ {//Preparse block for help, config files, and outstream PreParser pp=new PreParser(args, null, false); args=pp.args; outstream=pp.outstream; } //Set shared static variables ReadWrite.USE_UNPIGZ=true; KILL_OK=true; //Create a parser object Parser parser=new Parser(); defaultParams.printRefFileName=true; //Parse each argument for(int i=0; i<args.length; i++){ String arg=args[i]; //Break arguments into their constituent parts, in the form of "a=b" String[] split=arg.split("="); String a=split[0].toLowerCase(); String b=split.length>1 ? split[1] : null; if(a.equals("verbose")){ verbose=Parse.parseBoolean(b); }else if(a.equals("in")){ addFiles(b, in); }else if(a.equals("files")){ files=Integer.parseInt(b); }else if(parseSketchFlags(arg, a, b)){ //Do nothing }else if(defaultParams.parse(arg, a, b)){ //Do nothing } // else if(a.equals("size")){ // size=Parse.parseIntKMG(b); // } else if(a.equals("parse_flag_goes_here")){ long fake_variable=Parse.parseKMG(b); //Set a variable here } else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){ outSketch=b; } else if(parser.parse(arg, a, b)){//Parse standard flags in the parser //do nothing } else if(b==null && new File(arg).exists()){ in.add(arg); } else{ outstream.println("Unknown parameter "+args[i]); assert(false) : "Unknown parameter "+args[i]; } } assert(targetSketchSize>0) : "Must set size."; {//Expand # symbol LinkedHashSet<String> expanded=new LinkedHashSet<String>(); for(String s : in){SketchSearcher.addFiles(s, expanded);} in.clear(); in.addAll(expanded); } postParse(); {//Process parser fields overwrite=ReadStats.overwrite=parser.overwrite; append=ReadStats.append=parser.append; } //Ensure there is an input file if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} //Adjust the number of threads for input file reading if(!ByteFile.FORCE_MODE_BF1 
&& !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ ByteFile.FORCE_MODE_BF2=true; } if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){ throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n"); } // assert(false) : ffout; //Ensure that no file was specified multiple times if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){ throw new RuntimeException("\nSome file names were specified multiple times.\n"); } tool=new SketchTool(targetSketchSize, defaultParams); // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION; if(verbose || true){ if(useWhitelist){outstream.println("Using a whitelist.");} if(blacklist!=null){outstream.println("Using a blacklist.");} } defaultParams.postParse(false, false); allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2); if(!allowMultithreadedFastq){Shared.capBufferLen(40);} } /*--------------------------------------------------------------*/ /*---------------- Outer Methods ----------------*/ /*--------------------------------------------------------------*/ private void process(Timer t){ Timer ttotal=new Timer(); t.start(); inSketches=tool.loadSketches_MT(defaultParams, in); final int numLoaded=(inSketches.size()); long sum=0; for(Sketch sk : inSketches){ sum+=sk.length(); } t.stop(); outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? 
"" : "es")+" of total size "+sum+" in "+t); t.start(); if(verbose && numLoaded>0){ System.err.println("First sketch:\n"+inSketches.get(0)); } // outstream.println(inSketches.get(0)); int sizeOut=Sketch.targetSketchSize; { if(Sketch.SET_TARGET_SIZE){Sketch.AUTOSIZE=false;} Sketch.targetSketchSize=sizeOut; Sketch.maxGenomeFraction=1; } if(outSketch!=null && outSketch.indexOf('#')>=1 && files>1){ ByteStreamWriter[] bswArray=new ByteStreamWriter[files]; for(int i=0; i<files; i++){ FileFormat ffout=FileFormat.testOutput(outSketch.replace("#", ""+i), FileFormat.SKETCH, null, false, overwrite, append, false); ByteStreamWriter bsw=new ByteStreamWriter(ffout); bsw.start(); bswArray[i]=bsw; } processInner(inSketches, bswArray); for(ByteStreamWriter bsw : bswArray){ bsw.poisonAndWait(); errorState|=bsw.errorState; } }else{ FileFormat ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false); ByteStreamWriter bsw=null; if(ffout!=null){ bsw=new ByteStreamWriter(ffout); bsw.start(); } processInner(inSketches, bsw); if(bsw!=null){ bsw.poisonAndWait(); errorState|=bsw.errorState; } } t.stop(); if(blacklist!=null){outstream.println("Evicted "+blackKeys+" blacklisted keys.");} outstream.println("Wrote "+sketchesOut+" sketches of total size "+keysOut+" in "+t); t.stop(); ttotal.stop(); outstream.println("Total Time: \t"+ttotal); } void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bsw){ ByteBuilder bb=new ByteBuilder(); for(Sketch sk : sketches){ final int target=Sketch.AUTOSIZE ? 
toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; // if(!defaultParams.trackCounts()){sk.keyCounts=null;} if(blacklist!=null){blackKeys+=sk.applyBlacklist();} if(sk.length()>target){ sk.resize(target); if(verbose){System.err.println("Resized to:\n"+sk);} } if(sk.length()>=minSketchSize){ keysOut+=sk.length(); sketchesOut++; sk.toBytes(bb); if(verbose){System.err.println("toBytes:\n"+bb);} if(bsw!=null){bsw.print(bb);} bb.clear(); } } } void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bswa[]){ ByteBuilder bb=new ByteBuilder(); for(Sketch sk : sketches){ //final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; // if(!defaultParams.trackCounts()){sk.keyCounts=null;} if(blacklist!=null){blackKeys+=sk.applyBlacklist();} //Calculating target after applying blacklist gives better consistency with actual usage final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; if(sk.length()>target){ sk.resize(target); if(verbose){System.err.println("Resized to:\n"+sk);} } if(sk.length()>=minSketchSize){ keysOut+=sk.length(); sketchesOut++; if(bswa!=null){ ByteStreamWriter bsw=bswa[sk.sketchID%files]; if(sk.fname()!=null && sk.fname().endsWith(".sketch")){sk.setFname(bsw.fname);} sk.toBytes(bb);//This is the time-limiting factor; could be multithreaded. 
if(verbose){System.err.println("toBytes:\n"+bb);} bsw.print(bb); } bb.clear(); } } } /*--------------------------------------------------------------*/ /*---------------- Inner Methods ----------------*/ /*--------------------------------------------------------------*/ private static boolean addFiles(String a, Collection<String> list){ int initial=list.size(); if(a==null){return false;} File f=null; if(a.indexOf(',')>=0){f=new File(a);} if(f==null || f.exists()){ list.add(a); }else{ for(String s : a.split(",")){ list.add(s); } } return list.size()>initial; } /*--------------------------------------------------------------*/ /*---------------- Fields ----------------*/ /*--------------------------------------------------------------*/ private LinkedHashSet<String> in=new LinkedHashSet<String>(); private String outSketch=null; private final SketchTool tool; private ArrayList<Sketch> inSketches; private long keysOut=0; private long sketchesOut=0; private long blackKeys=0; private int files=31; /*--------------------------------------------------------------*/ /*---------------- Final Fields ----------------*/ /*--------------------------------------------------------------*/ /*--------------------------------------------------------------*/ /*---------------- Common Fields ----------------*/ /*--------------------------------------------------------------*/ /** Print status messages to this output stream */ private PrintStream outstream=System.err; /** Print verbose messages */ public static boolean verbose=false; /** True if an error was encountered */ public boolean errorState=false; /** Overwrite existing output files */ private boolean overwrite=false; /** Append to existing output files */ private boolean append=false; /*--------------------------------------------------------------*/ /*---------------- Static Fields ----------------*/ /*--------------------------------------------------------------*/ /** Don't print caught exceptions */ public static boolean 
suppressErrors=false; }