annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SubSketch.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6 import java.util.Collection;
jpayne@68 7 import java.util.LinkedHashSet;
jpayne@68 8
jpayne@68 9 import fileIO.ByteFile;
jpayne@68 10 import fileIO.ByteStreamWriter;
jpayne@68 11 import fileIO.FileFormat;
jpayne@68 12 import fileIO.ReadWrite;
jpayne@68 13 import shared.Parse;
jpayne@68 14 import shared.Parser;
jpayne@68 15 import shared.PreParser;
jpayne@68 16 import shared.ReadStats;
jpayne@68 17 import shared.Shared;
jpayne@68 18 import shared.Timer;
jpayne@68 19 import shared.Tools;
jpayne@68 20 import structures.ByteBuilder;
jpayne@68 21
jpayne@68 22 /**
jpayne@68 23 * Generates smaller sketches from input sketches.
jpayne@68 24 *
jpayne@68 25 * @author Brian Bushnell
jpayne@68 26 * @date July 23, 2018
jpayne@68 27 *
jpayne@68 28 */
jpayne@68 29 public class SubSketch extends SketchObject {
jpayne@68 30
jpayne@68 31 /*--------------------------------------------------------------*/
jpayne@68 32 /*---------------- Initialization ----------------*/
jpayne@68 33 /*--------------------------------------------------------------*/
jpayne@68 34
jpayne@68 35 /**
jpayne@68 36 * Code entrance from the command line.
jpayne@68 37 * @param args Command line arguments
jpayne@68 38 */
jpayne@68 39 public static void main(String[] args){
jpayne@68 40 //Start a timer immediately upon code entrance.
jpayne@68 41 Timer t=new Timer();
jpayne@68 42
jpayne@68 43 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
jpayne@68 44 final int oldBufLen=Shared.bufferLen();
jpayne@68 45
jpayne@68 46 //Create an instance of this class
jpayne@68 47 SubSketch x=new SubSketch(args);
jpayne@68 48
jpayne@68 49 //Run the object
jpayne@68 50 x.process(t);
jpayne@68 51
jpayne@68 52 ReadWrite.USE_UNPIGZ=oldUnpigz;
jpayne@68 53 Shared.setBufferLen(oldBufLen);
jpayne@68 54
jpayne@68 55 //Close the print stream if it was redirected
jpayne@68 56 Shared.closeStream(x.outstream);
jpayne@68 57
jpayne@68 58 assert(!x.errorState) : "This program ended in an error state.";
jpayne@68 59 }
jpayne@68 60
jpayne@68 61 /**
jpayne@68 62 * Constructor.
jpayne@68 63 * @param args Command line arguments
jpayne@68 64 */
jpayne@68 65 public SubSketch(String[] args){
jpayne@68 66
jpayne@68 67 {//Preparse block for help, config files, and outstream
jpayne@68 68 PreParser pp=new PreParser(args, null, false);
jpayne@68 69 args=pp.args;
jpayne@68 70 outstream=pp.outstream;
jpayne@68 71 }
jpayne@68 72
jpayne@68 73 //Set shared static variables
jpayne@68 74 ReadWrite.USE_UNPIGZ=true;
jpayne@68 75 KILL_OK=true;
jpayne@68 76
jpayne@68 77 //Create a parser object
jpayne@68 78 Parser parser=new Parser();
jpayne@68 79
jpayne@68 80 defaultParams.printRefFileName=true;
jpayne@68 81
jpayne@68 82 //Parse each argument
jpayne@68 83 for(int i=0; i<args.length; i++){
jpayne@68 84 String arg=args[i];
jpayne@68 85
jpayne@68 86 //Break arguments into their constituent parts, in the form of "a=b"
jpayne@68 87 String[] split=arg.split("=");
jpayne@68 88 String a=split[0].toLowerCase();
jpayne@68 89 String b=split.length>1 ? split[1] : null;
jpayne@68 90
jpayne@68 91 if(a.equals("verbose")){
jpayne@68 92 verbose=Parse.parseBoolean(b);
jpayne@68 93 }else if(a.equals("in")){
jpayne@68 94 addFiles(b, in);
jpayne@68 95 }else if(a.equals("files")){
jpayne@68 96 files=Integer.parseInt(b);
jpayne@68 97 }else if(parseSketchFlags(arg, a, b)){
jpayne@68 98 //Do nothing
jpayne@68 99 }else if(defaultParams.parse(arg, a, b)){
jpayne@68 100 //Do nothing
jpayne@68 101 }
jpayne@68 102 // else if(a.equals("size")){
jpayne@68 103 // size=Parse.parseIntKMG(b);
jpayne@68 104 // }
jpayne@68 105
jpayne@68 106 else if(a.equals("parse_flag_goes_here")){
jpayne@68 107 long fake_variable=Parse.parseKMG(b);
jpayne@68 108 //Set a variable here
jpayne@68 109 }
jpayne@68 110
jpayne@68 111 else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){
jpayne@68 112 outSketch=b;
jpayne@68 113 }
jpayne@68 114
jpayne@68 115 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
jpayne@68 116 //do nothing
jpayne@68 117 }
jpayne@68 118
jpayne@68 119 else if(b==null && new File(arg).exists()){
jpayne@68 120 in.add(arg);
jpayne@68 121 }
jpayne@68 122
jpayne@68 123 else{
jpayne@68 124 outstream.println("Unknown parameter "+args[i]);
jpayne@68 125 assert(false) : "Unknown parameter "+args[i];
jpayne@68 126 }
jpayne@68 127 }
jpayne@68 128 assert(targetSketchSize>0) : "Must set size.";
jpayne@68 129
jpayne@68 130 {//Expand # symbol
jpayne@68 131 LinkedHashSet<String> expanded=new LinkedHashSet<String>();
jpayne@68 132 for(String s : in){SketchSearcher.addFiles(s, expanded);}
jpayne@68 133 in.clear();
jpayne@68 134 in.addAll(expanded);
jpayne@68 135 }
jpayne@68 136
jpayne@68 137 postParse();
jpayne@68 138
jpayne@68 139 {//Process parser fields
jpayne@68 140 overwrite=ReadStats.overwrite=parser.overwrite;
jpayne@68 141 append=ReadStats.append=parser.append;
jpayne@68 142 }
jpayne@68 143
jpayne@68 144 //Ensure there is an input file
jpayne@68 145 if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 146
jpayne@68 147 //Adjust the number of threads for input file reading
jpayne@68 148 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 149 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 150 }
jpayne@68 151
jpayne@68 152 if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){
jpayne@68 153 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n");
jpayne@68 154 }
jpayne@68 155 // assert(false) : ffout;
jpayne@68 156
jpayne@68 157 //Ensure that no file was specified multiple times
jpayne@68 158 if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){
jpayne@68 159 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 160 }
jpayne@68 161
jpayne@68 162 tool=new SketchTool(targetSketchSize, defaultParams);
jpayne@68 163
jpayne@68 164 // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION;
jpayne@68 165 if(verbose || true){
jpayne@68 166 if(useWhitelist){outstream.println("Using a whitelist.");}
jpayne@68 167 if(blacklist!=null){outstream.println("Using a blacklist.");}
jpayne@68 168 }
jpayne@68 169
jpayne@68 170 defaultParams.postParse(false, false);
jpayne@68 171 allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2);
jpayne@68 172 if(!allowMultithreadedFastq){Shared.capBufferLen(40);}
jpayne@68 173 }
jpayne@68 174
jpayne@68 175 /*--------------------------------------------------------------*/
jpayne@68 176 /*---------------- Outer Methods ----------------*/
jpayne@68 177 /*--------------------------------------------------------------*/
jpayne@68 178
jpayne@68 179 private void process(Timer t){
jpayne@68 180 Timer ttotal=new Timer();
jpayne@68 181
jpayne@68 182 t.start();
jpayne@68 183 inSketches=tool.loadSketches_MT(defaultParams, in);
jpayne@68 184 final int numLoaded=(inSketches.size());
jpayne@68 185 long sum=0;
jpayne@68 186 for(Sketch sk : inSketches){
jpayne@68 187 sum+=sk.length();
jpayne@68 188 }
jpayne@68 189 t.stop();
jpayne@68 190 outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t);
jpayne@68 191 t.start();
jpayne@68 192 if(verbose && numLoaded>0){
jpayne@68 193 System.err.println("First sketch:\n"+inSketches.get(0));
jpayne@68 194 }
jpayne@68 195 // outstream.println(inSketches.get(0));
jpayne@68 196
jpayne@68 197 int sizeOut=Sketch.targetSketchSize;
jpayne@68 198 {
jpayne@68 199 if(Sketch.SET_TARGET_SIZE){Sketch.AUTOSIZE=false;}
jpayne@68 200 Sketch.targetSketchSize=sizeOut;
jpayne@68 201 Sketch.maxGenomeFraction=1;
jpayne@68 202 }
jpayne@68 203
jpayne@68 204 if(outSketch!=null && outSketch.indexOf('#')>=1 && files>1){
jpayne@68 205 ByteStreamWriter[] bswArray=new ByteStreamWriter[files];
jpayne@68 206 for(int i=0; i<files; i++){
jpayne@68 207 FileFormat ffout=FileFormat.testOutput(outSketch.replace("#", ""+i), FileFormat.SKETCH, null, false, overwrite, append, false);
jpayne@68 208 ByteStreamWriter bsw=new ByteStreamWriter(ffout);
jpayne@68 209 bsw.start();
jpayne@68 210 bswArray[i]=bsw;
jpayne@68 211 }
jpayne@68 212
jpayne@68 213 processInner(inSketches, bswArray);
jpayne@68 214
jpayne@68 215 for(ByteStreamWriter bsw : bswArray){
jpayne@68 216 bsw.poisonAndWait();
jpayne@68 217 errorState|=bsw.errorState;
jpayne@68 218 }
jpayne@68 219 }else{
jpayne@68 220 FileFormat ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false);
jpayne@68 221 ByteStreamWriter bsw=null;
jpayne@68 222 if(ffout!=null){
jpayne@68 223 bsw=new ByteStreamWriter(ffout);
jpayne@68 224 bsw.start();
jpayne@68 225 }
jpayne@68 226
jpayne@68 227 processInner(inSketches, bsw);
jpayne@68 228
jpayne@68 229 if(bsw!=null){
jpayne@68 230 bsw.poisonAndWait();
jpayne@68 231 errorState|=bsw.errorState;
jpayne@68 232 }
jpayne@68 233 }
jpayne@68 234
jpayne@68 235 t.stop();
jpayne@68 236 if(blacklist!=null){outstream.println("Evicted "+blackKeys+" blacklisted keys.");}
jpayne@68 237 outstream.println("Wrote "+sketchesOut+" sketches of total size "+keysOut+" in "+t);
jpayne@68 238
jpayne@68 239 t.stop();
jpayne@68 240 ttotal.stop();
jpayne@68 241 outstream.println("Total Time: \t"+ttotal);
jpayne@68 242 }
jpayne@68 243
jpayne@68 244 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bsw){
jpayne@68 245 ByteBuilder bb=new ByteBuilder();
jpayne@68 246 for(Sketch sk : sketches){
jpayne@68 247 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
jpayne@68 248 // if(!defaultParams.trackCounts()){sk.keyCounts=null;}
jpayne@68 249 if(blacklist!=null){blackKeys+=sk.applyBlacklist();}
jpayne@68 250 if(sk.length()>target){
jpayne@68 251 sk.resize(target);
jpayne@68 252 if(verbose){System.err.println("Resized to:\n"+sk);}
jpayne@68 253 }
jpayne@68 254 if(sk.length()>=minSketchSize){
jpayne@68 255 keysOut+=sk.length();
jpayne@68 256 sketchesOut++;
jpayne@68 257 sk.toBytes(bb);
jpayne@68 258 if(verbose){System.err.println("toBytes:\n"+bb);}
jpayne@68 259 if(bsw!=null){bsw.print(bb);}
jpayne@68 260 bb.clear();
jpayne@68 261 }
jpayne@68 262 }
jpayne@68 263 }
jpayne@68 264
jpayne@68 265 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bswa[]){
jpayne@68 266 ByteBuilder bb=new ByteBuilder();
jpayne@68 267 for(Sketch sk : sketches){
jpayne@68 268 //final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
jpayne@68 269 // if(!defaultParams.trackCounts()){sk.keyCounts=null;}
jpayne@68 270 if(blacklist!=null){blackKeys+=sk.applyBlacklist();}
jpayne@68 271
jpayne@68 272 //Calculating target after applying blacklist gives better consistency with actual usage
jpayne@68 273 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
jpayne@68 274
jpayne@68 275 if(sk.length()>target){
jpayne@68 276 sk.resize(target);
jpayne@68 277 if(verbose){System.err.println("Resized to:\n"+sk);}
jpayne@68 278 }
jpayne@68 279 if(sk.length()>=minSketchSize){
jpayne@68 280 keysOut+=sk.length();
jpayne@68 281 sketchesOut++;
jpayne@68 282
jpayne@68 283 if(bswa!=null){
jpayne@68 284 ByteStreamWriter bsw=bswa[sk.sketchID%files];
jpayne@68 285 if(sk.fname()!=null && sk.fname().endsWith(".sketch")){sk.setFname(bsw.fname);}
jpayne@68 286 sk.toBytes(bb);//This is the time-limiting factor; could be multithreaded.
jpayne@68 287 if(verbose){System.err.println("toBytes:\n"+bb);}
jpayne@68 288 bsw.print(bb);
jpayne@68 289 }
jpayne@68 290 bb.clear();
jpayne@68 291 }
jpayne@68 292 }
jpayne@68 293 }
jpayne@68 294
jpayne@68 295 /*--------------------------------------------------------------*/
jpayne@68 296 /*---------------- Inner Methods ----------------*/
jpayne@68 297 /*--------------------------------------------------------------*/
jpayne@68 298
jpayne@68 299 private static boolean addFiles(String a, Collection<String> list){
jpayne@68 300 int initial=list.size();
jpayne@68 301 if(a==null){return false;}
jpayne@68 302 File f=null;
jpayne@68 303 if(a.indexOf(',')>=0){f=new File(a);}
jpayne@68 304 if(f==null || f.exists()){
jpayne@68 305 list.add(a);
jpayne@68 306 }else{
jpayne@68 307 for(String s : a.split(",")){
jpayne@68 308 list.add(s);
jpayne@68 309 }
jpayne@68 310 }
jpayne@68 311 return list.size()>initial;
jpayne@68 312 }
jpayne@68 313
jpayne@68 314 /*--------------------------------------------------------------*/
jpayne@68 315 /*---------------- Fields ----------------*/
jpayne@68 316 /*--------------------------------------------------------------*/
jpayne@68 317
jpayne@68 318 private LinkedHashSet<String> in=new LinkedHashSet<String>();
jpayne@68 319
jpayne@68 320 private String outSketch=null;
jpayne@68 321
jpayne@68 322 private final SketchTool tool;
jpayne@68 323
jpayne@68 324 private ArrayList<Sketch> inSketches;
jpayne@68 325
jpayne@68 326 private long keysOut=0;
jpayne@68 327 private long sketchesOut=0;
jpayne@68 328 private long blackKeys=0;
jpayne@68 329
jpayne@68 330 private int files=31;
jpayne@68 331
jpayne@68 332 /*--------------------------------------------------------------*/
jpayne@68 333 /*---------------- Final Fields ----------------*/
jpayne@68 334 /*--------------------------------------------------------------*/
jpayne@68 335
jpayne@68 336 /*--------------------------------------------------------------*/
jpayne@68 337 /*---------------- Common Fields ----------------*/
jpayne@68 338 /*--------------------------------------------------------------*/
jpayne@68 339
jpayne@68 340 /** Print status messages to this output stream */
jpayne@68 341 private PrintStream outstream=System.err;
jpayne@68 342 /** Print verbose messages */
jpayne@68 343 public static boolean verbose=false;
jpayne@68 344 /** True if an error was encountered */
jpayne@68 345 public boolean errorState=false;
jpayne@68 346 /** Overwrite existing output files */
jpayne@68 347 private boolean overwrite=false;
jpayne@68 348 /** Append to existing output files */
jpayne@68 349 private boolean append=false;
jpayne@68 350
jpayne@68 351 /*--------------------------------------------------------------*/
jpayne@68 352 /*---------------- Static Fields ----------------*/
jpayne@68 353 /*--------------------------------------------------------------*/
jpayne@68 354
jpayne@68 355 /** Don't print caught exceptions */
jpayne@68 356 public static boolean suppressErrors=false;
jpayne@68 357
jpayne@68 358 }