annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/MergeSketch.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6 import java.util.Collection;
jpayne@68 7
jpayne@68 8 import fileIO.ByteFile;
jpayne@68 9 import fileIO.ByteStreamWriter;
jpayne@68 10 import fileIO.FileFormat;
jpayne@68 11 import fileIO.ReadWrite;
jpayne@68 12 import shared.Parse;
jpayne@68 13 import shared.Parser;
jpayne@68 14 import shared.PreParser;
jpayne@68 15 import shared.ReadStats;
jpayne@68 16 import shared.Shared;
jpayne@68 17 import shared.Timer;
jpayne@68 18 import shared.Tools;
jpayne@68 19 import structures.ByteBuilder;
jpayne@68 20
jpayne@68 21 /**
jpayne@68 22 * Combines multiple sketches into a single sketch.
jpayne@68 23 *
jpayne@68 24 * @author Brian Bushnell
jpayne@68 25 * @date July 23, 2018
jpayne@68 26 *
jpayne@68 27 */
jpayne@68 28 public class MergeSketch extends SketchObject {
jpayne@68 29
jpayne@68 30 /*--------------------------------------------------------------*/
jpayne@68 31 /*---------------- Initialization ----------------*/
jpayne@68 32 /*--------------------------------------------------------------*/
jpayne@68 33
jpayne@68 34 /**
jpayne@68 35 * Code entrance from the command line.
jpayne@68 36 * @param args Command line arguments
jpayne@68 37 */
jpayne@68 38 public static void main(String[] args){
jpayne@68 39 //Start a timer immediately upon code entrance.
jpayne@68 40 Timer t=new Timer();
jpayne@68 41
jpayne@68 42 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
jpayne@68 43 final int oldBufLen=Shared.bufferLen();
jpayne@68 44
jpayne@68 45 //Create an instance of this class
jpayne@68 46 MergeSketch x=new MergeSketch(args);
jpayne@68 47
jpayne@68 48 //Run the object
jpayne@68 49 x.process(t);
jpayne@68 50
jpayne@68 51 ReadWrite.USE_UNPIGZ=oldUnpigz;
jpayne@68 52 Shared.setBufferLen(oldBufLen);
jpayne@68 53
jpayne@68 54 //Close the print stream if it was redirected
jpayne@68 55 Shared.closeStream(x.outstream);
jpayne@68 56
jpayne@68 57 assert(!x.errorState) : "This program ended in an error state.";
jpayne@68 58 }
jpayne@68 59
jpayne@68 60 /**
jpayne@68 61 * Constructor.
jpayne@68 62 * @param args Command line arguments
jpayne@68 63 */
jpayne@68 64 public MergeSketch(String[] args){
jpayne@68 65
jpayne@68 66 {//Preparse block for help, config files, and outstream
jpayne@68 67 PreParser pp=new PreParser(args, null, false);
jpayne@68 68 args=pp.args;
jpayne@68 69 outstream=pp.outstream;
jpayne@68 70 }
jpayne@68 71
jpayne@68 72 //Set shared static variables
jpayne@68 73 ReadWrite.USE_UNPIGZ=true;
jpayne@68 74 KILL_OK=true;
jpayne@68 75
jpayne@68 76 //Create a parser object
jpayne@68 77 Parser parser=new Parser();
jpayne@68 78 parser.out1="stdout.txt";
jpayne@68 79
jpayne@68 80 defaultParams.printRefFileName=true;
jpayne@68 81
jpayne@68 82 //Parse each argument
jpayne@68 83 for(int i=0; i<args.length; i++){
jpayne@68 84 String arg=args[i];
jpayne@68 85
jpayne@68 86 //Break arguments into their constituent parts, in the form of "a=b"
jpayne@68 87 String[] split=arg.split("=");
jpayne@68 88 String a=split[0].toLowerCase();
jpayne@68 89 String b=split.length>1 ? split[1] : null;
jpayne@68 90
jpayne@68 91 if(a.equals("verbose")){
jpayne@68 92 verbose=Parse.parseBoolean(b);
jpayne@68 93 }else if(a.equals("in")){
jpayne@68 94 addFiles(b, in);
jpayne@68 95 }else if(parseSketchFlags(arg, a, b)){
jpayne@68 96 //Do nothing
jpayne@68 97 }else if(defaultParams.parse(arg, a, b)){
jpayne@68 98 //Do nothing
jpayne@68 99 }
jpayne@68 100 // else if(a.equals("size")){
jpayne@68 101 // size=Parse.parseIntKMG(b);
jpayne@68 102 // }
jpayne@68 103
jpayne@68 104 else if(a.equals("parse_flag_goes_here")){
jpayne@68 105 long fake_variable=Parse.parseKMG(b);
jpayne@68 106 //Set a variable here
jpayne@68 107 }
jpayne@68 108
jpayne@68 109 else if(a.equals("name") || a.equals("taxname")){
jpayne@68 110 outTaxName=b;
jpayne@68 111 }else if(a.equals("name0")){
jpayne@68 112 outName0=b;
jpayne@68 113 }else if(a.equals("fname")){
jpayne@68 114 outFname=b;
jpayne@68 115 }else if(a.equals("taxid") || a.equals("tid")){
jpayne@68 116 outTaxID=Integer.parseInt(b);
jpayne@68 117 }else if(a.equals("spid")){
jpayne@68 118 outSpid=Integer.parseInt(b);
jpayne@68 119 }else if(a.equals("imgid")){
jpayne@68 120 outImgID=Integer.parseInt(b);
jpayne@68 121 }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){
jpayne@68 122 if(outMeta==null){outMeta=new ArrayList<String>();}
jpayne@68 123 int underscore=a.indexOf('_', 0);
jpayne@68 124 outMeta.add(a.substring(underscore+1)+":"+b);
jpayne@68 125 }
jpayne@68 126
jpayne@68 127 else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){
jpayne@68 128 outSketch=b;
jpayne@68 129 }
jpayne@68 130
jpayne@68 131 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
jpayne@68 132 //do nothing
jpayne@68 133 }
jpayne@68 134
jpayne@68 135 else if(b==null && new File(arg).exists()){
jpayne@68 136 in.add(arg);
jpayne@68 137 }
jpayne@68 138
jpayne@68 139 else{
jpayne@68 140 outstream.println("Unknown parameter "+args[i]);
jpayne@68 141 assert(false) : "Unknown parameter "+args[i];
jpayne@68 142 }
jpayne@68 143 }
jpayne@68 144 outMeta=SketchObject.fixMeta(outMeta);
jpayne@68 145
jpayne@68 146 blacklist=null;
jpayne@68 147
jpayne@68 148 postParse();
jpayne@68 149
jpayne@68 150 {//Process parser fields
jpayne@68 151 overwrite=ReadStats.overwrite=parser.overwrite;
jpayne@68 152 append=ReadStats.append=parser.append;
jpayne@68 153 }
jpayne@68 154
jpayne@68 155 //Ensure there is an input file
jpayne@68 156 if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 157
jpayne@68 158 //Adjust the number of threads for input file reading
jpayne@68 159 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 160 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 161 }
jpayne@68 162
jpayne@68 163 ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false);
jpayne@68 164 if(ffout!=null && !ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;}
jpayne@68 165
jpayne@68 166 if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){
jpayne@68 167 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n");
jpayne@68 168 }
jpayne@68 169
jpayne@68 170 //Ensure that no file was specified multiple times
jpayne@68 171 if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){
jpayne@68 172 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 173 }
jpayne@68 174
jpayne@68 175 tool=new SketchTool(targetSketchSize, defaultParams);
jpayne@68 176
jpayne@68 177 // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION;
jpayne@68 178 if(verbose){
jpayne@68 179 if(useWhitelist){outstream.println("Using a whitelist.");}
jpayne@68 180 if(blacklist!=null){outstream.println("Using a blacklist.");}
jpayne@68 181 }
jpayne@68 182
jpayne@68 183 defaultParams.postParse(false, false);
jpayne@68 184 allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2);
jpayne@68 185 if(!allowMultithreadedFastq){Shared.capBufferLen(40);}
jpayne@68 186 }
jpayne@68 187
jpayne@68 188 /*--------------------------------------------------------------*/
jpayne@68 189 /*---------------- Outer Methods ----------------*/
jpayne@68 190 /*--------------------------------------------------------------*/
jpayne@68 191
jpayne@68 192 private void process(Timer t){
jpayne@68 193 Timer ttotal=new Timer();
jpayne@68 194
jpayne@68 195 t.start();
jpayne@68 196 inSketches=tool.loadSketches_MT(defaultParams, in);
jpayne@68 197 final int numLoaded=(inSketches.size());
jpayne@68 198 long sum=0;
jpayne@68 199 for(Sketch sk : inSketches){
jpayne@68 200 sum+=sk.length();
jpayne@68 201 }
jpayne@68 202 t.stop();
jpayne@68 203 outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t);
jpayne@68 204 t.start();
jpayne@68 205 // outstream.println(inSketches.get(0));
jpayne@68 206
jpayne@68 207 ByteBuilder bb=new ByteBuilder();
jpayne@68 208
jpayne@68 209 int sizeOut=(int)(Sketch.AUTOSIZE ? sum : Tools.min(Sketch.targetSketchSize, sum));
jpayne@68 210 {
jpayne@68 211 Sketch.AUTOSIZE=false;
jpayne@68 212 Sketch.targetSketchSize=sizeOut;
jpayne@68 213 Sketch.maxGenomeFraction=1;
jpayne@68 214 }
jpayne@68 215 SketchHeap heap=new SketchHeap(sizeOut, 0, tool.trackCounts);
jpayne@68 216 for(Sketch sk : inSketches){
jpayne@68 217 heap.add(sk);
jpayne@68 218 }
jpayne@68 219 heap.genomeSizeKmers=Tools.max(heap.genomeSizeKmers, sizeOut);
jpayne@68 220 ArrayList<String> meta=inSketches.get(0).meta;
jpayne@68 221 if(meta==null){meta=outMeta;}
jpayne@68 222 else if(outMeta!=null){meta.addAll(outMeta);}
jpayne@68 223 Sketch union=new Sketch(heap, false, tool.trackCounts, outMeta);
jpayne@68 224
jpayne@68 225 if(outTaxName!=null){union.setTaxName(outTaxName);}
jpayne@68 226 if(outFname!=null){union.setFname(outFname);}
jpayne@68 227 if(outName0!=null){union.setName0(outName0);}
jpayne@68 228
jpayne@68 229 if(outTaxID>=0){union.taxID=(outTaxID);}
jpayne@68 230 if(outSpid>=0){union.spid=(outSpid);}
jpayne@68 231 if(outImgID>=0){union.imgID=(outImgID);}
jpayne@68 232
jpayne@68 233 if(outSketch!=null){
jpayne@68 234 ByteStreamWriter bsw=new ByteStreamWriter(outSketch, overwrite, append, true, FileFormat.SKETCH);
jpayne@68 235 bsw.start();
jpayne@68 236 union.toBytes(bb);
jpayne@68 237 bsw.print(bb);
jpayne@68 238 bb.clear();
jpayne@68 239 bsw.poisonAndWait();
jpayne@68 240 errorState|=bsw.errorState;
jpayne@68 241 t.stop();
jpayne@68 242 outstream.println("Wrote "+1+" sketch of total size "+union.length()+" in \t"+t);
jpayne@68 243 }
jpayne@68 244
jpayne@68 245 t.stop();
jpayne@68 246 // outstream.println("\nRan "+(inSketches.size()*refSketches.size())+" comparisons in \t"+t);
jpayne@68 247 ttotal.stop();
jpayne@68 248 outstream.println("Total Time: \t"+ttotal);
jpayne@68 249 }
jpayne@68 250
jpayne@68 251
jpayne@68 252 /*--------------------------------------------------------------*/
jpayne@68 253 /*---------------- Inner Methods ----------------*/
jpayne@68 254 /*--------------------------------------------------------------*/
jpayne@68 255
jpayne@68 256 private static boolean addFiles(String a, Collection<String> list){
jpayne@68 257 int initial=list.size();
jpayne@68 258 if(a==null){return false;}
jpayne@68 259 File f=null;
jpayne@68 260 if(a.indexOf(',')>=0){f=new File(a);}
jpayne@68 261 if(f==null || f.exists()){
jpayne@68 262 list.add(a);
jpayne@68 263 }else{
jpayne@68 264 for(String s : a.split(",")){
jpayne@68 265 list.add(s);
jpayne@68 266 }
jpayne@68 267 }
jpayne@68 268 return list.size()>initial;
jpayne@68 269 }
jpayne@68 270
jpayne@68 271 /*--------------------------------------------------------------*/
jpayne@68 272 /*---------------- Fields ----------------*/
jpayne@68 273 /*--------------------------------------------------------------*/
jpayne@68 274
jpayne@68 275 private ArrayList<String> in=new ArrayList<String>();
jpayne@68 276
jpayne@68 277 private String outSketch=null;
jpayne@68 278
jpayne@68 279 private final SketchTool tool;
jpayne@68 280
jpayne@68 281 private ArrayList<Sketch> inSketches;
jpayne@68 282
jpayne@68 283 /*Override metadata */
jpayne@68 284 private String outTaxName=null;
jpayne@68 285 private String outFname=null;
jpayne@68 286 private String outName0=null;
jpayne@68 287 private int outTaxID=-1;
jpayne@68 288 private long outSpid=-1;
jpayne@68 289 private long outImgID=-1;
jpayne@68 290 private ArrayList<String> outMeta=null;
jpayne@68 291
jpayne@68 292 /*--------------------------------------------------------------*/
jpayne@68 293 /*---------------- Final Fields ----------------*/
jpayne@68 294 /*--------------------------------------------------------------*/
jpayne@68 295
jpayne@68 296 /** Primary output file */
jpayne@68 297 private final FileFormat ffout;
jpayne@68 298
jpayne@68 299 /*--------------------------------------------------------------*/
jpayne@68 300 /*---------------- Common Fields ----------------*/
jpayne@68 301 /*--------------------------------------------------------------*/
jpayne@68 302
jpayne@68 303 /** Print status messages to this output stream */
jpayne@68 304 private PrintStream outstream=System.err;
jpayne@68 305 /** Print verbose messages */
jpayne@68 306 public static boolean verbose=false;
jpayne@68 307 /** True if an error was encountered */
jpayne@68 308 public boolean errorState=false;
jpayne@68 309 /** Overwrite existing output files */
jpayne@68 310 private boolean overwrite=false;
jpayne@68 311 /** Append to existing output files */
jpayne@68 312 private boolean append=false;
jpayne@68 313
jpayne@68 314 /*--------------------------------------------------------------*/
jpayne@68 315 /*---------------- Static Fields ----------------*/
jpayne@68 316 /*--------------------------------------------------------------*/
jpayne@68 317
jpayne@68 318 /** Don't print caught exceptions */
jpayne@68 319 public static boolean suppressErrors=false;
jpayne@68 320
jpayne@68 321 }