comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SubSketch.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 import java.util.Collection;
7 import java.util.LinkedHashSet;
8
9 import fileIO.ByteFile;
10 import fileIO.ByteStreamWriter;
11 import fileIO.FileFormat;
12 import fileIO.ReadWrite;
13 import shared.Parse;
14 import shared.Parser;
15 import shared.PreParser;
16 import shared.ReadStats;
17 import shared.Shared;
18 import shared.Timer;
19 import shared.Tools;
20 import structures.ByteBuilder;
21
22 /**
23 * Generates smaller sketches from input sketches.
24 *
25 * @author Brian Bushnell
26 * @date July 23, 2018
27 *
28 */
29 public class SubSketch extends SketchObject {
30
31 /*--------------------------------------------------------------*/
32 /*---------------- Initialization ----------------*/
33 /*--------------------------------------------------------------*/
34
35 /**
36 * Code entrance from the command line.
37 * @param args Command line arguments
38 */
39 public static void main(String[] args){
40 //Start a timer immediately upon code entrance.
41 Timer t=new Timer();
42
43 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
44 final int oldBufLen=Shared.bufferLen();
45
46 //Create an instance of this class
47 SubSketch x=new SubSketch(args);
48
49 //Run the object
50 x.process(t);
51
52 ReadWrite.USE_UNPIGZ=oldUnpigz;
53 Shared.setBufferLen(oldBufLen);
54
55 //Close the print stream if it was redirected
56 Shared.closeStream(x.outstream);
57
58 assert(!x.errorState) : "This program ended in an error state.";
59 }
60
61 /**
62 * Constructor.
63 * @param args Command line arguments
64 */
65 public SubSketch(String[] args){
66
67 {//Preparse block for help, config files, and outstream
68 PreParser pp=new PreParser(args, null, false);
69 args=pp.args;
70 outstream=pp.outstream;
71 }
72
73 //Set shared static variables
74 ReadWrite.USE_UNPIGZ=true;
75 KILL_OK=true;
76
77 //Create a parser object
78 Parser parser=new Parser();
79
80 defaultParams.printRefFileName=true;
81
82 //Parse each argument
83 for(int i=0; i<args.length; i++){
84 String arg=args[i];
85
86 //Break arguments into their constituent parts, in the form of "a=b"
87 String[] split=arg.split("=");
88 String a=split[0].toLowerCase();
89 String b=split.length>1 ? split[1] : null;
90
91 if(a.equals("verbose")){
92 verbose=Parse.parseBoolean(b);
93 }else if(a.equals("in")){
94 addFiles(b, in);
95 }else if(a.equals("files")){
96 files=Integer.parseInt(b);
97 }else if(parseSketchFlags(arg, a, b)){
98 //Do nothing
99 }else if(defaultParams.parse(arg, a, b)){
100 //Do nothing
101 }
102 // else if(a.equals("size")){
103 // size=Parse.parseIntKMG(b);
104 // }
105
106 else if(a.equals("parse_flag_goes_here")){
107 long fake_variable=Parse.parseKMG(b);
108 //Set a variable here
109 }
110
111 else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){
112 outSketch=b;
113 }
114
115 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
116 //do nothing
117 }
118
119 else if(b==null && new File(arg).exists()){
120 in.add(arg);
121 }
122
123 else{
124 outstream.println("Unknown parameter "+args[i]);
125 assert(false) : "Unknown parameter "+args[i];
126 }
127 }
128 assert(targetSketchSize>0) : "Must set size.";
129
130 {//Expand # symbol
131 LinkedHashSet<String> expanded=new LinkedHashSet<String>();
132 for(String s : in){SketchSearcher.addFiles(s, expanded);}
133 in.clear();
134 in.addAll(expanded);
135 }
136
137 postParse();
138
139 {//Process parser fields
140 overwrite=ReadStats.overwrite=parser.overwrite;
141 append=ReadStats.append=parser.append;
142 }
143
144 //Ensure there is an input file
145 if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
146
147 //Adjust the number of threads for input file reading
148 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
149 ByteFile.FORCE_MODE_BF2=true;
150 }
151
152 if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){
153 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n");
154 }
155 // assert(false) : ffout;
156
157 //Ensure that no file was specified multiple times
158 if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){
159 throw new RuntimeException("\nSome file names were specified multiple times.\n");
160 }
161
162 tool=new SketchTool(targetSketchSize, defaultParams);
163
164 // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION;
165 if(verbose || true){
166 if(useWhitelist){outstream.println("Using a whitelist.");}
167 if(blacklist!=null){outstream.println("Using a blacklist.");}
168 }
169
170 defaultParams.postParse(false, false);
171 allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2);
172 if(!allowMultithreadedFastq){Shared.capBufferLen(40);}
173 }
174
175 /*--------------------------------------------------------------*/
176 /*---------------- Outer Methods ----------------*/
177 /*--------------------------------------------------------------*/
178
179 private void process(Timer t){
180 Timer ttotal=new Timer();
181
182 t.start();
183 inSketches=tool.loadSketches_MT(defaultParams, in);
184 final int numLoaded=(inSketches.size());
185 long sum=0;
186 for(Sketch sk : inSketches){
187 sum+=sk.length();
188 }
189 t.stop();
190 outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t);
191 t.start();
192 if(verbose && numLoaded>0){
193 System.err.println("First sketch:\n"+inSketches.get(0));
194 }
195 // outstream.println(inSketches.get(0));
196
197 int sizeOut=Sketch.targetSketchSize;
198 {
199 if(Sketch.SET_TARGET_SIZE){Sketch.AUTOSIZE=false;}
200 Sketch.targetSketchSize=sizeOut;
201 Sketch.maxGenomeFraction=1;
202 }
203
204 if(outSketch!=null && outSketch.indexOf('#')>=1 && files>1){
205 ByteStreamWriter[] bswArray=new ByteStreamWriter[files];
206 for(int i=0; i<files; i++){
207 FileFormat ffout=FileFormat.testOutput(outSketch.replace("#", ""+i), FileFormat.SKETCH, null, false, overwrite, append, false);
208 ByteStreamWriter bsw=new ByteStreamWriter(ffout);
209 bsw.start();
210 bswArray[i]=bsw;
211 }
212
213 processInner(inSketches, bswArray);
214
215 for(ByteStreamWriter bsw : bswArray){
216 bsw.poisonAndWait();
217 errorState|=bsw.errorState;
218 }
219 }else{
220 FileFormat ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false);
221 ByteStreamWriter bsw=null;
222 if(ffout!=null){
223 bsw=new ByteStreamWriter(ffout);
224 bsw.start();
225 }
226
227 processInner(inSketches, bsw);
228
229 if(bsw!=null){
230 bsw.poisonAndWait();
231 errorState|=bsw.errorState;
232 }
233 }
234
235 t.stop();
236 if(blacklist!=null){outstream.println("Evicted "+blackKeys+" blacklisted keys.");}
237 outstream.println("Wrote "+sketchesOut+" sketches of total size "+keysOut+" in "+t);
238
239 t.stop();
240 ttotal.stop();
241 outstream.println("Total Time: \t"+ttotal);
242 }
243
244 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bsw){
245 ByteBuilder bb=new ByteBuilder();
246 for(Sketch sk : sketches){
247 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
248 // if(!defaultParams.trackCounts()){sk.keyCounts=null;}
249 if(blacklist!=null){blackKeys+=sk.applyBlacklist();}
250 if(sk.length()>target){
251 sk.resize(target);
252 if(verbose){System.err.println("Resized to:\n"+sk);}
253 }
254 if(sk.length()>=minSketchSize){
255 keysOut+=sk.length();
256 sketchesOut++;
257 sk.toBytes(bb);
258 if(verbose){System.err.println("toBytes:\n"+bb);}
259 if(bsw!=null){bsw.print(bb);}
260 bb.clear();
261 }
262 }
263 }
264
265 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bswa[]){
266 ByteBuilder bb=new ByteBuilder();
267 for(Sketch sk : sketches){
268 //final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
269 // if(!defaultParams.trackCounts()){sk.keyCounts=null;}
270 if(blacklist!=null){blackKeys+=sk.applyBlacklist();}
271
272 //Calculating target after applying blacklist gives better consistency with actual usage
273 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
274
275 if(sk.length()>target){
276 sk.resize(target);
277 if(verbose){System.err.println("Resized to:\n"+sk);}
278 }
279 if(sk.length()>=minSketchSize){
280 keysOut+=sk.length();
281 sketchesOut++;
282
283 if(bswa!=null){
284 ByteStreamWriter bsw=bswa[sk.sketchID%files];
285 if(sk.fname()!=null && sk.fname().endsWith(".sketch")){sk.setFname(bsw.fname);}
286 sk.toBytes(bb);//This is the time-limiting factor; could be multithreaded.
287 if(verbose){System.err.println("toBytes:\n"+bb);}
288 bsw.print(bb);
289 }
290 bb.clear();
291 }
292 }
293 }
294
295 /*--------------------------------------------------------------*/
296 /*---------------- Inner Methods ----------------*/
297 /*--------------------------------------------------------------*/
298
299 private static boolean addFiles(String a, Collection<String> list){
300 int initial=list.size();
301 if(a==null){return false;}
302 File f=null;
303 if(a.indexOf(',')>=0){f=new File(a);}
304 if(f==null || f.exists()){
305 list.add(a);
306 }else{
307 for(String s : a.split(",")){
308 list.add(s);
309 }
310 }
311 return list.size()>initial;
312 }
313
314 /*--------------------------------------------------------------*/
315 /*---------------- Fields ----------------*/
316 /*--------------------------------------------------------------*/
317
/** Input sketch file names. Filled from in= and from bare arguments that name existing files, then replaced in the constructor by the expanded set produced by SketchSearcher.addFiles. */
318 private LinkedHashSet<String> in=new LinkedHashSet<String>();
319
/** Output path, set by out=, outsketch=, outs=, sketchout= or sketch=. A '#' in it is replaced by the file index when output is split over several files. */
320 private String outSketch=null;
321
/** Created in the constructor from targetSketchSize and defaultParams; used in process() to load the input sketches. */
322 private final SketchTool tool;
323
/** Sketches loaded from the input files; assigned in process(). */
324 private ArrayList<Sketch> inSketches;
325
/** Sum of the lengths of all sketches that met minSketchSize in processInner. */
326 private long keysOut=0;
/** Number of sketches that met minSketchSize in processInner. */
327 private long sketchesOut=0;
/** Sum of the values returned by Sketch.applyBlacklist(); reported as evicted blacklisted keys. */
328 private long blackKeys=0;
329
/** Number of output files to spread sketches over; set by files=. Only used when it is greater than 1 and outSketch contains '#'. */
330 private int files=31;
331
332 /*--------------------------------------------------------------*/
333 /*---------------- Final Fields ----------------*/
334 /*--------------------------------------------------------------*/
335
336 /*--------------------------------------------------------------*/
337 /*---------------- Common Fields ----------------*/
338 /*--------------------------------------------------------------*/
339
340 /** Print status messages to this output stream */
341 private PrintStream outstream=System.err;
342 /** Print verbose messages */
/* When true, process() and processInner() also dump sketches to System.err. */
343 public static boolean verbose=false;
344 /** True if an error was encountered */
/* Set from the errorState of the output writers once they have finished. */
345 public boolean errorState=false;
346 /** Overwrite existing output files */
347 private boolean overwrite=false;
348 /** Append to existing output files */
349 private boolean append=false;
350
351 /*--------------------------------------------------------------*/
352 /*---------------- Static Fields ----------------*/
353 /*--------------------------------------------------------------*/
354
355 /** Don't print caught exceptions */
/* NOTE(review): not read anywhere in this file - confirm whether other classes use it. */
356 public static boolean suppressErrors=false;
357
358 }