Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/SubSketch.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package sketch; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 import java.util.Collection; | |
7 import java.util.LinkedHashSet; | |
8 | |
9 import fileIO.ByteFile; | |
10 import fileIO.ByteStreamWriter; | |
11 import fileIO.FileFormat; | |
12 import fileIO.ReadWrite; | |
13 import shared.Parse; | |
14 import shared.Parser; | |
15 import shared.PreParser; | |
16 import shared.ReadStats; | |
17 import shared.Shared; | |
18 import shared.Timer; | |
19 import shared.Tools; | |
20 import structures.ByteBuilder; | |
21 | |
22 /** | |
23 * Generates smaller sketches from input sketches. | |
24 * | |
25 * @author Brian Bushnell | |
26 * @date July 23, 2018 | |
27 * | |
28 */ | |
29 public class SubSketch extends SketchObject { | |
30 | |
31 /*--------------------------------------------------------------*/ | |
32 /*---------------- Initialization ----------------*/ | |
33 /*--------------------------------------------------------------*/ | |
34 | |
35 /** | |
36 * Code entrance from the command line. | |
37 * @param args Command line arguments | |
38 */ | |
39 public static void main(String[] args){ | |
40 //Start a timer immediately upon code entrance. | |
41 Timer t=new Timer(); | |
42 | |
43 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ; | |
44 final int oldBufLen=Shared.bufferLen(); | |
45 | |
46 //Create an instance of this class | |
47 SubSketch x=new SubSketch(args); | |
48 | |
49 //Run the object | |
50 x.process(t); | |
51 | |
52 ReadWrite.USE_UNPIGZ=oldUnpigz; | |
53 Shared.setBufferLen(oldBufLen); | |
54 | |
55 //Close the print stream if it was redirected | |
56 Shared.closeStream(x.outstream); | |
57 | |
58 assert(!x.errorState) : "This program ended in an error state."; | |
59 } | |
60 | |
61 /** | |
62 * Constructor. | |
63 * @param args Command line arguments | |
64 */ | |
65 public SubSketch(String[] args){ | |
66 | |
67 {//Preparse block for help, config files, and outstream | |
68 PreParser pp=new PreParser(args, null, false); | |
69 args=pp.args; | |
70 outstream=pp.outstream; | |
71 } | |
72 | |
73 //Set shared static variables | |
74 ReadWrite.USE_UNPIGZ=true; | |
75 KILL_OK=true; | |
76 | |
77 //Create a parser object | |
78 Parser parser=new Parser(); | |
79 | |
80 defaultParams.printRefFileName=true; | |
81 | |
82 //Parse each argument | |
83 for(int i=0; i<args.length; i++){ | |
84 String arg=args[i]; | |
85 | |
86 //Break arguments into their constituent parts, in the form of "a=b" | |
87 String[] split=arg.split("="); | |
88 String a=split[0].toLowerCase(); | |
89 String b=split.length>1 ? split[1] : null; | |
90 | |
91 if(a.equals("verbose")){ | |
92 verbose=Parse.parseBoolean(b); | |
93 }else if(a.equals("in")){ | |
94 addFiles(b, in); | |
95 }else if(a.equals("files")){ | |
96 files=Integer.parseInt(b); | |
97 }else if(parseSketchFlags(arg, a, b)){ | |
98 //Do nothing | |
99 }else if(defaultParams.parse(arg, a, b)){ | |
100 //Do nothing | |
101 } | |
102 // else if(a.equals("size")){ | |
103 // size=Parse.parseIntKMG(b); | |
104 // } | |
105 | |
106 else if(a.equals("parse_flag_goes_here")){ | |
107 long fake_variable=Parse.parseKMG(b); | |
108 //Set a variable here | |
109 } | |
110 | |
111 else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){ | |
112 outSketch=b; | |
113 } | |
114 | |
115 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser | |
116 //do nothing | |
117 } | |
118 | |
119 else if(b==null && new File(arg).exists()){ | |
120 in.add(arg); | |
121 } | |
122 | |
123 else{ | |
124 outstream.println("Unknown parameter "+args[i]); | |
125 assert(false) : "Unknown parameter "+args[i]; | |
126 } | |
127 } | |
128 assert(targetSketchSize>0) : "Must set size."; | |
129 | |
130 {//Expand # symbol | |
131 LinkedHashSet<String> expanded=new LinkedHashSet<String>(); | |
132 for(String s : in){SketchSearcher.addFiles(s, expanded);} | |
133 in.clear(); | |
134 in.addAll(expanded); | |
135 } | |
136 | |
137 postParse(); | |
138 | |
139 {//Process parser fields | |
140 overwrite=ReadStats.overwrite=parser.overwrite; | |
141 append=ReadStats.append=parser.append; | |
142 } | |
143 | |
144 //Ensure there is an input file | |
145 if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} | |
146 | |
147 //Adjust the number of threads for input file reading | |
148 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
149 ByteFile.FORCE_MODE_BF2=true; | |
150 } | |
151 | |
152 if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){ | |
153 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n"); | |
154 } | |
155 // assert(false) : ffout; | |
156 | |
157 //Ensure that no file was specified multiple times | |
158 if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){ | |
159 throw new RuntimeException("\nSome file names were specified multiple times.\n"); | |
160 } | |
161 | |
162 tool=new SketchTool(targetSketchSize, defaultParams); | |
163 | |
164 // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION; | |
165 if(verbose || true){ | |
166 if(useWhitelist){outstream.println("Using a whitelist.");} | |
167 if(blacklist!=null){outstream.println("Using a blacklist.");} | |
168 } | |
169 | |
170 defaultParams.postParse(false, false); | |
171 allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2); | |
172 if(!allowMultithreadedFastq){Shared.capBufferLen(40);} | |
173 } | |
174 | |
175 /*--------------------------------------------------------------*/ | |
176 /*---------------- Outer Methods ----------------*/ | |
177 /*--------------------------------------------------------------*/ | |
178 | |
179 private void process(Timer t){ | |
180 Timer ttotal=new Timer(); | |
181 | |
182 t.start(); | |
183 inSketches=tool.loadSketches_MT(defaultParams, in); | |
184 final int numLoaded=(inSketches.size()); | |
185 long sum=0; | |
186 for(Sketch sk : inSketches){ | |
187 sum+=sk.length(); | |
188 } | |
189 t.stop(); | |
190 outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t); | |
191 t.start(); | |
192 if(verbose && numLoaded>0){ | |
193 System.err.println("First sketch:\n"+inSketches.get(0)); | |
194 } | |
195 // outstream.println(inSketches.get(0)); | |
196 | |
197 int sizeOut=Sketch.targetSketchSize; | |
198 { | |
199 if(Sketch.SET_TARGET_SIZE){Sketch.AUTOSIZE=false;} | |
200 Sketch.targetSketchSize=sizeOut; | |
201 Sketch.maxGenomeFraction=1; | |
202 } | |
203 | |
204 if(outSketch!=null && outSketch.indexOf('#')>=1 && files>1){ | |
205 ByteStreamWriter[] bswArray=new ByteStreamWriter[files]; | |
206 for(int i=0; i<files; i++){ | |
207 FileFormat ffout=FileFormat.testOutput(outSketch.replace("#", ""+i), FileFormat.SKETCH, null, false, overwrite, append, false); | |
208 ByteStreamWriter bsw=new ByteStreamWriter(ffout); | |
209 bsw.start(); | |
210 bswArray[i]=bsw; | |
211 } | |
212 | |
213 processInner(inSketches, bswArray); | |
214 | |
215 for(ByteStreamWriter bsw : bswArray){ | |
216 bsw.poisonAndWait(); | |
217 errorState|=bsw.errorState; | |
218 } | |
219 }else{ | |
220 FileFormat ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false); | |
221 ByteStreamWriter bsw=null; | |
222 if(ffout!=null){ | |
223 bsw=new ByteStreamWriter(ffout); | |
224 bsw.start(); | |
225 } | |
226 | |
227 processInner(inSketches, bsw); | |
228 | |
229 if(bsw!=null){ | |
230 bsw.poisonAndWait(); | |
231 errorState|=bsw.errorState; | |
232 } | |
233 } | |
234 | |
235 t.stop(); | |
236 if(blacklist!=null){outstream.println("Evicted "+blackKeys+" blacklisted keys.");} | |
237 outstream.println("Wrote "+sketchesOut+" sketches of total size "+keysOut+" in "+t); | |
238 | |
239 t.stop(); | |
240 ttotal.stop(); | |
241 outstream.println("Total Time: \t"+ttotal); | |
242 } | |
243 | |
244 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bsw){ | |
245 ByteBuilder bb=new ByteBuilder(); | |
246 for(Sketch sk : sketches){ | |
247 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; | |
248 // if(!defaultParams.trackCounts()){sk.keyCounts=null;} | |
249 if(blacklist!=null){blackKeys+=sk.applyBlacklist();} | |
250 if(sk.length()>target){ | |
251 sk.resize(target); | |
252 if(verbose){System.err.println("Resized to:\n"+sk);} | |
253 } | |
254 if(sk.length()>=minSketchSize){ | |
255 keysOut+=sk.length(); | |
256 sketchesOut++; | |
257 sk.toBytes(bb); | |
258 if(verbose){System.err.println("toBytes:\n"+bb);} | |
259 if(bsw!=null){bsw.print(bb);} | |
260 bb.clear(); | |
261 } | |
262 } | |
263 } | |
264 | |
265 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bswa[]){ | |
266 ByteBuilder bb=new ByteBuilder(); | |
267 for(Sketch sk : sketches){ | |
268 //final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; | |
269 // if(!defaultParams.trackCounts()){sk.keyCounts=null;} | |
270 if(blacklist!=null){blackKeys+=sk.applyBlacklist();} | |
271 | |
272 //Calculating target after applying blacklist gives better consistency with actual usage | |
273 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize; | |
274 | |
275 if(sk.length()>target){ | |
276 sk.resize(target); | |
277 if(verbose){System.err.println("Resized to:\n"+sk);} | |
278 } | |
279 if(sk.length()>=minSketchSize){ | |
280 keysOut+=sk.length(); | |
281 sketchesOut++; | |
282 | |
283 if(bswa!=null){ | |
284 ByteStreamWriter bsw=bswa[sk.sketchID%files]; | |
285 if(sk.fname()!=null && sk.fname().endsWith(".sketch")){sk.setFname(bsw.fname);} | |
286 sk.toBytes(bb);//This is the time-limiting factor; could be multithreaded. | |
287 if(verbose){System.err.println("toBytes:\n"+bb);} | |
288 bsw.print(bb); | |
289 } | |
290 bb.clear(); | |
291 } | |
292 } | |
293 } | |
294 | |
295 /*--------------------------------------------------------------*/ | |
296 /*---------------- Inner Methods ----------------*/ | |
297 /*--------------------------------------------------------------*/ | |
298 | |
299 private static boolean addFiles(String a, Collection<String> list){ | |
300 int initial=list.size(); | |
301 if(a==null){return false;} | |
302 File f=null; | |
303 if(a.indexOf(',')>=0){f=new File(a);} | |
304 if(f==null || f.exists()){ | |
305 list.add(a); | |
306 }else{ | |
307 for(String s : a.split(",")){ | |
308 list.add(s); | |
309 } | |
310 } | |
311 return list.size()>initial; | |
312 } | |
313 | |
314 /*--------------------------------------------------------------*/ | |
315 /*---------------- Fields ----------------*/ | |
316 /*--------------------------------------------------------------*/ | |
317 | |
318 private LinkedHashSet<String> in=new LinkedHashSet<String>(); | |
319 | |
320 private String outSketch=null; | |
321 | |
322 private final SketchTool tool; | |
323 | |
324 private ArrayList<Sketch> inSketches; | |
325 | |
326 private long keysOut=0; | |
327 private long sketchesOut=0; | |
328 private long blackKeys=0; | |
329 | |
330 private int files=31; | |
331 | |
332 /*--------------------------------------------------------------*/ | |
333 /*---------------- Final Fields ----------------*/ | |
334 /*--------------------------------------------------------------*/ | |
335 | |
336 /*--------------------------------------------------------------*/ | |
337 /*---------------- Common Fields ----------------*/ | |
338 /*--------------------------------------------------------------*/ | |
339 | |
340 /** Print status messages to this output stream */ | |
341 private PrintStream outstream=System.err; | |
342 /** Print verbose messages */ | |
343 public static boolean verbose=false; | |
344 /** True if an error was encountered */ | |
345 public boolean errorState=false; | |
346 /** Overwrite existing output files */ | |
347 private boolean overwrite=false; | |
348 /** Append to existing output files */ | |
349 private boolean append=false; | |
350 | |
351 /*--------------------------------------------------------------*/ | |
352 /*---------------- Static Fields ----------------*/ | |
353 /*--------------------------------------------------------------*/ | |
354 | |
355 /** Don't print caught exceptions */ | |
356 public static boolean suppressErrors=false; | |
357 | |
358 } |