Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/MergeSketch.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package sketch; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 import java.util.Collection; | |
7 | |
8 import fileIO.ByteFile; | |
9 import fileIO.ByteStreamWriter; | |
10 import fileIO.FileFormat; | |
11 import fileIO.ReadWrite; | |
12 import shared.Parse; | |
13 import shared.Parser; | |
14 import shared.PreParser; | |
15 import shared.ReadStats; | |
16 import shared.Shared; | |
17 import shared.Timer; | |
18 import shared.Tools; | |
19 import structures.ByteBuilder; | |
20 | |
21 /** | |
22 * Combines multiple sketches into a single sketch. | |
23 * | |
24 * @author Brian Bushnell | |
25 * @date July 23, 2018 | |
26 * | |
27 */ | |
28 public class MergeSketch extends SketchObject { | |
29 | |
30 /*--------------------------------------------------------------*/ | |
31 /*---------------- Initialization ----------------*/ | |
32 /*--------------------------------------------------------------*/ | |
33 | |
34 /** | |
35 * Code entrance from the command line. | |
36 * @param args Command line arguments | |
37 */ | |
38 public static void main(String[] args){ | |
39 //Start a timer immediately upon code entrance. | |
40 Timer t=new Timer(); | |
41 | |
42 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ; | |
43 final int oldBufLen=Shared.bufferLen(); | |
44 | |
45 //Create an instance of this class | |
46 MergeSketch x=new MergeSketch(args); | |
47 | |
48 //Run the object | |
49 x.process(t); | |
50 | |
51 ReadWrite.USE_UNPIGZ=oldUnpigz; | |
52 Shared.setBufferLen(oldBufLen); | |
53 | |
54 //Close the print stream if it was redirected | |
55 Shared.closeStream(x.outstream); | |
56 | |
57 assert(!x.errorState) : "This program ended in an error state."; | |
58 } | |
59 | |
60 /** | |
61 * Constructor. | |
62 * @param args Command line arguments | |
63 */ | |
64 public MergeSketch(String[] args){ | |
65 | |
66 {//Preparse block for help, config files, and outstream | |
67 PreParser pp=new PreParser(args, null, false); | |
68 args=pp.args; | |
69 outstream=pp.outstream; | |
70 } | |
71 | |
72 //Set shared static variables | |
73 ReadWrite.USE_UNPIGZ=true; | |
74 KILL_OK=true; | |
75 | |
76 //Create a parser object | |
77 Parser parser=new Parser(); | |
78 parser.out1="stdout.txt"; | |
79 | |
80 defaultParams.printRefFileName=true; | |
81 | |
82 //Parse each argument | |
83 for(int i=0; i<args.length; i++){ | |
84 String arg=args[i]; | |
85 | |
86 //Break arguments into their constituent parts, in the form of "a=b" | |
87 String[] split=arg.split("="); | |
88 String a=split[0].toLowerCase(); | |
89 String b=split.length>1 ? split[1] : null; | |
90 | |
91 if(a.equals("verbose")){ | |
92 verbose=Parse.parseBoolean(b); | |
93 }else if(a.equals("in")){ | |
94 addFiles(b, in); | |
95 }else if(parseSketchFlags(arg, a, b)){ | |
96 //Do nothing | |
97 }else if(defaultParams.parse(arg, a, b)){ | |
98 //Do nothing | |
99 } | |
100 // else if(a.equals("size")){ | |
101 // size=Parse.parseIntKMG(b); | |
102 // } | |
103 | |
104 else if(a.equals("parse_flag_goes_here")){ | |
105 long fake_variable=Parse.parseKMG(b); | |
106 //Set a variable here | |
107 } | |
108 | |
109 else if(a.equals("name") || a.equals("taxname")){ | |
110 outTaxName=b; | |
111 }else if(a.equals("name0")){ | |
112 outName0=b; | |
113 }else if(a.equals("fname")){ | |
114 outFname=b; | |
115 }else if(a.equals("taxid") || a.equals("tid")){ | |
116 outTaxID=Integer.parseInt(b); | |
117 }else if(a.equals("spid")){ | |
118 outSpid=Integer.parseInt(b); | |
119 }else if(a.equals("imgid")){ | |
120 outImgID=Integer.parseInt(b); | |
121 }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){ | |
122 if(outMeta==null){outMeta=new ArrayList<String>();} | |
123 int underscore=a.indexOf('_', 0); | |
124 outMeta.add(a.substring(underscore+1)+":"+b); | |
125 } | |
126 | |
127 else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){ | |
128 outSketch=b; | |
129 } | |
130 | |
131 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser | |
132 //do nothing | |
133 } | |
134 | |
135 else if(b==null && new File(arg).exists()){ | |
136 in.add(arg); | |
137 } | |
138 | |
139 else{ | |
140 outstream.println("Unknown parameter "+args[i]); | |
141 assert(false) : "Unknown parameter "+args[i]; | |
142 } | |
143 } | |
144 outMeta=SketchObject.fixMeta(outMeta); | |
145 | |
146 blacklist=null; | |
147 | |
148 postParse(); | |
149 | |
150 {//Process parser fields | |
151 overwrite=ReadStats.overwrite=parser.overwrite; | |
152 append=ReadStats.append=parser.append; | |
153 } | |
154 | |
155 //Ensure there is an input file | |
156 if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} | |
157 | |
158 //Adjust the number of threads for input file reading | |
159 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
160 ByteFile.FORCE_MODE_BF2=true; | |
161 } | |
162 | |
163 ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false); | |
164 if(ffout!=null && !ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;} | |
165 | |
166 if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){ | |
167 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n"); | |
168 } | |
169 | |
170 //Ensure that no file was specified multiple times | |
171 if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){ | |
172 throw new RuntimeException("\nSome file names were specified multiple times.\n"); | |
173 } | |
174 | |
175 tool=new SketchTool(targetSketchSize, defaultParams); | |
176 | |
177 // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION; | |
178 if(verbose){ | |
179 if(useWhitelist){outstream.println("Using a whitelist.");} | |
180 if(blacklist!=null){outstream.println("Using a blacklist.");} | |
181 } | |
182 | |
183 defaultParams.postParse(false, false); | |
184 allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2); | |
185 if(!allowMultithreadedFastq){Shared.capBufferLen(40);} | |
186 } | |
187 | |
188 /*--------------------------------------------------------------*/ | |
189 /*---------------- Outer Methods ----------------*/ | |
190 /*--------------------------------------------------------------*/ | |
191 | |
192 private void process(Timer t){ | |
193 Timer ttotal=new Timer(); | |
194 | |
195 t.start(); | |
196 inSketches=tool.loadSketches_MT(defaultParams, in); | |
197 final int numLoaded=(inSketches.size()); | |
198 long sum=0; | |
199 for(Sketch sk : inSketches){ | |
200 sum+=sk.length(); | |
201 } | |
202 t.stop(); | |
203 outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t); | |
204 t.start(); | |
205 // outstream.println(inSketches.get(0)); | |
206 | |
207 ByteBuilder bb=new ByteBuilder(); | |
208 | |
209 int sizeOut=(int)(Sketch.AUTOSIZE ? sum : Tools.min(Sketch.targetSketchSize, sum)); | |
210 { | |
211 Sketch.AUTOSIZE=false; | |
212 Sketch.targetSketchSize=sizeOut; | |
213 Sketch.maxGenomeFraction=1; | |
214 } | |
215 SketchHeap heap=new SketchHeap(sizeOut, 0, tool.trackCounts); | |
216 for(Sketch sk : inSketches){ | |
217 heap.add(sk); | |
218 } | |
219 heap.genomeSizeKmers=Tools.max(heap.genomeSizeKmers, sizeOut); | |
220 ArrayList<String> meta=inSketches.get(0).meta; | |
221 if(meta==null){meta=outMeta;} | |
222 else if(outMeta!=null){meta.addAll(outMeta);} | |
223 Sketch union=new Sketch(heap, false, tool.trackCounts, outMeta); | |
224 | |
225 if(outTaxName!=null){union.setTaxName(outTaxName);} | |
226 if(outFname!=null){union.setFname(outFname);} | |
227 if(outName0!=null){union.setName0(outName0);} | |
228 | |
229 if(outTaxID>=0){union.taxID=(outTaxID);} | |
230 if(outSpid>=0){union.spid=(outSpid);} | |
231 if(outImgID>=0){union.imgID=(outImgID);} | |
232 | |
233 if(outSketch!=null){ | |
234 ByteStreamWriter bsw=new ByteStreamWriter(outSketch, overwrite, append, true, FileFormat.SKETCH); | |
235 bsw.start(); | |
236 union.toBytes(bb); | |
237 bsw.print(bb); | |
238 bb.clear(); | |
239 bsw.poisonAndWait(); | |
240 errorState|=bsw.errorState; | |
241 t.stop(); | |
242 outstream.println("Wrote "+1+" sketch of total size "+union.length()+" in \t"+t); | |
243 } | |
244 | |
245 t.stop(); | |
246 // outstream.println("\nRan "+(inSketches.size()*refSketches.size())+" comparisons in \t"+t); | |
247 ttotal.stop(); | |
248 outstream.println("Total Time: \t"+ttotal); | |
249 } | |
250 | |
251 | |
252 /*--------------------------------------------------------------*/ | |
253 /*---------------- Inner Methods ----------------*/ | |
254 /*--------------------------------------------------------------*/ | |
255 | |
256 private static boolean addFiles(String a, Collection<String> list){ | |
257 int initial=list.size(); | |
258 if(a==null){return false;} | |
259 File f=null; | |
260 if(a.indexOf(',')>=0){f=new File(a);} | |
261 if(f==null || f.exists()){ | |
262 list.add(a); | |
263 }else{ | |
264 for(String s : a.split(",")){ | |
265 list.add(s); | |
266 } | |
267 } | |
268 return list.size()>initial; | |
269 } | |
270 | |
271 /*--------------------------------------------------------------*/ | |
272 /*---------------- Fields ----------------*/ | |
273 /*--------------------------------------------------------------*/ | |
274 | |
/** Input sketch file names, collected from "in=" and from bare arguments that name existing files */
private ArrayList<String> in=new ArrayList<String>();

/** Output sketch file name; when null, process() does not write a sketch */
private String outSketch=null;

/** Created in the constructor; process() uses it to load the input sketches */
private final SketchTool tool;

/** Sketches loaded from the input files by process() */
private ArrayList<Sketch> inSketches;

/*Override metadata */
/** Set by "name" or "taxname"; applied to the merged sketch when non-null */
private String outTaxName=null;
/** Set by "fname"; applied to the merged sketch when non-null */
private String outFname=null;
/** Set by "name0"; applied to the merged sketch when non-null */
private String outName0=null;
/** Set by "taxid" or "tid"; only applied when non-negative */
private int outTaxID=-1;
/** Set by "spid"; only applied when non-negative */
private long outSpid=-1;
/** Set by "imgid"; only applied when non-negative */
private long outImgID=-1;
/** "key:value" entries built from "meta_key=value" and "mt_key=value" arguments */
private ArrayList<String> outMeta=null;

/*--------------------------------------------------------------*/
/*----------------         Final Fields         ----------------*/
/*--------------------------------------------------------------*/

/** Primary output file */
private final FileFormat ffout;

/*--------------------------------------------------------------*/
/*----------------        Common Fields         ----------------*/
/*--------------------------------------------------------------*/

/** Print status messages to this output stream */
private PrintStream outstream=System.err;
/** Print verbose messages */
public static boolean verbose=false;
/** True if an error was encountered */
public boolean errorState=false;
/** Overwrite existing output files */
private boolean overwrite=false;
/** Append to existing output files */
private boolean append=false;

/*--------------------------------------------------------------*/
/*----------------        Static Fields         ----------------*/
/*--------------------------------------------------------------*/

/** Don't print caught exceptions */
public static boolean suppressErrors=false;
320 | |
321 } |