comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/MergeSketch.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 import java.util.Collection;
7
8 import fileIO.ByteFile;
9 import fileIO.ByteStreamWriter;
10 import fileIO.FileFormat;
11 import fileIO.ReadWrite;
12 import shared.Parse;
13 import shared.Parser;
14 import shared.PreParser;
15 import shared.ReadStats;
16 import shared.Shared;
17 import shared.Timer;
18 import shared.Tools;
19 import structures.ByteBuilder;
20
21 /**
22 * Combines multiple sketches into a single sketch.
23 *
24 * @author Brian Bushnell
25 * @date July 23, 2018
26 *
27 */
28 public class MergeSketch extends SketchObject {
29
30 /*--------------------------------------------------------------*/
31 /*---------------- Initialization ----------------*/
32 /*--------------------------------------------------------------*/
33
34 /**
35 * Code entrance from the command line.
36 * @param args Command line arguments
37 */
38 public static void main(String[] args){
39 //Start a timer immediately upon code entrance.
40 Timer t=new Timer();
41
42 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
43 final int oldBufLen=Shared.bufferLen();
44
45 //Create an instance of this class
46 MergeSketch x=new MergeSketch(args);
47
48 //Run the object
49 x.process(t);
50
51 ReadWrite.USE_UNPIGZ=oldUnpigz;
52 Shared.setBufferLen(oldBufLen);
53
54 //Close the print stream if it was redirected
55 Shared.closeStream(x.outstream);
56
57 assert(!x.errorState) : "This program ended in an error state.";
58 }
59
60 /**
61 * Constructor.
62 * @param args Command line arguments
63 */
64 public MergeSketch(String[] args){
65
66 {//Preparse block for help, config files, and outstream
67 PreParser pp=new PreParser(args, null, false);
68 args=pp.args;
69 outstream=pp.outstream;
70 }
71
72 //Set shared static variables
73 ReadWrite.USE_UNPIGZ=true;
74 KILL_OK=true;
75
76 //Create a parser object
77 Parser parser=new Parser();
78 parser.out1="stdout.txt";
79
80 defaultParams.printRefFileName=true;
81
82 //Parse each argument
83 for(int i=0; i<args.length; i++){
84 String arg=args[i];
85
86 //Break arguments into their constituent parts, in the form of "a=b"
87 String[] split=arg.split("=");
88 String a=split[0].toLowerCase();
89 String b=split.length>1 ? split[1] : null;
90
91 if(a.equals("verbose")){
92 verbose=Parse.parseBoolean(b);
93 }else if(a.equals("in")){
94 addFiles(b, in);
95 }else if(parseSketchFlags(arg, a, b)){
96 //Do nothing
97 }else if(defaultParams.parse(arg, a, b)){
98 //Do nothing
99 }
100 // else if(a.equals("size")){
101 // size=Parse.parseIntKMG(b);
102 // }
103
104 else if(a.equals("parse_flag_goes_here")){
105 long fake_variable=Parse.parseKMG(b);
106 //Set a variable here
107 }
108
109 else if(a.equals("name") || a.equals("taxname")){
110 outTaxName=b;
111 }else if(a.equals("name0")){
112 outName0=b;
113 }else if(a.equals("fname")){
114 outFname=b;
115 }else if(a.equals("taxid") || a.equals("tid")){
116 outTaxID=Integer.parseInt(b);
117 }else if(a.equals("spid")){
118 outSpid=Integer.parseInt(b);
119 }else if(a.equals("imgid")){
120 outImgID=Integer.parseInt(b);
121 }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){
122 if(outMeta==null){outMeta=new ArrayList<String>();}
123 int underscore=a.indexOf('_', 0);
124 outMeta.add(a.substring(underscore+1)+":"+b);
125 }
126
127 else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){
128 outSketch=b;
129 }
130
131 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
132 //do nothing
133 }
134
135 else if(b==null && new File(arg).exists()){
136 in.add(arg);
137 }
138
139 else{
140 outstream.println("Unknown parameter "+args[i]);
141 assert(false) : "Unknown parameter "+args[i];
142 }
143 }
144 outMeta=SketchObject.fixMeta(outMeta);
145
146 blacklist=null;
147
148 postParse();
149
150 {//Process parser fields
151 overwrite=ReadStats.overwrite=parser.overwrite;
152 append=ReadStats.append=parser.append;
153 }
154
155 //Ensure there is an input file
156 if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
157
158 //Adjust the number of threads for input file reading
159 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
160 ByteFile.FORCE_MODE_BF2=true;
161 }
162
163 ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false);
164 if(ffout!=null && !ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;}
165
166 if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){
167 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n");
168 }
169
170 //Ensure that no file was specified multiple times
171 if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){
172 throw new RuntimeException("\nSome file names were specified multiple times.\n");
173 }
174
175 tool=new SketchTool(targetSketchSize, defaultParams);
176
177 // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION;
178 if(verbose){
179 if(useWhitelist){outstream.println("Using a whitelist.");}
180 if(blacklist!=null){outstream.println("Using a blacklist.");}
181 }
182
183 defaultParams.postParse(false, false);
184 allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2);
185 if(!allowMultithreadedFastq){Shared.capBufferLen(40);}
186 }
187
188 /*--------------------------------------------------------------*/
189 /*---------------- Outer Methods ----------------*/
190 /*--------------------------------------------------------------*/
191
192 private void process(Timer t){
193 Timer ttotal=new Timer();
194
195 t.start();
196 inSketches=tool.loadSketches_MT(defaultParams, in);
197 final int numLoaded=(inSketches.size());
198 long sum=0;
199 for(Sketch sk : inSketches){
200 sum+=sk.length();
201 }
202 t.stop();
203 outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t);
204 t.start();
205 // outstream.println(inSketches.get(0));
206
207 ByteBuilder bb=new ByteBuilder();
208
209 int sizeOut=(int)(Sketch.AUTOSIZE ? sum : Tools.min(Sketch.targetSketchSize, sum));
210 {
211 Sketch.AUTOSIZE=false;
212 Sketch.targetSketchSize=sizeOut;
213 Sketch.maxGenomeFraction=1;
214 }
215 SketchHeap heap=new SketchHeap(sizeOut, 0, tool.trackCounts);
216 for(Sketch sk : inSketches){
217 heap.add(sk);
218 }
219 heap.genomeSizeKmers=Tools.max(heap.genomeSizeKmers, sizeOut);
220 ArrayList<String> meta=inSketches.get(0).meta;
221 if(meta==null){meta=outMeta;}
222 else if(outMeta!=null){meta.addAll(outMeta);}
223 Sketch union=new Sketch(heap, false, tool.trackCounts, outMeta);
224
225 if(outTaxName!=null){union.setTaxName(outTaxName);}
226 if(outFname!=null){union.setFname(outFname);}
227 if(outName0!=null){union.setName0(outName0);}
228
229 if(outTaxID>=0){union.taxID=(outTaxID);}
230 if(outSpid>=0){union.spid=(outSpid);}
231 if(outImgID>=0){union.imgID=(outImgID);}
232
233 if(outSketch!=null){
234 ByteStreamWriter bsw=new ByteStreamWriter(outSketch, overwrite, append, true, FileFormat.SKETCH);
235 bsw.start();
236 union.toBytes(bb);
237 bsw.print(bb);
238 bb.clear();
239 bsw.poisonAndWait();
240 errorState|=bsw.errorState;
241 t.stop();
242 outstream.println("Wrote "+1+" sketch of total size "+union.length()+" in \t"+t);
243 }
244
245 t.stop();
246 // outstream.println("\nRan "+(inSketches.size()*refSketches.size())+" comparisons in \t"+t);
247 ttotal.stop();
248 outstream.println("Total Time: \t"+ttotal);
249 }
250
251
252 /*--------------------------------------------------------------*/
253 /*---------------- Inner Methods ----------------*/
254 /*--------------------------------------------------------------*/
255
256 private static boolean addFiles(String a, Collection<String> list){
257 int initial=list.size();
258 if(a==null){return false;}
259 File f=null;
260 if(a.indexOf(',')>=0){f=new File(a);}
261 if(f==null || f.exists()){
262 list.add(a);
263 }else{
264 for(String s : a.split(",")){
265 list.add(s);
266 }
267 }
268 return list.size()>initial;
269 }
270
271 /*--------------------------------------------------------------*/
272 /*---------------- Fields ----------------*/
273 /*--------------------------------------------------------------*/
274
275 private ArrayList<String> in=new ArrayList<String>();
276
277 private String outSketch=null;
278
279 private final SketchTool tool;
280
281 private ArrayList<Sketch> inSketches;
282
283 /*Override metadata */
284 private String outTaxName=null;
285 private String outFname=null;
286 private String outName0=null;
287 private int outTaxID=-1;
288 private long outSpid=-1;
289 private long outImgID=-1;
290 private ArrayList<String> outMeta=null;
291
292 /*--------------------------------------------------------------*/
293 /*---------------- Final Fields ----------------*/
294 /*--------------------------------------------------------------*/
295
296 /** Primary output file */
297 private final FileFormat ffout;
298
299 /*--------------------------------------------------------------*/
300 /*---------------- Common Fields ----------------*/
301 /*--------------------------------------------------------------*/
302
303 /** Print status messages to this output stream */
304 private PrintStream outstream=System.err;
305 /** Print verbose messages */
306 public static boolean verbose=false;
307 /** True if an error was encountered */
308 public boolean errorState=false;
309 /** Overwrite existing output files */
310 private boolean overwrite=false;
311 /** Append to existing output files */
312 private boolean append=false;
313
314 /*--------------------------------------------------------------*/
315 /*---------------- Static Fields ----------------*/
316 /*--------------------------------------------------------------*/
317
318 /** Don't print caught exceptions */
319 public static boolean suppressErrors=false;
320
321 }