comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/CompareSketch.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 import java.util.Collection;
7 import java.util.LinkedHashSet;
8 import java.util.concurrent.ConcurrentHashMap;
9 import java.util.concurrent.atomic.AtomicInteger;
10
11 import fileIO.ByteFile;
12 import fileIO.ByteStreamWriter;
13 import fileIO.FileFormat;
14 import fileIO.ReadWrite;
15 import kmer.AbstractKmerTableSet;
16 import shared.Parse;
17 import shared.Parser;
18 import shared.PreParser;
19 import shared.ReadStats;
20 import shared.Shared;
21 import shared.Timer;
22 import shared.Tools;
23 import structures.ByteBuilder;
24 import tax.TaxFilter;
25 import tax.TaxTree;
26
27 /**
28 * Compares one or more input sketches to a set of reference sketches.
29 *
30 * @author Brian Bushnell
31 * @date July 29, 2016
32 *
33 */
34 public class CompareSketch extends SketchObject {
35
36
37
38 /*--------------------------------------------------------------*/
39 /*---------------- Initialization ----------------*/
40 /*--------------------------------------------------------------*/
41
42 /**
43 * Code entrance from the command line.
44 * @param args Command line arguments
45 */
46 public static void main(String[] args){
47
48 //Start a timer immediately upon code entrance.
49 Timer t=new Timer();
50
51 final int oldBufLen=Shared.bufferLen();
52
53 //Create an instance of this class
54 CompareSketch x=new CompareSketch(args);
55
56 //Run the object
57 x.process(t);
58
59 Shared.setBufferLen(oldBufLen);
60
61 //Close the print stream if it was redirected
62 Shared.closeStream(x.outstream);
63
64 alignerPool.poison();
65 }
66
67 /**
68 * Constructor.
69 * @param args Command line arguments
70 */
71 public CompareSketch(String[] args){
72
73 {//Preparse block for help, config files, and outstream
74 PreParser pp=new PreParser(args, null, false);
75 // PreParser pp=new PreParser(args, getClass(), false);
76 args=pp.args;
77 outstream=pp.outstream;
78 silent=PreParser.silent;
79 if(silent){AbstractKmerTableSet.DISPLAY_PROGRESS=false;}
80 }
81
82 //Set shared static variables
83 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
84 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
85 KILL_OK=true;
86 TaxFilter.REQUIRE_PRESENT=false;
87 defaultParams.mode=PER_FILE;
88
89 //Create a parser object
90 Parser parser=new Parser();
91 parser.out1="stdout.txt";
92
93 //Parse each argument
94 for(int i=0; i<args.length; i++){
95 String arg=args[i];
96
97 //Break arguments into their constituent parts, in the form of "a=b"
98 String[] split=arg.split("=");
99 String a=split[0].toLowerCase();
100 String b=split.length>1 ? split[1] : null;
101
102 if(a.equals("verbose")){
103 verbose=Parse.parseBoolean(b);
104 }else if(a.equals("in")){
105 addFiles(b, in);
106 }else if(parseSketchFlags(arg, a, b)){
107 //Do nothing
108 }else if(a.equals("parse_flag_goes_here")){
109 long fake_variable=Parse.parseKMG(b);
110 //Set a variable here
111 }else if(a.equals("ordered")){
112 ordered=Parse.parseBoolean(b);
113 }else if(a.equals("alltoall") || a.equals("ata")){
114 allToAll=Parse.parseBoolean(b);
115 }else if(a.equals("skipcompare") || a.equals("sketchonly")){
116 skipCompare=Parse.parseBoolean(b);
117 }else if(a.equals("compareself") || a.equals("includeself")){
118 compareSelf=Parse.parseBoolean(b);
119 }else if(a.equals("printmemory")){
120 printMemory=Parse.parseBoolean(b);
121 }else if(a.equals("parsesubunit")){
122 SketchMaker.parseSubunit=Parse.parseBoolean(b);
123 }
124
125 else if(a.equals("taxtree") || a.equals("tree")){
126 taxTreeFile=b;
127 }
128
129 else if(a.equals("name") || a.equals("taxname")){
130 outTaxName=b;
131 }else if(a.equals("name0")){
132 outName0=b;
133 }else if(a.equals("fname")){
134 outFname=b;
135 }else if(a.equals("outsketch") || a.equals("sketchout") || a.equals("outs") || a.equals("sketch")){
136 outSketch=b;
137 }else if(a.equals("files")){
138 sketchFiles=Integer.parseInt(b);
139 }else if(a.equals("taxid") || a.equals("tid")){
140 outTaxID=Integer.parseInt(b);
141 }else if(a.equals("spid")){
142 outSpid=Integer.parseInt(b);
143 }else if(a.equals("imgid")){
144 outImgID=Integer.parseInt(b);
145 }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){
146 if(outMeta==null){outMeta=new ArrayList<String>();}
147 int underscore=a.indexOf('_', 0);
148 outMeta.add(a.substring(underscore+1)+":"+b);
149 }
150
151 else if(searcher.parse(arg, a, b, false)){
152 // System.err.println("*"+arg);
153 parser.parse(arg, a, b); //Catches shared flags like "threads"
154 Blacklist.parseBlacklist(arg, a, b); //Catches flags like "nt" or "refseq"
155 }
156
157 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
158 //do nothing
159 }
160
161 else if(searcher.parse(arg, a, b, true)){
162 // System.err.println("**"+arg);
163 //do nothing
164 }
165
166 else{
167 outstream.println("Unknown parameter "+args[i]);
168 assert(false) : "Unknown parameter "+args[i];
169 }
170 }
171 if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}
172
173 outMeta=SketchObject.fixMeta(outMeta);
174 SketchObject.postParse();
175
176 if(skipCompare){
177 allToAll=false;
178 searcher.autoIndex=false;
179 makeIndex=false;
180 in.addAll(searcher.refFiles);
181 searcher.refFiles.clear();
182 }else if(in.isEmpty() && args.length>0 && !allToAll){ //Allows first argument to be used as the input file without in= flag
183 String x=args[0];
184 if(x.indexOf('=')<0 && new File(x).exists() && searcher.refFiles.contains(x)){
185 searcher.refFiles.remove(x);
186 in.add(x);
187 }
188 }
189
190 {//Process parser fields
191 overwrite=ReadStats.overwrite=parser.overwrite;
192 append=ReadStats.append=parser.append;
193
194 out=parser.out1;
195 }
196
197 // assert(false) : in+"\n"+searcher.refFiles;
198
199 if(allToAll){
200 LinkedHashSet<String> set=new LinkedHashSet<String>();
201 set.addAll(in);
202 set.addAll(searcher.refFiles);
203 in.clear();
204 searcher.refFiles.clear();
205 in.addAll(set);
206 searcher.refFiles.addAll(set);
207 }
208
209 //Ensure there is an input file
210 if(in.isEmpty() && !skipCompare){throw new RuntimeException("Error - at least one input file is required.");}
211
212 //Ensure there is an ref file
213 if(searcher.refFiles.isEmpty() && !skipCompare){
214 if(outSketch==null){throw new RuntimeException("Error - at least one reference file is required.");}
215 }
216
217 //Adjust the number of threads for input file reading
218 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
219 ByteFile.FORCE_MODE_BF2=true;
220 }
221
222 ffout=FileFormat.testOutput(out, FileFormat.TEXT, null, false, overwrite, append, ordered);
223 if(!ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;}
224
225 //Ensure input files can be read
226 if(!Tools.testInputFiles(false, true, taxTreeFile)){
227 throw new RuntimeException("\nCan't read some input files.\n");
228 }
229 if(!Tools.testInputFiles(true, false, in.toArray(new String[0]))){
230 if(in.size()==1){
231 String s=in.get(0);
232 String s1=s.replaceFirst("#", "1"), s2=s.replaceFirst("#", "2");
233 Tools.testInputFiles(true, false, s1, s2);
234 }else{
235 throw new RuntimeException("\nCan't read some input files.\n");
236 }
237 }
238
239 // assert(makeIndex || defaultParams.printContam2==false) : "Contam2 requires the flag index=t";
240
241 SSUMap.load(outstream);
242 if(taxTreeFile!=null){setTaxtree(taxTreeFile, silent ? null : outstream);}
243 defaultParams.postParse(true, true);
244 if(!defaultParams.printSSU){processSSU=false;}
245 allowMultithreadedFastq=in.size()<2 && !allToAll;
246 if(!allowMultithreadedFastq){Shared.capBufferLen(40);}
247 // assert(defaultParams.checkValid());
248 }
249
250 /*--------------------------------------------------------------*/
251 /*---------------- Outer Methods ----------------*/
252 /*--------------------------------------------------------------*/
253
254 public void process(Timer t){
255 Timer ttotal=new Timer();
256
257 t.start();
258
259 if(!silent){outstream.println("Loading sketches.");}
260 searcher.makeTool(1, false, defaultParams.mergePairs);
261 SketchTool tool=new SketchTool(targetSketchSize, defaultParams);
262
263 final int mode2=(defaultParams.mode==PER_FILE ? PER_FILE : PER_TAXA);
264 if(skipCompare){
265 makeIndex=false;
266 inSketches=tool.loadSketches_MT(defaultParams, in);
267 }else if(!useWhitelist || allToAll){
268 if(allToAll){
269 makeIndex=searcher.refFileCount()>0 && (makeIndex || defaultParams.needIndex() || searcher.autoIndex);
270 searcher.loadReferences(mode2, defaultParams);
271 inSketches=(ArrayList<Sketch>) searcher.refSketches.clone();
272 }else{
273 inSketches=tool.loadSketches_MT(defaultParams, in);
274
275 for(Sketch sk : inSketches){
276 if(sk.taxID<1 || sk.taxID>=minFakeID || outTaxID>0){sk.taxID=outTaxID;}
277 if(outSpid>0){sk.spid=outSpid;}
278 if(outImgID>0){sk.imgID=outImgID;}
279 if(outTaxName!=null){sk.setTaxName(outTaxName);}
280 if(outFname!=null){sk.setFname(outFname);}
281 if(outName0!=null){sk.setName0(outName0);}
282 if(SketchMaker.parseSubunit && sk.name0()!=null){
283 if(outMeta!=null){
284 sk.meta=(ArrayList<String>)sk.meta.clone();
285 }else if(sk.meta==null){
286 if(sk.name0().contains("SSU_")){
287 sk.addMeta("subunit:ssu");
288 }else if(sk.name0().contains("LSU_")){
289 sk.addMeta("subunit:lsu");
290 }
291 }
292 }
293 sk.setMeta(outMeta);
294 if(defaultParams.printSSU()){sk.loadSSU();}//since taxID was just set
295 }
296
297 if(outTaxID>0){
298 for(Sketch sk : inSketches){
299 if(sk.taxID<1 || sk.taxID>=minFakeID){sk.taxID=outTaxID;}
300 }
301 }
302 makeIndex=searcher.refFileCount()>0 && ((searcher.autoIndex && inSketches.size()>8) || defaultParams.needIndex() || (makeIndex && !searcher.autoIndex));
303 searcher.loadReferences(mode2, defaultParams);
304 if(mode2==PER_FILE){
305 int max=inSketches.size();
306 for(int i=0; i<searcher.refSketches.size(); i++){
307 searcher.refSketches.get(i).sketchID=max+i+1;
308 }
309 }
310 }
311 }else{
312 //assert(searcher.makeIndex && !searcher.autoIndex) : "whitelist=t requires index=t";
313 makeIndex=true; //(searcher.refFileCount()>0); //Index is required in whitelist mode.
314 searcher.loadReferences(mode2, defaultParams);
315 inSketches=tool.loadSketches_MT(defaultParams, in);
316 }
317
318 if(outSketch!=null){
319 writeSketches(outSketch, sketchFiles);
320 }
321
322 final int numLoaded=(inSketches.size()+searcher.refSketchCount())/(allToAll ? 2 : 1);
323 t.stop();
324 if(!silent){outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" in "+t.toString());}
325 if(printMemory){
326 System.gc();
327 Shared.printMemory();
328 }
329
330 if(skipCompare) {
331 ttotal.stop("Total Time: \t");
332 return;
333 }
334
335 t.start();
336
337
338 ByteStreamWriter tsw=(ffout==null ? null : new ByteStreamWriter(ffout));
339 if(tsw!=null){
340 tsw.start();
341 if(defaultParams.format==DisplayParams.FORMAT_QUERY_REF_ANI || defaultParams.format==DisplayParams.FORMAT_CONSTELLATION){
342 String s=defaultParams.header()+"\n";
343 tsw.forcePrint(s.getBytes());
344 }
345 }
346
347 boolean success=true;
348 final int inSize=inSketches.size();
349 if(inSize==1 || Shared.threads()<2 || inSize<4){
350 ByteBuilder sb=new ByteBuilder();
351 success=searcher.compare(inSketches, sb, defaultParams, Shared.threads());
352 success&=(!searcher.errorState);
353 if(tsw!=null){
354 sb.append('\n');
355 if(ordered){
356 tsw.addJob(sb);
357 }else{
358 tsw.println(sb);
359 }
360 }
361 }else{//More sketches than threads, and more than one thread
362 final int threads=Tools.min(Shared.threads(), inSize);
363
364 ArrayList<CompareThread> alct=new ArrayList<CompareThread>(threads);
365 AtomicInteger next=new AtomicInteger(0);
366 for(int i=0; i<threads; i++){
367 alct.add(new CompareThread(i, next, tsw));
368 }
369 for(CompareThread ct : alct){ct.start();}
370 for(CompareThread ct : alct){
371
372 //Wait until this thread has terminated
373 while(ct.getState()!=Thread.State.TERMINATED){
374 try {
375 //Attempt a join operation
376 ct.join();
377 } catch (InterruptedException e) {
378 e.printStackTrace();
379 }
380 }
381
382 synchronized(ct){
383 success&=ct.success;
384 }
385 }
386 alct=null;
387 }
388
389 //Track whether any threads failed
390 if(!success){errorState=true;}
391 if(tsw!=null){errorState|=tsw.poisonAndWait();}
392
393 t.stop();
394 // long comparisons=(makeIndex ? searcher.comparisons.get() :
395 // allToAll ? (inSketches.size()*(long)(inSketches.size()-(compareSelf ? 0 : 1)))
396 // : inSketches.size()*(long)searcher.refSketchCount());
397 long comparisons=searcher.comparisons.get();
398 if(!skipCompare && !silent) {outstream.println("\nRan "+comparisons+" comparison"+(comparisons==1 ? "" : "s")+" in "+t);}
399 ttotal.stop();
400 if(!silent){outstream.println("Total Time: \t"+ttotal);}
401 }
402
403 void writeSketches(String fname, int files){
404 if(fname==null){return;}
405 if(files==1 || fname.indexOf('#')<0){
406 writeOneSketchFile(fname);
407 }else{
408 writeManySketchFiles(fname, files);
409 }
410 }
411
412 void writeOneSketchFile(String fname){
413 if(fname==null){return;}
414 ByteBuilder bb=new ByteBuilder();
415 ByteStreamWriter bsw=new ByteStreamWriter(outSketch, overwrite, append, true, FileFormat.SKETCH);
416 bsw.start();
417 for(Sketch sk : inSketches){
418 sk.toBytes(bb);
419 bsw.print(bb);
420 bb.clear();
421 }
422 bsw.poisonAndWait();
423 errorState|=bsw.errorState;
424 }
425
426 void writeManySketchFiles(String fname, int files){
427 if(fname==null){return;}
428 assert(fname.indexOf('#')>=0) : fname;
429 assert(files>0) : files;
430
431 ByteStreamWriter[] bswa=new ByteStreamWriter[files];
432 for(int i=0; i<files; i++){
433 ByteStreamWriter bsw=new ByteStreamWriter(outSketch.replaceFirst("#", ""+i), overwrite, append, true, FileFormat.SKETCH);
434 bsw.start();
435 bswa[i]=bsw;
436 }
437 for(Sketch sk : inSketches){
438 ByteBuilder bb=new ByteBuilder(4096);
439 sk.toBytes(bb);
440 bswa[sk.sketchID%files].addJob(bb);
441 }
442 for(ByteStreamWriter bsw : bswa){
443 bsw.poisonAndWait();
444 errorState|=bsw.errorState;
445 }
446 }
447
448 /*--------------------------------------------------------------*/
449 /*---------------- Inner Methods ----------------*/
450 /*--------------------------------------------------------------*/
451
452 private static void addFiles(String a, Collection<String> list){
453 if(a==null){return;}
454 File f=null;
455 if(a.indexOf(',')>=0){f=new File(a);}
456 if(f==null || f.exists()){
457 list.add(a);
458 }else{
459 for(String s : a.split(",")){list.add(s);}
460 }
461 }
462
463 /*--------------------------------------------------------------*/
464 /*---------------- Inner Classes ----------------*/
465 /*--------------------------------------------------------------*/
466
467 private class CompareThread extends Thread {
468
469 CompareThread(final int tid_, final AtomicInteger nextSketch_, ByteStreamWriter tsw_){
470 tid=tid_;
471 nextSketch=nextSketch_;
472 tsw=tsw_;
473 }
474
475 @Override
476 public void run(){
477 success=false;
478 final int inLim=inSketches.size();
479 final boolean json=defaultParams.json();
480
481 for(int inNum=nextSketch.getAndIncrement(); inNum<inLim; inNum=nextSketch.getAndIncrement()){
482 Sketch a=inSketches.get(inNum);
483 assert(buffer.cbs==null); //Because this sketch will only be used by one thread at a time, so per-buffer bitsets are not needed.
484 SketchResults sr=searcher.processSketch(a, buffer, fakeID, map, defaultParams, 1);
485 a.clearRefHitCounts();
486
487 if(tsw!=null){
488 ByteBuilder sb=sr.toText(defaultParams);
489 synchronized(tsw){
490 if(ordered){
491 if(json){
492 if(inNum==0){
493 sb.insert(0, (byte)'[');//Rare, slow case
494 }
495 if(inNum<inLim-1){
496 sb.append(',');
497 }else{
498 sb.append(']');
499 }
500 }
501 tsw.add(sb, inNum);
502 }else{
503 if(json){
504 if(resultsPrinted==0){
505 tsw.print('[');
506 }else{
507 sb.insert(0, (byte)',');
508 }
509 }
510 tsw.print(sb);
511 }
512 resultsPrinted++;
513 }
514 }
515 }
516 synchronized(this){success=true;}
517 }
518
519 private final int tid;
520 private final CompareBuffer buffer=new CompareBuffer(false);
521
522 private final AtomicInteger nextSketch;
523 private final AtomicInteger fakeID=new AtomicInteger(minFakeID);
524 private ConcurrentHashMap<Integer, Comparison> map=new ConcurrentHashMap<Integer, Comparison>(101);
525 final ByteStreamWriter tsw;
526
527 boolean success=false;
528
529 }
530
531 /*--------------------------------------------------------------*/
532 /*---------------- Fields ----------------*/
533 /*--------------------------------------------------------------*/
534
535 private ArrayList<String> in=new ArrayList<String>();
536
537 private String out="stdout.txt";
538
539 private String taxTreeFile=null;
540
541 private ArrayList<Sketch> inSketches;
542
543 public final SketchSearcher searcher=new SketchSearcher();
544
545 private boolean printMemory=false;
546 private boolean silent=false;
547
548 /*Override metadata */
549 private String outTaxName=null;
550 private String outFname=null;
551 private String outName0=null;
552 private String outSketch=null;
553 private int sketchFiles=1;
554 private int outTaxID=-1;
555 private long outSpid=-1;
556 private long outImgID=-1;
557 private ArrayList<String> outMeta=null;
558 private long resultsPrinted=0;
559
560 /*--------------------------------------------------------------*/
561 /*---------------- Final Fields ----------------*/
562 /*--------------------------------------------------------------*/
563
564 /** Primary output file */
565 private final FileFormat ffout;
566
567 /*--------------------------------------------------------------*/
568 /*---------------- Common Fields ----------------*/
569 /*--------------------------------------------------------------*/
570
571 /** Print status messages to this output stream */
572 private PrintStream outstream=System.err;
573 /** Print verbose messages */
574 public static boolean verbose=false;
575 /** True if an error was encountered */
576 public boolean errorState=false;
577 /** Overwrite existing output files */
578 private boolean overwrite=false;
579 /** Append to existing output files */
580 private boolean append=false;
581 private boolean ordered=true;
582
583 }