comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AnalyzeSketchResults.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package sketch;
2
3 import java.io.PrintStream;
4 import java.util.ArrayList;
5 import java.util.HashMap;
6 import java.util.Map.Entry;
7 import java.util.concurrent.atomic.AtomicInteger;
8
9 import fileIO.ByteFile;
10 import fileIO.ByteStreamWriter;
11 import fileIO.FileFormat;
12 import fileIO.ReadWrite;
13 import shared.Parse;
14 import shared.Parser;
15 import shared.PreParser;
16 import shared.Shared;
17 import shared.Timer;
18 import shared.Tools;
19 import structures.ByteBuilder;
20 import structures.FloatList;
21 import tax.TaxTree;
22 import template.ThreadWaiter;
23
24 /**
25 * @author Brian Bushnell
26 * @date May 9, 2016
27 *
28 */
29 public class AnalyzeSketchResults {
30
31 /*--------------------------------------------------------------*/
32 /*---------------- Initialization ----------------*/
33 /*--------------------------------------------------------------*/
34
35 /**
36 * Code entrance from the command line.
37 * @param args Command line arguments
38 */
39 public static void main(String[] args){
40 //Start a timer immediately upon code entrance.
41 Timer t=new Timer();
42
43 //Create an instance of this class
44 AnalyzeSketchResults x=new AnalyzeSketchResults(args);
45
46 //Run the object
47 x.process(t);
48
49 //Close the print stream if it was redirected
50 Shared.closeStream(x.outstream);
51 }
52
53 /**
54 * Constructor.
55 * @param args Command line arguments
56 */
57 public AnalyzeSketchResults(String[] args){
58
59 {//Preparse block for help, config files, and outstream
60 PreParser pp=new PreParser(args, /*getClass()*/null, false);
61 args=pp.args;
62 outstream=pp.outstream;
63 }
64
65 //Set shared static variables prior to parsing
66 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
67 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
68
69 {//Parse the arguments
70 final Parser parser=parse(args);
71 overwrite=parser.overwrite;
72 append=parser.append;
73
74 in1=parser.in1;
75 in2=parser.in2;
76
77 out1=parser.out1;
78 }
79
80 fixExtensions(); //Add or remove .gz or .bz2 as needed
81 checkFileExistence(); //Ensure files can be read and written
82 checkStatics(); //Adjust file-related static fields as needed for this program
83
84 ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, overwrite, append, false);
85 ffoutMap=FileFormat.testOutput(outMap, FileFormat.TXT, null, true, overwrite, append, false);
86 ffoutAccuracy=FileFormat.testOutput(outAccuracy, FileFormat.TXT, null, true, overwrite, append, false);
87 ffoutBad=FileFormat.testOutput(outBad, FileFormat.TXT, null, true, overwrite, append, false);
88 ffin1=FileFormat.testInput(in1, FileFormat.TXT, null, true, true);
89 ffin2=FileFormat.testInput(in2, FileFormat.TXT, null, true, true);
90
91 recordSets=(ffoutAccuracy==null && ffoutBad==null ? null : new ArrayList<RecordSet>());
92 tree=(treeFile==null) ? null : TaxTree.loadTaxTree(treeFile, outstream, true, false);
93 SSUMap.load(outstream);
94 }
95
96 /*--------------------------------------------------------------*/
97 /*---------------- Initialization Helpers ----------------*/
98 /*--------------------------------------------------------------*/
99
100 /** Parse arguments from the command line */
101 private Parser parse(String[] args){
102
103 Parser parser=new Parser();
104 for(int i=0; i<args.length; i++){
105 String arg=args[i];
106 String[] split=arg.split("=");
107 String a=split[0].toLowerCase();
108 String b=split.length>1 ? split[1] : null;
109 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
110
111 if(a.equals("map") || a.equals("outmap")){
112 outMap=b;
113 }else if(a.equals("accuracy") || a.equals("outacc") || a.equals("outaccuracy")){
114 outAccuracy=b;
115 }else if(a.equals("outbad")){
116 outBad=b;
117 }else if(a.equals("tree")){
118 treeFile=b;
119 }else if(a.equals("shrinkonly")){
120 shrinkOnly=Parse.parseBoolean(b);
121 }else if(a.equals("ssu") || a.equals("ssufile")){
122 assert(false) : "ssu and ssufile are deprecated; please specify 16S or 18S independently";
123 }else if(a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("16Sfile")){
124 SSUMap.r16SFile=b;
125 }else if(a.equalsIgnoreCase("18S") || a.equalsIgnoreCase("18Sfile")){
126 SSUMap.r18SFile=b;
127 }else if(a.equals("mash")){
128 mode=MASH_MODE;
129 }else if(a.equals("sourmash")){
130 mode=SOURMASH_MODE;
131 }else if(a.equals("blast")){
132 mode=BLAST_MODE;
133 }else if(a.equals("bbsketch")){
134 mode=BBSKETCH_MODE;
135 }else if(a.equals("lines")){
136 maxLines=Long.parseLong(b);
137 if(maxLines<0){maxLines=Long.MAX_VALUE;}
138 }else if(a.equals("minsamples")){
139 minSamples=Integer.parseInt(b);
140 }else if(a.equals("verbose")){
141 RecordSet.verbose=Record.verbose=verbose=Parse.parseBoolean(b);
142 }else if(parser.parse(arg, a, b)){
143 //do nothing
144 }else{
145 outstream.println("Unknown parameter "+args[i]);
146 assert(false) : "Unknown parameter "+args[i];
147 // throw new RuntimeException("Unknown parameter "+args[i]);
148 }
149 }
150
151 return parser;
152 }
153
154 /** Add or remove .gz or .bz2 as needed */
155 private void fixExtensions(){
156 in1=Tools.fixExtension(in1);
157 in2=Tools.fixExtension(in2);
158 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
159 }
160
161 /** Ensure files can be read and written */
162 private void checkFileExistence(){
163 //Ensure output files can be written
164 if(!Tools.testOutputFiles(overwrite, append, false, out1, outMap)){
165 outstream.println((out1==null)+", "+out1);
166 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n");
167 }
168
169 //Ensure input files can be read
170 if(!Tools.testInputFiles(false, true, in1, in2)){
171 throw new RuntimeException("\nCan't read some input files.\n");
172 }
173
174 //Ensure that no file was specified multiple times
175 if(!Tools.testForDuplicateFiles(true, in1, in2, out1, outMap)){
176 throw new RuntimeException("\nSome file names were specified multiple times.\n");
177 }
178 }
179
180 /** Adjust file-related static fields as needed for this program */
181 private static void checkStatics(){
182 //Adjust the number of threads for input file reading
183 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
184 ByteFile.FORCE_MODE_BF2=true;
185 }
186
187 // if(!ByteFile.FORCE_MODE_BF2){
188 // ByteFile.FORCE_MODE_BF2=false;
189 // ByteFile.FORCE_MODE_BF1=true;
190 // }
191 }
192
193 /*--------------------------------------------------------------*/
194 /*---------------- Outer Methods ----------------*/
195 /*--------------------------------------------------------------*/
196
197 void process(Timer t){
198 if(verbose){System.err.println("process()");}
199
200 ByteFile bf1=ByteFile.makeByteFile(ffin1);
201 ByteFile bf2=(ffin2==null ? null : ByteFile.makeByteFile(ffin2));
202
203 if(shrinkOnly){
204 runShrinkOnly(bf1);
205 return;
206 }
207
208 bswBad=makeBSW(ffoutBad);
209 ResultLineParser parser=new ResultLineParser(mode, tree, bswBad, recordSets, false);
210
211 processInner(bf1, parser, ffin2==null ? null : aniMap);
212 ByteStreamWriter bsw=makeBSW(ffout1);
213 printResults(parser, bsw);
214 if(bsw!=null){errorState|=bsw.poisonAndWait();}
215
216 ByteStreamWriter bswAcc=makeBSW(ffoutAccuracy);
217 if(recordSets!=null){
218 if(SSUMap.hasMap()){
219 processSetsThreaded();
220 }
221 printAccuracy(bswAcc);
222 }
223 if(bswAcc!=null){errorState|=bswAcc.poisonAndWait();}
224 if(bswBad!=null){errorState|=bswBad.poisonAndWait();}
225
226 ByteStreamWriter bswMap=makeBSW(ffoutMap);
227 if(bf2!=null){
228 processInner(bf2, parser, aaiMap);
229 printMap(bswMap);
230 }
231 if(bswMap!=null){errorState|=bswMap.poisonAndWait();}
232
233 t.stop();
234
235 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
236
237 outstream.println();
238 outstream.println("Lines Out: \t"+linesOut);
239 outstream.println("Bytes Out: \t"+bytesOut);
240
241 if(errorState){
242 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
243 }
244 }
245
246 /*--------------------------------------------------------------*/
247 /*---------------- Inner Methods ----------------*/
248 /*--------------------------------------------------------------*/
249
250 private void runShrinkOnly(ByteFile bf){
251 if(verbose){System.err.println("runShrinkOnly("+bf.name()+")");}
252 ResultLineParser parser=new ResultLineParser(mode, tree, null, null, true);
253 ByteStreamWriter bsw=makeBSW(ffout1);
254
255 byte[] line=bf.nextLine();
256
257 while(line!=null){
258 if(line.length>0){
259 if(maxLines>0 && linesProcessed>=maxLines){break;}
260 linesProcessed++;
261 bytesProcessed+=(line.length+1);
262
263 parser.parse(line);
264 RecordSet rs=parser.processData(null, true);
265 if(rs!=null){
266 rs.sortAndSweep();
267 for(Record r : rs.records){
268 if(bsw!=null){
269 bsw.println(r.text);
270 }
271 }
272 }
273 }
274 line=bf.nextLine();
275 }
276 errorState|=bf.close();
277
278 RecordSet rs=parser.currentSet;
279 if(rs!=null){
280 rs.sortAndSweep();
281 for(Record r : rs.records){
282 if(bsw!=null){
283 bsw.println(r.text);
284 }
285 }
286 }
287
288 if(bsw!=null){errorState|=bsw.poisonAndWait();}
289 }
290
291 private void processInner(ByteFile bf, ResultLineParser parser, HashMap<Long, Float> map){
292 if(verbose){System.err.println("processInner("+bf.name()+")");}
293 byte[] line=bf.nextLine();
294
295 while(line!=null){
296 if(line.length>0){
297 if(maxLines>0 && linesProcessed>=maxLines){break;}
298 linesProcessed++;
299 bytesProcessed+=(line.length+1);
300
301 parser.parse(line);
302 parser.processData(map, recordSets!=null);
303 }
304 line=bf.nextLine();
305 }
306 errorState|=bf.close();
307 }
308
309 private void processSetsThreaded(){
310 if(verbose){System.err.println("processSetsThreaded("+Shared.threads()+")");}
311 final int threads=Shared.threads();
312 // final ThreadWaiter waiter=new ThreadWaiter();
313 final ArrayList<SSUThread> list=new ArrayList<SSUThread>(threads);
314 final AtomicInteger atom=new AtomicInteger(0);
315 for(int i=0; i<threads; i++){
316 list.add(new SSUThread(atom));
317 }
318 boolean success=ThreadWaiter.startAndWait(list);
319 if(!success){errorState=true;}
320 }
321
322 private void printAccuracy(ByteStreamWriter bswAcc){
323 if(verbose){System.err.println("printAccuracy("+bswAcc+")");}
324 int[][] results=new int[taxLevels][16];
325 for(RecordSet rs : recordSets){
326 if(mode==MASH_MODE){
327 rs.sortAndSweep();
328 rs.processSSU();
329 }
330 int[] statusArray=rs.test(bswBad);
331 for(int level=0; level<statusArray.length; level++){
332 int status=statusArray[level];
333 results[level][status]++;
334 assert(status==NOHIT || status==CORRECT ||
335 status==INCORRECT_TAX_CORRECT_SSU || status==INCORRECT_TAX_INCORRECT_SSU ||
336 status==INCORRECT_TAX_MISSING_SSU) : status;
337 }
338 }
339 ByteBuilder bb=new ByteBuilder();
340 bb.append("#Level \tCorrect\tbadTaxGoodSSU\tBadTaxNoSSU\tbadTaxBadSSU\tNoHit\n");
341 if(bswAcc!=null){bswAcc.print(bb);}
342 for(String levelName : printLevels){
343 int level=TaxTree.stringToLevelExtended(levelName);
344 bb.clear();
345 bb.append(TaxTree.levelToStringExtended(level));
346 while(bb.length<"species subgroup".length()){bb.append(' ');}
347 bb.tab();
348 bb.append(results[level][CORRECT]).tab();
349 bb.append(results[level][INCORRECT_TAX_CORRECT_SSU]).tab();
350 bb.append(results[level][INCORRECT_TAX_MISSING_SSU]).tab();
351 bb.append(results[level][INCORRECT_TAX_INCORRECT_SSU]).tab();
352 bb.append(results[level][NOHIT]).nl();
353 if(bswAcc!=null){bswAcc.print(bb);}
354 }
355 }
356
357 private void printMap(ByteStreamWriter bsw){
358 if(verbose){System.err.println("printMap("+bsw+")");}
359 ByteBuilder bb=new ByteBuilder();
360 bb.append("#qID\trID\tANI\tAAI\n");
361 if(bsw!=null){bsw.print(bb);}
362 for(Entry<Long, Float> e : aniMap.entrySet()){
363 long key=e.getKey();
364 int qID=((int)(key>>>32));
365 int rID=(int)(key&Integer.MAX_VALUE);
366 float ani=e.getValue();
367 Float aai=aaiMap.get(key);
368 if(aai!=null){
369 bb.clear();
370 bb.append(qID).tab().append(rID).tab().append(ani, 3).tab().append(aai, 3).nl();
371 if(bsw!=null){bsw.print(bb);}
372 linesOut++;
373 bytesOut+=bb.length();
374 }
375 }
376 }
377
378 private void printResults(ResultLineParser parser, ByteStreamWriter bsw){
379 if(verbose){System.err.println("printResults("+bsw+")");}
380 ByteBuilder bb=new ByteBuilder();
381 bb.append("#Level \tRank\tANI_AVG\tSSU_AVG\tANI_STD\tSSU_STD\tSamples");
382 if(bsw!=null){bsw.println(bb);}
383 for(int level=0; level<taxLevels; level++){
384 long aniCount=parser.levelCounts[level];
385 long ssuCount=parser.levelCountsSSU[level];
386 if(aniCount>=minSamples && ((1L<<level)&printLevelsMask)!=0){
387 bb.clear();
388 String name=TaxTree.levelToStringExtended(level);
389 double aniSum=parser.levelAniSums[level];
390 double ssuSum=parser.levelSSUSums[level];
391 FloatList aniList=parser.aniLists[level];
392 FloatList ssuList=parser.ssuLists[level];
393 aniList.sort();
394 ssuList.sort();
395
396 double aniAvg=aniSum/aniCount;
397 double ssuAvg=ssuSum/ssuCount;
398 double aniStd=aniList.stdev();
399 double ssuStd=ssuList.stdev();
400
401 bb.append(name);
402 while(bb.length<9){bb.append(' ');}
403 bb.tab().append(level);
404 bb.tab().append(aniAvg, 3);
405 bb.tab().append(ssuAvg, 3);
406 bb.tab().append(aniStd, 3);
407 bb.tab().append(ssuStd, 3);
408 bb.tab().append(aniCount);
409 if(bsw!=null){bsw.println(bb);}
410 linesOut++;
411 bytesOut+=bb.length();
412 }
413 }
414 }
415
416 private static ByteStreamWriter makeBSW(FileFormat ff){
417 if(verbose){System.err.println("makeBSW("+ff+")");}
418 if(ff==null){return null;}
419 ByteStreamWriter bsw=new ByteStreamWriter(ff);
420 bsw.start();
421 return bsw;
422 }
423
424 private static long makePrintLevelsMask(String[] printLevelsArray){
425 long mask=0;
426 for(String s : printLevelsArray){
427 int level=TaxTree.stringToLevelExtended(s);
428 long bit=1L<<level;
429 assert(Long.bitCount(bit)==1);
430 mask|=bit;
431 }
432 return mask;
433 }
434
435 /*--------------------------------------------------------------*/
436
437 private class SSUThread extends Thread{
438
439 SSUThread(AtomicInteger atom_){
440 atom=atom_;
441 }
442
443 @Override
444 public void run(){
445 for(int idx=atom.getAndIncrement(); idx<recordSets.size(); idx=atom.getAndIncrement()){
446 RecordSet rs=recordSets.get(idx);
447 rs.sortAndSweep();
448 rs.processSSU();
449 }
450 }
451
452 private final AtomicInteger atom;
453 }
454
455 /*--------------------------------------------------------------*/
456
457
458
459 /*--------------------------------------------------------------*/
460 /*---------------- Fields ----------------*/
461 /*--------------------------------------------------------------*/
462
463 private String in1=null;
464 private String in2=null;
465 private String out1="stdout.txt";
466 private String outMap=null;
467 private String outAccuracy=null;
468 private String outBad=null;
469 private String treeFile=null;
470
471 private ByteStreamWriter bswBad=null;
472
473 /*--------------------------------------------------------------*/
474
475 private long linesProcessed=0;
476 private long linesOut=0;
477 private long bytesProcessed=0;
478 private long bytesOut=0;
479
480 private boolean shrinkOnly=false;
481
482 int minSamples=1;
483 private long maxLines=Long.MAX_VALUE;
484
485 final static int taxLevels=TaxTree.numTaxLevelNamesExtended;
486
487 final HashMap<Long, Float> aniMap=new HashMap<Long, Float>();
488 final HashMap<Long, Float> aaiMap=new HashMap<Long, Float>();
489
490 final TaxTree tree;
491 final ArrayList<RecordSet> recordSets;
492
493 // static HashMap<Integer, byte[]> ssuMap;
494
495 static final String[] printLevels=new String[] {"strain", "species", "genus", "family", "order", "class", "phylum", "superkingdom", "life"};
496 static final long printLevelsMask=makePrintLevelsMask(printLevels);
497
498 /*--------------------------------------------------------------*/
499
500 static int NOHIT=0;
501 static int CORRECT=1;
502 static int INCORRECT_TAX=2;
503 static int INCORRECT_SSU=4;
504 static int MISSING_SSU=8;
505 private static int INCORRECT_TAX_CORRECT_SSU=INCORRECT_TAX;
506 private static int INCORRECT_TAX_INCORRECT_SSU=INCORRECT_TAX|INCORRECT_SSU;
507 private static int INCORRECT_TAX_MISSING_SSU=INCORRECT_TAX|MISSING_SSU;
508
509 static final int BBSKETCH_MODE=0;
510 static final int MASH_MODE=1;
511 static final int SOURMASH_MODE=2;
512 static final int BLAST_MODE=3;
513 static int mode=BBSKETCH_MODE;
514
515
516 /*--------------------------------------------------------------*/
517 /*---------------- Final Fields ----------------*/
518 /*--------------------------------------------------------------*/
519
520 private final FileFormat ffin1;
521 private final FileFormat ffin2;
522 private final FileFormat ffout1;
523 private final FileFormat ffoutMap;
524 private final FileFormat ffoutAccuracy;
525 private final FileFormat ffoutBad;
526
527 /*--------------------------------------------------------------*/
528 /*---------------- Common Fields ----------------*/
529 /*--------------------------------------------------------------*/
530
531 private PrintStream outstream=System.err;
532 public static boolean verbose=false;
533 public boolean errorState=false;
534 private boolean overwrite=false;
535 private boolean append=false;
536
537 }