Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AnalyzeSketchResults.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package sketch; | |
2 | |
3 import java.io.PrintStream; | |
4 import java.util.ArrayList; | |
5 import java.util.HashMap; | |
6 import java.util.Map.Entry; | |
7 import java.util.concurrent.atomic.AtomicInteger; | |
8 | |
9 import fileIO.ByteFile; | |
10 import fileIO.ByteStreamWriter; | |
11 import fileIO.FileFormat; | |
12 import fileIO.ReadWrite; | |
13 import shared.Parse; | |
14 import shared.Parser; | |
15 import shared.PreParser; | |
16 import shared.Shared; | |
17 import shared.Timer; | |
18 import shared.Tools; | |
19 import structures.ByteBuilder; | |
20 import structures.FloatList; | |
21 import tax.TaxTree; | |
22 import template.ThreadWaiter; | |
23 | |
24 /** | |
25 * @author Brian Bushnell | |
26 * @date May 9, 2016 | |
27 * | |
28 */ | |
29 public class AnalyzeSketchResults { | |
30 | |
31 /*--------------------------------------------------------------*/ | |
32 /*---------------- Initialization ----------------*/ | |
33 /*--------------------------------------------------------------*/ | |
34 | |
35 /** | |
36 * Code entrance from the command line. | |
37 * @param args Command line arguments | |
38 */ | |
39 public static void main(String[] args){ | |
40 //Start a timer immediately upon code entrance. | |
41 Timer t=new Timer(); | |
42 | |
43 //Create an instance of this class | |
44 AnalyzeSketchResults x=new AnalyzeSketchResults(args); | |
45 | |
46 //Run the object | |
47 x.process(t); | |
48 | |
49 //Close the print stream if it was redirected | |
50 Shared.closeStream(x.outstream); | |
51 } | |
52 | |
53 /** | |
54 * Constructor. | |
55 * @param args Command line arguments | |
56 */ | |
57 public AnalyzeSketchResults(String[] args){ | |
58 | |
59 {//Preparse block for help, config files, and outstream | |
60 PreParser pp=new PreParser(args, /*getClass()*/null, false); | |
61 args=pp.args; | |
62 outstream=pp.outstream; | |
63 } | |
64 | |
65 //Set shared static variables prior to parsing | |
66 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; | |
67 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); | |
68 | |
69 {//Parse the arguments | |
70 final Parser parser=parse(args); | |
71 overwrite=parser.overwrite; | |
72 append=parser.append; | |
73 | |
74 in1=parser.in1; | |
75 in2=parser.in2; | |
76 | |
77 out1=parser.out1; | |
78 } | |
79 | |
80 fixExtensions(); //Add or remove .gz or .bz2 as needed | |
81 checkFileExistence(); //Ensure files can be read and written | |
82 checkStatics(); //Adjust file-related static fields as needed for this program | |
83 | |
84 ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, overwrite, append, false); | |
85 ffoutMap=FileFormat.testOutput(outMap, FileFormat.TXT, null, true, overwrite, append, false); | |
86 ffoutAccuracy=FileFormat.testOutput(outAccuracy, FileFormat.TXT, null, true, overwrite, append, false); | |
87 ffoutBad=FileFormat.testOutput(outBad, FileFormat.TXT, null, true, overwrite, append, false); | |
88 ffin1=FileFormat.testInput(in1, FileFormat.TXT, null, true, true); | |
89 ffin2=FileFormat.testInput(in2, FileFormat.TXT, null, true, true); | |
90 | |
91 recordSets=(ffoutAccuracy==null && ffoutBad==null ? null : new ArrayList<RecordSet>()); | |
92 tree=(treeFile==null) ? null : TaxTree.loadTaxTree(treeFile, outstream, true, false); | |
93 SSUMap.load(outstream); | |
94 } | |
95 | |
96 /*--------------------------------------------------------------*/ | |
97 /*---------------- Initialization Helpers ----------------*/ | |
98 /*--------------------------------------------------------------*/ | |
99 | |
100 /** Parse arguments from the command line */ | |
101 private Parser parse(String[] args){ | |
102 | |
103 Parser parser=new Parser(); | |
104 for(int i=0; i<args.length; i++){ | |
105 String arg=args[i]; | |
106 String[] split=arg.split("="); | |
107 String a=split[0].toLowerCase(); | |
108 String b=split.length>1 ? split[1] : null; | |
109 if(b!=null && b.equalsIgnoreCase("null")){b=null;} | |
110 | |
111 if(a.equals("map") || a.equals("outmap")){ | |
112 outMap=b; | |
113 }else if(a.equals("accuracy") || a.equals("outacc") || a.equals("outaccuracy")){ | |
114 outAccuracy=b; | |
115 }else if(a.equals("outbad")){ | |
116 outBad=b; | |
117 }else if(a.equals("tree")){ | |
118 treeFile=b; | |
119 }else if(a.equals("shrinkonly")){ | |
120 shrinkOnly=Parse.parseBoolean(b); | |
121 }else if(a.equals("ssu") || a.equals("ssufile")){ | |
122 assert(false) : "ssu and ssufile are deprecated; please specify 16S or 18S independently"; | |
123 }else if(a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("16Sfile")){ | |
124 SSUMap.r16SFile=b; | |
125 }else if(a.equalsIgnoreCase("18S") || a.equalsIgnoreCase("18Sfile")){ | |
126 SSUMap.r18SFile=b; | |
127 }else if(a.equals("mash")){ | |
128 mode=MASH_MODE; | |
129 }else if(a.equals("sourmash")){ | |
130 mode=SOURMASH_MODE; | |
131 }else if(a.equals("blast")){ | |
132 mode=BLAST_MODE; | |
133 }else if(a.equals("bbsketch")){ | |
134 mode=BBSKETCH_MODE; | |
135 }else if(a.equals("lines")){ | |
136 maxLines=Long.parseLong(b); | |
137 if(maxLines<0){maxLines=Long.MAX_VALUE;} | |
138 }else if(a.equals("minsamples")){ | |
139 minSamples=Integer.parseInt(b); | |
140 }else if(a.equals("verbose")){ | |
141 RecordSet.verbose=Record.verbose=verbose=Parse.parseBoolean(b); | |
142 }else if(parser.parse(arg, a, b)){ | |
143 //do nothing | |
144 }else{ | |
145 outstream.println("Unknown parameter "+args[i]); | |
146 assert(false) : "Unknown parameter "+args[i]; | |
147 // throw new RuntimeException("Unknown parameter "+args[i]); | |
148 } | |
149 } | |
150 | |
151 return parser; | |
152 } | |
153 | |
154 /** Add or remove .gz or .bz2 as needed */ | |
155 private void fixExtensions(){ | |
156 in1=Tools.fixExtension(in1); | |
157 in2=Tools.fixExtension(in2); | |
158 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} | |
159 } | |
160 | |
161 /** Ensure files can be read and written */ | |
162 private void checkFileExistence(){ | |
163 //Ensure output files can be written | |
164 if(!Tools.testOutputFiles(overwrite, append, false, out1, outMap)){ | |
165 outstream.println((out1==null)+", "+out1); | |
166 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n"); | |
167 } | |
168 | |
169 //Ensure input files can be read | |
170 if(!Tools.testInputFiles(false, true, in1, in2)){ | |
171 throw new RuntimeException("\nCan't read some input files.\n"); | |
172 } | |
173 | |
174 //Ensure that no file was specified multiple times | |
175 if(!Tools.testForDuplicateFiles(true, in1, in2, out1, outMap)){ | |
176 throw new RuntimeException("\nSome file names were specified multiple times.\n"); | |
177 } | |
178 } | |
179 | |
180 /** Adjust file-related static fields as needed for this program */ | |
181 private static void checkStatics(){ | |
182 //Adjust the number of threads for input file reading | |
183 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
184 ByteFile.FORCE_MODE_BF2=true; | |
185 } | |
186 | |
187 // if(!ByteFile.FORCE_MODE_BF2){ | |
188 // ByteFile.FORCE_MODE_BF2=false; | |
189 // ByteFile.FORCE_MODE_BF1=true; | |
190 // } | |
191 } | |
192 | |
193 /*--------------------------------------------------------------*/ | |
194 /*---------------- Outer Methods ----------------*/ | |
195 /*--------------------------------------------------------------*/ | |
196 | |
197 void process(Timer t){ | |
198 if(verbose){System.err.println("process()");} | |
199 | |
200 ByteFile bf1=ByteFile.makeByteFile(ffin1); | |
201 ByteFile bf2=(ffin2==null ? null : ByteFile.makeByteFile(ffin2)); | |
202 | |
203 if(shrinkOnly){ | |
204 runShrinkOnly(bf1); | |
205 return; | |
206 } | |
207 | |
208 bswBad=makeBSW(ffoutBad); | |
209 ResultLineParser parser=new ResultLineParser(mode, tree, bswBad, recordSets, false); | |
210 | |
211 processInner(bf1, parser, ffin2==null ? null : aniMap); | |
212 ByteStreamWriter bsw=makeBSW(ffout1); | |
213 printResults(parser, bsw); | |
214 if(bsw!=null){errorState|=bsw.poisonAndWait();} | |
215 | |
216 ByteStreamWriter bswAcc=makeBSW(ffoutAccuracy); | |
217 if(recordSets!=null){ | |
218 if(SSUMap.hasMap()){ | |
219 processSetsThreaded(); | |
220 } | |
221 printAccuracy(bswAcc); | |
222 } | |
223 if(bswAcc!=null){errorState|=bswAcc.poisonAndWait();} | |
224 if(bswBad!=null){errorState|=bswBad.poisonAndWait();} | |
225 | |
226 ByteStreamWriter bswMap=makeBSW(ffoutMap); | |
227 if(bf2!=null){ | |
228 processInner(bf2, parser, aaiMap); | |
229 printMap(bswMap); | |
230 } | |
231 if(bswMap!=null){errorState|=bswMap.poisonAndWait();} | |
232 | |
233 t.stop(); | |
234 | |
235 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8)); | |
236 | |
237 outstream.println(); | |
238 outstream.println("Lines Out: \t"+linesOut); | |
239 outstream.println("Bytes Out: \t"+bytesOut); | |
240 | |
241 if(errorState){ | |
242 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
243 } | |
244 } | |
245 | |
246 /*--------------------------------------------------------------*/ | |
247 /*---------------- Inner Methods ----------------*/ | |
248 /*--------------------------------------------------------------*/ | |
249 | |
250 private void runShrinkOnly(ByteFile bf){ | |
251 if(verbose){System.err.println("runShrinkOnly("+bf.name()+")");} | |
252 ResultLineParser parser=new ResultLineParser(mode, tree, null, null, true); | |
253 ByteStreamWriter bsw=makeBSW(ffout1); | |
254 | |
255 byte[] line=bf.nextLine(); | |
256 | |
257 while(line!=null){ | |
258 if(line.length>0){ | |
259 if(maxLines>0 && linesProcessed>=maxLines){break;} | |
260 linesProcessed++; | |
261 bytesProcessed+=(line.length+1); | |
262 | |
263 parser.parse(line); | |
264 RecordSet rs=parser.processData(null, true); | |
265 if(rs!=null){ | |
266 rs.sortAndSweep(); | |
267 for(Record r : rs.records){ | |
268 if(bsw!=null){ | |
269 bsw.println(r.text); | |
270 } | |
271 } | |
272 } | |
273 } | |
274 line=bf.nextLine(); | |
275 } | |
276 errorState|=bf.close(); | |
277 | |
278 RecordSet rs=parser.currentSet; | |
279 if(rs!=null){ | |
280 rs.sortAndSweep(); | |
281 for(Record r : rs.records){ | |
282 if(bsw!=null){ | |
283 bsw.println(r.text); | |
284 } | |
285 } | |
286 } | |
287 | |
288 if(bsw!=null){errorState|=bsw.poisonAndWait();} | |
289 } | |
290 | |
291 private void processInner(ByteFile bf, ResultLineParser parser, HashMap<Long, Float> map){ | |
292 if(verbose){System.err.println("processInner("+bf.name()+")");} | |
293 byte[] line=bf.nextLine(); | |
294 | |
295 while(line!=null){ | |
296 if(line.length>0){ | |
297 if(maxLines>0 && linesProcessed>=maxLines){break;} | |
298 linesProcessed++; | |
299 bytesProcessed+=(line.length+1); | |
300 | |
301 parser.parse(line); | |
302 parser.processData(map, recordSets!=null); | |
303 } | |
304 line=bf.nextLine(); | |
305 } | |
306 errorState|=bf.close(); | |
307 } | |
308 | |
309 private void processSetsThreaded(){ | |
310 if(verbose){System.err.println("processSetsThreaded("+Shared.threads()+")");} | |
311 final int threads=Shared.threads(); | |
312 // final ThreadWaiter waiter=new ThreadWaiter(); | |
313 final ArrayList<SSUThread> list=new ArrayList<SSUThread>(threads); | |
314 final AtomicInteger atom=new AtomicInteger(0); | |
315 for(int i=0; i<threads; i++){ | |
316 list.add(new SSUThread(atom)); | |
317 } | |
318 boolean success=ThreadWaiter.startAndWait(list); | |
319 if(!success){errorState=true;} | |
320 } | |
321 | |
322 private void printAccuracy(ByteStreamWriter bswAcc){ | |
323 if(verbose){System.err.println("printAccuracy("+bswAcc+")");} | |
324 int[][] results=new int[taxLevels][16]; | |
325 for(RecordSet rs : recordSets){ | |
326 if(mode==MASH_MODE){ | |
327 rs.sortAndSweep(); | |
328 rs.processSSU(); | |
329 } | |
330 int[] statusArray=rs.test(bswBad); | |
331 for(int level=0; level<statusArray.length; level++){ | |
332 int status=statusArray[level]; | |
333 results[level][status]++; | |
334 assert(status==NOHIT || status==CORRECT || | |
335 status==INCORRECT_TAX_CORRECT_SSU || status==INCORRECT_TAX_INCORRECT_SSU || | |
336 status==INCORRECT_TAX_MISSING_SSU) : status; | |
337 } | |
338 } | |
339 ByteBuilder bb=new ByteBuilder(); | |
340 bb.append("#Level \tCorrect\tbadTaxGoodSSU\tBadTaxNoSSU\tbadTaxBadSSU\tNoHit\n"); | |
341 if(bswAcc!=null){bswAcc.print(bb);} | |
342 for(String levelName : printLevels){ | |
343 int level=TaxTree.stringToLevelExtended(levelName); | |
344 bb.clear(); | |
345 bb.append(TaxTree.levelToStringExtended(level)); | |
346 while(bb.length<"species subgroup".length()){bb.append(' ');} | |
347 bb.tab(); | |
348 bb.append(results[level][CORRECT]).tab(); | |
349 bb.append(results[level][INCORRECT_TAX_CORRECT_SSU]).tab(); | |
350 bb.append(results[level][INCORRECT_TAX_MISSING_SSU]).tab(); | |
351 bb.append(results[level][INCORRECT_TAX_INCORRECT_SSU]).tab(); | |
352 bb.append(results[level][NOHIT]).nl(); | |
353 if(bswAcc!=null){bswAcc.print(bb);} | |
354 } | |
355 } | |
356 | |
357 private void printMap(ByteStreamWriter bsw){ | |
358 if(verbose){System.err.println("printMap("+bsw+")");} | |
359 ByteBuilder bb=new ByteBuilder(); | |
360 bb.append("#qID\trID\tANI\tAAI\n"); | |
361 if(bsw!=null){bsw.print(bb);} | |
362 for(Entry<Long, Float> e : aniMap.entrySet()){ | |
363 long key=e.getKey(); | |
364 int qID=((int)(key>>>32)); | |
365 int rID=(int)(key&Integer.MAX_VALUE); | |
366 float ani=e.getValue(); | |
367 Float aai=aaiMap.get(key); | |
368 if(aai!=null){ | |
369 bb.clear(); | |
370 bb.append(qID).tab().append(rID).tab().append(ani, 3).tab().append(aai, 3).nl(); | |
371 if(bsw!=null){bsw.print(bb);} | |
372 linesOut++; | |
373 bytesOut+=bb.length(); | |
374 } | |
375 } | |
376 } | |
377 | |
378 private void printResults(ResultLineParser parser, ByteStreamWriter bsw){ | |
379 if(verbose){System.err.println("printResults("+bsw+")");} | |
380 ByteBuilder bb=new ByteBuilder(); | |
381 bb.append("#Level \tRank\tANI_AVG\tSSU_AVG\tANI_STD\tSSU_STD\tSamples"); | |
382 if(bsw!=null){bsw.println(bb);} | |
383 for(int level=0; level<taxLevels; level++){ | |
384 long aniCount=parser.levelCounts[level]; | |
385 long ssuCount=parser.levelCountsSSU[level]; | |
386 if(aniCount>=minSamples && ((1L<<level)&printLevelsMask)!=0){ | |
387 bb.clear(); | |
388 String name=TaxTree.levelToStringExtended(level); | |
389 double aniSum=parser.levelAniSums[level]; | |
390 double ssuSum=parser.levelSSUSums[level]; | |
391 FloatList aniList=parser.aniLists[level]; | |
392 FloatList ssuList=parser.ssuLists[level]; | |
393 aniList.sort(); | |
394 ssuList.sort(); | |
395 | |
396 double aniAvg=aniSum/aniCount; | |
397 double ssuAvg=ssuSum/ssuCount; | |
398 double aniStd=aniList.stdev(); | |
399 double ssuStd=ssuList.stdev(); | |
400 | |
401 bb.append(name); | |
402 while(bb.length<9){bb.append(' ');} | |
403 bb.tab().append(level); | |
404 bb.tab().append(aniAvg, 3); | |
405 bb.tab().append(ssuAvg, 3); | |
406 bb.tab().append(aniStd, 3); | |
407 bb.tab().append(ssuStd, 3); | |
408 bb.tab().append(aniCount); | |
409 if(bsw!=null){bsw.println(bb);} | |
410 linesOut++; | |
411 bytesOut+=bb.length(); | |
412 } | |
413 } | |
414 } | |
415 | |
416 private static ByteStreamWriter makeBSW(FileFormat ff){ | |
417 if(verbose){System.err.println("makeBSW("+ff+")");} | |
418 if(ff==null){return null;} | |
419 ByteStreamWriter bsw=new ByteStreamWriter(ff); | |
420 bsw.start(); | |
421 return bsw; | |
422 } | |
423 | |
424 private static long makePrintLevelsMask(String[] printLevelsArray){ | |
425 long mask=0; | |
426 for(String s : printLevelsArray){ | |
427 int level=TaxTree.stringToLevelExtended(s); | |
428 long bit=1L<<level; | |
429 assert(Long.bitCount(bit)==1); | |
430 mask|=bit; | |
431 } | |
432 return mask; | |
433 } | |
434 | |
435 /*--------------------------------------------------------------*/ | |
436 | |
437 private class SSUThread extends Thread{ | |
438 | |
439 SSUThread(AtomicInteger atom_){ | |
440 atom=atom_; | |
441 } | |
442 | |
443 @Override | |
444 public void run(){ | |
445 for(int idx=atom.getAndIncrement(); idx<recordSets.size(); idx=atom.getAndIncrement()){ | |
446 RecordSet rs=recordSets.get(idx); | |
447 rs.sortAndSweep(); | |
448 rs.processSSU(); | |
449 } | |
450 } | |
451 | |
452 private final AtomicInteger atom; | |
453 } | |
454 | |
455 /*--------------------------------------------------------------*/ | |
456 | |
457 | |
458 | |
459 /*--------------------------------------------------------------*/ | |
460 /*---------------- Fields ----------------*/ | |
461 /*--------------------------------------------------------------*/ | |
462 | |
463 private String in1=null; | |
464 private String in2=null; | |
465 private String out1="stdout.txt"; | |
466 private String outMap=null; | |
467 private String outAccuracy=null; | |
468 private String outBad=null; | |
469 private String treeFile=null; | |
470 | |
471 private ByteStreamWriter bswBad=null; | |
472 | |
473 /*--------------------------------------------------------------*/ | |
474 | |
475 private long linesProcessed=0; | |
476 private long linesOut=0; | |
477 private long bytesProcessed=0; | |
478 private long bytesOut=0; | |
479 | |
480 private boolean shrinkOnly=false; | |
481 | |
482 int minSamples=1; | |
483 private long maxLines=Long.MAX_VALUE; | |
484 | |
485 final static int taxLevels=TaxTree.numTaxLevelNamesExtended; | |
486 | |
487 final HashMap<Long, Float> aniMap=new HashMap<Long, Float>(); | |
488 final HashMap<Long, Float> aaiMap=new HashMap<Long, Float>(); | |
489 | |
490 final TaxTree tree; | |
491 final ArrayList<RecordSet> recordSets; | |
492 | |
493 // static HashMap<Integer, byte[]> ssuMap; | |
494 | |
495 static final String[] printLevels=new String[] {"strain", "species", "genus", "family", "order", "class", "phylum", "superkingdom", "life"}; | |
496 static final long printLevelsMask=makePrintLevelsMask(printLevels); | |
497 | |
498 /*--------------------------------------------------------------*/ | |
499 | |
500 static int NOHIT=0; | |
501 static int CORRECT=1; | |
502 static int INCORRECT_TAX=2; | |
503 static int INCORRECT_SSU=4; | |
504 static int MISSING_SSU=8; | |
505 private static int INCORRECT_TAX_CORRECT_SSU=INCORRECT_TAX; | |
506 private static int INCORRECT_TAX_INCORRECT_SSU=INCORRECT_TAX|INCORRECT_SSU; | |
507 private static int INCORRECT_TAX_MISSING_SSU=INCORRECT_TAX|MISSING_SSU; | |
508 | |
509 static final int BBSKETCH_MODE=0; | |
510 static final int MASH_MODE=1; | |
511 static final int SOURMASH_MODE=2; | |
512 static final int BLAST_MODE=3; | |
513 static int mode=BBSKETCH_MODE; | |
514 | |
515 | |
516 /*--------------------------------------------------------------*/ | |
517 /*---------------- Final Fields ----------------*/ | |
518 /*--------------------------------------------------------------*/ | |
519 | |
520 private final FileFormat ffin1; | |
521 private final FileFormat ffin2; | |
522 private final FileFormat ffout1; | |
523 private final FileFormat ffoutMap; | |
524 private final FileFormat ffoutAccuracy; | |
525 private final FileFormat ffoutBad; | |
526 | |
527 /*--------------------------------------------------------------*/ | |
528 /*---------------- Common Fields ----------------*/ | |
529 /*--------------------------------------------------------------*/ | |
530 | |
531 private PrintStream outstream=System.err; | |
532 public static boolean verbose=false; | |
533 public boolean errorState=false; | |
534 private boolean overwrite=false; | |
535 private boolean append=false; | |
536 | |
537 } |