annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ExplodeTree.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package tax;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.LinkedHashMap;
jpayne@68 6 import java.util.Locale;
jpayne@68 7
jpayne@68 8 import fileIO.ByteFile;
jpayne@68 9 import fileIO.ByteStreamWriter;
jpayne@68 10 import fileIO.FileFormat;
jpayne@68 11 import fileIO.ReadWrite;
jpayne@68 12 import fileIO.TextStreamWriter;
jpayne@68 13 import shared.Parse;
jpayne@68 14 import shared.Parser;
jpayne@68 15 import shared.PreParser;
jpayne@68 16 import shared.Shared;
jpayne@68 17 import shared.Timer;
jpayne@68 18 import shared.Tools;
jpayne@68 19 import stream.FastaReadInputStream;
jpayne@68 20
jpayne@68 21 /**
jpayne@68 22 * Constructs a directory and file tree of sequences
jpayne@68 23 * corresponding to a taxonomic tree.
jpayne@68 24 *
jpayne@68 25 * @author Brian Bushnell
jpayne@68 26 * @date December 12, 2017
jpayne@68 27 *
jpayne@68 28 */
jpayne@68 29 public class ExplodeTree {
jpayne@68 30
jpayne@68 31 /*--------------------------------------------------------------*/
jpayne@68 32 /*---------------- Initialization ----------------*/
jpayne@68 33 /*--------------------------------------------------------------*/
jpayne@68 34
jpayne@68 35 /**
jpayne@68 36 * Code entrance from the command line.
jpayne@68 37 * @param args Command line arguments
jpayne@68 38 */
jpayne@68 39 public static void main(String[] args){
jpayne@68 40 Timer t=new Timer();
jpayne@68 41 ExplodeTree x=new ExplodeTree(args);
jpayne@68 42 x.process(t);
jpayne@68 43
jpayne@68 44 //Close the print stream if it was redirected
jpayne@68 45 Shared.closeStream(x.outstream);
jpayne@68 46 }
jpayne@68 47
jpayne@68 48 /**
jpayne@68 49 * Constructor.
jpayne@68 50 * @param args Command line arguments
jpayne@68 51 */
jpayne@68 52 public ExplodeTree(String[] args){
jpayne@68 53
jpayne@68 54 {//Preparse block for help, config files, and outstream
jpayne@68 55 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 56 args=pp.args;
jpayne@68 57 outstream=pp.outstream;
jpayne@68 58 }
jpayne@68 59
jpayne@68 60 //Set shared static variables
jpayne@68 61 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 62 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 63
jpayne@68 64 //Create a parser object
jpayne@68 65 Parser parser=new Parser();
jpayne@68 66
jpayne@68 67 //Parse each argument
jpayne@68 68 for(int i=0; i<args.length; i++){
jpayne@68 69 String arg=args[i];
jpayne@68 70
jpayne@68 71 //Break arguments into their constituent parts, in the form of "a=b"
jpayne@68 72 String[] split=arg.split("=");
jpayne@68 73 String a=split[0].toLowerCase();
jpayne@68 74 String b=split.length>1 ? split[1] : null;
jpayne@68 75
jpayne@68 76 if(a.equals("verbose")){
jpayne@68 77 verbose=Parse.parseBoolean(b);
jpayne@68 78 }else if(a.equals("out") || a.equals("path") || a.equals("outpath")){
jpayne@68 79 outPath=b;
jpayne@68 80 }else if(a.equals("prefix")){
jpayne@68 81 prefix=b;
jpayne@68 82 }else if(a.equals("results") || a.equals("result")){
jpayne@68 83 resultsFile=b;
jpayne@68 84 }else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){
jpayne@68 85 makeDirectories=Parse.parseBoolean(b);
jpayne@68 86 }else if(a.equals("tree") || a.equals("taxtree")){
jpayne@68 87 taxTreeFile=b;
jpayne@68 88 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
jpayne@68 89 //do nothing
jpayne@68 90 }else{
jpayne@68 91 outstream.println("Unknown parameter "+args[i]);
jpayne@68 92 assert(false) : "Unknown parameter "+args[i];
jpayne@68 93 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 94 }
jpayne@68 95 }
jpayne@68 96 if(prefix==null){prefix="";}
jpayne@68 97 if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}
jpayne@68 98
jpayne@68 99 {//Process parser fields
jpayne@68 100 Parser.processQuality();
jpayne@68 101
jpayne@68 102 maxReads=parser.maxReads;
jpayne@68 103
jpayne@68 104 overwrite=parser.overwrite;
jpayne@68 105
jpayne@68 106 in1=parser.in1;
jpayne@68 107
jpayne@68 108 extin=parser.extin;
jpayne@68 109 }
jpayne@68 110
jpayne@68 111 if(outPath==null || outPath.trim().length()==0){outPath="";}
jpayne@68 112 else{
jpayne@68 113 outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/");
jpayne@68 114 if(!outPath.endsWith("/")){outPath=outPath+"/";}
jpayne@68 115 }
jpayne@68 116
jpayne@68 117 assert(FastaReadInputStream.settingsOK());
jpayne@68 118
jpayne@68 119 //Ensure there is an input file
jpayne@68 120 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 121
jpayne@68 122 //Adjust the number of threads for input file reading
jpayne@68 123 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 124 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 125 }
jpayne@68 126
jpayne@68 127 //Ensure output files can be written
jpayne@68 128 if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){
jpayne@68 129 outstream.println(resultsFile);
jpayne@68 130 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n");
jpayne@68 131 }
jpayne@68 132
jpayne@68 133 //Ensure input files can be read
jpayne@68 134 if(!Tools.testInputFiles(false, true, in1)){
jpayne@68 135 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 136 }
jpayne@68 137
jpayne@68 138 //Ensure that no file was specified multiple times
jpayne@68 139 if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){
jpayne@68 140 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 141 }
jpayne@68 142
jpayne@68 143 //Create input FileFormat objects
jpayne@68 144 ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true);
jpayne@68 145
jpayne@68 146 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false);
jpayne@68 147 }
jpayne@68 148
jpayne@68 149 /*--------------------------------------------------------------*/
jpayne@68 150 /*---------------- Outer Methods ----------------*/
jpayne@68 151 /*--------------------------------------------------------------*/
jpayne@68 152
jpayne@68 153 public void makeDirectoryTree(String root, boolean writeNames){
jpayne@68 154 for(TaxNode node : tree.nodes){
jpayne@68 155 if(node!=null){
jpayne@68 156 String dir=tree.toDir(node, root);
jpayne@68 157 File df=new File(dir);
jpayne@68 158 if(!df.exists()){df.mkdirs();}
jpayne@68 159 if(writeNames){
jpayne@68 160 try {
jpayne@68 161 String fname=node.simpleName()+".name";
jpayne@68 162 File nf=new File(fname);
jpayne@68 163 if(!nf.exists()){
jpayne@68 164 ReadWrite.writeString(node.name, dir+fname);
jpayne@68 165 }
jpayne@68 166 } catch (Exception e) {
jpayne@68 167 // TODO Auto-generated catch block
jpayne@68 168 e.printStackTrace();
jpayne@68 169 }
jpayne@68 170 }
jpayne@68 171 }
jpayne@68 172 }
jpayne@68 173 }
jpayne@68 174
jpayne@68 175 /** Create read streams and process all data */
jpayne@68 176 public void process(Timer t){
jpayne@68 177
jpayne@68 178 Timer t2=new Timer();
jpayne@68 179 if(makeDirectories){
jpayne@68 180 makeDirectoryTree(outPath, true);
jpayne@68 181 t2.stop("Finished making directories. ");
jpayne@68 182 t2.start();
jpayne@68 183 }
jpayne@68 184 processInner();
jpayne@68 185 t2.stop();
jpayne@68 186 t2.stop("Finished writing data. ");
jpayne@68 187
jpayne@68 188 //Do anything necessary after processing
jpayne@68 189
jpayne@68 190 if(resultsFile!=null){
jpayne@68 191 TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false);
jpayne@68 192 tsw.start();
jpayne@68 193 for(TaxNode tn : nodes.keySet()){
jpayne@68 194 Long data=nodes.get(tn);
jpayne@68 195 if(data==null){data=0L;}
jpayne@68 196 tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name);
jpayne@68 197 }
jpayne@68 198 errorState|=tsw.poisonAndWait();
jpayne@68 199 }
jpayne@68 200
jpayne@68 201 //Report timing and results
jpayne@68 202 {
jpayne@68 203 t.stop();
jpayne@68 204
jpayne@68 205 //Calculate units per nanosecond
jpayne@68 206 double rpnano=readsProcessed/(double)(t.elapsed);
jpayne@68 207 double lpnano=linesProcessed/(double)(t.elapsed);
jpayne@68 208 double bpnano=basesProcessed/(double)(t.elapsed);
jpayne@68 209
jpayne@68 210 //Add "k" and "m" for large numbers
jpayne@68 211 String rpstring=Tools.padKM(readsProcessed, 8);
jpayne@68 212 String lpstring=Tools.padKM(linesProcessed, 8);
jpayne@68 213 String bpstring=Tools.padKM(basesProcessed, 8);
jpayne@68 214
jpayne@68 215 String li="Lines In: \t"+linesProcessed+" lines";
jpayne@68 216 String lo="Lines Out: \t"+linesOut+" lines";
jpayne@68 217 while(lo.length()<li.length()){lo=lo+" ";}
jpayne@68 218
jpayne@68 219 String ri="Reads In: \t"+readsProcessed+" reads";
jpayne@68 220 String ro="Reads Out: \t"+readsOut+" reads";
jpayne@68 221 while(ro.length()<ri.length()){ro=ro+" ";}
jpayne@68 222
jpayne@68 223 outstream.println(ri+"\t"+basesProcessed+" bases");
jpayne@68 224 outstream.println(ro+"\t"+basesOut+" bases");
jpayne@68 225 outstream.println(li);
jpayne@68 226 outstream.println(lo);
jpayne@68 227 outstream.println();
jpayne@68 228
jpayne@68 229 outstream.println("Time: \t"+t);
jpayne@68 230 outstream.println("Reads Processed: "+rpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", rpnano*1000000));
jpayne@68 231 outstream.println("Lines Processed: "+lpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", lpnano*1000000));
jpayne@68 232 outstream.println("Bases Processed: "+bpstring+" \t"+String.format(Locale.ROOT, "%.2fm bases/sec", bpnano*1000));
jpayne@68 233 }
jpayne@68 234
jpayne@68 235 //Throw an exception of there was an error in a thread
jpayne@68 236 if(errorState){
jpayne@68 237 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 238 }
jpayne@68 239 }
jpayne@68 240
jpayne@68 241 /*--------------------------------------------------------------*/
jpayne@68 242 /*---------------- Inner Methods ----------------*/
jpayne@68 243 /*--------------------------------------------------------------*/
jpayne@68 244
jpayne@68 245 /** Iterate through the reads */
jpayne@68 246 void processInner(){
jpayne@68 247 ByteFile bf=ByteFile.makeByteFile(ffin1);
jpayne@68 248 TaxNode currentNode=null;
jpayne@68 249 long currentSize=0;
jpayne@68 250 ByteStreamWriter bsw=null;
jpayne@68 251 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
jpayne@68 252 linesProcessed++;
jpayne@68 253 if(line.length>0){
jpayne@68 254 final boolean header=(line[0]=='>');
jpayne@68 255 if(header){
jpayne@68 256 if(maxReads>0 && readsProcessed>=maxReads){break;}
jpayne@68 257 readsProcessed++;
jpayne@68 258 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
jpayne@68 259
jpayne@68 260 final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false);
jpayne@68 261
jpayne@68 262 if(tn==null || tn!=currentNode){
jpayne@68 263 if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;}
jpayne@68 264 }
jpayne@68 265 if(tn!=null && tn!=currentNode){
jpayne@68 266 String dir=tree.toDir(tn, outPath);
jpayne@68 267 final boolean found=nodes.containsKey(tn);
jpayne@68 268 if(!found){nodes.put(tn, 0L);}
jpayne@68 269 FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false);
jpayne@68 270 bsw=new ByteStreamWriter(ff);
jpayne@68 271 bsw.start();
jpayne@68 272 }
jpayne@68 273
jpayne@68 274 currentNode=tn;
jpayne@68 275 currentSize=0;
jpayne@68 276 if(bsw!=null){readsOut++;}
jpayne@68 277 }else{
jpayne@68 278 basesProcessed+=line.length;
jpayne@68 279 currentSize+=line.length;
jpayne@68 280 }
jpayne@68 281 if(bsw!=null){
jpayne@68 282 linesOut++;
jpayne@68 283 if(!header){basesOut+=line.length;}
jpayne@68 284 bsw.println(line);
jpayne@68 285 }
jpayne@68 286 }
jpayne@68 287 }
jpayne@68 288 if(bsw!=null){
jpayne@68 289 errorState=bsw.poisonAndWait()|errorState; bsw=null;
jpayne@68 290 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
jpayne@68 291 }
jpayne@68 292 bf.close();
jpayne@68 293 }
jpayne@68 294
jpayne@68 295 /*--------------------------------------------------------------*/
jpayne@68 296 /*---------------- Fields ----------------*/
jpayne@68 297 /*--------------------------------------------------------------*/
jpayne@68 298
jpayne@68 299 /** Primary input file path */
jpayne@68 300 private String in1=null;
jpayne@68 301
jpayne@68 302 /** Primary output file path */
jpayne@68 303 private String outPath=null;
jpayne@68 304
jpayne@68 305 private String prefix;
jpayne@68 306
jpayne@68 307 /** Override input file extension */
jpayne@68 308 private String extin=null;
jpayne@68 309
jpayne@68 310 /** For listing what is present in the output */
jpayne@68 311 public String resultsFile=null;
jpayne@68 312
jpayne@68 313 public String taxTreeFile=null;
jpayne@68 314
jpayne@68 315 public boolean makeDirectories=true;
jpayne@68 316
jpayne@68 317 public LinkedHashMap<TaxNode, Long> nodes=new LinkedHashMap<TaxNode, Long>();
jpayne@68 318
jpayne@68 319 /*--------------------------------------------------------------*/
jpayne@68 320
jpayne@68 321 /** Number of reads processed */
jpayne@68 322 protected long readsProcessed=0;
jpayne@68 323 /** Number of lines processed */
jpayne@68 324 protected long linesProcessed=0;
jpayne@68 325 /** Number of bases processed */
jpayne@68 326 protected long basesProcessed=0;
jpayne@68 327
jpayne@68 328 /** Number of reads out */
jpayne@68 329 public long readsOut=0;
jpayne@68 330 /** Number of lines out */
jpayne@68 331 public long linesOut=0;
jpayne@68 332 /** Number of bases out */
jpayne@68 333 public long basesOut=0;
jpayne@68 334
jpayne@68 335 /** Quit after processing this many input reads; -1 means no limit */
jpayne@68 336 private long maxReads=-1;
jpayne@68 337
jpayne@68 338 /*--------------------------------------------------------------*/
jpayne@68 339 /*---------------- Final Fields ----------------*/
jpayne@68 340 /*--------------------------------------------------------------*/
jpayne@68 341
jpayne@68 342 /** Primary input file */
jpayne@68 343 private final FileFormat ffin1;
jpayne@68 344
jpayne@68 345 private final TaxTree tree;
jpayne@68 346
jpayne@68 347 /*--------------------------------------------------------------*/
jpayne@68 348 /*---------------- Common Fields ----------------*/
jpayne@68 349 /*--------------------------------------------------------------*/
jpayne@68 350
jpayne@68 351 /** Print status messages to this output stream */
jpayne@68 352 private PrintStream outstream=System.err;
jpayne@68 353 /** Print verbose messages */
jpayne@68 354 public static boolean verbose=false;
jpayne@68 355 /** True if an error was encountered */
jpayne@68 356 public boolean errorState=false;
jpayne@68 357 /** Overwrite existing output files */
jpayne@68 358 private boolean overwrite=true;
jpayne@68 359
jpayne@68 360 }