Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ExplodeTree.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package tax; import java.io.File; import java.io.PrintStream; import java.util.LinkedHashMap; import java.util.Locale; import fileIO.ByteFile; import fileIO.ByteStreamWriter; import fileIO.FileFormat; import fileIO.ReadWrite; import fileIO.TextStreamWriter; import shared.Parse; import shared.Parser; import shared.PreParser; import shared.Shared; import shared.Timer; import shared.Tools; import stream.FastaReadInputStream; /** * Constructs a directory and file tree of sequences * corresponding to a taxonomic tree. * * @author Brian Bushnell * @date December 12, 2017 * */ public class ExplodeTree { /*--------------------------------------------------------------*/ /*---------------- Initialization ----------------*/ /*--------------------------------------------------------------*/ /** * Code entrance from the command line. * @param args Command line arguments */ public static void main(String[] args){ Timer t=new Timer(); ExplodeTree x=new ExplodeTree(args); x.process(t); //Close the print stream if it was redirected Shared.closeStream(x.outstream); } /** * Constructor. * @param args Command line arguments */ public ExplodeTree(String[] args){ {//Preparse block for help, config files, and outstream PreParser pp=new PreParser(args, getClass(), false); args=pp.args; outstream=pp.outstream; } //Set shared static variables ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; ReadWrite.MAX_ZIP_THREADS=Shared.threads(); //Create a parser object Parser parser=new Parser(); //Parse each argument for(int i=0; i<args.length; i++){ String arg=args[i]; //Break arguments into their constituent parts, in the form of "a=b" String[] split=arg.split("="); String a=split[0].toLowerCase(); String b=split.length>1 ? split[1] : null; if(a.equals("verbose")){ verbose=Parse.parseBoolean(b); }else if(a.equals("out") || a.equals("path") || a.equals("outpath")){ outPath=b; }else if(a.equals("prefix")){ prefix=b; }else if(a.equals("results") || a.equals("result")){ resultsFile=b; }else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){ makeDirectories=Parse.parseBoolean(b); }else if(a.equals("tree") || a.equals("taxtree")){ taxTreeFile=b; }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser //do nothing }else{ outstream.println("Unknown parameter "+args[i]); assert(false) : "Unknown parameter "+args[i]; // throw new RuntimeException("Unknown parameter "+args[i]); } } if(prefix==null){prefix="";} if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();} {//Process parser fields Parser.processQuality(); maxReads=parser.maxReads; overwrite=parser.overwrite; in1=parser.in1; extin=parser.extin; } if(outPath==null || outPath.trim().length()==0){outPath="";} else{ outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/"); if(!outPath.endsWith("/")){outPath=outPath+"/";} } assert(FastaReadInputStream.settingsOK()); //Ensure there is an input file if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} //Adjust the number of threads for input file reading if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ ByteFile.FORCE_MODE_BF2=true; } //Ensure output files can be written if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){ outstream.println(resultsFile); throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n"); } //Ensure input files can be read if(!Tools.testInputFiles(false, true, in1)){ throw new RuntimeException("\nCan't read some input files.\n"); } //Ensure that no file was specified multiple times if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){ throw new RuntimeException("\nSome file names were specified multiple times.\n"); } //Create input FileFormat objects ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true); tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false); } /*--------------------------------------------------------------*/ /*---------------- Outer Methods ----------------*/ /*--------------------------------------------------------------*/ public void makeDirectoryTree(String root, boolean writeNames){ for(TaxNode node : tree.nodes){ if(node!=null){ String dir=tree.toDir(node, root); File df=new File(dir); if(!df.exists()){df.mkdirs();} if(writeNames){ try { String fname=node.simpleName()+".name"; File nf=new File(fname); if(!nf.exists()){ ReadWrite.writeString(node.name, dir+fname); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } } } } /** Create read streams and process all data */ public void process(Timer t){ Timer t2=new Timer(); if(makeDirectories){ makeDirectoryTree(outPath, true); t2.stop("Finished making directories. "); t2.start(); } processInner(); t2.stop(); t2.stop("Finished writing data. "); //Do anything necessary after processing if(resultsFile!=null){ TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false); tsw.start(); for(TaxNode tn : nodes.keySet()){ Long data=nodes.get(tn); if(data==null){data=0L;} tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name); } errorState|=tsw.poisonAndWait(); } //Report timing and results { t.stop(); //Calculate units per nanosecond double rpnano=readsProcessed/(double)(t.elapsed); double lpnano=linesProcessed/(double)(t.elapsed); double bpnano=basesProcessed/(double)(t.elapsed); //Add "k" and "m" for large numbers String rpstring=Tools.padKM(readsProcessed, 8); String lpstring=Tools.padKM(linesProcessed, 8); String bpstring=Tools.padKM(basesProcessed, 8); String li="Lines In: \t"+linesProcessed+" lines"; String lo="Lines Out: \t"+linesOut+" lines"; while(lo.length()<li.length()){lo=lo+" ";} String ri="Reads In: \t"+readsProcessed+" reads"; String ro="Reads Out: \t"+readsOut+" reads"; while(ro.length()<ri.length()){ro=ro+" ";} outstream.println(ri+"\t"+basesProcessed+" bases"); outstream.println(ro+"\t"+basesOut+" bases"); outstream.println(li); outstream.println(lo); outstream.println(); outstream.println("Time: \t"+t); outstream.println("Reads Processed: "+rpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", rpnano*1000000)); outstream.println("Lines Processed: "+lpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", lpnano*1000000)); outstream.println("Bases Processed: "+bpstring+" \t"+String.format(Locale.ROOT, "%.2fm bases/sec", bpnano*1000)); } //Throw an exception of there was an error in a thread if(errorState){ throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); } } /*--------------------------------------------------------------*/ /*---------------- Inner Methods ----------------*/ /*--------------------------------------------------------------*/ /** Iterate through the reads */ void processInner(){ ByteFile bf=ByteFile.makeByteFile(ffin1); TaxNode currentNode=null; long currentSize=0; ByteStreamWriter bsw=null; for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){ linesProcessed++; if(line.length>0){ final boolean header=(line[0]=='>'); if(header){ if(maxReads>0 && readsProcessed>=maxReads){break;} readsProcessed++; if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);} final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false); if(tn==null || tn!=currentNode){ if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;} } if(tn!=null && tn!=currentNode){ String dir=tree.toDir(tn, outPath); final boolean found=nodes.containsKey(tn); if(!found){nodes.put(tn, 0L);} FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false); bsw=new ByteStreamWriter(ff); bsw.start(); } currentNode=tn; currentSize=0; if(bsw!=null){readsOut++;} }else{ basesProcessed+=line.length; currentSize+=line.length; } if(bsw!=null){ linesOut++; if(!header){basesOut+=line.length;} bsw.println(line); } } } if(bsw!=null){ errorState=bsw.poisonAndWait()|errorState; bsw=null; if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);} } bf.close(); } /*--------------------------------------------------------------*/ /*---------------- Fields ----------------*/ /*--------------------------------------------------------------*/ /** Primary input file path */ private String in1=null; /** Primary output file path */ private String outPath=null; private String prefix; /** Override input file extension */ private String extin=null; /** For listing what is present in the output */ public String resultsFile=null; public String taxTreeFile=null; public boolean makeDirectories=true; public LinkedHashMap<TaxNode, Long> nodes=new LinkedHashMap<TaxNode, Long>(); /*--------------------------------------------------------------*/ /** Number of reads processed */ protected long readsProcessed=0; /** Number of lines processed */ protected long linesProcessed=0; /** Number of bases processed */ protected long basesProcessed=0; /** Number of reads out */ public long readsOut=0; /** Number of lines out */ public long linesOut=0; /** Number of bases out */ public long basesOut=0; /** Quit after processing this many input reads; -1 means no limit */ private long maxReads=-1; /*--------------------------------------------------------------*/ /*---------------- Final Fields ----------------*/ /*--------------------------------------------------------------*/ /** Primary input file */ private final FileFormat ffin1; private final TaxTree tree; /*--------------------------------------------------------------*/ /*---------------- Common Fields ----------------*/ /*--------------------------------------------------------------*/ /** Print status messages to this output stream */ private PrintStream outstream=System.err; /** Print verbose messages */ public static boolean verbose=false; /** True if an error was encountered */ public boolean errorState=false; /** Overwrite existing output files */ private boolean overwrite=true; }