Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ExplodeTree.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
package tax;

import java.io.File;
import java.io.PrintStream;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

import fileIO.ByteFile;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import fileIO.TextStreamWriter;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import stream.FastaReadInputStream;

/**
 * Constructs a directory and file tree of sequences
 * corresponding to a taxonomic tree.
 *
 * Input is read line by line as FASTA.  Each header is resolved to a TaxNode,
 * and the record is appended to a gzipped FASTA file located in that node's
 * directory under the output root.
 *
 * @author Brian Bushnell
 * @date December 12, 2017
 *
 */
public class ExplodeTree {

    /*--------------------------------------------------------------*/
    /*----------------        Initialization        ----------------*/
    /*--------------------------------------------------------------*/

    /**
     * Code entrance from the command line.
     * @param args Command line arguments
     */
    public static void main(String[] args){
        Timer t=new Timer();
        ExplodeTree x=new ExplodeTree(args);
        x.process(t);

        //Close the print stream if it was redirected
        Shared.closeStream(x.outstream);
    }

    /**
     * Constructor.
     * Parses the arguments, validates the input and results files,
     * and loads the taxonomic tree.
     *
     * Arguments handled here (all in "key=value" form):
     * verbose; out/path/outpath; prefix; results/result;
     * makedirectories/mkdirs/mkdir; tree/taxtree.
     * Anything else is offered to the shared Parser.
     *
     * @param args Command line arguments
     * @throws RuntimeException If no input file was given, the results file
     * cannot be written, the input cannot be read, or a file was named twice
     */
    public ExplodeTree(String[] args){

        {//Preparse block for help, config files, and outstream
            PreParser pp=new PreParser(args, getClass(), false);
            args=pp.args;
            outstream=pp.outstream;
        }

        //Set shared static variables
        ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
        ReadWrite.MAX_ZIP_THREADS=Shared.threads();

        //Create a parser object
        Parser parser=new Parser();

        //Parse each argument
        for(int i=0; i<args.length; i++){
            String arg=args[i];

            //Break arguments into their constituent parts, in the form of "a=b"
            String[] split=arg.split("=");
            //Locale.ROOT keeps key matching independent of the platform's default locale
            String a=split[0].toLowerCase(Locale.ROOT);
            String b=split.length>1 ? split[1] : null;

            if(a.equals("verbose")){
                verbose=Parse.parseBoolean(b);
            }else if(a.equals("out") || a.equals("path") || a.equals("outpath")){
                outPath=b;
            }else if(a.equals("prefix")){
                prefix=b;
            }else if(a.equals("results") || a.equals("result")){
                resultsFile=b;
            }else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){
                makeDirectories=Parse.parseBoolean(b);
            }else if(a.equals("tree") || a.equals("taxtree")){
                taxTreeFile=b;
            }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
                //do nothing
            }else{
                //Unknown flags are reported; they only abort when assertions are enabled
                outstream.println("Unknown parameter "+args[i]);
                assert(false) : "Unknown parameter "+args[i];
            }
        }
        if(prefix==null){prefix="";}
        if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}

        {//Process parser fields
            Parser.processQuality();

            maxReads=parser.maxReads;

            overwrite=parser.overwrite;

            in1=parser.in1;

            extin=parser.extin;
        }

        //Normalize the output root: forward slashes only, no repeats, trailing slash
        if(outPath==null || outPath.trim().length()==0){outPath="";}
        else{
            outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/");
            if(!outPath.endsWith("/")){outPath=outPath+"/";}
        }

        assert(FastaReadInputStream.settingsOK());

        //Ensure there is an input file
        if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}

        //Adjust the number of threads for input file reading
        if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
            ByteFile.FORCE_MODE_BF2=true;
        }

        //Ensure output files can be written
        if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){
            outstream.println(resultsFile);
            throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n");
        }

        //Ensure input files can be read
        if(!Tools.testInputFiles(false, true, in1)){
            throw new RuntimeException("\nCan't read some input files.\n");
        }

        //Ensure that no file was specified multiple times
        if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){
            throw new RuntimeException("\nSome file names were specified multiple times.\n");
        }

        //Create input FileFormat objects
        ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true);

        tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false);
    }

    /*--------------------------------------------------------------*/
    /*----------------         Outer Methods        ----------------*/
    /*--------------------------------------------------------------*/

    /**
     * Creates one directory per non-null node of the tree, and optionally
     * a "<simpleName>.name" file inside it holding the node's name.
     * Existing directories and name files are left alone.
     *
     * @param root Path prefix handed to TaxTree.toDir for every node
     * @param writeNames True to also write the per-node name files
     */
    public void makeDirectoryTree(String root, boolean writeNames){
        for(TaxNode node : tree.nodes){
            if(node==null){continue;}

            final String dir=tree.toDir(node, root);
            final File df=new File(dir);
            if(!df.exists()){df.mkdirs();}
            if(!writeNames){continue;}

            try {
                //Test the same path that is written; a bare file name would
                //be resolved against the working directory instead of dir.
                final String namePath=dir+node.simpleName()+".name";
                if(!new File(namePath).exists()){
                    ReadWrite.writeString(node.name, namePath);
                }
            } catch (Exception e) {
                //Best effort: one failed name file should not stop the rest of the tree
                e.printStackTrace();
            }
        }
    }

    /**
     * Create read streams and process all data.
     * Optionally builds the directory tree, splits the input by taxonomic node,
     * writes the results file if one was requested, and prints a summary.
     *
     * @param t Timer covering the whole run; stopped here and used for the rates
     * @throws RuntimeException If an error state was recorded during processing
     */
    public void process(Timer t){

        Timer t2=new Timer();
        if(makeDirectories){
            makeDirectoryTree(outPath, true);
            t2.stop("Finished making directories. ");
            t2.start();
        }
        processInner();
        t2.stop("Finished writing data. ");

        //Do anything necessary after processing

        if(resultsFile!=null){
            TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false);
            tsw.start();
            //One line per node that received output, in first-seen order
            for(Map.Entry<TaxNode, Long> entry : nodes.entrySet()){
                final TaxNode tn=entry.getKey();
                Long data=entry.getValue();
                if(data==null){data=0L;}
                tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name);
            }
            errorState|=tsw.poisonAndWait();
        }

        //Report timing and results
        {
            t.stop();

            //Calculate units per nanosecond
            double rpnano=readsProcessed/(double)(t.elapsed);
            double lpnano=linesProcessed/(double)(t.elapsed);
            double bpnano=basesProcessed/(double)(t.elapsed);

            //Add "k" and "m" for large numbers
            String rpstring=Tools.padKM(readsProcessed, 8);
            String lpstring=Tools.padKM(linesProcessed, 8);
            String bpstring=Tools.padKM(basesProcessed, 8);

            //Pad the "Out" labels so they are at least as long as the "In" labels
            String li="Lines In: \t"+linesProcessed+" lines";
            String lo="Lines Out: \t"+linesOut+" lines";
            while(lo.length()<li.length()){lo=lo+" ";}

            String ri="Reads In: \t"+readsProcessed+" reads";
            String ro="Reads Out: \t"+readsOut+" reads";
            while(ro.length()<ri.length()){ro=ro+" ";}

            outstream.println(ri+"\t"+basesProcessed+" bases");
            outstream.println(ro+"\t"+basesOut+" bases");
            outstream.println(li);
            outstream.println(lo);
            outstream.println();

            outstream.println("Time: \t"+t);
            outstream.println("Reads Processed: "+rpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", rpnano*1000000));
            outstream.println("Lines Processed: "+lpstring+" \t"+String.format(Locale.ROOT, "%.2fk lines/sec", lpnano*1000000));
            outstream.println("Bases Processed: "+bpstring+" \t"+String.format(Locale.ROOT, "%.2fm bases/sec", bpnano*1000));
        }

        //Throw an exception if there was an error in a thread
        if(errorState){
            throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
        }
    }

    /*--------------------------------------------------------------*/
    /*----------------         Inner Methods        ----------------*/
    /*--------------------------------------------------------------*/

    /**
     * Iterate through the reads.
     * Lines starting with '>' are headers; each header selects the node whose
     * file receives the following lines.  A writer stays open while consecutive
     * headers map to the same node.  Records whose header maps to no node are
     * counted as processed but not written.
     */
    void processInner(){
        final ByteFile bf=ByteFile.makeByteFile(ffin1);
        TaxNode currentNode=null;
        long currentSize=0;
        ByteStreamWriter bsw=null;
        try{
            for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
                linesProcessed++;
                if(line.length<1){continue;}

                final boolean header=(line[0]=='>');
                if(header){
                    if(maxReads>0 && readsProcessed>=maxReads){break;}
                    readsProcessed++;

                    //Credit the record that just ended to its node
                    if(currentNode!=null){addBases(currentNode, currentSize);}

                    final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false);

                    //Close the current writer unless this record goes to the same node
                    if(tn==null || tn!=currentNode){
                        if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;}
                    }
                    if(tn!=null && tn!=currentNode){
                        String dir=tree.toDir(tn, outPath);
                        final boolean found=nodes.containsKey(tn);
                        if(!found){nodes.put(tn, 0L);}
                        //A node seen earlier in this run is reopened in append mode rather than overwritten
                        FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false);
                        bsw=new ByteStreamWriter(ff);
                        bsw.start();
                    }

                    currentNode=tn;
                    currentSize=0;
                    if(bsw!=null){readsOut++;}
                }else{
                    basesProcessed+=line.length;
                    currentSize+=line.length;
                }

                if(bsw!=null){
                    linesOut++;
                    if(!header){basesOut+=line.length;}
                    bsw.println(line);
                }
            }

            //Credit the final record, which has no following header to trigger it
            if(bsw!=null && currentNode!=null){addBases(currentNode, currentSize);}
        }finally{
            //Release the writer and the input even if something above threw
            if(bsw!=null){errorState=bsw.poisonAndWait()|errorState;}
            bf.close();
        }
    }

    /** Adds bases to the running total stored for node, treating a missing entry as zero */
    private void addBases(TaxNode node, long bases){
        final Long old=nodes.get(node);
        nodes.put(node, (old==null ? 0L : old.longValue())+bases);
    }

    /*--------------------------------------------------------------*/
    /*----------------            Fields            ----------------*/
    /*--------------------------------------------------------------*/

    /** Primary input file path */
    private String in1=null;

    /** Root directory for output; empty, or normalized to end with '/' */
    private String outPath=null;

    /** Prepended to each output file name; never null after construction */
    private String prefix;

    /** Override input file extension */
    private String extin=null;

    /** For listing what is present in the output */
    public String resultsFile=null;

    /** Taxonomic tree file; "auto" selects TaxTree.defaultTreeFile() */
    public String taxTreeFile=null;

    /** True to create the full directory tree before writing sequences */
    public boolean makeDirectories=true;

    /** Bases credited to each node that had a file opened, in first-seen order */
    public LinkedHashMap<TaxNode, Long> nodes=new LinkedHashMap<TaxNode, Long>();

    /*--------------------------------------------------------------*/

    /** Number of reads processed */
    protected long readsProcessed=0;
    /** Number of lines processed */
    protected long linesProcessed=0;
    /** Number of bases processed */
    protected long basesProcessed=0;

    /** Number of reads out */
    public long readsOut=0;
    /** Number of lines out */
    public long linesOut=0;
    /** Number of bases out */
    public long basesOut=0;

    /** Quit after processing this many input reads; -1 means no limit */
    private long maxReads=-1;

    /*--------------------------------------------------------------*/
    /*----------------         Final Fields         ----------------*/
    /*--------------------------------------------------------------*/

    /** Primary input file */
    private final FileFormat ffin1;

    /** Taxonomic tree used to resolve headers and directories */
    private final TaxTree tree;

    /*--------------------------------------------------------------*/
    /*----------------        Common Fields         ----------------*/
    /*--------------------------------------------------------------*/

    /** Print status messages to this output stream */
    private PrintStream outstream=System.err;
    /** Print verbose messages */
    public static boolean verbose=false;
    /** True if an error was encountered */
    public boolean errorState=false;
    /** Overwrite existing output files */
    private boolean overwrite=true;

}