jpayne@68: package tax; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.LinkedHashMap; jpayne@68: import java.util.Locale; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import fileIO.TextStreamWriter; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import stream.FastaReadInputStream; jpayne@68: jpayne@68: /** jpayne@68: * Constructs a directory and file tree of sequences jpayne@68: * corresponding to a taxonomic tree. jpayne@68: * jpayne@68: * @author Brian Bushnell jpayne@68: * @date December 12, 2017 jpayne@68: * jpayne@68: */ jpayne@68: public class ExplodeTree { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: Timer t=new Timer(); jpayne@68: ExplodeTree x=new ExplodeTree(args); jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public ExplodeTree(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: jpayne@68: //Create a parser object jpayne@68: Parser parser=new Parser(); jpayne@68: jpayne@68: //Parse each argument jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("out") || a.equals("path") || a.equals("outpath")){ jpayne@68: outPath=b; jpayne@68: }else if(a.equals("prefix")){ jpayne@68: prefix=b; jpayne@68: }else if(a.equals("results") || a.equals("result")){ jpayne@68: resultsFile=b; jpayne@68: }else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){ jpayne@68: makeDirectories=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("tree") || a.equals("taxtree")){ jpayne@68: taxTreeFile=b; jpayne@68: }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser jpayne@68: //do nothing jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: // throw new RuntimeException("Unknown parameter "+args[i]); jpayne@68: } jpayne@68: } jpayne@68: if(prefix==null){prefix="";} jpayne@68: if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();} jpayne@68: jpayne@68: {//Process parser fields jpayne@68: Parser.processQuality(); jpayne@68: jpayne@68: maxReads=parser.maxReads; jpayne@68: jpayne@68: overwrite=parser.overwrite; jpayne@68: jpayne@68: in1=parser.in1; jpayne@68: jpayne@68: extin=parser.extin; jpayne@68: } jpayne@68: jpayne@68: if(outPath==null || outPath.trim().length()==0){outPath="";} jpayne@68: else{ jpayne@68: outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/"); jpayne@68: if(!outPath.endsWith("/")){outPath=outPath+"/";} jpayne@68: } jpayne@68: jpayne@68: assert(FastaReadInputStream.settingsOK()); jpayne@68: jpayne@68: //Ensure there is an input file jpayne@68: if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: //Ensure output files can be written jpayne@68: if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){ jpayne@68: outstream.println(resultsFile); jpayne@68: throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure input files can be read jpayne@68: if(!Tools.testInputFiles(false, true, in1)){ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure that no file was specified multiple times jpayne@68: if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){ jpayne@68: throw new RuntimeException("\nSome file names were specified multiple times.\n"); jpayne@68: } jpayne@68: jpayne@68: //Create input FileFormat objects jpayne@68: ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true); jpayne@68: jpayne@68: tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public void makeDirectoryTree(String root, boolean writeNames){ jpayne@68: for(TaxNode node : tree.nodes){ jpayne@68: if(node!=null){ jpayne@68: String dir=tree.toDir(node, root); jpayne@68: File df=new File(dir); jpayne@68: if(!df.exists()){df.mkdirs();} jpayne@68: if(writeNames){ jpayne@68: try { jpayne@68: String fname=node.simpleName()+".name"; jpayne@68: File nf=new File(fname); jpayne@68: if(!nf.exists()){ jpayne@68: ReadWrite.writeString(node.name, dir+fname); jpayne@68: } jpayne@68: } catch (Exception e) { jpayne@68: // TODO Auto-generated catch block jpayne@68: e.printStackTrace(); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** Create read streams and process all data */ jpayne@68: public void process(Timer t){ jpayne@68: jpayne@68: Timer t2=new Timer(); jpayne@68: if(makeDirectories){ jpayne@68: makeDirectoryTree(outPath, true); jpayne@68: t2.stop("Finished making directories. "); jpayne@68: t2.start(); jpayne@68: } jpayne@68: processInner(); jpayne@68: t2.stop(); jpayne@68: t2.stop("Finished writing data. "); jpayne@68: jpayne@68: //Do anything necessary after processing jpayne@68: jpayne@68: if(resultsFile!=null){ jpayne@68: TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false); jpayne@68: tsw.start(); jpayne@68: for(TaxNode tn : nodes.keySet()){ jpayne@68: Long data=nodes.get(tn); jpayne@68: if(data==null){data=0L;} jpayne@68: tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name); jpayne@68: } jpayne@68: errorState|=tsw.poisonAndWait(); jpayne@68: } jpayne@68: jpayne@68: //Report timing and results jpayne@68: { jpayne@68: t.stop(); jpayne@68: jpayne@68: //Calculate units per nanosecond jpayne@68: double rpnano=readsProcessed/(double)(t.elapsed); jpayne@68: double lpnano=linesProcessed/(double)(t.elapsed); jpayne@68: double bpnano=basesProcessed/(double)(t.elapsed); jpayne@68: jpayne@68: //Add "k" and "m" for large numbers jpayne@68: String rpstring=Tools.padKM(readsProcessed, 8); jpayne@68: String lpstring=Tools.padKM(linesProcessed, 8); jpayne@68: String bpstring=Tools.padKM(basesProcessed, 8); jpayne@68: jpayne@68: String li="Lines In: \t"+linesProcessed+" lines"; jpayne@68: String lo="Lines Out: \t"+linesOut+" lines"; jpayne@68: while(lo.length()0){ jpayne@68: final boolean header=(line[0]=='>'); jpayne@68: if(header){ jpayne@68: if(maxReads>0 && readsProcessed>=maxReads){break;} jpayne@68: readsProcessed++; jpayne@68: if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);} jpayne@68: jpayne@68: final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false); jpayne@68: jpayne@68: if(tn==null || tn!=currentNode){ jpayne@68: if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;} jpayne@68: } jpayne@68: if(tn!=null && tn!=currentNode){ jpayne@68: String dir=tree.toDir(tn, outPath); jpayne@68: final boolean found=nodes.containsKey(tn); jpayne@68: if(!found){nodes.put(tn, 0L);} jpayne@68: FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false); jpayne@68: bsw=new ByteStreamWriter(ff); jpayne@68: bsw.start(); jpayne@68: } jpayne@68: jpayne@68: currentNode=tn; jpayne@68: currentSize=0; jpayne@68: if(bsw!=null){readsOut++;} jpayne@68: }else{ jpayne@68: basesProcessed+=line.length; jpayne@68: currentSize+=line.length; jpayne@68: } jpayne@68: if(bsw!=null){ jpayne@68: linesOut++; jpayne@68: if(!header){basesOut+=line.length;} jpayne@68: bsw.println(line); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: if(bsw!=null){ jpayne@68: errorState=bsw.poisonAndWait()|errorState; bsw=null; jpayne@68: if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);} jpayne@68: } jpayne@68: bf.close(); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Primary input file path */ jpayne@68: private String in1=null; jpayne@68: jpayne@68: /** Primary output file path */ jpayne@68: private String outPath=null; jpayne@68: jpayne@68: private String prefix; jpayne@68: jpayne@68: /** Override input file extension */ jpayne@68: private String extin=null; jpayne@68: jpayne@68: /** For listing what is present in the output */ jpayne@68: public String resultsFile=null; jpayne@68: jpayne@68: public String taxTreeFile=null; jpayne@68: jpayne@68: public boolean makeDirectories=true; jpayne@68: jpayne@68: public LinkedHashMap nodes=new LinkedHashMap(); jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Number of reads processed */ jpayne@68: protected long readsProcessed=0; jpayne@68: /** Number of lines processed */ jpayne@68: protected long linesProcessed=0; jpayne@68: /** Number of bases processed */ jpayne@68: protected long basesProcessed=0; jpayne@68: jpayne@68: /** Number of reads out */ jpayne@68: public long readsOut=0; jpayne@68: /** Number of lines out */ jpayne@68: public long linesOut=0; jpayne@68: /** Number of bases out */ jpayne@68: public long basesOut=0; jpayne@68: jpayne@68: /** Quit after processing this many input reads; -1 means no limit */ jpayne@68: private long maxReads=-1; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Primary input file */ jpayne@68: private final FileFormat ffin1; jpayne@68: jpayne@68: private final TaxTree tree; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Common Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Print status messages to this output stream */ jpayne@68: private PrintStream outstream=System.err; jpayne@68: /** Print verbose messages */ jpayne@68: public static boolean verbose=false; jpayne@68: /** True if an error was encountered */ jpayne@68: public boolean errorState=false; jpayne@68: /** Overwrite existing output files */ jpayne@68: private boolean overwrite=true; jpayne@68: jpayne@68: }