jpayne@68
|
1 package tax;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.LinkedHashMap;
|
jpayne@68
|
6 import java.util.Locale;
|
jpayne@68
|
7
|
jpayne@68
|
8 import fileIO.ByteFile;
|
jpayne@68
|
9 import fileIO.ByteStreamWriter;
|
jpayne@68
|
10 import fileIO.FileFormat;
|
jpayne@68
|
11 import fileIO.ReadWrite;
|
jpayne@68
|
12 import fileIO.TextStreamWriter;
|
jpayne@68
|
13 import shared.Parse;
|
jpayne@68
|
14 import shared.Parser;
|
jpayne@68
|
15 import shared.PreParser;
|
jpayne@68
|
16 import shared.Shared;
|
jpayne@68
|
17 import shared.Timer;
|
jpayne@68
|
18 import shared.Tools;
|
jpayne@68
|
19 import stream.FastaReadInputStream;
|
jpayne@68
|
20
|
jpayne@68
|
21 /**
|
jpayne@68
|
22 * Constructs a directory and file tree of sequences
|
jpayne@68
|
23 * corresponding to a taxonomic tree.
|
jpayne@68
|
24 *
|
jpayne@68
|
25 * @author Brian Bushnell
|
jpayne@68
|
26 * @date December 12, 2017
|
jpayne@68
|
27 *
|
jpayne@68
|
28 */
|
jpayne@68
|
29 public class ExplodeTree {
|
jpayne@68
|
30
|
jpayne@68
|
31 /*--------------------------------------------------------------*/
|
jpayne@68
|
32 /*---------------- Initialization ----------------*/
|
jpayne@68
|
33 /*--------------------------------------------------------------*/
|
jpayne@68
|
34
|
jpayne@68
|
35 /**
|
jpayne@68
|
36 * Code entrance from the command line.
|
jpayne@68
|
37 * @param args Command line arguments
|
jpayne@68
|
38 */
|
jpayne@68
|
39 public static void main(String[] args){
|
jpayne@68
|
40 Timer t=new Timer();
|
jpayne@68
|
41 ExplodeTree x=new ExplodeTree(args);
|
jpayne@68
|
42 x.process(t);
|
jpayne@68
|
43
|
jpayne@68
|
44 //Close the print stream if it was redirected
|
jpayne@68
|
45 Shared.closeStream(x.outstream);
|
jpayne@68
|
46 }
|
jpayne@68
|
47
|
jpayne@68
|
48 /**
|
jpayne@68
|
49 * Constructor.
|
jpayne@68
|
50 * @param args Command line arguments
|
jpayne@68
|
51 */
|
jpayne@68
|
52 public ExplodeTree(String[] args){
|
jpayne@68
|
53
|
jpayne@68
|
54 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
55 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
56 args=pp.args;
|
jpayne@68
|
57 outstream=pp.outstream;
|
jpayne@68
|
58 }
|
jpayne@68
|
59
|
jpayne@68
|
60 //Set shared static variables
|
jpayne@68
|
61 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
62 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
63
|
jpayne@68
|
64 //Create a parser object
|
jpayne@68
|
65 Parser parser=new Parser();
|
jpayne@68
|
66
|
jpayne@68
|
67 //Parse each argument
|
jpayne@68
|
68 for(int i=0; i<args.length; i++){
|
jpayne@68
|
69 String arg=args[i];
|
jpayne@68
|
70
|
jpayne@68
|
71 //Break arguments into their constituent parts, in the form of "a=b"
|
jpayne@68
|
72 String[] split=arg.split("=");
|
jpayne@68
|
73 String a=split[0].toLowerCase();
|
jpayne@68
|
74 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
75
|
jpayne@68
|
76 if(a.equals("verbose")){
|
jpayne@68
|
77 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
78 }else if(a.equals("out") || a.equals("path") || a.equals("outpath")){
|
jpayne@68
|
79 outPath=b;
|
jpayne@68
|
80 }else if(a.equals("prefix")){
|
jpayne@68
|
81 prefix=b;
|
jpayne@68
|
82 }else if(a.equals("results") || a.equals("result")){
|
jpayne@68
|
83 resultsFile=b;
|
jpayne@68
|
84 }else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){
|
jpayne@68
|
85 makeDirectories=Parse.parseBoolean(b);
|
jpayne@68
|
86 }else if(a.equals("tree") || a.equals("taxtree")){
|
jpayne@68
|
87 taxTreeFile=b;
|
jpayne@68
|
88 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
|
jpayne@68
|
89 //do nothing
|
jpayne@68
|
90 }else{
|
jpayne@68
|
91 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
92 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
93 // throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
94 }
|
jpayne@68
|
95 }
|
jpayne@68
|
96 if(prefix==null){prefix="";}
|
jpayne@68
|
97 if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}
|
jpayne@68
|
98
|
jpayne@68
|
99 {//Process parser fields
|
jpayne@68
|
100 Parser.processQuality();
|
jpayne@68
|
101
|
jpayne@68
|
102 maxReads=parser.maxReads;
|
jpayne@68
|
103
|
jpayne@68
|
104 overwrite=parser.overwrite;
|
jpayne@68
|
105
|
jpayne@68
|
106 in1=parser.in1;
|
jpayne@68
|
107
|
jpayne@68
|
108 extin=parser.extin;
|
jpayne@68
|
109 }
|
jpayne@68
|
110
|
jpayne@68
|
111 if(outPath==null || outPath.trim().length()==0){outPath="";}
|
jpayne@68
|
112 else{
|
jpayne@68
|
113 outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/");
|
jpayne@68
|
114 if(!outPath.endsWith("/")){outPath=outPath+"/";}
|
jpayne@68
|
115 }
|
jpayne@68
|
116
|
jpayne@68
|
117 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
118
|
jpayne@68
|
119 //Ensure there is an input file
|
jpayne@68
|
120 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
121
|
jpayne@68
|
122 //Adjust the number of threads for input file reading
|
jpayne@68
|
123 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
124 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
125 }
|
jpayne@68
|
126
|
jpayne@68
|
127 //Ensure output files can be written
|
jpayne@68
|
128 if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){
|
jpayne@68
|
129 outstream.println(resultsFile);
|
jpayne@68
|
130 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n");
|
jpayne@68
|
131 }
|
jpayne@68
|
132
|
jpayne@68
|
133 //Ensure input files can be read
|
jpayne@68
|
134 if(!Tools.testInputFiles(false, true, in1)){
|
jpayne@68
|
135 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
136 }
|
jpayne@68
|
137
|
jpayne@68
|
138 //Ensure that no file was specified multiple times
|
jpayne@68
|
139 if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){
|
jpayne@68
|
140 throw new RuntimeException("\nSome file names were specified multiple times.\n");
|
jpayne@68
|
141 }
|
jpayne@68
|
142
|
jpayne@68
|
143 //Create input FileFormat objects
|
jpayne@68
|
144 ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true);
|
jpayne@68
|
145
|
jpayne@68
|
146 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false);
|
jpayne@68
|
147 }
|
jpayne@68
|
148
|
jpayne@68
|
149 /*--------------------------------------------------------------*/
|
jpayne@68
|
150 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
151 /*--------------------------------------------------------------*/
|
jpayne@68
|
152
|
jpayne@68
|
153 public void makeDirectoryTree(String root, boolean writeNames){
|
jpayne@68
|
154 for(TaxNode node : tree.nodes){
|
jpayne@68
|
155 if(node!=null){
|
jpayne@68
|
156 String dir=tree.toDir(node, root);
|
jpayne@68
|
157 File df=new File(dir);
|
jpayne@68
|
158 if(!df.exists()){df.mkdirs();}
|
jpayne@68
|
159 if(writeNames){
|
jpayne@68
|
160 try {
|
jpayne@68
|
161 String fname=node.simpleName()+".name";
|
jpayne@68
|
162 File nf=new File(fname);
|
jpayne@68
|
163 if(!nf.exists()){
|
jpayne@68
|
164 ReadWrite.writeString(node.name, dir+fname);
|
jpayne@68
|
165 }
|
jpayne@68
|
166 } catch (Exception e) {
|
jpayne@68
|
167 // TODO Auto-generated catch block
|
jpayne@68
|
168 e.printStackTrace();
|
jpayne@68
|
169 }
|
jpayne@68
|
170 }
|
jpayne@68
|
171 }
|
jpayne@68
|
172 }
|
jpayne@68
|
173 }
|
jpayne@68
|
174
|
jpayne@68
|
175 /** Create read streams and process all data */
|
jpayne@68
|
176 public void process(Timer t){
|
jpayne@68
|
177
|
jpayne@68
|
178 Timer t2=new Timer();
|
jpayne@68
|
179 if(makeDirectories){
|
jpayne@68
|
180 makeDirectoryTree(outPath, true);
|
jpayne@68
|
181 t2.stop("Finished making directories. ");
|
jpayne@68
|
182 t2.start();
|
jpayne@68
|
183 }
|
jpayne@68
|
184 processInner();
|
jpayne@68
|
185 t2.stop();
|
jpayne@68
|
186 t2.stop("Finished writing data. ");
|
jpayne@68
|
187
|
jpayne@68
|
188 //Do anything necessary after processing
|
jpayne@68
|
189
|
jpayne@68
|
190 if(resultsFile!=null){
|
jpayne@68
|
191 TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false);
|
jpayne@68
|
192 tsw.start();
|
jpayne@68
|
193 for(TaxNode tn : nodes.keySet()){
|
jpayne@68
|
194 Long data=nodes.get(tn);
|
jpayne@68
|
195 if(data==null){data=0L;}
|
jpayne@68
|
196 tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name);
|
jpayne@68
|
197 }
|
jpayne@68
|
198 errorState|=tsw.poisonAndWait();
|
jpayne@68
|
199 }
|
jpayne@68
|
200
|
jpayne@68
|
201 //Report timing and results
|
jpayne@68
|
202 {
|
jpayne@68
|
203 t.stop();
|
jpayne@68
|
204
|
jpayne@68
|
205 //Calculate units per nanosecond
|
jpayne@68
|
206 double rpnano=readsProcessed/(double)(t.elapsed);
|
jpayne@68
|
207 double lpnano=linesProcessed/(double)(t.elapsed);
|
jpayne@68
|
208 double bpnano=basesProcessed/(double)(t.elapsed);
|
jpayne@68
|
209
|
jpayne@68
|
210 //Add "k" and "m" for large numbers
|
jpayne@68
|
211 String rpstring=Tools.padKM(readsProcessed, 8);
|
jpayne@68
|
212 String lpstring=Tools.padKM(linesProcessed, 8);
|
jpayne@68
|
213 String bpstring=Tools.padKM(basesProcessed, 8);
|
jpayne@68
|
214
|
jpayne@68
|
215 String li="Lines In: \t"+linesProcessed+" lines";
|
jpayne@68
|
216 String lo="Lines Out: \t"+linesOut+" lines";
|
jpayne@68
|
217 while(lo.length()<li.length()){lo=lo+" ";}
|
jpayne@68
|
218
|
jpayne@68
|
219 String ri="Reads In: \t"+readsProcessed+" reads";
|
jpayne@68
|
220 String ro="Reads Out: \t"+readsOut+" reads";
|
jpayne@68
|
221 while(ro.length()<ri.length()){ro=ro+" ";}
|
jpayne@68
|
222
|
jpayne@68
|
223 outstream.println(ri+"\t"+basesProcessed+" bases");
|
jpayne@68
|
224 outstream.println(ro+"\t"+basesOut+" bases");
|
jpayne@68
|
225 outstream.println(li);
|
jpayne@68
|
226 outstream.println(lo);
|
jpayne@68
|
227 outstream.println();
|
jpayne@68
|
228
|
jpayne@68
|
229 outstream.println("Time: \t"+t);
|
jpayne@68
|
230 outstream.println("Reads Processed: "+rpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", rpnano*1000000));
|
jpayne@68
|
231 outstream.println("Lines Processed: "+lpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", lpnano*1000000));
|
jpayne@68
|
232 outstream.println("Bases Processed: "+bpstring+" \t"+String.format(Locale.ROOT, "%.2fm bases/sec", bpnano*1000));
|
jpayne@68
|
233 }
|
jpayne@68
|
234
|
jpayne@68
|
235 //Throw an exception of there was an error in a thread
|
jpayne@68
|
236 if(errorState){
|
jpayne@68
|
237 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
238 }
|
jpayne@68
|
239 }
|
jpayne@68
|
240
|
jpayne@68
|
241 /*--------------------------------------------------------------*/
|
jpayne@68
|
242 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
243 /*--------------------------------------------------------------*/
|
jpayne@68
|
244
|
jpayne@68
|
245 /** Iterate through the reads */
|
jpayne@68
|
246 void processInner(){
|
jpayne@68
|
247 ByteFile bf=ByteFile.makeByteFile(ffin1);
|
jpayne@68
|
248 TaxNode currentNode=null;
|
jpayne@68
|
249 long currentSize=0;
|
jpayne@68
|
250 ByteStreamWriter bsw=null;
|
jpayne@68
|
251 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
|
jpayne@68
|
252 linesProcessed++;
|
jpayne@68
|
253 if(line.length>0){
|
jpayne@68
|
254 final boolean header=(line[0]=='>');
|
jpayne@68
|
255 if(header){
|
jpayne@68
|
256 if(maxReads>0 && readsProcessed>=maxReads){break;}
|
jpayne@68
|
257 readsProcessed++;
|
jpayne@68
|
258 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
|
jpayne@68
|
259
|
jpayne@68
|
260 final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false);
|
jpayne@68
|
261
|
jpayne@68
|
262 if(tn==null || tn!=currentNode){
|
jpayne@68
|
263 if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;}
|
jpayne@68
|
264 }
|
jpayne@68
|
265 if(tn!=null && tn!=currentNode){
|
jpayne@68
|
266 String dir=tree.toDir(tn, outPath);
|
jpayne@68
|
267 final boolean found=nodes.containsKey(tn);
|
jpayne@68
|
268 if(!found){nodes.put(tn, 0L);}
|
jpayne@68
|
269 FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false);
|
jpayne@68
|
270 bsw=new ByteStreamWriter(ff);
|
jpayne@68
|
271 bsw.start();
|
jpayne@68
|
272 }
|
jpayne@68
|
273
|
jpayne@68
|
274 currentNode=tn;
|
jpayne@68
|
275 currentSize=0;
|
jpayne@68
|
276 if(bsw!=null){readsOut++;}
|
jpayne@68
|
277 }else{
|
jpayne@68
|
278 basesProcessed+=line.length;
|
jpayne@68
|
279 currentSize+=line.length;
|
jpayne@68
|
280 }
|
jpayne@68
|
281 if(bsw!=null){
|
jpayne@68
|
282 linesOut++;
|
jpayne@68
|
283 if(!header){basesOut+=line.length;}
|
jpayne@68
|
284 bsw.println(line);
|
jpayne@68
|
285 }
|
jpayne@68
|
286 }
|
jpayne@68
|
287 }
|
jpayne@68
|
288 if(bsw!=null){
|
jpayne@68
|
289 errorState=bsw.poisonAndWait()|errorState; bsw=null;
|
jpayne@68
|
290 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
|
jpayne@68
|
291 }
|
jpayne@68
|
292 bf.close();
|
jpayne@68
|
293 }
|
jpayne@68
|
294
|
jpayne@68
|
295 /*--------------------------------------------------------------*/
|
jpayne@68
|
296 /*---------------- Fields ----------------*/
|
jpayne@68
|
297 /*--------------------------------------------------------------*/
|
jpayne@68
|
298
|
jpayne@68
|
299 /** Primary input file path */
|
jpayne@68
|
300 private String in1=null;
|
jpayne@68
|
301
|
jpayne@68
|
302 /** Primary output file path */
|
jpayne@68
|
303 private String outPath=null;
|
jpayne@68
|
304
|
jpayne@68
|
305 private String prefix;
|
jpayne@68
|
306
|
jpayne@68
|
307 /** Override input file extension */
|
jpayne@68
|
308 private String extin=null;
|
jpayne@68
|
309
|
jpayne@68
|
310 /** For listing what is present in the output */
|
jpayne@68
|
311 public String resultsFile=null;
|
jpayne@68
|
312
|
jpayne@68
|
313 public String taxTreeFile=null;
|
jpayne@68
|
314
|
jpayne@68
|
315 public boolean makeDirectories=true;
|
jpayne@68
|
316
|
jpayne@68
|
317 public LinkedHashMap<TaxNode, Long> nodes=new LinkedHashMap<TaxNode, Long>();
|
jpayne@68
|
318
|
jpayne@68
|
319 /*--------------------------------------------------------------*/
|
jpayne@68
|
320
|
jpayne@68
|
321 /** Number of reads processed */
|
jpayne@68
|
322 protected long readsProcessed=0;
|
jpayne@68
|
323 /** Number of lines processed */
|
jpayne@68
|
324 protected long linesProcessed=0;
|
jpayne@68
|
325 /** Number of bases processed */
|
jpayne@68
|
326 protected long basesProcessed=0;
|
jpayne@68
|
327
|
jpayne@68
|
328 /** Number of reads out */
|
jpayne@68
|
329 public long readsOut=0;
|
jpayne@68
|
330 /** Number of lines out */
|
jpayne@68
|
331 public long linesOut=0;
|
jpayne@68
|
332 /** Number of bases out */
|
jpayne@68
|
333 public long basesOut=0;
|
jpayne@68
|
334
|
jpayne@68
|
335 /** Quit after processing this many input reads; -1 means no limit */
|
jpayne@68
|
336 private long maxReads=-1;
|
jpayne@68
|
337
|
jpayne@68
|
338 /*--------------------------------------------------------------*/
|
jpayne@68
|
339 /*---------------- Final Fields ----------------*/
|
jpayne@68
|
340 /*--------------------------------------------------------------*/
|
jpayne@68
|
341
|
jpayne@68
|
342 /** Primary input file */
|
jpayne@68
|
343 private final FileFormat ffin1;
|
jpayne@68
|
344
|
jpayne@68
|
345 private final TaxTree tree;
|
jpayne@68
|
346
|
jpayne@68
|
347 /*--------------------------------------------------------------*/
|
jpayne@68
|
348 /*---------------- Common Fields ----------------*/
|
jpayne@68
|
349 /*--------------------------------------------------------------*/
|
jpayne@68
|
350
|
jpayne@68
|
351 /** Print status messages to this output stream */
|
jpayne@68
|
352 private PrintStream outstream=System.err;
|
jpayne@68
|
353 /** Print verbose messages */
|
jpayne@68
|
354 public static boolean verbose=false;
|
jpayne@68
|
355 /** True if an error was encountered */
|
jpayne@68
|
356 public boolean errorState=false;
|
jpayne@68
|
357 /** Overwrite existing output files */
|
jpayne@68
|
358 private boolean overwrite=true;
|
jpayne@68
|
359
|
jpayne@68
|
360 }
|