comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ExplodeTree.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package tax;
2
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.LinkedHashMap;
6 import java.util.Locale;
7
8 import fileIO.ByteFile;
9 import fileIO.ByteStreamWriter;
10 import fileIO.FileFormat;
11 import fileIO.ReadWrite;
12 import fileIO.TextStreamWriter;
13 import shared.Parse;
14 import shared.Parser;
15 import shared.PreParser;
16 import shared.Shared;
17 import shared.Timer;
18 import shared.Tools;
19 import stream.FastaReadInputStream;
20
21 /**
22 * Constructs a directory and file tree of sequences
23 * corresponding to a taxonomic tree.
24 *
25 * @author Brian Bushnell
26 * @date December 12, 2017
27 *
28 */
29 public class ExplodeTree {
30
31 /*--------------------------------------------------------------*/
32 /*---------------- Initialization ----------------*/
33 /*--------------------------------------------------------------*/
34
35 /**
36 * Code entrance from the command line.
37 * @param args Command line arguments
38 */
39 public static void main(String[] args){
40 Timer t=new Timer();
41 ExplodeTree x=new ExplodeTree(args);
42 x.process(t);
43
44 //Close the print stream if it was redirected
45 Shared.closeStream(x.outstream);
46 }
47
48 /**
49 * Constructor.
50 * @param args Command line arguments
51 */
52 public ExplodeTree(String[] args){
53
54 {//Preparse block for help, config files, and outstream
55 PreParser pp=new PreParser(args, getClass(), false);
56 args=pp.args;
57 outstream=pp.outstream;
58 }
59
60 //Set shared static variables
61 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
62 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
63
64 //Create a parser object
65 Parser parser=new Parser();
66
67 //Parse each argument
68 for(int i=0; i<args.length; i++){
69 String arg=args[i];
70
71 //Break arguments into their constituent parts, in the form of "a=b"
72 String[] split=arg.split("=");
73 String a=split[0].toLowerCase();
74 String b=split.length>1 ? split[1] : null;
75
76 if(a.equals("verbose")){
77 verbose=Parse.parseBoolean(b);
78 }else if(a.equals("out") || a.equals("path") || a.equals("outpath")){
79 outPath=b;
80 }else if(a.equals("prefix")){
81 prefix=b;
82 }else if(a.equals("results") || a.equals("result")){
83 resultsFile=b;
84 }else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){
85 makeDirectories=Parse.parseBoolean(b);
86 }else if(a.equals("tree") || a.equals("taxtree")){
87 taxTreeFile=b;
88 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
89 //do nothing
90 }else{
91 outstream.println("Unknown parameter "+args[i]);
92 assert(false) : "Unknown parameter "+args[i];
93 // throw new RuntimeException("Unknown parameter "+args[i]);
94 }
95 }
96 if(prefix==null){prefix="";}
97 if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}
98
99 {//Process parser fields
100 Parser.processQuality();
101
102 maxReads=parser.maxReads;
103
104 overwrite=parser.overwrite;
105
106 in1=parser.in1;
107
108 extin=parser.extin;
109 }
110
111 if(outPath==null || outPath.trim().length()==0){outPath="";}
112 else{
113 outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/");
114 if(!outPath.endsWith("/")){outPath=outPath+"/";}
115 }
116
117 assert(FastaReadInputStream.settingsOK());
118
119 //Ensure there is an input file
120 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
121
122 //Adjust the number of threads for input file reading
123 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
124 ByteFile.FORCE_MODE_BF2=true;
125 }
126
127 //Ensure output files can be written
128 if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){
129 outstream.println(resultsFile);
130 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n");
131 }
132
133 //Ensure input files can be read
134 if(!Tools.testInputFiles(false, true, in1)){
135 throw new RuntimeException("\nCan't read some input files.\n");
136 }
137
138 //Ensure that no file was specified multiple times
139 if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){
140 throw new RuntimeException("\nSome file names were specified multiple times.\n");
141 }
142
143 //Create input FileFormat objects
144 ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true);
145
146 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false);
147 }
148
149 /*--------------------------------------------------------------*/
150 /*---------------- Outer Methods ----------------*/
151 /*--------------------------------------------------------------*/
152
153 public void makeDirectoryTree(String root, boolean writeNames){
154 for(TaxNode node : tree.nodes){
155 if(node!=null){
156 String dir=tree.toDir(node, root);
157 File df=new File(dir);
158 if(!df.exists()){df.mkdirs();}
159 if(writeNames){
160 try {
161 String fname=node.simpleName()+".name";
162 File nf=new File(fname);
163 if(!nf.exists()){
164 ReadWrite.writeString(node.name, dir+fname);
165 }
166 } catch (Exception e) {
167 // TODO Auto-generated catch block
168 e.printStackTrace();
169 }
170 }
171 }
172 }
173 }
174
175 /** Create read streams and process all data */
176 public void process(Timer t){
177
178 Timer t2=new Timer();
179 if(makeDirectories){
180 makeDirectoryTree(outPath, true);
181 t2.stop("Finished making directories. ");
182 t2.start();
183 }
184 processInner();
185 t2.stop();
186 t2.stop("Finished writing data. ");
187
188 //Do anything necessary after processing
189
190 if(resultsFile!=null){
191 TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false);
192 tsw.start();
193 for(TaxNode tn : nodes.keySet()){
194 Long data=nodes.get(tn);
195 if(data==null){data=0L;}
196 tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name);
197 }
198 errorState|=tsw.poisonAndWait();
199 }
200
201 //Report timing and results
202 {
203 t.stop();
204
205 //Calculate units per nanosecond
206 double rpnano=readsProcessed/(double)(t.elapsed);
207 double lpnano=linesProcessed/(double)(t.elapsed);
208 double bpnano=basesProcessed/(double)(t.elapsed);
209
210 //Add "k" and "m" for large numbers
211 String rpstring=Tools.padKM(readsProcessed, 8);
212 String lpstring=Tools.padKM(linesProcessed, 8);
213 String bpstring=Tools.padKM(basesProcessed, 8);
214
215 String li="Lines In: \t"+linesProcessed+" lines";
216 String lo="Lines Out: \t"+linesOut+" lines";
217 while(lo.length()<li.length()){lo=lo+" ";}
218
219 String ri="Reads In: \t"+readsProcessed+" reads";
220 String ro="Reads Out: \t"+readsOut+" reads";
221 while(ro.length()<ri.length()){ro=ro+" ";}
222
223 outstream.println(ri+"\t"+basesProcessed+" bases");
224 outstream.println(ro+"\t"+basesOut+" bases");
225 outstream.println(li);
226 outstream.println(lo);
227 outstream.println();
228
229 outstream.println("Time: \t"+t);
230 outstream.println("Reads Processed: "+rpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", rpnano*1000000));
231 outstream.println("Lines Processed: "+lpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", lpnano*1000000));
232 outstream.println("Bases Processed: "+bpstring+" \t"+String.format(Locale.ROOT, "%.2fm bases/sec", bpnano*1000));
233 }
234
235 //Throw an exception of there was an error in a thread
236 if(errorState){
237 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
238 }
239 }
240
241 /*--------------------------------------------------------------*/
242 /*---------------- Inner Methods ----------------*/
243 /*--------------------------------------------------------------*/
244
245 /** Iterate through the reads */
246 void processInner(){
247 ByteFile bf=ByteFile.makeByteFile(ffin1);
248 TaxNode currentNode=null;
249 long currentSize=0;
250 ByteStreamWriter bsw=null;
251 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
252 linesProcessed++;
253 if(line.length>0){
254 final boolean header=(line[0]=='>');
255 if(header){
256 if(maxReads>0 && readsProcessed>=maxReads){break;}
257 readsProcessed++;
258 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
259
260 final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false);
261
262 if(tn==null || tn!=currentNode){
263 if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;}
264 }
265 if(tn!=null && tn!=currentNode){
266 String dir=tree.toDir(tn, outPath);
267 final boolean found=nodes.containsKey(tn);
268 if(!found){nodes.put(tn, 0L);}
269 FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false);
270 bsw=new ByteStreamWriter(ff);
271 bsw.start();
272 }
273
274 currentNode=tn;
275 currentSize=0;
276 if(bsw!=null){readsOut++;}
277 }else{
278 basesProcessed+=line.length;
279 currentSize+=line.length;
280 }
281 if(bsw!=null){
282 linesOut++;
283 if(!header){basesOut+=line.length;}
284 bsw.println(line);
285 }
286 }
287 }
288 if(bsw!=null){
289 errorState=bsw.poisonAndWait()|errorState; bsw=null;
290 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);}
291 }
292 bf.close();
293 }
294
295 /*--------------------------------------------------------------*/
296 /*---------------- Fields ----------------*/
297 /*--------------------------------------------------------------*/
298
299 /** Primary input file path */
300 private String in1=null;
301
302 /** Primary output file path */
303 private String outPath=null;
304
305 private String prefix;
306
307 /** Override input file extension */
308 private String extin=null;
309
310 /** For listing what is present in the output */
311 public String resultsFile=null;
312
313 public String taxTreeFile=null;
314
315 public boolean makeDirectories=true;
316
317 public LinkedHashMap<TaxNode, Long> nodes=new LinkedHashMap<TaxNode, Long>();
318
319 /*--------------------------------------------------------------*/
320
321 /** Number of reads processed */
322 protected long readsProcessed=0;
323 /** Number of lines processed */
324 protected long linesProcessed=0;
325 /** Number of bases processed */
326 protected long basesProcessed=0;
327
328 /** Number of reads out */
329 public long readsOut=0;
330 /** Number of lines out */
331 public long linesOut=0;
332 /** Number of bases out */
333 public long basesOut=0;
334
335 /** Quit after processing this many input reads; -1 means no limit */
336 private long maxReads=-1;
337
338 /*--------------------------------------------------------------*/
339 /*---------------- Final Fields ----------------*/
340 /*--------------------------------------------------------------*/
341
342 /** Primary input file */
343 private final FileFormat ffin1;
344
345 private final TaxTree tree;
346
347 /*--------------------------------------------------------------*/
348 /*---------------- Common Fields ----------------*/
349 /*--------------------------------------------------------------*/
350
351 /** Print status messages to this output stream */
352 private PrintStream outstream=System.err;
353 /** Print verbose messages */
354 public static boolean verbose=false;
355 /** True if an error was encountered */
356 public boolean errorState=false;
357 /** Overwrite existing output files */
358 private boolean overwrite=true;
359
360 }