Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ExplodeTree.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package tax; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.LinkedHashMap; | |
6 import java.util.Locale; | |
7 | |
8 import fileIO.ByteFile; | |
9 import fileIO.ByteStreamWriter; | |
10 import fileIO.FileFormat; | |
11 import fileIO.ReadWrite; | |
12 import fileIO.TextStreamWriter; | |
13 import shared.Parse; | |
14 import shared.Parser; | |
15 import shared.PreParser; | |
16 import shared.Shared; | |
17 import shared.Timer; | |
18 import shared.Tools; | |
19 import stream.FastaReadInputStream; | |
20 | |
21 /** | |
22 * Constructs a directory and file tree of sequences | |
23 * corresponding to a taxonomic tree. | |
24 * | |
25 * @author Brian Bushnell | |
26 * @date December 12, 2017 | |
27 * | |
28 */ | |
29 public class ExplodeTree { | |
30 | |
31 /*--------------------------------------------------------------*/ | |
32 /*---------------- Initialization ----------------*/ | |
33 /*--------------------------------------------------------------*/ | |
34 | |
35 /** | |
36 * Code entrance from the command line. | |
37 * @param args Command line arguments | |
38 */ | |
39 public static void main(String[] args){ | |
40 Timer t=new Timer(); | |
41 ExplodeTree x=new ExplodeTree(args); | |
42 x.process(t); | |
43 | |
44 //Close the print stream if it was redirected | |
45 Shared.closeStream(x.outstream); | |
46 } | |
47 | |
48 /** | |
49 * Constructor. | |
50 * @param args Command line arguments | |
51 */ | |
52 public ExplodeTree(String[] args){ | |
53 | |
54 {//Preparse block for help, config files, and outstream | |
55 PreParser pp=new PreParser(args, getClass(), false); | |
56 args=pp.args; | |
57 outstream=pp.outstream; | |
58 } | |
59 | |
60 //Set shared static variables | |
61 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; | |
62 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); | |
63 | |
64 //Create a parser object | |
65 Parser parser=new Parser(); | |
66 | |
67 //Parse each argument | |
68 for(int i=0; i<args.length; i++){ | |
69 String arg=args[i]; | |
70 | |
71 //Break arguments into their constituent parts, in the form of "a=b" | |
72 String[] split=arg.split("="); | |
73 String a=split[0].toLowerCase(); | |
74 String b=split.length>1 ? split[1] : null; | |
75 | |
76 if(a.equals("verbose")){ | |
77 verbose=Parse.parseBoolean(b); | |
78 }else if(a.equals("out") || a.equals("path") || a.equals("outpath")){ | |
79 outPath=b; | |
80 }else if(a.equals("prefix")){ | |
81 prefix=b; | |
82 }else if(a.equals("results") || a.equals("result")){ | |
83 resultsFile=b; | |
84 }else if(a.equals("makedirectories") || a.equals("mkdirs") || a.equals("mkdir")){ | |
85 makeDirectories=Parse.parseBoolean(b); | |
86 }else if(a.equals("tree") || a.equals("taxtree")){ | |
87 taxTreeFile=b; | |
88 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser | |
89 //do nothing | |
90 }else{ | |
91 outstream.println("Unknown parameter "+args[i]); | |
92 assert(false) : "Unknown parameter "+args[i]; | |
93 // throw new RuntimeException("Unknown parameter "+args[i]); | |
94 } | |
95 } | |
96 if(prefix==null){prefix="";} | |
97 if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();} | |
98 | |
99 {//Process parser fields | |
100 Parser.processQuality(); | |
101 | |
102 maxReads=parser.maxReads; | |
103 | |
104 overwrite=parser.overwrite; | |
105 | |
106 in1=parser.in1; | |
107 | |
108 extin=parser.extin; | |
109 } | |
110 | |
111 if(outPath==null || outPath.trim().length()==0){outPath="";} | |
112 else{ | |
113 outPath=outPath.trim().replace('\\', '/').replaceAll("/+", "/"); | |
114 if(!outPath.endsWith("/")){outPath=outPath+"/";} | |
115 } | |
116 | |
117 assert(FastaReadInputStream.settingsOK()); | |
118 | |
119 //Ensure there is an input file | |
120 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} | |
121 | |
122 //Adjust the number of threads for input file reading | |
123 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
124 ByteFile.FORCE_MODE_BF2=true; | |
125 } | |
126 | |
127 //Ensure output files can be written | |
128 if(!Tools.testOutputFiles(overwrite, false, false, resultsFile)){ | |
129 outstream.println(resultsFile); | |
130 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+resultsFile+"\n"); | |
131 } | |
132 | |
133 //Ensure input files can be read | |
134 if(!Tools.testInputFiles(false, true, in1)){ | |
135 throw new RuntimeException("\nCan't read some input files.\n"); | |
136 } | |
137 | |
138 //Ensure that no file was specified multiple times | |
139 if(!Tools.testForDuplicateFiles(true, in1, resultsFile)){ | |
140 throw new RuntimeException("\nSome file names were specified multiple times.\n"); | |
141 } | |
142 | |
143 //Create input FileFormat objects | |
144 ffin1=FileFormat.testInput(in1, FileFormat.FASTA, extin, true, true); | |
145 | |
146 tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false); | |
147 } | |
148 | |
149 /*--------------------------------------------------------------*/ | |
150 /*---------------- Outer Methods ----------------*/ | |
151 /*--------------------------------------------------------------*/ | |
152 | |
153 public void makeDirectoryTree(String root, boolean writeNames){ | |
154 for(TaxNode node : tree.nodes){ | |
155 if(node!=null){ | |
156 String dir=tree.toDir(node, root); | |
157 File df=new File(dir); | |
158 if(!df.exists()){df.mkdirs();} | |
159 if(writeNames){ | |
160 try { | |
161 String fname=node.simpleName()+".name"; | |
162 File nf=new File(fname); | |
163 if(!nf.exists()){ | |
164 ReadWrite.writeString(node.name, dir+fname); | |
165 } | |
166 } catch (Exception e) { | |
167 // TODO Auto-generated catch block | |
168 e.printStackTrace(); | |
169 } | |
170 } | |
171 } | |
172 } | |
173 } | |
174 | |
175 /** Create read streams and process all data */ | |
176 public void process(Timer t){ | |
177 | |
178 Timer t2=new Timer(); | |
179 if(makeDirectories){ | |
180 makeDirectoryTree(outPath, true); | |
181 t2.stop("Finished making directories. "); | |
182 t2.start(); | |
183 } | |
184 processInner(); | |
185 t2.stop(); | |
186 t2.stop("Finished writing data. "); | |
187 | |
188 //Do anything necessary after processing | |
189 | |
190 if(resultsFile!=null){ | |
191 TextStreamWriter tsw=new TextStreamWriter(resultsFile, overwrite, false, false); | |
192 tsw.start(); | |
193 for(TaxNode tn : nodes.keySet()){ | |
194 Long data=nodes.get(tn); | |
195 if(data==null){data=0L;} | |
196 tsw.println(tn.id+"\t"+data+"\t"+tn.levelStringExtended(false)+"\t"+tn.name); | |
197 } | |
198 errorState|=tsw.poisonAndWait(); | |
199 } | |
200 | |
201 //Report timing and results | |
202 { | |
203 t.stop(); | |
204 | |
205 //Calculate units per nanosecond | |
206 double rpnano=readsProcessed/(double)(t.elapsed); | |
207 double lpnano=linesProcessed/(double)(t.elapsed); | |
208 double bpnano=basesProcessed/(double)(t.elapsed); | |
209 | |
210 //Add "k" and "m" for large numbers | |
211 String rpstring=Tools.padKM(readsProcessed, 8); | |
212 String lpstring=Tools.padKM(linesProcessed, 8); | |
213 String bpstring=Tools.padKM(basesProcessed, 8); | |
214 | |
215 String li="Lines In: \t"+linesProcessed+" lines"; | |
216 String lo="Lines Out: \t"+linesOut+" lines"; | |
217 while(lo.length()<li.length()){lo=lo+" ";} | |
218 | |
219 String ri="Reads In: \t"+readsProcessed+" reads"; | |
220 String ro="Reads Out: \t"+readsOut+" reads"; | |
221 while(ro.length()<ri.length()){ro=ro+" ";} | |
222 | |
223 outstream.println(ri+"\t"+basesProcessed+" bases"); | |
224 outstream.println(ro+"\t"+basesOut+" bases"); | |
225 outstream.println(li); | |
226 outstream.println(lo); | |
227 outstream.println(); | |
228 | |
229 outstream.println("Time: \t"+t); | |
230 outstream.println("Reads Processed: "+rpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", rpnano*1000000)); | |
231 outstream.println("Lines Processed: "+lpstring+" \t"+String.format(Locale.ROOT, "%.2fk reads/sec", lpnano*1000000)); | |
232 outstream.println("Bases Processed: "+bpstring+" \t"+String.format(Locale.ROOT, "%.2fm bases/sec", bpnano*1000)); | |
233 } | |
234 | |
235 //Throw an exception of there was an error in a thread | |
236 if(errorState){ | |
237 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
238 } | |
239 } | |
240 | |
241 /*--------------------------------------------------------------*/ | |
242 /*---------------- Inner Methods ----------------*/ | |
243 /*--------------------------------------------------------------*/ | |
244 | |
245 /** Iterate through the reads */ | |
246 void processInner(){ | |
247 ByteFile bf=ByteFile.makeByteFile(ffin1); | |
248 TaxNode currentNode=null; | |
249 long currentSize=0; | |
250 ByteStreamWriter bsw=null; | |
251 for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){ | |
252 linesProcessed++; | |
253 if(line.length>0){ | |
254 final boolean header=(line[0]=='>'); | |
255 if(header){ | |
256 if(maxReads>0 && readsProcessed>=maxReads){break;} | |
257 readsProcessed++; | |
258 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);} | |
259 | |
260 final TaxNode tn=tree.parseNodeFromHeader(new String(line, 1, line.length-1), false); | |
261 | |
262 if(tn==null || tn!=currentNode){ | |
263 if(bsw!=null){errorState=bsw.poisonAndWait()|errorState; bsw=null;} | |
264 } | |
265 if(tn!=null && tn!=currentNode){ | |
266 String dir=tree.toDir(tn, outPath); | |
267 final boolean found=nodes.containsKey(tn); | |
268 if(!found){nodes.put(tn, 0L);} | |
269 FileFormat ff=FileFormat.testOutput(dir+prefix+tn.id+".fa.gz", FileFormat.FASTA, null, true, overwrite && !found, found, false); | |
270 bsw=new ByteStreamWriter(ff); | |
271 bsw.start(); | |
272 } | |
273 | |
274 currentNode=tn; | |
275 currentSize=0; | |
276 if(bsw!=null){readsOut++;} | |
277 }else{ | |
278 basesProcessed+=line.length; | |
279 currentSize+=line.length; | |
280 } | |
281 if(bsw!=null){ | |
282 linesOut++; | |
283 if(!header){basesOut+=line.length;} | |
284 bsw.println(line); | |
285 } | |
286 } | |
287 } | |
288 if(bsw!=null){ | |
289 errorState=bsw.poisonAndWait()|errorState; bsw=null; | |
290 if(currentNode!=null){nodes.put(currentNode, nodes.get(currentNode)+currentSize);} | |
291 } | |
292 bf.close(); | |
293 } | |
294 | |
295 /*--------------------------------------------------------------*/ | |
296 /*---------------- Fields ----------------*/ | |
297 /*--------------------------------------------------------------*/ | |
298 | |
299 /** Primary input file path */ | |
300 private String in1=null; | |
301 | |
302 /** Primary output file path */ | |
303 private String outPath=null; | |
304 | |
305 private String prefix; | |
306 | |
307 /** Override input file extension */ | |
308 private String extin=null; | |
309 | |
310 /** For listing what is present in the output */ | |
311 public String resultsFile=null; | |
312 | |
313 public String taxTreeFile=null; | |
314 | |
315 public boolean makeDirectories=true; | |
316 | |
317 public LinkedHashMap<TaxNode, Long> nodes=new LinkedHashMap<TaxNode, Long>(); | |
318 | |
319 /*--------------------------------------------------------------*/ | |
320 | |
321 /** Number of reads processed */ | |
322 protected long readsProcessed=0; | |
323 /** Number of lines processed */ | |
324 protected long linesProcessed=0; | |
325 /** Number of bases processed */ | |
326 protected long basesProcessed=0; | |
327 | |
328 /** Number of reads out */ | |
329 public long readsOut=0; | |
330 /** Number of lines out */ | |
331 public long linesOut=0; | |
332 /** Number of bases out */ | |
333 public long basesOut=0; | |
334 | |
335 /** Quit after processing this many input reads; -1 means no limit */ | |
336 private long maxReads=-1; | |
337 | |
338 /*--------------------------------------------------------------*/ | |
339 /*---------------- Final Fields ----------------*/ | |
340 /*--------------------------------------------------------------*/ | |
341 | |
342 /** Primary input file */ | |
343 private final FileFormat ffin1; | |
344 | |
345 private final TaxTree tree; | |
346 | |
347 /*--------------------------------------------------------------*/ | |
348 /*---------------- Common Fields ----------------*/ | |
349 /*--------------------------------------------------------------*/ | |
350 | |
351 /** Print status messages to this output stream */ | |
352 private PrintStream outstream=System.err; | |
353 /** Print verbose messages */ | |
354 public static boolean verbose=false; | |
355 /** True if an error was encountered */ | |
356 public boolean errorState=false; | |
357 /** Overwrite existing output files */ | |
358 private boolean overwrite=true; | |
359 | |
360 } |