Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/AnalyzeGenes.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package prok; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 import java.util.Locale; | |
7 import java.util.concurrent.atomic.AtomicInteger; | |
8 | |
9 import fileIO.ByteFile; | |
10 import fileIO.ByteStreamWriter; | |
11 import fileIO.FileFormat; | |
12 import fileIO.ReadWrite; | |
13 import shared.Parse; | |
14 import shared.Parser; | |
15 import shared.PreParser; | |
16 import shared.Shared; | |
17 import shared.Timer; | |
18 import shared.Tools; | |
19 import structures.ByteBuilder; | |
20 import structures.IntList; | |
21 | |
22 /** | |
23 * This class is designed to analyze paired prokaryotic fna and gff files | |
24 * to calculate the patterns in coding and noncoding frames, start and stop sites. | |
25 * It outputs a pgm file. | |
26 * @author Brian Bushnell | |
27 * @date Sep 27, 2018 | |
28 * | |
29 */ | |
30 public class AnalyzeGenes { | |
31 | |
32 /*--------------------------------------------------------------*/ | |
33 /*---------------- Initialization ----------------*/ | |
34 /*--------------------------------------------------------------*/ | |
35 | |
36 /** | |
37 * Code entrance from the command line. | |
38 * @param args Command line arguments | |
39 */ | |
40 public static void main(String[] args){ | |
41 //Start a timer immediately upon code entrance. | |
42 Timer t=new Timer(); | |
43 | |
44 //Create an instance of this class | |
45 AnalyzeGenes x=new AnalyzeGenes(args); | |
46 | |
47 //Run the object | |
48 x.process(t); | |
49 | |
50 //Close the print stream if it was redirected | |
51 Shared.closeStream(x.outstream); | |
52 } | |
53 | |
/**
 * Constructor. Pre-parses and parses the arguments, optionally loads the
 * consensus sequences, validates the input and output files, and then
 * fixes the worker thread count and the output file format.
 * @param args Command line arguments
 */
public AnalyzeGenes(String[] args){

	//Preparse step for help, config files, and outstream
	final PreParser preParser=new PreParser(args, null/*getClass()*/, false);
	args=preParser.args;
	outstream=preParser.outstream;

	//Set shared static variables prior to parsing
	ReadWrite.USE_UNPIGZ=true;
	ReadWrite.USE_PIGZ=true;
	ReadWrite.MAX_ZIP_THREADS=Shared.threads();

	//Parse the arguments and copy out the generic settings this class uses
	final Parser parsed=parse(args);
	overwrite=parsed.overwrite;
	append=parsed.append;
	out=parsed.out1;

	//Consensus sequences are only loaded when ribosomal alignment is enabled
	if(alignRibo){
		ProkObject.loadConsensusSequenceFromFile(false, false);
	}

	fixExtensions(); //Add or remove .gz or .bz2 as needed
	checkFileExistence(); //Ensure files can be read and written
	checkStatics(); //Adjust file-related static fields as needed for this program

	//Thread count is the smallest of: number of input files, the shared thread
	//setting, and the larger of 32 and half the calculated logical processors
	final int fileCount=fnaList.size();
	final int sharedThreads=Shared.threads();
	final int ceiling=Tools.max(32, Shared.CALC_LOGICAL_PROCESSORS()/2);
	threads=Tools.min(fileCount, sharedThreads, ceiling);

	ffout=FileFormat.testOutput(out, FileFormat.PGM, null, true, overwrite, append, false);
}
92 | |
93 /*--------------------------------------------------------------*/ | |
94 /*---------------- Initialization Helpers ----------------*/ | |
95 /*--------------------------------------------------------------*/ | |
96 | |
97 /** Parse arguments from the command line */ | |
98 private Parser parse(String[] args){ | |
99 | |
100 Parser parser=new Parser(); | |
101 parser.overwrite=overwrite; | |
102 for(int i=0; i<args.length; i++){ | |
103 String arg=args[i]; | |
104 String[] split=arg.split("="); | |
105 String a=split[0].toLowerCase(); | |
106 String b=split.length>1 ? split[1] : null; | |
107 if(b!=null && b.equalsIgnoreCase("null")){b=null;} | |
108 | |
109 // outstream.println(arg+", "+a+", "+b); | |
110 if(PGMTools.parseStatic(arg, a, b)){ | |
111 //do nothing | |
112 }else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna") || a.equals("ref")){ | |
113 assert(b!=null); | |
114 Tools.addFiles(b, fnaList); | |
115 }else if(a.equals("gff") || a.equals("ingff") || a.equals("gffin")){ | |
116 assert(b!=null); | |
117 Tools.addFiles(b, gffList); | |
118 }else if(a.equals("verbose")){ | |
119 verbose=Parse.parseBoolean(b); | |
120 ReadWrite.verbose=verbose; | |
121 }else if(a.equals("alignribo") || a.equals("align")){ | |
122 alignRibo=Parse.parseBoolean(b); | |
123 }else if(a.equals("adjustendpoints")){ | |
124 adjustEndpoints=Parse.parseBoolean(b); | |
125 } | |
126 | |
127 else if(ProkObject.parse(arg, a, b)){} | |
128 | |
129 else if(parser.parse(arg, a, b)){ | |
130 //do nothing | |
131 }else if(arg.indexOf('=')<0 && new File(arg).exists() && FileFormat.isFastaFile(arg)){ | |
132 fnaList.add(arg); | |
133 }else{ | |
134 outstream.println("Unknown parameter "+args[i]); | |
135 assert(false) : "Unknown parameter "+args[i]; | |
136 // throw new RuntimeException("Unknown parameter "+args[i]); | |
137 } | |
138 } | |
139 | |
140 if(gffList.isEmpty()){ | |
141 for(String s : fnaList){ | |
142 String prefix=ReadWrite.stripExtension(s); | |
143 String gff=prefix+".gff"; | |
144 File f=new File(gff); | |
145 if(!f.exists()){ | |
146 String gz=gff+".gz"; | |
147 f=new File(gz); | |
148 assert(f.exists() && f.canRead()) : "Can't read file "+gff; | |
149 gff=gz; | |
150 } | |
151 gffList.add(gff); | |
152 } | |
153 } | |
154 assert(gffList.size()==fnaList.size()) : "Number of fna and gff files do not match: "+fnaList.size()+", "+gffList.size(); | |
155 return parser; | |
156 } | |
157 | |
158 /** Add or remove .gz or .bz2 as needed */ | |
159 private void fixExtensions(){ | |
160 fnaList=Tools.fixExtension(fnaList); | |
161 gffList=Tools.fixExtension(gffList); | |
162 if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} | |
163 } | |
164 | |
165 /** Ensure files can be read and written */ | |
166 private void checkFileExistence(){ | |
167 //Ensure output files can be written | |
168 if(!Tools.testOutputFiles(overwrite, append, false, out)){ | |
169 outstream.println((out==null)+", "+out); | |
170 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n"); | |
171 } | |
172 | |
173 //Ensure input files can be read | |
174 ArrayList<String> foo=new ArrayList<String>(); | |
175 foo.addAll(fnaList); | |
176 foo.addAll(gffList); | |
177 if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){ | |
178 throw new RuntimeException("\nCan't read some input files.\n"); | |
179 } | |
180 | |
181 //Ensure that no file was specified multiple times | |
182 foo.add(out); | |
183 if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){ | |
184 throw new RuntimeException("\nSome file names were specified multiple times.\n"); | |
185 } | |
186 } | |
187 | |
188 /** Adjust file-related static fields as needed for this program */ | |
189 private static void checkStatics(){ | |
190 //Adjust the number of threads for input file reading | |
191 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
192 ByteFile.FORCE_MODE_BF2=true; | |
193 } | |
194 } | |
195 | |
196 /*--------------------------------------------------------------*/ | |
197 /*---------------- Outer Methods ----------------*/ | |
198 /*--------------------------------------------------------------*/ | |
199 | |
200 void process(Timer t){ | |
201 | |
202 final GeneModel pgm; | |
203 if(Shared.threads()<2 || fnaList.size()<2){ | |
204 pgm=makeModelST(); | |
205 }else{ | |
206 pgm=spawnThreads(); | |
207 } | |
208 | |
209 ByteStreamWriter bsw=ByteStreamWriter.makeBSW(ffout); | |
210 | |
211 ByteBuilder bb=new ByteBuilder(); | |
212 pgm.appendTo(bb); | |
213 bytesOut+=bb.length; | |
214 | |
215 if(bsw!=null){ | |
216 bsw.addJob(bb); | |
217 errorState|=bsw.poisonAndWait(); | |
218 } | |
219 | |
220 t.stop(); | |
221 | |
222 outstream.println(timeReadsBasesGenesProcessed(t, pgm.readsProcessed, pgm.basesProcessed, pgm.genesProcessed, pgm.filesProcessed, 8)); | |
223 | |
224 outstream.println(); | |
225 outstream.println(typesProcessed(pgm, 12)); | |
226 | |
227 //outstream.println("Bytes Out: \t"+bytesOut); | |
228 | |
229 if(errorState){ | |
230 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
231 } | |
232 } | |
233 | |
234 private static String timeReadsBasesGenesProcessed(Timer t, long readsProcessed, long basesProcessed, long genesProcessed, long filesProcessed, int pad){ | |
235 return ("Time: \t"+t+"\n"+readsBasesGenesProcessed(t.elapsed, readsProcessed, basesProcessed, genesProcessed, filesProcessed, pad)); | |
236 } | |
237 | |
238 private static String readsBasesGenesProcessed(long elapsed, long reads, long bases, long genes, long files, int pad){ | |
239 double rpnano=reads/(double)elapsed; | |
240 double bpnano=bases/(double)elapsed; | |
241 double gpnano=genes/(double)elapsed; | |
242 double fpnano=files/(double)elapsed; | |
243 | |
244 String rstring=Tools.padKM(reads, pad); | |
245 String bstring=Tools.padKM(bases, pad); | |
246 String gstring=Tools.padKM(genes, pad); | |
247 String fstring=Tools.padKM(files, pad); | |
248 ByteBuilder sb=new ByteBuilder(); | |
249 sb.append("Files Processed: ").append(fstring).append(String.format(Locale.ROOT, " \t%.2f files/sec", fpnano*1000000000)).append('\n'); | |
250 sb.append("Sequences Processed:").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk seqs/sec", rpnano*1000000)).append('\n'); | |
251 sb.append("Genes Processed: ").append(gstring).append(String.format(Locale.ROOT, " \t%.2fk genes/sec", gpnano*1000000)).append('\n'); | |
252 sb.append("Bases Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm bases/sec", bpnano*1000)); | |
253 return sb.toString(); | |
254 } | |
255 | |
256 private static String typesProcessed(GeneModel pgm, int pad){ | |
257 | |
258 ByteBuilder sb=new ByteBuilder(); | |
259 sb.append("CDS: "+Tools.padLeft(pgm.statsCDS.lengthCount, pad)).nl(); | |
260 sb.append("tRNA: "+Tools.padLeft(pgm.statstRNA.lengthCount, pad)).nl(); | |
261 sb.append("16S: "+Tools.padLeft(pgm.stats16S.lengthCount, pad)).nl(); | |
262 sb.append("23S: "+Tools.padLeft(pgm.stats23S.lengthCount, pad)).nl(); | |
263 sb.append("5S: "+Tools.padLeft(pgm.stats5S.lengthCount, pad)).nl(); | |
264 sb.append("18S: "+Tools.padLeft(pgm.stats18S.lengthCount, pad)); | |
265 return sb.toString(); | |
266 } | |
267 | |
268 /*--------------------------------------------------------------*/ | |
269 /*---------------- Inner Methods ----------------*/ | |
270 /*--------------------------------------------------------------*/ | |
271 | |
272 //TODO: Process each file in a thread. | |
273 private GeneModel makeModelST(){ | |
274 GeneModel pgmSum=new GeneModel(true); | |
275 | |
276 for(int i=0; i<fnaList.size(); i++){ | |
277 String fna=fnaList.get(i); | |
278 String gff=gffList.get(i); | |
279 pgmSum.process(fna, gff); | |
280 } | |
281 return pgmSum; | |
282 } | |
283 | |
284 /*--------------------------------------------------------------*/ | |
285 /*---------------- Thread Management ----------------*/ | |
286 /*--------------------------------------------------------------*/ | |
287 | |
288 /** Spawn process threads */ | |
289 private GeneModel spawnThreads(){ | |
290 | |
291 //Do anything necessary prior to processing | |
292 | |
293 final AtomicInteger aint=new AtomicInteger(0); | |
294 | |
295 //Fill a list with FileThreads | |
296 ArrayList<FileThread> alpt=new ArrayList<FileThread>(threads); | |
297 for(int i=0; i<threads; i++){ | |
298 alpt.add(new FileThread(aint)); | |
299 } | |
300 | |
301 //Start the threads | |
302 for(FileThread pt : alpt){ | |
303 pt.start(); | |
304 } | |
305 | |
306 //Wait for threads to finish | |
307 GeneModel pgm=waitForThreads(alpt); | |
308 | |
309 //Do anything necessary after processing | |
310 return pgm; | |
311 } | |
312 | |
313 private GeneModel waitForThreads(ArrayList<FileThread> alpt){ | |
314 | |
315 GeneModel pgm=new GeneModel(false); | |
316 | |
317 //Wait for completion of all threads | |
318 boolean success=true; | |
319 for(FileThread pt : alpt){ | |
320 | |
321 //Wait until this thread has terminated | |
322 while(pt.getState()!=Thread.State.TERMINATED){ | |
323 try { | |
324 //Attempt a join operation | |
325 pt.join(); | |
326 } catch (InterruptedException e) { | |
327 //Potentially handle this, if it is expected to occur | |
328 e.printStackTrace(); | |
329 } | |
330 } | |
331 | |
332 //Accumulate per-thread statistics | |
333 pgm.add(pt.pgm); | |
334 | |
335 success&=pt.success; | |
336 errorState|=pt.errorStateT; | |
337 } | |
338 | |
339 //Track whether any threads failed | |
340 if(!success){errorState=true;} | |
341 return pgm; | |
342 } | |
343 | |
344 /*--------------------------------------------------------------*/ | |
345 /*---------------- Inner Classes ----------------*/ | |
346 /*--------------------------------------------------------------*/ | |
347 | |
348 private class FileThread extends Thread { | |
349 | |
350 FileThread(AtomicInteger fnum_){ | |
351 fnum=fnum_; | |
352 pgm=new GeneModel(true); | |
353 } | |
354 | |
355 @Override | |
356 public void run(){ | |
357 for(int i=fnum.getAndIncrement(); i<fnaList.size(); i=fnum.getAndIncrement()){ | |
358 String fna=fnaList.get(i); | |
359 String gff=gffList.get(i); | |
360 errorStateT=pgm.process(fna, gff)|errorState; | |
361 // System.err.println("Processed "+fna+" in "+this.toString()); | |
362 } | |
363 success=true; | |
364 } | |
365 | |
366 private final AtomicInteger fnum; | |
367 private final GeneModel pgm; | |
368 boolean errorStateT=false; | |
369 boolean success=false; | |
370 } | |
371 | |
372 /*--------------------------------------------------------------*/ | |
373 /*---------------- Fields ----------------*/ | |
374 /*--------------------------------------------------------------*/ | |
375 | |
/** Input fna (fasta) file names; filled by parse() */
private ArrayList<String> fnaList=new ArrayList<String>();
/** Input gff file names, index-aligned with fnaList; given explicitly or inferred by parse() */
private ArrayList<String> gffList=new ArrayList<String>();
/** NOTE(review): not referenced anywhere else in this class - confirm whether it is still needed */
private IntList taxList=new IntList();
/** Output file name, taken from the parser's out1; may be null */
private String out=null;

/*--------------------------------------------------------------*/

/** Running total of the serialized model length, updated in process() */
private long bytesOut=0;
/** Set by the alignribo/align flag; when true the constructor loads consensus sequences */
static boolean alignRibo=true;
/** Set by the adjustendpoints flag; not read within this class */
static boolean adjustEndpoints=true;

/*--------------------------------------------------------------*/
/*---------------- Final Fields ----------------*/
/*--------------------------------------------------------------*/

/** Output file format, created in the constructor from out */
private final FileFormat ffout;
/** Number of FileThreads spawned by spawnThreads() */
private final int threads;

/*--------------------------------------------------------------*/
/*---------------- Common Fields ----------------*/
/*--------------------------------------------------------------*/

/** Destination for status messages; replaced by the PreParser's stream in the constructor */
private PrintStream outstream=System.err;
/** Set by the verbose flag, which also sets ReadWrite.verbose */
public static boolean verbose=false;
/** True if an error was recorded; causes process() to throw at the end */
public boolean errorState=false;
/** Overwrite setting, seeded into and then read back from the Parser */
private boolean overwrite=true;
/** Append setting, read from the Parser */
private boolean append=false;
403 | |
404 } | |
405 |