Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/CallGenes.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package prok; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 import java.util.Arrays; | |
7 | |
8 import dna.AminoAcid; | |
9 import dna.Data; | |
10 import fileIO.ByteFile; | |
11 import fileIO.ByteStreamWriter; | |
12 import fileIO.FileFormat; | |
13 import fileIO.ReadWrite; | |
14 import gff.CompareGff; | |
15 import jgi.BBMerge; | |
16 import json.JsonObject; | |
17 import shared.Parse; | |
18 import shared.Parser; | |
19 import shared.PreParser; | |
20 import shared.ReadStats; | |
21 import shared.Shared; | |
22 import shared.Timer; | |
23 import shared.Tools; | |
24 import stream.ConcurrentReadInputStream; | |
25 import stream.ConcurrentReadOutputStream; | |
26 import stream.Read; | |
27 import structures.ByteBuilder; | |
28 import structures.ListNum; | |
29 | |
30 /** | |
31 * This is the executable class for gene-calling. | |
32 * @author Brian Bushnell | |
33 * @date Sep 24, 2018 | |
34 * | |
35 */ | |
36 public class CallGenes extends ProkObject { | |
37 | |
38 /*--------------------------------------------------------------*/ | |
39 /*---------------- Initialization ----------------*/ | |
40 /*--------------------------------------------------------------*/ | |
41 | |
42 /** | |
43 * Code entrance from the command line. | |
44 * @param args Command line arguments | |
45 */ | |
46 public static void main(String[] args){ | |
47 //Start a timer immediately upon code entrance. | |
48 Timer t=new Timer(); | |
49 | |
50 //Create an instance of this class | |
51 CallGenes x=new CallGenes(args); | |
52 | |
53 //Run the object | |
54 x.process(t); | |
55 | |
56 //Close the print stream if it was redirected | |
57 Shared.closeStream(x.outstream); | |
58 } | |
59 | |
60 /** | |
61 * Constructor. | |
62 * @param args Command line arguments | |
63 */ | |
64 public CallGenes(String[] args){ | |
65 | |
66 {//Preparse block for help, config files, and outstream | |
67 PreParser pp=new PreParser(args, (args.length>40 ? null : getClass()), false); | |
68 args=pp.args; | |
69 outstream=pp.outstream; | |
70 } | |
71 | |
72 //Set shared static variables prior to parsing | |
73 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; | |
74 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); | |
75 | |
76 {//Parse the arguments | |
77 final Parser parser=parse(args); | |
78 overwrite=parser.overwrite; | |
79 append=parser.append; | |
80 | |
81 outGff=parser.out1; | |
82 maxReads=parser.maxReads; | |
83 } | |
84 | |
85 fixExtensions(); //Add or remove .gz or .bz2 as needed | |
86 checkFileExistence(); //Ensure files can be read and written | |
87 checkStatics(); //Adjust file-related static fields as needed for this program | |
88 | |
89 ffoutGff=FileFormat.testOutput(outGff, FileFormat.GFF, null, true, overwrite, append, ordered); | |
90 ffoutAmino=FileFormat.testOutput(outAmino, FileFormat.FA, null, true, overwrite, append, ordered); | |
91 ffout16S=FileFormat.testOutput(out16S, FileFormat.FA, null, true, overwrite, append, ordered); | |
92 ffout18S=FileFormat.testOutput(out18S, FileFormat.FA, null, true, overwrite, append, ordered); | |
93 | |
94 if(ffoutGff!=null){ | |
95 assert(!ffoutGff.isSequence()) : "\nout is for gff files. To output sequence, please use outa."; | |
96 } | |
97 if(ffoutAmino!=null){ | |
98 assert(!ffoutAmino.gff()) : "\nouta is for sequence data. To output gff, please use out."; | |
99 } | |
100 if(ffout16S!=null){ | |
101 assert(!ffout16S.gff()) : "\nout16S is for sequence data. To output gff, please use out."; | |
102 } | |
103 if(ffout18S!=null){ | |
104 assert(!ffout18S.gff()) : "\nout18S is for sequence data. To output gff, please use out."; | |
105 } | |
106 | |
107 if(geneHistFile==null){geneHistBins=0;} | |
108 else{ | |
109 assert(geneHistBins>1) : "geneHistBins="+geneHistBins+"; should be >1"; | |
110 assert(geneHistDiv>=1) : "geneHistDiv="+geneHistDiv+"; should be >=1"; | |
111 } | |
112 geneHist=geneHistBins>1 ? new long[geneHistBins] : null; | |
113 } | |
114 | |
115 /*--------------------------------------------------------------*/ | |
116 /*---------------- Initialization Helpers ----------------*/ | |
117 /*--------------------------------------------------------------*/ | |
118 | |
119 /** Parse arguments from the command line */ | |
120 private Parser parse(String[] args){ | |
121 | |
122 Parser parser=new Parser(); | |
123 for(int i=0; i<args.length; i++){ | |
124 String arg=args[i]; | |
125 String[] split=arg.split("="); | |
126 String a=split[0].toLowerCase(); | |
127 String b=split.length>1 ? split[1] : null; | |
128 if(b!=null && b.equalsIgnoreCase("null")){b=null;} | |
129 | |
130 // outstream.println(arg+", "+a+", "+b); | |
131 if(PGMTools.parseStatic(arg, a, b)){ | |
132 //do nothing | |
133 }else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna")){ | |
134 assert(b!=null); | |
135 Tools.addFiles(b, fnaList); | |
136 }else if(b==null && new File(arg).exists() && FileFormat.isFastaFile(arg)){ | |
137 fnaList.add(arg); | |
138 }else if(a.equals("pgm") || a.equals("gm") || a.equals("model")){ | |
139 assert(b!=null); | |
140 if(b.equalsIgnoreCase("auto") || b.equalsIgnoreCase("default")){ | |
141 b=Data.findPath("?model.pgm"); | |
142 pgmList.add(b); | |
143 }else{ | |
144 Tools.addFiles(b, pgmList); | |
145 } | |
146 }else if(b==null && new File(arg).exists() && FileFormat.isPgmFile(arg)){ | |
147 Tools.addFiles(arg, pgmList); | |
148 }else if(a.equals("outamino") || a.equals("aminoout") || a.equals("outa") || a.equals("outaa") || a.equals("aaout") || a.equals("amino")){ | |
149 outAmino=b; | |
150 }else if(a.equalsIgnoreCase("out16s") || a.equalsIgnoreCase("16sout")){ | |
151 out16S=b; | |
152 }else if(a.equalsIgnoreCase("out18s") || a.equalsIgnoreCase("18sout")){ | |
153 out18S=b; | |
154 }else if(a.equals("verbose")){ | |
155 verbose=Parse.parseBoolean(b); | |
156 //ReadWrite.verbose=verbose; | |
157 GeneCaller.verbose=verbose; | |
158 } | |
159 | |
160 else if(a.equals("json_out") || a.equalsIgnoreCase("json")){ | |
161 json_out=Parse.parseBoolean(b); | |
162 }else if(a.equals("stats") || a.equalsIgnoreCase("outstats")){ | |
163 outStats=b; | |
164 }else if(a.equals("hist") || a.equalsIgnoreCase("outhist") || a.equalsIgnoreCase("lengthhist") || a.equalsIgnoreCase("lhist") || a.equalsIgnoreCase("genehist")){ | |
165 geneHistFile=b; | |
166 }else if(a.equals("bins")){ | |
167 geneHistBins=Integer.parseInt(b); | |
168 }else if(a.equals("binlen") || a.equals("binlength") || a.equals("histdiv")){ | |
169 geneHistBins=Integer.parseInt(b); | |
170 }else if(a.equals("printzero") || a.equals("pz")){ | |
171 printZeroCountHist=Parse.parseBoolean(b); | |
172 } | |
173 | |
174 else if(a.equals("merge")){ | |
175 merge=Parse.parseBoolean(b); | |
176 }else if(a.equals("ecco")){ | |
177 ecco=Parse.parseBoolean(b); | |
178 } | |
179 | |
180 else if(a.equalsIgnoreCase("setbias16s")) { | |
181 GeneCaller.biases[r16S]=Float.parseFloat(b); | |
182 }else if(a.equalsIgnoreCase("setbias18s")) { | |
183 GeneCaller.biases[r18S]=Float.parseFloat(b); | |
184 }else if(a.equalsIgnoreCase("setbias23s")) { | |
185 GeneCaller.biases[r23S]=Float.parseFloat(b); | |
186 }else if(a.equalsIgnoreCase("setbias5s")) { | |
187 GeneCaller.biases[r5S]=Float.parseFloat(b); | |
188 }else if(a.equalsIgnoreCase("setbiastRNA")) { | |
189 GeneCaller.biases[tRNA]=Float.parseFloat(b); | |
190 }else if(a.equalsIgnoreCase("setbiasCDS")) { | |
191 GeneCaller.biases[CDS]=Float.parseFloat(b); | |
192 } | |
193 | |
194 else if(a.equals("ordered")){ | |
195 ordered=Parse.parseBoolean(b); | |
196 } | |
197 | |
198 else if(a.equals("translate")){ | |
199 mode=TRANSLATE; | |
200 }else if(a.equals("retranslate") || a.equals("detranslate")){ | |
201 mode=RETRANSLATE; | |
202 }else if(a.equals("recode")){ | |
203 mode=RECODE; | |
204 } | |
205 | |
206 else if(a.equalsIgnoreCase("minlen") || a.equals("minlength")){ | |
207 minLen=Integer.parseInt(b); | |
208 }else if(a.equals("maxoverlapss") || a.equals("overlapss") || a.equals("overlapsamestrand") || a.equals("moss") || a.equalsIgnoreCase("maxOverlapSameStrand")){ | |
209 maxOverlapSameStrand=Integer.parseInt(b); | |
210 }else if(a.equals("maxoverlapos") || a.equals("overlapos") || a.equals("overlapoppositestrand") || a.equals("moos") || a.equalsIgnoreCase("maxOverlapOppositeStrand")){ | |
211 maxOverlapOppositeStrand=Integer.parseInt(b); | |
212 }else if(a.equalsIgnoreCase("minStartScore")){ | |
213 minStartScore=Float.parseFloat(b); | |
214 }else if(a.equalsIgnoreCase("minStopScore")){ | |
215 minStopScore=Float.parseFloat(b); | |
216 }else if(a.equalsIgnoreCase("minInnerScore") || a.equalsIgnoreCase("minKmerScore")){ | |
217 minKmerScore=Float.parseFloat(b); | |
218 }else if(a.equalsIgnoreCase("minOrfScore") || a.equalsIgnoreCase("minScore")){ | |
219 minOrfScore=Float.parseFloat(b); | |
220 }else if(a.equalsIgnoreCase("minAvgScore")){ | |
221 minAvgScore=Float.parseFloat(b); | |
222 }else if(a.equalsIgnoreCase("breakLimit")){ | |
223 GeneCaller.breakLimit=Integer.parseInt(b); | |
224 }else if(a.equalsIgnoreCase("clearcutoffs") || a.equalsIgnoreCase("clearfilters")){ | |
225 GeneCaller.breakLimit=9999; | |
226 minOrfScore=-9999; | |
227 minAvgScore=-9999; | |
228 minKmerScore=-9999; | |
229 minStopScore=-9999; | |
230 minStartScore=-9999; | |
231 } | |
232 | |
233 else if(a.equalsIgnoreCase("e1")){ | |
234 Orf.e1=Float.parseFloat(b); | |
235 }else if(a.equalsIgnoreCase("e2")){ | |
236 Orf.e2=Float.parseFloat(b); | |
237 }else if(a.equalsIgnoreCase("e3")){ | |
238 Orf.e3=Float.parseFloat(b); | |
239 } | |
240 else if(a.equalsIgnoreCase("f1")){ | |
241 Orf.f1=Float.parseFloat(b); | |
242 }else if(a.equalsIgnoreCase("f2")){ | |
243 Orf.f2=Float.parseFloat(b); | |
244 }else if(a.equalsIgnoreCase("f3")){ | |
245 Orf.f3=Float.parseFloat(b); | |
246 } | |
247 else if(a.equalsIgnoreCase("p0")){ | |
248 GeneCaller.p0=Float.parseFloat(b); | |
249 }else if(a.equalsIgnoreCase("p1")){ | |
250 GeneCaller.p1=Float.parseFloat(b); | |
251 }else if(a.equalsIgnoreCase("p2")){ | |
252 GeneCaller.p2=Float.parseFloat(b); | |
253 }else if(a.equalsIgnoreCase("p3")){ | |
254 GeneCaller.p3=Float.parseFloat(b); | |
255 }else if(a.equalsIgnoreCase("p4")){ | |
256 GeneCaller.p4=Float.parseFloat(b); | |
257 }else if(a.equalsIgnoreCase("p5")){ | |
258 GeneCaller.p5=Float.parseFloat(b); | |
259 }else if(a.equalsIgnoreCase("p6")){ | |
260 GeneCaller.p6=Float.parseFloat(b); | |
261 } | |
262 else if(a.equalsIgnoreCase("q1")){ | |
263 GeneCaller.q1=Float.parseFloat(b); | |
264 }else if(a.equalsIgnoreCase("q2")){ | |
265 GeneCaller.q2=Float.parseFloat(b); | |
266 }else if(a.equalsIgnoreCase("q3")){ | |
267 GeneCaller.q3=Float.parseFloat(b); | |
268 }else if(a.equalsIgnoreCase("q4")){ | |
269 GeneCaller.q4=Float.parseFloat(b); | |
270 }else if(a.equalsIgnoreCase("q5")){ | |
271 GeneCaller.q5=Float.parseFloat(b); | |
272 } | |
273 else if(a.equalsIgnoreCase("lookback")){ | |
274 GeneCaller.lookbackPlus=GeneCaller.lookbackMinus=Integer.parseInt(b); | |
275 }else if(a.equalsIgnoreCase("lookbackplus")){ | |
276 GeneCaller.lookbackPlus=Integer.parseInt(b); | |
277 }else if(a.equalsIgnoreCase("lookbackminus")){ | |
278 GeneCaller.lookbackMinus=Integer.parseInt(b); | |
279 } | |
280 | |
281 else if(a.equalsIgnoreCase("compareto")){ | |
282 compareToGff=b; | |
283 } | |
284 | |
285 else if(ProkObject.parse(arg, a, b)){} | |
286 | |
287 else if(parser.parse(arg, a, b)){ | |
288 //do nothing | |
289 }else{ | |
290 outstream.println("Unknown parameter "+args[i]); | |
291 assert(false) : "Unknown parameter "+args[i]; | |
292 // throw new RuntimeException("Unknown parameter "+args[i]); | |
293 } | |
294 } | |
295 | |
296 if(pgmList.isEmpty()){ | |
297 String b=Data.findPath("?model.pgm"); | |
298 pgmList.add(b); | |
299 } | |
300 for(int i=0; i<pgmList.size(); i++){ | |
301 String s=pgmList.get(i); | |
302 if(s.equalsIgnoreCase("auto") || s.equalsIgnoreCase("default")){ | |
303 pgmList.set(i, Data.findPath("?model.pgm")); | |
304 } | |
305 } | |
306 | |
307 if(Shared.threads()<2){ordered=false;} | |
308 assert(!fnaList.isEmpty()) : "At least 1 fasta file is required."; | |
309 assert(!pgmList.isEmpty()) : "At least 1 pgm file is required."; | |
310 return parser; | |
311 } | |
312 | |
313 /** Add or remove .gz or .bz2 as needed */ | |
314 private void fixExtensions(){ | |
315 fnaList=Tools.fixExtension(fnaList); | |
316 pgmList=Tools.fixExtension(pgmList); | |
317 if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} | |
318 } | |
319 | |
320 /** Ensure files can be read and written */ | |
321 private void checkFileExistence(){ | |
322 //Ensure output files can be written | |
323 if(!Tools.testOutputFiles(overwrite, append, false, outGff, outAmino, out16S, out18S, outStats, geneHistFile)){ | |
324 outstream.println((outGff==null)+", "+outGff); | |
325 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files " | |
326 +outGff+", "+outAmino+", "+out16S+", "+out18S+", "+outStats+", "+geneHistFile+"\n"); | |
327 } | |
328 | |
329 //Ensure input files can be read | |
330 ArrayList<String> foo=new ArrayList<String>(); | |
331 foo.addAll(fnaList); | |
332 foo.addAll(pgmList); | |
333 if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){ | |
334 throw new RuntimeException("\nCan't read some input files.\n"); | |
335 } | |
336 | |
337 //Ensure that no file was specified multiple times | |
338 foo.add(outGff); | |
339 foo.add(outAmino); | |
340 foo.add(out16S); | |
341 foo.add(out18S); | |
342 foo.add(outStats); | |
343 foo.add(geneHistFile); | |
344 if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){ | |
345 throw new RuntimeException("\nSome file names were specified multiple times.\n"); | |
346 } | |
347 } | |
348 | |
349 /** Adjust file-related static fields as needed for this program */ | |
350 private static void checkStatics(){ | |
351 //Adjust the number of threads for input file reading | |
352 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
353 ByteFile.FORCE_MODE_BF2=true; | |
354 } | |
355 } | |
356 | |
357 /*--------------------------------------------------------------*/ | |
358 /*---------------- Outer Methods ----------------*/ | |
359 /*--------------------------------------------------------------*/ | |
360 | |
361 /** Create read streams and process all data */ | |
362 void process(Timer t){ | |
363 | |
364 GeneModel pgm=PGMTools.loadAndMerge(pgmList); | |
365 | |
366 if(call16S || call18S || call23S || calltRNA || call5S){ | |
367 loadLongKmers(); | |
368 loadConsensusSequenceFromFile(false, false); | |
369 } | |
370 | |
371 ByteStreamWriter bsw=makeBSW(ffoutGff); | |
372 if(bsw!=null){ | |
373 bsw.forcePrint("##gff-version 3\n"); | |
374 } | |
375 | |
376 ConcurrentReadOutputStream rosAmino=makeCros(ffoutAmino); | |
377 ConcurrentReadOutputStream ros16S=makeCros(ffout16S); | |
378 ConcurrentReadOutputStream ros18S=makeCros(ffout18S); | |
379 | |
380 //Turn off read validation in the input threads to increase speed | |
381 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR; | |
382 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4; | |
383 | |
384 //Reset counters | |
385 readsIn=genesOut=0; | |
386 basesIn=bytesOut=0; | |
387 | |
388 for(String fname : fnaList){ | |
389 //Create a read input stream | |
390 final ConcurrentReadInputStream cris=makeCris(fname); | |
391 | |
392 //Process the reads in separate threads | |
393 spawnThreads(cris, bsw, rosAmino, ros16S, ros18S, pgm); | |
394 | |
395 //Close the input stream | |
396 errorState|=ReadWrite.closeStream(cris); | |
397 } | |
398 | |
399 //Close the input stream | |
400 errorState|=ReadWrite.closeStreams(null, rosAmino, ros16S, ros18S); | |
401 | |
402 if(verbose){outstream.println("Finished; closing streams.");} | |
403 | |
404 //Write anything that was accumulated by ReadStats | |
405 errorState|=ReadStats.writeAll(); | |
406 //Close the output stream | |
407 if(bsw!=null){errorState|=bsw.poisonAndWait();} | |
408 | |
409 //Reset read validation | |
410 Read.VALIDATE_IN_CONSTRUCTOR=vic; | |
411 | |
412 //Report timing and results | |
413 t.stop(); | |
414 outstream.println(Tools.timeReadsBasesProcessed(t, readsIn, basesIn, 8)); | |
415 outstream.println(Tools.linesBytesOut(readsIn, basesIn, genesOut, bytesOut, 8, false)); | |
416 outstream.println(); | |
417 | |
418 if(json_out){ | |
419 printStatsJson(outStats); | |
420 }else{ | |
421 printStats(outStats); | |
422 } | |
423 | |
424 if(geneHistFile!=null){ | |
425 printHist(geneHistFile); | |
426 } | |
427 | |
428 //Throw an exception of there was an error in a thread | |
429 if(errorState){ | |
430 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
431 } | |
432 | |
433 if(compareToGff!=null){ | |
434 if(compareToGff.equals("auto")){ | |
435 compareToGff=fnaList.get(0).replace(".fna", ".gff"); | |
436 compareToGff=compareToGff.replace(".fa", ".gff"); | |
437 compareToGff=compareToGff.replace(".fasta", ".gff"); | |
438 } | |
439 CompareGff.main(new String[] {outGff, compareToGff}); | |
440 } | |
441 } | |
442 | |
443 private void printStats(String fname){ | |
444 if(fname==null){return;} | |
445 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false); | |
446 bsw.start(); | |
447 | |
448 if(callCDS){ | |
449 bsw.println("Gene Starts Made: \t "+Tools.padLeft(geneStartsMade, 12)); | |
450 bsw.println("Gene Stops Made: \t "+Tools.padLeft(geneStopsMade, 12)); | |
451 bsw.println("Gene Starts Retained: \t "+Tools.padLeft(geneStartsRetained, 12)); | |
452 bsw.println("Gene Stops Retained: \t "+Tools.padLeft(geneStopsRetained, 12)); | |
453 bsw.println("CDS Out: \t "+Tools.padLeft(geneStartsOut, 12)); | |
454 } | |
455 if(call16S){bsw.println("16S Out: \t "+Tools.padLeft(r16SOut, 12));} | |
456 if(call18S){bsw.println("18S Out: \t "+Tools.padLeft(r18SOut, 12));} | |
457 if(call23S){bsw.println("23S Out: \t "+Tools.padLeft(r23SOut, 12));} | |
458 if(call5S){bsw.println("5S Out: \t "+Tools.padLeft(r5SOut, 12));} | |
459 if(calltRNA){bsw.println("tRNA Out: \t "+Tools.padLeft(tRNAOut, 12));} | |
460 | |
461 if(callCDS){ | |
462 bsw.println(); | |
463 bsw.println("All ORF Stats:"); | |
464 bsw.print(stCds.toString()); | |
465 | |
466 bsw.println(); | |
467 bsw.println("Retained ORF Stats:"); | |
468 bsw.print(stCds2.toString()); | |
469 | |
470 bsw.println(); | |
471 bsw.println("Called ORF Stats:"); | |
472 stCdsPass.genomeSize=basesIn; | |
473 bsw.print(stCdsPass.toString()); | |
474 } | |
475 | |
476 if(call16S){ | |
477 bsw.println(); | |
478 bsw.println("Called 16S Stats:"); | |
479 bsw.print(st16s.toString()); | |
480 } | |
481 if(call23S){ | |
482 bsw.println(); | |
483 bsw.println("Called 23S Stats:"); | |
484 bsw.print(st23s.toString()); | |
485 } | |
486 if(call5S){ | |
487 bsw.println(); | |
488 bsw.println("Called 5S Stats:"); | |
489 bsw.print(st5s.toString()); | |
490 } | |
491 if(call18S){ | |
492 bsw.println(); | |
493 bsw.println("Called 18S Stats:"); | |
494 bsw.print(st18s.toString()); | |
495 } | |
496 bsw.poisonAndWait(); | |
497 } | |
498 | |
499 private void printStatsJson(String fname){ | |
500 if(fname==null){return;} | |
501 | |
502 JsonObject outer=new JsonObject(); | |
503 | |
504 { | |
505 JsonObject jo=new JsonObject(); | |
506 if(callCDS){ | |
507 jo.add("Gene Starts Made", geneStartsMade); | |
508 jo.add("Gene Stops Made", geneStopsMade); | |
509 jo.add("Gene Starts Retained", geneStartsRetained); | |
510 jo.add("Gene Stops Retained", geneStopsRetained); | |
511 jo.add("CDS Out", geneStartsOut); | |
512 } | |
513 if(call16S){jo.add("16S Out", r16SOut);} | |
514 if(call18S){jo.add("18S Out", r18SOut);} | |
515 if(call23S){jo.add("23S Out", r23SOut);} | |
516 if(call5S){jo.add("5S Out", r5SOut);} | |
517 if(calltRNA){jo.add("tRNA Out", tRNAOut);} | |
518 outer.add("Overall", jo); | |
519 } | |
520 | |
521 if(callCDS){ | |
522 outer.add("All ORF Stats", stCds.toJson()); | |
523 outer.add("Retained ORF Stats", stCds2.toJson()); | |
524 stCdsPass.genomeSize=basesIn; | |
525 outer.add("Called ORF Stats", stCdsPass.toJson()); | |
526 } | |
527 | |
528 if(call16S){outer.add("Called 16S Stats", st16s.toJson());} | |
529 if(call18S){outer.add("Called 18S Stats", st18s.toJson());} | |
530 if(call23S){outer.add("Called 23S Stats", st23s.toJson());} | |
531 if(call5S){outer.add("Called 5S Stats", st5s.toJson());} | |
532 if(calltRNA){outer.add("Called tRNA Stats", sttRNA.toJson());} | |
533 | |
534 | |
535 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false); | |
536 bsw.start(); | |
537 bsw.println(outer.toText()); | |
538 bsw.poisonAndWait(); | |
539 } | |
540 | |
541 private void printHist(String fname){ | |
542 if(fname==null || geneHist==null){return;} | |
543 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false); | |
544 bsw.start(); | |
545 long sum=Tools.sum(geneHist); | |
546 double mean=Tools.averageHistogram(geneHist)*geneHistDiv; | |
547 int median=Tools.medianHistogram(geneHist)*geneHistDiv; | |
548 double std=Tools.standardDeviationHistogram(geneHist)*geneHistDiv; | |
549 bsw.println("#Gene Length Histogram"); | |
550 bsw.print("#Genes:\t").println(sum); | |
551 bsw.print("#Mean:\t").println(mean, 4); | |
552 bsw.print("#Median:\t").println(median); | |
553 bsw.print("#STDDev:\t").println(std, 4); | |
554 bsw.print("#Length\tCount\n"); | |
555 long cum=0; | |
556 for(int i=0; i<geneHist.length && cum<sum; i++){ | |
557 int len=i*geneHistDiv; | |
558 long count=geneHist[i]; | |
559 cum+=count; | |
560 if(count>0 || printZeroCountHist){ | |
561 bsw.print(len).tab().println(count); | |
562 } | |
563 } | |
564 bsw.poisonAndWait(); | |
565 } | |
566 | |
567 private ConcurrentReadInputStream makeCris(String fname){ | |
568 FileFormat ffin=FileFormat.testInput(fname, FileFormat.FA, null, true, true); | |
569 ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ffin, null); | |
570 cris.start(); //Start the stream | |
571 if(verbose){outstream.println("Started cris");} | |
572 return cris; | |
573 } | |
574 | |
575 /** Spawn process threads */ | |
576 private void spawnThreads(final ConcurrentReadInputStream cris, final ByteStreamWriter bsw, | |
577 ConcurrentReadOutputStream rosAmino, ConcurrentReadOutputStream ros16S, ConcurrentReadOutputStream ros18S, GeneModel pgm){ | |
578 | |
579 //Do anything necessary prior to processing | |
580 | |
581 //Determine how many threads may be used | |
582 final int threads=Shared.threads(); | |
583 | |
584 //Fill a list with ProcessThreads | |
585 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads); | |
586 for(int i=0; i<threads; i++){ | |
587 alpt.add(new ProcessThread(cris, bsw, rosAmino, ros16S, ros18S, pgm, minLen, i)); | |
588 } | |
589 | |
590 //Start the threads | |
591 for(ProcessThread pt : alpt){ | |
592 pt.start(); | |
593 } | |
594 | |
595 //Wait for threads to finish | |
596 waitForThreads(alpt); | |
597 | |
598 //Do anything necessary after processing | |
599 | |
600 } | |
601 | |
602 private void waitForThreads(ArrayList<ProcessThread> alpt){ | |
603 | |
604 //Wait for completion of all threads | |
605 boolean success=true; | |
606 for(ProcessThread pt : alpt){ | |
607 | |
608 //Wait until this thread has terminated | |
609 while(pt.getState()!=Thread.State.TERMINATED){ | |
610 try { | |
611 //Attempt a join operation | |
612 pt.join(); | |
613 } catch (InterruptedException e) { | |
614 //Potentially handle this, if it is expected to occur | |
615 e.printStackTrace(); | |
616 } | |
617 } | |
618 | |
619 //Accumulate per-thread statistics | |
620 readsIn+=pt.readsInT; | |
621 basesIn+=pt.basesInT; | |
622 genesOut+=pt.genesOutT; | |
623 bytesOut+=pt.bytesOutT; | |
624 | |
625 geneStopsMade+=pt.caller.geneStopsMade; | |
626 geneStartsMade+=pt.caller.geneStartsMade; | |
627 geneStartsRetained+=pt.caller.geneStartsRetained; | |
628 geneStopsRetained+=pt.caller.geneStopsRetained; | |
629 geneStartsOut+=pt.caller.geneStartsOut; | |
630 | |
631 r16SOut+=pt.caller.r16SOut; | |
632 r18SOut+=pt.caller.r18SOut; | |
633 r23SOut+=pt.caller.r23SOut; | |
634 r5SOut+=pt.caller.r5SOut; | |
635 tRNAOut+=pt.caller.tRNAOut; | |
636 | |
637 stCds.add(pt.caller.stCds); | |
638 stCds2.add(pt.caller.stCds2); | |
639 // stCdsPass.add(pt.caller.stCdsPass); | |
640 | |
641 for(int i=0; i<trackers.length; i++){ | |
642 trackers[i].add(pt.caller.trackers[i]); | |
643 } | |
644 | |
645 if(geneHist!=null){Tools.add(geneHist, pt.geneHistT);} | |
646 | |
647 success&=pt.success; | |
648 } | |
649 | |
650 //Track whether any threads failed | |
651 if(!success){errorState=true;} | |
652 } | |
653 | |
654 /*--------------------------------------------------------------*/ | |
655 /*---------------- Inner Methods ----------------*/ | |
656 /*--------------------------------------------------------------*/ | |
657 | |
658 private static ByteStreamWriter makeBSW(FileFormat ff){ | |
659 if(ff==null){return null;} | |
660 ByteStreamWriter bsw=new ByteStreamWriter(ff); | |
661 bsw.start(); | |
662 return bsw; | |
663 } | |
664 | |
665 private ConcurrentReadOutputStream makeCros(FileFormat ff){ | |
666 if(ff==null){return null;} | |
667 | |
668 //Select output buffer size based on whether it needs to be ordered | |
669 final int buff=(ordered ? Tools.mid(4, 64, (Shared.threads()*2)/3) : 4); | |
670 | |
671 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ff, null, buff, null, false); | |
672 ros.start(); //Start the stream | |
673 return ros; | |
674 } | |
675 | |
676 /*--------------------------------------------------------------*/ | |
677 /*---------------- Inner Classes ----------------*/ | |
678 /*--------------------------------------------------------------*/ | |
679 | |
680 /** This class is static to prevent accidental writing to shared variables. | |
681 * It is safe to remove the static modifier. */ | |
682 private class ProcessThread extends Thread { | |
683 | |
684 //Constructor | |
685 ProcessThread(final ConcurrentReadInputStream cris_, final ByteStreamWriter bsw_, | |
686 ConcurrentReadOutputStream rosAmino_, ConcurrentReadOutputStream ros16S_, ConcurrentReadOutputStream ros18S_, | |
687 GeneModel pgm_, final int minLen, final int tid_){ | |
688 cris=cris_; | |
689 bsw=bsw_; | |
690 rosAmino=rosAmino_; | |
691 ros16S=ros16S_; | |
692 ros18S=ros18S_; | |
693 pgm=pgm_; | |
694 tid=tid_; | |
695 geneHistT=(geneHistBins>1 ? new long[geneHistBins] : null); | |
696 caller=new GeneCaller(minLen, maxOverlapSameStrand, maxOverlapOppositeStrand, | |
697 minStartScore, minStopScore, minKmerScore, minOrfScore, minAvgScore, pgm); | |
698 } | |
699 | |
700 //Called by start() | |
701 @Override | |
702 public void run(){ | |
703 //Do anything necessary prior to processing | |
704 | |
705 //Process the reads | |
706 processInner(); | |
707 | |
708 //Do anything necessary after processing | |
709 | |
710 //Indicate successful exit status | |
711 success=true; | |
712 } | |
713 | |
714 /** Iterate through the reads */ | |
715 void processInner(){ | |
716 | |
717 //Grab the first ListNum of reads | |
718 ListNum<Read> ln=cris.nextList(); | |
719 | |
720 //Check to ensure pairing is as expected | |
721 if(ln!=null && !ln.isEmpty()){ | |
722 Read r=ln.get(0); | |
723 // assert(ffin1.samOrBam() || (r.mate!=null)==cris.paired()); //Disabled due to non-static access | |
724 } | |
725 | |
726 //As long as there is a nonempty read list... | |
727 while(ln!=null && ln.size()>0){ | |
728 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access | |
729 | |
730 processList(ln); | |
731 | |
732 //Fetch a new list | |
733 ln=cris.nextList(); | |
734 } | |
735 | |
736 //Notify the input stream that the final list was used | |
737 if(ln!=null){ | |
738 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); | |
739 } | |
740 } | |
741 | |
742 void processList(ListNum<Read> ln){ | |
743 //Grab the actual read list from the ListNum | |
744 final ArrayList<Read> reads=ln.list; | |
745 | |
746 // System.err.println(reads.size()); | |
747 | |
748 ArrayList<Orf> orfList=new ArrayList<Orf>(); | |
749 | |
750 //Loop through each read in the list | |
751 for(int idx=0; idx<reads.size(); idx++){ | |
752 Read r1=reads.get(idx); | |
753 Read r2=r1.mate; | |
754 | |
755 //Validate reads in worker threads | |
756 if(!r1.validated()){r1.validate(true);} | |
757 if(r2!=null && !r2.validated()){r2.validate(true);} | |
758 | |
759 //Track the initial length for statistics | |
760 final int initialLength1=r1.length(); | |
761 final int initialLength2=r1.mateLength(); | |
762 | |
763 //Increment counters | |
764 readsInT+=r1.pairCount(); | |
765 basesInT+=initialLength1+initialLength2; | |
766 | |
767 if(r2!=null){ | |
768 if(merge){ | |
769 final int insert=BBMerge.findOverlapStrict(r1, r2, false); | |
770 if(insert>0){ | |
771 r2.reverseComplement(); | |
772 r1=r1.joinRead(insert); | |
773 r2=null; | |
774 } | |
775 }else if(ecco){ | |
776 BBMerge.findOverlapStrict(r1, r2, true); | |
777 } | |
778 } | |
779 | |
780 { | |
781 //Reads are processed in this block. | |
782 { | |
783 ArrayList<Orf> list=processRead(r1); | |
784 if(list!=null){orfList.addAll(list);} | |
785 } | |
786 if(r2!=null){ | |
787 ArrayList<Orf> list=processRead(r2); | |
788 if(list!=null){orfList.addAll(list);} | |
789 } | |
790 } | |
791 } | |
792 | |
793 genesOutT+=orfList.size(); | |
794 ByteBuilder bb=new ByteBuilder(); | |
795 | |
796 if(bsw!=null){ | |
797 if(bsw.ordered){ | |
798 for(Orf orf : orfList){ | |
799 orf.appendGff(bb); | |
800 bb.nl(); | |
801 } | |
802 bsw.add(bb, ln.id); | |
803 bytesOutT+=bb.length(); | |
804 }else{ | |
805 for(Orf orf : orfList){ | |
806 orf.appendGff(bb); | |
807 bb.nl(); | |
808 // if(bb.length()>=32000){ | |
809 // bytesOutT+=bb.length(); | |
810 // bsw.addJob(bb); | |
811 // bb=new ByteBuilder(); | |
812 // } | |
813 } | |
814 if(bb.length()>0){ | |
815 bsw.addJob(bb); | |
816 bytesOutT+=bb.length(); | |
817 } | |
818 } | |
819 } | |
820 | |
821 //Notify the input stream that the list was used | |
822 cris.returnList(ln); | |
823 // if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access | |
824 } | |
825 | |
826 /** | |
827 * Process a read or a read pair. | |
828 * @param r1 Read 1 | |
829 * @param r2 Read 2 (may be null) | |
830 * @return True if the reads should be kept, false if they should be discarded. | |
831 */ | |
832 ArrayList<Orf> processRead(final Read r){ | |
833 ArrayList<Orf> list=caller.callGenes(r, pgm); | |
834 | |
835 if(geneHistT!=null && list!=null){ | |
836 for(Orf o : list){ | |
837 int bin=Tools.min(geneHistT.length-1, o.length()/geneHistDiv); | |
838 geneHistT[bin]++; | |
839 } | |
840 } | |
841 | |
842 if(ros16S!=null){ | |
843 if(list!=null && !list.isEmpty()){ | |
844 // System.err.println("Looking for 16S."); | |
845 ArrayList<Read> ssu=fetchType(r, list, r16S); | |
846 if(ssu!=null && !ssu.isEmpty()){ | |
847 // System.err.println("Found "+ssu.size()+" 16S."); | |
848 ros16S.add(ssu, r.numericID); | |
849 } | |
850 } | |
851 } | |
852 if(ros18S!=null){ | |
853 if(list!=null && !list.isEmpty()){ | |
854 ArrayList<Read> ssu=fetchType(r, list, r18S); | |
855 if(ssu!=null && !ssu.isEmpty()){ros18S.add(ssu, r.numericID);} | |
856 } | |
857 } | |
858 | |
859 if(rosAmino!=null){ | |
860 if(mode==TRANSLATE){ | |
861 if(list!=null && !list.isEmpty()){ | |
862 ArrayList<Read> prots=translate(r, list); | |
863 if(prots!=null){rosAmino.add(prots, r.numericID);} | |
864 } | |
865 }else if(mode==RETRANSLATE) { | |
866 if(list!=null && !list.isEmpty()){ | |
867 ArrayList<Read> prots=translate(r, list); | |
868 ArrayList<Read> ret=detranslate(prots); | |
869 if(ret!=null){rosAmino.add(ret, r.numericID);} | |
870 } | |
871 }else if(mode==RECODE) { | |
872 if(list!=null && !list.isEmpty()){ | |
873 Read recoded=recode(r, list); | |
874 r.mate=null; | |
875 ArrayList<Read> rec=new ArrayList<Read>(1); | |
876 rec.add(recoded); | |
877 if(rec!=null){rosAmino.add(rec, r.numericID);} | |
878 } | |
879 }else{ | |
880 assert(false) : mode; | |
881 } | |
882 } | |
883 | |
884 return list; | |
885 } | |
886 | |
887 /** Number of reads processed by this thread */ | |
888 protected long readsInT=0; | |
889 /** Number of bases processed by this thread */ | |
890 protected long basesInT=0; | |
891 | |
892 /** Number of genes called by this thread */ | |
893 protected long genesOutT=0; | |
894 /** Number of bytes written by this thread */ | |
895 protected long bytesOutT=0; | |
896 | |
897 final long[] geneHistT; | |
898 | |
899 protected ConcurrentReadOutputStream rosAmino; | |
900 protected ConcurrentReadOutputStream ros16S; | |
901 protected ConcurrentReadOutputStream ros18S; | |
902 | |
903 /** True only if this thread has completed successfully */ | |
904 boolean success=false; | |
905 | |
906 /** Shared input stream */ | |
907 private final ConcurrentReadInputStream cris; | |
908 /** Shared output stream */ | |
909 private final ByteStreamWriter bsw; | |
910 /** Gene Model for annotation (not really needed) */ | |
911 private final GeneModel pgm; | |
912 /** Gene Caller for annotation */ | |
913 final GeneCaller caller; | |
914 /** Thread ID */ | |
915 final int tid; | |
916 } | |
917 | |
918 public static ArrayList<Read> fetchType(final Read r, final ArrayList<Orf> list, int type){ | |
919 if(list==null || list.isEmpty()){return null;} | |
920 ArrayList<Read> ret=new ArrayList<Read>(list.size()); | |
921 for(int strand=0; strand<2; strand++){ | |
922 for(Orf orf : list){ | |
923 if(orf.strand==strand && orf.type==type){ | |
924 Read sequence=fetch(orf, r.bases, r.id); | |
925 ret.add(sequence); | |
926 } | |
927 } | |
928 r.reverseComplement(); | |
929 } | |
930 return (ret.size()>0 ? ret : null); | |
931 } | |
932 | |
933 public static ArrayList<Read> translate(final Read r, final ArrayList<Orf> list){ | |
934 if(list==null || list.isEmpty()){return null;} | |
935 ArrayList<Read> prots=new ArrayList<Read>(list.size()); | |
936 for(int strand=0; strand<2; strand++){ | |
937 for(Orf orf : list){ | |
938 if(orf.strand==strand && orf.type==CDS){ | |
939 Read aa=translate(orf, r.bases, r.id); | |
940 prots.add(aa); | |
941 } | |
942 } | |
943 r.reverseComplement(); | |
944 } | |
945 return prots.isEmpty() ? null : prots; | |
946 } | |
947 | |
948 public static Read recode(final Read r, final ArrayList<Orf> list){ | |
949 if(list==null || list.isEmpty()){return r;} | |
950 for(int strand=0; strand<2; strand++){ | |
951 for(Orf orf : list){ | |
952 if(orf.strand==strand && orf.type==CDS){ | |
953 recode(orf, r.bases); | |
954 } | |
955 } | |
956 r.reverseComplement(); | |
957 } | |
958 return r; | |
959 } | |
960 | |
961 public static ArrayList<Read> detranslate(final ArrayList<Read> prots){ | |
962 if(prots==null || prots.isEmpty()){return null;} | |
963 ArrayList<Read> nucs=new ArrayList<Read>(prots.size()); | |
964 for(int strand=0; strand<2; strand++){ | |
965 for(Read prot : prots){ | |
966 Read nuc=detranslate(prot); | |
967 nucs.add(nuc); | |
968 } | |
969 } | |
970 return nucs; | |
971 } | |
972 | |
973 public static Read translate(Orf orf, byte[] bases, String id){ | |
974 // assert(orf.length()%3==0) : orf.length(); //Happens sometimes on genes that go off the end, perhaps | |
975 if(orf.strand==1){orf.flip();} | |
976 byte[] acids=AminoAcid.toAAs(bases, orf.start, orf.stop); | |
977 if(orf.strand==1){orf.flip();} | |
978 Read r=new Read(acids, null, id+"\t"+(Shared.strandCodes[orf.strand]+"\t"+orf.start+"-"+orf.stop), 0, Read.AAMASK); | |
979 // assert((r.length()+1)*3==orf.length()); | |
980 return r; | |
981 } | |
982 | |
983 public static Read fetch(Orf orf, Read source){ | |
984 assert(orf.start>=0 && orf.stop<source.length()) : source.length()+"\n"+orf; | |
985 if(orf.strand==1){source.reverseComplement();} | |
986 Read r=fetch(orf, source.bases, source.id); | |
987 if(orf.strand==1){source.reverseComplement();} | |
988 return r; | |
989 } | |
990 | |
991 public static Read fetch(Orf orf, byte[] bases, String id){ | |
992 assert(orf.start>=0 && orf.stop<bases.length) : bases.length+"\n"+orf; | |
993 if(orf.strand==1){orf.flip();} | |
994 byte[] sub=Arrays.copyOfRange(bases, orf.start, orf.stop+1); | |
995 if(orf.strand==1){orf.flip();} | |
996 Read r=new Read(sub, null, id+"\t"+(Shared.strandCodes[orf.strand]+"\t"+orf.start+"-"+orf.stop), 0, 0); | |
997 assert(r.length()==orf.length()) : r.length()+", "+orf.length(); | |
998 return r; | |
999 } | |
1000 | |
1001 public static void recode(Orf orf, byte[] bases){ | |
1002 if(orf.strand==1){orf.flip();} | |
1003 byte[] acids=AminoAcid.toAAs(bases, orf.start, orf.stop); | |
1004 for(int apos=0, bpos=orf.start; apos<acids.length; apos++){ | |
1005 byte aa=acids[apos]; | |
1006 int number=AminoAcid.acidToNumber[aa]; | |
1007 String codon=(number>=0 ? AminoAcid.canonicalCodons[number] : "NNN"); | |
1008 for(int i=0; i<3; i++, bpos++) { | |
1009 bases[bpos]=(byte)codon.charAt(i); | |
1010 } | |
1011 } | |
1012 if(orf.strand==1){orf.flip();} | |
1013 } | |
1014 | |
1015 public static Read detranslate(Read prot){ | |
1016 ByteBuilder bb=new ByteBuilder(prot.length()*3); | |
1017 for(byte aa : prot.bases){ | |
1018 int number=AminoAcid.acidToNumber[aa]; | |
1019 String codon=(number>=0 ? AminoAcid.canonicalCodons[number] : "NNN"); | |
1020 bb.append(codon); | |
1021 } | |
1022 Read r=new Read(bb.array, null, prot.id, prot.numericID, 0); | |
1023 return r; | |
1024 } | |
1025 | |
1026 /*--------------------------------------------------------------*/ | |
1027 /*---------------- Fields ----------------*/ | |
1028 /*--------------------------------------------------------------*/ | |
1029 | |
1030 public static GeneCaller makeGeneCaller(GeneModel pgm){ | |
1031 GeneCaller caller=new GeneCaller(minLen, maxOverlapSameStrand, maxOverlapOppositeStrand, | |
1032 minStartScore, minStopScore, minKmerScore, minOrfScore, minAvgScore, pgm); | |
1033 return caller; | |
1034 } | |
1035 | |
1036 private long maxReads=-1; | |
1037 private boolean merge; | |
1038 private boolean ecco; | |
1039 | |
1040 private long readsIn=0; | |
1041 private long basesIn=0; | |
1042 private long genesOut=0; | |
1043 private long bytesOut=0; | |
1044 | |
1045 private static int minLen=80;//Actually a much higher value like 200 seems optimal compared to NCBI | |
1046 private static int maxOverlapSameStrand=80; | |
1047 private static int maxOverlapOppositeStrand=110; | |
1048 | |
1049 /* for kinnercds=6 */ | |
1050 // private static float minStartScore=-0.10f; | |
1051 // private static float minStopScore=-0.5f;//Not useful; disabled | |
1052 // private static float minKmerScore=0.04f;//Does not seem useful. | |
1053 // private static float minOrfScore=40f; //Higher increases SNR dramatically but reduces TP rate | |
1054 // private static float minAvgScore=0.08f; //Not very effective | |
1055 | |
1056 /* for kinnercds=7 */ | |
1057 private static float minStartScore=-0.10f; | |
1058 private static float minStopScore=-0.5f;//Not useful; disabled | |
1059 private static float minKmerScore=0.02f;//Does not seem useful. | |
1060 private static float minOrfScore=50f; //Higher increases SNR dramatically but reduces TP rate | |
1061 private static float minAvgScore=0.08f; //Not very effective | |
1062 | |
1063 long geneStopsMade=0; | |
1064 long geneStartsMade=0; | |
1065 long geneStartsRetained=0; | |
1066 long geneStopsRetained=0; | |
1067 long geneStartsOut=0; | |
1068 | |
1069 long tRNAOut=0; | |
1070 long r16SOut=0; | |
1071 long r23SOut=0; | |
1072 long r5SOut=0; | |
1073 long r18SOut=0; | |
1074 | |
1075 ScoreTracker stCds=new ScoreTracker(CDS); | |
1076 ScoreTracker stCds2=new ScoreTracker(CDS); | |
1077 ScoreTracker stCdsPass=new ScoreTracker(CDS); | |
1078 ScoreTracker sttRNA=new ScoreTracker(tRNA); | |
1079 ScoreTracker st16s=new ScoreTracker(r16S); | |
1080 ScoreTracker st23s=new ScoreTracker(r23S); | |
1081 ScoreTracker st5s=new ScoreTracker(r5S); | |
1082 ScoreTracker st18s=new ScoreTracker(r18S); | |
1083 | |
1084 ScoreTracker[] trackers=new ScoreTracker[] {stCdsPass, sttRNA, st16s, st23s, st5s, st18s}; | |
1085 | |
1086 private int geneHistBins=2000; | |
1087 private int geneHistDiv=20; | |
1088 private final long[] geneHist; | |
1089 private boolean printZeroCountHist=false; | |
1090 | |
1091 /*--------------------------------------------------------------*/ | |
1092 | |
1093 private ArrayList<String> fnaList=new ArrayList<String>(); | |
1094 private ArrayList<String> pgmList=new ArrayList<String>(); | |
1095 private String outGff=null; | |
1096 private String outAmino=null; | |
1097 private String out16S=null; | |
1098 private String out18S=null; | |
1099 private String compareToGff=null; | |
1100 private String outStats="stderr"; | |
1101 private String geneHistFile=null; | |
1102 private boolean json_out=false; | |
1103 | |
1104 /*--------------------------------------------------------------*/ | |
1105 /*---------------- Final Fields ----------------*/ | |
1106 /*--------------------------------------------------------------*/ | |
1107 | |
1108 private final FileFormat ffoutGff; | |
1109 private final FileFormat ffoutAmino; | |
1110 private final FileFormat ffout16S; | |
1111 private final FileFormat ffout18S; | |
1112 | |
1113 /** Determines how sequence is processed if it will be output */ | |
1114 int mode=TRANSLATE; | |
1115 | |
1116 /** Translate nucleotides to amino acids */ | |
1117 private static final int TRANSLATE=1; | |
1118 /** Translate nucleotides to amino acids, | |
1119 * then translate back to nucleotides */ | |
1120 private static final int RETRANSLATE=2; | |
1121 /** Re-encode coding regions of nucleotide | |
1122 * sequences as a canonical codons */ | |
1123 private static final int RECODE=3; | |
1124 | |
1125 /*--------------------------------------------------------------*/ | |
1126 | |
1127 private PrintStream outstream=System.err; | |
1128 public boolean verbose=false; | |
1129 public boolean errorState=false; | |
1130 private boolean overwrite=false; | |
1131 private boolean append=false; | |
1132 private boolean ordered=false; //this is OK sometimes, but sometimes hangs (e.g. on RefSeq mito), possibly if a sequence produces nothing. | |
1133 //To fix it, just ensure functions like translate always produce an array, even if it is empty. | |
1134 | |
1135 } |