annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/CallGenes.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package prok;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6 import java.util.Arrays;
jpayne@68 7
jpayne@68 8 import dna.AminoAcid;
jpayne@68 9 import dna.Data;
jpayne@68 10 import fileIO.ByteFile;
jpayne@68 11 import fileIO.ByteStreamWriter;
jpayne@68 12 import fileIO.FileFormat;
jpayne@68 13 import fileIO.ReadWrite;
jpayne@68 14 import gff.CompareGff;
jpayne@68 15 import jgi.BBMerge;
jpayne@68 16 import json.JsonObject;
jpayne@68 17 import shared.Parse;
jpayne@68 18 import shared.Parser;
jpayne@68 19 import shared.PreParser;
jpayne@68 20 import shared.ReadStats;
jpayne@68 21 import shared.Shared;
jpayne@68 22 import shared.Timer;
jpayne@68 23 import shared.Tools;
jpayne@68 24 import stream.ConcurrentReadInputStream;
jpayne@68 25 import stream.ConcurrentReadOutputStream;
jpayne@68 26 import stream.Read;
jpayne@68 27 import structures.ByteBuilder;
jpayne@68 28 import structures.ListNum;
jpayne@68 29
jpayne@68 30 /**
jpayne@68 31 * This is the executable class for gene-calling.
jpayne@68 32 * @author Brian Bushnell
jpayne@68 33 * @date Sep 24, 2018
jpayne@68 34 *
jpayne@68 35 */
jpayne@68 36 public class CallGenes extends ProkObject {
jpayne@68 37
jpayne@68 38 /*--------------------------------------------------------------*/
jpayne@68 39 /*---------------- Initialization ----------------*/
jpayne@68 40 /*--------------------------------------------------------------*/
jpayne@68 41
jpayne@68 42 /**
jpayne@68 43 * Code entrance from the command line.
jpayne@68 44 * @param args Command line arguments
jpayne@68 45 */
jpayne@68 46 public static void main(String[] args){
jpayne@68 47 //Start a timer immediately upon code entrance.
jpayne@68 48 Timer t=new Timer();
jpayne@68 49
jpayne@68 50 //Create an instance of this class
jpayne@68 51 CallGenes x=new CallGenes(args);
jpayne@68 52
jpayne@68 53 //Run the object
jpayne@68 54 x.process(t);
jpayne@68 55
jpayne@68 56 //Close the print stream if it was redirected
jpayne@68 57 Shared.closeStream(x.outstream);
jpayne@68 58 }
jpayne@68 59
jpayne@68 60 /**
jpayne@68 61 * Constructor.
jpayne@68 62 * @param args Command line arguments
jpayne@68 63 */
jpayne@68 64 public CallGenes(String[] args){
jpayne@68 65
jpayne@68 66 {//Preparse block for help, config files, and outstream
jpayne@68 67 PreParser pp=new PreParser(args, (args.length>40 ? null : getClass()), false);
jpayne@68 68 args=pp.args;
jpayne@68 69 outstream=pp.outstream;
jpayne@68 70 }
jpayne@68 71
jpayne@68 72 //Set shared static variables prior to parsing
jpayne@68 73 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 74 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 75
jpayne@68 76 {//Parse the arguments
jpayne@68 77 final Parser parser=parse(args);
jpayne@68 78 overwrite=parser.overwrite;
jpayne@68 79 append=parser.append;
jpayne@68 80
jpayne@68 81 outGff=parser.out1;
jpayne@68 82 maxReads=parser.maxReads;
jpayne@68 83 }
jpayne@68 84
jpayne@68 85 fixExtensions(); //Add or remove .gz or .bz2 as needed
jpayne@68 86 checkFileExistence(); //Ensure files can be read and written
jpayne@68 87 checkStatics(); //Adjust file-related static fields as needed for this program
jpayne@68 88
jpayne@68 89 ffoutGff=FileFormat.testOutput(outGff, FileFormat.GFF, null, true, overwrite, append, ordered);
jpayne@68 90 ffoutAmino=FileFormat.testOutput(outAmino, FileFormat.FA, null, true, overwrite, append, ordered);
jpayne@68 91 ffout16S=FileFormat.testOutput(out16S, FileFormat.FA, null, true, overwrite, append, ordered);
jpayne@68 92 ffout18S=FileFormat.testOutput(out18S, FileFormat.FA, null, true, overwrite, append, ordered);
jpayne@68 93
jpayne@68 94 if(ffoutGff!=null){
jpayne@68 95 assert(!ffoutGff.isSequence()) : "\nout is for gff files. To output sequence, please use outa.";
jpayne@68 96 }
jpayne@68 97 if(ffoutAmino!=null){
jpayne@68 98 assert(!ffoutAmino.gff()) : "\nouta is for sequence data. To output gff, please use out.";
jpayne@68 99 }
jpayne@68 100 if(ffout16S!=null){
jpayne@68 101 assert(!ffout16S.gff()) : "\nout16S is for sequence data. To output gff, please use out.";
jpayne@68 102 }
jpayne@68 103 if(ffout18S!=null){
jpayne@68 104 assert(!ffout18S.gff()) : "\nout18S is for sequence data. To output gff, please use out.";
jpayne@68 105 }
jpayne@68 106
jpayne@68 107 if(geneHistFile==null){geneHistBins=0;}
jpayne@68 108 else{
jpayne@68 109 assert(geneHistBins>1) : "geneHistBins="+geneHistBins+"; should be >1";
jpayne@68 110 assert(geneHistDiv>=1) : "geneHistDiv="+geneHistDiv+"; should be >=1";
jpayne@68 111 }
jpayne@68 112 geneHist=geneHistBins>1 ? new long[geneHistBins] : null;
jpayne@68 113 }
jpayne@68 114
jpayne@68 115 /*--------------------------------------------------------------*/
jpayne@68 116 /*---------------- Initialization Helpers ----------------*/
jpayne@68 117 /*--------------------------------------------------------------*/
jpayne@68 118
jpayne@68 119 /** Parse arguments from the command line */
jpayne@68 120 private Parser parse(String[] args){
jpayne@68 121
jpayne@68 122 Parser parser=new Parser();
jpayne@68 123 for(int i=0; i<args.length; i++){
jpayne@68 124 String arg=args[i];
jpayne@68 125 String[] split=arg.split("=");
jpayne@68 126 String a=split[0].toLowerCase();
jpayne@68 127 String b=split.length>1 ? split[1] : null;
jpayne@68 128 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
jpayne@68 129
jpayne@68 130 // outstream.println(arg+", "+a+", "+b);
jpayne@68 131 if(PGMTools.parseStatic(arg, a, b)){
jpayne@68 132 //do nothing
jpayne@68 133 }else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna")){
jpayne@68 134 assert(b!=null);
jpayne@68 135 Tools.addFiles(b, fnaList);
jpayne@68 136 }else if(b==null && new File(arg).exists() && FileFormat.isFastaFile(arg)){
jpayne@68 137 fnaList.add(arg);
jpayne@68 138 }else if(a.equals("pgm") || a.equals("gm") || a.equals("model")){
jpayne@68 139 assert(b!=null);
jpayne@68 140 if(b.equalsIgnoreCase("auto") || b.equalsIgnoreCase("default")){
jpayne@68 141 b=Data.findPath("?model.pgm");
jpayne@68 142 pgmList.add(b);
jpayne@68 143 }else{
jpayne@68 144 Tools.addFiles(b, pgmList);
jpayne@68 145 }
jpayne@68 146 }else if(b==null && new File(arg).exists() && FileFormat.isPgmFile(arg)){
jpayne@68 147 Tools.addFiles(arg, pgmList);
jpayne@68 148 }else if(a.equals("outamino") || a.equals("aminoout") || a.equals("outa") || a.equals("outaa") || a.equals("aaout") || a.equals("amino")){
jpayne@68 149 outAmino=b;
jpayne@68 150 }else if(a.equalsIgnoreCase("out16s") || a.equalsIgnoreCase("16sout")){
jpayne@68 151 out16S=b;
jpayne@68 152 }else if(a.equalsIgnoreCase("out18s") || a.equalsIgnoreCase("18sout")){
jpayne@68 153 out18S=b;
jpayne@68 154 }else if(a.equals("verbose")){
jpayne@68 155 verbose=Parse.parseBoolean(b);
jpayne@68 156 //ReadWrite.verbose=verbose;
jpayne@68 157 GeneCaller.verbose=verbose;
jpayne@68 158 }
jpayne@68 159
jpayne@68 160 else if(a.equals("json_out") || a.equalsIgnoreCase("json")){
jpayne@68 161 json_out=Parse.parseBoolean(b);
jpayne@68 162 }else if(a.equals("stats") || a.equalsIgnoreCase("outstats")){
jpayne@68 163 outStats=b;
jpayne@68 164 }else if(a.equals("hist") || a.equalsIgnoreCase("outhist") || a.equalsIgnoreCase("lengthhist") || a.equalsIgnoreCase("lhist") || a.equalsIgnoreCase("genehist")){
jpayne@68 165 geneHistFile=b;
jpayne@68 166 }else if(a.equals("bins")){
jpayne@68 167 geneHistBins=Integer.parseInt(b);
jpayne@68 168 }else if(a.equals("binlen") || a.equals("binlength") || a.equals("histdiv")){
jpayne@68 169 geneHistBins=Integer.parseInt(b);
jpayne@68 170 }else if(a.equals("printzero") || a.equals("pz")){
jpayne@68 171 printZeroCountHist=Parse.parseBoolean(b);
jpayne@68 172 }
jpayne@68 173
jpayne@68 174 else if(a.equals("merge")){
jpayne@68 175 merge=Parse.parseBoolean(b);
jpayne@68 176 }else if(a.equals("ecco")){
jpayne@68 177 ecco=Parse.parseBoolean(b);
jpayne@68 178 }
jpayne@68 179
jpayne@68 180 else if(a.equalsIgnoreCase("setbias16s")) {
jpayne@68 181 GeneCaller.biases[r16S]=Float.parseFloat(b);
jpayne@68 182 }else if(a.equalsIgnoreCase("setbias18s")) {
jpayne@68 183 GeneCaller.biases[r18S]=Float.parseFloat(b);
jpayne@68 184 }else if(a.equalsIgnoreCase("setbias23s")) {
jpayne@68 185 GeneCaller.biases[r23S]=Float.parseFloat(b);
jpayne@68 186 }else if(a.equalsIgnoreCase("setbias5s")) {
jpayne@68 187 GeneCaller.biases[r5S]=Float.parseFloat(b);
jpayne@68 188 }else if(a.equalsIgnoreCase("setbiastRNA")) {
jpayne@68 189 GeneCaller.biases[tRNA]=Float.parseFloat(b);
jpayne@68 190 }else if(a.equalsIgnoreCase("setbiasCDS")) {
jpayne@68 191 GeneCaller.biases[CDS]=Float.parseFloat(b);
jpayne@68 192 }
jpayne@68 193
jpayne@68 194 else if(a.equals("ordered")){
jpayne@68 195 ordered=Parse.parseBoolean(b);
jpayne@68 196 }
jpayne@68 197
jpayne@68 198 else if(a.equals("translate")){
jpayne@68 199 mode=TRANSLATE;
jpayne@68 200 }else if(a.equals("retranslate") || a.equals("detranslate")){
jpayne@68 201 mode=RETRANSLATE;
jpayne@68 202 }else if(a.equals("recode")){
jpayne@68 203 mode=RECODE;
jpayne@68 204 }
jpayne@68 205
jpayne@68 206 else if(a.equalsIgnoreCase("minlen") || a.equals("minlength")){
jpayne@68 207 minLen=Integer.parseInt(b);
jpayne@68 208 }else if(a.equals("maxoverlapss") || a.equals("overlapss") || a.equals("overlapsamestrand") || a.equals("moss") || a.equalsIgnoreCase("maxOverlapSameStrand")){
jpayne@68 209 maxOverlapSameStrand=Integer.parseInt(b);
jpayne@68 210 }else if(a.equals("maxoverlapos") || a.equals("overlapos") || a.equals("overlapoppositestrand") || a.equals("moos") || a.equalsIgnoreCase("maxOverlapOppositeStrand")){
jpayne@68 211 maxOverlapOppositeStrand=Integer.parseInt(b);
jpayne@68 212 }else if(a.equalsIgnoreCase("minStartScore")){
jpayne@68 213 minStartScore=Float.parseFloat(b);
jpayne@68 214 }else if(a.equalsIgnoreCase("minStopScore")){
jpayne@68 215 minStopScore=Float.parseFloat(b);
jpayne@68 216 }else if(a.equalsIgnoreCase("minInnerScore") || a.equalsIgnoreCase("minKmerScore")){
jpayne@68 217 minKmerScore=Float.parseFloat(b);
jpayne@68 218 }else if(a.equalsIgnoreCase("minOrfScore") || a.equalsIgnoreCase("minScore")){
jpayne@68 219 minOrfScore=Float.parseFloat(b);
jpayne@68 220 }else if(a.equalsIgnoreCase("minAvgScore")){
jpayne@68 221 minAvgScore=Float.parseFloat(b);
jpayne@68 222 }else if(a.equalsIgnoreCase("breakLimit")){
jpayne@68 223 GeneCaller.breakLimit=Integer.parseInt(b);
jpayne@68 224 }else if(a.equalsIgnoreCase("clearcutoffs") || a.equalsIgnoreCase("clearfilters")){
jpayne@68 225 GeneCaller.breakLimit=9999;
jpayne@68 226 minOrfScore=-9999;
jpayne@68 227 minAvgScore=-9999;
jpayne@68 228 minKmerScore=-9999;
jpayne@68 229 minStopScore=-9999;
jpayne@68 230 minStartScore=-9999;
jpayne@68 231 }
jpayne@68 232
jpayne@68 233 else if(a.equalsIgnoreCase("e1")){
jpayne@68 234 Orf.e1=Float.parseFloat(b);
jpayne@68 235 }else if(a.equalsIgnoreCase("e2")){
jpayne@68 236 Orf.e2=Float.parseFloat(b);
jpayne@68 237 }else if(a.equalsIgnoreCase("e3")){
jpayne@68 238 Orf.e3=Float.parseFloat(b);
jpayne@68 239 }
jpayne@68 240 else if(a.equalsIgnoreCase("f1")){
jpayne@68 241 Orf.f1=Float.parseFloat(b);
jpayne@68 242 }else if(a.equalsIgnoreCase("f2")){
jpayne@68 243 Orf.f2=Float.parseFloat(b);
jpayne@68 244 }else if(a.equalsIgnoreCase("f3")){
jpayne@68 245 Orf.f3=Float.parseFloat(b);
jpayne@68 246 }
jpayne@68 247 else if(a.equalsIgnoreCase("p0")){
jpayne@68 248 GeneCaller.p0=Float.parseFloat(b);
jpayne@68 249 }else if(a.equalsIgnoreCase("p1")){
jpayne@68 250 GeneCaller.p1=Float.parseFloat(b);
jpayne@68 251 }else if(a.equalsIgnoreCase("p2")){
jpayne@68 252 GeneCaller.p2=Float.parseFloat(b);
jpayne@68 253 }else if(a.equalsIgnoreCase("p3")){
jpayne@68 254 GeneCaller.p3=Float.parseFloat(b);
jpayne@68 255 }else if(a.equalsIgnoreCase("p4")){
jpayne@68 256 GeneCaller.p4=Float.parseFloat(b);
jpayne@68 257 }else if(a.equalsIgnoreCase("p5")){
jpayne@68 258 GeneCaller.p5=Float.parseFloat(b);
jpayne@68 259 }else if(a.equalsIgnoreCase("p6")){
jpayne@68 260 GeneCaller.p6=Float.parseFloat(b);
jpayne@68 261 }
jpayne@68 262 else if(a.equalsIgnoreCase("q1")){
jpayne@68 263 GeneCaller.q1=Float.parseFloat(b);
jpayne@68 264 }else if(a.equalsIgnoreCase("q2")){
jpayne@68 265 GeneCaller.q2=Float.parseFloat(b);
jpayne@68 266 }else if(a.equalsIgnoreCase("q3")){
jpayne@68 267 GeneCaller.q3=Float.parseFloat(b);
jpayne@68 268 }else if(a.equalsIgnoreCase("q4")){
jpayne@68 269 GeneCaller.q4=Float.parseFloat(b);
jpayne@68 270 }else if(a.equalsIgnoreCase("q5")){
jpayne@68 271 GeneCaller.q5=Float.parseFloat(b);
jpayne@68 272 }
jpayne@68 273 else if(a.equalsIgnoreCase("lookback")){
jpayne@68 274 GeneCaller.lookbackPlus=GeneCaller.lookbackMinus=Integer.parseInt(b);
jpayne@68 275 }else if(a.equalsIgnoreCase("lookbackplus")){
jpayne@68 276 GeneCaller.lookbackPlus=Integer.parseInt(b);
jpayne@68 277 }else if(a.equalsIgnoreCase("lookbackminus")){
jpayne@68 278 GeneCaller.lookbackMinus=Integer.parseInt(b);
jpayne@68 279 }
jpayne@68 280
jpayne@68 281 else if(a.equalsIgnoreCase("compareto")){
jpayne@68 282 compareToGff=b;
jpayne@68 283 }
jpayne@68 284
jpayne@68 285 else if(ProkObject.parse(arg, a, b)){}
jpayne@68 286
jpayne@68 287 else if(parser.parse(arg, a, b)){
jpayne@68 288 //do nothing
jpayne@68 289 }else{
jpayne@68 290 outstream.println("Unknown parameter "+args[i]);
jpayne@68 291 assert(false) : "Unknown parameter "+args[i];
jpayne@68 292 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 293 }
jpayne@68 294 }
jpayne@68 295
jpayne@68 296 if(pgmList.isEmpty()){
jpayne@68 297 String b=Data.findPath("?model.pgm");
jpayne@68 298 pgmList.add(b);
jpayne@68 299 }
jpayne@68 300 for(int i=0; i<pgmList.size(); i++){
jpayne@68 301 String s=pgmList.get(i);
jpayne@68 302 if(s.equalsIgnoreCase("auto") || s.equalsIgnoreCase("default")){
jpayne@68 303 pgmList.set(i, Data.findPath("?model.pgm"));
jpayne@68 304 }
jpayne@68 305 }
jpayne@68 306
jpayne@68 307 if(Shared.threads()<2){ordered=false;}
jpayne@68 308 assert(!fnaList.isEmpty()) : "At least 1 fasta file is required.";
jpayne@68 309 assert(!pgmList.isEmpty()) : "At least 1 pgm file is required.";
jpayne@68 310 return parser;
jpayne@68 311 }
jpayne@68 312
jpayne@68 313 /** Add or remove .gz or .bz2 as needed */
jpayne@68 314 private void fixExtensions(){
jpayne@68 315 fnaList=Tools.fixExtension(fnaList);
jpayne@68 316 pgmList=Tools.fixExtension(pgmList);
jpayne@68 317 if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 318 }
jpayne@68 319
jpayne@68 320 /** Ensure files can be read and written */
jpayne@68 321 private void checkFileExistence(){
jpayne@68 322 //Ensure output files can be written
jpayne@68 323 if(!Tools.testOutputFiles(overwrite, append, false, outGff, outAmino, out16S, out18S, outStats, geneHistFile)){
jpayne@68 324 outstream.println((outGff==null)+", "+outGff);
jpayne@68 325 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "
jpayne@68 326 +outGff+", "+outAmino+", "+out16S+", "+out18S+", "+outStats+", "+geneHistFile+"\n");
jpayne@68 327 }
jpayne@68 328
jpayne@68 329 //Ensure input files can be read
jpayne@68 330 ArrayList<String> foo=new ArrayList<String>();
jpayne@68 331 foo.addAll(fnaList);
jpayne@68 332 foo.addAll(pgmList);
jpayne@68 333 if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){
jpayne@68 334 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 335 }
jpayne@68 336
jpayne@68 337 //Ensure that no file was specified multiple times
jpayne@68 338 foo.add(outGff);
jpayne@68 339 foo.add(outAmino);
jpayne@68 340 foo.add(out16S);
jpayne@68 341 foo.add(out18S);
jpayne@68 342 foo.add(outStats);
jpayne@68 343 foo.add(geneHistFile);
jpayne@68 344 if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){
jpayne@68 345 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 346 }
jpayne@68 347 }
jpayne@68 348
jpayne@68 349 /** Adjust file-related static fields as needed for this program */
jpayne@68 350 private static void checkStatics(){
jpayne@68 351 //Adjust the number of threads for input file reading
jpayne@68 352 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 353 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 354 }
jpayne@68 355 }
jpayne@68 356
jpayne@68 357 /*--------------------------------------------------------------*/
jpayne@68 358 /*---------------- Outer Methods ----------------*/
jpayne@68 359 /*--------------------------------------------------------------*/
jpayne@68 360
jpayne@68 361 /** Create read streams and process all data */
jpayne@68 362 void process(Timer t){
jpayne@68 363
jpayne@68 364 GeneModel pgm=PGMTools.loadAndMerge(pgmList);
jpayne@68 365
jpayne@68 366 if(call16S || call18S || call23S || calltRNA || call5S){
jpayne@68 367 loadLongKmers();
jpayne@68 368 loadConsensusSequenceFromFile(false, false);
jpayne@68 369 }
jpayne@68 370
jpayne@68 371 ByteStreamWriter bsw=makeBSW(ffoutGff);
jpayne@68 372 if(bsw!=null){
jpayne@68 373 bsw.forcePrint("##gff-version 3\n");
jpayne@68 374 }
jpayne@68 375
jpayne@68 376 ConcurrentReadOutputStream rosAmino=makeCros(ffoutAmino);
jpayne@68 377 ConcurrentReadOutputStream ros16S=makeCros(ffout16S);
jpayne@68 378 ConcurrentReadOutputStream ros18S=makeCros(ffout18S);
jpayne@68 379
jpayne@68 380 //Turn off read validation in the input threads to increase speed
jpayne@68 381 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
jpayne@68 382 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
jpayne@68 383
jpayne@68 384 //Reset counters
jpayne@68 385 readsIn=genesOut=0;
jpayne@68 386 basesIn=bytesOut=0;
jpayne@68 387
jpayne@68 388 for(String fname : fnaList){
jpayne@68 389 //Create a read input stream
jpayne@68 390 final ConcurrentReadInputStream cris=makeCris(fname);
jpayne@68 391
jpayne@68 392 //Process the reads in separate threads
jpayne@68 393 spawnThreads(cris, bsw, rosAmino, ros16S, ros18S, pgm);
jpayne@68 394
jpayne@68 395 //Close the input stream
jpayne@68 396 errorState|=ReadWrite.closeStream(cris);
jpayne@68 397 }
jpayne@68 398
jpayne@68 399 //Close the input stream
jpayne@68 400 errorState|=ReadWrite.closeStreams(null, rosAmino, ros16S, ros18S);
jpayne@68 401
jpayne@68 402 if(verbose){outstream.println("Finished; closing streams.");}
jpayne@68 403
jpayne@68 404 //Write anything that was accumulated by ReadStats
jpayne@68 405 errorState|=ReadStats.writeAll();
jpayne@68 406 //Close the output stream
jpayne@68 407 if(bsw!=null){errorState|=bsw.poisonAndWait();}
jpayne@68 408
jpayne@68 409 //Reset read validation
jpayne@68 410 Read.VALIDATE_IN_CONSTRUCTOR=vic;
jpayne@68 411
jpayne@68 412 //Report timing and results
jpayne@68 413 t.stop();
jpayne@68 414 outstream.println(Tools.timeReadsBasesProcessed(t, readsIn, basesIn, 8));
jpayne@68 415 outstream.println(Tools.linesBytesOut(readsIn, basesIn, genesOut, bytesOut, 8, false));
jpayne@68 416 outstream.println();
jpayne@68 417
jpayne@68 418 if(json_out){
jpayne@68 419 printStatsJson(outStats);
jpayne@68 420 }else{
jpayne@68 421 printStats(outStats);
jpayne@68 422 }
jpayne@68 423
jpayne@68 424 if(geneHistFile!=null){
jpayne@68 425 printHist(geneHistFile);
jpayne@68 426 }
jpayne@68 427
jpayne@68 428 //Throw an exception of there was an error in a thread
jpayne@68 429 if(errorState){
jpayne@68 430 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 431 }
jpayne@68 432
jpayne@68 433 if(compareToGff!=null){
jpayne@68 434 if(compareToGff.equals("auto")){
jpayne@68 435 compareToGff=fnaList.get(0).replace(".fna", ".gff");
jpayne@68 436 compareToGff=compareToGff.replace(".fa", ".gff");
jpayne@68 437 compareToGff=compareToGff.replace(".fasta", ".gff");
jpayne@68 438 }
jpayne@68 439 CompareGff.main(new String[] {outGff, compareToGff});
jpayne@68 440 }
jpayne@68 441 }
jpayne@68 442
jpayne@68 443 private void printStats(String fname){
jpayne@68 444 if(fname==null){return;}
jpayne@68 445 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false);
jpayne@68 446 bsw.start();
jpayne@68 447
jpayne@68 448 if(callCDS){
jpayne@68 449 bsw.println("Gene Starts Made: \t "+Tools.padLeft(geneStartsMade, 12));
jpayne@68 450 bsw.println("Gene Stops Made: \t "+Tools.padLeft(geneStopsMade, 12));
jpayne@68 451 bsw.println("Gene Starts Retained: \t "+Tools.padLeft(geneStartsRetained, 12));
jpayne@68 452 bsw.println("Gene Stops Retained: \t "+Tools.padLeft(geneStopsRetained, 12));
jpayne@68 453 bsw.println("CDS Out: \t "+Tools.padLeft(geneStartsOut, 12));
jpayne@68 454 }
jpayne@68 455 if(call16S){bsw.println("16S Out: \t "+Tools.padLeft(r16SOut, 12));}
jpayne@68 456 if(call18S){bsw.println("18S Out: \t "+Tools.padLeft(r18SOut, 12));}
jpayne@68 457 if(call23S){bsw.println("23S Out: \t "+Tools.padLeft(r23SOut, 12));}
jpayne@68 458 if(call5S){bsw.println("5S Out: \t "+Tools.padLeft(r5SOut, 12));}
jpayne@68 459 if(calltRNA){bsw.println("tRNA Out: \t "+Tools.padLeft(tRNAOut, 12));}
jpayne@68 460
jpayne@68 461 if(callCDS){
jpayne@68 462 bsw.println();
jpayne@68 463 bsw.println("All ORF Stats:");
jpayne@68 464 bsw.print(stCds.toString());
jpayne@68 465
jpayne@68 466 bsw.println();
jpayne@68 467 bsw.println("Retained ORF Stats:");
jpayne@68 468 bsw.print(stCds2.toString());
jpayne@68 469
jpayne@68 470 bsw.println();
jpayne@68 471 bsw.println("Called ORF Stats:");
jpayne@68 472 stCdsPass.genomeSize=basesIn;
jpayne@68 473 bsw.print(stCdsPass.toString());
jpayne@68 474 }
jpayne@68 475
jpayne@68 476 if(call16S){
jpayne@68 477 bsw.println();
jpayne@68 478 bsw.println("Called 16S Stats:");
jpayne@68 479 bsw.print(st16s.toString());
jpayne@68 480 }
jpayne@68 481 if(call23S){
jpayne@68 482 bsw.println();
jpayne@68 483 bsw.println("Called 23S Stats:");
jpayne@68 484 bsw.print(st23s.toString());
jpayne@68 485 }
jpayne@68 486 if(call5S){
jpayne@68 487 bsw.println();
jpayne@68 488 bsw.println("Called 5S Stats:");
jpayne@68 489 bsw.print(st5s.toString());
jpayne@68 490 }
jpayne@68 491 if(call18S){
jpayne@68 492 bsw.println();
jpayne@68 493 bsw.println("Called 18S Stats:");
jpayne@68 494 bsw.print(st18s.toString());
jpayne@68 495 }
jpayne@68 496 bsw.poisonAndWait();
jpayne@68 497 }
jpayne@68 498
jpayne@68 499 private void printStatsJson(String fname){
jpayne@68 500 if(fname==null){return;}
jpayne@68 501
jpayne@68 502 JsonObject outer=new JsonObject();
jpayne@68 503
jpayne@68 504 {
jpayne@68 505 JsonObject jo=new JsonObject();
jpayne@68 506 if(callCDS){
jpayne@68 507 jo.add("Gene Starts Made", geneStartsMade);
jpayne@68 508 jo.add("Gene Stops Made", geneStopsMade);
jpayne@68 509 jo.add("Gene Starts Retained", geneStartsRetained);
jpayne@68 510 jo.add("Gene Stops Retained", geneStopsRetained);
jpayne@68 511 jo.add("CDS Out", geneStartsOut);
jpayne@68 512 }
jpayne@68 513 if(call16S){jo.add("16S Out", r16SOut);}
jpayne@68 514 if(call18S){jo.add("18S Out", r18SOut);}
jpayne@68 515 if(call23S){jo.add("23S Out", r23SOut);}
jpayne@68 516 if(call5S){jo.add("5S Out", r5SOut);}
jpayne@68 517 if(calltRNA){jo.add("tRNA Out", tRNAOut);}
jpayne@68 518 outer.add("Overall", jo);
jpayne@68 519 }
jpayne@68 520
jpayne@68 521 if(callCDS){
jpayne@68 522 outer.add("All ORF Stats", stCds.toJson());
jpayne@68 523 outer.add("Retained ORF Stats", stCds2.toJson());
jpayne@68 524 stCdsPass.genomeSize=basesIn;
jpayne@68 525 outer.add("Called ORF Stats", stCdsPass.toJson());
jpayne@68 526 }
jpayne@68 527
jpayne@68 528 if(call16S){outer.add("Called 16S Stats", st16s.toJson());}
jpayne@68 529 if(call18S){outer.add("Called 18S Stats", st18s.toJson());}
jpayne@68 530 if(call23S){outer.add("Called 23S Stats", st23s.toJson());}
jpayne@68 531 if(call5S){outer.add("Called 5S Stats", st5s.toJson());}
jpayne@68 532 if(calltRNA){outer.add("Called tRNA Stats", sttRNA.toJson());}
jpayne@68 533
jpayne@68 534
jpayne@68 535 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false);
jpayne@68 536 bsw.start();
jpayne@68 537 bsw.println(outer.toText());
jpayne@68 538 bsw.poisonAndWait();
jpayne@68 539 }
jpayne@68 540
jpayne@68 541 private void printHist(String fname){
jpayne@68 542 if(fname==null || geneHist==null){return;}
jpayne@68 543 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false);
jpayne@68 544 bsw.start();
jpayne@68 545 long sum=Tools.sum(geneHist);
jpayne@68 546 double mean=Tools.averageHistogram(geneHist)*geneHistDiv;
jpayne@68 547 int median=Tools.medianHistogram(geneHist)*geneHistDiv;
jpayne@68 548 double std=Tools.standardDeviationHistogram(geneHist)*geneHistDiv;
jpayne@68 549 bsw.println("#Gene Length Histogram");
jpayne@68 550 bsw.print("#Genes:\t").println(sum);
jpayne@68 551 bsw.print("#Mean:\t").println(mean, 4);
jpayne@68 552 bsw.print("#Median:\t").println(median);
jpayne@68 553 bsw.print("#STDDev:\t").println(std, 4);
jpayne@68 554 bsw.print("#Length\tCount\n");
jpayne@68 555 long cum=0;
jpayne@68 556 for(int i=0; i<geneHist.length && cum<sum; i++){
jpayne@68 557 int len=i*geneHistDiv;
jpayne@68 558 long count=geneHist[i];
jpayne@68 559 cum+=count;
jpayne@68 560 if(count>0 || printZeroCountHist){
jpayne@68 561 bsw.print(len).tab().println(count);
jpayne@68 562 }
jpayne@68 563 }
jpayne@68 564 bsw.poisonAndWait();
jpayne@68 565 }
jpayne@68 566
jpayne@68 567 private ConcurrentReadInputStream makeCris(String fname){
jpayne@68 568 FileFormat ffin=FileFormat.testInput(fname, FileFormat.FA, null, true, true);
jpayne@68 569 ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ffin, null);
jpayne@68 570 cris.start(); //Start the stream
jpayne@68 571 if(verbose){outstream.println("Started cris");}
jpayne@68 572 return cris;
jpayne@68 573 }
jpayne@68 574
jpayne@68 575 /** Spawn process threads */
jpayne@68 576 private void spawnThreads(final ConcurrentReadInputStream cris, final ByteStreamWriter bsw,
jpayne@68 577 ConcurrentReadOutputStream rosAmino, ConcurrentReadOutputStream ros16S, ConcurrentReadOutputStream ros18S, GeneModel pgm){
jpayne@68 578
jpayne@68 579 //Do anything necessary prior to processing
jpayne@68 580
jpayne@68 581 //Determine how many threads may be used
jpayne@68 582 final int threads=Shared.threads();
jpayne@68 583
jpayne@68 584 //Fill a list with ProcessThreads
jpayne@68 585 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
jpayne@68 586 for(int i=0; i<threads; i++){
jpayne@68 587 alpt.add(new ProcessThread(cris, bsw, rosAmino, ros16S, ros18S, pgm, minLen, i));
jpayne@68 588 }
jpayne@68 589
jpayne@68 590 //Start the threads
jpayne@68 591 for(ProcessThread pt : alpt){
jpayne@68 592 pt.start();
jpayne@68 593 }
jpayne@68 594
jpayne@68 595 //Wait for threads to finish
jpayne@68 596 waitForThreads(alpt);
jpayne@68 597
jpayne@68 598 //Do anything necessary after processing
jpayne@68 599
jpayne@68 600 }
jpayne@68 601
jpayne@68 602 private void waitForThreads(ArrayList<ProcessThread> alpt){
jpayne@68 603
jpayne@68 604 //Wait for completion of all threads
jpayne@68 605 boolean success=true;
jpayne@68 606 for(ProcessThread pt : alpt){
jpayne@68 607
jpayne@68 608 //Wait until this thread has terminated
jpayne@68 609 while(pt.getState()!=Thread.State.TERMINATED){
jpayne@68 610 try {
jpayne@68 611 //Attempt a join operation
jpayne@68 612 pt.join();
jpayne@68 613 } catch (InterruptedException e) {
jpayne@68 614 //Potentially handle this, if it is expected to occur
jpayne@68 615 e.printStackTrace();
jpayne@68 616 }
jpayne@68 617 }
jpayne@68 618
jpayne@68 619 //Accumulate per-thread statistics
jpayne@68 620 readsIn+=pt.readsInT;
jpayne@68 621 basesIn+=pt.basesInT;
jpayne@68 622 genesOut+=pt.genesOutT;
jpayne@68 623 bytesOut+=pt.bytesOutT;
jpayne@68 624
jpayne@68 625 geneStopsMade+=pt.caller.geneStopsMade;
jpayne@68 626 geneStartsMade+=pt.caller.geneStartsMade;
jpayne@68 627 geneStartsRetained+=pt.caller.geneStartsRetained;
jpayne@68 628 geneStopsRetained+=pt.caller.geneStopsRetained;
jpayne@68 629 geneStartsOut+=pt.caller.geneStartsOut;
jpayne@68 630
jpayne@68 631 r16SOut+=pt.caller.r16SOut;
jpayne@68 632 r18SOut+=pt.caller.r18SOut;
jpayne@68 633 r23SOut+=pt.caller.r23SOut;
jpayne@68 634 r5SOut+=pt.caller.r5SOut;
jpayne@68 635 tRNAOut+=pt.caller.tRNAOut;
jpayne@68 636
jpayne@68 637 stCds.add(pt.caller.stCds);
jpayne@68 638 stCds2.add(pt.caller.stCds2);
jpayne@68 639 // stCdsPass.add(pt.caller.stCdsPass);
jpayne@68 640
jpayne@68 641 for(int i=0; i<trackers.length; i++){
jpayne@68 642 trackers[i].add(pt.caller.trackers[i]);
jpayne@68 643 }
jpayne@68 644
jpayne@68 645 if(geneHist!=null){Tools.add(geneHist, pt.geneHistT);}
jpayne@68 646
jpayne@68 647 success&=pt.success;
jpayne@68 648 }
jpayne@68 649
jpayne@68 650 //Track whether any threads failed
jpayne@68 651 if(!success){errorState=true;}
jpayne@68 652 }
jpayne@68 653
jpayne@68 654 /*--------------------------------------------------------------*/
jpayne@68 655 /*---------------- Inner Methods ----------------*/
jpayne@68 656 /*--------------------------------------------------------------*/
jpayne@68 657
jpayne@68 658 private static ByteStreamWriter makeBSW(FileFormat ff){
jpayne@68 659 if(ff==null){return null;}
jpayne@68 660 ByteStreamWriter bsw=new ByteStreamWriter(ff);
jpayne@68 661 bsw.start();
jpayne@68 662 return bsw;
jpayne@68 663 }
jpayne@68 664
jpayne@68 665 private ConcurrentReadOutputStream makeCros(FileFormat ff){
jpayne@68 666 if(ff==null){return null;}
jpayne@68 667
jpayne@68 668 //Select output buffer size based on whether it needs to be ordered
jpayne@68 669 final int buff=(ordered ? Tools.mid(4, 64, (Shared.threads()*2)/3) : 4);
jpayne@68 670
jpayne@68 671 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ff, null, buff, null, false);
jpayne@68 672 ros.start(); //Start the stream
jpayne@68 673 return ros;
jpayne@68 674 }
jpayne@68 675
jpayne@68 676 /*--------------------------------------------------------------*/
jpayne@68 677 /*---------------- Inner Classes ----------------*/
jpayne@68 678 /*--------------------------------------------------------------*/
jpayne@68 679
jpayne@68 680 /** This class is static to prevent accidental writing to shared variables.
jpayne@68 681 * It is safe to remove the static modifier. */
jpayne@68 682 private class ProcessThread extends Thread {
jpayne@68 683
jpayne@68 684 //Constructor
jpayne@68 685 ProcessThread(final ConcurrentReadInputStream cris_, final ByteStreamWriter bsw_,
jpayne@68 686 ConcurrentReadOutputStream rosAmino_, ConcurrentReadOutputStream ros16S_, ConcurrentReadOutputStream ros18S_,
jpayne@68 687 GeneModel pgm_, final int minLen, final int tid_){
jpayne@68 688 cris=cris_;
jpayne@68 689 bsw=bsw_;
jpayne@68 690 rosAmino=rosAmino_;
jpayne@68 691 ros16S=ros16S_;
jpayne@68 692 ros18S=ros18S_;
jpayne@68 693 pgm=pgm_;
jpayne@68 694 tid=tid_;
jpayne@68 695 geneHistT=(geneHistBins>1 ? new long[geneHistBins] : null);
jpayne@68 696 caller=new GeneCaller(minLen, maxOverlapSameStrand, maxOverlapOppositeStrand,
jpayne@68 697 minStartScore, minStopScore, minKmerScore, minOrfScore, minAvgScore, pgm);
jpayne@68 698 }
jpayne@68 699
jpayne@68 700 //Called by start()
jpayne@68 701 @Override
jpayne@68 702 public void run(){
jpayne@68 703 //Do anything necessary prior to processing
jpayne@68 704
jpayne@68 705 //Process the reads
jpayne@68 706 processInner();
jpayne@68 707
jpayne@68 708 //Do anything necessary after processing
jpayne@68 709
jpayne@68 710 //Indicate successful exit status
jpayne@68 711 success=true;
jpayne@68 712 }
jpayne@68 713
jpayne@68 714 /** Iterate through the reads */
jpayne@68 715 void processInner(){
jpayne@68 716
jpayne@68 717 //Grab the first ListNum of reads
jpayne@68 718 ListNum<Read> ln=cris.nextList();
jpayne@68 719
jpayne@68 720 //Check to ensure pairing is as expected
jpayne@68 721 if(ln!=null && !ln.isEmpty()){
jpayne@68 722 Read r=ln.get(0);
jpayne@68 723 // assert(ffin1.samOrBam() || (r.mate!=null)==cris.paired()); //Disabled due to non-static access
jpayne@68 724 }
jpayne@68 725
jpayne@68 726 //As long as there is a nonempty read list...
jpayne@68 727 while(ln!=null && ln.size()>0){
jpayne@68 728 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
jpayne@68 729
jpayne@68 730 processList(ln);
jpayne@68 731
jpayne@68 732 //Fetch a new list
jpayne@68 733 ln=cris.nextList();
jpayne@68 734 }
jpayne@68 735
jpayne@68 736 //Notify the input stream that the final list was used
jpayne@68 737 if(ln!=null){
jpayne@68 738 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
jpayne@68 739 }
jpayne@68 740 }
jpayne@68 741
jpayne@68 742 void processList(ListNum<Read> ln){
jpayne@68 743 //Grab the actual read list from the ListNum
jpayne@68 744 final ArrayList<Read> reads=ln.list;
jpayne@68 745
jpayne@68 746 // System.err.println(reads.size());
jpayne@68 747
jpayne@68 748 ArrayList<Orf> orfList=new ArrayList<Orf>();
jpayne@68 749
jpayne@68 750 //Loop through each read in the list
jpayne@68 751 for(int idx=0; idx<reads.size(); idx++){
jpayne@68 752 Read r1=reads.get(idx);
jpayne@68 753 Read r2=r1.mate;
jpayne@68 754
jpayne@68 755 //Validate reads in worker threads
jpayne@68 756 if(!r1.validated()){r1.validate(true);}
jpayne@68 757 if(r2!=null && !r2.validated()){r2.validate(true);}
jpayne@68 758
jpayne@68 759 //Track the initial length for statistics
jpayne@68 760 final int initialLength1=r1.length();
jpayne@68 761 final int initialLength2=r1.mateLength();
jpayne@68 762
jpayne@68 763 //Increment counters
jpayne@68 764 readsInT+=r1.pairCount();
jpayne@68 765 basesInT+=initialLength1+initialLength2;
jpayne@68 766
jpayne@68 767 if(r2!=null){
jpayne@68 768 if(merge){
jpayne@68 769 final int insert=BBMerge.findOverlapStrict(r1, r2, false);
jpayne@68 770 if(insert>0){
jpayne@68 771 r2.reverseComplement();
jpayne@68 772 r1=r1.joinRead(insert);
jpayne@68 773 r2=null;
jpayne@68 774 }
jpayne@68 775 }else if(ecco){
jpayne@68 776 BBMerge.findOverlapStrict(r1, r2, true);
jpayne@68 777 }
jpayne@68 778 }
jpayne@68 779
jpayne@68 780 {
jpayne@68 781 //Reads are processed in this block.
jpayne@68 782 {
jpayne@68 783 ArrayList<Orf> list=processRead(r1);
jpayne@68 784 if(list!=null){orfList.addAll(list);}
jpayne@68 785 }
jpayne@68 786 if(r2!=null){
jpayne@68 787 ArrayList<Orf> list=processRead(r2);
jpayne@68 788 if(list!=null){orfList.addAll(list);}
jpayne@68 789 }
jpayne@68 790 }
jpayne@68 791 }
jpayne@68 792
jpayne@68 793 genesOutT+=orfList.size();
jpayne@68 794 ByteBuilder bb=new ByteBuilder();
jpayne@68 795
jpayne@68 796 if(bsw!=null){
jpayne@68 797 if(bsw.ordered){
jpayne@68 798 for(Orf orf : orfList){
jpayne@68 799 orf.appendGff(bb);
jpayne@68 800 bb.nl();
jpayne@68 801 }
jpayne@68 802 bsw.add(bb, ln.id);
jpayne@68 803 bytesOutT+=bb.length();
jpayne@68 804 }else{
jpayne@68 805 for(Orf orf : orfList){
jpayne@68 806 orf.appendGff(bb);
jpayne@68 807 bb.nl();
jpayne@68 808 // if(bb.length()>=32000){
jpayne@68 809 // bytesOutT+=bb.length();
jpayne@68 810 // bsw.addJob(bb);
jpayne@68 811 // bb=new ByteBuilder();
jpayne@68 812 // }
jpayne@68 813 }
jpayne@68 814 if(bb.length()>0){
jpayne@68 815 bsw.addJob(bb);
jpayne@68 816 bytesOutT+=bb.length();
jpayne@68 817 }
jpayne@68 818 }
jpayne@68 819 }
jpayne@68 820
jpayne@68 821 //Notify the input stream that the list was used
jpayne@68 822 cris.returnList(ln);
jpayne@68 823 // if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access
jpayne@68 824 }
jpayne@68 825
jpayne@68 826 /**
jpayne@68 827 * Process a read or a read pair.
jpayne@68 828 * @param r1 Read 1
jpayne@68 829 * @param r2 Read 2 (may be null)
jpayne@68 830 * @return True if the reads should be kept, false if they should be discarded.
jpayne@68 831 */
jpayne@68 832 ArrayList<Orf> processRead(final Read r){
jpayne@68 833 ArrayList<Orf> list=caller.callGenes(r, pgm);
jpayne@68 834
jpayne@68 835 if(geneHistT!=null && list!=null){
jpayne@68 836 for(Orf o : list){
jpayne@68 837 int bin=Tools.min(geneHistT.length-1, o.length()/geneHistDiv);
jpayne@68 838 geneHistT[bin]++;
jpayne@68 839 }
jpayne@68 840 }
jpayne@68 841
jpayne@68 842 if(ros16S!=null){
jpayne@68 843 if(list!=null && !list.isEmpty()){
jpayne@68 844 // System.err.println("Looking for 16S.");
jpayne@68 845 ArrayList<Read> ssu=fetchType(r, list, r16S);
jpayne@68 846 if(ssu!=null && !ssu.isEmpty()){
jpayne@68 847 // System.err.println("Found "+ssu.size()+" 16S.");
jpayne@68 848 ros16S.add(ssu, r.numericID);
jpayne@68 849 }
jpayne@68 850 }
jpayne@68 851 }
jpayne@68 852 if(ros18S!=null){
jpayne@68 853 if(list!=null && !list.isEmpty()){
jpayne@68 854 ArrayList<Read> ssu=fetchType(r, list, r18S);
jpayne@68 855 if(ssu!=null && !ssu.isEmpty()){ros18S.add(ssu, r.numericID);}
jpayne@68 856 }
jpayne@68 857 }
jpayne@68 858
jpayne@68 859 if(rosAmino!=null){
jpayne@68 860 if(mode==TRANSLATE){
jpayne@68 861 if(list!=null && !list.isEmpty()){
jpayne@68 862 ArrayList<Read> prots=translate(r, list);
jpayne@68 863 if(prots!=null){rosAmino.add(prots, r.numericID);}
jpayne@68 864 }
jpayne@68 865 }else if(mode==RETRANSLATE) {
jpayne@68 866 if(list!=null && !list.isEmpty()){
jpayne@68 867 ArrayList<Read> prots=translate(r, list);
jpayne@68 868 ArrayList<Read> ret=detranslate(prots);
jpayne@68 869 if(ret!=null){rosAmino.add(ret, r.numericID);}
jpayne@68 870 }
jpayne@68 871 }else if(mode==RECODE) {
jpayne@68 872 if(list!=null && !list.isEmpty()){
jpayne@68 873 Read recoded=recode(r, list);
jpayne@68 874 r.mate=null;
jpayne@68 875 ArrayList<Read> rec=new ArrayList<Read>(1);
jpayne@68 876 rec.add(recoded);
jpayne@68 877 if(rec!=null){rosAmino.add(rec, r.numericID);}
jpayne@68 878 }
jpayne@68 879 }else{
jpayne@68 880 assert(false) : mode;
jpayne@68 881 }
jpayne@68 882 }
jpayne@68 883
jpayne@68 884 return list;
jpayne@68 885 }
jpayne@68 886
jpayne@68 887 /** Number of reads processed by this thread */
jpayne@68 888 protected long readsInT=0;
jpayne@68 889 /** Number of bases processed by this thread */
jpayne@68 890 protected long basesInT=0;
jpayne@68 891
jpayne@68 892 /** Number of genes called by this thread */
jpayne@68 893 protected long genesOutT=0;
jpayne@68 894 /** Number of bytes written by this thread */
jpayne@68 895 protected long bytesOutT=0;
jpayne@68 896
jpayne@68 897 final long[] geneHistT;
jpayne@68 898
jpayne@68 899 protected ConcurrentReadOutputStream rosAmino;
jpayne@68 900 protected ConcurrentReadOutputStream ros16S;
jpayne@68 901 protected ConcurrentReadOutputStream ros18S;
jpayne@68 902
jpayne@68 903 /** True only if this thread has completed successfully */
jpayne@68 904 boolean success=false;
jpayne@68 905
jpayne@68 906 /** Shared input stream */
jpayne@68 907 private final ConcurrentReadInputStream cris;
jpayne@68 908 /** Shared output stream */
jpayne@68 909 private final ByteStreamWriter bsw;
jpayne@68 910 /** Gene Model for annotation (not really needed) */
jpayne@68 911 private final GeneModel pgm;
jpayne@68 912 /** Gene Caller for annotation */
jpayne@68 913 final GeneCaller caller;
jpayne@68 914 /** Thread ID */
jpayne@68 915 final int tid;
jpayne@68 916 }
jpayne@68 917
jpayne@68 918 public static ArrayList<Read> fetchType(final Read r, final ArrayList<Orf> list, int type){
jpayne@68 919 if(list==null || list.isEmpty()){return null;}
jpayne@68 920 ArrayList<Read> ret=new ArrayList<Read>(list.size());
jpayne@68 921 for(int strand=0; strand<2; strand++){
jpayne@68 922 for(Orf orf : list){
jpayne@68 923 if(orf.strand==strand && orf.type==type){
jpayne@68 924 Read sequence=fetch(orf, r.bases, r.id);
jpayne@68 925 ret.add(sequence);
jpayne@68 926 }
jpayne@68 927 }
jpayne@68 928 r.reverseComplement();
jpayne@68 929 }
jpayne@68 930 return (ret.size()>0 ? ret : null);
jpayne@68 931 }
jpayne@68 932
jpayne@68 933 public static ArrayList<Read> translate(final Read r, final ArrayList<Orf> list){
jpayne@68 934 if(list==null || list.isEmpty()){return null;}
jpayne@68 935 ArrayList<Read> prots=new ArrayList<Read>(list.size());
jpayne@68 936 for(int strand=0; strand<2; strand++){
jpayne@68 937 for(Orf orf : list){
jpayne@68 938 if(orf.strand==strand && orf.type==CDS){
jpayne@68 939 Read aa=translate(orf, r.bases, r.id);
jpayne@68 940 prots.add(aa);
jpayne@68 941 }
jpayne@68 942 }
jpayne@68 943 r.reverseComplement();
jpayne@68 944 }
jpayne@68 945 return prots.isEmpty() ? null : prots;
jpayne@68 946 }
jpayne@68 947
jpayne@68 948 public static Read recode(final Read r, final ArrayList<Orf> list){
jpayne@68 949 if(list==null || list.isEmpty()){return r;}
jpayne@68 950 for(int strand=0; strand<2; strand++){
jpayne@68 951 for(Orf orf : list){
jpayne@68 952 if(orf.strand==strand && orf.type==CDS){
jpayne@68 953 recode(orf, r.bases);
jpayne@68 954 }
jpayne@68 955 }
jpayne@68 956 r.reverseComplement();
jpayne@68 957 }
jpayne@68 958 return r;
jpayne@68 959 }
jpayne@68 960
jpayne@68 961 public static ArrayList<Read> detranslate(final ArrayList<Read> prots){
jpayne@68 962 if(prots==null || prots.isEmpty()){return null;}
jpayne@68 963 ArrayList<Read> nucs=new ArrayList<Read>(prots.size());
jpayne@68 964 for(int strand=0; strand<2; strand++){
jpayne@68 965 for(Read prot : prots){
jpayne@68 966 Read nuc=detranslate(prot);
jpayne@68 967 nucs.add(nuc);
jpayne@68 968 }
jpayne@68 969 }
jpayne@68 970 return nucs;
jpayne@68 971 }
jpayne@68 972
jpayne@68 973 public static Read translate(Orf orf, byte[] bases, String id){
jpayne@68 974 // assert(orf.length()%3==0) : orf.length(); //Happens sometimes on genes that go off the end, perhaps
jpayne@68 975 if(orf.strand==1){orf.flip();}
jpayne@68 976 byte[] acids=AminoAcid.toAAs(bases, orf.start, orf.stop);
jpayne@68 977 if(orf.strand==1){orf.flip();}
jpayne@68 978 Read r=new Read(acids, null, id+"\t"+(Shared.strandCodes[orf.strand]+"\t"+orf.start+"-"+orf.stop), 0, Read.AAMASK);
jpayne@68 979 // assert((r.length()+1)*3==orf.length());
jpayne@68 980 return r;
jpayne@68 981 }
jpayne@68 982
jpayne@68 983 public static Read fetch(Orf orf, Read source){
jpayne@68 984 assert(orf.start>=0 && orf.stop<source.length()) : source.length()+"\n"+orf;
jpayne@68 985 if(orf.strand==1){source.reverseComplement();}
jpayne@68 986 Read r=fetch(orf, source.bases, source.id);
jpayne@68 987 if(orf.strand==1){source.reverseComplement();}
jpayne@68 988 return r;
jpayne@68 989 }
jpayne@68 990
jpayne@68 991 public static Read fetch(Orf orf, byte[] bases, String id){
jpayne@68 992 assert(orf.start>=0 && orf.stop<bases.length) : bases.length+"\n"+orf;
jpayne@68 993 if(orf.strand==1){orf.flip();}
jpayne@68 994 byte[] sub=Arrays.copyOfRange(bases, orf.start, orf.stop+1);
jpayne@68 995 if(orf.strand==1){orf.flip();}
jpayne@68 996 Read r=new Read(sub, null, id+"\t"+(Shared.strandCodes[orf.strand]+"\t"+orf.start+"-"+orf.stop), 0, 0);
jpayne@68 997 assert(r.length()==orf.length()) : r.length()+", "+orf.length();
jpayne@68 998 return r;
jpayne@68 999 }
jpayne@68 1000
jpayne@68 1001 public static void recode(Orf orf, byte[] bases){
jpayne@68 1002 if(orf.strand==1){orf.flip();}
jpayne@68 1003 byte[] acids=AminoAcid.toAAs(bases, orf.start, orf.stop);
jpayne@68 1004 for(int apos=0, bpos=orf.start; apos<acids.length; apos++){
jpayne@68 1005 byte aa=acids[apos];
jpayne@68 1006 int number=AminoAcid.acidToNumber[aa];
jpayne@68 1007 String codon=(number>=0 ? AminoAcid.canonicalCodons[number] : "NNN");
jpayne@68 1008 for(int i=0; i<3; i++, bpos++) {
jpayne@68 1009 bases[bpos]=(byte)codon.charAt(i);
jpayne@68 1010 }
jpayne@68 1011 }
jpayne@68 1012 if(orf.strand==1){orf.flip();}
jpayne@68 1013 }
jpayne@68 1014
jpayne@68 1015 public static Read detranslate(Read prot){
jpayne@68 1016 ByteBuilder bb=new ByteBuilder(prot.length()*3);
jpayne@68 1017 for(byte aa : prot.bases){
jpayne@68 1018 int number=AminoAcid.acidToNumber[aa];
jpayne@68 1019 String codon=(number>=0 ? AminoAcid.canonicalCodons[number] : "NNN");
jpayne@68 1020 bb.append(codon);
jpayne@68 1021 }
jpayne@68 1022 Read r=new Read(bb.array, null, prot.id, prot.numericID, 0);
jpayne@68 1023 return r;
jpayne@68 1024 }
jpayne@68 1025
jpayne@68 1026 /*--------------------------------------------------------------*/
jpayne@68 1027 /*---------------- Fields ----------------*/
jpayne@68 1028 /*--------------------------------------------------------------*/
jpayne@68 1029
jpayne@68 1030 public static GeneCaller makeGeneCaller(GeneModel pgm){
jpayne@68 1031 GeneCaller caller=new GeneCaller(minLen, maxOverlapSameStrand, maxOverlapOppositeStrand,
jpayne@68 1032 minStartScore, minStopScore, minKmerScore, minOrfScore, minAvgScore, pgm);
jpayne@68 1033 return caller;
jpayne@68 1034 }
jpayne@68 1035
jpayne@68 1036 private long maxReads=-1;
jpayne@68 1037 private boolean merge;
jpayne@68 1038 private boolean ecco;
jpayne@68 1039
jpayne@68 1040 private long readsIn=0;
jpayne@68 1041 private long basesIn=0;
jpayne@68 1042 private long genesOut=0;
jpayne@68 1043 private long bytesOut=0;
jpayne@68 1044
jpayne@68 1045 private static int minLen=80;//Actually a much higher value like 200 seems optimal compared to NCBI
jpayne@68 1046 private static int maxOverlapSameStrand=80;
jpayne@68 1047 private static int maxOverlapOppositeStrand=110;
jpayne@68 1048
jpayne@68 1049 /* for kinnercds=6 */
jpayne@68 1050 // private static float minStartScore=-0.10f;
jpayne@68 1051 // private static float minStopScore=-0.5f;//Not useful; disabled
jpayne@68 1052 // private static float minKmerScore=0.04f;//Does not seem useful.
jpayne@68 1053 // private static float minOrfScore=40f; //Higher increases SNR dramatically but reduces TP rate
jpayne@68 1054 // private static float minAvgScore=0.08f; //Not very effective
jpayne@68 1055
jpayne@68 1056 /* for kinnercds=7 */
jpayne@68 1057 private static float minStartScore=-0.10f;
jpayne@68 1058 private static float minStopScore=-0.5f;//Not useful; disabled
jpayne@68 1059 private static float minKmerScore=0.02f;//Does not seem useful.
jpayne@68 1060 private static float minOrfScore=50f; //Higher increases SNR dramatically but reduces TP rate
jpayne@68 1061 private static float minAvgScore=0.08f; //Not very effective
jpayne@68 1062
jpayne@68 1063 long geneStopsMade=0;
jpayne@68 1064 long geneStartsMade=0;
jpayne@68 1065 long geneStartsRetained=0;
jpayne@68 1066 long geneStopsRetained=0;
jpayne@68 1067 long geneStartsOut=0;
jpayne@68 1068
jpayne@68 1069 long tRNAOut=0;
jpayne@68 1070 long r16SOut=0;
jpayne@68 1071 long r23SOut=0;
jpayne@68 1072 long r5SOut=0;
jpayne@68 1073 long r18SOut=0;
jpayne@68 1074
jpayne@68 1075 ScoreTracker stCds=new ScoreTracker(CDS);
jpayne@68 1076 ScoreTracker stCds2=new ScoreTracker(CDS);
jpayne@68 1077 ScoreTracker stCdsPass=new ScoreTracker(CDS);
jpayne@68 1078 ScoreTracker sttRNA=new ScoreTracker(tRNA);
jpayne@68 1079 ScoreTracker st16s=new ScoreTracker(r16S);
jpayne@68 1080 ScoreTracker st23s=new ScoreTracker(r23S);
jpayne@68 1081 ScoreTracker st5s=new ScoreTracker(r5S);
jpayne@68 1082 ScoreTracker st18s=new ScoreTracker(r18S);
jpayne@68 1083
jpayne@68 1084 ScoreTracker[] trackers=new ScoreTracker[] {stCdsPass, sttRNA, st16s, st23s, st5s, st18s};
jpayne@68 1085
jpayne@68 1086 private int geneHistBins=2000;
jpayne@68 1087 private int geneHistDiv=20;
jpayne@68 1088 private final long[] geneHist;
jpayne@68 1089 private boolean printZeroCountHist=false;
jpayne@68 1090
jpayne@68 1091 /*--------------------------------------------------------------*/
jpayne@68 1092
jpayne@68 1093 private ArrayList<String> fnaList=new ArrayList<String>();
jpayne@68 1094 private ArrayList<String> pgmList=new ArrayList<String>();
jpayne@68 1095 private String outGff=null;
jpayne@68 1096 private String outAmino=null;
jpayne@68 1097 private String out16S=null;
jpayne@68 1098 private String out18S=null;
jpayne@68 1099 private String compareToGff=null;
jpayne@68 1100 private String outStats="stderr";
jpayne@68 1101 private String geneHistFile=null;
jpayne@68 1102 private boolean json_out=false;
jpayne@68 1103
jpayne@68 1104 /*--------------------------------------------------------------*/
jpayne@68 1105 /*---------------- Final Fields ----------------*/
jpayne@68 1106 /*--------------------------------------------------------------*/
jpayne@68 1107
jpayne@68 1108 private final FileFormat ffoutGff;
jpayne@68 1109 private final FileFormat ffoutAmino;
jpayne@68 1110 private final FileFormat ffout16S;
jpayne@68 1111 private final FileFormat ffout18S;
jpayne@68 1112
jpayne@68 1113 /** Determines how sequence is processed if it will be output */
jpayne@68 1114 int mode=TRANSLATE;
jpayne@68 1115
jpayne@68 1116 /** Translate nucleotides to amino acids */
jpayne@68 1117 private static final int TRANSLATE=1;
jpayne@68 1118 /** Translate nucleotides to amino acids,
jpayne@68 1119 * then translate back to nucleotides */
jpayne@68 1120 private static final int RETRANSLATE=2;
jpayne@68 1121 /** Re-encode coding regions of nucleotide
jpayne@68 1122 * sequences as a canonical codons */
jpayne@68 1123 private static final int RECODE=3;
jpayne@68 1124
jpayne@68 1125 /*--------------------------------------------------------------*/
jpayne@68 1126
jpayne@68 1127 private PrintStream outstream=System.err;
jpayne@68 1128 public boolean verbose=false;
jpayne@68 1129 public boolean errorState=false;
jpayne@68 1130 private boolean overwrite=false;
jpayne@68 1131 private boolean append=false;
jpayne@68 1132 private boolean ordered=false; //this is OK sometimes, but sometimes hangs (e.g. on RefSeq mito), possibly if a sequence produces nothing.
jpayne@68 1133 //To fix it, just ensure functions like translate always produce an array, even if it is empty.
jpayne@68 1134
jpayne@68 1135 }