diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/CallGenes.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/CallGenes.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,1135 @@
+package prok;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import dna.AminoAcid;
+import dna.Data;
+import fileIO.ByteFile;
+import fileIO.ByteStreamWriter;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import gff.CompareGff;
+import jgi.BBMerge;
+import json.JsonObject;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.ReadStats;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import stream.ConcurrentReadInputStream;
+import stream.ConcurrentReadOutputStream;
+import stream.Read;
+import structures.ByteBuilder;
+import structures.ListNum;
+
+/**
+ * This is the executable class for gene-calling.
+ * @author Brian Bushnell
+ * @date Sep 24, 2018
+ *
+ */
+public class CallGenes extends ProkObject {
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Initialization        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/**
+	 * Code entrance from the command line.
+	 * @param args Command line arguments
+	 */
+	public static void main(String[] args){
+		//Start a timer immediately upon code entrance.
+		Timer t=new Timer();
+		
+		//Create an instance of this class
+		CallGenes x=new CallGenes(args);
+		
+		//Run the object
+		x.process(t);
+		
+		//Close the print stream if it was redirected
+		Shared.closeStream(x.outstream);
+	}
+	
+	/**
+	 * Constructor.
+	 * @param args Command line arguments
+	 */
+	public CallGenes(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, (args.length>40 ? null : getClass()), false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		//Set shared static variables prior to parsing
+		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
+		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
+		
+		{//Parse the arguments
+			final Parser parser=parse(args);
+			overwrite=parser.overwrite;
+			append=parser.append;
+			
+			outGff=parser.out1;
+			maxReads=parser.maxReads;
+		}
+		
+		fixExtensions(); //Add or remove .gz or .bz2 as needed
+		checkFileExistence(); //Ensure files can be read and written
+		checkStatics(); //Adjust file-related static fields as needed for this program
+		
+		ffoutGff=FileFormat.testOutput(outGff, FileFormat.GFF, null, true, overwrite, append, ordered);
+		ffoutAmino=FileFormat.testOutput(outAmino, FileFormat.FA, null, true, overwrite, append, ordered);
+		ffout16S=FileFormat.testOutput(out16S, FileFormat.FA, null, true, overwrite, append, ordered);
+		ffout18S=FileFormat.testOutput(out18S, FileFormat.FA, null, true, overwrite, append, ordered);
+		
+		if(ffoutGff!=null){
+			assert(!ffoutGff.isSequence()) : "\nout is for gff files.  To output sequence, please use outa.";
+		}
+		if(ffoutAmino!=null){
+			assert(!ffoutAmino.gff()) : "\nouta is for sequence data.  To output gff, please use out.";
+		}
+		if(ffout16S!=null){
+			assert(!ffout16S.gff()) : "\nout16S is for sequence data.  To output gff, please use out.";
+		}
+		if(ffout18S!=null){
+			assert(!ffout18S.gff()) : "\nout18S is for sequence data.  To output gff, please use out.";
+		}
+		
+		if(geneHistFile==null){geneHistBins=0;}
+		else{
+			assert(geneHistBins>1) : "geneHistBins="+geneHistBins+"; should be >1";
+			assert(geneHistDiv>=1) : "geneHistDiv="+geneHistDiv+"; should be >=1";
+		}
+		geneHist=geneHistBins>1 ? new long[geneHistBins] : null;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------    Initialization Helpers    ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** Parse arguments from the command line */
+	private Parser parse(String[] args){
+		
+		Parser parser=new Parser();
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			String[] split=arg.split("=");
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+			if(b!=null && b.equalsIgnoreCase("null")){b=null;}
+
+//			outstream.println(arg+", "+a+", "+b);
+			if(PGMTools.parseStatic(arg, a, b)){
+				//do nothing
+			}else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna")){
+				assert(b!=null);
+				Tools.addFiles(b, fnaList);
+			}else if(b==null && new File(arg).exists() && FileFormat.isFastaFile(arg)){
+				fnaList.add(arg);
+			}else if(a.equals("pgm") || a.equals("gm") || a.equals("model")){
+				assert(b!=null);
+				if(b.equalsIgnoreCase("auto") || b.equalsIgnoreCase("default")){
+					b=Data.findPath("?model.pgm");
+					pgmList.add(b);
+				}else{
+					Tools.addFiles(b, pgmList);
+				}
+			}else if(b==null && new File(arg).exists() && FileFormat.isPgmFile(arg)){
+				Tools.addFiles(arg, pgmList);
+			}else if(a.equals("outamino") || a.equals("aminoout") || a.equals("outa") || a.equals("outaa") || a.equals("aaout") || a.equals("amino")){
+				outAmino=b;
+			}else if(a.equalsIgnoreCase("out16s") || a.equalsIgnoreCase("16sout")){
+				out16S=b;
+			}else if(a.equalsIgnoreCase("out18s") || a.equalsIgnoreCase("18sout")){
+				out18S=b;
+			}else if(a.equals("verbose")){
+				verbose=Parse.parseBoolean(b);
+				//ReadWrite.verbose=verbose;
+				GeneCaller.verbose=verbose;
+			}
+			
+			else if(a.equals("json_out") || a.equalsIgnoreCase("json")){
+				json_out=Parse.parseBoolean(b);
+			}else if(a.equals("stats") || a.equalsIgnoreCase("outstats")){
+				outStats=b;
+			}else if(a.equals("hist") || a.equalsIgnoreCase("outhist") || a.equalsIgnoreCase("lengthhist") || a.equalsIgnoreCase("lhist") || a.equalsIgnoreCase("genehist")){
+				geneHistFile=b;
+			}else if(a.equals("bins")){
+				geneHistBins=Integer.parseInt(b);
+			}else if(a.equals("binlen") || a.equals("binlength") || a.equals("histdiv")){
+				geneHistBins=Integer.parseInt(b);
+			}else if(a.equals("printzero") || a.equals("pz")){
+				printZeroCountHist=Parse.parseBoolean(b);
+			}
+			
+			else if(a.equals("merge")){
+				merge=Parse.parseBoolean(b);
+			}else if(a.equals("ecco")){
+				ecco=Parse.parseBoolean(b);
+			}
+
+			else if(a.equalsIgnoreCase("setbias16s")) {
+				GeneCaller.biases[r16S]=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("setbias18s")) {
+				GeneCaller.biases[r18S]=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("setbias23s")) {
+				GeneCaller.biases[r23S]=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("setbias5s")) {
+				GeneCaller.biases[r5S]=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("setbiastRNA")) {
+				GeneCaller.biases[tRNA]=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("setbiasCDS")) {
+				GeneCaller.biases[CDS]=Float.parseFloat(b);
+			}
+			
+			else if(a.equals("ordered")){
+				ordered=Parse.parseBoolean(b);
+			}
+			
+			else if(a.equals("translate")){
+				mode=TRANSLATE;
+			}else if(a.equals("retranslate") || a.equals("detranslate")){
+				mode=RETRANSLATE;
+			}else if(a.equals("recode")){
+				mode=RECODE;
+			}
+			
+			else if(a.equalsIgnoreCase("minlen") || a.equals("minlength")){
+				minLen=Integer.parseInt(b);
+			}else if(a.equals("maxoverlapss") || a.equals("overlapss") || a.equals("overlapsamestrand") || a.equals("moss") || a.equalsIgnoreCase("maxOverlapSameStrand")){
+				maxOverlapSameStrand=Integer.parseInt(b);
+			}else if(a.equals("maxoverlapos") || a.equals("overlapos") || a.equals("overlapoppositestrand") || a.equals("moos") || a.equalsIgnoreCase("maxOverlapOppositeStrand")){
+				maxOverlapOppositeStrand=Integer.parseInt(b);
+			}else if(a.equalsIgnoreCase("minStartScore")){
+				minStartScore=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("minStopScore")){
+				minStopScore=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("minInnerScore") || a.equalsIgnoreCase("minKmerScore")){
+				minKmerScore=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("minOrfScore") || a.equalsIgnoreCase("minScore")){
+				minOrfScore=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("minAvgScore")){
+				minAvgScore=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("breakLimit")){
+				GeneCaller.breakLimit=Integer.parseInt(b);
+			}else if(a.equalsIgnoreCase("clearcutoffs") || a.equalsIgnoreCase("clearfilters")){
+				GeneCaller.breakLimit=9999;
+				minOrfScore=-9999;
+				minAvgScore=-9999;
+				minKmerScore=-9999;
+				minStopScore=-9999;
+				minStartScore=-9999;
+			}
+
+			else if(a.equalsIgnoreCase("e1")){
+				Orf.e1=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("e2")){
+				Orf.e2=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("e3")){
+				Orf.e3=Float.parseFloat(b);
+			}
+			else if(a.equalsIgnoreCase("f1")){
+				Orf.f1=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("f2")){
+				Orf.f2=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("f3")){
+				Orf.f3=Float.parseFloat(b);
+			}
+			else if(a.equalsIgnoreCase("p0")){
+				GeneCaller.p0=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("p1")){
+				GeneCaller.p1=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("p2")){
+				GeneCaller.p2=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("p3")){
+				GeneCaller.p3=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("p4")){
+				GeneCaller.p4=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("p5")){
+				GeneCaller.p5=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("p6")){
+				GeneCaller.p6=Float.parseFloat(b);
+			}
+			else if(a.equalsIgnoreCase("q1")){
+				GeneCaller.q1=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("q2")){
+				GeneCaller.q2=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("q3")){
+				GeneCaller.q3=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("q4")){
+				GeneCaller.q4=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("q5")){
+				GeneCaller.q5=Float.parseFloat(b);
+			}
+			else if(a.equalsIgnoreCase("lookback")){
+				GeneCaller.lookbackPlus=GeneCaller.lookbackMinus=Integer.parseInt(b);
+			}else if(a.equalsIgnoreCase("lookbackplus")){
+				GeneCaller.lookbackPlus=Integer.parseInt(b);
+			}else if(a.equalsIgnoreCase("lookbackminus")){
+				GeneCaller.lookbackMinus=Integer.parseInt(b);
+			}
+			
+			else if(a.equalsIgnoreCase("compareto")){
+				compareToGff=b;
+			}
+			
+			else if(ProkObject.parse(arg, a, b)){}
+			
+			else if(parser.parse(arg, a, b)){
+				//do nothing
+			}else{
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i];
+				//				throw new RuntimeException("Unknown parameter "+args[i]);
+			}
+		}
+
+		if(pgmList.isEmpty()){
+			String b=Data.findPath("?model.pgm");
+			pgmList.add(b);
+		}
+		for(int i=0; i<pgmList.size(); i++){
+			String s=pgmList.get(i);
+			if(s.equalsIgnoreCase("auto") || s.equalsIgnoreCase("default")){
+				pgmList.set(i, Data.findPath("?model.pgm"));
+			}
+		}
+		
+		if(Shared.threads()<2){ordered=false;}
+		assert(!fnaList.isEmpty()) : "At least 1 fasta file is required.";
+		assert(!pgmList.isEmpty()) : "At least 1 pgm file is required.";
+		return parser;
+	}
+	
+	/** Add or remove .gz or .bz2 as needed */
+	private void fixExtensions(){
+		fnaList=Tools.fixExtension(fnaList);
+		pgmList=Tools.fixExtension(pgmList);
+		if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
+	}
+	
+	/** Ensure files can be read and written */
+	private void checkFileExistence(){
+		//Ensure output files can be written
+		if(!Tools.testOutputFiles(overwrite, append, false, outGff, outAmino, out16S, out18S, outStats, geneHistFile)){
+			outstream.println((outGff==null)+", "+outGff);
+			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "
+					+outGff+", "+outAmino+", "+out16S+", "+out18S+", "+outStats+", "+geneHistFile+"\n");
+		}
+		
+		//Ensure input files can be read
+		ArrayList<String> foo=new ArrayList<String>();
+		foo.addAll(fnaList);
+		foo.addAll(pgmList);
+		if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){
+			throw new RuntimeException("\nCan't read some input files.\n");  
+		}
+		
+		//Ensure that no file was specified multiple times
+		foo.add(outGff);
+		foo.add(outAmino);
+		foo.add(out16S);
+		foo.add(out18S);
+		foo.add(outStats);
+		foo.add(geneHistFile);
+		if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){
+			throw new RuntimeException("\nSome file names were specified multiple times.\n");
+		}
+	}
+	
+	/** Adjust file-related static fields as needed for this program */
+	private static void checkStatics(){
+		//Adjust the number of threads for input file reading
+		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
+			ByteFile.FORCE_MODE_BF2=true;
+		}
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Outer Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** Create read streams and process all data */
+	void process(Timer t){
+		
+		GeneModel pgm=PGMTools.loadAndMerge(pgmList);
+		
+		if(call16S || call18S || call23S || calltRNA || call5S){
+			loadLongKmers();
+			loadConsensusSequenceFromFile(false, false);
+		}
+		
+		ByteStreamWriter bsw=makeBSW(ffoutGff);
+		if(bsw!=null){
+			bsw.forcePrint("##gff-version 3\n");
+		}
+
+		ConcurrentReadOutputStream rosAmino=makeCros(ffoutAmino);
+		ConcurrentReadOutputStream ros16S=makeCros(ffout16S);
+		ConcurrentReadOutputStream ros18S=makeCros(ffout18S);
+		
+		//Turn off read validation in the input threads to increase speed
+		final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
+		Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
+		
+		//Reset counters
+		readsIn=genesOut=0;
+		basesIn=bytesOut=0;
+		
+		for(String fname : fnaList){
+			//Create a read input stream
+			final ConcurrentReadInputStream cris=makeCris(fname);
+
+			//Process the reads in separate threads
+			spawnThreads(cris, bsw, rosAmino, ros16S, ros18S, pgm);
+			
+			//Close the input stream
+			errorState|=ReadWrite.closeStream(cris);
+		}
+		
+		//Close the input stream
+		errorState|=ReadWrite.closeStreams(null, rosAmino, ros16S, ros18S);
+		
+		if(verbose){outstream.println("Finished; closing streams.");}
+		
+		//Write anything that was accumulated by ReadStats
+		errorState|=ReadStats.writeAll();
+		//Close the output stream
+		if(bsw!=null){errorState|=bsw.poisonAndWait();}
+		
+		//Reset read validation
+		Read.VALIDATE_IN_CONSTRUCTOR=vic;
+		
+		//Report timing and results
+		t.stop();
+		outstream.println(Tools.timeReadsBasesProcessed(t, readsIn, basesIn, 8));
+		outstream.println(Tools.linesBytesOut(readsIn, basesIn, genesOut, bytesOut, 8, false));
+		outstream.println();
+		
+		if(json_out){
+			printStatsJson(outStats);
+		}else{
+			printStats(outStats);
+		}
+		
+		if(geneHistFile!=null){
+			printHist(geneHistFile);
+		}
+		
+		//Throw an exception of there was an error in a thread
+		if(errorState){
+			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+		}
+		
+		if(compareToGff!=null){
+			if(compareToGff.equals("auto")){
+				compareToGff=fnaList.get(0).replace(".fna", ".gff");
+				compareToGff=compareToGff.replace(".fa", ".gff");
+				compareToGff=compareToGff.replace(".fasta", ".gff");
+			}
+			CompareGff.main(new String[] {outGff, compareToGff});
+		}
+	}
+	
+	private void printStats(String fname){
+		if(fname==null){return;}
+		ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false);
+		bsw.start();
+		
+		if(callCDS){
+			bsw.println("Gene Starts Made:     \t "+Tools.padLeft(geneStartsMade, 12));
+			bsw.println("Gene Stops Made:      \t "+Tools.padLeft(geneStopsMade, 12));
+			bsw.println("Gene Starts Retained: \t "+Tools.padLeft(geneStartsRetained, 12));
+			bsw.println("Gene Stops Retained:  \t "+Tools.padLeft(geneStopsRetained, 12));
+			bsw.println("CDS Out:              \t "+Tools.padLeft(geneStartsOut, 12));
+		}
+		if(call16S){bsw.println("16S Out:              \t "+Tools.padLeft(r16SOut, 12));}
+		if(call18S){bsw.println("18S Out:              \t "+Tools.padLeft(r18SOut, 12));}
+		if(call23S){bsw.println("23S Out:              \t "+Tools.padLeft(r23SOut, 12));}
+		if(call5S){bsw.println("5S Out:               \t "+Tools.padLeft(r5SOut, 12));}
+		if(calltRNA){bsw.println("tRNA Out:             \t "+Tools.padLeft(tRNAOut, 12));}
+		
+		if(callCDS){
+			bsw.println();
+			bsw.println("All ORF Stats:");
+			bsw.print(stCds.toString());
+
+			bsw.println();
+			bsw.println("Retained ORF Stats:");
+			bsw.print(stCds2.toString());
+
+			bsw.println();
+			bsw.println("Called ORF Stats:");
+			stCdsPass.genomeSize=basesIn;
+			bsw.print(stCdsPass.toString());
+		}
+
+		if(call16S){
+			bsw.println();
+			bsw.println("Called 16S Stats:");
+			bsw.print(st16s.toString());
+		}
+		if(call23S){
+			bsw.println();
+			bsw.println("Called 23S Stats:");
+			bsw.print(st23s.toString());
+		}
+		if(call5S){
+			bsw.println();
+			bsw.println("Called 5S Stats:");
+			bsw.print(st5s.toString());
+		}
+		if(call18S){
+			bsw.println();
+			bsw.println("Called 18S Stats:");
+			bsw.print(st18s.toString());
+		}
+		bsw.poisonAndWait();
+	}
+	
+	private void printStatsJson(String fname){
+		if(fname==null){return;}
+		
+		JsonObject outer=new JsonObject();
+		
+		{
+			JsonObject jo=new JsonObject();
+			if(callCDS){
+				jo.add("Gene Starts Made", geneStartsMade);
+				jo.add("Gene Stops Made", geneStopsMade);
+				jo.add("Gene Starts Retained", geneStartsRetained);
+				jo.add("Gene Stops Retained", geneStopsRetained);
+				jo.add("CDS Out", geneStartsOut);
+			}
+			if(call16S){jo.add("16S Out", r16SOut);}
+			if(call18S){jo.add("18S Out", r18SOut);}
+			if(call23S){jo.add("23S Out", r23SOut);}
+			if(call5S){jo.add("5S Out", r5SOut);}
+			if(calltRNA){jo.add("tRNA Out", tRNAOut);}
+			outer.add("Overall", jo);
+		}
+		
+		if(callCDS){
+			outer.add("All ORF Stats", stCds.toJson());
+			outer.add("Retained ORF Stats", stCds2.toJson());
+			stCdsPass.genomeSize=basesIn;
+			outer.add("Called ORF Stats", stCdsPass.toJson());
+		}
+
+		if(call16S){outer.add("Called 16S Stats", st16s.toJson());}
+		if(call18S){outer.add("Called 18S Stats", st18s.toJson());}
+		if(call23S){outer.add("Called 23S Stats", st23s.toJson());}
+		if(call5S){outer.add("Called 5S Stats", st5s.toJson());}
+		if(calltRNA){outer.add("Called tRNA Stats", sttRNA.toJson());}
+		
+
+		ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false);
+		bsw.start();
+		bsw.println(outer.toText());
+		bsw.poisonAndWait();
+	}
+	
+	private void printHist(String fname){
+		if(fname==null || geneHist==null){return;}
+		ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, false);
+		bsw.start();
+		long sum=Tools.sum(geneHist);
+		double mean=Tools.averageHistogram(geneHist)*geneHistDiv;
+		int median=Tools.medianHistogram(geneHist)*geneHistDiv;
+		double std=Tools.standardDeviationHistogram(geneHist)*geneHistDiv;
+		bsw.println("#Gene Length Histogram");
+		bsw.print("#Genes:\t").println(sum);
+		bsw.print("#Mean:\t").println(mean, 4);
+		bsw.print("#Median:\t").println(median);
+		bsw.print("#STDDev:\t").println(std, 4);
+		bsw.print("#Length\tCount\n");
+		long cum=0;
+		for(int i=0; i<geneHist.length && cum<sum; i++){
+			int len=i*geneHistDiv;
+			long count=geneHist[i];
+			cum+=count;
+			if(count>0 || printZeroCountHist){
+				bsw.print(len).tab().println(count);
+			}
+		}
+		bsw.poisonAndWait();
+	}
+	
+	private ConcurrentReadInputStream makeCris(String fname){
+		FileFormat ffin=FileFormat.testInput(fname, FileFormat.FA, null, true, true);
+		ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ffin, null);
+		cris.start(); //Start the stream
+		if(verbose){outstream.println("Started cris");}
+		return cris;
+	}
+	
+	/** Spawn process threads */
+	private void spawnThreads(final ConcurrentReadInputStream cris, final ByteStreamWriter bsw, 
+			ConcurrentReadOutputStream rosAmino, ConcurrentReadOutputStream ros16S, ConcurrentReadOutputStream ros18S, GeneModel pgm){
+		
+		//Do anything necessary prior to processing
+		
+		//Determine how many threads may be used
+		final int threads=Shared.threads();
+		
+		//Fill a list with ProcessThreads
+		ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
+		for(int i=0; i<threads; i++){
+			alpt.add(new ProcessThread(cris, bsw, rosAmino, ros16S, ros18S, pgm, minLen, i));
+		}
+		
+		//Start the threads
+		for(ProcessThread pt : alpt){
+			pt.start();
+		}
+		
+		//Wait for threads to finish
+		waitForThreads(alpt);
+		
+		//Do anything necessary after processing
+		
+	}
+	
+	private void waitForThreads(ArrayList<ProcessThread> alpt){
+		
+		//Wait for completion of all threads
+		boolean success=true;
+		for(ProcessThread pt : alpt){
+			
+			//Wait until this thread has terminated
+			while(pt.getState()!=Thread.State.TERMINATED){
+				try {
+					//Attempt a join operation
+					pt.join();
+				} catch (InterruptedException e) {
+					//Potentially handle this, if it is expected to occur
+					e.printStackTrace();
+				}
+			}
+			
+			//Accumulate per-thread statistics
+			readsIn+=pt.readsInT;
+			basesIn+=pt.basesInT;
+			genesOut+=pt.genesOutT;
+			bytesOut+=pt.bytesOutT;
+			
+			geneStopsMade+=pt.caller.geneStopsMade;
+			geneStartsMade+=pt.caller.geneStartsMade;
+			geneStartsRetained+=pt.caller.geneStartsRetained;
+			geneStopsRetained+=pt.caller.geneStopsRetained;
+			geneStartsOut+=pt.caller.geneStartsOut;
+
+			r16SOut+=pt.caller.r16SOut;
+			r18SOut+=pt.caller.r18SOut;
+			r23SOut+=pt.caller.r23SOut;
+			r5SOut+=pt.caller.r5SOut;
+			tRNAOut+=pt.caller.tRNAOut;
+			
+			stCds.add(pt.caller.stCds);
+			stCds2.add(pt.caller.stCds2);
+//			stCdsPass.add(pt.caller.stCdsPass);
+			
+			for(int i=0; i<trackers.length; i++){
+				trackers[i].add(pt.caller.trackers[i]);
+			}
+			
+			if(geneHist!=null){Tools.add(geneHist, pt.geneHistT);}
+			
+			success&=pt.success;
+		}
+		
+		//Track whether any threads failed
+		if(!success){errorState=true;}
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Inner Methods         ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	private static ByteStreamWriter makeBSW(FileFormat ff){
+		if(ff==null){return null;}
+		ByteStreamWriter bsw=new ByteStreamWriter(ff);
+		bsw.start();
+		return bsw;
+	}
+	
+	private ConcurrentReadOutputStream makeCros(FileFormat ff){
+		if(ff==null){return null;}
+
+		//Select output buffer size based on whether it needs to be ordered
+		final int buff=(ordered ? Tools.mid(4, 64, (Shared.threads()*2)/3) : 4);
+
+		final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ff, null, buff, null, false);
+		ros.start(); //Start the stream
+		return ros;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Inner Classes        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** This class is static to prevent accidental writing to shared variables.
+	 * It is safe to remove the static modifier. */
+	private class ProcessThread extends Thread {
+		
+		//Constructor
+		ProcessThread(final ConcurrentReadInputStream cris_, final ByteStreamWriter bsw_, 
+				ConcurrentReadOutputStream rosAmino_, ConcurrentReadOutputStream ros16S_, ConcurrentReadOutputStream ros18S_, 
+				GeneModel pgm_, final int minLen, final int tid_){
+			cris=cris_;
+			bsw=bsw_;
+			rosAmino=rosAmino_;
+			ros16S=ros16S_;
+			ros18S=ros18S_;
+			pgm=pgm_;
+			tid=tid_;
+			geneHistT=(geneHistBins>1 ? new long[geneHistBins] : null);
+			caller=new GeneCaller(minLen, maxOverlapSameStrand, maxOverlapOppositeStrand, 
+					minStartScore, minStopScore, minKmerScore, minOrfScore, minAvgScore, pgm);
+		}
+		
+		//Called by start()
+		@Override
+		public void run(){
+			//Do anything necessary prior to processing
+			
+			//Process the reads
+			processInner();
+			
+			//Do anything necessary after processing
+			
+			//Indicate successful exit status
+			success=true;
+		}
+		
+		/** Iterate through the reads */
+		void processInner(){
+			
+			//Grab the first ListNum of reads
+			ListNum<Read> ln=cris.nextList();
+
+			//Check to ensure pairing is as expected
+			if(ln!=null && !ln.isEmpty()){
+				Read r=ln.get(0);
+//				assert(ffin1.samOrBam() || (r.mate!=null)==cris.paired()); //Disabled due to non-static access
+			}
+
+			//As long as there is a nonempty read list...
+			while(ln!=null && ln.size()>0){
+//				if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
+				
+				processList(ln);
+
+				//Fetch a new list
+				ln=cris.nextList();
+			}
+
+			//Notify the input stream that the final list was used
+			if(ln!=null){
+				cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
+			}
+		}
+		
+		void processList(ListNum<Read> ln){
+			//Grab the actual read list from the ListNum
+			final ArrayList<Read> reads=ln.list;
+
+//			System.err.println(reads.size());
+			
+			ArrayList<Orf> orfList=new ArrayList<Orf>();
+			
+			//Loop through each read in the list
+			for(int idx=0; idx<reads.size(); idx++){
+				Read r1=reads.get(idx);
+				Read r2=r1.mate;
+				
+				//Validate reads in worker threads
+				if(!r1.validated()){r1.validate(true);}
+				if(r2!=null && !r2.validated()){r2.validate(true);}
+
+				//Track the initial length for statistics
+				final int initialLength1=r1.length();
+				final int initialLength2=r1.mateLength();
+
+				//Increment counters
+				readsInT+=r1.pairCount();
+				basesInT+=initialLength1+initialLength2;
+				
+				if(r2!=null){
+					if(merge){
+						final int insert=BBMerge.findOverlapStrict(r1, r2, false);
+						if(insert>0){
+							r2.reverseComplement();
+							r1=r1.joinRead(insert);
+							r2=null;
+						}
+					}else if(ecco){
+						BBMerge.findOverlapStrict(r1, r2, true);
+					}
+				}
+				
+				{
+					//Reads are processed in this block.
+					{
+						ArrayList<Orf> list=processRead(r1);
+						if(list!=null){orfList.addAll(list);}
+					}
+					if(r2!=null){
+						ArrayList<Orf> list=processRead(r2);
+						if(list!=null){orfList.addAll(list);}
+					}
+				}
+			}
+			
+			genesOutT+=orfList.size();
+			ByteBuilder bb=new ByteBuilder();
+			
+			if(bsw!=null){
+				if(bsw.ordered){
+					for(Orf orf : orfList){
+						orf.appendGff(bb);
+						bb.nl();
+					}
+					bsw.add(bb, ln.id);
+					bytesOutT+=bb.length();
+				}else{
+					for(Orf orf : orfList){
+						orf.appendGff(bb);
+						bb.nl();
+//						if(bb.length()>=32000){
+//							bytesOutT+=bb.length();
+//							bsw.addJob(bb);
+//							bb=new ByteBuilder();
+//						}
+					}
+					if(bb.length()>0){
+						bsw.addJob(bb);
+						bytesOutT+=bb.length();
+					}
+				}
+			}
+
+			//Notify the input stream that the list was used
+			cris.returnList(ln);
+//			if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access
+		}
+		
+		/**
+		 * Process a read or a read pair.
+		 * @param r1 Read 1
+		 * @param r2 Read 2 (may be null)
+		 * @return True if the reads should be kept, false if they should be discarded.
+		 */
+		ArrayList<Orf> processRead(final Read r){
+			ArrayList<Orf> list=caller.callGenes(r, pgm);
+			
+			if(geneHistT!=null && list!=null){
+				for(Orf o : list){
+					int bin=Tools.min(geneHistT.length-1, o.length()/geneHistDiv);
+					geneHistT[bin]++;
+				}
+			}
+			
+			if(ros16S!=null){
+				if(list!=null && !list.isEmpty()){
+//					System.err.println("Looking for 16S.");
+					ArrayList<Read> ssu=fetchType(r, list, r16S);
+					if(ssu!=null && !ssu.isEmpty()){
+//						System.err.println("Found "+ssu.size()+" 16S.");
+						ros16S.add(ssu, r.numericID);
+					}
+				}
+			}
+			if(ros18S!=null){
+				if(list!=null && !list.isEmpty()){
+					ArrayList<Read> ssu=fetchType(r, list, r18S);
+					if(ssu!=null && !ssu.isEmpty()){ros18S.add(ssu, r.numericID);}
+				}
+			}
+			
+			if(rosAmino!=null){
+				if(mode==TRANSLATE){
+					if(list!=null && !list.isEmpty()){
+						ArrayList<Read> prots=translate(r, list);
+						if(prots!=null){rosAmino.add(prots, r.numericID);}
+					}
+				}else if(mode==RETRANSLATE) {
+					if(list!=null && !list.isEmpty()){
+						ArrayList<Read> prots=translate(r, list);
+						ArrayList<Read> ret=detranslate(prots);
+						if(ret!=null){rosAmino.add(ret, r.numericID);}
+					}
+				}else if(mode==RECODE) {
+					if(list!=null && !list.isEmpty()){
+						Read recoded=recode(r, list);
+						r.mate=null;
+						ArrayList<Read> rec=new ArrayList<Read>(1);
+						rec.add(recoded);
+						if(rec!=null){rosAmino.add(rec, r.numericID);}
+					}
+				}else{
+					assert(false) : mode;
+				}
+			}
+			
+			return list;
+		}
+		
+		/** Number of reads processed by this thread */
+		protected long readsInT=0;
+		/** Number of bases processed by this thread */
+		protected long basesInT=0;
+		
+		/** Number of genes called by this thread */
+		protected long genesOutT=0;
+		/** Number of bytes written by this thread */
+		protected long bytesOutT=0;
+		
+		final long[] geneHistT;
+
+		protected ConcurrentReadOutputStream rosAmino;
+		protected ConcurrentReadOutputStream ros16S;
+		protected ConcurrentReadOutputStream ros18S;
+		
+		/** True only if this thread has completed successfully */
+		boolean success=false;
+		
+		/** Shared input stream */
+		private final ConcurrentReadInputStream cris;
+		/** Shared output stream */
+		private final ByteStreamWriter bsw;
+		/** Gene Model for annotation (not really needed) */
+		private final GeneModel pgm;
+		/** Gene Caller for annotation */
+		final GeneCaller caller;
+		/** Thread ID */
+		final int tid;
+	}
+	
+	public static ArrayList<Read> fetchType(final Read r, final ArrayList<Orf> list, int type){
+		if(list==null || list.isEmpty()){return null;}
+		ArrayList<Read> ret=new ArrayList<Read>(list.size());
+		for(int strand=0; strand<2; strand++){
+			for(Orf orf : list){
+				if(orf.strand==strand && orf.type==type){
+					Read sequence=fetch(orf, r.bases, r.id);
+					ret.add(sequence);
+				}
+			}
+			r.reverseComplement();
+		}
+		return (ret.size()>0 ? ret : null);
+	}
+	
+	public static ArrayList<Read> translate(final Read r, final ArrayList<Orf> list){
+		if(list==null || list.isEmpty()){return null;}
+		ArrayList<Read> prots=new ArrayList<Read>(list.size());
+		for(int strand=0; strand<2; strand++){
+			for(Orf orf : list){
+				if(orf.strand==strand && orf.type==CDS){
+					Read aa=translate(orf, r.bases, r.id);
+					prots.add(aa);
+				}
+			}
+			r.reverseComplement();
+		}
+		return prots.isEmpty() ? null : prots;
+	}
+	
+	public static Read recode(final Read r, final ArrayList<Orf> list){
+		if(list==null || list.isEmpty()){return r;}
+		for(int strand=0; strand<2; strand++){
+			for(Orf orf : list){
+				if(orf.strand==strand && orf.type==CDS){
+					recode(orf, r.bases);
+				}
+			}
+			r.reverseComplement();
+		}
+		return r;
+	}
+	
+	public static ArrayList<Read> detranslate(final ArrayList<Read> prots){
+		if(prots==null || prots.isEmpty()){return null;}
+		ArrayList<Read> nucs=new ArrayList<Read>(prots.size());
+		for(int strand=0; strand<2; strand++){
+			for(Read prot : prots){
+				Read nuc=detranslate(prot);
+				nucs.add(nuc);
+			}
+		}
+		return nucs;
+	}
+	
+	public static Read translate(Orf orf, byte[] bases, String id){
+//		assert(orf.length()%3==0) : orf.length(); //Happens sometimes on genes that go off the end, perhaps
+		if(orf.strand==1){orf.flip();}
+		byte[] acids=AminoAcid.toAAs(bases, orf.start, orf.stop);
+		if(orf.strand==1){orf.flip();}
+		Read r=new Read(acids, null, id+"\t"+(Shared.strandCodes[orf.strand]+"\t"+orf.start+"-"+orf.stop), 0, Read.AAMASK);
+//		assert((r.length()+1)*3==orf.length());
+		return r;
+	}
+	
+	public static Read fetch(Orf orf, Read source){
+		assert(orf.start>=0 && orf.stop<source.length()) : source.length()+"\n"+orf;
+		if(orf.strand==1){source.reverseComplement();}
+		Read r=fetch(orf, source.bases, source.id);
+		if(orf.strand==1){source.reverseComplement();}
+		return r;
+	}
+	
+	public static Read fetch(Orf orf, byte[] bases, String id){
+		assert(orf.start>=0 && orf.stop<bases.length) : bases.length+"\n"+orf;
+		if(orf.strand==1){orf.flip();}
+		byte[] sub=Arrays.copyOfRange(bases, orf.start, orf.stop+1);
+		if(orf.strand==1){orf.flip();}
+		Read r=new Read(sub, null, id+"\t"+(Shared.strandCodes[orf.strand]+"\t"+orf.start+"-"+orf.stop), 0, 0);
+		assert(r.length()==orf.length()) : r.length()+", "+orf.length();
+		return r;
+	}
+	
+	public static void recode(Orf orf, byte[] bases){
+		if(orf.strand==1){orf.flip();}
+		byte[] acids=AminoAcid.toAAs(bases, orf.start, orf.stop);
+		for(int apos=0, bpos=orf.start; apos<acids.length; apos++){
+			byte aa=acids[apos];
+			int number=AminoAcid.acidToNumber[aa];
+			String codon=(number>=0 ? AminoAcid.canonicalCodons[number] : "NNN");
+			for(int i=0; i<3; i++, bpos++) {
+				bases[bpos]=(byte)codon.charAt(i);
+			}
+		}
+		if(orf.strand==1){orf.flip();}
+	}
+	
+	public static Read detranslate(Read prot){
+		ByteBuilder bb=new ByteBuilder(prot.length()*3);
+		for(byte aa : prot.bases){
+			int number=AminoAcid.acidToNumber[aa];
+			String codon=(number>=0 ? AminoAcid.canonicalCodons[number] : "NNN");
+			bb.append(codon);
+		}
+		Read r=new Read(bb.array, null, prot.id, prot.numericID, 0);
+		return r;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------            Fields            ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	public static GeneCaller makeGeneCaller(GeneModel pgm){
+		GeneCaller caller=new GeneCaller(minLen, maxOverlapSameStrand, maxOverlapOppositeStrand, 
+				minStartScore, minStopScore, minKmerScore, minOrfScore, minAvgScore, pgm);
+		return caller;
+	}
+	
+	private long maxReads=-1;
+	private boolean merge;
+	private boolean ecco;
+	
+	private long readsIn=0;
+	private long basesIn=0;
+	private long genesOut=0;
+	private long bytesOut=0;
+	
+	private static int minLen=80;//Actually a much higher value like 200 seems optimal compared to NCBI
+	private static int maxOverlapSameStrand=80;
+	private static int maxOverlapOppositeStrand=110;
+	
+	/* for kinnercds=6 */
+//	private static float minStartScore=-0.10f;
+//	private static float minStopScore=-0.5f;//Not useful; disabled
+//	private static float minKmerScore=0.04f;//Does not seem useful.
+//	private static float minOrfScore=40f; //Higher increases SNR dramatically but reduces TP rate
+//	private static float minAvgScore=0.08f; //Not very effective
+
+	/* for kinnercds=7 */
+	private static float minStartScore=-0.10f;
+	private static float minStopScore=-0.5f;//Not useful; disabled
+	private static float minKmerScore=0.02f;//Does not seem useful.
+	private static float minOrfScore=50f; //Higher increases SNR dramatically but reduces TP rate
+	private static float minAvgScore=0.08f; //Not very effective
+	
+	long geneStopsMade=0;
+	long geneStartsMade=0;
+	long geneStartsRetained=0;
+	long geneStopsRetained=0;
+	long geneStartsOut=0;
+
+	long tRNAOut=0;
+	long r16SOut=0;
+	long r23SOut=0;
+	long r5SOut=0;
+	long r18SOut=0;
+	
+	ScoreTracker stCds=new ScoreTracker(CDS);
+	ScoreTracker stCds2=new ScoreTracker(CDS);
+	ScoreTracker stCdsPass=new ScoreTracker(CDS);
+	ScoreTracker sttRNA=new ScoreTracker(tRNA);
+	ScoreTracker st16s=new ScoreTracker(r16S);
+	ScoreTracker st23s=new ScoreTracker(r23S);
+	ScoreTracker st5s=new ScoreTracker(r5S);
+	ScoreTracker st18s=new ScoreTracker(r18S);
+	
+	ScoreTracker[] trackers=new ScoreTracker[] {stCdsPass, sttRNA, st16s, st23s, st5s, st18s};
+	
+	private int geneHistBins=2000;
+	private int geneHistDiv=20;
+	private final long[] geneHist;
+	private boolean printZeroCountHist=false;
+	
+	/*--------------------------------------------------------------*/
+
+	private ArrayList<String> fnaList=new ArrayList<String>();
+	private ArrayList<String> pgmList=new ArrayList<String>();
+	private String outGff=null;
+	private String outAmino=null;
+	private String out16S=null;
+	private String out18S=null;
+	private String compareToGff=null;
+	private String outStats="stderr";
+	private String geneHistFile=null;
+	private boolean json_out=false;
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Final Fields         ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	private final FileFormat ffoutGff;
+	private final FileFormat ffoutAmino;
+	private final FileFormat ffout16S;
+	private final FileFormat ffout18S;
+	
+	/** Determines how sequence is processed if it will be output */
+	int mode=TRANSLATE;
+	
+	/** Translate nucleotides to amino acids */
+	private static final int TRANSLATE=1;
+	/** Translate nucleotides to amino acids,
+	 * then translate back to nucleotides */
+	private static final int RETRANSLATE=2;
+	/** Re-encode coding regions of nucleotide
+	 * sequences as a canonical codons */
+	private static final int RECODE=3;
+	
+	/*--------------------------------------------------------------*/
+	
+	private PrintStream outstream=System.err;
+	public boolean verbose=false;
+	public boolean errorState=false;
+	private boolean overwrite=false;
+	private boolean append=false;
+	private boolean ordered=false; //this is OK sometimes, but sometimes hangs (e.g. on RefSeq mito), possibly if a sequence produces nothing.
+	//To fix it, just ensure functions like translate always produce an array, even if it is empty.
+	
+}