jpayne@68: package prok; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Locale; jpayne@68: import java.util.concurrent.atomic.AtomicInteger; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.IntList; jpayne@68: jpayne@68: /** jpayne@68: * This class is designed to analyze paired prokaryotic fna and gff files jpayne@68: * to calculate the patterns in coding and noncoding frames, start and stop sites. jpayne@68: * It outputs a pgm file. jpayne@68: * @author Brian Bushnell jpayne@68: * @date Sep 27, 2018 jpayne@68: * jpayne@68: */ jpayne@68: public class AnalyzeGenes { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: AnalyzeGenes x=new AnalyzeGenes(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public AnalyzeGenes(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, null/*getClass()*/, false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables prior to parsing jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: jpayne@68: {//Parse the arguments jpayne@68: final Parser parser=parse(args); jpayne@68: overwrite=parser.overwrite; jpayne@68: append=parser.append; jpayne@68: jpayne@68: out=parser.out1; jpayne@68: } jpayne@68: jpayne@68: if(alignRibo){ jpayne@68: //Load sequences jpayne@68: ProkObject.loadConsensusSequenceFromFile(false, false); jpayne@68: } jpayne@68: jpayne@68: fixExtensions(); //Add or remove .gz or .bz2 as needed jpayne@68: checkFileExistence(); //Ensure files can be read and written jpayne@68: checkStatics(); //Adjust file-related static fields as needed for this program jpayne@68: jpayne@68: //Determine how many threads may be used jpayne@68: threads=Tools.min(fnaList.size(), Shared.threads(), Tools.max(32, Shared.CALC_LOGICAL_PROCESSORS()/2)); jpayne@68: jpayne@68: ffout=FileFormat.testOutput(out, FileFormat.PGM, null, true, overwrite, append, false); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization Helpers ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Parse arguments from the command line */ jpayne@68: private Parser parse(String[] args){ jpayne@68: jpayne@68: Parser parser=new Parser(); jpayne@68: parser.overwrite=overwrite; jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: if(b!=null && b.equalsIgnoreCase("null")){b=null;} jpayne@68: jpayne@68: // outstream.println(arg+", "+a+", "+b); jpayne@68: if(PGMTools.parseStatic(arg, a, b)){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna") || a.equals("ref")){ jpayne@68: assert(b!=null); jpayne@68: Tools.addFiles(b, fnaList); jpayne@68: }else if(a.equals("gff") || a.equals("ingff") || a.equals("gffin")){ jpayne@68: assert(b!=null); jpayne@68: Tools.addFiles(b, gffList); jpayne@68: }else if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: ReadWrite.verbose=verbose; jpayne@68: }else if(a.equals("alignribo") || a.equals("align")){ jpayne@68: alignRibo=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("adjustendpoints")){ jpayne@68: adjustEndpoints=Parse.parseBoolean(b); jpayne@68: } jpayne@68: jpayne@68: else if(ProkObject.parse(arg, a, b)){} jpayne@68: jpayne@68: else if(parser.parse(arg, a, b)){ jpayne@68: //do nothing jpayne@68: }else if(arg.indexOf('=')<0 && new File(arg).exists() && FileFormat.isFastaFile(arg)){ jpayne@68: fnaList.add(arg); jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: // throw new RuntimeException("Unknown parameter "+args[i]); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(gffList.isEmpty()){ jpayne@68: for(String s : fnaList){ jpayne@68: String prefix=ReadWrite.stripExtension(s); jpayne@68: String gff=prefix+".gff"; jpayne@68: File f=new File(gff); jpayne@68: if(!f.exists()){ jpayne@68: String gz=gff+".gz"; jpayne@68: f=new File(gz); jpayne@68: assert(f.exists() && f.canRead()) : "Can't read file "+gff; jpayne@68: gff=gz; jpayne@68: } jpayne@68: gffList.add(gff); jpayne@68: } jpayne@68: } jpayne@68: assert(gffList.size()==fnaList.size()) : "Number of fna and gff files do not match: "+fnaList.size()+", "+gffList.size(); jpayne@68: return parser; jpayne@68: } jpayne@68: jpayne@68: /** Add or remove .gz or .bz2 as needed */ jpayne@68: private void fixExtensions(){ jpayne@68: fnaList=Tools.fixExtension(fnaList); jpayne@68: gffList=Tools.fixExtension(gffList); jpayne@68: if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: } jpayne@68: jpayne@68: /** Ensure files can be read and written */ jpayne@68: private void checkFileExistence(){ jpayne@68: //Ensure output files can be written jpayne@68: if(!Tools.testOutputFiles(overwrite, append, false, out)){ jpayne@68: outstream.println((out==null)+", "+out); jpayne@68: throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure input files can be read jpayne@68: ArrayList foo=new ArrayList(); jpayne@68: foo.addAll(fnaList); jpayne@68: foo.addAll(gffList); jpayne@68: if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure that no file was specified multiple times jpayne@68: foo.add(out); jpayne@68: if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){ jpayne@68: throw new RuntimeException("\nSome file names were specified multiple times.\n"); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** Adjust file-related static fields as needed for this program */ jpayne@68: private static void checkStatics(){ jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: void process(Timer t){ jpayne@68: jpayne@68: final GeneModel pgm; jpayne@68: if(Shared.threads()<2 || fnaList.size()<2){ jpayne@68: pgm=makeModelST(); jpayne@68: }else{ jpayne@68: pgm=spawnThreads(); jpayne@68: } jpayne@68: jpayne@68: ByteStreamWriter bsw=ByteStreamWriter.makeBSW(ffout); jpayne@68: jpayne@68: ByteBuilder bb=new ByteBuilder(); jpayne@68: pgm.appendTo(bb); jpayne@68: bytesOut+=bb.length; jpayne@68: jpayne@68: if(bsw!=null){ jpayne@68: bsw.addJob(bb); jpayne@68: errorState|=bsw.poisonAndWait(); jpayne@68: } jpayne@68: jpayne@68: t.stop(); jpayne@68: jpayne@68: outstream.println(timeReadsBasesGenesProcessed(t, pgm.readsProcessed, pgm.basesProcessed, pgm.genesProcessed, pgm.filesProcessed, 8)); jpayne@68: jpayne@68: outstream.println(); jpayne@68: outstream.println(typesProcessed(pgm, 12)); jpayne@68: jpayne@68: //outstream.println("Bytes Out: \t"+bytesOut); jpayne@68: jpayne@68: if(errorState){ jpayne@68: throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private static String timeReadsBasesGenesProcessed(Timer t, long readsProcessed, long basesProcessed, long genesProcessed, long filesProcessed, int pad){ jpayne@68: return ("Time: \t"+t+"\n"+readsBasesGenesProcessed(t.elapsed, readsProcessed, basesProcessed, genesProcessed, filesProcessed, pad)); jpayne@68: } jpayne@68: jpayne@68: private static String readsBasesGenesProcessed(long elapsed, long reads, long bases, long genes, long files, int pad){ jpayne@68: double rpnano=reads/(double)elapsed; jpayne@68: double bpnano=bases/(double)elapsed; jpayne@68: double gpnano=genes/(double)elapsed; jpayne@68: double fpnano=files/(double)elapsed; jpayne@68: jpayne@68: String rstring=Tools.padKM(reads, pad); jpayne@68: String bstring=Tools.padKM(bases, pad); jpayne@68: String gstring=Tools.padKM(genes, pad); jpayne@68: String fstring=Tools.padKM(files, pad); jpayne@68: ByteBuilder sb=new ByteBuilder(); jpayne@68: sb.append("Files Processed: ").append(fstring).append(String.format(Locale.ROOT, " \t%.2f files/sec", fpnano*1000000000)).append('\n'); jpayne@68: sb.append("Sequences Processed:").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk seqs/sec", rpnano*1000000)).append('\n'); jpayne@68: sb.append("Genes Processed: ").append(gstring).append(String.format(Locale.ROOT, " \t%.2fk genes/sec", gpnano*1000000)).append('\n'); jpayne@68: sb.append("Bases Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm bases/sec", bpnano*1000)); jpayne@68: return sb.toString(); jpayne@68: } jpayne@68: jpayne@68: private static String typesProcessed(GeneModel pgm, int pad){ jpayne@68: jpayne@68: ByteBuilder sb=new ByteBuilder(); jpayne@68: sb.append("CDS: "+Tools.padLeft(pgm.statsCDS.lengthCount, pad)).nl(); jpayne@68: sb.append("tRNA: "+Tools.padLeft(pgm.statstRNA.lengthCount, pad)).nl(); jpayne@68: sb.append("16S: "+Tools.padLeft(pgm.stats16S.lengthCount, pad)).nl(); jpayne@68: sb.append("23S: "+Tools.padLeft(pgm.stats23S.lengthCount, pad)).nl(); jpayne@68: sb.append("5S: "+Tools.padLeft(pgm.stats5S.lengthCount, pad)).nl(); jpayne@68: sb.append("18S: "+Tools.padLeft(pgm.stats18S.lengthCount, pad)); jpayne@68: return sb.toString(); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: //TODO: Process each file in a thread. jpayne@68: private GeneModel makeModelST(){ jpayne@68: GeneModel pgmSum=new GeneModel(true); jpayne@68: jpayne@68: for(int i=0; i alpt=new ArrayList(threads); jpayne@68: for(int i=0; i alpt){ jpayne@68: jpayne@68: GeneModel pgm=new GeneModel(false); jpayne@68: jpayne@68: //Wait for completion of all threads jpayne@68: boolean success=true; jpayne@68: for(FileThread pt : alpt){ jpayne@68: jpayne@68: //Wait until this thread has terminated jpayne@68: while(pt.getState()!=Thread.State.TERMINATED){ jpayne@68: try { jpayne@68: //Attempt a join operation jpayne@68: pt.join(); jpayne@68: } catch (InterruptedException e) { jpayne@68: //Potentially handle this, if it is expected to occur jpayne@68: e.printStackTrace(); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Accumulate per-thread statistics jpayne@68: pgm.add(pt.pgm); jpayne@68: jpayne@68: success&=pt.success; jpayne@68: errorState|=pt.errorStateT; jpayne@68: } jpayne@68: jpayne@68: //Track whether any threads failed jpayne@68: if(!success){errorState=true;} jpayne@68: return pgm; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Classes ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private class FileThread extends Thread { jpayne@68: jpayne@68: FileThread(AtomicInteger fnum_){ jpayne@68: fnum=fnum_; jpayne@68: pgm=new GeneModel(true); jpayne@68: } jpayne@68: jpayne@68: @Override jpayne@68: public void run(){ jpayne@68: for(int i=fnum.getAndIncrement(); i fnaList=new ArrayList(); jpayne@68: private ArrayList gffList=new ArrayList(); jpayne@68: private IntList taxList=new IntList(); jpayne@68: private String out=null; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private long bytesOut=0; jpayne@68: static boolean alignRibo=true; jpayne@68: static boolean adjustEndpoints=true; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private final FileFormat ffout; jpayne@68: private final int threads; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Common Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private PrintStream outstream=System.err; jpayne@68: public static boolean verbose=false; jpayne@68: public boolean errorState=false; jpayne@68: private boolean overwrite=true; jpayne@68: private boolean append=false; jpayne@68: jpayne@68: } jpayne@68: