Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/AnalyzeGenes.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
package prok;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;

import fileIO.ByteFile;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import structures.ByteBuilder;
import structures.IntList;

/**
 * This class is designed to analyze paired prokaryotic fna and gff files
 * to calculate the patterns in coding and noncoding frames, start and stop sites.
 * It outputs a pgm file.
 * @author Brian Bushnell
 * @date Sep 27, 2018
 *
 */
public class AnalyzeGenes {

	/*--------------------------------------------------------------*/
	/*----------------        Initialization        ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Code entrance from the command line.
	 * @param args Command line arguments
	 */
	public static void main(String[] args){
		//Start a timer immediately upon code entrance.
		Timer t=new Timer();

		//Create an instance of this class
		AnalyzeGenes x=new AnalyzeGenes(args);

		//Run the object
		x.process(t);

		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}

	/**
	 * Constructor.
	 * Parses the arguments, validates the input and output files,
	 * and decides how many worker threads may be used.
	 * @param args Command line arguments
	 */
	public AnalyzeGenes(String[] args){

		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, null/*getClass()*/, false);
			args=pp.args;
			outstream=pp.outstream;
		}

		//Set shared static variables prior to parsing
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.MAX_ZIP_THREADS=Shared.threads();

		{//Parse the arguments
			final Parser parser=parse(args);
			overwrite=parser.overwrite;
			append=parser.append;

			out=parser.out1;
		}

		if(alignRibo){
			//Load sequences
			ProkObject.loadConsensusSequenceFromFile(false, false);
		}

		fixExtensions(); //Add or remove .gz or .bz2 as needed
		checkFileExistence(); //Ensure files can be read and written
		checkStatics(); //Adjust file-related static fields as needed for this program

		//Determine how many threads may be used; never more than one per input file
		threads=Tools.min(fnaList.size(), Shared.threads(), Tools.max(32, Shared.CALC_LOGICAL_PROCESSORS()/2));

		ffout=FileFormat.testOutput(out, FileFormat.PGM, null, true, overwrite, append, false);
	}

	/*--------------------------------------------------------------*/
	/*----------------    Initialization Helpers    ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Parse arguments from the command line.
	 * Fills fnaList and gffList, and sets the static and instance flags
	 * recognized here; everything else is offered to PGMTools, ProkObject,
	 * and the shared Parser in that order.
	 * If no gff files were specified, one is inferred per fna file by
	 * replacing the extension with ".gff" (or ".gff.gz" if the former is absent).
	 * @param args Command line arguments, in key=value form or bare fasta paths
	 * @return The Parser holding the generic flags (overwrite, append, out1)
	 */
	private Parser parse(String[] args){

		Parser parser=new Parser();
		parser.overwrite=overwrite;
		for(int i=0; i<args.length; i++){
			String arg=args[i];
			String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;
			if(b!=null && b.equalsIgnoreCase("null")){b=null;}

//			outstream.println(arg+", "+a+", "+b);
			if(PGMTools.parseStatic(arg, a, b)){
				//do nothing
			}else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna") || a.equals("ref")){
				assert(b!=null);
				Tools.addFiles(b, fnaList);
			}else if(a.equals("gff") || a.equals("ingff") || a.equals("gffin")){
				assert(b!=null);
				Tools.addFiles(b, gffList);
			}else if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
				ReadWrite.verbose=verbose;
			}else if(a.equals("alignribo") || a.equals("align")){
				alignRibo=Parse.parseBoolean(b);
			}else if(a.equals("adjustendpoints")){
				adjustEndpoints=Parse.parseBoolean(b);
			}

			else if(ProkObject.parse(arg, a, b)){}

			else if(parser.parse(arg, a, b)){
				//do nothing
			}else if(arg.indexOf('=')<0 && new File(arg).exists() && FileFormat.isFastaFile(arg)){
				//A bare argument naming an existing fasta file is treated as an fna input
				fnaList.add(arg);
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
				//				throw new RuntimeException("Unknown parameter "+args[i]);
			}
		}

		if(gffList.isEmpty()){
			//No gff files given; look for one next to each fna file
			for(String s : fnaList){
				String prefix=ReadWrite.stripExtension(s);
				String gff=prefix+".gff";
				File f=new File(gff);
				if(!f.exists()){
					String gz=gff+".gz";
					f=new File(gz);
					assert(f.exists() && f.canRead()) : "Can't read file "+gff;
					gff=gz;
				}
				gffList.add(gff);
			}
		}
		assert(gffList.size()==fnaList.size()) : "Number of fna and gff files do not match: "+fnaList.size()+", "+gffList.size();
		return parser;
	}

	/**
	 * Add or remove .gz or .bz2 as needed.
	 * @throws RuntimeException If no fna input file was specified
	 */
	private void fixExtensions(){
		fnaList=Tools.fixExtension(fnaList);
		gffList=Tools.fixExtension(gffList);
		if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
	}

	/**
	 * Ensure files can be read and written.
	 * @throws RuntimeException If the output cannot be written, an input cannot be read,
	 * or the same file name was specified more than once
	 */
	private void checkFileExistence(){
		//Ensure output files can be written
		if(!Tools.testOutputFiles(overwrite, append, false, out)){
			outstream.println((out==null)+", "+out);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n");
		}

		//Ensure input files can be read
		ArrayList<String> foo=new ArrayList<String>();
		foo.addAll(fnaList);
		foo.addAll(gffList);
		if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){
			throw new RuntimeException("\nCan't read some input files.\n");
		}

		//Ensure that no file was specified multiple times
		foo.add(out);
		if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){
			throw new RuntimeException("\nSome file names were specified multiple times.\n");
		}
	}

	/** Adjust file-related static fields as needed for this program */
	private static void checkStatics(){
		//Adjust the number of threads for input file reading
		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
			ByteFile.FORCE_MODE_BF2=true;
		}
	}

	/*--------------------------------------------------------------*/
	/*----------------         Outer Methods        ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Build the gene model from all input files, write it to the output,
	 * and print timing and count statistics to outstream.
	 * Runs single-threaded when fewer than 2 threads or fewer than 2 fna files
	 * are available, and multithreaded otherwise.
	 * @param t Timer started by the caller; stopped here after the output is written
	 * @throws RuntimeException If any file or the writer reported an error
	 */
	void process(Timer t){

		final GeneModel pgm;
		if(Shared.threads()<2 || fnaList.size()<2){
			pgm=makeModelST();
		}else{
			pgm=spawnThreads();
		}

		ByteStreamWriter bsw=ByteStreamWriter.makeBSW(ffout);

		ByteBuilder bb=new ByteBuilder();
		pgm.appendTo(bb);
		bytesOut+=bb.length;

		if(bsw!=null){
			bsw.addJob(bb);
			errorState|=bsw.poisonAndWait();
		}

		t.stop();

		outstream.println(timeReadsBasesGenesProcessed(t, pgm.readsProcessed, pgm.basesProcessed, pgm.genesProcessed, pgm.filesProcessed, 8));

		outstream.println();
		outstream.println(typesProcessed(pgm, 12));

		//outstream.println("Bytes Out:         \t"+bytesOut);

		if(errorState){
			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
		}
	}

	/**
	 * Format the elapsed time followed by the per-category throughput lines.
	 * @param t Stopped timer supplying the elapsed time
	 * @param readsProcessed Number of sequences processed
	 * @param basesProcessed Number of bases processed
	 * @param genesProcessed Number of genes processed
	 * @param filesProcessed Number of files processed
	 * @param pad Width passed to Tools.padKM for each count
	 * @return Multi-line summary string
	 */
	private static String timeReadsBasesGenesProcessed(Timer t, long readsProcessed, long basesProcessed, long genesProcessed, long filesProcessed, int pad){
		return ("Time: \t"+t+"\n"+readsBasesGenesProcessed(t.elapsed, readsProcessed, basesProcessed, genesProcessed, filesProcessed, pad));
	}

	/**
	 * Format counts and rates for files, sequences, genes, and bases.
	 * @param elapsed Elapsed time; the scaling factors below treat it as nanoseconds
	 * @param reads Number of sequences processed
	 * @param bases Number of bases processed
	 * @param genes Number of genes processed
	 * @param files Number of files processed
	 * @param pad Width passed to Tools.padKM for each count
	 * @return Four-line summary string with no trailing newline
	 */
	private static String readsBasesGenesProcessed(long elapsed, long reads, long bases, long genes, long files, int pad){
		double rpnano=reads/(double)elapsed;
		double bpnano=bases/(double)elapsed;
		double gpnano=genes/(double)elapsed;
		double fpnano=files/(double)elapsed;

		String rstring=Tools.padKM(reads, pad);
		String bstring=Tools.padKM(bases, pad);
		String gstring=Tools.padKM(genes, pad);
		String fstring=Tools.padKM(files, pad);
		ByteBuilder sb=new ByteBuilder();
		sb.append("Files Processed: ").append(fstring).append(String.format(Locale.ROOT, " \t%.2f files/sec", fpnano*1000000000)).append('\n');
		sb.append("Sequences Processed:").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk seqs/sec", rpnano*1000000)).append('\n');
		sb.append("Genes Processed: ").append(gstring).append(String.format(Locale.ROOT, " \t%.2fk genes/sec", gpnano*1000000)).append('\n');
		sb.append("Bases Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm bases/sec", bpnano*1000));
		return sb.toString();
	}

	/**
	 * Format the lengthCount of each feature type tracked by the model.
	 * @param pgm Model whose per-type statistics are reported
	 * @param pad Width passed to Tools.padLeft for each count
	 * @return Six-line summary string with no trailing newline
	 */
	private static String typesProcessed(GeneModel pgm, int pad){

		ByteBuilder sb=new ByteBuilder();
		sb.append("CDS: "+Tools.padLeft(pgm.statsCDS.lengthCount, pad)).nl();
		sb.append("tRNA: "+Tools.padLeft(pgm.statstRNA.lengthCount, pad)).nl();
		sb.append("16S: "+Tools.padLeft(pgm.stats16S.lengthCount, pad)).nl();
		sb.append("23S: "+Tools.padLeft(pgm.stats23S.lengthCount, pad)).nl();
		sb.append("5S: "+Tools.padLeft(pgm.stats5S.lengthCount, pad)).nl();
		sb.append("18S: "+Tools.padLeft(pgm.stats18S.lengthCount, pad));
		return sb.toString();
	}

	/*--------------------------------------------------------------*/
	/*----------------         Inner Methods        ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Build the model by processing every fna/gff pair in the calling thread.
	 * The flag returned by GeneModel.process is folded into errorState,
	 * the same way the multithreaded path reports per-file problems.
	 * @return Model accumulated over all input files
	 */
	private GeneModel makeModelST(){
		GeneModel pgmSum=new GeneModel(true);

		for(int i=0; i<fnaList.size(); i++){
			String fna=fnaList.get(i);
			String gff=gffList.get(i);
			errorState|=pgmSum.process(fna, gff);
		}
		return pgmSum;
	}

	/*--------------------------------------------------------------*/
	/*----------------       Thread Management      ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Spawn process threads.
	 * All threads share one counter from which they claim file indices.
	 * @return Model merged from every thread's private model
	 */
	private GeneModel spawnThreads(){

		//Do anything necessary prior to processing

		//Next unclaimed index into fnaList/gffList
		final AtomicInteger aint=new AtomicInteger(0);

		//Fill a list with FileThreads
		ArrayList<FileThread> alpt=new ArrayList<FileThread>(threads);
		for(int i=0; i<threads; i++){
			alpt.add(new FileThread(aint));
		}

		//Start the threads
		for(FileThread pt : alpt){
			pt.start();
		}

		//Wait for threads to finish
		GeneModel pgm=waitForThreads(alpt);

		//Do anything necessary after processing
		return pgm;
	}

	/**
	 * Join every thread, merge its model into one, and record failures.
	 * errorState becomes true if any thread reported an error or did not
	 * reach the end of its run method.
	 * @param alpt Threads that have already been started
	 * @return Model merged from all threads
	 */
	private GeneModel waitForThreads(ArrayList<FileThread> alpt){

		GeneModel pgm=new GeneModel(false);

		//Wait for completion of all threads
		boolean success=true;
		for(FileThread pt : alpt){

			//Wait until this thread has terminated
			while(pt.getState()!=Thread.State.TERMINATED){
				try {
					//Attempt a join operation
					pt.join();
				} catch (InterruptedException e) {
					//Potentially handle this, if it is expected to occur
					e.printStackTrace();
				}
			}

			//Accumulate per-thread statistics
			pgm.add(pt.pgm);

			success&=pt.success;
			errorState|=pt.errorStateT;
		}

		//Track whether any threads failed
		if(!success){errorState=true;}
		return pgm;
	}

	/*--------------------------------------------------------------*/
	/*----------------         Inner Classes        ----------------*/
	/*--------------------------------------------------------------*/

	/**
	 * Worker that repeatedly claims the next file index from a shared counter
	 * and processes that fna/gff pair into its own private GeneModel.
	 */
	private class FileThread extends Thread {

		/**
		 * @param fnum_ Shared counter handing out indices into fnaList/gffList
		 */
		FileThread(AtomicInteger fnum_){
			fnum=fnum_;
			pgm=new GeneModel(true);
		}

		@Override
		public void run(){
			for(int i=fnum.getAndIncrement(); i<fnaList.size(); i=fnum.getAndIncrement()){
				String fna=fnaList.get(i);
				String gff=gffList.get(i);
				//Accumulate rather than assign, so an error on an earlier file
				//is not erased by a later file that processes cleanly
				errorStateT|=pgm.process(fna, gff);
//				System.err.println("Processed "+fna+" in "+this.toString());
			}
			//Only reached if no exception escaped the loop
			success=true;
		}

		/** Shared source of file indices */
		private final AtomicInteger fnum;
		/** Model built from the files this thread processed */
		private final GeneModel pgm;
		/** True if processing any file claimed by this thread reported an error */
		boolean errorStateT=false;
		/** True once run() has completed normally */
		boolean success=false;
	}

	/*--------------------------------------------------------------*/
	/*----------------            Fields            ----------------*/
	/*--------------------------------------------------------------*/

	/** Input fasta files */
	private ArrayList<String> fnaList=new ArrayList<String>();
	/** Input gff files, parallel to fnaList */
	private ArrayList<String> gffList=new ArrayList<String>();
	private IntList taxList=new IntList();
	/** Output pgm file name; may be null */
	private String out=null;

	/*--------------------------------------------------------------*/

	/** Length of the serialized model */
	private long bytesOut=0;
	static boolean alignRibo=true;
	static boolean adjustEndpoints=true;

	/*--------------------------------------------------------------*/
	/*----------------         Final Fields         ----------------*/
	/*--------------------------------------------------------------*/

	/** Output file format descriptor */
	private final FileFormat ffout;
	/** Number of FileThreads to spawn in multithreaded mode */
	private final int threads;

	/*--------------------------------------------------------------*/
	/*----------------        Common Fields         ----------------*/
	/*--------------------------------------------------------------*/

	/** Destination for status messages */
	private PrintStream outstream=System.err;
	public static boolean verbose=false;
	/** True if an error was encountered during processing or writing */
	public boolean errorState=false;
	private boolean overwrite=true;
	private boolean append=false;

}