annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/AnalyzeGenes.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package prok;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6 import java.util.Locale;
jpayne@68 7 import java.util.concurrent.atomic.AtomicInteger;
jpayne@68 8
jpayne@68 9 import fileIO.ByteFile;
jpayne@68 10 import fileIO.ByteStreamWriter;
jpayne@68 11 import fileIO.FileFormat;
jpayne@68 12 import fileIO.ReadWrite;
jpayne@68 13 import shared.Parse;
jpayne@68 14 import shared.Parser;
jpayne@68 15 import shared.PreParser;
jpayne@68 16 import shared.Shared;
jpayne@68 17 import shared.Timer;
jpayne@68 18 import shared.Tools;
jpayne@68 19 import structures.ByteBuilder;
jpayne@68 20 import structures.IntList;
jpayne@68 21
jpayne@68 22 /**
jpayne@68 23 * This class is designed to analyze paired prokaryotic fna and gff files
jpayne@68 24 * to calculate the patterns in coding and noncoding frames, start and stop sites.
jpayne@68 25 * It outputs a pgm file.
jpayne@68 26 * @author Brian Bushnell
jpayne@68 27 * @date Sep 27, 2018
jpayne@68 28 *
jpayne@68 29 */
jpayne@68 30 public class AnalyzeGenes {
jpayne@68 31
jpayne@68 32 /*--------------------------------------------------------------*/
jpayne@68 33 /*---------------- Initialization ----------------*/
jpayne@68 34 /*--------------------------------------------------------------*/
jpayne@68 35
jpayne@68 36 /**
jpayne@68 37 * Code entrance from the command line.
jpayne@68 38 * @param args Command line arguments
jpayne@68 39 */
jpayne@68 40 public static void main(String[] args){
jpayne@68 41 //Start a timer immediately upon code entrance.
jpayne@68 42 Timer t=new Timer();
jpayne@68 43
jpayne@68 44 //Create an instance of this class
jpayne@68 45 AnalyzeGenes x=new AnalyzeGenes(args);
jpayne@68 46
jpayne@68 47 //Run the object
jpayne@68 48 x.process(t);
jpayne@68 49
jpayne@68 50 //Close the print stream if it was redirected
jpayne@68 51 Shared.closeStream(x.outstream);
jpayne@68 52 }
jpayne@68 53
/**
 * Constructor.
 * Preparses and parses the arguments, optionally loads consensus sequences,
 * validates the input and output files, and fixes the thread count and
 * output format for this run.
 * @param args Command line arguments
 */
public AnalyzeGenes(String[] args){

	//Preparse for help, config files, and outstream
	final PreParser preParser=new PreParser(args, null/*getClass()*/, false);
	args=preParser.args;
	outstream=preParser.outstream;

	//Set shared static variables prior to parsing
	ReadWrite.USE_UNPIGZ=true;
	ReadWrite.USE_PIGZ=true;
	ReadWrite.MAX_ZIP_THREADS=Shared.threads();

	//Parse the arguments and copy out the settings held by the Parser
	final Parser parser=parse(args);
	overwrite=parser.overwrite;
	append=parser.append;
	out=parser.out1;

	//Load sequences, if ribosomal alignment is enabled
	if(alignRibo){
		ProkObject.loadConsensusSequenceFromFile(false, false);
	}

	fixExtensions(); //Add or remove .gz or .bz2 as needed
	checkFileExistence(); //Ensure files can be read and written
	checkStatics(); //Adjust file-related static fields as needed for this program

	//Determine how many threads may be used: never more than the number of files
	//or the shared thread limit, and never more than max(32, half the logical processors).
	final int fileCount=fnaList.size();
	final int sharedLimit=Shared.threads();
	final int hardCap=Tools.max(32, Shared.CALC_LOGICAL_PROCESSORS()/2);
	threads=Tools.min(fileCount, sharedLimit, hardCap);

	ffout=FileFormat.testOutput(out, FileFormat.PGM, null, true, overwrite, append, false);
}
jpayne@68 92
jpayne@68 93 /*--------------------------------------------------------------*/
jpayne@68 94 /*---------------- Initialization Helpers ----------------*/
jpayne@68 95 /*--------------------------------------------------------------*/
jpayne@68 96
jpayne@68 97 /** Parse arguments from the command line */
jpayne@68 98 private Parser parse(String[] args){
jpayne@68 99
jpayne@68 100 Parser parser=new Parser();
jpayne@68 101 parser.overwrite=overwrite;
jpayne@68 102 for(int i=0; i<args.length; i++){
jpayne@68 103 String arg=args[i];
jpayne@68 104 String[] split=arg.split("=");
jpayne@68 105 String a=split[0].toLowerCase();
jpayne@68 106 String b=split.length>1 ? split[1] : null;
jpayne@68 107 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
jpayne@68 108
jpayne@68 109 // outstream.println(arg+", "+a+", "+b);
jpayne@68 110 if(PGMTools.parseStatic(arg, a, b)){
jpayne@68 111 //do nothing
jpayne@68 112 }else if(a.equals("in") || a.equals("infna") || a.equals("fnain") || a.equals("fna") || a.equals("ref")){
jpayne@68 113 assert(b!=null);
jpayne@68 114 Tools.addFiles(b, fnaList);
jpayne@68 115 }else if(a.equals("gff") || a.equals("ingff") || a.equals("gffin")){
jpayne@68 116 assert(b!=null);
jpayne@68 117 Tools.addFiles(b, gffList);
jpayne@68 118 }else if(a.equals("verbose")){
jpayne@68 119 verbose=Parse.parseBoolean(b);
jpayne@68 120 ReadWrite.verbose=verbose;
jpayne@68 121 }else if(a.equals("alignribo") || a.equals("align")){
jpayne@68 122 alignRibo=Parse.parseBoolean(b);
jpayne@68 123 }else if(a.equals("adjustendpoints")){
jpayne@68 124 adjustEndpoints=Parse.parseBoolean(b);
jpayne@68 125 }
jpayne@68 126
jpayne@68 127 else if(ProkObject.parse(arg, a, b)){}
jpayne@68 128
jpayne@68 129 else if(parser.parse(arg, a, b)){
jpayne@68 130 //do nothing
jpayne@68 131 }else if(arg.indexOf('=')<0 && new File(arg).exists() && FileFormat.isFastaFile(arg)){
jpayne@68 132 fnaList.add(arg);
jpayne@68 133 }else{
jpayne@68 134 outstream.println("Unknown parameter "+args[i]);
jpayne@68 135 assert(false) : "Unknown parameter "+args[i];
jpayne@68 136 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 137 }
jpayne@68 138 }
jpayne@68 139
jpayne@68 140 if(gffList.isEmpty()){
jpayne@68 141 for(String s : fnaList){
jpayne@68 142 String prefix=ReadWrite.stripExtension(s);
jpayne@68 143 String gff=prefix+".gff";
jpayne@68 144 File f=new File(gff);
jpayne@68 145 if(!f.exists()){
jpayne@68 146 String gz=gff+".gz";
jpayne@68 147 f=new File(gz);
jpayne@68 148 assert(f.exists() && f.canRead()) : "Can't read file "+gff;
jpayne@68 149 gff=gz;
jpayne@68 150 }
jpayne@68 151 gffList.add(gff);
jpayne@68 152 }
jpayne@68 153 }
jpayne@68 154 assert(gffList.size()==fnaList.size()) : "Number of fna and gff files do not match: "+fnaList.size()+", "+gffList.size();
jpayne@68 155 return parser;
jpayne@68 156 }
jpayne@68 157
jpayne@68 158 /** Add or remove .gz or .bz2 as needed */
jpayne@68 159 private void fixExtensions(){
jpayne@68 160 fnaList=Tools.fixExtension(fnaList);
jpayne@68 161 gffList=Tools.fixExtension(gffList);
jpayne@68 162 if(fnaList.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 163 }
jpayne@68 164
jpayne@68 165 /** Ensure files can be read and written */
jpayne@68 166 private void checkFileExistence(){
jpayne@68 167 //Ensure output files can be written
jpayne@68 168 if(!Tools.testOutputFiles(overwrite, append, false, out)){
jpayne@68 169 outstream.println((out==null)+", "+out);
jpayne@68 170 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n");
jpayne@68 171 }
jpayne@68 172
jpayne@68 173 //Ensure input files can be read
jpayne@68 174 ArrayList<String> foo=new ArrayList<String>();
jpayne@68 175 foo.addAll(fnaList);
jpayne@68 176 foo.addAll(gffList);
jpayne@68 177 if(!Tools.testInputFiles(false, true, foo.toArray(new String[0]))){
jpayne@68 178 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 179 }
jpayne@68 180
jpayne@68 181 //Ensure that no file was specified multiple times
jpayne@68 182 foo.add(out);
jpayne@68 183 if(!Tools.testForDuplicateFiles(true, foo.toArray(new String[0]))){
jpayne@68 184 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 185 }
jpayne@68 186 }
jpayne@68 187
jpayne@68 188 /** Adjust file-related static fields as needed for this program */
jpayne@68 189 private static void checkStatics(){
jpayne@68 190 //Adjust the number of threads for input file reading
jpayne@68 191 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 192 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 193 }
jpayne@68 194 }
jpayne@68 195
jpayne@68 196 /*--------------------------------------------------------------*/
jpayne@68 197 /*---------------- Outer Methods ----------------*/
jpayne@68 198 /*--------------------------------------------------------------*/
jpayne@68 199
jpayne@68 200 void process(Timer t){
jpayne@68 201
jpayne@68 202 final GeneModel pgm;
jpayne@68 203 if(Shared.threads()<2 || fnaList.size()<2){
jpayne@68 204 pgm=makeModelST();
jpayne@68 205 }else{
jpayne@68 206 pgm=spawnThreads();
jpayne@68 207 }
jpayne@68 208
jpayne@68 209 ByteStreamWriter bsw=ByteStreamWriter.makeBSW(ffout);
jpayne@68 210
jpayne@68 211 ByteBuilder bb=new ByteBuilder();
jpayne@68 212 pgm.appendTo(bb);
jpayne@68 213 bytesOut+=bb.length;
jpayne@68 214
jpayne@68 215 if(bsw!=null){
jpayne@68 216 bsw.addJob(bb);
jpayne@68 217 errorState|=bsw.poisonAndWait();
jpayne@68 218 }
jpayne@68 219
jpayne@68 220 t.stop();
jpayne@68 221
jpayne@68 222 outstream.println(timeReadsBasesGenesProcessed(t, pgm.readsProcessed, pgm.basesProcessed, pgm.genesProcessed, pgm.filesProcessed, 8));
jpayne@68 223
jpayne@68 224 outstream.println();
jpayne@68 225 outstream.println(typesProcessed(pgm, 12));
jpayne@68 226
jpayne@68 227 //outstream.println("Bytes Out: \t"+bytesOut);
jpayne@68 228
jpayne@68 229 if(errorState){
jpayne@68 230 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 231 }
jpayne@68 232 }
jpayne@68 233
jpayne@68 234 private static String timeReadsBasesGenesProcessed(Timer t, long readsProcessed, long basesProcessed, long genesProcessed, long filesProcessed, int pad){
jpayne@68 235 return ("Time: \t"+t+"\n"+readsBasesGenesProcessed(t.elapsed, readsProcessed, basesProcessed, genesProcessed, filesProcessed, pad));
jpayne@68 236 }
jpayne@68 237
jpayne@68 238 private static String readsBasesGenesProcessed(long elapsed, long reads, long bases, long genes, long files, int pad){
jpayne@68 239 double rpnano=reads/(double)elapsed;
jpayne@68 240 double bpnano=bases/(double)elapsed;
jpayne@68 241 double gpnano=genes/(double)elapsed;
jpayne@68 242 double fpnano=files/(double)elapsed;
jpayne@68 243
jpayne@68 244 String rstring=Tools.padKM(reads, pad);
jpayne@68 245 String bstring=Tools.padKM(bases, pad);
jpayne@68 246 String gstring=Tools.padKM(genes, pad);
jpayne@68 247 String fstring=Tools.padKM(files, pad);
jpayne@68 248 ByteBuilder sb=new ByteBuilder();
jpayne@68 249 sb.append("Files Processed: ").append(fstring).append(String.format(Locale.ROOT, " \t%.2f files/sec", fpnano*1000000000)).append('\n');
jpayne@68 250 sb.append("Sequences Processed:").append(rstring).append(String.format(Locale.ROOT, " \t%.2fk seqs/sec", rpnano*1000000)).append('\n');
jpayne@68 251 sb.append("Genes Processed: ").append(gstring).append(String.format(Locale.ROOT, " \t%.2fk genes/sec", gpnano*1000000)).append('\n');
jpayne@68 252 sb.append("Bases Processed: ").append(bstring).append(String.format(Locale.ROOT, " \t%.2fm bases/sec", bpnano*1000));
jpayne@68 253 return sb.toString();
jpayne@68 254 }
jpayne@68 255
jpayne@68 256 private static String typesProcessed(GeneModel pgm, int pad){
jpayne@68 257
jpayne@68 258 ByteBuilder sb=new ByteBuilder();
jpayne@68 259 sb.append("CDS: "+Tools.padLeft(pgm.statsCDS.lengthCount, pad)).nl();
jpayne@68 260 sb.append("tRNA: "+Tools.padLeft(pgm.statstRNA.lengthCount, pad)).nl();
jpayne@68 261 sb.append("16S: "+Tools.padLeft(pgm.stats16S.lengthCount, pad)).nl();
jpayne@68 262 sb.append("23S: "+Tools.padLeft(pgm.stats23S.lengthCount, pad)).nl();
jpayne@68 263 sb.append("5S: "+Tools.padLeft(pgm.stats5S.lengthCount, pad)).nl();
jpayne@68 264 sb.append("18S: "+Tools.padLeft(pgm.stats18S.lengthCount, pad));
jpayne@68 265 return sb.toString();
jpayne@68 266 }
jpayne@68 267
jpayne@68 268 /*--------------------------------------------------------------*/
jpayne@68 269 /*---------------- Inner Methods ----------------*/
jpayne@68 270 /*--------------------------------------------------------------*/
jpayne@68 271
jpayne@68 272 //TODO: Process each file in a thread.
jpayne@68 273 private GeneModel makeModelST(){
jpayne@68 274 GeneModel pgmSum=new GeneModel(true);
jpayne@68 275
jpayne@68 276 for(int i=0; i<fnaList.size(); i++){
jpayne@68 277 String fna=fnaList.get(i);
jpayne@68 278 String gff=gffList.get(i);
jpayne@68 279 pgmSum.process(fna, gff);
jpayne@68 280 }
jpayne@68 281 return pgmSum;
jpayne@68 282 }
jpayne@68 283
jpayne@68 284 /*--------------------------------------------------------------*/
jpayne@68 285 /*---------------- Thread Management ----------------*/
jpayne@68 286 /*--------------------------------------------------------------*/
jpayne@68 287
jpayne@68 288 /** Spawn process threads */
jpayne@68 289 private GeneModel spawnThreads(){
jpayne@68 290
jpayne@68 291 //Do anything necessary prior to processing
jpayne@68 292
jpayne@68 293 final AtomicInteger aint=new AtomicInteger(0);
jpayne@68 294
jpayne@68 295 //Fill a list with FileThreads
jpayne@68 296 ArrayList<FileThread> alpt=new ArrayList<FileThread>(threads);
jpayne@68 297 for(int i=0; i<threads; i++){
jpayne@68 298 alpt.add(new FileThread(aint));
jpayne@68 299 }
jpayne@68 300
jpayne@68 301 //Start the threads
jpayne@68 302 for(FileThread pt : alpt){
jpayne@68 303 pt.start();
jpayne@68 304 }
jpayne@68 305
jpayne@68 306 //Wait for threads to finish
jpayne@68 307 GeneModel pgm=waitForThreads(alpt);
jpayne@68 308
jpayne@68 309 //Do anything necessary after processing
jpayne@68 310 return pgm;
jpayne@68 311 }
jpayne@68 312
jpayne@68 313 private GeneModel waitForThreads(ArrayList<FileThread> alpt){
jpayne@68 314
jpayne@68 315 GeneModel pgm=new GeneModel(false);
jpayne@68 316
jpayne@68 317 //Wait for completion of all threads
jpayne@68 318 boolean success=true;
jpayne@68 319 for(FileThread pt : alpt){
jpayne@68 320
jpayne@68 321 //Wait until this thread has terminated
jpayne@68 322 while(pt.getState()!=Thread.State.TERMINATED){
jpayne@68 323 try {
jpayne@68 324 //Attempt a join operation
jpayne@68 325 pt.join();
jpayne@68 326 } catch (InterruptedException e) {
jpayne@68 327 //Potentially handle this, if it is expected to occur
jpayne@68 328 e.printStackTrace();
jpayne@68 329 }
jpayne@68 330 }
jpayne@68 331
jpayne@68 332 //Accumulate per-thread statistics
jpayne@68 333 pgm.add(pt.pgm);
jpayne@68 334
jpayne@68 335 success&=pt.success;
jpayne@68 336 errorState|=pt.errorStateT;
jpayne@68 337 }
jpayne@68 338
jpayne@68 339 //Track whether any threads failed
jpayne@68 340 if(!success){errorState=true;}
jpayne@68 341 return pgm;
jpayne@68 342 }
jpayne@68 343
jpayne@68 344 /*--------------------------------------------------------------*/
jpayne@68 345 /*---------------- Inner Classes ----------------*/
jpayne@68 346 /*--------------------------------------------------------------*/
jpayne@68 347
jpayne@68 348 private class FileThread extends Thread {
jpayne@68 349
jpayne@68 350 FileThread(AtomicInteger fnum_){
jpayne@68 351 fnum=fnum_;
jpayne@68 352 pgm=new GeneModel(true);
jpayne@68 353 }
jpayne@68 354
jpayne@68 355 @Override
jpayne@68 356 public void run(){
jpayne@68 357 for(int i=fnum.getAndIncrement(); i<fnaList.size(); i=fnum.getAndIncrement()){
jpayne@68 358 String fna=fnaList.get(i);
jpayne@68 359 String gff=gffList.get(i);
jpayne@68 360 errorStateT=pgm.process(fna, gff)|errorState;
jpayne@68 361 // System.err.println("Processed "+fna+" in "+this.toString());
jpayne@68 362 }
jpayne@68 363 success=true;
jpayne@68 364 }
jpayne@68 365
jpayne@68 366 private final AtomicInteger fnum;
jpayne@68 367 private final GeneModel pgm;
jpayne@68 368 boolean errorStateT=false;
jpayne@68 369 boolean success=false;
jpayne@68 370 }
jpayne@68 371
jpayne@68 372 /*--------------------------------------------------------------*/
jpayne@68 373 /*---------------- Fields ----------------*/
jpayne@68 374 /*--------------------------------------------------------------*/
jpayne@68 375
jpayne@68 376 private ArrayList<String> fnaList=new ArrayList<String>();
jpayne@68 377 private ArrayList<String> gffList=new ArrayList<String>();
jpayne@68 378 private IntList taxList=new IntList();
jpayne@68 379 private String out=null;
jpayne@68 380
jpayne@68 381 /*--------------------------------------------------------------*/
jpayne@68 382
jpayne@68 383 private long bytesOut=0;
jpayne@68 384 static boolean alignRibo=true;
jpayne@68 385 static boolean adjustEndpoints=true;
jpayne@68 386
jpayne@68 387 /*--------------------------------------------------------------*/
jpayne@68 388 /*---------------- Final Fields ----------------*/
jpayne@68 389 /*--------------------------------------------------------------*/
jpayne@68 390
jpayne@68 391 private final FileFormat ffout;
jpayne@68 392 private final int threads;
jpayne@68 393
jpayne@68 394 /*--------------------------------------------------------------*/
jpayne@68 395 /*---------------- Common Fields ----------------*/
jpayne@68 396 /*--------------------------------------------------------------*/
jpayne@68 397
jpayne@68 398 private PrintStream outstream=System.err;
jpayne@68 399 public static boolean verbose=false;
jpayne@68 400 public boolean errorState=false;
jpayne@68 401 private boolean overwrite=true;
jpayne@68 402 private boolean append=false;
jpayne@68 403
jpayne@68 404 }
jpayne@68 405