Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/prok/RiboMaker.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package prok;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.PriorityQueue;

import aligner.Alignment;
import dna.AminoAcid;
import fileIO.ByteFile;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.ReadStats;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import stream.ConcurrentReadInputStream;
import stream.ConcurrentReadOutputStream;
import stream.FASTQ;
import stream.FastaReadInputStream;
import stream.Read;
import stream.ReadInputStream;
import structures.ListNum;
import structures.LongHashSet;
import template.Accumulator;
import template.ThreadWaiter;

/**
 * Makes a consensus ribosomal sequence using raw reads as input.
 * 
 * Reads are optionally pre-screened against a k-mer filter, aligned in both
 * orientations to a padded reference, and the best alignments are retained in
 * bounded per-window priority queues.
 * 
 * NOTE: This class is unfinished.  Several {@code assert(false) : "TODO"} markers
 * remain (in main, validateParams, and spawnThreads), so it aborts when run with
 * assertions enabled, and the consensus is never actually generated or written.
 * 
 * @author Brian Bushnell
 * @date October 10, 2019
 *
 */
public class RiboMaker implements Accumulator<RiboMaker.ProcessThread> {
	
	/*--------------------------------------------------------------*/
	/*----------------        Initialization        ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Code entrance from the command line.
	 * @param args Command line arguments
	 */
	public static void main(String[] args){
		assert(false) : "TODO";
		//Start a timer immediately upon code entrance.
		Timer t=new Timer();
		
		//Create an instance of this class
		RiboMaker x=new RiboMaker(args);
		
		//Run the object
		x.process(t);
		
		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}
	
	/**
	 * Constructor.
	 * Parses arguments, validates files, creates the FileFormat objects,
	 * then loads the optional k-mer filter and the reference.
	 * @param args Command line arguments
	 */
	public RiboMaker(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}
		
		//Set shared static variables prior to parsing
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
		
		{//Parse the arguments
			final Parser parser=parse(args);
			Parser.processQuality();
			
			maxReads=parser.maxReads;
			overwrite=ReadStats.overwrite=parser.overwrite;
			append=ReadStats.append=parser.append;
			setInterleaved=parser.setInterleaved;
			
			in1=parser.in1;
			in2=parser.in2;
			qfin1=parser.qfin1;
			qfin2=parser.qfin2;
			extin=parser.extin;
			
			out1=parser.out1;
			qfout1=parser.qfout1;
			extout=parser.extout;
		}
		
		validateParams();
		doPoundReplacement(); //Replace # with 1 and 2
		adjustInterleaving(); //Make sure interleaving agrees with number of input and output files
		fixExtensions(); //Add or remove .gz or .bz2 as needed
		checkFileExistence(); //Ensure files can be read and written
		checkStatics(); //Adjust file-related static fields as needed for this program
		
		//Create output FileFormat objects, plus the filter and reference inputs
		ffout1=FileFormat.testOutput(out1, FileFormat.FASTQ, extout, true, overwrite, append, ordered);
		fffilter=FileFormat.testInput(filterFile, FileFormat.FASTA, null, true, true);
		ffref=FileFormat.testInput(refFile, FileFormat.FASTA, null, true, true);
		
		//Create input FileFormat objects
		ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
		ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true);
		
		//The filter is optional; a null filter means every read passes
		if(fffilter==null){
			filter=null;
		}else{
			filter=loadFilter(fffilter, k);
		}
		loadRef();
	}
	
	/*--------------------------------------------------------------*/
	/*----------------    Initialization Helpers    ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Parse arguments from the command line.
	 * Flags handled here: verbose, ordered, filter, ref; everything else is
	 * delegated to the shared Parser.
	 * @param args Command line arguments, each in the form "a=b"
	 * @return The Parser holding the standard flags that were parsed
	 */
	private Parser parse(String[] args){
		
		//Create a parser object
		Parser parser=new Parser();
		
		//Set any necessary Parser defaults here
		//parser.foo=bar;
		
		//Parse each argument
		for(int i=0; i<args.length; i++){
			String arg=args[i];
			
			//Break arguments into their constituent parts, in the form of "a=b"
			String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;
			if(b!=null && b.equalsIgnoreCase("null")){b=null;}
			
			if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
			}else if(a.equals("ordered")){
				ordered=Parse.parseBoolean(b);
			}else if(a.equals("filter")){
				filterFile=b;
			}else if(a.equals("ref")){
				refFile=b;
			}else if(a.equals("parse_flag_goes_here")){
				long fake_variable=Parse.parseKMG(b);
				//Set a variable here
			}else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
				//do nothing
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}
		
		return parser;
	}
	
	/**
	 * Replace # with 1 and 2 in the input file name, when in1 contains '#',
	 * in2 was not given, and no file literally named in1 exists.
	 * @throws RuntimeException If no input file was specified
	 */
	private void doPoundReplacement(){
		//Do input file # replacement
		if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
			in2=in1.replace("#", "2");
			in1=in1.replace("#", "1");
		}
		
		//Ensure there is an input file
		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
	}
	
	/** Add or remove .gz or .bz2 as needed */
	private void fixExtensions(){
		in1=Tools.fixExtension(in1);
		in2=Tools.fixExtension(in2);
		qfin1=Tools.fixExtension(qfin1);
		qfin2=Tools.fixExtension(qfin2);
	}
	
	/**
	 * Ensure files can be read and written.
	 * @throws RuntimeException If an output cannot be written, an input cannot be read,
	 * or a file name was specified more than once
	 */
	private void checkFileExistence(){
		//Ensure output files can be written
		if(!Tools.testOutputFiles(overwrite, append, false, out1)){
			outstream.println((out1==null)+", "+out1);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
		}
		
		//Ensure input files can be read
		if(!Tools.testInputFiles(false, true, in1, in2, filterFile, refFile)){
			throw new RuntimeException("\nCan't read some input files.\n");
		}
		
		//Ensure that no file was specified multiple times
		if(!Tools.testForDuplicateFiles(true, in1, in2, out1, filterFile, refFile)){
			throw new RuntimeException("\nSome file names were specified multiple times.\n");
		}
		assert(refFile!=null);
	}
	
	/** Make sure interleaving agrees with number of input and output files */
	private void adjustInterleaving(){
		//Adjust interleaved detection based on the number of input files
		if(in2!=null){
			if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");}
			FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
		}
		
		//Adjust interleaved settings based on number of output files
		if(!setInterleaved){
			assert(in1!=null) : "\nin1="+in1+"\nin2="+in2+"\nout1="+out1+"\n";
			if(in2!=null){ //If there are 2 input streams.
				FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
				outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
			}
		}
	}
	
	/** Adjust file-related static fields as needed for this program */
	private static void checkStatics(){
		//Adjust the number of threads for input file reading
		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
			ByteFile.FORCE_MODE_BF2=true;
		}
		
		assert(FastaReadInputStream.settingsOK());
	}
	
	/**
	 * Ensure parameter ranges are within bounds and required parameters are set.
	 * Currently a placeholder: it performs no checks and trips a TODO assertion
	 * when assertions are enabled.
	 * @return Always true
	 */
	private boolean validateParams(){
//		assert(minfoo>0 && minfoo<=maxfoo) : minfoo+", "+maxfoo;
		assert(false) : "TODO";
		return true;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------         Outer Methods        ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Create read streams and process all data.
	 * @param t Timer started by the caller; stopped here to report throughput
	 * @throws RuntimeException If any stage flagged an error
	 */
	void process(Timer t){
		
		//Turn off read validation in the input threads to increase speed
		final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
		Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
		
		//Create a read input stream
		final ConcurrentReadInputStream cris=makeCris();
		
		//Optionally create a read output stream
		final ConcurrentReadOutputStream ros=makeCros(cris.paired());
		
		//Reset counters
		readsProcessed=readsOut=0;
		basesProcessed=basesOut=0;
		
		//Process the reads in separate threads
		spawnThreads(cris, ros);
		
		if(verbose){outstream.println("Finished; closing streams.");}
		
		//Write anything that was accumulated by ReadStats
		errorState|=ReadStats.writeAll();
		//Close the read streams
		errorState|=ReadWrite.closeStreams(cris, ros);
		
		//Reset read validation
		Read.VALIDATE_IN_CONSTRUCTOR=vic;
		
		//Report timing and results
		t.stop();
		outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
		outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
		
		//Throw an exception if there was an error in a thread
		if(errorState){
			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
		}
	}
	
	/**
	 * Load the first sequence of the reference file into ref0, build the padded
	 * copy ref (with 'N' on both sides, 'padding' bases each), and allocate one
	 * alignment queue per queueWidth positions of the padded reference.
	 * @throws RuntimeException If no reference was specified or no sequence could be loaded
	 */
	private void loadRef(){
		//Fail with a clear message rather than an NPE when assertions are disabled
		if(ffref==null){
			throw new RuntimeException("Error - a reference file is required (ref=<file>).");
		}
		final ArrayList<Read> reads=ReadInputStream.toReads(ffref, -1);
		if(reads==null || reads.isEmpty() || reads.get(0).bases==null){
			throw new RuntimeException("Error - no sequences could be loaded from reference file "+refFile);
		}
		
		//Only the first reference sequence is used
		ref0=reads.get(0).bases;
		ref=new byte[ref0.length+2*padding];
		for(int i=0, j=-padding; i<ref.length; i++, j++){
			//j indexes the original reference; positions outside it become N
			byte b=(j>=0 && j<ref0.length ? ref0[j] : (byte)'N');
			ref[i]=b;
		}
		queues=makeQueues(1+ref.length/queueWidth, queueLen);
	}
	
	/**
	 * Allocate an array of empty alignment queues.
	 * @param count Number of queues to create
	 * @param capacity Initial capacity of each queue
	 * @return A fully populated array of empty queues
	 */
	private static PriorityQueue<Alignment>[] makeQueues(final int count, final int capacity){
		//Generic array creation is not allowed in Java; the raw array only ever holds PriorityQueue<Alignment>
		@SuppressWarnings("unchecked")
		final PriorityQueue<Alignment>[] array=new PriorityQueue[count];
		for(int i=0; i<array.length; i++){
			array[i]=new PriorityQueue<Alignment>(capacity);
		}
		return array;
	}
	
	/**
	 * Load all k-mers of the sequences in a file into a set.
	 * For each position the larger of the forward k-mer and its
	 * reverse-complement encoding (per Tools.max) is stored.
	 * K-mers never span two sequences or an undefined base.
	 * @param ff Filter file; may be null
	 * @param k K-mer length
	 * @return The k-mer set, or null if ff is null or contains no sequences
	 */
	public static LongHashSet loadFilter(FileFormat ff, int k){
		if(ff==null){return null;}
		ArrayList<Read> reads=ReadInputStream.toReads(ff, -1);
		if(reads==null || reads.size()==0){return null;}
		LongHashSet set=new LongHashSet(4096);
		
		final int shift=2*k;
		final int shift2=shift-2;
		final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
		
		for(Read r : reads){
			final byte[] bases=r.bases;
			if(bases==null){continue;}
			
			//Reset the rolling state for every sequence so that no k-mer
			//is formed from the tail of one sequence and the head of the next
			int len=0;
			long kmer=0, rkmer=0;
			
			for(byte b : bases) {
				long x=AminoAcid.baseToNumber[b];
				long x2=AminoAcid.baseToComplementNumber[b];
				kmer=((kmer<<2)|x)&mask;
				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
				if(x>=0){
					len++;
					if(len>=k){
						set.add(Tools.max(kmer, rkmer));
					}
				}else{
					//Undefined base: restart the k-mer
					len=0;
					kmer=rkmer=0;
				}
			}
		}
		return set;
	}
	
	/**
	 * Test whether a read shares at least one k-mer with the filter set.
	 * @param r Read to test; may be null
	 * @param k K-mer length (must match the length used to build the set)
	 * @param set Filter k-mers; null disables filtering
	 * @return False if r is null; otherwise true if set is null or any k-mer of r is in set
	 */
	public static boolean passesFilter(Read r, int k, LongHashSet set){
		if(r==null) {return false;}
		if(set==null){return true;}
		
		final byte[] bases=r.bases;
		if(bases==null){return false;}//No bases means no k-mers can match
		
		final int shift=2*k;
		final int shift2=shift-2;
		final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
		int len=0;
		
		long kmer=0, rkmer=0;
		for(byte b : bases) {
			long x=AminoAcid.baseToNumber[b];
			long x2=AminoAcid.baseToComplementNumber[b];
			kmer=((kmer<<2)|x)&mask;
			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
			if(x>=0){
				len++;
				if(len>=k){
					long key=Tools.max(kmer, rkmer);
					if(set.contains(key)){return true;}
				}
			}else{
				//Undefined base: restart the k-mer
				len=0;
				kmer=rkmer=0;
			}
		}
		return false;
	}
	
	/**
	 * Create and start the read input stream.
	 * @return The started input stream
	 */
	private ConcurrentReadInputStream makeCris(){
		ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, qfin1, qfin2);
		cris.start(); //Start the stream
		if(verbose){outstream.println("Started cris");}
		boolean paired=cris.paired();
		if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));}
		return cris;
	}
	
	/**
	 * Create and start the read output stream, if an output file was specified.
	 * @param pairedInput Whether the input is paired (currently unused)
	 * @return The started output stream, or null if there is no output file
	 */
	private ConcurrentReadOutputStream makeCros(boolean pairedInput){
		if(ffout1==null){return null;}
		
		//Select output buffer size based on whether it needs to be ordered
		final int buff=(ordered ? Tools.mid(16, 128, (Shared.threads()*2)/3) : 8);
		
		final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ffout1, null, qfout1, null, buff, null, false);
		ros.start(); //Start the stream
		return ros;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------       Thread Management      ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Spawn process threads, wait for them, and record any failure.
	 * @param cris Shared input stream
	 * @param ros Output stream (currently unused by the worker threads)
	 */
	private void spawnThreads(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros){
		
		//Do anything necessary prior to processing
		
		//Determine how many threads may be used
		final int threads=Shared.threads();
		
		//Fill a list with ProcessThreads
		ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
		for(int i=0; i<threads; i++){
			alpt.add(new ProcessThread(cris, i));
		}
		
		//Start the threads and wait for them to finish
		boolean success=ThreadWaiter.startAndWait(alpt, this);
		//A failure must set the flag, and a success must never clear an earlier error
		errorState|=!success;
		
		//Do anything necessary after processing
		assert(false) : "TODO: Make consensus and write it?";
	}
	
	/**
	 * Merge the counters and alignment queues of a finished thread into this object.
	 * @param pt The finished thread
	 */
	@Override
	public final void accumulate(ProcessThread pt){
		readsProcessed+=pt.readsProcessedT;
		basesProcessed+=pt.basesProcessedT;
		readsOut+=pt.readsOutT;
		basesOut+=pt.basesOutT;
		errorState|=(!pt.success);
		
		//Merge each per-thread queue into the matching global queue, keeping it bounded
		for(int i=0; i<queues.length; i++){
			PriorityQueue<Alignment> q=queues[i];
			PriorityQueue<Alignment> qt=pt.queuesT[i];
			for(Alignment a : qt){
				addToQueue(a, q);
			}
		}
	}
	
	/** @return True if no error has been recorded */
	@Override
	public final boolean success(){return !errorState;}
	
	/*--------------------------------------------------------------*/
	/*----------------         Inner Methods        ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Add an alignment to the queue selected by its start position.
	 * @param best Alignment to add
	 * @param queues Array of queues, one per queueWidth positions
	 * @return True if the alignment was added
	 */
	boolean addToQueue(Alignment best, PriorityQueue<Alignment>[] queues){
		int start=best.start;
		int qnum=start/queueWidth;
		PriorityQueue<Alignment> queue=queues[qnum];
		return addToQueue(best, queue);
	}
	
	/**
	 * Add an alignment to a queue bounded at queueLen elements.
	 * When the queue is full, the alignment replaces the queue's least element
	 * (per compareTo) only if it is strictly greater.
	 * @param best Alignment to add
	 * @param queue Destination queue
	 * @return True if the alignment was added, false if it was rejected
	 */
	boolean addToQueue(Alignment best, PriorityQueue<Alignment> queue){
		if(queue.size()<queueLen){queue.add(best);}
		else{
			Alignment bottom=queue.peek();
			if(bottom.compareTo(best)>=0){return false;}
			queue.poll();
			queue.add(best);
		}
		return true;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------         Inner Classes        ----------------*/
	/*--------------------------------------------------------------*/
	
	/**
	 * Worker thread that aligns reads and collects the best alignments.
	 * This is a non-static inner class: it reads the enclosing instance's
	 * ref, filter, k, minID, queueLen and queueWidth, but only writes to its own fields.
	 */
	class ProcessThread extends Thread {
		
		/**
		 * Constructor.
		 * @param cris_ Shared input stream
		 * @param tid_ Thread ID
		 */
		ProcessThread(final ConcurrentReadInputStream cris_, final int tid_){
			cris=cris_;
			tid=tid_;
			//Same layout as the global queues so that they can be merged index by index
			queuesT=makeQueues(1+ref.length/queueWidth, queueLen);
		}
		
		//Called by start()
		@Override
		public void run(){
			//Do anything necessary prior to processing
			
			//Process the reads
			processInner();
			
			//Do anything necessary after processing
			
			//Indicate successful exit status
			success=true;
		}
		
		/** Iterate through the reads */
		void processInner(){
			
			//Grab the first ListNum of reads
			ListNum<Read> ln=cris.nextList();
			
			//Check to ensure pairing is as expected
			if(ln!=null && !ln.isEmpty()){
				Read r=ln.get(0);
//				assert(ffin1.samOrBam() || (r.mate!=null)==cris.paired()); //Disabled due to non-static access
			}
			
			//As long as there is a nonempty read list...
			while(ln!=null && ln.size()>0){
//				if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
				
				processList(ln);
				
				//Notify the input stream that the list was used
				cris.returnList(ln);
//				if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access
				
				//Fetch a new list
				ln=cris.nextList();
			}
			
			//Notify the input stream that the final list was used
			if(ln!=null){
				cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
			}
		}
		
		/**
		 * Validate, count, and process every read (pair) in a list.
		 * @param ln List of reads fetched from the input stream
		 */
		void processList(ListNum<Read> ln){
			
			//Grab the actual read list from the ListNum
			final ArrayList<Read> reads=ln.list;
			
			//Loop through each read in the list
			for(int idx=0; idx<reads.size(); idx++){
				final Read r1=reads.get(idx);
				final Read r2=r1.mate;
				
				//Validate reads in worker threads
				if(!r1.validated()){r1.validate(true);}
				if(r2!=null && !r2.validated()){r2.validate(true);}
				
				//Track the initial length for statistics
				final int initialLength1=r1.length();
				final int initialLength2=r1.mateLength();
				
				//Increment counters
				readsProcessedT+=r1.pairCount();
				basesProcessedT+=initialLength1+initialLength2;
				
				{
					//Reads are processed in this block.
					processReadPair(r1, r2);
					
//					if(!keep){reads.set(idx, null);}
//					else{
//						readsOutT+=r1.pairCount();
//						basesOutT+=r1.pairLength();
//					}
				}
			}
			
			//Output reads to the output stream
//			if(ros!=null){ros.add(reads, ln.id);}
		}
		
		/**
		 * Process a read or a read pair.
		 * The pair is aligned only if at least one of the reads passes the k-mer filter.
		 * @param r1 Read 1
		 * @param r2 Read 2 (may be null)
		 */
		void processReadPair(final Read r1, final Read r2){
			boolean pass=passesFilter(r1, k, filter) || passesFilter(r2, k, filter);
			if(!pass){return;}
			processRead(r1);
			processRead(r2);
		}
		
		/**
		 * Align a read to the padded reference in both orientations and queue the
		 * better alignment if its id is at least minID.
		 * The read is left reverse-complemented when the minus alignment wins.
		 * @param r Read to align; null (e.g. the absent mate of an unpaired read) is ignored
		 */
		void processRead(final Read r){
			if(r==null){return;}//Unpaired input has no mate
			
			Alignment plus=new Alignment(r);
			plus.align(ref);
			r.reverseComplement();
			Alignment minus=new Alignment(r);
			minus.align(ref);
			Alignment best=null;
			if(plus.id>=minus.id){
				//Restore the original orientation
				r.reverseComplement();
				best=plus;
			}else{
				best=minus;
			}
			if(best.id<minID) {return;}
			addToQueue(best, queuesT);
		}
		
		/** Number of reads processed by this thread */
		protected long readsProcessedT=0;
		/** Number of bases processed by this thread */
		protected long basesProcessedT=0;
		
		/** Number of reads retained by this thread */
		protected long readsOutT=0;
		/** Number of bases retained by this thread */
		protected long basesOutT=0;
		
		/** True only if this thread has completed successfully */
		boolean success=false;
		
		/** Best alignments found by this thread, one queue per queueWidth reference positions */
		private PriorityQueue<Alignment>[] queuesT;
		
		/** Shared input stream */
		private final ConcurrentReadInputStream cris;
		/** Thread ID */
		final int tid;
	}
	
	/*--------------------------------------------------------------*/
	/*----------------            Fields            ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Primary input file path */
	private String in1=null;
	/** Secondary input file path */
	private String in2=null;
	
	/** Primary input quality file path */
	private String qfin1=null;
	/** Secondary input quality file path */
	private String qfin2=null;
	
	/** Primary output file path */
	private String out1=null;
	
	/** Primary output quality file path */
	private String qfout1=null;
	
	/** Path of the optional k-mer filter file (flag "filter") */
	private String filterFile;
	
	/** Path of the reference file (flag "ref") */
	private String refFile;
	
	/** Override input file extension */
	private String extin=null;
	/** Override output file extension */
	private String extout=null;
	
	/** Whether interleaved was explicitly set. */
	private boolean setInterleaved=false;
	
	/** Original ref */
	private byte[] ref0;
	
	/** Padded ref */
	private byte[] ref;
	
	/** Number of N bases added to each side of the reference */
	private int padding=100;
	
	/** Maximum number of alignments retained per queue */
	private int queueLen=20;
	
	/** Number of padded-reference positions covered by each queue */
	private int queueWidth=20;
	
	/** Alignments with an id below this are discarded */
	private float minID=0.4f;
	
	/** Best alignments over all threads, one queue per queueWidth reference positions */
	private PriorityQueue<Alignment>[] queues;
	
	/*--------------------------------------------------------------*/
	
	/** Number of reads processed */
	protected long readsProcessed=0;
	/** Number of bases processed */
	protected long basesProcessed=0;
	
	/** Number of reads retained */
	protected long readsOut=0;
	/** Number of bases retained */
	protected long basesOut=0;
	
	/** Quit after processing this many input reads; -1 means no limit */
	private long maxReads=-1;
	
	/*--------------------------------------------------------------*/
	/*----------------         Final Fields         ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Primary input file */
	private final FileFormat ffin1;
	/** Secondary input file */
	private final FileFormat ffin2;
	/** Filter input file */
	private final FileFormat fffilter;
	/** Ref input file */
	private final FileFormat ffref;
	
	/** Primary output file */
	private final FileFormat ffout1;
	
	/** K-mers loaded from the filter file, or null if there is no filter */
	private final LongHashSet filter;
	
	/** K-mer length used for filtering */
	private final int k=31;
	
	/*--------------------------------------------------------------*/
	/*----------------        Common Fields         ----------------*/
	/*--------------------------------------------------------------*/
	
	/** Print status messages to this output stream */
	private PrintStream outstream=System.err;
	/** Print verbose messages */
	public static boolean verbose=false;
	/** True if an error was encountered */
	public boolean errorState=false;
	/** Overwrite existing output files */
	private boolean overwrite=false;
	/** Append to existing output files */
	private boolean append=false;
	/** Reads are output in input order */
	private boolean ordered=false;
	
}