jpayne@68: package prok; jpayne@68: jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Arrays; jpayne@68: jpayne@68: import aligner.SingleStateAlignerFlat2; jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.ReadStats; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import stream.ConcurrentReadInputStream; jpayne@68: import stream.ConcurrentReadOutputStream; jpayne@68: import stream.FastaReadInputStream; jpayne@68: import stream.Read; jpayne@68: import structures.ListNum; jpayne@68: import template.Accumulator; jpayne@68: import template.ThreadWaiter; jpayne@68: jpayne@68: /** jpayne@68: * Splits a mix of ribosomal sequences (such as Silva) into different files per type (16S, 18S, etc). jpayne@68: * jpayne@68: * @author Brian Bushnell jpayne@68: * @date November 19, 2015 jpayne@68: * jpayne@68: */ jpayne@68: public class SplitRibo implements Accumulator { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: SplitRibo x=new SplitRibo(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public SplitRibo(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables prior to parsing jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: Shared.capBufferLen(50); jpayne@68: ReadWrite.ZIPLEVEL=9; jpayne@68: jpayne@68: {//Parse the arguments jpayne@68: final Parser parser=parse(args); jpayne@68: Parser.processQuality(); jpayne@68: jpayne@68: maxReads=parser.maxReads; jpayne@68: overwrite=ReadStats.overwrite=parser.overwrite; jpayne@68: append=ReadStats.append=parser.append; jpayne@68: jpayne@68: in1=parser.in1; jpayne@68: qfin1=parser.qfin1; jpayne@68: extin=parser.extin; jpayne@68: jpayne@68: outPattern=parser.out1; jpayne@68: extout=parser.extout; jpayne@68: } jpayne@68: jpayne@68: validateParams(); jpayne@68: fixExtensions(); //Add or remove .gz or .bz2 as needed jpayne@68: checkFileExistence(); //Ensure files can be read and written jpayne@68: checkStatics(); //Adjust file-related static fields as needed for this program jpayne@68: jpayne@68: //Create input FileFormat objects jpayne@68: ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true); jpayne@68: jpayne@68: numTypes=sequenceTypes.length; jpayne@68: readsOut=new long[numTypes]; jpayne@68: basesOut=new long[numTypes]; jpayne@68: consensusSequences=loadConsensusSequenceFromFile(); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization Helpers ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Parse arguments from the command line */ jpayne@68: private Parser parse(String[] args){ jpayne@68: jpayne@68: //Create a parser object jpayne@68: Parser parser=new Parser(); jpayne@68: jpayne@68: //Set any necessary Parser defaults here jpayne@68: //parser.foo=bar; jpayne@68: jpayne@68: //Parse each argument jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: if(b!=null && b.equalsIgnoreCase("null")){b=null;} jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("ordered")){ jpayne@68: ordered=Parse.parseBoolean(b); jpayne@68: }else if(a.equalsIgnoreCase("minid")){ jpayne@68: minID=Float.parseFloat(b); jpayne@68: }else if(a.equalsIgnoreCase("minid2") || a.equalsIgnoreCase("refineid")){ jpayne@68: refineID=Float.parseFloat(b); jpayne@68: }else if(a.equals("out") || a.equals("pattern") || a.equals("outpattern")){ jpayne@68: parser.out1=b; jpayne@68: }else if(a.equals("type") || a.equals("types")){ jpayne@68: parseTypes(b); jpayne@68: }else if(a.equals("parse_flag_goes_here")){ jpayne@68: long fake_variable=Parse.parseKMG(b); jpayne@68: //Set a variable here jpayne@68: }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser jpayne@68: //do nothing jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: return parser; jpayne@68: } jpayne@68: jpayne@68: private void parseTypes(String b){ jpayne@68: sequenceTypes=null; jpayne@68: if(b==null){ jpayne@68: assert(false) : "'types' flag requires a list of types, such as 'types=16S,18S'"; jpayne@68: sequenceTypes=new String[] {"Other"}; jpayne@68: }else{ jpayne@68: String[] split=b.split(","); jpayne@68: sequenceTypes=new String[split.length+1]; jpayne@68: sequenceTypes[0]="Other"; jpayne@68: for(int i=0; i2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: assert(FastaReadInputStream.settingsOK()); jpayne@68: } jpayne@68: jpayne@68: /** Ensure parameter ranges are within bounds and required parameters are set */ jpayne@68: private boolean validateParams(){ jpayne@68: // assert(minfoo>0 && minfoo<=maxfoo) : minfoo+", "+maxfoo; jpayne@68: if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: private final Read[][] loadConsensusSequenceFromFile(){ jpayne@68: Read[][] seqs=new Read[numTypes][]; jpayne@68: m16S_index=Tools.find("m16S", sequenceTypes); jpayne@68: m18S_index=Tools.find("m18S", sequenceTypes); jpayne@68: p16S_index=Tools.find("p16S", sequenceTypes); jpayne@68: boolean stripM16S=(m16S_index>=0); jpayne@68: boolean stripM18S=(m18S_index>=0); jpayne@68: boolean stripP16S=(p16S_index>=0); jpayne@68: for(int st=1; st alpt=new ArrayList(threads); jpayne@68: for(int i=0; i ln=cris.nextList(); jpayne@68: jpayne@68: //Check to ensure pairing is as expected jpayne@68: if(ln!=null && !ln.isEmpty()){ jpayne@68: Read r=ln.get(0); jpayne@68: assert(r.mate==null); jpayne@68: } jpayne@68: jpayne@68: //As long as there is a nonempty read list... jpayne@68: while(ln!=null && ln.size()>0){ jpayne@68: // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access jpayne@68: jpayne@68: processList(ln); jpayne@68: jpayne@68: //Notify the input stream that the list was used jpayne@68: cris.returnList(ln); jpayne@68: // if(verbose){outstream.println("Returned a list.");} //Disabled due to non-static access jpayne@68: jpayne@68: //Fetch a new list jpayne@68: ln=cris.nextList(); jpayne@68: } jpayne@68: jpayne@68: //Notify the input stream that the final list was used jpayne@68: if(ln!=null){ jpayne@68: cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: void processList(ListNum ln){ jpayne@68: jpayne@68: //Grab the actual read list from the ListNum jpayne@68: final ArrayList reads=ln.list; jpayne@68: jpayne@68: @SuppressWarnings("unchecked") jpayne@68: final ArrayList[] out=new ArrayList[numTypes]; jpayne@68: for(int i=0; i list=new ArrayList(50); jpayne@68: out[i]=list; jpayne@68: } jpayne@68: jpayne@68: //Loop through each read in the list jpayne@68: for(int idx=0; idxbestID && id>=minID){ jpayne@68: bestType=type; jpayne@68: bestID=id; jpayne@68: } jpayne@68: } jpayne@68: if(bestType<1 || bestIDbestID && id>=minID){ jpayne@68: bestType=type; jpayne@68: bestID=id; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: r.obj=bestID;//If desired... in actuality, more info might be useful, like alignment length jpayne@68: return bestID