jpayne@68: package clump; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Random; jpayne@68: jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import jgi.BBMerge; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import sort.SortByName; jpayne@68: import stream.FASTQ; jpayne@68: import stream.Read; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.Quantizer; jpayne@68: jpayne@68: /** jpayne@68: * Command-line front end that parses arguments and then drives the KmerSplit and KmerSort stages. With groups equal to 1 it calls KmerSort1 directly. Otherwise it runs one or more passes: by default each pass is KmerSplit followed by KmerSort1, and when V2 or V3 is set KmerSplit runs on the first pass only and KmerSort2 or KmerSort3 runs on every pass. NOTE(review): this copy of the file carries a per-line annotation prefix and appears to have lost text that sat between less-than and greater-than characters (loop conditions, generic type parameters, and most of the helper methods near the end of the class); restore it from the upstream source before relying on it. jpayne@68: * @author Brian Bushnell jpayne@68: * @date Nov 6, 2015 jpayne@68: * jpayne@68: */ jpayne@68: public class Clumpify { jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. Raises ReadWrite.ZIPLEVEL to at least 6, sets ReadWrite.USE_BGZIP and ReadWrite.PREFER_BGZIP to true and the change-quality flags to false, constructs a Clumpify from the arguments and runs process on it, then restores USE_BGZIP, PREFER_BGZIP and the change-quality flags to the values captured on entry. ZIPLEVEL is not restored (that statement is commented out). Finally closes the instance's print stream via Shared.closeStream. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: Timer t=new Timer(); jpayne@68: ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6); jpayne@68: jpayne@68: //Capture values of static variables that might be modified in case this is called by another class. jpayne@68: final boolean oldCQ=Read.CHANGE_QUALITY; jpayne@68: final boolean oldBgzip=ReadWrite.USE_BGZIP, oldPreferBgzip=ReadWrite.PREFER_BGZIP; jpayne@68: jpayne@68: BBMerge.changeQuality=Read.CHANGE_QUALITY=false; jpayne@68: ReadWrite.USE_BGZIP=true; jpayne@68: ReadWrite.PREFER_BGZIP=true; jpayne@68: jpayne@68: Clumpify x=new Clumpify(args); jpayne@68: x.process(t); jpayne@68: jpayne@68: //Restore values of static variables. 
jpayne@68: // Shared.setBuffers(oldCap); jpayne@68: // ReadWrite.ZIPLEVEL=oldZl; jpayne@68: // ReadWrite.USE_PIGZ=oldPigz; jpayne@68: ReadWrite.USE_BGZIP=oldBgzip; jpayne@68: ReadWrite.PREFER_BGZIP=oldPreferBgzip; jpayne@68: // ReadWrite.USE_UNPIGZ=oldUnpigz; jpayne@68: // ReadWrite.MAX_ZIP_THREADS=oldZipThreads; jpayne@68: BBMerge.changeQuality=Read.CHANGE_QUALITY=oldCQ; jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. Runs the PreParser, then walks the arguments as key=value pairs. Recognized keys set instance fields or static settings on other classes (KmerComparator, KmerSort3, ReadKey, FASTQ, BBMerge, Read, Shared); keys accepted by Clump.parseStatic or Parser.parseQuality are consumed there; anything else is appended to args2 so it is forwarded to the downstream tools. The passes key is both stored and forwarded. args2 is pre-seeded with twelve placeholder entries whose slots are overwritten by index in process and the runOnePass methods. After parsing, a '#' in in1 or out1 is expanded into a 1/2 file pair, the input is checked for readability, and autoSetGroups is called with the groups string (default auto). jpayne@68: * @param args Command line arguments jpayne@68: * @throws RuntimeException if no input file was given or the input file cannot be read jpayne@68: */ jpayne@68: public Clumpify(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), true); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4; jpayne@68: jpayne@68: args2=new ArrayList(); jpayne@68: args2.add("in1"); jpayne@68: args2.add("in2"); jpayne@68: args2.add("out1"); jpayne@68: args2.add("out2"); jpayne@68: args2.add("groups"); jpayne@68: args2.add("ecco=f"); jpayne@68: args2.add("rename=f"); jpayne@68: args2.add("shortname=f"); jpayne@68: args2.add("unpair=f"); jpayne@68: args2.add("repair=f"); jpayne@68: args2.add("namesort=f"); jpayne@68: args2.add("overwrite=t"); jpayne@68: jpayne@68: String gString="auto"; jpayne@68: for(int i=0; i1 ? 
split[1] : null; jpayne@68: jpayne@68: if(a.equals("in") || a.equals("in1")){ jpayne@68: in1=b; jpayne@68: }else if(a.equals("in2")){ jpayne@68: in2=b; jpayne@68: }else if(a.equals("out") || a.equals("out1")){ jpayne@68: out1=b; jpayne@68: }else if(a.equals("out2")){ jpayne@68: out2=b; jpayne@68: }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){ jpayne@68: gString=b; jpayne@68: }else if(a.equals("delete") || a.equals("deletetemp")){ jpayne@68: delete=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("deleteinput")){ jpayne@68: deleteInput=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("usetmpdir")){ jpayne@68: useTmpdir=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("ecco")){ jpayne@68: ecco=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("compresstemp") || a.equals("ct")){ jpayne@68: if(b!=null && b.equalsIgnoreCase("auto")){forceCompressTemp=forceRawTemp=false;} jpayne@68: else{ jpayne@68: forceCompressTemp=Parse.parseBoolean(b); jpayne@68: forceRawTemp=!forceCompressTemp; jpayne@68: } jpayne@68: }else if(a.equals("tmpdir")){ jpayne@68: Shared.setTmpdir(b); jpayne@68: }else if(a.equals("rename") || a.equals("addname")){ jpayne@68: addName=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("shortname") || a.equals("shortnames")){ jpayne@68: shortName=b; jpayne@68: }else if(a.equals("seed")){ jpayne@68: KmerComparator.defaultSeed=Long.parseLong(b); jpayne@68: }else if(a.equals("hashes")){ jpayne@68: KmerComparator.setHashes(Integer.parseInt(b)); jpayne@68: }else if(a.equals("passes")){ jpayne@68: passes=Integer.parseInt(b); jpayne@68: args2.add(arg); jpayne@68: // }else if(a.equals("k")){ jpayne@68: // k=Integer.parseInt(b); jpayne@68: // args2.add(arg); jpayne@68: }else if(a.equals("border")){ jpayne@68: KmerComparator.defaultBorder=Integer.parseInt(b); jpayne@68: } jpayne@68: jpayne@68: else if(a.equals("unpair")){ jpayne@68: unpair=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("repair")){ jpayne@68: 
repair=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("namesort") || a.equals("sort")){ jpayne@68: namesort=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("overwrite")){ jpayne@68: overwrite=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("v1") || a.equals("kmersort1")){ jpayne@68: boolean x=Parse.parseBoolean(b); jpayne@68: if(x){V2=V3=false;} jpayne@68: }else if(a.equals("v2") || a.equals("kmersort2")){ jpayne@68: V2=Parse.parseBoolean(b); jpayne@68: if(V2){V3=false;} jpayne@68: }else if(a.equals("v3") || a.equals("kmersort3")){ jpayne@68: V3=Parse.parseBoolean(b); jpayne@68: if(V3){V2=false;} jpayne@68: }else if(a.equals("fetchthreads")){ jpayne@68: KmerSort3.fetchThreads=Integer.parseInt(b); jpayne@68: assert(KmerSort3.fetchThreads>0) : KmerSort3.fetchThreads+"\nFetch threads must be at least 1."; jpayne@68: } jpayne@68: jpayne@68: else if(a.equals("comparesequence")){ jpayne@68: KmerComparator.compareSequence=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("allowadjacenttiles") || a.equals("spantiles")){ jpayne@68: ReadKey.spanTilesX=ReadKey.spanTilesY=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("spanx") || a.equals("spantilesx")){ jpayne@68: ReadKey.spanTilesX=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("spany") || a.equals("spantilesy")){ jpayne@68: ReadKey.spanTilesY=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("spanadjacent") || a.equals("spanadjacentonly") || a.equals("adjacentonly") || a.equals("adjacent")){ jpayne@68: ReadKey.spanAdjacentOnly=Parse.parseBoolean(b); jpayne@68: } jpayne@68: jpayne@68: // else if(a.equals("repair")){ jpayne@68: // repair=Parse.parseBoolean(b); jpayne@68: // }else if(a.equals("namesort") || a.equals("sort")){ jpayne@68: // namesort=Parse.parseBoolean(b); jpayne@68: // } jpayne@68: jpayne@68: else if(a.equals("interleaved") || a.equals("int")){ jpayne@68: if("auto".equalsIgnoreCase(b)){FASTQ.FORCE_INTERLEAVED=!(FASTQ.TEST_INTERLEAVED=true);} jpayne@68: else{ jpayne@68: 
FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=Parse.parseBoolean(b); jpayne@68: System.err.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED); jpayne@68: } jpayne@68: }else if(a.equals("cq") || a.equals("changequality")){ jpayne@68: BBMerge.changeQuality=Read.CHANGE_QUALITY=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("quantize") || a.equals("quantizesticky")){ jpayne@68: quantizeQuality=Quantizer.parse(arg, a, b); jpayne@68: }else if(a.equals("lowcomplexity")){ jpayne@68: lowComplexity=Parse.parseBoolean(b); jpayne@68: } jpayne@68: jpayne@68: else if(Clump.parseStatic(arg, a, b)){ jpayne@68: //Do nothing jpayne@68: }else if(Parser.parseQuality(arg, a, b)){ jpayne@68: //Do nothing jpayne@68: } jpayne@68: jpayne@68: else{ jpayne@68: args2.add(arg); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: Clump.setXY(); jpayne@68: jpayne@68: KmerSplit.quantizeQuality=KmerSort1.quantizeQuality=quantizeQuality; jpayne@68: jpayne@68: Parser.processQuality(); jpayne@68: jpayne@68: assert(!unpair || !KmerComparator.mergeFirst) : "Unpair and mergefirst may not be used together."; jpayne@68: jpayne@68: if(in1==null){throw new RuntimeException("\nOne input file is required.\n");} jpayne@68: jpayne@68: if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){ jpayne@68: in2=in1.replace("#", "2"); jpayne@68: in1=in1.replace("#", "1"); jpayne@68: } jpayne@68: if(out1!=null && out2==null && out1.indexOf('#')>-1){ jpayne@68: out2=out1.replace("#", "2"); jpayne@68: out1=out1.replace("#", "1"); jpayne@68: } jpayne@68: jpayne@68: //Ensure input files can be read jpayne@68: if(!Tools.testInputFiles(false, true, in1)){ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: jpayne@68: // assert(false) : ReadKey.spanTiles()+", "+ReadKey.spanTilesX+", "+ReadKey.spanTilesY+", "+Clump.sortX+", "+Clump.sortY; jpayne@68: jpayne@68: autoSetGroups(gString); jpayne@68: jpayne@68: if((in2!=null || out2!=null) && 
groups>1){FASTQ.FORCE_INTERLEAVED=true;} //Fix for crash with twin fasta files jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Builds the argument array from args2 (slot 4 becomes the groups setting) and runs the pipeline. With groups equal to 1, slots 0-3 and 5-11 are filled from the instance fields and KmerSort1.main is called once. Otherwise it loops over the passes: the final pass writes to out1 and out2, earlier passes write to a temp name that becomes the input of the next pass. runOnePass_v2 is used when V2 or V3 is set, runOnePass otherwise. After each pass the default border is lowered by one (never below 0), the default seed is incremented, and conservative mode is switched off once the pass number reaches conservativePasses. Afterwards, if deleteInput is set, sharedErrorState is false, and both in1 and out1 are non-null, the input files are deleted. Finally stops the timer and prints the total time to System.err. The parameter t is the timer created by the caller. */ jpayne@68: public void process(Timer t){ jpayne@68: String[] args=args2.toArray(new String[0]); jpayne@68: args[4]="groups="+groups; jpayne@68: jpayne@68: useSharedHeader=(FileFormat.hasSamOrBamExtension(in1) && out1!=null jpayne@68: && FileFormat.hasSamOrBamExtension(out1)); jpayne@68: jpayne@68: if(groups==1){ jpayne@68: args[0]="in1="+in1; jpayne@68: args[1]="in2="+in2; jpayne@68: args[2]="out1="+out1; jpayne@68: args[3]="out2="+out2; jpayne@68: args[5]="ecco="+ecco; jpayne@68: args[6]="rename="+addName; jpayne@68: args[7]="shortname="+shortName; jpayne@68: args[8]="unpair="+unpair; jpayne@68: args[9]="repair="+repair; jpayne@68: args[10]="namesort="+namesort; jpayne@68: args[11]="ow="+overwrite; jpayne@68: KmerSort1.main(args); jpayne@68: }else{ jpayne@68: String pin1=in1, pin2=in2, temp; jpayne@68: final int conservativePasses=Clump.conservativeFlag ? passes : Tools.max(1, passes/2); jpayne@68: if(passes>1){Clump.setConservative(true);} jpayne@68: long fileMem=-1; jpayne@68: for(int pass=1; pass<=passes; pass++){ jpayne@68: if(/*passes>1 &&*/ (V2 || V3)){ jpayne@68: // System.err.println("Running pass with fileMem="+fileMem); jpayne@68: // out=(pass==passes ? 
out1 : getTempFname("clumpify_p"+(pass+1)+"_temp%_")); jpayne@68: temp=getTempFname("clumpify_p"+(pass+1)+"_temp%_"); jpayne@68: if(pass==passes){ jpayne@68: fileMem=runOnePass_v2(args, pass, pin1, pin2, out1, out2, fileMem); jpayne@68: }else{ jpayne@68: fileMem=runOnePass_v2(args, pass, pin1, pin2, temp, null, fileMem); jpayne@68: } jpayne@68: // System.err.println("New fileMem="+fileMem); jpayne@68: }else{ jpayne@68: // out=(pass==passes ? out1 : getTempFname("clumpify_temp_pass"+pass+"_")); jpayne@68: temp=getTempFname("clumpify_temp_pass"+pass+"_"); jpayne@68: if(pass==passes){ jpayne@68: runOnePass(args, pass, pin1, pin2, out1, out2); jpayne@68: }else{ jpayne@68: runOnePass(args, pass, pin1, pin2, temp, null); jpayne@68: } jpayne@68: } jpayne@68: pin1=temp; jpayne@68: pin2=null; jpayne@68: KmerComparator.defaultBorder=Tools.max(0, KmerComparator.defaultBorder-1); jpayne@68: KmerComparator.defaultSeed++; jpayne@68: if(pass>=conservativePasses){Clump.setConservative(false);} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(deleteInput && !sharedErrorState && out1!=null && in1!=null){ jpayne@68: try { jpayne@68: new File(in1).delete(); jpayne@68: if(in2!=null){new File(in2).delete();} jpayne@68: } catch (Exception e) { jpayne@68: System.err.println("WARNING: Failed to delete input files."); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: t.stop(); jpayne@68: System.err.println("Total time: \t"+t); jpayne@68: jpayne@68: } jpayne@68: jpayne@68: /** Runs one pass when neither V2 nor V3 is set. KmerSplit.main writes to a temp name pattern, then KmerSort1.main reads that pattern and writes either the requested outputs or, on the final pass with repair or namesort set, an intermediate FINAL file that is then handed to SortByName.main. On passes after the first, ecco, shortName and addName are switched off on this instance. Slots 0-3 and 5-11 of the args array are overwritten in place, and several FASTQ static flags are changed as a side effect. NOTE(review): the loop that deletes temp files is truncated in this copy. */ private void runOnePass(String[] args, int pass, String in1, String in2, String out1, String out2){ jpayne@68: assert(groups>1); jpayne@68: if(pass>1){ jpayne@68: ecco=false; jpayne@68: shortName="f"; jpayne@68: addName=false; jpayne@68: } jpayne@68: jpayne@68: String temp=getTempFname("clumpify_p"+pass+"_temp%_"); jpayne@68: jpayne@68: String temp2=temp.replace("%", "FINAL"); jpayne@68: final boolean externalSort=(pass==passes && (repair || namesort)); jpayne@68: jpayne@68: args[0]="in1="+in1; jpayne@68: 
args[1]="in2="+in2; jpayne@68: args[2]="out="+temp; jpayne@68: args[3]="out2="+null; jpayne@68: args[5]="ecco="+ecco; jpayne@68: args[6]="addname=f"; jpayne@68: args[7]="shortname="+shortName; jpayne@68: args[8]="unpair="+unpair; jpayne@68: args[9]="repair=f"; jpayne@68: args[10]="namesort=f"; jpayne@68: args[11]="ow="+overwrite; jpayne@68: KmerSplit.maxZipLevel=2; jpayne@68: KmerSplit.main(args); jpayne@68: jpayne@68: FASTQ.DETECT_QUALITY=FASTQ.DETECT_QUALITY_OUT=false; jpayne@68: FASTQ.ASCII_OFFSET=FASTQ.ASCII_OFFSET_OUT; jpayne@68: jpayne@68: args[0]="in="+temp; jpayne@68: args[1]="in2="+null; jpayne@68: args[2]="out="+(externalSort ? temp2 : out1); jpayne@68: args[3]="out2="+(externalSort ? "null" : out2); jpayne@68: args[5]="ecco=f"; jpayne@68: args[6]="addname="+addName; jpayne@68: args[7]="shortname=f"; jpayne@68: args[8]="unpair=f"; jpayne@68: args[9]="repair="+(repair && externalSort); jpayne@68: args[10]="namesort="+(namesort && externalSort); jpayne@68: args[11]="ow="+overwrite; jpayne@68: if(unpair){ jpayne@68: FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; jpayne@68: } jpayne@68: KmerSort1.main(args); jpayne@68: jpayne@68: if(delete){ jpayne@68: for(int i=0; i1){ jpayne@68: assert(in2==null); jpayne@68: new File(in1).delete(); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(externalSort){ jpayne@68: outstream.println(); jpayne@68: String[] sortArgs=new String[] {"in="+temp2, "out="+out1, "ow="+overwrite}; jpayne@68: if(out2!=null){sortArgs=new String[] {"in="+temp2, "out="+out1, "out2="+out2, "ow="+overwrite};} jpayne@68: SortByName.main(sortArgs); jpayne@68: if(delete){new File(temp2).delete();} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** Runs one pass when V2 or V3 is set. KmerSplit.main runs on pass 1 only, and fileMem is then taken from KmerSplit.lastMemProcessed. Every pass then calls KmerSort3.main (when V3) or KmerSort2.main, reading the split temp pattern on pass 1 and in1 on later passes. On the final pass with repair or namesort set, output goes to a namesorted temp pattern and KmerSort.doHashAndSplit is set to false. After a V3 sort, a fileMem below 1 is replaced by KmerSort3.lastMemProcessed. On passes after the first, ecco, shortName and addName are switched off on this instance. NOTE(review): the remainder of this method, including its return statement, is truncated in this copy; the caller assigns the result back to its fileMem, so it presumably returns the updated fileMem - confirm against upstream. */ private long runOnePass_v2(String[] args, int pass, String in1, String in2, String out1, String out2, long fileMem){ jpayne@68: assert(groups>1); jpayne@68: if(pass>1){ jpayne@68: ecco=false; jpayne@68: shortName="f"; jpayne@68: addName=false; jpayne@68: } jpayne@68: jpayne@68: String 
temp=getTempFname("clumpify_p"+pass+"_temp%_"); jpayne@68: jpayne@68: // String temp2=temp.replace("%", "FINAL"); jpayne@68: String namesorted=temp.replace("%", "namesorted_%"); jpayne@68: final boolean externalSort=(pass==passes && (repair || namesort)); jpayne@68: jpayne@68: if(pass==1){ jpayne@68: args[0]="in1="+in1; jpayne@68: args[1]="in2="+in2; jpayne@68: args[2]="out="+temp; jpayne@68: args[3]="out2="+null; jpayne@68: args[5]="ecco="+ecco; jpayne@68: args[6]="addname=f"; jpayne@68: args[7]="shortname="+shortName; jpayne@68: args[8]="unpair="+unpair; jpayne@68: args[9]="repair=f"; jpayne@68: args[10]="namesort=f"; jpayne@68: args[11]="ow="+overwrite; jpayne@68: KmerSplit.maxZipLevel=2; jpayne@68: KmerSplit.main(args); jpayne@68: fileMem=KmerSplit.lastMemProcessed; jpayne@68: jpayne@68: FASTQ.DETECT_QUALITY=FASTQ.DETECT_QUALITY_OUT=false; jpayne@68: FASTQ.ASCII_OFFSET=FASTQ.ASCII_OFFSET_OUT; jpayne@68: } jpayne@68: jpayne@68: args[0]="in1="+(pass==1 ? temp : in1); jpayne@68: args[1]="in2="+null; jpayne@68: args[2]="out="+(externalSort ? namesorted : out1); jpayne@68: args[3]="out2="+(externalSort ? 
"null" : out2); jpayne@68: args[5]="ecco=f"; jpayne@68: args[6]="addname="+addName; jpayne@68: args[7]="shortname=f"; jpayne@68: args[8]="unpair=f"; jpayne@68: args[9]="repair="+(repair && externalSort); jpayne@68: args[10]="namesort="+(namesort && externalSort); jpayne@68: args[11]="ow="+overwrite; jpayne@68: if(unpair){ jpayne@68: FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; jpayne@68: } jpayne@68: if(externalSort){ jpayne@68: KmerSort.doHashAndSplit=false; jpayne@68: } jpayne@68: if(V3){ jpayne@68: KmerSort3.main(fileMem, pass, passes, args); jpayne@68: if(fileMem<1){fileMem=KmerSort3.lastMemProcessed;} jpayne@68: }else{KmerSort2.main(args);} jpayne@68: jpayne@68: if(delete){ jpayne@68: for(int i=0; i names=new ArrayList(); jpayne@68: for(int i=0; iworstCase && readEstimate args2=new ArrayList(); jpayne@68: private PrintStream outstream=System.err; jpayne@68: jpayne@68: public static boolean sharedErrorState=false; jpayne@68: jpayne@68: }