jpayne@68: package clump; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: jpayne@68: import bloom.KCountArray; jpayne@68: import bloom.ReadCounter; jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import jgi.BBMerge; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.ReadStats; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import stream.ConcurrentReadInputStream; jpayne@68: import stream.FASTQ; jpayne@68: import stream.FastaReadInputStream; jpayne@68: import stream.Read; jpayne@68: import structures.ListNum; jpayne@68: jpayne@68: /** jpayne@68: * Reduces reads to their feature kmer. jpayne@68: * @author Brian Bushnell jpayne@68: * @date August 19, 2016 jpayne@68: * jpayne@68: */ jpayne@68: public class PivotSet { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Static Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: makeSet(args); jpayne@68: } jpayne@68: jpayne@68: public static KCountArray makeSet(String[] args){ jpayne@68: final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ; jpayne@68: Timer t=new Timer(); jpayne@68: PivotSet x=new PivotSet(args); jpayne@68: KCountArray kca=x.process(t, false); jpayne@68: ReadWrite.USE_PIGZ=pigz; jpayne@68: ReadWrite.USE_UNPIGZ=unpigz; jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: jpayne@68: return kca; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public PivotSet(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: jpayne@68: Parser parser=new Parser(); jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: jpayne@68: if(parser.parse(arg, a, b)){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("verbose")){ jpayne@68: verbose=KmerComparator.verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("parse_flag_goes_here")){ jpayne@68: //Set a variable here jpayne@68: }else if(a.equals("k")){ jpayne@68: k=Integer.parseInt(b); jpayne@68: assert(k>0 && k<32); jpayne@68: }else if(a.equals("ecco")){ jpayne@68: ecco=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("rename") || a.equals("addname")){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("rcomp") || a.equals("reversecomplement")){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("condense") || a.equals("consensus")){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("mincount") || a.equals("consensus")){ jpayne@68: minCount=Integer.parseInt(b); jpayne@68: }else if(a.equals("correct") || a.equals("ecc")){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("seed")){ jpayne@68: KmerComparator.defaultSeed=Long.parseLong(b); jpayne@68: }else if(a.equals("hashes")){ jpayne@68: KmerComparator.setHashes(Integer.parseInt(b)); jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: // throw new RuntimeException("Unknown parameter "+args[i]); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: {//Process parser fields jpayne@68: Parser.processQuality(); jpayne@68: jpayne@68: maxReads=parser.maxReads; jpayne@68: jpayne@68: in1=parser.in1; jpayne@68: in2=parser.in2; jpayne@68: jpayne@68: extin=parser.extin; jpayne@68: } jpayne@68: jpayne@68: if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){ jpayne@68: in2=in1.replace("#", "2"); jpayne@68: in1=in1.replace("#", "1"); jpayne@68: } jpayne@68: if(in2!=null){ jpayne@68: if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");} jpayne@68: FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; jpayne@68: } jpayne@68: jpayne@68: assert(FastaReadInputStream.settingsOK()); jpayne@68: jpayne@68: if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true); jpayne@68: ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true); jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private static long getCells(double fraction, int cbits){ jpayne@68: final long memory=Runtime.getRuntime().maxMemory(); jpayne@68: final long usable=(long)Tools.max(((memory-96000000)*.73), memory*0.45); jpayne@68: final double filterMem=usable*fraction; jpayne@68: return (long)((filterMem*8)/cbits); jpayne@68: } jpayne@68: jpayne@68: /** Create read streams and process all data */ jpayne@68: public KCountArray process(Timer t, boolean amino){ jpayne@68: int cbits=2; jpayne@68: while((1L<1x: \t"+(long)kca.estimateUniqueKmers(filterHashes, minCount)); jpayne@68: jpayne@68: outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); jpayne@68: jpayne@68: if(errorState){ jpayne@68: Clumpify.sharedErrorState=true; jpayne@68: throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); jpayne@68: } jpayne@68: return kca; jpayne@68: } jpayne@68: jpayne@68: /** Manage threads */ jpayne@68: public static KCountArray makeKcaStatic(final ConcurrentReadInputStream cris, int k, int minCount, boolean amino){ jpayne@68: jpayne@68: KmerComparator kc=new KmerComparator(k, false, false); jpayne@68: int cbits=2; jpayne@68: while((1L< alht=new ArrayList(threads); jpayne@68: for(int i=0; i alht=new ArrayList(threads); jpayne@68: for(int i=0; i ln=cris.nextList(); jpayne@68: ArrayList reads=(ln!=null ? ln.list : null); jpayne@68: jpayne@68: while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning jpayne@68: for(Read r1 : reads){ jpayne@68: Read r2=r1.mate; jpayne@68: readsProcessedT+=r1.pairCount(); jpayne@68: basesProcessedT+=r1.pairLength(); jpayne@68: if(ecco && r2!=null){ jpayne@68: if(r2!=null){BBMerge.findOverlapStrict(r1, r2, true);} jpayne@68: } jpayne@68: { jpayne@68: final long kmer=kc.hash(r1, null, 0, false); jpayne@68: if(kmer>=0){ jpayne@68: kca.increment(kmer); jpayne@68: } jpayne@68: } jpayne@68: if(r2!=null){ jpayne@68: final long kmer=kc.hash(r2, null, 0, false); jpayne@68: if(kmer>=0){ jpayne@68: kca.increment(kmer); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: cris.returnList(ln); jpayne@68: ln=cris.nextList(); jpayne@68: reads=(ln!=null ? ln.list : null); jpayne@68: } jpayne@68: if(ln!=null){ jpayne@68: cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: final ConcurrentReadInputStream cris; jpayne@68: final KmerComparator kc; jpayne@68: final KCountArray kca; jpayne@68: final boolean ecco; jpayne@68: jpayne@68: protected long readsProcessedT=0; jpayne@68: protected long basesProcessedT=0; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private int k=31; jpayne@68: private int minCount=2; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- I/O Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private String in1=null; jpayne@68: private String in2=null; jpayne@68: jpayne@68: private String extin=null; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: protected long readsProcessed=0; jpayne@68: protected long basesProcessed=0; jpayne@68: jpayne@68: private long maxReads=-1; jpayne@68: private boolean ecco=false; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private final FileFormat ffin1; jpayne@68: private final FileFormat ffin2; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Common Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private PrintStream outstream=System.err; jpayne@68: public static boolean verbose=false; jpayne@68: public boolean errorState=false; jpayne@68: jpayne@68: }