Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/PivotSet.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/PivotSet.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,375 @@ +package clump; + +import java.io.File; +import java.io.PrintStream; +import java.util.ArrayList; + +import bloom.KCountArray; +import bloom.ReadCounter; +import fileIO.ByteFile; +import fileIO.FileFormat; +import fileIO.ReadWrite; +import jgi.BBMerge; +import shared.Parse; +import shared.Parser; +import shared.PreParser; +import shared.ReadStats; +import shared.Shared; +import shared.Timer; +import shared.Tools; +import stream.ConcurrentReadInputStream; +import stream.FASTQ; +import stream.FastaReadInputStream; +import stream.Read; +import structures.ListNum; + +/** + * Reduces reads to their feature kmer. + * @author Brian Bushnell + * @date August 19, 2016 + * + */ +public class PivotSet { + + /*--------------------------------------------------------------*/ + /*---------------- Static Methods ----------------*/ + /*--------------------------------------------------------------*/ + + /** + * Code entrance from the command line. + * @param args Command line arguments + */ + public static void main(String[] args){ + makeSet(args); + } + + public static KCountArray makeSet(String[] args){ + final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ; + Timer t=new Timer(); + PivotSet x=new PivotSet(args); + KCountArray kca=x.process(t, false); + ReadWrite.USE_PIGZ=pigz; + ReadWrite.USE_UNPIGZ=unpigz; + + //Close the print stream if it was redirected + Shared.closeStream(x.outstream); + + return kca; + } + + /*--------------------------------------------------------------*/ + /*---------------- Initialization ----------------*/ + /*--------------------------------------------------------------*/ + + /** + * Constructor. + * @param args Command line arguments + */ + public PivotSet(String[] args){ + + {//Preparse block for help, config files, and outstream + PreParser pp=new PreParser(args, getClass(), false); + args=pp.args; + outstream=pp.outstream; + } + + ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; + ReadWrite.MAX_ZIP_THREADS=Shared.threads(); + + Parser parser=new Parser(); + for(int i=0; i<args.length; i++){ + String arg=args[i]; + String[] split=arg.split("="); + String a=split[0].toLowerCase(); + String b=split.length>1 ? split[1] : null; + + if(parser.parse(arg, a, b)){ + //do nothing + }else if(a.equals("verbose")){ + verbose=KmerComparator.verbose=Parse.parseBoolean(b); + }else if(a.equals("parse_flag_goes_here")){ + //Set a variable here + }else if(a.equals("k")){ + k=Integer.parseInt(b); + assert(k>0 && k<32); + }else if(a.equals("ecco")){ + ecco=Parse.parseBoolean(b); + }else if(a.equals("rename") || a.equals("addname")){ + //do nothing + }else if(a.equals("rcomp") || a.equals("reversecomplement")){ + //do nothing + }else if(a.equals("condense") || a.equals("consensus")){ + //do nothing + }else if(a.equals("mincount") || a.equals("consensus")){ + minCount=Integer.parseInt(b); + }else if(a.equals("correct") || a.equals("ecc")){ + //do nothing + }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){ + //do nothing + }else if(a.equals("seed")){ + KmerComparator.defaultSeed=Long.parseLong(b); + }else if(a.equals("hashes")){ + KmerComparator.setHashes(Integer.parseInt(b)); + }else{ + outstream.println("Unknown parameter "+args[i]); + assert(false) : "Unknown parameter "+args[i]; + // throw new RuntimeException("Unknown parameter "+args[i]); + } + } + + {//Process parser fields + Parser.processQuality(); + + maxReads=parser.maxReads; + + in1=parser.in1; + in2=parser.in2; + + extin=parser.extin; + } + + if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){ + in2=in1.replace("#", "2"); + in1=in1.replace("#", "1"); + } + if(in2!=null){ + if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");} + FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; + } + + assert(FastaReadInputStream.settingsOK()); + + if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} + if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ + ByteFile.FORCE_MODE_BF2=true; + } + + ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true); + ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true); + } + + + /*--------------------------------------------------------------*/ + /*---------------- Outer Methods ----------------*/ + /*--------------------------------------------------------------*/ + + private static long getCells(double fraction, int cbits){ + final long memory=Runtime.getRuntime().maxMemory(); + final long usable=(long)Tools.max(((memory-96000000)*.73), memory*0.45); + final double filterMem=usable*fraction; + return (long)((filterMem*8)/cbits); + } + + /** Create read streams and process all data */ + public KCountArray process(Timer t, boolean amino){ + int cbits=2; + while((1L<<cbits)<=minCount){cbits*=2;} + int filterHashes=2; + float fraction=0.1f; + long cells=getCells(fraction, cbits); + ReadCounter rc=new ReadCounter(k, true, ecco, false, amino); + KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, maxReads, 1, 1, 1, 1, null, 0); + + final ConcurrentReadInputStream cris; + { + cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null); + cris.start(); + if(verbose){outstream.println("Started cris");} + } + + readsProcessed=0; + basesProcessed=0; + + //Process the read stream + processInner(cris, kca); + + if(verbose){outstream.println("Finished; closing streams.");} + + errorState|=ReadStats.writeAll(); + errorState|=ReadWrite.closeStreams(cris); + + t.stop(); + + outstream.println("Made filter: \t"+kca.toShortString(filterHashes)); + outstream.println("Estimated pivots: \t"+(long)kca.estimateUniqueKmers(filterHashes)); + outstream.println("Estimated pivots >1x: \t"+(long)kca.estimateUniqueKmers(filterHashes, minCount)); + + outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); + + if(errorState){ + Clumpify.sharedErrorState=true; + throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); + } + return kca; + } + + /** Manage threads */ + public static KCountArray makeKcaStatic(final ConcurrentReadInputStream cris, int k, int minCount, boolean amino){ + + KmerComparator kc=new KmerComparator(k, false, false); + int cbits=2; + while((1L<<cbits)<=minCount){cbits*=2;} + int filterHashes=2; + float fraction=0.1f; + long cells=getCells(fraction, cbits); + ReadCounter rc=new ReadCounter(k, true, false, false, amino); + KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, -1, 1, 1, 1, 1, null, 0); + + if(verbose){System.err.println("Making hash threads.");} + final int threads=Shared.threads(); + ArrayList<HashThread> alht=new ArrayList<HashThread>(threads); + for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, false));} + + if(verbose){System.err.println("Starting threads.");} + for(HashThread ht : alht){ht.start();} + + if(verbose){System.err.println("Waiting for threads.");} + /* Wait for threads to die */ + for(HashThread ht : alht){ + + /* Wait for a thread to die */ + while(ht.getState()!=Thread.State.TERMINATED){ + try { + ht.join(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + kca.shutdown(); + return kca; + } + + /** Manage threads */ + public void processInner(final ConcurrentReadInputStream cris, KCountArray kca){ + if(verbose){outstream.println("Making comparator.");} + KmerComparator kc=new KmerComparator(k, false, false); + + if(verbose){outstream.println("Making hash threads.");} + final int threads=Shared.threads(); + ArrayList<HashThread> alht=new ArrayList<HashThread>(threads); + for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, ecco));} + + if(verbose){outstream.println("Starting threads.");} + for(HashThread ht : alht){ht.start();} + + if(verbose){outstream.println("Waiting for threads.");} + /* Wait for threads to die */ + for(HashThread ht : alht){ + + /* Wait for a thread to die */ + while(ht.getState()!=Thread.State.TERMINATED){ + try { + ht.join(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + readsProcessed+=ht.readsProcessedT; + basesProcessed+=ht.basesProcessedT; + } + kca.shutdown(); + } + + /*--------------------------------------------------------------*/ + /*---------------- Inner Classes ----------------*/ + /*--------------------------------------------------------------*/ + + private static class HashThread extends Thread{ + + HashThread(ConcurrentReadInputStream cris_, KmerComparator kc_, KCountArray kca_, boolean ecco_){ + cris=cris_; + kc=kc_; + kca=kca_; + ecco=ecco_; + } + + @Override + public void run(){ + + ListNum<Read> ln=cris.nextList(); + ArrayList<Read> reads=(ln!=null ? ln.list : null); + + while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning + for(Read r1 : reads){ + Read r2=r1.mate; + readsProcessedT+=r1.pairCount(); + basesProcessedT+=r1.pairLength(); + if(ecco && r2!=null){ + if(r2!=null){BBMerge.findOverlapStrict(r1, r2, true);} + } + { + final long kmer=kc.hash(r1, null, 0, false); + if(kmer>=0){ + kca.increment(kmer); + } + } + if(r2!=null){ + final long kmer=kc.hash(r2, null, 0, false); + if(kmer>=0){ + kca.increment(kmer); + } + } + } + cris.returnList(ln); + ln=cris.nextList(); + reads=(ln!=null ? ln.list : null); + } + if(ln!=null){ + cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); + } + } + + final ConcurrentReadInputStream cris; + final KmerComparator kc; + final KCountArray kca; + final boolean ecco; + + protected long readsProcessedT=0; + protected long basesProcessedT=0; + } + + /*--------------------------------------------------------------*/ + /*---------------- Inner Methods ----------------*/ + /*--------------------------------------------------------------*/ + + /*--------------------------------------------------------------*/ + /*---------------- Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private int k=31; + private int minCount=2; + + /*--------------------------------------------------------------*/ + /*---------------- I/O Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private String in1=null; + private String in2=null; + + private String extin=null; + + /*--------------------------------------------------------------*/ + + protected long readsProcessed=0; + protected long basesProcessed=0; + + private long maxReads=-1; + private boolean ecco=false; + + /*--------------------------------------------------------------*/ + /*---------------- Final Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private final FileFormat ffin1; + private final FileFormat ffin2; + + /*--------------------------------------------------------------*/ + /*---------------- Common Fields ----------------*/ + /*--------------------------------------------------------------*/ + + private PrintStream outstream=System.err; + public static boolean verbose=false; + public boolean errorState=false; + +}