comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/PivotSet.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package clump;
2
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6
7 import bloom.KCountArray;
8 import bloom.ReadCounter;
9 import fileIO.ByteFile;
10 import fileIO.FileFormat;
11 import fileIO.ReadWrite;
12 import jgi.BBMerge;
13 import shared.Parse;
14 import shared.Parser;
15 import shared.PreParser;
16 import shared.ReadStats;
17 import shared.Shared;
18 import shared.Timer;
19 import shared.Tools;
20 import stream.ConcurrentReadInputStream;
21 import stream.FASTQ;
22 import stream.FastaReadInputStream;
23 import stream.Read;
24 import structures.ListNum;
25
26 /**
27 * Reduces reads to their feature kmer.
28 * @author Brian Bushnell
29 * @date August 19, 2016
30 *
31 */
32 public class PivotSet {
33
34 /*--------------------------------------------------------------*/
35 /*---------------- Static Methods ----------------*/
36 /*--------------------------------------------------------------*/
37
38 /**
39 * Code entrance from the command line.
40 * @param args Command line arguments
41 */
42 public static void main(String[] args){
43 makeSet(args);
44 }
45
46 public static KCountArray makeSet(String[] args){
47 final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ;
48 Timer t=new Timer();
49 PivotSet x=new PivotSet(args);
50 KCountArray kca=x.process(t, false);
51 ReadWrite.USE_PIGZ=pigz;
52 ReadWrite.USE_UNPIGZ=unpigz;
53
54 //Close the print stream if it was redirected
55 Shared.closeStream(x.outstream);
56
57 return kca;
58 }
59
60 /*--------------------------------------------------------------*/
61 /*---------------- Initialization ----------------*/
62 /*--------------------------------------------------------------*/
63
64 /**
65 * Constructor.
66 * @param args Command line arguments
67 */
68 public PivotSet(String[] args){
69
70 {//Preparse block for help, config files, and outstream
71 PreParser pp=new PreParser(args, getClass(), false);
72 args=pp.args;
73 outstream=pp.outstream;
74 }
75
76 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
77 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
78
79 Parser parser=new Parser();
80 for(int i=0; i<args.length; i++){
81 String arg=args[i];
82 String[] split=arg.split("=");
83 String a=split[0].toLowerCase();
84 String b=split.length>1 ? split[1] : null;
85
86 if(parser.parse(arg, a, b)){
87 //do nothing
88 }else if(a.equals("verbose")){
89 verbose=KmerComparator.verbose=Parse.parseBoolean(b);
90 }else if(a.equals("parse_flag_goes_here")){
91 //Set a variable here
92 }else if(a.equals("k")){
93 k=Integer.parseInt(b);
94 assert(k>0 && k<32);
95 }else if(a.equals("ecco")){
96 ecco=Parse.parseBoolean(b);
97 }else if(a.equals("rename") || a.equals("addname")){
98 //do nothing
99 }else if(a.equals("rcomp") || a.equals("reversecomplement")){
100 //do nothing
101 }else if(a.equals("condense") || a.equals("consensus")){
102 //do nothing
103 }else if(a.equals("mincount") || a.equals("consensus")){
104 minCount=Integer.parseInt(b);
105 }else if(a.equals("correct") || a.equals("ecc")){
106 //do nothing
107 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){
108 //do nothing
109 }else if(a.equals("seed")){
110 KmerComparator.defaultSeed=Long.parseLong(b);
111 }else if(a.equals("hashes")){
112 KmerComparator.setHashes(Integer.parseInt(b));
113 }else{
114 outstream.println("Unknown parameter "+args[i]);
115 assert(false) : "Unknown parameter "+args[i];
116 // throw new RuntimeException("Unknown parameter "+args[i]);
117 }
118 }
119
120 {//Process parser fields
121 Parser.processQuality();
122
123 maxReads=parser.maxReads;
124
125 in1=parser.in1;
126 in2=parser.in2;
127
128 extin=parser.extin;
129 }
130
131 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
132 in2=in1.replace("#", "2");
133 in1=in1.replace("#", "1");
134 }
135 if(in2!=null){
136 if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");}
137 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
138 }
139
140 assert(FastaReadInputStream.settingsOK());
141
142 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
143 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
144 ByteFile.FORCE_MODE_BF2=true;
145 }
146
147 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
148 ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true);
149 }
150
151
152 /*--------------------------------------------------------------*/
153 /*---------------- Outer Methods ----------------*/
154 /*--------------------------------------------------------------*/
155
156 private static long getCells(double fraction, int cbits){
157 final long memory=Runtime.getRuntime().maxMemory();
158 final long usable=(long)Tools.max(((memory-96000000)*.73), memory*0.45);
159 final double filterMem=usable*fraction;
160 return (long)((filterMem*8)/cbits);
161 }
162
163 /** Create read streams and process all data */
164 public KCountArray process(Timer t, boolean amino){
165 int cbits=2;
166 while((1L<<cbits)<=minCount){cbits*=2;}
167 int filterHashes=2;
168 float fraction=0.1f;
169 long cells=getCells(fraction, cbits);
170 ReadCounter rc=new ReadCounter(k, true, ecco, false, amino);
171 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, maxReads, 1, 1, 1, 1, null, 0);
172
173 final ConcurrentReadInputStream cris;
174 {
175 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null);
176 cris.start();
177 if(verbose){outstream.println("Started cris");}
178 }
179
180 readsProcessed=0;
181 basesProcessed=0;
182
183 //Process the read stream
184 processInner(cris, kca);
185
186 if(verbose){outstream.println("Finished; closing streams.");}
187
188 errorState|=ReadStats.writeAll();
189 errorState|=ReadWrite.closeStreams(cris);
190
191 t.stop();
192
193 outstream.println("Made filter: \t"+kca.toShortString(filterHashes));
194 outstream.println("Estimated pivots: \t"+(long)kca.estimateUniqueKmers(filterHashes));
195 outstream.println("Estimated pivots >1x: \t"+(long)kca.estimateUniqueKmers(filterHashes, minCount));
196
197 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
198
199 if(errorState){
200 Clumpify.sharedErrorState=true;
201 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
202 }
203 return kca;
204 }
205
206 /** Manage threads */
207 public static KCountArray makeKcaStatic(final ConcurrentReadInputStream cris, int k, int minCount, boolean amino){
208
209 KmerComparator kc=new KmerComparator(k, false, false);
210 int cbits=2;
211 while((1L<<cbits)<=minCount){cbits*=2;}
212 int filterHashes=2;
213 float fraction=0.1f;
214 long cells=getCells(fraction, cbits);
215 ReadCounter rc=new ReadCounter(k, true, false, false, amino);
216 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, -1, 1, 1, 1, 1, null, 0);
217
218 if(verbose){System.err.println("Making hash threads.");}
219 final int threads=Shared.threads();
220 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
221 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, false));}
222
223 if(verbose){System.err.println("Starting threads.");}
224 for(HashThread ht : alht){ht.start();}
225
226 if(verbose){System.err.println("Waiting for threads.");}
227 /* Wait for threads to die */
228 for(HashThread ht : alht){
229
230 /* Wait for a thread to die */
231 while(ht.getState()!=Thread.State.TERMINATED){
232 try {
233 ht.join();
234 } catch (InterruptedException e) {
235 e.printStackTrace();
236 }
237 }
238 }
239 kca.shutdown();
240 return kca;
241 }
242
243 /** Manage threads */
244 public void processInner(final ConcurrentReadInputStream cris, KCountArray kca){
245 if(verbose){outstream.println("Making comparator.");}
246 KmerComparator kc=new KmerComparator(k, false, false);
247
248 if(verbose){outstream.println("Making hash threads.");}
249 final int threads=Shared.threads();
250 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
251 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, ecco));}
252
253 if(verbose){outstream.println("Starting threads.");}
254 for(HashThread ht : alht){ht.start();}
255
256 if(verbose){outstream.println("Waiting for threads.");}
257 /* Wait for threads to die */
258 for(HashThread ht : alht){
259
260 /* Wait for a thread to die */
261 while(ht.getState()!=Thread.State.TERMINATED){
262 try {
263 ht.join();
264 } catch (InterruptedException e) {
265 e.printStackTrace();
266 }
267 }
268 readsProcessed+=ht.readsProcessedT;
269 basesProcessed+=ht.basesProcessedT;
270 }
271 kca.shutdown();
272 }
273
274 /*--------------------------------------------------------------*/
275 /*---------------- Inner Classes ----------------*/
276 /*--------------------------------------------------------------*/
277
278 private static class HashThread extends Thread{
279
280 HashThread(ConcurrentReadInputStream cris_, KmerComparator kc_, KCountArray kca_, boolean ecco_){
281 cris=cris_;
282 kc=kc_;
283 kca=kca_;
284 ecco=ecco_;
285 }
286
287 @Override
288 public void run(){
289
290 ListNum<Read> ln=cris.nextList();
291 ArrayList<Read> reads=(ln!=null ? ln.list : null);
292
293 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
294 for(Read r1 : reads){
295 Read r2=r1.mate;
296 readsProcessedT+=r1.pairCount();
297 basesProcessedT+=r1.pairLength();
298 if(ecco && r2!=null){
299 if(r2!=null){BBMerge.findOverlapStrict(r1, r2, true);}
300 }
301 {
302 final long kmer=kc.hash(r1, null, 0, false);
303 if(kmer>=0){
304 kca.increment(kmer);
305 }
306 }
307 if(r2!=null){
308 final long kmer=kc.hash(r2, null, 0, false);
309 if(kmer>=0){
310 kca.increment(kmer);
311 }
312 }
313 }
314 cris.returnList(ln);
315 ln=cris.nextList();
316 reads=(ln!=null ? ln.list : null);
317 }
318 if(ln!=null){
319 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
320 }
321 }
322
323 final ConcurrentReadInputStream cris;
324 final KmerComparator kc;
325 final KCountArray kca;
326 final boolean ecco;
327
328 protected long readsProcessedT=0;
329 protected long basesProcessedT=0;
330 }
331
332 /*--------------------------------------------------------------*/
333 /*---------------- Inner Methods ----------------*/
334 /*--------------------------------------------------------------*/
335
336 /*--------------------------------------------------------------*/
337 /*---------------- Fields ----------------*/
338 /*--------------------------------------------------------------*/
339
340 private int k=31;
341 private int minCount=2;
342
343 /*--------------------------------------------------------------*/
344 /*---------------- I/O Fields ----------------*/
345 /*--------------------------------------------------------------*/
346
347 private String in1=null;
348 private String in2=null;
349
350 private String extin=null;
351
352 /*--------------------------------------------------------------*/
353
354 protected long readsProcessed=0;
355 protected long basesProcessed=0;
356
357 private long maxReads=-1;
358 private boolean ecco=false;
359
360 /*--------------------------------------------------------------*/
361 /*---------------- Final Fields ----------------*/
362 /*--------------------------------------------------------------*/
363
364 private final FileFormat ffin1;
365 private final FileFormat ffin2;
366
367 /*--------------------------------------------------------------*/
368 /*---------------- Common Fields ----------------*/
369 /*--------------------------------------------------------------*/
370
371 private PrintStream outstream=System.err;
372 public static boolean verbose=false;
373 public boolean errorState=false;
374
375 }