Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/PivotSet.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package clump; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 | |
7 import bloom.KCountArray; | |
8 import bloom.ReadCounter; | |
9 import fileIO.ByteFile; | |
10 import fileIO.FileFormat; | |
11 import fileIO.ReadWrite; | |
12 import jgi.BBMerge; | |
13 import shared.Parse; | |
14 import shared.Parser; | |
15 import shared.PreParser; | |
16 import shared.ReadStats; | |
17 import shared.Shared; | |
18 import shared.Timer; | |
19 import shared.Tools; | |
20 import stream.ConcurrentReadInputStream; | |
21 import stream.FASTQ; | |
22 import stream.FastaReadInputStream; | |
23 import stream.Read; | |
24 import structures.ListNum; | |
25 | |
26 /** | |
27 * Reduces reads to their feature kmer. | |
28 * @author Brian Bushnell | |
29 * @date August 19, 2016 | |
30 * | |
31 */ | |
32 public class PivotSet { | |
33 | |
34 /*--------------------------------------------------------------*/ | |
35 /*---------------- Static Methods ----------------*/ | |
36 /*--------------------------------------------------------------*/ | |
37 | |
38 /** | |
39 * Code entrance from the command line. | |
40 * @param args Command line arguments | |
41 */ | |
42 public static void main(String[] args){ | |
43 makeSet(args); | |
44 } | |
45 | |
46 public static KCountArray makeSet(String[] args){ | |
47 final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ; | |
48 Timer t=new Timer(); | |
49 PivotSet x=new PivotSet(args); | |
50 KCountArray kca=x.process(t, false); | |
51 ReadWrite.USE_PIGZ=pigz; | |
52 ReadWrite.USE_UNPIGZ=unpigz; | |
53 | |
54 //Close the print stream if it was redirected | |
55 Shared.closeStream(x.outstream); | |
56 | |
57 return kca; | |
58 } | |
59 | |
60 /*--------------------------------------------------------------*/ | |
61 /*---------------- Initialization ----------------*/ | |
62 /*--------------------------------------------------------------*/ | |
63 | |
64 /** | |
65 * Constructor. | |
66 * @param args Command line arguments | |
67 */ | |
68 public PivotSet(String[] args){ | |
69 | |
70 {//Preparse block for help, config files, and outstream | |
71 PreParser pp=new PreParser(args, getClass(), false); | |
72 args=pp.args; | |
73 outstream=pp.outstream; | |
74 } | |
75 | |
76 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; | |
77 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); | |
78 | |
79 Parser parser=new Parser(); | |
80 for(int i=0; i<args.length; i++){ | |
81 String arg=args[i]; | |
82 String[] split=arg.split("="); | |
83 String a=split[0].toLowerCase(); | |
84 String b=split.length>1 ? split[1] : null; | |
85 | |
86 if(parser.parse(arg, a, b)){ | |
87 //do nothing | |
88 }else if(a.equals("verbose")){ | |
89 verbose=KmerComparator.verbose=Parse.parseBoolean(b); | |
90 }else if(a.equals("parse_flag_goes_here")){ | |
91 //Set a variable here | |
92 }else if(a.equals("k")){ | |
93 k=Integer.parseInt(b); | |
94 assert(k>0 && k<32); | |
95 }else if(a.equals("ecco")){ | |
96 ecco=Parse.parseBoolean(b); | |
97 }else if(a.equals("rename") || a.equals("addname")){ | |
98 //do nothing | |
99 }else if(a.equals("rcomp") || a.equals("reversecomplement")){ | |
100 //do nothing | |
101 }else if(a.equals("condense") || a.equals("consensus")){ | |
102 //do nothing | |
103 }else if(a.equals("mincount") || a.equals("consensus")){ | |
104 minCount=Integer.parseInt(b); | |
105 }else if(a.equals("correct") || a.equals("ecc")){ | |
106 //do nothing | |
107 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){ | |
108 //do nothing | |
109 }else if(a.equals("seed")){ | |
110 KmerComparator.defaultSeed=Long.parseLong(b); | |
111 }else if(a.equals("hashes")){ | |
112 KmerComparator.setHashes(Integer.parseInt(b)); | |
113 }else{ | |
114 outstream.println("Unknown parameter "+args[i]); | |
115 assert(false) : "Unknown parameter "+args[i]; | |
116 // throw new RuntimeException("Unknown parameter "+args[i]); | |
117 } | |
118 } | |
119 | |
120 {//Process parser fields | |
121 Parser.processQuality(); | |
122 | |
123 maxReads=parser.maxReads; | |
124 | |
125 in1=parser.in1; | |
126 in2=parser.in2; | |
127 | |
128 extin=parser.extin; | |
129 } | |
130 | |
131 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){ | |
132 in2=in1.replace("#", "2"); | |
133 in1=in1.replace("#", "1"); | |
134 } | |
135 if(in2!=null){ | |
136 if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");} | |
137 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; | |
138 } | |
139 | |
140 assert(FastaReadInputStream.settingsOK()); | |
141 | |
142 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} | |
143 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
144 ByteFile.FORCE_MODE_BF2=true; | |
145 } | |
146 | |
147 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true); | |
148 ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true); | |
149 } | |
150 | |
151 | |
152 /*--------------------------------------------------------------*/ | |
153 /*---------------- Outer Methods ----------------*/ | |
154 /*--------------------------------------------------------------*/ | |
155 | |
156 private static long getCells(double fraction, int cbits){ | |
157 final long memory=Runtime.getRuntime().maxMemory(); | |
158 final long usable=(long)Tools.max(((memory-96000000)*.73), memory*0.45); | |
159 final double filterMem=usable*fraction; | |
160 return (long)((filterMem*8)/cbits); | |
161 } | |
162 | |
163 /** Create read streams and process all data */ | |
164 public KCountArray process(Timer t, boolean amino){ | |
165 int cbits=2; | |
166 while((1L<<cbits)<=minCount){cbits*=2;} | |
167 int filterHashes=2; | |
168 float fraction=0.1f; | |
169 long cells=getCells(fraction, cbits); | |
170 ReadCounter rc=new ReadCounter(k, true, ecco, false, amino); | |
171 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, maxReads, 1, 1, 1, 1, null, 0); | |
172 | |
173 final ConcurrentReadInputStream cris; | |
174 { | |
175 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null); | |
176 cris.start(); | |
177 if(verbose){outstream.println("Started cris");} | |
178 } | |
179 | |
180 readsProcessed=0; | |
181 basesProcessed=0; | |
182 | |
183 //Process the read stream | |
184 processInner(cris, kca); | |
185 | |
186 if(verbose){outstream.println("Finished; closing streams.");} | |
187 | |
188 errorState|=ReadStats.writeAll(); | |
189 errorState|=ReadWrite.closeStreams(cris); | |
190 | |
191 t.stop(); | |
192 | |
193 outstream.println("Made filter: \t"+kca.toShortString(filterHashes)); | |
194 outstream.println("Estimated pivots: \t"+(long)kca.estimateUniqueKmers(filterHashes)); | |
195 outstream.println("Estimated pivots >1x: \t"+(long)kca.estimateUniqueKmers(filterHashes, minCount)); | |
196 | |
197 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); | |
198 | |
199 if(errorState){ | |
200 Clumpify.sharedErrorState=true; | |
201 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
202 } | |
203 return kca; | |
204 } | |
205 | |
206 /** Manage threads */ | |
207 public static KCountArray makeKcaStatic(final ConcurrentReadInputStream cris, int k, int minCount, boolean amino){ | |
208 | |
209 KmerComparator kc=new KmerComparator(k, false, false); | |
210 int cbits=2; | |
211 while((1L<<cbits)<=minCount){cbits*=2;} | |
212 int filterHashes=2; | |
213 float fraction=0.1f; | |
214 long cells=getCells(fraction, cbits); | |
215 ReadCounter rc=new ReadCounter(k, true, false, false, amino); | |
216 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, -1, 1, 1, 1, 1, null, 0); | |
217 | |
218 if(verbose){System.err.println("Making hash threads.");} | |
219 final int threads=Shared.threads(); | |
220 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads); | |
221 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, false));} | |
222 | |
223 if(verbose){System.err.println("Starting threads.");} | |
224 for(HashThread ht : alht){ht.start();} | |
225 | |
226 if(verbose){System.err.println("Waiting for threads.");} | |
227 /* Wait for threads to die */ | |
228 for(HashThread ht : alht){ | |
229 | |
230 /* Wait for a thread to die */ | |
231 while(ht.getState()!=Thread.State.TERMINATED){ | |
232 try { | |
233 ht.join(); | |
234 } catch (InterruptedException e) { | |
235 e.printStackTrace(); | |
236 } | |
237 } | |
238 } | |
239 kca.shutdown(); | |
240 return kca; | |
241 } | |
242 | |
243 /** Manage threads */ | |
244 public void processInner(final ConcurrentReadInputStream cris, KCountArray kca){ | |
245 if(verbose){outstream.println("Making comparator.");} | |
246 KmerComparator kc=new KmerComparator(k, false, false); | |
247 | |
248 if(verbose){outstream.println("Making hash threads.");} | |
249 final int threads=Shared.threads(); | |
250 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads); | |
251 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, ecco));} | |
252 | |
253 if(verbose){outstream.println("Starting threads.");} | |
254 for(HashThread ht : alht){ht.start();} | |
255 | |
256 if(verbose){outstream.println("Waiting for threads.");} | |
257 /* Wait for threads to die */ | |
258 for(HashThread ht : alht){ | |
259 | |
260 /* Wait for a thread to die */ | |
261 while(ht.getState()!=Thread.State.TERMINATED){ | |
262 try { | |
263 ht.join(); | |
264 } catch (InterruptedException e) { | |
265 e.printStackTrace(); | |
266 } | |
267 } | |
268 readsProcessed+=ht.readsProcessedT; | |
269 basesProcessed+=ht.basesProcessedT; | |
270 } | |
271 kca.shutdown(); | |
272 } | |
273 | |
274 /*--------------------------------------------------------------*/ | |
275 /*---------------- Inner Classes ----------------*/ | |
276 /*--------------------------------------------------------------*/ | |
277 | |
278 private static class HashThread extends Thread{ | |
279 | |
280 HashThread(ConcurrentReadInputStream cris_, KmerComparator kc_, KCountArray kca_, boolean ecco_){ | |
281 cris=cris_; | |
282 kc=kc_; | |
283 kca=kca_; | |
284 ecco=ecco_; | |
285 } | |
286 | |
287 @Override | |
288 public void run(){ | |
289 | |
290 ListNum<Read> ln=cris.nextList(); | |
291 ArrayList<Read> reads=(ln!=null ? ln.list : null); | |
292 | |
293 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning | |
294 for(Read r1 : reads){ | |
295 Read r2=r1.mate; | |
296 readsProcessedT+=r1.pairCount(); | |
297 basesProcessedT+=r1.pairLength(); | |
298 if(ecco && r2!=null){ | |
299 if(r2!=null){BBMerge.findOverlapStrict(r1, r2, true);} | |
300 } | |
301 { | |
302 final long kmer=kc.hash(r1, null, 0, false); | |
303 if(kmer>=0){ | |
304 kca.increment(kmer); | |
305 } | |
306 } | |
307 if(r2!=null){ | |
308 final long kmer=kc.hash(r2, null, 0, false); | |
309 if(kmer>=0){ | |
310 kca.increment(kmer); | |
311 } | |
312 } | |
313 } | |
314 cris.returnList(ln); | |
315 ln=cris.nextList(); | |
316 reads=(ln!=null ? ln.list : null); | |
317 } | |
318 if(ln!=null){ | |
319 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); | |
320 } | |
321 } | |
322 | |
323 final ConcurrentReadInputStream cris; | |
324 final KmerComparator kc; | |
325 final KCountArray kca; | |
326 final boolean ecco; | |
327 | |
328 protected long readsProcessedT=0; | |
329 protected long basesProcessedT=0; | |
330 } | |
331 | |
332 /*--------------------------------------------------------------*/ | |
333 /*---------------- Inner Methods ----------------*/ | |
334 /*--------------------------------------------------------------*/ | |
335 | |
336 /*--------------------------------------------------------------*/ | |
337 /*---------------- Fields ----------------*/ | |
338 /*--------------------------------------------------------------*/ | |
339 | |
340 private int k=31; | |
341 private int minCount=2; | |
342 | |
343 /*--------------------------------------------------------------*/ | |
344 /*---------------- I/O Fields ----------------*/ | |
345 /*--------------------------------------------------------------*/ | |
346 | |
347 private String in1=null; | |
348 private String in2=null; | |
349 | |
350 private String extin=null; | |
351 | |
352 /*--------------------------------------------------------------*/ | |
353 | |
354 protected long readsProcessed=0; | |
355 protected long basesProcessed=0; | |
356 | |
357 private long maxReads=-1; | |
358 private boolean ecco=false; | |
359 | |
360 /*--------------------------------------------------------------*/ | |
361 /*---------------- Final Fields ----------------*/ | |
362 /*--------------------------------------------------------------*/ | |
363 | |
364 private final FileFormat ffin1; | |
365 private final FileFormat ffin2; | |
366 | |
367 /*--------------------------------------------------------------*/ | |
368 /*---------------- Common Fields ----------------*/ | |
369 /*--------------------------------------------------------------*/ | |
370 | |
371 private PrintStream outstream=System.err; | |
372 public static boolean verbose=false; | |
373 public boolean errorState=false; | |
374 | |
375 } |