Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/KmerReduce.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package clump; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 import java.util.Random; | |
7 | |
8 import assemble.AbstractRemoveThread; | |
9 import dna.AminoAcid; | |
10 import fileIO.ByteFile; | |
11 import fileIO.FileFormat; | |
12 import fileIO.ReadWrite; | |
13 import jgi.BBMerge; | |
14 import kmer.KmerTableSet; | |
15 import shared.KillSwitch; | |
16 import shared.Parse; | |
17 import shared.Parser; | |
18 import shared.PreParser; | |
19 import shared.ReadStats; | |
20 import shared.Shared; | |
21 import shared.Timer; | |
22 import shared.Tools; | |
23 import stream.ConcurrentReadInputStream; | |
24 import stream.ConcurrentReadOutputStream; | |
25 import stream.FASTQ; | |
26 import stream.FastaReadInputStream; | |
27 import stream.Read; | |
28 import structures.ListNum; | |
29 | |
30 /** | |
31 * Reduces reads to their feature kmer. | |
32 * @author Brian Bushnell | |
33 * @date Nov 10, 2015 | |
34 * | |
35 */ | |
36 public class KmerReduce { | |
37 | |
38 /*--------------------------------------------------------------*/ | |
39 /*---------------- Static Methods ----------------*/ | |
40 /*--------------------------------------------------------------*/ | |
41 | |
42 /** | |
43 * Code entrance from the command line. | |
44 * @param args Command line arguments | |
45 */ | |
46 public static void main(String[] args){ | |
47 final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ; | |
48 Timer t=new Timer(); | |
49 KmerReduce x=new KmerReduce(args); | |
50 x.process(t); | |
51 ReadWrite.USE_PIGZ=pigz; | |
52 ReadWrite.USE_UNPIGZ=unpigz; | |
53 | |
54 //Close the print stream if it was redirected | |
55 Shared.closeStream(x.outstream); | |
56 } | |
57 | |
58 /** | |
59 * @param fname0 Input filename of reads | |
60 * @param k Kmer length | |
61 * @param cutoff Minimum count to retain | |
62 * @return Set of pivot kmers | |
63 */ | |
64 public static KmerTableSet getValidKmersFromReads(final String fname0, int k, int cutoff){ | |
65 final String fname=fname0+"_"+(new Random().nextLong()>>>1)+".fa.gz"; | |
66 assert(!new File(fname).exists()); | |
67 | |
68 ArrayList<String> arglist=new ArrayList<String>(); | |
69 arglist.add("in="+fname0); | |
70 arglist.add("out="+fname); | |
71 arglist.add("k="+k); | |
72 String[] args=arglist.toArray(new String[0]); | |
73 | |
74 main(args); | |
75 | |
76 assert(false) : fname+", "+k+", "+cutoff; | |
77 KmerTableSet set=getValidKmers(fname, k, cutoff); | |
78 File f=new File(fname); | |
79 if(f.exists()){f.delete();} | |
80 | |
81 return set; | |
82 } | |
83 | |
84 /** | |
85 * @param fname Input filename of pivot kmers | |
86 * @param k Kmer length | |
87 * @param cutoff Minimum count to retain | |
88 * @return Set of pivot kmers | |
89 */ | |
90 public static KmerTableSet getValidKmers(final String fname, int k, int cutoff){ | |
91 ArrayList<String> arglist=new ArrayList<String>(); | |
92 arglist.add("in="+fname); | |
93 arglist.add("k="+k); | |
94 if(cutoff>1 && prefilter){ | |
95 arglist.add("prefilter="+(cutoff-1)); | |
96 } | |
97 | |
98 String[] args=arglist.toArray(new String[0]); | |
99 KmerTableSet set=new KmerTableSet(args, 12); | |
100 | |
101 Timer t=new Timer(); | |
102 | |
103 set.process(t); | |
104 // errorState|=set.errorState; | |
105 assert(!set.errorState); | |
106 t.stop(); | |
107 | |
108 set.prefilterArray=null; | |
109 AbstractRemoveThread.process(Shared.threads(), cutoff, Integer.MAX_VALUE, set, true); | |
110 | |
111 return set; | |
112 } | |
113 | |
114 /*--------------------------------------------------------------*/ | |
115 /*---------------- Initialization ----------------*/ | |
116 /*--------------------------------------------------------------*/ | |
117 | |
118 /** | |
119 * Constructor. | |
120 * @param args Command line arguments | |
121 */ | |
122 public KmerReduce(String[] args){ | |
123 | |
124 {//Preparse block for help, config files, and outstream | |
125 PreParser pp=new PreParser(args, getClass(), false); | |
126 args=pp.args; | |
127 outstream=pp.outstream; | |
128 } | |
129 | |
130 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; | |
131 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); | |
132 | |
133 Parser parser=new Parser(); | |
134 for(int i=0; i<args.length; i++){ | |
135 String arg=args[i]; | |
136 String[] split=arg.split("="); | |
137 String a=split[0].toLowerCase(); | |
138 String b=split.length>1 ? split[1] : null; | |
139 | |
140 if(parser.parse(arg, a, b)){ | |
141 //do nothing | |
142 }else if(a.equals("verbose")){ | |
143 verbose=Parse.parseBoolean(b); | |
144 }else if(a.equals("parse_flag_goes_here")){ | |
145 //Set a variable here | |
146 }else if(a.equals("k")){ | |
147 k=Integer.parseInt(b); | |
148 assert(k>0 && k<32); | |
149 }else if(a.equals("comparisons") || a.equals("c")){ | |
150 //do nothing | |
151 }else if(a.equals("ecco")){ | |
152 ecco=Parse.parseBoolean(b); | |
153 }else if(a.equals("rename") || a.equals("addname")){ | |
154 //do nothing | |
155 }else if(a.equals("rcomp") || a.equals("reversecomplement")){ | |
156 //do nothing | |
157 }else if(a.equals("condense") || a.equals("consensus")){ | |
158 //do nothing | |
159 }else if(a.equals("correct") || a.equals("ecc")){ | |
160 //do nothing | |
161 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){ | |
162 //do nothing | |
163 }else if(a.equals("seed")){ | |
164 KmerComparator.defaultSeed=Long.parseLong(b); | |
165 }else if(a.equals("hashes")){ | |
166 KmerComparator.setHashes(Integer.parseInt(b)); | |
167 }else{ | |
168 outstream.println("Unknown parameter "+args[i]); | |
169 assert(false) : "Unknown parameter "+args[i]; | |
170 // throw new RuntimeException("Unknown parameter "+args[i]); | |
171 } | |
172 } | |
173 | |
174 {//Process parser fields | |
175 Parser.processQuality(); | |
176 | |
177 maxReads=parser.maxReads; | |
178 | |
179 overwrite=ReadStats.overwrite=parser.overwrite; | |
180 append=ReadStats.append=parser.append; | |
181 | |
182 in1=parser.in1; | |
183 in2=parser.in2; | |
184 | |
185 out1=parser.out1; | |
186 | |
187 extin=parser.extin; | |
188 extout=parser.extout; | |
189 } | |
190 | |
191 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){ | |
192 in2=in1.replace("#", "2"); | |
193 in1=in1.replace("#", "1"); | |
194 } | |
195 if(in2!=null){ | |
196 if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");} | |
197 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; | |
198 } | |
199 | |
200 assert(FastaReadInputStream.settingsOK()); | |
201 | |
202 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} | |
203 | |
204 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ | |
205 ByteFile.FORCE_MODE_BF2=true; | |
206 } | |
207 | |
208 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;} | |
209 | |
210 if(!Tools.testOutputFiles(overwrite, append, false, out1)){ | |
211 outstream.println((out1==null)+", "+out1); | |
212 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n"); | |
213 } | |
214 | |
215 ffout1=FileFormat.testOutput(out1, FileFormat.FASTQ, extout, true, overwrite, append, false); | |
216 | |
217 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true); | |
218 ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true); | |
219 } | |
220 | |
221 | |
222 /*--------------------------------------------------------------*/ | |
223 /*---------------- Outer Methods ----------------*/ | |
224 /*--------------------------------------------------------------*/ | |
225 | |
226 /** Create read streams and process all data */ | |
227 void process(Timer t){ | |
228 | |
229 final ConcurrentReadInputStream cris; | |
230 { | |
231 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null); | |
232 cris.start(); | |
233 if(verbose){outstream.println("Started cris");} | |
234 } | |
235 boolean paired=cris.paired(); | |
236 | |
237 final ConcurrentReadOutputStream ros; | |
238 if(out1!=null){ | |
239 final int buff=Tools.max(4, Shared.threads()); | |
240 | |
241 assert(!out1.equalsIgnoreCase(in1) && !out1.equalsIgnoreCase(in1)) : "Input file and output file have same name."; | |
242 | |
243 ros=ConcurrentReadOutputStream.getStream(ffout1, null, buff, null, false); | |
244 ros.start(); | |
245 }else{ros=null;} | |
246 | |
247 readsProcessed=0; | |
248 basesProcessed=0; | |
249 | |
250 //Process the read stream | |
251 processInner(cris, ros); | |
252 | |
253 if(verbose){outstream.println("Finished; closing streams.");} | |
254 | |
255 errorState|=ReadStats.writeAll(); | |
256 errorState|=ReadWrite.closeStreams(cris, ros); | |
257 | |
258 t.stop(); | |
259 | |
260 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); | |
261 | |
262 if(errorState){ | |
263 Clumpify.sharedErrorState=true; | |
264 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
265 } | |
266 } | |
267 | |
268 /** Manage threads */ | |
269 public void processInner(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream ros){ | |
270 if(verbose){outstream.println("Making comparator.");} | |
271 KmerComparator kc=new KmerComparator(k, false, false); | |
272 | |
273 if(verbose){outstream.println("Making hash threads.");} | |
274 final int threads=Shared.threads(); | |
275 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads); | |
276 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, ros, kc));} | |
277 | |
278 if(verbose){outstream.println("Starting threads.");} | |
279 for(HashThread ht : alht){ht.start();} | |
280 | |
281 if(verbose){outstream.println("Waiting for threads.");} | |
282 /* Wait for threads to die */ | |
283 for(HashThread ht : alht){ | |
284 | |
285 /* Wait for a thread to die */ | |
286 while(ht.getState()!=Thread.State.TERMINATED){ | |
287 try { | |
288 ht.join(); | |
289 } catch (InterruptedException e) { | |
290 e.printStackTrace(); | |
291 } | |
292 } | |
293 readsProcessed+=ht.readsProcessedT; | |
294 basesProcessed+=ht.basesProcessedT; | |
295 } | |
296 } | |
297 | |
298 /*--------------------------------------------------------------*/ | |
299 /*---------------- Inner Classes ----------------*/ | |
300 /*--------------------------------------------------------------*/ | |
301 | |
302 private class HashThread extends Thread{ | |
303 | |
304 HashThread(ConcurrentReadInputStream cris_, ConcurrentReadOutputStream ros_, KmerComparator kc_){ | |
305 cris=cris_; | |
306 ros=ros_; | |
307 kc=kc_; | |
308 } | |
309 | |
310 @Override | |
311 public void run(){ | |
312 | |
313 ListNum<Read> ln=cris.nextList(); | |
314 ArrayList<Read> reads=(ln!=null ? ln.list : null); | |
315 | |
316 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning | |
317 ArrayList<Read> out=new ArrayList<Read>(reads.size()); | |
318 for(Read r : reads){ | |
319 if(ecco && r.mate!=null){ | |
320 if(r.mate!=null){BBMerge.findOverlapStrict(r, r.mate, true);} | |
321 } | |
322 final long kmer=kc.hash(r, null, 0, false); | |
323 readsProcessedT++; | |
324 basesProcessedT+=r.length(); | |
325 if(kmer>=0){ | |
326 Read temp=new Read(toBytes(kmer), null, header, r.numericID); | |
327 out.add(temp); | |
328 } | |
329 } | |
330 if(ros!=null){ros.add(out, ln.id);} | |
331 cris.returnList(ln); | |
332 ln=cris.nextList(); | |
333 reads=(ln!=null ? ln.list : null); | |
334 } | |
335 if(ln!=null){ | |
336 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); | |
337 } | |
338 } | |
339 | |
340 final ConcurrentReadInputStream cris; | |
341 final ConcurrentReadOutputStream ros; | |
342 final KmerComparator kc; | |
343 | |
344 protected long readsProcessedT=0; | |
345 protected long basesProcessedT=0; | |
346 | |
347 private static final String header="1"; | |
348 } | |
349 | |
350 /*--------------------------------------------------------------*/ | |
351 /*---------------- Inner Methods ----------------*/ | |
352 /*--------------------------------------------------------------*/ | |
353 | |
354 public byte[] toBytes(final long kmer){ | |
355 byte[] dest=KillSwitch.allocByte1D(k); | |
356 fill(kmer, dest, 0); | |
357 return dest; | |
358 } | |
359 | |
360 public void fill(final long kmer, final byte[] dest, int pos){ | |
361 for(int i=k-1; i>=0; i--, pos++){ | |
362 int x=(int)((kmer>>(2*i))&3); | |
363 dest[pos]=AminoAcid.numberToBase[x]; | |
364 } | |
365 } | |
366 | |
367 /*--------------------------------------------------------------*/ | |
368 /*---------------- Fields ----------------*/ | |
369 /*--------------------------------------------------------------*/ | |
370 | |
371 private int k=31; | |
372 static boolean prefilter=true; | |
373 | |
374 /*--------------------------------------------------------------*/ | |
375 /*---------------- I/O Fields ----------------*/ | |
376 /*--------------------------------------------------------------*/ | |
377 | |
378 private String in1=null; | |
379 private String in2=null; | |
380 | |
381 private String out1=null; | |
382 | |
383 private String extin=null; | |
384 private String extout=null; | |
385 | |
386 /*--------------------------------------------------------------*/ | |
387 | |
388 protected long readsProcessed=0; | |
389 protected long basesProcessed=0; | |
390 | |
391 private long maxReads=-1; | |
392 protected boolean ecco=false; | |
393 | |
394 /*--------------------------------------------------------------*/ | |
395 /*---------------- Final Fields ----------------*/ | |
396 /*--------------------------------------------------------------*/ | |
397 | |
398 private final FileFormat ffin1; | |
399 private final FileFormat ffin2; | |
400 | |
401 private final FileFormat ffout1; | |
402 | |
403 /*--------------------------------------------------------------*/ | |
404 /*---------------- Common Fields ----------------*/ | |
405 /*--------------------------------------------------------------*/ | |
406 | |
407 private PrintStream outstream=System.err; | |
408 public static boolean verbose=false; | |
409 public boolean errorState=false; | |
410 private boolean overwrite=false; | |
411 private boolean append=false; | |
412 | |
413 } |