jpayne@68
|
1 package clump;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.ArrayList;
|
jpayne@68
|
6
|
jpayne@68
|
7 import bloom.KCountArray;
|
jpayne@68
|
8 import bloom.ReadCounter;
|
jpayne@68
|
9 import fileIO.ByteFile;
|
jpayne@68
|
10 import fileIO.FileFormat;
|
jpayne@68
|
11 import fileIO.ReadWrite;
|
jpayne@68
|
12 import jgi.BBMerge;
|
jpayne@68
|
13 import shared.Parse;
|
jpayne@68
|
14 import shared.Parser;
|
jpayne@68
|
15 import shared.PreParser;
|
jpayne@68
|
16 import shared.ReadStats;
|
jpayne@68
|
17 import shared.Shared;
|
jpayne@68
|
18 import shared.Timer;
|
jpayne@68
|
19 import shared.Tools;
|
jpayne@68
|
20 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
21 import stream.FASTQ;
|
jpayne@68
|
22 import stream.FastaReadInputStream;
|
jpayne@68
|
23 import stream.Read;
|
jpayne@68
|
24 import structures.ListNum;
|
jpayne@68
|
25
|
jpayne@68
|
26 /**
|
jpayne@68
|
27 * Reduces reads to their feature kmer.
|
jpayne@68
|
28 * @author Brian Bushnell
|
jpayne@68
|
29 * @date August 19, 2016
|
jpayne@68
|
30 *
|
jpayne@68
|
31 */
|
jpayne@68
|
32 public class PivotSet {
|
jpayne@68
|
33
|
jpayne@68
|
34 /*--------------------------------------------------------------*/
|
jpayne@68
|
35 /*---------------- Static Methods ----------------*/
|
jpayne@68
|
36 /*--------------------------------------------------------------*/
|
jpayne@68
|
37
|
jpayne@68
|
38 /**
|
jpayne@68
|
39 * Code entrance from the command line.
|
jpayne@68
|
40 * @param args Command line arguments
|
jpayne@68
|
41 */
|
jpayne@68
|
42 public static void main(String[] args){
|
jpayne@68
|
43 makeSet(args);
|
jpayne@68
|
44 }
|
jpayne@68
|
45
|
jpayne@68
|
46 public static KCountArray makeSet(String[] args){
|
jpayne@68
|
47 final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ;
|
jpayne@68
|
48 Timer t=new Timer();
|
jpayne@68
|
49 PivotSet x=new PivotSet(args);
|
jpayne@68
|
50 KCountArray kca=x.process(t, false);
|
jpayne@68
|
51 ReadWrite.USE_PIGZ=pigz;
|
jpayne@68
|
52 ReadWrite.USE_UNPIGZ=unpigz;
|
jpayne@68
|
53
|
jpayne@68
|
54 //Close the print stream if it was redirected
|
jpayne@68
|
55 Shared.closeStream(x.outstream);
|
jpayne@68
|
56
|
jpayne@68
|
57 return kca;
|
jpayne@68
|
58 }
|
jpayne@68
|
59
|
jpayne@68
|
60 /*--------------------------------------------------------------*/
|
jpayne@68
|
61 /*---------------- Initialization ----------------*/
|
jpayne@68
|
62 /*--------------------------------------------------------------*/
|
jpayne@68
|
63
|
jpayne@68
|
64 /**
|
jpayne@68
|
65 * Constructor.
|
jpayne@68
|
66 * @param args Command line arguments
|
jpayne@68
|
67 */
|
jpayne@68
|
68 public PivotSet(String[] args){
|
jpayne@68
|
69
|
jpayne@68
|
70 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
71 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
72 args=pp.args;
|
jpayne@68
|
73 outstream=pp.outstream;
|
jpayne@68
|
74 }
|
jpayne@68
|
75
|
jpayne@68
|
76 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
77 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
78
|
jpayne@68
|
79 Parser parser=new Parser();
|
jpayne@68
|
80 for(int i=0; i<args.length; i++){
|
jpayne@68
|
81 String arg=args[i];
|
jpayne@68
|
82 String[] split=arg.split("=");
|
jpayne@68
|
83 String a=split[0].toLowerCase();
|
jpayne@68
|
84 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
85
|
jpayne@68
|
86 if(parser.parse(arg, a, b)){
|
jpayne@68
|
87 //do nothing
|
jpayne@68
|
88 }else if(a.equals("verbose")){
|
jpayne@68
|
89 verbose=KmerComparator.verbose=Parse.parseBoolean(b);
|
jpayne@68
|
90 }else if(a.equals("parse_flag_goes_here")){
|
jpayne@68
|
91 //Set a variable here
|
jpayne@68
|
92 }else if(a.equals("k")){
|
jpayne@68
|
93 k=Integer.parseInt(b);
|
jpayne@68
|
94 assert(k>0 && k<32);
|
jpayne@68
|
95 }else if(a.equals("ecco")){
|
jpayne@68
|
96 ecco=Parse.parseBoolean(b);
|
jpayne@68
|
97 }else if(a.equals("rename") || a.equals("addname")){
|
jpayne@68
|
98 //do nothing
|
jpayne@68
|
99 }else if(a.equals("rcomp") || a.equals("reversecomplement")){
|
jpayne@68
|
100 //do nothing
|
jpayne@68
|
101 }else if(a.equals("condense") || a.equals("consensus")){
|
jpayne@68
|
102 //do nothing
|
jpayne@68
|
103 }else if(a.equals("mincount") || a.equals("consensus")){
|
jpayne@68
|
104 minCount=Integer.parseInt(b);
|
jpayne@68
|
105 }else if(a.equals("correct") || a.equals("ecc")){
|
jpayne@68
|
106 //do nothing
|
jpayne@68
|
107 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){
|
jpayne@68
|
108 //do nothing
|
jpayne@68
|
109 }else if(a.equals("seed")){
|
jpayne@68
|
110 KmerComparator.defaultSeed=Long.parseLong(b);
|
jpayne@68
|
111 }else if(a.equals("hashes")){
|
jpayne@68
|
112 KmerComparator.setHashes(Integer.parseInt(b));
|
jpayne@68
|
113 }else{
|
jpayne@68
|
114 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
115 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
116 // throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
117 }
|
jpayne@68
|
118 }
|
jpayne@68
|
119
|
jpayne@68
|
120 {//Process parser fields
|
jpayne@68
|
121 Parser.processQuality();
|
jpayne@68
|
122
|
jpayne@68
|
123 maxReads=parser.maxReads;
|
jpayne@68
|
124
|
jpayne@68
|
125 in1=parser.in1;
|
jpayne@68
|
126 in2=parser.in2;
|
jpayne@68
|
127
|
jpayne@68
|
128 extin=parser.extin;
|
jpayne@68
|
129 }
|
jpayne@68
|
130
|
jpayne@68
|
131 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
|
jpayne@68
|
132 in2=in1.replace("#", "2");
|
jpayne@68
|
133 in1=in1.replace("#", "1");
|
jpayne@68
|
134 }
|
jpayne@68
|
135 if(in2!=null){
|
jpayne@68
|
136 if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");}
|
jpayne@68
|
137 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
|
jpayne@68
|
138 }
|
jpayne@68
|
139
|
jpayne@68
|
140 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
141
|
jpayne@68
|
142 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
143 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
144 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
145 }
|
jpayne@68
|
146
|
jpayne@68
|
147 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
|
jpayne@68
|
148 ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true);
|
jpayne@68
|
149 }
|
jpayne@68
|
150
|
jpayne@68
|
151
|
jpayne@68
|
152 /*--------------------------------------------------------------*/
|
jpayne@68
|
153 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
154 /*--------------------------------------------------------------*/
|
jpayne@68
|
155
|
jpayne@68
|
156 private static long getCells(double fraction, int cbits){
|
jpayne@68
|
157 final long memory=Runtime.getRuntime().maxMemory();
|
jpayne@68
|
158 final long usable=(long)Tools.max(((memory-96000000)*.73), memory*0.45);
|
jpayne@68
|
159 final double filterMem=usable*fraction;
|
jpayne@68
|
160 return (long)((filterMem*8)/cbits);
|
jpayne@68
|
161 }
|
jpayne@68
|
162
|
jpayne@68
|
163 /** Create read streams and process all data */
|
jpayne@68
|
164 public KCountArray process(Timer t, boolean amino){
|
jpayne@68
|
165 int cbits=2;
|
jpayne@68
|
166 while((1L<<cbits)<=minCount){cbits*=2;}
|
jpayne@68
|
167 int filterHashes=2;
|
jpayne@68
|
168 float fraction=0.1f;
|
jpayne@68
|
169 long cells=getCells(fraction, cbits);
|
jpayne@68
|
170 ReadCounter rc=new ReadCounter(k, true, ecco, false, amino);
|
jpayne@68
|
171 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, maxReads, 1, 1, 1, 1, null, 0);
|
jpayne@68
|
172
|
jpayne@68
|
173 final ConcurrentReadInputStream cris;
|
jpayne@68
|
174 {
|
jpayne@68
|
175 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null);
|
jpayne@68
|
176 cris.start();
|
jpayne@68
|
177 if(verbose){outstream.println("Started cris");}
|
jpayne@68
|
178 }
|
jpayne@68
|
179
|
jpayne@68
|
180 readsProcessed=0;
|
jpayne@68
|
181 basesProcessed=0;
|
jpayne@68
|
182
|
jpayne@68
|
183 //Process the read stream
|
jpayne@68
|
184 processInner(cris, kca);
|
jpayne@68
|
185
|
jpayne@68
|
186 if(verbose){outstream.println("Finished; closing streams.");}
|
jpayne@68
|
187
|
jpayne@68
|
188 errorState|=ReadStats.writeAll();
|
jpayne@68
|
189 errorState|=ReadWrite.closeStreams(cris);
|
jpayne@68
|
190
|
jpayne@68
|
191 t.stop();
|
jpayne@68
|
192
|
jpayne@68
|
193 outstream.println("Made filter: \t"+kca.toShortString(filterHashes));
|
jpayne@68
|
194 outstream.println("Estimated pivots: \t"+(long)kca.estimateUniqueKmers(filterHashes));
|
jpayne@68
|
195 outstream.println("Estimated pivots >1x: \t"+(long)kca.estimateUniqueKmers(filterHashes, minCount));
|
jpayne@68
|
196
|
jpayne@68
|
197 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
|
jpayne@68
|
198
|
jpayne@68
|
199 if(errorState){
|
jpayne@68
|
200 Clumpify.sharedErrorState=true;
|
jpayne@68
|
201 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
202 }
|
jpayne@68
|
203 return kca;
|
jpayne@68
|
204 }
|
jpayne@68
|
205
|
jpayne@68
|
206 /** Manage threads */
|
jpayne@68
|
207 public static KCountArray makeKcaStatic(final ConcurrentReadInputStream cris, int k, int minCount, boolean amino){
|
jpayne@68
|
208
|
jpayne@68
|
209 KmerComparator kc=new KmerComparator(k, false, false);
|
jpayne@68
|
210 int cbits=2;
|
jpayne@68
|
211 while((1L<<cbits)<=minCount){cbits*=2;}
|
jpayne@68
|
212 int filterHashes=2;
|
jpayne@68
|
213 float fraction=0.1f;
|
jpayne@68
|
214 long cells=getCells(fraction, cbits);
|
jpayne@68
|
215 ReadCounter rc=new ReadCounter(k, true, false, false, amino);
|
jpayne@68
|
216 KCountArray kca=rc.makeKca(null, null, null, cbits, cells, filterHashes, 0, -1, 1, 1, 1, 1, null, 0);
|
jpayne@68
|
217
|
jpayne@68
|
218 if(verbose){System.err.println("Making hash threads.");}
|
jpayne@68
|
219 final int threads=Shared.threads();
|
jpayne@68
|
220 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
|
jpayne@68
|
221 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, false));}
|
jpayne@68
|
222
|
jpayne@68
|
223 if(verbose){System.err.println("Starting threads.");}
|
jpayne@68
|
224 for(HashThread ht : alht){ht.start();}
|
jpayne@68
|
225
|
jpayne@68
|
226 if(verbose){System.err.println("Waiting for threads.");}
|
jpayne@68
|
227 /* Wait for threads to die */
|
jpayne@68
|
228 for(HashThread ht : alht){
|
jpayne@68
|
229
|
jpayne@68
|
230 /* Wait for a thread to die */
|
jpayne@68
|
231 while(ht.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
232 try {
|
jpayne@68
|
233 ht.join();
|
jpayne@68
|
234 } catch (InterruptedException e) {
|
jpayne@68
|
235 e.printStackTrace();
|
jpayne@68
|
236 }
|
jpayne@68
|
237 }
|
jpayne@68
|
238 }
|
jpayne@68
|
239 kca.shutdown();
|
jpayne@68
|
240 return kca;
|
jpayne@68
|
241 }
|
jpayne@68
|
242
|
jpayne@68
|
243 /** Manage threads */
|
jpayne@68
|
244 public void processInner(final ConcurrentReadInputStream cris, KCountArray kca){
|
jpayne@68
|
245 if(verbose){outstream.println("Making comparator.");}
|
jpayne@68
|
246 KmerComparator kc=new KmerComparator(k, false, false);
|
jpayne@68
|
247
|
jpayne@68
|
248 if(verbose){outstream.println("Making hash threads.");}
|
jpayne@68
|
249 final int threads=Shared.threads();
|
jpayne@68
|
250 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
|
jpayne@68
|
251 for(int i=0; i<threads; i++){alht.add(new HashThread(cris, kc, kca, ecco));}
|
jpayne@68
|
252
|
jpayne@68
|
253 if(verbose){outstream.println("Starting threads.");}
|
jpayne@68
|
254 for(HashThread ht : alht){ht.start();}
|
jpayne@68
|
255
|
jpayne@68
|
256 if(verbose){outstream.println("Waiting for threads.");}
|
jpayne@68
|
257 /* Wait for threads to die */
|
jpayne@68
|
258 for(HashThread ht : alht){
|
jpayne@68
|
259
|
jpayne@68
|
260 /* Wait for a thread to die */
|
jpayne@68
|
261 while(ht.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
262 try {
|
jpayne@68
|
263 ht.join();
|
jpayne@68
|
264 } catch (InterruptedException e) {
|
jpayne@68
|
265 e.printStackTrace();
|
jpayne@68
|
266 }
|
jpayne@68
|
267 }
|
jpayne@68
|
268 readsProcessed+=ht.readsProcessedT;
|
jpayne@68
|
269 basesProcessed+=ht.basesProcessedT;
|
jpayne@68
|
270 }
|
jpayne@68
|
271 kca.shutdown();
|
jpayne@68
|
272 }
|
jpayne@68
|
273
|
jpayne@68
|
274 /*--------------------------------------------------------------*/
|
jpayne@68
|
275 /*---------------- Inner Classes ----------------*/
|
jpayne@68
|
276 /*--------------------------------------------------------------*/
|
jpayne@68
|
277
|
jpayne@68
|
278 private static class HashThread extends Thread{
|
jpayne@68
|
279
|
jpayne@68
|
280 HashThread(ConcurrentReadInputStream cris_, KmerComparator kc_, KCountArray kca_, boolean ecco_){
|
jpayne@68
|
281 cris=cris_;
|
jpayne@68
|
282 kc=kc_;
|
jpayne@68
|
283 kca=kca_;
|
jpayne@68
|
284 ecco=ecco_;
|
jpayne@68
|
285 }
|
jpayne@68
|
286
|
jpayne@68
|
287 @Override
|
jpayne@68
|
288 public void run(){
|
jpayne@68
|
289
|
jpayne@68
|
290 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
291 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
292
|
jpayne@68
|
293 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
294 for(Read r1 : reads){
|
jpayne@68
|
295 Read r2=r1.mate;
|
jpayne@68
|
296 readsProcessedT+=r1.pairCount();
|
jpayne@68
|
297 basesProcessedT+=r1.pairLength();
|
jpayne@68
|
298 if(ecco && r2!=null){
|
jpayne@68
|
299 if(r2!=null){BBMerge.findOverlapStrict(r1, r2, true);}
|
jpayne@68
|
300 }
|
jpayne@68
|
301 {
|
jpayne@68
|
302 final long kmer=kc.hash(r1, null, 0, false);
|
jpayne@68
|
303 if(kmer>=0){
|
jpayne@68
|
304 kca.increment(kmer);
|
jpayne@68
|
305 }
|
jpayne@68
|
306 }
|
jpayne@68
|
307 if(r2!=null){
|
jpayne@68
|
308 final long kmer=kc.hash(r2, null, 0, false);
|
jpayne@68
|
309 if(kmer>=0){
|
jpayne@68
|
310 kca.increment(kmer);
|
jpayne@68
|
311 }
|
jpayne@68
|
312 }
|
jpayne@68
|
313 }
|
jpayne@68
|
314 cris.returnList(ln);
|
jpayne@68
|
315 ln=cris.nextList();
|
jpayne@68
|
316 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
317 }
|
jpayne@68
|
318 if(ln!=null){
|
jpayne@68
|
319 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
|
jpayne@68
|
320 }
|
jpayne@68
|
321 }
|
jpayne@68
|
322
|
jpayne@68
|
323 final ConcurrentReadInputStream cris;
|
jpayne@68
|
324 final KmerComparator kc;
|
jpayne@68
|
325 final KCountArray kca;
|
jpayne@68
|
326 final boolean ecco;
|
jpayne@68
|
327
|
jpayne@68
|
328 protected long readsProcessedT=0;
|
jpayne@68
|
329 protected long basesProcessedT=0;
|
jpayne@68
|
330 }
|
jpayne@68
|
331
|
jpayne@68
|
332 /*--------------------------------------------------------------*/
|
jpayne@68
|
333 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
334 /*--------------------------------------------------------------*/
|
jpayne@68
|
335
|
jpayne@68
|
336 /*--------------------------------------------------------------*/
|
jpayne@68
|
337 /*---------------- Fields ----------------*/
|
jpayne@68
|
338 /*--------------------------------------------------------------*/
|
jpayne@68
|
339
|
jpayne@68
|
340 private int k=31;
|
jpayne@68
|
341 private int minCount=2;
|
jpayne@68
|
342
|
jpayne@68
|
343 /*--------------------------------------------------------------*/
|
jpayne@68
|
344 /*---------------- I/O Fields ----------------*/
|
jpayne@68
|
345 /*--------------------------------------------------------------*/
|
jpayne@68
|
346
|
jpayne@68
|
347 private String in1=null;
|
jpayne@68
|
348 private String in2=null;
|
jpayne@68
|
349
|
jpayne@68
|
350 private String extin=null;
|
jpayne@68
|
351
|
jpayne@68
|
352 /*--------------------------------------------------------------*/
|
jpayne@68
|
353
|
jpayne@68
|
354 protected long readsProcessed=0;
|
jpayne@68
|
355 protected long basesProcessed=0;
|
jpayne@68
|
356
|
jpayne@68
|
357 private long maxReads=-1;
|
jpayne@68
|
358 private boolean ecco=false;
|
jpayne@68
|
359
|
jpayne@68
|
360 /*--------------------------------------------------------------*/
|
jpayne@68
|
361 /*---------------- Final Fields ----------------*/
|
jpayne@68
|
362 /*--------------------------------------------------------------*/
|
jpayne@68
|
363
|
jpayne@68
|
364 private final FileFormat ffin1;
|
jpayne@68
|
365 private final FileFormat ffin2;
|
jpayne@68
|
366
|
jpayne@68
|
367 /*--------------------------------------------------------------*/
|
jpayne@68
|
368 /*---------------- Common Fields ----------------*/
|
jpayne@68
|
369 /*--------------------------------------------------------------*/
|
jpayne@68
|
370
|
jpayne@68
|
371 private PrintStream outstream=System.err;
|
jpayne@68
|
372 public static boolean verbose=false;
|
jpayne@68
|
373 public boolean errorState=false;
|
jpayne@68
|
374
|
jpayne@68
|
375 }
|