jpayne@68
|
1 package clump;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.ArrayList;
|
jpayne@68
|
6
|
jpayne@68
|
7 import bloom.KCountArray;
|
jpayne@68
|
8 import fileIO.ByteFile;
|
jpayne@68
|
9 import fileIO.FileFormat;
|
jpayne@68
|
10 import fileIO.ReadWrite;
|
jpayne@68
|
11 import jgi.BBMerge;
|
jpayne@68
|
12 import shared.KillSwitch;
|
jpayne@68
|
13 import shared.Parse;
|
jpayne@68
|
14 import shared.Parser;
|
jpayne@68
|
15 import shared.PreParser;
|
jpayne@68
|
16 import shared.ReadStats;
|
jpayne@68
|
17 import shared.Shared;
|
jpayne@68
|
18 import shared.Timer;
|
jpayne@68
|
19 import shared.Tools;
|
jpayne@68
|
20 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
21 import stream.ConcurrentReadOutputStream;
|
jpayne@68
|
22 import stream.FASTQ;
|
jpayne@68
|
23 import stream.FastaReadInputStream;
|
jpayne@68
|
24 import stream.Read;
|
jpayne@68
|
25 import structures.ListNum;
|
jpayne@68
|
26 import structures.Quantizer;
|
jpayne@68
|
27
|
jpayne@68
|
28 /**
|
jpayne@68
|
29 * @author Brian Bushnell
|
jpayne@68
|
30 * @date June 20, 2014
|
jpayne@68
|
31 *
|
jpayne@68
|
32 */
|
jpayne@68
|
33 public class KmerSplit {
|
jpayne@68
|
34
|
jpayne@68
|
35 /*--------------------------------------------------------------*/
|
jpayne@68
|
36 /*---------------- Initialization ----------------*/
|
jpayne@68
|
37 /*--------------------------------------------------------------*/
|
jpayne@68
|
38
|
jpayne@68
|
39 /**
|
jpayne@68
|
40 * Code entrance from the command line.
|
jpayne@68
|
41 * @param args Command line arguments
|
jpayne@68
|
42 */
|
jpayne@68
|
43 public static void main(String[] args){
|
jpayne@68
|
44 final boolean pigz=ReadWrite.USE_PIGZ, unpigz=ReadWrite.USE_UNPIGZ;
|
jpayne@68
|
45 final boolean oldFInt=FASTQ.FORCE_INTERLEAVED, oldTInt=FASTQ.TEST_INTERLEAVED;
|
jpayne@68
|
46 final int zl=ReadWrite.ZIPLEVEL;
|
jpayne@68
|
47 final float ztd=ReadWrite.ZIP_THREAD_MULT;
|
jpayne@68
|
48 final int mzt=ReadWrite.MAX_ZIP_THREADS;
|
jpayne@68
|
49 Timer t=new Timer();
|
jpayne@68
|
50 KmerSplit x=new KmerSplit(args);
|
jpayne@68
|
51 ReadWrite.ZIPLEVEL=Tools.min(ReadWrite.ZIPLEVEL, maxZipLevel);
|
jpayne@68
|
52 x.process(t);
|
jpayne@68
|
53 ReadWrite.USE_PIGZ=pigz;
|
jpayne@68
|
54 ReadWrite.USE_UNPIGZ=unpigz;
|
jpayne@68
|
55 ReadWrite.ZIPLEVEL=zl;
|
jpayne@68
|
56 ReadWrite.ZIP_THREAD_MULT=ztd;
|
jpayne@68
|
57 ReadWrite.MAX_ZIP_THREADS=mzt;
|
jpayne@68
|
58 FASTQ.FORCE_INTERLEAVED=oldFInt;
|
jpayne@68
|
59 FASTQ.TEST_INTERLEAVED=oldTInt;
|
jpayne@68
|
60
|
jpayne@68
|
61 //Close the print stream if it was redirected
|
jpayne@68
|
62 Shared.closeStream(x.outstream);
|
jpayne@68
|
63 }
|
jpayne@68
|
64
|
jpayne@68
|
65 /**
|
jpayne@68
|
66 * Constructor.
|
jpayne@68
|
67 * @param args Command line arguments
|
jpayne@68
|
68 */
|
jpayne@68
|
69 public KmerSplit(String[] args){
|
jpayne@68
|
70
|
jpayne@68
|
71 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
72 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
73 args=pp.args;
|
jpayne@68
|
74 outstream=pp.outstream;
|
jpayne@68
|
75 }
|
jpayne@68
|
76
|
jpayne@68
|
77 ReadWrite.USE_PIGZ=false;
|
jpayne@68
|
78 ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
79 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
80
|
jpayne@68
|
81 boolean setInterleaved=false; //Whether it was explicitly set.
|
jpayne@68
|
82 Parser parser=new Parser();
|
jpayne@68
|
83
|
jpayne@68
|
84 for(int i=0; i<args.length; i++){
|
jpayne@68
|
85 String arg=args[i];
|
jpayne@68
|
86 String[] split=arg.split("=");
|
jpayne@68
|
87 String a=split[0].toLowerCase();
|
jpayne@68
|
88 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
89
|
jpayne@68
|
90 if(parser.parse(arg, a, b)){
|
jpayne@68
|
91 //do nothing
|
jpayne@68
|
92 }else if(a.equals("verbose")){
|
jpayne@68
|
93 verbose=KmerComparator.verbose=Parse.parseBoolean(b);
|
jpayne@68
|
94 }else if(a.equals("parse_flag_goes_here")){
|
jpayne@68
|
95 //Set a variable here
|
jpayne@68
|
96 }else if(a.equals("k")){
|
jpayne@68
|
97 k=Integer.parseInt(b);
|
jpayne@68
|
98 assert(k>0 && k<32);
|
jpayne@68
|
99 }else if(a.equals("mincount") || a.equals("mincr")){
|
jpayne@68
|
100 minCount=Integer.parseInt(b);
|
jpayne@68
|
101 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){
|
jpayne@68
|
102 groups=Integer.parseInt(b);
|
jpayne@68
|
103 }else if(a.equals("rename") || a.equals("addname")){
|
jpayne@68
|
104 //Do nothing
|
jpayne@68
|
105 // addName=Parse.parseBoolean(b);
|
jpayne@68
|
106 }else if(a.equals("shortname") || a.equals("shortnames")){
|
jpayne@68
|
107 if(b!=null && b.equals("shrink")){
|
jpayne@68
|
108 shrinkName=true;
|
jpayne@68
|
109 }else{
|
jpayne@68
|
110 shrinkName=false;
|
jpayne@68
|
111 shortName=Parse.parseBoolean(b);
|
jpayne@68
|
112 }
|
jpayne@68
|
113 }else if(a.equals("rcomp") || a.equals("reversecomplement")){
|
jpayne@68
|
114 //ignore rcomp=Parse.parseBoolean(b);
|
jpayne@68
|
115 }else if(a.equals("condense") || a.equals("consensus") || a.equals("concensus")){//Note the last one is intentionally misspelled
|
jpayne@68
|
116 //ignore
|
jpayne@68
|
117 }else if(a.equals("correct") || a.equals("ecc")){
|
jpayne@68
|
118 //ignore
|
jpayne@68
|
119 }else if(a.equals("passes")){
|
jpayne@68
|
120 int x=Integer.parseInt(b);
|
jpayne@68
|
121 // if(x>1){outstream.println("Warning: KmerSplit does not support multiple passes.");}
|
jpayne@68
|
122 }
|
jpayne@68
|
123
|
jpayne@68
|
124 else if(a.equals("dedupe")){
|
jpayne@68
|
125 //ignore
|
jpayne@68
|
126 }else if(a.equals("entryfilter")){
|
jpayne@68
|
127 //ignore
|
jpayne@68
|
128 }else if(a.equals("markduplicates")){
|
jpayne@68
|
129 //ignore
|
jpayne@68
|
130 }else if(a.equals("markall")){
|
jpayne@68
|
131 //ignore
|
jpayne@68
|
132 }else if(a.equals("addcount") || a.equals("renamebycount")){
|
jpayne@68
|
133 //ignore
|
jpayne@68
|
134 }else if(a.equals("optical") || a.equals("opticalonly")){
|
jpayne@68
|
135 //ignore
|
jpayne@68
|
136 }else if(a.equals("dupesubs") || a.equals("duplicatesubs") || a.equals("dsubs") || a.equals("subs") || a.equals("s")){
|
jpayne@68
|
137 //ignore
|
jpayne@68
|
138 }else if(a.equals("dupedist") || a.equals("duplicatedistance") || a.equals("ddist") || a.equals("dist") || a.equals("opticaldist") || a.equals("distance")){
|
jpayne@68
|
139 //ignore
|
jpayne@68
|
140 }else if(a.equals("scanlimit") || a.equals("scan")){
|
jpayne@68
|
141 //ignore
|
jpayne@68
|
142 }else if(a.equals("removeallduplicates") || a.equals("allduplicates")){
|
jpayne@68
|
143 //ignore
|
jpayne@68
|
144 }else if(a.equals("allowns")){
|
jpayne@68
|
145 //ignore
|
jpayne@68
|
146 }else if(a.equals("containment") || a.equals("absorbcontainment") || a.equals("ac") || a.equals("contains")){
|
jpayne@68
|
147 //ignore
|
jpayne@68
|
148 }else if(a.equalsIgnoreCase("prefixOrSuffix") || a.equalsIgnoreCase("suffixOrPrefix") || a.equals("affix") || a.equals("pos")){
|
jpayne@68
|
149 //ignore
|
jpayne@68
|
150 }else if(a.equals("printduplicates")){
|
jpayne@68
|
151 //ignore
|
jpayne@68
|
152 }else if(a.equals("dupeidentity")){
|
jpayne@68
|
153 //ignore
|
jpayne@68
|
154 }else if(a.equals("dupesubrate") || a.equals("dsr") || a.equals("subrate")){
|
jpayne@68
|
155 //ignore
|
jpayne@68
|
156 }
|
jpayne@68
|
157
|
jpayne@68
|
158 else if(a.equals("prefilter")){
|
jpayne@68
|
159 KmerReduce.prefilter=Parse.parseBoolean(b);
|
jpayne@68
|
160 }else if(a.equals("ecco")){
|
jpayne@68
|
161 ecco=Parse.parseBoolean(b);
|
jpayne@68
|
162 }else if(a.equals("seed")){
|
jpayne@68
|
163 KmerComparator.defaultSeed=Long.parseLong(b);
|
jpayne@68
|
164 }else if(a.equals("hashes")){
|
jpayne@68
|
165 KmerComparator.setHashes(Integer.parseInt(b));
|
jpayne@68
|
166 }else if(a.equals("border")){
|
jpayne@68
|
167 KmerComparator.defaultBorder=Integer.parseInt(b);
|
jpayne@68
|
168 }else if(a.equals("minprob")){
|
jpayne@68
|
169 KmerComparator.minProb=Float.parseFloat(b);
|
jpayne@68
|
170 }else if(a.equals("unpair")){
|
jpayne@68
|
171 unpair=Parse.parseBoolean(b);
|
jpayne@68
|
172 }else if(a.equals("repair")){
|
jpayne@68
|
173 //Do nothing
|
jpayne@68
|
174 }else if(a.equals("namesort") || a.equals("sort")){
|
jpayne@68
|
175 //Do nothing
|
jpayne@68
|
176 }else if(a.equals("fetchthreads")){
|
jpayne@68
|
177 //Do nothing
|
jpayne@68
|
178 }else if(a.equals("reorder") || a.equals("reorderclumps")){
|
jpayne@68
|
179 //reorder=Parse.parseBoolean(b);
|
jpayne@68
|
180 }else if(a.equals("reorderpaired") || a.equals("reorderclumpspaired")){
|
jpayne@68
|
181 // reorderpaired=Parse.parseBoolean(b);
|
jpayne@68
|
182 }
|
jpayne@68
|
183
|
jpayne@68
|
184
|
jpayne@68
|
185 else if(Clump.parseStatic(arg, a, b)){
|
jpayne@68
|
186 //Do nothing
|
jpayne@68
|
187 }
|
jpayne@68
|
188
|
jpayne@68
|
189 else{
|
jpayne@68
|
190 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
191 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
192 // throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
193 }
|
jpayne@68
|
194 }
|
jpayne@68
|
195
|
jpayne@68
|
196 {//Process parser fields
|
jpayne@68
|
197 Parser.processQuality();
|
jpayne@68
|
198
|
jpayne@68
|
199 maxReads=parser.maxReads;
|
jpayne@68
|
200
|
jpayne@68
|
201 overwrite=ReadStats.overwrite=parser.overwrite;
|
jpayne@68
|
202 append=ReadStats.append=parser.append;
|
jpayne@68
|
203
|
jpayne@68
|
204 setInterleaved=parser.setInterleaved;
|
jpayne@68
|
205
|
jpayne@68
|
206 in1=parser.in1;
|
jpayne@68
|
207 in2=parser.in2;
|
jpayne@68
|
208
|
jpayne@68
|
209 out1=parser.out1;
|
jpayne@68
|
210
|
jpayne@68
|
211 extin=parser.extin;
|
jpayne@68
|
212 extout=parser.extout;
|
jpayne@68
|
213 }
|
jpayne@68
|
214
|
jpayne@68
|
215 if(groups>2){ReadWrite.USE_PIGZ=false;}
|
jpayne@68
|
216
|
jpayne@68
|
217 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
|
jpayne@68
|
218 in2=in1.replace("#", "2");
|
jpayne@68
|
219 in1=in1.replace("#", "1");
|
jpayne@68
|
220 }
|
jpayne@68
|
221 if(in2!=null){
|
jpayne@68
|
222 if(FASTQ.FORCE_INTERLEAVED){outstream.println("Reset INTERLEAVED to false because paired input files were specified.");}
|
jpayne@68
|
223 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
|
jpayne@68
|
224 }
|
jpayne@68
|
225
|
jpayne@68
|
226 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
227
|
jpayne@68
|
228 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
229 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
230 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
231 }
|
jpayne@68
|
232
|
jpayne@68
|
233 if(!setInterleaved){
|
jpayne@68
|
234 assert(in1!=null) : "\nin1="+in1+"\nin2="+in2+"\nout1="+out1+"\n";
|
jpayne@68
|
235 if(in2!=null){ //If there are 2 input streams.
|
jpayne@68
|
236 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
|
jpayne@68
|
237 outstream.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
|
jpayne@68
|
238 }
|
jpayne@68
|
239 }
|
jpayne@68
|
240
|
jpayne@68
|
241 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
|
jpayne@68
|
242
|
jpayne@68
|
243 if(out1!=null){
|
jpayne@68
|
244 assert(out1.contains("%"));
|
jpayne@68
|
245 outArray=new String[groups];
|
jpayne@68
|
246 for(int i=0; i<groups; i++){
|
jpayne@68
|
247 outArray[i]=out1.replaceFirst("%", ""+i);
|
jpayne@68
|
248 }
|
jpayne@68
|
249 if(!Tools.testOutputFiles(overwrite, append, false, outArray)){
|
jpayne@68
|
250 outstream.println((out1==null)+", "+out1);
|
jpayne@68
|
251 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
|
jpayne@68
|
252 }
|
jpayne@68
|
253 ffout=new FileFormat[groups];
|
jpayne@68
|
254 if(groups>1){ReadWrite.setZipThreadMult(Tools.min(0.5f, 2f/(groups+1)));}
|
jpayne@68
|
255 for(int i=0; i<groups; i++){
|
jpayne@68
|
256 ffout[i]=FileFormat.testOutput(outArray[i], FileFormat.FASTQ, extout, groups<10, overwrite, append, false);
|
jpayne@68
|
257 }
|
jpayne@68
|
258 }else{
|
jpayne@68
|
259 outArray=null;
|
jpayne@68
|
260 throw new RuntimeException("out is a required parameter.");
|
jpayne@68
|
261 }
|
jpayne@68
|
262
|
jpayne@68
|
263 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
|
jpayne@68
|
264 ffin2=FileFormat.testInput(in2, FileFormat.FASTQ, extin, true, true);
|
jpayne@68
|
265 }
|
jpayne@68
|
266
|
jpayne@68
|
267
|
jpayne@68
|
268 /*--------------------------------------------------------------*/
|
jpayne@68
|
269 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
270 /*--------------------------------------------------------------*/
|
jpayne@68
|
271
|
jpayne@68
|
272 /** Count kmers */
|
jpayne@68
|
273 void preprocess(){
|
jpayne@68
|
274 if(minCount>1){
|
jpayne@68
|
275 table=ClumpTools.getTable(in1, in2, k, minCount);
|
jpayne@68
|
276 }
|
jpayne@68
|
277 }
|
jpayne@68
|
278
|
jpayne@68
|
279 /** Create read streams and process all data */
|
jpayne@68
|
280 void process(Timer t){
|
jpayne@68
|
281
|
jpayne@68
|
282 preprocess();
|
jpayne@68
|
283
|
jpayne@68
|
284 final ConcurrentReadInputStream cris;
|
jpayne@68
|
285 {
|
jpayne@68
|
286 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, ffin2, null, null);
|
jpayne@68
|
287 cris.start();
|
jpayne@68
|
288 if(verbose){outstream.println("Started cris");}
|
jpayne@68
|
289 }
|
jpayne@68
|
290 boolean paired=cris.paired();
|
jpayne@68
|
291 if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));}
|
jpayne@68
|
292 if(cris.paired() && (in1==null || !in1.contains(".sam") && !unpair)){
|
jpayne@68
|
293 outstream.println("Writing interleaved.");
|
jpayne@68
|
294 }
|
jpayne@68
|
295
|
jpayne@68
|
296 final ConcurrentReadOutputStream ros[]=new ConcurrentReadOutputStream[groups];
|
jpayne@68
|
297 try {
|
jpayne@68
|
298 for(int i=0; i<groups; i++){
|
jpayne@68
|
299 final int buff=8;
|
jpayne@68
|
300
|
jpayne@68
|
301 assert(!out1.equalsIgnoreCase(in1) && !out1.equalsIgnoreCase(in1)) : "Input file and output file have same name.";
|
jpayne@68
|
302
|
jpayne@68
|
303 ros[i]=ConcurrentReadOutputStream.getStream(ffout[i], null, null, null, buff, null, false);
|
jpayne@68
|
304 ros[i].start();
|
jpayne@68
|
305 }
|
jpayne@68
|
306 } catch (OutOfMemoryError e) {
|
jpayne@68
|
307 KillSwitch.memKill(e);
|
jpayne@68
|
308 }
|
jpayne@68
|
309
|
jpayne@68
|
310 readsProcessed=0;
|
jpayne@68
|
311 basesProcessed=0;
|
jpayne@68
|
312
|
jpayne@68
|
313 //Process the read stream
|
jpayne@68
|
314 processInner(cris, ros);
|
jpayne@68
|
315
|
jpayne@68
|
316 errorState|=ReadStats.writeAll();
|
jpayne@68
|
317
|
jpayne@68
|
318 t.stop();
|
jpayne@68
|
319
|
jpayne@68
|
320 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
|
jpayne@68
|
321
|
jpayne@68
|
322 if(errorState){
|
jpayne@68
|
323 Clumpify.sharedErrorState=true;
|
jpayne@68
|
324 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
325 }
|
jpayne@68
|
326 }
|
jpayne@68
|
327
|
jpayne@68
|
328 /** Collect and sort the reads */
|
jpayne@68
|
329 void processInner(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream[] ros){
|
jpayne@68
|
330 if(verbose){outstream.println("Making comparator.");}
|
jpayne@68
|
331 KmerComparator kc=new KmerComparator(k, false, false);
|
jpayne@68
|
332 if(verbose){outstream.println("Seed: "+kc.seed);}
|
jpayne@68
|
333
|
jpayne@68
|
334 if(verbose){outstream.println("Splitting reads.");}
|
jpayne@68
|
335 splitReads(cris, ros, kc);
|
jpayne@68
|
336 lastMemProcessed=memProcessed;
|
jpayne@68
|
337
|
jpayne@68
|
338 if(verbose){outstream.println("Done!");}
|
jpayne@68
|
339 }
|
jpayne@68
|
340
|
jpayne@68
|
341 public void splitReads(final ConcurrentReadInputStream cris, final ConcurrentReadOutputStream[] ros, final KmerComparator kc){
|
jpayne@68
|
342 Timer t=new Timer();
|
jpayne@68
|
343 if(verbose){t.start("Making hash threads.");}
|
jpayne@68
|
344 final int threads=Shared.threads();
|
jpayne@68
|
345 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
|
jpayne@68
|
346 for(int i=0; i<threads; i++){alht.add(new HashThread(i, cris, ros, kc));}
|
jpayne@68
|
347
|
jpayne@68
|
348 if(verbose){outstream.println("Starting threads.");}
|
jpayne@68
|
349 for(HashThread ht : alht){ht.start();}
|
jpayne@68
|
350
|
jpayne@68
|
351
|
jpayne@68
|
352 if(verbose){outstream.println("Waiting for threads.");}
|
jpayne@68
|
353 /* Wait for threads to die */
|
jpayne@68
|
354 for(HashThread ht : alht){
|
jpayne@68
|
355
|
jpayne@68
|
356 /* Wait for a thread to die */
|
jpayne@68
|
357 while(ht.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
358 try {
|
jpayne@68
|
359 ht.join();
|
jpayne@68
|
360 } catch (InterruptedException e) {
|
jpayne@68
|
361 e.printStackTrace();
|
jpayne@68
|
362 }
|
jpayne@68
|
363 }
|
jpayne@68
|
364 readsProcessed+=ht.readsProcessedT;
|
jpayne@68
|
365 basesProcessed+=ht.basesProcessedT;
|
jpayne@68
|
366 diskProcessed+=ht.diskProcessedT;
|
jpayne@68
|
367 memProcessed+=ht.memProcessedT;
|
jpayne@68
|
368 }
|
jpayne@68
|
369
|
jpayne@68
|
370 if(verbose){outstream.println("Closing streams.");}
|
jpayne@68
|
371 errorState=ReadWrite.closeStreams(cris, ros)|errorState;
|
jpayne@68
|
372 if(verbose){t.stop("Split time: ");}
|
jpayne@68
|
373 }
|
jpayne@68
|
374
|
jpayne@68
|
375 /*--------------------------------------------------------------*/
|
jpayne@68
|
376 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
377 /*--------------------------------------------------------------*/
|
jpayne@68
|
378
|
jpayne@68
|
379 /*--------------------------------------------------------------*/
|
jpayne@68
|
380 /*---------------- Inner Classes ----------------*/
|
jpayne@68
|
381 /*--------------------------------------------------------------*/
|
jpayne@68
|
382
|
jpayne@68
|
383 private class HashThread extends Thread{
|
jpayne@68
|
384
|
jpayne@68
|
385 HashThread(int id_, ConcurrentReadInputStream cris_, ConcurrentReadOutputStream[] ros_, KmerComparator kc_){
|
jpayne@68
|
386 id=id_;
|
jpayne@68
|
387 cris=cris_;
|
jpayne@68
|
388 ros=ros_;
|
jpayne@68
|
389 kc=kc_;
|
jpayne@68
|
390 }
|
jpayne@68
|
391
|
jpayne@68
|
392 @Override
|
jpayne@68
|
393 public void run(){
|
jpayne@68
|
394
|
jpayne@68
|
395 final boolean paired=cris.paired();
|
jpayne@68
|
396 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
397 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
398
|
jpayne@68
|
399 ArrayList<Read>[] array=new ArrayList[groups];
|
jpayne@68
|
400 for(int i=0; i<groups; i++){
|
jpayne@68
|
401 array[i]=new ArrayList<Read>(buffer);
|
jpayne@68
|
402 }
|
jpayne@68
|
403
|
jpayne@68
|
404 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
405
|
jpayne@68
|
406 for(Read r : reads){
|
jpayne@68
|
407 if(!r.validated()){
|
jpayne@68
|
408 r.validate(true);
|
jpayne@68
|
409 if(r.mate!=null){r.mate.validate(true);}
|
jpayne@68
|
410 }
|
jpayne@68
|
411 readsProcessedT+=1+r.mateCount();
|
jpayne@68
|
412 basesProcessedT+=r.length()+r.mateLength();
|
jpayne@68
|
413 diskProcessedT+=r.countFastqBytes()+r.countMateFastqBytes();
|
jpayne@68
|
414 memProcessedT+=r.countBytes()+r.countMateBytes()+ReadKey.overhead;
|
jpayne@68
|
415 if(shrinkName){
|
jpayne@68
|
416 Clumpify.shrinkName(r);
|
jpayne@68
|
417 Clumpify.shrinkName(r.mate);
|
jpayne@68
|
418 }else if(shortName){
|
jpayne@68
|
419 Clumpify.shortName(r);
|
jpayne@68
|
420 Clumpify.shortName(r.mate);
|
jpayne@68
|
421 }
|
jpayne@68
|
422
|
jpayne@68
|
423 if(quantizeQuality){
|
jpayne@68
|
424 Quantizer.quantize(r, r.mate);
|
jpayne@68
|
425 }
|
jpayne@68
|
426 }
|
jpayne@68
|
427
|
jpayne@68
|
428 if(ecco){
|
jpayne@68
|
429 for(Read r : reads){
|
jpayne@68
|
430 if(r.mate!=null){BBMerge.findOverlapStrict(r, r.mate, true);}
|
jpayne@68
|
431 }
|
jpayne@68
|
432 }
|
jpayne@68
|
433
|
jpayne@68
|
434 ArrayList<Read> hashList=reads;
|
jpayne@68
|
435 if(paired && unpair){
|
jpayne@68
|
436 hashList=new ArrayList<Read>(reads.size()*2);
|
jpayne@68
|
437 for(Read r1 : reads){
|
jpayne@68
|
438 Read r2=r1.mate;
|
jpayne@68
|
439 hashList.add(r1);
|
jpayne@68
|
440 hashList.add(r2);
|
jpayne@68
|
441 r1.mate=null;
|
jpayne@68
|
442 r2.mate=null;
|
jpayne@68
|
443 }
|
jpayne@68
|
444 }
|
jpayne@68
|
445
|
jpayne@68
|
446 kc.hash(hashList, table, minCount, true);
|
jpayne@68
|
447 for(Read r : hashList){
|
jpayne@68
|
448 long kmer=((ReadKey)r.obj).kmer;
|
jpayne@68
|
449 long code=kc.hash(kmer);
|
jpayne@68
|
450 int code2=(int)(code%groups);
|
jpayne@68
|
451 assert(code2>=0 && code2<array.length) : code2+", "+groups+", "+array.length+", "+kmer+", "+r.obj+"\n"+r;
|
jpayne@68
|
452 array[code2].add(r);
|
jpayne@68
|
453 if(array[code2].size()>=buffer){
|
jpayne@68
|
454 ros[code2].add(array[code2], 0);
|
jpayne@68
|
455 array[code2]=new ArrayList<Read>(buffer);
|
jpayne@68
|
456 }
|
jpayne@68
|
457 }
|
jpayne@68
|
458 cris.returnList(ln);
|
jpayne@68
|
459 ln=cris.nextList();
|
jpayne@68
|
460 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
461 }
|
jpayne@68
|
462 if(ln!=null){
|
jpayne@68
|
463 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
|
jpayne@68
|
464 }
|
jpayne@68
|
465 for(int i=0; i<groups; i++){
|
jpayne@68
|
466 if(!array[i].isEmpty()){
|
jpayne@68
|
467 ros[i].add(array[i], 0);
|
jpayne@68
|
468 }
|
jpayne@68
|
469 }
|
jpayne@68
|
470 }
|
jpayne@68
|
471
|
jpayne@68
|
472 final int id;
|
jpayne@68
|
473 final ConcurrentReadInputStream cris;
|
jpayne@68
|
474 final ConcurrentReadOutputStream[] ros;
|
jpayne@68
|
475 final KmerComparator kc;
|
jpayne@68
|
476 static final int buffer=200;
|
jpayne@68
|
477
|
jpayne@68
|
478 protected long readsProcessedT=0;
|
jpayne@68
|
479 protected long basesProcessedT=0;
|
jpayne@68
|
480 protected long diskProcessedT=0;
|
jpayne@68
|
481 protected long memProcessedT=0;
|
jpayne@68
|
482 }
|
jpayne@68
|
483
|
jpayne@68
|
484 /*--------------------------------------------------------------*/
|
jpayne@68
|
485 /*---------------- Fields ----------------*/
|
jpayne@68
|
486 /*--------------------------------------------------------------*/
|
jpayne@68
|
487
|
jpayne@68
|
488 private int k=31;
|
jpayne@68
|
489 int groups=16;
|
jpayne@68
|
490 int minCount=0;
|
jpayne@68
|
491
|
jpayne@68
|
492 KCountArray table=null;
|
jpayne@68
|
493
|
jpayne@68
|
494 /*--------------------------------------------------------------*/
|
jpayne@68
|
495 /*---------------- I/O Fields ----------------*/
|
jpayne@68
|
496 /*--------------------------------------------------------------*/
|
jpayne@68
|
497
|
jpayne@68
|
498 private String in1=null;
|
jpayne@68
|
499 private String in2=null;
|
jpayne@68
|
500
|
jpayne@68
|
501 private String out1=null;
|
jpayne@68
|
502 private String[] outArray=null;
|
jpayne@68
|
503
|
jpayne@68
|
504 private String extin=null;
|
jpayne@68
|
505 private String extout=null;
|
jpayne@68
|
506
|
jpayne@68
|
507 /*--------------------------------------------------------------*/
|
jpayne@68
|
508
|
jpayne@68
|
509 protected long readsProcessed=0;
|
jpayne@68
|
510 protected long basesProcessed=0;
|
jpayne@68
|
511 protected long diskProcessed=0;
|
jpayne@68
|
512 protected long memProcessed=0;
|
jpayne@68
|
513
|
jpayne@68
|
514 protected static long lastMemProcessed=0;
|
jpayne@68
|
515
|
jpayne@68
|
516 private long maxReads=-1;
|
jpayne@68
|
517 // private boolean addName=false;
|
jpayne@68
|
518 boolean shortName=false;
|
jpayne@68
|
519 boolean shrinkName=false;
|
jpayne@68
|
520 boolean ecco=false;
|
jpayne@68
|
521 boolean unpair=false;
|
jpayne@68
|
522
|
jpayne@68
|
523 static int maxZipLevel=2;
|
jpayne@68
|
524
|
jpayne@68
|
525 static boolean quantizeQuality=false;
|
jpayne@68
|
526
|
jpayne@68
|
527 /*--------------------------------------------------------------*/
|
jpayne@68
|
528 /*---------------- Final Fields ----------------*/
|
jpayne@68
|
529 /*--------------------------------------------------------------*/
|
jpayne@68
|
530
|
jpayne@68
|
531 private final FileFormat ffin1;
|
jpayne@68
|
532 private final FileFormat ffin2;
|
jpayne@68
|
533
|
jpayne@68
|
534 private final FileFormat[] ffout;
|
jpayne@68
|
535
|
jpayne@68
|
536 /*--------------------------------------------------------------*/
|
jpayne@68
|
537 /*---------------- Common Fields ----------------*/
|
jpayne@68
|
538 /*--------------------------------------------------------------*/
|
jpayne@68
|
539
|
jpayne@68
|
540 private PrintStream outstream=System.err;
|
jpayne@68
|
541 public static boolean verbose=false;
|
jpayne@68
|
542 public boolean errorState=false;
|
jpayne@68
|
543 private boolean overwrite=false;
|
jpayne@68
|
544 private boolean append=false;
|
jpayne@68
|
545
|
jpayne@68
|
546 }
|