jpayne@68
|
1 package clump;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.ArrayList;
|
jpayne@68
|
6 import java.util.Random;
|
jpayne@68
|
7
|
jpayne@68
|
8 import fileIO.FileFormat;
|
jpayne@68
|
9 import fileIO.ReadWrite;
|
jpayne@68
|
10 import jgi.BBMerge;
|
jpayne@68
|
11 import shared.Parse;
|
jpayne@68
|
12 import shared.Parser;
|
jpayne@68
|
13 import shared.PreParser;
|
jpayne@68
|
14 import shared.Shared;
|
jpayne@68
|
15 import shared.Timer;
|
jpayne@68
|
16 import shared.Tools;
|
jpayne@68
|
17 import sort.SortByName;
|
jpayne@68
|
18 import stream.FASTQ;
|
jpayne@68
|
19 import stream.Read;
|
jpayne@68
|
20 import structures.ByteBuilder;
|
jpayne@68
|
21 import structures.Quantizer;
|
jpayne@68
|
22
|
jpayne@68
|
23 /**
|
jpayne@68
|
24 * @author Brian Bushnell
|
jpayne@68
|
25 * @date Nov 6, 2015
|
jpayne@68
|
26 *
|
jpayne@68
|
27 */
|
jpayne@68
|
28 public class Clumpify {
|
jpayne@68
|
29
|
jpayne@68
|
30 /**
|
jpayne@68
|
31 * Code entrance from the command line.
|
jpayne@68
|
32 * @param args Command line arguments
|
jpayne@68
|
33 */
|
jpayne@68
|
34 public static void main(String[] args){
|
jpayne@68
|
35 Timer t=new Timer();
|
jpayne@68
|
36 ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
|
jpayne@68
|
37
|
jpayne@68
|
38 //Capture values of static variables that might be modified in case this is called by another class.
|
jpayne@68
|
39 final boolean oldCQ=Read.CHANGE_QUALITY;
|
jpayne@68
|
40 final boolean oldBgzip=ReadWrite.USE_BGZIP, oldPreferBgzip=ReadWrite.PREFER_BGZIP;
|
jpayne@68
|
41
|
jpayne@68
|
42 BBMerge.changeQuality=Read.CHANGE_QUALITY=false;
|
jpayne@68
|
43 ReadWrite.USE_BGZIP=true;
|
jpayne@68
|
44 ReadWrite.PREFER_BGZIP=true;
|
jpayne@68
|
45
|
jpayne@68
|
46 Clumpify x=new Clumpify(args);
|
jpayne@68
|
47 x.process(t);
|
jpayne@68
|
48
|
jpayne@68
|
49 //Restore values of static variables.
|
jpayne@68
|
50 // Shared.setBuffers(oldCap);
|
jpayne@68
|
51 // ReadWrite.ZIPLEVEL=oldZl;
|
jpayne@68
|
52 // ReadWrite.USE_PIGZ=oldPigz;
|
jpayne@68
|
53 ReadWrite.USE_BGZIP=oldBgzip;
|
jpayne@68
|
54 ReadWrite.PREFER_BGZIP=oldPreferBgzip;
|
jpayne@68
|
55 // ReadWrite.USE_UNPIGZ=oldUnpigz;
|
jpayne@68
|
56 // ReadWrite.MAX_ZIP_THREADS=oldZipThreads;
|
jpayne@68
|
57 BBMerge.changeQuality=Read.CHANGE_QUALITY=oldCQ;
|
jpayne@68
|
58
|
jpayne@68
|
59 //Close the print stream if it was redirected
|
jpayne@68
|
60 Shared.closeStream(x.outstream);
|
jpayne@68
|
61 }
|
jpayne@68
|
62
|
jpayne@68
|
63 /**
|
jpayne@68
|
64 * Constructor.
|
jpayne@68
|
65 * @param args Command line arguments
|
jpayne@68
|
66 */
|
jpayne@68
|
67 public Clumpify(String[] args){
|
jpayne@68
|
68
|
jpayne@68
|
69 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
70 PreParser pp=new PreParser(args, getClass(), true);
|
jpayne@68
|
71 args=pp.args;
|
jpayne@68
|
72 outstream=pp.outstream;
|
jpayne@68
|
73 }
|
jpayne@68
|
74
|
jpayne@68
|
75 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
|
jpayne@68
|
76
|
jpayne@68
|
77 args2=new ArrayList<String>();
|
jpayne@68
|
78 args2.add("in1");
|
jpayne@68
|
79 args2.add("in2");
|
jpayne@68
|
80 args2.add("out1");
|
jpayne@68
|
81 args2.add("out2");
|
jpayne@68
|
82 args2.add("groups");
|
jpayne@68
|
83 args2.add("ecco=f");
|
jpayne@68
|
84 args2.add("rename=f");
|
jpayne@68
|
85 args2.add("shortname=f");
|
jpayne@68
|
86 args2.add("unpair=f");
|
jpayne@68
|
87 args2.add("repair=f");
|
jpayne@68
|
88 args2.add("namesort=f");
|
jpayne@68
|
89 args2.add("overwrite=t");
|
jpayne@68
|
90
|
jpayne@68
|
91 String gString="auto";
|
jpayne@68
|
92 for(int i=0; i<args.length; i++){
|
jpayne@68
|
93 String arg=args[i];
|
jpayne@68
|
94 String[] split=arg.split("=");
|
jpayne@68
|
95 String a=split[0].toLowerCase();
|
jpayne@68
|
96 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
97
|
jpayne@68
|
98 if(a.equals("in") || a.equals("in1")){
|
jpayne@68
|
99 in1=b;
|
jpayne@68
|
100 }else if(a.equals("in2")){
|
jpayne@68
|
101 in2=b;
|
jpayne@68
|
102 }else if(a.equals("out") || a.equals("out1")){
|
jpayne@68
|
103 out1=b;
|
jpayne@68
|
104 }else if(a.equals("out2")){
|
jpayne@68
|
105 out2=b;
|
jpayne@68
|
106 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){
|
jpayne@68
|
107 gString=b;
|
jpayne@68
|
108 }else if(a.equals("delete") || a.equals("deletetemp")){
|
jpayne@68
|
109 delete=Parse.parseBoolean(b);
|
jpayne@68
|
110 }else if(a.equals("deleteinput")){
|
jpayne@68
|
111 deleteInput=Parse.parseBoolean(b);
|
jpayne@68
|
112 }else if(a.equals("usetmpdir")){
|
jpayne@68
|
113 useTmpdir=Parse.parseBoolean(b);
|
jpayne@68
|
114 }else if(a.equals("ecco")){
|
jpayne@68
|
115 ecco=Parse.parseBoolean(b);
|
jpayne@68
|
116 }else if(a.equals("compresstemp") || a.equals("ct")){
|
jpayne@68
|
117 if(b!=null && b.equalsIgnoreCase("auto")){forceCompressTemp=forceRawTemp=false;}
|
jpayne@68
|
118 else{
|
jpayne@68
|
119 forceCompressTemp=Parse.parseBoolean(b);
|
jpayne@68
|
120 forceRawTemp=!forceCompressTemp;
|
jpayne@68
|
121 }
|
jpayne@68
|
122 }else if(a.equals("tmpdir")){
|
jpayne@68
|
123 Shared.setTmpdir(b);
|
jpayne@68
|
124 }else if(a.equals("rename") || a.equals("addname")){
|
jpayne@68
|
125 addName=Parse.parseBoolean(b);
|
jpayne@68
|
126 }else if(a.equals("shortname") || a.equals("shortnames")){
|
jpayne@68
|
127 shortName=b;
|
jpayne@68
|
128 }else if(a.equals("seed")){
|
jpayne@68
|
129 KmerComparator.defaultSeed=Long.parseLong(b);
|
jpayne@68
|
130 }else if(a.equals("hashes")){
|
jpayne@68
|
131 KmerComparator.setHashes(Integer.parseInt(b));
|
jpayne@68
|
132 }else if(a.equals("passes")){
|
jpayne@68
|
133 passes=Integer.parseInt(b);
|
jpayne@68
|
134 args2.add(arg);
|
jpayne@68
|
135 // }else if(a.equals("k")){
|
jpayne@68
|
136 // k=Integer.parseInt(b);
|
jpayne@68
|
137 // args2.add(arg);
|
jpayne@68
|
138 }else if(a.equals("border")){
|
jpayne@68
|
139 KmerComparator.defaultBorder=Integer.parseInt(b);
|
jpayne@68
|
140 }
|
jpayne@68
|
141
|
jpayne@68
|
142 else if(a.equals("unpair")){
|
jpayne@68
|
143 unpair=Parse.parseBoolean(b);
|
jpayne@68
|
144 }else if(a.equals("repair")){
|
jpayne@68
|
145 repair=Parse.parseBoolean(b);
|
jpayne@68
|
146 }else if(a.equals("namesort") || a.equals("sort")){
|
jpayne@68
|
147 namesort=Parse.parseBoolean(b);
|
jpayne@68
|
148 }else if(a.equals("overwrite")){
|
jpayne@68
|
149 overwrite=Parse.parseBoolean(b);
|
jpayne@68
|
150 }else if(a.equals("v1") || a.equals("kmersort1")){
|
jpayne@68
|
151 boolean x=Parse.parseBoolean(b);
|
jpayne@68
|
152 if(x){V2=V3=false;}
|
jpayne@68
|
153 }else if(a.equals("v2") || a.equals("kmersort2")){
|
jpayne@68
|
154 V2=Parse.parseBoolean(b);
|
jpayne@68
|
155 if(V2){V3=false;}
|
jpayne@68
|
156 }else if(a.equals("v3") || a.equals("kmersort3")){
|
jpayne@68
|
157 V3=Parse.parseBoolean(b);
|
jpayne@68
|
158 if(V3){V2=false;}
|
jpayne@68
|
159 }else if(a.equals("fetchthreads")){
|
jpayne@68
|
160 KmerSort3.fetchThreads=Integer.parseInt(b);
|
jpayne@68
|
161 assert(KmerSort3.fetchThreads>0) : KmerSort3.fetchThreads+"\nFetch threads must be at least 1.";
|
jpayne@68
|
162 }
|
jpayne@68
|
163
|
jpayne@68
|
164 else if(a.equals("comparesequence")){
|
jpayne@68
|
165 KmerComparator.compareSequence=Parse.parseBoolean(b);
|
jpayne@68
|
166 }else if(a.equals("allowadjacenttiles") || a.equals("spantiles")){
|
jpayne@68
|
167 ReadKey.spanTilesX=ReadKey.spanTilesY=Parse.parseBoolean(b);
|
jpayne@68
|
168 }else if(a.equals("spanx") || a.equals("spantilesx")){
|
jpayne@68
|
169 ReadKey.spanTilesX=Parse.parseBoolean(b);
|
jpayne@68
|
170 }else if(a.equals("spany") || a.equals("spantilesy")){
|
jpayne@68
|
171 ReadKey.spanTilesY=Parse.parseBoolean(b);
|
jpayne@68
|
172 }else if(a.equals("spanadjacent") || a.equals("spanadjacentonly") || a.equals("adjacentonly") || a.equals("adjacent")){
|
jpayne@68
|
173 ReadKey.spanAdjacentOnly=Parse.parseBoolean(b);
|
jpayne@68
|
174 }
|
jpayne@68
|
175
|
jpayne@68
|
176 // else if(a.equals("repair")){
|
jpayne@68
|
177 // repair=Parse.parseBoolean(b);
|
jpayne@68
|
178 // }else if(a.equals("namesort") || a.equals("sort")){
|
jpayne@68
|
179 // namesort=Parse.parseBoolean(b);
|
jpayne@68
|
180 // }
|
jpayne@68
|
181
|
jpayne@68
|
182 else if(a.equals("interleaved") || a.equals("int")){
|
jpayne@68
|
183 if("auto".equalsIgnoreCase(b)){FASTQ.FORCE_INTERLEAVED=!(FASTQ.TEST_INTERLEAVED=true);}
|
jpayne@68
|
184 else{
|
jpayne@68
|
185 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=Parse.parseBoolean(b);
|
jpayne@68
|
186 System.err.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
|
jpayne@68
|
187 }
|
jpayne@68
|
188 }else if(a.equals("cq") || a.equals("changequality")){
|
jpayne@68
|
189 BBMerge.changeQuality=Read.CHANGE_QUALITY=Parse.parseBoolean(b);
|
jpayne@68
|
190 }else if(a.equals("quantize") || a.equals("quantizesticky")){
|
jpayne@68
|
191 quantizeQuality=Quantizer.parse(arg, a, b);
|
jpayne@68
|
192 }else if(a.equals("lowcomplexity")){
|
jpayne@68
|
193 lowComplexity=Parse.parseBoolean(b);
|
jpayne@68
|
194 }
|
jpayne@68
|
195
|
jpayne@68
|
196 else if(Clump.parseStatic(arg, a, b)){
|
jpayne@68
|
197 //Do nothing
|
jpayne@68
|
198 }else if(Parser.parseQuality(arg, a, b)){
|
jpayne@68
|
199 //Do nothing
|
jpayne@68
|
200 }
|
jpayne@68
|
201
|
jpayne@68
|
202 else{
|
jpayne@68
|
203 args2.add(arg);
|
jpayne@68
|
204 }
|
jpayne@68
|
205 }
|
jpayne@68
|
206
|
jpayne@68
|
207 Clump.setXY();
|
jpayne@68
|
208
|
jpayne@68
|
209 KmerSplit.quantizeQuality=KmerSort1.quantizeQuality=quantizeQuality;
|
jpayne@68
|
210
|
jpayne@68
|
211 Parser.processQuality();
|
jpayne@68
|
212
|
jpayne@68
|
213 assert(!unpair || !KmerComparator.mergeFirst) : "Unpair and mergefirst may not be used together.";
|
jpayne@68
|
214
|
jpayne@68
|
215 if(in1==null){throw new RuntimeException("\nOne input file is required.\n");}
|
jpayne@68
|
216
|
jpayne@68
|
217 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
|
jpayne@68
|
218 in2=in1.replace("#", "2");
|
jpayne@68
|
219 in1=in1.replace("#", "1");
|
jpayne@68
|
220 }
|
jpayne@68
|
221 if(out1!=null && out2==null && out1.indexOf('#')>-1){
|
jpayne@68
|
222 out2=out1.replace("#", "2");
|
jpayne@68
|
223 out1=out1.replace("#", "1");
|
jpayne@68
|
224 }
|
jpayne@68
|
225
|
jpayne@68
|
226 //Ensure input files can be read
|
jpayne@68
|
227 if(!Tools.testInputFiles(false, true, in1)){
|
jpayne@68
|
228 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
229 }
|
jpayne@68
|
230
|
jpayne@68
|
231 // assert(false) : ReadKey.spanTiles()+", "+ReadKey.spanTilesX+", "+ReadKey.spanTilesY+", "+Clump.sortX+", "+Clump.sortY;
|
jpayne@68
|
232
|
jpayne@68
|
233 autoSetGroups(gString);
|
jpayne@68
|
234
|
jpayne@68
|
235 if((in2!=null || out2!=null) && groups>1){FASTQ.FORCE_INTERLEAVED=true;} //Fix for crash with twin fasta files
|
jpayne@68
|
236 }
|
jpayne@68
|
237
|
jpayne@68
|
238
|
jpayne@68
|
239 /*--------------------------------------------------------------*/
|
jpayne@68
|
240 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
241 /*--------------------------------------------------------------*/
|
jpayne@68
|
242
|
jpayne@68
|
243 /** Create read streams and process all data */
|
jpayne@68
|
244 public void process(Timer t){
|
jpayne@68
|
245 String[] args=args2.toArray(new String[0]);
|
jpayne@68
|
246 args[4]="groups="+groups;
|
jpayne@68
|
247
|
jpayne@68
|
248 useSharedHeader=(FileFormat.hasSamOrBamExtension(in1) && out1!=null
|
jpayne@68
|
249 && FileFormat.hasSamOrBamExtension(out1));
|
jpayne@68
|
250
|
jpayne@68
|
251 if(groups==1){
|
jpayne@68
|
252 args[0]="in1="+in1;
|
jpayne@68
|
253 args[1]="in2="+in2;
|
jpayne@68
|
254 args[2]="out1="+out1;
|
jpayne@68
|
255 args[3]="out2="+out2;
|
jpayne@68
|
256 args[5]="ecco="+ecco;
|
jpayne@68
|
257 args[6]="rename="+addName;
|
jpayne@68
|
258 args[7]="shortname="+shortName;
|
jpayne@68
|
259 args[8]="unpair="+unpair;
|
jpayne@68
|
260 args[9]="repair="+repair;
|
jpayne@68
|
261 args[10]="namesort="+namesort;
|
jpayne@68
|
262 args[11]="ow="+overwrite;
|
jpayne@68
|
263 KmerSort1.main(args);
|
jpayne@68
|
264 }else{
|
jpayne@68
|
265 String pin1=in1, pin2=in2, temp;
|
jpayne@68
|
266 final int conservativePasses=Clump.conservativeFlag ? passes : Tools.max(1, passes/2);
|
jpayne@68
|
267 if(passes>1){Clump.setConservative(true);}
|
jpayne@68
|
268 long fileMem=-1;
|
jpayne@68
|
269 for(int pass=1; pass<=passes; pass++){
|
jpayne@68
|
270 if(/*passes>1 &&*/ (V2 || V3)){
|
jpayne@68
|
271 // System.err.println("Running pass with fileMem="+fileMem);
|
jpayne@68
|
272 // out=(pass==passes ? out1 : getTempFname("clumpify_p"+(pass+1)+"_temp%_"));
|
jpayne@68
|
273 temp=getTempFname("clumpify_p"+(pass+1)+"_temp%_");
|
jpayne@68
|
274 if(pass==passes){
|
jpayne@68
|
275 fileMem=runOnePass_v2(args, pass, pin1, pin2, out1, out2, fileMem);
|
jpayne@68
|
276 }else{
|
jpayne@68
|
277 fileMem=runOnePass_v2(args, pass, pin1, pin2, temp, null, fileMem);
|
jpayne@68
|
278 }
|
jpayne@68
|
279 // System.err.println("New fileMem="+fileMem);
|
jpayne@68
|
280 }else{
|
jpayne@68
|
281 // out=(pass==passes ? out1 : getTempFname("clumpify_temp_pass"+pass+"_"));
|
jpayne@68
|
282 temp=getTempFname("clumpify_temp_pass"+pass+"_");
|
jpayne@68
|
283 if(pass==passes){
|
jpayne@68
|
284 runOnePass(args, pass, pin1, pin2, out1, out2);
|
jpayne@68
|
285 }else{
|
jpayne@68
|
286 runOnePass(args, pass, pin1, pin2, temp, null);
|
jpayne@68
|
287 }
|
jpayne@68
|
288 }
|
jpayne@68
|
289 pin1=temp;
|
jpayne@68
|
290 pin2=null;
|
jpayne@68
|
291 KmerComparator.defaultBorder=Tools.max(0, KmerComparator.defaultBorder-1);
|
jpayne@68
|
292 KmerComparator.defaultSeed++;
|
jpayne@68
|
293 if(pass>=conservativePasses){Clump.setConservative(false);}
|
jpayne@68
|
294 }
|
jpayne@68
|
295 }
|
jpayne@68
|
296
|
jpayne@68
|
297 if(deleteInput && !sharedErrorState && out1!=null && in1!=null){
|
jpayne@68
|
298 try {
|
jpayne@68
|
299 new File(in1).delete();
|
jpayne@68
|
300 if(in2!=null){new File(in2).delete();}
|
jpayne@68
|
301 } catch (Exception e) {
|
jpayne@68
|
302 System.err.println("WARNING: Failed to delete input files.");
|
jpayne@68
|
303 }
|
jpayne@68
|
304 }
|
jpayne@68
|
305
|
jpayne@68
|
306 t.stop();
|
jpayne@68
|
307 System.err.println("Total time: \t"+t);
|
jpayne@68
|
308
|
jpayne@68
|
309 }
|
jpayne@68
|
310
|
jpayne@68
|
311 private void runOnePass(String[] args, int pass, String in1, String in2, String out1, String out2){
|
jpayne@68
|
312 assert(groups>1);
|
jpayne@68
|
313 if(pass>1){
|
jpayne@68
|
314 ecco=false;
|
jpayne@68
|
315 shortName="f";
|
jpayne@68
|
316 addName=false;
|
jpayne@68
|
317 }
|
jpayne@68
|
318
|
jpayne@68
|
319 String temp=getTempFname("clumpify_p"+pass+"_temp%_");
|
jpayne@68
|
320
|
jpayne@68
|
321 String temp2=temp.replace("%", "FINAL");
|
jpayne@68
|
322 final boolean externalSort=(pass==passes && (repair || namesort));
|
jpayne@68
|
323
|
jpayne@68
|
324 args[0]="in1="+in1;
|
jpayne@68
|
325 args[1]="in2="+in2;
|
jpayne@68
|
326 args[2]="out="+temp;
|
jpayne@68
|
327 args[3]="out2="+null;
|
jpayne@68
|
328 args[5]="ecco="+ecco;
|
jpayne@68
|
329 args[6]="addname=f";
|
jpayne@68
|
330 args[7]="shortname="+shortName;
|
jpayne@68
|
331 args[8]="unpair="+unpair;
|
jpayne@68
|
332 args[9]="repair=f";
|
jpayne@68
|
333 args[10]="namesort=f";
|
jpayne@68
|
334 args[11]="ow="+overwrite;
|
jpayne@68
|
335 KmerSplit.maxZipLevel=2;
|
jpayne@68
|
336 KmerSplit.main(args);
|
jpayne@68
|
337
|
jpayne@68
|
338 FASTQ.DETECT_QUALITY=FASTQ.DETECT_QUALITY_OUT=false;
|
jpayne@68
|
339 FASTQ.ASCII_OFFSET=FASTQ.ASCII_OFFSET_OUT;
|
jpayne@68
|
340
|
jpayne@68
|
341 args[0]="in="+temp;
|
jpayne@68
|
342 args[1]="in2="+null;
|
jpayne@68
|
343 args[2]="out="+(externalSort ? temp2 : out1);
|
jpayne@68
|
344 args[3]="out2="+(externalSort ? "null" : out2);
|
jpayne@68
|
345 args[5]="ecco=f";
|
jpayne@68
|
346 args[6]="addname="+addName;
|
jpayne@68
|
347 args[7]="shortname=f";
|
jpayne@68
|
348 args[8]="unpair=f";
|
jpayne@68
|
349 args[9]="repair="+(repair && externalSort);
|
jpayne@68
|
350 args[10]="namesort="+(namesort && externalSort);
|
jpayne@68
|
351 args[11]="ow="+overwrite;
|
jpayne@68
|
352 if(unpair){
|
jpayne@68
|
353 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
|
jpayne@68
|
354 }
|
jpayne@68
|
355 KmerSort1.main(args);
|
jpayne@68
|
356
|
jpayne@68
|
357 if(delete){
|
jpayne@68
|
358 for(int i=0; i<groups; i++){
|
jpayne@68
|
359 new File(temp.replaceFirst("%", ""+i)).delete();
|
jpayne@68
|
360 }
|
jpayne@68
|
361 if(pass>1){
|
jpayne@68
|
362 assert(in2==null);
|
jpayne@68
|
363 new File(in1).delete();
|
jpayne@68
|
364 }
|
jpayne@68
|
365 }
|
jpayne@68
|
366
|
jpayne@68
|
367 if(externalSort){
|
jpayne@68
|
368 outstream.println();
|
jpayne@68
|
369 String[] sortArgs=new String[] {"in="+temp2, "out="+out1, "ow="+overwrite};
|
jpayne@68
|
370 if(out2!=null){sortArgs=new String[] {"in="+temp2, "out="+out1, "out2="+out2, "ow="+overwrite};}
|
jpayne@68
|
371 SortByName.main(sortArgs);
|
jpayne@68
|
372 if(delete){new File(temp2).delete();}
|
jpayne@68
|
373 }
|
jpayne@68
|
374 }
|
jpayne@68
|
375
|
jpayne@68
|
376 private long runOnePass_v2(String[] args, int pass, String in1, String in2, String out1, String out2, long fileMem){
|
jpayne@68
|
377 assert(groups>1);
|
jpayne@68
|
378 if(pass>1){
|
jpayne@68
|
379 ecco=false;
|
jpayne@68
|
380 shortName="f";
|
jpayne@68
|
381 addName=false;
|
jpayne@68
|
382 }
|
jpayne@68
|
383
|
jpayne@68
|
384 String temp=getTempFname("clumpify_p"+pass+"_temp%_");
|
jpayne@68
|
385
|
jpayne@68
|
386 // String temp2=temp.replace("%", "FINAL");
|
jpayne@68
|
387 String namesorted=temp.replace("%", "namesorted_%");
|
jpayne@68
|
388 final boolean externalSort=(pass==passes && (repair || namesort));
|
jpayne@68
|
389
|
jpayne@68
|
390 if(pass==1){
|
jpayne@68
|
391 args[0]="in1="+in1;
|
jpayne@68
|
392 args[1]="in2="+in2;
|
jpayne@68
|
393 args[2]="out="+temp;
|
jpayne@68
|
394 args[3]="out2="+null;
|
jpayne@68
|
395 args[5]="ecco="+ecco;
|
jpayne@68
|
396 args[6]="addname=f";
|
jpayne@68
|
397 args[7]="shortname="+shortName;
|
jpayne@68
|
398 args[8]="unpair="+unpair;
|
jpayne@68
|
399 args[9]="repair=f";
|
jpayne@68
|
400 args[10]="namesort=f";
|
jpayne@68
|
401 args[11]="ow="+overwrite;
|
jpayne@68
|
402 KmerSplit.maxZipLevel=2;
|
jpayne@68
|
403 KmerSplit.main(args);
|
jpayne@68
|
404 fileMem=KmerSplit.lastMemProcessed;
|
jpayne@68
|
405
|
jpayne@68
|
406 FASTQ.DETECT_QUALITY=FASTQ.DETECT_QUALITY_OUT=false;
|
jpayne@68
|
407 FASTQ.ASCII_OFFSET=FASTQ.ASCII_OFFSET_OUT;
|
jpayne@68
|
408 }
|
jpayne@68
|
409
|
jpayne@68
|
410 args[0]="in1="+(pass==1 ? temp : in1);
|
jpayne@68
|
411 args[1]="in2="+null;
|
jpayne@68
|
412 args[2]="out="+(externalSort ? namesorted : out1);
|
jpayne@68
|
413 args[3]="out2="+(externalSort ? "null" : out2);
|
jpayne@68
|
414 args[5]="ecco=f";
|
jpayne@68
|
415 args[6]="addname="+addName;
|
jpayne@68
|
416 args[7]="shortname=f";
|
jpayne@68
|
417 args[8]="unpair=f";
|
jpayne@68
|
418 args[9]="repair="+(repair && externalSort);
|
jpayne@68
|
419 args[10]="namesort="+(namesort && externalSort);
|
jpayne@68
|
420 args[11]="ow="+overwrite;
|
jpayne@68
|
421 if(unpair){
|
jpayne@68
|
422 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
|
jpayne@68
|
423 }
|
jpayne@68
|
424 if(externalSort){
|
jpayne@68
|
425 KmerSort.doHashAndSplit=false;
|
jpayne@68
|
426 }
|
jpayne@68
|
427 if(V3){
|
jpayne@68
|
428 KmerSort3.main(fileMem, pass, passes, args);
|
jpayne@68
|
429 if(fileMem<1){fileMem=KmerSort3.lastMemProcessed;}
|
jpayne@68
|
430 }else{KmerSort2.main(args);}
|
jpayne@68
|
431
|
jpayne@68
|
432 if(delete){
|
jpayne@68
|
433 for(int i=0; i<groups; i++){
|
jpayne@68
|
434 new File((pass==1 ? temp : in1).replaceFirst("%", ""+i)).delete();
|
jpayne@68
|
435 }
|
jpayne@68
|
436 }
|
jpayne@68
|
437
|
jpayne@68
|
438 if(externalSort){
|
jpayne@68
|
439 outstream.println();
|
jpayne@68
|
440
|
jpayne@68
|
441 ArrayList<String> names=new ArrayList<String>();
|
jpayne@68
|
442 for(int i=0; i<groups; i++){
|
jpayne@68
|
443 names.add(namesorted.replaceFirst("%", ""+i));
|
jpayne@68
|
444 }
|
jpayne@68
|
445 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
446
|
jpayne@68
|
447 ReadWrite.USE_PIGZ=true;
|
jpayne@68
|
448 ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
|
jpayne@68
|
449 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
|
jpayne@68
|
450 FileFormat dest=FileFormat.testOutput(out1, FileFormat.FASTQ, null, true, overwrite, false, false);
|
jpayne@68
|
451 FileFormat dest2=FileFormat.testOutput(out2, FileFormat.FASTQ, null, true, overwrite, false, false);
|
jpayne@68
|
452 SortByName.mergeAndDump(names, /*null, */dest, dest2, delete, useSharedHeader, false, outstream, 1000);
|
jpayne@68
|
453 }
|
jpayne@68
|
454
|
jpayne@68
|
455 // if(externalSort){
|
jpayne@68
|
456 // outstream.println();
|
jpayne@68
|
457 // SortByName.main(new String[] {"in="+temp2, "out="+out, "ow="+overwrite});
|
jpayne@68
|
458 // if(delete){new File(temp2).delete();}
|
jpayne@68
|
459 // }
|
jpayne@68
|
460 return fileMem;
|
jpayne@68
|
461 }
|
jpayne@68
|
462
|
jpayne@68
|
463 /*--------------------------------------------------------------*/
|
jpayne@68
|
464 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
465 /*--------------------------------------------------------------*/
|
jpayne@68
|
466
|
jpayne@68
|
467 private void autoSetGroups(String s) {
|
jpayne@68
|
468 if(s==null || s.equalsIgnoreCase("null")){return;}
|
jpayne@68
|
469 if(Tools.isDigit(s.charAt(0))){
|
jpayne@68
|
470 groups=Integer.parseInt(s);
|
jpayne@68
|
471 return;
|
jpayne@68
|
472 }
|
jpayne@68
|
473 assert(s.equalsIgnoreCase("auto")) : "Unknown groups setting: "+s;
|
jpayne@68
|
474
|
jpayne@68
|
475 final long maxMem=Shared.memAvailable(1);
|
jpayne@68
|
476 FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, null, false, false);
|
jpayne@68
|
477 if(ff1==null || ff1.stdio()){return;}
|
jpayne@68
|
478
|
jpayne@68
|
479 // outstream.println("in1="+in1+", overhead="+(0.5*(ReadKey.overhead+Clump.overhead)));
|
jpayne@68
|
480
|
jpayne@68
|
481 double[] estimates=Tools.estimateFileMemory(in1, 1000, 0.5*(ReadKey.overhead+Clump.overhead), true, lowComplexity);
|
jpayne@68
|
482 if(in2!=null){
|
jpayne@68
|
483 double[] estimates2=Tools.estimateFileMemory(in2, 1000, 0.5*(ReadKey.overhead+Clump.overhead), true, lowComplexity);
|
jpayne@68
|
484 estimates[0]+=estimates2[0];
|
jpayne@68
|
485 estimates[1]+=estimates2[1];
|
jpayne@68
|
486 estimates[4]+=estimates2[4];
|
jpayne@68
|
487 }
|
jpayne@68
|
488
|
jpayne@68
|
489 // outstream.println(Arrays.toString(estimates));
|
jpayne@68
|
490
|
jpayne@68
|
491 double memEstimate=estimates==null ? 0 : estimates[0];
|
jpayne@68
|
492 double diskEstimate=estimates==null ? 0 : estimates[1];
|
jpayne@68
|
493 double readEstimate=estimates==null ? 0 : estimates[4];
|
jpayne@68
|
494 double worstCase=memEstimate*1.5;
|
jpayne@68
|
495
|
jpayne@68
|
496 // outstream.println("Raw Disk Size Estimate: "+(long)(diskEstimate/(1024*1024))+" MB");
|
jpayne@68
|
497 outstream.println("Read Estimate: "+(long)(readEstimate));
|
jpayne@68
|
498 outstream.println("Memory Estimate: "+(long)(memEstimate/(1024*1024))+" MB");
|
jpayne@68
|
499 outstream.println("Memory Available: "+(maxMem/(1024*1024))+" MB");
|
jpayne@68
|
500
|
jpayne@68
|
501 if(maxMem>worstCase && readEstimate<Integer.MAX_VALUE){
|
jpayne@68
|
502 groups=1;
|
jpayne@68
|
503 }else{
|
jpayne@68
|
504 groups=Tools.max(11, (int)(3+(3*worstCase/maxMem)*(V3 ? KmerSort3.fetchThreads : 2)), (int)((2*readEstimate)/Integer.MAX_VALUE))|1;
|
jpayne@68
|
505 }
|
jpayne@68
|
506 outstream.println("Set groups to "+groups);
|
jpayne@68
|
507 }
|
jpayne@68
|
508
|
jpayne@68
|
509 private String getTempFname(String core){
|
jpayne@68
|
510 // outstream.println(core);
|
jpayne@68
|
511 String temp;
|
jpayne@68
|
512 String path="", extension=".fq";
|
jpayne@68
|
513 if(out1!=null){
|
jpayne@68
|
514 core=ReadWrite.stripToCore(out1)+"_"+core;
|
jpayne@68
|
515 path=ReadWrite.getPath(out1);
|
jpayne@68
|
516 extension=ReadWrite.getExtension(out1);
|
jpayne@68
|
517 }
|
jpayne@68
|
518
|
jpayne@68
|
519 if(useTmpdir && Shared.tmpdir()!=null){
|
jpayne@68
|
520 temp=Shared.tmpdir()+core+Long.toHexString((randy.nextLong()&Long.MAX_VALUE))+extension;
|
jpayne@68
|
521 }else{
|
jpayne@68
|
522 temp=path+core+Long.toHexString((randy.nextLong()&Long.MAX_VALUE))+extension;
|
jpayne@68
|
523 }
|
jpayne@68
|
524 // assert(false) : path+", "+temp+", "+core+", "+out1;
|
jpayne@68
|
525
|
jpayne@68
|
526 String comp=ReadWrite.compressionType(temp);
|
jpayne@68
|
527 if(comp!=null){comp=".gz";} //Prevent bz2 temp files which cause a crash
|
jpayne@68
|
528
|
jpayne@68
|
529 if(forceCompressTemp && comp==null){
|
jpayne@68
|
530 temp+=".gz";
|
jpayne@68
|
531 }else if(comp!=null && forceRawTemp){
|
jpayne@68
|
532 temp=temp.substring(0, temp.lastIndexOf('.'));
|
jpayne@68
|
533 }
|
jpayne@68
|
534 if(temp.endsWith(".bz2")){temp=temp.substring(0, temp.length()-4);} //Prevent bz2 temp files which cause a crash
|
jpayne@68
|
535
|
jpayne@68
|
536 // outstream.println(temp);
|
jpayne@68
|
537 return temp;
|
jpayne@68
|
538 }
|
jpayne@68
|
539
|
jpayne@68
|
540 public static void shrinkName(Read r) {
|
jpayne@68
|
541 if(r==null){return;}
|
jpayne@68
|
542 String s=r.id;
|
jpayne@68
|
543 if(s.contains("HISEQ")){s=s.replace("HISEQ", "H");}
|
jpayne@68
|
544 if(s.contains("MISEQ")){
|
jpayne@68
|
545 s=s.replace("MISEQ", "M");
|
jpayne@68
|
546 }
|
jpayne@68
|
547 if(s.contains(":000000000-")){
|
jpayne@68
|
548 s=s.replace(":000000000-", ":");
|
jpayne@68
|
549 }
|
jpayne@68
|
550 r.id=s;
|
jpayne@68
|
551 }
|
jpayne@68
|
552
|
jpayne@68
|
553 public static void shortName(Read r) {
|
jpayne@68
|
554 ByteBuilder sb=new ByteBuilder(14);
|
jpayne@68
|
555 long x=r.numericID|1;
|
jpayne@68
|
556
|
jpayne@68
|
557 while(x<1000000000L){
|
jpayne@68
|
558 x*=10;
|
jpayne@68
|
559 sb.append('0');
|
jpayne@68
|
560 }
|
jpayne@68
|
561 sb.append(r.numericID);
|
jpayne@68
|
562
|
jpayne@68
|
563 // while(x<0x10000000L){
|
jpayne@68
|
564 // x*=16;
|
jpayne@68
|
565 // sb.append('0');
|
jpayne@68
|
566 // }
|
jpayne@68
|
567 // sb.append(Long.toHexString(r.numericID));
|
jpayne@68
|
568
|
jpayne@68
|
569 sb.append(r.pairnum()==0 ? " 1:" : " 2:");
|
jpayne@68
|
570 r.id=sb.toString();
|
jpayne@68
|
571 }
|
jpayne@68
|
572
|
jpayne@68
|
573 /*--------------------------------------------------------------*/
|
jpayne@68
|
574 /*---------------- Fields ----------------*/
|
jpayne@68
|
575 /*--------------------------------------------------------------*/
|
jpayne@68
|
576
|
/** Set by the "lowcomplexity" flag; passed to Tools.estimateFileMemory when groups are chosen automatically. */
private boolean lowComplexity=false;

/** Result of Quantizer.parse for the "quantize"/"quantizesticky" flags; copied to KmerSplit and KmerSort1 by the constructor. */
private boolean quantizeQuality=false;
/** Source of the random hex suffix that getTempFname appends to temp file names. */
private Random randy=new Random();
/** Number of groups; 1 sends the data straight to KmerSort1, larger values use the split-and-sort passes. */
private int groups=31;
/** Number of passes run when groups is greater than 1 ("passes" flag). */
private int passes=1;
/** "ecco" flag, forwarded to the first-pass split; cleared after pass 1. */
private boolean ecco=false;
/** "rename"/"addname" flag, forwarded to the sort stage; cleared after pass 1. */
private boolean addName=false;
/** Raw value of the "shortname" flag, forwarded as text; reset to "f" after pass 1. */
private String shortName="f";
/** When true and Shared.tmpdir() is non-null, temp files are placed there instead of next to out1. */
private boolean useTmpdir=false;
/** Whether temp files are deleted once they have been consumed ("delete"/"deletetemp" flag). */
private boolean delete=true;
/** Whether the input files are deleted at the end of process() ("deleteinput" flag). */
private boolean deleteInput=false;
/** Set in process(): true when both in1 and out1 have a sam or bam extension. */
private boolean useSharedHeader=false;
/** When true, ".gz" is appended to temp names that are not already compressed. */
private boolean forceCompressTemp=false;
/** When true, the compression extension is stripped from temp names. */
private boolean forceRawTemp=false;
/** Forwarded downstream as "ow=". */
private boolean overwrite=true;

/** Forwarded to the first-pass split as "unpair="; also turns off interleaving for the sort stage. */
private boolean unpair=false;
/** When set, the last pass ends with a name-based merge or sort; forwarded as "repair=" only on that pass. */
private boolean repair=false;
/** When set, the last pass ends with a name-based merge or sort; forwarded as "namesort=" only on that pass. */
private boolean namesort=false;
/** Selects KmerSort2 for sorting (used when V3 is false). */
private boolean V2=false;
/** Selects KmerSort3 for sorting. When V2 and V3 are both false, runOnePass (KmerSplit + KmerSort1) is used. */
private boolean V3=true;

/** Primary input file. */
private String in1=null;
/** Secondary input file, or null. */
private String in2=null;
/** Primary output file, or null. */
private String out1=null;
/** Secondary output file, or null. */
private String out2=null;

/** Arguments for the downstream programs: 12 positional placeholders followed by any flags not consumed by the constructor. */
ArrayList<String> args2=new ArrayList<String>();
/** Destination for status messages; replaced by the PreParser's stream in the constructor. */
private PrintStream outstream=System.err;

/** When true, process() skips deleting the input files. Not written anywhere in this class. */
public static boolean sharedErrorState=false;
jpayne@68
|
609
|
jpayne@68
|
610 }
|