annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/clump/Clumpify.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package clump;
jpayne@68 2
jpayne@68 3 import java.io.File;
jpayne@68 4 import java.io.PrintStream;
jpayne@68 5 import java.util.ArrayList;
jpayne@68 6 import java.util.Random;
jpayne@68 7
jpayne@68 8 import fileIO.FileFormat;
jpayne@68 9 import fileIO.ReadWrite;
jpayne@68 10 import jgi.BBMerge;
jpayne@68 11 import shared.Parse;
jpayne@68 12 import shared.Parser;
jpayne@68 13 import shared.PreParser;
jpayne@68 14 import shared.Shared;
jpayne@68 15 import shared.Timer;
jpayne@68 16 import shared.Tools;
jpayne@68 17 import sort.SortByName;
jpayne@68 18 import stream.FASTQ;
jpayne@68 19 import stream.Read;
jpayne@68 20 import structures.ByteBuilder;
jpayne@68 21 import structures.Quantizer;
jpayne@68 22
jpayne@68 23 /**
jpayne@68 24 * @author Brian Bushnell
jpayne@68 25 * @date Nov 6, 2015
jpayne@68 26 *
jpayne@68 27 */
jpayne@68 28 public class Clumpify {
jpayne@68 29
jpayne@68 30 /**
jpayne@68 31 * Code entrance from the command line.
jpayne@68 32 * @param args Command line arguments
jpayne@68 33 */
jpayne@68 34 public static void main(String[] args){
jpayne@68 35 Timer t=new Timer();
jpayne@68 36 ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
jpayne@68 37
jpayne@68 38 //Capture values of static variables that might be modified in case this is called by another class.
jpayne@68 39 final boolean oldCQ=Read.CHANGE_QUALITY;
jpayne@68 40 final boolean oldBgzip=ReadWrite.USE_BGZIP, oldPreferBgzip=ReadWrite.PREFER_BGZIP;
jpayne@68 41
jpayne@68 42 BBMerge.changeQuality=Read.CHANGE_QUALITY=false;
jpayne@68 43 ReadWrite.USE_BGZIP=true;
jpayne@68 44 ReadWrite.PREFER_BGZIP=true;
jpayne@68 45
jpayne@68 46 Clumpify x=new Clumpify(args);
jpayne@68 47 x.process(t);
jpayne@68 48
jpayne@68 49 //Restore values of static variables.
jpayne@68 50 // Shared.setBuffers(oldCap);
jpayne@68 51 // ReadWrite.ZIPLEVEL=oldZl;
jpayne@68 52 // ReadWrite.USE_PIGZ=oldPigz;
jpayne@68 53 ReadWrite.USE_BGZIP=oldBgzip;
jpayne@68 54 ReadWrite.PREFER_BGZIP=oldPreferBgzip;
jpayne@68 55 // ReadWrite.USE_UNPIGZ=oldUnpigz;
jpayne@68 56 // ReadWrite.MAX_ZIP_THREADS=oldZipThreads;
jpayne@68 57 BBMerge.changeQuality=Read.CHANGE_QUALITY=oldCQ;
jpayne@68 58
jpayne@68 59 //Close the print stream if it was redirected
jpayne@68 60 Shared.closeStream(x.outstream);
jpayne@68 61 }
jpayne@68 62
jpayne@68 63 /**
jpayne@68 64 * Constructor.
jpayne@68 65 * @param args Command line arguments
jpayne@68 66 */
jpayne@68 67 public Clumpify(String[] args){
jpayne@68 68
jpayne@68 69 {//Preparse block for help, config files, and outstream
jpayne@68 70 PreParser pp=new PreParser(args, getClass(), true);
jpayne@68 71 args=pp.args;
jpayne@68 72 outstream=pp.outstream;
jpayne@68 73 }
jpayne@68 74
jpayne@68 75 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
jpayne@68 76
jpayne@68 77 args2=new ArrayList<String>();
jpayne@68 78 args2.add("in1");
jpayne@68 79 args2.add("in2");
jpayne@68 80 args2.add("out1");
jpayne@68 81 args2.add("out2");
jpayne@68 82 args2.add("groups");
jpayne@68 83 args2.add("ecco=f");
jpayne@68 84 args2.add("rename=f");
jpayne@68 85 args2.add("shortname=f");
jpayne@68 86 args2.add("unpair=f");
jpayne@68 87 args2.add("repair=f");
jpayne@68 88 args2.add("namesort=f");
jpayne@68 89 args2.add("overwrite=t");
jpayne@68 90
jpayne@68 91 String gString="auto";
jpayne@68 92 for(int i=0; i<args.length; i++){
jpayne@68 93 String arg=args[i];
jpayne@68 94 String[] split=arg.split("=");
jpayne@68 95 String a=split[0].toLowerCase();
jpayne@68 96 String b=split.length>1 ? split[1] : null;
jpayne@68 97
jpayne@68 98 if(a.equals("in") || a.equals("in1")){
jpayne@68 99 in1=b;
jpayne@68 100 }else if(a.equals("in2")){
jpayne@68 101 in2=b;
jpayne@68 102 }else if(a.equals("out") || a.equals("out1")){
jpayne@68 103 out1=b;
jpayne@68 104 }else if(a.equals("out2")){
jpayne@68 105 out2=b;
jpayne@68 106 }else if(a.equals("groups") || a.equals("g") || a.equals("sets") || a.equals("ways")){
jpayne@68 107 gString=b;
jpayne@68 108 }else if(a.equals("delete") || a.equals("deletetemp")){
jpayne@68 109 delete=Parse.parseBoolean(b);
jpayne@68 110 }else if(a.equals("deleteinput")){
jpayne@68 111 deleteInput=Parse.parseBoolean(b);
jpayne@68 112 }else if(a.equals("usetmpdir")){
jpayne@68 113 useTmpdir=Parse.parseBoolean(b);
jpayne@68 114 }else if(a.equals("ecco")){
jpayne@68 115 ecco=Parse.parseBoolean(b);
jpayne@68 116 }else if(a.equals("compresstemp") || a.equals("ct")){
jpayne@68 117 if(b!=null && b.equalsIgnoreCase("auto")){forceCompressTemp=forceRawTemp=false;}
jpayne@68 118 else{
jpayne@68 119 forceCompressTemp=Parse.parseBoolean(b);
jpayne@68 120 forceRawTemp=!forceCompressTemp;
jpayne@68 121 }
jpayne@68 122 }else if(a.equals("tmpdir")){
jpayne@68 123 Shared.setTmpdir(b);
jpayne@68 124 }else if(a.equals("rename") || a.equals("addname")){
jpayne@68 125 addName=Parse.parseBoolean(b);
jpayne@68 126 }else if(a.equals("shortname") || a.equals("shortnames")){
jpayne@68 127 shortName=b;
jpayne@68 128 }else if(a.equals("seed")){
jpayne@68 129 KmerComparator.defaultSeed=Long.parseLong(b);
jpayne@68 130 }else if(a.equals("hashes")){
jpayne@68 131 KmerComparator.setHashes(Integer.parseInt(b));
jpayne@68 132 }else if(a.equals("passes")){
jpayne@68 133 passes=Integer.parseInt(b);
jpayne@68 134 args2.add(arg);
jpayne@68 135 // }else if(a.equals("k")){
jpayne@68 136 // k=Integer.parseInt(b);
jpayne@68 137 // args2.add(arg);
jpayne@68 138 }else if(a.equals("border")){
jpayne@68 139 KmerComparator.defaultBorder=Integer.parseInt(b);
jpayne@68 140 }
jpayne@68 141
jpayne@68 142 else if(a.equals("unpair")){
jpayne@68 143 unpair=Parse.parseBoolean(b);
jpayne@68 144 }else if(a.equals("repair")){
jpayne@68 145 repair=Parse.parseBoolean(b);
jpayne@68 146 }else if(a.equals("namesort") || a.equals("sort")){
jpayne@68 147 namesort=Parse.parseBoolean(b);
jpayne@68 148 }else if(a.equals("overwrite")){
jpayne@68 149 overwrite=Parse.parseBoolean(b);
jpayne@68 150 }else if(a.equals("v1") || a.equals("kmersort1")){
jpayne@68 151 boolean x=Parse.parseBoolean(b);
jpayne@68 152 if(x){V2=V3=false;}
jpayne@68 153 }else if(a.equals("v2") || a.equals("kmersort2")){
jpayne@68 154 V2=Parse.parseBoolean(b);
jpayne@68 155 if(V2){V3=false;}
jpayne@68 156 }else if(a.equals("v3") || a.equals("kmersort3")){
jpayne@68 157 V3=Parse.parseBoolean(b);
jpayne@68 158 if(V3){V2=false;}
jpayne@68 159 }else if(a.equals("fetchthreads")){
jpayne@68 160 KmerSort3.fetchThreads=Integer.parseInt(b);
jpayne@68 161 assert(KmerSort3.fetchThreads>0) : KmerSort3.fetchThreads+"\nFetch threads must be at least 1.";
jpayne@68 162 }
jpayne@68 163
jpayne@68 164 else if(a.equals("comparesequence")){
jpayne@68 165 KmerComparator.compareSequence=Parse.parseBoolean(b);
jpayne@68 166 }else if(a.equals("allowadjacenttiles") || a.equals("spantiles")){
jpayne@68 167 ReadKey.spanTilesX=ReadKey.spanTilesY=Parse.parseBoolean(b);
jpayne@68 168 }else if(a.equals("spanx") || a.equals("spantilesx")){
jpayne@68 169 ReadKey.spanTilesX=Parse.parseBoolean(b);
jpayne@68 170 }else if(a.equals("spany") || a.equals("spantilesy")){
jpayne@68 171 ReadKey.spanTilesY=Parse.parseBoolean(b);
jpayne@68 172 }else if(a.equals("spanadjacent") || a.equals("spanadjacentonly") || a.equals("adjacentonly") || a.equals("adjacent")){
jpayne@68 173 ReadKey.spanAdjacentOnly=Parse.parseBoolean(b);
jpayne@68 174 }
jpayne@68 175
jpayne@68 176 // else if(a.equals("repair")){
jpayne@68 177 // repair=Parse.parseBoolean(b);
jpayne@68 178 // }else if(a.equals("namesort") || a.equals("sort")){
jpayne@68 179 // namesort=Parse.parseBoolean(b);
jpayne@68 180 // }
jpayne@68 181
jpayne@68 182 else if(a.equals("interleaved") || a.equals("int")){
jpayne@68 183 if("auto".equalsIgnoreCase(b)){FASTQ.FORCE_INTERLEAVED=!(FASTQ.TEST_INTERLEAVED=true);}
jpayne@68 184 else{
jpayne@68 185 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=Parse.parseBoolean(b);
jpayne@68 186 System.err.println("Set INTERLEAVED to "+FASTQ.FORCE_INTERLEAVED);
jpayne@68 187 }
jpayne@68 188 }else if(a.equals("cq") || a.equals("changequality")){
jpayne@68 189 BBMerge.changeQuality=Read.CHANGE_QUALITY=Parse.parseBoolean(b);
jpayne@68 190 }else if(a.equals("quantize") || a.equals("quantizesticky")){
jpayne@68 191 quantizeQuality=Quantizer.parse(arg, a, b);
jpayne@68 192 }else if(a.equals("lowcomplexity")){
jpayne@68 193 lowComplexity=Parse.parseBoolean(b);
jpayne@68 194 }
jpayne@68 195
jpayne@68 196 else if(Clump.parseStatic(arg, a, b)){
jpayne@68 197 //Do nothing
jpayne@68 198 }else if(Parser.parseQuality(arg, a, b)){
jpayne@68 199 //Do nothing
jpayne@68 200 }
jpayne@68 201
jpayne@68 202 else{
jpayne@68 203 args2.add(arg);
jpayne@68 204 }
jpayne@68 205 }
jpayne@68 206
jpayne@68 207 Clump.setXY();
jpayne@68 208
jpayne@68 209 KmerSplit.quantizeQuality=KmerSort1.quantizeQuality=quantizeQuality;
jpayne@68 210
jpayne@68 211 Parser.processQuality();
jpayne@68 212
jpayne@68 213 assert(!unpair || !KmerComparator.mergeFirst) : "Unpair and mergefirst may not be used together.";
jpayne@68 214
jpayne@68 215 if(in1==null){throw new RuntimeException("\nOne input file is required.\n");}
jpayne@68 216
jpayne@68 217 if(in1!=null && in2==null && in1.indexOf('#')>-1 && !new File(in1).exists()){
jpayne@68 218 in2=in1.replace("#", "2");
jpayne@68 219 in1=in1.replace("#", "1");
jpayne@68 220 }
jpayne@68 221 if(out1!=null && out2==null && out1.indexOf('#')>-1){
jpayne@68 222 out2=out1.replace("#", "2");
jpayne@68 223 out1=out1.replace("#", "1");
jpayne@68 224 }
jpayne@68 225
jpayne@68 226 //Ensure input files can be read
jpayne@68 227 if(!Tools.testInputFiles(false, true, in1)){
jpayne@68 228 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 229 }
jpayne@68 230
jpayne@68 231 // assert(false) : ReadKey.spanTiles()+", "+ReadKey.spanTilesX+", "+ReadKey.spanTilesY+", "+Clump.sortX+", "+Clump.sortY;
jpayne@68 232
jpayne@68 233 autoSetGroups(gString);
jpayne@68 234
jpayne@68 235 if((in2!=null || out2!=null) && groups>1){FASTQ.FORCE_INTERLEAVED=true;} //Fix for crash with twin fasta files
jpayne@68 236 }
jpayne@68 237
jpayne@68 238
jpayne@68 239 /*--------------------------------------------------------------*/
jpayne@68 240 /*---------------- Outer Methods ----------------*/
jpayne@68 241 /*--------------------------------------------------------------*/
jpayne@68 242
jpayne@68 243 /** Create read streams and process all data */
jpayne@68 244 public void process(Timer t){
jpayne@68 245 String[] args=args2.toArray(new String[0]);
jpayne@68 246 args[4]="groups="+groups;
jpayne@68 247
jpayne@68 248 useSharedHeader=(FileFormat.hasSamOrBamExtension(in1) && out1!=null
jpayne@68 249 && FileFormat.hasSamOrBamExtension(out1));
jpayne@68 250
jpayne@68 251 if(groups==1){
jpayne@68 252 args[0]="in1="+in1;
jpayne@68 253 args[1]="in2="+in2;
jpayne@68 254 args[2]="out1="+out1;
jpayne@68 255 args[3]="out2="+out2;
jpayne@68 256 args[5]="ecco="+ecco;
jpayne@68 257 args[6]="rename="+addName;
jpayne@68 258 args[7]="shortname="+shortName;
jpayne@68 259 args[8]="unpair="+unpair;
jpayne@68 260 args[9]="repair="+repair;
jpayne@68 261 args[10]="namesort="+namesort;
jpayne@68 262 args[11]="ow="+overwrite;
jpayne@68 263 KmerSort1.main(args);
jpayne@68 264 }else{
jpayne@68 265 String pin1=in1, pin2=in2, temp;
jpayne@68 266 final int conservativePasses=Clump.conservativeFlag ? passes : Tools.max(1, passes/2);
jpayne@68 267 if(passes>1){Clump.setConservative(true);}
jpayne@68 268 long fileMem=-1;
jpayne@68 269 for(int pass=1; pass<=passes; pass++){
jpayne@68 270 if(/*passes>1 &&*/ (V2 || V3)){
jpayne@68 271 // System.err.println("Running pass with fileMem="+fileMem);
jpayne@68 272 // out=(pass==passes ? out1 : getTempFname("clumpify_p"+(pass+1)+"_temp%_"));
jpayne@68 273 temp=getTempFname("clumpify_p"+(pass+1)+"_temp%_");
jpayne@68 274 if(pass==passes){
jpayne@68 275 fileMem=runOnePass_v2(args, pass, pin1, pin2, out1, out2, fileMem);
jpayne@68 276 }else{
jpayne@68 277 fileMem=runOnePass_v2(args, pass, pin1, pin2, temp, null, fileMem);
jpayne@68 278 }
jpayne@68 279 // System.err.println("New fileMem="+fileMem);
jpayne@68 280 }else{
jpayne@68 281 // out=(pass==passes ? out1 : getTempFname("clumpify_temp_pass"+pass+"_"));
jpayne@68 282 temp=getTempFname("clumpify_temp_pass"+pass+"_");
jpayne@68 283 if(pass==passes){
jpayne@68 284 runOnePass(args, pass, pin1, pin2, out1, out2);
jpayne@68 285 }else{
jpayne@68 286 runOnePass(args, pass, pin1, pin2, temp, null);
jpayne@68 287 }
jpayne@68 288 }
jpayne@68 289 pin1=temp;
jpayne@68 290 pin2=null;
jpayne@68 291 KmerComparator.defaultBorder=Tools.max(0, KmerComparator.defaultBorder-1);
jpayne@68 292 KmerComparator.defaultSeed++;
jpayne@68 293 if(pass>=conservativePasses){Clump.setConservative(false);}
jpayne@68 294 }
jpayne@68 295 }
jpayne@68 296
jpayne@68 297 if(deleteInput && !sharedErrorState && out1!=null && in1!=null){
jpayne@68 298 try {
jpayne@68 299 new File(in1).delete();
jpayne@68 300 if(in2!=null){new File(in2).delete();}
jpayne@68 301 } catch (Exception e) {
jpayne@68 302 System.err.println("WARNING: Failed to delete input files.");
jpayne@68 303 }
jpayne@68 304 }
jpayne@68 305
jpayne@68 306 t.stop();
jpayne@68 307 System.err.println("Total time: \t"+t);
jpayne@68 308
jpayne@68 309 }
jpayne@68 310
jpayne@68 311 private void runOnePass(String[] args, int pass, String in1, String in2, String out1, String out2){
jpayne@68 312 assert(groups>1);
jpayne@68 313 if(pass>1){
jpayne@68 314 ecco=false;
jpayne@68 315 shortName="f";
jpayne@68 316 addName=false;
jpayne@68 317 }
jpayne@68 318
jpayne@68 319 String temp=getTempFname("clumpify_p"+pass+"_temp%_");
jpayne@68 320
jpayne@68 321 String temp2=temp.replace("%", "FINAL");
jpayne@68 322 final boolean externalSort=(pass==passes && (repair || namesort));
jpayne@68 323
jpayne@68 324 args[0]="in1="+in1;
jpayne@68 325 args[1]="in2="+in2;
jpayne@68 326 args[2]="out="+temp;
jpayne@68 327 args[3]="out2="+null;
jpayne@68 328 args[5]="ecco="+ecco;
jpayne@68 329 args[6]="addname=f";
jpayne@68 330 args[7]="shortname="+shortName;
jpayne@68 331 args[8]="unpair="+unpair;
jpayne@68 332 args[9]="repair=f";
jpayne@68 333 args[10]="namesort=f";
jpayne@68 334 args[11]="ow="+overwrite;
jpayne@68 335 KmerSplit.maxZipLevel=2;
jpayne@68 336 KmerSplit.main(args);
jpayne@68 337
jpayne@68 338 FASTQ.DETECT_QUALITY=FASTQ.DETECT_QUALITY_OUT=false;
jpayne@68 339 FASTQ.ASCII_OFFSET=FASTQ.ASCII_OFFSET_OUT;
jpayne@68 340
jpayne@68 341 args[0]="in="+temp;
jpayne@68 342 args[1]="in2="+null;
jpayne@68 343 args[2]="out="+(externalSort ? temp2 : out1);
jpayne@68 344 args[3]="out2="+(externalSort ? "null" : out2);
jpayne@68 345 args[5]="ecco=f";
jpayne@68 346 args[6]="addname="+addName;
jpayne@68 347 args[7]="shortname=f";
jpayne@68 348 args[8]="unpair=f";
jpayne@68 349 args[9]="repair="+(repair && externalSort);
jpayne@68 350 args[10]="namesort="+(namesort && externalSort);
jpayne@68 351 args[11]="ow="+overwrite;
jpayne@68 352 if(unpair){
jpayne@68 353 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
jpayne@68 354 }
jpayne@68 355 KmerSort1.main(args);
jpayne@68 356
jpayne@68 357 if(delete){
jpayne@68 358 for(int i=0; i<groups; i++){
jpayne@68 359 new File(temp.replaceFirst("%", ""+i)).delete();
jpayne@68 360 }
jpayne@68 361 if(pass>1){
jpayne@68 362 assert(in2==null);
jpayne@68 363 new File(in1).delete();
jpayne@68 364 }
jpayne@68 365 }
jpayne@68 366
jpayne@68 367 if(externalSort){
jpayne@68 368 outstream.println();
jpayne@68 369 String[] sortArgs=new String[] {"in="+temp2, "out="+out1, "ow="+overwrite};
jpayne@68 370 if(out2!=null){sortArgs=new String[] {"in="+temp2, "out="+out1, "out2="+out2, "ow="+overwrite};}
jpayne@68 371 SortByName.main(sortArgs);
jpayne@68 372 if(delete){new File(temp2).delete();}
jpayne@68 373 }
jpayne@68 374 }
jpayne@68 375
jpayne@68 376 private long runOnePass_v2(String[] args, int pass, String in1, String in2, String out1, String out2, long fileMem){
jpayne@68 377 assert(groups>1);
jpayne@68 378 if(pass>1){
jpayne@68 379 ecco=false;
jpayne@68 380 shortName="f";
jpayne@68 381 addName=false;
jpayne@68 382 }
jpayne@68 383
jpayne@68 384 String temp=getTempFname("clumpify_p"+pass+"_temp%_");
jpayne@68 385
jpayne@68 386 // String temp2=temp.replace("%", "FINAL");
jpayne@68 387 String namesorted=temp.replace("%", "namesorted_%");
jpayne@68 388 final boolean externalSort=(pass==passes && (repair || namesort));
jpayne@68 389
jpayne@68 390 if(pass==1){
jpayne@68 391 args[0]="in1="+in1;
jpayne@68 392 args[1]="in2="+in2;
jpayne@68 393 args[2]="out="+temp;
jpayne@68 394 args[3]="out2="+null;
jpayne@68 395 args[5]="ecco="+ecco;
jpayne@68 396 args[6]="addname=f";
jpayne@68 397 args[7]="shortname="+shortName;
jpayne@68 398 args[8]="unpair="+unpair;
jpayne@68 399 args[9]="repair=f";
jpayne@68 400 args[10]="namesort=f";
jpayne@68 401 args[11]="ow="+overwrite;
jpayne@68 402 KmerSplit.maxZipLevel=2;
jpayne@68 403 KmerSplit.main(args);
jpayne@68 404 fileMem=KmerSplit.lastMemProcessed;
jpayne@68 405
jpayne@68 406 FASTQ.DETECT_QUALITY=FASTQ.DETECT_QUALITY_OUT=false;
jpayne@68 407 FASTQ.ASCII_OFFSET=FASTQ.ASCII_OFFSET_OUT;
jpayne@68 408 }
jpayne@68 409
jpayne@68 410 args[0]="in1="+(pass==1 ? temp : in1);
jpayne@68 411 args[1]="in2="+null;
jpayne@68 412 args[2]="out="+(externalSort ? namesorted : out1);
jpayne@68 413 args[3]="out2="+(externalSort ? "null" : out2);
jpayne@68 414 args[5]="ecco=f";
jpayne@68 415 args[6]="addname="+addName;
jpayne@68 416 args[7]="shortname=f";
jpayne@68 417 args[8]="unpair=f";
jpayne@68 418 args[9]="repair="+(repair && externalSort);
jpayne@68 419 args[10]="namesort="+(namesort && externalSort);
jpayne@68 420 args[11]="ow="+overwrite;
jpayne@68 421 if(unpair){
jpayne@68 422 FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false;
jpayne@68 423 }
jpayne@68 424 if(externalSort){
jpayne@68 425 KmerSort.doHashAndSplit=false;
jpayne@68 426 }
jpayne@68 427 if(V3){
jpayne@68 428 KmerSort3.main(fileMem, pass, passes, args);
jpayne@68 429 if(fileMem<1){fileMem=KmerSort3.lastMemProcessed;}
jpayne@68 430 }else{KmerSort2.main(args);}
jpayne@68 431
jpayne@68 432 if(delete){
jpayne@68 433 for(int i=0; i<groups; i++){
jpayne@68 434 new File((pass==1 ? temp : in1).replaceFirst("%", ""+i)).delete();
jpayne@68 435 }
jpayne@68 436 }
jpayne@68 437
jpayne@68 438 if(externalSort){
jpayne@68 439 outstream.println();
jpayne@68 440
jpayne@68 441 ArrayList<String> names=new ArrayList<String>();
jpayne@68 442 for(int i=0; i<groups; i++){
jpayne@68 443 names.add(namesorted.replaceFirst("%", ""+i));
jpayne@68 444 }
jpayne@68 445 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 446
jpayne@68 447 ReadWrite.USE_PIGZ=true;
jpayne@68 448 ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
jpayne@68 449 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
jpayne@68 450 FileFormat dest=FileFormat.testOutput(out1, FileFormat.FASTQ, null, true, overwrite, false, false);
jpayne@68 451 FileFormat dest2=FileFormat.testOutput(out2, FileFormat.FASTQ, null, true, overwrite, false, false);
jpayne@68 452 SortByName.mergeAndDump(names, /*null, */dest, dest2, delete, useSharedHeader, false, outstream, 1000);
jpayne@68 453 }
jpayne@68 454
jpayne@68 455 // if(externalSort){
jpayne@68 456 // outstream.println();
jpayne@68 457 // SortByName.main(new String[] {"in="+temp2, "out="+out, "ow="+overwrite});
jpayne@68 458 // if(delete){new File(temp2).delete();}
jpayne@68 459 // }
jpayne@68 460 return fileMem;
jpayne@68 461 }
jpayne@68 462
jpayne@68 463 /*--------------------------------------------------------------*/
jpayne@68 464 /*---------------- Inner Methods ----------------*/
jpayne@68 465 /*--------------------------------------------------------------*/
jpayne@68 466
jpayne@68 467 private void autoSetGroups(String s) {
jpayne@68 468 if(s==null || s.equalsIgnoreCase("null")){return;}
jpayne@68 469 if(Tools.isDigit(s.charAt(0))){
jpayne@68 470 groups=Integer.parseInt(s);
jpayne@68 471 return;
jpayne@68 472 }
jpayne@68 473 assert(s.equalsIgnoreCase("auto")) : "Unknown groups setting: "+s;
jpayne@68 474
jpayne@68 475 final long maxMem=Shared.memAvailable(1);
jpayne@68 476 FileFormat ff1=FileFormat.testInput(in1, FileFormat.FASTQ, null, false, false);
jpayne@68 477 if(ff1==null || ff1.stdio()){return;}
jpayne@68 478
jpayne@68 479 // outstream.println("in1="+in1+", overhead="+(0.5*(ReadKey.overhead+Clump.overhead)));
jpayne@68 480
jpayne@68 481 double[] estimates=Tools.estimateFileMemory(in1, 1000, 0.5*(ReadKey.overhead+Clump.overhead), true, lowComplexity);
jpayne@68 482 if(in2!=null){
jpayne@68 483 double[] estimates2=Tools.estimateFileMemory(in2, 1000, 0.5*(ReadKey.overhead+Clump.overhead), true, lowComplexity);
jpayne@68 484 estimates[0]+=estimates2[0];
jpayne@68 485 estimates[1]+=estimates2[1];
jpayne@68 486 estimates[4]+=estimates2[4];
jpayne@68 487 }
jpayne@68 488
jpayne@68 489 // outstream.println(Arrays.toString(estimates));
jpayne@68 490
jpayne@68 491 double memEstimate=estimates==null ? 0 : estimates[0];
jpayne@68 492 double diskEstimate=estimates==null ? 0 : estimates[1];
jpayne@68 493 double readEstimate=estimates==null ? 0 : estimates[4];
jpayne@68 494 double worstCase=memEstimate*1.5;
jpayne@68 495
jpayne@68 496 // outstream.println("Raw Disk Size Estimate: "+(long)(diskEstimate/(1024*1024))+" MB");
jpayne@68 497 outstream.println("Read Estimate: "+(long)(readEstimate));
jpayne@68 498 outstream.println("Memory Estimate: "+(long)(memEstimate/(1024*1024))+" MB");
jpayne@68 499 outstream.println("Memory Available: "+(maxMem/(1024*1024))+" MB");
jpayne@68 500
jpayne@68 501 if(maxMem>worstCase && readEstimate<Integer.MAX_VALUE){
jpayne@68 502 groups=1;
jpayne@68 503 }else{
jpayne@68 504 groups=Tools.max(11, (int)(3+(3*worstCase/maxMem)*(V3 ? KmerSort3.fetchThreads : 2)), (int)((2*readEstimate)/Integer.MAX_VALUE))|1;
jpayne@68 505 }
jpayne@68 506 outstream.println("Set groups to "+groups);
jpayne@68 507 }
jpayne@68 508
jpayne@68 509 private String getTempFname(String core){
jpayne@68 510 // outstream.println(core);
jpayne@68 511 String temp;
jpayne@68 512 String path="", extension=".fq";
jpayne@68 513 if(out1!=null){
jpayne@68 514 core=ReadWrite.stripToCore(out1)+"_"+core;
jpayne@68 515 path=ReadWrite.getPath(out1);
jpayne@68 516 extension=ReadWrite.getExtension(out1);
jpayne@68 517 }
jpayne@68 518
jpayne@68 519 if(useTmpdir && Shared.tmpdir()!=null){
jpayne@68 520 temp=Shared.tmpdir()+core+Long.toHexString((randy.nextLong()&Long.MAX_VALUE))+extension;
jpayne@68 521 }else{
jpayne@68 522 temp=path+core+Long.toHexString((randy.nextLong()&Long.MAX_VALUE))+extension;
jpayne@68 523 }
jpayne@68 524 // assert(false) : path+", "+temp+", "+core+", "+out1;
jpayne@68 525
jpayne@68 526 String comp=ReadWrite.compressionType(temp);
jpayne@68 527 if(comp!=null){comp=".gz";} //Prevent bz2 temp files which cause a crash
jpayne@68 528
jpayne@68 529 if(forceCompressTemp && comp==null){
jpayne@68 530 temp+=".gz";
jpayne@68 531 }else if(comp!=null && forceRawTemp){
jpayne@68 532 temp=temp.substring(0, temp.lastIndexOf('.'));
jpayne@68 533 }
jpayne@68 534 if(temp.endsWith(".bz2")){temp=temp.substring(0, temp.length()-4);} //Prevent bz2 temp files which cause a crash
jpayne@68 535
jpayne@68 536 // outstream.println(temp);
jpayne@68 537 return temp;
jpayne@68 538 }
jpayne@68 539
jpayne@68 540 public static void shrinkName(Read r) {
jpayne@68 541 if(r==null){return;}
jpayne@68 542 String s=r.id;
jpayne@68 543 if(s.contains("HISEQ")){s=s.replace("HISEQ", "H");}
jpayne@68 544 if(s.contains("MISEQ")){
jpayne@68 545 s=s.replace("MISEQ", "M");
jpayne@68 546 }
jpayne@68 547 if(s.contains(":000000000-")){
jpayne@68 548 s=s.replace(":000000000-", ":");
jpayne@68 549 }
jpayne@68 550 r.id=s;
jpayne@68 551 }
jpayne@68 552
jpayne@68 553 public static void shortName(Read r) {
jpayne@68 554 ByteBuilder sb=new ByteBuilder(14);
jpayne@68 555 long x=r.numericID|1;
jpayne@68 556
jpayne@68 557 while(x<1000000000L){
jpayne@68 558 x*=10;
jpayne@68 559 sb.append('0');
jpayne@68 560 }
jpayne@68 561 sb.append(r.numericID);
jpayne@68 562
jpayne@68 563 // while(x<0x10000000L){
jpayne@68 564 // x*=16;
jpayne@68 565 // sb.append('0');
jpayne@68 566 // }
jpayne@68 567 // sb.append(Long.toHexString(r.numericID));
jpayne@68 568
jpayne@68 569 sb.append(r.pairnum()==0 ? " 1:" : " 2:");
jpayne@68 570 r.id=sb.toString();
jpayne@68 571 }
jpayne@68 572
jpayne@68 573 /*--------------------------------------------------------------*/
jpayne@68 574 /*---------------- Fields ----------------*/
jpayne@68 575 /*--------------------------------------------------------------*/
jpayne@68 576
jpayne@68 577 private boolean lowComplexity=false;
jpayne@68 578
jpayne@68 579 private boolean quantizeQuality=false;
jpayne@68 580 private Random randy=new Random();
jpayne@68 581 private int groups=31;
jpayne@68 582 private int passes=1;
jpayne@68 583 private boolean ecco=false;
jpayne@68 584 private boolean addName=false;
jpayne@68 585 private String shortName="f";
jpayne@68 586 private boolean useTmpdir=false;
jpayne@68 587 private boolean delete=true;
jpayne@68 588 private boolean deleteInput=false;
jpayne@68 589 private boolean useSharedHeader=false;
jpayne@68 590 private boolean forceCompressTemp=false;
jpayne@68 591 private boolean forceRawTemp=false;
jpayne@68 592 private boolean overwrite=true;
jpayne@68 593
jpayne@68 594 private boolean unpair=false;
jpayne@68 595 private boolean repair=false;
jpayne@68 596 private boolean namesort=false;
jpayne@68 597 private boolean V2=false;
jpayne@68 598 private boolean V3=true;
jpayne@68 599
jpayne@68 600 private String in1=null;
jpayne@68 601 private String in2=null;
jpayne@68 602 private String out1=null;
jpayne@68 603 private String out2=null;
jpayne@68 604
jpayne@68 605 ArrayList<String> args2=new ArrayList<String>();
jpayne@68 606 private PrintStream outstream=System.err;
jpayne@68 607
jpayne@68 608 public static boolean sharedErrorState=false;
jpayne@68 609
jpayne@68 610 }