annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/AddSSU.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package sketch;
jpayne@68 2
jpayne@68 3 import java.io.PrintStream;
jpayne@68 4 import java.util.ArrayList;
jpayne@68 5 import java.util.Arrays;
jpayne@68 6
jpayne@68 7 import fileIO.ByteFile;
jpayne@68 8 import fileIO.ByteStreamWriter;
jpayne@68 9 import fileIO.FileFormat;
jpayne@68 10 import fileIO.ReadWrite;
jpayne@68 11 import shared.Parse;
jpayne@68 12 import shared.Parser;
jpayne@68 13 import shared.PreParser;
jpayne@68 14 import shared.Shared;
jpayne@68 15 import shared.Timer;
jpayne@68 16 import shared.Tools;
jpayne@68 17 import structures.ByteBuilder;
jpayne@68 18 import tax.TaxTree;
jpayne@68 19
jpayne@68 20 /**
jpayne@68 21 * @author Brian Bushnell
jpayne@68 22 * @date May 9, 2016
jpayne@68 23 *
jpayne@68 24 */
jpayne@68 25 public class AddSSU {
jpayne@68 26
jpayne@68 27 /*--------------------------------------------------------------*/
jpayne@68 28 /*---------------- Initialization ----------------*/
jpayne@68 29 /*--------------------------------------------------------------*/
jpayne@68 30
jpayne@68 31 /**
jpayne@68 32 * Code entrance from the command line.
jpayne@68 33 * @param args Command line arguments
jpayne@68 34 */
jpayne@68 35 public static void main(String[] args){
jpayne@68 36 //Start a timer immediately upon code entrance.
jpayne@68 37 Timer t=new Timer();
jpayne@68 38
jpayne@68 39 //Create an instance of this class
jpayne@68 40 AddSSU x=new AddSSU(args);
jpayne@68 41
jpayne@68 42 //Run the object
jpayne@68 43 x.process(t);
jpayne@68 44
jpayne@68 45 //Close the print stream if it was redirected
jpayne@68 46 Shared.closeStream(x.outstream);
jpayne@68 47 }
jpayne@68 48
/**
 * Constructor.
 * Pre-parses and parses the arguments, validates the file names, creates the
 * input and output FileFormats, and loads a TaxTree when any of the
 * clade-specific flags (the Euks/Proks variants) is set.
 * @param args Command line arguments
 */
public AddSSU(String[] args){

	//Preparse step for help, config files, and outstream
	final PreParser preParser=new PreParser(args, /*getClass()*/null, false);
	args=preParser.args;
	outstream=preParser.outstream;

	//Set shared static variables prior to parsing
	ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
	ReadWrite.MAX_ZIP_THREADS=Shared.threads();

	//Parse the arguments, then copy out the generic settings
	final Parser parsed=parse(args);
	overwrite=parsed.overwrite;
	append=parsed.append;
	in1=parsed.in1;
	out1=parsed.out1;

	fixExtensions(); //Add or remove .gz or .bz2 as needed
	checkFileExistence(); //Ensure files can be read and written
	checkStatics(); //Adjust file-related static fields as needed for this program

	ffout1=FileFormat.testOutput(out1, FileFormat.SKETCH, null, true, overwrite, append, false);
	ffin1=FileFormat.testInput(in1, FileFormat.SKETCH, null, true, false);

	if(verbose){
		System.err.println("Set r16SFile="+r16SFile);
		System.err.println("Set r18SFile="+r18SFile);
	}

	//Only the clade-specific flags need taxonomy; the unconditional ones do not.
	final boolean needsTree=preferSSUMapEuks || preferSSUMapProks || clear16SEuks || clear18SEuks
			|| clear16SProks || clear18SProks || useSSUMapOnlyEuks || useSSUMapOnlyProks;

	if(needsTree && treeFile!=null){
		tree=TaxTree.loadTaxTree(treeFile, outstream, false, false);
	}else{
		tree=null;
	}

	if(needsTree){
		assert(tree!=null) : "preferSSUMapForEuks, clear16SEuks, and clear18SEuks require a TaxTree.";
	}
}
jpayne@68 94
jpayne@68 95 /*--------------------------------------------------------------*/
jpayne@68 96 /*---------------- Initialization Helpers ----------------*/
jpayne@68 97 /*--------------------------------------------------------------*/
jpayne@68 98
jpayne@68 99 /** Parse arguments from the command line */
jpayne@68 100 private Parser parse(String[] args){
jpayne@68 101
jpayne@68 102 Parser parser=new Parser();
jpayne@68 103 for(int i=0; i<args.length; i++){
jpayne@68 104 String arg=args[i];
jpayne@68 105 String[] split=arg.split("=");
jpayne@68 106 String a=split[0].toLowerCase();
jpayne@68 107 String b=split.length>1 ? split[1] : null;
jpayne@68 108 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
jpayne@68 109
jpayne@68 110 if(a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("16Sfile")){
jpayne@68 111 r16SFile=b;
jpayne@68 112 }else if(a.equalsIgnoreCase("18S") || a.equalsIgnoreCase("18Sfile")){
jpayne@68 113 r18SFile=b;
jpayne@68 114 }else if(a.equalsIgnoreCase("tree") || a.equalsIgnoreCase("treefile")){
jpayne@68 115 treeFile=b;
jpayne@68 116 }else if(a.equals("lines")){
jpayne@68 117 maxLines=Long.parseLong(b);
jpayne@68 118 if(maxLines<0){maxLines=Long.MAX_VALUE;}
jpayne@68 119 }else if(a.equals("verbose")){
jpayne@68 120 verbose=Parse.parseBoolean(b);
jpayne@68 121 // ByteFile1.verbose=verbose;
jpayne@68 122 // ByteFile2.verbose=verbose;
jpayne@68 123 // ReadWrite.verbose=verbose;
jpayne@68 124 }
jpayne@68 125
jpayne@68 126 else if(a.equalsIgnoreCase("preferSSUMap")){
jpayne@68 127 preferSSUMap=Parse.parseBoolean(b);
jpayne@68 128 }else if(a.equalsIgnoreCase("preferSSUMapForEuks") || a.equalsIgnoreCase("preferSSUMapEuks")){
jpayne@68 129 preferSSUMapEuks=Parse.parseBoolean(b);
jpayne@68 130 }else if(a.equalsIgnoreCase("useSSUMapOnly")){
jpayne@68 131 useSSUMapOnly=Parse.parseBoolean(b);
jpayne@68 132 }else if(a.equalsIgnoreCase("useSSUMapOnlyEuks") || a.equalsIgnoreCase("SSUMapOnlyEuks")){
jpayne@68 133 useSSUMapOnlyEuks=Parse.parseBoolean(b);
jpayne@68 134 }else if(a.equalsIgnoreCase("useSSUMapOnlyProks") || a.equalsIgnoreCase("SSUMapOnlyProks")){
jpayne@68 135 useSSUMapOnlyProks=Parse.parseBoolean(b);
jpayne@68 136 }else if(a.equalsIgnoreCase("preferSSUMapForProks") || a.equalsIgnoreCase("preferSSUMapProks")){
jpayne@68 137 preferSSUMapProks=Parse.parseBoolean(b);
jpayne@68 138 }
jpayne@68 139
jpayne@68 140 else if(a.equalsIgnoreCase("clearAll")){
jpayne@68 141 clear16S=clear18S=Parse.parseBoolean(b);
jpayne@68 142 }else if(a.equalsIgnoreCase("clear16S")){
jpayne@68 143 clear16S=Parse.parseBoolean(b);
jpayne@68 144 }else if(a.equalsIgnoreCase("clear18S")){
jpayne@68 145 clear18S=Parse.parseBoolean(b);
jpayne@68 146 }else if(a.equalsIgnoreCase("clear16SEuks")){
jpayne@68 147 clear16SEuks=Parse.parseBoolean(b);
jpayne@68 148 }else if(a.equalsIgnoreCase("clear18SEuks")){
jpayne@68 149 clear18SEuks=Parse.parseBoolean(b);
jpayne@68 150 }else if(a.equalsIgnoreCase("clear16SProks")){
jpayne@68 151 clear16SProks=Parse.parseBoolean(b);
jpayne@68 152 }else if(a.equalsIgnoreCase("clear18SProks")){
jpayne@68 153 clear18SProks=Parse.parseBoolean(b);
jpayne@68 154 }
jpayne@68 155
jpayne@68 156 else if(parser.parse(arg, a, b)){
jpayne@68 157 //do nothing
jpayne@68 158 }else{
jpayne@68 159 outstream.println("Unknown parameter "+args[i]);
jpayne@68 160 assert(false) : "Unknown parameter "+args[i];
jpayne@68 161 // throw new RuntimeException("Unknown parameter "+args[i]);
jpayne@68 162 }
jpayne@68 163 }
jpayne@68 164 if("auto".equalsIgnoreCase(r16SFile)){r16SFile=TaxTree.default16SFile();}
jpayne@68 165 if("auto".equalsIgnoreCase(r18SFile)){r18SFile=TaxTree.default18SFile();}
jpayne@68 166 SSUMap.r16SFile=r16SFile;
jpayne@68 167 SSUMap.r18SFile=r18SFile;
jpayne@68 168
jpayne@68 169 return parser;
jpayne@68 170 }
jpayne@68 171
jpayne@68 172 /** Add or remove .gz or .bz2 as needed */
jpayne@68 173 private void fixExtensions(){
jpayne@68 174 in1=Tools.fixExtension(in1);
jpayne@68 175 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 176 }
jpayne@68 177
jpayne@68 178 /** Ensure files can be read and written */
jpayne@68 179 private void checkFileExistence(){
jpayne@68 180 //Ensure output files can be written
jpayne@68 181 if(!Tools.testOutputFiles(overwrite, append, false, out1)){
jpayne@68 182 outstream.println((out1==null)+", "+out1);
jpayne@68 183 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n");
jpayne@68 184 }
jpayne@68 185
jpayne@68 186 //Ensure input files can be read
jpayne@68 187 if(!Tools.testInputFiles(false, true, in1, r16SFile, r18SFile)){
jpayne@68 188 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 189 }
jpayne@68 190 assert(in1!=null) : "Input sketch file is required";
jpayne@68 191 assert(r16SFile!=null || r18SFile!=null) : "Input SSU file is required";
jpayne@68 192
jpayne@68 193 //Ensure that no file was specified multiple times
jpayne@68 194 if(!Tools.testForDuplicateFiles(true, in1, out1, r16SFile, r18SFile)){
jpayne@68 195 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 196 }
jpayne@68 197 }
jpayne@68 198
jpayne@68 199 /** Adjust file-related static fields as needed for this program */
jpayne@68 200 private static void checkStatics(){
jpayne@68 201 //Adjust the number of threads for input file reading
jpayne@68 202 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 203 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 204 }
jpayne@68 205
jpayne@68 206 // if(!ByteFile.FORCE_MODE_BF2){
jpayne@68 207 // ByteFile.FORCE_MODE_BF2=false;
jpayne@68 208 // ByteFile.FORCE_MODE_BF1=true;
jpayne@68 209 // }
jpayne@68 210 }
jpayne@68 211
jpayne@68 212 /*--------------------------------------------------------------*/
jpayne@68 213 /*---------------- Outer Methods ----------------*/
jpayne@68 214 /*--------------------------------------------------------------*/
jpayne@68 215
jpayne@68 216 void process(Timer t){
jpayne@68 217
jpayne@68 218 ByteFile bf=ByteFile.makeByteFile(ffin1);
jpayne@68 219 ByteStreamWriter bsw=makeBSW(ffout1);
jpayne@68 220
jpayne@68 221 processInner(bf, bsw);
jpayne@68 222
jpayne@68 223 errorState|=bf.close();
jpayne@68 224 if(bsw!=null){errorState|=bsw.poisonAndWait();}
jpayne@68 225
jpayne@68 226 t.stop();
jpayne@68 227
jpayne@68 228 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
jpayne@68 229 outstream.println(Tools.linesBytesOut(linesProcessed, bytesProcessed, linesOut, bytesOut, 8, true));
jpayne@68 230
jpayne@68 231 outstream.println();
jpayne@68 232 outstream.println(Tools.number("Sketches:", sketchCount, 8));
jpayne@68 233 outstream.println(Tools.number("16S In:", r16Sin, 8));
jpayne@68 234 outstream.println(Tools.number("18S In:", r18Sin, 8));
jpayne@68 235 outstream.println(Tools.number("16S Added:", r16SfromMap, 8));
jpayne@68 236 outstream.println(Tools.number("18S Added:", r18SfromMap, 8));
jpayne@68 237 outstream.println(Tools.numberPercent("16S Out:", r16Sout, r16Sout*100.0/sketchCount, 2, 8));
jpayne@68 238 outstream.println(Tools.numberPercent("18S Out:", r18Sout, r18Sout*100.0/sketchCount, 2, 8));
jpayne@68 239
jpayne@68 240 if(errorState){
jpayne@68 241 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 242 }
jpayne@68 243 }
jpayne@68 244
jpayne@68 245 /*--------------------------------------------------------------*/
jpayne@68 246 /*---------------- Inner Methods ----------------*/
jpayne@68 247 /*--------------------------------------------------------------*/
jpayne@68 248
jpayne@68 249 private static ByteStreamWriter makeBSW(FileFormat ff){
jpayne@68 250 if(ff==null){return null;}
jpayne@68 251 ByteStreamWriter bsw=new ByteStreamWriter(ff);
jpayne@68 252 bsw.start();
jpayne@68 253 return bsw;
jpayne@68 254 }
jpayne@68 255
jpayne@68 256 // private void processInner_old(ByteFile bf, ByteStreamWriter bsw){
jpayne@68 257 // SSUMap.load(outstream);
jpayne@68 258 //
jpayne@68 259 // if(verbose){
jpayne@68 260 // System.err.println("Loaded SSUMap; |16S|="+SSUMap.r16SCount()+", |18S|="+SSUMap.r18SCount());
jpayne@68 261 // }
jpayne@68 262 //
jpayne@68 263 // byte[] line=bf.nextLine();
jpayne@68 264 //// ByteBuilder bb=new ByteBuilder();
jpayne@68 265 //
jpayne@68 266 // final byte[] ssuBytes="SSU:".getBytes();
jpayne@68 267 // final byte[] r16SBytes="16S:".getBytes();
jpayne@68 268 // final byte[] r18SBytes="18S:".getBytes();
jpayne@68 269 //
jpayne@68 270 // while(line!=null){
jpayne@68 271 // if(line.length>0){
jpayne@68 272 // if(maxLines>0 && linesProcessed>=maxLines){break;}
jpayne@68 273 // linesProcessed++;
jpayne@68 274 // bytesProcessed+=(line.length+1);
jpayne@68 275 //
jpayne@68 276 // final boolean header=(line[0]=='#');
jpayne@68 277 //
jpayne@68 278 // linesOut++;
jpayne@68 279 // bytesOut+=(line.length+1);
jpayne@68 280 //
jpayne@68 281 // if(header){
jpayne@68 282 // if(Tools.startsWith(line, "#SZ:")){
jpayne@68 283 // sketchCount++;
jpayne@68 284 //
jpayne@68 285 // bsw.print(line);
jpayne@68 286 //
jpayne@68 287 // final int tid=parseTaxID(line);
jpayne@68 288 // final boolean has16S=Tools.contains(line, ssuBytes, 0) || Tools.contains(line, r16SBytes, 0);
jpayne@68 289 // final boolean has18S=Tools.contains(line, r18SBytes, 0);
jpayne@68 290 //
jpayne@68 291 // if(verbose){
jpayne@68 292 // System.err.println("For line "+new String(line)+":");
jpayne@68 293 // System.err.println("tid="+tid+", has16S="+has16S+", has18S="+has18S);
jpayne@68 294 // }
jpayne@68 295 //
jpayne@68 296 // if(tid>0){
jpayne@68 297 // final byte[] r16S=has16S ? null : SSUMap.r16SMap.get(tid);
jpayne@68 298 // final byte[] r18S=has18S ? null : SSUMap.r18SMap.get(tid);
jpayne@68 299 // if(r16S!=null){bsw.print("\t16S:").print(r16S.length); ssuOut++;}
jpayne@68 300 // if(r18S!=null){bsw.print("\t18S:").print(r18S.length); ssuOut++;}
jpayne@68 301 // if(r16S!=null){bsw.print("\n#16S:").print(r16S);}
jpayne@68 302 // if(r18S!=null){bsw.print("\n#18S:").print(r18S);}
jpayne@68 303 //
jpayne@68 304 // if(verbose){System.err.println("Found 16S: "+(r16S!=null)+"; found 18S: "+(r18S!=null));}
jpayne@68 305 // }
jpayne@68 306 // bsw.println();
jpayne@68 307 // }else if(Tools.startsWith(line, "#16S:") || Tools.startsWith(line, "#18S:") || Tools.startsWith(line, "#SSU:")){
jpayne@68 308 // bsw.println(line);
jpayne@68 309 // ssuIn++;
jpayne@68 310 // ssuOut++;
jpayne@68 311 // }else{
jpayne@68 312 // assert(Tools.startsWith(line, "##")) : new String(line);
jpayne@68 313 // bsw.println(line);
jpayne@68 314 // }
jpayne@68 315 // }else{
jpayne@68 316 // bsw.println(line);
jpayne@68 317 // }
jpayne@68 318 // }
jpayne@68 319 // line=bf.nextLine();
jpayne@68 320 // }
jpayne@68 321 // }
jpayne@68 322
jpayne@68 323 private void processInner(ByteFile bf, ByteStreamWriter bsw){
jpayne@68 324 SSUMap.load(outstream);
jpayne@68 325
jpayne@68 326 if(verbose){
jpayne@68 327 System.err.println("Loaded SSUMap; |16S|="+SSUMap.r16SCount()+", |18S|="+SSUMap.r18SCount());
jpayne@68 328 }
jpayne@68 329
jpayne@68 330 byte[] line=bf.nextLine();
jpayne@68 331 // ByteBuilder bb=new ByteBuilder();
jpayne@68 332
jpayne@68 333 // final byte[] ssuBytes="SSU:".getBytes();
jpayne@68 334 // final byte[] r16SBytes="16S:".getBytes();
jpayne@68 335 // final byte[] r18SBytes="18S:".getBytes();
jpayne@68 336
jpayne@68 337 SketchHeader header=null;
jpayne@68 338 while(line!=null){
jpayne@68 339 if(line.length>0){
jpayne@68 340 if(maxLines>0 && linesProcessed>=maxLines){break;}
jpayne@68 341 linesProcessed++;
jpayne@68 342 bytesProcessed+=(line.length+1);
jpayne@68 343
jpayne@68 344 final boolean isHeader=(line[0]=='#');
jpayne@68 345
jpayne@68 346 if(isHeader){
jpayne@68 347 if(Tools.startsWith(line, "#SZ:")){
jpayne@68 348 assert(header==null) : "\nReplacing this:\n"+header.toBytes()+"\nWith this:\n"+new String(line)+"\n";
jpayne@68 349 header=new SketchHeader(line);
jpayne@68 350 sketchCount++;
jpayne@68 351 }else if(Tools.startsWith(line, "##")){
jpayne@68 352 bsw.println(line);
jpayne@68 353
jpayne@68 354 linesOut++;
jpayne@68 355 bytesOut+=(line.length+1);
jpayne@68 356 }else{
jpayne@68 357 header.addLine(line);
jpayne@68 358 }
jpayne@68 359 }else{
jpayne@68 360 if(header!=null){
jpayne@68 361 try {
jpayne@68 362 processHeader(header);
jpayne@68 363 } catch (Throwable e) {
jpayne@68 364 e.printStackTrace();
jpayne@68 365 assert(false) : header.toBytes();
jpayne@68 366 }
jpayne@68 367 r16Sout+=(header.r16S==null ? 0 : 1);
jpayne@68 368 r18Sout+=(header.r18S==null ? 0 : 1);
jpayne@68 369 linesOut+=1+(header.r16S==null ? 0 : 1)+(header.r18S==null ? 0 : 1);
jpayne@68 370 ByteBuilder bb=header.toBytes();
jpayne@68 371 bytesOut+=(bb.length+1);
jpayne@68 372 bsw.println(bb);
jpayne@68 373 header=null;
jpayne@68 374 }
jpayne@68 375 bsw.println(line);
jpayne@68 376
jpayne@68 377 linesOut++;
jpayne@68 378 bytesOut+=(line.length+1);
jpayne@68 379 }
jpayne@68 380 }
jpayne@68 381 line=bf.nextLine();
jpayne@68 382 }
jpayne@68 383 }
jpayne@68 384
jpayne@68 385 void processHeader(SketchHeader header){
jpayne@68 386
jpayne@68 387 if(verbose){System.err.println("Processing tid "+header.tid+":\n"+header.toBytes()+"\n");}
jpayne@68 388
jpayne@68 389 final boolean euk=(tree!=null && header.tid>0 && header.tid<SketchObject.minFakeID) ? tree.isEukaryote(header.tid) : false;
jpayne@68 390 final boolean prok=(tree!=null && header.tid>0 && header.tid<SketchObject.minFakeID) ? tree.isProkaryote(header.tid) : false;
jpayne@68 391 if(useSSUMapOnly || (useSSUMapOnlyEuks && euk) || (useSSUMapOnlyProks && prok)){header.r16S=header.r18S=null;}
jpayne@68 392 if(header.tid>0){
jpayne@68 393 final boolean preferMap=(preferSSUMap || (preferSSUMapEuks && euk) || (preferSSUMapProks && prok));
jpayne@68 394 byte[] r16S=(SSUMap.r16SMap==null ? null : SSUMap.r16SMap.get(header.tid));
jpayne@68 395 byte[] r18S=(SSUMap.r18SMap==null ? null : SSUMap.r18SMap.get(header.tid));
jpayne@68 396 if(r16S!=null && (preferMap || header.r16S==null)){
jpayne@68 397 header.r16S=r16S;
jpayne@68 398 r16SfromMap++;
jpayne@68 399 }
jpayne@68 400 if(r18S!=null && (preferMap || header.r18S==null)){
jpayne@68 401 header.r18S=r18S;
jpayne@68 402 r18SfromMap++;
jpayne@68 403 }
jpayne@68 404 }
jpayne@68 405 if(clear16S || (clear16SEuks && euk) || (clear16SProks && prok)){header.r16S=null;}
jpayne@68 406 if(clear18S || (clear18SEuks && euk) || (clear18SProks && prok)){header.r18S=null;}
jpayne@68 407 }
jpayne@68 408
jpayne@68 409 int parseTaxID(byte[] line){
jpayne@68 410 String[] split=Tools.tabPattern.split(new String(line));
jpayne@68 411 for(String s : split){
jpayne@68 412 if(s.startsWith("ID:") || s.startsWith("TAXID:")){
jpayne@68 413 final int colon=s.indexOf(':');
jpayne@68 414 final String sub=s.substring(colon+1);
jpayne@68 415 return Integer.parseInt(sub);
jpayne@68 416 }
jpayne@68 417 }
jpayne@68 418 return -1;
jpayne@68 419 }
jpayne@68 420
jpayne@68 421 /*--------------------------------------------------------------*/
jpayne@68 422
jpayne@68 423 //A very limited parser
jpayne@68 424 private class SketchHeader {
jpayne@68 425
jpayne@68 426 SketchHeader(byte[] line){
jpayne@68 427 this(new String(line, 1, line.length-1));
jpayne@68 428 }
jpayne@68 429
jpayne@68 430 SketchHeader(String line){
jpayne@68 431 if(line.charAt(0)=='#'){line=line.substring(1);}
jpayne@68 432 assert(line.startsWith("SZ:"));
jpayne@68 433 String[] split=Tools.tabPattern.split(line);
jpayne@68 434 fields=new ArrayList<String>(line.length()+2);
jpayne@68 435 int tid_=-1;
jpayne@68 436 for(String s : split){
jpayne@68 437 if(s.startsWith("16S:") || s.startsWith("18S:") || s.startsWith("SSU:")){
jpayne@68 438 //do nothing
jpayne@68 439 }else{
jpayne@68 440 if(s.startsWith("ID:") || s.startsWith("TAXID:")){
jpayne@68 441 final int colon=s.indexOf(':');
jpayne@68 442 final String sub=s.substring(colon+1);
jpayne@68 443 tid_=Integer.parseInt(sub);
jpayne@68 444 }
jpayne@68 445 fields.add(s);
jpayne@68 446 }
jpayne@68 447 }
jpayne@68 448 tid=tid_;
jpayne@68 449 }
jpayne@68 450
jpayne@68 451 void addLine(byte[] line){
jpayne@68 452 assert(line[0]=='#');
jpayne@68 453 assert(line[1]=='1' || line[1]=='S') : new String(line);
jpayne@68 454 if(Tools.startsWith(line, "#16S:") || Tools.startsWith(line, "#SSU:")){
jpayne@68 455 assert(r16S==null);
jpayne@68 456 r16S=Arrays.copyOfRange(line, 5, line.length);
jpayne@68 457 r16Sin++;
jpayne@68 458 }else if(Tools.startsWith(line, "#18S:")){
jpayne@68 459 assert(r18S==null);
jpayne@68 460 r18S=Arrays.copyOfRange(line, 5, line.length);
jpayne@68 461 r18Sin++;
jpayne@68 462 }else{
jpayne@68 463 assert(false) : new String(line);
jpayne@68 464 }
jpayne@68 465 }
jpayne@68 466
jpayne@68 467 ByteBuilder toBytes(){
jpayne@68 468 ByteBuilder bb=new ByteBuilder(1000);
jpayne@68 469 bb.append('#');
jpayne@68 470 for(int i=0; i<fields.size(); i++){
jpayne@68 471 if(i>0){bb.tab();}
jpayne@68 472 bb.append(fields.get(i));
jpayne@68 473 }
jpayne@68 474 if(r16S!=null){bb.tab().append("16S:").append(r16S.length);}
jpayne@68 475 if(r18S!=null){bb.tab().append("18S:").append(r18S.length);}
jpayne@68 476
jpayne@68 477 if(r16S!=null){bb.nl().append("#16S:").append(r16S);}
jpayne@68 478 if(r18S!=null){bb.nl().append("#18S:").append(r18S);}
jpayne@68 479 return bb;
jpayne@68 480 }
jpayne@68 481
jpayne@68 482 final int tid;
jpayne@68 483 ArrayList<String> fields;
jpayne@68 484 byte[] r16S;
jpayne@68 485 byte[] r18S;
jpayne@68 486 }
jpayne@68 487
jpayne@68 488 /*--------------------------------------------------------------*/
jpayne@68 489 /*---------------- Fields ----------------*/
jpayne@68 490 /*--------------------------------------------------------------*/
jpayne@68 491
jpayne@68 492 private String in1=null;
jpayne@68 493 private String out1=null;
jpayne@68 494 private String r16SFile="auto";
jpayne@68 495 private String r18SFile="auto";
jpayne@68 496 private String treeFile="auto";
jpayne@68 497
jpayne@68 498 boolean preferSSUMap=false;
jpayne@68 499 boolean preferSSUMapEuks=false;
jpayne@68 500 boolean preferSSUMapProks=false;
jpayne@68 501 boolean useSSUMapOnly=false;
jpayne@68 502 boolean useSSUMapOnlyEuks=false;
jpayne@68 503 boolean useSSUMapOnlyProks=false;
jpayne@68 504 boolean clear16S=false;
jpayne@68 505 boolean clear18S=false;
jpayne@68 506 boolean clear16SEuks=false;
jpayne@68 507 boolean clear18SEuks=false;
jpayne@68 508 boolean clear16SProks=false;
jpayne@68 509 boolean clear18SProks=false;
jpayne@68 510
jpayne@68 511 /*--------------------------------------------------------------*/
jpayne@68 512
jpayne@68 513 private long linesProcessed=0;
jpayne@68 514 private long linesOut=0;
jpayne@68 515 private long bytesProcessed=0;
jpayne@68 516 private long bytesOut=0;
jpayne@68 517
jpayne@68 518 private long sketchCount=0;
jpayne@68 519
jpayne@68 520 private long r16Sin=0;
jpayne@68 521 private long r16Sout=0;
jpayne@68 522 private long r16SfromMap=0;
jpayne@68 523 private long r18Sin=0;
jpayne@68 524 private long r18Sout=0;
jpayne@68 525 private long r18SfromMap=0;
jpayne@68 526
jpayne@68 527 private long maxLines=Long.MAX_VALUE;
jpayne@68 528
jpayne@68 529 /*--------------------------------------------------------------*/
jpayne@68 530 /*---------------- Final Fields ----------------*/
jpayne@68 531 /*--------------------------------------------------------------*/
jpayne@68 532
jpayne@68 533 private final FileFormat ffin1;
jpayne@68 534 private final FileFormat ffout1;
jpayne@68 535
jpayne@68 536 private final TaxTree tree;
jpayne@68 537
jpayne@68 538 /*--------------------------------------------------------------*/
jpayne@68 539 /*---------------- Common Fields ----------------*/
jpayne@68 540 /*--------------------------------------------------------------*/
jpayne@68 541
jpayne@68 542 private PrintStream outstream=System.err;
jpayne@68 543 public static boolean verbose=false;
jpayne@68 544 public boolean errorState=false;
jpayne@68 545 private boolean overwrite=false;
jpayne@68 546 private boolean append=false;
jpayne@68 547
jpayne@68 548 }