jpayne@68
|
1 package sketch;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.PrintStream;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.Arrays;
|
jpayne@68
|
6
|
jpayne@68
|
7 import fileIO.ByteFile;
|
jpayne@68
|
8 import fileIO.ByteStreamWriter;
|
jpayne@68
|
9 import fileIO.FileFormat;
|
jpayne@68
|
10 import fileIO.ReadWrite;
|
jpayne@68
|
11 import shared.Parse;
|
jpayne@68
|
12 import shared.Parser;
|
jpayne@68
|
13 import shared.PreParser;
|
jpayne@68
|
14 import shared.Shared;
|
jpayne@68
|
15 import shared.Timer;
|
jpayne@68
|
16 import shared.Tools;
|
jpayne@68
|
17 import structures.ByteBuilder;
|
jpayne@68
|
18 import tax.TaxTree;
|
jpayne@68
|
19
|
jpayne@68
|
20 /**
|
jpayne@68
|
21 * @author Brian Bushnell
|
jpayne@68
|
22 * @date May 9, 2016
|
jpayne@68
|
23 *
|
jpayne@68
|
24 */
|
jpayne@68
|
25 public class AddSSU {
|
jpayne@68
|
26
|
jpayne@68
|
27 /*--------------------------------------------------------------*/
|
jpayne@68
|
28 /*---------------- Initialization ----------------*/
|
jpayne@68
|
29 /*--------------------------------------------------------------*/
|
jpayne@68
|
30
|
jpayne@68
|
31 /**
|
jpayne@68
|
32 * Code entrance from the command line.
|
jpayne@68
|
33 * @param args Command line arguments
|
jpayne@68
|
34 */
|
jpayne@68
|
35 public static void main(String[] args){
|
jpayne@68
|
36 //Start a timer immediately upon code entrance.
|
jpayne@68
|
37 Timer t=new Timer();
|
jpayne@68
|
38
|
jpayne@68
|
39 //Create an instance of this class
|
jpayne@68
|
40 AddSSU x=new AddSSU(args);
|
jpayne@68
|
41
|
jpayne@68
|
42 //Run the object
|
jpayne@68
|
43 x.process(t);
|
jpayne@68
|
44
|
jpayne@68
|
45 //Close the print stream if it was redirected
|
jpayne@68
|
46 Shared.closeStream(x.outstream);
|
jpayne@68
|
47 }
|
jpayne@68
|
48
|
jpayne@68
|
/**
 * Constructor.
 * Parses the arguments, validates the input and output paths, creates the
 * input and output FileFormats, and loads a TaxTree when any flag that is
 * restricted to eukaryotes or prokaryotes has been set.
 * @param args Command line arguments
 */
public AddSSU(String[] args){

	//Preparse for help, config files, and outstream
	final PreParser preParser=new PreParser(args, /*getClass()*/null, false);
	args=preParser.args;
	outstream=preParser.outstream;

	//Set shared static variables prior to parsing
	ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
	ReadWrite.MAX_ZIP_THREADS=Shared.threads();

	//Parse the arguments, then copy out the values this class keeps
	final Parser parsed=parse(args);
	overwrite=parsed.overwrite;
	append=parsed.append;
	in1=parsed.in1;
	out1=parsed.out1;

	fixExtensions(); //Add or remove .gz or .bz2 as needed
	checkFileExistence(); //Ensure files can be read and written
	checkStatics(); //Adjust file-related static fields as needed for this program

	ffout1=FileFormat.testOutput(out1, FileFormat.SKETCH, null, true, overwrite, append, false);
	ffin1=FileFormat.testInput(in1, FileFormat.SKETCH, null, true, false);

	if(verbose){
		System.err.println("Set r16SFile="+r16SFile);
		System.err.println("Set r18SFile="+r18SFile);
	}

	//Each of these flags is applied per sketch according to the tree's euk/prok answer in processHeader
	final boolean treeRequired=(preferSSUMapEuks || preferSSUMapProks || clear16SEuks || clear18SEuks
			|| clear16SProks || clear18SProks || useSSUMapOnlyEuks || useSSUMapOnlyProks);

	if(treeRequired && treeFile!=null){
		tree=TaxTree.loadTaxTree(treeFile, outstream, false, false);
	}else{
		tree=null;
	}

	if(treeRequired){
		assert(tree!=null) : "preferSSUMapForEuks, clear16SEuks, and clear18SEuks require a TaxTree.";
	}
}
|
jpayne@68
|
94
|
jpayne@68
|
95 /*--------------------------------------------------------------*/
|
jpayne@68
|
96 /*---------------- Initialization Helpers ----------------*/
|
jpayne@68
|
97 /*--------------------------------------------------------------*/
|
jpayne@68
|
98
|
jpayne@68
|
99 /** Parse arguments from the command line */
|
jpayne@68
|
100 private Parser parse(String[] args){
|
jpayne@68
|
101
|
jpayne@68
|
102 Parser parser=new Parser();
|
jpayne@68
|
103 for(int i=0; i<args.length; i++){
|
jpayne@68
|
104 String arg=args[i];
|
jpayne@68
|
105 String[] split=arg.split("=");
|
jpayne@68
|
106 String a=split[0].toLowerCase();
|
jpayne@68
|
107 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
108 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
|
jpayne@68
|
109
|
jpayne@68
|
110 if(a.equalsIgnoreCase("16S") || a.equalsIgnoreCase("16Sfile")){
|
jpayne@68
|
111 r16SFile=b;
|
jpayne@68
|
112 }else if(a.equalsIgnoreCase("18S") || a.equalsIgnoreCase("18Sfile")){
|
jpayne@68
|
113 r18SFile=b;
|
jpayne@68
|
114 }else if(a.equalsIgnoreCase("tree") || a.equalsIgnoreCase("treefile")){
|
jpayne@68
|
115 treeFile=b;
|
jpayne@68
|
116 }else if(a.equals("lines")){
|
jpayne@68
|
117 maxLines=Long.parseLong(b);
|
jpayne@68
|
118 if(maxLines<0){maxLines=Long.MAX_VALUE;}
|
jpayne@68
|
119 }else if(a.equals("verbose")){
|
jpayne@68
|
120 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
121 // ByteFile1.verbose=verbose;
|
jpayne@68
|
122 // ByteFile2.verbose=verbose;
|
jpayne@68
|
123 // ReadWrite.verbose=verbose;
|
jpayne@68
|
124 }
|
jpayne@68
|
125
|
jpayne@68
|
126 else if(a.equalsIgnoreCase("preferSSUMap")){
|
jpayne@68
|
127 preferSSUMap=Parse.parseBoolean(b);
|
jpayne@68
|
128 }else if(a.equalsIgnoreCase("preferSSUMapForEuks") || a.equalsIgnoreCase("preferSSUMapEuks")){
|
jpayne@68
|
129 preferSSUMapEuks=Parse.parseBoolean(b);
|
jpayne@68
|
130 }else if(a.equalsIgnoreCase("useSSUMapOnly")){
|
jpayne@68
|
131 useSSUMapOnly=Parse.parseBoolean(b);
|
jpayne@68
|
132 }else if(a.equalsIgnoreCase("useSSUMapOnlyEuks") || a.equalsIgnoreCase("SSUMapOnlyEuks")){
|
jpayne@68
|
133 useSSUMapOnlyEuks=Parse.parseBoolean(b);
|
jpayne@68
|
134 }else if(a.equalsIgnoreCase("useSSUMapOnlyProks") || a.equalsIgnoreCase("SSUMapOnlyProks")){
|
jpayne@68
|
135 useSSUMapOnlyProks=Parse.parseBoolean(b);
|
jpayne@68
|
136 }else if(a.equalsIgnoreCase("preferSSUMapForProks") || a.equalsIgnoreCase("preferSSUMapProks")){
|
jpayne@68
|
137 preferSSUMapProks=Parse.parseBoolean(b);
|
jpayne@68
|
138 }
|
jpayne@68
|
139
|
jpayne@68
|
140 else if(a.equalsIgnoreCase("clearAll")){
|
jpayne@68
|
141 clear16S=clear18S=Parse.parseBoolean(b);
|
jpayne@68
|
142 }else if(a.equalsIgnoreCase("clear16S")){
|
jpayne@68
|
143 clear16S=Parse.parseBoolean(b);
|
jpayne@68
|
144 }else if(a.equalsIgnoreCase("clear18S")){
|
jpayne@68
|
145 clear18S=Parse.parseBoolean(b);
|
jpayne@68
|
146 }else if(a.equalsIgnoreCase("clear16SEuks")){
|
jpayne@68
|
147 clear16SEuks=Parse.parseBoolean(b);
|
jpayne@68
|
148 }else if(a.equalsIgnoreCase("clear18SEuks")){
|
jpayne@68
|
149 clear18SEuks=Parse.parseBoolean(b);
|
jpayne@68
|
150 }else if(a.equalsIgnoreCase("clear16SProks")){
|
jpayne@68
|
151 clear16SProks=Parse.parseBoolean(b);
|
jpayne@68
|
152 }else if(a.equalsIgnoreCase("clear18SProks")){
|
jpayne@68
|
153 clear18SProks=Parse.parseBoolean(b);
|
jpayne@68
|
154 }
|
jpayne@68
|
155
|
jpayne@68
|
156 else if(parser.parse(arg, a, b)){
|
jpayne@68
|
157 //do nothing
|
jpayne@68
|
158 }else{
|
jpayne@68
|
159 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
160 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
161 // throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
162 }
|
jpayne@68
|
163 }
|
jpayne@68
|
164 if("auto".equalsIgnoreCase(r16SFile)){r16SFile=TaxTree.default16SFile();}
|
jpayne@68
|
165 if("auto".equalsIgnoreCase(r18SFile)){r18SFile=TaxTree.default18SFile();}
|
jpayne@68
|
166 SSUMap.r16SFile=r16SFile;
|
jpayne@68
|
167 SSUMap.r18SFile=r18SFile;
|
jpayne@68
|
168
|
jpayne@68
|
169 return parser;
|
jpayne@68
|
170 }
|
jpayne@68
|
171
|
jpayne@68
|
172 /** Add or remove .gz or .bz2 as needed */
|
jpayne@68
|
173 private void fixExtensions(){
|
jpayne@68
|
174 in1=Tools.fixExtension(in1);
|
jpayne@68
|
175 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
176 }
|
jpayne@68
|
177
|
jpayne@68
|
178 /** Ensure files can be read and written */
|
jpayne@68
|
179 private void checkFileExistence(){
|
jpayne@68
|
180 //Ensure output files can be written
|
jpayne@68
|
181 if(!Tools.testOutputFiles(overwrite, append, false, out1)){
|
jpayne@68
|
182 outstream.println((out1==null)+", "+out1);
|
jpayne@68
|
183 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n");
|
jpayne@68
|
184 }
|
jpayne@68
|
185
|
jpayne@68
|
186 //Ensure input files can be read
|
jpayne@68
|
187 if(!Tools.testInputFiles(false, true, in1, r16SFile, r18SFile)){
|
jpayne@68
|
188 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
189 }
|
jpayne@68
|
190 assert(in1!=null) : "Input sketch file is required";
|
jpayne@68
|
191 assert(r16SFile!=null || r18SFile!=null) : "Input SSU file is required";
|
jpayne@68
|
192
|
jpayne@68
|
193 //Ensure that no file was specified multiple times
|
jpayne@68
|
194 if(!Tools.testForDuplicateFiles(true, in1, out1, r16SFile, r18SFile)){
|
jpayne@68
|
195 throw new RuntimeException("\nSome file names were specified multiple times.\n");
|
jpayne@68
|
196 }
|
jpayne@68
|
197 }
|
jpayne@68
|
198
|
jpayne@68
|
199 /** Adjust file-related static fields as needed for this program */
|
jpayne@68
|
200 private static void checkStatics(){
|
jpayne@68
|
201 //Adjust the number of threads for input file reading
|
jpayne@68
|
202 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
203 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
204 }
|
jpayne@68
|
205
|
jpayne@68
|
206 // if(!ByteFile.FORCE_MODE_BF2){
|
jpayne@68
|
207 // ByteFile.FORCE_MODE_BF2=false;
|
jpayne@68
|
208 // ByteFile.FORCE_MODE_BF1=true;
|
jpayne@68
|
209 // }
|
jpayne@68
|
210 }
|
jpayne@68
|
211
|
jpayne@68
|
212 /*--------------------------------------------------------------*/
|
jpayne@68
|
213 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
214 /*--------------------------------------------------------------*/
|
jpayne@68
|
215
|
jpayne@68
|
216 void process(Timer t){
|
jpayne@68
|
217
|
jpayne@68
|
218 ByteFile bf=ByteFile.makeByteFile(ffin1);
|
jpayne@68
|
219 ByteStreamWriter bsw=makeBSW(ffout1);
|
jpayne@68
|
220
|
jpayne@68
|
221 processInner(bf, bsw);
|
jpayne@68
|
222
|
jpayne@68
|
223 errorState|=bf.close();
|
jpayne@68
|
224 if(bsw!=null){errorState|=bsw.poisonAndWait();}
|
jpayne@68
|
225
|
jpayne@68
|
226 t.stop();
|
jpayne@68
|
227
|
jpayne@68
|
228 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
|
jpayne@68
|
229 outstream.println(Tools.linesBytesOut(linesProcessed, bytesProcessed, linesOut, bytesOut, 8, true));
|
jpayne@68
|
230
|
jpayne@68
|
231 outstream.println();
|
jpayne@68
|
232 outstream.println(Tools.number("Sketches:", sketchCount, 8));
|
jpayne@68
|
233 outstream.println(Tools.number("16S In:", r16Sin, 8));
|
jpayne@68
|
234 outstream.println(Tools.number("18S In:", r18Sin, 8));
|
jpayne@68
|
235 outstream.println(Tools.number("16S Added:", r16SfromMap, 8));
|
jpayne@68
|
236 outstream.println(Tools.number("18S Added:", r18SfromMap, 8));
|
jpayne@68
|
237 outstream.println(Tools.numberPercent("16S Out:", r16Sout, r16Sout*100.0/sketchCount, 2, 8));
|
jpayne@68
|
238 outstream.println(Tools.numberPercent("18S Out:", r18Sout, r18Sout*100.0/sketchCount, 2, 8));
|
jpayne@68
|
239
|
jpayne@68
|
240 if(errorState){
|
jpayne@68
|
241 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
242 }
|
jpayne@68
|
243 }
|
jpayne@68
|
244
|
jpayne@68
|
245 /*--------------------------------------------------------------*/
|
jpayne@68
|
246 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
247 /*--------------------------------------------------------------*/
|
jpayne@68
|
248
|
jpayne@68
|
249 private static ByteStreamWriter makeBSW(FileFormat ff){
|
jpayne@68
|
250 if(ff==null){return null;}
|
jpayne@68
|
251 ByteStreamWriter bsw=new ByteStreamWriter(ff);
|
jpayne@68
|
252 bsw.start();
|
jpayne@68
|
253 return bsw;
|
jpayne@68
|
254 }
|
jpayne@68
|
255
|
jpayne@68
|
256 // private void processInner_old(ByteFile bf, ByteStreamWriter bsw){
|
jpayne@68
|
257 // SSUMap.load(outstream);
|
jpayne@68
|
258 //
|
jpayne@68
|
259 // if(verbose){
|
jpayne@68
|
260 // System.err.println("Loaded SSUMap; |16S|="+SSUMap.r16SCount()+", |18S|="+SSUMap.r18SCount());
|
jpayne@68
|
261 // }
|
jpayne@68
|
262 //
|
jpayne@68
|
263 // byte[] line=bf.nextLine();
|
jpayne@68
|
264 //// ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
265 //
|
jpayne@68
|
266 // final byte[] ssuBytes="SSU:".getBytes();
|
jpayne@68
|
267 // final byte[] r16SBytes="16S:".getBytes();
|
jpayne@68
|
268 // final byte[] r18SBytes="18S:".getBytes();
|
jpayne@68
|
269 //
|
jpayne@68
|
270 // while(line!=null){
|
jpayne@68
|
271 // if(line.length>0){
|
jpayne@68
|
272 // if(maxLines>0 && linesProcessed>=maxLines){break;}
|
jpayne@68
|
273 // linesProcessed++;
|
jpayne@68
|
274 // bytesProcessed+=(line.length+1);
|
jpayne@68
|
275 //
|
jpayne@68
|
276 // final boolean header=(line[0]=='#');
|
jpayne@68
|
277 //
|
jpayne@68
|
278 // linesOut++;
|
jpayne@68
|
279 // bytesOut+=(line.length+1);
|
jpayne@68
|
280 //
|
jpayne@68
|
281 // if(header){
|
jpayne@68
|
282 // if(Tools.startsWith(line, "#SZ:")){
|
jpayne@68
|
283 // sketchCount++;
|
jpayne@68
|
284 //
|
jpayne@68
|
285 // bsw.print(line);
|
jpayne@68
|
286 //
|
jpayne@68
|
287 // final int tid=parseTaxID(line);
|
jpayne@68
|
288 // final boolean has16S=Tools.contains(line, ssuBytes, 0) || Tools.contains(line, r16SBytes, 0);
|
jpayne@68
|
289 // final boolean has18S=Tools.contains(line, r18SBytes, 0);
|
jpayne@68
|
290 //
|
jpayne@68
|
291 // if(verbose){
|
jpayne@68
|
292 // System.err.println("For line "+new String(line)+":");
|
jpayne@68
|
293 // System.err.println("tid="+tid+", has16S="+has16S+", has18S="+has18S);
|
jpayne@68
|
294 // }
|
jpayne@68
|
295 //
|
jpayne@68
|
296 // if(tid>0){
|
jpayne@68
|
297 // final byte[] r16S=has16S ? null : SSUMap.r16SMap.get(tid);
|
jpayne@68
|
298 // final byte[] r18S=has18S ? null : SSUMap.r18SMap.get(tid);
|
jpayne@68
|
299 // if(r16S!=null){bsw.print("\t16S:").print(r16S.length); ssuOut++;}
|
jpayne@68
|
300 // if(r18S!=null){bsw.print("\t18S:").print(r18S.length); ssuOut++;}
|
jpayne@68
|
301 // if(r16S!=null){bsw.print("\n#16S:").print(r16S);}
|
jpayne@68
|
302 // if(r18S!=null){bsw.print("\n#18S:").print(r18S);}
|
jpayne@68
|
303 //
|
jpayne@68
|
304 // if(verbose){System.err.println("Found 16S: "+(r16S!=null)+"; found 18S: "+(r18S!=null));}
|
jpayne@68
|
305 // }
|
jpayne@68
|
306 // bsw.println();
|
jpayne@68
|
307 // }else if(Tools.startsWith(line, "#16S:") || Tools.startsWith(line, "#18S:") || Tools.startsWith(line, "#SSU:")){
|
jpayne@68
|
308 // bsw.println(line);
|
jpayne@68
|
309 // ssuIn++;
|
jpayne@68
|
310 // ssuOut++;
|
jpayne@68
|
311 // }else{
|
jpayne@68
|
312 // assert(Tools.startsWith(line, "##")) : new String(line);
|
jpayne@68
|
313 // bsw.println(line);
|
jpayne@68
|
314 // }
|
jpayne@68
|
315 // }else{
|
jpayne@68
|
316 // bsw.println(line);
|
jpayne@68
|
317 // }
|
jpayne@68
|
318 // }
|
jpayne@68
|
319 // line=bf.nextLine();
|
jpayne@68
|
320 // }
|
jpayne@68
|
321 // }
|
jpayne@68
|
322
|
jpayne@68
|
323 private void processInner(ByteFile bf, ByteStreamWriter bsw){
|
jpayne@68
|
324 SSUMap.load(outstream);
|
jpayne@68
|
325
|
jpayne@68
|
326 if(verbose){
|
jpayne@68
|
327 System.err.println("Loaded SSUMap; |16S|="+SSUMap.r16SCount()+", |18S|="+SSUMap.r18SCount());
|
jpayne@68
|
328 }
|
jpayne@68
|
329
|
jpayne@68
|
330 byte[] line=bf.nextLine();
|
jpayne@68
|
331 // ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
332
|
jpayne@68
|
333 // final byte[] ssuBytes="SSU:".getBytes();
|
jpayne@68
|
334 // final byte[] r16SBytes="16S:".getBytes();
|
jpayne@68
|
335 // final byte[] r18SBytes="18S:".getBytes();
|
jpayne@68
|
336
|
jpayne@68
|
337 SketchHeader header=null;
|
jpayne@68
|
338 while(line!=null){
|
jpayne@68
|
339 if(line.length>0){
|
jpayne@68
|
340 if(maxLines>0 && linesProcessed>=maxLines){break;}
|
jpayne@68
|
341 linesProcessed++;
|
jpayne@68
|
342 bytesProcessed+=(line.length+1);
|
jpayne@68
|
343
|
jpayne@68
|
344 final boolean isHeader=(line[0]=='#');
|
jpayne@68
|
345
|
jpayne@68
|
346 if(isHeader){
|
jpayne@68
|
347 if(Tools.startsWith(line, "#SZ:")){
|
jpayne@68
|
348 assert(header==null) : "\nReplacing this:\n"+header.toBytes()+"\nWith this:\n"+new String(line)+"\n";
|
jpayne@68
|
349 header=new SketchHeader(line);
|
jpayne@68
|
350 sketchCount++;
|
jpayne@68
|
351 }else if(Tools.startsWith(line, "##")){
|
jpayne@68
|
352 bsw.println(line);
|
jpayne@68
|
353
|
jpayne@68
|
354 linesOut++;
|
jpayne@68
|
355 bytesOut+=(line.length+1);
|
jpayne@68
|
356 }else{
|
jpayne@68
|
357 header.addLine(line);
|
jpayne@68
|
358 }
|
jpayne@68
|
359 }else{
|
jpayne@68
|
360 if(header!=null){
|
jpayne@68
|
361 try {
|
jpayne@68
|
362 processHeader(header);
|
jpayne@68
|
363 } catch (Throwable e) {
|
jpayne@68
|
364 e.printStackTrace();
|
jpayne@68
|
365 assert(false) : header.toBytes();
|
jpayne@68
|
366 }
|
jpayne@68
|
367 r16Sout+=(header.r16S==null ? 0 : 1);
|
jpayne@68
|
368 r18Sout+=(header.r18S==null ? 0 : 1);
|
jpayne@68
|
369 linesOut+=1+(header.r16S==null ? 0 : 1)+(header.r18S==null ? 0 : 1);
|
jpayne@68
|
370 ByteBuilder bb=header.toBytes();
|
jpayne@68
|
371 bytesOut+=(bb.length+1);
|
jpayne@68
|
372 bsw.println(bb);
|
jpayne@68
|
373 header=null;
|
jpayne@68
|
374 }
|
jpayne@68
|
375 bsw.println(line);
|
jpayne@68
|
376
|
jpayne@68
|
377 linesOut++;
|
jpayne@68
|
378 bytesOut+=(line.length+1);
|
jpayne@68
|
379 }
|
jpayne@68
|
380 }
|
jpayne@68
|
381 line=bf.nextLine();
|
jpayne@68
|
382 }
|
jpayne@68
|
383 }
|
jpayne@68
|
384
|
jpayne@68
|
385 void processHeader(SketchHeader header){
|
jpayne@68
|
386
|
jpayne@68
|
387 if(verbose){System.err.println("Processing tid "+header.tid+":\n"+header.toBytes()+"\n");}
|
jpayne@68
|
388
|
jpayne@68
|
389 final boolean euk=(tree!=null && header.tid>0 && header.tid<SketchObject.minFakeID) ? tree.isEukaryote(header.tid) : false;
|
jpayne@68
|
390 final boolean prok=(tree!=null && header.tid>0 && header.tid<SketchObject.minFakeID) ? tree.isProkaryote(header.tid) : false;
|
jpayne@68
|
391 if(useSSUMapOnly || (useSSUMapOnlyEuks && euk) || (useSSUMapOnlyProks && prok)){header.r16S=header.r18S=null;}
|
jpayne@68
|
392 if(header.tid>0){
|
jpayne@68
|
393 final boolean preferMap=(preferSSUMap || (preferSSUMapEuks && euk) || (preferSSUMapProks && prok));
|
jpayne@68
|
394 byte[] r16S=(SSUMap.r16SMap==null ? null : SSUMap.r16SMap.get(header.tid));
|
jpayne@68
|
395 byte[] r18S=(SSUMap.r18SMap==null ? null : SSUMap.r18SMap.get(header.tid));
|
jpayne@68
|
396 if(r16S!=null && (preferMap || header.r16S==null)){
|
jpayne@68
|
397 header.r16S=r16S;
|
jpayne@68
|
398 r16SfromMap++;
|
jpayne@68
|
399 }
|
jpayne@68
|
400 if(r18S!=null && (preferMap || header.r18S==null)){
|
jpayne@68
|
401 header.r18S=r18S;
|
jpayne@68
|
402 r18SfromMap++;
|
jpayne@68
|
403 }
|
jpayne@68
|
404 }
|
jpayne@68
|
405 if(clear16S || (clear16SEuks && euk) || (clear16SProks && prok)){header.r16S=null;}
|
jpayne@68
|
406 if(clear18S || (clear18SEuks && euk) || (clear18SProks && prok)){header.r18S=null;}
|
jpayne@68
|
407 }
|
jpayne@68
|
408
|
jpayne@68
|
409 int parseTaxID(byte[] line){
|
jpayne@68
|
410 String[] split=Tools.tabPattern.split(new String(line));
|
jpayne@68
|
411 for(String s : split){
|
jpayne@68
|
412 if(s.startsWith("ID:") || s.startsWith("TAXID:")){
|
jpayne@68
|
413 final int colon=s.indexOf(':');
|
jpayne@68
|
414 final String sub=s.substring(colon+1);
|
jpayne@68
|
415 return Integer.parseInt(sub);
|
jpayne@68
|
416 }
|
jpayne@68
|
417 }
|
jpayne@68
|
418 return -1;
|
jpayne@68
|
419 }
|
jpayne@68
|
420
|
jpayne@68
|
421 /*--------------------------------------------------------------*/
|
jpayne@68
|
422
|
jpayne@68
|
423 //A very limited parser
|
jpayne@68
|
424 private class SketchHeader {
|
jpayne@68
|
425
|
jpayne@68
|
426 SketchHeader(byte[] line){
|
jpayne@68
|
427 this(new String(line, 1, line.length-1));
|
jpayne@68
|
428 }
|
jpayne@68
|
429
|
jpayne@68
|
430 SketchHeader(String line){
|
jpayne@68
|
431 if(line.charAt(0)=='#'){line=line.substring(1);}
|
jpayne@68
|
432 assert(line.startsWith("SZ:"));
|
jpayne@68
|
433 String[] split=Tools.tabPattern.split(line);
|
jpayne@68
|
434 fields=new ArrayList<String>(line.length()+2);
|
jpayne@68
|
435 int tid_=-1;
|
jpayne@68
|
436 for(String s : split){
|
jpayne@68
|
437 if(s.startsWith("16S:") || s.startsWith("18S:") || s.startsWith("SSU:")){
|
jpayne@68
|
438 //do nothing
|
jpayne@68
|
439 }else{
|
jpayne@68
|
440 if(s.startsWith("ID:") || s.startsWith("TAXID:")){
|
jpayne@68
|
441 final int colon=s.indexOf(':');
|
jpayne@68
|
442 final String sub=s.substring(colon+1);
|
jpayne@68
|
443 tid_=Integer.parseInt(sub);
|
jpayne@68
|
444 }
|
jpayne@68
|
445 fields.add(s);
|
jpayne@68
|
446 }
|
jpayne@68
|
447 }
|
jpayne@68
|
448 tid=tid_;
|
jpayne@68
|
449 }
|
jpayne@68
|
450
|
jpayne@68
|
451 void addLine(byte[] line){
|
jpayne@68
|
452 assert(line[0]=='#');
|
jpayne@68
|
453 assert(line[1]=='1' || line[1]=='S') : new String(line);
|
jpayne@68
|
454 if(Tools.startsWith(line, "#16S:") || Tools.startsWith(line, "#SSU:")){
|
jpayne@68
|
455 assert(r16S==null);
|
jpayne@68
|
456 r16S=Arrays.copyOfRange(line, 5, line.length);
|
jpayne@68
|
457 r16Sin++;
|
jpayne@68
|
458 }else if(Tools.startsWith(line, "#18S:")){
|
jpayne@68
|
459 assert(r18S==null);
|
jpayne@68
|
460 r18S=Arrays.copyOfRange(line, 5, line.length);
|
jpayne@68
|
461 r18Sin++;
|
jpayne@68
|
462 }else{
|
jpayne@68
|
463 assert(false) : new String(line);
|
jpayne@68
|
464 }
|
jpayne@68
|
465 }
|
jpayne@68
|
466
|
jpayne@68
|
467 ByteBuilder toBytes(){
|
jpayne@68
|
468 ByteBuilder bb=new ByteBuilder(1000);
|
jpayne@68
|
469 bb.append('#');
|
jpayne@68
|
470 for(int i=0; i<fields.size(); i++){
|
jpayne@68
|
471 if(i>0){bb.tab();}
|
jpayne@68
|
472 bb.append(fields.get(i));
|
jpayne@68
|
473 }
|
jpayne@68
|
474 if(r16S!=null){bb.tab().append("16S:").append(r16S.length);}
|
jpayne@68
|
475 if(r18S!=null){bb.tab().append("18S:").append(r18S.length);}
|
jpayne@68
|
476
|
jpayne@68
|
477 if(r16S!=null){bb.nl().append("#16S:").append(r16S);}
|
jpayne@68
|
478 if(r18S!=null){bb.nl().append("#18S:").append(r18S);}
|
jpayne@68
|
479 return bb;
|
jpayne@68
|
480 }
|
jpayne@68
|
481
|
jpayne@68
|
482 final int tid;
|
jpayne@68
|
483 ArrayList<String> fields;
|
jpayne@68
|
484 byte[] r16S;
|
jpayne@68
|
485 byte[] r18S;
|
jpayne@68
|
486 }
|
jpayne@68
|
487
|
jpayne@68
|
488 /*--------------------------------------------------------------*/
|
jpayne@68
|
489 /*---------------- Fields ----------------*/
|
jpayne@68
|
490 /*--------------------------------------------------------------*/
|
jpayne@68
|
491
|
jpayne@68
|
492 private String in1=null;
|
jpayne@68
|
493 private String out1=null;
|
jpayne@68
|
494 private String r16SFile="auto";
|
jpayne@68
|
495 private String r18SFile="auto";
|
jpayne@68
|
496 private String treeFile="auto";
|
jpayne@68
|
497
|
jpayne@68
|
498 boolean preferSSUMap=false;
|
jpayne@68
|
499 boolean preferSSUMapEuks=false;
|
jpayne@68
|
500 boolean preferSSUMapProks=false;
|
jpayne@68
|
501 boolean useSSUMapOnly=false;
|
jpayne@68
|
502 boolean useSSUMapOnlyEuks=false;
|
jpayne@68
|
503 boolean useSSUMapOnlyProks=false;
|
jpayne@68
|
504 boolean clear16S=false;
|
jpayne@68
|
505 boolean clear18S=false;
|
jpayne@68
|
506 boolean clear16SEuks=false;
|
jpayne@68
|
507 boolean clear18SEuks=false;
|
jpayne@68
|
508 boolean clear16SProks=false;
|
jpayne@68
|
509 boolean clear18SProks=false;
|
jpayne@68
|
510
|
jpayne@68
|
511 /*--------------------------------------------------------------*/
|
jpayne@68
|
512
|
jpayne@68
|
513 private long linesProcessed=0;
|
jpayne@68
|
514 private long linesOut=0;
|
jpayne@68
|
515 private long bytesProcessed=0;
|
jpayne@68
|
516 private long bytesOut=0;
|
jpayne@68
|
517
|
jpayne@68
|
518 private long sketchCount=0;
|
jpayne@68
|
519
|
jpayne@68
|
520 private long r16Sin=0;
|
jpayne@68
|
521 private long r16Sout=0;
|
jpayne@68
|
522 private long r16SfromMap=0;
|
jpayne@68
|
523 private long r18Sin=0;
|
jpayne@68
|
524 private long r18Sout=0;
|
jpayne@68
|
525 private long r18SfromMap=0;
|
jpayne@68
|
526
|
jpayne@68
|
527 private long maxLines=Long.MAX_VALUE;
|
jpayne@68
|
528
|
jpayne@68
|
529 /*--------------------------------------------------------------*/
|
jpayne@68
|
530 /*---------------- Final Fields ----------------*/
|
jpayne@68
|
531 /*--------------------------------------------------------------*/
|
jpayne@68
|
532
|
jpayne@68
|
533 private final FileFormat ffin1;
|
jpayne@68
|
534 private final FileFormat ffout1;
|
jpayne@68
|
535
|
jpayne@68
|
536 private final TaxTree tree;
|
jpayne@68
|
537
|
jpayne@68
|
538 /*--------------------------------------------------------------*/
|
jpayne@68
|
539 /*---------------- Common Fields ----------------*/
|
jpayne@68
|
540 /*--------------------------------------------------------------*/
|
jpayne@68
|
541
|
jpayne@68
|
542 private PrintStream outstream=System.err;
|
jpayne@68
|
543 public static boolean verbose=false;
|
jpayne@68
|
544 public boolean errorState=false;
|
jpayne@68
|
545 private boolean overwrite=false;
|
jpayne@68
|
546 private boolean append=false;
|
jpayne@68
|
547
|
jpayne@68
|
548 }
|