jpayne@68
|
1 package sketch;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.ArrayList;
|
jpayne@68
|
6 import java.util.Collection;
|
jpayne@68
|
7 import java.util.LinkedHashSet;
|
jpayne@68
|
8
|
jpayne@68
|
9 import fileIO.ByteFile;
|
jpayne@68
|
10 import fileIO.ByteStreamWriter;
|
jpayne@68
|
11 import fileIO.FileFormat;
|
jpayne@68
|
12 import fileIO.ReadWrite;
|
jpayne@68
|
13 import shared.Parse;
|
jpayne@68
|
14 import shared.Parser;
|
jpayne@68
|
15 import shared.PreParser;
|
jpayne@68
|
16 import shared.ReadStats;
|
jpayne@68
|
17 import shared.Shared;
|
jpayne@68
|
18 import shared.Timer;
|
jpayne@68
|
19 import shared.Tools;
|
jpayne@68
|
20 import structures.ByteBuilder;
|
jpayne@68
|
21
|
jpayne@68
|
22 /**
|
jpayne@68
|
23 * Generates smaller sketches from input sketches.
|
jpayne@68
|
24 *
|
jpayne@68
|
25 * @author Brian Bushnell
|
jpayne@68
|
26 * @date July 23, 2018
|
jpayne@68
|
27 *
|
jpayne@68
|
28 */
|
jpayne@68
|
29 public class SubSketch extends SketchObject {
|
jpayne@68
|
30
|
jpayne@68
|
31 /*--------------------------------------------------------------*/
|
jpayne@68
|
32 /*---------------- Initialization ----------------*/
|
jpayne@68
|
33 /*--------------------------------------------------------------*/
|
jpayne@68
|
34
|
jpayne@68
|
35 /**
|
jpayne@68
|
36 * Code entrance from the command line.
|
jpayne@68
|
37 * @param args Command line arguments
|
jpayne@68
|
38 */
|
jpayne@68
|
39 public static void main(String[] args){
|
jpayne@68
|
40 //Start a timer immediately upon code entrance.
|
jpayne@68
|
41 Timer t=new Timer();
|
jpayne@68
|
42
|
jpayne@68
|
43 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
|
jpayne@68
|
44 final int oldBufLen=Shared.bufferLen();
|
jpayne@68
|
45
|
jpayne@68
|
46 //Create an instance of this class
|
jpayne@68
|
47 SubSketch x=new SubSketch(args);
|
jpayne@68
|
48
|
jpayne@68
|
49 //Run the object
|
jpayne@68
|
50 x.process(t);
|
jpayne@68
|
51
|
jpayne@68
|
52 ReadWrite.USE_UNPIGZ=oldUnpigz;
|
jpayne@68
|
53 Shared.setBufferLen(oldBufLen);
|
jpayne@68
|
54
|
jpayne@68
|
55 //Close the print stream if it was redirected
|
jpayne@68
|
56 Shared.closeStream(x.outstream);
|
jpayne@68
|
57
|
jpayne@68
|
58 assert(!x.errorState) : "This program ended in an error state.";
|
jpayne@68
|
59 }
|
jpayne@68
|
60
|
jpayne@68
|
61 /**
|
jpayne@68
|
62 * Constructor.
|
jpayne@68
|
63 * @param args Command line arguments
|
jpayne@68
|
64 */
|
jpayne@68
|
65 public SubSketch(String[] args){
|
jpayne@68
|
66
|
jpayne@68
|
67 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
68 PreParser pp=new PreParser(args, null, false);
|
jpayne@68
|
69 args=pp.args;
|
jpayne@68
|
70 outstream=pp.outstream;
|
jpayne@68
|
71 }
|
jpayne@68
|
72
|
jpayne@68
|
73 //Set shared static variables
|
jpayne@68
|
74 ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
75 KILL_OK=true;
|
jpayne@68
|
76
|
jpayne@68
|
77 //Create a parser object
|
jpayne@68
|
78 Parser parser=new Parser();
|
jpayne@68
|
79
|
jpayne@68
|
80 defaultParams.printRefFileName=true;
|
jpayne@68
|
81
|
jpayne@68
|
82 //Parse each argument
|
jpayne@68
|
83 for(int i=0; i<args.length; i++){
|
jpayne@68
|
84 String arg=args[i];
|
jpayne@68
|
85
|
jpayne@68
|
86 //Break arguments into their constituent parts, in the form of "a=b"
|
jpayne@68
|
87 String[] split=arg.split("=");
|
jpayne@68
|
88 String a=split[0].toLowerCase();
|
jpayne@68
|
89 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
90
|
jpayne@68
|
91 if(a.equals("verbose")){
|
jpayne@68
|
92 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
93 }else if(a.equals("in")){
|
jpayne@68
|
94 addFiles(b, in);
|
jpayne@68
|
95 }else if(a.equals("files")){
|
jpayne@68
|
96 files=Integer.parseInt(b);
|
jpayne@68
|
97 }else if(parseSketchFlags(arg, a, b)){
|
jpayne@68
|
98 //Do nothing
|
jpayne@68
|
99 }else if(defaultParams.parse(arg, a, b)){
|
jpayne@68
|
100 //Do nothing
|
jpayne@68
|
101 }
|
jpayne@68
|
102 // else if(a.equals("size")){
|
jpayne@68
|
103 // size=Parse.parseIntKMG(b);
|
jpayne@68
|
104 // }
|
jpayne@68
|
105
|
jpayne@68
|
106 else if(a.equals("parse_flag_goes_here")){
|
jpayne@68
|
107 long fake_variable=Parse.parseKMG(b);
|
jpayne@68
|
108 //Set a variable here
|
jpayne@68
|
109 }
|
jpayne@68
|
110
|
jpayne@68
|
111 else if(a.equals("out") || a.equals("outsketch") || a.equals("outs") || a.equals("sketchout") || a.equals("sketch")){
|
jpayne@68
|
112 outSketch=b;
|
jpayne@68
|
113 }
|
jpayne@68
|
114
|
jpayne@68
|
115 else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
|
jpayne@68
|
116 //do nothing
|
jpayne@68
|
117 }
|
jpayne@68
|
118
|
jpayne@68
|
119 else if(b==null && new File(arg).exists()){
|
jpayne@68
|
120 in.add(arg);
|
jpayne@68
|
121 }
|
jpayne@68
|
122
|
jpayne@68
|
123 else{
|
jpayne@68
|
124 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
125 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
126 }
|
jpayne@68
|
127 }
|
jpayne@68
|
128 assert(targetSketchSize>0) : "Must set size.";
|
jpayne@68
|
129
|
jpayne@68
|
130 {//Expand # symbol
|
jpayne@68
|
131 LinkedHashSet<String> expanded=new LinkedHashSet<String>();
|
jpayne@68
|
132 for(String s : in){SketchSearcher.addFiles(s, expanded);}
|
jpayne@68
|
133 in.clear();
|
jpayne@68
|
134 in.addAll(expanded);
|
jpayne@68
|
135 }
|
jpayne@68
|
136
|
jpayne@68
|
137 postParse();
|
jpayne@68
|
138
|
jpayne@68
|
139 {//Process parser fields
|
jpayne@68
|
140 overwrite=ReadStats.overwrite=parser.overwrite;
|
jpayne@68
|
141 append=ReadStats.append=parser.append;
|
jpayne@68
|
142 }
|
jpayne@68
|
143
|
jpayne@68
|
144 //Ensure there is an input file
|
jpayne@68
|
145 if(in.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
146
|
jpayne@68
|
147 //Adjust the number of threads for input file reading
|
jpayne@68
|
148 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
149 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
150 }
|
jpayne@68
|
151
|
jpayne@68
|
152 if(!Tools.testOutputFiles(overwrite, append, false, outSketch)){
|
jpayne@68
|
153 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+outSketch+"\n");
|
jpayne@68
|
154 }
|
jpayne@68
|
155 // assert(false) : ffout;
|
jpayne@68
|
156
|
jpayne@68
|
157 //Ensure that no file was specified multiple times
|
jpayne@68
|
158 if(!Tools.testForDuplicateFiles(true, in.toArray(new String[0]))){
|
jpayne@68
|
159 throw new RuntimeException("\nSome file names were specified multiple times.\n");
|
jpayne@68
|
160 }
|
jpayne@68
|
161
|
jpayne@68
|
162 tool=new SketchTool(targetSketchSize, defaultParams);
|
jpayne@68
|
163
|
jpayne@68
|
164 // assert(false) : defaultParams.toString()+"\n"+k+", "+amino+", "+HASH_VERSION;
|
jpayne@68
|
165 if(verbose || true){
|
jpayne@68
|
166 if(useWhitelist){outstream.println("Using a whitelist.");}
|
jpayne@68
|
167 if(blacklist!=null){outstream.println("Using a blacklist.");}
|
jpayne@68
|
168 }
|
jpayne@68
|
169
|
jpayne@68
|
170 defaultParams.postParse(false, false);
|
jpayne@68
|
171 allowMultithreadedFastq=(in.size()==1 && Shared.threads()>2);
|
jpayne@68
|
172 if(!allowMultithreadedFastq){Shared.capBufferLen(40);}
|
jpayne@68
|
173 }
|
jpayne@68
|
174
|
jpayne@68
|
175 /*--------------------------------------------------------------*/
|
jpayne@68
|
176 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
177 /*--------------------------------------------------------------*/
|
jpayne@68
|
178
|
jpayne@68
|
179 private void process(Timer t){
|
jpayne@68
|
180 Timer ttotal=new Timer();
|
jpayne@68
|
181
|
jpayne@68
|
182 t.start();
|
jpayne@68
|
183 inSketches=tool.loadSketches_MT(defaultParams, in);
|
jpayne@68
|
184 final int numLoaded=(inSketches.size());
|
jpayne@68
|
185 long sum=0;
|
jpayne@68
|
186 for(Sketch sk : inSketches){
|
jpayne@68
|
187 sum+=sk.length();
|
jpayne@68
|
188 }
|
jpayne@68
|
189 t.stop();
|
jpayne@68
|
190 outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" of total size "+sum+" in "+t);
|
jpayne@68
|
191 t.start();
|
jpayne@68
|
192 if(verbose && numLoaded>0){
|
jpayne@68
|
193 System.err.println("First sketch:\n"+inSketches.get(0));
|
jpayne@68
|
194 }
|
jpayne@68
|
195 // outstream.println(inSketches.get(0));
|
jpayne@68
|
196
|
jpayne@68
|
197 int sizeOut=Sketch.targetSketchSize;
|
jpayne@68
|
198 {
|
jpayne@68
|
199 if(Sketch.SET_TARGET_SIZE){Sketch.AUTOSIZE=false;}
|
jpayne@68
|
200 Sketch.targetSketchSize=sizeOut;
|
jpayne@68
|
201 Sketch.maxGenomeFraction=1;
|
jpayne@68
|
202 }
|
jpayne@68
|
203
|
jpayne@68
|
204 if(outSketch!=null && outSketch.indexOf('#')>=1 && files>1){
|
jpayne@68
|
205 ByteStreamWriter[] bswArray=new ByteStreamWriter[files];
|
jpayne@68
|
206 for(int i=0; i<files; i++){
|
jpayne@68
|
207 FileFormat ffout=FileFormat.testOutput(outSketch.replace("#", ""+i), FileFormat.SKETCH, null, false, overwrite, append, false);
|
jpayne@68
|
208 ByteStreamWriter bsw=new ByteStreamWriter(ffout);
|
jpayne@68
|
209 bsw.start();
|
jpayne@68
|
210 bswArray[i]=bsw;
|
jpayne@68
|
211 }
|
jpayne@68
|
212
|
jpayne@68
|
213 processInner(inSketches, bswArray);
|
jpayne@68
|
214
|
jpayne@68
|
215 for(ByteStreamWriter bsw : bswArray){
|
jpayne@68
|
216 bsw.poisonAndWait();
|
jpayne@68
|
217 errorState|=bsw.errorState;
|
jpayne@68
|
218 }
|
jpayne@68
|
219 }else{
|
jpayne@68
|
220 FileFormat ffout=FileFormat.testOutput(outSketch, FileFormat.SKETCH, null, false, overwrite, append, false);
|
jpayne@68
|
221 ByteStreamWriter bsw=null;
|
jpayne@68
|
222 if(ffout!=null){
|
jpayne@68
|
223 bsw=new ByteStreamWriter(ffout);
|
jpayne@68
|
224 bsw.start();
|
jpayne@68
|
225 }
|
jpayne@68
|
226
|
jpayne@68
|
227 processInner(inSketches, bsw);
|
jpayne@68
|
228
|
jpayne@68
|
229 if(bsw!=null){
|
jpayne@68
|
230 bsw.poisonAndWait();
|
jpayne@68
|
231 errorState|=bsw.errorState;
|
jpayne@68
|
232 }
|
jpayne@68
|
233 }
|
jpayne@68
|
234
|
jpayne@68
|
235 t.stop();
|
jpayne@68
|
236 if(blacklist!=null){outstream.println("Evicted "+blackKeys+" blacklisted keys.");}
|
jpayne@68
|
237 outstream.println("Wrote "+sketchesOut+" sketches of total size "+keysOut+" in "+t);
|
jpayne@68
|
238
|
jpayne@68
|
239 t.stop();
|
jpayne@68
|
240 ttotal.stop();
|
jpayne@68
|
241 outstream.println("Total Time: \t"+ttotal);
|
jpayne@68
|
242 }
|
jpayne@68
|
243
|
jpayne@68
|
244 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bsw){
|
jpayne@68
|
245 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
246 for(Sketch sk : sketches){
|
jpayne@68
|
247 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
|
jpayne@68
|
248 // if(!defaultParams.trackCounts()){sk.keyCounts=null;}
|
jpayne@68
|
249 if(blacklist!=null){blackKeys+=sk.applyBlacklist();}
|
jpayne@68
|
250 if(sk.length()>target){
|
jpayne@68
|
251 sk.resize(target);
|
jpayne@68
|
252 if(verbose){System.err.println("Resized to:\n"+sk);}
|
jpayne@68
|
253 }
|
jpayne@68
|
254 if(sk.length()>=minSketchSize){
|
jpayne@68
|
255 keysOut+=sk.length();
|
jpayne@68
|
256 sketchesOut++;
|
jpayne@68
|
257 sk.toBytes(bb);
|
jpayne@68
|
258 if(verbose){System.err.println("toBytes:\n"+bb);}
|
jpayne@68
|
259 if(bsw!=null){bsw.print(bb);}
|
jpayne@68
|
260 bb.clear();
|
jpayne@68
|
261 }
|
jpayne@68
|
262 }
|
jpayne@68
|
263 }
|
jpayne@68
|
264
|
jpayne@68
|
265 void processInner(ArrayList<Sketch> sketches, ByteStreamWriter bswa[]){
|
jpayne@68
|
266 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
267 for(Sketch sk : sketches){
|
jpayne@68
|
268 //final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
|
jpayne@68
|
269 // if(!defaultParams.trackCounts()){sk.keyCounts=null;}
|
jpayne@68
|
270 if(blacklist!=null){blackKeys+=sk.applyBlacklist();}
|
jpayne@68
|
271
|
jpayne@68
|
272 //Calculating target after applying blacklist gives better consistency with actual usage
|
jpayne@68
|
273 final int target=Sketch.AUTOSIZE ? toSketchSize(sk.genomeSizeBases, sk.genomeSizeKmers, sk.genomeSizeEstimate(), targetSketchSize) : targetSketchSize;
|
jpayne@68
|
274
|
jpayne@68
|
275 if(sk.length()>target){
|
jpayne@68
|
276 sk.resize(target);
|
jpayne@68
|
277 if(verbose){System.err.println("Resized to:\n"+sk);}
|
jpayne@68
|
278 }
|
jpayne@68
|
279 if(sk.length()>=minSketchSize){
|
jpayne@68
|
280 keysOut+=sk.length();
|
jpayne@68
|
281 sketchesOut++;
|
jpayne@68
|
282
|
jpayne@68
|
283 if(bswa!=null){
|
jpayne@68
|
284 ByteStreamWriter bsw=bswa[sk.sketchID%files];
|
jpayne@68
|
285 if(sk.fname()!=null && sk.fname().endsWith(".sketch")){sk.setFname(bsw.fname);}
|
jpayne@68
|
286 sk.toBytes(bb);//This is the time-limiting factor; could be multithreaded.
|
jpayne@68
|
287 if(verbose){System.err.println("toBytes:\n"+bb);}
|
jpayne@68
|
288 bsw.print(bb);
|
jpayne@68
|
289 }
|
jpayne@68
|
290 bb.clear();
|
jpayne@68
|
291 }
|
jpayne@68
|
292 }
|
jpayne@68
|
293 }
|
jpayne@68
|
294
|
jpayne@68
|
295 /*--------------------------------------------------------------*/
|
jpayne@68
|
296 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
297 /*--------------------------------------------------------------*/
|
jpayne@68
|
298
|
jpayne@68
|
299 private static boolean addFiles(String a, Collection<String> list){
|
jpayne@68
|
300 int initial=list.size();
|
jpayne@68
|
301 if(a==null){return false;}
|
jpayne@68
|
302 File f=null;
|
jpayne@68
|
303 if(a.indexOf(',')>=0){f=new File(a);}
|
jpayne@68
|
304 if(f==null || f.exists()){
|
jpayne@68
|
305 list.add(a);
|
jpayne@68
|
306 }else{
|
jpayne@68
|
307 for(String s : a.split(",")){
|
jpayne@68
|
308 list.add(s);
|
jpayne@68
|
309 }
|
jpayne@68
|
310 }
|
jpayne@68
|
311 return list.size()>initial;
|
jpayne@68
|
312 }
|
jpayne@68
|
313
|
jpayne@68
|
314 /*--------------------------------------------------------------*/
|
jpayne@68
|
315 /*---------------- Fields ----------------*/
|
jpayne@68
|
316 /*--------------------------------------------------------------*/
|
jpayne@68
|
317
|
jpayne@68
|
/** Input file names, collected from the command line and expanded in the constructor */
private LinkedHashSet<String> in=new LinkedHashSet<String>();

/** Output path, set by the out/outsketch/outs/sketchout/sketch flags; a '#' is replaced by a file index when writing multiple files */
private String outSketch=null;

/** Created in the constructor from targetSketchSize and defaultParams; used to load the input sketches */
private final SketchTool tool;

/** Sketches loaded from the input files */
private ArrayList<Sketch> inSketches;

/** Sum of the lengths of the sketches counted for output */
private long keysOut=0;
/** Number of sketches counted for output */
private long sketchesOut=0;
/** Sum of the values returned by applyBlacklist; reported as evicted blacklisted keys */
private long blackKeys=0;

/** Number of output files in multi-file mode, set by the files flag; a sketch goes to file sketchID%files */
private int files=31;

/*--------------------------------------------------------------*/
/*----------------         Final Fields         ----------------*/
/*--------------------------------------------------------------*/

/*--------------------------------------------------------------*/
/*----------------        Common Fields         ----------------*/
/*--------------------------------------------------------------*/

/** Print status messages to this output stream */
private PrintStream outstream=System.err;
/** Print verbose messages */
public static boolean verbose=false;
/** True if an error was encountered */
public boolean errorState=false;
/** Overwrite existing output files */
private boolean overwrite=false;
/** Append to existing output files */
private boolean append=false;

/*--------------------------------------------------------------*/
/*----------------        Static Fields         ----------------*/
/*--------------------------------------------------------------*/

/** Don't print caught exceptions */
public static boolean suppressErrors=false;
|
jpayne@68
|
357
|
jpayne@68
|
358 }
|