jpayne@68
|
1 package sketch;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.ArrayList;
|
jpayne@68
|
6
|
jpayne@68
|
7 import dna.AminoAcid;
|
jpayne@68
|
8 import fileIO.ByteFile;
|
jpayne@68
|
9 import fileIO.ByteFile1;
|
jpayne@68
|
10 import fileIO.ByteFile2;
|
jpayne@68
|
11 import fileIO.ByteStreamWriter;
|
jpayne@68
|
12 import fileIO.FileFormat;
|
jpayne@68
|
13 import fileIO.ReadWrite;
|
jpayne@68
|
14 import shared.Parse;
|
jpayne@68
|
15 import shared.Parser;
|
jpayne@68
|
16 import shared.PreParser;
|
jpayne@68
|
17 import shared.ReadStats;
|
jpayne@68
|
18 import shared.Shared;
|
jpayne@68
|
19 import shared.Timer;
|
jpayne@68
|
20 import shared.Tools;
|
jpayne@68
|
21 import stream.ConcurrentGenericReadInputStream;
|
jpayne@68
|
22 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
23 import stream.FASTQ;
|
jpayne@68
|
24 import stream.FastaReadInputStream;
|
jpayne@68
|
25 import stream.Read;
|
jpayne@68
|
26 import structures.ListNum;
|
jpayne@68
|
27 import structures.LongHashSet;
|
jpayne@68
|
28
|
jpayne@68
|
29 /**
|
jpayne@68
|
30 * @author Brian Bushnell
|
jpayne@68
|
31 * @date Oct 17, 2014
|
jpayne@68
|
32 *
|
jpayne@68
|
33 */
|
jpayne@68
|
34 public class InvertKey extends SketchObject {
|
jpayne@68
|
35
|
jpayne@68
|
36 public static void main(String[] args){
|
jpayne@68
|
37 Timer t=new Timer();
|
jpayne@68
|
38 InvertKey x=new InvertKey(args);
|
jpayne@68
|
39 x.process(t);
|
jpayne@68
|
40
|
jpayne@68
|
41 //Close the print stream if it was redirected
|
jpayne@68
|
42 Shared.closeStream(x.outstream);
|
jpayne@68
|
43 }
|
jpayne@68
|
44
|
jpayne@68
|
45 public InvertKey(String[] args){
|
jpayne@68
|
46
|
jpayne@68
|
47 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
48 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
49 args=pp.args;
|
jpayne@68
|
50 outstream=pp.outstream;
|
jpayne@68
|
51 }
|
jpayne@68
|
52
|
jpayne@68
|
53 Shared.capBuffers(4);
|
jpayne@68
|
54 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
55 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
56
|
jpayne@68
|
57 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
|
jpayne@68
|
58 int k_=32, k2_=0;
|
jpayne@68
|
59
|
jpayne@68
|
60 Parser parser=new Parser();
|
jpayne@68
|
61 for(int i=0; i<args.length; i++){
|
jpayne@68
|
62 String arg=args[i];
|
jpayne@68
|
63 String[] split=arg.split("=");
|
jpayne@68
|
64 String a=split[0].toLowerCase();
|
jpayne@68
|
65 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
66
|
jpayne@68
|
67 if(a.equals("verbose")){
|
jpayne@68
|
68 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
69 ByteFile1.verbose=verbose;
|
jpayne@68
|
70 ByteFile2.verbose=verbose;
|
jpayne@68
|
71 stream.FastaReadInputStream.verbose=verbose;
|
jpayne@68
|
72 ConcurrentGenericReadInputStream.verbose=verbose;
|
jpayne@68
|
73 stream.FastqReadInputStream.verbose=verbose;
|
jpayne@68
|
74 ReadWrite.verbose=verbose;
|
jpayne@68
|
75 }else if(a.equals("key")){
|
jpayne@68
|
76 keyString=b;
|
jpayne@68
|
77 }else if(a.equals("out")){
|
jpayne@68
|
78 out1=b;
|
jpayne@68
|
79 }else if(a.equalsIgnoreCase("k")){
|
jpayne@68
|
80 assert(b!=null) : "Bad parameter: "+arg;
|
jpayne@68
|
81 if(b.indexOf(',')>=0){
|
jpayne@68
|
82 String[] bsplit=b.split(",");
|
jpayne@68
|
83 assert(bsplit.length==2) : "Bad argument "+arg;
|
jpayne@68
|
84 int x=Integer.parseInt(bsplit[0]);
|
jpayne@68
|
85 int y=Integer.parseInt(bsplit[1]);
|
jpayne@68
|
86 k_=Tools.max(x, y);
|
jpayne@68
|
87 k2_=Tools.min(x, y);
|
jpayne@68
|
88 if(k_==k2_){k2_=0;}
|
jpayne@68
|
89 }else{
|
jpayne@68
|
90 k_=Integer.parseInt(b);
|
jpayne@68
|
91 k2_=0;
|
jpayne@68
|
92 }
|
jpayne@68
|
93 }else if(a.equalsIgnoreCase("printonce")){
|
jpayne@68
|
94 printOnce=Parse.parseBoolean(b);
|
jpayne@68
|
95 }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
|
jpayne@68
|
96 parser.in1=arg;
|
jpayne@68
|
97 }else if(parser.out1==null && i==1 && !arg.contains("=")){
|
jpayne@68
|
98 out1=arg;
|
jpayne@68
|
99 }else if(parser.parse(arg, a, b)){
|
jpayne@68
|
100 //do nothing
|
jpayne@68
|
101 }else{
|
jpayne@68
|
102 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
103 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
104 // throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
105 }
|
jpayne@68
|
106 }
|
jpayne@68
|
107
|
jpayne@68
|
108 k=k_;
|
jpayne@68
|
109 k2=k2_;
|
jpayne@68
|
110 shift=2*k;
|
jpayne@68
|
111 shift2=shift-2;
|
jpayne@68
|
112 mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32
|
jpayne@68
|
113
|
jpayne@68
|
114 {//Process parser fields
|
jpayne@68
|
115 Parser.processQuality();
|
jpayne@68
|
116
|
jpayne@68
|
117 maxReads=parser.maxReads;
|
jpayne@68
|
118
|
jpayne@68
|
119 overwrite=ReadStats.overwrite=parser.overwrite;
|
jpayne@68
|
120 append=ReadStats.append=parser.append;
|
jpayne@68
|
121
|
jpayne@68
|
122 in1=parser.in1;
|
jpayne@68
|
123 }
|
jpayne@68
|
124
|
jpayne@68
|
125 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
126
|
jpayne@68
|
127 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
128 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
|
jpayne@68
|
129 ByteFile.FORCE_MODE_BF2=false;
|
jpayne@68
|
130 ByteFile.FORCE_MODE_BF1=true;
|
jpayne@68
|
131 }
|
jpayne@68
|
132
|
jpayne@68
|
133 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
|
jpayne@68
|
134
|
jpayne@68
|
135 if(!Tools.testOutputFiles(overwrite, append, false, out1)){
|
jpayne@68
|
136 outstream.println((out1==null)+", "+out1);
|
jpayne@68
|
137 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
|
jpayne@68
|
138 }
|
jpayne@68
|
139
|
jpayne@68
|
140 ffout1=FileFormat.testOutput(out1, FileFormat.FASTA, null, true, overwrite, append, false);
|
jpayne@68
|
141
|
jpayne@68
|
142 ffin1=FileFormat.testInput(in1, FileFormat.FASTA, null, true, true);
|
jpayne@68
|
143
|
jpayne@68
|
144 SketchObject.postParse();
|
jpayne@68
|
145
|
jpayne@68
|
146 if(keyString.indexOf(',')>0){
|
jpayne@68
|
147 String[] split=keyString.split(",");
|
jpayne@68
|
148 set=new LongHashSet(split.length*2);
|
jpayne@68
|
149 for(String s : split){
|
jpayne@68
|
150 long x=Long.MAX_VALUE-Sketch.parseA48(s);
|
jpayne@68
|
151 set.add(x);
|
jpayne@68
|
152 // assert(set.contains(x)) : x+", "+set.size()+", "+set.toStringListView();
|
jpayne@68
|
153 }
|
jpayne@68
|
154 key0=-1;
|
jpayne@68
|
155 // System.err.println(set.toStringListView()+", "+set.size());
|
jpayne@68
|
156 assert(!set.isEmpty());
|
jpayne@68
|
157 }else if(keyString.endsWith(".sketch")){
|
jpayne@68
|
158 SketchTool tool=new SketchTool(10000, 0, false, false);
|
jpayne@68
|
159 Sketch sk=tool.loadSketchesFromFile(keyString, null, 0, 1000000, SketchObject.ONE_SKETCH, 1f, 0f, 0f, (byte)0, false).get(0);
|
jpayne@68
|
160 set=new LongHashSet(sk.length()*2);
|
jpayne@68
|
161 for(long x : sk.keys){set.add(Long.MAX_VALUE-x);}
|
jpayne@68
|
162 key0=-1;
|
jpayne@68
|
163 // System.err.println(set.toStringListView()+", "+set.size());
|
jpayne@68
|
164 assert(!set.isEmpty());
|
jpayne@68
|
165 }else{
|
jpayne@68
|
166 key0=Long.MAX_VALUE-Sketch.parseA48(keyString);
|
jpayne@68
|
167 set=null;
|
jpayne@68
|
168 // System.err.println(key0);
|
jpayne@68
|
169 }
|
jpayne@68
|
170 }
|
jpayne@68
|
171
|
jpayne@68
|
172 void process(Timer t){
|
jpayne@68
|
173
|
jpayne@68
|
174 final ConcurrentReadInputStream cris;
|
jpayne@68
|
175 {
|
jpayne@68
|
176 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null, null, null);
|
jpayne@68
|
177 cris.start();
|
jpayne@68
|
178 if(verbose){outstream.println("Started cris");}
|
jpayne@68
|
179 }
|
jpayne@68
|
180 boolean paired=cris.paired();
|
jpayne@68
|
181 // if(verbose){
|
jpayne@68
|
182 if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));}
|
jpayne@68
|
183 // }
|
jpayne@68
|
184
|
jpayne@68
|
185 final ByteStreamWriter bsw;
|
jpayne@68
|
186 if(out1!=null){
|
jpayne@68
|
187 fasta=ffout1.fasta() && !out1.endsWith(".txt");
|
jpayne@68
|
188 bsw=new ByteStreamWriter(ffout1);
|
jpayne@68
|
189 bsw.start();
|
jpayne@68
|
190 }else{bsw=null;}
|
jpayne@68
|
191
|
jpayne@68
|
192 long readsProcessed=0;
|
jpayne@68
|
193 long basesProcessed=0;
|
jpayne@68
|
194 boolean finished=false;
|
jpayne@68
|
195
|
jpayne@68
|
196 {
|
jpayne@68
|
197
|
jpayne@68
|
198 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
199 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
200
|
jpayne@68
|
201 // outstream.println("Fetched "+reads);
|
jpayne@68
|
202
|
jpayne@68
|
203 if(reads!=null && !reads.isEmpty()){
|
jpayne@68
|
204 Read r=reads.get(0);
|
jpayne@68
|
205 assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired());
|
jpayne@68
|
206 }
|
jpayne@68
|
207
|
jpayne@68
|
208 while(reads!=null && reads.size()>0 && !finished){
|
jpayne@68
|
209
|
jpayne@68
|
210 for(int idx=0; idx<reads.size() && !finished; idx++){
|
jpayne@68
|
211 final Read r1=reads.get(idx);
|
jpayne@68
|
212
|
jpayne@68
|
213 finished=invert(key0, r1, bsw);
|
jpayne@68
|
214
|
jpayne@68
|
215 final int initialLength1=r1.length();
|
jpayne@68
|
216
|
jpayne@68
|
217 readsProcessed++;
|
jpayne@68
|
218 basesProcessed+=initialLength1;
|
jpayne@68
|
219 }
|
jpayne@68
|
220
|
jpayne@68
|
221 cris.returnList(ln);
|
jpayne@68
|
222 ln=cris.nextList();
|
jpayne@68
|
223 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
224 }
|
jpayne@68
|
225 if(ln!=null){
|
jpayne@68
|
226 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
|
jpayne@68
|
227 }
|
jpayne@68
|
228 }
|
jpayne@68
|
229
|
jpayne@68
|
230 errorState|=(ReadWrite.closeStream(cris));
|
jpayne@68
|
231 if(bsw!=null){errorState|=bsw.poisonAndWait();}
|
jpayne@68
|
232
|
jpayne@68
|
233 t.stop();
|
jpayne@68
|
234 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
|
jpayne@68
|
235
|
jpayne@68
|
236 if(errorState && !finished && maxReads<1){
|
jpayne@68
|
237 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
238 }
|
jpayne@68
|
239 }
|
jpayne@68
|
240
|
jpayne@68
|
241 private boolean invert(long key2, Read r, ByteStreamWriter bsw) {
|
jpayne@68
|
242 final byte[] bases=r.bases;
|
jpayne@68
|
243
|
jpayne@68
|
244 long kmer=0;
|
jpayne@68
|
245 long rkmer=0;
|
jpayne@68
|
246 int len=0;
|
jpayne@68
|
247
|
jpayne@68
|
248
|
jpayne@68
|
249 // System.err.println("Looking for "+key+"\t"+Sketch.toA48(key)+"\t"+Sketch.toA48(Long.MAX_VALUE-key));
|
jpayne@68
|
250
|
jpayne@68
|
251 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
252 byte b=bases[i];
|
jpayne@68
|
253 long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
254 long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
255 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
256 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
257 if(x<0){len=0; rkmer=0;}else{len++;}
|
jpayne@68
|
258 if(len>=k){
|
jpayne@68
|
259 kmersProcessed++;
|
jpayne@68
|
260 final long hashcode=hash(kmer, rkmer);
|
jpayne@68
|
261 boolean found=(key0>=0 ? hashcode==key0 : set.contains(hashcode));
|
jpayne@68
|
262 if(found){
|
jpayne@68
|
263 if(fasta){bsw.println(">"+Sketch.toA48(Long.MAX_VALUE-hashcode)+" "+(i-k+1)+" "+r.id);}
|
jpayne@68
|
264 bsw.println(AminoAcid.kmerToString(Tools.min(kmer, rkmer), k));
|
jpayne@68
|
265 if(printOnce){
|
jpayne@68
|
266 if(key0>=0){return true;}
|
jpayne@68
|
267 else{
|
jpayne@68
|
268 set.remove(hashcode);
|
jpayne@68
|
269 return set.isEmpty();
|
jpayne@68
|
270 }
|
jpayne@68
|
271 }
|
jpayne@68
|
272 }
|
jpayne@68
|
273 }
|
jpayne@68
|
274 }
|
jpayne@68
|
275 return false;
|
jpayne@68
|
276 }
|
jpayne@68
|
277
|
jpayne@68
|
278 /*--------------------------------------------------------------*/
|
jpayne@68
|
279
|
jpayne@68
|
280 final long key0;
|
jpayne@68
|
281 final LongHashSet set;
|
jpayne@68
|
282
|
jpayne@68
|
283 final int shift;
|
jpayne@68
|
284 final int shift2;
|
jpayne@68
|
285 final long mask;
|
jpayne@68
|
286
|
jpayne@68
|
287 boolean printOnce=true;
|
jpayne@68
|
288 long kmersProcessed=0;
|
jpayne@68
|
289
|
jpayne@68
|
290 private String in1=null;
|
jpayne@68
|
291 boolean fasta;
|
jpayne@68
|
292 boolean sketch;
|
jpayne@68
|
293 private String keyString=null;
|
jpayne@68
|
294
|
jpayne@68
|
295 private String out1="stdout.fa";
|
jpayne@68
|
296
|
jpayne@68
|
297 /*--------------------------------------------------------------*/
|
jpayne@68
|
298
|
jpayne@68
|
299 private long maxReads=-1;
|
jpayne@68
|
300
|
jpayne@68
|
301 /*--------------------------------------------------------------*/
|
jpayne@68
|
302
|
jpayne@68
|
303 private final FileFormat ffin1;
|
jpayne@68
|
304
|
jpayne@68
|
305 private final FileFormat ffout1;
|
jpayne@68
|
306
|
jpayne@68
|
307
|
jpayne@68
|
308 /*--------------------------------------------------------------*/
|
jpayne@68
|
309
|
jpayne@68
|
310 private PrintStream outstream=System.err;
|
jpayne@68
|
311 public static boolean verbose=false;
|
jpayne@68
|
312 public boolean errorState=false;
|
jpayne@68
|
313 private boolean overwrite=false;
|
jpayne@68
|
314 private boolean append=false;
|
jpayne@68
|
315
|
jpayne@68
|
316 }
|