Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/sketch/InvertKey.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package sketch; | |
2 | |
3 import java.io.File; | |
4 import java.io.PrintStream; | |
5 import java.util.ArrayList; | |
6 | |
7 import dna.AminoAcid; | |
8 import fileIO.ByteFile; | |
9 import fileIO.ByteFile1; | |
10 import fileIO.ByteFile2; | |
11 import fileIO.ByteStreamWriter; | |
12 import fileIO.FileFormat; | |
13 import fileIO.ReadWrite; | |
14 import shared.Parse; | |
15 import shared.Parser; | |
16 import shared.PreParser; | |
17 import shared.ReadStats; | |
18 import shared.Shared; | |
19 import shared.Timer; | |
20 import shared.Tools; | |
21 import stream.ConcurrentGenericReadInputStream; | |
22 import stream.ConcurrentReadInputStream; | |
23 import stream.FASTQ; | |
24 import stream.FastaReadInputStream; | |
25 import stream.Read; | |
26 import structures.ListNum; | |
27 import structures.LongHashSet; | |
28 | |
29 /** | |
30 * @author Brian Bushnell | |
31 * @date Oct 17, 2014 | |
32 * | |
33 */ | |
34 public class InvertKey extends SketchObject { | |
35 | |
36 public static void main(String[] args){ | |
37 Timer t=new Timer(); | |
38 InvertKey x=new InvertKey(args); | |
39 x.process(t); | |
40 | |
41 //Close the print stream if it was redirected | |
42 Shared.closeStream(x.outstream); | |
43 } | |
44 | |
45 public InvertKey(String[] args){ | |
46 | |
47 {//Preparse block for help, config files, and outstream | |
48 PreParser pp=new PreParser(args, getClass(), false); | |
49 args=pp.args; | |
50 outstream=pp.outstream; | |
51 } | |
52 | |
53 Shared.capBuffers(4); | |
54 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; | |
55 ReadWrite.MAX_ZIP_THREADS=Shared.threads(); | |
56 | |
57 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false; | |
58 int k_=32, k2_=0; | |
59 | |
60 Parser parser=new Parser(); | |
61 for(int i=0; i<args.length; i++){ | |
62 String arg=args[i]; | |
63 String[] split=arg.split("="); | |
64 String a=split[0].toLowerCase(); | |
65 String b=split.length>1 ? split[1] : null; | |
66 | |
67 if(a.equals("verbose")){ | |
68 verbose=Parse.parseBoolean(b); | |
69 ByteFile1.verbose=verbose; | |
70 ByteFile2.verbose=verbose; | |
71 stream.FastaReadInputStream.verbose=verbose; | |
72 ConcurrentGenericReadInputStream.verbose=verbose; | |
73 stream.FastqReadInputStream.verbose=verbose; | |
74 ReadWrite.verbose=verbose; | |
75 }else if(a.equals("key")){ | |
76 keyString=b; | |
77 }else if(a.equals("out")){ | |
78 out1=b; | |
79 }else if(a.equalsIgnoreCase("k")){ | |
80 assert(b!=null) : "Bad parameter: "+arg; | |
81 if(b.indexOf(',')>=0){ | |
82 String[] bsplit=b.split(","); | |
83 assert(bsplit.length==2) : "Bad argument "+arg; | |
84 int x=Integer.parseInt(bsplit[0]); | |
85 int y=Integer.parseInt(bsplit[1]); | |
86 k_=Tools.max(x, y); | |
87 k2_=Tools.min(x, y); | |
88 if(k_==k2_){k2_=0;} | |
89 }else{ | |
90 k_=Integer.parseInt(b); | |
91 k2_=0; | |
92 } | |
93 }else if(a.equalsIgnoreCase("printonce")){ | |
94 printOnce=Parse.parseBoolean(b); | |
95 }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){ | |
96 parser.in1=arg; | |
97 }else if(parser.out1==null && i==1 && !arg.contains("=")){ | |
98 out1=arg; | |
99 }else if(parser.parse(arg, a, b)){ | |
100 //do nothing | |
101 }else{ | |
102 outstream.println("Unknown parameter "+args[i]); | |
103 assert(false) : "Unknown parameter "+args[i]; | |
104 // throw new RuntimeException("Unknown parameter "+args[i]); | |
105 } | |
106 } | |
107 | |
108 k=k_; | |
109 k2=k2_; | |
110 shift=2*k; | |
111 shift2=shift-2; | |
112 mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32 | |
113 | |
114 {//Process parser fields | |
115 Parser.processQuality(); | |
116 | |
117 maxReads=parser.maxReads; | |
118 | |
119 overwrite=ReadStats.overwrite=parser.overwrite; | |
120 append=ReadStats.append=parser.append; | |
121 | |
122 in1=parser.in1; | |
123 } | |
124 | |
125 assert(FastaReadInputStream.settingsOK()); | |
126 | |
127 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} | |
128 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){ | |
129 ByteFile.FORCE_MODE_BF2=false; | |
130 ByteFile.FORCE_MODE_BF1=true; | |
131 } | |
132 | |
133 if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;} | |
134 | |
135 if(!Tools.testOutputFiles(overwrite, append, false, out1)){ | |
136 outstream.println((out1==null)+", "+out1); | |
137 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n"); | |
138 } | |
139 | |
140 ffout1=FileFormat.testOutput(out1, FileFormat.FASTA, null, true, overwrite, append, false); | |
141 | |
142 ffin1=FileFormat.testInput(in1, FileFormat.FASTA, null, true, true); | |
143 | |
144 SketchObject.postParse(); | |
145 | |
146 if(keyString.indexOf(',')>0){ | |
147 String[] split=keyString.split(","); | |
148 set=new LongHashSet(split.length*2); | |
149 for(String s : split){ | |
150 long x=Long.MAX_VALUE-Sketch.parseA48(s); | |
151 set.add(x); | |
152 // assert(set.contains(x)) : x+", "+set.size()+", "+set.toStringListView(); | |
153 } | |
154 key0=-1; | |
155 // System.err.println(set.toStringListView()+", "+set.size()); | |
156 assert(!set.isEmpty()); | |
157 }else if(keyString.endsWith(".sketch")){ | |
158 SketchTool tool=new SketchTool(10000, 0, false, false); | |
159 Sketch sk=tool.loadSketchesFromFile(keyString, null, 0, 1000000, SketchObject.ONE_SKETCH, 1f, 0f, 0f, (byte)0, false).get(0); | |
160 set=new LongHashSet(sk.length()*2); | |
161 for(long x : sk.keys){set.add(Long.MAX_VALUE-x);} | |
162 key0=-1; | |
163 // System.err.println(set.toStringListView()+", "+set.size()); | |
164 assert(!set.isEmpty()); | |
165 }else{ | |
166 key0=Long.MAX_VALUE-Sketch.parseA48(keyString); | |
167 set=null; | |
168 // System.err.println(key0); | |
169 } | |
170 } | |
171 | |
172 void process(Timer t){ | |
173 | |
174 final ConcurrentReadInputStream cris; | |
175 { | |
176 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null, null, null); | |
177 cris.start(); | |
178 if(verbose){outstream.println("Started cris");} | |
179 } | |
180 boolean paired=cris.paired(); | |
181 // if(verbose){ | |
182 if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));} | |
183 // } | |
184 | |
185 final ByteStreamWriter bsw; | |
186 if(out1!=null){ | |
187 fasta=ffout1.fasta() && !out1.endsWith(".txt"); | |
188 bsw=new ByteStreamWriter(ffout1); | |
189 bsw.start(); | |
190 }else{bsw=null;} | |
191 | |
192 long readsProcessed=0; | |
193 long basesProcessed=0; | |
194 boolean finished=false; | |
195 | |
196 { | |
197 | |
198 ListNum<Read> ln=cris.nextList(); | |
199 ArrayList<Read> reads=(ln!=null ? ln.list : null); | |
200 | |
201 // outstream.println("Fetched "+reads); | |
202 | |
203 if(reads!=null && !reads.isEmpty()){ | |
204 Read r=reads.get(0); | |
205 assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired()); | |
206 } | |
207 | |
208 while(reads!=null && reads.size()>0 && !finished){ | |
209 | |
210 for(int idx=0; idx<reads.size() && !finished; idx++){ | |
211 final Read r1=reads.get(idx); | |
212 | |
213 finished=invert(key0, r1, bsw); | |
214 | |
215 final int initialLength1=r1.length(); | |
216 | |
217 readsProcessed++; | |
218 basesProcessed+=initialLength1; | |
219 } | |
220 | |
221 cris.returnList(ln); | |
222 ln=cris.nextList(); | |
223 reads=(ln!=null ? ln.list : null); | |
224 } | |
225 if(ln!=null){ | |
226 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); | |
227 } | |
228 } | |
229 | |
230 errorState|=(ReadWrite.closeStream(cris)); | |
231 if(bsw!=null){errorState|=bsw.poisonAndWait();} | |
232 | |
233 t.stop(); | |
234 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); | |
235 | |
236 if(errorState && !finished && maxReads<1){ | |
237 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); | |
238 } | |
239 } | |
240 | |
241 private boolean invert(long key2, Read r, ByteStreamWriter bsw) { | |
242 final byte[] bases=r.bases; | |
243 | |
244 long kmer=0; | |
245 long rkmer=0; | |
246 int len=0; | |
247 | |
248 | |
249 // System.err.println("Looking for "+key+"\t"+Sketch.toA48(key)+"\t"+Sketch.toA48(Long.MAX_VALUE-key)); | |
250 | |
251 for(int i=0; i<bases.length; i++){ | |
252 byte b=bases[i]; | |
253 long x=AminoAcid.baseToNumber[b]; | |
254 long x2=AminoAcid.baseToComplementNumber[b]; | |
255 kmer=((kmer<<2)|x)&mask; | |
256 rkmer=((rkmer>>>2)|(x2<<shift2))&mask; | |
257 if(x<0){len=0; rkmer=0;}else{len++;} | |
258 if(len>=k){ | |
259 kmersProcessed++; | |
260 final long hashcode=hash(kmer, rkmer); | |
261 boolean found=(key0>=0 ? hashcode==key0 : set.contains(hashcode)); | |
262 if(found){ | |
263 if(fasta){bsw.println(">"+Sketch.toA48(Long.MAX_VALUE-hashcode)+" "+(i-k+1)+" "+r.id);} | |
264 bsw.println(AminoAcid.kmerToString(Tools.min(kmer, rkmer), k)); | |
265 if(printOnce){ | |
266 if(key0>=0){return true;} | |
267 else{ | |
268 set.remove(hashcode); | |
269 return set.isEmpty(); | |
270 } | |
271 } | |
272 } | |
273 } | |
274 } | |
275 return false; | |
276 } | |
277 | |
278 /*--------------------------------------------------------------*/ | |
279 | |
280 final long key0; | |
281 final LongHashSet set; | |
282 | |
283 final int shift; | |
284 final int shift2; | |
285 final long mask; | |
286 | |
287 boolean printOnce=true; | |
288 long kmersProcessed=0; | |
289 | |
290 private String in1=null; | |
291 boolean fasta; | |
292 boolean sketch; | |
293 private String keyString=null; | |
294 | |
295 private String out1="stdout.fa"; | |
296 | |
297 /*--------------------------------------------------------------*/ | |
298 | |
299 private long maxReads=-1; | |
300 | |
301 /*--------------------------------------------------------------*/ | |
302 | |
303 private final FileFormat ffin1; | |
304 | |
305 private final FileFormat ffout1; | |
306 | |
307 | |
308 /*--------------------------------------------------------------*/ | |
309 | |
310 private PrintStream outstream=System.err; | |
311 public static boolean verbose=false; | |
312 public boolean errorState=false; | |
313 private boolean overwrite=false; | |
314 private boolean append=false; | |
315 | |
316 } |