jpayne@68
|
1 package kmer;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.PrintStream;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.Arrays;
|
jpayne@68
|
6 import java.util.concurrent.ArrayBlockingQueue;
|
jpayne@68
|
7
|
jpayne@68
|
8 import dna.AminoAcid;
|
jpayne@68
|
9 import fileIO.FileFormat;
|
jpayne@68
|
10 import fileIO.ReadWrite;
|
jpayne@68
|
11 import fileIO.TextStreamWriter;
|
jpayne@68
|
12 import jgi.BBMerge;
|
jpayne@68
|
13 import shared.PreParser;
|
jpayne@68
|
14 import shared.Shared;
|
jpayne@68
|
15 import shared.Timer;
|
jpayne@68
|
16 import shared.Tools;
|
jpayne@68
|
17 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
18 import stream.Read;
|
jpayne@68
|
19 import structures.IntList;
|
jpayne@68
|
20 import structures.ListNum;
|
jpayne@68
|
21
|
jpayne@68
|
22 /**
|
jpayne@68
|
23 * @author Brian Bushnell
|
jpayne@68
|
24 * @date Mar 4, 2015
|
jpayne@68
|
25 *
|
jpayne@68
|
26 */
|
jpayne@68
|
27 public class TableLoaderLockFree {
|
jpayne@68
|
28
|
jpayne@68
|
29 /*--------------------------------------------------------------*/
|
jpayne@68
|
30 /*---------------- Initialization ----------------*/
|
jpayne@68
|
31 /*--------------------------------------------------------------*/
|
jpayne@68
|
32
|
jpayne@68
|
33 /**
|
jpayne@68
|
34 * Code entrance from the command line.
|
jpayne@68
|
35 * @param args Command line arguments
|
jpayne@68
|
36 */
|
jpayne@68
|
37 public static void main(String[] args){
|
jpayne@68
|
38
|
jpayne@68
|
39 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
40 PreParser pp=new PreParser(args, null, false);
|
jpayne@68
|
41 args=pp.args;
|
jpayne@68
|
42 outstream=pp.outstream;
|
jpayne@68
|
43 }
|
jpayne@68
|
44
|
jpayne@68
|
45 Timer t=new Timer();
|
jpayne@68
|
46
|
jpayne@68
|
47 AbstractKmerTable[] tables=makeTables(AbstractKmerTable.ARRAY1D, 12, -1L, false, 1.0);
|
jpayne@68
|
48
|
jpayne@68
|
49 int k=31;
|
jpayne@68
|
50 int mink=0;
|
jpayne@68
|
51 int speed=0;
|
jpayne@68
|
52 int hdist=0;
|
jpayne@68
|
53 int edist=0;
|
jpayne@68
|
54 boolean rcomp=true;
|
jpayne@68
|
55 boolean maskMiddle=false;
|
jpayne@68
|
56
|
jpayne@68
|
57 //Create a new Loader instance
|
jpayne@68
|
58 TableLoaderLockFree loader=new TableLoaderLockFree(tables, k, mink, speed, hdist, edist, rcomp, maskMiddle);
|
jpayne@68
|
59 loader.setRefSkip(0);
|
jpayne@68
|
60 loader.hammingDistance2=0;
|
jpayne@68
|
61 loader.editDistance2=0;
|
jpayne@68
|
62 loader.storeMode(SET_IF_NOT_PRESENT);
|
jpayne@68
|
63
|
jpayne@68
|
64 ///And run it
|
jpayne@68
|
65 String[] refs=args;
|
jpayne@68
|
66 String[] literals=null;
|
jpayne@68
|
67 boolean keepNames=false;
|
jpayne@68
|
68 boolean useRefNames=false;
|
jpayne@68
|
69 long kmers=loader.processData(refs, literals, keepNames, useRefNames, false);
|
jpayne@68
|
70 t.stop();
|
jpayne@68
|
71
|
jpayne@68
|
72 outstream.println("Time: \t"+t);
|
jpayne@68
|
73 outstream.println("Return: \t"+kmers);
|
jpayne@68
|
74 outstream.println("refKmers: \t"+loader.refKmers);
|
jpayne@68
|
75 outstream.println("refBases: \t"+loader.refBases);
|
jpayne@68
|
76 outstream.println("refReads: \t"+loader.refReads);
|
jpayne@68
|
77
|
jpayne@68
|
78 //Close the print stream if it was redirected
|
jpayne@68
|
79 Shared.closeStream(outstream);
|
jpayne@68
|
80 }
|
jpayne@68
|
81
|
jpayne@68
|
82 public TableLoaderLockFree(AbstractKmerTable[] tables_, int k_){
|
jpayne@68
|
83 this(tables_, k_, 0, 0, 0, 0, true, false);
|
jpayne@68
|
84 }
|
jpayne@68
|
85
|
jpayne@68
|
86 public TableLoaderLockFree(AbstractKmerTable[] tables_, int k_, int mink_, int speed_, int hdist_, int edist_, boolean rcomp_, boolean maskMiddle_){
|
jpayne@68
|
87 tables=tables_;
|
jpayne@68
|
88 k=k_;
|
jpayne@68
|
89 k2=k-1;
|
jpayne@68
|
90 mink=mink_;
|
jpayne@68
|
91 rcomp=rcomp_;
|
jpayne@68
|
92 useShortKmers=(mink>0 && mink<k);
|
jpayne@68
|
93 speed=speed_;
|
jpayne@68
|
94 hammingDistance=hdist_;
|
jpayne@68
|
95 editDistance=edist_;
|
jpayne@68
|
96 middleMask=maskMiddle ? ~(3L<<(2*(k/2))) : -1L;
|
jpayne@68
|
97 }
|
jpayne@68
|
98
|
jpayne@68
|
99
|
jpayne@68
|
100 /*--------------------------------------------------------------*/
|
jpayne@68
|
101 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
102 /*--------------------------------------------------------------*/
|
jpayne@68
|
103
|
jpayne@68
|
104
|
jpayne@68
|
105 // public static AbstractKmerTable[] makeTables(int tableType, int initialSize, long coreMask, boolean growable){
|
jpayne@68
|
106 // return AbstractKmerTable.preallocate(WAYS, tableType, initialSize, coreMask, growable);
|
jpayne@68
|
107 // }
|
jpayne@68
|
108
|
jpayne@68
|
109 // public static AbstractKmerTable[] makeTables(int tableType, int[] schedule, long coreMask){
|
jpayne@68
|
110 // return AbstractKmerTable.preallocate(WAYS, tableType, schedule, coreMask);
|
jpayne@68
|
111 // }
|
jpayne@68
|
112
|
jpayne@68
|
113 public static AbstractKmerTable[] makeTables(int tableType, int bytesPerKmer, long coreMask,
|
jpayne@68
|
114 boolean prealloc, double memRatio){
|
jpayne@68
|
115 ScheduleMaker scheduleMaker=new ScheduleMaker(WAYS, bytesPerKmer, prealloc, memRatio);
|
jpayne@68
|
116 int[] schedule=scheduleMaker.makeSchedule();
|
jpayne@68
|
117 return AbstractKmerTable.preallocate(WAYS, tableType, schedule, coreMask);
|
jpayne@68
|
118 }
|
jpayne@68
|
119
|
jpayne@68
|
120 ScheduleMaker scheduleMaker=new ScheduleMaker(WAYS, 12, false, 0.8);
|
jpayne@68
|
121 int[] schedule=scheduleMaker.makeSchedule();
|
jpayne@68
|
122
|
jpayne@68
|
123 public long processData(String[] ref, String[] literal, boolean keepNames, boolean useRefNames, boolean ecc_){
|
jpayne@68
|
124
|
jpayne@68
|
125 scaffoldNames=null;
|
jpayne@68
|
126 refNames=null;
|
jpayne@68
|
127 refScafCounts=null;
|
jpayne@68
|
128 scaffoldLengths=null;
|
jpayne@68
|
129 ecc=ecc_;
|
jpayne@68
|
130
|
jpayne@68
|
131 if(keepNames){
|
jpayne@68
|
132 scaffoldNames=new ArrayList<String>();
|
jpayne@68
|
133 refNames=new ArrayList<String>();
|
jpayne@68
|
134 scaffoldLengths=new IntList();
|
jpayne@68
|
135
|
jpayne@68
|
136 if(ref!=null){
|
jpayne@68
|
137 for(String s : ref){refNames.add(s);}
|
jpayne@68
|
138 }
|
jpayne@68
|
139 if(literal!=null){refNames.add("literal");}
|
jpayne@68
|
140 refScafCounts=new int[refNames.size()];
|
jpayne@68
|
141
|
jpayne@68
|
142 if(useRefNames){toRefNames();}
|
jpayne@68
|
143 }
|
jpayne@68
|
144
|
jpayne@68
|
145 return spawnLoadThreads(ref, literal);
|
jpayne@68
|
146 }
|
jpayne@68
|
147
|
jpayne@68
|
148 public void setRefSkip(int x){setRefSkip(x, x);}
|
jpayne@68
|
149
|
jpayne@68
|
150 public void setRefSkip(int min, int max){
|
jpayne@68
|
151 max=Tools.max(min, max);
|
jpayne@68
|
152 if(min==max){
|
jpayne@68
|
153 minRefSkip=maxRefSkip=min;
|
jpayne@68
|
154 }else{
|
jpayne@68
|
155 minRefSkip=min;
|
jpayne@68
|
156 maxRefSkip=max;
|
jpayne@68
|
157 }
|
jpayne@68
|
158 variableRefSkip=(minRefSkip!=maxRefSkip);
|
jpayne@68
|
159 }
|
jpayne@68
|
160
|
jpayne@68
|
161 public void storeMode(final int x){
|
jpayne@68
|
162 assert(x==SET_IF_NOT_PRESENT || x==SET_ALWAYS || x==INCREMENT);
|
jpayne@68
|
163 storeMode=x;
|
jpayne@68
|
164 }
|
jpayne@68
|
165
|
jpayne@68
|
166 /*--------------------------------------------------------------*/
|
jpayne@68
|
167 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
168 /*--------------------------------------------------------------*/
|
jpayne@68
|
169
|
jpayne@68
|
170
|
jpayne@68
|
171 /**
|
jpayne@68
|
172 * Fills tables with kmers from references, using multiple LoadThread.
|
jpayne@68
|
173 * @return Number of kmers stored.
|
jpayne@68
|
174 */
|
jpayne@68
|
175 private long spawnLoadThreads(String[] ref, String[] literal){
|
jpayne@68
|
176 Timer t=new Timer();
|
jpayne@68
|
177 if((ref==null || ref.length<1) && (literal==null || literal.length<1)){return 0;}
|
jpayne@68
|
178 long added=0;
|
jpayne@68
|
179
|
jpayne@68
|
180 /* Create load threads */
|
jpayne@68
|
181 LoadThread[] loaders=new LoadThread[WAYS];
|
jpayne@68
|
182 for(int i=0; i<loaders.length; i++){
|
jpayne@68
|
183 loaders[i]=new LoadThread(i);
|
jpayne@68
|
184 loaders[i].start();
|
jpayne@68
|
185 }
|
jpayne@68
|
186
|
jpayne@68
|
187 /* For each reference file... */
|
jpayne@68
|
188 int refNum=0;
|
jpayne@68
|
189 if(ref!=null){
|
jpayne@68
|
190 for(String refname : ref){
|
jpayne@68
|
191
|
jpayne@68
|
192 /* Start an input stream */
|
jpayne@68
|
193 FileFormat ff=FileFormat.testInput(refname, FileFormat.FASTA, null, false, true);
|
jpayne@68
|
194 ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(-1L, false, ff, null, null, null, Shared.USE_MPI, true);
|
jpayne@68
|
195 cris.start(); //4567
|
jpayne@68
|
196 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
197 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
198
|
jpayne@68
|
199 /* Iterate through read lists from the input stream */
|
jpayne@68
|
200 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
201 {
|
jpayne@68
|
202 /* Assign a unique ID number to each scaffold */
|
jpayne@68
|
203 ArrayList<Read> reads2=new ArrayList<Read>(reads);
|
jpayne@68
|
204 if(scaffoldNames!=null){
|
jpayne@68
|
205 for(Read r1 : reads2){
|
jpayne@68
|
206 final Read r2=r1.mate;
|
jpayne@68
|
207 final Integer id=scaffoldNames.size();
|
jpayne@68
|
208 if(ecc && r1!=null && r2!=null){BBMerge.findOverlapStrict(r1, r2, true);}
|
jpayne@68
|
209 refScafCounts[refNum]++;
|
jpayne@68
|
210 scaffoldNames.add(r1.id==null ? id.toString() : r1.id);
|
jpayne@68
|
211 int len=r1.length();
|
jpayne@68
|
212 r1.obj=id;
|
jpayne@68
|
213 if(r2!=null){
|
jpayne@68
|
214 r2.obj=id;
|
jpayne@68
|
215 len+=r2.length();
|
jpayne@68
|
216 }
|
jpayne@68
|
217 scaffoldLengths.add(len);
|
jpayne@68
|
218 }
|
jpayne@68
|
219 }
|
jpayne@68
|
220
|
jpayne@68
|
221 if(REPLICATE_AMBIGUOUS){
|
jpayne@68
|
222 reads2=Tools.replicateAmbiguous(reads2, Tools.min(k, mink));
|
jpayne@68
|
223 }
|
jpayne@68
|
224
|
jpayne@68
|
225 /* Send a pointer to the read list to each LoadThread */
|
jpayne@68
|
226 for(LoadThread lt : loaders){
|
jpayne@68
|
227 boolean b=true;
|
jpayne@68
|
228 while(b){
|
jpayne@68
|
229 try {
|
jpayne@68
|
230 lt.queue.put(reads2);
|
jpayne@68
|
231 b=false;
|
jpayne@68
|
232 } catch (InterruptedException e) {
|
jpayne@68
|
233 //TODO: This will hang due to still-running threads.
|
jpayne@68
|
234 throw new RuntimeException(e);
|
jpayne@68
|
235 }
|
jpayne@68
|
236 }
|
jpayne@68
|
237 }
|
jpayne@68
|
238 }
|
jpayne@68
|
239
|
jpayne@68
|
240 /* Dispose of the old list and fetch a new one */
|
jpayne@68
|
241 cris.returnList(ln);
|
jpayne@68
|
242 ln=cris.nextList();
|
jpayne@68
|
243 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
244 }
|
jpayne@68
|
245 /* Cleanup */
|
jpayne@68
|
246 cris.returnList(ln);
|
jpayne@68
|
247 errorState|=ReadWrite.closeStream(cris);
|
jpayne@68
|
248 refNum++;
|
jpayne@68
|
249 }
|
jpayne@68
|
250 }
|
jpayne@68
|
251
|
jpayne@68
|
252 /* If there are literal sequences to use as references */
|
jpayne@68
|
253 if(literal!=null){
|
jpayne@68
|
254 ArrayList<Read> list=new ArrayList<Read>(literal.length);
|
jpayne@68
|
255 if(verbose){outstream.println("Adding literals "+Arrays.toString(literal));}
|
jpayne@68
|
256
|
jpayne@68
|
257 /* Assign a unique ID number to each literal sequence */
|
jpayne@68
|
258 for(int i=0; i<literal.length; i++){
|
jpayne@68
|
259 if(scaffoldNames!=null){
|
jpayne@68
|
260 final Integer id=scaffoldNames.size();
|
jpayne@68
|
261 final Read r=new Read(literal[i].getBytes(), null, id);
|
jpayne@68
|
262 refScafCounts[refNum]++;
|
jpayne@68
|
263 scaffoldNames.add(id.toString());
|
jpayne@68
|
264 scaffoldLengths.add(r.length());
|
jpayne@68
|
265 r.obj=id;
|
jpayne@68
|
266 }else{
|
jpayne@68
|
267 final Read r=new Read(literal[i].getBytes(), null, i);
|
jpayne@68
|
268 list.add(r);
|
jpayne@68
|
269 }
|
jpayne@68
|
270 }
|
jpayne@68
|
271
|
jpayne@68
|
272 if(REPLICATE_AMBIGUOUS){
|
jpayne@68
|
273 list=Tools.replicateAmbiguous(list, Tools.min(k, mink));
|
jpayne@68
|
274 }
|
jpayne@68
|
275
|
jpayne@68
|
276 /* Send a pointer to the read list to each LoadThread */
|
jpayne@68
|
277 for(LoadThread lt : loaders){
|
jpayne@68
|
278 boolean b=true;
|
jpayne@68
|
279 while(b){
|
jpayne@68
|
280 try {
|
jpayne@68
|
281 lt.queue.put(list);
|
jpayne@68
|
282 b=false;
|
jpayne@68
|
283 } catch (InterruptedException e) {
|
jpayne@68
|
284 //TODO: This will hang due to still-running threads.
|
jpayne@68
|
285 throw new RuntimeException(e);
|
jpayne@68
|
286 }
|
jpayne@68
|
287 }
|
jpayne@68
|
288 }
|
jpayne@68
|
289 }
|
jpayne@68
|
290
|
jpayne@68
|
291 /* Signal loaders to terminate */
|
jpayne@68
|
292 for(LoadThread lt : loaders){
|
jpayne@68
|
293 boolean b=true;
|
jpayne@68
|
294 while(b){
|
jpayne@68
|
295 try {
|
jpayne@68
|
296 lt.queue.put(POISON);
|
jpayne@68
|
297 b=false;
|
jpayne@68
|
298 } catch (InterruptedException e) {
|
jpayne@68
|
299 //TODO: This will hang due to still-running threads.
|
jpayne@68
|
300 throw new RuntimeException(e);
|
jpayne@68
|
301 }
|
jpayne@68
|
302 }
|
jpayne@68
|
303 }
|
jpayne@68
|
304
|
jpayne@68
|
305 /* Wait for loaders to die, and gather statistics */
|
jpayne@68
|
306 for(LoadThread lt : loaders){
|
jpayne@68
|
307 while(lt.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
308 try {
|
jpayne@68
|
309 lt.join();
|
jpayne@68
|
310 } catch (InterruptedException e) {
|
jpayne@68
|
311 // TODO Auto-generated catch block
|
jpayne@68
|
312 e.printStackTrace();
|
jpayne@68
|
313 }
|
jpayne@68
|
314 }
|
jpayne@68
|
315 added+=lt.addedT;
|
jpayne@68
|
316 refKmers+=lt.refKmersT;
|
jpayne@68
|
317 refBases+=lt.refBasesT;
|
jpayne@68
|
318 refReads+=lt.refReadsT;
|
jpayne@68
|
319 }
|
jpayne@68
|
320 //Correct statistics for number of threads, since each thread processes all reference data
|
jpayne@68
|
321 refKmers/=WAYS;
|
jpayne@68
|
322 refBases/=WAYS;
|
jpayne@68
|
323 refReads/=WAYS;
|
jpayne@68
|
324
|
jpayne@68
|
325 t.stop();
|
jpayne@68
|
326 if(DISPLAY_PROGRESS){
|
jpayne@68
|
327 outstream.println("Added "+added+" kmers; time: \t"+t);
|
jpayne@68
|
328 Shared.printMemory();
|
jpayne@68
|
329 outstream.println();
|
jpayne@68
|
330 }
|
jpayne@68
|
331
|
jpayne@68
|
332 if(verbose){
|
jpayne@68
|
333 TextStreamWriter tsw=new TextStreamWriter("stdout", false, false, false, FileFormat.TEXT);
|
jpayne@68
|
334 tsw.start();
|
jpayne@68
|
335 for(AbstractKmerTable table : tables){
|
jpayne@68
|
336 table.dumpKmersAsText(tsw, k, 1, Integer.MAX_VALUE);
|
jpayne@68
|
337 }
|
jpayne@68
|
338 tsw.poisonAndWait();
|
jpayne@68
|
339 }
|
jpayne@68
|
340
|
jpayne@68
|
341 return added;
|
jpayne@68
|
342 }
|
jpayne@68
|
343
|
jpayne@68
|
344
|
jpayne@68
|
345
|
jpayne@68
|
346 /**
|
jpayne@68
|
347 * Fills the scaffold names array with reference names.
|
jpayne@68
|
348 */
|
jpayne@68
|
349 public void toRefNames(){
|
jpayne@68
|
350 final int numRefs=refNames.size();
|
jpayne@68
|
351 for(int r=0, s=1; r<numRefs; r++){
|
jpayne@68
|
352 final int scafs=refScafCounts[r];
|
jpayne@68
|
353 final int lim=s+scafs;
|
jpayne@68
|
354 final String name=ReadWrite.stripToCore(refNames.get(r));
|
jpayne@68
|
355 // outstream.println("r="+r+", s="+s+", scafs="+scafs+", lim="+lim+", name="+name);
|
jpayne@68
|
356 while(s<lim){
|
jpayne@68
|
357 // outstream.println(r+", "+s+". Setting "+scaffoldNames.get(s)+" -> "+name);
|
jpayne@68
|
358 scaffoldNames.set(s, name);
|
jpayne@68
|
359 s++;
|
jpayne@68
|
360 }
|
jpayne@68
|
361 }
|
jpayne@68
|
362 }
|
jpayne@68
|
363
|
jpayne@68
|
364 /*--------------------------------------------------------------*/
|
jpayne@68
|
365 /*---------------- Inner Classes ----------------*/
|
jpayne@68
|
366 /*--------------------------------------------------------------*/
|
jpayne@68
|
367
|
jpayne@68
|
368 /**
|
jpayne@68
|
369 * Loads kmers into a table. Each thread handles all kmers X such that X%WAYS==tnum.
|
jpayne@68
|
370 */
|
jpayne@68
|
371 private class LoadThread extends Thread{
|
jpayne@68
|
372
|
jpayne@68
|
373 public LoadThread(final int tnum_){
|
jpayne@68
|
374 tnum=tnum_;
|
jpayne@68
|
375 map=tables[tnum];
|
jpayne@68
|
376 }
|
jpayne@68
|
377
|
jpayne@68
|
378 /**
|
jpayne@68
|
379 * Get the next list of reads (or scaffolds) from the queue.
|
jpayne@68
|
380 * @return List of reads
|
jpayne@68
|
381 */
|
jpayne@68
|
382 private ArrayList<Read> fetch(){
|
jpayne@68
|
383 ArrayList<Read> list=null;
|
jpayne@68
|
384 while(list==null){
|
jpayne@68
|
385 try {
|
jpayne@68
|
386 list=queue.take();
|
jpayne@68
|
387 } catch (InterruptedException e) {
|
jpayne@68
|
388 // TODO Auto-generated catch block
|
jpayne@68
|
389 e.printStackTrace();
|
jpayne@68
|
390 }
|
jpayne@68
|
391 }
|
jpayne@68
|
392 return list;
|
jpayne@68
|
393 }
|
jpayne@68
|
394
|
jpayne@68
|
395 @Override
|
jpayne@68
|
396 public void run(){
|
jpayne@68
|
397 ArrayList<Read> reads=fetch();
|
jpayne@68
|
398 while(reads!=POISON){
|
jpayne@68
|
399 for(Read r1 : reads){
|
jpayne@68
|
400 assert(r1.pairnum()==0);
|
jpayne@68
|
401 final Read r2=r1.mate;
|
jpayne@68
|
402
|
jpayne@68
|
403 addedT+=addToMap(r1, minRefSkip);
|
jpayne@68
|
404 if(r2!=null){addedT+=addToMap(r2, minRefSkip);}
|
jpayne@68
|
405 }
|
jpayne@68
|
406 reads=fetch();
|
jpayne@68
|
407 }
|
jpayne@68
|
408
|
jpayne@68
|
409 if(map.canRebalance() && map.size()>2L*map.arrayLength()){
|
jpayne@68
|
410 map.rebalance();
|
jpayne@68
|
411 }
|
jpayne@68
|
412 }
|
jpayne@68
|
413
|
jpayne@68
|
414 /**
|
jpayne@68
|
415 * @param r The current read to process
|
jpayne@68
|
416 * @param skip Number of bases to skip between kmers
|
jpayne@68
|
417 * @return Number of kmers stored
|
jpayne@68
|
418 */
|
jpayne@68
|
419 private long addToMap(Read r, int skip){
|
jpayne@68
|
420 if(variableRefSkip){
|
jpayne@68
|
421 int rblen=r.length();
|
jpayne@68
|
422 skip=(rblen>20000000 ? k : rblen>5000000 ? 11 : rblen>500000 ? 2 : 0);
|
jpayne@68
|
423 skip=Tools.mid(minRefSkip, maxRefSkip, skip);
|
jpayne@68
|
424 }
|
jpayne@68
|
425 final byte[] bases=r.bases;
|
jpayne@68
|
426 final int shift=2*k;
|
jpayne@68
|
427 final int shift2=shift-2;
|
jpayne@68
|
428 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
429 final long kmask=kMasks[k];
|
jpayne@68
|
430 long kmer=0;
|
jpayne@68
|
431 long rkmer=0;
|
jpayne@68
|
432 long added=0;
|
jpayne@68
|
433 int len=0;
|
jpayne@68
|
434
|
jpayne@68
|
435 if(bases!=null){
|
jpayne@68
|
436 refReadsT++;
|
jpayne@68
|
437 refBasesT+=bases.length;
|
jpayne@68
|
438 }
|
jpayne@68
|
439 if(bases==null || bases.length<k){return 0;}
|
jpayne@68
|
440
|
jpayne@68
|
441 final int id=(r.obj==null ? 1 : ((Integer)r.obj).intValue());
|
jpayne@68
|
442
|
jpayne@68
|
443 if(skip>1){ //Process while skipping some kmers
|
jpayne@68
|
444 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
445 byte b=bases[i];
|
jpayne@68
|
446 long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
447 long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
448 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
449 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
450 if(x<0){len=0; rkmer=0;}else{len++;}
|
jpayne@68
|
451 if(verbose){outstream.println("Scanning1 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
452 if(len>=k){
|
jpayne@68
|
453 refKmersT++;
|
jpayne@68
|
454 if(len%skip==0){
|
jpayne@68
|
455 final long extraBase=(i>=bases.length-1 ? -1 : AminoAcid.baseToNumber[bases[i+1]]);
|
jpayne@68
|
456 added+=addToMap(kmer, rkmer, k, extraBase, id, kmask, hammingDistance, editDistance);
|
jpayne@68
|
457 if(useShortKmers){
|
jpayne@68
|
458 if(i==k2){added+=addToMapRightShift(kmer, rkmer, id);}
|
jpayne@68
|
459 if(i==bases.length-1){added+=addToMapLeftShift(kmer, rkmer, extraBase, id);}
|
jpayne@68
|
460 }
|
jpayne@68
|
461 }
|
jpayne@68
|
462 }
|
jpayne@68
|
463 }
|
jpayne@68
|
464 }else{ //Process all kmers
|
jpayne@68
|
465 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
466 byte b=bases[i];
|
jpayne@68
|
467 long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
468 long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
469 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
470 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
471 if(x<0){len=0; rkmer=0;}else{len++;}
|
jpayne@68
|
472 if(verbose){outstream.println("Scanning2 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
473 if(len>=k){
|
jpayne@68
|
474 refKmersT++;
|
jpayne@68
|
475 final long extraBase=(i>=bases.length-1 ? -1 : AminoAcid.baseToNumber[bases[i+1]]);
|
jpayne@68
|
476 final long atm=addToMap(kmer, rkmer, k, extraBase, id, kmask, hammingDistance, editDistance);
|
jpayne@68
|
477 added+=atm;
|
jpayne@68
|
478 // assert(false) : atm+", "+map.contains(toValue(kmer, rkmer, kmask));
|
jpayne@68
|
479 if(useShortKmers){
|
jpayne@68
|
480 if(i==k2){added+=addToMapRightShift(kmer, rkmer, id);}
|
jpayne@68
|
481 if(i==bases.length-1){added+=addToMapLeftShift(kmer, rkmer, extraBase, id);}
|
jpayne@68
|
482 }
|
jpayne@68
|
483 }
|
jpayne@68
|
484 }
|
jpayne@68
|
485 }
|
jpayne@68
|
486 return added;
|
jpayne@68
|
487 }
|
jpayne@68
|
488
|
jpayne@68
|
489
|
jpayne@68
|
490 /**
|
jpayne@68
|
491 * Adds short kmers on the left end of the read.
|
jpayne@68
|
492 * @param kmer Forward kmer
|
jpayne@68
|
493 * @param rkmer Reverse kmer
|
jpayne@68
|
494 * @param extraBase Base added to end in case of deletions
|
jpayne@68
|
495 * @param id Scaffold number
|
jpayne@68
|
496 * @return Number of kmers stored
|
jpayne@68
|
497 */
|
jpayne@68
|
498 private long addToMapLeftShift(long kmer, long rkmer, final long extraBase, final int id){
|
jpayne@68
|
499 if(verbose){outstream.println("addToMapLeftShift");}
|
jpayne@68
|
500 long added=0;
|
jpayne@68
|
501 for(int i=k-1; i>=mink; i--){
|
jpayne@68
|
502 kmer=kmer&rightMasks[i];
|
jpayne@68
|
503 rkmer=rkmer>>>2;
|
jpayne@68
|
504 long x=addToMap(kmer, rkmer, i, extraBase, id, kMasks[i], hammingDistance2, editDistance2);
|
jpayne@68
|
505 added+=x;
|
jpayne@68
|
506 if(verbose){
|
jpayne@68
|
507 if((toValue(kmer, rkmer, kMasks[i]))%WAYS==tnum){
|
jpayne@68
|
508 outstream.println("added="+x+"; i="+i+"; tnum="+tnum+"; Added left-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i)+"; value="+(toValue(kmer, rkmer, kMasks[i]))+"; kmer="+kmer+"; rkmer="+rkmer+"; kmask="+kMasks[i]+"; rightMasks[i+1]="+rightMasks[i+1]);
|
jpayne@68
|
509 outstream.println("i="+i+"; tnum="+tnum+"; Looking for left-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i));
|
jpayne@68
|
510 final long value=toValue(kmer, rkmer, kMasks[i]);
|
jpayne@68
|
511 if(map.contains(value)){outstream.println("Found "+value);}
|
jpayne@68
|
512 }
|
jpayne@68
|
513 }
|
jpayne@68
|
514 }
|
jpayne@68
|
515 return added;
|
jpayne@68
|
516 }
|
jpayne@68
|
517
|
jpayne@68
|
518
|
jpayne@68
|
519 /**
|
jpayne@68
|
520 * Adds short kmers on the right end of the read.
|
jpayne@68
|
521 * @param kmer Forward kmer
|
jpayne@68
|
522 * @param rkmer Reverse kmer
|
jpayne@68
|
523 * @param id Scaffold number
|
jpayne@68
|
524 * @return Number of kmers stored
|
jpayne@68
|
525 */
|
jpayne@68
|
526 private long addToMapRightShift(long kmer, long rkmer, final int id){
|
jpayne@68
|
527 if(verbose){outstream.println("addToMapRightShift");}
|
jpayne@68
|
528 long added=0;
|
jpayne@68
|
529 for(int i=k-1; i>=mink; i--){
|
jpayne@68
|
530 long extraBase=kmer&3L;
|
jpayne@68
|
531 kmer=kmer>>>2;
|
jpayne@68
|
532 rkmer=rkmer&rightMasks[i];
|
jpayne@68
|
533 // assert(Long.numberOfLeadingZeros(kmer)>=2*(32-i)) : Long.numberOfLeadingZeros(kmer)+", "+i+", "+kmer+", "+kMasks[i];
|
jpayne@68
|
534 // assert(Long.numberOfLeadingZeros(rkmer)>=2*(32-i)) : Long.numberOfLeadingZeros(rkmer)+", "+i+", "+rkmer+", "+kMasks[i];
|
jpayne@68
|
535 long x=addToMap(kmer, rkmer, i, extraBase, id, kMasks[i], hammingDistance2, editDistance2);
|
jpayne@68
|
536 added+=x;
|
jpayne@68
|
537 if(verbose){
|
jpayne@68
|
538 if((toValue(kmer, rkmer, kMasks[i]))%WAYS==tnum){
|
jpayne@68
|
539 outstream.println("added="+x+"; i="+i+"; tnum="+tnum+"; Added right-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i)+"; value="+(toValue(kmer, rkmer, kMasks[i]))+"; kmer="+kmer+"; rkmer="+rkmer+"; kmask="+kMasks[i]+"; rightMasks[i+1]="+rightMasks[i+1]);
|
jpayne@68
|
540 outstream.println("i="+i+"; tnum="+tnum+"; Looking for right-shift kmer "+AminoAcid.kmerToString(kmer&~kMasks[i], i));
|
jpayne@68
|
541 final long value=toValue(kmer, rkmer, kMasks[i]);
|
jpayne@68
|
542 if(map.contains(value)){outstream.println("Found "+value);}
|
jpayne@68
|
543 }
|
jpayne@68
|
544 }
|
jpayne@68
|
545 }
|
jpayne@68
|
546 return added;
|
jpayne@68
|
547 }
|
jpayne@68
|
548
|
jpayne@68
|
549
|
jpayne@68
|
550 /**
|
jpayne@68
|
551 * Adds this kmer to the table, including any mutations implied by editDistance or hammingDistance.
|
jpayne@68
|
552 * @param kmer Forward kmer
|
jpayne@68
|
553 * @param rkmer Reverse kmer
|
jpayne@68
|
554 * @param len Kmer length
|
jpayne@68
|
555 * @param extraBase Base added to end in case of deletions
|
jpayne@68
|
556 * @param id Scaffold number
|
jpayne@68
|
557 * @param kmask0
|
jpayne@68
|
558 * @return Number of kmers stored
|
jpayne@68
|
559 */
|
jpayne@68
|
560 private long addToMap(final long kmer, final long rkmer, final int len, final long extraBase, final int id, final long kmask0, final int hdist, final int edist){
|
jpayne@68
|
561
|
jpayne@68
|
562 assert(kmask0==kMasks[len]) : kmask0+", "+len+", "+kMasks[len]+", "+Long.numberOfTrailingZeros(kmask0)+", "+Long.numberOfTrailingZeros(kMasks[len]);
|
jpayne@68
|
563
|
jpayne@68
|
564 if(verbose){outstream.println("addToMap_A; len="+len+"; kMasks[len]="+kMasks[len]);}
|
jpayne@68
|
565 assert((kmer&kmask0)==0);
|
jpayne@68
|
566 final long added;
|
jpayne@68
|
567 if(hdist==0){
|
jpayne@68
|
568 final long key=toValue(kmer, rkmer, kmask0);
|
jpayne@68
|
569 if(failsSpeed(key)){return 0;}
|
jpayne@68
|
570 if(key%WAYS!=tnum){return 0;}
|
jpayne@68
|
571 if(verbose){outstream.println("addToMap_B: "+AminoAcid.kmerToString(kmer&~kMasks[len], len)+" = "+key);}
|
jpayne@68
|
572 if(storeMode==SET_IF_NOT_PRESENT){
|
jpayne@68
|
573 added=map.setIfNotPresent(key, id);
|
jpayne@68
|
574 }else if(storeMode==SET_ALWAYS){
|
jpayne@68
|
575 added=map.set(key, id);
|
jpayne@68
|
576 }else{
|
jpayne@68
|
577 assert(storeMode==INCREMENT);
|
jpayne@68
|
578 added=map.increment(key, 1);
|
jpayne@68
|
579 }
|
jpayne@68
|
580 }else if(edist>0){
|
jpayne@68
|
581 // long extraBase=(i>=bases.length-1 ? -1 : AminoAcid.baseToNumber[bases[i+1]]);
|
jpayne@68
|
582 added=mutate(kmer, rkmer, len, id, edist, extraBase);
|
jpayne@68
|
583 }else{
|
jpayne@68
|
584 added=mutate(kmer, rkmer, len, id, hdist, -1);
|
jpayne@68
|
585 }
|
jpayne@68
|
586 if(verbose){outstream.println("addToMap added "+added+" keys.");}
|
jpayne@68
|
587 return added;
|
jpayne@68
|
588 }
|
jpayne@68
|
589
|
jpayne@68
|
590 /**
|
jpayne@68
|
591 * Mutate and store this kmer through 'dist' recursions.
|
jpayne@68
|
592 * @param kmer Forward kmer
|
jpayne@68
|
593 * @param rkmer Reverse kmer
|
jpayne@68
|
594 * @param id Scaffold number
|
jpayne@68
|
595 * @param dist Number of mutations
|
jpayne@68
|
596 * @param extraBase Base added to end in case of deletions
|
jpayne@68
|
597 * @return Number of kmers stored
|
jpayne@68
|
598 */
|
jpayne@68
|
599 private long mutate(final long kmer, final long rkmer, final int len, final int id, final int dist, final long extraBase){
|
jpayne@68
|
600 long added=0;
|
jpayne@68
|
601
|
jpayne@68
|
602 final long key=toValue(kmer, rkmer, kMasks[len]);
|
jpayne@68
|
603
|
jpayne@68
|
604 if(verbose){outstream.println("mutate_A; len="+len+"; kmer="+kmer+"; rkmer="+rkmer+"; kMasks[len]="+kMasks[len]);}
|
jpayne@68
|
605 if(key%WAYS==tnum){
|
jpayne@68
|
606 if(verbose){outstream.println("mutate_B: "+AminoAcid.kmerToString(kmer&~kMasks[len], len)+" = "+key);}
|
jpayne@68
|
607 int x;
|
jpayne@68
|
608 if(storeMode==SET_IF_NOT_PRESENT){
|
jpayne@68
|
609 x=map.setIfNotPresent(key, id);
|
jpayne@68
|
610 }else if(storeMode==SET_ALWAYS){
|
jpayne@68
|
611 x=map.set(key, id);
|
jpayne@68
|
612 }else{
|
jpayne@68
|
613 assert(storeMode==INCREMENT);
|
jpayne@68
|
614 x=map.increment(key, 1);
|
jpayne@68
|
615 x=(x==1 ? 1 : 0);
|
jpayne@68
|
616 }
|
jpayne@68
|
617 if(verbose){outstream.println("mutate_B added "+x+" keys.");}
|
jpayne@68
|
618 added+=x;
|
jpayne@68
|
619 assert(map.contains(key));
|
jpayne@68
|
620 }
|
jpayne@68
|
621
|
jpayne@68
|
622 if(dist>0){
|
jpayne@68
|
623 final int dist2=dist-1;
|
jpayne@68
|
624
|
jpayne@68
|
625 //Sub
|
jpayne@68
|
626 for(int j=0; j<4; j++){
|
jpayne@68
|
627 for(int i=0; i<len; i++){
|
jpayne@68
|
628 final long temp=(kmer&clearMasks[i])|setMasks[j][i];
|
jpayne@68
|
629 if(temp!=kmer){
|
jpayne@68
|
630 long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
|
jpayne@68
|
631 added+=mutate(temp, rtemp, len, id, dist2, extraBase);
|
jpayne@68
|
632 }
|
jpayne@68
|
633 }
|
jpayne@68
|
634 }
|
jpayne@68
|
635
|
jpayne@68
|
636 if(editDistance>0){
|
jpayne@68
|
637 //Del
|
jpayne@68
|
638 if(extraBase>=0 && extraBase<=3){
|
jpayne@68
|
639 for(int i=1; i<len; i++){
|
jpayne@68
|
640 final long temp=(kmer&leftMasks[i])|((kmer<<2)&rightMasks[i])|extraBase;
|
jpayne@68
|
641 if(temp!=kmer){
|
jpayne@68
|
642 long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
|
jpayne@68
|
643 added+=mutate(temp, rtemp, len, id, dist2, -1);
|
jpayne@68
|
644 }
|
jpayne@68
|
645 }
|
jpayne@68
|
646 }
|
jpayne@68
|
647
|
jpayne@68
|
648 //Ins
|
jpayne@68
|
649 final long eb2=kmer&3;
|
jpayne@68
|
650 for(int i=1; i<len; i++){
|
jpayne@68
|
651 final long temp0=(kmer&leftMasks[i])|((kmer&rightMasks[i])>>2);
|
jpayne@68
|
652 for(int j=0; j<4; j++){
|
jpayne@68
|
653 final long temp=temp0|setMasks[j][i-1];
|
jpayne@68
|
654 if(temp!=kmer){
|
jpayne@68
|
655 long rtemp=AminoAcid.reverseComplementBinaryFast(temp, len);
|
jpayne@68
|
656 added+=mutate(temp, rtemp, len, id, dist2, eb2);
|
jpayne@68
|
657 }
|
jpayne@68
|
658 }
|
jpayne@68
|
659 }
|
jpayne@68
|
660 }
|
jpayne@68
|
661
|
jpayne@68
|
662 }
|
jpayne@68
|
663
|
jpayne@68
|
664 return added;
|
jpayne@68
|
665 }
|
jpayne@68
|
666
|
jpayne@68
|
667 /*--------------------------------------------------------------*/
|
jpayne@68
|
668
|
jpayne@68
|
669 /** Number of kmers stored by this thread */
|
jpayne@68
|
670 public long addedT=0;
|
jpayne@68
|
671 /** Number of items encountered by this thread */
|
jpayne@68
|
672 public long refKmersT=0, refReadsT=0, refBasesT=0;
|
jpayne@68
|
673 /** Thread number; used to determine which kmers to store */
|
jpayne@68
|
674 public final int tnum;
|
jpayne@68
|
675 /** Buffer of input read lists */
|
jpayne@68
|
676 public final ArrayBlockingQueue<ArrayList<Read>> queue=new ArrayBlockingQueue<ArrayList<Read>>(32);
|
jpayne@68
|
677
|
jpayne@68
|
678 /** Destination for storing kmers */
|
jpayne@68
|
679 private final AbstractKmerTable map;
|
jpayne@68
|
680
|
jpayne@68
|
681 }
|
jpayne@68
|
682
|
jpayne@68
|
683 /*--------------------------------------------------------------*/
|
jpayne@68
|
684 /*---------------- Static Methods ----------------*/
|
jpayne@68
|
685 /*--------------------------------------------------------------*/
|
jpayne@68
|
686
|
jpayne@68
|
687 /**
|
jpayne@68
|
688 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined.
|
jpayne@68
|
689 * @param kmer Forward kmer
|
jpayne@68
|
690 * @param rkmer Reverse kmer
|
jpayne@68
|
691 * @param lengthMask Bitmask with single '1' set to left of kmer
|
jpayne@68
|
692 * @return Canonical value
|
jpayne@68
|
693 */
|
jpayne@68
|
694 private final long toValue(long kmer, long rkmer, long lengthMask){
|
jpayne@68
|
695 assert(lengthMask==0 || (kmer<lengthMask && rkmer<lengthMask)) : lengthMask+", "+kmer+", "+rkmer;
|
jpayne@68
|
696 long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
|
jpayne@68
|
697 return (value&middleMask)|lengthMask;
|
jpayne@68
|
698 }
|
jpayne@68
|
699
|
jpayne@68
|
700 final boolean passesSpeed(long key){
|
jpayne@68
|
701 return speed<1 || ((key&Long.MAX_VALUE)%17)>=speed;
|
jpayne@68
|
702 }
|
jpayne@68
|
703
|
jpayne@68
|
704 final boolean failsSpeed(long key){
|
jpayne@68
|
705 return speed>0 && ((key&Long.MAX_VALUE)%17)<speed;
|
jpayne@68
|
706 }
|
jpayne@68
|
707
|
jpayne@68
|
708 /*--------------------------------------------------------------*/
|
jpayne@68
|
709 /*---------------- Fields ----------------*/
|
jpayne@68
|
710 /*--------------------------------------------------------------*/
|
jpayne@68
|
711
|
jpayne@68
|
712 /** Has this class encountered errors while processing? */
|
jpayne@68
|
713 public boolean errorState=false;
|
jpayne@68
|
714
|
jpayne@68
|
715 /** How to associate values with kmers */
|
jpayne@68
|
716 private int storeMode=SET_IF_NOT_PRESENT;
|
jpayne@68
|
717
|
jpayne@68
|
718 /** Hold kmers. A kmer X such that X%WAYS=Y will be stored in keySets[Y] */
|
jpayne@68
|
719 public AbstractKmerTable[] tables;
|
jpayne@68
|
720 /** A scaffold's name is stored at scaffoldNames.get(id).
|
jpayne@68
|
721 * scaffoldNames[0] is reserved, so the first id is 1. */
|
jpayne@68
|
722 public ArrayList<String> scaffoldNames;
|
jpayne@68
|
723 /** Names of reference files (refNames[0] is valid). */
|
jpayne@68
|
724 public ArrayList<String> refNames;
|
jpayne@68
|
725 /** Number of scaffolds per reference. */
|
jpayne@68
|
726 public int[] refScafCounts;
|
jpayne@68
|
727 /** scaffoldLengths[id] stores the length of that scaffold */
|
jpayne@68
|
728 public IntList scaffoldLengths=new IntList();
|
jpayne@68
|
729
|
jpayne@68
|
730 /** Make the middle base in a kmer a wildcard to improve sensitivity */
|
jpayne@68
|
731 public final boolean maskMiddle=false;
|
jpayne@68
|
732 /** Correct errors via read overlap */
|
jpayne@68
|
733 public boolean ecc=false;
|
jpayne@68
|
734
|
jpayne@68
|
735 /** Store reference kmers with up to this many substitutions */
|
jpayne@68
|
736 public final int hammingDistance;
|
jpayne@68
|
737 /** Store reference kmers with up to this many edits (including indels) */
|
jpayne@68
|
738 public final int editDistance;
|
jpayne@68
|
739 /** Store short reference kmers with up to this many substitutions */
|
jpayne@68
|
740 public int hammingDistance2=-1;
|
jpayne@68
|
741 /** Store short reference kmers with up to this many edits (including indels) */
|
jpayne@68
|
742 public int editDistance2=-1;
|
jpayne@68
|
743 /** Always skip at least this many consecutive kmers when hashing reference.
|
jpayne@68
|
744 * 1 means every kmer is used, 2 means every other, etc. */
|
jpayne@68
|
745 private int minRefSkip=0;
|
jpayne@68
|
746 /** Never skip more than this many consecutive kmers when hashing reference. */
|
jpayne@68
|
747 private int maxRefSkip=0;
|
jpayne@68
|
748
|
jpayne@68
|
749 private boolean variableRefSkip=false;
|
jpayne@68
|
750
|
jpayne@68
|
751 /*--------------------------------------------------------------*/
|
jpayne@68
|
752 /*---------------- Statistics ----------------*/
|
jpayne@68
|
753 /*--------------------------------------------------------------*/
|
jpayne@68
|
754
|
jpayne@68
|
755 long refReads=0;
|
jpayne@68
|
756 long refBases=0;
|
jpayne@68
|
757 long refKmers=0;
|
jpayne@68
|
758
|
jpayne@68
|
759 long storedKmers=0;
|
jpayne@68
|
760
|
jpayne@68
|
761 /*--------------------------------------------------------------*/
|
jpayne@68
|
762 /*---------------- Final Primitives ----------------*/
|
jpayne@68
|
763 /*--------------------------------------------------------------*/
|
jpayne@68
|
764
|
jpayne@68
|
765 /** Look for reverse-complements as well as forward kmers. Default: true */
|
jpayne@68
|
766 private final boolean rcomp;
|
jpayne@68
|
767 /** AND bitmask with 0's at the middle base */
|
jpayne@68
|
768 private final long middleMask;
|
jpayne@68
|
769
|
jpayne@68
|
770 /** Normal kmer length */
|
jpayne@68
|
771 private final int k;
|
jpayne@68
|
772 /** k-1; used in some expressions */
|
jpayne@68
|
773 private final int k2;
|
jpayne@68
|
774 /** Shortest kmer to use for trimming */
|
jpayne@68
|
775 private final int mink;
|
jpayne@68
|
776 /** Attempt to match kmers shorter than normal k on read ends when doing kTrimming. */
|
jpayne@68
|
777 private final boolean useShortKmers;
|
jpayne@68
|
778
|
jpayne@68
|
779 /** Fraction of kmers to skip, 0 to 16 out of 17 */
|
jpayne@68
|
780 private final int speed;
|
jpayne@68
|
781
|
jpayne@68
|
782 /*--------------------------------------------------------------*/
|
jpayne@68
|
783 /*---------------- Static Fields ----------------*/
|
jpayne@68
|
784 /*--------------------------------------------------------------*/
|
jpayne@68
|
785
|
jpayne@68
|
786 /** Default initial size of data structures */
|
jpayne@68
|
787 private static final int initialSizeDefault=128000;
|
jpayne@68
|
788
|
jpayne@68
|
789 /** Number of tables (and threads, during loading) */
|
jpayne@68
|
790 private static final int WAYS=7; //123
|
jpayne@68
|
791 /** Verbose messages */
|
jpayne@68
|
792 public static final boolean verbose=false; //123
|
jpayne@68
|
793
|
jpayne@68
|
794 /** Print messages to this stream */
|
jpayne@68
|
795 private static PrintStream outstream=System.err;
|
jpayne@68
|
796 /** Display progress messages such as memory usage */
|
jpayne@68
|
797 public static boolean DISPLAY_PROGRESS=true;
|
jpayne@68
|
798 /** Indicates end of input stream */
|
jpayne@68
|
799 private static final ArrayList<Read> POISON=new ArrayList<Read>(0);
|
jpayne@68
|
800 /** Make unambiguous copies of ref sequences with ambiguous bases */
|
jpayne@68
|
801 public static boolean REPLICATE_AMBIGUOUS=false;
|
jpayne@68
|
802
|
jpayne@68
|
803 /** x&clearMasks[i] will clear base i */
|
jpayne@68
|
804 private static final long[] clearMasks;
|
jpayne@68
|
805 /** x|setMasks[i][j] will set base i to j */
|
jpayne@68
|
806 private static final long[][] setMasks;
|
jpayne@68
|
807 /** x&leftMasks[i] will clear all bases to the right of i (exclusive) */
|
jpayne@68
|
808 private static final long[] leftMasks;
|
jpayne@68
|
809 /** x&rightMasks[i] will clear all bases to the left of i (inclusive) */
|
jpayne@68
|
810 private static final long[] rightMasks;
|
jpayne@68
|
811 /** x|kMasks[i] will set the bit to the left of the leftmost base */
|
jpayne@68
|
812 private static final long[] kMasks;
|
jpayne@68
|
813
|
jpayne@68
|
814 public static final int SET_IF_NOT_PRESENT=1, SET_ALWAYS=2, INCREMENT=3;
|
jpayne@68
|
815
|
jpayne@68
|
816 /*--------------------------------------------------------------*/
|
jpayne@68
|
817 /*---------------- Static Initializers ----------------*/
|
jpayne@68
|
818 /*--------------------------------------------------------------*/
|
jpayne@68
|
819
|
jpayne@68
|
820 static{
|
jpayne@68
|
821 clearMasks=new long[32];
|
jpayne@68
|
822 leftMasks=new long[32];
|
jpayne@68
|
823 rightMasks=new long[32];
|
jpayne@68
|
824 kMasks=new long[32];
|
jpayne@68
|
825 setMasks=new long[4][32];
|
jpayne@68
|
826 for(int i=0; i<32; i++){
|
jpayne@68
|
827 clearMasks[i]=~(3L<<(2*i));
|
jpayne@68
|
828 }
|
jpayne@68
|
829 for(int i=0; i<32; i++){
|
jpayne@68
|
830 leftMasks[i]=((-1L)<<(2*i));
|
jpayne@68
|
831 }
|
jpayne@68
|
832 for(int i=0; i<32; i++){
|
jpayne@68
|
833 rightMasks[i]=~((-1L)<<(2*i));
|
jpayne@68
|
834 }
|
jpayne@68
|
835 for(int i=0; i<32; i++){
|
jpayne@68
|
836 kMasks[i]=((1L)<<(2*i));
|
jpayne@68
|
837 }
|
jpayne@68
|
838 for(int i=0; i<32; i++){
|
jpayne@68
|
839 for(long j=0; j<4; j++){
|
jpayne@68
|
840 setMasks[(int)j][i]=(j<<(2*i));
|
jpayne@68
|
841 }
|
jpayne@68
|
842 }
|
jpayne@68
|
843 }
|
jpayne@68
|
844
|
jpayne@68
|
845 }
|