jpayne@68
|
1 package kmer;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.Arrays;
|
jpayne@68
|
6 import java.util.BitSet;
|
jpayne@68
|
7 import java.util.concurrent.atomic.AtomicLong;
|
jpayne@68
|
8
|
jpayne@68
|
9 import assemble.Contig;
|
jpayne@68
|
10 import bloom.KmerCountAbstract;
|
jpayne@68
|
11 import dna.AminoAcid;
|
jpayne@68
|
12 import fileIO.ByteStreamWriter;
|
jpayne@68
|
13 import fileIO.FileFormat;
|
jpayne@68
|
14 import fileIO.ReadWrite;
|
jpayne@68
|
15 import jgi.BBMerge;
|
jpayne@68
|
16 import shared.Parse;
|
jpayne@68
|
17 import shared.Parser;
|
jpayne@68
|
18 import shared.PreParser;
|
jpayne@68
|
19 import shared.Primes;
|
jpayne@68
|
20 import shared.ReadStats;
|
jpayne@68
|
21 import shared.Shared;
|
jpayne@68
|
22 import shared.Timer;
|
jpayne@68
|
23 import shared.Tools;
|
jpayne@68
|
24 import shared.TrimRead;
|
jpayne@68
|
25 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
26 import stream.FastaReadInputStream;
|
jpayne@68
|
27 import stream.Read;
|
jpayne@68
|
28 import structures.ByteBuilder;
|
jpayne@68
|
29 import structures.IntList;
|
jpayne@68
|
30 import structures.ListNum;
|
jpayne@68
|
31 import structures.LongList;
|
jpayne@68
|
32 import ukmer.Kmer;
|
jpayne@68
|
33
|
jpayne@68
|
34
|
jpayne@68
|
35 /**
|
jpayne@68
|
36 * Loads and holds kmers for Tadpole
|
jpayne@68
|
37 * @author Brian Bushnell
|
jpayne@68
|
38 * @date Jun 22, 2015
|
jpayne@68
|
39 *
|
jpayne@68
|
40 */
|
jpayne@68
|
41 public class KmerTableSet extends AbstractKmerTableSet {
|
jpayne@68
|
42
|
jpayne@68
|
43 /**
|
jpayne@68
|
44 * Code entrance from the command line.
|
jpayne@68
|
45 * @param args Command line arguments
|
jpayne@68
|
46 */
|
jpayne@68
|
47 public static void main(String[] args){
|
jpayne@68
|
48 Timer t=new Timer(), t2=new Timer();
|
jpayne@68
|
49 t.start();
|
jpayne@68
|
50 t2.start();
|
jpayne@68
|
51 KmerTableSet set=new KmerTableSet(args);
|
jpayne@68
|
52 t2.stop();
|
jpayne@68
|
53 outstream.println("Initialization Time: \t"+t2);
|
jpayne@68
|
54
|
jpayne@68
|
55 ///And run it
|
jpayne@68
|
56 set.process(t);
|
jpayne@68
|
57
|
jpayne@68
|
58 //Close the print stream if it was redirected
|
jpayne@68
|
59 Shared.closeStream(outstream);
|
jpayne@68
|
60 }
|
jpayne@68
|
61
|
jpayne@68
|
62
|
jpayne@68
|
63 /**
|
jpayne@68
|
64 * Constructor.
|
jpayne@68
|
65 * @param args Command line arguments
|
jpayne@68
|
66 */
|
jpayne@68
|
67 private KmerTableSet(String[] args){
|
jpayne@68
|
68 this(args, 12);//+5 if using ownership and building contigs
|
jpayne@68
|
69 }
|
jpayne@68
|
70
|
jpayne@68
|
71
|
jpayne@68
|
72 /**
|
jpayne@68
|
73 * Constructor.
|
jpayne@68
|
74 * @param args Command line arguments
|
jpayne@68
|
75 */
|
jpayne@68
|
76 public KmerTableSet(String[] args, final int bytesPerKmer_){
|
jpayne@68
|
77 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
78 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
79 args=pp.args;
|
jpayne@68
|
80 outstream=pp.outstream;
|
jpayne@68
|
81 }//TODO - no easy way to close outstream
|
jpayne@68
|
82
|
jpayne@68
|
83 /* Initialize local variables with defaults */
|
jpayne@68
|
84 Parser parser=new Parser();
|
jpayne@68
|
85 boolean prealloc_=false;
|
jpayne@68
|
86 int k_=31;
|
jpayne@68
|
87 int ways_=-1;
|
jpayne@68
|
88 int filterMax_=2;
|
jpayne@68
|
89 boolean ecco_=false, merge_=false;
|
jpayne@68
|
90 boolean rcomp_=true;
|
jpayne@68
|
91 double minProb_=defaultMinprob;
|
jpayne@68
|
92 int tableType_=AbstractKmerTable.ARRAY1D;
|
jpayne@68
|
93 /* Parse arguments */
|
jpayne@68
|
94 for(int i=0; i<args.length; i++){
|
jpayne@68
|
95
|
jpayne@68
|
96 final String arg=args[i];
|
jpayne@68
|
97 String[] split=arg.split("=");
|
jpayne@68
|
98 String a=split[0].toLowerCase();
|
jpayne@68
|
99 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
100
|
jpayne@68
|
101 if(Parser.parseCommonStatic(arg, a, b)){
|
jpayne@68
|
102 //do nothing
|
jpayne@68
|
103 }else if(Parser.parseZip(arg, a, b)){
|
jpayne@68
|
104 //do nothing
|
jpayne@68
|
105 }else if(Parser.parseQuality(arg, a, b)){
|
jpayne@68
|
106 //do nothing
|
jpayne@68
|
107 }else if(Parser.parseFasta(arg, a, b)){
|
jpayne@68
|
108 //do nothing
|
jpayne@68
|
109 }else if(parser.parseInterleaved(arg, a, b)){
|
jpayne@68
|
110 //do nothing
|
jpayne@68
|
111 }else if(parser.parseTrim(arg, a, b)){
|
jpayne@68
|
112 //do nothing
|
jpayne@68
|
113 }else if(a.equals("in") || a.equals("in1")){
|
jpayne@68
|
114 in1.clear();
|
jpayne@68
|
115 if(b!=null){
|
jpayne@68
|
116 String[] s=b.split(",");
|
jpayne@68
|
117 for(String ss : s){
|
jpayne@68
|
118 in1.add(ss);
|
jpayne@68
|
119 }
|
jpayne@68
|
120 }
|
jpayne@68
|
121 }else if(a.equals("in2")){
|
jpayne@68
|
122 in2.clear();
|
jpayne@68
|
123 if(b!=null){
|
jpayne@68
|
124 String[] s=b.split(",");
|
jpayne@68
|
125 for(String ss : s){
|
jpayne@68
|
126 in2.add(ss);
|
jpayne@68
|
127 }
|
jpayne@68
|
128 }
|
jpayne@68
|
129 }else if(a.equals("extra")){
|
jpayne@68
|
130 extra.clear();
|
jpayne@68
|
131 if(b!=null){
|
jpayne@68
|
132 String[] s=b.split(",");
|
jpayne@68
|
133 for(String ss : s){
|
jpayne@68
|
134 extra.add(ss);
|
jpayne@68
|
135 }
|
jpayne@68
|
136 }
|
jpayne@68
|
137 }else if(a.equals("append") || a.equals("app")){
|
jpayne@68
|
138 append=ReadStats.append=Parse.parseBoolean(b);
|
jpayne@68
|
139 }else if(a.equals("overwrite") || a.equals("ow")){
|
jpayne@68
|
140 overwrite=Parse.parseBoolean(b);
|
jpayne@68
|
141 }else if(a.equals("initialsize")){
|
jpayne@68
|
142 initialSize=Parse.parseIntKMG(b);
|
jpayne@68
|
143 }else if(a.equals("showstats") || a.equals("stats")){
|
jpayne@68
|
144 showStats=Parse.parseBoolean(b);
|
jpayne@68
|
145 }else if(a.equals("ways")){
|
jpayne@68
|
146 ways_=Parse.parseIntKMG(b);
|
jpayne@68
|
147 }else if(a.equals("buflen") || a.equals("bufflen") || a.equals("bufferlength")){
|
jpayne@68
|
148 buflen=Parse.parseIntKMG(b);
|
jpayne@68
|
149 }else if(a.equals("k")){
|
jpayne@68
|
150 assert(b!=null) : "\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n";
|
jpayne@68
|
151 k_=Parse.parseIntKMG(b);
|
jpayne@68
|
152 }else if(a.equals("threads") || a.equals("t")){
|
jpayne@68
|
153 THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b));
|
jpayne@68
|
154 }else if(a.equals("showspeed") || a.equals("ss")){
|
jpayne@68
|
155 showSpeed=Parse.parseBoolean(b);
|
jpayne@68
|
156 }else if(a.equals("ecco")){
|
jpayne@68
|
157 ecco_=Parse.parseBoolean(b);
|
jpayne@68
|
158 }else if(a.equals("merge")){
|
jpayne@68
|
159 merge_=Parse.parseBoolean(b);
|
jpayne@68
|
160 }else if(a.equals("verbose")){
|
jpayne@68
|
161 // assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
|
jpayne@68
|
162 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
163 }else if(a.equals("verbose2")){
|
jpayne@68
|
164 // assert(false) : "Verbose flag is currently static final; must be recompiled to change.";
|
jpayne@68
|
165 verbose2=Parse.parseBoolean(b);
|
jpayne@68
|
166 }else if(a.equals("minprob")){
|
jpayne@68
|
167 minProb_=Double.parseDouble(b);
|
jpayne@68
|
168 }else if(a.equals("minprobprefilter") || a.equals("mpp")){
|
jpayne@68
|
169 minProbPrefilter=Parse.parseBoolean(b);
|
jpayne@68
|
170 }else if(a.equals("minprobmain") || a.equals("mpm")){
|
jpayne@68
|
171 minProbMain=Parse.parseBoolean(b);
|
jpayne@68
|
172 }else if(a.equals("reads") || a.startsWith("maxreads")){
|
jpayne@68
|
173 maxReads=Parse.parseKMG(b);
|
jpayne@68
|
174 }else if(a.equals("prealloc") || a.equals("preallocate")){
|
jpayne@68
|
175 if(b==null || b.length()<1 || Character.isLetter(b.charAt(0))){
|
jpayne@68
|
176 prealloc_=Parse.parseBoolean(b);
|
jpayne@68
|
177 }else{
|
jpayne@68
|
178 preallocFraction=Tools.max(0, Double.parseDouble(b));
|
jpayne@68
|
179 prealloc_=(preallocFraction>0);
|
jpayne@68
|
180 }
|
jpayne@68
|
181 }else if(a.equals("prefilter")){
|
jpayne@68
|
182 if(b==null || b.length()<1 || !Tools.isDigit(b.charAt(0))){
|
jpayne@68
|
183 prefilter=Parse.parseBoolean(b);
|
jpayne@68
|
184 }else{
|
jpayne@68
|
185 filterMax_=Parse.parseIntKMG(b);
|
jpayne@68
|
186 prefilter=filterMax_>0;
|
jpayne@68
|
187 }
|
jpayne@68
|
188 }else if(a.equals("prefiltersize") || a.equals("prefilterfraction") || a.equals("pff")){
|
jpayne@68
|
189 prefilterFraction=Tools.max(0, Double.parseDouble(b));
|
jpayne@68
|
190 assert(prefilterFraction<=1) : "prefiltersize must be 0-1, a fraction of total memory.";
|
jpayne@68
|
191 prefilter=prefilterFraction>0;
|
jpayne@68
|
192 }else if(a.equals("prehashes") || a.equals("hashes")){
|
jpayne@68
|
193 prehashes=Parse.parseIntKMG(b);
|
jpayne@68
|
194 }else if(a.equals("prefilterpasses") || a.equals("prepasses")){
|
jpayne@68
|
195 assert(b!=null) : "Bad parameter: "+arg;
|
jpayne@68
|
196 if(b.equalsIgnoreCase("auto")){
|
jpayne@68
|
197 prepasses=-1;
|
jpayne@68
|
198 }else{
|
jpayne@68
|
199 prepasses=Parse.parseIntKMG(b);
|
jpayne@68
|
200 }
|
jpayne@68
|
201 }else if(a.equals("onepass")){
|
jpayne@68
|
202 onePass=Parse.parseBoolean(b);
|
jpayne@68
|
203 }else if(a.equals("passes")){
|
jpayne@68
|
204 int passes=Parse.parseIntKMG(b);
|
jpayne@68
|
205 onePass=(passes<2);
|
jpayne@68
|
206 }else if(a.equals("rcomp")){
|
jpayne@68
|
207 rcomp_=Parse.parseBoolean(b);
|
jpayne@68
|
208 }else if(a.equals("tabletype")){
|
jpayne@68
|
209 tableType_=Integer.parseInt(b);
|
jpayne@68
|
210 }
|
jpayne@68
|
211
|
jpayne@68
|
212 else if(a.equalsIgnoreCase("filterMemoryOverride") || a.equalsIgnoreCase("filterMemory") ||
|
jpayne@68
|
213 a.equalsIgnoreCase("prefilterMemory") || a.equalsIgnoreCase("filtermem")){
|
jpayne@68
|
214 filterMemoryOverride=Parse.parseKMG(b);
|
jpayne@68
|
215 }
|
jpayne@68
|
216
|
jpayne@68
|
217 else if(IGNORE_UNKNOWN_ARGS){
|
jpayne@68
|
218 //Do nothing
|
jpayne@68
|
219 }else{
|
jpayne@68
|
220 throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
221 }
|
jpayne@68
|
222 }
|
jpayne@68
|
223
|
jpayne@68
|
224 {//Process parser fields
|
jpayne@68
|
225 Parser.processQuality();
|
jpayne@68
|
226
|
jpayne@68
|
227 qtrimLeft=parser.qtrimLeft;
|
jpayne@68
|
228 qtrimRight=parser.qtrimRight;
|
jpayne@68
|
229 trimq=parser.trimq;
|
jpayne@68
|
230 trimE=parser.trimE();
|
jpayne@68
|
231
|
jpayne@68
|
232 minAvgQuality=parser.minAvgQuality;
|
jpayne@68
|
233 minAvgQualityBases=parser.minAvgQualityBases;
|
jpayne@68
|
234 amino=Shared.AMINO_IN;
|
jpayne@68
|
235 if(amino){k_=Tools.min(k_, 12);}
|
jpayne@68
|
236 }
|
jpayne@68
|
237
|
jpayne@68
|
238 if(prepasses==0 || !prefilter){
|
jpayne@68
|
239 prepasses=0;
|
jpayne@68
|
240 prefilter=false;
|
jpayne@68
|
241 }
|
jpayne@68
|
242
|
jpayne@68
|
243
|
jpayne@68
|
244 // assert(false) : prepasses+", "+onePass+", "+prefilter;
|
jpayne@68
|
245
|
jpayne@68
|
246 {
|
jpayne@68
|
247 long memory=Runtime.getRuntime().maxMemory();
|
jpayne@68
|
248 double xmsRatio=Shared.xmsRatio();
|
jpayne@68
|
249 // long tmemory=Runtime.getRuntime().totalMemory();
|
jpayne@68
|
250 usableMemory=(long)Tools.max(((memory-96000000)*(xmsRatio>0.97 ? 0.82 : 0.72)), memory*0.45);
|
jpayne@68
|
251 if(prepasses==0 || !prefilter){
|
jpayne@68
|
252 filterMemory0=filterMemory1=0;
|
jpayne@68
|
253 }else if(filterMemoryOverride>0){
|
jpayne@68
|
254 filterMemory0=filterMemory1=filterMemoryOverride;
|
jpayne@68
|
255 }else{
|
jpayne@68
|
256 double low=Tools.min(prefilterFraction, 1-prefilterFraction);
|
jpayne@68
|
257 double high=1-low;
|
jpayne@68
|
258 if(prepasses<0 || (prepasses&1)==1){//odd passes
|
jpayne@68
|
259 filterMemory0=(long)(usableMemory*low);
|
jpayne@68
|
260 filterMemory1=(long)(usableMemory*high);
|
jpayne@68
|
261 }else{//even passes
|
jpayne@68
|
262 filterMemory0=(long)(usableMemory*high);
|
jpayne@68
|
263 filterMemory1=(long)(usableMemory*low);
|
jpayne@68
|
264 }
|
jpayne@68
|
265 }
|
jpayne@68
|
266 tableMemory=(long)(usableMemory*.95-filterMemory0);
|
jpayne@68
|
267 }
|
jpayne@68
|
268
|
jpayne@68
|
269 tableType=tableType_;
|
jpayne@68
|
270 prealloc=prealloc_;
|
jpayne@68
|
271 bytesPerKmer=bytesPerKmer_;
|
jpayne@68
|
272 if(ways_<1){
|
jpayne@68
|
273 long maxKmers=(2*tableMemory)/bytesPerKmer;
|
jpayne@68
|
274 long minWays=Tools.min(10000, maxKmers/Integer.MAX_VALUE);
|
jpayne@68
|
275 ways_=(int)Tools.max(31, (int)(Tools.min(96, Shared.threads())*2.5), minWays);
|
jpayne@68
|
276 ways_=(int)Primes.primeAtLeast(ways_);
|
jpayne@68
|
277 assert(ways_>0);
|
jpayne@68
|
278 // System.err.println("ways="+ways_);
|
jpayne@68
|
279 }
|
jpayne@68
|
280
|
jpayne@68
|
281 /* Set final variables; post-process and validate argument combinations */
|
jpayne@68
|
282
|
jpayne@68
|
283 onePass=onePass&prefilter;
|
jpayne@68
|
284 ways=ways_;
|
jpayne@68
|
285 filterMax=Tools.min(filterMax_, 0x7FFFFFFF);
|
jpayne@68
|
286 ecco=ecco_;
|
jpayne@68
|
287 merge=merge_;
|
jpayne@68
|
288 minProb=(float)minProb_;
|
jpayne@68
|
289 rcomp=rcomp_;
|
jpayne@68
|
290 // assert(false) : tableMemory+", "+bytesPerKmer+", "+prealloc+", "+preallocFraction;
|
jpayne@68
|
291 estimatedKmerCapacity=(long)((tableMemory*1.0/bytesPerKmer)*((prealloc ? preallocFraction : 0.81))*(HashArray.maxLoadFactorFinal*0.97));
|
jpayne@68
|
292
|
jpayne@68
|
293 // System.err.println("tableMemory="+tableMemory+", bytesPerKmer="+bytesPerKmer+", estimatedKmerCapacity="+estimatedKmerCapacity);
|
jpayne@68
|
294 KmerCountAbstract.minProb=(minProbPrefilter ? minProb : 0);
|
jpayne@68
|
295 k=k_;
|
jpayne@68
|
296 k2=k-1;
|
jpayne@68
|
297 int bitsPerBase=(amino ? 5 : 2);
|
jpayne@68
|
298 int mask=(amino ? 31 : 3);
|
jpayne@68
|
299 coreMask=(AbstractKmerTableSet.MASK_CORE ? ~(((-1L)<<(bitsPerBase*(k_-1)))|mask) : -1L);
|
jpayne@68
|
300
|
jpayne@68
|
301 if(k<1 || k>31){throw new RuntimeException("\nk needs an integer value from 1 to 31, such as k=27. Default is 31.\n");}
|
jpayne@68
|
302
|
jpayne@68
|
303 if(initialSize<1){
|
jpayne@68
|
304 final long memOverWays=tableMemory/(bytesPerKmer*ways);
|
jpayne@68
|
305 final double mem2=(prealloc ? preallocFraction : 1)*tableMemory;
|
jpayne@68
|
306 initialSize=(prealloc || memOverWays<initialSizeDefault ? (int)Tools.min(2142000000, (long)(mem2/(bytesPerKmer*ways))) : initialSizeDefault);
|
jpayne@68
|
307 if(initialSize!=initialSizeDefault){
|
jpayne@68
|
308 System.err.println("Initial size set to "+initialSize);
|
jpayne@68
|
309 }
|
jpayne@68
|
310 }
|
jpayne@68
|
311
|
jpayne@68
|
312 /* Adjust I/O settings and filenames */
|
jpayne@68
|
313
|
jpayne@68
|
314 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
315
|
jpayne@68
|
316 if(in1.isEmpty()){
|
jpayne@68
|
317 //throw new RuntimeException("Error - at least one input file is required.");
|
jpayne@68
|
318 }
|
jpayne@68
|
319
|
jpayne@68
|
320 for(int i=0; i<in1.size(); i++){
|
jpayne@68
|
321 String s=in1.get(i);
|
jpayne@68
|
322 if(s!=null && s.contains("#") && !new File(s).exists()){
|
jpayne@68
|
323 int pound=s.lastIndexOf('#');
|
jpayne@68
|
324 String a=s.substring(0, pound);
|
jpayne@68
|
325 String b=s.substring(pound+1);
|
jpayne@68
|
326 in1.set(i, a+1+b);
|
jpayne@68
|
327 in2.add(a+2+b);
|
jpayne@68
|
328 }
|
jpayne@68
|
329 }
|
jpayne@68
|
330
|
jpayne@68
|
331 if(!extra.isEmpty()){
|
jpayne@68
|
332 ArrayList<String> temp=(ArrayList<String>) extra.clone();
|
jpayne@68
|
333 extra.clear();
|
jpayne@68
|
334 for(String s : temp){
|
jpayne@68
|
335 if(s!=null && s.contains("#") && !new File(s).exists()){
|
jpayne@68
|
336 int pound=s.lastIndexOf('#');
|
jpayne@68
|
337 String a=s.substring(0, pound);
|
jpayne@68
|
338 String b=s.substring(pound+1);
|
jpayne@68
|
339 extra.add(a);
|
jpayne@68
|
340 extra.add(b);
|
jpayne@68
|
341 }else{
|
jpayne@68
|
342 extra.add(s);
|
jpayne@68
|
343 }
|
jpayne@68
|
344 }
|
jpayne@68
|
345 }
|
jpayne@68
|
346
|
jpayne@68
|
347 {
|
jpayne@68
|
348 boolean allowDuplicates=true;
|
jpayne@68
|
349 if(!Tools.testInputFiles(allowDuplicates, true, in1, in2, extra)){
|
jpayne@68
|
350 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
351 }
|
jpayne@68
|
352 }
|
jpayne@68
|
353 assert(THREADS>0);
|
jpayne@68
|
354
|
jpayne@68
|
355 if(DISPLAY_PROGRESS){
|
jpayne@68
|
356 // assert(false);
|
jpayne@68
|
357 outstream.println("Initial:");
|
jpayne@68
|
358 outstream.println("Ways="+ways+", initialSize="+initialSize+", prefilter="+(prefilter ? "t" : "f")+", prealloc="+(prealloc ? (""+preallocFraction) : "f"));
|
jpayne@68
|
359 Shared.printMemory();
|
jpayne@68
|
360 outstream.println();
|
jpayne@68
|
361 }
|
jpayne@68
|
362 }
|
jpayne@68
|
363
|
jpayne@68
|
364 /*--------------------------------------------------------------*/
|
jpayne@68
|
365 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
366 /*--------------------------------------------------------------*/
|
jpayne@68
|
367
|
jpayne@68
|
368 @Override
|
jpayne@68
|
369 public void clear(){
|
jpayne@68
|
370 tables=null;
|
jpayne@68
|
371 }
|
jpayne@68
|
372
|
jpayne@68
|
373 /*--------------------------------------------------------------*/
|
jpayne@68
|
374 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
375 /*--------------------------------------------------------------*/
|
jpayne@68
|
376
|
jpayne@68
|
377 @Override
|
jpayne@68
|
378 public void allocateTables(){
|
jpayne@68
|
379 assert(tables==null);
|
jpayne@68
|
380 tables=null;
|
jpayne@68
|
381 final long coreMask=(MASK_CORE ? ~(((-1L)<<(2*(k-1)))|3) : -1L);
|
jpayne@68
|
382
|
jpayne@68
|
383 ScheduleMaker scheduleMaker=new ScheduleMaker(ways, bytesPerKmer, prealloc,
|
jpayne@68
|
384 (prealloc ? preallocFraction : 1.0), -1, (prefilter ? prepasses : 0), prefilterFraction, filterMemoryOverride);
|
jpayne@68
|
385 int[] schedule=scheduleMaker.makeSchedule();
|
jpayne@68
|
386 //assert(false) : preallocFraction+", "+ways+", "+Arrays.toString(schedule);
|
jpayne@68
|
387 // System.err.println("DEBUG "+preallocFraction+", "+ways+", "+Arrays.toString(schedule));
|
jpayne@68
|
388 tables=AbstractKmerTable.preallocate(ways, tableType, schedule, coreMask);
|
jpayne@68
|
389
|
jpayne@68
|
390 // tables=AbstractKmerTable.preallocate(ways, tableType, initialSize, coreMask, (!prealloc || preallocFraction<1));
|
jpayne@68
|
391 }
|
jpayne@68
|
392
|
jpayne@68
|
393 /**
|
jpayne@68
|
394 * Load reads into tables, using multiple LoadThread.
|
jpayne@68
|
395 */
|
jpayne@68
|
396 @Override
|
jpayne@68
|
397 public long loadKmers(String fname1, String fname2){
|
jpayne@68
|
398
|
jpayne@68
|
399 /* Create read input stream */
|
jpayne@68
|
400 final ConcurrentReadInputStream cris;
|
jpayne@68
|
401 {
|
jpayne@68
|
402 FileFormat ff1=FileFormat.testInput(fname1, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
403 FileFormat ff2=FileFormat.testInput(fname2, FileFormat.FASTQ, null, true, true);
|
jpayne@68
|
404 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, false, ff1, ff2);
|
jpayne@68
|
405 cris.start(); //4567
|
jpayne@68
|
406 }
|
jpayne@68
|
407
|
jpayne@68
|
408 // /* Optionally skip the first reads, since initial reads may have lower quality */
|
jpayne@68
|
409 // if(skipreads>0){
|
jpayne@68
|
410 // long skipped=0;
|
jpayne@68
|
411 //
|
jpayne@68
|
412 // ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
413 // ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
414 //
|
jpayne@68
|
415 // while(skipped<skipreads && reads!=null && reads.size()>0){
|
jpayne@68
|
416 // skipped+=reads.size();
|
jpayne@68
|
417 //
|
jpayne@68
|
418 // cris.returnList(ln);
|
jpayne@68
|
419 // ln=cris.nextList();
|
jpayne@68
|
420 // reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
421 // }
|
jpayne@68
|
422 // cris.returnList(ln);
|
jpayne@68
|
423 // if(reads==null || reads.isEmpty()){
|
jpayne@68
|
424 // ReadWrite.closeStreams(cris);
|
jpayne@68
|
425 // System.err.println("Skipped all of the reads.");
|
jpayne@68
|
426 // System.exit(0);
|
jpayne@68
|
427 // }
|
jpayne@68
|
428 // }
|
jpayne@68
|
429
|
jpayne@68
|
430 /* Create ProcessThreads */
|
jpayne@68
|
431 ArrayList<LoadThread> alpt=new ArrayList<LoadThread>(THREADS);
|
jpayne@68
|
432 for(int i=0; i<THREADS; i++){alpt.add(new LoadThread(cris));}
|
jpayne@68
|
433 for(LoadThread pt : alpt){pt.start();}
|
jpayne@68
|
434
|
jpayne@68
|
435 long added=0;
|
jpayne@68
|
436
|
jpayne@68
|
437 /* Wait for threads to die, and gather statistics */
|
jpayne@68
|
438 for(LoadThread pt : alpt){
|
jpayne@68
|
439 while(pt.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
440 try {
|
jpayne@68
|
441 pt.join();
|
jpayne@68
|
442 } catch (InterruptedException e) {
|
jpayne@68
|
443 // TODO Auto-generated catch block
|
jpayne@68
|
444 e.printStackTrace();
|
jpayne@68
|
445 }
|
jpayne@68
|
446 }
|
jpayne@68
|
447 added+=pt.added;
|
jpayne@68
|
448
|
jpayne@68
|
449 readsIn+=pt.readsInT;
|
jpayne@68
|
450 basesIn+=pt.basesInT;
|
jpayne@68
|
451 lowqReads+=pt.lowqReadsT;
|
jpayne@68
|
452 lowqBases+=pt.lowqBasesT;
|
jpayne@68
|
453 readsTrimmed+=pt.readsTrimmedT;
|
jpayne@68
|
454 basesTrimmed+=pt.basesTrimmedT;
|
jpayne@68
|
455 kmersIn+=pt.kmersInT;
|
jpayne@68
|
456 }
|
jpayne@68
|
457
|
jpayne@68
|
458 /* Shut down I/O streams; capture error status */
|
jpayne@68
|
459 errorState|=ReadWrite.closeStreams(cris);
|
jpayne@68
|
460 return added;
|
jpayne@68
|
461 }
|
jpayne@68
|
462
|
jpayne@68
|
463 /*--------------------------------------------------------------*/
|
jpayne@68
|
464 /*---------------- Inner Classes ----------------*/
|
jpayne@68
|
465 /*--------------------------------------------------------------*/
|
jpayne@68
|
466
|
jpayne@68
|
467 /**
|
jpayne@68
|
468 * Loads kmers.
|
jpayne@68
|
469 */
|
jpayne@68
|
470 private class LoadThread extends Thread{
|
jpayne@68
|
471
|
jpayne@68
|
472 /**
|
jpayne@68
|
473 * Constructor
|
jpayne@68
|
474 * @param cris_ Read input stream
|
jpayne@68
|
475 */
|
jpayne@68
|
476 public LoadThread(ConcurrentReadInputStream cris_){
|
jpayne@68
|
477 cris=cris_;
|
jpayne@68
|
478 table=new HashBuffer(tables, buflen, k, false, true);
|
jpayne@68
|
479 }
|
jpayne@68
|
480
|
jpayne@68
|
481 @Override
|
jpayne@68
|
482 public void run(){
|
jpayne@68
|
483 ListNum<Read> ln=cris.nextList();
|
jpayne@68
|
484 ArrayList<Read> reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
485
|
jpayne@68
|
486 //While there are more reads lists...
|
jpayne@68
|
487 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
|
jpayne@68
|
488
|
jpayne@68
|
489 //For each read (or pair) in the list...
|
jpayne@68
|
490 for(int i=0; i<reads.size(); i++){
|
jpayne@68
|
491 Read r1=reads.get(i);
|
jpayne@68
|
492 Read r2=r1.mate;
|
jpayne@68
|
493
|
jpayne@68
|
494 if(!r1.validated()){r1.validate(true);}
|
jpayne@68
|
495 if(r2!=null && !r2.validated()){r2.validate(true);}
|
jpayne@68
|
496
|
jpayne@68
|
497 if(verbose){System.err.println("Considering read "+r1.id+" "+new String(r1.bases));}
|
jpayne@68
|
498
|
jpayne@68
|
499 readsInT++;
|
jpayne@68
|
500 basesInT+=r1.length();
|
jpayne@68
|
501 if(r2!=null){
|
jpayne@68
|
502 readsInT++;
|
jpayne@68
|
503 basesInT+=r2.length();
|
jpayne@68
|
504 }
|
jpayne@68
|
505
|
jpayne@68
|
506 if(maxNs<Integer.MAX_VALUE){
|
jpayne@68
|
507 if(r1!=null && r1.countUndefined()>maxNs){r1.setDiscarded(true);}
|
jpayne@68
|
508 if(r2!=null && r2.countUndefined()>maxNs){r2.setDiscarded(true);}
|
jpayne@68
|
509 }
|
jpayne@68
|
510
|
jpayne@68
|
511 //Determine whether to discard the reads based on average quality
|
jpayne@68
|
512 if(minAvgQuality>0){
|
jpayne@68
|
513 if(r1!=null && r1.quality!=null && r1.avgQuality(false, minAvgQualityBases)<minAvgQuality){r1.setDiscarded(true);}
|
jpayne@68
|
514 if(r2!=null && r2.quality!=null && r2.avgQuality(false, minAvgQualityBases)<minAvgQuality){r2.setDiscarded(true);}
|
jpayne@68
|
515 }
|
jpayne@68
|
516
|
jpayne@68
|
517 if(r1!=null){
|
jpayne@68
|
518 if(qtrimLeft || qtrimRight){
|
jpayne@68
|
519 int x=TrimRead.trimFast(r1, qtrimLeft, qtrimRight, trimq, trimE, 1, false);
|
jpayne@68
|
520 basesTrimmedT+=x;
|
jpayne@68
|
521 readsTrimmedT+=(x>0 ? 1 : 0);
|
jpayne@68
|
522 }
|
jpayne@68
|
523 if(r1.length()<k){r1.setDiscarded(true);}
|
jpayne@68
|
524 }
|
jpayne@68
|
525 if(r2!=null){
|
jpayne@68
|
526 if(qtrimLeft || qtrimRight){
|
jpayne@68
|
527 int x=TrimRead.trimFast(r2, qtrimLeft, qtrimRight, trimq, trimE, 1, false);
|
jpayne@68
|
528 basesTrimmedT+=x;
|
jpayne@68
|
529 readsTrimmedT+=(x>0 ? 1 : 0);
|
jpayne@68
|
530 }
|
jpayne@68
|
531 if(r2.length()<k){r2.setDiscarded(true);}
|
jpayne@68
|
532 }
|
jpayne@68
|
533
|
jpayne@68
|
534 if((ecco || merge) && r1!=null && r2!=null && !r1.discarded() && !r2.discarded()){
|
jpayne@68
|
535 if(merge){
|
jpayne@68
|
536 final int insert=BBMerge.findOverlapStrict(r1, r2, false);
|
jpayne@68
|
537 if(insert>0){
|
jpayne@68
|
538 r2.reverseComplement();
|
jpayne@68
|
539 r1=r1.joinRead(insert);
|
jpayne@68
|
540 r2=null;
|
jpayne@68
|
541 }
|
jpayne@68
|
542 }else if(ecco){
|
jpayne@68
|
543 BBMerge.findOverlapStrict(r1, r2, true);
|
jpayne@68
|
544 }
|
jpayne@68
|
545 }
|
jpayne@68
|
546
|
jpayne@68
|
547 if(minLen>0){
|
jpayne@68
|
548 if(r1!=null && r1.length()<minLen){r1.setDiscarded(true);}
|
jpayne@68
|
549 if(r2!=null && r2.length()<minLen){r2.setDiscarded(true);}
|
jpayne@68
|
550 }
|
jpayne@68
|
551
|
jpayne@68
|
552 if(r1!=null){
|
jpayne@68
|
553 if(r1.discarded()){
|
jpayne@68
|
554 lowqBasesT+=r1.length();
|
jpayne@68
|
555 lowqReadsT++;
|
jpayne@68
|
556 }else{
|
jpayne@68
|
557 long temp=addKmersToTable(r1);
|
jpayne@68
|
558 added+=temp;
|
jpayne@68
|
559 if(verbose){System.err.println("A: Added "+temp);}
|
jpayne@68
|
560 }
|
jpayne@68
|
561 }
|
jpayne@68
|
562 if(r2!=null){
|
jpayne@68
|
563 if(r2.discarded()){
|
jpayne@68
|
564 lowqBasesT+=r2.length();
|
jpayne@68
|
565 lowqReadsT++;
|
jpayne@68
|
566 }else{
|
jpayne@68
|
567 long temp=addKmersToTable(r2);
|
jpayne@68
|
568 added+=temp;
|
jpayne@68
|
569 if(verbose){System.err.println("B: Added "+temp);}
|
jpayne@68
|
570 }
|
jpayne@68
|
571 }
|
jpayne@68
|
572 }
|
jpayne@68
|
573
|
jpayne@68
|
574 //Fetch a new read list
|
jpayne@68
|
575 cris.returnList(ln);
|
jpayne@68
|
576 ln=cris.nextList();
|
jpayne@68
|
577 reads=(ln!=null ? ln.list : null);
|
jpayne@68
|
578 }
|
jpayne@68
|
579 cris.returnList(ln);
|
jpayne@68
|
580 long temp=table.flush();
|
jpayne@68
|
581 if(verbose){System.err.println("Flush: Added "+temp);}
|
jpayne@68
|
582 added+=temp;
|
jpayne@68
|
583 }
|
jpayne@68
|
584
|
jpayne@68
|
585 private final int addKmersToTableAA(final Read r){
|
jpayne@68
|
586 if(r==null || r.bases==null){return 0;}
|
jpayne@68
|
587 final float minProb2=(minProbMain ? minProb : 0);
|
jpayne@68
|
588 final byte[] bases=r.bases;
|
jpayne@68
|
589 final byte[] quals=r.quality;
|
jpayne@68
|
590 final int shift=5*k;
|
jpayne@68
|
591 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
592 long kmer=0;
|
jpayne@68
|
593 int created=0;
|
jpayne@68
|
594 int len=0;
|
jpayne@68
|
595
|
jpayne@68
|
596 if(bases==null || bases.length<k){return -1;}
|
jpayne@68
|
597
|
jpayne@68
|
598 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
599 float prob=1;
|
jpayne@68
|
600 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
601 final byte b=bases[i];
|
jpayne@68
|
602 final long x=AminoAcid.acidToNumber[b];
|
jpayne@68
|
603
|
jpayne@68
|
604 //Update kmers
|
jpayne@68
|
605 kmer=((kmer<<5)|x)&mask;
|
jpayne@68
|
606
|
jpayne@68
|
607 if(minProb2>0 && quals!=null){//Update probability
|
jpayne@68
|
608 prob=prob*PROB_CORRECT[quals[i]];
|
jpayne@68
|
609 if(len>k){
|
jpayne@68
|
610 byte oldq=quals[i-k];
|
jpayne@68
|
611 prob=prob*PROB_CORRECT_INVERSE[oldq];
|
jpayne@68
|
612 }
|
jpayne@68
|
613 }
|
jpayne@68
|
614
|
jpayne@68
|
615 //Handle Ns
|
jpayne@68
|
616 if(x<0){
|
jpayne@68
|
617 len=0;
|
jpayne@68
|
618 kmer=0;
|
jpayne@68
|
619 prob=1;
|
jpayne@68
|
620 }else{len++;}
|
jpayne@68
|
621
|
jpayne@68
|
622 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
623 if(len>=k && prob>=minProb2){
|
jpayne@68
|
624 kmersInT++;
|
jpayne@68
|
625 final long key=kmer;
|
jpayne@68
|
626 if(!prefilter || prefilterArray.read(key)>filterMax2){
|
jpayne@68
|
627 int temp=table.incrementAndReturnNumCreated(key, 1);
|
jpayne@68
|
628 created+=temp;
|
jpayne@68
|
629 if(verbose){System.err.println("C: Added "+temp);}
|
jpayne@68
|
630 }
|
jpayne@68
|
631 }
|
jpayne@68
|
632 }
|
jpayne@68
|
633
|
jpayne@68
|
634 return created;
|
jpayne@68
|
635 }
|
jpayne@68
|
636
|
jpayne@68
|
637 private final int addKmersToTable(final Read r){
|
jpayne@68
|
638 if(amino){return addKmersToTableAA(r);}
|
jpayne@68
|
639 if(onePass){return addKmersToTable_onePass(r);}
|
jpayne@68
|
640 if(r==null || r.bases==null){return 0;}
|
jpayne@68
|
641 final float minProb2=(minProbMain ? minProb : 0);
|
jpayne@68
|
642 final byte[] bases=r.bases;
|
jpayne@68
|
643 final byte[] quals=r.quality;
|
jpayne@68
|
644 final int shift=2*k;
|
jpayne@68
|
645 final int shift2=shift-2;
|
jpayne@68
|
646 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
647 long kmer=0;
|
jpayne@68
|
648 long rkmer=0;
|
jpayne@68
|
649 int created=0;
|
jpayne@68
|
650 int len=0;
|
jpayne@68
|
651
|
jpayne@68
|
652 if(bases==null || bases.length<k){return -1;}
|
jpayne@68
|
653
|
jpayne@68
|
654 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
655 float prob=1;
|
jpayne@68
|
656 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
657 final byte b=bases[i];
|
jpayne@68
|
658 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
659 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
660
|
jpayne@68
|
661 //Update kmers
|
jpayne@68
|
662 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
663 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
664
|
jpayne@68
|
665 if(minProb2>0 && quals!=null){//Update probability
|
jpayne@68
|
666 prob=prob*PROB_CORRECT[quals[i]];
|
jpayne@68
|
667 if(len>k){
|
jpayne@68
|
668 byte oldq=quals[i-k];
|
jpayne@68
|
669 prob=prob*PROB_CORRECT_INVERSE[oldq];
|
jpayne@68
|
670 }
|
jpayne@68
|
671 }
|
jpayne@68
|
672
|
jpayne@68
|
673 //Handle Ns
|
jpayne@68
|
674 if(x<0){
|
jpayne@68
|
675 len=0;
|
jpayne@68
|
676 kmer=rkmer=0;
|
jpayne@68
|
677 prob=1;
|
jpayne@68
|
678 }else{len++;}
|
jpayne@68
|
679
|
jpayne@68
|
680 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
681 if(len>=k && prob>=minProb2){
|
jpayne@68
|
682 kmersInT++;
|
jpayne@68
|
683 final long key=toValue(kmer, rkmer);
|
jpayne@68
|
684 if(!prefilter || prefilterArray.read(key)>filterMax2){
|
jpayne@68
|
685 int temp=table.incrementAndReturnNumCreated(key, 1);
|
jpayne@68
|
686 created+=temp;
|
jpayne@68
|
687 if(verbose){System.err.println("C: Added "+temp);}
|
jpayne@68
|
688 }
|
jpayne@68
|
689 }
|
jpayne@68
|
690 }
|
jpayne@68
|
691
|
jpayne@68
|
692 return created;
|
jpayne@68
|
693 }
|
jpayne@68
|
694
|
jpayne@68
|
695
|
jpayne@68
|
696 private final int addKmersToTable_onePass(final Read r){
|
jpayne@68
|
697 assert(prefilter);
|
jpayne@68
|
698 if(r==null || r.bases==null){return 0;}
|
jpayne@68
|
699 final byte[] bases=r.bases;
|
jpayne@68
|
700 final byte[] quals=r.quality;
|
jpayne@68
|
701 final int shift=2*k;
|
jpayne@68
|
702 final int shift2=shift-2;
|
jpayne@68
|
703 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
704 long kmer=0;
|
jpayne@68
|
705 long rkmer=0;
|
jpayne@68
|
706 int created=0;
|
jpayne@68
|
707 int len=0;
|
jpayne@68
|
708
|
jpayne@68
|
709 if(bases==null || bases.length<k){return -1;}
|
jpayne@68
|
710
|
jpayne@68
|
711 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
712 float prob=1;
|
jpayne@68
|
713 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
714 final byte b=bases[i];
|
jpayne@68
|
715 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
716 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
717
|
jpayne@68
|
718 //Update kmers
|
jpayne@68
|
719 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
720 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
721
|
jpayne@68
|
722 if(minProb>0 && quals!=null){//Update probability
|
jpayne@68
|
723 prob=prob*PROB_CORRECT[quals[i]];
|
jpayne@68
|
724 if(len>k){
|
jpayne@68
|
725 byte oldq=quals[i-k];
|
jpayne@68
|
726 prob=prob*PROB_CORRECT_INVERSE[oldq];
|
jpayne@68
|
727 }
|
jpayne@68
|
728 }
|
jpayne@68
|
729
|
jpayne@68
|
730 //Handle Ns
|
jpayne@68
|
731 if(x<0){
|
jpayne@68
|
732 len=0;
|
jpayne@68
|
733 kmer=rkmer=0;
|
jpayne@68
|
734 prob=1;
|
jpayne@68
|
735 }else{len++;}
|
jpayne@68
|
736
|
jpayne@68
|
737 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
738 if(len>=k){
|
jpayne@68
|
739 final long key=toValue(kmer, rkmer);
|
jpayne@68
|
740 int count=prefilterArray.incrementAndReturnUnincremented(key, 1);
|
jpayne@68
|
741 if(count>=filterMax2){
|
jpayne@68
|
742 int temp=table.incrementAndReturnNumCreated(key, 1);
|
jpayne@68
|
743 created+=temp;
|
jpayne@68
|
744 if(verbose){System.err.println("D: Added "+temp);}
|
jpayne@68
|
745 }
|
jpayne@68
|
746 }
|
jpayne@68
|
747 }
|
jpayne@68
|
748 return created;
|
jpayne@68
|
749 }
|
jpayne@68
|
750
|
jpayne@68
|
751 /*--------------------------------------------------------------*/
|
jpayne@68
|
752
|
jpayne@68
|
753 /** Input read stream */
|
jpayne@68
|
754 private final ConcurrentReadInputStream cris;
|
jpayne@68
|
755
|
jpayne@68
|
756 private final HashBuffer table;
|
jpayne@68
|
757
|
jpayne@68
|
758 public long added=0;
|
jpayne@68
|
759
|
jpayne@68
|
760 private long readsInT=0;
|
jpayne@68
|
761 private long basesInT=0;
|
jpayne@68
|
762 private long lowqReadsT=0;
|
jpayne@68
|
763 private long lowqBasesT=0;
|
jpayne@68
|
764 private long readsTrimmedT=0;
|
jpayne@68
|
765 private long basesTrimmedT=0;
|
jpayne@68
|
766 private long kmersInT=0;
|
jpayne@68
|
767
|
jpayne@68
|
768 }
|
jpayne@68
|
769
|
jpayne@68
|
770
|
jpayne@68
|
771 /*--------------------------------------------------------------*/
|
jpayne@68
|
772 /*---------------- Convenience ----------------*/
|
jpayne@68
|
773 /*--------------------------------------------------------------*/
|
jpayne@68
|
774
|
jpayne@68
|
775 public void regenerateKmers(byte[] bases, LongList kmers, IntList counts, final int a){
|
jpayne@68
|
776 final int loc=a+k;
|
jpayne@68
|
777 final int lim=Tools.min(counts.size, a+k+1);
|
jpayne@68
|
778 final int shift=2*k;
|
jpayne@68
|
779 final int shift2=shift-2;
|
jpayne@68
|
780 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
781 long kmer=kmers.get(a);
|
jpayne@68
|
782 long rkmer=rcomp(kmer);
|
jpayne@68
|
783 int len=k;
|
jpayne@68
|
784
|
jpayne@68
|
785 // assert(false) : a+", "+loc+", "+lim;
|
jpayne@68
|
786
|
jpayne@68
|
787 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
788 for(int i=loc, j=a+1; j<lim; i++, j++){
|
jpayne@68
|
789 final byte b=bases[i];
|
jpayne@68
|
790 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
791 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
792 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
793 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
794 if(x<0){
|
jpayne@68
|
795 len=0;
|
jpayne@68
|
796 kmer=rkmer=0;
|
jpayne@68
|
797 }else{len++;}
|
jpayne@68
|
798
|
jpayne@68
|
799 if(len>=k){
|
jpayne@68
|
800 assert(kmers.get(j)!=kmer);
|
jpayne@68
|
801 kmers.set(j, kmer);
|
jpayne@68
|
802 int count=getCount(kmer, rkmer);
|
jpayne@68
|
803 counts.set(j, count);
|
jpayne@68
|
804 }else{
|
jpayne@68
|
805 kmers.set(j, -1);
|
jpayne@68
|
806 counts.set(j, 0);
|
jpayne@68
|
807 }
|
jpayne@68
|
808 }
|
jpayne@68
|
809 }
|
jpayne@68
|
810
|
jpayne@68
|
811 @Override
|
jpayne@68
|
812 public int regenerateCounts(byte[] bases, IntList counts, final Kmer dummy, BitSet changed){
|
jpayne@68
|
813 assert(!changed.isEmpty());
|
jpayne@68
|
814 final int firstBase=changed.nextSetBit(0), lastBase=changed.length()-1;
|
jpayne@68
|
815 final int ca=firstBase-k;
|
jpayne@68
|
816 // final int b=changed.nextSetBit(0);ca+kbig; //first base changed
|
jpayne@68
|
817 final int firstCount=Tools.max(firstBase-k+1, 0), lastCount=Tools.min(counts.size-1, lastBase); //count limit
|
jpayne@68
|
818 // System.err.println("ca="+ca+", b="+b+", lim="+lim);
|
jpayne@68
|
819 // System.err.println("Regen from count "+(ca+1)+"-"+lim);
|
jpayne@68
|
820
|
jpayne@68
|
821 final int shift=2*k;
|
jpayne@68
|
822 final int shift2=shift-2;
|
jpayne@68
|
823 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
824 long kmer=0, rkmer=0;
|
jpayne@68
|
825 int len=0;
|
jpayne@68
|
826 int valid=0;
|
jpayne@68
|
827
|
jpayne@68
|
828 // System.err.println("ca="+ca+", b="+b+", lim="+lim+", "+counts);
|
jpayne@68
|
829
|
jpayne@68
|
830 for(int i=Tools.max(0, firstBase-k+1), lim=Tools.min(lastBase+k-1, bases.length-1); i<=lim; i++){
|
jpayne@68
|
831 final byte base=bases[i];
|
jpayne@68
|
832 final long x=AminoAcid.baseToNumber[base];
|
jpayne@68
|
833 final long x2=AminoAcid.baseToComplementNumber[base];
|
jpayne@68
|
834 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
835 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
836
|
jpayne@68
|
837 if(x<0){
|
jpayne@68
|
838 len=0;
|
jpayne@68
|
839 kmer=rkmer=0;
|
jpayne@68
|
840 }else{
|
jpayne@68
|
841 len++;
|
jpayne@68
|
842 }
|
jpayne@68
|
843
|
jpayne@68
|
844 final int c=i-k+1;
|
jpayne@68
|
845 if(i>=firstBase){
|
jpayne@68
|
846 if(len>=k){
|
jpayne@68
|
847 valid++;
|
jpayne@68
|
848 int count=getCount(kmer, rkmer);
|
jpayne@68
|
849 counts.set(c, count);
|
jpayne@68
|
850 }else if(c>=0){
|
jpayne@68
|
851 counts.set(c, 0);
|
jpayne@68
|
852 }
|
jpayne@68
|
853 }
|
jpayne@68
|
854 }
|
jpayne@68
|
855
|
jpayne@68
|
856 return valid;
|
jpayne@68
|
857 }
|
jpayne@68
|
858
|
jpayne@68
|
859 @Override
|
jpayne@68
|
860 public int fillSpecificCounts(byte[] bases, IntList counts, BitSet positions, final Kmer dummy){
|
jpayne@68
|
861 final int b=k-1;
|
jpayne@68
|
862 final int lim=(positions==null ? bases.length : Tools.min(bases.length, positions.length()+k-1));
|
jpayne@68
|
863 final int shift=2*k;
|
jpayne@68
|
864 final int shift2=shift-2;
|
jpayne@68
|
865 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
866 long kmer=0, rkmer=0;
|
jpayne@68
|
867 int len=0;
|
jpayne@68
|
868 int valid=0;
|
jpayne@68
|
869
|
jpayne@68
|
870 counts.clear();
|
jpayne@68
|
871
|
jpayne@68
|
872 for(int i=0, j=-b; i<lim; i++, j++){
|
jpayne@68
|
873 final byte base=bases[i];
|
jpayne@68
|
874 final long x=AminoAcid.baseToNumber[base];
|
jpayne@68
|
875 final long x2=AminoAcid.baseToComplementNumber[base];
|
jpayne@68
|
876 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
877 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
878
|
jpayne@68
|
879 if(x<0){
|
jpayne@68
|
880 len=0;
|
jpayne@68
|
881 kmer=rkmer=0;
|
jpayne@68
|
882 }else{
|
jpayne@68
|
883 len++;
|
jpayne@68
|
884 }
|
jpayne@68
|
885
|
jpayne@68
|
886 if(j>=0){
|
jpayne@68
|
887 if(len>=k && (positions==null || positions.get(j))){
|
jpayne@68
|
888 valid++;
|
jpayne@68
|
889 int count=getCount(kmer, rkmer);
|
jpayne@68
|
890 assert(i-k+1==counts.size()) : "j="+j+", counts="+counts.size()+", b="+(b)+", (i-k+1)="+(i-k+1);
|
jpayne@68
|
891 assert(j==counts.size());
|
jpayne@68
|
892 counts.add(Tools.max(count, 0));
|
jpayne@68
|
893 // counts.set(i-k+1, count);
|
jpayne@68
|
894 }else{
|
jpayne@68
|
895 counts.add(0);
|
jpayne@68
|
896 // counts.set(i-k+1, 0);
|
jpayne@68
|
897 }
|
jpayne@68
|
898 }
|
jpayne@68
|
899 }
|
jpayne@68
|
900
|
jpayne@68
|
901 // assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size;
|
jpayne@68
|
902 assert((counts.size==0 && bases.length<k) || counts.size==lim-k+1) : bases.length+", "+k+", "+counts.size;
|
jpayne@68
|
903
|
jpayne@68
|
904 return valid;
|
jpayne@68
|
905 }
|
jpayne@68
|
906
|
jpayne@68
|
907 public int regenerateCounts(byte[] bases, IntList counts, final int ca){
|
jpayne@68
|
908 final int b=ca+k-1;
|
jpayne@68
|
909 final int lim=Tools.min(bases.length, b+k+1);
|
jpayne@68
|
910 final int shift=2*k;
|
jpayne@68
|
911 final int shift2=shift-2;
|
jpayne@68
|
912 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
913 long kmer=0, rkmer=0;
|
jpayne@68
|
914 int len=0;
|
jpayne@68
|
915 int valid=0;
|
jpayne@68
|
916
|
jpayne@68
|
917 final int clen=counts.size;
|
jpayne@68
|
918
|
jpayne@68
|
919 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts.
|
jpayne@68
|
920 * i is an index in the base array, j is an index in the count array. */
|
jpayne@68
|
921 for(int i=b-k+1; i<lim; i++){
|
jpayne@68
|
922 final byte base=bases[i];
|
jpayne@68
|
923 final long x=AminoAcid.baseToNumber[base];
|
jpayne@68
|
924 final long x2=AminoAcid.baseToComplementNumber[base];
|
jpayne@68
|
925 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
926 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
927
|
jpayne@68
|
928 if(x<0){
|
jpayne@68
|
929 len=0;
|
jpayne@68
|
930 kmer=rkmer=0;
|
jpayne@68
|
931 }else{
|
jpayne@68
|
932 len++;
|
jpayne@68
|
933 }
|
jpayne@68
|
934
|
jpayne@68
|
935 if(i>=b){
|
jpayne@68
|
936 if(len>=k){
|
jpayne@68
|
937 valid++;
|
jpayne@68
|
938 int count=getCount(kmer, rkmer);
|
jpayne@68
|
939 counts.set(i-k+1, count);
|
jpayne@68
|
940 }else{
|
jpayne@68
|
941 counts.set(i-k+1, 0);
|
jpayne@68
|
942 }
|
jpayne@68
|
943 }
|
jpayne@68
|
944 }
|
jpayne@68
|
945
|
jpayne@68
|
946 assert((counts.size==0 && bases.length<k) || counts.size==bases.length-k+1) : bases.length+", "+k+", "+counts.size;
|
jpayne@68
|
947 assert(clen==counts.size);
|
jpayne@68
|
948
|
jpayne@68
|
949 return valid;
|
jpayne@68
|
950 }
|
jpayne@68
|
951
|
jpayne@68
|
952 /** Returns number of valid kmers */
|
jpayne@68
|
953 public int fillKmers(byte[] bases, LongList kmers){
|
jpayne@68
|
954 final int blen=bases.length;
|
jpayne@68
|
955 if(blen<k){return 0;}
|
jpayne@68
|
956 final int min=k-1;
|
jpayne@68
|
957 final int shift=2*k;
|
jpayne@68
|
958 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
959 long kmer=0;
|
jpayne@68
|
960 int len=0;
|
jpayne@68
|
961 int valid=0;
|
jpayne@68
|
962
|
jpayne@68
|
963 kmers.clear();
|
jpayne@68
|
964
|
jpayne@68
|
965 /* Loop through the bases, maintaining a forward kmer via bitshifts */
|
jpayne@68
|
966 for(int i=0; i<blen; i++){
|
jpayne@68
|
967 final byte b=bases[i];
|
jpayne@68
|
968 assert(b>=0) : Arrays.toString(bases);
|
jpayne@68
|
969 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
970 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
971 if(x<0){
|
jpayne@68
|
972 len=0;
|
jpayne@68
|
973 kmer=0;
|
jpayne@68
|
974 }else{len++;}
|
jpayne@68
|
975 if(i>=min){
|
jpayne@68
|
976 if(len>=k){
|
jpayne@68
|
977 kmers.add(kmer);
|
jpayne@68
|
978 valid++;
|
jpayne@68
|
979 }else{
|
jpayne@68
|
980 kmers.add(-1);
|
jpayne@68
|
981 }
|
jpayne@68
|
982 }
|
jpayne@68
|
983 }
|
jpayne@68
|
984 return valid;
|
jpayne@68
|
985 }
|
jpayne@68
|
986
|
jpayne@68
|
987 public void fillCounts(LongList kmers, IntList counts){
|
jpayne@68
|
988 counts.clear();
|
jpayne@68
|
989 for(int i=0; i<kmers.size; i++){
|
jpayne@68
|
990 long kmer=kmers.get(i);
|
jpayne@68
|
991 if(kmer>=0){
|
jpayne@68
|
992 long rkmer=rcomp(kmer);
|
jpayne@68
|
993 int count=getCount(kmer, rkmer);
|
jpayne@68
|
994 counts.add(count);
|
jpayne@68
|
995 }else{
|
jpayne@68
|
996 counts.add(0);
|
jpayne@68
|
997 }
|
jpayne@68
|
998 }
|
jpayne@68
|
999 }
|
jpayne@68
|
1000
|
jpayne@68
|
1001
|
jpayne@68
|
1002 /*--------------------------------------------------------------*/
|
jpayne@68
|
1003 /*---------------- Helper Methods ----------------*/
|
jpayne@68
|
1004 /*--------------------------------------------------------------*/
|
jpayne@68
|
1005
|
jpayne@68
|
1006 @Override
|
jpayne@68
|
1007 public long regenerate(final int limit){
|
jpayne@68
|
1008 long sum=0;
|
jpayne@68
|
1009 for(AbstractKmerTable akt : tables){
|
jpayne@68
|
1010 sum+=akt.regenerate(limit);
|
jpayne@68
|
1011 }
|
jpayne@68
|
1012 return sum;
|
jpayne@68
|
1013 }
|
jpayne@68
|
1014
|
jpayne@68
|
1015 public HashArray1D getTableForKey(long key){
|
jpayne@68
|
1016 return (HashArray1D) tables[kmerToWay(key)];
|
jpayne@68
|
1017 }
|
jpayne@68
|
1018
|
jpayne@68
|
1019 @Override
|
jpayne@68
|
1020 public HashArray1D getTable(int tnum){
|
jpayne@68
|
1021 return (HashArray1D) tables[tnum];
|
jpayne@68
|
1022 }
|
jpayne@68
|
1023
|
jpayne@68
|
1024 @Override
|
jpayne@68
|
1025 public long[] fillHistogram(int histMax) {
|
jpayne@68
|
1026 return HistogramMaker.fillHistogram(tables, histMax);
|
jpayne@68
|
1027 }
|
jpayne@68
|
1028
|
jpayne@68
|
1029 @Override
|
jpayne@68
|
1030 public void countGC(long[] gcCounts, int max) {
|
jpayne@68
|
1031 for(AbstractKmerTable set : tables){
|
jpayne@68
|
1032 set.countGC(gcCounts, max);
|
jpayne@68
|
1033 }
|
jpayne@68
|
1034 }
|
jpayne@68
|
1035
|
jpayne@68
|
1036 @Override
|
jpayne@68
|
1037 public void initializeOwnership(){
|
jpayne@68
|
1038 OwnershipThread.initialize(tables);
|
jpayne@68
|
1039 }
|
jpayne@68
|
1040
|
jpayne@68
|
1041 @Override
|
jpayne@68
|
1042 public void clearOwnership(){
|
jpayne@68
|
1043 OwnershipThread.clear(tables);
|
jpayne@68
|
1044 }
|
jpayne@68
|
1045
|
jpayne@68
|
1046 public long rightmostKmer(final ByteBuilder bb){
|
jpayne@68
|
1047 return rightmostKmer(bb.array, bb.length());
|
jpayne@68
|
1048 }
|
jpayne@68
|
1049
|
jpayne@68
|
1050 public long rightmostKmer(final byte[] bases, final int blen){
|
jpayne@68
|
1051 if(blen<k){return -1;}
|
jpayne@68
|
1052 final int shift=2*k;
|
jpayne@68
|
1053 final int shift2=shift-2;
|
jpayne@68
|
1054 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
1055 long kmer=0;
|
jpayne@68
|
1056 long rkmer=0;
|
jpayne@68
|
1057 int len=0;
|
jpayne@68
|
1058
|
jpayne@68
|
1059 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the rightmost kmer */
|
jpayne@68
|
1060 {
|
jpayne@68
|
1061 for(int i=blen-k; i<blen; i++){
|
jpayne@68
|
1062 final byte b=bases[i];
|
jpayne@68
|
1063 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
1064 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
1065 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
1066 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
1067 if(x<0){
|
jpayne@68
|
1068 len=0;
|
jpayne@68
|
1069 kmer=rkmer=0;
|
jpayne@68
|
1070 }else{len++;}
|
jpayne@68
|
1071 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
1072 }
|
jpayne@68
|
1073 }
|
jpayne@68
|
1074
|
jpayne@68
|
1075 if(len<k){return -1;}
|
jpayne@68
|
1076 else{assert(len==k);}
|
jpayne@68
|
1077 return kmer;
|
jpayne@68
|
1078 }
|
jpayne@68
|
1079
|
jpayne@68
|
1080 public long leftmostKmer(final ByteBuilder bb){
|
jpayne@68
|
1081 return leftmostKmer(bb.array, bb.length());
|
jpayne@68
|
1082 }
|
jpayne@68
|
1083
|
jpayne@68
|
1084 public long leftmostKmer(final byte[] bases, final int blen){
|
jpayne@68
|
1085 if(blen<k){return -1;}
|
jpayne@68
|
1086 final int shift=2*k;
|
jpayne@68
|
1087 final int shift2=shift-2;
|
jpayne@68
|
1088 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
1089 long kmer=0;
|
jpayne@68
|
1090 long rkmer=0;
|
jpayne@68
|
1091 int len=0;
|
jpayne@68
|
1092
|
jpayne@68
|
1093 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts, to get the leftmost kmer */
|
jpayne@68
|
1094 {
|
jpayne@68
|
1095 for(int i=0; i<k; i++){
|
jpayne@68
|
1096 final byte b=bases[i];
|
jpayne@68
|
1097 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
1098 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
1099 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
1100 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
1101 if(x<0){
|
jpayne@68
|
1102 len=0;
|
jpayne@68
|
1103 kmer=rkmer=0;
|
jpayne@68
|
1104 }else{len++;}
|
jpayne@68
|
1105 if(verbose){outstream.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
1106 }
|
jpayne@68
|
1107 }
|
jpayne@68
|
1108
|
jpayne@68
|
1109 if(len<k){return -1;}
|
jpayne@68
|
1110 else{assert(len==k);}
|
jpayne@68
|
1111 return kmer;
|
jpayne@68
|
1112 }
|
jpayne@68
|
1113
|
jpayne@68
|
1114 public boolean doubleClaim(final ByteBuilder bb, final int id){
|
jpayne@68
|
1115 return doubleClaim(bb.array, bb.length(), id);
|
jpayne@68
|
1116 }
|
jpayne@68
|
1117
|
jpayne@68
|
1118 /** Ensures there can be only one owner. */
|
jpayne@68
|
1119 public boolean doubleClaim(final byte[] bases, final int blength, final int id){
|
jpayne@68
|
1120 boolean success=claim(bases, blength, id, true);
|
jpayne@68
|
1121 if(verbose){outstream.println("success1="+success+", id="+id+", blength="+blength);}
|
jpayne@68
|
1122 if(!success){return false;}
|
jpayne@68
|
1123 success=claim(bases, blength, id+CLAIM_OFFSET, true);
|
jpayne@68
|
1124 if(verbose){outstream.println("success2="+success+", id="+id+", blength="+blength);}
|
jpayne@68
|
1125 return success;
|
jpayne@68
|
1126 }
|
jpayne@68
|
1127
|
jpayne@68
|
1128 public boolean claim(final ByteBuilder bb, final int id, final boolean exitEarly){
|
jpayne@68
|
1129 return claim(bb.array, bb.length(), id, exitEarly);
|
jpayne@68
|
1130 }
|
jpayne@68
|
1131
|
jpayne@68
|
1132 public float calcCoverage(final byte[] bases, final int blength){
|
jpayne@68
|
1133 final int shift=2*k;
|
jpayne@68
|
1134 final int shift2=shift-2;
|
jpayne@68
|
1135 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
1136 long kmer=0, rkmer=0;
|
jpayne@68
|
1137 int len=0;
|
jpayne@68
|
1138 long sum=0, max=0;
|
jpayne@68
|
1139 int kmers=0;
|
jpayne@68
|
1140
|
jpayne@68
|
1141 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
1142 for(int i=0; i<blength; i++){
|
jpayne@68
|
1143 final byte b=bases[i];
|
jpayne@68
|
1144 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
1145 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
1146 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
1147 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
1148 if(x<0){
|
jpayne@68
|
1149 len=0;
|
jpayne@68
|
1150 kmer=rkmer=0;
|
jpayne@68
|
1151 }else{len++;}
|
jpayne@68
|
1152 if(len>=k){
|
jpayne@68
|
1153 int count=getCount(kmer, rkmer);
|
jpayne@68
|
1154 sum+=count;
|
jpayne@68
|
1155 max=Tools.max(count, max);
|
jpayne@68
|
1156 kmers++;
|
jpayne@68
|
1157 }
|
jpayne@68
|
1158 }
|
jpayne@68
|
1159 return sum==0 ? 0 : sum/(float)kmers;
|
jpayne@68
|
1160 }
|
jpayne@68
|
1161
|
jpayne@68
|
1162 public float calcCoverage(final Contig contig){
|
jpayne@68
|
1163 final byte[] bases=contig.bases;
|
jpayne@68
|
1164 if(bases.length<k){return 0;}
|
jpayne@68
|
1165 final int shift=2*k;
|
jpayne@68
|
1166 final int shift2=shift-2;
|
jpayne@68
|
1167 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
1168 long kmer=0, rkmer=0;
|
jpayne@68
|
1169 int len=0;
|
jpayne@68
|
1170 long sum=0;
|
jpayne@68
|
1171 int max=0, min=Integer.MAX_VALUE;
|
jpayne@68
|
1172 int kmers=0;
|
jpayne@68
|
1173
|
jpayne@68
|
1174 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
1175 for(int i=0; i<bases.length; i++){
|
jpayne@68
|
1176 final byte b=bases[i];
|
jpayne@68
|
1177 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
1178 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
1179 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
1180 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
1181 if(x<0){
|
jpayne@68
|
1182 len=0;
|
jpayne@68
|
1183 kmer=rkmer=0;
|
jpayne@68
|
1184 }else{len++;}
|
jpayne@68
|
1185 if(len>=k){
|
jpayne@68
|
1186 int count=getCount(kmer, rkmer);
|
jpayne@68
|
1187 sum+=count;
|
jpayne@68
|
1188 max=Tools.max(count, max);
|
jpayne@68
|
1189 min=Tools.min(count, min);
|
jpayne@68
|
1190 kmers++;
|
jpayne@68
|
1191 }
|
jpayne@68
|
1192 }
|
jpayne@68
|
1193 contig.coverage=sum==0 ? 0 : sum/(float)kmers;
|
jpayne@68
|
1194 contig.maxCov=max;
|
jpayne@68
|
1195 contig.minCov=sum==0 ? 0 : min;
|
jpayne@68
|
1196 return contig.coverage;
|
jpayne@68
|
1197 }
|
jpayne@68
|
1198
|
jpayne@68
|
1199 public boolean claim(final byte[] bases, final int blength, final int id, boolean exitEarly){
|
jpayne@68
|
1200 if(verbose){outstream.println("Thread "+id+" claim start.");}
|
jpayne@68
|
1201 final int shift=2*k;
|
jpayne@68
|
1202 final int shift2=shift-2;
|
jpayne@68
|
1203 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
1204 long kmer=0, rkmer=0;
|
jpayne@68
|
1205 int len=0;
|
jpayne@68
|
1206 boolean success=true;
|
jpayne@68
|
1207 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
1208 for(int i=0; i<blength && success; i++){
|
jpayne@68
|
1209 final byte b=bases[i];
|
jpayne@68
|
1210 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
1211 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
1212 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
1213 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
1214 if(x<0){
|
jpayne@68
|
1215 len=0;
|
jpayne@68
|
1216 kmer=rkmer=0;
|
jpayne@68
|
1217 }else{len++;}
|
jpayne@68
|
1218 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
1219 if(len>=k){
|
jpayne@68
|
1220 success=claim(kmer, rkmer, id/*, rid, i*/);
|
jpayne@68
|
1221 success=(success || !exitEarly);
|
jpayne@68
|
1222 }
|
jpayne@68
|
1223 }
|
jpayne@68
|
1224 return success;
|
jpayne@68
|
1225 }
|
jpayne@68
|
1226
|
jpayne@68
|
1227 public boolean claim(final long kmer, final long rkmer, final int id/*, final long rid, final int pos*/){
|
jpayne@68
|
1228 //TODO: rid and pos are just for debugging.
|
jpayne@68
|
1229 final long key=toValue(kmer, rkmer);
|
jpayne@68
|
1230 final int way=kmerToWay(key);
|
jpayne@68
|
1231 final AbstractKmerTable table=tables[way];
|
jpayne@68
|
1232 final int count=table.getValue(key);
|
jpayne@68
|
1233 assert(count==-1 || count>0) : count;
|
jpayne@68
|
1234 // if(verbose /*|| true*/){outstream.println("Count="+count+".");}
|
jpayne@68
|
1235 if(count<0){return true;}
|
jpayne@68
|
1236 assert(count>0) : count;
|
jpayne@68
|
1237 final int owner=table.setOwner(key, id);
|
jpayne@68
|
1238 if(verbose){outstream.println("owner="+owner+".");}
|
jpayne@68
|
1239 // assert(owner==id) : id+", "+owner+", "+rid+", "+pos;
|
jpayne@68
|
1240 return owner==id;
|
jpayne@68
|
1241 }
|
jpayne@68
|
1242
|
jpayne@68
|
1243 public void release(ByteBuilder bb, final int id){
|
jpayne@68
|
1244 release(bb.array, bb.length(), id);
|
jpayne@68
|
1245 }
|
jpayne@68
|
1246
|
jpayne@68
|
1247 public void release(final byte[] bases, final int blength, final int id){
|
jpayne@68
|
1248 if(verbose /*|| true*/){outstream.println("*Thread "+id+" release start.");}
|
jpayne@68
|
1249 final int shift=2*k;
|
jpayne@68
|
1250 final int shift2=shift-2;
|
jpayne@68
|
1251 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
1252 long kmer=0, rkmer=0;
|
jpayne@68
|
1253 int len=0;
|
jpayne@68
|
1254 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
1255 for(int i=0; i<blength; i++){
|
jpayne@68
|
1256 final byte b=bases[i];
|
jpayne@68
|
1257 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
1258 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
1259 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
1260 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
1261 if(x<0){
|
jpayne@68
|
1262 len=0;
|
jpayne@68
|
1263 kmer=rkmer=0;
|
jpayne@68
|
1264 }else{len++;}
|
jpayne@68
|
1265 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
1266 if(len>=k){
|
jpayne@68
|
1267 release(kmer, rkmer, id);
|
jpayne@68
|
1268 }
|
jpayne@68
|
1269 }
|
jpayne@68
|
1270 }
|
jpayne@68
|
1271
|
jpayne@68
|
1272 public boolean release(final long kmer, final long rkmer, final int id){
|
jpayne@68
|
1273 return release(toValue(kmer, rkmer), id);
|
jpayne@68
|
1274 }
|
jpayne@68
|
1275
|
jpayne@68
|
1276 public boolean release(final long key, final int id){
|
jpayne@68
|
1277 final int way=kmerToWay(key);
|
jpayne@68
|
1278 final AbstractKmerTable table=tables[way];
|
jpayne@68
|
1279 final int count=table.getValue(key);
|
jpayne@68
|
1280 // if(verbose /*|| true*/){outstream.println("Count="+count+".");}
|
jpayne@68
|
1281 if(count<1){return true;}
|
jpayne@68
|
1282 return table.clearOwner(key, id);
|
jpayne@68
|
1283 }
|
jpayne@68
|
1284
|
jpayne@68
|
1285 public int findOwner(ByteBuilder bb, final int id){
|
jpayne@68
|
1286 return findOwner(bb.array, bb.length(), id);
|
jpayne@68
|
1287 }
|
jpayne@68
|
1288
|
jpayne@68
|
1289 public int findOwner(final byte[] bases, final int blength, final int id){
|
jpayne@68
|
1290 final int shift=2*k;
|
jpayne@68
|
1291 final int shift2=shift-2;
|
jpayne@68
|
1292 final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
|
jpayne@68
|
1293 long kmer=0, rkmer=0;
|
jpayne@68
|
1294 int len=0;
|
jpayne@68
|
1295 int maxOwner=-1;
|
jpayne@68
|
1296 /* Loop through the bases, maintaining a forward and reverse kmer via bitshifts */
|
jpayne@68
|
1297 for(int i=0; i<blength; i++){
|
jpayne@68
|
1298 final byte b=bases[i];
|
jpayne@68
|
1299 final long x=AminoAcid.baseToNumber[b];
|
jpayne@68
|
1300 final long x2=AminoAcid.baseToComplementNumber[b];
|
jpayne@68
|
1301 kmer=((kmer<<2)|x)&mask;
|
jpayne@68
|
1302 rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
|
jpayne@68
|
1303 if(x<0){
|
jpayne@68
|
1304 len=0;
|
jpayne@68
|
1305 kmer=rkmer=0;
|
jpayne@68
|
1306 }else{len++;}
|
jpayne@68
|
1307 if(verbose){System.err.println("Scanning i="+i+", len="+len+", kmer="+kmer+", rkmer="+rkmer+"\t"+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
|
jpayne@68
|
1308 if(len>=k){
|
jpayne@68
|
1309 int owner=findOwner(kmer, rkmer);
|
jpayne@68
|
1310 maxOwner=Tools.max(owner, maxOwner);
|
jpayne@68
|
1311 if(maxOwner>id){break;}
|
jpayne@68
|
1312 }
|
jpayne@68
|
1313 }
|
jpayne@68
|
1314 return maxOwner;
|
jpayne@68
|
1315 }
|
jpayne@68
|
1316
|
jpayne@68
|
1317 public int findOwner(final long kmer){
|
jpayne@68
|
1318 return findOwner(kmer, rcomp(kmer));
|
jpayne@68
|
1319 }
|
jpayne@68
|
1320
|
jpayne@68
|
1321 public int findOwner(final long kmer, final long rkmer){
|
jpayne@68
|
1322 final long key=toValue(kmer, rkmer);
|
jpayne@68
|
1323 final int way=kmerToWay(key);
|
jpayne@68
|
1324 final AbstractKmerTable table=tables[way];
|
jpayne@68
|
1325 final int count=table.getValue(key);
|
jpayne@68
|
1326 if(count<0){return -1;}
|
jpayne@68
|
1327 final int owner=table.getOwner(key);
|
jpayne@68
|
1328 return owner;
|
jpayne@68
|
1329 }
|
jpayne@68
|
1330
|
jpayne@68
|
1331 public int getCount(long kmer, long rkmer){
|
jpayne@68
|
1332 long key=toValue(kmer, rkmer);
|
jpayne@68
|
1333 int way=kmerToWay(key);
|
jpayne@68
|
1334 return tables[way].getValue(key);
|
jpayne@68
|
1335 }
|
jpayne@68
|
1336
|
jpayne@68
|
1337 public int getCount(long key){
|
jpayne@68
|
1338 int way=kmerToWay(key);
|
jpayne@68
|
1339 return tables[way].getValue(key);
|
jpayne@68
|
1340 }
|
jpayne@68
|
1341
|
jpayne@68
|
1342 /*--------------------------------------------------------------*/
|
jpayne@68
|
1343 /*---------------- Fill Counts ----------------*/
|
jpayne@68
|
1344 /*--------------------------------------------------------------*/
|
jpayne@68
|
1345
|
jpayne@68
|
1346 public int fillRightCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){
|
jpayne@68
|
1347 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){
|
jpayne@68
|
1348 return fillRightCounts_fast(kmer, rkmer, counts, mask, shift2);
|
jpayne@68
|
1349 }else{
|
jpayne@68
|
1350 return fillRightCounts_safe(kmer, rkmer, counts, mask, shift2);
|
jpayne@68
|
1351 }
|
jpayne@68
|
1352 }
|
jpayne@68
|
1353
|
jpayne@68
|
1354 public int fillRightCounts_fast(final long kmer0, final long rkmer0, int[] counts,
|
jpayne@68
|
1355 long mask, int shift2){
|
jpayne@68
|
1356 // assert((k&1)==1) : k;
|
jpayne@68
|
1357 assert(MASK_CORE);
|
jpayne@68
|
1358 final long kmer=(kmer0<<2)&mask;
|
jpayne@68
|
1359 final long rkmer=(rkmer0>>>2);
|
jpayne@68
|
1360 final long a=kmer&coreMask, b=rkmer&coreMask;
|
jpayne@68
|
1361
|
jpayne@68
|
1362 int max=-1, maxPos=0;
|
jpayne@68
|
1363 if(a==b){
|
jpayne@68
|
1364 return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
|
jpayne@68
|
1365 }else if(a>b){
|
jpayne@68
|
1366 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
|
jpayne@68
|
1367 final long core=a;
|
jpayne@68
|
1368 final int way=kmerToWay(core);
|
jpayne@68
|
1369 // final AbstractKmerTable table=tables[way];
|
jpayne@68
|
1370 final HashArray table=(HashArray) tables[way];
|
jpayne@68
|
1371 final int cell=table.kmerToCell(kmer);
|
jpayne@68
|
1372 for(int i=0; i<=3; i++){
|
jpayne@68
|
1373 final long key=kmer|i;
|
jpayne@68
|
1374 final int count=Tools.max(0, table.getValue(key, cell));
|
jpayne@68
|
1375 // final int count=Tools.max(0, table.getValue(key));
|
jpayne@68
|
1376 counts[i]=count;
|
jpayne@68
|
1377 if(count>max){
|
jpayne@68
|
1378 max=count;
|
jpayne@68
|
1379 maxPos=i;
|
jpayne@68
|
1380 }
|
jpayne@68
|
1381 }
|
jpayne@68
|
1382 }else{
|
jpayne@68
|
1383 // return fillRightCounts_safe(kmer0, rkmer0, counts, mask, shift2);
|
jpayne@68
|
1384 //Use a higher increment for the left bits
|
jpayne@68
|
1385 final long core=b;
|
jpayne@68
|
1386 final int way=kmerToWay(core);
|
jpayne@68
|
1387 // final AbstractKmerTable table=tables[way];
|
jpayne@68
|
1388 final HashArray table=(HashArray) tables[way];
|
jpayne@68
|
1389 final int cell=table.kmerToCell(rkmer);
|
jpayne@68
|
1390 final long incr=1L<<(2*(k-1));
|
jpayne@68
|
1391 long j=3*incr;
|
jpayne@68
|
1392 for(int i=0; i<=3; i++, j-=incr){
|
jpayne@68
|
1393 final long key=rkmer|j;
|
jpayne@68
|
1394 final int count=Tools.max(0, table.getValue(key, cell));
|
jpayne@68
|
1395 // final int count=Tools.max(0, table.getValue(key));
|
jpayne@68
|
1396 counts[i]=count;
|
jpayne@68
|
1397 if(count>max){
|
jpayne@68
|
1398 max=count;
|
jpayne@68
|
1399 maxPos=i;
|
jpayne@68
|
1400 }
|
jpayne@68
|
1401 }
|
jpayne@68
|
1402 }
|
jpayne@68
|
1403 return maxPos;
|
jpayne@68
|
1404 }
|
jpayne@68
|
1405
|
jpayne@68
|
1406 //TODO: Change this to take advantage of coreMask
|
jpayne@68
|
1407 //Requires special handling of core palindromes.
|
jpayne@68
|
1408 //Thus it would be easiest to just handle odd kmers, and K is normally 31 anyway.
|
jpayne@68
|
1409 public int fillRightCounts_safe(long kmer, long rkmer, int[] counts, long mask, int shift2){
|
jpayne@68
|
1410 assert(kmer==rcomp(rkmer));
|
jpayne@68
|
1411 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));}
|
jpayne@68
|
1412 kmer=(kmer<<2)&mask;
|
jpayne@68
|
1413 rkmer=(rkmer>>>2);
|
jpayne@68
|
1414 int max=-1, maxPos=0;
|
jpayne@68
|
1415
|
jpayne@68
|
1416 for(int i=0; i<=3; i++){
|
jpayne@68
|
1417 long kmer2=kmer|((long)i);
|
jpayne@68
|
1418 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2);
|
jpayne@68
|
1419 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
|
jpayne@68
|
1420 assert(kmer2==(kmer2&mask));
|
jpayne@68
|
1421 assert(rkmer2==(rkmer2&mask));
|
jpayne@68
|
1422 assert(kmer2==rcomp(rkmer2));
|
jpayne@68
|
1423 long key=toValue(kmer2, rkmer2);
|
jpayne@68
|
1424 int way=kmerToWay(key);
|
jpayne@68
|
1425 int count=tables[way].getValue(key);
|
jpayne@68
|
1426 assert(count==NOT_PRESENT || count>=0);
|
jpayne@68
|
1427 count=Tools.max(count, 0);
|
jpayne@68
|
1428 counts[i]=count;
|
jpayne@68
|
1429 if(count>max){
|
jpayne@68
|
1430 max=count;
|
jpayne@68
|
1431 maxPos=i;
|
jpayne@68
|
1432 }
|
jpayne@68
|
1433 }
|
jpayne@68
|
1434 return maxPos;
|
jpayne@68
|
1435 }
|
jpayne@68
|
1436
|
jpayne@68
|
1437 /** For KmerCompressor. */
|
jpayne@68
|
1438 public int fillRightCountsRcompOnly(long kmer, long rkmer, int[] counts, long mask, int shift2){
|
jpayne@68
|
1439 assert(kmer==rcomp(rkmer));
|
jpayne@68
|
1440 if(verbose){outstream.println("fillRightCounts: "+toText(kmer)+", "+toText(rkmer));}
|
jpayne@68
|
1441 kmer=(kmer<<2)&mask;
|
jpayne@68
|
1442 rkmer=(rkmer>>>2);
|
jpayne@68
|
1443 int max=-1, maxPos=0;
|
jpayne@68
|
1444
|
jpayne@68
|
1445 for(int i=0; i<=3; i++){
|
jpayne@68
|
1446 long kmer2=kmer|((long)i);
|
jpayne@68
|
1447 long rkmer2=rkmer|(((long)AminoAcid.numberToComplement[i])<<shift2);
|
jpayne@68
|
1448 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
|
jpayne@68
|
1449 assert(kmer2==(kmer2&mask));
|
jpayne@68
|
1450 assert(rkmer2==(rkmer2&mask));
|
jpayne@68
|
1451 assert(kmer2==rcomp(rkmer2));
|
jpayne@68
|
1452 long key=rkmer2;
|
jpayne@68
|
1453 int way=kmerToWay(key);
|
jpayne@68
|
1454 int count=tables[way].getValue(key);
|
jpayne@68
|
1455 assert(count==NOT_PRESENT || count>=0);
|
jpayne@68
|
1456 count=Tools.max(count, 0);
|
jpayne@68
|
1457 counts[i]=count;
|
jpayne@68
|
1458 if(count>max){
|
jpayne@68
|
1459 max=count;
|
jpayne@68
|
1460 maxPos=i;
|
jpayne@68
|
1461 }
|
jpayne@68
|
1462 }
|
jpayne@68
|
1463 return maxPos;
|
jpayne@68
|
1464 }
|
jpayne@68
|
1465
|
jpayne@68
|
1466 public int fillLeftCounts(long kmer, long rkmer, int[] counts, long mask, int shift2){
|
jpayne@68
|
1467 if(FAST_FILL && MASK_CORE && k>2/*((k&1)==1)*/){
|
jpayne@68
|
1468 return fillLeftCounts_fast(kmer, rkmer, counts, mask, shift2);
|
jpayne@68
|
1469 }else{
|
jpayne@68
|
1470 return fillLeftCounts_safe(kmer, rkmer, counts, mask, shift2);
|
jpayne@68
|
1471 }
|
jpayne@68
|
1472 }
|
jpayne@68
|
1473
|
jpayne@68
|
1474 public int fillLeftCounts_fast(final long kmer0, final long rkmer0, int[] counts,
|
jpayne@68
|
1475 long mask, int shift2){
|
jpayne@68
|
1476 // assert((k&1)==1) : k;
|
jpayne@68
|
1477 assert(MASK_CORE);
|
jpayne@68
|
1478 final long rkmer=(rkmer0<<2)&mask;
|
jpayne@68
|
1479 final long kmer=(kmer0>>>2);
|
jpayne@68
|
1480 final long a=kmer&coreMask, b=rkmer&coreMask;
|
jpayne@68
|
1481
|
jpayne@68
|
1482 int max=-1, maxPos=0;
|
jpayne@68
|
1483 if(a==b){
|
jpayne@68
|
1484 return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
|
jpayne@68
|
1485 }else if(a>b){
|
jpayne@68
|
1486 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
|
jpayne@68
|
1487
|
jpayne@68
|
1488 final long core=a;
|
jpayne@68
|
1489 final int way=kmerToWay(core);
|
jpayne@68
|
1490 // final AbstractKmerTable table=tables[way];
|
jpayne@68
|
1491 final HashArray table=(HashArray) tables[way];
|
jpayne@68
|
1492 final int cell=table.kmerToCell(kmer);
|
jpayne@68
|
1493 final long incr=1L<<(2*(k-1));
|
jpayne@68
|
1494 long j=0;//Must be declared as a long, outside of loop
|
jpayne@68
|
1495 for(int i=0; i<=3; i++, j+=incr){
|
jpayne@68
|
1496 // final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]);
|
jpayne@68
|
1497 // final long kmer2=kmer|(((long)i)<<shift2);
|
jpayne@68
|
1498 // final long key2=toValue(rkmer2, kmer2);
|
jpayne@68
|
1499 // int way2=kmerToWay(key2);
|
jpayne@68
|
1500 // int count2=tables[way2].getValue(key2);
|
jpayne@68
|
1501 // count2=Tools.max(count2, 0);
|
jpayne@68
|
1502
|
jpayne@68
|
1503 final long key=kmer|j;
|
jpayne@68
|
1504 final int count=Tools.max(0, table.getValue(key, cell));
|
jpayne@68
|
1505 // int count=Tools.max(0, table.getValue(key));
|
jpayne@68
|
1506
|
jpayne@68
|
1507 // assert(way==way2) : i+", "+way+", "+way2;
|
jpayne@68
|
1508 // assert(key==key2) : i+", "+Long.toBinaryString(kmer)+", "+
|
jpayne@68
|
1509 // Long.toBinaryString(key)+", "+Long.toBinaryString(key2)+
|
jpayne@68
|
1510 // ", "+Long.toBinaryString(j)+", "+Long.toBinaryString(incr);
|
jpayne@68
|
1511 // assert(count==count2) : i+", "+count+", "+count2;
|
jpayne@68
|
1512
|
jpayne@68
|
1513 counts[i]=count;
|
jpayne@68
|
1514 if(count>max){
|
jpayne@68
|
1515 max=count;
|
jpayne@68
|
1516 maxPos=i;
|
jpayne@68
|
1517 }
|
jpayne@68
|
1518 }
|
jpayne@68
|
1519 return maxPos;
|
jpayne@68
|
1520 }else{
|
jpayne@68
|
1521 // return fillLeftCounts_safe(kmer0, rkmer0, counts, mask, shift2);
|
jpayne@68
|
1522 final long core=b;
|
jpayne@68
|
1523 final int way=kmerToWay(core);
|
jpayne@68
|
1524 // final AbstractKmerTable table=tables[way];
|
jpayne@68
|
1525 final HashArray table=(HashArray) tables[way];
|
jpayne@68
|
1526 final int cell=table.kmerToCell(rkmer);
|
jpayne@68
|
1527 for(int i=0, j=3; i<=3; i++, j--){
|
jpayne@68
|
1528 final long key=rkmer|j;
|
jpayne@68
|
1529 final int count=Tools.max(0, table.getValue(key, cell));
|
jpayne@68
|
1530 // final int count=Tools.max(0, table.getValue(key));
|
jpayne@68
|
1531
|
jpayne@68
|
1532 counts[i]=count;
|
jpayne@68
|
1533 if(count>max){
|
jpayne@68
|
1534 max=count;
|
jpayne@68
|
1535 maxPos=i;
|
jpayne@68
|
1536 }
|
jpayne@68
|
1537 }
|
jpayne@68
|
1538 return maxPos;
|
jpayne@68
|
1539 }
|
jpayne@68
|
1540 }
|
jpayne@68
|
1541
|
jpayne@68
|
1542 public int fillLeftCounts_safe(final long kmer0, final long rkmer0, int[] counts, long mask, int shift2){
|
jpayne@68
|
1543 assert(kmer0==rcomp(rkmer0));
|
jpayne@68
|
1544 if(verbose){outstream.println("fillLeftCounts: "+toText(kmer0)+", "+toText(rkmer0));}
|
jpayne@68
|
1545 long rkmer=(rkmer0<<2)&mask;
|
jpayne@68
|
1546 long kmer=(kmer0>>>2);
|
jpayne@68
|
1547 int max=-1, maxPos=0;
|
jpayne@68
|
1548 // assert(false) : shift2+", "+k;
|
jpayne@68
|
1549 for(int i=0; i<=3; i++){
|
jpayne@68
|
1550 final long rkmer2=rkmer|((long)AminoAcid.numberToComplement[i]);
|
jpayne@68
|
1551 final long kmer2=kmer|(((long)i)<<shift2);
|
jpayne@68
|
1552 if(verbose){outstream.println("kmer: "+toText(kmer2)+", "+toText(rkmer2));}
|
jpayne@68
|
1553 assert(kmer2==(kmer2&mask));
|
jpayne@68
|
1554 assert(rkmer2==(rkmer2&mask));
|
jpayne@68
|
1555 assert(kmer2==rcomp(rkmer2)) : "\n"+"kmer: \t"+toText(rcomp(rkmer2))+", "+toText(rcomp(kmer2));
|
jpayne@68
|
1556 long key=toValue(rkmer2, kmer2);
|
jpayne@68
|
1557 int way=kmerToWay(key);
|
jpayne@68
|
1558 int count=tables[way].getValue(key);
|
jpayne@68
|
1559 assert(count==NOT_PRESENT || count>=0);
|
jpayne@68
|
1560 count=Tools.max(count, 0);
|
jpayne@68
|
1561 counts[i]=count;
|
jpayne@68
|
1562 if(count>max){
|
jpayne@68
|
1563 max=count;
|
jpayne@68
|
1564 maxPos=i;
|
jpayne@68
|
1565 }
|
jpayne@68
|
1566 }
|
jpayne@68
|
1567 return maxPos;
|
jpayne@68
|
1568 }
|
jpayne@68
|
1569
|
jpayne@68
|
1570 /*--------------------------------------------------------------*/
|
jpayne@68
|
1571 /*---------------- Printing Methods ----------------*/
|
jpayne@68
|
1572 /*--------------------------------------------------------------*/
|
jpayne@68
|
1573
|
jpayne@68
|
1574 @Override
|
jpayne@68
|
1575 public boolean dumpKmersAsBytes(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
|
jpayne@68
|
1576 if(fname==null){return false;}
|
jpayne@68
|
1577 Timer t=new Timer();
|
jpayne@68
|
1578
|
jpayne@68
|
1579 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
|
jpayne@68
|
1580 bsw.start();
|
jpayne@68
|
1581 for(AbstractKmerTable set : tables){
|
jpayne@68
|
1582 set.dumpKmersAsBytes(bsw, k, mincount, maxcount, remaining);
|
jpayne@68
|
1583 }
|
jpayne@68
|
1584 bsw.poisonAndWait();
|
jpayne@68
|
1585
|
jpayne@68
|
1586 t.stop();
|
jpayne@68
|
1587 if(printTime){outstream.println("Kmer Dump Time: \t"+t);}
|
jpayne@68
|
1588 return bsw.errorState;
|
jpayne@68
|
1589 }
|
jpayne@68
|
1590
|
jpayne@68
|
1591 @Override
|
jpayne@68
|
1592 public boolean dumpKmersAsBytes_MT(String fname, int mincount, int maxcount, boolean printTime, AtomicLong remaining){
|
jpayne@68
|
1593 final int threads=Tools.min(Shared.threads(), tables.length);
|
jpayne@68
|
1594 if(threads<3 || DumpThread.NUM_THREADS==1){return dumpKmersAsBytes(fname, mincount, maxcount, printTime, remaining);}
|
jpayne@68
|
1595
|
jpayne@68
|
1596 if(fname==null){return false;}
|
jpayne@68
|
1597 Timer t=new Timer();
|
jpayne@68
|
1598
|
jpayne@68
|
1599 ByteStreamWriter bsw=new ByteStreamWriter(fname, overwrite, append, true);
|
jpayne@68
|
1600 bsw.start();
|
jpayne@68
|
1601 DumpThread.dump(k, mincount, maxcount, tables, bsw, remaining);
|
jpayne@68
|
1602 bsw.poisonAndWait();
|
jpayne@68
|
1603
|
jpayne@68
|
1604 t.stop();
|
jpayne@68
|
1605 if(printTime){outstream.println("Kmer Dump Time: \t"+t);}
|
jpayne@68
|
1606 return bsw.errorState;
|
jpayne@68
|
1607 }
|
jpayne@68
|
1608
|
jpayne@68
|
1609 /*--------------------------------------------------------------*/
|
jpayne@68
|
1610 /*---------------- Recall Methods ----------------*/
|
jpayne@68
|
1611 /*--------------------------------------------------------------*/
|
jpayne@68
|
1612
|
jpayne@68
|
1613 private final long rcomp(long kmer){return AminoAcid.reverseComplementBinaryFast(kmer, k);}
|
jpayne@68
|
1614 private final StringBuilder toText(long kmer){return AbstractKmerTable.toText(kmer, k);}
|
jpayne@68
|
1615
|
jpayne@68
|
1616 /*--------------------------------------------------------------*/
|
jpayne@68
|
1617 /*---------------- Static Methods ----------------*/
|
jpayne@68
|
1618 /*--------------------------------------------------------------*/
|
jpayne@68
|
1619
|
jpayne@68
|
1620 /**
|
jpayne@68
|
1621 * Transforms a kmer into a canonical value stored in the table. Expected to be inlined.
|
jpayne@68
|
1622 * @param kmer Forward kmer
|
jpayne@68
|
1623 * @param rkmer Reverse kmer
|
jpayne@68
|
1624 * @return Canonical value
|
jpayne@68
|
1625 */
|
jpayne@68
|
1626 public final long toValue(long kmer, long rkmer){
|
jpayne@68
|
1627 // long value=(rcomp ? Tools.max(kmer, rkmer) : kmer);
|
jpayne@68
|
1628 // return value;
|
jpayne@68
|
1629 if(!rcomp){return kmer;}
|
jpayne@68
|
1630 final long a=kmer&coreMask, b=rkmer&coreMask;
|
jpayne@68
|
1631 return a>b ? kmer : a<b ? rkmer : Tools.max(kmer, rkmer);
|
jpayne@68
|
1632 }
|
jpayne@68
|
1633
|
jpayne@68
|
1634 /*--------------------------------------------------------------*/
|
jpayne@68
|
1635 /*---------------- Final Primitives ----------------*/
|
jpayne@68
|
1636 /*--------------------------------------------------------------*/
|
jpayne@68
|
1637
|
jpayne@68
|
1638 @Override
|
jpayne@68
|
1639 public int kbig(){return k;}
|
jpayne@68
|
1640 @Override
|
jpayne@68
|
1641 public long filterMemory(int pass){return ((pass&1)==0) ? filterMemory0 : filterMemory1;}
|
jpayne@68
|
1642 @Override
|
jpayne@68
|
1643 public boolean ecco(){return ecco;}
|
jpayne@68
|
1644 @Override
|
jpayne@68
|
1645 public boolean qtrimLeft(){return qtrimLeft;}
|
jpayne@68
|
1646 @Override
|
jpayne@68
|
1647 public boolean qtrimRight(){return qtrimRight;}
|
jpayne@68
|
1648 @Override
|
jpayne@68
|
1649 public float minAvgQuality(){return minAvgQuality;}
|
jpayne@68
|
1650 @Override
|
jpayne@68
|
1651 public long tableMemory(){return tableMemory;}
|
jpayne@68
|
1652 @Override
|
jpayne@68
|
1653 public long estimatedKmerCapacity(){return estimatedKmerCapacity;}
|
jpayne@68
|
1654 @Override
|
jpayne@68
|
1655 public int ways(){return ways;}
|
jpayne@68
|
1656 @Override
|
jpayne@68
|
1657 public boolean rcomp(){return rcomp;}
|
jpayne@68
|
1658
|
jpayne@68
|
1659 public final int kmerToWay(final long kmer){
|
jpayne@68
|
1660 final int way=(int)((kmer&coreMask)%ways);
|
jpayne@68
|
1661 return way;
|
jpayne@68
|
1662 }
|
jpayne@68
|
1663
|
jpayne@68
|
1664 /** Hold kmers. A kmer X such that X%WAYS=Y will be stored in tables[Y] */
|
jpayne@68
|
1665 private AbstractKmerTable[] tables;
|
jpayne@68
|
1666
|
jpayne@68
|
1667 public AbstractKmerTable[] tables(){return tables;}
|
jpayne@68
|
1668
|
jpayne@68
|
1669 public long filterMemoryOverride=0;
|
jpayne@68
|
1670
|
jpayne@68
|
1671 public final int tableType; //AbstractKmerTable.ARRAY1D;
|
jpayne@68
|
1672
|
jpayne@68
|
1673 private final int bytesPerKmer;
|
jpayne@68
|
1674
|
jpayne@68
|
1675 private final long usableMemory;
|
jpayne@68
|
1676 private final long filterMemory0;
|
jpayne@68
|
1677 private final long filterMemory1;
|
jpayne@68
|
1678 private final long tableMemory;
|
jpayne@68
|
1679 private final long estimatedKmerCapacity;
|
jpayne@68
|
1680
|
jpayne@68
|
1681 /** Number of tables (and threads, during loading) */
|
jpayne@68
|
1682 private final boolean prealloc;
|
jpayne@68
|
1683
|
jpayne@68
|
1684 /** Number of tables (and threads, during loading) */
|
jpayne@68
|
1685 public final int ways;
|
jpayne@68
|
1686
|
jpayne@68
|
1687 /** Normal kmer length */
|
jpayne@68
|
1688 public final int k;
|
jpayne@68
|
1689 /** k-1; used in some expressions */
|
jpayne@68
|
1690 public final int k2;
|
jpayne@68
|
1691
|
jpayne@68
|
1692 public final long coreMask;
|
jpayne@68
|
1693
|
jpayne@68
|
1694 /** Look for reverse-complements as well as forward kmers. Default: true */
|
jpayne@68
|
1695 private final boolean rcomp;
|
jpayne@68
|
1696
|
jpayne@68
|
1697 /** Quality-trim the left side */
|
jpayne@68
|
1698 public final boolean qtrimLeft;
|
jpayne@68
|
1699 /** Quality-trim the right side */
|
jpayne@68
|
1700 public final boolean qtrimRight;
|
jpayne@68
|
1701 /** Trim bases at this quality or below. Default: 4 */
|
jpayne@68
|
1702 public final float trimq;
|
jpayne@68
|
1703 /** Error rate for trimming (derived from trimq) */
|
jpayne@68
|
1704 private final float trimE;
|
jpayne@68
|
1705
|
jpayne@68
|
1706 /** Throw away reads below this average quality after trimming. Default: 0 */
|
jpayne@68
|
1707 public final float minAvgQuality;
|
jpayne@68
|
1708 /** If positive, calculate average quality from the first X bases only. Default: 0 */
|
jpayne@68
|
1709 public final int minAvgQualityBases;
|
jpayne@68
|
1710
|
jpayne@68
|
1711 /** Ignore kmers with probability of correctness less than this */
|
jpayne@68
|
1712 public final float minProb;
|
jpayne@68
|
1713
|
jpayne@68
|
1714 /** Correct via overlap */
|
jpayne@68
|
1715 private final boolean ecco;
|
jpayne@68
|
1716
|
jpayne@68
|
1717 /** Attempt to merge via overlap prior to counting kmers */
|
jpayne@68
|
1718 private final boolean merge;
|
jpayne@68
|
1719
|
jpayne@68
|
1720 /*--------------------------------------------------------------*/
|
jpayne@68
|
1721 /*---------------- Walker ----------------*/
|
jpayne@68
|
1722 /*--------------------------------------------------------------*/
|
jpayne@68
|
1723
|
jpayne@68
|
1724 public Walker walk(){
|
jpayne@68
|
1725 return new WalkerKST();
|
jpayne@68
|
1726 }
|
jpayne@68
|
1727
|
jpayne@68
|
1728 public class WalkerKST extends Walker {
|
jpayne@68
|
1729
|
jpayne@68
|
1730 WalkerKST(){
|
jpayne@68
|
1731 w=tables[0].walk();
|
jpayne@68
|
1732 }
|
jpayne@68
|
1733
|
jpayne@68
|
1734 /**
|
jpayne@68
|
1735 * Fills this object with the next key and value.
|
jpayne@68
|
1736 * @return True if successful.
|
jpayne@68
|
1737 */
|
jpayne@68
|
1738 public boolean next(){
|
jpayne@68
|
1739 if(w==null){return false;}
|
jpayne@68
|
1740 if(w.next()){return true;}
|
jpayne@68
|
1741 if(tnum<tables.length){tnum++;}
|
jpayne@68
|
1742 w=(tnum<tables.length ? tables[tnum].walk() : null);
|
jpayne@68
|
1743 return w==null ? false : w.next();
|
jpayne@68
|
1744 }
|
jpayne@68
|
1745
|
jpayne@68
|
1746 public long kmer(){return w.kmer();}
|
jpayne@68
|
1747 public int value(){return w.value();}
|
jpayne@68
|
1748
|
jpayne@68
|
1749 private Walker w=null;
|
jpayne@68
|
1750
|
jpayne@68
|
1751 /** current table number */
|
jpayne@68
|
1752 private int tnum=0;
|
jpayne@68
|
1753 }
|
jpayne@68
|
1754
|
jpayne@68
|
1755 }
|