jpayne@68
|
1 package tax;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.io.PrintStream;
|
jpayne@68
|
5 import java.util.ArrayList;
|
jpayne@68
|
6 import java.util.Arrays;
|
jpayne@68
|
7 import java.util.Collections;
|
jpayne@68
|
8 import java.util.HashMap;
|
jpayne@68
|
9 import java.util.concurrent.atomic.AtomicLongArray;
|
jpayne@68
|
10
|
jpayne@68
|
11 import fileIO.ByteFile;
|
jpayne@68
|
12 import fileIO.ByteFile1;
|
jpayne@68
|
13 import fileIO.ByteFile2;
|
jpayne@68
|
14 import fileIO.FileFormat;
|
jpayne@68
|
15 import fileIO.ReadWrite;
|
jpayne@68
|
16 import kmer.HashBuffer;
|
jpayne@68
|
17 import kmer.KmerTableSet;
|
jpayne@68
|
18 import shared.Parse;
|
jpayne@68
|
19 import shared.Parser;
|
jpayne@68
|
20 import shared.PreParser;
|
jpayne@68
|
21 import shared.Shared;
|
jpayne@68
|
22 import shared.Timer;
|
jpayne@68
|
23 import shared.Tools;
|
jpayne@68
|
24 import stream.ConcurrentGenericReadInputStream;
|
jpayne@68
|
25 import stream.FastaReadInputStream;
|
jpayne@68
|
26 import structures.StringNum;
|
jpayne@68
|
27
|
jpayne@68
|
28 /**
|
jpayne@68
|
29 * New version loads with multiple threads per input file.
|
jpayne@68
|
30 * @author Brian Bushnell
|
jpayne@68
|
31 * @date December 16, 2016
|
jpayne@68
|
32 *
|
jpayne@68
|
33 */
|
jpayne@68
|
34 public class AccessionToTaxid {
|
jpayne@68
|
35
|
jpayne@68
|
36 public static void load(String files){
|
jpayne@68
|
37 final boolean oldBf2=ByteFile.FORCE_MODE_BF2;
|
jpayne@68
|
38 final boolean oldBf1=ByteFile.FORCE_MODE_BF1;
|
jpayne@68
|
39 final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
|
jpayne@68
|
40 final boolean oldGunzip=ReadWrite.USE_UNPIGZ;
|
jpayne@68
|
41
|
jpayne@68
|
42 main(new String[] {"in="+files, "unpigz="+ReadWrite.USE_UNPIGZ, "gunzip="+ReadWrite.USE_GUNZIP});
|
jpayne@68
|
43
|
jpayne@68
|
44 ByteFile.FORCE_MODE_BF2=oldBf2;
|
jpayne@68
|
45 ByteFile.FORCE_MODE_BF1=oldBf1;
|
jpayne@68
|
46 ReadWrite.USE_UNPIGZ=oldUnpigz;
|
jpayne@68
|
47 ReadWrite.USE_UNPIGZ=oldGunzip;
|
jpayne@68
|
48 }
|
jpayne@68
|
49
|
jpayne@68
|
50 public static void main(String[] args){
|
jpayne@68
|
51 Timer t=new Timer();
|
jpayne@68
|
52 AccessionToTaxid x=new AccessionToTaxid(args);
|
jpayne@68
|
53 x.process(t);
|
jpayne@68
|
54
|
jpayne@68
|
55 //Close the print stream if it was redirected
|
jpayne@68
|
56 Shared.closeStream(x.outstream);
|
jpayne@68
|
57 }
|
jpayne@68
|
58
|
jpayne@68
|
59 public AccessionToTaxid(String[] args){
|
jpayne@68
|
60
|
jpayne@68
|
61 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
62 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
63 args=pp.args;
|
jpayne@68
|
64 outstream=pp.outstream;
|
jpayne@68
|
65 }
|
jpayne@68
|
66
|
jpayne@68
|
67 ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
68
|
jpayne@68
|
69 Parser parser=new Parser();
|
jpayne@68
|
70 for(int i=0; i<args.length; i++){
|
jpayne@68
|
71 String arg=args[i];
|
jpayne@68
|
72 String[] split=arg.split("=");
|
jpayne@68
|
73 String a=split[0].toLowerCase();
|
jpayne@68
|
74 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
75
|
jpayne@68
|
76 if(a.equals("verbose")){
|
jpayne@68
|
77 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
78 ByteFile1.verbose=verbose;
|
jpayne@68
|
79 ByteFile2.verbose=verbose;
|
jpayne@68
|
80 stream.FastaReadInputStream.verbose=verbose;
|
jpayne@68
|
81 ConcurrentGenericReadInputStream.verbose=verbose;
|
jpayne@68
|
82 stream.FastqReadInputStream.verbose=verbose;
|
jpayne@68
|
83 ReadWrite.verbose=verbose;
|
jpayne@68
|
84 }else if(a.equals("stripunderscore")){
|
jpayne@68
|
85 // STRIP_UNDERSCORE=Parse.parseBoolean(b);
|
jpayne@68
|
86 assert(false) : "stripunderscore is disabled.";
|
jpayne@68
|
87 }else if(a.equals("usetables")){
|
jpayne@68
|
88 // USE_TABLES=Parse.parseBoolean(b);
|
jpayne@68
|
89 }else if(a.equals("usetables")){
|
jpayne@68
|
90 // USE_TABLES=Parse.parseBoolean(b);
|
jpayne@68
|
91 }else if(a.equals("skipparse")){
|
jpayne@68
|
92 skipParse=Parse.parseBoolean(b);
|
jpayne@68
|
93 }else if(a.equals("skiphash")){
|
jpayne@68
|
94 skipHash=Parse.parseBoolean(b);
|
jpayne@68
|
95 }else if(a.equals("prealloc")){
|
jpayne@68
|
96 if(b==null || Character.isLetter(b.charAt(0))){
|
jpayne@68
|
97 if(Parse.parseBoolean(b)){
|
jpayne@68
|
98 prealloc=0.78f;
|
jpayne@68
|
99 }else{
|
jpayne@68
|
100 prealloc=0;
|
jpayne@68
|
101 }
|
jpayne@68
|
102 }else{
|
jpayne@68
|
103 prealloc=Float.parseFloat(b);
|
jpayne@68
|
104 }
|
jpayne@68
|
105 }else if(a.equals("maxpigzprocesses")){
|
jpayne@68
|
106 maxPigzProcesses=Integer.parseInt(b);
|
jpayne@68
|
107 }else if(a.equals("in")){
|
jpayne@68
|
108 assert(b!=null) : "Bad parameter: "+arg;
|
jpayne@68
|
109 String[] temp=b.split(",");
|
jpayne@68
|
110 for(String s : temp){in.add(s);}
|
jpayne@68
|
111 }else if(parser.parse(arg, a, b)){
|
jpayne@68
|
112 //do nothing
|
jpayne@68
|
113 }else if(b==null){
|
jpayne@68
|
114 if(new File(arg).exists()){
|
jpayne@68
|
115 in.add(arg);
|
jpayne@68
|
116 }
|
jpayne@68
|
117 }else{
|
jpayne@68
|
118 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
119 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
120 // throw new RuntimeException("Unknown parameter "+args[i]);
|
jpayne@68
|
121 }
|
jpayne@68
|
122 }
|
jpayne@68
|
123
|
jpayne@68
|
124 {//Process parser fields
|
jpayne@68
|
125 overwrite=parser.overwrite;
|
jpayne@68
|
126
|
jpayne@68
|
127 // out=parser.out1;
|
jpayne@68
|
128 }
|
jpayne@68
|
129
|
jpayne@68
|
130 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
131
|
jpayne@68
|
132 if(in==null || in.size()==0){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
133
|
jpayne@68
|
134 if(ReadWrite.USE_UNPIGZ && !ByteFile.FORCE_MODE_BF2){
|
jpayne@68
|
135 ByteFile.FORCE_MODE_BF2=false;
|
jpayne@68
|
136 ByteFile.FORCE_MODE_BF1=true;
|
jpayne@68
|
137 }
|
jpayne@68
|
138
|
jpayne@68
|
139 // if(out!=null && out.equalsIgnoreCase("null")){out=null;}
|
jpayne@68
|
140
|
jpayne@68
|
141 // if(!Tools.testOutputFiles(overwrite, false, false, out)){
|
jpayne@68
|
142 // outstream.println((out==null)+", "+out);
|
jpayne@68
|
143 // throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
|
jpayne@68
|
144 // }
|
jpayne@68
|
145
|
jpayne@68
|
146 {//Reorder by size, ascending
|
jpayne@68
|
147 ArrayList<StringNum> list=new ArrayList<StringNum>();
|
jpayne@68
|
148 for(String s : in){
|
jpayne@68
|
149 list.add(new StringNum(s, new File(s).length()));
|
jpayne@68
|
150 }
|
jpayne@68
|
151 Collections.sort(list);
|
jpayne@68
|
152 in.clear();
|
jpayne@68
|
153 for(StringNum sn : list){
|
jpayne@68
|
154 in.add(sn.s);
|
jpayne@68
|
155 }
|
jpayne@68
|
156 }
|
jpayne@68
|
157
|
jpayne@68
|
158 // ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, false, false);
|
jpayne@68
|
159 ffin=new FileFormat[in.size()];
|
jpayne@68
|
160
|
jpayne@68
|
161 /* Note */
|
jpayne@68
|
162 /* Java 1.7 works fine here (54 seconds skipping parsing). */
|
jpayne@68
|
163 /* Java 1.8 has immense speed-downs if pigz is used (80-100s normally, >1000s with unpigz). */
|
jpayne@68
|
164 /* Java 1.8_144 is unpredictable and incredibly slow (80-900s normally, 500-1800 with unpigz) */
|
jpayne@68
|
165
|
jpayne@68
|
166 int processes=0;
|
jpayne@68
|
167 for(int i=0; i<in.size(); i++){
|
jpayne@68
|
168 String s=in.get(i);
|
jpayne@68
|
169 if(!new File(s).exists()){
|
jpayne@68
|
170 if(s.startsWith("shrunk.") && new File(s.substring(7)).exists()){
|
jpayne@68
|
171 s=s.substring(7);
|
jpayne@68
|
172 }
|
jpayne@68
|
173 }
|
jpayne@68
|
174 FileFormat ff=FileFormat.testInput(s, FileFormat.TXT, null, true, false);
|
jpayne@68
|
175 if(ff.gzip() && processes>maxPigzProcesses){
|
jpayne@68
|
176 processes++;
|
jpayne@68
|
177 // if(processes>maxPigzProcesses){
|
jpayne@68
|
178 ff=FileFormat.testInput(s, FileFormat.TXT, null, false, false);
|
jpayne@68
|
179 // }
|
jpayne@68
|
180 }
|
jpayne@68
|
181 ffin[i]=ff;
|
jpayne@68
|
182 }
|
jpayne@68
|
183 }
|
jpayne@68
|
184
|
jpayne@68
|
185 @SuppressWarnings("unchecked")
|
jpayne@68
|
186 void process(Timer t){
|
jpayne@68
|
187
|
jpayne@68
|
188 // if(USE_MAPS){
|
jpayne@68
|
189 assert(maps==null);
|
jpayne@68
|
190 maps=new HashMap[128];
|
jpayne@68
|
191 for(int i=0; i<maps.length; i++){
|
jpayne@68
|
192 maps[i]=new HashMap<String, Integer>();
|
jpayne@68
|
193 }
|
jpayne@68
|
194 // }
|
jpayne@68
|
195
|
jpayne@68
|
196 assert(tables==null);
|
jpayne@68
|
197 if(USE_TABLES){
|
jpayne@68
|
198 tables=new KmerTableSet(new String[] {"ways=31",("prealloc="+(prealloc>0 ? prealloc : "f"))}, 12);
|
jpayne@68
|
199 tables.allocateTables();
|
jpayne@68
|
200 }
|
jpayne@68
|
201
|
jpayne@68
|
202 if(ffin.length>4){//Addresses a multithreaded read bug in Java
|
jpayne@68
|
203 // FileFormat[] ffa1=Arrays.copyOf(ffin, 2);
|
jpayne@68
|
204 // FileFormat[] ffa2=Arrays.copyOfRange(ffin, 2, ffin.length);
|
jpayne@68
|
205 // spawnThreads(ffa1);
|
jpayne@68
|
206 // spawnThreads(ffa2);
|
jpayne@68
|
207
|
jpayne@68
|
208 FileFormat[] ffa1=Arrays.copyOf(ffin, 2);
|
jpayne@68
|
209 FileFormat[] ffa2=Arrays.copyOfRange(ffin, 2, ffin.length);
|
jpayne@68
|
210 spawnThreads(ffa1, 2);
|
jpayne@68
|
211 spawnThreads(ffa2, 200);
|
jpayne@68
|
212 }else{
|
jpayne@68
|
213 spawnThreads(ffin, 200);
|
jpayne@68
|
214 }
|
jpayne@68
|
215
|
jpayne@68
|
216 //Do anything necessary after processing
|
jpayne@68
|
217 System.gc();
|
jpayne@68
|
218
|
jpayne@68
|
219 t.stop();
|
jpayne@68
|
220 outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
|
jpayne@68
|
221
|
jpayne@68
|
222 outstream.println();
|
jpayne@68
|
223 outstream.println("Valid Lines: \t"+linesValid);
|
jpayne@68
|
224 outstream.println("Invalid Lines: \t"+(linesProcessed-linesValid));
|
jpayne@68
|
225
|
jpayne@68
|
226 if(lengthCounts!=null){
|
jpayne@68
|
227 outstream.println();
|
jpayne@68
|
228 outstream.println("Length counts:");
|
jpayne@68
|
229
|
jpayne@68
|
230 for(int i=0; i<lengthCounts.length(); i++){
|
jpayne@68
|
231 long count=lengthCounts.get(i);
|
jpayne@68
|
232 if(count>0){outstream.println(i+"\t"+count);}
|
jpayne@68
|
233 }
|
jpayne@68
|
234 }
|
jpayne@68
|
235
|
jpayne@68
|
236 if(symbolCounts!=null){
|
jpayne@68
|
237 outstream.println();
|
jpayne@68
|
238 outstream.println("Symbols:");
|
jpayne@68
|
239
|
jpayne@68
|
240 String comma="";
|
jpayne@68
|
241 for(int i=0; i<symbolCounts.length(); i++){
|
jpayne@68
|
242 long count=symbolCounts.get(i);
|
jpayne@68
|
243 if(count>0){
|
jpayne@68
|
244 outstream.print(comma+i);
|
jpayne@68
|
245 comma=",";
|
jpayne@68
|
246 }
|
jpayne@68
|
247 }
|
jpayne@68
|
248 }
|
jpayne@68
|
249
|
jpayne@68
|
250 if(counts_underscore!=null){
|
jpayne@68
|
251 outstream.println();
|
jpayne@68
|
252 outstream.println("Length_underscore counts:");
|
jpayne@68
|
253
|
jpayne@68
|
254 for(int i=0; i<counts_underscore.length(); i++){
|
jpayne@68
|
255 long count=counts_underscore.get(i);
|
jpayne@68
|
256 if(count>0){outstream.println(i+"\t"+count);}
|
jpayne@68
|
257 }
|
jpayne@68
|
258 }
|
jpayne@68
|
259
|
jpayne@68
|
260 if(counts_underscore2!=null){
|
jpayne@68
|
261 outstream.println();
|
jpayne@68
|
262 outstream.println("Length_underscore2 counts:");
|
jpayne@68
|
263
|
jpayne@68
|
264 for(int i=0; i<counts_underscore2.length(); i++){
|
jpayne@68
|
265 long count=counts_underscore2.get(i);
|
jpayne@68
|
266 if(count>0){outstream.println(i+"\t"+count);}
|
jpayne@68
|
267 }
|
jpayne@68
|
268 }
|
jpayne@68
|
269 outstream.println();
|
jpayne@68
|
270 Shared.printMemory();
|
jpayne@68
|
271
|
jpayne@68
|
272 if(errorState){
|
jpayne@68
|
273 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
274 }
|
jpayne@68
|
275
|
jpayne@68
|
276 LOADED=true;
|
jpayne@68
|
277 }
|
jpayne@68
|
278
|
jpayne@68
|
279 /** Spawn process threads */
|
jpayne@68
|
280 private void spawnThreads(FileFormat[] ffa, int threadLimit){
|
jpayne@68
|
281
|
jpayne@68
|
282 //Do anything necessary prior to processing
|
jpayne@68
|
283 Tools.reverseInPlace(ffa, 0, ffa.length);
|
jpayne@68
|
284
|
jpayne@68
|
285 //Fill a list with ProcessThreads
|
jpayne@68
|
286 ArrayList<ByteFile> albf=new ArrayList<ByteFile>(ffa.length);
|
jpayne@68
|
287 for(FileFormat ff : ffa){
|
jpayne@68
|
288 if(ff!=null){
|
jpayne@68
|
289 System.err.println("Loading "+ff.name());
|
jpayne@68
|
290 ByteFile bf=ByteFile.makeByteFile(ff, 1);
|
jpayne@68
|
291 albf.add(bf);
|
jpayne@68
|
292 }
|
jpayne@68
|
293 }
|
jpayne@68
|
294 final int threads=Tools.min(threadLimit, Tools.max(albf.size(), Shared.threads()));
|
jpayne@68
|
295 ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
|
jpayne@68
|
296
|
jpayne@68
|
297 for(int i=0; i<threads; i++){
|
jpayne@68
|
298 ByteFile bf=albf.get(i%albf.size());
|
jpayne@68
|
299 alht.add(new HashThread(bf));
|
jpayne@68
|
300 }
|
jpayne@68
|
301
|
jpayne@68
|
302 //Start the threads
|
jpayne@68
|
303 for(HashThread pt : alht){
|
jpayne@68
|
304 pt.start();
|
jpayne@68
|
305 }
|
jpayne@68
|
306
|
jpayne@68
|
307 //Wait for completion of all threads
|
jpayne@68
|
308 boolean success=true;
|
jpayne@68
|
309 for(HashThread pt : alht){
|
jpayne@68
|
310
|
jpayne@68
|
311 //Wait until this thread has terminated
|
jpayne@68
|
312 while(pt.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
313 try {
|
jpayne@68
|
314 //Attempt a join operation
|
jpayne@68
|
315 pt.join();
|
jpayne@68
|
316 } catch (InterruptedException e) {
|
jpayne@68
|
317 //Potentially handle this, if it is expected to occur
|
jpayne@68
|
318 e.printStackTrace();
|
jpayne@68
|
319 }
|
jpayne@68
|
320 }
|
jpayne@68
|
321
|
jpayne@68
|
322 linesProcessed+=pt.linesProcessedT;
|
jpayne@68
|
323 linesValid+=pt.linesValidT;
|
jpayne@68
|
324 bytesProcessed+=pt.bytesProcessedT;
|
jpayne@68
|
325
|
jpayne@68
|
326 accumulate(lengthCounts, pt.lengthCountsT);
|
jpayne@68
|
327 accumulate(symbolCounts, pt.symbolCountsT);
|
jpayne@68
|
328 accumulate(counts_underscore, pt.counts_underscoreT);
|
jpayne@68
|
329 accumulate(counts_underscore2, pt.counts_underscore2T);
|
jpayne@68
|
330
|
jpayne@68
|
331 success&=pt.success;
|
jpayne@68
|
332 }
|
jpayne@68
|
333
|
jpayne@68
|
334 //Close the byte files
|
jpayne@68
|
335 for(ByteFile bf : albf){
|
jpayne@68
|
336 errorState=bf.close()|errorState;
|
jpayne@68
|
337 }
|
jpayne@68
|
338
|
jpayne@68
|
339 //Track whether any threads failed
|
jpayne@68
|
340 if(!success){errorState=true;}
|
jpayne@68
|
341 }
|
jpayne@68
|
342
|
jpayne@68
|
343 private static void accumulate(AtomicLongArray a, long[] b){
|
jpayne@68
|
344 if(a==null || b==null){return;}
|
jpayne@68
|
345 for(int i=0; i<b.length; i++){
|
jpayne@68
|
346 a.getAndAdd(i, b[i]);
|
jpayne@68
|
347 }
|
jpayne@68
|
348 }
|
jpayne@68
|
349
|
jpayne@68
|
350 /*--------------------------------------------------------------*/
|
jpayne@68
|
351
|
jpayne@68
|
352 public static int get(String accession){
|
jpayne@68
|
353 if(accession==null){return -1;}
|
jpayne@68
|
354 // if(STRIP_UNDERSCORE){
|
jpayne@68
|
355 // accession=accession.replaceAll("[_-]", "");
|
jpayne@68
|
356 // }
|
jpayne@68
|
357
|
jpayne@68
|
358 int len=accession.length();
|
jpayne@68
|
359 for(int i=0; i<len; i++){
|
jpayne@68
|
360 char c=accession.charAt(i);
|
jpayne@68
|
361 if(c=='.' || c==':' || c==','){
|
jpayne@68
|
362 len=i; break;
|
jpayne@68
|
363 }
|
jpayne@68
|
364 }
|
jpayne@68
|
365
|
jpayne@68
|
366 if(USE_TABLES){
|
jpayne@68
|
367 if(AnalyzeAccession.codeMap!=null){
|
jpayne@68
|
368 // if(dot>AnalyzeAccession.longestPattern){return false;}
|
jpayne@68
|
369 final long number=AnalyzeAccession.digitize(accession);
|
jpayne@68
|
370 if(number>=0){
|
jpayne@68
|
371 int value=tables.getCount(number);
|
jpayne@68
|
372 return value<0 ? -1 : value;
|
jpayne@68
|
373 }
|
jpayne@68
|
374 }else if(len<=12){
|
jpayne@68
|
375 long number=hash(accession);
|
jpayne@68
|
376
|
jpayne@68
|
377 int value=tables.getCount(number);
|
jpayne@68
|
378 return value<1 ? -1 : value;
|
jpayne@68
|
379 }
|
jpayne@68
|
380 }
|
jpayne@68
|
381
|
jpayne@68
|
382 if(len<accession.length()){accession=accession.substring(0, len);}
|
jpayne@68
|
383 if(accession.length()<1){return -1;}
|
jpayne@68
|
384 int way=accession.charAt(0);
|
jpayne@68
|
385 Integer value=maps[way].get(accession);
|
jpayne@68
|
386 return value==null ? -1 : value.intValue();
|
jpayne@68
|
387 }
|
jpayne@68
|
388
|
jpayne@68
|
389 public static boolean isValidAccession(String s){
|
jpayne@68
|
390 if(s==null || s.length()<4){return false;}
|
jpayne@68
|
391 for(int i=0; i<s.length(); i++){
|
jpayne@68
|
392 char c=s.charAt(i);
|
jpayne@68
|
393 if((c>='0' && c<='9') || (c>='A' && c<='Z') /*|| (c>='a' && c<='z')*/
|
jpayne@68
|
394 || c=='.' || c=='_' || c=='-' || c==':' || c==','){
|
jpayne@68
|
395 //do nothing
|
jpayne@68
|
396 }else{
|
jpayne@68
|
397 return false;
|
jpayne@68
|
398 }
|
jpayne@68
|
399 }
|
jpayne@68
|
400 return true;
|
jpayne@68
|
401 }
|
jpayne@68
|
402
|
jpayne@68
|
403 static long hash(String accession){
|
jpayne@68
|
404 long number=0;
|
jpayne@68
|
405 for(int i=0, max=accession.length(); i<max; i++){
|
jpayne@68
|
406 long c=accession.charAt(i);
|
jpayne@68
|
407 if(c=='.' || c==':' || c==','){break;}
|
jpayne@68
|
408 if(c>='0' && c<='9'){c=c-'0';}
|
jpayne@68
|
409 else if(c>='A' && c<='Z'){c=c+offset;}
|
jpayne@68
|
410 else if(c=='_' || c=='-'){c=10;}//Collision, but should be OK
|
jpayne@68
|
411 else if(c>='a' && c<='z'){c=c+offsetLower;}
|
jpayne@68
|
412 else{
|
jpayne@68
|
413 assert(false) : accession;
|
jpayne@68
|
414 }
|
jpayne@68
|
415 number=(number*37)+c;
|
jpayne@68
|
416 }
|
jpayne@68
|
417 return number;
|
jpayne@68
|
418 }
|
jpayne@68
|
419
|
jpayne@68
|
420 static long hash(final byte[] line, final int limit){
|
jpayne@68
|
421 long number=0;
|
jpayne@68
|
422 for(int i=0; i<limit; i++){
|
jpayne@68
|
423 long c=line[i];
|
jpayne@68
|
424 if(c=='.' || c==':' || c==','){break;}
|
jpayne@68
|
425 if(c>='0' && c<='9'){c=c-'0';}
|
jpayne@68
|
426 else if(c>='A' && c<='Z'){c=c+offset;}
|
jpayne@68
|
427 else if(c=='_' || c=='-'){c=10;}//Collision, but should be OK
|
jpayne@68
|
428 else if(c>='a' && c<='z'){c=c+offsetLower;}
|
jpayne@68
|
429 else{
|
jpayne@68
|
430 assert(false) : new String(line);
|
jpayne@68
|
431 }
|
jpayne@68
|
432 number=(number*37)+c;
|
jpayne@68
|
433 }
|
jpayne@68
|
434 return number;
|
jpayne@68
|
435 }
|
jpayne@68
|
436
|
jpayne@68
|
437 public static int parseLineToTaxid(final byte[] line, final byte delimiter){
|
jpayne@68
|
438 int a=0, b=0;
|
jpayne@68
|
439
|
jpayne@68
|
440 final int ncbi;
|
jpayne@68
|
441
|
jpayne@68
|
442 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
443 assert(b>a) : "Missing field 0: "+new String(line);
|
jpayne@68
|
444 b++;
|
jpayne@68
|
445 a=b;
|
jpayne@68
|
446
|
jpayne@68
|
447 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
448 // assert(b>a) : "Missing field 1: "+new String(line);
|
jpayne@68
|
449 assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
|
jpayne@68
|
450 //accession2=new String(line, a, b-a);
|
jpayne@68
|
451 b++;
|
jpayne@68
|
452 a=b;
|
jpayne@68
|
453
|
jpayne@68
|
454 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
455 assert(b>a) : "Missing field 2: "+new String(line);
|
jpayne@68
|
456 ncbi=Parse.parseInt(line, a, b);
|
jpayne@68
|
457 b++;
|
jpayne@68
|
458 a=b;
|
jpayne@68
|
459
|
jpayne@68
|
460 return ncbi;
|
jpayne@68
|
461 }
|
jpayne@68
|
462
|
jpayne@68
|
463 public static int parseLineToTaxid_2col(final byte[] line, final byte delimiter){
|
jpayne@68
|
464 int a=0, b=0;
|
jpayne@68
|
465
|
jpayne@68
|
466 final int ncbi;
|
jpayne@68
|
467
|
jpayne@68
|
468 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
469 assert(b>a) : "Missing field 0: "+new String(line);
|
jpayne@68
|
470 b++;
|
jpayne@68
|
471 a=b;
|
jpayne@68
|
472
|
jpayne@68
|
473 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
474 assert(b>a) : "Missing field 1: "+new String(line);
|
jpayne@68
|
475 ncbi=Parse.parseInt(line, a, b);
|
jpayne@68
|
476 b++;
|
jpayne@68
|
477 a=b;
|
jpayne@68
|
478
|
jpayne@68
|
479 return ncbi;
|
jpayne@68
|
480 }
|
jpayne@68
|
481
|
jpayne@68
|
482 /*--------------------------------------------------------------*/
|
jpayne@68
|
483
|
jpayne@68
|
484 public static class HashThread extends Thread {
|
jpayne@68
|
485
|
jpayne@68
|
/**
 * Creates a worker that reads lines from the given file.
 * Allocates 128 thread-local maps and, when USE_TABLES is set, a HashBuffer
 * over the shared tables.
 * @param bf_ File to read from; may be shared with other HashThreads.
 */
@SuppressWarnings("unchecked")
public HashThread(ByteFile bf_){
	mapsT=new HashMap[128];
	for(int way=0; way<mapsT.length; way++){
		mapsT[way]=new HashMap<String, Integer>();
	}

	if(USE_TABLES){
		table=new HashBuffer(tables.tables(), 1000, 31, true, true);
	}

	bf=bf_;
}
|
jpayne@68
|
499
|
jpayne@68
|
500 ArrayList<byte[]> fetch(int limit){
|
jpayne@68
|
501 ArrayList<byte[]> list=new ArrayList<byte[]>(limit);
|
jpayne@68
|
502 synchronized(bf){
|
jpayne@68
|
503 byte[] line=bf.nextLine();
|
jpayne@68
|
504 // while(line!=null && Tools.startsWith(line, "accession")){line=bf.nextLine();}
|
jpayne@68
|
505 if(line==null){return null;}
|
jpayne@68
|
506 for(int i=0; line!=null;){
|
jpayne@68
|
507 list.add(line);
|
jpayne@68
|
508 i++;
|
jpayne@68
|
509 if(i>=limit){break;}
|
jpayne@68
|
510 line=bf.nextLine();
|
jpayne@68
|
511 }
|
jpayne@68
|
512 }
|
jpayne@68
|
513 return list.size()>0 ? list : null;
|
jpayne@68
|
514 }
|
jpayne@68
|
515
|
jpayne@68
|
516 @Override
|
jpayne@68
|
517 public void run(){
|
jpayne@68
|
518 // System.err.println("Processing "+bf.name());
|
jpayne@68
|
519 final int fetchSize=1000;
|
jpayne@68
|
520 for(ArrayList<byte[]> list=fetch(fetchSize); list!=null; list=fetch(fetchSize)){
|
jpayne@68
|
521 for(byte[] line : list){
|
jpayne@68
|
522 if(line.length>0){
|
jpayne@68
|
523 linesProcessedT++;
|
jpayne@68
|
524 bytesProcessedT+=line.length;
|
jpayne@68
|
525
|
jpayne@68
|
526 // final boolean valid=(!Tools.startsWith(line, "accession\t")) & !skipParse;
|
jpayne@68
|
527 final boolean valid=(!Tools.startsWith(line, "accession")) & !skipParse;
|
jpayne@68
|
528 // assert(valid); //Not true if concatenated
|
jpayne@68
|
529
|
jpayne@68
|
530 // if(Tools.startsWith(line, "NZ_LM994619")){
|
jpayne@68
|
531 // boolean b=parseLine2(line, (byte)'\t');
|
jpayne@68
|
532 // assert(false) : b+", "+new String(line);
|
jpayne@68
|
533 // }
|
jpayne@68
|
534
|
jpayne@68
|
535 if(valid){
|
jpayne@68
|
536 boolean b=parseLine2(line, (byte)'\t');
|
jpayne@68
|
537 if(b){linesValidT++;}
|
jpayne@68
|
538 }
|
jpayne@68
|
539 }
|
jpayne@68
|
540 }
|
jpayne@68
|
541 }
|
jpayne@68
|
542
|
jpayne@68
|
543 // if(USE_MAPS){
|
jpayne@68
|
544 for(int i=0; i<mapsT.length; i++){
|
jpayne@68
|
545 if(mapsT[i].size()>0){
|
jpayne@68
|
546 synchronized(maps[i]){
|
jpayne@68
|
547 maps[i].putAll(mapsT[i]);
|
jpayne@68
|
548 }
|
jpayne@68
|
549 }
|
jpayne@68
|
550 mapsT[i]=null;
|
jpayne@68
|
551 }
|
jpayne@68
|
552 // }
|
jpayne@68
|
553 if(USE_TABLES){
|
jpayne@68
|
554 long temp=table.flush();
|
jpayne@68
|
555 }
|
jpayne@68
|
556
|
jpayne@68
|
557 success=true;
|
jpayne@68
|
558 }
|
jpayne@68
|
559
|
jpayne@68
|
560 // public boolean parseLineNumeric(final byte[] line, final byte delimiter){
|
jpayne@68
|
561 // int a=0, b=0;
|
jpayne@68
|
562 //
|
jpayne@68
|
563 // long accession=0;
|
jpayne@68
|
564 // final int ncbi, gi;
|
jpayne@68
|
565 //
|
jpayne@68
|
566 // while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
567 // assert(b>a) : "Missing field 0: "+new String(line);
|
jpayne@68
|
568 // for(int i=a; i<b; i++){
|
jpayne@68
|
569 // long c=line[i];
|
jpayne@68
|
570 // if(c=='.'){break;}
|
jpayne@68
|
571 // if(c<='9'){c=c-'0';}
|
jpayne@68
|
572 // else{c=c-'A'+10;}
|
jpayne@68
|
573 // accession=(accession*36)+c;
|
jpayne@68
|
574 // }
|
jpayne@68
|
575 // b++;
|
jpayne@68
|
576 // a=b;
|
jpayne@68
|
577 //
|
jpayne@68
|
578 // while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
579 // assert(b>a) : "Missing field 1: "+new String(line);
|
jpayne@68
|
580 // //accession2=new String(line, a, b-a);
|
jpayne@68
|
581 // b++;
|
jpayne@68
|
582 // a=b;
|
jpayne@68
|
583 //
|
jpayne@68
|
584 // while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
585 // assert(b>a) : "Missing field 2: "+new String(line);
|
jpayne@68
|
586 // ncbi=Parse.parseInt(line, a, b);
|
jpayne@68
|
587 // b++;
|
jpayne@68
|
588 // a=b;
|
jpayne@68
|
589 //
|
jpayne@68
|
590 //// while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
591 //// assert(b>a) : "Missing field 3: "+new String(line);
|
jpayne@68
|
592 ////// gi=Parse.parseInt(line, a, b);
|
jpayne@68
|
593 //// b++;
|
jpayne@68
|
594 //// a=b;
|
jpayne@68
|
595 //
|
jpayne@68
|
596 // if(ncbi<1){return false;}
|
jpayne@68
|
597 //
|
jpayne@68
|
598 // if(tree!=null){
|
jpayne@68
|
599 // if(ncbi>=tree.nodes.length){return false;}
|
jpayne@68
|
600 // TaxNode tn=tree.getNode(ncbi);
|
jpayne@68
|
601 // if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE || tn.level==TaxTree.DOMAIN){return false;}
|
jpayne@68
|
602 // if(tn.pid>=tree.nodes.length){return false;}
|
jpayne@68
|
603 // tn=tree.getNode(tn.pid);
|
jpayne@68
|
604 // if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE){return false;}
|
jpayne@68
|
605 // }
|
jpayne@68
|
606 // assert(accession>=0) : new String(line);
|
jpayne@68
|
607 // table.set(accession, ncbi);
|
jpayne@68
|
608 // return true;
|
jpayne@68
|
609 // }
|
jpayne@68
|
610
|
jpayne@68
|
611 //This code is no longer used and can be safely deleted.
|
jpayne@68
|
612 @Deprecated
|
jpayne@68
|
613 public boolean parseLine(final byte[] line, final byte delimiter){
|
jpayne@68
|
614 int a=0, b=0;
|
jpayne@68
|
615
|
jpayne@68
|
616 String accession;
|
jpayne@68
|
617 final int ncbi, gi;
|
jpayne@68
|
618
|
jpayne@68
|
619 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
620 assert(b>a) : "Missing field 0: "+new String(line);
|
jpayne@68
|
621 accession=new String(line, a, b-a);
|
jpayne@68
|
622 final int dot=accession.indexOf('.');//and :, but this is deprecated.
|
jpayne@68
|
623 if(dot>=0){//Should never happen
|
jpayne@68
|
624 // System.err.println(accession);
|
jpayne@68
|
625 // assert(dot==accession.length()-2) : accession;
|
jpayne@68
|
626 accession=accession.substring(0, dot);
|
jpayne@68
|
627 }
|
jpayne@68
|
628 // if(STRIP_UNDERSCORE){
|
jpayne@68
|
629 // accession=accession.replaceAll("[_-]", "");
|
jpayne@68
|
630 // }
|
jpayne@68
|
631 if(lengthCountsT!=null){lengthCountsT[b-a]++;}
|
jpayne@68
|
632 if(symbolCountsT!=null){
|
jpayne@68
|
633 for(int i=a; i<b; i++){symbolCountsT[line[i]]++;}
|
jpayne@68
|
634 }
|
jpayne@68
|
635 final int underscore=accession.indexOf('_');
|
jpayne@68
|
636 if(underscore>=0){
|
jpayne@68
|
637 if(counts_underscoreT!=null){counts_underscoreT[b-a]++;}
|
jpayne@68
|
638 if(counts_underscore2T!=null && underscore==2){counts_underscore2T[b-a]++;}
|
jpayne@68
|
639 }
|
jpayne@68
|
640 b++;
|
jpayne@68
|
641 a=b;
|
jpayne@68
|
642
|
jpayne@68
|
643 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
644 // assert(b>a) : "Missing field 1: "+new String(line);
|
jpayne@68
|
645 assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
|
jpayne@68
|
646 //accession2=new String(line, a, b-a);
|
jpayne@68
|
647 b++;
|
jpayne@68
|
648 a=b;
|
jpayne@68
|
649
|
jpayne@68
|
650 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
651 assert(b>a) : "Missing field 2: "+new String(line);
|
jpayne@68
|
652 ncbi=Parse.parseInt(line, a, b);
|
jpayne@68
|
653 b++;
|
jpayne@68
|
654 a=b;
|
jpayne@68
|
655
|
jpayne@68
|
656 // while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
657 // assert(b>a) : "Missing field 3: "+new String(line);
|
jpayne@68
|
658 //// gi=Parse.parseInt(line, a, b);
|
jpayne@68
|
659 // b++;
|
jpayne@68
|
660 // a=b;
|
jpayne@68
|
661
|
jpayne@68
|
662 if(ncbi<1){return false;}
|
jpayne@68
|
663
|
jpayne@68
|
664 if(tree!=null){
|
jpayne@68
|
665 if(ncbi>=tree.nodes.length){return false;}
|
jpayne@68
|
666 TaxNode tn=tree.getNode(ncbi);
|
jpayne@68
|
667 if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;}
|
jpayne@68
|
668 if(tn.pid>=tree.nodes.length){return false;}
|
jpayne@68
|
669 tn=tree.getNode(tn.pid);
|
jpayne@68
|
670 if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E){return false;}
|
jpayne@68
|
671 }
|
jpayne@68
|
672
|
jpayne@68
|
673 if(accession.length()<13 && USE_TABLES){
|
jpayne@68
|
674 long number=hash(accession);
|
jpayne@68
|
675 assert(number>=0) : new String(line);
|
jpayne@68
|
676 table.set(number, ncbi);
|
jpayne@68
|
677 return true;
|
jpayne@68
|
678 }
|
jpayne@68
|
679
|
jpayne@68
|
680 int way=accession.charAt(0);
|
jpayne@68
|
681 mapsT[way].put(accession, ncbi);
|
jpayne@68
|
682 // Integer old=mapsT[way].put(accession, ncbi);
|
jpayne@68
|
683 // assert(old==null || old==ncbi) : "'"+accession+"': "+old+" -> "+ncbi;
|
jpayne@68
|
684 // System.err.println("'"+accession+"': "+old+" -> "+ncbi);
|
jpayne@68
|
685 // assert(dot==-1) : "'"+accession+"': "+old+" -> "+ncbi;
|
jpayne@68
|
686 return true;
|
jpayne@68
|
687 }
|
jpayne@68
|
688
|
jpayne@68
|
689 public boolean parseLine2(final byte[] line, final byte delimiter){
|
jpayne@68
|
690 int a=0, b=0;
|
jpayne@68
|
691
|
jpayne@68
|
692 final int ncbi, gi;
|
jpayne@68
|
693
|
jpayne@68
|
694 while(b<line.length && line[b]!=delimiter
|
jpayne@68
|
695 && line[b]!='.' && line[b]!=':' && line[b]!=','){b++;}//parse unique part of accession
|
jpayne@68
|
696 final int dot=b;
|
jpayne@68
|
697 assert(b>a) : "Missing field 0: "+new String(line);
|
jpayne@68
|
698 while(b<line.length && line[b]!=delimiter){b++;}//skip the rest of the accession
|
jpayne@68
|
699
|
jpayne@68
|
700 //System.err.println("Line: "+new String(line)+"\n"+Arrays.toString(line));
|
jpayne@68
|
701 //System.err.println("A: dot="+dot+", a="+a+", b="+b);
|
jpayne@68
|
702
|
jpayne@68
|
703 {//Optional block
|
jpayne@68
|
704 if(lengthCountsT!=null){lengthCountsT[dot]++;}
|
jpayne@68
|
705 if(symbolCountsT!=null){
|
jpayne@68
|
706 for(int i=0; i<dot; i++){symbolCountsT[line[i]]++;}
|
jpayne@68
|
707 }
|
jpayne@68
|
708 if(counts_underscoreT!=null || counts_underscore2T!=null){
|
jpayne@68
|
709 int underscore=-1;
|
jpayne@68
|
710 for(int i=0; i<dot; i++){
|
jpayne@68
|
711 if(line[i]=='_'){
|
jpayne@68
|
712 underscore=i;
|
jpayne@68
|
713 break;
|
jpayne@68
|
714 }
|
jpayne@68
|
715 }
|
jpayne@68
|
716 if(underscore>=0){
|
jpayne@68
|
717 if(counts_underscoreT!=null){counts_underscoreT[dot]++;}
|
jpayne@68
|
718 if(counts_underscore2T!=null && underscore==2){counts_underscore2T[dot]++;}
|
jpayne@68
|
719 }
|
jpayne@68
|
720 }
|
jpayne@68
|
721 }
|
jpayne@68
|
722 b++;
|
jpayne@68
|
723 a=b;
|
jpayne@68
|
724
|
jpayne@68
|
725 //System.err.println("B: a="+a+", b="+b);
|
jpayne@68
|
726
|
jpayne@68
|
727 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
728 // assert(b>a) : "Missing field 1: "+new String(line);
|
jpayne@68
|
729 assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
|
jpayne@68
|
730 //accession2=new String(line, a, b-a);
|
jpayne@68
|
731 b++;
|
jpayne@68
|
732 a=b;
|
jpayne@68
|
733
|
jpayne@68
|
734 //System.err.println("C: a="+a+", b="+b);
|
jpayne@68
|
735
|
jpayne@68
|
736 while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
737 assert(b>a) : "Missing field 2: "+new String(line);
|
jpayne@68
|
738 ncbi=Parse.parseInt(line, a, b);
|
jpayne@68
|
739 //System.err.println("D: a="+a+", b="+b+", ncbi="+ncbi+", '"+(new String(line, a, b-a))+"'");
|
jpayne@68
|
740 b++;
|
jpayne@68
|
741 a=b;
|
jpayne@68
|
742
|
jpayne@68
|
743 // while(b<line.length && line[b]!=delimiter){b++;}
|
jpayne@68
|
744 // assert(b>a) : "Missing field 3: "+new String(line);
|
jpayne@68
|
745 //// gi=Parse.parseInt(line, a, b);
|
jpayne@68
|
746 // b++;
|
jpayne@68
|
747 // a=b;
|
jpayne@68
|
748
|
jpayne@68
|
749 if(ncbi<1){return false;}
|
jpayne@68
|
750 //System.err.println("E: a="+a+", b="+b);
|
jpayne@68
|
751 if(skipHash){return false;}//123
|
jpayne@68
|
752 //System.err.println("F: a="+a+", b="+b);
|
jpayne@68
|
753
|
jpayne@68
|
754 if(tree!=null){
|
jpayne@68
|
755 if(ncbi>=tree.nodes.length){return false;}
|
jpayne@68
|
756 //System.err.println("G");
|
jpayne@68
|
757 TaxNode tn=tree.getNode(ncbi);
|
jpayne@68
|
758 if(tn==null || /*tn.levelExtended==TaxTree.NO_RANK_E ||*/ tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;}
|
jpayne@68
|
759 //System.err.println("H: "+tn);
|
jpayne@68
|
760 if(tn.pid>=tree.nodes.length){return false;}
|
jpayne@68
|
761 //System.err.println("I: "+tn);
|
jpayne@68
|
762 // TaxNode parent=tree.getNode(tn.pid);
|
jpayne@68
|
763 // System.err.println("J: "+tn);
|
jpayne@68
|
764 // if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E){return false;}
|
jpayne@68
|
765 // System.err.println("K");
|
jpayne@68
|
766 }
|
jpayne@68
|
767
|
jpayne@68
|
768 if(distributed){
|
jpayne@68
|
769 String accession=new String(line, 0, dot);//slow
|
jpayne@68
|
770 assert(accession.equals(accession.toUpperCase()));//TODO: Disable. (slow)
|
jpayne@68
|
771 if(accession.hashCode()%serverCount!=serverNum){return false;}
|
jpayne@68
|
772 }
|
jpayne@68
|
773
|
jpayne@68
|
774 if(USE_TABLES){
|
jpayne@68
|
775 if(AnalyzeAccession.codeMap!=null){
|
jpayne@68
|
776 // if(dot>AnalyzeAccession.longestPattern){return false;}
|
jpayne@68
|
777 final long number=AnalyzeAccession.digitize(line);
|
jpayne@68
|
778 if(number>=0){
|
jpayne@68
|
779 table.set(number, ncbi);
|
jpayne@68
|
780 return true;
|
jpayne@68
|
781 }
|
jpayne@68
|
782 assert(number==-1) : number+", "+new String(line);
|
jpayne@68
|
783 }else{
|
jpayne@68
|
784 if(dot<13){
|
jpayne@68
|
785 // long number=hash(accession);
|
jpayne@68
|
786 final long number=hash(line, dot);
|
jpayne@68
|
787 assert(number>=0) : new String(line);
|
jpayne@68
|
788 table.set(number, ncbi);
|
jpayne@68
|
789 return true;
|
jpayne@68
|
790 }
|
jpayne@68
|
791 }
|
jpayne@68
|
792 }
|
jpayne@68
|
793
|
jpayne@68
|
794 String accession=new String(line, 0, dot);
|
jpayne@68
|
795 int way=accession.charAt(0);
|
jpayne@68
|
796 mapsT[way].put(accession, ncbi);
|
jpayne@68
|
797 // Integer old=mapsT[way].put(accession, ncbi);
|
jpayne@68
|
798 // assert(old==null || old==ncbi) : "'"+accession+"': "+old+" -> "+ncbi;
|
jpayne@68
|
799 // System.err.println("'"+accession+"': "+old+" -> "+ncbi);
|
jpayne@68
|
800 // assert(dot==-1) : "'"+accession+"': "+old+" -> "+ncbi;
|
jpayne@68
|
801 return true;
|
jpayne@68
|
802 }
|
jpayne@68
|
803
|
jpayne@68
|
804 private long linesProcessedT=0;//Per-instance tally; NOTE(review): presumably the local counterpart of the outer linesProcessed, updated outside this view - confirm
|
jpayne@68
|
805 private long linesValidT=0;//Per-instance tally; NOTE(review): presumably counts lines for which the parser returned true - confirm
|
jpayne@68
|
806 private long bytesProcessedT=0;//Per-instance tally; NOTE(review): presumably the local counterpart of the outer bytesProcessed - confirm
|
jpayne@68
|
807
|
jpayne@68
|
808 final ByteFile bf;//Assigned once during construction (final, no initializer)
|
jpayne@68
|
809 HashMap<String, Integer>[] mapsT;//Fallback store: mapsT[first char of accession].put(accession, ncbi) for accessions that were not written to table
|
jpayne@68
|
810 HashBuffer table;//Receives (numeric accession code, ncbi) via set() when USE_TABLES is on and the accession could be digitized or hashed
|
jpayne@68
|
811 boolean success=false;//Starts false; NOTE(review): set outside this view - confirm where
|
jpayne@68
|
812
|
jpayne@68
|
813 private long[] lengthCountsT=null;//new AtomicLongArray(20); Null = disabled; when set, [dot]++ in the parser's optional statistics block (dot = accession length)
|
jpayne@68
|
814 private long[] symbolCountsT=null;//new AtomicLongArray(255); Null = disabled; when set, [line[i]]++ for each of the first dot bytes, i.e. a per-byte-value count over the accession
|
jpayne@68
|
815 private long[] counts_underscoreT=null;//new AtomicLongArray(20); Null = disabled; when set, [dot]++ for accessions that contain '_'
|
jpayne@68
|
816 private long[] counts_underscore2T=null;//new AtomicLongArray(20); Null = disabled; when set, [dot]++ for accessions whose first '_' is at index 2
|
jpayne@68
|
817 }
|
jpayne@68
|
818
|
jpayne@68
|
819 /*--------------------------------------------------------------*/
|
jpayne@68
|
820
|
jpayne@68
|
821
|
jpayne@68
|
822 /*--------------------------------------------------------------*/
|
jpayne@68
|
823
|
jpayne@68
|
824 private ArrayList<String> in=new ArrayList<String>();//Starts empty; NOTE(review): presumably the input file paths taken from the "in=" argument - confirm
|
jpayne@68
|
825 // private String out=null;
|
jpayne@68
|
826
|
jpayne@68
|
827 static int maxPigzProcesses=12;//Package-private and mutable, default 12; NOTE(review): presumably caps concurrent pigz decompression processes - confirm
|
jpayne@68
|
828
|
jpayne@68
|
829 /*--------------------------------------------------------------*/
|
jpayne@68
|
830
|
jpayne@68
|
831 private long linesProcessed=0;//NOTE(review): presumably the run-wide total accumulated from the per-instance linesProcessedT values - confirm
|
jpayne@68
|
832 private long linesValid=0;//NOTE(review): presumably the run-wide total accumulated from linesValidT - confirm
|
jpayne@68
|
833 private long bytesProcessed=0;//NOTE(review): presumably the run-wide total accumulated from bytesProcessedT - confirm
|
jpayne@68
|
834
|
jpayne@68
|
835 private AtomicLongArray lengthCounts=null;//new AtomicLongArray(20); Null by default; NOTE(review): presumably the shared counterpart of lengthCountsT - confirm
|
jpayne@68
|
836 private AtomicLongArray symbolCounts=null;//new AtomicLongArray(255); Null by default; NOTE(review): presumably the shared counterpart of symbolCountsT - confirm
|
jpayne@68
|
837 private AtomicLongArray counts_underscore=null;//new AtomicLongArray(20); Null by default; NOTE(review): presumably the shared counterpart of counts_underscoreT - confirm
|
jpayne@68
|
838 private AtomicLongArray counts_underscore2=null;//new AtomicLongArray(20); Null by default; NOTE(review): presumably the shared counterpart of counts_underscore2T - confirm
|
jpayne@68
|
839
|
jpayne@68
|
840 /*--------------------------------------------------------------*/
|
jpayne@68
|
841
|
jpayne@68
|
842 private final FileFormat ffin[];//Assigned once during construction (final, no initializer); NOTE(review): presumably one FileFormat per entry of in - confirm
|
jpayne@68
|
843 // private final FileFormat ffout;
|
jpayne@68
|
844
|
jpayne@68
|
845
|
jpayne@68
|
846 /*--------------------------------------------------------------*/
|
jpayne@68
|
847
|
jpayne@68
|
848 public static boolean LOADED(){return LOADED;}//Read-only accessor for the private static LOADED flag
|
jpayne@68
|
849
|
jpayne@68
|
850 private static boolean LOADED=false;//Value returned by LOADED(); starts false and is set outside this view
|
jpayne@68
|
851 private static HashMap<String, Integer>[] maps=null;//NOTE(review): presumably the shared accession->taxid maps with the same layout as the per-instance mapsT - confirm
|
jpayne@68
|
852 private static KmerTableSet tables;//NOTE(review): presumably the shared store behind the per-instance HashBuffer table - confirm
|
jpayne@68
|
853 public static TaxTree tree=null;//Optional filter: when non-null, the line parser rejects a taxid if ncbi>=tree.nodes.length, its node is null, its level is LIFE_E or DOMAIN_E, or its pid>=tree.nodes.length
|
jpayne@68
|
854 // public static final boolean USE_MAPS=true;
|
jpayne@68
|
855 public static final boolean USE_TABLES=true;//When true, accessions that can be encoded as a number (AnalyzeAccession.digitize, or hash() when dot<13) go to table; the rest fall back to the String maps
|
jpayne@68
|
856 // public static boolean STRIP_UNDERSCORE=false;
|
jpayne@68
|
857 public static boolean skipParse=false;//NOTE(review): not referenced in the visible code; presumably bypasses line parsing - confirm
|
jpayne@68
|
858 public static boolean skipHash=false;//When true, the line parser returns false right after reading the taxid, so nothing is written to table or mapsT
|
jpayne@68
|
859 public static float prealloc=0;//NOTE(review): not referenced in the visible code; presumably a preallocation fraction for the tables - confirm
|
jpayne@68
|
860 private static final long offset=-'A'+11;//Equals 11-'A', so 'A'+offset is 11; NOTE(review): presumably used by hash() to map uppercase letters - confirm
|
jpayne@68
|
861 private static final long offsetLower=-'a'+11;//Equals 11-'a', so 'a'+offsetLower is 11; NOTE(review): presumably the lowercase counterpart of offset in hash() - confirm
|
jpayne@68
|
862
|
jpayne@68
|
863 public static int serverNum=0;//Compared against accession.hashCode()%serverCount when distributed is true
|
jpayne@68
|
864 public static int serverCount=1;//Modulus applied to accession.hashCode() when distributed is true; default 1
|
jpayne@68
|
865 public static boolean distributed=false;//When true, a line is kept only if accession.hashCode()%serverCount==serverNum. NOTE(review): Java's % is negative for negative hash codes, so such accessions match no serverNum>=0 - consider Math.floorMod
|
jpayne@68
|
866
|
jpayne@68
|
867 /*--------------------------------------------------------------*/
|
jpayne@68
|
868
|
jpayne@68
|
869 private PrintStream outstream=System.err;//Defaults to System.err
|
jpayne@68
|
870 public static boolean verbose=false;//NOTE(review): not referenced in the visible code; presumably enables extra diagnostics - confirm
|
jpayne@68
|
871 public boolean errorState=false;//Starts false; NOTE(review): presumably set when loading fails - confirm
|
jpayne@68
|
872 private boolean overwrite=false;//NOTE(review): the out and ffout fields are commented out in this class, so this flag may be unused - confirm
|
jpayne@68
|
873
|
jpayne@68
|
874 }
|