jpayne@68
|
1 package tax;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.File;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5
|
jpayne@68
|
6 import fileIO.ByteFile;
|
jpayne@68
|
7 import fileIO.ReadWrite;
|
jpayne@68
|
8 import shared.Parse;
|
jpayne@68
|
9 import shared.Shared;
|
jpayne@68
|
10 import shared.Tools;
|
jpayne@68
|
11 import structures.IntList;
|
jpayne@68
|
12
|
jpayne@68
|
13 /**
|
jpayne@68
|
14 * @author Brian Bushnell
|
jpayne@68
|
15 * @date Mar 10, 2015
|
jpayne@68
|
16 *
|
jpayne@68
|
17 */
|
jpayne@68
|
18 public class GiToTaxid {
|
jpayne@68
|
19
|
jpayne@68
|
20 public static void main(String[] args){
|
jpayne@68
|
21 ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
22 ReadWrite.USE_PIGZ=true;
|
jpayne@68
|
23 ReadWrite.ZIPLEVEL=9;
|
jpayne@68
|
24 ReadWrite.PIGZ_BLOCKSIZE=256;
|
jpayne@68
|
25 // ReadWrite.PIGZ_ITERATIONS=30;
|
jpayne@68
|
26
|
jpayne@68
|
27 for(String arg : args){
|
jpayne@68
|
28 String[] split=arg.split("=");
|
jpayne@68
|
29 String a=split[0].toLowerCase();
|
jpayne@68
|
30 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
31 shared.Parser.parseZip(arg, a, b);
|
jpayne@68
|
32 }
|
jpayne@68
|
33 // if(args.length>2 && false){//Run a test
|
jpayne@68
|
34 // test(args);
|
jpayne@68
|
35 // }else
|
jpayne@68
|
36 if(args.length>=2){//Write array
|
jpayne@68
|
37 initialize(args[0]);
|
jpayne@68
|
38 ReadWrite.write(array, args[1], true);
|
jpayne@68
|
39 }
|
jpayne@68
|
40 }
|
jpayne@68
|
41
|
jpayne@68
|
42 public static void test(String[] args){
|
jpayne@68
|
43 System.err.println(getID(1000));
|
jpayne@68
|
44 System.err.println(getID(10000));
|
jpayne@68
|
45 System.err.println(getID(10001));
|
jpayne@68
|
46 System.err.println(getID(10002));
|
jpayne@68
|
47 System.err.println(getID(10003));
|
jpayne@68
|
48 System.err.println(getID(10004));
|
jpayne@68
|
49 System.err.println(getID(10005));
|
jpayne@68
|
50 System.err.println(getID(100000));
|
jpayne@68
|
51 System.err.println(getID(1000000));
|
jpayne@68
|
52 System.err.println(getID(10000000));
|
jpayne@68
|
53
|
jpayne@68
|
54 TaxTree tree=null;
|
jpayne@68
|
55 if(args.length>1){
|
jpayne@68
|
56 tree=TaxTree.loadTaxTree(args[0], System.err, true, true);
|
jpayne@68
|
57 }
|
jpayne@68
|
58
|
jpayne@68
|
59 System.err.println("Strings:");
|
jpayne@68
|
60 int x;
|
jpayne@68
|
61 x=getID("gi|18104025|emb|AJ427095.1| Ceratitis capitata centromeric or pericentromeric satellite DNA, clone 44");
|
jpayne@68
|
62 System.err.println(x);
|
jpayne@68
|
63 if(tree!=null){
|
jpayne@68
|
64 System.err.println(tree.getNode(x));
|
jpayne@68
|
65 tree.incrementRaw(x, 30);
|
jpayne@68
|
66 }
|
jpayne@68
|
67 x=getID("gi|15982920|gb|AY057568.1| Arabidopsis thaliana AT5g43500/MWF20_22 mRNA, complete cds");
|
jpayne@68
|
68 System.err.println(x);
|
jpayne@68
|
69 if(tree!=null){
|
jpayne@68
|
70 System.err.println(tree.getNode(x));
|
jpayne@68
|
71 tree.incrementRaw(x, 40);
|
jpayne@68
|
72 }
|
jpayne@68
|
73 x=getID("gi|481043749|gb|KC494054.1| Plesiochorus cymbiformis isolate ST05-58 internal transcribed spacer 2, partial sequence");
|
jpayne@68
|
74 System.err.println(x);
|
jpayne@68
|
75 if(tree!=null){
|
jpayne@68
|
76 System.err.println(tree.getNode(x));
|
jpayne@68
|
77 tree.incrementRaw(x, 20);
|
jpayne@68
|
78 }
|
jpayne@68
|
79
|
jpayne@68
|
80 if(tree!=null){
|
jpayne@68
|
81 tree.percolateUp();
|
jpayne@68
|
82 ArrayList<TaxNode> nodes=tree.gatherNodesAtLeastLimit(35);
|
jpayne@68
|
83 for(TaxNode n : nodes){
|
jpayne@68
|
84 System.err.println(n);
|
jpayne@68
|
85 }
|
jpayne@68
|
86 }
|
jpayne@68
|
87 }
|
jpayne@68
|
88
|
jpayne@68
|
89 public static int parseGiToTaxid(String s){return parseGiToTaxid(s, '|');}
|
jpayne@68
|
90 public static int parseGiToTaxid(String s, char delimiter){
|
jpayne@68
|
91 long x=parseGiNumber(s, delimiter);
|
jpayne@68
|
92 assert(x>=0) : x+", "+s;
|
jpayne@68
|
93 return getID(x);
|
jpayne@68
|
94 }
|
jpayne@68
|
95
|
jpayne@68
|
96
|
jpayne@68
|
97 public static int parseGiToTaxid(byte[] s){return parseGiToTaxid(s, '|');}
|
jpayne@68
|
98 public static int parseGiToTaxid(byte[] s, char delimiter){
|
jpayne@68
|
99 long x=parseGiNumber(s, delimiter);
|
jpayne@68
|
100 return x<0 ? -1 : getID(x);
|
jpayne@68
|
101 }
|
jpayne@68
|
102
|
jpayne@68
|
103 /** Parse a gi number, or return -1 if formatted incorrectly. */
|
jpayne@68
|
104 static long parseGiNumber(String s, char delimiter){
|
jpayne@68
|
105 if(s==null || s.length()<4){return -1;}
|
jpayne@68
|
106 if(s.charAt(0)=='>'){return getID(s.substring(1), delimiter);}
|
jpayne@68
|
107 if(!s.startsWith("gi")){return -1;}
|
jpayne@68
|
108 int initial=s.indexOf(delimiter);
|
jpayne@68
|
109 if(initial<0){
|
jpayne@68
|
110 if(delimiter!='~'){
|
jpayne@68
|
111 delimiter='~';
|
jpayne@68
|
112 initial=s.indexOf(delimiter);
|
jpayne@68
|
113 }
|
jpayne@68
|
114 if(initial<0){
|
jpayne@68
|
115 delimiter='_';
|
jpayne@68
|
116 initial=s.indexOf(delimiter);
|
jpayne@68
|
117 }
|
jpayne@68
|
118 if(initial<0){return -1;}
|
jpayne@68
|
119 }
|
jpayne@68
|
120 if(!Tools.isDigit(s.charAt(initial+1))){return -1;}
|
jpayne@68
|
121
|
jpayne@68
|
122 long number=0;
|
jpayne@68
|
123 for(int i=initial+1; i<s.length(); i++){
|
jpayne@68
|
124 char c=s.charAt(i);
|
jpayne@68
|
125 if(c==delimiter){break;}
|
jpayne@68
|
126 assert(Tools.isDigit(c));
|
jpayne@68
|
127 number=(number*10)+(c-'0');
|
jpayne@68
|
128 }
|
jpayne@68
|
129 return number;
|
jpayne@68
|
130 }
|
jpayne@68
|
131
|
jpayne@68
|
132 /** Parse a ncbi number, or return -1 if formatted incorrectly. */
|
jpayne@68
|
133 public static int parseTaxidNumber(String s, char delimiter){
|
jpayne@68
|
134 if(s==null || s.length()<5){return -1;}
|
jpayne@68
|
135 if(s.charAt(0)=='>'){return parseTaxidNumber(s.substring(1), delimiter);}
|
jpayne@68
|
136 if(!s.startsWith("ncbi") && !s.startsWith("tid")){return -1;}
|
jpayne@68
|
137 int initial=s.indexOf(delimiter);
|
jpayne@68
|
138 if(initial<0){
|
jpayne@68
|
139 delimiter='_';
|
jpayne@68
|
140 initial=s.indexOf(delimiter);
|
jpayne@68
|
141 if(initial<0){return -1;}
|
jpayne@68
|
142 }
|
jpayne@68
|
143 if(!Tools.isDigit(s.charAt(initial+1))){return -1;}
|
jpayne@68
|
144
|
jpayne@68
|
145 int number=0;
|
jpayne@68
|
146 for(int i=initial+1; i<s.length(); i++){
|
jpayne@68
|
147 char c=s.charAt(i);
|
jpayne@68
|
148 if(c==delimiter || c==' '){break;}
|
jpayne@68
|
149 assert(Tools.isDigit(c)) : c+"\n"+s;
|
jpayne@68
|
150 number=(number*10)+(c-'0');
|
jpayne@68
|
151 }
|
jpayne@68
|
152 return number;
|
jpayne@68
|
153 }
|
jpayne@68
|
154
|
jpayne@68
|
155
|
jpayne@68
|
156 public static int getID(String s){return getID(s, '|');}
|
jpayne@68
|
157 /** Get the taxID from a header starting with a taxID or gi number */
|
jpayne@68
|
158 public static int getID(String s, char delimiter){
|
jpayne@68
|
159 long x=parseTaxidNumber(s, delimiter);
|
jpayne@68
|
160 if(x>=0){return (int)x;}
|
jpayne@68
|
161 x=parseGiNumber(s, delimiter);
|
jpayne@68
|
162 return x<0 ? -1 : getID(x);
|
jpayne@68
|
163 }
|
jpayne@68
|
164
|
jpayne@68
|
165 /** Parse a gi number, or return -1 if formatted incorrectly. */
|
jpayne@68
|
166 static long parseGiNumber(byte[] s, char delimiter){
|
jpayne@68
|
167 if(s==null || s.length<4){return -1;}
|
jpayne@68
|
168 if(!Tools.startsWith(s, "gi") && !Tools.startsWith(s, ">gi")){return -1;}
|
jpayne@68
|
169 int initial=Tools.indexOf(s, (byte)delimiter);
|
jpayne@68
|
170 if(initial<0){
|
jpayne@68
|
171 delimiter='_';
|
jpayne@68
|
172 initial=Tools.indexOf(s, (byte)delimiter);
|
jpayne@68
|
173 if(initial<0){return -1;}
|
jpayne@68
|
174 }
|
jpayne@68
|
175 if(!Tools.isDigit(s[initial+1])){return -1;}
|
jpayne@68
|
176
|
jpayne@68
|
177 long number=0;
|
jpayne@68
|
178 for(int i=initial+1; i<s.length; i++){
|
jpayne@68
|
179 byte c=s[i];
|
jpayne@68
|
180 if(c==delimiter){break;}
|
jpayne@68
|
181 assert(Tools.isDigit(c));
|
jpayne@68
|
182 number=(number*10)+(c-'0');
|
jpayne@68
|
183 }
|
jpayne@68
|
184 return number;
|
jpayne@68
|
185 }
|
jpayne@68
|
186
|
jpayne@68
|
187 /** Parse a gi number, or return -1 if formatted incorrectly. */
|
jpayne@68
|
188 static int parseNcbiNumber(byte[] s, char delimiter){
|
jpayne@68
|
189 if(s==null || s.length<3){return -1;}
|
jpayne@68
|
190 if(!Tools.startsWith(s, "ncbi") && !Tools.startsWith(s, ">ncbi") && !Tools.startsWith(s, "tid") && !Tools.startsWith(s, ">tid")){return -1;}
|
jpayne@68
|
191 int initial=Tools.indexOf(s, (byte)delimiter);
|
jpayne@68
|
192 if(initial<0){
|
jpayne@68
|
193 delimiter='_';
|
jpayne@68
|
194 initial=Tools.indexOf(s, (byte)delimiter);
|
jpayne@68
|
195 if(initial<0){return -1;}
|
jpayne@68
|
196 }
|
jpayne@68
|
197 if(!Tools.isDigit(s[initial+1])){return -1;}
|
jpayne@68
|
198
|
jpayne@68
|
199 int number=0;
|
jpayne@68
|
200 for(int i=initial+1; i<s.length; i++){
|
jpayne@68
|
201 byte c=s[i];
|
jpayne@68
|
202 if(c==delimiter){break;}
|
jpayne@68
|
203 assert(Tools.isDigit(c));
|
jpayne@68
|
204 number=(number*10)+(c-'0');
|
jpayne@68
|
205 }
|
jpayne@68
|
206 return number;
|
jpayne@68
|
207 }
|
jpayne@68
|
208
|
jpayne@68
|
209 public static int getID(byte[] s){return getID(s, '|');}
|
jpayne@68
|
210 /** Get the taxID from a header starting with a taxID or gi number */
|
jpayne@68
|
211 public static int getID(byte[] s, char delimiter){
|
jpayne@68
|
212 long x=parseGiNumber(s, delimiter);
|
jpayne@68
|
213 if(x>=0){return getID(x, true);}
|
jpayne@68
|
214 return parseNcbiNumber(s, delimiter);
|
jpayne@68
|
215 }
|
jpayne@68
|
216
|
jpayne@68
|
217 /** Get the taxID from a gi number;
|
jpayne@68
|
218 * -1 if not present or invalid (negative input),
|
jpayne@68
|
219 * -2 if out of range (too high) */
|
jpayne@68
|
220 public static int getID(long gi){
|
jpayne@68
|
221 return getID(gi, true);
|
jpayne@68
|
222 }
|
jpayne@68
|
223
|
jpayne@68
|
224 /** Get the taxID from a gi number;
|
jpayne@68
|
225 * 0 if not present,
|
jpayne@68
|
226 * -1 if invalid (negative input),
|
jpayne@68
|
227 * -2 if out of range (too high) */
|
jpayne@68
|
228 public static int getID(long gi, boolean assertInRange){
|
jpayne@68
|
229 assert(initialized) : "To use gi numbers, you must load a gi table.";
|
jpayne@68
|
230 if(gi<0 || gi>maxGiLoaded){
|
jpayne@68
|
231 assert(!assertInRange) : gi<0 ? "gi number "+gi+" is invalid." :
|
jpayne@68
|
232 "The gi number "+gi+" is too big: Max loaded gi number is "+maxGiLoaded+".\n"
|
jpayne@68
|
233 + "Please update the gi table with the latest version from NCBI"
|
jpayne@68
|
234 + " as per the instructions in gitable.sh.\n"
|
jpayne@68
|
235 + "To ignore this problem, please run with the -da flag.\n";
|
jpayne@68
|
236 return gi<0 ? -1 : -2;
|
jpayne@68
|
237 }
|
jpayne@68
|
238 final long upper=gi>>>SHIFT;
|
jpayne@68
|
239 final int lower=(int)(gi&LOWERMASK);
|
jpayne@68
|
240 assert(upper<Shared.MAX_ARRAY_LEN && upper<array.length) : gi+", "+upper+", "+array.length;
|
jpayne@68
|
241 final int[] slice=array[(int)upper];
|
jpayne@68
|
242 return slice==null || slice.length<=lower ? 0 : slice[lower];
|
jpayne@68
|
243 }
|
jpayne@68
|
244
|
jpayne@68
|
245 public static void initialize(String fname){
|
jpayne@68
|
246 assert(fname!=null);
|
jpayne@68
|
247 if(fileString==null || !fileString.equals(fname)){
|
jpayne@68
|
248 synchronized(GiToTaxid.class){
|
jpayne@68
|
249 if(!initialized || fileString==null || !fileString.equals(fname)){
|
jpayne@68
|
250 fileString=fname;
|
jpayne@68
|
251 if(fname.contains(".int2d")){
|
jpayne@68
|
252 array=ReadWrite.read(int[][].class, fname, true);
|
jpayne@68
|
253 maxGiLoaded=-1;
|
jpayne@68
|
254 if(array!=null && array.length>0){
|
jpayne@68
|
255 int upper=array.length-1;
|
jpayne@68
|
256 int[] section=array[upper];
|
jpayne@68
|
257 int lower=section.length-1;
|
jpayne@68
|
258 maxGiLoaded=(((long)upper)<<SHIFT)|lower;
|
jpayne@68
|
259 }
|
jpayne@68
|
260 }else if(fname.contains(".int1d")){
|
jpayne@68
|
261 throw new RuntimeException("Old gi table format filename "+fname+".\n"
|
jpayne@68
|
262 + "Current files should end in .int2d.");
|
jpayne@68
|
263
|
jpayne@68
|
264 }else{
|
jpayne@68
|
265 array=makeArray(fname);
|
jpayne@68
|
266 }
|
jpayne@68
|
267 }
|
jpayne@68
|
268 initialized=true;
|
jpayne@68
|
269 }
|
jpayne@68
|
270 }
|
jpayne@68
|
271 }
|
jpayne@68
|
272
|
jpayne@68
|
273 public static boolean isInitialized(){return initialized;}
|
jpayne@68
|
274
|
jpayne@68
|
275 public static synchronized void unload(){
|
jpayne@68
|
276 maxGiLoaded=-1;
|
jpayne@68
|
277 array=null;
|
jpayne@68
|
278 fileString=null;
|
jpayne@68
|
279 initialized=false;
|
jpayne@68
|
280 }
|
jpayne@68
|
281
|
jpayne@68
|
282 private static int[][] makeArray(String fnames){
|
jpayne@68
|
283 String[] split;
|
jpayne@68
|
284 if(new File(fnames).exists()){split=new String[] {fnames};}
|
jpayne@68
|
285 else if(fnames.indexOf(',')>=0){split=fnames.split(",");}
|
jpayne@68
|
286 else if(fnames.indexOf('#')>=0){
|
jpayne@68
|
287 assert(fnames.indexOf("/")<0) : "Note: Wildcard # only works for "
|
jpayne@68
|
288 + "relative paths in present working directory.";
|
jpayne@68
|
289 File dir=new File(System.getProperty("user.dir"));
|
jpayne@68
|
290 String prefix=fnames.substring(0, fnames.indexOf('#'));
|
jpayne@68
|
291 String suffix=fnames.substring(fnames.indexOf('#')+1);
|
jpayne@68
|
292
|
jpayne@68
|
293 File[] array=dir.listFiles();
|
jpayne@68
|
294 StringBuilder sb=new StringBuilder();
|
jpayne@68
|
295 String comma="";
|
jpayne@68
|
296 for(File f : array){
|
jpayne@68
|
297 String s=f.getName();
|
jpayne@68
|
298 if(s.startsWith(prefix) && s.startsWith(suffix)){
|
jpayne@68
|
299 sb.append(comma);
|
jpayne@68
|
300 sb.append(s);
|
jpayne@68
|
301 comma=",";
|
jpayne@68
|
302 }
|
jpayne@68
|
303 }
|
jpayne@68
|
304 split=sb.toString().split(",");
|
jpayne@68
|
305 }else{
|
jpayne@68
|
306 throw new RuntimeException("Invalid file: "+fnames);
|
jpayne@68
|
307 }
|
jpayne@68
|
308
|
jpayne@68
|
309 int numLists=32;
|
jpayne@68
|
310 IntList[] lists=new IntList[numLists];
|
jpayne@68
|
311
|
jpayne@68
|
312 long total=0;
|
jpayne@68
|
313 for(String s : split){
|
jpayne@68
|
314 long count=addToList(s, lists);
|
jpayne@68
|
315 total+=count;
|
jpayne@68
|
316 }
|
jpayne@68
|
317 for(int i=0; i<lists.length; i++){
|
jpayne@68
|
318 if(lists[i]!=null && lists[i].size>0){
|
jpayne@68
|
319 lists[i].shrink();
|
jpayne@68
|
320 numLists=i+1;
|
jpayne@68
|
321 }
|
jpayne@68
|
322 }
|
jpayne@68
|
323 int[][] table=new int[numLists][];
|
jpayne@68
|
324 for(int i=0; i<numLists; i++){
|
jpayne@68
|
325 table[i]=lists[i].array;
|
jpayne@68
|
326 }
|
jpayne@68
|
327 return table;
|
jpayne@68
|
328 }
|
jpayne@68
|
329
|
jpayne@68
|
330 private static long addToList(String fname, IntList[] lists){
|
jpayne@68
|
331 boolean warned=false;
|
jpayne@68
|
332 ByteFile bf=ByteFile.makeByteFile(fname, true);
|
jpayne@68
|
333 long count=0, invalid=0;
|
jpayne@68
|
334 byte[] line=bf.nextLine();
|
jpayne@68
|
335 while(line!=null){
|
jpayne@68
|
336 if(line.length>0 && Tools.isDigit(line[line.length-1])){//Invalid lines will end with tab or na
|
jpayne@68
|
337 count++;
|
jpayne@68
|
338 int tab2=Tools.indexOfNth(line, '\t', 2);
|
jpayne@68
|
339 int tab3=Tools.indexOfNth(line, '\t', 1, tab2+1);
|
jpayne@68
|
340 assert(tab2>0 && (tab2<tab3) && tab3<line.length) : tab2+", "+tab3+", "+line.length;
|
jpayne@68
|
341 assert(tab2<line.length && line[tab2]=='\t') : tab2+", "+tab3+", '"+new String(line)+"'";
|
jpayne@68
|
342 assert(tab3<line.length && line[tab3]=='\t') : tab2+", "+tab3+", '"+new String(line)+"'";
|
jpayne@68
|
343 //assert(false) : tab2+", "+tab3+", '"+new String(line)+"'";
|
jpayne@68
|
344 int tid=Parse.parseInt(line, tab2+1, tab3);
|
jpayne@68
|
345 int gi=Parse.parseInt(line, tab3+1, line.length);
|
jpayne@68
|
346 if(gi<0){
|
jpayne@68
|
347 invalid++;
|
jpayne@68
|
348 }else{
|
jpayne@68
|
349 assert(gi>=0) : "tid="+tid+", gi="+gi+", line=\n'"+new String(line)+"'";
|
jpayne@68
|
350 int old=setID(gi, tid, lists);
|
jpayne@68
|
351 assert(old<1 || old==tid) : "Contradictory entries for gi "+gi+": "+old+" -> "+tid+"\n'"+new String(line)+"'\ntab2="+tab2+", tab3="+tab3;
|
jpayne@68
|
352 }
|
jpayne@68
|
353 }else{
|
jpayne@68
|
354 //if(line.length==0){System.err.println(fname+", "+count);}//debug
|
jpayne@68
|
355 invalid++;
|
jpayne@68
|
356 }
|
jpayne@68
|
357 line=bf.nextLine();
|
jpayne@68
|
358 }
|
jpayne@68
|
359 if(verbose){System.err.println("Count: "+count+"; \tInvalid: "+invalid);}
|
jpayne@68
|
360 bf.close();
|
jpayne@68
|
361 return count;
|
jpayne@68
|
362 }
|
jpayne@68
|
363
|
jpayne@68
|
364 private static int getID(long gi, IntList[] lists){
|
jpayne@68
|
365 assert(gi>=0) : "gi number "+gi+" is invalid.";
|
jpayne@68
|
366 final long upper=gi>>>SHIFT;
|
jpayne@68
|
367 final int lower=(int)(gi&LOWERMASK);
|
jpayne@68
|
368 assert(upper<Shared.MAX_ARRAY_LEN) : gi+", "+upper;
|
jpayne@68
|
369 IntList list=lists[(int)upper];
|
jpayne@68
|
370 return lower<0 ? -1 : lower>=list.size ? -2 : list.get(lower);
|
jpayne@68
|
371 }
|
jpayne@68
|
372
|
jpayne@68
|
373 private static int setID(long gi, int tid, IntList[] lists){
|
jpayne@68
|
374 assert(gi>=0) : "gi number "+gi+" is invalid.";
|
jpayne@68
|
375 final long upper=gi>>>SHIFT;
|
jpayne@68
|
376 final int lower=(int)(gi&LOWERMASK);
|
jpayne@68
|
377 assert(upper<Shared.MAX_ARRAY_LEN) : gi+", "+upper;
|
jpayne@68
|
378 IntList list=lists[(int)upper];
|
jpayne@68
|
379 if(list==null){list=lists[(int)upper]=new IntList();}
|
jpayne@68
|
380 int old=lower<0 ? -1 : lower>=list.size ? -2 : list.get(lower);
|
jpayne@68
|
381 list.set(lower, tid);
|
jpayne@68
|
382 maxGiLoaded=Tools.max(gi, maxGiLoaded);
|
jpayne@68
|
383 return old;
|
jpayne@68
|
384 }
|
jpayne@68
|
385
|
jpayne@68
|
386 // private static int[] makeArrayOld(String fnames){
|
jpayne@68
|
387 // String[] split;
|
jpayne@68
|
388 // if(new File(fnames).exists()){split=new String[] {fnames};}
|
jpayne@68
|
389 // else{split=fnames.split(",");}
|
jpayne@68
|
390 //
|
jpayne@68
|
391 // long max=0;
|
jpayne@68
|
392 // for(String s : split){
|
jpayne@68
|
393 // max=Tools.max(max, findMaxID(s));
|
jpayne@68
|
394 // }
|
jpayne@68
|
395 //
|
jpayne@68
|
396 // assert(max<Integer.MAX_VALUE) : "Overflow.";
|
jpayne@68
|
397 // int[] x=new int[(int)max+1];
|
jpayne@68
|
398 // Arrays.fill(x, -1);
|
jpayne@68
|
399 //
|
jpayne@68
|
400 // long total=0;
|
jpayne@68
|
401 // for(String s : split){
|
jpayne@68
|
402 // long count=fillArray(s, x);
|
jpayne@68
|
403 // total+=count;
|
jpayne@68
|
404 // }
|
jpayne@68
|
405 // return x;
|
jpayne@68
|
406 // }
|
jpayne@68
|
407 //
|
jpayne@68
|
408 // private static long findMaxID(String fname){
|
jpayne@68
|
409 // ByteFile bf=ByteFile.makeByteFile(fname, true);
|
jpayne@68
|
410 // long count=0, max=0;
|
jpayne@68
|
411 // byte[] line=bf.nextLine();
|
jpayne@68
|
412 // while(line!=null){
|
jpayne@68
|
413 // count++;
|
jpayne@68
|
414 // int tab=Tools.indexOf(line, (byte)'\t');
|
jpayne@68
|
415 // long gi=Parse.parseLong(line, 0, tab);
|
jpayne@68
|
416 // max=Tools.max(max, gi);
|
jpayne@68
|
417 // line=bf.nextLine();
|
jpayne@68
|
418 // }
|
jpayne@68
|
419 // bf.close();
|
jpayne@68
|
420 // return max;
|
jpayne@68
|
421 // }
|
jpayne@68
|
422 //
|
jpayne@68
|
423 // private static long fillArray(String fname, int[] x){
|
jpayne@68
|
424 // boolean warned=false;
|
jpayne@68
|
425 // ByteFile bf=ByteFile.makeByteFile(fname, true);
|
jpayne@68
|
426 // long count=0;
|
jpayne@68
|
427 // byte[] line=bf.nextLine();
|
jpayne@68
|
428 // while(line!=null){
|
jpayne@68
|
429 // count++;
|
jpayne@68
|
430 // int tab=Tools.indexOf(line, (byte)'\t');
|
jpayne@68
|
431 // int gi=Parse.parseInt(line, 0, tab);
|
jpayne@68
|
432 // int ncbi=Parse.parseInt(line, tab+1, line.length);
|
jpayne@68
|
433 // //assert(x[gi]==-1 || x[gi]==ncbi) : "Contradictory entries for gi "+gi+": "+x[gi]+" -> "+ncbi;
|
jpayne@68
|
434 // if(x[gi]!=-1 && x[gi]!=ncbi){
|
jpayne@68
|
435 // if(!warned){
|
jpayne@68
|
436 // System.err.println("***WARNING*** For file "+fname+":\n"+
|
jpayne@68
|
437 // ("Contradictory entries for gi "+gi+": mapped to both taxID "+x[gi]+" and taxID "+ncbi)+
|
jpayne@68
|
438 // "\nThis may be an error from NCBI and you may wish to report it, but it is\n"
|
jpayne@68
|
439 // + "being suppressed because NCBI data is known to contain multi-mapped gi numbers,\n"
|
jpayne@68
|
440 // + "at least between nucleotide and protein, and gi numbers are deprecated anyway.");
|
jpayne@68
|
441 // warned=true;
|
jpayne@68
|
442 // }
|
jpayne@68
|
443 // }else{
|
jpayne@68
|
444 // x[gi]=ncbi;
|
jpayne@68
|
445 // }
|
jpayne@68
|
446 // line=bf.nextLine();
|
jpayne@68
|
447 // }
|
jpayne@68
|
448 // if(verbose){System.err.println("Count: "+count);}
|
jpayne@68
|
449 // bf.close();
|
jpayne@68
|
450 // return count;
|
jpayne@68
|
451 // }
|
jpayne@68
|
452
|
jpayne@68
|
453 private static long maxGiLoaded=-1;
|
jpayne@68
|
454 private static int[][] array;
|
jpayne@68
|
455 private static final int SHIFT=30;
|
jpayne@68
|
456 private static final long UPPERMASK=(-1L)<<SHIFT;
|
jpayne@68
|
457 private static final long LOWERMASK=~UPPERMASK;
|
jpayne@68
|
458
|
jpayne@68
|
459 private static String fileString;
|
jpayne@68
|
460
|
jpayne@68
|
461 public static boolean verbose=false;
|
jpayne@68
|
462 private static boolean initialized=false;
|
jpayne@68
|
463 }
|