Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/shared/Parse.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 package shared; | |
2 | |
3 import structures.ByteBuilder; | |
4 import structures.LongList; | |
5 | |
6 public class Parse { | |
7 | |
8 | |
9 public static int parseIntKMG(String b){ | |
10 long x=parseKMG(b); | |
11 assert(x<=Integer.MAX_VALUE && x>Integer.MIN_VALUE) : "Value "+x+" is out of range for integers: "+b; | |
12 return (int)x; | |
13 } | |
14 | |
15 public static long parseKMG(String b){ | |
16 if(b==null){return 0;} | |
17 assert(b.length()>0); | |
18 final char c=Tools.toLowerCase(b.charAt(b.length()-1)); | |
19 final boolean dot=b.indexOf('.')>=0; | |
20 if(!dot && !Tools.isLetter(c)){return Long.parseLong(b);} | |
21 // if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);} | |
22 | |
23 if(b.equalsIgnoreCase("big") || b.equalsIgnoreCase("inf") || b.equalsIgnoreCase("infinity") || b.equalsIgnoreCase("max") || b.equalsIgnoreCase("huge")){ | |
24 return Long.MAX_VALUE; | |
25 } | |
26 | |
27 long mult=1; | |
28 if(Tools.isLetter(c)){ | |
29 if(c=='k'){mult=1000;} | |
30 else if(c=='m'){mult=1000000;} | |
31 else if(c=='g' || c=='b'){mult=1000000000;} | |
32 else if(c=='t'){mult=1000000000000L;} | |
33 else if(c=='p' || c=='q'){mult=1000000000000000L;} | |
34 else if(c=='e'){mult=1000000000000000000L;} | |
35 // else if(c=='z'){mult=1000000000000000000000L;}//Out of range | |
36 else if(c=='c' || c=='h'){mult=100;} | |
37 else if(c=='d'){mult=10;} | |
38 else{throw new RuntimeException(b);} | |
39 b=b.substring(0, b.length()-1); | |
40 } | |
41 | |
42 //Calculate product, check for overflow, and return | |
43 if(!dot){ | |
44 long m=Long.parseLong(b); | |
45 long p=m*mult; | |
46 assert(p>=m) : p+", "+m+", "+b; | |
47 return p; | |
48 }else{ | |
49 double m=Double.parseDouble(b); | |
50 long p=(long)(m*mult); | |
51 assert(p>=m) : p+", "+m+", "+b; | |
52 return p; | |
53 } | |
54 } | |
55 | |
56 public static long parseKMGBinary(String b){ | |
57 if(b==null){return 0;} | |
58 char c=Tools.toLowerCase(b.charAt(b.length()-1)); | |
59 boolean dot=b.indexOf('.')>=0; | |
60 if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);} | |
61 | |
62 long mult=1; | |
63 if(Tools.isLetter(c)){ | |
64 if(c=='k'){mult=1024;} | |
65 else if(c=='m'){mult=1024*1024;} | |
66 else if(c=='g' || c=='b'){mult=1024*1024*1024;} | |
67 else if(c=='t'){mult=1024L*1024L*1024L*1024L;} | |
68 else{throw new RuntimeException(b);} | |
69 b=b.substring(0, b.length()-1); | |
70 } | |
71 | |
72 if(!dot){return Long.parseLong(b)*mult;} | |
73 | |
74 return (long)(Double.parseDouble(b)*mult); | |
75 } | |
76 | |
77 public static boolean isNumber(String s){ | |
78 if(s==null || s.length()==0){return false;} | |
79 char c=s.charAt(0); | |
80 return Tools.isDigit(c) || c=='.' || c=='-'; | |
81 } | |
82 | |
83 /** | |
84 * Parse this argument. More liberal than Boolean.parseBoolean. | |
85 * Null, t, true, or 1 all yield true. | |
86 * Everything else, including the String "null", is false. | |
87 * @param s Argument to parse | |
88 * @return boolean form | |
89 */ | |
90 public static boolean parseBoolean(String s){ | |
91 if(s==null || s.length()<1){return true;} | |
92 if(s.length()==1){ | |
93 char c=Tools.toLowerCase(s.charAt(0)); | |
94 return c=='t' || c=='1'; | |
95 } | |
96 if(s.equalsIgnoreCase("null") || s.equalsIgnoreCase("none")){return false;} | |
97 return Boolean.parseBoolean(s); | |
98 } | |
99 | |
100 public static boolean parseYesNo(String s){ | |
101 if(s==null || s.length()<1){return true;} | |
102 if(s.length()==1){ | |
103 char c=Tools.toLowerCase(s.charAt(0)); | |
104 if(c=='y'){return true;} | |
105 if(c=='n'){return false;} | |
106 throw new RuntimeException(s); | |
107 } | |
108 | |
109 if(s.equalsIgnoreCase("yes")){return true;} | |
110 if(s.equalsIgnoreCase("no")){return false;} | |
111 if(s.equalsIgnoreCase("unknown")){return false;} //Special case for IMG database | |
112 | |
113 throw new RuntimeException(s); | |
114 } | |
115 | |
116 public static int[] parseIntArray(String s, String regex){ | |
117 if(s==null){return null;} | |
118 String[] split=s.split(regex); | |
119 int[] array=new int[split.length]; | |
120 for(int i=0; i<split.length; i++){ | |
121 array[i]=Integer.parseInt(split[i]); | |
122 } | |
123 return array; | |
124 } | |
125 | |
126 public static byte[] parseByteArray(String s, String regex){ | |
127 if(s==null){return null;} | |
128 String[] split=s.split(regex); | |
129 byte[] array=new byte[split.length]; | |
130 for(int i=0; i<split.length; i++){ | |
131 array[i]=Byte.parseByte(split[i]); | |
132 } | |
133 return array; | |
134 } | |
135 | |
136 public static int parseIntHexDecOctBin(final String s){ | |
137 if(s==null || s.length()<1){return 0;} | |
138 int radix=10; | |
139 if(s.length()>1 && s.charAt(1)=='0'){ | |
140 final char c=s.charAt(1); | |
141 if(c=='x' || c=='X'){radix=16;} | |
142 else if(c=='b' || c=='B'){radix=2;} | |
143 else if(c=='o' || c=='O'){radix=8;} | |
144 } | |
145 return Integer.parseInt(s, radix); | |
146 } | |
147 | |
148 /** | |
149 * @param array Text | |
150 * @param a Index of first digit | |
151 * @param b Index after last digit (e.g., array.length) | |
152 * @return Parsed number | |
153 */ | |
154 public static float parseFloat(byte[] array, int a, int b){ | |
155 return (float)parseDouble(array, a, b); | |
156 } | |
157 | |
158 /** | |
159 * @param array Text | |
160 * @param a Index of first digit | |
161 * @param b Index after last digit (e.g., array.length) | |
162 * @return Parsed number | |
163 */ | |
164 public static double parseDoubleSlow(byte[] array, int a, int b){ | |
165 String s=new String(array, a, b-a); | |
166 return Double.parseDouble(s); | |
167 } | |
168 | |
169 public static double parseDouble(final byte[] array, final int start){ | |
170 return parseDouble(array, start, array.length); | |
171 } | |
172 | |
173 /** | |
174 * @param array Text | |
175 * @param a0 Index of first digit | |
176 * @param b Index after last digit (e.g., array.length) | |
177 * @return Parsed number | |
178 */ | |
179 public static double parseDouble(final byte[] array, final int a0, final int b){ | |
180 if(Tools.FORCE_JAVA_PARSE_DOUBLE){ | |
181 return Double.parseDouble(new String(array, a0, b-a0)); | |
182 } | |
183 int a=a0; | |
184 assert(b>a); | |
185 long upper=0; | |
186 final byte z='0'; | |
187 long mult=1; | |
188 if(array[a]=='-'){mult=-1; a++;} | |
189 | |
190 for(; a<b; a++){ | |
191 final byte c=array[a]; | |
192 if(c=='.'){break;} | |
193 final int x=(c-z); | |
194 assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b; | |
195 upper=(upper*10)+x; | |
196 } | |
197 | |
198 long lower=0; | |
199 int places=0; | |
200 for(a++; a<b; a++){ | |
201 final byte c=array[a]; | |
202 final int x=(c-z); | |
203 assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b+ | |
204 "\nThis function does not support exponents; if the input has an exponent, add the flag 'forceJavaParseDouble'."; | |
205 lower=(lower*10)+x; | |
206 places++; | |
207 } | |
208 | |
209 double d=mult*(upper+lower*ByteBuilder.decimalInvMult[places]); | |
210 // assert(d==parseDoubleSlow(array, a0, b)) : d+", "+parseDoubleSlow(array, a0, b); | |
211 return d; | |
212 } | |
213 | |
214 public static int parseInt(byte[] array, int start){ | |
215 return parseInt(array, start, array.length); | |
216 } | |
217 | |
218 // /** | |
219 // * @param array Text | |
220 // * @param a Index of first digit | |
221 // * @param b Index after last digit (e.g., array.length) | |
222 // * @return Parsed number | |
223 // */ | |
224 // public static int parseInt(byte[] array, int a, int b){ | |
225 // assert(b>a); | |
226 // int r=0; | |
227 // final byte z='0'; | |
228 // int mult=1; | |
229 // if(array[a]=='-'){mult=-1; a++;} | |
230 // for(; a<b; a++){ | |
231 // int x=(array[a]-z); | |
232 // assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b; | |
233 // r=(r*10)+x; | |
234 // } | |
235 // return r*mult; | |
236 // } | |
237 | |
238 /** | |
239 * Returns the int representation of a number represented in ASCII text, from position a to b. | |
240 * This function is much faster than creating a substring and calling Integer.parseInt() | |
241 * Throws Assertions rather than Exceptions for invalid input. | |
242 * This function does NOT detect overflows, e.g., values over 2^31-1 (Integer.MAX_VALUE). | |
243 * This function has no side-effects. | |
244 * @param array byte array containing the text to parse. | |
245 * @param a Index of the first digit of the number. | |
246 * @param b Index after the last digit (e.g., array.length). | |
247 * @return int representation of the parsed number. | |
248 * @throws Assertions rather than Exceptions for invalid input. | |
249 * | |
250 * @TODO Correctly represent Integer.MIN_VALUE | |
251 * @TODO Detect overflow. | |
252 */ | |
253 public static int parseInt(byte[] array, int a, int b){ | |
254 assert(b>a) : "The start position of the text to parse must come before the stop position: "+ | |
255 a+","+b+","+new String(array); | |
256 int r=0; //Initialize the return value to 0. | |
257 | |
258 //z holds the ASCII code for 0, which is subtracted from other ASCII codes | |
259 //to yield the int value of a character. For example, '7'-'0'=7, | |
260 //because ASCII '7'=55, while ASCII '0'=48, and 55-48=7. | |
261 final byte z='0'; | |
262 | |
263 //mult is 1 for positive numbers, or -1 for negative numbers. | |
264 //It will be multiplied by the unsigned result to yield the final signed result. | |
265 int mult=1; | |
266 | |
267 //If the term starts with a minus sign, set the multiplier to -1 and increment the position. | |
268 if(array[a]=='-'){mult=-1; a++;} | |
269 | |
270 //Iterate through every position, incrementing a, up to b (exclusive). | |
271 for(; a<b; a++){ | |
272 //x is the numeric value of the character at position a. | |
273 //In other words, if array[a]='7', | |
274 //x would be 7, not the ASCII code for '7' (which is 55). | |
275 int x=(array[a]-z); | |
276 | |
277 //Assert that x is in the range of 0-9; otherwise, the character was not a digit. | |
278 //The ASCII code will be printed here because in some cases the character could be | |
279 //a control character (like carriage return or vertical tab or bell) which is unprintable. | |
280 //But if possible the character will be printed to, as well as the position, | |
281 //and the entire String from which the number is to be parsed. | |
282 assert(x<10 && x>=0) : "Non-digit character with ASCII code "+(int)array[a]+" was encountered.\n" | |
283 +"x="+x+"; char="+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b; | |
284 | |
285 //Multiply the old value by 10, then add the new 1's digit. | |
286 //This is because the text is assumed to be base-10, | |
287 //so each subsequent character will represent 1/10th the significance of the previous character. | |
288 r=(r*10)+x; | |
289 } | |
290 | |
291 //Change the unsigned value into a signed result, and return it. | |
292 return r*mult; | |
293 } | |
294 | |
295 /** | |
296 * @param array Text | |
297 * @param a Index of first digit | |
298 * @param b Index after last digit (e.g., array.length) | |
299 * @return Parsed number | |
300 */ | |
301 public static int parseInt(String array, int a, int b){ | |
302 // assert(false) : Character.toString(array.charAt(a)); | |
303 assert(b>a); | |
304 int r=0; | |
305 final byte z='0'; | |
306 int mult=1; | |
307 if(array.charAt(a)=='-'){mult=-1; a++;} | |
308 for(; a<b; a++){ | |
309 int x=(array.charAt(a)-z); | |
310 assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b; | |
311 r=(r*10)+x; | |
312 } | |
313 return r*mult; | |
314 } | |
315 | |
316 public static long parseLong(byte[] array){return parseLong(array, 0, array.length);} | |
317 | |
318 public static long parseLong(byte[] array, int start){return parseLong(array, start, array.length);} | |
319 | |
320 /** | |
321 * @param array Text | |
322 * @param a Index of first digit | |
323 * @param b Index after last digit (e.g., array.length) | |
324 * @return Parsed number | |
325 */ | |
326 public static long parseLong(byte[] array, int a, int b){ | |
327 assert(b>a); | |
328 long r=0; | |
329 final byte z='0'; | |
330 long mult=1; | |
331 if(array[a]=='-'){mult=-1; a++;} | |
332 for(; a<b; a++){ | |
333 int x=(array[a]-z); | |
334 assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b; | |
335 r=(r*10)+x; | |
336 } | |
337 return r*mult; | |
338 } | |
339 | |
340 /** | |
341 * @param array Text | |
342 * @param a Index of first digit | |
343 * @param b Index after last digit (e.g., array.length) | |
344 * @return Parsed number | |
345 */ | |
346 public static long parseLong(String array, int a, int b){ | |
347 assert(b>a); | |
348 long r=0; | |
349 final byte z='0'; | |
350 long mult=1; | |
351 if(array.charAt(a)=='-'){mult=-1; a++;} | |
352 for(; a<b; a++){ | |
353 int x=(array.charAt(a)-z); | |
354 assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b; | |
355 r=(r*10)+x; | |
356 } | |
357 return r*mult; | |
358 } | |
359 | |
360 | |
361 //Note: clen is optional, but allows poorly-formatted input like trailing whitespace | |
362 //Without clen ",,," would become {0,0,0,0} | |
363 public static long[] parseLongArray(String sub) { | |
364 if(sub==null || sub.length()<1){return null;} | |
365 long current=0; | |
366 // int clen=0; | |
367 LongList list=new LongList(min(8, 1+sub.length()/2)); | |
368 for(int i=0, len=sub.length(); i<len; i++){ | |
369 // System.err.println(); | |
370 int c=sub.charAt(i)-'0'; | |
371 if(c<0 || c>9){ | |
372 // System.err.println('A'); | |
373 //assert(clen>0); | |
374 list.add(current); | |
375 current=0; | |
376 // clen=0; | |
377 }else{ | |
378 // System.err.println('B'); | |
379 current=(current*10)+c; | |
380 // clen++; | |
381 } | |
382 // System.err.println("i="+i+", c="+c+", current="+current+", list="+list); | |
383 } | |
384 // if(clen>0){ | |
385 list.add(current); | |
386 // } | |
387 // assert(false) : "\n'"+sub+"'\n"+Arrays.toString(list.toArray()); | |
388 return list.toArray(); | |
389 } | |
390 | |
391 public static int parseZmw(String id){ | |
392 //Example: m54283_190403_183820/4194374/919_2614 | |
393 //Run ID is m54283_190403_183820 | |
394 //zmw ID is 4194374. | |
395 //Read start/stop coordinates are 919_2614 | |
396 int under=id.indexOf('_'); | |
397 int slash=id.indexOf('/'); | |
398 if(under<0 || slash<0){return -1;} | |
399 String[] split=id.split("/"); | |
400 String z=split[1]; | |
401 return Integer.parseInt(z); | |
402 } | |
403 | |
404 public static char parseSymbolToCharacter(String b){ | |
405 b=parseSymbol(b); | |
406 while(b.length()>1 && b.charAt(0)=='\\'){ | |
407 b=b.substring(1); | |
408 } | |
409 return b.charAt(0); | |
410 } | |
411 | |
412 public static String parseSymbol(String b){ | |
413 if(b==null || b.length()<2){return b;} | |
414 | |
415 //Convenience characters | |
416 if(b.equalsIgnoreCase("space")){ | |
417 return " "; | |
418 }else if(b.equalsIgnoreCase("tab")){ | |
419 return "\t"; | |
420 }else if(b.equalsIgnoreCase("whitespace")){ | |
421 return "\\s+"; | |
422 }else if(b.equalsIgnoreCase("pound")){ | |
423 return "#"; | |
424 }else if(b.equalsIgnoreCase("greaterthan")){ | |
425 return ">"; | |
426 }else if(b.equalsIgnoreCase("lessthan")){ | |
427 return "<"; | |
428 }else if(b.equalsIgnoreCase("equals")){ | |
429 return "="; | |
430 }else if(b.equalsIgnoreCase("colon")){ | |
431 return ":"; | |
432 }else if(b.equalsIgnoreCase("semicolon")){ | |
433 return ";"; | |
434 }else if(b.equalsIgnoreCase("bang")){ | |
435 return "!"; | |
436 }else if(b.equalsIgnoreCase("and") || b.equalsIgnoreCase("ampersand")){ | |
437 return "&"; | |
438 }else if(b.equalsIgnoreCase("quote") || b.equalsIgnoreCase("doublequote")){ | |
439 return "\""; | |
440 }else if(b.equalsIgnoreCase("singlequote") || b.equalsIgnoreCase("apostrophe")){ | |
441 return "'"; | |
442 } | |
443 | |
444 //Java meta characters | |
445 if(b.equalsIgnoreCase("backslash")){ | |
446 return "\\\\"; | |
447 }else if(b.equalsIgnoreCase("hat") || b.equalsIgnoreCase("caret")){ | |
448 return "\\^"; | |
449 }else if(b.equalsIgnoreCase("dollar")){ | |
450 return "\\$"; | |
451 }else if(b.equalsIgnoreCase("dot")){ | |
452 return "\\."; | |
453 }else if(b.equalsIgnoreCase("pipe") || b.equalsIgnoreCase("or")){ | |
454 return "\\|"; | |
455 }else if(b.equalsIgnoreCase("questionmark")){ | |
456 return "\\?"; | |
457 }else if(b.equalsIgnoreCase("star") || b.equalsIgnoreCase("asterisk")){ | |
458 return "\\*"; | |
459 }else if(b.equalsIgnoreCase("plus")){ | |
460 return "\\+"; | |
461 }else if(b.equalsIgnoreCase("openparen")){ | |
462 return "\\("; | |
463 }else if(b.equalsIgnoreCase("closeparen")){ | |
464 return "\\)"; | |
465 }else if(b.equalsIgnoreCase("opensquare")){ | |
466 return "\\["; | |
467 }else if(b.equalsIgnoreCase("opencurly")){ | |
468 return "\\{"; | |
469 } | |
470 | |
471 //No matches, return the literal | |
472 return b; | |
473 } | |
474 | |
475 public static byte[] parseRemap(String b){ | |
476 final byte[] remap; | |
477 if(b==null || ("f".equalsIgnoreCase(b) || "false".equalsIgnoreCase(b))){ | |
478 remap=null; | |
479 }else{ | |
480 assert((b.length()&1)==0) : "Length of remap argument must be even. No whitespace is allowed."; | |
481 | |
482 remap=new byte[128]; | |
483 for(int j=0; j<remap.length; j++){remap[j]=(byte)j;} | |
484 for(int j=0; j<b.length(); j+=2){ | |
485 char x=b.charAt(j), y=b.charAt(j+1); | |
486 remap[x]=(byte)y; | |
487 } | |
488 } | |
489 return remap; | |
490 } | |
491 | |
492 public static final int min(int x, int y){return x<y ? x : y;} | |
493 public static final int max(int x, int y){return x>y ? x : y;} | |
494 | |
495 } |