comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/shared/Parse.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package shared;
2
3 import structures.ByteBuilder;
4 import structures.LongList;
5
6 public class Parse {
7
8
9 public static int parseIntKMG(String b){
10 long x=parseKMG(b);
11 assert(x<=Integer.MAX_VALUE && x>Integer.MIN_VALUE) : "Value "+x+" is out of range for integers: "+b;
12 return (int)x;
13 }
14
15 public static long parseKMG(String b){
16 if(b==null){return 0;}
17 assert(b.length()>0);
18 final char c=Tools.toLowerCase(b.charAt(b.length()-1));
19 final boolean dot=b.indexOf('.')>=0;
20 if(!dot && !Tools.isLetter(c)){return Long.parseLong(b);}
21 // if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);}
22
23 if(b.equalsIgnoreCase("big") || b.equalsIgnoreCase("inf") || b.equalsIgnoreCase("infinity") || b.equalsIgnoreCase("max") || b.equalsIgnoreCase("huge")){
24 return Long.MAX_VALUE;
25 }
26
27 long mult=1;
28 if(Tools.isLetter(c)){
29 if(c=='k'){mult=1000;}
30 else if(c=='m'){mult=1000000;}
31 else if(c=='g' || c=='b'){mult=1000000000;}
32 else if(c=='t'){mult=1000000000000L;}
33 else if(c=='p' || c=='q'){mult=1000000000000000L;}
34 else if(c=='e'){mult=1000000000000000000L;}
35 // else if(c=='z'){mult=1000000000000000000000L;}//Out of range
36 else if(c=='c' || c=='h'){mult=100;}
37 else if(c=='d'){mult=10;}
38 else{throw new RuntimeException(b);}
39 b=b.substring(0, b.length()-1);
40 }
41
42 //Calculate product, check for overflow, and return
43 if(!dot){
44 long m=Long.parseLong(b);
45 long p=m*mult;
46 assert(p>=m) : p+", "+m+", "+b;
47 return p;
48 }else{
49 double m=Double.parseDouble(b);
50 long p=(long)(m*mult);
51 assert(p>=m) : p+", "+m+", "+b;
52 return p;
53 }
54 }
55
56 public static long parseKMGBinary(String b){
57 if(b==null){return 0;}
58 char c=Tools.toLowerCase(b.charAt(b.length()-1));
59 boolean dot=b.indexOf('.')>=0;
60 if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);}
61
62 long mult=1;
63 if(Tools.isLetter(c)){
64 if(c=='k'){mult=1024;}
65 else if(c=='m'){mult=1024*1024;}
66 else if(c=='g' || c=='b'){mult=1024*1024*1024;}
67 else if(c=='t'){mult=1024L*1024L*1024L*1024L;}
68 else{throw new RuntimeException(b);}
69 b=b.substring(0, b.length()-1);
70 }
71
72 if(!dot){return Long.parseLong(b)*mult;}
73
74 return (long)(Double.parseDouble(b)*mult);
75 }
76
77 public static boolean isNumber(String s){
78 if(s==null || s.length()==0){return false;}
79 char c=s.charAt(0);
80 return Tools.isDigit(c) || c=='.' || c=='-';
81 }
82
83 /**
84 * Parse this argument. More liberal than Boolean.parseBoolean.
85 * Null, t, true, or 1 all yield true.
86 * Everything else, including the String "null", is false.
87 * @param s Argument to parse
88 * @return boolean form
89 */
90 public static boolean parseBoolean(String s){
91 if(s==null || s.length()<1){return true;}
92 if(s.length()==1){
93 char c=Tools.toLowerCase(s.charAt(0));
94 return c=='t' || c=='1';
95 }
96 if(s.equalsIgnoreCase("null") || s.equalsIgnoreCase("none")){return false;}
97 return Boolean.parseBoolean(s);
98 }
99
100 public static boolean parseYesNo(String s){
101 if(s==null || s.length()<1){return true;}
102 if(s.length()==1){
103 char c=Tools.toLowerCase(s.charAt(0));
104 if(c=='y'){return true;}
105 if(c=='n'){return false;}
106 throw new RuntimeException(s);
107 }
108
109 if(s.equalsIgnoreCase("yes")){return true;}
110 if(s.equalsIgnoreCase("no")){return false;}
111 if(s.equalsIgnoreCase("unknown")){return false;} //Special case for IMG database
112
113 throw new RuntimeException(s);
114 }
115
116 public static int[] parseIntArray(String s, String regex){
117 if(s==null){return null;}
118 String[] split=s.split(regex);
119 int[] array=new int[split.length];
120 for(int i=0; i<split.length; i++){
121 array[i]=Integer.parseInt(split[i]);
122 }
123 return array;
124 }
125
126 public static byte[] parseByteArray(String s, String regex){
127 if(s==null){return null;}
128 String[] split=s.split(regex);
129 byte[] array=new byte[split.length];
130 for(int i=0; i<split.length; i++){
131 array[i]=Byte.parseByte(split[i]);
132 }
133 return array;
134 }
135
136 public static int parseIntHexDecOctBin(final String s){
137 if(s==null || s.length()<1){return 0;}
138 int radix=10;
139 if(s.length()>1 && s.charAt(1)=='0'){
140 final char c=s.charAt(1);
141 if(c=='x' || c=='X'){radix=16;}
142 else if(c=='b' || c=='B'){radix=2;}
143 else if(c=='o' || c=='O'){radix=8;}
144 }
145 return Integer.parseInt(s, radix);
146 }
147
148 /**
149 * @param array Text
150 * @param a Index of first digit
151 * @param b Index after last digit (e.g., array.length)
152 * @return Parsed number
153 */
154 public static float parseFloat(byte[] array, int a, int b){
155 return (float)parseDouble(array, a, b);
156 }
157
158 /**
159 * @param array Text
160 * @param a Index of first digit
161 * @param b Index after last digit (e.g., array.length)
162 * @return Parsed number
163 */
164 public static double parseDoubleSlow(byte[] array, int a, int b){
165 String s=new String(array, a, b-a);
166 return Double.parseDouble(s);
167 }
168
169 public static double parseDouble(final byte[] array, final int start){
170 return parseDouble(array, start, array.length);
171 }
172
173 /**
174 * @param array Text
175 * @param a0 Index of first digit
176 * @param b Index after last digit (e.g., array.length)
177 * @return Parsed number
178 */
179 public static double parseDouble(final byte[] array, final int a0, final int b){
180 if(Tools.FORCE_JAVA_PARSE_DOUBLE){
181 return Double.parseDouble(new String(array, a0, b-a0));
182 }
183 int a=a0;
184 assert(b>a);
185 long upper=0;
186 final byte z='0';
187 long mult=1;
188 if(array[a]=='-'){mult=-1; a++;}
189
190 for(; a<b; a++){
191 final byte c=array[a];
192 if(c=='.'){break;}
193 final int x=(c-z);
194 assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b;
195 upper=(upper*10)+x;
196 }
197
198 long lower=0;
199 int places=0;
200 for(a++; a<b; a++){
201 final byte c=array[a];
202 final int x=(c-z);
203 assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b+
204 "\nThis function does not support exponents; if the input has an exponent, add the flag 'forceJavaParseDouble'.";
205 lower=(lower*10)+x;
206 places++;
207 }
208
209 double d=mult*(upper+lower*ByteBuilder.decimalInvMult[places]);
210 // assert(d==parseDoubleSlow(array, a0, b)) : d+", "+parseDoubleSlow(array, a0, b);
211 return d;
212 }
213
214 public static int parseInt(byte[] array, int start){
215 return parseInt(array, start, array.length);
216 }
217
218 // /**
219 // * @param array Text
220 // * @param a Index of first digit
221 // * @param b Index after last digit (e.g., array.length)
222 // * @return Parsed number
223 // */
224 // public static int parseInt(byte[] array, int a, int b){
225 // assert(b>a);
226 // int r=0;
227 // final byte z='0';
228 // int mult=1;
229 // if(array[a]=='-'){mult=-1; a++;}
230 // for(; a<b; a++){
231 // int x=(array[a]-z);
232 // assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
233 // r=(r*10)+x;
234 // }
235 // return r*mult;
236 // }
237
238 /**
239 * Returns the int representation of a number represented in ASCII text, from position a to b.
240 * This function is much faster than creating a substring and calling Integer.parseInt()
241 * Throws Assertions rather than Exceptions for invalid input.
242 * This function does NOT detect overflows, e.g., values over 2^31-1 (Integer.MAX_VALUE).
243 * This function has no side-effects.
244 * @param array byte array containing the text to parse.
245 * @param a Index of the first digit of the number.
246 * @param b Index after the last digit (e.g., array.length).
247 * @return int representation of the parsed number.
248 * @throws Assertions rather than Exceptions for invalid input.
249 *
250 * @TODO Correctly represent Integer.MIN_VALUE
251 * @TODO Detect overflow.
252 */
253 public static int parseInt(byte[] array, int a, int b){
254 assert(b>a) : "The start position of the text to parse must come before the stop position: "+
255 a+","+b+","+new String(array);
256 int r=0; //Initialize the return value to 0.
257
258 //z holds the ASCII code for 0, which is subtracted from other ASCII codes
259 //to yield the int value of a character. For example, '7'-'0'=7,
260 //because ASCII '7'=55, while ASCII '0'=48, and 55-48=7.
261 final byte z='0';
262
263 //mult is 1 for positive numbers, or -1 for negative numbers.
264 //It will be multiplied by the unsigned result to yield the final signed result.
265 int mult=1;
266
267 //If the term starts with a minus sign, set the multiplier to -1 and increment the position.
268 if(array[a]=='-'){mult=-1; a++;}
269
270 //Iterate through every position, incrementing a, up to b (exclusive).
271 for(; a<b; a++){
272 //x is the numeric value of the character at position a.
273 //In other words, if array[a]='7',
274 //x would be 7, not the ASCII code for '7' (which is 55).
275 int x=(array[a]-z);
276
277 //Assert that x is in the range of 0-9; otherwise, the character was not a digit.
278 //The ASCII code will be printed here because in some cases the character could be
279 //a control character (like carriage return or vertical tab or bell) which is unprintable.
280 //But if possible the character will be printed to, as well as the position,
281 //and the entire String from which the number is to be parsed.
282 assert(x<10 && x>=0) : "Non-digit character with ASCII code "+(int)array[a]+" was encountered.\n"
283 +"x="+x+"; char="+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
284
285 //Multiply the old value by 10, then add the new 1's digit.
286 //This is because the text is assumed to be base-10,
287 //so each subsequent character will represent 1/10th the significance of the previous character.
288 r=(r*10)+x;
289 }
290
291 //Change the unsigned value into a signed result, and return it.
292 return r*mult;
293 }
294
295 /**
296 * @param array Text
297 * @param a Index of first digit
298 * @param b Index after last digit (e.g., array.length)
299 * @return Parsed number
300 */
301 public static int parseInt(String array, int a, int b){
302 // assert(false) : Character.toString(array.charAt(a));
303 assert(b>a);
304 int r=0;
305 final byte z='0';
306 int mult=1;
307 if(array.charAt(a)=='-'){mult=-1; a++;}
308 for(; a<b; a++){
309 int x=(array.charAt(a)-z);
310 assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b;
311 r=(r*10)+x;
312 }
313 return r*mult;
314 }
315
316 public static long parseLong(byte[] array){return parseLong(array, 0, array.length);}
317
318 public static long parseLong(byte[] array, int start){return parseLong(array, start, array.length);}
319
320 /**
321 * @param array Text
322 * @param a Index of first digit
323 * @param b Index after last digit (e.g., array.length)
324 * @return Parsed number
325 */
326 public static long parseLong(byte[] array, int a, int b){
327 assert(b>a);
328 long r=0;
329 final byte z='0';
330 long mult=1;
331 if(array[a]=='-'){mult=-1; a++;}
332 for(; a<b; a++){
333 int x=(array[a]-z);
334 assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
335 r=(r*10)+x;
336 }
337 return r*mult;
338 }
339
340 /**
341 * @param array Text
342 * @param a Index of first digit
343 * @param b Index after last digit (e.g., array.length)
344 * @return Parsed number
345 */
346 public static long parseLong(String array, int a, int b){
347 assert(b>a);
348 long r=0;
349 final byte z='0';
350 long mult=1;
351 if(array.charAt(a)=='-'){mult=-1; a++;}
352 for(; a<b; a++){
353 int x=(array.charAt(a)-z);
354 assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b;
355 r=(r*10)+x;
356 }
357 return r*mult;
358 }
359
360
361 //Note: clen is optional, but allows poorly-formatted input like trailing whitespace
362 //Without clen ",,," would become {0,0,0,0}
363 public static long[] parseLongArray(String sub) {
364 if(sub==null || sub.length()<1){return null;}
365 long current=0;
366 // int clen=0;
367 LongList list=new LongList(min(8, 1+sub.length()/2));
368 for(int i=0, len=sub.length(); i<len; i++){
369 // System.err.println();
370 int c=sub.charAt(i)-'0';
371 if(c<0 || c>9){
372 // System.err.println('A');
373 //assert(clen>0);
374 list.add(current);
375 current=0;
376 // clen=0;
377 }else{
378 // System.err.println('B');
379 current=(current*10)+c;
380 // clen++;
381 }
382 // System.err.println("i="+i+", c="+c+", current="+current+", list="+list);
383 }
384 // if(clen>0){
385 list.add(current);
386 // }
387 // assert(false) : "\n'"+sub+"'\n"+Arrays.toString(list.toArray());
388 return list.toArray();
389 }
390
391 public static int parseZmw(String id){
392 //Example: m54283_190403_183820/4194374/919_2614
393 //Run ID is m54283_190403_183820
394 //zmw ID is 4194374.
395 //Read start/stop coordinates are 919_2614
396 int under=id.indexOf('_');
397 int slash=id.indexOf('/');
398 if(under<0 || slash<0){return -1;}
399 String[] split=id.split("/");
400 String z=split[1];
401 return Integer.parseInt(z);
402 }
403
404 public static char parseSymbolToCharacter(String b){
405 b=parseSymbol(b);
406 while(b.length()>1 && b.charAt(0)=='\\'){
407 b=b.substring(1);
408 }
409 return b.charAt(0);
410 }
411
412 public static String parseSymbol(String b){
413 if(b==null || b.length()<2){return b;}
414
415 //Convenience characters
416 if(b.equalsIgnoreCase("space")){
417 return " ";
418 }else if(b.equalsIgnoreCase("tab")){
419 return "\t";
420 }else if(b.equalsIgnoreCase("whitespace")){
421 return "\\s+";
422 }else if(b.equalsIgnoreCase("pound")){
423 return "#";
424 }else if(b.equalsIgnoreCase("greaterthan")){
425 return ">";
426 }else if(b.equalsIgnoreCase("lessthan")){
427 return "<";
428 }else if(b.equalsIgnoreCase("equals")){
429 return "=";
430 }else if(b.equalsIgnoreCase("colon")){
431 return ":";
432 }else if(b.equalsIgnoreCase("semicolon")){
433 return ";";
434 }else if(b.equalsIgnoreCase("bang")){
435 return "!";
436 }else if(b.equalsIgnoreCase("and") || b.equalsIgnoreCase("ampersand")){
437 return "&";
438 }else if(b.equalsIgnoreCase("quote") || b.equalsIgnoreCase("doublequote")){
439 return "\"";
440 }else if(b.equalsIgnoreCase("singlequote") || b.equalsIgnoreCase("apostrophe")){
441 return "'";
442 }
443
444 //Java meta characters
445 if(b.equalsIgnoreCase("backslash")){
446 return "\\\\";
447 }else if(b.equalsIgnoreCase("hat") || b.equalsIgnoreCase("caret")){
448 return "\\^";
449 }else if(b.equalsIgnoreCase("dollar")){
450 return "\\$";
451 }else if(b.equalsIgnoreCase("dot")){
452 return "\\.";
453 }else if(b.equalsIgnoreCase("pipe") || b.equalsIgnoreCase("or")){
454 return "\\|";
455 }else if(b.equalsIgnoreCase("questionmark")){
456 return "\\?";
457 }else if(b.equalsIgnoreCase("star") || b.equalsIgnoreCase("asterisk")){
458 return "\\*";
459 }else if(b.equalsIgnoreCase("plus")){
460 return "\\+";
461 }else if(b.equalsIgnoreCase("openparen")){
462 return "\\(";
463 }else if(b.equalsIgnoreCase("closeparen")){
464 return "\\)";
465 }else if(b.equalsIgnoreCase("opensquare")){
466 return "\\[";
467 }else if(b.equalsIgnoreCase("opencurly")){
468 return "\\{";
469 }
470
471 //No matches, return the literal
472 return b;
473 }
474
475 public static byte[] parseRemap(String b){
476 final byte[] remap;
477 if(b==null || ("f".equalsIgnoreCase(b) || "false".equalsIgnoreCase(b))){
478 remap=null;
479 }else{
480 assert((b.length()&1)==0) : "Length of remap argument must be even. No whitespace is allowed.";
481
482 remap=new byte[128];
483 for(int j=0; j<remap.length; j++){remap[j]=(byte)j;}
484 for(int j=0; j<b.length(); j+=2){
485 char x=b.charAt(j), y=b.charAt(j+1);
486 remap[x]=(byte)y;
487 }
488 }
489 return remap;
490 }
491
492 public static final int min(int x, int y){return x<y ? x : y;}
493 public static final int max(int x, int y){return x>y ? x : y;}
494
495 }