view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/shared/Parse.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package shared;

import structures.ByteBuilder;
import structures.LongList;

public class Parse {
	

	public static int parseIntKMG(String b){
		long x=parseKMG(b);
		assert(x<=Integer.MAX_VALUE && x>Integer.MIN_VALUE) : "Value "+x+" is out of range for integers: "+b;
		return (int)x;
	}
	
	public static long parseKMG(String b){
		if(b==null){return 0;}
		assert(b.length()>0);
		final char c=Tools.toLowerCase(b.charAt(b.length()-1));
		final boolean dot=b.indexOf('.')>=0;
		if(!dot && !Tools.isLetter(c)){return Long.parseLong(b);}
//		if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);}
		
		if(b.equalsIgnoreCase("big") || b.equalsIgnoreCase("inf") || b.equalsIgnoreCase("infinity") || b.equalsIgnoreCase("max") || b.equalsIgnoreCase("huge")){
			return Long.MAX_VALUE;
		}
		
		long mult=1;
		if(Tools.isLetter(c)){
			if(c=='k'){mult=1000;}
			else if(c=='m'){mult=1000000;}
			else if(c=='g' || c=='b'){mult=1000000000;}
			else if(c=='t'){mult=1000000000000L;}
			else if(c=='p' || c=='q'){mult=1000000000000000L;}
			else if(c=='e'){mult=1000000000000000000L;}
//			else if(c=='z'){mult=1000000000000000000000L;}//Out of range
			else if(c=='c' || c=='h'){mult=100;}
			else if(c=='d'){mult=10;}
			else{throw new RuntimeException(b);}
			b=b.substring(0, b.length()-1);
		}
		
		//Calculate product, check for overflow, and return
		if(!dot){
			long m=Long.parseLong(b);
			long p=m*mult;
			assert(p>=m) : p+", "+m+", "+b;
			return p;
		}else{
			double m=Double.parseDouble(b);
			long p=(long)(m*mult);
			assert(p>=m) : p+", "+m+", "+b;
			return p;
		}
	}
	
	public static long parseKMGBinary(String b){
		if(b==null){return 0;}
		char c=Tools.toLowerCase(b.charAt(b.length()-1));
		boolean dot=b.indexOf('.')>=0;
		if(!Tools.isLetter(c) && !dot){return Long.parseLong(b);}
		
		long mult=1;
		if(Tools.isLetter(c)){
			if(c=='k'){mult=1024;}
			else if(c=='m'){mult=1024*1024;}
			else if(c=='g' || c=='b'){mult=1024*1024*1024;}
			else if(c=='t'){mult=1024L*1024L*1024L*1024L;}
			else{throw new RuntimeException(b);}
			b=b.substring(0, b.length()-1);
		}
		
		if(!dot){return Long.parseLong(b)*mult;}
		
		return (long)(Double.parseDouble(b)*mult);
	}
	
	public static boolean isNumber(String s){
		if(s==null || s.length()==0){return false;}
		char c=s.charAt(0);
		return Tools.isDigit(c) || c=='.' || c=='-';
	}
	
	/**
	 * Parse this argument.  More liberal than Boolean.parseBoolean.
	 * Null, t, true, or 1 all yield true.
	 * Everything else, including the String "null", is false.
	 * @param s Argument to parse
	 * @return boolean form
	 */
	public static boolean parseBoolean(String s){
		if(s==null || s.length()<1){return true;}
		if(s.length()==1){
			char c=Tools.toLowerCase(s.charAt(0));
			return c=='t' || c=='1';
		}
		if(s.equalsIgnoreCase("null") || s.equalsIgnoreCase("none")){return false;}
		return Boolean.parseBoolean(s);
	}
	
	public static boolean parseYesNo(String s){
		if(s==null || s.length()<1){return true;}
		if(s.length()==1){
			char c=Tools.toLowerCase(s.charAt(0));
			if(c=='y'){return true;}
			if(c=='n'){return false;}
			throw new RuntimeException(s);
		}
		
		if(s.equalsIgnoreCase("yes")){return true;}
		if(s.equalsIgnoreCase("no")){return false;}
		if(s.equalsIgnoreCase("unknown")){return false;} //Special case for IMG database
		
		throw new RuntimeException(s);
	}
	
	public static int[] parseIntArray(String s, String regex){
		if(s==null){return null;}
		String[] split=s.split(regex);
		int[] array=new int[split.length];
		for(int i=0; i<split.length; i++){
			array[i]=Integer.parseInt(split[i]);
		}
		return array;
	}
	
	public static byte[] parseByteArray(String s, String regex){
		if(s==null){return null;}
		String[] split=s.split(regex);
		byte[] array=new byte[split.length];
		for(int i=0; i<split.length; i++){
			array[i]=Byte.parseByte(split[i]);
		}
		return array;
	}
	
	public static int parseIntHexDecOctBin(final String s){
		if(s==null || s.length()<1){return 0;}
		int radix=10;
		if(s.length()>1 && s.charAt(1)=='0'){
			final char c=s.charAt(1);
			if(c=='x' || c=='X'){radix=16;}
			else if(c=='b' || c=='B'){radix=2;}
			else if(c=='o' || c=='O'){radix=8;}
		}
		return Integer.parseInt(s, radix);
	}
	
	/**
	 * @param array Text
	 * @param a Index of first digit
	 * @param b Index after last digit (e.g., array.length)
	 * @return Parsed number
	 */
	public static float parseFloat(byte[] array, int a, int b){
		return (float)parseDouble(array, a, b);
	}
	
	/**
	 * @param array Text
	 * @param a Index of first digit
	 * @param b Index after last digit (e.g., array.length)
	 * @return Parsed number
	 */
	public static double parseDoubleSlow(byte[] array, int a, int b){
		String s=new String(array, a, b-a);
		return Double.parseDouble(s);
	}

	public static double parseDouble(final byte[] array, final int start){
		return parseDouble(array, start, array.length);
	}
	
	/**
	 * @param array Text
	 * @param a0 Index of first digit
	 * @param b Index after last digit (e.g., array.length)
	 * @return Parsed number
	 */
	public static double parseDouble(final byte[] array, final int a0, final int b){
		if(Tools.FORCE_JAVA_PARSE_DOUBLE){
			return Double.parseDouble(new String(array, a0, b-a0));
		}
		int a=a0;
		assert(b>a);
		long upper=0;
		final byte z='0';
		long mult=1;
		if(array[a]=='-'){mult=-1; a++;}
		
		for(; a<b; a++){
			final byte c=array[a];
			if(c=='.'){break;}
			final int x=(c-z);
			assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b;
			upper=(upper*10)+x;
		}
		
		long lower=0;
		int places=0;
		for(a++; a<b; a++){
			final byte c=array[a];
			final int x=(c-z);
			assert(x<10 && x>=0) : x+" = "+(char)c+"\narray="+new String(array)+", start="+a+", stop="+b+
				"\nThis function does not support exponents; if the input has an exponent, add the flag 'forceJavaParseDouble'.";
			lower=(lower*10)+x;
			places++;
		}
		
		double d=mult*(upper+lower*ByteBuilder.decimalInvMult[places]);
//		assert(d==parseDoubleSlow(array, a0, b)) : d+", "+parseDoubleSlow(array, a0, b);
		return d;
	}

	public static int parseInt(byte[] array, int start){
		return parseInt(array, start, array.length);
	}
	
//	/**
//	 * @param array Text
//	 * @param a Index of first digit
//	 * @param b Index after last digit (e.g., array.length)
//	 * @return Parsed number
//	 */
//	public static int parseInt(byte[] array, int a, int b){
//		assert(b>a);
//		int r=0;
//		final byte z='0';
//		int mult=1;
//		if(array[a]=='-'){mult=-1; a++;}
//		for(; a<b; a++){
//			int x=(array[a]-z);
//			assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
//			r=(r*10)+x;
//		}
//		return r*mult;
//	}
	
	/** 
	 * Returns the int representation of a number represented in ASCII text, from position a to b.
	 * This function is much faster than creating a substring and calling Integer.parseInt()
	 * Throws Assertions rather than Exceptions for invalid input.
	 * This function does NOT detect overflows, e.g., values over 2^31-1 (Integer.MAX_VALUE).
	 * This function has no side-effects.
	 * @param array byte array containing the text to parse.
	 * @param a Index of the first digit of the number.
	 * @param b Index after the last digit (e.g., array.length).
	 * @return int representation of the parsed number.
	 * @throws Assertions rather than Exceptions for invalid input.
	 * 
	 * @TODO Correctly represent Integer.MIN_VALUE
	 * @TODO Detect overflow.
	 */
	public static int parseInt(byte[] array, int a, int b){
		assert(b>a) : "The start position of the text to parse must come before the stop position: "+
			a+","+b+","+new String(array);
		int r=0; //Initialize the return value to 0.

		//z holds the ASCII code for 0, which is subtracted from other ASCII codes
		//to yield the int value of a character.  For example, '7'-'0'=7,
		//because ASCII '7'=55, while ASCII '0'=48, and 55-48=7. 
		final byte z='0';

		//mult is 1 for positive numbers, or -1 for negative numbers.
		//It will be multiplied by the unsigned result to yield the final signed result.
		int mult=1;
		
		//If the term starts with a minus sign, set the multiplier to -1 and increment the position.
		if(array[a]=='-'){mult=-1; a++;}
		
		//Iterate through every position, incrementing a, up to b (exclusive).
		for(; a<b; a++){
			//x is the numeric value of the character at position a.
			//In other words, if array[a]='7',
			//x would be 7, not the ASCII code for '7' (which is 55).
			int x=(array[a]-z);
			
			//Assert that x is in the range of 0-9; otherwise, the character was not a digit.
			//The ASCII code will be printed here because in some cases the character could be
			//a control character (like carriage return or vertical tab or bell) which is unprintable.
			//But if possible the character will be printed to, as well as the position,
			//and the entire String from which the number is to be parsed.
			assert(x<10 && x>=0) : "Non-digit character with ASCII code "+(int)array[a]+" was encountered.\n"
					+"x="+x+"; char="+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
			
			//Multiply the old value by 10, then add the new 1's digit.
			//This is because the text is assumed to be base-10,
			//so each subsequent character will represent 1/10th the significance of the previous character.
			r=(r*10)+x;
		}
		
		//Change the unsigned value into a signed result, and return it.
		return r*mult;
	}
	
	/**
	 * @param array Text
	 * @param a Index of first digit
	 * @param b Index after last digit (e.g., array.length)
	 * @return Parsed number
	 */
	public static int parseInt(String array, int a, int b){
//		assert(false) : Character.toString(array.charAt(a));
		assert(b>a);
		int r=0;
		final byte z='0';
		int mult=1;
		if(array.charAt(a)=='-'){mult=-1; a++;}
		for(; a<b; a++){
			int x=(array.charAt(a)-z);
			assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b;
			r=(r*10)+x;
		}
		return r*mult;
	}
	
	public static long parseLong(byte[] array){return parseLong(array, 0, array.length);}
	
	public static long parseLong(byte[] array, int start){return parseLong(array, start, array.length);}
	
	/**
	 * @param array Text
	 * @param a Index of first digit
	 * @param b Index after last digit (e.g., array.length)
	 * @return Parsed number
	 */
	public static long parseLong(byte[] array, int a, int b){
		assert(b>a);
		long r=0;
		final byte z='0';
		long mult=1;
		if(array[a]=='-'){mult=-1; a++;}
		for(; a<b; a++){
			int x=(array[a]-z);
			assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
			r=(r*10)+x;
		}
		return r*mult;
	}
	
	/**
	 * @param array Text
	 * @param a Index of first digit
	 * @param b Index after last digit (e.g., array.length)
	 * @return Parsed number
	 */
	public static long parseLong(String array, int a, int b){
		assert(b>a);
		long r=0;
		final byte z='0';
		long mult=1;
		if(array.charAt(a)=='-'){mult=-1; a++;}
		for(; a<b; a++){
			int x=(array.charAt(a)-z);
			assert(x<10 && x>=0) : x+" = "+array.charAt(a)+"\narray="+new String(array)+", start="+a+", stop="+b;
			r=(r*10)+x;
		}
		return r*mult;
	}


	//Note: clen is optional, but allows poorly-formatted input like trailing whitespace
	//Without clen ",,," would become {0,0,0,0} 
	public static long[] parseLongArray(String sub) {
		if(sub==null || sub.length()<1){return null;}
		long current=0;
//		int clen=0;
		LongList list=new LongList(min(8, 1+sub.length()/2));
		for(int i=0, len=sub.length(); i<len; i++){
//			System.err.println();
			int c=sub.charAt(i)-'0';
			if(c<0 || c>9){
//				System.err.println('A');
				//assert(clen>0);
				list.add(current);
				current=0;
//				clen=0;
			}else{
//				System.err.println('B');
				current=(current*10)+c;
//				clen++;
			}
//			System.err.println("i="+i+", c="+c+", current="+current+", list="+list);
		}
//		if(clen>0){
			list.add(current);
//		}
//		assert(false) : "\n'"+sub+"'\n"+Arrays.toString(list.toArray());
		return list.toArray();
	}
	
	public static int parseZmw(String id){
		//Example: m54283_190403_183820/4194374/919_2614
		//Run ID is m54283_190403_183820
		//zmw ID is 4194374.
		//Read start/stop coordinates are 919_2614
		int under=id.indexOf('_');
		int slash=id.indexOf('/');
		if(under<0 || slash<0){return -1;}
		String[] split=id.split("/");
		String z=split[1];
		return Integer.parseInt(z);
	}
	
	public static char parseSymbolToCharacter(String b){
		b=parseSymbol(b);
		while(b.length()>1 && b.charAt(0)=='\\'){
			b=b.substring(1);
		}
		return b.charAt(0);
	}
	
	public static String parseSymbol(String b){
		if(b==null || b.length()<2){return b;}
		
		//Convenience characters
		if(b.equalsIgnoreCase("space")){
			return " ";
		}else if(b.equalsIgnoreCase("tab")){
			return "\t";
		}else if(b.equalsIgnoreCase("whitespace")){
			return "\\s+";
		}else if(b.equalsIgnoreCase("pound")){
			return "#";
		}else if(b.equalsIgnoreCase("greaterthan")){
			return ">";
		}else if(b.equalsIgnoreCase("lessthan")){
			return "<";
		}else if(b.equalsIgnoreCase("equals")){
			return "=";
		}else if(b.equalsIgnoreCase("colon")){
			return ":";
		}else if(b.equalsIgnoreCase("semicolon")){
			return ";";
		}else if(b.equalsIgnoreCase("bang")){
			return "!";
		}else if(b.equalsIgnoreCase("and") || b.equalsIgnoreCase("ampersand")){
			return "&";
		}else if(b.equalsIgnoreCase("quote") || b.equalsIgnoreCase("doublequote")){
			return "\"";
		}else if(b.equalsIgnoreCase("singlequote") || b.equalsIgnoreCase("apostrophe")){
			return "'";
		}
		
		//Java meta characters
		if(b.equalsIgnoreCase("backslash")){
			return "\\\\";
		}else if(b.equalsIgnoreCase("hat") || b.equalsIgnoreCase("caret")){
			return "\\^";
		}else if(b.equalsIgnoreCase("dollar")){
			return "\\$";
		}else if(b.equalsIgnoreCase("dot")){
			return "\\.";
		}else if(b.equalsIgnoreCase("pipe") || b.equalsIgnoreCase("or")){
			return "\\|";
		}else if(b.equalsIgnoreCase("questionmark")){
			return "\\?";
		}else if(b.equalsIgnoreCase("star") || b.equalsIgnoreCase("asterisk")){
			return "\\*";
		}else if(b.equalsIgnoreCase("plus")){
			return "\\+";
		}else if(b.equalsIgnoreCase("openparen")){
			return "\\(";
		}else if(b.equalsIgnoreCase("closeparen")){
			return "\\)";
		}else if(b.equalsIgnoreCase("opensquare")){
			return "\\[";
		}else if(b.equalsIgnoreCase("opencurly")){
			return "\\{";
		}
		
		//No matches, return the literal
		return b;
	}
	
	public static byte[] parseRemap(String b){
		final byte[] remap;
		if(b==null || ("f".equalsIgnoreCase(b) || "false".equalsIgnoreCase(b))){
			remap=null;
		}else{
			assert((b.length()&1)==0) : "Length of remap argument must be even.  No whitespace is allowed.";
			
			remap=new byte[128];
			for(int j=0; j<remap.length; j++){remap[j]=(byte)j;}
			for(int j=0; j<b.length(); j+=2){
				char x=b.charAt(j), y=b.charAt(j+1);
				remap[x]=(byte)y;
			}
		}
		return remap;
	}
	
	public static final int min(int x, int y){return x<y ? x : y;}
	public static final int max(int x, int y){return x>y ? x : y;}

}