diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/shared/Parse.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/shared/Parse.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,495 @@
+package shared;
+
+import structures.ByteBuilder;
+import structures.LongList;
+
+public class Parse {
+	
+
+	/**
+	 * Parses a number with an optional magnitude suffix using parseKMG, then narrows the result to an int.
+	 * When assertions are enabled, a value outside the int range triggers an AssertionError;
+	 * otherwise the long is silently truncated by the cast.
+	 * @param b Text to parse, e.g. "2k"
+	 * @return The parsed value as an int
+	 */
+	public static int parseIntKMG(String b){
+		final long x=parseKMG(b);
+		//Both bounds are inclusive; Integer.MIN_VALUE is a legal int and must not be rejected.
+		assert(x<=Integer.MAX_VALUE && x>=Integer.MIN_VALUE) : "Value "+x+" is out of range for integers: "+b;
+		return (int)x;
+	}
+	
+	/**
+	 * Parses an integer or decimal number with an optional single-letter decimal multiplier.
+	 * The last character is lower-cased with Tools.toLowerCase and matched against:
+	 * d=10, c or h=100, k=10^3, m=10^6, g or b=10^9, t=10^12, p or q=10^15, e=10^18.
+	 * The words big, inf, infinity, max and huge (any case) yield Long.MAX_VALUE.
+	 * Decimal inputs are multiplied as doubles and truncated toward zero.
+	 * Overflow is reported through assertions only.
+	 * @param b Text to parse, such as "20", "-3k" or "1.5m"
+	 * @return The parsed value, or 0 if b is null
+	 * @throws RuntimeException if the trailing letter is not a recognized suffix
+	 * @throws NumberFormatException if the numeric portion cannot be parsed
+	 */
+	public static long parseKMG(String b){
+		if(b==null){return 0;}
+		assert(b.length()>0);
+		final char c=Tools.toLowerCase(b.charAt(b.length()-1));
+		final boolean dot=b.indexOf('.')>=0;
+		//Fast path: plain integer with no suffix and no decimal point.
+		if(!dot && !Tools.isLetter(c)){return Long.parseLong(b);}
+		
+		if(b.equalsIgnoreCase("big") || b.equalsIgnoreCase("inf") || b.equalsIgnoreCase("infinity") || b.equalsIgnoreCase("max") || b.equalsIgnoreCase("huge")){
+			return Long.MAX_VALUE;
+		}
+		
+		long mult=1;
+		if(Tools.isLetter(c)){
+			if(c=='k'){mult=1000;}
+			else if(c=='m'){mult=1000000;}
+			else if(c=='g' || c=='b'){mult=1000000000;}
+			else if(c=='t'){mult=1000000000000L;}
+			else if(c=='p' || c=='q'){mult=1000000000000000L;}
+			else if(c=='e'){mult=1000000000000000000L;}
+//			else if(c=='z'){mult=1000000000000000000000L;}//Out of range
+			else if(c=='c' || c=='h'){mult=100;}
+			else if(c=='d'){mult=10;}
+			else{throw new RuntimeException(b);}
+			//Strip the suffix so only the numeric portion remains.
+			b=b.substring(0, b.length()-1);
+		}
+		
+		//Calculate product, check for overflow, and return
+		if(!dot){
+			final long m=Long.parseLong(b);
+			final long p=m*mult;
+			//Dividing back detects wraparound for both signs; the old p>=m test failed on valid negatives.
+			//mult is always at least 1 here, so the division is safe.
+			assert(p/mult==m) : p+", "+m+", "+b;
+			return p;
+		}else{
+			final double m=Double.parseDouble(b);
+			final double product=m*mult;
+			//Range check on the untruncated product; the old p>=m test failed for inputs like "1.5",
+			//where truncation legitimately yields a value below m.
+			assert(product>=Long.MIN_VALUE && product<=Long.MAX_VALUE) : product+", "+m+", "+b;
+			return (long)product;
+		}
+	}
+	
+	/**
+	 * Parses an integer or decimal number with an optional binary multiplier suffix:
+	 * k=1024, m=1024^2, g or b=1024^3, t=1024^4.
+	 * No overflow checking is performed.
+	 * @param b Text to parse, such as "4g" or "1.5k"
+	 * @return The parsed value (truncated toward zero for decimals), or 0 if b is null
+	 * @throws RuntimeException if the trailing letter is not a recognized suffix
+	 */
+	public static long parseKMGBinary(String b){
+		if(b==null){return 0;}
+		final char last=Tools.toLowerCase(b.charAt(b.length()-1));
+		final boolean hasSuffix=Tools.isLetter(last);
+		final boolean hasDot=b.indexOf('.')>=0;
+		if(!hasSuffix && !hasDot){return Long.parseLong(b);}
+		
+		long factor=1;
+		String digits=b;
+		if(hasSuffix){
+			switch(last){
+				case 'k': factor=1L<<10; break;
+				case 'm': factor=1L<<20; break;
+				case 'g':
+				case 'b': factor=1L<<30; break;
+				case 't': factor=1L<<40; break;
+				default: throw new RuntimeException(b);
+			}
+			//Drop the suffix letter, leaving only the number.
+			digits=b.substring(0, b.length()-1);
+		}
+		
+		return hasDot ? (long)(Double.parseDouble(digits)*factor) : Long.parseLong(digits)*factor;
+	}
+	
+	/**
+	 * Cheap test of whether a String could be a number; only the first character is examined.
+	 * @param s Text to test
+	 * @return true if s starts with '.', '-', or a character accepted by Tools.isDigit
+	 */
+	public static boolean isNumber(String s){
+		if(s==null || s.isEmpty()){return false;}
+		final char first=s.charAt(0);
+		if(first=='.' || first=='-'){return true;}
+		return Tools.isDigit(first);
+	}
+	
+	/**
+	 * Lenient boolean parser, more permissive than Boolean.parseBoolean.
+	 * A null or empty argument counts as true (a bare flag).
+	 * A single character is true only if it is 't' or '1' after lower-casing.
+	 * "null" and "none" are false; anything longer is handed to Boolean.parseBoolean.
+	 * @param s Argument to parse
+	 * @return boolean form
+	 */
+	public static boolean parseBoolean(String s){
+		final int len=(s==null ? 0 : s.length());
+		if(len==0){return true;}
+		if(len>1){
+			final boolean nullish=s.equalsIgnoreCase("null") || s.equalsIgnoreCase("none");
+			return !nullish && Boolean.parseBoolean(s);
+		}
+		final char c=Tools.toLowerCase(s.charAt(0));
+		return c=='1' || c=='t';
+	}
+	
+	/**
+	 * Parses a yes/no answer.  Null or empty counts as yes.
+	 * Accepts y, n, yes, no (any case); "unknown" is treated as no.
+	 * @param s Argument to parse
+	 * @return true for yes, false for no
+	 * @throws RuntimeException if s is not one of the accepted terms
+	 */
+	public static boolean parseYesNo(String s){
+		if(s==null || s.isEmpty()){return true;}
+		if(s.length()>1){
+			if(s.equalsIgnoreCase("yes")){return true;}
+			//"unknown" is a special case for the IMG database
+			if(s.equalsIgnoreCase("no") || s.equalsIgnoreCase("unknown")){return false;}
+			throw new RuntimeException(s);
+		}
+		switch(Tools.toLowerCase(s.charAt(0))){
+			case 'y': return true;
+			case 'n': return false;
+			default: throw new RuntimeException(s);
+		}
+	}
+	
+	/**
+	 * Splits a String on a regular expression and parses each term as a base-10 int.
+	 * @param s Delimited text; null yields null
+	 * @param regex Delimiter pattern passed to String.split
+	 * @return Array of parsed values, one per term
+	 * @throws NumberFormatException if any term is not a valid int
+	 */
+	public static int[] parseIntArray(String s, String regex){
+		if(s==null){return null;}
+		final String[] terms=s.split(regex);
+		final int[] values=new int[terms.length];
+		int idx=0;
+		for(String term : terms){
+			values[idx++]=Integer.parseInt(term);
+		}
+		return values;
+	}
+	
+	/**
+	 * Splits a String on a regular expression and parses each term as a base-10 byte.
+	 * @param s Delimited text; null yields null
+	 * @param regex Delimiter pattern passed to String.split
+	 * @return Array of parsed values, one per term
+	 * @throws NumberFormatException if any term is not a valid byte
+	 */
+	public static byte[] parseByteArray(String s, String regex){
+		if(s==null){return null;}
+		final String[] terms=s.split(regex);
+		final byte[] values=new byte[terms.length];
+		int idx=0;
+		for(String term : terms){
+			values[idx++]=Byte.parseByte(term);
+		}
+		return values;
+	}
+	
+	/**
+	 * Parses an int written in decimal, or in hex, binary or octal when prefixed with
+	 * 0x, 0b or 0o respectively (prefix letter in either case).
+	 * A leading zero without one of those letters is parsed as plain decimal.
+	 * @param s Text to parse; null or empty yields 0
+	 * @return The parsed value
+	 * @throws NumberFormatException if the digits are invalid for the detected radix
+	 */
+	public static int parseIntHexDecOctBin(final String s){
+		if(s==null || s.length()<1){return 0;}
+		int radix=10;
+		String digits=s;
+		//The prefix is a '0' at index 0 followed by the radix letter at index 1.
+		if(s.length()>1 && s.charAt(0)=='0'){
+			final char c=s.charAt(1);
+			if(c=='x' || c=='X'){radix=16;}
+			else if(c=='b' || c=='B'){radix=2;}
+			else if(c=='o' || c=='O'){radix=8;}
+			//Integer.parseInt does not understand prefixes, so strip it before parsing.
+			if(radix!=10){digits=s.substring(2);}
+		}
+		return Integer.parseInt(digits, radix);
+	}
+	
+	/**
+	 * Parses a float from a region of ASCII text by delegating to parseDouble and narrowing.
+	 * @param array Text
+	 * @param a Index of first digit
+	 * @param b Index after last digit (e.g., array.length)
+	 * @return Parsed number
+	 */
+	public static float parseFloat(byte[] array, int a, int b){
+		final double wide=parseDouble(array, a, b);
+		return (float)wide;
+	}
+	
+	/**
+	 * Parses a double by materializing the region as a String and calling Double.parseDouble.
+	 * @param array Text
+	 * @param a Index of first character
+	 * @param b Index after last character (e.g., array.length)
+	 * @return Parsed number
+	 */
+	public static double parseDoubleSlow(byte[] array, int a, int b){
+		final int length=b-a;
+		return Double.parseDouble(new String(array, a, length));
+	}
+
+	/**
+	 * Parses a double from the given start index through the end of the array.
+	 * @param array Text
+	 * @param start Index of first character
+	 * @return Parsed number
+	 */
+	public static double parseDouble(final byte[] array, final int start){
+		final int stop=array.length;
+		return parseDouble(array, start, stop);
+	}
+	
+	/**
+	 * Parses a plain decimal number (optional leading '-', digits, optional '.' and more digits)
+	 * directly from bytes.  Exponents are not supported on this path.
+	 * If Tools.FORCE_JAVA_PARSE_DOUBLE is set, Double.parseDouble is used instead.
+	 * Invalid characters are reported via assertions rather than exceptions.
+	 * @param array Text
+	 * @param a0 Index of first digit
+	 * @param b Index after last digit (e.g., array.length)
+	 * @return Parsed number
+	 */
+	public static double parseDouble(final byte[] array, final int a0, final int b){
+		if(Tools.FORCE_JAVA_PARSE_DOUBLE){
+			final String text=new String(array, a0, b-a0);
+			return Double.parseDouble(text);
+		}
+		assert(b>a0);
+		final boolean negative=(array[a0]=='-');
+		int pos=(negative ? a0+1 : a0);
+		
+		//Integer portion: accumulate digits until the decimal point or the end of the region.
+		long whole=0;
+		while(pos<b && array[pos]!='.'){
+			final byte c=array[pos];
+			final int digit=c-'0';
+			assert(digit<10 && digit>=0) : digit+" = "+(char)c+"\narray="+new String(array)+", start="+pos+", stop="+b;
+			whole=(whole*10)+digit;
+			pos++;
+		}
+		
+		//Step past the decimal point (harmless if the region is already exhausted).
+		pos++;
+		
+		//Fractional portion: accumulate as an integer and count the decimal places.
+		long fraction=0;
+		int places=0;
+		while(pos<b){
+			final byte c=array[pos];
+			final int digit=c-'0';
+			assert(digit<10 && digit>=0) : digit+" = "+(char)c+"\narray="+new String(array)+", start="+pos+", stop="+b+
+				"\nThis function does not support exponents; if the input has an exponent, add the flag 'forceJavaParseDouble'.";
+			fraction=(fraction*10)+digit;
+			places++;
+			pos++;
+		}
+		
+		//Scale the fraction by the table entry for its number of places, then apply the sign.
+		final long sign=(negative ? -1 : 1);
+		final double result=sign*(whole+fraction*ByteBuilder.decimalInvMult[places]);
+		return result;
+	}
+
+	/**
+	 * Parses an int from the given start index through the end of the array.
+	 * @param array Text
+	 * @param start Index of first character
+	 * @return Parsed number
+	 */
+	public static int parseInt(byte[] array, int start){
+		final int stop=array.length;
+		return parseInt(array, start, stop);
+	}
+	
+//	/**
+//	 * @param array Text
+//	 * @param a Index of first digit
+//	 * @param b Index after last digit (e.g., array.length)
+//	 * @return Parsed number
+//	 */
+//	public static int parseInt(byte[] array, int a, int b){
+//		assert(b>a);
+//		int r=0;
+//		final byte z='0';
+//		int mult=1;
+//		if(array[a]=='-'){mult=-1; a++;}
+//		for(; a<b; a++){
+//			int x=(array[a]-z);
+//			assert(x<10 && x>=0) : x+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
+//			r=(r*10)+x;
+//		}
+//		return r*mult;
+//	}
+	
+	/**
+	 * Returns the int value of a base-10 number written in ASCII text, from position a to b.
+	 * Much faster than creating a substring and calling Integer.parseInt().
+	 * Invalid input triggers assertions rather than exceptions.
+	 * Overflow is NOT detected; values beyond the int range wrap around.
+	 * This function has no side-effects.
+	 * @param array byte array containing the text to parse.
+	 * @param a Index of the first character of the number (a leading '-' is allowed).
+	 * @param b Index after the last digit (e.g., array.length).
+	 * @return int representation of the parsed number.
+	 * 
+	 * @TODO Detect overflow.
+	 */
+	public static int parseInt(byte[] array, int a, int b){
+		assert(b>a) : "The start position of the text to parse must come before the stop position: "+
+			a+","+b+","+new String(array);
+		
+		//A leading minus sign is consumed here and applied to the result at the end.
+		final boolean negative=(array[a]=='-');
+		if(negative){a++;}
+		
+		int magnitude=0;
+		while(a<b){
+			//Subtracting ASCII '0' (48) converts a digit character to its numeric value.
+			final int digit=array[a]-'0';
+			
+			//Anything outside 0-9 was not a digit.  The ASCII code is printed as well as the character,
+			//because the offender may be an unprintable control character.
+			assert(digit<10 && digit>=0) : "Non-digit character with ASCII code "+(int)array[a]+" was encountered.\n"
+					+"x="+digit+"; char="+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
+			
+			//Shift the running value one decimal place left and append the new ones digit.
+			magnitude=(magnitude*10)+digit;
+			a++;
+		}
+		
+		return negative ? -magnitude : magnitude;
+	}
+	
+	/**
+	 * Parses a base-10 int from a region of a String without creating a substring.
+	 * Invalid characters are reported via assertions; overflow is not detected.
+	 * @param array Text
+	 * @param a Index of first character (a leading '-' is allowed)
+	 * @param b Index after last digit (e.g., array.length())
+	 * @return Parsed number
+	 */
+	public static int parseInt(String array, int a, int b){
+		assert(b>a);
+		final boolean negative=(array.charAt(a)=='-');
+		if(negative){a++;}
+		int magnitude=0;
+		while(a<b){
+			final int digit=array.charAt(a)-'0';
+			assert(digit<10 && digit>=0) : digit+" = "+array.charAt(a)+"\narray="+array+", start="+a+", stop="+b;
+			magnitude=(magnitude*10)+digit;
+			a++;
+		}
+		return negative ? -magnitude : magnitude;
+	}
+	
+	/**
+	 * Parses the entire array as a base-10 long.
+	 * @param array Text
+	 * @return Parsed number
+	 */
+	public static long parseLong(byte[] array){
+		final int stop=array.length;
+		return parseLong(array, 0, stop);
+	}
+	
+	/**
+	 * Parses a base-10 long from the given start index through the end of the array.
+	 * @param array Text
+	 * @param start Index of first character
+	 * @return Parsed number
+	 */
+	public static long parseLong(byte[] array, int start){
+		final int stop=array.length;
+		return parseLong(array, start, stop);
+	}
+	
+	/**
+	 * Parses a base-10 long from a region of ASCII text.
+	 * Invalid characters are reported via assertions; overflow is not detected.
+	 * @param array Text
+	 * @param a Index of first character (a leading '-' is allowed)
+	 * @param b Index after last digit (e.g., array.length)
+	 * @return Parsed number
+	 */
+	public static long parseLong(byte[] array, int a, int b){
+		assert(b>a);
+		final boolean negative=(array[a]=='-');
+		if(negative){a++;}
+		long magnitude=0;
+		while(a<b){
+			final int digit=array[a]-'0';
+			assert(digit<10 && digit>=0) : digit+" = "+(char)array[a]+"\narray="+new String(array)+", start="+a+", stop="+b;
+			magnitude=(magnitude*10)+digit;
+			a++;
+		}
+		return negative ? -magnitude : magnitude;
+	}
+	
+	/**
+	 * Parses a base-10 long from a region of a String without creating a substring.
+	 * Invalid characters are reported via assertions; overflow is not detected.
+	 * @param array Text
+	 * @param a Index of first character (a leading '-' is allowed)
+	 * @param b Index after last digit (e.g., array.length())
+	 * @return Parsed number
+	 */
+	public static long parseLong(String array, int a, int b){
+		assert(b>a);
+		final boolean negative=(array.charAt(a)=='-');
+		if(negative){a++;}
+		long magnitude=0;
+		while(a<b){
+			final int digit=array.charAt(a)-'0';
+			assert(digit<10 && digit>=0) : digit+" = "+array.charAt(a)+"\narray="+array+", start="+a+", stop="+b;
+			magnitude=(magnitude*10)+digit;
+			a++;
+		}
+		return negative ? -magnitude : magnitude;
+	}
+
+
+	/**
+	 * Parses a list of non-negative base-10 numbers separated by single non-digit characters.
+	 * Every non-digit character ends the current number and appends it to the list,
+	 * and the final number is appended after the loop.  There is no guard against
+	 * empty terms, so adjacent or trailing delimiters each contribute a 0
+	 * (for example, ",,," produces four entries).
+	 * A '-' is treated as a delimiter, not a sign.
+	 * @param sub Delimited text
+	 * @return The parsed values, or null if sub is null or empty
+	 */
+	public static long[] parseLongArray(String sub) {
+		if(sub==null || sub.length()<1){return null;}
+		final int len=sub.length();
+		final LongList list=new LongList(min(8, 1+len/2));
+		long current=0;
+		for(int i=0; i<len; i++){
+			final char ch=sub.charAt(i);
+			if(ch>='0' && ch<='9'){
+				current=(current*10)+(ch-'0');
+			}else{
+				//Delimiter: emit the number accumulated so far and start over.
+				list.add(current);
+				current=0;
+			}
+		}
+		//Emit the last term, which has no trailing delimiter.
+		list.add(current);
+		return list.toArray();
+	}
+	
+	/**
+	 * Extracts the ZMW number from a PacBio-style read id.
+	 * Example: m54283_190403_183820/4194374/919_2614
+	 * Run ID is m54283_190403_183820
+	 * zmw ID is 4194374.
+	 * Read start/stop coordinates are 919_2614
+	 * @param id Read id
+	 * @return The number between the first slash and the next slash (or end of the id);
+	 * -1 if the id has no underscore, no slash, or nothing in that field
+	 * @throws NumberFormatException if the field is present but not an int
+	 */
+	public static int parseZmw(String id){
+		final int under=id.indexOf('_');
+		final int slash=id.indexOf('/');
+		if(under<0 || slash<0){return -1;}
+		//The zmw field runs from just after the first slash to the next slash, or to the end.
+		final int start=slash+1;
+		final int nextSlash=id.indexOf('/', start);
+		final int stop=(nextSlash<0 ? id.length() : nextSlash);
+		//An empty field (e.g. "a_b/") previously caused an ArrayIndexOutOfBoundsException.
+		if(stop<=start){return -1;}
+		return Integer.parseInt(id.substring(start, stop));
+	}
+	
+	/**
+	 * Translates a symbol name with parseSymbol, then returns its first character
+	 * after skipping leading backslashes.  The last character is never skipped,
+	 * so a lone backslash is returned as-is.
+	 * @param b Symbol name or literal
+	 * @return The resulting character
+	 */
+	public static char parseSymbolToCharacter(String b){
+		final String symbol=parseSymbol(b);
+		final int last=symbol.length()-1;
+		int pos=0;
+		while(pos<last && symbol.charAt(pos)=='\\'){pos++;}
+		return symbol.charAt(pos);
+	}
+	
+	/**
+	 * Translates a symbol name (case-insensitive) into the symbol itself.
+	 * Java regex metacharacters are returned with a backslash escape;
+	 * "whitespace" is returned as the pattern \s+.
+	 * Null, Strings shorter than 2 characters, and unrecognized names are returned unchanged.
+	 * @param b Symbol name or literal
+	 * @return The translated symbol, or b itself
+	 */
+	public static String parseSymbol(String b){
+		if(b==null || b.length()<2){return b;}
+		
+		//Each row is {name, replacement}.
+		final String[][] table={
+			//Convenience characters
+			{"space", " "},
+			{"tab", "\t"},
+			{"whitespace", "\\s+"},
+			{"pound", "#"},
+			{"greaterthan", ">"},
+			{"lessthan", "<"},
+			{"equals", "="},
+			{"colon", ":"},
+			{"semicolon", ";"},
+			{"bang", "!"},
+			{"and", "&"},
+			{"ampersand", "&"},
+			{"quote", "\""},
+			{"doublequote", "\""},
+			{"singlequote", "'"},
+			{"apostrophe", "'"},
+			//Java meta characters
+			{"backslash", "\\\\"},
+			{"hat", "\\^"},
+			{"caret", "\\^"},
+			{"dollar", "\\$"},
+			{"dot", "\\."},
+			{"pipe", "\\|"},
+			{"or", "\\|"},
+			{"questionmark", "\\?"},
+			{"star", "\\*"},
+			{"asterisk", "\\*"},
+			{"plus", "\\+"},
+			{"openparen", "\\("},
+			{"closeparen", "\\)"},
+			{"opensquare", "\\["},
+			{"opencurly", "\\{"},
+		};
+		for(String[] row : table){
+			if(b.equalsIgnoreCase(row[0])){return row[1];}
+		}
+		
+		//No matches, return the literal
+		return b;
+	}
+	
+	/**
+	 * Builds a 128-entry character remapping table from a String of character pairs.
+	 * The table starts as the identity map; each pair "xy" then sets table[x]=y.
+	 * @param b Pairs of characters with no separators; its length must be even (checked by assertion)
+	 * @return The table, or null if b is null, "f" or "false" (any case)
+	 */
+	public static byte[] parseRemap(String b){
+		if(b==null || "f".equalsIgnoreCase(b) || "false".equalsIgnoreCase(b)){return null;}
+		assert((b.length()&1)==0) : "Length of remap argument must be even.  No whitespace is allowed.";
+		
+		//Start from the identity mapping.
+		final byte[] table=new byte[128];
+		for(int i=0; i<table.length; i++){table[i]=(byte)i;}
+		
+		//Apply each (from, to) pair in order; later pairs override earlier ones.
+		final int len=b.length();
+		for(int i=0; i<len; i+=2){
+			final char from=b.charAt(i);
+			final char to=b.charAt(i+1);
+			table[from]=(byte)to;
+		}
+		return table;
+	}
+	
+	/** Returns the smaller of two ints; returns y when they are equal. */
+	public static final int min(int x, int y){
+		if(x<y){return x;}
+		return y;
+	}
+	/** Returns the larger of two ints; returns y when they are equal. */
+	public static final int max(int x, int y){
+		if(x>y){return x;}
+		return y;
+	}
+
+}