diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/AccessionToTaxid.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/AccessionToTaxid.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,874 @@
+package tax;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.concurrent.atomic.AtomicLongArray;
+
+import fileIO.ByteFile;
+import fileIO.ByteFile1;
+import fileIO.ByteFile2;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import kmer.HashBuffer;
+import kmer.KmerTableSet;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import stream.ConcurrentGenericReadInputStream;
+import stream.FastaReadInputStream;
+import structures.StringNum;
+
+/**
+ * New version loads with multiple threads per input file.
+ * @author Brian Bushnell
+ * @date December 16, 2016
+ *
+ */
+public class AccessionToTaxid {
+	
+	public static void load(String files){
+		final boolean oldBf2=ByteFile.FORCE_MODE_BF2;
+		final boolean oldBf1=ByteFile.FORCE_MODE_BF1;
+		final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
+		final boolean oldGunzip=ReadWrite.USE_UNPIGZ;
+		
+		main(new String[] {"in="+files, "unpigz="+ReadWrite.USE_UNPIGZ, "gunzip="+ReadWrite.USE_GUNZIP});
+
+		ByteFile.FORCE_MODE_BF2=oldBf2;
+		ByteFile.FORCE_MODE_BF1=oldBf1;
+		ReadWrite.USE_UNPIGZ=oldUnpigz;
+		ReadWrite.USE_UNPIGZ=oldGunzip;
+	}
+	
+	public static void main(String[] args){
+		Timer t=new Timer();
+		AccessionToTaxid x=new AccessionToTaxid(args);
+		x.process(t);
+		
+		//Close the print stream if it was redirected
+		Shared.closeStream(x.outstream);
+	}
+	
+	public AccessionToTaxid(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, getClass(), false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		ReadWrite.USE_UNPIGZ=true;
+		
+		Parser parser=new Parser();
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			String[] split=arg.split("=");
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+
+			if(a.equals("verbose")){
+				verbose=Parse.parseBoolean(b);
+				ByteFile1.verbose=verbose;
+				ByteFile2.verbose=verbose;
+				stream.FastaReadInputStream.verbose=verbose;
+				ConcurrentGenericReadInputStream.verbose=verbose;
+				stream.FastqReadInputStream.verbose=verbose;
+				ReadWrite.verbose=verbose;
+			}else if(a.equals("stripunderscore")){
+//				STRIP_UNDERSCORE=Parse.parseBoolean(b);
+				assert(false) : "stripunderscore is disabled.";
+			}else if(a.equals("usetables")){
+//				USE_TABLES=Parse.parseBoolean(b);
+			}else if(a.equals("usetables")){
+//				USE_TABLES=Parse.parseBoolean(b);
+			}else if(a.equals("skipparse")){
+				skipParse=Parse.parseBoolean(b);
+			}else if(a.equals("skiphash")){
+				skipHash=Parse.parseBoolean(b);
+			}else if(a.equals("prealloc")){ 
+				if(b==null || Character.isLetter(b.charAt(0))){
+					if(Parse.parseBoolean(b)){
+						prealloc=0.78f;
+					}else{
+						prealloc=0;
+					}
+				}else{
+					prealloc=Float.parseFloat(b);
+				}
+			}else if(a.equals("maxpigzprocesses")){
+				maxPigzProcesses=Integer.parseInt(b);
+			}else if(a.equals("in")){
+				assert(b!=null) : "Bad parameter: "+arg;
+				String[] temp=b.split(",");
+				for(String s : temp){in.add(s);}
+			}else if(parser.parse(arg, a, b)){
+				//do nothing
+			}else if(b==null){
+				if(new File(arg).exists()){
+					in.add(arg);
+				}
+			}else{
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i];
+				//				throw new RuntimeException("Unknown parameter "+args[i]);
+			}
+		}
+		
+		{//Process parser fields
+			overwrite=parser.overwrite;
+
+//			out=parser.out1;
+		}
+		
+		assert(FastaReadInputStream.settingsOK());
+		
+		if(in==null || in.size()==0){throw new RuntimeException("Error - at least one input file is required.");}
+		
+		if(ReadWrite.USE_UNPIGZ && !ByteFile.FORCE_MODE_BF2){
+			ByteFile.FORCE_MODE_BF2=false;
+			ByteFile.FORCE_MODE_BF1=true;
+		}
+
+//		if(out!=null && out.equalsIgnoreCase("null")){out=null;}
+		
+//		if(!Tools.testOutputFiles(overwrite, false, false, out)){
+//			outstream.println((out==null)+", "+out);
+//			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
+//		}
+
+		{//Reorder by size, ascending
+			ArrayList<StringNum> list=new ArrayList<StringNum>();
+			for(String s : in){
+				list.add(new StringNum(s, new File(s).length()));
+			}
+			Collections.sort(list);
+			in.clear();
+			for(StringNum sn : list){
+				in.add(sn.s);
+			}
+		}
+		
+//		ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, false, false);
+		ffin=new FileFormat[in.size()];
+		
+		/* Note */
+		/* Java 1.7 works fine here (54 seconds skipping parsing). */
+		/* Java 1.8 has immense speed-downs if pigz is used (80-100s normally, >1000s with unpigz). */
+		/* Java 1.8_144 is unpredictable and incredibly slow (80-900s normally, 500-1800 with unpigz) */
+		
+		int processes=0;
+		for(int i=0; i<in.size(); i++){
+			String s=in.get(i);
+			if(!new File(s).exists()){
+				if(s.startsWith("shrunk.") && new File(s.substring(7)).exists()){
+					s=s.substring(7);
+				}
+			}
+			FileFormat ff=FileFormat.testInput(s, FileFormat.TXT, null, true, false);
+			if(ff.gzip() && processes>maxPigzProcesses){
+				processes++;
+//				if(processes>maxPigzProcesses){
+					ff=FileFormat.testInput(s, FileFormat.TXT, null, false, false);
+//				}
+			}
+			ffin[i]=ff;
+		}
+	}
+	
+	@SuppressWarnings("unchecked")
+	void process(Timer t){
+
+//		if(USE_MAPS){
+			assert(maps==null);
+			maps=new HashMap[128];
+			for(int i=0; i<maps.length; i++){
+				maps[i]=new HashMap<String, Integer>();
+			}
+//		}
+
+		assert(tables==null);
+		if(USE_TABLES){
+			tables=new KmerTableSet(new String[] {"ways=31",("prealloc="+(prealloc>0 ? prealloc : "f"))}, 12);
+			tables.allocateTables();
+		}
+		
+		if(ffin.length>4){//Addresses a multithreaded read bug in Java
+//			FileFormat[] ffa1=Arrays.copyOf(ffin, 2);
+//			FileFormat[] ffa2=Arrays.copyOfRange(ffin, 2, ffin.length);
+//			spawnThreads(ffa1);
+//			spawnThreads(ffa2);
+
+			FileFormat[] ffa1=Arrays.copyOf(ffin, 2);
+			FileFormat[] ffa2=Arrays.copyOfRange(ffin, 2, ffin.length);
+			spawnThreads(ffa1, 2);
+			spawnThreads(ffa2, 200);
+		}else{
+			spawnThreads(ffin, 200);
+		}
+		
+		//Do anything necessary after processing
+		System.gc();
+		
+		t.stop();
+		outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
+		
+		outstream.println();
+		outstream.println("Valid Lines:       \t"+linesValid);
+		outstream.println("Invalid Lines:     \t"+(linesProcessed-linesValid));
+
+		if(lengthCounts!=null){
+			outstream.println();
+			outstream.println("Length counts:");
+
+			for(int i=0; i<lengthCounts.length(); i++){
+				long count=lengthCounts.get(i);
+				if(count>0){outstream.println(i+"\t"+count);}
+			}
+		}
+
+		if(symbolCounts!=null){
+			outstream.println();
+			outstream.println("Symbols:");
+			
+			String comma="";
+			for(int i=0; i<symbolCounts.length(); i++){
+				long count=symbolCounts.get(i);
+				if(count>0){
+					outstream.print(comma+i);
+					comma=",";
+				}
+			}
+		}
+
+		if(counts_underscore!=null){
+			outstream.println();
+			outstream.println("Length_underscore counts:");
+
+			for(int i=0; i<counts_underscore.length(); i++){
+				long count=counts_underscore.get(i);
+				if(count>0){outstream.println(i+"\t"+count);}
+			}
+		}
+
+		if(counts_underscore2!=null){
+			outstream.println();
+			outstream.println("Length_underscore2 counts:");
+
+			for(int i=0; i<counts_underscore2.length(); i++){
+				long count=counts_underscore2.get(i);
+				if(count>0){outstream.println(i+"\t"+count);}
+			}
+		}
+		outstream.println();
+		Shared.printMemory();
+		
+		if(errorState){
+			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+		}
+		
+		LOADED=true;
+	}
+	
+	/** Spawn process threads */
+	private void spawnThreads(FileFormat[] ffa, int threadLimit){
+		
+		//Do anything necessary prior to processing
+		Tools.reverseInPlace(ffa, 0, ffa.length);
+		
+		//Fill a list with ProcessThreads
+		ArrayList<ByteFile> albf=new ArrayList<ByteFile>(ffa.length);
+		for(FileFormat ff : ffa){
+			if(ff!=null){
+				System.err.println("Loading "+ff.name());
+				ByteFile bf=ByteFile.makeByteFile(ff, 1);
+				albf.add(bf);
+			}
+		}
+		final int threads=Tools.min(threadLimit, Tools.max(albf.size(), Shared.threads()));
+		ArrayList<HashThread> alht=new ArrayList<HashThread>(threads);
+		
+		for(int i=0; i<threads; i++){
+			ByteFile bf=albf.get(i%albf.size());
+			alht.add(new HashThread(bf));
+		}
+		
+		//Start the threads
+		for(HashThread pt : alht){
+			pt.start();
+		}
+		
+		//Wait for completion of all threads
+		boolean success=true;
+		for(HashThread pt : alht){
+			
+			//Wait until this thread has terminated
+			while(pt.getState()!=Thread.State.TERMINATED){
+				try {
+					//Attempt a join operation
+					pt.join();
+				} catch (InterruptedException e) {
+					//Potentially handle this, if it is expected to occur
+					e.printStackTrace();
+				}
+			}
+			
+			linesProcessed+=pt.linesProcessedT;
+			linesValid+=pt.linesValidT;
+			bytesProcessed+=pt.bytesProcessedT;
+
+			accumulate(lengthCounts, pt.lengthCountsT);
+			accumulate(symbolCounts, pt.symbolCountsT);
+			accumulate(counts_underscore, pt.counts_underscoreT);
+			accumulate(counts_underscore2, pt.counts_underscore2T);
+			
+			success&=pt.success;
+		}
+		
+		//Close the byte files
+		for(ByteFile bf : albf){
+			errorState=bf.close()|errorState;
+		}
+		
+		//Track whether any threads failed
+		if(!success){errorState=true;}
+	}
+	
+	private static void accumulate(AtomicLongArray a, long[] b){
+		if(a==null || b==null){return;}
+		for(int i=0; i<b.length; i++){
+			a.getAndAdd(i, b[i]);
+		}
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	public static int get(String accession){
+		if(accession==null){return -1;}
+//		if(STRIP_UNDERSCORE){
+//			accession=accession.replaceAll("[_-]", "");
+//		}
+
+		int len=accession.length();
+		for(int i=0; i<len; i++){
+			char c=accession.charAt(i);
+			if(c=='.' || c==':' || c==','){
+				len=i; break;
+			}
+		}
+		
+		if(USE_TABLES){
+			if(AnalyzeAccession.codeMap!=null){
+//				if(dot>AnalyzeAccession.longestPattern){return false;}
+				final long number=AnalyzeAccession.digitize(accession);
+				if(number>=0){
+					int value=tables.getCount(number);
+					return value<0 ? -1 : value;
+				}
+			}else if(len<=12){
+				long number=hash(accession);
+
+				int value=tables.getCount(number);
+				return value<1 ? -1 : value;
+			}
+		}
+		
+		if(len<accession.length()){accession=accession.substring(0, len);}
+		if(accession.length()<1){return -1;}
+		int way=accession.charAt(0);
+		Integer value=maps[way].get(accession);
+		return value==null ? -1 : value.intValue();
+	}
+	
+	public static boolean isValidAccession(String s){
+		if(s==null || s.length()<4){return false;}
+		for(int i=0; i<s.length(); i++){
+			char c=s.charAt(i);
+			if((c>='0' && c<='9') || (c>='A' && c<='Z') /*|| (c>='a' && c<='z')*/ 
+					|| c=='.' || c=='_' || c=='-' || c==':' || c==','){
+				//do nothing
+			}else{
+				return false;
+			}
+		}
+		return true;
+	}
+	
+	static long hash(String accession){
+		long number=0;
+		for(int i=0, max=accession.length(); i<max; i++){
+			long c=accession.charAt(i);
+			if(c=='.' || c==':' || c==','){break;}
+			if(c>='0' && c<='9'){c=c-'0';}
+			else if(c>='A' && c<='Z'){c=c+offset;}
+			else if(c=='_' || c=='-'){c=10;}//Collision, but should be OK
+			else if(c>='a' && c<='z'){c=c+offsetLower;}
+			else{
+				assert(false) : accession;
+			}
+			number=(number*37)+c;
+		}
+		return number;
+	}
+	
+	static long hash(final byte[] line, final int limit){
+		long number=0;
+		for(int i=0; i<limit; i++){
+			long c=line[i];
+			if(c=='.' || c==':' || c==','){break;}
+			if(c>='0' && c<='9'){c=c-'0';}
+			else if(c>='A' && c<='Z'){c=c+offset;}
+			else if(c=='_' || c=='-'){c=10;}//Collision, but should be OK
+			else if(c>='a' && c<='z'){c=c+offsetLower;}
+			else{
+				assert(false) : new String(line);
+			}
+			number=(number*37)+c;
+		}
+		return number;
+	}
+	
+	public static int parseLineToTaxid(final byte[] line, final byte delimiter){
+		int a=0, b=0;
+		
+		final int ncbi;
+		
+		while(b<line.length && line[b]!=delimiter){b++;}
+		assert(b>a) : "Missing field 0: "+new String(line);
+		b++;
+		a=b;
+		
+		while(b<line.length && line[b]!=delimiter){b++;}
+//		assert(b>a) : "Missing field 1: "+new String(line);
+		assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
+		//accession2=new String(line, a, b-a);
+		b++;
+		a=b;
+		
+		while(b<line.length && line[b]!=delimiter){b++;}
+		assert(b>a) : "Missing field 2: "+new String(line);
+		ncbi=Parse.parseInt(line, a, b);
+		b++;
+		a=b;
+		
+		return ncbi;
+	}
+	
+	public static int parseLineToTaxid_2col(final byte[] line, final byte delimiter){
+		int a=0, b=0;
+		
+		final int ncbi;
+		
+		while(b<line.length && line[b]!=delimiter){b++;}
+		assert(b>a) : "Missing field 0: "+new String(line);
+		b++;
+		a=b;
+		
+		while(b<line.length && line[b]!=delimiter){b++;}
+		assert(b>a) : "Missing field 1: "+new String(line);
+		ncbi=Parse.parseInt(line, a, b);
+		b++;
+		a=b;
+		
+		return ncbi;
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	public static class HashThread extends Thread {
+		
+		@SuppressWarnings("unchecked")
+		public HashThread(ByteFile bf_){
+//			if(USE_MAPS){
+				mapsT=new HashMap[128];
+				for(int i=0; i<mapsT.length; i++){
+					mapsT[i]=new HashMap<String, Integer>();
+				}
+//			}
+			if(USE_TABLES){
+				table=new HashBuffer(tables.tables(), 1000, 31, true, true);
+			}
+			bf=bf_;
+		}
+		
+		ArrayList<byte[]> fetch(int limit){
+			ArrayList<byte[]> list=new ArrayList<byte[]>(limit);
+			synchronized(bf){
+				byte[] line=bf.nextLine();
+//				while(line!=null && Tools.startsWith(line, "accession")){line=bf.nextLine();}
+				if(line==null){return null;}
+				for(int i=0; line!=null;){
+					list.add(line);
+					i++;
+					if(i>=limit){break;}
+					line=bf.nextLine();
+				}
+			}
+			return list.size()>0 ? list : null;
+		}
+		
+		@Override
+		public void run(){
+//			System.err.println("Processing "+bf.name());
+			final int fetchSize=1000;
+			for(ArrayList<byte[]> list=fetch(fetchSize); list!=null; list=fetch(fetchSize)){
+				for(byte[] line : list){
+					if(line.length>0){
+						linesProcessedT++;
+						bytesProcessedT+=line.length;
+
+//						final boolean valid=(!Tools.startsWith(line, "accession\t")) & !skipParse;
+						final boolean valid=(!Tools.startsWith(line, "accession")) & !skipParse;
+						//					assert(valid); //Not true if concatenated
+
+						//					if(Tools.startsWith(line, "NZ_LM994619")){
+						//						boolean b=parseLine2(line, (byte)'\t');
+						//						assert(false) : b+", "+new String(line);
+						//					}
+
+						if(valid){
+							boolean b=parseLine2(line, (byte)'\t');
+							if(b){linesValidT++;}
+						}
+					}
+				}
+			}
+			
+//			if(USE_MAPS){
+				for(int i=0; i<mapsT.length; i++){
+					if(mapsT[i].size()>0){
+						synchronized(maps[i]){
+							maps[i].putAll(mapsT[i]);
+						}
+					}
+					mapsT[i]=null;
+				}
+//			}
+			if(USE_TABLES){
+				long temp=table.flush();
+			}
+			
+			success=true;
+		}
+		
+//		public boolean parseLineNumeric(final byte[] line, final byte delimiter){
+//			int a=0, b=0;
+//
+//			long accession=0;
+//			final int ncbi, gi;
+//
+//			while(b<line.length && line[b]!=delimiter){b++;}
+//			assert(b>a) : "Missing field 0: "+new String(line);
+//			for(int i=a; i<b; i++){
+//				long c=line[i];
+//				if(c=='.'){break;}
+//				if(c<='9'){c=c-'0';}
+//				else{c=c-'A'+10;}
+//				accession=(accession*36)+c;
+//			}
+//			b++;
+//			a=b;
+//
+//			while(b<line.length && line[b]!=delimiter){b++;}
+//			assert(b>a) : "Missing field 1: "+new String(line);
+//			//accession2=new String(line, a, b-a);
+//			b++;
+//			a=b;
+//
+//			while(b<line.length && line[b]!=delimiter){b++;}
+//			assert(b>a) : "Missing field 2: "+new String(line);
+//			ncbi=Parse.parseInt(line, a, b);
+//			b++;
+//			a=b;
+//
+////			while(b<line.length && line[b]!=delimiter){b++;}
+////			assert(b>a) : "Missing field 3: "+new String(line);
+//////			gi=Parse.parseInt(line, a, b);
+////			b++;
+////			a=b;
+//
+//			if(ncbi<1){return false;}
+//
+//			if(tree!=null){
+//				if(ncbi>=tree.nodes.length){return false;}
+//				TaxNode tn=tree.getNode(ncbi);
+//				if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE || tn.level==TaxTree.DOMAIN){return false;}
+//				if(tn.pid>=tree.nodes.length){return false;}
+//				tn=tree.getNode(tn.pid);
+//				if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE){return false;}
+//			}
+//			assert(accession>=0) : new String(line);
+//			table.set(accession, ncbi);
+//			return true;
+//		}
+		
+		//This code is no longer used and can be safely deleted.
+		@Deprecated
+		public boolean parseLine(final byte[] line, final byte delimiter){
+			int a=0, b=0;
+			
+			String accession;
+			final int ncbi, gi;
+			
+			while(b<line.length && line[b]!=delimiter){b++;}
+			assert(b>a) : "Missing field 0: "+new String(line);
+			accession=new String(line, a, b-a);
+			final int dot=accession.indexOf('.');//and :, but this is deprecated.
+			if(dot>=0){//Should never happen
+//				System.err.println(accession);
+//				assert(dot==accession.length()-2) : accession;
+				accession=accession.substring(0, dot);
+			}
+//			if(STRIP_UNDERSCORE){
+//				accession=accession.replaceAll("[_-]", "");
+//			}
+			if(lengthCountsT!=null){lengthCountsT[b-a]++;}
+			if(symbolCountsT!=null){
+				for(int i=a; i<b; i++){symbolCountsT[line[i]]++;}
+			}
+			final int underscore=accession.indexOf('_');
+			if(underscore>=0){
+				if(counts_underscoreT!=null){counts_underscoreT[b-a]++;}
+				if(counts_underscore2T!=null && underscore==2){counts_underscore2T[b-a]++;}
+			}
+			b++;
+			a=b;
+			
+			while(b<line.length && line[b]!=delimiter){b++;}
+//			assert(b>a) : "Missing field 1: "+new String(line);
+			assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
+			//accession2=new String(line, a, b-a);
+			b++;
+			a=b;
+			
+			while(b<line.length && line[b]!=delimiter){b++;}
+			assert(b>a) : "Missing field 2: "+new String(line);
+			ncbi=Parse.parseInt(line, a, b);
+			b++;
+			a=b;
+			
+//			while(b<line.length && line[b]!=delimiter){b++;}
+//			assert(b>a) : "Missing field 3: "+new String(line);
+////			gi=Parse.parseInt(line, a, b);
+//			b++;
+//			a=b;
+			
+			if(ncbi<1){return false;}
+			
+			if(tree!=null){
+				if(ncbi>=tree.nodes.length){return false;}
+				TaxNode tn=tree.getNode(ncbi);
+				if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;}
+				if(tn.pid>=tree.nodes.length){return false;}
+				tn=tree.getNode(tn.pid);
+				if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E){return false;}
+			}
+			
+			if(accession.length()<13 && USE_TABLES){
+				long number=hash(accession);
+				assert(number>=0) : new String(line);
+				table.set(number, ncbi);
+				return true;
+			}
+			
+			int way=accession.charAt(0);
+			mapsT[way].put(accession, ncbi);
+//			Integer old=mapsT[way].put(accession, ncbi);
+//			assert(old==null || old==ncbi) : "'"+accession+"': "+old+" -> "+ncbi;
+//			System.err.println("'"+accession+"': "+old+" -> "+ncbi);
+//			assert(dot==-1) : "'"+accession+"': "+old+" -> "+ncbi;
+			return true;
+		}
+		
+		public boolean parseLine2(final byte[] line, final byte delimiter){
+			int a=0, b=0;
+			
+			final int ncbi, gi;
+
+			while(b<line.length && line[b]!=delimiter 
+					&& line[b]!='.' && line[b]!=':' && line[b]!=','){b++;}//parse unique part of accession
+			final int dot=b;
+			assert(b>a) : "Missing field 0: "+new String(line);
+			while(b<line.length && line[b]!=delimiter){b++;}//skip the rest of the accession
+
+			//System.err.println("Line: "+new String(line)+"\n"+Arrays.toString(line));
+			//System.err.println("A: dot="+dot+", a="+a+", b="+b);
+			
+			{//Optional block
+				if(lengthCountsT!=null){lengthCountsT[dot]++;}
+				if(symbolCountsT!=null){
+					for(int i=0; i<dot; i++){symbolCountsT[line[i]]++;}
+				}
+				if(counts_underscoreT!=null || counts_underscore2T!=null){
+					int underscore=-1;
+					for(int i=0; i<dot; i++){
+						if(line[i]=='_'){
+							underscore=i;
+							break;
+						}
+					}
+					if(underscore>=0){
+						if(counts_underscoreT!=null){counts_underscoreT[dot]++;}
+						if(counts_underscore2T!=null && underscore==2){counts_underscore2T[dot]++;}
+					}
+				}
+			}
+			b++;
+			a=b;
+			
+			//System.err.println("B: a="+a+", b="+b);
+			
+			while(b<line.length && line[b]!=delimiter){b++;}
+//			assert(b>a) : "Missing field 1: "+new String(line);
+			assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
+			//accession2=new String(line, a, b-a);
+			b++;
+			a=b;
+			
+			//System.err.println("C: a="+a+", b="+b);
+			
+			while(b<line.length && line[b]!=delimiter){b++;}
+			assert(b>a) : "Missing field 2: "+new String(line);
+			ncbi=Parse.parseInt(line, a, b);
+			//System.err.println("D: a="+a+", b="+b+", ncbi="+ncbi+", '"+(new String(line, a, b-a))+"'");
+			b++;
+			a=b;
+			
+//			while(b<line.length && line[b]!=delimiter){b++;}
+//			assert(b>a) : "Missing field 3: "+new String(line);
+////			gi=Parse.parseInt(line, a, b);
+//			b++;
+//			a=b;
+			
+			if(ncbi<1){return false;}
+			//System.err.println("E: a="+a+", b="+b);
+			if(skipHash){return false;}//123
+			//System.err.println("F: a="+a+", b="+b);
+			
+			if(tree!=null){
+				if(ncbi>=tree.nodes.length){return false;}
+				//System.err.println("G");
+				TaxNode tn=tree.getNode(ncbi);
+				if(tn==null || /*tn.levelExtended==TaxTree.NO_RANK_E ||*/ tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;}
+				//System.err.println("H: "+tn);
+				if(tn.pid>=tree.nodes.length){return false;}
+				//System.err.println("I: "+tn);
+//				TaxNode parent=tree.getNode(tn.pid);
+//				System.err.println("J: "+tn);
+//				if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E){return false;}
+//				System.err.println("K");
+			}
+			
+			if(distributed){
+				String accession=new String(line, 0, dot);//slow
+				assert(accession.equals(accession.toUpperCase()));//TODO: Disable. (slow)
+				if(accession.hashCode()%serverCount!=serverNum){return false;}
+			}
+			
+			if(USE_TABLES){
+				if(AnalyzeAccession.codeMap!=null){
+//					if(dot>AnalyzeAccession.longestPattern){return false;}
+					final long number=AnalyzeAccession.digitize(line);
+					if(number>=0){
+						table.set(number, ncbi);
+						return true;
+					}
+					assert(number==-1) : number+", "+new String(line);
+				}else{
+					if(dot<13){
+						//				long number=hash(accession);
+						final long number=hash(line, dot);
+						assert(number>=0) : new String(line);
+						table.set(number, ncbi);
+						return true;
+					}
+				}
+			}
+			
+			String accession=new String(line, 0, dot);
+			int way=accession.charAt(0);
+			mapsT[way].put(accession, ncbi);
+//			Integer old=mapsT[way].put(accession, ncbi);
+//			assert(old==null || old==ncbi) : "'"+accession+"': "+old+" -> "+ncbi;
+//			System.err.println("'"+accession+"': "+old+" -> "+ncbi);
+//			assert(dot==-1) : "'"+accession+"': "+old+" -> "+ncbi;
+			return true;
+		}
+		
+		private long linesProcessedT=0;
+		private long linesValidT=0;
+		private long bytesProcessedT=0;
+		
+		final ByteFile bf;
+		HashMap<String, Integer>[] mapsT;
+		HashBuffer table;
+		boolean success=false;
+		
+		private long[] lengthCountsT=null;//new AtomicLongArray(20);
+		private long[] symbolCountsT=null;//new AtomicLongArray(255);
+		private long[] counts_underscoreT=null;//new AtomicLongArray(20);
+		private long[] counts_underscore2T=null;//new AtomicLongArray(20);
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	
+	/*--------------------------------------------------------------*/
+	
+	private ArrayList<String> in=new ArrayList<String>();
+//	private String out=null;
+	
+	static int maxPigzProcesses=12;
+	
+	/*--------------------------------------------------------------*/
+	
+	private long linesProcessed=0;
+	private long linesValid=0;
+	private long bytesProcessed=0;
+	
+	private AtomicLongArray lengthCounts=null;//new AtomicLongArray(20);
+	private AtomicLongArray symbolCounts=null;//new AtomicLongArray(255);
+	private AtomicLongArray counts_underscore=null;//new AtomicLongArray(20);
+	private AtomicLongArray counts_underscore2=null;//new AtomicLongArray(20);
+	
+	/*--------------------------------------------------------------*/
+
+	private final FileFormat ffin[];
+//	private final FileFormat ffout;
+	
+	
+	/*--------------------------------------------------------------*/
+	
+	public static boolean LOADED(){return LOADED;}
+	
+	private static boolean LOADED=false;
+	private static HashMap<String, Integer>[] maps=null;
+	private static KmerTableSet tables;
+	public static TaxTree tree=null;
+//	public static final boolean USE_MAPS=true;
+	public static final boolean USE_TABLES=true;
+//	public static boolean STRIP_UNDERSCORE=false;
+	public static boolean skipParse=false;
+	public static boolean skipHash=false;
+	public static float prealloc=0;
+	private static final long offset=-'A'+11;
+	private static final long offsetLower=-'a'+11;
+	
+	public static int serverNum=0;
+	public static int serverCount=1;
+	public static boolean distributed=false;
+	
+	/*--------------------------------------------------------------*/
+	
+	private PrintStream outstream=System.err;
+	public static boolean verbose=false;
+	public boolean errorState=false;
+	private boolean overwrite=false;
+	
+}