view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/AccessionToTaxid.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package tax;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicLongArray;

import fileIO.ByteFile;
import fileIO.ByteFile1;
import fileIO.ByteFile2;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import kmer.HashBuffer;
import kmer.KmerTableSet;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import stream.ConcurrentGenericReadInputStream;
import stream.FastaReadInputStream;
import structures.StringNum;

/**
 * New version loads with multiple threads per input file.
 * @author Brian Bushnell
 * @date December 16, 2016
 *
 */
public class AccessionToTaxid {
	
	/**
	 * Loads accession-to-taxid mappings from the given input by delegating to
	 * {@link #main(String[])}, then restores the static I/O flags that loading may change
	 * (the constructor sets ReadWrite.USE_UNPIGZ and may change the ByteFile mode flags).
	 * @param files Input path(s); passed as the value of the "in=" argument, which the
	 * constructor splits on commas.
	 */
	public static void load(String files){
		final boolean oldBf2=ByteFile.FORCE_MODE_BF2;
		final boolean oldBf1=ByteFile.FORCE_MODE_BF1;
		final boolean oldUnpigz=ReadWrite.USE_UNPIGZ;
		//Previously this captured USE_UNPIGZ a second time, so USE_GUNZIP was never restored
		final boolean oldGunzip=ReadWrite.USE_GUNZIP;
		
		try{
			main(new String[] {"in="+files, "unpigz="+oldUnpigz, "gunzip="+oldGunzip});
		}finally{
			//Restore the caller's settings even if loading terminates with an exception
			ByteFile.FORCE_MODE_BF2=oldBf2;
			ByteFile.FORCE_MODE_BF1=oldBf1;
			ReadWrite.USE_UNPIGZ=oldUnpigz;
			ReadWrite.USE_GUNZIP=oldGunzip;
		}
	}
	
	/**
	 * Program entry point: starts a timer, builds a loader from the arguments,
	 * runs it, and closes the loader's output stream.
	 * @param args Command-line arguments; see the constructor for the recognized keys.
	 */
	public static void main(String[] args){
		final Timer timer=new Timer();
		final AccessionToTaxid loader=new AccessionToTaxid(args);
		loader.process(timer);
		
		//Close the print stream if it was redirected
		Shared.closeStream(loader.outstream);
	}
	
	/**
	 * Parses the arguments, records the input files (sorted by ascending file size),
	 * and builds one FileFormat per input.
	 * Static side effects visible here: ReadWrite.USE_UNPIGZ is set to true; verbose,
	 * skipParse, skipHash, prealloc and maxPigzProcesses may be set from flags; and
	 * ByteFile may be forced into BF1 mode.
	 * @param args Arguments of the form "key=value", or bare paths of existing files.
	 * @throws RuntimeException if no input file was specified.
	 */
	public AccessionToTaxid(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}
		
		ReadWrite.USE_UNPIGZ=true;
		
		Parser parser=new Parser();
		for(int i=0; i<args.length; i++){
			String arg=args[i];
			String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;

			if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
				ByteFile1.verbose=verbose;
				ByteFile2.verbose=verbose;
				stream.FastaReadInputStream.verbose=verbose;
				ConcurrentGenericReadInputStream.verbose=verbose;
				stream.FastqReadInputStream.verbose=verbose;
				ReadWrite.verbose=verbose;
			}else if(a.equals("stripunderscore")){
				assert(false) : "stripunderscore is disabled.";
			}else if(a.equals("usetables")){
				//Accepted but ignored; USE_TABLES is a compile-time constant.
				//(A second, unreachable "usetables" branch was removed.)
			}else if(a.equals("skipparse")){
				skipParse=Parse.parseBoolean(b);
			}else if(a.equals("skiphash")){
				skipHash=Parse.parseBoolean(b);
			}else if(a.equals("prealloc")){ 
				//A missing or word-like value is treated as a boolean; anything else as a fraction
				if(b==null || Character.isLetter(b.charAt(0))){
					if(Parse.parseBoolean(b)){
						prealloc=0.78f;
					}else{
						prealloc=0;
					}
				}else{
					prealloc=Float.parseFloat(b);
				}
			}else if(a.equals("maxpigzprocesses")){
				maxPigzProcesses=Integer.parseInt(b);
			}else if(a.equals("in")){
				assert(b!=null) : "Bad parameter: "+arg;
				String[] temp=b.split(",");
				for(String s : temp){in.add(s);}
			}else if(parser.parse(arg, a, b)){
				//do nothing
			}else if(b==null){
				//A bare argument is treated as an input file if it exists
				if(new File(arg).exists()){
					in.add(arg);
				}
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}
		
		{//Process parser fields
			overwrite=parser.overwrite;
		}
		
		assert(FastaReadInputStream.settingsOK());
		
		if(in==null || in.size()==0){throw new RuntimeException("Error - at least one input file is required.");}
		
		if(ReadWrite.USE_UNPIGZ && !ByteFile.FORCE_MODE_BF2){
			ByteFile.FORCE_MODE_BF2=false;
			ByteFile.FORCE_MODE_BF1=true;
		}

		{//Reorder by size, ascending
			ArrayList<StringNum> list=new ArrayList<StringNum>();
			for(String s : in){
				list.add(new StringNum(s, new File(s).length()));
			}
			Collections.sort(list);
			in.clear();
			for(StringNum sn : list){
				in.add(sn.s);
			}
		}
		
		ffin=new FileFormat[in.size()];
		
		/* Note */
		/* Java 1.7 works fine here (54 seconds skipping parsing). */
		/* Java 1.8 has immense speed-downs if pigz is used (80-100s normally, >1000s with unpigz). */
		/* Java 1.8_144 is unpredictable and incredibly slow (80-900s normally, 500-1800 with unpigz) */
		
		int processes=0;//Number of gzipped inputs seen so far
		for(int i=0; i<in.size(); i++){
			String s=in.get(i);
			if(!new File(s).exists()){
				//Fall back to the name without the "shrunk." prefix if only that file exists
				if(s.startsWith("shrunk.") && new File(s.substring(7)).exists()){
					s=s.substring(7);
				}
			}
			FileFormat ff=FileFormat.testInput(s, FileFormat.TXT, null, true, false);
			if(ff.gzip()){
				//Count every gzipped input.  Previously the counter was only incremented
				//inside a test of (processes>maxPigzProcesses), so it stayed at 0 and the
				//limit was never applied.
				processes++;
				if(processes>maxPigzProcesses){
					//Over the limit: rebuild the format with the 4th argument false.
					//NOTE(review): presumably that flag permits a decompression subprocess - confirm against FileFormat.testInput.
					ff=FileFormat.testInput(s, FileFormat.TXT, null, false, false);
				}
			}
			ffin[i]=ff;
		}
	}
	
	/**
	 * Allocates the static lookup structures, loads all input files through spawnThreads,
	 * prints timing and line statistics to outstream, and finally marks the class as loaded.
	 * @param t Timer started by the caller; stopped here once loading has finished.
	 * @throws RuntimeException if any file or thread reported an error.
	 */
	@SuppressWarnings("unchecked")
	void process(Timer t){
		
		//One String->taxid map per possible first character (indexed by char value in get())
		assert(maps==null);
		maps=new HashMap[128];
		for(int way=0; way<maps.length; way++){
			maps[way]=new HashMap<String, Integer>();
		}
		
		assert(tables==null);
		if(USE_TABLES){
			final String preallocArg="prealloc="+(prealloc>0 ? Float.toString(prealloc) : "f");
			tables=new KmerTableSet(new String[] {"ways=31", preallocArg}, 12);
			tables.allocateTables();
		}
		
		if(ffin.length<=4){
			spawnThreads(ffin, 200);
		}else{//Addresses a multithreaded read bug in Java
			//Load the first two files with at most 2 threads, then everything else
			final FileFormat[] firstTwo=Arrays.copyOf(ffin, 2);
			final FileFormat[] remainder=Arrays.copyOfRange(ffin, 2, ffin.length);
			spawnThreads(firstTwo, 2);
			spawnThreads(remainder, 200);
		}
		
		//Do anything necessary after processing
		System.gc();
		
		t.stop();
		outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8));
		
		final long linesInvalid=linesProcessed-linesValid;
		outstream.println();
		outstream.println("Valid Lines:       \t"+linesValid);
		outstream.println("Invalid Lines:     \t"+linesInvalid);
		
		//Optional statistics; each array is null unless enabled in the source
		printCountTable("Length counts:", lengthCounts);
		
		if(symbolCounts!=null){
			outstream.println();
			outstream.println("Symbols:");
			
			//Comma-separated list of the symbol codes that occurred at least once
			String separator="";
			for(int symbol=0; symbol<symbolCounts.length(); symbol++){
				if(symbolCounts.get(symbol)>0){
					outstream.print(separator+symbol);
					separator=",";
				}
			}
		}
		
		printCountTable("Length_underscore counts:", counts_underscore);
		printCountTable("Length_underscore2 counts:", counts_underscore2);
		
		outstream.println();
		Shared.printMemory();
		
		if(errorState){
			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
		}
		
		LOADED=true;
	}
	
	/**
	 * Prints a blank line, the title, and one "index TAB count" row for each nonzero entry.
	 * Does nothing when counts is null.
	 */
	private void printCountTable(String title, AtomicLongArray counts){
		if(counts==null){return;}
		outstream.println();
		outstream.println(title);
		
		for(int i=0; i<counts.length(); i++){
			final long count=counts.get(i);
			if(count>0){outstream.println(i+"\t"+count);}
		}
	}
	
	/**
	 * Reads the given files with a pool of HashThreads, waits for them to finish,
	 * and folds their counters into this object.  Sets errorState if a file fails to
	 * close cleanly or a thread did not finish successfully.
	 * @param ffa Input formats; null entries are skipped.  The array order is changed by
	 * the call to Tools.reverseInPlace.
	 * @param threadLimit Upper bound on the number of HashThreads created.
	 */
	private void spawnThreads(FileFormat[] ffa, int threadLimit){
		
		//Flip the order of the inputs (the constructor sorted them by ascending size)
		Tools.reverseInPlace(ffa, 0, ffa.length);
		
		//Open one ByteFile per non-null input
		final ArrayList<ByteFile> byteFiles=new ArrayList<ByteFile>(ffa.length);
		for(int i=0; i<ffa.length; i++){
			final FileFormat ff=ffa[i];
			if(ff==null){continue;}
			System.err.println("Loading "+ff.name());
			byteFiles.add(ByteFile.makeByteFile(ff, 1));
		}
		
		//Create the workers; files are assigned round-robin, so several threads may share one file
		final int threads=Tools.min(threadLimit, Tools.max(byteFiles.size(), Shared.threads()));
		final ArrayList<HashThread> workers=new ArrayList<HashThread>(threads);
		for(int i=0; i<threads; i++){
			workers.add(new HashThread(byteFiles.get(i%byteFiles.size())));
		}
		
		//Start the threads
		for(HashThread worker : workers){
			worker.start();
		}
		
		//Wait for completion of all threads, collecting their results
		boolean allSucceeded=true;
		for(HashThread worker : workers){
			
			//Keep joining until this thread has really terminated
			while(worker.getState()!=Thread.State.TERMINATED){
				try {
					worker.join();
				} catch (InterruptedException e) {
					//Potentially handle this, if it is expected to occur
					e.printStackTrace();
				}
			}
			
			linesProcessed+=worker.linesProcessedT;
			linesValid+=worker.linesValidT;
			bytesProcessed+=worker.bytesProcessedT;
			
			accumulate(lengthCounts, worker.lengthCountsT);
			accumulate(symbolCounts, worker.symbolCountsT);
			accumulate(counts_underscore, worker.counts_underscoreT);
			accumulate(counts_underscore2, worker.counts_underscore2T);
			
			allSucceeded&=worker.success;
		}
		
		//Close the byte files; a true return from close() is recorded as an error
		for(ByteFile bf : byteFiles){
			final boolean closeError=bf.close();
			errorState=closeError|errorState;
		}
		
		//Track whether any threads failed
		if(!allSucceeded){errorState=true;}
	}
	
	/**
	 * Adds each element of b to the element of a at the same index.
	 * Does nothing if either argument is null.
	 */
	private static void accumulate(AtomicLongArray a, long[] b){
		if(a!=null && b!=null){
			int index=0;
			for(final long delta : b){
				a.getAndAdd(index, delta);
				index++;
			}
		}
	}
	
	/*--------------------------------------------------------------*/
	
	/**
	 * Looks up the taxonomy ID stored for an accession.
	 * Only the part before the first '.', ':' or ',' is used as the key.
	 * The hashed tables are consulted first; accessions that cannot be encoded as a
	 * number fall back to the String maps, which are indexed by first character.
	 * @param accession Accession to look up; may be null.
	 * @return The stored taxid, or -1 if the accession is null, empty, or not found.
	 */
	public static int get(String accession){
		if(accession==null){return -1;}

		//Find the end of the unique part of the accession
		int len=accession.length();
		for(int i=0; i<len; i++){
			char c=accession.charAt(i);
			if(c=='.' || c==':' || c==','){
				len=i; break;
			}
		}
		
		if(USE_TABLES){
			if(AnalyzeAccession.codeMap!=null){
				final long number=AnalyzeAccession.digitize(accession);
				if(number>=0){
					int value=tables.getCount(number);
					return value<0 ? -1 : value;
				}
				//A negative number means the accession was not encodable; fall through to the maps
			}else if(len<=12){
				long number=hash(accession);

				int value=tables.getCount(number);
				return value<1 ? -1 : value;
			}
		}
		
		if(len<accession.length()){accession=accession.substring(0, len);}
		if(accession.length()<1){return -1;}
		int way=accession.charAt(0);
		//There is one map per char value below maps.length; a larger first character
		//has no map, so it cannot be present.  Previously this threw ArrayIndexOutOfBoundsException.
		if(way>=maps.length){return -1;}
		Integer value=maps[way].get(accession);
		return value==null ? -1 : value.intValue();
	}
	
	/**
	 * Tests whether a string looks like an accession: at least 4 characters, each of
	 * which is a digit, an uppercase ASCII letter, or one of '.', '_', '-', ':', ','.
	 * Lowercase letters are rejected.
	 * @param s Candidate string; may be null.
	 * @return true if s is non-null, long enough, and contains only permitted symbols.
	 */
	public static boolean isValidAccession(String s){
		if(s==null || s.length()<4){return false;}
		for(final char c : s.toCharArray()){
			final boolean digit=(c>='0' && c<='9');
			final boolean upper=(c>='A' && c<='Z');
			final boolean punctuation=(c=='.' || c=='_' || c=='-' || c==':' || c==',');
			if(!(digit || upper || punctuation)){return false;}
		}
		return true;
	}
	
	/**
	 * Encodes the unique part of an accession as a base-37 number.
	 * Digits map to 0-9, '_' and '-' both map to 10, and letters of either case map to 11-36.
	 * Encoding stops at the first '.', ':' or ','.
	 * @param accession Accession string; must not be null.
	 * @return The encoded number.
	 */
	static long hash(String accession){
		final int length=accession.length();
		long code=0;
		for(int pos=0; pos<length; pos++){
			final char symbol=accession.charAt(pos);
			if(symbol=='.' || symbol==':' || symbol==','){break;}
			
			long digit=symbol;//Stays as the raw char value if unrecognized and assertions are off
			if(symbol>='0' && symbol<='9'){
				digit=symbol-'0';
			}else if(symbol>='A' && symbol<='Z'){
				digit=symbol+offset;
			}else if(symbol=='_' || symbol=='-'){
				digit=10;//Collision, but should be OK
			}else if(symbol>='a' && symbol<='z'){
				digit=symbol+offsetLower;
			}else{
				assert(false) : accession;
			}
			code=(code*37)+digit;
		}
		return code;
	}
	
	/**
	 * Byte-array version of the accession encoder: encodes line[0..limit) as a base-37 number.
	 * Digits map to 0-9, '_' and '-' both map to 10, and letters of either case map to 11-36.
	 * Encoding stops early at the first '.', ':' or ','.
	 * @param line Bytes of the line; the accession starts at index 0.
	 * @param limit Exclusive end index of the region to encode.
	 * @return The encoded number.
	 */
	static long hash(final byte[] line, final int limit){
		long code=0;
		for(int pos=0; pos<limit; pos++){
			final byte symbol=line[pos];
			if(symbol=='.' || symbol==':' || symbol==','){break;}
			
			long digit=symbol;//Stays as the raw byte value if unrecognized and assertions are off
			if(symbol>='0' && symbol<='9'){
				digit=symbol-'0';
			}else if(symbol>='A' && symbol<='Z'){
				digit=symbol+offset;
			}else if(symbol=='_' || symbol=='-'){
				digit=10;//Collision, but should be OK
			}else if(symbol>='a' && symbol<='z'){
				digit=symbol+offsetLower;
			}else{
				assert(false) : new String(line);
			}
			code=(code*37)+digit;
		}
		return code;
	}
	
	/**
	 * Extracts the integer in the third delimited field (index 2) of a line.
	 * Field 0 must be non-empty and field 1 may be empty; both are skipped.
	 * @param line Bytes of one input line.
	 * @param delimiter Field separator.
	 * @return The value of field 2 as parsed by Parse.parseInt.
	 */
	public static int parseLineToTaxid(final byte[] line, final byte delimiter){
		int start=0, end=0;
		
		//Skip field 0
		for(; end<line.length && line[end]!=delimiter; end++){}
		assert(end>start) : "Missing field 0: "+new String(line);
		start=++end;
		
		//Skip field 1
		for(; end<line.length && line[end]!=delimiter; end++){}
		assert(end>=start) : "Missing field 1: "+new String(line)+"\n"+start+", "+end;
		start=++end;
		
		//Parse field 2
		for(; end<line.length && line[end]!=delimiter; end++){}
		assert(end>start) : "Missing field 2: "+new String(line);
		return Parse.parseInt(line, start, end);
	}
	
	/**
	 * Two-column variant: extracts the integer in the second delimited field (index 1) of a line.
	 * Field 0 must be non-empty and is skipped.
	 * @param line Bytes of one input line.
	 * @param delimiter Field separator.
	 * @return The value of field 1 as parsed by Parse.parseInt.
	 */
	public static int parseLineToTaxid_2col(final byte[] line, final byte delimiter){
		int start=0, end=0;
		
		//Skip field 0
		for(; end<line.length && line[end]!=delimiter; end++){}
		assert(end>start) : "Missing field 0: "+new String(line);
		start=++end;
		
		//Parse field 1
		for(; end<line.length && line[end]!=delimiter; end++){}
		assert(end>start) : "Missing field 1: "+new String(line);
		return Parse.parseInt(line, start, end);
	}
	
	/*--------------------------------------------------------------*/
	
	public static class HashThread extends Thread {
		
		/**
		 * Creates a worker that reads lines from the given file.
		 * Allocates this thread's private maps (one per first-character value) and,
		 * when tables are in use, a HashBuffer in front of the shared tables.
		 * @param bf_ File to read; it may be shared with other HashThreads.
		 */
		@SuppressWarnings("unchecked")
		public HashThread(ByteFile bf_){
			bf=bf_;
			
			mapsT=new HashMap[128];
			for(int way=0; way<mapsT.length; way++){
				mapsT[way]=new HashMap<String, Integer>();
			}
			
			if(USE_TABLES){
				table=new HashBuffer(tables.tables(), 1000, 31, true, true);
			}
		}
		
		/**
		 * Reads up to limit lines from the file while holding the file's lock, so that
		 * threads sharing one ByteFile each receive a contiguous batch.
		 * @param limit Maximum number of lines to read.
		 * @return The lines read, or null if the file is exhausted.
		 */
		ArrayList<byte[]> fetch(int limit){
			final ArrayList<byte[]> batch=new ArrayList<byte[]>(limit);
			synchronized(bf){
				byte[] line=bf.nextLine();
				while(line!=null){
					batch.add(line);
					if(batch.size()>=limit){break;}
					line=bf.nextLine();
				}
			}
			return batch.isEmpty() ? null : batch;
		}
		
		/**
		 * Worker loop: fetches batches of lines until the file is exhausted, parses each
		 * non-empty line that is not a header, then merges this thread's maps into the
		 * shared maps and flushes the table buffer.  Sets success only on normal completion.
		 */
		@Override
		public void run(){
			final int fetchSize=1000;
			ArrayList<byte[]> batch=fetch(fetchSize);
			while(batch!=null){
				for(byte[] line : batch){
					if(line.length<1){continue;}
					linesProcessedT++;
					bytesProcessedT+=line.length;
					
					//Lines starting with "accession" are treated as headers and never parsed
					final boolean header=Tools.startsWith(line, "accession");
					if(header || skipParse){continue;}
					
					if(parseLine2(line, (byte)'\t')){linesValidT++;}
				}
				batch=fetch(fetchSize);
			}
			
			//Merge the thread-local maps into the shared ones, locking one shared map at a time
			for(int way=0; way<mapsT.length; way++){
				final HashMap<String, Integer> local=mapsT[way];
				if(local.size()>0){
					synchronized(maps[way]){
						maps[way].putAll(local);
					}
				}
				mapsT[way]=null;
			}
			
			//Push any buffered entries into the shared tables
			if(USE_TABLES){
				table.flush();
			}
			
			success=true;
		}
		
//		public boolean parseLineNumeric(final byte[] line, final byte delimiter){
//			int a=0, b=0;
//
//			long accession=0;
//			final int ncbi, gi;
//
//			while(b<line.length && line[b]!=delimiter){b++;}
//			assert(b>a) : "Missing field 0: "+new String(line);
//			for(int i=a; i<b; i++){
//				long c=line[i];
//				if(c=='.'){break;}
//				if(c<='9'){c=c-'0';}
//				else{c=c-'A'+10;}
//				accession=(accession*36)+c;
//			}
//			b++;
//			a=b;
//
//			while(b<line.length && line[b]!=delimiter){b++;}
//			assert(b>a) : "Missing field 1: "+new String(line);
//			//accession2=new String(line, a, b-a);
//			b++;
//			a=b;
//
//			while(b<line.length && line[b]!=delimiter){b++;}
//			assert(b>a) : "Missing field 2: "+new String(line);
//			ncbi=Parse.parseInt(line, a, b);
//			b++;
//			a=b;
//
////			while(b<line.length && line[b]!=delimiter){b++;}
////			assert(b>a) : "Missing field 3: "+new String(line);
//////			gi=Parse.parseInt(line, a, b);
////			b++;
////			a=b;
//
//			if(ncbi<1){return false;}
//
//			if(tree!=null){
//				if(ncbi>=tree.nodes.length){return false;}
//				TaxNode tn=tree.getNode(ncbi);
//				if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE || tn.level==TaxTree.DOMAIN){return false;}
//				if(tn.pid>=tree.nodes.length){return false;}
//				tn=tree.getNode(tn.pid);
//				if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE){return false;}
//			}
//			assert(accession>=0) : new String(line);
//			table.set(accession, ncbi);
//			return true;
//		}
		
		//This code is no longer used and can be safely deleted.
		/**
		 * Older String-based line parser, kept only for reference; run() calls parseLine2 instead.
		 * Reads the accession from field 0 (truncated at the first '.'), skips field 1,
		 * parses the taxid from field 2, and stores the pair in the table buffer
		 * (accessions shorter than 13 characters) or in the thread-local map.
		 * @param line Bytes of one input line.
		 * @param delimiter Field separator.
		 * @return true if the mapping was stored; false if the taxid is below 1 or is
		 * rejected by the taxonomy tree checks.
		 */
		@Deprecated
		public boolean parseLine(final byte[] line, final byte delimiter){
			int a=0, b=0;
			
			String accession;
			final int ncbi, gi;//gi is never assigned or read
			
			//Field 0: the accession
			while(b<line.length && line[b]!=delimiter){b++;}
			assert(b>a) : "Missing field 0: "+new String(line);
			accession=new String(line, a, b-a);
			final int dot=accession.indexOf('.');//and :, but this is deprecated.
			if(dot>=0){//Should never happen
//				System.err.println(accession);
//				assert(dot==accession.length()-2) : accession;
				accession=accession.substring(0, dot);
			}
//			if(STRIP_UNDERSCORE){
//				accession=accession.replaceAll("[_-]", "");
//			}
			//Optional statistics, indexed by the full length of field 0; skipped while the arrays are null
			if(lengthCountsT!=null){lengthCountsT[b-a]++;}
			if(symbolCountsT!=null){
				for(int i=a; i<b; i++){symbolCountsT[line[i]]++;}
			}
			final int underscore=accession.indexOf('_');
			if(underscore>=0){
				if(counts_underscoreT!=null){counts_underscoreT[b-a]++;}
				if(counts_underscore2T!=null && underscore==2){counts_underscore2T[b-a]++;}
			}
			b++;
			a=b;
			
			//Field 1: skipped; may be empty
			while(b<line.length && line[b]!=delimiter){b++;}
//			assert(b>a) : "Missing field 1: "+new String(line);
			assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
			//accession2=new String(line, a, b-a);
			b++;
			a=b;
			
			//Field 2: the taxid
			while(b<line.length && line[b]!=delimiter){b++;}
			assert(b>a) : "Missing field 2: "+new String(line);
			ncbi=Parse.parseInt(line, a, b);
			b++;
			a=b;
			
//			while(b<line.length && line[b]!=delimiter){b++;}
//			assert(b>a) : "Missing field 3: "+new String(line);
////			gi=Parse.parseInt(line, a, b);
//			b++;
//			a=b;
			
			if(ncbi<1){return false;}
			
			//When a tree is loaded, reject taxids outside it and nodes (or parents) at the excluded ranks
			if(tree!=null){
				if(ncbi>=tree.nodes.length){return false;}
				TaxNode tn=tree.getNode(ncbi);
				if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;}
				if(tn.pid>=tree.nodes.length){return false;}
				tn=tree.getNode(tn.pid);
				if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E){return false;}
			}
			
			//Short accessions are stored as a hashed number in the table buffer
			if(accession.length()<13 && USE_TABLES){
				long number=hash(accession);
				assert(number>=0) : new String(line);
				table.set(number, ncbi);
				return true;
			}
			
			//Everything else goes into the map selected by the first character
			int way=accession.charAt(0);
			mapsT[way].put(accession, ncbi);
//			Integer old=mapsT[way].put(accession, ncbi);
//			assert(old==null || old==ncbi) : "'"+accession+"': "+old+" -> "+ncbi;
//			System.err.println("'"+accession+"': "+old+" -> "+ncbi);
//			assert(dot==-1) : "'"+accession+"': "+old+" -> "+ncbi;
			return true;
		}
		
		/**
		 * Parses one delimited line and stores its accession-to-taxid mapping in this
		 * thread's buffers.  The accession key is the part of field 0 before the first
		 * '.', ':' or ','; field 1 is skipped; field 2 is parsed as the taxid.
		 * Encodable accessions go to the table buffer, all others to the thread-local
		 * map selected by their first character.
		 * @param line Bytes of one input line.
		 * @param delimiter Field separator.
		 * @return true if the mapping was stored; false if the taxid is below 1, skipHash
		 * is set, the taxid is rejected by the taxonomy tree checks, or (in distributed
		 * mode) the accession belongs to a different server.
		 */
		public boolean parseLine2(final byte[] line, final byte delimiter){
			int a=0, b=0;
			
			final int ncbi;

			while(b<line.length && line[b]!=delimiter 
					&& line[b]!='.' && line[b]!=':' && line[b]!=','){b++;}//parse unique part of accession
			final int dot=b;//Exclusive end of the accession key
			assert(b>a) : "Missing field 0: "+new String(line);
			while(b<line.length && line[b]!=delimiter){b++;}//skip the rest of the accession
			
			{//Optional statistics block; skipped while the arrays are null
				if(lengthCountsT!=null){lengthCountsT[dot]++;}
				if(symbolCountsT!=null){
					for(int i=0; i<dot; i++){symbolCountsT[line[i]]++;}
				}
				if(counts_underscoreT!=null || counts_underscore2T!=null){
					int underscore=-1;
					for(int i=0; i<dot; i++){
						if(line[i]=='_'){
							underscore=i;
							break;
						}
					}
					if(underscore>=0){
						if(counts_underscoreT!=null){counts_underscoreT[dot]++;}
						if(counts_underscore2T!=null && underscore==2){counts_underscore2T[dot]++;}
					}
				}
			}
			b++;
			a=b;
			
			//Field 1: skipped; may be empty
			while(b<line.length && line[b]!=delimiter){b++;}
			assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b;
			b++;
			a=b;
			
			//Field 2: the taxid
			while(b<line.length && line[b]!=delimiter){b++;}
			assert(b>a) : "Missing field 2: "+new String(line);
			ncbi=Parse.parseInt(line, a, b);
			
			if(ncbi<1){return false;}
			if(skipHash){return false;}
			
			//When a tree is loaded, reject taxids outside it and nodes ranked as life or domain
			if(tree!=null){
				if(ncbi>=tree.nodes.length){return false;}
				TaxNode tn=tree.getNode(ncbi);
				if(tn==null || /*tn.levelExtended==TaxTree.NO_RANK_E ||*/ tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;}
				if(tn.pid>=tree.nodes.length){return false;}
			}
			
			if(distributed){
				String accession=new String(line, 0, dot);//slow
				assert(accession.equals(accession.toUpperCase()));//TODO: Disable. (slow)
				//String.hashCode() can be negative, and in Java a negative dividend gives a
				//non-positive remainder, which never equals a positive serverNum.  Without the
				//mask, such accessions were rejected by every server (except multiples of
				//serverCount, which landed on server 0).  Masking the sign bit assigns every
				//accession to exactly one server and leaves non-negative hash codes unchanged.
				//NOTE(review): whatever routes queries to servers must use the same mapping - confirm.
				if((accession.hashCode()&Integer.MAX_VALUE)%serverCount!=serverNum){return false;}
			}
			
			if(USE_TABLES){
				if(AnalyzeAccession.codeMap!=null){
					final long number=AnalyzeAccession.digitize(line);
					if(number>=0){
						table.set(number, ncbi);
						return true;
					}
					//Not encodable; fall through to the String maps
					assert(number==-1) : number+", "+new String(line);
				}else{
					if(dot<13){
						final long number=hash(line, dot);
						assert(number>=0) : new String(line);
						table.set(number, ncbi);
						return true;
					}
				}
			}
			
			//Fallback: store by String in the map selected by the first character
			String accession=new String(line, 0, dot);
			int way=accession.charAt(0);
			mapsT[way].put(accession, ncbi);
			return true;
		}
		
		/** Number of non-empty lines this thread has read. */
		private long linesProcessedT=0;
		/** Number of lines for which parseLine2 returned true. */
		private long linesValidT=0;
		/** Total length, in bytes, of the non-empty lines this thread has read. */
		private long bytesProcessedT=0;
		
		/** Input file; used as the lock in fetch(), so it may be shared between threads. */
		final ByteFile bf;
		/** Thread-local accession maps, indexed by first character; merged into the shared maps at the end of run(). */
		HashMap<String, Integer>[] mapsT;
		/** Buffer in front of the shared tables; flushed at the end of run(). */
		HashBuffer table;
		/** Set to true only when run() completes normally. */
		boolean success=false;
		
		//Optional per-thread statistics; all are null here, which disables their collection.
		private long[] lengthCountsT=null;//new AtomicLongArray(20);
		private long[] symbolCountsT=null;//new AtomicLongArray(255);
		private long[] counts_underscoreT=null;//new AtomicLongArray(20);
		private long[] counts_underscore2T=null;//new AtomicLongArray(20);
	}
	
	/*--------------------------------------------------------------*/
	
	
	/*--------------------------------------------------------------*/
	
	/** Input file paths, filled from "in=" arguments and bare file arguments. */
	private ArrayList<String> in=new ArrayList<String>();
//	private String out=null;
	
	/** Value of the "maxpigzprocesses" flag; consulted in the constructor when gzipped inputs are set up. */
	static int maxPigzProcesses=12;
	
	/*--------------------------------------------------------------*/
	
	/** Total non-empty lines read, summed over all threads. */
	private long linesProcessed=0;
	/** Total lines successfully parsed and stored, summed over all threads. */
	private long linesValid=0;
	/** Total bytes in the lines read, summed over all threads. */
	private long bytesProcessed=0;
	
	//Optional statistics; all are null here, so process() prints none of them.
	private AtomicLongArray lengthCounts=null;//new AtomicLongArray(20);
	private AtomicLongArray symbolCounts=null;//new AtomicLongArray(255);
	private AtomicLongArray counts_underscore=null;//new AtomicLongArray(20);
	private AtomicLongArray counts_underscore2=null;//new AtomicLongArray(20);
	
	/*--------------------------------------------------------------*/

	/** One FileFormat per input file, in the order established by the constructor. */
	private final FileFormat ffin[];
//	private final FileFormat ffout;
	
	
	/*--------------------------------------------------------------*/
	
	/** @return true once process() has completed without an error state. */
	public static boolean LOADED(){return LOADED;}
	
	/** Set to true at the end of a successful process() call. */
	private static boolean LOADED=false;
	/** Shared String-to-taxid maps, indexed by the first character of the accession; allocated in process(). */
	private static HashMap<String, Integer>[] maps=null;
	/** Shared tables holding numerically encoded accessions; allocated in process(). */
	private static KmerTableSet tables;
	/** Optional taxonomy tree; when non-null, parsed taxids are validated against it. */
	public static TaxTree tree=null;
//	public static final boolean USE_MAPS=true;
	/** When true, encodable accessions are stored in the tables rather than the String maps. */
	public static final boolean USE_TABLES=true;
//	public static boolean STRIP_UNDERSCORE=false;
	/** When true, lines are read and counted but not parsed. */
	public static boolean skipParse=false;
	/** When true, lines are parsed but nothing is stored. */
	public static boolean skipHash=false;
	/** Value passed to the tables as "prealloc="; 0 (or less) is passed as "f". */
	public static float prealloc=0;
	/** Added to an uppercase letter in hash() so that 'A' becomes 11. */
	private static final long offset=-'A'+11;
	/** Added to a lowercase letter in hash() so that 'a' becomes 11. */
	private static final long offsetLower=-'a'+11;
	
	/** In distributed mode, the index of this server; compared against the accession hash modulo serverCount. */
	public static int serverNum=0;
	/** In distributed mode, the divisor applied to the accession hash. */
	public static int serverCount=1;
	/** When true, parseLine2 stores only the accessions assigned to serverNum. */
	public static boolean distributed=false;
	
	/*--------------------------------------------------------------*/
	
	/** Destination for status messages; may be replaced by the PreParser in the constructor. */
	private PrintStream outstream=System.err;
	/** Set by the "verbose" flag, which also sets the verbose flags of several I/O classes. */
	public static boolean verbose=false;
	/** True if a file failed to close cleanly or a thread did not finish successfully. */
	public boolean errorState=false;
	/** Copied from the Parser's overwrite field; not otherwise read in this file. */
	private boolean overwrite=false;
	
}