diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/RenameGiToTaxid.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/RenameGiToTaxid.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,893 @@
+package tax;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+
+import fileIO.ByteFile;
+import fileIO.ByteFile1;
+import fileIO.ByteFile2;
+import fileIO.ByteStreamWriter;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import kmer.HashArray1D;
+import shared.KillSwitch;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.ReadStats;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import stream.ConcurrentGenericReadInputStream;
+import stream.FASTQ;
+import stream.FastaReadInputStream;
+import structures.ByteBuilder;
+import structures.IntList;
+
+/**
+ * @author Brian Bushnell
+ * @date Mar 10, 2015
+ *
+ */
+public class RenameGiToTaxid {
+	
+	public static void main(String[] args){
+		Timer t=new Timer();
+		RenameGiToTaxid x=new RenameGiToTaxid(args);
+		x.process(t);
+		
+		//Close the print stream if it was redirected
+		Shared.closeStream(x.outstream);
+	}
+	
+	public RenameGiToTaxid(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, getClass(), false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		Shared.capBuffers(4);
+		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
+		ReadWrite.USE_BGZIP=ReadWrite.USE_UNBGZIP=ReadWrite.PREFER_BGZIP=true;
+		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
+		FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
+		
+		Parser parser=new Parser();
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			String[] split=arg.split("=");
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+
+			if(a.equals("prefix")){
+				prefix=Parse.parseBoolean(b);
+			
+			}else if(a.equals("server") || a.equals("useserver")){
+				if(b!=null && b.startsWith("http")){
+					useServer=true;
+					String path=b;
+					if(!path.endsWith("/")){path+="/";}
+					Shared.setTaxServer(path);
+				}else{
+					useServer=Parse.parseBoolean(b);
+				}
+			}else if(a.equals("title")){
+				title=(b==null ? ">" : (">"+b+"|")).getBytes();
+			}else if(a.equals("table") || a.equals("gi") || a.equals("gitable")){
+				giTableFile=b;
+			}else if(a.equals("accession")){
+				accessionFile=b;
+			}else if(a.equals("pattern")){
+				patternFile=b;
+			}else if(a.equals("tree") || a.equals("taxtree")){
+				taxTreeFile=b;
+			}else if(a.equals("invalid")){
+				outInvalid=b;
+			}else if(a.equals("deleteinvalid")){
+				deleteInvalid=Parse.parseBoolean(b);
+			}else if(a.equals("badheaders")){
+				badHeaders=b;
+			}else if(a.equals("maxbadheaders") || a.equals("maxinvalidheaders")){
+				maxInvalidHeaders=Parse.parseKMG(b);
+			}else if(a.equals("keepall")){
+				keepAll=Parse.parseBoolean(b);
+			}else if(a.equals("shrinknames")){
+				shrinkNames=Parse.parseBoolean(b);
+			}else if(a.equals("warn")){
+				warnBadHeaders=Parse.parseBoolean(b);
+			}
+			
+			else if(a.equals("maxpigzprocesses")){
+				AccessionToTaxid.maxPigzProcesses=Integer.parseInt(b);
+			}else if(a.equals("skipparse")){
+				AccessionToTaxid.skipParse=Parse.parseBoolean(b);
+			}else if(a.equals("skiphash")){
+				AccessionToTaxid.skipHash=Parse.parseBoolean(b);
+			}
+			
+			else if(a.equals("mode")){
+				if(b!=null && Character.isDigit(b.charAt(0))){
+					mode=Integer.parseInt(b);
+				}else if("accession".equalsIgnoreCase(b)){
+					mode=ACCESSION_MODE;
+				}else if("unite".equalsIgnoreCase(b)){
+					mode=UNITE_MODE;
+					TaxTree.UNITE_MODE=true;
+				}else if("gi".equalsIgnoreCase(b)){
+					mode=GI_MODE;
+				}else if("header".equalsIgnoreCase(b)){
+					mode=HEADER_MODE;
+				}else{
+					assert(false) : "Bad mode: "+b;
+				}
+			}
+			
+			else if(a.equals("verbose")){
+				verbose=Parse.parseBoolean(b);
+				ByteFile1.verbose=verbose;
+				ByteFile2.verbose=verbose;
+				stream.FastaReadInputStream.verbose=verbose;
+				ConcurrentGenericReadInputStream.verbose=verbose;
+				stream.FastqReadInputStream.verbose=verbose;
+				ReadWrite.verbose=verbose;
+			}else if(a.equals("in") || a.equals("in1")){
+				assert(b!=null) : "Bad parameter: "+arg;
+				if(new File(b).exists()){
+					in1.add(b);
+				}else{
+					for(String bb : b.split(",")){
+						in1.add(bb);
+					}
+				}
+			}else if(new File(arg).exists()){ //For asterisk expansion
+				in1.add(arg);
+			}else if(parser.parse(arg, a, b)){
+				//do nothing
+			}else{
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i];
+				//				throw new RuntimeException("Unknown parameter "+args[i]);
+			}
+		}
+		
+		if(useServer){
+			giTableFile=null;
+			accessionFile=null;
+			patternFile=null;
+			if(mode!=UNITE_MODE){taxTreeFile=null;}
+		}//else if taxpath!=null... set them
+		
+		{//Process parser fields
+			Parser.processQuality();
+			
+			maxReads=parser.maxReads;
+			
+			overwrite=ReadStats.overwrite=parser.overwrite;
+			append=ReadStats.append=parser.append;
+
+			out1=parser.out1;
+		}
+		
+		if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();}
+		if("auto".equalsIgnoreCase(giTableFile)){giTableFile=TaxTree.defaultTableFile();}
+		if("auto".equalsIgnoreCase(accessionFile)){accessionFile=TaxTree.defaultAccessionFile();}
+		if("auto".equalsIgnoreCase(patternFile)){patternFile=TaxTree.defaultPatternFile();}
+		
+		assert(FastaReadInputStream.settingsOK());
+		
+		if(in1==null || in1.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");}
+		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
+			ByteFile.FORCE_MODE_BF2=false;
+			ByteFile.FORCE_MODE_BF1=true;
+		}
+
+		if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
+		assert(out1!=null) : "This program requires an output file.";
+		
+		if(!Tools.testOutputFiles(overwrite, append, false, out1)){
+			outstream.println((out1==null)+", "+out1);
+			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
+		}
+		if(!Tools.testInputFiles(false, true, in1.toArray(new String[0]))){
+			throw new RuntimeException("\nCan't read some input files.\n");  
+		}
+
+		ffout1=FileFormat.testOutput(out1, FileFormat.FA, null, true, overwrite, append, false);
+		ffoutInvalid=FileFormat.testOutput(outInvalid, FileFormat.FA, null, true, overwrite, append, false);
+		ffin1=new ArrayList<FileFormat>(in1.size());
+		for(String s : in1){
+			FileFormat ff=FileFormat.testInput(s, FileFormat.FA, null, true, true);
+			ffin1.add(ff);
+		}
+		
+		if(ffoutInvalid!=null){keepAll=false;}
+		
+		assert(giTableFile!=null || accessionFile!=null || TaxTree.SILVA_MODE || useServer) : "No gi or accession information loaded.";
+		
+		if(taxTreeFile!=null){
+			tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false);
+			assert(tree.nameMap!=null);
+		}else{
+			tree=null;
+			if(!useServer){throw new RuntimeException("No tree specified.");}
+		}
+		
+		if(giTableFile!=null){
+			GiToTaxid.initialize(giTableFile);
+		}
+		
+		if(patternFile!=null){
+			Timer t=new Timer();
+			AnalyzeAccession.loadCodeMap(patternFile);
+			outstream.println("Loading pattern table.");
+			t.stopAndPrint();
+		}
+		
+		if(accessionFile!=null){
+			AccessionToTaxid.tree=tree;
+			outstream.println("Loading accession table.");
+			AccessionToTaxid.load(accessionFile);
+//			System.gc();
+		}
+	}
+	
+	void process(Timer t){
+		
+		ByteStreamWriter bsw=(ffout1==null ? null : new ByteStreamWriter(ffout1)); //Actually, this is required.
+		if(bsw!=null){bsw.start();}
+		
+		ByteStreamWriter bswInvalid=null;
+		if(ffoutInvalid!=null){
+			bswInvalid=new ByteStreamWriter(ffoutInvalid);
+			bswInvalid.start();
+		}
+		
+		ByteStreamWriter bswBadHeaders=null;
+		if(badHeaders!=null) {
+			bswBadHeaders=new ByteStreamWriter(badHeaders, overwrite, append, false);
+			bswBadHeaders.start();
+		}
+		
+		final HashArray1D counts=(countTable && !prefix) ? new HashArray1D(256000, -1L, true) : null;
+		
+		gffIn=false;
+		for(FileFormat ffin : ffin1){
+			gffIn=gffIn||ffin.gff();
+			ByteFile bf=ByteFile.makeByteFile(ffin);
+			if(useServer){
+				processInner_server(bf, bsw, bswInvalid, bswBadHeaders, counts, ffin.format());
+			}else{
+//				IntList list=(useServer ? getIds(bf) : null);
+				processInner(bf, bsw, bswInvalid, bswBadHeaders, counts, null);
+			}
+		}
+		
+		if(bsw!=null){
+			errorState|=bsw.poisonAndWait();
+			if(deleteInvalid && invalidReads>0 && !ffout1.stdio()){
+				try {
+					System.err.println("Deleting "+out1);
+					new File(out1).delete();
+				} catch (Exception e) {
+					System.err.println("An error occured while attempting to delete "+out1);
+					e.printStackTrace();
+				}
+			}
+		}
+		if(bswInvalid!=null){errorState|=bswInvalid.poisonAndWait();}
+		if(bswBadHeaders!=null){errorState|=bswBadHeaders.poisonAndWait();}
+		
+		t.stop();
+		if(!gffIn) {
+			outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
+
+			outstream.println();
+			outstream.println("Valid Sequences:   \t"+validReads);
+			outstream.println("Valid Bases:       \t"+validBases);
+			outstream.println("Invalid Sequences: \t"+invalidReads);
+			outstream.println("Invalid Bases:     \t"+invalidBases);
+		}else{
+			outstream.println(Tools.timeLinesBytesProcessed(t, linesIn, basesProcessed, 8));
+
+			outstream.println();
+			outstream.println("Valid Lines:       \t"+validLines);
+			outstream.println("Valid Bytes:       \t"+validBases);
+			outstream.println("Invalid Lines:     \t"+invalidLines);
+			outstream.println("Invalid Bytes:     \t"+invalidBases);
+		}
+		if(counts!=null){
+			outstream.println("Unique Taxa:       \t"+taxaCounted);
+		}
+		
+		if(errorState){
+			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+		}
+	}
+	
+	//Unused; not efficient
+//	public IntList getIds(ByteFile bf){
+//		IntList ids=new IntList();
+//		
+//		int readsProcessedInner=0;
+//		
+//		byte[] line=bf.nextLine();
+//		ByteBuilder bb=new ByteBuilder();
+//		while(line!=null){
+//			if(line.length>0 && line[0]=='>'){
+//				readsProcessedInner++;
+//				if(maxReads>0 && readsProcessedInner>maxReads){break;}
+//				
+//				for(int i=1; i<line.length; i++){
+//					byte b=line[i];
+//					if(b==' ' || b=='.'){break;}
+//					else{bb.append(b);}
+//				}
+//				bb.append(',');
+//				if(bb.length()>100000){
+//					bb.setLength(bb.length()-1);
+//					int[] ret;
+//					if(mode==ACCESSION_MODE){
+//						ret=TaxClient.accessionToTaxidArray(bb.toString());
+//					}else if(mode==GI_MODE){
+//						ret=TaxClient.giToTaxidArray(bb.toString());
+//					}else{
+//						ret=TaxClient.headerToTaxidArray(bb.toString());
+//					}
+//					assert(ret!=null) : bb.toString();
+//					for(int i : ret){ids.add(i);}
+//					bb.clear();
+//				}
+//			}
+//			line=bf.nextLine();
+//		}
+//		if(bb.length()>0){
+//			bb.setLength(bb.length()-1);
+//			int[] ret;
+//			if(mode==ACCESSION_MODE){
+//				ret=TaxClient.accessionToTaxidArray(bb.toString());
+//			}else if(mode==GI_MODE){
+//				ret=TaxClient.giToTaxidArray(bb.toString());
+//			}else{
+//				ret=TaxClient.headerToTaxidArray(bb.toString());
+//			}
+//			assert(ret!=null) : bb.toString();
+//			for(int i : ret){ids.add(i);}
+//			bb.clear();
+//		}
+//		
+//		bf.reset();
+//		return ids;
+//	}
+	
+	private void processInner(ByteFile bf, ByteStreamWriter bsw, ByteStreamWriter bswInvalid, ByteStreamWriter bswBadHeaders, HashArray1D counts, IntList ids){
+
+		int readsProcessedInner=0;
+		
+		byte[] line=bf.nextLine();
+		boolean valid=false;
+		while(line!=null){
+			if(line.length>0 && line[0]=='>'){
+				readsProcessedInner++;
+				readsProcessed++;
+				if(maxReads>0 && readsProcessed>maxReads){break;}
+				int initial=1, terminal=line.length;
+				final int number;
+				if(ids==null){
+					final TaxNode tn;
+
+					{
+						{
+							//					Handles renumbering when the format is correct but the number is wrong.
+							if(Tools.startsWith(line, ">tid|")){
+								initial=6;
+								while(initial<=line.length && line[initial-1]!='|'){initial++;}
+							}else if(Tools.startsWith(line, ">ncbi|")){
+								initial=7;
+								while(initial<=line.length && line[initial-1]!='|'){initial++;}
+							}
+						}
+						
+						if(shrinkNames){//This is for nr/nt
+							for(int i=initial; i<terminal; i++){
+								if(line[i]==1){//SOH
+									terminal=i;
+								}
+							}
+						}
+
+						String s=new String(line, initial, terminal-initial);
+
+						tn=tree.parseNodeFromHeader(s, true);
+					}
+					number=(tn==null ? -1 : tn.id);
+				}else{
+					number=ids.get((int)(readsProcessedInner-1));
+					
+					if(shrinkNames){//This is for nr/nt
+						for(int i=initial; i<terminal; i++){
+							if(line[i]==1){//SOH
+								terminal=i;
+							}
+						}
+					}
+				}
+				
+				valid=(number>=0);
+				if(valid){
+					validReads++;
+					bsw.print(title);
+					bsw.print(number);
+					if(prefix){
+						bsw.print('|');
+						for(int i=initial; i<terminal; i++){
+							bsw.print(line[i]);
+						}
+					}else if(counts!=null){
+						bsw.print('|');
+						int count=counts.increment(number, 1);
+						bsw.print(count);
+						if(count==1){taxaCounted++;}
+					}
+					bsw.println();
+				}else{
+					invalidReads++;
+					if(deleteInvalid){
+						System.err.println("Invalid sequence detected; aborting.\n");
+						break;
+					}
+					if(bswBadHeaders!=null){bswBadHeaders.println(line);}
+					if(maxInvalidHeaders>=0 && invalidReads>maxInvalidHeaders){
+						KillSwitch.kill("Maximum bad headers exceeded: "+maxInvalidHeaders+"\n"+new String(line));
+					}
+					if(keepAll){
+						if(shrinkNames){
+							for(int i=0; i<terminal; i++){
+								bsw.print(line[i]);
+							}
+							bsw.println();
+						}else{
+							bsw.println(line);
+						}
+					}else if(bswInvalid!=null){
+						if(shrinkNames){
+							for(int i=0; i<terminal; i++){
+								bswInvalid.print(line[i]);
+							}
+							bswInvalid.println();
+						}else{
+							bswInvalid.println(line);
+						}
+					}
+				}
+			}else{
+				basesProcessed+=line.length;
+				if(valid || keepAll){
+					if(valid){validBases+=line.length;}
+					else{invalidBases+=line.length;}
+					bsw.println(line);
+				}else{
+					invalidBases+=line.length;
+					if(bswInvalid!=null){
+						bswInvalid.println(line);
+					}
+				}
+			}
+			line=bf.nextLine();
+		}
+		
+		errorState|=bf.close();
+	}
+	
+	private static boolean looksLikeRealAccession(byte[] line){
+		int space=Tools.indexOf(line, ' ');
+		if(space<0){space=line.length;}
+		if(space>18 || space<4){return false;}
+		//...  hmm...  this is a pretty short list for false cases!
+		int dot=-1;
+		for(int i=0; i<space; i++){
+			if(line[i]=='.'){
+				if(dot>=0){return false;}//Only 1 dot allowed
+				dot=i;
+			}
+		}
+		if(dot>0){
+			if(dot!=space-2){return false;}
+		}
+		for(int i=0; i<space; i++){
+			byte b=line[i];
+			if(b!='_' && b!='-' && b!='.' && !Tools.isLetterOrDigit(b)){return false;}
+		}
+		return true;
+	}
+	
+	void appendHeaderLine(byte[] line, ByteBuilder bb){
+		assert(line[0]=='>' || line[0]=='@') : new String(line);
+		
+		if(mode==ACCESSION_MODE){
+			for(int i=1; i<line.length; i++){
+				byte b=line[i];
+				if(b==' ' || b=='.'){break;}
+				else{bb.append(b);}
+			}
+		}else if(mode==GI_MODE){
+			for(int i=1; i<line.length; i++){
+				byte b=line[i];
+				if(b==' ' || b=='|'){break;}
+				else{bb.append(b);}
+			}
+		}else if(mode==UNITE_MODE){
+			int initial=Tools.indexOf(line, '|');
+			for(int i=initial+1; i<line.length; i++){
+				byte b=line[i];
+				if(b==' ' || b=='.' || b=='|'){break;}
+				else{bb.append(b);}
+			}
+		}else{
+			for(int i=1; i<line.length; i++){
+				byte b=line[i];
+				bb.append(b);
+			}
+		}
+		bb.append(',');
+	}
+	
+	private void updateHeadersFromServer(ArrayList<byte[]> lines, HashArray1D counts, ByteStreamWriter bswBadHeaders, int format){
+		if(format==FileFormat.FA){
+			updateHeadersFromServer_fasta(lines, counts, bswBadHeaders);
+		}else if(format==FileFormat.GFF){
+			updateHeadersFromServer_gff(lines, counts, bswBadHeaders);
+		}else{
+			assert(false) : "Unsupported type: "+format;
+		}
+	}
+	
+	private void updateHeadersFromServer_fasta(ArrayList<byte[]> lines, HashArray1D counts, ByteStreamWriter bswBadHeaders){
+		ByteBuilder bb=new ByteBuilder();
+		ArrayList<String> names=new ArrayList<String>();
+		for(byte[] line : lines){
+			if(line[0]=='>' && !Tools.startsWith(line, ">tid")){
+				appendHeaderLine(line, bb);
+				if(mode==UNITE_MODE){
+					int bar=Tools.indexOf(line, '|');
+					names.add(new String(line, 1, bar-1));
+				}
+			}
+		}
+		if(bb.length()<1){return;}
+		
+		assert(bb.endsWith(','));
+		bb.length--;
+		
+//		System.err.println("Sending '"+bb+"'");
+		
+		final int[] serverIds;
+		if(mode==ACCESSION_MODE || mode==UNITE_MODE){
+			serverIds=TaxClient.accessionToTaxidArray(bb.toString());
+		}else if(mode==GI_MODE){
+			serverIds=TaxClient.giToTaxidArray(bb.toString());
+		}else{
+			serverIds=TaxClient.headerToTaxidArray(bb.toString());
+		}
+		assert(serverIds!=null) : "Null response for '"+bb.toString()+"'";
+		bb.clear();
+		
+		if(!names.isEmpty()){
+			assert(tree!=null) : "Need to load a TaxTree.";
+			assert(names.size()==serverIds.length);
+			for(int i=0; i<serverIds.length; i++){
+				final String name=names.get(i);
+				if(serverIds[i]<0){
+					TaxNode tn=tree.getNodeByName(name);
+					if(tn!=null){serverIds[i]=tn.id;}
+//					else {
+//						assert(false) : names.get(i);
+//					}
+				}else{
+					//Sometimes the species gets renamed.
+//					TaxNode tn=tree.getNodeByName(name);
+//					if(tn==null || tn.id==serverIds[i]) {System.err.println(name+", "+serverIds[i]+", "+tn+", "+tree.getNodesByName(name));}
+				}
+			}
+		}
+		
+		for(int lineNum=0, serverNum=0; lineNum<=lines.size(); lineNum++){
+			byte[] line=lines.get(lineNum);
+			if(line[0]=='>' && !Tools.startsWith(line, ">tid")){
+				bb.clear();
+				final int tid=serverIds[serverNum];
+				if(tid<0){
+					//WARN
+					if(bswBadHeaders!=null){
+						bswBadHeaders.print(tid).tab();
+						bswBadHeaders.print(looksLikeRealAccession(line)).tab();
+						bswBadHeaders.println(line);
+					}else if(warnBadHeaders){
+						System.err.println(tid+"\t"+looksLikeRealAccession(line)+"\t"+new String(line));
+					}
+				}
+				int initial=1, terminal=line.length; 
+				if(shrinkNames){//This is for nr/nt
+					for(int i=initial; i<terminal; i++){
+						if(line[i]==1){//SOH
+							terminal=i;
+						}
+					}
+				}
+				
+				bb.append(title);
+				bb.append(tid);
+				if(prefix){
+					bb.append('|');
+					for(int i=initial; i<terminal; i++){
+						bb.append(line[i]);
+					}
+				}else if(counts!=null && tid>=0){
+					bb.append('|');
+					int count=counts.increment(tid, 1);
+					bb.append(count);
+					if(count==1){taxaCounted++;}
+				}
+				
+				lines.set(lineNum, bb.toBytes());
+				
+				serverNum++;
+				if(serverNum>=serverIds.length){break;}
+			}
+		}
+		if(maxInvalidHeaders>=0 && invalidReads>maxInvalidHeaders){
+			KillSwitch.kill("Maximum bad headers exceeded: "+maxInvalidHeaders);
+		}
+	}
+	
+	private void updateHeadersFromServer_gff(ArrayList<byte[]> lines, HashArray1D counts, ByteStreamWriter bswBadHeaders){
+		ByteBuilder bb=new ByteBuilder();
+		ArrayList<String> names=new ArrayList<String>();
+		for(byte[] line : lines){
+			if(line[0]!='#' && !Tools.startsWith(line, "tid")){
+				if(bb.length()>0){bb.append(',');}
+				for(byte b : line){
+					if(b=='\t'){break;}
+					bb.append(b);
+				}
+			}
+		}
+		if(bb.length()<1){return;}
+		
+//		assert(false) : bb;
+		
+//		System.err.println("Sending '"+bb+"'");
+		
+		int[] serverIds;
+		if(mode==ACCESSION_MODE || mode==UNITE_MODE){
+			serverIds=TaxClient.accessionToTaxidArray(bb.toString());
+		}else if(mode==GI_MODE){
+			serverIds=TaxClient.giToTaxidArray(bb.toString());
+		}else{
+			serverIds=TaxClient.headerToTaxidArray(bb.toString());
+		}
+		if(serverIds==null){
+			KillSwitch.kill("Null response for '"+bb.toString()+"'");
+		}
+//		assert(serverIds!=null) : "Null response for '"+bb.toString()+"'";
+		bb.clear();
+		
+		if(!names.isEmpty()){
+			assert(tree!=null) : "Need to load a TaxTree.";
+			assert(names.size()==serverIds.length);
+			for(int i=0; i<serverIds.length; i++){
+				final String name=names.get(i);
+				if(serverIds[i]<0){
+					TaxNode tn=tree.getNodeByName(name);
+					if(tn!=null){serverIds[i]=tn.id;}
+//					else {
+//						assert(false) : names.get(i);
+//					}
+				}else{
+					//Sometimes the species gets renamed.
+//					TaxNode tn=tree.getNodeByName(name);
+//					if(tn==null || tn.id==serverIds[i]) {System.err.println(name+", "+serverIds[i]+", "+tn+", "+tree.getNodesByName(name));}
+				}
+			}
+		}
+		
+		for(int lineNum=0, serverNum=0; lineNum<=lines.size(); lineNum++){
+			byte[] line=lines.get(lineNum);
+			if(line[0]!='#' && !Tools.startsWith(line, "tid")){
+				bb.clear();
+				final int tid=serverIds[serverNum];
+				if(tid<0){
+					//WARN
+					if(bswBadHeaders!=null){
+						bswBadHeaders.print(tid).tab();
+						bswBadHeaders.print(looksLikeRealAccession(line)).tab();
+						bswBadHeaders.println(line);
+					}else if(warnBadHeaders){
+						System.err.println(tid+"\t"+looksLikeRealAccession(line)+"\t"+new String(line));
+					}
+				}
+				
+				bb.append("tid|");
+				bb.append(tid);
+				if(prefix){
+					bb.append('|');
+					bb.append(line);
+				}else if(counts!=null && tid>=0){
+					bb.append('|');
+					int count=counts.increment(tid, 1);
+					bb.append(count);
+					if(count==1){taxaCounted++;}
+				}
+				
+				lines.set(lineNum, bb.toBytes());
+				
+				serverNum++;
+				if(serverNum>=serverIds.length){break;}
+			}
+		}
+		if(maxInvalidHeaders>=0 && invalidReads>maxInvalidHeaders){
+			KillSwitch.kill("Maximum bad headers exceeded: "+maxInvalidHeaders);
+		}
+	}
+	
+	private void processInner_server(ByteFile bf, ByteStreamWriter bsw, ByteStreamWriter bswInvalid, ByteStreamWriter bswBadHeaders, HashArray1D counts, int format){
+		
+		ArrayList<byte[]> lines=new ArrayList<byte[]>();
+		byte[] line=bf.nextLine();
+		boolean valid=false;
+		long storedBytes=0;
+		
+		while(line!=null){
+			
+			if(line.length>0){
+				linesIn++;
+				lines.add(line);
+				storedBytes+=line.length;
+				if(storedBytes>=maxStoredBytes){
+					updateHeadersFromServer(lines, counts, bswBadHeaders, format);
+					valid=dumpBuffer(lines, valid, bsw, bswInvalid);
+					lines=new ArrayList<byte[]>();
+					storedBytes=0;
+					if(deleteInvalid && invalidReads>0){
+							System.err.println("Invalid sequence detected; aborting.\n"
+									+ "Input file:  \t"+bf.name()+"\n"
+									+ "Output file: \t"+(bsw==null ? "null" : bsw.fname)+"\n"
+									+ "Line:        \t"+new String(line)+"\n");
+						break;
+					}
+				}
+			}
+			line=bf.nextLine();
+		}
+		
+		if(storedBytes>0){
+			updateHeadersFromServer(lines, counts, bswBadHeaders, format);
+			valid=dumpBuffer(lines, valid, bsw, bswInvalid);
+			lines=new ArrayList<byte[]>();
+			storedBytes=0;
+		}
+		
+		errorState|=bf.close();
+	}
+	
+	private boolean dumpBuffer(ArrayList<byte[]> lines, boolean valid, ByteStreamWriter bsw, ByteStreamWriter bswInvalid){
+		
+		for(byte[] line : lines){
+		
+			if(line.length>0 && line[0]=='>'){
+				readsProcessed++;
+				if(maxReads>0 && readsProcessed>maxReads){break;}
+				
+				if(Tools.startsWith(line, invalidTitle)){
+					valid=false;
+					invalidReads++;
+					invalidLines++;
+					if(deleteInvalid){break;}
+				}else{
+					assert(Tools.startsWith(line, title));
+					valid=true;
+					validReads++;
+					validLines++;
+				}
+			}else if(gffIn){
+				basesProcessed+=line.length;
+				valid=!Tools.startsWith(line, invalidGffTitle);
+				if(valid){
+					validBases+=line.length;
+					validLines++;
+				}else{
+					invalidBases+=line.length;
+					invalidLines++;
+				}
+			}else{
+				basesProcessed+=line.length;
+				if(valid){
+					validBases+=line.length;
+					validLines++;
+				}else{
+					invalidBases+=line.length;
+					invalidLines++;
+				}
+			}
+			
+			if(valid || keepAll){
+				if(bsw!=null){bsw.println(line);}
+			}else{
+				if(bswInvalid!=null){bswInvalid.println(line);}
+			}
+		}
+		return valid;
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	
+	/*--------------------------------------------------------------*/
+	
+	private LinkedHashSet<String> in1=new LinkedHashSet<String>();
+	private String out1=null;
+	private String outInvalid=null;
+	private String badHeaders=null;
+
+	private String taxTreeFile=null;
+	private String giTableFile=null;
+	private String accessionFile=null;
+	private String patternFile=null;
+	
+	/*--------------------------------------------------------------*/
+
+	private long maxReads=-1;
+
+	private long validReads=0;
+	private long validBases=0;
+	private long invalidReads=0;
+	private long invalidBases=0;
+	private long taxaCounted=0;
+
+	private long linesIn=0;
+	private long validLines=0;
+	private long invalidLines=0;
+	
+	private long maxStoredBytes=10000000;
+	
+	private long readsProcessed=0, basesProcessed=0;
+
+	private boolean prefix=true;
+	private boolean countTable=true;
+	private boolean keepAll=true;
+	private boolean shrinkNames=false;
+	private boolean warnBadHeaders=true;
+	private boolean useServer=false;
+	/** Crash if the number of invalid headers exceeds this */
+	private long maxInvalidHeaders=-1;
+	/** Delete the output file if there are any invalid headers */
+	private boolean deleteInvalid=false;
+	
+	private int mode;
+	private static final int ACCESSION_MODE=0, GI_MODE=1, HEADER_MODE=2, UNITE_MODE=3;
+	
+	private boolean gffIn=false;
+	
+	/*--------------------------------------------------------------*/
+	
+	private final ArrayList<FileFormat> ffin1;
+	private final FileFormat ffout1;
+	private final FileFormat ffoutInvalid;
+	private final TaxTree tree;
+	
+	/*--------------------------------------------------------------*/
+	
+	private PrintStream outstream=System.err;
+	public static boolean verbose=false;
+	public boolean errorState=false;
+	private boolean overwrite=false;
+	private boolean append=false;
+
+	private static byte[] title=">tid|".getBytes();
+	private static byte[] invalidTitle=">tid|-1".getBytes();
+	private static byte[] invalidGffTitle="tid|-1".getBytes();
+	
+}