view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line source
package tax;

import java.io.File;
import java.io.PrintStream;

import dna.Data;
import fileIO.ByteFile;
import fileIO.ByteFile1;
import fileIO.ByteFile2;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.ReadStats;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import stream.FastaReadInputStream;
import structures.ByteBuilder;

/**
 * @author Brian Bushnell
 * @date April 4, 2017
 *
 */
public class ShrinkAccession {
	
	public static void main(String[] args){
		Timer t=new Timer();
		ShrinkAccession x=new ShrinkAccession(args);
		x.process(t);
		
		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}
	
	public ShrinkAccession(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}
		
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
		if(Data.PIGZ()){
			ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
		}
		
		Parser parser=new Parser();
		for(int i=0; i<args.length; i++){
			String arg=args[i];
			String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;

			if(parser.parse(arg, a, b)){
				//do nothing
			}else if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
				ByteFile1.verbose=verbose;
				ByteFile2.verbose=verbose;
				ReadWrite.verbose=verbose;
			}else if(a.equals("gi")){
				KEEP_GI_NUMBERS=Parse.parseBoolean(b);
			}else if(a.equals("outgi") || a.equals("giout") || a.equals("gi")){
				giOut=b;
			}else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
				parser.in1=arg;
			}else if(parser.out1==null && i==1 && !arg.contains("=")){
				parser.out1=arg;
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
				//				throw new RuntimeException("Unknown parameter "+args[i]);
			}
		}
		
		{//Process parser fields
			Parser.processQuality();
			
			overwrite=ReadStats.overwrite=parser.overwrite;
			append=ReadStats.append=parser.append;
			
			in=parser.in1;

			out=parser.out1;
		}
		
		assert(FastaReadInputStream.settingsOK());
		
		if(in==null){throw new RuntimeException("Error - at least one input file is required.");}
		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
			ByteFile.FORCE_MODE_BF2=false;
			ByteFile.FORCE_MODE_BF1=true;
		}

		if(out!=null && out.equalsIgnoreCase("null")){out=null;}
		
		if(!Tools.testOutputFiles(overwrite, append, false, out)){
			outstream.println((out==null)+", "+out);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
		}

		ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false);
		ffoutGi=FileFormat.testOutput(giOut, FileFormat.TXT, null, true, overwrite, append, false);
		ffin=FileFormat.testInput(in, FileFormat.TXT, null, true, true);
		
	}
	
	void process(Timer t){
		
		ByteFile bf=ByteFile.makeByteFile(ffin);
		ByteStreamWriter bsw=new ByteStreamWriter(ffout);
		bsw.start();

		long linesProcessed=0;
		long charsProcessed=0;
		long badLines=0;
		
		byte[] line=bf.nextLine();
		ByteBuilder bb=new ByteBuilder(10000);
		int columns=4;
		while(line!=null){
			if(Tools.startsWith(line, "accession\t")){
				bb.append(line);
				bb.nl();
			}else if(Tools.startsWith(line, "accession.version\ttaxid")){
				columns=2;
				bb.append("accession\t\ttaxid\t");//dummy header
				bb.nl();
			}else{
				charsProcessed+=line.length+1;
				linesProcessed++;
				
				final int tid=(columns==4 ? AccessionToTaxid.parseLineToTaxid(line, (byte)'\t') : 
					AccessionToTaxid.parseLineToTaxid_2col(line, (byte)'\t'));
				if(tid<1){
					badLines++;
				}else{
					int i=0;
					
					while(i<line.length){//Accession
						byte b=line[i];
						bb.append(b);
						i++;
						if(b=='\t'){break;}
					}
					
					if(columns==4){
						while(i<line.length){//Accession with decimal
							byte b=line[i];
							//						bb.append(b);
							i++;
							if(b=='\t'){break;}
						}
					}
					bb.append('\t');
					
					while(i<line.length){//Taxid
						byte b=line[i];
						bb.append(b);
						i++;
						if(b=='\t'){break;}
					}
					
					if(KEEP_GI_NUMBERS){
						if(line.length>i && Tools.isDigit(line[i])){//GI number or "na"
							while(i<line.length){
								byte b=line[i];
								bb.append(b);
								i++;
//								if(b=='\t'){break;}
							}
						}
					}
					bb.nl();
				}
				
//				String[] split=new String(line).split("\t");
//				bb.append(split[0]);
//				bb.tab();
//				bb.tab();
//				bb.append(split[2]);
//				bb.tab();
//				bb.nl();
			}
			if(bb.length()>8000){
				bsw.print(bb);
				bb.clear();
			}
			line=bf.nextLine();
		}
		if(bb.length()>0){
			bsw.print(bb);
			bb.clear();
		}
		
		errorState|=bf.close();
		if(bsw!=null){errorState|=bsw.poisonAndWait();}
		
		t.stop();
		outstream.println("Discarded "+badLines+" lines.\n");
		outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, charsProcessed, 8));
		
		if(errorState){
			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
		}
	}
	
	/*--------------------------------------------------------------*/
	
	
	/*--------------------------------------------------------------*/
	
	private String in=null;
	private String out=null;
	private String giOut=null;
	
	/*--------------------------------------------------------------*/
	
	private final FileFormat ffin;
	private final FileFormat ffout;
	private final FileFormat ffoutGi;
	
	/*--------------------------------------------------------------*/
	
	private PrintStream outstream=System.err;
	public static boolean verbose=false;
	public static boolean KEEP_GI_NUMBERS=true;
	public boolean errorState=false;
	private boolean overwrite=false;
	private boolean append=false;
	
}