diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,238 @@
+package tax;
+
+import java.io.File;
+import java.io.PrintStream;
+
+import dna.Data;
+import fileIO.ByteFile;
+import fileIO.ByteFile1;
+import fileIO.ByteFile2;
+import fileIO.ByteStreamWriter;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.ReadStats;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import stream.FastaReadInputStream;
+import structures.ByteBuilder;
+
+/**
+ * @author Brian Bushnell
+ * @date April 4, 2017
+ *
+ */
+public class ShrinkAccession {
+	
+	/**
+	 * Program entry point: builds a ShrinkAccession from the command-line
+	 * arguments, runs it, and then closes its output print stream.
+	 * @param args Command-line arguments, passed unchanged to the constructor.
+	 */
+	public static void main(String[] args){
+		Timer t=new Timer();
+		ShrinkAccession x=new ShrinkAccession(args);
+		try{
+			x.process(t);
+		}finally{
+			//Close the print stream if it was redirected.
+			//Done in a finally block so it also happens when process() throws.
+			Shared.closeStream(x.outstream);
+		}
+	}
+	
+	/**
+	 * Parses the command-line arguments and sets up the input and output file formats.
+	 * Every argument is first offered to the shared Parser; the ones handled here are:
+	 * <ul>
+	 * <li>verbose=&lt;bool&gt; - also forwarded to ByteFile1, ByteFile2 and ReadWrite.</li>
+	 * <li>gi=&lt;bool&gt; - sets the static KEEP_GI_NUMBERS flag.</li>
+	 * <li>outgi=&lt;file&gt; or giout=&lt;file&gt; - sets the GI output path.</li>
+	 * <li>A first argument without '=' that starts with "stdin" or names an existing file - input.</li>
+	 * <li>A second argument without '=' - output.</li>
+	 * </ul>
+	 * Also adjusts several static ReadWrite and ByteFile settings as a side effect.
+	 * @param args Command-line arguments.
+	 * @throws RuntimeException If no input was given, or the output file fails the overwrite/append check.
+	 */
+	public ShrinkAccession(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, getClass(), false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		//Enable external (de)compression and allow it to use all available threads
+		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
+		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
+		if(Data.PIGZ()){
+			ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
+		}
+		
+		Parser parser=new Parser();
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			String[] split=arg.split("=");
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+
+			if(parser.parse(arg, a, b)){
+				//do nothing
+			}else if(a.equals("verbose")){
+				verbose=Parse.parseBoolean(b);
+				ByteFile1.verbose=verbose;
+				ByteFile2.verbose=verbose;
+				ReadWrite.verbose=verbose;
+			}else if(a.equals("gi")){
+				KEEP_GI_NUMBERS=Parse.parseBoolean(b);
+			}else if(a.equals("outgi") || a.equals("giout")){
+				//"gi" is intentionally not listed here: it is always consumed by the branch above.
+				giOut=b;
+			}else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
+				parser.in1=arg;
+			}else if(parser.out1==null && i==1 && !arg.contains("=")){
+				parser.out1=arg;
+			}else{
+				//Unknown flags are reported; they only abort the run when assertions are enabled.
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i];
+			}
+		}
+		
+		{//Process parser fields
+			Parser.processQuality();
+			
+			overwrite=ReadStats.overwrite=parser.overwrite;
+			append=ReadStats.append=parser.append;
+			
+			in=parser.in1;
+
+			out=parser.out1;
+		}
+		
+		assert(FastaReadInputStream.settingsOK());
+		
+		if(in==null){throw new RuntimeException("Error - at least one input file is required.");}
+		//Unless a ByteFile mode was already forced, force BF1
+		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
+			ByteFile.FORCE_MODE_BF2=false;
+			ByteFile.FORCE_MODE_BF1=true;
+		}
+
+		//The literal word "null" means no output file
+		if(out!=null && out.equalsIgnoreCase("null")){out=null;}
+		
+		if(!Tools.testOutputFiles(overwrite, append, false, out)){
+			outstream.println((out==null)+", "+out);
+			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
+		}
+
+		ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false);
+		//NOTE(review): ffoutGi is created here but nothing in this file writes to it - confirm whether it is still needed.
+		ffoutGi=FileFormat.testOutput(giOut, FileFormat.TXT, null, true, overwrite, append, false);
+		ffin=FileFormat.testInput(in, FileFormat.TXT, null, true, true);
+		
+	}
+	
+	/**
+	 * Streams the input accession table line by line and writes a reduced copy.
+	 * <p>
+	 * A line starting with "accession" followed by a tab is copied through unchanged.
+	 * A line starting with "accession.version\ttaxid" switches to 2-column mode and is
+	 * replaced by a dummy header. Every other line is a data line: its taxid is parsed,
+	 * lines whose taxid is below 1 are counted as discarded, and the rest are written as
+	 * accession, an empty column, the taxid and (when KEEP_GI_NUMBERS is set and the
+	 * remaining text starts with a digit) the rest of the line.
+	 * <p>
+	 * When there is no output format (ffout is null), lines are still parsed and counted
+	 * but nothing is written.
+	 * @param t Timer that is stopped after the last line and used for the summary printout.
+	 * @throws RuntimeException If closing the reader or the writer reported an error.
+	 */
+	void process(Timer t){
+		
+		ByteFile bf=ByteFile.makeByteFile(ffin);
+		//The writer is optional: ffout may be null when no output was requested
+		//(assumes FileFormat.testOutput yields null for a null path - TODO confirm).
+		final ByteStreamWriter bsw=(ffout==null ? null : new ByteStreamWriter(ffout));
+		if(bsw!=null){bsw.start();}
+
+		long linesProcessed=0;
+		long charsProcessed=0;
+		long badLines=0;
+		
+		byte[] line=bf.nextLine();
+		ByteBuilder bb=new ByteBuilder(10000);
+		int columns=4;//Number of input columns; changed to 2 when the 2-column header is seen
+		while(line!=null){
+			if(Tools.startsWith(line, "accession\t")){
+				//4-column header: keep as is
+				bb.append(line);
+				bb.nl();
+			}else if(Tools.startsWith(line, "accession.version\ttaxid")){
+				columns=2;
+				bb.append("accession\t\ttaxid\t");//dummy header
+				bb.nl();
+			}else{
+				charsProcessed+=line.length+1;
+				linesProcessed++;
+				
+				final int tid=(columns==4 ? AccessionToTaxid.parseLineToTaxid(line, (byte)'\t') : 
+					AccessionToTaxid.parseLineToTaxid_2col(line, (byte)'\t'));
+				if(tid<1){
+					badLines++;
+				}else{
+					int i=0;
+					
+					while(i<line.length){//Accession, copied including its trailing tab
+						byte b=line[i];
+						bb.append(b);
+						i++;
+						if(b=='\t'){break;}
+					}
+					
+					if(columns==4){
+						while(i<line.length){//Accession with decimal: skipped, not copied
+							byte b=line[i];
+							i++;
+							if(b=='\t'){break;}
+						}
+					}
+					bb.append('\t');//Leaves the second column empty
+					
+					while(i<line.length){//Taxid, copied including its trailing tab if present
+						byte b=line[i];
+						bb.append(b);
+						i++;
+						if(b=='\t'){break;}
+					}
+					
+					if(KEEP_GI_NUMBERS){
+						if(line.length>i && Tools.isDigit(line[i])){//GI number or "na"
+							//Copy everything that remains on the line
+							while(i<line.length){
+								byte b=line[i];
+								bb.append(b);
+								i++;
+							}
+						}
+					}
+					bb.nl();
+				}
+			}
+			//Flush the buffer periodically to keep it small
+			if(bb.length()>8000){
+				if(bsw!=null){bsw.print(bb);}
+				bb.clear();
+			}
+			line=bf.nextLine();
+		}
+		if(bb.length()>0){
+			if(bsw!=null){bsw.print(bb);}
+			bb.clear();
+		}
+		
+		errorState|=bf.close();
+		if(bsw!=null){errorState|=bsw.poisonAndWait();}
+		
+		t.stop();
+		outstream.println("Discarded "+badLines+" lines.\n");
+		outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, charsProcessed, 8));
+		
+		if(errorState){
+			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+		}
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Input path, taken from the parser's in1 field; required by the constructor. */
+	private String in=null;
+	/** Output path, taken from the parser's out1 field; the constructor resets the literal word "null" to null. */
+	private String out=null;
+	/** Path given on the command line for GI output; only used to build ffoutGi. */
+	private String giOut=null;
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Input file format built from {@code in}; read by process(). */
+	private final FileFormat ffin;
+	/** Output file format built from {@code out}; written by process(). */
+	private final FileFormat ffout;
+	/** File format built from {@code giOut}. NOTE(review): not referenced by process() in this file - confirm whether it is used. */
+	private final FileFormat ffoutGi;
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Destination for status messages; defaults to System.err and is replaced by the PreParser's stream in the constructor. */
+	private PrintStream outstream=System.err;
+	/** Verbose flag; when set on the command line it is also copied to ByteFile1, ByteFile2 and ReadWrite. */
+	public static boolean verbose=false;
+	/** When true, process() also copies whatever follows the taxid column if it starts with a digit. */
+	public static boolean KEEP_GI_NUMBERS=true;
+	/** Set when closing the reader or writer reports a problem; process() throws if it ends up true. */
+	public boolean errorState=false;
+	/** Overwrite permission for output files, copied from the parser. */
+	private boolean overwrite=false;
+	/** Append mode for output files, copied from the parser. */
+	private boolean append=false;
+	
+}