diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/RenameIMG.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/RenameIMG.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,254 @@
+package tax;
+
+import java.io.File;
+import java.io.PrintStream;
+
+import fileIO.ByteFile;
+import fileIO.ByteFile1;
+import fileIO.ByteFile2;
+import fileIO.ByteStreamWriter;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import stream.ConcurrentGenericReadInputStream;
+import stream.FastaReadInputStream;
+import structures.ByteBuilder;
+import structures.IntHashSet;
+
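+/**
+ * Copies the FASTA files referenced by a list of IMG records into a single output,
+ * rewriting each sequence header as {@code >tid|<taxID>|img|<imgID> <original header>}.
+ * The {@code tid|<taxID>|} part is left out when the taxID looked up for the IMG ID is negative.
+ * @author Brian Bushnell
+ * @date May 9, 2016
+ *
+ */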
+public class RenameIMG {
+	
+	/**
+	 * Program entry point: builds a RenameIMG from the arguments, runs it under a timer,
+	 * and then closes the instance's print stream.
+	 * @param args Command-line arguments, handed unchanged to the constructor
+	 */
+	public static void main(String[] args){
+		final Timer timer=new Timer();
+		final RenameIMG renamer=new RenameIMG(args);
+		renamer.process(timer);
+		
+		//outstream may have been redirected during preparsing; close it once all reporting is done
+		Shared.closeStream(renamer.outstream);
+	}
+	
+	public RenameIMG(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, getClass(), false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
+		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
+		
+		Parser parser=new Parser();
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			String[] split=arg.split("=");
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+
+			if(a.equals("lines")){
+				maxLines=Long.parseLong(b);
+				if(maxLines<0){maxLines=Long.MAX_VALUE;}
+			}else if(a.equals("verbose")){
+				verbose=Parse.parseBoolean(b);
+				ByteFile1.verbose=verbose;
+				ByteFile2.verbose=verbose;
+				stream.FastaReadInputStream.verbose=verbose;
+				ConcurrentGenericReadInputStream.verbose=verbose;
+				stream.FastqReadInputStream.verbose=verbose;
+				ReadWrite.verbose=verbose;
+			}else if(a.equals("img")){
+				imgFile=b;
+			}else if(parser.parse(arg, a, b)){
+				//do nothing
+			}else{
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i];
+				//				throw new RuntimeException("Unknown parameter "+args[i]);
+			}
+		}
+		
+		{//Process parser fields
+			overwrite=parser.overwrite;
+			append=parser.append;
+			
+			in1=parser.in1;
+
+			out1=parser.out1;
+		}
+		
+		assert(FastaReadInputStream.settingsOK());
+		
+		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
+		if("auto".equalsIgnoreCase(imgFile)){imgFile=TaxTree.defaultImgFile();}//TODO: why are these set to the same default?
+		if("auto".equalsIgnoreCase(in1)){in1=TaxTree.defaultImgFile();}
+		
+		if(!ByteFile.FORCE_MODE_BF2){
+			ByteFile.FORCE_MODE_BF2=false;
+			ByteFile.FORCE_MODE_BF1=true;
+		}
+
+		if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
+		
+		if(!Tools.testOutputFiles(overwrite, append, false, out1)){
+			outstream.println((out1==null)+", "+out1);
+			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
+		}
+
+		ffout1=FileFormat.testOutput(out1, FileFormat.FA, null, true, overwrite, append, false);
+	}
+	
+	/**
+	 * Feeds the file behind every record through {@link #process_inner}, all into one shared writer,
+	 * while counting records with and without a positive taxID.
+	 * Updates knownTaxid, unknownTaxid and errorState.
+	 * @param array Records whose {@code path()} names the FASTA file to copy and whose imgID labels its headers
+	 */
+	void copyFiles(ImgRecord[] array){
+		if(useSet){set=new IntHashSet(10000);}
+		
+		//Only build a writer when there is an output format to write to.
+		//NOTE(review): ffout1 is presumably null when no output file was given - confirm against FileFormat.testOutput.
+		ByteStreamWriter bsw=null;
+		if(ffout1!=null){
+			bsw=new ByteStreamWriter(ffout1);
+			bsw.start();
+		}
+		
+		for(ImgRecord ir : array){
+			if(ir.taxID<=0){
+				unknownTaxid++;
+			}else if(set!=null){
+				//Distinct positive taxIDs are only tracked when the set exists (useSet)
+				set.add(ir.taxID);
+			}
+			FileFormat ffin=FileFormat.testInput(ir.path(), FileFormat.FA, null, true, true);
+			process_inner(ffin, bsw, ir.imgID);
+		}
+		
+		if(set!=null){
+			knownTaxid=set.size();
+			set=null;//The set is only needed while copying
+		}
+		//Finish the writer and fold its result into the error state
+		if(bsw!=null){errorState|=bsw.poisonAndWait();}
+	}
+	
+	/**
+	 * Runs the whole job: loads the IMG table, copies all files, stops the timer and prints statistics.
+	 * @param t Timer started by the caller; stopped here before reporting
+	 * @throws RuntimeException If errorState is set once the statistics have been printed
+	 */
+	void process(Timer t){
+		final ImgRecord[] records=ImgRecord.toArray(in1, TaxTree.IMG_HQ);
+		//The IMG table comes from the dedicated img file when one was given, otherwise from the input records
+		final ImgRecord[] tableSource=(imgFile!=null ? ImgRecord.toArray(imgFile, TaxTree.IMG_HQ) : records);
+		TaxTree.loadIMG(tableSource);
+		
+		copyFiles(records);
+		
+		t.stop();
+
+		final int pad=8;
+		final String contigs=Tools.padKM(sequencesProcessed, pad);
+		final String bases=Tools.padKM(basesProcessed, pad);
+		final long invalidFiles=filesProcessed-filesValid;
+		final long invalidLines=linesProcessed-linesValid;
+		
+		outstream.println("Time:                         \t"+t);
+		outstream.println("Files Processed:    "+filesProcessed);
+		outstream.println("Contigs Processed:  "+contigs);
+		outstream.println("Bases Processed:    "+bases);
+		if(useSet){
+			outstream.println("TaxIDs Processed:   "+knownTaxid+" \t"+"("+unknownTaxid+" unknown)");
+		}
+		outstream.println(Tools.linesBytesProcessed(t.elapsed, linesProcessed, bytesProcessed, pad));
+		
+		outstream.println();
+		outstream.println("Valid Files:       \t"+filesValid);
+		outstream.println("Invalid Files:     \t"+invalidFiles);
+		outstream.println("Valid Lines:       \t"+linesValid);
+		outstream.println("Invalid Lines:     \t"+invalidLines);
+		
+		if(!errorState){return;}
+		throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+	}
+	
+	/**
+	 * Copies one input file to the writer line by line, rewriting header lines.
+	 * A line starting with '>' becomes {@code >tid|<tid>|img|<img> <rest of the original header>};
+	 * the {@code tid|<tid>|} part is omitted when the taxID looked up for {@code img} is negative.
+	 * All other non-empty lines are copied as-is; empty lines are dropped and not counted.
+	 * A missing or unreadable file sets errorState and is skipped.
+	 * The maxLines limit applies to the running total across all files, not per file.
+	 * @param ffin Input file to read
+	 * @param bsw Destination writer; when null the file is still read and counted but nothing is written
+	 * @param img IMG ID used for the header label and the taxID lookup
+	 */
+	void process_inner(final FileFormat ffin, final ByteStreamWriter bsw, final long img){
+		
+		filesProcessed++;
+		{
+			final File f=new File(ffin.name());
+			if(!f.exists() || !f.canRead()){
+				//Report through outstream like every other message of this class, so a redirected stream captures it
+				outstream.println("Can't find "+f);
+				errorState=true;
+				return;
+			}
+		}
+		final int tid=TaxTree.imgToTaxid(img);
+		final ByteBuilder bb=new ByteBuilder();
+		final ByteFile bf=ByteFile.makeByteFile(ffin);
+		
+		for(byte[] line=bf.nextLine(); line!=null; line=bf.nextLine()){
+			if(line.length==0){continue;}
+			//Stop reading once the global line limit is reached; the file still counts as valid below
+			if(maxLines>0 && linesProcessed>=maxLines){break;}
+			linesProcessed++;
+			bytesProcessed+=line.length;
+
+			//Every counted line is also counted as valid
+			linesValid++;
+			if(line[0]=='>'){
+				sequencesProcessed++;
+				bb.append('>');
+				if(tid>=0){
+					bb.append("tid|");
+					bb.append(tid);
+					bb.append('|');
+				}
+				bb.append("img|");
+				bb.append(img);
+				bb.append(' ');
+				//Append the original header text, skipping its leading '>'
+				for(int i=1; i<line.length; i++){
+					bb.append(line[i]);
+				}
+			}else{
+				basesProcessed+=line.length;
+				bb.append(line);
+			}
+			bb.nl();
+			if(bsw!=null){bsw.print(bb.toBytes());}
+			bb.clear();
+		}
+		
+		filesValid++;
+		errorState|=bf.close();
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Input list that is parsed into ImgRecords; "auto" is replaced by TaxTree.defaultImgFile() */
+	private String in1=null;
+	/** Output file name; the literal "null" is treated as no output */
+	private String out1=null;
+	/** Optional file from which the IMG table is loaded; when null, the records of in1 are used instead */
+	private String imgFile=null;
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Positive taxIDs seen while copying; only populated during copyFiles and reset to null afterwards */
+	private IntHashSet set=null;
+	/** Size of the taxID set at the end of copyFiles */
+	private int knownTaxid=0;
+	/** Number of records whose taxID was not positive */
+	private int unknownTaxid=0;
+	/** Controls creation of the taxID set and printing of the TaxIDs line; never changed within this class */
+	private boolean useSet=true;
+	
+	/** Non-empty lines read, summed over all files */
+	private long linesProcessed=0;
+	/** Incremented together with linesProcessed, so the reported invalid-line count is always 0 as written */
+	private long linesValid=0;
+	/** Sum of the lengths of all counted lines */
+	private long bytesProcessed=0;
+
+	/** Sum of the lengths of all counted lines that do not start with '>' */
+	private long basesProcessed=0;
+	/** Number of lines starting with '>' */
+	private long sequencesProcessed=0;
+	/** Number of files attempted, including missing or unreadable ones */
+	private long filesProcessed=0;
+	/** Number of files that existed, were readable and were read */
+	private long filesValid=0;
+	
+	/** Limit on linesProcessed across all files; negative command-line values map to Long.MAX_VALUE, and the limit is only checked when positive */
+	private long maxLines=Long.MAX_VALUE;
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Output format built from out1 in the constructor, with FileFormat.FA as the default format */
+	private final FileFormat ffout1;
+	
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Destination for messages and statistics; System.err until replaced by the PreParser's stream */
+	private PrintStream outstream=System.err;
+	/** Set by the "verbose" flag, which also copies the value to several I/O classes */
+	public static boolean verbose=false;
+	/** Set when an input file is missing or a close/finish call reports a problem; makes process() throw at the end */
+	public boolean errorState=false;
+	/** Taken from the Parser; passed to Tools.testOutputFiles and FileFormat.testOutput */
+	private boolean overwrite=false;
+	/** Taken from the Parser; passed to Tools.testOutputFiles and FileFormat.testOutput */
+	private boolean append=false;
+	
+}