Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/ShrinkAccession.java Tue Mar 18 16:23:26 2025 -0400 @@ -0,0 +1,238 @@ +package tax; + +import java.io.File; +import java.io.PrintStream; + +import dna.Data; +import fileIO.ByteFile; +import fileIO.ByteFile1; +import fileIO.ByteFile2; +import fileIO.ByteStreamWriter; +import fileIO.FileFormat; +import fileIO.ReadWrite; +import shared.Parse; +import shared.Parser; +import shared.PreParser; +import shared.ReadStats; +import shared.Shared; +import shared.Timer; +import shared.Tools; +import stream.FastaReadInputStream; +import structures.ByteBuilder; + +/** + * @author Brian Bushnell + * @date April 4, 2017 + * + */ +public class ShrinkAccession { + + public static void main(String[] args){ + Timer t=new Timer(); + ShrinkAccession x=new ShrinkAccession(args); + x.process(t); + + //Close the print stream if it was redirected + Shared.closeStream(x.outstream); + } + + public ShrinkAccession(String[] args){ + + {//Preparse block for help, config files, and outstream + PreParser pp=new PreParser(args, getClass(), false); + args=pp.args; + outstream=pp.outstream; + } + + ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; + ReadWrite.MAX_ZIP_THREADS=Shared.threads(); + if(Data.PIGZ()){ + ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6); + } + + Parser parser=new Parser(); + for(int i=0; i<args.length; i++){ + String arg=args[i]; + String[] split=arg.split("="); + String a=split[0].toLowerCase(); + String b=split.length>1 ? split[1] : null; + + if(parser.parse(arg, a, b)){ + //do nothing + }else if(a.equals("verbose")){ + verbose=Parse.parseBoolean(b); + ByteFile1.verbose=verbose; + ByteFile2.verbose=verbose; + ReadWrite.verbose=verbose; + }else if(a.equals("gi")){ + KEEP_GI_NUMBERS=Parse.parseBoolean(b); + }else if(a.equals("outgi") || a.equals("giout") || a.equals("gi")){ + giOut=b; + }else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){ + parser.in1=arg; + }else if(parser.out1==null && i==1 && !arg.contains("=")){ + parser.out1=arg; + }else{ + outstream.println("Unknown parameter "+args[i]); + assert(false) : "Unknown parameter "+args[i]; + // throw new RuntimeException("Unknown parameter "+args[i]); + } + } + + {//Process parser fields + Parser.processQuality(); + + overwrite=ReadStats.overwrite=parser.overwrite; + append=ReadStats.append=parser.append; + + in=parser.in1; + + out=parser.out1; + } + + assert(FastaReadInputStream.settingsOK()); + + if(in==null){throw new RuntimeException("Error - at least one input file is required.");} + if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){ + ByteFile.FORCE_MODE_BF2=false; + ByteFile.FORCE_MODE_BF1=true; + } + + if(out!=null && out.equalsIgnoreCase("null")){out=null;} + + if(!Tools.testOutputFiles(overwrite, append, false, out)){ + outstream.println((out==null)+", "+out); + throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n"); + } + + ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false); + ffoutGi=FileFormat.testOutput(giOut, FileFormat.TXT, null, true, overwrite, append, false); + ffin=FileFormat.testInput(in, FileFormat.TXT, null, true, true); + + } + + void process(Timer t){ + + ByteFile bf=ByteFile.makeByteFile(ffin); + ByteStreamWriter bsw=new ByteStreamWriter(ffout); + bsw.start(); + + long linesProcessed=0; + long charsProcessed=0; + long badLines=0; + + byte[] line=bf.nextLine(); + ByteBuilder bb=new ByteBuilder(10000); + int columns=4; + while(line!=null){ + if(Tools.startsWith(line, "accession\t")){ + bb.append(line); + bb.nl(); + }else if(Tools.startsWith(line, "accession.version\ttaxid")){ + columns=2; + bb.append("accession\t\ttaxid\t");//dummy header + bb.nl(); + }else{ + charsProcessed+=line.length+1; + linesProcessed++; + + final int tid=(columns==4 ? AccessionToTaxid.parseLineToTaxid(line, (byte)'\t') : + AccessionToTaxid.parseLineToTaxid_2col(line, (byte)'\t')); + if(tid<1){ + badLines++; + }else{ + int i=0; + + while(i<line.length){//Accession + byte b=line[i]; + bb.append(b); + i++; + if(b=='\t'){break;} + } + + if(columns==4){ + while(i<line.length){//Accession with decimal + byte b=line[i]; + // bb.append(b); + i++; + if(b=='\t'){break;} + } + } + bb.append('\t'); + + while(i<line.length){//Taxid + byte b=line[i]; + bb.append(b); + i++; + if(b=='\t'){break;} + } + + if(KEEP_GI_NUMBERS){ + if(line.length>i && Tools.isDigit(line[i])){//GI number or "na" + while(i<line.length){ + byte b=line[i]; + bb.append(b); + i++; +// if(b=='\t'){break;} + } + } + } + bb.nl(); + } + +// String[] split=new String(line).split("\t"); +// bb.append(split[0]); +// bb.tab(); +// bb.tab(); +// bb.append(split[2]); +// bb.tab(); +// bb.nl(); + } + if(bb.length()>8000){ + bsw.print(bb); + bb.clear(); + } + line=bf.nextLine(); + } + if(bb.length()>0){ + bsw.print(bb); + bb.clear(); + } + + errorState|=bf.close(); + if(bsw!=null){errorState|=bsw.poisonAndWait();} + + t.stop(); + outstream.println("Discarded "+badLines+" lines.\n"); + outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, charsProcessed, 8)); + + if(errorState){ + throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); + } + } + + /*--------------------------------------------------------------*/ + + + /*--------------------------------------------------------------*/ + + private String in=null; + private String out=null; + private String giOut=null; + + /*--------------------------------------------------------------*/ + + private final FileFormat ffin; + private final FileFormat ffout; + private final FileFormat ffoutGi; + + /*--------------------------------------------------------------*/ + + private PrintStream outstream=System.err; + public static boolean verbose=false; + public static boolean KEEP_GI_NUMBERS=true; + public boolean errorState=false; + private boolean overwrite=false; + private boolean append=false; + +}