package tax;

import java.io.File;
import java.io.PrintStream;

import dna.Data;
import fileIO.ByteFile;
import fileIO.ByteFile1;
import fileIO.ByteFile2;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.ReadStats;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import stream.FastaReadInputStream;
import structures.ByteBuilder;

/**
 * Reads a tab-delimited accession table line by line, drops every data line
 * whose taxid parses to less than 1, and writes a reduced copy of the
 * remaining lines to the output file.
 * 
 * Two input layouts are recognized from the header line: one starting with
 * "accession\t" (treated as 4 columns) and one starting with
 * "accession.version\ttaxid" (treated as 2 columns).
 * 
 * NOTE(review): this file was restored from a damaged copy in which the
 * argument-loop header and parts of the per-line copy loop were missing.
 * Those spans are marked below and should be confirmed against a clean copy.
 * 
 * @author Brian Bushnell
 * @date April 4, 2017
 *
 */
public class ShrinkAccession {
	
	/**
	 * Program entry point: builds an instance from the command line, runs it,
	 * and closes the print stream.
	 * @param args Command-line arguments
	 */
	public static void main(String[] args){
		Timer t=new Timer();
		ShrinkAccession x=new ShrinkAccession(args);
		x.process(t);
		
		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}
	
	/**
	 * Parses the command line and sets up input and output file formats.
	 * The first bare argument (no '=') is taken as the input if it is stdin or
	 * an existing file; the second bare argument is taken as the output.
	 * @param args Command-line arguments
	 * @throws RuntimeException If no input was given, or the output cannot be written
	 */
	public ShrinkAccession(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}
		
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
		if(Data.PIGZ()){
			ReadWrite.ZIPLEVEL=Tools.max(ReadWrite.ZIPLEVEL, 6);
		}
		
		Parser parser=new Parser();
		//NOTE(review): the loop header and the arg/split/a declarations were missing
		//from the damaged copy; restored from the way those names are used below.
		for(int i=0; i<args.length; i++){
			String arg=args[i];
			String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;
			
			if(parser.parse(arg, a, b)){
				//do nothing
			}else if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
				ByteFile1.verbose=verbose;
				ByteFile2.verbose=verbose;
				ReadWrite.verbose=verbose;
			}else if(a.equals("gi")){
				KEEP_GI_NUMBERS=Parse.parseBoolean(b);
			}else if(a.equals("outgi") || a.equals("giout")){
				//"gi" used to be listed here as well, but it could never match
				//because the branch above always claims it first.
				giOut=b;
			}else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
				parser.in1=arg;
			}else if(parser.out1==null && i==1 && !arg.contains("=")){
				parser.out1=arg;
			}else{
				//With assertions disabled, an unknown parameter is only reported, not fatal.
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}
		
		{//Process parser fields
			Parser.processQuality();
			
			overwrite=ReadStats.overwrite=parser.overwrite;
			append=ReadStats.append=parser.append;
			
			in=parser.in1;
			
			out=parser.out1;
		}
		
		assert(FastaReadInputStream.settingsOK());
		
		if(in==null){throw new RuntimeException("Error - at least one input file is required.");}
		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
			//Neither mode was forced, so pick BF1.
			ByteFile.FORCE_MODE_BF2=false;
			ByteFile.FORCE_MODE_BF1=true;
		}
		
		if(out!=null && out.equalsIgnoreCase("null")){out=null;}
		
		if(!Tools.testOutputFiles(overwrite, append, false, out)){
			outstream.println((out==null)+", "+out);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n");
		}
		
		ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false);
		ffoutGi=FileFormat.testOutput(giOut, FileFormat.TXT, null, true, overwrite, append, false);
		ffin=FileFormat.testInput(in, FileFormat.TXT, null, true, true);
		
	}
	
	/**
	 * Streams the input, writes the reduced lines, then prints how many lines
	 * were discarded along with timing statistics.
	 * The output writer is only created when an output format exists, so a run
	 * without an output file still counts lines.
	 * @param t Timer that is stopped here and used for the final report
	 * @throws RuntimeException If closing the input or finishing the writer reported an error
	 */
	void process(Timer t){
		
		ByteFile bf=ByteFile.makeByteFile(ffin);
		//presumably ffout is null when no output path was given - TODO confirm
		ByteStreamWriter bsw=null;
		if(ffout!=null){
			bsw=new ByteStreamWriter(ffout);
			bsw.start();
		}
		
		long linesProcessed=0;
		long charsProcessed=0;
		long badLines=0;
		
		byte[] line=bf.nextLine();
		ByteBuilder bb=new ByteBuilder(10000);
		int columns=4;
		while(line!=null){
			if(Tools.startsWith(line, "accession\t")){
				//4-column header is passed through unchanged
				bb.append(line);
				bb.nl();
			}else if(Tools.startsWith(line, "accession.version\ttaxid")){
				columns=2;
				bb.append("accession\t\ttaxid\t");//dummy header
				bb.nl();
			}else{
				charsProcessed+=line.length+1;//+1 for the line terminator
				linesProcessed++;
				
				final int tid=(columns==4 ? AccessionToTaxid.parseLineToTaxid(line, (byte)'\t') : 
					AccessionToTaxid.parseLineToTaxid_2col(line, (byte)'\t'));
				if(tid<1){
					badLines++;
				}else{
					appendShrunkLine(line, columns, bb);
				}
			}
			if(bb.length()>8000){
				flush(bsw, bb);
			}
			line=bf.nextLine();
		}
		if(bb.length()>0){
			flush(bsw, bb);
		}
		
		errorState|=bf.close();
		if(bsw!=null){errorState|=bsw.poisonAndWait();}
		
		t.stop();
		outstream.println("Discarded "+badLines+" lines.\n");
		outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, charsProcessed, 8));
		
		if(errorState){
			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
		}
	}
	
	/**
	 * Appends the reduced form of one data line to the buffer, ending with a newline.
	 * 
	 * NOTE(review): most of this logic was missing from the damaged copy. Only
	 * the start of the first loop, the GI condition with its comment, and the
	 * start of the last loop survived. The field layout here follows the dummy
	 * header (accession, empty column, taxid, optional GI) - confirm against a
	 * clean copy before relying on it.
	 * @param line Tab-delimited input line
	 * @param columns Number of input columns, 4 or 2
	 * @param bb Buffer to append to
	 */
	private static void appendShrunkLine(byte[] line, int columns, ByteBuilder bb){
		int i=copyField(line, 0, bb);//First column, including its tab
		if(columns==4){i=skipField(line, i);}//Second column of 4-column input is dropped
		bb.append("\t");//Empty placeholder column
		i=copyField(line, i, bb);//Taxid
		
		if(KEEP_GI_NUMBERS && line.length>i && Tools.isDigit(line[i])){//GI number or "na"
			while(i<line.length){//Copy the remainder of the line
				bb.append(line[i]);
				i++;
			}
		}
		bb.nl();
	}
	
	/**
	 * Copies bytes from line into bb, starting at from, up to and including the next tab.
	 * @return Index of the first byte after the copied field
	 */
	private static int copyField(byte[] line, int from, ByteBuilder bb){
		int i=from;
		while(i<line.length){
			final byte b=line[i];
			bb.append(b);
			i++;
			if(b=='\t'){break;}
		}
		return i;
	}
	
	/**
	 * Advances past the next tab without copying anything.
	 * @return Index of the first byte after the skipped field
	 */
	private static int skipField(byte[] line, int from){
		int i=from;
		while(i<line.length){
			final byte b=line[i];
			i++;
			if(b=='\t'){break;}
		}
		return i;
	}
	
	/** Prints the buffer to the writer, if there is one, and then empties the buffer. */
	private static void flush(ByteStreamWriter bsw, ByteBuilder bb){
		if(bsw!=null){bsw.print(bb);}
		bb.clear();
	}
	
	/*--------------------------------------------------------------*/
	
	
	/*--------------------------------------------------------------*/
	
	/** Input file path */
	private String in=null;
	/** Output file path; null disables output */
	private String out=null;
	/** Path given via outgi/giout; process() does not write to it */
	private String giOut=null;
	
	/*--------------------------------------------------------------*/
	
	private final FileFormat ffin;
	private final FileFormat ffout;
	/** Built from giOut in the constructor but not used by process() */
	private final FileFormat ffoutGi;
	
	/*--------------------------------------------------------------*/
	
	private PrintStream outstream=System.err;
	public static boolean verbose=false;
	/** When true, a trailing field that starts with a digit is kept on each line */
	public static boolean KEEP_GI_NUMBERS=true;
	public boolean errorState=false;
	private boolean overwrite=false;
	private boolean append=false;
	
}