Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/tax/RenameGiToTaxid.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package tax; import java.io.File; import java.io.PrintStream; import java.util.ArrayList; import java.util.LinkedHashSet; import fileIO.ByteFile; import fileIO.ByteFile1; import fileIO.ByteFile2; import fileIO.ByteStreamWriter; import fileIO.FileFormat; import fileIO.ReadWrite; import kmer.HashArray1D; import shared.KillSwitch; import shared.Parse; import shared.Parser; import shared.PreParser; import shared.ReadStats; import shared.Shared; import shared.Timer; import shared.Tools; import stream.ConcurrentGenericReadInputStream; import stream.FASTQ; import stream.FastaReadInputStream; import structures.ByteBuilder; import structures.IntList; /** * @author Brian Bushnell * @date Mar 10, 2015 * */ public class RenameGiToTaxid { public static void main(String[] args){ Timer t=new Timer(); RenameGiToTaxid x=new RenameGiToTaxid(args); x.process(t); //Close the print stream if it was redirected Shared.closeStream(x.outstream); } public RenameGiToTaxid(String[] args){ {//Preparse block for help, config files, and outstream PreParser pp=new PreParser(args, getClass(), false); args=pp.args; outstream=pp.outstream; } Shared.capBuffers(4); ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; ReadWrite.USE_BGZIP=ReadWrite.USE_UNBGZIP=ReadWrite.PREFER_BGZIP=true; ReadWrite.MAX_ZIP_THREADS=Shared.threads(); FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false; Parser parser=new Parser(); for(int i=0; i<args.length; i++){ String arg=args[i]; String[] split=arg.split("="); String a=split[0].toLowerCase(); String b=split.length>1 ? split[1] : null; if(a.equals("prefix")){ prefix=Parse.parseBoolean(b); }else if(a.equals("server") || a.equals("useserver")){ if(b!=null && b.startsWith("http")){ useServer=true; String path=b; if(!path.endsWith("/")){path+="/";} Shared.setTaxServer(path); }else{ useServer=Parse.parseBoolean(b); } }else if(a.equals("title")){ title=(b==null ? ">" : (">"+b+"|")).getBytes(); }else if(a.equals("table") || a.equals("gi") || a.equals("gitable")){ giTableFile=b; }else if(a.equals("accession")){ accessionFile=b; }else if(a.equals("pattern")){ patternFile=b; }else if(a.equals("tree") || a.equals("taxtree")){ taxTreeFile=b; }else if(a.equals("invalid")){ outInvalid=b; }else if(a.equals("deleteinvalid")){ deleteInvalid=Parse.parseBoolean(b); }else if(a.equals("badheaders")){ badHeaders=b; }else if(a.equals("maxbadheaders") || a.equals("maxinvalidheaders")){ maxInvalidHeaders=Parse.parseKMG(b); }else if(a.equals("keepall")){ keepAll=Parse.parseBoolean(b); }else if(a.equals("shrinknames")){ shrinkNames=Parse.parseBoolean(b); }else if(a.equals("warn")){ warnBadHeaders=Parse.parseBoolean(b); } else if(a.equals("maxpigzprocesses")){ AccessionToTaxid.maxPigzProcesses=Integer.parseInt(b); }else if(a.equals("skipparse")){ AccessionToTaxid.skipParse=Parse.parseBoolean(b); }else if(a.equals("skiphash")){ AccessionToTaxid.skipHash=Parse.parseBoolean(b); } else if(a.equals("mode")){ if(b!=null && Character.isDigit(b.charAt(0))){ mode=Integer.parseInt(b); }else if("accession".equalsIgnoreCase(b)){ mode=ACCESSION_MODE; }else if("unite".equalsIgnoreCase(b)){ mode=UNITE_MODE; TaxTree.UNITE_MODE=true; }else if("gi".equalsIgnoreCase(b)){ mode=GI_MODE; }else if("header".equalsIgnoreCase(b)){ mode=HEADER_MODE; }else{ assert(false) : "Bad mode: "+b; } } else if(a.equals("verbose")){ verbose=Parse.parseBoolean(b); ByteFile1.verbose=verbose; ByteFile2.verbose=verbose; stream.FastaReadInputStream.verbose=verbose; ConcurrentGenericReadInputStream.verbose=verbose; stream.FastqReadInputStream.verbose=verbose; ReadWrite.verbose=verbose; }else if(a.equals("in") || a.equals("in1")){ assert(b!=null) : "Bad parameter: "+arg; if(new File(b).exists()){ in1.add(b); }else{ for(String bb : b.split(",")){ in1.add(bb); } } }else if(new File(arg).exists()){ //For asterisk expansion in1.add(arg); }else if(parser.parse(arg, a, b)){ //do nothing }else{ outstream.println("Unknown parameter "+args[i]); assert(false) : "Unknown parameter "+args[i]; // throw new RuntimeException("Unknown parameter "+args[i]); } } if(useServer){ giTableFile=null; accessionFile=null; patternFile=null; if(mode!=UNITE_MODE){taxTreeFile=null;} }//else if taxpath!=null... set them {//Process parser fields Parser.processQuality(); maxReads=parser.maxReads; overwrite=ReadStats.overwrite=parser.overwrite; append=ReadStats.append=parser.append; out1=parser.out1; } if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();} if("auto".equalsIgnoreCase(giTableFile)){giTableFile=TaxTree.defaultTableFile();} if("auto".equalsIgnoreCase(accessionFile)){accessionFile=TaxTree.defaultAccessionFile();} if("auto".equalsIgnoreCase(patternFile)){patternFile=TaxTree.defaultPatternFile();} assert(FastaReadInputStream.settingsOK()); if(in1==null || in1.isEmpty()){throw new RuntimeException("Error - at least one input file is required.");} if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){ ByteFile.FORCE_MODE_BF2=false; ByteFile.FORCE_MODE_BF1=true; } if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;} assert(out1!=null) : "This program requires an output file."; if(!Tools.testOutputFiles(overwrite, append, false, out1)){ outstream.println((out1==null)+", "+out1); throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n"); } if(!Tools.testInputFiles(false, true, in1.toArray(new String[0]))){ throw new RuntimeException("\nCan't read some input files.\n"); } ffout1=FileFormat.testOutput(out1, FileFormat.FA, null, true, overwrite, append, false); ffoutInvalid=FileFormat.testOutput(outInvalid, FileFormat.FA, null, true, overwrite, append, false); ffin1=new ArrayList<FileFormat>(in1.size()); for(String s : in1){ FileFormat ff=FileFormat.testInput(s, FileFormat.FA, null, true, true); ffin1.add(ff); } if(ffoutInvalid!=null){keepAll=false;} assert(giTableFile!=null || accessionFile!=null || TaxTree.SILVA_MODE || useServer) : "No gi or accession information loaded."; if(taxTreeFile!=null){ tree=TaxTree.loadTaxTree(taxTreeFile, outstream, true, false); assert(tree.nameMap!=null); }else{ tree=null; if(!useServer){throw new RuntimeException("No tree specified.");} } if(giTableFile!=null){ GiToTaxid.initialize(giTableFile); } if(patternFile!=null){ Timer t=new Timer(); AnalyzeAccession.loadCodeMap(patternFile); outstream.println("Loading pattern table."); t.stopAndPrint(); } if(accessionFile!=null){ AccessionToTaxid.tree=tree; outstream.println("Loading accession table."); AccessionToTaxid.load(accessionFile); // System.gc(); } } void process(Timer t){ ByteStreamWriter bsw=(ffout1==null ? null : new ByteStreamWriter(ffout1)); //Actually, this is required. if(bsw!=null){bsw.start();} ByteStreamWriter bswInvalid=null; if(ffoutInvalid!=null){ bswInvalid=new ByteStreamWriter(ffoutInvalid); bswInvalid.start(); } ByteStreamWriter bswBadHeaders=null; if(badHeaders!=null) { bswBadHeaders=new ByteStreamWriter(badHeaders, overwrite, append, false); bswBadHeaders.start(); } final HashArray1D counts=(countTable && !prefix) ? new HashArray1D(256000, -1L, true) : null; gffIn=false; for(FileFormat ffin : ffin1){ gffIn=gffIn||ffin.gff(); ByteFile bf=ByteFile.makeByteFile(ffin); if(useServer){ processInner_server(bf, bsw, bswInvalid, bswBadHeaders, counts, ffin.format()); }else{ // IntList list=(useServer ? getIds(bf) : null); processInner(bf, bsw, bswInvalid, bswBadHeaders, counts, null); } } if(bsw!=null){ errorState|=bsw.poisonAndWait(); if(deleteInvalid && invalidReads>0 && !ffout1.stdio()){ try { System.err.println("Deleting "+out1); new File(out1).delete(); } catch (Exception e) { System.err.println("An error occured while attempting to delete "+out1); e.printStackTrace(); } } } if(bswInvalid!=null){errorState|=bswInvalid.poisonAndWait();} if(bswBadHeaders!=null){errorState|=bswBadHeaders.poisonAndWait();} t.stop(); if(!gffIn) { outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); outstream.println(); outstream.println("Valid Sequences: \t"+validReads); outstream.println("Valid Bases: \t"+validBases); outstream.println("Invalid Sequences: \t"+invalidReads); outstream.println("Invalid Bases: \t"+invalidBases); }else{ outstream.println(Tools.timeLinesBytesProcessed(t, linesIn, basesProcessed, 8)); outstream.println(); outstream.println("Valid Lines: \t"+validLines); outstream.println("Valid Bytes: \t"+validBases); outstream.println("Invalid Lines: \t"+invalidLines); outstream.println("Invalid Bytes: \t"+invalidBases); } if(counts!=null){ outstream.println("Unique Taxa: \t"+taxaCounted); } if(errorState){ throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); } } //Unused; not efficient // public IntList getIds(ByteFile bf){ // IntList ids=new IntList(); // // int readsProcessedInner=0; // // byte[] line=bf.nextLine(); // ByteBuilder bb=new ByteBuilder(); // while(line!=null){ // if(line.length>0 && line[0]=='>'){ // readsProcessedInner++; // if(maxReads>0 && readsProcessedInner>maxReads){break;} // // for(int i=1; i<line.length; i++){ // byte b=line[i]; // if(b==' ' || b=='.'){break;} // else{bb.append(b);} // } // bb.append(','); // if(bb.length()>100000){ // bb.setLength(bb.length()-1); // int[] ret; // if(mode==ACCESSION_MODE){ // ret=TaxClient.accessionToTaxidArray(bb.toString()); // }else if(mode==GI_MODE){ // ret=TaxClient.giToTaxidArray(bb.toString()); // }else{ // ret=TaxClient.headerToTaxidArray(bb.toString()); // } // assert(ret!=null) : bb.toString(); // for(int i : ret){ids.add(i);} // bb.clear(); // } // } // line=bf.nextLine(); // } // if(bb.length()>0){ // bb.setLength(bb.length()-1); // int[] ret; // if(mode==ACCESSION_MODE){ // ret=TaxClient.accessionToTaxidArray(bb.toString()); // }else if(mode==GI_MODE){ // ret=TaxClient.giToTaxidArray(bb.toString()); // }else{ // ret=TaxClient.headerToTaxidArray(bb.toString()); // } // assert(ret!=null) : bb.toString(); // for(int i : ret){ids.add(i);} // bb.clear(); // } // // bf.reset(); // return ids; // } private void processInner(ByteFile bf, ByteStreamWriter bsw, ByteStreamWriter bswInvalid, ByteStreamWriter bswBadHeaders, HashArray1D counts, IntList ids){ int readsProcessedInner=0; byte[] line=bf.nextLine(); boolean valid=false; while(line!=null){ if(line.length>0 && line[0]=='>'){ readsProcessedInner++; readsProcessed++; if(maxReads>0 && readsProcessed>maxReads){break;} int initial=1, terminal=line.length; final int number; if(ids==null){ final TaxNode tn; { { // Handles renumbering when the format is correct but the number is wrong. if(Tools.startsWith(line, ">tid|")){ initial=6; while(initial<=line.length && line[initial-1]!='|'){initial++;} }else if(Tools.startsWith(line, ">ncbi|")){ initial=7; while(initial<=line.length && line[initial-1]!='|'){initial++;} } } if(shrinkNames){//This is for nr/nt for(int i=initial; i<terminal; i++){ if(line[i]==1){//SOH terminal=i; } } } String s=new String(line, initial, terminal-initial); tn=tree.parseNodeFromHeader(s, true); } number=(tn==null ? -1 : tn.id); }else{ number=ids.get((int)(readsProcessedInner-1)); if(shrinkNames){//This is for nr/nt for(int i=initial; i<terminal; i++){ if(line[i]==1){//SOH terminal=i; } } } } valid=(number>=0); if(valid){ validReads++; bsw.print(title); bsw.print(number); if(prefix){ bsw.print('|'); for(int i=initial; i<terminal; i++){ bsw.print(line[i]); } }else if(counts!=null){ bsw.print('|'); int count=counts.increment(number, 1); bsw.print(count); if(count==1){taxaCounted++;} } bsw.println(); }else{ invalidReads++; if(deleteInvalid){ System.err.println("Invalid sequence detected; aborting.\n"); break; } if(bswBadHeaders!=null){bswBadHeaders.println(line);} if(maxInvalidHeaders>=0 && invalidReads>maxInvalidHeaders){ KillSwitch.kill("Maximum bad headers exceeded: "+maxInvalidHeaders+"\n"+new String(line)); } if(keepAll){ if(shrinkNames){ for(int i=0; i<terminal; i++){ bsw.print(line[i]); } bsw.println(); }else{ bsw.println(line); } }else if(bswInvalid!=null){ if(shrinkNames){ for(int i=0; i<terminal; i++){ bswInvalid.print(line[i]); } bswInvalid.println(); }else{ bswInvalid.println(line); } } } }else{ basesProcessed+=line.length; if(valid || keepAll){ if(valid){validBases+=line.length;} else{invalidBases+=line.length;} bsw.println(line); }else{ invalidBases+=line.length; if(bswInvalid!=null){ bswInvalid.println(line); } } } line=bf.nextLine(); } errorState|=bf.close(); } private static boolean looksLikeRealAccession(byte[] line){ int space=Tools.indexOf(line, ' '); if(space<0){space=line.length;} if(space>18 || space<4){return false;} //... hmm... this is a pretty short list for false cases! int dot=-1; for(int i=0; i<space; i++){ if(line[i]=='.'){ if(dot>=0){return false;}//Only 1 dot allowed dot=i; } } if(dot>0){ if(dot!=space-2){return false;} } for(int i=0; i<space; i++){ byte b=line[i]; if(b!='_' && b!='-' && b!='.' && !Tools.isLetterOrDigit(b)){return false;} } return true; } void appendHeaderLine(byte[] line, ByteBuilder bb){ assert(line[0]=='>' || line[0]=='@') : new String(line); if(mode==ACCESSION_MODE){ for(int i=1; i<line.length; i++){ byte b=line[i]; if(b==' ' || b=='.'){break;} else{bb.append(b);} } }else if(mode==GI_MODE){ for(int i=1; i<line.length; i++){ byte b=line[i]; if(b==' ' || b=='|'){break;} else{bb.append(b);} } }else if(mode==UNITE_MODE){ int initial=Tools.indexOf(line, '|'); for(int i=initial+1; i<line.length; i++){ byte b=line[i]; if(b==' ' || b=='.' || b=='|'){break;} else{bb.append(b);} } }else{ for(int i=1; i<line.length; i++){ byte b=line[i]; bb.append(b); } } bb.append(','); } private void updateHeadersFromServer(ArrayList<byte[]> lines, HashArray1D counts, ByteStreamWriter bswBadHeaders, int format){ if(format==FileFormat.FA){ updateHeadersFromServer_fasta(lines, counts, bswBadHeaders); }else if(format==FileFormat.GFF){ updateHeadersFromServer_gff(lines, counts, bswBadHeaders); }else{ assert(false) : "Unsupported type: "+format; } } private void updateHeadersFromServer_fasta(ArrayList<byte[]> lines, HashArray1D counts, ByteStreamWriter bswBadHeaders){ ByteBuilder bb=new ByteBuilder(); ArrayList<String> names=new ArrayList<String>(); for(byte[] line : lines){ if(line[0]=='>' && !Tools.startsWith(line, ">tid")){ appendHeaderLine(line, bb); if(mode==UNITE_MODE){ int bar=Tools.indexOf(line, '|'); names.add(new String(line, 1, bar-1)); } } } if(bb.length()<1){return;} assert(bb.endsWith(',')); bb.length--; // System.err.println("Sending '"+bb+"'"); final int[] serverIds; if(mode==ACCESSION_MODE || mode==UNITE_MODE){ serverIds=TaxClient.accessionToTaxidArray(bb.toString()); }else if(mode==GI_MODE){ serverIds=TaxClient.giToTaxidArray(bb.toString()); }else{ serverIds=TaxClient.headerToTaxidArray(bb.toString()); } assert(serverIds!=null) : "Null response for '"+bb.toString()+"'"; bb.clear(); if(!names.isEmpty()){ assert(tree!=null) : "Need to load a TaxTree."; assert(names.size()==serverIds.length); for(int i=0; i<serverIds.length; i++){ final String name=names.get(i); if(serverIds[i]<0){ TaxNode tn=tree.getNodeByName(name); if(tn!=null){serverIds[i]=tn.id;} // else { // assert(false) : names.get(i); // } }else{ //Sometimes the species gets renamed. // TaxNode tn=tree.getNodeByName(name); // if(tn==null || tn.id==serverIds[i]) {System.err.println(name+", "+serverIds[i]+", "+tn+", "+tree.getNodesByName(name));} } } } for(int lineNum=0, serverNum=0; lineNum<=lines.size(); lineNum++){ byte[] line=lines.get(lineNum); if(line[0]=='>' && !Tools.startsWith(line, ">tid")){ bb.clear(); final int tid=serverIds[serverNum]; if(tid<0){ //WARN if(bswBadHeaders!=null){ bswBadHeaders.print(tid).tab(); bswBadHeaders.print(looksLikeRealAccession(line)).tab(); bswBadHeaders.println(line); }else if(warnBadHeaders){ System.err.println(tid+"\t"+looksLikeRealAccession(line)+"\t"+new String(line)); } } int initial=1, terminal=line.length; if(shrinkNames){//This is for nr/nt for(int i=initial; i<terminal; i++){ if(line[i]==1){//SOH terminal=i; } } } bb.append(title); bb.append(tid); if(prefix){ bb.append('|'); for(int i=initial; i<terminal; i++){ bb.append(line[i]); } }else if(counts!=null && tid>=0){ bb.append('|'); int count=counts.increment(tid, 1); bb.append(count); if(count==1){taxaCounted++;} } lines.set(lineNum, bb.toBytes()); serverNum++; if(serverNum>=serverIds.length){break;} } } if(maxInvalidHeaders>=0 && invalidReads>maxInvalidHeaders){ KillSwitch.kill("Maximum bad headers exceeded: "+maxInvalidHeaders); } } private void updateHeadersFromServer_gff(ArrayList<byte[]> lines, HashArray1D counts, ByteStreamWriter bswBadHeaders){ ByteBuilder bb=new ByteBuilder(); ArrayList<String> names=new ArrayList<String>(); for(byte[] line : lines){ if(line[0]!='#' && !Tools.startsWith(line, "tid")){ if(bb.length()>0){bb.append(',');} for(byte b : line){ if(b=='\t'){break;} bb.append(b); } } } if(bb.length()<1){return;} // assert(false) : bb; // System.err.println("Sending '"+bb+"'"); int[] serverIds; if(mode==ACCESSION_MODE || mode==UNITE_MODE){ serverIds=TaxClient.accessionToTaxidArray(bb.toString()); }else if(mode==GI_MODE){ serverIds=TaxClient.giToTaxidArray(bb.toString()); }else{ serverIds=TaxClient.headerToTaxidArray(bb.toString()); } if(serverIds==null){ KillSwitch.kill("Null response for '"+bb.toString()+"'"); } // assert(serverIds!=null) : "Null response for '"+bb.toString()+"'"; bb.clear(); if(!names.isEmpty()){ assert(tree!=null) : "Need to load a TaxTree."; assert(names.size()==serverIds.length); for(int i=0; i<serverIds.length; i++){ final String name=names.get(i); if(serverIds[i]<0){ TaxNode tn=tree.getNodeByName(name); if(tn!=null){serverIds[i]=tn.id;} // else { // assert(false) : names.get(i); // } }else{ //Sometimes the species gets renamed. // TaxNode tn=tree.getNodeByName(name); // if(tn==null || tn.id==serverIds[i]) {System.err.println(name+", "+serverIds[i]+", "+tn+", "+tree.getNodesByName(name));} } } } for(int lineNum=0, serverNum=0; lineNum<=lines.size(); lineNum++){ byte[] line=lines.get(lineNum); if(line[0]!='#' && !Tools.startsWith(line, "tid")){ bb.clear(); final int tid=serverIds[serverNum]; if(tid<0){ //WARN if(bswBadHeaders!=null){ bswBadHeaders.print(tid).tab(); bswBadHeaders.print(looksLikeRealAccession(line)).tab(); bswBadHeaders.println(line); }else if(warnBadHeaders){ System.err.println(tid+"\t"+looksLikeRealAccession(line)+"\t"+new String(line)); } } bb.append("tid|"); bb.append(tid); if(prefix){ bb.append('|'); bb.append(line); }else if(counts!=null && tid>=0){ bb.append('|'); int count=counts.increment(tid, 1); bb.append(count); if(count==1){taxaCounted++;} } lines.set(lineNum, bb.toBytes()); serverNum++; if(serverNum>=serverIds.length){break;} } } if(maxInvalidHeaders>=0 && invalidReads>maxInvalidHeaders){ KillSwitch.kill("Maximum bad headers exceeded: "+maxInvalidHeaders); } } private void processInner_server(ByteFile bf, ByteStreamWriter bsw, ByteStreamWriter bswInvalid, ByteStreamWriter bswBadHeaders, HashArray1D counts, int format){ ArrayList<byte[]> lines=new ArrayList<byte[]>(); byte[] line=bf.nextLine(); boolean valid=false; long storedBytes=0; while(line!=null){ if(line.length>0){ linesIn++; lines.add(line); storedBytes+=line.length; if(storedBytes>=maxStoredBytes){ updateHeadersFromServer(lines, counts, bswBadHeaders, format); valid=dumpBuffer(lines, valid, bsw, bswInvalid); lines=new ArrayList<byte[]>(); storedBytes=0; if(deleteInvalid && invalidReads>0){ System.err.println("Invalid sequence detected; aborting.\n" + "Input file: \t"+bf.name()+"\n" + "Output file: \t"+(bsw==null ? "null" : bsw.fname)+"\n" + "Line: \t"+new String(line)+"\n"); break; } } } line=bf.nextLine(); } if(storedBytes>0){ updateHeadersFromServer(lines, counts, bswBadHeaders, format); valid=dumpBuffer(lines, valid, bsw, bswInvalid); lines=new ArrayList<byte[]>(); storedBytes=0; } errorState|=bf.close(); } private boolean dumpBuffer(ArrayList<byte[]> lines, boolean valid, ByteStreamWriter bsw, ByteStreamWriter bswInvalid){ for(byte[] line : lines){ if(line.length>0 && line[0]=='>'){ readsProcessed++; if(maxReads>0 && readsProcessed>maxReads){break;} if(Tools.startsWith(line, invalidTitle)){ valid=false; invalidReads++; invalidLines++; if(deleteInvalid){break;} }else{ assert(Tools.startsWith(line, title)); valid=true; validReads++; validLines++; } }else if(gffIn){ basesProcessed+=line.length; valid=!Tools.startsWith(line, invalidGffTitle); if(valid){ validBases+=line.length; validLines++; }else{ invalidBases+=line.length; invalidLines++; } }else{ basesProcessed+=line.length; if(valid){ validBases+=line.length; validLines++; }else{ invalidBases+=line.length; invalidLines++; } } if(valid || keepAll){ if(bsw!=null){bsw.println(line);} }else{ if(bswInvalid!=null){bswInvalid.println(line);} } } return valid; } /*--------------------------------------------------------------*/ /*--------------------------------------------------------------*/ private LinkedHashSet<String> in1=new LinkedHashSet<String>(); private String out1=null; private String outInvalid=null; private String badHeaders=null; private String taxTreeFile=null; private String giTableFile=null; private String accessionFile=null; private String patternFile=null; /*--------------------------------------------------------------*/ private long maxReads=-1; private long validReads=0; private long validBases=0; private long invalidReads=0; private long invalidBases=0; private long taxaCounted=0; private long linesIn=0; private long validLines=0; private long invalidLines=0; private long maxStoredBytes=10000000; private long readsProcessed=0, basesProcessed=0; private boolean prefix=true; private boolean countTable=true; private boolean keepAll=true; private boolean shrinkNames=false; private boolean warnBadHeaders=true; private boolean useServer=false; /** Crash if the number of invalid headers exceeds this */ private long maxInvalidHeaders=-1; /** Delete the output file if there are any invalid headers */ private boolean deleteInvalid=false; private int mode; private static final int ACCESSION_MODE=0, GI_MODE=1, HEADER_MODE=2, UNITE_MODE=3; private boolean gffIn=false; /*--------------------------------------------------------------*/ private final ArrayList<FileFormat> ffin1; private final FileFormat ffout1; private final FileFormat ffoutInvalid; private final TaxTree tree; /*--------------------------------------------------------------*/ private PrintStream outstream=System.err; public static boolean verbose=false; public boolean errorState=false; private boolean overwrite=false; private boolean append=false; private static byte[] title=">tid|".getBytes(); private static byte[] invalidTitle=">tid|-1".getBytes(); private static byte[] invalidGffTitle="tid|-1".getBytes(); }