jpayne@68: package tax; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Arrays; jpayne@68: import java.util.Collections; jpayne@68: import java.util.HashMap; jpayne@68: import java.util.concurrent.atomic.AtomicLongArray; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteFile1; jpayne@68: import fileIO.ByteFile2; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import kmer.HashBuffer; jpayne@68: import kmer.KmerTableSet; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import stream.ConcurrentGenericReadInputStream; jpayne@68: import stream.FastaReadInputStream; jpayne@68: import structures.StringNum; jpayne@68: jpayne@68: /** jpayne@68: * New version loads with multiple threads per input file. jpayne@68: * @author Brian Bushnell jpayne@68: * @date December 16, 2016 jpayne@68: * jpayne@68: */ jpayne@68: public class AccessionToTaxid { jpayne@68: jpayne@68: public static void load(String files){ jpayne@68: final boolean oldBf2=ByteFile.FORCE_MODE_BF2; jpayne@68: final boolean oldBf1=ByteFile.FORCE_MODE_BF1; jpayne@68: final boolean oldUnpigz=ReadWrite.USE_UNPIGZ; jpayne@68: final boolean oldGunzip=ReadWrite.USE_UNPIGZ; jpayne@68: jpayne@68: main(new String[] {"in="+files, "unpigz="+ReadWrite.USE_UNPIGZ, "gunzip="+ReadWrite.USE_GUNZIP}); jpayne@68: jpayne@68: ByteFile.FORCE_MODE_BF2=oldBf2; jpayne@68: ByteFile.FORCE_MODE_BF1=oldBf1; jpayne@68: ReadWrite.USE_UNPIGZ=oldUnpigz; jpayne@68: ReadWrite.USE_UNPIGZ=oldGunzip; jpayne@68: } jpayne@68: jpayne@68: public static void main(String[] args){ jpayne@68: Timer t=new Timer(); jpayne@68: AccessionToTaxid x=new AccessionToTaxid(args); jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: public AccessionToTaxid(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: ReadWrite.USE_UNPIGZ=true; jpayne@68: jpayne@68: Parser parser=new Parser(); jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: ByteFile1.verbose=verbose; jpayne@68: ByteFile2.verbose=verbose; jpayne@68: stream.FastaReadInputStream.verbose=verbose; jpayne@68: ConcurrentGenericReadInputStream.verbose=verbose; jpayne@68: stream.FastqReadInputStream.verbose=verbose; jpayne@68: ReadWrite.verbose=verbose; jpayne@68: }else if(a.equals("stripunderscore")){ jpayne@68: // STRIP_UNDERSCORE=Parse.parseBoolean(b); jpayne@68: assert(false) : "stripunderscore is disabled."; jpayne@68: }else if(a.equals("usetables")){ jpayne@68: // USE_TABLES=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("usetables")){ jpayne@68: // USE_TABLES=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("skipparse")){ jpayne@68: skipParse=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("skiphash")){ jpayne@68: skipHash=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("prealloc")){ jpayne@68: if(b==null || Character.isLetter(b.charAt(0))){ jpayne@68: if(Parse.parseBoolean(b)){ jpayne@68: prealloc=0.78f; jpayne@68: }else{ jpayne@68: prealloc=0; jpayne@68: } jpayne@68: }else{ jpayne@68: prealloc=Float.parseFloat(b); jpayne@68: } jpayne@68: }else if(a.equals("maxpigzprocesses")){ jpayne@68: maxPigzProcesses=Integer.parseInt(b); jpayne@68: }else if(a.equals("in")){ jpayne@68: assert(b!=null) : "Bad parameter: "+arg; jpayne@68: String[] temp=b.split(","); jpayne@68: for(String s : temp){in.add(s);} jpayne@68: }else if(parser.parse(arg, a, b)){ jpayne@68: //do nothing jpayne@68: }else if(b==null){ jpayne@68: if(new File(arg).exists()){ jpayne@68: in.add(arg); jpayne@68: } jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: // throw new RuntimeException("Unknown parameter "+args[i]); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: {//Process parser fields jpayne@68: overwrite=parser.overwrite; jpayne@68: jpayne@68: // out=parser.out1; jpayne@68: } jpayne@68: jpayne@68: assert(FastaReadInputStream.settingsOK()); jpayne@68: jpayne@68: if(in==null || in.size()==0){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: jpayne@68: if(ReadWrite.USE_UNPIGZ && !ByteFile.FORCE_MODE_BF2){ jpayne@68: ByteFile.FORCE_MODE_BF2=false; jpayne@68: ByteFile.FORCE_MODE_BF1=true; jpayne@68: } jpayne@68: jpayne@68: // if(out!=null && out.equalsIgnoreCase("null")){out=null;} jpayne@68: jpayne@68: // if(!Tools.testOutputFiles(overwrite, false, false, out)){ jpayne@68: // outstream.println((out==null)+", "+out); jpayne@68: // throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n"); jpayne@68: // } jpayne@68: jpayne@68: {//Reorder by size, ascending jpayne@68: ArrayList list=new ArrayList(); jpayne@68: for(String s : in){ jpayne@68: list.add(new StringNum(s, new File(s).length())); jpayne@68: } jpayne@68: Collections.sort(list); jpayne@68: in.clear(); jpayne@68: for(StringNum sn : list){ jpayne@68: in.add(sn.s); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: // ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, false, false); jpayne@68: ffin=new FileFormat[in.size()]; jpayne@68: jpayne@68: /* Note */ jpayne@68: /* Java 1.7 works fine here (54 seconds skipping parsing). */ jpayne@68: /* Java 1.8 has immense speed-downs if pigz is used (80-100s normally, >1000s with unpigz). */ jpayne@68: /* Java 1.8_144 is unpredictable and incredibly slow (80-900s normally, 500-1800 with unpigz) */ jpayne@68: jpayne@68: int processes=0; jpayne@68: for(int i=0; imaxPigzProcesses){ jpayne@68: processes++; jpayne@68: // if(processes>maxPigzProcesses){ jpayne@68: ff=FileFormat.testInput(s, FileFormat.TXT, null, false, false); jpayne@68: // } jpayne@68: } jpayne@68: ffin[i]=ff; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: @SuppressWarnings("unchecked") jpayne@68: void process(Timer t){ jpayne@68: jpayne@68: // if(USE_MAPS){ jpayne@68: assert(maps==null); jpayne@68: maps=new HashMap[128]; jpayne@68: for(int i=0; i(); jpayne@68: } jpayne@68: // } jpayne@68: jpayne@68: assert(tables==null); jpayne@68: if(USE_TABLES){ jpayne@68: tables=new KmerTableSet(new String[] {"ways=31",("prealloc="+(prealloc>0 ? prealloc : "f"))}, 12); jpayne@68: tables.allocateTables(); jpayne@68: } jpayne@68: jpayne@68: if(ffin.length>4){//Addresses a multithreaded read bug in Java jpayne@68: // FileFormat[] ffa1=Arrays.copyOf(ffin, 2); jpayne@68: // FileFormat[] ffa2=Arrays.copyOfRange(ffin, 2, ffin.length); jpayne@68: // spawnThreads(ffa1); jpayne@68: // spawnThreads(ffa2); jpayne@68: jpayne@68: FileFormat[] ffa1=Arrays.copyOf(ffin, 2); jpayne@68: FileFormat[] ffa2=Arrays.copyOfRange(ffin, 2, ffin.length); jpayne@68: spawnThreads(ffa1, 2); jpayne@68: spawnThreads(ffa2, 200); jpayne@68: }else{ jpayne@68: spawnThreads(ffin, 200); jpayne@68: } jpayne@68: jpayne@68: //Do anything necessary after processing jpayne@68: System.gc(); jpayne@68: jpayne@68: t.stop(); jpayne@68: outstream.println(Tools.timeLinesBytesProcessed(t, linesProcessed, bytesProcessed, 8)); jpayne@68: jpayne@68: outstream.println(); jpayne@68: outstream.println("Valid Lines: \t"+linesValid); jpayne@68: outstream.println("Invalid Lines: \t"+(linesProcessed-linesValid)); jpayne@68: jpayne@68: if(lengthCounts!=null){ jpayne@68: outstream.println(); jpayne@68: outstream.println("Length counts:"); jpayne@68: jpayne@68: for(int i=0; i0){outstream.println(i+"\t"+count);} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(symbolCounts!=null){ jpayne@68: outstream.println(); jpayne@68: outstream.println("Symbols:"); jpayne@68: jpayne@68: String comma=""; jpayne@68: for(int i=0; i0){ jpayne@68: outstream.print(comma+i); jpayne@68: comma=","; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(counts_underscore!=null){ jpayne@68: outstream.println(); jpayne@68: outstream.println("Length_underscore counts:"); jpayne@68: jpayne@68: for(int i=0; i0){outstream.println(i+"\t"+count);} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(counts_underscore2!=null){ jpayne@68: outstream.println(); jpayne@68: outstream.println("Length_underscore2 counts:"); jpayne@68: jpayne@68: for(int i=0; i0){outstream.println(i+"\t"+count);} jpayne@68: } jpayne@68: } jpayne@68: outstream.println(); jpayne@68: Shared.printMemory(); jpayne@68: jpayne@68: if(errorState){ jpayne@68: throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); jpayne@68: } jpayne@68: jpayne@68: LOADED=true; jpayne@68: } jpayne@68: jpayne@68: /** Spawn process threads */ jpayne@68: private void spawnThreads(FileFormat[] ffa, int threadLimit){ jpayne@68: jpayne@68: //Do anything necessary prior to processing jpayne@68: Tools.reverseInPlace(ffa, 0, ffa.length); jpayne@68: jpayne@68: //Fill a list with ProcessThreads jpayne@68: ArrayList albf=new ArrayList(ffa.length); jpayne@68: for(FileFormat ff : ffa){ jpayne@68: if(ff!=null){ jpayne@68: System.err.println("Loading "+ff.name()); jpayne@68: ByteFile bf=ByteFile.makeByteFile(ff, 1); jpayne@68: albf.add(bf); jpayne@68: } jpayne@68: } jpayne@68: final int threads=Tools.min(threadLimit, Tools.max(albf.size(), Shared.threads())); jpayne@68: ArrayList alht=new ArrayList(threads); jpayne@68: jpayne@68: for(int i=0; iAnalyzeAccession.longestPattern){return false;} jpayne@68: final long number=AnalyzeAccession.digitize(accession); jpayne@68: if(number>=0){ jpayne@68: int value=tables.getCount(number); jpayne@68: return value<0 ? -1 : value; jpayne@68: } jpayne@68: }else if(len<=12){ jpayne@68: long number=hash(accession); jpayne@68: jpayne@68: int value=tables.getCount(number); jpayne@68: return value<1 ? -1 : value; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(len='0' && c<='9') || (c>='A' && c<='Z') /*|| (c>='a' && c<='z')*/ jpayne@68: || c=='.' || c=='_' || c=='-' || c==':' || c==','){ jpayne@68: //do nothing jpayne@68: }else{ jpayne@68: return false; jpayne@68: } jpayne@68: } jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: static long hash(String accession){ jpayne@68: long number=0; jpayne@68: for(int i=0, max=accession.length(); i='0' && c<='9'){c=c-'0';} jpayne@68: else if(c>='A' && c<='Z'){c=c+offset;} jpayne@68: else if(c=='_' || c=='-'){c=10;}//Collision, but should be OK jpayne@68: else if(c>='a' && c<='z'){c=c+offsetLower;} jpayne@68: else{ jpayne@68: assert(false) : accession; jpayne@68: } jpayne@68: number=(number*37)+c; jpayne@68: } jpayne@68: return number; jpayne@68: } jpayne@68: jpayne@68: static long hash(final byte[] line, final int limit){ jpayne@68: long number=0; jpayne@68: for(int i=0; i='0' && c<='9'){c=c-'0';} jpayne@68: else if(c>='A' && c<='Z'){c=c+offset;} jpayne@68: else if(c=='_' || c=='-'){c=10;}//Collision, but should be OK jpayne@68: else if(c>='a' && c<='z'){c=c+offsetLower;} jpayne@68: else{ jpayne@68: assert(false) : new String(line); jpayne@68: } jpayne@68: number=(number*37)+c; jpayne@68: } jpayne@68: return number; jpayne@68: } jpayne@68: jpayne@68: public static int parseLineToTaxid(final byte[] line, final byte delimiter){ jpayne@68: int a=0, b=0; jpayne@68: jpayne@68: final int ncbi; jpayne@68: jpayne@68: while(ba) : "Missing field 0: "+new String(line); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: while(ba) : "Missing field 1: "+new String(line); jpayne@68: assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b; jpayne@68: //accession2=new String(line, a, b-a); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: while(ba) : "Missing field 2: "+new String(line); jpayne@68: ncbi=Parse.parseInt(line, a, b); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: return ncbi; jpayne@68: } jpayne@68: jpayne@68: public static int parseLineToTaxid_2col(final byte[] line, final byte delimiter){ jpayne@68: int a=0, b=0; jpayne@68: jpayne@68: final int ncbi; jpayne@68: jpayne@68: while(ba) : "Missing field 0: "+new String(line); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: while(ba) : "Missing field 1: "+new String(line); jpayne@68: ncbi=Parse.parseInt(line, a, b); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: return ncbi; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public static class HashThread extends Thread { jpayne@68: jpayne@68: @SuppressWarnings("unchecked") jpayne@68: public HashThread(ByteFile bf_){ jpayne@68: // if(USE_MAPS){ jpayne@68: mapsT=new HashMap[128]; jpayne@68: for(int i=0; i(); jpayne@68: } jpayne@68: // } jpayne@68: if(USE_TABLES){ jpayne@68: table=new HashBuffer(tables.tables(), 1000, 31, true, true); jpayne@68: } jpayne@68: bf=bf_; jpayne@68: } jpayne@68: jpayne@68: ArrayList fetch(int limit){ jpayne@68: ArrayList list=new ArrayList(limit); jpayne@68: synchronized(bf){ jpayne@68: byte[] line=bf.nextLine(); jpayne@68: // while(line!=null && Tools.startsWith(line, "accession")){line=bf.nextLine();} jpayne@68: if(line==null){return null;} jpayne@68: for(int i=0; line!=null;){ jpayne@68: list.add(line); jpayne@68: i++; jpayne@68: if(i>=limit){break;} jpayne@68: line=bf.nextLine(); jpayne@68: } jpayne@68: } jpayne@68: return list.size()>0 ? list : null; jpayne@68: } jpayne@68: jpayne@68: @Override jpayne@68: public void run(){ jpayne@68: // System.err.println("Processing "+bf.name()); jpayne@68: final int fetchSize=1000; jpayne@68: for(ArrayList list=fetch(fetchSize); list!=null; list=fetch(fetchSize)){ jpayne@68: for(byte[] line : list){ jpayne@68: if(line.length>0){ jpayne@68: linesProcessedT++; jpayne@68: bytesProcessedT+=line.length; jpayne@68: jpayne@68: // final boolean valid=(!Tools.startsWith(line, "accession\t")) & !skipParse; jpayne@68: final boolean valid=(!Tools.startsWith(line, "accession")) & !skipParse; jpayne@68: // assert(valid); //Not true if concatenated jpayne@68: jpayne@68: // if(Tools.startsWith(line, "NZ_LM994619")){ jpayne@68: // boolean b=parseLine2(line, (byte)'\t'); jpayne@68: // assert(false) : b+", "+new String(line); jpayne@68: // } jpayne@68: jpayne@68: if(valid){ jpayne@68: boolean b=parseLine2(line, (byte)'\t'); jpayne@68: if(b){linesValidT++;} jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: // if(USE_MAPS){ jpayne@68: for(int i=0; i0){ jpayne@68: synchronized(maps[i]){ jpayne@68: maps[i].putAll(mapsT[i]); jpayne@68: } jpayne@68: } jpayne@68: mapsT[i]=null; jpayne@68: } jpayne@68: // } jpayne@68: if(USE_TABLES){ jpayne@68: long temp=table.flush(); jpayne@68: } jpayne@68: jpayne@68: success=true; jpayne@68: } jpayne@68: jpayne@68: // public boolean parseLineNumeric(final byte[] line, final byte delimiter){ jpayne@68: // int a=0, b=0; jpayne@68: // jpayne@68: // long accession=0; jpayne@68: // final int ncbi, gi; jpayne@68: // jpayne@68: // while(ba) : "Missing field 0: "+new String(line); jpayne@68: // for(int i=a; ia) : "Missing field 1: "+new String(line); jpayne@68: // //accession2=new String(line, a, b-a); jpayne@68: // b++; jpayne@68: // a=b; jpayne@68: // jpayne@68: // while(ba) : "Missing field 2: "+new String(line); jpayne@68: // ncbi=Parse.parseInt(line, a, b); jpayne@68: // b++; jpayne@68: // a=b; jpayne@68: // jpayne@68: //// while(ba) : "Missing field 3: "+new String(line); jpayne@68: ////// gi=Parse.parseInt(line, a, b); jpayne@68: //// b++; jpayne@68: //// a=b; jpayne@68: // jpayne@68: // if(ncbi<1){return false;} jpayne@68: // jpayne@68: // if(tree!=null){ jpayne@68: // if(ncbi>=tree.nodes.length){return false;} jpayne@68: // TaxNode tn=tree.getNode(ncbi); jpayne@68: // if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE || tn.level==TaxTree.DOMAIN){return false;} jpayne@68: // if(tn.pid>=tree.nodes.length){return false;} jpayne@68: // tn=tree.getNode(tn.pid); jpayne@68: // if(tn==null || tn.level==TaxTree.NO_RANK || tn.level==TaxTree.LIFE){return false;} jpayne@68: // } jpayne@68: // assert(accession>=0) : new String(line); jpayne@68: // table.set(accession, ncbi); jpayne@68: // return true; jpayne@68: // } jpayne@68: jpayne@68: //This code is no longer used and can be safely deleted. jpayne@68: @Deprecated jpayne@68: public boolean parseLine(final byte[] line, final byte delimiter){ jpayne@68: int a=0, b=0; jpayne@68: jpayne@68: String accession; jpayne@68: final int ncbi, gi; jpayne@68: jpayne@68: while(ba) : "Missing field 0: "+new String(line); jpayne@68: accession=new String(line, a, b-a); jpayne@68: final int dot=accession.indexOf('.');//and :, but this is deprecated. jpayne@68: if(dot>=0){//Should never happen jpayne@68: // System.err.println(accession); jpayne@68: // assert(dot==accession.length()-2) : accession; jpayne@68: accession=accession.substring(0, dot); jpayne@68: } jpayne@68: // if(STRIP_UNDERSCORE){ jpayne@68: // accession=accession.replaceAll("[_-]", ""); jpayne@68: // } jpayne@68: if(lengthCountsT!=null){lengthCountsT[b-a]++;} jpayne@68: if(symbolCountsT!=null){ jpayne@68: for(int i=a; i=0){ jpayne@68: if(counts_underscoreT!=null){counts_underscoreT[b-a]++;} jpayne@68: if(counts_underscore2T!=null && underscore==2){counts_underscore2T[b-a]++;} jpayne@68: } jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: while(ba) : "Missing field 1: "+new String(line); jpayne@68: assert(b>=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b; jpayne@68: //accession2=new String(line, a, b-a); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: while(ba) : "Missing field 2: "+new String(line); jpayne@68: ncbi=Parse.parseInt(line, a, b); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: // while(ba) : "Missing field 3: "+new String(line); jpayne@68: //// gi=Parse.parseInt(line, a, b); jpayne@68: // b++; jpayne@68: // a=b; jpayne@68: jpayne@68: if(ncbi<1){return false;} jpayne@68: jpayne@68: if(tree!=null){ jpayne@68: if(ncbi>=tree.nodes.length){return false;} jpayne@68: TaxNode tn=tree.getNode(ncbi); jpayne@68: if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;} jpayne@68: if(tn.pid>=tree.nodes.length){return false;} jpayne@68: tn=tree.getNode(tn.pid); jpayne@68: if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E){return false;} jpayne@68: } jpayne@68: jpayne@68: if(accession.length()<13 && USE_TABLES){ jpayne@68: long number=hash(accession); jpayne@68: assert(number>=0) : new String(line); jpayne@68: table.set(number, ncbi); jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: int way=accession.charAt(0); jpayne@68: mapsT[way].put(accession, ncbi); jpayne@68: // Integer old=mapsT[way].put(accession, ncbi); jpayne@68: // assert(old==null || old==ncbi) : "'"+accession+"': "+old+" -> "+ncbi; jpayne@68: // System.err.println("'"+accession+"': "+old+" -> "+ncbi); jpayne@68: // assert(dot==-1) : "'"+accession+"': "+old+" -> "+ncbi; jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: public boolean parseLine2(final byte[] line, final byte delimiter){ jpayne@68: int a=0, b=0; jpayne@68: jpayne@68: final int ncbi, gi; jpayne@68: jpayne@68: while(ba) : "Missing field 0: "+new String(line); jpayne@68: while(b=a) : "Missing field 1: "+new String(line)+"\n"+a+", "+b; jpayne@68: //accession2=new String(line, a, b-a); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: //System.err.println("C: a="+a+", b="+b); jpayne@68: jpayne@68: while(ba) : "Missing field 2: "+new String(line); jpayne@68: ncbi=Parse.parseInt(line, a, b); jpayne@68: //System.err.println("D: a="+a+", b="+b+", ncbi="+ncbi+", '"+(new String(line, a, b-a))+"'"); jpayne@68: b++; jpayne@68: a=b; jpayne@68: jpayne@68: // while(ba) : "Missing field 3: "+new String(line); jpayne@68: //// gi=Parse.parseInt(line, a, b); jpayne@68: // b++; jpayne@68: // a=b; jpayne@68: jpayne@68: if(ncbi<1){return false;} jpayne@68: //System.err.println("E: a="+a+", b="+b); jpayne@68: if(skipHash){return false;}//123 jpayne@68: //System.err.println("F: a="+a+", b="+b); jpayne@68: jpayne@68: if(tree!=null){ jpayne@68: if(ncbi>=tree.nodes.length){return false;} jpayne@68: //System.err.println("G"); jpayne@68: TaxNode tn=tree.getNode(ncbi); jpayne@68: if(tn==null || /*tn.levelExtended==TaxTree.NO_RANK_E ||*/ tn.levelExtended==TaxTree.LIFE_E || tn.levelExtended==TaxTree.DOMAIN_E){return false;} jpayne@68: //System.err.println("H: "+tn); jpayne@68: if(tn.pid>=tree.nodes.length){return false;} jpayne@68: //System.err.println("I: "+tn); jpayne@68: // TaxNode parent=tree.getNode(tn.pid); jpayne@68: // System.err.println("J: "+tn); jpayne@68: // if(tn==null || tn.levelExtended==TaxTree.NO_RANK_E || tn.levelExtended==TaxTree.LIFE_E){return false;} jpayne@68: // System.err.println("K"); jpayne@68: } jpayne@68: jpayne@68: if(distributed){ jpayne@68: String accession=new String(line, 0, dot);//slow jpayne@68: assert(accession.equals(accession.toUpperCase()));//TODO: Disable. (slow) jpayne@68: if(accession.hashCode()%serverCount!=serverNum){return false;} jpayne@68: } jpayne@68: jpayne@68: if(USE_TABLES){ jpayne@68: if(AnalyzeAccession.codeMap!=null){ jpayne@68: // if(dot>AnalyzeAccession.longestPattern){return false;} jpayne@68: final long number=AnalyzeAccession.digitize(line); jpayne@68: if(number>=0){ jpayne@68: table.set(number, ncbi); jpayne@68: return true; jpayne@68: } jpayne@68: assert(number==-1) : number+", "+new String(line); jpayne@68: }else{ jpayne@68: if(dot<13){ jpayne@68: // long number=hash(accession); jpayne@68: final long number=hash(line, dot); jpayne@68: assert(number>=0) : new String(line); jpayne@68: table.set(number, ncbi); jpayne@68: return true; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: String accession=new String(line, 0, dot); jpayne@68: int way=accession.charAt(0); jpayne@68: mapsT[way].put(accession, ncbi); jpayne@68: // Integer old=mapsT[way].put(accession, ncbi); jpayne@68: // assert(old==null || old==ncbi) : "'"+accession+"': "+old+" -> "+ncbi; jpayne@68: // System.err.println("'"+accession+"': "+old+" -> "+ncbi); jpayne@68: // assert(dot==-1) : "'"+accession+"': "+old+" -> "+ncbi; jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: private long linesProcessedT=0; jpayne@68: private long linesValidT=0; jpayne@68: private long bytesProcessedT=0; jpayne@68: jpayne@68: final ByteFile bf; jpayne@68: HashMap[] mapsT; jpayne@68: HashBuffer table; jpayne@68: boolean success=false; jpayne@68: jpayne@68: private long[] lengthCountsT=null;//new AtomicLongArray(20); jpayne@68: private long[] symbolCountsT=null;//new AtomicLongArray(255); jpayne@68: private long[] counts_underscoreT=null;//new AtomicLongArray(20); jpayne@68: private long[] counts_underscore2T=null;//new AtomicLongArray(20); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private ArrayList in=new ArrayList(); jpayne@68: // private String out=null; jpayne@68: jpayne@68: static int maxPigzProcesses=12; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private long linesProcessed=0; jpayne@68: private long linesValid=0; jpayne@68: private long bytesProcessed=0; jpayne@68: jpayne@68: private AtomicLongArray lengthCounts=null;//new AtomicLongArray(20); jpayne@68: private AtomicLongArray symbolCounts=null;//new AtomicLongArray(255); jpayne@68: private AtomicLongArray counts_underscore=null;//new AtomicLongArray(20); jpayne@68: private AtomicLongArray counts_underscore2=null;//new AtomicLongArray(20); jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private final FileFormat ffin[]; jpayne@68: // private final FileFormat ffout; jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public static boolean LOADED(){return LOADED;} jpayne@68: jpayne@68: private static boolean LOADED=false; jpayne@68: private static HashMap[] maps=null; jpayne@68: private static KmerTableSet tables; jpayne@68: public static TaxTree tree=null; jpayne@68: // public static final boolean USE_MAPS=true; jpayne@68: public static final boolean USE_TABLES=true; jpayne@68: // public static boolean STRIP_UNDERSCORE=false; jpayne@68: public static boolean skipParse=false; jpayne@68: public static boolean skipHash=false; jpayne@68: public static float prealloc=0; jpayne@68: private static final long offset=-'A'+11; jpayne@68: private static final long offsetLower=-'a'+11; jpayne@68: jpayne@68: public static int serverNum=0; jpayne@68: public static int serverCount=1; jpayne@68: public static boolean distributed=false; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private PrintStream outstream=System.err; jpayne@68: public static boolean verbose=false; jpayne@68: public boolean errorState=false; jpayne@68: private boolean overwrite=false; jpayne@68: jpayne@68: }