jpayne@68: package tax; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Arrays; jpayne@68: import java.util.Collections; jpayne@68: import java.util.HashMap; jpayne@68: import java.util.Locale; jpayne@68: import java.util.Map.Entry; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteFile1; jpayne@68: import fileIO.ByteFile2; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import fileIO.TextFile; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import stream.ConcurrentGenericReadInputStream; jpayne@68: import stream.FastaReadInputStream; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.ListNum; jpayne@68: import structures.StringNum; jpayne@68: import template.Accumulator; jpayne@68: import template.ThreadWaiter; jpayne@68: jpayne@68: /** jpayne@68: * Counts patterns in Accessions. jpayne@68: * Handles hashing for Accession to TaxID lookups. jpayne@68: * @author Brian Bushnell jpayne@68: * @date May 9, 2018 jpayne@68: * jpayne@68: */ jpayne@68: public class AnalyzeAccession implements Accumulator { jpayne@68: jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. 
jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: AnalyzeAccession x=new AnalyzeAccession(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: public AnalyzeAccession(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: jpayne@68: Parser parser=new Parser(); jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: ByteFile1.verbose=verbose; jpayne@68: ByteFile2.verbose=verbose; jpayne@68: stream.FastaReadInputStream.verbose=verbose; jpayne@68: ConcurrentGenericReadInputStream.verbose=verbose; jpayne@68: stream.FastqReadInputStream.verbose=verbose; jpayne@68: ReadWrite.verbose=verbose; jpayne@68: }else if(a.equals("in")){ jpayne@68: if(b==null){in.clear();} jpayne@68: else{ jpayne@68: String[] split2=b.split(","); jpayne@68: for(String s2 : split2){ jpayne@68: in.add(s2); jpayne@68: } jpayne@68: } jpayne@68: }else if(a.equals("perfile")){ jpayne@68: perFile=Parse.parseBoolean(b); jpayne@68: }else if(b==null && new File(arg).exists()){ jpayne@68: in.add(arg); jpayne@68: }else if(parser.parse(arg, a, b)){ jpayne@68: //do nothing jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: // throw new RuntimeException("Unknown parameter "+args[i]); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: {//Process parser fields jpayne@68: overwrite=parser.overwrite; jpayne@68: 
append=parser.append; jpayne@68: jpayne@68: out=parser.out1; jpayne@68: } jpayne@68: jpayne@68: assert(FastaReadInputStream.settingsOK()); jpayne@68: jpayne@68: if(in==null){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: jpayne@68: // if(!ByteFile.FORCE_MODE_BF2){ jpayne@68: // ByteFile.FORCE_MODE_BF2=false; jpayne@68: // ByteFile.FORCE_MODE_BF1=true; jpayne@68: // } jpayne@68: jpayne@68: if(out!=null && out.equalsIgnoreCase("null")){out=null;} jpayne@68: jpayne@68: if(!Tools.testOutputFiles(overwrite, append, false, out)){ jpayne@68: outstream.println((out==null)+", "+out); jpayne@68: throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out+"\n"); jpayne@68: } jpayne@68: jpayne@68: ffout=FileFormat.testOutput(out, FileFormat.TXT, null, true, overwrite, append, false); jpayne@68: ffina=new FileFormat[in.size()]; jpayne@68: for(int i=0; i list=new ArrayList(); jpayne@68: list.addAll(countMap.values()); jpayne@68: Collections.sort(list); jpayne@68: Collections.reverse(list); jpayne@68: for(StringNum sn : list){ jpayne@68: double combos=1; jpayne@68: for(int i=0; i alpt=new ArrayList(threads); jpayne@68: for(int i=0; i> perFileList=new ArrayList>(ffina.length); jpayne@68: for(FileFormat ffin : ffina) { jpayne@68: ByteFile bf=ByteFile.makeByteFile(ffin); jpayne@68: jpayne@68: final int threads=Tools.min(16, Shared.threads()); jpayne@68: ArrayList alpt=new ArrayList(threads); jpayne@68: for(int i=0; i alpt : perFileList){ jpayne@68: boolean success=ThreadWaiter.waitForThreads(alpt, this); jpayne@68: errorState|=!success; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: static class ProcessThread extends Thread { jpayne@68: jpayne@68: ProcessThread(ByteFile bf_){ jpayne@68: bf=bf_; jpayne@68: } jpayne@68: jpayne@68: @Override jpayne@68: public void run() { jpayne@68: final StringBuilder buffer=new 
StringBuilder(128); jpayne@68: for(ListNum lines=bf.nextList(); lines!=null; lines=bf.nextList()){ jpayne@68: assert(lines.size()>0); jpayne@68: if(lines.id==0){ jpayne@68: //This one is not really important; the header could be missing. jpayne@68: assert(Tools.startsWith(lines.get(0), "accession")) : bf.name()+"[0]: "+new String(lines.get(0)); jpayne@68: }else{ jpayne@68: assert(!Tools.startsWith(lines.get(0), "accession")) : bf.name()+"["+lines.id+"]: "+new String(lines.get(0)); jpayne@68: } jpayne@68: for(byte[] line : lines){ jpayne@68: if(line.length>0){ jpayne@68: linesProcessedT++; jpayne@68: bytesProcessedT+=(line.length+1); jpayne@68: jpayne@68: boolean valid=lines.id>0 || !(Tools.startsWith(line, "accession")); //Skips test for most lines jpayne@68: jpayne@68: if(valid){ jpayne@68: linesOutT++; jpayne@68: increment(line, buffer); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: void increment(byte[] line, StringBuilder buffer){ jpayne@68: buffer.setLength(0); jpayne@68: for(int i=0; i countMapT=new HashMap(); jpayne@68: private final ByteFile bf; jpayne@68: long linesProcessedT=0; jpayne@68: long linesOutT=0; jpayne@68: long bytesProcessedT=0; jpayne@68: jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: @Override jpayne@68: public void accumulate(ProcessThread t) { jpayne@68: linesProcessed+=t.linesProcessedT; jpayne@68: linesOut+=t.linesOutT; jpayne@68: bytesProcessed+=t.bytesProcessedT; jpayne@68: for(Entry e : t.countMapT.entrySet()){ jpayne@68: StringNum value=e.getValue(); jpayne@68: final String key=e.getKey(); jpayne@68: StringNum old=countMap.get(key); jpayne@68: if(old==null){countMap.put(key, value);} jpayne@68: else{old.add(value);} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: @Override jpayne@68: public boolean success() { jpayne@68: return !errorState; jpayne@68: } jpayne@68: jpayne@68: 
/*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public static long combos(String s){ jpayne@68: double combos=1; jpayne@68: for(int i=0; i=Long.MAX_VALUE ? Long.MAX_VALUE : (long)Math.ceil(combos)); jpayne@68: } jpayne@68: jpayne@68: public static long combos(byte[] s){ jpayne@68: double combos=1; jpayne@68: for(int i=0; i=Long.MAX_VALUE ? -1 : (long)Math.ceil(combos)); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public static HashMap loadCodeMap(String fname){ jpayne@68: assert(codeMap==null); jpayne@68: TextFile tf=new TextFile(fname); jpayne@68: ArrayList list=new ArrayList(); jpayne@68: for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){ jpayne@68: if(!line.startsWith("#")){ jpayne@68: String[] split=line.split("\t"); jpayne@68: list.add(split[0]); jpayne@68: } jpayne@68: } jpayne@68: HashMap map=new HashMap(list.size()*3); jpayne@68: codeBits=(int)Math.ceil(Tools.log2(list.size())); jpayne@68: final int patternBits=63-codeBits; jpayne@68: final long maxCombos=((1L<<(patternBits-1))-1); jpayne@68: for(int i=0; i=maxCombos){map.put(s, -1);} jpayne@68: else{map.put(s, i);} jpayne@68: } jpayne@68: codeMap=map; jpayne@68: return map; jpayne@68: } jpayne@68: jpayne@68: public static long digitize(String s){ jpayne@68: String pattern=remap(s); jpayne@68: Integer code=codeMap.get(pattern); jpayne@68: if(code==null){return -2;} jpayne@68: if(code.intValue()<0){return -1;} jpayne@68: jpayne@68: long number=0; jpayne@68: for(int i=0; i in=new ArrayList(); jpayne@68: private String out=null; jpayne@68: private boolean perFile=true; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private HashMap countMap=new HashMap(); jpayne@68: public static HashMap codeMap; jpayne@68: private static int codeBits=-1; jpayne@68: private static int longestPattern=-1; jpayne@68: jpayne@68: private 
long linesProcessed=0; //Non-empty lines read; summed from each worker's linesProcessedT in accumulate()
	/** Lines counted as accessions (header line excluded); summed from linesOutT in accumulate() */
	private long linesOut=0;
	/** Bytes read (line length plus one per non-empty line); summed from bytesProcessedT in accumulate() */
	private long bytesProcessed=0;
	/** Output byte counter. NOTE(review): no update to this field is visible in this part of the file - confirm it is used. */
	private long bytesOut=0;

	/*--------------------------------------------------------------*/

	/** Input file formats; the constructor sizes this array to match the 'in' list */
	private final FileFormat[] ffina;
	/** Output file format, created in the constructor via FileFormat.testOutput from 'out' */
	private final FileFormat ffout;

	/** Symbol lookup table indexed by ASCII code (0-127); see makeRemap() for the mapping */
	private static final byte[] remap=makeRemap();

	/**
	 * Builds the 128-entry table that collapses an ASCII character into its pattern symbol.
	 * Digits become 'D', upper- and lower-case letters become 'L',
	 * underscore and hyphen both become '-', and every other code becomes '?'.
	 * @return A new byte array of length 128 holding one symbol per ASCII code
	 */
	private static byte[] makeRemap(){
		final byte[] table=new byte[128];
		for(int c=0; c<table.length; c++){
			final byte symbol;
			if(c>='0' && c<='9'){
				symbol='D';
			}else if((c>='A' && c<='Z') || (c>='a' && c<='z')){
				symbol='L';
			}else if(c=='_' || c=='-'){
				symbol='-';
			}else{
				symbol='?';
			}
			table[c]=symbol;
		}
		return table;
	}

	/*--------------------------------------------------------------*/

	/** Destination for status messages; defaults to stderr and is replaced by the PreParser's stream in the constructor */
	private PrintStream outstream=System.err;
	/** Verbose flag, set by the "verbose" argument and copied to the file/stream reader classes */
	public static boolean verbose=false;
	/** Becomes true when waiting on worker threads reports failure; success() returns its negation */
	public boolean errorState=false;
	/** Copied from the Parser's overwrite setting; passed to the output-file checks */
	private boolean overwrite=false;
	/** Copied from the Parser's append setting; passed to the output-file checks */
	private boolean append=false;

}