jpayne@68: package sketch; jpayne@68: jpayne@68: import java.io.File; jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Collection; jpayne@68: import java.util.LinkedHashSet; jpayne@68: import java.util.concurrent.ConcurrentHashMap; jpayne@68: import java.util.concurrent.atomic.AtomicInteger; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import kmer.AbstractKmerTableSet; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.ReadStats; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import structures.ByteBuilder; jpayne@68: import tax.TaxFilter; jpayne@68: import tax.TaxTree; jpayne@68: jpayne@68: /** jpayne@68: * Compares one or more input sketches to a set of reference sketches. jpayne@68: * jpayne@68: * @author Brian Bushnell jpayne@68: * @date July 29, 2016 jpayne@68: * jpayne@68: */ jpayne@68: public class CompareSketch extends SketchObject { jpayne@68: jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: final int oldBufLen=Shared.bufferLen(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: CompareSketch x=new CompareSketch(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: Shared.setBufferLen(oldBufLen); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: jpayne@68: alignerPool.poison(); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public CompareSketch(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, null, false); jpayne@68: // PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: silent=PreParser.silent; jpayne@68: if(silent){AbstractKmerTableSet.DISPLAY_PROGRESS=false;} jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: KILL_OK=true; jpayne@68: TaxFilter.REQUIRE_PRESENT=false; jpayne@68: defaultParams.mode=PER_FILE; jpayne@68: jpayne@68: //Create a parser object jpayne@68: Parser parser=new Parser(); jpayne@68: parser.out1="stdout.txt"; jpayne@68: jpayne@68: //Parse each argument jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("in")){ jpayne@68: addFiles(b, in); jpayne@68: }else if(parseSketchFlags(arg, a, b)){ jpayne@68: //Do nothing jpayne@68: }else if(a.equals("parse_flag_goes_here")){ jpayne@68: long fake_variable=Parse.parseKMG(b); jpayne@68: //Set a variable here jpayne@68: }else if(a.equals("ordered")){ jpayne@68: ordered=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("alltoall") || a.equals("ata")){ jpayne@68: allToAll=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("skipcompare") || a.equals("sketchonly")){ jpayne@68: skipCompare=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("compareself") || a.equals("includeself")){ jpayne@68: compareSelf=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("printmemory")){ jpayne@68: printMemory=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("parsesubunit")){ jpayne@68: SketchMaker.parseSubunit=Parse.parseBoolean(b); jpayne@68: } jpayne@68: jpayne@68: else if(a.equals("taxtree") || a.equals("tree")){ jpayne@68: taxTreeFile=b; jpayne@68: } jpayne@68: jpayne@68: else if(a.equals("name") || a.equals("taxname")){ jpayne@68: outTaxName=b; jpayne@68: }else if(a.equals("name0")){ jpayne@68: outName0=b; jpayne@68: }else if(a.equals("fname")){ jpayne@68: outFname=b; jpayne@68: }else if(a.equals("outsketch") || a.equals("sketchout") || a.equals("outs") || a.equals("sketch")){ jpayne@68: outSketch=b; jpayne@68: }else if(a.equals("files")){ jpayne@68: sketchFiles=Integer.parseInt(b); jpayne@68: }else if(a.equals("taxid") || a.equals("tid")){ jpayne@68: outTaxID=Integer.parseInt(b); jpayne@68: }else if(a.equals("spid")){ jpayne@68: outSpid=Integer.parseInt(b); jpayne@68: }else if(a.equals("imgid")){ jpayne@68: outImgID=Integer.parseInt(b); jpayne@68: }else if((a.startsWith("meta_") || a.startsWith("mt_")) && b!=null){ jpayne@68: if(outMeta==null){outMeta=new ArrayList();} jpayne@68: int underscore=a.indexOf('_', 0); jpayne@68: outMeta.add(a.substring(underscore+1)+":"+b); jpayne@68: } jpayne@68: jpayne@68: else if(searcher.parse(arg, a, b, false)){ jpayne@68: // System.err.println("*"+arg); jpayne@68: parser.parse(arg, a, b); //Catches shared flags like "threads" jpayne@68: Blacklist.parseBlacklist(arg, a, b); //Catches flags like "nt" or "refseq" jpayne@68: } jpayne@68: jpayne@68: else if(parser.parse(arg, a, b)){//Parse standard flags in the parser jpayne@68: //do nothing jpayne@68: } jpayne@68: jpayne@68: else if(searcher.parse(arg, a, b, true)){ jpayne@68: // System.err.println("**"+arg); jpayne@68: //do nothing jpayne@68: } jpayne@68: jpayne@68: else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: } jpayne@68: } jpayne@68: if("auto".equalsIgnoreCase(taxTreeFile)){taxTreeFile=TaxTree.defaultTreeFile();} jpayne@68: jpayne@68: outMeta=SketchObject.fixMeta(outMeta); jpayne@68: SketchObject.postParse(); jpayne@68: jpayne@68: if(skipCompare){ jpayne@68: allToAll=false; jpayne@68: searcher.autoIndex=false; jpayne@68: makeIndex=false; jpayne@68: in.addAll(searcher.refFiles); jpayne@68: searcher.refFiles.clear(); jpayne@68: }else if(in.isEmpty() && args.length>0 && !allToAll){ //Allows first argument to be used as the input file without in= flag jpayne@68: String x=args[0]; jpayne@68: if(x.indexOf('=')<0 && new File(x).exists() && searcher.refFiles.contains(x)){ jpayne@68: searcher.refFiles.remove(x); jpayne@68: in.add(x); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: {//Process parser fields jpayne@68: overwrite=ReadStats.overwrite=parser.overwrite; jpayne@68: append=ReadStats.append=parser.append; jpayne@68: jpayne@68: out=parser.out1; jpayne@68: } jpayne@68: jpayne@68: // assert(false) : in+"\n"+searcher.refFiles; jpayne@68: jpayne@68: if(allToAll){ jpayne@68: LinkedHashSet set=new LinkedHashSet(); jpayne@68: set.addAll(in); jpayne@68: set.addAll(searcher.refFiles); jpayne@68: in.clear(); jpayne@68: searcher.refFiles.clear(); jpayne@68: in.addAll(set); jpayne@68: searcher.refFiles.addAll(set); jpayne@68: } jpayne@68: jpayne@68: //Ensure there is an input file jpayne@68: if(in.isEmpty() && !skipCompare){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: jpayne@68: //Ensure there is an ref file jpayne@68: if(searcher.refFiles.isEmpty() && !skipCompare){ jpayne@68: if(outSketch==null){throw new RuntimeException("Error - at least one reference file is required.");} jpayne@68: } jpayne@68: jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: ffout=FileFormat.testOutput(out, FileFormat.TEXT, null, false, overwrite, append, ordered); jpayne@68: if(!ffout.stdio() && !defaultParams.setColors){defaultParams.printColors=false;} jpayne@68: jpayne@68: //Ensure input files can be read jpayne@68: if(!Tools.testInputFiles(false, true, taxTreeFile)){ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: if(!Tools.testInputFiles(true, false, in.toArray(new String[0]))){ jpayne@68: if(in.size()==1){ jpayne@68: String s=in.get(0); jpayne@68: String s1=s.replaceFirst("#", "1"), s2=s.replaceFirst("#", "2"); jpayne@68: Tools.testInputFiles(true, false, s1, s2); jpayne@68: }else{ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: // assert(makeIndex || defaultParams.printContam2==false) : "Contam2 requires the flag index=t"; jpayne@68: jpayne@68: SSUMap.load(outstream); jpayne@68: if(taxTreeFile!=null){setTaxtree(taxTreeFile, silent ? null : outstream);} jpayne@68: defaultParams.postParse(true, true); jpayne@68: if(!defaultParams.printSSU){processSSU=false;} jpayne@68: allowMultithreadedFastq=in.size()<2 && !allToAll; jpayne@68: if(!allowMultithreadedFastq){Shared.capBufferLen(40);} jpayne@68: // assert(defaultParams.checkValid()); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public void process(Timer t){ jpayne@68: Timer ttotal=new Timer(); jpayne@68: jpayne@68: t.start(); jpayne@68: jpayne@68: if(!silent){outstream.println("Loading sketches.");} jpayne@68: searcher.makeTool(1, false, defaultParams.mergePairs); jpayne@68: SketchTool tool=new SketchTool(targetSketchSize, defaultParams); jpayne@68: jpayne@68: final int mode2=(defaultParams.mode==PER_FILE ? PER_FILE : PER_TAXA); jpayne@68: if(skipCompare){ jpayne@68: makeIndex=false; jpayne@68: inSketches=tool.loadSketches_MT(defaultParams, in); jpayne@68: }else if(!useWhitelist || allToAll){ jpayne@68: if(allToAll){ jpayne@68: makeIndex=searcher.refFileCount()>0 && (makeIndex || defaultParams.needIndex() || searcher.autoIndex); jpayne@68: searcher.loadReferences(mode2, defaultParams); jpayne@68: inSketches=(ArrayList) searcher.refSketches.clone(); jpayne@68: }else{ jpayne@68: inSketches=tool.loadSketches_MT(defaultParams, in); jpayne@68: jpayne@68: for(Sketch sk : inSketches){ jpayne@68: if(sk.taxID<1 || sk.taxID>=minFakeID || outTaxID>0){sk.taxID=outTaxID;} jpayne@68: if(outSpid>0){sk.spid=outSpid;} jpayne@68: if(outImgID>0){sk.imgID=outImgID;} jpayne@68: if(outTaxName!=null){sk.setTaxName(outTaxName);} jpayne@68: if(outFname!=null){sk.setFname(outFname);} jpayne@68: if(outName0!=null){sk.setName0(outName0);} jpayne@68: if(SketchMaker.parseSubunit && sk.name0()!=null){ jpayne@68: if(outMeta!=null){ jpayne@68: sk.meta=(ArrayList)sk.meta.clone(); jpayne@68: }else if(sk.meta==null){ jpayne@68: if(sk.name0().contains("SSU_")){ jpayne@68: sk.addMeta("subunit:ssu"); jpayne@68: }else if(sk.name0().contains("LSU_")){ jpayne@68: sk.addMeta("subunit:lsu"); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: sk.setMeta(outMeta); jpayne@68: if(defaultParams.printSSU()){sk.loadSSU();}//since taxID was just set jpayne@68: } jpayne@68: jpayne@68: if(outTaxID>0){ jpayne@68: for(Sketch sk : inSketches){ jpayne@68: if(sk.taxID<1 || sk.taxID>=minFakeID){sk.taxID=outTaxID;} jpayne@68: } jpayne@68: } jpayne@68: makeIndex=searcher.refFileCount()>0 && ((searcher.autoIndex && inSketches.size()>8) || defaultParams.needIndex() || (makeIndex && !searcher.autoIndex)); jpayne@68: searcher.loadReferences(mode2, defaultParams); jpayne@68: if(mode2==PER_FILE){ jpayne@68: int max=inSketches.size(); jpayne@68: for(int i=0; i0); //Index is required in whitelist mode. jpayne@68: searcher.loadReferences(mode2, defaultParams); jpayne@68: inSketches=tool.loadSketches_MT(defaultParams, in); jpayne@68: } jpayne@68: jpayne@68: if(outSketch!=null){ jpayne@68: writeSketches(outSketch, sketchFiles); jpayne@68: } jpayne@68: jpayne@68: final int numLoaded=(inSketches.size()+searcher.refSketchCount())/(allToAll ? 2 : 1); jpayne@68: t.stop(); jpayne@68: if(!silent){outstream.println("Loaded "+numLoaded+" sketch"+(numLoaded==1 ? "" : "es")+" in "+t.toString());} jpayne@68: if(printMemory){ jpayne@68: System.gc(); jpayne@68: Shared.printMemory(); jpayne@68: } jpayne@68: jpayne@68: if(skipCompare) { jpayne@68: ttotal.stop("Total Time: \t"); jpayne@68: return; jpayne@68: } jpayne@68: jpayne@68: t.start(); jpayne@68: jpayne@68: jpayne@68: ByteStreamWriter tsw=(ffout==null ? null : new ByteStreamWriter(ffout)); jpayne@68: if(tsw!=null){ jpayne@68: tsw.start(); jpayne@68: if(defaultParams.format==DisplayParams.FORMAT_QUERY_REF_ANI || defaultParams.format==DisplayParams.FORMAT_CONSTELLATION){ jpayne@68: String s=defaultParams.header()+"\n"; jpayne@68: tsw.forcePrint(s.getBytes()); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: boolean success=true; jpayne@68: final int inSize=inSketches.size(); jpayne@68: if(inSize==1 || Shared.threads()<2 || inSize<4){ jpayne@68: ByteBuilder sb=new ByteBuilder(); jpayne@68: success=searcher.compare(inSketches, sb, defaultParams, Shared.threads()); jpayne@68: success&=(!searcher.errorState); jpayne@68: if(tsw!=null){ jpayne@68: sb.append('\n'); jpayne@68: if(ordered){ jpayne@68: tsw.addJob(sb); jpayne@68: }else{ jpayne@68: tsw.println(sb); jpayne@68: } jpayne@68: } jpayne@68: }else{//More sketches than threads, and more than one thread jpayne@68: final int threads=Tools.min(Shared.threads(), inSize); jpayne@68: jpayne@68: ArrayList alct=new ArrayList(threads); jpayne@68: AtomicInteger next=new AtomicInteger(0); jpayne@68: for(int i=0; i=0) : fname; jpayne@68: assert(files>0) : files; jpayne@68: jpayne@68: ByteStreamWriter[] bswa=new ByteStreamWriter[files]; jpayne@68: for(int i=0; i list){ jpayne@68: if(a==null){return;} jpayne@68: File f=null; jpayne@68: if(a.indexOf(',')>=0){f=new File(a);} jpayne@68: if(f==null || f.exists()){ jpayne@68: list.add(a); jpayne@68: }else{ jpayne@68: for(String s : a.split(",")){list.add(s);} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Inner Classes ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private class CompareThread extends Thread { jpayne@68: jpayne@68: CompareThread(final int tid_, final AtomicInteger nextSketch_, ByteStreamWriter tsw_){ jpayne@68: tid=tid_; jpayne@68: nextSketch=nextSketch_; jpayne@68: tsw=tsw_; jpayne@68: } jpayne@68: jpayne@68: @Override jpayne@68: public void run(){ jpayne@68: success=false; jpayne@68: final int inLim=inSketches.size(); jpayne@68: final boolean json=defaultParams.json(); jpayne@68: jpayne@68: for(int inNum=nextSketch.getAndIncrement(); inNum map=new ConcurrentHashMap(101); jpayne@68: final ByteStreamWriter tsw; jpayne@68: jpayne@68: boolean success=false; jpayne@68: jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: private ArrayList in=new ArrayList(); jpayne@68: jpayne@68: private String out="stdout.txt"; jpayne@68: jpayne@68: private String taxTreeFile=null; jpayne@68: jpayne@68: private ArrayList inSketches; jpayne@68: jpayne@68: public final SketchSearcher searcher=new SketchSearcher(); jpayne@68: jpayne@68: private boolean printMemory=false; jpayne@68: private boolean silent=false; jpayne@68: jpayne@68: /*Override metadata */ jpayne@68: private String outTaxName=null; jpayne@68: private String outFname=null; jpayne@68: private String outName0=null; jpayne@68: private String outSketch=null; jpayne@68: private int sketchFiles=1; jpayne@68: private int outTaxID=-1; jpayne@68: private long outSpid=-1; jpayne@68: private long outImgID=-1; jpayne@68: private ArrayList outMeta=null; jpayne@68: private long resultsPrinted=0; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Primary output file */ jpayne@68: private final FileFormat ffout; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Common Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Print status messages to this output stream */ jpayne@68: private PrintStream outstream=System.err; jpayne@68: /** Print verbose messages */ jpayne@68: public static boolean verbose=false; jpayne@68: /** True if an error was encountered */ jpayne@68: public boolean errorState=false; jpayne@68: /** Overwrite existing output files */ jpayne@68: private boolean overwrite=false; jpayne@68: /** Append to existing output files */ jpayne@68: private boolean append=false; jpayne@68: private boolean ordered=true; jpayne@68: jpayne@68: }