jpayne@68: package sketch; jpayne@68: jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Collections; jpayne@68: import java.util.HashMap; jpayne@68: import java.util.Map.Entry; jpayne@68: import java.util.concurrent.atomic.AtomicInteger; jpayne@68: jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.ReadStats; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import stream.FASTQ; jpayne@68: import stream.FastaReadInputStream; jpayne@68: import stream.Read; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.FloatList; jpayne@68: import tax.TaxNode; jpayne@68: import tax.TaxTree; jpayne@68: import template.Accumulator; jpayne@68: import template.ThreadWaiter; jpayne@68: jpayne@68: /** jpayne@68: * Compares SSUs, all-to-all or fractional matrix. jpayne@68: * jpayne@68: * @author Brian Bushnell jpayne@68: * @date December 2, 2019 jpayne@68: * jpayne@68: */ jpayne@68: public class CompareSSU implements Accumulator { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: CompareSSU x=new CompareSSU(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public CompareSSU(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: //Set shared static variables prior to parsing jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: jpayne@68: {//Parse the arguments jpayne@68: final Parser parser=parse(args); jpayne@68: Parser.processQuality(); jpayne@68: jpayne@68: maxReads=parser.maxReads; jpayne@68: overwrite=ReadStats.overwrite=parser.overwrite; jpayne@68: append=ReadStats.append=parser.append; jpayne@68: jpayne@68: in1=parser.in1; jpayne@68: jpayne@68: out1=parser.out1; jpayne@68: } jpayne@68: jpayne@68: validateParams(); jpayne@68: if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: FASTQ.FORCE_INTERLEAVED=FASTQ.TEST_INTERLEAVED=false; jpayne@68: checkFileExistence(); //Ensure files can be read and written jpayne@68: checkStatics(); //Adjust file-related static fields as needed for this program jpayne@68: jpayne@68: //Create output FileFormat objects jpayne@68: ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, overwrite, append, ordered); jpayne@68: jpayne@68: tree=(treeFile==null) ? null : TaxTree.loadTaxTree(treeFile, outstream, true, false); jpayne@68: jpayne@68: SSUMap.r16SFile=in1; jpayne@68: if(SSUMap.r16SFile!=null){ jpayne@68: SSUMap.load(outstream); jpayne@68: HashMap ssuMap=SSUMap.r16SMap; jpayne@68: ssuList=new ArrayList(ssuMap.size()); jpayne@68: for(Entry e : ssuMap.entrySet()){ jpayne@68: int id=e.getKey(); jpayne@68: byte[] value=e.getValue(); jpayne@68: if(value.length>=minlen && value.length<=maxlen){ jpayne@68: Read r=new Read(value, null, id);//Sets numeric ID to TaxID. jpayne@68: if(maxns<0 || r.countNocalls()<=maxns){ jpayne@68: ssuList.add(r); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: Collections.shuffle(ssuList); jpayne@68: } jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: if(b!=null && b.equalsIgnoreCase("null")){b=null;} jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("tree")){ jpayne@68: treeFile=b; jpayne@68: }else if(a.equals("ordered")){ jpayne@68: ordered=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("ata") || a.equals("alltoall")){ jpayne@68: allToAll=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("store") || a.equals("storeresults")){ jpayne@68: storeResults=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("minlen") || a.equals("maxlength")){ jpayne@68: minlen=Parse.parseIntKMG(b); jpayne@68: }else if(a.equals("maxlen") || a.equals("maxlength")){ jpayne@68: maxlen=Parse.parseIntKMG(b); jpayne@68: }else if(a.equalsIgnoreCase("maxns")){ jpayne@68: maxns=Parse.parseIntKMG(b); jpayne@68: }else if(a.equals("parse_flag_goes_here")){ jpayne@68: long fake_variable=Parse.parseKMG(b); jpayne@68: //Set a variable here jpayne@68: }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser jpayne@68: //do nothing jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: return parser; jpayne@68: } jpayne@68: jpayne@68: /** Ensure files can be read and written */ jpayne@68: private void checkFileExistence(){ jpayne@68: //Ensure output files can be written jpayne@68: if(!Tools.testOutputFiles(overwrite, append, false, out1)){ jpayne@68: outstream.println((out1==null)+", "+out1); jpayne@68: throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out1+"\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure input files can be read jpayne@68: if(!Tools.testInputFiles(false, true, in1)){ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure that no file was specified multiple times jpayne@68: if(!Tools.testForDuplicateFiles(true, in1, out1)){ jpayne@68: throw new RuntimeException("\nSome file names were specified multiple times.\n"); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** Adjust file-related static fields as needed for this program */ jpayne@68: private static void checkStatics(){ jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: assert(FastaReadInputStream.settingsOK()); jpayne@68: } jpayne@68: jpayne@68: /** Ensure parameter ranges are within bounds and required parameters are set */ jpayne@68: private boolean validateParams(){ jpayne@68: return true; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Create read streams and process all data */ jpayne@68: void process(Timer t){ jpayne@68: jpayne@68: ByteStreamWriter bsw=makeBSW(ffout1); jpayne@68: if(bsw!=null){ jpayne@68: bsw.forcePrint("#Level\tIdentity\tQueryID\tRefID\n"); jpayne@68: } jpayne@68: jpayne@68: //Reset counters jpayne@68: queriesProcessed=0; jpayne@68: comparisons=0; jpayne@68: jpayne@68: //Process the reads in separate threads jpayne@68: spawnThreads(bsw); jpayne@68: jpayne@68: if(verbose){outstream.println("Finished; closing streams.");} jpayne@68: jpayne@68: //Close the read streams jpayne@68: if(bsw!=null){errorState|=bsw.poisonAndWait();} jpayne@68: jpayne@68: { jpayne@68: ByteBuilder bb=new ByteBuilder(); jpayne@68: bb.append("\nLevel \tCount\tMean"+(storeResults ? "\tMedian\t90%ile\t10%ile\tSTDev" : "")+"\n"); jpayne@68: outstream.print(bb); jpayne@68: final int minlen="superkingdom".length(); jpayne@68: for(int level=0; level0){ jpayne@68: bb.clear(); jpayne@68: bb.append(TaxTree.levelToStringExtended(level)); jpayne@68: while(bb.length() alpt=new ArrayList(threads); jpayne@68: for(int i=0; i(ssuList.size()); jpayne@68: listCopy.addAll(ssuList); jpayne@68: for(int i=0; i0 ? maxReads : Integer.MAX_VALUE)); jpayne@68: jpayne@68: for(int num=next.getAndIncrement(); num0 && tree.getNode(rid)!=null){ jpayne@68: int aid=tree.commonAncestor(qid, rid); jpayne@68: if(aid>0){ jpayne@68: TaxNode tn=tree.getNode(aid); jpayne@68: if(tn.isRanked()){ jpayne@68: int level=tn.levelExtended; jpayne@68: long mask=1L< listCopy; jpayne@68: jpayne@68: final FloatList[] idListsT=new FloatList[taxLevels]; jpayne@68: long[] countsT=new long[taxLevels]; jpayne@68: double[] sumsT=new double[taxLevels]; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Primary input file path */ jpayne@68: private String in1=null; jpayne@68: jpayne@68: private String treeFile="auto"; jpayne@68: jpayne@68: /** Primary output file path */ jpayne@68: private String out1=null; jpayne@68: jpayne@68: public static ArrayList ssuList=null; jpayne@68: jpayne@68: final static int taxLevels=TaxTree.numTaxLevelNamesExtended; jpayne@68: static final String[] printLevelsArray=new String[] {"strain", "species", "genus", "family", "order", "class", "phylum", "superkingdom", "life"}; jpayne@68: static final long printLevels=makePrintLevels(printLevelsArray); jpayne@68: jpayne@68: private final TaxTree tree; jpayne@68: jpayne@68: private static final long makePrintLevels(String[] names){ jpayne@68: long mask=0; jpayne@68: for(String s : names){ jpayne@68: int level=TaxTree.stringToLevelExtended(s); jpayne@68: mask|=(1L<