jpayne@68: package icecream; jpayne@68: jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.ArrayList; jpayne@68: import java.util.Arrays; jpayne@68: jpayne@68: import aligner.AlignmentResult; jpayne@68: import aligner.FlatAligner; jpayne@68: import aligner.MultiStateAligner9PacBioAdapter2; jpayne@68: import aligner.SingleStateAlignerPacBioAdapter; jpayne@68: import dna.AminoAcid; jpayne@68: import dna.Data; jpayne@68: import fileIO.ByteFile; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.FileFormat; jpayne@68: import fileIO.ReadWrite; jpayne@68: import json.JsonObject; jpayne@68: import shared.Parse; jpayne@68: import shared.Parser; jpayne@68: import shared.PreParser; jpayne@68: import shared.ReadStats; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import shared.TrimRead; jpayne@68: import stream.ConcurrentReadOutputStream; jpayne@68: import stream.FASTQ; jpayne@68: import stream.FastaReadInputStream; jpayne@68: import stream.Read; jpayne@68: import stream.SamLine; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.EntropyTracker; jpayne@68: jpayne@68: /** jpayne@68: * Detects inverted repeats in PacBio reads. jpayne@68: * Attempts to determine whether reads are chimeric jpayne@68: * due to a missing adapter. jpayne@68: * jpayne@68: * @author Brian Bushnell jpayne@68: * @date June 5, 2019 jpayne@68: * jpayne@68: */ jpayne@68: public final class IceCreamFinder { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: //Start a timer immediately upon code entrance. jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: //Create an instance of this class jpayne@68: IceCreamFinder x=new IceCreamFinder(args); jpayne@68: jpayne@68: //Run the object jpayne@68: x.process(t); jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(x.outstream); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Constructor. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public IceCreamFinder(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, getClass(), false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: Parser.setQuality(33); jpayne@68: jpayne@68: //Set shared static variables prior to parsing jpayne@68: ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true; jpayne@68: ReadWrite.USE_BGZIP=ReadWrite.USE_UNBGZIP=ReadWrite.PREFER_BGZIP=true; jpayne@68: ReadWrite.MAX_ZIP_THREADS=Shared.threads(); jpayne@68: FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false; jpayne@68: SamLine.SET_FROM_OK=true; jpayne@68: Shared.setBufferData(1000000); jpayne@68: // Shared.FASTA_WRAP=511; jpayne@68: Data.USE_SAMBAMBA=false;//Sambamba changes PacBio headers. jpayne@68: Read.CHANGE_QUALITY=false; jpayne@68: EntropyTracker.defaultK=3; jpayne@68: jpayne@68: {//Parse the arguments jpayne@68: final Parser parser=parse(args); jpayne@68: Parser.processQuality(); jpayne@68: jpayne@68: maxReads=parser.maxReads; jpayne@68: overwrite=ReadStats.overwrite=parser.overwrite; jpayne@68: append=ReadStats.append=parser.append; jpayne@68: jpayne@68: in1=parser.in1; jpayne@68: extin=parser.extin; jpayne@68: jpayne@68: if(outg==null){outg=parser.out1;} jpayne@68: extout=parser.extout; jpayne@68: } jpayne@68: jpayne@68: //Determine how many threads may be used jpayne@68: threads=Shared.threads(); jpayne@68: jpayne@68: //Ensure there is an input file jpayne@68: if(in1==null){throw new RuntimeException("Error - at least one input file is required.");} jpayne@68: fixExtensions(); //Add or remove .gz or .bz2 as needed jpayne@68: checkFileExistence(); //Ensure files can be read and written jpayne@68: checkStatics(); //Adjust file-related static fields as needed for this program jpayne@68: jpayne@68: //Create output FileFormat objects jpayne@68: ffoutg=FileFormat.testOutput(outg, FileFormat.FASTQ, extout, true, overwrite, append, false); jpayne@68: ffouta=FileFormat.testOutput(outa, FileFormat.FASTQ, extout, true, overwrite, append, false); jpayne@68: ffoutb=FileFormat.testOutput(outb, FileFormat.FASTQ, extout, true, overwrite, append, false); jpayne@68: ffoutj=FileFormat.testOutput(outj, FileFormat.FASTA, extout, true, overwrite, append, false); jpayne@68: ffstats=FileFormat.testOutput(outstats, FileFormat.TXT, null, false, overwrite, append, false); jpayne@68: ffasrhist=FileFormat.testOutput(asrhist, FileFormat.TXT, null, false, overwrite, append, false); jpayne@68: ffirsrhist=FileFormat.testOutput(irsrhist, FileFormat.TXT, null, false, overwrite, append, false); jpayne@68: jpayne@68: if(!setAmbig && ffouta!=null){ jpayne@68: sendAmbigToGood=sendAmbigToBad=false; jpayne@68: } jpayne@68: jpayne@68: //Create input FileFormat objects jpayne@68: ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true); jpayne@68: jpayne@68: final int alen=(adapter==null ? 20 : adapter.length); jpayne@68: stride=(int)(alen*1.9f); jpayne@68: window=(int)(alen*3.8f+10); jpayne@68: ALIGN_ROWS=alen+1; jpayne@68: ALIGN_COLUMNS=window+2; jpayne@68: jpayne@68: maxSwScore=aligner.MultiStateAligner9PacBioAdapter.maxQuality(alen); jpayne@68: minSwScore=(int)(maxSwScore*adapterRatio2); jpayne@68: minSwScoreSuspect=(int)(maxSwScore*Tools.min(adapterRatio2*suspectRatio, adapterRatio2-((1-suspectRatio)*.2f))); jpayne@68: maxImperfectSwScore=aligner.MultiStateAligner9PacBioAdapter.maxImperfectScore(alen); jpayne@68: suspectMidpoint=(minSwScoreSuspect+minSwScore)/2; jpayne@68: if(adapter==null){alignAdapter=false;} jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization Helpers ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Parse arguments from the command line */ jpayne@68: private Parser parse(String[] args){ jpayne@68: jpayne@68: //Create a parser object jpayne@68: Parser parser=new Parser(); jpayne@68: jpayne@68: //Set any necessary Parser defaults here jpayne@68: //parser.foo=bar; jpayne@68: jpayne@68: //Parse each argument jpayne@68: for(int i=0; i1 ? split[1] : null; jpayne@68: if(b!=null && b.equalsIgnoreCase("null")){b=null;} jpayne@68: jpayne@68: if(a.equals("verbose")){ jpayne@68: verbose=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("format")){ jpayne@68: if(b==null){ jpayne@68: assert(false) : arg; jpayne@68: }else if(Tools.isDigit(b.charAt(i))){ jpayne@68: format=Integer.parseInt(b); jpayne@68: }else if(b.equalsIgnoreCase("json")){ jpayne@68: format=FORMAT_JSON; jpayne@68: }else if(b.equalsIgnoreCase("text")){ jpayne@68: format=FORMAT_TEXT; jpayne@68: }else{ jpayne@68: assert(false) : arg; jpayne@68: } jpayne@68: assert(format>=1 && format<=2) : arg; jpayne@68: }else if(a.equals("json")){ jpayne@68: boolean x=Parse.parseBoolean(b); jpayne@68: format=(x ? FORMAT_JSON : FORMAT_TEXT); jpayne@68: }else if(a.equals("ss") || a.equals("samstreamer") || a.equals("streamer")){ jpayne@68: if(b!=null && Tools.isDigit(b.charAt(0))){ jpayne@68: ZMWStreamer.useStreamer=true; jpayne@68: assert(Integer.parseInt(b)==1) : "ZMWStreamer threads currently capped at 1."; jpayne@68: // ZMWStreamer.streamerThreads=Tools.max(1, Integer.parseInt(b)); jpayne@68: }else{ jpayne@68: ZMWStreamer.useStreamer=Parse.parseBoolean(b); jpayne@68: } jpayne@68: }else if(a.equals("icecreamonly") || a.equals("ico")){ jpayne@68: filterIceCreamOnly=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("keepshortreads") || a.equals("ksr")){ jpayne@68: keepShortReads=Parse.parseBoolean(b); jpayne@68: }else if(a.equalsIgnoreCase("keepzmwstogether") || a.equals("kzt") || a.equals("keepreadstogether") || a.equals("krt")){ jpayne@68: keepZMWsTogether=Parse.parseBoolean(b); jpayne@68: }else if(a.equalsIgnoreCase("samplerate")){ jpayne@68: float f=Float.parseFloat(b); jpayne@68: assert(false) : "TODO"; //TODO jpayne@68: }else if(a.equals("modifyheader") || a.equals("modifyheaders") || a.equals("changeheader") || a.equals("changeheaders")){ jpayne@68: modifyHeader=Parse.parseBoolean(b); jpayne@68: }else if(a.equalsIgnoreCase("ccs")){ jpayne@68: CCS=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("npad")){ jpayne@68: npad=Integer.parseInt(b); jpayne@68: }else if(a.equals("minlength") || a.equals("minlen")){ jpayne@68: minLengthAfterTrimming=Integer.parseInt(b); jpayne@68: }else if(a.equals("realign")){ jpayne@68: realign=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("aligninverse") || a.equals("alignrc") || a.equals("findicecream")){ jpayne@68: alignRC=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("alignadapter")){ jpayne@68: alignAdapter=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("timeless")){ jpayne@68: timeless=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("flaglongreads")){ jpayne@68: flagLongReads=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("trimreads") || a.equals("trim")){ jpayne@68: trimReads=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("adapter")){ jpayne@68: adapter=b==null ? null : b.getBytes(); jpayne@68: }else if(a.equals("targetqlen") || a.equals("qlen")){ jpayne@68: targetQlen=Integer.parseInt(b); jpayne@68: }else if(a.equals("maxqlenfraction") || a.equals("maxfraction") || a.equals("qlenfraction")){ jpayne@68: maxQlenFraction=Float.parseFloat(b); jpayne@68: }else if(a.equals("junctionfraction") || a.equals("shortfraction")){ jpayne@68: minJunctionFraction=Float.parseFloat(b); jpayne@68: }else if(a.equals("minratio1") || a.equals("ratio1") || a.equals("id1")){ jpayne@68: minRatio1=Float.parseFloat(b); jpayne@68: }else if(a.equals("minratio2") || a.equals("ratio2") || a.equals("id2")){ jpayne@68: minRatio2=Float.parseFloat(b); jpayne@68: }else if(a.equals("minratio") || a.equals("ratio") || a.equals("id")){ jpayne@68: minRatio1=minRatio2=Float.parseFloat(b); jpayne@68: }else if(a.equals("adapterratio") || a.equals("adapterratio1") || a.equals("ratior") || a.equals("ratior1")){ jpayne@68: adapterRatio=Float.parseFloat(b); jpayne@68: }else if(a.equals("adapterratio2") || a.equals("ratior2")){ jpayne@68: adapterRatio2=Float.parseFloat(b); jpayne@68: }else if(a.equals("suspectratio")){ jpayne@68: suspectRatio=Float.parseFloat(b); jpayne@68: }else if(a.equals("minqlen")){ jpayne@68: minQlen=Integer.parseInt(b); jpayne@68: }else if(a.equals("minscore")){ jpayne@68: minScore=Integer.parseInt(b); jpayne@68: }else if(a.equals("parsecustom")){ jpayne@68: parseCustom=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("printtiming") || a.equals("extended")){ jpayne@68: printTiming=Parse.parseBoolean(b); jpayne@68: }else if(a.equals("queuelen") || a.equals("qlen")){ jpayne@68: queuelen=Integer.parseInt(b); jpayne@68: }else if(a.equals("outg") || a.equals("outgood")){ jpayne@68: outg=b; jpayne@68: }else if(a.equals("outa") || a.equals("outambig")){ jpayne@68: outa=b; jpayne@68: }else if(a.equals("outb") || a.equals("outbad")){ jpayne@68: outb=b; jpayne@68: }else if(a.equals("outj") || a.equals("outjunctions") || a.equals("junctions")){ jpayne@68: outj=b; jpayne@68: }else if(a.equals("outs") || a.equals("outstats") || a.equals("stats")){ jpayne@68: outstats=b; jpayne@68: }else if(a.equals("asrhist") || a.equals("ahist")){ jpayne@68: asrhist=b; jpayne@68: }else if(a.equals("irsrhist") || a.equals("irhist")){ jpayne@68: irsrhist=b; jpayne@68: }else if(a.equals("ambig")){ jpayne@68: sendAmbigToGood=sendAmbigToBad=false; jpayne@68: if(b!=null){ jpayne@68: String[] split2=b.split(","); jpayne@68: for(String s2 : split2){ jpayne@68: if(s2.equalsIgnoreCase("good")){sendAmbigToGood=true;} jpayne@68: else if(s2.equalsIgnoreCase("bad") || s2.equalsIgnoreCase("toss")){sendAmbigToBad=true;} jpayne@68: else if(s2.equalsIgnoreCase("ambig")){} jpayne@68: else{assert(false) : "Bad argument: '"+s2+"' in '"+arg+"'; should be good or bad";} jpayne@68: } jpayne@68: } jpayne@68: setAmbig=true; jpayne@68: }else if(a.equalsIgnoreCase("trimpolya")){//Parse standard flags in the parser jpayne@68: trimPolyA=Parse.parseBoolean(b); jpayne@68: }else if(PolymerTrimmer.parse(arg, a, b)){ jpayne@68: //do nothing jpayne@68: }else if(a.equals("minentropy") || a.equals("entropy") || a.equals("entropyfilter") || a.equals("efilter")){ jpayne@68: if(b==null || Character.isLetter(b.charAt(0))){ jpayne@68: if(Parse.parseBoolean(b)){ jpayne@68: entropyCutoff=0.55f; jpayne@68: }else{ jpayne@68: entropyCutoff=-1; jpayne@68: } jpayne@68: }else{ jpayne@68: entropyCutoff=Float.parseFloat(b); jpayne@68: } jpayne@68: }else if(a.equals("entropyblock") || a.equals("entropylength") || a.equals("entropylen") || a.equals("entlen")){ jpayne@68: entropyLength=Parse.parseIntKMG(b); jpayne@68: }else if(a.equals("entropyfraction") || a.equals("entfraction")){ jpayne@68: entropyFraction=Float.parseFloat(b); jpayne@68: }else if(a.equals("monomerfraction") || a.equals("maxmonomerfraction") || a.equals("mmf")){ jpayne@68: maxMonomerFraction=Float.parseFloat(b); jpayne@68: }else if(a.equals("parse_flag_goes_here")){ jpayne@68: long fake_variable=Parse.parseKMG(b); jpayne@68: //Set a variable here jpayne@68: }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser jpayne@68: //do nothing jpayne@68: }else{ jpayne@68: outstream.println("Unknown parameter "+args[i]); jpayne@68: assert(false) : "Unknown parameter "+args[i]; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: return parser; jpayne@68: } jpayne@68: jpayne@68: /** Add or remove .gz or .bz2 as needed */ jpayne@68: private void fixExtensions(){ jpayne@68: in1=Tools.fixExtension(in1); jpayne@68: } jpayne@68: jpayne@68: /** Ensure files can be read and written */ jpayne@68: private void checkFileExistence(){ jpayne@68: jpayne@68: //Ensure output files can be written jpayne@68: if(!Tools.testOutputFiles(overwrite, append, false, outg, outa, outb, outj, outstats, asrhist, irsrhist)){ jpayne@68: outstream.println((outg==null)+", "+(outb==null)+", "+outg+", "+outa+", "+outb+", "+outj+", "+outstats); jpayne@68: throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+outg+", "+outa+", "+outb+", "+outj+"\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure input files can be read jpayne@68: if(!Tools.testInputFiles(false, true, in1)){ jpayne@68: throw new RuntimeException("\nCan't read some input files.\n"); jpayne@68: } jpayne@68: jpayne@68: //Ensure that no file was specified multiple times jpayne@68: if(!Tools.testForDuplicateFiles(true, in1, outg, outa, outb, outj, outstats, asrhist, irsrhist)){ jpayne@68: throw new RuntimeException("\nSome file names were specified multiple times.\n"); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: /** Adjust file-related static fields as needed for this program */ jpayne@68: private static void checkStatics(){ jpayne@68: //Adjust the number of threads for input file reading jpayne@68: if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){ jpayne@68: ByteFile.FORCE_MODE_BF2=true; jpayne@68: } jpayne@68: jpayne@68: assert(FastaReadInputStream.settingsOK()); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Outer Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Create read streams and process all data */ jpayne@68: void process(Timer t){ jpayne@68: jpayne@68: //Turn off read validation in the input threads to increase speed jpayne@68: final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR; jpayne@68: Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4; jpayne@68: jpayne@68: //Create a read input stream jpayne@68: ZMWStreamer zstream=new ZMWStreamer(ffin1, Shared.threads(), maxReads, -1); jpayne@68: jpayne@68: //Optionally create read output streams jpayne@68: final ConcurrentReadOutputStream rosg=makeCros(ffoutg); jpayne@68: final ConcurrentReadOutputStream rosa=makeCros(ffouta); jpayne@68: final ConcurrentReadOutputStream rosb=makeCros(ffoutb); jpayne@68: final ConcurrentReadOutputStream rosj=makeCros(ffoutj); jpayne@68: jpayne@68: //Reset counters jpayne@68: readsProcessed=readsOut=0; jpayne@68: basesProcessed=basesOut=0; jpayne@68: junctionsOut=0; jpayne@68: jpayne@68: //Process the reads in separate threads jpayne@68: spawnThreads(zstream, rosg, rosa, rosb, rosj); jpayne@68: jpayne@68: if(verbose){outstream.println("Finished; closing streams.");} jpayne@68: jpayne@68: //Write anything that was accumulated by ReadStats jpayne@68: errorState|=ReadStats.writeAll(); jpayne@68: //Close the read streams jpayne@68: errorState|=ReadWrite.closeStreams(null, rosg, rosa, rosb, rosj); jpayne@68: jpayne@68: //Reset read validation jpayne@68: Read.VALIDATE_IN_CONSTRUCTOR=vic; jpayne@68: jpayne@68: writeScoreRatioHistogram(ffasrhist, adapterScores); jpayne@68: writeScoreRatioHistogram(ffirsrhist, repeatScores); jpayne@68: jpayne@68: //Report timing and results jpayne@68: t.stop(); jpayne@68: jpayne@68: String stats=null; jpayne@68: if(format==FORMAT_TEXT){ jpayne@68: ByteBuilder bb=toText(t); jpayne@68: stats=bb.nl().toString(); jpayne@68: }else if(format==FORMAT_JSON){ jpayne@68: JsonObject jo=toJson(t); jpayne@68: stats=jo.toStringln(); jpayne@68: }else{ jpayne@68: assert(false) : "Bad format: "+format; jpayne@68: } jpayne@68: if(ffstats==null){ jpayne@68: outstream.print(stats); jpayne@68: }else{ jpayne@68: ReadWrite.writeString(stats, outstats); jpayne@68: } jpayne@68: jpayne@68: //Throw an exception of there was an error in a thread jpayne@68: if(errorState){ jpayne@68: throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt."); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: private ByteBuilder toText(Timer t){ jpayne@68: ByteBuilder bb=new ByteBuilder(); jpayne@68: jpayne@68: bb.appendln(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8)); jpayne@68: bb.appendln(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false)); jpayne@68: jpayne@68: long readsFiltered=readsProcessed-readsOut; jpayne@68: bb.appendln(Tools.numberPercent("Reads Filtered:", readsFiltered, readsFiltered*100.0/(readsProcessed), 3, 8)); jpayne@68: if(trimReads || trimPolyA){ jpayne@68: bb.appendln(Tools.numberPercent("Reads Trimmed:", readsTrimmed, readsTrimmed*100.0/(readsProcessed), 3, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Bases Trimmed:", basesTrimmed, basesTrimmed*100.0/(basesProcessed), 3, 8)); jpayne@68: } jpayne@68: bb.appendln(Tools.number("Total ZMWs:", ZMWs, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Bad ZMWs:", iceCreamZMWs, iceCreamZMWs*100.0/(ZMWs), 3, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Absent Adapter:", missingAdapterZMWs, missingAdapterZMWs*100.0/(ZMWs), 3, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Hidden Adapter:", hiddenAdapterZMWs, hiddenAdapterZMWs*100.0/(ZMWs), 3, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Ambiguous IR:", ambiguousZMWs, ambiguousZMWs*100.0/(ZMWs), 3, 8)); jpayne@68: // bb.appendln(Tools.numberPercent("Low Entropy:", lowEntropyReads, lowEntropyReads*100.0/(readsProcessed), 3, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Low Entropy:", lowEntropyZMWs, lowEntropyZMWs*100.0/(ZMWs), 3, 8)); jpayne@68: jpayne@68: bb.appendln(Tools.number("Avg Score Ratio:", iceCreamRatio/ratiosCounted, 3, 8)); jpayne@68: bb.appendln(Tools.number("Score Cutoff:", minRatio2, 3, 8)); jpayne@68: jpayne@68: if(printTiming){ jpayne@68: bb.appendln("Iterations: "+alignmentIters/1000000+"m"); jpayne@68: bb.appendln("Short Iterations: "+alignmentItersShort/1000000+"m"); jpayne@68: bb.appendln("Elapsed: "+elapsed/1000000+"ms"); jpayne@68: bb.appendln("Elapsed Short: "+elapsedShort/1000000+"ms"); jpayne@68: } jpayne@68: jpayne@68: if(parseCustom){ jpayne@68: { jpayne@68: bb.appendln("\nReads:"); jpayne@68: bb.appendln(Tools.numberPercent("True Positive:", truePositiveReads, truePositiveReads*100.0/(readsProcessed), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("True Negative:", trueNegativeReads, trueNegativeReads*100.0/(readsProcessed), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("False Positive:", falsePositiveReads, falsePositiveReads*100.0/(readsProcessed), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("False Negative:", falseNegativeReads, falseNegativeReads*100.0/(readsProcessed), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Ambiguous:", ambiguousReads, ambiguousReads*100.0/(readsProcessed), 2, 8)); jpayne@68: jpayne@68: double snr=(truePositiveReads+trueNegativeReads+ambiguousReads)/Tools.max(1, falsePositiveReads+falseNegativeReads+ambiguousReads); jpayne@68: snr=10*Math.log10(snr); jpayne@68: bb.appendln(Tools.number("SNR:", snr, 2, 8)); jpayne@68: } jpayne@68: { jpayne@68: bb.appendln("\nZMWs:"); jpayne@68: bb.appendln(Tools.numberPercent("True Positive:", truePositiveZMWs, truePositiveZMWs*100.0/(ZMWs), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("True Negative:", trueNegativeZMWs, trueNegativeZMWs*100.0/(ZMWs), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("False Positive:", falsePositiveZMWs, falsePositiveZMWs*100.0/(ZMWs), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("False Negative:", falseNegativeZMWs, falseNegativeZMWs*100.0/(ZMWs), 2, 8)); jpayne@68: bb.appendln(Tools.numberPercent("Ambiguous:", ambiguousZMWs, ambiguousZMWs*100.0/(ZMWs), 2, 8)); jpayne@68: jpayne@68: double snr=(truePositiveZMWs+trueNegativeZMWs+ambiguousReads)/Tools.max(1, falsePositiveZMWs+falseNegativeZMWs+ambiguousZMWs); jpayne@68: snr=10*Math.log10(snr); jpayne@68: bb.appendln(Tools.number("SNR:", snr, 2, 8)); jpayne@68: } jpayne@68: } jpayne@68: return bb; jpayne@68: } jpayne@68: jpayne@68: private JsonObject toJson(Timer t){ jpayne@68: JsonObject jo=new JsonObject(); jpayne@68: long readsFiltered=readsProcessed-readsOut; jpayne@68: jpayne@68: jo.add("Time", t.timeInSeconds()); jpayne@68: jo.add("Reads_Processed", readsProcessed); jpayne@68: jo.add("Bases_Processed", basesProcessed); jpayne@68: jo.add("Reads_Out", readsOut); jpayne@68: jo.add("Bases_Out", basesOut); jpayne@68: jo.add("Reads_Filtered", readsFiltered); jpayne@68: jo.add("Reads_Filtered_Pct", readsFiltered*100.0/(readsProcessed)); jpayne@68: if(trimReads){ jpayne@68: jo.add("Reads_Trimmed", readsTrimmed); jpayne@68: jo.add("Reads_Trimmed_Pct", readsTrimmed*100.0/(readsProcessed)); jpayne@68: jo.add("Bases_Trimmed", basesTrimmed); jpayne@68: jo.add("Bases_Trimmed_Pct", basesTrimmed*100.0/(basesProcessed)); jpayne@68: } jpayne@68: jo.add("Total_ZMWs", ZMWs); jpayne@68: jo.add("Bad_ZMWs", iceCreamZMWs); jpayne@68: jo.add("Bad_ZMWs_Pct", iceCreamZMWs*100.0/(ZMWs)); jpayne@68: jo.add("Absent_Adapter", missingAdapterZMWs); jpayne@68: jo.add("Absent_Adapter_Pct", missingAdapterZMWs*100.0/(ZMWs)); jpayne@68: jo.add("Hidden_Adapter", hiddenAdapterZMWs); jpayne@68: jo.add("Hidden_Adapter_Pct", hiddenAdapterZMWs*100.0/(ZMWs)); jpayne@68: // jo.add("Low_Entropy", lowEntropyReads); jpayne@68: // jo.add("Low_Entropy_Pct", lowEntropyReads*100.0/(readsProcessed)); jpayne@68: jo.add("Low_Entropy", lowEntropyZMWs); jpayne@68: jo.add("Low_Entropy_Pct", lowEntropyZMWs*100.0/(ZMWs)); jpayne@68: jo.add("Ambiguous_Inverted_Repeat", ambiguousZMWs); jpayne@68: jo.add("Ambiguous_Inverted_Repeat_Pct", ambiguousZMWs*100.0/(ZMWs)); jpayne@68: jo.add("Avg_Score_Ratio_IR", iceCreamRatio/ratiosCounted); jpayne@68: jo.add("Score_Cutoff_IR", minRatio2); jpayne@68: jpayne@68: jo.add("Alignment_Iterations_IR", alignmentIters); jpayne@68: jo.add("Short_Alignment_Iterations_IR", alignmentItersShort); jpayne@68: jpayne@68: if(parseCustom){ jpayne@68: { jpayne@68: double snr=(truePositiveReads+trueNegativeReads+ambiguousReads)/Tools.max(1, falsePositiveReads+falseNegativeReads+ambiguousReads); jpayne@68: snr=10*Math.log10(snr); jpayne@68: jo.add("TP_Reads", truePositiveReads); jpayne@68: jo.add("TN_Reads", trueNegativeReads); jpayne@68: jo.add("FP_Reads", falsePositiveReads); jpayne@68: jo.add("FN_Reads", falseNegativeReads); jpayne@68: jo.add("AM_Reads", ambiguousReads); jpayne@68: jpayne@68: jo.add("TP_Reads_Pct", truePositiveReads*100.0/(readsProcessed)); jpayne@68: jo.add("TN_Reads_Pct", trueNegativeReads*100.0/(readsProcessed)); jpayne@68: jo.add("FP_Reads_Pct", falsePositiveReads*100.0/(readsProcessed)); jpayne@68: jo.add("FN_Reads_Pct", falseNegativeReads*100.0/(readsProcessed)); jpayne@68: jo.add("AM_Reads_Pct", ambiguousReads*100.0/(readsProcessed)); jpayne@68: jpayne@68: jo.add("SNR_Reads", snr); jpayne@68: } jpayne@68: { jpayne@68: double snr=(truePositiveZMWs+trueNegativeZMWs+ambiguousZMWs)/Tools.max(1, falsePositiveZMWs+falseNegativeZMWs+ambiguousZMWs); jpayne@68: snr=10*Math.log10(snr); jpayne@68: jo.add("TP_ZMWs", truePositiveZMWs); jpayne@68: jo.add("TN_ZMWs", trueNegativeZMWs); jpayne@68: jo.add("FP_ZMWs", falsePositiveZMWs); jpayne@68: jo.add("FN_ZMWs", falseNegativeZMWs); jpayne@68: jo.add("AM_ZMWs", ambiguousZMWs); jpayne@68: jpayne@68: jo.add("TP_ZMWs_Pct", truePositiveZMWs*100.0/(ZMWs)); jpayne@68: jo.add("TN_ZMWs_Pct", trueNegativeZMWs*100.0/(ZMWs)); jpayne@68: jo.add("FP_ZMWs_Pct", falsePositiveZMWs*100.0/(ZMWs)); jpayne@68: jo.add("FN_ZMWs_Pct", falseNegativeZMWs*100.0/(ZMWs)); jpayne@68: jo.add("AM_ZMWs_Pct", ambiguousZMWs*100.0/(ZMWs)); jpayne@68: jpayne@68: jo.add("SNR_ZMWs", snr); jpayne@68: } jpayne@68: } jpayne@68: return jo; jpayne@68: } jpayne@68: jpayne@68: private static void writeScoreRatioHistogram(FileFormat ff, long[] hist){ jpayne@68: if(ff==null){return;} jpayne@68: final ByteStreamWriter bsw=new ByteStreamWriter(ff); jpayne@68: bsw.start(); jpayne@68: final float mult=1.0f/(hist.length-1); jpayne@68: jpayne@68: bsw.print("#Counted\t").println(Tools.sum(hist)); jpayne@68: bsw.print("#Mean\t").println(Tools.averageHistogram(hist)*mult, 3); jpayne@68: bsw.print("#Median\t").println(Tools.medianHistogram(hist)*mult, 3); jpayne@68: bsw.print("#Mode\t").println(Tools.calcModeHistogram(hist)*mult, 3); jpayne@68: bsw.print("#STDev\t").println(Tools.standardDeviationHistogram(hist)*mult, 3); jpayne@68: bsw.print("#Value\tOccurances\n"); jpayne@68: jpayne@68: for(int i=0; i alpt=new ArrayList(threads); jpayne@68: for(int i=0; i alpt){ jpayne@68: jpayne@68: //Wait for completion of all threads jpayne@68: boolean success=true; jpayne@68: for(ProcessThread pt : alpt){ jpayne@68: jpayne@68: //Wait until this thread has terminated jpayne@68: while(pt.getState()!=Thread.State.TERMINATED){ jpayne@68: try { jpayne@68: //Attempt a join operation jpayne@68: pt.join(); jpayne@68: } catch (InterruptedException e) { jpayne@68: //Potentially handle this, if it is expected to occur jpayne@68: e.printStackTrace(); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Accumulate per-thread statistics jpayne@68: readsProcessed+=pt.readsProcessedT; jpayne@68: basesProcessed+=pt.basesProcessedT; jpayne@68: jpayne@68: truePositiveReads+=pt.truePositiveReadsT; jpayne@68: trueNegativeReads+=pt.trueNegativeReadsT; jpayne@68: falsePositiveReads+=pt.falsePositiveReadsT; jpayne@68: falseNegativeReads+=pt.falseNegativeReadsT; jpayne@68: ambiguousReads+=pt.ambiguousReadsT; jpayne@68: jpayne@68: truePositiveZMWs+=pt.truePositiveZMWsT; jpayne@68: trueNegativeZMWs+=pt.trueNegativeZMWsT; jpayne@68: falsePositiveZMWs+=pt.falsePositiveZMWsT; jpayne@68: falseNegativeZMWs+=pt.falseNegativeZMWsT; jpayne@68: ambiguousZMWs+=pt.ambiguousZMWsT; jpayne@68: jpayne@68: readsOut+=pt.readsOutT; jpayne@68: basesOut+=pt.basesOutT; jpayne@68: junctionsOut+=pt.junctionsOutT; jpayne@68: jpayne@68: alignmentIters+=pt.ica.iters(); jpayne@68: alignmentItersShort+=pt.ica.itersShort(); jpayne@68: elapsed+=pt.elapsedT; jpayne@68: elapsedShort+=pt.elapsedShortT; jpayne@68: jpayne@68: iceCreamReads+=pt.iceCreamReadsT; jpayne@68: iceCreamBases+=pt.iceCreamBasesT; jpayne@68: iceCreamZMWs+=pt.iceCreamZMWsT; jpayne@68: iceCreamRatio+=pt.iceCreamRatioT; jpayne@68: ratiosCounted+=pt.ratiosCountedT; jpayne@68: missingAdapterZMWs+=pt.missingAdapterZMWsT; jpayne@68: hiddenAdapterZMWs+=pt.hiddenAdapterZMWsT; jpayne@68: lowEntropyZMWs+=pt.lowEntropyZMWsT; jpayne@68: lowEntropyReads+=pt.lowEntropyReadsT; jpayne@68: jpayne@68: basesTrimmed+=pt.basesTrimmedT; jpayne@68: readsTrimmed+=pt.readsTrimmedT; jpayne@68: jpayne@68: for(int i=0; i=0){ jpayne@68: eTracker=new EntropyTracker(false, entropyCutoff, true); jpayne@68: }else{ jpayne@68: eTracker=null; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Called by start() jpayne@68: @Override jpayne@68: public void run(){ jpayne@68: //Do anything necessary prior to processing jpayne@68: jpayne@68: //Process the reads jpayne@68: processInner(); jpayne@68: jpayne@68: //Do anything necessary after processing jpayne@68: jpayne@68: //Indicate successful exit status jpayne@68: success=true; jpayne@68: } jpayne@68: jpayne@68: /** Iterate through the reads */ jpayne@68: void processInner(){ jpayne@68: jpayne@68: //As long as there is a nonempty read list... jpayne@68: for(ZMW reads=zstream.nextZMW(); reads!=null; reads=zstream.nextZMW()){ jpayne@68: // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access jpayne@68: jpayne@68: processList(reads); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: int flagLowEntropyReads(final ZMW reads, final float minEnt, jpayne@68: final int minLen0, final float minFract){ jpayne@68: int found=0; jpayne@68: for(Read r : reads){ jpayne@68: if(!r.discarded()){ jpayne@68: int minLen=Tools.min((int)(r.length()*minFract), minLen0); jpayne@68: int maxBlock=eTracker.longestLowEntropyBlock(r.bases, false, maxMonomerFraction); jpayne@68: if(maxBlock>=minLen){ jpayne@68: r.setDiscarded(true); jpayne@68: r.setJunk(true); jpayne@68: found++; jpayne@68: // System.err.println(r.toFasta()); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: return found; jpayne@68: } jpayne@68: jpayne@68: int flagLongReads(final ZMW reads, int median){ jpayne@68: int found=0; jpayne@68: for(Read r : reads){ jpayne@68: if(r.length()>longReadMult*median){ jpayne@68: r.setDiscarded(true); jpayne@68: r.setHasAdapter(true); jpayne@68: found++; jpayne@68: } jpayne@68: } jpayne@68: return found; jpayne@68: } jpayne@68: jpayne@68: /** Each list is presumed to be all reads from a ZMW, in order */ jpayne@68: void processList(final ZMW reads){ jpayne@68: long numBases=0; jpayne@68: jpayne@68: //Loop through each read in the list for statistics jpayne@68: for(int idx=0; idx=3; jpayne@68: jpayne@68: if(trimReads || trimPolyA){ jpayne@68: int removed=0; jpayne@68: for(int i=0; i0){ jpayne@68: trimmed+=trimRead(r, adapterTipLen); jpayne@68: r.setTrimmed(true); jpayne@68: } jpayne@68: } jpayne@68: if(trimPolyA){ jpayne@68: int x=trimPolyA(r); jpayne@68: trimmed+=x; jpayne@68: if(x>0){r.setTrimmed(true);} jpayne@68: } jpayne@68: jpayne@68: if(trimmed>0){ jpayne@68: basesTrimmedT+=trimmed; jpayne@68: readsTrimmedT++; jpayne@68: } jpayne@68: jpayne@68: //TODO: Note again, removing intermediate reads messes up forward-rev ordering jpayne@68: if(r.length()0){ jpayne@68: Tools.condenseStrict(reads); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(entropyCutoff>0){ jpayne@68: int bad=flagLowEntropyReads(reads, entropyCutoff, entropyLength, entropyFraction); jpayne@68: if(bad>0){ jpayne@68: lowEntropyZMWsT++; jpayne@68: lowEntropyReadsT+=bad; jpayne@68: if(bad>=reads.size()){ jpayne@68: if(!reads.isEmpty()){outputReads(reads, null);} jpayne@68: return; //No point in continuing... jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(reads.isEmpty()){return;} jpayne@68: jpayne@68: Read sample=null;//Read that will be searched for inverted repeat, typically median length jpayne@68: Read shortest=null;//shortest read in the middle, or overall if no middle reads jpayne@68: final int medianLength=reads.medianLength(true); jpayne@68: boolean foundInvertedRepeat=false; jpayne@68: int longReads=0; jpayne@68: int adapterReads=0; jpayne@68: int maxAdapters=0; jpayne@68: int sampleNum=0; jpayne@68: jpayne@68: if(reads.size()>=3){ jpayne@68: if(flagLongReads){longReads=flagLongReads(reads, medianLength);} jpayne@68: for(int i=1; i0 ? 1 : 0); jpayne@68: maxAdapters=Tools.max(x, maxAdapters); jpayne@68: } jpayne@68: jpayne@68: if(reads.size()>2){ jpayne@68: Read a=reads.get(0), b=reads.get(reads.size()-1); jpayne@68: Read r=a.length()>b.length() ? a : b; jpayne@68: if(needsAdapterTest(r)){ jpayne@68: int x=lookForAdapter(r, mult); jpayne@68: adapterReads+=(x>0 ? 1 : 0); jpayne@68: maxAdapters=Tools.max(x, maxAdapters); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: for(Read r : reads){ jpayne@68: if((r.length()>=shortReadMult*shortest.length() || adapterReads>0 || longReads>0 || foundInvertedRepeat) jpayne@68: && needsAdapterTest(r)){ jpayne@68: int x=lookForAdapter(r, mult); jpayne@68: adapterReads+=(x>0 ? 1 : 0); jpayne@68: maxAdapters=Tools.max(x, maxAdapters); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(ar!=null && ar.icecream){ jpayne@68: iceCreamRatioT+=ar.ratio; jpayne@68: ratiosCountedT++; jpayne@68: int idx=(int)(ar.ratio*200); jpayne@68: repeatScoresT[idx]++; jpayne@68: if(longReads+adapterReads==0){missingAdapterZMWsT++;} jpayne@68: } jpayne@68: if(longReads+adapterReads>0){ jpayne@68: hiddenAdapterZMWsT++; jpayne@68: } jpayne@68: jpayne@68: if(keepShortReads && maxAdapters<2){ jpayne@68: if(foundInvertedRepeat && !sample.hasAdapter()){ jpayne@68: if(reads.size()>2){ jpayne@68: for(int i=1; i=veryShortMult*medianLength){r.setDiscarded(true);} jpayne@68: r=reads.get(reads.size()-1); jpayne@68: if(r.length()>=veryShortMult*medianLength){r.setDiscarded(true);} jpayne@68: }else if(reads.size()==2){ jpayne@68: for(Read r : reads){ jpayne@68: if(r.length()>=veryShortMult*sample.length()){ jpayne@68: if(ar.icecream){ jpayne@68: r.setDiscarded(true); jpayne@68: }else if(ar.ambiguous){ jpayne@68: r.setAmbiguous(true); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: }else if(sample.discarded() || (longReads+adapterReads>0)){ jpayne@68: for(Read r : reads){ jpayne@68: r.setDiscarded(true); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: ArrayList junctions=null; jpayne@68: if(ar!=null){ jpayne@68: if(rosj!=null && !sample.hasAdapter()){ jpayne@68: Read r=ar.alignedRead; jpayne@68: int width=Tools.min(200, ar.junctionLoc, r.length()-ar.junctionLoc); jpayne@68: int a=ar.junctionLoc-width, b=ar.junctionLoc+width; jpayne@68: byte[] bases=Arrays.copyOfRange(r.bases, a, b); jpayne@68: Read junction=new Read(bases, null, r.id+"\tjunction:"+a+"-"+b, r.numericID); jpayne@68: junctions=new ArrayList(1); jpayne@68: junctions.add(junction); jpayne@68: junctionsOutT++; jpayne@68: } jpayne@68: if(modifyHeader){ jpayne@68: sample.id=sample.id+"\tratio="+ar.ratio+"\tjunction="+ar.junctionLoc+ jpayne@68: "\tIR="+sample.invertedRepeat()+"\tAD="+sample.hasAdapter()+"\tFP="+fullPass+"\tsubreads="+reads.size(); jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: removeShortTrash(reads); jpayne@68: jpayne@68: if(!reads.isEmpty()){outputReads(reads, junctions);} jpayne@68: } jpayne@68: jpayne@68: //TODO: Now that I think about it. The order of the reads is important. jpayne@68: //Since they go forward-rev-forward-rev it's imprudent to discard inner reads. jpayne@68: private void removeShortTrash(ZMW reads) { jpayne@68: int removed=0; jpayne@68: for(int i=0; i0){Tools.condenseStrict(reads);} jpayne@68: } jpayne@68: jpayne@68: int trimRead(Read r, int lookahead){ jpayne@68: final byte[] bases=r.bases; jpayne@68: jpayne@68: int left=calcLeftTrim(bases, lookahead); jpayne@68: int right=calcRightTrim(bases, lookahead); jpayne@68: int trimmed=0; jpayne@68: jpayne@68: if(left+right>0){ jpayne@68: // System.err.println(r.length()+", "+left+", "+right+", "+r.id); jpayne@68: trimmed=TrimRead.trimByAmount(r, left, right, 1, false); jpayne@68: // System.err.println(r.length()+", "+left+", "+right+", "+r.id); jpayne@68: ZMW.fixReadHeader(r, left, right); jpayne@68: // System.err.println(r.length()+", "+left+", "+right+", "+r.id); jpayne@68: } jpayne@68: jpayne@68: if(r.samline!=null){ jpayne@68: r.samline.seq=r.bases; jpayne@68: r.samline.qual=r.quality; jpayne@68: } jpayne@68: return trimmed; jpayne@68: } jpayne@68: jpayne@68: int trimPolyA(Read r){ jpayne@68: final byte[] bases=r.bases; jpayne@68: jpayne@68: int left=Tools.max(PolymerTrimmer.testLeft(bases, 'A'), PolymerTrimmer.testLeft(bases, 'T')); jpayne@68: int right=Tools.max(PolymerTrimmer.testRight(bases, 'A'), PolymerTrimmer.testRight(bases, 'T')); jpayne@68: int trimmed=0; jpayne@68: jpayne@68: if(left+right>0){ jpayne@68: // System.err.println(r.length()+", "+left+", "+right+", "+r.id); jpayne@68: trimmed=TrimRead.trimByAmount(r, left, right, 1, false); jpayne@68: // System.err.println(r.length()+", "+left+", "+right+", "+r.id); jpayne@68: ZMW.fixReadHeader(r, left, right); jpayne@68: // System.err.println(r.length()+", "+left+", "+right+", "+r.id); jpayne@68: } jpayne@68: jpayne@68: if(r.samline!=null){ jpayne@68: r.samline.seq=r.bases; jpayne@68: r.samline.qual=r.quality; jpayne@68: } jpayne@68: return trimmed; jpayne@68: } jpayne@68: jpayne@68: final int calcLeftTrim(final byte[] bases, int lookahead){ jpayne@68: final int len=bases.length; jpayne@68: int lastUndef=-1; jpayne@68: for(int i=0, defined=0; i=0 && defined junctions){ jpayne@68: final int size=reads.size(); jpayne@68: final ArrayList good=(rosg==null ? null : new ArrayList(size)); jpayne@68: final ArrayList ambig=(rosa==null ? null : new ArrayList(size)); jpayne@68: final ArrayList bad=(rosb==null ? null : new ArrayList(size)); jpayne@68: jpayne@68: int discardedReads=0; jpayne@68: int ambigReads=0; jpayne@68: int trimmedReads=0; jpayne@68: boolean sendAllToDiscarded=false; jpayne@68: boolean sendAllToAmbiguous=false; jpayne@68: jpayne@68: //Check to see if any reads are discarded or ambiguous jpayne@68: for(Read r : reads){ jpayne@68: if(r.discarded()){ jpayne@68: discardedReads++; jpayne@68: }else if(r.ambiguous()){ jpayne@68: ambigReads++; jpayne@68: }else if(r.trimmed()){ jpayne@68: trimmedReads++; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Unify flags on all reads jpayne@68: if(keepZMWsTogether){ jpayne@68: if(discardedReads>0){sendAllToDiscarded=true;} jpayne@68: else if(ambigReads>0){sendAllToAmbiguous=true;} jpayne@68: } jpayne@68: // if(discardedReads>0 || ambigReads>0){System.err.println("\nd="+discardedReads+", a="+ambigReads);} jpayne@68: if(discardedReads>0){iceCreamZMWsT++;} jpayne@68: jpayne@68: int trueIceCreamReads=0; jpayne@68: for(Read r : reads){ jpayne@68: boolean trueIceCream=(parseCustom ? ReadBuilder.isIceCream(r.id) : false); jpayne@68: trueIceCreamReads+=(trueIceCream ? 1 : 0); jpayne@68: if(r.discarded() || sendAllToDiscarded){ jpayne@68: // assert(false); jpayne@68: if(bad!=null){bad.add(r);} jpayne@68: if(trueIceCream){truePositiveReadsT++;} jpayne@68: else{falsePositiveReadsT++;} jpayne@68: // System.err.println("d:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size()); jpayne@68: }else if(r.ambiguous() || sendAllToAmbiguous){ jpayne@68: if(ambig!=null){ambig.add(r);} jpayne@68: if(sendAmbigToGood){ jpayne@68: readsOutT++; jpayne@68: basesOutT+=r.length(); jpayne@68: if(good!=null) {good.add(r);} jpayne@68: } jpayne@68: if(sendAmbigToBad && bad!=null){bad.add(r);} jpayne@68: ambiguousReadsT++; jpayne@68: // System.err.println("a*:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size()); jpayne@68: }else{ jpayne@68: if(good!=null){ jpayne@68: good.add(r); jpayne@68: } jpayne@68: readsOutT++; jpayne@68: basesOutT+=r.length(); jpayne@68: if(trueIceCream && !r.trimmed()){falseNegativeReadsT++;} jpayne@68: else{trueNegativeReadsT++;} jpayne@68: // if(discardedReads>0 || ambigReads>0){System.err.println("g*:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size());} jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(trueIceCreamReads>0){ jpayne@68: if(discardedReads>0 || trimmedReads>0){ jpayne@68: truePositiveZMWsT++; jpayne@68: }else if(ambigReads>0){ jpayne@68: ambiguousZMWs++; jpayne@68: }else{ jpayne@68: falseNegativeZMWsT++; jpayne@68: // StringBuilder sb=new StringBuilder(); jpayne@68: // for(Read r : reads) {sb.append("\n").append(r.id);} jpayne@68: // System.err.println(sb); jpayne@68: } jpayne@68: }else{ jpayne@68: if(discardedReads>0){ jpayne@68: falsePositiveZMWsT++; jpayne@68: }else if(ambigReads>0){ jpayne@68: ambiguousZMWs++; jpayne@68: }else{ jpayne@68: trueNegativeZMWsT++; jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: if(rosg!=null && good!=null && !good.isEmpty()){rosg.add(good, 0);} jpayne@68: if(rosa!=null && ambig!=null && !ambig.isEmpty()){rosa.add(ambig, 0);} jpayne@68: if(rosb!=null && bad!=null && !bad.isEmpty()){rosb.add(bad, 0);} jpayne@68: if(rosj!=null && junctions!=null && !junctions.isEmpty()){rosj.add(junctions, 0);} jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Align part of a read to itself to look for inverted repeats. jpayne@68: */ jpayne@68: AlignmentResult align(final Read r, boolean fullPass, int passes, int readNum){ jpayne@68: if(!alignRC){return null;} jpayne@68: final byte[] bases=r.bases; jpayne@68: jpayne@68: int qlen=(int)Tools.max(minQlen, Tools.min(targetQlen, bases.length*maxQlenFraction)); jpayne@68: if(qlen>0.45f*bases.length){return null;}//Ignore short stuff jpayne@68: jpayne@68: //Perform an initial scan using the tips of the reads to look for an inverted repeat jpayne@68: boolean tipOnly=filterIceCreamOnly && fullPass; jpayne@68: // System.err.println(filterIceCreamOnly+", "+fullPass+", "+tipOnly); jpayne@68: AlignmentResult a=alignLeft(bases, qlen, minRatio1, true, tipOnly); jpayne@68: AlignmentResult b=(fullPass && false ? null : alignRight(bases, qlen, minRatio1, true, tipOnly));//A second alignment is not needed for a full pass. jpayne@68: AlignmentResult ar=(a==null ? b : b==null ? a : a.maxScore>=b.maxScore ? a : b); jpayne@68: jpayne@68: //If nothing was detected, return jpayne@68: if(ar==null){return null;} jpayne@68: ar.alignedRead=r; jpayne@68: jpayne@68: //At this point, the read contains an inverted repeat of length qlen. jpayne@68: final int expectedJunction=ar.rLen/2; jpayne@68: jpayne@68: if(ar.left){ jpayne@68: ar.junctionLoc=ar.maxRpos/2; jpayne@68: }else{ jpayne@68: int innerLeft=ar.maxRpos; jpayne@68: int innerRight=ar.rLen-ar.qLen; jpayne@68: ar.junctionLoc=(innerLeft+innerRight)/2; jpayne@68: } jpayne@68: jpayne@68: // if(fullPass){//This code doesn't seem to have any effect and it's not clear why it is present jpayne@68: // int dif=Tools.absdif(expectedJunction, ar.junctionLoc); jpayne@68: // if(dif>expectedJunction*0.1) { jpayne@68: // if(filterIceCreamOnly){return ar;} jpayne@68: // } jpayne@68: // } jpayne@68: jpayne@68: if(realign){ jpayne@68: if(ar.junctionLoc=qlen){ jpayne@68: ar=alignLeft(bases, qlen2, minRatio2, false, false); jpayne@68: } jpayne@68: }else{ jpayne@68: int qlen2=(int)((ar.rLen-ar.junctionLoc)*0.9); jpayne@68: if(qlen2>=qlen){ jpayne@68: ar=alignRight(bases, qlen2, minRatio2, false, false); jpayne@68: } jpayne@68: } jpayne@68: if(ar==null){return null;} jpayne@68: ar.alignedRead=r; jpayne@68: } jpayne@68: jpayne@68: //At this point, the read contains an inverted repeat mirrored across a junction. jpayne@68: final float junctionFraction; jpayne@68: if(ar.left){ jpayne@68: ar.junctionLoc=ar.maxRpos/2; jpayne@68: junctionFraction=ar.junctionLoc/(float)ar.rLen; jpayne@68: }else{ jpayne@68: int innerLeft=ar.maxRpos; jpayne@68: int innerRight=ar.rLen-ar.qLen; jpayne@68: ar.junctionLoc=(innerLeft+innerRight)/2; jpayne@68: junctionFraction=(ar.rLen-ar.junctionLoc)/(float)ar.rLen; jpayne@68: } jpayne@68: jpayne@68: final int dif=Tools.absdif(expectedJunction, ar.junctionLoc); jpayne@68: if(fullPass){ jpayne@68: if(junctionFractionexpectedJunction){ jpayne@68: ar.icecream=false; jpayne@68: }else{ jpayne@68: ar.ambiguous=true; jpayne@68: } jpayne@68: }else{ jpayne@68: //Last read, the junction should be closer to the end for real ice cream jpayne@68: assert(readNum==1) : readNum; jpayne@68: if(ar.junctionLoc=r.length()){return 0;} jpayne@68: jpayne@68: int suspectThresh=(int)(minSwScoreSuspect*mult); jpayne@68: int scoreThresh=(int)(minSwScore*mult); jpayne@68: jpayne@68: // final byte[] array=npad(r.bases, pad ? npad : 0); jpayne@68: final byte[] array=npad(r.bases, npad); jpayne@68: jpayne@68: // assert(array==r.bases) : npad; jpayne@68: jpayne@68: int lim=array.length-npad-stride; jpayne@68: jpayne@68: int found=0; jpayne@68: jpayne@68: int lastSuspect=-1; jpayne@68: int lastConfirmed=-1; jpayne@68: int maxScore=0; jpayne@68: jpayne@68: // final int revisedStride=(r.length()>1800 ? stride : (int)(stride*0.6f)); jpayne@68: for(int i=begin; i=suspectThresh){ jpayne@68: final int score=rvec[0]; jpayne@68: final int start=rvec[1]; jpayne@68: final int stop=rvec[2]; jpayne@68: assert(score>=suspectThresh); jpayne@68: if((i==0 || start>i) && (j==array.length-1 || stop=scoreThresh || jpayne@68: (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect0 && start>=lastConfirmed && start-lastConfirmedwindow){//Look ahead jpayne@68: rvec=ssa.fillAndScoreLimited(adapter, array, stop, stop+window, suspectThresh); jpayne@68: if(rvec!=null){ jpayne@68: if(score>=suspectMidpoint && rvec[0]>=suspectThresh && rvec[1]-stop=suspectThresh && rvec[0]>=scoreThresh && rvec[1]-stop=(scoreThresh)){kill=true;} jpayne@68: } jpayne@68: jpayne@68: if(kill){ jpayne@68: maxScore=Tools.max(maxScore, score); jpayne@68: // if(print) {System.err.println("Found adapter at distance "+Tools.min(start, array.length-stop));} jpayne@68: // System.out.println("-:"+score+", "+scoreThresh+", "+suspectThresh+"\t"+lastSuspect+", "+start+", "+stop); jpayne@68: found++; jpayne@68: for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';} jpayne@68: // assert(Shared.threads()!=1) : new String(array)+"\n"+start+", "+stop; jpayne@68: if(useLocality && score>=scoreThresh){lastConfirmed=Tools.max(lastConfirmed, stop);} jpayne@68: } jpayne@68: } jpayne@68: // System.out.println("Set lastSuspect="+stop+" on score "+score); jpayne@68: if(useLocality){lastSuspect=Tools.max(lastSuspect, stop);} jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: r.setTested(true); jpayne@68: if(found>0){ jpayne@68: r.setHasAdapter(true); jpayne@68: r.setDiscarded(true); jpayne@68: r.setAmbiguous(false); jpayne@68: jpayne@68: int idx=(int)((maxScore*200.0)/maxSwScore); jpayne@68: adapterScoresT[idx]++; jpayne@68: }else{ jpayne@68: r.setHasAdapter(false); jpayne@68: } jpayne@68: jpayne@68: return found; jpayne@68: } jpayne@68: jpayne@68: /** Align the adapter sequence to the read ends, and trim if needed. */ jpayne@68: private int trimTerminalAdapters(Read r, double mult) { jpayne@68: assert(!r.hasAdapter() && !r.tested()); jpayne@68: int begin=0; jpayne@68: while(begin=r.length()){return 0;} jpayne@68: jpayne@68: int suspectThresh=(int)(minSwScoreSuspect*mult); jpayne@68: int scoreThresh=(int)(minSwScore*mult); jpayne@68: jpayne@68: // final byte[] array=npad(r.bases, pad ? npad : 0); jpayne@68: final byte[] array=npad(r.bases, npad); jpayne@68: jpayne@68: // assert(array==r.bases) : npad; jpayne@68: jpayne@68: int lim=array.length-npad-stride; jpayne@68: jpayne@68: int found=0; jpayne@68: jpayne@68: int lastSuspect=-1; jpayne@68: int lastConfirmed=-1; jpayne@68: int maxScore=0; jpayne@68: jpayne@68: // final int revisedStride=(r.length()>1800 ? stride : (int)(stride*0.6f)); jpayne@68: for(int i=begin; i=suspectThresh){ jpayne@68: final int score=rvec[0]; jpayne@68: final int start=rvec[1]; jpayne@68: final int stop=rvec[2]; jpayne@68: assert(score>=suspectThresh); jpayne@68: if((i==0 || start>i) && (j==array.length-1 || stop=scoreThresh || jpayne@68: (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect0 && start>=lastConfirmed && start-lastConfirmedwindow){//Look ahead jpayne@68: rvec=ssa.fillAndScoreLimited(adapter, array, stop, stop+window, suspectThresh); jpayne@68: if(rvec!=null){ jpayne@68: if(score>=suspectMidpoint && rvec[0]>=suspectThresh && rvec[1]-stop=suspectThresh && rvec[0]>=scoreThresh && rvec[1]-stop=(scoreThresh)){kill=true;} jpayne@68: } jpayne@68: jpayne@68: if(kill){ jpayne@68: maxScore=Tools.max(maxScore, score); jpayne@68: // if(print) {System.err.println("Found adapter at distance "+Tools.min(start, array.length-stop));} jpayne@68: // System.out.println("-:"+score+", "+scoreThresh+", "+suspectThresh+"\t"+lastSuspect+", "+start+", "+stop); jpayne@68: found++; jpayne@68: for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';} jpayne@68: // assert(Shared.threads()!=1) : new String(array)+"\n"+start+", "+stop; jpayne@68: if(useLocality && score>=scoreThresh){lastConfirmed=Tools.max(lastConfirmed, stop);} jpayne@68: } jpayne@68: } jpayne@68: // System.out.println("Set lastSuspect="+stop+" on score "+score); jpayne@68: if(useLocality){lastSuspect=Tools.max(lastSuspect, stop);} jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: r.setTested(true); jpayne@68: if(found>0){ jpayne@68: r.setHasAdapter(true); jpayne@68: r.setDiscarded(true); jpayne@68: r.setAmbiguous(false); jpayne@68: jpayne@68: int idx=(int)((maxScore*200.0)/maxSwScore); jpayne@68: adapterScoresT[idx]++; jpayne@68: }else{ jpayne@68: r.setHasAdapter(false); jpayne@68: } jpayne@68: jpayne@68: return found; jpayne@68: } jpayne@68: jpayne@68: private byte[] npad(final byte[] array, final int pad){ jpayne@68: if(pad<=0){return array;} jpayne@68: final int len=array.length+2*pad; jpayne@68: byte[] r=new byte[len]; jpayne@68: for(int i=0; i