annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/icecream/IceCreamFinder.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 package icecream;
jpayne@68 2
jpayne@68 3 import java.io.PrintStream;
jpayne@68 4 import java.util.ArrayList;
jpayne@68 5 import java.util.Arrays;
jpayne@68 6
jpayne@68 7 import aligner.AlignmentResult;
jpayne@68 8 import aligner.FlatAligner;
jpayne@68 9 import aligner.MultiStateAligner9PacBioAdapter2;
jpayne@68 10 import aligner.SingleStateAlignerPacBioAdapter;
jpayne@68 11 import dna.AminoAcid;
jpayne@68 12 import dna.Data;
jpayne@68 13 import fileIO.ByteFile;
jpayne@68 14 import fileIO.ByteStreamWriter;
jpayne@68 15 import fileIO.FileFormat;
jpayne@68 16 import fileIO.ReadWrite;
jpayne@68 17 import json.JsonObject;
jpayne@68 18 import shared.Parse;
jpayne@68 19 import shared.Parser;
jpayne@68 20 import shared.PreParser;
jpayne@68 21 import shared.ReadStats;
jpayne@68 22 import shared.Shared;
jpayne@68 23 import shared.Timer;
jpayne@68 24 import shared.Tools;
jpayne@68 25 import shared.TrimRead;
jpayne@68 26 import stream.ConcurrentReadOutputStream;
jpayne@68 27 import stream.FASTQ;
jpayne@68 28 import stream.FastaReadInputStream;
jpayne@68 29 import stream.Read;
jpayne@68 30 import stream.SamLine;
jpayne@68 31 import structures.ByteBuilder;
jpayne@68 32 import structures.EntropyTracker;
jpayne@68 33
jpayne@68 34 /**
jpayne@68 35 * Detects inverted repeats in PacBio reads.
jpayne@68 36 * Attempts to determine whether reads are chimeric
jpayne@68 37 * due to a missing adapter.
jpayne@68 38 *
jpayne@68 39 * @author Brian Bushnell
jpayne@68 40 * @date June 5, 2019
jpayne@68 41 *
jpayne@68 42 */
jpayne@68 43 public final class IceCreamFinder {
jpayne@68 44
jpayne@68 45 /*--------------------------------------------------------------*/
jpayne@68 46 /*---------------- Initialization ----------------*/
jpayne@68 47 /*--------------------------------------------------------------*/
jpayne@68 48
jpayne@68 49 /**
jpayne@68 50 * Code entrance from the command line.
jpayne@68 51 * @param args Command line arguments
jpayne@68 52 */
jpayne@68 53 public static void main(String[] args){
jpayne@68 54 //Start a timer immediately upon code entrance.
jpayne@68 55 Timer t=new Timer();
jpayne@68 56
jpayne@68 57 //Create an instance of this class
jpayne@68 58 IceCreamFinder x=new IceCreamFinder(args);
jpayne@68 59
jpayne@68 60 //Run the object
jpayne@68 61 x.process(t);
jpayne@68 62
jpayne@68 63 //Close the print stream if it was redirected
jpayne@68 64 Shared.closeStream(x.outstream);
jpayne@68 65 }
jpayne@68 66
jpayne@68 67 /**
jpayne@68 68 * Constructor.
jpayne@68 69 * @param args Command line arguments
jpayne@68 70 */
jpayne@68 71 public IceCreamFinder(String[] args){
jpayne@68 72
jpayne@68 73 {//Preparse block for help, config files, and outstream
jpayne@68 74 PreParser pp=new PreParser(args, getClass(), false);
jpayne@68 75 args=pp.args;
jpayne@68 76 outstream=pp.outstream;
jpayne@68 77 }
jpayne@68 78
jpayne@68 79 Parser.setQuality(33);
jpayne@68 80
jpayne@68 81 //Set shared static variables prior to parsing
jpayne@68 82 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
jpayne@68 83 ReadWrite.USE_BGZIP=ReadWrite.USE_UNBGZIP=ReadWrite.PREFER_BGZIP=true;
jpayne@68 84 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
jpayne@68 85 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
jpayne@68 86 SamLine.SET_FROM_OK=true;
jpayne@68 87 Shared.setBufferData(1000000);
jpayne@68 88 // Shared.FASTA_WRAP=511;
jpayne@68 89 Data.USE_SAMBAMBA=false;//Sambamba changes PacBio headers.
jpayne@68 90 Read.CHANGE_QUALITY=false;
jpayne@68 91 EntropyTracker.defaultK=3;
jpayne@68 92
jpayne@68 93 {//Parse the arguments
jpayne@68 94 final Parser parser=parse(args);
jpayne@68 95 Parser.processQuality();
jpayne@68 96
jpayne@68 97 maxReads=parser.maxReads;
jpayne@68 98 overwrite=ReadStats.overwrite=parser.overwrite;
jpayne@68 99 append=ReadStats.append=parser.append;
jpayne@68 100
jpayne@68 101 in1=parser.in1;
jpayne@68 102 extin=parser.extin;
jpayne@68 103
jpayne@68 104 if(outg==null){outg=parser.out1;}
jpayne@68 105 extout=parser.extout;
jpayne@68 106 }
jpayne@68 107
jpayne@68 108 //Determine how many threads may be used
jpayne@68 109 threads=Shared.threads();
jpayne@68 110
jpayne@68 111 //Ensure there is an input file
jpayne@68 112 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
jpayne@68 113 fixExtensions(); //Add or remove .gz or .bz2 as needed
jpayne@68 114 checkFileExistence(); //Ensure files can be read and written
jpayne@68 115 checkStatics(); //Adjust file-related static fields as needed for this program
jpayne@68 116
jpayne@68 117 //Create output FileFormat objects
jpayne@68 118 ffoutg=FileFormat.testOutput(outg, FileFormat.FASTQ, extout, true, overwrite, append, false);
jpayne@68 119 ffouta=FileFormat.testOutput(outa, FileFormat.FASTQ, extout, true, overwrite, append, false);
jpayne@68 120 ffoutb=FileFormat.testOutput(outb, FileFormat.FASTQ, extout, true, overwrite, append, false);
jpayne@68 121 ffoutj=FileFormat.testOutput(outj, FileFormat.FASTA, extout, true, overwrite, append, false);
jpayne@68 122 ffstats=FileFormat.testOutput(outstats, FileFormat.TXT, null, false, overwrite, append, false);
jpayne@68 123 ffasrhist=FileFormat.testOutput(asrhist, FileFormat.TXT, null, false, overwrite, append, false);
jpayne@68 124 ffirsrhist=FileFormat.testOutput(irsrhist, FileFormat.TXT, null, false, overwrite, append, false);
jpayne@68 125
jpayne@68 126 if(!setAmbig && ffouta!=null){
jpayne@68 127 sendAmbigToGood=sendAmbigToBad=false;
jpayne@68 128 }
jpayne@68 129
jpayne@68 130 //Create input FileFormat objects
jpayne@68 131 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
jpayne@68 132
jpayne@68 133 final int alen=(adapter==null ? 20 : adapter.length);
jpayne@68 134 stride=(int)(alen*1.9f);
jpayne@68 135 window=(int)(alen*3.8f+10);
jpayne@68 136 ALIGN_ROWS=alen+1;
jpayne@68 137 ALIGN_COLUMNS=window+2;
jpayne@68 138
jpayne@68 139 maxSwScore=aligner.MultiStateAligner9PacBioAdapter.maxQuality(alen);
jpayne@68 140 minSwScore=(int)(maxSwScore*adapterRatio2);
jpayne@68 141 minSwScoreSuspect=(int)(maxSwScore*Tools.min(adapterRatio2*suspectRatio, adapterRatio2-((1-suspectRatio)*.2f)));
jpayne@68 142 maxImperfectSwScore=aligner.MultiStateAligner9PacBioAdapter.maxImperfectScore(alen);
jpayne@68 143 suspectMidpoint=(minSwScoreSuspect+minSwScore)/2;
jpayne@68 144 if(adapter==null){alignAdapter=false;}
jpayne@68 145 }
jpayne@68 146
jpayne@68 147 /*--------------------------------------------------------------*/
jpayne@68 148 /*---------------- Initialization Helpers ----------------*/
jpayne@68 149 /*--------------------------------------------------------------*/
jpayne@68 150
jpayne@68 151 /** Parse arguments from the command line */
jpayne@68 152 private Parser parse(String[] args){
jpayne@68 153
jpayne@68 154 //Create a parser object
jpayne@68 155 Parser parser=new Parser();
jpayne@68 156
jpayne@68 157 //Set any necessary Parser defaults here
jpayne@68 158 //parser.foo=bar;
jpayne@68 159
jpayne@68 160 //Parse each argument
jpayne@68 161 for(int i=0; i<args.length; i++){
jpayne@68 162 String arg=args[i];
jpayne@68 163
jpayne@68 164 //Break arguments into their constituent parts, in the form of "a=b"
jpayne@68 165 String[] split=arg.split("=");
jpayne@68 166 String a=split[0].toLowerCase();
jpayne@68 167 String b=split.length>1 ? split[1] : null;
jpayne@68 168 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
jpayne@68 169
jpayne@68 170 if(a.equals("verbose")){
jpayne@68 171 verbose=Parse.parseBoolean(b);
jpayne@68 172 }else if(a.equals("format")){
jpayne@68 173 if(b==null){
jpayne@68 174 assert(false) : arg;
jpayne@68 175 }else if(Tools.isDigit(b.charAt(i))){
jpayne@68 176 format=Integer.parseInt(b);
jpayne@68 177 }else if(b.equalsIgnoreCase("json")){
jpayne@68 178 format=FORMAT_JSON;
jpayne@68 179 }else if(b.equalsIgnoreCase("text")){
jpayne@68 180 format=FORMAT_TEXT;
jpayne@68 181 }else{
jpayne@68 182 assert(false) : arg;
jpayne@68 183 }
jpayne@68 184 assert(format>=1 && format<=2) : arg;
jpayne@68 185 }else if(a.equals("json")){
jpayne@68 186 boolean x=Parse.parseBoolean(b);
jpayne@68 187 format=(x ? FORMAT_JSON : FORMAT_TEXT);
jpayne@68 188 }else if(a.equals("ss") || a.equals("samstreamer") || a.equals("streamer")){
jpayne@68 189 if(b!=null && Tools.isDigit(b.charAt(0))){
jpayne@68 190 ZMWStreamer.useStreamer=true;
jpayne@68 191 assert(Integer.parseInt(b)==1) : "ZMWStreamer threads currently capped at 1.";
jpayne@68 192 // ZMWStreamer.streamerThreads=Tools.max(1, Integer.parseInt(b));
jpayne@68 193 }else{
jpayne@68 194 ZMWStreamer.useStreamer=Parse.parseBoolean(b);
jpayne@68 195 }
jpayne@68 196 }else if(a.equals("icecreamonly") || a.equals("ico")){
jpayne@68 197 filterIceCreamOnly=Parse.parseBoolean(b);
jpayne@68 198 }else if(a.equals("keepshortreads") || a.equals("ksr")){
jpayne@68 199 keepShortReads=Parse.parseBoolean(b);
jpayne@68 200 }else if(a.equalsIgnoreCase("keepzmwstogether") || a.equals("kzt") || a.equals("keepreadstogether") || a.equals("krt")){
jpayne@68 201 keepZMWsTogether=Parse.parseBoolean(b);
jpayne@68 202 }else if(a.equalsIgnoreCase("samplerate")){
jpayne@68 203 float f=Float.parseFloat(b);
jpayne@68 204 assert(false) : "TODO"; //TODO
jpayne@68 205 }else if(a.equals("modifyheader") || a.equals("modifyheaders") || a.equals("changeheader") || a.equals("changeheaders")){
jpayne@68 206 modifyHeader=Parse.parseBoolean(b);
jpayne@68 207 }else if(a.equalsIgnoreCase("ccs")){
jpayne@68 208 CCS=Parse.parseBoolean(b);
jpayne@68 209 }else if(a.equals("npad")){
jpayne@68 210 npad=Integer.parseInt(b);
jpayne@68 211 }else if(a.equals("minlength") || a.equals("minlen")){
jpayne@68 212 minLengthAfterTrimming=Integer.parseInt(b);
jpayne@68 213 }else if(a.equals("realign")){
jpayne@68 214 realign=Parse.parseBoolean(b);
jpayne@68 215 }else if(a.equals("aligninverse") || a.equals("alignrc") || a.equals("findicecream")){
jpayne@68 216 alignRC=Parse.parseBoolean(b);
jpayne@68 217 }else if(a.equals("alignadapter")){
jpayne@68 218 alignAdapter=Parse.parseBoolean(b);
jpayne@68 219 }else if(a.equals("timeless")){
jpayne@68 220 timeless=Parse.parseBoolean(b);
jpayne@68 221 }else if(a.equals("flaglongreads")){
jpayne@68 222 flagLongReads=Parse.parseBoolean(b);
jpayne@68 223 }else if(a.equals("trimreads") || a.equals("trim")){
jpayne@68 224 trimReads=Parse.parseBoolean(b);
jpayne@68 225 }else if(a.equals("adapter")){
jpayne@68 226 adapter=b==null ? null : b.getBytes();
jpayne@68 227 }else if(a.equals("targetqlen") || a.equals("qlen")){
jpayne@68 228 targetQlen=Integer.parseInt(b);
jpayne@68 229 }else if(a.equals("maxqlenfraction") || a.equals("maxfraction") || a.equals("qlenfraction")){
jpayne@68 230 maxQlenFraction=Float.parseFloat(b);
jpayne@68 231 }else if(a.equals("junctionfraction") || a.equals("shortfraction")){
jpayne@68 232 minJunctionFraction=Float.parseFloat(b);
jpayne@68 233 }else if(a.equals("minratio1") || a.equals("ratio1") || a.equals("id1")){
jpayne@68 234 minRatio1=Float.parseFloat(b);
jpayne@68 235 }else if(a.equals("minratio2") || a.equals("ratio2") || a.equals("id2")){
jpayne@68 236 minRatio2=Float.parseFloat(b);
jpayne@68 237 }else if(a.equals("minratio") || a.equals("ratio") || a.equals("id")){
jpayne@68 238 minRatio1=minRatio2=Float.parseFloat(b);
jpayne@68 239 }else if(a.equals("adapterratio") || a.equals("adapterratio1") || a.equals("ratior") || a.equals("ratior1")){
jpayne@68 240 adapterRatio=Float.parseFloat(b);
jpayne@68 241 }else if(a.equals("adapterratio2") || a.equals("ratior2")){
jpayne@68 242 adapterRatio2=Float.parseFloat(b);
jpayne@68 243 }else if(a.equals("suspectratio")){
jpayne@68 244 suspectRatio=Float.parseFloat(b);
jpayne@68 245 }else if(a.equals("minqlen")){
jpayne@68 246 minQlen=Integer.parseInt(b);
jpayne@68 247 }else if(a.equals("minscore")){
jpayne@68 248 minScore=Integer.parseInt(b);
jpayne@68 249 }else if(a.equals("parsecustom")){
jpayne@68 250 parseCustom=Parse.parseBoolean(b);
jpayne@68 251 }else if(a.equals("printtiming") || a.equals("extended")){
jpayne@68 252 printTiming=Parse.parseBoolean(b);
jpayne@68 253 }else if(a.equals("queuelen") || a.equals("qlen")){
jpayne@68 254 queuelen=Integer.parseInt(b);
jpayne@68 255 }else if(a.equals("outg") || a.equals("outgood")){
jpayne@68 256 outg=b;
jpayne@68 257 }else if(a.equals("outa") || a.equals("outambig")){
jpayne@68 258 outa=b;
jpayne@68 259 }else if(a.equals("outb") || a.equals("outbad")){
jpayne@68 260 outb=b;
jpayne@68 261 }else if(a.equals("outj") || a.equals("outjunctions") || a.equals("junctions")){
jpayne@68 262 outj=b;
jpayne@68 263 }else if(a.equals("outs") || a.equals("outstats") || a.equals("stats")){
jpayne@68 264 outstats=b;
jpayne@68 265 }else if(a.equals("asrhist") || a.equals("ahist")){
jpayne@68 266 asrhist=b;
jpayne@68 267 }else if(a.equals("irsrhist") || a.equals("irhist")){
jpayne@68 268 irsrhist=b;
jpayne@68 269 }else if(a.equals("ambig")){
jpayne@68 270 sendAmbigToGood=sendAmbigToBad=false;
jpayne@68 271 if(b!=null){
jpayne@68 272 String[] split2=b.split(",");
jpayne@68 273 for(String s2 : split2){
jpayne@68 274 if(s2.equalsIgnoreCase("good")){sendAmbigToGood=true;}
jpayne@68 275 else if(s2.equalsIgnoreCase("bad") || s2.equalsIgnoreCase("toss")){sendAmbigToBad=true;}
jpayne@68 276 else if(s2.equalsIgnoreCase("ambig")){}
jpayne@68 277 else{assert(false) : "Bad argument: '"+s2+"' in '"+arg+"'; should be good or bad";}
jpayne@68 278 }
jpayne@68 279 }
jpayne@68 280 setAmbig=true;
jpayne@68 281 }else if(a.equalsIgnoreCase("trimpolya")){//Parse standard flags in the parser
jpayne@68 282 trimPolyA=Parse.parseBoolean(b);
jpayne@68 283 }else if(PolymerTrimmer.parse(arg, a, b)){
jpayne@68 284 //do nothing
jpayne@68 285 }else if(a.equals("minentropy") || a.equals("entropy") || a.equals("entropyfilter") || a.equals("efilter")){
jpayne@68 286 if(b==null || Character.isLetter(b.charAt(0))){
jpayne@68 287 if(Parse.parseBoolean(b)){
jpayne@68 288 entropyCutoff=0.55f;
jpayne@68 289 }else{
jpayne@68 290 entropyCutoff=-1;
jpayne@68 291 }
jpayne@68 292 }else{
jpayne@68 293 entropyCutoff=Float.parseFloat(b);
jpayne@68 294 }
jpayne@68 295 }else if(a.equals("entropyblock") || a.equals("entropylength") || a.equals("entropylen") || a.equals("entlen")){
jpayne@68 296 entropyLength=Parse.parseIntKMG(b);
jpayne@68 297 }else if(a.equals("entropyfraction") || a.equals("entfraction")){
jpayne@68 298 entropyFraction=Float.parseFloat(b);
jpayne@68 299 }else if(a.equals("monomerfraction") || a.equals("maxmonomerfraction") || a.equals("mmf")){
jpayne@68 300 maxMonomerFraction=Float.parseFloat(b);
jpayne@68 301 }else if(a.equals("parse_flag_goes_here")){
jpayne@68 302 long fake_variable=Parse.parseKMG(b);
jpayne@68 303 //Set a variable here
jpayne@68 304 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
jpayne@68 305 //do nothing
jpayne@68 306 }else{
jpayne@68 307 outstream.println("Unknown parameter "+args[i]);
jpayne@68 308 assert(false) : "Unknown parameter "+args[i];
jpayne@68 309 }
jpayne@68 310 }
jpayne@68 311
jpayne@68 312 return parser;
jpayne@68 313 }
jpayne@68 314
jpayne@68 315 /** Add or remove .gz or .bz2 as needed */
jpayne@68 316 private void fixExtensions(){
jpayne@68 317 in1=Tools.fixExtension(in1);
jpayne@68 318 }
jpayne@68 319
jpayne@68 320 /** Ensure files can be read and written */
jpayne@68 321 private void checkFileExistence(){
jpayne@68 322
jpayne@68 323 //Ensure output files can be written
jpayne@68 324 if(!Tools.testOutputFiles(overwrite, append, false, outg, outa, outb, outj, outstats, asrhist, irsrhist)){
jpayne@68 325 outstream.println((outg==null)+", "+(outb==null)+", "+outg+", "+outa+", "+outb+", "+outj+", "+outstats);
jpayne@68 326 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+outg+", "+outa+", "+outb+", "+outj+"\n");
jpayne@68 327 }
jpayne@68 328
jpayne@68 329 //Ensure input files can be read
jpayne@68 330 if(!Tools.testInputFiles(false, true, in1)){
jpayne@68 331 throw new RuntimeException("\nCan't read some input files.\n");
jpayne@68 332 }
jpayne@68 333
jpayne@68 334 //Ensure that no file was specified multiple times
jpayne@68 335 if(!Tools.testForDuplicateFiles(true, in1, outg, outa, outb, outj, outstats, asrhist, irsrhist)){
jpayne@68 336 throw new RuntimeException("\nSome file names were specified multiple times.\n");
jpayne@68 337 }
jpayne@68 338 }
jpayne@68 339
jpayne@68 340 /** Adjust file-related static fields as needed for this program */
jpayne@68 341 private static void checkStatics(){
jpayne@68 342 //Adjust the number of threads for input file reading
jpayne@68 343 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
jpayne@68 344 ByteFile.FORCE_MODE_BF2=true;
jpayne@68 345 }
jpayne@68 346
jpayne@68 347 assert(FastaReadInputStream.settingsOK());
jpayne@68 348 }
jpayne@68 349
jpayne@68 350 /*--------------------------------------------------------------*/
jpayne@68 351 /*---------------- Outer Methods ----------------*/
jpayne@68 352 /*--------------------------------------------------------------*/
jpayne@68 353
jpayne@68 354 /** Create read streams and process all data */
jpayne@68 355 void process(Timer t){
jpayne@68 356
jpayne@68 357 //Turn off read validation in the input threads to increase speed
jpayne@68 358 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
jpayne@68 359 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
jpayne@68 360
jpayne@68 361 //Create a read input stream
jpayne@68 362 ZMWStreamer zstream=new ZMWStreamer(ffin1, Shared.threads(), maxReads, -1);
jpayne@68 363
jpayne@68 364 //Optionally create read output streams
jpayne@68 365 final ConcurrentReadOutputStream rosg=makeCros(ffoutg);
jpayne@68 366 final ConcurrentReadOutputStream rosa=makeCros(ffouta);
jpayne@68 367 final ConcurrentReadOutputStream rosb=makeCros(ffoutb);
jpayne@68 368 final ConcurrentReadOutputStream rosj=makeCros(ffoutj);
jpayne@68 369
jpayne@68 370 //Reset counters
jpayne@68 371 readsProcessed=readsOut=0;
jpayne@68 372 basesProcessed=basesOut=0;
jpayne@68 373 junctionsOut=0;
jpayne@68 374
jpayne@68 375 //Process the reads in separate threads
jpayne@68 376 spawnThreads(zstream, rosg, rosa, rosb, rosj);
jpayne@68 377
jpayne@68 378 if(verbose){outstream.println("Finished; closing streams.");}
jpayne@68 379
jpayne@68 380 //Write anything that was accumulated by ReadStats
jpayne@68 381 errorState|=ReadStats.writeAll();
jpayne@68 382 //Close the read streams
jpayne@68 383 errorState|=ReadWrite.closeStreams(null, rosg, rosa, rosb, rosj);
jpayne@68 384
jpayne@68 385 //Reset read validation
jpayne@68 386 Read.VALIDATE_IN_CONSTRUCTOR=vic;
jpayne@68 387
jpayne@68 388 writeScoreRatioHistogram(ffasrhist, adapterScores);
jpayne@68 389 writeScoreRatioHistogram(ffirsrhist, repeatScores);
jpayne@68 390
jpayne@68 391 //Report timing and results
jpayne@68 392 t.stop();
jpayne@68 393
jpayne@68 394 String stats=null;
jpayne@68 395 if(format==FORMAT_TEXT){
jpayne@68 396 ByteBuilder bb=toText(t);
jpayne@68 397 stats=bb.nl().toString();
jpayne@68 398 }else if(format==FORMAT_JSON){
jpayne@68 399 JsonObject jo=toJson(t);
jpayne@68 400 stats=jo.toStringln();
jpayne@68 401 }else{
jpayne@68 402 assert(false) : "Bad format: "+format;
jpayne@68 403 }
jpayne@68 404 if(ffstats==null){
jpayne@68 405 outstream.print(stats);
jpayne@68 406 }else{
jpayne@68 407 ReadWrite.writeString(stats, outstats);
jpayne@68 408 }
jpayne@68 409
jpayne@68 410 //Throw an exception of there was an error in a thread
jpayne@68 411 if(errorState){
jpayne@68 412 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
jpayne@68 413 }
jpayne@68 414 }
jpayne@68 415
jpayne@68 416 private ByteBuilder toText(Timer t){
jpayne@68 417 ByteBuilder bb=new ByteBuilder();
jpayne@68 418
jpayne@68 419 bb.appendln(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
jpayne@68 420 bb.appendln(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
jpayne@68 421
jpayne@68 422 long readsFiltered=readsProcessed-readsOut;
jpayne@68 423 bb.appendln(Tools.numberPercent("Reads Filtered:", readsFiltered, readsFiltered*100.0/(readsProcessed), 3, 8));
jpayne@68 424 if(trimReads || trimPolyA){
jpayne@68 425 bb.appendln(Tools.numberPercent("Reads Trimmed:", readsTrimmed, readsTrimmed*100.0/(readsProcessed), 3, 8));
jpayne@68 426 bb.appendln(Tools.numberPercent("Bases Trimmed:", basesTrimmed, basesTrimmed*100.0/(basesProcessed), 3, 8));
jpayne@68 427 }
jpayne@68 428 bb.appendln(Tools.number("Total ZMWs:", ZMWs, 8));
jpayne@68 429 bb.appendln(Tools.numberPercent("Bad ZMWs:", iceCreamZMWs, iceCreamZMWs*100.0/(ZMWs), 3, 8));
jpayne@68 430 bb.appendln(Tools.numberPercent("Absent Adapter:", missingAdapterZMWs, missingAdapterZMWs*100.0/(ZMWs), 3, 8));
jpayne@68 431 bb.appendln(Tools.numberPercent("Hidden Adapter:", hiddenAdapterZMWs, hiddenAdapterZMWs*100.0/(ZMWs), 3, 8));
jpayne@68 432 bb.appendln(Tools.numberPercent("Ambiguous IR:", ambiguousZMWs, ambiguousZMWs*100.0/(ZMWs), 3, 8));
jpayne@68 433 // bb.appendln(Tools.numberPercent("Low Entropy:", lowEntropyReads, lowEntropyReads*100.0/(readsProcessed), 3, 8));
jpayne@68 434 bb.appendln(Tools.numberPercent("Low Entropy:", lowEntropyZMWs, lowEntropyZMWs*100.0/(ZMWs), 3, 8));
jpayne@68 435
jpayne@68 436 bb.appendln(Tools.number("Avg Score Ratio:", iceCreamRatio/ratiosCounted, 3, 8));
jpayne@68 437 bb.appendln(Tools.number("Score Cutoff:", minRatio2, 3, 8));
jpayne@68 438
jpayne@68 439 if(printTiming){
jpayne@68 440 bb.appendln("Iterations: "+alignmentIters/1000000+"m");
jpayne@68 441 bb.appendln("Short Iterations: "+alignmentItersShort/1000000+"m");
jpayne@68 442 bb.appendln("Elapsed: "+elapsed/1000000+"ms");
jpayne@68 443 bb.appendln("Elapsed Short: "+elapsedShort/1000000+"ms");
jpayne@68 444 }
jpayne@68 445
jpayne@68 446 if(parseCustom){
jpayne@68 447 {
jpayne@68 448 bb.appendln("\nReads:");
jpayne@68 449 bb.appendln(Tools.numberPercent("True Positive:", truePositiveReads, truePositiveReads*100.0/(readsProcessed), 2, 8));
jpayne@68 450 bb.appendln(Tools.numberPercent("True Negative:", trueNegativeReads, trueNegativeReads*100.0/(readsProcessed), 2, 8));
jpayne@68 451 bb.appendln(Tools.numberPercent("False Positive:", falsePositiveReads, falsePositiveReads*100.0/(readsProcessed), 2, 8));
jpayne@68 452 bb.appendln(Tools.numberPercent("False Negative:", falseNegativeReads, falseNegativeReads*100.0/(readsProcessed), 2, 8));
jpayne@68 453 bb.appendln(Tools.numberPercent("Ambiguous:", ambiguousReads, ambiguousReads*100.0/(readsProcessed), 2, 8));
jpayne@68 454
jpayne@68 455 double snr=(truePositiveReads+trueNegativeReads+ambiguousReads)/Tools.max(1, falsePositiveReads+falseNegativeReads+ambiguousReads);
jpayne@68 456 snr=10*Math.log10(snr);
jpayne@68 457 bb.appendln(Tools.number("SNR:", snr, 2, 8));
jpayne@68 458 }
jpayne@68 459 {
jpayne@68 460 bb.appendln("\nZMWs:");
jpayne@68 461 bb.appendln(Tools.numberPercent("True Positive:", truePositiveZMWs, truePositiveZMWs*100.0/(ZMWs), 2, 8));
jpayne@68 462 bb.appendln(Tools.numberPercent("True Negative:", trueNegativeZMWs, trueNegativeZMWs*100.0/(ZMWs), 2, 8));
jpayne@68 463 bb.appendln(Tools.numberPercent("False Positive:", falsePositiveZMWs, falsePositiveZMWs*100.0/(ZMWs), 2, 8));
jpayne@68 464 bb.appendln(Tools.numberPercent("False Negative:", falseNegativeZMWs, falseNegativeZMWs*100.0/(ZMWs), 2, 8));
jpayne@68 465 bb.appendln(Tools.numberPercent("Ambiguous:", ambiguousZMWs, ambiguousZMWs*100.0/(ZMWs), 2, 8));
jpayne@68 466
jpayne@68 467 double snr=(truePositiveZMWs+trueNegativeZMWs+ambiguousReads)/Tools.max(1, falsePositiveZMWs+falseNegativeZMWs+ambiguousZMWs);
jpayne@68 468 snr=10*Math.log10(snr);
jpayne@68 469 bb.appendln(Tools.number("SNR:", snr, 2, 8));
jpayne@68 470 }
jpayne@68 471 }
jpayne@68 472 return bb;
jpayne@68 473 }
jpayne@68 474
jpayne@68 475 private JsonObject toJson(Timer t){
jpayne@68 476 JsonObject jo=new JsonObject();
jpayne@68 477 long readsFiltered=readsProcessed-readsOut;
jpayne@68 478
jpayne@68 479 jo.add("Time", t.timeInSeconds());
jpayne@68 480 jo.add("Reads_Processed", readsProcessed);
jpayne@68 481 jo.add("Bases_Processed", basesProcessed);
jpayne@68 482 jo.add("Reads_Out", readsOut);
jpayne@68 483 jo.add("Bases_Out", basesOut);
jpayne@68 484 jo.add("Reads_Filtered", readsFiltered);
jpayne@68 485 jo.add("Reads_Filtered_Pct", readsFiltered*100.0/(readsProcessed));
jpayne@68 486 if(trimReads){
jpayne@68 487 jo.add("Reads_Trimmed", readsTrimmed);
jpayne@68 488 jo.add("Reads_Trimmed_Pct", readsTrimmed*100.0/(readsProcessed));
jpayne@68 489 jo.add("Bases_Trimmed", basesTrimmed);
jpayne@68 490 jo.add("Bases_Trimmed_Pct", basesTrimmed*100.0/(basesProcessed));
jpayne@68 491 }
jpayne@68 492 jo.add("Total_ZMWs", ZMWs);
jpayne@68 493 jo.add("Bad_ZMWs", iceCreamZMWs);
jpayne@68 494 jo.add("Bad_ZMWs_Pct", iceCreamZMWs*100.0/(ZMWs));
jpayne@68 495 jo.add("Absent_Adapter", missingAdapterZMWs);
jpayne@68 496 jo.add("Absent_Adapter_Pct", missingAdapterZMWs*100.0/(ZMWs));
jpayne@68 497 jo.add("Hidden_Adapter", hiddenAdapterZMWs);
jpayne@68 498 jo.add("Hidden_Adapter_Pct", hiddenAdapterZMWs*100.0/(ZMWs));
jpayne@68 499 // jo.add("Low_Entropy", lowEntropyReads);
jpayne@68 500 // jo.add("Low_Entropy_Pct", lowEntropyReads*100.0/(readsProcessed));
jpayne@68 501 jo.add("Low_Entropy", lowEntropyZMWs);
jpayne@68 502 jo.add("Low_Entropy_Pct", lowEntropyZMWs*100.0/(ZMWs));
jpayne@68 503 jo.add("Ambiguous_Inverted_Repeat", ambiguousZMWs);
jpayne@68 504 jo.add("Ambiguous_Inverted_Repeat_Pct", ambiguousZMWs*100.0/(ZMWs));
jpayne@68 505 jo.add("Avg_Score_Ratio_IR", iceCreamRatio/ratiosCounted);
jpayne@68 506 jo.add("Score_Cutoff_IR", minRatio2);
jpayne@68 507
jpayne@68 508 jo.add("Alignment_Iterations_IR", alignmentIters);
jpayne@68 509 jo.add("Short_Alignment_Iterations_IR", alignmentItersShort);
jpayne@68 510
jpayne@68 511 if(parseCustom){
jpayne@68 512 {
jpayne@68 513 double snr=(truePositiveReads+trueNegativeReads+ambiguousReads)/Tools.max(1, falsePositiveReads+falseNegativeReads+ambiguousReads);
jpayne@68 514 snr=10*Math.log10(snr);
jpayne@68 515 jo.add("TP_Reads", truePositiveReads);
jpayne@68 516 jo.add("TN_Reads", trueNegativeReads);
jpayne@68 517 jo.add("FP_Reads", falsePositiveReads);
jpayne@68 518 jo.add("FN_Reads", falseNegativeReads);
jpayne@68 519 jo.add("AM_Reads", ambiguousReads);
jpayne@68 520
jpayne@68 521 jo.add("TP_Reads_Pct", truePositiveReads*100.0/(readsProcessed));
jpayne@68 522 jo.add("TN_Reads_Pct", trueNegativeReads*100.0/(readsProcessed));
jpayne@68 523 jo.add("FP_Reads_Pct", falsePositiveReads*100.0/(readsProcessed));
jpayne@68 524 jo.add("FN_Reads_Pct", falseNegativeReads*100.0/(readsProcessed));
jpayne@68 525 jo.add("AM_Reads_Pct", ambiguousReads*100.0/(readsProcessed));
jpayne@68 526
jpayne@68 527 jo.add("SNR_Reads", snr);
jpayne@68 528 }
jpayne@68 529 {
jpayne@68 530 double snr=(truePositiveZMWs+trueNegativeZMWs+ambiguousZMWs)/Tools.max(1, falsePositiveZMWs+falseNegativeZMWs+ambiguousZMWs);
jpayne@68 531 snr=10*Math.log10(snr);
jpayne@68 532 jo.add("TP_ZMWs", truePositiveZMWs);
jpayne@68 533 jo.add("TN_ZMWs", trueNegativeZMWs);
jpayne@68 534 jo.add("FP_ZMWs", falsePositiveZMWs);
jpayne@68 535 jo.add("FN_ZMWs", falseNegativeZMWs);
jpayne@68 536 jo.add("AM_ZMWs", ambiguousZMWs);
jpayne@68 537
jpayne@68 538 jo.add("TP_ZMWs_Pct", truePositiveZMWs*100.0/(ZMWs));
jpayne@68 539 jo.add("TN_ZMWs_Pct", trueNegativeZMWs*100.0/(ZMWs));
jpayne@68 540 jo.add("FP_ZMWs_Pct", falsePositiveZMWs*100.0/(ZMWs));
jpayne@68 541 jo.add("FN_ZMWs_Pct", falseNegativeZMWs*100.0/(ZMWs));
jpayne@68 542 jo.add("AM_ZMWs_Pct", ambiguousZMWs*100.0/(ZMWs));
jpayne@68 543
jpayne@68 544 jo.add("SNR_ZMWs", snr);
jpayne@68 545 }
jpayne@68 546 }
jpayne@68 547 return jo;
jpayne@68 548 }
jpayne@68 549
jpayne@68 550 private static void writeScoreRatioHistogram(FileFormat ff, long[] hist){
jpayne@68 551 if(ff==null){return;}
jpayne@68 552 final ByteStreamWriter bsw=new ByteStreamWriter(ff);
jpayne@68 553 bsw.start();
jpayne@68 554 final float mult=1.0f/(hist.length-1);
jpayne@68 555
jpayne@68 556 bsw.print("#Counted\t").println(Tools.sum(hist));
jpayne@68 557 bsw.print("#Mean\t").println(Tools.averageHistogram(hist)*mult, 3);
jpayne@68 558 bsw.print("#Median\t").println(Tools.medianHistogram(hist)*mult, 3);
jpayne@68 559 bsw.print("#Mode\t").println(Tools.calcModeHistogram(hist)*mult, 3);
jpayne@68 560 bsw.print("#STDev\t").println(Tools.standardDeviationHistogram(hist)*mult, 3);
jpayne@68 561 bsw.print("#Value\tOccurances\n");
jpayne@68 562
jpayne@68 563 for(int i=0; i<hist.length; i++){
jpayne@68 564 bsw.print(i*mult, 3).tab().println(hist[i]);
jpayne@68 565 }
jpayne@68 566 bsw.poisonAndWait();
jpayne@68 567 }
jpayne@68 568
jpayne@68 569 private ConcurrentReadOutputStream makeCros(FileFormat ff){
jpayne@68 570 if(ff==null){return null;}
jpayne@68 571
jpayne@68 572 //Select output buffer size based on whether it needs to be ordered
jpayne@68 573 final int buff=16;
jpayne@68 574
jpayne@68 575 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ff, null, buff, null, ff.samOrBam() && ffin1.samOrBam());
jpayne@68 576 ros.start(); //Start the stream
jpayne@68 577 return ros;
jpayne@68 578 }
jpayne@68 579
jpayne@68 580 /** Spawn process threads */
jpayne@68 581 private void spawnThreads(final ZMWStreamer zstream,
jpayne@68 582 final ConcurrentReadOutputStream rosg, final ConcurrentReadOutputStream rosa,
jpayne@68 583 final ConcurrentReadOutputStream rosb, final ConcurrentReadOutputStream rosj){
jpayne@68 584
jpayne@68 585 //Do anything necessary prior to processing
jpayne@68 586
jpayne@68 587 //Fill a list with ProcessThreads
jpayne@68 588 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
jpayne@68 589 for(int i=0; i<threads; i++){
jpayne@68 590 alpt.add(new ProcessThread(zstream, rosg, rosa, rosb, rosj, i));
jpayne@68 591 }
jpayne@68 592
jpayne@68 593 //Start the threads
jpayne@68 594 for(ProcessThread pt : alpt){
jpayne@68 595 pt.start();
jpayne@68 596 }
jpayne@68 597
jpayne@68 598 zstream.runStreamer(false);
jpayne@68 599
jpayne@68 600 //Wait for threads to finish
jpayne@68 601 waitForThreads(alpt);
jpayne@68 602
jpayne@68 603 //Do anything necessary after processing
jpayne@68 604 ZMWs=zstream.ZMWs;
jpayne@68 605 }
jpayne@68 606
jpayne@68 607 private void waitForThreads(ArrayList<ProcessThread> alpt){
jpayne@68 608
jpayne@68 609 //Wait for completion of all threads
jpayne@68 610 boolean success=true;
jpayne@68 611 for(ProcessThread pt : alpt){
jpayne@68 612
jpayne@68 613 //Wait until this thread has terminated
jpayne@68 614 while(pt.getState()!=Thread.State.TERMINATED){
jpayne@68 615 try {
jpayne@68 616 //Attempt a join operation
jpayne@68 617 pt.join();
jpayne@68 618 } catch (InterruptedException e) {
jpayne@68 619 //Potentially handle this, if it is expected to occur
jpayne@68 620 e.printStackTrace();
jpayne@68 621 }
jpayne@68 622 }
jpayne@68 623
jpayne@68 624 //Accumulate per-thread statistics
jpayne@68 625 readsProcessed+=pt.readsProcessedT;
jpayne@68 626 basesProcessed+=pt.basesProcessedT;
jpayne@68 627
jpayne@68 628 truePositiveReads+=pt.truePositiveReadsT;
jpayne@68 629 trueNegativeReads+=pt.trueNegativeReadsT;
jpayne@68 630 falsePositiveReads+=pt.falsePositiveReadsT;
jpayne@68 631 falseNegativeReads+=pt.falseNegativeReadsT;
jpayne@68 632 ambiguousReads+=pt.ambiguousReadsT;
jpayne@68 633
jpayne@68 634 truePositiveZMWs+=pt.truePositiveZMWsT;
jpayne@68 635 trueNegativeZMWs+=pt.trueNegativeZMWsT;
jpayne@68 636 falsePositiveZMWs+=pt.falsePositiveZMWsT;
jpayne@68 637 falseNegativeZMWs+=pt.falseNegativeZMWsT;
jpayne@68 638 ambiguousZMWs+=pt.ambiguousZMWsT;
jpayne@68 639
jpayne@68 640 readsOut+=pt.readsOutT;
jpayne@68 641 basesOut+=pt.basesOutT;
jpayne@68 642 junctionsOut+=pt.junctionsOutT;
jpayne@68 643
jpayne@68 644 alignmentIters+=pt.ica.iters();
jpayne@68 645 alignmentItersShort+=pt.ica.itersShort();
jpayne@68 646 elapsed+=pt.elapsedT;
jpayne@68 647 elapsedShort+=pt.elapsedShortT;
jpayne@68 648
jpayne@68 649 iceCreamReads+=pt.iceCreamReadsT;
jpayne@68 650 iceCreamBases+=pt.iceCreamBasesT;
jpayne@68 651 iceCreamZMWs+=pt.iceCreamZMWsT;
jpayne@68 652 iceCreamRatio+=pt.iceCreamRatioT;
jpayne@68 653 ratiosCounted+=pt.ratiosCountedT;
jpayne@68 654 missingAdapterZMWs+=pt.missingAdapterZMWsT;
jpayne@68 655 hiddenAdapterZMWs+=pt.hiddenAdapterZMWsT;
jpayne@68 656 lowEntropyZMWs+=pt.lowEntropyZMWsT;
jpayne@68 657 lowEntropyReads+=pt.lowEntropyReadsT;
jpayne@68 658
jpayne@68 659 basesTrimmed+=pt.basesTrimmedT;
jpayne@68 660 readsTrimmed+=pt.readsTrimmedT;
jpayne@68 661
jpayne@68 662 for(int i=0; i<adapterScores.length; i++){
jpayne@68 663 adapterScores[i]+=pt.adapterScoresT[i];
jpayne@68 664 repeatScores[i]+=pt.repeatScoresT[i];
jpayne@68 665 }
jpayne@68 666
jpayne@68 667 success&=pt.success;
jpayne@68 668 }
jpayne@68 669
jpayne@68 670 //Track whether any threads failed
jpayne@68 671 if(!success){errorState=true;}
jpayne@68 672 }
jpayne@68 673
jpayne@68 674 /*--------------------------------------------------------------*/
jpayne@68 675 /*---------------- Inner Methods ----------------*/
jpayne@68 676 /*--------------------------------------------------------------*/
jpayne@68 677
jpayne@68 678 /*--------------------------------------------------------------*/
jpayne@68 679 /*---------------- Inner Classes ----------------*/
jpayne@68 680 /*--------------------------------------------------------------*/
jpayne@68 681
jpayne@68 682 /** Processes reads. */
jpayne@68 683 private class ProcessThread extends Thread {
jpayne@68 684
jpayne@68 685 //Constructor
jpayne@68 686 ProcessThread(final ZMWStreamer zstream_,
jpayne@68 687 final ConcurrentReadOutputStream ros_, final ConcurrentReadOutputStream rosa_,
jpayne@68 688 final ConcurrentReadOutputStream rosb_, final ConcurrentReadOutputStream rosj_, final int tid_){
jpayne@68 689 zstream=zstream_;
jpayne@68 690 rosg=ros_;
jpayne@68 691 rosa=rosa_;
jpayne@68 692 rosb=rosb_;
jpayne@68 693 rosj=rosj_;
jpayne@68 694 tid=tid_;
jpayne@68 695
jpayne@68 696 Arrays.fill(tipBufferLeft, (byte)'N');
jpayne@68 697 Arrays.fill(tipBufferRight, (byte)'N');
jpayne@68 698
jpayne@68 699 if(entropyCutoff>=0){
jpayne@68 700 eTracker=new EntropyTracker(false, entropyCutoff, true);
jpayne@68 701 }else{
jpayne@68 702 eTracker=null;
jpayne@68 703 }
jpayne@68 704 }
jpayne@68 705
jpayne@68 706 //Called by start()
jpayne@68 707 @Override
jpayne@68 708 public void run(){
jpayne@68 709 //Do anything necessary prior to processing
jpayne@68 710
jpayne@68 711 //Process the reads
jpayne@68 712 processInner();
jpayne@68 713
jpayne@68 714 //Do anything necessary after processing
jpayne@68 715
jpayne@68 716 //Indicate successful exit status
jpayne@68 717 success=true;
jpayne@68 718 }
jpayne@68 719
jpayne@68 720 /** Iterate through the reads */
jpayne@68 721 void processInner(){
jpayne@68 722
jpayne@68 723 //As long as there is a nonempty read list...
jpayne@68 724 for(ZMW reads=zstream.nextZMW(); reads!=null; reads=zstream.nextZMW()){
jpayne@68 725 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
jpayne@68 726
jpayne@68 727 processList(reads);
jpayne@68 728 }
jpayne@68 729 }
jpayne@68 730
jpayne@68 731 int flagLowEntropyReads(final ZMW reads, final float minEnt,
jpayne@68 732 final int minLen0, final float minFract){
jpayne@68 733 int found=0;
jpayne@68 734 for(Read r : reads){
jpayne@68 735 if(!r.discarded()){
jpayne@68 736 int minLen=Tools.min((int)(r.length()*minFract), minLen0);
jpayne@68 737 int maxBlock=eTracker.longestLowEntropyBlock(r.bases, false, maxMonomerFraction);
jpayne@68 738 if(maxBlock>=minLen){
jpayne@68 739 r.setDiscarded(true);
jpayne@68 740 r.setJunk(true);
jpayne@68 741 found++;
jpayne@68 742 // System.err.println(r.toFasta());
jpayne@68 743 }
jpayne@68 744 }
jpayne@68 745 }
jpayne@68 746 return found;
jpayne@68 747 }
jpayne@68 748
jpayne@68 749 int flagLongReads(final ZMW reads, int median){
jpayne@68 750 int found=0;
jpayne@68 751 for(Read r : reads){
jpayne@68 752 if(r.length()>longReadMult*median){
jpayne@68 753 r.setDiscarded(true);
jpayne@68 754 r.setHasAdapter(true);
jpayne@68 755 found++;
jpayne@68 756 }
jpayne@68 757 }
jpayne@68 758 return found;
jpayne@68 759 }
jpayne@68 760
jpayne@68 761 /** Each list is presumed to be all reads from a ZMW, in order */
jpayne@68 762 void processList(final ZMW reads){
jpayne@68 763 long numBases=0;
jpayne@68 764
jpayne@68 765 //Loop through each read in the list for statistics
jpayne@68 766 for(int idx=0; idx<reads.size(); idx++){
jpayne@68 767 final Read r1=reads.get(idx);
jpayne@68 768
jpayne@68 769 //Validate reads in worker threads
jpayne@68 770 if(!r1.validated()){r1.validate(true);}
jpayne@68 771
jpayne@68 772 //Track the initial length for statistics
jpayne@68 773 final int initialLength1=r1.length();
jpayne@68 774
jpayne@68 775 //Increment counters
jpayne@68 776 readsProcessedT++;
jpayne@68 777 basesProcessedT+=initialLength1;
jpayne@68 778 numBases+=initialLength1;
jpayne@68 779 }
jpayne@68 780 final boolean fullPass=CCS || reads.size()>=3;
jpayne@68 781
jpayne@68 782 if(trimReads || trimPolyA){
jpayne@68 783 int removed=0;
jpayne@68 784 for(int i=0; i<reads.size(); i++){
jpayne@68 785 Read r=reads.get(i);
jpayne@68 786 byte a=r.bases[0], b=r.bases[r.length()-1];
jpayne@68 787 int trimmed=0;
jpayne@68 788 if(!AminoAcid.isFullyDefined(a) || !AminoAcid.isFullyDefined(b)){
jpayne@68 789 trimmed+=trimRead(r, 80);
jpayne@68 790 }
jpayne@68 791
jpayne@68 792 if(trimReads && adapter!=null){
jpayne@68 793 int leftAdapterBases=alignLeftTipAdapter(r);
jpayne@68 794 int rightAdapterBases=alignRightTipAdapter(r);
jpayne@68 795 if(leftAdapterBases+rightAdapterBases>0){
jpayne@68 796 trimmed+=trimRead(r, adapterTipLen);
jpayne@68 797 r.setTrimmed(true);
jpayne@68 798 }
jpayne@68 799 }
jpayne@68 800 if(trimPolyA){
jpayne@68 801 int x=trimPolyA(r);
jpayne@68 802 trimmed+=x;
jpayne@68 803 if(x>0){r.setTrimmed(true);}
jpayne@68 804 }
jpayne@68 805
jpayne@68 806 if(trimmed>0){
jpayne@68 807 basesTrimmedT+=trimmed;
jpayne@68 808 readsTrimmedT++;
jpayne@68 809 }
jpayne@68 810
jpayne@68 811 //TODO: Note again, removing intermediate reads messes up forward-rev ordering
jpayne@68 812 if(r.length()<minLengthAfterTrimming){//Discard short trash
jpayne@68 813 reads.set(i, null);
jpayne@68 814 removed++;
jpayne@68 815 }
jpayne@68 816 }
jpayne@68 817 if(removed>0){
jpayne@68 818 Tools.condenseStrict(reads);
jpayne@68 819 }
jpayne@68 820 }
jpayne@68 821
jpayne@68 822 if(entropyCutoff>0){
jpayne@68 823 int bad=flagLowEntropyReads(reads, entropyCutoff, entropyLength, entropyFraction);
jpayne@68 824 if(bad>0){
jpayne@68 825 lowEntropyZMWsT++;
jpayne@68 826 lowEntropyReadsT+=bad;
jpayne@68 827 if(bad>=reads.size()){
jpayne@68 828 if(!reads.isEmpty()){outputReads(reads, null);}
jpayne@68 829 return; //No point in continuing...
jpayne@68 830 }
jpayne@68 831 }
jpayne@68 832 }
jpayne@68 833
jpayne@68 834 if(reads.isEmpty()){return;}
jpayne@68 835
jpayne@68 836 Read sample=null;//Read that will be searched for inverted repeat, typically median length
jpayne@68 837 Read shortest=null;//shortest read in the middle, or overall if no middle reads
jpayne@68 838 final int medianLength=reads.medianLength(true);
jpayne@68 839 boolean foundInvertedRepeat=false;
jpayne@68 840 int longReads=0;
jpayne@68 841 int adapterReads=0;
jpayne@68 842 int maxAdapters=0;
jpayne@68 843 int sampleNum=0;
jpayne@68 844
jpayne@68 845 if(reads.size()>=3){
jpayne@68 846 if(flagLongReads){longReads=flagLongReads(reads, medianLength);}
jpayne@68 847 for(int i=1; i<reads.size()-1; i++){
jpayne@68 848 Read r=reads.get(i);
jpayne@68 849 if(sample==null && r.length()==medianLength){
jpayne@68 850 sample=r;
jpayne@68 851 sampleNum=i;
jpayne@68 852 }
jpayne@68 853 if(shortest==null || r.length()<shortest.length()){shortest=r;}
jpayne@68 854 }
jpayne@68 855 }else{
jpayne@68 856 for(int i=0; i<reads.size(); i++){
jpayne@68 857 Read r=reads.get(i);
jpayne@68 858 if(sample==null || sample.length()<r.length()){
jpayne@68 859 sample=r;
jpayne@68 860 sampleNum=i;
jpayne@68 861 }
jpayne@68 862 if(shortest==null || r.length()<shortest.length()){shortest=r;}
jpayne@68 863 }
jpayne@68 864 }
jpayne@68 865 assert(sample!=null);
jpayne@68 866 final AlignmentResult ar=align(sample, fullPass, reads.size(), sampleNum);
jpayne@68 867
jpayne@68 868 if(ar!=null){
jpayne@68 869 foundInvertedRepeat=true;
jpayne@68 870 sample.setInvertedRepeat(true);
jpayne@68 871 if(ar.icecream || !filterIceCreamOnly){
jpayne@68 872 sample.setDiscarded(true);
jpayne@68 873 }else if(ar.ambiguous){
jpayne@68 874 sample.setAmbiguous(true);
jpayne@68 875 }
jpayne@68 876 }
jpayne@68 877
jpayne@68 878 if(alignAdapter){
jpayne@68 879 double mult=foundInvertedRepeat ? 0.9 : 1.0;
jpayne@68 880 if(needsAdapterTest(sample)){
jpayne@68 881 int x=lookForAdapter(sample, mult);
jpayne@68 882 adapterReads+=(x>0 ? 1 : 0);
jpayne@68 883 maxAdapters=Tools.max(x, maxAdapters);
jpayne@68 884 }
jpayne@68 885
jpayne@68 886 if(reads.size()>2){
jpayne@68 887 Read a=reads.get(0), b=reads.get(reads.size()-1);
jpayne@68 888 Read r=a.length()>b.length() ? a : b;
jpayne@68 889 if(needsAdapterTest(r)){
jpayne@68 890 int x=lookForAdapter(r, mult);
jpayne@68 891 adapterReads+=(x>0 ? 1 : 0);
jpayne@68 892 maxAdapters=Tools.max(x, maxAdapters);
jpayne@68 893 }
jpayne@68 894 }
jpayne@68 895
jpayne@68 896 for(Read r : reads){
jpayne@68 897 if((r.length()>=shortReadMult*shortest.length() || adapterReads>0 || longReads>0 || foundInvertedRepeat)
jpayne@68 898 && needsAdapterTest(r)){
jpayne@68 899 int x=lookForAdapter(r, mult);
jpayne@68 900 adapterReads+=(x>0 ? 1 : 0);
jpayne@68 901 maxAdapters=Tools.max(x, maxAdapters);
jpayne@68 902 }
jpayne@68 903 }
jpayne@68 904 }
jpayne@68 905
jpayne@68 906 if(ar!=null && ar.icecream){
jpayne@68 907 iceCreamRatioT+=ar.ratio;
jpayne@68 908 ratiosCountedT++;
jpayne@68 909 int idx=(int)(ar.ratio*200);
jpayne@68 910 repeatScoresT[idx]++;
jpayne@68 911 if(longReads+adapterReads==0){missingAdapterZMWsT++;}
jpayne@68 912 }
jpayne@68 913 if(longReads+adapterReads>0){
jpayne@68 914 hiddenAdapterZMWsT++;
jpayne@68 915 }
jpayne@68 916
jpayne@68 917 if(keepShortReads && maxAdapters<2){
jpayne@68 918 if(foundInvertedRepeat && !sample.hasAdapter()){
jpayne@68 919 if(reads.size()>2){
jpayne@68 920 for(int i=1; i<reads.size()-1; i++){
jpayne@68 921 reads.get(i).setDiscarded(true);
jpayne@68 922 }
jpayne@68 923 Read r=reads.get(0);
jpayne@68 924 if(r.length()>=veryShortMult*medianLength){r.setDiscarded(true);}
jpayne@68 925 r=reads.get(reads.size()-1);
jpayne@68 926 if(r.length()>=veryShortMult*medianLength){r.setDiscarded(true);}
jpayne@68 927 }else if(reads.size()==2){
jpayne@68 928 for(Read r : reads){
jpayne@68 929 if(r.length()>=veryShortMult*sample.length()){
jpayne@68 930 if(ar.icecream){
jpayne@68 931 r.setDiscarded(true);
jpayne@68 932 }else if(ar.ambiguous){
jpayne@68 933 r.setAmbiguous(true);
jpayne@68 934 }
jpayne@68 935 }
jpayne@68 936 }
jpayne@68 937 }
jpayne@68 938 }
jpayne@68 939 }else if(sample.discarded() || (longReads+adapterReads>0)){
jpayne@68 940 for(Read r : reads){
jpayne@68 941 r.setDiscarded(true);
jpayne@68 942 }
jpayne@68 943 }
jpayne@68 944
jpayne@68 945 ArrayList<Read> junctions=null;
jpayne@68 946 if(ar!=null){
jpayne@68 947 if(rosj!=null && !sample.hasAdapter()){
jpayne@68 948 Read r=ar.alignedRead;
jpayne@68 949 int width=Tools.min(200, ar.junctionLoc, r.length()-ar.junctionLoc);
jpayne@68 950 int a=ar.junctionLoc-width, b=ar.junctionLoc+width;
jpayne@68 951 byte[] bases=Arrays.copyOfRange(r.bases, a, b);
jpayne@68 952 Read junction=new Read(bases, null, r.id+"\tjunction:"+a+"-"+b, r.numericID);
jpayne@68 953 junctions=new ArrayList<Read>(1);
jpayne@68 954 junctions.add(junction);
jpayne@68 955 junctionsOutT++;
jpayne@68 956 }
jpayne@68 957 if(modifyHeader){
jpayne@68 958 sample.id=sample.id+"\tratio="+ar.ratio+"\tjunction="+ar.junctionLoc+
jpayne@68 959 "\tIR="+sample.invertedRepeat()+"\tAD="+sample.hasAdapter()+"\tFP="+fullPass+"\tsubreads="+reads.size();
jpayne@68 960 }
jpayne@68 961 }
jpayne@68 962
jpayne@68 963 removeShortTrash(reads);
jpayne@68 964
jpayne@68 965 if(!reads.isEmpty()){outputReads(reads, junctions);}
jpayne@68 966 }
jpayne@68 967
jpayne@68 968 //TODO: Now that I think about it. The order of the reads is important.
jpayne@68 969 //Since they go forward-rev-forward-rev it's imprudent to discard inner reads.
jpayne@68 970 private void removeShortTrash(ZMW reads) {
jpayne@68 971 int removed=0;
jpayne@68 972 for(int i=0; i<reads.size(); i++){
jpayne@68 973 Read r=reads.get(i);
jpayne@68 974 if(r.length()<minLengthAfterTrimming){//Discard short trash
jpayne@68 975 reads.set(i, null);
jpayne@68 976 removed++;
jpayne@68 977 }
jpayne@68 978 }
jpayne@68 979 if(removed>0){Tools.condenseStrict(reads);}
jpayne@68 980 }
jpayne@68 981
jpayne@68 982 int trimRead(Read r, int lookahead){
jpayne@68 983 final byte[] bases=r.bases;
jpayne@68 984
jpayne@68 985 int left=calcLeftTrim(bases, lookahead);
jpayne@68 986 int right=calcRightTrim(bases, lookahead);
jpayne@68 987 int trimmed=0;
jpayne@68 988
jpayne@68 989 if(left+right>0){
jpayne@68 990 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
jpayne@68 991 trimmed=TrimRead.trimByAmount(r, left, right, 1, false);
jpayne@68 992 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
jpayne@68 993 ZMW.fixReadHeader(r, left, right);
jpayne@68 994 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
jpayne@68 995 }
jpayne@68 996
jpayne@68 997 if(r.samline!=null){
jpayne@68 998 r.samline.seq=r.bases;
jpayne@68 999 r.samline.qual=r.quality;
jpayne@68 1000 }
jpayne@68 1001 return trimmed;
jpayne@68 1002 }
jpayne@68 1003
jpayne@68 1004 int trimPolyA(Read r){
jpayne@68 1005 final byte[] bases=r.bases;
jpayne@68 1006
jpayne@68 1007 int left=Tools.max(PolymerTrimmer.testLeft(bases, 'A'), PolymerTrimmer.testLeft(bases, 'T'));
jpayne@68 1008 int right=Tools.max(PolymerTrimmer.testRight(bases, 'A'), PolymerTrimmer.testRight(bases, 'T'));
jpayne@68 1009 int trimmed=0;
jpayne@68 1010
jpayne@68 1011 if(left+right>0){
jpayne@68 1012 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
jpayne@68 1013 trimmed=TrimRead.trimByAmount(r, left, right, 1, false);
jpayne@68 1014 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
jpayne@68 1015 ZMW.fixReadHeader(r, left, right);
jpayne@68 1016 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
jpayne@68 1017 }
jpayne@68 1018
jpayne@68 1019 if(r.samline!=null){
jpayne@68 1020 r.samline.seq=r.bases;
jpayne@68 1021 r.samline.qual=r.quality;
jpayne@68 1022 }
jpayne@68 1023 return trimmed;
jpayne@68 1024 }
jpayne@68 1025
jpayne@68 1026 final int calcLeftTrim(final byte[] bases, int lookahead){
jpayne@68 1027 final int len=bases.length;
jpayne@68 1028 int lastUndef=-1;
jpayne@68 1029 for(int i=0, defined=0; i<len && defined<lookahead; i++){
jpayne@68 1030 if(AminoAcid.isFullyDefined(bases[i])){
jpayne@68 1031 defined++;
jpayne@68 1032 }else{
jpayne@68 1033 lastUndef=i;
jpayne@68 1034 defined=0;
jpayne@68 1035 }
jpayne@68 1036 }
jpayne@68 1037 return lastUndef+1;
jpayne@68 1038 }
jpayne@68 1039
jpayne@68 1040 final int calcRightTrim(final byte[] bases, int lookahead){
jpayne@68 1041 final int len=bases.length;
jpayne@68 1042 int lastUndef=len;
jpayne@68 1043 for(int i=len-1, defined=0; i>=0 && defined<lookahead; i--){
jpayne@68 1044 if(AminoAcid.isFullyDefined(bases[i])){
jpayne@68 1045 defined++;
jpayne@68 1046 }else{
jpayne@68 1047 lastUndef=i;
jpayne@68 1048 defined=0;
jpayne@68 1049 }
jpayne@68 1050 }
jpayne@68 1051 return len-lastUndef-1;
jpayne@68 1052 }
jpayne@68 1053
jpayne@68 1054 final int alignLeftTipAdapter(Read r){
jpayne@68 1055 assert(adapter.length<adapterTipLen); //Time to increase adapterTipLen
jpayne@68 1056 if(r.length()<adapterTipLen){return 0;}
jpayne@68 1057 final byte[] array=tipBufferLeft;
jpayne@68 1058
jpayne@68 1059 for(int i=adapterTipPad, j=0; i<array.length; i++, j++){array[i]=r.bases[j];}
jpayne@68 1060 int[] rvec=ssa.fillAndScoreLimited(adapter, array, 0, array.length, minSwScore);
jpayne@68 1061
jpayne@68 1062 if(rvec==null || rvec[0]<minSwScore){return 0;}
jpayne@68 1063 final int score=rvec[0];
jpayne@68 1064 final int start=Tools.max(0, rvec[1]-adapterTipPad);
jpayne@68 1065 final int stop=rvec[2]-adapterTipPad;
jpayne@68 1066 for(int i=start; i<=stop; i++){r.bases[i]='X';}
jpayne@68 1067 return stop-start+1;
jpayne@68 1068 }
jpayne@68 1069
jpayne@68 1070 final int alignRightTipAdapter(Read r){
jpayne@68 1071 final byte[] bases=r.bases;
jpayne@68 1072 assert(adapter.length<adapterTipLen); //Time to increase adapterTipLen
jpayne@68 1073 if(r.length()<adapterTipLen){return 0;}
jpayne@68 1074 final byte[] array=tipBufferRight;
jpayne@68 1075
jpayne@68 1076 for(int i=0, j=bases.length-adapterTipLen; i<adapterTipLen; i++, j++){array[i]=bases[j];}
jpayne@68 1077 int[] rvec=ssa.fillAndScoreLimited(adapter, array, 0, array.length, minSwScore);
jpayne@68 1078
jpayne@68 1079 if(rvec==null || rvec[0]<minSwScore){return 0;}
jpayne@68 1080 final int score=rvec[0];
jpayne@68 1081 final int start=Tools.max(0, rvec[1]-adapterTipPad);
jpayne@68 1082 final int stop=rvec[2]-adapterTipPad;
jpayne@68 1083 for(int i=start; i<=stop; i++){r.bases[i]='X';}
jpayne@68 1084 return stop-start+1;
jpayne@68 1085 }
jpayne@68 1086
jpayne@68 1087 boolean needsAdapterTest(Read r){
jpayne@68 1088 if(r.tested() || r.hasAdapter()){return false;}
jpayne@68 1089 if(adapterRatio<=0 || r.discarded()){return true;}
jpayne@68 1090 AlignmentResult aa=fla.alignForwardShort(adapter, r.bases, 0, r.bases.length-1, adapterRatio);
jpayne@68 1091 return aa!=null;
jpayne@68 1092 }
jpayne@68 1093
jpayne@68 1094 private void outputReads(ZMW reads, ArrayList<Read> junctions){
jpayne@68 1095 final int size=reads.size();
jpayne@68 1096 final ArrayList<Read> good=(rosg==null ? null : new ArrayList<Read>(size));
jpayne@68 1097 final ArrayList<Read> ambig=(rosa==null ? null : new ArrayList<Read>(size));
jpayne@68 1098 final ArrayList<Read> bad=(rosb==null ? null : new ArrayList<Read>(size));
jpayne@68 1099
jpayne@68 1100 int discardedReads=0;
jpayne@68 1101 int ambigReads=0;
jpayne@68 1102 int trimmedReads=0;
jpayne@68 1103 boolean sendAllToDiscarded=false;
jpayne@68 1104 boolean sendAllToAmbiguous=false;
jpayne@68 1105
jpayne@68 1106 //Check to see if any reads are discarded or ambiguous
jpayne@68 1107 for(Read r : reads){
jpayne@68 1108 if(r.discarded()){
jpayne@68 1109 discardedReads++;
jpayne@68 1110 }else if(r.ambiguous()){
jpayne@68 1111 ambigReads++;
jpayne@68 1112 }else if(r.trimmed()){
jpayne@68 1113 trimmedReads++;
jpayne@68 1114 }
jpayne@68 1115 }
jpayne@68 1116
jpayne@68 1117 //Unify flags on all reads
jpayne@68 1118 if(keepZMWsTogether){
jpayne@68 1119 if(discardedReads>0){sendAllToDiscarded=true;}
jpayne@68 1120 else if(ambigReads>0){sendAllToAmbiguous=true;}
jpayne@68 1121 }
jpayne@68 1122 // if(discardedReads>0 || ambigReads>0){System.err.println("\nd="+discardedReads+", a="+ambigReads);}
jpayne@68 1123 if(discardedReads>0){iceCreamZMWsT++;}
jpayne@68 1124
jpayne@68 1125 int trueIceCreamReads=0;
jpayne@68 1126 for(Read r : reads){
jpayne@68 1127 boolean trueIceCream=(parseCustom ? ReadBuilder.isIceCream(r.id) : false);
jpayne@68 1128 trueIceCreamReads+=(trueIceCream ? 1 : 0);
jpayne@68 1129 if(r.discarded() || sendAllToDiscarded){
jpayne@68 1130 // assert(false);
jpayne@68 1131 if(bad!=null){bad.add(r);}
jpayne@68 1132 if(trueIceCream){truePositiveReadsT++;}
jpayne@68 1133 else{falsePositiveReadsT++;}
jpayne@68 1134 // System.err.println("d:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size());
jpayne@68 1135 }else if(r.ambiguous() || sendAllToAmbiguous){
jpayne@68 1136 if(ambig!=null){ambig.add(r);}
jpayne@68 1137 if(sendAmbigToGood){
jpayne@68 1138 readsOutT++;
jpayne@68 1139 basesOutT+=r.length();
jpayne@68 1140 if(good!=null) {good.add(r);}
jpayne@68 1141 }
jpayne@68 1142 if(sendAmbigToBad && bad!=null){bad.add(r);}
jpayne@68 1143 ambiguousReadsT++;
jpayne@68 1144 // System.err.println("a*:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size());
jpayne@68 1145 }else{
jpayne@68 1146 if(good!=null){
jpayne@68 1147 good.add(r);
jpayne@68 1148 }
jpayne@68 1149 readsOutT++;
jpayne@68 1150 basesOutT+=r.length();
jpayne@68 1151 if(trueIceCream && !r.trimmed()){falseNegativeReadsT++;}
jpayne@68 1152 else{trueNegativeReadsT++;}
jpayne@68 1153 // if(discardedReads>0 || ambigReads>0){System.err.println("g*:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size());}
jpayne@68 1154 }
jpayne@68 1155 }
jpayne@68 1156
jpayne@68 1157 if(trueIceCreamReads>0){
jpayne@68 1158 if(discardedReads>0 || trimmedReads>0){
jpayne@68 1159 truePositiveZMWsT++;
jpayne@68 1160 }else if(ambigReads>0){
jpayne@68 1161 ambiguousZMWs++;
jpayne@68 1162 }else{
jpayne@68 1163 falseNegativeZMWsT++;
jpayne@68 1164 // StringBuilder sb=new StringBuilder();
jpayne@68 1165 // for(Read r : reads) {sb.append("\n").append(r.id);}
jpayne@68 1166 // System.err.println(sb);
jpayne@68 1167 }
jpayne@68 1168 }else{
jpayne@68 1169 if(discardedReads>0){
jpayne@68 1170 falsePositiveZMWsT++;
jpayne@68 1171 }else if(ambigReads>0){
jpayne@68 1172 ambiguousZMWs++;
jpayne@68 1173 }else{
jpayne@68 1174 trueNegativeZMWsT++;
jpayne@68 1175 }
jpayne@68 1176 }
jpayne@68 1177
jpayne@68 1178 if(rosg!=null && good!=null && !good.isEmpty()){rosg.add(good, 0);}
jpayne@68 1179 if(rosa!=null && ambig!=null && !ambig.isEmpty()){rosa.add(ambig, 0);}
jpayne@68 1180 if(rosb!=null && bad!=null && !bad.isEmpty()){rosb.add(bad, 0);}
jpayne@68 1181 if(rosj!=null && junctions!=null && !junctions.isEmpty()){rosj.add(junctions, 0);}
jpayne@68 1182 }
jpayne@68 1183
jpayne@68 1184 /**
jpayne@68 1185 * Align part of a read to itself to look for inverted repeats.
jpayne@68 1186 */
jpayne@68 1187 AlignmentResult align(final Read r, boolean fullPass, int passes, int readNum){
jpayne@68 1188 if(!alignRC){return null;}
jpayne@68 1189 final byte[] bases=r.bases;
jpayne@68 1190
jpayne@68 1191 int qlen=(int)Tools.max(minQlen, Tools.min(targetQlen, bases.length*maxQlenFraction));
jpayne@68 1192 if(qlen>0.45f*bases.length){return null;}//Ignore short stuff
jpayne@68 1193
jpayne@68 1194 //Perform an initial scan using the tips of the reads to look for an inverted repeat
jpayne@68 1195 boolean tipOnly=filterIceCreamOnly && fullPass;
jpayne@68 1196 // System.err.println(filterIceCreamOnly+", "+fullPass+", "+tipOnly);
jpayne@68 1197 AlignmentResult a=alignLeft(bases, qlen, minRatio1, true, tipOnly);
jpayne@68 1198 AlignmentResult b=(fullPass && false ? null : alignRight(bases, qlen, minRatio1, true, tipOnly));//A second alignment is not needed for a full pass.
jpayne@68 1199 AlignmentResult ar=(a==null ? b : b==null ? a : a.maxScore>=b.maxScore ? a : b);
jpayne@68 1200
jpayne@68 1201 //If nothing was detected, return
jpayne@68 1202 if(ar==null){return null;}
jpayne@68 1203 ar.alignedRead=r;
jpayne@68 1204
jpayne@68 1205 //At this point, the read contains an inverted repeat of length qlen.
jpayne@68 1206 final int expectedJunction=ar.rLen/2;
jpayne@68 1207
jpayne@68 1208 if(ar.left){
jpayne@68 1209 ar.junctionLoc=ar.maxRpos/2;
jpayne@68 1210 }else{
jpayne@68 1211 int innerLeft=ar.maxRpos;
jpayne@68 1212 int innerRight=ar.rLen-ar.qLen;
jpayne@68 1213 ar.junctionLoc=(innerLeft+innerRight)/2;
jpayne@68 1214 }
jpayne@68 1215
jpayne@68 1216 // if(fullPass){//This code doesn't seem to have any effect and it's not clear why it is present
jpayne@68 1217 // int dif=Tools.absdif(expectedJunction, ar.junctionLoc);
jpayne@68 1218 // if(dif>expectedJunction*0.1) {
jpayne@68 1219 // if(filterIceCreamOnly){return ar;}
jpayne@68 1220 // }
jpayne@68 1221 // }
jpayne@68 1222
jpayne@68 1223 if(realign){
jpayne@68 1224 if(ar.junctionLoc<expectedJunction){
jpayne@68 1225 int qlen2=(int)(ar.junctionLoc*0.9);
jpayne@68 1226 if(qlen2>=qlen){
jpayne@68 1227 ar=alignLeft(bases, qlen2, minRatio2, false, false);
jpayne@68 1228 }
jpayne@68 1229 }else{
jpayne@68 1230 int qlen2=(int)((ar.rLen-ar.junctionLoc)*0.9);
jpayne@68 1231 if(qlen2>=qlen){
jpayne@68 1232 ar=alignRight(bases, qlen2, minRatio2, false, false);
jpayne@68 1233 }
jpayne@68 1234 }
jpayne@68 1235 if(ar==null){return null;}
jpayne@68 1236 ar.alignedRead=r;
jpayne@68 1237 }
jpayne@68 1238
jpayne@68 1239 //At this point, the read contains an inverted repeat mirrored across a junction.
jpayne@68 1240 final float junctionFraction;
jpayne@68 1241 if(ar.left){
jpayne@68 1242 ar.junctionLoc=ar.maxRpos/2;
jpayne@68 1243 junctionFraction=ar.junctionLoc/(float)ar.rLen;
jpayne@68 1244 }else{
jpayne@68 1245 int innerLeft=ar.maxRpos;
jpayne@68 1246 int innerRight=ar.rLen-ar.qLen;
jpayne@68 1247 ar.junctionLoc=(innerLeft+innerRight)/2;
jpayne@68 1248 junctionFraction=(ar.rLen-ar.junctionLoc)/(float)ar.rLen;
jpayne@68 1249 }
jpayne@68 1250
jpayne@68 1251 final int dif=Tools.absdif(expectedJunction, ar.junctionLoc);
jpayne@68 1252 if(fullPass){
jpayne@68 1253 if(junctionFraction<minJunctionFraction){
jpayne@68 1254 ar.icecream=false;
jpayne@68 1255 }else{
jpayne@68 1256 ar.icecream=true;
jpayne@68 1257 }
jpayne@68 1258 }else if(passes==2){
jpayne@68 1259 if(junctionFraction<minJunctionFraction){
jpayne@68 1260 if(readNum==0){
jpayne@68 1261 //First read, the junction should be closer to the beginning for real ice cream
jpayne@68 1262 if(ar.junctionLoc>expectedJunction){
jpayne@68 1263 ar.icecream=false;
jpayne@68 1264 }else{
jpayne@68 1265 ar.ambiguous=true;
jpayne@68 1266 }
jpayne@68 1267 }else{
jpayne@68 1268 //Last read, the junction should be closer to the end for real ice cream
jpayne@68 1269 assert(readNum==1) : readNum;
jpayne@68 1270 if(ar.junctionLoc<expectedJunction){
jpayne@68 1271 ar.icecream=false;
jpayne@68 1272 }else{
jpayne@68 1273 ar.ambiguous=true;
jpayne@68 1274 }
jpayne@68 1275 }
jpayne@68 1276 return ar;
jpayne@68 1277 }else{
jpayne@68 1278 ar.icecream=true;
jpayne@68 1279 }
jpayne@68 1280 }else{
jpayne@68 1281 if(junctionFraction<minJunctionFraction){
jpayne@68 1282 ar.icecream=true;
jpayne@68 1283 }else{
jpayne@68 1284 ar.ambiguous=true;
jpayne@68 1285 }
jpayne@68 1286 }
jpayne@68 1287 return ar;
jpayne@68 1288 }
jpayne@68 1289
jpayne@68 1290 /** Align the left qlen bases to the rest of the read. */
jpayne@68 1291 private AlignmentResult alignLeft(final byte[] bases, final int qlen, final float minRatio, boolean v2, boolean tipOnly){
jpayne@68 1292 final byte[] query=Arrays.copyOfRange(bases, 0, qlen);
jpayne@68 1293 AminoAcid.reverseComplementBasesInPlace(query);
jpayne@68 1294 final AlignmentResult ar;
jpayne@68 1295 final int rstop=bases.length-1;
jpayne@68 1296 final int rstart=(tipOnly ? Tools.max(qlen, rstop-(int)(tipRatio*qlen)) : qlen);
jpayne@68 1297 if(v2){
jpayne@68 1298 // elapsedShortT-=System.nanoTime();
jpayne@68 1299 ar=ica.alignForwardShort(query, bases, rstart, rstop, minScore, minRatio);
jpayne@68 1300 // elapsedShortT+=System.nanoTime();
jpayne@68 1301 }else{
jpayne@68 1302 // elapsedT-=System.nanoTime();
jpayne@68 1303 ar=ica.alignForward(query, bases, rstart, rstop, minScore, minRatio);
jpayne@68 1304 // elapsedT+=System.nanoTime();
jpayne@68 1305 }
jpayne@68 1306 if(ar!=null){ar.left=true;}
jpayne@68 1307 return ar;
jpayne@68 1308 }
jpayne@68 1309
jpayne@68 1310 /** Align the right qlen bases to the rest of the read. */
jpayne@68 1311 private AlignmentResult alignRight(final byte[] bases, final int qlen, final float minRatio, boolean v2, boolean tipOnly){
jpayne@68 1312 final byte[] query=Arrays.copyOfRange(bases, bases.length-qlen, bases.length);
jpayne@68 1313 AminoAcid.reverseComplementBasesInPlace(query);
jpayne@68 1314 final AlignmentResult ar;
jpayne@68 1315 final int rstop=(tipOnly ? Tools.min((int)(tipRatio*qlen), bases.length-qlen-1) : bases.length-qlen-1);
jpayne@68 1316 final int rstart=0;
jpayne@68 1317 if(v2){
jpayne@68 1318 // elapsedShortT-=System.nanoTime();
jpayne@68 1319 ar=ica.alignForwardShort(query, bases, rstart, rstop, minScore, minRatio);
jpayne@68 1320 // elapsedShortT+=System.nanoTime();
jpayne@68 1321 }else{
jpayne@68 1322 // elapsedT-=System.nanoTime();
jpayne@68 1323 ar=ica.alignForward(query, bases, rstart, rstop, minScore, minRatio);
jpayne@68 1324 // elapsedT+=System.nanoTime();
jpayne@68 1325 }
jpayne@68 1326 if(ar!=null){ar.left=false;}
jpayne@68 1327 return ar;
jpayne@68 1328 }
jpayne@68 1329
jpayne@68 1330 /** Align the adapter sequence to the read, piecewise, to count matches. */
jpayne@68 1331 private int lookForAdapter(Read r, double mult) {
jpayne@68 1332 assert(!r.hasAdapter() && !r.tested());
jpayne@68 1333 int begin=0;
jpayne@68 1334 while(begin<r.length() && r.bases[begin]=='N'){begin++;} //Skip reads made of 'N'
jpayne@68 1335 if(begin>=r.length()){return 0;}
jpayne@68 1336
jpayne@68 1337 int suspectThresh=(int)(minSwScoreSuspect*mult);
jpayne@68 1338 int scoreThresh=(int)(minSwScore*mult);
jpayne@68 1339
jpayne@68 1340 // final byte[] array=npad(r.bases, pad ? npad : 0);
jpayne@68 1341 final byte[] array=npad(r.bases, npad);
jpayne@68 1342
jpayne@68 1343 // assert(array==r.bases) : npad;
jpayne@68 1344
jpayne@68 1345 int lim=array.length-npad-stride;
jpayne@68 1346
jpayne@68 1347 int found=0;
jpayne@68 1348
jpayne@68 1349 int lastSuspect=-1;
jpayne@68 1350 int lastConfirmed=-1;
jpayne@68 1351 int maxScore=0;
jpayne@68 1352
jpayne@68 1353 // final int revisedStride=(r.length()>1800 ? stride : (int)(stride*0.6f));
jpayne@68 1354 for(int i=begin; i<lim; i+=stride){
jpayne@68 1355 int j=Tools.min(i+window, array.length-1);
jpayne@68 1356 if(j-i<window){
jpayne@68 1357 lim=0; //Last loop cycle
jpayne@68 1358 // i=Tools.max(0, array.length-2*query1.length);
jpayne@68 1359 }
jpayne@68 1360
jpayne@68 1361 {
jpayne@68 1362
jpayne@68 1363 int[] rvec;
jpayne@68 1364 // if(timeless){//A speed-optimized version
jpayne@68 1365 rvec=ssa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);
jpayne@68 1366 // }else{rvec=msa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);}
jpayne@68 1367 if(rvec!=null && rvec[0]>=suspectThresh){
jpayne@68 1368 final int score=rvec[0];
jpayne@68 1369 final int start=rvec[1];
jpayne@68 1370 final int stop=rvec[2];
jpayne@68 1371 assert(score>=suspectThresh);
jpayne@68 1372 if((i==0 || start>i) && (j==array.length-1 || stop<j)){
jpayne@68 1373 boolean kill=(score>=scoreThresh ||
jpayne@68 1374 (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect<suspectDistance) ||
jpayne@68 1375 (lastConfirmed>0 && start>=lastConfirmed && start-lastConfirmed<suspectDistance));
jpayne@68 1376
jpayne@68 1377 if(!kill && useLocality && array.length-stop>window){//Look ahead
jpayne@68 1378 rvec=ssa.fillAndScoreLimited(adapter, array, stop, stop+window, suspectThresh);
jpayne@68 1379 if(rvec!=null){
jpayne@68 1380 if(score>=suspectMidpoint && rvec[0]>=suspectThresh && rvec[1]-stop<suspectDistance){kill=true;}
jpayne@68 1381 else if(score>=suspectThresh && rvec[0]>=scoreThresh && rvec[1]-stop<suspectDistance){kill=true;}
jpayne@68 1382 }
jpayne@68 1383 }
jpayne@68 1384
jpayne@68 1385 if(!kill && useAltMsa){//Try alternate msa
jpayne@68 1386 rvec=msa2.fillAndScoreLimited(adapter, array, Tools.max(0, start-4), Tools.min(stop+4, array.length-1), suspectThresh);
jpayne@68 1387 if(rvec!=null && rvec[0]>=(scoreThresh)){kill=true;}
jpayne@68 1388 }
jpayne@68 1389
jpayne@68 1390 if(kill){
jpayne@68 1391 maxScore=Tools.max(maxScore, score);
jpayne@68 1392 // if(print) {System.err.println("Found adapter at distance "+Tools.min(start, array.length-stop));}
jpayne@68 1393 // System.out.println("-:"+score+", "+scoreThresh+", "+suspectThresh+"\t"+lastSuspect+", "+start+", "+stop);
jpayne@68 1394 found++;
jpayne@68 1395 for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';}
jpayne@68 1396 // assert(Shared.threads()!=1) : new String(array)+"\n"+start+", "+stop;
jpayne@68 1397 if(useLocality && score>=scoreThresh){lastConfirmed=Tools.max(lastConfirmed, stop);}
jpayne@68 1398 }
jpayne@68 1399 }
jpayne@68 1400 // System.out.println("Set lastSuspect="+stop+" on score "+score);
jpayne@68 1401 if(useLocality){lastSuspect=Tools.max(lastSuspect, stop);}
jpayne@68 1402 }
jpayne@68 1403 }
jpayne@68 1404 }
jpayne@68 1405
jpayne@68 1406 r.setTested(true);
jpayne@68 1407 if(found>0){
jpayne@68 1408 r.setHasAdapter(true);
jpayne@68 1409 r.setDiscarded(true);
jpayne@68 1410 r.setAmbiguous(false);
jpayne@68 1411
jpayne@68 1412 int idx=(int)((maxScore*200.0)/maxSwScore);
jpayne@68 1413 adapterScoresT[idx]++;
jpayne@68 1414 }else{
jpayne@68 1415 r.setHasAdapter(false);
jpayne@68 1416 }
jpayne@68 1417
jpayne@68 1418 return found;
jpayne@68 1419 }
jpayne@68 1420
jpayne@68 1421 /** Align the adapter sequence to the read ends, and trim if needed. */
jpayne@68 1422 private int trimTerminalAdapters(Read r, double mult) {
jpayne@68 1423 assert(!r.hasAdapter() && !r.tested());
jpayne@68 1424 int begin=0;
jpayne@68 1425 while(begin<r.length() && r.bases[begin]=='N'){begin++;} //Skip reads made of 'N'
jpayne@68 1426 if(begin>=r.length()){return 0;}
jpayne@68 1427
jpayne@68 1428 int suspectThresh=(int)(minSwScoreSuspect*mult);
jpayne@68 1429 int scoreThresh=(int)(minSwScore*mult);
jpayne@68 1430
jpayne@68 1431 // final byte[] array=npad(r.bases, pad ? npad : 0);
jpayne@68 1432 final byte[] array=npad(r.bases, npad);
jpayne@68 1433
jpayne@68 1434 // assert(array==r.bases) : npad;
jpayne@68 1435
jpayne@68 1436 int lim=array.length-npad-stride;
jpayne@68 1437
jpayne@68 1438 int found=0;
jpayne@68 1439
jpayne@68 1440 int lastSuspect=-1;
jpayne@68 1441 int lastConfirmed=-1;
jpayne@68 1442 int maxScore=0;
jpayne@68 1443
jpayne@68 1444 // final int revisedStride=(r.length()>1800 ? stride : (int)(stride*0.6f));
jpayne@68 1445 for(int i=begin; i<lim; i+=stride){
jpayne@68 1446 int j=Tools.min(i+window, array.length-1);
jpayne@68 1447 if(j-i<window){
jpayne@68 1448 lim=0; //Last loop cycle
jpayne@68 1449 // i=Tools.max(0, array.length-2*query1.length);
jpayne@68 1450 }
jpayne@68 1451
jpayne@68 1452 {
jpayne@68 1453
jpayne@68 1454 int[] rvec;
jpayne@68 1455 // if(timeless){//A speed-optimized version
jpayne@68 1456 rvec=ssa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);
jpayne@68 1457 // }else{rvec=msa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);}
jpayne@68 1458 if(rvec!=null && rvec[0]>=suspectThresh){
jpayne@68 1459 final int score=rvec[0];
jpayne@68 1460 final int start=rvec[1];
jpayne@68 1461 final int stop=rvec[2];
jpayne@68 1462 assert(score>=suspectThresh);
jpayne@68 1463 if((i==0 || start>i) && (j==array.length-1 || stop<j)){
jpayne@68 1464 boolean kill=(score>=scoreThresh ||
jpayne@68 1465 (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect<suspectDistance) ||
jpayne@68 1466 (lastConfirmed>0 && start>=lastConfirmed && start-lastConfirmed<suspectDistance));
jpayne@68 1467
jpayne@68 1468 if(!kill && useLocality && array.length-stop>window){//Look ahead
jpayne@68 1469 rvec=ssa.fillAndScoreLimited(adapter, array, stop, stop+window, suspectThresh);
jpayne@68 1470 if(rvec!=null){
jpayne@68 1471 if(score>=suspectMidpoint && rvec[0]>=suspectThresh && rvec[1]-stop<suspectDistance){kill=true;}
jpayne@68 1472 else if(score>=suspectThresh && rvec[0]>=scoreThresh && rvec[1]-stop<suspectDistance){kill=true;}
jpayne@68 1473 }
jpayne@68 1474 }
jpayne@68 1475
jpayne@68 1476 if(!kill && useAltMsa){//Try alternate msa
jpayne@68 1477 rvec=msa2.fillAndScoreLimited(adapter, array, Tools.max(0, start-4), Tools.min(stop+4, array.length-1), suspectThresh);
jpayne@68 1478 if(rvec!=null && rvec[0]>=(scoreThresh)){kill=true;}
jpayne@68 1479 }
jpayne@68 1480
jpayne@68 1481 if(kill){
jpayne@68 1482 maxScore=Tools.max(maxScore, score);
jpayne@68 1483 // if(print) {System.err.println("Found adapter at distance "+Tools.min(start, array.length-stop));}
jpayne@68 1484 // System.out.println("-:"+score+", "+scoreThresh+", "+suspectThresh+"\t"+lastSuspect+", "+start+", "+stop);
jpayne@68 1485 found++;
jpayne@68 1486 for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';}
jpayne@68 1487 // assert(Shared.threads()!=1) : new String(array)+"\n"+start+", "+stop;
jpayne@68 1488 if(useLocality && score>=scoreThresh){lastConfirmed=Tools.max(lastConfirmed, stop);}
jpayne@68 1489 }
jpayne@68 1490 }
jpayne@68 1491 // System.out.println("Set lastSuspect="+stop+" on score "+score);
jpayne@68 1492 if(useLocality){lastSuspect=Tools.max(lastSuspect, stop);}
jpayne@68 1493 }
jpayne@68 1494 }
jpayne@68 1495 }
jpayne@68 1496
jpayne@68 1497 r.setTested(true);
jpayne@68 1498 if(found>0){
jpayne@68 1499 r.setHasAdapter(true);
jpayne@68 1500 r.setDiscarded(true);
jpayne@68 1501 r.setAmbiguous(false);
jpayne@68 1502
jpayne@68 1503 int idx=(int)((maxScore*200.0)/maxSwScore);
jpayne@68 1504 adapterScoresT[idx]++;
jpayne@68 1505 }else{
jpayne@68 1506 r.setHasAdapter(false);
jpayne@68 1507 }
jpayne@68 1508
jpayne@68 1509 return found;
jpayne@68 1510 }
jpayne@68 1511
jpayne@68 1512 private byte[] npad(final byte[] array, final int pad){
jpayne@68 1513 if(pad<=0){return array;}
jpayne@68 1514 final int len=array.length+2*pad;
jpayne@68 1515 byte[] r=new byte[len];
jpayne@68 1516 for(int i=0; i<pad; i++){r[i]='N';}
jpayne@68 1517 for(int i=pad, j=0; j<array.length; i++, j++){r[i]=array[j];}
jpayne@68 1518 for(int i=array.length+pad, limit=Tools.min(r.length, len+50); i<limit; i++){r[i]='N';}
jpayne@68 1519 return r;
jpayne@68 1520 }
jpayne@68 1521
jpayne@68 1522 /*--------------------------------------------------------------*/
jpayne@68 1523 /*---------------- Inner Class Fields ----------------*/
jpayne@68 1524 /*--------------------------------------------------------------*/
jpayne@68 1525
jpayne@68 1526 /** Number of reads processed by this thread */
jpayne@68 1527 protected long readsProcessedT=0;
jpayne@68 1528 /** Number of bases processed by this thread */
jpayne@68 1529 protected long basesProcessedT=0;
jpayne@68 1530
jpayne@68 1531
jpayne@68 1532 protected long truePositiveReadsT=0;
jpayne@68 1533 protected long falsePositiveReadsT=0;
jpayne@68 1534 protected long trueNegativeReadsT=0;
jpayne@68 1535 protected long falseNegativeReadsT=0;
jpayne@68 1536 protected long ambiguousReadsT=0;
jpayne@68 1537
jpayne@68 1538 protected long truePositiveZMWsT=0;
jpayne@68 1539 protected long falsePositiveZMWsT=0;
jpayne@68 1540 protected long trueNegativeZMWsT=0;
jpayne@68 1541 protected long falseNegativeZMWsT=0;
jpayne@68 1542 protected long ambiguousZMWsT=0;
jpayne@68 1543
jpayne@68 1544
jpayne@68 1545 protected long elapsedT=0;
jpayne@68 1546 protected long elapsedShortT=0;
jpayne@68 1547
jpayne@68 1548 //Unused
jpayne@68 1549 protected long iceCreamReadsT=0;
jpayne@68 1550 protected long iceCreamBasesT=0;
jpayne@68 1551
jpayne@68 1552 protected long iceCreamZMWsT=0;
jpayne@68 1553 protected double iceCreamRatioT=0;
jpayne@68 1554 protected long ratiosCountedT=0;
jpayne@68 1555 protected long missingAdapterZMWsT=0;
jpayne@68 1556 protected long hiddenAdapterZMWsT=0;
jpayne@68 1557
jpayne@68 1558 protected long basesTrimmedT=0;
jpayne@68 1559 protected long readsTrimmedT=0;
jpayne@68 1560
jpayne@68 1561 /** Number of reads retained by this thread */
jpayne@68 1562 protected long readsOutT=0;
jpayne@68 1563 /** Number of bases retained by this thread */
jpayne@68 1564 protected long basesOutT=0;
jpayne@68 1565 /** Number of junctions detected in IR reads by this thread */
jpayne@68 1566 protected long junctionsOutT=0;
jpayne@68 1567
jpayne@68 1568 protected long lowEntropyZMWsT=0;
jpayne@68 1569 protected long lowEntropyReadsT=0;
jpayne@68 1570
jpayne@68 1571 /** True only if this thread has completed successfully */
jpayne@68 1572 boolean success=false;
jpayne@68 1573
jpayne@68 1574 /** Shared output stream */
jpayne@68 1575 private final ConcurrentReadOutputStream rosg;
jpayne@68 1576 /** Shared output stream for ambiguous reads */
jpayne@68 1577 private final ConcurrentReadOutputStream rosa;
jpayne@68 1578 /** Shared output stream for bad reads */
jpayne@68 1579 private final ConcurrentReadOutputStream rosb;
jpayne@68 1580 /** Shared output stream for junctions */
jpayne@68 1581 private final ConcurrentReadOutputStream rosj;
jpayne@68 1582 /** Thread ID */
jpayne@68 1583 final int tid;
jpayne@68 1584
jpayne@68 1585 /* Aligners for this thread */
jpayne@68 1586
jpayne@68 1587 /** For inverted repeat alignment */
jpayne@68 1588 final IceCreamAligner ica=IceCreamAligner.makeAligner(32);
jpayne@68 1589 /** For quickly aligning adapter sequence to whole read */
jpayne@68 1590 final FlatAligner fla=new FlatAligner();
jpayne@68 1591 // final MultiStateAligner9PacBioAdapter msa=alignAdapter ? new MultiStateAligner9PacBioAdapter(ALIGN_ROWS, ALIGN_COLUMNS) : null;
jpayne@68 1592 /** For slowly aligning adapter sequence sectionwise */
jpayne@68 1593 final SingleStateAlignerPacBioAdapter ssa=(alignAdapter || trimReads) ? new SingleStateAlignerPacBioAdapter(ALIGN_ROWS, ALIGN_COLUMNS, adapter.length) : null;
jpayne@68 1594 /** Alternate scoring for slow adapter alignment */
jpayne@68 1595 final MultiStateAligner9PacBioAdapter2 msa2=(alignAdapter || trimReads) ? new MultiStateAligner9PacBioAdapter2() : null;
jpayne@68 1596
jpayne@68 1597 final ZMWStreamer zstream;
jpayne@68 1598 final EntropyTracker eTracker;
jpayne@68 1599
jpayne@68 1600 final long[] adapterScoresT=new long[201];
jpayne@68 1601 final long[] repeatScoresT=new long[201];
jpayne@68 1602 final byte[] tipBufferLeft=new byte[adapterTipLen+adapterTipPad];
jpayne@68 1603 final byte[] tipBufferRight=new byte[adapterTipLen+adapterTipPad];
jpayne@68 1604 }
jpayne@68 1605
jpayne@68 1606 /*--------------------------------------------------------------*/
jpayne@68 1607 /*---------------- Fields ----------------*/
jpayne@68 1608 /*--------------------------------------------------------------*/
jpayne@68 1609
jpayne@68 1610 /** Primary input file path */
jpayne@68 1611 private String in1=null;
jpayne@68 1612
jpayne@68 1613 /** Primary output file path */
jpayne@68 1614 private String outg=null;
jpayne@68 1615 /** Ambiguous output file path */
jpayne@68 1616 private String outa=null;
jpayne@68 1617 /** Bad output file path */
jpayne@68 1618 private String outb=null;
jpayne@68 1619 /** Junction output file path */
jpayne@68 1620 private String outj=null;
jpayne@68 1621 /** Stats (screen) output file path */
jpayne@68 1622 private String outstats=null;
jpayne@68 1623
jpayne@68 1624 /** Adapter score ratio histogram */
jpayne@68 1625 private String asrhist=null;
jpayne@68 1626
jpayne@68 1627 /** Inverted repeat score ratio histogram */
jpayne@68 1628 private String irsrhist=null;
jpayne@68 1629
jpayne@68 1630 /** Override input file extension */
jpayne@68 1631 private String extin=null;
jpayne@68 1632 /** Override output file extension */
jpayne@68 1633 private String extout=null;
jpayne@68 1634
jpayne@68 1635 private int targetQlen=352;
jpayne@68 1636 private int minQlen=100;
jpayne@68 1637
jpayne@68 1638 /** Make a query at most this fraction of read length */
jpayne@68 1639 private float maxQlenFraction=0.15f;
jpayne@68 1640
jpayne@68 1641 /** Exit alignment early if score drops below this.
jpayne@68 1642 * An aggressive optimization that may miss some low-quality inverted repeats.
jpayne@68 1643 * -700 seems safe. */
jpayne@68 1644 private int minScore=-800;
jpayne@68 1645
jpayne@68 1646 /** Fraction of maximum alignment score to consider as matching for initial alignment */
jpayne@68 1647 private float minRatio1=0.59f;
jpayne@68 1648
jpayne@68 1649 /** Fraction of maximum alignment score to consider as matching for realignment */
jpayne@68 1650 private float minRatio2=0.64f;
jpayne@68 1651
jpayne@68 1652 private float adapterRatio=0.18f; //.18 for fa, or 0.57/0.6 for ica
jpayne@68 1653 private float adapterRatio2=0.325f; //0.31f normal, 0.325 timeless
jpayne@68 1654 private float suspectRatio=0.85f;
jpayne@68 1655 private boolean useLocality=true;
jpayne@68 1656 private boolean useAltMsa=true;
jpayne@68 1657
jpayne@68 1658
jpayne@68 1659 private float tipRatio=1.5f;
jpayne@68 1660
jpayne@68 1661 private float longReadMult=1.5f;
jpayne@68 1662
jpayne@68 1663 private float shortReadMult=1.5f;
jpayne@68 1664
jpayne@68 1665 private float veryShortMult=0.35f;
jpayne@68 1666
jpayne@68 1667 /** Short half of inverted repeat must be at least this fraction of read length to be classed as a triangle */
jpayne@68 1668 private float minJunctionFraction=0.4f;
jpayne@68 1669
jpayne@68 1670 /** Only filter triangle reads, not all inverted repeats */
jpayne@68 1671 private boolean filterIceCreamOnly=true;
jpayne@68 1672
jpayne@68 1673 /** Align again once the junction position is provisionally identified */
jpayne@68 1674 private boolean realign=true;
jpayne@68 1675
jpayne@68 1676 /** For internal read array transfers */
jpayne@68 1677 private int queuelen=80;
jpayne@68 1678
jpayne@68 1679 /** For grading synthetic data */
jpayne@68 1680 private boolean parseCustom;
jpayne@68 1681
jpayne@68 1682 /** Input reads are CCS (full pass) */
jpayne@68 1683 private boolean CCS;
jpayne@68 1684
jpayne@68 1685 private boolean modifyHeader=false;
jpayne@68 1686
jpayne@68 1687 private boolean sendAmbigToGood=true;
jpayne@68 1688 private boolean sendAmbigToBad=false;
jpayne@68 1689 private boolean setAmbig=false;
jpayne@68 1690
jpayne@68 1691 //Note: These flags are very similar... they need to be better-defined or merged.
jpayne@68 1692 private boolean keepZMWsTogether=false;
jpayne@68 1693 private boolean keepShortReads=true;
jpayne@68 1694
jpayne@68 1695 private int format=FORMAT_TEXT;
jpayne@68 1696 private static final int FORMAT_TEXT=1, FORMAT_JSON=2;
jpayne@68 1697
jpayne@68 1698 /*--------------------------------------------------------------*/
jpayne@68 1699
jpayne@68 1700 /** Alignment iterations for inverted repeat calculation with ref columns and query rows */
jpayne@68 1701 protected long alignmentIters=0;
jpayne@68 1702
jpayne@68 1703 /** Alignment iterations for inverted repeat calculation with query columns and ref rows */
jpayne@68 1704 protected long alignmentItersShort=0;
jpayne@68 1705
jpayne@68 1706 /** Time spent in long iterations */
jpayne@68 1707 protected long elapsed=0;
jpayne@68 1708
jpayne@68 1709 /** Time spent in short iterations */
jpayne@68 1710 protected long elapsedShort=0;
jpayne@68 1711
jpayne@68 1712 /** Print iteration time statistics */
jpayne@68 1713 protected boolean printTiming=false;
jpayne@68 1714
jpayne@68 1715 /** Number of reads processed */
jpayne@68 1716 protected long readsProcessed=0;
jpayne@68 1717 /** Number of bases processed */
jpayne@68 1718 protected long basesProcessed=0;
jpayne@68 1719
jpayne@68 1720 /** Number of reads retained */
jpayne@68 1721 protected long readsOut=0;
jpayne@68 1722 /** Number of bases retained */
jpayne@68 1723 protected long basesOut=0;
jpayne@68 1724 /** Number of junctions detected in IR reads */
jpayne@68 1725 protected long junctionsOut=0;
jpayne@68 1726
jpayne@68 1727 /** Quit after processing this many input reads; -1 means no limit */
jpayne@68 1728 private long maxReads=-1;
jpayne@68 1729
jpayne@68 1730 //Unused
jpayne@68 1731 protected long iceCreamReads=0;
jpayne@68 1732 protected long iceCreamBases=0;
jpayne@68 1733
jpayne@68 1734 /** ZMWs with discarded reads */
jpayne@68 1735 protected long iceCreamZMWs=0;
jpayne@68 1736
jpayne@68 1737 /** Sum of IR alignment ratios for IR reads not containing adapters */
jpayne@68 1738 protected double iceCreamRatio=0;
jpayne@68 1739
jpayne@68 1740 /** Number of ratios in iceCreamRatio */
jpayne@68 1741 protected long ratiosCounted=0;
jpayne@68 1742
jpayne@68 1743 /** Histogram */
jpayne@68 1744 protected final long[] adapterScores=new long[201];
jpayne@68 1745
jpayne@68 1746 /** Histogram */
jpayne@68 1747 protected final long[] repeatScores=new long[201];
jpayne@68 1748
jpayne@68 1749 /** ZMWs with a triangle read but no hidden adapters */
jpayne@68 1750 protected long missingAdapterZMWs=0;
jpayne@68 1751
jpayne@68 1752 /** ZMWs with hidden adapters */
jpayne@68 1753 protected long hiddenAdapterZMWs=0;
jpayne@68 1754
jpayne@68 1755 protected long basesTrimmed=0;
jpayne@68 1756 protected long readsTrimmed=0;
jpayne@68 1757
jpayne@68 1758 protected long lowEntropyZMWs=0;
jpayne@68 1759 protected long lowEntropyReads=0;
jpayne@68 1760
jpayne@68 1761 /** Total ZMWs observed */
jpayne@68 1762 protected long ZMWs=0;
jpayne@68 1763
jpayne@68 1764 protected long truePositiveReads=0;
jpayne@68 1765 protected long falsePositiveReads=0;
jpayne@68 1766 protected long trueNegativeReads=0;
jpayne@68 1767 protected long falseNegativeReads=0;
jpayne@68 1768 protected long ambiguousReads=0;
jpayne@68 1769
jpayne@68 1770 protected long truePositiveZMWs=0;
jpayne@68 1771 protected long falsePositiveZMWs=0;
jpayne@68 1772 protected long trueNegativeZMWs=0;
jpayne@68 1773 protected long falseNegativeZMWs=0;
jpayne@68 1774 protected long ambiguousZMWs=0;
jpayne@68 1775
jpayne@68 1776 protected final int stride;
jpayne@68 1777 protected final int window;
jpayne@68 1778 protected final int ALIGN_ROWS;
jpayne@68 1779 protected final int ALIGN_COLUMNS;
jpayne@68 1780
jpayne@68 1781 private boolean timeless=true;
jpayne@68 1782 protected final int maxSwScore;
jpayne@68 1783 protected final int minSwScore;
jpayne@68 1784 protected final int minSwScoreSuspect;
jpayne@68 1785 protected final int maxImperfectSwScore;
jpayne@68 1786 protected final int suspectMidpoint;
jpayne@68 1787 protected final int suspectDistance=100;
jpayne@68 1788 protected int npad=0; //This is for catching adapters on the tips, which are not very relevant to ice cream. Set to 35 for tip apdapters.
jpayne@68 1789
jpayne@68 1790 private byte[] adapter="ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT".getBytes(); //This one seems to be correct
jpayne@68 1791 // private byte[] adapter="ATCTCTCTCTTTTCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGAT".getBytes();
jpayne@68 1792 private boolean alignAdapter=true;
jpayne@68 1793 private boolean alignRC=true;
jpayne@68 1794 private boolean flagLongReads=true;
jpayne@68 1795 private boolean trimReads=true;
jpayne@68 1796 private int minLengthAfterTrimming=40;
jpayne@68 1797 private int adapterTipLen=100;
jpayne@68 1798 private int adapterTipPad=35;
jpayne@68 1799
jpayne@68 1800 boolean trimPolyA=false;
jpayne@68 1801
jpayne@68 1802 /*--------------------------------------------------------------*/
jpayne@68 1803 /*---------------- Entropy Fields ----------------*/
jpayne@68 1804 /*--------------------------------------------------------------*/
jpayne@68 1805
jpayne@68 1806 /** Minimum entropy to be considered "complex", on a scale of 0-1 */
jpayne@68 1807 float entropyCutoff=-1; //suggested 0.55
jpayne@68 1808 /** Minimum length of a low-entropy block to fail a read */
jpayne@68 1809 int entropyLength=350;
jpayne@68 1810 /** Minimum length of a low-entropy block as a fraction of read length;
jpayne@68 1811 * the minimum of the two will be used */
jpayne@68 1812 float entropyFraction=0.5f;
jpayne@68 1813
jpayne@68 1814 float maxMonomerFraction=0.74f; //Suggested... 0.74
jpayne@68 1815
jpayne@68 1816 /*--------------------------------------------------------------*/
jpayne@68 1817 /*---------------- Final Fields ----------------*/
jpayne@68 1818 /*--------------------------------------------------------------*/
jpayne@68 1819
jpayne@68 1820 /** Primary input file */
jpayne@68 1821 private final FileFormat ffin1;
jpayne@68 1822
jpayne@68 1823 /** Primary output file */
jpayne@68 1824 private final FileFormat ffoutg;
jpayne@68 1825
jpayne@68 1826 /** Ambiguous output file */
jpayne@68 1827 private final FileFormat ffouta;
jpayne@68 1828
jpayne@68 1829 /** Bad output file */
jpayne@68 1830 private final FileFormat ffoutb;
jpayne@68 1831
jpayne@68 1832 /** Junction output file */
jpayne@68 1833 private final FileFormat ffoutj;
jpayne@68 1834
jpayne@68 1835 /** Stats output file */
jpayne@68 1836 private final FileFormat ffstats;
jpayne@68 1837
jpayne@68 1838 /** Adapter score ratio histogram */
jpayne@68 1839 private final FileFormat ffasrhist;
jpayne@68 1840
jpayne@68 1841 /** Inverted repeat score ratio histogram */
jpayne@68 1842 private final FileFormat ffirsrhist;
jpayne@68 1843
jpayne@68 1844 private final int threads;
jpayne@68 1845
jpayne@68 1846 /*--------------------------------------------------------------*/
jpayne@68 1847 /*---------------- Common Fields ----------------*/
jpayne@68 1848 /*--------------------------------------------------------------*/
jpayne@68 1849
jpayne@68 1850 /** Print status messages to this output stream */
jpayne@68 1851 private PrintStream outstream=System.err;
jpayne@68 1852 /** Print verbose messages */
jpayne@68 1853 public static boolean verbose=false;
jpayne@68 1854 /** True if an error was encountered */
jpayne@68 1855 public boolean errorState=false;
jpayne@68 1856 /** Overwrite existing output files */
jpayne@68 1857 private boolean overwrite=false;
jpayne@68 1858 /** Append to existing output files */
jpayne@68 1859 private boolean append=false;
jpayne@68 1860
jpayne@68 1861 }