jpayne@68
|
1 package icecream;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.PrintStream;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.Arrays;
|
jpayne@68
|
6
|
jpayne@68
|
7 import aligner.AlignmentResult;
|
jpayne@68
|
8 import aligner.FlatAligner;
|
jpayne@68
|
9 import aligner.MultiStateAligner9PacBioAdapter2;
|
jpayne@68
|
10 import aligner.SingleStateAlignerPacBioAdapter;
|
jpayne@68
|
11 import dna.AminoAcid;
|
jpayne@68
|
12 import dna.Data;
|
jpayne@68
|
13 import fileIO.ByteFile;
|
jpayne@68
|
14 import fileIO.ByteStreamWriter;
|
jpayne@68
|
15 import fileIO.FileFormat;
|
jpayne@68
|
16 import fileIO.ReadWrite;
|
jpayne@68
|
17 import json.JsonObject;
|
jpayne@68
|
18 import shared.Parse;
|
jpayne@68
|
19 import shared.Parser;
|
jpayne@68
|
20 import shared.PreParser;
|
jpayne@68
|
21 import shared.ReadStats;
|
jpayne@68
|
22 import shared.Shared;
|
jpayne@68
|
23 import shared.Timer;
|
jpayne@68
|
24 import shared.Tools;
|
jpayne@68
|
25 import shared.TrimRead;
|
jpayne@68
|
26 import stream.ConcurrentReadOutputStream;
|
jpayne@68
|
27 import stream.FASTQ;
|
jpayne@68
|
28 import stream.FastaReadInputStream;
|
jpayne@68
|
29 import stream.Read;
|
jpayne@68
|
30 import stream.SamLine;
|
jpayne@68
|
31 import structures.ByteBuilder;
|
jpayne@68
|
32 import structures.EntropyTracker;
|
jpayne@68
|
33
|
jpayne@68
|
34 /**
|
jpayne@68
|
35 * Detects inverted repeats in PacBio reads.
|
jpayne@68
|
36 * Attempts to determine whether reads are chimeric
|
jpayne@68
|
37 * due to a missing adapter.
|
jpayne@68
|
38 *
|
jpayne@68
|
39 * @author Brian Bushnell
|
jpayne@68
|
40 * @date June 5, 2019
|
jpayne@68
|
41 *
|
jpayne@68
|
42 */
|
jpayne@68
|
43 public final class IceCreamFinder {
|
jpayne@68
|
44
|
jpayne@68
|
45 /*--------------------------------------------------------------*/
|
jpayne@68
|
46 /*---------------- Initialization ----------------*/
|
jpayne@68
|
47 /*--------------------------------------------------------------*/
|
jpayne@68
|
48
|
jpayne@68
|
49 /**
|
jpayne@68
|
50 * Code entrance from the command line.
|
jpayne@68
|
51 * @param args Command line arguments
|
jpayne@68
|
52 */
|
jpayne@68
|
53 public static void main(String[] args){
|
jpayne@68
|
54 //Start a timer immediately upon code entrance.
|
jpayne@68
|
55 Timer t=new Timer();
|
jpayne@68
|
56
|
jpayne@68
|
57 //Create an instance of this class
|
jpayne@68
|
58 IceCreamFinder x=new IceCreamFinder(args);
|
jpayne@68
|
59
|
jpayne@68
|
60 //Run the object
|
jpayne@68
|
61 x.process(t);
|
jpayne@68
|
62
|
jpayne@68
|
63 //Close the print stream if it was redirected
|
jpayne@68
|
64 Shared.closeStream(x.outstream);
|
jpayne@68
|
65 }
|
jpayne@68
|
66
|
jpayne@68
|
67 /**
|
jpayne@68
|
68 * Constructor.
|
jpayne@68
|
69 * @param args Command line arguments
|
jpayne@68
|
70 */
|
jpayne@68
|
71 public IceCreamFinder(String[] args){
|
jpayne@68
|
72
|
jpayne@68
|
73 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
74 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
75 args=pp.args;
|
jpayne@68
|
76 outstream=pp.outstream;
|
jpayne@68
|
77 }
|
jpayne@68
|
78
|
jpayne@68
|
79 Parser.setQuality(33);
|
jpayne@68
|
80
|
jpayne@68
|
81 //Set shared static variables prior to parsing
|
jpayne@68
|
82 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
83 ReadWrite.USE_BGZIP=ReadWrite.USE_UNBGZIP=ReadWrite.PREFER_BGZIP=true;
|
jpayne@68
|
84 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
85 FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
|
jpayne@68
|
86 SamLine.SET_FROM_OK=true;
|
jpayne@68
|
87 Shared.setBufferData(1000000);
|
jpayne@68
|
88 // Shared.FASTA_WRAP=511;
|
jpayne@68
|
89 Data.USE_SAMBAMBA=false;//Sambamba changes PacBio headers.
|
jpayne@68
|
90 Read.CHANGE_QUALITY=false;
|
jpayne@68
|
91 EntropyTracker.defaultK=3;
|
jpayne@68
|
92
|
jpayne@68
|
93 {//Parse the arguments
|
jpayne@68
|
94 final Parser parser=parse(args);
|
jpayne@68
|
95 Parser.processQuality();
|
jpayne@68
|
96
|
jpayne@68
|
97 maxReads=parser.maxReads;
|
jpayne@68
|
98 overwrite=ReadStats.overwrite=parser.overwrite;
|
jpayne@68
|
99 append=ReadStats.append=parser.append;
|
jpayne@68
|
100
|
jpayne@68
|
101 in1=parser.in1;
|
jpayne@68
|
102 extin=parser.extin;
|
jpayne@68
|
103
|
jpayne@68
|
104 if(outg==null){outg=parser.out1;}
|
jpayne@68
|
105 extout=parser.extout;
|
jpayne@68
|
106 }
|
jpayne@68
|
107
|
jpayne@68
|
108 //Determine how many threads may be used
|
jpayne@68
|
109 threads=Shared.threads();
|
jpayne@68
|
110
|
jpayne@68
|
111 //Ensure there is an input file
|
jpayne@68
|
112 if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
|
jpayne@68
|
113 fixExtensions(); //Add or remove .gz or .bz2 as needed
|
jpayne@68
|
114 checkFileExistence(); //Ensure files can be read and written
|
jpayne@68
|
115 checkStatics(); //Adjust file-related static fields as needed for this program
|
jpayne@68
|
116
|
jpayne@68
|
117 //Create output FileFormat objects
|
jpayne@68
|
118 ffoutg=FileFormat.testOutput(outg, FileFormat.FASTQ, extout, true, overwrite, append, false);
|
jpayne@68
|
119 ffouta=FileFormat.testOutput(outa, FileFormat.FASTQ, extout, true, overwrite, append, false);
|
jpayne@68
|
120 ffoutb=FileFormat.testOutput(outb, FileFormat.FASTQ, extout, true, overwrite, append, false);
|
jpayne@68
|
121 ffoutj=FileFormat.testOutput(outj, FileFormat.FASTA, extout, true, overwrite, append, false);
|
jpayne@68
|
122 ffstats=FileFormat.testOutput(outstats, FileFormat.TXT, null, false, overwrite, append, false);
|
jpayne@68
|
123 ffasrhist=FileFormat.testOutput(asrhist, FileFormat.TXT, null, false, overwrite, append, false);
|
jpayne@68
|
124 ffirsrhist=FileFormat.testOutput(irsrhist, FileFormat.TXT, null, false, overwrite, append, false);
|
jpayne@68
|
125
|
jpayne@68
|
126 if(!setAmbig && ffouta!=null){
|
jpayne@68
|
127 sendAmbigToGood=sendAmbigToBad=false;
|
jpayne@68
|
128 }
|
jpayne@68
|
129
|
jpayne@68
|
130 //Create input FileFormat objects
|
jpayne@68
|
131 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, extin, true, true);
|
jpayne@68
|
132
|
jpayne@68
|
133 final int alen=(adapter==null ? 20 : adapter.length);
|
jpayne@68
|
134 stride=(int)(alen*1.9f);
|
jpayne@68
|
135 window=(int)(alen*3.8f+10);
|
jpayne@68
|
136 ALIGN_ROWS=alen+1;
|
jpayne@68
|
137 ALIGN_COLUMNS=window+2;
|
jpayne@68
|
138
|
jpayne@68
|
139 maxSwScore=aligner.MultiStateAligner9PacBioAdapter.maxQuality(alen);
|
jpayne@68
|
140 minSwScore=(int)(maxSwScore*adapterRatio2);
|
jpayne@68
|
141 minSwScoreSuspect=(int)(maxSwScore*Tools.min(adapterRatio2*suspectRatio, adapterRatio2-((1-suspectRatio)*.2f)));
|
jpayne@68
|
142 maxImperfectSwScore=aligner.MultiStateAligner9PacBioAdapter.maxImperfectScore(alen);
|
jpayne@68
|
143 suspectMidpoint=(minSwScoreSuspect+minSwScore)/2;
|
jpayne@68
|
144 if(adapter==null){alignAdapter=false;}
|
jpayne@68
|
145 }
|
jpayne@68
|
146
|
jpayne@68
|
147 /*--------------------------------------------------------------*/
|
jpayne@68
|
148 /*---------------- Initialization Helpers ----------------*/
|
jpayne@68
|
149 /*--------------------------------------------------------------*/
|
jpayne@68
|
150
|
jpayne@68
|
151 /** Parse arguments from the command line */
|
jpayne@68
|
152 private Parser parse(String[] args){
|
jpayne@68
|
153
|
jpayne@68
|
154 //Create a parser object
|
jpayne@68
|
155 Parser parser=new Parser();
|
jpayne@68
|
156
|
jpayne@68
|
157 //Set any necessary Parser defaults here
|
jpayne@68
|
158 //parser.foo=bar;
|
jpayne@68
|
159
|
jpayne@68
|
160 //Parse each argument
|
jpayne@68
|
161 for(int i=0; i<args.length; i++){
|
jpayne@68
|
162 String arg=args[i];
|
jpayne@68
|
163
|
jpayne@68
|
164 //Break arguments into their constituent parts, in the form of "a=b"
|
jpayne@68
|
165 String[] split=arg.split("=");
|
jpayne@68
|
166 String a=split[0].toLowerCase();
|
jpayne@68
|
167 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
168 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
|
jpayne@68
|
169
|
jpayne@68
|
170 if(a.equals("verbose")){
|
jpayne@68
|
171 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
172 }else if(a.equals("format")){
|
jpayne@68
|
173 if(b==null){
|
jpayne@68
|
174 assert(false) : arg;
|
jpayne@68
|
175 }else if(Tools.isDigit(b.charAt(i))){
|
jpayne@68
|
176 format=Integer.parseInt(b);
|
jpayne@68
|
177 }else if(b.equalsIgnoreCase("json")){
|
jpayne@68
|
178 format=FORMAT_JSON;
|
jpayne@68
|
179 }else if(b.equalsIgnoreCase("text")){
|
jpayne@68
|
180 format=FORMAT_TEXT;
|
jpayne@68
|
181 }else{
|
jpayne@68
|
182 assert(false) : arg;
|
jpayne@68
|
183 }
|
jpayne@68
|
184 assert(format>=1 && format<=2) : arg;
|
jpayne@68
|
185 }else if(a.equals("json")){
|
jpayne@68
|
186 boolean x=Parse.parseBoolean(b);
|
jpayne@68
|
187 format=(x ? FORMAT_JSON : FORMAT_TEXT);
|
jpayne@68
|
188 }else if(a.equals("ss") || a.equals("samstreamer") || a.equals("streamer")){
|
jpayne@68
|
189 if(b!=null && Tools.isDigit(b.charAt(0))){
|
jpayne@68
|
190 ZMWStreamer.useStreamer=true;
|
jpayne@68
|
191 assert(Integer.parseInt(b)==1) : "ZMWStreamer threads currently capped at 1.";
|
jpayne@68
|
192 // ZMWStreamer.streamerThreads=Tools.max(1, Integer.parseInt(b));
|
jpayne@68
|
193 }else{
|
jpayne@68
|
194 ZMWStreamer.useStreamer=Parse.parseBoolean(b);
|
jpayne@68
|
195 }
|
jpayne@68
|
196 }else if(a.equals("icecreamonly") || a.equals("ico")){
|
jpayne@68
|
197 filterIceCreamOnly=Parse.parseBoolean(b);
|
jpayne@68
|
198 }else if(a.equals("keepshortreads") || a.equals("ksr")){
|
jpayne@68
|
199 keepShortReads=Parse.parseBoolean(b);
|
jpayne@68
|
200 }else if(a.equalsIgnoreCase("keepzmwstogether") || a.equals("kzt") || a.equals("keepreadstogether") || a.equals("krt")){
|
jpayne@68
|
201 keepZMWsTogether=Parse.parseBoolean(b);
|
jpayne@68
|
202 }else if(a.equalsIgnoreCase("samplerate")){
|
jpayne@68
|
203 float f=Float.parseFloat(b);
|
jpayne@68
|
204 assert(false) : "TODO"; //TODO
|
jpayne@68
|
205 }else if(a.equals("modifyheader") || a.equals("modifyheaders") || a.equals("changeheader") || a.equals("changeheaders")){
|
jpayne@68
|
206 modifyHeader=Parse.parseBoolean(b);
|
jpayne@68
|
207 }else if(a.equalsIgnoreCase("ccs")){
|
jpayne@68
|
208 CCS=Parse.parseBoolean(b);
|
jpayne@68
|
209 }else if(a.equals("npad")){
|
jpayne@68
|
210 npad=Integer.parseInt(b);
|
jpayne@68
|
211 }else if(a.equals("minlength") || a.equals("minlen")){
|
jpayne@68
|
212 minLengthAfterTrimming=Integer.parseInt(b);
|
jpayne@68
|
213 }else if(a.equals("realign")){
|
jpayne@68
|
214 realign=Parse.parseBoolean(b);
|
jpayne@68
|
215 }else if(a.equals("aligninverse") || a.equals("alignrc") || a.equals("findicecream")){
|
jpayne@68
|
216 alignRC=Parse.parseBoolean(b);
|
jpayne@68
|
217 }else if(a.equals("alignadapter")){
|
jpayne@68
|
218 alignAdapter=Parse.parseBoolean(b);
|
jpayne@68
|
219 }else if(a.equals("timeless")){
|
jpayne@68
|
220 timeless=Parse.parseBoolean(b);
|
jpayne@68
|
221 }else if(a.equals("flaglongreads")){
|
jpayne@68
|
222 flagLongReads=Parse.parseBoolean(b);
|
jpayne@68
|
223 }else if(a.equals("trimreads") || a.equals("trim")){
|
jpayne@68
|
224 trimReads=Parse.parseBoolean(b);
|
jpayne@68
|
225 }else if(a.equals("adapter")){
|
jpayne@68
|
226 adapter=b==null ? null : b.getBytes();
|
jpayne@68
|
227 }else if(a.equals("targetqlen") || a.equals("qlen")){
|
jpayne@68
|
228 targetQlen=Integer.parseInt(b);
|
jpayne@68
|
229 }else if(a.equals("maxqlenfraction") || a.equals("maxfraction") || a.equals("qlenfraction")){
|
jpayne@68
|
230 maxQlenFraction=Float.parseFloat(b);
|
jpayne@68
|
231 }else if(a.equals("junctionfraction") || a.equals("shortfraction")){
|
jpayne@68
|
232 minJunctionFraction=Float.parseFloat(b);
|
jpayne@68
|
233 }else if(a.equals("minratio1") || a.equals("ratio1") || a.equals("id1")){
|
jpayne@68
|
234 minRatio1=Float.parseFloat(b);
|
jpayne@68
|
235 }else if(a.equals("minratio2") || a.equals("ratio2") || a.equals("id2")){
|
jpayne@68
|
236 minRatio2=Float.parseFloat(b);
|
jpayne@68
|
237 }else if(a.equals("minratio") || a.equals("ratio") || a.equals("id")){
|
jpayne@68
|
238 minRatio1=minRatio2=Float.parseFloat(b);
|
jpayne@68
|
239 }else if(a.equals("adapterratio") || a.equals("adapterratio1") || a.equals("ratior") || a.equals("ratior1")){
|
jpayne@68
|
240 adapterRatio=Float.parseFloat(b);
|
jpayne@68
|
241 }else if(a.equals("adapterratio2") || a.equals("ratior2")){
|
jpayne@68
|
242 adapterRatio2=Float.parseFloat(b);
|
jpayne@68
|
243 }else if(a.equals("suspectratio")){
|
jpayne@68
|
244 suspectRatio=Float.parseFloat(b);
|
jpayne@68
|
245 }else if(a.equals("minqlen")){
|
jpayne@68
|
246 minQlen=Integer.parseInt(b);
|
jpayne@68
|
247 }else if(a.equals("minscore")){
|
jpayne@68
|
248 minScore=Integer.parseInt(b);
|
jpayne@68
|
249 }else if(a.equals("parsecustom")){
|
jpayne@68
|
250 parseCustom=Parse.parseBoolean(b);
|
jpayne@68
|
251 }else if(a.equals("printtiming") || a.equals("extended")){
|
jpayne@68
|
252 printTiming=Parse.parseBoolean(b);
|
jpayne@68
|
253 }else if(a.equals("queuelen") || a.equals("qlen")){
|
jpayne@68
|
254 queuelen=Integer.parseInt(b);
|
jpayne@68
|
255 }else if(a.equals("outg") || a.equals("outgood")){
|
jpayne@68
|
256 outg=b;
|
jpayne@68
|
257 }else if(a.equals("outa") || a.equals("outambig")){
|
jpayne@68
|
258 outa=b;
|
jpayne@68
|
259 }else if(a.equals("outb") || a.equals("outbad")){
|
jpayne@68
|
260 outb=b;
|
jpayne@68
|
261 }else if(a.equals("outj") || a.equals("outjunctions") || a.equals("junctions")){
|
jpayne@68
|
262 outj=b;
|
jpayne@68
|
263 }else if(a.equals("outs") || a.equals("outstats") || a.equals("stats")){
|
jpayne@68
|
264 outstats=b;
|
jpayne@68
|
265 }else if(a.equals("asrhist") || a.equals("ahist")){
|
jpayne@68
|
266 asrhist=b;
|
jpayne@68
|
267 }else if(a.equals("irsrhist") || a.equals("irhist")){
|
jpayne@68
|
268 irsrhist=b;
|
jpayne@68
|
269 }else if(a.equals("ambig")){
|
jpayne@68
|
270 sendAmbigToGood=sendAmbigToBad=false;
|
jpayne@68
|
271 if(b!=null){
|
jpayne@68
|
272 String[] split2=b.split(",");
|
jpayne@68
|
273 for(String s2 : split2){
|
jpayne@68
|
274 if(s2.equalsIgnoreCase("good")){sendAmbigToGood=true;}
|
jpayne@68
|
275 else if(s2.equalsIgnoreCase("bad") || s2.equalsIgnoreCase("toss")){sendAmbigToBad=true;}
|
jpayne@68
|
276 else if(s2.equalsIgnoreCase("ambig")){}
|
jpayne@68
|
277 else{assert(false) : "Bad argument: '"+s2+"' in '"+arg+"'; should be good or bad";}
|
jpayne@68
|
278 }
|
jpayne@68
|
279 }
|
jpayne@68
|
280 setAmbig=true;
|
jpayne@68
|
281 }else if(a.equalsIgnoreCase("trimpolya")){//Parse standard flags in the parser
|
jpayne@68
|
282 trimPolyA=Parse.parseBoolean(b);
|
jpayne@68
|
283 }else if(PolymerTrimmer.parse(arg, a, b)){
|
jpayne@68
|
284 //do nothing
|
jpayne@68
|
285 }else if(a.equals("minentropy") || a.equals("entropy") || a.equals("entropyfilter") || a.equals("efilter")){
|
jpayne@68
|
286 if(b==null || Character.isLetter(b.charAt(0))){
|
jpayne@68
|
287 if(Parse.parseBoolean(b)){
|
jpayne@68
|
288 entropyCutoff=0.55f;
|
jpayne@68
|
289 }else{
|
jpayne@68
|
290 entropyCutoff=-1;
|
jpayne@68
|
291 }
|
jpayne@68
|
292 }else{
|
jpayne@68
|
293 entropyCutoff=Float.parseFloat(b);
|
jpayne@68
|
294 }
|
jpayne@68
|
295 }else if(a.equals("entropyblock") || a.equals("entropylength") || a.equals("entropylen") || a.equals("entlen")){
|
jpayne@68
|
296 entropyLength=Parse.parseIntKMG(b);
|
jpayne@68
|
297 }else if(a.equals("entropyfraction") || a.equals("entfraction")){
|
jpayne@68
|
298 entropyFraction=Float.parseFloat(b);
|
jpayne@68
|
299 }else if(a.equals("monomerfraction") || a.equals("maxmonomerfraction") || a.equals("mmf")){
|
jpayne@68
|
300 maxMonomerFraction=Float.parseFloat(b);
|
jpayne@68
|
301 }else if(a.equals("parse_flag_goes_here")){
|
jpayne@68
|
302 long fake_variable=Parse.parseKMG(b);
|
jpayne@68
|
303 //Set a variable here
|
jpayne@68
|
304 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
|
jpayne@68
|
305 //do nothing
|
jpayne@68
|
306 }else{
|
jpayne@68
|
307 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
308 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
309 }
|
jpayne@68
|
310 }
|
jpayne@68
|
311
|
jpayne@68
|
312 return parser;
|
jpayne@68
|
313 }
|
jpayne@68
|
314
|
jpayne@68
|
315 /** Add or remove .gz or .bz2 as needed */
|
jpayne@68
|
316 private void fixExtensions(){
|
jpayne@68
|
317 in1=Tools.fixExtension(in1);
|
jpayne@68
|
318 }
|
jpayne@68
|
319
|
jpayne@68
|
320 /** Ensure files can be read and written */
|
jpayne@68
|
321 private void checkFileExistence(){
|
jpayne@68
|
322
|
jpayne@68
|
323 //Ensure output files can be written
|
jpayne@68
|
324 if(!Tools.testOutputFiles(overwrite, append, false, outg, outa, outb, outj, outstats, asrhist, irsrhist)){
|
jpayne@68
|
325 outstream.println((outg==null)+", "+(outb==null)+", "+outg+", "+outa+", "+outb+", "+outj+", "+outstats);
|
jpayne@68
|
326 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+outg+", "+outa+", "+outb+", "+outj+"\n");
|
jpayne@68
|
327 }
|
jpayne@68
|
328
|
jpayne@68
|
329 //Ensure input files can be read
|
jpayne@68
|
330 if(!Tools.testInputFiles(false, true, in1)){
|
jpayne@68
|
331 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
332 }
|
jpayne@68
|
333
|
jpayne@68
|
334 //Ensure that no file was specified multiple times
|
jpayne@68
|
335 if(!Tools.testForDuplicateFiles(true, in1, outg, outa, outb, outj, outstats, asrhist, irsrhist)){
|
jpayne@68
|
336 throw new RuntimeException("\nSome file names were specified multiple times.\n");
|
jpayne@68
|
337 }
|
jpayne@68
|
338 }
|
jpayne@68
|
339
|
jpayne@68
|
340 /** Adjust file-related static fields as needed for this program */
|
jpayne@68
|
341 private static void checkStatics(){
|
jpayne@68
|
342 //Adjust the number of threads for input file reading
|
jpayne@68
|
343 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
344 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
345 }
|
jpayne@68
|
346
|
jpayne@68
|
347 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
348 }
|
jpayne@68
|
349
|
jpayne@68
|
350 /*--------------------------------------------------------------*/
|
jpayne@68
|
351 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
352 /*--------------------------------------------------------------*/
|
jpayne@68
|
353
|
jpayne@68
|
354 /** Create read streams and process all data */
|
jpayne@68
|
355 void process(Timer t){
|
jpayne@68
|
356
|
jpayne@68
|
357 //Turn off read validation in the input threads to increase speed
|
jpayne@68
|
358 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
|
jpayne@68
|
359 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
|
jpayne@68
|
360
|
jpayne@68
|
361 //Create a read input stream
|
jpayne@68
|
362 ZMWStreamer zstream=new ZMWStreamer(ffin1, Shared.threads(), maxReads, -1);
|
jpayne@68
|
363
|
jpayne@68
|
364 //Optionally create read output streams
|
jpayne@68
|
365 final ConcurrentReadOutputStream rosg=makeCros(ffoutg);
|
jpayne@68
|
366 final ConcurrentReadOutputStream rosa=makeCros(ffouta);
|
jpayne@68
|
367 final ConcurrentReadOutputStream rosb=makeCros(ffoutb);
|
jpayne@68
|
368 final ConcurrentReadOutputStream rosj=makeCros(ffoutj);
|
jpayne@68
|
369
|
jpayne@68
|
370 //Reset counters
|
jpayne@68
|
371 readsProcessed=readsOut=0;
|
jpayne@68
|
372 basesProcessed=basesOut=0;
|
jpayne@68
|
373 junctionsOut=0;
|
jpayne@68
|
374
|
jpayne@68
|
375 //Process the reads in separate threads
|
jpayne@68
|
376 spawnThreads(zstream, rosg, rosa, rosb, rosj);
|
jpayne@68
|
377
|
jpayne@68
|
378 if(verbose){outstream.println("Finished; closing streams.");}
|
jpayne@68
|
379
|
jpayne@68
|
380 //Write anything that was accumulated by ReadStats
|
jpayne@68
|
381 errorState|=ReadStats.writeAll();
|
jpayne@68
|
382 //Close the read streams
|
jpayne@68
|
383 errorState|=ReadWrite.closeStreams(null, rosg, rosa, rosb, rosj);
|
jpayne@68
|
384
|
jpayne@68
|
385 //Reset read validation
|
jpayne@68
|
386 Read.VALIDATE_IN_CONSTRUCTOR=vic;
|
jpayne@68
|
387
|
jpayne@68
|
388 writeScoreRatioHistogram(ffasrhist, adapterScores);
|
jpayne@68
|
389 writeScoreRatioHistogram(ffirsrhist, repeatScores);
|
jpayne@68
|
390
|
jpayne@68
|
391 //Report timing and results
|
jpayne@68
|
392 t.stop();
|
jpayne@68
|
393
|
jpayne@68
|
394 String stats=null;
|
jpayne@68
|
395 if(format==FORMAT_TEXT){
|
jpayne@68
|
396 ByteBuilder bb=toText(t);
|
jpayne@68
|
397 stats=bb.nl().toString();
|
jpayne@68
|
398 }else if(format==FORMAT_JSON){
|
jpayne@68
|
399 JsonObject jo=toJson(t);
|
jpayne@68
|
400 stats=jo.toStringln();
|
jpayne@68
|
401 }else{
|
jpayne@68
|
402 assert(false) : "Bad format: "+format;
|
jpayne@68
|
403 }
|
jpayne@68
|
404 if(ffstats==null){
|
jpayne@68
|
405 outstream.print(stats);
|
jpayne@68
|
406 }else{
|
jpayne@68
|
407 ReadWrite.writeString(stats, outstats);
|
jpayne@68
|
408 }
|
jpayne@68
|
409
|
jpayne@68
|
410 //Throw an exception of there was an error in a thread
|
jpayne@68
|
411 if(errorState){
|
jpayne@68
|
412 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
413 }
|
jpayne@68
|
414 }
|
jpayne@68
|
415
|
jpayne@68
|
416 private ByteBuilder toText(Timer t){
|
jpayne@68
|
417 ByteBuilder bb=new ByteBuilder();
|
jpayne@68
|
418
|
jpayne@68
|
419 bb.appendln(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
|
jpayne@68
|
420 bb.appendln(Tools.readsBasesOut(readsProcessed, basesProcessed, readsOut, basesOut, 8, false));
|
jpayne@68
|
421
|
jpayne@68
|
422 long readsFiltered=readsProcessed-readsOut;
|
jpayne@68
|
423 bb.appendln(Tools.numberPercent("Reads Filtered:", readsFiltered, readsFiltered*100.0/(readsProcessed), 3, 8));
|
jpayne@68
|
424 if(trimReads || trimPolyA){
|
jpayne@68
|
425 bb.appendln(Tools.numberPercent("Reads Trimmed:", readsTrimmed, readsTrimmed*100.0/(readsProcessed), 3, 8));
|
jpayne@68
|
426 bb.appendln(Tools.numberPercent("Bases Trimmed:", basesTrimmed, basesTrimmed*100.0/(basesProcessed), 3, 8));
|
jpayne@68
|
427 }
|
jpayne@68
|
428 bb.appendln(Tools.number("Total ZMWs:", ZMWs, 8));
|
jpayne@68
|
429 bb.appendln(Tools.numberPercent("Bad ZMWs:", iceCreamZMWs, iceCreamZMWs*100.0/(ZMWs), 3, 8));
|
jpayne@68
|
430 bb.appendln(Tools.numberPercent("Absent Adapter:", missingAdapterZMWs, missingAdapterZMWs*100.0/(ZMWs), 3, 8));
|
jpayne@68
|
431 bb.appendln(Tools.numberPercent("Hidden Adapter:", hiddenAdapterZMWs, hiddenAdapterZMWs*100.0/(ZMWs), 3, 8));
|
jpayne@68
|
432 bb.appendln(Tools.numberPercent("Ambiguous IR:", ambiguousZMWs, ambiguousZMWs*100.0/(ZMWs), 3, 8));
|
jpayne@68
|
433 // bb.appendln(Tools.numberPercent("Low Entropy:", lowEntropyReads, lowEntropyReads*100.0/(readsProcessed), 3, 8));
|
jpayne@68
|
434 bb.appendln(Tools.numberPercent("Low Entropy:", lowEntropyZMWs, lowEntropyZMWs*100.0/(ZMWs), 3, 8));
|
jpayne@68
|
435
|
jpayne@68
|
436 bb.appendln(Tools.number("Avg Score Ratio:", iceCreamRatio/ratiosCounted, 3, 8));
|
jpayne@68
|
437 bb.appendln(Tools.number("Score Cutoff:", minRatio2, 3, 8));
|
jpayne@68
|
438
|
jpayne@68
|
439 if(printTiming){
|
jpayne@68
|
440 bb.appendln("Iterations: "+alignmentIters/1000000+"m");
|
jpayne@68
|
441 bb.appendln("Short Iterations: "+alignmentItersShort/1000000+"m");
|
jpayne@68
|
442 bb.appendln("Elapsed: "+elapsed/1000000+"ms");
|
jpayne@68
|
443 bb.appendln("Elapsed Short: "+elapsedShort/1000000+"ms");
|
jpayne@68
|
444 }
|
jpayne@68
|
445
|
jpayne@68
|
446 if(parseCustom){
|
jpayne@68
|
447 {
|
jpayne@68
|
448 bb.appendln("\nReads:");
|
jpayne@68
|
449 bb.appendln(Tools.numberPercent("True Positive:", truePositiveReads, truePositiveReads*100.0/(readsProcessed), 2, 8));
|
jpayne@68
|
450 bb.appendln(Tools.numberPercent("True Negative:", trueNegativeReads, trueNegativeReads*100.0/(readsProcessed), 2, 8));
|
jpayne@68
|
451 bb.appendln(Tools.numberPercent("False Positive:", falsePositiveReads, falsePositiveReads*100.0/(readsProcessed), 2, 8));
|
jpayne@68
|
452 bb.appendln(Tools.numberPercent("False Negative:", falseNegativeReads, falseNegativeReads*100.0/(readsProcessed), 2, 8));
|
jpayne@68
|
453 bb.appendln(Tools.numberPercent("Ambiguous:", ambiguousReads, ambiguousReads*100.0/(readsProcessed), 2, 8));
|
jpayne@68
|
454
|
jpayne@68
|
455 double snr=(truePositiveReads+trueNegativeReads+ambiguousReads)/Tools.max(1, falsePositiveReads+falseNegativeReads+ambiguousReads);
|
jpayne@68
|
456 snr=10*Math.log10(snr);
|
jpayne@68
|
457 bb.appendln(Tools.number("SNR:", snr, 2, 8));
|
jpayne@68
|
458 }
|
jpayne@68
|
459 {
|
jpayne@68
|
460 bb.appendln("\nZMWs:");
|
jpayne@68
|
461 bb.appendln(Tools.numberPercent("True Positive:", truePositiveZMWs, truePositiveZMWs*100.0/(ZMWs), 2, 8));
|
jpayne@68
|
462 bb.appendln(Tools.numberPercent("True Negative:", trueNegativeZMWs, trueNegativeZMWs*100.0/(ZMWs), 2, 8));
|
jpayne@68
|
463 bb.appendln(Tools.numberPercent("False Positive:", falsePositiveZMWs, falsePositiveZMWs*100.0/(ZMWs), 2, 8));
|
jpayne@68
|
464 bb.appendln(Tools.numberPercent("False Negative:", falseNegativeZMWs, falseNegativeZMWs*100.0/(ZMWs), 2, 8));
|
jpayne@68
|
465 bb.appendln(Tools.numberPercent("Ambiguous:", ambiguousZMWs, ambiguousZMWs*100.0/(ZMWs), 2, 8));
|
jpayne@68
|
466
|
jpayne@68
|
467 double snr=(truePositiveZMWs+trueNegativeZMWs+ambiguousReads)/Tools.max(1, falsePositiveZMWs+falseNegativeZMWs+ambiguousZMWs);
|
jpayne@68
|
468 snr=10*Math.log10(snr);
|
jpayne@68
|
469 bb.appendln(Tools.number("SNR:", snr, 2, 8));
|
jpayne@68
|
470 }
|
jpayne@68
|
471 }
|
jpayne@68
|
472 return bb;
|
jpayne@68
|
473 }
|
jpayne@68
|
474
|
jpayne@68
|
475 private JsonObject toJson(Timer t){
|
jpayne@68
|
476 JsonObject jo=new JsonObject();
|
jpayne@68
|
477 long readsFiltered=readsProcessed-readsOut;
|
jpayne@68
|
478
|
jpayne@68
|
479 jo.add("Time", t.timeInSeconds());
|
jpayne@68
|
480 jo.add("Reads_Processed", readsProcessed);
|
jpayne@68
|
481 jo.add("Bases_Processed", basesProcessed);
|
jpayne@68
|
482 jo.add("Reads_Out", readsOut);
|
jpayne@68
|
483 jo.add("Bases_Out", basesOut);
|
jpayne@68
|
484 jo.add("Reads_Filtered", readsFiltered);
|
jpayne@68
|
485 jo.add("Reads_Filtered_Pct", readsFiltered*100.0/(readsProcessed));
|
jpayne@68
|
486 if(trimReads){
|
jpayne@68
|
487 jo.add("Reads_Trimmed", readsTrimmed);
|
jpayne@68
|
488 jo.add("Reads_Trimmed_Pct", readsTrimmed*100.0/(readsProcessed));
|
jpayne@68
|
489 jo.add("Bases_Trimmed", basesTrimmed);
|
jpayne@68
|
490 jo.add("Bases_Trimmed_Pct", basesTrimmed*100.0/(basesProcessed));
|
jpayne@68
|
491 }
|
jpayne@68
|
492 jo.add("Total_ZMWs", ZMWs);
|
jpayne@68
|
493 jo.add("Bad_ZMWs", iceCreamZMWs);
|
jpayne@68
|
494 jo.add("Bad_ZMWs_Pct", iceCreamZMWs*100.0/(ZMWs));
|
jpayne@68
|
495 jo.add("Absent_Adapter", missingAdapterZMWs);
|
jpayne@68
|
496 jo.add("Absent_Adapter_Pct", missingAdapterZMWs*100.0/(ZMWs));
|
jpayne@68
|
497 jo.add("Hidden_Adapter", hiddenAdapterZMWs);
|
jpayne@68
|
498 jo.add("Hidden_Adapter_Pct", hiddenAdapterZMWs*100.0/(ZMWs));
|
jpayne@68
|
499 // jo.add("Low_Entropy", lowEntropyReads);
|
jpayne@68
|
500 // jo.add("Low_Entropy_Pct", lowEntropyReads*100.0/(readsProcessed));
|
jpayne@68
|
501 jo.add("Low_Entropy", lowEntropyZMWs);
|
jpayne@68
|
502 jo.add("Low_Entropy_Pct", lowEntropyZMWs*100.0/(ZMWs));
|
jpayne@68
|
503 jo.add("Ambiguous_Inverted_Repeat", ambiguousZMWs);
|
jpayne@68
|
504 jo.add("Ambiguous_Inverted_Repeat_Pct", ambiguousZMWs*100.0/(ZMWs));
|
jpayne@68
|
505 jo.add("Avg_Score_Ratio_IR", iceCreamRatio/ratiosCounted);
|
jpayne@68
|
506 jo.add("Score_Cutoff_IR", minRatio2);
|
jpayne@68
|
507
|
jpayne@68
|
508 jo.add("Alignment_Iterations_IR", alignmentIters);
|
jpayne@68
|
509 jo.add("Short_Alignment_Iterations_IR", alignmentItersShort);
|
jpayne@68
|
510
|
jpayne@68
|
511 if(parseCustom){
|
jpayne@68
|
512 {
|
jpayne@68
|
513 double snr=(truePositiveReads+trueNegativeReads+ambiguousReads)/Tools.max(1, falsePositiveReads+falseNegativeReads+ambiguousReads);
|
jpayne@68
|
514 snr=10*Math.log10(snr);
|
jpayne@68
|
515 jo.add("TP_Reads", truePositiveReads);
|
jpayne@68
|
516 jo.add("TN_Reads", trueNegativeReads);
|
jpayne@68
|
517 jo.add("FP_Reads", falsePositiveReads);
|
jpayne@68
|
518 jo.add("FN_Reads", falseNegativeReads);
|
jpayne@68
|
519 jo.add("AM_Reads", ambiguousReads);
|
jpayne@68
|
520
|
jpayne@68
|
521 jo.add("TP_Reads_Pct", truePositiveReads*100.0/(readsProcessed));
|
jpayne@68
|
522 jo.add("TN_Reads_Pct", trueNegativeReads*100.0/(readsProcessed));
|
jpayne@68
|
523 jo.add("FP_Reads_Pct", falsePositiveReads*100.0/(readsProcessed));
|
jpayne@68
|
524 jo.add("FN_Reads_Pct", falseNegativeReads*100.0/(readsProcessed));
|
jpayne@68
|
525 jo.add("AM_Reads_Pct", ambiguousReads*100.0/(readsProcessed));
|
jpayne@68
|
526
|
jpayne@68
|
527 jo.add("SNR_Reads", snr);
|
jpayne@68
|
528 }
|
jpayne@68
|
529 {
|
jpayne@68
|
530 double snr=(truePositiveZMWs+trueNegativeZMWs+ambiguousZMWs)/Tools.max(1, falsePositiveZMWs+falseNegativeZMWs+ambiguousZMWs);
|
jpayne@68
|
531 snr=10*Math.log10(snr);
|
jpayne@68
|
532 jo.add("TP_ZMWs", truePositiveZMWs);
|
jpayne@68
|
533 jo.add("TN_ZMWs", trueNegativeZMWs);
|
jpayne@68
|
534 jo.add("FP_ZMWs", falsePositiveZMWs);
|
jpayne@68
|
535 jo.add("FN_ZMWs", falseNegativeZMWs);
|
jpayne@68
|
536 jo.add("AM_ZMWs", ambiguousZMWs);
|
jpayne@68
|
537
|
jpayne@68
|
538 jo.add("TP_ZMWs_Pct", truePositiveZMWs*100.0/(ZMWs));
|
jpayne@68
|
539 jo.add("TN_ZMWs_Pct", trueNegativeZMWs*100.0/(ZMWs));
|
jpayne@68
|
540 jo.add("FP_ZMWs_Pct", falsePositiveZMWs*100.0/(ZMWs));
|
jpayne@68
|
541 jo.add("FN_ZMWs_Pct", falseNegativeZMWs*100.0/(ZMWs));
|
jpayne@68
|
542 jo.add("AM_ZMWs_Pct", ambiguousZMWs*100.0/(ZMWs));
|
jpayne@68
|
543
|
jpayne@68
|
544 jo.add("SNR_ZMWs", snr);
|
jpayne@68
|
545 }
|
jpayne@68
|
546 }
|
jpayne@68
|
547 return jo;
|
jpayne@68
|
548 }
|
jpayne@68
|
549
|
jpayne@68
|
550 private static void writeScoreRatioHistogram(FileFormat ff, long[] hist){
|
jpayne@68
|
551 if(ff==null){return;}
|
jpayne@68
|
552 final ByteStreamWriter bsw=new ByteStreamWriter(ff);
|
jpayne@68
|
553 bsw.start();
|
jpayne@68
|
554 final float mult=1.0f/(hist.length-1);
|
jpayne@68
|
555
|
jpayne@68
|
556 bsw.print("#Counted\t").println(Tools.sum(hist));
|
jpayne@68
|
557 bsw.print("#Mean\t").println(Tools.averageHistogram(hist)*mult, 3);
|
jpayne@68
|
558 bsw.print("#Median\t").println(Tools.medianHistogram(hist)*mult, 3);
|
jpayne@68
|
559 bsw.print("#Mode\t").println(Tools.calcModeHistogram(hist)*mult, 3);
|
jpayne@68
|
560 bsw.print("#STDev\t").println(Tools.standardDeviationHistogram(hist)*mult, 3);
|
jpayne@68
|
561 bsw.print("#Value\tOccurances\n");
|
jpayne@68
|
562
|
jpayne@68
|
563 for(int i=0; i<hist.length; i++){
|
jpayne@68
|
564 bsw.print(i*mult, 3).tab().println(hist[i]);
|
jpayne@68
|
565 }
|
jpayne@68
|
566 bsw.poisonAndWait();
|
jpayne@68
|
567 }
|
jpayne@68
|
568
|
jpayne@68
|
569 private ConcurrentReadOutputStream makeCros(FileFormat ff){
|
jpayne@68
|
570 if(ff==null){return null;}
|
jpayne@68
|
571
|
jpayne@68
|
572 //Select output buffer size based on whether it needs to be ordered
|
jpayne@68
|
573 final int buff=16;
|
jpayne@68
|
574
|
jpayne@68
|
575 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ff, null, buff, null, ff.samOrBam() && ffin1.samOrBam());
|
jpayne@68
|
576 ros.start(); //Start the stream
|
jpayne@68
|
577 return ros;
|
jpayne@68
|
578 }
|
jpayne@68
|
579
|
jpayne@68
|
580 /** Spawn process threads */
|
jpayne@68
|
581 private void spawnThreads(final ZMWStreamer zstream,
|
jpayne@68
|
582 final ConcurrentReadOutputStream rosg, final ConcurrentReadOutputStream rosa,
|
jpayne@68
|
583 final ConcurrentReadOutputStream rosb, final ConcurrentReadOutputStream rosj){
|
jpayne@68
|
584
|
jpayne@68
|
585 //Do anything necessary prior to processing
|
jpayne@68
|
586
|
jpayne@68
|
587 //Fill a list with ProcessThreads
|
jpayne@68
|
588 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
|
jpayne@68
|
589 for(int i=0; i<threads; i++){
|
jpayne@68
|
590 alpt.add(new ProcessThread(zstream, rosg, rosa, rosb, rosj, i));
|
jpayne@68
|
591 }
|
jpayne@68
|
592
|
jpayne@68
|
593 //Start the threads
|
jpayne@68
|
594 for(ProcessThread pt : alpt){
|
jpayne@68
|
595 pt.start();
|
jpayne@68
|
596 }
|
jpayne@68
|
597
|
jpayne@68
|
598 zstream.runStreamer(false);
|
jpayne@68
|
599
|
jpayne@68
|
600 //Wait for threads to finish
|
jpayne@68
|
601 waitForThreads(alpt);
|
jpayne@68
|
602
|
jpayne@68
|
603 //Do anything necessary after processing
|
jpayne@68
|
604 ZMWs=zstream.ZMWs;
|
jpayne@68
|
605 }
|
jpayne@68
|
606
|
jpayne@68
|
607 private void waitForThreads(ArrayList<ProcessThread> alpt){
|
jpayne@68
|
608
|
jpayne@68
|
609 //Wait for completion of all threads
|
jpayne@68
|
610 boolean success=true;
|
jpayne@68
|
611 for(ProcessThread pt : alpt){
|
jpayne@68
|
612
|
jpayne@68
|
613 //Wait until this thread has terminated
|
jpayne@68
|
614 while(pt.getState()!=Thread.State.TERMINATED){
|
jpayne@68
|
615 try {
|
jpayne@68
|
616 //Attempt a join operation
|
jpayne@68
|
617 pt.join();
|
jpayne@68
|
618 } catch (InterruptedException e) {
|
jpayne@68
|
619 //Potentially handle this, if it is expected to occur
|
jpayne@68
|
620 e.printStackTrace();
|
jpayne@68
|
621 }
|
jpayne@68
|
622 }
|
jpayne@68
|
623
|
jpayne@68
|
624 //Accumulate per-thread statistics
|
jpayne@68
|
625 readsProcessed+=pt.readsProcessedT;
|
jpayne@68
|
626 basesProcessed+=pt.basesProcessedT;
|
jpayne@68
|
627
|
jpayne@68
|
628 truePositiveReads+=pt.truePositiveReadsT;
|
jpayne@68
|
629 trueNegativeReads+=pt.trueNegativeReadsT;
|
jpayne@68
|
630 falsePositiveReads+=pt.falsePositiveReadsT;
|
jpayne@68
|
631 falseNegativeReads+=pt.falseNegativeReadsT;
|
jpayne@68
|
632 ambiguousReads+=pt.ambiguousReadsT;
|
jpayne@68
|
633
|
jpayne@68
|
634 truePositiveZMWs+=pt.truePositiveZMWsT;
|
jpayne@68
|
635 trueNegativeZMWs+=pt.trueNegativeZMWsT;
|
jpayne@68
|
636 falsePositiveZMWs+=pt.falsePositiveZMWsT;
|
jpayne@68
|
637 falseNegativeZMWs+=pt.falseNegativeZMWsT;
|
jpayne@68
|
638 ambiguousZMWs+=pt.ambiguousZMWsT;
|
jpayne@68
|
639
|
jpayne@68
|
640 readsOut+=pt.readsOutT;
|
jpayne@68
|
641 basesOut+=pt.basesOutT;
|
jpayne@68
|
642 junctionsOut+=pt.junctionsOutT;
|
jpayne@68
|
643
|
jpayne@68
|
644 alignmentIters+=pt.ica.iters();
|
jpayne@68
|
645 alignmentItersShort+=pt.ica.itersShort();
|
jpayne@68
|
646 elapsed+=pt.elapsedT;
|
jpayne@68
|
647 elapsedShort+=pt.elapsedShortT;
|
jpayne@68
|
648
|
jpayne@68
|
649 iceCreamReads+=pt.iceCreamReadsT;
|
jpayne@68
|
650 iceCreamBases+=pt.iceCreamBasesT;
|
jpayne@68
|
651 iceCreamZMWs+=pt.iceCreamZMWsT;
|
jpayne@68
|
652 iceCreamRatio+=pt.iceCreamRatioT;
|
jpayne@68
|
653 ratiosCounted+=pt.ratiosCountedT;
|
jpayne@68
|
654 missingAdapterZMWs+=pt.missingAdapterZMWsT;
|
jpayne@68
|
655 hiddenAdapterZMWs+=pt.hiddenAdapterZMWsT;
|
jpayne@68
|
656 lowEntropyZMWs+=pt.lowEntropyZMWsT;
|
jpayne@68
|
657 lowEntropyReads+=pt.lowEntropyReadsT;
|
jpayne@68
|
658
|
jpayne@68
|
659 basesTrimmed+=pt.basesTrimmedT;
|
jpayne@68
|
660 readsTrimmed+=pt.readsTrimmedT;
|
jpayne@68
|
661
|
jpayne@68
|
662 for(int i=0; i<adapterScores.length; i++){
|
jpayne@68
|
663 adapterScores[i]+=pt.adapterScoresT[i];
|
jpayne@68
|
664 repeatScores[i]+=pt.repeatScoresT[i];
|
jpayne@68
|
665 }
|
jpayne@68
|
666
|
jpayne@68
|
667 success&=pt.success;
|
jpayne@68
|
668 }
|
jpayne@68
|
669
|
jpayne@68
|
670 //Track whether any threads failed
|
jpayne@68
|
671 if(!success){errorState=true;}
|
jpayne@68
|
672 }
|
jpayne@68
|
673
|
jpayne@68
|
674 /*--------------------------------------------------------------*/
|
jpayne@68
|
675 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
676 /*--------------------------------------------------------------*/
|
jpayne@68
|
677
|
jpayne@68
|
678 /*--------------------------------------------------------------*/
|
jpayne@68
|
679 /*---------------- Inner Classes ----------------*/
|
jpayne@68
|
680 /*--------------------------------------------------------------*/
|
jpayne@68
|
681
|
jpayne@68
|
682 /** Processes reads. */
|
jpayne@68
|
683 private class ProcessThread extends Thread {
|
jpayne@68
|
684
|
jpayne@68
|
685 //Constructor
|
jpayne@68
|
686 ProcessThread(final ZMWStreamer zstream_,
|
jpayne@68
|
687 final ConcurrentReadOutputStream ros_, final ConcurrentReadOutputStream rosa_,
|
jpayne@68
|
688 final ConcurrentReadOutputStream rosb_, final ConcurrentReadOutputStream rosj_, final int tid_){
|
jpayne@68
|
689 zstream=zstream_;
|
jpayne@68
|
690 rosg=ros_;
|
jpayne@68
|
691 rosa=rosa_;
|
jpayne@68
|
692 rosb=rosb_;
|
jpayne@68
|
693 rosj=rosj_;
|
jpayne@68
|
694 tid=tid_;
|
jpayne@68
|
695
|
jpayne@68
|
696 Arrays.fill(tipBufferLeft, (byte)'N');
|
jpayne@68
|
697 Arrays.fill(tipBufferRight, (byte)'N');
|
jpayne@68
|
698
|
jpayne@68
|
699 if(entropyCutoff>=0){
|
jpayne@68
|
700 eTracker=new EntropyTracker(false, entropyCutoff, true);
|
jpayne@68
|
701 }else{
|
jpayne@68
|
702 eTracker=null;
|
jpayne@68
|
703 }
|
jpayne@68
|
704 }
|
jpayne@68
|
705
|
jpayne@68
|
706 //Called by start()
|
jpayne@68
|
707 @Override
|
jpayne@68
|
708 public void run(){
|
jpayne@68
|
709 //Do anything necessary prior to processing
|
jpayne@68
|
710
|
jpayne@68
|
711 //Process the reads
|
jpayne@68
|
712 processInner();
|
jpayne@68
|
713
|
jpayne@68
|
714 //Do anything necessary after processing
|
jpayne@68
|
715
|
jpayne@68
|
716 //Indicate successful exit status
|
jpayne@68
|
717 success=true;
|
jpayne@68
|
718 }
|
jpayne@68
|
719
|
jpayne@68
|
720 /** Iterate through the reads */
|
jpayne@68
|
721 void processInner(){
|
jpayne@68
|
722
|
jpayne@68
|
723 //As long as there is a nonempty read list...
|
jpayne@68
|
724 for(ZMW reads=zstream.nextZMW(); reads!=null; reads=zstream.nextZMW()){
|
jpayne@68
|
725 // if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} //Disabled due to non-static access
|
jpayne@68
|
726
|
jpayne@68
|
727 processList(reads);
|
jpayne@68
|
728 }
|
jpayne@68
|
729 }
|
jpayne@68
|
730
|
jpayne@68
|
731 int flagLowEntropyReads(final ZMW reads, final float minEnt,
|
jpayne@68
|
732 final int minLen0, final float minFract){
|
jpayne@68
|
733 int found=0;
|
jpayne@68
|
734 for(Read r : reads){
|
jpayne@68
|
735 if(!r.discarded()){
|
jpayne@68
|
736 int minLen=Tools.min((int)(r.length()*minFract), minLen0);
|
jpayne@68
|
737 int maxBlock=eTracker.longestLowEntropyBlock(r.bases, false, maxMonomerFraction);
|
jpayne@68
|
738 if(maxBlock>=minLen){
|
jpayne@68
|
739 r.setDiscarded(true);
|
jpayne@68
|
740 r.setJunk(true);
|
jpayne@68
|
741 found++;
|
jpayne@68
|
742 // System.err.println(r.toFasta());
|
jpayne@68
|
743 }
|
jpayne@68
|
744 }
|
jpayne@68
|
745 }
|
jpayne@68
|
746 return found;
|
jpayne@68
|
747 }
|
jpayne@68
|
748
|
jpayne@68
|
749 int flagLongReads(final ZMW reads, int median){
|
jpayne@68
|
750 int found=0;
|
jpayne@68
|
751 for(Read r : reads){
|
jpayne@68
|
752 if(r.length()>longReadMult*median){
|
jpayne@68
|
753 r.setDiscarded(true);
|
jpayne@68
|
754 r.setHasAdapter(true);
|
jpayne@68
|
755 found++;
|
jpayne@68
|
756 }
|
jpayne@68
|
757 }
|
jpayne@68
|
758 return found;
|
jpayne@68
|
759 }
|
jpayne@68
|
760
|
jpayne@68
|
761 /** Each list is presumed to be all reads from a ZMW, in order */
|
jpayne@68
|
762 void processList(final ZMW reads){
|
jpayne@68
|
763 long numBases=0;
|
jpayne@68
|
764
|
jpayne@68
|
765 //Loop through each read in the list for statistics
|
jpayne@68
|
766 for(int idx=0; idx<reads.size(); idx++){
|
jpayne@68
|
767 final Read r1=reads.get(idx);
|
jpayne@68
|
768
|
jpayne@68
|
769 //Validate reads in worker threads
|
jpayne@68
|
770 if(!r1.validated()){r1.validate(true);}
|
jpayne@68
|
771
|
jpayne@68
|
772 //Track the initial length for statistics
|
jpayne@68
|
773 final int initialLength1=r1.length();
|
jpayne@68
|
774
|
jpayne@68
|
775 //Increment counters
|
jpayne@68
|
776 readsProcessedT++;
|
jpayne@68
|
777 basesProcessedT+=initialLength1;
|
jpayne@68
|
778 numBases+=initialLength1;
|
jpayne@68
|
779 }
|
jpayne@68
|
780 final boolean fullPass=CCS || reads.size()>=3;
|
jpayne@68
|
781
|
jpayne@68
|
782 if(trimReads || trimPolyA){
|
jpayne@68
|
783 int removed=0;
|
jpayne@68
|
784 for(int i=0; i<reads.size(); i++){
|
jpayne@68
|
785 Read r=reads.get(i);
|
jpayne@68
|
786 byte a=r.bases[0], b=r.bases[r.length()-1];
|
jpayne@68
|
787 int trimmed=0;
|
jpayne@68
|
788 if(!AminoAcid.isFullyDefined(a) || !AminoAcid.isFullyDefined(b)){
|
jpayne@68
|
789 trimmed+=trimRead(r, 80);
|
jpayne@68
|
790 }
|
jpayne@68
|
791
|
jpayne@68
|
792 if(trimReads && adapter!=null){
|
jpayne@68
|
793 int leftAdapterBases=alignLeftTipAdapter(r);
|
jpayne@68
|
794 int rightAdapterBases=alignRightTipAdapter(r);
|
jpayne@68
|
795 if(leftAdapterBases+rightAdapterBases>0){
|
jpayne@68
|
796 trimmed+=trimRead(r, adapterTipLen);
|
jpayne@68
|
797 r.setTrimmed(true);
|
jpayne@68
|
798 }
|
jpayne@68
|
799 }
|
jpayne@68
|
800 if(trimPolyA){
|
jpayne@68
|
801 int x=trimPolyA(r);
|
jpayne@68
|
802 trimmed+=x;
|
jpayne@68
|
803 if(x>0){r.setTrimmed(true);}
|
jpayne@68
|
804 }
|
jpayne@68
|
805
|
jpayne@68
|
806 if(trimmed>0){
|
jpayne@68
|
807 basesTrimmedT+=trimmed;
|
jpayne@68
|
808 readsTrimmedT++;
|
jpayne@68
|
809 }
|
jpayne@68
|
810
|
jpayne@68
|
811 //TODO: Note again, removing intermediate reads messes up forward-rev ordering
|
jpayne@68
|
812 if(r.length()<minLengthAfterTrimming){//Discard short trash
|
jpayne@68
|
813 reads.set(i, null);
|
jpayne@68
|
814 removed++;
|
jpayne@68
|
815 }
|
jpayne@68
|
816 }
|
jpayne@68
|
817 if(removed>0){
|
jpayne@68
|
818 Tools.condenseStrict(reads);
|
jpayne@68
|
819 }
|
jpayne@68
|
820 }
|
jpayne@68
|
821
|
jpayne@68
|
822 if(entropyCutoff>0){
|
jpayne@68
|
823 int bad=flagLowEntropyReads(reads, entropyCutoff, entropyLength, entropyFraction);
|
jpayne@68
|
824 if(bad>0){
|
jpayne@68
|
825 lowEntropyZMWsT++;
|
jpayne@68
|
826 lowEntropyReadsT+=bad;
|
jpayne@68
|
827 if(bad>=reads.size()){
|
jpayne@68
|
828 if(!reads.isEmpty()){outputReads(reads, null);}
|
jpayne@68
|
829 return; //No point in continuing...
|
jpayne@68
|
830 }
|
jpayne@68
|
831 }
|
jpayne@68
|
832 }
|
jpayne@68
|
833
|
jpayne@68
|
834 if(reads.isEmpty()){return;}
|
jpayne@68
|
835
|
jpayne@68
|
836 Read sample=null;//Read that will be searched for inverted repeat, typically median length
|
jpayne@68
|
837 Read shortest=null;//shortest read in the middle, or overall if no middle reads
|
jpayne@68
|
838 final int medianLength=reads.medianLength(true);
|
jpayne@68
|
839 boolean foundInvertedRepeat=false;
|
jpayne@68
|
840 int longReads=0;
|
jpayne@68
|
841 int adapterReads=0;
|
jpayne@68
|
842 int maxAdapters=0;
|
jpayne@68
|
843 int sampleNum=0;
|
jpayne@68
|
844
|
jpayne@68
|
845 if(reads.size()>=3){
|
jpayne@68
|
846 if(flagLongReads){longReads=flagLongReads(reads, medianLength);}
|
jpayne@68
|
847 for(int i=1; i<reads.size()-1; i++){
|
jpayne@68
|
848 Read r=reads.get(i);
|
jpayne@68
|
849 if(sample==null && r.length()==medianLength){
|
jpayne@68
|
850 sample=r;
|
jpayne@68
|
851 sampleNum=i;
|
jpayne@68
|
852 }
|
jpayne@68
|
853 if(shortest==null || r.length()<shortest.length()){shortest=r;}
|
jpayne@68
|
854 }
|
jpayne@68
|
855 }else{
|
jpayne@68
|
856 for(int i=0; i<reads.size(); i++){
|
jpayne@68
|
857 Read r=reads.get(i);
|
jpayne@68
|
858 if(sample==null || sample.length()<r.length()){
|
jpayne@68
|
859 sample=r;
|
jpayne@68
|
860 sampleNum=i;
|
jpayne@68
|
861 }
|
jpayne@68
|
862 if(shortest==null || r.length()<shortest.length()){shortest=r;}
|
jpayne@68
|
863 }
|
jpayne@68
|
864 }
|
jpayne@68
|
865 assert(sample!=null);
|
jpayne@68
|
866 final AlignmentResult ar=align(sample, fullPass, reads.size(), sampleNum);
|
jpayne@68
|
867
|
jpayne@68
|
868 if(ar!=null){
|
jpayne@68
|
869 foundInvertedRepeat=true;
|
jpayne@68
|
870 sample.setInvertedRepeat(true);
|
jpayne@68
|
871 if(ar.icecream || !filterIceCreamOnly){
|
jpayne@68
|
872 sample.setDiscarded(true);
|
jpayne@68
|
873 }else if(ar.ambiguous){
|
jpayne@68
|
874 sample.setAmbiguous(true);
|
jpayne@68
|
875 }
|
jpayne@68
|
876 }
|
jpayne@68
|
877
|
jpayne@68
|
878 if(alignAdapter){
|
jpayne@68
|
879 double mult=foundInvertedRepeat ? 0.9 : 1.0;
|
jpayne@68
|
880 if(needsAdapterTest(sample)){
|
jpayne@68
|
881 int x=lookForAdapter(sample, mult);
|
jpayne@68
|
882 adapterReads+=(x>0 ? 1 : 0);
|
jpayne@68
|
883 maxAdapters=Tools.max(x, maxAdapters);
|
jpayne@68
|
884 }
|
jpayne@68
|
885
|
jpayne@68
|
886 if(reads.size()>2){
|
jpayne@68
|
887 Read a=reads.get(0), b=reads.get(reads.size()-1);
|
jpayne@68
|
888 Read r=a.length()>b.length() ? a : b;
|
jpayne@68
|
889 if(needsAdapterTest(r)){
|
jpayne@68
|
890 int x=lookForAdapter(r, mult);
|
jpayne@68
|
891 adapterReads+=(x>0 ? 1 : 0);
|
jpayne@68
|
892 maxAdapters=Tools.max(x, maxAdapters);
|
jpayne@68
|
893 }
|
jpayne@68
|
894 }
|
jpayne@68
|
895
|
jpayne@68
|
896 for(Read r : reads){
|
jpayne@68
|
897 if((r.length()>=shortReadMult*shortest.length() || adapterReads>0 || longReads>0 || foundInvertedRepeat)
|
jpayne@68
|
898 && needsAdapterTest(r)){
|
jpayne@68
|
899 int x=lookForAdapter(r, mult);
|
jpayne@68
|
900 adapterReads+=(x>0 ? 1 : 0);
|
jpayne@68
|
901 maxAdapters=Tools.max(x, maxAdapters);
|
jpayne@68
|
902 }
|
jpayne@68
|
903 }
|
jpayne@68
|
904 }
|
jpayne@68
|
905
|
jpayne@68
|
906 if(ar!=null && ar.icecream){
|
jpayne@68
|
907 iceCreamRatioT+=ar.ratio;
|
jpayne@68
|
908 ratiosCountedT++;
|
jpayne@68
|
909 int idx=(int)(ar.ratio*200);
|
jpayne@68
|
910 repeatScoresT[idx]++;
|
jpayne@68
|
911 if(longReads+adapterReads==0){missingAdapterZMWsT++;}
|
jpayne@68
|
912 }
|
jpayne@68
|
913 if(longReads+adapterReads>0){
|
jpayne@68
|
914 hiddenAdapterZMWsT++;
|
jpayne@68
|
915 }
|
jpayne@68
|
916
|
jpayne@68
|
917 if(keepShortReads && maxAdapters<2){
|
jpayne@68
|
918 if(foundInvertedRepeat && !sample.hasAdapter()){
|
jpayne@68
|
919 if(reads.size()>2){
|
jpayne@68
|
920 for(int i=1; i<reads.size()-1; i++){
|
jpayne@68
|
921 reads.get(i).setDiscarded(true);
|
jpayne@68
|
922 }
|
jpayne@68
|
923 Read r=reads.get(0);
|
jpayne@68
|
924 if(r.length()>=veryShortMult*medianLength){r.setDiscarded(true);}
|
jpayne@68
|
925 r=reads.get(reads.size()-1);
|
jpayne@68
|
926 if(r.length()>=veryShortMult*medianLength){r.setDiscarded(true);}
|
jpayne@68
|
927 }else if(reads.size()==2){
|
jpayne@68
|
928 for(Read r : reads){
|
jpayne@68
|
929 if(r.length()>=veryShortMult*sample.length()){
|
jpayne@68
|
930 if(ar.icecream){
|
jpayne@68
|
931 r.setDiscarded(true);
|
jpayne@68
|
932 }else if(ar.ambiguous){
|
jpayne@68
|
933 r.setAmbiguous(true);
|
jpayne@68
|
934 }
|
jpayne@68
|
935 }
|
jpayne@68
|
936 }
|
jpayne@68
|
937 }
|
jpayne@68
|
938 }
|
jpayne@68
|
939 }else if(sample.discarded() || (longReads+adapterReads>0)){
|
jpayne@68
|
940 for(Read r : reads){
|
jpayne@68
|
941 r.setDiscarded(true);
|
jpayne@68
|
942 }
|
jpayne@68
|
943 }
|
jpayne@68
|
944
|
jpayne@68
|
945 ArrayList<Read> junctions=null;
|
jpayne@68
|
946 if(ar!=null){
|
jpayne@68
|
947 if(rosj!=null && !sample.hasAdapter()){
|
jpayne@68
|
948 Read r=ar.alignedRead;
|
jpayne@68
|
949 int width=Tools.min(200, ar.junctionLoc, r.length()-ar.junctionLoc);
|
jpayne@68
|
950 int a=ar.junctionLoc-width, b=ar.junctionLoc+width;
|
jpayne@68
|
951 byte[] bases=Arrays.copyOfRange(r.bases, a, b);
|
jpayne@68
|
952 Read junction=new Read(bases, null, r.id+"\tjunction:"+a+"-"+b, r.numericID);
|
jpayne@68
|
953 junctions=new ArrayList<Read>(1);
|
jpayne@68
|
954 junctions.add(junction);
|
jpayne@68
|
955 junctionsOutT++;
|
jpayne@68
|
956 }
|
jpayne@68
|
957 if(modifyHeader){
|
jpayne@68
|
958 sample.id=sample.id+"\tratio="+ar.ratio+"\tjunction="+ar.junctionLoc+
|
jpayne@68
|
959 "\tIR="+sample.invertedRepeat()+"\tAD="+sample.hasAdapter()+"\tFP="+fullPass+"\tsubreads="+reads.size();
|
jpayne@68
|
960 }
|
jpayne@68
|
961 }
|
jpayne@68
|
962
|
jpayne@68
|
963 removeShortTrash(reads);
|
jpayne@68
|
964
|
jpayne@68
|
965 if(!reads.isEmpty()){outputReads(reads, junctions);}
|
jpayne@68
|
966 }
|
jpayne@68
|
967
|
jpayne@68
|
968 //TODO: Now that I think about it. The order of the reads is important.
|
jpayne@68
|
969 //Since they go forward-rev-forward-rev it's imprudent to discard inner reads.
|
jpayne@68
|
970 private void removeShortTrash(ZMW reads) {
|
jpayne@68
|
971 int removed=0;
|
jpayne@68
|
972 for(int i=0; i<reads.size(); i++){
|
jpayne@68
|
973 Read r=reads.get(i);
|
jpayne@68
|
974 if(r.length()<minLengthAfterTrimming){//Discard short trash
|
jpayne@68
|
975 reads.set(i, null);
|
jpayne@68
|
976 removed++;
|
jpayne@68
|
977 }
|
jpayne@68
|
978 }
|
jpayne@68
|
979 if(removed>0){Tools.condenseStrict(reads);}
|
jpayne@68
|
980 }
|
jpayne@68
|
981
|
jpayne@68
|
982 int trimRead(Read r, int lookahead){
|
jpayne@68
|
983 final byte[] bases=r.bases;
|
jpayne@68
|
984
|
jpayne@68
|
985 int left=calcLeftTrim(bases, lookahead);
|
jpayne@68
|
986 int right=calcRightTrim(bases, lookahead);
|
jpayne@68
|
987 int trimmed=0;
|
jpayne@68
|
988
|
jpayne@68
|
989 if(left+right>0){
|
jpayne@68
|
990 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
|
jpayne@68
|
991 trimmed=TrimRead.trimByAmount(r, left, right, 1, false);
|
jpayne@68
|
992 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
|
jpayne@68
|
993 ZMW.fixReadHeader(r, left, right);
|
jpayne@68
|
994 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
|
jpayne@68
|
995 }
|
jpayne@68
|
996
|
jpayne@68
|
997 if(r.samline!=null){
|
jpayne@68
|
998 r.samline.seq=r.bases;
|
jpayne@68
|
999 r.samline.qual=r.quality;
|
jpayne@68
|
1000 }
|
jpayne@68
|
1001 return trimmed;
|
jpayne@68
|
1002 }
|
jpayne@68
|
1003
|
jpayne@68
|
1004 int trimPolyA(Read r){
|
jpayne@68
|
1005 final byte[] bases=r.bases;
|
jpayne@68
|
1006
|
jpayne@68
|
1007 int left=Tools.max(PolymerTrimmer.testLeft(bases, 'A'), PolymerTrimmer.testLeft(bases, 'T'));
|
jpayne@68
|
1008 int right=Tools.max(PolymerTrimmer.testRight(bases, 'A'), PolymerTrimmer.testRight(bases, 'T'));
|
jpayne@68
|
1009 int trimmed=0;
|
jpayne@68
|
1010
|
jpayne@68
|
1011 if(left+right>0){
|
jpayne@68
|
1012 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
|
jpayne@68
|
1013 trimmed=TrimRead.trimByAmount(r, left, right, 1, false);
|
jpayne@68
|
1014 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
|
jpayne@68
|
1015 ZMW.fixReadHeader(r, left, right);
|
jpayne@68
|
1016 // System.err.println(r.length()+", "+left+", "+right+", "+r.id);
|
jpayne@68
|
1017 }
|
jpayne@68
|
1018
|
jpayne@68
|
1019 if(r.samline!=null){
|
jpayne@68
|
1020 r.samline.seq=r.bases;
|
jpayne@68
|
1021 r.samline.qual=r.quality;
|
jpayne@68
|
1022 }
|
jpayne@68
|
1023 return trimmed;
|
jpayne@68
|
1024 }
|
jpayne@68
|
1025
|
jpayne@68
|
1026 final int calcLeftTrim(final byte[] bases, int lookahead){
|
jpayne@68
|
1027 final int len=bases.length;
|
jpayne@68
|
1028 int lastUndef=-1;
|
jpayne@68
|
1029 for(int i=0, defined=0; i<len && defined<lookahead; i++){
|
jpayne@68
|
1030 if(AminoAcid.isFullyDefined(bases[i])){
|
jpayne@68
|
1031 defined++;
|
jpayne@68
|
1032 }else{
|
jpayne@68
|
1033 lastUndef=i;
|
jpayne@68
|
1034 defined=0;
|
jpayne@68
|
1035 }
|
jpayne@68
|
1036 }
|
jpayne@68
|
1037 return lastUndef+1;
|
jpayne@68
|
1038 }
|
jpayne@68
|
1039
|
jpayne@68
|
1040 final int calcRightTrim(final byte[] bases, int lookahead){
|
jpayne@68
|
1041 final int len=bases.length;
|
jpayne@68
|
1042 int lastUndef=len;
|
jpayne@68
|
1043 for(int i=len-1, defined=0; i>=0 && defined<lookahead; i--){
|
jpayne@68
|
1044 if(AminoAcid.isFullyDefined(bases[i])){
|
jpayne@68
|
1045 defined++;
|
jpayne@68
|
1046 }else{
|
jpayne@68
|
1047 lastUndef=i;
|
jpayne@68
|
1048 defined=0;
|
jpayne@68
|
1049 }
|
jpayne@68
|
1050 }
|
jpayne@68
|
1051 return len-lastUndef-1;
|
jpayne@68
|
1052 }
|
jpayne@68
|
1053
|
jpayne@68
|
1054 final int alignLeftTipAdapter(Read r){
|
jpayne@68
|
1055 assert(adapter.length<adapterTipLen); //Time to increase adapterTipLen
|
jpayne@68
|
1056 if(r.length()<adapterTipLen){return 0;}
|
jpayne@68
|
1057 final byte[] array=tipBufferLeft;
|
jpayne@68
|
1058
|
jpayne@68
|
1059 for(int i=adapterTipPad, j=0; i<array.length; i++, j++){array[i]=r.bases[j];}
|
jpayne@68
|
1060 int[] rvec=ssa.fillAndScoreLimited(adapter, array, 0, array.length, minSwScore);
|
jpayne@68
|
1061
|
jpayne@68
|
1062 if(rvec==null || rvec[0]<minSwScore){return 0;}
|
jpayne@68
|
1063 final int score=rvec[0];
|
jpayne@68
|
1064 final int start=Tools.max(0, rvec[1]-adapterTipPad);
|
jpayne@68
|
1065 final int stop=rvec[2]-adapterTipPad;
|
jpayne@68
|
1066 for(int i=start; i<=stop; i++){r.bases[i]='X';}
|
jpayne@68
|
1067 return stop-start+1;
|
jpayne@68
|
1068 }
|
jpayne@68
|
1069
|
jpayne@68
|
1070 final int alignRightTipAdapter(Read r){
|
jpayne@68
|
1071 final byte[] bases=r.bases;
|
jpayne@68
|
1072 assert(adapter.length<adapterTipLen); //Time to increase adapterTipLen
|
jpayne@68
|
1073 if(r.length()<adapterTipLen){return 0;}
|
jpayne@68
|
1074 final byte[] array=tipBufferRight;
|
jpayne@68
|
1075
|
jpayne@68
|
1076 for(int i=0, j=bases.length-adapterTipLen; i<adapterTipLen; i++, j++){array[i]=bases[j];}
|
jpayne@68
|
1077 int[] rvec=ssa.fillAndScoreLimited(adapter, array, 0, array.length, minSwScore);
|
jpayne@68
|
1078
|
jpayne@68
|
1079 if(rvec==null || rvec[0]<minSwScore){return 0;}
|
jpayne@68
|
1080 final int score=rvec[0];
|
jpayne@68
|
1081 final int start=Tools.max(0, rvec[1]-adapterTipPad);
|
jpayne@68
|
1082 final int stop=rvec[2]-adapterTipPad;
|
jpayne@68
|
1083 for(int i=start; i<=stop; i++){r.bases[i]='X';}
|
jpayne@68
|
1084 return stop-start+1;
|
jpayne@68
|
1085 }
|
jpayne@68
|
1086
|
jpayne@68
|
1087 boolean needsAdapterTest(Read r){
|
jpayne@68
|
1088 if(r.tested() || r.hasAdapter()){return false;}
|
jpayne@68
|
1089 if(adapterRatio<=0 || r.discarded()){return true;}
|
jpayne@68
|
1090 AlignmentResult aa=fla.alignForwardShort(adapter, r.bases, 0, r.bases.length-1, adapterRatio);
|
jpayne@68
|
1091 return aa!=null;
|
jpayne@68
|
1092 }
|
jpayne@68
|
1093
|
jpayne@68
|
1094 private void outputReads(ZMW reads, ArrayList<Read> junctions){
|
jpayne@68
|
1095 final int size=reads.size();
|
jpayne@68
|
1096 final ArrayList<Read> good=(rosg==null ? null : new ArrayList<Read>(size));
|
jpayne@68
|
1097 final ArrayList<Read> ambig=(rosa==null ? null : new ArrayList<Read>(size));
|
jpayne@68
|
1098 final ArrayList<Read> bad=(rosb==null ? null : new ArrayList<Read>(size));
|
jpayne@68
|
1099
|
jpayne@68
|
1100 int discardedReads=0;
|
jpayne@68
|
1101 int ambigReads=0;
|
jpayne@68
|
1102 int trimmedReads=0;
|
jpayne@68
|
1103 boolean sendAllToDiscarded=false;
|
jpayne@68
|
1104 boolean sendAllToAmbiguous=false;
|
jpayne@68
|
1105
|
jpayne@68
|
1106 //Check to see if any reads are discarded or ambiguous
|
jpayne@68
|
1107 for(Read r : reads){
|
jpayne@68
|
1108 if(r.discarded()){
|
jpayne@68
|
1109 discardedReads++;
|
jpayne@68
|
1110 }else if(r.ambiguous()){
|
jpayne@68
|
1111 ambigReads++;
|
jpayne@68
|
1112 }else if(r.trimmed()){
|
jpayne@68
|
1113 trimmedReads++;
|
jpayne@68
|
1114 }
|
jpayne@68
|
1115 }
|
jpayne@68
|
1116
|
jpayne@68
|
1117 //Unify flags on all reads
|
jpayne@68
|
1118 if(keepZMWsTogether){
|
jpayne@68
|
1119 if(discardedReads>0){sendAllToDiscarded=true;}
|
jpayne@68
|
1120 else if(ambigReads>0){sendAllToAmbiguous=true;}
|
jpayne@68
|
1121 }
|
jpayne@68
|
1122 // if(discardedReads>0 || ambigReads>0){System.err.println("\nd="+discardedReads+", a="+ambigReads);}
|
jpayne@68
|
1123 if(discardedReads>0){iceCreamZMWsT++;}
|
jpayne@68
|
1124
|
jpayne@68
|
1125 int trueIceCreamReads=0;
|
jpayne@68
|
1126 for(Read r : reads){
|
jpayne@68
|
1127 boolean trueIceCream=(parseCustom ? ReadBuilder.isIceCream(r.id) : false);
|
jpayne@68
|
1128 trueIceCreamReads+=(trueIceCream ? 1 : 0);
|
jpayne@68
|
1129 if(r.discarded() || sendAllToDiscarded){
|
jpayne@68
|
1130 // assert(false);
|
jpayne@68
|
1131 if(bad!=null){bad.add(r);}
|
jpayne@68
|
1132 if(trueIceCream){truePositiveReadsT++;}
|
jpayne@68
|
1133 else{falsePositiveReadsT++;}
|
jpayne@68
|
1134 // System.err.println("d:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size());
|
jpayne@68
|
1135 }else if(r.ambiguous() || sendAllToAmbiguous){
|
jpayne@68
|
1136 if(ambig!=null){ambig.add(r);}
|
jpayne@68
|
1137 if(sendAmbigToGood){
|
jpayne@68
|
1138 readsOutT++;
|
jpayne@68
|
1139 basesOutT+=r.length();
|
jpayne@68
|
1140 if(good!=null) {good.add(r);}
|
jpayne@68
|
1141 }
|
jpayne@68
|
1142 if(sendAmbigToBad && bad!=null){bad.add(r);}
|
jpayne@68
|
1143 ambiguousReadsT++;
|
jpayne@68
|
1144 // System.err.println("a*:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size());
|
jpayne@68
|
1145 }else{
|
jpayne@68
|
1146 if(good!=null){
|
jpayne@68
|
1147 good.add(r);
|
jpayne@68
|
1148 }
|
jpayne@68
|
1149 readsOutT++;
|
jpayne@68
|
1150 basesOutT+=r.length();
|
jpayne@68
|
1151 if(trueIceCream && !r.trimmed()){falseNegativeReadsT++;}
|
jpayne@68
|
1152 else{trueNegativeReadsT++;}
|
jpayne@68
|
1153 // if(discardedReads>0 || ambigReads>0){System.err.println("g*:t="+r.tested()+",ad="+r.hasAdapter()+",ir="+r.invertedRepeat()+","+r.id+", reads="+reads.size());}
|
jpayne@68
|
1154 }
|
jpayne@68
|
1155 }
|
jpayne@68
|
1156
|
jpayne@68
|
1157 if(trueIceCreamReads>0){
|
jpayne@68
|
1158 if(discardedReads>0 || trimmedReads>0){
|
jpayne@68
|
1159 truePositiveZMWsT++;
|
jpayne@68
|
1160 }else if(ambigReads>0){
|
jpayne@68
|
1161 ambiguousZMWs++;
|
jpayne@68
|
1162 }else{
|
jpayne@68
|
1163 falseNegativeZMWsT++;
|
jpayne@68
|
1164 // StringBuilder sb=new StringBuilder();
|
jpayne@68
|
1165 // for(Read r : reads) {sb.append("\n").append(r.id);}
|
jpayne@68
|
1166 // System.err.println(sb);
|
jpayne@68
|
1167 }
|
jpayne@68
|
1168 }else{
|
jpayne@68
|
1169 if(discardedReads>0){
|
jpayne@68
|
1170 falsePositiveZMWsT++;
|
jpayne@68
|
1171 }else if(ambigReads>0){
|
jpayne@68
|
1172 ambiguousZMWs++;
|
jpayne@68
|
1173 }else{
|
jpayne@68
|
1174 trueNegativeZMWsT++;
|
jpayne@68
|
1175 }
|
jpayne@68
|
1176 }
|
jpayne@68
|
1177
|
jpayne@68
|
1178 if(rosg!=null && good!=null && !good.isEmpty()){rosg.add(good, 0);}
|
jpayne@68
|
1179 if(rosa!=null && ambig!=null && !ambig.isEmpty()){rosa.add(ambig, 0);}
|
jpayne@68
|
1180 if(rosb!=null && bad!=null && !bad.isEmpty()){rosb.add(bad, 0);}
|
jpayne@68
|
1181 if(rosj!=null && junctions!=null && !junctions.isEmpty()){rosj.add(junctions, 0);}
|
jpayne@68
|
1182 }
|
jpayne@68
|
1183
|
jpayne@68
|
1184 /**
|
jpayne@68
|
1185 * Align part of a read to itself to look for inverted repeats.
|
jpayne@68
|
1186 */
|
jpayne@68
|
1187 AlignmentResult align(final Read r, boolean fullPass, int passes, int readNum){
|
jpayne@68
|
1188 if(!alignRC){return null;}
|
jpayne@68
|
1189 final byte[] bases=r.bases;
|
jpayne@68
|
1190
|
jpayne@68
|
1191 int qlen=(int)Tools.max(minQlen, Tools.min(targetQlen, bases.length*maxQlenFraction));
|
jpayne@68
|
1192 if(qlen>0.45f*bases.length){return null;}//Ignore short stuff
|
jpayne@68
|
1193
|
jpayne@68
|
1194 //Perform an initial scan using the tips of the reads to look for an inverted repeat
|
jpayne@68
|
1195 boolean tipOnly=filterIceCreamOnly && fullPass;
|
jpayne@68
|
1196 // System.err.println(filterIceCreamOnly+", "+fullPass+", "+tipOnly);
|
jpayne@68
|
1197 AlignmentResult a=alignLeft(bases, qlen, minRatio1, true, tipOnly);
|
jpayne@68
|
1198 AlignmentResult b=(fullPass && false ? null : alignRight(bases, qlen, minRatio1, true, tipOnly));//A second alignment is not needed for a full pass.
|
jpayne@68
|
1199 AlignmentResult ar=(a==null ? b : b==null ? a : a.maxScore>=b.maxScore ? a : b);
|
jpayne@68
|
1200
|
jpayne@68
|
1201 //If nothing was detected, return
|
jpayne@68
|
1202 if(ar==null){return null;}
|
jpayne@68
|
1203 ar.alignedRead=r;
|
jpayne@68
|
1204
|
jpayne@68
|
1205 //At this point, the read contains an inverted repeat of length qlen.
|
jpayne@68
|
1206 final int expectedJunction=ar.rLen/2;
|
jpayne@68
|
1207
|
jpayne@68
|
1208 if(ar.left){
|
jpayne@68
|
1209 ar.junctionLoc=ar.maxRpos/2;
|
jpayne@68
|
1210 }else{
|
jpayne@68
|
1211 int innerLeft=ar.maxRpos;
|
jpayne@68
|
1212 int innerRight=ar.rLen-ar.qLen;
|
jpayne@68
|
1213 ar.junctionLoc=(innerLeft+innerRight)/2;
|
jpayne@68
|
1214 }
|
jpayne@68
|
1215
|
jpayne@68
|
1216 // if(fullPass){//This code doesn't seem to have any effect and it's not clear why it is present
|
jpayne@68
|
1217 // int dif=Tools.absdif(expectedJunction, ar.junctionLoc);
|
jpayne@68
|
1218 // if(dif>expectedJunction*0.1) {
|
jpayne@68
|
1219 // if(filterIceCreamOnly){return ar;}
|
jpayne@68
|
1220 // }
|
jpayne@68
|
1221 // }
|
jpayne@68
|
1222
|
jpayne@68
|
1223 if(realign){
|
jpayne@68
|
1224 if(ar.junctionLoc<expectedJunction){
|
jpayne@68
|
1225 int qlen2=(int)(ar.junctionLoc*0.9);
|
jpayne@68
|
1226 if(qlen2>=qlen){
|
jpayne@68
|
1227 ar=alignLeft(bases, qlen2, minRatio2, false, false);
|
jpayne@68
|
1228 }
|
jpayne@68
|
1229 }else{
|
jpayne@68
|
1230 int qlen2=(int)((ar.rLen-ar.junctionLoc)*0.9);
|
jpayne@68
|
1231 if(qlen2>=qlen){
|
jpayne@68
|
1232 ar=alignRight(bases, qlen2, minRatio2, false, false);
|
jpayne@68
|
1233 }
|
jpayne@68
|
1234 }
|
jpayne@68
|
1235 if(ar==null){return null;}
|
jpayne@68
|
1236 ar.alignedRead=r;
|
jpayne@68
|
1237 }
|
jpayne@68
|
1238
|
jpayne@68
|
1239 //At this point, the read contains an inverted repeat mirrored across a junction.
|
jpayne@68
|
1240 final float junctionFraction;
|
jpayne@68
|
1241 if(ar.left){
|
jpayne@68
|
1242 ar.junctionLoc=ar.maxRpos/2;
|
jpayne@68
|
1243 junctionFraction=ar.junctionLoc/(float)ar.rLen;
|
jpayne@68
|
1244 }else{
|
jpayne@68
|
1245 int innerLeft=ar.maxRpos;
|
jpayne@68
|
1246 int innerRight=ar.rLen-ar.qLen;
|
jpayne@68
|
1247 ar.junctionLoc=(innerLeft+innerRight)/2;
|
jpayne@68
|
1248 junctionFraction=(ar.rLen-ar.junctionLoc)/(float)ar.rLen;
|
jpayne@68
|
1249 }
|
jpayne@68
|
1250
|
jpayne@68
|
1251 final int dif=Tools.absdif(expectedJunction, ar.junctionLoc);
|
jpayne@68
|
1252 if(fullPass){
|
jpayne@68
|
1253 if(junctionFraction<minJunctionFraction){
|
jpayne@68
|
1254 ar.icecream=false;
|
jpayne@68
|
1255 }else{
|
jpayne@68
|
1256 ar.icecream=true;
|
jpayne@68
|
1257 }
|
jpayne@68
|
1258 }else if(passes==2){
|
jpayne@68
|
1259 if(junctionFraction<minJunctionFraction){
|
jpayne@68
|
1260 if(readNum==0){
|
jpayne@68
|
1261 //First read, the junction should be closer to the beginning for real ice cream
|
jpayne@68
|
1262 if(ar.junctionLoc>expectedJunction){
|
jpayne@68
|
1263 ar.icecream=false;
|
jpayne@68
|
1264 }else{
|
jpayne@68
|
1265 ar.ambiguous=true;
|
jpayne@68
|
1266 }
|
jpayne@68
|
1267 }else{
|
jpayne@68
|
1268 //Last read, the junction should be closer to the end for real ice cream
|
jpayne@68
|
1269 assert(readNum==1) : readNum;
|
jpayne@68
|
1270 if(ar.junctionLoc<expectedJunction){
|
jpayne@68
|
1271 ar.icecream=false;
|
jpayne@68
|
1272 }else{
|
jpayne@68
|
1273 ar.ambiguous=true;
|
jpayne@68
|
1274 }
|
jpayne@68
|
1275 }
|
jpayne@68
|
1276 return ar;
|
jpayne@68
|
1277 }else{
|
jpayne@68
|
1278 ar.icecream=true;
|
jpayne@68
|
1279 }
|
jpayne@68
|
1280 }else{
|
jpayne@68
|
1281 if(junctionFraction<minJunctionFraction){
|
jpayne@68
|
1282 ar.icecream=true;
|
jpayne@68
|
1283 }else{
|
jpayne@68
|
1284 ar.ambiguous=true;
|
jpayne@68
|
1285 }
|
jpayne@68
|
1286 }
|
jpayne@68
|
1287 return ar;
|
jpayne@68
|
1288 }
|
jpayne@68
|
1289
|
jpayne@68
|
1290 /** Align the left qlen bases to the rest of the read. */
|
jpayne@68
|
1291 private AlignmentResult alignLeft(final byte[] bases, final int qlen, final float minRatio, boolean v2, boolean tipOnly){
|
jpayne@68
|
1292 final byte[] query=Arrays.copyOfRange(bases, 0, qlen);
|
jpayne@68
|
1293 AminoAcid.reverseComplementBasesInPlace(query);
|
jpayne@68
|
1294 final AlignmentResult ar;
|
jpayne@68
|
1295 final int rstop=bases.length-1;
|
jpayne@68
|
1296 final int rstart=(tipOnly ? Tools.max(qlen, rstop-(int)(tipRatio*qlen)) : qlen);
|
jpayne@68
|
1297 if(v2){
|
jpayne@68
|
1298 // elapsedShortT-=System.nanoTime();
|
jpayne@68
|
1299 ar=ica.alignForwardShort(query, bases, rstart, rstop, minScore, minRatio);
|
jpayne@68
|
1300 // elapsedShortT+=System.nanoTime();
|
jpayne@68
|
1301 }else{
|
jpayne@68
|
1302 // elapsedT-=System.nanoTime();
|
jpayne@68
|
1303 ar=ica.alignForward(query, bases, rstart, rstop, minScore, minRatio);
|
jpayne@68
|
1304 // elapsedT+=System.nanoTime();
|
jpayne@68
|
1305 }
|
jpayne@68
|
1306 if(ar!=null){ar.left=true;}
|
jpayne@68
|
1307 return ar;
|
jpayne@68
|
1308 }
|
jpayne@68
|
1309
|
jpayne@68
|
1310 /** Align the right qlen bases to the rest of the read. */
|
jpayne@68
|
1311 private AlignmentResult alignRight(final byte[] bases, final int qlen, final float minRatio, boolean v2, boolean tipOnly){
|
jpayne@68
|
1312 final byte[] query=Arrays.copyOfRange(bases, bases.length-qlen, bases.length);
|
jpayne@68
|
1313 AminoAcid.reverseComplementBasesInPlace(query);
|
jpayne@68
|
1314 final AlignmentResult ar;
|
jpayne@68
|
1315 final int rstop=(tipOnly ? Tools.min((int)(tipRatio*qlen), bases.length-qlen-1) : bases.length-qlen-1);
|
jpayne@68
|
1316 final int rstart=0;
|
jpayne@68
|
1317 if(v2){
|
jpayne@68
|
1318 // elapsedShortT-=System.nanoTime();
|
jpayne@68
|
1319 ar=ica.alignForwardShort(query, bases, rstart, rstop, minScore, minRatio);
|
jpayne@68
|
1320 // elapsedShortT+=System.nanoTime();
|
jpayne@68
|
1321 }else{
|
jpayne@68
|
1322 // elapsedT-=System.nanoTime();
|
jpayne@68
|
1323 ar=ica.alignForward(query, bases, rstart, rstop, minScore, minRatio);
|
jpayne@68
|
1324 // elapsedT+=System.nanoTime();
|
jpayne@68
|
1325 }
|
jpayne@68
|
1326 if(ar!=null){ar.left=false;}
|
jpayne@68
|
1327 return ar;
|
jpayne@68
|
1328 }
|
jpayne@68
|
1329
|
jpayne@68
|
1330 /** Align the adapter sequence to the read, piecewise, to count matches. */
|
jpayne@68
|
1331 private int lookForAdapter(Read r, double mult) {
|
jpayne@68
|
1332 assert(!r.hasAdapter() && !r.tested());
|
jpayne@68
|
1333 int begin=0;
|
jpayne@68
|
1334 while(begin<r.length() && r.bases[begin]=='N'){begin++;} //Skip reads made of 'N'
|
jpayne@68
|
1335 if(begin>=r.length()){return 0;}
|
jpayne@68
|
1336
|
jpayne@68
|
1337 int suspectThresh=(int)(minSwScoreSuspect*mult);
|
jpayne@68
|
1338 int scoreThresh=(int)(minSwScore*mult);
|
jpayne@68
|
1339
|
jpayne@68
|
1340 // final byte[] array=npad(r.bases, pad ? npad : 0);
|
jpayne@68
|
1341 final byte[] array=npad(r.bases, npad);
|
jpayne@68
|
1342
|
jpayne@68
|
1343 // assert(array==r.bases) : npad;
|
jpayne@68
|
1344
|
jpayne@68
|
1345 int lim=array.length-npad-stride;
|
jpayne@68
|
1346
|
jpayne@68
|
1347 int found=0;
|
jpayne@68
|
1348
|
jpayne@68
|
1349 int lastSuspect=-1;
|
jpayne@68
|
1350 int lastConfirmed=-1;
|
jpayne@68
|
1351 int maxScore=0;
|
jpayne@68
|
1352
|
jpayne@68
|
1353 // final int revisedStride=(r.length()>1800 ? stride : (int)(stride*0.6f));
|
jpayne@68
|
1354 for(int i=begin; i<lim; i+=stride){
|
jpayne@68
|
1355 int j=Tools.min(i+window, array.length-1);
|
jpayne@68
|
1356 if(j-i<window){
|
jpayne@68
|
1357 lim=0; //Last loop cycle
|
jpayne@68
|
1358 // i=Tools.max(0, array.length-2*query1.length);
|
jpayne@68
|
1359 }
|
jpayne@68
|
1360
|
jpayne@68
|
1361 {
|
jpayne@68
|
1362
|
jpayne@68
|
1363 int[] rvec;
|
jpayne@68
|
1364 // if(timeless){//A speed-optimized version
|
jpayne@68
|
1365 rvec=ssa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);
|
jpayne@68
|
1366 // }else{rvec=msa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);}
|
jpayne@68
|
1367 if(rvec!=null && rvec[0]>=suspectThresh){
|
jpayne@68
|
1368 final int score=rvec[0];
|
jpayne@68
|
1369 final int start=rvec[1];
|
jpayne@68
|
1370 final int stop=rvec[2];
|
jpayne@68
|
1371 assert(score>=suspectThresh);
|
jpayne@68
|
1372 if((i==0 || start>i) && (j==array.length-1 || stop<j)){
|
jpayne@68
|
1373 boolean kill=(score>=scoreThresh ||
|
jpayne@68
|
1374 (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect<suspectDistance) ||
|
jpayne@68
|
1375 (lastConfirmed>0 && start>=lastConfirmed && start-lastConfirmed<suspectDistance));
|
jpayne@68
|
1376
|
jpayne@68
|
1377 if(!kill && useLocality && array.length-stop>window){//Look ahead
|
jpayne@68
|
1378 rvec=ssa.fillAndScoreLimited(adapter, array, stop, stop+window, suspectThresh);
|
jpayne@68
|
1379 if(rvec!=null){
|
jpayne@68
|
1380 if(score>=suspectMidpoint && rvec[0]>=suspectThresh && rvec[1]-stop<suspectDistance){kill=true;}
|
jpayne@68
|
1381 else if(score>=suspectThresh && rvec[0]>=scoreThresh && rvec[1]-stop<suspectDistance){kill=true;}
|
jpayne@68
|
1382 }
|
jpayne@68
|
1383 }
|
jpayne@68
|
1384
|
jpayne@68
|
1385 if(!kill && useAltMsa){//Try alternate msa
|
jpayne@68
|
1386 rvec=msa2.fillAndScoreLimited(adapter, array, Tools.max(0, start-4), Tools.min(stop+4, array.length-1), suspectThresh);
|
jpayne@68
|
1387 if(rvec!=null && rvec[0]>=(scoreThresh)){kill=true;}
|
jpayne@68
|
1388 }
|
jpayne@68
|
1389
|
jpayne@68
|
1390 if(kill){
|
jpayne@68
|
1391 maxScore=Tools.max(maxScore, score);
|
jpayne@68
|
1392 // if(print) {System.err.println("Found adapter at distance "+Tools.min(start, array.length-stop));}
|
jpayne@68
|
1393 // System.out.println("-:"+score+", "+scoreThresh+", "+suspectThresh+"\t"+lastSuspect+", "+start+", "+stop);
|
jpayne@68
|
1394 found++;
|
jpayne@68
|
1395 for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';}
|
jpayne@68
|
1396 // assert(Shared.threads()!=1) : new String(array)+"\n"+start+", "+stop;
|
jpayne@68
|
1397 if(useLocality && score>=scoreThresh){lastConfirmed=Tools.max(lastConfirmed, stop);}
|
jpayne@68
|
1398 }
|
jpayne@68
|
1399 }
|
jpayne@68
|
1400 // System.out.println("Set lastSuspect="+stop+" on score "+score);
|
jpayne@68
|
1401 if(useLocality){lastSuspect=Tools.max(lastSuspect, stop);}
|
jpayne@68
|
1402 }
|
jpayne@68
|
1403 }
|
jpayne@68
|
1404 }
|
jpayne@68
|
1405
|
jpayne@68
|
1406 r.setTested(true);
|
jpayne@68
|
1407 if(found>0){
|
jpayne@68
|
1408 r.setHasAdapter(true);
|
jpayne@68
|
1409 r.setDiscarded(true);
|
jpayne@68
|
1410 r.setAmbiguous(false);
|
jpayne@68
|
1411
|
jpayne@68
|
1412 int idx=(int)((maxScore*200.0)/maxSwScore);
|
jpayne@68
|
1413 adapterScoresT[idx]++;
|
jpayne@68
|
1414 }else{
|
jpayne@68
|
1415 r.setHasAdapter(false);
|
jpayne@68
|
1416 }
|
jpayne@68
|
1417
|
jpayne@68
|
1418 return found;
|
jpayne@68
|
1419 }
|
jpayne@68
|
1420
|
jpayne@68
|
1421 /** Align the adapter sequence to the read ends, and trim if needed. */
|
jpayne@68
|
1422 private int trimTerminalAdapters(Read r, double mult) {
|
jpayne@68
|
1423 assert(!r.hasAdapter() && !r.tested());
|
jpayne@68
|
1424 int begin=0;
|
jpayne@68
|
1425 while(begin<r.length() && r.bases[begin]=='N'){begin++;} //Skip reads made of 'N'
|
jpayne@68
|
1426 if(begin>=r.length()){return 0;}
|
jpayne@68
|
1427
|
jpayne@68
|
1428 int suspectThresh=(int)(minSwScoreSuspect*mult);
|
jpayne@68
|
1429 int scoreThresh=(int)(minSwScore*mult);
|
jpayne@68
|
1430
|
jpayne@68
|
1431 // final byte[] array=npad(r.bases, pad ? npad : 0);
|
jpayne@68
|
1432 final byte[] array=npad(r.bases, npad);
|
jpayne@68
|
1433
|
jpayne@68
|
1434 // assert(array==r.bases) : npad;
|
jpayne@68
|
1435
|
jpayne@68
|
1436 int lim=array.length-npad-stride;
|
jpayne@68
|
1437
|
jpayne@68
|
1438 int found=0;
|
jpayne@68
|
1439
|
jpayne@68
|
1440 int lastSuspect=-1;
|
jpayne@68
|
1441 int lastConfirmed=-1;
|
jpayne@68
|
1442 int maxScore=0;
|
jpayne@68
|
1443
|
jpayne@68
|
1444 // final int revisedStride=(r.length()>1800 ? stride : (int)(stride*0.6f));
|
jpayne@68
|
1445 for(int i=begin; i<lim; i+=stride){
|
jpayne@68
|
1446 int j=Tools.min(i+window, array.length-1);
|
jpayne@68
|
1447 if(j-i<window){
|
jpayne@68
|
1448 lim=0; //Last loop cycle
|
jpayne@68
|
1449 // i=Tools.max(0, array.length-2*query1.length);
|
jpayne@68
|
1450 }
|
jpayne@68
|
1451
|
jpayne@68
|
1452 {
|
jpayne@68
|
1453
|
jpayne@68
|
1454 int[] rvec;
|
jpayne@68
|
1455 // if(timeless){//A speed-optimized version
|
jpayne@68
|
1456 rvec=ssa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);
|
jpayne@68
|
1457 // }else{rvec=msa.fillAndScoreLimited(adapter, array, i, j, suspectThresh);}
|
jpayne@68
|
1458 if(rvec!=null && rvec[0]>=suspectThresh){
|
jpayne@68
|
1459 final int score=rvec[0];
|
jpayne@68
|
1460 final int start=rvec[1];
|
jpayne@68
|
1461 final int stop=rvec[2];
|
jpayne@68
|
1462 assert(score>=suspectThresh);
|
jpayne@68
|
1463 if((i==0 || start>i) && (j==array.length-1 || stop<j)){
|
jpayne@68
|
1464 boolean kill=(score>=scoreThresh ||
|
jpayne@68
|
1465 (score>=suspectMidpoint && lastSuspect>0 && start>=lastSuspect && start-lastSuspect<suspectDistance) ||
|
jpayne@68
|
1466 (lastConfirmed>0 && start>=lastConfirmed && start-lastConfirmed<suspectDistance));
|
jpayne@68
|
1467
|
jpayne@68
|
1468 if(!kill && useLocality && array.length-stop>window){//Look ahead
|
jpayne@68
|
1469 rvec=ssa.fillAndScoreLimited(adapter, array, stop, stop+window, suspectThresh);
|
jpayne@68
|
1470 if(rvec!=null){
|
jpayne@68
|
1471 if(score>=suspectMidpoint && rvec[0]>=suspectThresh && rvec[1]-stop<suspectDistance){kill=true;}
|
jpayne@68
|
1472 else if(score>=suspectThresh && rvec[0]>=scoreThresh && rvec[1]-stop<suspectDistance){kill=true;}
|
jpayne@68
|
1473 }
|
jpayne@68
|
1474 }
|
jpayne@68
|
1475
|
jpayne@68
|
1476 if(!kill && useAltMsa){//Try alternate msa
|
jpayne@68
|
1477 rvec=msa2.fillAndScoreLimited(adapter, array, Tools.max(0, start-4), Tools.min(stop+4, array.length-1), suspectThresh);
|
jpayne@68
|
1478 if(rvec!=null && rvec[0]>=(scoreThresh)){kill=true;}
|
jpayne@68
|
1479 }
|
jpayne@68
|
1480
|
jpayne@68
|
1481 if(kill){
|
jpayne@68
|
1482 maxScore=Tools.max(maxScore, score);
|
jpayne@68
|
1483 // if(print) {System.err.println("Found adapter at distance "+Tools.min(start, array.length-stop));}
|
jpayne@68
|
1484 // System.out.println("-:"+score+", "+scoreThresh+", "+suspectThresh+"\t"+lastSuspect+", "+start+", "+stop);
|
jpayne@68
|
1485 found++;
|
jpayne@68
|
1486 for(int x=Tools.max(0, start); x<=stop; x++){array[x]='X';}
|
jpayne@68
|
1487 // assert(Shared.threads()!=1) : new String(array)+"\n"+start+", "+stop;
|
jpayne@68
|
1488 if(useLocality && score>=scoreThresh){lastConfirmed=Tools.max(lastConfirmed, stop);}
|
jpayne@68
|
1489 }
|
jpayne@68
|
1490 }
|
jpayne@68
|
1491 // System.out.println("Set lastSuspect="+stop+" on score "+score);
|
jpayne@68
|
1492 if(useLocality){lastSuspect=Tools.max(lastSuspect, stop);}
|
jpayne@68
|
1493 }
|
jpayne@68
|
1494 }
|
jpayne@68
|
1495 }
|
jpayne@68
|
1496
|
jpayne@68
|
1497 r.setTested(true);
|
jpayne@68
|
1498 if(found>0){
|
jpayne@68
|
1499 r.setHasAdapter(true);
|
jpayne@68
|
1500 r.setDiscarded(true);
|
jpayne@68
|
1501 r.setAmbiguous(false);
|
jpayne@68
|
1502
|
jpayne@68
|
1503 int idx=(int)((maxScore*200.0)/maxSwScore);
|
jpayne@68
|
1504 adapterScoresT[idx]++;
|
jpayne@68
|
1505 }else{
|
jpayne@68
|
1506 r.setHasAdapter(false);
|
jpayne@68
|
1507 }
|
jpayne@68
|
1508
|
jpayne@68
|
1509 return found;
|
jpayne@68
|
1510 }
|
jpayne@68
|
1511
|
jpayne@68
|
1512 private byte[] npad(final byte[] array, final int pad){
|
jpayne@68
|
1513 if(pad<=0){return array;}
|
jpayne@68
|
1514 final int len=array.length+2*pad;
|
jpayne@68
|
1515 byte[] r=new byte[len];
|
jpayne@68
|
1516 for(int i=0; i<pad; i++){r[i]='N';}
|
jpayne@68
|
1517 for(int i=pad, j=0; j<array.length; i++, j++){r[i]=array[j];}
|
jpayne@68
|
1518 for(int i=array.length+pad, limit=Tools.min(r.length, len+50); i<limit; i++){r[i]='N';}
|
jpayne@68
|
1519 return r;
|
jpayne@68
|
1520 }
|
jpayne@68
|
1521
|
jpayne@68
|
1522 /*--------------------------------------------------------------*/
|
jpayne@68
|
1523 /*---------------- Inner Class Fields ----------------*/
|
jpayne@68
|
1524 /*--------------------------------------------------------------*/
|
jpayne@68
|
1525
|
jpayne@68
|
1526 /** Number of reads processed by this thread */
|
jpayne@68
|
1527 protected long readsProcessedT=0;
|
jpayne@68
|
1528 /** Number of bases processed by this thread */
|
jpayne@68
|
1529 protected long basesProcessedT=0;
|
jpayne@68
|
1530
|
jpayne@68
|
1531
|
jpayne@68
|
1532 protected long truePositiveReadsT=0;
|
jpayne@68
|
1533 protected long falsePositiveReadsT=0;
|
jpayne@68
|
1534 protected long trueNegativeReadsT=0;
|
jpayne@68
|
1535 protected long falseNegativeReadsT=0;
|
jpayne@68
|
1536 protected long ambiguousReadsT=0;
|
jpayne@68
|
1537
|
jpayne@68
|
1538 protected long truePositiveZMWsT=0;
|
jpayne@68
|
1539 protected long falsePositiveZMWsT=0;
|
jpayne@68
|
1540 protected long trueNegativeZMWsT=0;
|
jpayne@68
|
1541 protected long falseNegativeZMWsT=0;
|
jpayne@68
|
1542 protected long ambiguousZMWsT=0;
|
jpayne@68
|
1543
|
jpayne@68
|
1544
|
jpayne@68
|
1545 protected long elapsedT=0;
|
jpayne@68
|
1546 protected long elapsedShortT=0;
|
jpayne@68
|
1547
|
jpayne@68
|
1548 //Unused
|
jpayne@68
|
1549 protected long iceCreamReadsT=0;
|
jpayne@68
|
1550 protected long iceCreamBasesT=0;
|
jpayne@68
|
1551
|
jpayne@68
|
1552 protected long iceCreamZMWsT=0;
|
jpayne@68
|
1553 protected double iceCreamRatioT=0;
|
jpayne@68
|
1554 protected long ratiosCountedT=0;
|
jpayne@68
|
1555 protected long missingAdapterZMWsT=0;
|
jpayne@68
|
1556 protected long hiddenAdapterZMWsT=0;
|
jpayne@68
|
1557
|
jpayne@68
|
1558 protected long basesTrimmedT=0;
|
jpayne@68
|
1559 protected long readsTrimmedT=0;
|
jpayne@68
|
1560
|
jpayne@68
|
1561 /** Number of reads retained by this thread */
|
jpayne@68
|
1562 protected long readsOutT=0;
|
jpayne@68
|
1563 /** Number of bases retained by this thread */
|
jpayne@68
|
1564 protected long basesOutT=0;
|
jpayne@68
|
1565 /** Number of junctions detected in IR reads by this thread */
|
jpayne@68
|
1566 protected long junctionsOutT=0;
|
jpayne@68
|
1567
|
jpayne@68
|
1568 protected long lowEntropyZMWsT=0;
|
jpayne@68
|
1569 protected long lowEntropyReadsT=0;
|
jpayne@68
|
1570
|
jpayne@68
|
1571 /** True only if this thread has completed successfully */
|
jpayne@68
|
1572 boolean success=false;
|
jpayne@68
|
1573
|
jpayne@68
|
1574 /** Shared output stream */
|
jpayne@68
|
1575 private final ConcurrentReadOutputStream rosg;
|
jpayne@68
|
1576 /** Shared output stream for ambiguous reads */
|
jpayne@68
|
1577 private final ConcurrentReadOutputStream rosa;
|
jpayne@68
|
1578 /** Shared output stream for bad reads */
|
jpayne@68
|
1579 private final ConcurrentReadOutputStream rosb;
|
jpayne@68
|
1580 /** Shared output stream for junctions */
|
jpayne@68
|
1581 private final ConcurrentReadOutputStream rosj;
|
jpayne@68
|
1582 /** Thread ID */
|
jpayne@68
|
1583 final int tid;
|
jpayne@68
|
1584
|
jpayne@68
|
1585 /* Aligners for this thread */
|
jpayne@68
|
1586
|
jpayne@68
|
1587 /** For inverted repeat alignment */
|
jpayne@68
|
1588 final IceCreamAligner ica=IceCreamAligner.makeAligner(32);
|
jpayne@68
|
1589 /** For quickly aligning adapter sequence to whole read */
|
jpayne@68
|
1590 final FlatAligner fla=new FlatAligner();
|
jpayne@68
|
1591 // final MultiStateAligner9PacBioAdapter msa=alignAdapter ? new MultiStateAligner9PacBioAdapter(ALIGN_ROWS, ALIGN_COLUMNS) : null;
|
jpayne@68
|
1592 /** For slowly aligning adapter sequence sectionwise */
|
jpayne@68
|
1593 final SingleStateAlignerPacBioAdapter ssa=(alignAdapter || trimReads) ? new SingleStateAlignerPacBioAdapter(ALIGN_ROWS, ALIGN_COLUMNS, adapter.length) : null;
|
jpayne@68
|
1594 /** Alternate scoring for slow adapter alignment */
|
jpayne@68
|
1595 final MultiStateAligner9PacBioAdapter2 msa2=(alignAdapter || trimReads) ? new MultiStateAligner9PacBioAdapter2() : null;
|
jpayne@68
|
1596
|
jpayne@68
|
1597 final ZMWStreamer zstream;
|
jpayne@68
|
1598 final EntropyTracker eTracker;
|
jpayne@68
|
1599
|
jpayne@68
|
1600 final long[] adapterScoresT=new long[201];
|
jpayne@68
|
1601 final long[] repeatScoresT=new long[201];
|
jpayne@68
|
1602 final byte[] tipBufferLeft=new byte[adapterTipLen+adapterTipPad];
|
jpayne@68
|
1603 final byte[] tipBufferRight=new byte[adapterTipLen+adapterTipPad];
|
jpayne@68
|
1604 }
|
jpayne@68
|
1605
|
jpayne@68
|
1606 /*--------------------------------------------------------------*/
|
jpayne@68
|
1607 /*---------------- Fields ----------------*/
|
jpayne@68
|
1608 /*--------------------------------------------------------------*/
|
jpayne@68
|
1609
|
jpayne@68
|
1610 /** Primary input file path */
|
jpayne@68
|
1611 private String in1=null;
|
jpayne@68
|
1612
|
jpayne@68
|
1613 /** Primary output file path */
|
jpayne@68
|
1614 private String outg=null;
|
jpayne@68
|
1615 /** Ambiguous output file path */
|
jpayne@68
|
1616 private String outa=null;
|
jpayne@68
|
1617 /** Bad output file path */
|
jpayne@68
|
1618 private String outb=null;
|
jpayne@68
|
1619 /** Junction output file path */
|
jpayne@68
|
1620 private String outj=null;
|
jpayne@68
|
1621 /** Stats (screen) output file path */
|
jpayne@68
|
1622 private String outstats=null;
|
jpayne@68
|
1623
|
jpayne@68
|
1624 /** Adapter score ratio histogram */
|
jpayne@68
|
1625 private String asrhist=null;
|
jpayne@68
|
1626
|
jpayne@68
|
1627 /** Inverted repeat score ratio histogram */
|
jpayne@68
|
1628 private String irsrhist=null;
|
jpayne@68
|
1629
|
jpayne@68
|
1630 /** Override input file extension */
|
jpayne@68
|
1631 private String extin=null;
|
jpayne@68
|
1632 /** Override output file extension */
|
jpayne@68
|
1633 private String extout=null;
|
jpayne@68
|
1634
|
jpayne@68
|
1635 private int targetQlen=352;
|
jpayne@68
|
1636 private int minQlen=100;
|
jpayne@68
|
1637
|
jpayne@68
|
1638 /** Make a query at most this fraction of read length */
|
jpayne@68
|
1639 private float maxQlenFraction=0.15f;
|
jpayne@68
|
1640
|
jpayne@68
|
1641 /** Exit alignment early if score drops below this.
|
jpayne@68
|
1642 * An aggressive optimization that may miss some low-quality inverted repeats.
|
jpayne@68
|
1643 * -700 seems safe. */
|
jpayne@68
|
1644 private int minScore=-800;
|
jpayne@68
|
1645
|
jpayne@68
|
1646 /** Fraction of maximum alignment score to consider as matching for initial alignment */
|
jpayne@68
|
1647 private float minRatio1=0.59f;
|
jpayne@68
|
1648
|
jpayne@68
|
1649 /** Fraction of maximum alignment score to consider as matching for realignment */
|
jpayne@68
|
1650 private float minRatio2=0.64f;
|
jpayne@68
|
1651
|
jpayne@68
|
1652 private float adapterRatio=0.18f; //.18 for fa, or 0.57/0.6 for ica
|
jpayne@68
|
1653 private float adapterRatio2=0.325f; //0.31f normal, 0.325 timeless
|
jpayne@68
|
1654 private float suspectRatio=0.85f;
|
jpayne@68
|
1655 private boolean useLocality=true;
|
jpayne@68
|
1656 private boolean useAltMsa=true;
|
jpayne@68
|
1657
|
jpayne@68
|
1658
|
jpayne@68
|
1659 private float tipRatio=1.5f;
|
jpayne@68
|
1660
|
jpayne@68
|
1661 private float longReadMult=1.5f;
|
jpayne@68
|
1662
|
jpayne@68
|
1663 private float shortReadMult=1.5f;
|
jpayne@68
|
1664
|
jpayne@68
|
1665 private float veryShortMult=0.35f;
|
jpayne@68
|
1666
|
jpayne@68
|
1667 /** Short half of inverted repeat must be at least this fraction of read length to be classed as a triangle */
|
jpayne@68
|
1668 private float minJunctionFraction=0.4f;
|
jpayne@68
|
1669
|
jpayne@68
|
1670 /** Only filter triangle reads, not all inverted repeats */
|
jpayne@68
|
1671 private boolean filterIceCreamOnly=true;
|
jpayne@68
|
1672
|
jpayne@68
|
1673 /** Align again once the junction position is provisionally identified */
|
jpayne@68
|
1674 private boolean realign=true;
|
jpayne@68
|
1675
|
jpayne@68
|
1676 /** For internal read array transfers */
|
jpayne@68
|
1677 private int queuelen=80;
|
jpayne@68
|
1678
|
jpayne@68
|
1679 /** For grading synthetic data */
|
jpayne@68
|
1680 private boolean parseCustom;
|
jpayne@68
|
1681
|
jpayne@68
|
1682 /** Input reads are CCS (full pass) */
|
jpayne@68
|
1683 private boolean CCS;
|
jpayne@68
|
1684
|
jpayne@68
|
1685 private boolean modifyHeader=false;
|
jpayne@68
|
1686
|
jpayne@68
|
1687 private boolean sendAmbigToGood=true;
|
jpayne@68
|
1688 private boolean sendAmbigToBad=false;
|
jpayne@68
|
1689 private boolean setAmbig=false;
|
jpayne@68
|
1690
|
jpayne@68
|
1691 //Note: These flags are very similar... they need to be better-defined or merged.
|
jpayne@68
|
1692 private boolean keepZMWsTogether=false;
|
jpayne@68
|
1693 private boolean keepShortReads=true;
|
jpayne@68
|
1694
|
jpayne@68
|
1695 private int format=FORMAT_TEXT;
|
jpayne@68
|
1696 private static final int FORMAT_TEXT=1, FORMAT_JSON=2;
|
jpayne@68
|
1697
|
jpayne@68
|
1698 /*--------------------------------------------------------------*/
|
jpayne@68
|
1699
|
jpayne@68
|
1700 /** Alignment iterations for inverted repeat calculation with ref columns and query rows */
|
jpayne@68
|
1701 protected long alignmentIters=0;
|
jpayne@68
|
1702
|
jpayne@68
|
1703 /** Alignment iterations for inverted repeat calculation with query columns and ref rows */
|
jpayne@68
|
1704 protected long alignmentItersShort=0;
|
jpayne@68
|
1705
|
jpayne@68
|
1706 /** Time spent in long iterations */
|
jpayne@68
|
1707 protected long elapsed=0;
|
jpayne@68
|
1708
|
jpayne@68
|
1709 /** Time spent in short iterations */
|
jpayne@68
|
1710 protected long elapsedShort=0;
|
jpayne@68
|
1711
|
jpayne@68
|
1712 /** Print iteration time statistics */
|
jpayne@68
|
1713 protected boolean printTiming=false;
|
jpayne@68
|
1714
|
jpayne@68
|
1715 /** Number of reads processed */
|
jpayne@68
|
1716 protected long readsProcessed=0;
|
jpayne@68
|
1717 /** Number of bases processed */
|
jpayne@68
|
1718 protected long basesProcessed=0;
|
jpayne@68
|
1719
|
jpayne@68
|
1720 /** Number of reads retained */
|
jpayne@68
|
1721 protected long readsOut=0;
|
jpayne@68
|
1722 /** Number of bases retained */
|
jpayne@68
|
1723 protected long basesOut=0;
|
jpayne@68
|
1724 /** Number of junctions detected in IR reads */
|
jpayne@68
|
1725 protected long junctionsOut=0;
|
jpayne@68
|
1726
|
jpayne@68
|
1727 /** Quit after processing this many input reads; -1 means no limit */
|
jpayne@68
|
1728 private long maxReads=-1;
|
jpayne@68
|
1729
|
jpayne@68
|
1730 //Unused
|
jpayne@68
|
1731 protected long iceCreamReads=0;
|
jpayne@68
|
1732 protected long iceCreamBases=0;
|
jpayne@68
|
1733
|
jpayne@68
|
1734 /** ZMWs with discarded reads */
|
jpayne@68
|
1735 protected long iceCreamZMWs=0;
|
jpayne@68
|
1736
|
jpayne@68
|
1737 /** Sum of IR alignment ratios for IR reads not containing adapters */
|
jpayne@68
|
1738 protected double iceCreamRatio=0;
|
jpayne@68
|
1739
|
jpayne@68
|
1740 /** Number of ratios in iceCreamRatio */
|
jpayne@68
|
1741 protected long ratiosCounted=0;
|
jpayne@68
|
1742
|
jpayne@68
|
1743 /** Histogram */
|
jpayne@68
|
1744 protected final long[] adapterScores=new long[201];
|
jpayne@68
|
1745
|
jpayne@68
|
1746 /** Histogram */
|
jpayne@68
|
1747 protected final long[] repeatScores=new long[201];
|
jpayne@68
|
1748
|
jpayne@68
|
1749 /** ZMWs with a triangle read but no hidden adapters */
|
jpayne@68
|
1750 protected long missingAdapterZMWs=0;
|
jpayne@68
|
1751
|
jpayne@68
|
1752 /** ZMWs with hidden adapters */
|
jpayne@68
|
1753 protected long hiddenAdapterZMWs=0;
|
jpayne@68
|
1754
|
jpayne@68
|
1755 protected long basesTrimmed=0;
|
jpayne@68
|
1756 protected long readsTrimmed=0;
|
jpayne@68
|
1757
|
jpayne@68
|
1758 protected long lowEntropyZMWs=0;
|
jpayne@68
|
1759 protected long lowEntropyReads=0;
|
jpayne@68
|
1760
|
jpayne@68
|
1761 /** Total ZMWs observed */
|
jpayne@68
|
1762 protected long ZMWs=0;
|
jpayne@68
|
1763
|
jpayne@68
|
1764 protected long truePositiveReads=0;
|
jpayne@68
|
1765 protected long falsePositiveReads=0;
|
jpayne@68
|
1766 protected long trueNegativeReads=0;
|
jpayne@68
|
1767 protected long falseNegativeReads=0;
|
jpayne@68
|
1768 protected long ambiguousReads=0;
|
jpayne@68
|
1769
|
jpayne@68
|
1770 protected long truePositiveZMWs=0;
|
jpayne@68
|
1771 protected long falsePositiveZMWs=0;
|
jpayne@68
|
1772 protected long trueNegativeZMWs=0;
|
jpayne@68
|
1773 protected long falseNegativeZMWs=0;
|
jpayne@68
|
1774 protected long ambiguousZMWs=0;
|
jpayne@68
|
1775
|
jpayne@68
|
1776 protected final int stride;
|
jpayne@68
|
1777 protected final int window;
|
jpayne@68
|
1778 protected final int ALIGN_ROWS;
|
jpayne@68
|
1779 protected final int ALIGN_COLUMNS;
|
jpayne@68
|
1780
|
jpayne@68
|
1781 private boolean timeless=true;
|
jpayne@68
|
1782 protected final int maxSwScore;
|
jpayne@68
|
1783 protected final int minSwScore;
|
jpayne@68
|
1784 protected final int minSwScoreSuspect;
|
jpayne@68
|
1785 protected final int maxImperfectSwScore;
|
jpayne@68
|
1786 protected final int suspectMidpoint;
|
jpayne@68
|
1787 protected final int suspectDistance=100;
|
jpayne@68
|
1788 protected int npad=0; //This is for catching adapters on the tips, which are not very relevant to ice cream. Set to 35 for tip apdapters.
|
jpayne@68
|
1789
|
jpayne@68
|
1790 private byte[] adapter="ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT".getBytes(); //This one seems to be correct
|
jpayne@68
|
1791 // private byte[] adapter="ATCTCTCTCTTTTCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGAT".getBytes();
|
jpayne@68
|
1792 private boolean alignAdapter=true;
|
jpayne@68
|
1793 private boolean alignRC=true;
|
jpayne@68
|
1794 private boolean flagLongReads=true;
|
jpayne@68
|
1795 private boolean trimReads=true;
|
jpayne@68
|
1796 private int minLengthAfterTrimming=40;
|
jpayne@68
|
1797 private int adapterTipLen=100;
|
jpayne@68
|
1798 private int adapterTipPad=35;
|
jpayne@68
|
1799
|
jpayne@68
|
1800 boolean trimPolyA=false;
|
jpayne@68
|
1801
|
jpayne@68
|
1802 /*--------------------------------------------------------------*/
|
jpayne@68
|
1803 /*---------------- Entropy Fields ----------------*/
|
jpayne@68
|
1804 /*--------------------------------------------------------------*/
|
jpayne@68
|
1805
|
jpayne@68
|
1806 /** Minimum entropy to be considered "complex", on a scale of 0-1 */
|
jpayne@68
|
1807 float entropyCutoff=-1; //suggested 0.55
|
jpayne@68
|
1808 /** Minimum length of a low-entropy block to fail a read */
|
jpayne@68
|
1809 int entropyLength=350;
|
jpayne@68
|
1810 /** Minimum length of a low-entropy block as a fraction of read length;
|
jpayne@68
|
1811 * the minimum of the two will be used */
|
jpayne@68
|
1812 float entropyFraction=0.5f;
|
jpayne@68
|
1813
|
jpayne@68
|
1814 float maxMonomerFraction=0.74f; //Suggested... 0.74
|
jpayne@68
|
1815
|
jpayne@68
|
1816 /*--------------------------------------------------------------*/
|
jpayne@68
|
1817 /*---------------- Final Fields ----------------*/
|
jpayne@68
|
1818 /*--------------------------------------------------------------*/
|
jpayne@68
|
1819
|
jpayne@68
|
1820 /** Primary input file */
|
jpayne@68
|
1821 private final FileFormat ffin1;
|
jpayne@68
|
1822
|
jpayne@68
|
1823 /** Primary output file */
|
jpayne@68
|
1824 private final FileFormat ffoutg;
|
jpayne@68
|
1825
|
jpayne@68
|
1826 /** Ambiguous output file */
|
jpayne@68
|
1827 private final FileFormat ffouta;
|
jpayne@68
|
1828
|
jpayne@68
|
1829 /** Bad output file */
|
jpayne@68
|
1830 private final FileFormat ffoutb;
|
jpayne@68
|
1831
|
jpayne@68
|
1832 /** Junction output file */
|
jpayne@68
|
1833 private final FileFormat ffoutj;
|
jpayne@68
|
1834
|
jpayne@68
|
1835 /** Stats output file */
|
jpayne@68
|
1836 private final FileFormat ffstats;
|
jpayne@68
|
1837
|
jpayne@68
|
1838 /** Adapter score ratio histogram */
|
jpayne@68
|
1839 private final FileFormat ffasrhist;
|
jpayne@68
|
1840
|
jpayne@68
|
1841 /** Inverted repeat score ratio histogram */
|
jpayne@68
|
1842 private final FileFormat ffirsrhist;
|
jpayne@68
|
1843
|
jpayne@68
|
1844 private final int threads;
|
jpayne@68
|
1845
|
jpayne@68
|
1846 /*--------------------------------------------------------------*/
|
jpayne@68
|
1847 /*---------------- Common Fields ----------------*/
|
jpayne@68
|
1848 /*--------------------------------------------------------------*/
|
jpayne@68
|
1849
|
jpayne@68
|
1850 /** Print status messages to this output stream */
|
jpayne@68
|
1851 private PrintStream outstream=System.err;
|
jpayne@68
|
1852 /** Print verbose messages */
|
jpayne@68
|
1853 public static boolean verbose=false;
|
jpayne@68
|
1854 /** True if an error was encountered */
|
jpayne@68
|
1855 public boolean errorState=false;
|
jpayne@68
|
1856 /** Overwrite existing output files */
|
jpayne@68
|
1857 private boolean overwrite=false;
|
jpayne@68
|
1858 /** Append to existing output files */
|
jpayne@68
|
1859 private boolean append=false;
|
jpayne@68
|
1860
|
jpayne@68
|
1861 }
|