jpayne@68
|
1 package consensus;
|
jpayne@68
|
2
|
jpayne@68
|
3 import java.io.PrintStream;
|
jpayne@68
|
4 import java.util.ArrayList;
|
jpayne@68
|
5 import java.util.LinkedHashMap;
|
jpayne@68
|
6 import java.util.Map.Entry;
|
jpayne@68
|
7 import java.util.concurrent.atomic.AtomicIntegerArray;
|
jpayne@68
|
8 import java.util.concurrent.atomic.AtomicLongArray;
|
jpayne@68
|
9
|
jpayne@68
|
10 import dna.AminoAcid;
|
jpayne@68
|
11 import fileIO.ByteFile;
|
jpayne@68
|
12 import fileIO.FileFormat;
|
jpayne@68
|
13 import fileIO.ReadWrite;
|
jpayne@68
|
14 import shared.Parse;
|
jpayne@68
|
15 import shared.Parser;
|
jpayne@68
|
16 import shared.PreParser;
|
jpayne@68
|
17 import shared.ReadStats;
|
jpayne@68
|
18 import shared.Shared;
|
jpayne@68
|
19 import shared.Timer;
|
jpayne@68
|
20 import shared.Tools;
|
jpayne@68
|
21 import stream.ConcurrentReadInputStream;
|
jpayne@68
|
22 import stream.ConcurrentReadOutputStream;
|
jpayne@68
|
23 import stream.FastaReadInputStream;
|
jpayne@68
|
24 import stream.Read;
|
jpayne@68
|
25 import stream.SamLine;
|
jpayne@68
|
26 import stream.SamReadStreamer;
|
jpayne@68
|
27 import stream.SamStreamer;
|
jpayne@68
|
28 import structures.ByteBuilder;
|
jpayne@68
|
29 import structures.ListNum;
|
jpayne@68
|
30 import template.Accumulator;
|
jpayne@68
|
31 import template.ThreadWaiter;
|
jpayne@68
|
32 import var2.SamFilter;
|
jpayne@68
|
33
|
jpayne@68
|
34 /**
|
jpayne@68
|
35 * Scaffolds contigs based on paired read mapping.
|
jpayne@68
|
36 *
|
jpayne@68
|
37 * @author Brian Bushnell
|
jpayne@68
|
38 * @date September 11, 2019
|
jpayne@68
|
39 *
|
jpayne@68
|
40 */
|
jpayne@68
|
41 public class Lilypad implements Accumulator<Lilypad.ProcessThread> {
|
jpayne@68
|
42
|
jpayne@68
|
43 /*--------------------------------------------------------------*/
|
jpayne@68
|
44 /*---------------- Initialization ----------------*/
|
jpayne@68
|
45 /*--------------------------------------------------------------*/
|
jpayne@68
|
46
|
jpayne@68
|
47 /**
|
jpayne@68
|
48 * Code entrance from the command line.
|
jpayne@68
|
49 * @param args Command line arguments
|
jpayne@68
|
50 */
|
jpayne@68
|
51 public static void main(String[] args){
|
jpayne@68
|
52 //Start a timer immediately upon code entrance.
|
jpayne@68
|
53 Timer t=new Timer();
|
jpayne@68
|
54
|
jpayne@68
|
55 //Create an instance of this class
|
jpayne@68
|
56 Lilypad x=new Lilypad(args);
|
jpayne@68
|
57
|
jpayne@68
|
58 //Run the object
|
jpayne@68
|
59 x.process(t);
|
jpayne@68
|
60
|
jpayne@68
|
61 //Close the print stream if it was redirected
|
jpayne@68
|
62 Shared.closeStream(x.outstream);
|
jpayne@68
|
63 }
|
jpayne@68
|
64
|
jpayne@68
|
65 /**
|
jpayne@68
|
66 * Constructor.
|
jpayne@68
|
67 * @param args Command line arguments
|
jpayne@68
|
68 */
|
jpayne@68
|
69 public Lilypad(String[] args){
|
jpayne@68
|
70
|
jpayne@68
|
71 {//Preparse block for help, config files, and outstream
|
jpayne@68
|
72 PreParser pp=new PreParser(args, getClass(), false);
|
jpayne@68
|
73 args=pp.args;
|
jpayne@68
|
74 outstream=pp.outstream;
|
jpayne@68
|
75 }
|
jpayne@68
|
76
|
jpayne@68
|
77 //Set shared static variables prior to parsing
|
jpayne@68
|
78 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
|
jpayne@68
|
79 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
|
jpayne@68
|
80 SamLine.RNAME_AS_BYTES=false;
|
jpayne@68
|
81
|
jpayne@68
|
82 samFilter.includeUnmapped=false;
|
jpayne@68
|
83 samFilter.includeSupplimentary=false;
|
jpayne@68
|
84 // samFilter.includeDuplicate=false;
|
jpayne@68
|
85 samFilter.includeNonPrimary=false;
|
jpayne@68
|
86 samFilter.includeQfail=false;
|
jpayne@68
|
87 samFilter.minMapq=4;
|
jpayne@68
|
88
|
jpayne@68
|
89 {//Parse the arguments
|
jpayne@68
|
90 final Parser parser=parse(args);
|
jpayne@68
|
91
|
jpayne@68
|
92 Parser.processQuality();
|
jpayne@68
|
93
|
jpayne@68
|
94 maxReads=parser.maxReads;
|
jpayne@68
|
95 overwrite=ReadStats.overwrite=parser.overwrite;
|
jpayne@68
|
96 append=ReadStats.append=parser.append;
|
jpayne@68
|
97
|
jpayne@68
|
98 in=parser.in1;
|
jpayne@68
|
99 extin=parser.extin;
|
jpayne@68
|
100
|
jpayne@68
|
101 out=parser.out1;
|
jpayne@68
|
102 extout=parser.extout;
|
jpayne@68
|
103 }
|
jpayne@68
|
104
|
jpayne@68
|
105 {
|
jpayne@68
|
106 // if("auto".equalsIgnoreCase(atomic)){Scaffold.setCA3A(Shared.threads()>8);}
|
jpayne@68
|
107 // else{Scaffold.setCA3A(Parse.parseBoolean(atomic));}
|
jpayne@68
|
108 samFilter.setSamtoolsFilter();
|
jpayne@68
|
109
|
jpayne@68
|
110 streamerThreads=Tools.max(1, Tools.min(streamerThreads, Shared.threads()));
|
jpayne@68
|
111 assert(streamerThreads>0) : streamerThreads;
|
jpayne@68
|
112 }
|
jpayne@68
|
113
|
jpayne@68
|
114 validateParams();
|
jpayne@68
|
115 fixExtensions(); //Add or remove .gz or .bz2 as needed
|
jpayne@68
|
116 checkFileExistence(); //Ensure files can be read and written
|
jpayne@68
|
117 checkStatics(); //Adjust file-related static fields as needed for this program
|
jpayne@68
|
118
|
jpayne@68
|
119 //Create output FileFormat objects
|
jpayne@68
|
120 ffout=FileFormat.testOutput(out, FileFormat.FASTA, extout, true, overwrite, append, ordered);
|
jpayne@68
|
121
|
jpayne@68
|
122 //Create input FileFormat objects
|
jpayne@68
|
123 ffin=FileFormat.testInput(in, FileFormat.SAM, extin, true, true);
|
jpayne@68
|
124 ffref=FileFormat.testInput(ref, FileFormat.FASTA, null, true, true);
|
jpayne@68
|
125 }
|
jpayne@68
|
126
|
jpayne@68
|
127 /*--------------------------------------------------------------*/
|
jpayne@68
|
128 /*---------------- Initialization Helpers ----------------*/
|
jpayne@68
|
129 /*--------------------------------------------------------------*/
|
jpayne@68
|
130
|
jpayne@68
|
131 /** Parse arguments from the command line */
|
jpayne@68
|
132 private Parser parse(String[] args){
|
jpayne@68
|
133
|
jpayne@68
|
134 //Create a parser object
|
jpayne@68
|
135 Parser parser=new Parser();
|
jpayne@68
|
136
|
jpayne@68
|
137 //Set any necessary Parser defaults here
|
jpayne@68
|
138 //parser.foo=bar;
|
jpayne@68
|
139
|
jpayne@68
|
140 //Parse each argument
|
jpayne@68
|
141 for(int i=0; i<args.length; i++){
|
jpayne@68
|
142 String arg=args[i];
|
jpayne@68
|
143
|
jpayne@68
|
144 //Break arguments into their constituent parts, in the form of "a=b"
|
jpayne@68
|
145 String[] split=arg.split("=");
|
jpayne@68
|
146 String a=split[0].toLowerCase();
|
jpayne@68
|
147 String b=split.length>1 ? split[1] : null;
|
jpayne@68
|
148 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
|
jpayne@68
|
149
|
jpayne@68
|
150 if(a.equals("verbose")){
|
jpayne@68
|
151 verbose=Parse.parseBoolean(b);
|
jpayne@68
|
152 }else if(a.equals("ref") || a.equals("scaffolds")){
|
jpayne@68
|
153 ref=b;
|
jpayne@68
|
154 }else if(a.equals("insertlist")){
|
jpayne@68
|
155 insertList=b;
|
jpayne@68
|
156 }else if(a.equals("ordered")){
|
jpayne@68
|
157 ordered=Parse.parseBoolean(b);
|
jpayne@68
|
158 }else if(a.equalsIgnoreCase("sameStrandPairs")){
|
jpayne@68
|
159 sameStrandPairs=Parse.parseBoolean(b);
|
jpayne@68
|
160 }else if(a.equalsIgnoreCase("ns") || a.equalsIgnoreCase("n") || a.equalsIgnoreCase("scaffoldbreak") || a.equalsIgnoreCase("gap") || a.equalsIgnoreCase("mingap")){
|
jpayne@68
|
161 scaffoldBreakNs=Integer.parseInt(b);
|
jpayne@68
|
162 assert(scaffoldBreakNs>0);
|
jpayne@68
|
163 }else if(a.equalsIgnoreCase("mindepth")){
|
jpayne@68
|
164 minDepth=Integer.parseInt(b);
|
jpayne@68
|
165 assert(minDepth>0);
|
jpayne@68
|
166 }else if(a.equalsIgnoreCase("maxinsert")){
|
jpayne@68
|
167 maxPairDist=Parse.parseIntKMG(b);
|
jpayne@68
|
168 }else if(a.equalsIgnoreCase("minWeightRatio") || a.equalsIgnoreCase("minwr")){
|
jpayne@68
|
169 minWeightRatio=Float.parseFloat(b);
|
jpayne@68
|
170 }else if(a.equalsIgnoreCase("minStrandRatio") || a.equalsIgnoreCase("minsr")){
|
jpayne@68
|
171 minStrandRatio=Float.parseFloat(b);
|
jpayne@68
|
172 }else if(a.equals("clearfilters") || a.equals("clearfilter")){
|
jpayne@68
|
173 if(Parse.parseBoolean(b)){
|
jpayne@68
|
174 samFilter.clear();
|
jpayne@68
|
175 }
|
jpayne@68
|
176 }else if(a.equals("parse_flag_goes_here")){
|
jpayne@68
|
177 long fake_variable=Parse.parseKMG(b);
|
jpayne@68
|
178 //Set a variable here
|
jpayne@68
|
179 }else if(samFilter.parse(arg, a, b)){
|
jpayne@68
|
180 //do nothing
|
jpayne@68
|
181 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
|
jpayne@68
|
182 //do nothing
|
jpayne@68
|
183 }else{
|
jpayne@68
|
184 outstream.println("Unknown parameter "+args[i]);
|
jpayne@68
|
185 assert(false) : "Unknown parameter "+args[i];
|
jpayne@68
|
186 }
|
jpayne@68
|
187 }
|
jpayne@68
|
188
|
jpayne@68
|
189 return parser;
|
jpayne@68
|
190 }
|
jpayne@68
|
191
|
jpayne@68
|
192 /** Add or remove .gz or .bz2 as needed */
|
jpayne@68
|
193 private void fixExtensions(){
|
jpayne@68
|
194 in=Tools.fixExtension(in);
|
jpayne@68
|
195 ref=Tools.fixExtension(ref);
|
jpayne@68
|
196 }
|
jpayne@68
|
197
|
jpayne@68
|
198 /** Ensure files can be read and written */
|
jpayne@68
|
199 private void checkFileExistence(){
|
jpayne@68
|
200
|
jpayne@68
|
201 //Ensure there is an input file
|
jpayne@68
|
202 if(in==null){throw new RuntimeException("Error - an input file is required.");}
|
jpayne@68
|
203
|
jpayne@68
|
204 //Ensure there is an input file
|
jpayne@68
|
205 if(ref==null){throw new RuntimeException("Error - a reference file is required.");}
|
jpayne@68
|
206
|
jpayne@68
|
207 //Ensure output files can be written
|
jpayne@68
|
208 if(!Tools.testOutputFiles(overwrite, append, false, out)){
|
jpayne@68
|
209 outstream.println((out==null)+", "+out);
|
jpayne@68
|
210 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n");
|
jpayne@68
|
211 }
|
jpayne@68
|
212
|
jpayne@68
|
213 //Ensure input files can be read
|
jpayne@68
|
214 if(!Tools.testInputFiles(false, true, in, ref)){
|
jpayne@68
|
215 throw new RuntimeException("\nCan't read some input files.\n");
|
jpayne@68
|
216 }
|
jpayne@68
|
217
|
jpayne@68
|
218 //Ensure that no file was specified multiple times
|
jpayne@68
|
219 if(!Tools.testForDuplicateFiles(true, in, ref, out)){
|
jpayne@68
|
220 throw new RuntimeException("\nSome file names were specified multiple times.\n");
|
jpayne@68
|
221 }
|
jpayne@68
|
222 }
|
jpayne@68
|
223
|
jpayne@68
|
224 /** Adjust file-related static fields as needed for this program */
|
jpayne@68
|
225 private static void checkStatics(){
|
jpayne@68
|
226 //Adjust the number of threads for input file reading
|
jpayne@68
|
227 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
|
jpayne@68
|
228 ByteFile.FORCE_MODE_BF2=true;
|
jpayne@68
|
229 }
|
jpayne@68
|
230
|
jpayne@68
|
231 assert(FastaReadInputStream.settingsOK());
|
jpayne@68
|
232 }
|
jpayne@68
|
233
|
jpayne@68
|
234 /** Ensure parameter ranges are within bounds and required parameters are set */
|
jpayne@68
|
235 private boolean validateParams(){
|
jpayne@68
|
236 // assert(minfoo>0 && minfoo<=maxfoo) : minfoo+", "+maxfoo;
|
jpayne@68
|
237 return true;
|
jpayne@68
|
238 }
|
jpayne@68
|
239
|
jpayne@68
|
240 /*--------------------------------------------------------------*/
|
jpayne@68
|
241 /*---------------- Outer Methods ----------------*/
|
jpayne@68
|
242 /*--------------------------------------------------------------*/
|
jpayne@68
|
243
|
jpayne@68
|
244 /** Create read streams and process all data */
|
jpayne@68
|
245 void process(Timer t){
|
jpayne@68
|
246
|
jpayne@68
|
247 //Turn off read validation in the input threads to increase speed
|
jpayne@68
|
248 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
|
jpayne@68
|
249 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
|
jpayne@68
|
250
|
jpayne@68
|
251 //Create a read input stream
|
jpayne@68
|
252 final SamStreamer ss=makeStreamer(ffin);
|
jpayne@68
|
253
|
jpayne@68
|
254 //Load reference
|
jpayne@68
|
255 loadReferenceCustom();
|
jpayne@68
|
256
|
jpayne@68
|
257 //Reset counters
|
jpayne@68
|
258 readsProcessed=readsOut=0;
|
jpayne@68
|
259 basesProcessed=basesOut=0;
|
jpayne@68
|
260
|
jpayne@68
|
261 //Process the reads in separate threads
|
jpayne@68
|
262 spawnThreads(ss);
|
jpayne@68
|
263
|
jpayne@68
|
264 //Optionally create a read output stream
|
jpayne@68
|
265 final ConcurrentReadOutputStream ros=makeCros();
|
jpayne@68
|
266
|
jpayne@68
|
267 if(verbose){outstream.println("Fixing reference.");}
|
jpayne@68
|
268
|
jpayne@68
|
269 makeScaffolds(ros);
|
jpayne@68
|
270
|
jpayne@68
|
271 if(verbose){outstream.println("Finished; closing streams.");}
|
jpayne@68
|
272
|
jpayne@68
|
273 //Write anything that was accumulated by ReadStats
|
jpayne@68
|
274 errorState|=ReadStats.writeAll();
|
jpayne@68
|
275 //Close the read streams
|
jpayne@68
|
276 errorState|=ReadWrite.closeStream(ros);
|
jpayne@68
|
277
|
jpayne@68
|
278 //Reset read validation
|
jpayne@68
|
279 Read.VALIDATE_IN_CONSTRUCTOR=vic;
|
jpayne@68
|
280
|
jpayne@68
|
281 //Report timing and results
|
jpayne@68
|
282 t.stop();
|
jpayne@68
|
283 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
|
jpayne@68
|
284 outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, scaffoldsOut, scaffoldLengthOut, 8, false));
|
jpayne@68
|
285
|
jpayne@68
|
286 outstream.println();
|
jpayne@68
|
287 outstream.println(Tools.number("Average Insert", totalAverageInsert, 2, 8));
|
jpayne@68
|
288 outstream.println(Tools.number("Joins Made ", gapsAdded, 8));
|
jpayne@68
|
289 outstream.println(Tools.number("Ns Added ", nsAdded, 8));
|
jpayne@68
|
290 outstream.println(Tools.number("Contigs In ", refMap.size(), 8));
|
jpayne@68
|
291 outstream.println(Tools.number("Scaffolds Out ", scaffoldsOut, 8));
|
jpayne@68
|
292
|
jpayne@68
|
293
|
jpayne@68
|
294 //Throw an exception of there was an error in a thread
|
jpayne@68
|
295 if(errorState){
|
jpayne@68
|
296 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
|
jpayne@68
|
297 }
|
jpayne@68
|
298 }
|
jpayne@68
|
299
|
jpayne@68
|
300 private synchronized void loadReferenceCustom(){
|
jpayne@68
|
301 assert(!loadedRef);
|
jpayne@68
|
302 ConcurrentReadInputStream cris=makeRefCris();
|
jpayne@68
|
303 for(ListNum<Read> ln=cris.nextList(); ln!=null && ln.size()>0; ln=cris.nextList()) {
|
jpayne@68
|
304 for(Read r : ln){
|
jpayne@68
|
305 String name=r.id;
|
jpayne@68
|
306 String name2=Tools.trimToWhitespace(r.id);
|
jpayne@68
|
307 Contig cont=new Contig(name, r.bases, r.numericID);
|
jpayne@68
|
308 refMap.put(name, cont);
|
jpayne@68
|
309 refMap2.put(name2, cont);
|
jpayne@68
|
310 }
|
jpayne@68
|
311 cris.returnList(ln);
|
jpayne@68
|
312 }
|
jpayne@68
|
313 ReadWrite.closeStream(cris);
|
jpayne@68
|
314 loadedRef=true;
|
jpayne@68
|
315 }
|
jpayne@68
|
316
|
jpayne@68
|
317 private ConcurrentReadInputStream makeRefCris(){
|
jpayne@68
|
318 ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffref, null);
|
jpayne@68
|
319 cris.start(); //Start the stream
|
jpayne@68
|
320 if(verbose){outstream.println("Started cris");}
|
jpayne@68
|
321 boolean paired=cris.paired();
|
jpayne@68
|
322 assert(!paired) : "References should not be paired.";
|
jpayne@68
|
323 return cris;
|
jpayne@68
|
324 }
|
jpayne@68
|
325
|
jpayne@68
|
326 private SamStreamer makeStreamer(FileFormat ff){
|
jpayne@68
|
327 if(ff==null){return null;}
|
jpayne@68
|
328 SamStreamer ss=new SamReadStreamer(ff, streamerThreads, true, maxReads);
|
jpayne@68
|
329 ss.start(); //Start the stream
|
jpayne@68
|
330 if(verbose){outstream.println("Started Streamer");}
|
jpayne@68
|
331 return ss;
|
jpayne@68
|
332 }
|
jpayne@68
|
333
|
jpayne@68
|
334 private ConcurrentReadOutputStream makeCros(){
|
jpayne@68
|
335 if(ffout==null){return null;}
|
jpayne@68
|
336
|
jpayne@68
|
337 //Select output buffer size based on whether it needs to be ordered
|
jpayne@68
|
338 final int buff=(ordered ? Tools.mid(16, 128, (Shared.threads()*2)/3) : 8);
|
jpayne@68
|
339
|
jpayne@68
|
340 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ffout, null, buff, null, false);
|
jpayne@68
|
341 ros.start(); //Start the stream
|
jpayne@68
|
342 return ros;
|
jpayne@68
|
343 }
|
jpayne@68
|
344
|
jpayne@68
|
345 /*--------------------------------------------------------------*/
|
jpayne@68
|
346 /*---------------- Thread Management ----------------*/
|
jpayne@68
|
347 /*--------------------------------------------------------------*/
|
jpayne@68
|
348
|
jpayne@68
|
349 /** Spawn process threads */
|
jpayne@68
|
350 private void spawnThreads(final SamStreamer ss){
|
jpayne@68
|
351
|
jpayne@68
|
352 //Do anything necessary prior to processing
|
jpayne@68
|
353
|
jpayne@68
|
354 //Determine how many threads may be used
|
jpayne@68
|
355 final int threads=Shared.threads();
|
jpayne@68
|
356
|
jpayne@68
|
357 //Fill a list with ProcessThreads
|
jpayne@68
|
358 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
|
jpayne@68
|
359 for(int i=0; i<threads; i++){
|
jpayne@68
|
360 alpt.add(new ProcessThread(ss, i));
|
jpayne@68
|
361 }
|
jpayne@68
|
362
|
jpayne@68
|
363 //Start the threads
|
jpayne@68
|
364 for(ProcessThread pt : alpt){
|
jpayne@68
|
365 pt.start();
|
jpayne@68
|
366 }
|
jpayne@68
|
367
|
jpayne@68
|
368 //Wait for threads to finish
|
jpayne@68
|
369 boolean success=ThreadWaiter.waitForThreads(alpt, this);
|
jpayne@68
|
370 errorState&=!success;
|
jpayne@68
|
371
|
jpayne@68
|
372 //Do anything necessary after processing
|
jpayne@68
|
373 totalAverageInsert=totalInsertSum/(double)totalInsertCount;
|
jpayne@68
|
374 insertByPercentile=Tools.makeHistogram(insertCounts, buckets);
|
jpayne@68
|
375 }
|
jpayne@68
|
376
|
jpayne@68
|
377 @Override
|
jpayne@68
|
378 public final void accumulate(ProcessThread pt){
|
jpayne@68
|
379 readsProcessed+=pt.readsProcessedT;
|
jpayne@68
|
380 basesProcessed+=pt.basesProcessedT;
|
jpayne@68
|
381 readsOut+=pt.readsOutT;
|
jpayne@68
|
382 basesOut+=pt.basesOutT;
|
jpayne@68
|
383
|
jpayne@68
|
384 totalInsertSum+=pt.totalInsertSumT;
|
jpayne@68
|
385 totalInsertCount+=pt.totalInsertCountT;
|
jpayne@68
|
386
|
jpayne@68
|
387 errorState|=(!pt.success);
|
jpayne@68
|
388 }
|
jpayne@68
|
389
|
jpayne@68
|
390 @Override
|
jpayne@68
|
391 public final boolean success(){return !errorState;}
|
jpayne@68
|
392
|
jpayne@68
|
393 /*--------------------------------------------------------------*/
|
jpayne@68
|
394 /*---------------- Inner Methods ----------------*/
|
jpayne@68
|
395 /*--------------------------------------------------------------*/
|
jpayne@68
|
396
|
jpayne@68
|
397 private void makeScaffolds(ConcurrentReadOutputStream ros){
|
jpayne@68
|
398 ByteBuilder bb=new ByteBuilder(1000000);
|
jpayne@68
|
399
|
jpayne@68
|
400 ArrayList<Read> list=new ArrayList<Read>(200);
|
jpayne@68
|
401 long num=0;
|
jpayne@68
|
402 long lengthSum=0;
|
jpayne@68
|
403 for(Entry<String, Contig> e : refMap.entrySet()){
|
jpayne@68
|
404 Contig cont=e.getValue();
|
jpayne@68
|
405 if(!cont.processed()){
|
jpayne@68
|
406 Read r=cont.makeScaffold(bb);
|
jpayne@68
|
407 assert(r!=null);
|
jpayne@68
|
408
|
jpayne@68
|
409 lengthSum+=r.length();
|
jpayne@68
|
410 list.add(r);
|
jpayne@68
|
411 scaffoldsOut++;
|
jpayne@68
|
412 scaffoldLengthOut+=r.length();
|
jpayne@68
|
413
|
jpayne@68
|
414 if(list.size()>=200 || lengthSum>=100000){
|
jpayne@68
|
415 if(ros!=null){ros.add(list, num);}
|
jpayne@68
|
416 list=new ArrayList<Read>(200);
|
jpayne@68
|
417 num++;
|
jpayne@68
|
418 lengthSum=0;
|
jpayne@68
|
419 }
|
jpayne@68
|
420 assert(cont.processed());
|
jpayne@68
|
421 }
|
jpayne@68
|
422 }
|
jpayne@68
|
423 if(list.size()>0){
|
jpayne@68
|
424 if(ros!=null){ros.add(list, num);}
|
jpayne@68
|
425 }
|
jpayne@68
|
426 }
|
jpayne@68
|
427
|
jpayne@68
|
428 private static int calcInsertSize(SamLine sl) {
|
jpayne@68
|
429 assert(sl.mapped() && sl.pairedOnSameChrom());
|
jpayne@68
|
430 assert(sl.primary());
|
jpayne@68
|
431 assert(!sl.supplementary());
|
jpayne@68
|
432 assert(sl.leftmost());
|
jpayne@68
|
433
|
jpayne@68
|
434 assert(sl.tlen>0) : sl.tlen+"\n\n"+sl;
|
jpayne@68
|
435 return sl.tlen>0 ? sl.tlen : -sl.tlen;
|
jpayne@68
|
436
|
jpayne@68
|
437 // final int insertSize;
|
jpayne@68
|
438 // String insertTag=null;
|
jpayne@68
|
439 // if(sl.optional!=null){
|
jpayne@68
|
440 // for(String s : sl.optional){
|
jpayne@68
|
441 // if(s.startsWith("X8:Z:")){
|
jpayne@68
|
442 // insertTag=s;
|
jpayne@68
|
443 // break;
|
jpayne@68
|
444 // }
|
jpayne@68
|
445 // }
|
jpayne@68
|
446 // }
|
jpayne@68
|
447 // if(insertTag!=null){
|
jpayne@68
|
448 // insertSize=Integer.parseInt(insertTag.substring(5));
|
jpayne@68
|
449 // }else{
|
jpayne@68
|
450 // insertSize=sl.tlen;//This is unsafe due to indels.
|
jpayne@68
|
451 // assert(false) : "Reads need insert size tags.";
|
jpayne@68
|
452 // }
|
jpayne@68
|
453 // assert(insertSize>0) : sl;
|
jpayne@68
|
454 //
|
jpayne@68
|
455 // return insertSize;
|
jpayne@68
|
456 }
|
jpayne@68
|
457
|
jpayne@68
|
458 private Contig getScaffold(String rname){
|
jpayne@68
|
459 Contig scaf=refMap.get(rname);
|
jpayne@68
|
460 if(scaf==null){scaf=refMap2.get(Tools.trimToWhitespace(rname));}
|
jpayne@68
|
461 assert(scaf!=null) : "Can't find graph for "+rname;
|
jpayne@68
|
462 return scaf;
|
jpayne@68
|
463 }
|
jpayne@68
|
464
|
jpayne@68
|
465 /*--------------------------------------------------------------*/
|
jpayne@68
|
466 /*---------------- Inner Classes ----------------*/
|
jpayne@68
|
467 /*--------------------------------------------------------------*/
|
jpayne@68
|
468
|
jpayne@68
|
469 /** This class is static to prevent accidental writing to shared variables.
|
jpayne@68
|
470 * It is safe to remove the static modifier. */
|
jpayne@68
|
471 class ProcessThread extends Thread {
|
jpayne@68
|
472
|
jpayne@68
|
473 //Constructor
|
jpayne@68
|
474 ProcessThread(final SamStreamer ss_, final int tid_){
|
jpayne@68
|
475 ss=ss_;
|
jpayne@68
|
476 tid=tid_;
|
jpayne@68
|
477 }
|
jpayne@68
|
478
|
jpayne@68
|
479 //Called by start()
|
jpayne@68
|
480 @Override
|
jpayne@68
|
481 public void run(){
|
jpayne@68
|
482 //Do anything necessary prior to processing
|
jpayne@68
|
483
|
jpayne@68
|
484 //Process the reads
|
jpayne@68
|
485 processInner();
|
jpayne@68
|
486
|
jpayne@68
|
487 //Do anything necessary after processing
|
jpayne@68
|
488
|
jpayne@68
|
489 //Indicate successful exit status
|
jpayne@68
|
490 success=true;
|
jpayne@68
|
491 }
|
jpayne@68
|
492
|
jpayne@68
|
493 /** Iterate through the reads */
|
jpayne@68
|
494 void processInner(){
|
jpayne@68
|
495
|
jpayne@68
|
496 //Grab and process all lists
|
jpayne@68
|
497 for(ListNum<Read> ln=ss.nextReads(); ln!=null; ln=ss.nextReads()){
|
jpayne@68
|
498 // if(verbose){outstream.println("Got list of size "+list.size());} //Disabled due to non-static access
|
jpayne@68
|
499
|
jpayne@68
|
500 processList(ln);
|
jpayne@68
|
501 }
|
jpayne@68
|
502
|
jpayne@68
|
503 }
|
jpayne@68
|
504
|
jpayne@68
|
505 void processList(ListNum<Read> ln){
|
jpayne@68
|
506
|
jpayne@68
|
507 //Grab the actual read list from the ListNum
|
jpayne@68
|
508 final ArrayList<Read> reads=ln.list;
|
jpayne@68
|
509
|
jpayne@68
|
510 //Loop through each read in the list
|
jpayne@68
|
511 for(int idx=0; idx<reads.size(); idx++){
|
jpayne@68
|
512 final Read r=reads.get(idx);
|
jpayne@68
|
513
|
jpayne@68
|
514 //Validate reads in worker threads
|
jpayne@68
|
515 if(!r.validated()){r.validate(true);}
|
jpayne@68
|
516
|
jpayne@68
|
517 //Track the initial length for statistics
|
jpayne@68
|
518 final int initialLength=r.length();
|
jpayne@68
|
519
|
jpayne@68
|
520 //Increment counters
|
jpayne@68
|
521 readsProcessedT+=r.pairCount();
|
jpayne@68
|
522 basesProcessedT+=initialLength;
|
jpayne@68
|
523
|
jpayne@68
|
524 processRead(r);
|
jpayne@68
|
525 }
|
jpayne@68
|
526 }
|
jpayne@68
|
527
|
jpayne@68
|
528 /**
|
jpayne@68
|
529 * Process a read or a read pair.
|
jpayne@68
|
530 * @param r Read 1
|
jpayne@68
|
531 * @param r2 Read 2 (may be null)
|
jpayne@68
|
532 * @return True if the reads should be kept, false if they should be discarded.
|
jpayne@68
|
533 */
|
jpayne@68
|
534 void processRead(final Read r){
|
jpayne@68
|
535 final SamLine sl=r.samline;
|
jpayne@68
|
536 assert(sl!=null) : sl;
|
jpayne@68
|
537 if(samFilter!=null && !samFilter.passesFilter(sl)){return;}
|
jpayne@68
|
538
|
jpayne@68
|
539 //sl.nextMapped();
|
jpayne@68
|
540 if(sl.mapped() && sl.primary() && !sl.supplementary()){
|
jpayne@68
|
541 final String rname=sl.rnameS();
|
jpayne@68
|
542 Contig scaf=getScaffold(rname);
|
jpayne@68
|
543 if(scaf!=null){
|
jpayne@68
|
544 if(sl.pairedOnSameChrom() && sl.properPair() && sl.leftmost()){
|
jpayne@68
|
545 final int insertSize=calcInsertSize(sl);
|
jpayne@68
|
546 insertCounts.incrementAndGet(Tools.mid(0, insertSize, insertCounts.length()-1));
|
jpayne@68
|
547 totalInsertSumT+=insertSize;
|
jpayne@68
|
548 totalInsertCountT++;
|
jpayne@68
|
549 }
|
jpayne@68
|
550 scaf.add(sl);
|
jpayne@68
|
551
|
jpayne@68
|
552 readsOutT++;
|
jpayne@68
|
553 basesOutT+=r.length();
|
jpayne@68
|
554 }
|
jpayne@68
|
555 }
|
jpayne@68
|
556 if(sl.mapped() && sl.pairedOnSameChrom() && sl.properPair() && sl.primary() && !sl.supplementary() && sl.leftmost()){
|
jpayne@68
|
557 final String rname=sl.rnameS();
|
jpayne@68
|
558
|
jpayne@68
|
559 Contig scaf=getScaffold(rname);
|
jpayne@68
|
560 if(scaf!=null){
|
jpayne@68
|
561 final int insertSize=calcInsertSize(sl);
|
jpayne@68
|
562 insertCounts.incrementAndGet(Tools.mid(0, insertSize, insertCounts.length()-1));
|
jpayne@68
|
563 scaf.add(sl);
|
jpayne@68
|
564
|
jpayne@68
|
565 readsOutT++;
|
jpayne@68
|
566 basesOutT+=r.length();
|
jpayne@68
|
567
|
jpayne@68
|
568 totalInsertSumT+=insertSize;
|
jpayne@68
|
569 totalInsertCountT++;
|
jpayne@68
|
570 }
|
jpayne@68
|
571 }
|
jpayne@68
|
572 }
|
jpayne@68
|
573
|
jpayne@68
|
574 /** Number of reads processed by this thread */
|
jpayne@68
|
575 protected long readsProcessedT=0;
|
jpayne@68
|
576 /** Number of bases processed by this thread */
|
jpayne@68
|
577 protected long basesProcessedT=0;
|
jpayne@68
|
578
|
jpayne@68
|
579 /** Number of reads retained by this thread */
|
jpayne@68
|
580 protected long readsOutT=0;
|
jpayne@68
|
581 /** Number of bases retained by this thread */
|
jpayne@68
|
582 protected long basesOutT=0;
|
jpayne@68
|
583
|
jpayne@68
|
584 protected long totalInsertSumT=0;
|
jpayne@68
|
585 protected long totalInsertCountT=0;
|
jpayne@68
|
586
|
jpayne@68
|
587 long insertSum=0;
|
jpayne@68
|
588
|
jpayne@68
|
589 /** True only if this thread has completed successfully */
|
jpayne@68
|
590 boolean success=false;
|
jpayne@68
|
591
|
jpayne@68
|
592 /** Shared input stream */
|
jpayne@68
|
593 private final SamStreamer ss;
|
jpayne@68
|
594 /** Thread ID */
|
jpayne@68
|
595 final int tid;
|
jpayne@68
|
596 }
|
jpayne@68
|
597
|
jpayne@68
|
598 Contig findLeftmost(Contig source){
|
jpayne@68
|
599 if(verbose){System.err.println("findLeftmost("+source.name+")");}
|
jpayne@68
|
600 while(true) {
|
jpayne@68
|
601 assert(!source.processed());
|
jpayne@68
|
602 if(source.processed()){return null;}
|
jpayne@68
|
603 source.processedLeft=true;
|
jpayne@68
|
604 Edge se=source.bestEdge(true);
|
jpayne@68
|
605 if(verbose){System.err.println("Found source edge "+se);}
|
jpayne@68
|
606 if(se==null){return source;}
|
jpayne@68
|
607 Contig dest=se.dest;
|
jpayne@68
|
608 if(dest.processed()){
|
jpayne@68
|
609 if(verbose){System.err.println("Dest was processed; returning.");}
|
jpayne@68
|
610 return source;
|
jpayne@68
|
611 }
|
jpayne@68
|
612 if(se.sameStrand()){
|
jpayne@68
|
613 if(source.strand==dest.strand){
|
jpayne@68
|
614
|
jpayne@68
|
615 }else{
|
jpayne@68
|
616 if(verbose){System.err.println("Flipping "+dest.name);}
|
jpayne@68
|
617 dest.flip();
|
jpayne@68
|
618 }
|
jpayne@68
|
619 }else{
|
jpayne@68
|
620 if(source.strand==dest.strand){
|
jpayne@68
|
621 if(verbose){System.err.println("Flipping "+dest.name);}
|
jpayne@68
|
622 dest.flip();
|
jpayne@68
|
623 }else{
|
jpayne@68
|
624
|
jpayne@68
|
625 }
|
jpayne@68
|
626 }
|
jpayne@68
|
627 Edge de=dest.bestEdge(false);
|
jpayne@68
|
628 if(verbose){System.err.println("Found dest edge "+de);}
|
jpayne@68
|
629 if(de==null || de.dest!=source){
|
jpayne@68
|
630 if(dest.strand==1){dest.flip();}
|
jpayne@68
|
631 if(verbose){System.err.println("Dest edge did not match; returning.");}
|
jpayne@68
|
632 return source;
|
jpayne@68
|
633 }
|
jpayne@68
|
634 source=dest;
|
jpayne@68
|
635 if(verbose){System.err.println("Migrated to next node.");}
|
jpayne@68
|
636 }
|
jpayne@68
|
637 }
|
jpayne@68
|
638
|
jpayne@68
|
639 Read expandRight(final Contig source0, ByteBuilder bb){
|
jpayne@68
|
640 if(verbose){System.err.println("expandRight("+source0.name+")");}
|
jpayne@68
|
641 bb.clear();
|
jpayne@68
|
642 Contig source=source0;
|
jpayne@68
|
643 while(true) {
|
jpayne@68
|
644 assert(!source.processedRight);
|
jpayne@68
|
645 if(source.processedRight){return null;}
|
jpayne@68
|
646 if(source.strand==1){
|
jpayne@68
|
647 Tools.reverseInPlace(source.depthArray);
|
jpayne@68
|
648 }
|
jpayne@68
|
649 source.processedRight=true;
|
jpayne@68
|
650 bb.append(source.bases);
|
jpayne@68
|
651 Edge se=source.bestEdge(false);
|
jpayne@68
|
652 if(verbose){System.err.println("Found source edge "+se);}
|
jpayne@68
|
653 if(se==null){break;}
|
jpayne@68
|
654 Contig dest=se.dest;
|
jpayne@68
|
655 if(dest.processedRight){
|
jpayne@68
|
656 if(verbose){System.err.println("Dest was processed; returning.");}
|
jpayne@68
|
657 break;
|
jpayne@68
|
658 }
|
jpayne@68
|
659 if(se.sameStrand()){
|
jpayne@68
|
660 if(source.strand==dest.strand){
|
jpayne@68
|
661
|
jpayne@68
|
662 }else{
|
jpayne@68
|
663 if(verbose){System.err.println("Flipping "+dest.name);}
|
jpayne@68
|
664 dest.flip();
|
jpayne@68
|
665 }
|
jpayne@68
|
666 }else{
|
jpayne@68
|
667 if(source.strand==dest.strand){
|
jpayne@68
|
668 if(verbose){System.err.println("Flipping "+dest.name);}
|
jpayne@68
|
669 dest.flip();
|
jpayne@68
|
670 }else{
|
jpayne@68
|
671
|
jpayne@68
|
672 }
|
jpayne@68
|
673 }
|
jpayne@68
|
674 Edge de=dest.bestEdge(true);
|
jpayne@68
|
675 if(verbose){System.err.println("Found dest edge "+de);}
|
jpayne@68
|
676 if(de==null || de.dest!=source){
|
jpayne@68
|
677 if(verbose){System.err.println("Dest edge did not match; returning.");}
|
jpayne@68
|
678 if(dest.strand==1){dest.flip();}
|
jpayne@68
|
679 break;
|
jpayne@68
|
680 }
|
jpayne@68
|
681
|
jpayne@68
|
682 //Now append Ns
|
jpayne@68
|
683 int observedLength=(int)(se.distanceSum/se.count());
|
jpayne@68
|
684 long depth=se.count();
|
jpayne@68
|
685 int depthProxyIndex=(source.length()-Tools.min(source.length()/2, 300));
|
jpayne@68
|
686 long depthProxy=source.depthArray.get(depthProxyIndex);
|
jpayne@68
|
687 int percentile=(int)(buckets*depth/(float)(depth+depthProxy));
|
jpayne@68
|
688 int inferredLength=insertByPercentile[percentile];
|
jpayne@68
|
689
|
jpayne@68
|
690 int Ns=(Tools.max(scaffoldBreakNs, inferredLength-observedLength));
|
jpayne@68
|
691 for(int i=0; i<Ns; i++){bb.append('N');}
|
jpayne@68
|
692 source=dest;
|
jpayne@68
|
693
|
jpayne@68
|
694 gapsAdded++;
|
jpayne@68
|
695 nsAdded+=Ns;
|
jpayne@68
|
696 }
|
jpayne@68
|
697 Read r=new Read(bb.toBytes(), null, source0.name, source0.numericID);
|
jpayne@68
|
698 return r;
|
jpayne@68
|
699 }
|
jpayne@68
|
700
|
jpayne@68
|
701 /*--------------------------------------------------------------*/
|
jpayne@68
|
702
|
jpayne@68
|
703 private class Contig {
|
jpayne@68
|
704
|
jpayne@68
|
705 Contig(String name_, byte[] bases_, long numericID_){
|
jpayne@68
|
706 name=name_;
|
jpayne@68
|
707 bases=bases_;
|
jpayne@68
|
708 numericID=(int)numericID_;
|
jpayne@68
|
709 depthArray=new AtomicIntegerArray(bases.length);
|
jpayne@68
|
710 }
|
jpayne@68
|
711
|
jpayne@68
|
712 public Read makeScaffold(ByteBuilder bb) {
|
jpayne@68
|
713 assert(!processed());
|
jpayne@68
|
714 Contig leftmost=findLeftmost(this);
|
jpayne@68
|
715 return expandRight(leftmost, bb);
|
jpayne@68
|
716 }
|
jpayne@68
|
717
|
jpayne@68
|
718 Edge bestEdge(boolean left) {
|
jpayne@68
|
719 final LinkedHashMap<String, Edge> map=(left ? leftEdgeMap : rightEdgeMap);
|
jpayne@68
|
720 if(map.isEmpty()){return null;}
|
jpayne@68
|
721 long weightSum=0;
|
jpayne@68
|
722 long countSum=0;
|
jpayne@68
|
723 Edge best=null;
|
jpayne@68
|
724 for(Entry<String, Edge> entry : map.entrySet()){
|
jpayne@68
|
725 Edge e=entry.getValue();
|
jpayne@68
|
726 weightSum+=e.weight;
|
jpayne@68
|
727 countSum+=e.count();
|
jpayne@68
|
728 if(best==null || e.weight>best.weight){best=e;}
|
jpayne@68
|
729 }
|
jpayne@68
|
730 if(best.count()<minDepth){return null;}
|
jpayne@68
|
731 if(weightSum*minWeightRatio>best.weight){return null;}
|
jpayne@68
|
732 if(best.strandRatio()<minStrandRatio){return null;}
|
jpayne@68
|
733 if(best.badCount>0.5*best.count()){return null;}
|
jpayne@68
|
734 return best;
|
jpayne@68
|
735 }
|
jpayne@68
|
736
|
jpayne@68
|
737 void add(SamLine sl){
|
jpayne@68
|
738 assert(sl.mapped() && sl.primary() && !sl.supplementary());
|
jpayne@68
|
739 if(sl.nextMapped()){
|
jpayne@68
|
740 if(sl.pairedOnSameChrom()){
|
jpayne@68
|
741 if(!sl.properPair()){
|
jpayne@68
|
742 addCoverageSingleton(sl);
|
jpayne@68
|
743 }else if(sl.leftmost()){
|
jpayne@68
|
744 addCoveragePaired(sl);
|
jpayne@68
|
745 }
|
jpayne@68
|
746 }else{
|
jpayne@68
|
747 addCoverageSingleton(sl);
|
jpayne@68
|
748 handleMixedPair(sl);
|
jpayne@68
|
749 }
|
jpayne@68
|
750 }else{
|
jpayne@68
|
751 addCoverageSingleton(sl);
|
jpayne@68
|
752 }
|
jpayne@68
|
753 }
|
jpayne@68
|
754
|
jpayne@68
|
755 private void addCoverageSingleton(SamLine sl){
|
jpayne@68
|
756 assert(sl.cigar!=null);
|
jpayne@68
|
757 int start=sl.pos-1;
|
jpayne@68
|
758 int stop=start+SamLine.calcCigarLength(sl.cigar, false, false);
|
jpayne@68
|
759
|
jpayne@68
|
760 for(int i=start; i<stop; i++){
|
jpayne@68
|
761 if(i>=0 && i<bases.length){
|
jpayne@68
|
762 depthArray.incrementAndGet(i);
|
jpayne@68
|
763 }
|
jpayne@68
|
764 }
|
jpayne@68
|
765 }
|
jpayne@68
|
766
|
jpayne@68
|
767 private void addCoveragePaired(SamLine sl){
|
jpayne@68
|
768 assert(sl.cigar!=null);
|
jpayne@68
|
769 assert(sl.leftmost() && sl.pairedOnSameChrom() && sl.nextMapped());
|
jpayne@68
|
770 int start=sl.pos-1;
|
jpayne@68
|
771 int stop=start+sl.tlen;
|
jpayne@68
|
772
|
jpayne@68
|
773 for(int i=start; i<stop; i++){
|
jpayne@68
|
774 if(i>=0 && i<bases.length){
|
jpayne@68
|
775 depthArray.incrementAndGet(i);
|
jpayne@68
|
776 }
|
jpayne@68
|
777 }
|
jpayne@68
|
778 }
|
jpayne@68
|
779
|
jpayne@68
|
780 /** Reads mapping to different contigs */
|
jpayne@68
|
781 private void handleMixedPair(SamLine sl){
|
jpayne@68
|
782 assert(sl.mapped() && sl.nextMapped() && !sl.pairedOnSameChrom());
|
jpayne@68
|
783 String rname=sl.rnameS();
|
jpayne@68
|
784 String rnext=sl.rnextS();
|
jpayne@68
|
785 if(rname.equals(rnext)){return;}
|
jpayne@68
|
786
|
jpayne@68
|
787 final boolean left=(sl.strand()==1);
|
jpayne@68
|
788 LinkedHashMap<String, Edge> map=(left ? leftEdgeMap : rightEdgeMap);
|
jpayne@68
|
789 Edge e=map.get(rnext);
|
jpayne@68
|
790 Contig dest=null;
|
jpayne@68
|
791 if(e==null){
|
jpayne@68
|
792 dest=getScaffold(rnext);
|
jpayne@68
|
793 if(dest==null){return;}
|
jpayne@68
|
794 e=new Edge(this, dest, left);
|
jpayne@68
|
795 map.put(rnext, e);
|
jpayne@68
|
796 }
|
jpayne@68
|
797 e.add(sl);
|
jpayne@68
|
798 }
|
jpayne@68
|
799
|
jpayne@68
|
800 void flip(){//Be careful with this
|
jpayne@68
|
801 AminoAcid.reverseComplementBasesInPlace(bases);
|
jpayne@68
|
802 strand^=1;
|
jpayne@68
|
803 LinkedHashMap<String, Edge> temp=leftEdgeMap;
|
jpayne@68
|
804 leftEdgeMap=rightEdgeMap;
|
jpayne@68
|
805 rightEdgeMap=temp;
|
jpayne@68
|
806 }
|
jpayne@68
|
807
|
jpayne@68
|
808 int length(){return bases.length;}
|
jpayne@68
|
809
|
jpayne@68
|
810 final int numericID;
|
jpayne@68
|
811 final String name;
|
jpayne@68
|
812 final byte[] bases;
|
jpayne@68
|
813 final AtomicIntegerArray depthArray;
|
jpayne@68
|
814 int strand=0;
|
jpayne@68
|
815
|
jpayne@68
|
816 boolean processedLeft=false;
|
jpayne@68
|
817 boolean processedRight=false;
|
jpayne@68
|
818 boolean processed(){return processedLeft || processedRight;}
|
jpayne@68
|
819
|
jpayne@68
|
820 LinkedHashMap<String, Edge> leftEdgeMap=new LinkedHashMap<String, Edge>();
|
jpayne@68
|
821 LinkedHashMap<String, Edge> rightEdgeMap=new LinkedHashMap<String, Edge>();
|
jpayne@68
|
822 }
|
jpayne@68
|
823
|
jpayne@68
|
824 private class Edge{
|
jpayne@68
|
825
|
jpayne@68
|
826 Edge(Contig source_, Contig dest_, boolean left_){
|
jpayne@68
|
827 source=source_;
|
jpayne@68
|
828 dest=dest_;
|
jpayne@68
|
829 leftEdge=left_;
|
jpayne@68
|
830 }
|
jpayne@68
|
831
|
jpayne@68
|
832 void add(SamLine sl){
|
jpayne@68
|
833 final boolean sameStrandReads=(sl.strand()==sl.mateStrand());
|
jpayne@68
|
834 final boolean sameStrandContigs=(sameStrandPairs==sameStrandReads);
|
jpayne@68
|
835 final int spos, dpos;
|
jpayne@68
|
836 if(leftEdge){
|
jpayne@68
|
837 spos=sl.pos+sl.calcCigarLength(true, false)-1;
|
jpayne@68
|
838 dpos=(sameStrandContigs ? dest.length()-sl.pnext : sl.pnext+sl.length())-1;
|
jpayne@68
|
839 }else{
|
jpayne@68
|
840 spos=source.length()-sl.pos-1;
|
jpayne@68
|
841 dpos=(sameStrandContigs ? sl.pnext+sl.length() : dest.length()-sl.pnext)-1;
|
jpayne@68
|
842 }
|
jpayne@68
|
843 final int distance=spos+dpos;
|
jpayne@68
|
844
|
jpayne@68
|
845 if(distance>maxPairDist){
|
jpayne@68
|
846
|
jpayne@68
|
847 // assert(false) : "distance="+distance+", spos="+spos+", dpos="+dpos+", sameStrandContigs="+sameStrandContigs+
|
jpayne@68
|
848 // "\nsl.pos="+sl.pos+", sl.pnext="+sl.pnext+", strand="+sl.strand()+", nextStrand="+sl.mateStrand()+", left="+leftEdge
|
jpayne@68
|
849 // +"\n"+sl;
|
jpayne@68
|
850 // badCount++;
|
jpayne@68
|
851 return;
|
jpayne@68
|
852 }
|
jpayne@68
|
853
|
jpayne@68
|
854 distanceSum+=distance;
|
jpayne@68
|
855
|
jpayne@68
|
856 weight+=sl.mapq;
|
jpayne@68
|
857 if(sameStrandContigs){
|
jpayne@68
|
858 sameStrandCount++;
|
jpayne@68
|
859 }else{
|
jpayne@68
|
860 difStrandCount++;
|
jpayne@68
|
861 }
|
jpayne@68
|
862 // assert(false) : weight;
|
jpayne@68
|
863 }
|
jpayne@68
|
864
|
jpayne@68
|
865 public float strandRatio() {
|
jpayne@68
|
866 return Tools.max(sameStrandCount, difStrandCount)/(float)(sameStrandCount+difStrandCount);
|
jpayne@68
|
867 }
|
jpayne@68
|
868
|
jpayne@68
|
869 public boolean sameStrand(){
|
jpayne@68
|
870 return sameStrandCount>=difStrandCount;
|
jpayne@68
|
871 }
|
jpayne@68
|
872
|
jpayne@68
|
873 @Override
|
jpayne@68
|
874 public String toString(){
|
jpayne@68
|
875 return "("+source.name+"->"+dest.name+", "+(leftEdge ? "left" : "right")+", weight="+weight+
|
jpayne@68
|
876 ", same="+sameStrandCount+", dif="+difStrandCount+", bad="+badCount+")";
|
jpayne@68
|
877 }
|
jpayne@68
|
878
|
jpayne@68
|
879 long count(){return sameStrandCount+difStrandCount;}
|
jpayne@68
|
880
|
jpayne@68
|
881 final Contig source;
|
jpayne@68
|
882 final Contig dest;
|
jpayne@68
|
883 long sameStrandCount;
|
jpayne@68
|
884 long difStrandCount;
|
jpayne@68
|
885 long distanceSum;
|
jpayne@68
|
886 long weight;
|
jpayne@68
|
887 long badCount;
|
jpayne@68
|
888 final boolean leftEdge;
|
jpayne@68
|
889 }
|
jpayne@68
|
890
|
jpayne@68
|
891 /*--------------------------------------------------------------*/
|
jpayne@68
|
892 /*---------------- Fields ----------------*/
|
jpayne@68
|
893 /*--------------------------------------------------------------*/
|
jpayne@68
|
894
|
jpayne@68
|
895 /** Primary input file path */
|
jpayne@68
|
896 private String in=null;
|
jpayne@68
|
897 /** Secondary input file path */
|
jpayne@68
|
898 private String ref=null;
|
jpayne@68
|
899
|
jpayne@68
|
900 /** Primary output file path */
|
jpayne@68
|
901 private String out=null;
|
jpayne@68
|
902
|
jpayne@68
|
903 /** Override input file extension */
|
jpayne@68
|
904 private String extin=null;
|
jpayne@68
|
905 /** Override output file extension */
|
jpayne@68
|
906 private String extout=null;
|
jpayne@68
|
907
|
jpayne@68
|
908 private String insertList=null;
|
jpayne@68
|
909
|
jpayne@68
|
910 /*--------------------------------------------------------------*/
|
jpayne@68
|
911
|
jpayne@68
|
912 /** Number of reads processed */
|
jpayne@68
|
913 protected long readsProcessed=0;
|
jpayne@68
|
914 /** Number of bases processed */
|
jpayne@68
|
915 protected long basesProcessed=0;
|
jpayne@68
|
916
|
jpayne@68
|
917 /** Number of reads retained */
|
jpayne@68
|
918 protected long readsOut=0;
|
jpayne@68
|
919 /** Number of bases retained */
|
jpayne@68
|
920 protected long basesOut=0;
|
jpayne@68
|
921
|
jpayne@68
|
922 protected long scaffoldsOut=0;
|
jpayne@68
|
923 protected long scaffoldLengthOut=0;
|
jpayne@68
|
924
|
jpayne@68
|
925 protected long totalInsertSum=0;
|
jpayne@68
|
926 protected long totalInsertCount=0;
|
jpayne@68
|
927 protected double totalAverageInsert;
|
jpayne@68
|
928
|
jpayne@68
|
929 /** Quit after processing this many input reads; -1 means no limit */
|
jpayne@68
|
930 private long maxReads=-1;
|
jpayne@68
|
931
|
jpayne@68
|
932 boolean sameStrandPairs=false;
|
jpayne@68
|
933
|
jpayne@68
|
934 int gapsAdded=0;
|
jpayne@68
|
935 long nsAdded=0;
|
jpayne@68
|
936
|
jpayne@68
|
937 /*--------------------------------------------------------------*/
|
jpayne@68
|
938
|
jpayne@68
|
939 /** Threads dedicated to reading the sam file */
|
jpayne@68
|
940 private int streamerThreads=SamStreamer.DEFAULT_THREADS;
|
jpayne@68
|
941
|
jpayne@68
|
942 private boolean loadedRef=false;
|
jpayne@68
|
943
|
jpayne@68
|
944 private int minDepth=4;
|
jpayne@68
|
945
|
jpayne@68
|
946 private float minWeightRatio=0.8f;
|
jpayne@68
|
947 private float minStrandRatio=0.8f;
|
jpayne@68
|
948
|
jpayne@68
|
949 private int scaffoldBreakNs=10;
|
jpayne@68
|
950
|
jpayne@68
|
951 private int maxPairDist=3000;
|
jpayne@68
|
952
|
jpayne@68
|
953 private int buckets=1000;
|
jpayne@68
|
954 protected AtomicLongArray insertCounts=new AtomicLongArray(20000);
|
jpayne@68
|
955 protected int[] insertByPercentile;
|
jpayne@68
|
956
|
jpayne@68
|
957 public final SamFilter samFilter=new SamFilter();
|
jpayne@68
|
958
|
jpayne@68
|
959 /** Uses full ref names */
|
jpayne@68
|
960 public LinkedHashMap<String, Contig> refMap=new LinkedHashMap<String, Contig>();
|
jpayne@68
|
961 /** Uses truncated ref names */
|
jpayne@68
|
962 public LinkedHashMap<String, Contig> refMap2=new LinkedHashMap<String, Contig>();
|
jpayne@68
|
963
|
jpayne@68
|
964 /*--------------------------------------------------------------*/
|
jpayne@68
|
965 /*---------------- Final Fields ----------------*/
|
jpayne@68
|
966 /*--------------------------------------------------------------*/
|
jpayne@68
|
967
|
jpayne@68
|
968 /** Primary input file */
|
jpayne@68
|
969 private final FileFormat ffin;
|
jpayne@68
|
970 /** Secondary input file */
|
jpayne@68
|
971 private final FileFormat ffref;
|
jpayne@68
|
972
|
jpayne@68
|
973 /** Primary output file */
|
jpayne@68
|
974 private final FileFormat ffout;
|
jpayne@68
|
975
|
jpayne@68
|
976 /*--------------------------------------------------------------*/
|
jpayne@68
|
977 /*---------------- Common Fields ----------------*/
|
jpayne@68
|
978 /*--------------------------------------------------------------*/
|
jpayne@68
|
979
|
jpayne@68
|
980 /** Print status messages to this output stream */
|
jpayne@68
|
981 private PrintStream outstream=System.err;
|
jpayne@68
|
982 /** Print verbose messages */
|
jpayne@68
|
983 public static boolean verbose=false;
|
jpayne@68
|
984 /** True if an error was encountered */
|
jpayne@68
|
985 public boolean errorState=false;
|
jpayne@68
|
986 /** Overwrite existing output files */
|
jpayne@68
|
987 private boolean overwrite=false;
|
jpayne@68
|
988 /** Append to existing output files */
|
jpayne@68
|
989 private boolean append=false;
|
jpayne@68
|
990 /** Reads are output in input order */
|
jpayne@68
|
991 private boolean ordered=false;
|
jpayne@68
|
992
|
jpayne@68
|
993 }
|