comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/consensus/Lilypad.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 package consensus;
2
3 import java.io.PrintStream;
4 import java.util.ArrayList;
5 import java.util.LinkedHashMap;
6 import java.util.Map.Entry;
7 import java.util.concurrent.atomic.AtomicIntegerArray;
8 import java.util.concurrent.atomic.AtomicLongArray;
9
10 import dna.AminoAcid;
11 import fileIO.ByteFile;
12 import fileIO.FileFormat;
13 import fileIO.ReadWrite;
14 import shared.Parse;
15 import shared.Parser;
16 import shared.PreParser;
17 import shared.ReadStats;
18 import shared.Shared;
19 import shared.Timer;
20 import shared.Tools;
21 import stream.ConcurrentReadInputStream;
22 import stream.ConcurrentReadOutputStream;
23 import stream.FastaReadInputStream;
24 import stream.Read;
25 import stream.SamLine;
26 import stream.SamReadStreamer;
27 import stream.SamStreamer;
28 import structures.ByteBuilder;
29 import structures.ListNum;
30 import template.Accumulator;
31 import template.ThreadWaiter;
32 import var2.SamFilter;
33
34 /**
35 * Scaffolds contigs based on paired read mapping.
36 *
37 * @author Brian Bushnell
38 * @date September 11, 2019
39 *
40 */
41 public class Lilypad implements Accumulator<Lilypad.ProcessThread> {
42
43 /*--------------------------------------------------------------*/
44 /*---------------- Initialization ----------------*/
45 /*--------------------------------------------------------------*/
46
47 /**
48 * Code entrance from the command line.
49 * @param args Command line arguments
50 */
51 public static void main(String[] args){
52 //Start a timer immediately upon code entrance.
53 Timer t=new Timer();
54
55 //Create an instance of this class
56 Lilypad x=new Lilypad(args);
57
58 //Run the object
59 x.process(t);
60
61 //Close the print stream if it was redirected
62 Shared.closeStream(x.outstream);
63 }
64
65 /**
66 * Constructor.
67 * @param args Command line arguments
68 */
69 public Lilypad(String[] args){
70
71 {//Preparse block for help, config files, and outstream
72 PreParser pp=new PreParser(args, getClass(), false);
73 args=pp.args;
74 outstream=pp.outstream;
75 }
76
77 //Set shared static variables prior to parsing
78 ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
79 ReadWrite.MAX_ZIP_THREADS=Shared.threads();
80 SamLine.RNAME_AS_BYTES=false;
81
82 samFilter.includeUnmapped=false;
83 samFilter.includeSupplimentary=false;
84 // samFilter.includeDuplicate=false;
85 samFilter.includeNonPrimary=false;
86 samFilter.includeQfail=false;
87 samFilter.minMapq=4;
88
89 {//Parse the arguments
90 final Parser parser=parse(args);
91
92 Parser.processQuality();
93
94 maxReads=parser.maxReads;
95 overwrite=ReadStats.overwrite=parser.overwrite;
96 append=ReadStats.append=parser.append;
97
98 in=parser.in1;
99 extin=parser.extin;
100
101 out=parser.out1;
102 extout=parser.extout;
103 }
104
105 {
106 // if("auto".equalsIgnoreCase(atomic)){Scaffold.setCA3A(Shared.threads()>8);}
107 // else{Scaffold.setCA3A(Parse.parseBoolean(atomic));}
108 samFilter.setSamtoolsFilter();
109
110 streamerThreads=Tools.max(1, Tools.min(streamerThreads, Shared.threads()));
111 assert(streamerThreads>0) : streamerThreads;
112 }
113
114 validateParams();
115 fixExtensions(); //Add or remove .gz or .bz2 as needed
116 checkFileExistence(); //Ensure files can be read and written
117 checkStatics(); //Adjust file-related static fields as needed for this program
118
119 //Create output FileFormat objects
120 ffout=FileFormat.testOutput(out, FileFormat.FASTA, extout, true, overwrite, append, ordered);
121
122 //Create input FileFormat objects
123 ffin=FileFormat.testInput(in, FileFormat.SAM, extin, true, true);
124 ffref=FileFormat.testInput(ref, FileFormat.FASTA, null, true, true);
125 }
126
127 /*--------------------------------------------------------------*/
128 /*---------------- Initialization Helpers ----------------*/
129 /*--------------------------------------------------------------*/
130
131 /** Parse arguments from the command line */
132 private Parser parse(String[] args){
133
134 //Create a parser object
135 Parser parser=new Parser();
136
137 //Set any necessary Parser defaults here
138 //parser.foo=bar;
139
140 //Parse each argument
141 for(int i=0; i<args.length; i++){
142 String arg=args[i];
143
144 //Break arguments into their constituent parts, in the form of "a=b"
145 String[] split=arg.split("=");
146 String a=split[0].toLowerCase();
147 String b=split.length>1 ? split[1] : null;
148 if(b!=null && b.equalsIgnoreCase("null")){b=null;}
149
150 if(a.equals("verbose")){
151 verbose=Parse.parseBoolean(b);
152 }else if(a.equals("ref") || a.equals("scaffolds")){
153 ref=b;
154 }else if(a.equals("insertlist")){
155 insertList=b;
156 }else if(a.equals("ordered")){
157 ordered=Parse.parseBoolean(b);
158 }else if(a.equalsIgnoreCase("sameStrandPairs")){
159 sameStrandPairs=Parse.parseBoolean(b);
160 }else if(a.equalsIgnoreCase("ns") || a.equalsIgnoreCase("n") || a.equalsIgnoreCase("scaffoldbreak") || a.equalsIgnoreCase("gap") || a.equalsIgnoreCase("mingap")){
161 scaffoldBreakNs=Integer.parseInt(b);
162 assert(scaffoldBreakNs>0);
163 }else if(a.equalsIgnoreCase("mindepth")){
164 minDepth=Integer.parseInt(b);
165 assert(minDepth>0);
166 }else if(a.equalsIgnoreCase("maxinsert")){
167 maxPairDist=Parse.parseIntKMG(b);
168 }else if(a.equalsIgnoreCase("minWeightRatio") || a.equalsIgnoreCase("minwr")){
169 minWeightRatio=Float.parseFloat(b);
170 }else if(a.equalsIgnoreCase("minStrandRatio") || a.equalsIgnoreCase("minsr")){
171 minStrandRatio=Float.parseFloat(b);
172 }else if(a.equals("clearfilters") || a.equals("clearfilter")){
173 if(Parse.parseBoolean(b)){
174 samFilter.clear();
175 }
176 }else if(a.equals("parse_flag_goes_here")){
177 long fake_variable=Parse.parseKMG(b);
178 //Set a variable here
179 }else if(samFilter.parse(arg, a, b)){
180 //do nothing
181 }else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
182 //do nothing
183 }else{
184 outstream.println("Unknown parameter "+args[i]);
185 assert(false) : "Unknown parameter "+args[i];
186 }
187 }
188
189 return parser;
190 }
191
192 /** Add or remove .gz or .bz2 as needed */
193 private void fixExtensions(){
194 in=Tools.fixExtension(in);
195 ref=Tools.fixExtension(ref);
196 }
197
198 /** Ensure files can be read and written */
199 private void checkFileExistence(){
200
201 //Ensure there is an input file
202 if(in==null){throw new RuntimeException("Error - an input file is required.");}
203
204 //Ensure there is an input file
205 if(ref==null){throw new RuntimeException("Error - a reference file is required.");}
206
207 //Ensure output files can be written
208 if(!Tools.testOutputFiles(overwrite, append, false, out)){
209 outstream.println((out==null)+", "+out);
210 throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n");
211 }
212
213 //Ensure input files can be read
214 if(!Tools.testInputFiles(false, true, in, ref)){
215 throw new RuntimeException("\nCan't read some input files.\n");
216 }
217
218 //Ensure that no file was specified multiple times
219 if(!Tools.testForDuplicateFiles(true, in, ref, out)){
220 throw new RuntimeException("\nSome file names were specified multiple times.\n");
221 }
222 }
223
224 /** Adjust file-related static fields as needed for this program */
225 private static void checkStatics(){
226 //Adjust the number of threads for input file reading
227 if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
228 ByteFile.FORCE_MODE_BF2=true;
229 }
230
231 assert(FastaReadInputStream.settingsOK());
232 }
233
234 /** Ensure parameter ranges are within bounds and required parameters are set */
235 private boolean validateParams(){
236 // assert(minfoo>0 && minfoo<=maxfoo) : minfoo+", "+maxfoo;
237 return true;
238 }
239
240 /*--------------------------------------------------------------*/
241 /*---------------- Outer Methods ----------------*/
242 /*--------------------------------------------------------------*/
243
244 /** Create read streams and process all data */
245 void process(Timer t){
246
247 //Turn off read validation in the input threads to increase speed
248 final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
249 Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
250
251 //Create a read input stream
252 final SamStreamer ss=makeStreamer(ffin);
253
254 //Load reference
255 loadReferenceCustom();
256
257 //Reset counters
258 readsProcessed=readsOut=0;
259 basesProcessed=basesOut=0;
260
261 //Process the reads in separate threads
262 spawnThreads(ss);
263
264 //Optionally create a read output stream
265 final ConcurrentReadOutputStream ros=makeCros();
266
267 if(verbose){outstream.println("Fixing reference.");}
268
269 makeScaffolds(ros);
270
271 if(verbose){outstream.println("Finished; closing streams.");}
272
273 //Write anything that was accumulated by ReadStats
274 errorState|=ReadStats.writeAll();
275 //Close the read streams
276 errorState|=ReadWrite.closeStream(ros);
277
278 //Reset read validation
279 Read.VALIDATE_IN_CONSTRUCTOR=vic;
280
281 //Report timing and results
282 t.stop();
283 outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
284 outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, scaffoldsOut, scaffoldLengthOut, 8, false));
285
286 outstream.println();
287 outstream.println(Tools.number("Average Insert", totalAverageInsert, 2, 8));
288 outstream.println(Tools.number("Joins Made ", gapsAdded, 8));
289 outstream.println(Tools.number("Ns Added ", nsAdded, 8));
290 outstream.println(Tools.number("Contigs In ", refMap.size(), 8));
291 outstream.println(Tools.number("Scaffolds Out ", scaffoldsOut, 8));
292
293
294 //Throw an exception of there was an error in a thread
295 if(errorState){
296 throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
297 }
298 }
299
300 private synchronized void loadReferenceCustom(){
301 assert(!loadedRef);
302 ConcurrentReadInputStream cris=makeRefCris();
303 for(ListNum<Read> ln=cris.nextList(); ln!=null && ln.size()>0; ln=cris.nextList()) {
304 for(Read r : ln){
305 String name=r.id;
306 String name2=Tools.trimToWhitespace(r.id);
307 Contig cont=new Contig(name, r.bases, r.numericID);
308 refMap.put(name, cont);
309 refMap2.put(name2, cont);
310 }
311 cris.returnList(ln);
312 }
313 ReadWrite.closeStream(cris);
314 loadedRef=true;
315 }
316
317 private ConcurrentReadInputStream makeRefCris(){
318 ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffref, null);
319 cris.start(); //Start the stream
320 if(verbose){outstream.println("Started cris");}
321 boolean paired=cris.paired();
322 assert(!paired) : "References should not be paired.";
323 return cris;
324 }
325
326 private SamStreamer makeStreamer(FileFormat ff){
327 if(ff==null){return null;}
328 SamStreamer ss=new SamReadStreamer(ff, streamerThreads, true, maxReads);
329 ss.start(); //Start the stream
330 if(verbose){outstream.println("Started Streamer");}
331 return ss;
332 }
333
334 private ConcurrentReadOutputStream makeCros(){
335 if(ffout==null){return null;}
336
337 //Select output buffer size based on whether it needs to be ordered
338 final int buff=(ordered ? Tools.mid(16, 128, (Shared.threads()*2)/3) : 8);
339
340 final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ffout, null, buff, null, false);
341 ros.start(); //Start the stream
342 return ros;
343 }
344
345 /*--------------------------------------------------------------*/
346 /*---------------- Thread Management ----------------*/
347 /*--------------------------------------------------------------*/
348
349 /** Spawn process threads */
350 private void spawnThreads(final SamStreamer ss){
351
352 //Do anything necessary prior to processing
353
354 //Determine how many threads may be used
355 final int threads=Shared.threads();
356
357 //Fill a list with ProcessThreads
358 ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
359 for(int i=0; i<threads; i++){
360 alpt.add(new ProcessThread(ss, i));
361 }
362
363 //Start the threads
364 for(ProcessThread pt : alpt){
365 pt.start();
366 }
367
368 //Wait for threads to finish
369 boolean success=ThreadWaiter.waitForThreads(alpt, this);
370 errorState&=!success;
371
372 //Do anything necessary after processing
373 totalAverageInsert=totalInsertSum/(double)totalInsertCount;
374 insertByPercentile=Tools.makeHistogram(insertCounts, buckets);
375 }
376
377 @Override
378 public final void accumulate(ProcessThread pt){
379 readsProcessed+=pt.readsProcessedT;
380 basesProcessed+=pt.basesProcessedT;
381 readsOut+=pt.readsOutT;
382 basesOut+=pt.basesOutT;
383
384 totalInsertSum+=pt.totalInsertSumT;
385 totalInsertCount+=pt.totalInsertCountT;
386
387 errorState|=(!pt.success);
388 }
389
390 @Override
391 public final boolean success(){return !errorState;}
392
393 /*--------------------------------------------------------------*/
394 /*---------------- Inner Methods ----------------*/
395 /*--------------------------------------------------------------*/
396
397 private void makeScaffolds(ConcurrentReadOutputStream ros){
398 ByteBuilder bb=new ByteBuilder(1000000);
399
400 ArrayList<Read> list=new ArrayList<Read>(200);
401 long num=0;
402 long lengthSum=0;
403 for(Entry<String, Contig> e : refMap.entrySet()){
404 Contig cont=e.getValue();
405 if(!cont.processed()){
406 Read r=cont.makeScaffold(bb);
407 assert(r!=null);
408
409 lengthSum+=r.length();
410 list.add(r);
411 scaffoldsOut++;
412 scaffoldLengthOut+=r.length();
413
414 if(list.size()>=200 || lengthSum>=100000){
415 if(ros!=null){ros.add(list, num);}
416 list=new ArrayList<Read>(200);
417 num++;
418 lengthSum=0;
419 }
420 assert(cont.processed());
421 }
422 }
423 if(list.size()>0){
424 if(ros!=null){ros.add(list, num);}
425 }
426 }
427
428 private static int calcInsertSize(SamLine sl) {
429 assert(sl.mapped() && sl.pairedOnSameChrom());
430 assert(sl.primary());
431 assert(!sl.supplementary());
432 assert(sl.leftmost());
433
434 assert(sl.tlen>0) : sl.tlen+"\n\n"+sl;
435 return sl.tlen>0 ? sl.tlen : -sl.tlen;
436
437 // final int insertSize;
438 // String insertTag=null;
439 // if(sl.optional!=null){
440 // for(String s : sl.optional){
441 // if(s.startsWith("X8:Z:")){
442 // insertTag=s;
443 // break;
444 // }
445 // }
446 // }
447 // if(insertTag!=null){
448 // insertSize=Integer.parseInt(insertTag.substring(5));
449 // }else{
450 // insertSize=sl.tlen;//This is unsafe due to indels.
451 // assert(false) : "Reads need insert size tags.";
452 // }
453 // assert(insertSize>0) : sl;
454 //
455 // return insertSize;
456 }
457
458 private Contig getScaffold(String rname){
459 Contig scaf=refMap.get(rname);
460 if(scaf==null){scaf=refMap2.get(Tools.trimToWhitespace(rname));}
461 assert(scaf!=null) : "Can't find graph for "+rname;
462 return scaf;
463 }
464
465 /*--------------------------------------------------------------*/
466 /*---------------- Inner Classes ----------------*/
467 /*--------------------------------------------------------------*/
468
469 /** This class is static to prevent accidental writing to shared variables.
470 * It is safe to remove the static modifier. */
471 class ProcessThread extends Thread {
472
473 //Constructor
474 ProcessThread(final SamStreamer ss_, final int tid_){
475 ss=ss_;
476 tid=tid_;
477 }
478
479 //Called by start()
480 @Override
481 public void run(){
482 //Do anything necessary prior to processing
483
484 //Process the reads
485 processInner();
486
487 //Do anything necessary after processing
488
489 //Indicate successful exit status
490 success=true;
491 }
492
493 /** Iterate through the reads */
494 void processInner(){
495
496 //Grab and process all lists
497 for(ListNum<Read> ln=ss.nextReads(); ln!=null; ln=ss.nextReads()){
498 // if(verbose){outstream.println("Got list of size "+list.size());} //Disabled due to non-static access
499
500 processList(ln);
501 }
502
503 }
504
505 void processList(ListNum<Read> ln){
506
507 //Grab the actual read list from the ListNum
508 final ArrayList<Read> reads=ln.list;
509
510 //Loop through each read in the list
511 for(int idx=0; idx<reads.size(); idx++){
512 final Read r=reads.get(idx);
513
514 //Validate reads in worker threads
515 if(!r.validated()){r.validate(true);}
516
517 //Track the initial length for statistics
518 final int initialLength=r.length();
519
520 //Increment counters
521 readsProcessedT+=r.pairCount();
522 basesProcessedT+=initialLength;
523
524 processRead(r);
525 }
526 }
527
528 /**
529 * Process a read or a read pair.
530 * @param r Read 1
531 * @param r2 Read 2 (may be null)
532 * @return True if the reads should be kept, false if they should be discarded.
533 */
534 void processRead(final Read r){
535 final SamLine sl=r.samline;
536 assert(sl!=null) : sl;
537 if(samFilter!=null && !samFilter.passesFilter(sl)){return;}
538
539 //sl.nextMapped();
540 if(sl.mapped() && sl.primary() && !sl.supplementary()){
541 final String rname=sl.rnameS();
542 Contig scaf=getScaffold(rname);
543 if(scaf!=null){
544 if(sl.pairedOnSameChrom() && sl.properPair() && sl.leftmost()){
545 final int insertSize=calcInsertSize(sl);
546 insertCounts.incrementAndGet(Tools.mid(0, insertSize, insertCounts.length()-1));
547 totalInsertSumT+=insertSize;
548 totalInsertCountT++;
549 }
550 scaf.add(sl);
551
552 readsOutT++;
553 basesOutT+=r.length();
554 }
555 }
556 if(sl.mapped() && sl.pairedOnSameChrom() && sl.properPair() && sl.primary() && !sl.supplementary() && sl.leftmost()){
557 final String rname=sl.rnameS();
558
559 Contig scaf=getScaffold(rname);
560 if(scaf!=null){
561 final int insertSize=calcInsertSize(sl);
562 insertCounts.incrementAndGet(Tools.mid(0, insertSize, insertCounts.length()-1));
563 scaf.add(sl);
564
565 readsOutT++;
566 basesOutT+=r.length();
567
568 totalInsertSumT+=insertSize;
569 totalInsertCountT++;
570 }
571 }
572 }
573
574 /** Number of reads processed by this thread */
575 protected long readsProcessedT=0;
576 /** Number of bases processed by this thread */
577 protected long basesProcessedT=0;
578
579 /** Number of reads retained by this thread */
580 protected long readsOutT=0;
581 /** Number of bases retained by this thread */
582 protected long basesOutT=0;
583
584 protected long totalInsertSumT=0;
585 protected long totalInsertCountT=0;
586
587 long insertSum=0;
588
589 /** True only if this thread has completed successfully */
590 boolean success=false;
591
592 /** Shared input stream */
593 private final SamStreamer ss;
594 /** Thread ID */
595 final int tid;
596 }
597
598 Contig findLeftmost(Contig source){
599 if(verbose){System.err.println("findLeftmost("+source.name+")");}
600 while(true) {
601 assert(!source.processed());
602 if(source.processed()){return null;}
603 source.processedLeft=true;
604 Edge se=source.bestEdge(true);
605 if(verbose){System.err.println("Found source edge "+se);}
606 if(se==null){return source;}
607 Contig dest=se.dest;
608 if(dest.processed()){
609 if(verbose){System.err.println("Dest was processed; returning.");}
610 return source;
611 }
612 if(se.sameStrand()){
613 if(source.strand==dest.strand){
614
615 }else{
616 if(verbose){System.err.println("Flipping "+dest.name);}
617 dest.flip();
618 }
619 }else{
620 if(source.strand==dest.strand){
621 if(verbose){System.err.println("Flipping "+dest.name);}
622 dest.flip();
623 }else{
624
625 }
626 }
627 Edge de=dest.bestEdge(false);
628 if(verbose){System.err.println("Found dest edge "+de);}
629 if(de==null || de.dest!=source){
630 if(dest.strand==1){dest.flip();}
631 if(verbose){System.err.println("Dest edge did not match; returning.");}
632 return source;
633 }
634 source=dest;
635 if(verbose){System.err.println("Migrated to next node.");}
636 }
637 }
638
639 Read expandRight(final Contig source0, ByteBuilder bb){
640 if(verbose){System.err.println("expandRight("+source0.name+")");}
641 bb.clear();
642 Contig source=source0;
643 while(true) {
644 assert(!source.processedRight);
645 if(source.processedRight){return null;}
646 if(source.strand==1){
647 Tools.reverseInPlace(source.depthArray);
648 }
649 source.processedRight=true;
650 bb.append(source.bases);
651 Edge se=source.bestEdge(false);
652 if(verbose){System.err.println("Found source edge "+se);}
653 if(se==null){break;}
654 Contig dest=se.dest;
655 if(dest.processedRight){
656 if(verbose){System.err.println("Dest was processed; returning.");}
657 break;
658 }
659 if(se.sameStrand()){
660 if(source.strand==dest.strand){
661
662 }else{
663 if(verbose){System.err.println("Flipping "+dest.name);}
664 dest.flip();
665 }
666 }else{
667 if(source.strand==dest.strand){
668 if(verbose){System.err.println("Flipping "+dest.name);}
669 dest.flip();
670 }else{
671
672 }
673 }
674 Edge de=dest.bestEdge(true);
675 if(verbose){System.err.println("Found dest edge "+de);}
676 if(de==null || de.dest!=source){
677 if(verbose){System.err.println("Dest edge did not match; returning.");}
678 if(dest.strand==1){dest.flip();}
679 break;
680 }
681
682 //Now append Ns
683 int observedLength=(int)(se.distanceSum/se.count());
684 long depth=se.count();
685 int depthProxyIndex=(source.length()-Tools.min(source.length()/2, 300));
686 long depthProxy=source.depthArray.get(depthProxyIndex);
687 int percentile=(int)(buckets*depth/(float)(depth+depthProxy));
688 int inferredLength=insertByPercentile[percentile];
689
690 int Ns=(Tools.max(scaffoldBreakNs, inferredLength-observedLength));
691 for(int i=0; i<Ns; i++){bb.append('N');}
692 source=dest;
693
694 gapsAdded++;
695 nsAdded+=Ns;
696 }
697 Read r=new Read(bb.toBytes(), null, source0.name, source0.numericID);
698 return r;
699 }
700
701 /*--------------------------------------------------------------*/
702
703 private class Contig {
704
705 Contig(String name_, byte[] bases_, long numericID_){
706 name=name_;
707 bases=bases_;
708 numericID=(int)numericID_;
709 depthArray=new AtomicIntegerArray(bases.length);
710 }
711
712 public Read makeScaffold(ByteBuilder bb) {
713 assert(!processed());
714 Contig leftmost=findLeftmost(this);
715 return expandRight(leftmost, bb);
716 }
717
718 Edge bestEdge(boolean left) {
719 final LinkedHashMap<String, Edge> map=(left ? leftEdgeMap : rightEdgeMap);
720 if(map.isEmpty()){return null;}
721 long weightSum=0;
722 long countSum=0;
723 Edge best=null;
724 for(Entry<String, Edge> entry : map.entrySet()){
725 Edge e=entry.getValue();
726 weightSum+=e.weight;
727 countSum+=e.count();
728 if(best==null || e.weight>best.weight){best=e;}
729 }
730 if(best.count()<minDepth){return null;}
731 if(weightSum*minWeightRatio>best.weight){return null;}
732 if(best.strandRatio()<minStrandRatio){return null;}
733 if(best.badCount>0.5*best.count()){return null;}
734 return best;
735 }
736
737 void add(SamLine sl){
738 assert(sl.mapped() && sl.primary() && !sl.supplementary());
739 if(sl.nextMapped()){
740 if(sl.pairedOnSameChrom()){
741 if(!sl.properPair()){
742 addCoverageSingleton(sl);
743 }else if(sl.leftmost()){
744 addCoveragePaired(sl);
745 }
746 }else{
747 addCoverageSingleton(sl);
748 handleMixedPair(sl);
749 }
750 }else{
751 addCoverageSingleton(sl);
752 }
753 }
754
755 private void addCoverageSingleton(SamLine sl){
756 assert(sl.cigar!=null);
757 int start=sl.pos-1;
758 int stop=start+SamLine.calcCigarLength(sl.cigar, false, false);
759
760 for(int i=start; i<stop; i++){
761 if(i>=0 && i<bases.length){
762 depthArray.incrementAndGet(i);
763 }
764 }
765 }
766
767 private void addCoveragePaired(SamLine sl){
768 assert(sl.cigar!=null);
769 assert(sl.leftmost() && sl.pairedOnSameChrom() && sl.nextMapped());
770 int start=sl.pos-1;
771 int stop=start+sl.tlen;
772
773 for(int i=start; i<stop; i++){
774 if(i>=0 && i<bases.length){
775 depthArray.incrementAndGet(i);
776 }
777 }
778 }
779
780 /** Reads mapping to different contigs */
781 private void handleMixedPair(SamLine sl){
782 assert(sl.mapped() && sl.nextMapped() && !sl.pairedOnSameChrom());
783 String rname=sl.rnameS();
784 String rnext=sl.rnextS();
785 if(rname.equals(rnext)){return;}
786
787 final boolean left=(sl.strand()==1);
788 LinkedHashMap<String, Edge> map=(left ? leftEdgeMap : rightEdgeMap);
789 Edge e=map.get(rnext);
790 Contig dest=null;
791 if(e==null){
792 dest=getScaffold(rnext);
793 if(dest==null){return;}
794 e=new Edge(this, dest, left);
795 map.put(rnext, e);
796 }
797 e.add(sl);
798 }
799
800 void flip(){//Be careful with this
801 AminoAcid.reverseComplementBasesInPlace(bases);
802 strand^=1;
803 LinkedHashMap<String, Edge> temp=leftEdgeMap;
804 leftEdgeMap=rightEdgeMap;
805 rightEdgeMap=temp;
806 }
807
808 int length(){return bases.length;}
809
810 final int numericID;
811 final String name;
812 final byte[] bases;
813 final AtomicIntegerArray depthArray;
814 int strand=0;
815
816 boolean processedLeft=false;
817 boolean processedRight=false;
818 boolean processed(){return processedLeft || processedRight;}
819
820 LinkedHashMap<String, Edge> leftEdgeMap=new LinkedHashMap<String, Edge>();
821 LinkedHashMap<String, Edge> rightEdgeMap=new LinkedHashMap<String, Edge>();
822 }
823
824 private class Edge{
825
826 Edge(Contig source_, Contig dest_, boolean left_){
827 source=source_;
828 dest=dest_;
829 leftEdge=left_;
830 }
831
832 void add(SamLine sl){
833 final boolean sameStrandReads=(sl.strand()==sl.mateStrand());
834 final boolean sameStrandContigs=(sameStrandPairs==sameStrandReads);
835 final int spos, dpos;
836 if(leftEdge){
837 spos=sl.pos+sl.calcCigarLength(true, false)-1;
838 dpos=(sameStrandContigs ? dest.length()-sl.pnext : sl.pnext+sl.length())-1;
839 }else{
840 spos=source.length()-sl.pos-1;
841 dpos=(sameStrandContigs ? sl.pnext+sl.length() : dest.length()-sl.pnext)-1;
842 }
843 final int distance=spos+dpos;
844
845 if(distance>maxPairDist){
846
847 // assert(false) : "distance="+distance+", spos="+spos+", dpos="+dpos+", sameStrandContigs="+sameStrandContigs+
848 // "\nsl.pos="+sl.pos+", sl.pnext="+sl.pnext+", strand="+sl.strand()+", nextStrand="+sl.mateStrand()+", left="+leftEdge
849 // +"\n"+sl;
850 // badCount++;
851 return;
852 }
853
854 distanceSum+=distance;
855
856 weight+=sl.mapq;
857 if(sameStrandContigs){
858 sameStrandCount++;
859 }else{
860 difStrandCount++;
861 }
862 // assert(false) : weight;
863 }
864
865 public float strandRatio() {
866 return Tools.max(sameStrandCount, difStrandCount)/(float)(sameStrandCount+difStrandCount);
867 }
868
869 public boolean sameStrand(){
870 return sameStrandCount>=difStrandCount;
871 }
872
873 @Override
874 public String toString(){
875 return "("+source.name+"->"+dest.name+", "+(leftEdge ? "left" : "right")+", weight="+weight+
876 ", same="+sameStrandCount+", dif="+difStrandCount+", bad="+badCount+")";
877 }
878
879 long count(){return sameStrandCount+difStrandCount;}
880
881 final Contig source;
882 final Contig dest;
883 long sameStrandCount;
884 long difStrandCount;
885 long distanceSum;
886 long weight;
887 long badCount;
888 final boolean leftEdge;
889 }
890
891 /*--------------------------------------------------------------*/
892 /*---------------- Fields ----------------*/
893 /*--------------------------------------------------------------*/
894
895 /** Primary input file path */
896 private String in=null;
897 /** Secondary input file path */
898 private String ref=null;
899
900 /** Primary output file path */
901 private String out=null;
902
903 /** Override input file extension */
904 private String extin=null;
905 /** Override output file extension */
906 private String extout=null;
907
908 private String insertList=null;
909
910 /*--------------------------------------------------------------*/
911
912 /** Number of reads processed */
913 protected long readsProcessed=0;
914 /** Number of bases processed */
915 protected long basesProcessed=0;
916
917 /** Number of reads retained */
918 protected long readsOut=0;
919 /** Number of bases retained */
920 protected long basesOut=0;
921
922 protected long scaffoldsOut=0;
923 protected long scaffoldLengthOut=0;
924
925 protected long totalInsertSum=0;
926 protected long totalInsertCount=0;
927 protected double totalAverageInsert;
928
929 /** Quit after processing this many input reads; -1 means no limit */
930 private long maxReads=-1;
931
932 boolean sameStrandPairs=false;
933
934 int gapsAdded=0;
935 long nsAdded=0;
936
937 /*--------------------------------------------------------------*/
938
939 /** Threads dedicated to reading the sam file */
940 private int streamerThreads=SamStreamer.DEFAULT_THREADS;
941
942 private boolean loadedRef=false;
943
944 private int minDepth=4;
945
946 private float minWeightRatio=0.8f;
947 private float minStrandRatio=0.8f;
948
949 private int scaffoldBreakNs=10;
950
951 private int maxPairDist=3000;
952
953 private int buckets=1000;
954 protected AtomicLongArray insertCounts=new AtomicLongArray(20000);
955 protected int[] insertByPercentile;
956
957 public final SamFilter samFilter=new SamFilter();
958
959 /** Uses full ref names */
960 public LinkedHashMap<String, Contig> refMap=new LinkedHashMap<String, Contig>();
961 /** Uses truncated ref names */
962 public LinkedHashMap<String, Contig> refMap2=new LinkedHashMap<String, Contig>();
963
964 /*--------------------------------------------------------------*/
965 /*---------------- Final Fields ----------------*/
966 /*--------------------------------------------------------------*/
967
968 /** Primary input file */
969 private final FileFormat ffin;
970 /** Secondary input file */
971 private final FileFormat ffref;
972
973 /** Primary output file */
974 private final FileFormat ffout;
975
976 /*--------------------------------------------------------------*/
977 /*---------------- Common Fields ----------------*/
978 /*--------------------------------------------------------------*/
979
980 /** Print status messages to this output stream */
981 private PrintStream outstream=System.err;
982 /** Print verbose messages */
983 public static boolean verbose=false;
984 /** True if an error was encountered */
985 public boolean errorState=false;
986 /** Overwrite existing output files */
987 private boolean overwrite=false;
988 /** Append to existing output files */
989 private boolean append=false;
990 /** Reads are output in input order */
991 private boolean ordered=false;
992
993 }