diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/consensus/Lilypad.java @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/consensus/Lilypad.java	Tue Mar 18 16:23:26 2025 -0400
@@ -0,0 +1,993 @@
+package consensus;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicIntegerArray;
+import java.util.concurrent.atomic.AtomicLongArray;
+
+import dna.AminoAcid;
+import fileIO.ByteFile;
+import fileIO.FileFormat;
+import fileIO.ReadWrite;
+import shared.Parse;
+import shared.Parser;
+import shared.PreParser;
+import shared.ReadStats;
+import shared.Shared;
+import shared.Timer;
+import shared.Tools;
+import stream.ConcurrentReadInputStream;
+import stream.ConcurrentReadOutputStream;
+import stream.FastaReadInputStream;
+import stream.Read;
+import stream.SamLine;
+import stream.SamReadStreamer;
+import stream.SamStreamer;
+import structures.ByteBuilder;
+import structures.ListNum;
+import template.Accumulator;
+import template.ThreadWaiter;
+import var2.SamFilter;
+
+/**
+ * Scaffolds contigs based on paired read mapping.
+ * 
+ * @author Brian Bushnell
+ * @date September 11, 2019
+ *
+ */
+public class Lilypad implements Accumulator<Lilypad.ProcessThread> {
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Initialization        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/**
+	 * Code entrance from the command line.
+	 * @param args Command line arguments
+	 */
+	public static void main(String[] args){
+		//Start a timer immediately upon code entrance.
+		Timer t=new Timer();
+		
+		//Create an instance of this class
+		Lilypad x=new Lilypad(args);
+		
+		//Run the object
+		x.process(t);
+		
+		//Close the print stream if it was redirected
+		Shared.closeStream(x.outstream);
+	}
+	
+	/**
+	 * Constructor.
+	 * @param args Command line arguments
+	 */
+	public Lilypad(String[] args){
+		
+		{//Preparse block for help, config files, and outstream
+			PreParser pp=new PreParser(args, getClass(), false);
+			args=pp.args;
+			outstream=pp.outstream;
+		}
+		
+		//Set shared static variables prior to parsing
+		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
+		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
+		SamLine.RNAME_AS_BYTES=false;
+		
+		samFilter.includeUnmapped=false;
+		samFilter.includeSupplimentary=false;
+//		samFilter.includeDuplicate=false;
+		samFilter.includeNonPrimary=false;
+		samFilter.includeQfail=false;
+		samFilter.minMapq=4;
+		
+		{//Parse the arguments
+			final Parser parser=parse(args);
+			
+			Parser.processQuality();
+			
+			maxReads=parser.maxReads;
+			overwrite=ReadStats.overwrite=parser.overwrite;
+			append=ReadStats.append=parser.append;
+			
+			in=parser.in1;
+			extin=parser.extin;
+
+			out=parser.out1;
+			extout=parser.extout;
+		}
+		
+		{
+//			if("auto".equalsIgnoreCase(atomic)){Scaffold.setCA3A(Shared.threads()>8);}
+//			else{Scaffold.setCA3A(Parse.parseBoolean(atomic));}
+			samFilter.setSamtoolsFilter();
+			
+			streamerThreads=Tools.max(1, Tools.min(streamerThreads, Shared.threads()));
+			assert(streamerThreads>0) : streamerThreads;
+		}
+
+		validateParams();
+		fixExtensions(); //Add or remove .gz or .bz2 as needed
+		checkFileExistence(); //Ensure files can be read and written
+		checkStatics(); //Adjust file-related static fields as needed for this program 
+		
+		//Create output FileFormat objects
+		ffout=FileFormat.testOutput(out, FileFormat.FASTA, extout, true, overwrite, append, ordered);
+
+		//Create input FileFormat objects
+		ffin=FileFormat.testInput(in, FileFormat.SAM, extin, true, true);
+		ffref=FileFormat.testInput(ref, FileFormat.FASTA, null, true, true);
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------    Initialization Helpers    ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** Parse arguments from the command line */
+	private Parser parse(String[] args){
+		
+		//Create a parser object
+		Parser parser=new Parser();
+		
+		//Set any necessary Parser defaults here
+		//parser.foo=bar;
+		
+		//Parse each argument
+		for(int i=0; i<args.length; i++){
+			String arg=args[i];
+			
+			//Break arguments into their constituent parts, in the form of "a=b"
+			String[] split=arg.split("=");
+			String a=split[0].toLowerCase();
+			String b=split.length>1 ? split[1] : null;
+			if(b!=null && b.equalsIgnoreCase("null")){b=null;}
+			
+			if(a.equals("verbose")){
+				verbose=Parse.parseBoolean(b);
+			}else if(a.equals("ref") || a.equals("scaffolds")){
+				ref=b;
+			}else if(a.equals("insertlist")){
+				insertList=b;
+			}else if(a.equals("ordered")){
+				ordered=Parse.parseBoolean(b);
+			}else if(a.equalsIgnoreCase("sameStrandPairs")){
+				sameStrandPairs=Parse.parseBoolean(b);
+			}else if(a.equalsIgnoreCase("ns") || a.equalsIgnoreCase("n") || a.equalsIgnoreCase("scaffoldbreak") || a.equalsIgnoreCase("gap") || a.equalsIgnoreCase("mingap")){
+				scaffoldBreakNs=Integer.parseInt(b);
+				assert(scaffoldBreakNs>0);
+			}else if(a.equalsIgnoreCase("mindepth")){
+				minDepth=Integer.parseInt(b);
+				assert(minDepth>0);
+			}else if(a.equalsIgnoreCase("maxinsert")){
+				maxPairDist=Parse.parseIntKMG(b);
+			}else if(a.equalsIgnoreCase("minWeightRatio") || a.equalsIgnoreCase("minwr")){
+				minWeightRatio=Float.parseFloat(b);
+			}else if(a.equalsIgnoreCase("minStrandRatio") || a.equalsIgnoreCase("minsr")){
+				minStrandRatio=Float.parseFloat(b);
+			}else if(a.equals("clearfilters") || a.equals("clearfilter")){
+				if(Parse.parseBoolean(b)){
+					samFilter.clear();
+				}
+			}else if(a.equals("parse_flag_goes_here")){
+				long fake_variable=Parse.parseKMG(b);
+				//Set a variable here
+			}else if(samFilter.parse(arg, a, b)){
+				//do nothing
+			}else if(parser.parse(arg, a, b)){//Parse standard flags in the parser
+				//do nothing
+			}else{
+				outstream.println("Unknown parameter "+args[i]);
+				assert(false) : "Unknown parameter "+args[i];
+			}
+		}
+		
+		return parser;
+	}
+	
+	/** Add or remove .gz or .bz2 as needed */
+	private void fixExtensions(){
+		in=Tools.fixExtension(in);
+		ref=Tools.fixExtension(ref);
+	}
+	
+	/** Ensure files can be read and written */
+	private void checkFileExistence(){
+
+		//Ensure there is an input file
+		if(in==null){throw new RuntimeException("Error - an input file is required.");}
+
+		//Ensure there is an input file
+		if(ref==null){throw new RuntimeException("Error - a reference file is required.");}
+		
+		//Ensure output files can be written
+		if(!Tools.testOutputFiles(overwrite, append, false, out)){
+			outstream.println((out==null)+", "+out);
+			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output file "+out+"\n");
+		}
+		
+		//Ensure input files can be read
+		if(!Tools.testInputFiles(false, true, in, ref)){
+			throw new RuntimeException("\nCan't read some input files.\n");  
+		}
+		
+		//Ensure that no file was specified multiple times
+		if(!Tools.testForDuplicateFiles(true, in, ref, out)){
+			throw new RuntimeException("\nSome file names were specified multiple times.\n");
+		}
+	}
+	
+	/** Adjust file-related static fields as needed for this program */
+	private static void checkStatics(){
+		//Adjust the number of threads for input file reading
+		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2 && Shared.threads()>2){
+			ByteFile.FORCE_MODE_BF2=true;
+		}
+		
+		assert(FastaReadInputStream.settingsOK());
+	}
+	
+	/** Ensure parameter ranges are within bounds and required parameters are set */
+	private boolean validateParams(){
+//		assert(minfoo>0 && minfoo<=maxfoo) : minfoo+", "+maxfoo;
+		return true;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Outer Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+
+	/** Create read streams and process all data */
+	void process(Timer t){
+		
+		//Turn off read validation in the input threads to increase speed
+		final boolean vic=Read.VALIDATE_IN_CONSTRUCTOR;
+		Read.VALIDATE_IN_CONSTRUCTOR=Shared.threads()<4;
+		
+		//Create a read input stream
+		final SamStreamer ss=makeStreamer(ffin);
+		
+		//Load reference
+		loadReferenceCustom();
+		
+		//Reset counters
+		readsProcessed=readsOut=0;
+		basesProcessed=basesOut=0;
+		
+		//Process the reads in separate threads
+		spawnThreads(ss);
+
+		//Optionally create a read output stream
+		final ConcurrentReadOutputStream ros=makeCros();
+		
+		if(verbose){outstream.println("Fixing reference.");}
+		
+		makeScaffolds(ros);
+		
+		if(verbose){outstream.println("Finished; closing streams.");}
+		
+		//Write anything that was accumulated by ReadStats
+		errorState|=ReadStats.writeAll();
+		//Close the read streams
+		errorState|=ReadWrite.closeStream(ros);
+		
+		//Reset read validation
+		Read.VALIDATE_IN_CONSTRUCTOR=vic;
+		
+		//Report timing and results
+		t.stop();
+		outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
+		outstream.println(Tools.readsBasesOut(readsProcessed, basesProcessed, scaffoldsOut, scaffoldLengthOut, 8, false));
+		
+		outstream.println();
+		outstream.println(Tools.number("Average Insert", totalAverageInsert, 2, 8));
+		outstream.println(Tools.number("Joins Made    ", gapsAdded, 8));
+		outstream.println(Tools.number("Ns Added      ", nsAdded, 8));
+		outstream.println(Tools.number("Contigs In    ", refMap.size(), 8));
+		outstream.println(Tools.number("Scaffolds Out ", scaffoldsOut, 8));
+		
+		
+		//Throw an exception of there was an error in a thread
+		if(errorState){
+			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
+		}
+	}
+
+	private synchronized void loadReferenceCustom(){
+		assert(!loadedRef);
+		ConcurrentReadInputStream cris=makeRefCris();
+		for(ListNum<Read> ln=cris.nextList(); ln!=null && ln.size()>0; ln=cris.nextList()) {
+			for(Read r : ln){
+				String name=r.id;
+				String name2=Tools.trimToWhitespace(r.id);
+				Contig cont=new Contig(name, r.bases, r.numericID);
+				refMap.put(name, cont);
+				refMap2.put(name2, cont);
+			}
+			cris.returnList(ln);
+		}
+		ReadWrite.closeStream(cris);
+		loadedRef=true;
+	}
+	
+	private ConcurrentReadInputStream makeRefCris(){
+		ConcurrentReadInputStream cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffref, null);
+		cris.start(); //Start the stream
+		if(verbose){outstream.println("Started cris");}
+		boolean paired=cris.paired();
+		assert(!paired) : "References should not be paired.";
+		return cris;
+	}
+	
+	private SamStreamer makeStreamer(FileFormat ff){
+		if(ff==null){return null;}
+		SamStreamer ss=new SamReadStreamer(ff, streamerThreads, true, maxReads);
+		ss.start(); //Start the stream
+		if(verbose){outstream.println("Started Streamer");}
+		return ss;
+	}
+	
+	private ConcurrentReadOutputStream makeCros(){
+		if(ffout==null){return null;}
+
+		//Select output buffer size based on whether it needs to be ordered
+		final int buff=(ordered ? Tools.mid(16, 128, (Shared.threads()*2)/3) : 8);
+
+		final ConcurrentReadOutputStream ros=ConcurrentReadOutputStream.getStream(ffout, null, buff, null, false);
+		ros.start(); //Start the stream
+		return ros;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------       Thread Management      ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** Spawn process threads */
+	private void spawnThreads(final SamStreamer ss){
+		
+		//Do anything necessary prior to processing
+		
+		//Determine how many threads may be used
+		final int threads=Shared.threads();
+		
+		//Fill a list with ProcessThreads
+		ArrayList<ProcessThread> alpt=new ArrayList<ProcessThread>(threads);
+		for(int i=0; i<threads; i++){
+			alpt.add(new ProcessThread(ss, i));
+		}
+		
+		//Start the threads
+		for(ProcessThread pt : alpt){
+			pt.start();
+		}
+		
+		//Wait for threads to finish
+		boolean success=ThreadWaiter.waitForThreads(alpt, this);
+		errorState&=!success;
+		
+		//Do anything necessary after processing
+		totalAverageInsert=totalInsertSum/(double)totalInsertCount;
+		insertByPercentile=Tools.makeHistogram(insertCounts, buckets);
+	}
+	
+	@Override
+	public final void accumulate(ProcessThread pt){
+		readsProcessed+=pt.readsProcessedT;
+		basesProcessed+=pt.basesProcessedT;
+		readsOut+=pt.readsOutT;
+		basesOut+=pt.basesOutT;
+		
+		totalInsertSum+=pt.totalInsertSumT;
+		totalInsertCount+=pt.totalInsertCountT;
+		
+		errorState|=(!pt.success);
+	}
+	
+	@Override
+	public final boolean success(){return !errorState;}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Inner Methods        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	private void makeScaffolds(ConcurrentReadOutputStream ros){
+		ByteBuilder bb=new ByteBuilder(1000000);
+
+		ArrayList<Read> list=new ArrayList<Read>(200);
+		long num=0;
+		long lengthSum=0;
+		for(Entry<String, Contig> e : refMap.entrySet()){
+			Contig cont=e.getValue();
+			if(!cont.processed()){
+				Read r=cont.makeScaffold(bb);
+				assert(r!=null);
+
+				lengthSum+=r.length();
+				list.add(r);
+				scaffoldsOut++;
+				scaffoldLengthOut+=r.length();
+
+				if(list.size()>=200 || lengthSum>=100000){
+					if(ros!=null){ros.add(list, num);}
+					list=new ArrayList<Read>(200);
+					num++;
+					lengthSum=0;
+				}
+				assert(cont.processed());
+			}
+		}
+		if(list.size()>0){
+			if(ros!=null){ros.add(list, num);}
+		}
+	}
+	
+	private static int calcInsertSize(SamLine sl) {
+		assert(sl.mapped() && sl.pairedOnSameChrom());
+		assert(sl.primary());
+		assert(!sl.supplementary());
+		assert(sl.leftmost());
+		
+		assert(sl.tlen>0) : sl.tlen+"\n\n"+sl;
+		return sl.tlen>0 ? sl.tlen : -sl.tlen;
+		
+//		final int insertSize;
+//		String insertTag=null;
+//		if(sl.optional!=null){
+//			for(String s : sl.optional){
+//				if(s.startsWith("X8:Z:")){
+//					insertTag=s;
+//					break;
+//				}
+//			}
+//		}
+//		if(insertTag!=null){
+//			insertSize=Integer.parseInt(insertTag.substring(5));
+//		}else{
+//			insertSize=sl.tlen;//This is unsafe due to indels.
+//			assert(false) : "Reads need insert size tags.";
+//		}
+//		assert(insertSize>0) : sl;
+//		
+//		return insertSize;
+	}
+	
+	private Contig getScaffold(String rname){
+		Contig scaf=refMap.get(rname);
+		if(scaf==null){scaf=refMap2.get(Tools.trimToWhitespace(rname));}
+		assert(scaf!=null) : "Can't find graph for "+rname;
+		return scaf;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Inner Classes        ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** This class is static to prevent accidental writing to shared variables.
+	 * It is safe to remove the static modifier. */
+	class ProcessThread extends Thread {
+		
+		//Constructor
+		ProcessThread(final SamStreamer ss_, final int tid_){
+			ss=ss_;
+			tid=tid_;
+		}
+		
+		//Called by start()
+		@Override
+		public void run(){
+			//Do anything necessary prior to processing
+			
+			//Process the reads
+			processInner();
+			
+			//Do anything necessary after processing
+			
+			//Indicate successful exit status
+			success=true;
+		}
+		
+		/** Iterate through the reads */
+		void processInner(){
+			
+			//Grab and process all lists
+			for(ListNum<Read> ln=ss.nextReads(); ln!=null; ln=ss.nextReads()){
+//				if(verbose){outstream.println("Got list of size "+list.size());} //Disabled due to non-static access
+				
+				processList(ln);
+			}
+			
+		}
+		
+		void processList(ListNum<Read> ln){
+
+			//Grab the actual read list from the ListNum
+			final ArrayList<Read> reads=ln.list;
+			
+			//Loop through each read in the list
+			for(int idx=0; idx<reads.size(); idx++){
+				final Read r=reads.get(idx);
+				
+				//Validate reads in worker threads
+				if(!r.validated()){r.validate(true);}
+
+				//Track the initial length for statistics
+				final int initialLength=r.length();
+
+				//Increment counters
+				readsProcessedT+=r.pairCount();
+				basesProcessedT+=initialLength;
+				
+				processRead(r);
+			}
+		}
+		
+		/**
+		 * Process a read or a read pair.
+		 * @param r Read 1
+		 * @param r2 Read 2 (may be null)
+		 * @return True if the reads should be kept, false if they should be discarded.
+		 */
+		void processRead(final Read r){
+			final SamLine sl=r.samline;
+			assert(sl!=null) : sl;
+			if(samFilter!=null && !samFilter.passesFilter(sl)){return;}
+			
+			//sl.nextMapped();
+			if(sl.mapped() && sl.primary() && !sl.supplementary()){
+				final String rname=sl.rnameS();
+				Contig scaf=getScaffold(rname);
+				if(scaf!=null){
+					if(sl.pairedOnSameChrom() && sl.properPair() && sl.leftmost()){
+						final int insertSize=calcInsertSize(sl);
+						insertCounts.incrementAndGet(Tools.mid(0, insertSize, insertCounts.length()-1));
+						totalInsertSumT+=insertSize;
+						totalInsertCountT++;
+					}
+					scaf.add(sl);
+
+					readsOutT++;
+					basesOutT+=r.length();
+				}
+			}
+			if(sl.mapped() && sl.pairedOnSameChrom() && sl.properPair() && sl.primary() && !sl.supplementary() && sl.leftmost()){
+				final String rname=sl.rnameS();
+				
+				Contig scaf=getScaffold(rname);
+				if(scaf!=null){
+					final int insertSize=calcInsertSize(sl);
+					insertCounts.incrementAndGet(Tools.mid(0, insertSize, insertCounts.length()-1));
+					scaf.add(sl);
+
+					readsOutT++;
+					basesOutT+=r.length();
+
+					totalInsertSumT+=insertSize;
+					totalInsertCountT++;
+				}
+			}
+		}
+
+		/** Number of reads processed by this thread */
+		protected long readsProcessedT=0;
+		/** Number of bases processed by this thread */
+		protected long basesProcessedT=0;
+		
+		/** Number of reads retained by this thread */
+		protected long readsOutT=0;
+		/** Number of bases retained by this thread */
+		protected long basesOutT=0;
+		
+		protected long totalInsertSumT=0;
+		protected long totalInsertCountT=0;
+		
+		long insertSum=0;
+		
+		/** True only if this thread has completed successfully */
+		boolean success=false;
+		
+		/** Shared input stream */
+		private final SamStreamer ss;
+		/** Thread ID */
+		final int tid;
+	}
+	
+	Contig findLeftmost(Contig source){
+		if(verbose){System.err.println("findLeftmost("+source.name+")");}
+		while(true) {
+			assert(!source.processed());
+			if(source.processed()){return null;}
+			source.processedLeft=true;
+			Edge se=source.bestEdge(true);
+			if(verbose){System.err.println("Found source edge "+se);}
+			if(se==null){return source;}
+			Contig dest=se.dest;
+			if(dest.processed()){
+				if(verbose){System.err.println("Dest was processed; returning.");}
+				return source;
+			}
+			if(se.sameStrand()){
+				if(source.strand==dest.strand){
+
+				}else{
+					if(verbose){System.err.println("Flipping "+dest.name);}
+					dest.flip();
+				}
+			}else{
+				if(source.strand==dest.strand){
+					if(verbose){System.err.println("Flipping "+dest.name);}
+					dest.flip();
+				}else{
+
+				}
+			}
+			Edge de=dest.bestEdge(false);
+			if(verbose){System.err.println("Found dest edge "+de);}
+			if(de==null || de.dest!=source){
+				if(dest.strand==1){dest.flip();}
+				if(verbose){System.err.println("Dest edge did not match; returning.");}
+				return source;
+			}
+			source=dest;
+			if(verbose){System.err.println("Migrated to next node.");}
+		}
+	}
+	
+	Read expandRight(final Contig source0, ByteBuilder bb){
+		if(verbose){System.err.println("expandRight("+source0.name+")");}
+		bb.clear();
+		Contig source=source0;
+		while(true) {
+			assert(!source.processedRight);
+			if(source.processedRight){return null;}
+			if(source.strand==1){
+				Tools.reverseInPlace(source.depthArray);
+			}
+			source.processedRight=true;
+			bb.append(source.bases);
+			Edge se=source.bestEdge(false);
+			if(verbose){System.err.println("Found source edge "+se);}
+			if(se==null){break;}
+			Contig dest=se.dest;
+			if(dest.processedRight){
+				if(verbose){System.err.println("Dest was processed; returning.");}
+				break;
+			}
+			if(se.sameStrand()){
+				if(source.strand==dest.strand){
+
+				}else{
+					if(verbose){System.err.println("Flipping "+dest.name);}
+					dest.flip();
+				}
+			}else{
+				if(source.strand==dest.strand){
+					if(verbose){System.err.println("Flipping "+dest.name);}
+					dest.flip();
+				}else{
+
+				}
+			}
+			Edge de=dest.bestEdge(true);
+			if(verbose){System.err.println("Found dest edge "+de);}
+			if(de==null || de.dest!=source){
+				if(verbose){System.err.println("Dest edge did not match; returning.");}
+				if(dest.strand==1){dest.flip();}
+				break;
+			}
+			
+			//Now append Ns
+			int observedLength=(int)(se.distanceSum/se.count());
+			long depth=se.count();
+			int depthProxyIndex=(source.length()-Tools.min(source.length()/2, 300));
+			long depthProxy=source.depthArray.get(depthProxyIndex);
+			int percentile=(int)(buckets*depth/(float)(depth+depthProxy));
+			int inferredLength=insertByPercentile[percentile];
+			
+			int Ns=(Tools.max(scaffoldBreakNs, inferredLength-observedLength));
+			for(int i=0; i<Ns; i++){bb.append('N');}
+			source=dest;
+			
+			gapsAdded++;
+			nsAdded+=Ns;
+		}
+		Read r=new Read(bb.toBytes(), null, source0.name, source0.numericID);
+		return r;
+	}
+	
+	/*--------------------------------------------------------------*/
+	
+	private class Contig {
+		
+		Contig(String name_, byte[] bases_, long numericID_){
+			name=name_;
+			bases=bases_;
+			numericID=(int)numericID_;
+			depthArray=new AtomicIntegerArray(bases.length);
+		}
+		
+		public Read makeScaffold(ByteBuilder bb) {
+			assert(!processed());
+			Contig leftmost=findLeftmost(this);
+			return expandRight(leftmost, bb);
+		}
+		
+		Edge bestEdge(boolean left) {
+			final LinkedHashMap<String, Edge> map=(left ? leftEdgeMap : rightEdgeMap);
+			if(map.isEmpty()){return null;}
+			long weightSum=0;
+			long countSum=0;
+			Edge best=null;
+			for(Entry<String, Edge> entry : map.entrySet()){
+				Edge e=entry.getValue();
+				weightSum+=e.weight;
+				countSum+=e.count();
+				if(best==null || e.weight>best.weight){best=e;}
+			}
+			if(best.count()<minDepth){return null;}
+			if(weightSum*minWeightRatio>best.weight){return null;}
+			if(best.strandRatio()<minStrandRatio){return null;}
+			if(best.badCount>0.5*best.count()){return null;}
+			return best;
+		}
+
+		void add(SamLine sl){
+			assert(sl.mapped() && sl.primary() && !sl.supplementary());
+			if(sl.nextMapped()){
+				if(sl.pairedOnSameChrom()){
+					if(!sl.properPair()){
+						addCoverageSingleton(sl);
+					}else if(sl.leftmost()){
+						addCoveragePaired(sl);
+					}
+				}else{
+					addCoverageSingleton(sl);
+					handleMixedPair(sl);
+				}
+			}else{
+				addCoverageSingleton(sl);
+			}
+		}
+		
+		private void addCoverageSingleton(SamLine sl){
+			assert(sl.cigar!=null);
+			int start=sl.pos-1;
+			int stop=start+SamLine.calcCigarLength(sl.cigar, false, false);
+			
+			for(int i=start; i<stop; i++){
+				if(i>=0 && i<bases.length){
+					depthArray.incrementAndGet(i);
+				}
+			}
+		}
+		
+		private void addCoveragePaired(SamLine sl){
+			assert(sl.cigar!=null);
+			assert(sl.leftmost() && sl.pairedOnSameChrom() && sl.nextMapped());
+			int start=sl.pos-1;
+			int stop=start+sl.tlen;
+			
+			for(int i=start; i<stop; i++){
+				if(i>=0 && i<bases.length){
+					depthArray.incrementAndGet(i);
+				}
+			}
+		}
+		
+		/** Reads mapping to different contigs */
+		private void handleMixedPair(SamLine sl){
+			assert(sl.mapped() && sl.nextMapped() && !sl.pairedOnSameChrom());
+			String rname=sl.rnameS();
+			String rnext=sl.rnextS();
+			if(rname.equals(rnext)){return;}
+			
+			final boolean left=(sl.strand()==1);
+			LinkedHashMap<String, Edge> map=(left ? leftEdgeMap : rightEdgeMap);
+			Edge e=map.get(rnext);
+			Contig dest=null;
+			if(e==null){
+				dest=getScaffold(rnext);
+				if(dest==null){return;}
+				e=new Edge(this, dest, left);
+				map.put(rnext, e);
+			}
+			e.add(sl);
+		}
+		
+		void flip(){//Be careful with this
+			AminoAcid.reverseComplementBasesInPlace(bases);
+			strand^=1;
+			LinkedHashMap<String, Edge> temp=leftEdgeMap;
+			leftEdgeMap=rightEdgeMap;
+			rightEdgeMap=temp;
+		}
+		
+		int length(){return bases.length;}
+		
+		final int numericID;
+		final String name;
+		final byte[] bases;
+		final AtomicIntegerArray depthArray;
+		int strand=0;
+
+		boolean processedLeft=false;
+		boolean processedRight=false;
+		boolean processed(){return processedLeft || processedRight;}
+
+		LinkedHashMap<String, Edge> leftEdgeMap=new LinkedHashMap<String, Edge>();
+		LinkedHashMap<String, Edge> rightEdgeMap=new LinkedHashMap<String, Edge>();
+	}
+	
+	private class Edge{
+		
+		Edge(Contig source_, Contig dest_, boolean left_){
+			source=source_;
+			dest=dest_;
+			leftEdge=left_;
+		}
+
+		void add(SamLine sl){
+			final boolean sameStrandReads=(sl.strand()==sl.mateStrand());
+			final boolean sameStrandContigs=(sameStrandPairs==sameStrandReads);
+			final int spos, dpos;
+			if(leftEdge){
+				spos=sl.pos+sl.calcCigarLength(true, false)-1;
+				dpos=(sameStrandContigs ? dest.length()-sl.pnext : sl.pnext+sl.length())-1;
+			}else{
+				spos=source.length()-sl.pos-1;
+				dpos=(sameStrandContigs ? sl.pnext+sl.length() : dest.length()-sl.pnext)-1;
+			}
+			final int distance=spos+dpos;
+			
+			if(distance>maxPairDist){
+
+//				assert(false) : "distance="+distance+", spos="+spos+", dpos="+dpos+", sameStrandContigs="+sameStrandContigs+
+//					"\nsl.pos="+sl.pos+", sl.pnext="+sl.pnext+", strand="+sl.strand()+", nextStrand="+sl.mateStrand()+", left="+leftEdge
+//					+"\n"+sl;
+//				badCount++;
+				return;
+			}
+			
+			distanceSum+=distance;
+			
+			weight+=sl.mapq;
+			if(sameStrandContigs){
+				sameStrandCount++;
+			}else{
+				difStrandCount++;
+			}
+//			assert(false) : weight;
+		}
+		
+		public float strandRatio() {
+			return Tools.max(sameStrandCount, difStrandCount)/(float)(sameStrandCount+difStrandCount);
+		}
+		
+		public boolean sameStrand(){
+			return sameStrandCount>=difStrandCount;
+		}
+		
+		@Override
+		public String toString(){
+			return "("+source.name+"->"+dest.name+", "+(leftEdge ? "left" : "right")+", weight="+weight+
+					", same="+sameStrandCount+", dif="+difStrandCount+", bad="+badCount+")";
+		}
+		
+		long count(){return sameStrandCount+difStrandCount;}
+		
+		final Contig source;
+		final Contig dest;
+		long sameStrandCount;
+		long difStrandCount;
+		long distanceSum;
+		long weight;
+		long badCount;
+		final boolean leftEdge;
+	}
+	
+	/*--------------------------------------------------------------*/
+	/*----------------            Fields            ----------------*/
+	/*--------------------------------------------------------------*/
+
+	/** Primary input file path */
+	private String in=null;
+	/** Secondary input file path */
+	private String ref=null;
+
+	/** Primary output file path */
+	private String out=null;
+	
+	/** Override input file extension */
+	private String extin=null;
+	/** Override output file extension */
+	private String extout=null;
+	
+	private String insertList=null;
+	
+	/*--------------------------------------------------------------*/
+
+	/** Number of reads processed */
+	protected long readsProcessed=0;
+	/** Number of bases processed */
+	protected long basesProcessed=0;
+
+	/** Number of reads retained */
+	protected long readsOut=0;
+	/** Number of bases retained */
+	protected long basesOut=0;
+
+	protected long scaffoldsOut=0;
+	protected long scaffoldLengthOut=0;
+	
+	protected long totalInsertSum=0;
+	protected long totalInsertCount=0;
+	protected double totalAverageInsert;
+	
+	/** Quit after processing this many input reads; -1 means no limit */
+	private long maxReads=-1;
+	
+	boolean sameStrandPairs=false;
+	
+	int gapsAdded=0;
+	long nsAdded=0;
+	
+	/*--------------------------------------------------------------*/
+	
+	/** Threads dedicated to reading the sam file */
+	private int streamerThreads=SamStreamer.DEFAULT_THREADS;
+	
+	private boolean loadedRef=false;
+	
+	private int minDepth=4;
+
+	private float minWeightRatio=0.8f;
+	private float minStrandRatio=0.8f;
+	
+	private int scaffoldBreakNs=10;
+	
+	private int maxPairDist=3000;
+	
+	private int buckets=1000;
+	protected AtomicLongArray insertCounts=new AtomicLongArray(20000);
+	protected int[] insertByPercentile;
+	
+	public final SamFilter samFilter=new SamFilter();
+	
+	/** Uses full ref names */
+	public LinkedHashMap<String, Contig> refMap=new LinkedHashMap<String, Contig>();
+	/** Uses truncated ref names */
+	public LinkedHashMap<String, Contig> refMap2=new LinkedHashMap<String, Contig>();
+	
+	/*--------------------------------------------------------------*/
+	/*----------------         Final Fields         ----------------*/
+	/*--------------------------------------------------------------*/
+
+	/** Primary input file */
+	private final FileFormat ffin;
+	/** Secondary input file */
+	private final FileFormat ffref;
+	
+	/** Primary output file */
+	private final FileFormat ffout;
+	
+	/*--------------------------------------------------------------*/
+	/*----------------        Common Fields         ----------------*/
+	/*--------------------------------------------------------------*/
+	
+	/** Print status messages to this output stream */
+	private PrintStream outstream=System.err;
+	/** Print verbose messages */
+	public static boolean verbose=false;
+	/** True if an error was encountered */
+	public boolean errorState=false;
+	/** Overwrite existing output files */
+	private boolean overwrite=false;
+	/** Append to existing output files */
+	private boolean append=false;
+	/** Reads are output in input order */
+	private boolean ordered=false;
+	
+}