jpayne@68: package kmer; jpayne@68: jpayne@68: import java.io.PrintStream; jpayne@68: import java.util.BitSet; jpayne@68: jpayne@68: import dna.AminoAcid; jpayne@68: import jgi.Dedupe; jpayne@68: import shared.PreParser; jpayne@68: import shared.Shared; jpayne@68: import shared.Timer; jpayne@68: import shared.Tools; jpayne@68: import stream.Read; jpayne@68: import structures.IntList; jpayne@68: jpayne@68: /** jpayne@68: * @author Brian Bushnell jpayne@68: * @date Mar 5, 2015 jpayne@68: * jpayne@68: */ jpayne@68: public class TableReader { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Initialization ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** jpayne@68: * Code entrance from the command line. jpayne@68: * @param args Command line arguments jpayne@68: */ jpayne@68: public static void main(String[] args){ jpayne@68: jpayne@68: {//Preparse block for help, config files, and outstream jpayne@68: PreParser pp=new PreParser(args, null, false); jpayne@68: args=pp.args; jpayne@68: outstream=pp.outstream; jpayne@68: } jpayne@68: jpayne@68: Timer t=new Timer(); jpayne@68: jpayne@68: AbstractKmerTable[] tables=TableLoaderLockFree.makeTables(AbstractKmerTable.ARRAY1D, 12, -1L, false, 1.0); jpayne@68: jpayne@68: int k=31; jpayne@68: int mink=0; jpayne@68: int speed=0; jpayne@68: int hdist=0; jpayne@68: int edist=0; jpayne@68: boolean rcomp=true; jpayne@68: boolean maskMiddle=false; jpayne@68: jpayne@68: //Create a new Loader instance jpayne@68: TableLoaderLockFree loader=new TableLoaderLockFree(tables, k, mink, speed, hdist, edist, rcomp, maskMiddle); jpayne@68: loader.setRefSkip(0); jpayne@68: loader.hammingDistance2=0; jpayne@68: loader.editDistance2=0; jpayne@68: loader.storeMode(TableLoaderLockFree.SET_IF_NOT_PRESENT); jpayne@68: jpayne@68: ///And run it jpayne@68: String[] refs=args; jpayne@68: String[] literals=null; jpayne@68: boolean keepNames=false; jpayne@68: boolean useRefNames=false; jpayne@68: long kmers=loader.processData(refs, literals, keepNames, useRefNames, false); jpayne@68: t.stop(); jpayne@68: jpayne@68: outstream.println("Load Time:\t"+t); jpayne@68: outstream.println("Return: \t"+kmers); jpayne@68: outstream.println("refKmers: \t"+loader.refKmers); jpayne@68: outstream.println("refBases: \t"+loader.refBases); jpayne@68: outstream.println("refReads: \t"+loader.refReads); jpayne@68: jpayne@68: int qskip=0; jpayne@68: int qhdist=0; jpayne@68: TableReader tr=new TableReader(k, mink, speed, qskip, qhdist, rcomp, maskMiddle); jpayne@68: jpayne@68: //TODO: Stuff... jpayne@68: jpayne@68: //Close the print stream if it was redirected jpayne@68: Shared.closeStream(outstream); jpayne@68: } jpayne@68: jpayne@68: public TableReader(int k_){ jpayne@68: this(k_, 0, 0, 0, 0, true, false); jpayne@68: } jpayne@68: jpayne@68: public TableReader(int k_, int mink_, int speed_, int qskip_, int qhdist_, boolean rcomp_, boolean maskMiddle_){ jpayne@68: k=k_; jpayne@68: k2=k-1; jpayne@68: mink=mink_; jpayne@68: rcomp=rcomp_; jpayne@68: useShortKmers=(mink>0 && mink0); jpayne@68: jpayne@68: //Replace kmer hit zone with the trim symbol jpayne@68: for(int i=0; i63 ? -1L : ~((-1L)<>>2)|(x2<=minHits){ jpayne@68: return (found=found+1); //Early exit jpayne@68: } jpayne@68: found++; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: return found; jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Returns the id of the sequence with the most kmer matches to this read, or -1 if none are at least minHits. jpayne@68: * @param r Read to process jpayne@68: * @param sets Kmer tables jpayne@68: * @return id of best match jpayne@68: */ jpayne@68: public final int findBestMatch(final Read r, final AbstractKmerTable[] sets){ jpayne@68: idList.size=0; jpayne@68: if(r==null || r.length()63 ? -1L : ~((-1L)<>>2)|(x2<=minHits){ jpayne@68: max=condenseLoose(countArray, idList, countList); jpayne@68: int id0=-1; jpayne@68: for(int i=0; i63 ? -1L : ~((-1L)<>>2)|(x2<0){ jpayne@68: if(id0<0){id0=id;} jpayne@68: if(verbose){ jpayne@68: outstream.println("b: Found "+kmer); jpayne@68: outstream.println("Setting "+0+", "+(i+plus)); jpayne@68: } jpayne@68: bs.set(0, i+plus); jpayne@68: found++; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: //Look for short kmers on right side jpayne@68: { jpayne@68: kmer=0; jpayne@68: rkmer=0; jpayne@68: len=0; jpayne@68: final int lim=Tools.max(-1, stop-k); jpayne@68: for(int i=stop-1; i>lim; i--){ jpayne@68: byte b=bases[i]; jpayne@68: long x=Dedupe.baseToNumber[b]; jpayne@68: long x2=Dedupe.baseToComplementNumber[b]; jpayne@68: kmer=kmer|(x<<(2*len)); jpayne@68: rkmer=((rkmer<<2)|x2)&mask; jpayne@68: len++; jpayne@68: if(verbose){outstream.println("Scanning5 i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));} jpayne@68: if(len>=mink){ jpayne@68: if(verbose){ jpayne@68: outstream.println("Looking for right kmer "+ jpayne@68: AminoAcid.kmerToString(kmer&~lengthMasks[len], len)+"; value="+toValue(kmer, rkmer, lengthMasks[len])+"; kmask="+lengthMasks[len]); jpayne@68: } jpayne@68: final int id=getValue(kmer, rkmer, len, qHammingDistance2, i, sets); jpayne@68: if(id>0){ jpayne@68: if(id0<0){id0=id;} jpayne@68: if(verbose){ jpayne@68: outstream.println("c: Found "+kmer); jpayne@68: outstream.println("Setting "+Tools.max(0, i-trimPad)+", "+bases.length); jpayne@68: } jpayne@68: bs.set(Tools.max(0, i-trimPad), bases.length); jpayne@68: found++; jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: jpayne@68: if(verbose){outstream.println("found="+found+", bitset="+bs);} jpayne@68: jpayne@68: if(found==0){return null;} jpayne@68: assert(found>0) : "Overflow in 'found' variable."; jpayne@68: jpayne@68: int cardinality=bs.cardinality(); jpayne@68: assert(cardinality>0); jpayne@68: jpayne@68: return bs; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Helper Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /** jpayne@68: * Transforms a kmer into all canonical values for a given Hamming distance. jpayne@68: * Returns the related id stored in the tables. jpayne@68: * @param kmer Forward kmer jpayne@68: * @param rkmer Reverse kmer jpayne@68: * @param len kmer length jpayne@68: * @param qHDist Hamming distance jpayne@68: * @param qPos Position of kmer in query jpayne@68: * @param sets Kmer hash tables jpayne@68: * @return Value stored in table, or -1 jpayne@68: */ jpayne@68: public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final int qPos, final AbstractKmerTable[] sets){ jpayne@68: if(qSkip>1 && (qPos%qSkip!=0)){return -1;} jpayne@68: return qHDist<1 ? getValue(kmer, rkmer, len, sets) : getValue(kmer, rkmer, len, qHDist, sets); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * Transforms a kmer into all canonical values for a given Hamming distance. jpayne@68: * Returns the related id stored in the tables. jpayne@68: * @param kmer Forward kmer jpayne@68: * @param rkmer Reverse kmer jpayne@68: * @param len kmer length jpayne@68: * @param qHDist Hamming distance jpayne@68: * @param sets Kmer hash tables jpayne@68: * @return Value stored in table, or -1 jpayne@68: */ jpayne@68: public final int getValue(final long kmer, final long rkmer, final int len, final int qHDist, final AbstractKmerTable[] sets){ jpayne@68: int id=getValue(kmer, rkmer, len, sets); jpayne@68: if(id<1 && qHDist>0){ jpayne@68: final int qHDistMinusOne=qHDist-1; jpayne@68: jpayne@68: //Sub jpayne@68: for(int j=0; j<4 && id<1; j++){ jpayne@68: for(int i=0; i=speed){ jpayne@68: if(verbose){outstream.println("Testing key "+key);} jpayne@68: AbstractKmerTable set=sets[(int)(key%WAYS)]; jpayne@68: final int id=set.getValue(key); jpayne@68: return id; jpayne@68: } jpayne@68: return -1; jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /** jpayne@68: * Transforms a kmer into a canonical value stored in the table. Expected to be inlined. jpayne@68: * @param kmer Forward kmer jpayne@68: * @param rkmer Reverse kmer jpayne@68: * @param lengthMask Bitmask with single '1' set to left of kmer jpayne@68: * @return Canonical value jpayne@68: */ jpayne@68: private final long toValue(long kmer, long rkmer, long lengthMask){ jpayne@68: assert(lengthMask==0 || (kmer0 or edist>0. Default: false. */ jpayne@68: public boolean forbidNs=false; jpayne@68: jpayne@68: /** Replace bases covered by matched kmers with this symbol */ jpayne@68: public byte trimSymbol='N'; jpayne@68: jpayne@68: /** Convert masked bases to lowercase */ jpayne@68: public boolean kmaskLowercase=false; jpayne@68: jpayne@68: /** Don't look for kmers in read 1 */ jpayne@68: public boolean skipR1=false; jpayne@68: /** Don't look for kmers in read 2 */ jpayne@68: public boolean skipR2=false; jpayne@68: jpayne@68: /** A read must contain at least this many kmer hits before being considered a match. Default: 1 */ jpayne@68: public int minHits=1; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Statistics ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: // public long storedKmers=0; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Per-Thread Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public int[] countArray; jpayne@68: jpayne@68: private final IntList idList=new IntList(); jpayne@68: private final IntList countList=new IntList(); jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Final Primitives ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Look for reverse-complements as well as forward kmers. Default: true */ jpayne@68: private final boolean rcomp; jpayne@68: /** AND bitmask with 0's at the middle base */ jpayne@68: private final long middleMask; jpayne@68: jpayne@68: /** Normal kmer length */ jpayne@68: private final int k; jpayne@68: /** k-1; used in some expressions */ jpayne@68: private final int k2; jpayne@68: /** Shortest kmer to use for trimming */ jpayne@68: private final int mink; jpayne@68: /** Attempt to match kmers shorter than normal k on read ends when doing kTrimming. */ jpayne@68: private final boolean useShortKmers; jpayne@68: jpayne@68: /** Fraction of kmers to skip, 0 to 15 out of 16 */ jpayne@68: private final int speed; jpayne@68: jpayne@68: /** Skip this many kmers when examining the read. Default 1. jpayne@68: * 1 means every kmer is used, 2 means every other, etc. */ jpayne@68: private final int qSkip; jpayne@68: jpayne@68: /** noAccel is true if speed and qSkip are disabled, accel is the opposite. */ jpayne@68: private final boolean noAccel, accel; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Static Fields ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Number of tables (and threads, during loading) */ jpayne@68: private static final int WAYS=7; //123 jpayne@68: /** Verbose messages */ jpayne@68: public static final boolean verbose=false; //123 jpayne@68: jpayne@68: /** Print messages to this stream */ jpayne@68: private static PrintStream outstream=System.err; jpayne@68: jpayne@68: /** x&clearMasks[i] will clear base i */ jpayne@68: private static final long[] clearMasks; jpayne@68: /** x|setMasks[i][j] will set base i to j */ jpayne@68: private static final long[][] setMasks; jpayne@68: /** x&leftMasks[i] will clear all bases to the right of i (exclusive) */ jpayne@68: private static final long[] leftMasks; jpayne@68: /** x&rightMasks[i] will clear all bases to the left of i (inclusive) */ jpayne@68: private static final long[] rightMasks; jpayne@68: /** x|kMasks[i] will set the bit to the left of the leftmost base */ jpayne@68: private static final long[] lengthMasks; jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Static Initializers ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: static{ jpayne@68: clearMasks=new long[32]; jpayne@68: leftMasks=new long[32]; jpayne@68: rightMasks=new long[32]; jpayne@68: lengthMasks=new long[32]; jpayne@68: setMasks=new long[4][32]; jpayne@68: for(int i=0; i<32; i++){ jpayne@68: clearMasks[i]=~(3L<<(2*i)); jpayne@68: } jpayne@68: for(int i=0; i<32; i++){ jpayne@68: leftMasks[i]=((-1L)<<(2*i)); jpayne@68: } jpayne@68: for(int i=0; i<32; i++){ jpayne@68: rightMasks[i]=~((-1L)<<(2*i)); jpayne@68: } jpayne@68: for(int i=0; i<32; i++){ jpayne@68: lengthMasks[i]=((1L)<<(2*i)); jpayne@68: } jpayne@68: for(int i=0; i<32; i++){ jpayne@68: for(long j=0; j<4; j++){ jpayne@68: setMasks[(int)j][i]=(j<<(2*i)); jpayne@68: } jpayne@68: } jpayne@68: } jpayne@68: jpayne@68: }