jpayne@68: package kmer; jpayne@68: jpayne@68: import java.util.concurrent.atomic.AtomicIntegerArray; jpayne@68: import java.util.concurrent.atomic.AtomicLong; jpayne@68: import java.util.concurrent.locks.Lock; jpayne@68: jpayne@68: import dna.AminoAcid; jpayne@68: import fileIO.ByteStreamWriter; jpayne@68: import fileIO.TextStreamWriter; jpayne@68: import shared.KillSwitch; jpayne@68: import shared.Shared; jpayne@68: import shared.Tools; jpayne@68: import structures.ByteBuilder; jpayne@68: import structures.SuperLongList; jpayne@68: jpayne@68: jpayne@68: /** jpayne@68: * @author Brian Bushnell jpayne@68: * @date Oct 23, 2013 jpayne@68: * jpayne@68: */ jpayne@68: public abstract class AbstractKmerTable { jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Abstract Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: // /** Returns count */ jpayne@68: // public final int increment(long kmer){return increment(kmer, 1);} jpayne@68: jpayne@68: /** Returns count */ jpayne@68: public abstract int increment(final long kmer, final int incr); jpayne@68: jpayne@68: // /** Returns number of entries created */ jpayne@68: // public final int incrementAndReturnNumCreated(final long kmer){return incrementAndReturnNumCreated(kmer, 1);} jpayne@68: jpayne@68: /** Returns number of entries created. Incr must be positive. */ jpayne@68: public abstract int incrementAndReturnNumCreated(final long kmer, final int incr); jpayne@68: jpayne@68: public abstract int set(long kmer, int value); jpayne@68: jpayne@68: // public abstract int set(long kmer, int[] vals); jpayne@68: jpayne@68: /** This is for IntList3 support with HashArrayHybridFast */ jpayne@68: public abstract int set(long kmer, int[] vals, int vlen); jpayne@68: jpayne@68: /** Returns number of kmers added */ jpayne@68: public abstract int setIfNotPresent(long kmer, int value); jpayne@68: jpayne@68: /** jpayne@68: * Fetch the value associated with a kmer. jpayne@68: * @param kmer jpayne@68: * @return A value. -1 means the kmer was not present. jpayne@68: */ jpayne@68: public abstract int getValue(long kmer); jpayne@68: jpayne@68: /** jpayne@68: * Fetch the values associated with a kmer. jpayne@68: * @param kmer jpayne@68: * @param singleton A blank array of length 1. jpayne@68: * @return An array filled with values. Values of -1 are invalid. jpayne@68: */ jpayne@68: public abstract int[] getValues(long kmer, int[] singleton); jpayne@68: jpayne@68: public abstract boolean contains(long kmer); jpayne@68: jpayne@68: public final boolean contains(long kmer, int v){ jpayne@68: assert(TESTMODE); jpayne@68: int[] set=getValues(kmer, new int[] {-1}); jpayne@68: if(set==null){return false;} jpayne@68: for(int s : set){ jpayne@68: if(s==-1){break;} jpayne@68: if(s==v){return true;} jpayne@68: } jpayne@68: return false; jpayne@68: } jpayne@68: jpayne@68: public final boolean contains(long kmer, int[] vals){ jpayne@68: assert(TESTMODE); jpayne@68: int[] set=getValues(kmer, new int[] {-1}); jpayne@68: if(set==null){return false;} jpayne@68: boolean success=true; jpayne@68: for(int v : vals){ jpayne@68: if(v==-1){break;} jpayne@68: success=false; jpayne@68: for(int s : set){ jpayne@68: if(s==v){ jpayne@68: success=true; jpayne@68: break; jpayne@68: } jpayne@68: } jpayne@68: if(!success){break;} jpayne@68: } jpayne@68: return success; jpayne@68: } jpayne@68: jpayne@68: public abstract void rebalance(); jpayne@68: jpayne@68: public abstract long size(); jpayne@68: public abstract int arrayLength(); jpayne@68: public abstract boolean canRebalance(); jpayne@68: jpayne@68: public abstract boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount); jpayne@68: public abstract boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining); jpayne@68: public abstract boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining); jpayne@68: jpayne@68: public abstract void fillHistogram(long[] ca, int max); jpayne@68: public abstract void fillHistogram(SuperLongList sll); jpayne@68: public abstract void countGC(long[] gcCounts, int max); jpayne@68: jpayne@68: public static final int gc(long kmer){ jpayne@68: int gc=0; jpayne@68: while(kmer>0){ jpayne@68: long x=kmer&3; jpayne@68: kmer>>>=2; jpayne@68: if(x==1 || x==2){gc++;} jpayne@68: } jpayne@68: return gc; jpayne@68: } jpayne@68: jpayne@68: abstract Object get(long kmer); jpayne@68: abstract void resize(); jpayne@68: abstract boolean canResize(); jpayne@68: jpayne@68: jpayne@68: jpayne@68: /** jpayne@68: * Removes entries with a value of the limit or less. jpayne@68: * Rehashes the remainder. jpayne@68: * @return Number removed. jpayne@68: */ jpayne@68: abstract long regenerate(int limit); jpayne@68: jpayne@68: final void lock(){getLock().lock();} jpayne@68: final void unlock(){getLock().unlock();} jpayne@68: final boolean tryLock(){return getLock().tryLock();} jpayne@68: Lock getLock(){ jpayne@68: throw new RuntimeException("Unimplemented."); jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*--------------- Allocation Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: final static AtomicIntegerArray allocAtomicInt(int len){ jpayne@68: return KillSwitch.allocAtomicInt(len); jpayne@68: } jpayne@68: jpayne@68: final static long[] allocLong1D(int len){ jpayne@68: return KillSwitch.allocLong1D(len); jpayne@68: } jpayne@68: jpayne@68: final static long[][] allocLong2D(int mult, int len){ jpayne@68: return KillSwitch.allocLong2D(mult, len); jpayne@68: } jpayne@68: jpayne@68: final static int[] allocInt1D(int len){ jpayne@68: return KillSwitch.allocInt1D(len); jpayne@68: } jpayne@68: jpayne@68: final static int[][] allocInt2D(int len){ jpayne@68: return KillSwitch.allocInt2D(len); jpayne@68: } jpayne@68: jpayne@68: final static KmerNode[] allocKmerNodeArray(int len){ jpayne@68: KmerNode[] ret=null; jpayne@68: try { jpayne@68: ret=new KmerNode[len]; jpayne@68: } catch (OutOfMemoryError e) { jpayne@68: synchronized(killMessage){ jpayne@68: e.printStackTrace(); jpayne@68: System.err.println(killMessage); jpayne@68: // Shared.printMemory(); jpayne@68: KillSwitch.killSilent(); jpayne@68: } jpayne@68: } jpayne@68: return ret; jpayne@68: } jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*--------------- Ownership Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: /** Set the thread owning this kmer. Return the new owner. jpayne@68: * Will only change the owner if newOwner is greater than current owner. */ jpayne@68: public abstract int setOwner(long kmer, int newOwner); jpayne@68: jpayne@68: /** Reset owner to -1 if this is the current owner. */ jpayne@68: public abstract boolean clearOwner(long kmer, int owner); jpayne@68: jpayne@68: /** Return the thread ID owning this kmer, or -1. */ jpayne@68: public abstract int getOwner(long kmer); jpayne@68: jpayne@68: /** Create data structures needed for ownership representation */ jpayne@68: public abstract void initializeOwnership(); jpayne@68: jpayne@68: /** Eliminate ownership data structures or set them to -1. */ jpayne@68: public abstract void clearOwnership(); jpayne@68: jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: /*---------------- Methods ----------------*/ jpayne@68: /*--------------------------------------------------------------*/ jpayne@68: jpayne@68: public static final StringBuilder toText(long kmer, int k){ jpayne@68: byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); jpayne@68: int bits=(Shared.AMINO_IN ? 5 : 2); jpayne@68: int mask=(Shared.AMINO_IN ? 31 : 3); jpayne@68: StringBuilder sb=new StringBuilder(k); jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: sb.append((char)lookup[x]); jpayne@68: } jpayne@68: return sb; jpayne@68: } jpayne@68: jpayne@68: static final StringBuilder toText(long kmer, int count, int k){ jpayne@68: StringBuilder sb=new StringBuilder(k+10); jpayne@68: return toText(kmer, count, k, sb); jpayne@68: } jpayne@68: jpayne@68: static final ByteBuilder toBytes(long kmer, int count, int k){ jpayne@68: ByteBuilder bb=new ByteBuilder(k+10); jpayne@68: return toBytes(kmer, count, k, bb); jpayne@68: } jpayne@68: jpayne@68: static final StringBuilder toText(long kmer, int[] values, int k){ jpayne@68: StringBuilder sb=new StringBuilder(k+10); jpayne@68: return toText(kmer, values, k, sb); jpayne@68: } jpayne@68: jpayne@68: static final ByteBuilder toBytes(long kmer, int[] values, int k){ jpayne@68: ByteBuilder bb=new ByteBuilder(k+10); jpayne@68: return toBytes(kmer, values, k, bb); jpayne@68: } jpayne@68: jpayne@68: static final StringBuilder toText(long kmer, int count, int k, StringBuilder sb){ jpayne@68: byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); jpayne@68: int bits=(Shared.AMINO_IN ? 5 : 2); jpayne@68: int mask=(Shared.AMINO_IN ? 31 : 3); jpayne@68: if(FASTA_DUMP){ jpayne@68: sb.append('>'); jpayne@68: sb.append(count); jpayne@68: sb.append('\n'); jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: sb.append((char)lookup[x]); jpayne@68: } jpayne@68: }else{ jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: sb.append((char)lookup[x]); jpayne@68: } jpayne@68: sb.append('\t'); jpayne@68: sb.append(count); jpayne@68: } jpayne@68: return sb; jpayne@68: } jpayne@68: jpayne@68: static final StringBuilder toText(long kmer, int[] values, int k, StringBuilder sb){ jpayne@68: byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); jpayne@68: int bits=(Shared.AMINO_IN ? 5 : 2); jpayne@68: int mask=(Shared.AMINO_IN ? 31 : 3); jpayne@68: if(FASTA_DUMP){ jpayne@68: sb.append('>'); jpayne@68: for(int i=0; i0){sb.append(',');} jpayne@68: sb.append(x); jpayne@68: } jpayne@68: sb.append('\n'); jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: sb.append((char)lookup[x]); jpayne@68: } jpayne@68: }else{ jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: sb.append((char)lookup[x]); jpayne@68: } jpayne@68: sb.append('\t'); jpayne@68: for(int i=0; i0){sb.append(',');} jpayne@68: sb.append(x); jpayne@68: } jpayne@68: } jpayne@68: return sb; jpayne@68: } jpayne@68: jpayne@68: public static final ByteBuilder toBytes(long kmer, long count, int k, ByteBuilder bb){ jpayne@68: byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); jpayne@68: int bits=(Shared.AMINO_IN ? 5 : 2); jpayne@68: int mask=(Shared.AMINO_IN ? 31 : 3); jpayne@68: if(FASTA_DUMP){ jpayne@68: bb.append('>'); jpayne@68: bb.append(count); jpayne@68: bb.nl(); jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: bb.append(lookup[x]); jpayne@68: } jpayne@68: // assert(false) : kmer+"->\n"+bb+"\n"+AminoAcid.kmerToStringAA(kmer, k); jpayne@68: }else if(NUMERIC_DUMP){ jpayne@68: bb.append(Long.toHexString(kmer)); jpayne@68: bb.tab(); jpayne@68: bb.append(count); jpayne@68: }else{ jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: bb.append(lookup[x]); jpayne@68: } jpayne@68: bb.tab(); jpayne@68: bb.append(count); jpayne@68: } jpayne@68: return bb; jpayne@68: } jpayne@68: jpayne@68: public static final ByteBuilder toBytes(long kmer, int[] values, int k, ByteBuilder bb){ jpayne@68: byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); jpayne@68: int bits=(Shared.AMINO_IN ? 5 : 2); jpayne@68: int mask=(Shared.AMINO_IN ? 31 : 3); jpayne@68: if(FASTA_DUMP){ jpayne@68: bb.append('>'); jpayne@68: for(int i=0; i0){bb.append(',');} jpayne@68: bb.append(x); jpayne@68: } jpayne@68: bb.nl(); jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: bb.append(lookup[x]); jpayne@68: } jpayne@68: }else if(NUMERIC_DUMP){ jpayne@68: bb.append(Long.toHexString(kmer)); jpayne@68: bb.tab(); jpayne@68: for(int i=0; i0){bb.append(',');} jpayne@68: bb.append(x); jpayne@68: } jpayne@68: }else{ jpayne@68: for(int i=k-1; i>=0; i--){ jpayne@68: int x=(int)((kmer>>(bits*i))&mask); jpayne@68: bb.append(lookup[x]); jpayne@68: } jpayne@68: bb.tab(); jpayne@68: for(int i=0; i0){bb.append(',');} jpayne@68: bb.append(x); jpayne@68: } jpayne@68: } jpayne@68: return bb; jpayne@68: } jpayne@68: jpayne@68: // static void appendKmerText(long kmer, int count, int k, StringBuilder sb){ jpayne@68: // sb.setLength(0); jpayne@68: // toText(kmer, count, k, sb); jpayne@68: // sb.append('\n'); jpayne@68: // } jpayne@68: jpayne@68: static void appendKmerText(long kmer, int count, int k, ByteBuilder bb){ jpayne@68: bb.setLength(0); jpayne@68: toBytes(kmer, count, k, bb); jpayne@68: bb.nl(); jpayne@68: } jpayne@68: jpayne@68: jpayne@68: /** For buffered tables. */ jpayne@68: long flush(){ jpayne@68: throw new RuntimeException("Unsupported."); jpayne@68: } jpayne@68: jpayne@68: /** jpayne@68: * This allocates the data structures in multiple threads. Unfortunately, it does not lead to any speedup, at least for ARRAY type. jpayne@68: * @param ways jpayne@68: * @param tableType jpayne@68: * @param schedule jpayne@68: * @param mask jpayne@68: * @return The preallocated table jpayne@68: */ jpayne@68: public static final AbstractKmerTable[] preallocate(int ways, int tableType, int[] schedule, long mask){ jpayne@68: jpayne@68: final AbstractKmerTable[] tables=new AbstractKmerTable[ways]; jpayne@68: jpayne@68: { jpayne@68: shared.Timer tm=new shared.Timer(); jpayne@68: final int t=Tools.max(1, Tools.min(Shared.threads(), 2, ways)); //More than 2 still improves allocation time, but only slightly; ~25% faster at t=4. jpayne@68: final AllocThread[] allocators=new AllocThread[t]; jpayne@68: for(int i=0; i1; jpayne@68: tables=tables_; jpayne@68: } jpayne@68: jpayne@68: @Override jpayne@68: public void run(){ jpayne@68: //Initialize tables jpayne@68: long sum=0; jpayne@68: jpayne@68: // Shared.printMemory();} jpayne@68: for(int i=mod; i