Mercurial > repos > rliterman > csp2
view CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/current/kmer/AbstractKmerTable.java @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line source
package kmer; import java.util.concurrent.atomic.AtomicIntegerArray; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Lock; import dna.AminoAcid; import fileIO.ByteStreamWriter; import fileIO.TextStreamWriter; import shared.KillSwitch; import shared.Shared; import shared.Tools; import structures.ByteBuilder; import structures.SuperLongList; /** * @author Brian Bushnell * @date Oct 23, 2013 * */ public abstract class AbstractKmerTable { /*--------------------------------------------------------------*/ /*---------------- Abstract Methods ----------------*/ /*--------------------------------------------------------------*/ // /** Returns count */ // public final int increment(long kmer){return increment(kmer, 1);} /** Returns count */ public abstract int increment(final long kmer, final int incr); // /** Returns number of entries created */ // public final int incrementAndReturnNumCreated(final long kmer){return incrementAndReturnNumCreated(kmer, 1);} /** Returns number of entries created. Incr must be positive. */ public abstract int incrementAndReturnNumCreated(final long kmer, final int incr); public abstract int set(long kmer, int value); // public abstract int set(long kmer, int[] vals); /** This is for IntList3 support with HashArrayHybridFast */ public abstract int set(long kmer, int[] vals, int vlen); /** Returns number of kmers added */ public abstract int setIfNotPresent(long kmer, int value); /** * Fetch the value associated with a kmer. * @param kmer * @return A value. -1 means the kmer was not present. */ public abstract int getValue(long kmer); /** * Fetch the values associated with a kmer. * @param kmer * @param singleton A blank array of length 1. * @return An array filled with values. Values of -1 are invalid. */ public abstract int[] getValues(long kmer, int[] singleton); public abstract boolean contains(long kmer); public final boolean contains(long kmer, int v){ assert(TESTMODE); int[] set=getValues(kmer, new int[] {-1}); if(set==null){return false;} for(int s : set){ if(s==-1){break;} if(s==v){return true;} } return false; } public final boolean contains(long kmer, int[] vals){ assert(TESTMODE); int[] set=getValues(kmer, new int[] {-1}); if(set==null){return false;} boolean success=true; for(int v : vals){ if(v==-1){break;} success=false; for(int s : set){ if(s==v){ success=true; break; } } if(!success){break;} } return success; } public abstract void rebalance(); public abstract long size(); public abstract int arrayLength(); public abstract boolean canRebalance(); public abstract boolean dumpKmersAsText(TextStreamWriter tsw, int k, int mincount, int maxcount); public abstract boolean dumpKmersAsBytes(ByteStreamWriter bsw, int k, int mincount, int maxcount, AtomicLong remaining); public abstract boolean dumpKmersAsBytes_MT(final ByteStreamWriter bsw, final ByteBuilder bb, final int k, final int mincount, int maxcount, AtomicLong remaining); public abstract void fillHistogram(long[] ca, int max); public abstract void fillHistogram(SuperLongList sll); public abstract void countGC(long[] gcCounts, int max); public static final int gc(long kmer){ int gc=0; while(kmer>0){ long x=kmer&3; kmer>>>=2; if(x==1 || x==2){gc++;} } return gc; } abstract Object get(long kmer); abstract void resize(); abstract boolean canResize(); /** * Removes entries with a value of the limit or less. * Rehashes the remainder. * @return Number removed. */ abstract long regenerate(int limit); final void lock(){getLock().lock();} final void unlock(){getLock().unlock();} final boolean tryLock(){return getLock().tryLock();} Lock getLock(){ throw new RuntimeException("Unimplemented."); } /*--------------------------------------------------------------*/ /*--------------- Allocation Methods ----------------*/ /*--------------------------------------------------------------*/ final static AtomicIntegerArray allocAtomicInt(int len){ return KillSwitch.allocAtomicInt(len); } final static long[] allocLong1D(int len){ return KillSwitch.allocLong1D(len); } final static long[][] allocLong2D(int mult, int len){ return KillSwitch.allocLong2D(mult, len); } final static int[] allocInt1D(int len){ return KillSwitch.allocInt1D(len); } final static int[][] allocInt2D(int len){ return KillSwitch.allocInt2D(len); } final static KmerNode[] allocKmerNodeArray(int len){ KmerNode[] ret=null; try { ret=new KmerNode[len]; } catch (OutOfMemoryError e) { synchronized(killMessage){ e.printStackTrace(); System.err.println(killMessage); // Shared.printMemory(); KillSwitch.killSilent(); } } return ret; } /*--------------------------------------------------------------*/ /*--------------- Ownership Methods ----------------*/ /*--------------------------------------------------------------*/ /** Set the thread owning this kmer. Return the new owner. * Will only change the owner if newOwner is greater than current owner. */ public abstract int setOwner(long kmer, int newOwner); /** Reset owner to -1 if this is the current owner. */ public abstract boolean clearOwner(long kmer, int owner); /** Return the thread ID owning this kmer, or -1. */ public abstract int getOwner(long kmer); /** Create data structures needed for ownership representation */ public abstract void initializeOwnership(); /** Eliminate ownership data structures or set them to -1. */ public abstract void clearOwnership(); /*--------------------------------------------------------------*/ /*---------------- Methods ----------------*/ /*--------------------------------------------------------------*/ public static final StringBuilder toText(long kmer, int k){ byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); int bits=(Shared.AMINO_IN ? 5 : 2); int mask=(Shared.AMINO_IN ? 31 : 3); StringBuilder sb=new StringBuilder(k); for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); sb.append((char)lookup[x]); } return sb; } static final StringBuilder toText(long kmer, int count, int k){ StringBuilder sb=new StringBuilder(k+10); return toText(kmer, count, k, sb); } static final ByteBuilder toBytes(long kmer, int count, int k){ ByteBuilder bb=new ByteBuilder(k+10); return toBytes(kmer, count, k, bb); } static final StringBuilder toText(long kmer, int[] values, int k){ StringBuilder sb=new StringBuilder(k+10); return toText(kmer, values, k, sb); } static final ByteBuilder toBytes(long kmer, int[] values, int k){ ByteBuilder bb=new ByteBuilder(k+10); return toBytes(kmer, values, k, bb); } static final StringBuilder toText(long kmer, int count, int k, StringBuilder sb){ byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); int bits=(Shared.AMINO_IN ? 5 : 2); int mask=(Shared.AMINO_IN ? 31 : 3); if(FASTA_DUMP){ sb.append('>'); sb.append(count); sb.append('\n'); for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); sb.append((char)lookup[x]); } }else{ for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); sb.append((char)lookup[x]); } sb.append('\t'); sb.append(count); } return sb; } static final StringBuilder toText(long kmer, int[] values, int k, StringBuilder sb){ byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); int bits=(Shared.AMINO_IN ? 5 : 2); int mask=(Shared.AMINO_IN ? 31 : 3); if(FASTA_DUMP){ sb.append('>'); for(int i=0; i<values.length; i++){ int x=values[i]; if(x==-1){break;} if(i>0){sb.append(',');} sb.append(x); } sb.append('\n'); for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); sb.append((char)lookup[x]); } }else{ for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); sb.append((char)lookup[x]); } sb.append('\t'); for(int i=0; i<values.length; i++){ int x=values[i]; if(x==-1){break;} if(i>0){sb.append(',');} sb.append(x); } } return sb; } public static final ByteBuilder toBytes(long kmer, long count, int k, ByteBuilder bb){ byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); int bits=(Shared.AMINO_IN ? 5 : 2); int mask=(Shared.AMINO_IN ? 31 : 3); if(FASTA_DUMP){ bb.append('>'); bb.append(count); bb.nl(); for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); bb.append(lookup[x]); } // assert(false) : kmer+"->\n"+bb+"\n"+AminoAcid.kmerToStringAA(kmer, k); }else if(NUMERIC_DUMP){ bb.append(Long.toHexString(kmer)); bb.tab(); bb.append(count); }else{ for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); bb.append(lookup[x]); } bb.tab(); bb.append(count); } return bb; } public static final ByteBuilder toBytes(long kmer, int[] values, int k, ByteBuilder bb){ byte[] lookup=(Shared.AMINO_IN ? AminoAcid.numberToAcid : AminoAcid.numberToBase); int bits=(Shared.AMINO_IN ? 5 : 2); int mask=(Shared.AMINO_IN ? 31 : 3); if(FASTA_DUMP){ bb.append('>'); for(int i=0; i<values.length; i++){ int x=values[i]; if(x==-1){break;} if(i>0){bb.append(',');} bb.append(x); } bb.nl(); for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); bb.append(lookup[x]); } }else if(NUMERIC_DUMP){ bb.append(Long.toHexString(kmer)); bb.tab(); for(int i=0; i<values.length; i++){ int x=values[i]; if(x==-1){break;} if(i>0){bb.append(',');} bb.append(x); } }else{ for(int i=k-1; i>=0; i--){ int x=(int)((kmer>>(bits*i))&mask); bb.append(lookup[x]); } bb.tab(); for(int i=0; i<values.length; i++){ int x=values[i]; if(x==-1){break;} if(i>0){bb.append(',');} bb.append(x); } } return bb; } // static void appendKmerText(long kmer, int count, int k, StringBuilder sb){ // sb.setLength(0); // toText(kmer, count, k, sb); // sb.append('\n'); // } static void appendKmerText(long kmer, int count, int k, ByteBuilder bb){ bb.setLength(0); toBytes(kmer, count, k, bb); bb.nl(); } /** For buffered tables. */ long flush(){ throw new RuntimeException("Unsupported."); } /** * This allocates the data structures in multiple threads. Unfortunately, it does not lead to any speedup, at least for ARRAY type. * @param ways * @param tableType * @param schedule * @param mask * @return The preallocated table */ public static final AbstractKmerTable[] preallocate(int ways, int tableType, int[] schedule, long mask){ final AbstractKmerTable[] tables=new AbstractKmerTable[ways]; { shared.Timer tm=new shared.Timer(); final int t=Tools.max(1, Tools.min(Shared.threads(), 2, ways)); //More than 2 still improves allocation time, but only slightly; ~25% faster at t=4. final AllocThread[] allocators=new AllocThread[t]; for(int i=0; i<t; i++){ allocators[i]=new AllocThread(tableType, schedule, i, t, mask, tables); } for(AllocThread at : allocators){at.start();} for(AllocThread at : allocators){ while(at.getState()!=Thread.State.TERMINATED){ try { at.join(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } tm.stop(); if(AbstractKmerTableSet.DISPLAY_PROGRESS){System.err.println(tm);} } synchronized(tables){ for(int i=0; i<tables.length; i++){ final AbstractKmerTable akt=tables[i]; if(akt==null){ throw new RuntimeException("KmerTable allocation failed, probably due to lack of RAM: "+i+", "+tables.length); } } } return tables; } /*--------------------------------------------------------------*/ /*---------------- Nested Classes ----------------*/ /*--------------------------------------------------------------*/ private static class AllocThread extends Thread{ AllocThread(int type_, int[] schedule_, int mod_, int div_, long mask_, AbstractKmerTable[] tables_){ type=type_; schedule=schedule_; size=schedule[0]; mod=mod_; div=div_; mask=mask_; growable=schedule.length>1; tables=tables_; } @Override public void run(){ //Initialize tables long sum=0; // Shared.printMemory();} for(int i=mod; i<tables.length; i+=div){ // System.err.println("T"+i+" allocating "+i); final AbstractKmerTable akt; if(type==FOREST1D){ akt=new HashForest(size, growable, false); }else if(type==TABLE){ akt=new KmerTable(size, growable); }else if(type==ARRAY1D){ akt=new HashArray1D(schedule, mask); // long len=((HashArray1D)akt).arrayLength(); // long mem=((HashArray1D)akt).calcMem(); // sum+=mem; // akt=new HashArray1D(size, -1, mask, growable);//TODO: Set maxSize }else if(type==NODE1D){ throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); // akt=new KmerNode2(-1, 0); }else if(type==FOREST2D){ akt=new HashForest(size, growable, true); }else if(type==TABLE2D){ throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); }else if(type==ARRAY2D){ akt=new HashArray2D(schedule, mask); // akt=new HashArray2D(size, -1, mask, growable);//TODO: Set maxSize }else if(type==NODE2D){ throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); // akt=new KmerNode(-1, 0); }else if(type==ARRAYH){ akt=new HashArrayHybrid(schedule, mask); // akt=new HashArrayHybrid(size, -1, mask, growable);//TODO: Set maxSize }else if(type==ARRAYHF){ akt=new HashArrayHybridFast(schedule, mask); // akt=new HashArrayHybrid(size, -1, mask, growable);//TODO: Set maxSize }else{ throw new RuntimeException("Must use forest, table, or array data structure. Type="+type); } synchronized(tables){ tables[i]=akt; } // System.err.println("T"+i+" allocated "+i); // if(i%100==0){ // System.err.println("Allocated "+(sum/1000000)+"MB"); // Shared.printMemory(); // } } // Shared.printMemory(); // assert(false) : ("Allocated "+(sum/1000000)+"MB"); } private final int type; private final int[] schedule; private final int size; private final int mod; private final int div; private final long mask; private final boolean growable; final AbstractKmerTable[] tables; } public Walker walk() { throw new RuntimeException("Unimplemented"); } /*--------------------------------------------------------------*/ /*---------------- Fields ----------------*/ /*--------------------------------------------------------------*/ public static boolean FASTA_DUMP=true; public static boolean NUMERIC_DUMP=false; public static boolean TWO_PASS_RESIZE=false; public static final boolean verbose=false; public static final boolean TESTMODE=false; //123 SLOW! public static final int UNKNOWN=0, ARRAY1D=1, FOREST1D=2, TABLE=3, NODE1D=4, ARRAY2D=5, FOREST2D=6, TABLE2D=7, NODE2D=8, ARRAYH=9, ARRAYHF=10; public static final int NOT_PRESENT=-1, HASH_COLLISION=-2; public static final int NO_OWNER=-1; private static final String killMessage=new String("\nThis program ran out of memory. Try increasing the -Xmx flag and setting prealloc."); }