jpayne@69: #ifndef HashHeapCounted_h jpayne@69: #define HashHeapCounted_h jpayne@69: jpayne@69: #include "HashList.h" jpayne@69: #include "HashPriorityQueue.h" jpayne@69: #include "HashSet.h" jpayne@69: #include jpayne@69: #include "bloom_filter.hpp" jpayne@69: jpayne@69: class MinHashHeap jpayne@69: { jpayne@69: public: jpayne@69: jpayne@69: MinHashHeap(bool use64New, uint64_t cardinalityMaximumNew, uint64_t multiplicityMinimumNew = 1, uint64_t memoryBoundBytes = 0); jpayne@69: ~MinHashHeap(); jpayne@69: void computeStats(); jpayne@69: void clear(); jpayne@69: double estimateMultiplicity() const; jpayne@69: double estimateSetSize() const; jpayne@69: void toCounts(std::vector & counts) const; jpayne@69: void toHashList(HashList & hashList) const; jpayne@69: void tryInsert(hash_u hash); jpayne@69: jpayne@69: private: jpayne@69: jpayne@69: bool use64; jpayne@69: jpayne@69: HashSet hashes; jpayne@69: HashPriorityQueue hashesQueue; jpayne@69: jpayne@69: HashSet hashesPending; jpayne@69: HashPriorityQueue hashesQueuePending; jpayne@69: jpayne@69: uint64_t cardinalityMaximum; jpayne@69: uint64_t multiplicityMinimum; jpayne@69: jpayne@69: uint64_t multiplicitySum; jpayne@69: jpayne@69: bloom_filter * bloomFilter; jpayne@69: jpayne@69: uint64_t kmersTotal; jpayne@69: uint64_t kmersUsed; jpayne@69: }; jpayne@69: jpayne@69: inline double MinHashHeap::estimateMultiplicity() const {return hashes.size() ? (double)multiplicitySum / hashes.size() : 0;} jpayne@69: inline double MinHashHeap::estimateSetSize() const {return hashes.size() ? pow(2.0, use64 ? 64.0 : 32.0) * (double)hashes.size() / (use64 ? (double)hashesQueue.top().hash64 : (double)hashesQueue.top().hash32) : 0;} jpayne@69: inline void MinHashHeap::toHashList(HashList & hashList) const {hashes.toHashList(hashList);} jpayne@69: inline void MinHashHeap::toCounts(std::vector & counts) const {hashes.toCounts(counts);} jpayne@69: jpayne@69: #endif