Mercurial > repos > rliterman > csp2
annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/MinHashHeap.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
rev | line source |
---|---|
jpayne@69 | 1 #ifndef HashHeapCounted_h |
jpayne@69 | 2 #define HashHeapCounted_h |
jpayne@69 | 3 |
jpayne@69 | 4 #include "HashList.h" |
jpayne@69 | 5 #include "HashPriorityQueue.h" |
jpayne@69 | 6 #include "HashSet.h" |
jpayne@69 | 7 #include <math.h> |
jpayne@69 | 8 #include "bloom_filter.hpp" |
jpayne@69 | 9 |
jpayne@69 | 10 class MinHashHeap |
jpayne@69 | 11 { |
jpayne@69 | 12 public: |
jpayne@69 | 13 |
jpayne@69 | 14 MinHashHeap(bool use64New, uint64_t cardinalityMaximumNew, uint64_t multiplicityMinimumNew = 1, uint64_t memoryBoundBytes = 0); |
jpayne@69 | 15 ~MinHashHeap(); |
jpayne@69 | 16 void computeStats(); |
jpayne@69 | 17 void clear(); |
jpayne@69 | 18 double estimateMultiplicity() const; |
jpayne@69 | 19 double estimateSetSize() const; |
jpayne@69 | 20 void toCounts(std::vector<uint32_t> & counts) const; |
jpayne@69 | 21 void toHashList(HashList & hashList) const; |
jpayne@69 | 22 void tryInsert(hash_u hash); |
jpayne@69 | 23 |
jpayne@69 | 24 private: |
jpayne@69 | 25 |
jpayne@69 | 26 bool use64; |
jpayne@69 | 27 |
jpayne@69 | 28 HashSet hashes; |
jpayne@69 | 29 HashPriorityQueue hashesQueue; |
jpayne@69 | 30 |
jpayne@69 | 31 HashSet hashesPending; |
jpayne@69 | 32 HashPriorityQueue hashesQueuePending; |
jpayne@69 | 33 |
jpayne@69 | 34 uint64_t cardinalityMaximum; |
jpayne@69 | 35 uint64_t multiplicityMinimum; |
jpayne@69 | 36 |
jpayne@69 | 37 uint64_t multiplicitySum; |
jpayne@69 | 38 |
jpayne@69 | 39 bloom_filter * bloomFilter; |
jpayne@69 | 40 |
jpayne@69 | 41 uint64_t kmersTotal; |
jpayne@69 | 42 uint64_t kmersUsed; |
jpayne@69 | 43 }; |
jpayne@69 | 44 |
jpayne@69 | 45 inline double MinHashHeap::estimateMultiplicity() const {return hashes.size() ? (double)multiplicitySum / hashes.size() : 0;} |
jpayne@69 | 46 inline double MinHashHeap::estimateSetSize() const {return hashes.size() ? pow(2.0, use64 ? 64.0 : 32.0) * (double)hashes.size() / (use64 ? (double)hashesQueue.top().hash64 : (double)hashesQueue.top().hash32) : 0;} |
jpayne@69 | 47 inline void MinHashHeap::toHashList(HashList & hashList) const {hashes.toHashList(hashList);} |
jpayne@69 | 48 inline void MinHashHeap::toCounts(std::vector<uint32_t> & counts) const {hashes.toCounts(counts);} |
jpayne@69 | 49 |
jpayne@69 | 50 #endif |