Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/MinHashHeap.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #ifndef HashHeapCounted_h | |
2 #define HashHeapCounted_h | |
3 | |
4 #include "HashList.h" | |
5 #include "HashPriorityQueue.h" | |
6 #include "HashSet.h" | |
7 #include <math.h> | |
8 #include "bloom_filter.hpp" | |
9 | |
10 class MinHashHeap /* Hash container whose estimators and exports (defined inline below the class) read from 'hashes' and 'hashesQueue'. */ | |
11 { | |
12 public: | |
13 /* The constructor, destructor, computeStats(), clear() and tryInsert() are only declared in this header; their behavior is not visible here. */ | |
14 MinHashHeap(bool use64New, uint64_t cardinalityMaximumNew, uint64_t multiplicityMinimumNew = 1, uint64_t memoryBoundBytes = 0); /* multiplicityMinimumNew defaults to 1 and memoryBoundBytes to 0. NOTE(review): presumably a non-zero memoryBoundBytes enables bloomFilter; confirm in the implementation file. */ | |
15 ~MinHashHeap(); | |
16 void computeStats(); | |
17 void clear(); | |
18 double estimateMultiplicity() const; /* multiplicitySum / hashes.size(), or 0 when hashes is empty */ | |
19 double estimateSetSize() const; /* 2^64 (or 2^32 when use64 is false) * hashes.size() / hash at hashesQueue.top(), or 0 when hashes is empty */ | |
20 void toCounts(std::vector<uint32_t> & counts) const; /* forwards to hashes.toCounts(counts) */ | |
21 void toHashList(HashList & hashList) const; /* forwards to hashes.toHashList(hashList) */ | |
22 void tryInsert(hash_u hash); | |
23 | |
24 private: | |
25 | |
26 bool use64; /* selects the hash64 field (true) or the hash32 field (false) of the queue top in estimateSetSize() */ | |
27 /* Primary hash set and its queue: the only containers read by the inline estimators and exports. */ | |
28 HashSet hashes; | |
29 HashPriorityQueue hashesQueue; | |
30 /* NOTE(review): the "pending" pair presumably stages hashes that have not yet reached multiplicityMinimum; confirm against tryInsert(). */ | |
31 HashSet hashesPending; | |
32 HashPriorityQueue hashesQueuePending; | |
33 | |
34 uint64_t cardinalityMaximum; | |
35 uint64_t multiplicityMinimum; | |
36 | |
37 uint64_t multiplicitySum; /* numerator of estimateMultiplicity() */ | |
38 /* NOTE(review): raw pointer alongside a user-declared destructor and no declared copy operations. If the destructor frees it, copying a MinHashHeap would free it twice; confirm ownership. */ | |
39 bloom_filter * bloomFilter; | |
40 /* NOTE(review): presumably k-mer tallies maintained by tryInsert() and reported by computeStats(); confirm in the implementation file. */ | |
41 uint64_t kmersTotal; | |
42 uint64_t kmersUsed; | |
43 }; | |
44 | |
45 inline double MinHashHeap::estimateMultiplicity() const { /* Average of multiplicitySum over the hashes currently held; 0 when none are held. */ if ( hashes.size() == 0 ) { return 0; } return static_cast<double>(multiplicitySum) / hashes.size(); } | |
46 inline double MinHashHeap::estimateSetSize() const { /* Scales the held-hash count by the ratio of the hash space (2^64 or 2^32) to the hash at the top of hashesQueue; 0 when no hashes are held. */ if ( ! hashes.size() ) { return 0; } const double hashSpace = pow(2.0, use64 ? 64.0 : 32.0); const double heldCount = static_cast<double>(hashes.size()); const double topHash = use64 ? static_cast<double>(hashesQueue.top().hash64) : static_cast<double>(hashesQueue.top().hash32); return hashSpace * heldCount / topHash; } | |
47 inline void MinHashHeap::toHashList(HashList & hashList) const { /* Hands the export straight to the primary hash set. */ this->hashes.toHashList(hashList); } | |
48 inline void MinHashHeap::toCounts(std::vector<uint32_t> & counts) const { /* Hands the export straight to the primary hash set. */ this->hashes.toCounts(counts); } | |
49 | |
50 #endif |