annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/MinHashHeap.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 #ifndef HashHeapCounted_h
jpayne@69 2 #define HashHeapCounted_h
jpayne@69 3
jpayne@69 4 #include "HashList.h"
jpayne@69 5 #include "HashPriorityQueue.h"
jpayne@69 6 #include "HashSet.h"
jpayne@69 7 #include <math.h>
jpayne@69 8 #include "bloom_filter.hpp"
jpayne@69 9
jpayne@69 10 class MinHashHeap
jpayne@69 11 {
jpayne@69 12 public:
jpayne@69 13
jpayne@69 14 MinHashHeap(bool use64New, uint64_t cardinalityMaximumNew, uint64_t multiplicityMinimumNew = 1, uint64_t memoryBoundBytes = 0);
jpayne@69 15 ~MinHashHeap();
jpayne@69 16 void computeStats();
jpayne@69 17 void clear();
jpayne@69 18 double estimateMultiplicity() const;
jpayne@69 19 double estimateSetSize() const;
jpayne@69 20 void toCounts(std::vector<uint32_t> & counts) const;
jpayne@69 21 void toHashList(HashList & hashList) const;
jpayne@69 22 void tryInsert(hash_u hash);
jpayne@69 23
jpayne@69 24 private:
jpayne@69 25
jpayne@69 26 bool use64;
jpayne@69 27
jpayne@69 28 HashSet hashes;
jpayne@69 29 HashPriorityQueue hashesQueue;
jpayne@69 30
jpayne@69 31 HashSet hashesPending;
jpayne@69 32 HashPriorityQueue hashesQueuePending;
jpayne@69 33
jpayne@69 34 uint64_t cardinalityMaximum;
jpayne@69 35 uint64_t multiplicityMinimum;
jpayne@69 36
jpayne@69 37 uint64_t multiplicitySum;
jpayne@69 38
jpayne@69 39 bloom_filter * bloomFilter;
jpayne@69 40
jpayne@69 41 uint64_t kmersTotal;
jpayne@69 42 uint64_t kmersUsed;
jpayne@69 43 };
jpayne@69 44
jpayne@69 45 inline double MinHashHeap::estimateMultiplicity() const {return hashes.size() ? (double)multiplicitySum / hashes.size() : 0;}
jpayne@69 46 inline double MinHashHeap::estimateSetSize() const {return hashes.size() ? pow(2.0, use64 ? 64.0 : 32.0) * (double)hashes.size() / (use64 ? (double)hashesQueue.top().hash64 : (double)hashesQueue.top().hash32) : 0;}
jpayne@69 47 inline void MinHashHeap::toHashList(HashList & hashList) const {hashes.toHashList(hashList);}
jpayne@69 48 inline void MinHashHeap::toCounts(std::vector<uint32_t> & counts) const {hashes.toCounts(counts);}
jpayne@69 49
jpayne@69 50 #endif