Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/CommandScreen.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/CommandScreen.h Tue Mar 18 17:55:14 2025 -0400 @@ -0,0 +1,171 @@ +// Copyright © 2015, Battelle National Biodefense Institute (BNBI); +// all rights reserved. Authored by: Brian Ondov, Todd Treangen, +// Sergey Koren, and Adam Phillippy +// +// See the LICENSE.txt file included with this software for license information. + +#ifndef INCLUDED_CommandScreen +#define INCLUDED_CommandScreen + +#include "Command.h" +#include "Sketch.h" +#include <list> +#include <string> +#include <vector> +#include <atomic> +#include "robin_hood.h" +#include "MinHashHeap.h" + +namespace mash { + +struct HashTableEntry +{ + HashTableEntry() : count(0) {} + + uint32_t count; + robin_hood::unordered_set<uint64_t> indices; +}; + +//typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable; +typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable; + +static const robin_hood::unordered_map< std::string, char > codons = +{ + {"AAA", 'K'}, + {"AAC", 'N'}, + {"AAG", 'K'}, + {"AAT", 'N'}, + {"ACA", 'T'}, + {"ACC", 'T'}, + {"ACG", 'T'}, + {"ACT", 'T'}, + {"AGA", 'R'}, + {"AGC", 'S'}, + {"AGG", 'R'}, + {"AGT", 'S'}, + {"ATA", 'I'}, + {"ATC", 'I'}, + {"ATG", 'M'}, + {"ATT", 'I'}, + {"CAA", 'Q'}, + {"CAC", 'H'}, + {"CAG", 'Q'}, + {"CAT", 'H'}, + {"CCA", 'P'}, + {"CCC", 'P'}, + {"CCG", 'P'}, + {"CCT", 'P'}, + {"CGA", 'R'}, + {"CGC", 'R'}, + {"CGG", 'R'}, + {"CGT", 'R'}, + {"CTA", 'L'}, + {"CTC", 'L'}, + {"CTG", 'L'}, + {"CTT", 'L'}, + {"GAA", 'E'}, + {"GAC", 'D'}, + {"GAG", 'E'}, + {"GAT", 'D'}, + {"GCA", 'A'}, + {"GCC", 'A'}, + {"GCG", 'A'}, + {"GCT", 'A'}, + {"GGA", 'G'}, + {"GGC", 'G'}, + {"GGG", 'G'}, + {"GGT", 'G'}, + {"GTA", 'V'}, + {"GTC", 'V'}, + {"GTG", 'V'}, + {"GTT", 'V'}, + {"TAA", '*'}, + {"TAC", 'Y'}, + {"TAG", '*'}, + {"TAT", 'Y'}, + {"TCA", 'S'}, + {"TCC", 'S'}, + {"TCG", 'S'}, + {"TCT", 'S'}, + {"TGA", '*'}, + {"TGC", 'C'}, + {"TGG", 'W'}, + {"TGT", 'C'}, + {"TTA", 'L'}, + {"TTC", 'F'}, + {"TTG", 'L'}, + {"TTT", 'F'} +}; + +class CommandScreen : public Command +{ +public: + + struct HashInput + { + HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew) + : + hashCounts(hashCountsNew), + minHashHeap(minHashHeapNew), + seq(seqNew), + length(lengthNew), + parameters(parametersNew), + trans(transNew) + {} + + ~HashInput() + { + if ( seq != 0 ) + { + delete [] seq; + } + } + + std::string fileName; + + char * seq; + uint64_t length; + bool trans; + + Sketch::Parameters parameters; + robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts; + MinHashHeap * minHashHeap; + }; + + struct HashOutput + { + HashOutput(MinHashHeap * minHashHeapNew) + : + minHashHeap(minHashHeapNew) + {} + + MinHashHeap * minHashHeap; + }; + + CommandScreen(); + + int run() const; // override + +private: + + struct Reference + { + Reference(uint64_t amerCountNew, std::string nameNew, std::string commentNew) + : amerCount(amerCountNew), name(nameNew), comment(commentNew) {} + + uint64_t amerCount; + std::string name; + std::string comment; + }; +}; + +char aaFromCodon(const char * codon); +double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double kmerSpace); +CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input); +double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize); +void translate(const char * src, char * dst, uint64_t len); +void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps); + +} // namespace mash + +#endif