annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/CommandScreen.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // Copyright © 2015, Battelle National Biodefense Institute (BNBI);
jpayne@69 2 // all rights reserved. Authored by: Brian Ondov, Todd Treangen,
jpayne@69 3 // Sergey Koren, and Adam Phillippy
jpayne@69 4 //
jpayne@69 5 // See the LICENSE.txt file included with this software for license information.
jpayne@69 6
jpayne@69 7 #ifndef INCLUDED_CommandScreen
jpayne@69 8 #define INCLUDED_CommandScreen
jpayne@69 9
jpayne@69 10 #include "Command.h"
jpayne@69 11 #include "Sketch.h"
jpayne@69 12 #include <list>
jpayne@69 13 #include <string>
jpayne@69 14 #include <vector>
jpayne@69 15 #include <atomic>
jpayne@69 16 #include "robin_hood.h"
jpayne@69 17 #include "MinHashHeap.h"
jpayne@69 18
jpayne@69 19 namespace mash {
jpayne@69 20
jpayne@69 21 struct HashTableEntry
jpayne@69 22 {
jpayne@69 23 HashTableEntry() : count(0) {}
jpayne@69 24
jpayne@69 25 uint32_t count;
jpayne@69 26 robin_hood::unordered_set<uint64_t> indices;
jpayne@69 27 };
jpayne@69 28
jpayne@69 29 //typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable;
jpayne@69 30 typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable;
jpayne@69 31
jpayne@69 32 static const robin_hood::unordered_map< std::string, char > codons =
jpayne@69 33 {
jpayne@69 34 {"AAA", 'K'},
jpayne@69 35 {"AAC", 'N'},
jpayne@69 36 {"AAG", 'K'},
jpayne@69 37 {"AAT", 'N'},
jpayne@69 38 {"ACA", 'T'},
jpayne@69 39 {"ACC", 'T'},
jpayne@69 40 {"ACG", 'T'},
jpayne@69 41 {"ACT", 'T'},
jpayne@69 42 {"AGA", 'R'},
jpayne@69 43 {"AGC", 'S'},
jpayne@69 44 {"AGG", 'R'},
jpayne@69 45 {"AGT", 'S'},
jpayne@69 46 {"ATA", 'I'},
jpayne@69 47 {"ATC", 'I'},
jpayne@69 48 {"ATG", 'M'},
jpayne@69 49 {"ATT", 'I'},
jpayne@69 50 {"CAA", 'Q'},
jpayne@69 51 {"CAC", 'H'},
jpayne@69 52 {"CAG", 'Q'},
jpayne@69 53 {"CAT", 'H'},
jpayne@69 54 {"CCA", 'P'},
jpayne@69 55 {"CCC", 'P'},
jpayne@69 56 {"CCG", 'P'},
jpayne@69 57 {"CCT", 'P'},
jpayne@69 58 {"CGA", 'R'},
jpayne@69 59 {"CGC", 'R'},
jpayne@69 60 {"CGG", 'R'},
jpayne@69 61 {"CGT", 'R'},
jpayne@69 62 {"CTA", 'L'},
jpayne@69 63 {"CTC", 'L'},
jpayne@69 64 {"CTG", 'L'},
jpayne@69 65 {"CTT", 'L'},
jpayne@69 66 {"GAA", 'E'},
jpayne@69 67 {"GAC", 'D'},
jpayne@69 68 {"GAG", 'E'},
jpayne@69 69 {"GAT", 'D'},
jpayne@69 70 {"GCA", 'A'},
jpayne@69 71 {"GCC", 'A'},
jpayne@69 72 {"GCG", 'A'},
jpayne@69 73 {"GCT", 'A'},
jpayne@69 74 {"GGA", 'G'},
jpayne@69 75 {"GGC", 'G'},
jpayne@69 76 {"GGG", 'G'},
jpayne@69 77 {"GGT", 'G'},
jpayne@69 78 {"GTA", 'V'},
jpayne@69 79 {"GTC", 'V'},
jpayne@69 80 {"GTG", 'V'},
jpayne@69 81 {"GTT", 'V'},
jpayne@69 82 {"TAA", '*'},
jpayne@69 83 {"TAC", 'Y'},
jpayne@69 84 {"TAG", '*'},
jpayne@69 85 {"TAT", 'Y'},
jpayne@69 86 {"TCA", 'S'},
jpayne@69 87 {"TCC", 'S'},
jpayne@69 88 {"TCG", 'S'},
jpayne@69 89 {"TCT", 'S'},
jpayne@69 90 {"TGA", '*'},
jpayne@69 91 {"TGC", 'C'},
jpayne@69 92 {"TGG", 'W'},
jpayne@69 93 {"TGT", 'C'},
jpayne@69 94 {"TTA", 'L'},
jpayne@69 95 {"TTC", 'F'},
jpayne@69 96 {"TTG", 'L'},
jpayne@69 97 {"TTT", 'F'}
jpayne@69 98 };
jpayne@69 99
jpayne@69 100 class CommandScreen : public Command
jpayne@69 101 {
jpayne@69 102 public:
jpayne@69 103
jpayne@69 104 struct HashInput
jpayne@69 105 {
jpayne@69 106 HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
jpayne@69 107 :
jpayne@69 108 hashCounts(hashCountsNew),
jpayne@69 109 minHashHeap(minHashHeapNew),
jpayne@69 110 seq(seqNew),
jpayne@69 111 length(lengthNew),
jpayne@69 112 parameters(parametersNew),
jpayne@69 113 trans(transNew)
jpayne@69 114 {}
jpayne@69 115
jpayne@69 116 ~HashInput()
jpayne@69 117 {
jpayne@69 118 if ( seq != 0 )
jpayne@69 119 {
jpayne@69 120 delete [] seq;
jpayne@69 121 }
jpayne@69 122 }
jpayne@69 123
jpayne@69 124 std::string fileName;
jpayne@69 125
jpayne@69 126 char * seq;
jpayne@69 127 uint64_t length;
jpayne@69 128 bool trans;
jpayne@69 129
jpayne@69 130 Sketch::Parameters parameters;
jpayne@69 131 robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
jpayne@69 132 MinHashHeap * minHashHeap;
jpayne@69 133 };
jpayne@69 134
jpayne@69 135 struct HashOutput
jpayne@69 136 {
jpayne@69 137 HashOutput(MinHashHeap * minHashHeapNew)
jpayne@69 138 :
jpayne@69 139 minHashHeap(minHashHeapNew)
jpayne@69 140 {}
jpayne@69 141
jpayne@69 142 MinHashHeap * minHashHeap;
jpayne@69 143 };
jpayne@69 144
jpayne@69 145 CommandScreen();
jpayne@69 146
jpayne@69 147 int run() const; // override
jpayne@69 148
jpayne@69 149 private:
jpayne@69 150
jpayne@69 151 struct Reference
jpayne@69 152 {
jpayne@69 153 Reference(uint64_t amerCountNew, std::string nameNew, std::string commentNew)
jpayne@69 154 : amerCount(amerCountNew), name(nameNew), comment(commentNew) {}
jpayne@69 155
jpayne@69 156 uint64_t amerCount;
jpayne@69 157 std::string name;
jpayne@69 158 std::string comment;
jpayne@69 159 };
jpayne@69 160 };
jpayne@69 161
jpayne@69 162 char aaFromCodon(const char * codon);
jpayne@69 163 double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double kmerSpace);
jpayne@69 164 CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input);
jpayne@69 165 double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize);
jpayne@69 166 void translate(const char * src, char * dst, uint64_t len);
jpayne@69 167 void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps);
jpayne@69 168
jpayne@69 169 } // namespace mash
jpayne@69 170
jpayne@69 171 #endif