comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/CommandScreen.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 // Copyright © 2015, Battelle National Biodefense Institute (BNBI);
2 // all rights reserved. Authored by: Brian Ondov, Todd Treangen,
3 // Sergey Koren, and Adam Phillippy
4 //
5 // See the LICENSE.txt file included with this software for license information.
6
7 #ifndef INCLUDED_CommandScreen
8 #define INCLUDED_CommandScreen
9
10 #include "Command.h"
11 #include "Sketch.h"
12 #include <list>
13 #include <string>
14 #include <vector>
15 #include <atomic>
16 #include "robin_hood.h"
17 #include "MinHashHeap.h"
18
19 namespace mash {
20
21 struct HashTableEntry
22 {
23 HashTableEntry() : count(0) {}
24
25 uint32_t count;
26 robin_hood::unordered_set<uint64_t> indices;
27 };
28
29 //typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable;
30 typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable;
31
32 static const robin_hood::unordered_map< std::string, char > codons =
33 {
34 {"AAA", 'K'},
35 {"AAC", 'N'},
36 {"AAG", 'K'},
37 {"AAT", 'N'},
38 {"ACA", 'T'},
39 {"ACC", 'T'},
40 {"ACG", 'T'},
41 {"ACT", 'T'},
42 {"AGA", 'R'},
43 {"AGC", 'S'},
44 {"AGG", 'R'},
45 {"AGT", 'S'},
46 {"ATA", 'I'},
47 {"ATC", 'I'},
48 {"ATG", 'M'},
49 {"ATT", 'I'},
50 {"CAA", 'Q'},
51 {"CAC", 'H'},
52 {"CAG", 'Q'},
53 {"CAT", 'H'},
54 {"CCA", 'P'},
55 {"CCC", 'P'},
56 {"CCG", 'P'},
57 {"CCT", 'P'},
58 {"CGA", 'R'},
59 {"CGC", 'R'},
60 {"CGG", 'R'},
61 {"CGT", 'R'},
62 {"CTA", 'L'},
63 {"CTC", 'L'},
64 {"CTG", 'L'},
65 {"CTT", 'L'},
66 {"GAA", 'E'},
67 {"GAC", 'D'},
68 {"GAG", 'E'},
69 {"GAT", 'D'},
70 {"GCA", 'A'},
71 {"GCC", 'A'},
72 {"GCG", 'A'},
73 {"GCT", 'A'},
74 {"GGA", 'G'},
75 {"GGC", 'G'},
76 {"GGG", 'G'},
77 {"GGT", 'G'},
78 {"GTA", 'V'},
79 {"GTC", 'V'},
80 {"GTG", 'V'},
81 {"GTT", 'V'},
82 {"TAA", '*'},
83 {"TAC", 'Y'},
84 {"TAG", '*'},
85 {"TAT", 'Y'},
86 {"TCA", 'S'},
87 {"TCC", 'S'},
88 {"TCG", 'S'},
89 {"TCT", 'S'},
90 {"TGA", '*'},
91 {"TGC", 'C'},
92 {"TGG", 'W'},
93 {"TGT", 'C'},
94 {"TTA", 'L'},
95 {"TTC", 'F'},
96 {"TTG", 'L'},
97 {"TTT", 'F'}
98 };
99
100 class CommandScreen : public Command
101 {
102 public:
103
104 struct HashInput
105 {
106 HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
107 :
108 hashCounts(hashCountsNew),
109 minHashHeap(minHashHeapNew),
110 seq(seqNew),
111 length(lengthNew),
112 parameters(parametersNew),
113 trans(transNew)
114 {}
115
116 ~HashInput()
117 {
118 if ( seq != 0 )
119 {
120 delete [] seq;
121 }
122 }
123
124 std::string fileName;
125
126 char * seq;
127 uint64_t length;
128 bool trans;
129
130 Sketch::Parameters parameters;
131 robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
132 MinHashHeap * minHashHeap;
133 };
134
135 struct HashOutput
136 {
137 HashOutput(MinHashHeap * minHashHeapNew)
138 :
139 minHashHeap(minHashHeapNew)
140 {}
141
142 MinHashHeap * minHashHeap;
143 };
144
145 CommandScreen();
146
147 int run() const; // override
148
149 private:
150
151 struct Reference
152 {
153 Reference(uint64_t amerCountNew, std::string nameNew, std::string commentNew)
154 : amerCount(amerCountNew), name(nameNew), comment(commentNew) {}
155
156 uint64_t amerCount;
157 std::string name;
158 std::string comment;
159 };
160 };
161
162 char aaFromCodon(const char * codon);
163 double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double kmerSpace);
164 CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input);
165 double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize);
166 void translate(const char * src, char * dst, uint64_t len);
167 void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps);
168
169 } // namespace mash
170
171 #endif