Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/mash/CommandScreen.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // Copyright © 2015, Battelle National Biodefense Institute (BNBI); | |
2 // all rights reserved. Authored by: Brian Ondov, Todd Treangen, | |
3 // Sergey Koren, and Adam Phillippy | |
4 // | |
5 // See the LICENSE.txt file included with this software for license information. | |
6 | |
7 #ifndef INCLUDED_CommandScreen | |
8 #define INCLUDED_CommandScreen | |
9 | |
10 #include "Command.h" | |
11 #include "Sketch.h" | |
12 #include <list> | |
13 #include <string> | |
14 #include <vector> | |
15 #include <atomic> | |
16 #include "robin_hood.h" | |
17 #include "MinHashHeap.h" | |
18 | |
19 namespace mash { | |
20 | |
21 struct HashTableEntry | |
22 { | |
23 HashTableEntry() : count(0) {} | |
24 | |
25 uint32_t count; | |
26 robin_hood::unordered_set<uint64_t> indices; | |
27 }; | |
28 | |
29 //typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable; | |
30 typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable; | |
31 | |
32 static const robin_hood::unordered_map< std::string, char > codons = | |
33 { | |
34 {"AAA", 'K'}, | |
35 {"AAC", 'N'}, | |
36 {"AAG", 'K'}, | |
37 {"AAT", 'N'}, | |
38 {"ACA", 'T'}, | |
39 {"ACC", 'T'}, | |
40 {"ACG", 'T'}, | |
41 {"ACT", 'T'}, | |
42 {"AGA", 'R'}, | |
43 {"AGC", 'S'}, | |
44 {"AGG", 'R'}, | |
45 {"AGT", 'S'}, | |
46 {"ATA", 'I'}, | |
47 {"ATC", 'I'}, | |
48 {"ATG", 'M'}, | |
49 {"ATT", 'I'}, | |
50 {"CAA", 'Q'}, | |
51 {"CAC", 'H'}, | |
52 {"CAG", 'Q'}, | |
53 {"CAT", 'H'}, | |
54 {"CCA", 'P'}, | |
55 {"CCC", 'P'}, | |
56 {"CCG", 'P'}, | |
57 {"CCT", 'P'}, | |
58 {"CGA", 'R'}, | |
59 {"CGC", 'R'}, | |
60 {"CGG", 'R'}, | |
61 {"CGT", 'R'}, | |
62 {"CTA", 'L'}, | |
63 {"CTC", 'L'}, | |
64 {"CTG", 'L'}, | |
65 {"CTT", 'L'}, | |
66 {"GAA", 'E'}, | |
67 {"GAC", 'D'}, | |
68 {"GAG", 'E'}, | |
69 {"GAT", 'D'}, | |
70 {"GCA", 'A'}, | |
71 {"GCC", 'A'}, | |
72 {"GCG", 'A'}, | |
73 {"GCT", 'A'}, | |
74 {"GGA", 'G'}, | |
75 {"GGC", 'G'}, | |
76 {"GGG", 'G'}, | |
77 {"GGT", 'G'}, | |
78 {"GTA", 'V'}, | |
79 {"GTC", 'V'}, | |
80 {"GTG", 'V'}, | |
81 {"GTT", 'V'}, | |
82 {"TAA", '*'}, | |
83 {"TAC", 'Y'}, | |
84 {"TAG", '*'}, | |
85 {"TAT", 'Y'}, | |
86 {"TCA", 'S'}, | |
87 {"TCC", 'S'}, | |
88 {"TCG", 'S'}, | |
89 {"TCT", 'S'}, | |
90 {"TGA", '*'}, | |
91 {"TGC", 'C'}, | |
92 {"TGG", 'W'}, | |
93 {"TGT", 'C'}, | |
94 {"TTA", 'L'}, | |
95 {"TTC", 'F'}, | |
96 {"TTG", 'L'}, | |
97 {"TTT", 'F'} | |
98 }; | |
99 | |
100 class CommandScreen : public Command | |
101 { | |
102 public: | |
103 | |
104 struct HashInput | |
105 { | |
106 HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew) | |
107 : | |
108 hashCounts(hashCountsNew), | |
109 minHashHeap(minHashHeapNew), | |
110 seq(seqNew), | |
111 length(lengthNew), | |
112 parameters(parametersNew), | |
113 trans(transNew) | |
114 {} | |
115 | |
116 ~HashInput() | |
117 { | |
118 if ( seq != 0 ) | |
119 { | |
120 delete [] seq; | |
121 } | |
122 } | |
123 | |
124 std::string fileName; | |
125 | |
126 char * seq; | |
127 uint64_t length; | |
128 bool trans; | |
129 | |
130 Sketch::Parameters parameters; | |
131 robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts; | |
132 MinHashHeap * minHashHeap; | |
133 }; | |
134 | |
135 struct HashOutput | |
136 { | |
137 HashOutput(MinHashHeap * minHashHeapNew) | |
138 : | |
139 minHashHeap(minHashHeapNew) | |
140 {} | |
141 | |
142 MinHashHeap * minHashHeap; | |
143 }; | |
144 | |
145 CommandScreen(); | |
146 | |
147 int run() const; // override | |
148 | |
149 private: | |
150 | |
151 struct Reference | |
152 { | |
153 Reference(uint64_t amerCountNew, std::string nameNew, std::string commentNew) | |
154 : amerCount(amerCountNew), name(nameNew), comment(commentNew) {} | |
155 | |
156 uint64_t amerCount; | |
157 std::string name; | |
158 std::string comment; | |
159 }; | |
160 }; | |
161 | |
162 char aaFromCodon(const char * codon); | |
163 double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double kmerSpace); | |
164 CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input); | |
165 double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize); | |
166 void translate(const char * src, char * dst, uint64_t len); | |
167 void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps); | |
168 | |
169 } // namespace mash | |
170 | |
171 #endif |