jpayne@69
|
1 // Copyright © 2015, Battelle National Biodefense Institute (BNBI);
|
jpayne@69
|
2 // all rights reserved. Authored by: Brian Ondov, Todd Treangen,
|
jpayne@69
|
3 // Sergey Koren, and Adam Phillippy
|
jpayne@69
|
4 //
|
jpayne@69
|
5 // See the LICENSE.txt file included with this software for license information.
|
jpayne@69
|
6
|
jpayne@69
|
7 #ifndef INCLUDED_CommandScreen
|
jpayne@69
|
8 #define INCLUDED_CommandScreen
|
jpayne@69
|
9
|
jpayne@69
|
10 #include "Command.h"
|
jpayne@69
|
11 #include "Sketch.h"
|
jpayne@69
|
12 #include <list>
|
jpayne@69
|
13 #include <string>
|
jpayne@69
|
14 #include <vector>
|
jpayne@69
|
15 #include <atomic>
|
jpayne@69
|
16 #include "robin_hood.h"
|
jpayne@69
|
17 #include "MinHashHeap.h"
|
jpayne@69
|
18
|
jpayne@69
|
19 namespace mash {
|
jpayne@69
|
20
|
jpayne@69
|
21 struct HashTableEntry
|
jpayne@69
|
22 {
|
jpayne@69
|
23 HashTableEntry() : count(0) {}
|
jpayne@69
|
24
|
jpayne@69
|
25 uint32_t count;
|
jpayne@69
|
26 robin_hood::unordered_set<uint64_t> indices;
|
jpayne@69
|
27 };
|
jpayne@69
|
28
|
jpayne@69
|
29 //typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable;
|
jpayne@69
|
30 typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable;
|
jpayne@69
|
31
|
jpayne@69
|
32 static const robin_hood::unordered_map< std::string, char > codons =
|
jpayne@69
|
33 {
|
jpayne@69
|
34 {"AAA", 'K'},
|
jpayne@69
|
35 {"AAC", 'N'},
|
jpayne@69
|
36 {"AAG", 'K'},
|
jpayne@69
|
37 {"AAT", 'N'},
|
jpayne@69
|
38 {"ACA", 'T'},
|
jpayne@69
|
39 {"ACC", 'T'},
|
jpayne@69
|
40 {"ACG", 'T'},
|
jpayne@69
|
41 {"ACT", 'T'},
|
jpayne@69
|
42 {"AGA", 'R'},
|
jpayne@69
|
43 {"AGC", 'S'},
|
jpayne@69
|
44 {"AGG", 'R'},
|
jpayne@69
|
45 {"AGT", 'S'},
|
jpayne@69
|
46 {"ATA", 'I'},
|
jpayne@69
|
47 {"ATC", 'I'},
|
jpayne@69
|
48 {"ATG", 'M'},
|
jpayne@69
|
49 {"ATT", 'I'},
|
jpayne@69
|
50 {"CAA", 'Q'},
|
jpayne@69
|
51 {"CAC", 'H'},
|
jpayne@69
|
52 {"CAG", 'Q'},
|
jpayne@69
|
53 {"CAT", 'H'},
|
jpayne@69
|
54 {"CCA", 'P'},
|
jpayne@69
|
55 {"CCC", 'P'},
|
jpayne@69
|
56 {"CCG", 'P'},
|
jpayne@69
|
57 {"CCT", 'P'},
|
jpayne@69
|
58 {"CGA", 'R'},
|
jpayne@69
|
59 {"CGC", 'R'},
|
jpayne@69
|
60 {"CGG", 'R'},
|
jpayne@69
|
61 {"CGT", 'R'},
|
jpayne@69
|
62 {"CTA", 'L'},
|
jpayne@69
|
63 {"CTC", 'L'},
|
jpayne@69
|
64 {"CTG", 'L'},
|
jpayne@69
|
65 {"CTT", 'L'},
|
jpayne@69
|
66 {"GAA", 'E'},
|
jpayne@69
|
67 {"GAC", 'D'},
|
jpayne@69
|
68 {"GAG", 'E'},
|
jpayne@69
|
69 {"GAT", 'D'},
|
jpayne@69
|
70 {"GCA", 'A'},
|
jpayne@69
|
71 {"GCC", 'A'},
|
jpayne@69
|
72 {"GCG", 'A'},
|
jpayne@69
|
73 {"GCT", 'A'},
|
jpayne@69
|
74 {"GGA", 'G'},
|
jpayne@69
|
75 {"GGC", 'G'},
|
jpayne@69
|
76 {"GGG", 'G'},
|
jpayne@69
|
77 {"GGT", 'G'},
|
jpayne@69
|
78 {"GTA", 'V'},
|
jpayne@69
|
79 {"GTC", 'V'},
|
jpayne@69
|
80 {"GTG", 'V'},
|
jpayne@69
|
81 {"GTT", 'V'},
|
jpayne@69
|
82 {"TAA", '*'},
|
jpayne@69
|
83 {"TAC", 'Y'},
|
jpayne@69
|
84 {"TAG", '*'},
|
jpayne@69
|
85 {"TAT", 'Y'},
|
jpayne@69
|
86 {"TCA", 'S'},
|
jpayne@69
|
87 {"TCC", 'S'},
|
jpayne@69
|
88 {"TCG", 'S'},
|
jpayne@69
|
89 {"TCT", 'S'},
|
jpayne@69
|
90 {"TGA", '*'},
|
jpayne@69
|
91 {"TGC", 'C'},
|
jpayne@69
|
92 {"TGG", 'W'},
|
jpayne@69
|
93 {"TGT", 'C'},
|
jpayne@69
|
94 {"TTA", 'L'},
|
jpayne@69
|
95 {"TTC", 'F'},
|
jpayne@69
|
96 {"TTG", 'L'},
|
jpayne@69
|
97 {"TTT", 'F'}
|
jpayne@69
|
98 };
|
jpayne@69
|
99
|
jpayne@69
|
100 class CommandScreen : public Command
|
jpayne@69
|
101 {
|
jpayne@69
|
102 public:
|
jpayne@69
|
103
|
jpayne@69
|
104 struct HashInput
|
jpayne@69
|
105 {
|
jpayne@69
|
106 HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
|
jpayne@69
|
107 :
|
jpayne@69
|
108 hashCounts(hashCountsNew),
|
jpayne@69
|
109 minHashHeap(minHashHeapNew),
|
jpayne@69
|
110 seq(seqNew),
|
jpayne@69
|
111 length(lengthNew),
|
jpayne@69
|
112 parameters(parametersNew),
|
jpayne@69
|
113 trans(transNew)
|
jpayne@69
|
114 {}
|
jpayne@69
|
115
|
jpayne@69
|
116 ~HashInput()
|
jpayne@69
|
117 {
|
jpayne@69
|
118 if ( seq != 0 )
|
jpayne@69
|
119 {
|
jpayne@69
|
120 delete [] seq;
|
jpayne@69
|
121 }
|
jpayne@69
|
122 }
|
jpayne@69
|
123
|
jpayne@69
|
124 std::string fileName;
|
jpayne@69
|
125
|
jpayne@69
|
126 char * seq;
|
jpayne@69
|
127 uint64_t length;
|
jpayne@69
|
128 bool trans;
|
jpayne@69
|
129
|
jpayne@69
|
130 Sketch::Parameters parameters;
|
jpayne@69
|
131 robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
|
jpayne@69
|
132 MinHashHeap * minHashHeap;
|
jpayne@69
|
133 };
|
jpayne@69
|
134
|
jpayne@69
|
135 struct HashOutput
|
jpayne@69
|
136 {
|
jpayne@69
|
137 HashOutput(MinHashHeap * minHashHeapNew)
|
jpayne@69
|
138 :
|
jpayne@69
|
139 minHashHeap(minHashHeapNew)
|
jpayne@69
|
140 {}
|
jpayne@69
|
141
|
jpayne@69
|
142 MinHashHeap * minHashHeap;
|
jpayne@69
|
143 };
|
jpayne@69
|
144
|
jpayne@69
|
145 CommandScreen();
|
jpayne@69
|
146
|
jpayne@69
|
147 int run() const; // override
|
jpayne@69
|
148
|
jpayne@69
|
149 private:
|
jpayne@69
|
150
|
jpayne@69
|
151 struct Reference
|
jpayne@69
|
152 {
|
jpayne@69
|
153 Reference(uint64_t amerCountNew, std::string nameNew, std::string commentNew)
|
jpayne@69
|
154 : amerCount(amerCountNew), name(nameNew), comment(commentNew) {}
|
jpayne@69
|
155
|
jpayne@69
|
156 uint64_t amerCount;
|
jpayne@69
|
157 std::string name;
|
jpayne@69
|
158 std::string comment;
|
jpayne@69
|
159 };
|
jpayne@69
|
160 };
|
jpayne@69
|
161
|
jpayne@69
|
162 char aaFromCodon(const char * codon);
|
jpayne@69
|
163 double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double kmerSpace);
|
jpayne@69
|
164 CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input);
|
jpayne@69
|
165 double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize);
|
jpayne@69
|
166 void translate(const char * src, char * dst, uint64_t len);
|
jpayne@69
|
167 void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps);
|
jpayne@69
|
168
|
jpayne@69
|
169 } // namespace mash
|
jpayne@69
|
170
|
jpayne@69
|
171 #endif
|