Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/delta.hh @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 //////////////////////////////////////////////////////////////////////////////// | |
2 //! \file | |
3 //! \author Adam M Phillippy | |
4 //! \date 03/26/2003 | |
5 //! | |
6 //! \brief Class declarations for the handling of delta alignment files | |
7 //! | |
8 //! \see delta.cc | |
9 //////////////////////////////////////////////////////////////////////////////// | |
10 | |
11 #ifndef __DELTA_HH | |
12 #define __DELTA_HH | |
13 | |
14 #include "tigrinc.hh" | |
15 #include <cassert> | |
16 #include <string> | |
17 #include <vector> | |
18 #include <fstream> | |
19 #include <cstdlib> | |
20 #include <iostream> | |
21 #include <map> | |
22 | |
23 | |
24 const std::string NUCMER_STRING = "NUCMER"; //!< nucmer id string | |
25 const std::string PROMER_STRING = "PROMER"; //!< promer id string | |
26 | |
27 typedef char AlignmentType_t; //!< type of alignment data | |
28 const AlignmentType_t NULL_DATA = 0; //!< unknown alignment data type | |
29 const AlignmentType_t NUCMER_DATA = 'N'; //!< nucmer alignment data | |
30 const AlignmentType_t PROMER_DATA = 'P'; //!< promer alignment data | |
31 | |
32 typedef unsigned char Dir_t; //!< directional type | |
33 const Dir_t FORWARD_DIR = 0; //!< forward direction | |
34 const Dir_t REVERSE_DIR = 1; //!< reverse direction | |
35 | |
36 | |
37 | |
38 //===================================================== DeltaAlignment_t ======= | |
39 struct DeltaAlignment_t | |
40 //!< A single delta encoded alignment region | |
41 { | |
42 long sR; //!< start coordinate in the reference | |
43 long eR; //!< end coordinate in the reference | |
44 long sQ; //!< start coordinate in the reference | |
45 long eQ; //!< end coordinate in the reference | |
46 long idyc; //!< number of mismatches in the alignment | |
47 long simc; //!< number of similarity scores < 1 in the alignment | |
48 long stpc; //!< number of stop codons in the alignment | |
49 | |
50 float idy; //!< percent identity [0 - 100] | |
51 float sim; //!< percent similarity [0 - 100] | |
52 float stp; //!< percent stop codon [0 - 100] | |
53 | |
54 std::vector<long> deltas; //!< delta encoded alignment informaiton | |
55 | |
56 DeltaAlignment_t ( ) | |
57 { | |
58 clear ( ); | |
59 } | |
60 | |
61 void clear ( ) | |
62 { | |
63 sR = eR = sQ = eQ = 0; | |
64 idy = sim = stp = 0; | |
65 deltas.clear ( ); | |
66 } | |
67 }; | |
68 | |
69 | |
70 | |
71 //===================================================== DeltaRecord_t ========== | |
72 struct DeltaRecord_t | |
73 //!< A delta record representing the alignments between two sequences | |
74 { | |
75 std::string idR; //!< reference contig ID | |
76 std::string idQ; //!< query contig ID | |
77 long lenR; //!< length of the reference contig | |
78 long lenQ; //!< length of the query contig | |
79 | |
80 std::vector<DeltaAlignment_t> aligns; //!< alignments between the two seqs | |
81 | |
82 DeltaRecord_t ( ) | |
83 { | |
84 clear ( ); | |
85 } | |
86 | |
87 void clear ( ) | |
88 { | |
89 idR.erase ( ); | |
90 idQ.erase ( ); | |
91 lenR = lenQ = 0; | |
92 aligns.clear ( ); | |
93 } | |
94 }; | |
95 | |
96 | |
97 | |
98 //===================================================== DeltaReader_t ========== | |
99 //! \brief Delta encoded file reader class | |
100 //! | |
101 //! Handles the input of delta encoded alignment information for various MUMmer | |
102 //! utilities. Very basic functionality, can be expanded as necessary... | |
103 //! | |
104 //! \see DeltaRecord_t | |
105 //============================================================================== | |
106 class DeltaReader_t { | |
107 | |
108 private: | |
109 | |
110 std::string delta_path_m; //!< the name of the delta input file | |
111 std::ifstream delta_stream_m; //!< the delta file input stream | |
112 std::string data_type_m; //!< the type of alignment data | |
113 std::string reference_path_m; //!< the name of the reference file | |
114 std::string query_path_m; //!< the name of the query file | |
115 DeltaRecord_t record_m; //!< the current delta information record | |
116 bool is_record_m; //!< there is a valid record in record_m | |
117 bool is_open_m; //!< delta stream is open | |
118 | |
119 | |
120 //--------------------------------------------------- readNextAlignment ------ | |
121 //! \brief Reads in a delta encoded alignment | |
122 //! | |
123 //! \param align read info into this structure | |
124 //! \param read_deltas read delta information yes/no | |
125 //! \pre delta_stream is positioned at the beginning of an alignment header | |
126 //! \return void | |
127 //! | |
128 void readNextAlignment (DeltaAlignment_t & align, const bool read_deltas); | |
129 | |
130 | |
131 //--------------------------------------------------- readNextRecord --------- | |
132 //! \brief Reads in the next delta record from the delta file | |
133 //! | |
134 //! \param read_deltas read delta information yes/no | |
135 //! \pre delta file must be open | |
136 //! \return true on success, false on EOF | |
137 //! | |
138 bool readNextRecord (const bool read_deltas); | |
139 | |
140 | |
141 //--------------------------------------------------- checkStream ------------ | |
142 //! \brief Check stream status and abort program if an error has occured | |
143 //! | |
144 //! \return void | |
145 //! | |
146 void checkStream ( ) | |
147 { | |
148 if ( !delta_stream_m.good ( ) ) | |
149 { | |
150 std::cerr << "ERROR: Could not parse delta file, " | |
151 << delta_path_m << std::endl; | |
152 | |
153 | |
154 std::cerr << "error no: " | |
155 << int(delta_stream_m.rdstate() & std::ifstream::failbit) | |
156 << int(delta_stream_m.rdstate() & std::ifstream::badbit) | |
157 << int(delta_stream_m.rdstate() & std::ifstream::eofbit) | |
158 << std::endl; | |
159 exit (-1); | |
160 } | |
161 } | |
162 | |
163 | |
164 public: | |
165 //--------------------------------------------------- DeltaReader_t ---------- | |
166 //! \brief Default constructor | |
167 //! | |
168 //! \return void | |
169 //! | |
170 DeltaReader_t ( ) | |
171 { | |
172 is_record_m = false; | |
173 is_open_m = false; | |
174 } | |
175 | |
176 | |
177 //--------------------------------------------------- ~DeltaReader_t --------- | |
178 //! \brief Default destructor | |
179 //! | |
180 //! \return void | |
181 //! | |
182 ~DeltaReader_t ( ) | |
183 { | |
184 close ( ); | |
185 } | |
186 | |
187 | |
188 //--------------------------------------------------- open ------------------- | |
189 //! \brief Opens a delta file by path | |
190 //! | |
191 //! \param delta file to open | |
192 //! \return void | |
193 //! | |
194 void open (const std::string & delta_path); | |
195 | |
196 | |
197 //--------------------------------------------------- close ------------------ | |
198 //! \brief Closes any open delta file and resets class members | |
199 //! | |
200 //! \return void | |
201 //! | |
202 void close ( ) | |
203 { | |
204 delta_path_m.erase ( ); | |
205 delta_stream_m.close ( ); | |
206 data_type_m.erase ( ); | |
207 reference_path_m.erase ( ); | |
208 query_path_m.erase ( ); | |
209 record_m.clear ( ); | |
210 is_record_m = false; | |
211 is_open_m = false; | |
212 } | |
213 | |
214 | |
215 //--------------------------------------------------- readNext -------------- | |
216 //! \brief Reads in the next delta record from the delta file | |
217 //! | |
218 //! \param read_deltas read delta information yes/no | |
219 //! \pre delta file must be open | |
220 //! \return true on success, false on EOF | |
221 //! | |
222 inline bool readNext (bool getdeltas = true) | |
223 { | |
224 return readNextRecord (getdeltas); | |
225 } | |
226 | |
227 | |
228 | |
229 //--------------------------------------------------- readNextHeadersOnly ---- | |
230 //! \brief Reads in the next delta record from the delta file | |
231 //! | |
232 //! Only reads the alignment header information, does not read in the delta | |
233 //! information and leaves each alignment structure's delta vector empty. | |
234 //! | |
235 //! \pre delta file must be open | |
236 //! \return true on success, false on EOF | |
237 //! | |
238 inline bool readNextHeadersOnly ( ) | |
239 { | |
240 return readNextRecord (false); | |
241 } | |
242 | |
243 | |
244 //--------------------------------------------------- getRecord -------------- | |
245 //! \brief Returns a reference to the current delta record | |
246 //! | |
247 //! \pre readNext( ) was successfully called in advance | |
248 //! \return true on success, false on failure or end of file | |
249 //! | |
250 const DeltaRecord_t & getRecord ( ) const | |
251 { | |
252 assert (is_record_m); | |
253 return record_m; | |
254 } | |
255 | |
256 | |
257 //--------------------------------------------------- getDeltaPath ----------- | |
258 //! \brief Get the path of the current delta file | |
259 //! | |
260 //! \pre delta file is open | |
261 //! \return the path of the delta file | |
262 //! | |
263 const std::string & getDeltaPath ( ) const | |
264 { | |
265 assert (is_open_m); | |
266 return delta_path_m; | |
267 } | |
268 | |
269 | |
270 //--------------------------------------------------- getDataType ------------ | |
271 //! \brief Get the data type of the current delta file | |
272 //! | |
273 //! \pre delta file is open | |
274 //! \return the data type of the current file | |
275 //! | |
276 const std::string & getDataType ( ) const | |
277 { | |
278 assert (is_open_m); | |
279 return data_type_m; | |
280 } | |
281 | |
282 | |
283 //--------------------------------------------------- getReferencePath ------- | |
284 //! \brief Get the path of the MUMmer reference file | |
285 //! | |
286 //! \pre delta file is open | |
287 //! \return the path of the MUMmer reference file | |
288 //! | |
289 const std::string & getReferencePath ( ) const | |
290 { | |
291 assert (is_open_m); | |
292 return reference_path_m; | |
293 } | |
294 | |
295 | |
296 //--------------------------------------------------- getQueryPath ----------- | |
297 //! \brief Get the path of the MUMmer query file | |
298 //! | |
299 //! \pre delta file is open | |
300 //! \return the path of the MUMmer query file | |
301 //! | |
302 const std::string & getQueryPath ( ) const | |
303 { | |
304 assert (is_open_m); | |
305 return query_path_m; | |
306 } | |
307 }; | |
308 | |
309 | |
310 | |
311 //===================================================== SNP_t ================== | |
312 struct DeltaEdgelet_t; | |
313 struct DeltaEdge_t; | |
314 struct DeltaNode_t; | |
315 struct SNP_t | |
316 //!< A single nuc/aa poly | |
317 { | |
318 long buff; | |
319 char cQ, cR; | |
320 long pQ, pR; | |
321 int conQ, conR; | |
322 std::string ctxQ, ctxR; | |
323 DeltaEdgelet_t * lp; | |
324 DeltaEdge_t * ep; | |
325 | |
326 SNP_t ( ) | |
327 { | |
328 cQ = cR = 0; | |
329 buff = pQ = pR = 0; | |
330 conQ = conR = 0; | |
331 }; | |
332 }; | |
333 | |
334 | |
335 | |
336 //===================================================== DeltaEdgelet_t ========= | |
337 struct DeltaEdgelet_t | |
338 //!< A piece of a delta graph edge, a single alignment | |
339 { | |
340 unsigned char isGOOD : 1; //!< meets the requirements | |
341 unsigned char isQLIS : 1; //!< is part of the query's LIS | |
342 unsigned char isRLIS : 1; //!< is part of the reference's LIS | |
343 unsigned char isGLIS : 1; //!< is part of the reference/query LIS | |
344 unsigned char dirR : 1; //!< reference match direction | |
345 unsigned char dirQ : 1; //!< query match direction | |
346 | |
347 DeltaEdge_t * edge; | |
348 float idy, sim, stp; //!< percent identity [0 - 1] | |
349 long idyc, simc, stpc; //!< idy, sim, stp counts | |
350 long loQ, hiQ, loR, hiR; //!< alignment bounds | |
351 int frmQ, frmR; //!< reading frame | |
352 | |
353 std::string delta; //!< delta information | |
354 std::vector<SNP_t *> snps; //!< snps for this edgelet | |
355 | |
356 DeltaEdgelet_t ( ) | |
357 { | |
358 edge = NULL; | |
359 isGOOD = true; | |
360 isQLIS = isRLIS = isGLIS = false; | |
361 dirR = dirQ = FORWARD_DIR; | |
362 idy = sim = stp = 0; | |
363 idyc = simc = stpc = 0; | |
364 loQ = hiQ = loR = hiR = 0; | |
365 frmQ = frmR = 1; | |
366 } | |
367 | |
368 ~DeltaEdgelet_t ( ) | |
369 { | |
370 std::vector<SNP_t *>::iterator i; | |
371 for ( i = snps . begin( ); i != snps . end( ); ++ i ) | |
372 delete (*i); | |
373 } | |
374 | |
375 bool isNegative() const | |
376 { return ( dirR != dirQ ); } | |
377 | |
378 bool isPositive() const | |
379 { return ( dirR == dirQ ); } | |
380 | |
381 int slope() const | |
382 { return ( dirR == dirQ ? +1 : -1 ); } | |
383 | |
384 long loR2Q() const | |
385 { return ( isPositive() ? loQ : hiQ ); } | |
386 | |
387 long hiR2Q() const | |
388 { return ( isPositive() ? hiQ : loQ ); } | |
389 | |
390 long loQ2R() const | |
391 { return ( isPositive() ? loR : hiR ); } | |
392 | |
393 long hiQ2R() const | |
394 { return ( isPositive() ? hiR : loR ); } | |
395 }; | |
396 | |
397 | |
398 | |
399 //===================================================== DeltaEdge_t ============ | |
400 struct DeltaEdge_t | |
401 //!< A delta graph edge, alignments between a single reference and query | |
402 { | |
403 DeltaNode_t * refnode; //!< the adjacent reference node | |
404 DeltaNode_t * qrynode; //!< the adjacent query node | |
405 std::vector<DeltaEdgelet_t *> edgelets; //!< the set of individual alignments | |
406 | |
407 DeltaEdge_t ( ) | |
408 { refnode = qrynode = NULL; } | |
409 | |
410 ~DeltaEdge_t ( ) | |
411 { | |
412 std::vector<DeltaEdgelet_t *>::iterator i; | |
413 for ( i = edgelets . begin( ); i != edgelets . end( ); ++ i ) | |
414 delete (*i); | |
415 } | |
416 | |
417 void build (const DeltaRecord_t & rec); | |
418 }; | |
419 | |
420 | |
421 //===================================================== DeltaNode_t ============ | |
422 struct DeltaNode_t | |
423 //!< A delta graph node, contains the sequence information | |
424 { | |
425 const std::string * id; //!< the id of the sequence | |
426 char * seq; //!< the DNA sequence | |
427 long len; //!< the length of the sequence | |
428 std::vector<DeltaEdge_t *> edges; //!< the set of related edges | |
429 | |
430 DeltaNode_t ( ) | |
431 { id = NULL; seq = NULL; len = 0; } | |
432 | |
433 ~DeltaNode_t ( ) | |
434 { free (seq); } // DeltaGraph_t will take care of destructing the edges | |
435 }; | |
436 | |
437 | |
438 | |
439 //===================================================== DeltaGraph_t =========== | |
440 //! \brief A graph of sequences (nodes) and their alignments (edges) | |
441 //! | |
442 //! A bipartite graph with two partite sets, R and Q, where R is the set of | |
443 //! reference sequences and Q is the set of query sequences. These nodes are | |
444 //! named "DeltaNode_t". We connect a node in R to a node in Q if an alignment | |
445 //! is present between the two sequences. The group of all alignments between | |
446 //! the two is named "DeltaEdge_t" and a single alignment between the two is | |
447 //! named a "DeltaEdgelet_t". Alignment coordinates reference the forward | |
448 //! strand and are stored lo before hi. | |
449 //! | |
450 //============================================================================== | |
451 class DeltaGraph_t | |
452 { | |
453 public: | |
454 | |
455 std::map<std::string, DeltaNode_t> refnodes; | |
456 //!< the reference graph nodes, 1 per aligned sequence | |
457 | |
458 std::map<std::string, DeltaNode_t> qrynodes; | |
459 //!< the query graph nodes, 1 per aligned sequence | |
460 | |
461 std::string refpath; //!< path of the reference FastA file | |
462 std::string qrypath; //!< path of the query FastA file | |
463 AlignmentType_t datatype; //!< alignment data type | |
464 | |
465 DeltaGraph_t() | |
466 { datatype = NULL_DATA; } | |
467 | |
468 ~DeltaGraph_t ( ) | |
469 { clear(); } | |
470 | |
471 void build(const std::string & deltapath, bool getdeltas = true); | |
472 void clean(); | |
473 void clear(); | |
474 long getNodeCount(); | |
475 long getEdgeCount(); | |
476 long getEdgeletCount(); | |
477 | |
478 void flagGOOD(); | |
479 void flag1to1(float epsilon = -1, float maxolap = 100.0); | |
480 void flagMtoM(float epsilon = -1, float maxolap = 100.0); | |
481 void flagGLIS(float epsilon = -1); | |
482 void flagQLIS(float epsilon = -1, | |
483 float maxolap = 100.0, | |
484 bool flagBad = true); | |
485 void flagRLIS(float epsilon = -1, | |
486 float maxolap = 100.0, | |
487 bool flagbad = true); | |
488 void flagScore(long minlen, float minidy); | |
489 void flagUNIQ(float minuniq); | |
490 | |
491 void loadSequences(); | |
492 std::ostream & outputDelta(std::ostream & out); | |
493 }; | |
494 | |
495 #endif // #ifndef __DELTA_HH |