comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/delta.hh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 ////////////////////////////////////////////////////////////////////////////////
2 //! \file
3 //! \author Adam M Phillippy
4 //! \date 03/26/2003
5 //!
6 //! \brief Class declarations for the handling of delta alignment files
7 //!
8 //! \see delta.cc
9 ////////////////////////////////////////////////////////////////////////////////
10
11 #ifndef __DELTA_HH
12 #define __DELTA_HH
13
14 #include "tigrinc.hh"
15 #include <cassert>
16 #include <string>
17 #include <vector>
18 #include <fstream>
19 #include <cstdlib>
20 #include <iostream>
21 #include <map>
22
23
24 const std::string NUCMER_STRING = "NUCMER"; //!< nucmer id string
25 const std::string PROMER_STRING = "PROMER"; //!< promer id string
26
27 typedef char AlignmentType_t; //!< type of alignment data
28 const AlignmentType_t NULL_DATA = 0; //!< unknown alignment data type
29 const AlignmentType_t NUCMER_DATA = 'N'; //!< nucmer alignment data
30 const AlignmentType_t PROMER_DATA = 'P'; //!< promer alignment data
31
32 typedef unsigned char Dir_t; //!< directional type
33 const Dir_t FORWARD_DIR = 0; //!< forward direction
34 const Dir_t REVERSE_DIR = 1; //!< reverse direction
35
36
37
38 //===================================================== DeltaAlignment_t =======
39 struct DeltaAlignment_t
40 //!< A single delta encoded alignment region
41 {
42 long sR; //!< start coordinate in the reference
43 long eR; //!< end coordinate in the reference
44 long sQ; //!< start coordinate in the reference
45 long eQ; //!< end coordinate in the reference
46 long idyc; //!< number of mismatches in the alignment
47 long simc; //!< number of similarity scores < 1 in the alignment
48 long stpc; //!< number of stop codons in the alignment
49
50 float idy; //!< percent identity [0 - 100]
51 float sim; //!< percent similarity [0 - 100]
52 float stp; //!< percent stop codon [0 - 100]
53
54 std::vector<long> deltas; //!< delta encoded alignment informaiton
55
56 DeltaAlignment_t ( )
57 {
58 clear ( );
59 }
60
61 void clear ( )
62 {
63 sR = eR = sQ = eQ = 0;
64 idy = sim = stp = 0;
65 deltas.clear ( );
66 }
67 };
68
69
70
71 //===================================================== DeltaRecord_t ==========
72 struct DeltaRecord_t
73 //!< A delta record representing the alignments between two sequences
74 {
75 std::string idR; //!< reference contig ID
76 std::string idQ; //!< query contig ID
77 long lenR; //!< length of the reference contig
78 long lenQ; //!< length of the query contig
79
80 std::vector<DeltaAlignment_t> aligns; //!< alignments between the two seqs
81
82 DeltaRecord_t ( )
83 {
84 clear ( );
85 }
86
87 void clear ( )
88 {
89 idR.erase ( );
90 idQ.erase ( );
91 lenR = lenQ = 0;
92 aligns.clear ( );
93 }
94 };
95
96
97
98 //===================================================== DeltaReader_t ==========
99 //! \brief Delta encoded file reader class
100 //!
101 //! Handles the input of delta encoded alignment information for various MUMmer
102 //! utilities. Very basic functionality, can be expanded as necessary...
103 //!
104 //! \see DeltaRecord_t
105 //==============================================================================
106 class DeltaReader_t {
107
108 private:
109
110 std::string delta_path_m; //!< the name of the delta input file
111 std::ifstream delta_stream_m; //!< the delta file input stream
112 std::string data_type_m; //!< the type of alignment data
113 std::string reference_path_m; //!< the name of the reference file
114 std::string query_path_m; //!< the name of the query file
115 DeltaRecord_t record_m; //!< the current delta information record
116 bool is_record_m; //!< there is a valid record in record_m
117 bool is_open_m; //!< delta stream is open
118
119
120 //--------------------------------------------------- readNextAlignment ------
121 //! \brief Reads in a delta encoded alignment
122 //!
123 //! \param align read info into this structure
124 //! \param read_deltas read delta information yes/no
125 //! \pre delta_stream is positioned at the beginning of an alignment header
126 //! \return void
127 //!
128 void readNextAlignment (DeltaAlignment_t & align, const bool read_deltas);
129
130
131 //--------------------------------------------------- readNextRecord ---------
132 //! \brief Reads in the next delta record from the delta file
133 //!
134 //! \param read_deltas read delta information yes/no
135 //! \pre delta file must be open
136 //! \return true on success, false on EOF
137 //!
138 bool readNextRecord (const bool read_deltas);
139
140
141 //--------------------------------------------------- checkStream ------------
142 //! \brief Check stream status and abort program if an error has occured
143 //!
144 //! \return void
145 //!
146 void checkStream ( )
147 {
148 if ( !delta_stream_m.good ( ) )
149 {
150 std::cerr << "ERROR: Could not parse delta file, "
151 << delta_path_m << std::endl;
152
153
154 std::cerr << "error no: "
155 << int(delta_stream_m.rdstate() & std::ifstream::failbit)
156 << int(delta_stream_m.rdstate() & std::ifstream::badbit)
157 << int(delta_stream_m.rdstate() & std::ifstream::eofbit)
158 << std::endl;
159 exit (-1);
160 }
161 }
162
163
164 public:
165 //--------------------------------------------------- DeltaReader_t ----------
166 //! \brief Default constructor
167 //!
168 //! \return void
169 //!
170 DeltaReader_t ( )
171 {
172 is_record_m = false;
173 is_open_m = false;
174 }
175
176
177 //--------------------------------------------------- ~DeltaReader_t ---------
178 //! \brief Default destructor
179 //!
180 //! \return void
181 //!
182 ~DeltaReader_t ( )
183 {
184 close ( );
185 }
186
187
188 //--------------------------------------------------- open -------------------
189 //! \brief Opens a delta file by path
190 //!
191 //! \param delta file to open
192 //! \return void
193 //!
194 void open (const std::string & delta_path);
195
196
197 //--------------------------------------------------- close ------------------
198 //! \brief Closes any open delta file and resets class members
199 //!
200 //! \return void
201 //!
202 void close ( )
203 {
204 delta_path_m.erase ( );
205 delta_stream_m.close ( );
206 data_type_m.erase ( );
207 reference_path_m.erase ( );
208 query_path_m.erase ( );
209 record_m.clear ( );
210 is_record_m = false;
211 is_open_m = false;
212 }
213
214
215 //--------------------------------------------------- readNext --------------
216 //! \brief Reads in the next delta record from the delta file
217 //!
218 //! \param read_deltas read delta information yes/no
219 //! \pre delta file must be open
220 //! \return true on success, false on EOF
221 //!
222 inline bool readNext (bool getdeltas = true)
223 {
224 return readNextRecord (getdeltas);
225 }
226
227
228
229 //--------------------------------------------------- readNextHeadersOnly ----
230 //! \brief Reads in the next delta record from the delta file
231 //!
232 //! Only reads the alignment header information, does not read in the delta
233 //! information and leaves each alignment structure's delta vector empty.
234 //!
235 //! \pre delta file must be open
236 //! \return true on success, false on EOF
237 //!
238 inline bool readNextHeadersOnly ( )
239 {
240 return readNextRecord (false);
241 }
242
243
244 //--------------------------------------------------- getRecord --------------
245 //! \brief Returns a reference to the current delta record
246 //!
247 //! \pre readNext( ) was successfully called in advance
248 //! \return true on success, false on failure or end of file
249 //!
250 const DeltaRecord_t & getRecord ( ) const
251 {
252 assert (is_record_m);
253 return record_m;
254 }
255
256
257 //--------------------------------------------------- getDeltaPath -----------
258 //! \brief Get the path of the current delta file
259 //!
260 //! \pre delta file is open
261 //! \return the path of the delta file
262 //!
263 const std::string & getDeltaPath ( ) const
264 {
265 assert (is_open_m);
266 return delta_path_m;
267 }
268
269
270 //--------------------------------------------------- getDataType ------------
271 //! \brief Get the data type of the current delta file
272 //!
273 //! \pre delta file is open
274 //! \return the data type of the current file
275 //!
276 const std::string & getDataType ( ) const
277 {
278 assert (is_open_m);
279 return data_type_m;
280 }
281
282
283 //--------------------------------------------------- getReferencePath -------
284 //! \brief Get the path of the MUMmer reference file
285 //!
286 //! \pre delta file is open
287 //! \return the path of the MUMmer reference file
288 //!
289 const std::string & getReferencePath ( ) const
290 {
291 assert (is_open_m);
292 return reference_path_m;
293 }
294
295
296 //--------------------------------------------------- getQueryPath -----------
297 //! \brief Get the path of the MUMmer query file
298 //!
299 //! \pre delta file is open
300 //! \return the path of the MUMmer query file
301 //!
302 const std::string & getQueryPath ( ) const
303 {
304 assert (is_open_m);
305 return query_path_m;
306 }
307 };
308
309
310
311 //===================================================== SNP_t ==================
312 struct DeltaEdgelet_t;
313 struct DeltaEdge_t;
314 struct DeltaNode_t;
315 struct SNP_t
316 //!< A single nuc/aa poly
317 {
318 long buff;
319 char cQ, cR;
320 long pQ, pR;
321 int conQ, conR;
322 std::string ctxQ, ctxR;
323 DeltaEdgelet_t * lp;
324 DeltaEdge_t * ep;
325
326 SNP_t ( )
327 {
328 cQ = cR = 0;
329 buff = pQ = pR = 0;
330 conQ = conR = 0;
331 };
332 };
333
334
335
336 //===================================================== DeltaEdgelet_t =========
337 struct DeltaEdgelet_t
338 //!< A piece of a delta graph edge, a single alignment
339 {
340 unsigned char isGOOD : 1; //!< meets the requirements
341 unsigned char isQLIS : 1; //!< is part of the query's LIS
342 unsigned char isRLIS : 1; //!< is part of the reference's LIS
343 unsigned char isGLIS : 1; //!< is part of the reference/query LIS
344 unsigned char dirR : 1; //!< reference match direction
345 unsigned char dirQ : 1; //!< query match direction
346
347 DeltaEdge_t * edge;
348 float idy, sim, stp; //!< percent identity [0 - 1]
349 long idyc, simc, stpc; //!< idy, sim, stp counts
350 long loQ, hiQ, loR, hiR; //!< alignment bounds
351 int frmQ, frmR; //!< reading frame
352
353 std::string delta; //!< delta information
354 std::vector<SNP_t *> snps; //!< snps for this edgelet
355
356 DeltaEdgelet_t ( )
357 {
358 edge = NULL;
359 isGOOD = true;
360 isQLIS = isRLIS = isGLIS = false;
361 dirR = dirQ = FORWARD_DIR;
362 idy = sim = stp = 0;
363 idyc = simc = stpc = 0;
364 loQ = hiQ = loR = hiR = 0;
365 frmQ = frmR = 1;
366 }
367
368 ~DeltaEdgelet_t ( )
369 {
370 std::vector<SNP_t *>::iterator i;
371 for ( i = snps . begin( ); i != snps . end( ); ++ i )
372 delete (*i);
373 }
374
375 bool isNegative() const
376 { return ( dirR != dirQ ); }
377
378 bool isPositive() const
379 { return ( dirR == dirQ ); }
380
381 int slope() const
382 { return ( dirR == dirQ ? +1 : -1 ); }
383
384 long loR2Q() const
385 { return ( isPositive() ? loQ : hiQ ); }
386
387 long hiR2Q() const
388 { return ( isPositive() ? hiQ : loQ ); }
389
390 long loQ2R() const
391 { return ( isPositive() ? loR : hiR ); }
392
393 long hiQ2R() const
394 { return ( isPositive() ? hiR : loR ); }
395 };
396
397
398
399 //===================================================== DeltaEdge_t ============
400 struct DeltaEdge_t
401 //!< A delta graph edge, alignments between a single reference and query
402 {
403 DeltaNode_t * refnode; //!< the adjacent reference node
404 DeltaNode_t * qrynode; //!< the adjacent query node
405 std::vector<DeltaEdgelet_t *> edgelets; //!< the set of individual alignments
406
407 DeltaEdge_t ( )
408 { refnode = qrynode = NULL; }
409
410 ~DeltaEdge_t ( )
411 {
412 std::vector<DeltaEdgelet_t *>::iterator i;
413 for ( i = edgelets . begin( ); i != edgelets . end( ); ++ i )
414 delete (*i);
415 }
416
417 void build (const DeltaRecord_t & rec);
418 };
419
420
421 //===================================================== DeltaNode_t ============
422 struct DeltaNode_t
423 //!< A delta graph node, contains the sequence information
424 {
425 const std::string * id; //!< the id of the sequence
426 char * seq; //!< the DNA sequence
427 long len; //!< the length of the sequence
428 std::vector<DeltaEdge_t *> edges; //!< the set of related edges
429
430 DeltaNode_t ( )
431 { id = NULL; seq = NULL; len = 0; }
432
433 ~DeltaNode_t ( )
434 { free (seq); } // DeltaGraph_t will take care of destructing the edges
435 };
436
437
438
439 //===================================================== DeltaGraph_t ===========
440 //! \brief A graph of sequences (nodes) and their alignments (edges)
441 //!
442 //! A bipartite graph with two partite sets, R and Q, where R is the set of
443 //! reference sequences and Q is the set of query sequences. These nodes are
444 //! named "DeltaNode_t". We connect a node in R to a node in Q if an alignment
445 //! is present between the two sequences. The group of all alignments between
446 //! the two is named "DeltaEdge_t" and a single alignment between the two is
447 //! named a "DeltaEdgelet_t". Alignment coordinates reference the forward
448 //! strand and are stored lo before hi.
449 //!
450 //==============================================================================
451 class DeltaGraph_t
452 {
453 public:
454
455 std::map<std::string, DeltaNode_t> refnodes;
456 //!< the reference graph nodes, 1 per aligned sequence
457
458 std::map<std::string, DeltaNode_t> qrynodes;
459 //!< the query graph nodes, 1 per aligned sequence
460
461 std::string refpath; //!< path of the reference FastA file
462 std::string qrypath; //!< path of the query FastA file
463 AlignmentType_t datatype; //!< alignment data type
464
465 DeltaGraph_t()
466 { datatype = NULL_DATA; }
467
468 ~DeltaGraph_t ( )
469 { clear(); }
470
471 void build(const std::string & deltapath, bool getdeltas = true);
472 void clean();
473 void clear();
474 long getNodeCount();
475 long getEdgeCount();
476 long getEdgeletCount();
477
478 void flagGOOD();
479 void flag1to1(float epsilon = -1, float maxolap = 100.0);
480 void flagMtoM(float epsilon = -1, float maxolap = 100.0);
481 void flagGLIS(float epsilon = -1);
482 void flagQLIS(float epsilon = -1,
483 float maxolap = 100.0,
484 bool flagBad = true);
485 void flagRLIS(float epsilon = -1,
486 float maxolap = 100.0,
487 bool flagbad = true);
488 void flagScore(long minlen, float minidy);
489 void flagUNIQ(float minuniq);
490
491 void loadSequences();
492 std::ostream & outputDelta(std::ostream & out);
493 };
494
495 #endif // #ifndef __DELTA_HH