annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/delta.hh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 ////////////////////////////////////////////////////////////////////////////////
jpayne@69 2 //! \file
jpayne@69 3 //! \author Adam M Phillippy
jpayne@69 4 //! \date 03/26/2003
jpayne@69 5 //!
jpayne@69 6 //! \brief Class declarations for the handling of delta alignment files
jpayne@69 7 //!
jpayne@69 8 //! \see delta.cc
jpayne@69 9 ////////////////////////////////////////////////////////////////////////////////
jpayne@69 10
jpayne@69 11 #ifndef __DELTA_HH
jpayne@69 12 #define __DELTA_HH
jpayne@69 13
jpayne@69 14 #include "tigrinc.hh"
jpayne@69 15 #include <cassert>
jpayne@69 16 #include <string>
jpayne@69 17 #include <vector>
jpayne@69 18 #include <fstream>
jpayne@69 19 #include <cstdlib>
jpayne@69 20 #include <iostream>
jpayne@69 21 #include <map>
jpayne@69 22
jpayne@69 23
jpayne@69 24 const std::string NUCMER_STRING = "NUCMER"; //!< nucmer id string
jpayne@69 25 const std::string PROMER_STRING = "PROMER"; //!< promer id string
jpayne@69 26
jpayne@69 27 typedef char AlignmentType_t; //!< type of alignment data
jpayne@69 28 const AlignmentType_t NULL_DATA = 0; //!< unknown alignment data type
jpayne@69 29 const AlignmentType_t NUCMER_DATA = 'N'; //!< nucmer alignment data
jpayne@69 30 const AlignmentType_t PROMER_DATA = 'P'; //!< promer alignment data
jpayne@69 31
jpayne@69 32 typedef unsigned char Dir_t; //!< directional type
jpayne@69 33 const Dir_t FORWARD_DIR = 0; //!< forward direction
jpayne@69 34 const Dir_t REVERSE_DIR = 1; //!< reverse direction
jpayne@69 35
jpayne@69 36
jpayne@69 37
jpayne@69 38 //===================================================== DeltaAlignment_t =======
jpayne@69 39 struct DeltaAlignment_t
jpayne@69 40 //!< A single delta encoded alignment region
jpayne@69 41 {
jpayne@69 42 long sR; //!< start coordinate in the reference
jpayne@69 43 long eR; //!< end coordinate in the reference
jpayne@69 44 long sQ; //!< start coordinate in the reference
jpayne@69 45 long eQ; //!< end coordinate in the reference
jpayne@69 46 long idyc; //!< number of mismatches in the alignment
jpayne@69 47 long simc; //!< number of similarity scores < 1 in the alignment
jpayne@69 48 long stpc; //!< number of stop codons in the alignment
jpayne@69 49
jpayne@69 50 float idy; //!< percent identity [0 - 100]
jpayne@69 51 float sim; //!< percent similarity [0 - 100]
jpayne@69 52 float stp; //!< percent stop codon [0 - 100]
jpayne@69 53
jpayne@69 54 std::vector<long> deltas; //!< delta encoded alignment informaiton
jpayne@69 55
jpayne@69 56 DeltaAlignment_t ( )
jpayne@69 57 {
jpayne@69 58 clear ( );
jpayne@69 59 }
jpayne@69 60
jpayne@69 61 void clear ( )
jpayne@69 62 {
jpayne@69 63 sR = eR = sQ = eQ = 0;
jpayne@69 64 idy = sim = stp = 0;
jpayne@69 65 deltas.clear ( );
jpayne@69 66 }
jpayne@69 67 };
jpayne@69 68
jpayne@69 69
jpayne@69 70
jpayne@69 71 //===================================================== DeltaRecord_t ==========
jpayne@69 72 struct DeltaRecord_t
jpayne@69 73 //!< A delta record representing the alignments between two sequences
jpayne@69 74 {
jpayne@69 75 std::string idR; //!< reference contig ID
jpayne@69 76 std::string idQ; //!< query contig ID
jpayne@69 77 long lenR; //!< length of the reference contig
jpayne@69 78 long lenQ; //!< length of the query contig
jpayne@69 79
jpayne@69 80 std::vector<DeltaAlignment_t> aligns; //!< alignments between the two seqs
jpayne@69 81
jpayne@69 82 DeltaRecord_t ( )
jpayne@69 83 {
jpayne@69 84 clear ( );
jpayne@69 85 }
jpayne@69 86
jpayne@69 87 void clear ( )
jpayne@69 88 {
jpayne@69 89 idR.erase ( );
jpayne@69 90 idQ.erase ( );
jpayne@69 91 lenR = lenQ = 0;
jpayne@69 92 aligns.clear ( );
jpayne@69 93 }
jpayne@69 94 };
jpayne@69 95
jpayne@69 96
jpayne@69 97
jpayne@69 98 //===================================================== DeltaReader_t ==========
jpayne@69 99 //! \brief Delta encoded file reader class
jpayne@69 100 //!
jpayne@69 101 //! Handles the input of delta encoded alignment information for various MUMmer
jpayne@69 102 //! utilities. Very basic functionality, can be expanded as necessary...
jpayne@69 103 //!
jpayne@69 104 //! \see DeltaRecord_t
jpayne@69 105 //==============================================================================
jpayne@69 106 class DeltaReader_t {
jpayne@69 107
jpayne@69 108 private:
jpayne@69 109
jpayne@69 110 std::string delta_path_m; //!< the name of the delta input file
jpayne@69 111 std::ifstream delta_stream_m; //!< the delta file input stream
jpayne@69 112 std::string data_type_m; //!< the type of alignment data
jpayne@69 113 std::string reference_path_m; //!< the name of the reference file
jpayne@69 114 std::string query_path_m; //!< the name of the query file
jpayne@69 115 DeltaRecord_t record_m; //!< the current delta information record
jpayne@69 116 bool is_record_m; //!< there is a valid record in record_m
jpayne@69 117 bool is_open_m; //!< delta stream is open
jpayne@69 118
jpayne@69 119
jpayne@69 120 //--------------------------------------------------- readNextAlignment ------
jpayne@69 121 //! \brief Reads in a delta encoded alignment
jpayne@69 122 //!
jpayne@69 123 //! \param align read info into this structure
jpayne@69 124 //! \param read_deltas read delta information yes/no
jpayne@69 125 //! \pre delta_stream is positioned at the beginning of an alignment header
jpayne@69 126 //! \return void
jpayne@69 127 //!
jpayne@69 128 void readNextAlignment (DeltaAlignment_t & align, const bool read_deltas);
jpayne@69 129
jpayne@69 130
jpayne@69 131 //--------------------------------------------------- readNextRecord ---------
jpayne@69 132 //! \brief Reads in the next delta record from the delta file
jpayne@69 133 //!
jpayne@69 134 //! \param read_deltas read delta information yes/no
jpayne@69 135 //! \pre delta file must be open
jpayne@69 136 //! \return true on success, false on EOF
jpayne@69 137 //!
jpayne@69 138 bool readNextRecord (const bool read_deltas);
jpayne@69 139
jpayne@69 140
jpayne@69 141 //--------------------------------------------------- checkStream ------------
jpayne@69 142 //! \brief Check stream status and abort program if an error has occured
jpayne@69 143 //!
jpayne@69 144 //! \return void
jpayne@69 145 //!
jpayne@69 146 void checkStream ( )
jpayne@69 147 {
jpayne@69 148 if ( !delta_stream_m.good ( ) )
jpayne@69 149 {
jpayne@69 150 std::cerr << "ERROR: Could not parse delta file, "
jpayne@69 151 << delta_path_m << std::endl;
jpayne@69 152
jpayne@69 153
jpayne@69 154 std::cerr << "error no: "
jpayne@69 155 << int(delta_stream_m.rdstate() & std::ifstream::failbit)
jpayne@69 156 << int(delta_stream_m.rdstate() & std::ifstream::badbit)
jpayne@69 157 << int(delta_stream_m.rdstate() & std::ifstream::eofbit)
jpayne@69 158 << std::endl;
jpayne@69 159 exit (-1);
jpayne@69 160 }
jpayne@69 161 }
jpayne@69 162
jpayne@69 163
jpayne@69 164 public:
jpayne@69 165 //--------------------------------------------------- DeltaReader_t ----------
jpayne@69 166 //! \brief Default constructor
jpayne@69 167 //!
jpayne@69 168 //! \return void
jpayne@69 169 //!
jpayne@69 170 DeltaReader_t ( )
jpayne@69 171 {
jpayne@69 172 is_record_m = false;
jpayne@69 173 is_open_m = false;
jpayne@69 174 }
jpayne@69 175
jpayne@69 176
jpayne@69 177 //--------------------------------------------------- ~DeltaReader_t ---------
jpayne@69 178 //! \brief Default destructor
jpayne@69 179 //!
jpayne@69 180 //! \return void
jpayne@69 181 //!
jpayne@69 182 ~DeltaReader_t ( )
jpayne@69 183 {
jpayne@69 184 close ( );
jpayne@69 185 }
jpayne@69 186
jpayne@69 187
jpayne@69 188 //--------------------------------------------------- open -------------------
jpayne@69 189 //! \brief Opens a delta file by path
jpayne@69 190 //!
jpayne@69 191 //! \param delta file to open
jpayne@69 192 //! \return void
jpayne@69 193 //!
jpayne@69 194 void open (const std::string & delta_path);
jpayne@69 195
jpayne@69 196
jpayne@69 197 //--------------------------------------------------- close ------------------
jpayne@69 198 //! \brief Closes any open delta file and resets class members
jpayne@69 199 //!
jpayne@69 200 //! \return void
jpayne@69 201 //!
jpayne@69 202 void close ( )
jpayne@69 203 {
jpayne@69 204 delta_path_m.erase ( );
jpayne@69 205 delta_stream_m.close ( );
jpayne@69 206 data_type_m.erase ( );
jpayne@69 207 reference_path_m.erase ( );
jpayne@69 208 query_path_m.erase ( );
jpayne@69 209 record_m.clear ( );
jpayne@69 210 is_record_m = false;
jpayne@69 211 is_open_m = false;
jpayne@69 212 }
jpayne@69 213
jpayne@69 214
jpayne@69 215 //--------------------------------------------------- readNext --------------
jpayne@69 216 //! \brief Reads in the next delta record from the delta file
jpayne@69 217 //!
jpayne@69 218 //! \param read_deltas read delta information yes/no
jpayne@69 219 //! \pre delta file must be open
jpayne@69 220 //! \return true on success, false on EOF
jpayne@69 221 //!
jpayne@69 222 inline bool readNext (bool getdeltas = true)
jpayne@69 223 {
jpayne@69 224 return readNextRecord (getdeltas);
jpayne@69 225 }
jpayne@69 226
jpayne@69 227
jpayne@69 228
jpayne@69 229 //--------------------------------------------------- readNextHeadersOnly ----
jpayne@69 230 //! \brief Reads in the next delta record from the delta file
jpayne@69 231 //!
jpayne@69 232 //! Only reads the alignment header information, does not read in the delta
jpayne@69 233 //! information and leaves each alignment structure's delta vector empty.
jpayne@69 234 //!
jpayne@69 235 //! \pre delta file must be open
jpayne@69 236 //! \return true on success, false on EOF
jpayne@69 237 //!
jpayne@69 238 inline bool readNextHeadersOnly ( )
jpayne@69 239 {
jpayne@69 240 return readNextRecord (false);
jpayne@69 241 }
jpayne@69 242
jpayne@69 243
jpayne@69 244 //--------------------------------------------------- getRecord --------------
jpayne@69 245 //! \brief Returns a reference to the current delta record
jpayne@69 246 //!
jpayne@69 247 //! \pre readNext( ) was successfully called in advance
jpayne@69 248 //! \return true on success, false on failure or end of file
jpayne@69 249 //!
jpayne@69 250 const DeltaRecord_t & getRecord ( ) const
jpayne@69 251 {
jpayne@69 252 assert (is_record_m);
jpayne@69 253 return record_m;
jpayne@69 254 }
jpayne@69 255
jpayne@69 256
jpayne@69 257 //--------------------------------------------------- getDeltaPath -----------
jpayne@69 258 //! \brief Get the path of the current delta file
jpayne@69 259 //!
jpayne@69 260 //! \pre delta file is open
jpayne@69 261 //! \return the path of the delta file
jpayne@69 262 //!
jpayne@69 263 const std::string & getDeltaPath ( ) const
jpayne@69 264 {
jpayne@69 265 assert (is_open_m);
jpayne@69 266 return delta_path_m;
jpayne@69 267 }
jpayne@69 268
jpayne@69 269
jpayne@69 270 //--------------------------------------------------- getDataType ------------
jpayne@69 271 //! \brief Get the data type of the current delta file
jpayne@69 272 //!
jpayne@69 273 //! \pre delta file is open
jpayne@69 274 //! \return the data type of the current file
jpayne@69 275 //!
jpayne@69 276 const std::string & getDataType ( ) const
jpayne@69 277 {
jpayne@69 278 assert (is_open_m);
jpayne@69 279 return data_type_m;
jpayne@69 280 }
jpayne@69 281
jpayne@69 282
jpayne@69 283 //--------------------------------------------------- getReferencePath -------
jpayne@69 284 //! \brief Get the path of the MUMmer reference file
jpayne@69 285 //!
jpayne@69 286 //! \pre delta file is open
jpayne@69 287 //! \return the path of the MUMmer reference file
jpayne@69 288 //!
jpayne@69 289 const std::string & getReferencePath ( ) const
jpayne@69 290 {
jpayne@69 291 assert (is_open_m);
jpayne@69 292 return reference_path_m;
jpayne@69 293 }
jpayne@69 294
jpayne@69 295
jpayne@69 296 //--------------------------------------------------- getQueryPath -----------
jpayne@69 297 //! \brief Get the path of the MUMmer query file
jpayne@69 298 //!
jpayne@69 299 //! \pre delta file is open
jpayne@69 300 //! \return the path of the MUMmer query file
jpayne@69 301 //!
jpayne@69 302 const std::string & getQueryPath ( ) const
jpayne@69 303 {
jpayne@69 304 assert (is_open_m);
jpayne@69 305 return query_path_m;
jpayne@69 306 }
jpayne@69 307 };
jpayne@69 308
jpayne@69 309
jpayne@69 310
jpayne@69 311 //===================================================== SNP_t ==================
jpayne@69 312 struct DeltaEdgelet_t;
jpayne@69 313 struct DeltaEdge_t;
jpayne@69 314 struct DeltaNode_t;
jpayne@69 315 struct SNP_t
jpayne@69 316 //!< A single nuc/aa poly
jpayne@69 317 {
jpayne@69 318 long buff;
jpayne@69 319 char cQ, cR;
jpayne@69 320 long pQ, pR;
jpayne@69 321 int conQ, conR;
jpayne@69 322 std::string ctxQ, ctxR;
jpayne@69 323 DeltaEdgelet_t * lp;
jpayne@69 324 DeltaEdge_t * ep;
jpayne@69 325
jpayne@69 326 SNP_t ( )
jpayne@69 327 {
jpayne@69 328 cQ = cR = 0;
jpayne@69 329 buff = pQ = pR = 0;
jpayne@69 330 conQ = conR = 0;
jpayne@69 331 };
jpayne@69 332 };
jpayne@69 333
jpayne@69 334
jpayne@69 335
jpayne@69 336 //===================================================== DeltaEdgelet_t =========
jpayne@69 337 struct DeltaEdgelet_t
jpayne@69 338 //!< A piece of a delta graph edge, a single alignment
jpayne@69 339 {
jpayne@69 340 unsigned char isGOOD : 1; //!< meets the requirements
jpayne@69 341 unsigned char isQLIS : 1; //!< is part of the query's LIS
jpayne@69 342 unsigned char isRLIS : 1; //!< is part of the reference's LIS
jpayne@69 343 unsigned char isGLIS : 1; //!< is part of the reference/query LIS
jpayne@69 344 unsigned char dirR : 1; //!< reference match direction
jpayne@69 345 unsigned char dirQ : 1; //!< query match direction
jpayne@69 346
jpayne@69 347 DeltaEdge_t * edge;
jpayne@69 348 float idy, sim, stp; //!< percent identity [0 - 1]
jpayne@69 349 long idyc, simc, stpc; //!< idy, sim, stp counts
jpayne@69 350 long loQ, hiQ, loR, hiR; //!< alignment bounds
jpayne@69 351 int frmQ, frmR; //!< reading frame
jpayne@69 352
jpayne@69 353 std::string delta; //!< delta information
jpayne@69 354 std::vector<SNP_t *> snps; //!< snps for this edgelet
jpayne@69 355
jpayne@69 356 DeltaEdgelet_t ( )
jpayne@69 357 {
jpayne@69 358 edge = NULL;
jpayne@69 359 isGOOD = true;
jpayne@69 360 isQLIS = isRLIS = isGLIS = false;
jpayne@69 361 dirR = dirQ = FORWARD_DIR;
jpayne@69 362 idy = sim = stp = 0;
jpayne@69 363 idyc = simc = stpc = 0;
jpayne@69 364 loQ = hiQ = loR = hiR = 0;
jpayne@69 365 frmQ = frmR = 1;
jpayne@69 366 }
jpayne@69 367
jpayne@69 368 ~DeltaEdgelet_t ( )
jpayne@69 369 {
jpayne@69 370 std::vector<SNP_t *>::iterator i;
jpayne@69 371 for ( i = snps . begin( ); i != snps . end( ); ++ i )
jpayne@69 372 delete (*i);
jpayne@69 373 }
jpayne@69 374
jpayne@69 375 bool isNegative() const
jpayne@69 376 { return ( dirR != dirQ ); }
jpayne@69 377
jpayne@69 378 bool isPositive() const
jpayne@69 379 { return ( dirR == dirQ ); }
jpayne@69 380
jpayne@69 381 int slope() const
jpayne@69 382 { return ( dirR == dirQ ? +1 : -1 ); }
jpayne@69 383
jpayne@69 384 long loR2Q() const
jpayne@69 385 { return ( isPositive() ? loQ : hiQ ); }
jpayne@69 386
jpayne@69 387 long hiR2Q() const
jpayne@69 388 { return ( isPositive() ? hiQ : loQ ); }
jpayne@69 389
jpayne@69 390 long loQ2R() const
jpayne@69 391 { return ( isPositive() ? loR : hiR ); }
jpayne@69 392
jpayne@69 393 long hiQ2R() const
jpayne@69 394 { return ( isPositive() ? hiR : loR ); }
jpayne@69 395 };
jpayne@69 396
jpayne@69 397
jpayne@69 398
jpayne@69 399 //===================================================== DeltaEdge_t ============
jpayne@69 400 struct DeltaEdge_t
jpayne@69 401 //!< A delta graph edge, alignments between a single reference and query
jpayne@69 402 {
jpayne@69 403 DeltaNode_t * refnode; //!< the adjacent reference node
jpayne@69 404 DeltaNode_t * qrynode; //!< the adjacent query node
jpayne@69 405 std::vector<DeltaEdgelet_t *> edgelets; //!< the set of individual alignments
jpayne@69 406
jpayne@69 407 DeltaEdge_t ( )
jpayne@69 408 { refnode = qrynode = NULL; }
jpayne@69 409
jpayne@69 410 ~DeltaEdge_t ( )
jpayne@69 411 {
jpayne@69 412 std::vector<DeltaEdgelet_t *>::iterator i;
jpayne@69 413 for ( i = edgelets . begin( ); i != edgelets . end( ); ++ i )
jpayne@69 414 delete (*i);
jpayne@69 415 }
jpayne@69 416
jpayne@69 417 void build (const DeltaRecord_t & rec);
jpayne@69 418 };
jpayne@69 419
jpayne@69 420
jpayne@69 421 //===================================================== DeltaNode_t ============
jpayne@69 422 struct DeltaNode_t
jpayne@69 423 //!< A delta graph node, contains the sequence information
jpayne@69 424 {
jpayne@69 425 const std::string * id; //!< the id of the sequence
jpayne@69 426 char * seq; //!< the DNA sequence
jpayne@69 427 long len; //!< the length of the sequence
jpayne@69 428 std::vector<DeltaEdge_t *> edges; //!< the set of related edges
jpayne@69 429
jpayne@69 430 DeltaNode_t ( )
jpayne@69 431 { id = NULL; seq = NULL; len = 0; }
jpayne@69 432
jpayne@69 433 ~DeltaNode_t ( )
jpayne@69 434 { free (seq); } // DeltaGraph_t will take care of destructing the edges
jpayne@69 435 };
jpayne@69 436
jpayne@69 437
jpayne@69 438
jpayne@69 439 //===================================================== DeltaGraph_t ===========
jpayne@69 440 //! \brief A graph of sequences (nodes) and their alignments (edges)
jpayne@69 441 //!
jpayne@69 442 //! A bipartite graph with two partite sets, R and Q, where R is the set of
jpayne@69 443 //! reference sequences and Q is the set of query sequences. These nodes are
jpayne@69 444 //! named "DeltaNode_t". We connect a node in R to a node in Q if an alignment
jpayne@69 445 //! is present between the two sequences. The group of all alignments between
jpayne@69 446 //! the two is named "DeltaEdge_t" and a single alignment between the two is
jpayne@69 447 //! named a "DeltaEdgelet_t". Alignment coordinates reference the forward
jpayne@69 448 //! strand and are stored lo before hi.
jpayne@69 449 //!
jpayne@69 450 //==============================================================================
jpayne@69 451 class DeltaGraph_t
jpayne@69 452 {
jpayne@69 453 public:
jpayne@69 454
jpayne@69 455 std::map<std::string, DeltaNode_t> refnodes;
jpayne@69 456 //!< the reference graph nodes, 1 per aligned sequence
jpayne@69 457
jpayne@69 458 std::map<std::string, DeltaNode_t> qrynodes;
jpayne@69 459 //!< the query graph nodes, 1 per aligned sequence
jpayne@69 460
jpayne@69 461 std::string refpath; //!< path of the reference FastA file
jpayne@69 462 std::string qrypath; //!< path of the query FastA file
jpayne@69 463 AlignmentType_t datatype; //!< alignment data type
jpayne@69 464
jpayne@69 465 DeltaGraph_t()
jpayne@69 466 { datatype = NULL_DATA; }
jpayne@69 467
jpayne@69 468 ~DeltaGraph_t ( )
jpayne@69 469 { clear(); }
jpayne@69 470
jpayne@69 471 void build(const std::string & deltapath, bool getdeltas = true);
jpayne@69 472 void clean();
jpayne@69 473 void clear();
jpayne@69 474 long getNodeCount();
jpayne@69 475 long getEdgeCount();
jpayne@69 476 long getEdgeletCount();
jpayne@69 477
jpayne@69 478 void flagGOOD();
jpayne@69 479 void flag1to1(float epsilon = -1, float maxolap = 100.0);
jpayne@69 480 void flagMtoM(float epsilon = -1, float maxolap = 100.0);
jpayne@69 481 void flagGLIS(float epsilon = -1);
jpayne@69 482 void flagQLIS(float epsilon = -1,
jpayne@69 483 float maxolap = 100.0,
jpayne@69 484 bool flagBad = true);
jpayne@69 485 void flagRLIS(float epsilon = -1,
jpayne@69 486 float maxolap = 100.0,
jpayne@69 487 bool flagbad = true);
jpayne@69 488 void flagScore(long minlen, float minidy);
jpayne@69 489 void flagUNIQ(float minuniq);
jpayne@69 490
jpayne@69 491 void loadSequences();
jpayne@69 492 std::ostream & outputDelta(std::ostream & out);
jpayne@69 493 };
jpayne@69 494
jpayne@69 495 #endif // #ifndef __DELTA_HH