diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/delta.hh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/delta.hh	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,495 @@
+////////////////////////////////////////////////////////////////////////////////
+//! \file
+//! \author Adam M Phillippy
+//! \date 03/26/2003
+//!
+//! \brief Class declarations for the handling of delta alignment files
+//!
+//! \see delta.cc
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef __DELTA_HH
+#define __DELTA_HH
+
+#include "tigrinc.hh"
+#include <cassert>
+#include <string>
+#include <vector>
+#include <fstream>
+#include <cstdlib>
+#include <iostream>
+#include <map>
+
+
+const std::string NUCMER_STRING = "NUCMER"; //!< nucmer id string
+const std::string PROMER_STRING = "PROMER"; //!< promer id string
+
+typedef char AlignmentType_t;               //!< type of alignment data
+const AlignmentType_t NULL_DATA = 0;        //!< unknown alignment data type
+const AlignmentType_t NUCMER_DATA = 'N';    //!< nucmer alignment data
+const AlignmentType_t PROMER_DATA = 'P';    //!< promer alignment data
+
+typedef unsigned char Dir_t;                //!< directional type
+const Dir_t FORWARD_DIR = 0;                //!< forward direction
+const Dir_t REVERSE_DIR = 1;                //!< reverse direction
+
+
+
+//===================================================== DeltaAlignment_t =======
+struct DeltaAlignment_t
+//!< A single delta encoded alignment region
+{
+  long sR;    //!< start coordinate in the reference
+  long eR;    //!< end coordinate in the reference
+  long sQ;    //!< start coordinate in the reference
+  long eQ;    //!< end coordinate in the reference
+  long idyc;  //!< number of mismatches in the alignment
+  long simc;  //!< number of similarity scores < 1 in the alignment
+  long stpc;  //!< number of stop codons in the alignment
+
+  float idy;               //!< percent identity [0 - 100]
+  float sim;               //!< percent similarity [0 - 100]
+  float stp;               //!< percent stop codon [0 - 100]
+
+  std::vector<long> deltas;  //!< delta encoded alignment informaiton
+
+  DeltaAlignment_t ( )
+  {
+    clear ( );
+  }
+
+  void clear ( )
+  {
+    sR = eR = sQ = eQ = 0;
+    idy = sim = stp = 0;
+    deltas.clear ( );
+  }
+};
+
+
+
+//===================================================== DeltaRecord_t ==========
+struct DeltaRecord_t
+//!< A delta record representing the alignments between two sequences
+{
+  std::string idR;         //!< reference contig ID
+  std::string idQ;         //!< query contig ID
+  long lenR;  //!< length of the reference contig
+  long lenQ;  //!< length of the query contig
+
+  std::vector<DeltaAlignment_t> aligns; //!< alignments between the two seqs
+
+  DeltaRecord_t ( )
+  {
+    clear ( );
+  }
+
+  void clear ( )
+  {
+    idR.erase ( );
+    idQ.erase ( );
+    lenR = lenQ = 0;
+    aligns.clear ( );
+  }
+};
+
+
+
+//===================================================== DeltaReader_t ==========
+//! \brief Delta encoded file reader class
+//!
+//! Handles the input of delta encoded alignment information for various MUMmer
+//! utilities. Very basic functionality, can be expanded as necessary...
+//!
+//! \see DeltaRecord_t
+//==============================================================================
+class DeltaReader_t {
+
+private:
+
+  std::string delta_path_m;      //!< the name of the delta input file
+  std::ifstream delta_stream_m;  //!< the delta file input stream
+  std::string data_type_m;       //!< the type of alignment data
+  std::string reference_path_m;  //!< the name of the reference file
+  std::string query_path_m;      //!< the name of the query file
+  DeltaRecord_t record_m;        //!< the current delta information record
+  bool is_record_m;              //!< there is a valid record in record_m
+  bool is_open_m;                //!< delta stream is open
+
+
+  //--------------------------------------------------- readNextAlignment ------
+  //! \brief Reads in a delta encoded alignment
+  //!
+  //! \param align read info into this structure
+  //! \param read_deltas read delta information yes/no
+  //! \pre delta_stream is positioned at the beginning of an alignment header
+  //! \return void
+  //!
+  void readNextAlignment (DeltaAlignment_t & align, const bool read_deltas);
+
+
+  //--------------------------------------------------- readNextRecord ---------
+  //! \brief Reads in the next delta record from the delta file
+  //!
+  //! \param read_deltas read delta information yes/no
+  //! \pre delta file must be open
+  //! \return true on success, false on EOF
+  //!
+  bool readNextRecord (const bool read_deltas);
+
+
+  //--------------------------------------------------- checkStream ------------
+  //! \brief Check stream status and abort program if an error has occured
+  //!
+  //! \return void
+  //!
+  void checkStream ( )
+  {
+    if ( !delta_stream_m.good ( ) )
+      {
+	std::cerr << "ERROR: Could not parse delta file, "
+		  << delta_path_m << std::endl;
+
+
+        std::cerr << "error no: "
+                  << int(delta_stream_m.rdstate() & std::ifstream::failbit)
+                  << int(delta_stream_m.rdstate() & std::ifstream::badbit)
+                  << int(delta_stream_m.rdstate() & std::ifstream::eofbit)
+                  << std::endl;
+	exit (-1);
+      }
+  }
+
+
+public:
+  //--------------------------------------------------- DeltaReader_t ----------
+  //! \brief Default constructor
+  //!
+  //! \return void
+  //!
+  DeltaReader_t ( )
+  {
+    is_record_m = false;
+    is_open_m = false;
+  }
+
+
+  //--------------------------------------------------- ~DeltaReader_t ---------
+  //! \brief Default destructor
+  //!
+  //! \return void
+  //!
+  ~DeltaReader_t ( )
+  {
+    close ( );
+  }
+
+
+  //--------------------------------------------------- open -------------------
+  //! \brief Opens a delta file by path
+  //!
+  //! \param delta file to open
+  //! \return void
+  //!
+  void open (const std::string & delta_path);
+
+
+  //--------------------------------------------------- close ------------------
+  //! \brief Closes any open delta file and resets class members
+  //!
+  //! \return void
+  //!
+  void close ( )
+  {
+    delta_path_m.erase ( );
+    delta_stream_m.close ( );
+    data_type_m.erase ( );
+    reference_path_m.erase ( );
+    query_path_m.erase ( );
+    record_m.clear ( );
+    is_record_m = false;
+    is_open_m = false;
+  }
+
+
+  //--------------------------------------------------- readNext --------------
+  //! \brief Reads in the next delta record from the delta file
+  //!
+  //! \param read_deltas read delta information yes/no
+  //! \pre delta file must be open
+  //! \return true on success, false on EOF
+  //!
+  inline bool readNext (bool getdeltas = true)
+  {
+    return readNextRecord (getdeltas);
+  }
+
+
+
+  //--------------------------------------------------- readNextHeadersOnly ----
+  //! \brief Reads in the next delta record from the delta file
+  //!
+  //! Only reads the alignment header information, does not read in the delta
+  //! information and leaves each alignment structure's delta vector empty.
+  //!
+  //! \pre delta file must be open
+  //! \return true on success, false on EOF
+  //!
+  inline bool readNextHeadersOnly ( )
+  {
+    return readNextRecord (false);
+  }
+
+
+  //--------------------------------------------------- getRecord --------------
+  //! \brief Returns a reference to the current delta record
+  //!
+  //! \pre readNext( ) was successfully called in advance
+  //! \return true on success, false on failure or end of file
+  //!
+  const DeltaRecord_t & getRecord ( ) const
+  {
+    assert (is_record_m);
+    return record_m;
+  }
+
+
+  //--------------------------------------------------- getDeltaPath -----------
+  //! \brief Get the path of the current delta file
+  //!
+  //! \pre delta file is open
+  //! \return the path of the delta file
+  //!
+  const std::string & getDeltaPath ( ) const
+  {
+    assert (is_open_m);
+    return delta_path_m;
+  }
+
+
+  //--------------------------------------------------- getDataType ------------
+  //! \brief Get the data type of the current delta file
+  //!
+  //! \pre delta file is open
+  //! \return the data type of the current file
+  //!
+  const std::string & getDataType ( ) const
+  {
+    assert (is_open_m);
+    return data_type_m;
+  }
+
+
+  //--------------------------------------------------- getReferencePath -------
+  //! \brief Get the path of the MUMmer reference file
+  //!
+  //! \pre delta file is open
+  //! \return the path of the MUMmer reference file
+  //!
+  const std::string & getReferencePath ( ) const
+  {
+    assert (is_open_m);
+    return reference_path_m;
+  }
+
+
+  //--------------------------------------------------- getQueryPath -----------
+  //! \brief Get the path of the MUMmer query file
+  //!
+  //! \pre delta file is open
+  //! \return the path of the MUMmer query file
+  //!
+  const std::string & getQueryPath ( ) const
+  {
+    assert (is_open_m);
+    return query_path_m;
+  }
+};
+
+
+
+//===================================================== SNP_t ==================
+struct DeltaEdgelet_t;
+struct DeltaEdge_t;
+struct DeltaNode_t;
+struct SNP_t
+     //!< A single nuc/aa poly
+{
+  long buff;
+  char cQ, cR;
+  long pQ, pR;
+  int conQ, conR;
+  std::string ctxQ, ctxR;
+  DeltaEdgelet_t * lp;
+  DeltaEdge_t * ep;
+
+  SNP_t ( )
+  {
+    cQ = cR = 0;
+    buff = pQ = pR = 0;
+    conQ = conR = 0;
+  };
+};
+
+
+
+//===================================================== DeltaEdgelet_t =========
+struct DeltaEdgelet_t
+//!< A piece of a delta graph edge, a single alignment
+{
+  unsigned char isGOOD : 1;   //!< meets the requirements
+  unsigned char isQLIS : 1;   //!< is part of the query's LIS
+  unsigned char isRLIS : 1;   //!< is part of the reference's LIS
+  unsigned char isGLIS : 1;   //!< is part of the reference/query LIS
+  unsigned char dirR   : 1;   //!< reference match direction
+  unsigned char dirQ   : 1;   //!< query match direction
+
+  DeltaEdge_t * edge;
+  float idy, sim, stp;        //!< percent identity [0 - 1]
+  long idyc, simc, stpc;      //!< idy, sim, stp counts
+  long loQ, hiQ, loR, hiR;    //!< alignment bounds
+  int frmQ, frmR;             //!< reading frame
+
+  std::string delta;          //!< delta information
+  std::vector<SNP_t *> snps;  //!< snps for this edgelet
+
+  DeltaEdgelet_t ( )
+  {
+    edge = NULL;
+    isGOOD = true;
+    isQLIS = isRLIS = isGLIS = false;
+    dirR = dirQ = FORWARD_DIR;
+    idy = sim = stp = 0;
+    idyc = simc = stpc = 0;
+    loQ = hiQ = loR = hiR = 0;
+    frmQ = frmR = 1;
+  }
+
+  ~DeltaEdgelet_t ( )
+  {
+    std::vector<SNP_t *>::iterator i;
+    for ( i = snps . begin( ); i != snps . end( ); ++ i )
+      delete (*i);
+  }
+
+  bool isNegative() const
+  { return ( dirR != dirQ ); }
+
+  bool isPositive() const
+  { return ( dirR == dirQ ); }
+
+  int slope() const
+  { return ( dirR == dirQ ? +1 : -1 ); }
+
+  long loR2Q() const
+  { return ( isPositive() ? loQ : hiQ ); }
+
+  long hiR2Q() const
+  { return ( isPositive() ? hiQ : loQ ); }
+
+  long loQ2R() const
+  { return ( isPositive() ? loR : hiR ); }
+
+  long hiQ2R() const
+  { return ( isPositive() ? hiR : loR ); }
+};
+
+
+
+//===================================================== DeltaEdge_t ============
+struct DeltaEdge_t
+//!< A delta graph edge, alignments between a single reference and query
+{
+  DeltaNode_t * refnode;      //!< the adjacent reference node
+  DeltaNode_t * qrynode;      //!< the adjacent query node
+  std::vector<DeltaEdgelet_t *> edgelets;  //!< the set of individual alignments
+
+  DeltaEdge_t ( )
+  { refnode = qrynode = NULL; }
+
+  ~DeltaEdge_t ( )
+  {
+    std::vector<DeltaEdgelet_t *>::iterator i;
+    for ( i = edgelets . begin( ); i != edgelets . end( ); ++ i )
+      delete (*i);
+  }
+
+  void build (const DeltaRecord_t & rec);
+};
+
+
+//===================================================== DeltaNode_t ============
+struct DeltaNode_t
+//!< A delta graph node, contains the sequence information
+{
+  const std::string * id;             //!< the id of the sequence
+  char * seq;                         //!< the DNA sequence
+  long len;              //!< the length of the sequence
+  std::vector<DeltaEdge_t *> edges;   //!< the set of related edges
+
+  DeltaNode_t ( )
+  { id = NULL; seq = NULL; len = 0; }
+
+  ~DeltaNode_t ( )
+  { free (seq); } // DeltaGraph_t will take care of destructing the edges
+};
+
+
+
+//===================================================== DeltaGraph_t ===========
+//! \brief A graph of sequences (nodes) and their alignments (edges)
+//!
+//!  A bipartite graph with two partite sets, R and Q, where R is the set of
+//!  reference sequences and Q is the set of query sequences. These nodes are
+//!  named "DeltaNode_t". We connect a node in R to a node in Q if an alignment
+//!  is present between the two sequences. The group of all alignments between
+//!  the two is named "DeltaEdge_t" and a single alignment between the two is
+//!  named a "DeltaEdgelet_t". Alignment coordinates reference the forward
+//!  strand and are stored lo before hi.
+//!
+//==============================================================================
+class DeltaGraph_t
+{
+public:
+
+  std::map<std::string, DeltaNode_t> refnodes;
+  //!< the reference graph nodes, 1 per aligned sequence
+
+  std::map<std::string, DeltaNode_t> qrynodes;
+  //!< the query graph nodes, 1 per aligned sequence
+
+  std::string refpath;         //!< path of the reference FastA file
+  std::string qrypath;         //!< path of the query FastA file
+  AlignmentType_t datatype;    //!< alignment data type
+
+  DeltaGraph_t()
+  { datatype = NULL_DATA; }
+
+  ~DeltaGraph_t ( )
+  { clear(); }
+
+  void build(const std::string & deltapath, bool getdeltas = true);
+  void clean();
+  void clear();
+  long getNodeCount();
+  long getEdgeCount();
+  long getEdgeletCount();
+
+  void flagGOOD();
+  void flag1to1(float epsilon = -1, float maxolap = 100.0);
+  void flagMtoM(float epsilon = -1, float maxolap = 100.0);
+  void flagGLIS(float epsilon = -1);
+  void flagQLIS(float epsilon = -1,
+                float maxolap = 100.0,
+                bool flagBad = true);
+  void flagRLIS(float epsilon = -1,
+                float maxolap = 100.0,
+                bool flagbad = true);
+  void flagScore(long minlen, float minidy);
+  void flagUNIQ(float minuniq);
+
+  void loadSequences();
+  std::ostream & outputDelta(std::ostream & out);
+};
+
+#endif // #ifndef __DELTA_HH