jpayne@69: //////////////////////////////////////////////////////////////////////////////// jpayne@69: //! \file jpayne@69: //! \author Adam M Phillippy jpayne@69: //! \date 03/26/2003 jpayne@69: //! jpayne@69: //! \brief Class declarations for the handling of delta alignment files jpayne@69: //! jpayne@69: //! \see delta.cc jpayne@69: //////////////////////////////////////////////////////////////////////////////// jpayne@69: jpayne@69: #ifndef __DELTA_HH jpayne@69: #define __DELTA_HH jpayne@69: jpayne@69: #include "tigrinc.hh" jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: jpayne@69: jpayne@69: const std::string NUCMER_STRING = "NUCMER"; //!< nucmer id string jpayne@69: const std::string PROMER_STRING = "PROMER"; //!< promer id string jpayne@69: jpayne@69: typedef char AlignmentType_t; //!< type of alignment data jpayne@69: const AlignmentType_t NULL_DATA = 0; //!< unknown alignment data type jpayne@69: const AlignmentType_t NUCMER_DATA = 'N'; //!< nucmer alignment data jpayne@69: const AlignmentType_t PROMER_DATA = 'P'; //!< promer alignment data jpayne@69: jpayne@69: typedef unsigned char Dir_t; //!< directional type jpayne@69: const Dir_t FORWARD_DIR = 0; //!< forward direction jpayne@69: const Dir_t REVERSE_DIR = 1; //!< reverse direction jpayne@69: jpayne@69: jpayne@69: jpayne@69: //===================================================== DeltaAlignment_t ======= jpayne@69: struct DeltaAlignment_t jpayne@69: //!< A single delta encoded alignment region jpayne@69: { jpayne@69: long sR; //!< start coordinate in the reference jpayne@69: long eR; //!< end coordinate in the reference jpayne@69: long sQ; //!< start coordinate in the reference jpayne@69: long eQ; //!< end coordinate in the reference jpayne@69: long idyc; //!< number of mismatches in the alignment jpayne@69: long simc; //!< number of similarity scores < 1 in the alignment jpayne@69: long stpc; //!< number of stop codons in the alignment jpayne@69: jpayne@69: float idy; //!< percent identity [0 - 100] jpayne@69: float sim; //!< percent similarity [0 - 100] jpayne@69: float stp; //!< percent stop codon [0 - 100] jpayne@69: jpayne@69: std::vector deltas; //!< delta encoded alignment informaiton jpayne@69: jpayne@69: DeltaAlignment_t ( ) jpayne@69: { jpayne@69: clear ( ); jpayne@69: } jpayne@69: jpayne@69: void clear ( ) jpayne@69: { jpayne@69: sR = eR = sQ = eQ = 0; jpayne@69: idy = sim = stp = 0; jpayne@69: deltas.clear ( ); jpayne@69: } jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: //===================================================== DeltaRecord_t ========== jpayne@69: struct DeltaRecord_t jpayne@69: //!< A delta record representing the alignments between two sequences jpayne@69: { jpayne@69: std::string idR; //!< reference contig ID jpayne@69: std::string idQ; //!< query contig ID jpayne@69: long lenR; //!< length of the reference contig jpayne@69: long lenQ; //!< length of the query contig jpayne@69: jpayne@69: std::vector aligns; //!< alignments between the two seqs jpayne@69: jpayne@69: DeltaRecord_t ( ) jpayne@69: { jpayne@69: clear ( ); jpayne@69: } jpayne@69: jpayne@69: void clear ( ) jpayne@69: { jpayne@69: idR.erase ( ); jpayne@69: idQ.erase ( ); jpayne@69: lenR = lenQ = 0; jpayne@69: aligns.clear ( ); jpayne@69: } jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: //===================================================== DeltaReader_t ========== jpayne@69: //! \brief Delta encoded file reader class jpayne@69: //! jpayne@69: //! Handles the input of delta encoded alignment information for various MUMmer jpayne@69: //! utilities. Very basic functionality, can be expanded as necessary... jpayne@69: //! jpayne@69: //! \see DeltaRecord_t jpayne@69: //============================================================================== jpayne@69: class DeltaReader_t { jpayne@69: jpayne@69: private: jpayne@69: jpayne@69: std::string delta_path_m; //!< the name of the delta input file jpayne@69: std::ifstream delta_stream_m; //!< the delta file input stream jpayne@69: std::string data_type_m; //!< the type of alignment data jpayne@69: std::string reference_path_m; //!< the name of the reference file jpayne@69: std::string query_path_m; //!< the name of the query file jpayne@69: DeltaRecord_t record_m; //!< the current delta information record jpayne@69: bool is_record_m; //!< there is a valid record in record_m jpayne@69: bool is_open_m; //!< delta stream is open jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- readNextAlignment ------ jpayne@69: //! \brief Reads in a delta encoded alignment jpayne@69: //! jpayne@69: //! \param align read info into this structure jpayne@69: //! \param read_deltas read delta information yes/no jpayne@69: //! \pre delta_stream is positioned at the beginning of an alignment header jpayne@69: //! \return void jpayne@69: //! jpayne@69: void readNextAlignment (DeltaAlignment_t & align, const bool read_deltas); jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- readNextRecord --------- jpayne@69: //! \brief Reads in the next delta record from the delta file jpayne@69: //! jpayne@69: //! \param read_deltas read delta information yes/no jpayne@69: //! \pre delta file must be open jpayne@69: //! \return true on success, false on EOF jpayne@69: //! jpayne@69: bool readNextRecord (const bool read_deltas); jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- checkStream ------------ jpayne@69: //! \brief Check stream status and abort program if an error has occured jpayne@69: //! jpayne@69: //! \return void jpayne@69: //! jpayne@69: void checkStream ( ) jpayne@69: { jpayne@69: if ( !delta_stream_m.good ( ) ) jpayne@69: { jpayne@69: std::cerr << "ERROR: Could not parse delta file, " jpayne@69: << delta_path_m << std::endl; jpayne@69: jpayne@69: jpayne@69: std::cerr << "error no: " jpayne@69: << int(delta_stream_m.rdstate() & std::ifstream::failbit) jpayne@69: << int(delta_stream_m.rdstate() & std::ifstream::badbit) jpayne@69: << int(delta_stream_m.rdstate() & std::ifstream::eofbit) jpayne@69: << std::endl; jpayne@69: exit (-1); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: public: jpayne@69: //--------------------------------------------------- DeltaReader_t ---------- jpayne@69: //! \brief Default constructor jpayne@69: //! jpayne@69: //! \return void jpayne@69: //! jpayne@69: DeltaReader_t ( ) jpayne@69: { jpayne@69: is_record_m = false; jpayne@69: is_open_m = false; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- ~DeltaReader_t --------- jpayne@69: //! \brief Default destructor jpayne@69: //! jpayne@69: //! \return void jpayne@69: //! jpayne@69: ~DeltaReader_t ( ) jpayne@69: { jpayne@69: close ( ); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- open ------------------- jpayne@69: //! \brief Opens a delta file by path jpayne@69: //! jpayne@69: //! \param delta file to open jpayne@69: //! \return void jpayne@69: //! jpayne@69: void open (const std::string & delta_path); jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- close ------------------ jpayne@69: //! \brief Closes any open delta file and resets class members jpayne@69: //! jpayne@69: //! \return void jpayne@69: //! jpayne@69: void close ( ) jpayne@69: { jpayne@69: delta_path_m.erase ( ); jpayne@69: delta_stream_m.close ( ); jpayne@69: data_type_m.erase ( ); jpayne@69: reference_path_m.erase ( ); jpayne@69: query_path_m.erase ( ); jpayne@69: record_m.clear ( ); jpayne@69: is_record_m = false; jpayne@69: is_open_m = false; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- readNext -------------- jpayne@69: //! \brief Reads in the next delta record from the delta file jpayne@69: //! jpayne@69: //! \param read_deltas read delta information yes/no jpayne@69: //! \pre delta file must be open jpayne@69: //! \return true on success, false on EOF jpayne@69: //! jpayne@69: inline bool readNext (bool getdeltas = true) jpayne@69: { jpayne@69: return readNextRecord (getdeltas); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- readNextHeadersOnly ---- jpayne@69: //! \brief Reads in the next delta record from the delta file jpayne@69: //! jpayne@69: //! Only reads the alignment header information, does not read in the delta jpayne@69: //! information and leaves each alignment structure's delta vector empty. jpayne@69: //! jpayne@69: //! \pre delta file must be open jpayne@69: //! \return true on success, false on EOF jpayne@69: //! jpayne@69: inline bool readNextHeadersOnly ( ) jpayne@69: { jpayne@69: return readNextRecord (false); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- getRecord -------------- jpayne@69: //! \brief Returns a reference to the current delta record jpayne@69: //! jpayne@69: //! \pre readNext( ) was successfully called in advance jpayne@69: //! \return true on success, false on failure or end of file jpayne@69: //! jpayne@69: const DeltaRecord_t & getRecord ( ) const jpayne@69: { jpayne@69: assert (is_record_m); jpayne@69: return record_m; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- getDeltaPath ----------- jpayne@69: //! \brief Get the path of the current delta file jpayne@69: //! jpayne@69: //! \pre delta file is open jpayne@69: //! \return the path of the delta file jpayne@69: //! jpayne@69: const std::string & getDeltaPath ( ) const jpayne@69: { jpayne@69: assert (is_open_m); jpayne@69: return delta_path_m; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- getDataType ------------ jpayne@69: //! \brief Get the data type of the current delta file jpayne@69: //! jpayne@69: //! \pre delta file is open jpayne@69: //! \return the data type of the current file jpayne@69: //! jpayne@69: const std::string & getDataType ( ) const jpayne@69: { jpayne@69: assert (is_open_m); jpayne@69: return data_type_m; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- getReferencePath ------- jpayne@69: //! \brief Get the path of the MUMmer reference file jpayne@69: //! jpayne@69: //! \pre delta file is open jpayne@69: //! \return the path of the MUMmer reference file jpayne@69: //! jpayne@69: const std::string & getReferencePath ( ) const jpayne@69: { jpayne@69: assert (is_open_m); jpayne@69: return reference_path_m; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //--------------------------------------------------- getQueryPath ----------- jpayne@69: //! \brief Get the path of the MUMmer query file jpayne@69: //! jpayne@69: //! \pre delta file is open jpayne@69: //! \return the path of the MUMmer query file jpayne@69: //! jpayne@69: const std::string & getQueryPath ( ) const jpayne@69: { jpayne@69: assert (is_open_m); jpayne@69: return query_path_m; jpayne@69: } jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: //===================================================== SNP_t ================== jpayne@69: struct DeltaEdgelet_t; jpayne@69: struct DeltaEdge_t; jpayne@69: struct DeltaNode_t; jpayne@69: struct SNP_t jpayne@69: //!< A single nuc/aa poly jpayne@69: { jpayne@69: long buff; jpayne@69: char cQ, cR; jpayne@69: long pQ, pR; jpayne@69: int conQ, conR; jpayne@69: std::string ctxQ, ctxR; jpayne@69: DeltaEdgelet_t * lp; jpayne@69: DeltaEdge_t * ep; jpayne@69: jpayne@69: SNP_t ( ) jpayne@69: { jpayne@69: cQ = cR = 0; jpayne@69: buff = pQ = pR = 0; jpayne@69: conQ = conR = 0; jpayne@69: }; jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: //===================================================== DeltaEdgelet_t ========= jpayne@69: struct DeltaEdgelet_t jpayne@69: //!< A piece of a delta graph edge, a single alignment jpayne@69: { jpayne@69: unsigned char isGOOD : 1; //!< meets the requirements jpayne@69: unsigned char isQLIS : 1; //!< is part of the query's LIS jpayne@69: unsigned char isRLIS : 1; //!< is part of the reference's LIS jpayne@69: unsigned char isGLIS : 1; //!< is part of the reference/query LIS jpayne@69: unsigned char dirR : 1; //!< reference match direction jpayne@69: unsigned char dirQ : 1; //!< query match direction jpayne@69: jpayne@69: DeltaEdge_t * edge; jpayne@69: float idy, sim, stp; //!< percent identity [0 - 1] jpayne@69: long idyc, simc, stpc; //!< idy, sim, stp counts jpayne@69: long loQ, hiQ, loR, hiR; //!< alignment bounds jpayne@69: int frmQ, frmR; //!< reading frame jpayne@69: jpayne@69: std::string delta; //!< delta information jpayne@69: std::vector snps; //!< snps for this edgelet jpayne@69: jpayne@69: DeltaEdgelet_t ( ) jpayne@69: { jpayne@69: edge = NULL; jpayne@69: isGOOD = true; jpayne@69: isQLIS = isRLIS = isGLIS = false; jpayne@69: dirR = dirQ = FORWARD_DIR; jpayne@69: idy = sim = stp = 0; jpayne@69: idyc = simc = stpc = 0; jpayne@69: loQ = hiQ = loR = hiR = 0; jpayne@69: frmQ = frmR = 1; jpayne@69: } jpayne@69: jpayne@69: ~DeltaEdgelet_t ( ) jpayne@69: { jpayne@69: std::vector::iterator i; jpayne@69: for ( i = snps . begin( ); i != snps . end( ); ++ i ) jpayne@69: delete (*i); jpayne@69: } jpayne@69: jpayne@69: bool isNegative() const jpayne@69: { return ( dirR != dirQ ); } jpayne@69: jpayne@69: bool isPositive() const jpayne@69: { return ( dirR == dirQ ); } jpayne@69: jpayne@69: int slope() const jpayne@69: { return ( dirR == dirQ ? +1 : -1 ); } jpayne@69: jpayne@69: long loR2Q() const jpayne@69: { return ( isPositive() ? loQ : hiQ ); } jpayne@69: jpayne@69: long hiR2Q() const jpayne@69: { return ( isPositive() ? hiQ : loQ ); } jpayne@69: jpayne@69: long loQ2R() const jpayne@69: { return ( isPositive() ? loR : hiR ); } jpayne@69: jpayne@69: long hiQ2R() const jpayne@69: { return ( isPositive() ? hiR : loR ); } jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: //===================================================== DeltaEdge_t ============ jpayne@69: struct DeltaEdge_t jpayne@69: //!< A delta graph edge, alignments between a single reference and query jpayne@69: { jpayne@69: DeltaNode_t * refnode; //!< the adjacent reference node jpayne@69: DeltaNode_t * qrynode; //!< the adjacent query node jpayne@69: std::vector edgelets; //!< the set of individual alignments jpayne@69: jpayne@69: DeltaEdge_t ( ) jpayne@69: { refnode = qrynode = NULL; } jpayne@69: jpayne@69: ~DeltaEdge_t ( ) jpayne@69: { jpayne@69: std::vector::iterator i; jpayne@69: for ( i = edgelets . begin( ); i != edgelets . end( ); ++ i ) jpayne@69: delete (*i); jpayne@69: } jpayne@69: jpayne@69: void build (const DeltaRecord_t & rec); jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: //===================================================== DeltaNode_t ============ jpayne@69: struct DeltaNode_t jpayne@69: //!< A delta graph node, contains the sequence information jpayne@69: { jpayne@69: const std::string * id; //!< the id of the sequence jpayne@69: char * seq; //!< the DNA sequence jpayne@69: long len; //!< the length of the sequence jpayne@69: std::vector edges; //!< the set of related edges jpayne@69: jpayne@69: DeltaNode_t ( ) jpayne@69: { id = NULL; seq = NULL; len = 0; } jpayne@69: jpayne@69: ~DeltaNode_t ( ) jpayne@69: { free (seq); } // DeltaGraph_t will take care of destructing the edges jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: //===================================================== DeltaGraph_t =========== jpayne@69: //! \brief A graph of sequences (nodes) and their alignments (edges) jpayne@69: //! jpayne@69: //! A bipartite graph with two partite sets, R and Q, where R is the set of jpayne@69: //! reference sequences and Q is the set of query sequences. These nodes are jpayne@69: //! named "DeltaNode_t". We connect a node in R to a node in Q if an alignment jpayne@69: //! is present between the two sequences. The group of all alignments between jpayne@69: //! the two is named "DeltaEdge_t" and a single alignment between the two is jpayne@69: //! named a "DeltaEdgelet_t". Alignment coordinates reference the forward jpayne@69: //! strand and are stored lo before hi. jpayne@69: //! jpayne@69: //============================================================================== jpayne@69: class DeltaGraph_t jpayne@69: { jpayne@69: public: jpayne@69: jpayne@69: std::map refnodes; jpayne@69: //!< the reference graph nodes, 1 per aligned sequence jpayne@69: jpayne@69: std::map qrynodes; jpayne@69: //!< the query graph nodes, 1 per aligned sequence jpayne@69: jpayne@69: std::string refpath; //!< path of the reference FastA file jpayne@69: std::string qrypath; //!< path of the query FastA file jpayne@69: AlignmentType_t datatype; //!< alignment data type jpayne@69: jpayne@69: DeltaGraph_t() jpayne@69: { datatype = NULL_DATA; } jpayne@69: jpayne@69: ~DeltaGraph_t ( ) jpayne@69: { clear(); } jpayne@69: jpayne@69: void build(const std::string & deltapath, bool getdeltas = true); jpayne@69: void clean(); jpayne@69: void clear(); jpayne@69: long getNodeCount(); jpayne@69: long getEdgeCount(); jpayne@69: long getEdgeletCount(); jpayne@69: jpayne@69: void flagGOOD(); jpayne@69: void flag1to1(float epsilon = -1, float maxolap = 100.0); jpayne@69: void flagMtoM(float epsilon = -1, float maxolap = 100.0); jpayne@69: void flagGLIS(float epsilon = -1); jpayne@69: void flagQLIS(float epsilon = -1, jpayne@69: float maxolap = 100.0, jpayne@69: bool flagBad = true); jpayne@69: void flagRLIS(float epsilon = -1, jpayne@69: float maxolap = 100.0, jpayne@69: bool flagbad = true); jpayne@69: void flagScore(long minlen, float minidy); jpayne@69: void flagUNIQ(float minuniq); jpayne@69: jpayne@69: void loadSequences(); jpayne@69: std::ostream & outputDelta(std::ostream & out); jpayne@69: }; jpayne@69: jpayne@69: #endif // #ifndef __DELTA_HH