jpayne@69: //------------------------------------------------------------------------------ jpayne@69: // Programmer: Adam M Phillippy, The Institute for Genomic Research jpayne@69: // File: postpro.cc jpayne@69: // Date: 08 / 20 / 2002 jpayne@69: // jpayne@69: // Purpose: To translate the coordinates referencing the concatenated jpayne@69: // reference sequences back to the individual sequences and deal jpayne@69: // with any conflict that may have arisen (i.e. a MUM that spans jpayne@69: // the boundry between two sequences). Then to extend each cluster jpayne@69: // via Smith-Waterman techniques to expand the alignment coverage. jpayne@69: // Alignments which encounter each other will be fused to form one jpayne@69: // encompasing alignment when appropriate. jpayne@69: // jpayne@69: // Input: Input is the output of the .mgaps program from stdin. On the jpayne@69: // command line, the file names of the two original sequence files jpayne@69: // should come first, followed by the prefix that should be jpayne@69: // placed in front of the two output filenames .cluster and jpayne@69: // .delta jpayne@69: // jpayne@69: // NOTE: Cluster file is now suppressed by default (see -d option). jpayne@69: // jpayne@69: // Output: Output is to two output files, .cluster and .delta. jpayne@69: // .cluster lists MUM clusters as identified by "mgaps". jpayne@69: // However, the clusters now reference their corresponding jpayne@69: // sequence and are all listed under headers that specify this jpayne@69: // sequence. In addition, the coordinates now reference the original jpayne@69: // DNA input and not the amino acid translations. The .delta file jpayne@69: // is the alignment object that contains all the information necessary jpayne@69: // to reproduce the alignments generated by the MUM extension process. jpayne@69: // The coordinates in this file reference the original DNA input, however jpayne@69: // the delta values represent amino acids, so 1 delta = 3 nucleotides. jpayne@69: // Please refer to the output file documentation for an in-depth jpayne@69: // description of these file formats. jpayne@69: // jpayne@69: // Usage: postpro < jpayne@69: // Where and are the original input sequences of jpayne@69: // PROmer and is the prefix that should be added to the jpayne@69: // beginning of the .cluster and .delta output filenames. jpayne@69: // jpayne@69: //------------------------------------------------------------------------------ jpayne@69: jpayne@69: //-- NOTE: this option will significantly hamper program performance, jpayne@69: // mostly the alignment extension performance (sw_align.h) jpayne@69: //#define _DEBUG_ASSERT // self testing assert functions jpayne@69: jpayne@69: #include "tigrinc.hh" jpayne@69: #include "sw_align.hh" jpayne@69: #include "translate.hh" jpayne@69: #include jpayne@69: #include jpayne@69: using namespace std; jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------ Globals -------------// jpayne@69: bool DO_DELTA = true; jpayne@69: bool DO_EXTEND = true; jpayne@69: bool TO_SEQEND = false; jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------ Type Definitions ----// jpayne@69: enum LineType jpayne@69: //-- The type of input line from jpayne@69: { jpayne@69: NO_LINE, HEADER_LINE, MATCH_LINE jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: struct FastaRecord jpayne@69: //-- The essential data of a sequence jpayne@69: { jpayne@69: char * Id; // the fasta ID header tag jpayne@69: long int len; // the length of the sequence jpayne@69: char * seq; // the sequence data jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: struct Match jpayne@69: //-- An exact match between two sequences A and B jpayne@69: { jpayne@69: long int sA, sB, len; // start coordinate in A, in B and the length jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: struct Cluster jpayne@69: //-- An ordered list of matches between two sequences A and B jpayne@69: { jpayne@69: bool wasFused; // have the cluster matches been extended yet? jpayne@69: int frameA; // the reference sequence frame (1-6) jpayne@69: int frameB; // the query sequence frame jpayne@69: vector matches; // the ordered set of matches in the cluster jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: struct Synteny jpayne@69: //-- An ordered list of clusters between two sequences A and B jpayne@69: { jpayne@69: FastaRecord * AfP; // a pointer to the reference sequence record jpayne@69: FastaRecord Bf; // the query sequence record (w/o the sequence) jpayne@69: vector clusters; // the ordered set of clusters between A and B jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: struct Alignment jpayne@69: //-- An alignment object between two sequences A and B jpayne@69: { jpayne@69: int frameA; // the reference sequence frame (1-6) jpayne@69: int frameB; // the query sequence frame jpayne@69: long int sA, sB, eA, eB; // the start in A, B and the end in A, B jpayne@69: vector delta; // the delta values, with NO zero at the end jpayne@69: long int deltaApos; // sum of abs(deltas) - #of negative deltas jpayne@69: // trust me, it is a very helpful value jpayne@69: long int Errors, SimErrors, NonAlphas; // errors, similarity errors, nonalphas jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: struct AscendingClusterSort jpayne@69: //-- For sorting clusters in ascending order of their sA coordinate jpayne@69: { jpayne@69: bool operator() (const Cluster & pA, const Cluster & pB) jpayne@69: { jpayne@69: return ( pA.matches.begin( )->sA < pB.matches.begin( )->sA ); jpayne@69: } jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------- Function Declarations ----// jpayne@69: void addNewAlignment jpayne@69: (vector & Alignments, vector::iterator Cp, jpayne@69: vector::iterator Mp); jpayne@69: jpayne@69: bool extendBackward jpayne@69: (vector & Alignments, vector::iterator CurrAp, jpayne@69: vector::iterator TargetAp, const char * A, const char * B); jpayne@69: jpayne@69: bool extendForward jpayne@69: (vector::iterator Ap, const char * A, long int targetA, jpayne@69: const char * B, long int targetB, unsigned int m_o); jpayne@69: jpayne@69: void extendClusters jpayne@69: (vector & Clusters, jpayne@69: const FastaRecord * A, const FastaRecord * B, FILE * DeltaFile); jpayne@69: jpayne@69: void flushAlignments jpayne@69: (vector & Alignments, jpayne@69: const FastaRecord * Af, const FastaRecord * Bf, jpayne@69: FILE * DeltaFile); jpayne@69: jpayne@69: void flushSyntenys jpayne@69: (vector & Syntenys, FILE * ClusterFile); jpayne@69: jpayne@69: vector::iterator getForwardTargetCluster jpayne@69: (vector & Clusters, vector::iterator CurrCp, jpayne@69: long int & targetA, long int & targetB); jpayne@69: jpayne@69: vector::iterator getReverseTargetAlignment jpayne@69: (vector & Alignments, vector::iterator CurrAp); jpayne@69: jpayne@69: bool isShadowedCluster jpayne@69: (vector::iterator CurrCp, jpayne@69: vector & Alignments, vector::iterator Ap); jpayne@69: jpayne@69: void parseAbort jpayne@69: (const char *); jpayne@69: jpayne@69: void parseDelta jpayne@69: (vector & Alignments, jpayne@69: const FastaRecord * Af, const FastaRecord *Bf); jpayne@69: jpayne@69: void processSyntenys jpayne@69: (vector & Syntenys, jpayne@69: FastaRecord * Af, long int As, jpayne@69: FILE * QryFile, FILE * ClusterFile, FILE * DeltaFile); jpayne@69: jpayne@69: inline long int transC jpayne@69: (long int Coord, int Frame, long int Len); jpayne@69: jpayne@69: inline long int refLen jpayne@69: (long int ntLen); jpayne@69: jpayne@69: inline long int revC jpayne@69: (long int Coord, long int Len); jpayne@69: jpayne@69: void printHelp jpayne@69: (const char *); jpayne@69: jpayne@69: void printUsage jpayne@69: (const char *); jpayne@69: jpayne@69: void validateData jpayne@69: (vector Alignments, vector Clusters, jpayne@69: const FastaRecord * Af, const FastaRecord * Bf); jpayne@69: jpayne@69: jpayne@69: jpayne@69: //-------------------------------------------------- Function Definitions ----// jpayne@69: int main jpayne@69: (int argc, char ** argv) jpayne@69: { jpayne@69: FastaRecord * Af; // array of all the reference sequences jpayne@69: jpayne@69: vector Syntenys; // vector of all sets of clusters jpayne@69: vector::reverse_iterator CurrSp; // current set of clusters jpayne@69: vector::reverse_iterator Sp; // temporary synteny pointer jpayne@69: jpayne@69: Synteny Asyn; // a single set of clusters jpayne@69: Cluster Aclu; // a single cluster of matches jpayne@69: Match Amat; // a single match jpayne@69: jpayne@69: LineType PrevLine; // the current input line jpayne@69: jpayne@69: bool Found; // temporary search flag jpayne@69: jpayne@69: char Line [MAX_LINE]; // a single line of input jpayne@69: char CurrIdB [MAX_LINE], IdA [MAX_LINE], IdB [MAX_LINE]; // fasta ID headers jpayne@69: char ClusterFileName [MAX_LINE], DeltaFileName [MAX_LINE]; // file names jpayne@69: char RefFileName [MAX_LINE], QryFileName [MAX_LINE]; jpayne@69: jpayne@69: int FrameA, FrameB; // reading frames (1-6) jpayne@69: int CurrFrameA, CurrFrameB; jpayne@69: jpayne@69: long int i; // temporary counter jpayne@69: long int Seqi; // current reference sequence index jpayne@69: long int InitSize; // initial sequence memory space jpayne@69: long int As = 0; // the size of the reference array jpayne@69: long int Ac = 100; // the capacity of the reference array jpayne@69: long int sA, sB, len; // current match start in A, B and length jpayne@69: jpayne@69: FILE * RefFile, * QryFile; // reference and query input files jpayne@69: FILE * ClusterFile, * DeltaFile; // cluster and delta output files jpayne@69: jpayne@69: //-- Set the alignment data type and break length (sw_align.h) jpayne@69: setMatrixType ( BLOSUM62 ); jpayne@69: setBreakLen ( 60 ); jpayne@69: jpayne@69: //-- Parse the command line arguments jpayne@69: { jpayne@69: optarg = NULL; jpayne@69: int ch, errflg = 0; jpayne@69: while ( !errflg && ((ch = getopt (argc, argv, "dehb:tx:")) != EOF) ) jpayne@69: switch (ch) jpayne@69: { jpayne@69: case 'b' : jpayne@69: setBreakLen (atoi (optarg)); jpayne@69: break; jpayne@69: jpayne@69: case 'd' : jpayne@69: DO_DELTA = false; jpayne@69: break; jpayne@69: jpayne@69: case 'e' : jpayne@69: DO_EXTEND = false; jpayne@69: break; jpayne@69: jpayne@69: case 'h' : jpayne@69: printHelp (argv[0]); jpayne@69: exit (EXIT_SUCCESS); jpayne@69: break; jpayne@69: jpayne@69: case 't' : jpayne@69: TO_SEQEND = true; jpayne@69: break; jpayne@69: jpayne@69: case 'x' : jpayne@69: setMatrixType ( atoi (optarg) ); jpayne@69: if ( getMatrixType( ) == NUCLEOTIDE ) jpayne@69: { jpayne@69: fprintf (stderr, "WARNING: invalid matrix type %d, ignoring\n", jpayne@69: getMatrixType( )); jpayne@69: setMatrixType ( BLOSUM62 ); jpayne@69: } jpayne@69: break; jpayne@69: jpayne@69: default : jpayne@69: errflg ++; jpayne@69: } jpayne@69: jpayne@69: if ( errflg > 0 || argc - optind != 3 ) jpayne@69: { jpayne@69: printUsage (argv[0]); jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- Read and create the I/O file names jpayne@69: strcpy (RefFileName, argv[optind ++]); jpayne@69: strcpy (QryFileName, argv[optind ++]); jpayne@69: strcpy (ClusterFileName, argv[optind ++]); jpayne@69: strcpy (DeltaFileName, ClusterFileName); jpayne@69: strcat (ClusterFileName, ".cluster"); jpayne@69: strcat (DeltaFileName, ".delta"); jpayne@69: jpayne@69: //-- Open all the files jpayne@69: RefFile = File_Open (RefFileName, "r"); jpayne@69: QryFile = File_Open (QryFileName, "r"); jpayne@69: if ( DO_DELTA ) jpayne@69: { jpayne@69: ClusterFile = NULL; jpayne@69: DeltaFile = File_Open (DeltaFileName, "w"); jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: ClusterFile = File_Open (ClusterFileName, "w"); jpayne@69: DeltaFile = NULL; jpayne@69: } jpayne@69: jpayne@69: //-- Print the headers of the output files jpayne@69: if ( DO_DELTA ) jpayne@69: fprintf (DeltaFile, "%s %s\nPROMER\n", RefFileName, QryFileName); jpayne@69: else jpayne@69: fprintf (ClusterFile, "%s %s\nPROMER\n", RefFileName, QryFileName); jpayne@69: jpayne@69: //-- Generate the array of the reference sequences jpayne@69: InitSize = INIT_SIZE; jpayne@69: Af = (FastaRecord *) Safe_malloc ( sizeof(FastaRecord) * Ac ); jpayne@69: Af[As].seq = (char *) Safe_malloc ( sizeof(char) * InitSize ); jpayne@69: while ( Read_String (RefFile, Af[As].seq, InitSize, IdA, FALSE) ) jpayne@69: { jpayne@69: Af[As].Id = (char *) Safe_malloc (sizeof(char) * (strlen(IdA) + 1)); jpayne@69: strcpy (Af[As].Id, IdA); jpayne@69: jpayne@69: Af[As].len = strlen (Af[As].seq + 1); jpayne@69: jpayne@69: if ( ++ As >= Ac ) jpayne@69: { jpayne@69: Ac *= 2; jpayne@69: Af = (FastaRecord *) Safe_realloc ( Af, sizeof(FastaRecord) * Ac ); jpayne@69: } jpayne@69: jpayne@69: InitSize = INIT_SIZE; jpayne@69: Af[As].seq = (char *) Safe_malloc ( sizeof(char) * InitSize ); jpayne@69: } jpayne@69: fclose (RefFile); jpayne@69: if ( As <= 0 ) jpayne@69: parseAbort ( RefFileName ); jpayne@69: jpayne@69: jpayne@69: jpayne@69: //-- Process the input from line by line jpayne@69: PrevLine = NO_LINE; jpayne@69: IdA[0] = '\0'; jpayne@69: IdB[0] = '\0'; jpayne@69: FrameA = 0; jpayne@69: FrameB = 0; jpayne@69: CurrFrameA = -1; jpayne@69: CurrFrameB = -1; jpayne@69: CurrIdB[0] = '\0'; jpayne@69: while ( fgets(Line, MAX_LINE, stdin) != NULL ) jpayne@69: { jpayne@69: jpayne@69: //-- If the current line is a fasta HEADER_LINE jpayne@69: if ( Line[0] == '>' ) jpayne@69: { jpayne@69: if ( sscanf (Line + 1, "%s", CurrIdB) != 1 ) jpayne@69: parseAbort ("stdin"); jpayne@69: jpayne@69: sscanf (CurrIdB + strlen(CurrIdB) - 1, "%d", &CurrFrameB); jpayne@69: CurrIdB [strlen(CurrIdB) - 2] = '\0'; jpayne@69: jpayne@69: PrevLine = HEADER_LINE; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //-- If the current line is a cluster HEADER_LINE jpayne@69: else if ( Line[0] == '#' ) jpayne@69: { jpayne@69: PrevLine = HEADER_LINE; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //-- If the current line is a MATCH_LINE jpayne@69: else jpayne@69: { jpayne@69: if ( sscanf (Line, "%ld %ld %ld", &sA, &sB, &len) != 3 ) jpayne@69: parseAbort ("stdin"); jpayne@69: jpayne@69: //-- Re-map the reference coordinate back to its original sequence jpayne@69: // NOTE: (len + 1) * 2 = refLen(len) = concatenated frame length jpayne@69: // including the appended 'X' on each frame translation jpayne@69: for ( Seqi = 0; sA > refLen (Af[Seqi].len); Seqi ++ ) jpayne@69: sA -= refLen (Af[Seqi].len); jpayne@69: jpayne@69: assert ( Seqi < As ); jpayne@69: jpayne@69: //-- Get the correct frame, translate startA coordinate to frame jpayne@69: for ( i = 0; sA > (Af[Seqi].len - (i % 3)) / 3 + 1; i ++ ) jpayne@69: sA -= (Af[Seqi].len - (i % 3)) / 3 + 1; jpayne@69: CurrFrameA = i + 1; jpayne@69: assert ( CurrFrameA > 0 && CurrFrameA < 7 ); jpayne@69: jpayne@69: //-- If the match spans across a frame boundry jpayne@69: if ( CurrFrameA < 1 || CurrFrameA > 6 || jpayne@69: sA + len - 1 > (Af[Seqi].len - ((CurrFrameA - 1) % 3)) / 3 + 1 || jpayne@69: sA <= 0 ) jpayne@69: { jpayne@69: fprintf (stderr, jpayne@69: "\nWARNING: A MUM was found extending beyond the boundry of:\n" jpayne@69: " Reference sequence '>%s', frame %d\n" jpayne@69: "Please file a bug report\n" jpayne@69: "Attempting to continue.\n", Af[Seqi].Id, CurrFrameA); jpayne@69: continue; jpayne@69: } jpayne@69: jpayne@69: //-- If the match spans across a sequence boundry jpayne@69: if ( sA + len - 1 > refLen (Af[Seqi].len) || sA <= 0 ) jpayne@69: { jpayne@69: fprintf (stderr, jpayne@69: "\nWARNING: A MUM was found extending beyond the boundry of:\n" jpayne@69: " Reference sequence '>%s'\n" jpayne@69: "Please file a bug report\n" jpayne@69: "Attempting to continue.\n", Af[Seqi].Id); jpayne@69: continue; jpayne@69: } jpayne@69: jpayne@69: //-- Check and update the current synteny region jpayne@69: if ( strcmp (IdA, Af[Seqi].Id) != 0 || strcmp (IdB, CurrIdB) != 0 || jpayne@69: CurrFrameA != FrameA || CurrFrameB != FrameB ) jpayne@69: { jpayne@69: Found = false; jpayne@69: if ( strcmp (IdB, CurrIdB) == 0 ) jpayne@69: { jpayne@69: //-- Has this header been seen before? jpayne@69: for ( Sp = Syntenys.rbegin( ); Sp < Syntenys.rend( ); Sp ++ ) jpayne@69: { jpayne@69: if ( strcmp (Sp->AfP->Id, Af[Seqi].Id) == 0 ) jpayne@69: { jpayne@69: assert ( Sp->AfP->len == Af[Seqi].len ); jpayne@69: assert ( strcmp (Sp->Bf.Id, IdB) == 0 ); jpayne@69: CurrSp = Sp; jpayne@69: Found = true; jpayne@69: break; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: //-- New B sequence header, process all the old synteny's jpayne@69: processSyntenys (Syntenys, Af, As, jpayne@69: QryFile, ClusterFile, DeltaFile); jpayne@69: jpayne@69: } jpayne@69: jpayne@69: strcpy (IdA, Af[Seqi].Id); jpayne@69: strcpy (IdB, CurrIdB); jpayne@69: FrameA = CurrFrameA; jpayne@69: FrameB = CurrFrameB; jpayne@69: jpayne@69: //-- If not seen yet, create a new synteny region jpayne@69: if ( ! Found ) jpayne@69: { jpayne@69: Asyn.AfP = Af + Seqi; jpayne@69: Asyn.Bf.len = -1; jpayne@69: Asyn.Bf.Id = (char *) Safe_malloc jpayne@69: (sizeof(char) * (strlen(IdB) + 1)); jpayne@69: strcpy (Asyn.Bf.Id, IdB); jpayne@69: jpayne@69: Syntenys.push_back (Asyn); jpayne@69: CurrSp = Syntenys.rbegin( ); jpayne@69: } jpayne@69: jpayne@69: //-- Add a new cluster to the current synteny jpayne@69: if ( !Syntenys.empty( ) && !CurrSp->clusters.empty( ) ) jpayne@69: if ( CurrSp->clusters.rbegin( )->matches.empty( ) ) jpayne@69: CurrSp->clusters.pop_back( ); // hack to remove empties jpayne@69: Aclu.frameA = FrameA; jpayne@69: Aclu.frameB = FrameB; jpayne@69: Aclu.wasFused = false; jpayne@69: CurrSp->clusters.push_back (Aclu); jpayne@69: } jpayne@69: else if ( PrevLine == HEADER_LINE ) jpayne@69: { jpayne@69: //-- Add a new cluster to the current synteny jpayne@69: if ( !Syntenys.empty( ) && !CurrSp->clusters.empty( ) ) jpayne@69: if ( CurrSp->clusters.rbegin( )->matches.empty( ) ) jpayne@69: CurrSp->clusters.pop_back( ); jpayne@69: Aclu.frameA = FrameA; jpayne@69: Aclu.frameB = FrameB; jpayne@69: Aclu.wasFused = false; jpayne@69: CurrSp->clusters.push_back (Aclu); jpayne@69: } jpayne@69: jpayne@69: //-- Add a new match to the current cluster jpayne@69: // NOTE: A and B coordinates still reference the appropriate jpayne@69: // amino acid sequence frame, not the DNA (same with len) jpayne@69: if ( len > 1 ) jpayne@69: { jpayne@69: Amat.sA = sA; jpayne@69: Amat.sB = sB; jpayne@69: Amat.len = len; jpayne@69: CurrSp->clusters.rbegin( )->matches.push_back (Amat); jpayne@69: } jpayne@69: jpayne@69: PrevLine = MATCH_LINE; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- Process the left-over syntenys jpayne@69: if ( !Syntenys.empty( ) && !CurrSp->clusters.empty( ) ) jpayne@69: if ( CurrSp->clusters.rbegin( )->matches.empty( ) ) jpayne@69: CurrSp->clusters.pop_back( ); jpayne@69: processSyntenys (Syntenys, Af, As, QryFile, ClusterFile, DeltaFile); jpayne@69: fclose (QryFile); jpayne@69: jpayne@69: //-- Free the reference sequences jpayne@69: for ( i = 0; i < As; i ++ ) jpayne@69: { jpayne@69: free (Af[i].Id); jpayne@69: free (Af[i].seq); jpayne@69: } jpayne@69: free (Af); jpayne@69: jpayne@69: return EXIT_SUCCESS; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void addNewAlignment jpayne@69: (vector & Alignments, vector::iterator Cp, jpayne@69: vector::iterator Mp) jpayne@69: jpayne@69: // Create and add a new alignment object based on the current match jpayne@69: // and cluster information pointed to by Cp and Mp. jpayne@69: jpayne@69: { jpayne@69: Alignment Align; jpayne@69: jpayne@69: //-- Initialize the data jpayne@69: Align.sA = Mp->sA; jpayne@69: Align.sB = Mp->sB; jpayne@69: Align.eA = Mp->sA + Mp->len - 1; jpayne@69: Align.eB = Mp->sB + Mp->len - 1; jpayne@69: Align.frameA = Cp->frameA; jpayne@69: Align.frameB = Cp->frameB; jpayne@69: Align.delta.clear( ); jpayne@69: Align.deltaApos = 0; jpayne@69: jpayne@69: //-- Add the alignment object jpayne@69: Alignments.push_back (Align); jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: bool extendBackward jpayne@69: (vector & Alignments, vector::iterator CurrAp, jpayne@69: vector::iterator TargetAp, const char * A, const char * B) jpayne@69: jpayne@69: // Extend an alignment backwards off of the current alignment object. jpayne@69: // The current alignment object must be freshly created and consist jpayne@69: // only of an exact match (i.e. the delta vector MUST be empty). jpayne@69: // If the TargetAp alignment object is reached by the extension, it will jpayne@69: // be merged with CurrAp and CurrAp will be destroyed. If TargetAp is jpayne@69: // NULL the function will extend as far as possible. It is a strange jpayne@69: // and dangerous function because it can delete CurrAp, so edit with jpayne@69: // caution. Returns true if TargetAp was reached and merged, else false jpayne@69: // Designed only as a subroutine for extendClusters, should be used jpayne@69: // nowhere else. jpayne@69: jpayne@69: { jpayne@69: bool target_reached = false; jpayne@69: bool overflow_flag = false; jpayne@69: bool double_flag = false; jpayne@69: jpayne@69: vector::iterator Dp; jpayne@69: jpayne@69: unsigned int m_o; jpayne@69: long int targetA, targetB; jpayne@69: jpayne@69: m_o = BACKWARD_SEARCH; jpayne@69: jpayne@69: //-- Set the target coordinates jpayne@69: if ( TargetAp != Alignments.end( ) ) jpayne@69: { jpayne@69: targetA = TargetAp->eA; jpayne@69: targetB = TargetAp->eB; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: targetA = 1; jpayne@69: targetB = 1; jpayne@69: m_o |= OPTIMAL_BIT; jpayne@69: } jpayne@69: jpayne@69: //-- If alignment is too long, bring with bounds and set overflow_flag true jpayne@69: if ( CurrAp->sA - targetA + 1 > MAX_ALIGNMENT_LENGTH ) jpayne@69: { jpayne@69: targetA = CurrAp->sA - MAX_ALIGNMENT_LENGTH + 1; jpayne@69: overflow_flag = true; jpayne@69: m_o |= OPTIMAL_BIT; jpayne@69: } jpayne@69: if ( CurrAp->sB - targetB + 1 > MAX_ALIGNMENT_LENGTH ) jpayne@69: { jpayne@69: targetB = CurrAp->sB - MAX_ALIGNMENT_LENGTH + 1; jpayne@69: if ( overflow_flag ) jpayne@69: double_flag = true; jpayne@69: else jpayne@69: overflow_flag = true; jpayne@69: m_o |= OPTIMAL_BIT; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: if ( TO_SEQEND && !double_flag ) jpayne@69: m_o |= SEQEND_BIT; jpayne@69: jpayne@69: jpayne@69: //-- Attempt to reach the target jpayne@69: target_reached = alignSearch (A, CurrAp->sA, targetA, jpayne@69: B, CurrAp->sB, targetB, m_o); jpayne@69: jpayne@69: if ( overflow_flag || TargetAp == Alignments.end( ) ) jpayne@69: target_reached = false; jpayne@69: jpayne@69: if ( target_reached ) jpayne@69: { jpayne@69: //-- assert that CurrAp is new and at the end of the Alignment vector jpayne@69: assert ( CurrAp->delta.empty( ) ); jpayne@69: assert ( CurrAp == Alignments.end( ) - 1 ); jpayne@69: jpayne@69: //-- Merge the two alignment objects jpayne@69: extendForward (TargetAp, A, CurrAp->sA, jpayne@69: B, CurrAp->sB, FORCED_FORWARD_ALIGN); jpayne@69: TargetAp->eA = CurrAp->eA; jpayne@69: TargetAp->eB = CurrAp->eB; jpayne@69: Alignments.pop_back( ); jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: alignTarget (A, targetA, CurrAp->sA, jpayne@69: B, targetB, CurrAp->sB, jpayne@69: CurrAp->delta, FORCED_FORWARD_ALIGN); jpayne@69: CurrAp->sA = targetA; jpayne@69: CurrAp->sB = targetB; jpayne@69: jpayne@69: //-- Update the deltaApos value for the alignment object jpayne@69: for ( Dp = CurrAp->delta.begin( ); Dp < CurrAp->delta.end( ); Dp ++ ) jpayne@69: CurrAp->deltaApos += *Dp > 0 ? *Dp : labs(*Dp) - 1; jpayne@69: } jpayne@69: jpayne@69: return target_reached; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: bool extendForward jpayne@69: (vector::iterator CurrAp, const char * A, long int targetA, jpayne@69: const char * B, long int targetB, unsigned int m_o) jpayne@69: jpayne@69: // Extend an alignment forwards off the current alignment object until jpayne@69: // target or end of sequence is reached, and merge the delta values of the jpayne@69: // alignment object with the new delta values generated by the extension. jpayne@69: // Return true if the target was reached, else false jpayne@69: jpayne@69: { jpayne@69: long int ValA; jpayne@69: long int ValB; jpayne@69: unsigned int Di; jpayne@69: bool target_reached; jpayne@69: bool overflow_flag = false; jpayne@69: bool double_flag = false; jpayne@69: vector::iterator Dp; jpayne@69: jpayne@69: //-- Set Di to the end of the delta vector jpayne@69: Di = CurrAp->delta.size( ); jpayne@69: jpayne@69: //-- Calculate the distance between the start and end positions jpayne@69: ValA = targetA - CurrAp->eA + 1; jpayne@69: ValB = targetB - CurrAp->eB + 1; jpayne@69: jpayne@69: //-- If the distance is too long, shrink it and set the overflow_flag jpayne@69: if ( ValA > MAX_ALIGNMENT_LENGTH ) jpayne@69: { jpayne@69: targetA = CurrAp->eA + MAX_ALIGNMENT_LENGTH - 1; jpayne@69: overflow_flag = true; jpayne@69: m_o |= OPTIMAL_BIT; jpayne@69: } jpayne@69: if ( ValB > MAX_ALIGNMENT_LENGTH ) jpayne@69: { jpayne@69: targetB = CurrAp->eB + MAX_ALIGNMENT_LENGTH - 1; jpayne@69: if ( overflow_flag ) jpayne@69: double_flag = true; jpayne@69: else jpayne@69: overflow_flag = true; jpayne@69: m_o |= OPTIMAL_BIT; jpayne@69: } jpayne@69: jpayne@69: if ( double_flag ) jpayne@69: m_o &= ~SEQEND_BIT; jpayne@69: jpayne@69: target_reached = alignTarget (A, CurrAp->eA, targetA, jpayne@69: B, CurrAp->eB, targetB, jpayne@69: CurrAp->delta, m_o); jpayne@69: jpayne@69: //-- Notify user if alignment was chopped short jpayne@69: if ( target_reached && overflow_flag ) jpayne@69: target_reached = false; jpayne@69: jpayne@69: //-- Pick up delta where left off (Di) and merge with new deltas jpayne@69: if ( Di < CurrAp->delta.size( ) ) jpayne@69: { jpayne@69: //-- Merge the deltas jpayne@69: ValA = (CurrAp->eA - CurrAp->sA + 1) - CurrAp->deltaApos - 1; jpayne@69: CurrAp->delta[Di] += CurrAp->delta[Di] > 0 ? ValA : -(ValA); jpayne@69: if ( CurrAp->delta[Di] == 0 || ValA < 0 ) jpayne@69: { jpayne@69: fprintf(stderr, jpayne@69: "ERROR: failed to merge alignments at position %ld\n" jpayne@69: " Please file a bug report\n", jpayne@69: CurrAp->eA); jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: jpayne@69: //-- Update the deltaApos jpayne@69: for ( Dp = CurrAp->delta.begin( ) + Di; Dp < CurrAp->delta.end( ); Dp ++ ) jpayne@69: CurrAp->deltaApos += *Dp > 0 ? *Dp : labs(*Dp) - 1; jpayne@69: } jpayne@69: jpayne@69: //-- Set the alignment coordinates jpayne@69: CurrAp->eA = targetA; jpayne@69: CurrAp->eB = targetB; jpayne@69: jpayne@69: return target_reached; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void extendClusters jpayne@69: (vector & Clusters, jpayne@69: const FastaRecord * Af, const FastaRecord * Bf, FILE * DeltaFile) jpayne@69: jpayne@69: // Connect all the matches in every cluster between sequences A and B. jpayne@69: // Also, extend alignments off of the front and back of each cluster to jpayne@69: // expand total alignment coverage. When these extensions encounter an jpayne@69: // adjacent cluster (in a consistent frame), fuse the two regions to jpayne@69: // create one single encompassing region. This routine will create jpayne@69: // alignment objects from these extensions and output the resulting jpayne@69: // delta information to the delta output file. Also, all coordintes jpayne@69: // for the clusters and the alignment regions are translated to reference jpayne@69: // the original DNA sequecne. jpayne@69: jpayne@69: { jpayne@69: //-- Sort the clusters (ascending) by their start coordinate in sequence A jpayne@69: sort (Clusters.begin( ), Clusters.end( ), AscendingClusterSort( )); jpayne@69: jpayne@69: //-- If no delta file is requested jpayne@69: if ( ! DO_DELTA ) jpayne@69: return; jpayne@69: jpayne@69: jpayne@69: bool target_reached = false; // reached the adjacent match or cluster jpayne@69: // the amino acid sequences for A and B jpayne@69: char * A [7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: char * B [7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: jpayne@69: long int Alen [7], Blen [7]; // the length of the amino acid sequences jpayne@69: int i; jpayne@69: jpayne@69: unsigned int m_o; jpayne@69: long int targetA, targetB; // alignment extension targets in A and B jpayne@69: jpayne@69: vector::iterator Mp; // match pointer jpayne@69: jpayne@69: vector::iterator PrevCp; // where the extensions last left off jpayne@69: vector::iterator CurrCp; // the current cluster being extended jpayne@69: vector::iterator TargetCp = Clusters.end( ); // the target cluster jpayne@69: jpayne@69: vector Alignments; // the vector of alignment objects jpayne@69: vector::iterator CurrAp = Alignments.begin( ); // current align jpayne@69: vector::iterator TargetAp; // target align jpayne@69: jpayne@69: //-- Calculate the length of each amino acid frame translation jpayne@69: for ( i = 0; i < 6; i ++ ) jpayne@69: { jpayne@69: Alen [i + 1] = (Af->len - (i % 3)) / 3; jpayne@69: Blen [i + 1] = (Bf->len - (i % 3)) / 3; jpayne@69: } jpayne@69: jpayne@69: //-- Extend each cluster jpayne@69: PrevCp = Clusters.begin( ); jpayne@69: CurrCp = Clusters.begin( ); jpayne@69: while ( CurrCp < Clusters.end( ) ) jpayne@69: { jpayne@69: if ( DO_EXTEND ) jpayne@69: { jpayne@69: //-- Ignore if shadowed or already extended jpayne@69: if ( ! target_reached ) jpayne@69: if ( CurrCp->wasFused || jpayne@69: isShadowedCluster (CurrCp, Alignments, CurrAp) ) jpayne@69: { jpayne@69: CurrCp->wasFused = true; jpayne@69: CurrCp = ++ PrevCp; jpayne@69: continue; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- Initialize the right amino acid sequence for A and B if need be jpayne@69: if ( A [CurrCp->frameA] == NULL ) jpayne@69: { jpayne@69: A [CurrCp->frameA] = (char *) Safe_malloc jpayne@69: ( sizeof(char) * ( (Af->len / 3) + 2) ); jpayne@69: A [CurrCp->frameA] [0] = '\0'; jpayne@69: Alen [CurrCp->frameA] = Translate_DNA ( Af->seq, A [CurrCp->frameA], jpayne@69: CurrCp->frameA ); jpayne@69: } jpayne@69: if ( B [CurrCp->frameB] == NULL ) jpayne@69: { jpayne@69: B [CurrCp->frameB] = (char *) Safe_malloc jpayne@69: ( sizeof(char) * ( (Bf->len / 3) + 2) ); jpayne@69: B [CurrCp->frameB] [0] = '\0'; jpayne@69: Blen [CurrCp->frameB] = Translate_DNA ( Bf->seq, B [CurrCp->frameB], jpayne@69: CurrCp->frameB ); jpayne@69: } jpayne@69: jpayne@69: //-- Extend each match in the cluster jpayne@69: for ( Mp = CurrCp->matches.begin( ); Mp < CurrCp->matches.end( ); Mp ++ ) jpayne@69: { jpayne@69: //-- Try to extend the current match backwards jpayne@69: if ( target_reached ) jpayne@69: { jpayne@69: //-- Merge with the previous match jpayne@69: if ( CurrAp->eA != Mp->sA || CurrAp->eB != Mp->sB ) jpayne@69: { jpayne@69: //-- Strip matches until the targeted match is found jpayne@69: assert (Mp < CurrCp->matches.end( ) - 1); jpayne@69: continue; jpayne@69: } jpayne@69: jpayne@69: CurrAp->eA += Mp->len - 1; jpayne@69: CurrAp->eB += Mp->len - 1; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: //-- Create a new alignment object jpayne@69: addNewAlignment (Alignments, CurrCp, Mp); jpayne@69: CurrAp = Alignments.end( ) - 1; jpayne@69: jpayne@69: if ( DO_EXTEND || Mp != CurrCp->matches.begin ( ) ) jpayne@69: { jpayne@69: //-- Target the closest/best alignment object jpayne@69: TargetAp = getReverseTargetAlignment (Alignments, CurrAp); jpayne@69: jpayne@69: //-- Extend the new alignment object backwards jpayne@69: if ( extendBackward (Alignments, CurrAp, TargetAp, jpayne@69: A [CurrCp->frameA], B [CurrCp->frameB]) ) jpayne@69: CurrAp = TargetAp; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: m_o = FORWARD_ALIGN; jpayne@69: jpayne@69: //-- Try to extend the current match forwards jpayne@69: if ( Mp < CurrCp->matches.end( ) - 1 ) jpayne@69: { jpayne@69: //-- Target the next match in the cluster jpayne@69: targetA = (Mp + 1)->sA; jpayne@69: targetB = (Mp + 1)->sB; jpayne@69: jpayne@69: //-- Extend the current alignment object forward jpayne@69: target_reached = extendForward (CurrAp, jpayne@69: A [CurrCp->frameA], targetA, jpayne@69: B [CurrCp->frameB], targetB, m_o); jpayne@69: } jpayne@69: else if ( DO_EXTEND ) jpayne@69: { jpayne@69: targetA = Alen [CurrCp->frameA]; jpayne@69: targetB = Blen [CurrCp->frameB]; jpayne@69: jpayne@69: //-- Target the closest/best match in a future cluster jpayne@69: TargetCp = getForwardTargetCluster (Clusters, CurrCp, jpayne@69: targetA, targetB); jpayne@69: jpayne@69: if ( TargetCp == Clusters.end( ) ) jpayne@69: { jpayne@69: m_o |= OPTIMAL_BIT; jpayne@69: if ( TO_SEQEND ) jpayne@69: m_o |= SEQEND_BIT; jpayne@69: } jpayne@69: jpayne@69: //-- Extend the current alignment object forward jpayne@69: target_reached = extendForward (CurrAp, jpayne@69: A [CurrCp->frameA], targetA, jpayne@69: B [CurrCp->frameB], targetB, m_o); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: if ( TargetCp == Clusters.end( ) ) jpayne@69: target_reached = false; jpayne@69: jpayne@69: CurrCp->wasFused = true; jpayne@69: jpayne@69: if ( target_reached == false ) jpayne@69: CurrCp = ++ PrevCp; jpayne@69: else jpayne@69: CurrCp = TargetCp; jpayne@69: } jpayne@69: jpayne@69: #ifdef _DEBUG_ASSERT jpayne@69: validateData (Alignments, Clusters, Af, Bf); jpayne@69: #endif jpayne@69: jpayne@69: //-- Output the alignment data to the delta file jpayne@69: flushAlignments (Alignments, Af, Bf, DeltaFile); jpayne@69: jpayne@69: for ( i = 0; i < 7; i ++ ) jpayne@69: { jpayne@69: if ( A [i] != NULL ) jpayne@69: free ( A [i] ); jpayne@69: if ( B [i] != NULL ) jpayne@69: free ( B [i] ); jpayne@69: } jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void flushAlignments jpayne@69: (vector & Alignments, jpayne@69: const FastaRecord * Af, const FastaRecord * Bf, jpayne@69: FILE * DeltaFile) jpayne@69: jpayne@69: // Simply output the delta information stored in Alignments to the jpayne@69: // given delta file. Free the memory used by Alignments once the jpayne@69: // data is successfully output to the file. jpayne@69: jpayne@69: { jpayne@69: vector::iterator Ap; // alignment pointer jpayne@69: vector::iterator Dp; // delta pointer jpayne@69: jpayne@69: fprintf (DeltaFile, ">%s %s %ld %ld\n", Af->Id, Bf->Id, Af->len, Bf->len); jpayne@69: jpayne@69: //-- Generate the error counts jpayne@69: parseDelta (Alignments, Af, Bf); jpayne@69: jpayne@69: for ( Ap = Alignments.begin( ); Ap < Alignments.end( ); Ap ++ ) jpayne@69: { jpayne@69: fprintf (DeltaFile, "%ld %ld %ld %ld %ld %ld %ld\n", jpayne@69: transC (Ap->sA, Ap->frameA, Af->len), jpayne@69: transC (Ap->eA, Ap->frameA, Af->len) + (Ap->frameA > 3 ? -2:2), jpayne@69: transC (Ap->sB, Ap->frameB, Bf->len), jpayne@69: transC (Ap->eB, Ap->frameB, Bf->len) + (Ap->frameB > 3 ? -2:2), jpayne@69: Ap->Errors, Ap->SimErrors, Ap->NonAlphas); jpayne@69: jpayne@69: for ( Dp = Ap->delta.begin( ); Dp < Ap->delta.end( ); Dp ++ ) jpayne@69: fprintf (DeltaFile, "%ld\n", *Dp); jpayne@69: fprintf (DeltaFile, "0\n"); jpayne@69: jpayne@69: Ap->delta.clear( ); jpayne@69: } jpayne@69: jpayne@69: Alignments.clear( ); jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void flushSyntenys jpayne@69: (vector & Syntenys, FILE * ClusterFile) jpayne@69: jpayne@69: // Simply output the synteny/cluster information generated by the mgaps jpayne@69: // program. However, now the coordinates reference their appropriate jpayne@69: // reference sequence, and the reference sequecne header is added to jpayne@69: // the appropriate lines. Free the memory used by Syntenys once the jpayne@69: // data is successfully output to the file. jpayne@69: jpayne@69: { jpayne@69: vector::iterator Sp; // synteny pointer jpayne@69: vector::iterator Cp; // cluster pointer jpayne@69: vector::iterator Mp; // match pointer jpayne@69: jpayne@69: if ( ClusterFile ) jpayne@69: { jpayne@69: for ( Sp = Syntenys.begin( ); Sp < Syntenys.end( ); Sp ++ ) jpayne@69: { jpayne@69: fprintf (ClusterFile, ">%s %s %ld %ld\n", Sp->AfP->Id, Sp->Bf.Id, jpayne@69: Sp->AfP->len, Sp->Bf.len); jpayne@69: for ( Cp = Sp->clusters.begin( ); Cp < Sp->clusters.end( ); Cp ++ ) jpayne@69: { jpayne@69: fprintf (ClusterFile, "%2d %2d\n", jpayne@69: Cp->frameA > 3 ? (Cp->frameA - 3) * -1 : Cp->frameA, jpayne@69: Cp->frameB > 3 ? (Cp->frameB - 3) * -1 : Cp->frameB); jpayne@69: for ( Mp = Cp->matches.begin( ); Mp < Cp->matches.end( ); Mp ++ ) jpayne@69: { jpayne@69: fprintf (ClusterFile, "%8ld %8ld %6ld", jpayne@69: transC (Mp->sA, Cp->frameA, Sp->AfP->len), jpayne@69: transC (Mp->sB, Cp->frameB, Sp->Bf.len), jpayne@69: Mp->len * 3); jpayne@69: jpayne@69: if ( Mp != Cp->matches.begin( ) ) jpayne@69: fprintf (ClusterFile, "%6ld %6ld\n", jpayne@69: (Mp->sA - (Mp - 1)->sA - (Mp - 1)->len) * 3, jpayne@69: (Mp->sB - (Mp - 1)->sB - (Mp - 1)->len) * 3); jpayne@69: else jpayne@69: fprintf (ClusterFile, "%6s %6s\n", "-", "-"); jpayne@69: } jpayne@69: Cp->matches.clear( ); jpayne@69: } jpayne@69: Sp->clusters.clear( ); jpayne@69: } jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: for ( Sp = Syntenys.begin( ); Sp < Syntenys.end( ); Sp ++ ) jpayne@69: { jpayne@69: for ( Cp = Sp->clusters.begin( ); Cp < Sp->clusters.end( ); Cp ++ ) jpayne@69: { jpayne@69: Cp->matches.clear( ); jpayne@69: } jpayne@69: Sp->clusters.clear( ); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: Syntenys.clear( ); jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: vector::iterator getForwardTargetCluster jpayne@69: (vector & Clusters, vector::iterator CurrCp, jpayne@69: long int & targetA, long int & targetB) jpayne@69: jpayne@69: // Return the cluster that is most likely to successfully join (in a jpayne@69: // forward direction) with the current cluster. The returned cluster jpayne@69: // must contain 1 or more matches that are strictly greater than the end jpayne@69: // of the current cluster. The targeted cluster must also be on a jpayne@69: // diagonal close enough to the current cluster, so that a connection jpayne@69: // could possibly be made by the alignment extender. Also, this targeted jpayne@69: // cluster must be consistent in frame with the current cluster. Assumes jpayne@69: // clusters have been sorted via AscendingClusterSort. Returns targeted jpayne@69: // cluster and stores the target coordinates in targetA and targetB. If no jpayne@69: // suitable cluster was found, the function will return NULL and target jpayne@69: // A and targetB will remain unchanged. jpayne@69: jpayne@69: { jpayne@69: vector::iterator Mip; // match iteratrive pointer jpayne@69: vector::iterator Cp; // cluster pointer jpayne@69: vector::iterator Cip; // cluster iterative pointer jpayne@69: long int eA, eB; // possible target jpayne@69: long int greater, lesser; // gap sizes between two clusters jpayne@69: long int sA = CurrCp->matches.rbegin( )->sA + jpayne@69: CurrCp->matches.rbegin( )->len - 1; // the endA of the current cluster jpayne@69: long int sB = CurrCp->matches.rbegin( )->sB + jpayne@69: CurrCp->matches.rbegin( )->len - 1; // the endB of the current cluster jpayne@69: jpayne@69: //-- End of sequences is the default target, set distance accordingly jpayne@69: long int dist = (targetA - sA < targetB - sB ? targetA - sA : targetB - sB); jpayne@69: jpayne@69: //-- For all clusters greater than the current cluster (on sequence A) jpayne@69: Cp = Clusters.end( ); jpayne@69: for ( Cip = CurrCp + 1; Cip < Clusters.end( ); Cip ++ ) jpayne@69: { jpayne@69: //-- If the cluster is in the same frame jpayne@69: if ( CurrCp->frameA == Cip->frameA && CurrCp->frameB == Cip->frameB ) jpayne@69: { jpayne@69: eA = Cip->matches.begin( )->sA; jpayne@69: eB = Cip->matches.begin( )->sB; jpayne@69: jpayne@69: //-- If the cluster overlaps the current cluster, strip some matches jpayne@69: if ( ( eA < sA || eB < sB ) && jpayne@69: Cip->matches.rbegin( )->sA >= sA && jpayne@69: Cip->matches.rbegin( )->sB >= sB ) jpayne@69: { jpayne@69: for ( Mip = Cip->matches.begin( ); jpayne@69: Mip < Cip->matches.end( ) && ( eA < sA || eB < sB ); jpayne@69: Mip ++ ) jpayne@69: { jpayne@69: eA = Mip->sA; jpayne@69: eB = Mip->sB; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- If the cluster is strictly greater than current cluster jpayne@69: if ( eA >= sA && eB >= sB ) jpayne@69: { jpayne@69: if ( eA - sA > eB - sB ) jpayne@69: { jpayne@69: greater = eA - sA; jpayne@69: lesser = eB - sB; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: lesser = eA - sA; jpayne@69: greater = eB - sB; jpayne@69: } jpayne@69: jpayne@69: //-- If the cluster is close enough jpayne@69: if ( greater <= getBreakLen( ) || jpayne@69: (lesser) * GOOD_SCORE [getMatrixType( )] + jpayne@69: (greater - lesser) * CONT_GAP_SCORE [getMatrixType( )] > 0 ) jpayne@69: { jpayne@69: Cp = Cip; jpayne@69: targetA = eA; jpayne@69: targetB = eB; jpayne@69: break; jpayne@69: } jpayne@69: else if ( (greater << 1) - lesser < dist ) jpayne@69: { jpayne@69: Cp = Cip; jpayne@69: targetA = eA; jpayne@69: targetB = eB; jpayne@69: dist = (greater << 1) - lesser; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: return Cp; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: vector::iterator getReverseTargetAlignment jpayne@69: (vector & Alignments, vector::iterator CurrAp) jpayne@69: jpayne@69: // Return the alignment that is most likely to successfully join (in a jpayne@69: // reverse direction) with the current alignment. The returned alignment jpayne@69: // must be strictly less than the current cluster and be on a diagonal jpayne@69: // close enough to the current alignment, so that a connection jpayne@69: // could possibly be made by the alignment extender. Also, this targeted jpayne@69: // alignment must be consistent in frame with the current alignment. jpayne@69: // Assumes clusters have been sorted via AscendingClusterSort and jpayne@69: // processed in order, so therefore all alignments are in order by their jpayne@69: // start A coordinate. jpayne@69: jpayne@69: { jpayne@69: vector::iterator Ap; // alignment pointer jpayne@69: vector::iterator Aip; // alignment iterative pointer jpayne@69: long int eA, eB; // possible targets jpayne@69: long int greater, lesser; // gap sizes between the two alignments jpayne@69: long int sA = CurrAp->sA; // the startA of the current alignment jpayne@69: long int sB = CurrAp->sB; // the startB of the current alignment jpayne@69: jpayne@69: //-- Beginning of sequences is the default target, set distance accordingly jpayne@69: long int dist = (sA < sB ? sA : sB); jpayne@69: jpayne@69: //-- For all alignments less than the current alignment (on sequence A) jpayne@69: Ap = Alignments.end( ); jpayne@69: for ( Aip = CurrAp - 1; Aip >= Alignments.begin( ); Aip -- ) jpayne@69: { jpayne@69: //-- If the alignment is on the same direction jpayne@69: if ( CurrAp->frameA == Aip->frameA && CurrAp->frameB == Aip->frameB ) jpayne@69: { jpayne@69: eA = Aip->eA; jpayne@69: eB = Aip->eB; jpayne@69: jpayne@69: //-- If the alignment is strictly greater than current cluster jpayne@69: if ( eA <= sA && eB <= sB ) jpayne@69: { jpayne@69: if ( sA - eA > sB - eB ) jpayne@69: { jpayne@69: greater = sA - eA; jpayne@69: lesser = sB - eB; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: lesser = sA - eA; jpayne@69: greater = sB - eB; jpayne@69: } jpayne@69: jpayne@69: //-- If the cluster is close enough jpayne@69: if ( greater <= getBreakLen( ) || jpayne@69: (lesser) * GOOD_SCORE [getMatrixType( )] + jpayne@69: (greater - lesser) * CONT_GAP_SCORE [getMatrixType( )] > 0 ) jpayne@69: { jpayne@69: Ap = Aip; jpayne@69: break; jpayne@69: } jpayne@69: else if ( (greater << 1) - lesser < dist ) jpayne@69: { jpayne@69: Ap = Aip; jpayne@69: dist = (greater << 1) - lesser; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: return Ap; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: bool isShadowedCluster jpayne@69: (vector::iterator CurrCp, jpayne@69: vector & Alignments, vector::iterator Ap) jpayne@69: jpayne@69: // Check if the current cluster is shadowed by a previously produced jpayne@69: // alignment region. Return true if it is, else false. jpayne@69: jpayne@69: { jpayne@69: vector::iterator Aip; // alignment pointer jpayne@69: jpayne@69: long int sA = CurrCp->matches.begin( )->sA; jpayne@69: long int eA = CurrCp->matches.rbegin( )->sA + jpayne@69: CurrCp->matches.rbegin( )->len - 1; jpayne@69: long int sB = CurrCp->matches.begin( )->sB; jpayne@69: long int eB = CurrCp->matches.rbegin( )->sB + jpayne@69: CurrCp->matches.rbegin( )->len - 1; jpayne@69: jpayne@69: if ( ! Alignments.empty( ) ) // if there are alignments to use jpayne@69: { jpayne@69: //-- Look backwards in hope of finding a shadowing alignment jpayne@69: for ( Aip = Ap; Aip >= Alignments.begin( ); Aip -- ) jpayne@69: { jpayne@69: //-- If in the same frames and shadowing the current cluster, break jpayne@69: if ( Aip->frameA == CurrCp->frameA && Aip->frameB == CurrCp->frameB ) jpayne@69: if ( Aip->eA >= eA && Aip->eB >= eB ) jpayne@69: if ( Aip->sA <= sA && Aip->sB <= sB ) jpayne@69: break; jpayne@69: } jpayne@69: jpayne@69: //-- Return true if the loop was broken, i.e. shadow found jpayne@69: if ( Aip >= Alignments.begin( ) ) jpayne@69: return true; jpayne@69: } jpayne@69: jpayne@69: //-- Return false if Alignments was empty or loop was not broken jpayne@69: return false; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void parseAbort jpayne@69: (const char * s) jpayne@69: jpayne@69: // Abort the program if there was an error in parsing file 's' jpayne@69: jpayne@69: { jpayne@69: fprintf (stderr, jpayne@69: "ERROR: Could not parse input from '%s'. \n" jpayne@69: "Please check the filename and format, or file a bug report\n", s); jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void parseDelta jpayne@69: (vector & Alignments, jpayne@69: const FastaRecord * Af, const FastaRecord *Bf) jpayne@69: jpayne@69: // Use the delta information to generate the error counts for each jpayne@69: // alignment, and fill this information into the data type jpayne@69: jpayne@69: { jpayne@69: // pointers to Af.seq and Bf.seq jpayne@69: char * A [7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: char * B [7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: jpayne@69: char ch1, ch2; jpayne@69: long int Delta; jpayne@69: int Sign; jpayne@69: long int i, Apos, Bpos; jpayne@69: long int Remain, Total; jpayne@69: long int Errors, SimErrors; jpayne@69: long int NonAlphas; jpayne@69: vector::iterator Ap; jpayne@69: vector::iterator Dp; jpayne@69: jpayne@69: for ( Ap = Alignments.begin( ); Ap < Alignments.end( ); Ap ++ ) jpayne@69: { jpayne@69: if ( A [Ap->frameA] == NULL ) jpayne@69: { jpayne@69: A [Ap->frameA] = (char *) Safe_malloc jpayne@69: ( sizeof(char) * ( (Af->len / 3) + 2 ) ); jpayne@69: A [Ap->frameA] [0] = '\0'; jpayne@69: Translate_DNA ( Af->seq, A [Ap->frameA], Ap->frameA ); jpayne@69: } jpayne@69: if ( B [Ap->frameB] == NULL ) jpayne@69: { jpayne@69: B [Ap->frameB] = (char *) Safe_malloc jpayne@69: ( sizeof(char) * ( (Bf->len / 3) + 2) ); jpayne@69: B [Ap->frameB] [0] = '\0'; jpayne@69: Translate_DNA ( Bf->seq, B [Ap->frameB], Ap->frameB ); jpayne@69: } jpayne@69: jpayne@69: Apos = Ap->sA; jpayne@69: Bpos = Ap->sB; jpayne@69: jpayne@69: Errors = 0; jpayne@69: SimErrors = 0; jpayne@69: NonAlphas = 0; jpayne@69: Remain = Ap->eA - Ap->sA + 1; jpayne@69: Total = Remain; jpayne@69: jpayne@69: //-- For all delta's in this alignment jpayne@69: for ( Dp = Ap->delta.begin( ); Dp < Ap->delta.end( ); Dp ++ ) jpayne@69: { jpayne@69: Delta = *Dp; jpayne@69: Sign = Delta > 0 ? 1 : -1; jpayne@69: Delta = labs ( Delta ); jpayne@69: jpayne@69: //-- For all the bases before the next indel jpayne@69: for ( i = 1; i < Delta; i ++ ) jpayne@69: { jpayne@69: ch1 = A [Ap->frameA] [Apos ++]; jpayne@69: ch2 = B [Ap->frameB] [Bpos ++]; jpayne@69: jpayne@69: if ( !isalpha (ch1) ) jpayne@69: { jpayne@69: ch1 = STOP_CHAR; jpayne@69: NonAlphas ++; jpayne@69: } jpayne@69: if ( !isalpha (ch2) ) jpayne@69: { jpayne@69: ch2 = STOP_CHAR; jpayne@69: NonAlphas ++; jpayne@69: } jpayne@69: jpayne@69: ch1 = toupper(ch1); jpayne@69: ch2 = toupper(ch2); jpayne@69: if ( 1 > MATCH_SCORE jpayne@69: [getMatrixType( )] jpayne@69: [ch1 - 'A'] jpayne@69: [ch2 - 'A'] ) jpayne@69: SimErrors ++; jpayne@69: if ( ch1 != ch2 ) jpayne@69: Errors ++; jpayne@69: } jpayne@69: jpayne@69: //-- Process the current indel jpayne@69: Remain -= i - 1; jpayne@69: Errors ++; jpayne@69: SimErrors ++; jpayne@69: jpayne@69: if ( Sign == 1 ) jpayne@69: { jpayne@69: if ( !isalpha (A [Ap->frameA] [Apos ++]) ) jpayne@69: NonAlphas ++; jpayne@69: Remain --; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: if ( !isalpha (B [Ap->frameB] [Bpos ++]) ) jpayne@69: NonAlphas ++; jpayne@69: Total ++; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- For all the bases after the final indel jpayne@69: for ( i = 0; i < Remain; i ++ ) jpayne@69: { jpayne@69: //-- Score character match and update error counters jpayne@69: ch1 = A [Ap->frameA] [Apos ++]; jpayne@69: ch2 = B [Ap->frameB] [Bpos ++]; jpayne@69: jpayne@69: if ( !isalpha (ch1) ) jpayne@69: { jpayne@69: ch1 = STOP_CHAR; jpayne@69: NonAlphas ++; jpayne@69: } jpayne@69: if ( !isalpha (ch2) ) jpayne@69: { jpayne@69: ch2 = STOP_CHAR; jpayne@69: NonAlphas ++; jpayne@69: } jpayne@69: jpayne@69: ch1 = toupper(ch1); jpayne@69: ch2 = toupper(ch2); jpayne@69: if ( 1 > MATCH_SCORE jpayne@69: [getMatrixType( )] jpayne@69: [ch1 - 'A'] jpayne@69: [ch2 - 'A'] ) jpayne@69: SimErrors ++; jpayne@69: if ( ch1 != ch2 ) jpayne@69: Errors ++; jpayne@69: } jpayne@69: jpayne@69: Ap->Errors = Errors; jpayne@69: Ap->SimErrors = SimErrors; jpayne@69: Ap->NonAlphas = NonAlphas; jpayne@69: } jpayne@69: jpayne@69: for ( i = 1; i <= 6; i ++ ) jpayne@69: { jpayne@69: if ( A [i] != NULL ) jpayne@69: free ( A [i] ); jpayne@69: A [i] = NULL; jpayne@69: jpayne@69: if ( B [i] != NULL ) jpayne@69: free ( B [i] ); jpayne@69: B [i] = NULL; jpayne@69: } jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void processSyntenys jpayne@69: (vector & Syntenys, FastaRecord * Af, long int As, jpayne@69: FILE * QryFile, FILE * ClusterFile, FILE * DeltaFile) jpayne@69: jpayne@69: // For each syntenic region with clusters, read in the B sequence and jpayne@69: // extend the clusters to expand total alignment coverage. Clusters should jpayne@69: // still reference the amino acid concatenations, the translation back to jpayne@69: // DNA will be taken care of by the extendClusters function. jpayne@69: // processSyntenys should ONLY be called once all the clusters for jpayne@69: // the contained syntenic regions have been stored in the data structure. jpayne@69: // Frees the memory used by the the syntenic regions once the output of jpayne@69: // extendClusters and flushSyntenys has been produced. jpayne@69: jpayne@69: { jpayne@69: FastaRecord Bf; // the current B sequence jpayne@69: Bf.Id = (char *) Safe_malloc (sizeof(char) * (MAX_LINE + 1)); jpayne@69: jpayne@69: long int InitSize = INIT_SIZE; // the initial memory capacity of B jpayne@69: vector::iterator CurrSp; // current synteny pointer jpayne@69: jpayne@69: //-- Initialize the B sequence storage jpayne@69: Bf.seq = (char *) Safe_malloc ( sizeof(char) * InitSize ); jpayne@69: jpayne@69: //-- For all the contained syntenys jpayne@69: for ( CurrSp = Syntenys.begin( ); CurrSp < Syntenys.end( ); CurrSp ++ ) jpayne@69: { jpayne@69: //-- If no clusters, ignore jpayne@69: if ( CurrSp->clusters.empty( ) ) jpayne@69: continue; jpayne@69: jpayne@69: //-- If a B sequence not seen yet, read it in jpayne@69: //-- IMPORTANT: The B sequences in the synteny object are assumed to be jpayne@69: // ordered as output by mgaps, if they are not in order the program jpayne@69: // will fail. (All like tags must be adjacent and in the same order jpayne@69: // as the query file) jpayne@69: if ( CurrSp == Syntenys.begin( ) || jpayne@69: strcmp (CurrSp->Bf.Id, (CurrSp-1)->Bf.Id) != 0 ) jpayne@69: { jpayne@69: //-- Read in the B sequence jpayne@69: while ( Read_String (QryFile, Bf.seq, InitSize, Bf.Id, FALSE) ) jpayne@69: if ( strcmp (CurrSp->Bf.Id, Bf.Id) == 0 ) jpayne@69: break; jpayne@69: if ( strcmp (CurrSp->Bf.Id, Bf.Id) != 0 ) jpayne@69: { jpayne@69: fprintf(stderr,"%s\n", CurrSp->Bf.Id); jpayne@69: parseAbort ( "Query File" ); jpayne@69: } jpayne@69: Bf.len = strlen (Bf.seq + 1); jpayne@69: } jpayne@69: jpayne@69: //-- Extend clusters and create the alignment information jpayne@69: CurrSp->Bf.len = Bf.len; jpayne@69: extendClusters (CurrSp->clusters, CurrSp->AfP, &Bf, DeltaFile); jpayne@69: } jpayne@69: jpayne@69: //-- Create the cluster information and clear the Synteny structure jpayne@69: flushSyntenys (Syntenys, ClusterFile); jpayne@69: jpayne@69: free (Bf.Id); jpayne@69: free (Bf.seq); jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: inline long int transC jpayne@69: (long int Coord, int Frame, long int Len) jpayne@69: jpayne@69: // Translate an amino acid coordinate to a nucleotide coordinate jpayne@69: // relative to the forward strand. The Len parameter should be the jpayne@69: // length of the original DNA strand. jpayne@69: jpayne@69: { jpayne@69: assert ( Frame > 0 && Frame < 7 ); jpayne@69: if ( Frame > 3 ) jpayne@69: return revC ( (Coord * 3) - (3 - (Frame - 3)), Len ); jpayne@69: else jpayne@69: return (Coord * 3) - (3 - (Frame)); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: inline long int refLen jpayne@69: (long int ntLen) jpayne@69: jpayne@69: // Return the length of the concatenated amino acid sequence frames, jpayne@69: // including the appended 'X' at the end of each sequence frame for the jpayne@69: // given DNA input length. (The returned length will be the length of jpayne@69: // the concatenated frames for this sequence as output by prepro.cc) jpayne@69: jpayne@69: { jpayne@69: return (ntLen + 1) << 1; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: inline long int revC jpayne@69: (long int Coord, long int Len) jpayne@69: jpayne@69: // Reverse complement the given coordinate for the given length. jpayne@69: jpayne@69: { jpayne@69: assert (Len - Coord + 1 > 0); jpayne@69: return (Len - Coord + 1); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void printHelp jpayne@69: (const char * s) jpayne@69: jpayne@69: // Display the program's help information to stderr. jpayne@69: jpayne@69: { jpayne@69: fprintf (stderr, jpayne@69: "\nUSAGE: %s [options] < \n\n", s); jpayne@69: fprintf (stderr, jpayne@69: "-b int set the alignment break (give-up) length to int (amino acids)\n" jpayne@69: "-d output only match clusters rather than extended alignments\n" jpayne@69: "-e do not extend alignments outward from clusters\n" jpayne@69: "-h display help information\n" jpayne@69: "-t force alignment to ends of sequence if within -b distance\n" jpayne@69: "-x type set the matrix type to \"type\" - Default is 2 (BLOSUM 62),\n" jpayne@69: " other options include 1 (BLOSUM 45) and 3 (BLOSUM 80)\n\n"); jpayne@69: fprintf (stderr, jpayne@69: " Input is the output of the \"mgaps\" program from stdin, and\n" jpayne@69: "the two original PROmer sequence files passed on the command\n" jpayne@69: "line. is the prefix to be added to the front of the\n" jpayne@69: "output file .delta\n" jpayne@69: " .delta is the alignment object that catalogs the distance\n" jpayne@69: "between insertions and deletions. For further information\n" jpayne@69: "regarding this file, please refer to the documentation under\n" jpayne@69: "the .delta output description.\n\n"); jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void printUsage jpayne@69: (const char * s) jpayne@69: jpayne@69: // Display the program's usage information to stderr. jpayne@69: jpayne@69: { jpayne@69: fprintf (stderr, jpayne@69: "\nUSAGE: %s [options] < \n\n", s); jpayne@69: fprintf (stderr, "Try '%s -h' for more information.\n", s); jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: void validateData jpayne@69: (vector Alignments, vector Clusters, jpayne@69: const FastaRecord * Af, const FastaRecord * Bf) jpayne@69: jpayne@69: // Self test function to check that the cluster and alignment information jpayne@69: // is valid jpayne@69: jpayne@69: { jpayne@69: char * A [7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: char * B [7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: jpayne@69: long int Alen [7], Blen [7]; // the length of the amino acid sequences jpayne@69: vector::iterator Cp; jpayne@69: vector::iterator Mp; jpayne@69: vector::iterator Ap; jpayne@69: long int x, y, i; jpayne@69: char Xc, Yc; jpayne@69: jpayne@69: for ( Cp = Clusters.begin( ); Cp < Clusters.end( ); Cp ++ ) jpayne@69: { jpayne@69: assert ( Cp->wasFused ); jpayne@69: jpayne@69: //-- Initialize the right amino acid sequence for A and B if need be jpayne@69: if ( A [Cp->frameA] == NULL ) jpayne@69: { jpayne@69: A [Cp->frameA] = (char *) Safe_malloc jpayne@69: ( sizeof(char) * ( (Af->len / 3) + 2) ); jpayne@69: A [Cp->frameA] [0] = '\0'; jpayne@69: Alen [Cp->frameA] = Translate_DNA ( Af->seq, A [Cp->frameA], jpayne@69: Cp->frameA ); jpayne@69: } jpayne@69: if ( B [Cp->frameB] == NULL ) jpayne@69: { jpayne@69: B [Cp->frameB] = (char *) Safe_malloc jpayne@69: ( sizeof(char) * ( (Bf->len / 3) + 2) ); jpayne@69: B [Cp->frameB] [0] = '\0'; jpayne@69: Blen [Cp->frameB] = Translate_DNA ( Bf->seq, B [Cp->frameB], jpayne@69: Cp->frameB ); jpayne@69: } jpayne@69: jpayne@69: for ( Mp = Cp->matches.begin( ); Mp < Cp->matches.end( ); Mp ++ ) jpayne@69: { jpayne@69: //-- assert for each match in cluster, it is indeed a match jpayne@69: x = Mp->sA; jpayne@69: y = Mp->sB; jpayne@69: for ( i = 0; i < Mp->len; i ++ ) jpayne@69: assert ( A[Cp->frameA][x ++] == B[Cp->frameB][y ++] ); jpayne@69: jpayne@69: //-- assert for each match in cluster, it is contained in an alignment jpayne@69: for ( Ap = Alignments.begin( ); Ap < Alignments.end( ); Ap ++ ) jpayne@69: { jpayne@69: if ( Ap->sA <= Mp->sA && Ap->sB <= Mp->sB && jpayne@69: Ap->eA >= Mp->sA + Mp->len - 1 && jpayne@69: Ap->eB >= Mp->sB + Mp->len - 1 ) jpayne@69: break; jpayne@69: } jpayne@69: assert ( Ap < Alignments.end( ) ); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- assert alignments are optimal (quick check if first and last chars equal) jpayne@69: for ( Ap = Alignments.begin( ); Ap < Alignments.end( ); Ap ++ ) jpayne@69: { jpayne@69: assert ( Ap->sA <= Ap->eA ); jpayne@69: assert ( Ap->sB <= Ap->eB ); jpayne@69: jpayne@69: assert ( Ap->sA >= 1 && Ap->sA <= Af->len ); jpayne@69: assert ( Ap->eA >= 1 && Ap->eA <= Af->len ); jpayne@69: assert ( Ap->sB >= 1 && Ap->sB <= Bf->len ); jpayne@69: assert ( Ap->eB >= 1 && Ap->eB <= Bf->len ); jpayne@69: jpayne@69: Xc = toupper(isalpha(A[Ap->frameA][Ap->sA]) ? jpayne@69: A[Ap->frameA][Ap->sA] : STOP_CHAR); jpayne@69: Yc = toupper(isalpha(B[Ap->frameB][Ap->sB]) ? jpayne@69: B[Ap->frameB][Ap->sB] : STOP_CHAR); jpayne@69: assert ( 0 <= MATCH_SCORE [getMatrixType( )] [Xc - 'A'] [Yc - 'A'] ); jpayne@69: jpayne@69: Xc = toupper(isalpha(A[Ap->frameA][Ap->eA]) ? jpayne@69: A[Ap->frameA][Ap->eA] : STOP_CHAR); jpayne@69: Yc = toupper(isalpha(B[Ap->frameB][Ap->eB]) ? jpayne@69: B[Ap->frameB][Ap->eB] : STOP_CHAR); jpayne@69: assert ( 0 <= MATCH_SCORE [getMatrixType( )] [Xc - 'A'] [Yc - 'A'] ); jpayne@69: } jpayne@69: jpayne@69: for ( i = 0; i < 7; i ++ ) jpayne@69: { jpayne@69: if ( A [i] != NULL ) jpayne@69: free ( A [i] ); jpayne@69: if ( B [i] != NULL ) jpayne@69: free ( B [i] ); jpayne@69: } jpayne@69: jpayne@69: return; jpayne@69: }