jpayne@69: /* Programmer: A. Delcher jpayne@69: * Written: 7 September 1998 jpayne@69: * File: ~delcher/Align/annotate.cc jpayne@69: * jpayne@69: * This program reads the output from the gaps program and jpayne@69: * adds alignment information to it. Reads frag info from jpayne@69: * the file named on the '>' lines of the gap file. The word jpayne@69: * "reverse" after that name will make the query sequence be reverse jpayne@69: * complemented. jpayne@69: */ jpayne@69: jpayne@69: #include "tigrinc.hh" jpayne@69: #include jpayne@69: using namespace std; jpayne@69: jpayne@69: #define FIELD_LEN 20 jpayne@69: #define MAX_ALIGN 10000 jpayne@69: #define MAX_LINE_LEN 200 jpayne@69: #define MAX_NAME_LEN 500 jpayne@69: #define PREFIX_LEN 10 jpayne@69: #define WIDTH 60 jpayne@69: jpayne@69: #ifndef EXIT_FAILURE jpayne@69: #define EXIT_FAILURE -1 jpayne@69: #endif jpayne@69: #ifndef EXIT_SUCCESS jpayne@69: #define EXIT_SUCCESS 0 jpayne@69: #endif jpayne@69: jpayne@69: jpayne@69: void Show_Alignment (char [], long int, char [], long int); jpayne@69: jpayne@69: jpayne@69: char Line [MAX_LINE_LEN]; jpayne@69: FILE * Gaps_With_Errors_File; jpayne@69: jpayne@69: jpayne@69: jpayne@69: int main (int argc, char * argv []) jpayne@69: jpayne@69: { jpayne@69: FILE * Gap_File, * Data_File; jpayne@69: char * Data1, * Data2; jpayne@69: char Name [MAX_NAME_LEN], Tag [MAX_NAME_LEN]; jpayne@69: char Olap_String [FIELD_LEN], Gap_String1 [FIELD_LEN], Gap_String2 [FIELD_LEN]; jpayne@69: int Tag_Len; jpayne@69: long int Input_Size, Len1, Len2, Line_Len; jpayne@69: long int Start1, Start2, Len, Olap, Gap1, Gap2; jpayne@69: long int i; jpayne@69: jpayne@69: if (argc < 3) jpayne@69: { jpayne@69: printf ("Usage: %s \n", argv [0]); jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: Data_File = File_Open (argv [2], "r"); jpayne@69: Data2 = (char *) Safe_malloc (INIT_SIZE); jpayne@69: Input_Size = INIT_SIZE; jpayne@69: jpayne@69: Gaps_With_Errors_File = File_Open ("witherrors.gaps", "w"); jpayne@69: jpayne@69: Read_String (Data_File, Data2, Input_Size, Name, FALSE); jpayne@69: fclose (Data_File); jpayne@69: Data_File = NULL; jpayne@69: Len2 = strlen (Data2 + 1); jpayne@69: jpayne@69: Gap_File = File_Open (argv [1], "r"); jpayne@69: jpayne@69: Data1 = (char *) Safe_malloc (INIT_SIZE); jpayne@69: Input_Size = INIT_SIZE; jpayne@69: jpayne@69: while (fgets (Line, MAX_LINE_LEN, Gap_File) != NULL) jpayne@69: { jpayne@69: printf ("%s", Line); jpayne@69: for (i = 0; isspace (Line [i]) && Line [i] != '\n'; i ++) jpayne@69: ; jpayne@69: if (Line [i] == '>') jpayne@69: { jpayne@69: Tag [0] = '\0'; jpayne@69: sscanf (Line + i + 1, "%s %s", Name, Tag); jpayne@69: if ( ! strcmp (Name, "Wrap") ) jpayne@69: continue; jpayne@69: jpayne@69: Data_File = File_Open (Name, "r"); jpayne@69: Read_String (Data_File, Data1, Input_Size, Name, FALSE); jpayne@69: fclose (Data_File); jpayne@69: jpayne@69: Len1 = strlen (Data1 + 1); jpayne@69: jpayne@69: Tag_Len = strlen (Tag); jpayne@69: for (i = 0; i < Tag_Len; i ++) jpayne@69: Tag [i] = tolower (Tag [i]); jpayne@69: if (strcmp (Tag, "reverse") == 0) jpayne@69: Reverse_Complement (Data2, 1, Len2); jpayne@69: fprintf (Gaps_With_Errors_File, "%s", Line); jpayne@69: continue; jpayne@69: } jpayne@69: else if (! isdigit (Line [i])) jpayne@69: { jpayne@69: fprintf (Gaps_With_Errors_File, "%s", Line); jpayne@69: continue; jpayne@69: } jpayne@69: sscanf (Line, "%ld %ld %ld %s %s %s", & Start1, & Start2, jpayne@69: & Len, Olap_String, Gap_String1, Gap_String2); jpayne@69: jpayne@69: if (Gap_String1 [0] == '-' || Gap_String2 [0] == '-') jpayne@69: { jpayne@69: fprintf (Gaps_With_Errors_File, "%s", Line); jpayne@69: continue; jpayne@69: } jpayne@69: Gap1 = strtol (Gap_String1, NULL, 10); jpayne@69: Gap2 = strtol (Gap_String2, NULL, 10); jpayne@69: if (isalpha (Olap_String [0])) jpayne@69: Olap = 0; jpayne@69: else jpayne@69: Olap = strtol (Olap_String, NULL, 10); jpayne@69: jpayne@69: Start1 -= Gap1 + PREFIX_LEN - Olap; jpayne@69: Start2 -= Gap2 + PREFIX_LEN - Olap; jpayne@69: jpayne@69: Line_Len = strlen (Line); jpayne@69: Line [Line_Len - 1] = '\0'; jpayne@69: jpayne@69: Show_Alignment (Data1 + Start1 - 1, Gap1 - Olap + 2 * PREFIX_LEN, jpayne@69: Data2 + Start2 - 1, Gap2 - Olap + 2 * PREFIX_LEN); jpayne@69: jpayne@69: } jpayne@69: jpayne@69: fclose (Gaps_With_Errors_File); jpayne@69: jpayne@69: return 0; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: template jpayne@69: class Matrix_t jpayne@69: { jpayne@69: public: jpayne@69: Matrix_t() jpayne@69: { clear(); } jpayne@69: jpayne@69: void clear() jpayne@69: { jpayne@69: d_m.clear(); jpayne@69: nRows_m = nCols_m = 0; jpayne@69: } jpayne@69: jpayne@69: void resize(long nRows, long nCols) jpayne@69: { jpayne@69: nRows_m = nRows; jpayne@69: nCols_m = nCols; jpayne@69: if ( nRows*nCols > d_m.size() ) jpayne@69: { jpayne@69: try { jpayne@69: d_m.resize(nRows*nCols); jpayne@69: } catch (...) { jpayne@69: d_m.clear(); jpayne@69: throw; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: inline T & operator()(long row, long col) jpayne@69: { return d_m[nCols_m*row+col]; } jpayne@69: jpayne@69: private: jpayne@69: jpayne@69: vector d_m; jpayne@69: long nRows_m, nCols_m; jpayne@69: }; jpayne@69: jpayne@69: void Show_Alignment (char A [], long int M, char B [], long int N) jpayne@69: jpayne@69: // Print the alignment between strings A [1 .. M] and B [1 .. N] . jpayne@69: jpayne@69: { jpayne@69: static Matrix_t D; jpayne@69: static Matrix_t Op; jpayne@69: static vector Show_A; jpayne@69: static vector Show_B; jpayne@69: int Errors, Tmp; jpayne@69: long int i, j, Ct; jpayne@69: jpayne@69: if (M >= MAX_ALIGN || N >= MAX_ALIGN) jpayne@69: { jpayne@69: printf ("\n *** Too long ***\n\n"); jpayne@69: fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-"); jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: try { jpayne@69: D.resize(M+1,N+1); jpayne@69: Op.resize(M+1,N+1); jpayne@69: Show_A.resize(M+N+2); jpayne@69: Show_B.resize(M+N+2); jpayne@69: } catch (...) { jpayne@69: printf ("\n *** Too long ***\n\n"); jpayne@69: fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-"); jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: D (0,0) = 0; jpayne@69: Op (0,0) = 'a'; jpayne@69: for (i = 1; i <= M; i ++) jpayne@69: { jpayne@69: D (i,0) = i + 1; jpayne@69: Op (i,0) = 'i'; jpayne@69: } jpayne@69: for (j = 1; j <= N; j ++) jpayne@69: { jpayne@69: D (0,j) = j + 1; jpayne@69: Op (0,j) = 'd'; jpayne@69: } jpayne@69: jpayne@69: for (i = 1; i <= M; i ++) // for each row (A) jpayne@69: for (j = 1; j <= N; j ++) // for each col (B) jpayne@69: { jpayne@69: D (i,j) = D (i,j - 1) + 1 + (Op (i,j - 1) == 'd' ? 0 : 1); jpayne@69: Op (i,j) = 'd'; jpayne@69: Tmp = D (i - 1,j) + 1 + (Op (i - 1,j) == 'i' ? 0 : 1); jpayne@69: if (Tmp < D (i,j)) jpayne@69: { jpayne@69: D (i,j) = Tmp; jpayne@69: Op (i,j) = 'i'; jpayne@69: } jpayne@69: Tmp = D (i - 1,j - 1) + (A [i] == B [j] ? 0 : 1); jpayne@69: if (Tmp < D (i,j)) jpayne@69: { jpayne@69: D (i,j) = Tmp; jpayne@69: Op (i,j) = 'a'; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: Ct = 0; jpayne@69: i = M; jpayne@69: j = N; jpayne@69: while (i > 0 || j > 0) jpayne@69: switch (Op (i,j)) jpayne@69: { jpayne@69: case 'a' : // align jpayne@69: Show_A [Ct] = A [i --]; jpayne@69: Show_B [Ct ++] = B [j --]; jpayne@69: break; jpayne@69: case 'i' : // insert into A jpayne@69: Show_A [Ct] = A [i --]; jpayne@69: Show_B [Ct ++] = '.'; jpayne@69: break; jpayne@69: case 'd' : // delete from A jpayne@69: Show_A [Ct] = '.'; jpayne@69: Show_B [Ct ++] = B [j --]; jpayne@69: break; jpayne@69: default : jpayne@69: printf (" *** i = %ld j = %ld Op (i,j) = %c\n", jpayne@69: i, j, Op (i,j)); jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: Errors = 0; jpayne@69: for (i = 0; i < Ct; i ++) jpayne@69: if (Show_A [i] != Show_B [i]) jpayne@69: Errors ++; jpayne@69: printf (" Errors = %d\n", Errors); jpayne@69: fprintf (Gaps_With_Errors_File, "%s %7d\n", Line, Errors); jpayne@69: do jpayne@69: { jpayne@69: printf ("T: "); jpayne@69: for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) jpayne@69: putchar (Show_A [i]); jpayne@69: putchar ('\n'); jpayne@69: printf ("S: "); jpayne@69: for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) jpayne@69: putchar (Show_B [i]); jpayne@69: putchar ('\n'); jpayne@69: printf (" "); jpayne@69: for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) jpayne@69: putchar (Show_A [i] == Show_B [i] ? ' ' : '^'); jpayne@69: putchar ('\n'); jpayne@69: Ct -= WIDTH; jpayne@69: } while (Ct > 0); jpayne@69: return; jpayne@69: }