Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/annotate.cc @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/annotate.cc Tue Mar 18 17:55:14 2025 -0400 @@ -0,0 +1,282 @@ +/* Programmer: A. Delcher +* Written: 7 September 1998 +* File: ~delcher/Align/annotate.cc +* +* This program reads the output from the gaps program and +* adds alignment information to it. Reads frag info from +* the file named on the '>' lines of the gap file. The word +* "reverse" after that name will make the query sequence be reverse +* complemented. +*/ + +#include "tigrinc.hh" +#include <vector> +using namespace std; + +#define FIELD_LEN 20 +#define MAX_ALIGN 10000 +#define MAX_LINE_LEN 200 +#define MAX_NAME_LEN 500 +#define PREFIX_LEN 10 +#define WIDTH 60 + +#ifndef EXIT_FAILURE + #define EXIT_FAILURE -1 +#endif +#ifndef EXIT_SUCCESS + #define EXIT_SUCCESS 0 +#endif + + +void Show_Alignment (char [], long int, char [], long int); + + +char Line [MAX_LINE_LEN]; +FILE * Gaps_With_Errors_File; + + + +int main (int argc, char * argv []) + + { + FILE * Gap_File, * Data_File; + char * Data1, * Data2; + char Name [MAX_NAME_LEN], Tag [MAX_NAME_LEN]; + char Olap_String [FIELD_LEN], Gap_String1 [FIELD_LEN], Gap_String2 [FIELD_LEN]; + int Tag_Len; + long int Input_Size, Len1, Len2, Line_Len; + long int Start1, Start2, Len, Olap, Gap1, Gap2; + long int i; + + if (argc < 3) + { + printf ("Usage: %s <gapfile> <datafile> \n", argv [0]); + exit (EXIT_FAILURE); + } + + + Data_File = File_Open (argv [2], "r"); + Data2 = (char *) Safe_malloc (INIT_SIZE); + Input_Size = INIT_SIZE; + + Gaps_With_Errors_File = File_Open ("witherrors.gaps", "w"); + + Read_String (Data_File, Data2, Input_Size, Name, FALSE); + fclose (Data_File); + Data_File = NULL; + Len2 = strlen (Data2 + 1); + + Gap_File = File_Open (argv [1], "r"); + + Data1 = (char *) Safe_malloc (INIT_SIZE); + Input_Size = INIT_SIZE; + + while (fgets (Line, MAX_LINE_LEN, Gap_File) != NULL) + { + printf ("%s", Line); + for (i = 0; isspace (Line [i]) && Line [i] != '\n'; i ++) + ; + if (Line [i] == '>') + { + Tag [0] = '\0'; + sscanf (Line + i + 1, "%s %s", Name, Tag); + if ( ! strcmp (Name, "Wrap") ) + continue; + + Data_File = File_Open (Name, "r"); + Read_String (Data_File, Data1, Input_Size, Name, FALSE); + fclose (Data_File); + + Len1 = strlen (Data1 + 1); + + Tag_Len = strlen (Tag); + for (i = 0; i < Tag_Len; i ++) + Tag [i] = tolower (Tag [i]); + if (strcmp (Tag, "reverse") == 0) + Reverse_Complement (Data2, 1, Len2); + fprintf (Gaps_With_Errors_File, "%s", Line); + continue; + } + else if (! isdigit (Line [i])) + { + fprintf (Gaps_With_Errors_File, "%s", Line); + continue; + } + sscanf (Line, "%ld %ld %ld %s %s %s", & Start1, & Start2, + & Len, Olap_String, Gap_String1, Gap_String2); + + if (Gap_String1 [0] == '-' || Gap_String2 [0] == '-') + { + fprintf (Gaps_With_Errors_File, "%s", Line); + continue; + } + Gap1 = strtol (Gap_String1, NULL, 10); + Gap2 = strtol (Gap_String2, NULL, 10); + if (isalpha (Olap_String [0])) + Olap = 0; + else + Olap = strtol (Olap_String, NULL, 10); + + Start1 -= Gap1 + PREFIX_LEN - Olap; + Start2 -= Gap2 + PREFIX_LEN - Olap; + + Line_Len = strlen (Line); + Line [Line_Len - 1] = '\0'; + + Show_Alignment (Data1 + Start1 - 1, Gap1 - Olap + 2 * PREFIX_LEN, + Data2 + Start2 - 1, Gap2 - Olap + 2 * PREFIX_LEN); + + } + + fclose (Gaps_With_Errors_File); + + return 0; + } + + +template<typename T> +class Matrix_t +{ +public: + Matrix_t() + { clear(); } + + void clear() + { + d_m.clear(); + nRows_m = nCols_m = 0; + } + + void resize(long nRows, long nCols) + { + nRows_m = nRows; + nCols_m = nCols; + if ( nRows*nCols > d_m.size() ) + { + try { + d_m.resize(nRows*nCols); + } catch (...) { + d_m.clear(); + throw; + } + } + } + + inline T & operator()(long row, long col) + { return d_m[nCols_m*row+col]; } + +private: + + vector<T> d_m; + long nRows_m, nCols_m; +}; + +void Show_Alignment (char A [], long int M, char B [], long int N) + +// Print the alignment between strings A [1 .. M] and B [1 .. N] . + + { + static Matrix_t<int> D; + static Matrix_t<char> Op; + static vector<char> Show_A; + static vector<char> Show_B; + int Errors, Tmp; + long int i, j, Ct; + + if (M >= MAX_ALIGN || N >= MAX_ALIGN) + { + printf ("\n *** Too long ***\n\n"); + fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-"); + return; + } + + try { + D.resize(M+1,N+1); + Op.resize(M+1,N+1); + Show_A.resize(M+N+2); + Show_B.resize(M+N+2); + } catch (...) { + printf ("\n *** Too long ***\n\n"); + fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-"); + return; + } + + D (0,0) = 0; + Op (0,0) = 'a'; + for (i = 1; i <= M; i ++) + { + D (i,0) = i + 1; + Op (i,0) = 'i'; + } + for (j = 1; j <= N; j ++) + { + D (0,j) = j + 1; + Op (0,j) = 'd'; + } + + for (i = 1; i <= M; i ++) // for each row (A) + for (j = 1; j <= N; j ++) // for each col (B) + { + D (i,j) = D (i,j - 1) + 1 + (Op (i,j - 1) == 'd' ? 0 : 1); + Op (i,j) = 'd'; + Tmp = D (i - 1,j) + 1 + (Op (i - 1,j) == 'i' ? 0 : 1); + if (Tmp < D (i,j)) + { + D (i,j) = Tmp; + Op (i,j) = 'i'; + } + Tmp = D (i - 1,j - 1) + (A [i] == B [j] ? 0 : 1); + if (Tmp < D (i,j)) + { + D (i,j) = Tmp; + Op (i,j) = 'a'; + } + } + + Ct = 0; + i = M; + j = N; + while (i > 0 || j > 0) + switch (Op (i,j)) + { + case 'a' : // align + Show_A [Ct] = A [i --]; + Show_B [Ct ++] = B [j --]; + break; + case 'i' : // insert into A + Show_A [Ct] = A [i --]; + Show_B [Ct ++] = '.'; + break; + case 'd' : // delete from A + Show_A [Ct] = '.'; + Show_B [Ct ++] = B [j --]; + break; + default : + printf (" *** i = %ld j = %ld Op (i,j) = %c\n", + i, j, Op (i,j)); + exit (EXIT_FAILURE); + } + Errors = 0; + for (i = 0; i < Ct; i ++) + if (Show_A [i] != Show_B [i]) + Errors ++; + printf (" Errors = %d\n", Errors); + fprintf (Gaps_With_Errors_File, "%s %7d\n", Line, Errors); + do + { + printf ("T: "); + for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) + putchar (Show_A [i]); + putchar ('\n'); + printf ("S: "); + for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) + putchar (Show_B [i]); + putchar ('\n'); + printf (" "); + for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) + putchar (Show_A [i] == Show_B [i] ? ' ' : '^'); + putchar ('\n'); + Ct -= WIDTH; + } while (Ct > 0); + return; + }