jpayne@69
|
1 /* Programmer: A. Delcher
|
jpayne@69
|
2 * Written: 7 September 1998
|
jpayne@69
|
3 * File: ~delcher/Align/annotate.cc
|
jpayne@69
|
4 *
|
jpayne@69
|
5 * This program reads the output from the gaps program and
|
jpayne@69
|
6 * adds alignment information to it. Reads frag info from
|
jpayne@69
|
7 * the file named on the '>' lines of the gap file. The word
|
jpayne@69
|
8 * "reverse" after that name will make the query sequence be reverse
|
jpayne@69
|
9 * complemented.
|
jpayne@69
|
10 */
|
jpayne@69
|
11
|
jpayne@69
|
12 #include "tigrinc.hh"
|
jpayne@69
|
13 #include <vector>
|
jpayne@69
|
14 using namespace std;
|
jpayne@69
|
15
|
jpayne@69
|
16 #define FIELD_LEN 20
|
jpayne@69
|
17 #define MAX_ALIGN 10000
|
jpayne@69
|
18 #define MAX_LINE_LEN 200
|
jpayne@69
|
19 #define MAX_NAME_LEN 500
|
jpayne@69
|
20 #define PREFIX_LEN 10
|
jpayne@69
|
21 #define WIDTH 60
|
jpayne@69
|
22
|
jpayne@69
|
23 #ifndef EXIT_FAILURE
|
jpayne@69
|
24 #define EXIT_FAILURE -1
|
jpayne@69
|
25 #endif
|
jpayne@69
|
26 #ifndef EXIT_SUCCESS
|
jpayne@69
|
27 #define EXIT_SUCCESS 0
|
jpayne@69
|
28 #endif
|
jpayne@69
|
29
|
jpayne@69
|
30
|
jpayne@69
|
31 void Show_Alignment (char [], long int, char [], long int);
|
jpayne@69
|
32
|
jpayne@69
|
33
|
jpayne@69
|
34 char Line [MAX_LINE_LEN];
|
jpayne@69
|
35 FILE * Gaps_With_Errors_File;
|
jpayne@69
|
36
|
jpayne@69
|
37
|
jpayne@69
|
38
|
jpayne@69
|
39 int main (int argc, char * argv [])
|
jpayne@69
|
40
|
jpayne@69
|
41 {
|
jpayne@69
|
42 FILE * Gap_File, * Data_File;
|
jpayne@69
|
43 char * Data1, * Data2;
|
jpayne@69
|
44 char Name [MAX_NAME_LEN], Tag [MAX_NAME_LEN];
|
jpayne@69
|
45 char Olap_String [FIELD_LEN], Gap_String1 [FIELD_LEN], Gap_String2 [FIELD_LEN];
|
jpayne@69
|
46 int Tag_Len;
|
jpayne@69
|
47 long int Input_Size, Len1, Len2, Line_Len;
|
jpayne@69
|
48 long int Start1, Start2, Len, Olap, Gap1, Gap2;
|
jpayne@69
|
49 long int i;
|
jpayne@69
|
50
|
jpayne@69
|
51 if (argc < 3)
|
jpayne@69
|
52 {
|
jpayne@69
|
53 printf ("Usage: %s <gapfile> <datafile> \n", argv [0]);
|
jpayne@69
|
54 exit (EXIT_FAILURE);
|
jpayne@69
|
55 }
|
jpayne@69
|
56
|
jpayne@69
|
57
|
jpayne@69
|
58 Data_File = File_Open (argv [2], "r");
|
jpayne@69
|
59 Data2 = (char *) Safe_malloc (INIT_SIZE);
|
jpayne@69
|
60 Input_Size = INIT_SIZE;
|
jpayne@69
|
61
|
jpayne@69
|
62 Gaps_With_Errors_File = File_Open ("witherrors.gaps", "w");
|
jpayne@69
|
63
|
jpayne@69
|
64 Read_String (Data_File, Data2, Input_Size, Name, FALSE);
|
jpayne@69
|
65 fclose (Data_File);
|
jpayne@69
|
66 Data_File = NULL;
|
jpayne@69
|
67 Len2 = strlen (Data2 + 1);
|
jpayne@69
|
68
|
jpayne@69
|
69 Gap_File = File_Open (argv [1], "r");
|
jpayne@69
|
70
|
jpayne@69
|
71 Data1 = (char *) Safe_malloc (INIT_SIZE);
|
jpayne@69
|
72 Input_Size = INIT_SIZE;
|
jpayne@69
|
73
|
jpayne@69
|
74 while (fgets (Line, MAX_LINE_LEN, Gap_File) != NULL)
|
jpayne@69
|
75 {
|
jpayne@69
|
76 printf ("%s", Line);
|
jpayne@69
|
77 for (i = 0; isspace (Line [i]) && Line [i] != '\n'; i ++)
|
jpayne@69
|
78 ;
|
jpayne@69
|
79 if (Line [i] == '>')
|
jpayne@69
|
80 {
|
jpayne@69
|
81 Tag [0] = '\0';
|
jpayne@69
|
82 sscanf (Line + i + 1, "%s %s", Name, Tag);
|
jpayne@69
|
83 if ( ! strcmp (Name, "Wrap") )
|
jpayne@69
|
84 continue;
|
jpayne@69
|
85
|
jpayne@69
|
86 Data_File = File_Open (Name, "r");
|
jpayne@69
|
87 Read_String (Data_File, Data1, Input_Size, Name, FALSE);
|
jpayne@69
|
88 fclose (Data_File);
|
jpayne@69
|
89
|
jpayne@69
|
90 Len1 = strlen (Data1 + 1);
|
jpayne@69
|
91
|
jpayne@69
|
92 Tag_Len = strlen (Tag);
|
jpayne@69
|
93 for (i = 0; i < Tag_Len; i ++)
|
jpayne@69
|
94 Tag [i] = tolower (Tag [i]);
|
jpayne@69
|
95 if (strcmp (Tag, "reverse") == 0)
|
jpayne@69
|
96 Reverse_Complement (Data2, 1, Len2);
|
jpayne@69
|
97 fprintf (Gaps_With_Errors_File, "%s", Line);
|
jpayne@69
|
98 continue;
|
jpayne@69
|
99 }
|
jpayne@69
|
100 else if (! isdigit (Line [i]))
|
jpayne@69
|
101 {
|
jpayne@69
|
102 fprintf (Gaps_With_Errors_File, "%s", Line);
|
jpayne@69
|
103 continue;
|
jpayne@69
|
104 }
|
jpayne@69
|
105 sscanf (Line, "%ld %ld %ld %s %s %s", & Start1, & Start2,
|
jpayne@69
|
106 & Len, Olap_String, Gap_String1, Gap_String2);
|
jpayne@69
|
107
|
jpayne@69
|
108 if (Gap_String1 [0] == '-' || Gap_String2 [0] == '-')
|
jpayne@69
|
109 {
|
jpayne@69
|
110 fprintf (Gaps_With_Errors_File, "%s", Line);
|
jpayne@69
|
111 continue;
|
jpayne@69
|
112 }
|
jpayne@69
|
113 Gap1 = strtol (Gap_String1, NULL, 10);
|
jpayne@69
|
114 Gap2 = strtol (Gap_String2, NULL, 10);
|
jpayne@69
|
115 if (isalpha (Olap_String [0]))
|
jpayne@69
|
116 Olap = 0;
|
jpayne@69
|
117 else
|
jpayne@69
|
118 Olap = strtol (Olap_String, NULL, 10);
|
jpayne@69
|
119
|
jpayne@69
|
120 Start1 -= Gap1 + PREFIX_LEN - Olap;
|
jpayne@69
|
121 Start2 -= Gap2 + PREFIX_LEN - Olap;
|
jpayne@69
|
122
|
jpayne@69
|
123 Line_Len = strlen (Line);
|
jpayne@69
|
124 Line [Line_Len - 1] = '\0';
|
jpayne@69
|
125
|
jpayne@69
|
126 Show_Alignment (Data1 + Start1 - 1, Gap1 - Olap + 2 * PREFIX_LEN,
|
jpayne@69
|
127 Data2 + Start2 - 1, Gap2 - Olap + 2 * PREFIX_LEN);
|
jpayne@69
|
128
|
jpayne@69
|
129 }
|
jpayne@69
|
130
|
jpayne@69
|
131 fclose (Gaps_With_Errors_File);
|
jpayne@69
|
132
|
jpayne@69
|
133 return 0;
|
jpayne@69
|
134 }
|
jpayne@69
|
135
|
jpayne@69
|
136
|
jpayne@69
|
137 template<typename T>
|
jpayne@69
|
138 class Matrix_t
|
jpayne@69
|
139 {
|
jpayne@69
|
140 public:
|
jpayne@69
|
141 Matrix_t()
|
jpayne@69
|
142 { clear(); }
|
jpayne@69
|
143
|
jpayne@69
|
144 void clear()
|
jpayne@69
|
145 {
|
jpayne@69
|
146 d_m.clear();
|
jpayne@69
|
147 nRows_m = nCols_m = 0;
|
jpayne@69
|
148 }
|
jpayne@69
|
149
|
jpayne@69
|
150 void resize(long nRows, long nCols)
|
jpayne@69
|
151 {
|
jpayne@69
|
152 nRows_m = nRows;
|
jpayne@69
|
153 nCols_m = nCols;
|
jpayne@69
|
154 if ( nRows*nCols > d_m.size() )
|
jpayne@69
|
155 {
|
jpayne@69
|
156 try {
|
jpayne@69
|
157 d_m.resize(nRows*nCols);
|
jpayne@69
|
158 } catch (...) {
|
jpayne@69
|
159 d_m.clear();
|
jpayne@69
|
160 throw;
|
jpayne@69
|
161 }
|
jpayne@69
|
162 }
|
jpayne@69
|
163 }
|
jpayne@69
|
164
|
jpayne@69
|
165 inline T & operator()(long row, long col)
|
jpayne@69
|
166 { return d_m[nCols_m*row+col]; }
|
jpayne@69
|
167
|
jpayne@69
|
168 private:
|
jpayne@69
|
169
|
jpayne@69
|
170 vector<T> d_m;
|
jpayne@69
|
171 long nRows_m, nCols_m;
|
jpayne@69
|
172 };
|
jpayne@69
|
173
|
jpayne@69
|
174 void Show_Alignment (char A [], long int M, char B [], long int N)
|
jpayne@69
|
175
|
jpayne@69
|
176 // Print the alignment between strings A [1 .. M] and B [1 .. N] .
|
jpayne@69
|
177
|
jpayne@69
|
178 {
|
jpayne@69
|
179 static Matrix_t<int> D;
|
jpayne@69
|
180 static Matrix_t<char> Op;
|
jpayne@69
|
181 static vector<char> Show_A;
|
jpayne@69
|
182 static vector<char> Show_B;
|
jpayne@69
|
183 int Errors, Tmp;
|
jpayne@69
|
184 long int i, j, Ct;
|
jpayne@69
|
185
|
jpayne@69
|
186 if (M >= MAX_ALIGN || N >= MAX_ALIGN)
|
jpayne@69
|
187 {
|
jpayne@69
|
188 printf ("\n *** Too long ***\n\n");
|
jpayne@69
|
189 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
|
jpayne@69
|
190 return;
|
jpayne@69
|
191 }
|
jpayne@69
|
192
|
jpayne@69
|
193 try {
|
jpayne@69
|
194 D.resize(M+1,N+1);
|
jpayne@69
|
195 Op.resize(M+1,N+1);
|
jpayne@69
|
196 Show_A.resize(M+N+2);
|
jpayne@69
|
197 Show_B.resize(M+N+2);
|
jpayne@69
|
198 } catch (...) {
|
jpayne@69
|
199 printf ("\n *** Too long ***\n\n");
|
jpayne@69
|
200 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
|
jpayne@69
|
201 return;
|
jpayne@69
|
202 }
|
jpayne@69
|
203
|
jpayne@69
|
204 D (0,0) = 0;
|
jpayne@69
|
205 Op (0,0) = 'a';
|
jpayne@69
|
206 for (i = 1; i <= M; i ++)
|
jpayne@69
|
207 {
|
jpayne@69
|
208 D (i,0) = i + 1;
|
jpayne@69
|
209 Op (i,0) = 'i';
|
jpayne@69
|
210 }
|
jpayne@69
|
211 for (j = 1; j <= N; j ++)
|
jpayne@69
|
212 {
|
jpayne@69
|
213 D (0,j) = j + 1;
|
jpayne@69
|
214 Op (0,j) = 'd';
|
jpayne@69
|
215 }
|
jpayne@69
|
216
|
jpayne@69
|
217 for (i = 1; i <= M; i ++) // for each row (A)
|
jpayne@69
|
218 for (j = 1; j <= N; j ++) // for each col (B)
|
jpayne@69
|
219 {
|
jpayne@69
|
220 D (i,j) = D (i,j - 1) + 1 + (Op (i,j - 1) == 'd' ? 0 : 1);
|
jpayne@69
|
221 Op (i,j) = 'd';
|
jpayne@69
|
222 Tmp = D (i - 1,j) + 1 + (Op (i - 1,j) == 'i' ? 0 : 1);
|
jpayne@69
|
223 if (Tmp < D (i,j))
|
jpayne@69
|
224 {
|
jpayne@69
|
225 D (i,j) = Tmp;
|
jpayne@69
|
226 Op (i,j) = 'i';
|
jpayne@69
|
227 }
|
jpayne@69
|
228 Tmp = D (i - 1,j - 1) + (A [i] == B [j] ? 0 : 1);
|
jpayne@69
|
229 if (Tmp < D (i,j))
|
jpayne@69
|
230 {
|
jpayne@69
|
231 D (i,j) = Tmp;
|
jpayne@69
|
232 Op (i,j) = 'a';
|
jpayne@69
|
233 }
|
jpayne@69
|
234 }
|
jpayne@69
|
235
|
jpayne@69
|
236 Ct = 0;
|
jpayne@69
|
237 i = M;
|
jpayne@69
|
238 j = N;
|
jpayne@69
|
239 while (i > 0 || j > 0)
|
jpayne@69
|
240 switch (Op (i,j))
|
jpayne@69
|
241 {
|
jpayne@69
|
242 case 'a' : // align
|
jpayne@69
|
243 Show_A [Ct] = A [i --];
|
jpayne@69
|
244 Show_B [Ct ++] = B [j --];
|
jpayne@69
|
245 break;
|
jpayne@69
|
246 case 'i' : // insert into A
|
jpayne@69
|
247 Show_A [Ct] = A [i --];
|
jpayne@69
|
248 Show_B [Ct ++] = '.';
|
jpayne@69
|
249 break;
|
jpayne@69
|
250 case 'd' : // delete from A
|
jpayne@69
|
251 Show_A [Ct] = '.';
|
jpayne@69
|
252 Show_B [Ct ++] = B [j --];
|
jpayne@69
|
253 break;
|
jpayne@69
|
254 default :
|
jpayne@69
|
255 printf (" *** i = %ld j = %ld Op (i,j) = %c\n",
|
jpayne@69
|
256 i, j, Op (i,j));
|
jpayne@69
|
257 exit (EXIT_FAILURE);
|
jpayne@69
|
258 }
|
jpayne@69
|
259 Errors = 0;
|
jpayne@69
|
260 for (i = 0; i < Ct; i ++)
|
jpayne@69
|
261 if (Show_A [i] != Show_B [i])
|
jpayne@69
|
262 Errors ++;
|
jpayne@69
|
263 printf (" Errors = %d\n", Errors);
|
jpayne@69
|
264 fprintf (Gaps_With_Errors_File, "%s %7d\n", Line, Errors);
|
jpayne@69
|
265 do
|
jpayne@69
|
266 {
|
jpayne@69
|
267 printf ("T: ");
|
jpayne@69
|
268 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
|
jpayne@69
|
269 putchar (Show_A [i]);
|
jpayne@69
|
270 putchar ('\n');
|
jpayne@69
|
271 printf ("S: ");
|
jpayne@69
|
272 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
|
jpayne@69
|
273 putchar (Show_B [i]);
|
jpayne@69
|
274 putchar ('\n');
|
jpayne@69
|
275 printf (" ");
|
jpayne@69
|
276 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
|
jpayne@69
|
277 putchar (Show_A [i] == Show_B [i] ? ' ' : '^');
|
jpayne@69
|
278 putchar ('\n');
|
jpayne@69
|
279 Ct -= WIDTH;
|
jpayne@69
|
280 } while (Ct > 0);
|
jpayne@69
|
281 return;
|
jpayne@69
|
282 }
|