comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/annotate.cc @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 /* Programmer: A. Delcher
2 * Written: 7 September 1998
3 * File: ~delcher/Align/annotate.cc
4 *
5 * This program reads the output from the gaps program and
6 * adds alignment information to it. Reads frag info from
7 * the file named on the '>' lines of the gap file. The word
8 * "reverse" after that name will make the query sequence be reverse
9 * complemented.
10 */
11
12 #include "tigrinc.hh"
13 #include <vector>
14 using namespace std;
15
16 #define FIELD_LEN 20
17 #define MAX_ALIGN 10000
18 #define MAX_LINE_LEN 200
19 #define MAX_NAME_LEN 500
20 #define PREFIX_LEN 10
21 #define WIDTH 60
22
23 #ifndef EXIT_FAILURE
24 #define EXIT_FAILURE -1
25 #endif
26 #ifndef EXIT_SUCCESS
27 #define EXIT_SUCCESS 0
28 #endif
29
30
31 void Show_Alignment (char [], long int, char [], long int);
32
33
34 char Line [MAX_LINE_LEN];
35 FILE * Gaps_With_Errors_File;
36
37
38
39 int main (int argc, char * argv [])
40
41 {
42 FILE * Gap_File, * Data_File;
43 char * Data1, * Data2;
44 char Name [MAX_NAME_LEN], Tag [MAX_NAME_LEN];
45 char Olap_String [FIELD_LEN], Gap_String1 [FIELD_LEN], Gap_String2 [FIELD_LEN];
46 int Tag_Len;
47 long int Input_Size, Len1, Len2, Line_Len;
48 long int Start1, Start2, Len, Olap, Gap1, Gap2;
49 long int i;
50
51 if (argc < 3)
52 {
53 printf ("Usage: %s <gapfile> <datafile> \n", argv [0]);
54 exit (EXIT_FAILURE);
55 }
56
57
58 Data_File = File_Open (argv [2], "r");
59 Data2 = (char *) Safe_malloc (INIT_SIZE);
60 Input_Size = INIT_SIZE;
61
62 Gaps_With_Errors_File = File_Open ("witherrors.gaps", "w");
63
64 Read_String (Data_File, Data2, Input_Size, Name, FALSE);
65 fclose (Data_File);
66 Data_File = NULL;
67 Len2 = strlen (Data2 + 1);
68
69 Gap_File = File_Open (argv [1], "r");
70
71 Data1 = (char *) Safe_malloc (INIT_SIZE);
72 Input_Size = INIT_SIZE;
73
74 while (fgets (Line, MAX_LINE_LEN, Gap_File) != NULL)
75 {
76 printf ("%s", Line);
77 for (i = 0; isspace (Line [i]) && Line [i] != '\n'; i ++)
78 ;
79 if (Line [i] == '>')
80 {
81 Tag [0] = '\0';
82 sscanf (Line + i + 1, "%s %s", Name, Tag);
83 if ( ! strcmp (Name, "Wrap") )
84 continue;
85
86 Data_File = File_Open (Name, "r");
87 Read_String (Data_File, Data1, Input_Size, Name, FALSE);
88 fclose (Data_File);
89
90 Len1 = strlen (Data1 + 1);
91
92 Tag_Len = strlen (Tag);
93 for (i = 0; i < Tag_Len; i ++)
94 Tag [i] = tolower (Tag [i]);
95 if (strcmp (Tag, "reverse") == 0)
96 Reverse_Complement (Data2, 1, Len2);
97 fprintf (Gaps_With_Errors_File, "%s", Line);
98 continue;
99 }
100 else if (! isdigit (Line [i]))
101 {
102 fprintf (Gaps_With_Errors_File, "%s", Line);
103 continue;
104 }
105 sscanf (Line, "%ld %ld %ld %s %s %s", & Start1, & Start2,
106 & Len, Olap_String, Gap_String1, Gap_String2);
107
108 if (Gap_String1 [0] == '-' || Gap_String2 [0] == '-')
109 {
110 fprintf (Gaps_With_Errors_File, "%s", Line);
111 continue;
112 }
113 Gap1 = strtol (Gap_String1, NULL, 10);
114 Gap2 = strtol (Gap_String2, NULL, 10);
115 if (isalpha (Olap_String [0]))
116 Olap = 0;
117 else
118 Olap = strtol (Olap_String, NULL, 10);
119
120 Start1 -= Gap1 + PREFIX_LEN - Olap;
121 Start2 -= Gap2 + PREFIX_LEN - Olap;
122
123 Line_Len = strlen (Line);
124 Line [Line_Len - 1] = '\0';
125
126 Show_Alignment (Data1 + Start1 - 1, Gap1 - Olap + 2 * PREFIX_LEN,
127 Data2 + Start2 - 1, Gap2 - Olap + 2 * PREFIX_LEN);
128
129 }
130
131 fclose (Gaps_With_Errors_File);
132
133 return 0;
134 }
135
136
137 template<typename T>
138 class Matrix_t
139 {
140 public:
141 Matrix_t()
142 { clear(); }
143
144 void clear()
145 {
146 d_m.clear();
147 nRows_m = nCols_m = 0;
148 }
149
150 void resize(long nRows, long nCols)
151 {
152 nRows_m = nRows;
153 nCols_m = nCols;
154 if ( nRows*nCols > d_m.size() )
155 {
156 try {
157 d_m.resize(nRows*nCols);
158 } catch (...) {
159 d_m.clear();
160 throw;
161 }
162 }
163 }
164
165 inline T & operator()(long row, long col)
166 { return d_m[nCols_m*row+col]; }
167
168 private:
169
170 vector<T> d_m;
171 long nRows_m, nCols_m;
172 };
173
174 void Show_Alignment (char A [], long int M, char B [], long int N)
175
176 // Print the alignment between strings A [1 .. M] and B [1 .. N] .
177
178 {
179 static Matrix_t<int> D;
180 static Matrix_t<char> Op;
181 static vector<char> Show_A;
182 static vector<char> Show_B;
183 int Errors, Tmp;
184 long int i, j, Ct;
185
186 if (M >= MAX_ALIGN || N >= MAX_ALIGN)
187 {
188 printf ("\n *** Too long ***\n\n");
189 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
190 return;
191 }
192
193 try {
194 D.resize(M+1,N+1);
195 Op.resize(M+1,N+1);
196 Show_A.resize(M+N+2);
197 Show_B.resize(M+N+2);
198 } catch (...) {
199 printf ("\n *** Too long ***\n\n");
200 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
201 return;
202 }
203
204 D (0,0) = 0;
205 Op (0,0) = 'a';
206 for (i = 1; i <= M; i ++)
207 {
208 D (i,0) = i + 1;
209 Op (i,0) = 'i';
210 }
211 for (j = 1; j <= N; j ++)
212 {
213 D (0,j) = j + 1;
214 Op (0,j) = 'd';
215 }
216
217 for (i = 1; i <= M; i ++) // for each row (A)
218 for (j = 1; j <= N; j ++) // for each col (B)
219 {
220 D (i,j) = D (i,j - 1) + 1 + (Op (i,j - 1) == 'd' ? 0 : 1);
221 Op (i,j) = 'd';
222 Tmp = D (i - 1,j) + 1 + (Op (i - 1,j) == 'i' ? 0 : 1);
223 if (Tmp < D (i,j))
224 {
225 D (i,j) = Tmp;
226 Op (i,j) = 'i';
227 }
228 Tmp = D (i - 1,j - 1) + (A [i] == B [j] ? 0 : 1);
229 if (Tmp < D (i,j))
230 {
231 D (i,j) = Tmp;
232 Op (i,j) = 'a';
233 }
234 }
235
236 Ct = 0;
237 i = M;
238 j = N;
239 while (i > 0 || j > 0)
240 switch (Op (i,j))
241 {
242 case 'a' : // align
243 Show_A [Ct] = A [i --];
244 Show_B [Ct ++] = B [j --];
245 break;
246 case 'i' : // insert into A
247 Show_A [Ct] = A [i --];
248 Show_B [Ct ++] = '.';
249 break;
250 case 'd' : // delete from A
251 Show_A [Ct] = '.';
252 Show_B [Ct ++] = B [j --];
253 break;
254 default :
255 printf (" *** i = %ld j = %ld Op (i,j) = %c\n",
256 i, j, Op (i,j));
257 exit (EXIT_FAILURE);
258 }
259 Errors = 0;
260 for (i = 0; i < Ct; i ++)
261 if (Show_A [i] != Show_B [i])
262 Errors ++;
263 printf (" Errors = %d\n", Errors);
264 fprintf (Gaps_With_Errors_File, "%s %7d\n", Line, Errors);
265 do
266 {
267 printf ("T: ");
268 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
269 putchar (Show_A [i]);
270 putchar ('\n');
271 printf ("S: ");
272 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
273 putchar (Show_B [i]);
274 putchar ('\n');
275 printf (" ");
276 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
277 putchar (Show_A [i] == Show_B [i] ? ' ' : '^');
278 putchar ('\n');
279 Ct -= WIDTH;
280 } while (Ct > 0);
281 return;
282 }