Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/annotate.cc @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 /* Programmer: A. Delcher | |
2 * Written: 7 September 1998 | |
3 * File: ~delcher/Align/annotate.cc | |
4 * | |
5 * This program reads the output from the gaps program and | |
6 * adds alignment information to it. Reads frag info from | |
7 * the file named on the '>' lines of the gap file. The word | |
8 * "reverse" after that name will make the query sequence be reverse | |
9 * complemented. | |
10 */ | |
11 | |
12 #include "tigrinc.hh" | |
13 #include <vector> | |
14 using namespace std; | |
15 | |
16 #define FIELD_LEN 20 | |
17 #define MAX_ALIGN 10000 | |
18 #define MAX_LINE_LEN 200 | |
19 #define MAX_NAME_LEN 500 | |
20 #define PREFIX_LEN 10 | |
21 #define WIDTH 60 | |
22 | |
23 #ifndef EXIT_FAILURE | |
24 #define EXIT_FAILURE -1 | |
25 #endif | |
26 #ifndef EXIT_SUCCESS | |
27 #define EXIT_SUCCESS 0 | |
28 #endif | |
29 | |
30 | |
31 void Show_Alignment (char [], long int, char [], long int); | |
32 | |
33 | |
34 char Line [MAX_LINE_LEN]; | |
35 FILE * Gaps_With_Errors_File; | |
36 | |
37 | |
38 | |
39 int main (int argc, char * argv []) | |
40 | |
41 { | |
42 FILE * Gap_File, * Data_File; | |
43 char * Data1, * Data2; | |
44 char Name [MAX_NAME_LEN], Tag [MAX_NAME_LEN]; | |
45 char Olap_String [FIELD_LEN], Gap_String1 [FIELD_LEN], Gap_String2 [FIELD_LEN]; | |
46 int Tag_Len; | |
47 long int Input_Size, Len1, Len2, Line_Len; | |
48 long int Start1, Start2, Len, Olap, Gap1, Gap2; | |
49 long int i; | |
50 | |
51 if (argc < 3) | |
52 { | |
53 printf ("Usage: %s <gapfile> <datafile> \n", argv [0]); | |
54 exit (EXIT_FAILURE); | |
55 } | |
56 | |
57 | |
58 Data_File = File_Open (argv [2], "r"); | |
59 Data2 = (char *) Safe_malloc (INIT_SIZE); | |
60 Input_Size = INIT_SIZE; | |
61 | |
62 Gaps_With_Errors_File = File_Open ("witherrors.gaps", "w"); | |
63 | |
64 Read_String (Data_File, Data2, Input_Size, Name, FALSE); | |
65 fclose (Data_File); | |
66 Data_File = NULL; | |
67 Len2 = strlen (Data2 + 1); | |
68 | |
69 Gap_File = File_Open (argv [1], "r"); | |
70 | |
71 Data1 = (char *) Safe_malloc (INIT_SIZE); | |
72 Input_Size = INIT_SIZE; | |
73 | |
74 while (fgets (Line, MAX_LINE_LEN, Gap_File) != NULL) | |
75 { | |
76 printf ("%s", Line); | |
77 for (i = 0; isspace (Line [i]) && Line [i] != '\n'; i ++) | |
78 ; | |
79 if (Line [i] == '>') | |
80 { | |
81 Tag [0] = '\0'; | |
82 sscanf (Line + i + 1, "%s %s", Name, Tag); | |
83 if ( ! strcmp (Name, "Wrap") ) | |
84 continue; | |
85 | |
86 Data_File = File_Open (Name, "r"); | |
87 Read_String (Data_File, Data1, Input_Size, Name, FALSE); | |
88 fclose (Data_File); | |
89 | |
90 Len1 = strlen (Data1 + 1); | |
91 | |
92 Tag_Len = strlen (Tag); | |
93 for (i = 0; i < Tag_Len; i ++) | |
94 Tag [i] = tolower (Tag [i]); | |
95 if (strcmp (Tag, "reverse") == 0) | |
96 Reverse_Complement (Data2, 1, Len2); | |
97 fprintf (Gaps_With_Errors_File, "%s", Line); | |
98 continue; | |
99 } | |
100 else if (! isdigit (Line [i])) | |
101 { | |
102 fprintf (Gaps_With_Errors_File, "%s", Line); | |
103 continue; | |
104 } | |
105 sscanf (Line, "%ld %ld %ld %s %s %s", & Start1, & Start2, | |
106 & Len, Olap_String, Gap_String1, Gap_String2); | |
107 | |
108 if (Gap_String1 [0] == '-' || Gap_String2 [0] == '-') | |
109 { | |
110 fprintf (Gaps_With_Errors_File, "%s", Line); | |
111 continue; | |
112 } | |
113 Gap1 = strtol (Gap_String1, NULL, 10); | |
114 Gap2 = strtol (Gap_String2, NULL, 10); | |
115 if (isalpha (Olap_String [0])) | |
116 Olap = 0; | |
117 else | |
118 Olap = strtol (Olap_String, NULL, 10); | |
119 | |
120 Start1 -= Gap1 + PREFIX_LEN - Olap; | |
121 Start2 -= Gap2 + PREFIX_LEN - Olap; | |
122 | |
123 Line_Len = strlen (Line); | |
124 Line [Line_Len - 1] = '\0'; | |
125 | |
126 Show_Alignment (Data1 + Start1 - 1, Gap1 - Olap + 2 * PREFIX_LEN, | |
127 Data2 + Start2 - 1, Gap2 - Olap + 2 * PREFIX_LEN); | |
128 | |
129 } | |
130 | |
131 fclose (Gaps_With_Errors_File); | |
132 | |
133 return 0; | |
134 } | |
135 | |
136 | |
137 template<typename T> | |
138 class Matrix_t | |
139 { | |
140 public: | |
141 Matrix_t() | |
142 { clear(); } | |
143 | |
144 void clear() | |
145 { | |
146 d_m.clear(); | |
147 nRows_m = nCols_m = 0; | |
148 } | |
149 | |
150 void resize(long nRows, long nCols) | |
151 { | |
152 nRows_m = nRows; | |
153 nCols_m = nCols; | |
154 if ( nRows*nCols > d_m.size() ) | |
155 { | |
156 try { | |
157 d_m.resize(nRows*nCols); | |
158 } catch (...) { | |
159 d_m.clear(); | |
160 throw; | |
161 } | |
162 } | |
163 } | |
164 | |
165 inline T & operator()(long row, long col) | |
166 { return d_m[nCols_m*row+col]; } | |
167 | |
168 private: | |
169 | |
170 vector<T> d_m; | |
171 long nRows_m, nCols_m; | |
172 }; | |
173 | |
174 void Show_Alignment (char A [], long int M, char B [], long int N) | |
175 | |
176 // Print the alignment between strings A [1 .. M] and B [1 .. N] . | |
177 | |
178 { | |
179 static Matrix_t<int> D; | |
180 static Matrix_t<char> Op; | |
181 static vector<char> Show_A; | |
182 static vector<char> Show_B; | |
183 int Errors, Tmp; | |
184 long int i, j, Ct; | |
185 | |
186 if (M >= MAX_ALIGN || N >= MAX_ALIGN) | |
187 { | |
188 printf ("\n *** Too long ***\n\n"); | |
189 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-"); | |
190 return; | |
191 } | |
192 | |
193 try { | |
194 D.resize(M+1,N+1); | |
195 Op.resize(M+1,N+1); | |
196 Show_A.resize(M+N+2); | |
197 Show_B.resize(M+N+2); | |
198 } catch (...) { | |
199 printf ("\n *** Too long ***\n\n"); | |
200 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-"); | |
201 return; | |
202 } | |
203 | |
204 D (0,0) = 0; | |
205 Op (0,0) = 'a'; | |
206 for (i = 1; i <= M; i ++) | |
207 { | |
208 D (i,0) = i + 1; | |
209 Op (i,0) = 'i'; | |
210 } | |
211 for (j = 1; j <= N; j ++) | |
212 { | |
213 D (0,j) = j + 1; | |
214 Op (0,j) = 'd'; | |
215 } | |
216 | |
217 for (i = 1; i <= M; i ++) // for each row (A) | |
218 for (j = 1; j <= N; j ++) // for each col (B) | |
219 { | |
220 D (i,j) = D (i,j - 1) + 1 + (Op (i,j - 1) == 'd' ? 0 : 1); | |
221 Op (i,j) = 'd'; | |
222 Tmp = D (i - 1,j) + 1 + (Op (i - 1,j) == 'i' ? 0 : 1); | |
223 if (Tmp < D (i,j)) | |
224 { | |
225 D (i,j) = Tmp; | |
226 Op (i,j) = 'i'; | |
227 } | |
228 Tmp = D (i - 1,j - 1) + (A [i] == B [j] ? 0 : 1); | |
229 if (Tmp < D (i,j)) | |
230 { | |
231 D (i,j) = Tmp; | |
232 Op (i,j) = 'a'; | |
233 } | |
234 } | |
235 | |
236 Ct = 0; | |
237 i = M; | |
238 j = N; | |
239 while (i > 0 || j > 0) | |
240 switch (Op (i,j)) | |
241 { | |
242 case 'a' : // align | |
243 Show_A [Ct] = A [i --]; | |
244 Show_B [Ct ++] = B [j --]; | |
245 break; | |
246 case 'i' : // insert into A | |
247 Show_A [Ct] = A [i --]; | |
248 Show_B [Ct ++] = '.'; | |
249 break; | |
250 case 'd' : // delete from A | |
251 Show_A [Ct] = '.'; | |
252 Show_B [Ct ++] = B [j --]; | |
253 break; | |
254 default : | |
255 printf (" *** i = %ld j = %ld Op (i,j) = %c\n", | |
256 i, j, Op (i,j)); | |
257 exit (EXIT_FAILURE); | |
258 } | |
259 Errors = 0; | |
260 for (i = 0; i < Ct; i ++) | |
261 if (Show_A [i] != Show_B [i]) | |
262 Errors ++; | |
263 printf (" Errors = %d\n", Errors); | |
264 fprintf (Gaps_With_Errors_File, "%s %7d\n", Line, Errors); | |
265 do | |
266 { | |
267 printf ("T: "); | |
268 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) | |
269 putchar (Show_A [i]); | |
270 putchar ('\n'); | |
271 printf ("S: "); | |
272 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) | |
273 putchar (Show_B [i]); | |
274 putchar ('\n'); | |
275 printf (" "); | |
276 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --) | |
277 putchar (Show_A [i] == Show_B [i] ? ' ' : '^'); | |
278 putchar ('\n'); | |
279 Ct -= WIDTH; | |
280 } while (Ct > 0); | |
281 return; | |
282 } |