annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/annotate.cc @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 /* Programmer: A. Delcher
jpayne@69 2 * Written: 7 September 1998
jpayne@69 3 * File: ~delcher/Align/annotate.cc
jpayne@69 4 *
jpayne@69 5 * This program reads the output from the gaps program and
jpayne@69 6 * adds alignment information to it. Reads frag info from
jpayne@69 7 * the file named on the '>' lines of the gap file. The word
jpayne@69 8 * "reverse" after that name will make the query sequence be reverse
jpayne@69 9 * complemented.
jpayne@69 10 */
jpayne@69 11
jpayne@69 12 #include "tigrinc.hh"
jpayne@69 13 #include <vector>
jpayne@69 14 using namespace std;
jpayne@69 15
jpayne@69 16 #define FIELD_LEN 20
jpayne@69 17 #define MAX_ALIGN 10000
jpayne@69 18 #define MAX_LINE_LEN 200
jpayne@69 19 #define MAX_NAME_LEN 500
jpayne@69 20 #define PREFIX_LEN 10
jpayne@69 21 #define WIDTH 60
jpayne@69 22
jpayne@69 23 #ifndef EXIT_FAILURE
jpayne@69 24 #define EXIT_FAILURE -1
jpayne@69 25 #endif
jpayne@69 26 #ifndef EXIT_SUCCESS
jpayne@69 27 #define EXIT_SUCCESS 0
jpayne@69 28 #endif
jpayne@69 29
jpayne@69 30
jpayne@69 31 void Show_Alignment (char [], long int, char [], long int);
jpayne@69 32
jpayne@69 33
jpayne@69 34 char Line [MAX_LINE_LEN];
jpayne@69 35 FILE * Gaps_With_Errors_File;
jpayne@69 36
jpayne@69 37
jpayne@69 38
jpayne@69 39 int main (int argc, char * argv [])
jpayne@69 40
jpayne@69 41 {
jpayne@69 42 FILE * Gap_File, * Data_File;
jpayne@69 43 char * Data1, * Data2;
jpayne@69 44 char Name [MAX_NAME_LEN], Tag [MAX_NAME_LEN];
jpayne@69 45 char Olap_String [FIELD_LEN], Gap_String1 [FIELD_LEN], Gap_String2 [FIELD_LEN];
jpayne@69 46 int Tag_Len;
jpayne@69 47 long int Input_Size, Len1, Len2, Line_Len;
jpayne@69 48 long int Start1, Start2, Len, Olap, Gap1, Gap2;
jpayne@69 49 long int i;
jpayne@69 50
jpayne@69 51 if (argc < 3)
jpayne@69 52 {
jpayne@69 53 printf ("Usage: %s <gapfile> <datafile> \n", argv [0]);
jpayne@69 54 exit (EXIT_FAILURE);
jpayne@69 55 }
jpayne@69 56
jpayne@69 57
jpayne@69 58 Data_File = File_Open (argv [2], "r");
jpayne@69 59 Data2 = (char *) Safe_malloc (INIT_SIZE);
jpayne@69 60 Input_Size = INIT_SIZE;
jpayne@69 61
jpayne@69 62 Gaps_With_Errors_File = File_Open ("witherrors.gaps", "w");
jpayne@69 63
jpayne@69 64 Read_String (Data_File, Data2, Input_Size, Name, FALSE);
jpayne@69 65 fclose (Data_File);
jpayne@69 66 Data_File = NULL;
jpayne@69 67 Len2 = strlen (Data2 + 1);
jpayne@69 68
jpayne@69 69 Gap_File = File_Open (argv [1], "r");
jpayne@69 70
jpayne@69 71 Data1 = (char *) Safe_malloc (INIT_SIZE);
jpayne@69 72 Input_Size = INIT_SIZE;
jpayne@69 73
jpayne@69 74 while (fgets (Line, MAX_LINE_LEN, Gap_File) != NULL)
jpayne@69 75 {
jpayne@69 76 printf ("%s", Line);
jpayne@69 77 for (i = 0; isspace (Line [i]) && Line [i] != '\n'; i ++)
jpayne@69 78 ;
jpayne@69 79 if (Line [i] == '>')
jpayne@69 80 {
jpayne@69 81 Tag [0] = '\0';
jpayne@69 82 sscanf (Line + i + 1, "%s %s", Name, Tag);
jpayne@69 83 if ( ! strcmp (Name, "Wrap") )
jpayne@69 84 continue;
jpayne@69 85
jpayne@69 86 Data_File = File_Open (Name, "r");
jpayne@69 87 Read_String (Data_File, Data1, Input_Size, Name, FALSE);
jpayne@69 88 fclose (Data_File);
jpayne@69 89
jpayne@69 90 Len1 = strlen (Data1 + 1);
jpayne@69 91
jpayne@69 92 Tag_Len = strlen (Tag);
jpayne@69 93 for (i = 0; i < Tag_Len; i ++)
jpayne@69 94 Tag [i] = tolower (Tag [i]);
jpayne@69 95 if (strcmp (Tag, "reverse") == 0)
jpayne@69 96 Reverse_Complement (Data2, 1, Len2);
jpayne@69 97 fprintf (Gaps_With_Errors_File, "%s", Line);
jpayne@69 98 continue;
jpayne@69 99 }
jpayne@69 100 else if (! isdigit (Line [i]))
jpayne@69 101 {
jpayne@69 102 fprintf (Gaps_With_Errors_File, "%s", Line);
jpayne@69 103 continue;
jpayne@69 104 }
jpayne@69 105 sscanf (Line, "%ld %ld %ld %s %s %s", & Start1, & Start2,
jpayne@69 106 & Len, Olap_String, Gap_String1, Gap_String2);
jpayne@69 107
jpayne@69 108 if (Gap_String1 [0] == '-' || Gap_String2 [0] == '-')
jpayne@69 109 {
jpayne@69 110 fprintf (Gaps_With_Errors_File, "%s", Line);
jpayne@69 111 continue;
jpayne@69 112 }
jpayne@69 113 Gap1 = strtol (Gap_String1, NULL, 10);
jpayne@69 114 Gap2 = strtol (Gap_String2, NULL, 10);
jpayne@69 115 if (isalpha (Olap_String [0]))
jpayne@69 116 Olap = 0;
jpayne@69 117 else
jpayne@69 118 Olap = strtol (Olap_String, NULL, 10);
jpayne@69 119
jpayne@69 120 Start1 -= Gap1 + PREFIX_LEN - Olap;
jpayne@69 121 Start2 -= Gap2 + PREFIX_LEN - Olap;
jpayne@69 122
jpayne@69 123 Line_Len = strlen (Line);
jpayne@69 124 Line [Line_Len - 1] = '\0';
jpayne@69 125
jpayne@69 126 Show_Alignment (Data1 + Start1 - 1, Gap1 - Olap + 2 * PREFIX_LEN,
jpayne@69 127 Data2 + Start2 - 1, Gap2 - Olap + 2 * PREFIX_LEN);
jpayne@69 128
jpayne@69 129 }
jpayne@69 130
jpayne@69 131 fclose (Gaps_With_Errors_File);
jpayne@69 132
jpayne@69 133 return 0;
jpayne@69 134 }
jpayne@69 135
jpayne@69 136
jpayne@69 137 template<typename T>
jpayne@69 138 class Matrix_t
jpayne@69 139 {
jpayne@69 140 public:
jpayne@69 141 Matrix_t()
jpayne@69 142 { clear(); }
jpayne@69 143
jpayne@69 144 void clear()
jpayne@69 145 {
jpayne@69 146 d_m.clear();
jpayne@69 147 nRows_m = nCols_m = 0;
jpayne@69 148 }
jpayne@69 149
jpayne@69 150 void resize(long nRows, long nCols)
jpayne@69 151 {
jpayne@69 152 nRows_m = nRows;
jpayne@69 153 nCols_m = nCols;
jpayne@69 154 if ( nRows*nCols > d_m.size() )
jpayne@69 155 {
jpayne@69 156 try {
jpayne@69 157 d_m.resize(nRows*nCols);
jpayne@69 158 } catch (...) {
jpayne@69 159 d_m.clear();
jpayne@69 160 throw;
jpayne@69 161 }
jpayne@69 162 }
jpayne@69 163 }
jpayne@69 164
jpayne@69 165 inline T & operator()(long row, long col)
jpayne@69 166 { return d_m[nCols_m*row+col]; }
jpayne@69 167
jpayne@69 168 private:
jpayne@69 169
jpayne@69 170 vector<T> d_m;
jpayne@69 171 long nRows_m, nCols_m;
jpayne@69 172 };
jpayne@69 173
jpayne@69 174 void Show_Alignment (char A [], long int M, char B [], long int N)
jpayne@69 175
jpayne@69 176 // Print the alignment between strings A [1 .. M] and B [1 .. N] .
jpayne@69 177
jpayne@69 178 {
jpayne@69 179 static Matrix_t<int> D;
jpayne@69 180 static Matrix_t<char> Op;
jpayne@69 181 static vector<char> Show_A;
jpayne@69 182 static vector<char> Show_B;
jpayne@69 183 int Errors, Tmp;
jpayne@69 184 long int i, j, Ct;
jpayne@69 185
jpayne@69 186 if (M >= MAX_ALIGN || N >= MAX_ALIGN)
jpayne@69 187 {
jpayne@69 188 printf ("\n *** Too long ***\n\n");
jpayne@69 189 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
jpayne@69 190 return;
jpayne@69 191 }
jpayne@69 192
jpayne@69 193 try {
jpayne@69 194 D.resize(M+1,N+1);
jpayne@69 195 Op.resize(M+1,N+1);
jpayne@69 196 Show_A.resize(M+N+2);
jpayne@69 197 Show_B.resize(M+N+2);
jpayne@69 198 } catch (...) {
jpayne@69 199 printf ("\n *** Too long ***\n\n");
jpayne@69 200 fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
jpayne@69 201 return;
jpayne@69 202 }
jpayne@69 203
jpayne@69 204 D (0,0) = 0;
jpayne@69 205 Op (0,0) = 'a';
jpayne@69 206 for (i = 1; i <= M; i ++)
jpayne@69 207 {
jpayne@69 208 D (i,0) = i + 1;
jpayne@69 209 Op (i,0) = 'i';
jpayne@69 210 }
jpayne@69 211 for (j = 1; j <= N; j ++)
jpayne@69 212 {
jpayne@69 213 D (0,j) = j + 1;
jpayne@69 214 Op (0,j) = 'd';
jpayne@69 215 }
jpayne@69 216
jpayne@69 217 for (i = 1; i <= M; i ++) // for each row (A)
jpayne@69 218 for (j = 1; j <= N; j ++) // for each col (B)
jpayne@69 219 {
jpayne@69 220 D (i,j) = D (i,j - 1) + 1 + (Op (i,j - 1) == 'd' ? 0 : 1);
jpayne@69 221 Op (i,j) = 'd';
jpayne@69 222 Tmp = D (i - 1,j) + 1 + (Op (i - 1,j) == 'i' ? 0 : 1);
jpayne@69 223 if (Tmp < D (i,j))
jpayne@69 224 {
jpayne@69 225 D (i,j) = Tmp;
jpayne@69 226 Op (i,j) = 'i';
jpayne@69 227 }
jpayne@69 228 Tmp = D (i - 1,j - 1) + (A [i] == B [j] ? 0 : 1);
jpayne@69 229 if (Tmp < D (i,j))
jpayne@69 230 {
jpayne@69 231 D (i,j) = Tmp;
jpayne@69 232 Op (i,j) = 'a';
jpayne@69 233 }
jpayne@69 234 }
jpayne@69 235
jpayne@69 236 Ct = 0;
jpayne@69 237 i = M;
jpayne@69 238 j = N;
jpayne@69 239 while (i > 0 || j > 0)
jpayne@69 240 switch (Op (i,j))
jpayne@69 241 {
jpayne@69 242 case 'a' : // align
jpayne@69 243 Show_A [Ct] = A [i --];
jpayne@69 244 Show_B [Ct ++] = B [j --];
jpayne@69 245 break;
jpayne@69 246 case 'i' : // insert into A
jpayne@69 247 Show_A [Ct] = A [i --];
jpayne@69 248 Show_B [Ct ++] = '.';
jpayne@69 249 break;
jpayne@69 250 case 'd' : // delete from A
jpayne@69 251 Show_A [Ct] = '.';
jpayne@69 252 Show_B [Ct ++] = B [j --];
jpayne@69 253 break;
jpayne@69 254 default :
jpayne@69 255 printf (" *** i = %ld j = %ld Op (i,j) = %c\n",
jpayne@69 256 i, j, Op (i,j));
jpayne@69 257 exit (EXIT_FAILURE);
jpayne@69 258 }
jpayne@69 259 Errors = 0;
jpayne@69 260 for (i = 0; i < Ct; i ++)
jpayne@69 261 if (Show_A [i] != Show_B [i])
jpayne@69 262 Errors ++;
jpayne@69 263 printf (" Errors = %d\n", Errors);
jpayne@69 264 fprintf (Gaps_With_Errors_File, "%s %7d\n", Line, Errors);
jpayne@69 265 do
jpayne@69 266 {
jpayne@69 267 printf ("T: ");
jpayne@69 268 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
jpayne@69 269 putchar (Show_A [i]);
jpayne@69 270 putchar ('\n');
jpayne@69 271 printf ("S: ");
jpayne@69 272 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
jpayne@69 273 putchar (Show_B [i]);
jpayne@69 274 putchar ('\n');
jpayne@69 275 printf (" ");
jpayne@69 276 for (i = Ct - 1; i >= 0 && i >= Ct - WIDTH; i --)
jpayne@69 277 putchar (Show_A [i] == Show_B [i] ? ' ' : '^');
jpayne@69 278 putchar ('\n');
jpayne@69 279 Ct -= WIDTH;
jpayne@69 280 } while (Ct > 0);
jpayne@69 281 return;
jpayne@69 282 }