jpayne@69
|
1 //------------------------------------------------------------------------------
|
jpayne@69
|
2 // Programmer: Adam M Phillippy, The Institute for Genomic Research
|
jpayne@69
|
3 // File: show-aligns.cc
|
jpayne@69
|
4 // Date: 10 / 18 / 2002
|
jpayne@69
|
5 //
|
jpayne@69
|
6 // Usage: show-aligns [options] <deltafile>
|
jpayne@69
|
7 // Try 'show-aligns -h' for more information
|
jpayne@69
|
8 //
|
jpayne@69
|
9 // Description: For use in conjunction with the MUMmer package.
|
jpayne@69
|
10 // "show-aligns" displays human readable information from the
|
jpayne@69
|
11 // .delta output of the "nucmer" and "promer" programs. Outputs
|
jpayne@69
|
12 // pairwise alignments to stdout. Works for both nucleotide and
|
jpayne@69
|
13 // amino-acid alignments.
|
jpayne@69
|
14 //
|
jpayne@69
|
15 //------------------------------------------------------------------------------
|
jpayne@69
|
16
|
jpayne@69
|
17 #include "delta.hh"
|
jpayne@69
|
18 #include "tigrinc.hh"
|
jpayne@69
|
19 #include "translate.hh"
|
jpayne@69
|
20 #include "sw_alignscore.hh"
|
jpayne@69
|
21 #include <vector>
|
jpayne@69
|
22 #include <algorithm>
|
jpayne@69
|
23 using namespace std;
|
jpayne@69
|
24
|
jpayne@69
|
25 //------------------------------------------------------------- Constants ----//
|
jpayne@69
|
26
|
jpayne@69
|
27 const char NUCMER_MISMATCH_CHAR = '^';
|
jpayne@69
|
28 const char NUCMER_MATCH_CHAR = ' ';
|
jpayne@69
|
29 const char PROMER_SIM_CHAR = '+';
|
jpayne@69
|
30 const char PROMER_MISMATCH_CHAR = ' ';
|
jpayne@69
|
31
|
jpayne@69
|
32 //-- Note: if coord exceeds LINE_PREFIX_LEN - 1 digits,
|
jpayne@69
|
33 // increase these accordingly
|
jpayne@69
|
34 #define LINE_PREFIX_LEN 11
|
jpayne@69
|
35 #define PREFIX_FORMAT "%-10ld "
|
jpayne@69
|
36
|
jpayne@69
|
37 #define DEFAULT_SCREEN_WIDTH 60
|
jpayne@69
|
38 int Screen_Width = DEFAULT_SCREEN_WIDTH;
|
jpayne@69
|
39
|
jpayne@69
|
40
|
jpayne@69
|
41
|
jpayne@69
|
42 //------------------------------------------------------ Type Definitions ----//
|
jpayne@69
|
43 struct AlignStats
|
jpayne@69
|
44 //-- Alignment statistics data structure
|
jpayne@69
|
45 {
|
jpayne@69
|
46 long int sQ, eQ, sR, eR; // start and end in Query and Reference
|
jpayne@69
|
47 // relative to the directional strand
|
jpayne@69
|
48 vector<long int> Delta; // delta information
|
jpayne@69
|
49 };
|
jpayne@69
|
50
|
jpayne@69
|
51
|
jpayne@69
|
52
|
jpayne@69
|
53 struct sR_Sort
|
jpayne@69
|
54 //-- For sorting alignments by their sR coordinate
|
jpayne@69
|
55 {
|
jpayne@69
|
56 bool operator( ) (const AlignStats & pA, const AlignStats & pB)
|
jpayne@69
|
57 {
|
jpayne@69
|
58 //-- sort sR
|
jpayne@69
|
59 if ( pA.sR < pB.sR )
|
jpayne@69
|
60 return true;
|
jpayne@69
|
61 else
|
jpayne@69
|
62 return false;
|
jpayne@69
|
63 }
|
jpayne@69
|
64 };
|
jpayne@69
|
65
|
jpayne@69
|
66
|
jpayne@69
|
67
|
jpayne@69
|
68 struct sQ_Sort
|
jpayne@69
|
69 //-- For sorting alignments by their sQ coordinate
|
jpayne@69
|
70 {
|
jpayne@69
|
71 bool operator( ) (const AlignStats & pA, const AlignStats & pB)
|
jpayne@69
|
72 {
|
jpayne@69
|
73 //-- sort sQ
|
jpayne@69
|
74 if ( pA.sQ < pB.sQ )
|
jpayne@69
|
75 return true;
|
jpayne@69
|
76 else
|
jpayne@69
|
77 return false;
|
jpayne@69
|
78 }
|
jpayne@69
|
79 };
|
jpayne@69
|
80
|
jpayne@69
|
81
|
jpayne@69
|
82
|
jpayne@69
|
83
|
jpayne@69
|
84 //------------------------------------------------------ Global Variables ----//
|
jpayne@69
|
85 bool isSortByQuery = false; // -q option
|
jpayne@69
|
86 bool isSortByReference = false; // -r option
|
jpayne@69
|
87
|
jpayne@69
|
88 int DATA_TYPE = NUCMER_DATA;
|
jpayne@69
|
89 int MATRIX_TYPE = BLOSUM62;
|
jpayne@69
|
90
|
jpayne@69
|
91 char InputFileName [MAX_LINE];
|
jpayne@69
|
92 char RefFileName [MAX_LINE], QryFileName [MAX_LINE];
|
jpayne@69
|
93
|
jpayne@69
|
94
|
jpayne@69
|
95
|
jpayne@69
|
96 //------------------------------------------------- Function Declarations ----//
|
jpayne@69
|
97 long int toFwd
|
jpayne@69
|
98 (long int coord, long int len, int frame);
|
jpayne@69
|
99
|
jpayne@69
|
100 void parseDelta
|
jpayne@69
|
101 (vector<AlignStats> & Aligns, char * IdR, char * IdQ);
|
jpayne@69
|
102
|
jpayne@69
|
103 void printAlignments
|
jpayne@69
|
104 (vector<AlignStats> Aligns, char * R, char * Q);
|
jpayne@69
|
105
|
jpayne@69
|
106 void printHelp
|
jpayne@69
|
107 (const char * s);
|
jpayne@69
|
108
|
jpayne@69
|
109 void printUsage
|
jpayne@69
|
110 (const char * s);
|
jpayne@69
|
111
|
jpayne@69
|
112 long int revC
|
jpayne@69
|
113 (long int coord, long int len);
|
jpayne@69
|
114
|
jpayne@69
|
115
|
jpayne@69
|
116
|
jpayne@69
|
117 //-------------------------------------------------- Function Definitions ----//
|
jpayne@69
|
118 int main
|
jpayne@69
|
119 (int argc, char ** argv)
|
jpayne@69
|
120 {
|
jpayne@69
|
121 long int i;
|
jpayne@69
|
122
|
jpayne@69
|
123 FILE * RefFile = NULL;
|
jpayne@69
|
124 FILE * QryFile = NULL;
|
jpayne@69
|
125
|
jpayne@69
|
126 vector<AlignStats> Aligns;
|
jpayne@69
|
127
|
jpayne@69
|
128 char * R, * Q;
|
jpayne@69
|
129
|
jpayne@69
|
130 long int InitSize = INIT_SIZE;
|
jpayne@69
|
131 char Id [MAX_LINE], IdR [MAX_LINE], IdQ [MAX_LINE];
|
jpayne@69
|
132
|
jpayne@69
|
133 //-- Parse the command line arguments
|
jpayne@69
|
134 {
|
jpayne@69
|
135 int ch, errflg = 0;
|
jpayne@69
|
136 optarg = NULL;
|
jpayne@69
|
137
|
jpayne@69
|
138 while ( !errflg && ((ch = getopt
|
jpayne@69
|
139 (argc, argv, "hqrw:x:")) != EOF) )
|
jpayne@69
|
140 switch (ch)
|
jpayne@69
|
141 {
|
jpayne@69
|
142 case 'h' :
|
jpayne@69
|
143 printHelp (argv[0]);
|
jpayne@69
|
144 exit (EXIT_SUCCESS);
|
jpayne@69
|
145 break;
|
jpayne@69
|
146
|
jpayne@69
|
147 case 'q' :
|
jpayne@69
|
148 isSortByQuery = true;
|
jpayne@69
|
149 break;
|
jpayne@69
|
150
|
jpayne@69
|
151 case 'r' :
|
jpayne@69
|
152 isSortByReference = true;
|
jpayne@69
|
153 break;
|
jpayne@69
|
154
|
jpayne@69
|
155 case 'w' :
|
jpayne@69
|
156 Screen_Width = atoi (optarg);
|
jpayne@69
|
157 if ( Screen_Width <= LINE_PREFIX_LEN )
|
jpayne@69
|
158 {
|
jpayne@69
|
159 fprintf(stderr,
|
jpayne@69
|
160 "WARNING: invalid screen width %d, using default\n",
|
jpayne@69
|
161 DEFAULT_SCREEN_WIDTH);
|
jpayne@69
|
162 Screen_Width = DEFAULT_SCREEN_WIDTH;
|
jpayne@69
|
163 }
|
jpayne@69
|
164 break;
|
jpayne@69
|
165
|
jpayne@69
|
166 case 'x' :
|
jpayne@69
|
167 MATRIX_TYPE = atoi (optarg);
|
jpayne@69
|
168 if ( MATRIX_TYPE < 1 || MATRIX_TYPE > 3 )
|
jpayne@69
|
169 {
|
jpayne@69
|
170 fprintf(stderr,
|
jpayne@69
|
171 "WARNING: invalid matrix type %d, using default\n",
|
jpayne@69
|
172 MATRIX_TYPE);
|
jpayne@69
|
173 MATRIX_TYPE = BLOSUM62;
|
jpayne@69
|
174 }
|
jpayne@69
|
175 break;
|
jpayne@69
|
176
|
jpayne@69
|
177 default :
|
jpayne@69
|
178 errflg ++;
|
jpayne@69
|
179 }
|
jpayne@69
|
180
|
jpayne@69
|
181 if ( errflg > 0 || argc - optind != 3 )
|
jpayne@69
|
182 {
|
jpayne@69
|
183 printUsage (argv[0]);
|
jpayne@69
|
184 exit (EXIT_FAILURE);
|
jpayne@69
|
185 }
|
jpayne@69
|
186
|
jpayne@69
|
187 if ( isSortByQuery && isSortByReference )
|
jpayne@69
|
188 fprintf (stderr,
|
jpayne@69
|
189 "WARNING: both -r and -q were passed, -q ignored\n");
|
jpayne@69
|
190 }
|
jpayne@69
|
191
|
jpayne@69
|
192 strcpy (InputFileName, argv[optind ++]);
|
jpayne@69
|
193 strcpy (IdR, argv[optind ++]);
|
jpayne@69
|
194 strcpy (IdQ, argv[optind ++]);
|
jpayne@69
|
195
|
jpayne@69
|
196 //-- Read in the alignment data
|
jpayne@69
|
197 parseDelta (Aligns, IdR, IdQ);
|
jpayne@69
|
198
|
jpayne@69
|
199 //-- Find, and read in the reference sequence
|
jpayne@69
|
200 RefFile = File_Open (RefFileName, "r");
|
jpayne@69
|
201 InitSize = INIT_SIZE;
|
jpayne@69
|
202 R = (char *) Safe_malloc ( sizeof(char) * InitSize );
|
jpayne@69
|
203 while ( Read_String (RefFile, R, InitSize, Id, FALSE) )
|
jpayne@69
|
204 if ( strcmp (Id, IdR) == 0 )
|
jpayne@69
|
205 break;
|
jpayne@69
|
206 fclose (RefFile);
|
jpayne@69
|
207 if ( strcmp (Id, IdR) != 0 )
|
jpayne@69
|
208 {
|
jpayne@69
|
209 fprintf(stderr,"ERROR: Could not find %s in the reference file\n", IdR);
|
jpayne@69
|
210 exit (EXIT_FAILURE);
|
jpayne@69
|
211 }
|
jpayne@69
|
212
|
jpayne@69
|
213
|
jpayne@69
|
214 //-- Find, and read in the query sequence
|
jpayne@69
|
215 QryFile = File_Open (QryFileName, "r");
|
jpayne@69
|
216 InitSize = INIT_SIZE;
|
jpayne@69
|
217 Q = (char *) Safe_malloc ( sizeof(char) * InitSize );
|
jpayne@69
|
218 while ( Read_String (QryFile, Q, InitSize, Id, FALSE) )
|
jpayne@69
|
219 if ( strcmp (Id, IdQ) == 0 )
|
jpayne@69
|
220 break;
|
jpayne@69
|
221 fclose (QryFile);
|
jpayne@69
|
222 if ( strcmp (Id, IdQ) != 0 )
|
jpayne@69
|
223 {
|
jpayne@69
|
224 fprintf(stderr,"ERROR: Could not find %s in the query file\n", IdQ);
|
jpayne@69
|
225 exit (EXIT_FAILURE);
|
jpayne@69
|
226 }
|
jpayne@69
|
227
|
jpayne@69
|
228 //-- Sort the alignment regions if user passed -r or -q option
|
jpayne@69
|
229 if ( isSortByReference )
|
jpayne@69
|
230 sort (Aligns.begin( ), Aligns.end( ), sR_Sort( ));
|
jpayne@69
|
231 else if ( isSortByQuery )
|
jpayne@69
|
232 sort (Aligns.begin( ), Aligns.end( ), sQ_Sort( ));
|
jpayne@69
|
233
|
jpayne@69
|
234
|
jpayne@69
|
235 //-- Output the alignments to stdout
|
jpayne@69
|
236 printf("%s %s\n\n", RefFileName, QryFileName);
|
jpayne@69
|
237 for ( i = 0; i < Screen_Width; i ++ ) printf("=");
|
jpayne@69
|
238 printf("\n-- Alignments between %s and %s\n\n", IdR, IdQ);
|
jpayne@69
|
239 printAlignments (Aligns, R, Q);
|
jpayne@69
|
240 printf("\n");
|
jpayne@69
|
241 for ( i = 0; i < Screen_Width; i ++ ) printf("=");
|
jpayne@69
|
242 printf("\n");
|
jpayne@69
|
243
|
jpayne@69
|
244 return EXIT_SUCCESS;
|
jpayne@69
|
245 }
|
jpayne@69
|
246
|
jpayne@69
|
247
|
jpayne@69
|
248
|
jpayne@69
|
249
|
jpayne@69
|
250 long int toFwd
|
jpayne@69
|
251 (long int coord, long int len, int frame)
|
jpayne@69
|
252
|
jpayne@69
|
253 // Switch relative coordinate to reference forward DNA strand
|
jpayne@69
|
254
|
jpayne@69
|
255 {
|
jpayne@69
|
256 long int newc = coord;
|
jpayne@69
|
257
|
jpayne@69
|
258 if ( DATA_TYPE == PROMER_DATA )
|
jpayne@69
|
259 newc = newc * 3 - (3 - labs(frame));
|
jpayne@69
|
260
|
jpayne@69
|
261 if ( frame < 0 )
|
jpayne@69
|
262 return revC ( newc, len );
|
jpayne@69
|
263 else
|
jpayne@69
|
264 return newc;
|
jpayne@69
|
265 }
|
jpayne@69
|
266
|
jpayne@69
|
267
|
jpayne@69
|
268
|
jpayne@69
|
269
|
jpayne@69
|
270 void parseDelta
|
jpayne@69
|
271 (vector<AlignStats> & Aligns, char * IdR, char * IdQ)
|
jpayne@69
|
272
|
jpayne@69
|
273 // Read in the alignments from the desired region
|
jpayne@69
|
274
|
jpayne@69
|
275 {
|
jpayne@69
|
276 AlignStats aStats; // single alignment region
|
jpayne@69
|
277 bool found = false;
|
jpayne@69
|
278
|
jpayne@69
|
279 DeltaReader_t dr;
|
jpayne@69
|
280 dr.open (InputFileName);
|
jpayne@69
|
281 DATA_TYPE = dr.getDataType( ) == NUCMER_STRING ?
|
jpayne@69
|
282 NUCMER_DATA : PROMER_DATA;
|
jpayne@69
|
283 strcpy (RefFileName, dr.getReferencePath( ).c_str( ));
|
jpayne@69
|
284 strcpy (QryFileName, dr.getQueryPath( ).c_str( ));
|
jpayne@69
|
285
|
jpayne@69
|
286 while ( dr.readNext( ) )
|
jpayne@69
|
287 {
|
jpayne@69
|
288 if ( dr.getRecord( ).idR == IdR &&
|
jpayne@69
|
289 dr.getRecord( ).idQ == IdQ )
|
jpayne@69
|
290 {
|
jpayne@69
|
291 found = true;
|
jpayne@69
|
292 break;
|
jpayne@69
|
293 }
|
jpayne@69
|
294 }
|
jpayne@69
|
295 if ( !found )
|
jpayne@69
|
296 {
|
jpayne@69
|
297 fprintf(stderr, "ERROR: Could not find any alignments for %s and %s\n",
|
jpayne@69
|
298 IdR, IdQ);
|
jpayne@69
|
299 exit (EXIT_FAILURE);
|
jpayne@69
|
300 }
|
jpayne@69
|
301
|
jpayne@69
|
302 for ( unsigned int i = 0; i < dr.getRecord( ).aligns.size( ); i ++ )
|
jpayne@69
|
303 {
|
jpayne@69
|
304 aStats.sR = dr.getRecord( ).aligns[i].sR;
|
jpayne@69
|
305 aStats.eR = dr.getRecord( ).aligns[i].eR;
|
jpayne@69
|
306 aStats.sQ = dr.getRecord( ).aligns[i].sQ;
|
jpayne@69
|
307 aStats.eQ = dr.getRecord( ).aligns[i].eQ;
|
jpayne@69
|
308
|
jpayne@69
|
309 aStats.Delta = dr.getRecord( ).aligns[i].deltas;
|
jpayne@69
|
310
|
jpayne@69
|
311 //-- Add the new alignment
|
jpayne@69
|
312 Aligns.push_back (aStats);
|
jpayne@69
|
313 }
|
jpayne@69
|
314 dr.close( );
|
jpayne@69
|
315
|
jpayne@69
|
316 return;
|
jpayne@69
|
317 }
|
jpayne@69
|
318
|
jpayne@69
|
319
|
jpayne@69
|
320
|
jpayne@69
|
321
|
jpayne@69
|
322 void printAlignments
|
jpayne@69
|
323 (vector<AlignStats> Aligns, char * R, char * Q)
|
jpayne@69
|
324
|
jpayne@69
|
325 // Print the alignments to the screen
|
jpayne@69
|
326
|
jpayne@69
|
327 {
|
jpayne@69
|
328 vector<AlignStats>::iterator Ap;
|
jpayne@69
|
329 vector<long int>::iterator Dp;
|
jpayne@69
|
330
|
jpayne@69
|
331 char * A[7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL};
|
jpayne@69
|
332 char * B[7] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL};
|
jpayne@69
|
333 int Ai, Bi, i;
|
jpayne@69
|
334
|
jpayne@69
|
335 char Buff1 [Screen_Width + 1],
|
jpayne@69
|
336 Buff2 [Screen_Width + 1],
|
jpayne@69
|
337 Buff3 [Screen_Width + 1];
|
jpayne@69
|
338
|
jpayne@69
|
339 int Sign;
|
jpayne@69
|
340 long int Delta;
|
jpayne@69
|
341 long int Total, Errors, Remain;
|
jpayne@69
|
342 long int Pos;
|
jpayne@69
|
343
|
jpayne@69
|
344 long int sR, eR, sQ, eQ;
|
jpayne@69
|
345 long int Apos, Bpos;
|
jpayne@69
|
346 long int SeqLenR, SeqLenQ;
|
jpayne@69
|
347 int frameR, frameQ;
|
jpayne@69
|
348
|
jpayne@69
|
349 for ( i = 0; i < LINE_PREFIX_LEN; i ++ )
|
jpayne@69
|
350 Buff3[i] = ' ';
|
jpayne@69
|
351
|
jpayne@69
|
352 SeqLenR = strlen (R + 1);
|
jpayne@69
|
353 SeqLenQ = strlen (Q + 1);
|
jpayne@69
|
354
|
jpayne@69
|
355 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
356 {
|
jpayne@69
|
357 A[1] = R;
|
jpayne@69
|
358 A[4] = (char *) Safe_malloc ( sizeof(char) * (SeqLenR + 2) );
|
jpayne@69
|
359 strcpy ( A[4] + 1, A[1] + 1 );
|
jpayne@69
|
360 A[4][0] = '\0';
|
jpayne@69
|
361 Reverse_Complement ( A[4], 1, SeqLenR );
|
jpayne@69
|
362
|
jpayne@69
|
363 B[1] = Q;
|
jpayne@69
|
364 B[4] = (char *) Safe_malloc ( sizeof(char) * (SeqLenQ + 2) );
|
jpayne@69
|
365 strcpy ( B[4] + 1, B[1] + 1 );
|
jpayne@69
|
366 B[4][0] = '\0';
|
jpayne@69
|
367 Reverse_Complement ( B[4], 1, SeqLenQ );
|
jpayne@69
|
368 }
|
jpayne@69
|
369
|
jpayne@69
|
370 for ( Ap = Aligns.begin( ); Ap < Aligns.end( ); Ap ++ )
|
jpayne@69
|
371 {
|
jpayne@69
|
372 sR = Ap->sR;
|
jpayne@69
|
373 eR = Ap->eR;
|
jpayne@69
|
374 sQ = Ap->sQ;
|
jpayne@69
|
375 eQ = Ap->eQ;
|
jpayne@69
|
376
|
jpayne@69
|
377 //-- Get the coords and frame right
|
jpayne@69
|
378 frameR = 1;
|
jpayne@69
|
379 if ( sR > eR )
|
jpayne@69
|
380 {
|
jpayne@69
|
381 sR = revC (sR, SeqLenR);
|
jpayne@69
|
382 eR = revC (eR, SeqLenR);
|
jpayne@69
|
383 frameR += 3;
|
jpayne@69
|
384 }
|
jpayne@69
|
385 frameQ = 1;
|
jpayne@69
|
386 if ( sQ > eQ )
|
jpayne@69
|
387 {
|
jpayne@69
|
388 sQ = revC (sQ, SeqLenQ);
|
jpayne@69
|
389 eQ = revC (eQ, SeqLenQ);
|
jpayne@69
|
390 frameQ += 3;
|
jpayne@69
|
391 }
|
jpayne@69
|
392
|
jpayne@69
|
393 if ( DATA_TYPE == PROMER_DATA )
|
jpayne@69
|
394 {
|
jpayne@69
|
395 frameR += (sR + 2) % 3;
|
jpayne@69
|
396 frameQ += (sQ + 2) % 3;
|
jpayne@69
|
397
|
jpayne@69
|
398 //-- Translate the coordinates from DNA to Amino Acid
|
jpayne@69
|
399 // remeber that eR and eQ point to the last nucleotide in the codon
|
jpayne@69
|
400 sR = (sR + 2) / 3;
|
jpayne@69
|
401 eR = eR / 3;
|
jpayne@69
|
402 sQ = (sQ + 2) / 3;
|
jpayne@69
|
403 eQ = eQ / 3;
|
jpayne@69
|
404 }
|
jpayne@69
|
405 Ai = frameR;
|
jpayne@69
|
406 Bi = frameQ;
|
jpayne@69
|
407 if ( frameR > 3 )
|
jpayne@69
|
408 frameR = -(frameR - 3);
|
jpayne@69
|
409 if ( frameQ > 3 )
|
jpayne@69
|
410 frameQ = -(frameQ - 3);
|
jpayne@69
|
411
|
jpayne@69
|
412 //-- Get the sequence
|
jpayne@69
|
413 if ( A[Ai] == NULL )
|
jpayne@69
|
414 {
|
jpayne@69
|
415 assert ( DATA_TYPE == PROMER_DATA );
|
jpayne@69
|
416 A[Ai] = (char *) Safe_malloc ( sizeof(char) * ( SeqLenR / 3 + 2 ) );
|
jpayne@69
|
417 A[Ai][0] = '\0';
|
jpayne@69
|
418 Translate_DNA ( R, A[Ai], Ai );
|
jpayne@69
|
419 }
|
jpayne@69
|
420 if ( B[Bi] == NULL )
|
jpayne@69
|
421 {
|
jpayne@69
|
422 assert ( DATA_TYPE == PROMER_DATA );
|
jpayne@69
|
423 B[Bi] = (char *) Safe_malloc ( sizeof(char) * ( SeqLenQ / 3 + 2 ) );
|
jpayne@69
|
424 B[Bi][0] = '\0';
|
jpayne@69
|
425 Translate_DNA ( Q, B[Bi], Bi );
|
jpayne@69
|
426 }
|
jpayne@69
|
427
|
jpayne@69
|
428
|
jpayne@69
|
429 //-- Generate the alignment
|
jpayne@69
|
430 printf("-- BEGIN alignment [ %s%d %ld - %ld | %s%d %ld - %ld ]\n\n\n",
|
jpayne@69
|
431 frameR > 0 ? "+" : "-", abs(frameR), Ap->sR, Ap->eR,
|
jpayne@69
|
432 frameQ > 0 ? "+" : "-", abs(frameQ), Ap->sQ, Ap->eQ);
|
jpayne@69
|
433
|
jpayne@69
|
434 Apos = sR;
|
jpayne@69
|
435 Bpos = sQ;
|
jpayne@69
|
436
|
jpayne@69
|
437 Errors = 0;
|
jpayne@69
|
438 Total = 0;
|
jpayne@69
|
439 Remain = eR - sR + 1;
|
jpayne@69
|
440
|
jpayne@69
|
441 sprintf(Buff1, PREFIX_FORMAT, toFwd (Apos, SeqLenR, frameR));
|
jpayne@69
|
442 sprintf(Buff2, PREFIX_FORMAT, toFwd (Bpos, SeqLenQ, frameQ));
|
jpayne@69
|
443 Pos = LINE_PREFIX_LEN;
|
jpayne@69
|
444
|
jpayne@69
|
445 for ( Dp = Ap->Delta.begin( );
|
jpayne@69
|
446 Dp < Ap->Delta.end( ) &&
|
jpayne@69
|
447 *Dp != 0; Dp ++ )
|
jpayne@69
|
448 {
|
jpayne@69
|
449 Delta = *Dp;
|
jpayne@69
|
450 Sign = Delta > 0 ? 1 : -1;
|
jpayne@69
|
451 Delta = labs ( Delta );
|
jpayne@69
|
452
|
jpayne@69
|
453
|
jpayne@69
|
454 //-- For all the bases before the next indel
|
jpayne@69
|
455 for ( i = 1; i < Delta; i ++ )
|
jpayne@69
|
456 {
|
jpayne@69
|
457 if ( Pos >= Screen_Width )
|
jpayne@69
|
458 {
|
jpayne@69
|
459 Buff1[Pos] = Buff2[Pos] = Buff3[Pos] = '\0';
|
jpayne@69
|
460 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
461 printf("%s\n%s\n%s\n\n", Buff1, Buff2, Buff3);
|
jpayne@69
|
462 else
|
jpayne@69
|
463 printf("%s\n%s\n%s\n\n", Buff1, Buff3, Buff2);
|
jpayne@69
|
464 sprintf(Buff1, PREFIX_FORMAT, toFwd (Apos, SeqLenR, frameR));
|
jpayne@69
|
465 sprintf(Buff2, PREFIX_FORMAT, toFwd (Bpos, SeqLenQ, frameQ));
|
jpayne@69
|
466 Pos = LINE_PREFIX_LEN;
|
jpayne@69
|
467 }
|
jpayne@69
|
468
|
jpayne@69
|
469 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
470 Buff3[Pos] = A[Ai][Apos] == B[Bi][Bpos] ?
|
jpayne@69
|
471 NUCMER_MATCH_CHAR : NUCMER_MISMATCH_CHAR;
|
jpayne@69
|
472 else if ( A[Ai][Apos] == B[Bi][Bpos] )
|
jpayne@69
|
473 Buff3[Pos] = A[Ai][Apos];
|
jpayne@69
|
474 else
|
jpayne@69
|
475 Buff3[Pos] = MATCH_SCORE
|
jpayne@69
|
476 [MATRIX_TYPE]
|
jpayne@69
|
477 [toupper(A[Ai][Apos]) - 'A']
|
jpayne@69
|
478 [toupper(B[Bi][Bpos]) - 'A'] > 0 ?
|
jpayne@69
|
479 PROMER_SIM_CHAR : PROMER_MISMATCH_CHAR;
|
jpayne@69
|
480 Buff1[Pos] = A[Ai][Apos ++];
|
jpayne@69
|
481 Buff2[Pos ++] = B[Bi][Bpos ++];
|
jpayne@69
|
482 }
|
jpayne@69
|
483
|
jpayne@69
|
484
|
jpayne@69
|
485 //-- For the indel
|
jpayne@69
|
486 Remain -= i - 1;
|
jpayne@69
|
487
|
jpayne@69
|
488 if ( Pos >= Screen_Width )
|
jpayne@69
|
489 {
|
jpayne@69
|
490 Buff1[Pos] = Buff2[Pos] = Buff3[Pos] = '\0';
|
jpayne@69
|
491 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
492 printf("%s\n%s\n%s\n\n", Buff1, Buff2, Buff3);
|
jpayne@69
|
493 else
|
jpayne@69
|
494 printf("%s\n%s\n%s\n\n", Buff1, Buff3, Buff2);
|
jpayne@69
|
495 sprintf(Buff1, PREFIX_FORMAT, toFwd (Apos, SeqLenR, frameR));
|
jpayne@69
|
496 sprintf(Buff2, PREFIX_FORMAT, toFwd (Bpos, SeqLenQ, frameQ));
|
jpayne@69
|
497 Pos = LINE_PREFIX_LEN;
|
jpayne@69
|
498 }
|
jpayne@69
|
499
|
jpayne@69
|
500 if ( Sign == 1 )
|
jpayne@69
|
501 {
|
jpayne@69
|
502 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
503 Buff3[Pos] = NUCMER_MISMATCH_CHAR;
|
jpayne@69
|
504 else
|
jpayne@69
|
505 Buff3[Pos] = PROMER_MISMATCH_CHAR;
|
jpayne@69
|
506 Buff1[Pos] = A[Ai][Apos ++];
|
jpayne@69
|
507 Buff2[Pos ++] = '.';
|
jpayne@69
|
508 Remain --;
|
jpayne@69
|
509 }
|
jpayne@69
|
510 else
|
jpayne@69
|
511 {
|
jpayne@69
|
512 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
513 Buff3[Pos] = NUCMER_MISMATCH_CHAR;
|
jpayne@69
|
514 else
|
jpayne@69
|
515 Buff3[Pos] = PROMER_MISMATCH_CHAR;
|
jpayne@69
|
516 Buff1[Pos] = '.';
|
jpayne@69
|
517 Buff2[Pos ++] = B[Bi][Bpos ++];
|
jpayne@69
|
518 Total ++;
|
jpayne@69
|
519 }
|
jpayne@69
|
520 }
|
jpayne@69
|
521
|
jpayne@69
|
522
|
jpayne@69
|
523 //-- For all the bases remaining after the last indel
|
jpayne@69
|
524 for ( i = 0; i < Remain; i ++ )
|
jpayne@69
|
525 {
|
jpayne@69
|
526 if ( Pos >= Screen_Width )
|
jpayne@69
|
527 {
|
jpayne@69
|
528 Buff1[Pos] = Buff2[Pos] = Buff3[Pos] = '\0';
|
jpayne@69
|
529 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
530 printf("%s\n%s\n%s\n\n", Buff1, Buff2, Buff3);
|
jpayne@69
|
531 else
|
jpayne@69
|
532 printf("%s\n%s\n%s\n\n", Buff1, Buff3, Buff2);
|
jpayne@69
|
533 sprintf(Buff1, PREFIX_FORMAT, toFwd (Apos, SeqLenR, frameR));
|
jpayne@69
|
534 sprintf(Buff2, PREFIX_FORMAT, toFwd (Bpos, SeqLenQ, frameQ));
|
jpayne@69
|
535 Pos = LINE_PREFIX_LEN;
|
jpayne@69
|
536 }
|
jpayne@69
|
537
|
jpayne@69
|
538 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
539 Buff3[Pos] = A[Ai][Apos] == B[Bi][Bpos] ?
|
jpayne@69
|
540 NUCMER_MATCH_CHAR : NUCMER_MISMATCH_CHAR;
|
jpayne@69
|
541 else if ( A[Ai][Apos] == B[Bi][Bpos] )
|
jpayne@69
|
542 Buff3[Pos] = A[Ai][Apos];
|
jpayne@69
|
543 else
|
jpayne@69
|
544 Buff3[Pos] = MATCH_SCORE
|
jpayne@69
|
545 [MATRIX_TYPE]
|
jpayne@69
|
546 [toupper(A[Ai][Apos]) - 'A']
|
jpayne@69
|
547 [toupper(B[Bi][Bpos]) - 'A'] > 0 ?
|
jpayne@69
|
548 PROMER_SIM_CHAR : PROMER_MISMATCH_CHAR;
|
jpayne@69
|
549 Buff1[Pos] = A[Ai][Apos ++];
|
jpayne@69
|
550 Buff2[Pos ++] = B[Bi][Bpos ++];
|
jpayne@69
|
551 }
|
jpayne@69
|
552
|
jpayne@69
|
553
|
jpayne@69
|
554 //-- For the remaining buffered output
|
jpayne@69
|
555 if ( Pos > LINE_PREFIX_LEN )
|
jpayne@69
|
556 {
|
jpayne@69
|
557 Buff1[Pos] = Buff2[Pos] = Buff3[Pos] = '\0';
|
jpayne@69
|
558 if ( DATA_TYPE == NUCMER_DATA )
|
jpayne@69
|
559 printf("%s\n%s\n%s\n\n", Buff1, Buff2, Buff3);
|
jpayne@69
|
560 else
|
jpayne@69
|
561 printf("%s\n%s\n%s\n\n", Buff1, Buff3, Buff2);
|
jpayne@69
|
562 sprintf(Buff1, PREFIX_FORMAT, toFwd (Apos, SeqLenR, frameR));
|
jpayne@69
|
563 sprintf(Buff2, PREFIX_FORMAT, toFwd (Bpos, SeqLenQ, frameQ));
|
jpayne@69
|
564 Pos = LINE_PREFIX_LEN;
|
jpayne@69
|
565 }
|
jpayne@69
|
566
|
jpayne@69
|
567 printf("\n-- END alignment [ %s%d %ld - %ld | %s%d %ld - %ld ]\n",
|
jpayne@69
|
568 frameR > 0 ? "+" : "-", abs(frameR), Ap->sR, Ap->eR,
|
jpayne@69
|
569 frameQ > 0 ? "+" : "-", abs(frameQ), Ap->sQ, Ap->eQ);
|
jpayne@69
|
570 }
|
jpayne@69
|
571
|
jpayne@69
|
572 //-- Free the sequences, except for the originals
|
jpayne@69
|
573 for ( i = 0; i < 7; i ++ )
|
jpayne@69
|
574 {
|
jpayne@69
|
575 if ( (DATA_TYPE != NUCMER_DATA || i != 1) && A[i] != NULL )
|
jpayne@69
|
576 free ( A[i] );
|
jpayne@69
|
577 if ( (DATA_TYPE != NUCMER_DATA || i != 1) && B[i] != NULL )
|
jpayne@69
|
578 free ( B[i] );
|
jpayne@69
|
579 }
|
jpayne@69
|
580
|
jpayne@69
|
581 return;
|
jpayne@69
|
582 }
|
jpayne@69
|
583
|
jpayne@69
|
584
|
jpayne@69
|
585
|
jpayne@69
|
586
|
jpayne@69
|
587 void printHelp
|
jpayne@69
|
588 (const char * s)
|
jpayne@69
|
589
|
jpayne@69
|
590 // Display the program's help information to stderr
|
jpayne@69
|
591
|
jpayne@69
|
592 {
|
jpayne@69
|
593 fprintf (stderr,
|
jpayne@69
|
594 "\nUSAGE: %s [options] <deltafile> <ref ID> <qry ID>\n\n", s);
|
jpayne@69
|
595 fprintf (stderr,
|
jpayne@69
|
596 "-h Display help information\n"
|
jpayne@69
|
597 "-q Sort alignments by the query start coordinate\n"
|
jpayne@69
|
598 "-r Sort alignments by the reference start coordinate\n"
|
jpayne@69
|
599 "-w int Set the screen width - default is 60\n"
|
jpayne@69
|
600 "-x int Set the matrix type - default is 2 (BLOSUM 62),\n"
|
jpayne@69
|
601 " other options include 1 (BLOSUM 45) and 3 (BLOSUM 80)\n"
|
jpayne@69
|
602 " note: only has effect on amino acid alignments\n\n");
|
jpayne@69
|
603 fprintf (stderr,
|
jpayne@69
|
604 " Input is the .delta output of either the \"nucmer\" or the\n"
|
jpayne@69
|
605 "\"promer\" program passed on the command line.\n"
|
jpayne@69
|
606 " Output is to stdout, and consists of all the alignments between the\n"
|
jpayne@69
|
607 "query and reference sequences identified on the command line.\n"
|
jpayne@69
|
608 " NOTE: No sorting is done by default, therefore the alignments\n"
|
jpayne@69
|
609 "will be ordered as found in the <deltafile> input.\n\n");
|
jpayne@69
|
610 return;
|
jpayne@69
|
611 }
|
jpayne@69
|
612
|
jpayne@69
|
613
|
jpayne@69
|
614
|
jpayne@69
|
615
|
jpayne@69
|
616 void printUsage
|
jpayne@69
|
617 (const char * s)
|
jpayne@69
|
618
|
jpayne@69
|
619 // Display the program's usage information to stderr.
|
jpayne@69
|
620
|
jpayne@69
|
621 {
|
jpayne@69
|
622 fprintf (stderr,
|
jpayne@69
|
623 "\nUSAGE: %s [options] <deltafile> <ref ID> <qry ID>\n\n", s);
|
jpayne@69
|
624 fprintf (stderr, "Try '%s -h' for more information.\n", s);
|
jpayne@69
|
625 return;
|
jpayne@69
|
626 }
|
jpayne@69
|
627
|
jpayne@69
|
628
|
jpayne@69
|
629
|
jpayne@69
|
630
|
jpayne@69
|
631 long int revC
|
jpayne@69
|
632 (long int coord, long int len)
|
jpayne@69
|
633 {
|
jpayne@69
|
634 return len - coord + 1;
|
jpayne@69
|
635 }
|