jpayne@69
|
1 //------------------------------------------------------------------------------
|
jpayne@69
|
2 // Programmer: Adam M Phillippy, The Institute for Genomic Research
|
jpayne@69
|
3 // File: show-snps.cc
|
jpayne@69
|
4 // Date: 12 / 08 / 2004
|
jpayne@69
|
5 //
|
jpayne@69
|
6 // Usage: show-snps [options] <deltafile>
|
jpayne@69
|
7 // Try 'show-snps -h' for more information
|
jpayne@69
|
8 //
|
jpayne@69
|
9 // Description: For use in conjunction with the MUMmer package. "show-snps"
|
jpayne@69
|
10 // displays human readable (and machine parse-able) single
|
jpayne@69
|
11 // base-pair polymorphisms, including indels from the .delta output
|
jpayne@69
|
12 // of the "nucmer" program. Outputs SNP positions and relative
|
jpayne@69
|
13 // information to stdout.
|
jpayne@69
|
14 //
|
jpayne@69
|
15 //------------------------------------------------------------------------------
|
jpayne@69
|
16
|
jpayne@69
|
17 #include "delta.hh"
|
jpayne@69
|
18 #include "tigrinc.hh"
|
jpayne@69
|
19 #include "translate.hh"
|
jpayne@69
|
20 #include "sw_alignscore.hh"
|
jpayne@69
|
21 #include <vector>
|
jpayne@69
|
22 #include <algorithm>
|
jpayne@69
|
23 #include <string>
|
jpayne@69
|
24 #include <sstream>
|
jpayne@69
|
25 #include <cstring>
|
jpayne@69
|
26 #include <map>
|
jpayne@69
|
27 #include <set>
|
jpayne@69
|
28 #include <cstdio>
|
jpayne@69
|
29 using namespace std;
|
jpayne@69
|
30
|
jpayne@69
|
31
|
jpayne@69
|
32
|
jpayne@69
|
33
|
jpayne@69
|
34 //=============================================================== Options ====//
|
jpayne@69
|
35 string OPT_AlignName; // delta file name
|
jpayne@69
|
36 string OPT_ReferenceName; // reference sequence file name
|
jpayne@69
|
37 string OPT_QueryName; // query sequence file name
|
jpayne@69
|
38
|
jpayne@69
|
39 bool OPT_SortReference = false; // -r option
|
jpayne@69
|
40 bool OPT_SortQuery = false; // -q option
|
jpayne@69
|
41 bool OPT_ShowLength = false; // -l option
|
jpayne@69
|
42 bool OPT_ShowConflict = true; // -C option
|
jpayne@69
|
43 bool OPT_ShowIndels = true; // -I option
|
jpayne@69
|
44 bool OPT_PrintTabular = false; // -T option
|
jpayne@69
|
45 bool OPT_PrintHeader = true; // -H option
|
jpayne@69
|
46 bool OPT_SelectAligns = false; // -S option
|
jpayne@69
|
47
|
jpayne@69
|
48 int OPT_Context = 0; // -x option
|
jpayne@69
|
49
|
jpayne@69
|
50 set<string> OPT_Aligns; // -S option
|
jpayne@69
|
51
|
jpayne@69
|
52
|
jpayne@69
|
53
|
jpayne@69
|
54
|
jpayne@69
|
55 //============================================================= Constants ====//
|
jpayne@69
|
56 const char INDEL_CHAR = '.';
|
jpayne@69
|
57 const char SEQEND_CHAR = '-';
|
jpayne@69
|
58
|
jpayne@69
|
59
|
jpayne@69
|
60
|
jpayne@69
|
61 struct SNP_R_Sort
|
jpayne@69
|
62 {
|
jpayne@69
|
63 bool operator() (const SNP_t * a, const SNP_t * b)
|
jpayne@69
|
64 {
|
jpayne@69
|
65 int i = a->ep->refnode->id->compare (*(b->ep->refnode->id));
|
jpayne@69
|
66
|
jpayne@69
|
67 if ( i < 0 )
|
jpayne@69
|
68 return true;
|
jpayne@69
|
69 else if ( i > 0 )
|
jpayne@69
|
70 return false;
|
jpayne@69
|
71 else
|
jpayne@69
|
72 {
|
jpayne@69
|
73 if ( a -> pR < b -> pR )
|
jpayne@69
|
74 return true;
|
jpayne@69
|
75 else if ( a -> pR > b -> pR )
|
jpayne@69
|
76 return false;
|
jpayne@69
|
77 else
|
jpayne@69
|
78 {
|
jpayne@69
|
79 int j = a->ep->qrynode->id->compare (*(b->ep->qrynode->id));
|
jpayne@69
|
80
|
jpayne@69
|
81 if ( j < 0 )
|
jpayne@69
|
82 return true;
|
jpayne@69
|
83 else if ( j > 0 )
|
jpayne@69
|
84 return false;
|
jpayne@69
|
85 else
|
jpayne@69
|
86 {
|
jpayne@69
|
87 if ( a -> pQ < b -> pQ )
|
jpayne@69
|
88 return true;
|
jpayne@69
|
89 else
|
jpayne@69
|
90 return false;
|
jpayne@69
|
91 }
|
jpayne@69
|
92 }
|
jpayne@69
|
93 }
|
jpayne@69
|
94 }
|
jpayne@69
|
95 };
|
jpayne@69
|
96
|
jpayne@69
|
97
|
jpayne@69
|
98 struct SNP_Q_Sort
|
jpayne@69
|
99 {
|
jpayne@69
|
100 bool operator() (const SNP_t * a, const SNP_t * b)
|
jpayne@69
|
101 {
|
jpayne@69
|
102 int i = a->ep->qrynode->id->compare (*(b->ep->qrynode->id));
|
jpayne@69
|
103
|
jpayne@69
|
104 if ( i < 0 )
|
jpayne@69
|
105 return true;
|
jpayne@69
|
106 else if ( i > 0 )
|
jpayne@69
|
107 return false;
|
jpayne@69
|
108 else
|
jpayne@69
|
109 {
|
jpayne@69
|
110 if ( a -> pQ < b -> pQ )
|
jpayne@69
|
111 return true;
|
jpayne@69
|
112 else if ( a -> pQ > b -> pQ )
|
jpayne@69
|
113 return false;
|
jpayne@69
|
114 else
|
jpayne@69
|
115 {
|
jpayne@69
|
116 int j = a->ep->refnode->id->compare (*(b->ep->refnode->id));
|
jpayne@69
|
117
|
jpayne@69
|
118 if ( j < 0 )
|
jpayne@69
|
119 return true;
|
jpayne@69
|
120 else if ( j > 0 )
|
jpayne@69
|
121 return false;
|
jpayne@69
|
122 else
|
jpayne@69
|
123 {
|
jpayne@69
|
124 if ( a -> pR < b -> pR )
|
jpayne@69
|
125 return true;
|
jpayne@69
|
126 else
|
jpayne@69
|
127 return false;
|
jpayne@69
|
128 }
|
jpayne@69
|
129 }
|
jpayne@69
|
130 }
|
jpayne@69
|
131 }
|
jpayne@69
|
132 };
|
jpayne@69
|
133
|
jpayne@69
|
134
|
jpayne@69
|
135
|
jpayne@69
|
136
|
jpayne@69
|
137 //========================================================== Fuction Decs ====//
|
jpayne@69
|
138 //------------------------------------------------------------------ RevC ----//
|
jpayne@69
|
139 inline long RevC (long coord, long len)
|
jpayne@69
|
140 {
|
jpayne@69
|
141 return len - coord + 1;
|
jpayne@69
|
142 }
|
jpayne@69
|
143
|
jpayne@69
|
144
|
jpayne@69
|
145 //------------------------------------------------------------------ Norm ----//
|
jpayne@69
|
146 inline long Norm (long c, long l, int f, AlignmentType_t d)
|
jpayne@69
|
147 {
|
jpayne@69
|
148 long retval = (d == PROMER_DATA ? c * 3 - (3 - abs(f)) : c);
|
jpayne@69
|
149 if ( f < 0 ) retval = RevC (retval, l);
|
jpayne@69
|
150 return retval;
|
jpayne@69
|
151 }
|
jpayne@69
|
152
|
jpayne@69
|
153
|
jpayne@69
|
154 //------------------------------------------------------------------ Swap ----//
|
jpayne@69
|
155 inline void Swap (long & a, long & b)
|
jpayne@69
|
156 {
|
jpayne@69
|
157 long t = a; a = b; b = t;
|
jpayne@69
|
158 }
|
jpayne@69
|
159
|
jpayne@69
|
160
|
jpayne@69
|
161 //------------------------------------------------------------- CheckSNPs ----//
|
jpayne@69
|
162 void CheckSNPs (DeltaGraph_t & graph);
|
jpayne@69
|
163
|
jpayne@69
|
164
|
jpayne@69
|
165 //-------------------------------------------------------------- FindSNPs ----//
|
jpayne@69
|
166 void FindSNPs (DeltaGraph_t & graph);
|
jpayne@69
|
167
|
jpayne@69
|
168
|
jpayne@69
|
169 //------------------------------------------------------------ PrintHuman ----//
|
jpayne@69
|
170 void PrintHuman (const vector<const SNP_t *> & snps,
|
jpayne@69
|
171 const DeltaGraph_t & graph);
|
jpayne@69
|
172
|
jpayne@69
|
173
|
jpayne@69
|
174 //---------------------------------------------------------- PrintTabular ----//
|
jpayne@69
|
175 void PrintTabular (const vector<const SNP_t *> & snps,
|
jpayne@69
|
176 const DeltaGraph_t & graph);
|
jpayne@69
|
177
|
jpayne@69
|
178
|
jpayne@69
|
179 //---------------------------------------------------------- SelectAligns ----//
|
jpayne@69
|
180 void SelectAligns ( );
|
jpayne@69
|
181
|
jpayne@69
|
182
|
jpayne@69
|
183 //------------------------------------------------------------- ParseArgs ----//
|
jpayne@69
|
184 void ParseArgs (int argc, char ** argv);
|
jpayne@69
|
185
|
jpayne@69
|
186
|
jpayne@69
|
187 //------------------------------------------------------------- PrintHelp ----//
|
jpayne@69
|
188 void PrintHelp (const char * s);
|
jpayne@69
|
189
|
jpayne@69
|
190
|
jpayne@69
|
191 //------------------------------------------------------------ PrintUsage ----//
|
jpayne@69
|
192 void PrintUsage (const char * s);
|
jpayne@69
|
193
|
jpayne@69
|
194
|
jpayne@69
|
195
|
jpayne@69
|
196
|
jpayne@69
|
197 //========================================================= Function Defs ====//
|
jpayne@69
|
198 int main (int argc, char ** argv)
|
jpayne@69
|
199 {
|
jpayne@69
|
200 vector<const SNP_t *> snps;
|
jpayne@69
|
201 DeltaGraph_t graph;
|
jpayne@69
|
202
|
jpayne@69
|
203
|
jpayne@69
|
204 //-- Command line parsing
|
jpayne@69
|
205 ParseArgs (argc, argv);
|
jpayne@69
|
206
|
jpayne@69
|
207 //-- Select alignments
|
jpayne@69
|
208 if ( OPT_SelectAligns )
|
jpayne@69
|
209 SelectAligns ( );
|
jpayne@69
|
210
|
jpayne@69
|
211 //-- Build the alignment graph from the delta file
|
jpayne@69
|
212 graph . build (OPT_AlignName, true);
|
jpayne@69
|
213
|
jpayne@69
|
214 //-- Read sequences
|
jpayne@69
|
215 graph . loadSequences ( );
|
jpayne@69
|
216
|
jpayne@69
|
217 //-- Locate the SNPs
|
jpayne@69
|
218 FindSNPs (graph);
|
jpayne@69
|
219
|
jpayne@69
|
220 //-- Check for ambiguous alignment regions
|
jpayne@69
|
221 CheckSNPs (graph);
|
jpayne@69
|
222
|
jpayne@69
|
223
|
jpayne@69
|
224 //-- Collect and sort the SNPs
|
jpayne@69
|
225 map<string, DeltaNode_t>::iterator mi;
|
jpayne@69
|
226 vector<DeltaEdge_t *>::iterator ei;
|
jpayne@69
|
227 vector<DeltaEdgelet_t *>::iterator li;
|
jpayne@69
|
228 vector<SNP_t *>::iterator si;
|
jpayne@69
|
229 for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi )
|
jpayne@69
|
230 for ( ei = mi->second.edges.begin( ); ei != mi->second.edges.end( ); ++ ei )
|
jpayne@69
|
231 for ( li = (*ei)->edgelets.begin( ); li != (*ei)->edgelets.end( ); ++ li )
|
jpayne@69
|
232 for ( si = (*li)->snps.begin( ); si != (*li)->snps.end( ); ++ si )
|
jpayne@69
|
233 if ( (OPT_ShowConflict ||
|
jpayne@69
|
234 ((*si)->conR == 0 && (*si)->conQ == 0))
|
jpayne@69
|
235 &&
|
jpayne@69
|
236 (OPT_ShowIndels ||
|
jpayne@69
|
237 ((*si)->cR != INDEL_CHAR && (*si)->cQ != INDEL_CHAR)) )
|
jpayne@69
|
238 snps . push_back (*si);
|
jpayne@69
|
239
|
jpayne@69
|
240 if ( OPT_SortReference )
|
jpayne@69
|
241 sort (snps . begin( ), snps . end( ), SNP_R_Sort( ));
|
jpayne@69
|
242 else
|
jpayne@69
|
243 sort (snps . begin( ), snps . end( ), SNP_Q_Sort( ));
|
jpayne@69
|
244
|
jpayne@69
|
245
|
jpayne@69
|
246 //-- Output data to stdout
|
jpayne@69
|
247 if ( OPT_PrintTabular )
|
jpayne@69
|
248 PrintTabular (snps, graph);
|
jpayne@69
|
249 else
|
jpayne@69
|
250 PrintHuman (snps, graph);
|
jpayne@69
|
251
|
jpayne@69
|
252
|
jpayne@69
|
253 return EXIT_SUCCESS;
|
jpayne@69
|
254 }
|
jpayne@69
|
255
|
jpayne@69
|
256
|
jpayne@69
|
257
|
jpayne@69
|
258 //------------------------------------------------------------- CheckSNPs ----//
|
jpayne@69
|
259 void CheckSNPs (DeltaGraph_t & graph)
|
jpayne@69
|
260 {
|
jpayne@69
|
261 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
262 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
263 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
264 vector<SNP_t *>::iterator si;
|
jpayne@69
|
265 long i;
|
jpayne@69
|
266
|
jpayne@69
|
267 //-- For each reference sequence
|
jpayne@69
|
268 long ref_size = 0;
|
jpayne@69
|
269 long ref_len = 0;
|
jpayne@69
|
270 unsigned char * ref_cov = NULL;
|
jpayne@69
|
271 for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi )
|
jpayne@69
|
272 {
|
jpayne@69
|
273 //-- Reset the reference coverage array
|
jpayne@69
|
274 ref_len = (mi -> second) . len;
|
jpayne@69
|
275 if ( ref_len > ref_size )
|
jpayne@69
|
276 {
|
jpayne@69
|
277 ref_cov = (unsigned char *) Safe_realloc (ref_cov, ref_len + 1);
|
jpayne@69
|
278 ref_size = ref_len;
|
jpayne@69
|
279 }
|
jpayne@69
|
280 for ( i = 1; i <= ref_len; ++ i )
|
jpayne@69
|
281 ref_cov[i] = 0;
|
jpayne@69
|
282
|
jpayne@69
|
283 //-- Add to the reference coverage
|
jpayne@69
|
284 for ( ei = (mi -> second) . edges . begin( );
|
jpayne@69
|
285 ei != (mi -> second) . edges . end( ); ++ ei )
|
jpayne@69
|
286 for ( eli = (*ei) -> edgelets . begin( );
|
jpayne@69
|
287 eli != (*ei) -> edgelets . end( ); ++ eli )
|
jpayne@69
|
288 for ( i = (*eli) -> loR; i <= (*eli) -> hiR; i ++ )
|
jpayne@69
|
289 if ( ref_cov [i] < UCHAR_MAX )
|
jpayne@69
|
290 ref_cov [i] ++;
|
jpayne@69
|
291
|
jpayne@69
|
292 //-- Set the SNP conflict counter
|
jpayne@69
|
293 for ( ei = (mi -> second) . edges . begin( );
|
jpayne@69
|
294 ei != (mi -> second) . edges . end( ); ++ ei )
|
jpayne@69
|
295 for ( eli = (*ei) -> edgelets . begin( );
|
jpayne@69
|
296 eli != (*ei) -> edgelets . end( ); ++ eli )
|
jpayne@69
|
297 for ( si = (*eli)->snps.begin( ); si != (*eli)->snps.end( ); ++ si )
|
jpayne@69
|
298 (*si) -> conR = ref_cov [(*si)->pR] - 1;
|
jpayne@69
|
299 }
|
jpayne@69
|
300 free (ref_cov);
|
jpayne@69
|
301
|
jpayne@69
|
302
|
jpayne@69
|
303 //-- For each query sequence
|
jpayne@69
|
304 long qry_size = 0;
|
jpayne@69
|
305 long qry_len = 0;
|
jpayne@69
|
306 unsigned char * qry_cov = NULL;
|
jpayne@69
|
307 for ( mi = graph.qrynodes.begin( ); mi != graph.qrynodes.end( ); ++ mi )
|
jpayne@69
|
308 {
|
jpayne@69
|
309 //-- Reset the query coverage array
|
jpayne@69
|
310 qry_len = (mi -> second) . len;
|
jpayne@69
|
311 if ( qry_len > qry_size )
|
jpayne@69
|
312 {
|
jpayne@69
|
313 qry_cov = (unsigned char *) Safe_realloc (qry_cov, qry_len + 1);
|
jpayne@69
|
314 qry_size = qry_len;
|
jpayne@69
|
315 }
|
jpayne@69
|
316 for ( i = 1; i <= qry_len; ++ i )
|
jpayne@69
|
317 qry_cov[i] = 0;
|
jpayne@69
|
318
|
jpayne@69
|
319 //-- Add to the query coverage
|
jpayne@69
|
320 for ( ei = (mi -> second) . edges . begin( );
|
jpayne@69
|
321 ei != (mi -> second) . edges . end( ); ++ ei )
|
jpayne@69
|
322 for ( eli = (*ei) -> edgelets . begin( );
|
jpayne@69
|
323 eli != (*ei) -> edgelets . end( ); ++ eli )
|
jpayne@69
|
324 for ( i = (*eli) -> loQ; i <= (*eli) -> hiQ; i ++ )
|
jpayne@69
|
325 if ( qry_cov [i] < UCHAR_MAX )
|
jpayne@69
|
326 qry_cov [i] ++;
|
jpayne@69
|
327
|
jpayne@69
|
328 //-- Set the SNP conflict counter
|
jpayne@69
|
329 for ( ei = (mi -> second) . edges . begin( );
|
jpayne@69
|
330 ei != (mi -> second) . edges . end( ); ++ ei )
|
jpayne@69
|
331 for ( eli = (*ei) -> edgelets . begin( );
|
jpayne@69
|
332 eli != (*ei) -> edgelets . end( ); ++ eli )
|
jpayne@69
|
333 for ( si = (*eli)->snps.begin( ); si != (*eli)->snps.end( ); ++ si )
|
jpayne@69
|
334 (*si) -> conQ = qry_cov [(*si)->pQ] - 1;
|
jpayne@69
|
335 }
|
jpayne@69
|
336 free (qry_cov);
|
jpayne@69
|
337 }
|
jpayne@69
|
338
|
jpayne@69
|
339
|
jpayne@69
|
340
|
jpayne@69
|
341
|
jpayne@69
|
342 //-------------------------------------------------------------- FindSNPs ----//
|
jpayne@69
|
343 void FindSNPs (DeltaGraph_t & graph)
|
jpayne@69
|
344 {
|
jpayne@69
|
345 map<string, DeltaNode_t>::iterator mi;
|
jpayne@69
|
346 vector<DeltaEdge_t *>::iterator ei;
|
jpayne@69
|
347 vector<DeltaEdgelet_t *>::iterator li;
|
jpayne@69
|
348 vector<SNP_t *>::iterator si, psi, nsi;
|
jpayne@69
|
349
|
jpayne@69
|
350 //-- For each alignment, identify the SNPs
|
jpayne@69
|
351 for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi )
|
jpayne@69
|
352 for ( ei = mi->second.edges.begin( ); ei != mi->second.edges.end( ); ++ ei )
|
jpayne@69
|
353 {
|
jpayne@69
|
354 SNP_t * snp;
|
jpayne@69
|
355 int ri, qi;
|
jpayne@69
|
356 char * R[] = {(*ei)->refnode->seq, NULL, NULL, NULL, NULL, NULL, NULL};
|
jpayne@69
|
357 char * Q[] = {(*ei)->qrynode->seq, NULL, NULL, NULL, NULL, NULL, NULL};
|
jpayne@69
|
358
|
jpayne@69
|
359 long i;
|
jpayne@69
|
360 long lenR = (*ei) -> refnode -> len;
|
jpayne@69
|
361 long lenQ = (*ei) -> qrynode -> len;
|
jpayne@69
|
362
|
jpayne@69
|
363 for (li = (*ei)->edgelets.begin( ); li != (*ei)->edgelets.end( ); ++ li)
|
jpayne@69
|
364 {
|
jpayne@69
|
365 long delta;
|
jpayne@69
|
366 int frameR, frameQ, sign;
|
jpayne@69
|
367 long sR, eR, sQ, eQ;
|
jpayne@69
|
368 long rpos, qpos, remain;
|
jpayne@69
|
369 long rctx, qctx;
|
jpayne@69
|
370 long alenR = lenR;
|
jpayne@69
|
371 long alenQ = lenQ;
|
jpayne@69
|
372
|
jpayne@69
|
373 //-- Only do the ones requested by user
|
jpayne@69
|
374 if ( OPT_SelectAligns )
|
jpayne@69
|
375 {
|
jpayne@69
|
376 ostringstream ss;
|
jpayne@69
|
377 set<string>::iterator si;
|
jpayne@69
|
378
|
jpayne@69
|
379 if ( (*li) -> dirR == FORWARD_DIR )
|
jpayne@69
|
380 ss << (*li) -> loR << ' ' << (*li) -> hiR << ' ';
|
jpayne@69
|
381 else
|
jpayne@69
|
382 ss << (*li) -> hiR << ' ' << (*li) -> loR << ' ';
|
jpayne@69
|
383
|
jpayne@69
|
384 if ( (*li) -> dirQ == FORWARD_DIR )
|
jpayne@69
|
385 ss << (*li) -> loQ << ' ' << (*li) -> hiQ << ' ';
|
jpayne@69
|
386 else
|
jpayne@69
|
387 ss << (*li) -> hiQ << ' ' << (*li) -> loQ << ' ';
|
jpayne@69
|
388
|
jpayne@69
|
389 ss << *((*ei)->refnode->id) << ' ' << *((*ei)->qrynode->id);
|
jpayne@69
|
390
|
jpayne@69
|
391 si = OPT_Aligns . find (ss .str( ));
|
jpayne@69
|
392 if ( si == OPT_Aligns . end( ) )
|
jpayne@69
|
393 continue;
|
jpayne@69
|
394 else
|
jpayne@69
|
395 OPT_Aligns . erase (si);
|
jpayne@69
|
396 }
|
jpayne@69
|
397
|
jpayne@69
|
398 //-- Point the coords the right direction
|
jpayne@69
|
399 frameR = 1;
|
jpayne@69
|
400 if ( (*li) -> dirR == REVERSE_DIR )
|
jpayne@69
|
401 {
|
jpayne@69
|
402 sR = RevC ((*li) -> hiR, lenR);
|
jpayne@69
|
403 eR = RevC ((*li) -> loR, lenR);
|
jpayne@69
|
404 frameR += 3;
|
jpayne@69
|
405 }
|
jpayne@69
|
406 else
|
jpayne@69
|
407 {
|
jpayne@69
|
408 sR = (*li) -> loR;
|
jpayne@69
|
409 eR = (*li) -> hiR;
|
jpayne@69
|
410 }
|
jpayne@69
|
411
|
jpayne@69
|
412 frameQ = 1;
|
jpayne@69
|
413 if ( (*li) -> dirQ == REVERSE_DIR )
|
jpayne@69
|
414 {
|
jpayne@69
|
415 sQ = RevC ((*li) -> hiQ, lenQ);
|
jpayne@69
|
416 eQ = RevC ((*li) -> loQ, lenQ);
|
jpayne@69
|
417 frameQ += 3;
|
jpayne@69
|
418 }
|
jpayne@69
|
419 else
|
jpayne@69
|
420 {
|
jpayne@69
|
421 sQ = (*li) -> loQ;
|
jpayne@69
|
422 eQ = (*li) -> hiQ;
|
jpayne@69
|
423 }
|
jpayne@69
|
424
|
jpayne@69
|
425 //-- Translate coords to AA if necessary
|
jpayne@69
|
426 if ( graph . datatype == PROMER_DATA )
|
jpayne@69
|
427 {
|
jpayne@69
|
428 alenR /= 3;
|
jpayne@69
|
429 alenQ /= 3;
|
jpayne@69
|
430
|
jpayne@69
|
431 frameR += (sR + 2) % 3;
|
jpayne@69
|
432 frameQ += (sQ + 2) % 3;
|
jpayne@69
|
433
|
jpayne@69
|
434 // remeber that eR and eQ point to the last base in the codon
|
jpayne@69
|
435 sR = (sR + 2) / 3;
|
jpayne@69
|
436 eR = eR / 3;
|
jpayne@69
|
437 sQ = (sQ + 2) / 3;
|
jpayne@69
|
438 eQ = eQ / 3;
|
jpayne@69
|
439 }
|
jpayne@69
|
440
|
jpayne@69
|
441 ri = frameR;
|
jpayne@69
|
442 qi = frameQ;
|
jpayne@69
|
443
|
jpayne@69
|
444 if ( frameR > 3 )
|
jpayne@69
|
445 frameR = -(frameR - 3);
|
jpayne@69
|
446 if ( frameQ > 3 )
|
jpayne@69
|
447 frameQ = -(frameQ - 3);
|
jpayne@69
|
448
|
jpayne@69
|
449 //-- Generate the sequences if needed
|
jpayne@69
|
450 if ( R [ri] == NULL )
|
jpayne@69
|
451 {
|
jpayne@69
|
452 if ( graph . datatype == PROMER_DATA )
|
jpayne@69
|
453 {
|
jpayne@69
|
454 R [ri] = (char *) Safe_malloc (alenR + 2);
|
jpayne@69
|
455 R [ri][0] = '\0';
|
jpayne@69
|
456 Translate_DNA (R [0], R [ri], ri);
|
jpayne@69
|
457 }
|
jpayne@69
|
458 else
|
jpayne@69
|
459 {
|
jpayne@69
|
460 R [ri] = (char *) Safe_malloc (alenR + 2);
|
jpayne@69
|
461 R [ri][0] = '\0';
|
jpayne@69
|
462 strcpy (R [ri] + 1, R [0] + 1);
|
jpayne@69
|
463 if ( (*li) -> dirR == REVERSE_DIR )
|
jpayne@69
|
464 Reverse_Complement (R [ri], 1, lenR);
|
jpayne@69
|
465 }
|
jpayne@69
|
466 }
|
jpayne@69
|
467 if ( Q [qi] == NULL )
|
jpayne@69
|
468 {
|
jpayne@69
|
469 if ( graph . datatype == PROMER_DATA )
|
jpayne@69
|
470 {
|
jpayne@69
|
471 Q [qi] = (char *) Safe_malloc (alenQ + 2);
|
jpayne@69
|
472 Q [qi][0] = '\0';
|
jpayne@69
|
473 Translate_DNA (Q [0], Q [qi], qi);
|
jpayne@69
|
474 }
|
jpayne@69
|
475 else
|
jpayne@69
|
476 {
|
jpayne@69
|
477 Q [qi] = (char *) Safe_malloc (alenQ + 2);
|
jpayne@69
|
478 Q [qi][0] = '\0';
|
jpayne@69
|
479 strcpy (Q [qi] + 1, Q [0] + 1);
|
jpayne@69
|
480 if ( (*li) -> dirQ == REVERSE_DIR )
|
jpayne@69
|
481 Reverse_Complement (Q [qi], 1, lenQ);
|
jpayne@69
|
482 }
|
jpayne@69
|
483 }
|
jpayne@69
|
484
|
jpayne@69
|
485 //-- Locate the SNPs
|
jpayne@69
|
486 rpos = sR;
|
jpayne@69
|
487 qpos = sQ;
|
jpayne@69
|
488 remain = eR - sR + 1;
|
jpayne@69
|
489
|
jpayne@69
|
490 (*li) -> frmR = frameR;
|
jpayne@69
|
491 (*li) -> frmQ = frameQ;
|
jpayne@69
|
492
|
jpayne@69
|
493 istringstream ss;
|
jpayne@69
|
494 ss . str ((*li)->delta);
|
jpayne@69
|
495
|
jpayne@69
|
496 while ( ss >> delta && delta != 0 )
|
jpayne@69
|
497 {
|
jpayne@69
|
498 sign = delta > 0 ? 1 : -1;
|
jpayne@69
|
499 delta = labs (delta);
|
jpayne@69
|
500
|
jpayne@69
|
501 //-- For all SNPs before the next indel
|
jpayne@69
|
502 for ( i = 1; i < delta; i ++ )
|
jpayne@69
|
503 if ( R [ri] [rpos ++] != Q [qi] [qpos ++] )
|
jpayne@69
|
504 {
|
jpayne@69
|
505 if ( graph.datatype == NUCMER_DATA &&
|
jpayne@69
|
506 CompareIUPAC (R [ri][rpos-1], Q [qi][qpos-1]) )
|
jpayne@69
|
507 continue;
|
jpayne@69
|
508
|
jpayne@69
|
509 snp = new SNP_t;
|
jpayne@69
|
510 snp -> ep = *ei;
|
jpayne@69
|
511 snp -> lp = *li;
|
jpayne@69
|
512 snp -> pR = Norm (rpos-1, lenR, frameR, graph.datatype);
|
jpayne@69
|
513 snp -> pQ = Norm (qpos-1, lenQ, frameQ, graph.datatype);
|
jpayne@69
|
514 snp -> cR = toupper (R [ri] [rpos-1]);
|
jpayne@69
|
515 snp -> cQ = toupper (Q [qi] [qpos-1]);
|
jpayne@69
|
516
|
jpayne@69
|
517 for ( rctx = rpos - OPT_Context - 1;
|
jpayne@69
|
518 rctx < rpos + OPT_Context; rctx ++ )
|
jpayne@69
|
519 if ( rctx < 1 || rctx > alenR )
|
jpayne@69
|
520 snp -> ctxR . push_back (SEQEND_CHAR);
|
jpayne@69
|
521 else if ( rctx == rpos - 1 )
|
jpayne@69
|
522 snp -> ctxR . push_back (snp -> cR);
|
jpayne@69
|
523 else
|
jpayne@69
|
524 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
|
jpayne@69
|
525
|
jpayne@69
|
526 for ( qctx = qpos - OPT_Context - 1;
|
jpayne@69
|
527 qctx < qpos + OPT_Context; qctx ++ )
|
jpayne@69
|
528 if ( qctx < 1 || qctx > alenQ )
|
jpayne@69
|
529 snp -> ctxQ . push_back (SEQEND_CHAR);
|
jpayne@69
|
530 else if ( qctx == qpos - 1 )
|
jpayne@69
|
531 snp -> ctxQ . push_back (snp -> cQ);
|
jpayne@69
|
532 else
|
jpayne@69
|
533 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
|
jpayne@69
|
534
|
jpayne@69
|
535 (*li) -> snps . push_back (snp);
|
jpayne@69
|
536 }
|
jpayne@69
|
537
|
jpayne@69
|
538 //-- For the indel
|
jpayne@69
|
539 snp = new SNP_t;
|
jpayne@69
|
540 snp -> ep = *ei;
|
jpayne@69
|
541 snp -> lp = *li;
|
jpayne@69
|
542
|
jpayne@69
|
543 for ( rctx = rpos - OPT_Context; rctx < rpos; rctx ++ )
|
jpayne@69
|
544 if ( rctx < 1 )
|
jpayne@69
|
545 snp -> ctxR . push_back (SEQEND_CHAR);
|
jpayne@69
|
546 else
|
jpayne@69
|
547 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
|
jpayne@69
|
548
|
jpayne@69
|
549 for ( qctx = qpos - OPT_Context; qctx < qpos; qctx ++ )
|
jpayne@69
|
550 if ( qctx < 1 )
|
jpayne@69
|
551 snp -> ctxQ . push_back (SEQEND_CHAR);
|
jpayne@69
|
552 else
|
jpayne@69
|
553 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
|
jpayne@69
|
554
|
jpayne@69
|
555 if ( sign > 0 )
|
jpayne@69
|
556 {
|
jpayne@69
|
557 snp -> pR = Norm (rpos, lenR, frameR, graph.datatype);
|
jpayne@69
|
558 if ( frameQ > 0 )
|
jpayne@69
|
559 snp -> pQ = Norm (qpos - 1, lenQ, frameQ, graph.datatype);
|
jpayne@69
|
560 else
|
jpayne@69
|
561 snp -> pQ = Norm (qpos, lenQ, frameQ, graph.datatype);
|
jpayne@69
|
562
|
jpayne@69
|
563 snp -> cR = toupper (R [ri] [rpos ++]);
|
jpayne@69
|
564 snp -> cQ = INDEL_CHAR;
|
jpayne@69
|
565
|
jpayne@69
|
566 remain -= i;
|
jpayne@69
|
567 rctx ++;
|
jpayne@69
|
568 }
|
jpayne@69
|
569 else
|
jpayne@69
|
570 {
|
jpayne@69
|
571 snp -> pQ = Norm (qpos, lenQ, frameQ, graph.datatype);
|
jpayne@69
|
572 if ( frameR > 0 )
|
jpayne@69
|
573 snp -> pR = Norm (rpos - 1, lenR, frameR, graph.datatype);
|
jpayne@69
|
574 else
|
jpayne@69
|
575 snp -> pR = Norm (rpos, lenR, frameR, graph.datatype);
|
jpayne@69
|
576
|
jpayne@69
|
577 snp -> cR = INDEL_CHAR;
|
jpayne@69
|
578 snp -> cQ = toupper (Q [qi] [qpos ++]);
|
jpayne@69
|
579
|
jpayne@69
|
580 remain -= i - 1;
|
jpayne@69
|
581 qctx ++;
|
jpayne@69
|
582 }
|
jpayne@69
|
583
|
jpayne@69
|
584 snp -> ctxR . push_back (snp -> cR);
|
jpayne@69
|
585 for ( ; rctx < rpos + OPT_Context; rctx ++ )
|
jpayne@69
|
586 if ( rctx > alenR )
|
jpayne@69
|
587 snp -> ctxR . push_back (SEQEND_CHAR);
|
jpayne@69
|
588 else
|
jpayne@69
|
589 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
|
jpayne@69
|
590
|
jpayne@69
|
591 snp -> ctxQ . push_back (snp -> cQ);
|
jpayne@69
|
592 for ( ; qctx < qpos + OPT_Context; qctx ++ )
|
jpayne@69
|
593 if ( qctx > alenQ )
|
jpayne@69
|
594 snp -> ctxQ . push_back (SEQEND_CHAR);
|
jpayne@69
|
595 else
|
jpayne@69
|
596 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
|
jpayne@69
|
597
|
jpayne@69
|
598 (*li) -> snps . push_back (snp);
|
jpayne@69
|
599 }
|
jpayne@69
|
600
|
jpayne@69
|
601 //-- For all SNPs after the final indel
|
jpayne@69
|
602 for ( i = 0; i < remain; i ++ )
|
jpayne@69
|
603 if ( R [ri] [rpos ++] != Q [qi] [qpos ++] )
|
jpayne@69
|
604 {
|
jpayne@69
|
605 if ( graph.datatype == NUCMER_DATA &&
|
jpayne@69
|
606 CompareIUPAC (R [ri][rpos-1], Q [qi][qpos-1]) )
|
jpayne@69
|
607 continue;
|
jpayne@69
|
608
|
jpayne@69
|
609 snp = new SNP_t;
|
jpayne@69
|
610 snp -> ep = *ei;
|
jpayne@69
|
611 snp -> lp = *li;
|
jpayne@69
|
612 snp -> pR = Norm (rpos-1, lenR, frameR, graph.datatype);
|
jpayne@69
|
613 snp -> pQ = Norm (qpos-1, lenQ, frameQ, graph.datatype);
|
jpayne@69
|
614 snp -> cR = toupper (R [ri] [rpos-1]);
|
jpayne@69
|
615 snp -> cQ = toupper (Q [qi] [qpos-1]);
|
jpayne@69
|
616
|
jpayne@69
|
617 for ( rctx = rpos - OPT_Context - 1;
|
jpayne@69
|
618 rctx < rpos + OPT_Context; rctx ++ )
|
jpayne@69
|
619 if ( rctx < 1 || rctx > alenR )
|
jpayne@69
|
620 snp -> ctxR . push_back (SEQEND_CHAR);
|
jpayne@69
|
621 else if ( rctx == rpos - 1 )
|
jpayne@69
|
622 snp -> ctxR . push_back (snp -> cR);
|
jpayne@69
|
623 else
|
jpayne@69
|
624 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
|
jpayne@69
|
625
|
jpayne@69
|
626 for ( qctx = qpos - OPT_Context - 1;
|
jpayne@69
|
627 qctx < qpos + OPT_Context; qctx ++ )
|
jpayne@69
|
628 if ( qctx < 1 || qctx > alenQ )
|
jpayne@69
|
629 snp -> ctxQ . push_back (SEQEND_CHAR);
|
jpayne@69
|
630 else if ( qctx == qpos - 1 )
|
jpayne@69
|
631 snp -> ctxQ . push_back (snp -> cQ);
|
jpayne@69
|
632 else
|
jpayne@69
|
633 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
|
jpayne@69
|
634
|
jpayne@69
|
635 (*li) -> snps . push_back (snp);
|
jpayne@69
|
636 }
|
jpayne@69
|
637
|
jpayne@69
|
638
|
jpayne@69
|
639 //-- Sort SNPs and calculate distances
|
jpayne@69
|
640 if ( OPT_SortReference )
|
jpayne@69
|
641 {
|
jpayne@69
|
642 sort ((*li)->snps.begin( ), (*li)->snps.end( ), SNP_R_Sort( ));
|
jpayne@69
|
643
|
jpayne@69
|
644 for ( si = (*li)->snps.begin(); si != (*li)->snps.end(); ++ si )
|
jpayne@69
|
645 {
|
jpayne@69
|
646 psi = si - 1;
|
jpayne@69
|
647 nsi = si + 1;
|
jpayne@69
|
648
|
jpayne@69
|
649 (*si) -> buff = 1 +
|
jpayne@69
|
650 ((*si)->pR - (*li)->loR < (*li)->hiR - (*si)->pR ?
|
jpayne@69
|
651 (*si)->pR - (*li)->loR : (*li)->hiR - (*si)->pR);
|
jpayne@69
|
652
|
jpayne@69
|
653 if ( psi >= (*li) -> snps . begin( ) &&
|
jpayne@69
|
654 (*si)->pR - (*psi)->pR < (*si)->buff )
|
jpayne@69
|
655 (*si) -> buff = (*si)->pR - (*psi)->pR;
|
jpayne@69
|
656
|
jpayne@69
|
657 if ( nsi < (*li) -> snps . end( ) &&
|
jpayne@69
|
658 (*nsi)->pR - (*si)->pR < (*si)->buff )
|
jpayne@69
|
659 (*si) -> buff = (*nsi)->pR - (*si)->pR;
|
jpayne@69
|
660 }
|
jpayne@69
|
661 }
|
jpayne@69
|
662 else
|
jpayne@69
|
663 {
|
jpayne@69
|
664 sort ((*li)->snps.begin( ), (*li)->snps.end( ), SNP_Q_Sort( ));
|
jpayne@69
|
665
|
jpayne@69
|
666 for ( si = (*li)->snps.begin(); si != (*li)->snps.end(); ++ si )
|
jpayne@69
|
667 {
|
jpayne@69
|
668 psi = si - 1;
|
jpayne@69
|
669 nsi = si + 1;
|
jpayne@69
|
670
|
jpayne@69
|
671 (*si) -> buff = 1 +
|
jpayne@69
|
672 ((*si)->pQ - (*li)->loQ < (*li)->hiQ - (*si)->pQ ?
|
jpayne@69
|
673 (*si)->pQ - (*li)->loQ : (*li)->hiQ - (*si)->pQ);
|
jpayne@69
|
674
|
jpayne@69
|
675 if ( psi >= (*li) -> snps . begin( ) &&
|
jpayne@69
|
676 (*si)->pQ - (*psi)->pQ < (*si)->buff )
|
jpayne@69
|
677 (*si) -> buff = (*si)->pQ - (*psi)->pQ;
|
jpayne@69
|
678
|
jpayne@69
|
679 if ( nsi < (*li) -> snps . end( ) &&
|
jpayne@69
|
680 (*nsi)->pQ - (*si)->pQ < (*si)->buff )
|
jpayne@69
|
681 (*si) -> buff = (*nsi)->pQ - (*si)->pQ;
|
jpayne@69
|
682 }
|
jpayne@69
|
683 }
|
jpayne@69
|
684 }
|
jpayne@69
|
685
|
jpayne@69
|
686 //-- Clear up the seq
|
jpayne@69
|
687 for ( i = 1; i <= 6; i ++ )
|
jpayne@69
|
688 {
|
jpayne@69
|
689 free (R[i]);
|
jpayne@69
|
690 free (Q[i]);
|
jpayne@69
|
691 }
|
jpayne@69
|
692 }
|
jpayne@69
|
693
|
jpayne@69
|
694 if ( OPT_SelectAligns && ! OPT_Aligns . empty( ) )
|
jpayne@69
|
695 {
|
jpayne@69
|
696 cerr << "ERROR: One or more alignments from stdin could not be found\n";
|
jpayne@69
|
697 exit (EXIT_FAILURE);
|
jpayne@69
|
698 }
|
jpayne@69
|
699 }
|
jpayne@69
|
700
|
jpayne@69
|
701
|
jpayne@69
|
702
|
jpayne@69
|
703
|
jpayne@69
|
704 //------------------------------------------------------------ PrintHuman ----//
|
jpayne@69
|
705 void PrintHuman (const vector<const SNP_t *> & snps,
|
jpayne@69
|
706 const DeltaGraph_t & graph)
|
jpayne@69
|
707 {
|
jpayne@69
|
708 vector<const SNP_t *>::const_iterator si;
|
jpayne@69
|
709 long dist, distR, distQ;
|
jpayne@69
|
710 int ctxw = 2 * OPT_Context + 1;
|
jpayne@69
|
711 int ctxc = ctxw < 7 ? 7 : ctxw;
|
jpayne@69
|
712
|
jpayne@69
|
713 if ( OPT_PrintHeader )
|
jpayne@69
|
714 {
|
jpayne@69
|
715 printf ("%s %s\n%s\n\n",
|
jpayne@69
|
716 graph . refpath . c_str( ), graph . qrypath . c_str( ),
|
jpayne@69
|
717 graph . datatype == NUCMER_DATA ? "NUCMER" : "PROMER");
|
jpayne@69
|
718 printf ("%8s %5s %-8s | ", "[P1]", "[SUB]", "[P2]");
|
jpayne@69
|
719 printf ("%8s %8s | ", "[BUFF]", "[DIST]");
|
jpayne@69
|
720 if ( OPT_ShowConflict )
|
jpayne@69
|
721 printf ("%4s %4s | ", "[R]", "[Q]");
|
jpayne@69
|
722 if ( OPT_ShowLength )
|
jpayne@69
|
723 printf ("%8s %8s | ", "[LEN R]", "[LEN Q]");
|
jpayne@69
|
724 if ( OPT_Context )
|
jpayne@69
|
725 {
|
jpayne@69
|
726 for ( int i = 0; i < ctxc - 7; i ++ )
|
jpayne@69
|
727 putchar (' ');
|
jpayne@69
|
728 printf (" [CTX R] ");
|
jpayne@69
|
729 for ( int i = 0; i < ctxc - 7; i ++ )
|
jpayne@69
|
730 putchar (' ');
|
jpayne@69
|
731 printf ("[CTX Q] | ");
|
jpayne@69
|
732 }
|
jpayne@69
|
733 printf ("%5s ", "[FRM]");
|
jpayne@69
|
734 printf ("%s", "[TAGS]");
|
jpayne@69
|
735 printf ("\n");
|
jpayne@69
|
736
|
jpayne@69
|
737 if ( OPT_ShowConflict )
|
jpayne@69
|
738 printf ("=============");
|
jpayne@69
|
739 if ( OPT_ShowLength )
|
jpayne@69
|
740 printf ("=====================");
|
jpayne@69
|
741 if ( OPT_Context )
|
jpayne@69
|
742 for ( int i = 0; i < 2 * ctxc + 7; i ++ )
|
jpayne@69
|
743 putchar ('=');
|
jpayne@69
|
744 printf("================================="
|
jpayne@69
|
745 "==================================\n");
|
jpayne@69
|
746 }
|
jpayne@69
|
747
|
jpayne@69
|
748 for ( si = snps . begin( ); si != snps . end( ); ++ si )
|
jpayne@69
|
749 {
|
jpayne@69
|
750 distR = (*si)->pR < (signed long)(*si)->ep->refnode->len - (*si)->pR + 1 ?
|
jpayne@69
|
751 (*si)->pR : (*si)->ep->refnode->len - (*si)->pR + 1;
|
jpayne@69
|
752 distQ = (*si)->pQ < (signed long)(*si)->ep->qrynode->len - (*si)->pQ + 1 ?
|
jpayne@69
|
753 (*si)->pQ : (*si)->ep->qrynode->len - (*si)->pQ + 1;
|
jpayne@69
|
754 dist = distR < distQ ? distR : distQ;
|
jpayne@69
|
755
|
jpayne@69
|
756 printf ("%8ld %c %c %-8ld | ",
|
jpayne@69
|
757 (*si)->pR, (*si)->cR, (*si)->cQ, (*si)->pQ);
|
jpayne@69
|
758 printf ("%8ld %8ld | ", (*si)->buff, dist);
|
jpayne@69
|
759 if ( OPT_ShowConflict )
|
jpayne@69
|
760 printf ("%4d %4d | ", (*si)->conR, (*si)->conQ);
|
jpayne@69
|
761 if ( OPT_ShowLength )
|
jpayne@69
|
762 printf ("%8ld %8ld | ",
|
jpayne@69
|
763 (*si)->ep->refnode->len, (*si)->ep->qrynode->len);
|
jpayne@69
|
764 if ( OPT_Context )
|
jpayne@69
|
765 {
|
jpayne@69
|
766 for ( int i = 0; i < ctxc - ctxw; i ++ )
|
jpayne@69
|
767 putchar (' ');
|
jpayne@69
|
768 printf (" %s ", (*si)->ctxR.c_str( ));
|
jpayne@69
|
769 for ( int i = 0; i < ctxc - ctxw; i ++ )
|
jpayne@69
|
770 putchar (' ');
|
jpayne@69
|
771 printf ("%s | ", (*si)->ctxQ.c_str( ));
|
jpayne@69
|
772 }
|
jpayne@69
|
773 printf ("%2d %2d ", (*si)->lp->frmR, (*si)->lp->frmQ);
|
jpayne@69
|
774 printf ("%s\t%s",
|
jpayne@69
|
775 (*si)->ep->refnode->id->c_str( ),
|
jpayne@69
|
776 (*si)->ep->qrynode->id->c_str( ));
|
jpayne@69
|
777 printf ("\n");
|
jpayne@69
|
778 }
|
jpayne@69
|
779 }
|
jpayne@69
|
780
|
jpayne@69
|
781
|
jpayne@69
|
782
|
jpayne@69
|
783
|
jpayne@69
|
784 //---------------------------------------------------------- PrintTabular ----//
|
jpayne@69
|
785 void PrintTabular (const vector<const SNP_t *> & snps,
|
jpayne@69
|
786 const DeltaGraph_t & graph)
|
jpayne@69
|
787 {
|
jpayne@69
|
788 vector<const SNP_t *>::const_iterator si;
|
jpayne@69
|
789 long dist, distR, distQ;
|
jpayne@69
|
790
|
jpayne@69
|
791 if ( OPT_PrintHeader )
|
jpayne@69
|
792 {
|
jpayne@69
|
793 printf ("%s %s\n%s\n\n",
|
jpayne@69
|
794 graph . refpath . c_str( ), graph . qrypath . c_str( ),
|
jpayne@69
|
795 graph . datatype == NUCMER_DATA ? "NUCMER" : "PROMER");
|
jpayne@69
|
796 printf ("%s\t%s\t%s\t%s\t", "[P1]", "[SUB]", "[SUB]", "[P2]");
|
jpayne@69
|
797 printf ("%s\t%s\t", "[BUFF]", "[DIST]");
|
jpayne@69
|
798 if ( OPT_ShowConflict )
|
jpayne@69
|
799 printf ("%s\t%s\t", "[R]", "[Q]");
|
jpayne@69
|
800 if ( OPT_ShowLength )
|
jpayne@69
|
801 printf ("%s\t%s\t", "[LEN R]", "[LEN Q]");
|
jpayne@69
|
802 if ( OPT_Context )
|
jpayne@69
|
803 printf ("%s\t%s\t", "[CTX R]", "[CTX Q]");
|
jpayne@69
|
804 printf ("%s\t", "[FRM]");
|
jpayne@69
|
805 printf ("%s\n", "[TAGS]");
|
jpayne@69
|
806 }
|
jpayne@69
|
807
|
jpayne@69
|
808 for ( si = snps . begin( ); si != snps . end( ); ++ si )
|
jpayne@69
|
809 {
|
jpayne@69
|
810 distR = (*si)->pR < (signed long)(*si)->ep->refnode->len - (*si)->pR + 1 ?
|
jpayne@69
|
811 (*si)->pR : (*si)->ep->refnode->len - (*si)->pR + 1;
|
jpayne@69
|
812 distQ = (*si)->pQ < (signed long)(*si)->ep->qrynode->len - (*si)->pQ + 1 ?
|
jpayne@69
|
813 (*si)->pQ : (*si)->ep->qrynode->len - (*si)->pQ + 1;
|
jpayne@69
|
814 dist = distR < distQ ? distR : distQ;
|
jpayne@69
|
815
|
jpayne@69
|
816 printf ("%ld\t%c\t%c\t%ld\t",
|
jpayne@69
|
817 (*si)->pR, (*si)->cR, (*si)->cQ, (*si)->pQ);
|
jpayne@69
|
818 printf ("%ld\t%ld\t", (*si)->buff, dist);
|
jpayne@69
|
819 if ( OPT_ShowConflict )
|
jpayne@69
|
820 printf ("%d\t%d\t", (*si)->conR, (*si)->conQ);
|
jpayne@69
|
821 if ( OPT_ShowLength )
|
jpayne@69
|
822 printf ("%ld\t%ld\t",
|
jpayne@69
|
823 (*si)->ep->refnode->len, (*si)->ep->qrynode->len);
|
jpayne@69
|
824 if ( OPT_Context )
|
jpayne@69
|
825 printf ("%s\t%s\t", (*si)->ctxR.c_str( ), (*si)->ctxQ.c_str( ));
|
jpayne@69
|
826 printf ("%d\t%d\t", (*si)->lp->frmR, (*si)->lp->frmQ);
|
jpayne@69
|
827 printf ("%s\t%s",
|
jpayne@69
|
828 (*si)->ep->refnode->id->c_str( ),
|
jpayne@69
|
829 (*si)->ep->qrynode->id->c_str( ));
|
jpayne@69
|
830 printf ("\n");
|
jpayne@69
|
831 }
|
jpayne@69
|
832 }
|
jpayne@69
|
833
|
jpayne@69
|
834
|
jpayne@69
|
835
|
jpayne@69
|
836
|
jpayne@69
|
837 //---------------------------------------------------------- SelectAligns ----//
|
jpayne@69
|
838 void SelectAligns ( )
|
jpayne@69
|
839 {
|
jpayne@69
|
840 string line, part;
|
jpayne@69
|
841 string s1, e1, s2, e2, tR, tQ;
|
jpayne@69
|
842 istringstream sin;
|
jpayne@69
|
843 ostringstream sout;
|
jpayne@69
|
844
|
jpayne@69
|
845 while ( cin )
|
jpayne@69
|
846 {
|
jpayne@69
|
847 getline (cin, line);
|
jpayne@69
|
848 if ( line . empty( ) )
|
jpayne@69
|
849 continue;
|
jpayne@69
|
850
|
jpayne@69
|
851 sin . clear( );
|
jpayne@69
|
852 sin . str (line);
|
jpayne@69
|
853 sin >> s1 >> e1 >> s2;
|
jpayne@69
|
854 if ( s2 == "|" ) sin >> s2;
|
jpayne@69
|
855 sin >> e2 >> tR >> tQ;
|
jpayne@69
|
856
|
jpayne@69
|
857 if ( sin . fail( ) )
|
jpayne@69
|
858 {
|
jpayne@69
|
859 cerr << "ERROR: Could not parse input from stdin\n";
|
jpayne@69
|
860 exit (EXIT_FAILURE);
|
jpayne@69
|
861 }
|
jpayne@69
|
862
|
jpayne@69
|
863 while ( sin >> part )
|
jpayne@69
|
864 {
|
jpayne@69
|
865 tR = tQ;
|
jpayne@69
|
866 tQ = part;
|
jpayne@69
|
867 }
|
jpayne@69
|
868
|
jpayne@69
|
869 sout . clear( );
|
jpayne@69
|
870 sout . str ("");
|
jpayne@69
|
871 sout << s1 << ' ' << e1 << ' '
|
jpayne@69
|
872 << s2 << ' ' << e2 << ' '
|
jpayne@69
|
873 << tR << ' ' << tQ;
|
jpayne@69
|
874
|
jpayne@69
|
875 OPT_Aligns . insert (sout . str( ));
|
jpayne@69
|
876 }
|
jpayne@69
|
877 }
|
jpayne@69
|
878
|
jpayne@69
|
879
|
jpayne@69
|
880
|
jpayne@69
|
881
|
jpayne@69
|
882 //------------------------------------------------------------- ParseArgs ----//
|
jpayne@69
|
883 void ParseArgs (int argc, char ** argv)
|
jpayne@69
|
884 {
|
jpayne@69
|
885 int ch, errflg = 0;
|
jpayne@69
|
886 optarg = NULL;
|
jpayne@69
|
887
|
jpayne@69
|
888 while ( !errflg &&
|
jpayne@69
|
889 ((ch = getopt (argc, argv, "ChHIlqrSTx:")) != EOF) )
|
jpayne@69
|
890 switch (ch)
|
jpayne@69
|
891 {
|
jpayne@69
|
892 case 'C':
|
jpayne@69
|
893 OPT_ShowConflict = false;
|
jpayne@69
|
894 break;
|
jpayne@69
|
895
|
jpayne@69
|
896 case 'h':
|
jpayne@69
|
897 PrintHelp (argv[0]);
|
jpayne@69
|
898 exit (EXIT_SUCCESS);
|
jpayne@69
|
899 break;
|
jpayne@69
|
900
|
jpayne@69
|
901 case 'H':
|
jpayne@69
|
902 OPT_PrintHeader = false;
|
jpayne@69
|
903 break;
|
jpayne@69
|
904
|
jpayne@69
|
905 case 'I':
|
jpayne@69
|
906 OPT_ShowIndels = false;
|
jpayne@69
|
907 break;
|
jpayne@69
|
908
|
jpayne@69
|
909 case 'l':
|
jpayne@69
|
910 OPT_ShowLength = true;
|
jpayne@69
|
911 break;
|
jpayne@69
|
912
|
jpayne@69
|
913 case 'q':
|
jpayne@69
|
914 OPT_SortQuery = true;
|
jpayne@69
|
915 break;
|
jpayne@69
|
916
|
jpayne@69
|
917 case 'r':
|
jpayne@69
|
918 OPT_SortReference = true;
|
jpayne@69
|
919 break;
|
jpayne@69
|
920
|
jpayne@69
|
921 case 'S':
|
jpayne@69
|
922 OPT_SelectAligns = true;
|
jpayne@69
|
923 break;
|
jpayne@69
|
924
|
jpayne@69
|
925 case 'T':
|
jpayne@69
|
926 OPT_PrintTabular = true;
|
jpayne@69
|
927 break;
|
jpayne@69
|
928
|
jpayne@69
|
929 case 'x':
|
jpayne@69
|
930 OPT_Context = atoi (optarg);
|
jpayne@69
|
931 break;
|
jpayne@69
|
932
|
jpayne@69
|
933 default:
|
jpayne@69
|
934 errflg ++;
|
jpayne@69
|
935 }
|
jpayne@69
|
936
|
jpayne@69
|
937 if ( OPT_Context < 0 )
|
jpayne@69
|
938 {
|
jpayne@69
|
939 cerr << "ERROR: SNP context must be a positive int\n";
|
jpayne@69
|
940 errflg ++;
|
jpayne@69
|
941 }
|
jpayne@69
|
942
|
jpayne@69
|
943 if ( OPT_SortReference && OPT_SortQuery )
|
jpayne@69
|
944 cerr << "WARNING: both -r and -q were passed, -q ignored\n";
|
jpayne@69
|
945
|
jpayne@69
|
946 if ( !OPT_SortReference && !OPT_SortQuery )
|
jpayne@69
|
947 OPT_SortReference = true;
|
jpayne@69
|
948
|
jpayne@69
|
949 if ( errflg > 0 || optind != argc - 1 )
|
jpayne@69
|
950 {
|
jpayne@69
|
951 PrintUsage (argv[0]);
|
jpayne@69
|
952 cerr << "Try '" << argv[0] << " -h' for more information.\n";
|
jpayne@69
|
953 exit (EXIT_FAILURE);
|
jpayne@69
|
954 }
|
jpayne@69
|
955
|
jpayne@69
|
956 OPT_AlignName = argv [optind ++];
|
jpayne@69
|
957 }
|
jpayne@69
|
958
|
jpayne@69
|
959
|
jpayne@69
|
960
|
jpayne@69
|
961
|
jpayne@69
|
962 //------------------------------------------------------------- PrintHelp ----//
|
jpayne@69
|
963 void PrintHelp (const char * s)
|
jpayne@69
|
964 {
|
jpayne@69
|
965 PrintUsage (s);
|
jpayne@69
|
966 cerr
|
jpayne@69
|
967 << "-C Do not report SNPs from alignments with an ambiguous\n"
|
jpayne@69
|
968 << " mapping, i.e. only report SNPs where the [R] and [Q]\n"
|
jpayne@69
|
969 << " columns equal 0 and do not output these columns\n"
|
jpayne@69
|
970 << "-h Display help information\n"
|
jpayne@69
|
971 << "-H Do not print the output header\n"
|
jpayne@69
|
972 << "-I Do not report indels\n"
|
jpayne@69
|
973 << "-l Include sequence length information in the output\n"
|
jpayne@69
|
974 << "-q Sort output lines by query IDs and SNP positions\n"
|
jpayne@69
|
975 << "-r Sort output lines by reference IDs and SNP positions\n"
|
jpayne@69
|
976 << "-S Specify which alignments to report by passing\n"
|
jpayne@69
|
977 << " 'show-coords' lines to stdin\n"
|
jpayne@69
|
978 << "-T Switch to tab-delimited format\n"
|
jpayne@69
|
979 << "-x int Include x characters of surrounding SNP context in the\n"
|
jpayne@69
|
980 << " output, default "
|
jpayne@69
|
981 << OPT_Context << endl
|
jpayne@69
|
982 << endl;
|
jpayne@69
|
983
|
jpayne@69
|
984 cerr
|
jpayne@69
|
985 << " Input is the .delta output of either the nucmer or promer program\n"
|
jpayne@69
|
986 << "passed on the command line.\n"
|
jpayne@69
|
987 << " Output is to stdout, and consists of a list of SNPs (or amino acid\n"
|
jpayne@69
|
988 << "substitutions for promer) with positions and other useful info.\n"
|
jpayne@69
|
989 << "Output will be sorted with -r by default and the [BUFF] column will\n"
|
jpayne@69
|
990 << "always refer to the sequence whose positions have been sorted. This\n"
|
jpayne@69
|
991 << "value specifies the distance from this SNP to the nearest mismatch\n"
|
jpayne@69
|
992 << "(end of alignment, indel, SNP, etc) in the same alignment, while the\n"
|
jpayne@69
|
993 << "[DIST] column specifies the distance from this SNP to the nearest\n"
|
jpayne@69
|
994 << "sequence end. SNPs for which the [R] and [Q] columns are greater than\n"
|
jpayne@69
|
995 << "0 should be evaluated with caution, as these columns specify the\n"
|
jpayne@69
|
996 << "number of other alignments which overlap this position. Use -C to\n"
|
jpayne@69
|
997 << "assure SNPs are only reported from unique alignment regions.\n"
|
jpayne@69
|
998 << endl;
|
jpayne@69
|
999
|
jpayne@69
|
1000 return;
|
jpayne@69
|
1001 }
|
jpayne@69
|
1002
|
jpayne@69
|
1003
|
jpayne@69
|
1004
|
jpayne@69
|
1005
|
jpayne@69
|
1006 //------------------------------------------------------------ PrintUsage ----//
|
jpayne@69
|
1007 void PrintUsage (const char * s)
|
jpayne@69
|
1008 {
|
jpayne@69
|
1009 cerr
|
jpayne@69
|
1010 << "\nUSAGE: " << s << " [options] <deltafile>\n\n";
|
jpayne@69
|
1011 return;
|
jpayne@69
|
1012 }
|