comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/show-snps.cc @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 //------------------------------------------------------------------------------
2 // Programmer: Adam M Phillippy, The Institute for Genomic Research
3 // File: show-snps.cc
4 // Date: 12 / 08 / 2004
5 //
6 // Usage: show-snps [options] <deltafile>
7 // Try 'show-snps -h' for more information
8 //
9 // Description: For use in conjunction with the MUMmer package. "show-snps"
10 // displays human readable (and machine parse-able) single
11 // base-pair polymorphisms, including indels from the .delta output
12 // of the "nucmer" program. Outputs SNP positions and relative
13 // information to stdout.
14 //
15 //------------------------------------------------------------------------------
16
17 #include "delta.hh"
18 #include "tigrinc.hh"
19 #include "translate.hh"
20 #include "sw_alignscore.hh"
21 #include <vector>
22 #include <algorithm>
23 #include <string>
24 #include <sstream>
25 #include <cstring>
26 #include <map>
27 #include <set>
28 #include <cstdio>
29 using namespace std;
30
31
32
33
34 //=============================================================== Options ====//
35 string OPT_AlignName; // delta file name
36 string OPT_ReferenceName; // reference sequence file name
37 string OPT_QueryName; // query sequence file name
38
39 bool OPT_SortReference = false; // -r option
40 bool OPT_SortQuery = false; // -q option
41 bool OPT_ShowLength = false; // -l option
42 bool OPT_ShowConflict = true; // -C option
43 bool OPT_ShowIndels = true; // -I option
44 bool OPT_PrintTabular = false; // -T option
45 bool OPT_PrintHeader = true; // -H option
46 bool OPT_SelectAligns = false; // -S option
47
48 int OPT_Context = 0; // -x option
49
50 set<string> OPT_Aligns; // -S option
51
52
53
54
55 //============================================================= Constants ====//
56 const char INDEL_CHAR = '.';
57 const char SEQEND_CHAR = '-';
58
59
60
61 struct SNP_R_Sort
62 {
63 bool operator() (const SNP_t * a, const SNP_t * b)
64 {
65 int i = a->ep->refnode->id->compare (*(b->ep->refnode->id));
66
67 if ( i < 0 )
68 return true;
69 else if ( i > 0 )
70 return false;
71 else
72 {
73 if ( a -> pR < b -> pR )
74 return true;
75 else if ( a -> pR > b -> pR )
76 return false;
77 else
78 {
79 int j = a->ep->qrynode->id->compare (*(b->ep->qrynode->id));
80
81 if ( j < 0 )
82 return true;
83 else if ( j > 0 )
84 return false;
85 else
86 {
87 if ( a -> pQ < b -> pQ )
88 return true;
89 else
90 return false;
91 }
92 }
93 }
94 }
95 };
96
97
98 struct SNP_Q_Sort
99 {
100 bool operator() (const SNP_t * a, const SNP_t * b)
101 {
102 int i = a->ep->qrynode->id->compare (*(b->ep->qrynode->id));
103
104 if ( i < 0 )
105 return true;
106 else if ( i > 0 )
107 return false;
108 else
109 {
110 if ( a -> pQ < b -> pQ )
111 return true;
112 else if ( a -> pQ > b -> pQ )
113 return false;
114 else
115 {
116 int j = a->ep->refnode->id->compare (*(b->ep->refnode->id));
117
118 if ( j < 0 )
119 return true;
120 else if ( j > 0 )
121 return false;
122 else
123 {
124 if ( a -> pR < b -> pR )
125 return true;
126 else
127 return false;
128 }
129 }
130 }
131 }
132 };
133
134
135
136
137 //========================================================== Fuction Decs ====//
138 //------------------------------------------------------------------ RevC ----//
139 inline long RevC (long coord, long len)
140 {
141 return len - coord + 1;
142 }
143
144
145 //------------------------------------------------------------------ Norm ----//
146 inline long Norm (long c, long l, int f, AlignmentType_t d)
147 {
148 long retval = (d == PROMER_DATA ? c * 3 - (3 - abs(f)) : c);
149 if ( f < 0 ) retval = RevC (retval, l);
150 return retval;
151 }
152
153
154 //------------------------------------------------------------------ Swap ----//
155 inline void Swap (long & a, long & b)
156 {
157 long t = a; a = b; b = t;
158 }
159
160
161 //------------------------------------------------------------- CheckSNPs ----//
162 void CheckSNPs (DeltaGraph_t & graph);
163
164
165 //-------------------------------------------------------------- FindSNPs ----//
166 void FindSNPs (DeltaGraph_t & graph);
167
168
169 //------------------------------------------------------------ PrintHuman ----//
170 void PrintHuman (const vector<const SNP_t *> & snps,
171 const DeltaGraph_t & graph);
172
173
174 //---------------------------------------------------------- PrintTabular ----//
175 void PrintTabular (const vector<const SNP_t *> & snps,
176 const DeltaGraph_t & graph);
177
178
179 //---------------------------------------------------------- SelectAligns ----//
180 void SelectAligns ( );
181
182
183 //------------------------------------------------------------- ParseArgs ----//
184 void ParseArgs (int argc, char ** argv);
185
186
187 //------------------------------------------------------------- PrintHelp ----//
188 void PrintHelp (const char * s);
189
190
191 //------------------------------------------------------------ PrintUsage ----//
192 void PrintUsage (const char * s);
193
194
195
196
197 //========================================================= Function Defs ====//
198 int main (int argc, char ** argv)
199 {
200 vector<const SNP_t *> snps;
201 DeltaGraph_t graph;
202
203
204 //-- Command line parsing
205 ParseArgs (argc, argv);
206
207 //-- Select alignments
208 if ( OPT_SelectAligns )
209 SelectAligns ( );
210
211 //-- Build the alignment graph from the delta file
212 graph . build (OPT_AlignName, true);
213
214 //-- Read sequences
215 graph . loadSequences ( );
216
217 //-- Locate the SNPs
218 FindSNPs (graph);
219
220 //-- Check for ambiguous alignment regions
221 CheckSNPs (graph);
222
223
224 //-- Collect and sort the SNPs
225 map<string, DeltaNode_t>::iterator mi;
226 vector<DeltaEdge_t *>::iterator ei;
227 vector<DeltaEdgelet_t *>::iterator li;
228 vector<SNP_t *>::iterator si;
229 for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi )
230 for ( ei = mi->second.edges.begin( ); ei != mi->second.edges.end( ); ++ ei )
231 for ( li = (*ei)->edgelets.begin( ); li != (*ei)->edgelets.end( ); ++ li )
232 for ( si = (*li)->snps.begin( ); si != (*li)->snps.end( ); ++ si )
233 if ( (OPT_ShowConflict ||
234 ((*si)->conR == 0 && (*si)->conQ == 0))
235 &&
236 (OPT_ShowIndels ||
237 ((*si)->cR != INDEL_CHAR && (*si)->cQ != INDEL_CHAR)) )
238 snps . push_back (*si);
239
240 if ( OPT_SortReference )
241 sort (snps . begin( ), snps . end( ), SNP_R_Sort( ));
242 else
243 sort (snps . begin( ), snps . end( ), SNP_Q_Sort( ));
244
245
246 //-- Output data to stdout
247 if ( OPT_PrintTabular )
248 PrintTabular (snps, graph);
249 else
250 PrintHuman (snps, graph);
251
252
253 return EXIT_SUCCESS;
254 }
255
256
257
258 //------------------------------------------------------------- CheckSNPs ----//
259 void CheckSNPs (DeltaGraph_t & graph)
260 {
261 map<string, DeltaNode_t>::const_iterator mi;
262 vector<DeltaEdge_t *>::const_iterator ei;
263 vector<DeltaEdgelet_t *>::iterator eli;
264 vector<SNP_t *>::iterator si;
265 long i;
266
267 //-- For each reference sequence
268 long ref_size = 0;
269 long ref_len = 0;
270 unsigned char * ref_cov = NULL;
271 for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi )
272 {
273 //-- Reset the reference coverage array
274 ref_len = (mi -> second) . len;
275 if ( ref_len > ref_size )
276 {
277 ref_cov = (unsigned char *) Safe_realloc (ref_cov, ref_len + 1);
278 ref_size = ref_len;
279 }
280 for ( i = 1; i <= ref_len; ++ i )
281 ref_cov[i] = 0;
282
283 //-- Add to the reference coverage
284 for ( ei = (mi -> second) . edges . begin( );
285 ei != (mi -> second) . edges . end( ); ++ ei )
286 for ( eli = (*ei) -> edgelets . begin( );
287 eli != (*ei) -> edgelets . end( ); ++ eli )
288 for ( i = (*eli) -> loR; i <= (*eli) -> hiR; i ++ )
289 if ( ref_cov [i] < UCHAR_MAX )
290 ref_cov [i] ++;
291
292 //-- Set the SNP conflict counter
293 for ( ei = (mi -> second) . edges . begin( );
294 ei != (mi -> second) . edges . end( ); ++ ei )
295 for ( eli = (*ei) -> edgelets . begin( );
296 eli != (*ei) -> edgelets . end( ); ++ eli )
297 for ( si = (*eli)->snps.begin( ); si != (*eli)->snps.end( ); ++ si )
298 (*si) -> conR = ref_cov [(*si)->pR] - 1;
299 }
300 free (ref_cov);
301
302
303 //-- For each query sequence
304 long qry_size = 0;
305 long qry_len = 0;
306 unsigned char * qry_cov = NULL;
307 for ( mi = graph.qrynodes.begin( ); mi != graph.qrynodes.end( ); ++ mi )
308 {
309 //-- Reset the query coverage array
310 qry_len = (mi -> second) . len;
311 if ( qry_len > qry_size )
312 {
313 qry_cov = (unsigned char *) Safe_realloc (qry_cov, qry_len + 1);
314 qry_size = qry_len;
315 }
316 for ( i = 1; i <= qry_len; ++ i )
317 qry_cov[i] = 0;
318
319 //-- Add to the query coverage
320 for ( ei = (mi -> second) . edges . begin( );
321 ei != (mi -> second) . edges . end( ); ++ ei )
322 for ( eli = (*ei) -> edgelets . begin( );
323 eli != (*ei) -> edgelets . end( ); ++ eli )
324 for ( i = (*eli) -> loQ; i <= (*eli) -> hiQ; i ++ )
325 if ( qry_cov [i] < UCHAR_MAX )
326 qry_cov [i] ++;
327
328 //-- Set the SNP conflict counter
329 for ( ei = (mi -> second) . edges . begin( );
330 ei != (mi -> second) . edges . end( ); ++ ei )
331 for ( eli = (*ei) -> edgelets . begin( );
332 eli != (*ei) -> edgelets . end( ); ++ eli )
333 for ( si = (*eli)->snps.begin( ); si != (*eli)->snps.end( ); ++ si )
334 (*si) -> conQ = qry_cov [(*si)->pQ] - 1;
335 }
336 free (qry_cov);
337 }
338
339
340
341
342 //-------------------------------------------------------------- FindSNPs ----//
343 void FindSNPs (DeltaGraph_t & graph)
344 {
345 map<string, DeltaNode_t>::iterator mi;
346 vector<DeltaEdge_t *>::iterator ei;
347 vector<DeltaEdgelet_t *>::iterator li;
348 vector<SNP_t *>::iterator si, psi, nsi;
349
350 //-- For each alignment, identify the SNPs
351 for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi )
352 for ( ei = mi->second.edges.begin( ); ei != mi->second.edges.end( ); ++ ei )
353 {
354 SNP_t * snp;
355 int ri, qi;
356 char * R[] = {(*ei)->refnode->seq, NULL, NULL, NULL, NULL, NULL, NULL};
357 char * Q[] = {(*ei)->qrynode->seq, NULL, NULL, NULL, NULL, NULL, NULL};
358
359 long i;
360 long lenR = (*ei) -> refnode -> len;
361 long lenQ = (*ei) -> qrynode -> len;
362
363 for (li = (*ei)->edgelets.begin( ); li != (*ei)->edgelets.end( ); ++ li)
364 {
365 long delta;
366 int frameR, frameQ, sign;
367 long sR, eR, sQ, eQ;
368 long rpos, qpos, remain;
369 long rctx, qctx;
370 long alenR = lenR;
371 long alenQ = lenQ;
372
373 //-- Only do the ones requested by user
374 if ( OPT_SelectAligns )
375 {
376 ostringstream ss;
377 set<string>::iterator si;
378
379 if ( (*li) -> dirR == FORWARD_DIR )
380 ss << (*li) -> loR << ' ' << (*li) -> hiR << ' ';
381 else
382 ss << (*li) -> hiR << ' ' << (*li) -> loR << ' ';
383
384 if ( (*li) -> dirQ == FORWARD_DIR )
385 ss << (*li) -> loQ << ' ' << (*li) -> hiQ << ' ';
386 else
387 ss << (*li) -> hiQ << ' ' << (*li) -> loQ << ' ';
388
389 ss << *((*ei)->refnode->id) << ' ' << *((*ei)->qrynode->id);
390
391 si = OPT_Aligns . find (ss .str( ));
392 if ( si == OPT_Aligns . end( ) )
393 continue;
394 else
395 OPT_Aligns . erase (si);
396 }
397
398 //-- Point the coords the right direction
399 frameR = 1;
400 if ( (*li) -> dirR == REVERSE_DIR )
401 {
402 sR = RevC ((*li) -> hiR, lenR);
403 eR = RevC ((*li) -> loR, lenR);
404 frameR += 3;
405 }
406 else
407 {
408 sR = (*li) -> loR;
409 eR = (*li) -> hiR;
410 }
411
412 frameQ = 1;
413 if ( (*li) -> dirQ == REVERSE_DIR )
414 {
415 sQ = RevC ((*li) -> hiQ, lenQ);
416 eQ = RevC ((*li) -> loQ, lenQ);
417 frameQ += 3;
418 }
419 else
420 {
421 sQ = (*li) -> loQ;
422 eQ = (*li) -> hiQ;
423 }
424
425 //-- Translate coords to AA if necessary
426 if ( graph . datatype == PROMER_DATA )
427 {
428 alenR /= 3;
429 alenQ /= 3;
430
431 frameR += (sR + 2) % 3;
432 frameQ += (sQ + 2) % 3;
433
434 // remeber that eR and eQ point to the last base in the codon
435 sR = (sR + 2) / 3;
436 eR = eR / 3;
437 sQ = (sQ + 2) / 3;
438 eQ = eQ / 3;
439 }
440
441 ri = frameR;
442 qi = frameQ;
443
444 if ( frameR > 3 )
445 frameR = -(frameR - 3);
446 if ( frameQ > 3 )
447 frameQ = -(frameQ - 3);
448
449 //-- Generate the sequences if needed
450 if ( R [ri] == NULL )
451 {
452 if ( graph . datatype == PROMER_DATA )
453 {
454 R [ri] = (char *) Safe_malloc (alenR + 2);
455 R [ri][0] = '\0';
456 Translate_DNA (R [0], R [ri], ri);
457 }
458 else
459 {
460 R [ri] = (char *) Safe_malloc (alenR + 2);
461 R [ri][0] = '\0';
462 strcpy (R [ri] + 1, R [0] + 1);
463 if ( (*li) -> dirR == REVERSE_DIR )
464 Reverse_Complement (R [ri], 1, lenR);
465 }
466 }
467 if ( Q [qi] == NULL )
468 {
469 if ( graph . datatype == PROMER_DATA )
470 {
471 Q [qi] = (char *) Safe_malloc (alenQ + 2);
472 Q [qi][0] = '\0';
473 Translate_DNA (Q [0], Q [qi], qi);
474 }
475 else
476 {
477 Q [qi] = (char *) Safe_malloc (alenQ + 2);
478 Q [qi][0] = '\0';
479 strcpy (Q [qi] + 1, Q [0] + 1);
480 if ( (*li) -> dirQ == REVERSE_DIR )
481 Reverse_Complement (Q [qi], 1, lenQ);
482 }
483 }
484
485 //-- Locate the SNPs
486 rpos = sR;
487 qpos = sQ;
488 remain = eR - sR + 1;
489
490 (*li) -> frmR = frameR;
491 (*li) -> frmQ = frameQ;
492
493 istringstream ss;
494 ss . str ((*li)->delta);
495
496 while ( ss >> delta && delta != 0 )
497 {
498 sign = delta > 0 ? 1 : -1;
499 delta = labs (delta);
500
501 //-- For all SNPs before the next indel
502 for ( i = 1; i < delta; i ++ )
503 if ( R [ri] [rpos ++] != Q [qi] [qpos ++] )
504 {
505 if ( graph.datatype == NUCMER_DATA &&
506 CompareIUPAC (R [ri][rpos-1], Q [qi][qpos-1]) )
507 continue;
508
509 snp = new SNP_t;
510 snp -> ep = *ei;
511 snp -> lp = *li;
512 snp -> pR = Norm (rpos-1, lenR, frameR, graph.datatype);
513 snp -> pQ = Norm (qpos-1, lenQ, frameQ, graph.datatype);
514 snp -> cR = toupper (R [ri] [rpos-1]);
515 snp -> cQ = toupper (Q [qi] [qpos-1]);
516
517 for ( rctx = rpos - OPT_Context - 1;
518 rctx < rpos + OPT_Context; rctx ++ )
519 if ( rctx < 1 || rctx > alenR )
520 snp -> ctxR . push_back (SEQEND_CHAR);
521 else if ( rctx == rpos - 1 )
522 snp -> ctxR . push_back (snp -> cR);
523 else
524 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
525
526 for ( qctx = qpos - OPT_Context - 1;
527 qctx < qpos + OPT_Context; qctx ++ )
528 if ( qctx < 1 || qctx > alenQ )
529 snp -> ctxQ . push_back (SEQEND_CHAR);
530 else if ( qctx == qpos - 1 )
531 snp -> ctxQ . push_back (snp -> cQ);
532 else
533 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
534
535 (*li) -> snps . push_back (snp);
536 }
537
538 //-- For the indel
539 snp = new SNP_t;
540 snp -> ep = *ei;
541 snp -> lp = *li;
542
543 for ( rctx = rpos - OPT_Context; rctx < rpos; rctx ++ )
544 if ( rctx < 1 )
545 snp -> ctxR . push_back (SEQEND_CHAR);
546 else
547 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
548
549 for ( qctx = qpos - OPT_Context; qctx < qpos; qctx ++ )
550 if ( qctx < 1 )
551 snp -> ctxQ . push_back (SEQEND_CHAR);
552 else
553 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
554
555 if ( sign > 0 )
556 {
557 snp -> pR = Norm (rpos, lenR, frameR, graph.datatype);
558 if ( frameQ > 0 )
559 snp -> pQ = Norm (qpos - 1, lenQ, frameQ, graph.datatype);
560 else
561 snp -> pQ = Norm (qpos, lenQ, frameQ, graph.datatype);
562
563 snp -> cR = toupper (R [ri] [rpos ++]);
564 snp -> cQ = INDEL_CHAR;
565
566 remain -= i;
567 rctx ++;
568 }
569 else
570 {
571 snp -> pQ = Norm (qpos, lenQ, frameQ, graph.datatype);
572 if ( frameR > 0 )
573 snp -> pR = Norm (rpos - 1, lenR, frameR, graph.datatype);
574 else
575 snp -> pR = Norm (rpos, lenR, frameR, graph.datatype);
576
577 snp -> cR = INDEL_CHAR;
578 snp -> cQ = toupper (Q [qi] [qpos ++]);
579
580 remain -= i - 1;
581 qctx ++;
582 }
583
584 snp -> ctxR . push_back (snp -> cR);
585 for ( ; rctx < rpos + OPT_Context; rctx ++ )
586 if ( rctx > alenR )
587 snp -> ctxR . push_back (SEQEND_CHAR);
588 else
589 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
590
591 snp -> ctxQ . push_back (snp -> cQ);
592 for ( ; qctx < qpos + OPT_Context; qctx ++ )
593 if ( qctx > alenQ )
594 snp -> ctxQ . push_back (SEQEND_CHAR);
595 else
596 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
597
598 (*li) -> snps . push_back (snp);
599 }
600
601 //-- For all SNPs after the final indel
602 for ( i = 0; i < remain; i ++ )
603 if ( R [ri] [rpos ++] != Q [qi] [qpos ++] )
604 {
605 if ( graph.datatype == NUCMER_DATA &&
606 CompareIUPAC (R [ri][rpos-1], Q [qi][qpos-1]) )
607 continue;
608
609 snp = new SNP_t;
610 snp -> ep = *ei;
611 snp -> lp = *li;
612 snp -> pR = Norm (rpos-1, lenR, frameR, graph.datatype);
613 snp -> pQ = Norm (qpos-1, lenQ, frameQ, graph.datatype);
614 snp -> cR = toupper (R [ri] [rpos-1]);
615 snp -> cQ = toupper (Q [qi] [qpos-1]);
616
617 for ( rctx = rpos - OPT_Context - 1;
618 rctx < rpos + OPT_Context; rctx ++ )
619 if ( rctx < 1 || rctx > alenR )
620 snp -> ctxR . push_back (SEQEND_CHAR);
621 else if ( rctx == rpos - 1 )
622 snp -> ctxR . push_back (snp -> cR);
623 else
624 snp -> ctxR . push_back (toupper (R [ri] [rctx]));
625
626 for ( qctx = qpos - OPT_Context - 1;
627 qctx < qpos + OPT_Context; qctx ++ )
628 if ( qctx < 1 || qctx > alenQ )
629 snp -> ctxQ . push_back (SEQEND_CHAR);
630 else if ( qctx == qpos - 1 )
631 snp -> ctxQ . push_back (snp -> cQ);
632 else
633 snp -> ctxQ . push_back (toupper (Q [qi] [qctx]));
634
635 (*li) -> snps . push_back (snp);
636 }
637
638
639 //-- Sort SNPs and calculate distances
640 if ( OPT_SortReference )
641 {
642 sort ((*li)->snps.begin( ), (*li)->snps.end( ), SNP_R_Sort( ));
643
644 for ( si = (*li)->snps.begin(); si != (*li)->snps.end(); ++ si )
645 {
646 psi = si - 1;
647 nsi = si + 1;
648
649 (*si) -> buff = 1 +
650 ((*si)->pR - (*li)->loR < (*li)->hiR - (*si)->pR ?
651 (*si)->pR - (*li)->loR : (*li)->hiR - (*si)->pR);
652
653 if ( psi >= (*li) -> snps . begin( ) &&
654 (*si)->pR - (*psi)->pR < (*si)->buff )
655 (*si) -> buff = (*si)->pR - (*psi)->pR;
656
657 if ( nsi < (*li) -> snps . end( ) &&
658 (*nsi)->pR - (*si)->pR < (*si)->buff )
659 (*si) -> buff = (*nsi)->pR - (*si)->pR;
660 }
661 }
662 else
663 {
664 sort ((*li)->snps.begin( ), (*li)->snps.end( ), SNP_Q_Sort( ));
665
666 for ( si = (*li)->snps.begin(); si != (*li)->snps.end(); ++ si )
667 {
668 psi = si - 1;
669 nsi = si + 1;
670
671 (*si) -> buff = 1 +
672 ((*si)->pQ - (*li)->loQ < (*li)->hiQ - (*si)->pQ ?
673 (*si)->pQ - (*li)->loQ : (*li)->hiQ - (*si)->pQ);
674
675 if ( psi >= (*li) -> snps . begin( ) &&
676 (*si)->pQ - (*psi)->pQ < (*si)->buff )
677 (*si) -> buff = (*si)->pQ - (*psi)->pQ;
678
679 if ( nsi < (*li) -> snps . end( ) &&
680 (*nsi)->pQ - (*si)->pQ < (*si)->buff )
681 (*si) -> buff = (*nsi)->pQ - (*si)->pQ;
682 }
683 }
684 }
685
686 //-- Clear up the seq
687 for ( i = 1; i <= 6; i ++ )
688 {
689 free (R[i]);
690 free (Q[i]);
691 }
692 }
693
694 if ( OPT_SelectAligns && ! OPT_Aligns . empty( ) )
695 {
696 cerr << "ERROR: One or more alignments from stdin could not be found\n";
697 exit (EXIT_FAILURE);
698 }
699 }
700
701
702
703
704 //------------------------------------------------------------ PrintHuman ----//
705 void PrintHuman (const vector<const SNP_t *> & snps,
706 const DeltaGraph_t & graph)
707 {
708 vector<const SNP_t *>::const_iterator si;
709 long dist, distR, distQ;
710 int ctxw = 2 * OPT_Context + 1;
711 int ctxc = ctxw < 7 ? 7 : ctxw;
712
713 if ( OPT_PrintHeader )
714 {
715 printf ("%s %s\n%s\n\n",
716 graph . refpath . c_str( ), graph . qrypath . c_str( ),
717 graph . datatype == NUCMER_DATA ? "NUCMER" : "PROMER");
718 printf ("%8s %5s %-8s | ", "[P1]", "[SUB]", "[P2]");
719 printf ("%8s %8s | ", "[BUFF]", "[DIST]");
720 if ( OPT_ShowConflict )
721 printf ("%4s %4s | ", "[R]", "[Q]");
722 if ( OPT_ShowLength )
723 printf ("%8s %8s | ", "[LEN R]", "[LEN Q]");
724 if ( OPT_Context )
725 {
726 for ( int i = 0; i < ctxc - 7; i ++ )
727 putchar (' ');
728 printf (" [CTX R] ");
729 for ( int i = 0; i < ctxc - 7; i ++ )
730 putchar (' ');
731 printf ("[CTX Q] | ");
732 }
733 printf ("%5s ", "[FRM]");
734 printf ("%s", "[TAGS]");
735 printf ("\n");
736
737 if ( OPT_ShowConflict )
738 printf ("=============");
739 if ( OPT_ShowLength )
740 printf ("=====================");
741 if ( OPT_Context )
742 for ( int i = 0; i < 2 * ctxc + 7; i ++ )
743 putchar ('=');
744 printf("================================="
745 "==================================\n");
746 }
747
748 for ( si = snps . begin( ); si != snps . end( ); ++ si )
749 {
750 distR = (*si)->pR < (signed long)(*si)->ep->refnode->len - (*si)->pR + 1 ?
751 (*si)->pR : (*si)->ep->refnode->len - (*si)->pR + 1;
752 distQ = (*si)->pQ < (signed long)(*si)->ep->qrynode->len - (*si)->pQ + 1 ?
753 (*si)->pQ : (*si)->ep->qrynode->len - (*si)->pQ + 1;
754 dist = distR < distQ ? distR : distQ;
755
756 printf ("%8ld %c %c %-8ld | ",
757 (*si)->pR, (*si)->cR, (*si)->cQ, (*si)->pQ);
758 printf ("%8ld %8ld | ", (*si)->buff, dist);
759 if ( OPT_ShowConflict )
760 printf ("%4d %4d | ", (*si)->conR, (*si)->conQ);
761 if ( OPT_ShowLength )
762 printf ("%8ld %8ld | ",
763 (*si)->ep->refnode->len, (*si)->ep->qrynode->len);
764 if ( OPT_Context )
765 {
766 for ( int i = 0; i < ctxc - ctxw; i ++ )
767 putchar (' ');
768 printf (" %s ", (*si)->ctxR.c_str( ));
769 for ( int i = 0; i < ctxc - ctxw; i ++ )
770 putchar (' ');
771 printf ("%s | ", (*si)->ctxQ.c_str( ));
772 }
773 printf ("%2d %2d ", (*si)->lp->frmR, (*si)->lp->frmQ);
774 printf ("%s\t%s",
775 (*si)->ep->refnode->id->c_str( ),
776 (*si)->ep->qrynode->id->c_str( ));
777 printf ("\n");
778 }
779 }
780
781
782
783
784 //---------------------------------------------------------- PrintTabular ----//
785 void PrintTabular (const vector<const SNP_t *> & snps,
786 const DeltaGraph_t & graph)
787 {
788 vector<const SNP_t *>::const_iterator si;
789 long dist, distR, distQ;
790
791 if ( OPT_PrintHeader )
792 {
793 printf ("%s %s\n%s\n\n",
794 graph . refpath . c_str( ), graph . qrypath . c_str( ),
795 graph . datatype == NUCMER_DATA ? "NUCMER" : "PROMER");
796 printf ("%s\t%s\t%s\t%s\t", "[P1]", "[SUB]", "[SUB]", "[P2]");
797 printf ("%s\t%s\t", "[BUFF]", "[DIST]");
798 if ( OPT_ShowConflict )
799 printf ("%s\t%s\t", "[R]", "[Q]");
800 if ( OPT_ShowLength )
801 printf ("%s\t%s\t", "[LEN R]", "[LEN Q]");
802 if ( OPT_Context )
803 printf ("%s\t%s\t", "[CTX R]", "[CTX Q]");
804 printf ("%s\t", "[FRM]");
805 printf ("%s\n", "[TAGS]");
806 }
807
808 for ( si = snps . begin( ); si != snps . end( ); ++ si )
809 {
810 distR = (*si)->pR < (signed long)(*si)->ep->refnode->len - (*si)->pR + 1 ?
811 (*si)->pR : (*si)->ep->refnode->len - (*si)->pR + 1;
812 distQ = (*si)->pQ < (signed long)(*si)->ep->qrynode->len - (*si)->pQ + 1 ?
813 (*si)->pQ : (*si)->ep->qrynode->len - (*si)->pQ + 1;
814 dist = distR < distQ ? distR : distQ;
815
816 printf ("%ld\t%c\t%c\t%ld\t",
817 (*si)->pR, (*si)->cR, (*si)->cQ, (*si)->pQ);
818 printf ("%ld\t%ld\t", (*si)->buff, dist);
819 if ( OPT_ShowConflict )
820 printf ("%d\t%d\t", (*si)->conR, (*si)->conQ);
821 if ( OPT_ShowLength )
822 printf ("%ld\t%ld\t",
823 (*si)->ep->refnode->len, (*si)->ep->qrynode->len);
824 if ( OPT_Context )
825 printf ("%s\t%s\t", (*si)->ctxR.c_str( ), (*si)->ctxQ.c_str( ));
826 printf ("%d\t%d\t", (*si)->lp->frmR, (*si)->lp->frmQ);
827 printf ("%s\t%s",
828 (*si)->ep->refnode->id->c_str( ),
829 (*si)->ep->qrynode->id->c_str( ));
830 printf ("\n");
831 }
832 }
833
834
835
836
837 //---------------------------------------------------------- SelectAligns ----//
838 void SelectAligns ( )
839 {
840 string line, part;
841 string s1, e1, s2, e2, tR, tQ;
842 istringstream sin;
843 ostringstream sout;
844
845 while ( cin )
846 {
847 getline (cin, line);
848 if ( line . empty( ) )
849 continue;
850
851 sin . clear( );
852 sin . str (line);
853 sin >> s1 >> e1 >> s2;
854 if ( s2 == "|" ) sin >> s2;
855 sin >> e2 >> tR >> tQ;
856
857 if ( sin . fail( ) )
858 {
859 cerr << "ERROR: Could not parse input from stdin\n";
860 exit (EXIT_FAILURE);
861 }
862
863 while ( sin >> part )
864 {
865 tR = tQ;
866 tQ = part;
867 }
868
869 sout . clear( );
870 sout . str ("");
871 sout << s1 << ' ' << e1 << ' '
872 << s2 << ' ' << e2 << ' '
873 << tR << ' ' << tQ;
874
875 OPT_Aligns . insert (sout . str( ));
876 }
877 }
878
879
880
881
882 //------------------------------------------------------------- ParseArgs ----//
883 void ParseArgs (int argc, char ** argv)
884 {
885 int ch, errflg = 0;
886 optarg = NULL;
887
888 while ( !errflg &&
889 ((ch = getopt (argc, argv, "ChHIlqrSTx:")) != EOF) )
890 switch (ch)
891 {
892 case 'C':
893 OPT_ShowConflict = false;
894 break;
895
896 case 'h':
897 PrintHelp (argv[0]);
898 exit (EXIT_SUCCESS);
899 break;
900
901 case 'H':
902 OPT_PrintHeader = false;
903 break;
904
905 case 'I':
906 OPT_ShowIndels = false;
907 break;
908
909 case 'l':
910 OPT_ShowLength = true;
911 break;
912
913 case 'q':
914 OPT_SortQuery = true;
915 break;
916
917 case 'r':
918 OPT_SortReference = true;
919 break;
920
921 case 'S':
922 OPT_SelectAligns = true;
923 break;
924
925 case 'T':
926 OPT_PrintTabular = true;
927 break;
928
929 case 'x':
930 OPT_Context = atoi (optarg);
931 break;
932
933 default:
934 errflg ++;
935 }
936
937 if ( OPT_Context < 0 )
938 {
939 cerr << "ERROR: SNP context must be a positive int\n";
940 errflg ++;
941 }
942
943 if ( OPT_SortReference && OPT_SortQuery )
944 cerr << "WARNING: both -r and -q were passed, -q ignored\n";
945
946 if ( !OPT_SortReference && !OPT_SortQuery )
947 OPT_SortReference = true;
948
949 if ( errflg > 0 || optind != argc - 1 )
950 {
951 PrintUsage (argv[0]);
952 cerr << "Try '" << argv[0] << " -h' for more information.\n";
953 exit (EXIT_FAILURE);
954 }
955
956 OPT_AlignName = argv [optind ++];
957 }
958
959
960
961
962 //------------------------------------------------------------- PrintHelp ----//
963 void PrintHelp (const char * s)
964 {
965 PrintUsage (s);
966 cerr
967 << "-C Do not report SNPs from alignments with an ambiguous\n"
968 << " mapping, i.e. only report SNPs where the [R] and [Q]\n"
969 << " columns equal 0 and do not output these columns\n"
970 << "-h Display help information\n"
971 << "-H Do not print the output header\n"
972 << "-I Do not report indels\n"
973 << "-l Include sequence length information in the output\n"
974 << "-q Sort output lines by query IDs and SNP positions\n"
975 << "-r Sort output lines by reference IDs and SNP positions\n"
976 << "-S Specify which alignments to report by passing\n"
977 << " 'show-coords' lines to stdin\n"
978 << "-T Switch to tab-delimited format\n"
979 << "-x int Include x characters of surrounding SNP context in the\n"
980 << " output, default "
981 << OPT_Context << endl
982 << endl;
983
984 cerr
985 << " Input is the .delta output of either the nucmer or promer program\n"
986 << "passed on the command line.\n"
987 << " Output is to stdout, and consists of a list of SNPs (or amino acid\n"
988 << "substitutions for promer) with positions and other useful info.\n"
989 << "Output will be sorted with -r by default and the [BUFF] column will\n"
990 << "always refer to the sequence whose positions have been sorted. This\n"
991 << "value specifies the distance from this SNP to the nearest mismatch\n"
992 << "(end of alignment, indel, SNP, etc) in the same alignment, while the\n"
993 << "[DIST] column specifies the distance from this SNP to the nearest\n"
994 << "sequence end. SNPs for which the [R] and [Q] columns are greater than\n"
995 << "0 should be evaluated with caution, as these columns specify the\n"
996 << "number of other alignments which overlap this position. Use -C to\n"
997 << "assure SNPs are only reported from unique alignment regions.\n"
998 << endl;
999
1000 return;
1001 }
1002
1003
1004
1005
1006 //------------------------------------------------------------ PrintUsage ----//
1007 void PrintUsage (const char * s)
1008 {
1009 cerr
1010 << "\nUSAGE: " << s << " [options] <deltafile>\n\n";
1011 return;
1012 }