jpayne@69
|
1 //-----------------------------------------------------------------------------
|
jpayne@69
|
2 // Programmer: Adam M Phillippy, University of Maryland
|
jpayne@69
|
3 // File: show-diff.cc
|
jpayne@69
|
4 // Date: 12 / 01 / 2006
|
jpayne@69
|
5 //
|
jpayne@69
|
6 // Usage: show-diff [options] <deltafile>
|
jpayne@69
|
7 // Try 'show-diff -h' for more information
|
jpayne@69
|
8 //
|
jpayne@69
|
9 //-----------------------------------------------------------------------------
|
jpayne@69
|
10
|
jpayne@69
|
11 #include "delta.hh"
|
jpayne@69
|
12 #include "tigrinc.hh"
|
jpayne@69
|
13 #include <string>
|
jpayne@69
|
14 #include <cstdlib>
|
jpayne@69
|
15 #include <cassert>
|
jpayne@69
|
16 #include <climits>
|
jpayne@69
|
17 #include <algorithm>
|
jpayne@69
|
18 #include <vector>
|
jpayne@69
|
19
|
jpayne@69
|
20 using namespace std;
|
jpayne@69
|
21
|
jpayne@69
|
22
|
jpayne@69
|
23 //================================================================ Options ====
|
jpayne@69
|
24 string OPT_AlignName; // delta file name
|
jpayne@69
|
25 bool OPT_AMOS = false; // AMOS output
|
jpayne@69
|
26 bool OPT_RefDiff = false; // output reference diff
|
jpayne@69
|
27 bool OPT_QryDiff = false; // output query diff
|
jpayne@69
|
28 bool OPT_PrintHeader = true; // -H option
|
jpayne@69
|
29
|
jpayne@69
|
30
|
jpayne@69
|
31 //=========================================================== Declarations ====
|
jpayne@69
|
32 struct EdgeletLoQCmp_t
|
jpayne@69
|
33 //!< Sorts query by lo coord, lo to hi
|
jpayne@69
|
34 {
|
jpayne@69
|
35 bool operator()(const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
|
jpayne@69
|
36 { return ( i->loQ < j->loQ ); }
|
jpayne@69
|
37 };
|
jpayne@69
|
38
|
jpayne@69
|
39 struct EdgeletIdQLoQCmp_t
|
jpayne@69
|
40 //!< Sorts query by ID and lo coord, lo to hi
|
jpayne@69
|
41 {
|
jpayne@69
|
42 bool operator()(const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
|
jpayne@69
|
43 {
|
jpayne@69
|
44 if ( i->edge && j->edge )
|
jpayne@69
|
45 {
|
jpayne@69
|
46 if ( i->edge->qrynode < j->edge->qrynode )
|
jpayne@69
|
47 return true;
|
jpayne@69
|
48 else if ( i->edge->qrynode > j->edge->qrynode )
|
jpayne@69
|
49 return false;
|
jpayne@69
|
50 }
|
jpayne@69
|
51 else if ( !i->edge && j->edge )
|
jpayne@69
|
52 return true;
|
jpayne@69
|
53 else if ( i->edge && !j->edge )
|
jpayne@69
|
54 return false;
|
jpayne@69
|
55 return ( i->loQ < j->loQ );
|
jpayne@69
|
56 }
|
jpayne@69
|
57 };
|
jpayne@69
|
58
|
jpayne@69
|
59 struct EdgeletLoRCmp_t
|
jpayne@69
|
60 //!< Sorts reference by lo coord, lo to hi
|
jpayne@69
|
61 {
|
jpayne@69
|
62 bool operator()(const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
|
jpayne@69
|
63 { return ( i->loR < j->loR ); }
|
jpayne@69
|
64 };
|
jpayne@69
|
65
|
jpayne@69
|
66 struct EdgeletIdRLoRCmp_t
|
jpayne@69
|
67 //!< Sorts reference by ID and lo coord, lo to hi
|
jpayne@69
|
68 {
|
jpayne@69
|
69 bool operator()(const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
|
jpayne@69
|
70 {
|
jpayne@69
|
71 if ( i->edge && j->edge )
|
jpayne@69
|
72 {
|
jpayne@69
|
73 if ( i->edge->refnode < j->edge->refnode )
|
jpayne@69
|
74 return true;
|
jpayne@69
|
75 else if ( i->edge->refnode > j->edge->refnode )
|
jpayne@69
|
76 return false;
|
jpayne@69
|
77 }
|
jpayne@69
|
78 else if ( !i->edge && j->edge )
|
jpayne@69
|
79 return true;
|
jpayne@69
|
80 else if ( i->edge && !j->edge )
|
jpayne@69
|
81 return false;
|
jpayne@69
|
82 return ( i->loR < j->loR );
|
jpayne@69
|
83 }
|
jpayne@69
|
84 };
|
jpayne@69
|
85
|
jpayne@69
|
86
|
jpayne@69
|
87 void PrintDiff(DeltaGraph_t & graph);
|
jpayne@69
|
88 void PrintBrk(const char* seq, long s, long e);
|
jpayne@69
|
89 void PrintSeqJmp(const char* seq,
|
jpayne@69
|
90 const char* seqp, const char* seqn,
|
jpayne@69
|
91 long s, long e);
|
jpayne@69
|
92 void PrintLisJmp(const char* seq, long s, long e);
|
jpayne@69
|
93 void PrintInv(const char* seq, long s, long e);
|
jpayne@69
|
94 void PrintGap(const char* seq, long s, long e, long gap1, long gap2);
|
jpayne@69
|
95 void PrintDup(const char* seq, long s, long e);
|
jpayne@69
|
96 void ParseArgs(int argc, char ** argv);
|
jpayne@69
|
97 void PrintHelp(const char * s);
|
jpayne@69
|
98 void PrintUsage(const char * s);
|
jpayne@69
|
99
|
jpayne@69
|
100
|
jpayne@69
|
101 //============================================================ Definitions ====
|
jpayne@69
|
102 //------------------------------------------------------------------- main ----
|
jpayne@69
|
103 int main(int argc, char **argv)
|
jpayne@69
|
104 {
|
jpayne@69
|
105 DeltaGraph_t graph;
|
jpayne@69
|
106
|
jpayne@69
|
107 ParseArgs(argc, argv);
|
jpayne@69
|
108
|
jpayne@69
|
109 //-- Get M-to-M alignment, i.e. union of QLIS and RLIS
|
jpayne@69
|
110 graph.build(OPT_AlignName, false);
|
jpayne@69
|
111 graph.flagMtoM();
|
jpayne@69
|
112 graph.clean();
|
jpayne@69
|
113
|
jpayne@69
|
114 //-- Output diff
|
jpayne@69
|
115 PrintDiff(graph);
|
jpayne@69
|
116
|
jpayne@69
|
117 return EXIT_SUCCESS;
|
jpayne@69
|
118 }
|
jpayne@69
|
119
|
jpayne@69
|
120
|
jpayne@69
|
121 //-------------------------------------------------------------- PrintDiff ----
|
jpayne@69
|
122 void PrintDiff(DeltaGraph_t & graph)
|
jpayne@69
|
123 {
|
jpayne@69
|
124 if ( OPT_PrintHeader )
|
jpayne@69
|
125 {
|
jpayne@69
|
126 printf ("%s %s\n%s\n\n",
|
jpayne@69
|
127 graph . refpath . c_str( ), graph . qrypath . c_str( ),
|
jpayne@69
|
128 graph . datatype == NUCMER_DATA ? "NUCMER" : "PROMER");
|
jpayne@69
|
129 printf ("%s\t%s\t%s\t%s\t%s\n",
|
jpayne@69
|
130 "[SEQ]", "[TYPE]", "[S1]", "[E1]", "[LEN 1]");
|
jpayne@69
|
131 }
|
jpayne@69
|
132
|
jpayne@69
|
133 const char* refid;
|
jpayne@69
|
134 const char* qryid;
|
jpayne@69
|
135 long i,j;
|
jpayne@69
|
136 long nAligns, gapR, gapQ;
|
jpayne@69
|
137 DeltaEdgelet_t lpad, rpad; // padding for the alignment vector
|
jpayne@69
|
138 lpad.isRLIS = rpad.isRLIS = true;
|
jpayne@69
|
139 lpad.isQLIS = rpad.isQLIS = true;
|
jpayne@69
|
140 lpad.loR = lpad.hiR = lpad.loQ = lpad.hiQ = 0;
|
jpayne@69
|
141
|
jpayne@69
|
142 DeltaEdgelet_t *A, *PA, *PGA; // alignment, prev, prev global
|
jpayne@69
|
143 vector<DeltaEdgelet_t *> aligns;
|
jpayne@69
|
144
|
jpayne@69
|
145 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
146 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
147 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
148
|
jpayne@69
|
149 //-- For each reference sequence
|
jpayne@69
|
150 if ( OPT_RefDiff )
|
jpayne@69
|
151 for ( mi = graph.refnodes.begin(); mi != graph.refnodes.end(); ++ mi )
|
jpayne@69
|
152 {
|
jpayne@69
|
153 refid = mi->first.c_str();
|
jpayne@69
|
154
|
jpayne@69
|
155 //-- Collect all alignments for this reference sequence
|
jpayne@69
|
156 aligns.clear();
|
jpayne@69
|
157 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
158 ei != (mi->second).edges.end(); ++ei )
|
jpayne@69
|
159 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
160 eli != (*ei)->edgelets.end(); ++eli )
|
jpayne@69
|
161 aligns.push_back(*eli);
|
jpayne@69
|
162
|
jpayne@69
|
163 //-- Pad the front and back of the alignment vector
|
jpayne@69
|
164 rpad.loR = rpad.hiR = mi->second.len + 1;
|
jpayne@69
|
165 rpad.loQ = rpad.hiQ = LONG_MAX;
|
jpayne@69
|
166 aligns.push_back(&lpad);
|
jpayne@69
|
167 aligns.push_back(&rpad);
|
jpayne@69
|
168
|
jpayne@69
|
169 nAligns = aligns.size();
|
jpayne@69
|
170
|
jpayne@69
|
171 //-- OVERRIDE *stpc* value with loQ QLIS ordering
|
jpayne@69
|
172 sort(aligns.begin(), aligns.end(), EdgeletIdQLoQCmp_t());
|
jpayne@69
|
173 for ( i = 0, j = 0; i != nAligns; ++i )
|
jpayne@69
|
174 aligns[i]->stpc = aligns[i]->isQLIS ? j++ : -1;
|
jpayne@69
|
175
|
jpayne@69
|
176 //-- Sort by reference order
|
jpayne@69
|
177 sort(aligns.begin(), aligns.end(), EdgeletLoRCmp_t());
|
jpayne@69
|
178 assert ( aligns[0] == &lpad && aligns[nAligns-1] == &rpad );
|
jpayne@69
|
179
|
jpayne@69
|
180 //-- Walk reference cover alignments, low to high
|
jpayne@69
|
181 PA = PGA = aligns[0];
|
jpayne@69
|
182 for ( i = 1; i != nAligns; ++i )
|
jpayne@69
|
183 {
|
jpayne@69
|
184 //-- Only interested in reference covering alignments
|
jpayne@69
|
185 if ( !aligns[i]->isRLIS ) continue;
|
jpayne@69
|
186
|
jpayne@69
|
187 A = aligns[i];
|
jpayne@69
|
188 gapR = A->loR - PA->hiR - 1;
|
jpayne@69
|
189
|
jpayne@69
|
190 //-- Reached end of alignments
|
jpayne@69
|
191 if ( A->edge == NULL )
|
jpayne@69
|
192 {
|
jpayne@69
|
193 PrintBrk(refid, PA->hiR+1, A->loR-1);
|
jpayne@69
|
194 }
|
jpayne@69
|
195 //-- 1-to-1 alignment
|
jpayne@69
|
196 else if ( A->isQLIS && A->edge == PGA->edge )
|
jpayne@69
|
197 {
|
jpayne@69
|
198 //-- Jump within Q
|
jpayne@69
|
199 if ( A->slope() != PGA->slope() ||
|
jpayne@69
|
200 A->stpc != PGA->stpc + PGA->slope() )
|
jpayne@69
|
201 {
|
jpayne@69
|
202 if ( A->slope() == PGA->slope() )
|
jpayne@69
|
203 PrintLisJmp(refid, PA->hiR+1, A->loR-1);
|
jpayne@69
|
204 else
|
jpayne@69
|
205 PrintInv(refid, PA->hiR+1, A->loR-1);
|
jpayne@69
|
206 }
|
jpayne@69
|
207 //-- Lined up, nothing in between
|
jpayne@69
|
208 else if ( PA == PGA )
|
jpayne@69
|
209 {
|
jpayne@69
|
210 gapQ = A->isPositive() ?
|
jpayne@69
|
211 A->loQ - PGA->hiQ - 1 :
|
jpayne@69
|
212 PGA->loQ - A->hiQ - 1;
|
jpayne@69
|
213 PrintGap(refid, PA->hiR+1, A->loR-1, gapR, gapQ);
|
jpayne@69
|
214 }
|
jpayne@69
|
215 //-- Lined up, duplication in between
|
jpayne@69
|
216 else
|
jpayne@69
|
217 {
|
jpayne@69
|
218 PrintBrk(refid, PA->hiR+1, A->loR-1);
|
jpayne@69
|
219 }
|
jpayne@69
|
220 }
|
jpayne@69
|
221 //-- Not in QLIS? Must be a duplication in R
|
jpayne@69
|
222 else if ( !A->isQLIS )
|
jpayne@69
|
223 {
|
jpayne@69
|
224 PrintBrk(refid, PA->hiR+1, A->loR-1);
|
jpayne@69
|
225 PrintDup(refid, A->loR, A->hiR);
|
jpayne@69
|
226 }
|
jpayne@69
|
227 //-- A->edge != PGA->edge? Jump to different query sequence
|
jpayne@69
|
228 else if ( PGA->edge != NULL )
|
jpayne@69
|
229 {
|
jpayne@69
|
230 PrintSeqJmp(refid,
|
jpayne@69
|
231 PGA->edge->qrynode->id->c_str(),
|
jpayne@69
|
232 A->edge->qrynode->id->c_str(),
|
jpayne@69
|
233 PA->hiR+1, A->loR-1);
|
jpayne@69
|
234 }
|
jpayne@69
|
235 //-- Gap before first alignment
|
jpayne@69
|
236 else
|
jpayne@69
|
237 {
|
jpayne@69
|
238 PrintBrk(refid, PA->hiR+1, A->loR-1);
|
jpayne@69
|
239 }
|
jpayne@69
|
240
|
jpayne@69
|
241 if ( A->isQLIS )
|
jpayne@69
|
242 PGA = A;
|
jpayne@69
|
243 PA = A;
|
jpayne@69
|
244 }
|
jpayne@69
|
245 }
|
jpayne@69
|
246
|
jpayne@69
|
247
|
jpayne@69
|
248 //---------- WARNING! Same code as above but Q's for R's and R's for Q's
|
jpayne@69
|
249 if ( OPT_QryDiff )
|
jpayne@69
|
250 for ( mi = graph.qrynodes.begin(); mi != graph.qrynodes.end(); ++ mi )
|
jpayne@69
|
251 {
|
jpayne@69
|
252 qryid = mi->first.c_str();
|
jpayne@69
|
253
|
jpayne@69
|
254 aligns.clear();
|
jpayne@69
|
255 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
256 ei != (mi->second).edges.end(); ++ei )
|
jpayne@69
|
257 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
258 eli != (*ei)->edgelets.end(); ++eli )
|
jpayne@69
|
259 aligns.push_back(*eli);
|
jpayne@69
|
260
|
jpayne@69
|
261 rpad.loQ = rpad.hiQ = mi->second.len + 1;
|
jpayne@69
|
262 rpad.loR = rpad.hiR = LONG_MAX;
|
jpayne@69
|
263 aligns.push_back(&lpad);
|
jpayne@69
|
264 aligns.push_back(&rpad);
|
jpayne@69
|
265
|
jpayne@69
|
266 nAligns = aligns.size();
|
jpayne@69
|
267
|
jpayne@69
|
268 sort(aligns.begin(), aligns.end(), EdgeletIdRLoRCmp_t());
|
jpayne@69
|
269 for ( i = 0, j = 0; i != nAligns; ++i )
|
jpayne@69
|
270 aligns[i]->stpc = aligns[i]->isRLIS ? j++ : -1;
|
jpayne@69
|
271
|
jpayne@69
|
272 sort(aligns.begin(), aligns.end(), EdgeletLoQCmp_t());
|
jpayne@69
|
273 assert ( aligns[0] == &lpad && aligns[nAligns-1] == &rpad );
|
jpayne@69
|
274
|
jpayne@69
|
275 PA = PGA = aligns[0];
|
jpayne@69
|
276 for ( i = 1; i != nAligns; ++i )
|
jpayne@69
|
277 {
|
jpayne@69
|
278 if ( !aligns[i]->isQLIS ) continue;
|
jpayne@69
|
279
|
jpayne@69
|
280 A = aligns[i];
|
jpayne@69
|
281 gapQ = A->loQ - PA->hiQ - 1;
|
jpayne@69
|
282
|
jpayne@69
|
283 if ( A->edge == NULL )
|
jpayne@69
|
284 {
|
jpayne@69
|
285 PrintBrk(qryid, PA->hiQ+1, A->loQ-1);
|
jpayne@69
|
286 }
|
jpayne@69
|
287 else if ( A->isRLIS && A->edge == PGA->edge )
|
jpayne@69
|
288 {
|
jpayne@69
|
289 if ( A->slope() != PGA->slope() ||
|
jpayne@69
|
290 A->stpc != PGA->stpc + PGA->slope() )
|
jpayne@69
|
291 {
|
jpayne@69
|
292 if ( A->slope() == PGA->slope() )
|
jpayne@69
|
293 PrintLisJmp(qryid, PA->hiQ+1, A->loQ-1);
|
jpayne@69
|
294 else
|
jpayne@69
|
295 PrintInv(qryid, PA->hiQ+1, A->loQ-1);
|
jpayne@69
|
296 }
|
jpayne@69
|
297 else if ( PA == PGA )
|
jpayne@69
|
298 {
|
jpayne@69
|
299 gapR = A->isPositive() ?
|
jpayne@69
|
300 A->loR - PGA->hiR - 1 :
|
jpayne@69
|
301 PGA->loR - A->hiR - 1;
|
jpayne@69
|
302 PrintGap(qryid, PA->hiQ+1, A->loQ-1, gapQ, gapR);
|
jpayne@69
|
303 }
|
jpayne@69
|
304 else
|
jpayne@69
|
305 {
|
jpayne@69
|
306 PrintBrk(qryid, PA->hiQ+1, A->loQ-1);
|
jpayne@69
|
307 }
|
jpayne@69
|
308 }
|
jpayne@69
|
309 else if ( !A->isRLIS )
|
jpayne@69
|
310 {
|
jpayne@69
|
311 PrintBrk(qryid, PA->hiQ+1, A->loQ-1);
|
jpayne@69
|
312 PrintDup(qryid, A->loQ, A->hiQ);
|
jpayne@69
|
313 }
|
jpayne@69
|
314 else if ( PGA->edge != NULL )
|
jpayne@69
|
315 {
|
jpayne@69
|
316 PrintSeqJmp(qryid,
|
jpayne@69
|
317 PA->edge->refnode->id->c_str(),
|
jpayne@69
|
318 A->edge->refnode->id->c_str(),
|
jpayne@69
|
319 PA->hiQ+1, A->loQ-1);
|
jpayne@69
|
320 }
|
jpayne@69
|
321 else
|
jpayne@69
|
322 {
|
jpayne@69
|
323 PrintBrk(qryid, PA->hiQ+1, A->loQ-1);
|
jpayne@69
|
324 }
|
jpayne@69
|
325
|
jpayne@69
|
326 if ( A->isRLIS )
|
jpayne@69
|
327 PGA = A;
|
jpayne@69
|
328 PA = A;
|
jpayne@69
|
329 }
|
jpayne@69
|
330 }
|
jpayne@69
|
331 }
|
jpayne@69
|
332
|
jpayne@69
|
333
|
jpayne@69
|
334 void PrintBrk(const char* seq, long s, long e)
|
jpayne@69
|
335 {
|
jpayne@69
|
336 if ( e-s+1 <= 0 ) return;
|
jpayne@69
|
337
|
jpayne@69
|
338 if ( !OPT_AMOS )
|
jpayne@69
|
339 printf("%s\tBRK\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
340 seq, s, e, e-s+1);
|
jpayne@69
|
341 else
|
jpayne@69
|
342 printf("%s\tA\t%ld\t%ld\tBRK\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
343 seq, s, e, s, e, e-s+1);
|
jpayne@69
|
344 }
|
jpayne@69
|
345
|
jpayne@69
|
346 void PrintSeqJmp(const char* seq,
|
jpayne@69
|
347 const char* seqp,
|
jpayne@69
|
348 const char* seqn,
|
jpayne@69
|
349 long s, long e)
|
jpayne@69
|
350 {
|
jpayne@69
|
351 if ( !OPT_AMOS )
|
jpayne@69
|
352 printf("%s\tSEQ\t%ld\t%ld\t%ld\t%s\t%s\n",
|
jpayne@69
|
353 seq, s, e, e-s+1, seqp, seqn);
|
jpayne@69
|
354 else
|
jpayne@69
|
355 printf("%s\tA\t%ld\t%ld\tSEQ\t%ld\t%ld\t%ld\t%s\t%s\n",
|
jpayne@69
|
356 seq, s, e, s, e, e-s+1, seqp, seqn);
|
jpayne@69
|
357 }
|
jpayne@69
|
358
|
jpayne@69
|
359 void PrintLisJmp(const char* seq, long s, long e)
|
jpayne@69
|
360 {
|
jpayne@69
|
361 if ( !OPT_AMOS )
|
jpayne@69
|
362 printf("%s\tJMP\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
363 seq, s, e, e-s+1);
|
jpayne@69
|
364 else
|
jpayne@69
|
365 printf("%s\tA\t%ld\t%ld\tJMP\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
366 seq, s, e, s, e, e-s+1);
|
jpayne@69
|
367 }
|
jpayne@69
|
368
|
jpayne@69
|
369 void PrintInv(const char* seq, long s, long e)
|
jpayne@69
|
370 {
|
jpayne@69
|
371 if ( !OPT_AMOS )
|
jpayne@69
|
372 printf("%s\tINV\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
373 seq, s, e, e-s+1);
|
jpayne@69
|
374 else
|
jpayne@69
|
375 printf("%s\tA\t%ld\t%ld\tINV\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
376 seq, s, e, s, e, e-s+1);
|
jpayne@69
|
377 }
|
jpayne@69
|
378
|
jpayne@69
|
379 void PrintGap(const char* seq, long s, long e, long gap1, long gap2)
|
jpayne@69
|
380 {
|
jpayne@69
|
381 if ( !OPT_AMOS )
|
jpayne@69
|
382 printf("%s\tGAP\t%ld\t%ld\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
383 seq, s, e, gap1, gap2, gap1-gap2);
|
jpayne@69
|
384 else
|
jpayne@69
|
385 printf("%s\tA\t%ld\t%ld\tGAP\t%ld\t%ld\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
386 seq, s, e, s, e, gap1, gap2, gap1-gap2);
|
jpayne@69
|
387 }
|
jpayne@69
|
388
|
jpayne@69
|
389 void PrintDup(const char* seq, long s, long e)
|
jpayne@69
|
390 {
|
jpayne@69
|
391 if ( !OPT_AMOS )
|
jpayne@69
|
392 printf("%s\tDUP\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
393 seq, s, e, e-s+1);
|
jpayne@69
|
394 else
|
jpayne@69
|
395 printf("%s\tA\t%ld\t%ld\tDUP\t%ld\t%ld\t%ld\n",
|
jpayne@69
|
396 seq, s, e, s, e, e-s+1);
|
jpayne@69
|
397 }
|
jpayne@69
|
398
|
jpayne@69
|
399
|
jpayne@69
|
400 //-------------------------------------------------------------- ParseArgs ----
|
jpayne@69
|
401 void ParseArgs (int argc, char ** argv)
|
jpayne@69
|
402 {
|
jpayne@69
|
403 int ch, errflg = 0;
|
jpayne@69
|
404 optarg = NULL;
|
jpayne@69
|
405
|
jpayne@69
|
406 while ( !errflg &&
|
jpayne@69
|
407 ((ch = getopt (argc, argv, "fhHqr")) != EOF) )
|
jpayne@69
|
408 switch (ch)
|
jpayne@69
|
409 {
|
jpayne@69
|
410 case 'f':
|
jpayne@69
|
411 OPT_AMOS = true;
|
jpayne@69
|
412 break;
|
jpayne@69
|
413
|
jpayne@69
|
414 case 'h':
|
jpayne@69
|
415 PrintHelp (argv[0]);
|
jpayne@69
|
416 exit (EXIT_SUCCESS);
|
jpayne@69
|
417 break;
|
jpayne@69
|
418
|
jpayne@69
|
419 case 'H':
|
jpayne@69
|
420 OPT_PrintHeader = false;
|
jpayne@69
|
421 break;
|
jpayne@69
|
422
|
jpayne@69
|
423 case 'q':
|
jpayne@69
|
424 OPT_QryDiff = true;
|
jpayne@69
|
425 break;
|
jpayne@69
|
426
|
jpayne@69
|
427 case 'r':
|
jpayne@69
|
428 OPT_RefDiff = true;
|
jpayne@69
|
429 break;
|
jpayne@69
|
430
|
jpayne@69
|
431 default:
|
jpayne@69
|
432 errflg ++;
|
jpayne@69
|
433 }
|
jpayne@69
|
434
|
jpayne@69
|
435 if ( OPT_RefDiff && OPT_QryDiff )
|
jpayne@69
|
436 {
|
jpayne@69
|
437 cerr << "ERROR: -r and -q options cannot be combined\n";
|
jpayne@69
|
438 errflg++;
|
jpayne@69
|
439 }
|
jpayne@69
|
440 if ( !OPT_RefDiff && !OPT_QryDiff )
|
jpayne@69
|
441 OPT_RefDiff = true;
|
jpayne@69
|
442
|
jpayne@69
|
443 if ( errflg > 0 || optind != argc - 1 )
|
jpayne@69
|
444 {
|
jpayne@69
|
445 PrintUsage(argv[0]);
|
jpayne@69
|
446 cerr << "Try '" << argv[0] << " -h' for more information.\n";
|
jpayne@69
|
447 exit(EXIT_FAILURE);
|
jpayne@69
|
448 }
|
jpayne@69
|
449
|
jpayne@69
|
450 OPT_AlignName = argv [optind ++];
|
jpayne@69
|
451 }
|
jpayne@69
|
452
|
jpayne@69
|
453
|
jpayne@69
|
454 //-------------------------------------------------------------- PrintHelp ----
|
jpayne@69
|
455 void PrintHelp (const char * s)
|
jpayne@69
|
456 {
|
jpayne@69
|
457 PrintUsage (s);
|
jpayne@69
|
458 cerr
|
jpayne@69
|
459 << "-f Output diff information as AMOS features\n"
|
jpayne@69
|
460 << "-h Display help information\n"
|
jpayne@69
|
461 << "-H Do not show header\n"
|
jpayne@69
|
462 << "-q Show diff information for queries\n"
|
jpayne@69
|
463 << "-r Show diff information for references (default)\n"
|
jpayne@69
|
464 << endl;
|
jpayne@69
|
465
|
jpayne@69
|
466 cerr
|
jpayne@69
|
467 << " Outputs a list of structural differences for each sequence in\n"
|
jpayne@69
|
468 << "the reference and query, sorted by position. For a reference\n"
|
jpayne@69
|
469 << "sequence R, and its matching query sequence Q, differences are\n"
|
jpayne@69
|
470 << "categorized as GAP (gap between two mutually consistent alignments),\n"
|
jpayne@69
|
471 << "DUP (inserted duplication), BRK (other inserted sequence), JMP\n"
|
jpayne@69
|
472 << "(rearrangement), INV (rearrangement with inversion), SEQ\n"
|
jpayne@69
|
473 << "(rearrangement with another sequence). The first five columns of\n"
|
jpayne@69
|
474 << "the output are seq ID, feature type, feature start, feature end,\n"
|
jpayne@69
|
475 << "and feature length. Additional columns are added depending on the\n"
|
jpayne@69
|
476 << "feature type. Negative feature lengths indicate overlapping adjacent\n"
|
jpayne@69
|
477 << "alignment blocks.\n"
|
jpayne@69
|
478 << " IDR GAP gap-start gap-end gap-length-R gap-length-Q gap-diff\n"
|
jpayne@69
|
479 << " IDR DUP dup-start dup-end dup-length\n"
|
jpayne@69
|
480 << " IDR BRK gap-start gap-end gap-length\n"
|
jpayne@69
|
481 << " IDR JMP gap-start gap-end gap-length\n"
|
jpayne@69
|
482 << " IDR INV gap-start gap-end gap-length\n"
|
jpayne@69
|
483 << " IDR SEQ gap-start gap-end gap-length prev-sequence next-sequence\n"
|
jpayne@69
|
484 << "Positions always reference the sequence with the given ID. The\n"
|
jpayne@69
|
485 << "sum of the fifth column (ignoring negative values) is the total\n"
|
jpayne@69
|
486 << "amount of inserted sequence. Summing the fifth column after removing\n"
|
jpayne@69
|
487 << "DUP features is total unique inserted sequence. Note that unaligned\n"
|
jpayne@69
|
488 << "sequence are not counted, and could represent additional \"unique\"\n"
|
jpayne@69
|
489 << "sequences. See documentation for tips on how to interpret these\n"
|
jpayne@69
|
490 << "alignment break features.\n"
|
jpayne@69
|
491 << endl;
|
jpayne@69
|
492
|
jpayne@69
|
493 return;
|
jpayne@69
|
494 }
|
jpayne@69
|
495
|
jpayne@69
|
496
|
jpayne@69
|
497 //------------------------------------------------------------- PrintUsage ----
|
jpayne@69
|
498 void PrintUsage (const char * s)
|
jpayne@69
|
499 {
|
jpayne@69
|
500 cerr
|
jpayne@69
|
501 << "\nUSAGE: " << s << " [options] <deltafile>\n\n";
|
jpayne@69
|
502 return;
|
jpayne@69
|
503 }
|