jpayne@69
|
1 /* Programmer: A. Delcher
|
jpayne@69
|
2 * File: ~delcher/TIGR/gaps.cc
|
jpayne@69
|
3 *
|
jpayne@69
|
4 * This program reads a list of unique matches between 2 strings and
|
jpayne@69
|
5 * outputs first the longest consistent set of matches, and
|
jpayne@69
|
6 * then all the other matches.
|
jpayne@69
|
7 */
|
jpayne@69
|
8
|
jpayne@69
|
9 #include "tigrinc.hh"
|
jpayne@69
|
10
|
jpayne@69
|
11 const int ALLOW_WRAPAROUND = FALSE;
|
jpayne@69
|
12
|
jpayne@69
|
13
|
jpayne@69
|
14 struct Match
|
jpayne@69
|
15 {
|
jpayne@69
|
16 long int Start1, Start2, Len;
|
jpayne@69
|
17 long int Simple_Score, Wrap_Score;
|
jpayne@69
|
18 long int Simple_From, Wrap_From;
|
jpayne@69
|
19 long int Simple_Adj, Wrap_Adj;
|
jpayne@69
|
20 int Good : 1;
|
jpayne@69
|
21 int Wrap_Here : 1;
|
jpayne@69
|
22 };
|
jpayne@69
|
23
|
jpayne@69
|
24
|
jpayne@69
|
25 int Cmp (const void * A, const void * B)
|
jpayne@69
|
26
|
jpayne@69
|
27 // Return how A and B compare if converted to Match
|
jpayne@69
|
28
|
jpayne@69
|
29 {
|
jpayne@69
|
30 if (((Match *) A) -> Start2 < ((Match *) B) -> Start2)
|
jpayne@69
|
31 return -1;
|
jpayne@69
|
32 else if (((Match *) A) -> Start2 == ((Match *) B) -> Start2)
|
jpayne@69
|
33 return 0;
|
jpayne@69
|
34 else
|
jpayne@69
|
35 return 1;
|
jpayne@69
|
36 }
|
jpayne@69
|
37
|
jpayne@69
|
38
|
jpayne@69
|
39 inline
|
jpayne@69
|
40 long int Max (long int A, long int B)
|
jpayne@69
|
41
|
jpayne@69
|
42 // Return the larger of A and B .
|
jpayne@69
|
43
|
jpayne@69
|
44 {
|
jpayne@69
|
45 if (A < B)
|
jpayne@69
|
46 return B;
|
jpayne@69
|
47 else
|
jpayne@69
|
48 return A;
|
jpayne@69
|
49 }
|
jpayne@69
|
50
|
jpayne@69
|
51
|
jpayne@69
|
52 int main (int argc, char * argv [])
|
jpayne@69
|
53
|
jpayne@69
|
54 {
|
jpayne@69
|
55 Match * A = NULL;
|
jpayne@69
|
56 char * File_Name;
|
jpayne@69
|
57 int Used_Wrap, Use_Reverse_Complement;
|
jpayne@69
|
58 long int N = 0;
|
jpayne@69
|
59 long int Adj, Olap, Olap1, Olap2, Prev, S1, S2, Len;
|
jpayne@69
|
60 long int i, j, Best, Next;
|
jpayne@69
|
61
|
jpayne@69
|
62
|
jpayne@69
|
63 if (argc < 2)
|
jpayne@69
|
64 {
|
jpayne@69
|
65 fprintf (stderr, "ERROR: Specify first genome filename\n");
|
jpayne@69
|
66 exit (-1);
|
jpayne@69
|
67 }
|
jpayne@69
|
68 File_Name = argv [1];
|
jpayne@69
|
69
|
jpayne@69
|
70 Use_Reverse_Complement = (argc > 2 && strcmp (argv [2], "-r") == 0);
|
jpayne@69
|
71
|
jpayne@69
|
72 while (scanf ("%ld %ld %ld", & S1, & S2, & Len) != EOF)
|
jpayne@69
|
73 {
|
jpayne@69
|
74 A = (Match *) Safe_realloc (A, (N + 1) * sizeof (Match));
|
jpayne@69
|
75 A [N] . Start1 = S1;
|
jpayne@69
|
76 A [N] . Start2 = S2;
|
jpayne@69
|
77 A [N] . Len = Len;
|
jpayne@69
|
78 A [N] . Good = FALSE;
|
jpayne@69
|
79 A [N] . Wrap_Here = FALSE;
|
jpayne@69
|
80 N ++;
|
jpayne@69
|
81 }
|
jpayne@69
|
82
|
jpayne@69
|
83 qsort (A, N, sizeof (Match), Cmp);
|
jpayne@69
|
84
|
jpayne@69
|
85 for (i = 0; i < N; i ++)
|
jpayne@69
|
86 {
|
jpayne@69
|
87 A [i] . Simple_Score = A [i] . Wrap_Score = A [i] . Len;
|
jpayne@69
|
88 A [i] . Simple_Adj = A [i] . Wrap_Adj = 0;
|
jpayne@69
|
89 A [i] . Simple_From = A [i] . Wrap_From = -1;
|
jpayne@69
|
90 for (j = 0; j < i; j ++)
|
jpayne@69
|
91 {
|
jpayne@69
|
92 Olap2 = A [j] . Start2 + A [j] . Len - A [i] . Start2;
|
jpayne@69
|
93 Olap = Max (0, Olap2);
|
jpayne@69
|
94 if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Wrap_Score)
|
jpayne@69
|
95 {
|
jpayne@69
|
96 A [i] . Wrap_From = j;
|
jpayne@69
|
97 A [i] . Wrap_Score = A [j] . Simple_Score + A [i] . Len - Olap;
|
jpayne@69
|
98 A [i] . Wrap_Adj = Olap;
|
jpayne@69
|
99 A [i] . Wrap_Here = TRUE;
|
jpayne@69
|
100 }
|
jpayne@69
|
101 Olap1 = A [j] . Start1 + A [j] . Len - A [i] . Start1;
|
jpayne@69
|
102 Olap = Max (Olap, Olap1);
|
jpayne@69
|
103 if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Simple_Score)
|
jpayne@69
|
104 {
|
jpayne@69
|
105 A [i] . Simple_From = j;
|
jpayne@69
|
106 A [i] . Simple_Score = A [j] . Simple_Score + A [i] . Len - Olap;
|
jpayne@69
|
107 A [i] . Simple_Adj = Olap;
|
jpayne@69
|
108 }
|
jpayne@69
|
109 if (A [j] . Wrap_Score + A [i] . Len - Olap >= A [i] . Wrap_Score)
|
jpayne@69
|
110 {
|
jpayne@69
|
111 A [i] . Wrap_From = j;
|
jpayne@69
|
112 A [i] . Wrap_Score = A [j] . Wrap_Score + A [i] . Len - Olap;
|
jpayne@69
|
113 A [i] . Wrap_Adj = Olap;
|
jpayne@69
|
114 A [i] . Wrap_Here = FALSE;
|
jpayne@69
|
115 }
|
jpayne@69
|
116 }
|
jpayne@69
|
117 }
|
jpayne@69
|
118
|
jpayne@69
|
119 if (ALLOW_WRAPAROUND)
|
jpayne@69
|
120 {
|
jpayne@69
|
121 Best = 0;
|
jpayne@69
|
122 for (i = 1; i < N; i ++)
|
jpayne@69
|
123 if (A [i] . Wrap_Score > A [Best] . Wrap_Score)
|
jpayne@69
|
124 Best = i;
|
jpayne@69
|
125 Used_Wrap = FALSE;
|
jpayne@69
|
126 for (i = Best; i >= 0; i = Next)
|
jpayne@69
|
127 {
|
jpayne@69
|
128 A [i] . Good = TRUE;
|
jpayne@69
|
129 if (Used_Wrap)
|
jpayne@69
|
130 {
|
jpayne@69
|
131 Next = A [i] . Simple_From;
|
jpayne@69
|
132 A [i] . Wrap_Here = FALSE;
|
jpayne@69
|
133 }
|
jpayne@69
|
134 else
|
jpayne@69
|
135 Next = A [i] . Wrap_From;
|
jpayne@69
|
136 if (A [i] . Wrap_Here)
|
jpayne@69
|
137 Used_Wrap = TRUE;
|
jpayne@69
|
138 }
|
jpayne@69
|
139 }
|
jpayne@69
|
140 else
|
jpayne@69
|
141 {
|
jpayne@69
|
142 Best = 0;
|
jpayne@69
|
143 for (i = 1; i < N; i ++)
|
jpayne@69
|
144 if (A [i] . Simple_Score > A [Best] . Simple_Score)
|
jpayne@69
|
145 Best = i;
|
jpayne@69
|
146 for (i = Best; i >= 0; i = A [i] . Simple_From)
|
jpayne@69
|
147 A [i] . Good = TRUE;
|
jpayne@69
|
148 }
|
jpayne@69
|
149
|
jpayne@69
|
150 printf ("> %s %s Consistent matches\n", File_Name,
|
jpayne@69
|
151 Use_Reverse_Complement ? "reverse" : "");
|
jpayne@69
|
152 Prev = -1;
|
jpayne@69
|
153 for (i = 0; i < N; i ++)
|
jpayne@69
|
154 if (A [i] . Good)
|
jpayne@69
|
155 {
|
jpayne@69
|
156 if (Prev == -1)
|
jpayne@69
|
157 printf ("%8ld %8ld %6ld %7s %6s %6s\n",
|
jpayne@69
|
158 A [i] . Start1, A [i] . Start2, A [i] . Len,
|
jpayne@69
|
159 "none", "-", "-");
|
jpayne@69
|
160 else if (ALLOW_WRAPAROUND)
|
jpayne@69
|
161 {
|
jpayne@69
|
162 Adj = A [i] . Wrap_Adj;
|
jpayne@69
|
163 if (A [i] . Wrap_Here)
|
jpayne@69
|
164 printf ("> Wrap around\n");
|
jpayne@69
|
165
|
jpayne@69
|
166 printf ("%8ld %8ld %6ld",
|
jpayne@69
|
167 A [i] . Start1 + Adj, A [i] . Start2 + Adj,
|
jpayne@69
|
168 A [i] . Len - Adj);
|
jpayne@69
|
169 if (Adj == 0)
|
jpayne@69
|
170 printf (" %7s", "none");
|
jpayne@69
|
171 else
|
jpayne@69
|
172 printf (" %7ld", - Adj);
|
jpayne@69
|
173 if (A [i] . Wrap_Here)
|
jpayne@69
|
174 printf (" %6s %6ld\n",
|
jpayne@69
|
175 "-",
|
jpayne@69
|
176 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
|
jpayne@69
|
177 else
|
jpayne@69
|
178 printf (" %6ld %6ld\n",
|
jpayne@69
|
179 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
|
jpayne@69
|
180 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
|
jpayne@69
|
181 }
|
jpayne@69
|
182 else
|
jpayne@69
|
183 {
|
jpayne@69
|
184 Adj = A [i] . Simple_Adj;
|
jpayne@69
|
185 printf ("%8ld %8ld %6ld",
|
jpayne@69
|
186 A [i] . Start1 + Adj, A [i] . Start2 + Adj,
|
jpayne@69
|
187 A [i] . Len - Adj);
|
jpayne@69
|
188 if (Adj == 0)
|
jpayne@69
|
189 printf (" %7s", "none");
|
jpayne@69
|
190 else
|
jpayne@69
|
191 printf (" %7ld", - Adj);
|
jpayne@69
|
192 printf (" %6ld %6ld\n",
|
jpayne@69
|
193 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
|
jpayne@69
|
194 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
|
jpayne@69
|
195 }
|
jpayne@69
|
196 Prev = i;
|
jpayne@69
|
197 }
|
jpayne@69
|
198
|
jpayne@69
|
199 printf ("> %s %s Other matches\n", File_Name,
|
jpayne@69
|
200 Use_Reverse_Complement ? "reverse" : "");
|
jpayne@69
|
201 Prev = -1;
|
jpayne@69
|
202 for (i = 0; i < N; i ++)
|
jpayne@69
|
203 if (! A [i] . Good)
|
jpayne@69
|
204 {
|
jpayne@69
|
205 if (Prev == -1)
|
jpayne@69
|
206 printf ("%8ld %8ld %6ld %7s %6s %6s\n",
|
jpayne@69
|
207 A [i] . Start1, A [i] . Start2, A [i] . Len,
|
jpayne@69
|
208 "none", "-", "-");
|
jpayne@69
|
209 else
|
jpayne@69
|
210 {
|
jpayne@69
|
211 if (A [i] . Simple_From == Prev)
|
jpayne@69
|
212 Adj = A [i] . Simple_Adj;
|
jpayne@69
|
213 else
|
jpayne@69
|
214 Adj = 0;
|
jpayne@69
|
215 printf ("%8ld %8ld %6ld",
|
jpayne@69
|
216 A [i] . Start1 + Adj, A [i] . Start2 + Adj,
|
jpayne@69
|
217 A [i] . Len - Adj);
|
jpayne@69
|
218 if (Adj == 0)
|
jpayne@69
|
219 printf (" %7s", "none");
|
jpayne@69
|
220 else
|
jpayne@69
|
221 printf (" %7ld", - Adj);
|
jpayne@69
|
222 if (A [i] . Simple_From == Prev)
|
jpayne@69
|
223 printf (" %6ld %6ld\n",
|
jpayne@69
|
224 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
|
jpayne@69
|
225 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
|
jpayne@69
|
226 else
|
jpayne@69
|
227 printf (" %6s %6s\n", "-", "-");
|
jpayne@69
|
228 }
|
jpayne@69
|
229 Prev = i;
|
jpayne@69
|
230 }
|
jpayne@69
|
231
|
jpayne@69
|
232 return 0;
|
jpayne@69
|
233 }
|