comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/gaps.cc @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 /* Programmer: A. Delcher
2 * File: ~delcher/TIGR/gaps.cc
3 *
4 * This program reads a list of unique matches between 2 strings and
5 * outputs first the longest consistent set of matches, and
6 * then all the other matches.
7 */
8
9 #include "tigrinc.hh"
10
11 const int ALLOW_WRAPAROUND = FALSE;
12
13
14 struct Match
15 {
16 long int Start1, Start2, Len;
17 long int Simple_Score, Wrap_Score;
18 long int Simple_From, Wrap_From;
19 long int Simple_Adj, Wrap_Adj;
20 int Good : 1;
21 int Wrap_Here : 1;
22 };
23
24
25 int Cmp (const void * A, const void * B)
26
27 // Return how A and B compare if converted to Match
28
29 {
30 if (((Match *) A) -> Start2 < ((Match *) B) -> Start2)
31 return -1;
32 else if (((Match *) A) -> Start2 == ((Match *) B) -> Start2)
33 return 0;
34 else
35 return 1;
36 }
37
38
39 inline
40 long int Max (long int A, long int B)
41
42 // Return the larger of A and B .
43
44 {
45 if (A < B)
46 return B;
47 else
48 return A;
49 }
50
51
52 int main (int argc, char * argv [])
53
54 {
55 Match * A = NULL;
56 char * File_Name;
57 int Used_Wrap, Use_Reverse_Complement;
58 long int N = 0;
59 long int Adj, Olap, Olap1, Olap2, Prev, S1, S2, Len;
60 long int i, j, Best, Next;
61
62
63 if (argc < 2)
64 {
65 fprintf (stderr, "ERROR: Specify first genome filename\n");
66 exit (-1);
67 }
68 File_Name = argv [1];
69
70 Use_Reverse_Complement = (argc > 2 && strcmp (argv [2], "-r") == 0);
71
72 while (scanf ("%ld %ld %ld", & S1, & S2, & Len) != EOF)
73 {
74 A = (Match *) Safe_realloc (A, (N + 1) * sizeof (Match));
75 A [N] . Start1 = S1;
76 A [N] . Start2 = S2;
77 A [N] . Len = Len;
78 A [N] . Good = FALSE;
79 A [N] . Wrap_Here = FALSE;
80 N ++;
81 }
82
83 qsort (A, N, sizeof (Match), Cmp);
84
85 for (i = 0; i < N; i ++)
86 {
87 A [i] . Simple_Score = A [i] . Wrap_Score = A [i] . Len;
88 A [i] . Simple_Adj = A [i] . Wrap_Adj = 0;
89 A [i] . Simple_From = A [i] . Wrap_From = -1;
90 for (j = 0; j < i; j ++)
91 {
92 Olap2 = A [j] . Start2 + A [j] . Len - A [i] . Start2;
93 Olap = Max (0, Olap2);
94 if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Wrap_Score)
95 {
96 A [i] . Wrap_From = j;
97 A [i] . Wrap_Score = A [j] . Simple_Score + A [i] . Len - Olap;
98 A [i] . Wrap_Adj = Olap;
99 A [i] . Wrap_Here = TRUE;
100 }
101 Olap1 = A [j] . Start1 + A [j] . Len - A [i] . Start1;
102 Olap = Max (Olap, Olap1);
103 if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Simple_Score)
104 {
105 A [i] . Simple_From = j;
106 A [i] . Simple_Score = A [j] . Simple_Score + A [i] . Len - Olap;
107 A [i] . Simple_Adj = Olap;
108 }
109 if (A [j] . Wrap_Score + A [i] . Len - Olap >= A [i] . Wrap_Score)
110 {
111 A [i] . Wrap_From = j;
112 A [i] . Wrap_Score = A [j] . Wrap_Score + A [i] . Len - Olap;
113 A [i] . Wrap_Adj = Olap;
114 A [i] . Wrap_Here = FALSE;
115 }
116 }
117 }
118
119 if (ALLOW_WRAPAROUND)
120 {
121 Best = 0;
122 for (i = 1; i < N; i ++)
123 if (A [i] . Wrap_Score > A [Best] . Wrap_Score)
124 Best = i;
125 Used_Wrap = FALSE;
126 for (i = Best; i >= 0; i = Next)
127 {
128 A [i] . Good = TRUE;
129 if (Used_Wrap)
130 {
131 Next = A [i] . Simple_From;
132 A [i] . Wrap_Here = FALSE;
133 }
134 else
135 Next = A [i] . Wrap_From;
136 if (A [i] . Wrap_Here)
137 Used_Wrap = TRUE;
138 }
139 }
140 else
141 {
142 Best = 0;
143 for (i = 1; i < N; i ++)
144 if (A [i] . Simple_Score > A [Best] . Simple_Score)
145 Best = i;
146 for (i = Best; i >= 0; i = A [i] . Simple_From)
147 A [i] . Good = TRUE;
148 }
149
150 printf ("> %s %s Consistent matches\n", File_Name,
151 Use_Reverse_Complement ? "reverse" : "");
152 Prev = -1;
153 for (i = 0; i < N; i ++)
154 if (A [i] . Good)
155 {
156 if (Prev == -1)
157 printf ("%8ld %8ld %6ld %7s %6s %6s\n",
158 A [i] . Start1, A [i] . Start2, A [i] . Len,
159 "none", "-", "-");
160 else if (ALLOW_WRAPAROUND)
161 {
162 Adj = A [i] . Wrap_Adj;
163 if (A [i] . Wrap_Here)
164 printf ("> Wrap around\n");
165
166 printf ("%8ld %8ld %6ld",
167 A [i] . Start1 + Adj, A [i] . Start2 + Adj,
168 A [i] . Len - Adj);
169 if (Adj == 0)
170 printf (" %7s", "none");
171 else
172 printf (" %7ld", - Adj);
173 if (A [i] . Wrap_Here)
174 printf (" %6s %6ld\n",
175 "-",
176 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
177 else
178 printf (" %6ld %6ld\n",
179 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
180 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
181 }
182 else
183 {
184 Adj = A [i] . Simple_Adj;
185 printf ("%8ld %8ld %6ld",
186 A [i] . Start1 + Adj, A [i] . Start2 + Adj,
187 A [i] . Len - Adj);
188 if (Adj == 0)
189 printf (" %7s", "none");
190 else
191 printf (" %7ld", - Adj);
192 printf (" %6ld %6ld\n",
193 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
194 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
195 }
196 Prev = i;
197 }
198
199 printf ("> %s %s Other matches\n", File_Name,
200 Use_Reverse_Complement ? "reverse" : "");
201 Prev = -1;
202 for (i = 0; i < N; i ++)
203 if (! A [i] . Good)
204 {
205 if (Prev == -1)
206 printf ("%8ld %8ld %6ld %7s %6s %6s\n",
207 A [i] . Start1, A [i] . Start2, A [i] . Len,
208 "none", "-", "-");
209 else
210 {
211 if (A [i] . Simple_From == Prev)
212 Adj = A [i] . Simple_Adj;
213 else
214 Adj = 0;
215 printf ("%8ld %8ld %6ld",
216 A [i] . Start1 + Adj, A [i] . Start2 + Adj,
217 A [i] . Len - Adj);
218 if (Adj == 0)
219 printf (" %7s", "none");
220 else
221 printf (" %7ld", - Adj);
222 if (A [i] . Simple_From == Prev)
223 printf (" %6ld %6ld\n",
224 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
225 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
226 else
227 printf (" %6s %6s\n", "-", "-");
228 }
229 Prev = i;
230 }
231
232 return 0;
233 }