Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/gaps.cc @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 /* Programmer: A. Delcher | |
2 * File: ~delcher/TIGR/gaps.cc | |
3 * | |
4 * This program reads a list of unique matches between 2 strings and | |
5 * outputs first the longest consistent set of matches, and | |
6 * then all the other matches. | |
7 */ | |
8 | |
9 #include "tigrinc.hh" | |
10 | |
11 const int ALLOW_WRAPAROUND = FALSE; | |
12 | |
13 | |
14 struct Match | |
15 { | |
16 long int Start1, Start2, Len; | |
17 long int Simple_Score, Wrap_Score; | |
18 long int Simple_From, Wrap_From; | |
19 long int Simple_Adj, Wrap_Adj; | |
20 int Good : 1; | |
21 int Wrap_Here : 1; | |
22 }; | |
23 | |
24 | |
25 int Cmp (const void * A, const void * B) | |
26 | |
27 // Return how A and B compare if converted to Match | |
28 | |
29 { | |
30 if (((Match *) A) -> Start2 < ((Match *) B) -> Start2) | |
31 return -1; | |
32 else if (((Match *) A) -> Start2 == ((Match *) B) -> Start2) | |
33 return 0; | |
34 else | |
35 return 1; | |
36 } | |
37 | |
38 | |
39 inline | |
40 long int Max (long int A, long int B) | |
41 | |
42 // Return the larger of A and B . | |
43 | |
44 { | |
45 if (A < B) | |
46 return B; | |
47 else | |
48 return A; | |
49 } | |
50 | |
51 | |
52 int main (int argc, char * argv []) | |
53 | |
54 { | |
55 Match * A = NULL; | |
56 char * File_Name; | |
57 int Used_Wrap, Use_Reverse_Complement; | |
58 long int N = 0; | |
59 long int Adj, Olap, Olap1, Olap2, Prev, S1, S2, Len; | |
60 long int i, j, Best, Next; | |
61 | |
62 | |
63 if (argc < 2) | |
64 { | |
65 fprintf (stderr, "ERROR: Specify first genome filename\n"); | |
66 exit (-1); | |
67 } | |
68 File_Name = argv [1]; | |
69 | |
70 Use_Reverse_Complement = (argc > 2 && strcmp (argv [2], "-r") == 0); | |
71 | |
72 while (scanf ("%ld %ld %ld", & S1, & S2, & Len) != EOF) | |
73 { | |
74 A = (Match *) Safe_realloc (A, (N + 1) * sizeof (Match)); | |
75 A [N] . Start1 = S1; | |
76 A [N] . Start2 = S2; | |
77 A [N] . Len = Len; | |
78 A [N] . Good = FALSE; | |
79 A [N] . Wrap_Here = FALSE; | |
80 N ++; | |
81 } | |
82 | |
83 qsort (A, N, sizeof (Match), Cmp); | |
84 | |
85 for (i = 0; i < N; i ++) | |
86 { | |
87 A [i] . Simple_Score = A [i] . Wrap_Score = A [i] . Len; | |
88 A [i] . Simple_Adj = A [i] . Wrap_Adj = 0; | |
89 A [i] . Simple_From = A [i] . Wrap_From = -1; | |
90 for (j = 0; j < i; j ++) | |
91 { | |
92 Olap2 = A [j] . Start2 + A [j] . Len - A [i] . Start2; | |
93 Olap = Max (0, Olap2); | |
94 if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Wrap_Score) | |
95 { | |
96 A [i] . Wrap_From = j; | |
97 A [i] . Wrap_Score = A [j] . Simple_Score + A [i] . Len - Olap; | |
98 A [i] . Wrap_Adj = Olap; | |
99 A [i] . Wrap_Here = TRUE; | |
100 } | |
101 Olap1 = A [j] . Start1 + A [j] . Len - A [i] . Start1; | |
102 Olap = Max (Olap, Olap1); | |
103 if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Simple_Score) | |
104 { | |
105 A [i] . Simple_From = j; | |
106 A [i] . Simple_Score = A [j] . Simple_Score + A [i] . Len - Olap; | |
107 A [i] . Simple_Adj = Olap; | |
108 } | |
109 if (A [j] . Wrap_Score + A [i] . Len - Olap >= A [i] . Wrap_Score) | |
110 { | |
111 A [i] . Wrap_From = j; | |
112 A [i] . Wrap_Score = A [j] . Wrap_Score + A [i] . Len - Olap; | |
113 A [i] . Wrap_Adj = Olap; | |
114 A [i] . Wrap_Here = FALSE; | |
115 } | |
116 } | |
117 } | |
118 | |
119 if (ALLOW_WRAPAROUND) | |
120 { | |
121 Best = 0; | |
122 for (i = 1; i < N; i ++) | |
123 if (A [i] . Wrap_Score > A [Best] . Wrap_Score) | |
124 Best = i; | |
125 Used_Wrap = FALSE; | |
126 for (i = Best; i >= 0; i = Next) | |
127 { | |
128 A [i] . Good = TRUE; | |
129 if (Used_Wrap) | |
130 { | |
131 Next = A [i] . Simple_From; | |
132 A [i] . Wrap_Here = FALSE; | |
133 } | |
134 else | |
135 Next = A [i] . Wrap_From; | |
136 if (A [i] . Wrap_Here) | |
137 Used_Wrap = TRUE; | |
138 } | |
139 } | |
140 else | |
141 { | |
142 Best = 0; | |
143 for (i = 1; i < N; i ++) | |
144 if (A [i] . Simple_Score > A [Best] . Simple_Score) | |
145 Best = i; | |
146 for (i = Best; i >= 0; i = A [i] . Simple_From) | |
147 A [i] . Good = TRUE; | |
148 } | |
149 | |
150 printf ("> %s %s Consistent matches\n", File_Name, | |
151 Use_Reverse_Complement ? "reverse" : ""); | |
152 Prev = -1; | |
153 for (i = 0; i < N; i ++) | |
154 if (A [i] . Good) | |
155 { | |
156 if (Prev == -1) | |
157 printf ("%8ld %8ld %6ld %7s %6s %6s\n", | |
158 A [i] . Start1, A [i] . Start2, A [i] . Len, | |
159 "none", "-", "-"); | |
160 else if (ALLOW_WRAPAROUND) | |
161 { | |
162 Adj = A [i] . Wrap_Adj; | |
163 if (A [i] . Wrap_Here) | |
164 printf ("> Wrap around\n"); | |
165 | |
166 printf ("%8ld %8ld %6ld", | |
167 A [i] . Start1 + Adj, A [i] . Start2 + Adj, | |
168 A [i] . Len - Adj); | |
169 if (Adj == 0) | |
170 printf (" %7s", "none"); | |
171 else | |
172 printf (" %7ld", - Adj); | |
173 if (A [i] . Wrap_Here) | |
174 printf (" %6s %6ld\n", | |
175 "-", | |
176 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); | |
177 else | |
178 printf (" %6ld %6ld\n", | |
179 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len, | |
180 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); | |
181 } | |
182 else | |
183 { | |
184 Adj = A [i] . Simple_Adj; | |
185 printf ("%8ld %8ld %6ld", | |
186 A [i] . Start1 + Adj, A [i] . Start2 + Adj, | |
187 A [i] . Len - Adj); | |
188 if (Adj == 0) | |
189 printf (" %7s", "none"); | |
190 else | |
191 printf (" %7ld", - Adj); | |
192 printf (" %6ld %6ld\n", | |
193 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len, | |
194 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); | |
195 } | |
196 Prev = i; | |
197 } | |
198 | |
199 printf ("> %s %s Other matches\n", File_Name, | |
200 Use_Reverse_Complement ? "reverse" : ""); | |
201 Prev = -1; | |
202 for (i = 0; i < N; i ++) | |
203 if (! A [i] . Good) | |
204 { | |
205 if (Prev == -1) | |
206 printf ("%8ld %8ld %6ld %7s %6s %6s\n", | |
207 A [i] . Start1, A [i] . Start2, A [i] . Len, | |
208 "none", "-", "-"); | |
209 else | |
210 { | |
211 if (A [i] . Simple_From == Prev) | |
212 Adj = A [i] . Simple_Adj; | |
213 else | |
214 Adj = 0; | |
215 printf ("%8ld %8ld %6ld", | |
216 A [i] . Start1 + Adj, A [i] . Start2 + Adj, | |
217 A [i] . Len - Adj); | |
218 if (Adj == 0) | |
219 printf (" %7s", "none"); | |
220 else | |
221 printf (" %7ld", - Adj); | |
222 if (A [i] . Simple_From == Prev) | |
223 printf (" %6ld %6ld\n", | |
224 A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len, | |
225 A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); | |
226 else | |
227 printf (" %6s %6s\n", "-", "-"); | |
228 } | |
229 Prev = i; | |
230 } | |
231 | |
232 return 0; | |
233 } |