jpayne@69: /* Programmer: A. Delcher jpayne@69: * File: ~delcher/TIGR/gaps.cc jpayne@69: * jpayne@69: * This program reads a list of unique matches between 2 strings and jpayne@69: * outputs first the longest consistent set of matches, and jpayne@69: * then all the other matches. jpayne@69: */ jpayne@69: jpayne@69: #include "tigrinc.hh" jpayne@69: jpayne@69: const int ALLOW_WRAPAROUND = FALSE; jpayne@69: jpayne@69: jpayne@69: struct Match jpayne@69: { jpayne@69: long int Start1, Start2, Len; jpayne@69: long int Simple_Score, Wrap_Score; jpayne@69: long int Simple_From, Wrap_From; jpayne@69: long int Simple_Adj, Wrap_Adj; jpayne@69: int Good : 1; jpayne@69: int Wrap_Here : 1; jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: int Cmp (const void * A, const void * B) jpayne@69: jpayne@69: // Return how A and B compare if converted to Match jpayne@69: jpayne@69: { jpayne@69: if (((Match *) A) -> Start2 < ((Match *) B) -> Start2) jpayne@69: return -1; jpayne@69: else if (((Match *) A) -> Start2 == ((Match *) B) -> Start2) jpayne@69: return 0; jpayne@69: else jpayne@69: return 1; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: inline jpayne@69: long int Max (long int A, long int B) jpayne@69: jpayne@69: // Return the larger of A and B . jpayne@69: jpayne@69: { jpayne@69: if (A < B) jpayne@69: return B; jpayne@69: else jpayne@69: return A; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: int main (int argc, char * argv []) jpayne@69: jpayne@69: { jpayne@69: Match * A = NULL; jpayne@69: char * File_Name; jpayne@69: int Used_Wrap, Use_Reverse_Complement; jpayne@69: long int N = 0; jpayne@69: long int Adj, Olap, Olap1, Olap2, Prev, S1, S2, Len; jpayne@69: long int i, j, Best, Next; jpayne@69: jpayne@69: jpayne@69: if (argc < 2) jpayne@69: { jpayne@69: fprintf (stderr, "ERROR: Specify first genome filename\n"); jpayne@69: exit (-1); jpayne@69: } jpayne@69: File_Name = argv [1]; jpayne@69: jpayne@69: Use_Reverse_Complement = (argc > 2 && strcmp (argv [2], "-r") == 0); jpayne@69: jpayne@69: while (scanf ("%ld %ld %ld", & S1, & S2, & Len) != EOF) jpayne@69: { jpayne@69: A = (Match *) Safe_realloc (A, (N + 1) * sizeof (Match)); jpayne@69: A [N] . Start1 = S1; jpayne@69: A [N] . Start2 = S2; jpayne@69: A [N] . Len = Len; jpayne@69: A [N] . Good = FALSE; jpayne@69: A [N] . Wrap_Here = FALSE; jpayne@69: N ++; jpayne@69: } jpayne@69: jpayne@69: qsort (A, N, sizeof (Match), Cmp); jpayne@69: jpayne@69: for (i = 0; i < N; i ++) jpayne@69: { jpayne@69: A [i] . Simple_Score = A [i] . Wrap_Score = A [i] . Len; jpayne@69: A [i] . Simple_Adj = A [i] . Wrap_Adj = 0; jpayne@69: A [i] . Simple_From = A [i] . Wrap_From = -1; jpayne@69: for (j = 0; j < i; j ++) jpayne@69: { jpayne@69: Olap2 = A [j] . Start2 + A [j] . Len - A [i] . Start2; jpayne@69: Olap = Max (0, Olap2); jpayne@69: if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Wrap_Score) jpayne@69: { jpayne@69: A [i] . Wrap_From = j; jpayne@69: A [i] . Wrap_Score = A [j] . Simple_Score + A [i] . Len - Olap; jpayne@69: A [i] . Wrap_Adj = Olap; jpayne@69: A [i] . Wrap_Here = TRUE; jpayne@69: } jpayne@69: Olap1 = A [j] . Start1 + A [j] . Len - A [i] . Start1; jpayne@69: Olap = Max (Olap, Olap1); jpayne@69: if (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Simple_Score) jpayne@69: { jpayne@69: A [i] . Simple_From = j; jpayne@69: A [i] . Simple_Score = A [j] . Simple_Score + A [i] . Len - Olap; jpayne@69: A [i] . Simple_Adj = Olap; jpayne@69: } jpayne@69: if (A [j] . Wrap_Score + A [i] . Len - Olap >= A [i] . Wrap_Score) jpayne@69: { jpayne@69: A [i] . Wrap_From = j; jpayne@69: A [i] . Wrap_Score = A [j] . Wrap_Score + A [i] . Len - Olap; jpayne@69: A [i] . Wrap_Adj = Olap; jpayne@69: A [i] . Wrap_Here = FALSE; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: if (ALLOW_WRAPAROUND) jpayne@69: { jpayne@69: Best = 0; jpayne@69: for (i = 1; i < N; i ++) jpayne@69: if (A [i] . Wrap_Score > A [Best] . Wrap_Score) jpayne@69: Best = i; jpayne@69: Used_Wrap = FALSE; jpayne@69: for (i = Best; i >= 0; i = Next) jpayne@69: { jpayne@69: A [i] . Good = TRUE; jpayne@69: if (Used_Wrap) jpayne@69: { jpayne@69: Next = A [i] . Simple_From; jpayne@69: A [i] . Wrap_Here = FALSE; jpayne@69: } jpayne@69: else jpayne@69: Next = A [i] . Wrap_From; jpayne@69: if (A [i] . Wrap_Here) jpayne@69: Used_Wrap = TRUE; jpayne@69: } jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: Best = 0; jpayne@69: for (i = 1; i < N; i ++) jpayne@69: if (A [i] . Simple_Score > A [Best] . Simple_Score) jpayne@69: Best = i; jpayne@69: for (i = Best; i >= 0; i = A [i] . Simple_From) jpayne@69: A [i] . Good = TRUE; jpayne@69: } jpayne@69: jpayne@69: printf ("> %s %s Consistent matches\n", File_Name, jpayne@69: Use_Reverse_Complement ? "reverse" : ""); jpayne@69: Prev = -1; jpayne@69: for (i = 0; i < N; i ++) jpayne@69: if (A [i] . Good) jpayne@69: { jpayne@69: if (Prev == -1) jpayne@69: printf ("%8ld %8ld %6ld %7s %6s %6s\n", jpayne@69: A [i] . Start1, A [i] . Start2, A [i] . Len, jpayne@69: "none", "-", "-"); jpayne@69: else if (ALLOW_WRAPAROUND) jpayne@69: { jpayne@69: Adj = A [i] . Wrap_Adj; jpayne@69: if (A [i] . Wrap_Here) jpayne@69: printf ("> Wrap around\n"); jpayne@69: jpayne@69: printf ("%8ld %8ld %6ld", jpayne@69: A [i] . Start1 + Adj, A [i] . Start2 + Adj, jpayne@69: A [i] . Len - Adj); jpayne@69: if (Adj == 0) jpayne@69: printf (" %7s", "none"); jpayne@69: else jpayne@69: printf (" %7ld", - Adj); jpayne@69: if (A [i] . Wrap_Here) jpayne@69: printf (" %6s %6ld\n", jpayne@69: "-", jpayne@69: A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); jpayne@69: else jpayne@69: printf (" %6ld %6ld\n", jpayne@69: A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len, jpayne@69: A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: Adj = A [i] . Simple_Adj; jpayne@69: printf ("%8ld %8ld %6ld", jpayne@69: A [i] . Start1 + Adj, A [i] . Start2 + Adj, jpayne@69: A [i] . Len - Adj); jpayne@69: if (Adj == 0) jpayne@69: printf (" %7s", "none"); jpayne@69: else jpayne@69: printf (" %7ld", - Adj); jpayne@69: printf (" %6ld %6ld\n", jpayne@69: A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len, jpayne@69: A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); jpayne@69: } jpayne@69: Prev = i; jpayne@69: } jpayne@69: jpayne@69: printf ("> %s %s Other matches\n", File_Name, jpayne@69: Use_Reverse_Complement ? "reverse" : ""); jpayne@69: Prev = -1; jpayne@69: for (i = 0; i < N; i ++) jpayne@69: if (! A [i] . Good) jpayne@69: { jpayne@69: if (Prev == -1) jpayne@69: printf ("%8ld %8ld %6ld %7s %6s %6s\n", jpayne@69: A [i] . Start1, A [i] . Start2, A [i] . Len, jpayne@69: "none", "-", "-"); jpayne@69: else jpayne@69: { jpayne@69: if (A [i] . Simple_From == Prev) jpayne@69: Adj = A [i] . Simple_Adj; jpayne@69: else jpayne@69: Adj = 0; jpayne@69: printf ("%8ld %8ld %6ld", jpayne@69: A [i] . Start1 + Adj, A [i] . Start2 + Adj, jpayne@69: A [i] . Len - Adj); jpayne@69: if (Adj == 0) jpayne@69: printf (" %7s", "none"); jpayne@69: else jpayne@69: printf (" %7ld", - Adj); jpayne@69: if (A [i] . Simple_From == Prev) jpayne@69: printf (" %6ld %6ld\n", jpayne@69: A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len, jpayne@69: A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); jpayne@69: else jpayne@69: printf (" %6s %6s\n", "-", "-"); jpayne@69: } jpayne@69: Prev = i; jpayne@69: } jpayne@69: jpayne@69: return 0; jpayne@69: }