diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/gaps.cc @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/gaps.cc	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,233 @@
+/* Programmer:  A. Delcher
+*        File:  ~delcher/TIGR/gaps.cc
+*
+*  This program reads a list of unique matches between 2 strings and
+*  outputs first the longest consistent set of matches, and
+*  then all the other matches.
+*/
+
+#include "tigrinc.hh"
+
+const int  ALLOW_WRAPAROUND = FALSE;
+
+
+struct  Match
+  {
+   long int  Start1, Start2, Len;
+   long int  Simple_Score, Wrap_Score;
+   long int  Simple_From, Wrap_From;
+   long int  Simple_Adj, Wrap_Adj;
+   int  Good : 1;
+   int  Wrap_Here : 1;
+  };
+
+
+int  Cmp  (const void * A, const void * B)
+
+//  Return how  A  and  B  compare if converted to  Match
+
+  {
+   if  (((Match *) A) -> Start2 < ((Match *) B) -> Start2)
+       return  -1;
+   else if  (((Match *) A) -> Start2 == ((Match *) B) -> Start2)
+       return  0;
+     else
+       return  1;
+  }
+
+
+inline
+long int  Max  (long int A, long int B)
+
+//  Return the larger of  A  and  B .
+
+  {
+   if  (A < B)
+       return  B;
+     else
+       return  A;
+  }
+
+
+int main  (int argc, char * argv [])
+
+  {
+   Match  * A = NULL;
+   char  * File_Name;
+   int  Used_Wrap, Use_Reverse_Complement;
+   long int  N = 0;
+   long int  Adj, Olap, Olap1, Olap2, Prev, S1, S2, Len;
+   long int  i, j, Best, Next;
+
+
+   if  (argc < 2)
+       {
+        fprintf (stderr, "ERROR:  Specify first genome filename\n");
+        exit (-1);
+       }
+   File_Name = argv [1];
+
+   Use_Reverse_Complement = (argc > 2 && strcmp (argv [2], "-r") == 0);
+
+   while  (scanf ("%ld %ld %ld", & S1, & S2, & Len) != EOF)
+     {
+      A = (Match *) Safe_realloc (A, (N + 1) * sizeof (Match));
+      A [N] . Start1 = S1;
+      A [N] . Start2 = S2;
+      A [N] . Len = Len;
+      A [N] . Good = FALSE;
+      A [N] . Wrap_Here = FALSE;
+      N ++;
+     }
+
+   qsort (A, N, sizeof (Match), Cmp);
+
+   for  (i = 0;  i < N;  i ++)
+     {
+      A [i] . Simple_Score = A [i] . Wrap_Score = A [i] . Len;
+      A [i] . Simple_Adj = A [i] . Wrap_Adj = 0;
+      A [i] . Simple_From = A [i] . Wrap_From = -1;
+      for  (j = 0;  j < i;  j ++)
+        {
+         Olap2 = A [j] . Start2 + A [j] . Len - A [i] . Start2;
+         Olap = Max (0, Olap2);
+         if  (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Wrap_Score)
+             {
+              A [i] . Wrap_From = j;
+              A [i] . Wrap_Score = A [j] . Simple_Score + A [i] . Len - Olap;
+              A [i] . Wrap_Adj = Olap;
+              A [i] . Wrap_Here = TRUE;
+             }
+         Olap1 = A [j] . Start1 + A [j] . Len - A [i] . Start1;
+         Olap = Max (Olap, Olap1);
+         if  (A [j] . Simple_Score + A [i] . Len - Olap > A [i] . Simple_Score)
+             {
+              A [i] . Simple_From = j;
+              A [i] . Simple_Score = A [j] . Simple_Score + A [i] . Len - Olap;
+              A [i] . Simple_Adj = Olap;
+             }
+         if  (A [j] . Wrap_Score + A [i] . Len - Olap >= A [i] . Wrap_Score)
+             {
+              A [i] . Wrap_From = j;
+              A [i] . Wrap_Score = A [j] . Wrap_Score + A [i] . Len - Olap;
+              A [i] . Wrap_Adj = Olap;
+              A [i] . Wrap_Here = FALSE;
+             }
+        }
+     }
+
+   if  (ALLOW_WRAPAROUND)
+       {
+        Best = 0;
+        for  (i = 1;  i < N;  i ++)
+          if  (A [i] . Wrap_Score > A [Best] . Wrap_Score)
+              Best = i;
+        Used_Wrap = FALSE;
+        for  (i = Best;  i >= 0;  i = Next)
+          {
+           A [i] . Good = TRUE;
+           if  (Used_Wrap)
+               {
+                Next = A [i] . Simple_From;
+                A [i] . Wrap_Here = FALSE;
+               }
+             else
+               Next = A [i] . Wrap_From;
+           if  (A [i] . Wrap_Here)
+               Used_Wrap = TRUE;
+          }
+       }
+     else
+       {
+        Best = 0;
+        for  (i = 1;  i < N;  i ++)
+          if  (A [i] . Simple_Score > A [Best] . Simple_Score)
+              Best = i;
+        for  (i = Best;  i >= 0;  i = A [i] . Simple_From)
+          A [i] . Good = TRUE;
+       }
+
+   printf ("> %s %s Consistent matches\n", File_Name,
+                  Use_Reverse_Complement ? "reverse" : "");
+   Prev = -1;
+   for  (i = 0;  i < N;  i ++)
+     if  (A [i] . Good)
+         {
+          if  (Prev == -1)
+              printf ("%8ld %8ld %6ld %7s %6s %6s\n",
+                  A [i] . Start1, A [i] . Start2, A [i] . Len,
+                  "none", "-", "-");
+          else if  (ALLOW_WRAPAROUND)
+              {
+               Adj = A [i] . Wrap_Adj;
+               if  (A [i] . Wrap_Here)
+                   printf ("> Wrap around\n");
+               
+               printf ("%8ld %8ld %6ld",
+                   A [i] . Start1 + Adj, A [i] . Start2 + Adj,
+                   A [i] . Len - Adj);
+               if  (Adj == 0)
+                   printf (" %7s", "none");
+                 else
+                   printf (" %7ld", - Adj);
+               if  (A [i] . Wrap_Here)
+                   printf (" %6s %6ld\n",
+                       "-",
+                       A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
+                 else
+                   printf (" %6ld %6ld\n",
+                       A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
+                       A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
+              }
+            else
+              {
+               Adj = A [i] . Simple_Adj;
+               printf ("%8ld %8ld %6ld",
+                   A [i] . Start1 + Adj, A [i] . Start2 + Adj,
+                   A [i] . Len - Adj);
+               if  (Adj == 0)
+                   printf (" %7s", "none");
+                 else
+                   printf (" %7ld", - Adj);
+               printf (" %6ld %6ld\n",
+                   A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
+                   A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
+              }
+          Prev = i;
+         }
+
+   printf ("> %s %s Other matches\n", File_Name,
+                  Use_Reverse_Complement ? "reverse" : "");
+   Prev = -1;
+   for  (i = 0;  i < N;  i ++)
+     if  (! A [i] . Good)
+         {
+          if  (Prev == -1)
+              printf ("%8ld %8ld %6ld %7s %6s %6s\n",
+                  A [i] . Start1, A [i] . Start2, A [i] . Len,
+                  "none", "-", "-");
+            else
+              {
+               if  (A [i] . Simple_From == Prev)
+                   Adj = A [i] . Simple_Adj;
+                 else
+                   Adj = 0;
+               printf ("%8ld %8ld %6ld",
+                   A [i] . Start1 + Adj, A [i] . Start2 + Adj,
+                   A [i] . Len - Adj);
+               if  (Adj == 0)
+                   printf (" %7s", "none");
+                 else
+                   printf (" %7ld", - Adj);
+               if  (A [i] . Simple_From == Prev)
+                   printf (" %6ld %6ld\n",
+                       A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len,
+                       A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len);
+                 else
+                   printf (" %6s %6s\n", "-", "-");
+              }
+          Prev = i;
+         }
+
+   return  0;
+  }