diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/annotate.cc @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/annotate.cc	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,282 @@
+/* Programmer:  A. Delcher
+*     Written:  7 September 1998
+*        File:  ~delcher/Align/annotate.cc
+*
+*  This program reads the output from the  gaps  program and
+*  adds alignment information to it.  Reads frag info from
+*  the file named on the '>' lines of the gap file.  The word
+*  "reverse" after that name will make the query sequence be reverse
+*  complemented.
+*/
+
+#include "tigrinc.hh"
+#include <vector>
+using namespace std;
+
+#define  FIELD_LEN  20
+#define  MAX_ALIGN  10000
+#define  MAX_LINE_LEN  200
+#define  MAX_NAME_LEN  500
+#define  PREFIX_LEN  10
+#define  WIDTH  60
+
+#ifndef  EXIT_FAILURE
+  #define  EXIT_FAILURE  -1
+#endif
+#ifndef  EXIT_SUCCESS
+  #define  EXIT_SUCCESS  0
+#endif
+
+
+void  Show_Alignment (char [], long int, char [], long int);
+
+
+char  Line [MAX_LINE_LEN];
+FILE  * Gaps_With_Errors_File;
+
+
+
+int main  (int argc, char * argv [])
+ 
+  {
+   FILE  * Gap_File, * Data_File;
+   char  * Data1, * Data2;
+   char  Name [MAX_NAME_LEN], Tag [MAX_NAME_LEN];
+   char  Olap_String [FIELD_LEN], Gap_String1 [FIELD_LEN], Gap_String2 [FIELD_LEN];
+   int  Tag_Len;
+   long int  Input_Size, Len1, Len2, Line_Len;
+   long int  Start1, Start2, Len, Olap, Gap1, Gap2;
+   long int  i;
+
+   if  (argc < 3)
+       {
+        printf ("Usage: %s <gapfile> <datafile> \n", argv [0]);
+        exit (EXIT_FAILURE);
+       }
+
+
+   Data_File = File_Open (argv [2], "r");
+   Data2 = (char *) Safe_malloc (INIT_SIZE);
+   Input_Size = INIT_SIZE;
+
+   Gaps_With_Errors_File = File_Open ("witherrors.gaps", "w");
+
+   Read_String (Data_File, Data2, Input_Size, Name, FALSE);
+   fclose (Data_File);
+   Data_File = NULL;
+   Len2 = strlen (Data2 + 1);
+
+   Gap_File = File_Open (argv [1], "r");
+
+   Data1 = (char *) Safe_malloc (INIT_SIZE);
+   Input_Size = INIT_SIZE;
+
+   while  (fgets (Line, MAX_LINE_LEN, Gap_File) != NULL)
+     {
+      printf ("%s", Line);
+      for  (i = 0;  isspace (Line [i]) && Line [i] != '\n';  i ++)
+        ;
+      if  (Line [i] == '>')
+          {
+           Tag [0] = '\0';
+           sscanf (Line + i + 1, "%s %s", Name, Tag);
+	   if ( ! strcmp (Name, "Wrap") )
+	     continue;
+
+           Data_File = File_Open (Name, "r");
+           Read_String (Data_File, Data1, Input_Size, Name, FALSE);
+           fclose (Data_File);
+
+           Len1 = strlen (Data1 + 1);
+
+           Tag_Len = strlen (Tag);
+           for  (i = 0;  i < Tag_Len;  i ++)
+             Tag [i] = tolower (Tag [i]);
+           if  (strcmp (Tag, "reverse") == 0)
+	     Reverse_Complement (Data2, 1, Len2);
+           fprintf (Gaps_With_Errors_File, "%s", Line);
+           continue;
+          }
+      else if  (! isdigit (Line [i]))
+          {
+           fprintf (Gaps_With_Errors_File, "%s", Line);
+           continue;
+          }
+      sscanf (Line, "%ld %ld %ld %s %s %s", & Start1, & Start2,
+          & Len, Olap_String, Gap_String1, Gap_String2);
+
+      if  (Gap_String1 [0] == '-' || Gap_String2 [0] == '-')
+          {
+           fprintf (Gaps_With_Errors_File, "%s", Line);
+           continue;
+          }
+      Gap1 = strtol (Gap_String1, NULL, 10);
+      Gap2 = strtol (Gap_String2, NULL, 10);
+      if  (isalpha (Olap_String [0]))
+          Olap = 0;
+        else
+          Olap = strtol (Olap_String, NULL, 10);
+
+      Start1 -= Gap1 + PREFIX_LEN - Olap;
+      Start2 -= Gap2 + PREFIX_LEN - Olap;
+
+      Line_Len = strlen (Line);
+      Line [Line_Len - 1] = '\0';
+
+      Show_Alignment (Data1 + Start1 - 1, Gap1 - Olap + 2 * PREFIX_LEN,
+                      Data2 + Start2 - 1, Gap2 - Olap + 2 * PREFIX_LEN);
+
+     }
+   
+   fclose (Gaps_With_Errors_File);
+
+   return  0;
+  }
+
+
+template<typename T>
+class Matrix_t
+{
+public:
+  Matrix_t()
+  { clear(); }
+
+  void clear()
+  {
+    d_m.clear();
+    nRows_m = nCols_m = 0;
+  }
+
+  void resize(long nRows, long nCols)
+  {
+    nRows_m = nRows;
+    nCols_m = nCols;
+    if ( nRows*nCols > d_m.size() )
+      {
+        try {
+          d_m.resize(nRows*nCols);
+        } catch (...) {
+          d_m.clear();
+          throw;
+        }
+      }
+  }
+
+  inline T & operator()(long row, long col)
+  { return d_m[nCols_m*row+col]; }
+
+private:
+
+  vector<T> d_m;
+  long nRows_m, nCols_m;
+};
+
+void  Show_Alignment (char A [], long int M, char B [], long int N)
+
+//  Print the alignment between strings  A [1 .. M]  and  B [1 .. N] .
+
+  {
+   static Matrix_t<int> D;
+   static Matrix_t<char> Op;
+   static vector<char> Show_A;
+   static vector<char> Show_B;
+   int  Errors, Tmp;
+   long int  i, j, Ct;
+
+   if  (M >= MAX_ALIGN || N >= MAX_ALIGN)
+       {
+        printf ("\n   *** Too long ***\n\n");
+        fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
+        return;
+       }
+
+   try {
+     D.resize(M+1,N+1);
+     Op.resize(M+1,N+1);
+     Show_A.resize(M+N+2);
+     Show_B.resize(M+N+2);
+   } catch (...) {
+     printf ("\n   *** Too long ***\n\n");
+     fprintf (Gaps_With_Errors_File, "%s %7s\n", Line, "-");
+     return;
+   }
+
+   D (0,0) = 0;
+   Op (0,0) = 'a';
+   for  (i = 1;  i <= M;  i ++)
+     {
+      D (i,0) = i + 1;
+      Op (i,0) = 'i';
+     }
+   for  (j = 1;  j <= N;  j ++)
+     {
+      D (0,j) = j + 1;
+      Op (0,j) = 'd';
+     }
+
+   for  (i = 1;  i <= M;  i ++) // for each row (A)
+     for  (j = 1;  j <= N;  j ++) // for each col (B)
+       {
+        D (i,j) = D (i,j - 1) + 1 + (Op (i,j - 1) == 'd' ? 0 : 1);
+        Op (i,j) = 'd';
+        Tmp = D (i - 1,j) + 1 + (Op (i - 1,j) == 'i' ? 0 : 1);
+        if  (Tmp < D (i,j))
+            {
+             D (i,j) = Tmp;
+             Op (i,j) = 'i';
+            }
+        Tmp = D (i - 1,j - 1) + (A [i] == B [j] ? 0 : 1);
+        if  (Tmp < D (i,j))
+            {
+             D (i,j) = Tmp;
+             Op (i,j) = 'a';
+            }
+       }
+
+   Ct = 0;
+   i = M;
+   j = N;
+   while  (i > 0 || j > 0)
+     switch  (Op (i,j))
+       {
+        case  'a' :                      // align
+          Show_A [Ct] = A [i --];
+          Show_B [Ct ++] = B [j --];
+          break;
+        case  'i' :                      // insert into A
+          Show_A [Ct] = A [i --];
+          Show_B [Ct ++] = '.';
+          break;
+        case  'd' :                      // delete from A
+          Show_A [Ct] = '.';
+          Show_B [Ct ++] = B [j --];
+          break;
+        default :
+          printf (" *** i = %ld   j = %ld   Op (i,j) = %c\n",
+                      i, j, Op (i,j));
+          exit (EXIT_FAILURE);
+       }
+   Errors = 0;
+   for  (i = 0;  i < Ct;  i ++)
+     if  (Show_A [i] != Show_B [i])
+         Errors ++;
+   printf ("    Errors = %d\n", Errors);
+   fprintf (Gaps_With_Errors_File, "%s %7d\n", Line, Errors);
+   do
+     {
+      printf ("T:  ");
+      for  (i = Ct - 1;  i >= 0 && i >= Ct - WIDTH;  i --)
+        putchar (Show_A [i]);
+      putchar ('\n');
+      printf ("S:  ");
+      for  (i = Ct - 1;  i >= 0 && i >= Ct - WIDTH;  i --)
+        putchar (Show_B [i]);
+      putchar ('\n');
+      printf ("    ");
+      for  (i = Ct - 1;  i >= 0 && i >= Ct - WIDTH;  i --)
+        putchar (Show_A [i] == Show_B [i] ? ' ' : '^');
+      putchar ('\n');
+      Ct -= WIDTH;
+     }  while  (Ct > 0);
+   return;
+  }