jpayne@69: /* Programmer: A. Delcher jpayne@69: * File: mgaps.cc jpayne@69: * jpayne@69: * This program reads lists of unique matches between a sequence of strings jpayne@69: * and a reference string. For each string in the sequence, it clusters jpayne@69: * the matches together into groups that may represent longer, inexact jpayne@69: * matches. jpayne@69: */ jpayne@69: jpayne@69: jpayne@69: #include "tigrinc.hh" jpayne@69: jpayne@69: jpayne@69: const int DEFAULT_FIXED_SEPARATION = 5; jpayne@69: const long int DEFAULT_MAX_SEPARATION = 1000; jpayne@69: const long int DEFAULT_MIN_OUTPUT_SCORE = 200; jpayne@69: const double DEFAULT_SEPARATION_FACTOR = 0.05; jpayne@69: jpayne@69: jpayne@69: struct Match_t jpayne@69: { jpayne@69: long int Start1, Start2, Len; jpayne@69: long int Simple_Score; jpayne@69: long int Simple_From; jpayne@69: long int Simple_Adj; jpayne@69: int cluster_id : 30; jpayne@69: unsigned int Good : 1; jpayne@69: unsigned int Tentative : 1; jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: inline jpayne@69: long int Max (long int A, long int B) jpayne@69: jpayne@69: // Return the larger of A and B . jpayne@69: jpayne@69: { jpayne@69: if (A < B) jpayne@69: return B; jpayne@69: else jpayne@69: return A; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: static int Check_Labels = FALSE; jpayne@69: static int Fixed_Separation = DEFAULT_FIXED_SEPARATION; jpayne@69: static long int Max_Separation = DEFAULT_MAX_SEPARATION; jpayne@69: static long int Min_Output_Score = DEFAULT_MIN_OUTPUT_SCORE; jpayne@69: static double Separation_Factor = DEFAULT_SEPARATION_FACTOR; jpayne@69: static int * UF = NULL; jpayne@69: static int Use_Extents = FALSE; jpayne@69: // If TRUE use end minus start as length of cluster instead of jpayne@69: // sum of component lengths jpayne@69: jpayne@69: jpayne@69: static int By_Start2 jpayne@69: (const void * A, const void * B); jpayne@69: static int By_Cluster jpayne@69: (const void * A, const void * B); jpayne@69: static void Filter_Matches jpayne@69: (Match_t * A, int & N); jpayne@69: static int Find jpayne@69: (int a); jpayne@69: static void Parse_Command_Line jpayne@69: (int argc, char * argv []); jpayne@69: static void Process_Matches jpayne@69: (Match_t * A, int N, char * label); jpayne@69: static int Process_Cluster jpayne@69: (Match_t * A, int N, char * label); jpayne@69: static void Union jpayne@69: (int a, int b); jpayne@69: static void Usage jpayne@69: (char * command); jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: int main jpayne@69: (int argc, char * argv []) jpayne@69: jpayne@69: { jpayne@69: Match_t * A = NULL; jpayne@69: char line [MAX_LINE]; jpayne@69: char save [MAX_LINE]; jpayne@69: int first = TRUE; jpayne@69: int header_line_ct = 0; jpayne@69: long int S1, S2, Len; jpayne@69: long int N = 0, Size = 0; jpayne@69: jpayne@69: Parse_Command_Line (argc, argv); jpayne@69: jpayne@69: Size = 500; jpayne@69: A = (Match_t *) Safe_malloc (Size * sizeof (Match_t)); jpayne@69: UF = (int *) Safe_malloc (Size * sizeof (int)); jpayne@69: jpayne@69: while (fgets (line, MAX_LINE, stdin) != NULL) jpayne@69: { jpayne@69: if (line [0] == '>') jpayne@69: { jpayne@69: if (first) jpayne@69: first = FALSE; jpayne@69: else jpayne@69: Process_Matches (A, N, save); jpayne@69: N = 0; jpayne@69: strcpy (save, line); jpayne@69: if (Check_Labels && (++ header_line_ct % 2 == 0)) jpayne@69: assert (strstr (line, "Reverse") != NULL); jpayne@69: } jpayne@69: else if (sscanf (line, "%ld %ld %ld", & S1, & S2, & Len) == 3) jpayne@69: { jpayne@69: if (N >= Size - 1) jpayne@69: { jpayne@69: Size *= 2; jpayne@69: A = (Match_t *) Safe_realloc (A, Size * sizeof (Match_t)); jpayne@69: UF = (int *) Safe_realloc (UF, Size * sizeof (int)); jpayne@69: } jpayne@69: N ++; jpayne@69: A [N] . Start1 = S1; jpayne@69: A [N] . Start2 = S2; jpayne@69: A [N] . Len = Len; jpayne@69: A [N] . Good = FALSE; jpayne@69: A [N] . Tentative = FALSE; jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: Process_Matches (A, N, save); jpayne@69: jpayne@69: jpayne@69: #if 0 jpayne@69: printf ("> Other matches\n"); jpayne@69: Prev = -1; jpayne@69: for (i = 0; i < N; i ++) jpayne@69: if (! A [i] . Good) jpayne@69: { jpayne@69: if (Prev == -1) jpayne@69: printf ("%8ld %8ld %6ld %7s %6s %6s\n", jpayne@69: A [i] . Start1, A [i] . Start2, A [i] . Len, jpayne@69: "none", "-", "-"); jpayne@69: else jpayne@69: { jpayne@69: if (A [i] . Simple_From == Prev) jpayne@69: Adj = A [i] . Simple_Adj; jpayne@69: else jpayne@69: Adj = 0; jpayne@69: printf ("%8ld %8ld %6ld", jpayne@69: A [i] . Start1 + Adj, A [i] . Start2 + Adj, jpayne@69: A [i] . Len - Adj); jpayne@69: if (Adj == 0) jpayne@69: printf (" %7s", "none"); jpayne@69: else jpayne@69: printf (" %7ld", - Adj); jpayne@69: if (A [i] . Simple_From == Prev) jpayne@69: printf (" %6ld %6ld\n", jpayne@69: A [i] . Start1 + Adj - A [Prev] . Start1 - A [Prev] . Len, jpayne@69: A [i] . Start2 + Adj - A [Prev] . Start2 - A [Prev] . Len); jpayne@69: else jpayne@69: printf (" %6s %6s\n", "-", "-"); jpayne@69: } jpayne@69: Prev = i; jpayne@69: } jpayne@69: #endif jpayne@69: jpayne@69: return 0; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static int By_Start2 jpayne@69: (const void * A, const void * B) jpayne@69: jpayne@69: // Return how A and B compare if converted to Match_t jpayne@69: // based on Start2 value. If Start2 values are equal use jpayne@69: // Start1 values for comparison. jpayne@69: jpayne@69: { jpayne@69: Match_t * x, * y; jpayne@69: jpayne@69: x = (Match_t *) A; jpayne@69: y = (Match_t *) B; jpayne@69: jpayne@69: if (x -> Start2 < y -> Start2) jpayne@69: return -1; jpayne@69: else if (x -> Start2 > y -> Start2) jpayne@69: return 1; jpayne@69: else if (x -> Start1 < y -> Start1) jpayne@69: return -1; jpayne@69: else if (x -> Start1 > y -> Start1) jpayne@69: return 1; jpayne@69: else jpayne@69: return 0; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static int By_Cluster jpayne@69: (const void * A, const void * B) jpayne@69: jpayne@69: // Return how A and B compare if converted to Match_t jpayne@69: // first based on cluster_id value, then by Start2 value, jpayne@69: // then by Start1 value. jpayne@69: jpayne@69: { jpayne@69: Match_t * x, * y; jpayne@69: jpayne@69: x = (Match_t *) A; jpayne@69: y = (Match_t *) B; jpayne@69: jpayne@69: if (x -> cluster_id < y -> cluster_id) jpayne@69: return -1; jpayne@69: else if (x -> cluster_id > y -> cluster_id) jpayne@69: return 1; jpayne@69: else if (x -> Start2 < y -> Start2) jpayne@69: return -1; jpayne@69: else if (x -> Start2 > y -> Start2) jpayne@69: return 1; jpayne@69: else if (x -> Start1 < y -> Start1) jpayne@69: return -1; jpayne@69: else if (x -> Start1 > y -> Start1) jpayne@69: return 1; jpayne@69: else jpayne@69: return 0; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static void Filter_Matches jpayne@69: (Match_t * A, int & N) jpayne@69: jpayne@69: // Remove from A [0 .. (N - 1)] any matches that are internal to a repeat, jpayne@69: // e.g., if seq1 has 27 As and seq2 has 20 then the first and jpayne@69: // last matches will be kept, but the 6 matches in the middle will jpayne@69: // be eliminated. Also combine overlapping matches on the same jpayne@69: // diagonal. Pack all remaining matches into the front of A and jpayne@69: // reduce the value of N if any matches are removed. jpayne@69: // Matches in A *MUST* be sorted by Start2 value. jpayne@69: jpayne@69: { jpayne@69: int i, j; jpayne@69: jpayne@69: for (i = 0; i < N; i ++) jpayne@69: A [i] . Good = TRUE; jpayne@69: jpayne@69: for (i = 0; i < N - 1; i ++) jpayne@69: { jpayne@69: int i_diag, i_end; jpayne@69: jpayne@69: if (! A [i] . Good) jpayne@69: continue; jpayne@69: jpayne@69: i_diag = A [i] . Start2 - A [i] . Start1; jpayne@69: i_end = A [i] . Start2 + A [i] . Len; jpayne@69: jpayne@69: for (j = i + 1; j < N && A [j] . Start2 <= i_end; j ++) jpayne@69: { jpayne@69: int olap; jpayne@69: int j_diag; jpayne@69: jpayne@69: assert (A [i] . Start2 <= A [j] . Start2); jpayne@69: jpayne@69: if (! A [j] . Good) jpayne@69: continue; jpayne@69: jpayne@69: j_diag = A [j] . Start2 - A [j] . Start1; jpayne@69: if (i_diag == j_diag) jpayne@69: { jpayne@69: int j_extent; jpayne@69: jpayne@69: j_extent = A [j] . Len + A [j] . Start2 - A [i] . Start2; jpayne@69: if (j_extent > A [i] . Len) jpayne@69: { jpayne@69: A [i] . Len = j_extent; jpayne@69: i_end = A [i] . Start2 + j_extent; jpayne@69: } jpayne@69: A [j] . Good = FALSE; jpayne@69: } jpayne@69: else if (A [i] . Start1 == A [j] . Start1) jpayne@69: { jpayne@69: olap = A [i] . Start2 + A [i] . Len - A [j] . Start2; jpayne@69: if (A [i] . Len < A [j] . Len) jpayne@69: { jpayne@69: if (olap >= A [i] . Len / 2) jpayne@69: { jpayne@69: A [i] . Good = FALSE; jpayne@69: break; jpayne@69: } jpayne@69: } jpayne@69: else if (A [j] . Len < A [i] . Len) jpayne@69: { jpayne@69: if (olap >= A [j] . Len / 2) jpayne@69: { jpayne@69: A [j] . Good = FALSE; jpayne@69: } jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: if (olap >= A [i] . Len / 2) jpayne@69: { jpayne@69: A [j] . Tentative = TRUE; jpayne@69: if (A [i] . Tentative) jpayne@69: { jpayne@69: A [i] . Good = FALSE; jpayne@69: break; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: else if (A [i] . Start2 == A [j] . Start2) jpayne@69: { jpayne@69: olap = A [i] . Start1 + A [i] . Len - A [j] . Start1; jpayne@69: if (A [i] . Len < A [j] . Len) jpayne@69: { jpayne@69: if (olap >= A [i] . Len / 2) jpayne@69: { jpayne@69: A [i] . Good = FALSE; jpayne@69: break; jpayne@69: } jpayne@69: } jpayne@69: else if (A [j] . Len < A [i] . Len) jpayne@69: { jpayne@69: if (olap >= A [j] . Len / 2) jpayne@69: { jpayne@69: A [j] . Good = FALSE; jpayne@69: } jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: if (olap >= A [i] . Len / 2) jpayne@69: { jpayne@69: A [j] . Tentative = TRUE; jpayne@69: if (A [i] . Tentative) jpayne@69: { jpayne@69: A [i] . Good = FALSE; jpayne@69: break; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: for (i = j = 0; i < N; i ++) jpayne@69: if (A [i] . Good) jpayne@69: { jpayne@69: if (i != j) jpayne@69: A [j] = A [i]; jpayne@69: j ++; jpayne@69: } jpayne@69: N = j; jpayne@69: jpayne@69: for (i = 0; i < N; i ++) jpayne@69: A [i] . Good = FALSE; jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static int Find jpayne@69: (int a) jpayne@69: jpayne@69: // Return the id of the set containing a in UF . jpayne@69: jpayne@69: { jpayne@69: int i, j, k; jpayne@69: jpayne@69: if (UF [a] < 0) jpayne@69: return a; jpayne@69: jpayne@69: for (i = a; UF [i] > 0; i = UF [i]) jpayne@69: ; jpayne@69: for (j = a; UF [j] != i; j = k) jpayne@69: { jpayne@69: k = UF [j]; jpayne@69: UF [j] = i; jpayne@69: } jpayne@69: jpayne@69: return i; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static void Parse_Command_Line jpayne@69: (int argc, char * argv []) jpayne@69: jpayne@69: // Get options and parameters from command line with argc jpayne@69: // arguments in argv [0 .. (argc - 1)] . jpayne@69: jpayne@69: { jpayne@69: int ch, errflg = FALSE; jpayne@69: char * p; jpayne@69: jpayne@69: optarg = NULL; jpayne@69: jpayne@69: while (! errflg jpayne@69: && ((ch = getopt (argc, argv, "Cd:ef:l:s:")) != EOF)) jpayne@69: switch (ch) jpayne@69: { jpayne@69: case 'C' : jpayne@69: Check_Labels = TRUE; jpayne@69: break; jpayne@69: jpayne@69: case 'd' : jpayne@69: Fixed_Separation = strtol (optarg, & p, 10); jpayne@69: break; jpayne@69: jpayne@69: case 'e' : jpayne@69: Use_Extents = TRUE; jpayne@69: break; jpayne@69: jpayne@69: case 'f' : jpayne@69: Separation_Factor = strtod (optarg, & p); jpayne@69: break; jpayne@69: jpayne@69: case 'l' : jpayne@69: Min_Output_Score = strtol (optarg, & p, 10); jpayne@69: break; jpayne@69: jpayne@69: case 's' : jpayne@69: Max_Separation = strtol (optarg, & p, 10); jpayne@69: break; jpayne@69: jpayne@69: case '?' : jpayne@69: fprintf (stderr, "Unrecognized option -%c\n", optopt); jpayne@69: jpayne@69: default : jpayne@69: errflg = TRUE; jpayne@69: } jpayne@69: jpayne@69: if (errflg || optind != argc) jpayne@69: { jpayne@69: Usage (argv [0]); jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static int Process_Cluster jpayne@69: (Match_t * A, int N, char * label) jpayne@69: jpayne@69: // Process the cluster of matches in A [0 .. (N - 1)] and output them jpayne@69: // after a line containing label . Return the number of clusters jpayne@69: // printed. jpayne@69: jpayne@69: { jpayne@69: long int adj, total, hi, lo, extent, score; jpayne@69: int best, prev; jpayne@69: int print_ct = 0; jpayne@69: int i, j, k; jpayne@69: jpayne@69: do jpayne@69: { jpayne@69: for (i = 0; i < N; i ++) jpayne@69: { jpayne@69: A [i] . Simple_Score = A [i] . Len; jpayne@69: A [i] . Simple_Adj = 0; jpayne@69: A [i] . Simple_From = -1; jpayne@69: for (j = 0; j < i; j ++) jpayne@69: { jpayne@69: long int Pen; jpayne@69: long int Olap, Olap1, Olap2; jpayne@69: jpayne@69: Olap1 = A [j] . Start1 + A [j] . Len - A [i] . Start1; jpayne@69: Olap = Max (0, Olap1); jpayne@69: Olap2 = A [j] . Start2 + A [j] . Len - A [i] . Start2; jpayne@69: Olap = Max (Olap, Olap2); jpayne@69: jpayne@69: // penalize off diagonal matches jpayne@69: Pen = Olap + abs ( (A [i] . Start2 - A [i] . Start1) - jpayne@69: (A [j] . Start2 - A [j] . Start1) ); jpayne@69: jpayne@69: if (A [j] . Simple_Score + A [i] . Len - Pen > A [i] . Simple_Score) jpayne@69: { jpayne@69: A [i] . Simple_From = j; jpayne@69: A [i] . Simple_Score = A [j] . Simple_Score + A [i] . Len - Pen; jpayne@69: A [i] . Simple_Adj = Olap; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: best = 0; jpayne@69: for (i = 1; i < N; i ++) jpayne@69: if (A [i] . Simple_Score > A [best] . Simple_Score) jpayne@69: best = i; jpayne@69: total = 0; jpayne@69: hi = LONG_MIN; jpayne@69: lo = LONG_MAX; jpayne@69: for (i = best; i >= 0; i = A [i] . Simple_From) jpayne@69: { jpayne@69: A [i] . Good = TRUE; jpayne@69: total += A [i] . Len; jpayne@69: if (A [i] . Start1 + A [i] . Len > hi) jpayne@69: hi = A [i] . Start1 + A [i] . Len; jpayne@69: if (A [i] . Start1 < lo) jpayne@69: lo = A [i] . Start1; jpayne@69: } jpayne@69: extent = hi - lo; jpayne@69: jpayne@69: if (Use_Extents) jpayne@69: score = extent; jpayne@69: else jpayne@69: score = total; jpayne@69: if (score >= Min_Output_Score) jpayne@69: { jpayne@69: print_ct ++; jpayne@69: fputs (label, stdout); jpayne@69: prev = -1; jpayne@69: for (i = 0; i < N; i ++) jpayne@69: if (A [i] . Good) jpayne@69: { jpayne@69: if (prev == -1) jpayne@69: printf ("%8ld %8ld %6ld %7s %6s %6s\n", jpayne@69: A [i] . Start1, A [i] . Start2, A [i] . Len, jpayne@69: "none", "-", "-"); jpayne@69: else jpayne@69: { jpayne@69: adj = A [i] . Simple_Adj; jpayne@69: printf ("%8ld %8ld %6ld", jpayne@69: A [i] . Start1 + adj, A [i] . Start2 + adj, jpayne@69: A [i] . Len - adj); jpayne@69: if (adj == 0) jpayne@69: printf (" %7s", "none"); jpayne@69: else jpayne@69: printf (" %7ld", - adj); jpayne@69: printf (" %6ld %6ld\n", jpayne@69: A [i] . Start1 + adj - A [prev] . Start1 - A [prev] . Len, jpayne@69: A [i] . Start2 + adj - A [prev] . Start2 - A [prev] . Len); jpayne@69: } jpayne@69: prev = i; jpayne@69: } jpayne@69: label = "#\n"; jpayne@69: } jpayne@69: jpayne@69: for (i = k = 0; i < N; i ++) jpayne@69: if (! A [i] . Good) jpayne@69: { jpayne@69: if (i != k) jpayne@69: { jpayne@69: A [k] = A [i]; jpayne@69: } jpayne@69: k ++; jpayne@69: } jpayne@69: N = k; jpayne@69: } while (N > 0); jpayne@69: jpayne@69: return print_ct; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: static void Process_Matches jpayne@69: (Match_t * A, int N, char * label) jpayne@69: jpayne@69: // Process matches A [1 .. N] and output them after jpayne@69: // a line containing label . jpayne@69: jpayne@69: { jpayne@69: long int cluster_size, sep; jpayne@69: int print_ct = 0; jpayne@69: int a, b, i, j; jpayne@69: jpayne@69: if (N <= 0) jpayne@69: { jpayne@69: fputs (label, stdout); jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: // Use Union-Find to create connected-components based on jpayne@69: // separation and similar diagonals between matches jpayne@69: jpayne@69: for (i = 1; i <= N; i ++) jpayne@69: UF [i] = -1; jpayne@69: jpayne@69: qsort (A + 1, N, sizeof (Match_t), By_Start2); jpayne@69: jpayne@69: Filter_Matches (A + 1, N); jpayne@69: jpayne@69: for (i = 1; i < N; i ++) jpayne@69: { jpayne@69: long int i_end = A [i] . Start2 + A [i] . Len; jpayne@69: long int i_diag = A [i] . Start2 - A [i] . Start1; jpayne@69: jpayne@69: for (j = i + 1; j <= N; j ++) jpayne@69: { jpayne@69: long int diag_diff; jpayne@69: jpayne@69: sep = A [j] . Start2 - i_end; jpayne@69: if (sep > Max_Separation) jpayne@69: break; jpayne@69: jpayne@69: diag_diff = abs ((A [j] . Start2 - A [j] . Start1) - i_diag); jpayne@69: if (diag_diff <= Max (Fixed_Separation, long (Separation_Factor * sep))) jpayne@69: { jpayne@69: a = Find (i); jpayne@69: b = Find (j); jpayne@69: if (a != b) jpayne@69: Union (a, b); jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: // Set the cluster id of each match jpayne@69: jpayne@69: for (i = 1; i <= N; i ++) jpayne@69: A [i] . cluster_id = Find (i); jpayne@69: jpayne@69: qsort (A + 1, N, sizeof (Match_t), By_Cluster); jpayne@69: jpayne@69: for (i = 1; i <= N; i += cluster_size) jpayne@69: { jpayne@69: jpayne@69: for (j = i + 1; j <= N && A [i] . cluster_id == A [j] . cluster_id; j ++) jpayne@69: ; jpayne@69: cluster_size = j - i; jpayne@69: print_ct += Process_Cluster (A + i, cluster_size, label); jpayne@69: if (print_ct > 0) jpayne@69: label = "#\n"; jpayne@69: } jpayne@69: jpayne@69: #if 0 jpayne@69: // Find the largest cluster jpayne@69: jpayne@69: j = 1; jpayne@69: for (i = 2; i <= N; i ++) jpayne@69: if (UF [i] < UF [j]) jpayne@69: j = i; jpayne@69: jpayne@69: // j is now the largest cluster jpayne@69: jpayne@69: cluster_size = - UF [j]; jpayne@69: jpayne@69: for (i = k = 1; i <= N; i ++) jpayne@69: if (Find (i) == j) jpayne@69: { jpayne@69: if (i != k) jpayne@69: { jpayne@69: Match_t tmp = A [i]; jpayne@69: jpayne@69: A [i] = A [k]; jpayne@69: A [k] = tmp; jpayne@69: } jpayne@69: k ++; jpayne@69: } jpayne@69: jpayne@69: assert (cluster_size == k - 1); jpayne@69: #endif jpayne@69: jpayne@69: if (print_ct == 0) jpayne@69: fputs (label, stdout); jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static void Union jpayne@69: (int a, int b) jpayne@69: jpayne@69: // Union the sets whose id's are a and b in UF . jpayne@69: jpayne@69: { jpayne@69: assert (UF [a] < 0 && UF [b] < 0); jpayne@69: jpayne@69: if (UF [a] < UF [b]) jpayne@69: { jpayne@69: UF [a] += UF [b]; jpayne@69: UF [b] = a; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: UF [b] += UF [a]; jpayne@69: UF [a] = b; jpayne@69: } jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: static void Usage jpayne@69: (char * command) jpayne@69: jpayne@69: // Print to stderr description of options and command line for jpayne@69: // this program. command is the command that was used to jpayne@69: // invoke it. jpayne@69: jpayne@69: { jpayne@69: fprintf (stderr, jpayne@69: "USAGE: %s [-d ] [-f ] [-l ]\n" jpayne@69: " [-s ]\n" jpayne@69: "\n" jpayne@69: "Clusters MUMs based on diagonals and separation.\n" jpayne@69: "Input is from stdin in format produced by mummer.\n" jpayne@69: "Ouput goes to stdout.\n" jpayne@69: "\n" jpayne@69: "Options:\n" jpayne@69: "-C Check that fasta header labels alternately have \"Reverse\"\n" jpayne@69: "-d num Fixed diagonal difference to join matches\n" jpayne@69: "-e Use extent of match (end - start) rather than sum of piece\n" jpayne@69: " lengths to determine length of cluster\n" jpayne@69: "-f num Fraction of separation for diagonal difference\n" jpayne@69: "-l num Minimum length of cluster match\n" jpayne@69: "-s num Maximum separation between matches in cluster\n", jpayne@69: command); jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: