jpayne@69: //------------------------------------------------------------------------------ jpayne@69: // Programmer: Adam M Phillippy, The Institute for Genomic Research jpayne@69: // File: show-snps.cc jpayne@69: // Date: 12 / 08 / 2004 jpayne@69: // jpayne@69: // Usage: show-snps [options] jpayne@69: // Try 'show-snps -h' for more information jpayne@69: // jpayne@69: // Description: For use in conjunction with the MUMmer package. "show-snps" jpayne@69: // displays human readable (and machine parse-able) single jpayne@69: // base-pair polymorphisms, including indels from the .delta output jpayne@69: // of the "nucmer" program. Outputs SNP positions and relative jpayne@69: // information to stdout. jpayne@69: // jpayne@69: //------------------------------------------------------------------------------ jpayne@69: jpayne@69: #include "delta.hh" jpayne@69: #include "tigrinc.hh" jpayne@69: #include "translate.hh" jpayne@69: #include "sw_alignscore.hh" jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: using namespace std; jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //=============================================================== Options ====// jpayne@69: string OPT_AlignName; // delta file name jpayne@69: string OPT_ReferenceName; // reference sequence file name jpayne@69: string OPT_QueryName; // query sequence file name jpayne@69: jpayne@69: bool OPT_SortReference = false; // -r option jpayne@69: bool OPT_SortQuery = false; // -q option jpayne@69: bool OPT_ShowLength = false; // -l option jpayne@69: bool OPT_ShowConflict = true; // -C option jpayne@69: bool OPT_ShowIndels = true; // -I option jpayne@69: bool OPT_PrintTabular = false; // -T option jpayne@69: bool OPT_PrintHeader = true; // -H option jpayne@69: bool OPT_SelectAligns = false; // -S option jpayne@69: jpayne@69: int OPT_Context = 0; // -x option jpayne@69: jpayne@69: set OPT_Aligns; // -S option jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //============================================================= Constants ====// jpayne@69: const char INDEL_CHAR = '.'; jpayne@69: const char SEQEND_CHAR = '-'; jpayne@69: jpayne@69: jpayne@69: jpayne@69: struct SNP_R_Sort jpayne@69: { jpayne@69: bool operator() (const SNP_t * a, const SNP_t * b) jpayne@69: { jpayne@69: int i = a->ep->refnode->id->compare (*(b->ep->refnode->id)); jpayne@69: jpayne@69: if ( i < 0 ) jpayne@69: return true; jpayne@69: else if ( i > 0 ) jpayne@69: return false; jpayne@69: else jpayne@69: { jpayne@69: if ( a -> pR < b -> pR ) jpayne@69: return true; jpayne@69: else if ( a -> pR > b -> pR ) jpayne@69: return false; jpayne@69: else jpayne@69: { jpayne@69: int j = a->ep->qrynode->id->compare (*(b->ep->qrynode->id)); jpayne@69: jpayne@69: if ( j < 0 ) jpayne@69: return true; jpayne@69: else if ( j > 0 ) jpayne@69: return false; jpayne@69: else jpayne@69: { jpayne@69: if ( a -> pQ < b -> pQ ) jpayne@69: return true; jpayne@69: else jpayne@69: return false; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: struct SNP_Q_Sort jpayne@69: { jpayne@69: bool operator() (const SNP_t * a, const SNP_t * b) jpayne@69: { jpayne@69: int i = a->ep->qrynode->id->compare (*(b->ep->qrynode->id)); jpayne@69: jpayne@69: if ( i < 0 ) jpayne@69: return true; jpayne@69: else if ( i > 0 ) jpayne@69: return false; jpayne@69: else jpayne@69: { jpayne@69: if ( a -> pQ < b -> pQ ) jpayne@69: return true; jpayne@69: else if ( a -> pQ > b -> pQ ) jpayne@69: return false; jpayne@69: else jpayne@69: { jpayne@69: int j = a->ep->refnode->id->compare (*(b->ep->refnode->id)); jpayne@69: jpayne@69: if ( j < 0 ) jpayne@69: return true; jpayne@69: else if ( j > 0 ) jpayne@69: return false; jpayne@69: else jpayne@69: { jpayne@69: if ( a -> pR < b -> pR ) jpayne@69: return true; jpayne@69: else jpayne@69: return false; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //========================================================== Fuction Decs ====// jpayne@69: //------------------------------------------------------------------ RevC ----// jpayne@69: inline long RevC (long coord, long len) jpayne@69: { jpayne@69: return len - coord + 1; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------------ Norm ----// jpayne@69: inline long Norm (long c, long l, int f, AlignmentType_t d) jpayne@69: { jpayne@69: long retval = (d == PROMER_DATA ? c * 3 - (3 - abs(f)) : c); jpayne@69: if ( f < 0 ) retval = RevC (retval, l); jpayne@69: return retval; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------------ Swap ----// jpayne@69: inline void Swap (long & a, long & b) jpayne@69: { jpayne@69: long t = a; a = b; b = t; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------- CheckSNPs ----// jpayne@69: void CheckSNPs (DeltaGraph_t & graph); jpayne@69: jpayne@69: jpayne@69: //-------------------------------------------------------------- FindSNPs ----// jpayne@69: void FindSNPs (DeltaGraph_t & graph); jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------ PrintHuman ----// jpayne@69: void PrintHuman (const vector & snps, jpayne@69: const DeltaGraph_t & graph); jpayne@69: jpayne@69: jpayne@69: //---------------------------------------------------------- PrintTabular ----// jpayne@69: void PrintTabular (const vector & snps, jpayne@69: const DeltaGraph_t & graph); jpayne@69: jpayne@69: jpayne@69: //---------------------------------------------------------- SelectAligns ----// jpayne@69: void SelectAligns ( ); jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------- ParseArgs ----// jpayne@69: void ParseArgs (int argc, char ** argv); jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------- PrintHelp ----// jpayne@69: void PrintHelp (const char * s); jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------ PrintUsage ----// jpayne@69: void PrintUsage (const char * s); jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //========================================================= Function Defs ====// jpayne@69: int main (int argc, char ** argv) jpayne@69: { jpayne@69: vector snps; jpayne@69: DeltaGraph_t graph; jpayne@69: jpayne@69: jpayne@69: //-- Command line parsing jpayne@69: ParseArgs (argc, argv); jpayne@69: jpayne@69: //-- Select alignments jpayne@69: if ( OPT_SelectAligns ) jpayne@69: SelectAligns ( ); jpayne@69: jpayne@69: //-- Build the alignment graph from the delta file jpayne@69: graph . build (OPT_AlignName, true); jpayne@69: jpayne@69: //-- Read sequences jpayne@69: graph . loadSequences ( ); jpayne@69: jpayne@69: //-- Locate the SNPs jpayne@69: FindSNPs (graph); jpayne@69: jpayne@69: //-- Check for ambiguous alignment regions jpayne@69: CheckSNPs (graph); jpayne@69: jpayne@69: jpayne@69: //-- Collect and sort the SNPs jpayne@69: map::iterator mi; jpayne@69: vector::iterator ei; jpayne@69: vector::iterator li; jpayne@69: vector::iterator si; jpayne@69: for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi ) jpayne@69: for ( ei = mi->second.edges.begin( ); ei != mi->second.edges.end( ); ++ ei ) jpayne@69: for ( li = (*ei)->edgelets.begin( ); li != (*ei)->edgelets.end( ); ++ li ) jpayne@69: for ( si = (*li)->snps.begin( ); si != (*li)->snps.end( ); ++ si ) jpayne@69: if ( (OPT_ShowConflict || jpayne@69: ((*si)->conR == 0 && (*si)->conQ == 0)) jpayne@69: && jpayne@69: (OPT_ShowIndels || jpayne@69: ((*si)->cR != INDEL_CHAR && (*si)->cQ != INDEL_CHAR)) ) jpayne@69: snps . push_back (*si); jpayne@69: jpayne@69: if ( OPT_SortReference ) jpayne@69: sort (snps . begin( ), snps . end( ), SNP_R_Sort( )); jpayne@69: else jpayne@69: sort (snps . begin( ), snps . end( ), SNP_Q_Sort( )); jpayne@69: jpayne@69: jpayne@69: //-- Output data to stdout jpayne@69: if ( OPT_PrintTabular ) jpayne@69: PrintTabular (snps, graph); jpayne@69: else jpayne@69: PrintHuman (snps, graph); jpayne@69: jpayne@69: jpayne@69: return EXIT_SUCCESS; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------- CheckSNPs ----// jpayne@69: void CheckSNPs (DeltaGraph_t & graph) jpayne@69: { jpayne@69: map::const_iterator mi; jpayne@69: vector::const_iterator ei; jpayne@69: vector::iterator eli; jpayne@69: vector::iterator si; jpayne@69: long i; jpayne@69: jpayne@69: //-- For each reference sequence jpayne@69: long ref_size = 0; jpayne@69: long ref_len = 0; jpayne@69: unsigned char * ref_cov = NULL; jpayne@69: for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi ) jpayne@69: { jpayne@69: //-- Reset the reference coverage array jpayne@69: ref_len = (mi -> second) . len; jpayne@69: if ( ref_len > ref_size ) jpayne@69: { jpayne@69: ref_cov = (unsigned char *) Safe_realloc (ref_cov, ref_len + 1); jpayne@69: ref_size = ref_len; jpayne@69: } jpayne@69: for ( i = 1; i <= ref_len; ++ i ) jpayne@69: ref_cov[i] = 0; jpayne@69: jpayne@69: //-- Add to the reference coverage jpayne@69: for ( ei = (mi -> second) . edges . begin( ); jpayne@69: ei != (mi -> second) . edges . end( ); ++ ei ) jpayne@69: for ( eli = (*ei) -> edgelets . begin( ); jpayne@69: eli != (*ei) -> edgelets . end( ); ++ eli ) jpayne@69: for ( i = (*eli) -> loR; i <= (*eli) -> hiR; i ++ ) jpayne@69: if ( ref_cov [i] < UCHAR_MAX ) jpayne@69: ref_cov [i] ++; jpayne@69: jpayne@69: //-- Set the SNP conflict counter jpayne@69: for ( ei = (mi -> second) . edges . begin( ); jpayne@69: ei != (mi -> second) . edges . end( ); ++ ei ) jpayne@69: for ( eli = (*ei) -> edgelets . begin( ); jpayne@69: eli != (*ei) -> edgelets . end( ); ++ eli ) jpayne@69: for ( si = (*eli)->snps.begin( ); si != (*eli)->snps.end( ); ++ si ) jpayne@69: (*si) -> conR = ref_cov [(*si)->pR] - 1; jpayne@69: } jpayne@69: free (ref_cov); jpayne@69: jpayne@69: jpayne@69: //-- For each query sequence jpayne@69: long qry_size = 0; jpayne@69: long qry_len = 0; jpayne@69: unsigned char * qry_cov = NULL; jpayne@69: for ( mi = graph.qrynodes.begin( ); mi != graph.qrynodes.end( ); ++ mi ) jpayne@69: { jpayne@69: //-- Reset the query coverage array jpayne@69: qry_len = (mi -> second) . len; jpayne@69: if ( qry_len > qry_size ) jpayne@69: { jpayne@69: qry_cov = (unsigned char *) Safe_realloc (qry_cov, qry_len + 1); jpayne@69: qry_size = qry_len; jpayne@69: } jpayne@69: for ( i = 1; i <= qry_len; ++ i ) jpayne@69: qry_cov[i] = 0; jpayne@69: jpayne@69: //-- Add to the query coverage jpayne@69: for ( ei = (mi -> second) . edges . begin( ); jpayne@69: ei != (mi -> second) . edges . end( ); ++ ei ) jpayne@69: for ( eli = (*ei) -> edgelets . begin( ); jpayne@69: eli != (*ei) -> edgelets . end( ); ++ eli ) jpayne@69: for ( i = (*eli) -> loQ; i <= (*eli) -> hiQ; i ++ ) jpayne@69: if ( qry_cov [i] < UCHAR_MAX ) jpayne@69: qry_cov [i] ++; jpayne@69: jpayne@69: //-- Set the SNP conflict counter jpayne@69: for ( ei = (mi -> second) . edges . begin( ); jpayne@69: ei != (mi -> second) . edges . end( ); ++ ei ) jpayne@69: for ( eli = (*ei) -> edgelets . begin( ); jpayne@69: eli != (*ei) -> edgelets . end( ); ++ eli ) jpayne@69: for ( si = (*eli)->snps.begin( ); si != (*eli)->snps.end( ); ++ si ) jpayne@69: (*si) -> conQ = qry_cov [(*si)->pQ] - 1; jpayne@69: } jpayne@69: free (qry_cov); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //-------------------------------------------------------------- FindSNPs ----// jpayne@69: void FindSNPs (DeltaGraph_t & graph) jpayne@69: { jpayne@69: map::iterator mi; jpayne@69: vector::iterator ei; jpayne@69: vector::iterator li; jpayne@69: vector::iterator si, psi, nsi; jpayne@69: jpayne@69: //-- For each alignment, identify the SNPs jpayne@69: for ( mi = graph.refnodes.begin( ); mi != graph.refnodes.end( ); ++ mi ) jpayne@69: for ( ei = mi->second.edges.begin( ); ei != mi->second.edges.end( ); ++ ei ) jpayne@69: { jpayne@69: SNP_t * snp; jpayne@69: int ri, qi; jpayne@69: char * R[] = {(*ei)->refnode->seq, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: char * Q[] = {(*ei)->qrynode->seq, NULL, NULL, NULL, NULL, NULL, NULL}; jpayne@69: jpayne@69: long i; jpayne@69: long lenR = (*ei) -> refnode -> len; jpayne@69: long lenQ = (*ei) -> qrynode -> len; jpayne@69: jpayne@69: for (li = (*ei)->edgelets.begin( ); li != (*ei)->edgelets.end( ); ++ li) jpayne@69: { jpayne@69: long delta; jpayne@69: int frameR, frameQ, sign; jpayne@69: long sR, eR, sQ, eQ; jpayne@69: long rpos, qpos, remain; jpayne@69: long rctx, qctx; jpayne@69: long alenR = lenR; jpayne@69: long alenQ = lenQ; jpayne@69: jpayne@69: //-- Only do the ones requested by user jpayne@69: if ( OPT_SelectAligns ) jpayne@69: { jpayne@69: ostringstream ss; jpayne@69: set::iterator si; jpayne@69: jpayne@69: if ( (*li) -> dirR == FORWARD_DIR ) jpayne@69: ss << (*li) -> loR << ' ' << (*li) -> hiR << ' '; jpayne@69: else jpayne@69: ss << (*li) -> hiR << ' ' << (*li) -> loR << ' '; jpayne@69: jpayne@69: if ( (*li) -> dirQ == FORWARD_DIR ) jpayne@69: ss << (*li) -> loQ << ' ' << (*li) -> hiQ << ' '; jpayne@69: else jpayne@69: ss << (*li) -> hiQ << ' ' << (*li) -> loQ << ' '; jpayne@69: jpayne@69: ss << *((*ei)->refnode->id) << ' ' << *((*ei)->qrynode->id); jpayne@69: jpayne@69: si = OPT_Aligns . find (ss .str( )); jpayne@69: if ( si == OPT_Aligns . end( ) ) jpayne@69: continue; jpayne@69: else jpayne@69: OPT_Aligns . erase (si); jpayne@69: } jpayne@69: jpayne@69: //-- Point the coords the right direction jpayne@69: frameR = 1; jpayne@69: if ( (*li) -> dirR == REVERSE_DIR ) jpayne@69: { jpayne@69: sR = RevC ((*li) -> hiR, lenR); jpayne@69: eR = RevC ((*li) -> loR, lenR); jpayne@69: frameR += 3; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: sR = (*li) -> loR; jpayne@69: eR = (*li) -> hiR; jpayne@69: } jpayne@69: jpayne@69: frameQ = 1; jpayne@69: if ( (*li) -> dirQ == REVERSE_DIR ) jpayne@69: { jpayne@69: sQ = RevC ((*li) -> hiQ, lenQ); jpayne@69: eQ = RevC ((*li) -> loQ, lenQ); jpayne@69: frameQ += 3; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: sQ = (*li) -> loQ; jpayne@69: eQ = (*li) -> hiQ; jpayne@69: } jpayne@69: jpayne@69: //-- Translate coords to AA if necessary jpayne@69: if ( graph . datatype == PROMER_DATA ) jpayne@69: { jpayne@69: alenR /= 3; jpayne@69: alenQ /= 3; jpayne@69: jpayne@69: frameR += (sR + 2) % 3; jpayne@69: frameQ += (sQ + 2) % 3; jpayne@69: jpayne@69: // remeber that eR and eQ point to the last base in the codon jpayne@69: sR = (sR + 2) / 3; jpayne@69: eR = eR / 3; jpayne@69: sQ = (sQ + 2) / 3; jpayne@69: eQ = eQ / 3; jpayne@69: } jpayne@69: jpayne@69: ri = frameR; jpayne@69: qi = frameQ; jpayne@69: jpayne@69: if ( frameR > 3 ) jpayne@69: frameR = -(frameR - 3); jpayne@69: if ( frameQ > 3 ) jpayne@69: frameQ = -(frameQ - 3); jpayne@69: jpayne@69: //-- Generate the sequences if needed jpayne@69: if ( R [ri] == NULL ) jpayne@69: { jpayne@69: if ( graph . datatype == PROMER_DATA ) jpayne@69: { jpayne@69: R [ri] = (char *) Safe_malloc (alenR + 2); jpayne@69: R [ri][0] = '\0'; jpayne@69: Translate_DNA (R [0], R [ri], ri); jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: R [ri] = (char *) Safe_malloc (alenR + 2); jpayne@69: R [ri][0] = '\0'; jpayne@69: strcpy (R [ri] + 1, R [0] + 1); jpayne@69: if ( (*li) -> dirR == REVERSE_DIR ) jpayne@69: Reverse_Complement (R [ri], 1, lenR); jpayne@69: } jpayne@69: } jpayne@69: if ( Q [qi] == NULL ) jpayne@69: { jpayne@69: if ( graph . datatype == PROMER_DATA ) jpayne@69: { jpayne@69: Q [qi] = (char *) Safe_malloc (alenQ + 2); jpayne@69: Q [qi][0] = '\0'; jpayne@69: Translate_DNA (Q [0], Q [qi], qi); jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: Q [qi] = (char *) Safe_malloc (alenQ + 2); jpayne@69: Q [qi][0] = '\0'; jpayne@69: strcpy (Q [qi] + 1, Q [0] + 1); jpayne@69: if ( (*li) -> dirQ == REVERSE_DIR ) jpayne@69: Reverse_Complement (Q [qi], 1, lenQ); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- Locate the SNPs jpayne@69: rpos = sR; jpayne@69: qpos = sQ; jpayne@69: remain = eR - sR + 1; jpayne@69: jpayne@69: (*li) -> frmR = frameR; jpayne@69: (*li) -> frmQ = frameQ; jpayne@69: jpayne@69: istringstream ss; jpayne@69: ss . str ((*li)->delta); jpayne@69: jpayne@69: while ( ss >> delta && delta != 0 ) jpayne@69: { jpayne@69: sign = delta > 0 ? 1 : -1; jpayne@69: delta = labs (delta); jpayne@69: jpayne@69: //-- For all SNPs before the next indel jpayne@69: for ( i = 1; i < delta; i ++ ) jpayne@69: if ( R [ri] [rpos ++] != Q [qi] [qpos ++] ) jpayne@69: { jpayne@69: if ( graph.datatype == NUCMER_DATA && jpayne@69: CompareIUPAC (R [ri][rpos-1], Q [qi][qpos-1]) ) jpayne@69: continue; jpayne@69: jpayne@69: snp = new SNP_t; jpayne@69: snp -> ep = *ei; jpayne@69: snp -> lp = *li; jpayne@69: snp -> pR = Norm (rpos-1, lenR, frameR, graph.datatype); jpayne@69: snp -> pQ = Norm (qpos-1, lenQ, frameQ, graph.datatype); jpayne@69: snp -> cR = toupper (R [ri] [rpos-1]); jpayne@69: snp -> cQ = toupper (Q [qi] [qpos-1]); jpayne@69: jpayne@69: for ( rctx = rpos - OPT_Context - 1; jpayne@69: rctx < rpos + OPT_Context; rctx ++ ) jpayne@69: if ( rctx < 1 || rctx > alenR ) jpayne@69: snp -> ctxR . push_back (SEQEND_CHAR); jpayne@69: else if ( rctx == rpos - 1 ) jpayne@69: snp -> ctxR . push_back (snp -> cR); jpayne@69: else jpayne@69: snp -> ctxR . push_back (toupper (R [ri] [rctx])); jpayne@69: jpayne@69: for ( qctx = qpos - OPT_Context - 1; jpayne@69: qctx < qpos + OPT_Context; qctx ++ ) jpayne@69: if ( qctx < 1 || qctx > alenQ ) jpayne@69: snp -> ctxQ . push_back (SEQEND_CHAR); jpayne@69: else if ( qctx == qpos - 1 ) jpayne@69: snp -> ctxQ . push_back (snp -> cQ); jpayne@69: else jpayne@69: snp -> ctxQ . push_back (toupper (Q [qi] [qctx])); jpayne@69: jpayne@69: (*li) -> snps . push_back (snp); jpayne@69: } jpayne@69: jpayne@69: //-- For the indel jpayne@69: snp = new SNP_t; jpayne@69: snp -> ep = *ei; jpayne@69: snp -> lp = *li; jpayne@69: jpayne@69: for ( rctx = rpos - OPT_Context; rctx < rpos; rctx ++ ) jpayne@69: if ( rctx < 1 ) jpayne@69: snp -> ctxR . push_back (SEQEND_CHAR); jpayne@69: else jpayne@69: snp -> ctxR . push_back (toupper (R [ri] [rctx])); jpayne@69: jpayne@69: for ( qctx = qpos - OPT_Context; qctx < qpos; qctx ++ ) jpayne@69: if ( qctx < 1 ) jpayne@69: snp -> ctxQ . push_back (SEQEND_CHAR); jpayne@69: else jpayne@69: snp -> ctxQ . push_back (toupper (Q [qi] [qctx])); jpayne@69: jpayne@69: if ( sign > 0 ) jpayne@69: { jpayne@69: snp -> pR = Norm (rpos, lenR, frameR, graph.datatype); jpayne@69: if ( frameQ > 0 ) jpayne@69: snp -> pQ = Norm (qpos - 1, lenQ, frameQ, graph.datatype); jpayne@69: else jpayne@69: snp -> pQ = Norm (qpos, lenQ, frameQ, graph.datatype); jpayne@69: jpayne@69: snp -> cR = toupper (R [ri] [rpos ++]); jpayne@69: snp -> cQ = INDEL_CHAR; jpayne@69: jpayne@69: remain -= i; jpayne@69: rctx ++; jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: snp -> pQ = Norm (qpos, lenQ, frameQ, graph.datatype); jpayne@69: if ( frameR > 0 ) jpayne@69: snp -> pR = Norm (rpos - 1, lenR, frameR, graph.datatype); jpayne@69: else jpayne@69: snp -> pR = Norm (rpos, lenR, frameR, graph.datatype); jpayne@69: jpayne@69: snp -> cR = INDEL_CHAR; jpayne@69: snp -> cQ = toupper (Q [qi] [qpos ++]); jpayne@69: jpayne@69: remain -= i - 1; jpayne@69: qctx ++; jpayne@69: } jpayne@69: jpayne@69: snp -> ctxR . push_back (snp -> cR); jpayne@69: for ( ; rctx < rpos + OPT_Context; rctx ++ ) jpayne@69: if ( rctx > alenR ) jpayne@69: snp -> ctxR . push_back (SEQEND_CHAR); jpayne@69: else jpayne@69: snp -> ctxR . push_back (toupper (R [ri] [rctx])); jpayne@69: jpayne@69: snp -> ctxQ . push_back (snp -> cQ); jpayne@69: for ( ; qctx < qpos + OPT_Context; qctx ++ ) jpayne@69: if ( qctx > alenQ ) jpayne@69: snp -> ctxQ . push_back (SEQEND_CHAR); jpayne@69: else jpayne@69: snp -> ctxQ . push_back (toupper (Q [qi] [qctx])); jpayne@69: jpayne@69: (*li) -> snps . push_back (snp); jpayne@69: } jpayne@69: jpayne@69: //-- For all SNPs after the final indel jpayne@69: for ( i = 0; i < remain; i ++ ) jpayne@69: if ( R [ri] [rpos ++] != Q [qi] [qpos ++] ) jpayne@69: { jpayne@69: if ( graph.datatype == NUCMER_DATA && jpayne@69: CompareIUPAC (R [ri][rpos-1], Q [qi][qpos-1]) ) jpayne@69: continue; jpayne@69: jpayne@69: snp = new SNP_t; jpayne@69: snp -> ep = *ei; jpayne@69: snp -> lp = *li; jpayne@69: snp -> pR = Norm (rpos-1, lenR, frameR, graph.datatype); jpayne@69: snp -> pQ = Norm (qpos-1, lenQ, frameQ, graph.datatype); jpayne@69: snp -> cR = toupper (R [ri] [rpos-1]); jpayne@69: snp -> cQ = toupper (Q [qi] [qpos-1]); jpayne@69: jpayne@69: for ( rctx = rpos - OPT_Context - 1; jpayne@69: rctx < rpos + OPT_Context; rctx ++ ) jpayne@69: if ( rctx < 1 || rctx > alenR ) jpayne@69: snp -> ctxR . push_back (SEQEND_CHAR); jpayne@69: else if ( rctx == rpos - 1 ) jpayne@69: snp -> ctxR . push_back (snp -> cR); jpayne@69: else jpayne@69: snp -> ctxR . push_back (toupper (R [ri] [rctx])); jpayne@69: jpayne@69: for ( qctx = qpos - OPT_Context - 1; jpayne@69: qctx < qpos + OPT_Context; qctx ++ ) jpayne@69: if ( qctx < 1 || qctx > alenQ ) jpayne@69: snp -> ctxQ . push_back (SEQEND_CHAR); jpayne@69: else if ( qctx == qpos - 1 ) jpayne@69: snp -> ctxQ . push_back (snp -> cQ); jpayne@69: else jpayne@69: snp -> ctxQ . push_back (toupper (Q [qi] [qctx])); jpayne@69: jpayne@69: (*li) -> snps . push_back (snp); jpayne@69: } jpayne@69: jpayne@69: jpayne@69: //-- Sort SNPs and calculate distances jpayne@69: if ( OPT_SortReference ) jpayne@69: { jpayne@69: sort ((*li)->snps.begin( ), (*li)->snps.end( ), SNP_R_Sort( )); jpayne@69: jpayne@69: for ( si = (*li)->snps.begin(); si != (*li)->snps.end(); ++ si ) jpayne@69: { jpayne@69: psi = si - 1; jpayne@69: nsi = si + 1; jpayne@69: jpayne@69: (*si) -> buff = 1 + jpayne@69: ((*si)->pR - (*li)->loR < (*li)->hiR - (*si)->pR ? jpayne@69: (*si)->pR - (*li)->loR : (*li)->hiR - (*si)->pR); jpayne@69: jpayne@69: if ( psi >= (*li) -> snps . begin( ) && jpayne@69: (*si)->pR - (*psi)->pR < (*si)->buff ) jpayne@69: (*si) -> buff = (*si)->pR - (*psi)->pR; jpayne@69: jpayne@69: if ( nsi < (*li) -> snps . end( ) && jpayne@69: (*nsi)->pR - (*si)->pR < (*si)->buff ) jpayne@69: (*si) -> buff = (*nsi)->pR - (*si)->pR; jpayne@69: } jpayne@69: } jpayne@69: else jpayne@69: { jpayne@69: sort ((*li)->snps.begin( ), (*li)->snps.end( ), SNP_Q_Sort( )); jpayne@69: jpayne@69: for ( si = (*li)->snps.begin(); si != (*li)->snps.end(); ++ si ) jpayne@69: { jpayne@69: psi = si - 1; jpayne@69: nsi = si + 1; jpayne@69: jpayne@69: (*si) -> buff = 1 + jpayne@69: ((*si)->pQ - (*li)->loQ < (*li)->hiQ - (*si)->pQ ? jpayne@69: (*si)->pQ - (*li)->loQ : (*li)->hiQ - (*si)->pQ); jpayne@69: jpayne@69: if ( psi >= (*li) -> snps . begin( ) && jpayne@69: (*si)->pQ - (*psi)->pQ < (*si)->buff ) jpayne@69: (*si) -> buff = (*si)->pQ - (*psi)->pQ; jpayne@69: jpayne@69: if ( nsi < (*li) -> snps . end( ) && jpayne@69: (*nsi)->pQ - (*si)->pQ < (*si)->buff ) jpayne@69: (*si) -> buff = (*nsi)->pQ - (*si)->pQ; jpayne@69: } jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: //-- Clear up the seq jpayne@69: for ( i = 1; i <= 6; i ++ ) jpayne@69: { jpayne@69: free (R[i]); jpayne@69: free (Q[i]); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: if ( OPT_SelectAligns && ! OPT_Aligns . empty( ) ) jpayne@69: { jpayne@69: cerr << "ERROR: One or more alignments from stdin could not be found\n"; jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------ PrintHuman ----// jpayne@69: void PrintHuman (const vector & snps, jpayne@69: const DeltaGraph_t & graph) jpayne@69: { jpayne@69: vector::const_iterator si; jpayne@69: long dist, distR, distQ; jpayne@69: int ctxw = 2 * OPT_Context + 1; jpayne@69: int ctxc = ctxw < 7 ? 7 : ctxw; jpayne@69: jpayne@69: if ( OPT_PrintHeader ) jpayne@69: { jpayne@69: printf ("%s %s\n%s\n\n", jpayne@69: graph . refpath . c_str( ), graph . qrypath . c_str( ), jpayne@69: graph . datatype == NUCMER_DATA ? "NUCMER" : "PROMER"); jpayne@69: printf ("%8s %5s %-8s | ", "[P1]", "[SUB]", "[P2]"); jpayne@69: printf ("%8s %8s | ", "[BUFF]", "[DIST]"); jpayne@69: if ( OPT_ShowConflict ) jpayne@69: printf ("%4s %4s | ", "[R]", "[Q]"); jpayne@69: if ( OPT_ShowLength ) jpayne@69: printf ("%8s %8s | ", "[LEN R]", "[LEN Q]"); jpayne@69: if ( OPT_Context ) jpayne@69: { jpayne@69: for ( int i = 0; i < ctxc - 7; i ++ ) jpayne@69: putchar (' '); jpayne@69: printf (" [CTX R] "); jpayne@69: for ( int i = 0; i < ctxc - 7; i ++ ) jpayne@69: putchar (' '); jpayne@69: printf ("[CTX Q] | "); jpayne@69: } jpayne@69: printf ("%5s ", "[FRM]"); jpayne@69: printf ("%s", "[TAGS]"); jpayne@69: printf ("\n"); jpayne@69: jpayne@69: if ( OPT_ShowConflict ) jpayne@69: printf ("============="); jpayne@69: if ( OPT_ShowLength ) jpayne@69: printf ("====================="); jpayne@69: if ( OPT_Context ) jpayne@69: for ( int i = 0; i < 2 * ctxc + 7; i ++ ) jpayne@69: putchar ('='); jpayne@69: printf("=================================" jpayne@69: "==================================\n"); jpayne@69: } jpayne@69: jpayne@69: for ( si = snps . begin( ); si != snps . end( ); ++ si ) jpayne@69: { jpayne@69: distR = (*si)->pR < (signed long)(*si)->ep->refnode->len - (*si)->pR + 1 ? jpayne@69: (*si)->pR : (*si)->ep->refnode->len - (*si)->pR + 1; jpayne@69: distQ = (*si)->pQ < (signed long)(*si)->ep->qrynode->len - (*si)->pQ + 1 ? jpayne@69: (*si)->pQ : (*si)->ep->qrynode->len - (*si)->pQ + 1; jpayne@69: dist = distR < distQ ? distR : distQ; jpayne@69: jpayne@69: printf ("%8ld %c %c %-8ld | ", jpayne@69: (*si)->pR, (*si)->cR, (*si)->cQ, (*si)->pQ); jpayne@69: printf ("%8ld %8ld | ", (*si)->buff, dist); jpayne@69: if ( OPT_ShowConflict ) jpayne@69: printf ("%4d %4d | ", (*si)->conR, (*si)->conQ); jpayne@69: if ( OPT_ShowLength ) jpayne@69: printf ("%8ld %8ld | ", jpayne@69: (*si)->ep->refnode->len, (*si)->ep->qrynode->len); jpayne@69: if ( OPT_Context ) jpayne@69: { jpayne@69: for ( int i = 0; i < ctxc - ctxw; i ++ ) jpayne@69: putchar (' '); jpayne@69: printf (" %s ", (*si)->ctxR.c_str( )); jpayne@69: for ( int i = 0; i < ctxc - ctxw; i ++ ) jpayne@69: putchar (' '); jpayne@69: printf ("%s | ", (*si)->ctxQ.c_str( )); jpayne@69: } jpayne@69: printf ("%2d %2d ", (*si)->lp->frmR, (*si)->lp->frmQ); jpayne@69: printf ("%s\t%s", jpayne@69: (*si)->ep->refnode->id->c_str( ), jpayne@69: (*si)->ep->qrynode->id->c_str( )); jpayne@69: printf ("\n"); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //---------------------------------------------------------- PrintTabular ----// jpayne@69: void PrintTabular (const vector & snps, jpayne@69: const DeltaGraph_t & graph) jpayne@69: { jpayne@69: vector::const_iterator si; jpayne@69: long dist, distR, distQ; jpayne@69: jpayne@69: if ( OPT_PrintHeader ) jpayne@69: { jpayne@69: printf ("%s %s\n%s\n\n", jpayne@69: graph . refpath . c_str( ), graph . qrypath . c_str( ), jpayne@69: graph . datatype == NUCMER_DATA ? "NUCMER" : "PROMER"); jpayne@69: printf ("%s\t%s\t%s\t%s\t", "[P1]", "[SUB]", "[SUB]", "[P2]"); jpayne@69: printf ("%s\t%s\t", "[BUFF]", "[DIST]"); jpayne@69: if ( OPT_ShowConflict ) jpayne@69: printf ("%s\t%s\t", "[R]", "[Q]"); jpayne@69: if ( OPT_ShowLength ) jpayne@69: printf ("%s\t%s\t", "[LEN R]", "[LEN Q]"); jpayne@69: if ( OPT_Context ) jpayne@69: printf ("%s\t%s\t", "[CTX R]", "[CTX Q]"); jpayne@69: printf ("%s\t", "[FRM]"); jpayne@69: printf ("%s\n", "[TAGS]"); jpayne@69: } jpayne@69: jpayne@69: for ( si = snps . begin( ); si != snps . end( ); ++ si ) jpayne@69: { jpayne@69: distR = (*si)->pR < (signed long)(*si)->ep->refnode->len - (*si)->pR + 1 ? jpayne@69: (*si)->pR : (*si)->ep->refnode->len - (*si)->pR + 1; jpayne@69: distQ = (*si)->pQ < (signed long)(*si)->ep->qrynode->len - (*si)->pQ + 1 ? jpayne@69: (*si)->pQ : (*si)->ep->qrynode->len - (*si)->pQ + 1; jpayne@69: dist = distR < distQ ? distR : distQ; jpayne@69: jpayne@69: printf ("%ld\t%c\t%c\t%ld\t", jpayne@69: (*si)->pR, (*si)->cR, (*si)->cQ, (*si)->pQ); jpayne@69: printf ("%ld\t%ld\t", (*si)->buff, dist); jpayne@69: if ( OPT_ShowConflict ) jpayne@69: printf ("%d\t%d\t", (*si)->conR, (*si)->conQ); jpayne@69: if ( OPT_ShowLength ) jpayne@69: printf ("%ld\t%ld\t", jpayne@69: (*si)->ep->refnode->len, (*si)->ep->qrynode->len); jpayne@69: if ( OPT_Context ) jpayne@69: printf ("%s\t%s\t", (*si)->ctxR.c_str( ), (*si)->ctxQ.c_str( )); jpayne@69: printf ("%d\t%d\t", (*si)->lp->frmR, (*si)->lp->frmQ); jpayne@69: printf ("%s\t%s", jpayne@69: (*si)->ep->refnode->id->c_str( ), jpayne@69: (*si)->ep->qrynode->id->c_str( )); jpayne@69: printf ("\n"); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //---------------------------------------------------------- SelectAligns ----// jpayne@69: void SelectAligns ( ) jpayne@69: { jpayne@69: string line, part; jpayne@69: string s1, e1, s2, e2, tR, tQ; jpayne@69: istringstream sin; jpayne@69: ostringstream sout; jpayne@69: jpayne@69: while ( cin ) jpayne@69: { jpayne@69: getline (cin, line); jpayne@69: if ( line . empty( ) ) jpayne@69: continue; jpayne@69: jpayne@69: sin . clear( ); jpayne@69: sin . str (line); jpayne@69: sin >> s1 >> e1 >> s2; jpayne@69: if ( s2 == "|" ) sin >> s2; jpayne@69: sin >> e2 >> tR >> tQ; jpayne@69: jpayne@69: if ( sin . fail( ) ) jpayne@69: { jpayne@69: cerr << "ERROR: Could not parse input from stdin\n"; jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: jpayne@69: while ( sin >> part ) jpayne@69: { jpayne@69: tR = tQ; jpayne@69: tQ = part; jpayne@69: } jpayne@69: jpayne@69: sout . clear( ); jpayne@69: sout . str (""); jpayne@69: sout << s1 << ' ' << e1 << ' ' jpayne@69: << s2 << ' ' << e2 << ' ' jpayne@69: << tR << ' ' << tQ; jpayne@69: jpayne@69: OPT_Aligns . insert (sout . str( )); jpayne@69: } jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------- ParseArgs ----// jpayne@69: void ParseArgs (int argc, char ** argv) jpayne@69: { jpayne@69: int ch, errflg = 0; jpayne@69: optarg = NULL; jpayne@69: jpayne@69: while ( !errflg && jpayne@69: ((ch = getopt (argc, argv, "ChHIlqrSTx:")) != EOF) ) jpayne@69: switch (ch) jpayne@69: { jpayne@69: case 'C': jpayne@69: OPT_ShowConflict = false; jpayne@69: break; jpayne@69: jpayne@69: case 'h': jpayne@69: PrintHelp (argv[0]); jpayne@69: exit (EXIT_SUCCESS); jpayne@69: break; jpayne@69: jpayne@69: case 'H': jpayne@69: OPT_PrintHeader = false; jpayne@69: break; jpayne@69: jpayne@69: case 'I': jpayne@69: OPT_ShowIndels = false; jpayne@69: break; jpayne@69: jpayne@69: case 'l': jpayne@69: OPT_ShowLength = true; jpayne@69: break; jpayne@69: jpayne@69: case 'q': jpayne@69: OPT_SortQuery = true; jpayne@69: break; jpayne@69: jpayne@69: case 'r': jpayne@69: OPT_SortReference = true; jpayne@69: break; jpayne@69: jpayne@69: case 'S': jpayne@69: OPT_SelectAligns = true; jpayne@69: break; jpayne@69: jpayne@69: case 'T': jpayne@69: OPT_PrintTabular = true; jpayne@69: break; jpayne@69: jpayne@69: case 'x': jpayne@69: OPT_Context = atoi (optarg); jpayne@69: break; jpayne@69: jpayne@69: default: jpayne@69: errflg ++; jpayne@69: } jpayne@69: jpayne@69: if ( OPT_Context < 0 ) jpayne@69: { jpayne@69: cerr << "ERROR: SNP context must be a positive int\n"; jpayne@69: errflg ++; jpayne@69: } jpayne@69: jpayne@69: if ( OPT_SortReference && OPT_SortQuery ) jpayne@69: cerr << "WARNING: both -r and -q were passed, -q ignored\n"; jpayne@69: jpayne@69: if ( !OPT_SortReference && !OPT_SortQuery ) jpayne@69: OPT_SortReference = true; jpayne@69: jpayne@69: if ( errflg > 0 || optind != argc - 1 ) jpayne@69: { jpayne@69: PrintUsage (argv[0]); jpayne@69: cerr << "Try '" << argv[0] << " -h' for more information.\n"; jpayne@69: exit (EXIT_FAILURE); jpayne@69: } jpayne@69: jpayne@69: OPT_AlignName = argv [optind ++]; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------- PrintHelp ----// jpayne@69: void PrintHelp (const char * s) jpayne@69: { jpayne@69: PrintUsage (s); jpayne@69: cerr jpayne@69: << "-C Do not report SNPs from alignments with an ambiguous\n" jpayne@69: << " mapping, i.e. only report SNPs where the [R] and [Q]\n" jpayne@69: << " columns equal 0 and do not output these columns\n" jpayne@69: << "-h Display help information\n" jpayne@69: << "-H Do not print the output header\n" jpayne@69: << "-I Do not report indels\n" jpayne@69: << "-l Include sequence length information in the output\n" jpayne@69: << "-q Sort output lines by query IDs and SNP positions\n" jpayne@69: << "-r Sort output lines by reference IDs and SNP positions\n" jpayne@69: << "-S Specify which alignments to report by passing\n" jpayne@69: << " 'show-coords' lines to stdin\n" jpayne@69: << "-T Switch to tab-delimited format\n" jpayne@69: << "-x int Include x characters of surrounding SNP context in the\n" jpayne@69: << " output, default " jpayne@69: << OPT_Context << endl jpayne@69: << endl; jpayne@69: jpayne@69: cerr jpayne@69: << " Input is the .delta output of either the nucmer or promer program\n" jpayne@69: << "passed on the command line.\n" jpayne@69: << " Output is to stdout, and consists of a list of SNPs (or amino acid\n" jpayne@69: << "substitutions for promer) with positions and other useful info.\n" jpayne@69: << "Output will be sorted with -r by default and the [BUFF] column will\n" jpayne@69: << "always refer to the sequence whose positions have been sorted. This\n" jpayne@69: << "value specifies the distance from this SNP to the nearest mismatch\n" jpayne@69: << "(end of alignment, indel, SNP, etc) in the same alignment, while the\n" jpayne@69: << "[DIST] column specifies the distance from this SNP to the nearest\n" jpayne@69: << "sequence end. SNPs for which the [R] and [Q] columns are greater than\n" jpayne@69: << "0 should be evaluated with caution, as these columns specify the\n" jpayne@69: << "number of other alignments which overlap this position. Use -C to\n" jpayne@69: << "assure SNPs are only reported from unique alignment regions.\n" jpayne@69: << endl; jpayne@69: jpayne@69: return; jpayne@69: } jpayne@69: jpayne@69: jpayne@69: jpayne@69: jpayne@69: //------------------------------------------------------------ PrintUsage ----// jpayne@69: void PrintUsage (const char * s) jpayne@69: { jpayne@69: cerr jpayne@69: << "\nUSAGE: " << s << " [options] \n\n"; jpayne@69: return; jpayne@69: }