annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/delta.cc @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 ////////////////////////////////////////////////////////////////////////////////
jpayne@69 2 //! \file
jpayne@69 3 //! \author Adam M Phillippy
jpayne@69 4 //! \date 03/26/2003
jpayne@69 5 //!
jpayne@69 6 //! \brief Source for non-inline member functions of delta.hh
jpayne@69 7 //!
jpayne@69 8 //! \see delta.hh
jpayne@69 9 ////////////////////////////////////////////////////////////////////////////////
jpayne@69 10
jpayne@69 11 #include "delta.hh"
jpayne@69 12 #include <map>
jpayne@69 13 #include <vector>
jpayne@69 14 #include <cmath>
jpayne@69 15 #include <sstream>
jpayne@69 16 #include <algorithm>
jpayne@69 17 using namespace std;
jpayne@69 18
jpayne@69 19
jpayne@69 20 inline long ScoreLocal
jpayne@69 21 (long scorej,
jpayne@69 22 long leni, long lenj,
jpayne@69 23 long olap, float idyi, float maxolap);
jpayne@69 24
jpayne@69 25
jpayne@69 26 struct LIS_t
jpayne@69 27 //!< LIS score
jpayne@69 28 {
jpayne@69 29 DeltaEdgelet_t * a;
jpayne@69 30 long score;
jpayne@69 31 long diff;
jpayne@69 32 long from;
jpayne@69 33 bool used;
jpayne@69 34 };
jpayne@69 35
jpayne@69 36
jpayne@69 37 struct EdgeletQCmp_t
jpayne@69 38 //!< Compares query lo coord
jpayne@69 39 {
jpayne@69 40 bool operator() (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
jpayne@69 41 {
jpayne@69 42 //-- Sorting by score in the event of a tie ensures that when building
jpayne@69 43 // LIS chains, the highest scoring ones get seen first, thus avoiding
jpayne@69 44 // overlap problems
jpayne@69 45
jpayne@69 46 if ( i->loQ < j->loQ )
jpayne@69 47 return true;
jpayne@69 48 else if ( i->loQ > j->loQ )
jpayne@69 49 return false;
jpayne@69 50 else if ( ScoreLocal (0, i->hiQ - i->loQ + 1, 0, 0, i->idy, 0) >
jpayne@69 51 ScoreLocal (0, j->hiQ - j->loQ + 1, 0, 0, j->idy, 0) )
jpayne@69 52 return true;
jpayne@69 53 else
jpayne@69 54 return false;
jpayne@69 55 }
jpayne@69 56 };
jpayne@69 57
jpayne@69 58
jpayne@69 59 struct EdgeletRCmp_t
jpayne@69 60 //!< Compares reference lo coord
jpayne@69 61 {
jpayne@69 62 bool operator() (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
jpayne@69 63 {
jpayne@69 64 //-- Sorting by score in the event of a tie ensures that when building
jpayne@69 65 // LIS chains, the highest scoring ones get seen first, thus avoiding
jpayne@69 66 // overlap problems
jpayne@69 67
jpayne@69 68 if ( i->loR < j->loR )
jpayne@69 69 return true;
jpayne@69 70 else if ( i->loR > j->loR )
jpayne@69 71 return false;
jpayne@69 72 else if ( ScoreLocal (0, i->hiR - i->loR + 1, 0, 0, i->idy, 0) >
jpayne@69 73 ScoreLocal (0, j->hiR - j->loR + 1, 0, 0, j->idy, 0) )
jpayne@69 74 return true;
jpayne@69 75 else
jpayne@69 76 return false;
jpayne@69 77 }
jpayne@69 78 };
jpayne@69 79
jpayne@69 80
jpayne@69 81 struct NULLPred_t
jpayne@69 82 //!< Return true if pointer is not NULL
jpayne@69 83 {
jpayne@69 84 bool operator() (const void * i) const
jpayne@69 85 { return (i != NULL); }
jpayne@69 86 };
jpayne@69 87
jpayne@69 88
jpayne@69 89 //------------------------------------------------------------ DiffAligns ------
jpayne@69 90 inline long DiffAligns
jpayne@69 91 (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j)
jpayne@69 92 {
jpayne@69 93 return
jpayne@69 94 ( ( j->loR < i->loR ) ? labs (j->hiR - i->loR) : labs (i->hiR - j->loR) ) +
jpayne@69 95 ( ( j->loQ < i->loQ ) ? labs (j->hiQ - i->loQ) : labs (i->hiQ - j->loQ) );
jpayne@69 96 }
jpayne@69 97
jpayne@69 98
jpayne@69 99 //------------------------------------------------------------ PickBest --------
jpayne@69 100 long PickBest
jpayne@69 101 (const LIS_t * lis, const vector<long> & allbest, float epsilon)
jpayne@69 102 {
jpayne@69 103 long size = allbest.size();
jpayne@69 104 if ( epsilon < 0 && size != 0 )
jpayne@69 105 {
jpayne@69 106 long eqc = 0;
jpayne@69 107 for ( ; eqc < size; ++ eqc )
jpayne@69 108 if ( lis[allbest[eqc]].diff != lis[allbest.front()].diff )
jpayne@69 109 break;
jpayne@69 110
jpayne@69 111 return (int)((double)eqc*rand() / (RAND_MAX + 1.0));
jpayne@69 112 }
jpayne@69 113 return size;
jpayne@69 114 }
jpayne@69 115
jpayne@69 116
jpayne@69 117 //------------------------------------------------------------------ RevC ------
jpayne@69 118 inline long RevC (const long & coord,
jpayne@69 119 const long & len)
jpayne@69 120 {
jpayne@69 121 return len - coord + 1;
jpayne@69 122 }
jpayne@69 123
jpayne@69 124
jpayne@69 125 //------------------------------------------------------------ ScoreLocal ------
jpayne@69 126 inline long ScoreLocal
jpayne@69 127 (long scorej, long leni, long lenj,
jpayne@69 128 long olap, float idyi, float maxolap)
jpayne@69 129 {
jpayne@69 130 if ( olap > 0 &&
jpayne@69 131 ((float)olap / (float)leni * 100.0 > maxolap ||
jpayne@69 132 (float)olap / (float)lenj * 100.0 > maxolap) )
jpayne@69 133 return -1;
jpayne@69 134 else
jpayne@69 135 return (scorej + (long)((leni - olap) * pow (idyi, 2)));
jpayne@69 136 }
jpayne@69 137
jpayne@69 138
jpayne@69 139 //----------------------------------------------------------- ScoreGlobal ------
jpayne@69 140 inline long ScoreGlobal
jpayne@69 141 (long scorej, long leni, long olap, float idyi)
jpayne@69 142 {
jpayne@69 143 return (scorej + (long)((leni - olap) * pow (idyi, 2)));
jpayne@69 144 }
jpayne@69 145
jpayne@69 146
jpayne@69 147 //------------------------------------------------------------------ Swap ------
jpayne@69 148 inline void Swap (long & a, long & b)
jpayne@69 149 {
jpayne@69 150 long t = a; a = b; b = t;
jpayne@69 151 }
jpayne@69 152
jpayne@69 153
jpayne@69 154 //------------------------------------------------------------ UpdateBest ------
jpayne@69 155 bool UpdateBest
jpayne@69 156 (LIS_t * lis, long size, vector<long> & allbest, float epsilon)
jpayne@69 157 {
jpayne@69 158 if ( size == 0 )
jpayne@69 159 return false;
jpayne@69 160
jpayne@69 161 long best, i;
jpayne@69 162
jpayne@69 163 //-- Find the best
jpayne@69 164 for ( best = 0; best < size; ++ best )
jpayne@69 165 if ( ! lis[best].used )
jpayne@69 166 break;
jpayne@69 167 for ( i = best + 1; i < size; ++ i )
jpayne@69 168 if ( ! lis[i].used
jpayne@69 169 &&
jpayne@69 170 ( lis[i].score > lis[best].score
jpayne@69 171 ||
jpayne@69 172 (lis[i].score == lis[best].score &&
jpayne@69 173 lis[i].diff < lis[best].diff) ) )
jpayne@69 174 best = i;
jpayne@69 175
jpayne@69 176 //-- Nonequivalent
jpayne@69 177 if ( ! allbest.empty()
jpayne@69 178 &&
jpayne@69 179 (best == size
jpayne@69 180 ||
jpayne@69 181 (epsilon < 0 &&
jpayne@69 182 lis[best].score < lis[allbest.front()].score)
jpayne@69 183 ||
jpayne@69 184 (epsilon >= 0 &&
jpayne@69 185 (float)(lis[allbest.front()].score - lis[best].score) /
jpayne@69 186 (float)(lis[allbest.front()].score) * 100.0 > epsilon)) )
jpayne@69 187 return false;
jpayne@69 188
jpayne@69 189 //-- Equivalent
jpayne@69 190 allbest.push_back (best);
jpayne@69 191
jpayne@69 192 for ( i = best; i >= 0 && i < size; i = lis[i].from )
jpayne@69 193 lis[i].used = true;
jpayne@69 194
jpayne@69 195 return true;
jpayne@69 196 }
jpayne@69 197
jpayne@69 198
jpayne@69 199 //===================================================== DeltaReader_t ==========
jpayne@69 200 //----------------------------------------------------- open -------------------
jpayne@69 201 void DeltaReader_t::open
jpayne@69 202 (const string & delta_path)
jpayne@69 203 {
jpayne@69 204 delta_path_m = delta_path;
jpayne@69 205
jpayne@69 206 //-- Open the delta file
jpayne@69 207 delta_stream_m.open (delta_path_m.c_str ());
jpayne@69 208 checkStream ();
jpayne@69 209
jpayne@69 210 //-- Read the file header
jpayne@69 211 delta_stream_m >> reference_path_m;
jpayne@69 212 delta_stream_m >> query_path_m;
jpayne@69 213 delta_stream_m >> data_type_m;
jpayne@69 214 if ( (data_type_m != NUCMER_STRING && data_type_m != PROMER_STRING) )
jpayne@69 215 delta_stream_m.setstate (ios::failbit);
jpayne@69 216 checkStream ();
jpayne@69 217 is_open_m = true;
jpayne@69 218
jpayne@69 219 //-- Advance to first record header
jpayne@69 220 while ( delta_stream_m.peek () != '>' )
jpayne@69 221 if ( delta_stream_m.get () == EOF )
jpayne@69 222 break;
jpayne@69 223 }
jpayne@69 224
jpayne@69 225
jpayne@69 226 //----------------------------------------------------- readNextAlignment ------
jpayne@69 227 void DeltaReader_t::readNextAlignment
jpayne@69 228 (DeltaAlignment_t & align, const bool read_deltas)
jpayne@69 229 {
jpayne@69 230 long delta;
jpayne@69 231 float total;
jpayne@69 232
jpayne@69 233 //-- Make way for the new alignment
jpayne@69 234 align.clear ();
jpayne@69 235
jpayne@69 236 //-- Read the alignment header
jpayne@69 237 delta_stream_m >> align.sR;
jpayne@69 238 delta_stream_m >> align.eR;
jpayne@69 239 delta_stream_m >> align.sQ;
jpayne@69 240 delta_stream_m >> align.eQ;
jpayne@69 241 delta_stream_m >> align.idyc;
jpayne@69 242 delta_stream_m >> align.simc;
jpayne@69 243 delta_stream_m >> align.stpc;
jpayne@69 244 if ( align.sR <= 0 || align.eR <= 0 ||
jpayne@69 245 align.sQ <= 0 || align.eQ <= 0 ||
jpayne@69 246 align.idyc < 0 || align.simc < 0 || align.stpc < 0 )
jpayne@69 247 delta_stream_m.setstate (ios::failbit);
jpayne@69 248 checkStream ();
jpayne@69 249
jpayne@69 250 total = labs(align.eR - align.sR) + 1.0;
jpayne@69 251 if ( data_type_m == PROMER_STRING )
jpayne@69 252 total /= 3.0;
jpayne@69 253
jpayne@69 254 //-- Get all the deltas
jpayne@69 255 do
jpayne@69 256 {
jpayne@69 257 delta_stream_m >> delta;
jpayne@69 258 checkStream ();
jpayne@69 259
jpayne@69 260 if ( delta < 0 )
jpayne@69 261 total ++;
jpayne@69 262 if ( read_deltas )
jpayne@69 263 align.deltas.push_back (delta);
jpayne@69 264 } while ( delta != 0 );
jpayne@69 265
jpayne@69 266 //-- Flush the remaining whitespace
jpayne@69 267 while ( delta_stream_m.get () != '\n' );
jpayne@69 268
jpayne@69 269 //-- Calculate the identity, similarity and stopity
jpayne@69 270 align.idy = (total - (float)align.idyc) / total * 100.0;
jpayne@69 271 align.sim = (total - (float)align.simc) / total * 100.0;
jpayne@69 272 align.stp = (float)align.stpc / (total * 2.0) * 100.0;
jpayne@69 273 }
jpayne@69 274
jpayne@69 275
jpayne@69 276 //----------------------------------------------------- readNextRecord ---------
jpayne@69 277 bool DeltaReader_t::readNextRecord (const bool read_deltas)
jpayne@69 278 {
jpayne@69 279 //-- If EOF or any other abnormality
jpayne@69 280 if ( delta_stream_m.peek () != '>' )
jpayne@69 281 return false;
jpayne@69 282
jpayne@69 283 //-- Make way for the new record
jpayne@69 284 record_m.clear ();
jpayne@69 285 is_record_m = true;
jpayne@69 286
jpayne@69 287 //-- Read the record header
jpayne@69 288 delta_stream_m.get ();
jpayne@69 289 delta_stream_m >> record_m.idR;
jpayne@69 290 delta_stream_m >> record_m.idQ;
jpayne@69 291 delta_stream_m >> record_m.lenR;
jpayne@69 292 delta_stream_m >> record_m.lenQ;
jpayne@69 293 if ( record_m.lenR <= 0 || record_m.lenQ <= 0 )
jpayne@69 294 delta_stream_m.setstate (ios::failbit);
jpayne@69 295 checkStream ();
jpayne@69 296
jpayne@69 297 //-- Flush the remaining whitespace
jpayne@69 298 while ( delta_stream_m.get () != '\n' );
jpayne@69 299
jpayne@69 300 //-- For each alignment...
jpayne@69 301 DeltaAlignment_t align;
jpayne@69 302 while ( delta_stream_m.peek () != '>' &&
jpayne@69 303 delta_stream_m.peek () != EOF )
jpayne@69 304 {
jpayne@69 305 readNextAlignment (align, read_deltas);
jpayne@69 306 record_m.aligns.push_back (align);
jpayne@69 307 }
jpayne@69 308
jpayne@69 309 return true;
jpayne@69 310 }
jpayne@69 311
jpayne@69 312
jpayne@69 313 //===================================================== DeltaEdge_t ============
jpayne@69 314 //------------------------------------------------------build ------------------
jpayne@69 315 void DeltaEdge_t::build (const DeltaRecord_t & rec)
jpayne@69 316 {
jpayne@69 317 stringstream ss;
jpayne@69 318 vector<long>::const_iterator di;
jpayne@69 319 DeltaEdgelet_t * p;
jpayne@69 320
jpayne@69 321 vector<DeltaAlignment_t>::const_iterator i;
jpayne@69 322 for ( i = rec.aligns.begin(); i != rec.aligns.end(); ++ i )
jpayne@69 323 {
jpayne@69 324 //-- Set the edgelet
jpayne@69 325 p = new DeltaEdgelet_t();
jpayne@69 326
jpayne@69 327 p->edge = this;
jpayne@69 328
jpayne@69 329 p->idy = i->idy / 100.0;
jpayne@69 330 p->sim = i->sim / 100.0;
jpayne@69 331 p->stp = i->stp / 100.0;
jpayne@69 332
jpayne@69 333 p->idyc = i->idyc;
jpayne@69 334 p->simc = i->simc;
jpayne@69 335 p->stpc = i->stpc;
jpayne@69 336
jpayne@69 337 p->loR = i->sR;
jpayne@69 338 p->hiR = i->eR;
jpayne@69 339 p->loQ = i->sQ;
jpayne@69 340 p->hiQ = i->eQ;
jpayne@69 341
jpayne@69 342 p->dirR = p->hiR < p->loR ? REVERSE_DIR : FORWARD_DIR;
jpayne@69 343 p->dirQ = p->hiQ < p->loQ ? REVERSE_DIR : FORWARD_DIR;
jpayne@69 344
jpayne@69 345 //-- Get the delta information
jpayne@69 346 for ( di = i->deltas.begin(); di != i->deltas.end(); ++ di )
jpayne@69 347 {
jpayne@69 348 ss << *di << '\n';
jpayne@69 349 p->delta.append (ss.str());
jpayne@69 350 ss.str ("");
jpayne@69 351 }
jpayne@69 352
jpayne@69 353 //-- Force loR < hiR && loQ < hiQ
jpayne@69 354 if ( p->dirR == REVERSE_DIR )
jpayne@69 355 Swap (p->loR, p->hiR);
jpayne@69 356 if ( p->dirQ == REVERSE_DIR )
jpayne@69 357 Swap (p->loQ, p->hiQ);
jpayne@69 358
jpayne@69 359 edgelets.push_back (p);
jpayne@69 360 }
jpayne@69 361 }
jpayne@69 362
jpayne@69 363
jpayne@69 364 //===================================================== DeltaGraph_t ===========
jpayne@69 365 //----------------------------------------------------- build ------------------
jpayne@69 366 //! \brief Build a new graph from a deltafile
jpayne@69 367 //!
jpayne@69 368 //! Does not populate:
jpayne@69 369 //! node->seq
jpayne@69 370 //! edgelet->frmR/frmQ
jpayne@69 371 //! edgelet->snps
jpayne@69 372 //!
jpayne@69 373 //! \param deltapath The path of the deltafile to read
jpayne@69 374 //! \param getdeltas Read the delta-encoded gap positions? yes/no
jpayne@69 375 //! \return void
jpayne@69 376 //!
jpayne@69 377 void DeltaGraph_t::build (const string & deltapath, bool getdeltas)
jpayne@69 378 {
jpayne@69 379 DeltaReader_t dr;
jpayne@69 380 DeltaEdge_t * dep;
jpayne@69 381 pair<map<string, DeltaNode_t>::iterator, bool> insret;
jpayne@69 382
jpayne@69 383
jpayne@69 384 //-- Open the delta file and read in the alignment information
jpayne@69 385 dr.open (deltapath);
jpayne@69 386
jpayne@69 387 refpath = dr.getReferencePath();
jpayne@69 388 qrypath = dr.getQueryPath();
jpayne@69 389
jpayne@69 390 if ( dr.getDataType() == NUCMER_STRING )
jpayne@69 391 datatype = NUCMER_DATA;
jpayne@69 392 else if ( dr.getDataType() == PROMER_STRING )
jpayne@69 393 datatype = PROMER_DATA;
jpayne@69 394 else
jpayne@69 395 datatype = NULL_DATA;
jpayne@69 396
jpayne@69 397 //-- Read in the next graph edge, i.e. a new delta record
jpayne@69 398 while ( dr.readNext (getdeltas) )
jpayne@69 399 {
jpayne@69 400 dep = new DeltaEdge_t();
jpayne@69 401
jpayne@69 402
jpayne@69 403 //-- Find the reference node in the graph, add a new one if necessary
jpayne@69 404 insret = refnodes.insert
jpayne@69 405 (map<string, DeltaNode_t>::value_type
jpayne@69 406 (dr.getRecord().idR, DeltaNode_t()));
jpayne@69 407 dep->refnode = &((insret.first)->second);
jpayne@69 408
jpayne@69 409 //-- If a new reference node
jpayne@69 410 if ( insret.second )
jpayne@69 411 {
jpayne@69 412 dep->refnode->id = &((insret.first)->first);
jpayne@69 413 dep->refnode->len = dr.getRecord().lenR;
jpayne@69 414 }
jpayne@69 415
jpayne@69 416
jpayne@69 417 //-- Find the query node in the graph, add a new one if necessary
jpayne@69 418 insret = qrynodes.insert
jpayne@69 419 (map<string, DeltaNode_t>::value_type
jpayne@69 420 (dr.getRecord().idQ, DeltaNode_t()));
jpayne@69 421 dep->qrynode = &((insret.first)->second);
jpayne@69 422
jpayne@69 423 //-- If a new query node
jpayne@69 424 if ( insret.second )
jpayne@69 425 {
jpayne@69 426 dep->qrynode->id = &((insret.first)->first);
jpayne@69 427 dep->qrynode->len = dr.getRecord().lenQ;
jpayne@69 428 }
jpayne@69 429
jpayne@69 430
jpayne@69 431 //-- Build the edge
jpayne@69 432 dep->build (dr.getRecord());
jpayne@69 433 dep->refnode->edges.push_back (dep);
jpayne@69 434 dep->qrynode->edges.push_back (dep);
jpayne@69 435 }
jpayne@69 436 dr.close ();
jpayne@69 437 }
jpayne@69 438
jpayne@69 439
jpayne@69 440 //------------------------------------------------------------------- clean ----
jpayne@69 441 //! \brief Clean the graph of all edgelets where isGOOD = false
jpayne@69 442 //!
jpayne@69 443 //! Removes all edgelets from the graph where isGOOD = false. Afterwhich, all
jpayne@69 444 //! now empty edges or nodes are also removed.
jpayne@69 445 //!
jpayne@69 446 //! \return void
jpayne@69 447 //!
jpayne@69 448 void DeltaGraph_t::clean()
jpayne@69 449 {
jpayne@69 450 map<string, DeltaNode_t>::iterator i;
jpayne@69 451 map<string, DeltaNode_t>::iterator ii;
jpayne@69 452 vector<DeltaEdge_t *>::iterator j;
jpayne@69 453 vector<DeltaEdgelet_t *>::iterator k;
jpayne@69 454
jpayne@69 455 //-- For all ref nodes
jpayne@69 456 for ( i = refnodes.begin(); i != refnodes.end(); )
jpayne@69 457 {
jpayne@69 458 //-- For all edges
jpayne@69 459 for ( j = i->second.edges.begin();
jpayne@69 460 j != i->second.edges.end(); ++ j )
jpayne@69 461 {
jpayne@69 462 //-- For all edgelets
jpayne@69 463 for ( k = (*j)->edgelets.begin();
jpayne@69 464 k != (*j)->edgelets.end(); ++ k )
jpayne@69 465 {
jpayne@69 466 if ( ! (*k)->isGOOD )
jpayne@69 467 {
jpayne@69 468 //-- Delete the bad edgelet and mark for erasure
jpayne@69 469 delete (*k);
jpayne@69 470 *k = NULL;
jpayne@69 471 }
jpayne@69 472 }
jpayne@69 473
jpayne@69 474 //-- Erase the marked edgelets
jpayne@69 475 k = partition ((*j)->edgelets.begin(),
jpayne@69 476 (*j)->edgelets.end(),
jpayne@69 477 NULLPred_t());
jpayne@69 478 (*j)->edgelets.erase (k, (*j)->edgelets.end());
jpayne@69 479
jpayne@69 480 //-- Mark the edge if empty
jpayne@69 481 if ( (*j)->edgelets.empty() )
jpayne@69 482 *j = NULL;
jpayne@69 483 }
jpayne@69 484
jpayne@69 485 //-- Erase the marked edges
jpayne@69 486 j = partition (i->second.edges.begin(),
jpayne@69 487 i->second.edges.end(),
jpayne@69 488 NULLPred_t());
jpayne@69 489 i->second.edges.erase (j, i->second.edges.end());
jpayne@69 490
jpayne@69 491 //-- Erase the node if empty
jpayne@69 492 ii = i ++;
jpayne@69 493 if ( ii->second.edges.empty() )
jpayne@69 494 refnodes.erase (ii);
jpayne@69 495 }
jpayne@69 496
jpayne@69 497 //-- For all qry nodes
jpayne@69 498 for ( i = qrynodes.begin(); i != qrynodes.end(); )
jpayne@69 499 {
jpayne@69 500 for ( j = i->second.edges.begin();
jpayne@69 501 j != i->second.edges.end(); ++ j )
jpayne@69 502 {
jpayne@69 503 //-- Delete the edge if empty and mark for erasure
jpayne@69 504 if ( (*j)->edgelets.empty() )
jpayne@69 505 {
jpayne@69 506 delete (*j);
jpayne@69 507 *j = NULL;
jpayne@69 508 }
jpayne@69 509 }
jpayne@69 510
jpayne@69 511 //-- Erase the marked edges
jpayne@69 512 j = partition (i->second.edges.begin(),
jpayne@69 513 i->second.edges.end(),
jpayne@69 514 NULLPred_t());
jpayne@69 515 i->second.edges.erase (j, i->second.edges.end());
jpayne@69 516
jpayne@69 517 //-- Erase the node if empty
jpayne@69 518 ii = i ++;
jpayne@69 519 if ( ii->second.edges.empty() )
jpayne@69 520 qrynodes.erase (ii);
jpayne@69 521 }
jpayne@69 522
jpayne@69 523 }
jpayne@69 524
jpayne@69 525
jpayne@69 526 //------------------------------------------------------------------- clear ----
jpayne@69 527 //! \brief Clear the graph of all nodes, edges and edgelets
jpayne@69 528 //!
jpayne@69 529 //! \return void
jpayne@69 530 //!
jpayne@69 531 void DeltaGraph_t::clear ( )
jpayne@69 532 {
jpayne@69 533 //-- Make sure the edges only get destructed once
jpayne@69 534 std::map<std::string, DeltaNode_t>::iterator i;
jpayne@69 535 std::vector<DeltaEdge_t *>::iterator j;
jpayne@69 536 for ( i = refnodes . begin( ); i != refnodes . end( ); ++ i )
jpayne@69 537 for ( j = i -> second . edges . begin( );
jpayne@69 538 j != i -> second . edges . end( ); ++ j )
jpayne@69 539 delete (*j);
jpayne@69 540
jpayne@69 541 refnodes.clear( );
jpayne@69 542 qrynodes.clear( );
jpayne@69 543 refpath.erase( );
jpayne@69 544 qrypath.erase( );
jpayne@69 545 datatype = NULL_DATA;
jpayne@69 546 }
jpayne@69 547
jpayne@69 548
jpayne@69 549 //------------------------------------------------------------ getNodeCount ----
jpayne@69 550 //! \brief Counts and returns the number of graph nodes (sequences)
jpayne@69 551 //!
jpayne@69 552 //! \return The number of graph nodes
jpayne@69 553 //!
jpayne@69 554 long DeltaGraph_t::getNodeCount()
jpayne@69 555 {
jpayne@69 556 long sum = refnodes.size() + qrynodes.size();
jpayne@69 557 return sum;
jpayne@69 558 }
jpayne@69 559
jpayne@69 560
jpayne@69 561 //------------------------------------------------------------ getEdgeCount ----
jpayne@69 562 //! \brief Counts and returns the number of graph edges
jpayne@69 563 //!
jpayne@69 564 //! \return void
jpayne@69 565 //!
jpayne@69 566 long DeltaGraph_t::getEdgeCount()
jpayne@69 567 {
jpayne@69 568 long sum = 0;
jpayne@69 569
jpayne@69 570 map<string, DeltaNode_t>::iterator i;
jpayne@69 571 for ( i = refnodes.begin(); i != refnodes.end(); ++ i )
jpayne@69 572 sum += i->second.edges.size();
jpayne@69 573
jpayne@69 574 return sum;
jpayne@69 575 }
jpayne@69 576
jpayne@69 577
jpayne@69 578 //--------------------------------------------------------- getEdgeletCount ----
jpayne@69 579 //! \brief Counts and returns the number of graph edgelets (alignments)
jpayne@69 580 //!
jpayne@69 581 //! \return void
jpayne@69 582 //!
jpayne@69 583 long DeltaGraph_t::getEdgeletCount()
jpayne@69 584 {
jpayne@69 585 long sum = 0;
jpayne@69 586
jpayne@69 587 map<string, DeltaNode_t>::iterator i;
jpayne@69 588 vector<DeltaEdge_t *>::iterator j;
jpayne@69 589 for ( i = refnodes.begin(); i != refnodes.end(); ++ i )
jpayne@69 590 for ( j = i->second.edges.begin();
jpayne@69 591 j != i->second.edges.end(); ++ j )
jpayne@69 592 sum += (*j)->edgelets.size();
jpayne@69 593
jpayne@69 594 return sum;
jpayne@69 595 }
jpayne@69 596
jpayne@69 597
jpayne@69 598 //---------------------------------------------------------------- flagGOOD ----
jpayne@69 599 //! \brief Sets isGOOD = 1 for all edgelets
jpayne@69 600 void DeltaGraph_t::flagGOOD()
jpayne@69 601 {
jpayne@69 602 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 603 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 604 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 605
jpayne@69 606 //-- All references
jpayne@69 607 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 608 {
jpayne@69 609 //-- All queries matching this reference
jpayne@69 610 for ( ei = (mi->second).edges.begin();
jpayne@69 611 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 612 {
jpayne@69 613 //-- All alignments between reference and query
jpayne@69 614 for ( eli = (*ei)->edgelets.begin();
jpayne@69 615 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 616 {
jpayne@69 617 (*eli)->isGOOD = true;
jpayne@69 618 }
jpayne@69 619 }
jpayne@69 620 }
jpayne@69 621 }
jpayne@69 622
jpayne@69 623
jpayne@69 624 //---------------------------------------------------------------- flag1to1 ----
jpayne@69 625 //! \brief Intersection of flagQLIS and flagRLIS
jpayne@69 626 void DeltaGraph_t::flag1to1(float epsilon, float maxolap)
jpayne@69 627 {
jpayne@69 628 flagRLIS(epsilon, maxolap, false);
jpayne@69 629 flagQLIS(epsilon, maxolap, false);
jpayne@69 630
jpayne@69 631 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 632 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 633 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 634
jpayne@69 635 //-- All references
jpayne@69 636 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 637 {
jpayne@69 638 //-- All queries matching this reference
jpayne@69 639 for ( ei = (mi->second).edges.begin();
jpayne@69 640 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 641 {
jpayne@69 642 //-- All alignments between reference and query
jpayne@69 643 for ( eli = (*ei)->edgelets.begin();
jpayne@69 644 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 645 {
jpayne@69 646 if ( !(*eli)->isRLIS || !(*eli)->isQLIS )
jpayne@69 647 (*eli)->isGOOD = false;
jpayne@69 648 }
jpayne@69 649 }
jpayne@69 650 }
jpayne@69 651 }
jpayne@69 652
jpayne@69 653
jpayne@69 654 //---------------------------------------------------------------- flagMtoM ----
jpayne@69 655 //! \brief Union of flagQLIS and flagRLIS
jpayne@69 656 void DeltaGraph_t::flagMtoM(float epsilon, float maxolap)
jpayne@69 657 {
jpayne@69 658 flagRLIS(epsilon, maxolap, false);
jpayne@69 659 flagQLIS(epsilon, maxolap, false);
jpayne@69 660
jpayne@69 661 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 662 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 663 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 664
jpayne@69 665 //-- All references
jpayne@69 666 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 667 {
jpayne@69 668 //-- All queries matching this reference
jpayne@69 669 for ( ei = (mi->second).edges.begin();
jpayne@69 670 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 671 {
jpayne@69 672 //-- All alignments between reference and query
jpayne@69 673 for ( eli = (*ei)->edgelets.begin();
jpayne@69 674 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 675 {
jpayne@69 676 if ( !(*eli)->isRLIS && !(*eli)->isQLIS )
jpayne@69 677 (*eli)->isGOOD = false;
jpayne@69 678 }
jpayne@69 679 }
jpayne@69 680 }
jpayne@69 681 }
jpayne@69 682
jpayne@69 683
jpayne@69 684 //-------------------------------------------------------------- flagGLIS ------
jpayne@69 685 //! \brief Flag edgelets in the global LIS and unflag those who are not
jpayne@69 686 //!
jpayne@69 687 //! Runs a length*identity weigthed LIS to determine the longest, mutually
jpayne@69 688 //! consistent set of alignments between all pairs of sequences. This
jpayne@69 689 //! essentially constructs the global alignment between all sequence pairs.
jpayne@69 690 //!
jpayne@69 691 //! Sets isGLIS flag for good and unsets isGOOD flag for bad.
jpayne@69 692 //!
jpayne@69 693 //! \param epsilon Keep repeat alignments within epsilon % of the best align
jpayne@69 694 //! \return void
jpayne@69 695 //!
jpayne@69 696 void DeltaGraph_t::flagGLIS (float epsilon)
jpayne@69 697 {
jpayne@69 698 LIS_t * lis = NULL;
jpayne@69 699 long lis_size = 0;
jpayne@69 700 long i, j, n;
jpayne@69 701 long olap, olapQ, olapR, len, lenQ, lenR, score, diff;
jpayne@69 702
jpayne@69 703 vector<DeltaEdgelet_t *> edgelets;
jpayne@69 704
jpayne@69 705 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 706 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 707 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 708
jpayne@69 709
jpayne@69 710 //-- For each reference sequence
jpayne@69 711 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 712 {
jpayne@69 713 //-- For each query aligning to this reference
jpayne@69 714 for ( ei = (mi->second).edges.begin();
jpayne@69 715 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 716 {
jpayne@69 717 //-- Collect all the good edgelets
jpayne@69 718 edgelets.clear();
jpayne@69 719 for ( eli = (*ei)->edgelets.begin();
jpayne@69 720 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 721 if ( (*eli)->isGOOD )
jpayne@69 722 {
jpayne@69 723 edgelets.push_back (*eli);
jpayne@69 724
jpayne@69 725 //-- Fix the coordinates to make global LIS work
jpayne@69 726 if ( (*eli)->dirR == (*eli)->dirQ )
jpayne@69 727 {
jpayne@69 728 (*eli)->dirQ = FORWARD_DIR;
jpayne@69 729 }
jpayne@69 730 else
jpayne@69 731 {
jpayne@69 732 if ( (*eli)->dirQ == REVERSE_DIR )
jpayne@69 733 Swap ((*eli)->loQ, (*eli)->hiQ);
jpayne@69 734 (*eli)->loQ = RevC ((*eli)->loQ, (*ei)->qrynode->len);
jpayne@69 735 (*eli)->hiQ = RevC ((*eli)->hiQ, (*ei)->qrynode->len);
jpayne@69 736 (*eli)->dirQ = REVERSE_DIR;
jpayne@69 737 }
jpayne@69 738 }
jpayne@69 739
jpayne@69 740 //-- Resize and initialize
jpayne@69 741 n = edgelets.size();
jpayne@69 742 if ( n > lis_size )
jpayne@69 743 {
jpayne@69 744 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n);
jpayne@69 745 lis_size = n;
jpayne@69 746 }
jpayne@69 747 for ( i = 0; i < n; ++ i )
jpayne@69 748 lis[i].used = false;
jpayne@69 749
jpayne@69 750 //-- Sort by lo query coord
jpayne@69 751 sort (edgelets.begin(), edgelets.end(), EdgeletQCmp_t());
jpayne@69 752
jpayne@69 753 //-- Continue until all equivalent repeats are extracted
jpayne@69 754 vector<long> allbest;
jpayne@69 755 do
jpayne@69 756 {
jpayne@69 757 //-- Dynamic
jpayne@69 758 for ( i = 0; i < n; ++ i )
jpayne@69 759 {
jpayne@69 760 if ( lis[i].used ) continue;
jpayne@69 761
jpayne@69 762 lis[i].a = edgelets[i];
jpayne@69 763
jpayne@69 764 lenR = lis[i].a->hiR - lis[i].a->loR + 1;
jpayne@69 765 lenQ = lis[i].a->hiQ - lis[i].a->loQ + 1;
jpayne@69 766 len = lenR > lenQ ? lenQ : lenR;
jpayne@69 767 lis[i].score = ScoreGlobal (0, len, 0, lis[i].a->idy);
jpayne@69 768
jpayne@69 769 lis[i].from = -1;
jpayne@69 770 lis[i].diff = 0;
jpayne@69 771
jpayne@69 772 for ( j = 0; j < i; ++ j )
jpayne@69 773 {
jpayne@69 774 if ( lis[j].used ) continue;
jpayne@69 775
jpayne@69 776 if ( lis[i].a->dirQ != lis[j].a->dirQ )
jpayne@69 777 continue;
jpayne@69 778
jpayne@69 779 lenR = lis[i].a->hiR - lis[i].a->loR + 1;
jpayne@69 780 lenQ = lis[i].a->hiQ - lis[i].a->loQ + 1;
jpayne@69 781 len = lenR > lenQ ? lenQ : lenR;
jpayne@69 782
jpayne@69 783 olapR = lis[j].a->hiR - lis[i].a->loR + 1;
jpayne@69 784 olapQ = lis[j].a->hiQ - lis[i].a->loQ + 1;
jpayne@69 785 olap = olapR > olapQ ? olapR : olapQ;
jpayne@69 786 if ( olap < 0 )
jpayne@69 787 olap = 0;
jpayne@69 788
jpayne@69 789 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a);
jpayne@69 790
jpayne@69 791 score = ScoreGlobal
jpayne@69 792 (lis[j].score, len, olap, lis[i].a->idy);
jpayne@69 793
jpayne@69 794 if ( score > lis[i].score
jpayne@69 795 ||
jpayne@69 796 (score == lis[i].score && diff < lis[i].diff) )
jpayne@69 797 {
jpayne@69 798 lis[i].from = j;
jpayne@69 799 lis[i].score = score;
jpayne@69 800 lis[i].diff = diff;
jpayne@69 801 }
jpayne@69 802 }
jpayne@69 803 }
jpayne@69 804 } while ( UpdateBest (lis, n, allbest, epsilon) );
jpayne@69 805
jpayne@69 806 long beg = PickBest (lis, allbest, epsilon);
jpayne@69 807 long end = allbest.size();
jpayne@69 808 if ( beg == end ) beg = 0;
jpayne@69 809 else end = beg + 1;
jpayne@69 810
jpayne@69 811 //-- Flag the edgelets
jpayne@69 812 for ( ; beg < end; ++ beg )
jpayne@69 813 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from )
jpayne@69 814 lis[i].a->isGLIS = true;
jpayne@69 815
jpayne@69 816 //-- Repair the coordinates
jpayne@69 817 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
jpayne@69 818 {
jpayne@69 819 if ( ! (*eli)->isGLIS )
jpayne@69 820 (*eli)->isGOOD = false;
jpayne@69 821
jpayne@69 822 if ( (*eli)->dirQ == FORWARD_DIR )
jpayne@69 823 {
jpayne@69 824 (*eli)->dirQ = (*eli)->dirR;
jpayne@69 825 }
jpayne@69 826 else
jpayne@69 827 {
jpayne@69 828 if ( (*eli)->dirR == FORWARD_DIR )
jpayne@69 829 Swap ((*eli)->loQ, (*eli)->hiQ);
jpayne@69 830 (*eli)->loQ = RevC ((*eli)->loQ, (*ei)->qrynode->len);
jpayne@69 831 (*eli)->hiQ = RevC ((*eli)->hiQ, (*ei)->qrynode->len);
jpayne@69 832 (*eli)->dirQ =
jpayne@69 833 (*eli)->dirR == FORWARD_DIR ? REVERSE_DIR : FORWARD_DIR;
jpayne@69 834 }
jpayne@69 835 }
jpayne@69 836 }
jpayne@69 837 }
jpayne@69 838
jpayne@69 839 free (lis);
jpayne@69 840 }
jpayne@69 841
jpayne@69 842
jpayne@69 843 //-------------------------------------------------------------- flagQLIS ------
jpayne@69 844 //! \brief Flag edgelets in the query LIS and unflag those who are not
jpayne@69 845 //!
jpayne@69 846 //! Runs a length*identity weighted LIS to determine the longest, QUERY
jpayne@69 847 //! consistent set of alignments for all query sequences. This effectively
jpayne@69 848 //! identifies the "best" alignments for all positions of each query.
jpayne@69 849 //!
jpayne@69 850 //! Sets isQLIS flag for good and unsets isGOOD flag for bad.
jpayne@69 851 //!
jpayne@69 852 //! \param epsilon Keep repeat alignments within epsilon % of the best align
jpayne@69 853 //! \param maxolap Only allow alignments to overlap by maxolap percent [0-100]
jpayne@69 854 //! \param flagBad Flag non QLIS edgelets as !isGOOD
jpayne@69 855 //! \return void
jpayne@69 856 //!
jpayne@69 857 void DeltaGraph_t::flagQLIS (float epsilon, float maxolap, bool flagbad)
jpayne@69 858 {
jpayne@69 859 LIS_t * lis = NULL;
jpayne@69 860 long lis_size = 0;
jpayne@69 861 long i, j, n;
jpayne@69 862 long olap, leni, lenj, score, diff;
jpayne@69 863
jpayne@69 864 vector<DeltaEdgelet_t *> edgelets;
jpayne@69 865
jpayne@69 866 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 867 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 868 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 869
jpayne@69 870
jpayne@69 871 //-- For each query sequence
jpayne@69 872 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
jpayne@69 873 {
jpayne@69 874 //-- Collect all the good edgelets
jpayne@69 875 edgelets.clear();
jpayne@69 876 for ( ei = (mi->second).edges.begin();
jpayne@69 877 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 878 for ( eli = (*ei)->edgelets.begin();
jpayne@69 879 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 880 if ( (*eli)->isGOOD )
jpayne@69 881 edgelets.push_back (*eli);
jpayne@69 882
jpayne@69 883 //-- Resize and initialize
jpayne@69 884 n = edgelets.size();
jpayne@69 885 if ( n > lis_size )
jpayne@69 886 {
jpayne@69 887 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n);
jpayne@69 888 lis_size = n;
jpayne@69 889 }
jpayne@69 890 for ( i = 0; i < n; ++ i )
jpayne@69 891 lis[i].used = false;
jpayne@69 892
jpayne@69 893 //-- Sort by lo query coord
jpayne@69 894 sort (edgelets.begin(), edgelets.end(), EdgeletQCmp_t());
jpayne@69 895
jpayne@69 896 //-- Continue until all equivalent repeats are extracted
jpayne@69 897 vector<long> allbest;
jpayne@69 898 do
jpayne@69 899 {
jpayne@69 900 //-- Dynamic
jpayne@69 901 for ( i = 0; i < n; ++ i )
jpayne@69 902 {
jpayne@69 903 if ( lis[i].used ) continue;
jpayne@69 904
jpayne@69 905 lis[i].a = edgelets[i];
jpayne@69 906
jpayne@69 907 leni = lis[i].a->hiQ - lis[i].a->loQ + 1;
jpayne@69 908 lis[i].score =
jpayne@69 909 ScoreLocal (0, leni, 0, 0, lis[i].a->idy, 0);
jpayne@69 910
jpayne@69 911 lis[i].from = -1;
jpayne@69 912 lis[i].diff = 0;
jpayne@69 913
jpayne@69 914 for ( j = 0; j < i; ++ j )
jpayne@69 915 {
jpayne@69 916 if ( lis[j].used ) continue;
jpayne@69 917 if ( lis[j].from >= 0 &&
jpayne@69 918 lis[lis[j].from].a->hiQ >= lis[i].a->loQ )
jpayne@69 919 continue;
jpayne@69 920
jpayne@69 921 leni = lis[i].a->hiQ - lis[i].a->loQ + 1;
jpayne@69 922 lenj = lis[j].a->hiQ - lis[j].a->loQ + 1;
jpayne@69 923 olap = lis[j].a->hiQ - lis[i].a->loQ + 1;
jpayne@69 924 if ( olap < 0 )
jpayne@69 925 olap = 0;
jpayne@69 926
jpayne@69 927 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a);
jpayne@69 928
jpayne@69 929 score = ScoreLocal
jpayne@69 930 (lis[j].score, leni, lenj, olap, lis[i].a->idy, maxolap);
jpayne@69 931
jpayne@69 932 if ( score > lis[i].score
jpayne@69 933 ||
jpayne@69 934 (score == lis[i].score && diff < lis[i].diff) )
jpayne@69 935 {
jpayne@69 936 lis[i].from = j;
jpayne@69 937 lis[i].score = score;
jpayne@69 938 lis[i].diff = diff;
jpayne@69 939 }
jpayne@69 940 }
jpayne@69 941 }
jpayne@69 942 } while ( UpdateBest (lis, n, allbest, epsilon) );
jpayne@69 943
jpayne@69 944 long beg = PickBest (lis, allbest, epsilon);
jpayne@69 945 long end = allbest.size();
jpayne@69 946 if ( beg == end ) beg = 0;
jpayne@69 947 else end = beg + 1;
jpayne@69 948
jpayne@69 949 //-- Flag the edgelets
jpayne@69 950 for ( ; beg < end; ++ beg )
jpayne@69 951 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from )
jpayne@69 952 lis[i].a->isQLIS = true;
jpayne@69 953
jpayne@69 954 if ( flagbad )
jpayne@69 955 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
jpayne@69 956 if ( ! (*eli)->isQLIS )
jpayne@69 957 (*eli)->isGOOD = false;
jpayne@69 958 }
jpayne@69 959
jpayne@69 960 free (lis);
jpayne@69 961 }
jpayne@69 962
jpayne@69 963
jpayne@69 964 //-------------------------------------------------------------- flagRLIS ------
jpayne@69 965 //! \brief Flag edgelets in the reference LIS and unflag those who are not
jpayne@69 966 //!
jpayne@69 967 //! Runs a length*identity weighted LIS to determine the longest, REFERENCE
jpayne@69 968 //! consistent set of alignments for all reference sequences. This effectively
jpayne@69 969 //! identifies the "best" alignments for all positions of each reference.
jpayne@69 970 //!
jpayne@69 971 //! Sets isRLIS flag for good and unsets isGOOD flag for bad.
jpayne@69 972 //!
jpayne@69 973 //! \param epsilon Keep repeat alignments within epsilon % of the best align
jpayne@69 974 //! \param maxolap Only allow alignments to overlap by maxolap percent [0-100]
jpayne@69 975 //! \param flagBad Flag non RLIS edgelets as !isGOOD
jpayne@69 976 //! \return void
jpayne@69 977 //!
jpayne@69 978 void DeltaGraph_t::flagRLIS (float epsilon, float maxolap, bool flagbad)
jpayne@69 979 {
jpayne@69 980 LIS_t * lis = NULL;
jpayne@69 981 long lis_size = 0;
jpayne@69 982 long i, j, n;
jpayne@69 983 long olap, leni, lenj, score, diff;
jpayne@69 984
jpayne@69 985 vector<DeltaEdgelet_t *> edgelets;
jpayne@69 986
jpayne@69 987 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 988 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 989 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 990
jpayne@69 991
jpayne@69 992 //-- For each reference sequence
jpayne@69 993 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 994 {
jpayne@69 995 //-- Collect all the good edgelets
jpayne@69 996 edgelets.clear();
jpayne@69 997 for ( ei = (mi->second).edges.begin();
jpayne@69 998 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 999 for ( eli = (*ei)->edgelets.begin();
jpayne@69 1000 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 1001 if ( (*eli)->isGOOD )
jpayne@69 1002 edgelets.push_back (*eli);
jpayne@69 1003
jpayne@69 1004 //-- Resize
jpayne@69 1005 n = edgelets.size();
jpayne@69 1006 if ( n > lis_size )
jpayne@69 1007 {
jpayne@69 1008 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n);
jpayne@69 1009 lis_size = n;
jpayne@69 1010 }
jpayne@69 1011 for ( i = 0; i < n; ++ i )
jpayne@69 1012 lis[i].used = false;
jpayne@69 1013
jpayne@69 1014 //-- Sort by lo reference coord
jpayne@69 1015 sort (edgelets.begin(), edgelets.end(), EdgeletRCmp_t());
jpayne@69 1016
jpayne@69 1017 //-- Continue until all equivalent repeats are extracted
jpayne@69 1018 vector<long> allbest;
jpayne@69 1019 do
jpayne@69 1020 {
jpayne@69 1021 //-- Dynamic
jpayne@69 1022 for ( i = 0; i < n; ++ i )
jpayne@69 1023 {
jpayne@69 1024 if ( lis[i].used ) continue;
jpayne@69 1025
jpayne@69 1026 lis[i].a = edgelets[i];
jpayne@69 1027
jpayne@69 1028 leni = lis[i].a->hiR - lis[i].a->loR + 1;
jpayne@69 1029 lis[i].score =
jpayne@69 1030 ScoreLocal (0, leni, 0, 0, lis[i].a->idy, 0);
jpayne@69 1031
jpayne@69 1032 lis[i].from = -1;
jpayne@69 1033 lis[i].diff = 0;
jpayne@69 1034
jpayne@69 1035 for ( j = 0; j < i; ++ j )
jpayne@69 1036 {
jpayne@69 1037 if ( lis[j].used ) continue;
jpayne@69 1038 if ( lis[j].from >= 0 &&
jpayne@69 1039 lis[lis[j].from].a->hiR >= lis[i].a->loR )
jpayne@69 1040 continue;
jpayne@69 1041
jpayne@69 1042 leni = lis[i].a->hiR - lis[i].a->loR + 1;
jpayne@69 1043 lenj = lis[j].a->hiR - lis[j].a->loR + 1;
jpayne@69 1044 olap = lis[j].a->hiR - lis[i].a->loR + 1;
jpayne@69 1045 if ( olap < 0 )
jpayne@69 1046 olap = 0;
jpayne@69 1047
jpayne@69 1048 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a);
jpayne@69 1049
jpayne@69 1050 score = ScoreLocal
jpayne@69 1051 (lis[j].score, leni, lenj, olap, lis[i].a->idy, maxolap);
jpayne@69 1052
jpayne@69 1053 if ( score > lis[i].score
jpayne@69 1054 ||
jpayne@69 1055 (score == lis[i].score && diff < lis[i].diff) )
jpayne@69 1056 {
jpayne@69 1057 lis[i].from = j;
jpayne@69 1058 lis[i].score = score;
jpayne@69 1059 lis[i].diff = diff;
jpayne@69 1060 }
jpayne@69 1061 }
jpayne@69 1062 }
jpayne@69 1063 } while ( UpdateBest (lis, n, allbest, epsilon) );
jpayne@69 1064
jpayne@69 1065 long beg = PickBest (lis, allbest, epsilon);
jpayne@69 1066 long end = allbest.size();
jpayne@69 1067 if ( beg == end ) beg = 0;
jpayne@69 1068 else end = beg + 1;
jpayne@69 1069
jpayne@69 1070 //-- Flag the edgelets
jpayne@69 1071 for ( ; beg < end; ++ beg )
jpayne@69 1072 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from )
jpayne@69 1073 lis[i].a->isRLIS = true;
jpayne@69 1074
jpayne@69 1075 if ( flagbad )
jpayne@69 1076 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
jpayne@69 1077 if ( ! (*eli)->isRLIS )
jpayne@69 1078 (*eli)->isGOOD = false;
jpayne@69 1079 }
jpayne@69 1080
jpayne@69 1081 free (lis);
jpayne@69 1082 }
jpayne@69 1083
jpayne@69 1084
jpayne@69 1085 //------------------------------------------------------------- flagScore ------
jpayne@69 1086 //! \brief Flag edgelets with scores below a score threshold
jpayne@69 1087 //!
jpayne@69 1088 //! Unsets isGOOD for bad.
jpayne@69 1089 //!
jpayne@69 1090 //! \param minlen Flag edgelets if less than minlen in length
jpayne@69 1091 //! \param minidy Flag edgelets if less than minidy identity [0-100]
jpayne@69 1092 //! \return void
jpayne@69 1093 //!
jpayne@69 1094 void DeltaGraph_t::flagScore (long minlen, float minidy)
jpayne@69 1095 {
jpayne@69 1096 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 1097 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 1098 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 1099
jpayne@69 1100 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 1101 for ( ei = (mi->second).edges.begin();
jpayne@69 1102 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 1103 for ( eli = (*ei)->edgelets.begin();
jpayne@69 1104 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 1105 if ( (*eli)->isGOOD )
jpayne@69 1106 {
jpayne@69 1107 //-- Flag low identities
jpayne@69 1108 if ( (*eli)->idy * 100.0 < minidy )
jpayne@69 1109 (*eli)->isGOOD = false;
jpayne@69 1110
jpayne@69 1111 //-- Flag small lengths
jpayne@69 1112 if ( (*eli)->hiR - (*eli)->loR + 1 < minlen ||
jpayne@69 1113 (*eli)->hiQ - (*eli)->loQ + 1 < minlen )
jpayne@69 1114 (*eli)->isGOOD = false;
jpayne@69 1115 }
jpayne@69 1116 }
jpayne@69 1117
jpayne@69 1118
jpayne@69 1119 //-------------------------------------------------------------- flagUNIQ ------
jpayne@69 1120 //! \brief Flag edgelets with uniqueness below a certain threshold
jpayne@69 1121 //!
jpayne@69 1122 //! Unsets isGOOD for bad.
jpayne@69 1123 //!
jpayne@69 1124 //! \param minuniq Flag edgelets if less that minuniq percent [0-100] unique
jpayne@69 1125 //! \return void
jpayne@69 1126 //!
jpayne@69 1127 void DeltaGraph_t::flagUNIQ (float minuniq)
jpayne@69 1128 {
jpayne@69 1129 long i, uniq, len;
jpayne@69 1130
jpayne@69 1131 vector<DeltaEdgelet_t *> edgelets;
jpayne@69 1132
jpayne@69 1133 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 1134 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 1135 vector<DeltaEdgelet_t *>::iterator eli;
jpayne@69 1136
jpayne@69 1137
jpayne@69 1138 //-- For each reference sequence
jpayne@69 1139 long ref_size = 0;
jpayne@69 1140 long ref_len = 0;
jpayne@69 1141 unsigned char * ref_cov = NULL;
jpayne@69 1142 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 1143 {
jpayne@69 1144 //-- Reset the reference coverage array
jpayne@69 1145 ref_len = (mi->second).len;
jpayne@69 1146 if ( ref_len > ref_size )
jpayne@69 1147 {
jpayne@69 1148 ref_cov = (unsigned char *) Safe_realloc (ref_cov, ref_len + 1);
jpayne@69 1149 ref_size = ref_len;
jpayne@69 1150 }
jpayne@69 1151 for ( i = 1; i <= ref_len; ++ i )
jpayne@69 1152 ref_cov[i] = 0;
jpayne@69 1153
jpayne@69 1154 //-- Collect all the good edgelets
jpayne@69 1155 edgelets.clear();
jpayne@69 1156 for ( ei = (mi->second).edges.begin();
jpayne@69 1157 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 1158 for ( eli = (*ei)->edgelets.begin();
jpayne@69 1159 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 1160 if ( (*eli)->isGOOD )
jpayne@69 1161 {
jpayne@69 1162 edgelets.push_back (*eli);
jpayne@69 1163
jpayne@69 1164 //-- Add to the reference coverage
jpayne@69 1165 for ( i = (*eli)->loR; i <= (*eli)->hiR; i ++ )
jpayne@69 1166 if ( ref_cov[i] < UCHAR_MAX )
jpayne@69 1167 ref_cov[i] ++;
jpayne@69 1168 }
jpayne@69 1169
jpayne@69 1170 //-- Calculate the uniqueness of each edgelet
jpayne@69 1171 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
jpayne@69 1172 {
jpayne@69 1173 uniq = 0;
jpayne@69 1174 len = (*eli)->hiR - (*eli)->loR + 1;
jpayne@69 1175 for ( i = (*eli)->loR; i <= (*eli)->hiR; i ++ )
jpayne@69 1176 if ( ref_cov[i] == 1 )
jpayne@69 1177 uniq ++;
jpayne@69 1178
jpayne@69 1179 //-- Flag low reference uniqueness
jpayne@69 1180 if ( (float)uniq / (float)len * 100.0 < minuniq )
jpayne@69 1181 (*eli)->isGOOD = false;
jpayne@69 1182 }
jpayne@69 1183 }
jpayne@69 1184 free (ref_cov);
jpayne@69 1185
jpayne@69 1186
jpayne@69 1187 //-- For each query sequence
jpayne@69 1188 long qry_size = 0;
jpayne@69 1189 long qry_len = 0;
jpayne@69 1190 unsigned char * qry_cov = NULL;
jpayne@69 1191 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
jpayne@69 1192 {
jpayne@69 1193 //-- Reset the query coverage array
jpayne@69 1194 qry_len = (mi->second).len;
jpayne@69 1195 if ( qry_len > qry_size )
jpayne@69 1196 {
jpayne@69 1197 qry_cov = (unsigned char *) Safe_realloc (qry_cov, qry_len + 1);
jpayne@69 1198 qry_size = qry_len;
jpayne@69 1199 }
jpayne@69 1200 for ( i = 1; i <= qry_len; ++ i )
jpayne@69 1201 qry_cov[i] = 0;
jpayne@69 1202
jpayne@69 1203 //-- Collect all the good edgelets
jpayne@69 1204 edgelets.clear();
jpayne@69 1205 for ( ei = (mi->second).edges.begin();
jpayne@69 1206 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 1207 for ( eli = (*ei)->edgelets.begin();
jpayne@69 1208 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 1209 if ( (*eli)->isGOOD )
jpayne@69 1210 {
jpayne@69 1211 edgelets.push_back (*eli);
jpayne@69 1212
jpayne@69 1213 //-- Add to the query coverage
jpayne@69 1214 for ( i = (*eli)->loQ; i <= (*eli)->hiQ; i ++ )
jpayne@69 1215 if ( qry_cov[i] < UCHAR_MAX )
jpayne@69 1216 qry_cov[i] ++;
jpayne@69 1217 }
jpayne@69 1218
jpayne@69 1219 //-- Calculate the uniqueness of each edgelet
jpayne@69 1220 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
jpayne@69 1221 {
jpayne@69 1222 uniq = 0;
jpayne@69 1223 len = (*eli)->hiQ - (*eli)->loQ + 1;
jpayne@69 1224 for ( i = (*eli)->loQ; i <= (*eli)->hiQ; i ++ )
jpayne@69 1225 if ( qry_cov[i] == 1 )
jpayne@69 1226 uniq ++;
jpayne@69 1227
jpayne@69 1228 //-- Flag low query uniqueness
jpayne@69 1229 if ( (float)uniq / (float)len * 100.0 < minuniq )
jpayne@69 1230 (*eli)->isGOOD = false;
jpayne@69 1231 }
jpayne@69 1232 }
jpayne@69 1233 free (qry_cov);
jpayne@69 1234 }
jpayne@69 1235
jpayne@69 1236
jpayne@69 1237 //----------------------------------------------------- loadSequences ----------
jpayne@69 1238 //! \brief Load the sequence information into the DeltaNodes
jpayne@69 1239 //!
jpayne@69 1240 //! \return void
jpayne@69 1241 //!
jpayne@69 1242 void DeltaGraph_t::loadSequences ()
jpayne@69 1243 {
jpayne@69 1244 map<string, DeltaNode_t>::iterator mi;
jpayne@69 1245
jpayne@69 1246 FILE * qryfile, * reffile;
jpayne@69 1247 char * R = NULL;
jpayne@69 1248 char * Q = NULL;
jpayne@69 1249 char id [MAX_LINE];
jpayne@69 1250 long initsize;
jpayne@69 1251 long len;
jpayne@69 1252
jpayne@69 1253 //-- Read in the reference sequences
jpayne@69 1254 reffile = File_Open (refpath.c_str(), "r");
jpayne@69 1255 initsize = INIT_SIZE;
jpayne@69 1256 R = (char *) Safe_malloc (initsize);
jpayne@69 1257 while ( Read_String (reffile, R, initsize, id, FALSE) )
jpayne@69 1258 if ( (mi = refnodes.find (id)) != refnodes.end() )
jpayne@69 1259 {
jpayne@69 1260 len = strlen (R + 1);
jpayne@69 1261 mi->second.seq = (char *) Safe_malloc (len + 2);
jpayne@69 1262 mi->second.seq[0] = '\0';
jpayne@69 1263 strcpy (mi->second.seq + 1, R + 1);
jpayne@69 1264 if ( len != mi->second.len )
jpayne@69 1265 {
jpayne@69 1266 cerr << "ERROR: Reference input does not match delta file\n";
jpayne@69 1267 exit (EXIT_FAILURE);
jpayne@69 1268 }
jpayne@69 1269 }
jpayne@69 1270 fclose (reffile);
jpayne@69 1271 free (R);
jpayne@69 1272
jpayne@69 1273 //-- Read in the query sequences
jpayne@69 1274 qryfile = File_Open (qrypath.c_str(), "r");
jpayne@69 1275 initsize = INIT_SIZE;
jpayne@69 1276 Q = (char *) Safe_malloc (initsize);
jpayne@69 1277 while ( Read_String (qryfile, Q, initsize, id, FALSE) )
jpayne@69 1278 if ( (mi = qrynodes.find (id)) != qrynodes.end() )
jpayne@69 1279 {
jpayne@69 1280 len = strlen (Q + 1);
jpayne@69 1281 mi->second.seq = (char *) Safe_malloc (len + 2);
jpayne@69 1282 mi->second.seq[0] = '\0';
jpayne@69 1283 strcpy (mi->second.seq + 1, Q + 1);
jpayne@69 1284 if ( len != mi->second.len )
jpayne@69 1285 {
jpayne@69 1286 cerr << "ERROR: Query input does not match delta file\n";
jpayne@69 1287 exit (EXIT_FAILURE);
jpayne@69 1288 }
jpayne@69 1289 }
jpayne@69 1290 fclose (qryfile);
jpayne@69 1291 free (Q);
jpayne@69 1292
jpayne@69 1293
jpayne@69 1294 //-- Check that we found all the sequences
jpayne@69 1295 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
jpayne@69 1296 if ( mi->second.seq == NULL )
jpayne@69 1297 {
jpayne@69 1298 cerr << "ERROR: '" << mi->first << "' not found in reference file\n";
jpayne@69 1299 exit (EXIT_FAILURE);
jpayne@69 1300 }
jpayne@69 1301
jpayne@69 1302 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
jpayne@69 1303 if ( mi->second.seq == NULL )
jpayne@69 1304 {
jpayne@69 1305 cerr << "ERROR: '" << mi->first << "' not found in query file\n";
jpayne@69 1306 exit (EXIT_FAILURE);
jpayne@69 1307 }
jpayne@69 1308 }
jpayne@69 1309
jpayne@69 1310
jpayne@69 1311 //----------------------------------------------------- outputDelta ------------
jpayne@69 1312 //! \brief Outputs the contents of the graph as a deltafile
jpayne@69 1313 //!
jpayne@69 1314 //! \param out The output stream to write to
jpayne@69 1315 //! \return The output stream
jpayne@69 1316 //!
jpayne@69 1317 ostream & DeltaGraph_t::outputDelta (ostream & out)
jpayne@69 1318 {
jpayne@69 1319 bool header;
jpayne@69 1320 long s1, e1, s2, e2;
jpayne@69 1321
jpayne@69 1322 map<string, DeltaNode_t>::const_iterator mi;
jpayne@69 1323 vector<DeltaEdge_t *>::const_iterator ei;
jpayne@69 1324 vector<DeltaEdgelet_t *>::const_iterator eli;
jpayne@69 1325
jpayne@69 1326 //-- Print the file header
jpayne@69 1327 cout
jpayne@69 1328 << refpath << ' ' << qrypath << '\n'
jpayne@69 1329 << (datatype == PROMER_DATA ? PROMER_STRING : NUCMER_STRING) << '\n';
jpayne@69 1330
jpayne@69 1331 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
jpayne@69 1332 {
jpayne@69 1333 for ( ei = (mi->second).edges.begin();
jpayne@69 1334 ei != (mi->second).edges.end(); ++ ei )
jpayne@69 1335 {
jpayne@69 1336 header = false;
jpayne@69 1337
jpayne@69 1338 for ( eli = (*ei)->edgelets.begin();
jpayne@69 1339 eli != (*ei)->edgelets.end(); ++ eli )
jpayne@69 1340 {
jpayne@69 1341 if ( ! (*eli)->isGOOD )
jpayne@69 1342 continue;
jpayne@69 1343
jpayne@69 1344 //-- Print the sequence header
jpayne@69 1345 if ( ! header )
jpayne@69 1346 {
jpayne@69 1347 cout
jpayne@69 1348 << '>'
jpayne@69 1349 << *((*ei)->refnode->id) << ' '
jpayne@69 1350 << *((*ei)->qrynode->id) << ' '
jpayne@69 1351 << (*ei)->refnode->len << ' '
jpayne@69 1352 << (*ei)->qrynode->len << '\n';
jpayne@69 1353 header = true;
jpayne@69 1354 }
jpayne@69 1355 //-- Print the alignment
jpayne@69 1356 s1 = (*eli)->loR;
jpayne@69 1357 e1 = (*eli)->hiR;
jpayne@69 1358 s2 = (*eli)->loQ;
jpayne@69 1359 e2 = (*eli)->hiQ;
jpayne@69 1360 if ( (*eli)->dirR == REVERSE_DIR )
jpayne@69 1361 Swap (s1, e1);
jpayne@69 1362 if ( (*eli)->dirQ == REVERSE_DIR )
jpayne@69 1363 Swap (s2, e2);
jpayne@69 1364
jpayne@69 1365 cout
jpayne@69 1366 << s1 << ' ' << e1 << ' ' << s2 << ' ' << e2 << ' '
jpayne@69 1367 << (*eli)->idyc << ' '
jpayne@69 1368 << (*eli)->simc << ' '
jpayne@69 1369 << (*eli)->stpc << '\n'
jpayne@69 1370 << (*eli)->delta;
jpayne@69 1371 }
jpayne@69 1372 }
jpayne@69 1373 }
jpayne@69 1374 return out;
jpayne@69 1375 }