Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/mummer-3.23/src/tigr/delta.cc @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 //////////////////////////////////////////////////////////////////////////////// | |
2 //! \file | |
3 //! \author Adam M Phillippy | |
4 //! \date 03/26/2003 | |
5 //! | |
6 //! \brief Source for non-inline member functions of delta.hh | |
7 //! | |
8 //! \see delta.hh | |
9 //////////////////////////////////////////////////////////////////////////////// | |
10 | |
11 #include "delta.hh" | |
12 #include <map> | |
13 #include <vector> | |
14 #include <cmath> | |
15 #include <sstream> | |
16 #include <algorithm> | |
17 using namespace std; | |
18 | |
19 | |
20 inline long ScoreLocal | |
21 (long scorej, | |
22 long leni, long lenj, | |
23 long olap, float idyi, float maxolap); | |
24 | |
25 | |
26 struct LIS_t | |
27 //!< LIS score | |
28 { | |
29 DeltaEdgelet_t * a; | |
30 long score; | |
31 long diff; | |
32 long from; | |
33 bool used; | |
34 }; | |
35 | |
36 | |
37 struct EdgeletQCmp_t | |
38 //!< Compares query lo coord | |
39 { | |
40 bool operator() (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const | |
41 { | |
42 //-- Sorting by score in the event of a tie ensures that when building | |
43 // LIS chains, the highest scoring ones get seen first, thus avoiding | |
44 // overlap problems | |
45 | |
46 if ( i->loQ < j->loQ ) | |
47 return true; | |
48 else if ( i->loQ > j->loQ ) | |
49 return false; | |
50 else if ( ScoreLocal (0, i->hiQ - i->loQ + 1, 0, 0, i->idy, 0) > | |
51 ScoreLocal (0, j->hiQ - j->loQ + 1, 0, 0, j->idy, 0) ) | |
52 return true; | |
53 else | |
54 return false; | |
55 } | |
56 }; | |
57 | |
58 | |
59 struct EdgeletRCmp_t | |
60 //!< Compares reference lo coord | |
61 { | |
62 bool operator() (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const | |
63 { | |
64 //-- Sorting by score in the event of a tie ensures that when building | |
65 // LIS chains, the highest scoring ones get seen first, thus avoiding | |
66 // overlap problems | |
67 | |
68 if ( i->loR < j->loR ) | |
69 return true; | |
70 else if ( i->loR > j->loR ) | |
71 return false; | |
72 else if ( ScoreLocal (0, i->hiR - i->loR + 1, 0, 0, i->idy, 0) > | |
73 ScoreLocal (0, j->hiR - j->loR + 1, 0, 0, j->idy, 0) ) | |
74 return true; | |
75 else | |
76 return false; | |
77 } | |
78 }; | |
79 | |
80 | |
81 struct NULLPred_t | |
82 //!< Return true if pointer is not NULL | |
83 { | |
84 bool operator() (const void * i) const | |
85 { return (i != NULL); } | |
86 }; | |
87 | |
88 | |
89 //------------------------------------------------------------ DiffAligns ------ | |
90 inline long DiffAligns | |
91 (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) | |
92 { | |
93 return | |
94 ( ( j->loR < i->loR ) ? labs (j->hiR - i->loR) : labs (i->hiR - j->loR) ) + | |
95 ( ( j->loQ < i->loQ ) ? labs (j->hiQ - i->loQ) : labs (i->hiQ - j->loQ) ); | |
96 } | |
97 | |
98 | |
99 //------------------------------------------------------------ PickBest -------- | |
100 long PickBest | |
101 (const LIS_t * lis, const vector<long> & allbest, float epsilon) | |
102 { | |
103 long size = allbest.size(); | |
104 if ( epsilon < 0 && size != 0 ) | |
105 { | |
106 long eqc = 0; | |
107 for ( ; eqc < size; ++ eqc ) | |
108 if ( lis[allbest[eqc]].diff != lis[allbest.front()].diff ) | |
109 break; | |
110 | |
111 return (int)((double)eqc*rand() / (RAND_MAX + 1.0)); | |
112 } | |
113 return size; | |
114 } | |
115 | |
116 | |
117 //------------------------------------------------------------------ RevC ------ | |
118 inline long RevC (const long & coord, | |
119 const long & len) | |
120 { | |
121 return len - coord + 1; | |
122 } | |
123 | |
124 | |
125 //------------------------------------------------------------ ScoreLocal ------ | |
126 inline long ScoreLocal | |
127 (long scorej, long leni, long lenj, | |
128 long olap, float idyi, float maxolap) | |
129 { | |
130 if ( olap > 0 && | |
131 ((float)olap / (float)leni * 100.0 > maxolap || | |
132 (float)olap / (float)lenj * 100.0 > maxolap) ) | |
133 return -1; | |
134 else | |
135 return (scorej + (long)((leni - olap) * pow (idyi, 2))); | |
136 } | |
137 | |
138 | |
139 //----------------------------------------------------------- ScoreGlobal ------ | |
140 inline long ScoreGlobal | |
141 (long scorej, long leni, long olap, float idyi) | |
142 { | |
143 return (scorej + (long)((leni - olap) * pow (idyi, 2))); | |
144 } | |
145 | |
146 | |
147 //------------------------------------------------------------------ Swap ------ | |
148 inline void Swap (long & a, long & b) | |
149 { | |
150 long t = a; a = b; b = t; | |
151 } | |
152 | |
153 | |
154 //------------------------------------------------------------ UpdateBest ------ | |
155 bool UpdateBest | |
156 (LIS_t * lis, long size, vector<long> & allbest, float epsilon) | |
157 { | |
158 if ( size == 0 ) | |
159 return false; | |
160 | |
161 long best, i; | |
162 | |
163 //-- Find the best | |
164 for ( best = 0; best < size; ++ best ) | |
165 if ( ! lis[best].used ) | |
166 break; | |
167 for ( i = best + 1; i < size; ++ i ) | |
168 if ( ! lis[i].used | |
169 && | |
170 ( lis[i].score > lis[best].score | |
171 || | |
172 (lis[i].score == lis[best].score && | |
173 lis[i].diff < lis[best].diff) ) ) | |
174 best = i; | |
175 | |
176 //-- Nonequivalent | |
177 if ( ! allbest.empty() | |
178 && | |
179 (best == size | |
180 || | |
181 (epsilon < 0 && | |
182 lis[best].score < lis[allbest.front()].score) | |
183 || | |
184 (epsilon >= 0 && | |
185 (float)(lis[allbest.front()].score - lis[best].score) / | |
186 (float)(lis[allbest.front()].score) * 100.0 > epsilon)) ) | |
187 return false; | |
188 | |
189 //-- Equivalent | |
190 allbest.push_back (best); | |
191 | |
192 for ( i = best; i >= 0 && i < size; i = lis[i].from ) | |
193 lis[i].used = true; | |
194 | |
195 return true; | |
196 } | |
197 | |
198 | |
199 //===================================================== DeltaReader_t ========== | |
200 //----------------------------------------------------- open ------------------- | |
201 void DeltaReader_t::open | |
202 (const string & delta_path) | |
203 { | |
204 delta_path_m = delta_path; | |
205 | |
206 //-- Open the delta file | |
207 delta_stream_m.open (delta_path_m.c_str ()); | |
208 checkStream (); | |
209 | |
210 //-- Read the file header | |
211 delta_stream_m >> reference_path_m; | |
212 delta_stream_m >> query_path_m; | |
213 delta_stream_m >> data_type_m; | |
214 if ( (data_type_m != NUCMER_STRING && data_type_m != PROMER_STRING) ) | |
215 delta_stream_m.setstate (ios::failbit); | |
216 checkStream (); | |
217 is_open_m = true; | |
218 | |
219 //-- Advance to first record header | |
220 while ( delta_stream_m.peek () != '>' ) | |
221 if ( delta_stream_m.get () == EOF ) | |
222 break; | |
223 } | |
224 | |
225 | |
226 //----------------------------------------------------- readNextAlignment ------ | |
227 void DeltaReader_t::readNextAlignment | |
228 (DeltaAlignment_t & align, const bool read_deltas) | |
229 { | |
230 long delta; | |
231 float total; | |
232 | |
233 //-- Make way for the new alignment | |
234 align.clear (); | |
235 | |
236 //-- Read the alignment header | |
237 delta_stream_m >> align.sR; | |
238 delta_stream_m >> align.eR; | |
239 delta_stream_m >> align.sQ; | |
240 delta_stream_m >> align.eQ; | |
241 delta_stream_m >> align.idyc; | |
242 delta_stream_m >> align.simc; | |
243 delta_stream_m >> align.stpc; | |
244 if ( align.sR <= 0 || align.eR <= 0 || | |
245 align.sQ <= 0 || align.eQ <= 0 || | |
246 align.idyc < 0 || align.simc < 0 || align.stpc < 0 ) | |
247 delta_stream_m.setstate (ios::failbit); | |
248 checkStream (); | |
249 | |
250 total = labs(align.eR - align.sR) + 1.0; | |
251 if ( data_type_m == PROMER_STRING ) | |
252 total /= 3.0; | |
253 | |
254 //-- Get all the deltas | |
255 do | |
256 { | |
257 delta_stream_m >> delta; | |
258 checkStream (); | |
259 | |
260 if ( delta < 0 ) | |
261 total ++; | |
262 if ( read_deltas ) | |
263 align.deltas.push_back (delta); | |
264 } while ( delta != 0 ); | |
265 | |
266 //-- Flush the remaining whitespace | |
267 while ( delta_stream_m.get () != '\n' ); | |
268 | |
269 //-- Calculate the identity, similarity and stopity | |
270 align.idy = (total - (float)align.idyc) / total * 100.0; | |
271 align.sim = (total - (float)align.simc) / total * 100.0; | |
272 align.stp = (float)align.stpc / (total * 2.0) * 100.0; | |
273 } | |
274 | |
275 | |
276 //----------------------------------------------------- readNextRecord --------- | |
277 bool DeltaReader_t::readNextRecord (const bool read_deltas) | |
278 { | |
279 //-- If EOF or any other abnormality | |
280 if ( delta_stream_m.peek () != '>' ) | |
281 return false; | |
282 | |
283 //-- Make way for the new record | |
284 record_m.clear (); | |
285 is_record_m = true; | |
286 | |
287 //-- Read the record header | |
288 delta_stream_m.get (); | |
289 delta_stream_m >> record_m.idR; | |
290 delta_stream_m >> record_m.idQ; | |
291 delta_stream_m >> record_m.lenR; | |
292 delta_stream_m >> record_m.lenQ; | |
293 if ( record_m.lenR <= 0 || record_m.lenQ <= 0 ) | |
294 delta_stream_m.setstate (ios::failbit); | |
295 checkStream (); | |
296 | |
297 //-- Flush the remaining whitespace | |
298 while ( delta_stream_m.get () != '\n' ); | |
299 | |
300 //-- For each alignment... | |
301 DeltaAlignment_t align; | |
302 while ( delta_stream_m.peek () != '>' && | |
303 delta_stream_m.peek () != EOF ) | |
304 { | |
305 readNextAlignment (align, read_deltas); | |
306 record_m.aligns.push_back (align); | |
307 } | |
308 | |
309 return true; | |
310 } | |
311 | |
312 | |
313 //===================================================== DeltaEdge_t ============ | |
314 //------------------------------------------------------build ------------------ | |
315 void DeltaEdge_t::build (const DeltaRecord_t & rec) | |
316 { | |
317 stringstream ss; | |
318 vector<long>::const_iterator di; | |
319 DeltaEdgelet_t * p; | |
320 | |
321 vector<DeltaAlignment_t>::const_iterator i; | |
322 for ( i = rec.aligns.begin(); i != rec.aligns.end(); ++ i ) | |
323 { | |
324 //-- Set the edgelet | |
325 p = new DeltaEdgelet_t(); | |
326 | |
327 p->edge = this; | |
328 | |
329 p->idy = i->idy / 100.0; | |
330 p->sim = i->sim / 100.0; | |
331 p->stp = i->stp / 100.0; | |
332 | |
333 p->idyc = i->idyc; | |
334 p->simc = i->simc; | |
335 p->stpc = i->stpc; | |
336 | |
337 p->loR = i->sR; | |
338 p->hiR = i->eR; | |
339 p->loQ = i->sQ; | |
340 p->hiQ = i->eQ; | |
341 | |
342 p->dirR = p->hiR < p->loR ? REVERSE_DIR : FORWARD_DIR; | |
343 p->dirQ = p->hiQ < p->loQ ? REVERSE_DIR : FORWARD_DIR; | |
344 | |
345 //-- Get the delta information | |
346 for ( di = i->deltas.begin(); di != i->deltas.end(); ++ di ) | |
347 { | |
348 ss << *di << '\n'; | |
349 p->delta.append (ss.str()); | |
350 ss.str (""); | |
351 } | |
352 | |
353 //-- Force loR < hiR && loQ < hiQ | |
354 if ( p->dirR == REVERSE_DIR ) | |
355 Swap (p->loR, p->hiR); | |
356 if ( p->dirQ == REVERSE_DIR ) | |
357 Swap (p->loQ, p->hiQ); | |
358 | |
359 edgelets.push_back (p); | |
360 } | |
361 } | |
362 | |
363 | |
364 //===================================================== DeltaGraph_t =========== | |
365 //----------------------------------------------------- build ------------------ | |
366 //! \brief Build a new graph from a deltafile | |
367 //! | |
368 //! Does not populate: | |
369 //! node->seq | |
370 //! edgelet->frmR/frmQ | |
371 //! edgelet->snps | |
372 //! | |
373 //! \param deltapath The path of the deltafile to read | |
374 //! \param getdeltas Read the delta-encoded gap positions? yes/no | |
375 //! \return void | |
376 //! | |
377 void DeltaGraph_t::build (const string & deltapath, bool getdeltas) | |
378 { | |
379 DeltaReader_t dr; | |
380 DeltaEdge_t * dep; | |
381 pair<map<string, DeltaNode_t>::iterator, bool> insret; | |
382 | |
383 | |
384 //-- Open the delta file and read in the alignment information | |
385 dr.open (deltapath); | |
386 | |
387 refpath = dr.getReferencePath(); | |
388 qrypath = dr.getQueryPath(); | |
389 | |
390 if ( dr.getDataType() == NUCMER_STRING ) | |
391 datatype = NUCMER_DATA; | |
392 else if ( dr.getDataType() == PROMER_STRING ) | |
393 datatype = PROMER_DATA; | |
394 else | |
395 datatype = NULL_DATA; | |
396 | |
397 //-- Read in the next graph edge, i.e. a new delta record | |
398 while ( dr.readNext (getdeltas) ) | |
399 { | |
400 dep = new DeltaEdge_t(); | |
401 | |
402 | |
403 //-- Find the reference node in the graph, add a new one if necessary | |
404 insret = refnodes.insert | |
405 (map<string, DeltaNode_t>::value_type | |
406 (dr.getRecord().idR, DeltaNode_t())); | |
407 dep->refnode = &((insret.first)->second); | |
408 | |
409 //-- If a new reference node | |
410 if ( insret.second ) | |
411 { | |
412 dep->refnode->id = &((insret.first)->first); | |
413 dep->refnode->len = dr.getRecord().lenR; | |
414 } | |
415 | |
416 | |
417 //-- Find the query node in the graph, add a new one if necessary | |
418 insret = qrynodes.insert | |
419 (map<string, DeltaNode_t>::value_type | |
420 (dr.getRecord().idQ, DeltaNode_t())); | |
421 dep->qrynode = &((insret.first)->second); | |
422 | |
423 //-- If a new query node | |
424 if ( insret.second ) | |
425 { | |
426 dep->qrynode->id = &((insret.first)->first); | |
427 dep->qrynode->len = dr.getRecord().lenQ; | |
428 } | |
429 | |
430 | |
431 //-- Build the edge | |
432 dep->build (dr.getRecord()); | |
433 dep->refnode->edges.push_back (dep); | |
434 dep->qrynode->edges.push_back (dep); | |
435 } | |
436 dr.close (); | |
437 } | |
438 | |
439 | |
440 //------------------------------------------------------------------- clean ---- | |
441 //! \brief Clean the graph of all edgelets where isGOOD = false | |
442 //! | |
443 //! Removes all edgelets from the graph where isGOOD = false. Afterwhich, all | |
444 //! now empty edges or nodes are also removed. | |
445 //! | |
446 //! \return void | |
447 //! | |
448 void DeltaGraph_t::clean() | |
449 { | |
450 map<string, DeltaNode_t>::iterator i; | |
451 map<string, DeltaNode_t>::iterator ii; | |
452 vector<DeltaEdge_t *>::iterator j; | |
453 vector<DeltaEdgelet_t *>::iterator k; | |
454 | |
455 //-- For all ref nodes | |
456 for ( i = refnodes.begin(); i != refnodes.end(); ) | |
457 { | |
458 //-- For all edges | |
459 for ( j = i->second.edges.begin(); | |
460 j != i->second.edges.end(); ++ j ) | |
461 { | |
462 //-- For all edgelets | |
463 for ( k = (*j)->edgelets.begin(); | |
464 k != (*j)->edgelets.end(); ++ k ) | |
465 { | |
466 if ( ! (*k)->isGOOD ) | |
467 { | |
468 //-- Delete the bad edgelet and mark for erasure | |
469 delete (*k); | |
470 *k = NULL; | |
471 } | |
472 } | |
473 | |
474 //-- Erase the marked edgelets | |
475 k = partition ((*j)->edgelets.begin(), | |
476 (*j)->edgelets.end(), | |
477 NULLPred_t()); | |
478 (*j)->edgelets.erase (k, (*j)->edgelets.end()); | |
479 | |
480 //-- Mark the edge if empty | |
481 if ( (*j)->edgelets.empty() ) | |
482 *j = NULL; | |
483 } | |
484 | |
485 //-- Erase the marked edges | |
486 j = partition (i->second.edges.begin(), | |
487 i->second.edges.end(), | |
488 NULLPred_t()); | |
489 i->second.edges.erase (j, i->second.edges.end()); | |
490 | |
491 //-- Erase the node if empty | |
492 ii = i ++; | |
493 if ( ii->second.edges.empty() ) | |
494 refnodes.erase (ii); | |
495 } | |
496 | |
497 //-- For all qry nodes | |
498 for ( i = qrynodes.begin(); i != qrynodes.end(); ) | |
499 { | |
500 for ( j = i->second.edges.begin(); | |
501 j != i->second.edges.end(); ++ j ) | |
502 { | |
503 //-- Delete the edge if empty and mark for erasure | |
504 if ( (*j)->edgelets.empty() ) | |
505 { | |
506 delete (*j); | |
507 *j = NULL; | |
508 } | |
509 } | |
510 | |
511 //-- Erase the marked edges | |
512 j = partition (i->second.edges.begin(), | |
513 i->second.edges.end(), | |
514 NULLPred_t()); | |
515 i->second.edges.erase (j, i->second.edges.end()); | |
516 | |
517 //-- Erase the node if empty | |
518 ii = i ++; | |
519 if ( ii->second.edges.empty() ) | |
520 qrynodes.erase (ii); | |
521 } | |
522 | |
523 } | |
524 | |
525 | |
526 //------------------------------------------------------------------- clear ---- | |
527 //! \brief Clear the graph of all nodes, edges and edgelets | |
528 //! | |
529 //! \return void | |
530 //! | |
531 void DeltaGraph_t::clear ( ) | |
532 { | |
533 //-- Make sure the edges only get destructed once | |
534 std::map<std::string, DeltaNode_t>::iterator i; | |
535 std::vector<DeltaEdge_t *>::iterator j; | |
536 for ( i = refnodes . begin( ); i != refnodes . end( ); ++ i ) | |
537 for ( j = i -> second . edges . begin( ); | |
538 j != i -> second . edges . end( ); ++ j ) | |
539 delete (*j); | |
540 | |
541 refnodes.clear( ); | |
542 qrynodes.clear( ); | |
543 refpath.erase( ); | |
544 qrypath.erase( ); | |
545 datatype = NULL_DATA; | |
546 } | |
547 | |
548 | |
549 //------------------------------------------------------------ getNodeCount ---- | |
550 //! \brief Counts and returns the number of graph nodes (sequences) | |
551 //! | |
552 //! \return The number of graph nodes | |
553 //! | |
554 long DeltaGraph_t::getNodeCount() | |
555 { | |
556 long sum = refnodes.size() + qrynodes.size(); | |
557 return sum; | |
558 } | |
559 | |
560 | |
561 //------------------------------------------------------------ getEdgeCount ---- | |
562 //! \brief Counts and returns the number of graph edges | |
563 //! | |
564 //! \return void | |
565 //! | |
566 long DeltaGraph_t::getEdgeCount() | |
567 { | |
568 long sum = 0; | |
569 | |
570 map<string, DeltaNode_t>::iterator i; | |
571 for ( i = refnodes.begin(); i != refnodes.end(); ++ i ) | |
572 sum += i->second.edges.size(); | |
573 | |
574 return sum; | |
575 } | |
576 | |
577 | |
578 //--------------------------------------------------------- getEdgeletCount ---- | |
579 //! \brief Counts and returns the number of graph edgelets (alignments) | |
580 //! | |
581 //! \return void | |
582 //! | |
583 long DeltaGraph_t::getEdgeletCount() | |
584 { | |
585 long sum = 0; | |
586 | |
587 map<string, DeltaNode_t>::iterator i; | |
588 vector<DeltaEdge_t *>::iterator j; | |
589 for ( i = refnodes.begin(); i != refnodes.end(); ++ i ) | |
590 for ( j = i->second.edges.begin(); | |
591 j != i->second.edges.end(); ++ j ) | |
592 sum += (*j)->edgelets.size(); | |
593 | |
594 return sum; | |
595 } | |
596 | |
597 | |
598 //---------------------------------------------------------------- flagGOOD ---- | |
599 //! \brief Sets isGOOD = 1 for all edgelets | |
600 void DeltaGraph_t::flagGOOD() | |
601 { | |
602 map<string, DeltaNode_t>::const_iterator mi; | |
603 vector<DeltaEdge_t *>::const_iterator ei; | |
604 vector<DeltaEdgelet_t *>::iterator eli; | |
605 | |
606 //-- All references | |
607 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
608 { | |
609 //-- All queries matching this reference | |
610 for ( ei = (mi->second).edges.begin(); | |
611 ei != (mi->second).edges.end(); ++ ei ) | |
612 { | |
613 //-- All alignments between reference and query | |
614 for ( eli = (*ei)->edgelets.begin(); | |
615 eli != (*ei)->edgelets.end(); ++ eli ) | |
616 { | |
617 (*eli)->isGOOD = true; | |
618 } | |
619 } | |
620 } | |
621 } | |
622 | |
623 | |
624 //---------------------------------------------------------------- flag1to1 ---- | |
625 //! \brief Intersection of flagQLIS and flagRLIS | |
626 void DeltaGraph_t::flag1to1(float epsilon, float maxolap) | |
627 { | |
628 flagRLIS(epsilon, maxolap, false); | |
629 flagQLIS(epsilon, maxolap, false); | |
630 | |
631 map<string, DeltaNode_t>::const_iterator mi; | |
632 vector<DeltaEdge_t *>::const_iterator ei; | |
633 vector<DeltaEdgelet_t *>::iterator eli; | |
634 | |
635 //-- All references | |
636 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
637 { | |
638 //-- All queries matching this reference | |
639 for ( ei = (mi->second).edges.begin(); | |
640 ei != (mi->second).edges.end(); ++ ei ) | |
641 { | |
642 //-- All alignments between reference and query | |
643 for ( eli = (*ei)->edgelets.begin(); | |
644 eli != (*ei)->edgelets.end(); ++ eli ) | |
645 { | |
646 if ( !(*eli)->isRLIS || !(*eli)->isQLIS ) | |
647 (*eli)->isGOOD = false; | |
648 } | |
649 } | |
650 } | |
651 } | |
652 | |
653 | |
654 //---------------------------------------------------------------- flagMtoM ---- | |
655 //! \brief Union of flagQLIS and flagRLIS | |
656 void DeltaGraph_t::flagMtoM(float epsilon, float maxolap) | |
657 { | |
658 flagRLIS(epsilon, maxolap, false); | |
659 flagQLIS(epsilon, maxolap, false); | |
660 | |
661 map<string, DeltaNode_t>::const_iterator mi; | |
662 vector<DeltaEdge_t *>::const_iterator ei; | |
663 vector<DeltaEdgelet_t *>::iterator eli; | |
664 | |
665 //-- All references | |
666 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
667 { | |
668 //-- All queries matching this reference | |
669 for ( ei = (mi->second).edges.begin(); | |
670 ei != (mi->second).edges.end(); ++ ei ) | |
671 { | |
672 //-- All alignments between reference and query | |
673 for ( eli = (*ei)->edgelets.begin(); | |
674 eli != (*ei)->edgelets.end(); ++ eli ) | |
675 { | |
676 if ( !(*eli)->isRLIS && !(*eli)->isQLIS ) | |
677 (*eli)->isGOOD = false; | |
678 } | |
679 } | |
680 } | |
681 } | |
682 | |
683 | |
684 //-------------------------------------------------------------- flagGLIS ------ | |
685 //! \brief Flag edgelets in the global LIS and unflag those who are not | |
686 //! | |
687 //! Runs a length*identity weigthed LIS to determine the longest, mutually | |
688 //! consistent set of alignments between all pairs of sequences. This | |
689 //! essentially constructs the global alignment between all sequence pairs. | |
690 //! | |
691 //! Sets isGLIS flag for good and unsets isGOOD flag for bad. | |
692 //! | |
693 //! \param epsilon Keep repeat alignments within epsilon % of the best align | |
694 //! \return void | |
695 //! | |
696 void DeltaGraph_t::flagGLIS (float epsilon) | |
697 { | |
698 LIS_t * lis = NULL; | |
699 long lis_size = 0; | |
700 long i, j, n; | |
701 long olap, olapQ, olapR, len, lenQ, lenR, score, diff; | |
702 | |
703 vector<DeltaEdgelet_t *> edgelets; | |
704 | |
705 map<string, DeltaNode_t>::const_iterator mi; | |
706 vector<DeltaEdge_t *>::const_iterator ei; | |
707 vector<DeltaEdgelet_t *>::iterator eli; | |
708 | |
709 | |
710 //-- For each reference sequence | |
711 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
712 { | |
713 //-- For each query aligning to this reference | |
714 for ( ei = (mi->second).edges.begin(); | |
715 ei != (mi->second).edges.end(); ++ ei ) | |
716 { | |
717 //-- Collect all the good edgelets | |
718 edgelets.clear(); | |
719 for ( eli = (*ei)->edgelets.begin(); | |
720 eli != (*ei)->edgelets.end(); ++ eli ) | |
721 if ( (*eli)->isGOOD ) | |
722 { | |
723 edgelets.push_back (*eli); | |
724 | |
725 //-- Fix the coordinates to make global LIS work | |
726 if ( (*eli)->dirR == (*eli)->dirQ ) | |
727 { | |
728 (*eli)->dirQ = FORWARD_DIR; | |
729 } | |
730 else | |
731 { | |
732 if ( (*eli)->dirQ == REVERSE_DIR ) | |
733 Swap ((*eli)->loQ, (*eli)->hiQ); | |
734 (*eli)->loQ = RevC ((*eli)->loQ, (*ei)->qrynode->len); | |
735 (*eli)->hiQ = RevC ((*eli)->hiQ, (*ei)->qrynode->len); | |
736 (*eli)->dirQ = REVERSE_DIR; | |
737 } | |
738 } | |
739 | |
740 //-- Resize and initialize | |
741 n = edgelets.size(); | |
742 if ( n > lis_size ) | |
743 { | |
744 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n); | |
745 lis_size = n; | |
746 } | |
747 for ( i = 0; i < n; ++ i ) | |
748 lis[i].used = false; | |
749 | |
750 //-- Sort by lo query coord | |
751 sort (edgelets.begin(), edgelets.end(), EdgeletQCmp_t()); | |
752 | |
753 //-- Continue until all equivalent repeats are extracted | |
754 vector<long> allbest; | |
755 do | |
756 { | |
757 //-- Dynamic | |
758 for ( i = 0; i < n; ++ i ) | |
759 { | |
760 if ( lis[i].used ) continue; | |
761 | |
762 lis[i].a = edgelets[i]; | |
763 | |
764 lenR = lis[i].a->hiR - lis[i].a->loR + 1; | |
765 lenQ = lis[i].a->hiQ - lis[i].a->loQ + 1; | |
766 len = lenR > lenQ ? lenQ : lenR; | |
767 lis[i].score = ScoreGlobal (0, len, 0, lis[i].a->idy); | |
768 | |
769 lis[i].from = -1; | |
770 lis[i].diff = 0; | |
771 | |
772 for ( j = 0; j < i; ++ j ) | |
773 { | |
774 if ( lis[j].used ) continue; | |
775 | |
776 if ( lis[i].a->dirQ != lis[j].a->dirQ ) | |
777 continue; | |
778 | |
779 lenR = lis[i].a->hiR - lis[i].a->loR + 1; | |
780 lenQ = lis[i].a->hiQ - lis[i].a->loQ + 1; | |
781 len = lenR > lenQ ? lenQ : lenR; | |
782 | |
783 olapR = lis[j].a->hiR - lis[i].a->loR + 1; | |
784 olapQ = lis[j].a->hiQ - lis[i].a->loQ + 1; | |
785 olap = olapR > olapQ ? olapR : olapQ; | |
786 if ( olap < 0 ) | |
787 olap = 0; | |
788 | |
789 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a); | |
790 | |
791 score = ScoreGlobal | |
792 (lis[j].score, len, olap, lis[i].a->idy); | |
793 | |
794 if ( score > lis[i].score | |
795 || | |
796 (score == lis[i].score && diff < lis[i].diff) ) | |
797 { | |
798 lis[i].from = j; | |
799 lis[i].score = score; | |
800 lis[i].diff = diff; | |
801 } | |
802 } | |
803 } | |
804 } while ( UpdateBest (lis, n, allbest, epsilon) ); | |
805 | |
806 long beg = PickBest (lis, allbest, epsilon); | |
807 long end = allbest.size(); | |
808 if ( beg == end ) beg = 0; | |
809 else end = beg + 1; | |
810 | |
811 //-- Flag the edgelets | |
812 for ( ; beg < end; ++ beg ) | |
813 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from ) | |
814 lis[i].a->isGLIS = true; | |
815 | |
816 //-- Repair the coordinates | |
817 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli ) | |
818 { | |
819 if ( ! (*eli)->isGLIS ) | |
820 (*eli)->isGOOD = false; | |
821 | |
822 if ( (*eli)->dirQ == FORWARD_DIR ) | |
823 { | |
824 (*eli)->dirQ = (*eli)->dirR; | |
825 } | |
826 else | |
827 { | |
828 if ( (*eli)->dirR == FORWARD_DIR ) | |
829 Swap ((*eli)->loQ, (*eli)->hiQ); | |
830 (*eli)->loQ = RevC ((*eli)->loQ, (*ei)->qrynode->len); | |
831 (*eli)->hiQ = RevC ((*eli)->hiQ, (*ei)->qrynode->len); | |
832 (*eli)->dirQ = | |
833 (*eli)->dirR == FORWARD_DIR ? REVERSE_DIR : FORWARD_DIR; | |
834 } | |
835 } | |
836 } | |
837 } | |
838 | |
839 free (lis); | |
840 } | |
841 | |
842 | |
843 //-------------------------------------------------------------- flagQLIS ------ | |
844 //! \brief Flag edgelets in the query LIS and unflag those who are not | |
845 //! | |
846 //! Runs a length*identity weighted LIS to determine the longest, QUERY | |
847 //! consistent set of alignments for all query sequences. This effectively | |
848 //! identifies the "best" alignments for all positions of each query. | |
849 //! | |
850 //! Sets isQLIS flag for good and unsets isGOOD flag for bad. | |
851 //! | |
852 //! \param epsilon Keep repeat alignments within epsilon % of the best align | |
853 //! \param maxolap Only allow alignments to overlap by maxolap percent [0-100] | |
854 //! \param flagBad Flag non QLIS edgelets as !isGOOD | |
855 //! \return void | |
856 //! | |
857 void DeltaGraph_t::flagQLIS (float epsilon, float maxolap, bool flagbad) | |
858 { | |
859 LIS_t * lis = NULL; | |
860 long lis_size = 0; | |
861 long i, j, n; | |
862 long olap, leni, lenj, score, diff; | |
863 | |
864 vector<DeltaEdgelet_t *> edgelets; | |
865 | |
866 map<string, DeltaNode_t>::const_iterator mi; | |
867 vector<DeltaEdge_t *>::const_iterator ei; | |
868 vector<DeltaEdgelet_t *>::iterator eli; | |
869 | |
870 | |
871 //-- For each query sequence | |
872 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi ) | |
873 { | |
874 //-- Collect all the good edgelets | |
875 edgelets.clear(); | |
876 for ( ei = (mi->second).edges.begin(); | |
877 ei != (mi->second).edges.end(); ++ ei ) | |
878 for ( eli = (*ei)->edgelets.begin(); | |
879 eli != (*ei)->edgelets.end(); ++ eli ) | |
880 if ( (*eli)->isGOOD ) | |
881 edgelets.push_back (*eli); | |
882 | |
883 //-- Resize and initialize | |
884 n = edgelets.size(); | |
885 if ( n > lis_size ) | |
886 { | |
887 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n); | |
888 lis_size = n; | |
889 } | |
890 for ( i = 0; i < n; ++ i ) | |
891 lis[i].used = false; | |
892 | |
893 //-- Sort by lo query coord | |
894 sort (edgelets.begin(), edgelets.end(), EdgeletQCmp_t()); | |
895 | |
896 //-- Continue until all equivalent repeats are extracted | |
897 vector<long> allbest; | |
898 do | |
899 { | |
900 //-- Dynamic | |
901 for ( i = 0; i < n; ++ i ) | |
902 { | |
903 if ( lis[i].used ) continue; | |
904 | |
905 lis[i].a = edgelets[i]; | |
906 | |
907 leni = lis[i].a->hiQ - lis[i].a->loQ + 1; | |
908 lis[i].score = | |
909 ScoreLocal (0, leni, 0, 0, lis[i].a->idy, 0); | |
910 | |
911 lis[i].from = -1; | |
912 lis[i].diff = 0; | |
913 | |
914 for ( j = 0; j < i; ++ j ) | |
915 { | |
916 if ( lis[j].used ) continue; | |
917 if ( lis[j].from >= 0 && | |
918 lis[lis[j].from].a->hiQ >= lis[i].a->loQ ) | |
919 continue; | |
920 | |
921 leni = lis[i].a->hiQ - lis[i].a->loQ + 1; | |
922 lenj = lis[j].a->hiQ - lis[j].a->loQ + 1; | |
923 olap = lis[j].a->hiQ - lis[i].a->loQ + 1; | |
924 if ( olap < 0 ) | |
925 olap = 0; | |
926 | |
927 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a); | |
928 | |
929 score = ScoreLocal | |
930 (lis[j].score, leni, lenj, olap, lis[i].a->idy, maxolap); | |
931 | |
932 if ( score > lis[i].score | |
933 || | |
934 (score == lis[i].score && diff < lis[i].diff) ) | |
935 { | |
936 lis[i].from = j; | |
937 lis[i].score = score; | |
938 lis[i].diff = diff; | |
939 } | |
940 } | |
941 } | |
942 } while ( UpdateBest (lis, n, allbest, epsilon) ); | |
943 | |
944 long beg = PickBest (lis, allbest, epsilon); | |
945 long end = allbest.size(); | |
946 if ( beg == end ) beg = 0; | |
947 else end = beg + 1; | |
948 | |
949 //-- Flag the edgelets | |
950 for ( ; beg < end; ++ beg ) | |
951 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from ) | |
952 lis[i].a->isQLIS = true; | |
953 | |
954 if ( flagbad ) | |
955 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli ) | |
956 if ( ! (*eli)->isQLIS ) | |
957 (*eli)->isGOOD = false; | |
958 } | |
959 | |
960 free (lis); | |
961 } | |
962 | |
963 | |
964 //-------------------------------------------------------------- flagRLIS ------ | |
965 //! \brief Flag edgelets in the reference LIS and unflag those who are not | |
966 //! | |
967 //! Runs a length*identity weighted LIS to determine the longest, REFERENCE | |
968 //! consistent set of alignments for all reference sequences. This effectively | |
969 //! identifies the "best" alignments for all positions of each reference. | |
970 //! | |
971 //! Sets isRLIS flag for good and unsets isGOOD flag for bad. | |
972 //! | |
973 //! \param epsilon Keep repeat alignments within epsilon % of the best align | |
974 //! \param maxolap Only allow alignments to overlap by maxolap percent [0-100] | |
975 //! \param flagBad Flag non RLIS edgelets as !isGOOD | |
976 //! \return void | |
977 //! | |
978 void DeltaGraph_t::flagRLIS (float epsilon, float maxolap, bool flagbad) | |
979 { | |
980 LIS_t * lis = NULL; | |
981 long lis_size = 0; | |
982 long i, j, n; | |
983 long olap, leni, lenj, score, diff; | |
984 | |
985 vector<DeltaEdgelet_t *> edgelets; | |
986 | |
987 map<string, DeltaNode_t>::const_iterator mi; | |
988 vector<DeltaEdge_t *>::const_iterator ei; | |
989 vector<DeltaEdgelet_t *>::iterator eli; | |
990 | |
991 | |
992 //-- For each reference sequence | |
993 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
994 { | |
995 //-- Collect all the good edgelets | |
996 edgelets.clear(); | |
997 for ( ei = (mi->second).edges.begin(); | |
998 ei != (mi->second).edges.end(); ++ ei ) | |
999 for ( eli = (*ei)->edgelets.begin(); | |
1000 eli != (*ei)->edgelets.end(); ++ eli ) | |
1001 if ( (*eli)->isGOOD ) | |
1002 edgelets.push_back (*eli); | |
1003 | |
1004 //-- Resize | |
1005 n = edgelets.size(); | |
1006 if ( n > lis_size ) | |
1007 { | |
1008 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n); | |
1009 lis_size = n; | |
1010 } | |
1011 for ( i = 0; i < n; ++ i ) | |
1012 lis[i].used = false; | |
1013 | |
1014 //-- Sort by lo reference coord | |
1015 sort (edgelets.begin(), edgelets.end(), EdgeletRCmp_t()); | |
1016 | |
1017 //-- Continue until all equivalent repeats are extracted | |
1018 vector<long> allbest; | |
1019 do | |
1020 { | |
1021 //-- Dynamic | |
1022 for ( i = 0; i < n; ++ i ) | |
1023 { | |
1024 if ( lis[i].used ) continue; | |
1025 | |
1026 lis[i].a = edgelets[i]; | |
1027 | |
1028 leni = lis[i].a->hiR - lis[i].a->loR + 1; | |
1029 lis[i].score = | |
1030 ScoreLocal (0, leni, 0, 0, lis[i].a->idy, 0); | |
1031 | |
1032 lis[i].from = -1; | |
1033 lis[i].diff = 0; | |
1034 | |
1035 for ( j = 0; j < i; ++ j ) | |
1036 { | |
1037 if ( lis[j].used ) continue; | |
1038 if ( lis[j].from >= 0 && | |
1039 lis[lis[j].from].a->hiR >= lis[i].a->loR ) | |
1040 continue; | |
1041 | |
1042 leni = lis[i].a->hiR - lis[i].a->loR + 1; | |
1043 lenj = lis[j].a->hiR - lis[j].a->loR + 1; | |
1044 olap = lis[j].a->hiR - lis[i].a->loR + 1; | |
1045 if ( olap < 0 ) | |
1046 olap = 0; | |
1047 | |
1048 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a); | |
1049 | |
1050 score = ScoreLocal | |
1051 (lis[j].score, leni, lenj, olap, lis[i].a->idy, maxolap); | |
1052 | |
1053 if ( score > lis[i].score | |
1054 || | |
1055 (score == lis[i].score && diff < lis[i].diff) ) | |
1056 { | |
1057 lis[i].from = j; | |
1058 lis[i].score = score; | |
1059 lis[i].diff = diff; | |
1060 } | |
1061 } | |
1062 } | |
1063 } while ( UpdateBest (lis, n, allbest, epsilon) ); | |
1064 | |
1065 long beg = PickBest (lis, allbest, epsilon); | |
1066 long end = allbest.size(); | |
1067 if ( beg == end ) beg = 0; | |
1068 else end = beg + 1; | |
1069 | |
1070 //-- Flag the edgelets | |
1071 for ( ; beg < end; ++ beg ) | |
1072 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from ) | |
1073 lis[i].a->isRLIS = true; | |
1074 | |
1075 if ( flagbad ) | |
1076 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli ) | |
1077 if ( ! (*eli)->isRLIS ) | |
1078 (*eli)->isGOOD = false; | |
1079 } | |
1080 | |
1081 free (lis); | |
1082 } | |
1083 | |
1084 | |
1085 //------------------------------------------------------------- flagScore ------ | |
1086 //! \brief Flag edgelets with scores below a score threshold | |
1087 //! | |
1088 //! Unsets isGOOD for bad. | |
1089 //! | |
1090 //! \param minlen Flag edgelets if less than minlen in length | |
1091 //! \param minidy Flag edgelets if less than minidy identity [0-100] | |
1092 //! \return void | |
1093 //! | |
1094 void DeltaGraph_t::flagScore (long minlen, float minidy) | |
1095 { | |
1096 map<string, DeltaNode_t>::const_iterator mi; | |
1097 vector<DeltaEdge_t *>::const_iterator ei; | |
1098 vector<DeltaEdgelet_t *>::iterator eli; | |
1099 | |
1100 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
1101 for ( ei = (mi->second).edges.begin(); | |
1102 ei != (mi->second).edges.end(); ++ ei ) | |
1103 for ( eli = (*ei)->edgelets.begin(); | |
1104 eli != (*ei)->edgelets.end(); ++ eli ) | |
1105 if ( (*eli)->isGOOD ) | |
1106 { | |
1107 //-- Flag low identities | |
1108 if ( (*eli)->idy * 100.0 < minidy ) | |
1109 (*eli)->isGOOD = false; | |
1110 | |
1111 //-- Flag small lengths | |
1112 if ( (*eli)->hiR - (*eli)->loR + 1 < minlen || | |
1113 (*eli)->hiQ - (*eli)->loQ + 1 < minlen ) | |
1114 (*eli)->isGOOD = false; | |
1115 } | |
1116 } | |
1117 | |
1118 | |
1119 //-------------------------------------------------------------- flagUNIQ ------ | |
1120 //! \brief Flag edgelets with uniqueness below a certain threshold | |
1121 //! | |
1122 //! Unsets isGOOD for bad. | |
1123 //! | |
1124 //! \param minuniq Flag edgelets if less that minuniq percent [0-100] unique | |
1125 //! \return void | |
1126 //! | |
1127 void DeltaGraph_t::flagUNIQ (float minuniq) | |
1128 { | |
1129 long i, uniq, len; | |
1130 | |
1131 vector<DeltaEdgelet_t *> edgelets; | |
1132 | |
1133 map<string, DeltaNode_t>::const_iterator mi; | |
1134 vector<DeltaEdge_t *>::const_iterator ei; | |
1135 vector<DeltaEdgelet_t *>::iterator eli; | |
1136 | |
1137 | |
1138 //-- For each reference sequence | |
1139 long ref_size = 0; | |
1140 long ref_len = 0; | |
1141 unsigned char * ref_cov = NULL; | |
1142 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
1143 { | |
1144 //-- Reset the reference coverage array | |
1145 ref_len = (mi->second).len; | |
1146 if ( ref_len > ref_size ) | |
1147 { | |
1148 ref_cov = (unsigned char *) Safe_realloc (ref_cov, ref_len + 1); | |
1149 ref_size = ref_len; | |
1150 } | |
1151 for ( i = 1; i <= ref_len; ++ i ) | |
1152 ref_cov[i] = 0; | |
1153 | |
1154 //-- Collect all the good edgelets | |
1155 edgelets.clear(); | |
1156 for ( ei = (mi->second).edges.begin(); | |
1157 ei != (mi->second).edges.end(); ++ ei ) | |
1158 for ( eli = (*ei)->edgelets.begin(); | |
1159 eli != (*ei)->edgelets.end(); ++ eli ) | |
1160 if ( (*eli)->isGOOD ) | |
1161 { | |
1162 edgelets.push_back (*eli); | |
1163 | |
1164 //-- Add to the reference coverage | |
1165 for ( i = (*eli)->loR; i <= (*eli)->hiR; i ++ ) | |
1166 if ( ref_cov[i] < UCHAR_MAX ) | |
1167 ref_cov[i] ++; | |
1168 } | |
1169 | |
1170 //-- Calculate the uniqueness of each edgelet | |
1171 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli ) | |
1172 { | |
1173 uniq = 0; | |
1174 len = (*eli)->hiR - (*eli)->loR + 1; | |
1175 for ( i = (*eli)->loR; i <= (*eli)->hiR; i ++ ) | |
1176 if ( ref_cov[i] == 1 ) | |
1177 uniq ++; | |
1178 | |
1179 //-- Flag low reference uniqueness | |
1180 if ( (float)uniq / (float)len * 100.0 < minuniq ) | |
1181 (*eli)->isGOOD = false; | |
1182 } | |
1183 } | |
1184 free (ref_cov); | |
1185 | |
1186 | |
1187 //-- For each query sequence | |
1188 long qry_size = 0; | |
1189 long qry_len = 0; | |
1190 unsigned char * qry_cov = NULL; | |
1191 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi ) | |
1192 { | |
1193 //-- Reset the query coverage array | |
1194 qry_len = (mi->second).len; | |
1195 if ( qry_len > qry_size ) | |
1196 { | |
1197 qry_cov = (unsigned char *) Safe_realloc (qry_cov, qry_len + 1); | |
1198 qry_size = qry_len; | |
1199 } | |
1200 for ( i = 1; i <= qry_len; ++ i ) | |
1201 qry_cov[i] = 0; | |
1202 | |
1203 //-- Collect all the good edgelets | |
1204 edgelets.clear(); | |
1205 for ( ei = (mi->second).edges.begin(); | |
1206 ei != (mi->second).edges.end(); ++ ei ) | |
1207 for ( eli = (*ei)->edgelets.begin(); | |
1208 eli != (*ei)->edgelets.end(); ++ eli ) | |
1209 if ( (*eli)->isGOOD ) | |
1210 { | |
1211 edgelets.push_back (*eli); | |
1212 | |
1213 //-- Add to the query coverage | |
1214 for ( i = (*eli)->loQ; i <= (*eli)->hiQ; i ++ ) | |
1215 if ( qry_cov[i] < UCHAR_MAX ) | |
1216 qry_cov[i] ++; | |
1217 } | |
1218 | |
1219 //-- Calculate the uniqueness of each edgelet | |
1220 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli ) | |
1221 { | |
1222 uniq = 0; | |
1223 len = (*eli)->hiQ - (*eli)->loQ + 1; | |
1224 for ( i = (*eli)->loQ; i <= (*eli)->hiQ; i ++ ) | |
1225 if ( qry_cov[i] == 1 ) | |
1226 uniq ++; | |
1227 | |
1228 //-- Flag low query uniqueness | |
1229 if ( (float)uniq / (float)len * 100.0 < minuniq ) | |
1230 (*eli)->isGOOD = false; | |
1231 } | |
1232 } | |
1233 free (qry_cov); | |
1234 } | |
1235 | |
1236 | |
1237 //----------------------------------------------------- loadSequences ---------- | |
1238 //! \brief Load the sequence information into the DeltaNodes | |
1239 //! | |
1240 //! \return void | |
1241 //! | |
1242 void DeltaGraph_t::loadSequences () | |
1243 { | |
1244 map<string, DeltaNode_t>::iterator mi; | |
1245 | |
1246 FILE * qryfile, * reffile; | |
1247 char * R = NULL; | |
1248 char * Q = NULL; | |
1249 char id [MAX_LINE]; | |
1250 long initsize; | |
1251 long len; | |
1252 | |
1253 //-- Read in the reference sequences | |
1254 reffile = File_Open (refpath.c_str(), "r"); | |
1255 initsize = INIT_SIZE; | |
1256 R = (char *) Safe_malloc (initsize); | |
1257 while ( Read_String (reffile, R, initsize, id, FALSE) ) | |
1258 if ( (mi = refnodes.find (id)) != refnodes.end() ) | |
1259 { | |
1260 len = strlen (R + 1); | |
1261 mi->second.seq = (char *) Safe_malloc (len + 2); | |
1262 mi->second.seq[0] = '\0'; | |
1263 strcpy (mi->second.seq + 1, R + 1); | |
1264 if ( len != mi->second.len ) | |
1265 { | |
1266 cerr << "ERROR: Reference input does not match delta file\n"; | |
1267 exit (EXIT_FAILURE); | |
1268 } | |
1269 } | |
1270 fclose (reffile); | |
1271 free (R); | |
1272 | |
1273 //-- Read in the query sequences | |
1274 qryfile = File_Open (qrypath.c_str(), "r"); | |
1275 initsize = INIT_SIZE; | |
1276 Q = (char *) Safe_malloc (initsize); | |
1277 while ( Read_String (qryfile, Q, initsize, id, FALSE) ) | |
1278 if ( (mi = qrynodes.find (id)) != qrynodes.end() ) | |
1279 { | |
1280 len = strlen (Q + 1); | |
1281 mi->second.seq = (char *) Safe_malloc (len + 2); | |
1282 mi->second.seq[0] = '\0'; | |
1283 strcpy (mi->second.seq + 1, Q + 1); | |
1284 if ( len != mi->second.len ) | |
1285 { | |
1286 cerr << "ERROR: Query input does not match delta file\n"; | |
1287 exit (EXIT_FAILURE); | |
1288 } | |
1289 } | |
1290 fclose (qryfile); | |
1291 free (Q); | |
1292 | |
1293 | |
1294 //-- Check that we found all the sequences | |
1295 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi ) | |
1296 if ( mi->second.seq == NULL ) | |
1297 { | |
1298 cerr << "ERROR: '" << mi->first << "' not found in reference file\n"; | |
1299 exit (EXIT_FAILURE); | |
1300 } | |
1301 | |
1302 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi ) | |
1303 if ( mi->second.seq == NULL ) | |
1304 { | |
1305 cerr << "ERROR: '" << mi->first << "' not found in query file\n"; | |
1306 exit (EXIT_FAILURE); | |
1307 } | |
1308 } | |
1309 | |
1310 | |
1311 //----------------------------------------------------- outputDelta ------------ | |
1312 //! \brief Outputs the contents of the graph as a deltafile | |
1313 //! | |
1314 //! \param out The output stream to write to | |
1315 //! \return The output stream | |
1316 //! | |
1317 ostream & DeltaGraph_t::outputDelta (ostream & out) | |
1318 { | |
1319 bool header; | |
1320 long s1, e1, s2, e2; | |
1321 | |
1322 map<string, DeltaNode_t>::const_iterator mi; | |
1323 vector<DeltaEdge_t *>::const_iterator ei; | |
1324 vector<DeltaEdgelet_t *>::const_iterator eli; | |
1325 | |
1326 //-- Print the file header | |
1327 cout | |
1328 << refpath << ' ' << qrypath << '\n' | |
1329 << (datatype == PROMER_DATA ? PROMER_STRING : NUCMER_STRING) << '\n'; | |
1330 | |
1331 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi ) | |
1332 { | |
1333 for ( ei = (mi->second).edges.begin(); | |
1334 ei != (mi->second).edges.end(); ++ ei ) | |
1335 { | |
1336 header = false; | |
1337 | |
1338 for ( eli = (*ei)->edgelets.begin(); | |
1339 eli != (*ei)->edgelets.end(); ++ eli ) | |
1340 { | |
1341 if ( ! (*eli)->isGOOD ) | |
1342 continue; | |
1343 | |
1344 //-- Print the sequence header | |
1345 if ( ! header ) | |
1346 { | |
1347 cout | |
1348 << '>' | |
1349 << *((*ei)->refnode->id) << ' ' | |
1350 << *((*ei)->qrynode->id) << ' ' | |
1351 << (*ei)->refnode->len << ' ' | |
1352 << (*ei)->qrynode->len << '\n'; | |
1353 header = true; | |
1354 } | |
1355 //-- Print the alignment | |
1356 s1 = (*eli)->loR; | |
1357 e1 = (*eli)->hiR; | |
1358 s2 = (*eli)->loQ; | |
1359 e2 = (*eli)->hiQ; | |
1360 if ( (*eli)->dirR == REVERSE_DIR ) | |
1361 Swap (s1, e1); | |
1362 if ( (*eli)->dirQ == REVERSE_DIR ) | |
1363 Swap (s2, e2); | |
1364 | |
1365 cout | |
1366 << s1 << ' ' << e1 << ' ' << s2 << ' ' << e2 << ' ' | |
1367 << (*eli)->idyc << ' ' | |
1368 << (*eli)->simc << ' ' | |
1369 << (*eli)->stpc << '\n' | |
1370 << (*eli)->delta; | |
1371 } | |
1372 } | |
1373 } | |
1374 return out; | |
1375 } |