jpayne@69
|
1 ////////////////////////////////////////////////////////////////////////////////
|
jpayne@69
|
2 //! \file
|
jpayne@69
|
3 //! \author Adam M Phillippy
|
jpayne@69
|
4 //! \date 03/26/2003
|
jpayne@69
|
5 //!
|
jpayne@69
|
6 //! \brief Source for non-inline member functions of delta.hh
|
jpayne@69
|
7 //!
|
jpayne@69
|
8 //! \see delta.hh
|
jpayne@69
|
9 ////////////////////////////////////////////////////////////////////////////////
|
jpayne@69
|
10
|
jpayne@69
|
11 #include "delta.hh"
|
jpayne@69
|
12 #include <map>
|
jpayne@69
|
13 #include <vector>
|
jpayne@69
|
14 #include <cmath>
|
jpayne@69
|
15 #include <sstream>
|
jpayne@69
|
16 #include <algorithm>
|
jpayne@69
|
17 using namespace std;
|
jpayne@69
|
18
|
jpayne@69
|
19
|
jpayne@69
|
20 inline long ScoreLocal
|
jpayne@69
|
21 (long scorej,
|
jpayne@69
|
22 long leni, long lenj,
|
jpayne@69
|
23 long olap, float idyi, float maxolap);
|
jpayne@69
|
24
|
jpayne@69
|
25
|
jpayne@69
|
26 struct LIS_t
|
jpayne@69
|
27 //!< LIS score
|
jpayne@69
|
28 {
|
jpayne@69
|
29 DeltaEdgelet_t * a;
|
jpayne@69
|
30 long score;
|
jpayne@69
|
31 long diff;
|
jpayne@69
|
32 long from;
|
jpayne@69
|
33 bool used;
|
jpayne@69
|
34 };
|
jpayne@69
|
35
|
jpayne@69
|
36
|
jpayne@69
|
37 struct EdgeletQCmp_t
|
jpayne@69
|
38 //!< Compares query lo coord
|
jpayne@69
|
39 {
|
jpayne@69
|
40 bool operator() (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
|
jpayne@69
|
41 {
|
jpayne@69
|
42 //-- Sorting by score in the event of a tie ensures that when building
|
jpayne@69
|
43 // LIS chains, the highest scoring ones get seen first, thus avoiding
|
jpayne@69
|
44 // overlap problems
|
jpayne@69
|
45
|
jpayne@69
|
46 if ( i->loQ < j->loQ )
|
jpayne@69
|
47 return true;
|
jpayne@69
|
48 else if ( i->loQ > j->loQ )
|
jpayne@69
|
49 return false;
|
jpayne@69
|
50 else if ( ScoreLocal (0, i->hiQ - i->loQ + 1, 0, 0, i->idy, 0) >
|
jpayne@69
|
51 ScoreLocal (0, j->hiQ - j->loQ + 1, 0, 0, j->idy, 0) )
|
jpayne@69
|
52 return true;
|
jpayne@69
|
53 else
|
jpayne@69
|
54 return false;
|
jpayne@69
|
55 }
|
jpayne@69
|
56 };
|
jpayne@69
|
57
|
jpayne@69
|
58
|
jpayne@69
|
59 struct EdgeletRCmp_t
|
jpayne@69
|
60 //!< Compares reference lo coord
|
jpayne@69
|
61 {
|
jpayne@69
|
62 bool operator() (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j) const
|
jpayne@69
|
63 {
|
jpayne@69
|
64 //-- Sorting by score in the event of a tie ensures that when building
|
jpayne@69
|
65 // LIS chains, the highest scoring ones get seen first, thus avoiding
|
jpayne@69
|
66 // overlap problems
|
jpayne@69
|
67
|
jpayne@69
|
68 if ( i->loR < j->loR )
|
jpayne@69
|
69 return true;
|
jpayne@69
|
70 else if ( i->loR > j->loR )
|
jpayne@69
|
71 return false;
|
jpayne@69
|
72 else if ( ScoreLocal (0, i->hiR - i->loR + 1, 0, 0, i->idy, 0) >
|
jpayne@69
|
73 ScoreLocal (0, j->hiR - j->loR + 1, 0, 0, j->idy, 0) )
|
jpayne@69
|
74 return true;
|
jpayne@69
|
75 else
|
jpayne@69
|
76 return false;
|
jpayne@69
|
77 }
|
jpayne@69
|
78 };
|
jpayne@69
|
79
|
jpayne@69
|
80
|
jpayne@69
|
81 struct NULLPred_t
|
jpayne@69
|
82 //!< Return true if pointer is not NULL
|
jpayne@69
|
83 {
|
jpayne@69
|
84 bool operator() (const void * i) const
|
jpayne@69
|
85 { return (i != NULL); }
|
jpayne@69
|
86 };
|
jpayne@69
|
87
|
jpayne@69
|
88
|
jpayne@69
|
89 //------------------------------------------------------------ DiffAligns ------
|
jpayne@69
|
90 inline long DiffAligns
|
jpayne@69
|
91 (const DeltaEdgelet_t * i, const DeltaEdgelet_t * j)
|
jpayne@69
|
92 {
|
jpayne@69
|
93 return
|
jpayne@69
|
94 ( ( j->loR < i->loR ) ? labs (j->hiR - i->loR) : labs (i->hiR - j->loR) ) +
|
jpayne@69
|
95 ( ( j->loQ < i->loQ ) ? labs (j->hiQ - i->loQ) : labs (i->hiQ - j->loQ) );
|
jpayne@69
|
96 }
|
jpayne@69
|
97
|
jpayne@69
|
98
|
jpayne@69
|
99 //------------------------------------------------------------ PickBest --------
|
jpayne@69
|
100 long PickBest
|
jpayne@69
|
101 (const LIS_t * lis, const vector<long> & allbest, float epsilon)
|
jpayne@69
|
102 {
|
jpayne@69
|
103 long size = allbest.size();
|
jpayne@69
|
104 if ( epsilon < 0 && size != 0 )
|
jpayne@69
|
105 {
|
jpayne@69
|
106 long eqc = 0;
|
jpayne@69
|
107 for ( ; eqc < size; ++ eqc )
|
jpayne@69
|
108 if ( lis[allbest[eqc]].diff != lis[allbest.front()].diff )
|
jpayne@69
|
109 break;
|
jpayne@69
|
110
|
jpayne@69
|
111 return (int)((double)eqc*rand() / (RAND_MAX + 1.0));
|
jpayne@69
|
112 }
|
jpayne@69
|
113 return size;
|
jpayne@69
|
114 }
|
jpayne@69
|
115
|
jpayne@69
|
116
|
jpayne@69
|
117 //------------------------------------------------------------------ RevC ------
|
jpayne@69
|
118 inline long RevC (const long & coord,
|
jpayne@69
|
119 const long & len)
|
jpayne@69
|
120 {
|
jpayne@69
|
121 return len - coord + 1;
|
jpayne@69
|
122 }
|
jpayne@69
|
123
|
jpayne@69
|
124
|
jpayne@69
|
125 //------------------------------------------------------------ ScoreLocal ------
|
jpayne@69
|
126 inline long ScoreLocal
|
jpayne@69
|
127 (long scorej, long leni, long lenj,
|
jpayne@69
|
128 long olap, float idyi, float maxolap)
|
jpayne@69
|
129 {
|
jpayne@69
|
130 if ( olap > 0 &&
|
jpayne@69
|
131 ((float)olap / (float)leni * 100.0 > maxolap ||
|
jpayne@69
|
132 (float)olap / (float)lenj * 100.0 > maxolap) )
|
jpayne@69
|
133 return -1;
|
jpayne@69
|
134 else
|
jpayne@69
|
135 return (scorej + (long)((leni - olap) * pow (idyi, 2)));
|
jpayne@69
|
136 }
|
jpayne@69
|
137
|
jpayne@69
|
138
|
jpayne@69
|
139 //----------------------------------------------------------- ScoreGlobal ------
|
jpayne@69
|
140 inline long ScoreGlobal
|
jpayne@69
|
141 (long scorej, long leni, long olap, float idyi)
|
jpayne@69
|
142 {
|
jpayne@69
|
143 return (scorej + (long)((leni - olap) * pow (idyi, 2)));
|
jpayne@69
|
144 }
|
jpayne@69
|
145
|
jpayne@69
|
146
|
jpayne@69
|
147 //------------------------------------------------------------------ Swap ------
|
jpayne@69
|
148 inline void Swap (long & a, long & b)
|
jpayne@69
|
149 {
|
jpayne@69
|
150 long t = a; a = b; b = t;
|
jpayne@69
|
151 }
|
jpayne@69
|
152
|
jpayne@69
|
153
|
jpayne@69
|
154 //------------------------------------------------------------ UpdateBest ------
|
jpayne@69
|
155 bool UpdateBest
|
jpayne@69
|
156 (LIS_t * lis, long size, vector<long> & allbest, float epsilon)
|
jpayne@69
|
157 {
|
jpayne@69
|
158 if ( size == 0 )
|
jpayne@69
|
159 return false;
|
jpayne@69
|
160
|
jpayne@69
|
161 long best, i;
|
jpayne@69
|
162
|
jpayne@69
|
163 //-- Find the best
|
jpayne@69
|
164 for ( best = 0; best < size; ++ best )
|
jpayne@69
|
165 if ( ! lis[best].used )
|
jpayne@69
|
166 break;
|
jpayne@69
|
167 for ( i = best + 1; i < size; ++ i )
|
jpayne@69
|
168 if ( ! lis[i].used
|
jpayne@69
|
169 &&
|
jpayne@69
|
170 ( lis[i].score > lis[best].score
|
jpayne@69
|
171 ||
|
jpayne@69
|
172 (lis[i].score == lis[best].score &&
|
jpayne@69
|
173 lis[i].diff < lis[best].diff) ) )
|
jpayne@69
|
174 best = i;
|
jpayne@69
|
175
|
jpayne@69
|
176 //-- Nonequivalent
|
jpayne@69
|
177 if ( ! allbest.empty()
|
jpayne@69
|
178 &&
|
jpayne@69
|
179 (best == size
|
jpayne@69
|
180 ||
|
jpayne@69
|
181 (epsilon < 0 &&
|
jpayne@69
|
182 lis[best].score < lis[allbest.front()].score)
|
jpayne@69
|
183 ||
|
jpayne@69
|
184 (epsilon >= 0 &&
|
jpayne@69
|
185 (float)(lis[allbest.front()].score - lis[best].score) /
|
jpayne@69
|
186 (float)(lis[allbest.front()].score) * 100.0 > epsilon)) )
|
jpayne@69
|
187 return false;
|
jpayne@69
|
188
|
jpayne@69
|
189 //-- Equivalent
|
jpayne@69
|
190 allbest.push_back (best);
|
jpayne@69
|
191
|
jpayne@69
|
192 for ( i = best; i >= 0 && i < size; i = lis[i].from )
|
jpayne@69
|
193 lis[i].used = true;
|
jpayne@69
|
194
|
jpayne@69
|
195 return true;
|
jpayne@69
|
196 }
|
jpayne@69
|
197
|
jpayne@69
|
198
|
jpayne@69
|
199 //===================================================== DeltaReader_t ==========
|
jpayne@69
|
200 //----------------------------------------------------- open -------------------
|
jpayne@69
|
201 void DeltaReader_t::open
|
jpayne@69
|
202 (const string & delta_path)
|
jpayne@69
|
203 {
|
jpayne@69
|
204 delta_path_m = delta_path;
|
jpayne@69
|
205
|
jpayne@69
|
206 //-- Open the delta file
|
jpayne@69
|
207 delta_stream_m.open (delta_path_m.c_str ());
|
jpayne@69
|
208 checkStream ();
|
jpayne@69
|
209
|
jpayne@69
|
210 //-- Read the file header
|
jpayne@69
|
211 delta_stream_m >> reference_path_m;
|
jpayne@69
|
212 delta_stream_m >> query_path_m;
|
jpayne@69
|
213 delta_stream_m >> data_type_m;
|
jpayne@69
|
214 if ( (data_type_m != NUCMER_STRING && data_type_m != PROMER_STRING) )
|
jpayne@69
|
215 delta_stream_m.setstate (ios::failbit);
|
jpayne@69
|
216 checkStream ();
|
jpayne@69
|
217 is_open_m = true;
|
jpayne@69
|
218
|
jpayne@69
|
219 //-- Advance to first record header
|
jpayne@69
|
220 while ( delta_stream_m.peek () != '>' )
|
jpayne@69
|
221 if ( delta_stream_m.get () == EOF )
|
jpayne@69
|
222 break;
|
jpayne@69
|
223 }
|
jpayne@69
|
224
|
jpayne@69
|
225
|
jpayne@69
|
226 //----------------------------------------------------- readNextAlignment ------
|
jpayne@69
|
227 void DeltaReader_t::readNextAlignment
|
jpayne@69
|
228 (DeltaAlignment_t & align, const bool read_deltas)
|
jpayne@69
|
229 {
|
jpayne@69
|
230 long delta;
|
jpayne@69
|
231 float total;
|
jpayne@69
|
232
|
jpayne@69
|
233 //-- Make way for the new alignment
|
jpayne@69
|
234 align.clear ();
|
jpayne@69
|
235
|
jpayne@69
|
236 //-- Read the alignment header
|
jpayne@69
|
237 delta_stream_m >> align.sR;
|
jpayne@69
|
238 delta_stream_m >> align.eR;
|
jpayne@69
|
239 delta_stream_m >> align.sQ;
|
jpayne@69
|
240 delta_stream_m >> align.eQ;
|
jpayne@69
|
241 delta_stream_m >> align.idyc;
|
jpayne@69
|
242 delta_stream_m >> align.simc;
|
jpayne@69
|
243 delta_stream_m >> align.stpc;
|
jpayne@69
|
244 if ( align.sR <= 0 || align.eR <= 0 ||
|
jpayne@69
|
245 align.sQ <= 0 || align.eQ <= 0 ||
|
jpayne@69
|
246 align.idyc < 0 || align.simc < 0 || align.stpc < 0 )
|
jpayne@69
|
247 delta_stream_m.setstate (ios::failbit);
|
jpayne@69
|
248 checkStream ();
|
jpayne@69
|
249
|
jpayne@69
|
250 total = labs(align.eR - align.sR) + 1.0;
|
jpayne@69
|
251 if ( data_type_m == PROMER_STRING )
|
jpayne@69
|
252 total /= 3.0;
|
jpayne@69
|
253
|
jpayne@69
|
254 //-- Get all the deltas
|
jpayne@69
|
255 do
|
jpayne@69
|
256 {
|
jpayne@69
|
257 delta_stream_m >> delta;
|
jpayne@69
|
258 checkStream ();
|
jpayne@69
|
259
|
jpayne@69
|
260 if ( delta < 0 )
|
jpayne@69
|
261 total ++;
|
jpayne@69
|
262 if ( read_deltas )
|
jpayne@69
|
263 align.deltas.push_back (delta);
|
jpayne@69
|
264 } while ( delta != 0 );
|
jpayne@69
|
265
|
jpayne@69
|
266 //-- Flush the remaining whitespace
|
jpayne@69
|
267 while ( delta_stream_m.get () != '\n' );
|
jpayne@69
|
268
|
jpayne@69
|
269 //-- Calculate the identity, similarity and stopity
|
jpayne@69
|
270 align.idy = (total - (float)align.idyc) / total * 100.0;
|
jpayne@69
|
271 align.sim = (total - (float)align.simc) / total * 100.0;
|
jpayne@69
|
272 align.stp = (float)align.stpc / (total * 2.0) * 100.0;
|
jpayne@69
|
273 }
|
jpayne@69
|
274
|
jpayne@69
|
275
|
jpayne@69
|
276 //----------------------------------------------------- readNextRecord ---------
|
jpayne@69
|
277 bool DeltaReader_t::readNextRecord (const bool read_deltas)
|
jpayne@69
|
278 {
|
jpayne@69
|
279 //-- If EOF or any other abnormality
|
jpayne@69
|
280 if ( delta_stream_m.peek () != '>' )
|
jpayne@69
|
281 return false;
|
jpayne@69
|
282
|
jpayne@69
|
283 //-- Make way for the new record
|
jpayne@69
|
284 record_m.clear ();
|
jpayne@69
|
285 is_record_m = true;
|
jpayne@69
|
286
|
jpayne@69
|
287 //-- Read the record header
|
jpayne@69
|
288 delta_stream_m.get ();
|
jpayne@69
|
289 delta_stream_m >> record_m.idR;
|
jpayne@69
|
290 delta_stream_m >> record_m.idQ;
|
jpayne@69
|
291 delta_stream_m >> record_m.lenR;
|
jpayne@69
|
292 delta_stream_m >> record_m.lenQ;
|
jpayne@69
|
293 if ( record_m.lenR <= 0 || record_m.lenQ <= 0 )
|
jpayne@69
|
294 delta_stream_m.setstate (ios::failbit);
|
jpayne@69
|
295 checkStream ();
|
jpayne@69
|
296
|
jpayne@69
|
297 //-- Flush the remaining whitespace
|
jpayne@69
|
298 while ( delta_stream_m.get () != '\n' );
|
jpayne@69
|
299
|
jpayne@69
|
300 //-- For each alignment...
|
jpayne@69
|
301 DeltaAlignment_t align;
|
jpayne@69
|
302 while ( delta_stream_m.peek () != '>' &&
|
jpayne@69
|
303 delta_stream_m.peek () != EOF )
|
jpayne@69
|
304 {
|
jpayne@69
|
305 readNextAlignment (align, read_deltas);
|
jpayne@69
|
306 record_m.aligns.push_back (align);
|
jpayne@69
|
307 }
|
jpayne@69
|
308
|
jpayne@69
|
309 return true;
|
jpayne@69
|
310 }
|
jpayne@69
|
311
|
jpayne@69
|
312
|
jpayne@69
|
313 //===================================================== DeltaEdge_t ============
|
jpayne@69
|
314 //------------------------------------------------------build ------------------
|
jpayne@69
|
315 void DeltaEdge_t::build (const DeltaRecord_t & rec)
|
jpayne@69
|
316 {
|
jpayne@69
|
317 stringstream ss;
|
jpayne@69
|
318 vector<long>::const_iterator di;
|
jpayne@69
|
319 DeltaEdgelet_t * p;
|
jpayne@69
|
320
|
jpayne@69
|
321 vector<DeltaAlignment_t>::const_iterator i;
|
jpayne@69
|
322 for ( i = rec.aligns.begin(); i != rec.aligns.end(); ++ i )
|
jpayne@69
|
323 {
|
jpayne@69
|
324 //-- Set the edgelet
|
jpayne@69
|
325 p = new DeltaEdgelet_t();
|
jpayne@69
|
326
|
jpayne@69
|
327 p->edge = this;
|
jpayne@69
|
328
|
jpayne@69
|
329 p->idy = i->idy / 100.0;
|
jpayne@69
|
330 p->sim = i->sim / 100.0;
|
jpayne@69
|
331 p->stp = i->stp / 100.0;
|
jpayne@69
|
332
|
jpayne@69
|
333 p->idyc = i->idyc;
|
jpayne@69
|
334 p->simc = i->simc;
|
jpayne@69
|
335 p->stpc = i->stpc;
|
jpayne@69
|
336
|
jpayne@69
|
337 p->loR = i->sR;
|
jpayne@69
|
338 p->hiR = i->eR;
|
jpayne@69
|
339 p->loQ = i->sQ;
|
jpayne@69
|
340 p->hiQ = i->eQ;
|
jpayne@69
|
341
|
jpayne@69
|
342 p->dirR = p->hiR < p->loR ? REVERSE_DIR : FORWARD_DIR;
|
jpayne@69
|
343 p->dirQ = p->hiQ < p->loQ ? REVERSE_DIR : FORWARD_DIR;
|
jpayne@69
|
344
|
jpayne@69
|
345 //-- Get the delta information
|
jpayne@69
|
346 for ( di = i->deltas.begin(); di != i->deltas.end(); ++ di )
|
jpayne@69
|
347 {
|
jpayne@69
|
348 ss << *di << '\n';
|
jpayne@69
|
349 p->delta.append (ss.str());
|
jpayne@69
|
350 ss.str ("");
|
jpayne@69
|
351 }
|
jpayne@69
|
352
|
jpayne@69
|
353 //-- Force loR < hiR && loQ < hiQ
|
jpayne@69
|
354 if ( p->dirR == REVERSE_DIR )
|
jpayne@69
|
355 Swap (p->loR, p->hiR);
|
jpayne@69
|
356 if ( p->dirQ == REVERSE_DIR )
|
jpayne@69
|
357 Swap (p->loQ, p->hiQ);
|
jpayne@69
|
358
|
jpayne@69
|
359 edgelets.push_back (p);
|
jpayne@69
|
360 }
|
jpayne@69
|
361 }
|
jpayne@69
|
362
|
jpayne@69
|
363
|
jpayne@69
|
364 //===================================================== DeltaGraph_t ===========
|
jpayne@69
|
365 //----------------------------------------------------- build ------------------
|
jpayne@69
|
366 //! \brief Build a new graph from a deltafile
|
jpayne@69
|
367 //!
|
jpayne@69
|
368 //! Does not populate:
|
jpayne@69
|
369 //! node->seq
|
jpayne@69
|
370 //! edgelet->frmR/frmQ
|
jpayne@69
|
371 //! edgelet->snps
|
jpayne@69
|
372 //!
|
jpayne@69
|
373 //! \param deltapath The path of the deltafile to read
|
jpayne@69
|
374 //! \param getdeltas Read the delta-encoded gap positions? yes/no
|
jpayne@69
|
375 //! \return void
|
jpayne@69
|
376 //!
|
jpayne@69
|
377 void DeltaGraph_t::build (const string & deltapath, bool getdeltas)
|
jpayne@69
|
378 {
|
jpayne@69
|
379 DeltaReader_t dr;
|
jpayne@69
|
380 DeltaEdge_t * dep;
|
jpayne@69
|
381 pair<map<string, DeltaNode_t>::iterator, bool> insret;
|
jpayne@69
|
382
|
jpayne@69
|
383
|
jpayne@69
|
384 //-- Open the delta file and read in the alignment information
|
jpayne@69
|
385 dr.open (deltapath);
|
jpayne@69
|
386
|
jpayne@69
|
387 refpath = dr.getReferencePath();
|
jpayne@69
|
388 qrypath = dr.getQueryPath();
|
jpayne@69
|
389
|
jpayne@69
|
390 if ( dr.getDataType() == NUCMER_STRING )
|
jpayne@69
|
391 datatype = NUCMER_DATA;
|
jpayne@69
|
392 else if ( dr.getDataType() == PROMER_STRING )
|
jpayne@69
|
393 datatype = PROMER_DATA;
|
jpayne@69
|
394 else
|
jpayne@69
|
395 datatype = NULL_DATA;
|
jpayne@69
|
396
|
jpayne@69
|
397 //-- Read in the next graph edge, i.e. a new delta record
|
jpayne@69
|
398 while ( dr.readNext (getdeltas) )
|
jpayne@69
|
399 {
|
jpayne@69
|
400 dep = new DeltaEdge_t();
|
jpayne@69
|
401
|
jpayne@69
|
402
|
jpayne@69
|
403 //-- Find the reference node in the graph, add a new one if necessary
|
jpayne@69
|
404 insret = refnodes.insert
|
jpayne@69
|
405 (map<string, DeltaNode_t>::value_type
|
jpayne@69
|
406 (dr.getRecord().idR, DeltaNode_t()));
|
jpayne@69
|
407 dep->refnode = &((insret.first)->second);
|
jpayne@69
|
408
|
jpayne@69
|
409 //-- If a new reference node
|
jpayne@69
|
410 if ( insret.second )
|
jpayne@69
|
411 {
|
jpayne@69
|
412 dep->refnode->id = &((insret.first)->first);
|
jpayne@69
|
413 dep->refnode->len = dr.getRecord().lenR;
|
jpayne@69
|
414 }
|
jpayne@69
|
415
|
jpayne@69
|
416
|
jpayne@69
|
417 //-- Find the query node in the graph, add a new one if necessary
|
jpayne@69
|
418 insret = qrynodes.insert
|
jpayne@69
|
419 (map<string, DeltaNode_t>::value_type
|
jpayne@69
|
420 (dr.getRecord().idQ, DeltaNode_t()));
|
jpayne@69
|
421 dep->qrynode = &((insret.first)->second);
|
jpayne@69
|
422
|
jpayne@69
|
423 //-- If a new query node
|
jpayne@69
|
424 if ( insret.second )
|
jpayne@69
|
425 {
|
jpayne@69
|
426 dep->qrynode->id = &((insret.first)->first);
|
jpayne@69
|
427 dep->qrynode->len = dr.getRecord().lenQ;
|
jpayne@69
|
428 }
|
jpayne@69
|
429
|
jpayne@69
|
430
|
jpayne@69
|
431 //-- Build the edge
|
jpayne@69
|
432 dep->build (dr.getRecord());
|
jpayne@69
|
433 dep->refnode->edges.push_back (dep);
|
jpayne@69
|
434 dep->qrynode->edges.push_back (dep);
|
jpayne@69
|
435 }
|
jpayne@69
|
436 dr.close ();
|
jpayne@69
|
437 }
|
jpayne@69
|
438
|
jpayne@69
|
439
|
jpayne@69
|
440 //------------------------------------------------------------------- clean ----
|
jpayne@69
|
441 //! \brief Clean the graph of all edgelets where isGOOD = false
|
jpayne@69
|
442 //!
|
jpayne@69
|
443 //! Removes all edgelets from the graph where isGOOD = false. Afterwhich, all
|
jpayne@69
|
444 //! now empty edges or nodes are also removed.
|
jpayne@69
|
445 //!
|
jpayne@69
|
446 //! \return void
|
jpayne@69
|
447 //!
|
jpayne@69
|
448 void DeltaGraph_t::clean()
|
jpayne@69
|
449 {
|
jpayne@69
|
450 map<string, DeltaNode_t>::iterator i;
|
jpayne@69
|
451 map<string, DeltaNode_t>::iterator ii;
|
jpayne@69
|
452 vector<DeltaEdge_t *>::iterator j;
|
jpayne@69
|
453 vector<DeltaEdgelet_t *>::iterator k;
|
jpayne@69
|
454
|
jpayne@69
|
455 //-- For all ref nodes
|
jpayne@69
|
456 for ( i = refnodes.begin(); i != refnodes.end(); )
|
jpayne@69
|
457 {
|
jpayne@69
|
458 //-- For all edges
|
jpayne@69
|
459 for ( j = i->second.edges.begin();
|
jpayne@69
|
460 j != i->second.edges.end(); ++ j )
|
jpayne@69
|
461 {
|
jpayne@69
|
462 //-- For all edgelets
|
jpayne@69
|
463 for ( k = (*j)->edgelets.begin();
|
jpayne@69
|
464 k != (*j)->edgelets.end(); ++ k )
|
jpayne@69
|
465 {
|
jpayne@69
|
466 if ( ! (*k)->isGOOD )
|
jpayne@69
|
467 {
|
jpayne@69
|
468 //-- Delete the bad edgelet and mark for erasure
|
jpayne@69
|
469 delete (*k);
|
jpayne@69
|
470 *k = NULL;
|
jpayne@69
|
471 }
|
jpayne@69
|
472 }
|
jpayne@69
|
473
|
jpayne@69
|
474 //-- Erase the marked edgelets
|
jpayne@69
|
475 k = partition ((*j)->edgelets.begin(),
|
jpayne@69
|
476 (*j)->edgelets.end(),
|
jpayne@69
|
477 NULLPred_t());
|
jpayne@69
|
478 (*j)->edgelets.erase (k, (*j)->edgelets.end());
|
jpayne@69
|
479
|
jpayne@69
|
480 //-- Mark the edge if empty
|
jpayne@69
|
481 if ( (*j)->edgelets.empty() )
|
jpayne@69
|
482 *j = NULL;
|
jpayne@69
|
483 }
|
jpayne@69
|
484
|
jpayne@69
|
485 //-- Erase the marked edges
|
jpayne@69
|
486 j = partition (i->second.edges.begin(),
|
jpayne@69
|
487 i->second.edges.end(),
|
jpayne@69
|
488 NULLPred_t());
|
jpayne@69
|
489 i->second.edges.erase (j, i->second.edges.end());
|
jpayne@69
|
490
|
jpayne@69
|
491 //-- Erase the node if empty
|
jpayne@69
|
492 ii = i ++;
|
jpayne@69
|
493 if ( ii->second.edges.empty() )
|
jpayne@69
|
494 refnodes.erase (ii);
|
jpayne@69
|
495 }
|
jpayne@69
|
496
|
jpayne@69
|
497 //-- For all qry nodes
|
jpayne@69
|
498 for ( i = qrynodes.begin(); i != qrynodes.end(); )
|
jpayne@69
|
499 {
|
jpayne@69
|
500 for ( j = i->second.edges.begin();
|
jpayne@69
|
501 j != i->second.edges.end(); ++ j )
|
jpayne@69
|
502 {
|
jpayne@69
|
503 //-- Delete the edge if empty and mark for erasure
|
jpayne@69
|
504 if ( (*j)->edgelets.empty() )
|
jpayne@69
|
505 {
|
jpayne@69
|
506 delete (*j);
|
jpayne@69
|
507 *j = NULL;
|
jpayne@69
|
508 }
|
jpayne@69
|
509 }
|
jpayne@69
|
510
|
jpayne@69
|
511 //-- Erase the marked edges
|
jpayne@69
|
512 j = partition (i->second.edges.begin(),
|
jpayne@69
|
513 i->second.edges.end(),
|
jpayne@69
|
514 NULLPred_t());
|
jpayne@69
|
515 i->second.edges.erase (j, i->second.edges.end());
|
jpayne@69
|
516
|
jpayne@69
|
517 //-- Erase the node if empty
|
jpayne@69
|
518 ii = i ++;
|
jpayne@69
|
519 if ( ii->second.edges.empty() )
|
jpayne@69
|
520 qrynodes.erase (ii);
|
jpayne@69
|
521 }
|
jpayne@69
|
522
|
jpayne@69
|
523 }
|
jpayne@69
|
524
|
jpayne@69
|
525
|
jpayne@69
|
526 //------------------------------------------------------------------- clear ----
|
jpayne@69
|
527 //! \brief Clear the graph of all nodes, edges and edgelets
|
jpayne@69
|
528 //!
|
jpayne@69
|
529 //! \return void
|
jpayne@69
|
530 //!
|
jpayne@69
|
531 void DeltaGraph_t::clear ( )
|
jpayne@69
|
532 {
|
jpayne@69
|
533 //-- Make sure the edges only get destructed once
|
jpayne@69
|
534 std::map<std::string, DeltaNode_t>::iterator i;
|
jpayne@69
|
535 std::vector<DeltaEdge_t *>::iterator j;
|
jpayne@69
|
536 for ( i = refnodes . begin( ); i != refnodes . end( ); ++ i )
|
jpayne@69
|
537 for ( j = i -> second . edges . begin( );
|
jpayne@69
|
538 j != i -> second . edges . end( ); ++ j )
|
jpayne@69
|
539 delete (*j);
|
jpayne@69
|
540
|
jpayne@69
|
541 refnodes.clear( );
|
jpayne@69
|
542 qrynodes.clear( );
|
jpayne@69
|
543 refpath.erase( );
|
jpayne@69
|
544 qrypath.erase( );
|
jpayne@69
|
545 datatype = NULL_DATA;
|
jpayne@69
|
546 }
|
jpayne@69
|
547
|
jpayne@69
|
548
|
jpayne@69
|
549 //------------------------------------------------------------ getNodeCount ----
|
jpayne@69
|
550 //! \brief Counts and returns the number of graph nodes (sequences)
|
jpayne@69
|
551 //!
|
jpayne@69
|
552 //! \return The number of graph nodes
|
jpayne@69
|
553 //!
|
jpayne@69
|
554 long DeltaGraph_t::getNodeCount()
|
jpayne@69
|
555 {
|
jpayne@69
|
556 long sum = refnodes.size() + qrynodes.size();
|
jpayne@69
|
557 return sum;
|
jpayne@69
|
558 }
|
jpayne@69
|
559
|
jpayne@69
|
560
|
jpayne@69
|
561 //------------------------------------------------------------ getEdgeCount ----
|
jpayne@69
|
562 //! \brief Counts and returns the number of graph edges
|
jpayne@69
|
563 //!
|
jpayne@69
|
564 //! \return void
|
jpayne@69
|
565 //!
|
jpayne@69
|
566 long DeltaGraph_t::getEdgeCount()
|
jpayne@69
|
567 {
|
jpayne@69
|
568 long sum = 0;
|
jpayne@69
|
569
|
jpayne@69
|
570 map<string, DeltaNode_t>::iterator i;
|
jpayne@69
|
571 for ( i = refnodes.begin(); i != refnodes.end(); ++ i )
|
jpayne@69
|
572 sum += i->second.edges.size();
|
jpayne@69
|
573
|
jpayne@69
|
574 return sum;
|
jpayne@69
|
575 }
|
jpayne@69
|
576
|
jpayne@69
|
577
|
jpayne@69
|
578 //--------------------------------------------------------- getEdgeletCount ----
|
jpayne@69
|
579 //! \brief Counts and returns the number of graph edgelets (alignments)
|
jpayne@69
|
580 //!
|
jpayne@69
|
581 //! \return void
|
jpayne@69
|
582 //!
|
jpayne@69
|
583 long DeltaGraph_t::getEdgeletCount()
|
jpayne@69
|
584 {
|
jpayne@69
|
585 long sum = 0;
|
jpayne@69
|
586
|
jpayne@69
|
587 map<string, DeltaNode_t>::iterator i;
|
jpayne@69
|
588 vector<DeltaEdge_t *>::iterator j;
|
jpayne@69
|
589 for ( i = refnodes.begin(); i != refnodes.end(); ++ i )
|
jpayne@69
|
590 for ( j = i->second.edges.begin();
|
jpayne@69
|
591 j != i->second.edges.end(); ++ j )
|
jpayne@69
|
592 sum += (*j)->edgelets.size();
|
jpayne@69
|
593
|
jpayne@69
|
594 return sum;
|
jpayne@69
|
595 }
|
jpayne@69
|
596
|
jpayne@69
|
597
|
jpayne@69
|
598 //---------------------------------------------------------------- flagGOOD ----
|
jpayne@69
|
599 //! \brief Sets isGOOD = 1 for all edgelets
|
jpayne@69
|
600 void DeltaGraph_t::flagGOOD()
|
jpayne@69
|
601 {
|
jpayne@69
|
602 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
603 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
604 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
605
|
jpayne@69
|
606 //-- All references
|
jpayne@69
|
607 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
608 {
|
jpayne@69
|
609 //-- All queries matching this reference
|
jpayne@69
|
610 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
611 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
612 {
|
jpayne@69
|
613 //-- All alignments between reference and query
|
jpayne@69
|
614 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
615 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
616 {
|
jpayne@69
|
617 (*eli)->isGOOD = true;
|
jpayne@69
|
618 }
|
jpayne@69
|
619 }
|
jpayne@69
|
620 }
|
jpayne@69
|
621 }
|
jpayne@69
|
622
|
jpayne@69
|
623
|
jpayne@69
|
624 //---------------------------------------------------------------- flag1to1 ----
|
jpayne@69
|
625 //! \brief Intersection of flagQLIS and flagRLIS
|
jpayne@69
|
626 void DeltaGraph_t::flag1to1(float epsilon, float maxolap)
|
jpayne@69
|
627 {
|
jpayne@69
|
628 flagRLIS(epsilon, maxolap, false);
|
jpayne@69
|
629 flagQLIS(epsilon, maxolap, false);
|
jpayne@69
|
630
|
jpayne@69
|
631 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
632 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
633 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
634
|
jpayne@69
|
635 //-- All references
|
jpayne@69
|
636 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
637 {
|
jpayne@69
|
638 //-- All queries matching this reference
|
jpayne@69
|
639 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
640 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
641 {
|
jpayne@69
|
642 //-- All alignments between reference and query
|
jpayne@69
|
643 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
644 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
645 {
|
jpayne@69
|
646 if ( !(*eli)->isRLIS || !(*eli)->isQLIS )
|
jpayne@69
|
647 (*eli)->isGOOD = false;
|
jpayne@69
|
648 }
|
jpayne@69
|
649 }
|
jpayne@69
|
650 }
|
jpayne@69
|
651 }
|
jpayne@69
|
652
|
jpayne@69
|
653
|
jpayne@69
|
654 //---------------------------------------------------------------- flagMtoM ----
|
jpayne@69
|
655 //! \brief Union of flagQLIS and flagRLIS
|
jpayne@69
|
656 void DeltaGraph_t::flagMtoM(float epsilon, float maxolap)
|
jpayne@69
|
657 {
|
jpayne@69
|
658 flagRLIS(epsilon, maxolap, false);
|
jpayne@69
|
659 flagQLIS(epsilon, maxolap, false);
|
jpayne@69
|
660
|
jpayne@69
|
661 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
662 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
663 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
664
|
jpayne@69
|
665 //-- All references
|
jpayne@69
|
666 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
667 {
|
jpayne@69
|
668 //-- All queries matching this reference
|
jpayne@69
|
669 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
670 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
671 {
|
jpayne@69
|
672 //-- All alignments between reference and query
|
jpayne@69
|
673 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
674 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
675 {
|
jpayne@69
|
676 if ( !(*eli)->isRLIS && !(*eli)->isQLIS )
|
jpayne@69
|
677 (*eli)->isGOOD = false;
|
jpayne@69
|
678 }
|
jpayne@69
|
679 }
|
jpayne@69
|
680 }
|
jpayne@69
|
681 }
|
jpayne@69
|
682
|
jpayne@69
|
683
|
jpayne@69
|
684 //-------------------------------------------------------------- flagGLIS ------
|
jpayne@69
|
685 //! \brief Flag edgelets in the global LIS and unflag those who are not
|
jpayne@69
|
686 //!
|
jpayne@69
|
687 //! Runs a length*identity weigthed LIS to determine the longest, mutually
|
jpayne@69
|
688 //! consistent set of alignments between all pairs of sequences. This
|
jpayne@69
|
689 //! essentially constructs the global alignment between all sequence pairs.
|
jpayne@69
|
690 //!
|
jpayne@69
|
691 //! Sets isGLIS flag for good and unsets isGOOD flag for bad.
|
jpayne@69
|
692 //!
|
jpayne@69
|
693 //! \param epsilon Keep repeat alignments within epsilon % of the best align
|
jpayne@69
|
694 //! \return void
|
jpayne@69
|
695 //!
|
jpayne@69
|
696 void DeltaGraph_t::flagGLIS (float epsilon)
|
jpayne@69
|
697 {
|
jpayne@69
|
698 LIS_t * lis = NULL;
|
jpayne@69
|
699 long lis_size = 0;
|
jpayne@69
|
700 long i, j, n;
|
jpayne@69
|
701 long olap, olapQ, olapR, len, lenQ, lenR, score, diff;
|
jpayne@69
|
702
|
jpayne@69
|
703 vector<DeltaEdgelet_t *> edgelets;
|
jpayne@69
|
704
|
jpayne@69
|
705 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
706 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
707 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
708
|
jpayne@69
|
709
|
jpayne@69
|
710 //-- For each reference sequence
|
jpayne@69
|
711 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
712 {
|
jpayne@69
|
713 //-- For each query aligning to this reference
|
jpayne@69
|
714 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
715 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
716 {
|
jpayne@69
|
717 //-- Collect all the good edgelets
|
jpayne@69
|
718 edgelets.clear();
|
jpayne@69
|
719 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
720 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
721 if ( (*eli)->isGOOD )
|
jpayne@69
|
722 {
|
jpayne@69
|
723 edgelets.push_back (*eli);
|
jpayne@69
|
724
|
jpayne@69
|
725 //-- Fix the coordinates to make global LIS work
|
jpayne@69
|
726 if ( (*eli)->dirR == (*eli)->dirQ )
|
jpayne@69
|
727 {
|
jpayne@69
|
728 (*eli)->dirQ = FORWARD_DIR;
|
jpayne@69
|
729 }
|
jpayne@69
|
730 else
|
jpayne@69
|
731 {
|
jpayne@69
|
732 if ( (*eli)->dirQ == REVERSE_DIR )
|
jpayne@69
|
733 Swap ((*eli)->loQ, (*eli)->hiQ);
|
jpayne@69
|
734 (*eli)->loQ = RevC ((*eli)->loQ, (*ei)->qrynode->len);
|
jpayne@69
|
735 (*eli)->hiQ = RevC ((*eli)->hiQ, (*ei)->qrynode->len);
|
jpayne@69
|
736 (*eli)->dirQ = REVERSE_DIR;
|
jpayne@69
|
737 }
|
jpayne@69
|
738 }
|
jpayne@69
|
739
|
jpayne@69
|
740 //-- Resize and initialize
|
jpayne@69
|
741 n = edgelets.size();
|
jpayne@69
|
742 if ( n > lis_size )
|
jpayne@69
|
743 {
|
jpayne@69
|
744 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n);
|
jpayne@69
|
745 lis_size = n;
|
jpayne@69
|
746 }
|
jpayne@69
|
747 for ( i = 0; i < n; ++ i )
|
jpayne@69
|
748 lis[i].used = false;
|
jpayne@69
|
749
|
jpayne@69
|
750 //-- Sort by lo query coord
|
jpayne@69
|
751 sort (edgelets.begin(), edgelets.end(), EdgeletQCmp_t());
|
jpayne@69
|
752
|
jpayne@69
|
753 //-- Continue until all equivalent repeats are extracted
|
jpayne@69
|
754 vector<long> allbest;
|
jpayne@69
|
755 do
|
jpayne@69
|
756 {
|
jpayne@69
|
757 //-- Dynamic
|
jpayne@69
|
758 for ( i = 0; i < n; ++ i )
|
jpayne@69
|
759 {
|
jpayne@69
|
760 if ( lis[i].used ) continue;
|
jpayne@69
|
761
|
jpayne@69
|
762 lis[i].a = edgelets[i];
|
jpayne@69
|
763
|
jpayne@69
|
764 lenR = lis[i].a->hiR - lis[i].a->loR + 1;
|
jpayne@69
|
765 lenQ = lis[i].a->hiQ - lis[i].a->loQ + 1;
|
jpayne@69
|
766 len = lenR > lenQ ? lenQ : lenR;
|
jpayne@69
|
767 lis[i].score = ScoreGlobal (0, len, 0, lis[i].a->idy);
|
jpayne@69
|
768
|
jpayne@69
|
769 lis[i].from = -1;
|
jpayne@69
|
770 lis[i].diff = 0;
|
jpayne@69
|
771
|
jpayne@69
|
772 for ( j = 0; j < i; ++ j )
|
jpayne@69
|
773 {
|
jpayne@69
|
774 if ( lis[j].used ) continue;
|
jpayne@69
|
775
|
jpayne@69
|
776 if ( lis[i].a->dirQ != lis[j].a->dirQ )
|
jpayne@69
|
777 continue;
|
jpayne@69
|
778
|
jpayne@69
|
779 lenR = lis[i].a->hiR - lis[i].a->loR + 1;
|
jpayne@69
|
780 lenQ = lis[i].a->hiQ - lis[i].a->loQ + 1;
|
jpayne@69
|
781 len = lenR > lenQ ? lenQ : lenR;
|
jpayne@69
|
782
|
jpayne@69
|
783 olapR = lis[j].a->hiR - lis[i].a->loR + 1;
|
jpayne@69
|
784 olapQ = lis[j].a->hiQ - lis[i].a->loQ + 1;
|
jpayne@69
|
785 olap = olapR > olapQ ? olapR : olapQ;
|
jpayne@69
|
786 if ( olap < 0 )
|
jpayne@69
|
787 olap = 0;
|
jpayne@69
|
788
|
jpayne@69
|
789 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a);
|
jpayne@69
|
790
|
jpayne@69
|
791 score = ScoreGlobal
|
jpayne@69
|
792 (lis[j].score, len, olap, lis[i].a->idy);
|
jpayne@69
|
793
|
jpayne@69
|
794 if ( score > lis[i].score
|
jpayne@69
|
795 ||
|
jpayne@69
|
796 (score == lis[i].score && diff < lis[i].diff) )
|
jpayne@69
|
797 {
|
jpayne@69
|
798 lis[i].from = j;
|
jpayne@69
|
799 lis[i].score = score;
|
jpayne@69
|
800 lis[i].diff = diff;
|
jpayne@69
|
801 }
|
jpayne@69
|
802 }
|
jpayne@69
|
803 }
|
jpayne@69
|
804 } while ( UpdateBest (lis, n, allbest, epsilon) );
|
jpayne@69
|
805
|
jpayne@69
|
806 long beg = PickBest (lis, allbest, epsilon);
|
jpayne@69
|
807 long end = allbest.size();
|
jpayne@69
|
808 if ( beg == end ) beg = 0;
|
jpayne@69
|
809 else end = beg + 1;
|
jpayne@69
|
810
|
jpayne@69
|
811 //-- Flag the edgelets
|
jpayne@69
|
812 for ( ; beg < end; ++ beg )
|
jpayne@69
|
813 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from )
|
jpayne@69
|
814 lis[i].a->isGLIS = true;
|
jpayne@69
|
815
|
jpayne@69
|
816 //-- Repair the coordinates
|
jpayne@69
|
817 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
|
jpayne@69
|
818 {
|
jpayne@69
|
819 if ( ! (*eli)->isGLIS )
|
jpayne@69
|
820 (*eli)->isGOOD = false;
|
jpayne@69
|
821
|
jpayne@69
|
822 if ( (*eli)->dirQ == FORWARD_DIR )
|
jpayne@69
|
823 {
|
jpayne@69
|
824 (*eli)->dirQ = (*eli)->dirR;
|
jpayne@69
|
825 }
|
jpayne@69
|
826 else
|
jpayne@69
|
827 {
|
jpayne@69
|
828 if ( (*eli)->dirR == FORWARD_DIR )
|
jpayne@69
|
829 Swap ((*eli)->loQ, (*eli)->hiQ);
|
jpayne@69
|
830 (*eli)->loQ = RevC ((*eli)->loQ, (*ei)->qrynode->len);
|
jpayne@69
|
831 (*eli)->hiQ = RevC ((*eli)->hiQ, (*ei)->qrynode->len);
|
jpayne@69
|
832 (*eli)->dirQ =
|
jpayne@69
|
833 (*eli)->dirR == FORWARD_DIR ? REVERSE_DIR : FORWARD_DIR;
|
jpayne@69
|
834 }
|
jpayne@69
|
835 }
|
jpayne@69
|
836 }
|
jpayne@69
|
837 }
|
jpayne@69
|
838
|
jpayne@69
|
839 free (lis);
|
jpayne@69
|
840 }
|
jpayne@69
|
841
|
jpayne@69
|
842
|
jpayne@69
|
843 //-------------------------------------------------------------- flagQLIS ------
|
jpayne@69
|
844 //! \brief Flag edgelets in the query LIS and unflag those who are not
|
jpayne@69
|
845 //!
|
jpayne@69
|
846 //! Runs a length*identity weighted LIS to determine the longest, QUERY
|
jpayne@69
|
847 //! consistent set of alignments for all query sequences. This effectively
|
jpayne@69
|
848 //! identifies the "best" alignments for all positions of each query.
|
jpayne@69
|
849 //!
|
jpayne@69
|
850 //! Sets isQLIS flag for good and unsets isGOOD flag for bad.
|
jpayne@69
|
851 //!
|
jpayne@69
|
852 //! \param epsilon Keep repeat alignments within epsilon % of the best align
|
jpayne@69
|
853 //! \param maxolap Only allow alignments to overlap by maxolap percent [0-100]
|
jpayne@69
|
854 //! \param flagBad Flag non QLIS edgelets as !isGOOD
|
jpayne@69
|
855 //! \return void
|
jpayne@69
|
856 //!
|
jpayne@69
|
857 void DeltaGraph_t::flagQLIS (float epsilon, float maxolap, bool flagbad)
|
jpayne@69
|
858 {
|
jpayne@69
|
859 LIS_t * lis = NULL;
|
jpayne@69
|
860 long lis_size = 0;
|
jpayne@69
|
861 long i, j, n;
|
jpayne@69
|
862 long olap, leni, lenj, score, diff;
|
jpayne@69
|
863
|
jpayne@69
|
864 vector<DeltaEdgelet_t *> edgelets;
|
jpayne@69
|
865
|
jpayne@69
|
866 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
867 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
868 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
869
|
jpayne@69
|
870
|
jpayne@69
|
871 //-- For each query sequence
|
jpayne@69
|
872 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
|
jpayne@69
|
873 {
|
jpayne@69
|
874 //-- Collect all the good edgelets
|
jpayne@69
|
875 edgelets.clear();
|
jpayne@69
|
876 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
877 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
878 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
879 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
880 if ( (*eli)->isGOOD )
|
jpayne@69
|
881 edgelets.push_back (*eli);
|
jpayne@69
|
882
|
jpayne@69
|
883 //-- Resize and initialize
|
jpayne@69
|
884 n = edgelets.size();
|
jpayne@69
|
885 if ( n > lis_size )
|
jpayne@69
|
886 {
|
jpayne@69
|
887 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n);
|
jpayne@69
|
888 lis_size = n;
|
jpayne@69
|
889 }
|
jpayne@69
|
890 for ( i = 0; i < n; ++ i )
|
jpayne@69
|
891 lis[i].used = false;
|
jpayne@69
|
892
|
jpayne@69
|
893 //-- Sort by lo query coord
|
jpayne@69
|
894 sort (edgelets.begin(), edgelets.end(), EdgeletQCmp_t());
|
jpayne@69
|
895
|
jpayne@69
|
896 //-- Continue until all equivalent repeats are extracted
|
jpayne@69
|
897 vector<long> allbest;
|
jpayne@69
|
898 do
|
jpayne@69
|
899 {
|
jpayne@69
|
900 //-- Dynamic
|
jpayne@69
|
901 for ( i = 0; i < n; ++ i )
|
jpayne@69
|
902 {
|
jpayne@69
|
903 if ( lis[i].used ) continue;
|
jpayne@69
|
904
|
jpayne@69
|
905 lis[i].a = edgelets[i];
|
jpayne@69
|
906
|
jpayne@69
|
907 leni = lis[i].a->hiQ - lis[i].a->loQ + 1;
|
jpayne@69
|
908 lis[i].score =
|
jpayne@69
|
909 ScoreLocal (0, leni, 0, 0, lis[i].a->idy, 0);
|
jpayne@69
|
910
|
jpayne@69
|
911 lis[i].from = -1;
|
jpayne@69
|
912 lis[i].diff = 0;
|
jpayne@69
|
913
|
jpayne@69
|
914 for ( j = 0; j < i; ++ j )
|
jpayne@69
|
915 {
|
jpayne@69
|
916 if ( lis[j].used ) continue;
|
jpayne@69
|
917 if ( lis[j].from >= 0 &&
|
jpayne@69
|
918 lis[lis[j].from].a->hiQ >= lis[i].a->loQ )
|
jpayne@69
|
919 continue;
|
jpayne@69
|
920
|
jpayne@69
|
921 leni = lis[i].a->hiQ - lis[i].a->loQ + 1;
|
jpayne@69
|
922 lenj = lis[j].a->hiQ - lis[j].a->loQ + 1;
|
jpayne@69
|
923 olap = lis[j].a->hiQ - lis[i].a->loQ + 1;
|
jpayne@69
|
924 if ( olap < 0 )
|
jpayne@69
|
925 olap = 0;
|
jpayne@69
|
926
|
jpayne@69
|
927 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a);
|
jpayne@69
|
928
|
jpayne@69
|
929 score = ScoreLocal
|
jpayne@69
|
930 (lis[j].score, leni, lenj, olap, lis[i].a->idy, maxolap);
|
jpayne@69
|
931
|
jpayne@69
|
932 if ( score > lis[i].score
|
jpayne@69
|
933 ||
|
jpayne@69
|
934 (score == lis[i].score && diff < lis[i].diff) )
|
jpayne@69
|
935 {
|
jpayne@69
|
936 lis[i].from = j;
|
jpayne@69
|
937 lis[i].score = score;
|
jpayne@69
|
938 lis[i].diff = diff;
|
jpayne@69
|
939 }
|
jpayne@69
|
940 }
|
jpayne@69
|
941 }
|
jpayne@69
|
942 } while ( UpdateBest (lis, n, allbest, epsilon) );
|
jpayne@69
|
943
|
jpayne@69
|
944 long beg = PickBest (lis, allbest, epsilon);
|
jpayne@69
|
945 long end = allbest.size();
|
jpayne@69
|
946 if ( beg == end ) beg = 0;
|
jpayne@69
|
947 else end = beg + 1;
|
jpayne@69
|
948
|
jpayne@69
|
949 //-- Flag the edgelets
|
jpayne@69
|
950 for ( ; beg < end; ++ beg )
|
jpayne@69
|
951 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from )
|
jpayne@69
|
952 lis[i].a->isQLIS = true;
|
jpayne@69
|
953
|
jpayne@69
|
954 if ( flagbad )
|
jpayne@69
|
955 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
|
jpayne@69
|
956 if ( ! (*eli)->isQLIS )
|
jpayne@69
|
957 (*eli)->isGOOD = false;
|
jpayne@69
|
958 }
|
jpayne@69
|
959
|
jpayne@69
|
960 free (lis);
|
jpayne@69
|
961 }
|
jpayne@69
|
962
|
jpayne@69
|
963
|
jpayne@69
|
964 //-------------------------------------------------------------- flagRLIS ------
|
jpayne@69
|
965 //! \brief Flag edgelets in the reference LIS and unflag those who are not
|
jpayne@69
|
966 //!
|
jpayne@69
|
967 //! Runs a length*identity weighted LIS to determine the longest, REFERENCE
|
jpayne@69
|
968 //! consistent set of alignments for all reference sequences. This effectively
|
jpayne@69
|
969 //! identifies the "best" alignments for all positions of each reference.
|
jpayne@69
|
970 //!
|
jpayne@69
|
971 //! Sets isRLIS flag for good and unsets isGOOD flag for bad.
|
jpayne@69
|
972 //!
|
jpayne@69
|
973 //! \param epsilon Keep repeat alignments within epsilon % of the best align
|
jpayne@69
|
974 //! \param maxolap Only allow alignments to overlap by maxolap percent [0-100]
|
jpayne@69
|
975 //! \param flagBad Flag non RLIS edgelets as !isGOOD
|
jpayne@69
|
976 //! \return void
|
jpayne@69
|
977 //!
|
jpayne@69
|
978 void DeltaGraph_t::flagRLIS (float epsilon, float maxolap, bool flagbad)
|
jpayne@69
|
979 {
|
jpayne@69
|
980 LIS_t * lis = NULL;
|
jpayne@69
|
981 long lis_size = 0;
|
jpayne@69
|
982 long i, j, n;
|
jpayne@69
|
983 long olap, leni, lenj, score, diff;
|
jpayne@69
|
984
|
jpayne@69
|
985 vector<DeltaEdgelet_t *> edgelets;
|
jpayne@69
|
986
|
jpayne@69
|
987 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
988 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
989 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
990
|
jpayne@69
|
991
|
jpayne@69
|
992 //-- For each reference sequence
|
jpayne@69
|
993 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
994 {
|
jpayne@69
|
995 //-- Collect all the good edgelets
|
jpayne@69
|
996 edgelets.clear();
|
jpayne@69
|
997 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
998 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
999 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
1000 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
1001 if ( (*eli)->isGOOD )
|
jpayne@69
|
1002 edgelets.push_back (*eli);
|
jpayne@69
|
1003
|
jpayne@69
|
1004 //-- Resize
|
jpayne@69
|
1005 n = edgelets.size();
|
jpayne@69
|
1006 if ( n > lis_size )
|
jpayne@69
|
1007 {
|
jpayne@69
|
1008 lis = (LIS_t *) Safe_realloc (lis, sizeof (LIS_t) * n);
|
jpayne@69
|
1009 lis_size = n;
|
jpayne@69
|
1010 }
|
jpayne@69
|
1011 for ( i = 0; i < n; ++ i )
|
jpayne@69
|
1012 lis[i].used = false;
|
jpayne@69
|
1013
|
jpayne@69
|
1014 //-- Sort by lo reference coord
|
jpayne@69
|
1015 sort (edgelets.begin(), edgelets.end(), EdgeletRCmp_t());
|
jpayne@69
|
1016
|
jpayne@69
|
1017 //-- Continue until all equivalent repeats are extracted
|
jpayne@69
|
1018 vector<long> allbest;
|
jpayne@69
|
1019 do
|
jpayne@69
|
1020 {
|
jpayne@69
|
1021 //-- Dynamic
|
jpayne@69
|
1022 for ( i = 0; i < n; ++ i )
|
jpayne@69
|
1023 {
|
jpayne@69
|
1024 if ( lis[i].used ) continue;
|
jpayne@69
|
1025
|
jpayne@69
|
1026 lis[i].a = edgelets[i];
|
jpayne@69
|
1027
|
jpayne@69
|
1028 leni = lis[i].a->hiR - lis[i].a->loR + 1;
|
jpayne@69
|
1029 lis[i].score =
|
jpayne@69
|
1030 ScoreLocal (0, leni, 0, 0, lis[i].a->idy, 0);
|
jpayne@69
|
1031
|
jpayne@69
|
1032 lis[i].from = -1;
|
jpayne@69
|
1033 lis[i].diff = 0;
|
jpayne@69
|
1034
|
jpayne@69
|
1035 for ( j = 0; j < i; ++ j )
|
jpayne@69
|
1036 {
|
jpayne@69
|
1037 if ( lis[j].used ) continue;
|
jpayne@69
|
1038 if ( lis[j].from >= 0 &&
|
jpayne@69
|
1039 lis[lis[j].from].a->hiR >= lis[i].a->loR )
|
jpayne@69
|
1040 continue;
|
jpayne@69
|
1041
|
jpayne@69
|
1042 leni = lis[i].a->hiR - lis[i].a->loR + 1;
|
jpayne@69
|
1043 lenj = lis[j].a->hiR - lis[j].a->loR + 1;
|
jpayne@69
|
1044 olap = lis[j].a->hiR - lis[i].a->loR + 1;
|
jpayne@69
|
1045 if ( olap < 0 )
|
jpayne@69
|
1046 olap = 0;
|
jpayne@69
|
1047
|
jpayne@69
|
1048 diff = lis[j].diff + DiffAligns (lis[i].a, lis[j].a);
|
jpayne@69
|
1049
|
jpayne@69
|
1050 score = ScoreLocal
|
jpayne@69
|
1051 (lis[j].score, leni, lenj, olap, lis[i].a->idy, maxolap);
|
jpayne@69
|
1052
|
jpayne@69
|
1053 if ( score > lis[i].score
|
jpayne@69
|
1054 ||
|
jpayne@69
|
1055 (score == lis[i].score && diff < lis[i].diff) )
|
jpayne@69
|
1056 {
|
jpayne@69
|
1057 lis[i].from = j;
|
jpayne@69
|
1058 lis[i].score = score;
|
jpayne@69
|
1059 lis[i].diff = diff;
|
jpayne@69
|
1060 }
|
jpayne@69
|
1061 }
|
jpayne@69
|
1062 }
|
jpayne@69
|
1063 } while ( UpdateBest (lis, n, allbest, epsilon) );
|
jpayne@69
|
1064
|
jpayne@69
|
1065 long beg = PickBest (lis, allbest, epsilon);
|
jpayne@69
|
1066 long end = allbest.size();
|
jpayne@69
|
1067 if ( beg == end ) beg = 0;
|
jpayne@69
|
1068 else end = beg + 1;
|
jpayne@69
|
1069
|
jpayne@69
|
1070 //-- Flag the edgelets
|
jpayne@69
|
1071 for ( ; beg < end; ++ beg )
|
jpayne@69
|
1072 for ( i = allbest[beg]; i >= 0 && i < n; i = lis[i].from )
|
jpayne@69
|
1073 lis[i].a->isRLIS = true;
|
jpayne@69
|
1074
|
jpayne@69
|
1075 if ( flagbad )
|
jpayne@69
|
1076 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
|
jpayne@69
|
1077 if ( ! (*eli)->isRLIS )
|
jpayne@69
|
1078 (*eli)->isGOOD = false;
|
jpayne@69
|
1079 }
|
jpayne@69
|
1080
|
jpayne@69
|
1081 free (lis);
|
jpayne@69
|
1082 }
|
jpayne@69
|
1083
|
jpayne@69
|
1084
|
jpayne@69
|
1085 //------------------------------------------------------------- flagScore ------
|
jpayne@69
|
1086 //! \brief Flag edgelets with scores below a score threshold
|
jpayne@69
|
1087 //!
|
jpayne@69
|
1088 //! Unsets isGOOD for bad.
|
jpayne@69
|
1089 //!
|
jpayne@69
|
1090 //! \param minlen Flag edgelets if less than minlen in length
|
jpayne@69
|
1091 //! \param minidy Flag edgelets if less than minidy identity [0-100]
|
jpayne@69
|
1092 //! \return void
|
jpayne@69
|
1093 //!
|
jpayne@69
|
1094 void DeltaGraph_t::flagScore (long minlen, float minidy)
|
jpayne@69
|
1095 {
|
jpayne@69
|
1096 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
1097 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
1098 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
1099
|
jpayne@69
|
1100 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
1101 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
1102 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
1103 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
1104 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
1105 if ( (*eli)->isGOOD )
|
jpayne@69
|
1106 {
|
jpayne@69
|
1107 //-- Flag low identities
|
jpayne@69
|
1108 if ( (*eli)->idy * 100.0 < minidy )
|
jpayne@69
|
1109 (*eli)->isGOOD = false;
|
jpayne@69
|
1110
|
jpayne@69
|
1111 //-- Flag small lengths
|
jpayne@69
|
1112 if ( (*eli)->hiR - (*eli)->loR + 1 < minlen ||
|
jpayne@69
|
1113 (*eli)->hiQ - (*eli)->loQ + 1 < minlen )
|
jpayne@69
|
1114 (*eli)->isGOOD = false;
|
jpayne@69
|
1115 }
|
jpayne@69
|
1116 }
|
jpayne@69
|
1117
|
jpayne@69
|
1118
|
jpayne@69
|
1119 //-------------------------------------------------------------- flagUNIQ ------
|
jpayne@69
|
1120 //! \brief Flag edgelets with uniqueness below a certain threshold
|
jpayne@69
|
1121 //!
|
jpayne@69
|
1122 //! Unsets isGOOD for bad.
|
jpayne@69
|
1123 //!
|
jpayne@69
|
1124 //! \param minuniq Flag edgelets if less that minuniq percent [0-100] unique
|
jpayne@69
|
1125 //! \return void
|
jpayne@69
|
1126 //!
|
jpayne@69
|
1127 void DeltaGraph_t::flagUNIQ (float minuniq)
|
jpayne@69
|
1128 {
|
jpayne@69
|
1129 long i, uniq, len;
|
jpayne@69
|
1130
|
jpayne@69
|
1131 vector<DeltaEdgelet_t *> edgelets;
|
jpayne@69
|
1132
|
jpayne@69
|
1133 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
1134 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
1135 vector<DeltaEdgelet_t *>::iterator eli;
|
jpayne@69
|
1136
|
jpayne@69
|
1137
|
jpayne@69
|
1138 //-- For each reference sequence
|
jpayne@69
|
1139 long ref_size = 0;
|
jpayne@69
|
1140 long ref_len = 0;
|
jpayne@69
|
1141 unsigned char * ref_cov = NULL;
|
jpayne@69
|
1142 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
1143 {
|
jpayne@69
|
1144 //-- Reset the reference coverage array
|
jpayne@69
|
1145 ref_len = (mi->second).len;
|
jpayne@69
|
1146 if ( ref_len > ref_size )
|
jpayne@69
|
1147 {
|
jpayne@69
|
1148 ref_cov = (unsigned char *) Safe_realloc (ref_cov, ref_len + 1);
|
jpayne@69
|
1149 ref_size = ref_len;
|
jpayne@69
|
1150 }
|
jpayne@69
|
1151 for ( i = 1; i <= ref_len; ++ i )
|
jpayne@69
|
1152 ref_cov[i] = 0;
|
jpayne@69
|
1153
|
jpayne@69
|
1154 //-- Collect all the good edgelets
|
jpayne@69
|
1155 edgelets.clear();
|
jpayne@69
|
1156 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
1157 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
1158 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
1159 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
1160 if ( (*eli)->isGOOD )
|
jpayne@69
|
1161 {
|
jpayne@69
|
1162 edgelets.push_back (*eli);
|
jpayne@69
|
1163
|
jpayne@69
|
1164 //-- Add to the reference coverage
|
jpayne@69
|
1165 for ( i = (*eli)->loR; i <= (*eli)->hiR; i ++ )
|
jpayne@69
|
1166 if ( ref_cov[i] < UCHAR_MAX )
|
jpayne@69
|
1167 ref_cov[i] ++;
|
jpayne@69
|
1168 }
|
jpayne@69
|
1169
|
jpayne@69
|
1170 //-- Calculate the uniqueness of each edgelet
|
jpayne@69
|
1171 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
|
jpayne@69
|
1172 {
|
jpayne@69
|
1173 uniq = 0;
|
jpayne@69
|
1174 len = (*eli)->hiR - (*eli)->loR + 1;
|
jpayne@69
|
1175 for ( i = (*eli)->loR; i <= (*eli)->hiR; i ++ )
|
jpayne@69
|
1176 if ( ref_cov[i] == 1 )
|
jpayne@69
|
1177 uniq ++;
|
jpayne@69
|
1178
|
jpayne@69
|
1179 //-- Flag low reference uniqueness
|
jpayne@69
|
1180 if ( (float)uniq / (float)len * 100.0 < minuniq )
|
jpayne@69
|
1181 (*eli)->isGOOD = false;
|
jpayne@69
|
1182 }
|
jpayne@69
|
1183 }
|
jpayne@69
|
1184 free (ref_cov);
|
jpayne@69
|
1185
|
jpayne@69
|
1186
|
jpayne@69
|
1187 //-- For each query sequence
|
jpayne@69
|
1188 long qry_size = 0;
|
jpayne@69
|
1189 long qry_len = 0;
|
jpayne@69
|
1190 unsigned char * qry_cov = NULL;
|
jpayne@69
|
1191 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
|
jpayne@69
|
1192 {
|
jpayne@69
|
1193 //-- Reset the query coverage array
|
jpayne@69
|
1194 qry_len = (mi->second).len;
|
jpayne@69
|
1195 if ( qry_len > qry_size )
|
jpayne@69
|
1196 {
|
jpayne@69
|
1197 qry_cov = (unsigned char *) Safe_realloc (qry_cov, qry_len + 1);
|
jpayne@69
|
1198 qry_size = qry_len;
|
jpayne@69
|
1199 }
|
jpayne@69
|
1200 for ( i = 1; i <= qry_len; ++ i )
|
jpayne@69
|
1201 qry_cov[i] = 0;
|
jpayne@69
|
1202
|
jpayne@69
|
1203 //-- Collect all the good edgelets
|
jpayne@69
|
1204 edgelets.clear();
|
jpayne@69
|
1205 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
1206 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
1207 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
1208 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
1209 if ( (*eli)->isGOOD )
|
jpayne@69
|
1210 {
|
jpayne@69
|
1211 edgelets.push_back (*eli);
|
jpayne@69
|
1212
|
jpayne@69
|
1213 //-- Add to the query coverage
|
jpayne@69
|
1214 for ( i = (*eli)->loQ; i <= (*eli)->hiQ; i ++ )
|
jpayne@69
|
1215 if ( qry_cov[i] < UCHAR_MAX )
|
jpayne@69
|
1216 qry_cov[i] ++;
|
jpayne@69
|
1217 }
|
jpayne@69
|
1218
|
jpayne@69
|
1219 //-- Calculate the uniqueness of each edgelet
|
jpayne@69
|
1220 for ( eli = edgelets.begin(); eli != edgelets.end(); ++ eli )
|
jpayne@69
|
1221 {
|
jpayne@69
|
1222 uniq = 0;
|
jpayne@69
|
1223 len = (*eli)->hiQ - (*eli)->loQ + 1;
|
jpayne@69
|
1224 for ( i = (*eli)->loQ; i <= (*eli)->hiQ; i ++ )
|
jpayne@69
|
1225 if ( qry_cov[i] == 1 )
|
jpayne@69
|
1226 uniq ++;
|
jpayne@69
|
1227
|
jpayne@69
|
1228 //-- Flag low query uniqueness
|
jpayne@69
|
1229 if ( (float)uniq / (float)len * 100.0 < minuniq )
|
jpayne@69
|
1230 (*eli)->isGOOD = false;
|
jpayne@69
|
1231 }
|
jpayne@69
|
1232 }
|
jpayne@69
|
1233 free (qry_cov);
|
jpayne@69
|
1234 }
|
jpayne@69
|
1235
|
jpayne@69
|
1236
|
jpayne@69
|
1237 //----------------------------------------------------- loadSequences ----------
|
jpayne@69
|
1238 //! \brief Load the sequence information into the DeltaNodes
|
jpayne@69
|
1239 //!
|
jpayne@69
|
1240 //! \return void
|
jpayne@69
|
1241 //!
|
jpayne@69
|
1242 void DeltaGraph_t::loadSequences ()
|
jpayne@69
|
1243 {
|
jpayne@69
|
1244 map<string, DeltaNode_t>::iterator mi;
|
jpayne@69
|
1245
|
jpayne@69
|
1246 FILE * qryfile, * reffile;
|
jpayne@69
|
1247 char * R = NULL;
|
jpayne@69
|
1248 char * Q = NULL;
|
jpayne@69
|
1249 char id [MAX_LINE];
|
jpayne@69
|
1250 long initsize;
|
jpayne@69
|
1251 long len;
|
jpayne@69
|
1252
|
jpayne@69
|
1253 //-- Read in the reference sequences
|
jpayne@69
|
1254 reffile = File_Open (refpath.c_str(), "r");
|
jpayne@69
|
1255 initsize = INIT_SIZE;
|
jpayne@69
|
1256 R = (char *) Safe_malloc (initsize);
|
jpayne@69
|
1257 while ( Read_String (reffile, R, initsize, id, FALSE) )
|
jpayne@69
|
1258 if ( (mi = refnodes.find (id)) != refnodes.end() )
|
jpayne@69
|
1259 {
|
jpayne@69
|
1260 len = strlen (R + 1);
|
jpayne@69
|
1261 mi->second.seq = (char *) Safe_malloc (len + 2);
|
jpayne@69
|
1262 mi->second.seq[0] = '\0';
|
jpayne@69
|
1263 strcpy (mi->second.seq + 1, R + 1);
|
jpayne@69
|
1264 if ( len != mi->second.len )
|
jpayne@69
|
1265 {
|
jpayne@69
|
1266 cerr << "ERROR: Reference input does not match delta file\n";
|
jpayne@69
|
1267 exit (EXIT_FAILURE);
|
jpayne@69
|
1268 }
|
jpayne@69
|
1269 }
|
jpayne@69
|
1270 fclose (reffile);
|
jpayne@69
|
1271 free (R);
|
jpayne@69
|
1272
|
jpayne@69
|
1273 //-- Read in the query sequences
|
jpayne@69
|
1274 qryfile = File_Open (qrypath.c_str(), "r");
|
jpayne@69
|
1275 initsize = INIT_SIZE;
|
jpayne@69
|
1276 Q = (char *) Safe_malloc (initsize);
|
jpayne@69
|
1277 while ( Read_String (qryfile, Q, initsize, id, FALSE) )
|
jpayne@69
|
1278 if ( (mi = qrynodes.find (id)) != qrynodes.end() )
|
jpayne@69
|
1279 {
|
jpayne@69
|
1280 len = strlen (Q + 1);
|
jpayne@69
|
1281 mi->second.seq = (char *) Safe_malloc (len + 2);
|
jpayne@69
|
1282 mi->second.seq[0] = '\0';
|
jpayne@69
|
1283 strcpy (mi->second.seq + 1, Q + 1);
|
jpayne@69
|
1284 if ( len != mi->second.len )
|
jpayne@69
|
1285 {
|
jpayne@69
|
1286 cerr << "ERROR: Query input does not match delta file\n";
|
jpayne@69
|
1287 exit (EXIT_FAILURE);
|
jpayne@69
|
1288 }
|
jpayne@69
|
1289 }
|
jpayne@69
|
1290 fclose (qryfile);
|
jpayne@69
|
1291 free (Q);
|
jpayne@69
|
1292
|
jpayne@69
|
1293
|
jpayne@69
|
1294 //-- Check that we found all the sequences
|
jpayne@69
|
1295 for ( mi = refnodes.begin(); mi != refnodes.end(); ++ mi )
|
jpayne@69
|
1296 if ( mi->second.seq == NULL )
|
jpayne@69
|
1297 {
|
jpayne@69
|
1298 cerr << "ERROR: '" << mi->first << "' not found in reference file\n";
|
jpayne@69
|
1299 exit (EXIT_FAILURE);
|
jpayne@69
|
1300 }
|
jpayne@69
|
1301
|
jpayne@69
|
1302 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
|
jpayne@69
|
1303 if ( mi->second.seq == NULL )
|
jpayne@69
|
1304 {
|
jpayne@69
|
1305 cerr << "ERROR: '" << mi->first << "' not found in query file\n";
|
jpayne@69
|
1306 exit (EXIT_FAILURE);
|
jpayne@69
|
1307 }
|
jpayne@69
|
1308 }
|
jpayne@69
|
1309
|
jpayne@69
|
1310
|
jpayne@69
|
1311 //----------------------------------------------------- outputDelta ------------
|
jpayne@69
|
1312 //! \brief Outputs the contents of the graph as a deltafile
|
jpayne@69
|
1313 //!
|
jpayne@69
|
1314 //! \param out The output stream to write to
|
jpayne@69
|
1315 //! \return The output stream
|
jpayne@69
|
1316 //!
|
jpayne@69
|
1317 ostream & DeltaGraph_t::outputDelta (ostream & out)
|
jpayne@69
|
1318 {
|
jpayne@69
|
1319 bool header;
|
jpayne@69
|
1320 long s1, e1, s2, e2;
|
jpayne@69
|
1321
|
jpayne@69
|
1322 map<string, DeltaNode_t>::const_iterator mi;
|
jpayne@69
|
1323 vector<DeltaEdge_t *>::const_iterator ei;
|
jpayne@69
|
1324 vector<DeltaEdgelet_t *>::const_iterator eli;
|
jpayne@69
|
1325
|
jpayne@69
|
1326 //-- Print the file header
|
jpayne@69
|
1327 cout
|
jpayne@69
|
1328 << refpath << ' ' << qrypath << '\n'
|
jpayne@69
|
1329 << (datatype == PROMER_DATA ? PROMER_STRING : NUCMER_STRING) << '\n';
|
jpayne@69
|
1330
|
jpayne@69
|
1331 for ( mi = qrynodes.begin(); mi != qrynodes.end(); ++ mi )
|
jpayne@69
|
1332 {
|
jpayne@69
|
1333 for ( ei = (mi->second).edges.begin();
|
jpayne@69
|
1334 ei != (mi->second).edges.end(); ++ ei )
|
jpayne@69
|
1335 {
|
jpayne@69
|
1336 header = false;
|
jpayne@69
|
1337
|
jpayne@69
|
1338 for ( eli = (*ei)->edgelets.begin();
|
jpayne@69
|
1339 eli != (*ei)->edgelets.end(); ++ eli )
|
jpayne@69
|
1340 {
|
jpayne@69
|
1341 if ( ! (*eli)->isGOOD )
|
jpayne@69
|
1342 continue;
|
jpayne@69
|
1343
|
jpayne@69
|
1344 //-- Print the sequence header
|
jpayne@69
|
1345 if ( ! header )
|
jpayne@69
|
1346 {
|
jpayne@69
|
1347 cout
|
jpayne@69
|
1348 << '>'
|
jpayne@69
|
1349 << *((*ei)->refnode->id) << ' '
|
jpayne@69
|
1350 << *((*ei)->qrynode->id) << ' '
|
jpayne@69
|
1351 << (*ei)->refnode->len << ' '
|
jpayne@69
|
1352 << (*ei)->qrynode->len << '\n';
|
jpayne@69
|
1353 header = true;
|
jpayne@69
|
1354 }
|
jpayne@69
|
1355 //-- Print the alignment
|
jpayne@69
|
1356 s1 = (*eli)->loR;
|
jpayne@69
|
1357 e1 = (*eli)->hiR;
|
jpayne@69
|
1358 s2 = (*eli)->loQ;
|
jpayne@69
|
1359 e2 = (*eli)->hiQ;
|
jpayne@69
|
1360 if ( (*eli)->dirR == REVERSE_DIR )
|
jpayne@69
|
1361 Swap (s1, e1);
|
jpayne@69
|
1362 if ( (*eli)->dirQ == REVERSE_DIR )
|
jpayne@69
|
1363 Swap (s2, e2);
|
jpayne@69
|
1364
|
jpayne@69
|
1365 cout
|
jpayne@69
|
1366 << s1 << ' ' << e1 << ' ' << s2 << ' ' << e2 << ' '
|
jpayne@69
|
1367 << (*eli)->idyc << ' '
|
jpayne@69
|
1368 << (*eli)->simc << ' '
|
jpayne@69
|
1369 << (*eli)->stpc << '\n'
|
jpayne@69
|
1370 << (*eli)->delta;
|
jpayne@69
|
1371 }
|
jpayne@69
|
1372 }
|
jpayne@69
|
1373 }
|
jpayne@69
|
1374 return out;
|
jpayne@69
|
1375 }
|