CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/MarkovModel.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author: jpayne
date: Tue, 18 Mar 2025 17:55:14 -0400

# Copyright 2002 by Jeffrey Chang.
# All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""A state-emitting MarkovModel.

Note that terminology similar to Manning and Schutze is used.


Functions:
train_bw        Train a Markov model using the Baum-Welch algorithm.
train_visible   Train a visible Markov model using MLE.
find_states     Find a state sequence that explains some observations.

load            Load a MarkovModel.
save            Save a MarkovModel.

Classes:
MarkovModel     Holds the description of a Markov model.
"""

import warnings
from Bio import BiopythonDeprecationWarning

warnings.warn(
    "The 'Bio.MarkovModel' module is deprecated and will be removed in a "
    "future release of Biopython. Consider using the hmmlearn package instead.",
    BiopythonDeprecationWarning,
)


try:
    import numpy as np
except ImportError:
    from Bio import MissingPythonDependencyError

    raise MissingPythonDependencyError(
        "Please install NumPy if you want to use Bio.MarkovModel. "
        "See http://www.numpy.org/"
    ) from None

logaddexp = np.logaddexp


def itemindex(values):
    """Return a dictionary mapping each value to the offset of its first occurrence."""
    d = {}
    entries = enumerate(values[::-1])
    n = len(values) - 1
    for index, key in entries:
        d[key] = n - index
    return d
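
# Editorial note (not part of the original module): itemindex maps each item
# to the offset of its *first* occurrence.  For a hypothetical input
# ["a", "b", "a", "c"] the result satisfies d["a"] == 0, d["b"] == 1 and
# d["c"] == 3.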


np.random.seed()

VERY_SMALL_NUMBER = 1e-300
LOG0 = np.log(VERY_SMALL_NUMBER)


class MarkovModel:
    """Create a state-emitting MarkovModel object."""

    def __init__(
        self, states, alphabet, p_initial=None, p_transition=None, p_emission=None
    ):
        """Initialize the class."""
        self.states = states
        self.alphabet = alphabet
        self.p_initial = p_initial
        self.p_transition = p_transition
        self.p_emission = p_emission

    def __str__(self):
        """Create a string representation of the MarkovModel object."""
        from io import StringIO

        handle = StringIO()
        save(self, handle)
        handle.seek(0)
        return handle.read()


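# Example (illustrative, not from the original module): a MarkovModel can be
# constructed directly from NumPy arrays.  The states and symbols below are
# hypothetical.
#
#     import numpy as np
#     mm = MarkovModel(
#         states=["fair", "loaded"],
#         alphabet=["H", "T"],
#         p_initial=np.array([0.5, 0.5]),
#         p_transition=np.array([[0.9, 0.1], [0.1, 0.9]]),
#         p_emission=np.array([[0.5, 0.5], [0.8, 0.2]]),
#     )
#     print(mm)  # __str__ renders the model via save()

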
def _readline_and_check_start(handle, start):
    """Read the next line and check that it begins with the expected start (PRIVATE)."""
    line = handle.readline()
    if not line.startswith(start):
        raise ValueError(f"I expected {start!r} but got {line!r}")
    return line


def load(handle):
    """Parse a file handle into a MarkovModel object."""
    # Load the states.
    line = _readline_and_check_start(handle, "STATES:")
    states = line.split()[1:]

    # Load the alphabet.
    line = _readline_and_check_start(handle, "ALPHABET:")
    alphabet = line.split()[1:]

    mm = MarkovModel(states, alphabet)
    N, M = len(states), len(alphabet)

    # Load the initial probabilities.
    mm.p_initial = np.zeros(N)
    line = _readline_and_check_start(handle, "INITIAL:")
    for i in range(len(states)):
        line = _readline_and_check_start(handle, f"  {states[i]}:")
        mm.p_initial[i] = float(line.split()[-1])

    # Load the transition probabilities.
    mm.p_transition = np.zeros((N, N))
    line = _readline_and_check_start(handle, "TRANSITION:")
    for i in range(len(states)):
        line = _readline_and_check_start(handle, f"  {states[i]}:")
        mm.p_transition[i, :] = [float(v) for v in line.split()[1:]]

    # Load the emission probabilities.
    mm.p_emission = np.zeros((N, M))
    line = _readline_and_check_start(handle, "EMISSION:")
    for i in range(len(states)):
        line = _readline_and_check_start(handle, f"  {states[i]}:")
        mm.p_emission[i, :] = [float(v) for v in line.split()[1:]]

    return mm


def save(mm, handle):
    """Save a MarkovModel object to a file handle."""
    # This will fail if there are spaces in the states or alphabet.
    w = handle.write
    w(f"STATES: {' '.join(mm.states)}\n")
    w(f"ALPHABET: {' '.join(mm.alphabet)}\n")
    w("INITIAL:\n")
    for i in range(len(mm.p_initial)):
        w(f"  {mm.states[i]}: {mm.p_initial[i]:g}\n")
    w("TRANSITION:\n")
    for i in range(len(mm.p_transition)):
        w(f"  {mm.states[i]}: {' '.join(str(x) for x in mm.p_transition[i])}\n")
    w("EMISSION:\n")
    for i in range(len(mm.p_emission)):
        w(f"  {mm.states[i]}: {' '.join(str(x) for x in mm.p_emission[i])}\n")

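
# Example (illustrative, not from the original module): save() and load() can
# round-trip a model through any text handle, e.g. an in-memory StringIO.
#
#     from io import StringIO
#     handle = StringIO()
#     save(mm, handle)  # mm as constructed in the sketch above
#     handle.seek(0)
#     mm2 = load(handle)
#     assert mm2.states == mm.states and mm2.alphabet == mm.alphabet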

# XXX allow them to specify starting points
def train_bw(
    states,
    alphabet,
    training_data,
    pseudo_initial=None,
    pseudo_transition=None,
    pseudo_emission=None,
    update_fn=None,
):
    """Train a MarkovModel using the Baum-Welch algorithm.

    states is a list of strings that describe the names of each state.
    alphabet is a list of objects that indicate the allowed outputs.
    training_data is a list of observations.  Each observation is a list
    of objects from the alphabet.

    pseudo_initial, pseudo_transition, and pseudo_emission are optional
    parameters that you can use to assign pseudo-counts to different
    matrices.  They should be matrices of the appropriate size that
    contain numbers to add to each parameter matrix, before
    normalization.

    update_fn is an optional callback that takes parameters
    (iteration, log_likelihood).  It is called once per iteration.
    """
    N, M = len(states), len(alphabet)
    if not training_data:
        raise ValueError("No training data given.")
    if pseudo_initial is not None:
        pseudo_initial = np.asarray(pseudo_initial)
        if pseudo_initial.shape != (N,):
            raise ValueError("pseudo_initial must have shape (len(states),)")
    if pseudo_transition is not None:
        pseudo_transition = np.asarray(pseudo_transition)
        if pseudo_transition.shape != (N, N):
            raise ValueError(
                "pseudo_transition must have shape (len(states), len(states))"
            )
    if pseudo_emission is not None:
        pseudo_emission = np.asarray(pseudo_emission)
        if pseudo_emission.shape != (N, M):
            raise ValueError(
                "pseudo_emission must have shape (len(states), len(alphabet))"
            )

    # Training data is given as a list of members of the alphabet.
    # Replace those with indexes into the alphabet list for easier
    # computation.
    training_outputs = []
    indexes = itemindex(alphabet)
    for outputs in training_data:
        training_outputs.append([indexes[x] for x in outputs])

    # Do some sanity checking on the outputs.
    lengths = [len(x) for x in training_outputs]
    if min(lengths) == 0:
        raise ValueError("I got training data with outputs of length 0")

    # Do the training with Baum-Welch.
    x = _baum_welch(
        N,
        M,
        training_outputs,
        pseudo_initial=pseudo_initial,
        pseudo_transition=pseudo_transition,
        pseudo_emission=pseudo_emission,
        update_fn=update_fn,
    )
    p_initial, p_transition, p_emission = x
    return MarkovModel(states, alphabet, p_initial, p_transition, p_emission)

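
# Example (illustrative sketch, not from the original module): training a
# two-state HMM with Baum-Welch.  The state names, symbols and observation
# sequences below are hypothetical.
#
#     states = ["rainy", "sunny"]
#     alphabet = ["walk", "shop", "clean"]
#     observations = [
#         ["walk", "shop", "clean", "walk"],
#         ["clean", "clean", "shop"],
#     ]
#     mm = train_bw(states, alphabet, observations,
#                   update_fn=lambda i, llik: print(i, llik))
#     print(mm)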

MAX_ITERATIONS = 1000


def _baum_welch(
    N,
    M,
    training_outputs,
    p_initial=None,
    p_transition=None,
    p_emission=None,
    pseudo_initial=None,
    pseudo_transition=None,
    pseudo_emission=None,
    update_fn=None,
):
    """Implement the Baum-Welch algorithm to estimate the unknown parameters of the MarkovModel object (PRIVATE)."""
    if p_initial is None:
        p_initial = _random_norm(N)
    else:
        p_initial = _copy_and_check(p_initial, (N,))

    if p_transition is None:
        p_transition = _random_norm((N, N))
    else:
        p_transition = _copy_and_check(p_transition, (N, N))
    if p_emission is None:
        p_emission = _random_norm((N, M))
    else:
        p_emission = _copy_and_check(p_emission, (N, M))

    # Do all the calculations in log space to avoid underflows.
    lp_initial = np.log(p_initial)
    lp_transition = np.log(p_transition)
    lp_emission = np.log(p_emission)
    if pseudo_initial is not None:
        lpseudo_initial = np.log(pseudo_initial)
    else:
        lpseudo_initial = None
    if pseudo_transition is not None:
        lpseudo_transition = np.log(pseudo_transition)
    else:
        lpseudo_transition = None
    if pseudo_emission is not None:
        lpseudo_emission = np.log(pseudo_emission)
    else:
        lpseudo_emission = None

    # Iterate through each sequence of outputs, updating the parameters
    # of the HMM.  Stop when the log likelihoods of the sequences
    # stop varying.
    prev_llik = None
    for i in range(MAX_ITERATIONS):
        llik = LOG0
        for outputs in training_outputs:
            llik += _baum_welch_one(
                N,
                M,
                outputs,
                lp_initial,
                lp_transition,
                lp_emission,
                lpseudo_initial,
                lpseudo_transition,
                lpseudo_emission,
            )
        if update_fn is not None:
            update_fn(i, llik)
        if prev_llik is not None and np.fabs(prev_llik - llik) < 0.1:
            break
        prev_llik = llik
    else:
        raise RuntimeError("HMM did not converge in %d iterations" % MAX_ITERATIONS)

    # Return everything back in normal space.
    return [np.exp(matrix) for matrix in (lp_initial, lp_transition, lp_emission)]


def _baum_welch_one(
    N,
    M,
    outputs,
    lp_initial,
    lp_transition,
    lp_emission,
    lpseudo_initial,
    lpseudo_transition,
    lpseudo_emission,
):
    """Execute one step of the Baum-Welch algorithm (PRIVATE).

    Do one iteration of Baum-Welch based on a sequence of outputs.
    Changes the values of lp_initial, lp_transition and lp_emission in place.
    """
    T = len(outputs)
    fmat = _forward(N, T, lp_initial, lp_transition, lp_emission, outputs)
    bmat = _backward(N, T, lp_transition, lp_emission, outputs)

    # Calculate the probability of traversing each arc for any given
    # transition.
    lp_arc = np.zeros((N, N, T))
    for t in range(T):
        k = outputs[t]
        lp_traverse = np.zeros((N, N))  # P of going over one arc.
        for i in range(N):
            for j in range(N):
                # P(getting to this arc)
                # P(making this transition)
                # P(emitting this character)
                # P(going to the end)
                lp = (
                    fmat[i][t]
                    + lp_transition[i][j]
                    + lp_emission[i][k]
                    + bmat[j][t + 1]
                )
                lp_traverse[i][j] = lp
        # Normalize the probability for this time step.
        lp_arc[:, :, t] = lp_traverse - _logsum(lp_traverse)

    # Sum of all the transitions out of state i at time t.
    lp_arcout_t = np.zeros((N, T))
    for t in range(T):
        for i in range(N):
            lp_arcout_t[i][t] = _logsum(lp_arc[i, :, t])

    # Sum of all the transitions out of state i.
    lp_arcout = np.zeros(N)
    for i in range(N):
        lp_arcout[i] = _logsum(lp_arcout_t[i, :])

    # UPDATE P_INITIAL.  Assign into the existing array (rather than
    # rebinding the local name) so that the caller sees the update.
    lp_init = lp_arcout_t[:, 0]
    if lpseudo_initial is not None:
        lp_init = _logvecadd(lp_init, lpseudo_initial)
        lp_init = lp_init - _logsum(lp_init)
    lp_initial[:] = lp_init

    # UPDATE P_TRANSITION.  p_transition[i][j] is the sum of all the
    # transitions from i to j, normalized by the sum of the
    # transitions out of i.
    for i in range(N):
        for j in range(N):
            lp_transition[i][j] = _logsum(lp_arc[i, j, :]) - lp_arcout[i]
        if lpseudo_transition is not None:
            lp_transition[i] = _logvecadd(lp_transition[i], lpseudo_transition)
            lp_transition[i] = lp_transition[i] - _logsum(lp_transition[i])

    # UPDATE P_EMISSION.  lp_emission[i][k] is the sum of all the
    # transitions out of i when k is observed, divided by the sum of
    # the transitions out of i.
    for i in range(N):
        ksum = np.zeros(M) + LOG0  # ksum[k] is the sum of all i with k.
        for t in range(T):
            k = outputs[t]
            for j in range(N):
                ksum[k] = logaddexp(ksum[k], lp_arc[i, j, t])
        ksum = ksum - _logsum(ksum)  # Normalize.
        if lpseudo_emission is not None:
            ksum = _logvecadd(ksum, lpseudo_emission[i])
            ksum = ksum - _logsum(ksum)  # Renormalize.
        lp_emission[i, :] = ksum

    # Calculate the log likelihood of the output based on the forward
    # matrix.  Since the parameters of the HMM have changed, the log
    # likelihoods are going to be a step behind, and we might be doing
    # one extra iteration of training.  The alternative is to rerun
    # the _forward algorithm and calculate from the clean one, but
    # that may be more expensive than overshooting the training by one
    # step.
    return _logsum(fmat[:, T])


def _forward(N, T, lp_initial, lp_transition, lp_emission, outputs):
    """Implement the forward algorithm (PRIVATE).

    Calculate an Nx(T+1) matrix, where the last column is the total
    probability of the output.
    """
    matrix = np.zeros((N, T + 1))

    # Initialize the first column to be the initial values.
    matrix[:, 0] = lp_initial
    for t in range(1, T + 1):
        k = outputs[t - 1]
        for j in range(N):
            # The probability of the state is the sum of the
            # transitions from all the states from time t-1.
            lprob = LOG0
            for i in range(N):
                lp = matrix[i][t - 1] + lp_transition[i][j] + lp_emission[i][k]
                lprob = logaddexp(lprob, lp)
            matrix[j][t] = lprob
    return matrix

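
# Editorial note (not from the original module): in log space the recursion
# implemented above is
#     matrix[j][t] = logsum_i(matrix[i][t - 1] + lp_transition[i][j]
#                             + lp_emission[i][outputs[t - 1]])
# i.e. the emission term is indexed by the previous state i rather than the
# current state j.  _backward and _baum_welch_one use the same convention, so
# the three routines are mutually consistent.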

def _backward(N, T, lp_transition, lp_emission, outputs):
    """Implement the backward algorithm (PRIVATE)."""
    matrix = np.zeros((N, T + 1))
    for t in range(T - 1, -1, -1):
        k = outputs[t]
        for i in range(N):
            # The probability of the state is the sum of the
            # transitions from all the states from time t+1.
            lprob = LOG0
            for j in range(N):
                lp = matrix[j][t + 1] + lp_transition[i][j] + lp_emission[i][k]
                lprob = logaddexp(lprob, lp)
            matrix[i][t] = lprob
    return matrix


def train_visible(
    states,
    alphabet,
    training_data,
    pseudo_initial=None,
    pseudo_transition=None,
    pseudo_emission=None,
):
    """Train a visible MarkovModel using maximum likelihood estimates for each of the parameters.

    states is a list of strings that describe the names of each state.
    alphabet is a list of objects that indicate the allowed outputs.
    training_data is a list of (outputs, observed states) tuples, where
    outputs is a list of emissions from the alphabet and observed states
    is a list of states from states.

    pseudo_initial, pseudo_transition, and pseudo_emission are optional
    parameters that you can use to assign pseudo-counts to different
    matrices.  They should be matrices of the appropriate size that
    contain numbers to add to each parameter matrix.
    """
    N, M = len(states), len(alphabet)
    if pseudo_initial is not None:
        pseudo_initial = np.asarray(pseudo_initial)
        if pseudo_initial.shape != (N,):
            raise ValueError("pseudo_initial must have shape (len(states),)")
    if pseudo_transition is not None:
        pseudo_transition = np.asarray(pseudo_transition)
        if pseudo_transition.shape != (N, N):
            raise ValueError(
                "pseudo_transition must have shape (len(states), len(states))"
            )
    if pseudo_emission is not None:
        pseudo_emission = np.asarray(pseudo_emission)
        if pseudo_emission.shape != (N, M):
            raise ValueError(
                "pseudo_emission must have shape (len(states), len(alphabet))"
            )

    # Training data is given as a list of members of the alphabet.
    # Replace those with indexes into the alphabet list for easier
    # computation.
    training_states, training_outputs = [], []
    states_indexes = itemindex(states)
    outputs_indexes = itemindex(alphabet)
    for toutputs, tstates in training_data:
        if len(tstates) != len(toutputs):
            raise ValueError("states and outputs not aligned")
        training_states.append([states_indexes[x] for x in tstates])
        training_outputs.append([outputs_indexes[x] for x in toutputs])

    x = _mle(
        N,
        M,
        training_outputs,
        training_states,
        pseudo_initial,
        pseudo_transition,
        pseudo_emission,
    )
    p_initial, p_transition, p_emission = x

    return MarkovModel(states, alphabet, p_initial, p_transition, p_emission)

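
# Example (illustrative sketch, not from the original module): training a
# visible model from aligned (outputs, states) pairs.  All names below are
# hypothetical.
#
#     states = ["exon", "intron"]
#     alphabet = ["A", "C", "G", "T"]
#     training_data = [
#         (["A", "C", "G", "T"], ["exon", "exon", "intron", "intron"]),
#         (["G", "G", "A"], ["intron", "exon", "exon"]),
#     ]
#     mm = train_visible(states, alphabet, training_data)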

def _mle(
    N,
    M,
    training_outputs,
    training_states,
    pseudo_initial,
    pseudo_transition,
    pseudo_emission,
):
    """Implement the maximum likelihood estimation algorithm (PRIVATE)."""
    # p_initial is the probability that a sequence of states starts
    # off with a particular one.
    p_initial = np.zeros(N)
    if pseudo_initial is not None:
        p_initial = p_initial + pseudo_initial
    for states in training_states:
        p_initial[states[0]] += 1
    p_initial = _normalize(p_initial)

    # p_transition is the probability that a state leads to the next
    # one.  C(i,j)/C(i) where i and j are states.
    p_transition = np.zeros((N, N))
    if pseudo_transition is not None:
        p_transition = p_transition + pseudo_transition
    for states in training_states:
        for n in range(len(states) - 1):
            i, j = states[n], states[n + 1]
            p_transition[i, j] += 1
    for i in range(len(p_transition)):
        p_transition[i, :] = p_transition[i, :] / sum(p_transition[i, :])

    # p_emission is the probability of an output given a state.
    # C(s,o)/C(s) where o is an output and s is a state.
    # Start from ones (add-one smoothing, so no row sums to zero) and
    # add any pseudo-counts supplied by the caller.
    p_emission = np.ones((N, M))
    if pseudo_emission is not None:
        p_emission = p_emission + pseudo_emission
    for outputs, states in zip(training_outputs, training_states):
        for o, s in zip(outputs, states):
            p_emission[s, o] += 1
    for i in range(len(p_emission)):
        p_emission[i, :] = p_emission[i, :] / sum(p_emission[i, :])

    return p_initial, p_transition, p_emission


def _argmaxes(vector, allowance=None):
    """Return the indices of the maximum values in the vector (PRIVATE).

    Currently only the single largest index is returned; the allowance
    argument is accepted for API compatibility but is not used.
    """
    return [np.argmax(vector)]


def find_states(markov_model, output):
    """Find the most likely sequence of states for the given output.

    Returns a list of (states, score) tuples.
    """
    mm = markov_model
    N = len(mm.states)

    # _viterbi does calculations in log space.  Add a tiny bit to the
    # matrices so that the logs will not break.
    lp_initial = np.log(mm.p_initial + VERY_SMALL_NUMBER)
    lp_transition = np.log(mm.p_transition + VERY_SMALL_NUMBER)
    lp_emission = np.log(mm.p_emission + VERY_SMALL_NUMBER)
    # Change output into a list of indexes into the alphabet.
    indexes = itemindex(mm.alphabet)
    output = [indexes[x] for x in output]

    # Run the Viterbi algorithm.
    results = _viterbi(N, lp_initial, lp_transition, lp_emission, output)

    for i in range(len(results)):
        states, score = results[i]
        results[i] = [mm.states[x] for x in states], np.exp(score)
    return results

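
# Example (illustrative sketch, not from the original module): decoding an
# observation sequence with a trained model (e.g. the Baum-Welch sketch above).
#
#     observation = ["walk", "shop", "walk"]
#     for state_path, score in find_states(mm, observation):
#         print(state_path, score)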

def _viterbi(N, lp_initial, lp_transition, lp_emission, output):
    """Implement the Viterbi algorithm to find the most likely states for a given output (PRIVATE)."""
    T = len(output)
    # Store the backtrace in an NxT matrix.
    backtrace = []  # list of indexes of states in previous timestep.
    for i in range(N):
        backtrace.append([None] * T)

    # Store the best scores.
    scores = np.zeros((N, T))
    scores[:, 0] = lp_initial + lp_emission[:, output[0]]
    for t in range(1, T):
        k = output[t]
        for j in range(N):
            # Find the most likely place it came from.
            i_scores = scores[:, t - 1] + lp_transition[:, j] + lp_emission[j, k]
            indexes = _argmaxes(i_scores)
            scores[j, t] = i_scores[indexes[0]]
            backtrace[j][t] = indexes

    # Do the backtrace.  First, find a good place to start.  Then,
    # we'll follow the backtrace matrix to find the list of states.
    # In the event of ties, there may be multiple paths back through
    # the matrix, which implies a recursive solution.  We'll simulate
    # it by keeping our own stack.
    in_process = []  # list of (t, states, score)
    results = []  # return values.  list of (states, score)
    indexes = _argmaxes(scores[:, T - 1])  # pick the first place
    for i in indexes:
        in_process.append((T - 1, [i], scores[i][T - 1]))
    while in_process:
        t, states, score = in_process.pop()
        if t == 0:
            results.append((states, score))
        else:
            indexes = backtrace[states[0]][t]
            for i in indexes:
                in_process.append((t - 1, [i] + states, score))
    return results


def _normalize(matrix):
    """Normalize a matrix object (PRIVATE)."""
    if len(matrix.shape) == 1:
        matrix = matrix / sum(matrix)
    elif len(matrix.shape) == 2:
        # Normalize by rows.
        for i in range(len(matrix)):
            matrix[i, :] = matrix[i, :] / sum(matrix[i, :])
    else:
        raise ValueError("I cannot handle matrices of that shape")
    return matrix


def _uniform_norm(shape):
    """Return a normalized uniform matrix (PRIVATE)."""
    matrix = np.ones(shape)
    return _normalize(matrix)


def _random_norm(shape):
    """Return a normalized random matrix (PRIVATE)."""
    matrix = np.random.random(shape)
    return _normalize(matrix)


def _copy_and_check(matrix, desired_shape):
    """Copy a matrix and check its dimensions and normalization (PRIVATE)."""
    # Copy the matrix.
    matrix = np.array(matrix, copy=True)
    # Check the dimensions.
    if matrix.shape != desired_shape:
        raise ValueError("Incorrect dimension")
    # Make sure it's normalized.
    if len(matrix.shape) == 1:
        if np.fabs(sum(matrix) - 1.0) > 0.01:
            raise ValueError("matrix not normalized to 1.0")
    elif len(matrix.shape) == 2:
        for i in range(len(matrix)):
            if np.fabs(sum(matrix[i]) - 1.0) > 0.01:
                raise ValueError("matrix %d not normalized to 1.0" % i)
    else:
        raise ValueError("I don't handle matrices > 2 dimensions")
    return matrix


def _logsum(matrix):
    """Implement logsum for a matrix object (PRIVATE)."""
    if len(matrix.shape) > 1:
        vec = np.reshape(matrix, (np.prod(matrix.shape),))
    else:
        vec = matrix
    total = LOG0
    for num in vec:
        total = logaddexp(total, num)
    return total


def _logvecadd(logvec1, logvec2):
    """Implement a log sum for two vector objects (PRIVATE)."""
    assert len(logvec1) == len(logvec2), "vectors aren't the same length"
    sumvec = np.zeros(len(logvec1))
    for i in range(len(logvec1)):
        sumvec[i] = logaddexp(logvec1[i], logvec2[i])
    return sumvec


def _exp_logsum(numbers):
    """Return the exponential of a logsum (PRIVATE)."""
    total = _logsum(numbers)
    return np.exp(total)