Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/LogisticRegression.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 # Copyright 2002 by Jeffrey Chang. | |
2 # All rights reserved. | |
3 # | |
4 # This file is part of the Biopython distribution and governed by your | |
5 # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
6 # Please see the LICENSE file that should have been included as part of this | |
7 # package. | |
8 """Code for doing logistic regressions (DEPRECATED). | |
9 | |
10 Classes: | |
11 - LogisticRegression Holds information for a LogisticRegression classifier. | |
12 | |
13 Functions: | |
14 - train Train a new classifier. | |
15 - calculate Calculate the probabilities of each class, given an observation. | |
16 - classify Classify an observation into a class. | |
17 | |
18 This module has been deprecated, please consider an alternative like scikit-learn | |
19 instead. | |
20 """ | |
21 | |
22 import warnings | |
23 from Bio import BiopythonDeprecationWarning | |
24 | |
25 warnings.warn( | |
26 "The 'Bio.LogisticRegression' module is deprecated and will be removed in a future " | |
27 "release of Biopython. Consider using scikit-learn instead.", | |
28 BiopythonDeprecationWarning, | |
29 ) | |
30 | |
31 try: | |
32 import numpy as np | |
33 import numpy.linalg | |
34 except ImportError: | |
35 from Bio import MissingPythonDependencyError | |
36 | |
37 raise MissingPythonDependencyError( | |
38 "Please install NumPy if you want to use Bio.LogisticRegression. " | |
39 "See http://www.numpy.org/" | |
40 ) from None | |
41 | |
42 | |
class LogisticRegression:
    """Container for the parameters of a logistic regression classifier.

    Attributes:
     - beta - List of the weights for each dimension.

    """

    def __init__(self):
        """Create an untrained model with an empty weight list."""
        # ``train`` fills this in with one weight per dimension,
        # constant (intercept) term first.
        self.beta = []
54 | |
55 | |
def train(xs, ys, update_fn=None, typecode=None):
    """Train a logistic regression classifier on a training set.

    Arguments:
     - xs - List of observations, each a sequence of numbers.
     - ys - List of class assignments, which must be 0 or 1.  Must be
       the same length as xs, and both classes must be present.
     - update_fn - Optional callback called as
       ``update_fn(iteration, log_likelihood)`` after each
       Newton-Raphson iteration.
     - typecode - Optional numpy typecode for the internal arrays
       (defaults to "d", double precision).

    Returns a LogisticRegression object whose ``beta`` attribute holds
    the fitted weights (constant term first).

    Raises ValueError on inconsistent input and RuntimeError if the
    iteration fails to converge within 500 steps.
    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys should be the same length.")
    classes = set(ys)
    if classes != {0, 1}:
        raise ValueError("Classes should be 0's and 1's")
    if typecode is None:
        typecode = "d"

    # Dimensionality of the data is the dimensionality of the
    # observations plus a constant dimension.
    N, ndims = len(xs), len(xs[0]) + 1
    if N == 0 or ndims == 1:
        raise ValueError("No observations or observation of 0 dimension.")

    # Make an X array, with a constant first dimension.
    X = np.ones((N, ndims), typecode)
    X[:, 1:] = xs
    Xt = np.transpose(X)
    y = np.asarray(ys, typecode)

    # Initialize the beta parameter to 0.
    beta = np.zeros(ndims, typecode)

    MAX_ITERATIONS = 500
    CONVERGE_THRESHOLD = 0.01
    stepsize = 1.0
    # Now iterate using Newton-Raphson until the log-likelihoods
    # converge.
    i = 0
    old_beta = old_llik = None
    while i < MAX_ITERATIONS:
        # Calculate the probabilities. p = e^(beta X) / (1+e^(beta X))
        ebetaX = np.exp(np.dot(beta, Xt))
        p = ebetaX / (1 + ebetaX)

        # Find the log likelihood score and see if I've converged.
        logp = y * np.log(p) + (1 - y) * np.log(1 - p)
        llik = sum(logp)
        if update_fn is not None:
            # BUGFIX: report the iteration counter ``i``; the original
            # passed the builtin function ``iter`` by mistake.
            update_fn(i, llik)
        if old_llik is not None:
            # Check to see if the likelihood decreased.  If it did, then
            # restore the old beta parameters and half the step size.
            if llik < old_llik:
                stepsize /= 2.0
                beta = old_beta
            # If I've converged, then stop.
            if np.fabs(llik - old_llik) <= CONVERGE_THRESHOLD:
                break
        # BUGFIX: store a *copy* of beta.  The in-place "beta += delta"
        # below would otherwise mutate old_beta through aliasing, making
        # the restore in the step-halving branch above a no-op.
        old_llik, old_beta = llik, beta.copy()
        i += 1

        W = np.identity(N) * p
        Xtyp = np.dot(Xt, y - p)  # Calculate the first derivative.
        XtWX = np.dot(np.dot(Xt, W), X)  # Calculate the second derivative.
        delta = numpy.linalg.solve(XtWX, Xtyp)
        if np.fabs(stepsize - 1.0) > 0.001:
            delta *= stepsize
        beta += delta  # Update beta.
    else:
        raise RuntimeError("Didn't converge.")

    lr = LogisticRegression()
    lr.beta = list(beta)
    return lr
129 | |
130 | |
def calculate(lr, x):
    """Calculate the probability for each class.

    Arguments:
     - lr is a LogisticRegression object.
     - x is the observed data.

    Returns a list of the probability that it fits each class.
    """
    # Prepend the constant (intercept) term to the observation.
    augmented = np.asarray([1.0] + x)
    # Logistic function: p = e^(beta.x) / (1 + e^(beta.x))
    score = np.dot(lr.beta, augmented)
    odds = np.exp(score)
    p_one = odds / (1 + odds)
    return [1 - p_one, p_one]
146 | |
147 | |
def classify(lr, x):
    """Classify an observation into a class."""
    # Ties go to class 1, matching the strict ">" comparison used when
    # this module was first written.
    p_zero, p_one = calculate(lr, x)
    return 1 if p_one >= p_zero else 0