comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/LogisticRegression.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 # Copyright 2002 by Jeffrey Chang.
2 # All rights reserved.
3 #
4 # This file is part of the Biopython distribution and governed by your
5 # choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
6 # Please see the LICENSE file that should have been included as part of this
7 # package.
8 """Code for doing logistic regressions (DEPRECATED).
9
10 Classes:
11 - LogisticRegression Holds information for a LogisticRegression classifier.
12
13 Functions:
14 - train Train a new classifier.
15 - calculate Calculate the probabilities of each class, given an observation.
16 - classify Classify an observation into a class.
17
18 This module has been deprecated, please consider an alternative like scikit-learn
19 instead.
20 """
21
22 import warnings
23 from Bio import BiopythonDeprecationWarning
24
25 warnings.warn(
26 "The 'Bio.LogisticRegression' module is deprecated and will be removed in a future "
27 "release of Biopython. Consider using scikit-learn instead.",
28 BiopythonDeprecationWarning,
29 )
30
31 try:
32 import numpy as np
33 import numpy.linalg
34 except ImportError:
35 from Bio import MissingPythonDependencyError
36
37 raise MissingPythonDependencyError(
38 "Please install NumPy if you want to use Bio.LogisticRegression. "
39 "See http://www.numpy.org/"
40 ) from None
41
42
class LogisticRegression:
    """Holds information necessary to do logistic regression classification.

    Attributes:
     - beta - List of the weights for each dimension.

    """

    def __init__(self):
        """Initialize with an empty weight vector."""
        # Populated by train(); stays empty until a model is fitted.
        self.beta = []
54
55
def train(xs, ys, update_fn=None, typecode=None):
    """Train a logistic regression classifier on a training set.

    Arguments:
     - xs - list of observations; each observation is a sequence of numbers.
     - ys - list of class assignments, which must be 0 or 1; same length as xs.
     - update_fn - optional callback, called as update_fn(iteration, log_likelihood)
       once per Newton-Raphson iteration.
     - typecode - optional NumPy typecode for the working arrays; defaults to
       "d" (double precision).

    Returns a LogisticRegression object whose ``beta`` attribute holds the
    fitted weights, with the constant (intercept) weight first.

    Raises ValueError if xs and ys differ in length, if the classes are not
    exactly {0, 1}, or if there are no observations / zero-dimension
    observations.  Raises RuntimeError if Newton-Raphson does not converge
    within 500 iterations.
    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys should be the same length.")
    classes = set(ys)
    if classes != {0, 1}:
        raise ValueError("Classes should be 0's and 1's")
    if typecode is None:
        typecode = "d"

    # Dimensionality of the data is the dimensionality of the
    # observations plus a constant (intercept) dimension.
    N, ndims = len(xs), len(xs[0]) + 1
    if N == 0 or ndims == 1:
        raise ValueError("No observations or observation of 0 dimension.")

    # Make an X array, with a constant first dimension.
    X = np.ones((N, ndims), typecode)
    X[:, 1:] = xs
    Xt = np.transpose(X)
    y = np.asarray(ys, typecode)

    # Initialize the beta parameter to 0.
    beta = np.zeros(ndims, typecode)

    MAX_ITERATIONS = 500
    CONVERGE_THRESHOLD = 0.01
    stepsize = 1.0
    # Now iterate using Newton-Raphson until the log-likelihoods
    # converge.
    i = 0
    old_beta = old_llik = None
    while i < MAX_ITERATIONS:
        # Calculate the probabilities. p = e^(beta X) / (1+e^(beta X))
        ebetaX = np.exp(np.dot(beta, Xt))
        p = ebetaX / (1 + ebetaX)

        # Find the log likelihood score and see if I've converged.
        logp = y * np.log(p) + (1 - y) * np.log(1 - p)
        llik = sum(logp)
        if update_fn is not None:
            # Bug fix: the original passed the *builtin* ``iter`` instead
            # of the iteration counter ``i``.
            update_fn(i, llik)
        if old_llik is not None:
            # Check to see if the likelihood decreased.  If it did, then
            # restore the old beta parameters and halve the step size.
            if llik < old_llik:
                stepsize /= 2.0
                beta = old_beta
            # If I've converged, then stop.
            if np.fabs(llik - old_llik) <= CONVERGE_THRESHOLD:
                break
        old_llik, old_beta = llik, beta
        i += 1

        # NOTE(review): W = diag(p) rather than the textbook IRLS weight
        # diag(p*(1-p)); kept as-is since the step-halving logic above
        # compensates and changing it would alter convergence behavior.
        W = np.identity(N) * p
        Xtyp = np.dot(Xt, y - p)  # Calculate the first derivative.
        XtWX = np.dot(np.dot(Xt, W), X)  # Calculate the second derivative.
        delta = numpy.linalg.solve(XtWX, Xtyp)
        if np.fabs(stepsize - 1.0) > 0.001:
            delta *= stepsize
        # Bug fix: use out-of-place addition.  The original ``beta += delta``
        # mutated the very array that old_beta references, which silently
        # turned the ``beta = old_beta`` restore above into a no-op.
        beta = beta + delta  # Update beta.
    else:
        raise RuntimeError("Didn't converge.")

    lr = LogisticRegression()
    lr.beta = list(beta)
    return lr
129
130
def calculate(lr, x):
    """Calculate the probability for each class.

    Arguments:
     - lr is a LogisticRegression object.
     - x is the observed data.

    Returns a list of the probability that it fits each class.
    """
    # Prepend the constant (intercept) term to the observation.
    obs = np.asarray([1.0] + x)
    # Logistic link: p = e^(beta.x) / (1 + e^(beta.x))
    score = np.exp(np.dot(lr.beta, obs))
    prob_one = score / (1 + score)
    return [1 - prob_one, prob_one]
146
147
def classify(lr, x):
    """Classify an observation into a class (0 or 1)."""
    prob_zero, prob_one = calculate(lr, x)
    # Ties go to class 1, matching the original strict comparison.
    return 0 if prob_zero > prob_one else 1