diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/LogisticRegression.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children | |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/LogisticRegression.py	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,153 @@
+# Copyright 2002 by Jeffrey Chang.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Code for doing logistic regressions (DEPRECATED).
+
+Classes:
+ - LogisticRegression    Holds information for a LogisticRegression classifier.
+
+Functions:
+ - train        Train a new classifier.
+ - calculate    Calculate the probabilities of each class, given an observation.
+ - classify     Classify an observation into a class.
+
+This module has been deprecated; please consider an alternative like scikit-learn
+instead.
+"""
+
+import warnings
+from Bio import BiopythonDeprecationWarning
+
+warnings.warn(
+    "The 'Bio.LogisticRegression' module is deprecated and will be removed in a future "
+    "release of Biopython. Consider using scikit-learn instead.",
+    BiopythonDeprecationWarning,
+)
+
+try:
+    import numpy as np
+    import numpy.linalg
+except ImportError:
+    from Bio import MissingPythonDependencyError
+
+    raise MissingPythonDependencyError(
+        "Please install NumPy if you want to use Bio.LogisticRegression. "
+        "See http://www.numpy.org/"
+    ) from None
+
+
+class LogisticRegression:
+    """Holds information necessary to do logistic regression classification.
+
+    Attributes:
+     - beta - List of the weights for each dimension.
+
+    """
+
+    def __init__(self):
+        """Initialize the class."""
+        self.beta = []
+
+
+def train(xs, ys, update_fn=None, typecode=None):
+    """Train a logistic regression classifier on a training set.
+
+    Argument xs is a list of observations and ys is a list of the class
+    assignments, which should be 0 or 1.  xs and ys should contain the
+    same number of elements.  update_fn is an optional callback function
+    that takes as parameters the iteration number and log likelihood.
+    """
+    if len(xs) != len(ys):
+        raise ValueError("xs and ys should be the same length.")
+    classes = set(ys)
+    if classes != {0, 1}:
+        raise ValueError("Classes should be 0's and 1's")
+    if typecode is None:
+        typecode = "d"
+
+    # Dimensionality of the data is the dimensionality of the
+    # observations plus a constant dimension.
+    N, ndims = len(xs), len(xs[0]) + 1
+    if N == 0 or ndims == 1:
+        raise ValueError("No observations or observation of 0 dimension.")
+
+    # Make an X array, with a constant first dimension.
+    X = np.ones((N, ndims), typecode)
+    X[:, 1:] = xs
+    Xt = np.transpose(X)
+    y = np.asarray(ys, typecode)
+
+    # Initialize the beta parameter to 0.
+    beta = np.zeros(ndims, typecode)
+
+    MAX_ITERATIONS = 500
+    CONVERGE_THRESHOLD = 0.01
+    stepsize = 1.0
+    # Now iterate using Newton-Raphson until the log-likelihoods
+    # converge.
+    i = 0
+    old_beta = old_llik = None
+    while i < MAX_ITERATIONS:
+        # Calculate the probabilities.  p = e^(beta X) / (1+e^(beta X))
+        ebetaX = np.exp(np.dot(beta, Xt))
+        p = ebetaX / (1 + ebetaX)
+
+        # Find the log likelihood score and see if I've converged.
+        logp = y * np.log(p) + (1 - y) * np.log(1 - p)
+        llik = sum(logp)
+        if update_fn is not None:
+            update_fn(i, llik)
+        if old_llik is not None:
+            # Check to see if the likelihood decreased.  If it did, then
+            # restore the old beta parameters and halve the step size.
+            if llik < old_llik:
+                stepsize /= 2.0
+                beta = old_beta
+            # If I've converged, then stop.
+            if np.fabs(llik - old_llik) <= CONVERGE_THRESHOLD:
+                break
+        old_llik, old_beta = llik, beta
+        i += 1
+
+        W = np.identity(N) * p
+        Xtyp = np.dot(Xt, y - p)  # Calculate the first derivative.
+        XtWX = np.dot(np.dot(Xt, W), X)  # Calculate the second derivative.
+        delta = numpy.linalg.solve(XtWX, Xtyp)
+        if np.fabs(stepsize - 1.0) > 0.001:
+            delta *= stepsize
+        beta += delta  # Update beta.
+    else:
+        raise RuntimeError("Didn't converge.")
+
+    lr = LogisticRegression()
+    lr.beta = list(beta)
+    return lr
+
+
+def calculate(lr, x):
+    """Calculate the probability for each class.
+
+    Arguments:
+     - lr is a LogisticRegression object.
+     - x is the observed data.
+
+    Returns a list of the probability that it fits each class.
+    """
+    # Insert a constant term for x.
+    x = np.asarray([1.0] + x)
+    # Calculate the probability.  p = e^(beta X) / (1+e^(beta X))
+    ebetaX = np.exp(np.dot(lr.beta, x))
+    p = ebetaX / (1 + ebetaX)
+    return [1 - p, p]
+
+
+def classify(lr, x):
+    """Classify an observation into a class."""
+    probs = calculate(lr, x)
+    if probs[0] > probs[1]:
+        return 0
+    return 1
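For reference, here is a minimal usage sketch of the module as committed, not part of the diff itself. It trains on a tiny invented two-class dataset and classifies a new observation; the data values and the `show_progress` callback name are illustrative assumptions, and it presumes Biopython is importable from this environment.

```python
from Bio import LogisticRegression  # emits BiopythonDeprecationWarning on import

# Invented toy training data: two measurements per observation;
# class labels must be exactly 0 and 1.
xs = [[2.0, 1.0], [1.5, 1.2], [3.1, 4.0], [2.8, 3.5]]
ys = [0, 0, 1, 1]

def show_progress(iteration, loglikelihood):
    # Called once per Newton-Raphson iteration by train().
    print(f"iteration {iteration}: log-likelihood {loglikelihood:.4f}")

model = LogisticRegression.train(xs, ys, update_fn=show_progress)
print(model.beta)  # fitted weights, constant term first
print(LogisticRegression.calculate(model, [2.9, 3.8]))  # [P(class 0), P(class 1)]
print(LogisticRegression.classify(model, [2.9, 3.8]))   # 0 or 1
```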
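Since the module's own deprecation message points to scikit-learn, an equivalent sketch with `sklearn.linear_model.LogisticRegression` might look like the following. This is an assumption about the intended replacement, and note that scikit-learn applies L2 regularization by default, so its coefficients will not exactly match the unregularized Newton-Raphson fit above.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

# Same invented toy data as above.
X = np.array([[2.0, 1.0], [1.5, 1.2], [3.1, 4.0], [2.8, 3.5]])
y = np.array([0, 0, 1, 1])

# scikit-learn regularizes by default (L2), unlike the deprecated module.
clf = LogisticRegression().fit(X, y)

x_new = np.array([[2.9, 3.8]])
print(clf.predict_proba(x_new))  # [[P(class 0), P(class 1)]]
print(clf.predict(x_new))        # predicted class label
```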