# Copyright 2002 by Jeffrey Chang.
# All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Code for doing logistic regressions (DEPRECATED).

Classes:
 - LogisticRegression    Holds information for a LogisticRegression classifier.

Functions:
 - train        Train a new classifier.
 - calculate    Calculate the probabilities of each class, given an observation.
 - classify     Classify an observation into a class.

This module has been deprecated; please consider an alternative such as
scikit-learn instead.
"""

import warnings
from Bio import BiopythonDeprecationWarning

warnings.warn(
    "The 'Bio.LogisticRegression' module is deprecated and will be removed in a future "
    "release of Biopython. Consider using scikit-learn instead.",
    BiopythonDeprecationWarning,
)

try:
    import numpy as np
    import numpy.linalg
except ImportError:
    from Bio import MissingPythonDependencyError

    raise MissingPythonDependencyError(
        "Please install NumPy if you want to use Bio.LogisticRegression. "
        "See http://www.numpy.org/"
    ) from None


class LogisticRegression:
    """Holds information necessary to do logistic regression classification.

    Attributes:
     - beta - List of the weights for each dimension.

    """

    def __init__(self):
        """Initialize the class."""
        self.beta = []


def train(xs, ys, update_fn=None, typecode=None):
    """Train a logistic regression classifier on a training set.

    Argument xs is a list of observations and ys is a list of the class
    assignments, which should be 0 or 1. xs and ys should contain the
    same number of elements. update_fn is an optional callback function
    that takes as parameters the iteration number and log likelihood.
    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys should be the same length.")
    classes = set(ys)
    if classes != {0, 1}:
        raise ValueError("Classes should be 0's and 1's")
    if typecode is None:
        typecode = "d"

    # Dimensionality of the data is the dimensionality of the
    # observations plus a constant dimension.
    N, ndims = len(xs), len(xs[0]) + 1
    if N == 0 or ndims == 1:
        raise ValueError("No observations or observation of 0 dimension.")

    # Make an X array, with a constant first dimension.
    X = np.ones((N, ndims), typecode)
    X[:, 1:] = xs
    Xt = np.transpose(X)
    y = np.asarray(ys, typecode)

    # Initialize the beta parameter to 0.
    beta = np.zeros(ndims, typecode)

    MAX_ITERATIONS = 500
    CONVERGE_THRESHOLD = 0.01
    stepsize = 1.0
    # Now iterate using Newton-Raphson until the log-likelihoods
    # converge.
    i = 0
    old_beta = old_llik = None
    while i < MAX_ITERATIONS:
        # Calculate the probabilities. p = e^(beta X) / (1+e^(beta X))
        ebetaX = np.exp(np.dot(beta, Xt))
        p = ebetaX / (1 + ebetaX)

        # Find the log likelihood score and see if I've converged.
        logp = y * np.log(p) + (1 - y) * np.log(1 - p)
        llik = sum(logp)
        if update_fn is not None:
            update_fn(i, llik)
        if old_llik is not None:
            # Check to see if the likelihood decreased. If it did, then
            # restore the old beta parameters and halve the step size.
            if llik < old_llik:
                stepsize /= 2.0
                beta = old_beta
            # If I've converged, then stop.
            if np.fabs(llik - old_llik) <= CONVERGE_THRESHOLD:
                break
        old_llik, old_beta = llik, beta
        i += 1

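        # The next few lines take one (optionally damped) Newton-Raphson step:
        # solve (Xt W X) delta = Xt (y - p) for delta, where W is a diagonal
        # weight matrix built from the current probabilities p, then move beta
        # along delta, scaled by the current step size if it has been halved.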
        W = np.identity(N) * p
        Xtyp = np.dot(Xt, y - p)  # Calculate the first derivative.
        XtWX = np.dot(np.dot(Xt, W), X)  # Calculate the second derivative.
        delta = numpy.linalg.solve(XtWX, Xtyp)
        if np.fabs(stepsize - 1.0) > 0.001:
            delta *= stepsize
        beta += delta  # Update beta.
    else:
        raise RuntimeError("Didn't converge.")

    lr = LogisticRegression()
    lr.beta = list(beta)
    return lr


def calculate(lr, x):
    """Calculate the probability for each class.

    Arguments:
     - lr is a LogisticRegression object.
     - x is the observed data.

    Returns a list of the probability that it fits each class.
    """
    # Insert a constant term for x.
    x = np.asarray([1.0] + x)
    # Calculate the probability. p = e^(beta X) / (1+e^(beta X))
    ebetaX = np.exp(np.dot(lr.beta, x))
    p = ebetaX / (1 + ebetaX)
    return [1 - p, p]


def classify(lr, x):
    """Classify an observation into a class."""
    probs = calculate(lr, x)
    if probs[0] > probs[1]:
        return 0
    return 1
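

# A minimal usage sketch (not part of the original module's API): it exercises
# train(), calculate() and classify() on a small, made-up data set when the
# file is run directly. The observations and class labels below are purely
# illustrative.
if __name__ == "__main__":
    # Six one-dimensional observations with overlapping class labels.
    example_xs = [[-2.0], [-1.0], [-0.5], [0.5], [1.0], [2.0]]
    example_ys = [0, 0, 1, 0, 1, 1]
    model = train(example_xs, example_ys)
    print("beta:", model.beta)
    print("P(class 0), P(class 1) for x=0.5:", calculate(model, [0.5]))
    print("predicted class for x=1.5:", classify(model, [1.5]))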