Mercurial > repos > rliterman > csp2

diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/Seq.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author: jpayne
date: Tue, 18 Mar 2025 17:55:14 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/Bio/Seq.py	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,3290 @@
+# Copyright 2000 Andrew Dalke.
+# Copyright 2000-2002 Brad Chapman.
+# Copyright 2004-2005, 2010 by M de Hoon.
+# Copyright 2007-2023 by Peter Cock.
+# All rights reserved.
+#
+# This file is part of the Biopython distribution and governed by your
+# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
+# Please see the LICENSE file that should have been included as part of this
+# package.
+"""Provide objects to represent biological sequences.
+
+See also the Seq_ wiki and the chapter in our tutorial:
+ - `HTML Tutorial`_
+ - `PDF Tutorial`_
+
+.. _Seq: http://biopython.org/wiki/Seq
+.. _`HTML Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.html
+.. _`PDF Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
+
+"""
+import array
+import collections
+import numbers
+import warnings
+
+from abc import ABC
+from abc import abstractmethod
+from typing import overload, Optional, Union, Dict
+
+from Bio import BiopythonWarning
+from Bio.Data import CodonTable
+from Bio.Data import IUPACData
+
+
+def _maketrans(complement_mapping):
+    """Build a 256-byte translation table from a complement mapping (PRIVATE).
+
+    Arguments:
+     - complement_mapping - a dictionary of single letters to their
+       complements, such as ambiguous_dna_complement and
+       ambiguous_rna_complement from Data.IUPACData.
+
+    The keys and values of the mapping are each joined into one ASCII
+    string; the lower case versions are appended so that the resulting
+    table handles both upper case and lower case sequences.
+
+    Returns a bytes object of length 256, suitable for the ``translate``
+    method of bytes-like objects when computing a (reverse) complement.
+
+    For internal use only.
+    """
+    letters = "".join(complement_mapping.keys()).encode("ASCII")
+    complements = "".join(complement_mapping.values()).encode("ASCII")
+    source = letters + letters.lower()
+    target = complements + complements.lower()
+    return bytes.maketrans(source, target)
+
+
+# Build the module-level translation tables used for (reverse) complements.
+# Each table is made from a private copy of the IUPAC mapping, so that the
+# extra letter added below does not modify the dictionary in IUPACData.
+# In the DNA table, U is given the same complement as T.
+ambiguous_dna_complement = dict(IUPACData.ambiguous_dna_complement)
+ambiguous_dna_complement["U"] = ambiguous_dna_complement["T"]
+_dna_complement_table = _maketrans(ambiguous_dna_complement)
+# The temporary copy is removed again; only the table stays in the module.
+del ambiguous_dna_complement
+# In the RNA table, T is given the same complement as U.
+ambiguous_rna_complement = dict(IUPACData.ambiguous_rna_complement)
+ambiguous_rna_complement["T"] = ambiguous_rna_complement["U"]
+_rna_complement_table = _maketrans(ambiguous_rna_complement)
+del ambiguous_rna_complement
+
+
+class SequenceDataAbstractBaseClass(ABC):
+    """Abstract base class for sequence content providers.
+
+    Most users will not need to use this class. It is used internally as a base
+    class for sequence content provider classes such as _UndefinedSequenceData
+    defined in this module, and _TwoBitSequenceData in Bio.SeqIO.TwoBitIO.
+    Instances of these classes can be used instead of a ``bytes`` object as the
+    data argument when creating a Seq object, and provide the sequence content
+    only when requested via ``__getitem__``. This allows lazy parsers to load
+    and parse sequence data from a file only for the requested sequence regions,
+    and _UndefinedSequenceData instances to raise an exception when undefined
+    sequence data are requested.
+
+    Future implementations of lazy parsers that similarly provide on-demand
+    parsing of sequence data should use a subclass of this abstract class and
+    implement the abstract methods ``__len__`` and ``__getitem__``:
+
+    * ``__len__`` must return the sequence length;
+    * ``__getitem__`` must return
+
+      * a ``bytes`` object for the requested region; or
+      * a new instance of the subclass for the requested region; or
+      * raise an ``UndefinedSequenceError``.
+
+      Calling ``__getitem__`` for a sequence region of size zero should always
+      return an empty ``bytes`` object.
+      Calling ``__getitem__`` for the full sequence (as in data[:]) should
+      either return a ``bytes`` object with the full sequence, or raise an
+      ``UndefinedSequenceError``.
+
+    Subclasses of SequenceDataAbstractBaseClass must call ``super().__init__()``
+    as part of their ``__init__`` method.
+
+    All other methods defined here convert the data to ``bytes`` first (via
+    ``__bytes__``, which requests the full sequence as ``self[:]``) and then
+    delegate to the ``bytes`` method of the same name.
+    """
+
+    __slots__ = ()
+
+    def __init__(self):
+        """Check if ``__getitem__`` returns a bytes-like object."""
+        # A zero-length slice must compare equal to an empty bytes object.
+        assert self[:0] == b""
+
+    @abstractmethod
+    def __len__(self):
+        pass
+
+    @abstractmethod
+    def __getitem__(self, key):
+        pass
+
+    def __bytes__(self):
+        """Return the full sequence by requesting the slice ``self[:]``."""
+        return self[:]
+
+    def __hash__(self):
+        """Return the hash of the full sequence as a bytes object."""
+        return hash(bytes(self))
+
+    def __eq__(self, other):
+        """Compare the full sequence as bytes to other using ``==``."""
+        return bytes(self) == other
+
+    def __lt__(self, other):
+        """Compare the full sequence as bytes to other using ``<``."""
+        return bytes(self) < other
+
+    def __le__(self, other):
+        """Compare the full sequence as bytes to other using ``<=``."""
+        return bytes(self) <= other
+
+    def __gt__(self, other):
+        """Compare the full sequence as bytes to other using ``>``."""
+        return bytes(self) > other
+
+    def __ge__(self, other):
+        """Compare the full sequence as bytes to other using ``>=``."""
+        return bytes(self) >= other
+
+    def __add__(self, other):
+        """Return the concatenation of self and other as a bytes object.
+
+        Returns NotImplemented if converting either operand to bytes raises
+        an ``UndefinedSequenceError``.
+        """
+        try:
+            return bytes(self) + bytes(other)
+        except UndefinedSequenceError:
+            return NotImplemented
+            # will be handled by _UndefinedSequenceData.__radd__ or
+            # by _PartiallyDefinedSequenceData.__radd__
+
+    def __radd__(self, other):
+        """Return other concatenated with the full sequence as bytes."""
+        return other + bytes(self)
+
+    def __mul__(self, other):
+        """Return the full sequence as bytes, repeated other times."""
+        return other * bytes(self)
+
+    def __contains__(self, item):
+        """Return True if item is contained in the full sequence as bytes."""
+        return bytes(self).__contains__(item)
+
+    def decode(self, encoding="utf-8"):
+        """Decode the data as bytes using the codec registered for encoding.
+
+        encoding
+          The encoding with which to decode the bytes.
+        """
+        return bytes(self).decode(encoding)
+
+    def count(self, sub, start=None, end=None):
+        """Return the number of non-overlapping occurrences of sub in data[start:end].
+
+        Optional arguments start and end are interpreted as in slice notation.
+        This method behaves as the count method of Python strings.
+        """
+        return bytes(self).count(sub, start, end)
+
+    def find(self, sub, start=None, end=None):
+        """Return the lowest index in data where subsection sub is found.
+
+        Return the lowest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Return -1 on failure.
+        """
+        return bytes(self).find(sub, start, end)
+
+    def rfind(self, sub, start=None, end=None):
+        """Return the highest index in data where subsection sub is found.
+
+        Return the highest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Return -1 on failure.
+        """
+        return bytes(self).rfind(sub, start, end)
+
+    def index(self, sub, start=None, end=None):
+        """Return the lowest index in data where subsection sub is found.
+
+        Return the lowest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Raises ValueError when the subsection is not found.
+        """
+        return bytes(self).index(sub, start, end)
+
+    def rindex(self, sub, start=None, end=None):
+        """Return the highest index in data where subsection sub is found.
+
+        Return the highest index in data where subsection sub is found,
+        such that sub is contained within data[start:end].  Optional
+        arguments start and end are interpreted as in slice notation.
+
+        Raises ValueError when the subsection is not found.
+        """
+        return bytes(self).rindex(sub, start, end)
+
+    def startswith(self, prefix, start=None, end=None):
+        """Return True if data starts with the specified prefix, False otherwise.
+
+        With optional start, test data beginning at that position.
+        With optional end, stop comparing data at that position.
+        prefix can also be a tuple of bytes to try.
+        """
+        return bytes(self).startswith(prefix, start, end)
+
+    def endswith(self, suffix, start=None, end=None):
+        """Return True if data ends with the specified suffix, False otherwise.
+
+        With optional start, test data beginning at that position.
+        With optional end, stop comparing data at that position.
+        suffix can also be a tuple of bytes to try.
+        """
+        return bytes(self).endswith(suffix, start, end)
+
+    def split(self, sep=None, maxsplit=-1):
+        """Return a list of the sections in the data, using sep as the delimiter.
+
+        sep
+          The delimiter according to which to split the data.
+          None (the default value) means split on ASCII whitespace characters
+          (space, tab, return, newline, formfeed, vertical tab).
+        maxsplit
+          Maximum number of splits to do.
+          -1 (the default value) means no limit.
+        """
+        return bytes(self).split(sep, maxsplit)
+
+    def rsplit(self, sep=None, maxsplit=-1):
+        """Return a list of the sections in the data, using sep as the delimiter.
+
+        sep
+          The delimiter according to which to split the data.
+          None (the default value) means split on ASCII whitespace characters
+          (space, tab, return, newline, formfeed, vertical tab).
+        maxsplit
+          Maximum number of splits to do.
+          -1 (the default value) means no limit.
+
+        Splitting is done starting at the end of the data and working to the front.
+        """
+        return bytes(self).rsplit(sep, maxsplit)
+
+    def strip(self, chars=None):
+        """Strip leading and trailing characters contained in the argument.
+
+        If the argument is omitted or None, strip leading and trailing ASCII whitespace.
+        """
+        return bytes(self).strip(chars)
+
+    def lstrip(self, chars=None):
+        """Strip leading characters contained in the argument.
+
+        If the argument is omitted or None, strip leading ASCII whitespace.
+        """
+        return bytes(self).lstrip(chars)
+
+    def rstrip(self, chars=None):
+        """Strip trailing characters contained in the argument.
+
+        If the argument is omitted or None, strip trailing ASCII whitespace.
+        """
+        return bytes(self).rstrip(chars)
+
+    def removeprefix(self, prefix):
+        """Remove the prefix if present.
+
+        Returns a bytes object; the data is returned unchanged if it does
+        not start with prefix.
+        """
+        # Want to do just this, but need Python 3.9+
+        # return bytes(self).removeprefix(prefix)
+        data = bytes(self)
+        try:
+            return data.removeprefix(prefix)
+        except AttributeError:
+            # bytes.removeprefix does not exist before Python 3.9
+            if data.startswith(prefix):
+                return data[len(prefix) :]
+            else:
+                return data
+
+    def removesuffix(self, suffix):
+        """Remove the suffix if present.
+
+        Returns a bytes object; the data is returned unchanged if it does
+        not end with suffix, or if suffix is empty.
+        """
+        # Want to do just this, but need Python 3.9+
+        # return bytes(self).removesuffix(suffix)
+        data = bytes(self)
+        try:
+            return data.removesuffix(suffix)
+        except AttributeError:
+            # bytes.removesuffix does not exist before Python 3.9.
+            # The suffix must be matched at the END of the data, and an
+            # empty suffix must be skipped explicitly because data[:-0]
+            # would be the empty slice data[:0].
+            if suffix and data.endswith(suffix):
+                return data[: -len(suffix)]
+            else:
+                return data
+
+    def upper(self):
+        """Return a copy of data with all ASCII characters converted to uppercase."""
+        return bytes(self).upper()
+
+    def lower(self):
+        """Return a copy of data with all ASCII characters converted to lowercase."""
+        return bytes(self).lower()
+
+    def isupper(self):
+        """Return True if all ASCII characters in data are uppercase.
+
+        If there are no cased characters, the method returns False.
+        """
+        return bytes(self).isupper()
+
+    def islower(self):
+        """Return True if all ASCII characters in data are lowercase.
+
+        If there are no cased characters, the method returns False.
+        """
+        return bytes(self).islower()
+
+    def replace(self, old, new):
+        """Return a copy with all occurrences of substring old replaced by new."""
+        return bytes(self).replace(old, new)
+
+    def translate(self, table, delete=b""):
+        """Return a copy with each character mapped by the given translation table.
+
+          table
+            Translation table, which must be a bytes object of length 256.
+
+        All characters occurring in the optional argument delete are removed.
+        The remaining characters are mapped through the given translation table.
+        """
+        return bytes(self).translate(table, delete)
+
+    @property
+    def defined(self):
+        """Return True if the sequence is defined, False if undefined or partially defined.
+
+        Zero-length sequences are always considered to be defined.
+        """
+        return True
+
+    @property
+    def defined_ranges(self):
+        """Return a tuple of the ranges where the sequence contents is defined.
+
+        The return value has the format ((start1, end1), (start2, end2), ...).
+        """
+        length = len(self)
+        if length > 0:
+            return ((0, length),)
+        else:
+            return ()
+
+
+class _SeqAbstractBaseClass(ABC):
+    """Abstract base class for the Seq and MutableSeq classes (PRIVATE).
+
+    Most users will not need to use this class. It is used internally as an
+    abstract base class for Seq and MutableSeq, as most of their methods are
+    identical.
+    """
+
+    __slots__ = ("_data",)
+    __array_ufunc__ = None  # turn off numpy Ufuncs
+
+    @abstractmethod
+    def __init__(self):
+        """Do nothing; concrete subclasses must provide their own ``__init__``."""
+
+    def __bytes__(self):
+        """Return the sequence data converted to a ``bytes`` object."""
+        contents = self._data
+        return bytes(contents)
+
+    def __repr__(self):
+        """Return a (possibly abbreviated) representation of the sequence.
+
+        Any stretch of sequence longer than 60 letters is shown as its first
+        54 letters, three dots, and its last 3 letters.
+        """
+
+        def abbreviate(raw):
+            # Keep the last three letters visible, as it is often useful to
+            # see if there is a stop codon at the end of a sequence.
+            # Note total length is 54+3+3=60
+            if len(raw) <= 60:
+                return raw.decode("ASCII")
+            head = raw[:54].decode("ASCII")
+            tail = raw[-3:].decode("ASCII")
+            return f"{head}...{tail}"
+
+        data = self._data
+        if isinstance(data, _UndefinedSequenceData):
+            return f"Seq(None, length={len(self)})"
+        if isinstance(data, _PartiallyDefinedSequenceData):
+            # Show each defined stretch of sequence, keyed by its position
+            fragments = {
+                position: abbreviate(seq) for position, seq in data._data.items()
+            }
+            return "Seq(%r, length=%d)" % (fragments, len(self))
+        return f"{self.__class__.__name__}('{abbreviate(data)}')"
+
+    def __str__(self):
+        """Decode the complete sequence data into a python string."""
+        text = self._data.decode("ASCII")
+        return text
+
+    def __eq__(self, other):
+        """Test whether the sequence contents equals that of other.
+
+        Sequence objects compare equal if their sequence data are equal:
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> seq1 = Seq("ACGT")
+        >>> seq2 = Seq("ACGT")
+        >>> mutable_seq = MutableSeq("ACGT")
+        >>> seq1 == seq2
+        True
+        >>> seq1 == mutable_seq
+        True
+
+        This is a comparison of the contents; the objects themselves remain
+        distinct:
+
+        >>> id(seq1) == id(seq2)
+        False
+        >>> seq1 is seq2
+        False
+
+        A string is encoded as ASCII before it is compared, while ``bytes``
+        and ``bytearray`` objects are compared to the data directly:
+
+        >>> seq1 == "ACGT"
+        True
+        >>> seq1 == b"ACGT"
+        True
+        >>> seq1 == bytearray(b"ACGT")
+        True
+        """
+        # Reduce the right-hand side to something comparable with the data
+        if isinstance(other, _SeqAbstractBaseClass):
+            other = other._data
+        elif isinstance(other, str):
+            other = other.encode("ASCII")
+        return self._data == other
+
+    def __lt__(self, other):
+        """Compare the sequence data to other using the ``<`` operator.
+
+        Strings are encoded as ASCII before the comparison.
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            other = other._data
+        elif isinstance(other, str):
+            other = other.encode("ASCII")
+        return self._data < other
+
+    def __le__(self, other):
+        """Compare the sequence data to other using the ``<=`` operator.
+
+        Strings are encoded as ASCII before the comparison.
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            other = other._data
+        elif isinstance(other, str):
+            other = other.encode("ASCII")
+        return self._data <= other
+
+    def __gt__(self, other):
+        """Compare the sequence data to other using the ``>`` operator.
+
+        Strings are encoded as ASCII before the comparison.
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            other = other._data
+        elif isinstance(other, str):
+            other = other.encode("ASCII")
+        return self._data > other
+
+    def __ge__(self, other):
+        """Compare the sequence data to other using the ``>=`` operator.
+
+        Strings are encoded as ASCII before the comparison.
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            other = other._data
+        elif isinstance(other, str):
+            other = other.encode("ASCII")
+        return self._data >= other
+
+    def __len__(self):
+        """Return the number of letters, as given by the length of the data."""
+        size = len(self._data)
+        return size
+
+    def __iter__(self):
+        """Return an iterator over the letters of the ASCII-decoded sequence."""
+        letters = self._data.decode("ASCII")
+        return iter(letters)
+
+    @overload
+    def __getitem__(self, index: int) -> str:
+        ...
+
+    @overload
+    def __getitem__(self, index: slice) -> "Seq":
+        ...
+
+    def __getitem__(self, index):
+        """Return one letter as a string, or a subsequence as a sequence object.
+
+        An integer index gives the letter at that position as a Python
+        string:
+
+        >>> seq = Seq('ACTCGACGTCG')
+        >>> seq[5]
+        'A'
+
+        Any other index (such as a slice) gives a new sequence object of the
+        same class:
+
+        >>> seq[5:8]
+        Seq('ACG')
+        >>> mutable_seq = MutableSeq('ACTCGACGTCG')
+        >>> mutable_seq[5:8]
+        MutableSeq('ACG')
+        """
+        selected = self._data[index]
+        if not isinstance(index, numbers.Integral):
+            # Wrap the (sub)sequence data in another Seq/MutableSeq object
+            return self.__class__(selected)
+        # Integer index: the data gives a byte value, return it as a letter
+        return chr(selected)
+
+    def __add__(self, other):
+        """Return a new sequence with a sequence or string added on the right.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> Seq("MELKI") + "LV"
+        Seq('MELKILV')
+        >>> MutableSeq("MELKI") + "LV"
+        MutableSeq('MELKILV')
+
+        The result has the same class as the left operand.
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            addition = other._data
+        elif isinstance(other, str):
+            addition = other.encode("ASCII")
+        else:
+            # If other is a SeqRecord, then SeqRecord's __radd__ will handle
+            # this. If not, returning NotImplemented will trigger a TypeError.
+            return NotImplemented
+        return self.__class__(self._data + addition)
+
+    def __radd__(self, other):
+        """Return a new sequence with a string added on the left.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> "LV" + Seq("MELKI")
+        Seq('LVMELKI')
+        >>> "LV" + MutableSeq("MELKI")
+        MutableSeq('LVMELKI')
+
+        Only strings are accepted here, as adding two sequence objects is
+        handled via the __add__ method.
+        """
+        if not isinstance(other, str):
+            return NotImplemented
+        prefix = other.encode("ASCII")
+        return self.__class__(prefix + self._data)
+
+    def __mul__(self, other):
+        """Return a new sequence with the contents repeated other times.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> Seq('ATG') * 2
+        Seq('ATGATG')
+        >>> MutableSeq('ATG') * 2
+        MutableSeq('ATGATG')
+
+        Raises a TypeError if other is not an integer.
+        """
+        if isinstance(other, numbers.Integral):
+            # The __mul__ method is called explicitly, rather than writing
+            # self._data * other, as currently the latter causes a bug on
+            # PyPy if self._data is a bytearray and other is a numpy integer.
+            repeated = self._data.__mul__(other)
+            return self.__class__(repeated)
+        raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+
+    def __rmul__(self, other):
+        """Return a new sequence with the contents repeated other times.
+
+        This handles the case where the integer is on the left:
+
+        >>> from Bio.Seq import Seq
+        >>> 2 * Seq('ATG')
+        Seq('ATGATG')
+
+        Raises a TypeError if other is not an integer.
+        """
+        if isinstance(other, numbers.Integral):
+            # The __mul__ method is called explicitly, rather than writing
+            # self._data * other, as currently the latter causes a bug on
+            # PyPy if self._data is a bytearray and other is a numpy integer.
+            repeated = self._data.__mul__(other)
+            return self.__class__(repeated)
+        raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+
+    def __imul__(self, other):
+        """Return a new sequence with the contents repeated other times.
+
+        >>> from Bio.Seq import Seq
+        >>> seq = Seq('ATG')
+        >>> seq *= 2
+        >>> seq
+        Seq('ATGATG')
+
+        This does not modify the object in place. Instead, a new object is
+        created and assigned to the ``seq`` variable; any other variable that
+        refers to the original object still sees the original contents:
+
+        >>> seq = Seq('ATG')
+        >>> seq2 = seq
+        >>> id(seq) == id(seq2)
+        True
+        >>> seq *= 2
+        >>> seq
+        Seq('ATGATG')
+        >>> seq2
+        Seq('ATG')
+        >>> id(seq) == id(seq2)
+        False
+
+        Raises a TypeError if other is not an integer.
+        """
+        if isinstance(other, numbers.Integral):
+            # The __mul__ method is called explicitly, rather than writing
+            # self._data * other, as currently the latter causes a bug on
+            # PyPy if self._data is a bytearray and other is a numpy integer.
+            repeated = self._data.__mul__(other)
+            return self.__class__(repeated)
+        raise TypeError(f"can't multiply {self.__class__.__name__} by non-int type")
+
+    def count(self, sub, start=None, end=None):
+        """Return a non-overlapping count, like that of a python string.
+
+        Returns an integer, the number of occurrences of substring
+        argument sub in the (sub)sequence given by [start:end].
+        Optional arguments start and end are interpreted as in slice
+        notation.
+
+        Arguments:
+         - sub - a string or another Seq object to look for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("AAAATGA")
+        >>> print(my_seq.count("A"))
+        5
+        >>> print(my_seq.count("ATG"))
+        1
+        >>> print(my_seq.count(Seq("AT")))
+        1
+        >>> print(my_seq.count("AT", 2, -1))
+        1
+
+        HOWEVER, please note that the ``count`` method of Seq and MutableSeq
+        objects, like that of Python strings, does a non-overlapping search,
+        so this may not give the answer you expect:
+
+        >>> "AAAA".count("AA")
+        2
+        >>> print(Seq("AAAA").count("AA"))
+        2
+
+        For an overlapping search, use the ``count_overlap`` method:
+
+        >>> print(Seq("AAAA").count_overlap("AA"))
+        3
+        """
+        # Reduce sub to a bytes-like object before searching the data
+        if isinstance(sub, MutableSeq):
+            pattern = sub._data
+        elif isinstance(sub, Seq):
+            pattern = bytes(sub)
+        elif isinstance(sub, str):
+            pattern = sub.encode("ASCII")
+        elif isinstance(sub, (bytes, bytearray)):
+            pattern = sub
+        else:
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.count(pattern, start, end)
+
+    def count_overlap(self, sub, start=None, end=None):
+        """Return an overlapping count.
+
+        Returns an integer, the number of occurrences of substring
+        argument sub in the (sub)sequence given by [start:end].
+        Optional arguments start and end are interpreted as in slice
+        notation.
+
+        Arguments:
+         - sub - a string or another Seq object to look for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> print(Seq("AAAA").count_overlap("AA"))
+        3
+        >>> print(Seq("ATATATATA").count_overlap("ATA"))
+        4
+        >>> print(Seq("ATATATATA").count_overlap("ATA", 3, -1))
+        1
+
+        For a non-overlapping search, use the ``count`` method:
+
+        >>> print(Seq("AAAA").count("AA"))
+        2
+
+        Where substrings do not overlap, ``count_overlap`` behaves the same as
+        the ``count`` method:
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("AAAATGA")
+        >>> print(my_seq.count_overlap("A"))
+        5
+        >>> my_seq.count_overlap("A") == my_seq.count("A")
+        True
+        >>> print(my_seq.count_overlap("ATG"))
+        1
+        >>> my_seq.count_overlap("ATG") == my_seq.count("ATG")
+        True
+        >>> print(my_seq.count_overlap(Seq("AT")))
+        1
+        >>> my_seq.count_overlap(Seq("AT")) == my_seq.count(Seq("AT"))
+        True
+        >>> print(my_seq.count_overlap("AT", 2, -1))
+        1
+        >>> my_seq.count_overlap("AT", 2, -1) == my_seq.count("AT", 2, -1)
+        True
+
+        HOWEVER, do not use this method for such cases because the
+        count() method is much more efficient.
+        """
+        # Reduce sub to a bytes-like object before searching the data
+        if isinstance(sub, MutableSeq):
+            pattern = sub._data
+        elif isinstance(sub, Seq):
+            pattern = bytes(sub)
+        elif isinstance(sub, str):
+            pattern = sub.encode("ASCII")
+        elif isinstance(sub, (bytes, bytearray)):
+            pattern = sub
+        else:
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        data = self._data
+        occurrences = 0
+        position = data.find(pattern, start, end)
+        while position != -1:
+            occurrences += 1
+            # Resume one letter past the start of the previous match, so
+            # that matches overlapping with it are found as well.
+            position = data.find(pattern, position + 1, end)
+        return occurrences
+
+    def __contains__(self, item):
+        """Test whether item occurs as a subsequence of the sequence.
+
+        The item can be a string or a sequence object, e.g.
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> my_dna = Seq("ATATGAAATTTGAAAA")
+        >>> "AAA" in my_dna
+        True
+        >>> Seq("AAA") in my_dna
+        True
+        >>> MutableSeq("AAA") in my_dna
+        True
+        """
+        needle = item
+        if isinstance(item, _SeqAbstractBaseClass):
+            needle = bytes(item)
+        elif isinstance(item, str):
+            needle = item.encode("ASCII")
+        # Anything else is handed to the data unchanged
+        return needle in self._data
+
+    def find(self, sub, start=None, end=None):
+        """Return the lowest index in the sequence where subsequence sub is found.
+
+        If the optional arguments start and end are given, the subsequence
+        sub must be contained within the sequence region [start:end].
+
+        Arguments:
+         - sub - a string or another Seq or MutableSeq object to search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Returns -1 if the subsequence is NOT found.
+
+        e.g. Locating the first typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.find("AUG")
+        3
+
+        The next typical start codon can then be found by starting the search
+        at position 4:
+
+        >>> my_rna.find("AUG", 4)
+        15
+
+        See the ``search`` method to find the locations of multiple subsequences
+        at the same time.
+        """
+        # Reduce sub to a bytes-like object before searching the data
+        if isinstance(sub, _SeqAbstractBaseClass):
+            pattern = bytes(sub)
+        elif isinstance(sub, str):
+            pattern = sub.encode("ASCII")
+        elif isinstance(sub, (bytes, bytearray)):
+            pattern = sub
+        else:
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.find(pattern, start, end)
+
+    def rfind(self, sub, start=None, end=None):
+        """Return the highest index in the sequence where subsequence sub is found.
+
+        If the optional arguments start and end are given, the subsequence
+        sub must be contained within the sequence region [start:end].
+
+        Arguments:
+         - sub - a string or another Seq or MutableSeq object to search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Returns -1 if the subsequence is NOT found.
+
+        e.g. Locating the last typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.rfind("AUG")
+        15
+
+        The location of the typical start codon before that can be found by
+        ending the search at position 15:
+
+        >>> my_rna.rfind("AUG", end=15)
+        3
+
+        See the ``search`` method to find the locations of multiple subsequences
+        at the same time.
+        """
+        # Reduce sub to a bytes-like object before searching the data
+        if isinstance(sub, _SeqAbstractBaseClass):
+            pattern = bytes(sub)
+        elif isinstance(sub, str):
+            pattern = sub.encode("ASCII")
+        elif isinstance(sub, (bytes, bytearray)):
+            pattern = sub
+        else:
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.rfind(pattern, start, end)
+
+    def index(self, sub, start=None, end=None):
+        """Return the lowest index in the sequence where subsequence sub is found.
+
+        If the optional arguments start and end are given, the subsequence
+        sub must be contained within the sequence region [start:end].
+
+        Arguments:
+         - sub - a string or another Seq or MutableSeq object to search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Raises a ValueError if the subsequence is NOT found.
+
+        e.g. Locating the first typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.index("AUG")
+        3
+
+        The next typical start codon can then be found by starting the search
+        at position 4:
+
+        >>> my_rna.index("AUG", 4)
+        15
+
+        This method performs the same search as the ``find`` method.  However,
+        if the subsequence is not found, ``find`` returns -1 while ``index``
+        raises a ValueError:
+
+        >>> my_rna.index("T")
+        Traceback (most recent call last):
+                   ...
+        ValueError: ...
+        >>> my_rna.find("T")
+        -1
+
+        See the ``search`` method to find the locations of multiple subsequences
+        at the same time.
+        """
+        # Reduce sub to a bytes-like object before searching the data
+        if isinstance(sub, MutableSeq):
+            pattern = sub._data
+        elif isinstance(sub, Seq):
+            pattern = bytes(sub)
+        elif isinstance(sub, str):
+            pattern = sub.encode("ASCII")
+        elif isinstance(sub, (bytes, bytearray)):
+            pattern = sub
+        else:
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.index(pattern, start, end)
+
+    def rindex(self, sub, start=None, end=None):
+        """Return the highest index in the sequence where subsequence sub is found.
+
+        With optional arguments start and end, return the highest index in the
+        sequence such that the subsequence sub is contained within the sequence
+        region [start:end].
+
+        Arguments:
+         - sub - a string, bytes, bytearray, Seq or MutableSeq object to
+           search for
+         - start - optional integer, slice start
+         - end - optional integer, slice end
+
+        Raises a ValueError if the subsequence is NOT found, and a TypeError
+        if sub is of an unsupported type.
+
+        e.g. Locating the last typical start codon, AUG, in an RNA sequence:
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.rindex("AUG")
+        15
+
+        The location of the typical start codon before that can be found by
+        ending the search at position 15:
+
+        >>> my_rna.rindex("AUG", end=15)
+        3
+
+        This method performs the same search as the ``rfind`` method.  However,
+        if the subsequence is not found, ``rfind`` returns -1 while ``rindex``
+        raises a ValueError:
+
+        >>> my_rna.rindex("T")
+        Traceback (most recent call last):
+                   ...
+        ValueError: ...
+        >>> my_rna.rfind("T")
+        -1
+
+        See the ``search`` method to find the locations of multiple subsequences
+        at the same time.
+        """
+        # Normalise the query into something the underlying data can search.
+        if isinstance(sub, MutableSeq):
+            sub = sub._data
+        elif isinstance(sub, Seq):
+            sub = bytes(sub)
+        elif isinstance(sub, str):
+            sub = sub.encode("ASCII")
+        elif not isinstance(sub, (bytes, bytearray)):
+            raise TypeError(
+                "a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                % type(sub)
+            )
+        return self._data.rindex(sub, start, end)
+
+    def search(self, subs):
+        """Search the substrings subs in self and yield the index and substring found.
+
+        Arguments:
+         - subs - a list of strings, Seq, MutableSeq, bytes, or bytearray
+           objects containing the substrings to search for.
+
+        This is a generator yielding ``(index, substring)`` tuples, with the
+        substring given as a plain string.  Matches are produced in order of
+        increasing index, and a substring listed more than once in subs is
+        reported only once per position.  A TypeError is raised (when the
+        generator is first advanced) if an element of subs is of an
+        unsupported type.
+
+        >>> from Bio.Seq import Seq
+        >>> dna = Seq("GTCATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAGTTG")
+        >>> matches = dna.search(["CC", Seq("ATTG"), "ATTG", Seq("CCC")])
+        >>> for index, substring in matches:
+        ...     print(index, substring)
+        ...
+        7 CC
+        9 ATTG
+        20 CC
+        34 CC
+        34 CCC
+        35 CC
+        """
+        # Group the (deduplicated) substrings by length, so that each window
+        # of the sequence is only compared against candidates of equal size.
+        subdict = collections.defaultdict(set)
+        for index, sub in enumerate(subs):
+            if isinstance(sub, str):
+                sub = sub.encode("ASCII")
+            elif isinstance(sub, (bytearray, _SeqAbstractBaseClass)):
+                sub = bytes(sub)
+            elif not isinstance(sub, bytes):
+                raise TypeError(
+                    "subs[%d]: a Seq, MutableSeq, str, bytes, or bytearray object is required, not '%s'"
+                    % (index, type(sub))
+                )
+            subdict[len(sub)].add(sub)
+        # Every position is a possible match start, including the final one
+        # (a one-letter substring can match the last letter of the sequence).
+        for start in range(len(self)):
+            for length, candidates in subdict.items():
+                window = self._data[start : start + length]
+                for sub in candidates:
+                    if window == sub:
+                        yield (start, sub.decode())
+                        # Candidates in one group are distinct and equally
+                        # long, so no other can match at this position.
+                        break
+
+    def startswith(self, prefix, start=None, end=None):
+        """Return True if the sequence starts with the given prefix, False otherwise.
+
+        Return True if the sequence starts with the specified prefix
+        (a string, bytes-like object, or another Seq or MutableSeq object),
+        False otherwise.
+        With optional start, test sequence beginning at that position.
+        With optional end, stop comparing sequence at that position.
+        prefix can also be a tuple of such prefixes to try.  e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.startswith("GUC")
+        True
+        >>> my_rna.startswith("AUG")
+        False
+        >>> my_rna.startswith("AUG", 3)
+        True
+        >>> my_rna.startswith(("UCC", "UCA", "UCG"), 1)
+        True
+        """
+
+        def as_bytes(item):
+            # bytes and bytearray are usable as they are; sequence objects
+            # are converted; anything else is expected to be a string.
+            if isinstance(item, (bytes, bytearray)):
+                return item
+            if isinstance(item, _SeqAbstractBaseClass):
+                return bytes(item)
+            return item.encode("ASCII")
+
+        if isinstance(prefix, tuple):
+            prefix = tuple(as_bytes(p) for p in prefix)
+        elif isinstance(prefix, str):
+            prefix = prefix.encode("ASCII")
+        elif isinstance(prefix, _SeqAbstractBaseClass):
+            prefix = bytes(prefix)
+        return self._data.startswith(prefix, start, end)
+
+    def endswith(self, suffix, start=None, end=None):
+        """Return True if the sequence ends with the given suffix, False otherwise.
+
+        Return True if the sequence ends with the specified suffix
+        (a string, bytes-like object, or another Seq or MutableSeq object),
+        False otherwise.
+        With optional start, test sequence beginning at that position.
+        With optional end, stop comparing sequence at that position.
+        suffix can also be a tuple of such suffixes to try.  e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_rna.endswith("UUG")
+        True
+        >>> my_rna.endswith("AUG")
+        False
+        >>> my_rna.endswith("AUG", 0, 18)
+        True
+        >>> my_rna.endswith(("UCC", "UCA", "UUG"))
+        True
+        """
+
+        def as_bytes(item):
+            # bytes and bytearray are usable as they are; sequence objects
+            # are converted; anything else is expected to be a string.
+            if isinstance(item, (bytes, bytearray)):
+                return item
+            if isinstance(item, _SeqAbstractBaseClass):
+                return bytes(item)
+            return item.encode("ASCII")
+
+        if isinstance(suffix, tuple):
+            suffix = tuple(as_bytes(p) for p in suffix)
+        elif isinstance(suffix, str):
+            suffix = suffix.encode("ASCII")
+        elif isinstance(suffix, _SeqAbstractBaseClass):
+            suffix = bytes(suffix)
+        return self._data.endswith(suffix, start, end)
+
+    def split(self, sep=None, maxsplit=-1):
+        """Split the sequence at separator sep and return the pieces as a list.
+
+        The pieces are returned as Seq objects, with sep (a string or a
+        sequence object) acting as the delimiter.  At most maxsplit splits
+        are made if maxsplit is given; otherwise every possible split is
+        made.
+
+        If sep is None (the default value), any whitespace (tabs, spaces,
+        newlines) is a separator, for consistency with the ``split`` method
+        of Python strings.
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_aa = my_rna.translate()
+        >>> my_aa
+        Seq('VMAIVMGR*KGAR*L')
+        >>> for pep in my_aa.split("*"):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR')
+        Seq('L')
+        >>> for pep in my_aa.split("*", 1):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR*L')
+
+        See also the rsplit method, which splits the sequence starting from the
+        end:
+
+        >>> for pep in my_aa.rsplit("*", 1):
+        ...     pep
+        Seq('VMAIVMGR*KGAR')
+        Seq('L')
+        """
+        delimiter = sep
+        if isinstance(delimiter, _SeqAbstractBaseClass):
+            delimiter = bytes(delimiter)
+        elif isinstance(delimiter, str):
+            delimiter = delimiter.encode("ASCII")
+        pieces = self._data.split(delimiter, maxsplit)
+        return list(map(Seq, pieces))
+
+    def rsplit(self, sep=None, maxsplit=-1):
+        """Split the sequence at separator sep, working from the right.
+
+        The pieces are returned as a list of Seq objects, with sep (a string
+        or a sequence object) acting as the delimiter.  At most maxsplit
+        splits are made if maxsplit is given; otherwise every possible split
+        is made.
+
+        If sep is None (the default value), any whitespace (tabs, spaces,
+        newlines) is a separator, for consistency with the ``rsplit`` method
+        of Python strings.
+
+        e.g.
+
+        >>> from Bio.Seq import Seq
+        >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
+        >>> my_aa = my_rna.translate()
+        >>> my_aa
+        Seq('VMAIVMGR*KGAR*L')
+        >>> for pep in my_aa.rsplit("*"):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR')
+        Seq('L')
+        >>> for pep in my_aa.rsplit("*", 1):
+        ...     pep
+        Seq('VMAIVMGR*KGAR')
+        Seq('L')
+
+        See also the split method, which splits the sequence starting from the
+        beginning:
+
+        >>> for pep in my_aa.split("*", 1):
+        ...     pep
+        Seq('VMAIVMGR')
+        Seq('KGAR*L')
+        """
+        delimiter = sep
+        if isinstance(delimiter, _SeqAbstractBaseClass):
+            delimiter = bytes(delimiter)
+        elif isinstance(delimiter, str):
+            delimiter = delimiter.encode("ASCII")
+        pieces = self._data.rsplit(delimiter, maxsplit)
+        return list(map(Seq, pieces))
+
+    def strip(self, chars=None, inplace=False):
+        """Return a sequence object with both the leading and trailing end stripped.
+
+        Called without arguments, whitespace is removed from both ends:
+
+        >>> seq = Seq(" ACGT ")
+        >>> seq.strip()
+        Seq('ACGT')
+        >>> seq
+        Seq(' ACGT ')
+
+        If ``chars`` is given and not ``None``, the characters in ``chars``
+        are removed instead.  ``chars`` is treated as a set of characters, so
+        their order is not important:
+
+        >>> Seq("ACGTACGT").strip("TGCA")
+        Seq('')
+
+        With ``inplace`` set to ``False`` (the default value), the sequence
+        itself is left untouched and a stripped copy is returned.  With
+        ``inplace`` set to ``True``, the sequence is stripped in-place and
+        returned.
+
+        >>> seq = MutableSeq(" ACGT ")
+        >>> seq.strip()
+        MutableSeq('ACGT')
+        >>> seq
+        MutableSeq(' ACGT ')
+        >>> seq.strip(inplace=True)
+        MutableSeq('ACGT')
+        >>> seq
+        MutableSeq('ACGT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if ``strip``
+        is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the lstrip and rstrip methods.
+        """
+        unwanted = chars
+        if isinstance(unwanted, _SeqAbstractBaseClass):
+            unwanted = bytes(unwanted)
+        elif isinstance(unwanted, str):
+            unwanted = unwanted.encode("ASCII")
+        try:
+            stripped = self._data.strip(unwanted)
+        except TypeError:
+            # replace the low-level message with one naming the accepted types
+            raise TypeError(
+                "argument must be None or a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        if not inplace:
+            return self.__class__(stripped)
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        self._data[:] = stripped
+        return self
+
+    def lstrip(self, chars=None, inplace=False):
+        """Return a sequence object with leading ends stripped.
+
+        With default arguments, leading whitespace is removed:
+
+        >>> seq = Seq(" ACGT ")
+        >>> seq.lstrip()
+        Seq('ACGT ')
+        >>> seq
+        Seq(' ACGT ')
+
+        If ``chars`` is given and not ``None``, remove characters in ``chars``
+        from the leading end instead.  The order of the characters to be removed
+        is not important:
+
+        >>> Seq("ACGACGTTACG").lstrip("GCA")
+        Seq('TTACG')
+
+        A copy of the sequence is returned if ``inplace`` is ``False`` (the
+        default value).  If ``inplace`` is ``True``, the sequence is stripped
+        in-place and returned.
+
+        >>> seq = MutableSeq(" ACGT ")
+        >>> seq.lstrip()
+        MutableSeq('ACGT ')
+        >>> seq
+        MutableSeq(' ACGT ')
+        >>> seq.lstrip(inplace=True)
+        MutableSeq('ACGT ')
+        >>> seq
+        MutableSeq('ACGT ')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``lstrip`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the strip and rstrip methods.
+        """
+        if isinstance(chars, _SeqAbstractBaseClass):
+            chars = bytes(chars)
+        elif isinstance(chars, str):
+            chars = chars.encode("ASCII")
+        try:
+            data = self._data.lstrip(chars)
+        except TypeError:
+            # replace the low-level message with one naming the accepted types
+            raise TypeError(
+                "argument must be None or a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        else:
+            return self.__class__(data)
+
+    def rstrip(self, chars=None, inplace=False):
+        """Return a sequence object with the trailing end stripped.
+
+        Called without arguments, trailing whitespace is removed:
+
+        >>> seq = Seq(" ACGT ")
+        >>> seq.rstrip()
+        Seq(' ACGT')
+        >>> seq
+        Seq(' ACGT ')
+
+        If ``chars`` is given and not ``None``, the characters in ``chars``
+        are removed from the trailing end instead.  ``chars`` is treated as
+        a set of characters, so their order is not important:
+
+        >>> Seq("ACGACGTTACG").rstrip("GCA")
+        Seq('ACGACGTT')
+
+        With ``inplace`` set to ``False`` (the default value), the sequence
+        itself is left untouched and a stripped copy is returned.  With
+        ``inplace`` set to ``True``, the sequence is stripped in-place and
+        returned.
+
+        >>> seq = MutableSeq(" ACGT ")
+        >>> seq.rstrip()
+        MutableSeq(' ACGT')
+        >>> seq
+        MutableSeq(' ACGT ')
+        >>> seq.rstrip(inplace=True)
+        MutableSeq(' ACGT')
+        >>> seq
+        MutableSeq(' ACGT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``rstrip`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the strip and lstrip methods.
+        """
+        unwanted = chars
+        if isinstance(unwanted, _SeqAbstractBaseClass):
+            unwanted = bytes(unwanted)
+        elif isinstance(unwanted, str):
+            unwanted = unwanted.encode("ASCII")
+        try:
+            stripped = self._data.rstrip(unwanted)
+        except TypeError:
+            # replace the low-level message with one naming the accepted types
+            raise TypeError(
+                "argument must be None or a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        if not inplace:
+            return self.__class__(stripped)
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        self._data[:] = stripped
+        return self
+
+    def removeprefix(self, prefix, inplace=False):
+        """Return the sequence with prefix removed from its left end.
+
+        This behaves like the python string method of the same name: the
+        sequence is returned unchanged if it does not start with prefix.
+
+        e.g. Removing a start Codon:
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("ATGGTGTGTGT")
+        >>> my_seq
+        Seq('ATGGTGTGTGT')
+        >>> my_seq.removeprefix('ATG')
+        Seq('GTGTGTGT')
+
+        A new sequence object is returned if ``inplace`` is ``False`` (the
+        default value); otherwise the sequence is modified in-place and
+        returned.  As ``Seq`` objects are immutable, a ``TypeError`` is raised
+        if ``removeprefix`` is called on a ``Seq`` object with
+        ``inplace=True``.
+
+        See also the removesuffix method.
+        """
+        if isinstance(prefix, _SeqAbstractBaseClass):
+            prefix = bytes(prefix)
+        elif isinstance(prefix, str):
+            prefix = prefix.encode("ASCII")
+        try:
+            trimmed = self._data.removeprefix(prefix)
+        except TypeError:
+            raise TypeError(
+                "argument must be a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        except AttributeError:
+            # Fall back for pre-Python 3.9, where the data object has no
+            # removeprefix method: slice the prefix off by hand.
+            trimmed = self._data
+            if trimmed.startswith(prefix):
+                trimmed = trimmed[len(prefix) :]
+        if not inplace:
+            return self.__class__(trimmed)
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        self._data[:] = trimmed
+        return self
+
+    def removesuffix(self, suffix, inplace=False):
+        """Return a new Seq object with suffix (right) removed.
+
+        This behaves like the python string method of the same name: the
+        sequence is returned unchanged if it does not end with suffix, or if
+        suffix is empty.
+
+        e.g. Removing a stop codon:
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("GTGTGTGTTAG")
+        >>> my_seq
+        Seq('GTGTGTGTTAG')
+        >>> stop_codon = Seq("TAG")
+        >>> my_seq.removesuffix(stop_codon)
+        Seq('GTGTGTGT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``removesuffix`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the removeprefix method.
+        """
+        if isinstance(suffix, str):
+            suffix = suffix.encode("ASCII")
+        elif isinstance(suffix, _SeqAbstractBaseClass):
+            suffix = bytes(suffix)
+        try:
+            data = self._data.removesuffix(suffix)
+        except TypeError:
+            raise TypeError(
+                "argument must be a string, Seq, MutableSeq, or bytes-like object"
+            ) from None
+        except AttributeError:
+            # Fall back for pre-Python 3.9
+            data = self._data
+            if data.endswith(suffix):
+                # Compute the end index explicitly: with an empty suffix,
+                # data[:-len(suffix)] would be data[:0] and drop everything.
+                data = data[: len(data) - len(suffix)]
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        else:
+            return self.__class__(data)
+
+    def upper(self, inplace=False):
+        """Return the sequence converted to upper case.
+
+        With inplace set to False (the default value), the sequence itself
+        is left untouched and an upper-case copy is returned:
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> my_seq = Seq("VHLTPeeK*")
+        >>> my_seq
+        Seq('VHLTPeeK*')
+        >>> my_seq.lower()
+        Seq('vhltpeek*')
+        >>> my_seq.upper()
+        Seq('VHLTPEEK*')
+        >>> my_seq
+        Seq('VHLTPeeK*')
+
+        The same holds for a mutable sequence:
+
+        >>> my_seq = MutableSeq("VHLTPeeK*")
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+        >>> my_seq.lower()
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper()
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+
+        With inplace set to True, the sequence is modified in-place and
+        returned:
+
+        >>> my_seq.lower(inplace=True)
+        MutableSeq('vhltpeek*')
+        >>> my_seq
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper(inplace=True)
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPEEK*')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``upper`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the ``lower`` method.
+        """
+        converted = self._data.upper()
+        if not inplace:
+            return self.__class__(converted)
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        self._data[:] = converted
+        return self
+
+    def lower(self, inplace=False):
+        """Return the sequence converted to lower case.
+
+        With inplace set to False (the default value), the sequence itself
+        is left untouched and a lower-case copy is returned:
+
+        >>> from Bio.Seq import Seq, MutableSeq
+        >>> my_seq = Seq("VHLTPeeK*")
+        >>> my_seq
+        Seq('VHLTPeeK*')
+        >>> my_seq.lower()
+        Seq('vhltpeek*')
+        >>> my_seq.upper()
+        Seq('VHLTPEEK*')
+        >>> my_seq
+        Seq('VHLTPeeK*')
+
+        The same holds for a mutable sequence:
+
+        >>> my_seq = MutableSeq("VHLTPeeK*")
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+        >>> my_seq.lower()
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper()
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPeeK*')
+
+        With inplace set to True, the sequence is modified in-place and
+        returned:
+
+        >>> my_seq.lower(inplace=True)
+        MutableSeq('vhltpeek*')
+        >>> my_seq
+        MutableSeq('vhltpeek*')
+        >>> my_seq.upper(inplace=True)
+        MutableSeq('VHLTPEEK*')
+        >>> my_seq
+        MutableSeq('VHLTPEEK*')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``lower`` is called on a ``Seq`` object with ``inplace=True``.
+
+        See also the ``upper`` method.
+        """
+        converted = self._data.lower()
+        if not inplace:
+            return self.__class__(converted)
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        self._data[:] = converted
+        return self
+
+    def isupper(self):
+        """Report whether the sequence is entirely in upper case.
+
+        Returns True if all cased ASCII characters in the sequence data are
+        uppercase.  Returns False if there are no cased characters at all.
+        """
+        data = self._data
+        return data.isupper()
+
+    def islower(self):
+        """Report whether the sequence is entirely in lower case.
+
+        Returns True if all cased ASCII characters in the sequence data are
+        lowercase.  Returns False if there are no cased characters at all.
+        """
+        data = self._data
+        return data.islower()
+
+    def translate(
+        self, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap="-"
+    ):
+        """Turn a nucleotide sequence into a protein sequence by creating a new sequence object.
+
+        This method will translate DNA or RNA sequences. It should not
+        be used on protein sequences as any result will be biologically
+        meaningless.
+
+        Arguments:
+         - table - Which codon table to use?  This can be either a name
+           (string), an NCBI identifier (integer), or a CodonTable
+           object (useful for non-standard genetic codes).  This
+           defaults to the "Standard" table.
+         - stop_symbol - Single character string, what to use for
+           terminators.  This defaults to the asterisk, "*".
+         - to_stop - Boolean, defaults to False meaning do a full
+           translation continuing on past any stop codons (translated as the
+           specified stop_symbol).  If True, translation is terminated at
+           the first in frame stop codon (and the stop_symbol is not
+           appended to the returned protein sequence).
+         - cds - Boolean, indicates this is a complete CDS.  If True,
+           this checks the sequence starts with a valid alternative start
+           codon (which will be translated as methionine, M), that the
+           sequence length is a multiple of three, and that there is a
+           single in frame stop codon at the end (this will be excluded
+           from the protein sequence, regardless of the to_stop option).
+           If these tests fail, an exception is raised.
+         - gap - Single character string to denote symbol used for gaps.
+           Defaults to the minus sign.
+
+        A ``Seq`` object is returned if ``translate`` is called on a ``Seq``
+        object; a ``MutableSeq`` object is returned if ``translate`` is called
+        on a ``MutableSeq`` object.
+
+        e.g. Using the standard table:
+
+        >>> coding_dna = Seq("GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
+        >>> coding_dna.translate()
+        Seq('VAIVMGR*KGAR*')
+        >>> coding_dna.translate(stop_symbol="@")
+        Seq('VAIVMGR@KGAR@')
+        >>> coding_dna.translate(to_stop=True)
+        Seq('VAIVMGR')
+
+        Now using NCBI table 2, where TGA is not a stop codon:
+
+        >>> coding_dna.translate(table=2)
+        Seq('VAIVMGRWKGAR*')
+        >>> coding_dna.translate(table=2, to_stop=True)
+        Seq('VAIVMGRWKGAR')
+
+        In fact, GTG is an alternative start codon under NCBI table 2, meaning
+        this sequence could be a complete CDS:
+
+        >>> coding_dna.translate(table=2, cds=True)
+        Seq('MAIVMGRWKGAR')
+
+        It isn't a valid CDS under NCBI table 1, due to both the start codon
+        and also the in frame stop codons:
+
+        >>> coding_dna.translate(table=1, cds=True)
+        Traceback (most recent call last):
+            ...
+        Bio.Data.CodonTable.TranslationError: First codon 'GTG' is not a start codon
+
+        If the sequence has no in-frame stop codon, then the to_stop argument
+        has no effect:
+
+        >>> coding_dna2 = Seq("TTGGCCATTGTAATGGGCCGC")
+        >>> coding_dna2.translate()
+        Seq('LAIVMGR')
+        >>> coding_dna2.translate(to_stop=True)
+        Seq('LAIVMGR')
+
+        NOTE - Ambiguous codons like "TAN" or "NNN" could be an amino acid
+        or a stop codon.  These are translated as "X".  Any invalid codon
+        (e.g. "TA?" or "T-A") will throw a TranslationError.
+
+        NOTE - This does NOT behave like the python string's translate
+        method.  For that use str(my_seq).translate(...) instead
+        """
+        try:
+            data = str(self)
+        except UndefinedSequenceError:
+            # translating an undefined sequence yields an undefined
+            # sequence with the length divided by 3
+            n = len(self)
+            if n % 3 != 0:
+                warnings.warn(
+                    "Partial codon, len(sequence) not a multiple of three. "
+                    "This may become an error in future.",
+                    BiopythonWarning,
+                )
+            return Seq(None, n // 3)
+
+        # Reuse the string obtained above instead of converting a second time.
+        return self.__class__(
+            _translate_str(data, table, stop_symbol, to_stop, cds, gap=gap)
+        )
+
+    def complement(self, inplace=False):
+        """Return the complement as a DNA sequence.
+
+        >>> Seq("CGA").complement()
+        Seq('GCT')
+
+        Any U in the sequence is treated as a T:
+
+        >>> Seq("CGAUT").complement()
+        Seq('GCTAA')
+
+        In contrast, ``complement_rna`` returns an RNA sequence:
+
+        >>> Seq("CGAUT").complement_rna()
+        Seq('GCUAA')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> my_seq = MutableSeq("CGA")
+        >>> my_seq
+        MutableSeq('CGA')
+        >>> my_seq.complement()
+        MutableSeq('GCT')
+        >>> my_seq
+        MutableSeq('CGA')
+
+        >>> my_seq.complement(inplace=True)
+        MutableSeq('GCT')
+        >>> my_seq
+        MutableSeq('GCT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``complement`` is called on a ``Seq`` object with ``inplace=True``.
+        """
+        try:
+            data = self._data.translate(_dna_complement_table)
+        except UndefinedSequenceError:
+            # complement of an undefined sequence is an undefined sequence
+            # of the same length
+            return self
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+    def complement_rna(self, inplace=False):
+        """Return the complement of the sequence as an RNA sequence.
+
+        >>> Seq("CGA").complement_rna()
+        Seq('GCU')
+
+        Any T in the sequence is treated as a U:
+
+        >>> Seq("CGAUT").complement_rna()
+        Seq('GCUAA')
+
+        The ``complement`` method, in contrast, returns a DNA sequence:
+
+        >>> Seq("CGA").complement()
+        Seq('GCT')
+
+        With inplace set to False (the default value), the sequence itself
+        is left untouched; with inplace set to True, the sequence is modified
+        in-place and returned:
+
+        >>> my_seq = MutableSeq("CGA")
+        >>> my_seq
+        MutableSeq('CGA')
+        >>> my_seq.complement_rna()
+        MutableSeq('GCU')
+        >>> my_seq
+        MutableSeq('CGA')
+
+        >>> my_seq.complement_rna(inplace=True)
+        MutableSeq('GCU')
+        >>> my_seq
+        MutableSeq('GCU')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``complement_rna`` is called on a ``Seq`` object with ``inplace=True``.
+        """
+        try:
+            complemented = self._data.translate(_rna_complement_table)
+        except UndefinedSequenceError:
+            # the complement of an undefined sequence is an undefined
+            # sequence of the same length, so the object itself will do
+            return self
+        if not inplace:
+            return self.__class__(complemented)
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        self._data[:] = complemented
+        return self
+
+    def reverse_complement(self, inplace=False):
+        """Return the reverse complement of the sequence as a DNA sequence.
+
+        >>> Seq("CGA").reverse_complement()
+        Seq('TCG')
+
+        Any U in the sequence is treated as a T:
+
+        >>> Seq("CGAUT").reverse_complement()
+        Seq('AATCG')
+
+        The ``reverse_complement_rna`` method, in contrast, returns an RNA
+        sequence:
+
+        >>> Seq("CGA").reverse_complement_rna()
+        Seq('UCG')
+
+        With inplace set to False (the default value), the sequence itself
+        is left untouched; with inplace set to True, the sequence is modified
+        in-place and returned:
+
+        >>> my_seq = MutableSeq("CGA")
+        >>> my_seq
+        MutableSeq('CGA')
+        >>> my_seq.reverse_complement()
+        MutableSeq('TCG')
+        >>> my_seq
+        MutableSeq('CGA')
+
+        >>> my_seq.reverse_complement(inplace=True)
+        MutableSeq('TCG')
+        >>> my_seq
+        MutableSeq('TCG')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``reverse_complement`` is called on a ``Seq`` object with
+        ``inplace=True``.
+        """
+        try:
+            complemented = self._data.translate(_dna_complement_table)
+        except UndefinedSequenceError:
+            # the reverse complement of an undefined sequence is an undefined
+            # sequence of the same length, so the object itself will do
+            return self
+        if not inplace:
+            return self.__class__(complemented[::-1])
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        # Writing through the reversed slice stores the complement back to front.
+        self._data[::-1] = complemented
+        return self
+
+    def reverse_complement_rna(self, inplace=False):
+        """Return the reverse complement of the sequence as an RNA sequence.
+
+        >>> Seq("CGA").reverse_complement_rna()
+        Seq('UCG')
+
+        Any T in the sequence is treated as a U:
+
+        >>> Seq("CGAUT").reverse_complement_rna()
+        Seq('AAUCG')
+
+        The ``reverse_complement`` method, in contrast, returns a DNA
+        sequence:
+
+        >>> Seq("CGA").reverse_complement()
+        Seq('TCG')
+
+        With inplace set to False (the default value), the sequence itself
+        is left untouched; with inplace set to True, the sequence is modified
+        in-place and returned:
+
+        >>> my_seq = MutableSeq("CGA")
+        >>> my_seq
+        MutableSeq('CGA')
+        >>> my_seq.reverse_complement_rna()
+        MutableSeq('UCG')
+        >>> my_seq
+        MutableSeq('CGA')
+
+        >>> my_seq.reverse_complement_rna(inplace=True)
+        MutableSeq('UCG')
+        >>> my_seq
+        MutableSeq('UCG')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``reverse_complement_rna`` is called on a ``Seq`` object with
+        ``inplace=True``.
+        """
+        try:
+            complemented = self._data.translate(_rna_complement_table)
+        except UndefinedSequenceError:
+            # the reverse complement of an undefined sequence is an undefined
+            # sequence of the same length, so the object itself will do
+            return self
+        if not inplace:
+            return self.__class__(complemented[::-1])
+        if not isinstance(self._data, bytearray):
+            raise TypeError("Sequence is immutable")
+        # Writing through the reversed slice stores the complement back to front.
+        self._data[::-1] = complemented
+        return self
+
+    def transcribe(self, inplace=False):
+        """Transcribe a DNA sequence into RNA and return the RNA sequence as a new Seq object.
+
+        Following the usual convention, the sequence is interpreted as the
+        coding strand of the DNA double helix, not the template strand. This
+        means we can get the RNA sequence just by switching T to U.
+
+        >>> from Bio.Seq import Seq
+        >>> coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
+        >>> coding_dna
+        Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> coding_dna.transcribe()
+        Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> sequence = MutableSeq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
+        >>> sequence
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> sequence.transcribe()
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> sequence
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+
+        >>> sequence.transcribe(inplace=True)
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> sequence
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``transcribe`` is called on a ``Seq`` object with ``inplace=True``.
+
+        Trying to transcribe an RNA sequence has no effect.
+        If you have a nucleotide sequence which might be DNA or RNA
+        (or even a mixture), calling the transcribe method will ensure
+        any T becomes U.
+
+        Trying to transcribe a protein sequence will replace any
+        T for Threonine with U for Selenocysteine, which has no
+        biologically plausible rational.
+
+        >>> from Bio.Seq import Seq
+        >>> my_protein = Seq("MAIVMGRT")
+        >>> my_protein.transcribe()
+        Seq('MAIVMGRU')
+        """
+        data = self._data.replace(b"T", b"U").replace(b"t", b"u")
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+    def back_transcribe(self, inplace=False):
+        """Return the DNA sequence from an RNA sequence by creating a new Seq object.
+
+        >>> from Bio.Seq import Seq
+        >>> messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")
+        >>> messenger_rna
+        Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> messenger_rna.back_transcribe()
+        Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+
+        The sequence is modified in-place and returned if inplace is True:
+
+        >>> sequence = MutableSeq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG")
+        >>> sequence
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+        >>> sequence.back_transcribe()
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> sequence
+        MutableSeq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG')
+
+        >>> sequence.back_transcribe(inplace=True)
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+        >>> sequence
+        MutableSeq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``transcribe`` is called on a ``Seq`` object with ``inplace=True``.
+
+        Trying to back-transcribe DNA has no effect, If you have a nucleotide
+        sequence which might be DNA or RNA (or even a mixture), calling the
+        back-transcribe method will ensure any U becomes T.
+
+        Trying to back-transcribe a protein sequence will replace any U for
+        Selenocysteine with T for Threonine, which is biologically meaningless.
+
+        >>> from Bio.Seq import Seq
+        >>> my_protein = Seq("MAIVMGRU")
+        >>> my_protein.back_transcribe()
+        Seq('MAIVMGRT')
+        """
+        data = self._data.replace(b"U", b"T").replace(b"u", b"t")
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+    def join(self, other):
+        """Return a merge of the sequences in other, spaced by the sequence from self.
+
+        Accepts a Seq object, MutableSeq object, or string (and iterates over
+        the letters), or an iterable containing Seq, MutableSeq, or string
+        objects. These arguments will be concatenated with the calling sequence
+        as the spacer:
+
+        >>> concatenated = Seq('NNNNN').join([Seq("AAA"), Seq("TTT"), Seq("PPP")])
+        >>> concatenated
+        Seq('AAANNNNNTTTNNNNNPPP')
+
+        Joining the letters of a single sequence:
+
+        >>> Seq('NNNNN').join(Seq("ACGT"))
+        Seq('ANNNNNCNNNNNGNNNNNT')
+        >>> Seq('NNNNN').join("ACGT")
+        Seq('ANNNNNCNNNNNGNNNNNT')
+        """
+        if isinstance(other, _SeqAbstractBaseClass):
+            return self.__class__(str(self).join(str(other)))
+        elif isinstance(other, str):
+            return self.__class__(str(self).join(other))
+
+        from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+        if isinstance(other, SeqRecord):
+            raise TypeError("Iterable cannot be a SeqRecord")
+
+        for c in other:
+            if isinstance(c, SeqRecord):
+                raise TypeError("Iterable cannot contain SeqRecords")
+            elif not isinstance(c, (str, _SeqAbstractBaseClass)):
+                raise TypeError(
+                    "Input must be an iterable of Seq objects, MutableSeq objects, or strings"
+                )
+        return self.__class__(str(self).join([str(_) for _ in other]))
+
+    def replace(self, old, new, inplace=False):
+        """Return a copy with all occurrences of subsequence old replaced by new.
+
+        >>> s = Seq("ACGTAACCGGTT")
+        >>> t = s.replace("AC", "XYZ")
+        >>> s
+        Seq('ACGTAACCGGTT')
+        >>> t
+        Seq('XYZGTAXYZCGGTT')
+
+        For mutable sequences, passing inplace=True will modify the sequence in place:
+
+        >>> m = MutableSeq("ACGTAACCGGTT")
+        >>> t = m.replace("AC", "XYZ")
+        >>> m
+        MutableSeq('ACGTAACCGGTT')
+        >>> t
+        MutableSeq('XYZGTAXYZCGGTT')
+
+        >>> m = MutableSeq("ACGTAACCGGTT")
+        >>> t = m.replace("AC", "XYZ", inplace=True)
+        >>> m
+        MutableSeq('XYZGTAXYZCGGTT')
+        >>> t
+        MutableSeq('XYZGTAXYZCGGTT')
+
+        As ``Seq`` objects are immutable, a ``TypeError`` is raised if
+        ``replace`` is called on a ``Seq`` object with ``inplace=True``.
+        """
+        if isinstance(old, _SeqAbstractBaseClass):
+            old = bytes(old)
+        elif isinstance(old, str):
+            old = old.encode("ASCII")
+        if isinstance(new, _SeqAbstractBaseClass):
+            new = bytes(new)
+        elif isinstance(new, str):
+            new = new.encode("ASCII")
+        data = self._data.replace(old, new)
+        if inplace:
+            if not isinstance(self._data, bytearray):
+                raise TypeError("Sequence is immutable")
+            self._data[:] = data
+            return self
+        return self.__class__(data)
+
+    @property
+    def defined(self):
+        """Return True if the sequence is defined, False if undefined or partially defined.
+
+        Zero-length sequences are always considered to be defined.
+        """
+        if isinstance(self._data, (bytes, bytearray)):
+            return True
+        else:
+            return self._data.defined
+
+    @property
+    def defined_ranges(self):
+        """Return a tuple of the ranges where the sequence contents is defined.
+
+        The return value has the format ((start1, end1), (start2, end2), ...).
+        """
+        if isinstance(self._data, (bytes, bytearray)):
+            length = len(self)
+            if length > 0:
+                return ((0, length),)
+            else:
+                return ()
+        else:
+            return self._data.defined_ranges
+
+
+class Seq(_SeqAbstractBaseClass):
+    """Read-only sequence object (essentially a string with biological methods).
+
+    Like normal python strings, our basic sequence object is immutable.
+    This prevents you from doing my_seq[5] = "A" for example, but does allow
+    Seq objects to be used as dictionary keys.
+
+    The Seq object provides a number of string like methods (such as count,
+    find, split and strip).
+
+    The Seq object also provides some biological methods, such as complement,
+    reverse_complement, transcribe, back_transcribe and translate (which are
+    not applicable to protein sequences).
+    """
+
+    _data: Union[bytes, SequenceDataAbstractBaseClass]
+
+    def __init__(
+        self,
+        data: Union[
+            str,
+            bytes,
+            bytearray,
+            _SeqAbstractBaseClass,
+            SequenceDataAbstractBaseClass,
+            dict,
+            None,
+        ],
+        length: Optional[int] = None,
+    ):
+        """Create a Seq object.
+
+        Arguments:
+         - data - Sequence, required (string)
+         - length - Sequence length, used only if data is None or a dictionary (integer)
+
+        You will typically use Bio.SeqIO to read in sequences from files as
+        SeqRecord objects, whose sequence will be exposed as a Seq object via
+        the seq property.
+
+        However, you can also create a Seq object directly:
+
+        >>> from Bio.Seq import Seq
+        >>> my_seq = Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF")
+        >>> my_seq
+        Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF')
+        >>> print(my_seq)
+        MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+
+        To create a Seq object with for a sequence of known length but
+        unknown sequence contents, use None for the data argument and pass
+        the sequence length for the length argument. Trying to access the
+        sequence contents of a Seq object created in this way will raise
+        an UndefinedSequenceError:
+
+        >>> my_undefined_sequence = Seq(None, 20)
+        >>> my_undefined_sequence
+        Seq(None, length=20)
+        >>> len(my_undefined_sequence)
+        20
+        >>> print(my_undefined_sequence)
+        Traceback (most recent call last):
+        ...
+        Bio.Seq.UndefinedSequenceError: Sequence content is undefined
+
+        If the sequence contents is known for parts of the sequence only, use
+        a dictionary for the data argument to pass the known sequence segments:
+
+        >>> my_partially_defined_sequence = Seq({3: "ACGT"}, 10)
+        >>> my_partially_defined_sequence
+        Seq({3: 'ACGT'}, length=10)
+        >>> len(my_partially_defined_sequence)
+        10
+        >>> print(my_partially_defined_sequence)
+        Traceback (most recent call last):
+        ...
+        Bio.Seq.UndefinedSequenceError: Sequence content is only partially defined
+        >>> my_partially_defined_sequence[3:7]
+        Seq('ACGT')
+        >>> print(my_partially_defined_sequence[3:7])
+        ACGT
+        """
+        if data is None:
+            if length is None:
+                raise ValueError("length must not be None if data is None")
+            elif length == 0:
+                self._data = b""
+            elif length < 0:
+                raise ValueError("length must not be negative.")
+            else:
+                self._data = _UndefinedSequenceData(length)
+        elif isinstance(data, (bytes, SequenceDataAbstractBaseClass)):
+            self._data = data
+        elif isinstance(data, (bytearray, _SeqAbstractBaseClass)):
+            self._data = bytes(data)
+        elif isinstance(data, str):
+            self._data = bytes(data, encoding="ASCII")
+        elif isinstance(data, dict):
+            if length is None:
+                raise ValueError("length must not be None if data is a dictionary")
+            elif length == 0:
+                self._data = b""
+            elif length < 0:
+                raise ValueError("length must not be negative.")
+            else:
+                current = 0  # not needed here, but it keeps mypy happy
+                end = -1
+                starts = sorted(data.keys())
+                _data: Dict[int, bytes] = {}
+                for start in starts:
+                    seq = data[start]
+                    if isinstance(seq, str):
+                        seq = bytes(seq, encoding="ASCII")
+                    else:
+                        try:
+                            seq = bytes(seq)
+                        except Exception:
+                            raise ValueError("Expected bytes-like objects or strings")
+                    if start < end:
+                        raise ValueError("Sequence data are overlapping.")
+                    elif start == end:
+                        _data[current] += seq  # noqa: F821
+                    else:
+                        _data[start] = seq
+                        current = start
+                    end = start + len(seq)
+                if end > length:
+                    raise ValueError(
+                        "Provided sequence data extend beyond sequence length."
+                    )
+                elif end == length and current == 0:
+                    # sequence is fully defined
+                    self._data = _data[current]
+                else:
+                    self._data = _PartiallyDefinedSequenceData(length, _data)
+        else:
+            raise TypeError(
+                "data should be a string, bytes, bytearray, Seq, or MutableSeq object"
+            )
+
+    def __hash__(self):
+        """Hash of the sequence as a string for comparison.
+
+        See Seq object comparison documentation (method ``__eq__`` in
+        particular) as this has changed in Biopython 1.65. Older versions
+        would hash on object identity.
+        """
+        return hash(self._data)
+
+
+class MutableSeq(_SeqAbstractBaseClass):
+    """An editable sequence object.
+
+    Unlike normal python strings and our basic sequence object (the Seq class)
+    which are immutable, the MutableSeq lets you edit the sequence in place.
+    However, this means you cannot use a MutableSeq object as a dictionary key.
+
+    >>> from Bio.Seq import MutableSeq
+    >>> my_seq = MutableSeq("ACTCGTCGTCG")
+    >>> my_seq
+    MutableSeq('ACTCGTCGTCG')
+    >>> my_seq[5]
+    'T'
+    >>> my_seq[5] = "A"
+    >>> my_seq
+    MutableSeq('ACTCGACGTCG')
+    >>> my_seq[5]
+    'A'
+    >>> my_seq[5:8] = "NNN"
+    >>> my_seq
+    MutableSeq('ACTCGNNNTCG')
+    >>> len(my_seq)
+    11
+
+    Note that the MutableSeq object does not support as many string-like
+    or biological methods as the Seq object.
+    """
+
+    def __init__(self, data):
+        """Create a MutableSeq object."""
+        if isinstance(data, bytearray):
+            self._data = data
+        elif isinstance(data, bytes):
+            self._data = bytearray(data)
+        elif isinstance(data, str):
+            self._data = bytearray(data, "ASCII")
+        elif isinstance(data, MutableSeq):
+            self._data = data._data[:]  # Take a copy
+        elif isinstance(data, Seq):
+            # Make no assumptions about the Seq subclass internal storage
+            self._data = bytearray(bytes(data))
+        else:
+            raise TypeError(
+                "data should be a string, bytearray object, Seq object, or a "
+                "MutableSeq object"
+            )
+
+    def __setitem__(self, index, value):
+        """Set a subsequence of single letter via value parameter.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq[0] = 'T'
+        >>> my_seq
+        MutableSeq('TCTCGACGTCG')
+        """
+        if isinstance(index, numbers.Integral):
+            # Replacing a single letter with a new string
+            self._data[index] = ord(value)
+        else:
+            # Replacing a sub-sequence
+            if isinstance(value, MutableSeq):
+                self._data[index] = value._data
+            elif isinstance(value, Seq):
+                self._data[index] = bytes(value)
+            elif isinstance(value, str):
+                self._data[index] = value.encode("ASCII")
+            else:
+                raise TypeError(f"received unexpected type '{type(value).__name__}'")
+
+    def __delitem__(self, index):
+        """Delete a subsequence of single letter.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> del my_seq[0]
+        >>> my_seq
+        MutableSeq('CTCGACGTCG')
+        """
+        # Could be deleting a single letter, or a slice
+        del self._data[index]
+
+    def append(self, c):
+        """Add a subsequence to the mutable sequence object.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.append('A')
+        >>> my_seq
+        MutableSeq('ACTCGACGTCGA')
+
+        No return value.
+        """
+        self._data.append(ord(c.encode("ASCII")))
+
+    def insert(self, i, c):
+        """Add a subsequence to the mutable sequence object at a given index.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.insert(0,'A')
+        >>> my_seq
+        MutableSeq('AACTCGACGTCG')
+        >>> my_seq.insert(8,'G')
+        >>> my_seq
+        MutableSeq('AACTCGACGGTCG')
+
+        No return value.
+        """
+        self._data.insert(i, ord(c.encode("ASCII")))
+
+    def pop(self, i=(-1)):
+        """Remove a subsequence of a single letter at given index.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.pop()
+        'G'
+        >>> my_seq
+        MutableSeq('ACTCGACGTC')
+        >>> my_seq.pop()
+        'C'
+        >>> my_seq
+        MutableSeq('ACTCGACGT')
+
+        Returns the last character of the sequence.
+        """
+        c = self._data[i]
+        del self._data[i]
+        return chr(c)
+
+    def remove(self, item):
+        """Remove a subsequence of a single letter from mutable sequence.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.remove('C')
+        >>> my_seq
+        MutableSeq('ATCGACGTCG')
+        >>> my_seq.remove('A')
+        >>> my_seq
+        MutableSeq('TCGACGTCG')
+
+        No return value.
+        """
+        codepoint = ord(item)
+        try:
+            self._data.remove(codepoint)
+        except ValueError:
+            raise ValueError("value not found in MutableSeq") from None
+
+    def reverse(self):
+        """Modify the mutable sequence to reverse itself.
+
+        No return value.
+        """
+        self._data.reverse()
+
+    def extend(self, other):
+        """Add a sequence to the original mutable sequence object.
+
+        >>> my_seq = MutableSeq('ACTCGACGTCG')
+        >>> my_seq.extend('A')
+        >>> my_seq
+        MutableSeq('ACTCGACGTCGA')
+        >>> my_seq.extend('TTT')
+        >>> my_seq
+        MutableSeq('ACTCGACGTCGATTT')
+
+        No return value.
+        """
+        if isinstance(other, MutableSeq):
+            self._data.extend(other._data)
+        elif isinstance(other, Seq):
+            self._data.extend(bytes(other))
+        elif isinstance(other, str):
+            self._data.extend(other.encode("ASCII"))
+        else:
+            raise TypeError("expected a string, Seq or MutableSeq")
+
+
+class UndefinedSequenceError(ValueError):
+    """Sequence contents is undefined.
+
+    Raised when the letters of a sequence are requested although they are
+    unknown for all or part of the sequence. As a ``ValueError`` subclass,
+    it is also caught by handlers for ``ValueError``.
+    """
+
+
+class _UndefinedSequenceData(SequenceDataAbstractBaseClass):
+    """Stores the length of a sequence with an undefined sequence contents (PRIVATE).
+
+    Objects of this class can be used to create a Seq object to represent
+    sequences with a known length, but an unknown sequence contents.
+    Calling __len__ returns the sequence length, calling __getitem__ raises an
+    UndefinedSequenceError except for requests of zero size, for which it
+    returns an empty bytes object.
+    """
+
+    __slots__ = ("_length",)
+
+    def __init__(self, length):
+        """Initialize the object with the sequence length.
+
+        The calling function is responsible for ensuring that the length is
+        greater than zero.
+        """
+        self._length = length
+        super().__init__()
+
+    def __getitem__(self, key: slice) -> Union[bytes, "_UndefinedSequenceData"]:
+        if isinstance(key, slice):
+            start, end, step = key.indices(self._length)
+            size = len(range(start, end, step))
+            if size == 0:
+                return b""
+            return _UndefinedSequenceData(size)
+        else:
+            raise UndefinedSequenceError("Sequence content is undefined")
+
+    def __len__(self):
+        return self._length
+
+    def __bytes__(self):
+        raise UndefinedSequenceError("Sequence content is undefined")
+
+    def __add__(self, other):
+        length = len(self) + len(other)
+        try:
+            other = bytes(other)
+        except UndefinedSequenceError:
+            if isinstance(other, _UndefinedSequenceData):
+                return _UndefinedSequenceData(length)
+            else:
+                return NotImplemented
+                # _PartiallyDefinedSequenceData.__radd__ will handle this
+        else:
+            data = {len(self): other}
+            return _PartiallyDefinedSequenceData(length, data)
+
+    def __radd__(self, other):
+        data = {0: bytes(other)}
+        length = len(other) + len(self)
+        return _PartiallyDefinedSequenceData(length, data)
+
+    def upper(self):
+        """Return an upper case copy of the sequence."""
+        # An upper case copy of an undefined sequence is an undefined
+        # sequence of the same length
+        return _UndefinedSequenceData(self._length)
+
+    def lower(self):
+        """Return a lower case copy of the sequence."""
+        # A lower case copy of an undefined sequence is an undefined
+        # sequence of the same length
+        return _UndefinedSequenceData(self._length)
+
+    def isupper(self):
+        """Return True if all ASCII characters in data are uppercase.
+
+        If there are no cased characters, the method returns False.
+        """
+        # Character case is irrelevant for an undefined sequence
+        raise UndefinedSequenceError("Sequence content is undefined")
+
+    def islower(self):
+        """Return True if all ASCII characters in data are lowercase.
+
+        If there are no cased characters, the method returns False.
+        """
+        # Character case is irrelevant for an undefined sequence
+        raise UndefinedSequenceError("Sequence content is undefined")
+
+    def replace(self, old, new):
+        """Return a copy with all occurrences of substring old replaced by new."""
+        # Replacing substring old by new in an undefined sequence will result
+        # in an undefined sequence of the same length, if old and new have the
+        # number of characters.
+        if len(old) != len(new):
+            raise UndefinedSequenceError("Sequence content is undefined")
+        return _UndefinedSequenceData(self._length)
+
+    @property
+    def defined(self):
+        """Return False, as the sequence is not defined and has a non-zero length."""
+        return False
+
+    @property
+    def defined_ranges(self):
+        """Return a tuple of the ranges where the sequence contents is defined.
+
+        As the sequence contents of an _UndefinedSequenceData object is fully
+        undefined, the return value is always an empty tuple.
+        """
+        return ()
+
+
+class _PartiallyDefinedSequenceData(SequenceDataAbstractBaseClass):
+    """Stores the length of a sequence with an undefined sequence contents (PRIVATE).
+
+    Objects of this class can be used to create a Seq object to represent
+    sequences with a known length, but with a sequence contents that is only
+    partially known.
+    Calling __len__ returns the sequence length, calling __getitem__ returns
+    the sequence contents if known, otherwise an UndefinedSequenceError is
+    raised.
+    """
+
+    __slots__ = ("_length", "_data")
+
+    def __init__(self, length, data):
+        """Initialize with the sequence length and defined sequence segments.
+
+        The calling function is responsible for ensuring that the length is
+        greater than zero.
+        """
+        self._length = length
+        self._data = data
+        super().__init__()
+
+    def __getitem__(
+        self, key: Union[slice, int]
+    ) -> Union[bytes, SequenceDataAbstractBaseClass]:
+        if isinstance(key, slice):
+            start, end, step = key.indices(self._length)
+            size = len(range(start, end, step))
+            if size == 0:
+                return b""
+            data = {}
+            for s, d in self._data.items():
+                indices = range(-s, -s + self._length)[key]
+                e: Optional[int] = indices.stop
+                assert e is not None
+                if step > 0:
+                    if e <= 0:
+                        continue
+                    if indices.start < 0:
+                        s = indices.start % step
+                    else:
+                        s = indices.start
+                else:  # step < 0
+                    if e < 0:
+                        e = None
+                    end = len(d) - 1
+                    if indices.start > end:
+                        s = end + (indices.start - end) % step
+                    else:
+                        s = indices.start
+                    if s < 0:
+                        continue
+                start = (s - indices.start) // step
+                d = d[s:e:step]
+                if d:
+                    data[start] = d
+            if len(data) == 0:  # Fully undefined sequence
+                return _UndefinedSequenceData(size)
+            # merge adjacent sequence segments
+            end = -1
+            previous = 0  # not needed here, but it keeps flake happy
+            items = data.items()
+            data = {}
+            for start, seq in items:
+                if end == start:
+                    data[previous] += seq
+                else:
+                    data[start] = seq
+                    previous = start
+                end = start + len(seq)
+            if len(data) == 1:
+                seq = data.get(0)
+                if seq is not None and len(seq) == size:
+                    return seq  # Fully defined sequence; return bytes
+            if step < 0:
+                # use this after we drop Python 3.7:
+                # data = {start: data[start] for start in reversed(data)}
+                # use this as long as we support Python 3.7:
+                data = {start: data[start] for start in reversed(list(data.keys()))}
+            return _PartiallyDefinedSequenceData(size, data)
+        elif self._length <= key:
+            raise IndexError("sequence index out of range")
+        else:
+            for start, seq in self._data.items():
+                if start <= key and key < start + len(seq):
+                    return seq[key - start]
+            raise UndefinedSequenceError("Sequence at position %d is undefined" % key)
+
+    def __len__(self):
+        return self._length
+
+    def __bytes__(self):
+        raise UndefinedSequenceError("Sequence content is only partially defined")
+
+    def __add__(self, other):
+        length = len(self) + len(other)
+        data = dict(self._data)
+        items = list(self._data.items())
+        start, seq = items[-1]
+        end = start + len(seq)
+        try:
+            other = bytes(other)
+        except UndefinedSequenceError:
+            if isinstance(other, _UndefinedSequenceData):
+                pass
+            elif isinstance(other, _PartiallyDefinedSequenceData):
+                other_items = list(other._data.items())
+                if end == len(self):
+                    other_start, other_seq = other_items.pop(0)
+                    if other_start == 0:
+                        data[start] += other_seq
+                    else:
+                        data[len(self) + other_start] = other_seq
+                for other_start, other_seq in other_items:
+                    data[len(self) + other_start] = other_seq
+        else:
+            if end == len(self):
+                data[start] += other
+            else:
+                data[len(self)] = other
+        return _PartiallyDefinedSequenceData(length, data)
+
+    def __radd__(self, other):
+        length = len(other) + len(self)
+        try:
+            other = bytes(other)
+        except UndefinedSequenceError:
+            data = {len(other) + start: seq for start, seq in self._data.items()}
+        else:
+            data = {0: other}
+            items = list(self._data.items())
+            start, seq = items.pop(0)
+            if start == 0:
+                data[0] += seq
+            else:
+                data[len(other) + start] = seq
+            for start, seq in items:
+                data[len(other) + start] = seq
+        return _PartiallyDefinedSequenceData(length, data)
+
+    def __mul__(self, other):
+        length = self._length
+        items = self._data.items()
+        data = {}
+        end = -1
+        previous = 0  # not needed here, but it keeps flake happy
+        for i in range(other):
+            for start, seq in items:
+                start += i * length
+                if end == start:
+                    data[previous] += seq
+                else:
+                    data[start] = seq
+                    previous = start
+            end = start + len(seq)
+        return _PartiallyDefinedSequenceData(length * other, data)
+
+    def upper(self):
+        """Return an upper case copy of the sequence."""
+        data = {start: seq.upper() for start, seq in self._data.items()}
+        return _PartiallyDefinedSequenceData(self._length, data)
+
+    def lower(self):
+        """Return a lower case copy of the sequence."""
+        data = {start: seq.lower() for start, seq in self._data.items()}
+        return _PartiallyDefinedSequenceData(self._length, data)
+
+    def isupper(self):
+        """Return True if all ASCII characters in data are uppercase.
+
+        If there are no cased characters, the method returns False.
+        """
+        # Character case is irrelevant for an undefined sequence
+        raise UndefinedSequenceError("Sequence content is only partially defined")
+
+    def islower(self):
+        """Return True if all ASCII characters in data are lowercase.
+
+        If there are no cased characters, the method returns False.
+        """
+        # Character case is irrelevant for an undefined sequence
+        raise UndefinedSequenceError("Sequence content is only partially defined")
+
+    def translate(self, table, delete=b""):
+        """Return a copy with each defined segment mapped by the translation table.
+
+          table
+            Translation table, which must be a bytes object of length 256.
+
+        All characters occurring in the optional argument delete are removed.
+        The remaining characters are mapped through the given translation table.
+        The translation is applied to each defined segment separately.
+        """
+        translated = {}
+        for offset, fragment in self._data.items():
+            translated[offset] = fragment.translate(table, delete)
+        return _PartiallyDefinedSequenceData(self._length, translated)
+
+    def replace(self, old, new):
+        """Return a copy with all occurrences of substring old replaced by new."""
+        # The start positions of the defined segments are kept as they are,
+        # which is only valid if old and new have the same number of
+        # characters. For any other pair an error is raised, as the correct
+        # start positions cannot be calculated reliably.
+        if len(old) == len(new):
+            replaced = {
+                offset: fragment.replace(old, new)
+                for offset, fragment in self._data.items()
+            }
+            return _PartiallyDefinedSequenceData(self._length, replaced)
+        raise UndefinedSequenceError(
+            "Sequence content is only partially defined; substring \n"
+            "replacement cannot be performed reliably"
+        )
+
+    @property
+    def defined(self):
+        """Return False, as the sequence is not fully defined and has a non-zero length."""
+        # Constant answer for this class; no instance state is consulted
+        return False
+
+    @property
+    def defined_ranges(self):
+        """Return a tuple of the ranges where the sequence contents is defined.
+
+        Each range is a (start, end) pair, so the return value has the format
+        ((start1, end1), (start2, end2), ...).
+        """
+        ranges = []
+        for offset, fragment in self._data.items():
+            ranges.append((offset, offset + len(fragment)))
+        return tuple(ranges)
+
+
+# The transcribe, back_transcribe, and translate functions are
+# user-friendly versions of the corresponding Seq/MutableSeq methods.
+# The functions work both on Seq objects, and on strings.
+
+
+def transcribe(dna):
+    """Transcribe a DNA sequence into RNA.
+
+    The input is taken to be the coding strand of the DNA double helix
+    (the usual convention), not the template strand, so the RNA sequence
+    is obtained simply by switching T to U.
+
+    A string argument gives a new string object.
+
+    A Seq or MutableSeq argument gives a new Seq object.
+
+    e.g.
+
+    >>> transcribe("ACTGN")
+    'ACUGN'
+    """
+    if isinstance(dna, Seq):
+        return dna.transcribe()
+    if isinstance(dna, MutableSeq):
+        return Seq(dna).transcribe()
+    # Anything else is treated as a plain string
+    rna = dna.replace("T", "U")
+    return rna.replace("t", "u")
+
+
+def back_transcribe(rna):
+    """Return the RNA sequence back-transcribed into DNA.
+
+    A string argument gives a new string object.
+
+    A Seq or MutableSeq argument gives a new Seq object.
+
+    e.g.
+
+    >>> back_transcribe("ACUGN")
+    'ACTGN'
+    """
+    if isinstance(rna, Seq):
+        return rna.back_transcribe()
+    if isinstance(rna, MutableSeq):
+        return Seq(rna).back_transcribe()
+    # Anything else is treated as a plain string: switch U to T in both cases
+    dna = rna.replace("U", "T")
+    dna = dna.replace("u", "t")
+    return dna
+
+
+def _translate_str(
+    sequence, table, stop_symbol="*", to_stop=False, cds=False, pos_stop="X", gap=None
+):
+    """Translate nucleotide string into a protein string (PRIVATE).
+
+    Arguments:
+     - sequence - a string
+     - table - Which codon table to use?  This can be either a name (string),
+       an NCBI identifier (integer), or a CodonTable object (useful for
+       non-standard genetic codes).  This argument is required here.
+     - stop_symbol - a single character string, what to use for terminators.
+     - to_stop - boolean, should translation terminate at the first
+       in frame stop codon?  If there is no in-frame stop codon
+       then translation continues to the end.
+     - pos_stop - a single character string for a possible stop codon
+       (e.g. TAN or NNN)
+     - cds - Boolean, indicates this is a complete CDS.  If True, this
+       checks the sequence starts with a valid alternative start
+       codon (which will be translated as methionine, M), that the
+       sequence length is a multiple of three, and that there is a
+       single in frame stop codon at the end (this will be excluded
+       from the protein sequence, regardless of the to_stop option).
+       If these tests fail, an exception is raised.
+     - gap - Single character string to denote symbol used for gaps.
+       Defaults to None.
+
+    Returns a string.
+
+    Raises a ValueError or TypeError if the table or gap argument is not
+    acceptable, a ValueError if to_stop=True is combined with a table that
+    contains dual-coding stop codons, and a CodonTable.TranslationError
+    for an invalid codon or a failed cds check.
+
+    e.g.
+
+    >>> from Bio.Data import CodonTable
+    >>> table = CodonTable.ambiguous_dna_by_id[1]
+    >>> _translate_str("AAA", table)
+    'K'
+    >>> _translate_str("TAR", table)
+    '*'
+    >>> _translate_str("TAN", table)
+    'X'
+    >>> _translate_str("TAN", table, pos_stop="@")
+    '@'
+    >>> _translate_str("TA?", table)
+    Traceback (most recent call last):
+       ...
+    Bio.Data.CodonTable.TranslationError: Codon 'TA?' is invalid
+
+    In a change to older versions of Biopython, partial codons are now
+    always regarded as an error (previously only checked if cds=True)
+    and will trigger a warning (likely to become an exception in a
+    future release).
+
+    If **cds=True**, the start and stop codons are checked, and the start
+    codon will be translated as methionine. The sequence must be a
+    whole number of codons.
+
+    >>> _translate_str("ATGCCCTAG", table, cds=True)
+    'MP'
+    >>> _translate_str("AAACCCTAG", table, cds=True)
+    Traceback (most recent call last):
+       ...
+    Bio.Data.CodonTable.TranslationError: First codon 'AAA' is not a start codon
+    >>> _translate_str("ATGCCCTAGCCCTAG", table, cds=True)
+    Traceback (most recent call last):
+       ...
+    Bio.Data.CodonTable.TranslationError: Extra in frame stop codon 'TAG' found.
+    """
+    # Resolve the table argument to a codon table: first try it as an
+    # integer ID, then as a table name, and finally as a CodonTable object.
+    try:
+        table_id = int(table)
+    except ValueError:
+        # Assume it's a table name
+        # The same table can be used for RNA or DNA
+        try:
+            codon_table = CodonTable.ambiguous_generic_by_name[table]
+        except KeyError:
+            if isinstance(table, str):
+                raise ValueError(
+                    "The Bio.Seq translate methods and function DO NOT "
+                    "take a character string mapping table like the python "
+                    "string object's translate method. "
+                    "Use str(my_seq).translate(...) instead."
+                ) from None
+            else:
+                raise TypeError("table argument must be integer or string") from None
+    except (AttributeError, TypeError):
+        # Assume it's a CodonTable object
+        if isinstance(table, CodonTable.CodonTable):
+            codon_table = table
+        else:
+            raise ValueError("Bad table argument") from None
+    else:
+        # Assume it's a table ID
+        # The same table can be used for RNA or DNA
+        codon_table = CodonTable.ambiguous_generic_by_id[table_id]
+    # All codon lookups below are done on the upper case sequence
+    sequence = sequence.upper()
+    amino_acids = []
+    forward_table = codon_table.forward_table
+    stop_codons = codon_table.stop_codons
+    if codon_table.nucleotide_alphabet is not None:
+        valid_letters = set(codon_table.nucleotide_alphabet.upper())
+    else:
+        # Assume the worst case, ambiguous DNA or RNA:
+        valid_letters = set(
+            IUPACData.ambiguous_dna_letters.upper()
+            + IUPACData.ambiguous_rna_letters.upper()
+        )
+    n = len(sequence)
+
+    # Check for tables with 'ambiguous' (dual-coding) stop codons:
+    dual_coding = [c for c in stop_codons if c in forward_table]
+    if dual_coding:
+        # Only the first dual-coding codon is quoted as an example in the messages
+        c = dual_coding[0]
+        if to_stop:
+            raise ValueError(
+                "You cannot use 'to_stop=True' with this table as it contains"
+                f" {len(dual_coding)} codon(s) which can be both STOP and an"
+                f" amino acid (e.g. '{c}' -> '{forward_table[c]}' or STOP)."
+            )
+        warnings.warn(
+            f"This table contains {len(dual_coding)} codon(s) which code(s) for"
+            f" both STOP and an amino acid (e.g. '{c}' -> '{forward_table[c]}'"
+            " or STOP). Such codons will be translated as amino acid.",
+            BiopythonWarning,
+        )
+
+    if cds:
+        # The checks are made in this order: start codon, length, stop codon
+        if str(sequence[:3]).upper() not in codon_table.start_codons:
+            raise CodonTable.TranslationError(
+                f"First codon '{sequence[:3]}' is not a start codon"
+            )
+        if n % 3 != 0:
+            raise CodonTable.TranslationError(
+                f"Sequence length {n} is not a multiple of three"
+            )
+        if str(sequence[-3:]).upper() not in stop_codons:
+            raise CodonTable.TranslationError(
+                f"Final codon '{sequence[-3:]}' is not a stop codon"
+            )
+        # Don't translate the stop symbol, and manually translate the M
+        sequence = sequence[3:-3]
+        # Keep n in step with the trimmed sequence (start and stop codon removed)
+        n -= 6
+        amino_acids = ["M"]
+    elif n % 3 != 0:
+        warnings.warn(
+            "Partial codon, len(sequence) not a multiple of three. "
+            "Explicitly trim the sequence or add trailing N before "
+            "translation. This may become an error in future.",
+            BiopythonWarning,
+        )
+    if gap is not None:
+        if not isinstance(gap, str):
+            raise TypeError("Gap character should be a single character string.")
+        elif len(gap) > 1:
+            raise ValueError("Gap character should be a single character string.")
+
+    # Only complete codons are translated; a trailing partial codon is skipped
+    for i in range(0, n - n % 3, 3):
+        codon = sequence[i : i + 3]
+        try:
+            amino_acids.append(forward_table[codon])
+        except (KeyError, CodonTable.TranslationError):
+            # The codon has no amino acid in the forward table; classify it as
+            # a stop codon, a possible stop codon, a gap, or an invalid codon.
+            if codon in codon_table.stop_codons:
+                if cds:
+                    raise CodonTable.TranslationError(
+                        f"Extra in frame stop codon '{codon}' found."
+                    ) from None
+                if to_stop:
+                    break
+                amino_acids.append(stop_symbol)
+            elif valid_letters.issuperset(set(codon)):
+                # Possible stop codon (e.g. NNN or TAN)
+                amino_acids.append(pos_stop)
+            elif gap is not None and codon == gap * 3:
+                # Gapped translation
+                amino_acids.append(gap)
+            else:
+                raise CodonTable.TranslationError(
+                    f"Codon '{codon}' is invalid"
+                ) from None
+    return "".join(amino_acids)
+
+
+def translate(
+    sequence, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap=None
+):
+    """Translate a nucleotide sequence into amino acids.
+
+    If given a string, returns a new string object. Given a Seq or
+    MutableSeq, returns a Seq object.
+
+    Arguments:
+     - table - Which codon table to use?  This can be either a name
+       (string), an NCBI identifier (integer), or a CodonTable object
+       (useful for non-standard genetic codes).  Defaults to the "Standard"
+       table.
+     - stop_symbol - Single character string, what to use for any
+       terminators, defaults to the asterisk, "*".
+     - to_stop - Boolean, defaults to False meaning do a full
+       translation continuing on past any stop codons
+       (translated as the specified stop_symbol).  If
+       True, translation is terminated at the first in
+       frame stop codon (and the stop_symbol is not
+       appended to the returned protein sequence).
+     - cds - Boolean, indicates this is a complete CDS.  If True, this
+       checks the sequence starts with a valid alternative start
+       codon (which will be translated as methionine, M), that the
+       sequence length is a multiple of three, and that there is a
+       single in frame stop codon at the end (this will be excluded
+       from the protein sequence, regardless of the to_stop option).
+       If these tests fail, an exception is raised.
+     - gap - Single character string to denote symbol used for gaps.
+       Defaults to None.  For a Seq or MutableSeq argument, a gap that
+       is not None is passed on to the translate method of the Seq
+       object; if gap is None, that method is called without a gap
+       argument, so that its own default applies.
+
+    A simple string example using the default (standard) genetic code:
+
+    >>> coding_dna = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"
+    >>> translate(coding_dna)
+    'VAIVMGR*KGAR*'
+    >>> translate(coding_dna, stop_symbol="@")
+    'VAIVMGR@KGAR@'
+    >>> translate(coding_dna, to_stop=True)
+    'VAIVMGR'
+
+    Now using NCBI table 2, where TGA is not a stop codon:
+
+    >>> translate(coding_dna, table=2)
+    'VAIVMGRWKGAR*'
+    >>> translate(coding_dna, table=2, to_stop=True)
+    'VAIVMGRWKGAR'
+
+    In fact this example uses an alternative start codon valid under NCBI
+    table 2, GTG, which means this example is a complete valid CDS which
+    when translated should really start with methionine (not valine):
+
+    >>> translate(coding_dna, table=2, cds=True)
+    'MAIVMGRWKGAR'
+
+    Note that if the sequence has no in-frame stop codon, then the to_stop
+    argument has no effect:
+
+    >>> coding_dna2 = "GTGGCCATTGTAATGGGCCGC"
+    >>> translate(coding_dna2)
+    'VAIVMGR'
+    >>> translate(coding_dna2, to_stop=True)
+    'VAIVMGR'
+
+    NOTE - Ambiguous codons like "TAN" or "NNN" could be an amino acid
+    or a stop codon.  These are translated as "X".  Any invalid codon
+    (e.g. "TA?" or "T-A") will throw a TranslationError.
+
+    It will however translate either DNA or RNA.
+
+    NOTE - Since version 1.71 Biopython contains codon tables with 'ambiguous
+    stop codons'. These are stop codons with unambiguous sequence but which
+    have a context dependent coding as STOP or as amino acid. With these tables
+    'to_stop' must be False (otherwise a ValueError is raised). The dual
+    coding codons will always be translated as amino acid, except for
+    'cds=True', where the last codon will be translated as STOP.
+
+    >>> coding_dna3 = "ATGGCACGGAAGTGA"
+    >>> translate(coding_dna3)
+    'MARK*'
+
+    >>> translate(coding_dna3, table=27)  # Table 27: TGA -> STOP or W
+    'MARKW'
+
+    It will however raise a BiopythonWarning (not shown).
+
+    >>> translate(coding_dna3, table=27, cds=True)
+    'MARK'
+
+    >>> translate(coding_dna3, table=27, to_stop=True)
+    Traceback (most recent call last):
+       ...
+    ValueError: You cannot use 'to_stop=True' with this table ...
+    """
+    if isinstance(sequence, Seq):
+        seq = sequence
+    elif isinstance(sequence, MutableSeq):
+        # Convert first, so that a Seq object is returned
+        seq = Seq(sequence)
+    else:
+        # Assume it's a string, return a string
+        return _translate_str(sequence, table, stop_symbol, to_stop, cds, gap=gap)
+    if gap is None:
+        # No gap requested: keep the default gap handling of the Seq method
+        return seq.translate(table, stop_symbol, to_stop, cds)
+    # Honor an explicitly given gap character instead of silently dropping it
+    return seq.translate(table, stop_symbol, to_stop, cds, gap=gap)
+
+
+def reverse_complement(sequence, inplace=False):
+    """Return the reverse complement as a DNA sequence.
+
+    If given a string, returns a new string object.
+    Given a Seq object, returns a new Seq object.
+    Given a MutableSeq, returns a new MutableSeq object.
+    Given a SeqRecord object, returns a new SeqRecord object.
+
+    >>> my_seq = "CGA"
+    >>> reverse_complement(my_seq)
+    'TCG'
+    >>> my_seq = Seq("CGA")
+    >>> reverse_complement(my_seq)
+    Seq('TCG')
+    >>> my_seq = MutableSeq("CGA")
+    >>> reverse_complement(my_seq)
+    MutableSeq('TCG')
+    >>> my_seq
+    MutableSeq('CGA')
+
+    Any U in the sequence is treated as a T:
+
+    >>> reverse_complement(Seq("CGAUT"))
+    Seq('AATCG')
+
+    In contrast, ``reverse_complement_rna`` returns an RNA sequence:
+
+    >>> reverse_complement_rna(Seq("CGAUT"))
+    Seq('AAUCG')
+
+    Supports both lower- and upper-case characters, and unambiguous and
+    ambiguous nucleotides. All other characters are not converted:
+
+    >>> reverse_complement("ACGTUacgtuXYZxyz")
+    'zrxZRXaacgtAACGT'
+
+    The sequence is modified in-place and returned if inplace is True:
+
+    >>> my_seq = MutableSeq("CGA")
+    >>> reverse_complement(my_seq, inplace=True)
+    MutableSeq('TCG')
+    >>> my_seq
+    MutableSeq('TCG')
+
+    As strings and ``Seq`` objects are immutable, a ``TypeError`` is
+    raised if ``reverse_complement`` is called on a ``Seq`` object with
+    ``inplace=True``.
+    """
+    from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+    if isinstance(sequence, (Seq, MutableSeq)):
+        return sequence.reverse_complement(inplace)
+    if isinstance(sequence, SeqRecord):
+        if not inplace:
+            return sequence.reverse_complement()
+        raise TypeError("SeqRecords are immutable")
+    # Anything else is treated as a plain string.
+    if inplace:
+        raise TypeError("strings are immutable")
+    complemented = sequence.encode("ASCII").translate(_dna_complement_table)
+    return complemented.decode("ASCII")[::-1]
+
+
+def reverse_complement_rna(sequence, inplace=False):
+    """Return the reverse complement as an RNA sequence.
+
+    If given a string, returns a new string object.
+    Given a Seq object, returns a new Seq object.
+    Given a MutableSeq, returns a new MutableSeq object.
+    Given a SeqRecord object, returns a new SeqRecord object.
+
+    >>> my_seq = "CGA"
+    >>> reverse_complement_rna(my_seq)
+    'UCG'
+    >>> my_seq = Seq("CGA")
+    >>> reverse_complement_rna(my_seq)
+    Seq('UCG')
+    >>> my_seq = MutableSeq("CGA")
+    >>> reverse_complement_rna(my_seq)
+    MutableSeq('UCG')
+    >>> my_seq
+    MutableSeq('CGA')
+
+    Any T in the sequence is treated as a U:
+
+    >>> reverse_complement_rna(Seq("CGAUT"))
+    Seq('AAUCG')
+
+    In contrast, ``reverse_complement`` returns a DNA sequence:
+
+    >>> reverse_complement(Seq("CGAUT"), inplace=False)
+    Seq('AATCG')
+
+    Supports both lower- and upper-case characters, and unambiguous and
+    ambiguous nucleotides. All other characters are not converted:
+
+    >>> reverse_complement_rna("ACGTUacgtuXYZxyz")
+    'zrxZRXaacguAACGU'
+
+    The sequence is modified in-place and returned if inplace is True:
+
+    >>> my_seq = MutableSeq("CGA")
+    >>> reverse_complement_rna(my_seq, inplace=True)
+    MutableSeq('UCG')
+    >>> my_seq
+    MutableSeq('UCG')
+
+    As strings and ``Seq`` objects are immutable, a ``TypeError`` is
+    raised if ``reverse_complement_rna`` is called on a ``Seq`` object
+    with ``inplace=True``.
+    """
+    from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+    if isinstance(sequence, (Seq, MutableSeq)):
+        return sequence.reverse_complement_rna(inplace)
+    if isinstance(sequence, SeqRecord):
+        if not inplace:
+            return sequence.reverse_complement_rna()
+        raise TypeError("SeqRecords are immutable")
+    # Anything else is treated as a plain string.
+    if inplace:
+        raise TypeError("strings are immutable")
+    complemented = sequence.encode("ASCII").translate(_rna_complement_table)
+    # Reversing the ASCII bytes before decoding gives the same text
+    return complemented[::-1].decode("ASCII")
+
+
+def complement(sequence, inplace=False):
+    """Return the complement as a DNA sequence.
+
+    If given a string, returns a new string object.
+    Given a Seq object, returns a new Seq object.
+    Given a MutableSeq, returns a new MutableSeq object.
+    Given a SeqRecord object, returns a new SeqRecord object.
+
+    >>> my_seq = "CGA"
+    >>> complement(my_seq)
+    'GCT'
+    >>> my_seq = Seq("CGA")
+    >>> complement(my_seq)
+    Seq('GCT')
+    >>> my_seq = MutableSeq("CGA")
+    >>> complement(my_seq)
+    MutableSeq('GCT')
+    >>> my_seq
+    MutableSeq('CGA')
+
+    Any U in the sequence is treated as a T:
+
+    >>> complement(Seq("CGAUT"))
+    Seq('GCTAA')
+
+    In contrast, ``complement_rna`` returns an RNA sequence:
+
+    >>> complement_rna(Seq("CGAUT"))
+    Seq('GCUAA')
+
+    Supports both lower- and upper-case characters, and unambiguous and
+    ambiguous nucleotides. All other characters are not converted:
+
+    >>> complement("ACGTUacgtuXYZxyz")
+    'TGCAAtgcaaXRZxrz'
+
+    The sequence is modified in-place and returned if inplace is True:
+
+    >>> my_seq = MutableSeq("CGA")
+    >>> complement(my_seq, inplace=True)
+    MutableSeq('GCT')
+    >>> my_seq
+    MutableSeq('GCT')
+
+    As strings and ``Seq`` objects are immutable, a ``TypeError`` is
+    raised if ``complement`` is called on a string or a ``Seq`` object
+    with ``inplace=True``.
+    """
+    from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+    if isinstance(sequence, (Seq, MutableSeq)):
+        return sequence.complement(inplace)
+    if isinstance(sequence, SeqRecord):
+        if inplace:
+            raise TypeError("SeqRecords are immutable")
+        return sequence.complement()
+    # Assume it's a string.
+    # Test the truth value of inplace (not identity with True), as is done
+    # for SeqRecord objects above and in the other complement functions.
+    if inplace:
+        raise TypeError("strings are immutable")
+    sequence = sequence.encode("ASCII")
+    sequence = sequence.translate(_dna_complement_table)
+    return sequence.decode("ASCII")
+
+
+def complement_rna(sequence, inplace=False):
+    """Return the complement as an RNA sequence.
+
+    If given a string, returns a new string object.
+    Given a Seq object, returns a new Seq object.
+    Given a MutableSeq, returns a new MutableSeq object.
+    Given a SeqRecord object, returns a new SeqRecord object.
+
+    >>> my_seq = "CGA"
+    >>> complement_rna(my_seq)
+    'GCU'
+    >>> my_seq = Seq("CGA")
+    >>> complement_rna(my_seq)
+    Seq('GCU')
+    >>> my_seq = MutableSeq("CGA")
+    >>> complement_rna(my_seq)
+    MutableSeq('GCU')
+    >>> my_seq
+    MutableSeq('CGA')
+
+    Any T in the sequence is treated as a U:
+
+    >>> complement_rna(Seq("CGAUT"))
+    Seq('GCUAA')
+
+    In contrast, ``complement`` returns a DNA sequence:
+
+    >>> complement(Seq("CGAUT"))
+    Seq('GCTAA')
+
+    Supports both lower- and upper-case characters, and unambiguous and
+    ambiguous nucleotides. All other characters are not converted:
+
+    >>> complement_rna("ACGTUacgtuXYZxyz")
+    'UGCAAugcaaXRZxrz'
+
+    The sequence is modified in-place and returned if inplace is True:
+
+    >>> my_seq = MutableSeq("CGA")
+    >>> complement_rna(my_seq, inplace=True)
+    MutableSeq('GCU')
+    >>> my_seq
+    MutableSeq('GCU')
+
+    As strings and ``Seq`` objects are immutable, a ``TypeError`` is
+    raised if ``complement_rna`` is called on a ``Seq`` object with
+    ``inplace=True``.
+    """
+    from Bio.SeqRecord import SeqRecord  # Lazy to avoid circular imports
+
+    if isinstance(sequence, (Seq, MutableSeq)):
+        return sequence.complement_rna(inplace)
+    if isinstance(sequence, SeqRecord):
+        if not inplace:
+            return sequence.complement_rna()
+        raise TypeError("SeqRecords are immutable")
+    # Anything else is treated as a plain string.
+    if inplace:
+        raise TypeError("strings are immutable")
+    return sequence.encode("ASCII").translate(_rna_complement_table).decode("ASCII")
+
+
+def _test():
+    """Run the doctests of the Bio.Seq module (PRIVATE)."""
+    import doctest
+
+    print("Running doctests...")
+    # Only the exception type is compared for doctests that expect a traceback
+    doctest.testmod(optionflags=doctest.IGNORE_EXCEPTION_DETAIL)
+    print("Done")
+
+
+if __name__ == "__main__":
+    # Run the doctests only when this file is executed as a script
+    _test()
author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children