jpayne@69: # Copyright 2000-2003 Jeff Chang. jpayne@69: # Copyright 2001-2008 Brad Chapman. jpayne@69: # Copyright 2005-2024 by Peter Cock. jpayne@69: # Copyright 2006-2009 Michiel de Hoon. jpayne@69: # All rights reserved. jpayne@69: # jpayne@69: # This file is part of the Biopython distribution and governed by your jpayne@69: # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". jpayne@69: # Please see the LICENSE file that should have been included as part of this jpayne@69: # package. jpayne@69: """Represent a Sequence Feature holding info about a part of a sequence. jpayne@69: jpayne@69: This is heavily modeled after the Biocorba SeqFeature objects, and jpayne@69: may be pretty biased towards GenBank stuff since I'm writing it jpayne@69: for the GenBank parser output... jpayne@69: jpayne@69: What's here: jpayne@69: jpayne@69: Base class to hold a Feature jpayne@69: ---------------------------- jpayne@69: jpayne@69: Classes: jpayne@69: - SeqFeature jpayne@69: jpayne@69: Hold information about a Reference jpayne@69: ---------------------------------- jpayne@69: jpayne@69: This is an attempt to create a General class to hold Reference type jpayne@69: information. jpayne@69: jpayne@69: Classes: jpayne@69: - Reference jpayne@69: jpayne@69: Specify locations of a feature on a Sequence jpayne@69: -------------------------------------------- jpayne@69: jpayne@69: This aims to handle, in Ewan Birney's words, 'the dreaded fuzziness issue'. jpayne@69: This has the advantages of allowing us to handle fuzzy stuff in case anyone jpayne@69: needs it, and also be compatible with BioPerl etc and BioSQL. jpayne@69: jpayne@69: Classes: jpayne@69: - Location - abstract base class of SimpleLocation and CompoundLocation. jpayne@69: - SimpleLocation - Specify the start and end location of a feature. jpayne@69: - CompoundLocation - Collection of SimpleLocation objects (for joins etc). 
# Regular expressions for location parsing
#
# NOTE: some names below are deliberately bound twice.  The first, group-free
# version is interpolated into a larger pattern; the name is then rebound to
# a variant with capturing groups for stand-alone use.  Statement order
# therefore matters - do not sort or merge these assignments.

# Optional "accession:" prefix naming another record, e.g. "NC_016402.1:".
_reference = r"(?:[a-zA-Z][a-zA-Z0-9_\.\|]*[a-zA-Z0-9]?\:)"
# NOTE(review): "[,\d+]+" is a character class (any run of ',', digits or
# '+'), so this is looser than "two or more comma separated integers" - it
# also accepts e.g. "one-of(33)".  Left unchanged to preserve behaviour.
_oneof_position = r"one\-of\(\d+[,\d+]+\)"

_oneof_location = rf"[<>]?(?:\d+|{_oneof_position})\.\.[<>]?(?:\d+|{_oneof_position})"

_any_location = rf"({_reference}?{_oneof_location}|complement\({_oneof_location}\)|[^,]+|complement\([^,]+\))"

# re.split() with a single capturing group returns the matched locations at
# the odd indices of its result, hence the [1::2] slices used with _split.
_split = re.compile(_any_location).split

# Import-time self checks for the splitter.
assert _split("123..145")[1::2] == ["123..145"]
assert _split("123..145,200..209")[1::2] == ["123..145", "200..209"]
assert _split("one-of(200,203)..300")[1::2] == ["one-of(200,203)..300"]
assert _split("complement(123..145),200..209")[1::2] == [
    "complement(123..145)",
    "200..209",
]
assert _split("123..145,one-of(200,203)..209")[1::2] == [
    "123..145",
    "one-of(200,203)..209",
]
assert _split("123..145,one-of(200,203)..one-of(209,211),300")[1::2] == [
    "123..145",
    "one-of(200,203)..one-of(209,211)",
    "300",
]
assert _split("123..145,complement(one-of(200,203)..one-of(209,211)),300")[1::2] == [
    "123..145",
    "complement(one-of(200,203)..one-of(209,211))",
    "300",
]
assert _split("123..145,200..one-of(209,211),300")[1::2] == [
    "123..145",
    "200..one-of(209,211)",
    "300",
]
assert _split("123..145,200..one-of(209,211)")[1::2] == [
    "123..145",
    "200..one-of(209,211)",
]
assert _split(
    "complement(149815..150200),complement(293787..295573),NC_016402.1:6618..6676,181647..181905"
)[1::2] == [
    "complement(149815..150200)",
    "complement(293787..295573)",
    "NC_016402.1:6618..6676",
    "181647..181905",
]


_pair_location = r"[<>]?-?\d+\.\.[<>]?-?\d+"

_between_location = r"\d+\^\d+"

# Group-free form first (embedded in _within_location), then the capturing
# form used to pull out the two numbers of a "(a.b)" position.
_within_position = r"\(\d+\.\d+\)"
_within_location = rf"([<>]?\d+|{_within_position})\.\.([<>]?\d+|{_within_position})"
_within_position = r"\((\d+)\.(\d+)\)"
_re_within_position = re.compile(_within_position)
assert _re_within_position.match("(3.9)")

# Same two-step pattern: this capturing _oneof_location is built from the
# group-free _oneof_position defined above, which is rebound afterwards.
_oneof_location = rf"([<>]?\d+|{_oneof_position})\.\.([<>]?\d+|{_oneof_position})"
_oneof_position = r"one\-of\((\d+[,\d+]+)\)"
_re_oneof_position = re.compile(_oneof_position)
assert _re_oneof_position.match("one-of(6,9)")
assert not _re_oneof_position.match("one-of(3)")
assert _re_oneof_position.match("one-of(3,6)")
assert _re_oneof_position.match("one-of(3,6,9)")

_solo_location = r"[<>]?\d+"
_solo_bond = rf"bond\({_solo_location}\)"

# Classifies a single location string by which named group matched.
#
# The reviewed copy read "(?P%s)", which is not valid regex syntax and makes
# re.compile() raise at import; the "<name>" parts had been lost.  The names
# restored here mirror the pattern variables.
# NOTE(review): confirm these group names against the code that consumes
# this pattern (it is outside the section under review).
#
# Note the "^" only binds to the first alternative and the "$" only to the
# last one; that is kept as is, so callers still need to check that the
# matched text covers the whole input.
_re_location_category = re.compile(
    rf"^(?P<pair>{_pair_location})"
    rf"|(?P<between>{_between_location})"
    rf"|(?P<within>{_within_location})"
    rf"|(?P<oneof>{_oneof_location})"
    rf"|(?P<bond>{_solo_bond})"
    rf"|(?P<solo>{_solo_location})$"
)


class LocationParserError(ValueError):
    """Could not parse a feature location string."""
class SeqFeature:
    """Represent a Sequence Feature on an object.

    Attributes:
     - location - the location of the feature on the sequence (SimpleLocation)
     - type - the specified type of the feature (ie. CDS, exon, repeat...)
     - id - A string identifier for the feature.
     - qualifiers - A dictionary of qualifiers on the feature. These are
       analogous to the qualifiers from a GenBank feature table. The keys of
       the dictionary are qualifier names, the values are the qualifier
       values.

    """

    def __init__(
        self,
        location=None,
        type="",
        id="",
        qualifiers=None,
        sub_features=None,
    ):
        """Initialize a SeqFeature on a sequence.

        location can either be a SimpleLocation (with strand argument also
        given if required), a CompoundLocation, or None.

        e.g. With no strand, on the forward strand, and on the reverse strand:

        >>> from Bio.SeqFeature import SeqFeature, SimpleLocation
        >>> f1 = SeqFeature(SimpleLocation(5, 10), type="domain")
        >>> f1.location.strand == None
        True
        >>> f2 = SeqFeature(SimpleLocation(7, 110, strand=1), type="CDS")
        >>> f2.location.strand == +1
        True
        >>> f3 = SeqFeature(SimpleLocation(9, 108, strand=-1), type="CDS")
        >>> f3.location.strand == -1
        True

        For exact start/end positions, an integer can be used (as shown above)
        as shorthand for the ExactPosition object. For non-exact locations, the
        SimpleLocation must be specified via the appropriate position objects.

        The qualifiers argument, if given, is copied into a new dictionary.
        The sub_features argument is no longer supported; passing anything
        other than None raises a TypeError.
        """
        if location is not None and not isinstance(
            location, (SimpleLocation, CompoundLocation)
        ):
            raise TypeError(
                "SimpleLocation, CompoundLocation (or None) required for the location"
            )
        self.location = location
        self.type = type
        self.id = id
        # Always a fresh dict, so the caller's mapping is never shared.
        self.qualifiers = {}
        if qualifiers is not None:
            self.qualifiers.update(qualifiers)
        if sub_features is not None:
            raise TypeError("Rather than sub_features, use a CompoundLocation")

    def _get_strand(self):
        """Get function for the strand property (PRIVATE)."""
        warnings.warn(
            "Please use .location.strand rather than .strand",
            BiopythonDeprecationWarning,
        )
        return self.location.strand

    def _set_strand(self, value):
        """Set function for the strand property (PRIVATE)."""
        warnings.warn(
            "Please use .location.strand rather than .strand",
            BiopythonDeprecationWarning,
        )
        try:
            self.location.strand = value
        except AttributeError:
            # With no location, assigning None is a harmless no-op;
            # anything else cannot be stored.
            if self.location is None:
                if value is not None:
                    raise ValueError("Can't set strand without a location.") from None
            else:
                raise

    strand = property(
        fget=_get_strand,
        fset=_set_strand,
        doc="Alias for the location's strand (DEPRECATED).",
    )

    def _get_ref(self):
        """Get function for the reference property (PRIVATE)."""
        warnings.warn(
            "Please use .location.ref rather than .ref",
            BiopythonDeprecationWarning,
        )
        try:
            return self.location.ref
        except AttributeError:
            return None

    def _set_ref(self, value):
        """Set function for the reference property (PRIVATE)."""
        warnings.warn(
            "Please use .location.ref rather than .ref",
            BiopythonDeprecationWarning,
        )
        try:
            self.location.ref = value
        except AttributeError:
            if self.location is None:
                if value is not None:
                    raise ValueError("Can't set ref without a location.") from None
            else:
                raise

    ref = property(
        fget=_get_ref,
        fset=_set_ref,
        doc="Alias for the location's ref (DEPRECATED).",
    )

    def _get_ref_db(self):
        """Get function for the database reference property (PRIVATE)."""
        warnings.warn(
            "Please use .location.ref_db rather than .ref_db",
            BiopythonDeprecationWarning,
        )
        try:
            return self.location.ref_db
        except AttributeError:
            return None

    def _set_ref_db(self, value):
        """Set function for the database reference property (PRIVATE)."""
        warnings.warn(
            "Please use .location.ref_db rather than .ref_db",
            BiopythonDeprecationWarning,
        )
        # Same handling as _set_strand/_set_ref: without a location,
        # setting None is a no-op and any other value is a ValueError.
        try:
            self.location.ref_db = value
        except AttributeError:
            if self.location is None:
                if value is not None:
                    raise ValueError("Can't set ref_db without a location.") from None
            else:
                raise

    ref_db = property(
        fget=_get_ref_db,
        fset=_set_ref_db,
        doc="Alias for the location's ref_db (DEPRECATED).",
    )

    def __eq__(self, other):
        """Check if two SeqFeature objects should be considered equal.

        The id, type, location and qualifiers must all match. Comparing with
        anything that is not a SeqFeature gives False.
        """
        return (
            isinstance(other, SeqFeature)
            and self.id == other.id
            and self.type == other.type
            and self.location == other.location
            and self.qualifiers == other.qualifiers
        )

    def __repr__(self):
        """Represent the feature as a string for debugging.

        Empty type/id/qualifiers are left out, and the qualifiers are only
        ever shown as an ellipsis.
        """
        fields = [repr(self.location)]
        if self.type:
            fields.append(f"type={self.type!r}")
        if self.id:
            fields.append(f"id={self.id!r}")
        if self.qualifiers:
            fields.append("qualifiers=...")
        return f"{self.__class__.__name__}({', '.join(fields)})"

    def __str__(self):
        """Return the full feature as a python string.

        One line each for the type, location and (if set) id, followed by
        the qualifiers sorted by key.
        """
        lines = [f"type: {self.type}", f"location: {self.location}"]
        if self.id:
            lines.append(f"id: {self.id}")
        lines.append("qualifiers:")
        # NOTE(review): the leading whitespace of this literal is reproduced
        # as found; it may have been collapsed in the copy under review.
        lines.extend(
            f" Key: {qual_key}, Value: {self.qualifiers[qual_key]}"
            for qual_key in sorted(self.qualifiers)
        )
        return "\n".join(lines) + "\n"

    def _shift(self, offset):
        """Return a copy of the feature with its location shifted (PRIVATE).

        The annotation qualifiers are copied.
        """
        return SeqFeature(
            location=self.location._shift(offset),
            type=self.type,
            id=self.id,
            qualifiers=self.qualifiers.copy(),
        )

    def _flip(self, length):
        """Return a copy of the feature with its location flipped (PRIVATE).

        The argument length gives the length of the parent sequence. For
        example a location 0..20 (+1 strand) with parent length 30 becomes
        after flipping 10..30 (-1 strand). Strandless (None) or unknown
        strand (0) remain like that - just their end points are changed.

        The annotation qualifiers are copied.
        """
        return SeqFeature(
            location=self.location._flip(length),
            type=self.type,
            id=self.id,
            qualifiers=self.qualifiers.copy(),
        )

    def extract(self, parent_sequence, references=None):
        """Extract the feature's sequence from supplied parent sequence.

        The parent_sequence can be a Seq like object or a string, and will
        generally return an object of the same type. The exception to this is
        a MutableSeq as the parent sequence will return a Seq object.

        This should cope with complex locations including complements, joins
        and fuzzy positions. Even mixed strand features should work! This
        also covers features on protein sequences (e.g. domains), although
        here reverse strand features are not permitted. If the
        location refers to other records, they must be supplied in the
        optional dictionary references.

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqFeature import SeqFeature, SimpleLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
        >>> f = SeqFeature(SimpleLocation(8, 15), type="domain")
        >>> f.extract(seq)
        Seq('VALIVIC')

        If the SimpleLocation is None, e.g. when parsing invalid locus
        locations in the GenBank parser, extract() will raise a ValueError.

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqFeature import SeqFeature
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
        >>> f = SeqFeature(None, type="domain")
        >>> f.extract(seq)
        Traceback (most recent call last):
           ...
        ValueError: The feature's .location is None. Check the sequence file for a valid location.

        Note - currently only compound features of type "join" are supported.
        """
        if self.location is None:
            raise ValueError(
                "The feature's .location is None. Check the "
                "sequence file for a valid location."
            )
        return self.location.extract(parent_sequence, references=references)

    def translate(
        self,
        parent_sequence,
        table="Standard",
        start_offset=None,
        stop_symbol="*",
        to_stop=False,
        cds=None,
        gap=None,
    ):
        """Get a translation of the feature's sequence.

        This method is intended for CDS or other features that code proteins
        and is a shortcut that will both extract the feature and
        translate it, taking into account the codon_start and transl_table
        qualifiers, if they are present. If they are not present the
        value of the arguments "table" and "start_offset" are used.

        The "cds" parameter is set to "True" if the feature is of type
        "CDS" but can be overridden by giving an explicit argument.

        The arguments stop_symbol, to_stop and gap have the same meaning
        as Seq.translate, refer to that documentation for further information.

        Arguments:
         - parent_sequence - A DNA or RNA sequence.
         - table - Which codon table to use if there is no transl_table
           qualifier for this feature. This can be either a name
           (string), an NCBI identifier (integer), or a CodonTable
           object (useful for non-standard genetic codes).  This
           defaults to the "Standard" table.
         - start_offset - offset at which the first complete codon of a
           coding feature can be found, relative to the first base of
           that feature. Has a valid value of 0, 1 or 2. NOTE: this
           uses python's 0-based numbering whereas the codon_start
           qualifier in files from NCBI use 1-based numbering.
           Will override a codon_start qualifier

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqFeature import SeqFeature, SimpleLocation
        >>> seq = Seq("GGTTACACTTACCGATAATGTCTCTGATGA")
        >>> f = SeqFeature(SimpleLocation(0, 30), type="CDS")
        >>> f.qualifiers['transl_table'] = [11]

        Note that features of type CDS are subject to the usual
        checks at translation. But you can override this behavior
        by giving explicit arguments:

        >>> f.translate(seq, cds=False)
        Seq('GYTYR*CL**')

        Now use the start_offset argument to change the frame. Note
        this uses python 0-based numbering.

        >>> f.translate(seq, start_offset=1, cds=False)
        Seq('VTLTDNVSD')

        Alternatively use the codon_start qualifier to do the same
        thing. Note: this uses 1-based numbering, which is found
        in files from NCBI.

        >>> f.qualifiers['codon_start'] = [2]
        >>> f.translate(seq, cds=False)
        Seq('VTLTDNVSD')
        """
        # see if this feature should be translated in a different
        # frame using the "codon_start" qualifier
        if start_offset is None:
            try:
                # The qualifier is 1-based, the offset 0-based.
                start_offset = int(self.qualifiers["codon_start"][0]) - 1
            except KeyError:
                start_offset = 0

        if start_offset not in [0, 1, 2]:
            raise ValueError(
                "The start_offset must be 0, 1, or 2. "
                f"The supplied value is {start_offset}. "
                "Check the value of either the codon_start qualifier "
                "or the start_offset argument"
            )

        feat_seq = self.extract(parent_sequence)[start_offset:]
        # A transl_table qualifier takes precedence over the table argument.
        codon_table = self.qualifiers.get("transl_table", [table])[0]

        if cds is None:
            cds = self.type == "CDS"

        return feat_seq.translate(
            table=codon_table,
            stop_symbol=stop_symbol,
            to_stop=to_stop,
            cds=cds,
            gap=gap,
        )

    def __bool__(self):
        """Boolean value of an instance of this class (True).

        This behavior is for backwards compatibility, since until the
        __len__ method was added, a SeqFeature always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The SeqFeature may in future evaluate to False when its
        length is zero (in order to better match normal python behavior)!
        """
        return True

    def __len__(self):
        """Return the length of the region where the feature is located.

        >>> from Bio.Seq import Seq
        >>> from Bio.SeqFeature import SeqFeature, SimpleLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL")
        >>> f = SeqFeature(SimpleLocation(8, 15), type="domain")
        >>> len(f)
        7
        >>> f.extract(seq)
        Seq('VALIVIC')
        >>> len(f.extract(seq))
        7

        This is a proxy for taking the length of the feature's location:

        >>> len(f.location)
        7

        For simple features this is the same as the region spanned (end
        position minus start position using Pythonic counting). However, for
        a compound location (e.g. a CDS as the join of several exons) the
        gaps are not counted (e.g. introns). This ensures that len(f) matches
        len(f.extract(parent_seq)), and also makes sure things work properly
        with features wrapping the origin etc.
        """
        return len(self.location)

    def __iter__(self):
        """Iterate over the parent positions within the feature.

        The iteration order is strand aware, and can be thought of as moving
        along the feature using the parent sequence coordinates:

        >>> from Bio.SeqFeature import SeqFeature, SimpleLocation
        >>> f = SeqFeature(SimpleLocation(5, 10, strand=-1), type="domain")
        >>> len(f)
        5
        >>> for i in f: print(i)
        9
        8
        7
        6
        5
        >>> list(f)
        [9, 8, 7, 6, 5]

        This is a proxy for iterating over the location,

        >>> list(f.location)
        [9, 8, 7, 6, 5]
        """
        return iter(self.location)

    def __contains__(self, value):
        """Check if an integer position is within the feature.

        >>> from Bio.SeqFeature import SeqFeature, SimpleLocation
        >>> f = SeqFeature(SimpleLocation(5, 10, strand=-1), type="domain")
        >>> len(f)
        5
        >>> [i for i in range(15) if i in f]
        [5, 6, 7, 8, 9]

        For example, to see which features include a SNP position, you could
        use this:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
        >>> for f in record.features:
        ...     if 1750 in f:
        ...         print("%s %s" % (f.type, f.location))
        source [0:154478](+)
        gene [1716:4347](-)
        tRNA join{[4310:4347](-), [1716:1751](-)}

        Note that for a feature defined as a join of several subfeatures (e.g.
        the union of several exons) the gaps are not checked (e.g. introns).
        In this example, the tRNA location is defined in the GenBank file as
        complement(join(1717..1751,4311..4347)), so that position 1760 falls
        in the gap:

        >>> for f in record.features:
        ...     if 1760 in f:
        ...         print("%s %s" % (f.type, f.location))
        source [0:154478](+)
        gene [1716:4347](-)

        Note that additional care may be required with fuzzy locations, for
        example just before a BeforePosition:

        >>> from Bio.SeqFeature import SeqFeature, SimpleLocation
        >>> from Bio.SeqFeature import BeforePosition
        >>> f = SeqFeature(SimpleLocation(BeforePosition(3), 8), type="domain")
        >>> len(f)
        5
        >>> [i for i in range(10) if i in f]
        [3, 4, 5, 6, 7]

        Note that this is a proxy for testing membership on the location.

        >>> [i for i in range(10) if i in f.location]
        [3, 4, 5, 6, 7]
        """
        return value in self.location
# TODO -- Will this hold PubMed and Medline information decently?
class Reference:
    """Represent a Generic Reference object.

    Attributes:
     - location - A list of Location objects specifying regions of
       the sequence that the references correspond to. If no locations are
       specified, the entire sequence is assumed.
     - authors - A big old string, or a list split by author, of authors
       for the reference.
     - consrtm - A string, empty by default and only included in the str()
       output when set (presumably the consortium behind the reference -
       TODO confirm).
     - title - The title of the reference.
     - journal - Journal the reference was published in.
     - medline_id - A medline reference for the article.
     - pubmed_id - A pubmed reference for the article.
     - comment - A place to stick any comments about the reference.

    """

    def __init__(self):
        """Initialize the class."""
        self.location = []
        self.authors = ""
        self.consrtm = ""
        self.title = ""
        self.journal = ""
        self.medline_id = ""
        self.pubmed_id = ""
        self.comment = ""

    def __str__(self):
        """Return the full Reference object as a python string.

        One "location:" line per location comes first, then one line per
        field. The consrtm line is skipped while that field is empty.
        """
        lines = [f"location: {single_location}" for single_location in self.location]
        lines.append(f"authors: {self.authors}")
        if self.consrtm:
            lines.append(f"consrtm: {self.consrtm}")
        lines.append(f"title: {self.title}")
        lines.append(f"journal: {self.journal}")
        lines.append(f"medline id: {self.medline_id}")
        lines.append(f"pubmed id: {self.pubmed_id}")
        lines.append(f"comment: {self.comment}")
        return "".join(f"{line}\n" for line in lines)

    def __repr__(self):
        """Represent the Reference object as a string for debugging."""
        # TODO - Update this is __init__ later accepts values
        return f"{self.__class__.__name__}(title={self.title!r}, ...)"

    def __eq__(self, other):
        """Check if two Reference objects should be considered equal.

        Note prior to Biopython 1.70 the location was not compared, as
        until then __eq__ for the SimpleLocation class was not defined.

        Comparing with an object that is not a Reference returns
        NotImplemented (so ``==`` evaluates to False) rather than failing
        with an AttributeError on the first missing attribute.
        """
        if not isinstance(other, Reference):
            return NotImplemented
        return (
            self.authors == other.authors
            and self.consrtm == other.consrtm
            and self.title == other.title
            and self.journal == other.journal
            and self.medline_id == other.medline_id
            and self.pubmed_id == other.pubmed_id
            and self.comment == other.comment
            and self.location == other.location
        )
class Location(ABC):
    """Abstract base class representing a location."""

    @abstractmethod
    def __repr__(self):
        """Represent the Location object as a string for debugging."""
        return f"{self.__class__.__name__}(...)"

    @staticmethod
    def fromstring(text, length=None, circular=False, stranded=True):
        """Create a Location object from a string.

        This should accept any valid location string in the INSDC Feature Table
        format (https://www.insdc.org/submitting-standards/feature-table/) as
        used in GenBank, DDBJ and EMBL files.

        Arguments:
         - text - the location string to parse.
         - length - handed on to SimpleLocation.fromstring; the examples
           below use the length of the parent sequence.
         - circular - handed on to SimpleLocation.fromstring.
         - stranded - if true (default), a location not wrapped in
           complement(...) gets strand +1, otherwise strand None.

        Returns a SimpleLocation, or a CompoundLocation for join(...),
        order(...) and bond(...) strings with more than one part.

        Raises ValueError if a leading "complement(" is not closed, and
        LocationParserError (a ValueError subclass) for a complement nested
        inside a complement or for text which cannot be parsed.

        Simple examples:

        >>> Location.fromstring("123..456", 1000)
        SimpleLocation(ExactPosition(122), ExactPosition(456), strand=1)
        >>> Location.fromstring("complement(<123..>456)", 1000)
        SimpleLocation(BeforePosition(122), AfterPosition(456), strand=-1)

        A more complex location using within positions,

        >>> Location.fromstring("(9.10)..(20.25)", 1000)
        SimpleLocation(WithinPosition(8, left=8, right=9), WithinPosition(25, left=20, right=25), strand=1)

        Notice how that will act as though it has overall start 8 and end 25.

        Zero length between feature,

        >>> Location.fromstring("123^124", 1000)
        SimpleLocation(ExactPosition(123), ExactPosition(123), strand=1)

        The expected sequence length is needed for a special case, a between
        position at the start/end of a circular genome:

        >>> Location.fromstring("1000^1", 1000)
        SimpleLocation(ExactPosition(1000), ExactPosition(1000), strand=1)

        Apart from this special case, between positions P^Q must have P+1==Q,

        >>> Location.fromstring("123^456", 1000)
        Traceback (most recent call last):
           ...
        Bio.SeqFeature.LocationParserError: invalid feature location '123^456'

        You can optionally provide a reference name:

        >>> Location.fromstring("AL391218.9:105173..108462", 2000000)
        SimpleLocation(ExactPosition(105172), ExactPosition(108462), strand=1, ref='AL391218.9')

        >>> Location.fromstring("<2644..159", 2868, "circular")
        CompoundLocation([SimpleLocation(BeforePosition(2643), ExactPosition(2868), strand=1), SimpleLocation(ExactPosition(0), ExactPosition(159), strand=1)], 'join')
        """
        # Kept so the malformed-input retry below can start again from the
        # complete string, including any complement(...) wrapper.
        original_text = text
        if text.startswith("complement("):
            if text[-1] != ")":
                raise ValueError(f"closing bracket missing in '{text}'")
            text = text[11:-1]
            strand = -1
        elif stranded:
            strand = 1
        else:
            strand = None

        # Determine if we have a simple location or a compound location
        if text.startswith("join("):
            operator = "join"
            parts = _split(text[5:-1])[1::2]
        elif text.startswith("order("):
            operator = "order"
            parts = _split(text[6:-1])[1::2]
        elif text.startswith("bond("):
            operator = "bond"
            parts = _split(text[5:-1])[1::2]
        else:
            loc = SimpleLocation.fromstring(text, length, circular)
            loc.strand = strand
            if strand == -1:
                loc.parts.reverse()
            return loc

        locs = []
        for part in parts:
            loc = SimpleLocation.fromstring(part, length, circular)
            if loc is None:
                # Unparsable part: skip the for-else and fall through to
                # the error handling at the end.
                break
            if loc.strand == -1:
                if strand == -1:
                    raise LocationParserError(f"double complement in '{text}'?")
            else:
                loc.strand = strand
            locs.extend(loc.parts)
        else:
            if len(locs) == 1:
                return loc
            # Historically a join on the reverse strand has been represented
            # in Biopython with both the parent SeqFeature and its children
            # (the exons for a CDS) all given a strand of -1.  Likewise, for
            # a join feature on the forward strand they all have strand +1.
            # However, we must also consider evil mixed strand examples like
            # this, join(complement(69611..69724),139856..140087,140625..140650)
            if strand == -1:
                # Whole thing was wrapped in complement(...)
                for loc in locs:
                    assert loc.strand == -1
                # Reverse the backwards order used in GenBank files
                # with complement(join(...))
                locs = locs[::-1]
            return CompoundLocation(locs, operator=operator)

        # Not recognized
        if "order" in text and "join" in text:
            # See Bug 3197
            raise LocationParserError(
                f"failed to parse feature location '{text}' containing a combination of 'join' and 'order' (nested operators) are illegal"
            )

        # See issue #937. Note that NCBI has already fixed this record.
        if ",)" in text:
            warnings.warn(
                "Dropping trailing comma in malformed feature location",
                BiopythonParserWarning,
            )
            # Retry with the caller's own settings and the unstripped text,
            # so neither the strand nor the length/circular information is
            # lost on the second attempt.
            return Location.fromstring(
                original_text.replace(",)", ")"), length, circular, stranded
            )

        raise LocationParserError(f"failed to parse feature location '{text}'")
jpayne@69: This means even for features on the reverse strand of a nucleotide jpayne@69: sequence, we expect the 'start' coordinate to be less than the jpayne@69: 'end'. jpayne@69: jpayne@69: >>> from Bio.SeqFeature import SimpleLocation jpayne@69: >>> r = SimpleLocation(122, 150, strand=-1) jpayne@69: >>> print(r) jpayne@69: [122:150](-) jpayne@69: >>> print(r.start) jpayne@69: 122 jpayne@69: >>> print(r.end) jpayne@69: 150 jpayne@69: >>> print(r.strand) jpayne@69: -1 jpayne@69: jpayne@69: i.e. Rather than thinking of the 'start' and 'end' biologically in a jpayne@69: strand aware manner, think of them as the 'left most' or 'minimum' jpayne@69: boundary, and the 'right most' or 'maximum' boundary of the region jpayne@69: being described. This is particularly important with compound jpayne@69: locations describing non-continuous regions. jpayne@69: jpayne@69: In the example above we have used standard exact positions, but there jpayne@69: are also specialised position objects used to represent fuzzy positions jpayne@69: as well, for example a GenBank location like complement(<123..150) jpayne@69: would use a BeforePosition object for the start. jpayne@69: """ jpayne@69: jpayne@69: def __init__(self, start, end, strand=None, ref=None, ref_db=None): jpayne@69: """Initialize the class. jpayne@69: jpayne@69: start and end arguments specify the values where the feature begins jpayne@69: and ends. These can either by any of the ``*Position`` objects that jpayne@69: inherit from Position, or can just be integers specifying the position. jpayne@69: In the case of integers, the values are assumed to be exact and are jpayne@69: converted in ExactPosition arguments. This is meant to make it easy jpayne@69: to deal with non-fuzzy ends. jpayne@69: jpayne@69: i.e. 
Short form: jpayne@69: jpayne@69: >>> from Bio.SeqFeature import SimpleLocation jpayne@69: >>> loc = SimpleLocation(5, 10, strand=-1) jpayne@69: >>> print(loc) jpayne@69: [5:10](-) jpayne@69: jpayne@69: Explicit form: jpayne@69: jpayne@69: >>> from Bio.SeqFeature import SimpleLocation, ExactPosition jpayne@69: >>> loc = SimpleLocation(ExactPosition(5), ExactPosition(10), strand=-1) jpayne@69: >>> print(loc) jpayne@69: [5:10](-) jpayne@69: jpayne@69: Other fuzzy positions are used similarly, jpayne@69: jpayne@69: >>> from Bio.SeqFeature import SimpleLocation jpayne@69: >>> from Bio.SeqFeature import BeforePosition, AfterPosition jpayne@69: >>> loc2 = SimpleLocation(BeforePosition(5), AfterPosition(10), strand=-1) jpayne@69: >>> print(loc2) jpayne@69: [<5:>10](-) jpayne@69: jpayne@69: For nucleotide features you will also want to specify the strand, jpayne@69: use 1 for the forward (plus) strand, -1 for the reverse (negative) jpayne@69: strand, 0 for stranded but strand unknown (? in GFF3), or None for jpayne@69: when the strand does not apply (dot in GFF3), e.g. features on jpayne@69: proteins. 
@staticmethod
def fromstring(text, length=None, circular=False):
    """Create a SimpleLocation object from a string.

    Arguments:
     - text - the location string, e.g. ``"123..150"``. It may be wrapped
       in ``complement(...)`` (giving strand -1, otherwise the strand is
       None) and may carry a ``reference:`` prefix.
     - length - length of the parent sequence. Used as the end of the
       first part when a location wraps the origin, and to recognise the
       between position ``N^1`` at the origin of a sequence of length N.
     - circular - any true value allows a location whose start is not
       before its end to be read as wrapping the origin.

    Returns a SimpleLocation. For an origin-wrapping location it returns
    the sum of two SimpleLocation objects.

    Raises LocationParserError if the text does not match any known
    location category, if a between position is invalid, if the start is
    negative, or if the location appears to wrap the origin but the
    sequence is not circular or no length was supplied.
    """
    if text.startswith("complement("):
        text = text[11:-1]
        strand = -1
    else:
        strand = None
    # Try simple cases first for speed
    try:
        s, e = text.split("..")
        s = int(s) - 1
        e = int(e)
    except ValueError:
        pass
    else:
        if 0 <= s < e:
            return SimpleLocation(s, e, strand)
    # Try general case
    try:
        ref, text = text.split(":")
    except ValueError:
        ref = None
    m = _re_location_category.match(text)
    if m is None:
        raise LocationParserError(f"Could not parse feature location '{text}'")
    # The name of the first named group that matched is the category.
    for key, value in m.groupdict().items():
        if value is not None:
            break
    assert value == text
    if key == "bond":
        # e.g. bond(196)
        warnings.warn(
            "Dropping bond qualifier in feature location",
            BiopythonParserWarning,
        )
        text = text[5:-1]
        s_pos = Position.fromstring(text, -1)
        e_pos = Position.fromstring(text)
    elif key == "solo":
        # e.g. "123"
        s_pos = Position.fromstring(text, -1)
        e_pos = Position.fromstring(text)
    elif key in ("pair", "within", "oneof"):
        s, e = text.split("..")
        # Attempt to fix features that span the origin
        s_pos = Position.fromstring(s, -1)
        e_pos = Position.fromstring(e)
        if s_pos >= e_pos:
            # There is likely a problem with origin wrapping.
            # Create a CompoundLocation of the wrapped feature,
            # consisting of two SimpleLocation objects to extend to
            # the list of feature locations.
            if not circular:
                raise LocationParserError(
                    f"it appears that '{text}' is a feature that spans the origin, but the sequence topology is undefined"
                )
            if length is None:
                # Without the parent length the first part has no end
                # coordinate; fail as a parse error (before warning about
                # a "fix") rather than letting SimpleLocation raise a
                # TypeError about end=None.
                raise LocationParserError(
                    f"cannot wrap feature location '{text}' around the "
                    "origin because the sequence length was not given"
                )
            warnings.warn(
                "Attempting to fix invalid location %r as "
                "it looks like incorrect origin wrapping. "
                "Please fix input file, this could have "
                "unintended behavior." % text,
                BiopythonParserWarning,
            )

            f1 = SimpleLocation(s_pos, length, strand)
            f2 = SimpleLocation(0, e_pos, strand)

            if strand == -1:
                # For complementary features spanning the origin
                return f2 + f1
            else:
                return f1 + f2
    elif key == "between":
        # A between location like "67^68" (one based counting) is a
        # special case (note it has zero length). In python slice
        # notation this is 67:67, a zero length slice. See Bug 2622
        # Further more, on a circular genome of length N you can have
        # a location N^1 meaning the junction at the origin. See Bug 3098.
        # NOTE - We can imagine between locations like "2^4", but this
        # is just "3". Similarly, "2^5" is just "3..4"
        s, e = text.split("^")
        s = int(s)
        e = int(e)
        if s + 1 == e or (s == length and e == 1):
            s_pos = ExactPosition(s)
            e_pos = s_pos
        else:
            raise LocationParserError(f"invalid feature location '{text}'")
    if s_pos < 0:
        raise LocationParserError(
            f"negative starting position in feature location '{text}'"
        )
    return SimpleLocation(s_pos, e_pos, strand, ref=ref)
def __repr__(self):
    """Build a constructor-style string describing this location.

    The start and end are always shown. The ``strand``, ``ref`` and
    ``ref_db`` values follow as keyword arguments, in that order, but only
    those which are not None.
    """
    keyword_text = ""
    for label in ("strand", "ref", "ref_db"):
        setting = getattr(self, label)
        if setting is not None:
            keyword_text += f", {label}={setting!r}"
    return f"{self.__class__.__name__}({self.start!r}, {self.end!r}{keyword_text})"
def __radd__(self, other):
    """Handle ``other + location`` where ``other`` is an integer offset.

    An integer on the left shifts the location exactly as adding it on the
    right does. Any other left operand gives NotImplemented.
    """
    if not isinstance(other, int):
        return NotImplemented
    return self._shift(other)
def __bool__(self):
    """Return True regardless of the length of the feature.

    This behavior is for backwards compatibility, since until the
    __len__ method was added, a SimpleLocation always evaluated as True.

    Note that in comparison, Seq objects, strings, lists, etc, will all
    evaluate to False if they have length zero.

    Python 3 looks for ``__bool__`` when testing truth and otherwise falls
    back to ``__len__``, so this method must carry that name for a zero
    length location to evaluate as True.

    WARNING: The SimpleLocation may in future evaluate to False when its
    length is zero (in order to better match normal python behavior)!
    """
    return True


# Python 2 name for the truth-value hook, kept so that any code calling
# it explicitly continues to work.
__nonzero__ = __bool__
def __iter__(self):
    """Yield each parent coordinate covered by the SimpleLocation object.

    >>> from Bio.SeqFeature import SimpleLocation
    >>> from Bio.SeqFeature import BeforePosition, AfterPosition
    >>> loc = SimpleLocation(BeforePosition(5), AfterPosition(10))
    >>> list(loc)
    [5, 6, 7, 8, 9]

    The order follows the strand, so a location on the reverse strand
    counts down from the last coordinate:

    >>> loc = SimpleLocation(BeforePosition(5), AfterPosition(10), strand = -1)
    >>> list(loc)
    [9, 8, 7, 6, 5]
    """
    low, high = self._start, self._end
    if self.strand == -1:
        # Reverse strand: walk from end - 1 down to start.
        coordinates = range(high - 1, low - 1, -1)
    else:
        coordinates = range(low, high)
    for coordinate in coordinates:
        yield coordinate
def _flip(self, length):
    """Return the location as seen once the parent is reversed (PRIVATE).

    A location tied to an external reference (``ref`` or ``ref_db`` set)
    is returned as is. Otherwise a new SimpleLocation is built whose start
    comes from flipping the old end and whose end comes from flipping the
    old start. A strand of +1 becomes -1 and vice versa, while 0 or None
    is carried over unchanged.
    """
    if self.ref or self.ref_db:
        return self
    # Only the two definite strands swap; 0 and None map to themselves.
    opposite = {+1: -1, -1: +1}
    mirrored_strand = opposite.get(self.strand, self.strand)
    # Note the start and end trade places as well.
    return SimpleLocation(
        start=self._end._flip(length),
        end=self._start._flip(length),
        strand=mirrored_strand,
    )
@property
def end(self):
    """End location - right most (maximum) value, regardless of strand.

    Read only, returns an integer like position object, possibly a fuzzy
    position.

    The value is the one held in the private ``_end`` attribute.
    """
    # Getter-only property: the stored end position is handed back as-is.
    return self._end
def __init__(self, parts, operator="join"):
    """Initialize the class.

    Arguments:
     - parts - an iterable of at least two SimpleLocation objects. It is
       copied into a new list, so a list, tuple or generator all work.
     - operator - string naming how the parts are combined, default
       "join".

    Raises ValueError if any part is not a SimpleLocation, or if fewer
    than two parts are given.

    >>> from Bio.SeqFeature import SimpleLocation, CompoundLocation
    >>> f1 = SimpleLocation(10, 40, strand=+1)
    >>> f2 = SimpleLocation(50, 59, strand=+1)
    >>> f = CompoundLocation([f1, f2])
    >>> len(f) == len(f1) + len(f2) == 39 == len(list(f))
    True
    >>> print(f.operator)
    join
    >>> 5 in f
    False
    >>> 15 in f
    True
    >>> f.strand
    1

    Notice that the strand of the compound location is computed
    automatically - in the case of mixed strands on the sub-locations
    the overall strand is set to None.

    >>> f = CompoundLocation([SimpleLocation(3, 6, strand=+1),
    ...                       SimpleLocation(10, 13, strand=-1)])
    >>> print(f.strand)
    None
    >>> len(f)
    6
    >>> list(f)
    [3, 4, 5, 12, 11, 10]

    The example above doing list(f) iterates over the coordinates within the
    feature. This allows you to use max and min on the location, to find the
    range covered:

    >>> min(f)
    3
    >>> max(f)
    12

    More generally, you can use the compound location's start and end which
    give the full span covered, 0 <= start <= end <= full sequence length.

    >>> f.start == min(f)
    True
    >>> f.end == max(f) + 1
    True

    This is consistent with the behavior of the SimpleLocation for a single
    region, where again the 'start' and 'end' do not necessarily give the
    biological start and end, but rather the 'minimal' and 'maximal'
    coordinate boundaries.

    Note that adding locations provides a more intuitive method of
    construction:

    >>> f = SimpleLocation(3, 6, strand=+1) + SimpleLocation(10, 13, strand=-1)
    >>> len(f)
    6
    >>> list(f)
    [3, 4, 5, 12, 11, 10]
    """
    self.operator = operator
    self.parts = list(parts)
    for loc in self.parts:
        if not isinstance(loc, SimpleLocation):
            raise ValueError(
                "CompoundLocation should be given a list of "
                "SimpleLocation objects, not %s" % loc.__class__
            )
    # Count the materialised list, not the argument: the argument may be
    # a generator or iterator, which has no len() and is exhausted by now.
    if len(self.parts) < 2:
        raise ValueError(
            f"CompoundLocation should have at least 2 parts, not {self.parts!r}"
        )
def _set_strand(self, value):
    """Set function for the strand property (PRIVATE).

    Assigns ``value`` as the strand of every part in turn; the compound
    location stores no strand of its own.
    """
    # Should this be allowed/encouraged?
    for part in self.parts:
        setattr(part, "strand", value)
def __add__(self, other):
    """Combine locations, or shift the location by an integer offset.

    >>> from Bio.SeqFeature import SimpleLocation
    >>> f1 = SimpleLocation(15, 17) + SimpleLocation(20, 30)
    >>> print(f1)
    join{[15:17], [20:30]}

    You can add another SimpleLocation:

    >>> print(f1 + SimpleLocation(40, 50))
    join{[15:17], [20:30], [40:50]}
    >>> print(SimpleLocation(5, 10) + f1)
    join{[5:10], [15:17], [20:30]}

    You can also add another CompoundLocation:

    >>> f2 = SimpleLocation(40, 50) + SimpleLocation(60, 70)
    >>> print(f2)
    join{[40:50], [60:70]}
    >>> print(f1 + f2)
    join{[15:17], [20:30], [40:50], [60:70]}

    Also, as with the SimpleLocation, adding an integer shifts the
    location's coordinates by that offset:

    >>> print(f1 + 100)
    join{[115:117], [120:130]}
    >>> print(200 + f1)
    join{[215:217], [220:230]}
    >>> print(f1 + (-5))
    join{[10:12], [15:25]}

    Raises ValueError when two compound locations with different
    operators are added. Any other kind of operand gives NotImplemented,
    so Python can try the other operand's reflected method and otherwise
    raise TypeError.
    """
    if isinstance(other, SimpleLocation):
        return CompoundLocation(self.parts + [other], self.operator)
    if isinstance(other, CompoundLocation):
        if self.operator != other.operator:
            # Handle join+order -> order as a special case?
            raise ValueError(
                f"Mixed operators {self.operator} and {other.operator}"
            )
        return CompoundLocation(self.parts + other.parts, self.operator)
    if isinstance(other, int):
        return self._shift(other)
    # Return the NotImplemented singleton (as SimpleLocation.__add__ does)
    # rather than raising NotImplementedError, which would stop Python
    # from consulting the other operand.
    return NotImplemented
def __len__(self):
    """Return the combined length of all parts of the CompoundLocation."""
    total = 0
    for part in self.parts:
        total += len(part)
    return total
jpayne@69: jpayne@69: Here is an artificial example, were the features map to the two upper jpayne@69: case regions and the lower case runs of n are not used: jpayne@69: jpayne@69: >>> from Bio.Seq import Seq jpayne@69: >>> from Bio.SeqFeature import SimpleLocation jpayne@69: >>> dna = Seq("nnnnnAGCATCCTGCTGTACnnnnnnnnGAGAMTGCCATGCCCCTGGAGTGAnnnnn") jpayne@69: >>> small = SimpleLocation(5, 20, strand=1) jpayne@69: >>> large = SimpleLocation(28, 52, strand=1) jpayne@69: >>> location = small + large jpayne@69: >>> print(small) jpayne@69: [5:20](+) jpayne@69: >>> print(large) jpayne@69: [28:52](+) jpayne@69: >>> print(location) jpayne@69: join{[5:20](+), [28:52](+)} jpayne@69: >>> for part in location.parts: jpayne@69: ... print(len(part)) jpayne@69: ... jpayne@69: 15 jpayne@69: 24 jpayne@69: jpayne@69: As you can see, this is a silly example where each "exon" is a word: jpayne@69: jpayne@69: >>> print(small.extract(dna).translate()) jpayne@69: SILLY jpayne@69: >>> print(large.extract(dna).translate()) jpayne@69: EXAMPLE* jpayne@69: >>> print(location.extract(dna).translate()) jpayne@69: SILLYEXAMPLE* jpayne@69: >>> for part in location.parts: jpayne@69: ... print(part.extract(dna).translate()) jpayne@69: ... jpayne@69: SILLY jpayne@69: EXAMPLE* jpayne@69: jpayne@69: Now, let's look at this from the reverse strand frame of reference: jpayne@69: jpayne@69: >>> flipped_dna = dna.reverse_complement() jpayne@69: >>> flipped_location = location._flip(len(dna)) jpayne@69: >>> print(flipped_location.extract(flipped_dna).translate()) jpayne@69: SILLYEXAMPLE* jpayne@69: >>> for part in flipped_location.parts: jpayne@69: ... print(part.extract(flipped_dna).translate()) jpayne@69: ... jpayne@69: SILLY jpayne@69: EXAMPLE* jpayne@69: jpayne@69: The key point here is the first part of the CompoundFeature is still the jpayne@69: small exon, while the second part is still the large exon: jpayne@69: jpayne@69: >>> for part in flipped_location.parts: jpayne@69: ... 
print(len(part)) jpayne@69: ... jpayne@69: 15 jpayne@69: 24 jpayne@69: >>> print(flipped_location) jpayne@69: join{[37:52](-), [5:29](-)} jpayne@69: jpayne@69: Notice the parts are not reversed. However, there was a bug here in older jpayne@69: versions of Biopython which would have given join{[5:29](-), [37:52](-)} jpayne@69: and the translation would have wrongly been "EXAMPLE*SILLY" instead. jpayne@69: jpayne@69: """ jpayne@69: return CompoundLocation( jpayne@69: [loc._flip(length) for loc in self.parts], self.operator jpayne@69: ) jpayne@69: jpayne@69: @property jpayne@69: def start(self): jpayne@69: """Start location - left most (minimum) value, regardless of strand. jpayne@69: jpayne@69: Read only, returns an integer like position object, possibly a fuzzy jpayne@69: position. jpayne@69: jpayne@69: For the special case of a CompoundLocation wrapping the origin of a jpayne@69: circular genome, this will return zero. jpayne@69: """ jpayne@69: return min(loc.start for loc in self.parts) jpayne@69: jpayne@69: @property jpayne@69: def end(self): jpayne@69: """End location - right most (maximum) value, regardless of strand. jpayne@69: jpayne@69: Read only, returns an integer like position object, possibly a fuzzy jpayne@69: position. jpayne@69: jpayne@69: For the special case of a CompoundLocation wrapping the origin of jpayne@69: a circular genome this will match the genome length. jpayne@69: """ jpayne@69: return max(loc.end for loc in self.parts) jpayne@69: jpayne@69: @property jpayne@69: def ref(self): jpayne@69: """Not present in CompoundLocation, dummy method for API compatibility.""" jpayne@69: return None jpayne@69: jpayne@69: @property jpayne@69: def ref_db(self): jpayne@69: """Not present in CompoundLocation, dummy method for API compatibility.""" jpayne@69: return None jpayne@69: jpayne@69: def extract(self, parent_sequence, references=None): jpayne@69: """Extract the sequence from supplied parent sequence using the CompoundLocation object. 
jpayne@69: jpayne@69: The parent_sequence can be a Seq like object or a string, and will jpayne@69: generally return an object of the same type. The exception to this is jpayne@69: a MutableSeq as the parent sequence will return a Seq object. jpayne@69: If the location refers to other records, they must be supplied jpayne@69: in the optional dictionary references. jpayne@69: jpayne@69: >>> from Bio.Seq import Seq jpayne@69: >>> from Bio.SeqFeature import SimpleLocation, CompoundLocation jpayne@69: >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL") jpayne@69: >>> fl1 = SimpleLocation(2, 8) jpayne@69: >>> fl2 = SimpleLocation(10, 15) jpayne@69: >>> fl3 = CompoundLocation([fl1,fl2]) jpayne@69: >>> fl3.extract(seq) jpayne@69: Seq('QHKAMILIVIC') jpayne@69: jpayne@69: """ jpayne@69: # This copes with mixed strand features & all on reverse: jpayne@69: parts = [ jpayne@69: loc.extract(parent_sequence, references=references) for loc in self.parts jpayne@69: ] jpayne@69: f_seq = functools.reduce(lambda x, y: x + y, parts) jpayne@69: return f_seq jpayne@69: jpayne@69: jpayne@69: class Position(ABC): jpayne@69: """Abstract base class representing a position.""" jpayne@69: jpayne@69: @abstractmethod jpayne@69: def __repr__(self): jpayne@69: """Represent the Position object as a string for debugging.""" jpayne@69: return f"{self.__class__.__name__}(...)" jpayne@69: jpayne@69: @staticmethod jpayne@69: def fromstring(text, offset=0): jpayne@69: """Build a Position object from the text string. 
class Position(ABC):
    """Abstract base class representing a position."""

    @abstractmethod
    def __repr__(self):
        """Represent the Position object as a string for debugging."""
        return f"{self.__class__.__name__}(...)"

    @staticmethod
    def fromstring(text, offset=0):
        """Build a Position object from the text string.

        For an end position, leave offset as zero (default):

        >>> Position.fromstring("5")
        ExactPosition(5)

        For a start position, set offset to minus one (for Python counting):

        >>> Position.fromstring("5", -1)
        ExactPosition(4)

        This also covers fuzzy positions:

        >>> p = Position.fromstring("<5")
        >>> p
        BeforePosition(5)
        >>> print(p)
        <5
        >>> int(p)
        5

        >>> Position.fromstring(">5")
        AfterPosition(5)

        By default assumes an end position, so note the integer behavior:

        >>> p = Position.fromstring("one-of(5,8,11)")
        >>> p
        OneOfPosition(11, choices=[ExactPosition(5), ExactPosition(8), ExactPosition(11)])
        >>> print(p)
        one-of(5,8,11)
        >>> int(p)
        11

        >>> Position.fromstring("(8.10)")
        WithinPosition(10, left=8, right=10)

        Fuzzy start positions:

        >>> p = Position.fromstring("<5", -1)
        >>> p
        BeforePosition(4)
        >>> print(p)
        <4
        >>> int(p)
        4

        Notice how the integer behavior changes too!

        >>> p = Position.fromstring("one-of(5,8,11)", -1)
        >>> p
        OneOfPosition(4, choices=[ExactPosition(4), ExactPosition(7), ExactPosition(10)])
        >>> print(p)
        one-of(4,7,10)
        >>> int(p)
        4

        """
        if offset not in (0, -1):
            raise ValueError(
                "To convert one-based indices to zero-based indices, offset must be either 0 (for end positions) or -1 (for start positions)."
            )
        # An offset of -1 marks a start position, which takes the lower bound
        # of any fuzzy range; end positions take the upper bound.
        is_start = offset == -1

        if text == "?":
            return UnknownPosition()

        # Single-character prefixes, checked in the same order as before.
        if text.startswith("?"):
            prefixed_type = UncertainPosition
        elif text.startswith("<"):
            prefixed_type = BeforePosition
        elif text.startswith(">"):
            prefixed_type = AfterPosition
        else:
            prefixed_type = None
        if prefixed_type is not None:
            return prefixed_type(int(text[1:]) + offset)

        within = _re_within_position.match(text)
        if within is not None:
            low_text, high_text = within.groups()
            low = int(low_text) + offset
            high = int(high_text) + offset
            return WithinPosition(low if is_start else high, left=low, right=high)

        oneof = _re_oneof_position.match(text)
        if oneof is not None:
            choices = [
                ExactPosition(int(item) + offset)
                for item in oneof.groups()[0].split(",")
            ]
            pick = min if is_start else max
            return OneOfPosition(pick(int(choice) for choice in choices), choices=choices)

        return ExactPosition(int(text) + offset)
class ExactPosition(int, Position):
    """Specify the specific position of a boundary.

    Arguments:
     - position - The position of the boundary.
     - extension - An optional argument which must be zero since we don't
       have an extension. The argument is provided so that the same number
       of arguments can be passed to all position types.

    In this case, there is no fuzziness associated with the position.

    >>> p = ExactPosition(5)
    >>> p
    ExactPosition(5)
    >>> print(p)
    5

    >>> isinstance(p, Position)
    True
    >>> isinstance(p, int)
    True

    Integer comparisons and operations should work as expected:

    >>> p == 5
    True
    >>> p < 6
    True
    >>> p <= 5
    True
    >>> p + 10
    ExactPosition(15)

    """

    def __new__(cls, position, extension=0):
        """Create an ExactPosition object."""
        if extension == 0:
            return int.__new__(cls, position)
        raise AttributeError(f"Non-zero extension {extension} for exact position.")

    # Must define this on Python 3.8 onwards because we redefine __repr__
    def __str__(self):
        """Return a representation of the ExactPosition object (with python counting)."""
        plain = int(self)
        return str(plain)

    def __repr__(self):
        """Represent the ExactPosition object as a string for debugging."""
        return f"{self.__class__.__name__}({int(self)})"

    def __add__(self, offset):
        """Return a copy of the position object with its location shifted (PRIVATE)."""
        # Build the result through the instance's own class so that
        # subclasses are preserved.
        same_type = self.__class__
        return same_type(int(self) + offset)

    def _flip(self, length):
        """Return a copy of the location after the parent is reversed (PRIVATE)."""
        # Build the result through the instance's own class so that
        # subclasses are preserved.
        same_type = self.__class__
        return same_type(length - int(self))
class UncertainPosition(ExactPosition):
    """Specify a specific position which is uncertain.

    This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
    XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.
    """


class UnknownPosition(Position):
    """Specify a specific position which is unknown (has no position).

    This is used in UniProt, e.g. ? or in the XML as unknown.
    """

    def __repr__(self):
        """Represent the UnknownPosition object as a string for debugging."""
        class_name = self.__class__.__name__
        return f"{class_name}()"

    def __hash__(self):
        """Return the hash value of the UnknownPosition object."""
        # Every unknown position hashes the same, namely like None.
        return hash(None)

    def __add__(self, offset):
        """Return a copy of the position object with its location shifted (PRIVATE)."""
        # There is no coordinate to shift, so the same object is returned.
        return self

    def _flip(self, length):
        """Return a copy of the location after the parent is reversed (PRIVATE)."""
        # There is no coordinate to flip, so the same object is returned.
        return self
class WithinPosition(int, Position):
    """Specify the position of a boundary within some coordinates.

    Arguments:
     - position - The default integer position
     - left - The start (left) position of the boundary
     - right - The end (right) position of the boundary

    This allows dealing with a location like ((11.14)..100). This
    indicates that the start of the sequence is somewhere between 11
    and 14. Since this is a start coordinate, it should act like
    it is at position 11 (or in Python counting, 10).

    >>> p = WithinPosition(10, 10, 13)
    >>> p
    WithinPosition(10, left=10, right=13)
    >>> print(p)
    (10.13)
    >>> int(p)
    10

    Basic integer comparisons and operations should work as though
    this were a plain integer:

    >>> p == 10
    True
    >>> p in [9, 10, 11]
    True
    >>> p < 11
    True
    >>> p + 10
    WithinPosition(20, left=20, right=23)

    >>> isinstance(p, WithinPosition)
    True
    >>> isinstance(p, Position)
    True
    >>> isinstance(p, int)
    True

    Note this also applies for comparison to other position objects,
    where again the integer behavior is used:

    >>> p == 10
    True
    >>> p == ExactPosition(10)
    True
    >>> p == BeforePosition(10)
    True
    >>> p == AfterPosition(10)
    True

    If this were an end point, you would want the position to be 13
    (the right/larger value, not the left/smaller value as above):

    >>> p2 = WithinPosition(13, 10, 13)
    >>> p2
    WithinPosition(13, left=10, right=13)
    >>> print(p2)
    (10.13)
    >>> int(p2)
    13
    >>> p2 == 13
    True
    >>> p2 == ExactPosition(13)
    True

    """

    def __new__(cls, position, left, right):
        """Create a WithinPosition object."""
        # The integer value must coincide with one of the two bounds.
        on_a_bound = position == left or position == right
        if not on_a_bound:
            raise RuntimeError(
                "WithinPosition: %r should match left %r or right %r"
                % (position, left, right)
            )
        instance = int.__new__(cls, position)
        instance._left = left
        instance._right = right
        return instance

    def __getnewargs__(self):
        """Return the arguments accepted by __new__.

        Necessary to allow pickling and unpickling of class instances.
        """
        return (int(self), self._left, self._right)

    def __repr__(self):
        """Represent the WithinPosition object as a string for debugging."""
        values = (self.__class__.__name__, int(self), self._left, self._right)
        return "%s(%i, left=%i, right=%i)" % values

    def __str__(self):
        """Return a representation of the WithinPosition object (with python counting)."""
        return f"({self._left}.{self._right})"

    def __add__(self, offset):
        """Return a copy of the position object with its location shifted."""
        shifted = (int(self) + offset, self._left + offset, self._right + offset)
        return self.__class__(*shifted)

    def _flip(self, length):
        """Return a copy of the location after the parent is reversed (PRIVATE)."""
        # Reversing the parent swaps the roles of the left and right bounds.
        flipped = (length - int(self), length - self._right, length - self._left)
        return self.__class__(*flipped)
class BetweenPosition(int, Position):
    """Specify the position of a boundary between two coordinates (OBSOLETE?).

    Arguments:
     - position - The default integer position
     - left - The start (left) position of the boundary
     - right - The end (right) position of the boundary

    This allows dealing with a position like 123^456. This
    indicates that the start of the sequence is somewhere between
    123 and 456. It is up to the parser to set the position argument
    to either boundary point (depending on if this is being used as
    a start or end of the feature). For example as a feature end:

    >>> p = BetweenPosition(456, 123, 456)
    >>> p
    BetweenPosition(456, left=123, right=456)
    >>> print(p)
    (123^456)
    >>> int(p)
    456

    Integer equality and comparison use the given position,

    >>> p == 456
    True
    >>> p in [455, 456, 457]
    True
    >>> p > 300
    True

    The old legacy properties of position and extension give the
    starting/lower/left position as an integer, and the distance
    to the ending/higher/right position as an integer. Note that
    the position object will act like either the left or the right
    end-point depending on how it was created:

    >>> p2 = BetweenPosition(123, left=123, right=456)
    >>> int(p) == int(p2)
    False
    >>> p == 456
    True
    >>> p2 == 123
    True

    Note this potentially surprising behavior:

    >>> BetweenPosition(123, left=123, right=456) == ExactPosition(123)
    True
    >>> BetweenPosition(123, left=123, right=456) == BeforePosition(123)
    True
    >>> BetweenPosition(123, left=123, right=456) == AfterPosition(123)
    True

    i.e. For equality (and sorting) the position objects behave like
    integers.

    """

    def __new__(cls, position, left, right):
        """Create a new instance in BetweenPosition object.

        Raises AssertionError if position matches neither left nor right.
        """
        # Raised explicitly (rather than via an assert statement) so the
        # check still runs under "python -O"; the exception type is kept as
        # AssertionError for backwards compatibility.
        if not (position == left or position == right):
            raise AssertionError(
                f"BetweenPosition: {position!r} should match left {left!r} "
                f"or right {right!r}"
            )
        # TODO - public API for getting left/right, especially the unknown one
        obj = int.__new__(cls, position)
        obj._left = left
        obj._right = right
        return obj

    def __getnewargs__(self):
        """Return the arguments accepted by __new__.

        Necessary to allow pickling and unpickling of class instances.
        """
        return (int(self), self._left, self._right)

    def __repr__(self):
        """Represent the BetweenPosition object as a string for debugging."""
        return "%s(%i, left=%i, right=%i)" % (
            self.__class__.__name__,
            int(self),
            self._left,
            self._right,
        )

    def __str__(self):
        """Return a representation of the BetweenPosition object (with python counting)."""
        return f"({self._left}^{self._right})"

    def __add__(self, offset):
        """Return a copy of the position object with its location shifted (PRIVATE)."""
        return self.__class__(
            int(self) + offset, self._left + offset, self._right + offset
        )

    def _flip(self, length):
        """Return a copy of the location after the parent is reversed (PRIVATE)."""
        # Reversing the parent swaps the roles of the left and right bounds.
        return self.__class__(
            length - int(self), length - self._right, length - self._left
        )
class BeforePosition(int, Position):
    """Specify a position where the actual location occurs before it.

    Arguments:
     - position - The upper boundary of where the location can occur.
     - extension - An optional argument which must be zero since we don't
       have an extension. The argument is provided so that the same number
       of arguments can be passed to all position types.

    This is used to specify positions like (<10..100) where the location
    occurs somewhere before position 10.

    >>> p = BeforePosition(5)
    >>> p
    BeforePosition(5)
    >>> print(p)
    <5
    >>> int(p)
    5
    >>> p + 10
    BeforePosition(15)

    Note this potentially surprising behavior:

    >>> p == ExactPosition(5)
    True
    >>> p == AfterPosition(5)
    True

    Just remember that for equality and sorting the position objects act
    like integers.
    """

    # Subclasses int so can't use __init__
    def __new__(cls, position, extension=0):
        """Create a new instance in BeforePosition object.

        Raises AttributeError if extension is non-zero.
        """
        if extension != 0:
            # The message names this class; it previously said "exact
            # position", copied from ExactPosition.
            raise AttributeError(
                f"Non-zero extension {extension} for before position."
            )
        return int.__new__(cls, position)

    def __repr__(self):
        """Represent the location as a string for debugging."""
        return "%s(%i)" % (self.__class__.__name__, int(self))

    def __str__(self):
        """Return a representation of the BeforePosition object (with python counting)."""
        return f"<{int(self)}"

    def __add__(self, offset):
        """Return a copy of the position object with its location shifted (PRIVATE)."""
        return self.__class__(int(self) + offset)

    def _flip(self, length):
        """Return a copy of the location after the parent is reversed (PRIVATE)."""
        # "Before" on one strand becomes "after" once the parent is reversed.
        return AfterPosition(length - int(self))
class AfterPosition(int, Position):
    """Specify a position where the actual location is found after it.

    Arguments:
     - position - The lower boundary of where the location can occur.
     - extension - An optional argument which must be zero since we don't
       have an extension. The argument is provided so that the same number
       of arguments can be passed to all position types.

    This is used to specify positions like (>10..100) where the location
    occurs somewhere after position 10.

    >>> p = AfterPosition(7)
    >>> p
    AfterPosition(7)
    >>> print(p)
    >7
    >>> int(p)
    7
    >>> p + 10
    AfterPosition(17)

    >>> isinstance(p, AfterPosition)
    True
    >>> isinstance(p, Position)
    True
    >>> isinstance(p, int)
    True

    Note this potentially surprising behavior:

    >>> p == ExactPosition(7)
    True
    >>> p == BeforePosition(7)
    True

    Just remember that for equality and sorting the position objects act
    like integers.
    """

    # Subclasses int so can't use __init__
    def __new__(cls, position, extension=0):
        """Create a new instance of the AfterPosition object.

        Raises AttributeError if extension is non-zero.
        """
        if extension != 0:
            # The message names this class; it previously said "exact
            # position", copied from ExactPosition.
            raise AttributeError(
                f"Non-zero extension {extension} for after position."
            )
        return int.__new__(cls, position)

    def __repr__(self):
        """Represent the location as a string for debugging."""
        return "%s(%i)" % (self.__class__.__name__, int(self))

    def __str__(self):
        """Return a representation of the AfterPosition object (with python counting)."""
        return f">{int(self)}"

    def __add__(self, offset):
        """Return a copy of the position object with its location shifted (PRIVATE)."""
        return self.__class__(int(self) + offset)

    def _flip(self, length):
        """Return a copy of the location after the parent is reversed (PRIVATE)."""
        # "After" on one strand becomes "before" once the parent is reversed.
        return BeforePosition(length - int(self))
class OneOfPosition(int, Position):
    """Specify a position where the location can be multiple positions.

    This models the GenBank 'one-of(1888,1901)' function, and tries
    to make this fit within the Biopython Position models. If this was
    a start position it should act like 1888, but as an end position 1901.

    >>> p = OneOfPosition(1888, [ExactPosition(1888), ExactPosition(1901)])
    >>> p
    OneOfPosition(1888, choices=[ExactPosition(1888), ExactPosition(1901)])
    >>> int(p)
    1888

    Integer comparisons and operators act like using int(p),

    >>> p == 1888
    True
    >>> p <= 1888
    True
    >>> p > 1888
    False
    >>> p + 100
    OneOfPosition(1988, choices=[ExactPosition(1988), ExactPosition(2001)])

    >>> isinstance(p, OneOfPosition)
    True
    >>> isinstance(p, Position)
    True
    >>> isinstance(p, int)
    True

    """

    def __new__(cls, position, choices):
        """Initialize with a set of possible positions.

        choices is a list of Position derived objects, specifying possible
        locations.

        position is an integer specifying the default behavior.

        Raises ValueError if position is not one of the choices.
        """
        if position not in choices:
            raise ValueError(
                f"OneOfPosition: {position!r} should match one of {choices!r}"
            )
        obj = int.__new__(cls, position)
        obj.position_choices = choices
        return obj

    def __getnewargs__(self):
        """Return the arguments accepted by __new__.

        Necessary to allow pickling and unpickling of class instances.
        """
        return (int(self), self.position_choices)

    def __repr__(self):
        """Represent the OneOfPosition object as a string for debugging."""
        return "%s(%i, choices=%r)" % (
            self.__class__.__name__,
            int(self),
            self.position_choices,
        )

    def __str__(self):
        """Return a representation of the OneOfPosition object (with python counting)."""
        # Join the formatted choices instead of appending in a loop and then
        # slicing off the trailing comma.
        choices = ",".join(f"{position}" for position in self.position_choices)
        return f"one-of({choices})"

    def __add__(self, offset):
        """Return a copy of the position object with its location shifted (PRIVATE)."""
        return self.__class__(
            int(self) + offset, [p + offset for p in self.position_choices]
        )

    def _flip(self, length):
        """Return a copy of the location after the parent is reversed (PRIVATE)."""
        # The choices are flipped individually and listed in reverse order.
        return self.__class__(
            length - int(self), [p._flip(length) for p in self.position_choices[::-1]]
        )


if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()