bioproject_to_srr_2: charset_normalizer/legacy.py comparison

comparison charset_normalizer/legacy.py @ 7:5eb2d5e3bf22

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538

author	jpayne
date	Sun, 05 May 2024 23:32:17 -0400
parents
children

comparison

equal deleted inserted replaced

-:b2745907b1eb
+:5eb2d5e3bf22
+from typing import Any, Dict, Optional, Union
+from warnings import warn
+from .api import from_bytes
+from .constant import CHARDET_CORRESPONDENCE
def detect(
    byte_str: Union[bytes, bytearray],
    should_rename_legacy: bool = False,
    **kwargs: Any,
) -> Dict[str, Optional[Union[str, float]]]:
    """
    chardet legacy method.

    Detect the encoding of the given byte string. It should be mostly
    backward-compatible. The encoding name will match chardet's own spelling
    whenever possible (not for encoding names chardet does not support).

    This function is deprecated and exists to ease migrating a project;
    consult the documentation for further information. Not planned for
    removal.

    :param byte_str: The byte sequence to examine. A ``bytearray`` is
        accepted and converted to ``bytes`` before detection.
    :param should_rename_legacy: Should we rename legacy encodings to their
        more modern equivalents? When left at ``False``, an encoding found in
        ``CHARDET_CORRESPONDENCE`` is replaced by its mapped name.
    :param kwargs: Any other keyword argument is ignored; a warning listing
        the ignored names is emitted.
    :return: A dict with the keys ``"encoding"``, ``"language"`` and
        ``"confidence"``. When no best match is found, ``encoding`` and
        ``confidence`` are ``None`` and ``language`` is ``""``.
    :raises TypeError: If ``byte_str`` is neither ``bytes`` nor ``bytearray``.
    """
    if kwargs:
        # stacklevel=2 attributes the warning to the caller that passed the
        # unsupported arguments rather than to this line.
        warn(
            f"charset-normalizer disregard arguments '{','.join(kwargs)}' in legacy function detect()",
            stacklevel=2,
        )

    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    r = from_bytes(byte_str).best()

    encoding = r.encoding if r is not None else None
    # An "Unknown" language is reported as an empty string.
    language = r.language if r is not None and r.language != "Unknown" else ""
    # Confidence is reported as the complement of the match's chaos value.
    confidence = 1.0 - r.chaos if r is not None else None

    # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
    # but chardet does return 'utf-8-sig' and it is a valid codec name.
    if r is not None and encoding == "utf_8" and r.bom:
        encoding += "_sig"

    # Deliberately an identity check: only a literal ``False`` triggers the
    # chardet-style renaming.
    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
        encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }

Mercurial > repos > jpayne > bioproject_to_srr_2

comparison charset_normalizer/legacy.py @ 7:5eb2d5e3bf22