import logging
from os import PathLike
from typing import BinaryIO, List, Optional, Set, Union

from .cd import (
    coherence_ratio,
    encoding_languages,
    mb_encoding_languages,
    merge_coherence_ratios,
)
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
from .md import mess_ratio
from .models import CharsetMatch, CharsetMatches
from .utils import (
    any_specified_encoding,
    cut_sequence_chunks,
    iana_name,
    identify_sig_or_bom,
    is_cp_similar,
    is_multi_byte_encoding,
    should_strip_sig_or_bom,
)

# Will most likely be controversial
# logging.addLevelName(TRACE, "TRACE")
logger = logging.getLogger("charset_normalizer")
explain_handler = logging.StreamHandler()
explain_handler.setFormatter(
    logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
)


def from_bytes(
    sequences: Union[bytes, bytearray],
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.2,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Given a raw bytes sequence, return the best possible charsets usable to render str objects.
    If there are no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence
    of a given sequence, and will give up on a particular code page after 20% of measured mess.
    Those criteria are customizable at will.

    The preemptive behaviour DOES NOT replace the traditional detection workflow; it prioritizes a
    particular code page but never takes it for granted. It can improve performance.

    You may want to focus your attention on some code pages and/or exclude others; use cp_isolation
    and cp_exclusion for that purpose.

    This function will strip the SIG from the payload/sequence every time except for UTF-16 and UTF-32.
    By default the library does not set up any handler other than the NullHandler. If you set the
    'explain' toggle to True, the logger configuration is altered to add a StreamHandler suitable
    for debugging. A custom logging format and handler can be set manually.
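
    Illustrative usage (a minimal sketch; ``best()`` is the same accessor this
    module uses further down to pick the top candidate)::

        matches = from_bytes("héllo wörld".encode("utf_8"))
        best = matches.best()  # a CharsetMatch, or None when nothing fits
        if best is not None:
            print(best.encoding)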
jpayne@7: """ jpayne@7: jpayne@7: if not isinstance(sequences, (bytearray, bytes)): jpayne@7: raise TypeError( jpayne@7: "Expected object of type bytes or bytearray, got: {0}".format( jpayne@7: type(sequences) jpayne@7: ) jpayne@7: ) jpayne@7: jpayne@7: if explain: jpayne@7: previous_logger_level: int = logger.level jpayne@7: logger.addHandler(explain_handler) jpayne@7: logger.setLevel(TRACE) jpayne@7: jpayne@7: length: int = len(sequences) jpayne@7: jpayne@7: if length == 0: jpayne@7: logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.") jpayne@7: if explain: jpayne@7: logger.removeHandler(explain_handler) jpayne@7: logger.setLevel(previous_logger_level or logging.WARNING) jpayne@7: return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")]) jpayne@7: jpayne@7: if cp_isolation is not None: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "cp_isolation is set. use this flag for debugging purpose. " jpayne@7: "limited list of encoding allowed : %s.", jpayne@7: ", ".join(cp_isolation), jpayne@7: ) jpayne@7: cp_isolation = [iana_name(cp, False) for cp in cp_isolation] jpayne@7: else: jpayne@7: cp_isolation = [] jpayne@7: jpayne@7: if cp_exclusion is not None: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "cp_exclusion is set. use this flag for debugging purpose. " jpayne@7: "limited list of encoding excluded : %s.", jpayne@7: ", ".join(cp_exclusion), jpayne@7: ) jpayne@7: cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion] jpayne@7: else: jpayne@7: cp_exclusion = [] jpayne@7: jpayne@7: if length <= (chunk_size * steps): jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.", jpayne@7: steps, jpayne@7: chunk_size, jpayne@7: length, jpayne@7: ) jpayne@7: steps = 1 jpayne@7: chunk_size = length jpayne@7: jpayne@7: if steps > 1 and length / steps < chunk_size: jpayne@7: chunk_size = int(length / steps) jpayne@7: jpayne@7: is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE jpayne@7: is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE jpayne@7: jpayne@7: if is_too_small_sequence: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "Trying to detect encoding from a tiny portion of ({}) byte(s).".format( jpayne@7: length jpayne@7: ), jpayne@7: ) jpayne@7: elif is_too_large_sequence: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format( jpayne@7: length jpayne@7: ), jpayne@7: ) jpayne@7: jpayne@7: prioritized_encodings: List[str] = [] jpayne@7: jpayne@7: specified_encoding: Optional[str] = ( jpayne@7: any_specified_encoding(sequences) if preemptive_behaviour else None jpayne@7: ) jpayne@7: jpayne@7: if specified_encoding is not None: jpayne@7: prioritized_encodings.append(specified_encoding) jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "Detected declarative mark in sequence. 
    tested: Set[str] = set()
    tested_but_hard_failure: List[str] = []
    tested_but_soft_failure: List[str] = []

    fallback_ascii: Optional[CharsetMatch] = None
    fallback_u8: Optional[CharsetMatch] = None
    fallback_specified: Optional[CharsetMatch] = None

    results: CharsetMatches = CharsetMatches()

    sig_encoding, sig_payload = identify_sig_or_bom(sequences)

    if sig_encoding is not None:
        prioritized_encodings.append(sig_encoding)
        logger.log(
            TRACE,
            "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
            len(sig_payload),
            sig_encoding,
        )

    prioritized_encodings.append("ascii")

    if "utf_8" not in prioritized_encodings:
        prioritized_encodings.append("utf_8")

    for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
        if cp_isolation and encoding_iana not in cp_isolation:
            continue

        if cp_exclusion and encoding_iana in cp_exclusion:
            continue

        if encoding_iana in tested:
            continue

        tested.add(encoding_iana)

        decoded_payload: Optional[str] = None
        bom_or_sig_available: bool = sig_encoding == encoding_iana
        strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
            encoding_iana
        )

        if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because it requires a BOM. Will try some sub-encoders (LE/BE).",
                encoding_iana,
            )
            continue
        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
                encoding_iana,
            )
            continue

        try:
            is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
        except (ModuleNotFoundError, ImportError):
            logger.log(
                TRACE,
                "Encoding %s does not provide an IncrementalDecoder",
                encoding_iana,
            )
            continue

        try:
            if is_too_large_sequence and is_multi_byte_decoder is False:
                str(
                    sequences[: int(50e4)]
                    if strip_sig_or_bom is False
                    else sequences[len(sig_payload) : int(50e4)],
                    encoding=encoding_iana,
                )
            else:
                decoded_payload = str(
                    sequences
                    if strip_sig_or_bom is False
                    else sequences[len(sig_payload) :],
                    encoding=encoding_iana,
                )
        except (UnicodeDecodeError, LookupError) as e:
            if not isinstance(e, LookupError):
                logger.log(
                    TRACE,
                    "Code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
            tested_but_hard_failure.append(encoding_iana)
            continue
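        # Illustration (assumption about code-page kinship): cp1252 and
        # iso8859_1 decode most byte values identically, so once one of them
        # soft-fails the probing below, re-testing the other rarely pays off.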
%s", jpayne@7: encoding_iana, jpayne@7: str(e), jpayne@7: ) jpayne@7: tested_but_hard_failure.append(encoding_iana) jpayne@7: continue jpayne@7: jpayne@7: similar_soft_failure_test: bool = False jpayne@7: jpayne@7: for encoding_soft_failed in tested_but_soft_failure: jpayne@7: if is_cp_similar(encoding_iana, encoding_soft_failed): jpayne@7: similar_soft_failure_test = True jpayne@7: break jpayne@7: jpayne@7: if similar_soft_failure_test: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!", jpayne@7: encoding_iana, jpayne@7: encoding_soft_failed, jpayne@7: ) jpayne@7: continue jpayne@7: jpayne@7: r_ = range( jpayne@7: 0 if not bom_or_sig_available else len(sig_payload), jpayne@7: length, jpayne@7: int(length / steps), jpayne@7: ) jpayne@7: jpayne@7: multi_byte_bonus: bool = ( jpayne@7: is_multi_byte_decoder jpayne@7: and decoded_payload is not None jpayne@7: and len(decoded_payload) < length jpayne@7: ) jpayne@7: jpayne@7: if multi_byte_bonus: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "Code page %s is a multi byte encoding table and it appear that at least one character " jpayne@7: "was encoded using n-bytes.", jpayne@7: encoding_iana, jpayne@7: ) jpayne@7: jpayne@7: max_chunk_gave_up: int = int(len(r_) / 4) jpayne@7: jpayne@7: max_chunk_gave_up = max(max_chunk_gave_up, 2) jpayne@7: early_stop_count: int = 0 jpayne@7: lazy_str_hard_failure = False jpayne@7: jpayne@7: md_chunks: List[str] = [] jpayne@7: md_ratios = [] jpayne@7: jpayne@7: try: jpayne@7: for chunk in cut_sequence_chunks( jpayne@7: sequences, jpayne@7: encoding_iana, jpayne@7: r_, jpayne@7: chunk_size, jpayne@7: bom_or_sig_available, jpayne@7: strip_sig_or_bom, jpayne@7: sig_payload, jpayne@7: is_multi_byte_decoder, jpayne@7: decoded_payload, jpayne@7: ): jpayne@7: md_chunks.append(chunk) jpayne@7: jpayne@7: md_ratios.append( jpayne@7: mess_ratio( jpayne@7: chunk, jpayne@7: threshold, jpayne@7: explain is True and 1 <= len(cp_isolation) <= 2, jpayne@7: ) jpayne@7: ) jpayne@7: jpayne@7: if md_ratios[-1] >= threshold: jpayne@7: early_stop_count += 1 jpayne@7: jpayne@7: if (early_stop_count >= max_chunk_gave_up) or ( jpayne@7: bom_or_sig_available and strip_sig_or_bom is False jpayne@7: ): jpayne@7: break jpayne@7: except ( jpayne@7: UnicodeDecodeError jpayne@7: ) as e: # Lazy str loading may have missed something there jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s", jpayne@7: encoding_iana, jpayne@7: str(e), jpayne@7: ) jpayne@7: early_stop_count = max_chunk_gave_up jpayne@7: lazy_str_hard_failure = True jpayne@7: jpayne@7: # We might want to check the sequence again with the whole content jpayne@7: # Only if initial MD tests passes jpayne@7: if ( jpayne@7: not lazy_str_hard_failure jpayne@7: and is_too_large_sequence jpayne@7: and not is_multi_byte_decoder jpayne@7: ): jpayne@7: try: jpayne@7: sequences[int(50e3) :].decode(encoding_iana, errors="strict") jpayne@7: except UnicodeDecodeError as e: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. 
%s", jpayne@7: encoding_iana, jpayne@7: str(e), jpayne@7: ) jpayne@7: tested_but_hard_failure.append(encoding_iana) jpayne@7: continue jpayne@7: jpayne@7: mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0 jpayne@7: if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up: jpayne@7: tested_but_soft_failure.append(encoding_iana) jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "%s was excluded because of initial chaos probing. Gave up %i time(s). " jpayne@7: "Computed mean chaos is %f %%.", jpayne@7: encoding_iana, jpayne@7: early_stop_count, jpayne@7: round(mean_mess_ratio * 100, ndigits=3), jpayne@7: ) jpayne@7: # Preparing those fallbacks in case we got nothing. jpayne@7: if ( jpayne@7: enable_fallback jpayne@7: and encoding_iana in ["ascii", "utf_8", specified_encoding] jpayne@7: and not lazy_str_hard_failure jpayne@7: ): jpayne@7: fallback_entry = CharsetMatch( jpayne@7: sequences, encoding_iana, threshold, False, [], decoded_payload jpayne@7: ) jpayne@7: if encoding_iana == specified_encoding: jpayne@7: fallback_specified = fallback_entry jpayne@7: elif encoding_iana == "ascii": jpayne@7: fallback_ascii = fallback_entry jpayne@7: else: jpayne@7: fallback_u8 = fallback_entry jpayne@7: continue jpayne@7: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "%s passed initial chaos probing. Mean measured chaos is %f %%", jpayne@7: encoding_iana, jpayne@7: round(mean_mess_ratio * 100, ndigits=3), jpayne@7: ) jpayne@7: jpayne@7: if not is_multi_byte_decoder: jpayne@7: target_languages: List[str] = encoding_languages(encoding_iana) jpayne@7: else: jpayne@7: target_languages = mb_encoding_languages(encoding_iana) jpayne@7: jpayne@7: if target_languages: jpayne@7: logger.log( jpayne@7: TRACE, jpayne@7: "{} should target any language(s) of {}".format( jpayne@7: encoding_iana, str(target_languages) jpayne@7: ), jpayne@7: ) jpayne@7: jpayne@7: cd_ratios = [] jpayne@7: jpayne@7: # We shall skip the CD when its about ASCII jpayne@7: # Most of the time its not relevant to run "language-detection" on it. 
        if encoding_iana != "ascii":
            for chunk in md_chunks:
                chunk_languages = coherence_ratio(
                    chunk,
                    language_threshold,
                    ",".join(target_languages) if target_languages else None,
                )

                cd_ratios.append(chunk_languages)

        cd_ratios_merged = merge_coherence_ratios(cd_ratios)

        if cd_ratios_merged:
            logger.log(
                TRACE,
                "We detected language {} using {}".format(
                    cd_ratios_merged, encoding_iana
                ),
            )

        results.append(
            CharsetMatch(
                sequences,
                encoding_iana,
                mean_mess_ratio,
                bom_or_sig_available,
                cd_ratios_merged,
                decoded_payload,
            )
        )

        if (
            encoding_iana in [specified_encoding, "ascii", "utf_8"]
            and mean_mess_ratio < 0.1
        ):
            logger.debug(
                "Encoding detection: %s is most likely the one.", encoding_iana
            )
            if explain:
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)
            return CharsetMatches([results[encoding_iana]])

        if encoding_iana == sig_encoding:
            logger.debug(
                "Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
                "the beginning of the sequence.",
                encoding_iana,
            )
            if explain:
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)
            return CharsetMatches([results[encoding_iana]])

    if len(results) == 0:
        if fallback_u8 or fallback_ascii or fallback_specified:
            logger.log(
                TRACE,
                "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
            )

        if fallback_specified:
            logger.debug(
                "Encoding detection: %s will be used as a fallback match",
                fallback_specified.encoding,
            )
            results.append(fallback_specified)
        elif fallback_u8 is not None:
            logger.debug("Encoding detection: utf_8 will be used as a fallback match")
            results.append(fallback_u8)
        elif fallback_ascii:
            logger.debug("Encoding detection: ascii will be used as a fallback match")
            results.append(fallback_ascii)
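    # Fallback precedence above: a payload-declared encoding wins, then utf_8,
    # then ascii. Any valid ASCII payload is also valid UTF-8, so preferring
    # the utf_8 entry never discards a readable interpretation.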
    if results:
        logger.debug(
            "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
            results.best().encoding,  # type: ignore
            len(results) - 1,
        )
    else:
        logger.debug("Encoding detection: Unable to determine any suitable charset.")

    if explain:
        logger.removeHandler(explain_handler)
        logger.setLevel(previous_logger_level)

    return results


def from_fp(
    fp: BinaryIO,
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Same as from_bytes but uses a file pointer that is already opened and readable.
    Will not close the file pointer.
    """
    return from_bytes(
        fp.read(),
        steps,
        chunk_size,
        threshold,
        cp_isolation,
        cp_exclusion,
        preemptive_behaviour,
        explain,
        language_threshold,
        enable_fallback,
    )


def from_path(
    path: Union[str, bytes, PathLike],  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Same as from_bytes but with one extra step: opening and reading the given file path in binary mode.
    Can raise IOError.
    """
    with open(path, "rb") as fp:
        return from_fp(
            fp,
            steps,
            chunk_size,
            threshold,
            cp_isolation,
            cp_exclusion,
            preemptive_behaviour,
            explain,
            language_threshold,
            enable_fallback,
        )


def is_binary(
    fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes],  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = False,
) -> bool:
    """
    Detect whether the given input (file, bytes, or path) points to binary content, i.e. not text.
    Based on the same main heuristic algorithms and default kwargs, with the sole exception that
    fallback matches are disabled, so detection is stricter on payloads that are ASCII-compatible
    but unlikely to be text.
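
    Illustrative usage (hypothetical inputs; the verdict is heuristic)::

        is_binary(bytes(range(256)))   # likely True: chaotic, undecodable payload
        is_binary(b"plain old text")   # expected False: trivially decodable text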
jpayne@7: """ jpayne@7: if isinstance(fp_or_path_or_payload, (str, PathLike)): jpayne@7: guesses = from_path( jpayne@7: fp_or_path_or_payload, jpayne@7: steps=steps, jpayne@7: chunk_size=chunk_size, jpayne@7: threshold=threshold, jpayne@7: cp_isolation=cp_isolation, jpayne@7: cp_exclusion=cp_exclusion, jpayne@7: preemptive_behaviour=preemptive_behaviour, jpayne@7: explain=explain, jpayne@7: language_threshold=language_threshold, jpayne@7: enable_fallback=enable_fallback, jpayne@7: ) jpayne@7: elif isinstance( jpayne@7: fp_or_path_or_payload, jpayne@7: ( jpayne@7: bytes, jpayne@7: bytearray, jpayne@7: ), jpayne@7: ): jpayne@7: guesses = from_bytes( jpayne@7: fp_or_path_or_payload, jpayne@7: steps=steps, jpayne@7: chunk_size=chunk_size, jpayne@7: threshold=threshold, jpayne@7: cp_isolation=cp_isolation, jpayne@7: cp_exclusion=cp_exclusion, jpayne@7: preemptive_behaviour=preemptive_behaviour, jpayne@7: explain=explain, jpayne@7: language_threshold=language_threshold, jpayne@7: enable_fallback=enable_fallback, jpayne@7: ) jpayne@7: else: jpayne@7: guesses = from_fp( jpayne@7: fp_or_path_or_payload, jpayne@7: steps=steps, jpayne@7: chunk_size=chunk_size, jpayne@7: threshold=threshold, jpayne@7: cp_isolation=cp_isolation, jpayne@7: cp_exclusion=cp_exclusion, jpayne@7: preemptive_behaviour=preemptive_behaviour, jpayne@7: explain=explain, jpayne@7: language_threshold=language_threshold, jpayne@7: enable_fallback=enable_fallback, jpayne@7: ) jpayne@7: jpayne@7: return not guesses