from functools import lru_cache
from logging import getLogger
from typing import List, Optional

from .constant import (
    COMMON_SAFE_ASCII_CHARACTERS,
    TRACE,
    UNICODE_SECONDARY_RANGE_KEYWORD,
)
from .utils import (
    is_accentuated,
    is_arabic,
    is_arabic_isolated_form,
    is_case_variable,
    is_cjk,
    is_emoticon,
    is_hangul,
    is_hiragana,
    is_katakana,
    is_latin,
    is_punctuation,
    is_separator,
    is_symbol,
    is_thai,
    is_unprintable,
    remove_accent,
    unicode_range,
)


class MessDetectorPlugin:
    """
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.

    NOTE: mess_ratio() instantiates every *direct* subclass of this class
    (via __subclasses__()), so subclasses must be constructible without
    arguments.
    """

    def eligible(self, character: str) -> bool:
        """
        Determine if given character should be fed in.
        """
        raise NotImplementedError  # pragma: nocover

    def feed(self, character: str) -> None:
        """
        The main routine to be executed upon character.
        Insert the logic in which the text would be considered chaotic.
        """
        raise NotImplementedError  # pragma: nocover

    def reset(self) -> None:  # pragma: no cover
        """
        Permit to reset the plugin to the initial state.
        """
        raise NotImplementedError

    @property
    def ratio(self) -> float:
        """
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        """
        raise NotImplementedError  # pragma: nocover


class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
    """
    Track the share of punctuation and symbol characters among the printable
    characters fed in. A symbol weighs twice as much as a punctuation sign.
    """

    def __init__(self) -> None:
        self._punctuation_count: int = 0
        self._symbol_count: int = 0
        self._character_count: int = 0

        self._last_printable_char: Optional[str] = None
        # Not read anywhere in this class; kept for backward compatibility.
        self._frenzy_symbol_in_word: bool = False

    def eligible(self, character: str) -> bool:
        """Only printable characters are inspected."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """
        Count the character, and score it when it is punctuation or a symbol.
        An immediate repetition of the previous character and any character
        listed in COMMON_SAFE_ASCII_CHARACTERS are never scored.
        """
        self._character_count += 1

        if (
            character != self._last_printable_char
            and character not in COMMON_SAFE_ASCII_CHARACTERS
        ):
            if is_punctuation(character):
                self._punctuation_count += 1
            elif (
                character.isdigit() is False
                and is_symbol(character)
                and is_emoticon(character) is False
            ):
                self._symbol_count += 2

        self._last_printable_char = character

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._punctuation_count = 0
        self._character_count = 0
        self._symbol_count = 0
        # Without clearing these, the "same as previous character" check in
        # feed() would leak state from the previous run.
        self._last_printable_char = None
        self._frenzy_symbol_in_word = False

    @property
    def ratio(self) -> float:
        """
        Return (punctuation + weighted symbols) / characters, or 0.0 when
        nothing was fed or when that value is below 0.3.
        """
        if self._character_count == 0:
            return 0.0

        ratio_of_punctuation: float = (
            self._punctuation_count + self._symbol_count
        ) / self._character_count

        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0


class TooManyAccentuatedPlugin(MessDetectorPlugin):
    """Track the share of accentuated characters among alphabetic ones."""

    def __init__(self) -> None:
        self._character_count: int = 0
        self._accentuated_count: int = 0

    def eligible(self, character: str) -> bool:
        """Only alphabetic characters are inspected."""
        return character.isalpha()

    def feed(self, character: str) -> None:
        """Count the character and whether is_accentuated() flags it."""
        self._character_count += 1

        if is_accentuated(character):
            self._accentuated_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._character_count = 0
        self._accentuated_count = 0

    @property
    def ratio(self) -> float:
        """
        Return accentuated / alphabetic characters, or 0.0 when fewer than 8
        characters were fed or when that value is below 0.35.
        """
        if self._character_count < 8:
            return 0.0

        ratio_of_accentuation: float = self._accentuated_count / self._character_count
        return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0


class UnprintablePlugin(MessDetectorPlugin):
    """Track characters flagged by is_unprintable(); each one weighs 8."""

    def __init__(self) -> None:
        self._unprintable_count: int = 0
        self._character_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is inspected."""
        return True

    def feed(self, character: str) -> None:
        """Count the character and whether is_unprintable() flags it."""
        if is_unprintable(character):
            self._unprintable_count += 1
        self._character_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._unprintable_count = 0
        # Previously left untouched, which diluted the ratio of a reused
        # instance with the character count of the previous run.
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Return (unprintable * 8) / characters, or 0.0 when nothing was fed."""
        if self._character_count == 0:
            return 0.0

        return (self._unprintable_count * 8) / self._character_count


class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
    """Track pairs of consecutive accentuated Latin letters."""

    def __init__(self) -> None:
        self._successive_count: int = 0
        self._character_count: int = 0

        self._last_latin_character: Optional[str] = None

    def eligible(self, character: str) -> bool:
        """Only alphabetic characters flagged by is_latin() are inspected."""
        return character.isalpha() and is_latin(character)

    def feed(self, character: str) -> None:
        """
        Compare the character with the previously fed one. When both are
        accentuated, score one point if both are upper case and one more
        point if both reduce to the same letter once the accent is removed.
        """
        self._character_count += 1
        if (
            self._last_latin_character is not None
            and is_accentuated(character)
            and is_accentuated(self._last_latin_character)
        ):
            if character.isupper() and self._last_latin_character.isupper():
                self._successive_count += 1
            # Worse if its the same char duplicated with different accent.
            if remove_accent(character) == remove_accent(self._last_latin_character):
                self._successive_count += 1
        self._last_latin_character = character

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._successive_count = 0
        self._character_count = 0
        self._last_latin_character = None

    @property
    def ratio(self) -> float:
        """Return (score * 2) / characters, or 0.0 when nothing was fed."""
        if self._character_count == 0:
            return 0.0

        return (self._successive_count * 2) / self._character_count


class SuspiciousRange(MessDetectorPlugin):
    """
    Track adjacent printable characters whose Unicode ranges are judged
    suspicious by is_suspiciously_successive_range().
    """

    def __init__(self) -> None:
        self._suspicious_successive_range_count: int = 0
        self._character_count: int = 0
        self._last_printable_seen: Optional[str] = None

    def eligible(self, character: str) -> bool:
        """Only printable characters are inspected."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """
        Count the character, then compare its Unicode range with the range of
        the previous character. Whitespace, punctuation and characters listed
        in COMMON_SAFE_ASCII_CHARACTERS break the chain instead of being
        compared.
        """
        self._character_count += 1

        if (
            character.isspace()
            or is_punctuation(character)
            or character in COMMON_SAFE_ASCII_CHARACTERS
        ):
            self._last_printable_seen = None
            return

        if self._last_printable_seen is None:
            # First character of a new chain: nothing to compare against yet.
            self._last_printable_seen = character
            return

        unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen)
        unicode_range_b: Optional[str] = unicode_range(character)

        if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
            self._suspicious_successive_range_count += 1

        self._last_printable_seen = character

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._character_count = 0
        self._suspicious_successive_range_count = 0
        self._last_printable_seen = None

    @property
    def ratio(self) -> float:
        """
        Return (suspicious pairs * 2) / characters, or 0.0 when 24 characters
        or fewer were fed.
        """
        if self._character_count <= 24:
            return 0.0

        ratio_of_suspicious_range_usage: float = (
            self._suspicious_successive_range_count * 2
        ) / self._character_count

        return ratio_of_suspicious_range_usage


class SuperWeirdWordPlugin(MessDetectorPlugin):
    """
    Split the input into words (runs of alphabetic characters) and track how
    many characters belong to words considered "bad".
    """

    def __init__(self) -> None:
        self._word_count: int = 0
        self._bad_word_count: int = 0
        self._foreign_long_count: int = 0

        self._is_current_word_bad: bool = False
        self._foreign_long_watch: bool = False

        self._character_count: int = 0
        self._bad_character_count: int = 0

        self._buffer: str = ""
        self._buffer_accent_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is inspected."""
        return True

    def feed(self, character: str) -> None:
        """
        Accumulate alphabetic characters into the current word; on a space,
        punctuation or separator, evaluate the finished word and start over.
        A symbol met inside a word marks that word as bad.
        """
        if character.isalpha():
            self._buffer += character
            if is_accentuated(character):
                self._buffer_accent_count += 1
            if (
                self._foreign_long_watch is False
                and (is_latin(character) is False or is_accentuated(character))
                and is_cjk(character) is False
                and is_hangul(character) is False
                and is_katakana(character) is False
                and is_hiragana(character) is False
                and is_thai(character) is False
            ):
                self._foreign_long_watch = True
            return
        if not self._buffer:
            return
        if (
            character.isspace() or is_punctuation(character) or is_separator(character)
        ) and self._buffer:
            self._word_count += 1
            buffer_length: int = len(self._buffer)

            self._character_count += buffer_length

            if buffer_length >= 4:
                if self._buffer_accent_count / buffer_length > 0.34:
                    self._is_current_word_bad = True
                # Word/Buffer ending with an upper case accentuated letter are so rare,
                # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
                if (
                    is_accentuated(self._buffer[-1])
                    and self._buffer[-1].isupper()
                    and all(_.isupper() for _ in self._buffer) is False
                ):
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True
            if buffer_length >= 24 and self._foreign_long_watch:
                # Only the number of upper case letters matters here, so count
                # them instead of collecting their positions.
                uppercase_count: int = sum(1 for c in self._buffer if c.isupper())
                probable_camel_cased: bool = (
                    uppercase_count > 0 and uppercase_count / buffer_length <= 0.3
                )

                if not probable_camel_cased:
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True

            if self._is_current_word_bad:
                self._bad_word_count += 1
                self._bad_character_count += len(self._buffer)
                self._is_current_word_bad = False

            self._foreign_long_watch = False
            self._buffer = ""
            self._buffer_accent_count = 0
        elif (
            character not in {"<", ">", "-", "=", "~", "|", "_"}
            and character.isdigit() is False
            and is_symbol(character)
        ):
            self._is_current_word_bad = True
            self._buffer += character

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._buffer = ""
        # Must be cleared together with the buffer, otherwise the first word
        # after a reset inherits a stale accent count.
        self._buffer_accent_count = 0
        self._is_current_word_bad = False
        self._foreign_long_watch = False
        self._bad_word_count = 0
        self._word_count = 0
        self._character_count = 0
        self._bad_character_count = 0
        self._foreign_long_count = 0

    @property
    def ratio(self) -> float:
        """
        Return bad characters / characters of completed words, or 0.0 when 10
        words or fewer were seen and none was counted as "foreign long".
        """
        if self._word_count <= 10 and self._foreign_long_count == 0:
            return 0.0
        if self._character_count == 0:
            # Defensive guard: feed() always counts characters before it can
            # raise the counters checked above, but never divide by zero.
            return 0.0

        return self._bad_character_count / self._character_count


class CjkInvalidStopPlugin(MessDetectorPlugin):
    """
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    """

    def __init__(self) -> None:
        self._wrong_stop_count: int = 0
        self._cjk_character_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is inspected."""
        return True

    def feed(self, character: str) -> None:
        """Count wrong stops ('丅', '丄') separately from other CJK characters."""
        if character in {"丅", "丄"}:
            self._wrong_stop_count += 1
            return
        if is_cjk(character):
            self._cjk_character_count += 1

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._wrong_stop_count = 0
        self._cjk_character_count = 0

    @property
    def ratio(self) -> float:
        """
        Return wrong stops / CJK characters, or 0.0 when fewer than 16 CJK
        characters were fed.
        """
        if self._cjk_character_count < 16:
            return 0.0
        return self._wrong_stop_count / self._cjk_character_count


class ArchaicUpperLowerPlugin(MessDetectorPlugin):
    """
    Track rapid upper/lower case alternation inside chunks of case-variable
    letters that contain at least one non-ASCII character.
    """

    def __init__(self) -> None:
        # True when the previous pair of letters already switched case.
        self._buf: bool = False

        self._character_count_since_last_sep: int = 0

        # Score of the chunk in progress / score of all accepted chunks.
        self._successive_upper_lower_count: int = 0
        self._successive_upper_lower_count_final: int = 0

        self._character_count: int = 0

        self._last_alpha_seen: Optional[str] = None
        self._current_ascii_only: bool = True

    def eligible(self, character: str) -> bool:
        """Every character is inspected."""
        return True

    def feed(self, character: str) -> None:
        """
        Extend the current chunk with a case-variable letter, or close the
        chunk when any other character arrives after at least one letter.
        """
        is_concerned = character.isalpha() and is_case_variable(character)
        chunk_sep = is_concerned is False

        if chunk_sep and self._character_count_since_last_sep > 0:
            # The chunk score is kept only for chunks of at most 64 letters,
            # not terminated by a digit, and containing a non-ASCII character.
            if (
                self._character_count_since_last_sep <= 64
                and character.isdigit() is False
                and self._current_ascii_only is False
            ):
                self._successive_upper_lower_count_final += (
                    self._successive_upper_lower_count
                )

            self._successive_upper_lower_count = 0
            self._character_count_since_last_sep = 0
            self._last_alpha_seen = None
            self._buf = False
            self._character_count += 1
            self._current_ascii_only = True

            return

        if self._current_ascii_only is True and character.isascii() is False:
            self._current_ascii_only = False

        if self._last_alpha_seen is not None:
            if (character.isupper() and self._last_alpha_seen.islower()) or (
                character.islower() and self._last_alpha_seen.isupper()
            ):
                # Two case switches in a row score 2; a single one is only
                # remembered.
                if self._buf is True:
                    self._successive_upper_lower_count += 2
                    self._buf = False
                else:
                    self._buf = True
            else:
                self._buf = False

        self._character_count += 1
        self._character_count_since_last_sep += 1
        self._last_alpha_seen = character

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._character_count = 0
        self._character_count_since_last_sep = 0
        self._successive_upper_lower_count = 0
        self._successive_upper_lower_count_final = 0
        self._last_alpha_seen = None
        self._buf = False
        self._current_ascii_only = True

    @property
    def ratio(self) -> float:
        """Return accepted score / characters, or 0.0 when nothing was fed."""
        if self._character_count == 0:
            return 0.0

        return self._successive_upper_lower_count_final / self._character_count


class ArabicIsolatedFormPlugin(MessDetectorPlugin):
    """Track the share of isolated forms among Arabic characters."""

    def __init__(self) -> None:
        self._character_count: int = 0
        self._isolated_form_count: int = 0

    def reset(self) -> None:  # pragma: no cover
        """Restore every field to the value set by __init__."""
        self._character_count = 0
        self._isolated_form_count = 0

    def eligible(self, character: str) -> bool:
        """Only characters flagged by is_arabic() are inspected."""
        return is_arabic(character)

    def feed(self, character: str) -> None:
        """Count the character and whether is_arabic_isolated_form() flags it."""
        self._character_count += 1

        if is_arabic_isolated_form(character):
            self._isolated_form_count += 1

    @property
    def ratio(self) -> float:
        """
        Return isolated forms / Arabic characters, or 0.0 when fewer than 8
        Arabic characters were fed.
        """
        if self._character_count < 8:
            return 0.0

        isolated_form_usage: float = self._isolated_form_count / self._character_count

        return isolated_form_usage


@lru_cache(maxsize=1024)
def is_suspiciously_successive_range(
    unicode_range_a: Optional[str], unicode_range_b: Optional[str]
) -> bool:
    """
    Determine if two Unicode range seen next to each other can be considered as suspicious.

    Both arguments are Unicode range names, or None when the range is
    unknown; an unknown range is always considered suspicious.
    """
    if unicode_range_a is None or unicode_range_b is None:
        return True

    if unicode_range_a == unicode_range_b:
        return False

    if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
        return False

    if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
        return False

    # Latin characters can be accompanied with a combining diacritical mark
    # eg. Vietnamese.
    if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
        "Combining" in unicode_range_a or "Combining" in unicode_range_b
    ):
        return False

    keywords_range_a, keywords_range_b = unicode_range_a.split(
        " "
    ), unicode_range_b.split(" ")

    # Two ranges sharing a keyword that is not a secondary keyword are
    # considered related.
    for el in keywords_range_a:
        if el in UNICODE_SECONDARY_RANGE_KEYWORD:
            continue
        if el in keywords_range_b:
            return False

    # Japanese Exception
    range_a_jp_chars, range_b_jp_chars = (
        unicode_range_a
        in (
            "Hiragana",
            "Katakana",
        ),
        unicode_range_b in ("Hiragana", "Katakana"),
    )
    if (range_a_jp_chars or range_b_jp_chars) and (
        "CJK" in unicode_range_a or "CJK" in unicode_range_b
    ):
        return False
    if range_a_jp_chars and range_b_jp_chars:
        return False

    if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
        if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
            return False
        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
            return False

    # Chinese/Japanese use dedicated range for punctuation and/or separators.
    if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
        unicode_range_a in ["Katakana", "Hiragana"]
        and unicode_range_b in ["Katakana", "Hiragana"]
    ):
        if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
            return False
        if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
            return False
        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
            return False

    return True


@lru_cache(maxsize=2048)
def mess_ratio(
    decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
) -> float:
    """
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.

    :param decoded_sequence: the text to evaluate.
    :param maximum_threshold: the scan stops as soon as the summed detector
        ratios reach this value at a checkpoint.
    :param debug: when True, log details at TRACE level on the
        "charset_normalizer" logger. Results are cached, so a cache hit logs
        nothing.
    :return: the sum of all detector ratios, rounded to 3 decimals.
    """

    # One fresh instance of every direct MessDetectorPlugin subclass.
    detectors: List[MessDetectorPlugin] = [
        md_class() for md_class in MessDetectorPlugin.__subclasses__()
    ]

    # +1 for the trailing "\n" appended below, which lets word/chunk based
    # detectors close whatever they are still buffering.
    length: int = len(decoded_sequence) + 1

    mean_mess_ratio: float = 0.0

    # Distance, in characters, between two intermediate ratio evaluations.
    if length < 512:
        intermediary_mean_mess_ratio_calc: int = 32
    elif length <= 1024:
        intermediary_mean_mess_ratio_calc = 64
    else:
        intermediary_mean_mess_ratio_calc = 128

    for index, character in enumerate(decoded_sequence + "\n"):
        for detector in detectors:
            if detector.eligible(character):
                detector.feed(character)

        if (
            index > 0 and index % intermediary_mean_mess_ratio_calc == 0
        ) or index == length - 1:
            mean_mess_ratio = sum(dt.ratio for dt in detectors)

            if mean_mess_ratio >= maximum_threshold:
                break

    if debug:
        logger = getLogger("charset_normalizer")

        logger.log(
            TRACE,
            "Mess-detector extended-analysis start. "
            f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
            f"maximum_threshold={maximum_threshold}",
        )

        if len(decoded_sequence) > 16:
            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")

        for dt in detectors:  # pragma: nocover
            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")

    return round(mean_mess_ratio, 3)