annotate charset_normalizer/constant.py @ 15:0a3943480712

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author jpayne
date Tue, 21 May 2024 01:05:30 -0400
parents 5eb2d5e3bf22
children
rev   line source
jpayne@7 1 # -*- coding: utf-8 -*-
jpayne@7 2 from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
jpayne@7 3 from encodings.aliases import aliases
jpayne@7 4 from re import IGNORECASE, compile as re_compile
jpayne@7 5 from typing import Dict, List, Set, Union
jpayne@7 6
# Byte signatures (SIG/BOM) per eligible encoding. Each value is either a
# single marker or a list of alternative markers for that encoding.
ENCODING_MARKS: Dict[str, Union[bytes, List[bytes]]] = dict(
    utf_8=BOM_UTF8,
    utf_7=[
        b"\x2b\x2f\x76\x38",
        b"\x2b\x2f\x76\x39",
        b"\x2b\x2f\x76\x2b",
        b"\x2b\x2f\x76\x2f",
        b"\x2b\x2f\x76\x38\x2d",
    ],
    gb18030=b"\x84\x31\x95\x33",
    utf_32=[BOM_UTF32_BE, BOM_UTF32_LE],
    utf_16=[BOM_UTF16_BE, BOM_UTF16_LE],
)
jpayne@7 21
# Size thresholds (units are decided by the code that reads them, which is
# not part of this module).
TOO_SMALL_SEQUENCE: int = 32
TOO_BIG_SEQUENCE: int = 10_000_000

# Numerically 0x110000 code points minus the 2048 surrogate code points.
UTF8_MAXIMAL_ALLOCATION: int = 1_112_064
jpayne@7 26
# Unicode block name -> range of code points (start inclusive, stop
# exclusive). Per the original note, the table follows Unicode ucd/15.0.0.
# NOTE(review): the key "Emoticons range(Emoji)" looks like a generation
# artifact for the "Emoticons" block; it is kept verbatim because the key is
# runtime data that callers may match on.
UNICODE_RANGES_COMBINED: Dict[str, range] = {
    block_name: range(first, stop)
    for block_name, first, stop in (
        ("Control character", 0, 32),
        ("Basic Latin", 32, 128),
        ("Latin-1 Supplement", 128, 256),
        ("Latin Extended-A", 256, 384),
        ("Latin Extended-B", 384, 592),
        ("IPA Extensions", 592, 688),
        ("Spacing Modifier Letters", 688, 768),
        ("Combining Diacritical Marks", 768, 880),
        ("Greek and Coptic", 880, 1024),
        ("Cyrillic", 1024, 1280),
        ("Cyrillic Supplement", 1280, 1328),
        ("Armenian", 1328, 1424),
        ("Hebrew", 1424, 1536),
        ("Arabic", 1536, 1792),
        ("Syriac", 1792, 1872),
        ("Arabic Supplement", 1872, 1920),
        ("Thaana", 1920, 1984),
        ("NKo", 1984, 2048),
        ("Samaritan", 2048, 2112),
        ("Mandaic", 2112, 2144),
        ("Syriac Supplement", 2144, 2160),
        ("Arabic Extended-B", 2160, 2208),
        ("Arabic Extended-A", 2208, 2304),
        ("Devanagari", 2304, 2432),
        ("Bengali", 2432, 2560),
        ("Gurmukhi", 2560, 2688),
        ("Gujarati", 2688, 2816),
        ("Oriya", 2816, 2944),
        ("Tamil", 2944, 3072),
        ("Telugu", 3072, 3200),
        ("Kannada", 3200, 3328),
        ("Malayalam", 3328, 3456),
        ("Sinhala", 3456, 3584),
        ("Thai", 3584, 3712),
        ("Lao", 3712, 3840),
        ("Tibetan", 3840, 4096),
        ("Myanmar", 4096, 4256),
        ("Georgian", 4256, 4352),
        ("Hangul Jamo", 4352, 4608),
        ("Ethiopic", 4608, 4992),
        ("Ethiopic Supplement", 4992, 5024),
        ("Cherokee", 5024, 5120),
        ("Unified Canadian Aboriginal Syllabics", 5120, 5760),
        ("Ogham", 5760, 5792),
        ("Runic", 5792, 5888),
        ("Tagalog", 5888, 5920),
        ("Hanunoo", 5920, 5952),
        ("Buhid", 5952, 5984),
        ("Tagbanwa", 5984, 6016),
        ("Khmer", 6016, 6144),
        ("Mongolian", 6144, 6320),
        ("Unified Canadian Aboriginal Syllabics Extended", 6320, 6400),
        ("Limbu", 6400, 6480),
        ("Tai Le", 6480, 6528),
        ("New Tai Lue", 6528, 6624),
        ("Khmer Symbols", 6624, 6656),
        ("Buginese", 6656, 6688),
        ("Tai Tham", 6688, 6832),
        ("Combining Diacritical Marks Extended", 6832, 6912),
        ("Balinese", 6912, 7040),
        ("Sundanese", 7040, 7104),
        ("Batak", 7104, 7168),
        ("Lepcha", 7168, 7248),
        ("Ol Chiki", 7248, 7296),
        ("Cyrillic Extended-C", 7296, 7312),
        ("Georgian Extended", 7312, 7360),
        ("Sundanese Supplement", 7360, 7376),
        ("Vedic Extensions", 7376, 7424),
        ("Phonetic Extensions", 7424, 7552),
        ("Phonetic Extensions Supplement", 7552, 7616),
        ("Combining Diacritical Marks Supplement", 7616, 7680),
        ("Latin Extended Additional", 7680, 7936),
        ("Greek Extended", 7936, 8192),
        ("General Punctuation", 8192, 8304),
        ("Superscripts and Subscripts", 8304, 8352),
        ("Currency Symbols", 8352, 8400),
        ("Combining Diacritical Marks for Symbols", 8400, 8448),
        ("Letterlike Symbols", 8448, 8528),
        ("Number Forms", 8528, 8592),
        ("Arrows", 8592, 8704),
        ("Mathematical Operators", 8704, 8960),
        ("Miscellaneous Technical", 8960, 9216),
        ("Control Pictures", 9216, 9280),
        ("Optical Character Recognition", 9280, 9312),
        ("Enclosed Alphanumerics", 9312, 9472),
        ("Box Drawing", 9472, 9600),
        ("Block Elements", 9600, 9632),
        ("Geometric Shapes", 9632, 9728),
        ("Miscellaneous Symbols", 9728, 9984),
        ("Dingbats", 9984, 10176),
        ("Miscellaneous Mathematical Symbols-A", 10176, 10224),
        ("Supplemental Arrows-A", 10224, 10240),
        ("Braille Patterns", 10240, 10496),
        ("Supplemental Arrows-B", 10496, 10624),
        ("Miscellaneous Mathematical Symbols-B", 10624, 10752),
        ("Supplemental Mathematical Operators", 10752, 11008),
        ("Miscellaneous Symbols and Arrows", 11008, 11264),
        ("Glagolitic", 11264, 11360),
        ("Latin Extended-C", 11360, 11392),
        ("Coptic", 11392, 11520),
        ("Georgian Supplement", 11520, 11568),
        ("Tifinagh", 11568, 11648),
        ("Ethiopic Extended", 11648, 11744),
        ("Cyrillic Extended-A", 11744, 11776),
        ("Supplemental Punctuation", 11776, 11904),
        ("CJK Radicals Supplement", 11904, 12032),
        ("Kangxi Radicals", 12032, 12256),
        ("Ideographic Description Characters", 12272, 12288),
        ("CJK Symbols and Punctuation", 12288, 12352),
        ("Hiragana", 12352, 12448),
        ("Katakana", 12448, 12544),
        ("Bopomofo", 12544, 12592),
        ("Hangul Compatibility Jamo", 12592, 12688),
        ("Kanbun", 12688, 12704),
        ("Bopomofo Extended", 12704, 12736),
        ("CJK Strokes", 12736, 12784),
        ("Katakana Phonetic Extensions", 12784, 12800),
        ("Enclosed CJK Letters and Months", 12800, 13056),
        ("CJK Compatibility", 13056, 13312),
        ("CJK Unified Ideographs Extension A", 13312, 19904),
        ("Yijing Hexagram Symbols", 19904, 19968),
        ("CJK Unified Ideographs", 19968, 40960),
        ("Yi Syllables", 40960, 42128),
        ("Yi Radicals", 42128, 42192),
        ("Lisu", 42192, 42240),
        ("Vai", 42240, 42560),
        ("Cyrillic Extended-B", 42560, 42656),
        ("Bamum", 42656, 42752),
        ("Modifier Tone Letters", 42752, 42784),
        ("Latin Extended-D", 42784, 43008),
        ("Syloti Nagri", 43008, 43056),
        ("Common Indic Number Forms", 43056, 43072),
        ("Phags-pa", 43072, 43136),
        ("Saurashtra", 43136, 43232),
        ("Devanagari Extended", 43232, 43264),
        ("Kayah Li", 43264, 43312),
        ("Rejang", 43312, 43360),
        ("Hangul Jamo Extended-A", 43360, 43392),
        ("Javanese", 43392, 43488),
        ("Myanmar Extended-B", 43488, 43520),
        ("Cham", 43520, 43616),
        ("Myanmar Extended-A", 43616, 43648),
        ("Tai Viet", 43648, 43744),
        ("Meetei Mayek Extensions", 43744, 43776),
        ("Ethiopic Extended-A", 43776, 43824),
        ("Latin Extended-E", 43824, 43888),
        ("Cherokee Supplement", 43888, 43968),
        ("Meetei Mayek", 43968, 44032),
        ("Hangul Syllables", 44032, 55216),
        ("Hangul Jamo Extended-B", 55216, 55296),
        ("High Surrogates", 55296, 56192),
        ("High Private Use Surrogates", 56192, 56320),
        ("Low Surrogates", 56320, 57344),
        ("Private Use Area", 57344, 63744),
        ("CJK Compatibility Ideographs", 63744, 64256),
        ("Alphabetic Presentation Forms", 64256, 64336),
        ("Arabic Presentation Forms-A", 64336, 65024),
        ("Variation Selectors", 65024, 65040),
        ("Vertical Forms", 65040, 65056),
        ("Combining Half Marks", 65056, 65072),
        ("CJK Compatibility Forms", 65072, 65104),
        ("Small Form Variants", 65104, 65136),
        ("Arabic Presentation Forms-B", 65136, 65280),
        ("Halfwidth and Fullwidth Forms", 65280, 65520),
        ("Specials", 65520, 65536),
        ("Linear B Syllabary", 65536, 65664),
        ("Linear B Ideograms", 65664, 65792),
        ("Aegean Numbers", 65792, 65856),
        ("Ancient Greek Numbers", 65856, 65936),
        ("Ancient Symbols", 65936, 66000),
        ("Phaistos Disc", 66000, 66048),
        ("Lycian", 66176, 66208),
        ("Carian", 66208, 66272),
        ("Coptic Epact Numbers", 66272, 66304),
        ("Old Italic", 66304, 66352),
        ("Gothic", 66352, 66384),
        ("Old Permic", 66384, 66432),
        ("Ugaritic", 66432, 66464),
        ("Old Persian", 66464, 66528),
        ("Deseret", 66560, 66640),
        ("Shavian", 66640, 66688),
        ("Osmanya", 66688, 66736),
        ("Osage", 66736, 66816),
        ("Elbasan", 66816, 66864),
        ("Caucasian Albanian", 66864, 66928),
        ("Vithkuqi", 66928, 67008),
        ("Linear A", 67072, 67456),
        ("Latin Extended-F", 67456, 67520),
        ("Cypriot Syllabary", 67584, 67648),
        ("Imperial Aramaic", 67648, 67680),
        ("Palmyrene", 67680, 67712),
        ("Nabataean", 67712, 67760),
        ("Hatran", 67808, 67840),
        ("Phoenician", 67840, 67872),
        ("Lydian", 67872, 67904),
        ("Meroitic Hieroglyphs", 67968, 68000),
        ("Meroitic Cursive", 68000, 68096),
        ("Kharoshthi", 68096, 68192),
        ("Old South Arabian", 68192, 68224),
        ("Old North Arabian", 68224, 68256),
        ("Manichaean", 68288, 68352),
        ("Avestan", 68352, 68416),
        ("Inscriptional Parthian", 68416, 68448),
        ("Inscriptional Pahlavi", 68448, 68480),
        ("Psalter Pahlavi", 68480, 68528),
        ("Old Turkic", 68608, 68688),
        ("Old Hungarian", 68736, 68864),
        ("Hanifi Rohingya", 68864, 68928),
        ("Rumi Numeral Symbols", 69216, 69248),
        ("Yezidi", 69248, 69312),
        ("Arabic Extended-C", 69312, 69376),
        ("Old Sogdian", 69376, 69424),
        ("Sogdian", 69424, 69488),
        ("Old Uyghur", 69488, 69552),
        ("Chorasmian", 69552, 69600),
        ("Elymaic", 69600, 69632),
        ("Brahmi", 69632, 69760),
        ("Kaithi", 69760, 69840),
        ("Sora Sompeng", 69840, 69888),
        ("Chakma", 69888, 69968),
        ("Mahajani", 69968, 70016),
        ("Sharada", 70016, 70112),
        ("Sinhala Archaic Numbers", 70112, 70144),
        ("Khojki", 70144, 70224),
        ("Multani", 70272, 70320),
        ("Khudawadi", 70320, 70400),
        ("Grantha", 70400, 70528),
        ("Newa", 70656, 70784),
        ("Tirhuta", 70784, 70880),
        ("Siddham", 71040, 71168),
        ("Modi", 71168, 71264),
        ("Mongolian Supplement", 71264, 71296),
        ("Takri", 71296, 71376),
        ("Ahom", 71424, 71504),
        ("Dogra", 71680, 71760),
        ("Warang Citi", 71840, 71936),
        ("Dives Akuru", 71936, 72032),
        ("Nandinagari", 72096, 72192),
        ("Zanabazar Square", 72192, 72272),
        ("Soyombo", 72272, 72368),
        ("Unified Canadian Aboriginal Syllabics Extended-A", 72368, 72384),
        ("Pau Cin Hau", 72384, 72448),
        ("Devanagari Extended-A", 72448, 72544),
        ("Bhaiksuki", 72704, 72816),
        ("Marchen", 72816, 72896),
        ("Masaram Gondi", 72960, 73056),
        ("Gunjala Gondi", 73056, 73136),
        ("Makasar", 73440, 73472),
        ("Kawi", 73472, 73568),
        ("Lisu Supplement", 73648, 73664),
        ("Tamil Supplement", 73664, 73728),
        ("Cuneiform", 73728, 74752),
        ("Cuneiform Numbers and Punctuation", 74752, 74880),
        ("Early Dynastic Cuneiform", 74880, 75088),
        ("Cypro-Minoan", 77712, 77824),
        ("Egyptian Hieroglyphs", 77824, 78896),
        ("Egyptian Hieroglyph Format Controls", 78896, 78944),
        ("Anatolian Hieroglyphs", 82944, 83584),
        ("Bamum Supplement", 92160, 92736),
        ("Mro", 92736, 92784),
        ("Tangsa", 92784, 92880),
        ("Bassa Vah", 92880, 92928),
        ("Pahawh Hmong", 92928, 93072),
        ("Medefaidrin", 93760, 93856),
        ("Miao", 93952, 94112),
        ("Ideographic Symbols and Punctuation", 94176, 94208),
        ("Tangut", 94208, 100352),
        ("Tangut Components", 100352, 101120),
        ("Khitan Small Script", 101120, 101632),
        ("Tangut Supplement", 101632, 101760),
        ("Kana Extended-B", 110576, 110592),
        ("Kana Supplement", 110592, 110848),
        ("Kana Extended-A", 110848, 110896),
        ("Small Kana Extension", 110896, 110960),
        ("Nushu", 110960, 111360),
        ("Duployan", 113664, 113824),
        ("Shorthand Format Controls", 113824, 113840),
        ("Znamenny Musical Notation", 118528, 118736),
        ("Byzantine Musical Symbols", 118784, 119040),
        ("Musical Symbols", 119040, 119296),
        ("Ancient Greek Musical Notation", 119296, 119376),
        ("Kaktovik Numerals", 119488, 119520),
        ("Mayan Numerals", 119520, 119552),
        ("Tai Xuan Jing Symbols", 119552, 119648),
        ("Counting Rod Numerals", 119648, 119680),
        ("Mathematical Alphanumeric Symbols", 119808, 120832),
        ("Sutton SignWriting", 120832, 121520),
        ("Latin Extended-G", 122624, 122880),
        ("Glagolitic Supplement", 122880, 122928),
        ("Cyrillic Extended-D", 122928, 123024),
        ("Nyiakeng Puachue Hmong", 123136, 123216),
        ("Toto", 123536, 123584),
        ("Wancho", 123584, 123648),
        ("Nag Mundari", 124112, 124160),
        ("Ethiopic Extended-B", 124896, 124928),
        ("Mende Kikakui", 124928, 125152),
        ("Adlam", 125184, 125280),
        ("Indic Siyaq Numbers", 126064, 126144),
        ("Ottoman Siyaq Numbers", 126208, 126288),
        ("Arabic Mathematical Alphabetic Symbols", 126464, 126720),
        ("Mahjong Tiles", 126976, 127024),
        ("Domino Tiles", 127024, 127136),
        ("Playing Cards", 127136, 127232),
        ("Enclosed Alphanumeric Supplement", 127232, 127488),
        ("Enclosed Ideographic Supplement", 127488, 127744),
        ("Miscellaneous Symbols and Pictographs", 127744, 128512),
        ("Emoticons range(Emoji)", 128512, 128592),
        ("Ornamental Dingbats", 128592, 128640),
        ("Transport and Map Symbols", 128640, 128768),
        ("Alchemical Symbols", 128768, 128896),
        ("Geometric Shapes Extended", 128896, 129024),
        ("Supplemental Arrows-C", 129024, 129280),
        ("Supplemental Symbols and Pictographs", 129280, 129536),
        ("Chess Symbols", 129536, 129648),
        ("Symbols and Pictographs Extended-A", 129648, 129792),
        ("Symbols for Legacy Computing", 129792, 130048),
        ("CJK Unified Ideographs Extension B", 131072, 173792),
        ("CJK Unified Ideographs Extension C", 173824, 177984),
        ("CJK Unified Ideographs Extension D", 177984, 178208),
        ("CJK Unified Ideographs Extension E", 178208, 183984),
        ("CJK Unified Ideographs Extension F", 183984, 191472),
        ("CJK Compatibility Ideographs Supplement", 194560, 195104),
        ("CJK Unified Ideographs Extension G", 196608, 201552),
        ("CJK Unified Ideographs Extension H", 201552, 205744),
        ("Tags", 917504, 917632),
        ("Variation Selectors Supplement", 917760, 918000),
        ("Supplementary Private Use Area-A", 983040, 1048576),
        ("Supplementary Private Use Area-B", 1048576, 1114112),
    )
}
jpayne@7 358
jpayne@7 359
# Words that occur in Unicode range names. Judging by the constant's name,
# a range whose name contains one of them is treated as "secondary"; the
# code that consumes this list is not part of this module.
UNICODE_SECONDARY_RANGE_KEYWORD: List[str] = [
    "Supplement", "Extended", "Extensions",
    "Modifier", "Marks", "Punctuation",
    "Symbols", "Forms", "Operators",
    "Miscellaneous", "Drawing", "Block",
    "Shapes", "Supplemental", "Tags",
]
jpayne@7 377
# Case-insensitive pattern for an inline encoding declaration. The adjacent
# raw literals below concatenate into one pattern string.
RE_POSSIBLE_ENCODING_INDICATION = re_compile(
    r"(?:(?:encoding)|(?:charset)|(?:coding))"  # declaration keyword
    r"(?:[\:= ]{1,10})"  # 1 to 10 separator characters: ':', '=' or space
    r"(?:[\"\']?)"  # optional opening quote
    r"([a-zA-Z0-9\-_]+)"  # captured encoding label
    r"(?:[\"\']?)",  # optional closing quote
    flags=IGNORECASE,
)
jpayne@7 382
# Codec names added on top of the alias targets from `encodings.aliases`.
# NOTE(review): the name suggests these have no entry in the stdlib alias
# table; that is not guaranteed for every Python version.
IANA_NO_ALIASES = [
    "cp720",
    "cp737",
    "cp856",
    "cp874",
    "cp875",
    "cp1006",
    "koi8_r",
    "koi8_t",
    "koi8_u",
]

# Sorted list of candidate codec names: every alias target known to the
# standard library plus IANA_NO_ALIASES, minus names ending in "_codec" and
# the three names excluded below.
# The two sources are merged as sets so that a name present in both appears
# only once; plain list concatenation would list it twice and inflate
# IANA_SUPPORTED_COUNT.
IANA_SUPPORTED: List[str] = sorted(
    name
    for name in set(aliases.values()).union(IANA_NO_ALIASES)
    if not name.endswith("_codec") and name not in {"rot_13", "tactis", "mbcs"}
)

# Number of distinct supported codec names.
IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED)
jpayne@7 404
# Precomputed table: for each code page, the code pages considered similar
# to it. Per the original note, the data was produced with the function
# cp_similarity, which is not defined in this module.
IANA_SUPPORTED_SIMILAR: Dict[str, List[str]] = {
    "cp037": ["cp1026", "cp1140", "cp273", "cp500"],
    "cp1026": ["cp037", "cp1140", "cp273", "cp500"],
    "cp1125": ["cp866"],
    "cp1140": ["cp037", "cp1026", "cp273", "cp500"],
    "cp1250": ["iso8859_2"],
    "cp1251": ["kz1048", "ptcp154"],
    "cp1252": ["iso8859_15", "iso8859_9", "latin_1"],
    "cp1253": ["iso8859_7"],
    "cp1254": ["iso8859_15", "iso8859_9", "latin_1"],
    "cp1257": ["iso8859_13"],
    "cp273": ["cp037", "cp1026", "cp1140", "cp500"],
    "cp437": [
        "cp850", "cp858", "cp860", "cp861",
        "cp862", "cp863", "cp865",
    ],
    "cp500": ["cp037", "cp1026", "cp1140", "cp273"],
    "cp850": ["cp437", "cp857", "cp858", "cp865"],
    "cp857": ["cp850", "cp858", "cp865"],
    "cp858": ["cp437", "cp850", "cp857", "cp865"],
    "cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"],
    "cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"],
    "cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"],
    "cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"],
    "cp865": [
        "cp437", "cp850", "cp857", "cp858",
        "cp860", "cp861", "cp862", "cp863",
    ],
    "cp866": ["cp1125"],
    "iso8859_10": [
        "iso8859_14", "iso8859_15", "iso8859_4", "iso8859_9", "latin_1",
    ],
    "iso8859_11": ["tis_620"],
    "iso8859_13": ["cp1257"],
    "iso8859_14": [
        "iso8859_10", "iso8859_15", "iso8859_16",
        "iso8859_3", "iso8859_9", "latin_1",
    ],
    "iso8859_15": [
        "cp1252", "cp1254",
        "iso8859_10", "iso8859_14", "iso8859_16",
        "iso8859_3", "iso8859_9", "latin_1",
    ],
    "iso8859_16": [
        "iso8859_14", "iso8859_15", "iso8859_2",
        "iso8859_3", "iso8859_9", "latin_1",
    ],
    "iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"],
    "iso8859_3": [
        "iso8859_14", "iso8859_15", "iso8859_16", "iso8859_9", "latin_1",
    ],
    "iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"],
    "iso8859_7": ["cp1253"],
    "iso8859_9": [
        "cp1252", "cp1254", "cp1258",
        "iso8859_10", "iso8859_14", "iso8859_15", "iso8859_16",
        "iso8859_3", "iso8859_4", "latin_1",
    ],
    "kz1048": ["cp1251", "ptcp154"],
    "latin_1": [
        "cp1252", "cp1254", "cp1258",
        "iso8859_10", "iso8859_14", "iso8859_15", "iso8859_16",
        "iso8859_3", "iso8859_4", "iso8859_9",
    ],
    "mac_iceland": ["mac_roman", "mac_turkish"],
    "mac_roman": ["mac_iceland", "mac_turkish"],
    "mac_turkish": ["mac_iceland", "mac_roman"],
    "ptcp154": ["cp1251", "kz1048"],
    "tis_620": ["iso8859_11"],
}
jpayne@7 493
jpayne@7 494
# Maps Python codec names to an alternative spelling of the encoding label.
# NOTE(review): the constant's name suggests these are the labels chardet
# reports; confirm against the callers.
CHARDET_CORRESPONDENCE: Dict[str, str] = dict(
    iso2022_kr="ISO-2022-KR",
    iso2022_jp="ISO-2022-JP",
    euc_kr="EUC-KR",
    tis_620="TIS-620",
    utf_32="UTF-32",
    euc_jp="EUC-JP",
    koi8_r="KOI8-R",
    iso8859_1="ISO-8859-1",
    iso8859_2="ISO-8859-2",
    iso8859_5="ISO-8859-5",
    iso8859_6="ISO-8859-6",
    iso8859_7="ISO-8859-7",
    iso8859_8="ISO-8859-8",
    utf_16="UTF-16",
    cp855="IBM855",
    mac_cyrillic="MacCyrillic",
    gb2312="GB2312",
    gb18030="GB18030",
    cp932="CP932",
    cp866="IBM866",
    utf_8="utf-8",
    utf_8_sig="UTF-8-SIG",
    shift_jis="SHIFT_JIS",
    big5="Big5",
    cp1250="windows-1250",
    cp1251="windows-1251",
    cp1252="Windows-1252",
    cp1253="windows-1253",
    cp1255="windows-1255",
    cp1256="windows-1256",
    cp1254="Windows-1254",
    cp949="CP949",
)
jpayne@7 529
jpayne@7 530
# Set of 15 single ASCII punctuation characters, built by iterating over
# one string that contains each of them exactly once.
COMMON_SAFE_ASCII_CHARACTERS: Set[str] = set('<>=:/&;{}[],|"-')
jpayne@7 548
jpayne@7 549
# Codec-name groups; the KO_/ZH_ prefixes presumably stand for Korean and
# Chinese encodings (their use is outside this module).
KO_NAMES: Set[str] = {
    "cp949",
    "euc_kr",
    "johab",
}
ZH_NAMES: Set[str] = {
    "big5",
    "big5hkscs",
    "cp950",
    "hz",
}

# Custom logging level, numerically lower than logging.DEBUG (10).
TRACE: int = 5
jpayne@7 555
jpayne@7 556
jpayne@7 557 # A language label that ends with the em dash "—" is an alternative
jpayne@7 558 # character sequence for the language named by the label without it.
jpayne@7 559 FREQUENCIES: Dict[str, List[str]] = {
jpayne@7 560 "English": [
jpayne@7 561 "e",
jpayne@7 562 "a",
jpayne@7 563 "t",
jpayne@7 564 "i",
jpayne@7 565 "o",
jpayne@7 566 "n",
jpayne@7 567 "s",
jpayne@7 568 "r",
jpayne@7 569 "h",
jpayne@7 570 "l",
jpayne@7 571 "d",
jpayne@7 572 "c",
jpayne@7 573 "u",
jpayne@7 574 "m",
jpayne@7 575 "f",
jpayne@7 576 "p",
jpayne@7 577 "g",
jpayne@7 578 "w",
jpayne@7 579 "y",
jpayne@7 580 "b",
jpayne@7 581 "v",
jpayne@7 582 "k",
jpayne@7 583 "x",
jpayne@7 584 "j",
jpayne@7 585 "z",
jpayne@7 586 "q",
jpayne@7 587 ],
jpayne@7 588 "English—": [
jpayne@7 589 "e",
jpayne@7 590 "a",
jpayne@7 591 "t",
jpayne@7 592 "i",
jpayne@7 593 "o",
jpayne@7 594 "n",
jpayne@7 595 "s",
jpayne@7 596 "r",
jpayne@7 597 "h",
jpayne@7 598 "l",
jpayne@7 599 "d",
jpayne@7 600 "c",
jpayne@7 601 "m",
jpayne@7 602 "u",
jpayne@7 603 "f",
jpayne@7 604 "p",
jpayne@7 605 "g",
jpayne@7 606 "w",
jpayne@7 607 "b",
jpayne@7 608 "y",
jpayne@7 609 "v",
jpayne@7 610 "k",
jpayne@7 611 "j",
jpayne@7 612 "x",
jpayne@7 613 "z",
jpayne@7 614 "q",
jpayne@7 615 ],
jpayne@7 616 "German": [
jpayne@7 617 "e",
jpayne@7 618 "n",
jpayne@7 619 "i",
jpayne@7 620 "r",
jpayne@7 621 "s",
jpayne@7 622 "t",
jpayne@7 623 "a",
jpayne@7 624 "d",
jpayne@7 625 "h",
jpayne@7 626 "u",
jpayne@7 627 "l",
jpayne@7 628 "g",
jpayne@7 629 "o",
jpayne@7 630 "c",
jpayne@7 631 "m",
jpayne@7 632 "b",
jpayne@7 633 "f",
jpayne@7 634 "k",
jpayne@7 635 "w",
jpayne@7 636 "z",
jpayne@7 637 "p",
jpayne@7 638 "v",
jpayne@7 639 "ü",
jpayne@7 640 "ä",
jpayne@7 641 "ö",
jpayne@7 642 "j",
jpayne@7 643 ],
jpayne@7 644 "French": [
jpayne@7 645 "e",
jpayne@7 646 "a",
jpayne@7 647 "s",
jpayne@7 648 "n",
jpayne@7 649 "i",
jpayne@7 650 "t",
jpayne@7 651 "r",
jpayne@7 652 "l",
jpayne@7 653 "u",
jpayne@7 654 "o",
jpayne@7 655 "d",
jpayne@7 656 "c",
jpayne@7 657 "p",
jpayne@7 658 "m",
jpayne@7 659 "é",
jpayne@7 660 "v",
jpayne@7 661 "g",
jpayne@7 662 "f",
jpayne@7 663 "b",
jpayne@7 664 "h",
jpayne@7 665 "q",
jpayne@7 666 "à",
jpayne@7 667 "x",
jpayne@7 668 "è",
jpayne@7 669 "y",
jpayne@7 670 "j",
jpayne@7 671 ],
jpayne@7 672 "Dutch": [
jpayne@7 673 "e",
jpayne@7 674 "n",
jpayne@7 675 "a",
jpayne@7 676 "i",
jpayne@7 677 "r",
jpayne@7 678 "t",
jpayne@7 679 "o",
jpayne@7 680 "d",
jpayne@7 681 "s",
jpayne@7 682 "l",
jpayne@7 683 "g",
jpayne@7 684 "h",
jpayne@7 685 "v",
jpayne@7 686 "m",
jpayne@7 687 "u",
jpayne@7 688 "k",
jpayne@7 689 "c",
jpayne@7 690 "p",
jpayne@7 691 "b",
jpayne@7 692 "w",
jpayne@7 693 "j",
jpayne@7 694 "z",
jpayne@7 695 "f",
jpayne@7 696 "y",
jpayne@7 697 "x",
jpayne@7 698 "ë",
jpayne@7 699 ],
jpayne@7 700 "Italian": [
jpayne@7 701 "e",
jpayne@7 702 "i",
jpayne@7 703 "a",
jpayne@7 704 "o",
jpayne@7 705 "n",
jpayne@7 706 "l",
jpayne@7 707 "t",
jpayne@7 708 "r",
jpayne@7 709 "s",
jpayne@7 710 "c",
jpayne@7 711 "d",
jpayne@7 712 "u",
jpayne@7 713 "p",
jpayne@7 714 "m",
jpayne@7 715 "g",
jpayne@7 716 "v",
jpayne@7 717 "f",
jpayne@7 718 "b",
jpayne@7 719 "z",
jpayne@7 720 "h",
jpayne@7 721 "q",
jpayne@7 722 "è",
jpayne@7 723 "à",
jpayne@7 724 "k",
jpayne@7 725 "y",
jpayne@7 726 "ò",
jpayne@7 727 ],
jpayne@7 728 "Polish": [
jpayne@7 729 "a",
jpayne@7 730 "i",
jpayne@7 731 "o",
jpayne@7 732 "e",
jpayne@7 733 "n",
jpayne@7 734 "r",
jpayne@7 735 "z",
jpayne@7 736 "w",
jpayne@7 737 "s",
jpayne@7 738 "c",
jpayne@7 739 "t",
jpayne@7 740 "k",
jpayne@7 741 "y",
jpayne@7 742 "d",
jpayne@7 743 "p",
jpayne@7 744 "m",
jpayne@7 745 "u",
jpayne@7 746 "l",
jpayne@7 747 "j",
jpayne@7 748 "ł",
jpayne@7 749 "g",
jpayne@7 750 "b",
jpayne@7 751 "h",
jpayne@7 752 "ą",
jpayne@7 753 "ę",
jpayne@7 754 "ó",
jpayne@7 755 ],
jpayne@7 756 "Spanish": [
jpayne@7 757 "e",
jpayne@7 758 "a",
jpayne@7 759 "o",
jpayne@7 760 "n",
jpayne@7 761 "s",
jpayne@7 762 "r",
jpayne@7 763 "i",
jpayne@7 764 "l",
jpayne@7 765 "d",
jpayne@7 766 "t",
jpayne@7 767 "c",
jpayne@7 768 "u",
jpayne@7 769 "m",
jpayne@7 770 "p",
jpayne@7 771 "b",
jpayne@7 772 "g",
jpayne@7 773 "v",
jpayne@7 774 "f",
jpayne@7 775 "y",
jpayne@7 776 "ó",
jpayne@7 777 "h",
jpayne@7 778 "q",
jpayne@7 779 "í",
jpayne@7 780 "j",
jpayne@7 781 "z",
jpayne@7 782 "á",
jpayne@7 783 ],
jpayne@7 784 "Russian": [
jpayne@7 785 "о",
jpayne@7 786 "а",
jpayne@7 787 "е",
jpayne@7 788 "и",
jpayne@7 789 "н",
jpayne@7 790 "с",
jpayne@7 791 "т",
jpayne@7 792 "р",
jpayne@7 793 "в",
jpayne@7 794 "л",
jpayne@7 795 "к",
jpayne@7 796 "м",
jpayne@7 797 "д",
jpayne@7 798 "п",
jpayne@7 799 "у",
jpayne@7 800 "г",
jpayne@7 801 "я",
jpayne@7 802 "ы",
jpayne@7 803 "з",
jpayne@7 804 "б",
jpayne@7 805 "й",
jpayne@7 806 "ь",
jpayne@7 807 "ч",
jpayne@7 808 "х",
jpayne@7 809 "ж",
jpayne@7 810 "ц",
jpayne@7 811 ],
jpayne@7 812 # Jap-Kanji
jpayne@7 813 "Japanese": [
jpayne@7 814 "人",
jpayne@7 815 "一",
jpayne@7 816 "大",
jpayne@7 817 "亅",
jpayne@7 818 "丁",
jpayne@7 819 "丨",
jpayne@7 820 "竹",
jpayne@7 821 "笑",
jpayne@7 822 "口",
jpayne@7 823 "日",
jpayne@7 824 "今",
jpayne@7 825 "二",
jpayne@7 826 "彳",
jpayne@7 827 "行",
jpayne@7 828 "十",
jpayne@7 829 "土",
jpayne@7 830 "丶",
jpayne@7 831 "寸",
jpayne@7 832 "寺",
jpayne@7 833 "時",
jpayne@7 834 "乙",
jpayne@7 835 "丿",
jpayne@7 836 "乂",
jpayne@7 837 "气",
jpayne@7 838 "気",
jpayne@7 839 "冂",
jpayne@7 840 "巾",
jpayne@7 841 "亠",
jpayne@7 842 "市",
jpayne@7 843 "目",
jpayne@7 844 "儿",
jpayne@7 845 "見",
jpayne@7 846 "八",
jpayne@7 847 "小",
jpayne@7 848 "凵",
jpayne@7 849 "県",
jpayne@7 850 "月",
jpayne@7 851 "彐",
jpayne@7 852 "門",
jpayne@7 853 "間",
jpayne@7 854 "木",
jpayne@7 855 "東",
jpayne@7 856 "山",
jpayne@7 857 "出",
jpayne@7 858 "本",
jpayne@7 859 "中",
jpayne@7 860 "刀",
jpayne@7 861 "分",
jpayne@7 862 "耳",
jpayne@7 863 "又",
jpayne@7 864 "取",
jpayne@7 865 "最",
jpayne@7 866 "言",
jpayne@7 867 "田",
jpayne@7 868 "心",
jpayne@7 869 "思",
jpayne@7 870 "刂",
jpayne@7 871 "前",
jpayne@7 872 "京",
jpayne@7 873 "尹",
jpayne@7 874 "事",
jpayne@7 875 "生",
jpayne@7 876 "厶",
jpayne@7 877 "云",
jpayne@7 878 "会",
jpayne@7 879 "未",
jpayne@7 880 "来",
jpayne@7 881 "白",
jpayne@7 882 "冫",
jpayne@7 883 "楽",
jpayne@7 884 "灬",
jpayne@7 885 "馬",
jpayne@7 886 "尸",
jpayne@7 887 "尺",
jpayne@7 888 "駅",
jpayne@7 889 "明",
jpayne@7 890 "耂",
jpayne@7 891 "者",
jpayne@7 892 "了",
jpayne@7 893 "阝",
jpayne@7 894 "都",
jpayne@7 895 "高",
jpayne@7 896 "卜",
jpayne@7 897 "占",
jpayne@7 898 "厂",
jpayne@7 899 "广",
jpayne@7 900 "店",
jpayne@7 901 "子",
jpayne@7 902 "申",
jpayne@7 903 "奄",
jpayne@7 904 "亻",
jpayne@7 905 "俺",
jpayne@7 906 "上",
jpayne@7 907 "方",
jpayne@7 908 "冖",
jpayne@7 909 "学",
jpayne@7 910 "衣",
jpayne@7 911 "艮",
jpayne@7 912 "食",
jpayne@7 913 "自",
jpayne@7 914 ],
jpayne@7 915 # Japanese (Katakana); the key carries a trailing em dash so it does not collide with "Japanese"
jpayne@7 916 "Japanese—": [
jpayne@7 917 "ー",
jpayne@7 918 "ン",
jpayne@7 919 "ス",
jpayne@7 920 "・",
jpayne@7 921 "ル",
jpayne@7 922 "ト",
jpayne@7 923 "リ",
jpayne@7 924 "イ",
jpayne@7 925 "ア",
jpayne@7 926 "ラ",
jpayne@7 927 "ッ",
jpayne@7 928 "ク",
jpayne@7 929 "ド",
jpayne@7 930 "シ",
jpayne@7 931 "レ",
jpayne@7 932 "ジ",
jpayne@7 933 "タ",
jpayne@7 934 "フ",
jpayne@7 935 "ロ",
jpayne@7 936 "カ",
jpayne@7 937 "テ",
jpayne@7 938 "マ",
jpayne@7 939 "ィ",
jpayne@7 940 "グ",
jpayne@7 941 "バ",
jpayne@7 942 "ム",
jpayne@7 943 "プ",
jpayne@7 944 "オ",
jpayne@7 945 "コ",
jpayne@7 946 "デ",
jpayne@7 947 "ニ",
jpayne@7 948 "ウ",
jpayne@7 949 "メ",
jpayne@7 950 "サ",
jpayne@7 951 "ビ",
jpayne@7 952 "ナ",
jpayne@7 953 "ブ",
jpayne@7 954 "ャ",
jpayne@7 955 "エ",
jpayne@7 956 "ュ",
jpayne@7 957 "チ",
jpayne@7 958 "キ",
jpayne@7 959 "ズ",
jpayne@7 960 "ダ",
jpayne@7 961 "パ",
jpayne@7 962 "ミ",
jpayne@7 963 "ェ",
jpayne@7 964 "ョ",
jpayne@7 965 "ハ",
jpayne@7 966 "セ",
jpayne@7 967 "ベ",
jpayne@7 968 "ガ",
jpayne@7 969 "モ",
jpayne@7 970 "ツ",
jpayne@7 971 "ネ",
jpayne@7 972 "ボ",
jpayne@7 973 "ソ",
jpayne@7 974 "ノ",
jpayne@7 975 "ァ",
jpayne@7 976 "ヴ",
jpayne@7 977 "ワ",
jpayne@7 978 "ポ",
jpayne@7 979 "ペ",
jpayne@7 980 "ピ",
jpayne@7 981 "ケ",
jpayne@7 982 "ゴ",
jpayne@7 983 "ギ",
jpayne@7 984 "ザ",
jpayne@7 985 "ホ",
jpayne@7 986 "ゲ",
jpayne@7 987 "ォ",
jpayne@7 988 "ヤ",
jpayne@7 989 "ヒ",
jpayne@7 990 "ユ",
jpayne@7 991 "ヨ",
jpayne@7 992 "ヘ",
jpayne@7 993 "ゼ",
jpayne@7 994 "ヌ",
jpayne@7 995 "ゥ",
jpayne@7 996 "ゾ",
jpayne@7 997 "ヶ",
jpayne@7 998 "ヂ",
jpayne@7 999 "ヲ",
jpayne@7 1000 "ヅ",
jpayne@7 1001 "ヵ",
jpayne@7 1002 "ヱ",
jpayne@7 1003 "ヰ",
jpayne@7 1004 "ヮ",
jpayne@7 1005 "ヽ",
jpayne@7 1006 "゠",
jpayne@7 1007 "ヾ",
jpayne@7 1008 "ヷ",
jpayne@7 1009 "ヿ",
jpayne@7 1010 "ヸ",
jpayne@7 1011 "ヹ",
jpayne@7 1012 "ヺ",
jpayne@7 1013 ],
jpayne@7 1014 # Japanese (Hiragana); the key carries two trailing em dashes so it stays distinct from the other Japanese keys
jpayne@7 1015 "Japanese——": [
jpayne@7 1016 "の",
jpayne@7 1017 "に",
jpayne@7 1018 "る",
jpayne@7 1019 "た",
jpayne@7 1020 "と",
jpayne@7 1021 "は",
jpayne@7 1022 "し",
jpayne@7 1023 "い",
jpayne@7 1024 "を",
jpayne@7 1025 "で",
jpayne@7 1026 "て",
jpayne@7 1027 "が",
jpayne@7 1028 "な",
jpayne@7 1029 "れ",
jpayne@7 1030 "か",
jpayne@7 1031 "ら",
jpayne@7 1032 "さ",
jpayne@7 1033 "っ",
jpayne@7 1034 "り",
jpayne@7 1035 "す",
jpayne@7 1036 "あ",
jpayne@7 1037 "も",
jpayne@7 1038 "こ",
jpayne@7 1039 "ま",
jpayne@7 1040 "う",
jpayne@7 1041 "く",
jpayne@7 1042 "よ",
jpayne@7 1043 "き",
jpayne@7 1044 "ん",
jpayne@7 1045 "め",
jpayne@7 1046 "お",
jpayne@7 1047 "け",
jpayne@7 1048 "そ",
jpayne@7 1049 "つ",
jpayne@7 1050 "だ",
jpayne@7 1051 "や",
jpayne@7 1052 "え",
jpayne@7 1053 "ど",
jpayne@7 1054 "わ",
jpayne@7 1055 "ち",
jpayne@7 1056 "み",
jpayne@7 1057 "せ",
jpayne@7 1058 "じ",
jpayne@7 1059 "ば",
jpayne@7 1060 "へ",
jpayne@7 1061 "び",
jpayne@7 1062 "ず",
jpayne@7 1063 "ろ",
jpayne@7 1064 "ほ",
jpayne@7 1065 "げ",
jpayne@7 1066 "む",
jpayne@7 1067 "べ",
jpayne@7 1068 "ひ",
jpayne@7 1069 "ょ",
jpayne@7 1070 "ゆ",
jpayne@7 1071 "ぶ",
jpayne@7 1072 "ご",
jpayne@7 1073 "ゃ",
jpayne@7 1074 "ね",
jpayne@7 1075 "ふ",
jpayne@7 1076 "ぐ",
jpayne@7 1077 "ぎ",
jpayne@7 1078 "ぼ",
jpayne@7 1079 "ゅ",
jpayne@7 1080 "づ",
jpayne@7 1081 "ざ",
jpayne@7 1082 "ぞ",
jpayne@7 1083 "ぬ",
jpayne@7 1084 "ぜ",
jpayne@7 1085 "ぱ",
jpayne@7 1086 "ぽ",
jpayne@7 1087 "ぷ",
jpayne@7 1088 "ぴ",
jpayne@7 1089 "ぃ",
jpayne@7 1090 "ぁ",
jpayne@7 1091 "ぇ",
jpayne@7 1092 "ぺ",
jpayne@7 1093 "ゞ",
jpayne@7 1094 "ぢ",
jpayne@7 1095 "ぉ",
jpayne@7 1096 "ぅ",
jpayne@7 1097 "ゐ",
jpayne@7 1098 "ゝ",
jpayne@7 1099 "ゑ",
jpayne@7 1100 "゛",
jpayne@7 1101 "゜",
jpayne@7 1102 "ゎ",
jpayne@7 1103 "ゔ",
jpayne@7 1104 "゚",
jpayne@7 1105 "ゟ",
jpayne@7 1106 "゙",
jpayne@7 1107 "ゕ",
jpayne@7 1108 "ゖ",
jpayne@7 1109 ],
jpayne@7 1110 "Portuguese": [
jpayne@7 1111 "a",
jpayne@7 1112 "e",
jpayne@7 1113 "o",
jpayne@7 1114 "s",
jpayne@7 1115 "i",
jpayne@7 1116 "r",
jpayne@7 1117 "d",
jpayne@7 1118 "n",
jpayne@7 1119 "t",
jpayne@7 1120 "m",
jpayne@7 1121 "u",
jpayne@7 1122 "c",
jpayne@7 1123 "l",
jpayne@7 1124 "p",
jpayne@7 1125 "g",
jpayne@7 1126 "v",
jpayne@7 1127 "b",
jpayne@7 1128 "f",
jpayne@7 1129 "h",
jpayne@7 1130 "ã",
jpayne@7 1131 "q",
jpayne@7 1132 "é",
jpayne@7 1133 "ç",
jpayne@7 1134 "á",
jpayne@7 1135 "z",
jpayne@7 1136 "í",
jpayne@7 1137 ],
jpayne@7 1138 "Swedish": [
jpayne@7 1139 "e",
jpayne@7 1140 "a",
jpayne@7 1141 "n",
jpayne@7 1142 "r",
jpayne@7 1143 "t",
jpayne@7 1144 "s",
jpayne@7 1145 "i",
jpayne@7 1146 "l",
jpayne@7 1147 "d",
jpayne@7 1148 "o",
jpayne@7 1149 "m",
jpayne@7 1150 "k",
jpayne@7 1151 "g",
jpayne@7 1152 "v",
jpayne@7 1153 "h",
jpayne@7 1154 "f",
jpayne@7 1155 "u",
jpayne@7 1156 "p",
jpayne@7 1157 "ä",
jpayne@7 1158 "c",
jpayne@7 1159 "b",
jpayne@7 1160 "ö",
jpayne@7 1161 "å",
jpayne@7 1162 "y",
jpayne@7 1163 "j",
jpayne@7 1164 "x",
jpayne@7 1165 ],
jpayne@7 1166 "Chinese": [
jpayne@7 1167 "的",
jpayne@7 1168 "一",
jpayne@7 1169 "是",
jpayne@7 1170 "不",
jpayne@7 1171 "了",
jpayne@7 1172 "在",
jpayne@7 1173 "人",
jpayne@7 1174 "有",
jpayne@7 1175 "我",
jpayne@7 1176 "他",
jpayne@7 1177 "这",
jpayne@7 1178 "个",
jpayne@7 1179 "们",
jpayne@7 1180 "中",
jpayne@7 1181 "来",
jpayne@7 1182 "上",
jpayne@7 1183 "大",
jpayne@7 1184 "为",
jpayne@7 1185 "和",
jpayne@7 1186 "国",
jpayne@7 1187 "地",
jpayne@7 1188 "到",
jpayne@7 1189 "以",
jpayne@7 1190 "说",
jpayne@7 1191 "时",
jpayne@7 1192 "要",
jpayne@7 1193 "就",
jpayne@7 1194 "出",
jpayne@7 1195 "会",
jpayne@7 1196 "可",
jpayne@7 1197 "也",
jpayne@7 1198 "你",
jpayne@7 1199 "对",
jpayne@7 1200 "生",
jpayne@7 1201 "能",
jpayne@7 1202 "而",
jpayne@7 1203 "子",
jpayne@7 1204 "那",
jpayne@7 1205 "得",
jpayne@7 1206 "于",
jpayne@7 1207 "着",
jpayne@7 1208 "下",
jpayne@7 1209 "自",
jpayne@7 1210 "之",
jpayne@7 1211 "年",
jpayne@7 1212 "过",
jpayne@7 1213 "发",
jpayne@7 1214 "后",
jpayne@7 1215 "作",
jpayne@7 1216 "里",
jpayne@7 1217 "用",
jpayne@7 1218 "道",
jpayne@7 1219 "行",
jpayne@7 1220 "所",
jpayne@7 1221 "然",
jpayne@7 1222 "家",
jpayne@7 1223 "种",
jpayne@7 1224 "事",
jpayne@7 1225 "成",
jpayne@7 1226 "方",
jpayne@7 1227 "多",
jpayne@7 1228 "经",
jpayne@7 1229 "么",
jpayne@7 1230 "去",
jpayne@7 1231 "法",
jpayne@7 1232 "学",
jpayne@7 1233 "如",
jpayne@7 1234 "都",
jpayne@7 1235 "同",
jpayne@7 1236 "现",
jpayne@7 1237 "当",
jpayne@7 1238 "没",
jpayne@7 1239 "动",
jpayne@7 1240 "面",
jpayne@7 1241 "起",
jpayne@7 1242 "看",
jpayne@7 1243 "定",
jpayne@7 1244 "天",
jpayne@7 1245 "分",
jpayne@7 1246 "还",
jpayne@7 1247 "进",
jpayne@7 1248 "好",
jpayne@7 1249 "小",
jpayne@7 1250 "部",
jpayne@7 1251 "其",
jpayne@7 1252 "些",
jpayne@7 1253 "主",
jpayne@7 1254 "样",
jpayne@7 1255 "理",
jpayne@7 1256 "心",
jpayne@7 1257 "她",
jpayne@7 1258 "本",
jpayne@7 1259 "前",
jpayne@7 1260 "开",
jpayne@7 1261 "但",
jpayne@7 1262 "因",
jpayne@7 1263 "只",
jpayne@7 1264 "从",
jpayne@7 1265 "想",
jpayne@7 1266 "实",
jpayne@7 1267 ],
jpayne@7 1268 "Ukrainian": [
jpayne@7 1269 "о",
jpayne@7 1270 "а",
jpayne@7 1271 "н",
jpayne@7 1272 "і",
jpayne@7 1273 "и",
jpayne@7 1274 "р",
jpayne@7 1275 "в",
jpayne@7 1276 "т",
jpayne@7 1277 "е",
jpayne@7 1278 "с",
jpayne@7 1279 "к",
jpayne@7 1280 "л",
jpayne@7 1281 "у",
jpayne@7 1282 "д",
jpayne@7 1283 "м",
jpayne@7 1284 "п",
jpayne@7 1285 "з",
jpayne@7 1286 "я",
jpayne@7 1287 "ь",
jpayne@7 1288 "б",
jpayne@7 1289 "г",
jpayne@7 1290 "й",
jpayne@7 1291 "ч",
jpayne@7 1292 "х",
jpayne@7 1293 "ц",
jpayne@7 1294 "ї",
jpayne@7 1295 ],
jpayne@7 1296 "Norwegian": [
jpayne@7 1297 "e",
jpayne@7 1298 "r",
jpayne@7 1299 "n",
jpayne@7 1300 "t",
jpayne@7 1301 "a",
jpayne@7 1302 "s",
jpayne@7 1303 "i",
jpayne@7 1304 "o",
jpayne@7 1305 "l",
jpayne@7 1306 "d",
jpayne@7 1307 "g",
jpayne@7 1308 "k",
jpayne@7 1309 "m",
jpayne@7 1310 "v",
jpayne@7 1311 "f",
jpayne@7 1312 "p",
jpayne@7 1313 "u",
jpayne@7 1314 "b",
jpayne@7 1315 "h",
jpayne@7 1316 "å",
jpayne@7 1317 "y",
jpayne@7 1318 "j",
jpayne@7 1319 "ø",
jpayne@7 1320 "c",
jpayne@7 1321 "æ",
jpayne@7 1322 "w",
jpayne@7 1323 ],
jpayne@7 1324 "Finnish": [
jpayne@7 1325 "a",
jpayne@7 1326 "i",
jpayne@7 1327 "n",
jpayne@7 1328 "t",
jpayne@7 1329 "e",
jpayne@7 1330 "s",
jpayne@7 1331 "l",
jpayne@7 1332 "o",
jpayne@7 1333 "u",
jpayne@7 1334 "k",
jpayne@7 1335 "ä",
jpayne@7 1336 "m",
jpayne@7 1337 "r",
jpayne@7 1338 "v",
jpayne@7 1339 "j",
jpayne@7 1340 "h",
jpayne@7 1341 "p",
jpayne@7 1342 "y",
jpayne@7 1343 "d",
jpayne@7 1344 "ö",
jpayne@7 1345 "g",
jpayne@7 1346 "c",
jpayne@7 1347 "b",
jpayne@7 1348 "f",
jpayne@7 1349 "w",
jpayne@7 1350 "z",
jpayne@7 1351 ],
jpayne@7 1352 "Vietnamese": [
jpayne@7 1353 "n",
jpayne@7 1354 "h",
jpayne@7 1355 "t",
jpayne@7 1356 "i",
jpayne@7 1357 "c",
jpayne@7 1358 "g",
jpayne@7 1359 "a",
jpayne@7 1360 "o",
jpayne@7 1361 "u",
jpayne@7 1362 "m",
jpayne@7 1363 "l",
jpayne@7 1364 "r",
jpayne@7 1365 "à",
jpayne@7 1366 "đ",
jpayne@7 1367 "s",
jpayne@7 1368 "e",
jpayne@7 1369 "v",
jpayne@7 1370 "p",
jpayne@7 1371 "b",
jpayne@7 1372 "y",
jpayne@7 1373 "ư",
jpayne@7 1374 "d",
jpayne@7 1375 "á",
jpayne@7 1376 "k",
jpayne@7 1377 "ộ",
jpayne@7 1378 "ế",
jpayne@7 1379 ],
jpayne@7 1380 "Czech": [
jpayne@7 1381 "o",
jpayne@7 1382 "e",
jpayne@7 1383 "a",
jpayne@7 1384 "n",
jpayne@7 1385 "t",
jpayne@7 1386 "s",
jpayne@7 1387 "i",
jpayne@7 1388 "l",
jpayne@7 1389 "v",
jpayne@7 1390 "r",
jpayne@7 1391 "k",
jpayne@7 1392 "d",
jpayne@7 1393 "u",
jpayne@7 1394 "m",
jpayne@7 1395 "p",
jpayne@7 1396 "í",
jpayne@7 1397 "c",
jpayne@7 1398 "h",
jpayne@7 1399 "z",
jpayne@7 1400 "á",
jpayne@7 1401 "y",
jpayne@7 1402 "j",
jpayne@7 1403 "b",
jpayne@7 1404 "ě",
jpayne@7 1405 "é",
jpayne@7 1406 "ř",
jpayne@7 1407 ],
jpayne@7 1408 "Hungarian": [
jpayne@7 1409 "e",
jpayne@7 1410 "a",
jpayne@7 1411 "t",
jpayne@7 1412 "l",
jpayne@7 1413 "s",
jpayne@7 1414 "n",
jpayne@7 1415 "k",
jpayne@7 1416 "r",
jpayne@7 1417 "i",
jpayne@7 1418 "o",
jpayne@7 1419 "z",
jpayne@7 1420 "á",
jpayne@7 1421 "é",
jpayne@7 1422 "g",
jpayne@7 1423 "m",
jpayne@7 1424 "b",
jpayne@7 1425 "y",
jpayne@7 1426 "v",
jpayne@7 1427 "d",
jpayne@7 1428 "h",
jpayne@7 1429 "u",
jpayne@7 1430 "p",
jpayne@7 1431 "j",
jpayne@7 1432 "ö",
jpayne@7 1433 "f",
jpayne@7 1434 "c",
jpayne@7 1435 ],
jpayne@7 1436 "Korean": [
jpayne@7 1437 "이",
jpayne@7 1438 "다",
jpayne@7 1439 "에",
jpayne@7 1440 "의",
jpayne@7 1441 "는",
jpayne@7 1442 "로",
jpayne@7 1443 "하",
jpayne@7 1444 "을",
jpayne@7 1445 "가",
jpayne@7 1446 "고",
jpayne@7 1447 "지",
jpayne@7 1448 "서",
jpayne@7 1449 "한",
jpayne@7 1450 "은",
jpayne@7 1451 "기",
jpayne@7 1452 "으",
jpayne@7 1453 "년",
jpayne@7 1454 "대",
jpayne@7 1455 "사",
jpayne@7 1456 "시",
jpayne@7 1457 "를",
jpayne@7 1458 "리",
jpayne@7 1459 "도",
jpayne@7 1460 "인",
jpayne@7 1461 "스",
jpayne@7 1462 "일",
jpayne@7 1463 ],
jpayne@7 1464 "Indonesian": [
jpayne@7 1465 "a",
jpayne@7 1466 "n",
jpayne@7 1467 "e",
jpayne@7 1468 "i",
jpayne@7 1469 "r",
jpayne@7 1470 "t",
jpayne@7 1471 "u",
jpayne@7 1472 "s",
jpayne@7 1473 "d",
jpayne@7 1474 "k",
jpayne@7 1475 "m",
jpayne@7 1476 "l",
jpayne@7 1477 "g",
jpayne@7 1478 "p",
jpayne@7 1479 "b",
jpayne@7 1480 "o",
jpayne@7 1481 "h",
jpayne@7 1482 "y",
jpayne@7 1483 "j",
jpayne@7 1484 "c",
jpayne@7 1485 "w",
jpayne@7 1486 "f",
jpayne@7 1487 "v",
jpayne@7 1488 "z",
jpayne@7 1489 "x",
jpayne@7 1490 "q",
jpayne@7 1491 ],
jpayne@7 1492 "Turkish": [
jpayne@7 1493 "a",
jpayne@7 1494 "e",
jpayne@7 1495 "i",
jpayne@7 1496 "n",
jpayne@7 1497 "r",
jpayne@7 1498 "l",
jpayne@7 1499 "ı",
jpayne@7 1500 "k",
jpayne@7 1501 "d",
jpayne@7 1502 "t",
jpayne@7 1503 "s",
jpayne@7 1504 "m",
jpayne@7 1505 "y",
jpayne@7 1506 "u",
jpayne@7 1507 "o",
jpayne@7 1508 "b",
jpayne@7 1509 "ü",
jpayne@7 1510 "ş",
jpayne@7 1511 "v",
jpayne@7 1512 "g",
jpayne@7 1513 "z",
jpayne@7 1514 "h",
jpayne@7 1515 "c",
jpayne@7 1516 "p",
jpayne@7 1517 "ç",
jpayne@7 1518 "ğ",
jpayne@7 1519 ],
jpayne@7 1520 "Romanian": [
jpayne@7 1521 "e",
jpayne@7 1522 "i",
jpayne@7 1523 "a",
jpayne@7 1524 "r",
jpayne@7 1525 "n",
jpayne@7 1526 "t",
jpayne@7 1527 "u",
jpayne@7 1528 "l",
jpayne@7 1529 "o",
jpayne@7 1530 "c",
jpayne@7 1531 "s",
jpayne@7 1532 "d",
jpayne@7 1533 "p",
jpayne@7 1534 "m",
jpayne@7 1535 "ă",
jpayne@7 1536 "f",
jpayne@7 1537 "v",
jpayne@7 1538 "î",
jpayne@7 1539 "g",
jpayne@7 1540 "b",
jpayne@7 1541 "ș",
jpayne@7 1542 "ț",
jpayne@7 1543 "z",
jpayne@7 1544 "h",
jpayne@7 1545 "â",
jpayne@7 1546 "j",
jpayne@7 1547 ],
jpayne@7 1548 "Farsi": [
jpayne@7 1549 "ا",
jpayne@7 1550 "ی",
jpayne@7 1551 "ر",
jpayne@7 1552 "د",
jpayne@7 1553 "ن",
jpayne@7 1554 "ه",
jpayne@7 1555 "و",
jpayne@7 1556 "م",
jpayne@7 1557 "ت",
jpayne@7 1558 "ب",
jpayne@7 1559 "س",
jpayne@7 1560 "ل",
jpayne@7 1561 "ک",
jpayne@7 1562 "ش",
jpayne@7 1563 "ز",
jpayne@7 1564 "ف",
jpayne@7 1565 "گ",
jpayne@7 1566 "ع",
jpayne@7 1567 "خ",
jpayne@7 1568 "ق",
jpayne@7 1569 "ج",
jpayne@7 1570 "آ",
jpayne@7 1571 "پ",
jpayne@7 1572 "ح",
jpayne@7 1573 "ط",
jpayne@7 1574 "ص",
jpayne@7 1575 ],
jpayne@7 1576 "Arabic": [
jpayne@7 1577 "ا",
jpayne@7 1578 "ل",
jpayne@7 1579 "ي",
jpayne@7 1580 "م",
jpayne@7 1581 "و",
jpayne@7 1582 "ن",
jpayne@7 1583 "ر",
jpayne@7 1584 "ت",
jpayne@7 1585 "ب",
jpayne@7 1586 "ة",
jpayne@7 1587 "ع",
jpayne@7 1588 "د",
jpayne@7 1589 "س",
jpayne@7 1590 "ف",
jpayne@7 1591 "ه",
jpayne@7 1592 "ك",
jpayne@7 1593 "ق",
jpayne@7 1594 "أ",
jpayne@7 1595 "ح",
jpayne@7 1596 "ج",
jpayne@7 1597 "ش",
jpayne@7 1598 "ط",
jpayne@7 1599 "ص",
jpayne@7 1600 "ى",
jpayne@7 1601 "خ",
jpayne@7 1602 "إ",
jpayne@7 1603 ],
jpayne@7 1604 "Danish": [
jpayne@7 1605 "e",
jpayne@7 1606 "r",
jpayne@7 1607 "n",
jpayne@7 1608 "t",
jpayne@7 1609 "a",
jpayne@7 1610 "i",
jpayne@7 1611 "s",
jpayne@7 1612 "d",
jpayne@7 1613 "l",
jpayne@7 1614 "o",
jpayne@7 1615 "g",
jpayne@7 1616 "m",
jpayne@7 1617 "k",
jpayne@7 1618 "f",
jpayne@7 1619 "v",
jpayne@7 1620 "u",
jpayne@7 1621 "b",
jpayne@7 1622 "h",
jpayne@7 1623 "p",
jpayne@7 1624 "å",
jpayne@7 1625 "y",
jpayne@7 1626 "ø",
jpayne@7 1627 "æ",
jpayne@7 1628 "c",
jpayne@7 1629 "j",
jpayne@7 1630 "w",
jpayne@7 1631 ],
jpayne@7 1632 "Serbian": [
jpayne@7 1633 "а",
jpayne@7 1634 "и",
jpayne@7 1635 "о",
jpayne@7 1636 "е",
jpayne@7 1637 "н",
jpayne@7 1638 "р",
jpayne@7 1639 "с",
jpayne@7 1640 "у",
jpayne@7 1641 "т",
jpayne@7 1642 "к",
jpayne@7 1643 "ј",
jpayne@7 1644 "в",
jpayne@7 1645 "д",
jpayne@7 1646 "м",
jpayne@7 1647 "п",
jpayne@7 1648 "л",
jpayne@7 1649 "г",
jpayne@7 1650 "з",
jpayne@7 1651 "б",
jpayne@7 1652 "a",
jpayne@7 1653 "i",
jpayne@7 1654 "e",
jpayne@7 1655 "o",
jpayne@7 1656 "n",
jpayne@7 1657 "ц",
jpayne@7 1658 "ш",
jpayne@7 1659 ],
jpayne@7 1660 "Lithuanian": [
jpayne@7 1661 "i",
jpayne@7 1662 "a",
jpayne@7 1663 "s",
jpayne@7 1664 "o",
jpayne@7 1665 "r",
jpayne@7 1666 "e",
jpayne@7 1667 "t",
jpayne@7 1668 "n",
jpayne@7 1669 "u",
jpayne@7 1670 "k",
jpayne@7 1671 "m",
jpayne@7 1672 "l",
jpayne@7 1673 "p",
jpayne@7 1674 "v",
jpayne@7 1675 "d",
jpayne@7 1676 "j",
jpayne@7 1677 "g",
jpayne@7 1678 "ė",
jpayne@7 1679 "b",
jpayne@7 1680 "y",
jpayne@7 1681 "ų",
jpayne@7 1682 "š",
jpayne@7 1683 "ž",
jpayne@7 1684 "c",
jpayne@7 1685 "ą",
jpayne@7 1686 "į",
jpayne@7 1687 ],
jpayne@7 1688 "Slovene": [
jpayne@7 1689 "e",
jpayne@7 1690 "a",
jpayne@7 1691 "i",
jpayne@7 1692 "o",
jpayne@7 1693 "n",
jpayne@7 1694 "r",
jpayne@7 1695 "s",
jpayne@7 1696 "l",
jpayne@7 1697 "t",
jpayne@7 1698 "j",
jpayne@7 1699 "v",
jpayne@7 1700 "k",
jpayne@7 1701 "d",
jpayne@7 1702 "p",
jpayne@7 1703 "m",
jpayne@7 1704 "u",
jpayne@7 1705 "z",
jpayne@7 1706 "b",
jpayne@7 1707 "g",
jpayne@7 1708 "h",
jpayne@7 1709 "č",
jpayne@7 1710 "c",
jpayne@7 1711 "š",
jpayne@7 1712 "ž",
jpayne@7 1713 "f",
jpayne@7 1714 "y",
jpayne@7 1715 ],
jpayne@7 1716 "Slovak": [
jpayne@7 1717 "o",
jpayne@7 1718 "a",
jpayne@7 1719 "e",
jpayne@7 1720 "n",
jpayne@7 1721 "i",
jpayne@7 1722 "r",
jpayne@7 1723 "v",
jpayne@7 1724 "t",
jpayne@7 1725 "s",
jpayne@7 1726 "l",
jpayne@7 1727 "k",
jpayne@7 1728 "d",
jpayne@7 1729 "m",
jpayne@7 1730 "p",
jpayne@7 1731 "u",
jpayne@7 1732 "c",
jpayne@7 1733 "h",
jpayne@7 1734 "j",
jpayne@7 1735 "b",
jpayne@7 1736 "z",
jpayne@7 1737 "á",
jpayne@7 1738 "y",
jpayne@7 1739 "ý",
jpayne@7 1740 "í",
jpayne@7 1741 "č",
jpayne@7 1742 "é",
jpayne@7 1743 ],
jpayne@7 1744 "Hebrew": [
jpayne@7 1745 "י",
jpayne@7 1746 "ו",
jpayne@7 1747 "ה",
jpayne@7 1748 "ל",
jpayne@7 1749 "ר",
jpayne@7 1750 "ב",
jpayne@7 1751 "ת",
jpayne@7 1752 "מ",
jpayne@7 1753 "א",
jpayne@7 1754 "ש",
jpayne@7 1755 "נ",
jpayne@7 1756 "ע",
jpayne@7 1757 "ם",
jpayne@7 1758 "ד",
jpayne@7 1759 "ק",
jpayne@7 1760 "ח",
jpayne@7 1761 "פ",
jpayne@7 1762 "ס",
jpayne@7 1763 "כ",
jpayne@7 1764 "ג",
jpayne@7 1765 "ט",
jpayne@7 1766 "צ",
jpayne@7 1767 "ן",
jpayne@7 1768 "ז",
jpayne@7 1769 "ך",
jpayne@7 1770 ],
jpayne@7 1771 "Bulgarian": [
jpayne@7 1772 "а",
jpayne@7 1773 "и",
jpayne@7 1774 "о",
jpayne@7 1775 "е",
jpayne@7 1776 "н",
jpayne@7 1777 "т",
jpayne@7 1778 "р",
jpayne@7 1779 "с",
jpayne@7 1780 "в",
jpayne@7 1781 "л",
jpayne@7 1782 "к",
jpayne@7 1783 "д",
jpayne@7 1784 "п",
jpayne@7 1785 "м",
jpayne@7 1786 "з",
jpayne@7 1787 "г",
jpayne@7 1788 "я",
jpayne@7 1789 "ъ",
jpayne@7 1790 "у",
jpayne@7 1791 "б",
jpayne@7 1792 "ч",
jpayne@7 1793 "ц",
jpayne@7 1794 "й",
jpayne@7 1795 "ж",
jpayne@7 1796 "щ",
jpayne@7 1797 "х",
jpayne@7 1798 ],
jpayne@7 1799 "Croatian": [
jpayne@7 1800 "a",
jpayne@7 1801 "i",
jpayne@7 1802 "o",
jpayne@7 1803 "e",
jpayne@7 1804 "n",
jpayne@7 1805 "r",
jpayne@7 1806 "j",
jpayne@7 1807 "s",
jpayne@7 1808 "t",
jpayne@7 1809 "u",
jpayne@7 1810 "k",
jpayne@7 1811 "l",
jpayne@7 1812 "v",
jpayne@7 1813 "d",
jpayne@7 1814 "m",
jpayne@7 1815 "p",
jpayne@7 1816 "g",
jpayne@7 1817 "z",
jpayne@7 1818 "b",
jpayne@7 1819 "c",
jpayne@7 1820 "č",
jpayne@7 1821 "h",
jpayne@7 1822 "š",
jpayne@7 1823 "ž",
jpayne@7 1824 "ć",
jpayne@7 1825 "f",
jpayne@7 1826 ],
jpayne@7 1827 "Hindi": [
jpayne@7 1828 "क",
jpayne@7 1829 "र",
jpayne@7 1830 "स",
jpayne@7 1831 "न",
jpayne@7 1832 "त",
jpayne@7 1833 "म",
jpayne@7 1834 "ह",
jpayne@7 1835 "प",
jpayne@7 1836 "य",
jpayne@7 1837 "ल",
jpayne@7 1838 "व",
jpayne@7 1839 "ज",
jpayne@7 1840 "द",
jpayne@7 1841 "ग",
jpayne@7 1842 "ब",
jpayne@7 1843 "श",
jpayne@7 1844 "ट",
jpayne@7 1845 "अ",
jpayne@7 1846 "ए",
jpayne@7 1847 "थ",
jpayne@7 1848 "भ",
jpayne@7 1849 "ड",
jpayne@7 1850 "च",
jpayne@7 1851 "ध",
jpayne@7 1852 "ष",
jpayne@7 1853 "इ",
jpayne@7 1854 ],
jpayne@7 1855 "Estonian": [
jpayne@7 1856 "a",
jpayne@7 1857 "i",
jpayne@7 1858 "e",
jpayne@7 1859 "s",
jpayne@7 1860 "t",
jpayne@7 1861 "l",
jpayne@7 1862 "u",
jpayne@7 1863 "n",
jpayne@7 1864 "o",
jpayne@7 1865 "k",
jpayne@7 1866 "r",
jpayne@7 1867 "d",
jpayne@7 1868 "m",
jpayne@7 1869 "v",
jpayne@7 1870 "g",
jpayne@7 1871 "p",
jpayne@7 1872 "j",
jpayne@7 1873 "h",
jpayne@7 1874 "ä",
jpayne@7 1875 "b",
jpayne@7 1876 "õ",
jpayne@7 1877 "ü",
jpayne@7 1878 "f",
jpayne@7 1879 "c",
jpayne@7 1880 "ö",
jpayne@7 1881 "y",
jpayne@7 1882 ],
jpayne@7 1883 "Thai": [
jpayne@7 1884 "า",
jpayne@7 1885 "น",
jpayne@7 1886 "ร",
jpayne@7 1887 "อ",
jpayne@7 1888 "ก",
jpayne@7 1889 "เ",
jpayne@7 1890 "ง",
jpayne@7 1891 "ม",
jpayne@7 1892 "ย",
jpayne@7 1893 "ล",
jpayne@7 1894 "ว",
jpayne@7 1895 "ด",
jpayne@7 1896 "ท",
jpayne@7 1897 "ส",
jpayne@7 1898 "ต",
jpayne@7 1899 "ะ",
jpayne@7 1900 "ป",
jpayne@7 1901 "บ",
jpayne@7 1902 "ค",
jpayne@7 1903 "ห",
jpayne@7 1904 "แ",
jpayne@7 1905 "จ",
jpayne@7 1906 "พ",
jpayne@7 1907 "ช",
jpayne@7 1908 "ข",
jpayne@7 1909 "ใ",
jpayne@7 1910 ],
jpayne@7 1911 "Greek": [
jpayne@7 1912 "α",
jpayne@7 1913 "τ",
jpayne@7 1914 "ο",
jpayne@7 1915 "ι",
jpayne@7 1916 "ε",
jpayne@7 1917 "ν",
jpayne@7 1918 "ρ",
jpayne@7 1919 "σ",
jpayne@7 1920 "κ",
jpayne@7 1921 "η",
jpayne@7 1922 "π",
jpayne@7 1923 "ς",
jpayne@7 1924 "υ",
jpayne@7 1925 "μ",
jpayne@7 1926 "λ",
jpayne@7 1927 "ί",
jpayne@7 1928 "ό",
jpayne@7 1929 "ά",
jpayne@7 1930 "γ",
jpayne@7 1931 "έ",
jpayne@7 1932 "δ",
jpayne@7 1933 "ή",
jpayne@7 1934 "ω",
jpayne@7 1935 "χ",
jpayne@7 1936 "θ",
jpayne@7 1937 "ύ",
jpayne@7 1938 ],
jpayne@7 1939 "Tamil": [
jpayne@7 1940 "க",
jpayne@7 1941 "த",
jpayne@7 1942 "ப",
jpayne@7 1943 "ட",
jpayne@7 1944 "ர",
jpayne@7 1945 "ம",
jpayne@7 1946 "ல",
jpayne@7 1947 "ன",
jpayne@7 1948 "வ",
jpayne@7 1949 "ற",
jpayne@7 1950 "ய",
jpayne@7 1951 "ள",
jpayne@7 1952 "ச",
jpayne@7 1953 "ந",
jpayne@7 1954 "இ",
jpayne@7 1955 "ண",
jpayne@7 1956 "அ",
jpayne@7 1957 "ஆ",
jpayne@7 1958 "ழ",
jpayne@7 1959 "ங",
jpayne@7 1960 "எ",
jpayne@7 1961 "உ",
jpayne@7 1962 "ஒ",
jpayne@7 1963 "ஸ",
jpayne@7 1964 ],
jpayne@7 1965 "Kazakh": [
jpayne@7 1966 "а",
jpayne@7 1967 "ы",
jpayne@7 1968 "е",
jpayne@7 1969 "н",
jpayne@7 1970 "т",
jpayne@7 1971 "р",
jpayne@7 1972 "л",
jpayne@7 1973 "і",
jpayne@7 1974 "д",
jpayne@7 1975 "с",
jpayne@7 1976 "м",
jpayne@7 1977 "қ",
jpayne@7 1978 "к",
jpayne@7 1979 "о",
jpayne@7 1980 "б",
jpayne@7 1981 "и",
jpayne@7 1982 "у",
jpayne@7 1983 "ғ",
jpayne@7 1984 "ж",
jpayne@7 1985 "ң",
jpayne@7 1986 "з",
jpayne@7 1987 "ш",
jpayne@7 1988 "й",
jpayne@7 1989 "п",
jpayne@7 1990 "г",
jpayne@7 1991 "ө",
jpayne@7 1992 ],
jpayne@7 1993 }
jpayne@7 1994
jpayne@7 1995 LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)  # one per FREQUENCIES key, so the three Japanese script tables count separately