"""Implementation of JSONEncoder
"""
import re

# The C accelerators are optional; each falls back to None so the pure-Python
# implementations below are used when ``_json`` is unavailable.
try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from _json import encode_basestring as c_encode_basestring
except ImportError:
    c_encode_basestring = None
try:
    from _json import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None

ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(b'[\x80-\xff]')
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character below 0x20 gets a \uXXXX escape; the
# short escapes defined above are kept because setdefault() never overwrites.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

INFINITY = float('inf')


def py_encode_basestring(s):
    """Return a JSON representation of a Python string

    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


encode_basestring = (c_encode_basestring or py_encode_basestring)


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    """
    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
                #return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'


encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)


class JSONEncoder(object):
    """Extensible JSON encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause a RecursionError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Pretty-printed output puts a newline after each item, so the
            # trailing space of the class-level default is dropped.
            self.item_separator = ','
        if default is not None:
            # Shadow the ``default`` method with the supplied callable.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(f'Object of type {o.__class__.__name__} '
                        f'is not JSON serializable')

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot, non-indented encoding;
        # every other case goes through the pure-Python generator.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)


def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build the pure-Python encoder closure.

    Returns the inner ``_iterencode(o, _current_indent_level)`` generator
    function, closed over the supplied settings.  When ``markers`` is a dict
    it records the ids of containers currently being encoded so that a
    circular reference raises ``ValueError``; when it is ``None`` no such
    check is made.
    """

    # An integer indent means "that many spaces per level".
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            # ``buf`` holds the opening bracket for the first item and the
            # separator for every later one.
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            # Unknown type: convert it with the user hook, then encode
            # whatever the hook returned.
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode