"""Implementation of JSONEncoder
"""
import re

# The C accelerators are optional; each name falls back to None when the
# ``_json`` extension module (or that particular symbol) is unavailable.
try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from _json import encode_basestring as c_encode_basestring
except ImportError:
    c_encode_basestring = None
try:
    from _json import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None

# Matches every character that py_encode_basestring replaces: the control
# characters U+0000-U+001F, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches backslash, double quote, and anything outside the range from
# space to tilde; used by py_encode_basestring_ascii.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Not referenced by the code below; kept so the module namespace is unchanged.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short escapes are listed
# explicitly; the loop below fills in \uXXXX for the remaining control
# characters without overriding the short forms.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), f'\\u{i:04x}')

INFINITY = float('inf')


def py_encode_basestring(s):
    """Return a JSON representation of a Python string

    Characters matched by ``ESCAPE`` are replaced using ``ESCAPE_DCT``; all
    other characters (including non-ASCII ones) are passed through as-is.
    The result is wrapped in double quotes.

    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


# Prefer the C implementation when it was importable.
encode_basestring = (c_encode_basestring or py_encode_basestring)


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Characters with an entry in ``ESCAPE_DCT`` use that escape; every other
    character matched by ``ESCAPE_ASCII`` is written as ``\\uXXXX``, using a
    surrogate pair for code points of 0x10000 and above.  The result is
    wrapped in double quotes.

    """
    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return f'\\u{n:04x}'
            # Code point does not fit in four hex digits: emit a
            # surrogate pair (high surrogate first).
            n -= 0x10000
            s1 = 0xd800 | ((n >> 10) & 0x3ff)
            s2 = 0xdc00 | (n & 0x3ff)
            return f'\\u{s1:04x}\\u{s2:04x}'
    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'


# Prefer the C implementation when it was importable.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)


class JSONEncoder:
    """Extensible JSON encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float, bool or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause a RecursionError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Each item goes on its own line, so drop the trailing space.
            self.item_separator = ','
        if default is not None:
            # Instance attribute shadows the default() method below.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(f'Object of type {o.__class__.__name__} '
                        f'is not JSON serializable')

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # A dict of in-progress containers enables circular-reference
        # detection; None disables it.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            """Return the JSON text for float ``o``.

            Finite values use ``float.__repr__``.  NaN and the infinities
            become 'NaN', 'Infinity' and '-Infinity', or raise ValueError
            when ``allow_nan`` is false.
            """
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot encoding without
        # indentation; every other case uses the pure-Python generators.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)


def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build the pure-Python encoder and return its entry-point generator.

    The returned ``_iterencode(o, _current_indent_level)`` yields the JSON
    text for ``o`` as a sequence of string chunks.

    markers is a dict used to track the ids of containers currently being
    encoded (for circular-reference detection), or None to skip that check.
    _default is called for objects of unsupported types; _encoder encodes
    str values and dict keys; _floatstr encodes floats.  _indent is None for
    single-line output, a str repeated once per nesting level, or a value
    multiplied with ' ' to produce that str.  _key_separator and
    _item_separator are inserted verbatim.  _sort_keys sorts dict items
    before encoding; _skipkeys silently drops unsupported dict keys instead
    of raising TypeError.  _one_shot is accepted but not used here.
    """

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        """Yield the chunks of a JSON array for a list or tuple."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` is the text that must precede the next item: the opening
        # bracket (plus indentation) for the first one, the separator after.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the chunks of a JSON object for a dict."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            # Normalize the key to a str; JSON object keys are strings.
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the chunks of the JSON text for an arbitrary object."""
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # Unsupported type: convert it with _default and encode the
            # result, tracking the original object so that a _default which
            # leads back to it is reported as a circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode