"""Implementation of JSONDecoder
"""
import re

from json import scanner
try:
    from _json import scanstring as c_scanstring
except ImportError:
    c_scanstring = None

__all__ = ['JSONDecoder', 'JSONDecodeError']

FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')


class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # Lines and columns are 1-based: count the newlines before pos, and
        # measure the column from the last newline (rfind gives -1 if none).
        lineno = doc.count('\n', 0, pos) + 1
        colno = pos - doc.rfind('\n', 0, pos)
        errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
        ValueError.__init__(self, errmsg)
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

    def __reduce__(self):
        # Rebuild from the original constructor arguments rather than from
        # the formatted message stored in ``args``.
        return self.__class__, (self.msg, self.doc, self.pos)


# Non-standard literals mapped to their float values; used as the default
# ``parse_constant`` of JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}


# Exactly four hexadecimal digits, as required after ``\u``.
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
# A run of ordinary characters followed by a quote, a backslash or a
# control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}


def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
    """Return the code unit encoded by the ``\\uXXXX`` escape in s.

    ``pos`` is the index of the ``u``; the four characters after it must all
    be hexadecimal digits.  Raises JSONDecodeError (positioned at ``pos``)
    otherwise.
    """
    # A regex is used instead of a bare int(esc, 16) because int() also
    # accepts surrounding whitespace, a sign, underscores and a "0x" prefix,
    # none of which are valid in a JSON escape.
    match = _m(s, pos + 1)
    if match is None:
        msg = "Invalid \\uXXXX escape"
        raise JSONDecodeError(msg, s, pos)
    return int(match.group(), 16)


def py_scanstring(s, end, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting an unterminated string.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, begin) from None
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            uni = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate directly followed by another \u escape may be
            # the first half of a surrogate pair; combine the two only when
            # the second escape really is a low surrogate.
            if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = chr(uni)
        _append(char)
    return ''.join(chunks), end


# Use speedup if available
scanstring = c_scanstring or py_scanstring

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``s_and_end`` is a ``(s, end)`` pair; parsing starts at index ``end``
    (presumably the character after the opening ``{`` -- the caller is not
    part of this module).  ``scan_once(s, idx)`` must return
    ``(value, new_idx)`` or raise StopIteration carrying the failing index.

    Returns ``(result, end)`` where ``end`` is the index after the closing
    ``}``.  ``result`` is ``object_pairs_hook(pairs)`` if that hook is given,
    otherwise a dict, passed through ``object_hook`` when that is given.
    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault(key, key) returns the first equal key seen, so repeated
    # keys share one string object.
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; running off the end of s is left
        # for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            # End of input: '' matches neither '}' nor ',' below.
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``s_and_end`` is a ``(s, end)`` pair; parsing starts at index ``end``
    (presumably the character after the opening ``[`` -- the caller is not
    part of this module).  ``scan_once(s, idx)`` must return
    ``(value, new_idx)`` or raise StopIteration carrying the failing index.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index after the closing ``]``.  Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # Skip whitespace before the next element; running off the end of s
        # is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end


class JSONDecoder:
    """Simple JSON decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified will be called with the result of
        every JSON object decoded with an ordered list of pairs.  The return
        value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders.
        If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
        priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        self.object_hook = object_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.object_pairs_hook = object_pairs_hook
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Must come last: the scanner is built from this instance after the
        # attributes above have been assigned.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Leading and trailing whitespace is ignored; anything else after the
        document raises JSONDecodeError("Extra data").

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end)
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        return obj, end