jpayne@68
|
1 """Implementation of JSONDecoder
|
jpayne@68
|
2 """
|
jpayne@68
|
3 import re
|
jpayne@68
|
4
|
jpayne@68
|
5 from json import scanner
|
jpayne@68
|
6 try:
|
jpayne@68
|
7 from _json import scanstring as c_scanstring
|
jpayne@68
|
8 except ImportError:
|
jpayne@68
|
9 c_scanstring = None
|
jpayne@68
|
10
|
jpayne@68
|
11 __all__ = ['JSONDecoder', 'JSONDecodeError']
|
jpayne@68
|
12
|
jpayne@68
|
# Flag set passed to every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Float objects used for the non-standard NaN / Infinity / -Infinity literals.
NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
|
jpayne@68
|
18
|
jpayne@68
|
19
|
jpayne@68
|
class JSONDecodeError(ValueError):
    """ValueError subclass raised when a JSON document cannot be decoded.

    Besides the formatted message, instances carry these attributes:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # Line numbers are 1-based: count the newlines located before pos.
        line = doc.count('\n', 0, pos) + 1
        # rfind() returns -1 when no newline precedes pos, so the column is
        # 1-based on the first line as well as on later ones.
        previous_newline = doc.rfind('\n', 0, pos)
        column = pos - previous_newline
        location = (msg, line, column, pos)
        ValueError.__init__(self, '%s: line %d column %d (char %d)' % location)
        self.msg, self.doc, self.pos = msg, doc, pos
        self.lineno, self.colno = line, column

    def __reduce__(self):
        # Describe the instance by its constructor arguments; lineno and
        # colno are derived from them again by __init__.
        constructor_args = (self.msg, self.doc, self.pos)
        return self.__class__, constructor_args
|
jpayne@68
|
44
|
jpayne@68
|
45
|
jpayne@68
|
46 _CONSTANTS = {
|
jpayne@68
|
47 '-Infinity': NegInf,
|
jpayne@68
|
48 'Infinity': PosInf,
|
jpayne@68
|
49 'NaN': NaN,
|
jpayne@68
|
50 }
|
jpayne@68
|
51
|
jpayne@68
|
52
|
jpayne@68
|
# Matches a (possibly empty) run of ordinary characters followed by the one
# character that ends the run: a double quote, a backslash or a control
# character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Replacement text for each single-character escape (the character that
# follows the backslash).
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}
|
jpayne@68
|
58
|
jpayne@68
|
59 def _decode_uXXXX(s, pos):
|
jpayne@68
|
60 esc = s[pos + 1:pos + 5]
|
jpayne@68
|
61 if len(esc) == 4 and esc[1] not in 'xX':
|
jpayne@68
|
62 try:
|
jpayne@68
|
63 return int(esc, 16)
|
jpayne@68
|
64 except ValueError:
|
jpayne@68
|
65 pass
|
jpayne@68
|
66 msg = "Invalid \\uXXXX escape"
|
jpayne@68
|
67 raise JSONDecodeError(msg, s, pos)
|
jpayne@68
|
68
|
jpayne@68
|
def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode the JSON string literal in s whose body starts at index end.

    end is the index of the character right after the opening quote.
    Every valid JSON escape sequence is replaced by the character it
    denotes; an invalid or unterminated string raises JSONDecodeError
    (a ValueError).  With strict set to False, literal control characters
    inside the string are kept instead of being rejected.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    # Index of the opening quote, reported when the string never ends.
    opening = end - 1
    pieces = []
    push = pieces.append
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError("Unterminated string starting at", s, opening)
        # plain: zero or more unescaped characters.
        # stop: the closing quote, a literal control character, or a
        # backslash announcing an escape sequence.
        plain, stop = match.groups()
        end = match.end()
        if plain:
            push(plain)
        if stop == '"':
            return ''.join(pieces), end
        if stop != '\\':
            # A literal control character.
            if strict:
                msg = "Invalid control character {0!r} at".format(stop)
                raise JSONDecodeError(msg, s, end)
            push(stop)
            continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, opening) from None
        if esc == 'u':
            code = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate directly followed by another \u escape may
            # form a surrogate pair encoding a single code point.
            if 0xd800 <= code <= 0xdbff and s[end:end + 2] == '\\u':
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            push(chr(code))
        else:
            # Anything other than a unicode escape must be in the table.
            try:
                replacement = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
            push(replacement)
|
jpayne@68
|
127
|
jpayne@68
|
128
|
jpayne@68
|
# Prefer the C accelerated scanner; fall back to the pure Python one.
scanstring = c_scanstring if c_scanstring else py_scanstring

# The whitespace characters skipped between JSON tokens, as a compiled
# pattern (matches a possibly empty run) and as a plain string usable for
# cheap single-character membership tests.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
|
jpayne@68
|
134
|
jpayne@68
|
135
|
jpayne@68
|
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the body of a JSON object and return ``(result, end)``.

    s_and_end is a ``(document, index)`` pair; scanning starts at that
    index, which is where the first property name (or the closing brace) is
    looked for -- the opening brace itself is not checked here.  strict is
    forwarded to scanstring for the keys, and scan_once(s, idx) is called
    to decode every value.

    The key/value pairs are handed to object_pairs_hook when it is given;
    otherwise they are turned into a dict, which is passed through
    object_hook when that one is given.  memo is a dict used to reuse equal
    key strings.  The returned index is the one just past the closing brace.

    Raises JSONDecodeError when a property name, ':' or ',' delimiter, or
    value is missing.
    """
    s, end = s_and_end
    items = []
    # Backwards compatibility
    if memo is None:
        memo = {}
    shared_key = memo.setdefault

    # Slicing never raises IndexError; at the end of the document it yields
    # '' and the checks below then report a more specific error.
    head = s[end:end + 1]
    # Normally we expect head == '"'
    if head != '"':
        if head in _ws:
            end = _w(s, end).end()
            head = s[end:end + 1]
        if head == '}':
            # Trivial empty object
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if head != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1

    while True:
        key, end = scanstring(s, end, strict)
        key = shared_key(key, key)

        # Fast paths for the separators ":" and ": " avoid the cost of a
        # regular expression call in the common case.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the document; scan_once reports the missing value.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        items.append((key, value))

        try:
            delimiter = s[end]
            if delimiter in _ws:
                end = _w(s, end + 1).end()
                delimiter = s[end]
        except IndexError:
            delimiter = ''
        end += 1

        if delimiter == '}':
            break
        if delimiter != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        # After a comma the next property name must follow.
        end = _w(s, end).end()
        if s[end:end + 1] != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
        end += 1

    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    mapping = dict(items)
    if object_hook is None:
        return mapping, end
    return object_hook(mapping), end
|
jpayne@68
|
216
|
jpayne@68
|
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the body of a JSON array and return ``(list, end)``.

    s_and_end is a ``(document, index)`` pair; scanning starts at that
    index, where the first element (or the closing bracket) is looked for.
    scan_once(s, idx) is called to decode every element.  The returned
    index is the one just past the closing bracket.

    Raises JSONDecodeError when an element or a ',' delimiter is missing.
    """
    s, end = s_and_end
    items = []
    current = s[end:end + 1]
    if current in _ws:
        end = _w(s, end + 1).end()
        current = s[end:end + 1]
    # Look-ahead for trivial empty array
    if current == ']':
        return items, end + 1

    while True:
        try:
            element, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        items.append(element)

        # Slicing yields '' at the end of the document instead of raising.
        current = s[end:end + 1]
        if current in _ws:
            end = _w(s, end + 1).end()
            current = s[end:end + 1]
        end += 1
        if current == ']':
            return items, end
        if current != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        # Skip whitespace after the comma; one or two single-character
        # checks come first so the regular expression is rarely needed.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the document; scan_once reports the missing value.
            pass
|
jpayne@68
|
252
|
jpayne@68
|
253
|
jpayne@68
|
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding performs these translations:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are understood as well and
    decode to the corresponding ``float`` values, although they are
    outside the JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``object_hook``, if specified, is called with the ``dict``
        built for every decoded JSON object, and its return value is used
        in place of that ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified, is called with an ordered
        list of ``(key, value)`` pairs for every decoded JSON object, and
        its return value is used instead of the ``dict``.  This feature
        can be used to implement custom decoders.  If ``object_hook`` is
        also defined, the ``object_pairs_hook`` takes priority.

        ``parse_float``, if specified, is called with the string of every
        JSON float to be decoded.  By default this is equivalent to
        float(num_str).  This can be used to use another datatype or
        parser for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, is called with the string of every
        JSON int to be decoded.  By default this is equivalent to
        int(num_str).  This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, is called with one of the
        following strings: -Infinity, Infinity, NaN.  This can be used to
        raise an exception if invalid JSON numbers are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        # Caller-supplied hooks and options.
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Number and constant parsers fall back to the built-in behaviour.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        # Parsers for the structured JSON types.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Build the scanner last: it is created from this fully
        # configured decoder instance.
        self.scan_once = scanner.make_scanner(self)


    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Whitespace around the document is ignored; anything else after
        it raises JSONDecodeError("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end)
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document), starting at index ``idx``, and return a 2-tuple
        of the Python representation and the index in ``s`` where the
        document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
            # The scanner signals "no value here" with StopIteration, whose
            # value is the offending index.
            raise JSONDecodeError("Expecting value", s, err.value) from None
        else:
            return obj, end
|