jpayne@69
|
1 """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
|
jpayne@69
|
2 should be based on this code. """
|
jpayne@69
|
3
|
jpayne@69
|
4 from . import handler
|
jpayne@69
|
5
|
jpayne@69
|
6 from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
|
jpayne@69
|
7
|
jpayne@69
|
8
|
jpayne@69
|
9 # ===== XMLREADER =====
|
jpayne@69
|
10
|
jpayne@69
|
class XMLReader:
    """Callback-based interface for reading an XML document.

    This is the interface that the SAX2 driver of an XML parser must
    implement. Through it an application can query and set parser
    features and properties, register the event handlers used during
    document processing, and start a parse.

    SAX interfaces are assumed to be synchronous: a parse method does
    not return before parsing has finished, and a reader waits for each
    event-handler callback to return before it reports the next event.
    """

    def __init__(self):
        # Every handler slot starts out holding the default object from
        # the handler module, so the getters never return None.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        The base class has no parser behind it and always raises
        NotImplementedError; concrete readers override this method.
        """
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the currently registered ContentHandler."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register the object that will receive document content events."""
        # NOTE: the parameter shadows the imported ``handler`` module
        # inside this method only.
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the currently registered DTD handler."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register the object that will receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the currently registered EntityResolver."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register the object used to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the currently registered ErrorHandler."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register the object that will receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers need not localize errors and warnings, but a parser
        that cannot honour the requested locale must raise a SAX
        exception. Applications may ask for a locale change in the
        middle of a parse.

        This base implementation supports no locale at all and always
        raises SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        The base implementation recognizes no feature and always raises
        SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        The base implementation recognizes no feature and always raises
        SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        The base implementation recognizes no property and always raises
        SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        The base implementation recognizes no property and always raises
        SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
|
jpayne@69
|
90
|
jpayne@69
|
class IncrementalParser(XMLReader):
    """XMLReader extended with three methods for incremental parsing.

    Supporting this interface is optional, because not every underlying
    XML parser can work incrementally.

    A freshly instantiated parser is immediately ready to accept data
    through feed. Once parsing has been finished with a call to close,
    reset must be called before the parser can accept new data, whether
    through feed or through parse.

    These methods must _not_ be called during parsing, that is, after
    parse has been called and before it returns.

    As a convenience to SAX 2.0 driver writers, this class also provides
    a default parse method built from feed and close.
    """

    def __init__(self, bufsize=2**16):
        # bufsize is the size passed to each read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse *source* by reading it in chunks and feeding each chunk.

        The source is normalized with saxutils.prepare_input_source,
        handed to prepareParser, then read in pieces of up to
        ``self._bufsize`` and passed to feed until a read returns
        nothing; close is called at the end.
        """
        from . import saxutils

        source = saxutils.prepare_input_source(source)
        self.prepareParser(source)

        # Prefer the character stream; fall back to the byte stream.
        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()

        while True:
            chunk = stream.read(self._bufsize)
            if not chunk:
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand the raw XML in *data* to the parser and have it parsed.

        The corresponding events are emitted as the data is processed.
        XML constructs are allowed to be split across several calls to
        feed.

        feed may raise SAXException. This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Hook called by parse so the SAX 2.0 driver can get ready.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Tell the parser that no more data will arrive.

        Called once the entire XML document has been passed in through
        feed, so the parser can run its final checks on the document and
        empty its internal data buffer.

        The parser is not ready to parse another document until reset
        has been called.

        close may raise SAXException. This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready to parse new documents after close.

        The results of calling parse or feed after close without calling
        reset are undefined. This base implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")
|
jpayne@69
|
162
|
jpayne@69
|
163 # ===== LOCATOR =====
|
jpayne@69
|
164
|
jpayne@69
|
class Locator:
    """Interface that ties a SAX event to a location in the document.

    A locator returns valid results only during calls to DocumentHandler
    methods; at any other time the results are unpredictable.

    The defaults defined here report "unknown": -1 for the numeric
    positions and None for the identifiers.
    """

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
|
jpayne@69
|
186
|
jpayne@69
|
187 # ===== INPUTSOURCE =====
|
jpayne@69
|
188
|
jpayne@69
|
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry the public identifier, the system identifier,
    a byte stream (possibly with character encoding information) and/or
    a character stream of an entity.

    Applications create these objects to pass to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader may not
    modify InputSource objects it receives from the application, though
    it may make copies and modify those.
    """

    def __init__(self, system_id = None):
        # Only the system identifier can be given at construction time;
        # everything else starts unset and is filled in via the setters.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding is ignored when the InputSource also contains a
        character stream.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source.

        The stream is a Python file-like object that does not perform
        byte-to-character conversion.

        The SAX parser ignores it when a character stream is also
        specified, but uses a byte stream in preference to opening a URI
        connection itself.

        An application that knows the character encoding of the byte
        stream should set it with the setEncoding method.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method returns the character encoding for this
        byte stream, or None if unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.

        The stream must be a file-like object that performs conversion
        to Unicode strings.

        When a character stream is specified, the SAX parser ignores any
        byte stream and does not attempt to open a URI connection to the
        system identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
|
jpayne@69
|
275
|
jpayne@69
|
276 # ===== ATTRIBUTESIMPL =====
|
jpayne@69
|
277
|
jpayne@69
|
class AttributesImpl:
    """Non-namespace-aware attribute collection backed by a mapping.

    Attribute names double as qualified names, so every name/qname
    lookup simply validates the name and returns it unchanged.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}. The mapping is
        stored by reference, not copied.
        """
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here.

        *name* is not consulted; unknown names also yield "CDATA".
        """
        return "CDATA"

    def getValue(self, name):
        """Return the value for *name*; raise KeyError if it is absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name *name* (KeyError if absent)."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return *name* itself after checking it exists (else KeyError)."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return *name* itself after checking it exists (else KeyError)."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the qualified names (same as the names)."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value for *name*, or *alternative* if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return an independent instance of the same class.

        The underlying mapping is snapshotted into a new dict so that
        later changes to (or reuse of) the original mapping do not show
        up in the copy. Values themselves are not deep-copied.
        """
        return self.__class__(dict(self._attrs))

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
|
jpayne@69
|
337
|
jpayne@69
|
338 # ===== ATTRIBUTESNSIMPL =====
|
jpayne@69
|
339
|
jpayne@69
|
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Values are keyed by (ns_uri, lname) tuples; a second mapping
    translates each such key to its qualified name.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.
        Both mappings are stored by reference, not copied.
        """
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is *name*.

        Raises KeyError if no attribute has that qualified name.
        """
        # The qname mapping is keyed the other way round, so scan it.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key whose qualified name is *name*.

        Raises KeyError if no attribute has that qualified name.
        """
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for the (ns_uri, lname) key *name*."""
        return self._qnames[name]

    def getQNames(self):
        """Return a list of all qualified names."""
        return list(self._qnames.values())

    def copy(self):
        """Return an independent instance of the same class.

        Both underlying mappings are snapshotted into new dicts so that
        later changes to (or reuse of) the originals do not show up in
        the copy. Values themselves are not deep-copied.
        """
        return self.__class__(dict(self._attrs), dict(self._qnames))
|
jpayne@69
|
372
|
jpayne@69
|
373
|
jpayne@69
|
def _test():
    """Smoke test: instantiate each argument-free interface class once."""
    for interface in (XMLReader, IncrementalParser, Locator):
        interface()
|
jpayne@69
|
378
|
jpayne@69
|
# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _test()
|