Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/sax/xmlreader.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 """An XML Reader is the SAX 2 name for an XML parser. XML Parsers | |
2 should be based on this code. """ | |
3 | |
4 from . import handler | |
5 | |
6 from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException | |
7 | |
8 | |
9 # ===== XMLREADER ===== | |
10 | |
class XMLReader:
    """Callback-driven interface for reading an XML document.

    Every SAX2 driver for an XML parser implements this interface.  It
    lets an application query and change parser features and properties,
    register the handlers that receive document events, and start a
    parse.

    SAX interfaces are synchronous: a parse method returns only once
    parsing has finished, and the reader waits for each event-handler
    callback to return before it reports the next event.
    """

    def __init__(self):
        # Each handler slot starts out holding a default instance from the
        # handler module; applications replace them through the setters.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse the document named by a system identifier or InputSource.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the ContentHandler currently registered."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register the object that will receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the DTD handler currently registered."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register the object that will receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the EntityResolver currently registered."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register the object used to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the ErrorHandler currently registered."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register the object that will receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Request a locale for error and warning messages.

        SAX parsers need not localize their messages, but a parser that
        cannot honour the requested locale must raise a SAX exception.
        Applications may ask for a locale change in the middle of a
        parse.  This base implementation always raises
        SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Return the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Return the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
90 | |
class IncrementalParser(XMLReader):
    """XMLReader extension for parsers that can be fed data piecewise.

    Three methods are added to the XMLReader interface: feed, close and
    reset.  Supporting this interface is optional, because not every
    underlying XML parser can parse incrementally.

    A freshly created parser accepts data through feed right away.
    Once a document has been finished with close, reset must be called
    before the parser is given new data, whether through feed or
    through parse.

    None of these methods may be called while a parse is in progress,
    i.e. between the call to parse and its return.

    As a convenience for SAX 2.0 driver writers, parse is implemented
    here on top of prepareParser, feed and close.
    """

    def __init__(self, bufsize=2**16):
        XMLReader.__init__(self)
        # Size argument passed to every read() call made by parse().
        self._bufsize = bufsize

    def parse(self, source):
        """Parse *source* by reading it in chunks and feeding each to feed.

        The source is normalized with saxutils.prepare_input_source and
        handed to prepareParser.  The character stream is used when one
        is set, otherwise the byte stream.  close is called once a read
        returns an empty result.
        """
        from . import saxutils

        input_source = saxutils.prepare_input_source(source)
        self.prepareParser(input_source)

        stream = input_source.getCharacterStream()
        if stream is None:
            stream = input_source.getByteStream()

        while True:
            chunk = stream.read(self._bufsize)
            if not chunk:
                # An empty read marks the end of the input.
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand raw XML data to the parser and emit the resulting events.

        An XML construct may be split across several calls to feed.

        feed may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Hook called by parse so the SAX 2.0 driver can get ready to parse.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Signal that the whole document has been passed to feed.

        This lets the parser run its final checks on the document and
        empty its internal data buffer.  The parser is not ready for
        another document until reset has been called.

        close may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready for a new document after close.

        The result of calling parse or feed after close without an
        intervening reset is undefined.  This base implementation
        always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")
162 | |
163 # ===== LOCATOR ===== | |
164 | |
class Locator:
    """Interface that ties a SAX event to a position in the document.

    A locator gives valid answers only while a handler callback is
    running; at any other time its results are unpredictable.  The
    methods of this base class return placeholder values (-1 or None).
    """

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
186 | |
187 # ===== INPUTSOURCE ===== | |
188 | |
class InputSource:
    """Bundle of everything an XMLReader needs in order to read an entity.

    An instance can carry a public identifier, a system identifier, a
    byte stream (optionally with its character encoding) and/or a
    character stream.

    Applications create these objects to pass to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader must not
    modify one it was handed, although it may copy it and modify the
    copy.
    """

    def __init__(self, system_id = None):
        # Only the system identifier can be given at construction time;
        # every other field starts unset and is filled in via its setter.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The value must be a string that is valid in an XML encoding
        declaration (section 4.3.3 of the XML recommendation).

        The encoding is ignored when the InputSource also holds a
        character stream.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source.

        The stream is a file-like object that performs no
        byte-to-character conversion.

        A SAX parser ignores it when a character stream is also set,
        but prefers it over opening a URI connection itself.

        If the application knows the encoding of the byte stream, it
        should declare it with setEncoding.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        getEncoding gives the character encoding of this stream, or
        None when it is unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.

        The stream must be a file-like object that yields text, i.e.
        one that already converts its data to Unicode strings.

        When a character stream is set, the SAX parser ignores any byte
        stream and does not try to open a URI connection to the system
        identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
275 | |
276 # ===== ATTRIBUTESIMPL ===== | |
277 | |
class AttributesImpl:
    """Attribute collection for elements reported without namespaces.

    Wraps a {name: value} mapping and exposes it through the SAX
    attribute accessors plus a read-only subset of the mapping
    protocol.  In this implementation an attribute's name and its
    qualified name are the same object.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here."""
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute *name*; raise KeyError if absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name *name*; raise KeyError if absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the name for qualified name *name*; raise KeyError if absent."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return the qualified name for *name*; raise KeyError if absent."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the qualified attribute names."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of *name*, or *alternative* if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return an independent snapshot of these attributes.

        The result is an instance of the same class built on a shallow
        copy of the underlying mapping.  Handing the original mapping
        to the new instance would make the "copy" change whenever that
        mapping is mutated or reused, which defeats the reason
        applications call copy() in the first place.
        """
        return self.__class__(dict(self._attrs))

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
337 | |
338 # ===== ATTRIBUTESNSIMPL ===== | |
339 | |
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attribute names are (ns_uri, lname) tuples.  A second mapping
    records the qualified name that belongs to each of those tuples.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is *name*.

        Raises KeyError if no attribute has that qualified name.
        """
        # The qname mapping is keyed by (ns_uri, lname), so a lookup by
        # qualified name has to scan its entries.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) tuple for qualified name *name*.

        Raises KeyError if no attribute has that qualified name.
        """
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for the (ns_uri, lname) tuple *name*.

        Raises KeyError if the tuple is not present.
        """
        return self._qnames[name]

    def getQNames(self):
        """Return a list of the qualified attribute names."""
        return list(self._qnames.values())

    def copy(self):
        """Return an independent snapshot of these attributes.

        Both underlying mappings are shallow-copied.  Sharing them with
        the new instance would make the "copy" change whenever either
        mapping is mutated or reused.
        """
        return self.__class__(dict(self._attrs), dict(self._qnames))
372 | |
373 | |
def _test():
    """Smoke test: instantiate each interface class that needs no arguments."""
    for interface in (XMLReader, IncrementalParser, Locator):
        interface()
378 | |
# Run the instantiation smoke test when this module is executed as a script.
if __name__ == "__main__":
    _test()