annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/sax/xmlreader.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
jpayne@69 2 should be based on this code. """
jpayne@69 3
jpayne@69 4 from . import handler
jpayne@69 5
jpayne@69 6 from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
jpayne@69 7
jpayne@69 8
jpayne@69 9 # ===== XMLREADER =====
jpayne@69 10
class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        """Start with a default handler object in each of the four slots."""
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        This base implementation always raises NotImplementedError;
        concrete readers must override it.
        """
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        """Return the current ContentHandler."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register a new object to receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the current DTD handler."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the current EntityResolver."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the current ErrorHandler."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must raise a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation supports no locale and always raises
        SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    # The four accessors below use f-strings rather than ``"...%s..." % name``
    # so that an unusual ``name`` (e.g. a tuple) still produces the intended
    # SAX exception instead of a TypeError from %-formatting.

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        The base class recognizes no features and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException(f"Feature '{name}' not recognized")

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        The base class recognizes no features and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException(f"Feature '{name}' not recognized")

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        The base class recognizes no properties and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException(f"Property '{name}' not recognized")

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        The base class recognizes no properties and always raises
        SAXNotRecognizedException.
        """
        raise SAXNotRecognizedException(f"Property '{name}' not recognized")
jpayne@69 90
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        """Remember *bufsize*, the size passed to each read() call made
        by parse(), then run the XMLReader initialisation."""
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse *source* by streaming it through feed() and then close().

        *source* is normalised with saxutils.prepare_input_source and
        handed to prepareParser() before any data is read.
        """
        # Imported here rather than at module level, as in the original code.
        from . import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        # A character stream, when present, takes precedence over the
        # byte stream.
        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
jpayne@69 162
jpayne@69 163 # ===== LOCATOR =====
jpayne@69 164
class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to ContentHandler methods (DocumentHandler in SAX 1); at any
    other time, the results are unpredictable.

    This base class knows no location: the numeric getters return -1
    and the identifier getters return None."""

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
jpayne@69 186
jpayne@69 187 # ===== INPUTSOURCE =====
jpayne@69 188
class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id=None):
        """Create an input source; only the system identifier may be
        given up front, every other field starts out as None."""
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a file-like object that already performs the
        conversion to Unicode strings, i.e. yields text, not bytes.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
jpayne@69 275
jpayne@69 276 # ===== ATTRIBUTESIMPL =====
jpayne@69 277
class AttributesImpl:
    """Mapping-style, non-namespace-aware view of an element's attributes.

    Names and qualified names are the same thing in this
    implementation, so the *ByQName / *ByName lookups simply validate
    that the name exists and hand it back.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here
        (the name is not checked)."""
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute *name*; KeyError if absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name *name*; KeyError if absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return *name* unchanged after checking it exists (else KeyError)."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return *name* unchanged after checking it exists (else KeyError)."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the qualified names (same as getNames here)."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __iter__(self):
        # Without this, iteration falls back to __getitem__(0), which
        # raises KeyError instead of yielding the attribute names.
        return iter(self._attrs)

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of *name*, or *alternative* if it is absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class with its own copy of the
        attribute mapping, so later changes to the original mapping do
        not show through."""
        return self.__class__(dict(self._attrs))

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
jpayne@69 337
jpayne@69 338 # ===== ATTRIBUTESNSIMPL =====
jpayne@69 339
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attribute values are keyed by (ns_uri, lname) tuples; a parallel
    mapping records the qualified name belonging to each such key.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is
        *name*; raise KeyError if no attribute has that qualified name."""
        for nsname, qname in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key whose qualified name is *name*;
        raise KeyError if there is none."""
        for nsname, qname in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name stored for the (ns_uri, lname) key
        *name*; KeyError if absent."""
        return self._qnames[name]

    def getQNames(self):
        """Return a list of all qualified names."""
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class holding its own copies
        of both mappings, so later changes to the originals do not show
        through."""
        return self.__class__(dict(self._attrs), dict(self._qnames))
jpayne@69 372
jpayne@69 373
def _test():
    """Smoke test: check that the three base classes can be instantiated
    with no arguments."""
    XMLReader()
    IncrementalParser()
    Locator()
jpayne@69 378
# Running this module directly performs the instantiation smoke test.
if __name__ == "__main__":
    _test()