annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/sax/xmlreader.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 """An XML Reader is the SAX 2 name for an XML parser. XML Parsers
jpayne@68 2 should be based on this code. """
jpayne@68 3
jpayne@68 4 from . import handler
jpayne@68 5
jpayne@68 6 from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException
jpayne@68 7
jpayne@68 8
jpayne@68 9 # ===== XMLREADER =====
jpayne@68 10
class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Every reader starts out with a default handler instance of each
        # kind, so the getters below never return an unset attribute.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        This base implementation always raises NotImplementedError;
        concrete readers must override it."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        # NOTE: the parameter shadows the module-level ``handler`` import
        # inside this method only; the name is part of the public signature.
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must raise a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation supports no locale and always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        The base class recognizes no features and always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        The base class recognizes no features and always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        The base class recognizes no properties and always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        The base class recognizes no properties and always raises
        SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
jpayne@68 90
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size argument passed to each read() call in parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse *source* by feeding it to the parser in chunks.

        The source is normalized with saxutils.prepare_input_source,
        handed to prepareParser, and then read in pieces of at most
        ``self._bufsize`` from its character stream (or, when that is
        None, from its byte stream). Each non-empty chunk is passed to
        feed; once a read returns an empty result, close is called."""
        # Imported at call time rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        from . import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        # Prefer the character stream; fall back to the byte stream.
        file = source.getCharacterStream()
        if file is None:
            file = source.getByteStream()
        buffer = file.read(self._bufsize)
        while buffer:
            self.feed(buffer)
            buffer = file.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
jpayne@68 162
jpayne@68 163 # ===== LOCATOR =====
jpayne@68 164
class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to DocumentHandler methods; at any other time, the
    results are unpredictable.

    The methods of this base class return placeholder values
    (-1 for positions, None for identifiers)."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None
jpayne@68 186
jpayne@68 187 # ===== INPUTSOURCE =====
jpayne@68 188
class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id = None):
        # Only the system identifier can be given at construction time;
        # everything else starts as None and is filled in via the setters.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Get the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a Python 2.0 Unicode-wrapped file-like that performs
        conversion to Unicode strings.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile
jpayne@68 275
jpayne@68 276 # ===== ATTRIBUTESIMPL =====
jpayne@68 277
class AttributesImpl:
    """Mapping-like view over the attributes of an element, without
    namespace awareness: attribute names and qualified names are the
    same keys of the wrapped mapping."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        # The mapping is stored by reference, not copied.
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA' here."
        # The name is not looked up; every attribute reports the same type.
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for a qualified name (same as getValue here)."
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return the name for a qualified name.

        Without namespaces the two are identical, so this only checks
        that the attribute exists and raises KeyError otherwise."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return the qualified name for a name.

        Without namespaces the two are identical, so this only checks
        that the attribute exists and raises KeyError otherwise."""
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return the attribute names as a new list."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return the qualified attribute names as a new list."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        "Return the attribute names as a new list."
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class wrapping the same mapping.

        The underlying mapping is shared with the original, not
        duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return the (name, value) pairs as a new list."
        return list(self._attrs.items())

    def values(self):
        "Return the attribute values as a new list."
        return list(self._attrs.values())
jpayne@68 337
jpayne@68 338 # ===== ATTRIBUTESNSIMPL =====
jpayne@68 339
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute view: attributes are keyed by
    (ns_uri, lname) pairs, with a parallel mapping from those pairs
    to qualified names."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        # Both mappings are stored by reference, not copied.
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is name.

        Scans the qname mapping for the first matching entry; raises
        KeyError when no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) key whose qualified name is name.

        Raises KeyError when no attribute has that qualified name."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name for a (ns_uri, lname) key."
        return self._qnames[name]

    def getQNames(self):
        "Return the qualified names as a new list."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class wrapping the same mappings.

        Both underlying mappings are shared with the original, not
        duplicated."""
        return self.__class__(self._attrs, self._qnames)
jpayne@68 372
jpayne@68 373
def _test():
    """Smoke test: instantiate the three base classes with their
    default arguments and discard the results."""
    XMLReader()
    IncrementalParser()
    Locator()
jpayne@68 378
# Run the smoke test when the module is executed as a script.
if __name__ == "__main__":
    _test()