# annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/sax/expatreader.py @ 69:33d812a61356
#
# planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
# author jpayne
# date Tue, 18 Mar 2025 17:55:14 -0400
# parents
# children
# rev   line source
"""
SAX driver for the pyexpat C module.

This driver works with pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import (
    feature_validation,
    feature_namespaces,
    feature_namespace_prefixes,
    feature_external_ges,
    feature_external_pes,
    feature_string_interning,
    property_xml_string,
    property_interning_dict,
)

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before attempting the import below.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
# An importable module that lacks ParserCreate is treated as unusable too.
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Short module-level aliases for the attribute containers passed to handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# When weak references are available, _mkproxy() wraps an object in a weak
# proxy so the locator does not create a parser <-> content handler cycle.
# Without them, _mkproxy() simply hands the object back unchanged.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        return o
else:
    del _weakref
    from weakref import proxy as _mkproxy

class _ClosedParser:
    """Attribute holder that stands in for a closed expat parser.

    ExpatParser.close() copies ErrorColumnNumber and ErrorLineNumber onto
    an instance of this class so they can still be read after closing.
    """

# --- ExpatLocator

class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    The reader is stored through _mkproxy() rather than directly, to avoid
    a circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return the expat column number, or None if no parser exists."""
        expat_parser = self._ref._parser
        return None if expat_parser is None else expat_parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the expat line number, or 1 if no parser exists."""
        expat_parser = self._ref._parser
        return 1 if expat_parser is None else expat_parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the reader's current source."""
        reader = self._ref
        return reader._source.getPublicId() if reader is not None else None

    def getSystemId(self):
        """Return the system identifier of the reader's current source."""
        reader = self._ref
        return reader._source.getSystemId() if reader is not None else None

# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    The reader owns one expat parser at a time (``self._parser``) and
    forwards expat callbacks to the registered SAX handlers.  While an
    external entity is being processed, the outer (parser, source) pair
    is kept on ``self._entity_stack`` and restored afterwards.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a reader.

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- passed on to xmlreader.IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []
        self._external_ges = 0
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # bpo-30264: Close the source on error to not leak resources:
            # xml.sax.parse() doesn't give access to the underlying parser
            # to the caller
            self._close_source()
            raise

    def prepareParser(self, source):
        """Set the expat base URI from the source's system id, if any."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Register the content handler; rebind expat callbacks if parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable a feature expat cannot provide; raises
        SAXNotRecognizedException for unknown feature names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dict; only create one if needed.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        The XML string property is only available while a parser exists.
        Raises SAXNotRecognizedException for unknown property names.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        The XML string property is read-only.  Raises
        SAXNotRecognizedException for unknown property names.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Pass a chunk of *data* to expat.

        The first call after a reset creates a fresh expat parser and
        reports startDocument().  expat errors are wrapped in a
        SAXParseException and handed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def _close_source(self):
        """Close the character and byte streams of the current source."""
        source = self._source
        try:
            stream = source.getCharacterStream()
            if stream is not None:
                stream.close()
        finally:
            # Runs even if closing the character stream raised.
            stream = source.getByteStream()
            if stream is not None:
                stream.close()

    def close(self):
        """Finish the document: final parse, endDocument(), close source.

        Does nothing while an external entity is being processed, or when
        there is no live parser to close.
        """
        if (self._entity_stack or self._parser is None or
                isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal=1)
            self._cont_handler.endDocument()
            self._parsing = 0
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser
            self._close_source()

    def _reset_cont_handler(self):
        """Bind expat callbacks that go straight to the content handler."""
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Bind (or clear) the expat callbacks of the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through this class so the argument order can be adapted.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a new expat parser and attach all callbacks to it."""
        if self._namespaces:
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern=self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return the current column number, or None if no parser exists."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the current line number, or 1 if no parser exists."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """expat callback: element start without namespace processing."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """expat callback: element end without namespace processing."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """expat callback: element start with namespace processing.

        Names arrive as whitespace-separated parts: one part means no
        namespace, three parts are (uri, localname, prefix), and anything
        else is used as-is as the (uri, localname) tuple.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """expat callback: element end with namespace processing."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """expat callback: a prefix mapping comes into scope."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """expat callback: a prefix mapping goes out of scope."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """expat callback: forward DOCTYPE start as startDTD(name, pubid, sysid)."""
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """expat callback: forward an unparsed entity declaration."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """expat callback: forward a notation declaration."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback: parse an external general entity.

        Returns 1 when the entity was skipped (feature disabled) or parsed
        successfully, and 0 when parsing it failed, which makes expat
        report an error in the enclosing document.

        Whatever the outcome, the entity's streams are closed and the
        enclosing (parser, source) pair is restored.  Previously a failure
        left the reader pointing at the entity sub-parser with a non-empty
        entity stack, so close() became a no-op and the error path of
        parse() closed the entity source instead of the document source.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Create the sub-parser before touching any state, so a failure
        # here propagates with the reader left exactly as it was.
        subparser = self._parser.ExternalEntityParserCreate(context)
        self._entity_stack.append((self._parser, self._source))
        self._parser = subparser
        self._source = source

        try:
            try:
                xmlreader.IncrementalParser.parse(self, source)
            finally:
                # close() is a no-op while the entity stack is non-empty,
                # so the entity's streams must be closed here.
                self._close_source()
        except:
            return 0  # FIXME: save error info here?
        finally:
            self._parser, self._source = self._entity_stack.pop()
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """expat callback: report an entity that was skipped."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)

# ---

def create_parser(*args, **kwargs):
    """Build an ExpatParser, forwarding every argument to its constructor."""
    parser = ExpatParser(*args, **kwargs)
    return parser

# ---

if __name__ == "__main__":
    # Demo: parse a document fetched from a URL and write it back out
    # through XMLGenerator.
    import xml.sax.saxutils
    demo_parser = create_parser()
    demo_parser.setContentHandler(xml.sax.saxutils.XMLGenerator())
    demo_parser.setErrorHandler(xml.sax.ErrorHandler())
    demo_parser.parse(
        "http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")