annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/sax/expatreader.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 """
jpayne@68 2 SAX driver for the pyexpat C module. This driver works with
jpayne@68 3 pyexpat.__version__ == '2.22'.
jpayne@68 4 """
jpayne@68 5
jpayne@68 6 version = "0.20"
jpayne@68 7
jpayne@68 8 from xml.sax._exceptions import *
jpayne@68 9 from xml.sax.handler import feature_validation, feature_namespaces
jpayne@68 10 from xml.sax.handler import feature_namespace_prefixes
jpayne@68 11 from xml.sax.handler import feature_external_ges, feature_external_pes
jpayne@68 12 from xml.sax.handler import feature_string_interning
jpayne@68 13 from xml.sax.handler import property_xml_string, property_interning_dict
jpayne@68 14
# xml.parsers.expat does not raise ImportError in Jython, so the platform
# is checked explicitly before the import is attempted.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
# An importable module that lacks ParserCreate is treated the same way as
# a missing one.
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Module-level aliases for the attribute container classes.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
jpayne@68 32
# Weak references, when available, avoid a reference cycle between the
# parser and the content handler; without them _mkproxy simply hands back
# the object it was given.
try:
    import _weakref
except ImportError:
    _weakref = None

if _weakref is not None:
    import weakref
    _mkproxy = weakref.proxy
    del weakref
else:
    def _mkproxy(o):
        return o
# Neither helper module name is left behind in the module namespace.
del _weakref
jpayne@68 45
class _ClosedParser:
    """Attribute holder that stands in for an expat parser after closing.

    ExpatParser.close() copies ErrorColumnNumber and ErrorLineNumber onto
    an instance of this class so they remain readable afterwards.
    """
jpayne@68 48
jpayne@68 49 # --- ExpatLocator
jpayne@68 50
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.

    If the referenced parser no longer exists, the accessors return the
    same values they report for a parser without an expat object (column
    None, line 1, ids None) instead of letting ReferenceError escape.
    """

    def __init__(self, parser):
        """Remember *parser* through _mkproxy (a weak proxy when available)."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return the expat ErrorColumnNumber, or None when unavailable."""
        try:
            expat_parser = self._ref._parser
        except ReferenceError:
            # The parser behind the weak proxy has been collected.
            return None
        if expat_parser is None:
            return None
        return expat_parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the expat ErrorLineNumber, or 1 when unavailable."""
        try:
            expat_parser = self._ref._parser
        except ReferenceError:
            # The parser behind the weak proxy has been collected.
            return 1
        if expat_parser is None:
            return 1
        return expat_parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the parser's current source, or None."""
        parser = self._ref
        # A proxy is never None; this only matters when _mkproxy returned
        # the object unchanged.
        if parser is None:
            return None
        try:
            source = parser._source
        except ReferenceError:
            return None
        return source.getPublicId()

    def getSystemId(self):
        """Return the system id of the parser's current source, or None."""
        parser = self._ref
        if parser is None:
            return None
        try:
            source = parser._source
        except ReferenceError:
            return None
        return source.getSystemId()
jpayne@68 83
jpayne@68 84
jpayne@68 85 # --- ExpatParser
jpayne@68 86
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a reader; the expat parser itself is built by reset().

        namespaceHandling -- initial value of the namespaces feature.
        bufsize -- forwarded to xmlreader.IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        # reset() assigns this too; defining it here makes the attribute
        # exist on a freshly constructed instance as well.
        self._decl_handler_prop = None
        self._parsing = 0
        self._entity_stack = []
        self._external_ges = 0
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # bpo-30264: Close the source on error to not leak resources:
            # xml.sax.parse() doesn't give access to the underlying parser
            # to the caller
            self._close_source()
            raise

    def prepareParser(self, source):
        """Set the expat base to the source's system id, if it has one."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install *handler*; while parsing, re-bind the expat callbacks."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature name.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable validation, external parameter entities or namespace
        prefixes; raises SAXNotRecognizedException for an unknown name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dict; only create one if needed.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        For the XML string property, SAXNotSupportedException is raised
        when there is no live expat parser (never created, or replaced by
        the closed-parser placeholder), and SAXNotRecognizedException when
        the expat parser lacks GetInputContext. Unknown names raise
        SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            parser = self._parser
            # A _ClosedParser only carries error positions; treating it as
            # a live parser would report the wrong reason for the failure.
            if not parser or isinstance(parser, _ClosedParser):
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
            if hasattr(parser, "GetInputContext"):
                return parser.GetInputContext()
            raise SAXNotRecognizedException(
                "This version of expat does not support getting"
                " the XML string")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        The XML string property cannot be set (SAXNotSupportedException);
        unknown names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass *data* to expat, starting a new document if not parsing.

        expat errors are wrapped in SAXParseException and handed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def _close_source(self):
        """Close the character and byte streams of the current source."""
        source = self._source
        try:
            file = source.getCharacterStream()
            if file is not None:
                file.close()
        finally:
            # Runs even if closing the character stream raised.
            file = source.getByteStream()
            if file is not None:
                file.close()

    def close(self):
        """Finish the document, release the expat parser, close the source."""
        if (self._entity_stack or self._parser is None or
            isinstance(self._parser, _ClosedParser)):
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
            self._parsing = 0
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            if self._parser is not None:
                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
                parser = _ClosedParser()
                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
                parser.ErrorLineNumber = self._parser.ErrorLineNumber
                self._parser = parser
            self._close_source()

    def _reset_cont_handler(self):
        """Point expat's PI and character-data callbacks at the handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Install or clear the expat callbacks used by the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through start_doctype_decl, which reorders arguments.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and wire up all of its callbacks."""
        if self._namespaces:
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return the parser's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the parser's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the current source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the current source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """Forward a start tag to startElement with wrapped attributes."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """Forward an end tag to endElement."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Forward a start tag to startElementNS.

        Names are split on whitespace: one part means no namespace, three
        parts contribute their first two to the name pair (for attributes
        the third becomes the qname prefix), anything else is used whole.
        The element qname passed on is always None.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """Forward an end tag to endElementNS, splitting the name as above."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        """Forward a processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        """Forward character data to the content handler."""
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Forward a namespace declaration to startPrefixMapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Forward the end of a namespace scope to endPrefixMapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Call startDTD on the lexical handler as (name, pubid, sysid)."""
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external general entity with a child expat parser.

        Returns 1 when the feature is disabled or the entity parsed, and 0
        when parsing it raised.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE: on this path the entity's parser and source stay
            # installed and the saved pair is left on the entity stack.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """Report a skipped entity to the content handler."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
jpayne@68 433
jpayne@68 434 # ---
jpayne@68 435
def create_parser(*args, **kwargs):
    """Build an ExpatParser, forwarding every argument to its constructor."""
    parser = ExpatParser(*args, **kwargs)
    return parser
jpayne@68 438
jpayne@68 439 # ---
jpayne@68 440
if __name__ == "__main__":
    # Demo run: parse a remote document and echo it through XMLGenerator.
    import xml.sax.saxutils

    demo_url = "http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml"
    reader = create_parser()
    reader.setContentHandler(xml.sax.saxutils.XMLGenerator())
    reader.setErrorHandler(xml.sax.ErrorHandler())
    reader.parse(demo_url)