"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """

from . import handler

from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException


# ===== XMLREADER =====

class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Start with the default handler objects from the handler module so
        # that the getters below always return a usable object.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        This base implementation always raises NotImplementedError;
        concrete readers must override it."""
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must raise a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation always raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    # NOTE: the four methods below format the name with a one-element tuple
    # so that a tuple-valued name is reported in the message instead of
    # triggering a TypeError inside the %-formatting itself.

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % (name,))

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % (name,))

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % (name,))

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % (name,))


class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size argument handed to each read() call in parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse source by feeding it to the parser chunk by chunk.

        The source is first passed through saxutils.prepare_input_source
        and then handed to prepareParser. The character stream is used
        when present, otherwise the byte stream. Data is read with
        read(self._bufsize) and passed to feed until a read returns an
        empty result, after which close is called."""
        # Imported here rather than at module level, as in the original
        # (presumably to avoid an import cycle with saxutils -- confirm).
        from . import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")


# ===== LOCATOR =====

class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to ContentHandler (formerly DocumentHandler) methods; at any
    other time, the results are unpredictable.

    The defaults implemented here report "unknown": -1 for numeric
    positions and None for identifiers."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None


# ===== INPUTSOURCE =====

class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id = None):
        # Everything except the system identifier starts out unset (None).
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding  = None
        self.__bytefile  = None
        self.__charfile  = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Get the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a file-like object that already performs the conversion
        to Unicode strings, i.e. one whose read method returns text.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile


# ===== ATTRIBUTESIMPL =====

class AttributesImpl:
    """Attribute collection for a non-namespace-aware parse.

    Names and qualified names are the same thing in this
    implementation, so the *ByQName methods simply mirror their
    plain-name counterparts."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA' here."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; a missing name raises KeyError."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for a qualified name; a missing name raises KeyError."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return name unchanged if it is a known attribute, else raise KeyError."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return name unchanged if it is a known attribute, else raise KeyError."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified names (same as getNames here)."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class with the same attributes.

        The attribute mapping is snapshotted into a fresh dict so that
        later changes to the original mapping do not show up in the
        copy (and vice versa)."""
        return self.__class__(dict(self._attrs))

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())


# ===== ATTRIBUTESNSIMPL =====

class AttributesNSImpl(AttributesImpl):
    """Attribute collection for a namespace-aware parse.

    Attributes are keyed by (ns_uri, lname) tuples; a second mapping
    translates those keys to qualified names."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value of the attribute whose qualified name is name."
        # There is no reverse index from qname to key, so scan the mapping.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        "Return the (ns_uri, lname) key whose qualified name is name."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name for a (ns_uri, lname) key."
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class with the same attributes.

        Both mappings are snapshotted into fresh dicts so that the copy
        does not share mutable state with the original."""
        return self.__class__(dict(self._attrs), dict(self._qnames))


def _test():
    # Smoke test: the interface classes must be instantiable without arguments.
    XMLReader()
    IncrementalParser()
    Locator()

if __name__ == "__main__":
    _test()