"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """

from . import handler

from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException


# ===== XMLREADER =====

class XMLReader:
    """Interface for reading an XML document using callbacks.

    XMLReader is the interface that an XML parser's SAX2 driver must
    implement. This interface allows an application to set and query
    features and properties in the parser, to register event handlers
    for document processing, and to initiate a document parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Start out with the default handler objects from the handler
        # module so the getters always return a usable object.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Returns the current ContentHandler."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Registers a new object to receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Returns the current DTD handler."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register an object to receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Returns the current EntityResolver."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register an object to resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Returns the current ErrorHandler."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register an object to receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localization for errors
        and warnings; if they cannot support the requested locale,
        however, they must raise a SAX exception. Applications may
        request a locale change in the middle of a parse.

        This base implementation always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Looks up and returns the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Sets the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Looks up and returns the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Sets the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)


class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # bufsize is the size argument passed to each read() call that
        # parse() makes on the input stream.
        self._bufsize = bufsize
        super().__init__()

    def parse(self, source):
        """Parse source by reading it in chunks and passing them to feed.

        source is handed to saxutils.prepare_input_source and the result
        to prepareParser. The character stream is used when the prepared
        source has one; otherwise the byte stream is used. close is
        called once the stream is exhausted."""
        # Imported at call time, as in the original code.
        from . import saxutils

        source = saxutils.prepare_input_source(source)
        self.prepareParser(source)

        # A character stream takes precedence over a byte stream.
        stream = source.getCharacterStream()
        if stream is None:
            stream = source.getByteStream()

        # An empty read result ('' or b'') signals end of input.
        buffer = stream.read(self._bufsize)
        while buffer:
            self.feed(buffer)
            buffer = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")


# ===== LOCATOR =====

class Locator:
    """Interface for associating a SAX event with a document
    location. A locator object will return valid results only during
    calls to DocumentHandler methods; at any other time, the
    results are unpredictable.

    The methods of this base class return placeholder values (-1 or
    None)."""

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number where the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None


# ===== INPUTSOURCE =====

class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application, the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id=None):
        # Everything except the system identifier starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Get the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source. (The stream
        must be a file-like object that performs the conversion to
        text itself, i.e. whose read method returns strings rather
        than bytes.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile


# ===== ATTRIBUTESIMPL =====

class AttributesImpl:
    """Attribute collection keyed by plain attribute name.

    Without namespaces a name and its qualified name are the same
    string, so the *ByQName methods behave like their name-based
    counterparts."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA' here."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value for qualified name name; raises KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return name itself if it is a known attribute, else raise KeyError."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return name itself if it is a known attribute, else raise KeyError."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified names (same as the names here)."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def __contains__(self, name):
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value of attribute name, or alternative if absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class with the same attributes.

        The underlying mapping is copied (shallowly), so the result
        does not change if the mapping behind this object is modified
        or reused afterwards."""
        return self.__class__(dict(self._attrs))

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())


# ===== ATTRIBUTESNSIMPL =====

class AttributesNSImpl(AttributesImpl):
    """Attribute collection keyed by (ns_uri, lname) pairs, with a
    second mapping from those pairs to qualified names."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value for qualified name name; raises KeyError if absent."
        # The qname mapping goes from (ns_uri, lname) to qname, so the
        # reverse lookup has to scan it.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        "Return the (ns_uri, lname) pair for qualified name name; raises KeyError if absent."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name for the (ns_uri, lname) pair name."
        return self._qnames[name]

    def getQNames(self):
        "Return a list of the qualified names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class with the same attributes.

        Both underlying mappings are copied (shallowly), so the result
        does not change if the mappings behind this object are modified
        or reused afterwards."""
        return self.__class__(dict(self._attrs), dict(self._qnames))


def _test():
    # Smoke test: the classes with no required constructor arguments
    # can be instantiated.
    XMLReader()
    IncrementalParser()
    Locator()

if __name__ == "__main__":
    _test()