jpayne@68
|
"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.

Many of these classes are empty and are included only as documentation
of the interfaces.

$Id$
"""

# Version string of the SAX interface implemented by this module.
version = '2.0beta'
|
jpayne@68
|
13
|
jpayne@68
|
14 #============================================================================
|
jpayne@68
|
15 #
|
jpayne@68
|
16 # HANDLER INTERFACES
|
jpayne@68
|
17 #
|
jpayne@68
|
18 #============================================================================
|
jpayne@68
|
19
|
jpayne@68
|
20 # ===== ERRORHANDLER =====
|
jpayne@68
|
21
|
jpayne@68
|
class ErrorHandler:
    """Basic interface for SAX error handlers.

    If you create an object that implements this interface, then
    register the object with your XMLReader, the parser will call the
    methods in your object to report all warnings and errors. There
    are three levels of errors available: warnings, (possibly)
    recoverable errors, and unrecoverable errors. All methods take a
    SAXParseException as the only parameter."""

    def error(self, exception):
        """Handle a recoverable error.

        The default implementation re-raises *exception*."""
        raise exception

    def fatalError(self, exception):
        """Handle a non-recoverable error.

        The default implementation re-raises *exception*."""
        raise exception

    def warning(self, exception):
        """Handle a warning.

        The default implementation prints *exception* to standard
        output and returns normally."""
        print(exception)
|
jpayne@68
|
43
|
jpayne@68
|
44
|
jpayne@68
|
45 # ===== CONTENTHANDLER =====
|
jpayne@68
|
46
|
jpayne@68
|
class ContentHandler:
    """Interface for receiving logical document content events.

    This is the main callback interface in SAX, and the one most
    important to applications. The order of events in this interface
    mirrors the order of the information in the document."""

    def __init__(self):
        # No locator is known until setDocumentLocator() is called.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Called by the parser to give the application a locator for
        locating the origin of document events.

        SAX parsers are strongly encouraged (though not absolutely
        required) to supply a locator: if it does so, it must supply
        the locator to the application by invoking this method before
        invoking any of the other methods in the ContentHandler
        interface.

        The locator allows the application to determine the end
        position of any document-related event, even if the parser is
        not reporting an error. Typically, the application will use
        this information for reporting its own errors (such as
        character content that does not match an application's
        business rules). The information returned by the locator is
        probably not sufficient for use with a search engine.

        Note that the locator will return correct information only
        during the invocation of the events in this interface. The
        application should not attempt to use it at any other time."""
        self._locator = locator

    def startDocument(self):
        """Receive notification of the beginning of a document.

        The SAX parser will invoke this method only once, before any
        other methods in this interface or in DTDHandler (except for
        setDocumentLocator)."""

    def endDocument(self):
        """Receive notification of the end of a document.

        The SAX parser will invoke this method only once, and it will
        be the last method invoked during the parse. The parser shall
        not invoke this method until it has either abandoned parsing
        (because of an unrecoverable error) or reached the end of
        input."""

    def startPrefixMapping(self, prefix, uri):
        """Begin the scope of a prefix-URI Namespace mapping.

        The information from this event is not necessary for normal
        Namespace processing: the SAX XML reader will automatically
        replace prefixes for element and attribute names when the
        http://xml.org/sax/features/namespaces feature is true (the
        default).

        There are cases, however, when applications need to use
        prefixes in character data or in attribute values, where they
        cannot safely be expanded automatically; the
        start/endPrefixMapping event supplies the information to the
        application to expand prefixes in those contexts itself, if
        necessary.

        Note that start/endPrefixMapping events are not guaranteed to
        be properly nested relative to each other: all
        startPrefixMapping events will occur before the corresponding
        startElement event, and all endPrefixMapping events will occur
        after the corresponding endElement event, but their order is
        not guaranteed."""

    def endPrefixMapping(self, prefix):
        """End the scope of a prefix-URI mapping.

        See startPrefixMapping for details. This event will always
        occur after the corresponding endElement event, but the order
        of endPrefixMapping events is not otherwise guaranteed."""

    def startElement(self, name, attrs):
        """Signals the start of an element in non-namespace mode.

        The name parameter contains the raw XML 1.0 name of the
        element type as a string and the attrs parameter holds an
        instance of the Attributes class containing the attributes of
        the element."""

    def endElement(self, name):
        """Signals the end of an element in non-namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElement event."""

    def startElementNS(self, name, qname, attrs):
        """Signals the start of an element in namespace mode.

        The name parameter contains the name of the element type as a
        (uri, localname) tuple, the qname parameter the raw XML 1.0
        name used in the source document, and the attrs parameter
        holds an instance of the Attributes class containing the
        attributes of the element.

        The uri part of the name tuple is None for elements which have
        no namespace."""

    def endElementNS(self, name, qname):
        """Signals the end of an element in namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElementNS event."""

    def characters(self, content):
        """Receive notification of character data.

        The Parser will call this method to report each chunk of
        character data. SAX parsers may return all contiguous
        character data in a single chunk, or they may split it into
        several chunks; however, all of the characters in any single
        event must come from the same external entity so that the
        Locator provides useful information."""

    def ignorableWhitespace(self, whitespace):
        """Receive notification of ignorable whitespace in element content.

        Validating Parsers must use this method to report each chunk
        of ignorable whitespace (see the W3C XML 1.0 recommendation,
        section 2.10): non-validating parsers may also use this method
        if they are capable of parsing and using content models.

        SAX parsers may return all contiguous whitespace in a single
        chunk, or they may split it into several chunks; however, all
        of the characters in any single event must come from the same
        external entity, so that the Locator provides useful
        information."""

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        The Parser will invoke this method once for each processing
        instruction found: note that processing instructions may occur
        before or after the main document element.

        A SAX parser should never report an XML declaration (XML 1.0,
        section 2.8) or a text declaration (XML 1.0, section 4.3.1)
        using this method."""

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        The Parser will invoke this method once for each entity
        skipped. Non-validating processors may skip entities if they
        have not seen the declarations (because, for example, the
        entity was declared in an external DTD subset). All processors
        may skip external entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties."""
|
jpayne@68
|
204
|
jpayne@68
|
205
|
jpayne@68
|
206 # ===== DTDHandler =====
|
jpayne@68
|
207
|
jpayne@68
|
class DTDHandler:
    """Handle DTD events.

    This interface specifies only those DTD events required for basic
    parsing (unparsed entities and attributes)."""

    def notationDecl(self, name, publicId, systemId):
        """Handle a notation declaration event.

        The default implementation does nothing."""

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Handle an unparsed entity declaration event.

        The default implementation does nothing."""
|
jpayne@68
|
219
|
jpayne@68
|
220
|
jpayne@68
|
221 # ===== ENTITYRESOLVER =====
|
jpayne@68
|
222
|
jpayne@68
|
class EntityResolver:
    """Basic interface for resolving entities. If you create an object
    implementing this interface, then register the object with your
    Parser, the parser will call the method in your object to
    resolve all external entities. Note that DefaultHandler implements
    this interface with the default behaviour."""

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity and return either
        the system identifier to read from as a string, or an InputSource
        to read from.

        The default implementation returns *systemId* unchanged and
        ignores *publicId*."""
        return systemId
|
jpayne@68
|
235
|
jpayne@68
|
236
|
jpayne@68
|
237 #============================================================================
|
jpayne@68
|
238 #
|
jpayne@68
|
239 # CORE FEATURES
|
jpayne@68
|
240 #
|
jpayne@68
|
241 #============================================================================
|
jpayne@68
|
242
|
jpayne@68
|
feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
#        (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
#       declarations.
# false: Do not report attributes used for Namespace declarations, and
#        optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
#       local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
#       external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
#       DTD subset.
# false: Do not include any external parameter entities, even the external
#        DTD subset.
# access: (parsing) read-only; (not parsing) read/write

# Every core feature identifier defined above, in declaration order.
all_features = [feature_namespaces,
                feature_namespace_prefixes,
                feature_string_interning,
                feature_validation,
                feature_external_ges,
                feature_external_pes]
|
jpayne@68
|
286
|
jpayne@68
|
287
|
jpayne@68
|
288 #============================================================================
|
jpayne@68
|
289 #
|
jpayne@68
|
290 # CORE PROPERTIES
|
jpayne@68
|
291 #
|
jpayne@68
|
292 #============================================================================
|
jpayne@68
|
293
|
jpayne@68
|
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.
# access: read/write

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type: xml.sax.sax2lib.DeclHandler
# description: An optional extension handler for DTD-related events other
#              than notations and unparsed entities.
# access: read/write

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type: org.w3c.dom.Node
# description: When parsing, the current DOM node being visited if this is
#              a DOM iterator; when not parsing, the root DOM node for
#              iteration.
# access: (parsing) read-only; (not parsing) read/write

property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type: String
# description: The literal string of characters that was the source for
#              the current event.
# access: read-only

property_encoding = "http://www.python.org/sax/properties/encoding"
# data type: String
# description: The name of the encoding to assume for input data.
# access: write: set the encoding, e.g. established by a higher-level
#                protocol. May change during parsing (e.g. after
#                processing a META tag)
#         read:  return the current encoding (possibly established through
#                auto-detection).
# initial value: UTF-8
#

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type: Dictionary
# description: The dictionary used to intern common strings in the document
# access: write: Request that the parser uses a specific dictionary, to
#                allow interning across different documents
#         read:  return the current interning dictionary, or None
#

# Every core property identifier defined above. Note that the order here
# (dom-node before declaration-handler) differs from declaration order.
all_properties = [property_lexical_handler,
                  property_dom_node,
                  property_declaration_handler,
                  property_xml_string,
                  property_encoding,
                  property_interning_dict]
|