jpayne@68
|
1 """\
|
jpayne@68
|
2 A library of useful helper classes to the SAX classes, for the
|
jpayne@68
|
3 convenience of application and driver writers.
|
jpayne@68
|
4 """
|
jpayne@68
|
5
|
jpayne@68
|
6 import os, urllib.parse, urllib.request
|
jpayne@68
|
7 import io
|
jpayne@68
|
8 import codecs
|
jpayne@68
|
9 from . import handler
|
jpayne@68
|
10 from . import xmlreader
|
jpayne@68
|
11
|
jpayne@68
|
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of *d* that occurs in *s* is replaced by its value.  The
    replacements are applied one after another in the dictionary's
    iteration order, so a later replacement also sees the text produced
    by an earlier one.  Returns the resulting string.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s
|
jpayne@68
|
17
|
jpayne@68
|
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # NOTE: the shared default dict is only read, never mutated.

    # must do ampersand first: the "&" introduced by the "&gt;" and "&lt;"
    # replacements below must not be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
|
jpayne@68
|
33
|
jpayne@68
|
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # NOTE: the shared default dict is only read, never mutated.
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last: otherwise "&amp;lt;" would first become
    # "&lt;" and then be wrongly turned into "<".
    return data.replace("&amp;", "&")
|
jpayne@68
|
47
|
jpayne@68
|
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # Newline, carriage return and tab are written as character
    # references so they are emitted explicitly rather than as raw
    # whitespace.  A new dict is built, so the caller's mapping (and the
    # shared default) is never modified.
    entities = {**entities, '\n': '&#10;', '\r': '&#13;', '\t': '&#9;'}
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes as the
            # delimiter and escape the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters avoid
            # having to escape anything.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
|
jpayne@68
|
69
|
jpayne@68
|
70
|
jpayne@68
|
def _gettextwriter(out, encoding):
    """Return a text-mode writer for *out*.

    * ``None``: ``sys.stdout`` is returned.
    * An ``io.TextIOBase`` or a ``codecs`` stream writer: returned as is.
    * Anything else is treated as a binary writer and wrapped in an
      ``io.TextIOWrapper`` that encodes with *encoding*, writes
      unencodable characters as XML character references, does not
      translate newlines, and writes through on every call.
    """
    if out is None:
        import sys
        return sys.stdout

    if isinstance(out, io.TextIOBase):
        # use a text writer as is
        return out

    if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)):
        # use a codecs stream writer as is
        return out

    # wrap a binary writer with TextIOWrapper
    if isinstance(out, io.RawIOBase):
        # Keep the original file open when the TextIOWrapper is
        # destroyed
        class _wrapper:
            __class__ = out.__class__
            def __getattr__(self, name):
                return getattr(out, name)
        buffer = _wrapper()
        # close() on the proxy is a no-op, so only the proxy is "closed".
        buffer.close = lambda: None
    else:
        # This is to handle passed objects that aren't in the
        # IOBase hierarchy, but just have a write method
        buffer = io.BufferedIOBase()
        buffer.writable = lambda: True
        buffer.write = out.write
        try:
            # TextIOWrapper uses these methods to determine
            # if BOM (for UTF-16, etc) should be added
            buffer.seekable = out.seekable
            buffer.tell = out.tell
        except AttributeError:
            # *out* does not provide them; keep the BufferedIOBase defaults.
            pass
    return io.TextIOWrapper(buffer, encoding=encoding,
                            errors='xmlcharrefreplace',
                            newline='\n',
                            write_through=True)
|
jpayne@68
|
111
|
jpayne@68
|
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    *out* is the destination; it is passed through _gettextwriter(), so
    it may be None, a text writer, or a binary writer.  *encoding* is
    used both for the binary wrapper and in the XML declaration.  When
    *short_empty_elements* is true, an element that receives no content
    before its end event is written as ``<name/>`` instead of
    ``<name></name>``.
    """

    def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
        handler.ContentHandler.__init__(self)
        out = _gettextwriter(out, encoding)
        self._write = out.write
        self._flush = out.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that still
        # have to be written as xmlns attributes on the next start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._short_empty_elements = short_empty_elements
        # True while a start tag has been written without its closing '>'
        # (only used when short_empty_elements is enabled).
        self._pending_start_element = False

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    def _finish_pending_start_element(self,endElement=False):
        """Write the '>' of a start tag that was left open, if any.

        The endElement parameter is accepted but not used.
        """
        if self._pending_start_element:
            self._write('>')
            self._pending_start_element = False

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def endDocument(self):
        """Flush the underlying writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix binding and queue its xmlns declaration."""
        # Save a snapshot of the current bindings so that
        # endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the bindings saved by the matching startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._finish_pending_start_element()
        self._write('<' + name)
        for (name, value) in attrs.items():
            self._write(' %s=%s' % (name, quoteattr(value)))
        if self._short_empty_elements:
            # Defer the '>' so endElement() can emit '/>' if nothing follows.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElement(self, name):
        """Write an end tag, or close a still-open start tag as '/>'."""
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag for a (uri, localname) element name.

        Namespace declarations queued by startPrefixMapping() are written
        as xmlns attributes before the element's own attributes.  The
        qname argument is not used; the qualified name is rebuilt from
        the current prefix bindings.
        """
        self._finish_pending_start_element()
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
        if self._short_empty_elements:
            # Defer the '>' so endElementNS() can emit '/>' if nothing follows.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElementNS(self, name, qname):
        """Write an end tag, or close a still-open start tag as '/>'."""
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped.

        Empty content is ignored.  Content that is not a str is decoded
        using the generator's encoding first.
        """
        if content:
            self._finish_pending_start_element()
            if not isinstance(content, str):
                content = str(content, self._encoding)
            self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write whitespace unchanged (no escaping); empty content is ignored."""
        if content:
            self._finish_pending_start_element()
            if not isinstance(content, str):
                content = str(content, self._encoding)
            self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction as '<?target data?>'."""
        self._finish_pending_start_element()
        self._write('<?%s %s?>' % (target, data))
|
jpayne@68
|
225
|
jpayne@68
|
226
|
jpayne@68
|
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter forwards configuration requests to.
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Install this filter as every handler of the parent, then parse.

        The parent reader reports all events to this filter, which in
        turn forwards them to the handlers registered on the filter.
        """
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader."""
        return self._parent

    def setParent(self, parent):
        """Set the parent reader."""
        self._parent = parent
|
jpayne@68
|
335
|
jpayne@68
|
336 # --- Utility functions
|
jpayne@68
|
337
|
jpayne@68
|
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may also be a path-like object or a string (used as the
    system identifier of a new InputSource), or an object with a read()
    method (installed as the character stream or byte stream of a new
    InputSource, depending on what read(0) returns).

    If the resulting InputSource has neither a character stream nor a
    byte stream, its system identifier is resolved against *base*: an
    existing local file is opened in binary mode, anything else is
    opened with urllib.  The opened stream is installed as the byte
    stream and is left open for the caller to read.
    """

    if isinstance(source, os.PathLike):
        source = os.fspath(source)
    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        # A zero-length read tells text streams from binary ones without
        # consuming any data.
        if isinstance(f.read(0), str):
            source.setCharacterStream(f)
        else:
            source.setByteStream(f)
        if hasattr(f, "name") and isinstance(f.name, str):
            source.setSystemId(f.name)

    if source.getCharacterStream() is None and source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
|