# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""

__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
           'FeedParser', 'BytesFeedParser']

from io import StringIO, TextIOWrapper

from email.feedparser import FeedParser, BytesFeedParser
from email._policybase import compat32


class Parser:
    """Parse text (str) input into a message object tree via FeedParser."""

    def __init__(self, _class=None, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  It defaults to None, in which case the choice is
        left to FeedParser (per the email.parser documentation, the policy's
        message factory is then used).

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy (compat32)
        maintains backward compatibility.
        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the text-mode file object fp and returns the
        root of the message structure.  Optional headersonly is a flag
        specifying whether to stop parsing after reading the headers or not.
        The default is False, meaning it parses the entire contents of the
        file.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Feed the input in fixed-size chunks so the whole file never has to
        # be held in a single string; an empty read signals end of file.
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        return self.parse(StringIO(text), headersonly=headersonly)


class HeaderParser(Parser):
    """Parser variant that always stops after the headers.

    The headersonly argument is accepted so the signatures match Parser's,
    but its value is ignored: parsing is always done with headersonly=True.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers from the text file object fp."""
        return Parser.parse(self, fp, True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers from the string text."""
        return Parser.parsestr(self, text, True)


class BytesParser:
    """Parse binary (bytes) input by decoding it and delegating to Parser."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are forwarded unchanged to
        Parser, so _class and policy have the same meaning and defaults as
        they do there.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.
        """
        # Decode as ASCII with surrogateescape so non-ASCII bytes are carried
        # through as lone surrogates instead of raising a decode error.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Detach rather than close: the wrapper must not close (now or
            # when it is garbage collected) the binary file the caller owns.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)


class BytesHeaderParser(BytesParser):
    """BytesParser variant that always stops after the headers.

    The headersonly argument is accepted so the signatures match
    BytesParser's, but its value is ignored: parsing is always done with
    headersonly=True.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers from the binary file object fp."""
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers from the byte string text."""
        return BytesParser.parsebytes(self, text, headersonly=True)