jpayne@68
|
1 # Copyright (C) 2001-2007 Python Software Foundation
|
jpayne@68
|
2 # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
|
jpayne@68
|
3 # Contact: email-sig@python.org
|
jpayne@68
|
4
|
jpayne@68
|
5 """A parser of RFC 2822 and MIME email messages."""
|
jpayne@68
|
6
|
jpayne@68
|
# Public names exported by a star-import of this module.
__all__ = [
    'Parser',
    'HeaderParser',
    'BytesParser',
    'BytesHeaderParser',
    'FeedParser',
    'BytesFeedParser',
]
|
jpayne@68
|
9
|
jpayne@68
|
10 from io import StringIO, TextIOWrapper
|
jpayne@68
|
11
|
jpayne@68
|
12 from email.feedparser import FeedParser, BytesFeedParser
|
jpayne@68
|
13 from email._policybase import compat32
|
jpayne@68
|
14
|
jpayne@68
|
15
|
jpayne@68
|
class Parser:
    """Text-mode front end that drives a FeedParser to completion."""

    def __init__(self, _class=None, *, policy=compat32):
        """Set up a parser for RFC 2822 and MIME email messages.

        Parsing produces an in-memory object tree for the message.  That tree
        can be modified and later handed to a Generator to obtain the message
        text again.

        Input is expected to be a block of RFC 2822 headers (including any
        continuation lines), optionally preceded by a `Unix-from' header.
        The header block ends at the first blank line or at the end of the
        input, whichever comes first.

        _class is the class used to create new message objects whenever one
        is needed; it must be callable with zero arguments.  The value,
        including the default of None, is forwarded unchanged to FeedParser.

        policy is a keyword-only policy object that governs a number of
        aspects of the parser's operation.  The default, compat32, maintains
        backward compatibility.
        """
        self.policy = policy
        self._class = _class

    def parse(self, fp, headersonly=False):
        """Build a message structure from the contents of a text file.

        The file object fp is read until it is exhausted and the root of the
        resulting message structure is returned.  When headersonly is true,
        parsing stops once the headers have been read; the default, False,
        parses the complete contents of the file.
        """
        feeder = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feeder._set_headersonly()
        # Feed the parser in 8192-character chunks; a falsy read marks EOF.
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Build a message structure from a string.

        The root of the message structure is returned.  When headersonly is
        true, parsing stops once the headers have been read; the default,
        False, parses the complete text.
        """
        # Wrap the string in a file-like object so parse() does the work.
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
|
jpayne@68
|
68
|
jpayne@68
|
69
|
jpayne@68
|
70
|
jpayne@68
|
class HeaderParser(Parser):
    """Parser variant that always runs in headers-only mode."""

    def parse(self, fp, headersonly=True):
        """Parse the file object fp, stopping after the headers.

        headersonly is accepted so the signature matches Parser.parse, but
        its value is ignored: True is always passed on.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse the string text, stopping after the headers.

        headersonly is accepted so the signature matches Parser.parsestr,
        but its value is ignored: True is always passed on.
        """
        return Parser.parsestr(self, text, headersonly=True)
|
jpayne@68
|
77
|
jpayne@68
|
78
|
jpayne@68
|
class BytesParser:
    """Binary-input front end that delegates to a wrapped Parser."""

    def __init__(self, *args, **kw):
        """Set up a parser for binary RFC 2822 and MIME email messages.

        Parsing produces an in-memory object tree for the message.  That tree
        can be modified and later handed to a Generator to obtain the message
        text again.

        Input is expected to be a block of RFC 2822 headers (including any
        continuation lines), optionally preceded by a `Unix-from' header.
        The header block ends at the first blank line or at the end of the
        input, whichever comes first.

        All positional and keyword arguments are passed straight through to
        Parser, whose instance is stored as self.parser.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Build a message structure from the contents of a binary file.

        The file object fp is read until it is exhausted and the root of the
        resulting message structure is returned.  When headersonly is true,
        parsing stops once the headers have been read; the default, False,
        parses the complete contents of the file.
        """
        # Present the binary stream as text; surrogateescape keeps non-ASCII
        # bytes representable instead of raising on decode.
        text_stream = TextIOWrapper(fp, encoding='ascii',
                                    errors='surrogateescape')
        try:
            message = self.parser.parse(text_stream, headersonly)
        finally:
            # Detach rather than close so the caller's fp remains usable.
            text_stream.detach()
        return message

    def parsebytes(self, text, headersonly=False):
        """Build a message structure from a byte string.

        The root of the message structure is returned.  When headersonly is
        true, parsing stops once the headers have been read; the default,
        False, parses the complete input.
        """
        decoded = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(decoded, headersonly)
|
jpayne@68
|
124
|
jpayne@68
|
125
|
jpayne@68
|
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always runs in headers-only mode."""

    def parse(self, fp, headersonly=True):
        """Parse the binary file object fp, stopping after the headers.

        headersonly is accepted so the signature matches BytesParser.parse,
        but its value is ignored: True is always passed on.
        """
        return BytesParser.parse(self, fp, True)

    def parsebytes(self, text, headersonly=True):
        """Parse the byte string text, stopping after the headers.

        headersonly is accepted so the signature matches
        BytesParser.parsebytes, but its value is ignored: True is always
        passed on.
        """
        return BytesParser.parsebytes(self, text, True)
|