# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]


from base64 import b64encode
from binascii import b2a_base64, a2b_base64

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
MISC_LEN = 7


# Helpers
def header_length(bytearray):
    """Return the length of bytearray when it is encoded with base64.

    Only len(bytearray) is used, so any sized object works.  The parameter
    name shadows the builtin; it is kept so keyword callers keep working.
    """
    groups_of_3, leftover = divmod(len(bytearray), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n


def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.  Base64 encoding is defined in RFC 2045.

    header_bytes may be bytes or str; a str is first encoded using charset.
    An empty (falsy) header_bytes yields the empty string.  Otherwise the
    result has the form '=?<charset>?b?<base64 text>?='.
    """
    if not header_bytes:
        return ""
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    encoded = b64encode(header_bytes).decode("ascii")
    return '=?%s?b?%s?=' % (charset, encoded)


def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode a bytes-like object with base64, returning a str.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    Each line of encoded text will end with eol, which defaults to "\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    Empty (falsy) input always yields the empty string, so the return type
    is str regardless of the input.
    """
    if not s:
        # Return a str rather than echoing the input back: an empty bytes
        # object would otherwise leak out as b'' from a str-returning API.
        return ""

    encvec = []
    # Number of raw bytes whose base64 form fits in maxlinelen characters.
    max_unencoded = maxlinelen * 3 // 4
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        if enc.endswith(NL) and eol != NL:
            # Swap the newline appended by b2a_base64 for the requested eol.
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be str or a bytes-like object; a str is converted to bytes
    with the raw-unicode-escape codec before decoding.  Empty (falsy) input
    yields empty bytes.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return bytes()
    elif isinstance(string, str):
        return a2b_base64(string.encode('raw-unicode-escape'))
    else:
        return a2b_base64(string)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode