# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]


from base64 import b64encode
from binascii import b2a_base64, a2b_base64

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
MISC_LEN = 7



# Helpers
def header_length(bytearray):
    """Return the length of bytearray when it is encoded with base64.

    bytearray may be any sized object (only len() is taken of it).  The
    parameter name shadows the builtin type; it is kept unchanged so that
    callers passing it by keyword keep working.
    """
    groups_of_3, leftover = divmod(len(bytearray), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n



def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    header_bytes is the header value.  If it is a str it is first encoded to
    bytes using charset; any other value is passed to b64encode() as is.  An
    empty (falsy) value yields the empty string.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.  Base64 encoding is defined in RFC 2045.

    Returns a str of the form '=?charset?b?base64-text?='.
    """
    if not header_bytes:
        return ""
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    encoded = b64encode(header_bytes).decode("ascii")
    return '=?%s?b?%s?=' % (charset, encoded)



def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode a bytes-like object with base64, returning a str.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Base64 output comes in indivisible groups of 4
    characters, so a maxlinelen smaller than 4 still produces 4-character
    lines.

    Each line of encoded text will end with eol, which defaults to "\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    An empty (falsy) s yields the empty string.
    """
    if not s:
        # Always hand back a str, even when s is an empty bytes object.
        return ""

    # Every 3 input bytes become 4 output characters.  Only whole 3-byte
    # groups are put on a line: a partial group would be padded with '=' in
    # the middle of the output and push the line past maxlinelen.  At least
    # one group per line is required, which also keeps the range() step
    # below from being zero.
    max_unencoded = max(maxlinelen // 4, 1) * 3

    encvec = []
    for start in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[start:start + max_unencoded]).decode("ascii")
        if enc.endswith(NL) and eol != NL:
            # Swap the newline appended by b2a_base64() for the requested eol.
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)



def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes-like object.  A str is converted to bytes
    with the raw-unicode-escape codec before being decoded.  An empty (falsy)
    value yields empty bytes.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return bytes()
    if isinstance(string, str):
        string = string.encode('raw-unicode-escape')
    return a2b_base64(string)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode