我们从Python开源项目中,提取了以下46个代码示例,用于说明如何使用email.header.decode_header()。
def split_email(self, raw_email): parsed_email = BytesParser().parsebytes(raw_email) to_keep = [] attachments = [] if parsed_email.is_multipart(): for p in parsed_email.get_payload(): if p.get_filename(): filename = decode_header(p.get_filename()) if filename[0][1]: filename = filename[0][0].decode(filename[0][1]) else: filename = filename[0][0] attachments.append(File(p.get_payload(decode=True), filename)) else: to_keep.append(p) else: to_keep.append(parsed_email.get_payload()) return to_keep, attachments, parsed_email
def decode_mail_header(value, default_charset='us-ascii'): """ Decode a header value into a unicode string. """ try: headers = decode_header(value) except email.errors.HeaderParseError: return str_decode(str_encode(value, default_charset, 'replace'), default_charset) else: for index, (text, charset) in enumerate(headers): logger.debug("Mail header no. {}: {} encoding {}".format(index, str_decode(text, charset or 'utf-8'), charset)) try: headers[index] = str_decode(text, charset or default_charset, 'replace') except LookupError: # if the charset is unknown, force default headers[index] = str_decode(text, default_charset, 'replace') return ''.join(headers)
def test_rfc2047_multiline(self): eq = self.assertEqual s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" dh = decode_header(s) eq(dh, [ (b'Re:', None), (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), (b'baz foo bar', None), (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) header = make_header(dh) eq(str(header), 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= =?mac-iceland?q?=9Arg=8Cs?=""")
def get_attachments(self) -> Generator: """ Attachments of the mail message (generator) :return: generator of tuple(filename: str, payload: bytes) """ for part in self.obj.walk(): # multipart/* are just containers if part.get_content_maintype() == 'multipart': continue if part.get('Content-Disposition') is None: continue filename = part.get_filename() if not part.get_filename(): continue # this is what happens when Content-Disposition = inline filename = self._decode_value(*decode_header(filename)[0]) payload = part.get_payload(decode=True) if not payload: continue yield filename, payload
def getSubject(self, i): conn = self.conn id_list = conn.search(None, '(UNSEEN)')[1][0].split() try: email_id = id_list[i] except IndexError: return None data = conn.fetch(email_id, '(BODY.PEEK[HEADER.FIELDS (SUBJECT)])')[1] if not PY3: msg = message_from_string(data[0][1]) s, encoding = decode_header(msg['Subject'])[0] subject = s.decode(encoding or 'utf-8').encode('utf-8') else: msg = message_from_bytes(data[0][1]) s, encoding = decode_header(msg['Subject'])[0] subject = s if type(s) is str else s.decode(encoding or 'utf-8') return subject
def test_rfc2047_multiline(self): eq = self.assertEqual s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" dh = decode_header(s) eq(dh, [ (b'Re: ', None), (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), (b' baz foo bar ', None), (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) header = make_header(dh) eq(str(header), 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= =?mac-iceland?q?=9Arg=8Cs?=""")
def test_rfc2047_multiline(self): eq = self.assertEqual s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" dh = decode_header(s) eq(dh, [ ('Re:', None), ('r\x8aksm\x9arg\x8cs', 'mac-iceland'), ('baz foo bar', None), ('r\x8aksm\x9arg\x8cs', 'mac-iceland')]) eq(str(make_header(dh)), """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""")
def test_whitespace_eater_unicode_2(self): eq = self.assertEqual s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' dh = decode_header(s) eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'), ('jumped over the', None), ('lazy dog', 'iso-8859-1')]) hu = make_header(dh).__unicode__() eq(hu, u'The quick brown fox jumped over the lazy dog')
def test_rfc2047_missing_whitespace(self): s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' dh = decode_header(s) self.assertEqual(dh, [(s, None)])
def test_rfc2047_with_whitespace(self): s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' dh = decode_header(s) self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'), ('rg', None), ('\xe5', 'iso-8859-1'), ('sbord', None)]) # Test the MIMEMessage class
def test_us_ascii_header(self): eq = self.assertEqual s = 'hello' x = decode_header(s) eq(x, [('hello', None)]) h = make_header(x) eq(s, h.encode())
def test_encoded_adjacent_nonencoded(self): eq = self.assertEqual h = Header() h.append('hello', 'iso-8859-1') h.append('world') s = h.encode() eq(s, '=?iso-8859-1?q?hello?= world') h = make_header(decode_header(s)) eq(h.encode(), s)
def test_whitespace_eater(self): eq = self.assertEqual s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' parts = decode_header(s) eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)]) hdr = make_header(parts) eq(hdr.encode(), 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
def test_broken_base64_header(self): raises = self.assertRaises s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?=' raises(errors.HeaderParseError, decode_header, s) # Test RFC 2231 header parameters (en/de)coding
def prepare_header(value): if value is None: return value return SEPARATOR.join([maybe_decode(value, encoding) for value, encoding in decode_header(value)])
def decode_subject(subject): pieces = [] for piece, charset in decode_header(subject): if charset is None: charset = "ascii" pieces.append(piece.decode(charset, "replace")) return "".join(pieces)
def test_broken_base64_header(self): raises = self.assertRaises s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' raises(errors.HeaderParseError, decode_header, s) # Test RFC 2231 header parameters (en/de)coding
def test_japanese_codecs(self): eq = self.ndiffAssertEqual j = Charset("euc-jp") g = Charset("iso-8859-1") h = Header("Hello World!") jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' ghello = 'Gr\xfc\xdf Gott!' h.append(jhello, j) h.append(ghello, g) # BAW: This used to -- and maybe should -- fold the two iso-8859-1 # chunks into a single encoded word. However it doesn't violate the # standard to have them as two encoded chunks and maybe it's # reasonable <wink> for each .append() call to result in a separate # encoded word. eq(h.encode(), """\ Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""") eq(decode_header(h.encode()), [('Hello World!', None), ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), ('Gr\xfc\xdf Gott!', 'iso-8859-1')]) long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9' h = Header(long, j, header_name="Subject") # test a very long header enc = h.encode() # TK: splitting point may differ by codec design and/or Header encoding eq(enc , """\ =?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?= =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""") # TK: full decode comparison eq(h.__unicode__().encode('euc-jp'), long)
def decode_str(s): value, charset = decode_header(s)[0] if charset: value = value.decode(charset) return value
def decode_str(s): value, charset = decode_header(s)[0] if charset: value = value.decode(charset) return value #?????????
def really_decode_header(encoded_header): return ''.join([decoded.decode(charset or 'utf-8') for (decoded, charset) in decode_header(encoded_header)])
def header_represent(f, r): from email.header import decode_header text, encoding = decode_header(f)[0] if encoding: text = text.decode(encoding).encode('utf-8') return text
def decode_field(field): res = field if type(field) == bytes: res = field.decode() _bytes, encoding = decode_header(res)[0] if encoding: res = _bytes.decode(encoding) return res