import email
import imaplib


def _decode_text_part(part):
    """Return the body of a non-multipart MIME part as ``str``.

    ``get_payload()`` without ``decode=True`` returns the body exactly as it
    was transmitted, so a base64 or quoted-printable part comes back still
    encoded.  ``decode=True`` undoes the Content-Transfer-Encoding and yields
    bytes, which are then decoded with the charset declared by the part.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) returns None for multipart containers.
        return None
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        # The message declares a charset Python does not know.
        return payload.decode('utf-8', errors='replace')


mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('login@gmail.com', 'password')  # placeholder credentials
mail.list()
# Out: list of "folders" aka labels in gmail.
mail.select("inbox")  # connect to inbox.

# Get an email
result, data = mail.uid('fetch', b'1', '(RFC822)')
raw_email = data[0][1]
email_message = email.message_from_bytes(raw_email)
maintype = email_message.get_content_maintype()

# Start from None so the check below works when no text part is found
# (previously `html` was simply undefined in that case -> NameError).
html = None
if maintype == 'multipart':
    for part in email_message.get_payload():
        print(part.get_content_maintype())
        if part.get_content_maintype() == 'text':
            # As before, a later text part overwrites an earlier one.
            html = _decode_text_part(part)
elif maintype == 'text':
    html = _decode_text_part(email_message)

# Now the HTML can be parsed.
# NOTE(review): BeautifulSoup is not imported in the visible part of the
# file; presumably `from bs4 import BeautifulSoup` exists elsewhere - confirm.
if html is not None:
    soup = BeautifulSoup(html, 'html.parser')
有些邮件的内容是 base64 编码的。该怎么解码呢?base64.b64encode(some_string) 没有帮助。
import email
import email.utils

# `data` is the result of an IMAP fetch performed earlier; data[0][1] holds
# the raw RFC822 message.  Under Python 3 imaplib returns bytes, so the
# message is parsed with message_from_bytes rather than message_from_string.
raw_email = data[0][1]
email_message = email.message_from_bytes(raw_email)

print(email_message['To'])
print(email.utils.parseaddr(email_message['From']))  # for parsing "Yuji Tomita" <yuji@grovemade.com>
print(email_message.items())  # print all headers


def get_first_text_block(self, email_message_instance):
    """Return the decoded text of the first ``text/*`` block of a message.

    For a multipart message the direct sub-parts are scanned in order and
    the first one whose main type is ``text`` is used; for a plain ``text``
    message the message itself is used.  The body is decoded according to
    its Content-Transfer-Encoding (base64, quoted-printable) and the part's
    declared charset, so callers receive readable text instead of the raw
    encoded payload.

    Args:
        self: Unused; kept so the existing call signature does not change.
        email_message_instance: A parsed ``email.message.Message``.

    Returns:
        The text as ``str``, or ``None`` when the message has no text block.
    """
    maintype = email_message_instance.get_content_maintype()
    if maintype == 'multipart':
        candidates = email_message_instance.get_payload()
    elif maintype == 'text':
        candidates = [email_message_instance]
    else:
        return None

    for part in candidates:
        if part.get_content_maintype() != 'text':
            continue
        # decode=True undoes base64 / quoted-printable and yields bytes.
        payload = part.get_payload(decode=True)
        if payload is None:
            # Nothing to decode; fall back to the undecoded payload.
            return part.get_payload()
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset, errors='replace')
        except LookupError:
            # The message declares a charset Python does not know.
            return payload.decode('utf-8', errors='replace')
    return None
**这段代码不是我写的,取自 https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/**