# 使用 Python 获取邮件数据并写入 Excel,并将邮件内容由英文翻译为中文
import os
import re
import time
import json
import email
from imaplib import IMAP4_SSL
# Only needed if mails are to be sent automatically.
from smtplib import SMTP_SSL

from bs4 import BeautifulSoup
import openpyxl
from openpyxl import load_workbook
import requests
import execjs  # pip install PyExecJS  (note: the package name is PyExecJS)


class AutoEmail(object):
    """Read every mail in one IMAP folder, translate it and move it away.

    ``receiveremail`` parses the mails found in ``oldmaildir``, copies each
    one to ``maildir`` and finally empties ``oldmaildir`` via ``removemail``.
    """

    def __init__(self, account, host, password, maildir, oldmaildir):
        # Mailbox account settings.
        self.account = account
        self.host = host
        self.password = password
        self.num = -1  # not read by any code visible in this file
        self.maildir = maildir        # folder processed mails are copied to
        self.oldmaildir = oldmaildir  # folder that is searched and emptied

    def getemail(self):
        """Open an IMAP-over-SSL connection and log in.

        :return: the logged-in ``IMAP4_SSL`` connection, or ``None`` when
            connecting or logging in fails (the error is printed).
        """
        email_conn = None
        try:
            email_conn = IMAP4_SSL(self.host)
            email_conn.login(user=self.account, password=self.password)
            return email_conn
        except Exception as e:
            # ``Exception`` rather than ``BaseException`` so that Ctrl-C and
            # interpreter shutdown are not swallowed.
            print("Connect to {0} failed".format(self.host), e)
            if email_conn is not None:
                self._logout(email_conn)
            return None

    def _connect(self):
        """Return a logged-in connection or raise ``ConnectionError``."""
        email_conn = self.getemail()
        if email_conn is None:
            # Fail here with a clear message instead of an AttributeError on
            # ``None`` a few lines later.
            raise ConnectionError(
                "Connect to {0} failed".format(self.host))
        return email_conn

    @staticmethod
    def _logout(email_conn):
        """Log out of *email_conn*; errors on a dead link are ignored."""
        try:
            email_conn.logout()
        except (IMAP4_SSL.error, OSError):
            # Best effort: the connection is being discarded anyway.
            pass

    @staticmethod
    def _first_match(pattern, text):
        """Return the first match of *pattern* in *text*, or ``""``."""
        found = re.search(pattern, text)
        return found.group() if found else ""

    def savefile(self, filename, data, path):
        """Save a mail with attachments in a folder created per date.

        Not implemented yet.

        :param filename: name of the file to write
        :param data: data to store
        :param path: directory to store it in
        :return: None
        """
        pass

    def emailfolder(self):
        """Return the raw folder listing reported by the server.

        :return: list of the entries returned by the IMAP ``LIST`` command
        :raises ConnectionError: when the server cannot be reached
        """
        email_conn = self._connect()
        try:
            return list(email_conn.list()[1])
        finally:
            self._logout(email_conn)

    def get_body(self, msg):
        """Return the decoded payload of the first leaf part of *msg*."""
        if msg.is_multipart():
            return self.get_body(msg.get_payload(0))
        return msg.get_payload(None, decode=True)

    def _parse_nested_part(self, part):
        """Extract the fields from a nested ``multipart/*`` part.

        The first leaf of the part is decoded as ISO-8859-1 and searched for
        ``Subject``/``From``/``Date`` lines; a missing line yields ``""``.
        """
        body = str(self.get_body(part), encoding='ISO-8859-1')
        return {
            "subject": self._first_match(r"Subject.*", body),
            "sender": self._first_match(r"From.*", body),
            "send_time": self._first_match(r"Date.*", body),
            "content": body,
        }

    def _parse_html_part(self, part):
        """Extract the fields from an HTML part.

        The lookups below depend on one specific HTML layout (first ``<a>``
        is a ``mailto:`` link, the ``<b>`` labels mark date and subject).
        A mail with another layout raises ``IndexError``/``AttributeError``.
        """
        # Honour the charset declared by the part; fall back to UTF-8.
        charset = part.get_content_charset() or 'utf-8'
        html = part.get_payload(decode=True).decode(charset, errors='replace')
        soup = BeautifulSoup(html, "lxml")
        bold_tags = soup.find_all("b")
        subject_label = bold_tags[-1].parent
        date_text = bold_tags[1].parent.get_text(strip=True)
        return {
            "subject": subject_label.get_text(strip=True).replace(
                "Subject:", ""),
            "sender": soup.find_all("a")[0].get("href").split(":")[1],
            "send_time": self._first_match(
                r"(\d{4}-\d{1,2}-\d{1,2})", date_text),
            "content": subject_label.parent.parent.find_next_siblings(
                "div")[0].text,
        }

    def _copy_message(self, email_conn, seq):
        """Copy message *seq* to ``self.maildir``; return ``True`` on success."""
        try:
            typ, response = email_conn.copy(seq, self.maildir)
        except (IMAP4_SSL.error, OSError) as e:
            # TODO: this should go to a log file.
            print("拷贝邮件失败:", e)
            return False
        if typ != 'OK':
            # The server answered "NO": imaplib reports that via the status,
            # not via an exception.
            print("拷贝邮件失败:", response)
            return False
        print('copy successful:')
        return True

    def receiveremail(self):
        """Parse, translate and move every mail found in ``oldmaildir``.

        Only multipart mails are parsed; any other mail yields an empty
        record. Each mail is copied to ``maildir`` and afterwards
        ``oldmaildir`` is emptied with ``removemail``.

        :return: dict mapping the 0-based mail index to a dict with the keys
            ``subject``, ``sender``, ``send_time``, ``content``,
            ``encontent`` (translation, ``""`` if the text was too long to
            translate) and ``annex`` (``"YES"``/``"NO"``)
        :raises ConnectionError: when the server cannot be reached
        """
        # Checked in this order for every part; both checks are always made,
        # so a later match overrides an earlier one.
        part_parsers = (
            ("multipart", self._parse_nested_part, "YES"),
            ("html", self._parse_html_part, "NO"),
        )
        mail_data_list = {}
        translator = None  # created lazily: compiling the JS is not free
        email_conn = self._connect()
        try:
            email_conn.select(self.oldmaildir, readonly=False)
            # email_data holds the sequence numbers of all mails in the folder.
            status, email_data = email_conn.search(None, 'ALL')
            for num, raw_id in enumerate(email_data[0].split()):
                record = mail_data_list[num] = {}
                seq = raw_id.decode('ascii')
                typ, data = email_conn.fetch(seq, '(RFC822)')
                # Parse the raw bytes directly; decoding them as UTF-8 first
                # fails on mails in any other encoding.
                msg = email.message_from_bytes(data[0][1])
                if msg.is_multipart():
                    for part in msg.get_payload():
                        ctype = part.get_content_type()
                        for keyword, parse, annex in part_parsers:
                            if keyword not in ctype:
                                continue
                            if translator is None:
                                translator = GoogleTranslate()
                            record.update(parse(part))
                            translated = translator.translate(
                                record["content"], en_to_zn=True)
                            # translate() returns None for over-long text.
                            record["encontent"] = (
                                translated[0] if translated else "")
                            record["annex"] = annex
                self._copy_message(email_conn, seq)
        finally:
            self._logout(email_conn)
        # NOTE(review): removemail() deletes *everything* currently in the
        # source folder, including mails whose copy failed above and mails
        # that arrived after the search - confirm this is acceptable.
        self.removemail()
        return mail_data_list

    def removemail(self):
        """Delete every mail in ``oldmaildir``.

        All mails are flagged ``\\Deleted`` first and expunged once at the
        end. Expunging after each single mail renumbers the remaining
        mails, so the sequence numbers obtained from the search would then
        point at the wrong mails.

        :raises ConnectionError: when the server cannot be reached
        """
        email_conn = self._connect()
        try:
            email_conn.select(self.oldmaildir, readonly=False)
            status, email_data = email_conn.search(None, 'ALL')
            flagged = False
            for raw_id in email_data[0].split():
                try:
                    typ, response = email_conn.store(
                        raw_id.decode('ascii'), '+FLAGS', '(\\Deleted)')
                except (IMAP4_SSL.error, OSError) as e:
                    # TODO: this should go to a log file.
                    print("删除邮件失败:", e)
                    continue
                if typ == 'OK':
                    flagged = True
                else:
                    print("删除邮件失败:", response)
            if flagged:
                try:
                    email_conn.expunge()
                except (IMAP4_SSL.error, OSError) as e:
                    print("删除邮件失败:", e)
        finally:
            self._logout(email_conn)


class HandleExcel(object):
    """Append parsed mail records to the ``emailcontent`` sheet of email.xlsx."""

    def __init__(self):
        pass

    def getexcel(self):
        """Open ``email.xlsx`` and return ``(workbook, emailcontent sheet)``."""
        wb = load_workbook("email.xlsx")
        sh = wb["emailcontent"]
        return wb, sh

    def writedict(self, info_dict):
        """Append one row per record of *info_dict* and save the workbook.

        Column 1 receives a running counter that continues from the number
        in the last used row; when that cell holds no number the counter
        starts at ``max_row + 1``. Fields missing from a record are written
        as empty cells.

        :param info_dict: mapping of index -> record dict as returned by
            ``AutoEmail.receiveremail``
        """
        wb, sh = self.getexcel()
        row = sh.max_row
        last_count = sh.cell(row=row, column=1).value
        if isinstance(last_count, (int, float)):
            count_num = last_count + 1
        else:
            # Empty cell (None) or header text: there is nothing to continue.
            count_num = row + 1
        for record in info_dict.values():
            row += 1
            sh.cell(row=row, column=1, value=count_num)
            # Attachment marker, column 2.
            sh.cell(row=row, column=2, value=record.get("annex"))
            # Time the mail was sent, column 3.
            sh.cell(row=row, column=3, value=record.get("send_time"))
            # Sender address, column 6.
            sh.cell(row=row, column=6, value=record.get("sender"))
            # Original content, column 13.
            sh.cell(row=row, column=13, value=record.get("content"))
            # Google-translated content, column 14.
            sh.cell(row=row, column=14, value=record.get("encontent"))
            count_num += 1
        wb.save("email.xlsx")


class GoogleTranslate(object):
    """Client for the Google Translate web endpoint."""

    # JavaScript obtained by analysing the web page; TL(text) computes the
    # ``tk`` value that is sent along with each request.
    _TK_JS = """
    function TL(a) {
        var k = "";
        var b = 406644;
        var b1 = 3293161072;
        var jd = ".";
        var $b = "+-a^+6";
        var Zb = "+-3^+b+-f";
        for (var e = [], f = 0, g = 0; g < a.length; g++) {
            var m = a.charCodeAt(g);
            128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128)
        }
        a = b;
        for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b);
        a = RL(a, Zb);
        a ^= b1 || 0;
        0 > a && (a = (a & 2147483647) + 2147483648);
        a %= 1E6;
        return a.toString() + jd + (a ^ b)
    };
    function RL(a, b) {
        var t = "a";
        var Yb = "+";
        for (var c = 0; c < b.length - 2; c += 3) {
            var d = b.charAt(c + 2),
                d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
                d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
            a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
        }
        return a
    }
    """

    _URL_EN_TO_ZH = (
        "https://translate.google.cn/translate_a/single?client=t&sl=en"
        "&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw"
        "&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1"
        "&srcrom=0&ssel=0&tsel=0&kc=2"
    )
    _URL_ZH_TO_EN = (
        "https://translate.google.cn/translate_a/single?client=t&sl=zh-CN"
        "&tl=en&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw"
        "&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&otf=1&ssel=6&tsel=3&kc=1"
    )
    _MAX_LENGTH = 4891  # longer texts are rejected by translate()
    _TIMEOUT = 10       # seconds; without it a stalled request never returns

    def __init__(self):
        self.ctx = execjs.compile(self._TK_JS)

    def getTk(self, text):
        """Return the ``tk`` request value computed for *text*."""
        return self.ctx.call("TL", text)

    def translate(self, content, en_to_zn=True):
        """Translate *content* between English and Chinese.

        :param content: text to translate
        :param en_to_zn: translate English to Chinese when true (default),
            Chinese to English otherwise
        :return: ``(joined_text, segments)`` where ``segments`` is the list
            of translated pieces and ``joined_text`` joins them with
            newlines; ``None`` when *content* is longer than 4891
            characters
        :raises requests.RequestException: on network or HTTP errors
        """
        if len(content) > self._MAX_LENGTH:
            print("翻译的长度超过限制!!!")
            return None
        url = self._URL_EN_TO_ZH if en_to_zn else self._URL_ZH_TO_EN
        param = {'tk': self.getTk(content), 'q': content}
        result = requests.get(url, params=param, timeout=self._TIMEOUT)
        result.raise_for_status()
        # The response is JSON that parses to a nested list; element 0 holds
        # the translated segments.
        results = result.json()[0] or []
        data = [segment[0] for segment in results if segment[0]]
        return '\n'.join(data), data


account = "xxx@xxx.com"
host = "imap.exmail.qq.com"
password = "xxxxx"
# Folder processed mails are copied to (passed as ``maildir``).
# The "&...-" prefix is presumably a modified-UTF-7 encoded folder name.
maildir = "&UXZO1mWHTvZZOQ-/oldfs"
# Folder that is searched for mails and emptied afterwards (``oldmaildir``).
oldmaildir = "&UXZO1mWHTvZZOQ-/fs"
autoemail = AutoEmail(account, host, password, maildir, oldmaildir)
info_dict = autoemail.receiveremail()
excel = HandleExcel()
excel.writedict(info_dict)