# 使用 Python 获取邮件数据并写入 Excel,同时将邮件内容由英文翻译为中文

import os, re, time, json
import email

from imaplib import IMAP4_SSL
# 如果要自动发
from smtplib import SMTP_SSL
from bs4 import BeautifulSoup
import openpyxl
from openpyxl import load_workbook

import requests
import execjs  # pip install PyExecJS  # 需要注意, 包的名称:PyExecJS
import time

class AutoEmail(object):
    """Read mail from one IMAP folder, extract fields, and archive it.

    Messages are read from ``oldmaildir``. Every message that is copied to
    ``maildir`` successfully is afterwards deleted from ``oldmaildir``.
    """

    # Upper bound on how many sequence numbers are sent in one STORE command.
    _STORE_BATCH = 500

    def __init__(self, account, host, password, maildir, oldmaildir):
        # Mailbox account settings.
        self.account = account
        self.host = host
        self.password = password
        self.num = -1
        # Folder that processed messages are copied into.
        self.maildir = maildir
        # Folder that is selected, searched and cleaned up.
        self.oldmaildir = oldmaildir

    def getemail(self):
        """Open an IMAP-over-SSL connection and log in.

        :return: the logged-in connection, or None (after printing the
            error) when connecting or logging in fails.
        """
        try:
            email_conn = IMAP4_SSL(self.host)
            email_conn.login(user=self.account, password=self.password)
            return email_conn
        except (OSError, IMAP4_SSL.error) as e:
            # Only network/protocol failures are reported here; things like
            # KeyboardInterrupt are no longer swallowed.
            print("Connect to {0} failed".format(self.host), e)
            return None

    def _connect_or_raise(self):
        """Return a logged-in connection or raise ConnectionError."""
        email_conn = self.getemail()
        if email_conn is None:
            raise ConnectionError("Connect to {0} failed".format(self.host))
        return email_conn

    @staticmethod
    def _disconnect(email_conn):
        """Log out of the server, ignoring errors from a broken connection."""
        try:
            email_conn.logout()
        except (OSError, IMAP4_SSL.error):
            # Nothing useful can be done if the goodbye itself fails.
            pass

    def savefile(self, filename, data, path):
        """
        Save a mail attachment (not implemented yet).

        :param filename: name of the file to save
        :param data: data to write
        :param path: directory to save into
        :return:
        """
        pass

    def emailfolder(self):
        """Return the folder entries reported by the server's LIST command."""
        email_conn = self._connect_or_raise()
        try:
            return list(email_conn.list()[1])
        finally:
            self._disconnect(email_conn)

    def get_body(self, msg):
        """Return the decoded payload of the first leaf part of ``msg``."""
        if msg.is_multipart():
            return self.get_body(msg.get_payload(0))
        else:
            return msg.get_payload(None, decode=True)

    def _translate(self, content):
        """Translate ``content`` with one shared GoogleTranslate instance.

        Returns "" when the translator gives no result (it returns None for
        over-long input), so callers never index into None.
        """
        translator = getattr(self, "_translator", None)
        if translator is None:
            translator = self._translator = GoogleTranslate()
        result = translator.translate(content, en_to_zn=True)
        return result[0] if result else ""

    def _parse_nested_part(self, part):
        """Build a record from a nested multipart part using header regexes."""
        body = str(self.get_body(part), encoding='ISO-8859-1')
        return {
            "subject": re.search(r"Subject.*", body).group(),
            "sender": re.search(r"From.*", body).group(),
            "send_time": re.search(r"Date.*", body).group(),
            "content": body,
            "encontent": self._translate(body),
            "annex": "YES",
        }

    def _parse_html_part(self, part):
        """Build a record from an HTML part by walking its <a>/<b> tags."""
        html = str(part.get_payload(decode=True).decode('utf-8'))
        soup = BeautifulSoup(html, "lxml")
        bold_tags = soup.find_all("b")
        # The first link is expected to be a "scheme:address" href.
        sender = soup.find_all("a")[0].get("href").split(":")[1]
        send_time = bold_tags[1].parent.get_text(strip=True)
        send_time = re.search(r"(\d{4}-\d{1,2}-\d{1,2})", send_time).group()
        subject_block = bold_tags[-1].parent
        subject = subject_block.get_text(strip=True).replace("Subject:", "")
        content = subject_block.parent.parent.find_next_siblings("div")[0].text
        return {
            "subject": subject,
            "sender": sender,
            "send_time": send_time,
            "content": content,
            "encontent": self._translate(content),
            "annex": "NO",
        }

    def _parse_message(self, msg):
        """Return the record for ``msg``; {} when no part is recognised."""
        record = {}
        if not msg.is_multipart():
            return record
        for part in msg.get_payload():
            ctype = part.get_content_type()
            # A later matching part overwrites the fields of an earlier one.
            if "multipart" in ctype:
                record.update(self._parse_nested_part(part))
            if "html" in ctype:
                record.update(self._parse_html_part(part))
        return record

    def _copy_message(self, email_conn, msg_id):
        """Copy one message to ``maildir``; return True only on an OK reply."""
        try:
            typ, detail = email_conn.copy(msg_id, self.maildir)
        except (OSError, IMAP4_SSL.error) as e:
            # Should go to a log file eventually.
            print("拷贝邮件失败:", e)
            return False
        # imaplib reports a 'NO' reply as a return value, not an exception.
        if typ != 'OK':
            print("拷贝邮件失败:", detail)
            return False
        print('copy successful:')
        return True

    def _delete_messages(self, email_conn, msg_ids):
        """Flag the given sequence numbers as deleted, then expunge once.

        Expunging only after all flags are set keeps the sequence numbers
        valid; expunging per message renumbers the rest of the mailbox.
        """
        if not msg_ids:
            return
        try:
            for start in range(0, len(msg_ids), self._STORE_BATCH):
                batch = ",".join(msg_ids[start:start + self._STORE_BATCH])
                typ, detail = email_conn.store(batch, '+FLAGS', '(\\Deleted)')
                if typ != 'OK':
                    print("删除邮件失败:", detail)
            email_conn.expunge()
        except (OSError, IMAP4_SSL.error) as e:
            # Should go to a log file eventually.
            print("删除邮件失败:", e)

    def receiveremail(self):
        """Parse every message in ``oldmaildir`` and archive it to ``maildir``.

        :return: dict keyed by the message's position in the search result;
            each value has the keys "subject", "sender", "send_time",
            "content", "encontent" and "annex". Messages without a
            recognised part are left out of the dict.
        :raises ConnectionError: when the server cannot be reached.

        A message whose parsing or translation fails is reported and left
        untouched in ``oldmaildir``. Only messages that were copied
        successfully are deleted.
        """
        email_conn = self._connect_or_raise()
        mail_data_list = {}
        copied_ids = []
        try:
            email_conn.select(self.oldmaildir, readonly=False)
            status, email_data = email_conn.search(None, 'ALL')
            raw_ids = (email_data[0] or b"").split()
            for num, raw_id in enumerate(raw_ids):
                msg_id = raw_id.decode()
                try:
                    typ, data = email_conn.fetch(msg_id, '(RFC822)')
                    # Parse the raw bytes; the mail is not guaranteed to be
                    # valid UTF-8 as a whole.
                    msg = email.message_from_bytes(data[0][1])
                    record = self._parse_message(msg)
                except Exception as e:
                    # Per-message boundary: one bad mail must not abort the
                    # batch. Should go to a log file eventually.
                    print("Parse mail {0} failed:".format(msg_id), e)
                    continue
                if record:
                    mail_data_list[num] = record
                if self._copy_message(email_conn, msg_id):
                    copied_ids.append(msg_id)
            self._delete_messages(email_conn, copied_ids)
        finally:
            self._disconnect(email_conn)
        return mail_data_list

    def removemail(self, email_conn=None):
        """Delete every message currently found in ``oldmaildir``.

        :param email_conn: optional logged-in connection to reuse; when
            omitted a connection is opened and logged out again here.
        :raises ConnectionError: when no connection is given and the server
            cannot be reached.
        """
        owns_connection = email_conn is None
        if owns_connection:
            email_conn = self._connect_or_raise()
        try:
            email_conn.select(self.oldmaildir, readonly=False)
            status, email_data = email_conn.search(None, 'ALL')
            msg_ids = [raw.decode() for raw in (email_data[0] or b"").split()]
            self._delete_messages(email_conn, msg_ids)
        finally:
            if owns_connection:
                self._disconnect(email_conn)

class HandleExcel(object):
    """Append parsed mail records to a worksheet of an existing workbook."""

    def __init__(self, filename="email.xlsx", sheetname="emailcontent"):
        """
        :param filename: path of the workbook to load and save
        :param sheetname: name of the worksheet that receives the rows
        """
        self.filename = filename
        self.sheetname = sheetname

    def getexcel(self):
        """Load the workbook and return ``(workbook, worksheet)``."""
        wb = load_workbook(self.filename)
        sh = wb[self.sheetname]
        return wb, sh

    @staticmethod
    def _next_serial(last_value, max_row):
        """Return the serial number for the next row.

        Continues from the last row's number when that cell holds one;
        otherwise (empty cell, header text) falls back to ``max_row + 1``.
        """
        is_number = isinstance(last_value, (int, float))
        if is_number and not isinstance(last_value, bool):
            return last_value + 1
        return max_row + 1

    def writedict(self, info_dict):
        """Append one row per non-empty record in ``info_dict`` and save.

        :param info_dict: mapping whose values are dicts with the keys
            "sender", "annex", "send_time", "content" and "encontent".
            Empty records are skipped; a missing key leaves its cell
            untouched.
        """
        wb, sh = self.getexcel()
        row = sh.max_row
        serial = self._next_serial(sh.cell(row=row, column=1).value, row)
        wrote_any = False
        for record in info_dict.values():
            if not record:
                # Nothing was parsed for this message.
                continue
            row += 1
            # Column 1: running serial number.
            sh.cell(row=row, column=1, value=serial)
            # Column 2: attachment flag.
            sh.cell(row=row, column=2, value=record.get("annex"))
            # Column 3: time the mail was sent.
            sh.cell(row=row, column=3, value=record.get("send_time"))
            # Column 6: sender address.
            sh.cell(row=row, column=6, value=record.get("sender"))
            # Column 13: original mail content.
            sh.cell(row=row, column=13, value=record.get("content"))
            # Column 14: translated mail content.
            sh.cell(row=row, column=14, value=record.get("encontent"))
            serial += 1
            wrote_any = True
        # Save once after all rows are in place instead of once per row.
        if wrote_any:
            wb.save(self.filename)

class GoogleTranslate(object):
    """Translate text through the Google web translation endpoint.

    NOTE(review): the token script and the endpoint URLs were obtained by
    analysing the web page and may be stale - confirm they still work.
    """

    # Longest text translate() will send.
    MAX_LENGTH = 4891
    # Seconds to wait for the HTTP response before giving up.
    _TIMEOUT = 10
    _URL_EN_TO_ZH = "https://translate.google.cn/translate_a/single?client=t&sl=en&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2"
    _URL_ZH_TO_EN = 'https://translate.google.cn/translate_a/single?client=t&sl=zh-CN&tl=en&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&otf=1&ssel=6&tsel=3&kc=1'

    def __init__(self):
        # JavaScript (taken from the web page) that computes the "tk" token.
        self.ctx = execjs.compile(
            """ function TL(a) { var k = ""; var b = 406644; var b1 = 3293161072; var jd = "."; var $b = "+-a^+6"; var Zb = "+-3^+b+-f"; for (var e = [], f = 0, g = 0; g < a.length; g++) { var m = a.charCodeAt(g); 128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128) } a = b; for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b); a = RL(a, Zb); a ^= b1 || 0; 0 > a && (a = (a & 2147483647) + 2147483648); a %= 1E6; return a.toString() + jd + (a ^ b) }; function RL(a, b) { var t = "a"; var Yb = "+"; for (var c = 0; c < b.length - 2; c += 3) { var d = b.charAt(c + 2), d = d >= t ? d.charCodeAt(0) - 87 : Number(d), d = b.charAt(c + 1) == Yb ? a >>> d: a << d; a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d } return a } """)

    def getTk(self, text):
        """Compute the request token for ``text`` by calling the JS ``TL``."""
        return self.ctx.call("TL", text)

    def translate(self, content, en_to_zn=True):
        """
        Translate ``content`` and return the result.

        :param content: text to translate
        :param en_to_zn: True for English to Chinese (default), False for
            Chinese to English
        :return: ``(joined_text, segments)``; ``("", [])`` when ``content``
            is longer than ``MAX_LENGTH``, so callers can always index it
        """
        if len(content) > self.MAX_LENGTH:
            print("翻译的长度超过限制!!!")
            return "", []
        url = self._URL_EN_TO_ZH if en_to_zn else self._URL_ZH_TO_EN
        param = {'tk': self.getTk(content), 'q': content}
        # Without a timeout requests can block forever on a stalled server.
        response = requests.get(url, params=param, timeout=self._TIMEOUT)
        response.raise_for_status()
        # The reply is JSON holding a nested list; item 0 lists the segments.
        segments = response.json()[0] or []
        data = [segment[0] for segment in segments if segment and segment[0]]
        return '\n'.join(data), data


# Mailbox credentials and IMAP server (placeholder values).
account = "xxx@xxx.com"
host = "imap.exmail.qq.com"
password = "xxxxx"
# Folder that AutoEmail copies the processed messages into.
maildir = "&UXZO1mWHTvZZOQ-/oldfs"
# Folder that AutoEmail reads the messages from and deletes them from afterwards.
oldmaildir = "&UXZO1mWHTvZZOQ-/fs"

# Fetch and parse the mail, then append the records to the workbook.
autoemail = AutoEmail(
    account=account,
    host=host,
    password=password,
    maildir=maildir,
    oldmaildir=oldmaildir,
)
info_dict = autoemail.receiveremail()
excel = HandleExcel()
excel.writedict(info_dict)

  

# posted @ 2021-08-31 14:33  Feng_fu  阅读(565)  评论(0)  编辑  收藏  举报