python3使用imaplib获取邮件

imaplib 获取邮件，email解析邮件

config文件中存有路径

# config.py
import os

# Directory on disk under which saved files are placed (current working
# directory + "/static/").
FILE_PATH_PREFIX = os.getcwd() + '/static/'
# URL path segment used in place of FILE_PATH_PREFIX when building file URLs.
FILE_PATH_PREFIX_ALIAS = "/static/"
# Scheme, host and port prepended to FILE_PATH_PREFIX_ALIAS in file URLs.
FILE_DOMAIN_PREFIX = 'http://0.0.0.0:8090'

util.py 中的 file_path 方法（下文代码通过 import util 调用 util.file_path）

# Return today's dated sub-directory of the given prefix, creating it on demand.
def file_path(file_path):
    """Return ``file_path + 'YYYY-MM-DD/'`` and make sure the directory exists.

    Args:
        file_path: Directory prefix; it is concatenated as-is, so it should
            end with a path separator.

    Returns:
        The dated directory path, ending with ``'/'``.
    """
    day_dir = file_path + datetime.now().strftime('%Y-%m-%d') + '/'
    # exist_ok avoids the check-then-create race when several jobs run at once.
    os.makedirs(day_dir, exist_ok=True)
    return day_dir

imaplib 获取邮件，email解析邮件

get_email()函数的参数根据项目实际情况传。

旧版内容：

  1 import imaplib
  2 import email
  3 import re
  4 import time
  5 from email.header import decode_header, Header
  6 from datetime import datetime, timedelta
  7 from email.utils import parseaddr
  8 import util
  9 from config import FILE_PATH_PREFIX, FILE_DOMAIN_PREFIX, FILE_PATH_PREFIX_ALIAS
 10 import os
 11 
 12 imaplib.Commands['ID'] = ('AUTH')
 13 
 14 
 15 def decode_data(content, added_encode=None):
 16     """解码"""
 17 
 18     def _decode(bytes_, msg_charset):
 19         try:
 20             if isinstance(bytes_, bytes):
 21                 return str(bytes_, encoding=msg_charset)
 22             else:
 23                 return str(bytes_).split(' ')[0]
 24         except Exception as e:
 25             return None
 26 
 27     encodes = ['UTF-8', 'GBK', 'GB2312']
 28     if added_encode:
 29         encodes = [added_encode] + encodes
 30     for encoding in encodes:
 31         if r'\u' in str(content):
 32             str_data = _decode(content, 'unicode-escape')
 33         else:
 34             str_data = _decode(content, encoding)
 35         if str_data is not None:
 36             return str_data
 37     return None
 38 
 39 
 40 def get_local_time_stamp(msg, date):
 41     """将邮箱时间转换为北京时间"""
 42     if date is None:
 43         if msg['Received']:
 44             date = msg['Received'].split(';')[-1].strip()
 45         else:
 46             return None
 47     if ',' not in date:
 48         date = msg['Received'].split(';')[-1].strip()
 49     result = re.search(r"[\-+]\d+", date)
 50     if result:
 51         time_area = result.group()
 52         symbol = time_area[0]
 53         offset = int(time_area[1]) + int(time_area[2])
 54         date_re = re.compile(r'[(](.*?)[)]', re.S)
 55         time_zone = re.findall(date_re, date)
 56         if time_zone:
 57             format_str = '%a, %d %b %Y %H:%M:%S ' + time_area + ' ({})'.format(time_zone[0])
 58         else:
 59             format_str = '%a, %d %b %Y %H:%M:%S ' + time_area
 60         if symbol == "+":
 61             utc_time = time.strptime(date.strip(), format_str)
 62             temps_time = datetime.fromtimestamp(time.mktime(utc_time))
 63             if offset > 8:
 64                 offset = offset - 8
 65             elif offset < 8:
 66                 offset = 8 - offset
 67             else:
 68                 offset = 0
 69             local_temps_time = temps_time + timedelta(hours=offset)
 70         else:
 71             utc_time = time.strptime(date.strip(), format_str)
 72             temps_time = datetime.fromtimestamp(time.mktime(utc_time))
 73             local_temps_time = temps_time + timedelta(hours=(offset + 8))
 74         return local_temps_time
 75     else:
 76         time_zone = date[-3:]
 77         format_str = '%a, %d %b %Y %H:%M:%S {}'.format(time_zone)
 78         utc_time = time.strptime(date.strip(), format_str)
 79         temps_time = datetime.fromtimestamp(time.mktime(utc_time))
 80         if time_zone == 'UTC' or time_zone == 'GMT':
 81             hours_ = 8
 82         elif time_zone == 'CDT':
 83             hours_ = 13
 84         else:
 85             hours_ = 0
 86         local_temps_time = temps_time + timedelta(hours=hours_)
 87         return local_temps_time
 88 
 89 
 90 def parse_email_body(message):
 91     """解析内容"""
 92     content_list = []
 93     for part in message.walk():
 94         if not part.is_multipart():
 95             charset = part.get_charset()
 96             contentType = part.get_content_type()
 97             if contentType == 'text/plain' or contentType == 'text/html':
 98                 mail_content = decode_data(part.get_payload(decode=True), charset)
 99                 content_list.append(mail_content)
100     for i in content_list:
101         if 'html' in i:
102             content = i
103             return content
104     return content_list[0]
105 
106 
def parse_email_annex(message, client_id, from_email, send_email, mail_subject, mail_content, email_time):
    """Save every attachment of ``message`` to disk and describe the files.

    Args:
        message: Parsed ``email.message.Message``.
        client_id, from_email, send_email, mail_subject, mail_content,
        email_time: Accepted for the caller's convenience; not used here.

    Returns:
        A list of dicts with the keys ``annexName``, ``annexUrl`` and
        ``annexPath``, one per saved attachment.
    """
    annex_list = []
    for part in message.walk():
        if part.is_multipart():
            continue
        raw_name = part.get_filename()
        if not raw_name:
            continue
        # Decode every RFC 2047 chunk of the name, not just the first one.
        try:
            annex_name = str(email.header.make_header(email.header.decode_header(raw_name)))
        except (LookupError, ValueError):
            annex_name = str(raw_name)
        # The name comes from the sender: keep only its last path component so
        # it cannot point outside the attachment directory.
        annex_name = os.path.basename(annex_name.replace('\\', '/')) or 'attachment'
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        pwd = util.file_path(FILE_PATH_PREFIX + 'annex/')
        id_ = str(int(time.time()))
        # pwd ends with "<date>/", so [-2] is the dated directory name.
        url_ = FILE_DOMAIN_PREFIX + FILE_PATH_PREFIX_ALIAS + 'annex/' + pwd.split('/')[-2] + '/' + id_ + '_' \
               + annex_name
        path_ = pwd + id_ + '_' + annex_name
        # Do not rewrite a file that has already been stored.
        if not os.path.isfile(path_):
            with open(path_, 'wb') as fp:
                fp.write(payload)
        annex_list.append({'annexName': annex_name, 'annexUrl': url_, 'annexPath': path_})
    return annex_list
132 
133 
def get_email(server, username, password, send_email, client_id, created_by):
    """Fetch and parse the ten most recent INBOX messages over IMAP.

    The parsed fields are only bound to local names; this listing contains no
    persistence step, so saving them is left to the surrounding project.

    https://www.docs4dev.com/docs/zh/python/3.7.2rc1/all/library-imaplib.html

    Args:
        server: Host name of the IMAP-over-SSL server.
        username: Login name; also sent in the IMAP ID command.
        password: Login password.
        send_email, client_id, created_by: Not used by the code shown here.

    Returns:
        None. Connection, login and fetch errors are printed, not raised.
    """
    try:
        imap = imaplib.IMAP4_SSL(server)
    except Exception as e:
        print('服务器:{} 连接失败:{}'.format(server, e))
        return
    try:
        try:
            imap.login(username, password)
        except Exception as e:
            print('账号:{} 登录失败:{}'.format(username, e))
            return
        # Mailboxes: INBOX / Drafts / Junk / Trash / Sent.
        mail_box = ['INBOX']
        # Identify the client so NetEase mailboxes do not block the session:
        # https://blog.csdn.net/jony_online/article/details/108638571
        args = ("name", username, "contact", username, "version", "1.0.0", "vendor", "myclient")
        imap._simple_command('ID', '("' + '" "'.join(args) + '")')
        for box in mail_box:
            try:
                imap.select(mailbox=box)
                typ, data = imap.search(None, 'All')  # 'UnSeen' selects unread mail
            except Exception:
                # Fall back to the default mailbox.
                imap.select()
                typ, data = imap.search(None, 'All')
            for num in data[0].split()[-10:]:
                typ, fetched = imap.fetch(num, '(RFC822)')
                # Parse the raw bytes; each part is decoded with its own charset.
                message = email.message_from_bytes(fetched[0][1])
                sub = message.get('subject')
                if not sub:
                    continue
                mail_subject = ''
                msg_charset = ''
                for subject_, charset in decode_header(sub):
                    msg_charset = charset
                    mail_subject += decode_data(subject_, charset) or ''
                sender = message.get('from') or ''
                from_email = parseaddr(sender)[1]  # sender address
                from_name = sender.split('<')[0].strip().strip('"')
                from_name = decode_data(decode_header(from_name)[0][0], msg_charset)  # sender name
                try:
                    date_ = get_local_time_stamp(message, message.get('date'))
                except (ValueError, TypeError, IndexError, AttributeError):
                    date_ = None
                # Fall back to "now" when the message carries no usable date.
                email_time = date_.timestamp() if date_ is not None else time.time()
                mail_content = parse_email_body(message)
                # To mark the message as read on the server:
                # imap.store(num, '+FLAGS', '(\\Seen)')
        imap.close()
    except Exception as e:
        print('账号:{} 获取邮件失败:{}'.format(username, e))
    finally:
        # Always release the connection, including after a failed login.
        try:
            imap.logout()
        except Exception:
            pass  # best-effort cleanup; the session may already be gone

View Code

更新后的内容：

  1 import imaplib
  2 import email
  3 import re
  4 import time
  5 from email.header import decode_header, Header
  6 from datetime import datetime, timedelta
  7 from email.utils import parseaddr
  8 import util
  9 from config import FILE_PATH_PREFIX, FILE_DOMAIN_PREFIX, FILE_PATH_PREFIX_ALIAS
 10 from model import EmailContentModel
 11 from util import id_generator
 12 import os
 13 from func_timeout import func_set_timeout
 14 
 15 
 16 imaplib.Commands['ID'] = ('AUTH')
 17 
 18 
 19 def decode_data(content, added_encode=None):
 20     """解码"""
 21 
 22     def _decode(bytes_, msg_charset):
 23         try:
 24             if isinstance(bytes_, bytes):
 25                 return str(bytes_, encoding=msg_charset)
 26             else:
 27                 return str(bytes_).split(' ')[0]
 28         except Exception as e:
 29             return None
 30 
 31     encodes = ['UTF-8', 'GBK', 'GB2312']
 32     if added_encode:
 33         encodes = [added_encode] + encodes
 34     for encoding in encodes:
 35         if r'\u' in str(content):
 36             str_data = _decode(content, 'unicode-escape')
 37         else:
 38             str_data = _decode(content, encoding)
 39         if str_data is not None:
 40             return str_data
 41     return None
 42 
 43 
 44 def parse_email_body(message):
 45     """解析内容"""
 46     content_list = []
 47     for part in message.walk():
 48         if not part.is_multipart():
 49             charset = part.get_charset()
 50             contentType = part.get_content_type()
 51             if contentType == 'text/plain' or contentType == 'text/html':
 52                 mail_content = decode_data(part.get_payload(decode=True), charset)
 53                 content_list.append(mail_content)
 54     content_list = list(set(content_list))
 55     if len(content_list) > 0:
 56         content_list = ''.join(content_list)
 57     else:
 58         content_list = content_list[0]
 59     return content_list
 60 
 61 
 62 def local_time(time_):
 63     date = str(time_)
 64     result = re.search(r"[\-+]\d+", date)
 65     if result:
 66         time_area = result.group()
 67         symbol = time_area[0]
 68         offset = int(time_area[1]) + int(time_area[2])
 69         date_re = re.compile(r'[(](.*?)[)]', re.S)
 70         time_zone = re.findall(date_re, date)
 71         if time_zone:
 72             format_str = '%a, %d %b %Y %H:%M:%S ' + time_area + ' ({})'.format(time_zone[0])
 73         else:
 74             format_str = '%a, %d %b %Y %H:%M:%S ' + time_area
 75         if symbol == "+":
 76             utc_time = time.strptime(date.strip(), format_str)
 77             temps_time = datetime.fromtimestamp(time.mktime(utc_time))
 78             if offset > 8:
 79                 offset = offset - 8
 80             elif offset < 8:
 81                 offset = 8 - offset
 82             else:
 83                 offset = 0
 84             local_temps_time = temps_time + timedelta(hours=offset)
 85         else:
 86             utc_time = time.strptime(date.strip(), format_str)
 87             temps_time = datetime.fromtimestamp(time.mktime(utc_time))
 88             local_temps_time = temps_time + timedelta(hours=(offset + 8))
 89         return local_temps_time
 90     else:
 91         time_zone = date[-3:]
 92         format_str = '%a, %d %b %Y %H:%M:%S {}'.format(time_zone)
 93         utc_time = time.strptime(date.strip(), format_str)
 94         temps_time = datetime.fromtimestamp(time.mktime(utc_time))
 95         if time_zone == 'UTC' or time_zone == 'GMT':
 96             hours_ = 8
 97         elif time_zone == 'CDT':
 98             hours_ = 13
 99         else:
100             hours_ = 0
101         local_temps_time = temps_time + timedelta(hours=hours_)
102         return local_temps_time
103 
104 
105 # # search('FROM','abc@outlook.com',conn)  根据输入的条件查找特定的邮件
106 # def search(key, value, conn):
107 #     result, data = conn.search(None, key, '"()"'.format(value))
108 #     return data
109 
110 
# Save the attachments of a message.
def get_attachements(msg):
    """Write every attachment of ``msg`` to disk and describe the files.

    Args:
        msg: Parsed ``email.message.Message``.

    Returns:
        A list of dicts with the keys ``annexName``, ``annexUrl`` and
        ``annexPath``, one per saved attachment.
    """
    annex_list = []
    for part in msg.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        raw_name = part.get_filename()
        if not raw_name:
            continue
        # get_filename() leaves RFC 2047 encoded-words undecoded.
        try:
            filename = str(email.header.make_header(email.header.decode_header(raw_name)))
        except (LookupError, ValueError):
            filename = str(raw_name)
        # The name comes from the sender: keep only its last path component so
        # it cannot point outside the attachment directory.
        filename = os.path.basename(filename.replace('\\', '/')) or 'attachment'
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        pwd = util.file_path(FILE_PATH_PREFIX + 'annex/')
        date_time = str(int(time.time()))
        # pwd ends with "<date>/", so [-2] is the dated directory name.
        url_ = FILE_DOMAIN_PREFIX + FILE_PATH_PREFIX_ALIAS + 'annex/' + pwd.split('/')[-2] + '/' + date_time + '_' \
               + filename
        path_ = pwd + date_time + '_' + filename
        if not os.path.isfile(path_):
            with open(path_, 'wb') as f:
                f.write(payload)
        annex_list.append({'annexName': filename, 'annexUrl': url_, 'annexPath': path_})
    return annex_list
131 
132 
def _header_text(message, name):
    """Return header ``name`` of ``message`` decoded to text, or '' if absent."""
    raw = message[name]
    if raw is None:
        return ''
    return str(email.header.make_header(email.header.decode_header(raw)))


@func_set_timeout(200)
def get_email(server, username, password, send_email, client_id, created_by, mail_status, mail_num):
    """Fetch INBOX messages over IMAP and parse those not stored yet.

    The parsed fields are only bound to local names; this listing contains no
    persistence step, so saving them is left to the surrounding project.

    https://www.docs4dev.com/docs/zh/python/3.7.2rc1/all/library-imaplib.html

    Args:
        server: Host name of the IMAP-over-SSL server.
        username: Login name; also sent in the IMAP ID command.
        password: Login password.
        send_email: Used as the only recipient when the To header does not
            contain a comma-separated list.
        client_id, created_by: Not used by the code shown here.
        mail_status: IMAP search criterion, e.g. 'All' or 'UnSeen'.
        mail_num: Slice start applied to the found message numbers, e.g. -10
            for the last ten.

    Returns:
        None. Connection, login and fetch errors are printed, not raised.
    """
    try:
        imap = imaplib.IMAP4_SSL(server)
    except Exception as e:
        print('服务器:{} 连接失败:{}'.format(server, e))
        return None
    try:
        try:
            imap.login(username, password)
        except Exception as e:
            print('账号:{} 登录失败:{}'.format(username, e))
            return None
        # Identify the client so NetEase mailboxes do not block the session:
        # https://blog.csdn.net/jony_online/article/details/108638571
        args = ("name", username, "contact", username, "version", "1.0.0", "vendor", "myclient")
        imap._simple_command('ID', '("' + '" "'.join(args) + '")')
        imap.select(mailbox='INBOX', readonly=True)
        typ, data = imap.search(None, mail_status)
        for num in data[0].split()[mail_num:]:
            typ, fetched = imap.fetch(num, '(RFC822)')
            message = email.message_from_bytes(fetched[0][1])
            matched = re.search('<(.*)>', _header_text(message, 'Message-Id'))
            if not matched:
                # Without a Message-Id the mail cannot be de-duplicated.
                continue
            message_id = matched.group(1)
            if EmailContentModel.find_by_message_id(message_id):
                continue  # already stored
            mail_subject = _header_text(message, 'Subject')
            sender = _header_text(message, 'From')
            from_name, from_email = parseaddr(sender)
            recipients = _header_text(message, 'To')
            if recipients[:1] == '<' and recipients[-1:] == '>':
                recipients = re.search('<(.*)>', recipients).group(1)
            if ',' in recipients:
                to_email = recipients.split(',')
            else:
                to_email = [send_email]
            try:
                email_time = local_time(_header_text(message, 'Date'))
            except (ValueError, TypeError, IndexError):
                email_time = None  # missing or malformed Date header
            mail_content = parse_email_body(message)
            annex_list = get_attachements(message)
        imap.close()
    except Exception as e:
        print('账号:{} 获取邮件失败:{}'.format(username, e))
        return None
    finally:
        # Always release the connection, including after a failed login.
        try:
            imap.logout()
        except Exception:
            pass  # best-effort cleanup; the session may already be gone

View Code

在 flask 项目中使用调度器使 get_email() 函数自动后台运行获取邮件

 1 ＃ app.py
 2 from resource.imap_receive_email import save_email_task
 3 from apscheduler.schedulers.background import BackgroundScheduler
 4 from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor
 5 
 6 app = Flask(__name__)
 7 ...
 8 api.add_resource(Login, '/login')
 9 
10 # 使用调度器
11 executors = {
12     # 执行器的线程与进程数
13     'default': ThreadPoolExecutor(10),
14     'processpool': ProcessPoolExecutor(10)
15 }
16 job_defaults = {
17     # 最近多久时间内允许存在的任务数
18     'misfire_grace_time': 10,
19     # 该定时任务允许最大的实例个数
20     'max_instances': 10,
21     # 是否运行一次最新的任务，当多个任务堆积时
22     'coalesce': True
23 }
24 
25 scheduler = BackgroundScheduler(executors=executors, job_defaults=job_defaults)
26 scheduler.add_job(func=save_email_task, trigger='interval', seconds=30, replace_existing=True, max_instances=10)
27 try:
28     scheduler.start()
29 except (KeyboardInterrupt, SystemExit):
30     scheduler.shutdown()
31 
32 if __name__ == '__main__':
33     app.run(
34         host='0.0.0.0',
35         port=8090,
36         debug=DEBUG
37     )

任务 save_email_task：

def save_email_task():
    """Scheduled job: load a mailbox account and fetch its messages.

    This is a sketch; the ``...`` placeholders stand for project-specific
    arguments.
    """
    # Load the mailbox account settings from the database first.
    # Named "account" so the local does not shadow the ``email`` module.
    account = EmailModel.object(...)
    # Fetch the messages of that mailbox.
    get_email(account['imapServer'], account['Username'], account['password'], ...)

poplib 获取邮件

参考：廖雪峰pop3收取邮件

（以下代码未测试，不可用）

  1 # -*- coding: utf-8 -*-
  2 
  3 import poplib
  4 import email
  5 import time
  6 from email.parser import Parser
  7 from email.header import decode_header
  8 from email.utils import parseaddr
  9 
 10 
 11 # 字符编码转换
 12 def decode_str(str_in):
 13     try:
 14         value, charset = decode_header(str_in)[0]
 15         if charset:
 16             value = value.decode(charset)
 17         return value
 18     except:
 19         return str_in
 20 
 21 
 22 def guess_charset(msg):
 23     charset = msg.get_charset()
 24     if charset is None:
 25         content_type = msg.get('Content-Type', '').lower()
 26         pos = content_type.find('charset=')
 27         if pos >= 0:
 28             charset = content_type[pos + 8:].strip()
 29     return charset
 30 
 31 
 32 def get_email(pop_server, username, password, send_email, client_id):
 33     # 登录邮箱
 34     pop3_server = 'pop3.qq.com'
 35     try:
 36         server = poplib.POP3(pop3_server, 110, timeout=50)
 37         # 身份认证:
 38         server.user(username)
 39         server.pass_(password)
 40     except BaseException as e:
 41         server = ''
 42         print("登陆失败")
 43     resp, mails, octets = server.list()  # list()返回所有邮件的编号:
 44     mails = mails
 45     indexs = range(len(mails), 0, -1)[-2:]  # 获取最近的10封邮件索引
 46     # 从最近的邮件开始，依次遍历所有邮件
 47     for index in [84, 0]:
 48         # 解析邮件
 49         # mail_msg = parser_mail(index)
 50         try:
 51             resp, lines, octets = server.retr(index)  # 获取第index封邮件，lines存储了邮件的原始文本的每一行
 52         except:
 53             try:  # 如果获取邮件失败，尝试重新登录邮箱再获取
 54                 server.user(username)
 55                 server.pass_(password)
 56                 resp, lines, octets = server.retr(index)
 57             except:  # 如果还是失败，返回False
 58                 return False
 59         # 2、拼接邮件
 60         try:
 61             msg_content = b'\n'.join(lines).decode('gbk')  # 邮件的原始文本
 62         except:
 63             try:
 64                 msg_content = b'\n'.join(lines).decode('utf-8')  # 邮件的原始文本
 65             except:
 66                 return False
 67 
 68         # 3、解析邮件内容
 69         try:
 70             msg = Parser().parsestr(msg_content)
 71         except:
 72             msg = "None"
 73             print('3:错误')
 74 
 75         # 4、解析邮件主题(标题)
 76         try:
 77             Subject = decode_str(msg.get("Subject"))
 78         except BaseException as e:
 79             print('4: 错误')
 80             Subject = "NONE"
 81 
 82         # 5、解析邮件时间
 83         try:
 84             Date = time.strptime(decode_str(msg.get("Date"))[0:24], '%a, %d %b %Y %H:%M:%S')
 85             Date = time.mktime(Date)  # 获取邮件的接收时间,格式化收件时间
 86         except:
 87             Date = "NONE"
 88             print('5:错误')
 89 
 90         # 6、解析发件人
 91         try:
 92             From = decode_str(msg.get("From")).split(' ')[-1]
 93         except:
 94             From = '6:<None>'
 95 
 96         mail_msg = {
 97             'From': From,
 98             'Date': Date,
 99             'Subject': Subject,
100             'Msg': msg,
101         }
102         print('mail_msg:', mail_msg)
103         if mail_msg:
104             print_info(mail_msg['Msg'])  # 输入邮件内容
105             get_att(mail_msg['Msg'])  # 下载邮件中的附件
106         server.quit()
107 
108 
# Print a message tree; ``indent`` is the nesting depth used for indentation.
def print_info(msg, indent=0):
    """Print the headers (top level only) and the parts of ``msg`` recursively.

    Args:
        msg: An ``email.message.Message``.
        indent: Nesting depth; each level indents the output by two spaces.
    """
    pad = '  ' * indent
    if indent == 0:
        for header in ['From', 'To', 'Subject']:
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    value = decode_str(value)
                else:
                    hdr, addr = parseaddr(value)
                    name = decode_str(hdr)
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (pad, header, value))
    if msg.is_multipart():
        for n, part in enumerate(msg.get_payload()):
            print('%spart %s' % (pad, n))
            print('%s--------------------' % pad)
            print_info(part, indent + 1)
    else:
        content_type = msg.get_content_type()
        if content_type == 'text/plain' or content_type == 'text/html':
            content = msg.get_payload(decode=True)
            charset = guess_charset(msg)
            if isinstance(content, bytes):
                # Always end up with text: bytes + '...' would raise TypeError
                # when no (or an unknown) charset is declared.
                try:
                    content = content.decode(str(charset) if charset else 'utf-8', errors='replace')
                except LookupError:
                    content = content.decode('utf-8', errors='replace')
            print('%sText: %s' % (pad, str(content or '') + '...'))
        else:
            print('%sAttachment: %s' % (pad, content_type))
138 
139 
# Save the attachments of a parsed message.
def get_att(msg_in):
    """Write every attachment of ``msg_in`` into the ``附件`` directory.

    Args:
        msg_in: An ``email.message.Message``.

    Returns:
        The list of saved file names, in message order.
    """
    import os  # not among this listing's module-level imports

    attachment_files = []
    path = r"附件"  # target directory, created on demand
    for part in msg_in.walk():
        file_name = part.get_filename()
        if not file_name:
            continue
        # Decode RFC 2047 encoded-words in the attachment name.
        try:
            filename = str(email.header.make_header(email.header.decode_header(file_name)))
        except (LookupError, ValueError):
            filename = str(file_name)
        # The name comes from the sender: keep only its last path component so
        # it cannot point outside the target directory.
        filename = os.path.basename(filename.replace('\\', '/')) or 'attachment'

        data = part.get_payload(decode=True)
        if data is None:
            continue
        os.makedirs(path, exist_ok=True)
        # os.path.join keeps the path portable; binary mode for the payload.
        with open(os.path.join(path, filename), 'wb') as att_file:
            att_file.write(data)
        attachment_files.append(filename)
        print(f'附件({len(attachment_files)}): {filename}')
    return attachment_files