Python 解析并提取 .eml 邮件的内容及附件
import re
import os
import email
from email.header import decode_header
from email.utils import parsedate_to_datetime
def _decode_mime_words(raw):
    """Decode an RFC 2047 encoded header value into a plain ``str``.

    Every fragment returned by ``decode_header`` is decoded and the results
    are concatenated, so headers split over several encoded words are not
    truncated.

    :param raw: header value (``str``, ``Header`` object, or ``None``)
    :return: decoded text; an empty string when ``raw`` is ``None``
    """
    if raw is None:
        return ""
    pieces = []
    for chunk, charset in decode_header(str(raw)):
        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode(charset or "ascii", errors="replace")
            except LookupError:
                # Charset label not known to Python (e.g. "unknown-8bit").
                chunk = chunk.decode("utf-8", errors="replace")
        pieces.append(chunk)
    return "".join(pieces)


def _safe_attachment_name(name):
    """Reduce an attachment name to a bare file name.

    The name comes from the message itself, so directory components (for
    example ``../``) are dropped to keep the file inside the target directory.

    :param name: decoded attachment name taken from the message
    :return: file name without any directory part; ``"attachment"`` when
        nothing usable is left
    """
    cleaned = name.replace("\x00", "").replace("\\", "/").rstrip("/")
    base = os.path.basename(cleaned).strip()
    if base in ("", ".", ".."):
        return "attachment"
    return base


def parse_eml(eml_fp, attr_dir):
    """
    Parse an .eml file, print its headers and text bodies, and save attachments.

    Prints the subject, sender address, sender display name, date, every
    text body part and the name of every attachment; attachments are written
    into ``attr_dir`` (created if it does not exist).

    :param eml_fp: path of the .eml file
    :param attr_dir: directory in which attachments are saved
    :return: None
    """
    from email.utils import parseaddr

    os.makedirs(attr_dir, exist_ok=True)

    # Parse from bytes: a message may contain 8-bit data that does not match
    # the platform's default text encoding.
    with open(eml_fp, "rb") as file:
        msg = email.message_from_binary_file(file)

    # Subject
    subject = _decode_mime_words(msg["Subject"])
    print("主题:", subject)

    # Sender: split the address from the display name first, then decode the
    # name, so decoded characters such as "<" or "," cannot confuse the split.
    raw_name, from_ip = parseaddr(str(msg["From"] or ""))
    print("发件人邮箱:", from_ip)
    from_n = _decode_mime_words(raw_name)
    print("发件人名称:", from_n)

    # Date (None when the header is missing or cannot be parsed)
    date_header = msg["Date"]
    try:
        received_date = (
            parsedate_to_datetime(str(date_header)) if date_header else None
        )
    except (TypeError, ValueError):
        received_date = None
    print("接收时间:", received_date)

    # Bodies and attachments
    for par in msg.walk():
        if par.is_multipart():
            # Container parts carry no data of their own.
            continue

        # get_filename() reads the Content-Disposition "filename" parameter
        # and falls back to the Content-Type "name" parameter.
        name = par.get_filename()
        if name:
            # Attachment
            attr_name = _safe_attachment_name(_decode_mime_words(name))
            print("附件名:", attr_name)
            attr_data = par.get_payload(decode=True) or b""
            attr_fp = os.path.join(attr_dir, attr_name)
            with open(attr_fp, "wb") as f_write:
                f_write.write(attr_data)
            continue

        if par.get_content_maintype() != "text":
            # Unnamed non-text part (e.g. an inline image): not a body.
            continue

        # Body text
        text_char = par.get_content_charset() or "utf-8"
        payload = par.get_payload(decode=True) or b""
        try:
            text = payload.decode(text_char, errors="replace")
        except LookupError:
            # Declared charset is not known to Python.
            text = payload.decode("utf-8", errors="replace")
        if par.get_content_type() == "text/plain":
            print("邮件正文:", text)
        else:
            # Any other text/* part is reported under the HTML label.
            print("HTML正文:", text)
    print("-" * 60)
本文来自博客园,仅供参考学习,如有不当之处还望不吝赐教,不胜感激!转载请注明原文链接:https://www.cnblogs.com/rong-z/p/17670947.html
作者:cnblogs用户