python-邮件提醒功能
当 Scrapy 爬取完成以后,会把本次爬取的详细信息(各状态码出现次数、入库成功/失败条数、爬虫起止时间)发送到指定邮箱
1.首先编写邮件发送模块
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@file: emailHandler.py
@time: 2018/04/21

SMTP wrapper that mails a crawl summary built from the module-level
statistics defined below.
"""
import logging
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr

# Underscore-prefixed so that ``from emailHandler import *`` does not export it.
_logger = logging.getLogger(__name__)

# Shared crawl statistics. They are plain module-level dicts that are mutated
# in place, and send_mail() reads them when no explicit content is given.
status = {}  # HTTP status code -> number of responses seen
pipeline_item = {
    "item": 0,         # items stored successfully
    "item_error": 0,   # items that failed to be stored
    "error_info": "",  # reason of the most recent failure
}
spider_time = {"start": "", "end": ""}  # crawl start / end timestamps


def _build_report():
    """Render the shared crawl statistics as the default mail body."""
    parts = ["\r\n"]
    for code, count in status.items():
        parts.append("状态码%s:%s次\r\n" % (code, count))
    parts.append("\r\n")
    parts.append(
        "存入数据库成功条数:%s 条\r\n存入数据库失败条数:%s 条\r\n"
        % (pipeline_item.get("item"), pipeline_item.get("item_error"))
    )
    # The failure reason is only worth a line when something actually failed.
    if pipeline_item.get("item_error") > 0:
        parts.append("失败原因:%s" % pipeline_item.get("error_info"))
    parts.append("\r\n")
    parts.append(
        "爬虫启动时间:%s \r\n爬虫结束时间:%s"
        % (spider_time.get("start"), spider_time.get("end"))
    )
    return "".join(parts)


class EmailHandler(object):
    """A logged-in SMTP-over-SSL session used to send plain-text mails."""

    # type_ -> (SMTP host, implicit-SSL port). Both servers are reached through
    # smtplib.SMTP_SSL, so both need an SSL port; port 25 is the plain-SMTP
    # port and cannot complete an SSL handshake.
    _SERVERS = {
        0: ('smtp.qq.com', 465),
        1: ('smtp.163.com', 465),
    }

    def __init__(self, user, password, type_=0):
        """
        Connect to the provider's SMTP server and log in.

        :param user:str sender mailbox address (also the login name)
        :param password:str authorization code issued for the sender mailbox
        :param type_:int 0 for QQ mail, 1 for 163 mail
        :raises ValueError: if ``type_`` is not a supported provider
        """
        # Fail early: without this check an unknown type_ left self.server
        # undefined and only surfaced later as a failed send.
        if type_ not in self._SERVERS:
            raise ValueError(
                "unsupported type_ %r: use 0 (QQ) or 1 (163)" % (type_,)
            )
        host, port = self._SERVERS[type_]
        self.user = user
        self.password = password
        self.server = smtplib.SMTP_SSL(host, port)
        self.server.login(self.user, self.password)

    def send_mail(self, to, subject, content=None):
        """
        Send a plain-text mail; falls back to the crawl report when no
        content is supplied.

        :param to:str recipient mailbox address
        :param subject:str mail subject
        :param content:str mail body; when empty, the body is generated from
            ``status``, ``pipeline_item`` and ``spider_time``
        :return:bool True on success, False on failure
        """
        try:
            if not content:
                content = _build_report()
            msg = MIMEText(content, 'plain', 'utf-8')
            msg['From'] = formataddr(['', self.user])
            msg['To'] = formataddr(['', to])
            msg['Subject'] = subject
            self.server.sendmail(self.user, to, msg.as_string())
            print("【%s】邮件发送成功" % subject)
            return True
        except Exception:
            # Best effort by design: never let a reporting failure propagate,
            # but keep the cause in the log instead of discarding it.
            _logger.exception("failed to send mail %r", subject)
            print("【%s】邮件发送失败,请检查信息" % subject)
            return False

    def close(self):
        """End the SMTP session; safe to call if the server already hung up."""
        try:
            self.server.quit()
        except smtplib.SMTPServerDisconnected:
            pass
2.为邮件提醒收集所需的全部数据
2.1在下载中间件中添加如下代码
# NOTE: the wildcard import is kept deliberately. The pipeline and spider
# snippets in this file import pipeline_item / spider_time / EmailHandler
# through the middlewares module, which presumably is this module, so the
# names must stay re-exported here.
from xxx.emailHandler import *


def process_response(self, request, response, spider):
    """Count the response under its HTTP status code and pass it through.

    Meant to live on the downloader middleware class. ``status`` is the
    shared dict from emailHandler that the report mail reads.

    :return: the unchanged ``response``
    """
    # Called with the response returned from the downloader.

    # Must either;
    # - return a Response object
    # - return a Request object
    # - or raise IgnoreRequest

    # The first occurrence of a code must count as 1. It used to be stored as
    # 0, which made every reported count one too low.
    status[response.status] = status.get(response.status, 0) + 1
    return response
2.2 在管道中加入如下代码
from xxx.middlewares import pipeline_item


def process_item(self, item, spider):
    """Insert one scraped item into MongoDB and update the shared counters.

    Reads ``self.host``, ``self.port``, ``self.user``, ``self.passwd``,
    ``self.db`` and ``self.table``. An insert failure is recorded in
    ``pipeline_item`` rather than raised. Connection and authentication
    errors still propagate.

    :return: the unchanged ``item``
    """
    client = MongoClient(self.host, self.port)
    try:
        # Credentials are checked against the "xiaosidb" database.
        db_auth = client.xiaosidb
        db_auth.authenticate(self.user, self.passwd)
        table = client[self.db][self.table]
        try:
            table.insert(dict(item))
            pipeline_item['item'] += 1
        except Exception as e:
            # This used to be "+= 0", so failures were never counted and the
            # failure reason never made it into the report.
            pipeline_item['item_error'] += 1
            # Keep only the message; the report formats it with %s anyway.
            pipeline_item['error_info'] = str(e)
    finally:
        # A client is created per item, so release it here to avoid piling
        # up open connections.
        client.close()
    return item
2.3在spiders 爬虫程序中加入如下代码
import time

from xxx.middlewares import *


def start_requests(self):
    """Record the crawl start time in the shared ``spider_time`` dict."""
    # strftime() without a time tuple formats the current local time.
    spider_time['start'] = time.strftime('%Y-%m-%d %H:%M:%S')
    # NOTE(review): only the bookkeeping line is shown in this snippet; the
    # spider's own requests presumably still have to be produced here.


def close(spider, reason):
    """Record the crawl end time and mail the crawl report."""
    spider_time['end'] = time.strftime('%Y-%m-%d %H:%M:%S')
    to_sender = 'xxxx'  # recipient mailbox address
    mail = EmailHandler(user='xxxx', password='xxx')
    try:
        mail.send_mail(to_sender, '邮件标题')
    finally:
        # Release the SMTP connection opened by EmailHandler.
        mail.server.close()
3. 当爬虫运行完毕以后会发送邮件