crawler_exa1

编辑中...

#! /usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Tdcqma

'''
网页爬虫,版本 2017-09-20 21:16
'''

import urllib.request
import ssl,re
import smtplib,email
import datetime

# Daily crawler: download the first page of the NSFOCUS vulnerability listing,
# keep the entries dated today that mention a monitored product, write them to
# a local text file and mail the same text through the 163.com SMTP server.

# Listing page that is scraped (only page 1 is requested).
URL = "http://www.nsfocus.net/index.php?act=sec_bug&type_id=&os=&keyword=&page=1"
# Prefix put in front of the relative link sliced out of each listing line.
SITE_ROOT = "http://www.nsfocus.net"
# Local file that receives a copy of the report (overwritten on every run).
REPORT_FILE = "secInfo-lvmeng.txt"

SMTP_HOST = "smtp.163.com"
# NOTE(review): placeholder credentials are hard-coded; load real ones from
# the environment or a secrets store instead of committing them.
MAIL_FROM = "from_mail@163.com"
MAIL_PASSWORD = "from_mail_pass"
MAIL_TO = "to_mail@163.com"
MAIL_SUBJECT = "[爬虫安全通告-绿盟]"

# The report text starts with a single space; kept so the file and mail body
# stay identical to what earlier versions of this script produced.
REPORT_PREFIX = " "

# One row per monitored product, in report order:
#   (substring searched for in the title, section heading, text between the
#    title and the link).
# The first row whose substring occurs in a title wins.  The Cisco separator
# has one more space than the others; kept as-is to preserve the output.
CATEGORIES = (
    ("Apache Tomca", "Apache Tomcat 漏洞告警(当前生产版本为7.0.68)\n\n", "\n      >> "),
    ("WebLogic", "WebLogic 漏洞告警(当前生产版本为10.33)\n\n", "\n      >> "),
    ("Cisco", "Cisco 漏洞告警(当前生产版本为1.0.35)\n\n", "\n       >> "),
)


def fetch_page(url, timeout=30):
    """Download *url* and return the response body decoded as UTF-8.

    Raises whatever ``urllib.request.urlopen`` raises (including a timeout
    after *timeout* seconds) and ``UnicodeDecodeError`` if the body is not
    valid UTF-8.
    """
    # NOTE(review): this turns off TLS certificate verification for every
    # HTTPS request made by this process.  It is kept because the script has
    # always done so, but it is unsafe; remove it once the target is known to
    # present a valid certificate.
    ssl._create_default_https_context = ssl._create_unverified_context

    request = urllib.request.Request(url)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode('utf-8')


def find_today_lines(html, today):
    """Return every ``<li><span>`` + *today* line fragment found in *html*.

    Each match runs from the marker to the end of its line, because ``.``
    does not match a newline.
    """
    # re.escape keeps the date literal even if its format ever contains
    # regex metacharacters.
    pattern = "<li><span>" + re.escape(today) + ".*"
    return re.findall(pattern, html)


def build_report(lines, today):
    """Group the alerts in *lines* by product and return the report text.

    *lines* are the fragments returned by ``find_today_lines``.  A section is
    emitted only when at least one alert matched its product; alerts that
    match no product are dropped.
    """
    sections = [[heading] for _keyword, heading, _separator in CATEGORIES]

    for line in lines:
        # Fixed offsets inherited from the original script.
        # NOTE(review): assumes each fragment has a 13-character relative
        # link at [37:50], the title starting at 52 and exactly 40 trailing
        # characters after the title -- TODO confirm against the live page.
        link = SITE_ROOT + line[37:50]
        title = line[52:-40]

        for section, (keyword, _heading, separator) in zip(sections, CATEGORIES):
            if keyword in title:
                section.append(today + " " + title + separator + link + '\n\n')
                break  # first matching product wins

    populated = ("".join(section) for section in sections if len(section) > 1)
    return REPORT_PREFIX + "".join(populated)


def build_message(body):
    """Return a MIME text message carrying *body* as UTF-8.

    Using the email package adds the MIME-Version, Content-Type and
    Content-Transfer-Encoding headers and produces an ASCII-safe
    serialisation, instead of sending raw 8-bit bytes with no declared
    charset.
    """
    # Imported here because the file only does ``import email``, which does
    # not load these submodules.
    from email.header import Header
    from email.mime.text import MIMEText

    message = MIMEText(body, "plain", "utf-8")
    message["From"] = MAIL_FROM
    message["To"] = MAIL_TO
    message["Subject"] = Header(MAIL_SUBJECT, "utf-8")
    return message


def send_report(body):
    """Mail *body* from MAIL_FROM to MAIL_TO through SMTP_HOST.

    Raises ``smtplib.SMTPException`` (or ``OSError``) on failure; the
    connection is closed in either case.
    """
    message = build_message(body)
    # NOTE(review): login happens without STARTTLS, so credentials cross the
    # network unencrypted -- consider smtplib.SMTP_SSL or smtp.starttls().
    with smtplib.SMTP(SMTP_HOST) as smtp:
        smtp.login(MAIL_FROM, MAIL_PASSWORD)
        smtp.sendmail(MAIL_FROM, [MAIL_TO], message.as_string())
        print('mail send success!')


def main():
    """Fetch the listing, save today's report to REPORT_FILE and mail it."""
    today = str(datetime.date.today())
    html = fetch_page(URL)
    report = build_report(find_today_lines(html, today), today)

    with open(REPORT_FILE, 'w', encoding='utf-8') as report_file:
        report_file.write(report)

    send_report(report)


if __name__ == "__main__":
    main()

获取爬虫邮件截图:

 

posted @ 2017-09-20 21:25  北海悟空  阅读(282)  评论(0)  编辑  收藏  举报