Crawling Vulnerability Data from an NSFOCUS Vulnerability Scanner
For work I needed the vulnerability data held by a security appliance's scanner, but the data the scanner exports is encrypted, so the only practical option was to crawl it from the web interface.
The code is as follows:
# -*- coding: utf-8 -*-
import os
import re
import requests
import xlsxwriter
from bs4 import BeautifulSoup
from xlrd import open_workbook
from xlutils.copy import copy

# base name of the output workbook
put_name = 'loudong'
def login(login_url, username, password):
    # request headers
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Origin': 'https://10.10.10.10',
        'Referer': 'https://10.10.10.10/accounts/login_view/'
    }
    # fetch the CSRF token from the login page; it is embedded as e.g.
    # <input type='hidden' name='csrfmiddlewaretoken' value="mvTgwjCx1iTzAdRROOPvk8YctcbO9uXV">
    sss = requests.Session()
    r = sss.get(url='https://10.10.10.10/accounts/login/', headers=my_headers, verify=False)
    pattern = re.compile(r'<input type=\'hidden\' name=\'csrfmiddlewaretoken\' value="(.*)">')
    result = pattern.findall(r.text)
    token = result[0]
    # POST data for the login form
    my_data = {
        'username': username,
        'password': password,
        'csrfmiddlewaretoken': token
    }
    # log in; the session object keeps the authenticated cookies
    r = sss.post(login_url, headers=my_headers, data=my_data, verify=False)
    return sss
def get_date(url, sss):
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
        'Origin': 'https://10.10.10.10',
        'Referer': 'https://10.10.10.10/template/show_template?temp_id=12&temp_name=%E5%85%A8%E9%83%A8%E6%BC%8F%E6%B4%9E%E6%89%AB%E6%8F%8F&temp_desc=%E6%9C%AC%E6%A8%A1%E6%9D%BF%E6%89%AB%E6%8F%8F%E6%89%80%E6%9C%89%E6%8F%92%E4%BB%B6&vlun_count_allundefined'
    }
    # parameters expected by the vulnerability detail page
    my_data = {
        'val': 'System',
        'temp_id': '12',
        'conditions': 'is_dangerous =',
        'op_type': 'showStemp'
    }
    r = sss.get(url, headers=my_headers, data=my_data, verify=False, timeout=5)
    # check whether the page is empty: a real detail page is served as utf-8
    if r.apparent_encoding == 'utf-8':
        CVE_id = ''
        cvss_score = ''
        CNCVE_id = ''
        risk_score = ''
        print('page is not empty....')
    else:
        print(url, '--> page is empty!!')
        return
    soup = BeautifulSoup(r.text, "html.parser")
    # table rows with class "odd"
    tables = soup.find_all('tr', class_='odd')
    for i, env in enumerate(tables):
        # vulnerability name
        if i == 0:
            leak_name = env.get_text()
        # solution
        if i == 1:
            str_env = env.get_text()
            solution = str_env[5:]
            solution = solution.replace('\n', '')
        # CVE id
        if i == 3:
            str_env = env.get_text()
            CVE_id = str_env[7:]
        # CVSS score
        if i == 5:
            str_env = env.get_text()
            cvss_score = str_env[7:]
    # table rows with class "even"
    tables2 = soup.find_all('tr', class_='even')
    for i, env in enumerate(tables2):
        # vulnerability description
        if i == 0:
            str_env = env.get_text()
            leak_desc = str_env[6:].strip()
            leak_desc = leak_desc.replace('\n', '')
        # risk score
        if i == 1:
            str_env = env.get_text()
            risk_score = str_env[5:]
        # discovery date
        if i == 2:
            str_env = env.get_text()
            data_discovery = str_env[5:]
        # CNCVE id
        if i == 3:
            str_env = env.get_text()
            CNCVE_id = str_env[9:]
    # CNVD / CNNVD ids sit in the last table cells
    tables3 = soup.find_all('td')
    cnvd_id = ''
    if "CNVD" in tables3[-1].get_text():
        cnvd_id = tables3[-1].get_text()
    cnnvd_id = ''
    if "CNNVD" in tables3[-6].get_text():
        cnnvd_id = tables3[-6].get_text()
    print('data returned ---> success')
    log_file(url)
    return leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score
def w_file(leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score):
    # create the workbook on first use
    if not os.path.exists(put_name + ".xls"):
        workbook = xlsxwriter.Workbook(put_name + ".xls")
        worksheet = workbook.add_worksheet('employee')
        workbook.close()
    # read the existing workbook (xlrd < 2.0 is assumed, since the file
    # created by xlsxwriter is xlsx-formatted despite the .xls name)
    r_xls = open_workbook(put_name + ".xls")
    row = r_xls.sheets()[0].nrows   # number of rows already written
    excel = copy(r_xls)             # convert the xlrd object into an xlwt object
    table = excel.get_sheet(0)      # get the sheet to write to
    # append one row to the sheet
    table.write(row, 0, leak_name)
    table.write(row, 1, solution)
    table.write(row, 2, CVE_id)
    table.write(row, 3, cvss_score)
    table.write(row, 4, leak_desc)
    table.write(row, 5, data_discovery)
    table.write(row, 6, CNCVE_id)
    table.write(row, 7, cnvd_id)
    table.write(row, 8, cnnvd_id)
    table.write(row, 9, risk_score)
    excel.save(put_name + ".xls")
def e_file(str_f):
    # append a failed URL (plus the exception) to error.txt
    f = open('error.txt', 'a+')
    f.write(str(str_f) + '\n')
    f.close()

def log_file(str_f):
    # append a successfully crawled URL to w_file.txt
    f = open('w_file.txt', 'a+')
    f.write(str(str_f) + '\n')
    f.close()
if __name__ == '__main__':
    login_success = login("https://10.10.10.10/accounts/login_view/", "username", "password")
    # vulnerability detail pages are addressed by numeric id, so enumerate a range
    for i in range(50000, 60000):
        url = "https://10.10.10.10/template/show_vul_desc?id=%s" % (i)
        try:
            leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score = get_date(url, login_success)
            w_file(leak_name, solution, CVE_id, cvss_score, leak_desc, data_discovery, CNCVE_id, cnvd_id, cnnvd_id, risk_score)
        except Exception as e:
            e_file(url + str(e))
            print(url, e)
        else:
            print(url, "crawl finished")
Summary: the login step ran into a CSRF token problem that took quite a while to solve, and since the vulnerability detail page IDs follow no obvious pattern, the only option was to enumerate them exhaustively.
The code still has plenty of room for optimization; I will improve it when I have time.
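As a side note on the token problem: instead of a regular expression, the hidden csrfmiddlewaretoken field can also be located with BeautifulSoup, which is less sensitive to changes in attribute quoting. A minimal sketch, assuming the login page keeps the same hidden input as above (fetch_csrf_token is just an illustrative helper name, not part of the script):

# minimal sketch: extract the CSRF token with BeautifulSoup instead of a regex
import requests
from bs4 import BeautifulSoup

def fetch_csrf_token(session, login_page_url):
    # GET the login page and pull the hidden csrfmiddlewaretoken field
    r = session.get(login_page_url, verify=False)
    soup = BeautifulSoup(r.text, "html.parser")
    field = soup.find("input", attrs={"name": "csrfmiddlewaretoken"})
    return field["value"] if field else None

# usage:
# sss = requests.Session()
# token = fetch_csrf_token(sss, "https://10.10.10.10/accounts/login/")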
This post is from 博客园 (cnblogs), author: Chuan_Chen. Please credit the original link when reposting: https://www.cnblogs.com/wangcc7/p/13648924.html