[工具开发] Perl 爬虫脚本--从美国国家漏洞数据库抓取实时信息
一、简介
美国国家漏洞数据库(National Vulnerability Database,简称 NVD)收集了操作系统、应用软件的大量漏洞信息,当有新的漏洞出现时,它也会及时发布出来。
由于信息量巨大,用户每次都需要到它的网站上手动搜索,比较麻烦。如果能有一个工具每天自动分析它发布的漏洞数据,一旦发现用户关心的新漏洞信息,就通过邮件自动发送给公司的系统管理员或安全管理员,那就方便多了。
下面我写的这个工具就是起到这个作用的.图片是工具自动发送的邮件截图:
它每天都会根据用户设置的关键字自动抓取 NVD 数据,然后和前一天抓取的数据进行对比分析,当发现今天有新的数据时就发送邮件给用户,否则不发送.
二、效果截图
三、源代码
#!/usr/bin/perl -w
#hahp@qq.com
#
# Daily NVD watcher.
#
# For every configured keyword the script downloads the NVD search-result
# page, extracts the CVE anchors it contains, stores today's CVE IDs in
# "$REC_DIR/nvd_<today>.txt", compares them with the file written the day
# before and, when there are IDs that were not present yesterday, mails an
# HTML report to the configured recipients.
use 5.10.1;
use strict;
use warnings;
use LWP::Simple;
use Net::SMTP;
use MIME::Base64;
use Encode qw/ decode encode /;
use POSIX qw/ strftime /;

# NOTE: there is deliberately no "use utf8" here.  The non-ASCII literals
# below must stay raw UTF-8 byte strings, because they are handed directly
# to encode_base64() and to the SMTP socket.

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
my $REC_DIR        = '/home/hupeng/nvd';
my @query_keywords = qw/ kernel tomcat apache spring /;

# Dates are computed in-process instead of shelling out to date(1).
my $now = time;
my ( $mday, $mon, $year ) = ( localtime $now )[ 3, 4, 5 ];

my $TO_DAY = strftime( '%Y-%m-%d', localtime $now );

# POSIX::strftime normalises out-of-range fields, so "mday - 1" also works on
# the first day of a month.  Noon is used to stay clear of DST transitions.
my $LAST_DAY = strftime( '%Y-%m-%d', 0, 0, 12, $mday - 1, $mon, $year );

# The search starts at the *previous* calendar month (no leading zero).
# $mon is 0-based, so it already equals the previous month's 1-based number;
# in January the start rolls back to December of the previous year.
my ( $THIS_MONTH, $THIS_YEAR ) =
    $mon == 0
    ? ( 12, $year + 1899 )
    : ( $mon, $year + 1900 );

my $nvdfile_lastday = "$REC_DIR/nvd_$LAST_DAY.txt";
my $nvdfile_today   = "$REC_DIR/nvd_$TO_DAY.txt";
my $nvd_url_pre     = 'http://web.nvd.nist.gov/view/vuln/detail?vulnId=';

#my $sev_base = 'MEDIUM_HIGH';
my $theSmtpServer  = 'XXXX';
my $theSmtpUser    = 'XXXX';
my $theSmtpPasswd  = 'XXXXX';
my $theSmtpSend    = 'XXXXX';
my @theSmtpTo      = ( 'hupeng@test2.com', 'hupeng@test.com' );
my $theSmtpSubject = 'NVD 新记录 ' . $TO_DAY;

my $query_keywords_str = arr2str0(@query_keywords);
my $theSmtpBody =
    '<p>NVD 新记录</p><br><p>关键字:' . $query_keywords_str . '</p><br>';

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# str2arr($str)
# Splits a multi-line string on "\n" and returns the lines sorted
# (string order).  An undefined argument yields an empty list.
sub str2arr {
    my ($str) = @_;
    return () unless defined $str;

    $str =~ s/^\n|\n$//g;
    my @arr = split /\n/, $str;
    @arr = sort @arr;
    return @arr;
}

# arr2str(@items)
# Returns the sorted items concatenated, each one followed by "\n".
sub arr2str {
    my @arr = @_;
    return join '', map { $_ . "\n" } sort @arr;
}

# arr2str0(@items)
# Returns the sorted items joined with ", ".
sub arr2str0 {
    my @arr = @_;
    return join ', ', sort @arr;
}

# getContent(@keywords)
# Downloads the search-result page for every keyword and returns the sorted,
# de-duplicated list of extracted anchor fragments.  Every element has the
# form "<keyword>Anchor_<rest of the whitespace-stripped line>\n".
# A keyword whose page cannot be downloaded is skipped with a warning.
sub getContent {
    my @keywords = @_;
    my %seen;

    for my $keyword (@keywords) {

        #my $url = "http://web.nvd.nist.gov/view/vuln/search-results?adv_search=true\&cves=on\&query=$query_keyword\&pub_date_start_month=$start_month\&pub_date_start_year=$start_year\&cvss_sev_base=$sev_base\&cve_id=";
        #my $url = "http://web.nvd.nist.gov/view/vuln/search-results?adv_search=true\&cves=on\&query=$query_keyword";
        my $url =
              'http://web.nvd.nist.gov/view/vuln/search-results'
            . '?adv_search=true&cves=on'
            . "&query=$keyword"
            . "&pub_date_start_month=$THIS_MONTH"
            . "&pub_date_start_year=$THIS_YEAR"
            . '&cve_id=';

        # LWP::Simple::get returns undef on any failure.
        my $page = get($url);
        unless ( defined $page ) {
            warn "getContent: could not fetch results for '$keyword'\n";
            next;
        }

        for my $line ( str2arr($page) ) {

            # All whitespace is removed so the pattern below does not depend
            # on how the HTML happens to be laid out.
            ( my $squeezed = $line ) =~ s/\s+//g;
            if ( $squeezed =~
                m/BodyPlaceHolder_cplPageContent_plcZones_lt_zoneCenter_VulnerabilitySearchResults_VulnResultsRepeater_[\w]+(Anchor_.*$)/
                )
            {
                $seen{ $keyword . $1 . "\n" } = 1;
            }
        }
    }

    my @content = sort keys %seen;
    return @content;
}

# getNvd($nvd_file)
# Reads a record file written by putNvd and returns the hash
#   ( maxnvd => <greatest line by string comparison, '0' if none>,
#     nvds   => [ all lines, trailing "\n" kept ] ).
# A file that cannot be opened is treated as empty.
sub getNvd {
    my ($nvd_file) = @_;
    my @nvds;

    if ( open my $fh, '<', $nvd_file ) {
        @nvds = <$fh>;
        close $fh;
    }

    my $maxnvd = '0';
    for my $id (@nvds) {
        $maxnvd = $id if $id gt $maxnvd;
    }

    return ( 'maxnvd' => $maxnvd, 'nvds' => [@nvds] );
}

# putNvd($content, $nvd_file)
# Writes the ID of every "...Anchor_N">ID</a>" entry found in @$content to
# $nvd_file, one per line (the file is overwritten).
# Returns 1 on success, 0 (after a warning) when the file cannot be written.
sub putNvd {
    my ( $content, $nvd_file ) = @_;

    my $fh;
    unless ( open $fh, '>', $nvd_file ) {
        warn "putNvd: cannot write '$nvd_file': $!\n";
        return 0;
    }

    for my $entry ( @{$content} ) {
        if ( $entry =~ m/[\w-]+Anchor_[\d]+">([\w-]+)<\/a>/ ) {
            print {$fh} $1 . "\n";
        }
    }

    # Buffered write errors only surface when the handle is closed.
    unless ( close $fh ) {
        warn "putNvd: error closing '$nvd_file': $!\n";
        return 0;
    }
    return 1;
}

# getNewNvdRds($maxNvd_lastday, $nvdsToday, $content [, $nvdsLastday])
# Returns one HTML line per record of @$nvdsToday that is considered new.
#
#   $maxNvd_lastday  greatest ID of the previous day (see getNvd)
#   $nvdsToday       array ref of today's IDs (a trailing "\n" is tolerated)
#   $content         array ref as returned by getContent
#   $nvdsLastday     optional array ref of the previous day's IDs
#
# When $nvdsLastday is given, an ID is new if it is absent from that list.
# Comparing against the maximum alone misses records whose ID sorts below
# yesterday's maximum, and string order mis-sorts IDs whose sequence numbers
# have different lengths; that comparison is only the fallback used when
# $nvdsLastday is omitted.
# Each ID is reported once.  A record without a matching severity entry in
# @$content is still reported, just without the "CVSS Severity" part.
sub getNewNvdRds {
    my ( $maxNvd_lastday, $nvdsToday, $content, $nvdsLastday ) = @_;

    my %known;
    if ($nvdsLastday) {
        for my $old ( @{$nvdsLastday} ) {
            ( my $old_id = $old ) =~ s/\n\z//;
            $known{$old_id} = 1;
        }
    }

    my @newNvds;
    my %reported;

    for my $entry ( @{$nvdsToday} ) {
        my $id = $entry;
        chomp $id;

        my $is_new =
            $nvdsLastday
            ? !$known{$id}
            : ( $entry gt $maxNvd_lastday );
        next unless $is_new;
        next if $reported{$id}++;

        my $link = '<a href="' . $nvd_url_pre . $id . '">' . $id . '</a>';
        my $nvd  = $link . '<br>';

        for my $ln1 ( @{$content} ) {

            # Find the anchor that carries this ID ...
            next
                unless $ln1 =~ m/^([\w-]+Anchor_[\d]+\">)\Q$id\E<\/a>$/;
            my $anchor = $1;

            # ... then the entry with the same anchor prefix that holds the
            # numeric score followed by the severity word.
            for my $ln2 ( @{$content} ) {
                if ( $ln2 =~ m/^\Q$anchor\E([\d.]+)<\/a>([\w]+)$/ ) {
                    $nvd =
                          $link
                        . ' CVSS Severity: '
                        . encode( 'UTF-8', $1 ) . " \n"
                        . encode( 'UTF-8', $2 ) . '<br>';
                }
            }
        }

        push @newNvds, $nvd;
    }

    return @newNvds;
}

# ---------------------------------------------------------------------------
# Main flow
# ---------------------------------------------------------------------------

# get the records of the previous day
my %lastDay        = getNvd($nvdfile_lastday);
my $maxNvd_lastday = $lastDay{'maxnvd'};

# get content of today
# detailed information of the NVD records
my @content = getContent(@query_keywords);

# put values of today
putNvd( [@content], $nvdfile_today ) or exit 1;

# get all values of today
my %today     = getNvd($nvdfile_today);
my @nvdsToday = @{ $today{'nvds'} };

# find new values
# detailed information of the new records, formatted for the mail body
my @newNvdRds =
    getNewNvdRds( $maxNvd_lastday, [@nvdsToday], [@content],
    $lastDay{'nvds'} );

# send email (only when there is something new)
if (@newNvdRds) {
    $theSmtpBody .= arr2str(@newNvdRds);
    $theSmtpBody .= '<br><br>' . $TO_DAY . '<br><br>';

    my $theSmtp = Net::SMTP->new( $theSmtpServer, Timeout => 10 )
        or die "cannot connect to SMTP server '$theSmtpServer': $@\n";

    # Some servers accept mail without authentication, so a failed AUTH is
    # reported but does not abort the run.
    $theSmtp->auth( $theSmtpUser, $theSmtpPasswd )
        or warn 'SMTP authentication failed: ' . $theSmtp->message;

    $theSmtp->mail($theSmtpSend)
        or die 'SMTP sender rejected: ' . $theSmtp->message;
    $theSmtp->to(@theSmtpTo)
        or die 'SMTP recipient rejected: ' . $theSmtp->message;

    $theSmtp->data();
    $theSmtp->datasend("From: $theSmtpSend\n");

    # Address lists in headers are comma separated.
    $theSmtp->datasend( 'To: ' . join( ', ', @theSmtpTo ) . "\n" );
    $theSmtp->datasend("MIME-Version: 1.0\n");
    $theSmtp->datasend("Content-Type:text/html;charset=UTF-8\n");
    $theSmtp->datasend( "Subject:=?UTF-8?B?"
            . encode_base64( $theSmtpSubject, '' )
            . "?=\n" );

    # A single empty line separates the headers from the body.
    $theSmtp->datasend("\n");
    $theSmtp->datasend($theSmtpBody);
    $theSmtp->dataend()
        or die 'SMTP server did not accept the message: '
        . $theSmtp->message;
    $theSmtp->quit;
}