推推

导航

Perl爬取铁路违章旅客信息

#! /usr/bin/perl
use strict;
use Encode qw(encode decode);
binmode(STDIN,":encoding(utf8)");
binmode(STDOUT,":encoding(utf8)");
binmode(STDERR,":encoding(utf8)");
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTML::TreeBuilder;

# Build the list of listing-page URLs (pageNo 1 through 14), echoing each
# one to STDOUT as it is generated.
my @urls;
for my $i (1 .. 14) {
	my $url = "http://218.94.123.13:9001/index.htm?name=&pageNo=$i";
	print $url, "\n";
	push @urls, $url;
}

# Open the output file in append mode. The bareword handle FD is kept on
# purpose: getinfo() prints to it by name. Three-arg open keeps the mode
# separate from the path, and the explicit check stops the run instead of
# silently discarding every scraped row when the file cannot be opened.
my $outfile = "/home/abcd/file";
open(FD, ">>", $outfile)
	or die "Cannot open $outfile for appending: $!\n";
binmode(FD, ":encoding(utf8)")
	or die "Cannot set utf8 layer on $outfile: $!\n";

# Fetch and record every page in order. A plain loop is used because the
# calls are made only for their side effects.
for my $url (@urls) {
	getinfo($url);
}

# Buffered write errors only surface when the handle is closed.
close(FD) or die "Cannot close $outfile: $!\n";

# Download one listing page and append its table rows to the FD filehandle.
#
# Parameters:
#   $url - address of the page to fetch with an HTTP GET.
#
# Behaviour:
#   The response body is decoded as UTF-8 and parsed with HTML::TreeBuilder.
#   Rows found by look_down(_tag => "tr", class => "tab_td") are written
#   first, followed by rows found by look_down(_tag => "tr", class => "").
#   Each row becomes one output line: the text of its <td> descendants
#   joined with "||".
#
#   If the request does not succeed, a warning with the HTTP status line is
#   printed and nothing is written for that page, so an error page is never
#   parsed as if it were data.
#
# Returns: nothing.
sub getinfo{
	my ($url)=@_;
	my $browser=LWP::UserAgent->new(); # HTTP client standing in for a browser
	my $request=HTTP::Request->new("GET"=>$url); # build the GET request
	my $response=$browser->request($request); # send it and collect the response

	# Skip pages that could not be fetched instead of scraping an error body.
	unless($response->is_success){
		warn "Failed to fetch $url: ",$response->status_line,"\n";
		return;
	}

	my $html=decode("utf8",$response->content);
	my $p=HTML::TreeBuilder->new_from_content($html);

	# Same two queries as before, concatenated so the output order is
	# unchanged: all "tab_td" rows, then all rows with class "".
	my @rows=(
		$p->look_down(_tag=>"tr",class=>"tab_td"),
		$p->look_down(_tag=>"tr",class=>""),
	);
	foreach my $row (@rows){
		my @cells=$row->find_by_tag_name("td");
		print FD join("||",map{$_->as_text}@cells),"\n";
	}

	# Explicitly free the parse tree; HTML::Element trees contain parent
	# links and are documented as needing delete() when no longer used.
	$p->delete;
	return;
}
  

  

posted on 2016-03-24 14:52  推推  阅读(178)  评论(0)  编辑  收藏  举报