jrhmpt01:/root/lwp/0526# cat a2.pl
use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
# HTTP client: 10 second timeout, custom User-Agent string, proxy settings
# picked up from the environment.
my $ua = LWP::UserAgent->new(
    timeout => 10,
    agent   => "Mozilla/8.0",
);
$ua->env_proxy;
use HTML::TreeBuilder::XPath;
# Parse the saved HTML page into a tree that can be queried with XPath.
my $html_file = "0526.txt";
my $tree = HTML::TreeBuilder::XPath->new;
# parse_file returns undef (with $! set) when the file cannot be opened;
# without this check the script would carry on with an empty tree and
# print nothing useful.
defined $tree->parse_file($html_file)
    or die "Cannot parse $html_file: $!\n";
# All <li> elements found in the parsed document.
my @pages = $tree->find_by_tag_name('li');
# Collect the distinct "class" attribute values of the <li> elements,
# keeping them in first-seen order.
# Besides the articles of each category, @urlall also contains the articles
# from the "most read" ranking.
my @urlall;
my %seen_class;    # class value => number of times already encountered
foreach my $page (@pages) {
    my $class = $page->attr('class');
    # Skip <li> elements that have no class attribute or an empty one.
    # (A defined/length test is used instead of plain truthiness so that a
    # class value of "0" is not dropped.)
    next unless defined $class && length $class;
    print "\$_ is $class\n";
    # A hash lookup replaces the experimental smartmatch operator (~~): it
    # compares strictly as strings and avoids rescanning @urlall every time.
    push @urlall, $class unless $seen_class{$class}++;
}
print @urlall;
print "\n";
# For every distinct class value, print the XPath expression that selects
# the <li> elements carrying exactly that class attribute, followed by the
# values findvalues() returns for them.
# Example expression: /html/body//li[@class="alcw4 alcw41"]
foreach my $var (@urlall) {
    # XPath 1.0 string literals have no escape sequence, so choose a quote
    # character that does not occur in the value. If both kinds of quote
    # occur, assemble the string with concat() from double-quoted pieces
    # joined by a single-quoted '"'.
    my $literal;
    if ($var !~ /"/) {
        $literal = qq("$var");
    }
    elsif ($var !~ /'/) {
        $literal = qq('$var');
    }
    else {
        # The -1 limit keeps trailing empty pieces so no quote is lost.
        $literal = 'concat('
            . join(q{,'"',}, map { qq("$_") } split /"/, $var, -1)
            . ')';
    }
    my $url = "/html/body//li[\@class=$literal]";
    print "\$url is $url\n";
    my @total = $tree->findvalues($url);
    print @total;
    print "\n";
}
jrhmpt01:/root/lwp/0526# cat 0526.txt
<li class="alcw4 alcw41">
<div class="ajjbfb txdbfb bfb100">100<span>%</span></div>
<div class="ajjbfb txdbfb bfb100">200<span>%</span></div>
</li>
<li class="alcw4 alcw42">
<div class="ajjbfb txdbfb bfb100">100<span>%</span></div>
<div class="ajjbfb txdbfb bfb100">200<span>%</span></div>
<div class="ajjbfb txdbfb bfb100">scan<span>huihui</span></div>
</li>
jrhmpt01:/root/lwp/0526# perl a2.pl
$_ is alcw4 alcw41
$_ is alcw4 alcw42
alcw4 alcw41alcw4 alcw42
$url is /html/body//li[@class="alcw4 alcw41"]
100%200%
$url is /html/body//li[@class="alcw4 alcw42"]
100%200%scanhuihui