根据li标签 查找class="alcw4 alcw41"对应的值

jrhmpt01:/root/lwp/0526# cat a2.pl 
use  LWP::UserAgent;
use DBI;  
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use strict;
use warnings;
use HTML::TreeBuilder::XPath;

# Purpose: parse a local HTML file, collect the distinct `class` attribute
# values carried by its <li> elements, and then, for every distinct class,
# print the text content of the <li> elements whose class attribute equals
# that value exactly (looked up through an XPath query).
#
# Usage: perl a2.pl [html_file]     (html_file defaults to "0526.txt")

# HTTP client set up by the original script. Nothing below issues a request;
# it is kept only so the script's configuration stays as it was.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

# Input file: first command-line argument, falling back to the original
# hard-coded name so existing invocations keep working.
my $html_file = $ARGV[0] // '0526.txt';

my $tree = HTML::TreeBuilder::XPath->new;

# parse_file returns undef (with $! set) when the file cannot be opened;
# fail loudly instead of silently printing nothing.
$tree->parse_file($html_file)
    or die "Cannot parse '$html_file': $!\n";

# Distinct class values, in order of first appearance.
my %seen;
my @urlall;
for my $li ($tree->find_by_tag_name('li')) {
    my $class = $li->attr('class');

    # Skip <li> elements without a usable class attribute. Testing
    # defined/length (rather than truthiness) keeps a class of "0".
    next unless defined $class && length $class;

    print "\$_ is $class\n";

    # Hash-based de-duplication replaces the deprecated smartmatch (~~).
    push @urlall, $class unless $seen{$class}++;
}

print @urlall;
print "\n";

for my $var (@urlall) {
    # e.g. /html/body//li[@class="alcw4 alcw41"]
    my $url = '/html/body//li[@class=' . xpath_literal($var) . ']';
    print "\$url is $url\n";

    my @total = $tree->findvalues($url);
    print @total;
    print "\n";
}

# Release the parse tree explicitly.
$tree->delete;

# xpath_literal($text)
# Returns $text as an XPath 1.0 string literal. XPath 1.0 has no escape
# sequences inside literals, so the delimiter is chosen to avoid whichever
# quote character the text contains; when it contains both kinds, the
# literal is assembled from pieces with concat().
sub xpath_literal {
    my ($text) = @_;

    return qq("$text") if index($text, '"') < 0;
    return qq('$text') if index($text, "'") < 0;

    my @pieces = map { qq("$_") } split /"/, $text, -1;
    return 'concat(' . join(q{, '"', }, @pieces) . ')';
}
jrhmpt01:/root/lwp/0526# cat 0526.txt 
  <li class="alcw4 alcw41">
                        <div class="ajjbfb txdbfb bfb100">100<span>%</span></div>
                        <div class="ajjbfb txdbfb bfb100">200<span>%</span></div>
                    </li>


  <li class="alcw4 alcw42">
                        <div class="ajjbfb txdbfb bfb100">100<span>%</span></div>
                        <div class="ajjbfb txdbfb bfb100">200<span>%</span></div>
                        <div class="ajjbfb txdbfb bfb100">scan<span>huihui</span></div>
                    </li>

jrhmpt01:/root/lwp/0526# perl a2.pl 
$_ is alcw4 alcw41
$_ is alcw4 alcw42
alcw4 alcw41alcw4 alcw42
$url is /html/body//li[@class="alcw4 alcw41"]
100%200%
$url is /html/body//li[@class="alcw4 alcw42"]
100%200%scanhuihui

posted @ 2016-05-26 20:09  czcb  阅读(144)  评论(0)  编辑  收藏  举报