PHP XPath 解析

<?php

require './vendor/autoload.php';

// Fetch the blog index page and print the HTTP status code.
$client = new GuzzleHttp\Client();
$url = "https://www.cnblogs.com/brady-wang/";
$res = $client->request('GET', $url);
echo $res->getStatusCode();

// Casting the body to a string returns the whole payload regardless of the
// current stream position.
$html = (string) $res->getBody();

$dom = new DOMDocument();

// load html into document object model
// Real-world HTML is rarely well-formed: collect libxml's parse warnings
// internally instead of silencing every error with "@", then restore the
// previous libxml setting.
$previousLibxmlSetting = libxml_use_internal_errors(true);
if ($html !== '') {
    // loadHTML() throws a ValueError on an empty string in PHP 8, so skip it.
    $dom->loadHTML($html);
}
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// create domxpath instance
$xPath = new DOMXPath($dom);

// Select every attribute of each <a> whose class contains "postTitle".
$elements = $xPath->query('//a[contains(@class,"postTitle")]/@*');

// First pass: print each attribute as "name :  value".
foreach ($elements as $e) {
    echo $e->nodeName ." :  ". $e->nodeValue .PHP_EOL;
}

// Second pass: dump nodeValue and textContent of each attribute node via
// index-based access on the node list.
for($i=0;$i<$elements->length;$i++){
    $item = $elements->item($i);
    var_dump($item->nodeValue);
    var_dump($item->textContent);
}

  

<?php

require './vendor/autoload.php';



/**
 * Download a page over HTTP GET and return its raw body.
 *
 * The HTTP status code of the response is echoed as a side effect.
 *
 * @param string $url Address of the page to download.
 *
 * @return string The response body.
 */
function getContent($url)
{
    $client = new GuzzleHttp\Client();
    $res = $client->request('GET', $url);
    echo $res->getStatusCode();

    // Casting the body stream to a string yields the complete payload
    // regardless of the current stream position.
    return (string) $res->getBody();
}

/**
 * Extract the link targets of all post-title anchors from an HTML document.
 *
 * An anchor qualifies when its class attribute contains "postTitle".
 *
 * @param string $html Raw HTML markup.
 *
 * @return string[] The href values of the matching anchors, in document
 *                  order; an empty array when the input is empty or nothing
 *                  matches.
 */
function parse($html)
{
    $html = (string) $html;

    // DOMDocument::loadHTML() throws a ValueError on an empty string in
    // PHP 8; there is nothing to extract in that case anyway.
    if (trim($html) === '') {
        return [];
    }

    $dom = new DOMDocument();

    // Real-world HTML is rarely well-formed. Collect libxml's warnings
    // internally instead of silencing every error with "@", and always
    // restore the caller's libxml setting afterwards.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    try {
        $dom->loadHTML($html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlSetting);
    }

    // Ask XPath for the href attributes directly rather than fetching every
    // attribute and filtering by name afterwards.
    $xPath = new DOMXPath($dom);
    $hrefs = $xPath->query('//a[contains(@class,"postTitle")]/@href');

    $urls = [];
    if ($hrefs === false) {
        return $urls;
    }

    foreach ($hrefs as $href) {
        $urls[] = $href->nodeValue;
    }

    return $urls;
}
// Entry point: download the blog index page, pull out the post links and
// dump them.
$url = 'https://www.cnblogs.com/brady-wang/';
$urls = parse(getContent($url));
var_dump($urls);

  

 

https://ask.csdn.net/questions/833515

posted @ 2021-11-17 18:08  brady-wang  阅读(226)  评论(0)  编辑  收藏  举报