php xpath解析
<?php
// Demo: download a cnblogs index page and dump every attribute found on its
// post-title links, first with foreach and then with index-based item() access.

require './vendor/autoload.php';

$client = new GuzzleHttp\Client();
$url = "https://www.cnblogs.com/brady-wang/";

$res = $client->request('GET', $url);
echo $res->getStatusCode();

// Casting the PSR-7 body to string rewinds the stream before reading, so the
// complete payload is returned regardless of the current stream position.
$html = (string) $res->getBody();

$dom = new DOMDocument();

// loadHTML() rejects an empty source string (a warning on older PHP, a
// ValueError on PHP 8), so only parse when there is something to parse.
if ($html !== '') {
    // Real-world HTML is rarely well-formed. Let libxml collect its parse
    // warnings internally instead of silencing every error with "@", and put
    // the previous error mode back afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $dom->loadHTML($html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
}

$xPath = new DOMXPath($dom);

// "/@*" selects every attribute node of each <a> whose class contains "postTitle".
$elements = $xPath->query('//a[contains(@class,"postTitle")]/@*');

// Iterate the node list directly.
foreach ($elements as $e) {
    echo $e->nodeName ." : ". $e->nodeValue .PHP_EOL;
}

// Same node list, walked by index through item().
for ($i = 0; $i < $elements->length; $i++) {
    $item = $elements->item($i);
    var_dump($item->nodeValue);
    var_dump($item->textContent);
}
<?php

require './vendor/autoload.php';

/**
 * Download a page with an HTTP GET request and return its body.
 *
 * Echoes the response status code as a side effect.
 *
 * @param string $url Address to request.
 *
 * @return string Response body.
 */
function getContent($url)
{
    $client = new GuzzleHttp\Client();
    $res = $client->request('GET', $url);
    echo $res->getStatusCode();

    // Casting the PSR-7 body to string rewinds the stream before reading, so
    // the complete payload is returned regardless of the stream position.
    return (string) $res->getBody();
}

/**
 * Collect the href of every <a> whose class attribute contains "postTitle".
 *
 * @param string $html Markup to scan.
 *
 * @return string[] href values in document order; empty when nothing matches
 *                  or when $html is empty.
 */
function parse($html)
{
    $html = (string) $html;

    // loadHTML() rejects an empty source string (a warning on older PHP, a
    // ValueError on PHP 8); there is nothing to extract from it anyway.
    if ($html === '') {
        return [];
    }

    $dom = new DOMDocument();

    // Real-world HTML is rarely well-formed. Let libxml collect its parse
    // warnings internally instead of silencing every error with "@", and put
    // the previous error mode back afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $dom->loadHTML($html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    $xPath = new DOMXPath($dom);

    // Ask XPath for the href attributes directly rather than fetching every
    // attribute and filtering by name in PHP.
    $hrefNodes = $xPath->query('//a[contains(@class,"postTitle")]/@href');

    $urls = [];
    foreach ($hrefNodes as $href) {
        $urls[] = $href->nodeValue;
    }

    return $urls;
}

$url = "https://www.cnblogs.com/brady-wang/";
$content = getContent($url);
$urls = parse($content);
var_dump($urls);
https://ask.csdn.net/questions/833515