php爬虫 phpspider
<?php
/**
 * Created by PhpStorm.
 * User: brady
 * Date: 2016/12/9
 * Time: 17:32
 *
 * Single-page scraping example: fetches one article URL, pulls the title,
 * author, publish time and body out of the returned HTML with XPath
 * selectors, and passes title/author/body to db::insert() for the
 * "content" table.
 *
 * The script aborts with a non-zero exit code when the page cannot be
 * fetched or a required field cannot be extracted, so that an empty row is
 * never handed to db::insert().
 */
ini_set("memory_limit", "1024M");
require dirname(__FILE__).'/../core/init.php';

$url = "http://www.epooll.com/archives/806/";

$html = requests::get($url);
if (empty($html)) {
    // Nothing to parse; every selector below would come back empty.
    echo "Failed to fetch {$url}\n";
    exit(1);
}

// Guard used for every required field: returns the value unchanged when
// something was extracted, otherwise reports the field and stops the script.
// NOTE(review): the exact "no match" value of selector::select() is not
// visible from this file, so null, false, '' and an empty array are all
// treated as "nothing extracted" -- confirm against the selector class.
$requireExtracted = function ($value, $label) use ($url) {
    if ($value === null || $value === false || $value === '' || $value === array()) {
        echo "Failed to extract {$label} from {$url}\n";
        exit(1);
    }
    return $value;
};

// Extract the article title.
$selector = "//*[@id=\"content\"]/div[1]/div[1]/h1/a";
$title = $requireExtracted(selector::select($html, $selector), 'title');

// Extract the article author and strip the "作者:" label prefix.
$selector = "//*[@id=\"content\"]/div[1]/div[1]/h6/span[1]";
$author = $requireExtracted(selector::select($html, $selector), 'author');
$author = str_replace("作者:", "", $author);

// Extract the publish time, strip the "发布时间:" label prefix and normalise
// it to "Y-m-d H:i:s".
$selector = "//*[@id=\"content\"]/div[1]/div[1]/h6/span[2]";
$time = selector::select($html, $selector);
if (is_string($time)) {
    $timestamp = strtotime(str_replace("发布时间:", '', $time));
    // strtotime() returns false for unparseable input; feeding that to
    // date() would silently produce the Unix epoch, so use null instead.
    $time = ($timestamp === false) ? null : date("Y-m-d H:i:s", $timestamp);
} else {
    $time = null;
}
// NOTE(review): $time is computed but is not part of $data below, exactly as
// in the original script. If the "content" table has a publish-time column,
// add it to $data under that column's name.

// Extract the article body.
$selector = "//*[@id=\"content\"]/div[1]/div[2]";
$content = $requireExtracted(selector::select($html, $selector), 'content');

$data = array(
    'article_title' => $title,
    'article_author' => $author,
    'article_content' => $content,
);

// Store the record and dump whatever db::insert() returns for inspection.
$res = db::insert("content", $data);
var_dump($res);