爬虫 | php封装 | file_get_contents

今天闲来无事,用 PHP 基于 file_get_contents 封装了一个比较简单的 HTTP 请求类。
细节方面还可以再优化。

/**
 * Minimal HTTP client built on file_get_contents() and stream contexts.
 *
 * After get() or post() returns, the response body is stored in $text and the
 * response header lines (when any were received) in $responseHeader.
 */
class Creeper
{
    /** @var string Target URL used by get() and post(). */
    public $url;

    /** @var string Raw request header lines, each terminated by "\r\n". */
    public $header;

    /** @var string|false|null Result of the last file_get_contents() call (false on failure); null before any request. */
    public $text;

    /** @var array|null Header lines of the last response; null when none were received. */
    public $responseHeader;

    /**
     * @param string $url URL that subsequent get()/post() calls will request.
     */
    public function __construct($url)
    {
        $this->url = $url;
        // Default headers: accept any language and present a desktop Chrome user agent.
        $this->header = "Accept-language: *\r\n"
            . "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36\r\n";
    }

    /**
     * Send a GET request to $url and store the result in $text / $responseHeader.
     *
     * SSL peer and peer-name verification are disabled for this request.
     */
    public function get()
    {
        $this->request(
            [
                'method' => 'GET',
                'header' => $this->header,
            ],
            [
                // Disable SSL verification.
                'verify_peer' => false,
                'verify_peer_name' => false,
            ]
        );
    }

    /**
     * Send a POST request to $url and store the result in $text / $responseHeader.
     *
     * @param array|string $data Form fields as an array (encoded with
     *                           http_build_query()), or a string, which is
     *                           passed through urlencode() as a whole — so
     *                           pass an array to send key=value pairs.
     */
    public function post($data)
    {
        // Arrays become a proper "a=1&b=2" form body; strings keep the
        // historical whole-string urlencode() behaviour.
        $postdata = is_array($data) ? http_build_query($data) : urlencode($data);

        $this->request([
            'method' => 'POST',
            // The registered form media type is x-www-form-urlencoded.
            'header' => $this->header
                . "Content-Type: application/x-www-form-urlencoded\r\n",
            'content' => $postdata,
        ]);
    }

    /**
     * Build the stream context, perform the request and record the outcome.
     *
     * @param array $http Options for the "http" context wrapper.
     * @param array $ssl  Options for the "ssl" context wrapper; omitted from the context when empty.
     */
    private function request(array $http, array $ssl = [])
    {
        $options = ['http' => $http];
        if ($ssl !== []) {
            $options['ssl'] = $ssl;
        }

        $context = stream_context_create($options);     // Create the stream context.
        $this->text = file_get_contents($this->url, false, $context);    // Perform the request.

        // $http_response_header is only defined in this scope when an HTTP
        // response was received; fall back to null instead of reading an
        // undefined variable.
        $this->responseHeader = $http_response_header ?? null;
    }
}

基本上使用起来也还是挺方便的:

// Fetch the page; the result of the request ends up in $creeper->text.
$creeper = new Creeper('http://www.xxxxx.com');
$creeper->get();

// file_get_contents() yields false on failure, so only scan a real string body.
$html = is_string($creeper->text) ? $creeper->text : '';

// Capture the value of every href attribute that is immediately followed by a
// target attribute. [^"]* keeps each capture inside one quoted value, whereas
// .*? could run past the closing quote and swallow several tags.
preg_match_all('/href="([^"]*)" target=/', $html, $links);
$links = $links[1];

over.

posted @ 2021-03-03 21:43  Mz1  阅读(169)  评论(0)  编辑  收藏  举报