cURL
1. 简单的爬虫
1. 在 xampp 中开启 PHP 的 cURL 扩展 -> 打开 xampp/php/php.ini，搜索 curl，去掉 extension=php_curl.dll（新版本为 extension=curl）这一行行首的分号即可（若通过 Apache 访问，修改后需重启 Apache）
2. cmd D: -> cd xampp -> cd php
php -f ../htdocs/mirror/curl/curl.php > ../htdocs/mirror/curl/haha.html (将爬下来的数据保存在haha.html中)
// ../htdocs/mirror/curl/curl.php
<?php
/**
 * Minimal crawler: downloads the Baidu home page, replaces every occurrence
 * of "百度" with "屌丝" and prints the result to standard output (the caller
 * redirects stdout into an .html file).
 *
 * On a transfer failure nothing is written to stdout; the cURL error goes to
 * stderr and the script exits with status 1, so the redirected output file is
 * not silently filled with an empty page.
 */
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.baidu.com");
// Return the response body from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

$output = curl_exec($ch);
if ($output === false) {
    // Read the error message before the handle is released.
    $error = curl_error($ch);
    curl_close($ch);
    file_put_contents('php://stderr', "Curl error " . $error . PHP_EOL);
    exit(1);
}
curl_close($ch);

// Replace "百度" with "屌丝" in the downloaded page before emitting it.
echo str_replace("百度", "屌丝", $output);
?>
2. 访问WebService
http://www.webxml.com.cn/WebServices/WeatherWebService.asmx?op=getWeatherbyCityName
<?php
/**
 * Calls the WeatherWebService "getWeatherbyCityName" operation with an HTTP
 * form POST and prints the raw response, or "Curl error <message>" when the
 * transfer fails.
 */

// Build the form body with http_build_query() so the non-ASCII city name is
// percent-encoded, as application/x-www-form-urlencoded requires.
$data = http_build_query(array('theCityName' => '武汉'));

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.webxml.com.cn/WebServices/WeatherWebService.asmx/getWeatherbyCityName");
// Do not include the response headers in the returned string.
curl_setopt($ch, CURLOPT_HEADER, 0);
// CURLOPT_USERAGENT takes the User-Agent string itself; the previous value 1
// was sent literally as "User-Agent: 1".
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; PHP-cURL)');
// Return the response body from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line; the field name
// "Content-Type:" was missing before. Content-Length is left to cURL, which
// derives it from CURLOPT_POSTFIELDS.
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
    'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
));

$rtn = curl_exec($ch);
if ($rtn === false) {
    echo "Curl error " . curl_error($ch);
} else {
    echo $rtn;
}
curl_close($ch);
?>
3. 模拟登录后下载登录之后的信息
<?php
/**
 * Simulates a login by POSTing a credentials form, then reuses the same cURL
 * handle (and therefore the same cookies) to download a page that is only
 * available after logging in, and prints that page.
 *
 * If either request fails, "Curl error <message>" is written to stderr and
 * the script exits with status 1.
 */

// http_build_query() percent-encodes the values ("@", non-ASCII password
// characters, "&", ...) so they cannot corrupt the form body.
$data = http_build_query(array(
    'username' => '252973202@qq.com',
    'password' => '这个密码就不写出来啦',
    'remember' => 1,
));

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.imooc.com/user/login");
// Return the response body from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

// ---- simulated login: begin ----
// Set the time zone used for cookie handling (cookies carry expiry times).
date_default_timezone_set('PRC');

// Usual cookie boilerplate: start a fresh cookie session, and read cookies
// from / persist them to the same file.
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookiefile");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookiefile");
// NOTE: an explicit CURLOPT_COOKIE used to be set here, but it quoted
// "session_name()" as a string literal (sending a cookie literally named
// "session_name()") and its value was this script's own PHP session id, which
// is unrelated to the remote site. The cookie options above already carry the
// remote site's cookies between the two requests, so it has been removed.

// Do not include the response headers in the returned string.
curl_setopt($ch, CURLOPT_HEADER, 0);
// CURLOPT_USERAGENT takes the User-Agent string itself; the previous value 1
// was sent literally as "User-Agent: 1".
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; PHP-cURL)');
// Follow HTTP redirects issued by the login endpoint.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line; the field name
// "Content-Type:" was missing before. Content-Length is left to cURL, which
// derives it from CURLOPT_POSTFIELDS.
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
    'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
));

$rtn = curl_exec($ch);
if ($rtn === false) {
    $error = curl_error($ch);
    curl_close($ch);
    file_put_contents('php://stderr', "Curl error " . $error . PHP_EOL);
    exit(1);
}
// ---- simulated login: end ----

// After the login request, fetch the page that requires a logged-in session.
curl_setopt($ch, CURLOPT_URL, "http://www.imooc.com/space/index");
// Switch the handle back to a plain GET and drop the POST-specific headers;
// a GET has no body, so it needs no Content-Type header.
curl_setopt($ch, CURLOPT_HTTPGET, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, array());

$output = curl_exec($ch);
if ($output === false) {
    $error = curl_error($ch);
    curl_close($ch);
    file_put_contents('php://stderr', "Curl error " . $error . PHP_EOL);
    exit(1);
}
curl_close($ch);

echo $output;
?>