// 1. A simple first example: request a site served over plain HTTP.

// Create the cURL handle, then apply every option in one call.
$curl = curl_init();
curl_setopt_array($curl, array(
    // The URL to fetch.
    CURLOPT_URL => 'http://www.hao123.com',
    // Whether the server's response headers are included in the output:
    // 0 = leave them out, non-zero = include them.
    CURLOPT_HEADER => 0,
    // Where the response goes: 0 = printed straight to the screen,
    // non-zero = returned from curl_exec() as a string.
    CURLOPT_RETURNTRANSFER => 1,
));

// Run the request, then release the handle.
$data = curl_exec($curl);
curl_close($curl);

// Show the fetched data (the page as a string, or false if the request failed).
var_dump($data);
// 2. Request a site over HTTPS, sending data in the query string (GET).

// NOTE(review): the appid/secret in this URL are hard-coded credentials. Load them
// from configuration or the environment instead of keeping them in source.
$url = 'https://api.weixin.qq.com/cgi-bin/token?grant_type=client_credential&appid=wxfefd7eaa357a57cf&secret=e061b4df1183fb203e2dc38d35b6a633';
//$url = 'http://localhost/wx/xx.php';

$curl = curl_init($url);

// Verify the server certificate against the trusted CA store. Turning this off
// (the value 0) accepts any certificate and allows man-in-the-middle attacks.
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
// 2 = the host name in the certificate must match the host being requested.
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
// Follow "Location:" redirects and fetch the content of the final URL.
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
// Cap the total request time (seconds) so a stalled transfer cannot hang the script.
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
// Do not include the response headers in the result.
curl_setopt($curl, CURLOPT_HEADER, 0);
// CURLOPT_RETURNTRANSFER:
//   1           -> on success curl_exec() returns the body and prints nothing;
//                  on failure it returns false.
//   0 / not set -> on success curl_exec() prints the body and returns true;
//                  on failure it returns false.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

$result = curl_exec($curl);
// Read the error text before the handle is closed.
$error = ($result === false) ? curl_error($curl) : '';
curl_close($curl);

// Keep $res defined on every path; it stays null when nothing could be decoded.
$res = null;
if ($result === false) {
    echo 'cURL request failed: ' . $error;
} else {
    $res = json_decode($result, true);
    if ($res === null && json_last_error() !== JSON_ERROR_NONE) {
        echo 'Response is not valid JSON: ' . json_last_error_msg();
    } else {
        print_r($res);
    }
}
// 3. Simulate a login to the "LAMP Brothers" forum (bbs.lampbrother.net).
$url = 'http://bbs.lampbrother.net/login.php';

// Login form fields. 'pwuser' / 'pwpwd' hold placeholder text ("your email" /
// "your password") that must be replaced with real credentials.
$arr = array(
    'step' => 2,
    'lgt' => 2,
    'pwuser' => '你的邮箱',
    'pwpwd' => '你的密码',
    'question' => 0,
    'hideid' => 0,
);

/***** Method 1 *****/
/*
// Save the cookies to cookie.txt
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
The cookies are first written to a file and then read back when they are needed,
which means two I/O operations and is therefore less efficient.
*/

/*
$cookie_file = tempnam('./temp','cookie');
// First request: log in and store the cookies in the file
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($arr));
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$data = curl_exec($ch);
curl_close($ch);
//echo $data;

// Second request: fetch the home page using the cookies stored in the file
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://bbs.lampbrother.net/');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
curl_exec($ch);
curl_close($ch);
*/


/***** Method 2 *****/
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// Include the response headers in the result so the cookies can be read from them.
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($arr));
$content = curl_exec($ch);
// Both values must be read before the handle is closed.
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$error = ($content === false) ? curl_error($ch) : '';
curl_close($ch);

if ($content === false) {
    echo 'Login request failed: ' . $error;
} else {
    // Split the raw HTTP stream into headers and body using the header size
    // reported by cURL. Splitting on the first blank line would cut the body
    // short whenever the body itself contains a blank line, and would pick the
    // wrong block when the server sends an interim response first.
    $header = rtrim(substr($content, 0, $headerSize));
    $body = (string) substr($content, $headerSize);
    print_r($header);

    // Collect the cookies: keep only the leading "name=value" pair of every
    // Set-Cookie header. Attributes such as path/expires are instructions for
    // the client and must not be sent back as cookies.
    preg_match_all('/^set-cookie:\s*([^;\r\n]+)/mi', $header, $matches);
    //print_r($matches);
    $cookies = implode('; ', array_map('trim', $matches[1]));
    print_r($cookies);


    // Later cURL requests can use the cookie string directly:
    // curl_setopt($ch, CURLOPT_COOKIE, $cookies);
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, 'http://bbs.lampbrother.net/');
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // 0 = curl_exec() prints the fetched page directly instead of returning it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
    curl_setopt($ch, CURLOPT_COOKIE, $cookies);
    curl_exec($ch);
    curl_close($ch);
}
<?php
// 4. Example: log in to OSChina and scrape the user's inbox.
header('Content-type:text/html;charset=utf-8');

$url = 'https://www.oschina.net/action/user/hash_login';

// Login form fields. 'email' / 'pwd' hold placeholder text ("your email" /
// "your password") that must be replaced with real credentials.
// NOTE(review): the password is sent as a SHA-1 hash, presumably because this
// endpoint expects it — confirm against the site before changing.
$data = array(
    'email' => '你的邮箱',
    'pwd' => sha1('你的密码'),
    'save_login' => 1,
);

// Browser-like request headers sent with the login request.
$headers = array(
    'User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36',
    'Referer:https://www.oschina.net/home/login?goto_page=http%3A%2F%2Fwww.oschina.net%2Fcode%2Fsnippet_47318_27221',
);

// Step 1: log in and collect the session cookies.
$curl = curl_init($url);
// Verify the server certificate; disabling this allows man-in-the-middle attacks.
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
// 2 = the host name in the certificate must match the requested host.
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
// Include the response headers in the result so Set-Cookie can be read.
curl_setopt($curl, CURLOPT_HEADER, 1);
curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POST, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, http_build_query($data));
$result = curl_exec($curl);
// Both values must be read before the handle is closed.
$headerSize = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
$error = ($result === false) ? curl_error($curl) : '';
curl_close($curl);
//print_r($result);

$cookies = '';
$res = false;
if ($result === false) {
    echo 'Login request failed: ' . htmlspecialchars($error, ENT_QUOTES, 'UTF-8');
} else {
    // Look for cookies only in the header section (which covers every response
    // in a redirect chain), never in the body, and keep just the leading
    // "name=value" pair of each Set-Cookie header — attributes such as
    // path/expires must not be sent back as cookies.
    $responseHeaders = substr($result, 0, $headerSize);
    preg_match_all('/^set-cookie:\s*([^;\r\n]+)/mi', $responseHeaders, $matches);
    //print_r($matches);
    $cookies = implode('; ', array_map('trim', $matches[1]));

    // Step 2: fetch the inbox page with the collected cookies.
    $url = 'http://my.oschina.net/xxxxx/admin/inbox';
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_COOKIE, $cookies);
    $res = curl_exec($ch);
    if ($res === false) {
        echo 'Inbox request failed: ' . htmlspecialchars(curl_error($ch), ENT_QUOTES, 'UTF-8');
    }
    curl_close($ch);
}

require './simple_html_dom.php';

// Step 3: parse the page with the simple_html_dom library.
if (is_string($res) && $res !== '') {
    $html1 = new simple_html_dom();
    $html1->load($res);
    // Every <li> carrying an id attribute inside <ul class="Msgs">.
    $r = $html1->find('ul.Msgs li[id]');

    $html2 = new simple_html_dom();
    foreach ($r as $k => $v) {
        // Re-parse the markup of this list item on its own.
        $html2->load((string) $v);
        $t = $html2->find('.msg');
        foreach ($t as $key => $value) {
            // Emits the scraped markup as-is, followed by a separator.
            echo $value . '<hr/>';
        }
    }
    // Release both parsers once parsing is finished.
    $html2->clear();
    $html1->clear();
}
?>