snatch
// Copy this code into a local ThinkPHP controller and replace the cookies below
// with fresh ones from your own browser session before scraping.

/**
 * Page the list crawl starts from; index() advances it after every request.
 *
 * NOTE(review): the start value equals the last page, so index() as written
 * fetches only 100.html and then finishes. Lower it (0 = the un-numbered index
 * page) to crawl the whole list — confirm the intended starting point.
 */
private $p = 100;

/** Highest list page offered by the site (the original note says 100 pages). */
private $maxPage = 100;

/** Scheme and host of the case-disclosure site being scraped. */
private $baseUrl = 'http://www.ajxxgk.jcy.gov.cn';

/** Session cookie sent with list-page requests. Expires quickly; replace before running. */
private $listCookie = '__jsluid=41f16405b9462bf65ea12ec337f6884e; PHPSESSID=7svf9vl5bvrsu424ro01gkq4o6; Hm_lvt_2e64cf4f6ff9f8ccbe097650c83d719e=1531466157; Hm_lpvt_2e64cf4f6ff9f8ccbe097650c83d719e=1531469833; sYQDUGqqzHpid=page_0; sYQDUGqqzHtid=tab_0; __jsl_clearance=1531469779.42|0|ixCK%2F5i3LhsoOqHgzJOh%2BkgLS58%3D';

/** Session cookie sent with detail-page requests. Expires quickly; replace before running. */
private $detailCookie = '__jsluid=41f16405b9462bf65ea12ec337f6884e; Hm_lvt_2e64cf4f6ff9f8ccbe097650c83d719e=1531466157,1531471714,1531529424; sYQDUGqqzHpid=page_0; sYQDUGqqzHtid=tab_0; Hm_lpvt_2e64cf4f6ff9f8ccbe097650c83d719e=1531555369; PHPSESSID=pqgshbduheq6ro8rj48suq97b6; Hm_lvt_d7682ab43891c68a00de46e9ce5b76aa=1531534004; Hm_lpvt_d7682ab43891c68a00de46e9ce5b76aa=1531534004; sYQDUGqqzHsearch_history=%u957F%u9AD8%u65B0%u68C0%u5211%u8BC9%u30142018%u301528%u53F7%7C%2C%u674E%u4E1C%u5219%u804C%u52A1%u4FB5%u5360%u6848%7C; __jsl_clearance=1531555363.037|0|ABiNEmcyF9rTF7U%2FlZUcdgq01Sk%3D';

/** First two octets used when fabricating the forwarded client address. */
private $ipPrefixes = array(
    '61.157.',  // Sichuan
    '61.156.',  // Shandong
    '182.97.',  // Jiangxi
    '111.17.',  // Qingdao
    '219.148.', // Hebei / Liaoning
    '218.82.',  // Shanghai
    '175.12.',  // Hunan
    '221.220.', // Beijing
    '123.125.', // Beijing
    '14.16.',
);

/**
 * Crawl the list pages and store every document link found.
 *
 * Each request fetches one list page, extracts all href values, and inserts
 * those containing "/html/2" into the `jcy` table together with the page
 * number. Prints "finish" and terminates the script once the last page has
 * been processed; otherwise waits five seconds and moves to the next page.
 */
public function index()
{
    set_time_limit(0);

    while (true) {
        // Page 0 is the un-numbered index page; every other page is "<n>.html".
        $pageFile = $this->p > 0 ? $this->p . '.html' : '';
        $html = $this->fetchPage('/html/gj/jl/zjxflws/' . $pageFile, $this->listCookie);

        // Start from an empty list on every pass so a failed request can never
        // re-insert the previous page's links under the new page number.
        $links = array();
        if ($html !== false && preg_match_all('/(a href="(.+?)")/', $html, $match)) {
            $links = $match[2];
        }

        foreach ($links as $link) {
            if (strpos($link, '/html/2') !== false) {
                M('jcy')->add(array('url' => $link, 'page' => $this->p));
            }
        }

        // ">=" rather than "==" so a start value beyond the limit cannot loop forever.
        if ($this->p >= $this->maxPage) {
            echo 'finish';
            exit;
        }

        $this->p++;
        sleep(5); // throttle between requests
    }
}

/**
 * Fetch every collected document URL and store its parsed content.
 *
 * URLs already present in the target table are skipped. For each remaining
 * URL the page title and the text of all styled <p> elements are extracted
 * and inserted as one row. If a page yields no paragraphs the URL is echoed
 * and the script stops, as in the original code — presumably because the
 * session cookie has expired; confirm before relying on that.
 */
public function detail()
{
    set_time_limit(0);

    $urls = M('jcy')->group('url')->getField('url', true);

    // NOTE(review): database credentials are hard-coded in source. Move this
    // DSN into protected configuration and rotate the password.
    $jcy = M('jcy', '', 'mysql://jufa_slave:KYi2303mdyTdyh3@123.56.183.226:3312/jianchayuan');

    // Nothing to iterate over if the query did not produce a list.
    if (!is_array($urls)) {
        return;
    }

    foreach ($urls as $url) {
        if ($jcy->where(array('url' => $url))->find()) {
            continue;
        }

        $html = (string) $this->fetchPage(
            $url,
            $this->detailCookie,
            array('Referer: ' . $this->baseUrl . $url)
        );

        $title = '';
        if (preg_match('/(<title>(.+?)<\/title>)/', $html, $match)) {
            $title = $match[2];
        }
        $title = str_replace('- 法律文书公开 - 人民检察院案件信息公开网', '', $title);

        // Reset per URL: reusing the previous page's matches would hide a
        // failed fetch and save the wrong document text under this URL.
        $paragraphs = array();
        if (preg_match_all('/<p style="(.+?)">(.*?)<\/p>/is', $html, $match)) {
            $paragraphs = $match[2];
        }

        if (empty($paragraphs)) {
            echo $url;
            exit;
        }

        $parts = array();
        foreach ($paragraphs as $paragraph) {
            $text = $this->trimall($paragraph);
            // Compare against '' so a paragraph consisting of "0" is kept.
            if ($text !== '') {
                $parts[] = $text;
            }
        }

        $jcy->add(array(
            // The third non-empty paragraph is stored as the case number;
            // null when the page has fewer than three.
            'case_no' => isset($parts[2]) ? $parts[2] : null,
            'title'   => $title,
            'txt'     => implode('_|_', $parts),
            'url'     => $url,
        ));

        sleep(5); // throttle between requests
    }
}

/**
 * Strip HTML tags and remove all whitespace from a string.
 *
 * The original search list contained duplicated and empty entries, which
 * presumably were the ideographic space, the no-break space and the "&nbsp;"
 * entity before the snippet was mangled; they are spelled out explicitly here.
 *
 * @param string $str Raw HTML fragment.
 * @return string Text with tags and whitespace removed.
 */
public function trimall($str)
{
    $whitespace = array(
        ' ',
        "\xE3\x80\x80", // U+3000 ideographic (full-width) space
        "\xC2\xA0",     // U+00A0 no-break space
        '&nbsp;',
        "\t",
        "\n",
        "\r",
    );

    return str_replace($whitespace, '', strip_tags($str));
}

/**
 * Perform one GET request against the site with browser-like headers.
 *
 * No conditional headers (If-Modified-Since / If-None-Match) are sent: the
 * scraper keeps no cache, so a 304 reply would only produce an empty body.
 *
 * @param string $path         Path appended to the base URL.
 * @param string $cookie       Cookie header value (without the "Cookie: " prefix).
 * @param array  $extraHeaders Additional raw header lines, e.g. a Referer.
 * @return string|false Response body, or false when the transfer failed.
 */
private function fetchPage($path, $cookie, array $extraHeaders = array())
{
    $ip = $this->randomClientIp();

    $headers = array_merge(
        array(
            'X-FORWARDED-FOR: ' . $ip,
            'CLIENT-IP: ' . $ip,
            'Host: www.ajxxgk.jcy.gov.cn',
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Accept-Encoding: gzip, deflate',
        ),
        $extraHeaders,
        array(
            'Cookie: ' . $cookie,
            'Connection: keep-alive',
            'Upgrade-Insecure-Requests: 1',
        )
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $this->baseUrl . $path);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HEADER, false);
    // Enables transparent decompression of the response (avoids garbled output).
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip, deflate');
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $body = curl_exec($ch);
    curl_close($ch);

    return $body;
}

/**
 * Build a random IPv4 address from one of the configured prefixes, used for
 * the spoofed X-FORWARDED-FOR / CLIENT-IP headers.
 *
 * @return string Dotted-quad address.
 */
private function randomClientIp()
{
    $prefix = $this->ipPrefixes[mt_rand(0, count($this->ipPrefixes) - 1)];

    return $prefix . mt_rand(1, 254) . '.' . mt_rand(1, 254);
}
抓取官网
<?php
// Source: https://www.fajuhe.com/version2/01.script/ws_parse/CurlJufaanli.php
include_once '../conf/mysql_conn.php';
include_once '../conf/Utils.php';
include_once '../ws_parse/function.wsp.php';

echo '<meta http-equiv="Content-Type" content="text/html; charset=utf8">';
date_default_timezone_set('Asia/Shanghai');
error_reporting(E_ERROR);

// Lift the execution time limit before the first request; the original only
// did so after a request had already completed.
set_time_limit(0);

// Not read anywhere in this script; kept in case an included file relies on it.
$keyword = array(
    ''
);

// NOTE(review): every iteration sends the identical page-1 query — confirm
// whether the page number was meant to advance.
for ($i = 0; $i < 200; $i++) {
    detail();
}

/**
 * Send one search request to the jufaanli JSON endpoint and print a "。"
 * progress marker when a non-empty response comes back.
 *
 * The form fields are sent url-encoded so the body matches the declared
 * Content-Type. cURL computes Content-Length itself; the previously
 * hard-coded value of 152 only matched the url-encoded body, while passing an
 * array made cURL send multipart/form-data instead.
 *
 * @return void
 */
function detail()
{
    $url = 'http://spider.jufaanli.com/home/search/searchJson';

    $postFields = array(
        'page'       => '1',
        // Kept as a string so the value survives on 32-bit builds.
        'searchTime' => '1542337827703',
        'searchNum'  => 1,
        'nowReason'  => 20,
        'sortType'   => 'caseWeight',
        'keyword'    => '莱姆顿',
        'TypeKey'    => '1:莱姆顿',
    );

    $headers = array(
        'Connection: keep-alive',
        'Accept: application/json, text/javascript, */*; q=0.01',
        'Origin: http://spider.jufaanli.com',
        'X-Requested-With: XMLHttpRequest',
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
        'Content-Type: application/x-www-form-urlencoded; charset=UTF-8',
        'Referer: http://spider.jufaanli.com/search2?TypeKey=1%3A%E8%8E%B1%E5%A7%86%E9%A1%BF',
        'Accept-Encoding: gzip, deflate',
        'Accept-Language: zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6',
        // The original header carried a doubled "Cookie: Cookie:" prefix.
        'Cookie: t=4ed3f2e39fada12f15a1ee1b265f9a0b; BJYSESSION=8r562mhj2livqgmlebog2abha1; Hm_lvt_7d935fee641e9bdd8fd6b28e9a2b62dc=1542266081,1542266094,1542266694,1542329761; is_remember=1; refer_url=http%3A%2F%2Fspider.jufaanli.com%2Fsearch2%3FTypeKey%3D1%3A%E8%8E%B1%E5%A7%86%E9%A1%BF; Hm_lpvt_7d935fee641e9bdd8fd6b28e9a2b62dc=1542337815; login_time=2018-11-16+11%3A10%3A18; tf=fb9461bb0083ee407534bbc4cc8b7b82',
    );

    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER         => false,
        // Enables transparent decompression of the response (avoids garbled output).
        CURLOPT_ENCODING       => 'gzip, deflate',
        CURLOPT_HTTPHEADER     => $headers,
        // A string body makes cURL issue a url-encoded POST.
        CURLOPT_POSTFIELDS     => http_build_query($postFields),
    ));

    $data = curl_exec($ch);
    // Release the handle and its system resources.
    curl_close($ch);

    if ($data !== false && $data !== '') {
        echo "。";
    }
}
使用代理(https://www.cnblogs.com/burningc/p/8794584.html):
代理ip使用网站:http://www.89ip.cn/index_13.html
<?php
// Fetch a URL through an HTTP proxy and print the response body followed by
// the cURL error string (empty when the transfer succeeded).
$requestUrl = "http://39.105.47.187/";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $requestUrl);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC);  // proxy authentication scheme
// Alternative proxy: 111.47.154.38, port 53281.
curl_setopt($ch, CURLOPT_PROXY, "221.210.120.153");   // proxy server address
curl_setopt($ch, CURLOPT_PROXYPORT, 54402);           // proxy server port
// For an authenticated proxy, also set CURLOPT_PROXYUSERPWD to "user:password".
curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);  // use an HTTP proxy

$file_contents = curl_exec($ch);
// Read the error before closing; the handle was previously never released.
$a = curl_error($ch);
curl_close($ch);

echo "the result is " . ($file_contents);
var_dump($a);
你的指尖,有着改变世界的力量!