curl站外抓取第二发
<?php
// Scraper script: POSTs page requests to a remote endpoint, collects the
// (code, name) pair of every returned project and writes them to a CSV file.
header("content-type:text/html;charset=utf-8");
// Raise the execution limit to 300 seconds; a run issues one HTTP request per page.
ini_set('max_execution_time', 300);

/**
 * Requests a single page from the endpoint configured on $ch and returns its
 * decoded "projectList".
 *
 * @param resource|object $ch       cURL handle with URL / POST options already set
 * @param int             $page     page number sent as "currentPage"
 * @param int             $pageSize number of records requested, sent as "pageSize"
 *
 * @return array|null list of decoded project objects, or null when the request
 *                    failed or the response was not JSON with a "projectList" array
 */
function fetchScholarPage($ch, $page, $pageSize)
{
    // Passing an array makes cURL send the fields as multipart/form-data,
    // exactly as the original request did.
    curl_setopt($ch, CURLOPT_POSTFIELDS, array(
        "currentPage" => $page,
        "pageSize" => $pageSize,
    ));

    $output = curl_exec($ch);
    if ($output === false) {
        return null;
    }

    $decoded = json_decode($output);
    if (!is_object($decoded)
        || !isset($decoded->projectList)
        || !is_array($decoded->projectList)
    ) {
        return null;
    }

    return $decoded->projectList;
}

/**
 * Fetches pages $start .. $end-1, de-duplicates the projects by their code and
 * writes one "code,name" row per project to "output-{$start}-{$end-1}.csv" in
 * the current working directory. Prints a summary, or an error report followed
 * by exit when the number of collected records differs from
 * ($end - $start) * $pageSize.
 *
 * @param int $start    first page to fetch (inclusive)
 * @param int $end      page to stop at (exclusive)
 * @param int $pageSize records requested per page; also used for the final
 *                      completeness check
 *
 * @return void
 */
function getScholar($start, $end, $pageSize = 7)
{
    $url = "http://****.gov.cn*****";
    $result = array();
    $failedPages = array();

    // One handle is reused for every page; only the POST fields change.
    $ch = curl_init();
    if ($ch === false) {
        die("can't initialise curl");
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_POST, 1);

    for ($i = $start; $i < $end; $i++) {
        $projects = fetchScholarPage($ch, $i, $pageSize);
        if ($projects === null) {
            // Keep going: the completeness check below reports the shortfall.
            $failedPages[] = $i;
            continue;
        }

        foreach ($projects as $project) {
            if (!is_object($project)
                || !isset($project->code)
                || !is_scalar($project->code)
            ) {
                continue;
            }
            // Assign (not append) so a code seen twice still yields a single
            // two-column row instead of a row with repeated columns.
            $result[$project->code] = array(
                $project->code,
                isset($project->name) ? $project->name : '',
            );
        }
    }
    curl_close($ch);

    $lastPage = $end - 1;
    $csvName = "output-{$start}-{$lastPage}.csv";
    $fp = fopen($csvName, 'w');
    if ($fp === false) {
        die("can't open {$csvName}");
    }
    foreach ($result as $row) {
        if (fputcsv($fp, $row) === false) {
            die("can't write csv line");
        }
    }
    fclose($fp) or die("can't close {$csvName}");

    // Every page is expected to be full; a short or failed page (or duplicate
    // codes across pages) is reported as an error.
    $expected = (int) (($end - $start) * $pageSize);
    if (count($result) !== $expected) {
        echo "数据出现错误";
        echo "<br/>";
        echo count($result);
        if (count($failedPages) > 0) {
            echo "<br/>";
            echo "failed pages: " . implode(',', $failedPages);
        }
        exit;
    }
    echo "数据抓取完成,共抓取到".count($result)."条记录";
}

getScholar(3950, 4000); // fetches pages 3950 through 3999 ($end is exclusive)
源码来源:http://blog.csdn.net/htmlgood/article/details/49558703