rolling_curl curl批量采集函数留份
摘自:http://www.searchtb.com/2012/06/rolling-curl-best-practices.html
/**
 * Fetch a batch of URLs concurrently with the curl_multi API.
 *
 * Every URL gets its own easy handle (5 second timeout, body returned as a
 * string, no response headers in the body, CURLOPT_NOSIGNAL enabled). All
 * handles are driven together and each response is collected as soon as its
 * transfer completes.
 *
 * @param array|Traversable $urls  URLs to fetch. Responses are keyed by URL,
 *                                 so a URL listed twice yields a single entry
 *                                 (the transfer that finishes last wins).
 * @param int               $delay Currently unused; kept so existing callers
 *                                 keep working. It was only consumed by a
 *                                 per-response callback that is disabled.
 *
 * @return array Map of URL => array(
 *                   'info'    => array  result of curl_getinfo(); inspect it
 *                                       to decide whether the request succeeded,
 *                   'error'   => string result of curl_error(),
 *                   'results' => string|null body from curl_multi_getcontent(),
 *               ).
 *               Transfers that had not completed when curl_multi_exec()
 *               reported a failure have no entry.
 */
function rolling_curl($urls, $delay = 0)
{
    $queue = curl_multi_init();

    // Curl handles are resources before PHP 8 and CurlHandle objects from
    // PHP 8 on. Objects cannot be cast to string, so derive an integer key
    // that works for both representations.
    $keyOf = function ($handle) {
        return is_object($handle) ? spl_object_id($handle) : (int) $handle;
    };

    // key => array('url' => ..., 'handle' => ...) for every transfer in flight.
    $pending = array();

    foreach ($urls as $url) {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_NOSIGNAL, true);

        curl_multi_add_handle($queue, $ch);
        $pending[$keyOf($ch)] = array('url' => $url, 'handle' => $ch);
    }

    $responses = array();
    $active = 0;

    do {
        // Drive the transfers for as long as curl asks to be called again.
        do {
            $code = curl_multi_exec($queue, $active);
        } while ($code === CURLM_CALL_MULTI_PERFORM);

        if ($code !== CURLM_OK) {
            break;
        }

        // One or more requests may have just completed -- collect each of them.
        while ($done = curl_multi_info_read($queue)) {
            $handle = $done['handle'];
            $key = $keyOf($handle);

            // The body is available at this point, so per-response business
            // logic could start here.
            $responses[$pending[$key]['url']] = array(
                'info'    => curl_getinfo($handle),
                'error'   => curl_error($handle),
                'results' => curl_multi_getcontent($handle),
            );

            // Detach and release the handle that just completed.
            curl_multi_remove_handle($queue, $handle);
            if (is_resource($handle)) {
                // Only resource handles (PHP < 8) need an explicit close;
                // CurlHandle objects are released once unreferenced.
                curl_close($handle);
            }
            unset($pending[$key]);
        }

        // Block until there is socket activity. curl_multi_select() returns -1
        // on failure; back off briefly so the loop does not spin at full CPU.
        if ($active > 0 && curl_multi_select($queue, 0.5) === -1) {
            usleep(1000);
        }
    } while ($active);

    // Reached with leftovers only when curl_multi_exec() failed part-way:
    // release the transfers that never completed.
    foreach ($pending as $entry) {
        curl_multi_remove_handle($queue, $entry['handle']);
        if (is_resource($entry['handle'])) {
            curl_close($entry['handle']);
        }
    }

    curl_multi_close($queue);

    return $responses;
}