PHP多进程爬虫
<?php
/**
 * Multi-process image crawler.
 *
 * Forks one child process per entry in $links. Each child walks ten
 * consecutive listing pages starting at the page number embedded in the
 * link, follows every ".search-works-thumb" link and downloads the image
 * found in "#J_worksImg" on the linked page.
 *
 * Notes:
 *  1. PHP needs the pcntl extension installed.
 *  2. composer require fabpot/goutte --prefer-dist
 */

use Goutte\Client;

/**
 * Fetch a URL with cURL and store the response body below $path.
 *
 * @param string $url  URL of the image to download.
 * @param string $path Base directory; the URL's path component is appended to it.
 *
 * @return bool True when the body was fetched and written, false otherwise.
 */
function downloadImage($url, $path = '/www/images/')
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
    $file = curl_exec($ch);
    // Read the error text before the handle is closed.
    $error = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; writing that out would leave an
    // empty file that looks like a finished download.
    if ($file === false) {
        echo 'DOWNLOAD ' . $url . ' FAIL: ' . $error . PHP_EOL;
        return false;
    }

    return saveAsImage($url, $file, $path);
}

/**
 * Write downloaded bytes to $path followed by the URL's path component,
 * creating intermediate directories as needed.
 *
 * @param string $url  URL the data came from; only its path component is used.
 * @param string $file Raw file contents.
 * @param string $path Base directory.
 *
 * @return bool True when the file was written, false otherwise.
 */
function saveAsImage($url, $file, $path)
{
    $url_path = parse_url($url, PHP_URL_PATH);

    // parse_url() yields null/false when there is no usable path; a path
    // ending in "/" names a directory, not a file.
    if (!is_string($url_path) || $url_path === '' || substr($url_path, -1) === '/') {
        echo 'NO FILE NAME IN URL ' . $url . PHP_EOL;
        return false;
    }

    // The URL is scraped from a remote page, so its path is untrusted:
    // refuse ".." segments that would escape the base directory.
    if (in_array('..', explode('/', $url_path), true)) {
        echo 'UNSAFE PATH IN URL ' . $url . PHP_EOL;
        return false;
    }

    // $url_path already starts with "/", so drop any trailing slash on $path.
    $filename = rtrim($path, '/') . $url_path;
    $real_dir = pathinfo($filename, PATHINFO_DIRNAME);

    // Several child processes may create the same directory at once, so
    // re-check is_dir() after a failed mkdir() before treating it as an error.
    if (!is_dir($real_dir) && !mkdir($real_dir, 0777, true) && !is_dir($real_dir)) {
        echo 'MAKE DIR '.$real_dir.'FAIL!!!'.PHP_EOL;
        return false;
    }

    // Overwrite instead of append: appending to a file left by an earlier
    // run would corrupt the image.
    if (file_put_contents($filename, $file) === false) {
        echo 'WRITE FILE ' . $filename . ' FAIL' . PHP_EOL;
        return false;
    }

    return true;
}

require __DIR__ . '/vendor/autoload.php';

$client = new Client();

$links = [
    'http://www.nipic.com/topic/show_27192_1',
    'http://www.nipic.com/topic/show_27054_1',
    'http://www.nipic.com/topic/show_27085_1',
];
$pids = [];
$dir = '/www/du';

foreach ($links as $url) {
    $pid = pcntl_fork();

    switch ($pid) {
        case -1:
            die("Fork failed\n");

        case 0:
            // Child process: crawl ten pages for this link, then exit.
            $id = posix_getpid();
            echo "Create child process $id success~\n";
            $data = [];

            // Parse the starting page number once. Re-parsing the URL built
            // in the previous iteration would add $i to an already
            // incremented number and skip pages (1, 2, 4, 7, ...).
            $url_arr = explode('_', $url);
            $last = count($url_arr) - 1;
            $first_page = intval($url_arr[$last]);

            for ($i = 0; $i < 10; $i++) {
                $url_arr[$last] = $first_page + $i;
                $page_url = implode('_', $url_arr) . '.html';

                $crawler = $client->request('GET', $page_url);
                $crawler->filter('.search-works-thumb')->each(function ($node) use ($client, $id, $dir, &$data) {
                    $detail_url = $node->link()->getUri();
                    $detail = $client->request('GET', $detail_url);
                    $detail->filter('#J_worksImg')->each(function ($node) use ($id, $dir, &$data) {
                        $src = $node->image()->getUri();
                        $data[$id][] = $src;
                        downloadImage($src, $dir);
                    });
                });
            }

            print_r($data);
            exit;

        default:
            // Parent process: remember the child so it can be reaped below.
            $pids[$pid] = $pid;
            var_dump($pids);
            break;
    }
}

// Reap children until none are left.
while (count($pids) > 0) {
    // No WUNTRACED: a child that is merely stopped has not exited and must
    // not be removed from the list.
    $id = pcntl_wait($status);

    if ($id > 0) {
        echo "child process $id is exit.\n";
        unset($pids[$id]);
        continue;
    }

    // An interrupted wait is retried; any other failure (for example no
    // children left to wait for) would otherwise make this loop spin forever.
    if (pcntl_get_last_error() === PCNTL_EINTR) {
        continue;
    }

    echo 'pcntl_wait failed: ' . pcntl_strerror(pcntl_get_last_error()) . "\n";
    break;
}

echo "Done\n";