curl 抓取远程图片

  /**
   * Download a remote file with cURL and save it to a local path.
   *
   * Redirects are followed, the whole transfer is capped at 240 seconds and
   * HTTP error responses (status >= 400) are treated as failures instead of
   * being saved as if they were the requested file. Failures are reported via
   * error_log() and any partially written target file is removed.
   *
   * @param string $file_name Local path the response body is written to.
   * @param string $url       Absolute URL of the remote file; must contain a host.
   * @return bool True when the transfer succeeded, false on any failure.
   */
  function download_file($file_name, $url) {
      // Validate first so that a bad URL can never truncate an existing file.
      $url_info = parse_url((string) $url);
      if (!is_array($url_info) || empty($url_info['host'])) {
          error_log("download_file: URL has no host: {$url}");
          return false;
      }

      $ch = curl_init($url);
      if ($ch === false) {
          error_log('download_file: curl_init() failed');
          return false;
      }

      $fp = fopen($file_name, 'wb');
      if ($fp === false) {
          curl_close($ch);
          error_log("download_file: cannot open {$file_name} for writing");
          return false;
      }

      // No explicit "Host" header: cURL derives it from the URL, and a fixed
      // value would also be sent to every redirect target.
      // No explicit "Accept-Encoding" header either: CURLOPT_ENCODING below
      // advertises the supported encodings AND decodes the body, so the file
      // on disk is never left gzip/deflate-compressed.
      $header = array(
          'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
          'Accept-Charset: UTF-8,*',
          'Accept-Language: en-us,en;q=0.5',
          'Connection: keep-alive',
      );
      $options = array(
          CURLOPT_FILE           => $fp,
          CURLOPT_HEADER         => 0,
          CURLOPT_FOLLOWLOCATION => 1,
          CURLOPT_TIMEOUT        => 240, // seconds, for the whole transfer
          CURLOPT_FAILONERROR    => 1,
          CURLOPT_ENCODING       => '',
          CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
          CURLOPT_HTTPHEADER     => $header,
      );

      $ok = curl_setopt_array($ch, $options) && curl_exec($ch) === true;
      if (!$ok) {
          error_log('download_file: ' . curl_error($ch));
      }

      curl_close($ch);
      fclose($fp);

      // Do not leave an empty or truncated file behind after a failed transfer.
      if (!$ok && is_file($file_name)) {
          unlink($file_name);
      }

      return $ok;
  }
 /**
  * Send an HTTP HEAD request and report the size and image type it advertises.
  *
  * @param string $uri Absolute URL to probe; must contain a host.
  * @return array Map with two keys:
  *               'size' - digits of the Content-Length header as a string,
  *                        or int 0 when the header is missing;
  *               'type' - lower-cased subtype of an "image/..." Content-Type
  *                        (e.g. 'jpeg'), or '' when it is missing or not an image.
  *               The same defaults are returned when the request fails.
  */
 function getinfo($uri) {
     $unknown = array('size' => 0, 'type' => '');

     $url_info = parse_url((string) $uri);
     if (!is_array($url_info) || empty($url_info['host'])) {
         return $unknown;
     }

     $ch = curl_init($uri);
     if ($ch === false) {
         return $unknown;
     }

     curl_setopt_array($ch, array(
         // "Host" is left to cURL, which derives it from the URL.
         // "Accept-Encoding" is not sent so that Content-Length is not the
         // size of a compressed representation.
         CURLOPT_HTTPHEADER     => array(
             'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
             'Accept-Charset: UTF-8,*',
             'Accept-Language: en-us,en;q=0.5',
             'Connection: keep-alive',
         ),
         CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
         CURLOPT_HEADER         => 1, // include the response headers in the result
         CURLOPT_NOBODY         => 1, // HEAD request: headers only
         CURLOPT_RETURNTRANSFER => 1, // return the headers instead of printing them
     ));

     $head = curl_exec($ch);
     curl_close($ch);
     if (!is_string($head)) {
         return $unknown;
     }

     // Header names are case-insensitive, hence the "i" flag; "m" anchors
     // each pattern to the start of a header line.
     $size = 0;
     if (preg_match('@^Content-Length:\s*(\d+)@mi', $head, $matches)) {
         $size = $matches[1];
     }

     $type = '';
     if (preg_match('@^Content-Type:\s*image/(\w+)@mi', $head, $matches)) {
         $type = strtolower($matches[1]);
     }

     return array('size' => $size, 'type' => $type);
 }
 /**
  * Return the extension of a file name or path, without the leading dot.
  *
  * Only the last path component is inspected, so a dot inside a directory
  * name is never mistaken for an extension.
  *
  * @param string $filename File name or path.
  * @return string The extension (e.g. 'jpg'), or '' when there is none.
  */
 function getext($filename) {
     return pathinfo($filename, PATHINFO_EXTENSION);
 }
 // Maps the image subtype reported by getinfo() to the file extension to use.
 $valid_file_type = array(
     'jpeg' => 'jpg',
     'png'  => 'png',
     'gif'  => 'gif',
 );

 // Sample image fetched by this script.
 $url = 'http://hiphotos.baidu.com/hackers365/pic/item/aa8b9a338d6f5518ac4b5fe6.jpg';

 /*
  * Disabled example: probe the URL first and append the extension matching
  * the reported image type when the URL's own extension differs.
  *
  * $file_info = getinfo($url);
  * var_export($file_info);
  * $filename = basename($url);
  * $ext = $valid_file_type[$file_info['type']];
  * if ($ext != getext($filename)) {
  *     $filename .= '.' . $ext;
  * }
  * var_export($filename);
  */

 // Save the sample image to a fixed local path.
 download_file('/tmp/hacker.jpg', $url);
posted @ 2012-09-21 10:56  凌之城  阅读(364)  评论(0)  编辑  收藏  举报