[PHP类] 分享一个强大的HTTP访问类(可做采集)

做采集的时候,可以使用file_get_contents()去获取网页源代码,但是使用file_get_contents采集速度慢,而且超时时间不好控制,如果采集的页面不存在,需要等待的时间很长。一般来说,curl的速度最快,其次是socket,最后是file_get_contents。
现在跟大家分享一个很强大的采集类,会根据你的服务器当前的配置,自动选择最快的方式。已经封装了curl、socket和file_get_contents三种方式。

用法很简单:
1、采用get方法请求
Http::doGet(网址);//超时时间可忽略,默认是5秒
Http::doGet(网址,超时时间); 
如echo Http::doGet('http://www.baidu.com');

2、采用post方法请求
Http::doPost(网址,数据,超时时间);


$url='http://www.canphp.com/test.php';
$data['name']='单骑';
$data['email']='admin@canphp.com';
Http::doPost($url,$data,10);

test.php页面接收数据
$_POST['name'];
$_POST['email'];

这个http类不仅可以用来采集,还有一个很强大的作用,模拟php异步多进程。
比如有index.php和a.php,  b.php,  c.php
在index.php中
Http::doGet('http://www.canphp.com/a.php',1);
Http::doGet('http://www.canphp.com/b.php',1);
Http::doGet('http://www.canphp.com/c.php',1);

a.php,  b.php,  c.php程序分别在头部加上ignore_user_abort(true);
那么就可以实现多进程了。

原理:
通过curl或socket发送请求给a.php,  b.php,  c.php,由于超时时间比较短,只是触发了a.php,  b.php,  c.php三个页面,不需要等待数据返回,连接已中断,但是a.php,  b.php,  c.php程序中加上了ignore_user_abort(true);忽略客户端连接,还会继续执行。

<?php 
/**
 * Small static HTTP client with three interchangeable transports
 * (cURL, a raw socket opened with fsockopen, and file_get_contents),
 * plus a helper that streams a local file to the browser as a download.
 *
 * All request methods return the response body as a string on success
 * and false on failure.
 */
class Http {
    /**
     * Forced transport: 1 = cURL, 2 = fsockopen, 3 = file_get_contents.
     * Any other value lets getSupport() auto-detect.
     */
    public static $way = 0;

    /**
     * Force a transport for subsequent doGet()/doPost() calls.
     *
     * @param int $way 1, 2 or 3 (see $way); other values re-enable auto-detection.
     */
    public static function setWay($way) {
        self::$way = intval($way);
    }

    /**
     * Resolve which transport doGet()/doPost() will use.
     *
     * @return int 1 (cURL), 2 (fsockopen), 3 (file_get_contents) or 0 when none is available.
     */
    public static function getSupport() {
        // $way is public, so it may have been assigned directly as a numeric string.
        $forced = is_scalar(self::$way) ? intval(self::$way) : 0;
        if ($forced >= 1 && $forced <= 3) {
            return $forced;
        }
        // Auto-detect, in the class's order of preference.
        if (function_exists('curl_init')) {
            return 1;
        }
        if (function_exists('fsockopen')) {
            return 2;
        }
        if (function_exists('file_get_contents')) {
            return 3;
        }
        return 0;
    }

    /**
     * Fetch a URL with GET using the transport chosen by getSupport().
     *
     * @param string       $url     Target URL; "http://" is prepended when no scheme is given.
     * @param int|float    $timeout Timeout in seconds.
     * @param string|array $header  Extra request headers (CRLF/LF separated string, or list of lines).
     * @return string|false Response body, or false on failure / empty arguments.
     */
    public static function doGet($url, $timeout = 5, $header = "") {
        if (empty($url) || empty($timeout)) {
            return false;
        }
        $url = self::normalizeUrl($url);
        switch (self::getSupport()) {
            case 1:
                return self::curlGet($url, $timeout, $header);
            case 2:
                return self::socketGet($url, $timeout, $header);
            case 3:
                return self::phpGet($url, $timeout, $header);
            default:
                return false;
        }
    }

    /**
     * Send form data with POST using the transport chosen by getSupport().
     *
     * @param string       $url       Target URL; "http://" is prepended when no scheme is given.
     * @param array        $post_data Fields to send; encoded with http_build_query().
     * @param int|float    $timeout   Timeout in seconds.
     * @param string|array $header    Extra request headers.
     * @return string|false Response body, or false on failure / empty arguments.
     */
    public static function doPost($url, $post_data = array(), $timeout = 5, $header = "") {
        if (empty($url) || empty($post_data) || empty($timeout)) {
            return false;
        }
        $url = self::normalizeUrl($url);
        switch (self::getSupport()) {
            case 1:
                return self::curlPost($url, $post_data, $timeout, $header);
            case 2:
                return self::socketPost($url, $post_data, $timeout, $header);
            case 3:
                return self::phpPost($url, $post_data, $timeout, $header);
            default:
                return false;
        }
    }

    /**
     * GET through cURL. Sends the default browser-like headers when $header is empty.
     *
     * @return string|false Whatever curl_exec() returns (body string, or false on error).
     */
    public static function curlGet($url, $timeout = 5, $header = "") {
        $header = empty($header) ? self::defaultHeader() : $header;
        return self::curlRequest($url, $timeout, $header, null);
    }

    /**
     * POST through cURL.
     *
     * NOTE(review): unlike the socket and file_get_contents POST variants, this
     * method has never applied defaultHeader() when $header is empty; that is
     * kept as-is here - confirm whether it was intentional.
     *
     * @return string|false Whatever curl_exec() returns (body string, or false on error).
     */
    public static function curlPost($url, $post_data = array(), $timeout = 5, $header = "") {
        return self::curlRequest($url, $timeout, $header, http_build_query($post_data));
    }

    /**
     * GET over a raw socket (HTTP/1.0). Sends the default headers when $header is empty.
     *
     * @return string|false Body of a 2xx response, false otherwise.
     */
    public static function socketGet($url, $timeout = 5, $header = "") {
        $header = empty($header) ? self::defaultHeader() : $header;
        return self::socketRequest('GET', $url, $timeout, $header, null);
    }

    /**
     * POST over a raw socket (HTTP/1.0). Sends the default headers when $header is empty.
     *
     * @return string|false Body of a 2xx response, false otherwise.
     */
    public static function socketPost($url, $post_data = array(), $timeout = 5, $header = "") {
        $header = empty($header) ? self::defaultHeader() : $header;
        return self::socketRequest('POST', $url, $timeout, $header, http_build_query($post_data));
    }

    /**
     * GET through file_get_contents() with an HTTP stream context.
     *
     * @return string|false Body, or false on failure (warnings are suppressed).
     */
    public static function phpGet($url, $timeout = 5, $header = "") {
        $header = empty($header) ? self::defaultHeader() : $header;
        $opts = array(
            'http' => array(
                'protocol_version' => '1.0',
                'method' => "GET",
                'timeout' => $timeout,
                'header' => self::headerBlock($header),
            ),
        );
        $context = stream_context_create($opts);
        return @file_get_contents($url, false, $context);
    }

    /**
     * POST through file_get_contents() with an HTTP stream context.
     *
     * @return string|false Body, or false on failure (warnings are suppressed).
     */
    public static function phpPost($url, $post_data = array(), $timeout = 5, $header = "") {
        $header = empty($header) ? self::defaultHeader() : $header;
        $post_string = http_build_query($post_data);

        // headerBlock() guarantees a trailing CRLF, so the lines appended
        // below can never be glued onto the caller's last header.
        $block = self::headerBlock($header);
        if (!preg_match('/^Content-Type\s*:/mi', $block)) {
            $block .= "Content-Type: application/x-www-form-urlencoded\r\n";
        }
        $block .= "Content-Length: " . strlen($post_string) . "\r\n";

        $opts = array(
            'http' => array(
                'protocol_version' => '1.0',
                'method' => "POST",
                'timeout' => $timeout,
                'header' => $block,
                'content' => $post_string,
            ),
        );
        $context = stream_context_create($opts);
        return @file_get_contents($url, false, $context);
    }

    /**
     * Default browser-like request headers, CRLF terminated.
     *
     * @return string
     */
    private static function defaultHeader() {
        $header = "User-Agent:Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12\r\n";
        $header .= "Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n";
        $header .= "Accept-language: zh-cn,zh;q=0.5\r\n";
        $header .= "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7\r\n";
        return $header;
    }

    /**
     * Prepend "http://" unless the URL already starts with http:// or https://.
     * The scheme separator is part of the test so that hosts such as
     * "httpbin.org" are not mistaken for URLs that already carry a scheme.
     */
    private static function normalizeUrl($url) {
        $url = trim($url);
        if (!preg_match('#^https?://#i', $url)) {
            $url = "http://" . $url;
        }
        return $url;
    }

    /**
     * Split a header argument into a list of trimmed, non-empty header lines.
     *
     * @param string|array $header Header lines separated by CRLF/LF, or already a list.
     * @return array
     */
    private static function headerLines($header) {
        $raw = is_array($header) ? $header : preg_split('/\r\n|\r|\n/', (string) $header);
        $lines = array();
        foreach ($raw as $line) {
            $line = trim((string) $line);
            if ($line !== '') {
                $lines[] = $line;
            }
        }
        return $lines;
    }

    /**
     * Normalise a header argument to a string in which every line ends with CRLF
     * (empty string when there are no headers).
     */
    private static function headerBlock($header) {
        $lines = self::headerLines($header);
        return empty($lines) ? '' : implode("\r\n", $lines) . "\r\n";
    }

    /**
     * Shared cURL implementation for curlGet()/curlPost().
     *
     * @param string|null $post_string URL-encoded body; null means a GET request.
     * @return string|false
     */
    private static function curlRequest($url, $timeout, $header, $post_string) {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }
        if ($post_string !== null) {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        // CURLOPT_HTTPHEADER expects one array element per header line.
        $lines = self::headerLines($header);
        if (!empty($lines)) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $lines);
        }
        $result = curl_exec($ch);
        curl_close($ch);
        return $result;
    }

    /**
     * Shared raw-socket implementation for socketGet()/socketPost().
     *
     * @param string      $method "GET" or "POST".
     * @param string|null $body   URL-encoded request body, or null for none.
     * @return string|false Body of a 2xx response, false otherwise.
     */
    private static function socketRequest($method, $url, $timeout, $header, $body) {
        $parts = parse_url($url);
        if ($parts === false || empty($parts['host'])) {
            return false;
        }
        $host = $parts['host'];
        $secure = isset($parts['scheme']) && strtolower($parts['scheme']) === 'https';
        $defaultPort = $secure ? 443 : 80;
        $port = isset($parts['port']) ? intval($parts['port']) : $defaultPort;
        $path = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
        $query = (isset($parts['query']) && $parts['query'] !== '') ? '?' . $parts['query'] : '';

        // https needs the ssl:// transport and the real host name; plain http
        // keeps connecting to the resolved address as before.
        $target = $secure ? 'ssl://' . $host : @gethostbyname($host);

        $timeout = (float) $timeout;
        $fsock = fsockopen($target, $port, $errno, $errstr, $timeout);
        if ($fsock === false) { // fsockopen() signals failure with false, never a negative number
            return false;
        }

        // The fsockopen() timeout only covers connecting; bound reads as well.
        $seconds = (int) floor($timeout);
        $micro = (int) round(($timeout - $seconds) * 1000000);
        stream_set_timeout($fsock, $seconds, $micro);

        $extra = self::headerBlock($header);
        $request = $method . " " . $path . $query . " HTTP/1.0\r\n";
        if (!preg_match('/^Host\s*:/mi', $extra)) {
            $request .= "Host: " . $host . ($port !== $defaultPort ? ':' . $port : '') . "\r\n";
        }
        $request .= $extra;
        if ($body !== null) {
            $request .= "Content-type: application/x-www-form-urlencoded\r\n";
            $request .= "Content-Length: " . strlen($body) . "\r\n";
        }
        $request .= "Connection: Close\r\n\r\n";
        if ($body !== null) {
            // Exactly Content-Length bytes: no trailing CRLF after the body.
            $request .= $body;
        }

        // fwrite() on a socket may accept fewer bytes than offered.
        $total = strlen($request);
        $sent = 0;
        while ($sent < $total) {
            $n = @fwrite($fsock, substr($request, $sent));
            if ($n === false || $n === 0) {
                @fclose($fsock);
                return false;
            }
            $sent += $n;
        }
        return self::GetHttpContent($fsock);
    }

    /**
     * Read a complete HTTP response from an open stream, close the stream,
     * and return the body when the status code is 2xx.
     *
     * The body is returned as received; no transfer or content decoding is done.
     *
     * @param resource|null $fsock Stream positioned at the start of the response.
     * @return string|false Response body ("" when empty), or false for a non-2xx
     *                      status, a read timeout, or a malformed response.
     */
    private static function GetHttpContent($fsock = null) {
        if (!is_resource($fsock)) {
            return false;
        }
        $out = '';
        while (!feof($fsock)) {
            $buff = @fgets($fsock, 2048);
            if ($buff === false) { // compare strictly: a chunk such as "0" is valid data
                break;
            }
            $out .= $buff;
        }
        $meta = stream_get_meta_data($fsock);
        fclose($fsock);
        if (!empty($meta['timed_out'])) {
            return false;
        }

        $pos = strpos($out, "\r\n\r\n");
        if ($pos === false) {
            return false; // header terminator never arrived
        }
        $head = substr($out, 0, $pos);
        $eol = strpos($head, "\r\n");
        $status = ($eol === false) ? $head : substr($head, 0, $eol);
        $body = (string) substr($out, $pos + 4);

        if (!preg_match("/^HTTP\/\d\.\d\s([\d]+)\s.*$/", $status, $matches)) {
            return false;
        }
        $code = intval($matches[1]);
        // Any 2xx is success; float division by 100 would only ever match exactly 200.
        return ($code >= 200 && $code < 300) ? $body : false;
    }

    /**
     * Send a local file to the browser as an attachment.
     *
     * Terminates the script with die() when the file does not exist.
     *
     * @param string $filename Path of the file to send.
     * @param string $showname File name offered to the browser; defaults to basename($filename).
     * @param int    $expire   Browser cache lifetime in seconds.
     * @return bool true after the file has been written to the output.
     */
    public static function download($filename, $showname = '', $expire = 1800) {
        if (file_exists($filename) && is_file($filename)) {
            $length = filesize($filename);
        } else {
            die('下载文件不存在!');
        }

        if ($showname === '' || $showname === null) {
            $showname = basename($filename);
        }
        // CR/LF would split the header; a double quote would end a quoted value.
        $showname = str_replace(array("\r", "\n", '"'), '', $showname);
        // Quote only when needed so names that already worked are sent unchanged.
        $disposition = preg_match('/[ \t;,]/', $showname) ? '"' . $showname . '"' : $showname;

        $type = mime_content_type($filename);
        if (empty($type)) {
            $type = 'application/octet-stream';
        }
        $expire = intval($expire);
        $now = time();

        header("Pragma: public");
        header("Cache-control: max-age=" . $expire);
        // HTTP dates need a space before the zone designator.
        header("Expires: " . gmdate("D, d M Y H:i:s", $now + $expire) . " GMT");
        header("Last-Modified: " . gmdate("D, d M Y H:i:s", $now) . " GMT");
        header("Content-Disposition: attachment; filename=" . $disposition);
        header("Content-Length: " . $length);
        header("Content-type: " . $type);
        // NOTE(review): "none" is not a registered content-coding; kept for backward compatibility.
        header('Content-Encoding: none');
        header("Content-Transfer-Encoding: binary");
        readfile($filename);
        return true;
    }
}

if (!function_exists('mime_content_type')) {
    /**
     * Fallback used only when PHP does not provide mime_content_type().
     *
     * Guesses the MIME type purely from the text after the last "." in
     * $filename (case-insensitive); the file itself is never opened.
     *
     * @param string $filename File name or path.
     * @return string Mapped MIME type, or 'application/octet-stream' when the extension is unknown.
     */
    function mime_content_type($filename) {
        // Extension => MIME type lookup table.
        static $mimeByExtension = array(
            'ai' => 'application/postscript',
            'aif' => 'audio/x-aiff',
            'aifc' => 'audio/x-aiff',
            'aiff' => 'audio/x-aiff',
            'asc' => 'application/pgp', // previously text/plain
            'asf' => 'video/x-ms-asf',
            'asx' => 'video/x-ms-asf',
            'au' => 'audio/basic',
            'avi' => 'video/x-msvideo',
            'bcpio' => 'application/x-bcpio',
            'bin' => 'application/octet-stream',
            'bmp' => 'image/bmp',
            'c' => 'text/plain', // alternative: text/x-csrc
            'cc' => 'text/plain', // alternative: text/x-c++src
            'cs' => 'text/plain', // C# source
            'cpp' => 'text/x-c++src',
            'cxx' => 'text/x-c++src',
            'cdf' => 'application/x-netcdf',
            'class' => 'application/octet-stream', // application/java-class is the precise type
            'com' => 'application/octet-stream',
            'cpio' => 'application/x-cpio',
            'cpt' => 'application/mac-compactpro',
            'csh' => 'application/x-csh',
            'css' => 'text/css',
            'csv' => 'text/comma-separated-values',
            'dcr' => 'application/x-director',
            'diff' => 'text/diff',
            'dir' => 'application/x-director',
            'dll' => 'application/octet-stream',
            'dms' => 'application/octet-stream',
            'doc' => 'application/msword',
            'dot' => 'application/msword',
            'dvi' => 'application/x-dvi',
            'dxr' => 'application/x-director',
            'eps' => 'application/postscript',
            'etx' => 'text/x-setext',
            'exe' => 'application/octet-stream',
            'ez' => 'application/andrew-inset',
            'gif' => 'image/gif',
            'gtar' => 'application/x-gtar',
            'gz' => 'application/x-gzip',
            'h' => 'text/plain', // alternative: text/x-chdr
            'h++' => 'text/plain', // alternative: text/x-c++hdr
            'hh' => 'text/plain', // alternative: text/x-c++hdr
            'hpp' => 'text/plain', // alternative: text/x-c++hdr
            'hxx' => 'text/plain', // alternative: text/x-c++hdr
            'hdf' => 'application/x-hdf',
            'hqx' => 'application/mac-binhex40',
            'htm' => 'text/html',
            'html' => 'text/html',
            'ice' => 'x-conference/x-cooltalk',
            'ics' => 'text/calendar',
            'ief' => 'image/ief',
            'ifb' => 'text/calendar',
            'iges' => 'model/iges',
            'igs' => 'model/iges',
            'jar' => 'application/x-jar',
            'java' => 'text/x-java-source',
            'jpe' => 'image/jpeg',
            'jpeg' => 'image/jpeg',
            'jpg' => 'image/jpeg',
            'js' => 'application/x-javascript',
            'kar' => 'audio/midi',
            'latex' => 'application/x-latex',
            'lha' => 'application/octet-stream',
            'log' => 'text/plain',
            'lzh' => 'application/octet-stream',
            'm3u' => 'audio/x-mpegurl',
            'man' => 'application/x-troff-man',
            'me' => 'application/x-troff-me',
            'mesh' => 'model/mesh',
            'mid' => 'audio/midi',
            'midi' => 'audio/midi',
            'mif' => 'application/vnd.mif',
            'mov' => 'video/quicktime',
            'movie' => 'video/x-sgi-movie',
            'mp2' => 'audio/mpeg',
            'mp3' => 'audio/mpeg',
            'mpe' => 'video/mpeg',
            'mpeg' => 'video/mpeg',
            'mpg' => 'video/mpeg',
            'mpga' => 'audio/mpeg',
            'ms' => 'application/x-troff-ms',
            'msh' => 'model/mesh',
            'mxu' => 'video/vnd.mpegurl',
            'nc' => 'application/x-netcdf',
            'oda' => 'application/oda',
            'patch' => 'text/diff',
            'pbm' => 'image/x-portable-bitmap',
            'pdb' => 'chemical/x-pdb',
            'pdf' => 'application/pdf',
            'pgm' => 'image/x-portable-graymap',
            'pgn' => 'application/x-chess-pgn',
            'pgp' => 'application/pgp',
            'php' => 'application/x-httpd-php',
            'php3' => 'application/x-httpd-php3',
            'pl' => 'application/x-perl',
            'pm' => 'application/x-perl',
            'png' => 'image/png',
            'pnm' => 'image/x-portable-anymap',
            'po' => 'text/plain',
            'ppm' => 'image/x-portable-pixmap',
            'ppt' => 'application/vnd.ms-powerpoint',
            'ps' => 'application/postscript',
            'qt' => 'video/quicktime',
            'ra' => 'audio/x-realaudio',
            'rar' => 'application/octet-stream',
            'ram' => 'audio/x-pn-realaudio',
            'ras' => 'image/x-cmu-raster',
            'rgb' => 'image/x-rgb',
            'rm' => 'audio/x-pn-realaudio',
            'roff' => 'application/x-troff',
            'rpm' => 'audio/x-pn-realaudio-plugin',
            'rtf' => 'text/rtf',
            'rtx' => 'text/richtext',
            'sgm' => 'text/sgml',
            'sgml' => 'text/sgml',
            'sh' => 'application/x-sh',
            'shar' => 'application/x-shar',
            'shtml' => 'text/html',
            'silo' => 'model/mesh',
            'sit' => 'application/x-stuffit',
            'skd' => 'application/x-koan',
            'skm' => 'application/x-koan',
            'skp' => 'application/x-koan',
            'skt' => 'application/x-koan',
            'smi' => 'application/smil',
            'smil' => 'application/smil',
            'snd' => 'audio/basic',
            'so' => 'application/octet-stream',
            'spl' => 'application/x-futuresplash',
            'src' => 'application/x-wais-source',
            'stc' => 'application/vnd.sun.xml.calc.template',
            'std' => 'application/vnd.sun.xml.draw.template',
            'sti' => 'application/vnd.sun.xml.impress.template',
            'stw' => 'application/vnd.sun.xml.writer.template',
            'sv4cpio' => 'application/x-sv4cpio',
            'sv4crc' => 'application/x-sv4crc',
            'swf' => 'application/x-shockwave-flash',
            'sxc' => 'application/vnd.sun.xml.calc',
            'sxd' => 'application/vnd.sun.xml.draw',
            'sxg' => 'application/vnd.sun.xml.writer.global',
            'sxi' => 'application/vnd.sun.xml.impress',
            'sxm' => 'application/vnd.sun.xml.math',
            'sxw' => 'application/vnd.sun.xml.writer',
            't' => 'application/x-troff',
            'tar' => 'application/x-tar',
            'tcl' => 'application/x-tcl',
            'tex' => 'application/x-tex',
            'texi' => 'application/x-texinfo',
            'texinfo' => 'application/x-texinfo',
            'tgz' => 'application/x-gtar',
            'tif' => 'image/tiff',
            'tiff' => 'image/tiff',
            'tr' => 'application/x-troff',
            'tsv' => 'text/tab-separated-values',
            'txt' => 'text/plain',
            'ustar' => 'application/x-ustar',
            'vbs' => 'text/plain', // deliberately served as plain text
            'vcd' => 'application/x-cdlink',
            'vcf' => 'text/x-vcard',
            'vcs' => 'text/calendar',
            'vfb' => 'text/calendar',
            'vrml' => 'model/vrml',
            'vsd' => 'application/vnd.visio',
            'wav' => 'audio/x-wav',
            'wax' => 'audio/x-ms-wax',
            'wbmp' => 'image/vnd.wap.wbmp',
            'wbxml' => 'application/vnd.wap.wbxml',
            'wm' => 'video/x-ms-wm',
            'wma' => 'audio/x-ms-wma',
            'wmd' => 'application/x-ms-wmd',
            'wml' => 'text/vnd.wap.wml',
            'wmlc' => 'application/vnd.wap.wmlc',
            'wmls' => 'text/vnd.wap.wmlscript',
            'wmlsc' => 'application/vnd.wap.wmlscriptc',
            'wmv' => 'video/x-ms-wmv',
            'wmx' => 'video/x-ms-wmx',
            'wmz' => 'application/x-ms-wmz',
            'wrl' => 'model/vrml',
            'wvx' => 'video/x-ms-wvx',
            'xbm' => 'image/x-xbitmap',
            'xht' => 'application/xhtml+xml',
            'xhtml' => 'application/xhtml+xml',
            'xls' => 'application/vnd.ms-excel',
            'xlt' => 'application/vnd.ms-excel',
            'xml' => 'application/xml',
            'xpm' => 'image/x-xpixmap',
            'xsl' => 'text/xml',
            'xwd' => 'image/x-xwindowdump',
            'xyz' => 'chemical/x-xyz',
            'z' => 'application/x-compress',
            'zip' => 'application/zip',
            );

        // Everything after the last dot, lower-cased, is the lookup key.
        $extension = strtolower(substr(strrchr($filename, '.'), 1));

        return isset($mimeByExtension[$extension])
            ? $mimeByExtension[$extension]
            : 'application/octet-stream';
    }
}

if (!function_exists('image_type_to_extension')) {
    /**
     * Fallback used only when PHP does not provide image_type_to_extension().
     *
     * @param int  $imagetype   One of the IMAGETYPE_* constants.
     * @param bool $include_dot Whether the returned extension starts with a dot.
     *                          Added so the fallback accepts the same arguments
     *                          as the native function; defaults to true, which
     *                          is what this fallback always did.
     * @return string|false File extension for the image type, or false when
     *                      $imagetype is empty or not recognised.
     */
    function image_type_to_extension($imagetype, $include_dot = true) {
        if (empty($imagetype)) {
            return false;
        }
        switch ($imagetype) {
            case IMAGETYPE_GIF:
                $extension = '.gif';
                break;
            case IMAGETYPE_JPEG:
                $extension = '.jpg';
                break;
            case IMAGETYPE_PNG:
                $extension = '.png';
                break;
            case IMAGETYPE_SWF:
                $extension = '.swf';
                break;
            case IMAGETYPE_PSD:
                $extension = '.psd';
                break;
            case IMAGETYPE_BMP:
                $extension = '.bmp';
                break;
            case IMAGETYPE_TIFF_II:
            case IMAGETYPE_TIFF_MM:
                $extension = '.tiff';
                break;
            case IMAGETYPE_JPC:
                $extension = '.jpc';
                break;
            case IMAGETYPE_JP2:
                $extension = '.jp2';
                break;
            case IMAGETYPE_JPX:
                $extension = '.jpf';
                break;
            case IMAGETYPE_JB2:
                $extension = '.jb2';
                break;
            case IMAGETYPE_SWC:
                $extension = '.swc';
                break;
            case IMAGETYPE_IFF:
                // IFF is an image container; ".aiff" is an audio format.
                $extension = '.iff';
                break;
            case IMAGETYPE_WBMP:
                $extension = '.wbmp';
                break;
            case IMAGETYPE_XBM:
                $extension = '.xbm';
                break;
            default:
                return false;
        }
        return $include_dot ? $extension : substr($extension, 1);
    }
}

?>

方法:download($filename, $showname='',$expire=1800) 
说明:用于下载文件
参数:
•$filename,包含路径的文件名
•$showname,下载显示的文件名,需要自行转成gbk编码,如果带空格,需要自行替换成其他字符
•$expire,下载内容浏览器缓存时间

使用方法:

1 $showname='最新资料.zip';
2 $showname=auto_charset($showname,'utf-8','gbk');//utf-8编码转成gbk编码
3 Http::download('upload/123.zip',$showname);

 

方法:doGet($url,$timeout=5,$header='')
说明:采用get方法请求页面,会自动使用最快的访问方式,获取数据
参数:
•$url,网址
•$timeout,超时时间
•$header,http请求头,用于发送cookie等信息

使用方法:

1 echo Http::doGet('http://www.baidu.com');

 

方法:doPost($url,$data,$timeout=5,$header='')
说明:采用post方法请求页面,会自动使用最快的访问方式,获取数据
参数:
•$url,网址
•$data,待发送的数据,类型数组。
•$timeout,超时时间
•$header,http请求头,用于发送cookie等信息

使用方法:

2   $data['name']='单骑';
3   $data['email']='admin@canphp.com';
4   Http::doPost($url,$data,10);
5  
6 //test.php页面接收数据        
7   $_POST['name'];
8   $_POST['email'];

 

方法:setWay($way)
说明:手动设置doGet()和doPost()访问方式
参数:
•$way:参数可以1或2或3
•参数1时:采用curl
•参数2时:采用socket
•参数3时:采用file_get_contents()函数模拟
•若不设置访问方式,会自动获取当前环境的支持方式,选择最佳的方式去访问,优先度curl > socket > file_get_contents

使用方法:

1 Http::setWay(3);
2 echo Http::doGet('http://www.baidu.com');//将采用file_get_contents()方式获取内容
posted @ 2015-10-26 15:41  chinall  阅读(1737)  评论(0编辑  收藏  举报