9、php多线程处理方法案例总结

  开头还是说下遇到的问题吧。公司项目需要APP端传输客户的手机号码对应城市数,测试发现当客户手机通讯录大于两千条时,数据处理会异常缓慢,例如3000条的处理时间会超过十分钟,然而客户只需再进行不到三分钟的操作就要提交数据了,所以务必得在有限的时间内完成通讯录的比对。话不多言,开始写代码进行测试优化。

  首先是原始的处理思路,获取客户通讯录,进行遍历离线手机段对应城市文件,对比结果,然后保存为数组格式,代码简洁明了,然而测试执行时间漫长,超过6分多钟的时间!

  赶紧整理思路。发现电话号段在离线文件中的位置越靠后,比对的时间就越长。故先对比对这一块单独进行优化测试:将号段按前三位进行分类,采用类似于索引的思路,确定每个号段开始遍历的位置。如下代码:

<?php
    // Lookup endpoint: prints, as JSON, column 4 of the first row of ./phone.dat whose
    // column 2 equals the first seven characters of ?phone=..., then exits.
    // Nothing is printed when the number is missing, its prefix is unknown, or no row matches.
    //
    // The first version of this script scanned every row from the top of the file;
    // the table below lets the scan begin at the first row of the matching prefix instead.

    // Row number (1-based) at which scanning starts for each three-digit prefix.
    $prefix_start = [
        '130' => 1,      '131' => 9906,   '132' => 19906,  '133' => 29906,
        '134' => 39839,  '135' => 49586,  '136' => 59586,  '137' => 69586,
        '138' => 79586,  '139' => 89583,  '145' => 99581,  '147' => 106482,
        '150' => 116080, '151' => 126080, '152' => 136080, '153' => 146080,
        '155' => 156079, '156' => 166079, '157' => 175882, '158' => 184147,
        '159' => 194147, '170' => 204147, '176' => 209266, '178' => 221027,
        '180' => 223996, '181' => 233841, '182' => 243803, '183' => 253764,
        '184' => 263754, '185' => 271749, '186' => 280725, '187' => 290692,
        '188' => 300673, '189' => 310433,
    ];

    // Validate the input before loading the (large) data file.
    $phone = isset($_GET['phone']) ? $_GET['phone'] : '';
    if (!is_string($phone)) {
        exit();
    }
    $param = substr($phone, 0, 7);
    $front = substr($phone, 0, 3);
    if (!isset($prefix_start[$front])) {
        exit();
    }

    $file = file("./phone.dat");
    if ($file === false) {
        http_response_code(500);
        exit();
    }

    // Bound the scan by the real number of rows rather than a hard-coded row count.
    $total = count($file);
    for ($i = $prefix_start[$front] - 1; $i < $total; $i++) {
        $value = explode(',', $file[$i]);
        // Skip rows that do not have the columns read below.
        if (isset($value[2], $value[4]) && trim($value[2]) === $param) {
            exit(json_encode($value[4], JSON_UNESCAPED_UNICODE));
        }
    }
?>

测试相同号段(如187段)优化后的代码获取到结果的时间明显少于优化之前的时间。离线文件这块的优化取得基本的成功。

接下来是通讯录这部分的优化。想类比于离线文件的优化方式,分块进行比对,然后进行汇总。自然就回归到php的多线程处理。赶紧联系度娘来一波资源。。。(嘿嘿,开玩笑啦)。

网上搜了好久,终于找到了两种方式:curl_multi_init()和继承Thread类的方式,下面话不多说就直接上代码了。

1、curl_multi_init()方式---原谅我代码注释很少。。。

<?php
    // Coordinator: requests three worker endpoints concurrently, sums the per-city
    // counts they return and writes the merged totals to ./fina.txt.
    // microtime() is echoed at the start and at the end so the run can be timed.
    set_time_limit(600);

    $phones = json_decode(file_get_contents('./directory-_2017-08-22.txt'), true);
    if (!is_array($phones)) {
        // Unreadable or malformed contact file: treat it as an empty list.
        $phones = [];
    }
    echo microtime();

    // Every worker is told the size of one third of the list via start_num.
    $mid_num = ceil(count($phones) / 3);
    $urls = ["http://localhost/myCode/get_city/deal_first.php?start_num=$mid_num",
             "http://localhost/myCode/get_city/deal_second.php?start_num=$mid_num",
             "http://localhost/myCode/get_city/deal_third.php?start_num=$mid_num"];

    $mh   = curl_multi_init();
    $conn = [];
    foreach ($urls as $i => $url) {
        $conn[$i] = curl_init($url);
        // The header is absent when the script is not run through a browser request.
        if (isset($_SERVER['HTTP_USER_AGENT'])) {
            curl_setopt($conn[$i], CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
        }
        curl_setopt($conn[$i], CURLOPT_RETURNTRANSFER, true);
        curl_setopt($conn[$i], CURLOPT_TIMEOUT, 600);
        curl_multi_add_handle($mh, $conn[$i]);
    }

    // Drive all transfers to completion.
    do {
        $status = curl_multi_exec($mh, $active);
        if ($active && $status === CURLM_OK) {
            // Wait for activity on any handle instead of spinning the CPU.
            if (curl_multi_select($mh, 1.0) === -1) {
                // select can fail immediately; back off briefly before retrying.
                usleep(100000);
            }
        }
    } while ($active && ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM));

    // Collect each worker's {city: count} map and add the counts up per city.
    $data   = [];
    $result = [];
    foreach ($conn as $i => $handle) {
        $error    = curl_error($handle);
        $data[$i] = json_decode((string) curl_multi_getcontent($handle), true);
        curl_multi_remove_handle($mh, $handle);
        curl_close($handle);

        if ($error !== '' || !is_array($data[$i])) {
            // A failed worker must not abort the merge of the others.
            error_log("worker {$urls[$i]} failed: " . ($error !== '' ? $error : 'invalid JSON response'));
            continue;
        }
        foreach ($data[$i] as $name => $count) {
            if (!is_numeric($count)) {
                continue;
            }
            $result[$name] = (isset($result[$name]) ? $result[$name] : 0) + $count;
        }
    }
    curl_multi_close($mh);

    $result = json_encode($result, JSON_UNESCAPED_UNICODE);
    file_put_contents('./fina.txt', $result);
    echo "<br>";
    echo microtime();

?>

// Worker: for the first start_num contacts of the list, looks up the city of every
// number that passes the pattern below and prints the per-city counts as JSON.
set_time_limit(600);
    $phones = json_decode(file_get_contents('./directory-_2017-08-22.txt'), true);
    if (!is_array($phones)) {
        // Unreadable or malformed contact file: nothing to count.
        $phones = [];
    }
    // Never read past the end of the list, whatever value the caller sent.
    $requested = isset($_GET['start_num']) ? (int) $_GET['start_num'] : 0;
    $end_num = min($requested, count($phones));
    $city = [];
    // 1, then one of 3/4/5/7/8, then 5 to 9 further digits.
    // (The earlier class [3|4|5|7|8] also accepted a literal "|".)
    $match = '/^1[34578][0-9]\d{4,8}$/';
    for ($i = 0; $i < $end_num; $i++) {
        if (!isset($phones[$i]['contactNumber']) || !is_scalar($phones[$i]['contactNumber'])) {
            continue;
        }
        // Remove separators first so that a leading "+86" is still leading afterwards,
        // then strip it only when it really is a prefix.
        $number = str_replace([' ', '-'], '', (string) $phones[$i]['contactNumber']);
        if (strpos($number, '+86') === 0) {
            $number = substr($number, 3);
        }
        if (!preg_match($match, $number)) {
            continue;
        }
        $url = 'http://localhost//myCode/get_city/get_city.php?phone='.$number;
        $result = json_decode((string) url_GET($url));
        if (!is_string($result) || $result === '') {
            // The lookup failed or found no city: do not count it under an empty key.
            continue;
        }
        // Increment the counter for this city, creating it on first sight.
        $city[$result] = isset($city[$result]) ? $city[$result] + 1 : 1;
    }
    $city = json_encode($city,JSON_UNESCAPED_UNICODE);
    exit($city);
    /**
     * Perform a GET request with cURL and hand back the raw response body.
     *
     * @param string $url Address to request.
     * @return string|bool Response body, or false when curl_exec() fails.
     */
    function url_GET($url)
    {
        $handle = curl_init();
        curl_setopt_array($handle, [
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => 1,   // return the body instead of printing it
            CURLOPT_HEADER         => 0,   // body only, no response headers
            CURLOPT_TIMEOUT        => 10,  // seconds
        ]);

        $body = curl_exec($handle);
        curl_close($handle);

        return $body;
    }

第一个文件是主文件,负责通过curl_multi_init()并发地向各处理脚本发起请求(严格来说,这是并发的HTTP请求,由Web服务器并行处理,而不是PHP自身的多线程)。第二个文件是其中一个请求对应的具体处理过程,后续还有两个文件,基本类似,就不再展示。

这个方式测试时间为3分钟左右,一下子节省了近一半的时间。

2、Thread类方式

<?php
// Raise the execution time limit to 600 seconds for this script.
set_time_limit(600);
/**
 * Thread that counts, per city, the numbers in one index range of a contact list
 * that pass the mobile-number pattern used in deal_phones().
 *
 * run() processes the range given to the constructor and stores the counts in
 * $result. deal_phones() is public and may also be called directly with any range.
 */
class Test extends \Thread {

    public $result;        // per-city counts produced by run()
    public $phones;        // contact list; each entry is read via its 'contactNumber' key
    public $city;          // counts from the most recent deal_phones() call
    public $start_num;     // first index handled by run() (inclusive)
    public $end_num;       // last index handled by run() (exclusive)

    /**
     * @param int|string $start_num First index to process (inclusive).
     * @param int|string $end_num   Index at which to stop (exclusive).
     * @param array      $phones    Contact list.
     */
    public function __construct($start_num,$end_num,$phones){
        $this->start_num  = (int) $start_num;
        $this->end_num    = (int) $end_num;
        $this->phones     = $phones;
    }

    /**
     * Thread entry point: count the configured range and keep the outcome in $result.
     */
    public function run(){
        // No truthiness test on the bounds: a range that starts at index 0 is valid
        // and an empty range simply yields no counts.
        $this->result = $this->deal_phones($this->start_num,$this->end_num);
    }

    /**
     * Perform a GET request with cURL.
     *
     * @param string $url Address to request.
     * @return string|bool Response body, or false when curl_exec() fails.
     */
    public function url_GET($url){
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_TIMEOUT, 10);
            $output = curl_exec($ch);
            curl_close($ch);
            return $output;
    }

    /**
     * Count cities for the contacts at indexes $begin (inclusive) to $end (exclusive).
     *
     * @param int|string $begin First index.
     * @param int|string $end   Stop index.
     * @return array City name => number of matching contacts.
     */
    public function deal_phones($begin,$end){
        $city = [];
        // 1, then one of 3/4/5/7/8, then 5 to 9 further digits.
        // (The earlier class [3|4|5|7|8] also accepted a literal "|".)
        $match = '/^1[34578][0-9]\d{4,8}$/';
        for ($i = (int) $begin; $i < $end; $i++) {
            // Indexes beyond the list, or entries without a number, are skipped.
            if (!isset($this->phones[$i]['contactNumber'])) {
                continue;
            }
            $raw = $this->phones[$i]['contactNumber'];
            if (!is_scalar($raw)) {
                continue;
            }
            // Normalise into a local variable; the shared list is left untouched.
            // Separators go first so that a leading "+86" is still leading afterwards.
            $number = str_replace([' ', '-'], '', (string) $raw);
            if (strpos($number, '+86') === 0) {
                $number = substr($number, 3);
            }
            if (!preg_match($match, $number)) {
                continue;
            }
            $url = 'http://localhost//myCode/get_city/get_city.php?phone='.$number;
            $result = json_decode((string) $this->url_GET($url));
            if (!is_string($result) || $result === '') {
                // The lookup failed or found no city: do not count it under an empty key.
                continue;
            }
            // Increment the counter for this city, creating it on first sight.
            $city[$result] = isset($city[$result]) ? $city[$result] + 1 : 1;
        }
        $this->city = $city;
        // Return the local array rather than $this->city: when this method is called on
        // the same object from outside while run() is also executing it, the property
        // may already hold the other call's counts.
        return $city;
    }
}
// Driver: splits the contact list into four consecutive ranges, counts cities for
// all of them and writes the four result sets to ./thread.txt.
// microtime() is echoed at the start and at the end so the run can be timed.
echo microtime();
echo "<br>";

// Load the list and compute the range boundaries.
$phones = json_decode(file_get_contents('./directory-_2017-08-22.txt'), true);
if (!is_array($phones)) {
    // Unreadable or malformed contact file: treat it as an empty list.
    $phones = [];
}
$total = count($phones);
$divide_num = (int) ceil($total/4);
// Clamp every boundary to $total so a short list never produces a range past its end.
$bounds = [
    0,
    min($total, $divide_num),
    min($total, 2*$divide_num),
    min($total, 3*$divide_num),
    $total,
];
// Result keys, in range order (the same keys the per-pair loop used to build).
$keys = ['00', '01', '10', '11'];

// Start a thread for every range after the first BEFORE joining any of them;
// joining inside the start loop would let only one thread run at a time.
$threads = [];
for ($k = 1; $k < 4; $k++) {
    $threads[$k] = new Test($bounds[$k], $bounds[$k+1], $phones);
    $threads[$k]->start();
}

// The main script handles the first range itself, on an instance that is never
// started, so it does not share an object with a running thread.
$local = new Test($bounds[0], $bounds[1], $phones);
$all_res = [];
$all_res[$keys[0]] = $local->deal_phones($bounds[0], $bounds[1]);

// Wait for the threads and pick up their results in range order.
foreach ($threads as $k => $thread) {
    $thread->join();
    $all_res[$keys[$k]] = $thread->result;
}
var_dump($all_res);
$all_res = json_encode($all_res,JSON_UNESCAPED_UNICODE);
file_put_contents('./thread.txt', $all_res);

echo "<br>";
echo microtime();
?>

测试时间为4分钟左右,感觉不如curl方式。可能自己还是在应用上没有到位吧。

至此两种多线程处理方式的使用梳理完毕。回头想想,确实如同数据库的索引一样,以空间换取时间,这大概也是如此吧。

posted @ 2017-12-27 17:14  5.2Hz  阅读(253)  评论(0)  编辑  收藏  举报