9、php多线程处理方法案例总结
开头还是说下遇到的问题吧。公司项目需要根据APP端上传的客户通讯录,统计各手机号码归属城市的数量。测试发现当客户手机通讯录大于两千条时,数据处理会异常地缓慢,例如处理3000条会超过十分钟,然而客户再进行不超过三分钟的操作就要提交数据了,所以务必得在有限的时间内完成通讯录的比对。话不多说,开始写代码进行测试优化。
首先是原始的处理思路:获取客户通讯录,逐条遍历离线的手机号段对应城市文件进行比对,然后将结果保存为数组格式。代码简洁明了,然而测试执行时间漫长,超过了6分钟!
赶紧整理思路。发现电话号段越靠近离线文件末端,比对的时间就越长。故先对比对这块进行单独的优化测试:将号段按前三位进行分类,采用类似于索引的思路,确定每个号段遍历的开始位置。代码如下:
<?php
/**
 * get_city.php: look up the value stored for a mobile number in ./phone.dat.
 *
 * Input : $_GET['phone'], the number to look up.
 * Output: field 4 of the first matching record, JSON-encoded. A record matches
 *         when its field 2 equals the first seven characters of the number.
 *         Nothing is printed when the three-character prefix is not in the
 *         index below, the data file cannot be read, or no record matches.
 */

// 1-based line of phone.dat at which the scan for each three-digit prefix
// starts. This is a hand-built index: it lets the scan skip every record that
// precedes the prefix instead of starting from the top of the file.
$prefix_start = [
    '130' => 1,      '131' => 9906,   '132' => 19906,  '133' => 29906,
    '134' => 39839,  '135' => 49586,  '136' => 59586,  '137' => 69586,
    '138' => 79586,  '139' => 89583,  '145' => 99581,  '147' => 106482,
    '150' => 116080, '151' => 126080, '152' => 136080, '153' => 146080,
    '155' => 156079, '156' => 166079, '157' => 175882, '158' => 184147,
    '159' => 194147, '170' => 204147, '176' => 209266, '178' => 221027,
    '180' => 223996, '181' => 233841, '182' => 243803, '183' => 253764,
    '184' => 263754, '185' => 271749, '186' => 280725, '187' => 290692,
    '188' => 300673, '189' => 310433,
];

// A missing or non-string parameter is treated as "no number given".
$phone = (isset($_GET['phone']) && is_string($_GET['phone'])) ? $_GET['phone'] : '';
$param = substr($phone, 0, 7);
$front = substr($phone, 0, 3);

// Unknown prefix: there is no starting line, so there is nothing to scan.
if (!isset($prefix_start[$front])) {
    exit;
}

$file = file("./phone.dat");
if ($file === false) {
    exit;
}

// Scan from the prefix's first line (converted to a 0-based index) up to the
// real last line of the file, so the loop can never read past the end.
$last_index = count($file) - 1;
for ($i = $prefix_start[$front] - 1; $i <= $last_index; $i++) {
    $value = explode(',', $file[$i]);

    // Skip blank or malformed lines that lack the fields used below.
    if (!isset($value[2], $value[4])) {
        continue;
    }

    if (trim($value[2]) === $param) {
        // file() keeps each line's terminator; strip it so that it cannot end
        // up inside the JSON string when field 4 is the last field of a line.
        exit(json_encode(rtrim($value[4], "\r\n"), JSON_UNESCAPED_UNICODE));
    }
}
?>
测试相同号段(如187段)优化后的代码获取到结果的时间明显少于优化之前的时间。离线文件这块的优化取得基本的成功。
接下来是通讯录这部分的优化。想类比于离线文件的优化方式,分块进行比对,然后进行汇总。自然就回归到php的多线程处理。赶紧联系度娘来一波资源。。。(嘿嘿,开玩笑啦)。
网上查了好久,终于找到了两种方式:curl_multi_init()和继承Thread类的方式,下面话不多说就直接上代码了。
1、curl_multi_init()方式---原谅我代码注释很少。。。
<?php
/**
 * Coordinator script for the curl_multi approach.
 *
 * Reads the contact list only to work out the size of one third of it, asks
 * three worker URLs (deal_first / deal_second / deal_third) for their partial
 * {city: count} maps concurrently, adds the partial counts together and writes
 * the combined map as JSON to ./fina.txt. microtime() is echoed before and
 * after so the elapsed time can be read off the page.
 */
set_time_limit(600);

$raw_contacts = file_get_contents('./directory-_2017-08-22.txt');
$phones = ($raw_contacts === false) ? null : json_decode($raw_contacts, true);
if (!is_array($phones)) {
    exit('Unable to read or decode ./directory-_2017-08-22.txt');
}

echo microtime();

$mid_num = (int) ceil(count($phones) / 3);

$urls = ["http://localhost/myCode/get_city/deal_first.php?start_num=$mid_num",
         "http://localhost/myCode/get_city/deal_second.php?start_num=$mid_num",
         "http://localhost/myCode/get_city/deal_third.php?start_num=$mid_num"];

// $_SERVER['HTTP_USER_AGENT'] is absent when run from the CLI or when the
// client sends no User-Agent header, so only forward it when it exists.
$user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : null;

$mh = curl_multi_init();
$conn = [];
foreach ($urls as $i => $url) {
    $conn[$i] = curl_init($url);
    if ($user_agent !== null) {
        curl_setopt($conn[$i], CURLOPT_USERAGENT, $user_agent);
    }
    curl_setopt($conn[$i], CURLOPT_RETURNTRANSFER, true);
    curl_setopt($conn[$i], CURLOPT_TIMEOUT, 600);
    curl_multi_add_handle($mh, $conn[$i]);
}

// Drive all transfers to completion. curl_multi_select() blocks until one of
// the sockets has activity, so the loop does not spin a CPU core for the
// minutes the workers need.
$active = null;
do {
    $status = curl_multi_exec($mh, $active);
    if ($status === CURLM_CALL_MULTI_PERFORM) {
        continue; // older libcurl: call curl_multi_exec() again immediately
    }
    if ($active && curl_multi_select($mh, 1.0) === -1) {
        usleep(1000); // select() could not wait; back off briefly instead
    }
} while ($status === CURLM_CALL_MULTI_PERFORM || ($active && $status === CURLM_OK));

// Collect each worker's partial result, then release its handle.
$data = [];
foreach ($conn as $i => $handle) {
    $error = curl_error($handle);
    if ($error !== '') {
        error_log("Request to {$urls[$i]} failed: {$error}");
    }

    $decoded = json_decode((string) curl_multi_getcontent($handle), true);
    // A failed or malformed response contributes nothing rather than breaking
    // the merge below.
    $data[$i] = is_array($decoded) ? $decoded : [];

    curl_multi_remove_handle($mh, $handle);
    curl_close($handle);
}
curl_multi_close($mh);

// Add the per-city counts of all partial results together.
$result = [];
foreach ($data as $partial) {
    foreach ($partial as $city_name => $count) {
        $result[$city_name] = (isset($result[$city_name]) ? $result[$city_name] : 0) + $count;
    }
}

$result = json_encode($result, JSON_UNESCAPED_UNICODE);
if (file_put_contents('./fina.txt', $result) === false) {
    error_log('Unable to write ./fina.txt');
}

echo "<br>";
echo microtime();
?>
/**
 * Worker script (deal_first.php) for the curl_multi approach.
 *
 * Processes the first $_GET['start_num'] entries of the contact list: cleans
 * up each contactNumber, asks get_city.php for the city of every number that
 * looks like a mobile number, and exits with a JSON object {city: count}.
 */
set_time_limit(600);

/**
 * Perform an HTTP GET with a 10 second timeout.
 *
 * @param string $url URL to fetch.
 * @return string|bool Response body, or false when the transfer failed.
 */
function url_GET($url)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $output = curl_exec($ch);
    curl_close($ch);
    return $output;
}

/**
 * Strip dashes, spaces and a leading "+86" from a raw contact number.
 *
 * @param string $raw Number as stored in the contact list.
 * @return string Cleaned number.
 */
function normalize_contact_number($raw)
{
    $number = str_replace(['-', ' '], '', $raw);
    // Only a "+86" at the very start is a country code; cutting three bytes
    // whenever "+86" occurs anywhere would remove the wrong characters.
    if (strncmp($number, '+86', 3) === 0) {
        $number = substr($number, 3);
    }
    return $number;
}

$raw_contacts = file_get_contents('./directory-_2017-08-22.txt');
$phones = ($raw_contacts === false) ? null : json_decode($raw_contacts, true);
if (!is_array($phones)) {
    $phones = [];
}

// Never iterate past the end of the list, whatever the caller asked for.
$requested = isset($_GET['start_num']) ? (int) $_GET['start_num'] : 0;
$end_num = max(0, min($requested, count($phones)));

// "1", then one of 3/4/5/7/8, then a digit, then 4-8 more digits. The class is
// written without "|" because inside [...] a "|" is a literal character.
$match = "/^1[34578][0-9]\d{4,8}$/";

$city = [];
for ($i = 0; $i < $end_num; $i++) {
    if (!isset($phones[$i]['contactNumber']) || !is_string($phones[$i]['contactNumber'])) {
        continue;
    }

    $number = normalize_contact_number($phones[$i]['contactNumber']);
    if (!preg_match($match, $number)) {
        continue;
    }

    $url = 'http://localhost//myCode/get_city/get_city.php?phone=' . $number;
    $response = url_GET($url);
    $result = is_string($response) ? json_decode($response) : null;

    // A failed request or an empty/non-string answer is not a city name, so
    // it is skipped instead of being counted under an empty key.
    if (!is_string($result) || $result === '') {
        continue;
    }

    // Increment the counter for this city, starting at 1 on first sight.
    $city[$result] = isset($city[$result]) ? $city[$result] + 1 : 1;
}

$city = json_encode($city, JSON_UNESCAPED_UNICODE);
exit($city);
第一个文件是主文件,负责通过curl_multi_init()并发地发起多个HTTP请求(严格来说这并不是PHP多线程,而是由Web服务器并行处理这些请求)。第二个文件是其中一个请求对应的具体处理过程,后续还有两个文件,基本类似,就不再展示。
这个方式测试时间为3分钟左右,一下子缩短了近一半的时间。
2、Thread类方式
<?php
set_time_limit(600);

/**
 * Worker thread that counts, per city, the mobile numbers found in one index
 * range of the contact list.
 *
 * run() processes [start_num, end_num) and stores the {city: count} map in
 * $result (and $city). deal_phones() can also be called directly to process an
 * arbitrary range on the calling thread.
 */
class Test extends \Thread
{
    public $result;    // {city: count} map produced by run()
    public $phones;    // contact list to process
    public $city;      // same map as $result, kept for callers that read it
    public $start_num; // first index handled by run() (inclusive)
    public $end_num;   // end index handled by run() (exclusive)

    public function __construct($start_num, $end_num, $phones)
    {
        $this->start_num = $start_num;
        $this->end_num = $end_num;
        $this->phones = $phones;
    }

    /**
     * Thread entry point: process this worker's own range.
     */
    public function run()
    {
        // Compare against null rather than testing truthiness, so a range
        // that starts at index 0 is still processed.
        if ($this->start_num !== null && $this->end_num !== null) {
            $counts = $this->deal_phones($this->start_num, $this->end_num);
            $this->city = $counts;
            $this->result = $counts;
        }
    }

    /**
     * Perform an HTTP GET with a 10 second timeout.
     *
     * @param string $url URL to fetch.
     * @return string|bool Response body, or false when the transfer failed.
     */
    public function url_GET($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        $output = curl_exec($ch);
        curl_close($ch);
        return $output;
    }

    /**
     * Count cities for the contacts in [$begin, $end).
     *
     * Works on local copies only: it neither rewrites $this->phones nor
     * touches $this->city, so it can be called from the main thread while
     * run() is executing on the same object.
     *
     * @param int $begin First index to process (inclusive).
     * @param int $end   End index (exclusive).
     * @return array {city: count} map for the range.
     */
    public function deal_phones($begin, $end)
    {
        // "1", then one of 3/4/5/7/8, then a digit, then 4-8 more digits. The
        // class has no "|" because inside [...] a "|" is a literal character.
        $match = "/^1[34578][0-9]\d{4,8}$/";

        $city = [];
        for ($i = (int) $begin; $i < (int) $end; $i++) {
            $number = $this->normalize_number((string) $this->phones[$i]['contactNumber']);
            if (!preg_match($match, $number)) {
                continue;
            }

            $url = 'http://localhost//myCode/get_city/get_city.php?phone=' . $number;
            $response = $this->url_GET($url);
            $result = is_string($response) ? json_decode($response) : null;

            // A failed request or an empty/non-string answer is not a city
            // name, so it is skipped instead of counted under an empty key.
            if (!is_string($result) || $result === '') {
                continue;
            }

            // Increment the counter for this city, starting at 1.
            $city[$result] = isset($city[$result]) ? $city[$result] + 1 : 1;
        }
        return $city;
    }

    /**
     * Strip dashes, spaces and a leading "+86" from a raw contact number.
     *
     * @param string $raw Number as stored in the contact list.
     * @return string Cleaned number.
     */
    private function normalize_number($raw)
    {
        $number = str_replace(['-', ' '], '', $raw);
        // Only a "+86" at the very start is a country code.
        if (strncmp($number, '+86', 3) === 0) {
            $number = substr($number, 3);
        }
        return $number;
    }
}

echo microtime();
echo "<br>";

// Split the contact list into four index ranges.
$raw_contacts = file_get_contents('./directory-_2017-08-22.txt');
$phones = ($raw_contacts === false) ? null : json_decode($raw_contacts, true);
if (!is_array($phones)) {
    $phones = [];
}
$total = count($phones);
$divide_num = (int) ceil($total / 4);

// Quarter boundaries, clamped to $total so that a short list cannot yield a
// range that points past the last contact.
$bounds = [
    0,
    min($divide_num, $total),
    min(2 * $divide_num, $total),
    min(3 * $divide_num, $total),
    $total,
];

// Quarters 1 and 3 go to worker threads; quarters 0 and 2 stay on the main
// thread. Both workers are started BEFORE the main thread begins its own
// share, so the three run at the same time instead of one pair after another.
$workers = [
    0 => new Test($bounds[1], $bounds[2], $phones),
    1 => new Test($bounds[3], $bounds[4], $phones),
];
foreach ($workers as $thread) {
    $thread->start();
}

/****** main-thread share ******/
$main_res = [
    0 => $workers[0]->deal_phones($bounds[0], $bounds[1]),
    1 => $workers[1]->deal_phones($bounds[2], $bounds[3]),
];
/****** main-thread share ******/

// Wait for the workers and assemble the results under the keys
// "<pair>0" (main thread) and "<pair>1" (worker thread).
$all_res = [];
foreach ($workers as $key => $thread) {
    $all_res[$key . "0"] = $main_res[$key];
    $thread->join();
    // NOTE(review): the cast is a no-op for a plain array; it is here in case
    // the extension hands the stored array back as an object. Confirm against
    // the installed pthreads version.
    $all_res[$key . "1"] = (array) $thread->result;
}

var_dump($all_res);
$all_res = json_encode($all_res, JSON_UNESCAPED_UNICODE);
if (file_put_contents('./thread.txt', $all_res) === false) {
    error_log('Unable to write ./thread.txt');
}

echo "<br>";
echo microtime();
?>
测试时间为4分钟左右,感觉不如curl方式。可能自己还是在应用上没有到位吧。
至此两种多线程处理方式的使用梳理完毕。回头想想,这些优化确实如同数据库的索引一样,大概都是以空间换取时间的思路吧。