Linux C程序操作Mysql 调用PHP采集淘宝商品
还是继续这个项目。
在上一篇Linux下利用Shell使PHP并发采集淘宝产品中,采用shell将对PHP的调用推到后台执行,模拟多线程。
此方法有一致命缺点,只能人工预判每个程序执行时间。如果判断时间少于执行时间,则会生成大量进程,如果判断时间多于执行时间,则会浪费时间资源。
所以,在此我们采用C程序来控制并发数。
整体思路和用shell调用相似,只是把shell控制改成了C。
下面是C程序:
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #include <sys/time.h> 5 #include "/usr/local/include/mysql/mysql.h" 6 #define MAX_COLUMN_LEN 32 7 #define THREAD_NUM 20//线程数 8 int threads = 0; 9 pthread_t thread[THREAD_NUM]; 10 pthread_mutex_t mut;//线程锁 11 int count=0,vod_count=0,number = 0; 12 int *goods_id[1000000]; 13 void *thread1(int thread_id) 14 { 15 int sleepsec; 16 while (number < count){; 17 char shell_cmd[50]; 18 printf("number:%d\tthread_id=%d\tid=%s\n", number, thread_id, goods_id[number]); 19 sprintf(shell_cmd, "/usr/local/bin/php /var/www/9384shop/cron/goodsupdate.php %s", goods_id[number]);//生成shell命令 20 system(shell_cmd);//调用shell 21 pthread_mutex_lock(&mut); 22 number++; 23 pthread_mutex_unlock(&mut); 24 } 25 pthread_exit(NULL); 26 } 27 28 void create_thread(void){ 29 int i,temp; 30 for (i = 0; i < THREAD_NUM; i++){ 31 if (thread[i] == 0){ 32 if ((temp = pthread_create(&thread[i], NULL, thread1, i)) != 0){ 33 } 34 else{ 35 threads++; 36 } 37 break; 38 } 39 } 40 sleep(1); 41 } 42 void thread_wait(void) 43 { 44 int i; 45 /*等待线程结束*/ 46 for (i = 0; i < THREAD_NUM; i++){ 47 if (thread[i] != 0) { 48 pthread_join(thread[i], NULL); 49 } 50 } 51 } 52 int main(int argc, char *argv[]){ 53 MYSQL my_connection; 54 MYSQL_RES *result; 55 MYSQL_ROW sql_row; 56 MYSQL_FIELD *fd; 57 char column[MAX_COLUMN_LEN][MAX_COLUMN_LEN]; 58 int res,flag; 59 mysql_init(&my_connection); 60 if (mysql_real_connect(&my_connection, "localhost" 61 , "root", "202.133", "shop", 3306, NULL, 0)){ 62 printf("connected to mysql.\n"); 63 res = mysql_query(&my_connection, "select id from s_goods where is_off_sale=0 order by id desc limit 1000000");//查询 64 printf("select id from s_goods where is_off_sale=0 order by id desc limit 1000000\n"); 65 if (!res){ 66 int i = 0, j; 67 result = mysql_store_result(&my_connection);//保存查询到的数据到result 68 printf("the result number is %lu\n", (unsigned long)mysql_num_rows(result)); 69 count = (unsigned long)mysql_num_rows(result); 70 while 
(sql_row = mysql_fetch_row(result))//获取具体的数据 71 { 72 goods_id[i] = (unsigned long)sql_row[0]; 73 i++; 74 } 75 } 76 mysql_close(&my_connection);//断开连接 77 while (threads < THREAD_NUM) 78 create_thread(); 79 thread_wait(); 80 } 81 else{ 82 mysql_close(&my_connection);//断开连接 83 printf("ERROR:can not connect to mysql\n"); 84 } 85 86 }
PHP:
<?php
/**
 * Refreshes one s_goods row from its Taobao/Tmall product page.
 *
 * Usage: php goodsupdate.php <goods id>
 *
 * Looks up the stored URL and price for the id, scrapes the page, then
 * either marks the row off-sale or updates the scraped columns, and appends
 * one CSV line to the log file.
 *
 * NOTE(review): this still uses the legacy mysql_* extension, as the
 * original did; that extension was removed in PHP 7 - confirm the target
 * PHP version before deploying.
 */
define("OTHER",true); // when true, getPrice() also fetches shop name, sales and rating data
$host='localhost';
$username='root';
$password='123456';
$db_name='taobao';
$s=microtime(1);

// The id is concatenated into SQL below, so force it to an integer first.
$id=isset($argv[1])?(int)$argv[1]:0;
if($id<=0){
    fwrite(STDERR,"Usage: php ".$argv[0]." <goods id>\n");
    exit(1);
}

$con=dbOpen($host,$username,$password,$db_name);
$q=mysql_query('SELECT url,price FROM s_goods where id='.$id,$con);
$r=$q?mysql_fetch_array($q,MYSQL_ASSOC):false;
mysql_close($con);
if(!$r){
    fwrite(STDERR,"goods id $id not found\n");
    exit(1);
}
$oldprice=$r['price'];
$rs=getPrice($r['url']);
$t=microtime(1)-$s;

// $r is reused from here on as the CSV log record.
$r=array();
$r[]=date('Y-m-d H:i:s');
$r[]=$id;
$r[]=ceil($t*1000)/1000; // elapsed seconds, rounded up to milliseconds
if($rs==='soldout'){
    $r[]="OutStock";
    $con=dbOpen($host,$username,$password,$db_name);
    mysql_query("UPDATE s_goods SET is_off_sale=1 WHERE id=".$id,$con);
    mysql_close($con);
}
elseif($rs===false) $r[]= 'FALSE';
else{
    $r[]=$oldprice;
    $r[]=isset($rs['price'])?$rs['price']:'';
    $r[]=isset($rs['seller_nick'])?$rs['seller_nick']:'';
    $r[]=isset($rs['taobao_shop_id'])?$rs['taobao_shop_id']:'';
    $r[]=isset($rs['shop_name'])?$rs['shop_name']:'';
    $r[]=isset($rs['sales'])?$rs['sales']:'';
    $r[]=isset($rs['taobao_cid'])?$rs['taobao_cid']:'';
    $r[]=isset($rs['merchandis_score'])?$rs['merchandis_score']:'';
    $r[]=isset($rs['merchandis_total'])?$rs['merchandis_total']:'';

    // Connect before building the SET list: escaping needs the live connection.
    $con=dbOpen($host,$username,$password,$db_name);
    mysql_query("set names utf8",$con);
    $a=array();
    foreach ($rs as $k=>$v){
        // Empty values (including 0 and false) are skipped, so they never overwrite stored data.
        if(!empty($v)){
            // Values come from scraped pages, so they must be escaped; keys are fixed names set in getPrice().
            $a[]="$k='".mysql_real_escape_string($v,$con)."'";
        }
    }
    $a[]="update_time='".date('Y-m-d H:i:s')."'";
    mysql_query("UPDATE s_goods SET ".implode(',',$a)." WHERE id=".$id,$con);
    mysql_close($con);
}
$h=fopen('/home/staff/www/9384shop/cron/goodsUpdate.log','a+');
if($h){
    fputcsv($h,$r);
    fclose($h);
}else{
    fwrite(STDERR,"can not open log file\n");
}

/**
 * Opens a MySQL connection and selects the database; exits with status 1 on failure.
 *
 * @param string $host
 * @param string $username
 * @param string $password
 * @param string $db_name
 * @return resource connection link
 */
function dbOpen($host,$username,$password,$db_name){
    $con=mysql_connect($host,$username,$password);
    if(!$con||!mysql_select_db($db_name,$con)){
        fwrite(STDERR,"can not connect to mysql\n");
        exit(1);
    }
    return $con;
}

/**
 * Scrapes a Taobao/Tmall item page.
 *
 * @param string $url item URL; the item id is taken from its "id=" query parameter
 * @return array|string the string 'soldout' when the page says the item is gone,
 *         otherwise an array that always has 'price' (false when no price was found)
 *         and may have seller_nick, taobao_shop_id, taobao_cid, shop_name, sales,
 *         merchandis_score and merchandis_total.
 *         Terminates the script if the page body is empty.
 */
function getPrice($url){
    $rs=array();
    $price=false;
    $id=preg_match('/[&|\?]id=(\d+)/',$url,$m)?$m[1]:'';
    $c=curls($url,true);
    $content=is_array($c)?$c['content']:'';
    if(empty($content)) exit;
    $content=mb_convert_encoding($content,"UTF-8","gbk");
    $lastredirectaddr = $c['lastredirectaddr'];
    if(preg_match('/noitem\.htm/',$content)||preg_match('/<strong>此宝贝已下架<\/strong>|您查看的商品找不到了|您查看的宝贝不存在,可能已下架或者被转移/',$content)){
        return 'soldout';
    }elseif(preg_match("/'reservePrice'\s*:\s*'([\d\.]+?)',/",$content,$pm)){
        $price = (float)$pm[1];
    }elseif(preg_match('/price:([\d\.]+?),/',$content,$pm)){
        $price = (float)$pm[1];
    }
    if(preg_match('/"sellerNickName"\s*:\s*"(.*?)",/',$content,$nick)){
        $rs['seller_nick'] = urldecode($nick[1]);
    }elseif(preg_match('/sellerNick\s*:\s*"(.*?)",/',$content,$nick)){
        $rs['seller_nick'] = $nick[1];
    }
    if(preg_match('/shopId:"(\d+?)",/',$content,$shopid)){
        $rs['taobao_shop_id']=$shopid[1];
    }elseif(preg_match('/&shopId=(\d+)&/',$content,$shopid)){
        $rs['taobao_shop_id']=$shopid[1];
    }
    if(preg_match("/'categoryId'\s*:\s*'(\d+?)',/",$content,$cid)){
        $rs['taobao_cid'] = (float)$cid[1];
    }elseif(preg_match('/"categoryId"\s*:\s*"(\d+?)",/',$content,$cid)){
        $rs['taobao_cid'] = (float)$cid[1];
    }elseif(preg_match("/\scid:'(\d+?)',/",$content,$cid)){
        $rs['taobao_cid'] = (float)$cid[1];
    }
    if(OTHER){
        if(preg_match('/tmall\.com/',$lastredirectaddr)){
            // Tmall item: shop name from the page, sales and ratings from two extra requests.
            if(preg_match('/slogo-shopname.*?>(.*?)<\/a>/',$content,$shopname)){
                $rs['shop_name']=json_decode('"'.$shopname[1].'"');
            }
            if(empty($rs['shop_name'])&&!empty($shopname[1])) $rs['shop_name']=$shopname[1];
            if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick'];
            $url2='http://mdskip.taobao.com/core/initItemDetail.htm?itemId='.$id;
            $tmall_info = curls($url2);
            if(preg_match('/"sellCount"\s*:\s*(\d+)/',$tmall_info,$temp)) $rs['sales']=$temp[1];
            $merchandis=curls("http://dsr.rate.tmall.com/list_dsr_info.htm?callback=a&itemId=".$id);
            if(preg_match('/gradeAvg"\s*:\s*([0-9\.]+)/',$merchandis,$m_t))
                $rs['merchandis_score']=$m_t[1];
            if(preg_match('/rateTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t2))
                $rs['merchandis_total']=$m_t2[1];
        }else{
            // Taobao item.
            if(preg_match('/shopName\s*:\s*"(.*?)",/',$content,$shopname)){
                $rs['shop_name']=json_decode('"'.$shopname[1].'"');
            }
            if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick'];
            // Default to '' so a failed match never puts the text "Array" into the URLs below.
            $sellerid='';
            if(preg_match('/sellerId\s*:\s*"(.*?)"/',$content,$sm)||preg_match('/userId\':\'(\d+)\'/',$content,$sm)){
                $sellerid = $sm[1];
            }
            $sbn='';
            if(preg_match('/sbn=([0-9a-z]+)/',$content,$bm))
                $sbn=$bm[1];
            $url2='http://detailskip.taobao.com/json/ifq.htm?id='.$id.'&sid='.$sellerid.'&sbn='.$sbn.'&q=1&callback=a';
            $count_rs = curls($url2);
            if(preg_match('/quanity\s*:\s*(\d+)/',$count_rs,$temp)) $rs['sales']=$temp[1];
            $merchandis=curls("http://rate.taobao.com/detail_rate.htm?userNumId=$sellerid&auctionNumId=$id&currentPage=1&rateType=1");
            if(preg_match('/merchandisScore"\s*:\s*"([0-9\.]+)/',$merchandis,$m_t)) $rs['merchandis_score']=$m_t[1];
            else $rs['merchandis_score']=6;
            if(preg_match('/merchandisTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t)) $rs['merchandis_total']=$m_t[1];
            else $rs['merchandis_total']=0;
        }
    }
    if(!$price){
        // No price on the page itself: take the lowest positive price from the mdskip detail JSON.
        if(!isset($tmall_info)){
            $url2="http://mdskip.taobao.com/core/initItemDetail.htm?itemId=".$id;
            $tmall_info=curls($url2);
        }
        // Quote bare numeric keys of 10+ digits so the payload parses as JSON.
        $price_content=json_decode(iconv('gbk','utf-8',preg_replace('/(\d{10,}):/','"${1}":',$tmall_info)),true);
        $priceinfo=isset($price_content['defaultModel']['itemPriceResultDO']['priceInfo'])
            ?$price_content['defaultModel']['itemPriceResultDO']['priceInfo']:null;
        $price=array();
        if(is_array($priceinfo)){
            foreach ($priceinfo as $v){
                if(isset($v['price'])&&$v['price']>0)
                    $price[]=$v['price'];
                foreach (array('promotionList','suggestivePromotionList') as $listKey){
                    if(isset($v[$listKey])&&is_array($v[$listKey])){
                        foreach ($v[$listKey] as $v2){
                            $p=!empty($v2['extraPromPrice'])?$v2['extraPromPrice']:(isset($v2['price'])?$v2['price']:0);
                            if($p>0) $price[]=$p;
                        }
                    }
                }
            }
        }
        $price=count($price)>0?min($price):false;
    }
    $rs['price']=$price;
    if(count($rs)) return $rs;
    else return false;
}

/**
 * Fetches a URL with cURL, following redirects; an empty body is retried up to three attempts.
 *
 * @param string $url
 * @param bool   $lastredirectaddr when true, return array('content'=>body,'lastredirectaddr'=>final URL)
 *                                 instead of the body string
 * @param bool   $head             passed to CURLOPT_HEADER (include response headers in the body)
 * @param int    $times            current attempt number, used by the retry recursion
 * @return string|array|false body (or array, see above); false when all three attempts returned an empty body.
 *         Prints the cURL error and terminates the script if the transfer itself fails.
 */
function curls($url,$lastredirectaddr=false,$head=false,$times=1){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0');
    curl_setopt($ch, CURLOPT_REFERER,'http://www.tmall.com/');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the response instead of printing it
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30); // timeout on connect
    curl_setopt($ch, CURLOPT_TIMEOUT, 30); // timeout on response
    curl_setopt($ch, CURLOPT_HEADER, $head); // non-zero includes response headers in the output
    curl_setopt($ch, CURLOPT_MAXREDIRS, 50 );
    curl_setopt($ch, CURLOPT_URL, $url);
    $count_rs = curl_exec($ch);
    if($count_rs === false){
        echo 'Curl error: ' . curl_error($ch)."\n";
        exit;
    }
    // Keep the raw body separately: once wrapped in an array, the result is never equal to ''.
    $body=$count_rs;
    if($lastredirectaddr) $count_rs=array('content'=>$body,'lastredirectaddr'=>curl_getinfo($ch,CURLINFO_EFFECTIVE_URL));
    curl_close($ch);
    if($body!='') return $count_rs;
    elseif($times<3) return curls($url,$lastredirectaddr,$head,$times+1);
    else return false;
}
程序执行结果:
1 "2014-04-28 12:55:17",36656,0.967,200.00,200.00,力挺服饰专营店,71777969,力挺服饰专营店,0,162201,0.0,0 2 "2014-04-28 12:55:17",36657,1.018,250.00,250.00,力挺服饰专营店,71777969,力挺服饰专营店,3,50008897,5.0,4 3 "2014-04-28 12:55:17",36655,1.001,189.00,189.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,162205,0.0,0 4 "2014-04-28 12:55:17",36654,0.979,500.00,500.00,力挺服饰专营店,71777969,力挺服饰专营店,1,50008900,5.0,1 5 "2014-04-28 12:55:17",36653,0.982,150.00,150.00,力挺服饰专营店,71777969,力挺服饰专营店,0,50000697,5.0,2 6 "2014-04-28 12:55:17",36650,0.874,138.00,138,美品坊,64228914,精致女装美品坊,1,162205,6,0 7 "2014-04-28 12:55:17",36652,1.008,229.00,229.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,50011277,0.0,0 8 "2014-04-28 12:55:17",36647,0.962,259.00,259.00,爱购叁陆陆服饰专营店,102120067,爱购叁陆陆服饰专营店,0,162205,0.0,0 9 "2014-04-28 12:55:17",36648,1.017,273.42,273.42,力挺服饰专营店,71777969,力挺服饰专营店,0,50000697,0.0,0 10 "2014-04-28 12:55:17",36645,0.961,646.80,646.80,羽戈旗舰店,100216434,羽戈旗舰店,0,50008779,4.7,29 11 "2014-04-28 12:55:17",36646,1.011,239.00,239.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,162205,0.0,0 12 "2014-04-28 12:55:17",36644,1.009,235.12,235.12,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,38,50008779,4.5,795 13 "2014-04-28 12:55:17",36643,0.968,320.68,320.68,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,143,50008779,4.8,2342 14 "2014-04-28 12:55:17",36641,0.946,19.50,19.50,淘公馆数码专营店,105992505,淘公馆数码专营店,0,50018926,4.6,15708 15 "2014-04-28 12:55:17",36642,0.985,482.92,482.92,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,80,50008779,4.8,493 16 "2014-04-28 12:55:17",36640,0.968,125.00,128.00,忆红妆旗舰店,64376787,忆红妆旗舰店,8,162702,4.9,345 17 "2014-04-28 12:55:17",36639,0.988,99.00,99.00,忆红妆旗舰店,64376787,忆红妆旗舰店,12,162702,4.8,115 18 "2014-04-28 12:55:17",36638,0.976,135.00,148.00,忆红妆旗舰店,64376787,忆红妆旗舰店,1,162702,4.7,18 19 "2014-04-28 12:55:18",36637,0.964,242.00,245.00,忆红妆旗舰店,64376787,忆红妆旗舰店,22,50005065,4.7,193 20 "2014-04-28 12:55:18",36636,0.953,412.70,427.50,忆红妆旗舰店,64376787,忆红妆旗舰店,112,162701,4.7,2291 21 "2014-04-28 12:55:18",36635,0.971,363.00,365.00,忆红妆旗舰店,64376787,忆红妆旗舰店,314,162701,4.8,1982 22 "2014-04-28 
12:55:18",36634,0.973,179.10,175.00,忆红妆旗舰店,64376787,忆红妆旗舰店,0,50005065,4.8,26 23 "2014-04-28 12:55:18",36633,0.981,334.65,331.00,妹魅旗舰店,104267713,妹魅旗舰店,69,50012010,4.7,887 24 "2014-04-28 12:55:18",36631,0.943,315.00,315.00,gotrip箱包旗舰店,103732756,gotrip箱包旗舰店,122,50012019,4.8,1073 25 "2014-04-28 12:55:18",36632,0.989,192.00,192.00,哈妃猫旗舰店,70711288,哈妃猫旗舰店,11577,50012010,4.8,29206 26 "2014-04-28 12:55:18",36630,0.965,426.00,426.00,chicsouls旗舰店,106083266,chicsouls旗舰店,0,50012028,4.8,16 27 "2014-04-28 12:55:18",36629,0.953,99.00,99.00,莉娅阁旗舰店,67800337,莉娅阁旗舰店,0,50012027,4.8,97 28 "2014-04-28 12:55:18",36651,2.126,158.00,158,天天都特价等你,106393691,天天都特价,0,50010526,6,0 29 "2014-04-28 12:55:18",36628,0.973,2999.00,2999.00,舒适堡鞋类旗舰店,71301827,舒适堡鞋类旗舰店,0,50012027,5.0,19 30 "2014-04-28 12:55:18",36627,0.98,589.00,598.00,舒适堡鞋类旗舰店,71301827,舒适堡鞋类旗舰店,0,50012027,5.0,4 31 "2014-04-28 12:55:18",36626,0.972,253.00,253.00,非你不嫁服饰旗舰店,66835425,非你不嫁服饰旗舰店,7,162701,5.0,194 32 "2014-04-28 12:55:18",36622,0.854,198.00,198,刀1984,65104103,LFMY,1,162201,6,0 33 "2014-04-28 12:55:18",36625,0.965,235.00,235.00,千禧新娘旗舰店,62369744,千禧新娘旗舰店,287,162701,4.8,608 34 "2014-04-28 12:55:18",36624,0.98,10.00,10.00,朵品旗舰店,64673740,朵品旗舰店,16,50009032,4.9,680 35 "2014-04-28 12:55:18",36623,0.973,619.74,187.80,珂尼娅旗舰店,72260130,珂尼娅旗舰店,0,50012010,5.0,4 36 "2014-04-28 12:55:18",36621,0.977,138.00,138.00,eyesonu服饰旗舰店,63439938,eyesonu服饰旗舰店,23,50008901,4.7,806 37 "2014-04-28 12:55:19",36619,0.97,178.00,178.00,shezgood旗舰店,57301708,shezgood旗舰店,2,50010850,5.0,29 38 "2014-04-28 12:55:19",36618,0.992,119.00,119.00,伊莲旗舰店,73373759,伊莲旗舰店,0,50012010,4.7,2353 39 "2014-04-28 12:55:19",36617,0.967,219.80,219.80,爱伴箱包旗舰店,102234600,爱伴箱包旗舰店,1,50012010,4.7,16 40 "2014-04-28 12:55:19",36616,0.948,86.00,84.71,姿态服饰专营店,64752277,姿态服饰专营店,2,50012010,3.6,7 41 "2014-04-28 12:55:19",36620,1.082,99.00,98.90,奈奈爱霓女装旗舰店,57300194,奈奈爱霓女装旗舰店,840,1623,4.8,5593 42 "2014-04-28 12:55:19",36613,0.995,50.00,50.00,牧缇旗舰店,100328526,牧缇旗舰店,133,50000671,4.8,452 43 "2014-04-28 
12:55:19",36612,0.998,98.01,98.01,lishberry旗舰店,63641040,lishberry旗舰店,0,50000671,4.8,28 44 "2014-04-28 12:55:19",36611,0.991,498.00,498.00,uncontrollable旗舰店,106009511,uncontrollable旗舰店,1,50010850,4.5,2 45 "2014-04-28 12:55:19",36610,0.981,99.00,99.00,森露旗舰店,71469682,森露旗舰店,0,50000671,4.7,22 46 "2014-04-28 12:55:19",36605,0.968,49.00,49.00,桃苡服饰旗舰店,68928805,桃苡服饰旗舰店,0,1623,5.0,3 47 "2014-04-28 12:55:19",36604,0.954,360.64,360.64,深艺服饰旗舰店,71168332,深艺服饰旗舰店,0,50005065,0.0,0 48 "2014-04-28 12:55:19",36603,0.955,168.00,168.00,艾芭莉旗舰店,100726318,艾芭莉旗舰店,55,50010850,4.8,1797 49 "2014-04-28 12:55:19",36601,0.962,78.00,78.00,歌莉韵旗舰店,105012878,歌莉韵旗舰店,169,162103,4.8,36633 50 "2014-04-28 12:55:19",36600,0.943,64.00,64.00,ieemk旗舰店,103210940,ieemk旗舰店,187,162205,4.7,2220
从日志中我们可以看出,1秒钟更新大概是15-20个产品。
采用这种方式既可以控制线程数,又能并发,或许是一个很好的解决方案。
但此方法也有自身的缺点:
1.因为主要功能是通过PHP来实现的,所以每更新一个产品,操作系统都必须新创建一个进程,这大大增加了操作系统的开销。如果直接在C中实现PHP的主要功能,程序性能会大大提高。
2.功能耦合性太强,如果要改一个小细节(比如并发数、查询SQL等),只能修改源码后重新编译,应采取参数方式来弥补这个缺点。
3.因为C只给PHP传递了1个ID参数,PHP必须通过查询数据库来获得其它信息,这样就会增加数据库的压力,降低程序的效率。
因为我是初学C,现学现卖,水平有限,所以留待以后改进。