编程

看山是山 看水是水

导航

Linux C程序操作Mysql 调用PHP采集淘宝商品

还是继续这个项目。

在上一篇Linux下利用Shell使PHP并发采集淘宝产品中,采用shell将对PHP的调用推到后台执行,模拟多线程。

此方法有一个致命缺点,只能人工预判每个程序的执行时间。如果预判时间少于实际执行时间,则会生成大量进程;如果预判时间多于实际执行时间,则会浪费时间资源。

所以,在此我们采用C程序来控制并发数。

整体思路和用shell调用相似,只是把shell控制改成了C。

下面是C程序:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include "/usr/local/include/mysql/mysql.h"
 6 #define MAX_COLUMN_LEN  32 
 7 #define THREAD_NUM 20//线程数
 8 int threads = 0;
 9 pthread_t thread[THREAD_NUM];
10 pthread_mutex_t mut;//线程锁 
11 int count=0,vod_count=0,number = 0;
12 int *goods_id[1000000];
13 void *thread1(int thread_id)
14 {
15     int sleepsec;
16     while (number < count){;
17         char shell_cmd[50];
18         printf("number:%d\tthread_id=%d\tid=%s\n", number, thread_id, goods_id[number]);
19         sprintf(shell_cmd, "/usr/local/bin/php /var/www/9384shop/cron/goodsupdate.php %s", goods_id[number]);//生成shell命令
20         system(shell_cmd);//调用shell
21         pthread_mutex_lock(&mut);
22         number++;
23         pthread_mutex_unlock(&mut);         
24     }
25     pthread_exit(NULL);
26 }
27 
28 void create_thread(void){
29     int i,temp;
30     for (i = 0; i < THREAD_NUM; i++){
31         if (thread[i] == 0){
32             if ((temp = pthread_create(&thread[i], NULL, thread1, i)) != 0){
33             }
34             else{
35                 threads++;
36             }
37             break;
38         }
39     }
40     sleep(1);
41 }
42 void thread_wait(void)
43 {
44     int i;
45     /*等待线程结束*/
46     for (i = 0; i < THREAD_NUM; i++){
47         if (thread[i] != 0) {
48             pthread_join(thread[i], NULL);
49         }
50     }
51 }
52 int main(int argc, char *argv[]){
53     MYSQL my_connection;
54     MYSQL_RES *result;
55     MYSQL_ROW sql_row;
56     MYSQL_FIELD *fd;
57     char column[MAX_COLUMN_LEN][MAX_COLUMN_LEN];
58     int res,flag;
59     mysql_init(&my_connection);
60     if (mysql_real_connect(&my_connection, "localhost"
61         , "root", "202.133", "shop", 3306, NULL, 0)){
62         printf("connected to mysql.\n");        
63         res = mysql_query(&my_connection, "select id from s_goods where is_off_sale=0 order by id desc limit 1000000");//查询
64         printf("select id from s_goods where is_off_sale=0 order by id desc limit 1000000\n");
65         if (!res){
66             int i = 0, j;
67             result = mysql_store_result(&my_connection);//保存查询到的数据到result  
68             printf("the result number is %lu\n", (unsigned long)mysql_num_rows(result));
69             count = (unsigned long)mysql_num_rows(result);
70             while (sql_row = mysql_fetch_row(result))//获取具体的数据  
71             {
72                 goods_id[i] = (unsigned long)sql_row[0];
73                 i++;
74             } 
75         }
76         mysql_close(&my_connection);//断开连接 
77         while (threads < THREAD_NUM)
78             create_thread();
79         thread_wait();
80     }
81     else{
82         mysql_close(&my_connection);//断开连接  
83         printf("ERROR:can not connect to mysql\n");
84     }
85     
86 }

PHP:

  1 <?php
  2 define("OTHER",true);
  3 $host='localhost';
  4 $username='root';
  5 $password='123456';
  6 $db_name='taobao';
  7 $s=microtime(1);
  8 $id=$argv[1];
  9 
 10 
 11 $con=mysql_connect($host,$username,$password);
 12 mysql_select_db($db_name, $con);
 13 $r=mysql_fetch_array(mysql_query('SELECT url,price FROM s_goods where id='.$id),MYSQL_ASSOC);
 14 mysql_close($con);
 15 $oldprice=$r['price'];
 16 $rs=getPrice($r['url']);
 17 $t=microtime(1)-$s;
 18 $r=array();
 19 $r[]=date('Y-m-d H:i:s');
 20 $r[]=$id;
 21 $r[]=ceil($t*1000)/1000;
 22 if($rs=='soldout'){
 23     $r[]="OutStock";
 24     $con=mysql_connect($host,$username,$password);
 25     mysql_select_db($db_name, $con);
 26     mysql_query("UPDATE s_goods SET is_off_sale=1 WHERE id=".$id);
 27     mysql_close($con);
 28 }
 29 elseif($rs===false) $r[]= 'FALSE';
 30 else{
 31         $r[]=$oldprice;
 32         $r[]=isset($rs['price'])?$rs['price']:'';
 33         $r[]=isset($rs['seller_nick'])?$rs['seller_nick']:'';
 34         $r[]=isset($rs['taobao_shop_id'])?$rs['taobao_shop_id']:'';
 35         $r[]=isset($rs['shop_name'])?$rs['shop_name']:'';
 36         $r[]=isset($rs['sales'])?$rs['sales']:'';
 37         $r[]=isset($rs['taobao_cid'])?$rs['taobao_cid']:'';
 38         $r[]=isset($rs['merchandis_score'])?$rs['merchandis_score']:'';
 39         $r[]=isset($rs['merchandis_total'])?$rs['merchandis_total']:'';
 40         $a=array();
 41         //$rs['is_off_sale']=0;
 42         foreach ($rs as $k=>$v){
 43             if(!empty($v)){
 44                  $a[]="$k='$v'";
 45             }
 46         }
 47         $a[]="update_time='".date('Y-m-d H:i:s')."'";
 48         $con=mysql_connect($host,$username,$password);
 49         mysql_select_db($db_name, $con);
 50         mysql_query("set names utf8");
 51         mysql_query("UPDATE s_goods SET ".implode(',',$a)." WHERE id=".$id);
 52         mysql_close($con);
 53 }
 54 $h=fopen('/home/staff/www/9384shop/cron/goodsUpdate.log','a+');
 55 
 56 fputcsv($h,$r);
 57 fclose($h);
 58 
 59 function getPrice($url){
 60     $rs=array();
 61     preg_match('/[&|\?]id=(\d+)/',$url,$id);
 62     $id=$id[1];
 63     $c=curls($url,true);
 64     $content = $c['content'];
 65     if(empty($content)) exit;
 66     $content=mb_convert_encoding($content,"UTF-8","gbk");
 67     $lastredirectaddr = $c['lastredirectaddr'];    
 68     if(preg_match('/noitem\.htm/',$content)||preg_match('/<strong>此宝贝已下架<\/strong>|您查看的商品找不到了|您查看的宝贝不存在,可能已下架或者被转移/',$content)){    
 69         return 'soldout';
 70     }elseif(preg_match("/'reservePrice'\s*:\s*'([\d\.]+?)',/",$content,$price)){
 71         $price = (float)$price[1];
 72     }elseif(preg_match('/price:([\d\.]+?),/',$content,$price)){
 73         $price = (float)$price[1];
 74     }
 75     if(preg_match('/"sellerNickName"\s*:\s*"(.*?)",/',$content,$nick)){
 76         $rs['seller_nick'] = urldecode($nick[1]);
 77     }elseif(preg_match('/sellerNick\s*:\s*"(.*?)",/',$content,$nick)){
 78         $rs['seller_nick'] = $nick[1];
 79     }
 80     if(preg_match('/shopId:"(\d+?)",/',$content,$shopid)){
 81         $rs['taobao_shop_id']=$shopid[1];
 82     }elseif(preg_match('/&shopId=(\d+)&/',$content,$shopid)){
 83         $rs['taobao_shop_id']=$shopid[1];
 84     }
 85     if(preg_match("/'categoryId'\s*:\s*'(\d+?)',/",$content,$cid)){
 86         $rs['taobao_cid'] = (float)$cid[1];
 87     }elseif(preg_match('/"categoryId"\s*:\s*"(\d+?)",/',$content,$cid)){
 88         $rs['taobao_cid'] = (float)$cid[1];
 89     }elseif(preg_match("/\scid:'(\d+?)',/",$content,$cid)){
 90         $rs['taobao_cid'] = (float)$cid[1];
 91     }
 92     if(OTHER){
 93         if(preg_match('/tmall\.com/',$lastredirectaddr)){
 94             if(preg_match('/slogo-shopname.*?>(.*?)<\/a>/',$content,$shopname)){
 95                 $rs['shop_name']=json_decode('"'.$shopname[1].'"');
 96             }
 97             if(empty($rs['shop_name'])&&!empty($shopname[1])) $rs['shop_name']=$shopname[1]; 
 98             if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick'];
 99             $url2='http://mdskip.taobao.com/core/initItemDetail.htm?itemId='.$id;
100             $tmall_info = curls($url2);
101             preg_match('/"sellCount"\s*:\s*(\d+)/',$tmall_info,$temp);
102             if ($temp[1]!='') $rs['sales']=$temp[1];
103             $merchandis=curls("http://dsr.rate.tmall.com/list_dsr_info.htm?callback=a&itemId=".$id);
104             if(preg_match('/gradeAvg"\s*:\s*([0-9\.]+)/',$merchandis,$m_t))
105                 $rs['merchandis_score']=$m_t[1];
106             if(preg_match('/rateTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t2))
107                 $rs['merchandis_total']=$m_t2[1];
108         }else{
109             if(preg_match('/shopName\s*:\s*"(.*?)",/',$content,$shopname)){
110 
111                 $rs['shop_name']=json_decode('"'.$shopname[1].'"');
112             }
113             if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick'];
114             if(preg_match('/sellerId\s*:\s*"(.*?)"/',$content,$sellerid)||preg_match('/userId\':\'(\d+)\'/',$content,$sellerid)){
115                 $sellerid = $sellerid[1];
116             }
117             if(preg_match('/sbn=([0-9a-z]+)/',$content,$sbn))
118                 $sbn=$sbn[1];
119             $url2='http://detailskip.taobao.com/json/ifq.htm?id='.$id.'&sid='.$sellerid.'&sbn='.$sbn.'&q=1&callback=a';
120             $count_rs = curls($url2);
121             preg_match('/quanity\s*:\s*(\d+)/',$count_rs,$temp);
122             if ($temp[1]!='') $rs['sales']=$temp[1];
123             $merchandis=curls("http://rate.taobao.com/detail_rate.htm?userNumId=$sellerid&auctionNumId=$id&currentPage=1&rateType=1");
124             if(preg_match('/merchandisScore"\s*:\s*"([0-9\.]+)/',$merchandis,$m_t)) $rs['merchandis_score']=$m_t[1];
125             else $rs['merchandis_score']=6;
126             if(preg_match('/merchandisTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t)) $rs['merchandis_total']=$m_t[1];
127             else $rs['merchandis_total']=0;
128         }
129     }
130     if(!$price){
131         if(!isset($tmall_info)){
132             $url2="http://mdskip.taobao.com/core/initItemDetail.htm?itemId=".$id;
133             $tmall_info=curls($url2);
134         }
135         $price_content=json_decode(iconv('gbk','utf-8',preg_replace('/(\d{10,}):/','"${1}":',$tmall_info)),true);
136         $priceinfo=$price_content['defaultModel']['itemPriceResultDO']['priceInfo'];
137         $price=array();
138         if(is_array($priceinfo)){
139             foreach ($priceinfo as $v){
140                 if($v['price']>0)
141                     $price[]=$v['price'];
142                 if(is_array($v['promotionList'])){
143                     foreach ($v['promotionList'] as $v2){
144                         $p=$v2['extraPromPrice']?$v2['extraPromPrice']:$v2['price'];
145                         if($p>0) $price[]=$p;
146                     }
147                 }
148                 if(is_array($v['suggestivePromotionList'])){
149                     foreach ($v['suggestivePromotionList'] as $v2){
150                         $p=$v2['extraPromPrice']?$v2['extraPromPrice']:$v2['price'];
151                         if($p>0) $price[]=$p;
152                     }
153                 }
154             }
155         }
156         $price=count($price)>0?min($price):false;
157     }
158     $rs['price']=$price;
159     if(count($rs)) return $rs;
160     else return false;
161 }
/**
 * Fetch a URL with cURL, following redirects, retrying when the body is empty.
 *
 * @param string $url              address to fetch
 * @param bool   $lastredirectaddr when true, return
 *                                 array('content'=>body,'lastredirectaddr'=>final URL)
 *                                 instead of the body string
 * @param bool   $head             when true, response headers are included in the body
 * @param int    $times            current attempt number (used by the retry recursion)
 * @return string|array|false body (or array, see above); false after three
 *                            attempts that all returned an empty body.
 *                            A cURL-level error prints the message and ends the script.
 */
function curls($url,$lastredirectaddr=false,$head=false,$times=1){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0');
    curl_setopt($ch, CURLOPT_REFERER,'http://www.tmall.com/');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body from curl_exec instead of printing it
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30); //timeout on connect
    curl_setopt($ch, CURLOPT_TIMEOUT, 30); //timeout on response
    curl_setopt($ch, CURLOPT_HEADER, $head); // include response headers in the output when non-zero
    curl_setopt($ch, CURLOPT_MAXREDIRS, 50 );
    curl_setopt($ch, CURLOPT_URL, $url);
    $body = curl_exec($ch);
    if($body === false){
        echo 'Curl error: ' . curl_error($ch)."\n";
        curl_close($ch);
        exit;
    }
    $effective_url = curl_getinfo($ch,CURLINFO_EFFECTIVE_URL);
    curl_close($ch);
    // Test the body itself, so the retry applies to both return forms and a
    // string is never indexed with 'content'.
    if($body !== ''){
        if($lastredirectaddr) return array('content'=>$body,'lastredirectaddr'=>$effective_url);
        return $body;
    }
    if($times<3) return curls($url,$lastredirectaddr,$head,$times+1);
    return false;
}

程序执行结果:

 1 "2014-04-28 12:55:17",36656,0.967,200.00,200.00,力挺服饰专营店,71777969,力挺服饰专营店,0,162201,0.0,0
 2 "2014-04-28 12:55:17",36657,1.018,250.00,250.00,力挺服饰专营店,71777969,力挺服饰专营店,3,50008897,5.0,4
 3 "2014-04-28 12:55:17",36655,1.001,189.00,189.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,162205,0.0,0
 4 "2014-04-28 12:55:17",36654,0.979,500.00,500.00,力挺服饰专营店,71777969,力挺服饰专营店,1,50008900,5.0,1
 5 "2014-04-28 12:55:17",36653,0.982,150.00,150.00,力挺服饰专营店,71777969,力挺服饰专营店,0,50000697,5.0,2
 6 "2014-04-28 12:55:17",36650,0.874,138.00,138,美品坊,64228914,精致女装美品坊,1,162205,6,0
 7 "2014-04-28 12:55:17",36652,1.008,229.00,229.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,50011277,0.0,0
 8 "2014-04-28 12:55:17",36647,0.962,259.00,259.00,爱购叁陆陆服饰专营店,102120067,爱购叁陆陆服饰专营店,0,162205,0.0,0
 9 "2014-04-28 12:55:17",36648,1.017,273.42,273.42,力挺服饰专营店,71777969,力挺服饰专营店,0,50000697,0.0,0
10 "2014-04-28 12:55:17",36645,0.961,646.80,646.80,羽戈旗舰店,100216434,羽戈旗舰店,0,50008779,4.7,29
11 "2014-04-28 12:55:17",36646,1.011,239.00,239.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,162205,0.0,0
12 "2014-04-28 12:55:17",36644,1.009,235.12,235.12,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,38,50008779,4.5,795
13 "2014-04-28 12:55:17",36643,0.968,320.68,320.68,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,143,50008779,4.8,2342
14 "2014-04-28 12:55:17",36641,0.946,19.50,19.50,淘公馆数码专营店,105992505,淘公馆数码专营店,0,50018926,4.6,15708
15 "2014-04-28 12:55:17",36642,0.985,482.92,482.92,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,80,50008779,4.8,493
16 "2014-04-28 12:55:17",36640,0.968,125.00,128.00,忆红妆旗舰店,64376787,忆红妆旗舰店,8,162702,4.9,345
17 "2014-04-28 12:55:17",36639,0.988,99.00,99.00,忆红妆旗舰店,64376787,忆红妆旗舰店,12,162702,4.8,115
18 "2014-04-28 12:55:17",36638,0.976,135.00,148.00,忆红妆旗舰店,64376787,忆红妆旗舰店,1,162702,4.7,18
19 "2014-04-28 12:55:18",36637,0.964,242.00,245.00,忆红妆旗舰店,64376787,忆红妆旗舰店,22,50005065,4.7,193
20 "2014-04-28 12:55:18",36636,0.953,412.70,427.50,忆红妆旗舰店,64376787,忆红妆旗舰店,112,162701,4.7,2291
21 "2014-04-28 12:55:18",36635,0.971,363.00,365.00,忆红妆旗舰店,64376787,忆红妆旗舰店,314,162701,4.8,1982
22 "2014-04-28 12:55:18",36634,0.973,179.10,175.00,忆红妆旗舰店,64376787,忆红妆旗舰店,0,50005065,4.8,26
23 "2014-04-28 12:55:18",36633,0.981,334.65,331.00,妹魅旗舰店,104267713,妹魅旗舰店,69,50012010,4.7,887
24 "2014-04-28 12:55:18",36631,0.943,315.00,315.00,gotrip箱包旗舰店,103732756,gotrip箱包旗舰店,122,50012019,4.8,1073
25 "2014-04-28 12:55:18",36632,0.989,192.00,192.00,哈妃猫旗舰店,70711288,哈妃猫旗舰店,11577,50012010,4.8,29206
26 "2014-04-28 12:55:18",36630,0.965,426.00,426.00,chicsouls旗舰店,106083266,chicsouls旗舰店,0,50012028,4.8,16
27 "2014-04-28 12:55:18",36629,0.953,99.00,99.00,莉娅阁旗舰店,67800337,莉娅阁旗舰店,0,50012027,4.8,97
28 "2014-04-28 12:55:18",36651,2.126,158.00,158,天天都特价等你,106393691,天天都特价,0,50010526,6,0
29 "2014-04-28 12:55:18",36628,0.973,2999.00,2999.00,舒适堡鞋类旗舰店,71301827,舒适堡鞋类旗舰店,0,50012027,5.0,19
30 "2014-04-28 12:55:18",36627,0.98,589.00,598.00,舒适堡鞋类旗舰店,71301827,舒适堡鞋类旗舰店,0,50012027,5.0,4
31 "2014-04-28 12:55:18",36626,0.972,253.00,253.00,非你不嫁服饰旗舰店,66835425,非你不嫁服饰旗舰店,7,162701,5.0,194
32 "2014-04-28 12:55:18",36622,0.854,198.00,198,刀1984,65104103,LFMY,1,162201,6,0
33 "2014-04-28 12:55:18",36625,0.965,235.00,235.00,千禧新娘旗舰店,62369744,千禧新娘旗舰店,287,162701,4.8,608
34 "2014-04-28 12:55:18",36624,0.98,10.00,10.00,朵品旗舰店,64673740,朵品旗舰店,16,50009032,4.9,680
35 "2014-04-28 12:55:18",36623,0.973,619.74,187.80,珂尼娅旗舰店,72260130,珂尼娅旗舰店,0,50012010,5.0,4
36 "2014-04-28 12:55:18",36621,0.977,138.00,138.00,eyesonu服饰旗舰店,63439938,eyesonu服饰旗舰店,23,50008901,4.7,806
37 "2014-04-28 12:55:19",36619,0.97,178.00,178.00,shezgood旗舰店,57301708,shezgood旗舰店,2,50010850,5.0,29
38 "2014-04-28 12:55:19",36618,0.992,119.00,119.00,伊莲旗舰店,73373759,伊莲旗舰店,0,50012010,4.7,2353
39 "2014-04-28 12:55:19",36617,0.967,219.80,219.80,爱伴箱包旗舰店,102234600,爱伴箱包旗舰店,1,50012010,4.7,16
40 "2014-04-28 12:55:19",36616,0.948,86.00,84.71,姿态服饰专营店,64752277,姿态服饰专营店,2,50012010,3.6,7
41 "2014-04-28 12:55:19",36620,1.082,99.00,98.90,奈奈爱霓女装旗舰店,57300194,奈奈爱霓女装旗舰店,840,1623,4.8,5593
42 "2014-04-28 12:55:19",36613,0.995,50.00,50.00,牧缇旗舰店,100328526,牧缇旗舰店,133,50000671,4.8,452
43 "2014-04-28 12:55:19",36612,0.998,98.01,98.01,lishberry旗舰店,63641040,lishberry旗舰店,0,50000671,4.8,28
44 "2014-04-28 12:55:19",36611,0.991,498.00,498.00,uncontrollable旗舰店,106009511,uncontrollable旗舰店,1,50010850,4.5,2
45 "2014-04-28 12:55:19",36610,0.981,99.00,99.00,森露旗舰店,71469682,森露旗舰店,0,50000671,4.7,22
46 "2014-04-28 12:55:19",36605,0.968,49.00,49.00,桃苡服饰旗舰店,68928805,桃苡服饰旗舰店,0,1623,5.0,3
47 "2014-04-28 12:55:19",36604,0.954,360.64,360.64,深艺服饰旗舰店,71168332,深艺服饰旗舰店,0,50005065,0.0,0
48 "2014-04-28 12:55:19",36603,0.955,168.00,168.00,艾芭莉旗舰店,100726318,艾芭莉旗舰店,55,50010850,4.8,1797
49 "2014-04-28 12:55:19",36601,0.962,78.00,78.00,歌莉韵旗舰店,105012878,歌莉韵旗舰店,169,162103,4.8,36633
50 "2014-04-28 12:55:19",36600,0.943,64.00,64.00,ieemk旗舰店,103210940,ieemk旗舰店,187,162205,4.7,2220


从日志中我们可以看出,1秒钟更新大概是15-20个产品。

采用这种方式既可以控制线程数,又能并发,或许是一个很好的解决方案。

但此方法也有自身的缺点:

  1.因为主要功能是通过PHP来实现的,所以每更新一个产品,操作系统都必须新创建一个进程,这大大增加了操作系统的开销。如果直接在C中实现PHP的主要功能,程序性能会大大提高。

  2.功能耦合性太强,如果要改一个小细节只有重写源码然后编译(比如并发数,查询SQL等等),应采取参数方式来弥补这个缺点。

  3.因为C只给PHP传递了1个ID参数,PHP必须通过查询数据库来获得其它信息,这样就会增加数据库的压力,降低程序的效率。

因为我是初学C,现学现卖,水平有限,所以留待以后改进。

 

 

 

posted on 2014-04-28 13:45  风飘无痕  阅读(820)  评论(0)  编辑  收藏  举报