用 Python 抓取基金信息
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import re
5 import urllib
6 import time
7 from sqlalchemy import *
8
class Fund(object):
    """A single fund identified by its code, with free-form attributes.

    The fund code lives on ``self.code``; every other field is kept in the
    ``attributes`` dict and is read and written with item syntax, e.g.
    ``fund['name'] = ...`` / ``fund['name']``.
    """

    def __init__(self, code):
        """Create a fund with the given ``code`` and no attributes yet."""
        self.code = code
        self.attributes = {}

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` in the attribute dict."""
        self.attributes[key] = value

    def __getitem__(self, key):
        """Return the attribute stored under ``key``.

        Raises:
            KeyError: if ``key`` has never been set on this fund.
        """
        return self.attributes[key]
17
# Module-level registry kept from the original script; the loop below does
# not populate it.
fund_all = {}

fund_url = 'http://my.fund.163.com/stock/rankkfs.htm'

# Download the ranking page. The socket is closed even if the read fails.
u_sock = urllib.urlopen(fund_url)
try:
    raw_page = u_sock.read()
finally:
    u_sock.close()

# The page is decoded as gb2312 and re-encoded as utf-8 before it is
# matched, written to the dump file and inserted into the database.
fund_str = raw_page.decode('gb2312').encode('utf-8')

# Dump the whole converted page to stdout (single-argument form, so the
# output is the same whether print is a statement or a function).
print(fund_str)

# One table row per fund; group 1 is the row colour, group 2 the row body.
r_item_pattern = re.compile(r'<tr align="center" bgcolor="(#EFEFEF|#E7F3FE)" class="bzi">(.*?)</tr>', re.DOTALL)

# Cells whose content is wrapped in a link (used for code, name, company).
r_anchor_pattern = re.compile(r'<td><a href=.*?>(.*?)</a></td>')

# Plain numeric/date-like cells (used for date, util, total, rate).
r_normal_pattern = re.compile(r'<td>([-0-9]*\.*\d*)</td>')

fund_list = r_item_pattern.findall(fund_str)

# The raw row HTML is also dumped to a file named after today's date.
file_name = time.strftime('%Y%m%d') + '.html'

f = open(file_name, 'w')
try:
    # NOTE(review): the database credentials are hard-coded in this URL;
    # consider moving them to configuration.
    db = BoundMetaData("mysql://root:clhclh@localhost/testcase?charset=utf8", echo=True)

    funds = Table('funds', db, autoload=True)

    for item in fund_list:
        # item is (row colour, row body); only the body is needed.
        s = item[1]
        f.write(s)

        anchor_tuple = r_anchor_pattern.findall(s)
        normal_tuple = r_normal_pattern.findall(s)

        # A row that does not have the expected three linked cells and four
        # plain cells cannot be mapped to a record; report it and move on
        # instead of aborting the whole run with an IndexError.
        if len(anchor_tuple) < 3 or len(normal_tuple) < 4:
            print('skipping row with unexpected layout: %r' % (s,))
            continue

        fund = Fund(anchor_tuple[0])
        fund['name'] = anchor_tuple[1]
        fund['company'] = anchor_tuple[2]
        fund['date'] = normal_tuple[0]
        fund['util'] = normal_tuple[1]
        fund['total'] = normal_tuple[2]
        fund['rate'] = normal_tuple[3]

        # Values are passed as bound parameters, not formatted into SQL.
        funds.insert().execute({
            'name': fund['name'],
            'code': fund.code,
            'date': fund['date'],
            'util': fund['util'],
            'total': fund['total'],
            'rate': fund['rate'],
            'company': fund['company'],
        })
finally:
    # Always release the dump file, even if the database step fails.
    f.close()
75
76
77
2 # -*- coding: utf-8 -*-
3
4 import re
5 import urllib
6 import time
7 from sqlalchemy import *
8
class Fund(object):
    """A single fund identified by its code, with free-form attributes.

    The fund code lives on ``self.code``; every other field is kept in the
    ``attributes`` dict and is read and written with item syntax, e.g.
    ``fund['name'] = ...`` / ``fund['name']``.
    """

    def __init__(self, code):
        """Create a fund with the given ``code`` and no attributes yet."""
        self.code = code
        self.attributes = {}

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` in the attribute dict."""
        self.attributes[key] = value

    def __getitem__(self, key):
        """Return the attribute stored under ``key``.

        Raises:
            KeyError: if ``key`` has never been set on this fund.
        """
        return self.attributes[key]
17
# Module-level registry kept from the original script; the loop below does
# not populate it.
fund_all = {}

fund_url = 'http://my.fund.163.com/stock/rankkfs.htm'

# Download the ranking page. The socket is closed even if the read fails.
u_sock = urllib.urlopen(fund_url)
try:
    raw_page = u_sock.read()
finally:
    u_sock.close()

# The page is decoded as gb2312 and re-encoded as utf-8 before it is
# matched, written to the dump file and inserted into the database.
fund_str = raw_page.decode('gb2312').encode('utf-8')

# Dump the whole converted page to stdout (single-argument form, so the
# output is the same whether print is a statement or a function).
print(fund_str)

# One table row per fund; group 1 is the row colour, group 2 the row body.
r_item_pattern = re.compile(r'<tr align="center" bgcolor="(#EFEFEF|#E7F3FE)" class="bzi">(.*?)</tr>', re.DOTALL)

# Cells whose content is wrapped in a link (used for code, name, company).
r_anchor_pattern = re.compile(r'<td><a href=.*?>(.*?)</a></td>')

# Plain numeric/date-like cells (used for date, util, total, rate).
r_normal_pattern = re.compile(r'<td>([-0-9]*\.*\d*)</td>')

fund_list = r_item_pattern.findall(fund_str)

# The raw row HTML is also dumped to a file named after today's date.
file_name = time.strftime('%Y%m%d') + '.html'

f = open(file_name, 'w')
try:
    # NOTE(review): the database credentials are hard-coded in this URL;
    # consider moving them to configuration.
    db = BoundMetaData("mysql://root:clhclh@localhost/testcase?charset=utf8", echo=True)

    funds = Table('funds', db, autoload=True)

    for item in fund_list:
        # item is (row colour, row body); only the body is needed.
        s = item[1]
        f.write(s)

        anchor_tuple = r_anchor_pattern.findall(s)
        normal_tuple = r_normal_pattern.findall(s)

        # A row that does not have the expected three linked cells and four
        # plain cells cannot be mapped to a record; report it and move on
        # instead of aborting the whole run with an IndexError.
        if len(anchor_tuple) < 3 or len(normal_tuple) < 4:
            print('skipping row with unexpected layout: %r' % (s,))
            continue

        fund = Fund(anchor_tuple[0])
        fund['name'] = anchor_tuple[1]
        fund['company'] = anchor_tuple[2]
        fund['date'] = normal_tuple[0]
        fund['util'] = normal_tuple[1]
        fund['total'] = normal_tuple[2]
        fund['rate'] = normal_tuple[3]

        # Values are passed as bound parameters, not formatted into SQL.
        funds.insert().execute({
            'name': fund['name'],
            'code': fund.code,
            'date': fund['date'],
            'util': fund['util'],
            'total': fund['total'],
            'rate': fund['rate'],
            'company': fund['company'],
        })
finally:
    # Always release the dump file, even if the database step fails.
    f.close()
75
76
77
这里要注意一个问题：从163抓回的内容是用gb2312编码的，而我的数据库、文件等都是utf-8编码的，所以要先按gb2312把内容decode成unicode，然后再encode成utf-8，这样就不会乱码了。我还写了个ruby版的，见:
http://www.cnblogs.com/angelface/archive/2007/08/03/840943.html
posted on 2007-08-03 08:53 angelface 阅读(1267) 评论(0) 编辑 收藏 举报