Python抓取双色球数据

  数据来源网站：http://baidu.lecai.com/lottery/draw/list/50?d=2013-01-01

  HTML解析器：http://pythonhosted.org/pyquery/ （可以像 jQuery 那样使用）

  源码:

 1 import MySQLdb as mysql
 2 from pyquery import PyQuery as pq
 3 
 4 create_table_sql = '''
 5 create table union_lotto(
 6     issue int  primary key,
 7     lottery_date   date,
 8     lottery_number varchar(30)
 9 )'''
10 
11 sql = "insert into union_lotto values(%(issue)s, %(date)s, %(number)s)"
12 conn = mysql.connect(host='localhost', db='caipiao', user='root', passwd='')
13 cur = conn.cursor()
14 
15 
def inserts(rows):
    """Insert a batch of draw records and commit it.

    ``rows`` is a sequence of dicts with the keys ``issue``, ``date`` and
    ``number``, i.e. the placeholder names used by the module-level ``sql``
    statement.

    If the insert or the commit fails, the pending transaction is rolled
    back before the error is re-raised, so the shared connection is not left
    with uncommitted work (rollback only has an effect on transactional
    storage engines).
    """
    try:
        cur.executemany(sql, rows)
        conn.commit()
    except Exception:
        conn.rollback()
        raise
19 
20 
def close():
    """Release the shared cursor and then the shared database connection."""
    try:
        cur.close()
    finally:
        # Close the connection even if closing the cursor fails.
        conn.close()
23 
24 
def handler_row(row):
    """Convert one table-row element into a dict ready for insertion.

    Three cells of ``row`` are read, in order:

    * cell 0: its text becomes ``date``;
    * cell 1: the text of its first child becomes ``issue``;
    * cell 2: every child of its first child contributes one number.

    All extracted texts are stripped of surrounding whitespace, so padded
    markup does not leak into the stored values.

    Returns a dict with the keys ``issue`` (int), ``date`` (str) and
    ``number`` (the number texts joined with ``-``).

    Raises IndexError when an expected cell or child is missing, and
    ValueError when the issue text is not an integer.
    """
    cells = row.getchildren()
    date = cells[0].text_content().strip()
    issue = cells[1].getchildren()[0].text_content().strip()
    number_nodes = cells[2].getchildren()[0].getchildren()
    lottery_number = '-'.join(
        node.text_content().strip() for node in number_nodes
    )
    return {'issue': int(issue), 'date': date, 'number': lottery_number}
35 
36 
def grab_data(url):
    """Load ``url`` with PyQuery and parse the rows of its draw table.

    Returns a list holding one handler_row() dict for each element matched
    by the ``#draw_list > tbody > tr`` selector, in the order the selection
    yields them.
    """
    document = pq(url=url)
    return [handler_row(tr) for tr in document("#draw_list > tbody > tr")]
44 
45 
def main():
    """Scrape the yearly draw pages and store every parsed row in MySQL.

    Requests the draw-list page dated 1 January of each year from 2003 to
    2013, inserts the rows parsed from each page, and prints a marker line
    before and after the run.

    The database resources are released in a ``finally`` clause, so they are
    closed even when a download or an insert raises.
    """
    url = 'http://baidu.lecai.com/lottery/draw/list/50?d=%d-01-01'
    # Parenthesised single-argument print emits the same text on Python 2
    # (print statement) and Python 3 (print function).
    print('.......star.........')
    try:
        for year in range(2003, 2014):
            inserts(grab_data(url % year))
    finally:
        close()
    print('.......end..........')


if __name__ == '__main__':
    main()

  

posted on 2013-12-30 20:55  Arts&Crafts  阅读(1008)  评论(0)  编辑  收藏  举报

导航