Python抓取双色球数据
数据来源网站：http://baidu.lecai.com/lottery/draw/list/50?d=2013-01-01
HTML解析器：PyQuery http://pythonhosted.org/pyquery/ （可以像 jQuery 那样使用）
源码:
import MySQLdb as mysql
from pyquery import PyQuery as pq

# Schema of the table the rows are inserted into.
# NOTE: nothing in this script executes this statement; the table has to
# exist before the script is run.
create_table_sql = '''
create table union_lotto(
    issue int primary key,
    lottery_date date,
    lottery_number varchar(30)
)'''

# Named placeholders are filled from the dicts produced by handler_row().
sql = "insert into union_lotto values(%(issue)s, %(date)s, %(number)s)"
conn = mysql.connect(host='localhost', db='caipiao', user='root', passwd='')
cur = conn.cursor()


def inserts(rows):
    """Insert a batch of draw records and commit them.

    rows -- list of dicts with the keys 'issue', 'date' and 'number',
            as returned by grab_data().
    """
    if not rows:
        # Nothing was scraped for this page; skip the database round trip.
        return
    cur.executemany(sql, rows)
    conn.commit()


def close():
    """Close the module-level database connection."""
    conn.close()


def handler_row(row):
    """Convert one table row element into a record dict.

    The row's first cell supplies the date text, the first child of the
    second cell supplies the issue number, and the children of the first
    child of the third cell supply the individual numbers.

    Returns a dict {'issue': int, 'date': str, 'number': str} where
    'number' is the individual numbers joined with '-'.
    """
    children = row.getchildren()
    date = children[0].text_content()
    issue = children[1].getchildren()[0].text_content()
    spans = children[2].getchildren()[0].getchildren()
    lottery_number = '-'.join([span.text_content() for span in spans])
    return {'issue': int(issue.strip()), 'date': date, 'number': lottery_number}


def grab_data(url):
    """Fetch *url* and return one record dict per row of the #draw_list table."""
    d = pq(url=url)
    rows = d("#draw_list > tbody > tr")
    return [handler_row(row) for row in rows]


def main():
    """Scrape the pages for 2003 through 2013 and store every row found."""
    years = range(2003, 2014)
    url = 'http://baidu.lecai.com/lottery/draw/list/50?d=%d-01-01'
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('.......star.........')
    try:
        for year in years:
            result = grab_data(url % year)
            inserts(result)
    finally:
        # Release the connection even when a fetch, parse or insert fails.
        close()
    print('.......end..........')


if __name__ == '__main__':
    main()
posted on 2013-12-30 20:55 Arts&Crafts 阅读(1008) 评论(0) 编辑 收藏 举报