体彩数据爬取
大乐透
爬取1
# Scrape historical Super Lotto (大乐透) draw results, one term (期号) at a time.
# Endpoint example:
# http://www.lottery.gov.cn/api/lottery_kj_detail_new.jspx?_ltype=4&_term=19026
import csv
import re

agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
headers = {
    'User-Agent': agent,
}
proxy = {
    "http": "125.39.9.34:9000",
}
url = 'http://www.lottery.gov.cn/api/lottery_kj_detail_new.jspx'

# Seconds to wait on a single request; without a timeout a dead proxy would
# block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Captures the inside of the "codeNumber" array in the response text.
# Raw string (no invalid escapes), tolerant of whitespace around ':' and of
# whatever follows the closing bracket.
CODE_NUMBER_RE = re.compile(r'codeNumber"\s*:\s*\[(.*?)\]', re.S)

CSV_HEADER = ['qihao', 'red1', 'red2', 'red3', 'red4', 'red5', 'blue1', 'blue2']


def parse_code_numbers(page_text):
    """Return the drawn numbers found in one response body.

    Args:
        page_text: Raw response text expected to contain a
            ``"codeNumber":[...]`` array.

    Returns:
        The array items as a list of strings with quotes and surrounding
        whitespace removed, e.g. ``['10', '12', '15', '17', '19', '02', '03']``.
        An empty list when the array is missing or has no items.
    """
    match = CODE_NUMBER_RE.search(page_text or '')
    if match is None:
        return []
    items = match.group(1).replace('"', '').split(',')
    # Drop blanks so an empty array does not yield a row with one '' cell.
    return [item.strip() for item in items if item.strip()]


def fetch_term(qihao):
    """POST the query for term ``qihao`` and return the response text.

    Returns an empty string when the request fails, so the caller can skip
    the term and still write out everything collected so far.
    """
    # Imported here so the parsing helper above stays importable on machines
    # that do not have the third-party HTTP client installed.
    import requests

    data = {
        '_ltype': '4',
        '_term': qihao,
    }
    try:
        response = requests.post(url=url, headers=headers, data=data,
                                 proxies=proxy, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print('request for term %s failed: %s' % (qihao, exc))
        return ''
    return response.text


def write_csv(rows, path='lottery_data.csv'):
    """Write the header line followed by ``rows`` to ``path``."""
    with open(path, 'w', newline='') as csvf:
        writer = csv.writer(csvf, dialect='excel')
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def main():
    """Prompt for a term range, scrape every term in it and save the CSV."""
    start = int(input('输入开始期号:'))  # e.g. 18134
    end = int(input('输入结束期号:'))  # e.g. 19029

    lottery_li = []
    for qihao in range(start, end + 1):
        page_text = fetch_term(qihao)
        print(page_text)
        if not page_text:
            continue
        numbers = parse_code_numbers(page_text)
        if numbers:
            # Each row is the term number followed by the drawn numbers.
            lottery_li.append([qihao] + numbers)

    write_csv(lottery_li)
    print('done.....................')


if __name__ == '__main__':
    main()
爬取2(所有开奖记录)
# Scrape the full Super Lotto (大乐透) draw history, one listing page at a time.
# http://www.lottery.gov.cn/historykj/history.jspx?_ltype=dlt
import csv
import random
import re
import time

agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
headers = {
    'User-Agent': agent,
}
proxies = [
    {"http": "125.39.9.34:9000"},
    {"http": "222.139.125.232:8060"},
]
# One proxy is picked per run and reused for every page.
proxy = random.choice(proxies)
params = {
    '_ltype': 'dlt',
}

# Seconds to wait on a single request; without a timeout a dead proxy would
# block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Selects every result row of the history table on a page.
ROW_XPATH = '//div[@class="result"]/table/tbody/tr'

CSV_HEADER = ['qihao', 'red1', 'red2', 'red3', 'red4', 'red5', 'blue1', 'blue2']


def cells_to_row(cell_texts):
    """Collapse the text nodes of each cell into one trimmed string.

    Args:
        cell_texts: One list of text fragments per table cell.

    Returns:
        A list with exactly one string per cell. Empty cells become ``''``
        so the remaining values stay in their own columns.
    """
    return [''.join(texts).strip() for texts in cell_texts]


def parse_history_page(page_text):
    """Return one row (term number + 7 drawn numbers) per table row in a page."""
    # Imported here so the pure helper above stays importable on machines
    # that do not have lxml installed.
    from lxml import etree

    # A blank body has no rows; skip parsing it.
    if not page_text or not page_text.strip():
        return []
    tree = etree.HTML(page_text)
    if tree is None:  # defensive: no root element was produced
        return []

    rows = []
    for tr in tree.xpath(ROW_XPATH):
        # Query the first len(CSV_HEADER) cells relative to this row rather
        # than re-running an absolute query from the root for every cell.
        cell_texts = [
            tr.xpath('./td[%d]//text()' % column)
            for column in range(1, len(CSV_HEADER) + 1)
        ]
        rows.append(cells_to_row(cell_texts))
    return rows


def fetch_history_page(page_no):
    """GET listing page ``page_no`` and return its text ('' when it fails)."""
    import requests

    url = 'http://www.lottery.gov.cn/historykj/history_%s.jspx' % page_no
    try:
        response = requests.get(url=url, params=params, headers=headers,
                                proxies=proxy, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print('request for page %s failed: %s' % (page_no, exc))
        return ''
    return response.text


def main():
    """Prompt for the last page number, scrape pages 1..N and save the CSV."""
    page = int(input("end page no:"))

    lottery_data = []
    for page_no in range(1, page + 1):
        page_text = fetch_history_page(page_no)
        time.sleep(1)  # pause between consecutive page requests
        lottery_data.extend(parse_history_page(page_text))

    # Write everything collected to the CSV file.
    with open('all_lottery.csv', 'w', newline='') as csvf:
        writer = csv.writer(csvf, dialect='excel')
        writer.writerow(CSV_HEADER)
        writer.writerows(lottery_data)
    print('done..................................')


if __name__ == '__main__':
    main()