# Scrape the historical winning data of the 大乐透 (Super Lotto) lottery.
import requests
from lxml import etree
import pprint
#import json
# Download the lottery history page and parse it into an lxml element tree.
#
# Module-level names produced here and used by the code that follows:
#   url, headers  - request target and HTTP headers
#   req           - the requests.Response object
#   req_text      - the decoded response body
#   web_html      - root element of the parsed HTML document
#
# NOTE(review): the `limit=23020` query parameter presumably caps the number
# of rows the server returns; it matches the 23020-row bound used by the
# parsing code further down. Confirm against the site if the bound changes.
url = 'http://datachart.500.com/dlt/history/newinc/history.php?limit=23020&sort=1'
headers = {
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}
# requests never times out on its own; without an explicit timeout a stalled
# server would hang the script forever.
req = requests.get(url=url, headers=headers, timeout=30)
# Stop on an HTTP error status instead of parsing an error page as if it
# were the data table.
req.raise_for_status()
req_text = req.text
web_html = etree.HTML(req_text)
# Collect every table row as a list of its first 15 cell texts, dump the
# whole list to 大乐透.txt as a Python literal, then pretty-print it.
#
# ALL is created before the try block so the final pprint always has a list
# to print, even when scraping fails part-way through.
ALL = []
try:
    # Fetch all rows of the element with id "tdata" in a single query
    # instead of evaluating one absolute XPath per cell. The slice keeps the
    # original upper bound of 23020 rows without relying on an IndexError to
    # end the loop (which used to skip the file write entirely whenever the
    # table was shorter than the bound).
    rows = web_html.xpath('//*[@id="tdata"]/tr')[:23020]
    for row in rows:
        cells = row.xpath('./td')[:15]
        if len(cells) < 15:
            # Not a complete 15-column row: skip it rather than abort the
            # whole run.
            continue
        # .text is None for a cell without leading text; the value is kept
        # as-is because the list is written with str().
        all_list = [cell.text for cell in cells]
        ALL.append(all_list)
        print(all_list)
        print('获得{}条数据'.format(len(ALL)))
    # Explicit encoding so the output does not depend on the platform's
    # default locale encoding.
    with open(r'大乐透.txt', 'w', encoding='utf-8') as f:
        f.write(str(ALL))
    print('下载完成!')
except Exception as exc:
    # Best-effort script: report the failure instead of silently discarding
    # it, then still show whatever was collected.
    print('抓取失败: {!r}'.format(exc))
pprint.pprint(ALL)
import requests
from lxml import etree
import pprint
#import json
# Second pass: request the same history page again and build a fresh lxml
# tree for the line-oriented export that follows.
#
# Rebinds the module-level names url, headers, req, req_text and web_html.
url = 'http://datachart.500.com/dlt/history/newinc/history.php?limit=23020&sort=1'
headers = {
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}
# An explicit timeout is required: requests waits indefinitely by default,
# so an unresponsive server would otherwise block the script forever.
req = requests.get(url=url, headers=headers, timeout=30)
# Raise on 4xx/5xx responses so an error page is never mistaken for data.
req.raise_for_status()
req_text = req.text
web_html = etree.HTML(req_text)
# Write every table row to 大乐透2.txt as one comma-separated line of its
# first 15 cell texts (CRLF-terminated), then pretty-print the rows.
#
# ALL is rebuilt here so the final pprint no longer depends on a list left
# behind by earlier code; it holds the same per-row lists of cell texts.
ALL = []
try:
    # newline='' disables newline translation, so the explicit '\r\n' below
    # is written exactly as CRLF on every platform (text mode on Windows
    # would otherwise expand it to '\r\r\n'). The encoding is pinned so the
    # output does not depend on the locale default.
    with open(r'大乐透2.txt', 'w', encoding='utf-8', newline='') as f:
        # One query for all rows of the element with id "tdata"; the slice
        # keeps the original 23020-row upper bound without needing an
        # IndexError to terminate the loop.
        for row in web_html.xpath('//*[@id="tdata"]/tr')[:23020]:
            cells = row.xpath('./td')[:15]
            if len(cells) < 15:
                # Not a complete 15-column row: skip it and keep going.
                continue
            record = [cell.text for cell in cells]
            ALL.append(record)
            # A cell without text has .text == None; emit an empty field
            # instead of failing on str + None and truncating the file.
            line = ','.join('' if text is None else text for text in record)
            f.write(line + '\r\n')
            print('获得{}条数据'.format(len(ALL)))
    print('下载完成!')
except Exception as exc:
    # Best-effort script: surface the failure rather than hiding it, then
    # still show whatever rows were processed.
    print('抓取失败: {!r}'.format(exc))
pprint.pprint(ALL)