爬取大乐透历史中奖信息,随便写,数据不大

爬取大乐透历史中奖数据

import requests
from lxml import etree
import pprint
#import json
# Scrape the Super Lotto (大乐透) draw-history table from 500.com, dump every
# row as a Python list literal into 大乐透.txt, and pretty-print the result.
# NOTE(review): `limit=23020` presumably caps the number of draws returned;
# the page may contain fewer rows, so the row count is never assumed below.
url = 'http://datachart.500.com/dlt/history/newinc/history.php?limit=23020&sort=1'
headers = {
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}

# Number of leading <td> cells taken from each table row.
COLUMN_COUNT = 15

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as data.
req = requests.get(url=url, headers=headers, timeout=30)
req.raise_for_status()
req_text = req.text
web_html = etree.HTML(req_text)

# Collected rows; each entry is the list of the first COLUMN_COUNT cell texts.
ALL = []

# Select every row once and walk its cells relatively, instead of running one
# absolute XPath query per cell against the whole document.
for row in web_html.xpath('//*[@id="tdata"]/tr'):
    cells = row.xpath('./td')
    if len(cells) < COLUMN_COUNT:
        # Rows without the full set of columns carry no draw data; skipping
        # them avoids aborting the whole run on a single short row.
        continue
    all_list = [cell.text for cell in cells[:COLUMN_COUNT]]
    ALL.append(all_list)
    print(all_list)
    print('获得{}条数据'.format(len(ALL)))

if not ALL:
    print('No rows found under id="tdata"; the page layout may have changed.')

# Saving no longer depends on the page holding an exact number of rows, and a
# write failure is reported instead of being silently swallowed.
try:
    # Explicit encoding so the output does not depend on the platform locale.
    with open(r'大乐透.txt', 'w', encoding='utf-8') as f:
        f.write(str(ALL))
        print('下载完成!')
except OSError as exc:
    print('Failed to write 大乐透.txt: {}'.format(exc))

pprint.pprint(ALL)
import requests
from lxml import etree
import pprint
#import json
# Scrape the Super Lotto (大乐透) draw-history table from 500.com and write it
# to 大乐透2.txt as one comma-separated line per draw, then pretty-print the
# collected rows.
# NOTE(review): `limit=23020` presumably caps the number of draws returned;
# the page may contain fewer rows, so the row count is never assumed below.
url = 'http://datachart.500.com/dlt/history/newinc/history.php?limit=23020&sort=1'
headers = {
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}

# Number of leading <td> cells taken from each table row.
COLUMN_COUNT = 15

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as data.
req = requests.get(url=url, headers=headers, timeout=30)
req.raise_for_status()
req_text = req.text
web_html = etree.HTML(req_text)

# Rows are collected here so the final pprint works when this script is run
# on its own (the name was previously never defined in this script).
ALL = []

try:
    # newline='' disables newline translation so the explicit '\r\n' below is
    # written verbatim on every platform (text mode on Windows would otherwise
    # turn it into '\r\r\n'). Explicit encoding avoids locale dependence.
    with open(r'大乐透2.txt', 'w', encoding='utf-8', newline='') as f:
        # Select every row once and walk its cells relatively, instead of
        # running one absolute XPath query per cell.
        for row in web_html.xpath('//*[@id="tdata"]/tr'):
            cells = row.xpath('./td')
            if len(cells) < COLUMN_COUNT:
                # Short rows carry no draw data; skip rather than abort.
                continue
            texts = [cell.text for cell in cells[:COLUMN_COUNT]]
            ALL.append(texts)
            # A cell whose text is None (e.g. it only wraps child elements)
            # becomes an empty field instead of raising TypeError.
            f.write(','.join(text or '' for text in texts) + '\r\n')
            print('获得{}条数据'.format(len(ALL)))
        print('下载完成!')
except OSError as exc:
    # Report file-system failures instead of silently swallowing them.
    print('Failed to write 大乐透2.txt: {}'.format(exc))

if not ALL:
    print('No rows found under id="tdata"; the page layout may have changed.')

pprint.pprint(ALL)
posted @ 2023-02-26 00:27  AubeLiang  阅读(231)  评论(0)  编辑  收藏  举报