从500彩票网站下载历史数据

# Extract lottery records from 01.txt (the 500.com Double Color Ball history
# page saved as text) and write them, one per line, to '彩票数据.txt'.
data = []
# Context managers guarantee both handles are closed even if reading or
# writing raises part-way through.
with open('01.txt') as file:
    for line in file:
        # Keep only the first space-delimited token of the line, ignoring
        # leading whitespace and the trailing newline.
        token = line.lstrip().strip('\n').split(' ')[0]
        # Records are recognised purely by their length (19 to 21 characters).
        if 19 <= len(token) <= 21:
            data.append(token)

with open('彩票数据.txt', 'w') as newFile:
    # One batched call instead of two write() calls per record.
    newFile.writelines(record + '\n' for record in data)

##for k in data:
##    print('20%s%s: %s %s %s %s %s %s--%s'%\
##          (k[0:2],k[2:5],k[5:7],k[7:9],k[9:11],k[11:13],k[13:15],k[15:17],k[17:19]))
    

 

import urllib.request
import re

# Fetch the 500.com history page for the start=16110 / end=16120 range and
# extract the numeric cells of every result row.
url = 'http://datachart.500.com/ssq/history/newinc/history.php?start=16110&end=16120'
# Close the HTTP response deterministically once the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

# s1 locates a whole row: a 5-digit number followed by six "t_cfont2" cells
# and one "t_cfont4" cell, each holding exactly two digits.
s1 = r'\d{5}</td><td class="t_cfont2">\d\d</td><td class="t_cfont2">\d\d</td><td class="t_cfont2">\d\d</td><td class="t_cfont2">\d\d</td><td class="t_cfont2">\d\d</td><td class="t_cfont2">\d\d</td><td class="t_cfont4">\d\d</td><td'
# s2 has the same layout but captures the eight cell values as groups.
s2 = r'(.*?)</td><td class="t_cfont2">(.*?)</td><td class="t_cfont2">(.*?)</td><td class="t_cfont2">(.*?)</td><td class="t_cfont2">(.*?)</td><td class="t_cfont2">(.*?)</td><td class="t_cfont2">(.*?)</td><td class="t_cfont4">(.*?)</td><td'
pat1 = re.compile(s1)
# Compiled once here rather than on every loop iteration.
pat2 = re.compile(s2)

t1 = pat1.findall(html)

allData = []
# One entry per matched row; each entry is the list of 8-tuples that
# findall() returns for that row's text.
tempList = [pat2.findall(row) for row in t1]
print(tempList)

 

posted @ 2016-10-13 20:46  疯陈演义  阅读(2380)  评论(0)  编辑  收藏  举报