从500彩票网站下载历史数据
# Extract Double Color Ball (双色球) history records from a page of the 500
# lottery website.  Save the web page as 01.txt before running this script.


def extract_records(lines):
    """Return the leading token of every line that looks like a draw record.

    Each line is left-stripped, surrounding newline characters are removed,
    and only the text before the first space is kept.  That token is accepted
    when it is 19, 20 or 21 characters long.

    Args:
        lines: Any iterable of strings (an open text file works).

    Returns:
        The accepted tokens, in input order.
    """
    records = []
    for line in lines:
        token = line.lstrip().strip('\n').split(' ')[0]
        if len(token) in (19, 20, 21):
            records.append(token)
    return records


def convert_saved_page(src='01.txt', dst='彩票数据.txt'):
    """Filter the saved page *src* into *dst*, one record per line.

    Both files are opened with the platform default text encoding, as the
    original script did.  NOTE(review): the encoding of the saved page is not
    known from here -- pass an explicit encoding if the default is wrong.

    Returns:
        The list of records that was written.
    """
    # Context managers guarantee both handles are closed, even on error.
    with open(src) as saved_page:
        records = extract_records(saved_page)
    with open(dst, 'w') as out:
        out.writelines(record + '\n' for record in records)
    return records


if __name__ == '__main__':
    data = convert_saved_page()

# Sample formatter left disabled by the original author; it slices each record
# into fixed-width fields (presumably year, draw number and the drawn balls --
# verify against real data):
# for k in data:
#     print('20%s%s: %s %s %s %s %s %s--%s' %
#           (k[0:2], k[2:5], k[5:7], k[7:9], k[9:11], k[11:13], k[13:15], k[15:17], k[17:19]))
import re
import urllib.request

# History page of the 500 lottery site; the query string selects the range
# start=16110 .. end=16120 (presumably draw numbers -- confirm with the site).
url = 'http://datachart.500.com/ssq/history/newinc/history.php?start=16110&end=16120'

# One result row: a 5-digit number cell, six two-digit "t_cfont2" cells and one
# two-digit "t_cfont4" cell, followed by the opening of the next cell.
ROW_PATTERN = re.compile(
    r'(\d{5})'
    + r'</td><td class="t_cfont2">(\d\d)' * 6
    + r'</td><td class="t_cfont4">(\d\d)</td><td'
)


def parse_draws(html):
    """Return every result row found in *html*.

    Args:
        html: Page source as text.

    Returns:
        A list of 8-tuples of strings: the 5-digit number, the six
        "t_cfont2" values and the "t_cfont4" value, in page order.  Empty
        when no row matches.
    """
    return ROW_PATTERN.findall(html)


def fetch_history_page(page_url=url, timeout=30):
    """Download *page_url* and return its body decoded as UTF-8.

    Args:
        page_url: Address to fetch; defaults to the module-level ``url``.
        timeout: Seconds to wait before giving up, so a stalled server cannot
            hang the script forever.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(page_url, timeout=timeout) as response:
        return response.read().decode('utf-8')


if __name__ == '__main__':
    html = fetch_history_page()
    # Each row is wrapped in its own one-element list so the printed output
    # keeps the nested shape the original script produced.
    tempList = [[draw] for draw in parse_draws(html)]
    print(tempList)