Picking DLT (大乐透) and SSQ (双色球) numbers with Python (LSTM prediction and random selection)
This article is for reference and learning only.
1. LSTM prediction
First, scrape the data.
This scraper pulls DLT results from 2007 (issue 07001) up to the latest draw.
import requests
from bs4 import BeautifulSoup
import csv

# Target URL: DLT draw history starting from issue 07001
url = 'http://datachart.500.com/dlt/history/newinc/history.php?start=07001'

# Send the HTTP request
response = requests.get(url)
response.encoding = 'utf-8'  # make sure the encoding is correct

# Parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')

# Locate the table body that holds the draw data
tbody = soup.find('tbody', id="tdata")

# List holding the draw data
lottery_data = []

# Walk through every row of the table
for tr in tbody.find_all('tr'):
    tds = tr.find_all('td')
    if tds:
        # Extract the cell text and append it to the list
        lottery_data.append([td.text for td in tds])

# Write to a CSV file
with open('dlt_lottery_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Optional header row
    # writer.writerow(['期号', '号码1', '号码2', '号码3', '号码4', '号码5', '号码6', '号码7'])
    # Data rows
    writer.writerows(lottery_data)

print('数据抓取完成,并保存到dlt_lottery_data.csv文件中。')
Below is the scraper for SSQ.
import requests
from bs4 import BeautifulSoup
import csv

# Target URL: SSQ draw history starting from issue 07001
url = 'http://datachart.500.com/ssq/history/newinc/history.php?start=07001'

# Send the HTTP request
response = requests.get(url)
response.encoding = 'utf-8'  # make sure the encoding is correct

# Parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')

# Locate the table body that holds the draw data
tbody = soup.find('tbody', id="tdata")

# List holding the draw data
lottery_data = []

# Walk through every row of the table
for tr in tbody.find_all('tr'):
    tds = tr.find_all('td')
    if tds:
        # Extract the cell text and append it to the list
        lottery_data.append([td.text for td in tds])

# Write to a CSV file
with open('ssq_lottery_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Optional header row
    # writer.writerow(['期号', '号码1', '号码2', '号码3', '号码4', '号码5', '号码6', '号码7'])
    # Data rows
    writer.writerows(lottery_data)

print('数据抓取完成,并保存到ssq_lottery_data.csv文件中。')
Next, process the scraped data.
DLT is 5+2 and SSQ is 6+1; the two formats differ, so be careful to keep them apart.
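In the processing code below, this difference shows up purely in how each CSV row is sliced. The rows here are made-up examples (the real files also carry extra statistics columns after the numbers), but they illustrate the layout the slices assume:

# Hypothetical example rows: issue number first, then one column per ball
dlt_row = ["24001", "03", "11", "19", "27", "34", "05", "09"]
ssq_row = ["24001", "02", "08", "15", "21", "28", "33", "12"]

print(dlt_row[1:6], dlt_row[6:8])   # DLT: 5 front-zone numbers + 2 back-zone numbers
print(ssq_row[1:7], ssq_row[7:8])   # SSQ: 6 red numbers + 1 blue number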
The DLT version:
import csv


def get_data(path):
    r_data = []
    b_data = []
    with open(path, 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            # DLT: columns 1-5 are the front-zone numbers, columns 6-7 the back-zone numbers
            r_data.append(list(map(int, row[1:6])))
            b_data.append(list(map(int, row[6:8])))
    # Reverse so the rows run from oldest to newest
    r_data.reverse()
    b_data.reverse()
    return r_data, b_data


def process_data():
    p = r"./dlt_lottery_data.csv"
    r_data, b_data = get_data(p)
    # print(b_data)
    return r_data, b_data


if __name__ == '__main__':
    process_data()
Below is the SSQ version:
import csv


def get_data(path):
    r_data = []
    b_data = []
    with open(path, 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            # SSQ: columns 1-6 are the red numbers, column 7 the blue number
            r_data.append(list(map(int, row[1:7])))
            b_data.append(list(map(int, row[7:8])))
    # Reverse so the rows run from oldest to newest
    r_data.reverse()
    b_data.reverse()
    return r_data, b_data


def process_data():
    p = r"./ssq_lottery_data.csv"
    r_data, b_data = get_data(p)
    # print(b_data)
    return r_data, b_data


if __name__ == '__main__':
    process_data()
Now define the model.
# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Zero-initialize the hidden and cell states for each forward pass
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step feeds the fully connected output layer
        out = self.fc(out[:, -1, :])
        return out
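As a quick sanity check, the sketch below (assuming the class above and the `import torch` from the full script) shows the shapes the model expects and returns:

# A batch of 4 sequences, each 10 draws of 5 numbers
model = LSTMModel(input_size=5, hidden_size=20, output_size=5, num_layers=3)
x = torch.randn(4, 10, 5)   # (batch, seq_length, input_size) because batch_first=True
print(model(x).shape)       # torch.Size([4, 5]): one 5-number prediction per sequence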
Before training, standardize the data and convert it to tensors.
def trans_process_data(seq_length):
    r_data, b_data = process_data()
    r_data = np.array(r_data)
    b_data = np.array(b_data)

    # Convert to PyTorch tensors
    r_data = torch.tensor(r_data, dtype=torch.float32)
    b_data = torch.tensor(b_data, dtype=torch.float32)

    # Standardize the red-ball data
    r_mean = r_data.mean(dim=0)
    r_std = r_data.std(dim=0)
    r_data = (r_data - r_mean) / r_std

    # Standardize the blue-ball data
    b_mean = b_data.mean(dim=0)
    b_std = b_data.std(dim=0)
    b_data = (b_data - b_mean) / b_std

    # Build sliding windows: seq_length past draws as input, the following draw as target
    r_train = []
    r_target = []
    b_train = []
    b_target = []
    for i in range(len(r_data) - seq_length):
        r_train.append(r_data[i:i + seq_length])
        r_target.append(r_data[i + seq_length])
    r_train = torch.stack(r_train)
    r_target = torch.stack(r_target)

    for i in range(len(b_data) - seq_length):
        b_train.append(b_data[i:i + seq_length])
        b_target.append(b_data[i + seq_length])
    b_train = torch.stack(b_train)
    b_target = torch.stack(b_target)

    return r_data, b_data, r_train, r_target, b_train, b_target, r_mean, r_std, b_mean, b_std
The training function:
def start_train(input_size, hidden_size, output_size, num_layers, train_data, target_data, num_epochs=100):
    model = LSTMModel(input_size, hidden_size, output_size, num_layers)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.05)

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()

        # Forward pass
        outputs = model(train_data)
        loss = criterion(outputs, target_data)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

        # Halfway through training, drop the learning rate from 0.05 to 0.01
        if epoch == int(num_epochs / 2):
            optimizer = optim.Adam(model.parameters(), lr=0.01)

    return model
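As a side note, the same halfway learning-rate drop can be expressed with PyTorch's built-in StepLR scheduler instead of re-creating the optimizer. This is only an alternative sketch; the toy model and data here are stand-ins, not part of the original script:

import torch
import torch.nn as nn
import torch.optim as optim

# Toy stand-ins just to show the scheduler wiring
model = nn.Linear(5, 5)
data = torch.randn(32, 5)
target = torch.randn(32, 5)
criterion = nn.MSELoss()

num_epochs = 100
optimizer = optim.Adam(model.parameters(), lr=0.05)
# Multiply the lr by 0.2 at the halfway point: 0.05 -> 0.01, same intent as the manual swap above
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=num_epochs // 2, gamma=0.2)

for epoch in range(num_epochs):
    optimizer.zero_grad()
    loss = criterion(model(data), target)
    loss.backward()
    optimizer.step()
    scheduler.step()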
The prediction function:
def start_predicted(model, predicted_data):
    model.eval()
    with torch.no_grad():
        test_input = predicted_data.unsqueeze(0)  # use the last seq_length time steps as input
        predicted = model(test_input)
        # print("Predicted:", predicted)
    return predicted
The red balls and blue balls are trained and predicted separately, so run both.
def start_all_train(hidden_size, num_layers, num_epochs, seq_length):
    r_data, b_data, r_train, r_target, b_train, b_target, r_mean, r_std, b_mean, b_std = trans_process_data(seq_length)

    # Red balls: the 5 front-zone numbers
    r_size = 5
    r_model = start_train(r_size, hidden_size, r_size, num_layers, r_train, r_target, num_epochs)
    predicted_data = r_data[-seq_length:]
    r_predicted = start_predicted(r_model, predicted_data)

    print("-------------------------- blue balls --------------------------")

    # Blue balls: the 2 back-zone numbers
    b_size = 2
    b_model = start_train(b_size, hidden_size, b_size, num_layers, b_train, b_target, num_epochs)
    predicted_data = b_data[-seq_length:]
    b_predicted = start_predicted(b_model, predicted_data)

    print(r_predicted)
    print(b_predicted)

    # Undo the standardization to return to the original number scale
    r_predicted = r_predicted * r_std + r_mean
    b_predicted = b_predicted * b_std + b_mean
    print(r_predicted)
    print(b_predicted)

    return r_predicted, b_predicted
The complete code:
import os
import sys

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)

from data_process import process_data
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out


def trans_process_data(seq_length):
    r_data, b_data = process_data()
    r_data = np.array(r_data)
    b_data = np.array(b_data)

    # Convert to PyTorch tensors
    r_data = torch.tensor(r_data, dtype=torch.float32)
    b_data = torch.tensor(b_data, dtype=torch.float32)

    # Standardize the red-ball data
    r_mean = r_data.mean(dim=0)
    r_std = r_data.std(dim=0)
    r_data = (r_data - r_mean) / r_std

    # Standardize the blue-ball data
    b_mean = b_data.mean(dim=0)
    b_std = b_data.std(dim=0)
    b_data = (b_data - b_mean) / b_std

    # Build sliding windows: seq_length past draws as input, the following draw as target
    r_train = []
    r_target = []
    b_train = []
    b_target = []
    for i in range(len(r_data) - seq_length):
        r_train.append(r_data[i:i + seq_length])
        r_target.append(r_data[i + seq_length])
    r_train = torch.stack(r_train)
    r_target = torch.stack(r_target)

    for i in range(len(b_data) - seq_length):
        b_train.append(b_data[i:i + seq_length])
        b_target.append(b_data[i + seq_length])
    b_train = torch.stack(b_train)
    b_target = torch.stack(b_target)

    return r_data, b_data, r_train, r_target, b_train, b_target, r_mean, r_std, b_mean, b_std


def start_train(input_size, hidden_size, output_size, num_layers, train_data, target_data, num_epochs=100):
    model = LSTMModel(input_size, hidden_size, output_size, num_layers)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.05)

    # Train the model
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()

        # Forward pass
        outputs = model(train_data)
        loss = criterion(outputs, target_data)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

        # Halfway through training, drop the learning rate from 0.05 to 0.01
        if epoch == int(num_epochs / 2):
            optimizer = optim.Adam(model.parameters(), lr=0.01)

    return model


def start_predicted(model, predicted_data):
    model.eval()
    with torch.no_grad():
        test_input = predicted_data.unsqueeze(0)  # use the last seq_length time steps as input
        predicted = model(test_input)
        # print("Predicted:", predicted)
    return predicted


def start_all_train(hidden_size, num_layers, num_epochs, seq_length):
    r_data, b_data, r_train, r_target, b_train, b_target, r_mean, r_std, b_mean, b_std = trans_process_data(seq_length)

    # Red balls: the 5 front-zone numbers
    r_size = 5
    r_model = start_train(r_size, hidden_size, r_size, num_layers, r_train, r_target, num_epochs)
    predicted_data = r_data[-seq_length:]
    r_predicted = start_predicted(r_model, predicted_data)

    print("-------------------------- blue balls --------------------------")

    # Blue balls: the 2 back-zone numbers
    b_size = 2
    b_model = start_train(b_size, hidden_size, b_size, num_layers, b_train, b_target, num_epochs)
    predicted_data = b_data[-seq_length:]
    b_predicted = start_predicted(b_model, predicted_data)

    print(r_predicted)
    print(b_predicted)

    # Undo the standardization to return to the original number scale
    r_predicted = r_predicted * r_std + r_mean
    b_predicted = b_predicted * b_std + b_mean
    print(r_predicted)
    print(b_predicted)

    return r_predicted, b_predicted


if __name__ == '__main__':
    hidden_size = 20
    num_layers = 3
    num_epochs = 1000
    seq_length = 10
    r_predicted, b_predicted = start_all_train(hidden_size, num_layers, num_epochs, seq_length)
    # print(r_predicted)
    # print(b_predicted)
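The de-standardized predictions are still floating-point tensors, not valid picks. A minimal post-processing sketch (my own addition, not part of the original script) rounds, clamps, and de-duplicates them, assuming the DLT ranges of 1-35 for the front zone and 1-12 for the back zone:

def to_numbers(predicted, low, high, count):
    # Round each predicted value, clamp it into the valid range, drop duplicates
    nums = []
    for v in predicted.squeeze(0).tolist():
        n = min(max(int(round(v)), low), high)
        if n not in nums:
            nums.append(n)
    # If rounding produced duplicates, pad with the smallest unused numbers
    for n in range(low, high + 1):
        if len(nums) >= count:
            break
        if n not in nums:
            nums.append(n)
    return sorted(nums[:count])


print(to_numbers(r_predicted, 1, 35, 5))   # a 5-number front-zone pick
print(to_numbers(b_predicted, 1, 12, 2))   # a 2-number back-zone pick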
2. Random selection
Below is the random number-picking approach.
import random
from collections import Counter

# DLT and SSQ use different parameters (DLT values active, SSQ values commented out)
r_len = 5
r_num = 35

b_len = 2
b_num = 12

# SSQ
# r_len = 6
# r_num = 33
#
# b_len = 1
# b_num = 16

number = 100_000_000

li_r = []
li_b = []
for i in range(number):
    r_li = random.sample(range(1, r_num + 1), r_len)
    b_li = random.sample(range(1, b_num + 1), b_len)
    li_r.extend(r_li)
    li_b.extend(b_li)
    print(i)  # progress output (very chatty for this many iterations)

counter_li_r = Counter(li_r)
counter_li_b = Counter(li_b)

# Most frequently drawn numbers
most_common_li_r = [x[0] for x in counter_li_r.most_common(r_len)]
most_common_li_b = [x[0] for x in counter_li_b.most_common(b_len)]
most_common_li_r.sort()
most_common_li_b.sort()
li = most_common_li_r + most_common_li_b
print("most: ", li)

# Least frequently drawn numbers
most_least_li_r = [x[0] for x in counter_li_r.most_common()[-r_len:]]
most_least_li_b = [x[0] for x in counter_li_b.most_common()[-b_len:]]
most_least_li_r.sort()
most_least_li_b.sort()
li = most_least_li_r + most_least_li_b
print("least: ", li)
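With number set to 100,000,000, the loop above (and its per-iteration print) runs very slowly. If NumPy is available, a vectorized sketch like the following (my own alternative, not the original code) simulates the draws much faster; argsort over uniform noise yields one random permutation per row, i.e. sampling without replacement:

import numpy as np
from collections import Counter

rng = np.random.default_rng()
number = 100_000  # a smaller draw count, purely for illustration

# Each row is one simulated DLT draw: keep the first columns of a random permutation
r_draws = rng.random((number, 35)).argsort(axis=1)[:, :5] + 1
b_draws = rng.random((number, 12)).argsort(axis=1)[:, :2] + 1

r_counter = Counter(r_draws.ravel().tolist())
b_counter = Counter(b_draws.ravel().tolist())

print("most: ", sorted(n for n, _ in r_counter.most_common(5)) + sorted(n for n, _ in b_counter.most_common(2)))
print("least:", sorted(n for n, _ in r_counter.most_common()[-5:]) + sorted(n for n, _ in b_counter.most_common()[-2:]))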
Good luck, and congratulations in advance on that first-prize win.