强化学习代码实战-01无状态问题(多臂赌博机)
import random
from typing import Callable, Optional

import numpy as np

# Number of slot-machine arms in the simulated bandit.
ARM_COUNT = 10

# Win probability of each arm, drawn uniformly from [0, 1). The strategies
# below never read this array; only try_and_play() does, to simulate a pull.
probs = np.random.uniform(size=ARM_COUNT)

# Reward history per arm (a list of 0/1 outcomes). Every arm is seeded with a
# single 1 so that per-arm means and play counts are defined (non-empty and
# non-zero) before the first real play.
rewards = [[1] for _ in range(ARM_COUNT)]

# Bare expression kept from the original; presumably it displayed the initial
# state in an interactive session. It has no effect when run as a script.
probs, rewards


def _random_arm() -> int:
    """Return the index of an arm chosen uniformly at random."""
    return random.randint(0, len(rewards) - 1)


def _mean_rewards() -> np.ndarray:
    """Return the average recorded reward of every arm as a float array."""
    return np.array([np.mean(history) for history in rewards])


def greedy_choose_one() -> int:
    """Choose an arm with an epsilon-greedy rule (epsilon fixed at 0.01).

    With probability 0.01 a random arm is explored; otherwise the arm with
    the highest average recorded reward is exploited.

    Returns:
        Index of the chosen arm.
    """
    if random.random() < 0.01:
        return _random_arm()
    return int(np.argmax(_mean_rewards()))


def decay_choose_one() -> int:
    """Choose an arm with a decaying epsilon-greedy rule.

    The exploration probability is ``1 / total_plays`` (seed entries
    included), so exploration shrinks as more rounds are played.

    Returns:
        Index of the chosen arm.
    """
    play_count = sum(len(history) for history in rewards)
    if random.random() < 1 / play_count:
        return _random_arm()
    return int(np.argmax(_mean_rewards()))


def upon_confidence_choose() -> int:
    """Choose the arm with the largest mean reward plus confidence bonus.

    The bonus of arm ``i`` is ``sqrt(sqrt(total_plays) / (2 * plays_i))``:
    it grows slowly with the total number of plays and shrinks quickly for
    arms that have already been played often.

    NOTE(review): this is not the textbook UCB1 bonus
    ``sqrt(2 * ln(t) / n_i)``; the square-root growth looks deliberate in the
    original, so the formula is kept unchanged.

    Reference: https://www.cnblogs.com/Ryan0v0/p/11366578.html

    Returns:
        Index of the chosen arm.
    """
    play_count = np.array([len(history) for history in rewards])
    # Numerator: total plays, square-rooted to slow its growth.
    numerator = play_count.sum() ** 0.5
    # Denominator: per-arm plays, doubled to speed up its growth.
    denominator = play_count * 2
    # The outer square root compresses values above 1 and stretches values
    # below 1, keeping the bonus within a moderate range.
    bonus = (numerator / denominator) ** 0.5
    return int((_mean_rewards() + bonus).argmax())


def beta_test() -> None:
    """Print Beta samples for small and large parameters.

    Shows that Beta(1, 1) samples vary widely while Beta(1e5, 1e5) samples
    are tightly concentrated. The two parameters play the role of success and
    failure counts.
    """
    print("当数字小的时候,beta分布的概率有很大随机性")
    for _ in range(5):
        print(np.random.beta(1, 1))

    print("当数字大的时候,beta分布逐渐稳定")
    for _ in range(5):
        print(np.random.beta(1e5, 1e5))


def thompson_choose_one() -> int:
    """Choose an arm by Thompson sampling.

    For each arm, one value is drawn from ``Beta(wins + 1, losses + 1)`` and
    the arm with the largest draw is selected. Each draw can be read as a
    plausible win probability for that arm.

    Reference: https://blog.csdn.net/qq_24434491/article/details/114319241

    Returns:
        Index of the chosen arm.
    """
    wins = [sum(history) + 1 for history in rewards]
    # Outcomes are 0/1, so the number of losses is len - sum.
    losses = [len(history) - sum(history) + 1 for history in rewards]
    samples = np.random.beta(wins, losses)
    return int(samples.argmax())


def try_and_play(choose: Optional[Callable[[], int]] = None) -> None:
    """Play one round: pick an arm, pull it, and record the outcome.

    Args:
        choose: Zero-argument strategy returning the index of the arm to
            pull (for example ``greedy_choose_one``). Defaults to
            ``thompson_choose_one``.
    """
    if choose is None:
        # Resolved at call time so a rebound strategy is still picked up.
        choose = thompson_choose_one
    arm = choose()
    # The pull wins with probability probs[arm].
    reward = 1 if random.random() < probs[arm] else 0
    rewards[arm].append(reward)


def play_N(num, choose: Optional[Callable[[], int]] = None):
    """Play ``num`` rounds and compare the outcome with the best possible.

    Args:
        num: Number of rounds to play.
        choose: Optional strategy forwarded to ``try_and_play``.

    Returns:
        A ``(target, result)`` tuple: ``target`` is the expected reward of
        always pulling the best arm for ``num`` rounds, and ``result`` is the
        reward actually earned during this call.
    """
    # Snapshot the running total so the seed entries and any earlier calls
    # are not counted towards this call's result.
    earned_before = sum(sum(history) for history in rewards)
    for _ in range(num):
        try_and_play(choose)
    target = probs.max() * num
    result = sum(sum(history) for history in rewards) - earned_before
    return target, result
# Run 5000 rounds. play_N returns a (target, result) tuple, which is not
# stored or printed here.
play_N(5000)
时刻记着自己要成为什么样的人!