AI | Q LEARNING
Q LEARNING 强化学习
参考来源:bilibili@莫烦python
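基本算法:Q-learning 的核心更新公式为 $Q(s,a) \leftarrow Q(s,a) + \alpha\,[\,r + \gamma \max_{a'} Q(s',a') - Q(s,a)\,]$,其中 $\alpha$ 为学习率,$\gamma$ 为折扣因子(分别对应下面代码中的 ALPHA 与 LAMADA)。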
示例代码:
import numpy as np
import pandas as pd
import time
N_STATES = 6 # length of the one-dimensional world (number of cells, treasure included)
ACTIONS = ['left', 'right'] # actions the agent can take
EPSILON = 0.9 # epsilon-greedy policy: a uniform draw above this value forces a random action
ALPHA = 0.1 # learning rate used when updating a Q-value
LAMADA = 0.9 # discount factor applied to the next state's best Q-value
MAX_EPISODES = 13 # number of training episodes
FRESH_TIME = 0.2 # pause between rendered frames, in seconds
# Build the Q-table
def build_q_table(n_states, actions):
    """Create a zero-initialised Q-table.

    Args:
        n_states: Number of rows, one per state.
        actions: Sequence of action names; used as the column labels.

    Returns:
        A ``pandas.DataFrame`` of shape ``(n_states, len(actions))`` whose
        entries are all ``0.0``.
    """
    return pd.DataFrame(
        np.zeros((n_states, len(actions))),
        columns=actions,  # one column per action name
    )
# Action selection (epsilon-greedy)
def choose_action(state, q_table):
    """Pick an action for ``state`` with an epsilon-greedy policy.

    A random action from ``ACTIONS`` is chosen when a uniform draw exceeds
    ``EPSILON`` or when every Q-value of this state is still zero (nothing
    has been learned for it yet). Otherwise the action with the largest
    Q-value is returned.

    Args:
        state: Integer row position of the current state in ``q_table``.
        q_table: DataFrame of Q-values with one column per action.

    Returns:
        The name of the chosen action.
    """
    state_actions = q_table.iloc[state, :]
    # ``state_actions.all() == 0`` would be true as soon as ANY value is zero;
    # the random fallback is only wanted while ALL values are zero.
    nothing_learned = (state_actions == 0).all()
    if np.random.uniform() > EPSILON or nothing_learned:
        # Explore: pick uniformly at random.
        action_name = np.random.choice(ACTIONS)
    else:
        # Exploit: label of the column holding the largest Q-value.
        action_name = state_actions.idxmax()
    return action_name
# Environment feedback
def get_env_feedback(S, A):
    """Return the next state and reward for taking action ``A`` in state ``S``.

    Moving right from the cell just before the treasure (``N_STATES - 2``)
    ends the game: the next state is the string ``'win'`` and the reward is 1.
    Every other move yields reward 0. Moving left from cell 0 leaves the
    agent where it is.

    Args:
        S: Current state, an integer cell position.
        A: Action name; ``'right'`` moves right, anything else moves left.

    Returns:
        A tuple ``(S_, R)`` of next state and reward.
    """
    if A == 'right':
        if S == N_STATES - 2:  # stepping onto the treasure ends the game
            return 'win', 1
        return S + 1, 0

    # Any other action is treated as a move to the left; cell 0 is a wall.
    S_ = S if S == 0 else S - 1
    return S_, 0
# Environment rendering
def update_env(S, episode, step_counter):
    """Render the one-dimensional world on the current terminal line.

    While the game is running, the world is drawn as ``'-'`` cells with a
    trailing ``'T'`` (treasure) and an ``'o'`` at the agent's position, then
    the function sleeps for ``FRESH_TIME`` seconds. When ``S`` is ``'win'``,
    an episode summary is shown for two seconds and then erased.

    Args:
        S: Current state: an integer cell position, or ``'win'``.
        episode: Zero-based episode index (displayed one-based).
        step_counter: Number of steps taken so far in this episode.
    """
    env_list = ['-'] * (N_STATES - 1) + ['T']  # 1-D world, treasure at the end
    if S == 'win':
        interaction = 'Episode %s: total_step = %s' % (episode + 1, step_counter)
        print('\r{}'.format(interaction), end='')
        time.sleep(2)
        # Overwrite the whole summary with blanks (a single space would leave
        # most of it visible) and return the cursor to the start of the line
        # so whatever is printed next begins at column 0.
        print('\r{}'.format(' ' * len(interaction)), end='\r')
    else:
        env_list[S] = 'o'
        interaction = ''.join(env_list)
        print('\r{}'.format(interaction), end='')
        time.sleep(FRESH_TIME)
# Reinforcement learning main loop
def rl():
    """Train a Q-table with tabular Q-learning and return it.

    Runs ``MAX_EPISODES`` episodes. Each episode starts in state 0 and
    repeats choose-action / observe / update until the environment reports
    ``'win'``. The table is printed at the start of every episode and the
    world is re-rendered after every step.

    Returns:
        The trained Q-table (a DataFrame built by ``build_q_table``).
    """
    q_table = build_q_table(N_STATES, ACTIONS)  # start with all zeros
    for episode in range(MAX_EPISODES):
        print(q_table)
        step_counter = 0
        S = 0
        is_terminated = False
        update_env(S, episode, step_counter)
        while not is_terminated:
            A = choose_action(S, q_table)
            S_, R = get_env_feedback(S, A)  # act and observe next state / reward
            q_predict = q_table.loc[S, A]  # current estimate for (S, A)
            if S_ != 'win':
                # Bootstrapped target: reward plus discounted best next value.
                q_target = R + LAMADA * q_table.iloc[S_, :].max()
            else:
                # The next state ends the episode: no future value to add.
                q_target = R
                is_terminated = True
            # Move the estimate a fraction ALPHA towards the target.
            q_table.loc[S, A] += ALPHA * (q_target - q_predict)
            S = S_  # advance to the next state
            step_counter += 1
            update_env(S, episode, step_counter)
    return q_table
# Train the agent, then show the learned Q-table under its heading.
q_table = rl()
print('q_table: ', q_table, sep='\n')
上面的代码与莫烦给出的版本略有不同,主要是 pandas 版本不同所致。
本文来自博客园,作者:Mz1,转载请注明原文链接:https://www.cnblogs.com/Mz1-rc/p/15795007.html
如果有问题可以在下方评论或者email:mzi_mzi@163.com