弓刀

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

本篇代码来自:

https://github.com/rhololkeolke/lspi-python

这是lspi文件夹basisfunction.py文件

事项

(1)python ABC(abstract base class)用法:

https://mozillazg.com/2014/06/python-define-abstract-base-classes.html

http://blog.csdn.net/nixawk/article/details/42970321

 

  1 # -*- coding: utf-8 -*-
  2 """Abstract Base Class for Basis Function and some common implementations."""
  3 
  4 import abc
  5 
  6 import numpy as np
  7 
  8 
  9 class BasisFunction(object):
 10 
 11     r"""ABC for basis functions used by LSPI Policies.
 12 
 13     A basis function is a function that takes in a state vector and an action
 14     index and returns a vector of features. The resulting feature vector is
 15     referred to as :math:`\phi` in the LSPI paper (pg 9 of the PDF referenced
 16     in this package's documentation). The :math:`\phi` vector is dotted with
 17     the weight vector of the Policy to calculate the Q-value.
 18 
 19     The dimensions of the state vector are usually smaller than the dimensions
 20     of the :math:`\phi` vector. However, the dimensions of the :math:`\phi`
 21     vector are usually much smaller than the dimensions of an exact
 22     representation of the state which leads to significant savings when
 23     computing and storing a policy.
 24 
 25     """
 26   #该函数输入状态向量,和动作,返回特征向量(feature vector)
    #也就是说该模块是进行系统状态更新的?也可能是不更新,直接就是通过状态计算特征向量
    #特征向量和权重矩阵点成得到了Q值,通过点乘得到说明是通过线性方程对Q(S,A)J进行近似
27 __metaclass__ = abc.ABCMeta #构建类的方法 28 29 @abc.abstractmethod #python的@修饰符,说明要在子类中必须具体实现,不然报错,这似乎是abc库的常规用法 30 def size(self): #返回phi也就是特征向量的长度 31 r"""Return the vector size of the basis function. 32 33 Returns 34 ------- 35 int 36 The size of the :math:`\phi` vector. 37 (Referred to as k in the paper). 38 39 """ 40 pass # pragma: no cover 41 42 @abc.abstractmethod 43 def evaluate(self, state, action): #该函数通过状态动作对儿计算phi矩阵,完全依赖于子类的实现方法 44 r"""Calculate the :math:`\phi` matrix for the given state-action pair. 45 46 The way this value is calculated depends entirely on the concrete 47 implementation of BasisFunction. 48 49 Parameters 50 ---------- 51 state : numpy.array 52 The state to get the features for. 53 When calculating Q(s, a) this is the s. 54 action : int 55 The action index to get the features for. 56 When calculating Q(s, a) this is the a. 57 58 59 Returns 60 ------- 61 numpy.array 62 The :math:`\phi` vector. Used by Policy to compute Q-value. 63 64 """ 65 pass # pragma: no cover 66 67 @abc.abstractproperty #修饰符,必须实现? 68 def num_actions(self): #返回可能的动作action的数目 69 """Return number of possible actions. 70 71 Returns 72 ------- 73 int 74 Number of possible actions. 75 """ 76 pass # pragma: no cover 77 78 @staticmethod #静态方法 79 def _validate_num_actions(num_actions): #确认动作的数目是否满足要求 80 """Return num_actions if valid. Otherwise raise ValueError. 81 82 Return 83 ------ 84 int 85 Number of possible actions. 86 87 Raises 88 ------ 89 ValueError 90 If num_actions < 1 91 92 """ 93 if num_actions < 1: 94 raise ValueError('num_actions must be >= 1') #动作数目一定要大于1 95 return num_actions 96 97 98 class FakeBasis(BasisFunction): #基于上面的类构建一个新类 99 100 r"""Basis that ignores all input. Useful for random sampling. 101 102 When creating a purely random Policy a basis function is still required. 103 This basis function just returns a :math:`\phi` equal to [1.] for all 104 inputs. It will however, still throw exceptions for impossible values like 105 negative action indexes. 
106 107 """ 108   #产生随机的策略时返回的特征向量phi只有一个值?就是1? 109 def __init__(self, num_actions):#在初始化的时候调用父函数对动作数目进行验证 110 """Initialize FakeBasis.""" 111 self.__num_actions = BasisFunction._validate_num_actions(num_actions) 112 113 def size(self):#返回特征向量的长度为1 114 r"""Return size of 1. 115 116 Returns 117 ------- 118 int 119 Size of :math:`phi` which is always 1 for FakeBasis 120 121 Example 122 ------- 123 124 >>> FakeBasis().size() 125 1 126 127 """ 128 return 1 129 130 def evaluate(self, state, action): #返回特征向量1 131 r"""Return :math:`\phi` equal to [1.]. 132 133 Parameters #参数 134 ---------- 135 state : numpy.array#状态 136 The state to get the features for. 137 When calculating Q(s, a) this is the s. FakeBasis ignores these 138 values. 139 action : int#所采取的动作 140 The action index to get the features for. 141 When calculating Q(s, a) this is the a. FakeBasis ignores these 142 values. 143 144 Returns 145 ------- 146 numpy.array#返回phi矩阵 147 :math:`\phi` vector equal to [1.]. 148 149 Raises 150 ------ 151 IndexError 152 If action index is < 0 153 154 Example 155 ------- 156 157 >>> FakeBasis().evaluate(np.arange(10), 0) 158 array([ 1.]) 159 160 """ 161 if action < 0: 162 raise IndexError('action index must be >= 0') 163 if action >= self.num_actions: 164 raise IndexError('action must be < num_actions') 165 return np.array([1.])#此处返回的phi矩阵就是1 166 167 @property 168 def num_actions(self):#返回动作的数目 169 """Return number of possible actions.""" 170 return self.__num_actions 171 172 @num_actions.setter #该修饰符表示将num_action转化为一个属性而不是一个方法 173 def num_actions(self, value): 174 """Set the number of possible actions. 175 176 Parameters 177 ---------- 178 value: int 179 Number of possible actions. Must be >= 1. 180 181 Raises 182 ------ 183 ValueError 184 If value < 1. 
185 186 """ 187 if value < 1: 188 raise ValueError('num_actions must be at least 1.') 189 self.__num_actions = value 190 191 192 class OneDimensionalPolynomialBasis(BasisFunction): #一维多项式基 193 194 """Polynomial features for a state with one dimension. 195   #一维的多项式特征 196 Takes the value of the state and constructs a vector proportional 197 to the specified degree and number of actions. The polynomial is first 198 constructed as [..., 1, value, value^2, ..., value^k, ...]#这里k是自由度的数目 199 where k is the degree. The rest of the vector is 0. 200   #输入是状态的值构建一个和动作数目成比例的向量,构建的方法如上,就是一个多项式 201 Parameters 202 ---------- 203 degree : int#多项式的自由度 204 The polynomial degree. 205 num_actions: int #动作的数量 206 The total number of possible actions 207 208 Raises 209 ------ 210 ValueError 211 If degree is less than 0 212 ValueError 213 If num_actions is less than 1 214 215 """ 216 217 def __init__(self, degree, num_actions):#初始化,输入量:自由度,动作数目 218 """Initialize polynomial basis function.""" 219 self.__num_actions = BasisFunction._validate_num_actions(num_actions)#通过父类检测函数检测动作数目 220 221 if degree < 0:#检测自由度数目 222 raise ValueError('Degree must be >= 0') 223 self.degree = degree 224 225 def size(self):#返回基函数(特征函数的)长度 226 """Calculate the size of the basis function. 227      228 The base size will be degree + 1. This basic matrix is then 229 duplicated once for every action. Therefore the size is equal to 230 (degree + 1) * number of actions 231     #基的长度是自由度长度+1(因为由零次方)
      #对于每一个action都复制一个基向量,因此,总长度为:(degree + 1) * number of action
232 
233         Returns#返回
234         -------
235         int #基矩阵(基向量/phi)的长度,是一个整数
236             The size of the phi matrix that will be returned from evaluate.
237 
238 
239         Example#例子
240         -------
241 
242         >>> basis = OneDimensionalPolynomialBasis(2, 2)#基是一维多项式,自由度2,动作数2
243         >>> basis.size()#总长度:3*2
244         6
245 
246         """
247         return (self.degree + 1) * self.num_actions
248 
249     def evaluate(self, state, action):#评估函数
250         r"""Calculate :math:`\phi` matrix for given state action pair.
251       #计算给出状态动作对儿后的phi矩阵
252         The :math:`\phi` matrix is used to calculate the Q function for the
253         given policy.
254       #输出的phi矩阵用来计算Q值
255         Parameters #输入参数
256         ----------
257         state : numpy.array#状态
258             The state to get the features for.
259             When calculating Q(s, a) this is the s.
260         action : int #动作
261             The action index to get the features for.
262             When calculating Q(s, a) this is the a.
263 
264         Returns #返回
265         -------
266         numpy.array #phi
267             The :math:`\phi` vector. Used by Policy to compute Q-value.
268 
269         Raises 
270         ------#报错的情况,对动作的数目检查,对状态向量的维度进行检查
271         IndexError
272             If :math:`0 \le action < num\_actions` then IndexError is raised.
273         ValueError
274             If the state vector has any number of dimensions other than 1 a
275             ValueError is raised.
276 
277         Example#一个例子,没看懂
278         -------
279 
280         >>> basis = OneDimensionalPolynomialBasis(2, 2)
281         >>> basis.evaluate(np.array([2]), 0)
282         array([ 1.,  2.,  4.,  0.,  0.,  0.])
283 
284         """
285         if action < 0 or action >= self.num_actions: #如果动作的序号不对,也就是不是合理的动作
286             raise IndexError('Action index out of bounds')#报错
287 
288         if state.shape != (1, ):#如果状态向量的维度不对
289             raise ValueError('This class only supports one dimensional states')#报错
290 
291         phi = np.zeros((self.size(), ))#初始化phi值
292 
293         offset = (self.size()/self.num_actions)*action #偏置:(自由度的数目+1)*动做的序号?这一步是跳转到对应的动作的那一组的序号
294 
295         value = state[0]#状态向量的第一个值
296 
297         phi[offset:offset + self.degree + 1] = \更改对应动作那一组的phi值
298             np.array([pow(value, i) for i in range(self.degree+1)])更改成一维多项式向量
299 
300         return phi#返回phi
301 
302     @property
303     def num_actions(self):#返回动作的数目
304         """Return number of possible actions."""
305         return self.__num_actions
306 
307     @num_actions.setter
308     def num_actions(self, value):
309         """Set the number of possible actions.
310 
311         Parameters
312         ----------
313         value: int
314             Number of possible actions. Must be >= 1.
315 
316         Raises
317         ------
318         ValueError
319             If value < 1.
320 
321         """
322         if value < 1:
323             raise ValueError('num_actions must be at least 1.')
324         self.__num_actions = value
325 
326 
class RadialBasisFunction(BasisFunction):

    r"""Gaussian Multidimensional Radial Basis Function (RBF).

    Given a set of k means :math:`(\mu_1 , \ldots, \mu_k)` produce a feature
    vector :math:`(1, e^{-\gamma || s - \mu_1 ||^2}, \cdots,
    e^{-\gamma || s - \mu_k ||^2})` where `s` is the state vector and
    :math:`\gamma` is a free parameter. This vector will be padded with
    0's on both sides proportional to the number of possible actions
    specified.

    Parameters
    ----------
    means: list(numpy.array)
        List of numpy arrays representing :math:`(\mu_1, \ldots, \mu_k)`.
        Each :math:`\mu` is a numpy array with dimensions matching the state
        vector this basis function will be used with. If the dimensions of each
        vector are not equal then an exception will be raised. If no means are
        specified then a ValueError will be raised
    gamma: float
        Free parameter which controls the size/spread of the Gaussian "bumps".
        This parameter is best selected via tuning through cross validation.
        gamma must be > 0.
    num_actions: int
        Number of actions. Must be in range [1, :math:`\infty`] otherwise
        an exception will be raised.

    Raises
    ------
    ValueError
        If means list is empty
    ValueError
        If dimensions of each mean vector do not match.
    ValueError
        If gamma is <= 0.
    ValueError
        If num_actions is less than 1.

    Note
    ----

    The numpy arrays specifying the means are not copied.

    """

    def __init__(self, means, gamma, num_actions):
        """Initialize RBF instance."""
        self.__num_actions = BasisFunction._validate_num_actions(num_actions)

        # len() rather than truthiness: ``means`` may itself be a numpy array,
        # whose truth value is ambiguous.
        if len(means) == 0:
            raise ValueError('You must specify at least one mean')

        # Every mean must share the shape of the first one. (The builtin
        # ``reduce`` previously used for this check does not exist on
        # Python 3.)
        expected_shape = means[0].shape
        if any(mean.shape != expected_shape for mean in means):
            raise ValueError('All mean vectors must have the same dimensions')

        self.means = means

        if gamma <= 0:
            raise ValueError('gamma must be > 0')

        self.gamma = gamma

    def size(self):
        r"""Calculate size of the :math:`\phi` vector.

        The size is equal to the number of means + 1 (for the leading
        constant 1 feature) times the number of actions.

        Returns
        -------
        int
            The size of the phi vector that will be returned from evaluate.

        """
        return (len(self.means) + 1) * self.num_actions

    def evaluate(self, state, action):
        r"""Calculate the :math:`\phi` vector.

        Vector will have the following form:

        :math:`[\cdots, 1, e^{-\gamma || s - \mu_1 ||^2}, \cdots,
        e^{-\gamma || s - \mu_k ||^2}, \cdots]`

        where the vector will be padded with 0's on either side depending
        on the specified action index and the number of possible actions.

        Parameters
        ----------
        state : numpy.array
            The state to get the features for.
            When calculating Q(s, a) this is the s.
        action : int
            The action index to get the features for.
            When calculating Q(s, a) this is the a.

        Returns
        -------
        numpy.array
            The :math:`\phi` vector. Used by Policy to compute Q-value.

        Raises
        ------
        IndexError
            Unless :math:`0 \le action < num\_actions`.
        ValueError
            If the shape of the state does not match the shape of the means.

        """
        if action < 0 or action >= self.num_actions:
            raise IndexError('Action index out of bounds')

        if state.shape != self.means[0].shape:
            raise ValueError('Dimensions of state must match '
                             'dimensions of means')

        phi = np.zeros((self.size(), ))

        # Each action owns (number of means + 1) consecutive entries, matching
        # size(). The width must come from the number of means, not from the
        # dimension of a single mean, or the per-action groups would overlap.
        block_size = len(self.means) + 1
        offset = block_size * action

        rbf = [RadialBasisFunction.__calc_basis_component(state,
                                                          mean,
                                                          self.gamma)
               for mean in self.means]
        phi[offset] = 1.  # constant (bias) feature leads the group
        phi[offset + 1:offset + block_size] = rbf

        return phi

    @staticmethod
    def __calc_basis_component(state, mean, gamma):
        """Return exp(-gamma * ||state - mean||^2) for a single mean."""
        mean_diff = state - mean
        return np.exp(-gamma*np.sum(mean_diff*mean_diff))

    @property
    def num_actions(self):
        """Return number of possible actions."""
        return self.__num_actions

    @num_actions.setter
    def num_actions(self, value):
        """Set the number of possible actions.

        Parameters
        ----------
        value: int
            Number of possible actions. Must be >= 1.

        Raises
        ------
        ValueError
            If value < 1.

        """
        if value < 1:
            raise ValueError('num_actions must be at least 1.')
        self.__num_actions = value


class ExactBasis(BasisFunction):

    """Basis function with no functional approximation.

    This can only be used in domains with finite, discrete state-spaces. For
    example the Chain domain from the LSPI paper would work with this basis,
    but the inverted pendulum domain would not.

    Parameters
    ----------
    num_states: list
        A list containing integers representing the number of possible values
        for each state variable.
    num_actions: int
        Number of possible actions.

    Raises
    ------
    ValueError
        If any entry of num_states is <= 0
    ValueError
        If num_actions is less than 1
    """

    def __init__(self, num_states, num_actions):
        """Initialize ExactBasis."""
        # Convert before comparing: a plain list cannot be compared
        # element-wise against 0.
        if np.any(np.asarray(num_states) <= 0):
            raise ValueError('num_states value\'s must be > 0')

        self.__num_actions = BasisFunction._validate_num_actions(num_actions)
        self._num_states = num_states

        # Mixed-radix strides: _offsets[i] is the product of the value counts
        # of all state variables before i, so sum(_offsets[i] * state[i]) is a
        # distinct index for every combination of state values.
        self._offsets = [1]
        for count in num_states[:-1]:
            self._offsets.append(self._offsets[-1] * count)

    def size(self):
        r"""Return the vector size of the basis function.

        This is the number of distinct states (the product of the entries of
        num_states) multiplied by the number of actions.

        Returns
        -------
        int
            The size of the :math:`\phi` vector.
            (Referred to as k in the paper).
        """
        total_states = 1
        for count in self._num_states:
            total_states *= count
        return total_states * self.__num_actions

    def get_state_action_index(self, state, action):
        """Return the non-zero index of the basis.

        Parameters
        ----------
        state: numpy.array
            The state to get the index for.
        action: int
            The action to get the index for.

        Returns
        -------
        int
            The non-zero index of the basis

        Raises
        ------
        IndexError
            If action index < 0 or action index >= num_actions
        """
        if action < 0:
            raise IndexError('action index must be >= 0')
        if action >= self.num_actions:
            raise IndexError('action must be < num_actions')

        # Skip over the groups belonging to the preceding actions; each group
        # holds one entry per distinct state. Floor division keeps this an
        # integer so the result is usable as an array index.
        base = action * (self.size() // self.__num_actions)

        offset = 0
        for i, value in enumerate(state):
            offset += self._offsets[i] * value

        return base + offset

    def evaluate(self, state, action):
        r"""Return a :math:`\phi` vector that has a single non-zero value.

        Parameters
        ----------
        state: numpy.array
            The state to get the features for. When calculating Q(s, a) this is
            the s.
        action: int
            The action index to get the features for.
            When calculating Q(s, a) this is the a.

        Returns
        -------
        numpy.array
            :math:`\phi` vector

        Raises
        ------
        IndexError
            If action index < 0 or action index >= num_actions
        ValueError
            If the size of the state does not match the size of the
            num_states list used during construction.
        ValueError
            If any of the state variables are < 0 or >= the corresponding
            value in the num_states list used during construction.
        """
        if len(state) != len(self._num_states):
            raise ValueError('Number of state variables must match '
                             + 'size of num_states.')
        if np.any(np.asarray(state) < 0):
            raise ValueError('state cannot contain negative values.')
        for state_var, num_state_values in zip(state, self._num_states):
            if state_var >= num_state_values:
                raise ValueError('state values must be < corresponding '
                                 + 'num_states value.')

        phi = np.zeros(self.size())
        phi[self.get_state_action_index(state, action)] = 1

        return phi

    @property
    def num_actions(self):
        """Return number of possible actions."""
        return self.__num_actions

    @num_actions.setter
    def num_actions(self, value):
        """Set the number of possible actions.

        Parameters
        ----------
        value: int
            Number of possible actions. Must be >= 1.

        Raises
        ------
        ValueError
            if value < 1.
        """
        if value < 1:
            raise ValueError('num_actions must be at least 1.')
        self.__num_actions = value

 

posted on 2016-05-12 19:09  弓刀  阅读(597)  评论(0)  编辑  收藏  举报