qa_model
[code=python]
import os
import sys
import time

import numpy

import shelve

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams


class dA(object):
    """Denoising Auto-Encoder class (dA)

    A denoising auto-encoder tries to reconstruct the input from a corrupted
    version of it by projecting it first into a latent space and reprojecting
    it afterwards back into the input space. Please refer to Vincent et al., 2008
    for more details. If x is the input, then equation (1) computes a partially
    destroyed version of x by means of a stochastic mapping q_D. Equation (2)
    computes the projection of the input into the latent space. Equation (3)
    computes the reconstruction of the input, while equation (4) computes the
    reconstruction error.

    .. math::

        \tilde{x} ~ q_D(\tilde{x}|x)                                      (1)

        y = s(W \tilde{x} + b)                                            (2)

        x = s(W' y + b')                                                  (3)

        L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log(1-z_k)]        (4)

    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        input=None,
        #n_visible=784,
        n_hidden=100,
        W=None,
        bhid=None,
        #bvis=None
    ):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input), the number of hidden units (the dimension
        d' of the latent or hidden space) and the corruption level. The
        constructor also receives symbolic variables for the input, weights and
        bias. Such symbolic variables are useful when, for example, the input
        is the result of some computations, or when weights are shared between
        the dA and an MLP layer. When dealing with SdAs this always happens:
        the dA on layer 2 gets as input the output of the dA on layer 1,
        and the weights of the dA are used in the second stage of training
        to construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for a
                      standalone dA

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if the dA
                  should be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of bias values (for
                     hidden units) that should be shared between the dA and
                     another architecture; if the dA should be standalone set
                     this to None

        """
        #self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if not W:
            # W is initialized with `initial_W`, which is uniformly sampled
            # between -4*sqrt(6./(n_hidden+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_hidden)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX
            # so that the code is runnable on GPU
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
                    size=(n_hidden, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        '''
        if not bvis:
            bvis = theano.shared(
                value=numpy.zeros(
                    n_visible,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
        '''
        if not bhid:
            bhid = theano.shared(
                value=numpy.zeros(
                    n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        self.W = W
        # b corresponds to the bias of the hidden units
        self.b = bhid
        # b_prime corresponds to the bias of the visible units
        #self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        #self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b]
    # end-snippet-1

    def get_hidden_values(self):
        """ Computes the values of the hidden layer, summed over the rows
        of the input matrix """
        return T.sum(T.nnet.sigmoid(T.dot(self.x, self.W) + self.b), axis=0)

    '''
    def get_corrupted_input(self, input, corruption_level):
        """This function keeps ``1-corruption_level`` entries of the inputs the
        same and zeroes out a randomly selected subset of size ``corruption_level``
        Note : the first argument of theano.rng.binomial is the shape (size) of
               the random numbers that it should produce,
               the second argument is the number of trials,
               the third argument is the probability of success of any trial

               this will produce an array of 0s and 1s where 1 has a
               probability of 1 - ``corruption_level`` and 0 a probability of
               ``corruption_level``

               The binomial function returns the int64 data type by
               default. int64 multiplied by the input type (floatX)
               always returns float64. To keep all data in floatX when
               floatX is float32, we set the dtype of the binomial to
               floatX. As in our case the value of the binomial is
               always 0 or 1, this doesn't change the result. This is
               needed to allow the gpu to work correctly as it only
               supports float32 for now.

        """
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corruption_level,
                                        dtype=theano.config.floatX) * input
    '''

    '''
    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)


    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA """

        #tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        #z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in the minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
    '''


x = T.fmatrix('x')  # question matrix
y = T.fmatrix('y')  # answer matrix
index = T.lscalar()
rng = numpy.random.RandomState(23455)
theano_rng = RandomStreams(rng.randint(2 ** 30))
n_hidden = 2
learning_rate = 0.1

# build n_hidden dA objects for the questions and n_hidden for the answers
da_q = []
da_a = []
for count in range(n_hidden):
    da_q.append(dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        #n_visible=28 * 28,
        n_hidden=100
    ))

for count in range(n_hidden):
    da_a.append(dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=y,
        #n_visible=28 * 28,
        n_hidden=100
    ))

# cost: sum over dA pairs of the squared distance between the question's and
# the answer's hidden representations
cost_matrix = []
for hid_index in range(n_hidden):
    cost_matrix.append(T.sum(T.sqr(da_q[hid_index].get_hidden_values()
                                   - da_a[hid_index].get_hidden_values()) / 2))
cost = T.sum(cost_matrix)

params = da_q[0].params + da_a[0].params
for hid_index in range(1, n_hidden):
    params += da_q[hid_index].params + da_a[hid_index].params
gparams = T.grad(cost, params)
updates = []
for param, gparam in zip(params, gparams):
    updates.append((param, param - learning_rate * gparam))

db = shelve.open(r'data\training_data\training_data_30_50_1_9_games.dat')
x1 = db['train_set1']
q, a = x1[0]
q1, a1 = x1[1]
train_da = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: x1[0][0],
        y: x1[0][1]
    },
    # `index` does not appear in the graph (the givens pin x and y to the
    # first pair), so the unused-input check has to be silenced
    on_unused_input='ignore'
)
print(train_da(0))
[/code]
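The script above only prints the matching cost of the first stored question/answer pair after one update. As a rough illustration of how the learned parameters could be used afterwards, here is a minimal scoring sketch. It is not part of the original script: the helper names `hidden_rep` and `match_score` are made up, and it assumes the script above has already run, so that `da_q`, `da_a`, `q`, `a` and `a1` exist and the stored matrices have as many columns as the dA weight matrices (100). It re-implements `get_hidden_values` in plain numpy and applies the same squared-distance cost used for training to compare a question against candidate answers.

[code=python]
# Hypothetical post-training scoring sketch (assumes the training script above has run).
import numpy


def hidden_rep(data_matrix, da):
    # numpy version of dA.get_hidden_values():
    # sum of sigmoid(x.W + b) over the rows of the matrix
    W = da.W.get_value(borrow=True)
    b = da.b.get_value(borrow=True)
    pre = numpy.dot(data_matrix, W) + b
    return numpy.sum(1.0 / (1.0 + numpy.exp(-pre)), axis=0)


def match_score(question_matrix, answer_matrix):
    # same cost as the training graph: sum over the dA pairs of
    # ||h_q - h_a||^2 / 2; a lower score means a better question/answer match
    score = 0.0
    for q_da, a_da in zip(da_q, da_a):
        diff = hidden_rep(question_matrix, q_da) - hidden_rep(answer_matrix, a_da)
        score += numpy.sum(diff ** 2) / 2.0
    return score


# e.g. compare the first question against the two loaded answers
print(match_score(q, a), match_score(q, a1))
[/code]

How useful this ranking is depends entirely on how far the training above has converged; the sketch only shows where the shared hidden representations plug in.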