Deep Learning Notes - Theano (II)
Theano Study Notes
1 Basic Architecture
Three uses of theano.function: activation functions, multiple inputs/outputs, and named function inputs (see the sketch after this list)
Shared variables: defining a shared variable, reading its value, and temporarily overriding it with givens
Activation functions: sigmoid(), softmax(), softplus(), relu(), tanh(), ...
Typical uses: classification problems, regression problems, nonlinear functions, ...
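A minimal sketch (my own illustration, not part of the original notes) that exercises all of the above at once: a compiled activation function, a function with multiple inputs and outputs, a named default input via theano.In, and a shared variable updated in place. All variable names here are made up for the example.

import numpy as np
import theano
import theano.tensor as T

# Activation function: compile a sigmoid
x = T.dmatrix('x')
sigmoid = theano.function([x], T.nnet.sigmoid(x))
print(sigmoid(np.zeros((2, 2))))   # every entry is 0.5

# Multiple inputs and outputs: return several expressions at once
a, b = T.dmatrices('a', 'b')
f = theano.function([a, b], [a - b, abs(a - b)])
diff, abs_diff = f(np.ones((2, 2)), 2 * np.ones((2, 2)))

# Named default input: w can be omitted or passed by keyword
s, w = T.dscalars('s', 'w')
g = theano.function([s, theano.In(w, value=2.0, name='w')], s * w)
print(g(3.0))          # 6.0 (w defaults to 2.0)
print(g(3.0, w=5.0))   # 15.0

# Shared variable: state persists between calls, modified via updates
state = theano.shared(0.0, name='state')
inc = T.dscalar('inc')
acc = theano.function([inc], state, updates=[(state, state + inc)])
acc(1.0)
acc(2.0)
print(state.get_value())   # 3.0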
2 Building Your Own Neural Network
2.1 Defining the Layer class
2.2 A regression example
2.2.1 Constructing the data
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]   # 300 points in [-1, 1], as a column vector
noise = np.random.normal(0, 0.05, x_data.shape)   # small Gaussian noise
y_data = np.square(x_data) - 0.5 + noise          # noisy quadratic target
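To sanity-check the constructed data, a quick scatter plot (the same call appears in the lesson 6 code further below):

import matplotlib.pyplot as plt
plt.scatter(x_data, y_data)   # a noisy parabola shifted down by 0.5
plt.show()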
2.2.2 Building the network (a consolidated sketch of these steps follows this outline)
2.2.2.1 Define the network's inputs and targets, with input type T.dmatrix
2.2.2.2 Add layers
2.2.2.3 Define the cost function as the mean squared error: T.mean(T.square(...))
2.2.2.4 Compute the gradients of the weights and biases: T.grad(cost, [l1.W, l1.b, l2.W, l2.b])
2.2.2.5 Train the network with gradient descent: train = theano.function([x, y], cost, updates=...)
2.2.2.5.1 First define the learning rate and the training function
2.2.2.5.2 The training function specifies the inputs, the outputs, and the updates to the network's weights and biases
2.2.2.6 Define the prediction function: predict = theano.function([x], l2.outputs)
2.2.3 Training
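A consolidated sketch of steps 2.2.2.1 through 2.2.3, mirroring the lesson 6 code further below; it assumes the Layer class from 2.1 and the x_data/y_data from 2.2.1 are already defined:

import theano
import theano.tensor as T

# 2.2.2.1 inputs and targets
x = T.dmatrix('x')
y = T.dmatrix('y')
# 2.2.2.2 add layers: 1 -> 10 -> 1
l1 = Layer(x, 1, 10, activation_function=T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, activation_function=None)
# 2.2.2.3 mean squared error
cost = T.mean(T.square(l2.outputs - y))
# 2.2.2.4 gradients of the weights and biases
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])
# 2.2.2.5 gradient-descent training function
learning_rate = 0.05
train = theano.function(
    inputs=[x, y],
    outputs=cost,
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])
# 2.2.2.6 prediction function
predict = theano.function([x], l2.outputs)
# 2.2.3 training loop
for i in range(500):
    err = train(x_data, y_data)
    if i % 100 == 0:
        print(err)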
2.3 Visualizing the regression results
2.4 A classification example
2.5 What is overfitting?
Ways to prevent overfitting: a larger dataset; regularization (L1, L2, and in principle higher-order L3, L4, ... penalties); dropout (a sketch follows below)
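Dropout is the one remedy not demonstrated in the code below, so here is a minimal sketch of inverted dropout in Theano using RandomStreams; it is my own illustration, with `h` standing in for a hidden layer's output:

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=42)

def dropout(h, p_drop=0.5):
    # Keep each unit with probability 1 - p_drop, then rescale so the
    # expected activation is unchanged (inverted dropout)
    mask = srng.binomial(n=1, p=1 - p_drop, size=h.shape,
                         dtype=theano.config.floatX)
    return h * mask / (1 - p_drop)

x = T.dmatrix('x')
h = T.nnet.relu(x)          # stand-in for a hidden layer's output
h_train = dropout(h, 0.5)   # apply the mask only at training time
h_test = h                  # use the full activations at test time
f = theano.function([x], h_train)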
2.6 Regularization (the lesson 9 code below compares L1 and L2 penalties)
2.7 Save | reload
The Python code follows:
######################### Theano-lesson 0 Test: Start ###############################
import numpy as np
import time
import theano

A = np.random.rand(1000, 10000).astype(theano.config.floatX)
B = np.random.rand(10000, 1000).astype(theano.config.floatX)

np_start = time.time()
AB = A.dot(B)
np_end = time.time()

X, Y = theano.tensor.matrices('X', 'Y')
mf = theano.function([X, Y], X.dot(Y))
t_start = time.time()
tAB = mf(A, B)
t_end = time.time()

print("NP time: %f[s], theano time: %f[s] (times should be close when run on CPU!)" % (
    np_end - np_start, t_end - t_start))
print("Result difference: %f" % (np.abs(AB - tAB).max(),))
######################### Theano-lesson 0 Test: End ###############################

######################### Theano-lesson 1 basic: Start ###############################
import numpy as np
import theano.tensor as T
from theano import function
from theano import pp

x = T.dscalar('x')
y = T.dscalar('y')
z = x + y

f = function([x, y], z)
print(f(2, 3))
# 5.0

print(pp(z))
# (x + y)

X = T.dmatrix('X')
Y = T.dmatrix('Y')
Z = X + Y
F = function([X, Y], Z)

print(F(np.arange(12).reshape((3, 4)),
        10 * np.ones((3, 4))))
######################### Theano-lesson 1 basic: End ###############################

######################### Theano-lesson 2 function, activation: Start ############################
import numpy as np
import theano
import theano.tensor as T

x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))   # the logistic (sigmoid) function

logistic = theano.function([x], s)
print(logistic([[2, 3], [3, 4]]))
######################### Theano-lesson 2 function, activation: End ############################

######################### Theano-lesson 2 function, multiple inputs/outputs: Start ############################
import theano
import theano.tensor as T
import numpy as np

a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2

f = theano.function([a, b], [diff, abs_diff, diff_squared])

x1, x2, x3 = f(np.ones((4, 4)),
               np.arange(16).reshape((4, 4)))

print(x1, x2, x3)
######################### Theano-lesson 2 function, multiple inputs/outputs: End ############################

######################### Theano-lesson 2 function, named inputs: Start ############################
import theano
import numpy as np
import theano.tensor as T

x, y, w = T.scalars('x', 'y', 'w')
z = (x + y) * w

f = theano.function([x,
                     theano.In(y, value=1),
                     theano.In(w, value=2)],
                    z)

print(f(3))      # (3 + 1) * 2 = 8.0
print(f(3, 6))   # (3 + 6) * 2 = 18.0

f = theano.function([x,
                     theano.In(y, value=1),
                     theano.In(w, value=2, name='weight')],
                    z)

print(f(3, weight=4))   # (3 + 1) * 4 = 16.0
######################### Theano-lesson 2 function, named inputs: End ############################

######################### Theano-lesson 3 Shared: Start ############################
import numpy as np
import theano.tensor as T
import theano

state = theano.shared(np.array(0, dtype=np.float64), 'state')
inc = T.scalar('inc', dtype=state.dtype)

accumulator = theano.function([inc], state, updates=[(state, state + inc)])
print(state.get_value())   # 0.0
accumulator(1)
print(state.get_value())   # 1.0
accumulator(10)
print(state.get_value())   # 11.0

state.set_value(-1)
accumulator(3)
print(state.get_value())   # 2.0

# Temporarily substitute the shared variable with `a` via givens
tmp_func = state * 2 + inc
a = T.scalar(dtype=state.dtype)
skip_shared = theano.function([inc, a], tmp_func, givens=[(state, a)],
                              name='skip_shared')
print(skip_shared(2, 3))   # 3 * 2 + 2 = 8.0
print(state.get_value())   # still 2.0: givens does not modify the shared value
######################### Theano-lesson 3 Shared: End ############################

######################### Theano-lesson 4 activation functions: Start ############################
"""
The available activation functions in theano can be found at:
http://deeplearning.net/software/theano/library/tensor/nnet/nnet.html
The activation functions include, but are not limited to, softplus,
sigmoid, relu, softmax, elu, tanh...
For the hidden layers, we could use relu, tanh, softplus...
For classification problems, we could use sigmoid or softmax for the
output layer. For regression problems, we could use a linear function
for the output layer.
"""
######################### Theano-lesson 4 activation functions: End ############################

######################### Theano-lesson 5 defining the Layer class: Start ############################
import numpy as np
import theano
import theano.tensor as T

class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        # Weights: random normal init; biases: a small positive constant
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size,)) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)
######################### Theano-lesson 5 defining the Layer class: End ############################

######################### Theano-lesson 6 Regression example: Start ############################
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size,)) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)

############### Construct the data ###############
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise
# Plot
plt.scatter(x_data, y_data)
plt.show()
# plt.show(block=False)

############### Build the network ###############
# Define the input types
x = T.dmatrix('x')
y = T.dmatrix('y')
# Add layers
l1 = Layer(x, 1, 10, activation_function=T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, activation_function=None)
# Define the cost function
cost = T.mean(T.square(l2.outputs - y))
# Gradients of the weights and biases
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])
# Define the training function
learning_rate = 0.05
train = theano.function(
    inputs=[x, y],
    outputs=cost,
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])
# Define the prediction function
predict = theano.function([x], l2.outputs)

#################### Training ###################
for i in range(500):
    err = train(x_data, y_data)
    if i % 100 == 0:
        print(err)
######################### Theano-lesson 6 Regression example: End ############################

######################### Theano-lesson 7 visualizing the Regression example: Start ############################
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size,)) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)

############### Construct the data ###############
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise
# Plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()   # interactive mode, so the figure can be redrawn while training
plt.show()
# plt.show(block=False)

############### Build the network ###############
# Define the input types
x = T.dmatrix('x')
y = T.dmatrix('y')
# Add layers
l1 = Layer(x, 1, 10, activation_function=T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, activation_function=None)
# Define the cost function
cost = T.mean(T.square(l2.outputs - y))
# Gradients of the weights and biases
gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])
# Define the training function
learning_rate = 0.05
train = theano.function(
    inputs=[x, y],
    outputs=cost,
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])
# Define the prediction function
predict = theano.function([x], l2.outputs)

#################### Training ###################
for i in range(500):
    err = train(x_data, y_data)
    if i % 100 == 0:
        # Remove the previously drawn fit, if any, then redraw it
        try:
            ax.lines.remove(lines[0])
        except Exception:
            pass
        prediction_value = predict(x_data)
        lines = ax.plot(x_data, prediction_value, 'r-', lw=2)
        plt.pause(.5)
######################### Theano-lesson 7 visualizing the Regression example: End ############################

######################### Theano-lesson 8 Classification example: Start ############################
import numpy as np
import theano
import theano.tensor as T

# A helper to compute classification accuracy
def compute_accuracy(y_target, y_predict):
    correct_predict = np.equal(y_predict, y_target)
    accuracy = np.sum(correct_predict) / len(correct_predict)
    return accuracy

########## Generate a random dataset: 400 samples, 784 features, two class labels ##########
rng = np.random

N = 400       # number of training samples
feats = 784   # number of input features

# Random data: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(low=0, high=2, size=N))   # target_class = [0, 1, 1, 1, 0, ...]

############### 1 Build the model ##################
# Define x and y, analogous to placeholders in tensorflow
x = T.dmatrix('x')
y = T.dvector('y')
# Initialize the weights and bias
W = theano.shared(rng.randn(feats), name='W')
b = theano.shared(0.1, name='b')
# Define the activation and the cross-entropy
p_1 = T.nnet.sigmoid(T.dot(x, W) + b)
prediction = p_1 > 0.5
xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)   # cross-entropy
# xent can equivalently be computed as:
# xent = T.nnet.binary_crossentropy(p_1, y)

cost = T.mean(xent) + 0.01 * (W ** 2).sum()   # with l2 regularization
gW, gb = T.grad(cost, [W, b])

############### 2 Compile the model ####################
# Learning rate, train() and predict()
learning_rate = 0.1
train = theano.function(
    inputs=[x, y],
    outputs=[prediction, T.mean(xent)],
    updates=((W, W - learning_rate * gW), (b, b - learning_rate * gb)))
predict = theano.function(inputs=[x], outputs=prediction)

############### 3 Train the model ####################
for i in range(500):
    pred, err = train(D[0], D[1])
    if i % 50 == 0:
        print('cost:', err)
        print("accuracy:", compute_accuracy(D[1], predict(D[0])))

# Finally, print the predictions and the targets for comparison
print("target values for D:")
print(D[1])
print("prediction on D:")
print(predict(D[0]))
######################### Theano-lesson 8 Classification example: End ############################

######################### Theano-lesson 9 Regularization: Start ############################
import theano
from sklearn.datasets import load_boston   # Boston housing data
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt

# Define the Layer class
class Layer(object):
    def __init__(self, inputs, in_size, out_size, activation_function=None):
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        self.b = theano.shared(np.zeros((out_size,)) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)

def minmax_normalization(data):
    # Rescale each feature to [0, 1]
    xs_max = np.max(data, axis=0)
    xs_min = np.min(data, axis=0)
    xs = (1 - 0) * (data - xs_min) / (xs_max - xs_min) + 0
    return xs

############### Load and prepare the data ######################
np.random.seed(100)
x_data = load_boston().data
# minmax normalization, rescale the inputs
x_data = minmax_normalization(x_data)
y_data = load_boston().target[:, np.newaxis]

# cross validation, train/test split
x_train, y_train = x_data[:400], y_data[:400]
x_test, y_test = x_data[400:], y_data[400:]

x = T.dmatrix("x")
y = T.dmatrix("y")

############### Build the model ######################
l1 = Layer(x, 13, 50, T.tanh)
l2 = Layer(l1.outputs, 50, 1, None)

# The cost to minimize
cost = T.mean(T.square(l2.outputs - y))   # without regularization
# cost = T.mean(T.square(l2.outputs - y)) + 0.1 * ((l1.W ** 2).sum() + (l2.W ** 2).sum())   # with l2 regularization
# cost = T.mean(T.square(l2.outputs - y)) + 0.1 * (abs(l1.W).sum() + abs(l2.W).sum())       # with l1 regularization

gW1, gb1, gW2, gb2 = T.grad(cost, [l1.W, l1.b, l2.W, l2.b])

################# Compile the model ####################
learning_rate = 0.01
train = theano.function(
    inputs=[x, y],
    updates=[(l1.W, l1.W - learning_rate * gW1),
             (l1.b, l1.b - learning_rate * gb1),
             (l2.W, l2.W - learning_rate * gW2),
             (l2.b, l2.b - learning_rate * gb2)])

compute_cost = theano.function(inputs=[x, y], outputs=cost)

################# Train the model #######################
# Record the cost history
train_err_list = []
test_err_list = []
learning_time = []

for i in range(1000):
    train(x_train, y_train)
    if i % 10 == 0:
        # record cost
        train_err_list.append(compute_cost(x_train, y_train))
        test_err_list.append(compute_cost(x_test, y_test))
        learning_time.append(i)

###################### Visualize the results #############################
# Plot the cost history: training cost in solid red, test cost in dashed blue
plt.plot(learning_time, train_err_list, 'r-')
plt.plot(learning_time, test_err_list, 'b--')
plt.show()
######################### Theano-lesson 9 Regularization: End ############################

################ Theano-lesson 10 save_reload: Start ###################
# This block reuses W, b, D, predict and compute_accuracy from the
# lesson 8 classification example, and assumes the 'save/' directory exists.
import pickle

# Save the model parameters
with open('save/model.pickle', 'wb') as file:
    model = [W.get_value(), b.get_value()]
    pickle.dump(model, file)
    print(model[0][:10])
    print("accuracy:", compute_accuracy(D[1], predict(D[0])))

# Reload the model parameters
with open('save/model.pickle', 'rb') as file:
    model = pickle.load(file)
    W.set_value(model[0])
    b.set_value(model[1])
    print(W.get_value()[:10])
    print("accuracy:", compute_accuracy(D[1], predict(D[0])))
################ Theano-lesson 10 save_reload: End ###################