BP Neural Network Implementation in Python
1. Activation Functions
import numpy as np


def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache


def relu(Z):
    A = np.maximum(0, Z)
    cache = Z
    return A, cache
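A quick sanity check (toy values, not from the original post): sigmoid squashes inputs into (0, 1), while ReLU zeroes out negative inputs.

Z = np.array([[-2.0, 0.0, 3.0]])
A_sig, _ = sigmoid(Z)   # approx. [[0.119, 0.5, 0.953]]
A_rel, _ = relu(Z)      # [[0., 0., 3.]]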
2. Activation Functions: Backward Pass
""" Implement the backward propagation for a single RELU unit. """ def relu_backward(dA, cache): Z = cache dZ = np.array(dA, copy=True) # just converting dz to a correct object. # When z <= 0, you should set dz to 0 as well. dZ[Z <= 0] = 0return dZ """ Implement the backward propagation for a single SIGMOID unit. """ def sigmoid_backward(dA, cache): Z = cache s = 1/(1+np.exp(-Z)) dZ = dA * s * (1-s) return dZ
3. Layer Forward Pass and Activation Forward Pass
""" Implement the linear part of a layer's forward propagation. """ def linear_forward(A, W, b): Z = np.dot(W, A) + b assert(Z.shape == (W.shape[0], A.shape[1])) cache = (A, W, b) return Z, cache """ Implement the forward propagation for the LINEAR->ACTIVATION layer """ def linear_activation_forward(A_prev, W, b, activation): if activation == "sigmoid": # Inputs: "A_prev, W, b". Outputs: "A, activation_cache". Z, linear_cache = linear_forward(A_prev, W, b) A, activation_cache = sigmoid(Z) elif activation == "relu": # Inputs: "A_prev, W, b". Outputs: "A, activation_cache". Z, linear_cache = linear_forward(A_prev, W, b) A, activation_cache = relu(Z) assert (A.shape == (W.shape[0], A_prev.shape[1])) cache = (linear_cache, activation_cache) return A, cache
4. Building the L-Layer Forward Pass
""" Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation """ def L_model_forward(X, parameters): caches = [] A = X L = len(parameters) // 2 # number of layers in the neural network # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list. for l in range(1, L): A_prev = A A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)],parameters['b' + str(l)], activation = "relu") caches.append(cache) # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list. AL, cache = linear_activation_forward(A, parameters['W' + str(L)], \ parameters['b' + str(L)], activation = "sigmoid") caches.append(cache) assert(AL.shape == (1,X.shape[1])) return AL, caches
5. Computing the Cost
def compute_cost(AL, Y):
    """
    Compute the cost.

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- mean squared error cost
    """
    # Mean squared error between AL and Y
    # (np.average already divides by the total number of entries, i.e. the number of examples).
    cost = np.average(np.power(AL - Y, 2))
    return cost
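Note that the code above uses the mean squared error, not the cross-entropy cost from the original Coursera assignment. If you prefer cross-entropy, a sketch would look like the following (the name compute_cost_cross_entropy is mine); dAL in L_model_backward would then have to become -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL)).

def compute_cost_cross_entropy(AL, Y):
    # Alternative cost (sketch): binary cross-entropy, as in the original assignment.
    m = Y.shape[1]
    cost = -(1.0 / m) * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
    return np.squeeze(cost)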
6. Layer Backward Pass and Activation Backward Pass
""" Implement the linear portion of backward propagation for a single layer (layer l) """ def linear_backward(dZ, cache): A_prev, W, b = cache # b没用上 m = A_prev.shape[1] dW = (1 / m) * np.dot(dZ, A_prev.T) db = (1 / m) * np.sum(dZ, axis=1, keepdims=True) dA_prev = np.dot(W.T, dZ) assert (dA_prev.shape == A_prev.shape) assert (dW.shape == W.shape) assert (db.shape == b.shape) return dA_prev, dW, db
""" Implement the backward propagation for the LINEAR->ACTIVATION layer. """ def linear_activation_backward(dA, cache, activation): linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
7. L-Layer Backward Pass
""" Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group """ def L_model_backward(AL, Y, caches): grads = {} L = len(caches) # the number of layers # m = AL.shape[1] Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL # Initializing the backpropagation dAL = AL - Y # derivative of cost with respect to AL # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"] current_cache = caches[L-1] #((A W b), (Z)) grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = \ linear_activation_backward(dAL, current_cache, activation = "sigmoid") for l in reversed(range(L - 1)): # lth layer: (RELU -> LINEAR) gradients. # Inputs: "grads["dA" + str(l + 2)], caches". Outputs: "grads["dA" + str(l + 1)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)] current_cache = caches[l] dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l+2)], current_cache, activation = "relu") grads["dA" + str(l + 1)] = dA_prev_temp grads["dW" + str(l + 1)] = dW_temp grads["db" + str(l + 1)] = db_temp return grads
8. Gradient Descent
""" Update parameters using gradient descent """ def update_parameters(parameters, grads, learning_rate): L = len(parameters) // 2 # number of layers in the neural network # Update rule for each parameter. Use a for loop. for l in range(L): parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l + 1)] parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l + 1)] return parameters
9. Putting It All Together
""" Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID. """ def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):#lr was 0.009 costs = [] # keep track of cost # Parameters initialization. parameters = initialize_parameters_deep(layers_dims) # Loop (gradient descent) for i in range(0, num_iterations): # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID. AL, caches = L_model_forward(X, parameters) # Compute cost. cost = compute_cost(AL, Y) # Backward propagation. grads = L_model_backward(AL, Y, caches) # Update parameters. parameters = update_parameters(parameters, grads, learning_rate) # Print the cost every 100 training example if print_cost and i % 100 == 0: print ("Cost after iteration %i: %f" %(i, cost)) if print_cost and i % 100 == 0: costs.append(cost) return parameters