吴恩达-神经网络-week1-hw3

Ref：https://blog.csdn.net/u013733326/article/details/79702148
点击查看代码

from testCases import *
from planar_utils import plot_decision_boundary, \
    sigmoid, load_planar_dataset, load_extra_datasets

import sklearn
import sklearn.datasets
import sklearn.linear_model
import numpy as np
import matplotlib.pyplot as plt
import traceback

# Fix the global NumPy seed before loading the dataset so runs are repeatable.
np.random.seed(1)
X, Y = load_planar_dataset()

# Y is 2-D (its second axis is the sample count, see `shape_Y[1]` below).
# Some matplotlib releases reject a 2-D `c` argument as a colour sequence, so
# pass the labels as a flat vector instead.
plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)

# --------------------dimension------------------------------------------
# Report the array shapes; the second axis of Y is used as the sample count.
shape_X, shape_Y = X.shape, Y.shape
m = shape_Y[1]  # number of training examples
print(f"X的维度: {shape_X}")
print(f"Y的维度: {shape_Y}")
print(f"数据集的数据个数： {m}个")

# ------------------Logistic Regression baseline--------------------------
print('======================Logistics Regression===============================')

LR_model = sklearn.linear_model.LogisticRegressionCV()
# scikit-learn wants samples as rows and a 1-D label vector; ravel() avoids
# the column-vector DataConversionWarning that `Y.T` alone would trigger.
LR_model.fit(X.T, Y.T.ravel())

plt.figure()
plot_decision_boundary(lambda x: LR_model.predict(x), X, Y)
plt.title("Logistic Regression")

LR_predictions = LR_model.predict(X.T)
# Share of samples whose predicted label equals the true label, in percent.
# For 0/1 labels this equals (true positives + true negatives) / total, but it
# does not depend on the label dtype and yields a plain Python float without
# converting a 1-element array to a scalar.
accur = float(np.mean(LR_predictions == Y.ravel()) * 100)
print("逻辑回归准确性：%d" % accur
      + '% ' + '(正确标记的数据点所占的百分比)')  # original write-up reports 47%


# -------------------NN----------------------------------------------------
def layers_size(X, Y, h_layers=4):
    """
    Derive the layer widths of the network from the data.

    :param X: input features; its first axis gives the input-layer width
    :param Y: labels; its first axis gives the output-layer width
    :param h_layers: number of units in the hidden layer (returned unchanged)
    :return: tuple ``(n_x, n_h, n_y)`` = (input width, hidden width, output width)
    """
    return X.shape[0], h_layers, Y.shape[0]


def initial_parameter(n_x, n_h, n_y):
    """
    Build the initial weights and biases of the two-layer network.

    W[i] has shape (n[i], n[i-1]) and b[i] has shape (n[i], 1).
    Weights are standard-normal draws scaled by 0.02; biases start at zero.
    The global NumPy seed is reset to 2 on every call, so the result is
    deterministic for given sizes.

    :param n_x: number of units in the input layer
    :param n_h: number of units in the hidden layer
    :param n_y: number of units in the output layer
    :return: dict with keys 'W1', 'b1', 'W2', 'b2'
    """
    np.random.seed(2)
    weight_scale = 0.02

    # Draw order matters for reproducibility: W1 first, then W2.
    paras = {}
    paras['W1'] = weight_scale * np.random.randn(n_h, n_x)
    paras['b1'] = np.zeros((n_h, 1))
    paras['W2'] = weight_scale * np.random.randn(n_y, n_h)
    paras['b2'] = np.zeros((n_y, 1))

    # Sanity-check every array against its expected shape.
    expected_shapes = {
        'W1': (n_h, n_x),
        'b1': (n_h, 1),
        'W2': (n_y, n_h),
        'b2': (n_y, 1),
    }
    for key, shape in expected_shapes.items():
        assert paras[key].shape == shape

    return paras


def forward_propagation(X, paras):
    """
    Run one forward pass of the two-layer network.

    The hidden layer applies tanh and the output layer applies ``sigmoid``
    (imported from planar_utils; presumably the logistic function).

    :param X: input features, one column per sample
    :param paras: dict holding 'W1', 'b1', 'W2', 'b2'
    :return: tuple ``(A2, cache)`` where A2 is the output activation and cache
             is a dict with the intermediate values 'Z1', 'A1', 'Z2', 'A2'
    """
    W1, b1, W2, b2 = (paras[key] for key in ('W1', 'b1', 'W2', 'b2'))

    # Hidden layer: affine map followed by tanh.
    hidden_linear = np.dot(W1, X) + b1
    hidden_activation = np.tanh(hidden_linear)

    # Output layer: affine map followed by sigmoid.
    output_linear = np.dot(W2, hidden_activation) + b2
    output_activation = sigmoid(output_linear)

    # The network is expected to emit a single output row, one value per sample.
    assert output_activation.shape == (1, X.shape[1])

    cache = dict(
        Z1=hidden_linear,
        A1=hidden_activation,
        Z2=output_linear,
        A2=output_activation,
    )
    return output_activation, cache


def compute_cost(A2, Y):
    """
    Compute the mean binary cross-entropy cost of the network output.

    cost = -(1/m) * sum(Y * log(A2) + (1 - Y) * log(1 - A2)), where m is the
    size of Y's second axis.

    This is a best-effort helper: if the computation raises (for example
    because Y is not 2-D), the traceback is printed and ``None`` is returned
    instead of propagating the error.

    :param A2: activation of the second (output) layer
    :param Y: binary labels (0 or 1), same shape as A2
    :return: the cost as a Python float, or None if the computation failed
    """
    try:
        m = Y.shape[1]
        log_prob = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
        cost = - np.sum(log_prob) / m
        return float(np.squeeze(cost))
    except Exception:
        # Catch only ordinary errors so Ctrl-C / SystemExit still propagate.
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print().
        traceback.print_exc()
        return None


def back_propagation(paras, cache, X, Y):
    """
    Backward pass of the two-layer network (tanh hidden layer, sigmoid output
    with cross-entropy cost).

    :param paras: parameters of the model; only 'W2' is read here
    :param cache: forward-pass values; 'A1' and 'A2' are read
    :param X: input features, one column per sample
    :param Y: labels
    :return: dict of gradients with keys 'dW1', 'db1', 'dW2', 'db2'
    """
    m = X.shape[1]  # number of samples; gradients are averaged over it
    W2 = paras['W2']
    A1, A2 = cache['A1'], cache['A2']

    # Output layer: for sigmoid + cross-entropy the error term is A2 - Y.
    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: propagate the error through W2 and multiply by the tanh
    # derivative, 1 - A1^2.
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return {
        'dW1': dW1,
        'db1': db1,
        'dW2': dW2,
        'db2': db2,
    }


def update_params(params, grads, alpha=0.01):
    """
    Apply one gradient-descent step: ``p <- p - alpha * dp`` for every parameter.

    New arrays are returned; the input dicts are not modified.

    :param params: current parameters, keys 'W1', 'b1', 'W2', 'b2'
    :param grads: gradients, keys 'dW1', 'db1', 'dW2', 'db2'
    :param alpha: learning rate, default is 0.01
    :return: dict of updated parameters with the same keys as ``params``
    """
    # Each gradient is stored under the parameter's name prefixed with 'd'.
    return {
        name: params[name] - alpha * grads['d' + name]
        for name in ('W1', 'b1', 'W2', 'b2')
    }


def nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=False, alpha=0.5):
    """
    Train the two-layer network with batch gradient descent.

    :param X: input features, one column per sample
    :param Y: labels
    :param n_h: number of units in the hidden layer
    :param num_iterations: number of gradient-descent iterations
    :param print_cost: if True, print the cost every 1000 iterations
    :param alpha: learning rate passed to update_params, default is 0.5
    :return: the trained parameters (dict with 'W1', 'b1', 'W2', 'b2')
    """
    # Resets the global NumPy RNG; note that initial_parameter reseeds it
    # again (with 2) before drawing the weights.
    np.random.seed(3)

    # Input/output widths come from the data; the hidden width is the n_h
    # argument, so the middle element of layers_size is not used.
    n_x, _, n_y = layers_size(X, Y)

    params = initial_parameter(n_x, n_h, n_y)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, params)

        # The cost is only reported, never used for the update, so compute it
        # just for the iterations that are printed.
        if print_cost and i % 1000 == 0:
            cost = compute_cost(A2, Y)
            print(f'第{i}次循环成本为：' + str(cost))

        grads = back_propagation(params, cache, X, Y)
        params = update_params(params, grads, alpha=alpha)

    return params


def predict_y(X, params):
    """
    Predict labels by rounding the network's output activation.

    :param X: input features, one column per sample
    :param params: trained parameters as consumed by forward_propagation
    :return: array of rounded output activations (0 or 1 when the activation
             lies in [0, 1]); np.round rounds halves to even, so exactly 0.5
             becomes 0
    """
    output_activation, _ = forward_propagation(X, params)
    return np.round(output_activation)


print('======================Two layers nn model===============================')
parameters = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)

prediction_y = predict_y(X, parameters)
# Percentage of samples whose rounded prediction equals the label. For 0/1
# labels this is the same value as (TP + TN) / total * 100, but it yields a
# scalar directly instead of calling float() on a (1, 1) array, which NumPy
# has deprecated.
accu = float(np.mean(prediction_y == Y) * 100)
print(f'Hidden layer: 的准确率为{accu}')

# Decision boundary of the trained model. plot_decision_boundary hands the
# callback points as rows (the same layout LR_model.predict receives), hence
# the transpose before predict_y.
plt.figure()
plot_decision_boundary(lambda x: predict_y(x.T, parameters), X, Y)
plt.title('Decision Boundary for hidden layer size ' + str(4))

# Compare several hidden-layer sizes: train one model per size, plot its
# decision boundary in its own subplot and print its training accuracy.
plt.figure(figsize=(16, 32))
hidde_layers = [1, 2, 4, 6, 10, 20, 50]

for i, n_h in enumerate(hidde_layers):
    params = nn_model(X, Y, n_h, num_iterations=2000, print_cost=False)
    prediction_y = predict_y(X, params)
    plt.subplot(5, 2, i + 1)
    # Plot with the model trained in THIS iteration (`params`), not the
    # earlier module-level `parameters`, which would draw the same boundary in
    # every subplot. Binding it as a default keeps the callback tied to this
    # iteration's model.
    plot_decision_boundary(lambda x, params=params: predict_y(x.T, params), X, Y)
    plt.title(f'Decision Boundary for hidden layer size {n_h}')
    # Percentage of correctly labelled samples; avoids float() on a (1, 1)
    # array, which NumPy has deprecated.
    accu = float(np.mean(prediction_y == Y) * 100)
    print(f'Hidden layer: {n_h}的准确率为{accu}')

plt.savefig('hidden layer.png')

# All work above runs at import time; the entry-point guard is a placeholder.
if __name__ == "__main__":
    pass
posted on 2021-09-26 15:25 RankFan 阅读(24) 评论(0) 编辑收藏举报
会员力量，点亮园子希望
刷新页面返回顶部
RankFan

吴恩达-神经网络-week1-hw3

导航

公告