Python | Gradient Descent

Full-Batch Gradient Descent

import numpy as np
  
# Create the dataset X, y
np.random.seed(1)
X = np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]
  
# Set hyperparameters
n_iterations = 10000
t0, t1 = 5, 500
  
# Define a schedule that decays the learning rate over time
def learning_rate_schedule(t):
    return t0/(t+t1)
  
# 1. Randomly initialize θ
theta = np.random.randn(2, 1)
# 4. Convergence check: rather than testing against a tolerance, it is common to simply run a relatively large, fixed number of iterations
for i in range(n_iterations):
    # 2. Compute the gradient over the full training set (the constant 2/m factor of the MSE is dropped here; it only rescales the step size)
    gradients = X_b.T.dot(X_b.dot(theta)-y)
    # 3. Apply the gradient descent update: θ(t+1) = θ(t) - η * gradient
    learning_rate = learning_rate_schedule(i)
    theta = theta - learning_rate * gradients
print(theta)
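
The comment on step 4 above points out that instead of testing for convergence, one usually just runs a large, fixed number of iterations. If an explicit stopping test is wanted anyway, here is a minimal sketch; the tolerance tol is an arbitrary choice, and X_b, y, n_iterations and learning_rate_schedule are reused from above:

tol = 1e-8  # arbitrary threshold on how much θ must move to keep iterating
theta = np.random.randn(2, 1)
for i in range(n_iterations):
    gradients = X_b.T.dot(X_b.dot(theta) - y)
    theta_new = theta - learning_rate_schedule(i) * gradients
    # Stop early once the update to θ becomes negligible
    if np.linalg.norm(theta_new - theta) < tol:
        theta = theta_new
        print("converged after", i + 1, "iterations")
        break
    theta = theta_new
print(theta)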

Stochastic Gradient Descent

import numpy as np
  
# Create the dataset X, y
X = 2*np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]
# Set hyperparameters
n_epochs = 10000
m = 100          # number of training samples
t0, t1 = 5, 500
  
# Define a schedule that decays the learning rate over time
def learning_rate_schedule(t):
    return t0/(t+t1)
  
theta = np.random.randn(2, 1)
for epoch in range(n_epochs):
    # Shuffle the data at the start of each epoch, before iterating over individual samples
    arr = np.arange(len(X_b))
    np.random.shuffle(arr)
    X_b = X_b[arr]
    y = y[arr]
    for i in range(m):
        # Take a single sample (kept two-dimensional so the matrix products still work)
        xi = X_b[i:i+1]
        yi = y[i:i+1]
        # Gradient estimated from this one sample only
        gradients = xi.T.dot(xi.dot(theta)-yi)
        learning_rate = learning_rate_schedule(epoch*m + i)
        theta = theta - learning_rate * gradients
  
print(theta)
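
For comparison, scikit-learn ships a stochastic gradient descent regressor that implements the same idea with more machinery (regularization, averaging, stopping criteria). A minimal sketch, assuming scikit-learn is installed; the hyperparameter values are only illustrative. Since only X_b and y were shuffled above, the feature column is taken back out of X_b so it stays aligned with y:

from sklearn.linear_model import SGDRegressor

# SGDRegressor fits the intercept itself, so pass the raw feature column (not X_b)
# and a 1-D target vector.
X_feat = X_b[:, 1:]                     # feature column, still aligned with the shuffled y
sgd_reg = SGDRegressor(max_iter=10000, eta0=0.01, random_state=42)
sgd_reg.fit(X_feat, y.ravel())
print(sgd_reg.intercept_, sgd_reg.coef_)   # should come out near 4 and 3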

Mini-Batch Gradient Descent

import numpy as np
  
# Create the dataset X, y
X = 2*np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]
  
# Set hyperparameters
t0, t1 = 5, 500
  
# Define a schedule that decays the learning rate over time
def learning_rate_schedule(t):
    return t0/(t+t1)
  
n_epochs = 100000
m = 100                              # number of training samples
batch_size = 10
num_batches = int(m / batch_size)    # 10 batches per epoch
  
theta = np.random.randn(2, 1)
for epoch in range(n_epochs):
    # Shuffle features and targets together at the start of each epoch
    arr = np.arange(len(X_b))
    np.random.shuffle(arr)
    X_b = X_b[arr]
    y = y[arr]
    for i in range(num_batches):
        # Slice out the i-th batch of size batch_size
        x_batch = X_b[i*batch_size: i*batch_size + batch_size]
        y_batch = y[i*batch_size: i*batch_size + batch_size]
        gradients = x_batch.T.dot(x_batch.dot(theta)-y_batch)
        # Decay the learning rate with the number of parameter updates performed so far
        learning_rate = learning_rate_schedule(epoch * num_batches + i)
        theta = theta - learning_rate*gradients
  
print(theta)
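
All three variants should end up close to the true intercept 4 and slope 3. A quick way to verify this is the closed-form least-squares solution (the normal equation), computed from the same X_b and y:

# Normal equation: theta = inv(X_b.T @ X_b) @ X_b.T @ y
theta_exact = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_exact)   # reference value to compare the gradient descent results against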
 