import argparse  # to read inputs from the command line
import csv       # to read the input data set file
import numpy as np  # to work with the data set
Initialization
# Initialise the argument parser, read the arguments from the command line with
# the respective flags, and then call the main() function.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data", help="Data File")
    parser.add_argument("-l", "--learningRate", help="Learning Rate")
    parser.add_argument("-t", "--threshold", help="Threshold")
    main()
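For example, assuming the script is saved as solution.py and the data set as random.csv (both names are hypothetical), it could be invoked as:

    python solution.py -d random.csv -l 0.0001 -t 0.0001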
The main() function
def main():
    args = parser.parse_args()
    # Save the respective command line inputs into variables.
    file, learningRate, threshold = args.data, float(args.learningRate), float(args.threshold)

    # Read the csv file; the last column is the target output and is separated
    # from the inputs (X) as Y.
    with open(file) as csvFile:
        reader = csv.reader(csvFile, delimiter=',')
        X = []
        Y = []
        for row in reader:
            # e.g. a row "2.0,3.0,5.5" becomes the X row [1.0, 2.0, 3.0]
            # (with a leading 1.0 as the bias term) and the Y row [5.5]
            X.append([1.0] + row[:-1])
            Y.append([row[-1]])

    # Convert the data points into floats and initialise the weight vector with 0s.
    n = len(X)  # number of data points
    X = np.array(X).astype(float)
    Y = np.array(Y).astype(float)
    W = np.zeros(X.shape[1]).astype(float)
    # The weight vector is reshaped into a column vector so the matrix
    # dimensions line up for the dot product.
    W = W.reshape(X.shape[1], 1).round(4)

    # Calculate the predicted output values.
    f_x = calculatePredicatedValue(X, W)
    # Calculate the initial SSE.
    sse_old = calculateSSE(Y, f_x)

    outputFile = 'solution_' + \
        'learningRate_' + str(learningRate) + '_threshold_' \
        + str(threshold) + '.csv'
    '''
    The output file is opened in write mode and the data is written in the format
    mentioned in the post. After the first values are written, the gradient and the
    updated weights are calculated using the calculateGradient function. An
    iteration variable keeps track of how many times batch linear regression runs
    before the change in SSE falls below the threshold. In the infinite while loop,
    the predicted output is calculated again and a new SSE value is computed. If
    the absolute difference between the older SSE (from the previous iteration) and
    the newer SSE (from the current iteration) is greater than the threshold, the
    process repeats: the iteration count is incremented by 1 and the current SSE is
    stored as the previous SSE. Once the absolute difference falls below the
    threshold, the loop breaks and the last output values are written to the file.
    '''
    with open(outputFile, 'w', newline='') as csvFile:
        # escapechar must be a real one-character string; some Python versions
        # reject an empty string here.
        writer = csv.writer(csvFile, delimiter=',', quoting=csv.QUOTE_NONE, escapechar='\\')
        # Row format: iteration number, the weights (bias first), then the SSE,
        # all formatted to 4 decimal places.
        writer.writerow([0, *["{0:.4f}".format(val) for val in W.T[0]], "{0:.4f}".format(sse_old)])
        gradient, W = calculateGradient(W, X, Y, f_x, learningRate)

        iteration = 1
        while True:
            f_x = calculatePredicatedValue(X, W)
            sse_new = calculateSSE(Y, f_x)
            if abs(sse_new - sse_old) > threshold:
                writer.writerow([iteration, *["{0:.4f}".format(val) for val in W.T[0]], "{0:.4f}".format(sse_new)])
                gradient, W = calculateGradient(W, X, Y, f_x, learningRate)
                iteration += 1
                sse_old = sse_new
            else:
                break
        writer.writerow([iteration, *["{0:.4f}".format(val) for val in W.T[0]], "{0:.4f}".format(sse_new)])
    print("Output File Name: " + outputFile)
# Dot product of X (inputs) and W (weights) as numpy matrices; the result is
# the predicted output.
def calculatePredicatedValue(X, W):
    f_x = np.dot(X, W)
    return f_x
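A minimal sketch of the shapes involved (the numbers are made up for illustration): each row of X carries the leading 1.0 bias entry, and W is a column vector, so np.dot(X, W) yields one prediction per sample.

    import numpy as np

    X = np.array([[1.0, 2.0, 3.0],
                  [1.0, 4.0, 5.0]])   # 2 samples: bias column + 2 features
    W = np.zeros((3, 1))              # one weight per column of X
    f_x = np.dot(X, W)                # shape (2, 1): one prediction per sample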
The calculateGradient() function
The gradient is calculated using the first formula mentioned in the post, and the weights are updated with it.
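That formula is not reproduced here, but from the code below the update is the standard batch gradient step (assuming the usual convention that the factor of 2 from differentiating the square is absorbed into the learning rate):

    W \leftarrow W + \alpha \sum_{i=1}^{n} \left(y_i - f(x_i)\right) x_i

where each x_i includes the leading 1.0 bias entry. Since SSE = \sum_i (f(x_i) - y_i)^2, stepping along (y_i - f(x_i)) x_i decreases the SSE.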
def calculateGradient(W, X, Y, f_x, learningRate):
    # Per-sample error times input row, via numpy broadcasting.
    gradient = (Y - f_x) * X
    # Sum over all samples: this is what makes it *batch* gradient descent.
    gradient = np.sum(gradient, axis=0)
    temp = np.array(learningRate * gradient).reshape(W.shape)
    W = W + temp
    return gradient, W
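A quick sanity check with made-up numbers: starting from zero weights the predictions are all zero, so the summed gradient reduces to the sum of y_i * x_i over the samples.

    X = np.array([[1.0, 2.0],
                  [1.0, 4.0]])
    Y = np.array([[3.0],
                  [5.0]])
    W = np.zeros((2, 1))
    f_x = np.dot(X, W)  # all zeros
    g, W_new = calculateGradient(W, X, Y, f_x, learningRate=0.01)
    # g == [8., 26.]  (i.e. [3+5, 3*2+5*4]); W_new == [[0.08], [0.26]]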
Putting it all together, the complete program:

import argparse
import csv
import numpy as np
def main():
    args = parser.parse_args()
    # Save the respective command line inputs into variables.
    file, learningRate, threshold = args.data, float(args.learningRate), float(args.threshold)

    # Read the csv file; the last column is the target output and is separated
    # from the inputs (X) as Y.
    with open(file) as csvFile:
        reader = csv.reader(csvFile, delimiter=',')
        X = []
        Y = []
        for row in reader:
            X.append([1.0] + row[:-1])
            Y.append([row[-1]])

    # Convert the data points into floats and initialise the weight vector with 0s.
    n = len(X)  # number of data points
    X = np.array(X).astype(float)
    Y = np.array(Y).astype(float)
    W = np.zeros(X.shape[1]).astype(float)
    # The weight vector is reshaped into a column vector so the matrix
    # dimensions line up for the dot product.
    W = W.reshape(X.shape[1], 1).round(4)

    # Calculate the predicted output values.
    f_x = calculatePredicatedValue(X, W)
    # Calculate the initial SSE.
    sse_old = calculateSSE(Y, f_x)

    outputFile = 'solution_' + \
        'learningRate_' + str(learningRate) + '_threshold_' \
        + str(threshold) + '.csv'
    '''
    The output file is opened in write mode and the data is written in the format
    mentioned in the post. After the first values are written, the gradient and the
    updated weights are calculated using the calculateGradient function. An
    iteration variable keeps track of how many times batch linear regression runs
    before the change in SSE falls below the threshold. In the infinite while loop,
    the predicted output is calculated again and a new SSE value is computed. If
    the absolute difference between the older SSE (from the previous iteration) and
    the newer SSE (from the current iteration) is greater than the threshold, the
    process repeats: the iteration count is incremented by 1 and the current SSE is
    stored as the previous SSE. Once the absolute difference falls below the
    threshold, the loop breaks and the last output values are written to the file.
    '''
    with open(outputFile, 'w', newline='') as csvFile:
        writer = csv.writer(csvFile, delimiter=',', quoting=csv.QUOTE_NONE, escapechar='\\')
        writer.writerow([0, *["{0:.4f}".format(val) for val in W.T[0]], "{0:.4f}".format(sse_old)])
        gradient, W = calculateGradient(W, X, Y, f_x, learningRate)

        iteration = 1
        while True:
            f_x = calculatePredicatedValue(X, W)
            sse_new = calculateSSE(Y, f_x)
            if abs(sse_new - sse_old) > threshold:
                writer.writerow([iteration, *["{0:.4f}".format(val) for val in W.T[0]], "{0:.4f}".format(sse_new)])
                gradient, W = calculateGradient(W, X, Y, f_x, learningRate)
                iteration += 1
                sse_old = sse_new
            else:
                break
        writer.writerow([iteration, *["{0:.4f}".format(val) for val in W.T[0]], "{0:.4f}".format(sse_new)])
    print("Output File Name: " + outputFile)
def calculateGradient(W, X, Y, f_x, learningRate):
    gradient = (Y - f_x) * X
    gradient = np.sum(gradient, axis=0)
    # gradient = np.array([float("{0:.4f}".format(val)) for val in gradient])
    temp = np.array(learningRate * gradient).reshape(W.shape)
    W = W + temp
    return gradient, W
def calculateSSE(Y, f_x):
    sse = np.sum(np.square(f_x - Y))
    return sse
def calculatePredicatedValue(X, W):
    f_x = np.dot(X, W)
    return f_x
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data", help="Data File")
    parser.add_argument("-l", "--learningRate", help="Learning Rate")
    parser.add_argument("-t", "--threshold", help="Threshold")
    main()
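To try the program end to end (the file name, contents, and hyper-parameters below are all made up for illustration), save a small data set where the last column is the target, e.g. random.csv:

    2.0,3.0,5.5
    4.0,1.0,6.0
    1.0,1.0,2.5

and run:

    python solution.py -d random.csv -l 0.0001 -t 0.0001

Each row of the generated solution_learningRate_0.0001_threshold_0.0001.csv holds the iteration number, the current weights (bias first), and the SSE, all formatted to four decimal places.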