# 周报8 (Weekly Report 8)

# how good is our algo by using cross validation
def how_good_is_our_algo(dataset, algo, n_folds, *args):
    """Evaluate *algo* on *dataset* with k-fold cross validation.

    dataset : list of rows; the last column of each row is the target.
    algo    : callable(train_set, test_set, *args) -> list of predictions.
    n_folds : number of cross-validation folds.
    *args   : extra arguments forwarded to *algo*.

    Returns a list with one RMSE score per fold.
    """
    folds = k_fold_cross_validation_split(dataset, n_folds)
    scores = list()
    for fold in folds:
        # Train on every fold except the held-out one.
        train_set = list(folds)
        train_set.remove(fold)
        # Flatten the remaining folds into one list of rows.
        # (Replaces sum(train_set, []), which is quadratic in the
        # number of rows because it re-copies the list each step.)
        train_set = [row for part in train_set for row in part]
        test_set = list()
        for row in fold:
            row_copy = list(row)
            test_set.append(row_copy)
            # Hide the target so the algorithm cannot peek at it.
            row_copy[-1] = None
        predicted = algo(train_set, test_set, *args)
        actual = [row[-1] for row in fold]
        rmse = rmse_method(actual, predicted)
        scores.append(rmse)
    return scores


# prediction
def predict(row, coefficients):
    """Predict the target value for *row* with linear coefficients.

    coefficients[0] is the intercept; coefficients[1:] pair up with
    row[:-1] (the last column of *row* is the target and is ignored).
    """
    estimate = coefficients[0]
    for weight, feature in zip(coefficients[1:], row[:-1]):
        estimate += weight * feature
    return estimate


# using stochastic gradient descent method to calculate the coefficient
def sad_method_to_calculate_coefficient(train_data, learning_rate, n_epoch):
    """Fit linear-regression coefficients to *train_data* with SGD.

    train_data    : list of rows; row[:-1] are features, row[-1] the target.
    learning_rate : step size for each gradient update.
    n_epoch       : number of full passes over the training data.

    Returns [intercept, w1, w2, ...], one weight per feature.
    """
    coefficient_list = [0.0 for i in range(len(train_data[0]))]
    for epoch in range(n_epoch):
        for row in train_data:
            yhat = predict(row, coefficient_list)
            # BUG FIX: the error is prediction minus target
            # ("yhat - row[-1]"); the original "error = yhat = row[-1]"
            # was a chained assignment that set error to the raw target.
            error = yhat - row[-1]
            # Gradient step for the intercept ...
            coefficient_list[0] = coefficient_list[0] - learning_rate*error
            # ... and for each feature weight.
            for i in range(len(row)-1):
                coefficient_list[i+1] = coefficient_list[i+1]-learning_rate*error*row[i]
            # print(learning_rate, n_epoch, error)
    return coefficient_list


# using linear regression algo
def using_sad_method_to_calculate_linear_regression(train_data, testing_data, learn_rate, n_epoch):
    """Train SGD linear regression on *train_data* and predict *testing_data*.

    Returns a list of predicted target values, one per testing row.
    """
    predictions = list()
    # BUG FIX: the parameter is named learn_rate; the original body
    # referenced the undefined name "learning_rate" (NameError at runtime).
    coefficient_list = sad_method_to_calculate_coefficient(train_data, learn_rate, n_epoch)
    for row in testing_data:
        yhat = predict(row, coefficient_list)
        predictions.append(yhat)
    return predictions
# using our real wine quality data
# NOTE(review): seed, csv_loader, list_our_data and
# string_to_float_converter are not defined in this chunk — presumably
# imported or defined earlier in the file; confirm before running.
seed(1)  # fixed RNG seed so the cross-validation split is reproducible
wine_quality_data_name = 'white.csv'
dataset = csv_loader('../csv/'+wine_quality_data_name)
dataset = list_our_data(dataset)
# Convert every column of the loaded CSV from string to float, in place.
for i in range(len(dataset[0])):
    string_to_float_converter(dataset, i)
# posted @ 2022-04-24 18:08  我的未来姓栗山  阅读(25)  评论(0)  编辑  收藏  举报