初识机器学习及机器学习线性拟合的实现
从最小二乘法到机器学习
1,什么是机器学习?
机器学习有下面几种定义:
机器学习是一门人工智能的科学,该领域的主要研究对象是人工智能,特别是如何在经验学习
中改善具体算法的性能。
机器学习是对能通过经验自动改进的计算机算法的研究。
机器学习是用数据或以往的经验,以此优化计算机程序的性能标准。
2,最小二乘法
对于呈线性关系的散点数据,通过最小二乘法的计算可以得出数据的拟合模型。(数学推导此处不赘述)
但是最小二乘法有一个不可避免的缺点:与平均数类似,它易受极端值影响,导致拟合模型容易出现过拟合。
注:算法和模型的区别
算法和模型是两个不同的概念,比如在这里的线性拟合中,使用的最小二乘法是算法,而模型是最后得到的关系式,例如 y = 2x + 4。
3,机器学习在数据拟合中的应用
(1) 最小二乘法的适用场景通常是数据呈线性关系时;对于非线性数据,最小二乘法拟合出的直线通常就不再适用。对此,我们可以用多项式拟合去解决对数据的拟合问题。
(2) 除了传统的多项式矩阵计算以外,另一种方法同样适用于非线性曲线拟合,也就是机器学习。
(3) 对于多项式 $y = a_1 x^n + a_2 x^{n-1} + a_3 x^{n-2} + \cdots + a_n x + a_{n+1}$,我们可以通过对系数进行微量的修改,从而使多项式趋近于真实值。
以下是对最高次项为五次的一元多项式拟合的代码实现:
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <thread>
#include <vector>

using namespace std;  // Kept from the original listing so any code relying on it still builds.

/// Coefficients of the quintic model
///   y = a*x^5 + b*x^4 + c*x^3 + d*x^2 + e*x + f.
class Coefficient {
public:
    double a = 0.0;  ///< x^5 coefficient
    double b = 0.0;  ///< x^4 coefficient
    double c = 0.0;  ///< x^3 coefficient
    double d = 0.0;  ///< x^2 coefficient
    double e = 0.0;  ///< x^1 coefficient
    double f = 0.0;  ///< constant term
};

/// Evaluates the quintic polynomial described by `data` at `x`.
///
/// @param x     Point at which to evaluate the model.
/// @param data  Polynomial coefficients.
/// @return      a*x^5 + b*x^4 + c*x^3 + d*x^2 + e*x + f.
double f_x(double x, const Coefficient& data) {
    // Horner's scheme: same polynomial, fewer multiplications.
    return ((((data.a * x + data.b) * x + data.c) * x + data.d) * x + data.e) * x + data.f;
}

/// Sum of squared residuals between the samples and the model.
///
/// @param x     Sample abscissas.
/// @param y     Sample ordinates; paired with `x` by index.
/// @param data  Polynomial coefficients.
/// @return      Sum over i of (y[i] - f_x(x[i]))^2. Only the first
///              min(x.size(), y.size()) pairs are used; 0 for empty input.
double Compute(const vector<double>& x, const vector<double>& y, const Coefficient& data) {
    // Use the real sample count instead of a hard-coded 100 so shorter
    // inputs cannot be indexed out of bounds.
    const std::size_t count = std::min(x.size(), y.size());
    double y_pred_h = 0.0;
    for (std::size_t i = 0; i < count; ++i) {
        const double residual = y[i] - f_x(x[i], data);
        y_pred_h += residual * residual;
    }
    return y_pred_h;
}

/// Step size for a given refinement level.
///
/// @param gradient  Refinement level (0, 1, 2, ...).
/// @return          0.1 / 10^gradient, i.e. 0.1, 0.01, 0.001, ...
double feedback(int gradient) {
    return 0.1 / std::pow(10.0, gradient);
}

/// Tunes the linear coefficient `data.e` by a shrinking-step search that
/// lowers the sum of squared residuals, printing progress as it goes.
///
/// Each round takes one trial step (e -= step) and re-evaluates the error:
///  - if the error changed by less than 1e-6, the search has converged and
///    the better of the two candidate values of e is kept;
///  - if the error got worse, the step is undone and the step size is
///    divided by 10;
///  - otherwise the step is accepted and becomes the new baseline.
///
/// The baseline `result_order` always holds the error at the last accepted
/// value of e, so a rejected trial never becomes the reference point.
///
/// NOTE(review): the search only ever tries smaller values of e, exactly as
/// the original did. If the best e lies above the starting value, every
/// trial is rejected and e ends up (almost) unchanged.
///
/// @param x     Sample abscissas.
/// @param y     Sample ordinates.
/// @param data  Coefficients; only `e` is modified.
void Com_e(const vector<double>& x, const vector<double>& y, Coefficient& data) {
    // Two consecutive errors closer than this count as converged
    // (six decimal places, matching the original's intended precision).
    constexpr double kErrorTolerance = 1e-6;
    // Pause between progress lines so the output can be followed by eye.
    const auto kPause = chrono::milliseconds(300);

    int gradient = 0;  // refinement level; current step is feedback(gradient)
    int aa = 0;        // number of rejected (undone) steps so far
    int kk = 0;        // number of trial steps so far
    double result_order = Compute(x, y, data);  // error at the last accepted e

    while (true) {
        const double step = feedback(gradient);
        data.e -= step;
        const double result_now = Compute(x, y, data);

        cout << "第" << kk << "精度校准,当前e值:" << data.e << endl;
        this_thread::sleep_for(kPause);
        cout << setprecision(15) << "当前result_order的值:" << result_order
             << ",当前result_now的值:" << result_now << endl;
        ++kk;

        const bool worse = result_now > result_order;

        // Compare the doubles directly; scaling by 1e6 and casting to int
        // overflows once the error exceeds roughly 2147.
        if (std::fabs(result_now - result_order) < kErrorTolerance) {
            if (worse) {
                data.e += step;  // the previous e was marginally better; keep it
            } else {
                result_order = result_now;
            }
            cout << setprecision(15) << "当前e值:" << data.e << endl;
            cout << setprecision(15) << "当前result_order的值:" << result_order
                 << ",当前result_now的值:" << result_now << endl;
            break;
        }

        if (worse) {
            data.e += step;  // undo the trial step
            cout << "第" << aa << "精度调正" << endl;
            this_thread::sleep_for(kPause);
            ++gradient;      // retry from the same e with a 10x smaller step
            ++aa;
            continue;
        }

        result_order = result_now;  // accept: this e is the new baseline
        cout << "当前result_order的值:" << result_order << endl;
    }
}

/// Fits the linear coefficient of a quintic model to 100 noisy samples,
/// starting from a hand-picked initial guess.
int main() {
    const vector<double> x = {
        0.0238443, 0.0782814, 0.309722, 0.356379, 0.666561, 0.69018, 0.741007, 1.28952, 1.35156, 1.36749,
        1.4039, 1.43163, 1.63053, 1.80123, 2.01043, 2.03856, 2.23708, 2.24379, 2.52274, 2.62696,
        2.77726, 2.84281, 2.86952, 2.95086, 3.04979, 3.06172, 3.17244, 3.17664, 3.31864, 3.36358,
        3.39419, 3.39652, 3.40442, 3.50581, 3.53935, 3.57435, 3.95945, 4.1482, 4.23497, 4.52956,
        4.57167, 4.60112, 4.66563, 4.9201, 4.93874, 4.99603, 5.13926, 5.15358, 5.30797, 5.31045,
        5.37438, 5.3793, 5.42455, 5.45504, 5.71962, 5.72462, 5.76174, 5.97731, 6.25061, 6.35035,
        6.51578, 6.78054, 6.78656, 6.92455, 7.02385, 7.24099, 7.25993, 7.26249, 7.34808, 7.51587,
        7.54976, 7.62619, 7.65725, 7.67294, 7.68242, 7.74679, 7.88493, 8.18776, 8.27934, 8.31393,
        8.35422, 8.41552, 8.52648, 8.63434, 8.81657, 8.85457, 8.87774, 8.8978, 9.07313, 9.15527,
        9.28593, 9.59358, 9.72242, 9.75149, 9.76268, 9.79195, 9.81805, 9.82021, 9.89382, 9.89629};

    const vector<double> y = {
        3.30207, 5.05404, 1.85261, 3.69977, 5.95768, 4.03767, 5.35161, 5.94737, 7.74113, 4.66731,
        4.46411, 6.29095, 5.98303, 7.83717, 4.67074, 6.40559, 7.8632, 6.12997, 6.8309, 6.4112,
        9.4472, 6.79975, 9.66573, 8.07317, 8.78041, 10.287, 9.53091, 8.78894, 8.56872, 10.7272,
        9.16251, 9.2074, 10.5194, 11.8693, 9.43812, 9.88234, 10.1937, 10.0158, 9.53178, 13.1102,
        12.2788, 11.7006, 13.4194, 12.3888, 13.1672, 14.0105, 12.5682, 14.0882, 14.2512, 14.3275,
        14.7135, 14.1075, 13.2721, 13.4276, 14.2462, 13.0362, 15.8378, 14.8242, 14.8943, 15.1473,
        16.0866, 17.7518, 15.9764, 15.5912, 16.9059, 18.3209, 17.9057, 17.7024, 18.8475, 17.8241,
        19.6078, 19.5087, 16.9001, 18.634, 18.0884, 19.7081, 20.8486, 19.1761, 19.3259, 18.9728,
        18.416, 19.1114, 21.3185, 19.993, 18.6365, 21.3748, 20.4762, 21.1566, 22.031, 20.0275,
        20.3419, 19.9408, 23.093, 22.8716, 22.3229, 20.5651, 21.5855, 22.778, 22.5554, 23.0719};

    // Higher-order terms start near zero so the model begins as (almost) a line.
    Coefficient data;
    data.a = 0.00000001;
    data.b = 0.00000001;
    data.c = 0.00000001;
    data.d = 0.00000001;
    data.e = 2.42014;
    data.f = 0.671964;

    Com_e(x, y, data);
    return 0;
}