最优化算法-梯度下降
梯度下降(最速下降)算法,参考 Edwin K. P. Chong 与 Stanislaw H. Żak 所著《最优化导论》第 8.2 节,采用 Go 语言实现。
此处仍有一个疑问:梯度下降时应如何确定步长?即使采用割线法求最优步长,割线法本身的初始值又该如何确定?
下面的程序采用牛顿法求步长的极小值点,但其结果非常依赖初始值的选取。
/***************************************** * FileName : grad.go * Author : fredric * Date : 2017.09.01 * Note : 梯度算法 * History : *****************************************/ package grad import( "fmt" "math" ) //无法采用牛顿方法求得极值,主要原因在于无法确定初始值,造成导数偏差很大 func _get_argmin_newton(x1, x2, x3, grad_x1, grad_x2, grad_x3 float64) float64 { fmt.Printf("_get_argmin input value %f,%f,%f,%f,%f,%f\n", x1, x2, x3, grad_x1, grad_x2, grad_x3) //f(x - a*delta) = (x1 - a * grad_x1 - 4)^4 + (x2 - a * grad_x2 - 3)^2 + 4 * (x3 - a*grad_x3 + 5)^4 //f'(x - a*delta) = 4 * grad_x1 * (x1 - a * grad_x1 - 4)^3 // + 2 * grad_x2 * (x2 - a * grad_x2 - 3) // + 16* grad_x3 * (x3 - a*grad_x3 + 5)^3 //f''(x - a*delta)= 12 * grad_x1^2 * (x1 - a * grad_x1 - 4)^2 // + 2 * grad_x2^2 * a // + 48 * grad_x3^2 * (x3 - a*grad_x3 + 5)^2 //采用牛顿法求取f(a)的最小值 //此处的初始值还是比较疑惑,因为初始值取不对,结果差太远 var a0 float64 = 0.0002 var a1 float64 = 0.0005 delta := 0.0005 for math.Abs(a1 - a0) > delta { a0 = a1 //fmt.Printf("a0: %f\n" , a0) //fmt.Printf("grad_x2: %f\n" , grad_x2) //fmt.Printf("grad_x2 * a0: %f\n" , grad_x2 * a0) //fmt.Printf("grad_x2 * 0.2: %f\n" , grad_x2 * 0.2) f_1_v := 4 * grad_x1 * (x1 - a0 * grad_x1 - 4)* (x1 - a0 * grad_x1 - 4)* (x1 - a0 * grad_x1 - 4) + 2 * grad_x2 * (x2 - a0 * grad_x2 - 3) + 16* grad_x3 * (x3 - a0 * grad_x3 + 5)* (x3 - a0 * grad_x3 + 5) * (x3 - a0 * grad_x3 + 5) f_2_v := 12 * grad_x1 * grad_x1 * (x1 - a1 * grad_x1 - 4)* (x1 - a1 * grad_x1 - 4) + 2 * grad_x2* grad_x2 * a1 + 48 * grad_x3* grad_x3 * (x3 - a1 * grad_x3 + 5)* (x3 - a1 * grad_x3 + 5) a1 = a0 - f_1_v / f_2_v //fmt.Printf("----------abs = %f\n", math.Abs(a1 - a0)) fmt.Printf("step value = %f f_1_v = %f, f_2_v = %f\n", (a0 + a1)/2, f_1_v, f_2_v) } return (a0 + a1)/2 } //采用常量方式求极值 func _get_argmin_const(x1, x2, x3, grad_x1, grad_x2, grad_x3 float64) float64{ /* * 不是很搞的清楚,当采用快速下降算法时如何确定固定步长,网上有一个说法实践是正确的 * 即满足李普希兹条件存在L>0使得|f(x1)-f(x2)|<=L|x1-x2|,步长取1/L * 下面这个例子由于存在x3这个高阶,所以如果步长取大的话,完全没有办法计算 */ return 0.0004 } func DoGradAlgorithm(){ 
//计算f(x1,x2,x3) = (x1 - 4)^4 + (x2 - 3)^2 + 4*(x3 + 5)^4 //所谓梯度本质上也是导数,只是针对多维度上,取了各个维度偏导数,组成向量; //最速下降法就是在每次迭代时取当前负梯度方向的能获取的函数数最小值 //初始值x0 = [4, 2, -1] x1 := 4.0 x2 := 2.0 x3 := -1.0 //取三次迭代 for i := 0; i < 4; i++ { grad_x1 := 4 * (x1 - 4)*(x1 - 4)*(x1 - 4) grad_x2 := 2 * (x2 - 3) grad_x3 := 16 * (x3 + 5)* (x3 + 5)* (x3 + 5) a := _get_argmin_newton(x1,x2,x3, grad_x1, grad_x2, grad_x3) fmt.Printf("grad_x1 = %f, grad_x2 = %f, grad_x3 = %f\n", grad_x1, grad_x2, grad_x3) x1 = x1 - a * grad_x1 x2 = x2 - a * grad_x2 x3 = x3 - a * grad_x3 fmt.Printf("x1 = %f, x2 = %f, x3 = %f\n", x1, x2, x3) } }