共轭梯度法示例代码

(未经允许,不得转载)

共轭梯度法真是个漂亮的算法。最近写了个示例代码,录在这里吧。

参考文献:
[1] 马红儒,“计算物理讲义”

1. 算法

Ref. [1],以及很多教材中有很多介绍,我自己写的讲义里也有,所以懒得在这里做叙述了,好像有本 painless conjugate gradient 口碑挺好。
我就简单把做法在这里说一下吧。

  • 次取 x0, 沿 p0 = - g0 (g0 为 x0 点梯度),进行一维优化。
  • 对于每次循环,即从 xi 出发,沿 pi 方向进行一维优化,找到 pi 方向的最低点 x_
  • 计算 x_{i+1} 点的梯度 g_{i+1},计算 alpha = N[ g_{i+1} ] / N[ g_i ],据此计算 p_{i+1} = alpha * p_i - g_i
  • 某次一维最优化以后,梯度 norm 小于某个值,结束迭代。

2. 示例代码


#include<iostream>
using namespace std;

#include<cmath>
#define max_iteration 100

double f( int n, double * x ){

        //return x[0] * x[0] + x[1] * x[1];
        return 10.0*(x[0]-1)*(x[0]-1) + 20.0 * (x[1]-2) * (x[1]-2) + 30;
}

void df( int n, double * x, double * df ){
        //df[0] = 2 * x[0]; df[1] = 2 * x[1];
        df[0] = 20.0 * (x[0]-1); df[1] = 40.0 * (x[1]-2);
}

void PrintVec( int n, double * x ){
        cout<<"[ ";
        for(int i=0;i<n;i++)cout<<x[i]<<"  "; cout<<"]\n";
}

double VecProd( int n, double * a, double * b)
{
        double y = 0; for(int i=0;i<n;i++) y += a[i] * b[i];
        return y;
}

bool VecBetween( int n, double * a, double * b, double * d ){
        double y=0;
        for(int i=0;i<n;i++){
                y += (d[i]-a[i]) * (d[i]-b[i]);
        }
        return y < 0 ? true : false;
}

double VecDistance( int n, double * a, double * b )
{
        double y = 0; for(int i=0;i<n;i++) y += (a[i] - b[i]) * (a[i] - b[i]);
        return sqrt(y);
}

double VecNorm( int n, double * a )
{
        double y = 0; for(int i=0;i<n;i++) y += a[i] * a[i];
        return y;
}

double op1d( double (*f)(int, double *), int n, double * x0, double * p, double step, double precision )
{
        double * a = new double [n]; for(int i=0;i<n;i++) a[i] = x0[i];
        double fa = f(n, x0);
        double * b = new double [n]; for(int i=0;i<n;i++) b[i] = a[i] + step * p[i] / sqrt( VecNorm(n, p) );
        double fb = f(n, b);
        for(int i=0;i<10;i++)
        {
                if( fb > fa )
                {
                        step /= 10;
                        for(int i=0;i<n;i++) b[i] = a[i] + step * p[i] / sqrt( VecNorm(n, p) );
                        fb = f(n, b);
                }
                else break;
        }
        if( fb > fa ){
                cout<<" op1d: f( x0 + step/10^10 * p/|p| ) > f( x0 ), that means x0 is already close to a local minimum. "<<endl;
                exit(1);
        }

        // find a suspicious zone
        double * c = new double [n]; double fc;
        int i;
        for(i=0;i<max_iteration;i++)
        {
                for(int j=0;j<n;j++) c[j] = b[j] + 1.618 * (b[j] - a[j]);
                fc = f( n, c ); if( fc > fb ) break; // suspicious zone found: [a,c]
                for(int j=0;j<n;j++)
                {
                        a[j] = b[j]; b[j] = c[j];
                }
                fa = fb; fb = fc;
        }
        if( i == max_iteration ){
                cout<<" op1d: failed to find a suspicious zone. \n"; exit(1);
        }
        cout<<" After "<<i+1<<" iterations, found a suspicious zone : \n";
        cout<<" fa = "<<fa<<"  a = "; PrintVec(n, a);
        cout<<" fb = "<<fb<<"  b = "; PrintVec(n, b);
        cout<<" fc = "<<fc<<"  c = "; PrintVec(n, c);

        // narrow the suspicious zone [a, c], to find the 1d minimum
        double * d = new double [n]; double fd;
        for(i=0;i<max_iteration;i++)
        {
                for(int j=0;j<n;j++) d[j] = a[j] + 0.618 * (c[j] - a[j] );
                fd = f( n, d );
                if( VecBetween( n, a, b, d ) )
                {
                        if( fd < fb ) // [a, b, c] -> [a, d, b]
                        {
                                for(int j=0;j<n;j++)
                                {
                                        c[j] = b[j]; b[j] = d[j];
                                }
                                fc = fb; fb = fd;
                        }
                        else // [a, b, c] -> [d, b, c]
                        {
                                for(int j=0;j<n;j++) a[j] = d[j];
                                fa = fd;
                        }
                }
                else
                {
                        if( fd < fb ) // [a, b, c] -> [b, d, c]
                        {
                                for(int j=0;j<n;j++)
                                {
                                        a[j] = b[j]; b[j] = d[j];
                                }
                                fa = fb; fb = fd;
                        }
                        else // [a, b, c] -> [a, b, d]
                        {
                                for(int j=0;j<n;j++)
                                {
                                        c[j] = d[j];
                                }
                                fc = fd;
                        }
                }
                if( VecDistance( n, a, c ) < precision )break;
        }
        for(i=0;i<n;i++) x0[i] = (a[i] + c[i])/2;
        cout<<" After "<<i+1<<" iterations, a 1d minimum is found.\n";
        delete [] a; delete [] b; delete [] c; delete [] d;
        return f(n, x0);
}


void cgm( double (*f)(int, double *), void (*df)(int, double *, double * ), int n, double * x0, double step, double precision, double epsabs ){

        double * g = new double [n]; df( n, x0, g );
        double * p = new double [n]; for(int i=0;i<n;i++) p[i] = -g[i];

        int k;
        for( k=0;k<max_iteration;k++)
        {
                double alpha = 1 / VecNorm(n, g);
                cout<<"-------------------------------------------------\n";
                cout<<" descend direction p = "; PrintVec(n, p);
                double f1dm = op1d( f, n, x0, p, step, precision );
                cout<<" op1d minimum: f = "<< f1dm <<", x = "; PrintVec(n, x0);
                cout<<"-------------------------------------------------\n";
                df( n, x0, g );
                if( sqrt(VecNorm(n, g)) < epsabs ) break;
                alpha *= VecNorm(n, g);
                for(int i=0;i<n;i++) p[i] = alpha * p[i] - g[i];
        }
        if( k == max_iteration )
        {
                cout<<" cgm: failed to find a minimum after "<<max_iteration<<" iterations.\n";
        }
        else
        {
                cout<<" cgm : found a minimum after "<< k+1 << " 1d optimizations\n";
                cout<<" \t f = "<<f(n, x0)<<", x = "; PrintVec( n, x0 );
        }
        delete [] g; delete [] p;
}

int main(){
        double x0[] = {4,0}, p[] = {-4, 3};
        //op1d( f, 2, x0, p, 0.1, 1E-5 );
        cgm( f, df, 2, x0, 100, 1E-8, 1E-5 );
        return 0;
}

运行结果:

-------------------------------------------------
 descend direction p = [ -60  80  ]
 After 2 iterations, found a suspicious zone : 
 fa = 116.4  a = [ 3.4  0.8  ]
 fb = 50.6044  b = [ 2.4292  2.0944  ]
 fc = 126.012  c = [ 0.858446  4.18874  ]
 After 3 iterations, a 1d minimum is found.
 op1d minimum: f = 47.561, x = [ 2.17073  2.43902  ]
-------------------------------------------------
-------------------------------------------------
 descend direction p = [ -28.5544  -10.7079  ]
 After 1 iterations, found a suspicious zone : 
 fa = 47.561  a = [ 2.17073  2.43902  ]
 fb = 30.704  b = [ 1.2344  2.0879  ]
 fc = 51.011  c = [ -0.280578  1.51978  ]
 After 3 iterations, a 1d minimum is found.
 op1d minimum: f = 30, x = [ 1  2  ]
-------------------------------------------------
 cgm : found a minimum after 2 1d optimizations
 	 f = 30, x = [ 1  2  ]

可以看到,一共只需要两次一维优化,这符合理论预期——对于 n 元二次型函数只需要 n 次一维优化。

posted on 2022-10-04 22:01  luyi07  阅读(268)  评论(0编辑  收藏  举报

导航