人工神经网络框架AForge学习(三):后向传播学习算法

// AForge Neural Net Library
//
// Copyright © Andrew Kirillov, 2005-2006
// andrew.kirillov@gmail.com
//

namespace AForge.Neuro.Learning
{
    using System;

    /// <summary>
    /// Back propagation learning algorithm.
    /// </summary>
    ///
    /// <remarks>The class implements back propagation learning algorithm,
    /// which is widely used for training multi-layer neural networks with
    /// continuous activation functions. A threshold (step) activation function
    /// is not suitable for this algorithm, because the algorithm relies on the
    /// derivative of the activation function; a non-linear, differentiable
    /// function such as a sigmoid is expected instead.
    /// </remarks>
    ///
    public class BackPropagationLearning : ISupervisedLearning
    {
        // network to teach
        private readonly ActivationNetwork network;
        // learning rate
        private double learningRate = 0.1;
        // momentum
        private double momentum = 0.0;

        // In the arrays below the first index is the layer and the second index
        // is the neuron; the third index of weightsUpdates is the neuron's input.

        // neuron's errors
        private readonly double[][] neuronErrors;
        // weight's updates
        private readonly double[][][] weightsUpdates;
        // threshold's updates (threshold is the neuron's bias value)
        private readonly double[][] thresholdsUpdates;

        /// <summary>
        /// Learning rate.
        /// </summary>
        ///
        /// <remarks>The value determines speed of learning. A smaller value makes
        /// convergence slower but more accurate; a value that is too large may make
        /// the weights oscillate between inadequate solutions. Assigned values are
        /// clamped to the [0, 1] range.<br /><br />
        /// Default value equals to 0.1.</remarks>
        ///
        public double LearningRate
        {
            get { return learningRate; }
            set
            {
                learningRate = Math.Max( 0.0, Math.Min( 1.0, value ) );
            }
        }

        /// <summary>
        /// Momentum.
        /// </summary>
        ///
        /// <remarks>The value determines the portion of previous weight's update
        /// to use on current iteration. Weight's update values are calculated on
        /// each iteration depending on neuron's error. The momentum specifies the amount
        /// of update to use from previous iteration and the amount of update
        /// to use from current iteration. If the value is equal to 0.1, for example,
        /// then 0.1 portion of previous update and 0.9 portion of current update are used
        /// to update weight's value. Assigned values are clamped to the [0, 1] range.<br /><br />
        /// Default value equals to 0.0.</remarks>
        ///
        public double Momentum
        {
            get { return momentum; }
            set
            {
                momentum = Math.Max( 0.0, Math.Min( 1.0, value ) );
            }
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="BackPropagationLearning"/> class.
        /// </summary>
        ///
        /// <param name="network">Network to teach.</param>
        ///
        /// <exception cref="ArgumentNullException"><paramref name="network"/> is
        /// <see langword="null"/>.</exception>
        ///
        public BackPropagationLearning( ActivationNetwork network )
        {
            if ( network == null )
            {
                throw new ArgumentNullException( "network" );
            }

            this.network = network;

            int layersCount = network.LayersCount;

            // create error and update arrays, one entry per layer
            neuronErrors = new double[layersCount][];
            weightsUpdates = new double[layersCount][][];
            thresholdsUpdates = new double[layersCount][];

            // size the per-layer arrays so that each neuron gets its own slot
            for ( int i = 0; i < layersCount; i++ )
            {
                Layer layer = network[i];
                int neuronsCount = layer.NeuronsCount;

                neuronErrors[i] = new double[neuronsCount];
                weightsUpdates[i] = new double[neuronsCount][];
                thresholdsUpdates[i] = new double[neuronsCount];

                // one weight update per input of each neuron
                for ( int j = 0; j < neuronsCount; j++ )
                {
                    weightsUpdates[i][j] = new double[layer.InputsCount];
                }
            }
        }

        /// <summary>
        /// Runs learning iteration for a single training sample.
        /// </summary>
        ///
        /// <param name="input">Input vector.</param>
        /// <param name="output">Desired output vector.</param>
        ///
        /// <returns>Returns squared error of the last layer divided by 2.</returns>
        ///
        /// <remarks>Runs one learning iteration and updates neuron's weights and
        /// thresholds immediately after the sample is processed (per-sample update,
        /// as opposed to accumulating updates over a whole epoch).</remarks>
        ///
        /// <exception cref="ArgumentNullException"><paramref name="output"/> is
        /// <see langword="null"/>.</exception>
        /// <exception cref="ArgumentException"><paramref name="output"/> has fewer
        /// elements than the last layer has neurons.</exception>
        ///
        public double Run( double[] input, double[] output )
        {
            if ( output == null )
            {
                throw new ArgumentNullException( "output" );
            }

            // compute the network's output
            network.Compute( input );

            // calculate network error
            double error = CalculateError( output );

            // calculate weights updates
            CalculateUpdates( input );

            // update the network
            UpdateNetwork( );

            return error;
        }

        /// <summary>
        /// Runs learning epoch.
        /// </summary>
        ///
        /// <param name="input">Array of input vectors.</param>
        /// <param name="output">Array of desired output vectors.</param>
        ///
        /// <returns>Returns the sum, over all samples, of squared errors of the
        /// last layer divided by 2.</returns>
        ///
        /// <remarks>Runs series of learning iterations - one iteration
        /// for each input sample. Updates neuron's weights after each sample
        /// presented.</remarks>
        ///
        /// <exception cref="ArgumentNullException"><paramref name="input"/> or
        /// <paramref name="output"/> is <see langword="null"/>.</exception>
        /// <exception cref="ArgumentException"><paramref name="output"/> contains
        /// fewer vectors than <paramref name="input"/>.</exception>
        ///
        public double RunEpoch( double[][] input, double[][] output )
        {
            if ( input == null )
            {
                throw new ArgumentNullException( "input" );
            }
            if ( output == null )
            {
                throw new ArgumentNullException( "output" );
            }
            // Fail before any sample is processed, so that a mismatch cannot leave
            // the network trained on only a part of the epoch. Extra output
            // vectors are ignored.
            if ( output.Length < input.Length )
            {
                throw new ArgumentException(
                    "Each input vector must have a corresponding desired output vector.", "output" );
            }

            double error = 0.0;

            // run learning procedure for all samples
            for ( int i = 0, n = input.Length; i < n; i++ )
            {
                error += Run( input[i], output[i] );
            }

            // return summary error
            return error;
        }

        /// <summary>
        /// Calculates error values for all neurons of the network.
        /// </summary>
        ///
        /// <param name="desiredOutput">Desired output vector.</param>
        ///
        /// <returns>Returns summary squared error of the last layer divided by 2.</returns>
        ///
        /// <remarks>The error of each neuron is multiplied by the derivative of that
        /// neuron's own activation function, so layers or neurons with different
        /// activation functions are handled.</remarks>
        ///
        private double CalculateError( double[] desiredOutput )
        {
            int layersCount = network.LayersCount;

            // calculate error values for the last (output) layer first
            ActivationLayer layer = network[layersCount - 1];
            double[] errors = neuronErrors[layersCount - 1];
            int outputsCount = layer.NeuronsCount;

            if ( desiredOutput.Length < outputsCount )
            {
                throw new ArgumentException(
                    "The desired output vector has fewer elements than the last layer has neurons." );
            }

            double error = 0.0;

            for ( int i = 0; i < outputsCount; i++ )
            {
                ActivationNeuron neuron = layer[i];
                double output = neuron.Output;
                // error of the neuron
                double e = desiredOutput[i] - output;
                // error multiplied with activation function's derivative
                errors[i] = e * neuron.ActivationFunction.Derivative2( output );
                // square the error and sum it
                error += ( e * e );
            }

            // calculate error values for other layers, moving towards the first one
            for ( int j = layersCount - 2; j >= 0; j-- )
            {
                layer = network[j];
                errors = neuronErrors[j];

                ActivationLayer layerNext = network[j + 1];
                double[] errorsNext = neuronErrors[j + 1];

                // for all neurons of the layer
                for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
                {
                    ActivationNeuron neuron = layer[i];
                    double sum = 0.0;

                    // for all neurons of the next layer: accumulate their errors
                    // weighted by the connection coming from the current neuron
                    for ( int k = 0, m = layerNext.NeuronsCount; k < m; k++ )
                    {
                        sum += errorsNext[k] * layerNext[k][i];
                    }
                    errors[i] = sum * neuron.ActivationFunction.Derivative2( neuron.Output );
                }
            }

            // return squared error of the last layer divided by 2
            return error / 2.0;
        }

        /// <summary>
        /// Calculates weights and thresholds updates.
        /// </summary>
        ///
        /// <param name="input">Network's input vector.</param>
        ///
        /// <remarks>For every weight the new update is
        /// <c>learningRate * ( momentum * previousUpdate + ( 1 - momentum ) * error * signal )</c>,
        /// where <c>signal</c> is the corresponding element of <paramref name="input"/>
        /// for the first layer and the output of the corresponding neuron of the
        /// previous layer for all other layers. Threshold updates use the same
        /// formula without the <c>signal</c> factor.</remarks>
        ///
        private void CalculateUpdates( double[] input )
        {
            for ( int k = 0, l = network.LayersCount; k < l; k++ )
            {
                ActivationLayer layer = network[k];
                // the first layer is fed by the input vector and has no previous layer
                ActivationLayer layerPrev = null;
                if ( k > 0 )
                {
                    layerPrev = network[k - 1];
                }

                double[] errors = neuronErrors[k];
                double[][] layerWeightsUpdates = weightsUpdates[k];
                double[] layerThresholdUpdates = thresholdsUpdates[k];

                // for each neuron of the layer
                for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
                {
                    ActivationNeuron neuron = layer[i];
                    double[] neuronWeightUpdates = layerWeightsUpdates[i];
                    // portion of the current error used in this iteration
                    double errorPortion = ( 1.0 - momentum ) * errors[i];

                    // for each weight of the neuron
                    for ( int j = 0, m = neuron.InputsCount; j < m; j++ )
                    {
                        double signal = ( layerPrev == null ) ? input[j] : layerPrev[j].Output;

                        // calculate weight update
                        neuronWeightUpdates[j] = learningRate * (
                            momentum * neuronWeightUpdates[j] +
                            errorPortion * signal
                            );
                    }

                    // calculate threshold update
                    layerThresholdUpdates[i] = learningRate * (
                        momentum * layerThresholdUpdates[i] +
                        errorPortion
                        );
                }
            }
        }

        /// <summary>
        /// Updates network's weights and thresholds.
        /// </summary>
        ///
        /// <remarks>Adds the updates prepared by <see cref="CalculateUpdates"/> to
        /// every weight and threshold of every neuron.</remarks>
        ///
        private void UpdateNetwork( )
        {
            // for each layer of the network
            for ( int i = 0, n = network.LayersCount; i < n; i++ )
            {
                ActivationLayer layer = network[i];
                double[][] layerWeightsUpdates = weightsUpdates[i];
                double[] layerThresholdUpdates = thresholdsUpdates[i];

                // for each neuron of the layer
                for ( int j = 0, m = layer.NeuronsCount; j < m; j++ )
                {
                    ActivationNeuron neuron = layer[j];
                    double[] neuronWeightUpdates = layerWeightsUpdates[j];

                    // for each weight of the neuron
                    for ( int k = 0, s = neuron.InputsCount; k < s; k++ )
                    {
                        // update weight
                        neuron[k] += neuronWeightUpdates[k];
                    }
                    // update threshold
                    neuron.Threshold += layerThresholdUpdates[j];
                }
            }
        }
    }
}

posted @ 2009-03-06 17:30 waemz 阅读(1157) 评论(0) 编辑收藏举报

刷新页面返回顶部

人工神经网络框架AForge学习(三):后向传播学习算法

公告