// 人工神经网络框架AForge学习(三):后向传播学习算法
// (Learning the AForge artificial neural network framework, part 3: the back-propagation learning algorithm)
// AForge Neural Net Library
//
// Copyright © Andrew Kirillov, 2005-2006
// andrew.kirillov@gmail.com
//
namespace AForge.Neuro.Learning
{
using System;
/// <summary>
/// Back propagation learning algorithm
/// </summary>
///
/// <remarks>The class implements the back propagation learning algorithm,
/// which is widely used for training multi-layer neural networks with
/// continuous activation functions. A threshold (step) activation function
/// cannot be used with this algorithm, because a step function is not
/// differentiable; the intended activation functions are continuous,
/// non-linear, differentiable ones such as the sigmoid function.
/// </remarks>
///
public class BackPropagationLearning : ISupervisedLearning
{
    // network to teach
    private ActivationNetwork network;
    // learning rate (property setter clamps it to [0, 1]); default 0.1
    private double learningRate = 0.1;
    // momentum (portion of the previous update reused on the current iteration); default 0.0
    private double momentum = 0.0;

    // For the per-layer arrays below: the first index is the layer, the second
    // index is the neuron within that layer; for the weights-updates array the
    // third index is the input (synapse) of that neuron.
    // neurons' error terms
    private double[][] neuronErrors = null;
    // pending weight updates
    private double[][][] weightsUpdates = null;
    // pending threshold updates (threshold = bias term of a neuron)
    private double[][] thresholdsUpdates = null;

    /// <summary>
    /// Learning rate
    /// </summary>
    /// <remarks>The value determines speed of learning. A small value converges
    /// slowly but more accurately; a value that is too large can make the weights
    /// oscillate between inadequate solutions. The setter clamps the value to the
    /// [0, 1] range. Default value equals to 0.1.</remarks>
    public double LearningRate
    {
        get { return learningRate; }
        set
        {
            // clamp to [0, 1]
            learningRate = Math.Max( 0.0, Math.Min( 1.0, value ) );
        }
    }

    /// <summary>
    /// Momentum
    /// </summary>
    ///
    /// <remarks>The value determines the portion of previous weight's update
    /// to use on current iteration. Weight's update values are calculated on
    /// each iteration depending on neuron's error. The momentum specifies the amount
    /// of update to use from previous iteration and the amount of update
    /// to use from current iteration. If the value is equal to 0.1, for example,
    /// then 0.1 portion of previous update and 0.9 portion of current update are used
    /// to update weight's value. The setter clamps the value to the [0, 1] range.<br /><br />
    /// Default value equals to 0.0.</remarks>
    ///
    public double Momentum
    {
        get { return momentum; }
        set
        {
            // clamp to [0, 1]
            momentum = Math.Max( 0.0, Math.Min( 1.0, value ) );
        }
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="BackPropagationLearning"/> class
    /// </summary>
    ///
    /// <param name="network">Network to teach</param>
    ///
    public BackPropagationLearning( ActivationNetwork network )
    {
        this.network = network;
        // create error and update (delta) arrays, one slot per layer
        neuronErrors = new double[network.LayersCount][];
        weightsUpdates = new double[network.LayersCount][][];
        thresholdsUpdates = new double[network.LayersCount][];
        // initialize errors and deltas arrays for each layer;
        // each element corresponds to one neuron of that layer
        for ( int i = 0, n = network.LayersCount; i < n; i++ )
        {
            Layer layer = network[i];
            neuronErrors[i] = new double[layer.NeuronsCount];
            weightsUpdates[i] = new double[layer.NeuronsCount][];
            thresholdsUpdates[i] = new double[layer.NeuronsCount];
            // allocate one weight-update slot per input of each neuron
            for ( int j = 0; j < layer.NeuronsCount; j++ )
            {
                weightsUpdates[i][j] = new double[layer.InputsCount];
            }
        }
    }

    /// <summary>
    /// Runs learning iteration — learns one training sample
    /// </summary>
    ///
    /// <param name="input">input vector</param>
    /// <param name="output">desired output vector</param>
    ///
    /// <returns>Returns squared error of the last layer divided by 2</returns>
    ///
    /// <remarks>Runs one learning iteration and updates neuron's
    /// weights and thresholds. Weights are updated after every single sample
    /// ("instance"/online updating, as opposed to batch/epoch updating, which
    /// the classic derivation of back propagation uses).
    /// </remarks>
    ///
    public double Run( double[] input, double[] output )
    {
        // compute the network's output (forward pass)
        network.Compute( input );
        // calculate network error (backward pass: fills neuronErrors)
        double error = CalculateError( output );
        // calculate weight/threshold updates from the errors
        CalculateUpdates( input );
        // apply the updates to the network's weights and thresholds
        UpdateNetwork( );
        return error;
    }

    /// <summary>
    /// Runs learning epoch — one pass over all training samples
    /// </summary>
    ///
    /// <param name="input">array of input vectors</param>
    /// <param name="output">array of output vectors</param>
    ///
    /// <returns>Returns sum of squared errors of the last layer divided by 2</returns>
    ///
    /// <remarks>Runs series of learning iterations - one iteration
    /// for each input sample. Updates neuron's weights after each sample
    /// presented.
    /// </remarks>
    ///
    public double RunEpoch( double[][] input, double[][] output )
    {
        double error = 0.0;
        // run learning procedure for all samples, accumulating the error
        for ( int i = 0, n = input.Length; i < n; i++ )
        {
            error += Run( input[i], output[i] );
        }
        // return summary error over the whole epoch
        return error;
    }

    /// <summary>
    /// Calculates error values for all neurons of the network
    /// </summary>
    ///
    /// <param name="desiredOutput">Desired output vector</param>
    ///
    /// <returns>Returns summary squared error of the last layer divided by 2</returns>
    ///
    private double CalculateError( double[] desiredOutput )
    {
        // current and the next layers
        ActivationLayer layer, layerNext;
        // current and the next layers' error arrays
        double[] errors, errorsNext;
        // error accumulators
        double error = 0, e, sum;
        // neuron's output value
        double output;
        // layers count
        int layersCount = network.LayersCount;
        // assume that all neurons of the network have the same activation function
        IActivationFunction function = network[0][0].ActivationFunction;
        // calculate error values for the last (output) layer first
        layer = network[layersCount - 1];
        errors = neuronErrors[layersCount - 1];
        for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
        {
            output = layer[i].Output;
            // raw error of the neuron: desired minus actual
            e = desiredOutput[i] - output;
            // error multiplied with activation function's derivative
            // (Derivative2 presumably evaluates the derivative from the neuron's
            // output value — confirm against IActivationFunction)
            errors[i] = e * function.Derivative2( output );
            // square the error and sum it
            error += ( e * e );
        }
        // calculate error values for the remaining (hidden) layers,
        // propagating backwards from the layer before the output layer
        for ( int j = layersCount - 2; j >= 0; j-- )
        {
            layer = network[j];
            layerNext = network[j + 1];
            errors = neuronErrors[j];
            errorsNext = neuronErrors[j + 1];
            // for all neurons of the layer
            for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
            {
                sum = 0.0;
                // weight each next-layer error by the connecting weight
                // (layerNext[k][i] is the weight from neuron i to next-layer neuron k)
                for ( int k = 0, m = layerNext.NeuronsCount; k < m; k++ )
                {
                    sum += errorsNext[k] * layerNext[k][i];
                }
                errors[i] = sum * function.Derivative2( layer[i].Output );
            }
        }
        // return squared error of the last layer divided by 2
        return error / 2.0;
    }

    /// <summary>
    /// Calculate weights updates
    /// </summary>
    ///
    /// <param name="input">Network's input vector</param>
    ///
    private void CalculateUpdates( double[] input )
    {
        // current neuron
        ActivationNeuron neuron;
        // current and previous layers
        ActivationLayer layer, layerPrev;
        // layer's weights updates
        double[][] layerWeightsUpdates;
        // layer's thresholds updates
        double[] layerThresholdUpdates;
        // layer's errors
        double[] errors;
        // neuron's weights updates
        double[] neuronWeightUpdates;
        // error value
        double error;
        // 1 - calculate updates for the first layer
        //     (its inputs are the network's input vector, not a previous layer's outputs)
        layer = network[0];
        errors = neuronErrors[0];
        layerWeightsUpdates = weightsUpdates[0];
        layerThresholdUpdates = thresholdsUpdates[0];
        // for each neuron of the layer
        for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
        {
            neuron = layer[i];
            error = errors[i];
            neuronWeightUpdates = layerWeightsUpdates[i];
            // for each weight of the neuron
            for ( int j = 0, m = neuron.InputsCount; j < m; j++ )
            {
                // blend previous update (momentum) with the current gradient term
                neuronWeightUpdates[j] = learningRate * (
                    momentum * neuronWeightUpdates[j] +
                    ( 1.0 - momentum ) * error * input[j]
                    );
            }
            // calculate threshold (bias) update; no input factor for the bias
            layerThresholdUpdates[i] = learningRate * (
                momentum * layerThresholdUpdates[i] +
                ( 1.0 - momentum ) * error
                );
        }
        // 2 - for all other layers, whose inputs are the previous layer's outputs
        for ( int k = 1, l = network.LayersCount; k < l; k++ )
        {
            layerPrev = network[k - 1];
            layer = network[k];
            errors = neuronErrors[k];
            layerWeightsUpdates = weightsUpdates[k];
            layerThresholdUpdates = thresholdsUpdates[k];
            // for each neuron of the layer
            for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
            {
                neuron = layer[i];
                error = errors[i];
                neuronWeightUpdates = layerWeightsUpdates[i];
                // for each synapse of the neuron
                for ( int j = 0, m = neuron.InputsCount; j < m; j++ )
                {
                    // same momentum blend, with the previous layer's output as input
                    neuronWeightUpdates[j] = learningRate * (
                        momentum * neuronWeightUpdates[j] +
                        ( 1.0 - momentum ) * error * layerPrev[j].Output
                        );
                }
                // calculate threshold (bias) update
                layerThresholdUpdates[i] = learningRate * (
                    momentum * layerThresholdUpdates[i] +
                    ( 1.0 - momentum ) * error
                    );
            }
        }
    }

    /// <summary>
    /// Update network's weights and thresholds by applying the previously
    /// calculated updates
    /// </summary>
    ///
    private void UpdateNetwork( )
    {
        // current neuron
        ActivationNeuron neuron;
        // current layer
        ActivationLayer layer;
        // layer's weights updates
        double[][] layerWeightsUpdates;
        // layer's thresholds updates
        double[] layerThresholdUpdates;
        // neuron's weights updates
        double[] neuronWeightUpdates;
        // for each layer of the network
        for ( int i = 0, n = network.LayersCount; i < n; i++ )
        {
            layer = network[i];
            layerWeightsUpdates = weightsUpdates[i];
            layerThresholdUpdates = thresholdsUpdates[i];
            // for each neuron of the layer
            for ( int j = 0, m = layer.NeuronsCount; j < m; j++ )
            {
                neuron = layer[j];
                neuronWeightUpdates = layerWeightsUpdates[j];
                // for each weight of the neuron
                for ( int k = 0, s = neuron.InputsCount; k < s; k++ )
                {
                    // update weight
                    neuron[k] += neuronWeightUpdates[k];
                }
                // update threshold (bias)
                neuron.Threshold += layerThresholdUpdates[j];
            }
        }
    }
}
}
// NOTE(review): everything below this point is a verbatim duplicate of the
// class above. Two definitions of BackPropagationLearning in the same
// namespace will not compile — one copy should be removed.
//
// Copyright © Andrew Kirillov, 2005-2006
// andrew.kirillov@gmail.com
//
namespace AForge.Neuro.Learning
{
using System;
/// <summary>
/// Back propagation learning algorithm
/// </summary>
///
/// <remarks>The class implements the back propagation learning algorithm,
/// which is widely used for training multi-layer neural networks with
/// continuous activation functions. A threshold (step) activation function
/// cannot be used with this algorithm, because a step function is not
/// differentiable; the intended activation functions are continuous,
/// non-linear, differentiable ones such as the sigmoid function.
/// </remarks>
///
public class BackPropagationLearning : ISupervisedLearning
{
    // network to teach
    private ActivationNetwork network;
    // learning rate (property setter clamps it to [0, 1]); default 0.1
    private double learningRate = 0.1;
    // momentum (portion of the previous update reused on the current iteration); default 0.0
    private double momentum = 0.0;

    // For the per-layer arrays below: the first index is the layer, the second
    // index is the neuron within that layer; for the weights-updates array the
    // third index is the input (synapse) of that neuron.
    // neurons' error terms
    private double[][] neuronErrors = null;
    // pending weight updates
    private double[][][] weightsUpdates = null;
    // pending threshold updates (threshold = bias term of a neuron)
    private double[][] thresholdsUpdates = null;

    /// <summary>
    /// Learning rate
    /// </summary>
    /// <remarks>The value determines speed of learning. A small value converges
    /// slowly but more accurately; a value that is too large can make the weights
    /// oscillate between inadequate solutions. The setter clamps the value to the
    /// [0, 1] range. Default value equals to 0.1.</remarks>
    public double LearningRate
    {
        get { return learningRate; }
        set
        {
            // clamp to [0, 1]
            learningRate = Math.Max( 0.0, Math.Min( 1.0, value ) );
        }
    }

    /// <summary>
    /// Momentum
    /// </summary>
    ///
    /// <remarks>The value determines the portion of previous weight's update
    /// to use on current iteration. Weight's update values are calculated on
    /// each iteration depending on neuron's error. The momentum specifies the amount
    /// of update to use from previous iteration and the amount of update
    /// to use from current iteration. If the value is equal to 0.1, for example,
    /// then 0.1 portion of previous update and 0.9 portion of current update are used
    /// to update weight's value. The setter clamps the value to the [0, 1] range.<br /><br />
    /// Default value equals to 0.0.</remarks>
    ///
    public double Momentum
    {
        get { return momentum; }
        set
        {
            // clamp to [0, 1]
            momentum = Math.Max( 0.0, Math.Min( 1.0, value ) );
        }
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="BackPropagationLearning"/> class
    /// </summary>
    ///
    /// <param name="network">Network to teach</param>
    ///
    public BackPropagationLearning( ActivationNetwork network )
    {
        this.network = network;
        // create error and update (delta) arrays, one slot per layer
        neuronErrors = new double[network.LayersCount][];
        weightsUpdates = new double[network.LayersCount][][];
        thresholdsUpdates = new double[network.LayersCount][];
        // initialize errors and deltas arrays for each layer;
        // each element corresponds to one neuron of that layer
        for ( int i = 0, n = network.LayersCount; i < n; i++ )
        {
            Layer layer = network[i];
            neuronErrors[i] = new double[layer.NeuronsCount];
            weightsUpdates[i] = new double[layer.NeuronsCount][];
            thresholdsUpdates[i] = new double[layer.NeuronsCount];
            // allocate one weight-update slot per input of each neuron
            for ( int j = 0; j < layer.NeuronsCount; j++ )
            {
                weightsUpdates[i][j] = new double[layer.InputsCount];
            }
        }
    }

    /// <summary>
    /// Runs learning iteration — learns one training sample
    /// </summary>
    ///
    /// <param name="input">input vector</param>
    /// <param name="output">desired output vector</param>
    ///
    /// <returns>Returns squared error of the last layer divided by 2</returns>
    ///
    /// <remarks>Runs one learning iteration and updates neuron's
    /// weights and thresholds. Weights are updated after every single sample
    /// ("instance"/online updating, as opposed to batch/epoch updating, which
    /// the classic derivation of back propagation uses).
    /// </remarks>
    ///
    public double Run( double[] input, double[] output )
    {
        // compute the network's output (forward pass)
        network.Compute( input );
        // calculate network error (backward pass: fills neuronErrors)
        double error = CalculateError( output );
        // calculate weight/threshold updates from the errors
        CalculateUpdates( input );
        // apply the updates to the network's weights and thresholds
        UpdateNetwork( );
        return error;
    }

    /// <summary>
    /// Runs learning epoch — one pass over all training samples
    /// </summary>
    ///
    /// <param name="input">array of input vectors</param>
    /// <param name="output">array of output vectors</param>
    ///
    /// <returns>Returns sum of squared errors of the last layer divided by 2</returns>
    ///
    /// <remarks>Runs series of learning iterations - one iteration
    /// for each input sample. Updates neuron's weights after each sample
    /// presented.
    /// </remarks>
    ///
    public double RunEpoch( double[][] input, double[][] output )
    {
        double error = 0.0;
        // run learning procedure for all samples, accumulating the error
        for ( int i = 0, n = input.Length; i < n; i++ )
        {
            error += Run( input[i], output[i] );
        }
        // return summary error over the whole epoch
        return error;
    }

    /// <summary>
    /// Calculates error values for all neurons of the network
    /// </summary>
    ///
    /// <param name="desiredOutput">Desired output vector</param>
    ///
    /// <returns>Returns summary squared error of the last layer divided by 2</returns>
    ///
    private double CalculateError( double[] desiredOutput )
    {
        // current and the next layers
        ActivationLayer layer, layerNext;
        // current and the next layers' error arrays
        double[] errors, errorsNext;
        // error accumulators
        double error = 0, e, sum;
        // neuron's output value
        double output;
        // layers count
        int layersCount = network.LayersCount;
        // assume that all neurons of the network have the same activation function
        IActivationFunction function = network[0][0].ActivationFunction;
        // calculate error values for the last (output) layer first
        layer = network[layersCount - 1];
        errors = neuronErrors[layersCount - 1];
        for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
        {
            output = layer[i].Output;
            // raw error of the neuron: desired minus actual
            e = desiredOutput[i] - output;
            // error multiplied with activation function's derivative
            // (Derivative2 presumably evaluates the derivative from the neuron's
            // output value — confirm against IActivationFunction)
            errors[i] = e * function.Derivative2( output );
            // square the error and sum it
            error += ( e * e );
        }
        // calculate error values for the remaining (hidden) layers,
        // propagating backwards from the layer before the output layer
        for ( int j = layersCount - 2; j >= 0; j-- )
        {
            layer = network[j];
            layerNext = network[j + 1];
            errors = neuronErrors[j];
            errorsNext = neuronErrors[j + 1];
            // for all neurons of the layer
            for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
            {
                sum = 0.0;
                // weight each next-layer error by the connecting weight
                // (layerNext[k][i] is the weight from neuron i to next-layer neuron k)
                for ( int k = 0, m = layerNext.NeuronsCount; k < m; k++ )
                {
                    sum += errorsNext[k] * layerNext[k][i];
                }
                errors[i] = sum * function.Derivative2( layer[i].Output );
            }
        }
        // return squared error of the last layer divided by 2
        return error / 2.0;
    }

    /// <summary>
    /// Calculate weights updates
    /// </summary>
    ///
    /// <param name="input">Network's input vector</param>
    ///
    private void CalculateUpdates( double[] input )
    {
        // current neuron
        ActivationNeuron neuron;
        // current and previous layers
        ActivationLayer layer, layerPrev;
        // layer's weights updates
        double[][] layerWeightsUpdates;
        // layer's thresholds updates
        double[] layerThresholdUpdates;
        // layer's errors
        double[] errors;
        // neuron's weights updates
        double[] neuronWeightUpdates;
        // error value
        double error;
        // 1 - calculate updates for the first layer
        //     (its inputs are the network's input vector, not a previous layer's outputs)
        layer = network[0];
        errors = neuronErrors[0];
        layerWeightsUpdates = weightsUpdates[0];
        layerThresholdUpdates = thresholdsUpdates[0];
        // for each neuron of the layer
        for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
        {
            neuron = layer[i];
            error = errors[i];
            neuronWeightUpdates = layerWeightsUpdates[i];
            // for each weight of the neuron
            for ( int j = 0, m = neuron.InputsCount; j < m; j++ )
            {
                // blend previous update (momentum) with the current gradient term
                neuronWeightUpdates[j] = learningRate * (
                    momentum * neuronWeightUpdates[j] +
                    ( 1.0 - momentum ) * error * input[j]
                    );
            }
            // calculate threshold (bias) update; no input factor for the bias
            layerThresholdUpdates[i] = learningRate * (
                momentum * layerThresholdUpdates[i] +
                ( 1.0 - momentum ) * error
                );
        }
        // 2 - for all other layers, whose inputs are the previous layer's outputs
        for ( int k = 1, l = network.LayersCount; k < l; k++ )
        {
            layerPrev = network[k - 1];
            layer = network[k];
            errors = neuronErrors[k];
            layerWeightsUpdates = weightsUpdates[k];
            layerThresholdUpdates = thresholdsUpdates[k];
            // for each neuron of the layer
            for ( int i = 0, n = layer.NeuronsCount; i < n; i++ )
            {
                neuron = layer[i];
                error = errors[i];
                neuronWeightUpdates = layerWeightsUpdates[i];
                // for each synapse of the neuron
                for ( int j = 0, m = neuron.InputsCount; j < m; j++ )
                {
                    // same momentum blend, with the previous layer's output as input
                    neuronWeightUpdates[j] = learningRate * (
                        momentum * neuronWeightUpdates[j] +
                        ( 1.0 - momentum ) * error * layerPrev[j].Output
                        );
                }
                // calculate threshold (bias) update
                layerThresholdUpdates[i] = learningRate * (
                    momentum * layerThresholdUpdates[i] +
                    ( 1.0 - momentum ) * error
                    );
            }
        }
    }

    /// <summary>
    /// Update network's weights and thresholds by applying the previously
    /// calculated updates
    /// </summary>
    ///
    private void UpdateNetwork( )
    {
        // current neuron
        ActivationNeuron neuron;
        // current layer
        ActivationLayer layer;
        // layer's weights updates
        double[][] layerWeightsUpdates;
        // layer's thresholds updates
        double[] layerThresholdUpdates;
        // neuron's weights updates
        double[] neuronWeightUpdates;
        // for each layer of the network
        for ( int i = 0, n = network.LayersCount; i < n; i++ )
        {
            layer = network[i];
            layerWeightsUpdates = weightsUpdates[i];
            layerThresholdUpdates = thresholdsUpdates[i];
            // for each neuron of the layer
            for ( int j = 0, m = layer.NeuronsCount; j < m; j++ )
            {
                neuron = layer[j];
                neuronWeightUpdates = layerWeightsUpdates[j];
                // for each weight of the neuron
                for ( int k = 0, s = neuron.InputsCount; k < s; k++ )
                {
                    // update weight
                    neuron[k] += neuronWeightUpdates[k];
                }
                // update threshold (bias)
                neuron.Threshold += layerThresholdUpdates[j];
            }
        }
    }
}
}