若随机变量X服从一个数学期望为μ、方差为σ^2的高斯分布,则记为 X ~ N(μ, σ^2)。
其概率密度函数为 $f(x) = \frac{1}{\sqrt{2\pi}\,\sigma}\, e^{-\frac{(x-\mu)^2}{2\sigma^2}}$。正态分布的期望值μ决定了其位置,其标准差σ决定了分布的幅度。
我们通常所说的标准正态分布是μ = 0,σ = 1的正态分布。
从上图能够看出,当与均值相差不超过1个标准差(σ)时,满足要求的面积有68.27%。
当相差不超过2个标准差(σ)时,满足要求的面积有95.45%。
当相差不超过3个标准差(σ)时,满足要求的面积有99.73%。
对于满足标准正态分布的曲线,能够通过查表来求得对应区间的面积(概率)。(见文后所附表格)
方差(Variance),是各个数据与其平均数之差的平方的平均数,用字母D表示。在概率论和数理统计中,方差用来度量随机变量和其数学期望(即均值)之间的偏离程度。
标准差(Standard Deviation),是离均差平方和平均后的算术平方根,用σ表示。标准差是方差的算术平方根。
标准差能反映一个数据集的离散程度。
测试代码:
// NormalDistribution.cpp : Defines the entry point for the console application. // #include <stdio.h> #include <tchar.h> #include <iostream> #include <windows.h> #include <algorithm> #define _USE_MATH_DEFINES #include <math.h> using namespace std; // 高斯分布随机数系列,默认期望值为0,方差为1 double GaussRand(double dExpect = 0, double dVariance = 1); double GaussRand(double dExpect, double dVariance) { static double V1, V2, S; static int phase = 0; double X; if ( phase == 0 ) { do { double U1 = (double)rand() / RAND_MAX; double U2 = (double)rand() / RAND_MAX; V1 = 2 * U1 - 1; V2 = 2 * U2 - 1; S = V1 * V1 + V2 * V2; } while(S >= 1 || S == 0); X = V1 * sqrt(-2 * log(S) / S); } else { X = V2 * sqrt(-2 * log(S) / S); } phase = 1 - phase; return (X*dVariance + dExpect); } int _tmain(int argc, _TCHAR* argv[]) { const int DATA_CNT = 100000; double dArrData[DATA_CNT] = {0}; double dSum = 0; // 对全部数赋随机数,默认期望值为0,方差为1 srand(GetTickCount()); for (int nIdx = 0; nIdx < DATA_CNT; nIdx++) { // 防止计算方差时数值过大 dArrData[nIdx] = GaussRand(); dSum += dArrData[nIdx]; } // 求平均数 double dAverageData = dSum / DATA_CNT; // 计算全部的数的方差(各个数据分别与其和的平均数之差的平方的和的平均数) double dVariance = 0.0; for (int nIdx = 0; nIdx < DATA_CNT; nIdx++) { double dDeviate = dArrData[nIdx] - dAverageData; dVariance += pow(dDeviate, 2); } dVariance /= DATA_CNT; // 计算标准差(方差的算术平方根,反映一组数据的离散程序) double dStandardDeviation = sqrt(dVariance); // 计算0.5个正负标准差之间包括的数字个数 int nDataCnt = 0; for (int nIdx = 0; nIdx < DATA_CNT; nIdx++) { double dDeviate = dArrData[nIdx] - dAverageData; if (abs(dDeviate) <= 0.5*dStandardDeviation) { nDataCnt++; } } cout<<nDataCnt<<endl; // 计算1个正负标准差之间包括的数字个数 nDataCnt = 0; for (int nIdx = 0; nIdx < DATA_CNT; nIdx++) { double dDeviate = dArrData[nIdx] - dAverageData; if (abs(dDeviate) <= dStandardDeviation) { nDataCnt++; } } cout<<nDataCnt<<endl; // 计算2个正负标准差之间包括的数字个数 nDataCnt = 0; for (int nIdx = 0; nIdx < DATA_CNT; nIdx++) { double dDeviate = dArrData[nIdx] - dAverageData; if (abs(dDeviate) <= 2*dStandardDeviation) { 
nDataCnt++; } } cout<<nDataCnt<<endl; // 计算3个正负标准差之间包括的数字个数 nDataCnt = 0; for (int nIdx = 0; nIdx < DATA_CNT; nIdx++) { double dDeviate = dArrData[nIdx] - dAverageData; if (abs(dDeviate) <= 3*dStandardDeviation) { nDataCnt++; } } cout<<nDataCnt<<endl; return 0; }
(附)标准正态分布表
Φ(−x) = 1 − Φ(x)
x |
0 |
0.01 |
0.02 |
0.03 |
0.04 |
0.05 |
0.06 |
0.07 |
0.08 |
0.09 |
0 |
0.500 0 |
0.504 0 |
0.508 0 |
0.512 0 |
0.516 0 |
0.519 9 |
0.523 9 |
0.527 9 |
0.531 9 |
0.535 9 |
0.1 |
0.539 8 |
0.543 8 |
0.547 8 |
0.551 7 |
0.555 7 |
0.559 6 |
0.563 6 |
0.567 5 |
0.571 4 |
0.575 3 |
0.2 |
0.579 3 |
0.583 2 |
0.587 1 |
0.591 0 |
0.594 8 |
0.598 7 |
0.602 6 |
0.606 4 |
0.610 3 |
0.614 1 |
0.3 |
0.617 9 |
0.621 7 |
0.625 5 |
0.629 3 |
0.633 1 |
0.636 8 |
0.640 6 |
0.644 3 |
0.648 0 |
0.651 7 |
0.4 |
0.655 4 |
0.659 1 |
0.662 8 |
0.666 4 |
0.670 0 |
0.673 6 |
0.677 2 |
0.680 8 |
0.684 4 |
0.687 9 |
0.5 |
0.691 5 |
0.695 0 |
0.698 5 |
0.701 9 |
0.705 4 |
0.708 8 |
0.712 3 |
0.715 7 |
0.719 0 |
0.722 4 |
0.6 |
0.725 7 |
0.729 1 |
0.732 4 |
0.735 7 |
0.738 9 |
0.742 2 |
0.745 4 |
0.748 6 |
0.751 7 |
0.754 9 |
0.7 |
0.758 0 |
0.761 1 |
0.764 2 |
0.767 3 |
0.770 3 |
0.773 4 |
0.776 4 |
0.779 4 |
0.782 3 |
0.785 2 |
0.8 |
0.788 1 |
0.791 0 |
0.793 9 |
0.796 7 |
0.799 5 |
0.802 3 |
0.805 1 |
0.807 8 |
0.810 6 |
0.813 3 |
0.9 |
0.815 9 |
0.818 6 |
0.821 2 |
0.823 8 |
0.826 4 |
0.828 9 |
0.831 5 |
0.834 0 |
0.836 5 |
0.838 9 |
1 |
0.841 3 |
0.843 8 |
0.846 1 |
0.848 5 |
0.850 8 |
0.853 1 |
0.855 4 |
0.857 7 |
0.859 9 |
0.862 1 |
1.1 |
0.864 3 |
0.866 5 |
0.868 6 |
0.870 8 |
0.872 9 |
0.874 9 |
0.877 0 |
0.879 0 |
0.881 0 |
0.883 0 |
1.2 |
0.884 9 |
0.886 9 |
0.888 8 |
0.890 7 |
0.892 5 |
0.894 4 |
0.896 2 |
0.898 0 |
0.899 7 |
0.901 5 |
1.3 |
0.903 2 |
0.904 9 |
0.906 6 |
0.908 2 |
0.909 9 |
0.911 5 |
0.913 1 |
0.914 7 |
0.916 2 |
0.917 7 |
1.4 |
0.919 2 |
0.920 7 |
0.922 2 |
0.923 6 |
0.925 1 |
0.926 5 |
0.927 9 |
0.929 2 |
0.930 6 |
0.931 9 |
1.5 |
0.933 2 |
0.934 5 |
0.935 7 |
0.937 0 |
0.938 2 |
0.939 4 |
0.940 6 |
0.941 8 |
0.943 0 |
0.944 1 |
1.6 |
0.945 2 |
0.946 3 |
0.947 4 |
0.948 4 |
0.949 5 |
0.950 5 |
0.951 5 |
0.952 5 |
0.953 5 |
0.954 5 |
1.7 |
0.955 4 |
0.956 4 |
0.957 3 |
0.958 2 |
0.959 1 |
0.959 9 |
0.960 8 |
0.961 6 |
0.962 5 |
0.963 3 |
1.8 |
0.964 1 |
0.964 8 |
0.965 6 |
0.966 4 |
0.967 2 |
0.967 8 |
0.968 6 |
0.969 3 |
0.970 0 |
0.970 6 |
1.9 |
0.971 3 |
0.971 9 |
0.972 6 |
0.973 2 |
0.973 8 |
0.974 4 |
0.975 0 |
0.975 6 |
0.976 2 |
0.976 7 |
2 |
0.977 2 |
0.977 8 |
0.978 3 |
0.978 8 |
0.979 3 |
0.979 8 |
0.980 3 |
0.980 8 |
0.981 2 |
0.981 7 |
2.1 |
0.982 1 |
0.982 6 |
0.983 0 |
0.983 4 |
0.983 8 |
0.984 2 |
0.984 6 |
0.985 0 |
0.985 4 |
0.985 7 |
2.2 |
0.986 1 |
0.986 4 |
0.986 8 |
0.987 1 |
0.987 4 |
0.987 8 |
0.988 1 |
0.988 4 |
0.988 7 |
0.989 0 |
2.3 |
0.989 3 |
0.989 6 |
0.989 8 |
0.990 1 |
0.990 4 |
0.990 6 |
0.990 9 |
0.991 1 |
0.991 3 |
0.991 6 |
2.4 |
0.991 8 |
0.992 0 |
0.992 2 |
0.992 5 |
0.992 7 |
0.992 9 |
0.993 1 |
0.993 2 |
0.993 4 |
0.993 6 |
2.5 |
0.993 8 |
0.994 0 |
0.994 1 |
0.994 3 |
0.994 5 |
0.994 6 |
0.994 8 |
0.994 9 |
0.995 1 |
0.995 2 |
2.6 |
0.995 3 |
0.995 5 |
0.995 6 |
0.995 7 |
0.995 9 |
0.996 0 |
0.996 1 |
0.996 2 |
0.996 3 |
0.996 4 |
2.7 |
0.996 5 |
0.996 6 |
0.996 7 |
0.996 8 |
0.996 9 |
0.997 0 |
0.997 1 |
0.997 2 |
0.997 3 |
0.997 4 |
2.8 |
0.997 4 |
0.997 5 |
0.997 6 |
0.997 7 |
0.997 7 |
0.997 8 |
0.997 9 |
0.997 9 |
0.998 0 |
0.998 1 |
2.9 |
0.998 1 |
0.998 2 |
0.998 2 |
0.998 3 |
0.998 4 |
0.998 4 |
0.998 5 |
0.998 5 |
0.998 6 |
0.998 6 |
x |
0 |
0.1 |
0.2 |
0.3 |
0.4 |
0.5 |
0.6 |
0.7 |
0.8 |
0.9 |
3 |
0.998 7 |
0.999 0 |
0.999 3 |
0.999 5 |
0.999 7 |
0.999 8 |
0.999 8 |
0.999 9 |
0.999 9 |
1.000 0 |
(附)正态分布概率表
Φ( u ) =