已知分布的抽样

本文主要是根据MC随机抽样思想，进行已知分布的抽样，对于数据分析有用，主要做如下几个版本

C++
MATLAB
C#
PYTHON
C

C++版本的主要代码为

（1）数据部分，概率密度分布

const double energy[210]={21.000000, 22.000000, 23.000000, 24.000000, 25.000000, 26.000000, 27.000000, 28.000000, 29.000000, 30.000000, 31.000000, 32.000000, 33.000000, 34.000000, 35.000000, 36.000000, 37.000000, 38.000000, 39.000000, 40.000000, 41.000000, 42.000000, 43.000000, 44.000000, 45.000000, 46.000000, 47.000000, 48.000000, 49.000000, 50.000000, 51.000000, 52.000000, 53.000000, 54.000000, 55.000000, 56.000000, 57.000000, 58.000000, 59.000000, 60.000000};
const double probability[210]={ 0.003172, 0.001603, 0.003484, 0.000000, 0.000365, 0.003666, 0.001642, 0.005879, 0.005202, 0.022721, 0.027685, 0.056204, 0.075032, 0.118366, 0.163304, 0.215736, 0.283005, 0.343148, 0.422711, 0.494946, 0.574868, 0.652534, 0.717833, 0.794975, 0.842019, 0.909445, 0.924232, 0.986227, 0.974940, 1.000000, 0.964995, 0.968823, 0.896359, 0.866523, 0.761573, 0.687310, 0.557857, 0.420584, 0.248464, 0.038391};

（2）归一化概率密度分布并积分得到分布函数

            for (int x = 1; x < 210; x++)
            {
                sump += probability[x];
            }
            pp[0] = probability[0];
            for (int x = 1; x < 210; x++)
            { pp[x] = pp[x - 1] + probability[x]; }

（3）抽样函数

 1         double IncidentGammaEnergy()
 2         {
 3             double rand=G4UniformRand();
 4             double sum=pp[0];
 5             double pre_sum=0.;
 6             int p_size = sizeof(probability)/sizeof(probability[0]);
 7             for(int k=0;k<p_size-1;k++)
 8             {
 9                 if ((rand>pre_sum) && (rand<=sum))
10                 {
11                     return energy[k]+(energy[k+1]-energy[k])*(rand-pre_sum)*(sum-pre_sum);
12                 }
13                 pre_sum = sum;
14                 sum=pp[k+1];
15             }
16             return energy[p_size-1];
17         }

matlab实现的代码为

clear
clc

% data name is cs137.mat,it has sub var x and y
load cs137
x=x;
y=y;

% normalize
y=y/sum(y);
n=length(x);
% fenbuhanshu
pp=zeros(n,1);
for i=1:n-1
    pp(i+1)=pp(i)+y(i+1);
end

% sample now
pre_sum=0;
sumup=pp(1);
m=100000;
rand_number=rand(m,1);
out=zeros(m,1);
for j=1:m
    for i=1:n-1
        if(rand_number(j)>pre_sum)&&(rand_number(j)<=sumup)
             out(j)=x(i);
        end
        pre_sum=sumup;
        sumup=pp(i+1);
    end
    out=out';
end

% plot and compare
[histvalue,nbins]=hist(out,1:1:n);
histvalue=histvalue';
plot(nbins,histvalue/sum(histvalue))
hold on
plot(x,y,'r')
hold off
legend('抽样结果','原始数据')

抽样100000次（m=100000）结果为

m=10000

m=1000;

C#实现

using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.Statistics;
using System;
using System.Collections.Generic;
using System.IO;

namespace array_sample
{
    internal class Program
    {
        private static void Main(string[] args)
        {
            var spectest = loadspec("spectest");
            var result = IncidentGammaEnergy(spectest, 100000);
            int n = spectest.Length;
            //int n = 200;
            double[] histallll = new double[n];
            var histogram = new Histogram(result, n);
            for (int i = 0; i < histallll.Length; i++)
            {
                histallll[i] = histogram[i].Count;
            }

            //save_data(histallll, "test.txt");

            save_data(histallll, "test.txt");
            //foreach (double test in histallll)
            //{
            //    Console.WriteLine(test);
            //}
            //Console.WriteLine(histogram);
        }

        private static double[] IncidentGammaEnergy(double[] probability, int cishu)
        {
            double sump = 0;
            double[] pp = new double[probability.Length];
            double[] energy = new double[probability.Length];

            for (int x = 0; x < probability.Length; x++)
            { sump += probability[x]; }
            //double pp[210];
            pp[0] = probability[0] / sump;
            energy[0] = 0;
            for (int x = 1; x < probability.Length; x++)
            {
                pp[x] = pp[x - 1] + probability[x] / sump;
                energy[x] = x;
            }

            double sum = pp[0];
            double pre_sum = 0.0;
            int p_size = probability.Length;
            double[] result = new double[cishu];
            var randoms = GenerateRandom(cishu);
            for (int i = 0; i < cishu; i++)
            {
                //double rand = new Random().NextDouble();
                //long tick = DateTime.Now.Ticks;
                //Random ran = new Random((int)(tick & 0xffffffffL) | (int)(tick >> 32));
                //double rand = ran.NextDouble();
                //Console.WriteLine(rand);
                var rand = randoms[i];
                //Console.WriteLine(rand);
                for (int k = 0; k < p_size - 1; k++)
                {
                    if ((rand > pre_sum) && (rand <= sum))
                    {
                        result[i] = energy[k];// +(energy[k + 1] - energy[k]) * (rand - pre_sum) * (sum - pre_sum);
                    }
                    pre_sum = sum;
                    sum = pp[k + 1];
                }
                //result[i] = energy[p_size - 1];
            }
            return result;
        }

        private static List<double> GenerateRandom(int iNum)
        {
            long lTick = DateTime.Now.Ticks;
            List<double> lstRet = new List<double>();
            for (int i = 0; i < iNum; i++)
            {
                Random ran = new Random((int)lTick * i);
                double iTmp = ran.NextDouble();
                lstRet.Add(iTmp);
                lTick += (new Random((int)lTick).Next(978));
            }
            return lstRet;
        }

        private static double[] loadspec(string specname)
        {
            var M = Matrix<double>.Build;
            var V = Vector<double>.Build;

            string[] content = File.ReadAllLines(@specname);
            double[] yvalue = new double[content.Length];
            //double[] axesx = new double[content.Length];

            for (int i = 1; i < content.Length; i++)
            {
                //axesx[i] = i;
                yvalue[i] = Convert.ToDouble(content[i]);
            }
            return yvalue;
        }

        private static void save_data(double[] content, string str)
        {
            FileStream fs = new FileStream(@str, FileMode.Create);
            StreamWriter sw = new StreamWriter(fs);

            for (int i = 1; i < content.Length; i++)
            {
                sw.WriteLine(Convert.ToDouble(content[i]));//Convert.ToString(hist)
            }
            sw.Close();
            sw.Dispose();
            sw = null;
        }

        private static void save_data(Histogram content, string str)
        {
            FileStream fs = new FileStream(@str, FileMode.Create);
            StreamWriter sw = new StreamWriter(fs);

            sw.Write(content);//Convert.ToString(hist)

            sw.Close();
            sw.Dispose();
            sw = null;
        }
    }
}

　　其中遇到的主要问题是随机数的问题，跟MATLAB不同，他生成的随机数有很多重复，这是种子决定的，也就是时间变化赶不上生成的速度，于是查找资料，参考改写了种子，而且只能提前生成随机数。没解决的问题是histgram函数后面指定lower bound 和upper bound都有问题，所以只能让他自动确定了。其实可以自己写一个hist函数，参考引用4或5即可。

2016-12-30 01:24:05

明天写一下python实现，还没安装python，囧~突然发现有安装winpython，只是以前安装的，升级WIN10的1601版本，由于是全新升级，导致各种path路径都没了。。。

python实现

基本思路是使用random.choice函数。首先将数据读入，简单点先做数组好了，复杂点就读取文件，将数组中元素采用*n的形式搞定，n指的是纵坐标，需要为整数。直接choice函数抽样，采用循环，设定不同次数，然后hist结果，最后画图。具体实现如下

按照以上思路果然实现了（2016-12-31，03:18:05）

# -*- coding: utf-8 -*-
import numpy as np
import random as rd
import matplotlib.pyplot as plt
import matplotlib
zhfont1 = matplotlib.font_manager.FontProperties(fname='C:\Windows\Fonts\simsun.ttc')

#load data from file
bb=np.loadtxt('spectest')
cc=bb.copy()
n=len(bb)
aa=range(n)

#sum all yvalue
test=0
for k in range(len(bb)):
    test += bb[k]

#creat an array with yvalues' xvalue
out = [0]*test
j=0
for k in range(len(aa)):
    while bb[k]!=0:
        out[j]= aa[k]
        bb[k]=bb[k]-1
        j=j+1

#sample
cishu=100000
count=0
result=[0]*cishu
while count<cishu:
    result[count]=rd.choice(out)
    count=count+1

#hist it
histout=np.histogram(result, bins=range(1025))
#save data
np.savetxt('result.txt',histout[0])
np.savetxt('cc.txt',cc)
    
#plot data
fig = plt.figure(1)
x = range(1024)
plt.plot(x,histout[0]/cishu,'m:',label="sampled data")
plt.plot(x,cc/test,'k-',label="raw data")
plt.xlabel('道址',fontproperties=zhfont1)
plt.ylabel('计数',fontproperties=zhfont1)
plt.legend(loc = 'upper left')
plt.show()
fig.savefig("test.png")

中间遇到一些波折，这是python自己写的第一个程序吧，，，其中的一个波折是数组长度搞错了，test应该用第一列的纵坐标之和不小心用成了第三列x.*y之和，如下图

检查了下，发现c#中写入文件后没有fs.close，特此标记下。

C的将来有时间再说了，或者谁有兴趣给做个也好啊，C还是不够熟悉哦，虽然大学只是学了C。。。

参考文献：

1、许淑艳老师的《蒙特卡罗方法在实验核物理中的应用》的第三章《由已知分布的随机抽样》

2、C#产生一组不重复随机数的两种方法

3、MATH.NET

4、msdn的Random.NextDouble Method ()

5、JasenKin的《算法--区间数据计算》

posted @ 2016-12-28 21:56 qiangges2017 阅读(1427) 评论(0) 编辑收藏举报

刷新页面返回顶部

qiangges2017

用python,php,apache,mysql,c#,c++,c,labview,matlab,ROOT,geant4,mcnp,egs,Gate等软件、程序、工具包或语言

已知分布的抽样

C++版本的主要代码为

matlab实现的代码为

python实现

公告