已知分布的抽样
本文主要是根据MC随机抽样思想,进行已知分布的抽样,对于数据分析有用,主要做如下几个版本
- C++
- MATLAB
- C#
- PYTHON
- C
C++版本的主要代码为
(1)数据部分,概率密度分布
const double energy[210]={21.000000, 22.000000, 23.000000, 24.000000, 25.000000, 26.000000, 27.000000, 28.000000, 29.000000, 30.000000, 31.000000, 32.000000, 33.000000, 34.000000, 35.000000, 36.000000, 37.000000, 38.000000, 39.000000, 40.000000, 41.000000, 42.000000, 43.000000, 44.000000, 45.000000, 46.000000, 47.000000, 48.000000, 49.000000, 50.000000, 51.000000, 52.000000, 53.000000, 54.000000, 55.000000, 56.000000, 57.000000, 58.000000, 59.000000, 60.000000}; const double probability[210]={ 0.003172, 0.001603, 0.003484, 0.000000, 0.000365, 0.003666, 0.001642, 0.005879, 0.005202, 0.022721, 0.027685, 0.056204, 0.075032, 0.118366, 0.163304, 0.215736, 0.283005, 0.343148, 0.422711, 0.494946, 0.574868, 0.652534, 0.717833, 0.794975, 0.842019, 0.909445, 0.924232, 0.986227, 0.974940, 1.000000, 0.964995, 0.968823, 0.896359, 0.866523, 0.761573, 0.687310, 0.557857, 0.420584, 0.248464, 0.038391};
(2)归一化概率密度分布并积分得到分布函数
for (int x = 1; x < 210; x++) { sump += probability[x]; } pp[0] = probability[0]; for (int x = 1; x < 210; x++) { pp[x] = pp[x - 1] + probability[x]; }
(3)抽样函数
1 double IncidentGammaEnergy() 2 { 3 double rand=G4UniformRand(); 4 double sum=pp[0]; 5 double pre_sum=0.; 6 int p_size = sizeof(probability)/sizeof(probability[0]); 7 for(int k=0;k<p_size-1;k++) 8 { 9 if ((rand>pre_sum) && (rand<=sum)) 10 { 11 return energy[k]+(energy[k+1]-energy[k])*(rand-pre_sum)*(sum-pre_sum); 12 } 13 pre_sum = sum; 14 sum=pp[k+1]; 15 } 16 return energy[p_size-1]; 17 }
matlab实现的代码为
clear clc
% data name is cs137.mat,it has sub var x and y load cs137 x=x; y=y; % normalize y=y/sum(y); n=length(x); % fenbuhanshu pp=zeros(n,1); for i=1:n-1 pp(i+1)=pp(i)+y(i+1); end % sample now pre_sum=0; sumup=pp(1); m=100000; rand_number=rand(m,1); out=zeros(m,1); for j=1:m for i=1:n-1 if(rand_number(j)>pre_sum)&&(rand_number(j)<=sumup) out(j)=x(i); end pre_sum=sumup; sumup=pp(i+1); end out=out'; end % plot and compare [histvalue,nbins]=hist(out,1:1:n); histvalue=histvalue'; plot(nbins,histvalue/sum(histvalue)) hold on plot(x,y,'r') hold off legend('抽样结果','原始数据')
抽样100000次(m=100000)结果为
m=10000
m=1000;
C#实现
using MathNet.Numerics.LinearAlgebra; using MathNet.Numerics.Statistics; using System; using System.Collections.Generic; using System.IO; namespace array_sample { internal class Program { private static void Main(string[] args) { var spectest = loadspec("spectest"); var result = IncidentGammaEnergy(spectest, 100000); int n = spectest.Length; //int n = 200; double[] histallll = new double[n]; var histogram = new Histogram(result, n); for (int i = 0; i < histallll.Length; i++) { histallll[i] = histogram[i].Count; } //save_data(histallll, "test.txt"); save_data(histallll, "test.txt"); //foreach (double test in histallll) //{ // Console.WriteLine(test); //} //Console.WriteLine(histogram); } private static double[] IncidentGammaEnergy(double[] probability, int cishu) { double sump = 0; double[] pp = new double[probability.Length]; double[] energy = new double[probability.Length]; for (int x = 0; x < probability.Length; x++) { sump += probability[x]; } //double pp[210]; pp[0] = probability[0] / sump; energy[0] = 0; for (int x = 1; x < probability.Length; x++) { pp[x] = pp[x - 1] + probability[x] / sump; energy[x] = x; } double sum = pp[0]; double pre_sum = 0.0; int p_size = probability.Length; double[] result = new double[cishu]; var randoms = GenerateRandom(cishu); for (int i = 0; i < cishu; i++) { //double rand = new Random().NextDouble(); //long tick = DateTime.Now.Ticks; //Random ran = new Random((int)(tick & 0xffffffffL) | (int)(tick >> 32)); //double rand = ran.NextDouble(); //Console.WriteLine(rand); var rand = randoms[i]; //Console.WriteLine(rand); for (int k = 0; k < p_size - 1; k++) { if ((rand > pre_sum) && (rand <= sum)) { result[i] = energy[k];// +(energy[k + 1] - energy[k]) * (rand - pre_sum) * (sum - pre_sum); } pre_sum = sum; sum = pp[k + 1]; } //result[i] = energy[p_size - 1]; } return result; } private static List<double> GenerateRandom(int iNum) { long lTick = DateTime.Now.Ticks; List<double> lstRet = new List<double>(); for (int i = 0; i < iNum; i++) { Random ran = new Random((int)lTick * i); double iTmp = ran.NextDouble(); lstRet.Add(iTmp); lTick += (new Random((int)lTick).Next(978)); } return lstRet; } private static double[] loadspec(string specname) { var M = Matrix<double>.Build; var V = Vector<double>.Build; string[] content = File.ReadAllLines(@specname); double[] yvalue = new double[content.Length]; //double[] axesx = new double[content.Length]; for (int i = 1; i < content.Length; i++) { //axesx[i] = i; yvalue[i] = Convert.ToDouble(content[i]); } return yvalue; } private static void save_data(double[] content, string str) { FileStream fs = new FileStream(@str, FileMode.Create); StreamWriter sw = new StreamWriter(fs); for (int i = 1; i < content.Length; i++) { sw.WriteLine(Convert.ToDouble(content[i]));//Convert.ToString(hist) } sw.Close(); sw.Dispose(); sw = null; } private static void save_data(Histogram content, string str) { FileStream fs = new FileStream(@str, FileMode.Create); StreamWriter sw = new StreamWriter(fs); sw.Write(content);//Convert.ToString(hist) sw.Close(); sw.Dispose(); sw = null; } } }
其中遇到的主要问题是随机数的问题,跟MATLAB不同,他生成的随机数有很多重复,这是种子决定的,也就是时间变化赶不上生成的速度,于是查找资料,参考改写了种子,而且只能提前生成随机数。没解决的问题是histgram函数后面指定lower bound 和upper bound都有问题,所以只能让他自动确定了。其实可以自己写一个hist函数,参考引用4或5即可。
2016-12-30 01:24:05
明天写一下python实现,还没安装python,囧~突然发现有安装winpython,只是以前安装的,升级WIN10的1601版本,由于是全新升级,导致各种path路径都没了。。。
python实现
基本思路是使用random.choice函数。首先将数据读入,简单点先做数组好了,复杂点就读取文件,将数组中元素采用*n的形式搞定,n指的是纵坐标,需要为整数。直接choice函数抽样,采用循环,设定不同次数,然后hist结果,最后画图。具体实现如下
按照以上思路果然实现了(2016-12-31,03:18:05)
# -*- coding: utf-8 -*- import numpy as np import random as rd import matplotlib.pyplot as plt import matplotlib zhfont1 = matplotlib.font_manager.FontProperties(fname='C:\Windows\Fonts\simsun.ttc') #load data from file bb=np.loadtxt('spectest') cc=bb.copy() n=len(bb) aa=range(n) #sum all yvalue test=0 for k in range(len(bb)): test += bb[k] #creat an array with yvalues' xvalue out = [0]*test j=0 for k in range(len(aa)): while bb[k]!=0: out[j]= aa[k] bb[k]=bb[k]-1 j=j+1 #sample cishu=100000 count=0 result=[0]*cishu while count<cishu: result[count]=rd.choice(out) count=count+1 #hist it histout=np.histogram(result, bins=range(1025)) #save data np.savetxt('result.txt',histout[0]) np.savetxt('cc.txt',cc) #plot data fig = plt.figure(1) x = range(1024) plt.plot(x,histout[0]/cishu,'m:',label="sampled data") plt.plot(x,cc/test,'k-',label="raw data") plt.xlabel('道址',fontproperties=zhfont1) plt.ylabel('计数',fontproperties=zhfont1) plt.legend(loc = 'upper left') plt.show() fig.savefig("test.png")
中间遇到一些波折,这是python自己写的第一个程序吧,,,其中的一个波折是数组长度搞错了,test应该用第一列的纵坐标之和不小心用成了第三列x.*y之和,如下图
检查了下,发现c#中写入文件后没有fs.close,特此标记下。
C的将来有时间再说了,或者谁有兴趣给做个也好啊,C还是不够熟悉哦,虽然大学只是学了C。。。
参考文献:
1、许淑艳老师的《蒙特卡罗方法在实验核物理中的应用》的第三章《由已知分布的随机抽样》
2、C#产生一组不重复随机数的两种方法
3、MATH.NET
4、msdn的Random.NextDouble Method ()
5、JasenKin的《算法--区间数据计算》