Hadoop实例之Java代码实现利用MapReduce求π值
需求:假设有一个边长为1的正方形。以正方形的一个顶点为圆心,以1为半径画一段圆弧,于是在正方形内就得到一个圆心角为直角的扇形(四分之一圆)。在正方形内随机生成若干个点,其中一些点落在扇形内,另一些点落在扇形外。正方形的面积是1,扇形的面积是0.25*Pi。设点的总数为n,落在扇形内的点数为nc,在点足够多、足够密集的情况下,nc/n会近似等于扇形面积与正方形面积的比值,也就是nc/n ≈ 0.25*Pi/1,即Pi ≈ 4*nc/n。
首先是如何生成点的问题。Halton序列是一种低差异(拟随机)序列,用Halton序列算法生成的样本点分布十分均匀,因此计算精度较高,效果比较好。
下面是网上找到的一段利用Halton序列算法生成样本点的代码:
/**
 * Generator for the points of a Halton low-discrepancy sequence.
 *
 * <p>Each call to {@link #getNext()} advances an internal counter and returns
 * one coordinate per configured base. Coordinate {@code i} is the radical
 * inverse of the counter in base {@code bases[i]}: the counter's digits in that
 * base, mirrored around the radix point, which yields a value in [0, 1).
 *
 * <p>Instances are not synchronized; use one generator per thread.
 */
public class Pi {
    /**
     * Number of precomputed digit weights per base, read when a generator is
     * constructed. A positive {@code long} has at most 63 digits in base 2 and
     * fewer in any larger base, so 64 entries cover every reachable index.
     */
    static int digit = 64;

    /** Defensive copy of the bases, one per output coordinate. */
    private final int[] bases;

    /** {@code background[i][j]} holds {@code (1 / bases[i])^(j + 1)}. */
    private final double[][] background;

    /** Index of the most recently returned point; 0 before the first call. */
    private long index;

    /**
     * Creates a generator producing one coordinate per entry of {@code base}.
     *
     * @param base the bases to use, each at least 2; the array is copied
     * @throws IllegalArgumentException if {@code base} is null or contains a
     *         value smaller than 2
     */
    Pi(int[] base) {
        if (base == null) {
            throw new IllegalArgumentException("base must not be null");
        }
        for (int b : base) {
            if (b < 2) {
                throw new IllegalArgumentException("every base must be >= 2, got " + b);
            }
        }
        bases = base.clone();
        int depth = digit;
        background = new double[bases.length][depth];
        index = 0;
        for (int i = 0; i < bases.length; i++) {
            double b = 1.0 / bases[i];
            for (int j = 0; j < depth; j++) {
                // Successive powers of 1/base, built by repeated multiplication.
                background[i][j] = j == 0 ? b : background[i][j - 1] * b;
            }
        }
    }

    /**
     * Advances the sequence and returns the next point.
     *
     * @return a new array with one coordinate per base, each in [0, 1)
     */
    double[] getNext() {
        index++;
        double[] result = new double[bases.length];
        for (int i = 0; i < bases.length; i++) {
            long num = index;
            int j = 0;
            // Peel off base-b digits from least significant upward; digit j
            // contributes digit * b^-(j+1).
            while (num != 0) {
                result[i] += num % bases[i] * background[i][j++];
                num /= bases[i];
            }
        }
        return result;
    }

    /** Prints the first 100 points of the (2, 5) Halton sequence, tab-separated. */
    public static void main(String[] args) {
        int[] base = {2, 5};
        Pi test = new Pi(base);
        for (int x = 0; x < 100; x++) {
            double[] t = test.getNext();
            System.out.println(t[0] + "\t" + t[1]);
        }
    }
}
下面是计算π值的代码:
package mapreduce; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import mapreduce.Pi;//下面生成随机数的时候需要这个类,该类即上面那部分代码 /** * * @author sakura * 2019.9.3 * 利用MapReduce计算π值 * */ public class CalPI { public static class PiMapper extends Mapper<Object, Text, Text, IntWritable>{ int number=0; //定义一个变量,用来存放一共生成的点数 //读取文件,每一行都是一个map 本程序读取的文件为十行,每行都是100000 public void map(Object key, Text value, Context context) throws IOException, InterruptedException { int pointNum = Integer.parseInt(value.toString());//将读取到的那一行赋值给pointNum number=number+pointNum;//将总点数赋值给number int[] base = {2,5};//生成随机点所用 Pi test = new Pi(base);//生成随机点所用 for(int x = 0; x < number; x++){ //循环生成随机点 double[] t = test.getNext();//随机生成点,并将坐标存入数组 System.out.println(t[0] + "\t" + t[1]);//控制台输出随机点的坐标 IntWritable result = new IntWritable(0); //定义输出值 if((t[0]*t[0]+t[1]*t[1])<=1)//判断生成的点是否在扇形面积内 { result = new IntWritable(1);//如果在,将输出值赋值为1 } value.set(String.valueOf(number));//定义输出键,输出键为当前生成点的总数 context.write(value, result);//写入 } } } public static class PiReducer extends Reducer<Text,IntWritable,Text,DoubleWritable> { private DoubleWritable result = new DoubleWritable();//声明输出值 public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { double pointNum =Double.parseDouble(key.toString());//获取输入的键 double sum = 0;//定义总数 for (IntWritable val : values) {//循环从values里取值,累加和赋值给sum sum += val.get(); } result.set(sum/pointNum*4);//将计算得到的π值赋值给result context.write(key, result);//将键值,即生成点总数,和result,即计算得到的π值作为一个键值对写入context } } public static void 
main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf,"calculate pi"); job.setJarByClass(CalPI.class); job.setMapperClass(PiMapper.class); job.setReducerClass(PiReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); Path in = new Path("hdfs://192.168.68.130:9000/user/hadoop/nai.txt"); //读入文件地址 Path out = new Path("hdfs://192.168.68.130:9000/user/hadoop/output4"); //输出文件地址,output4不能存在 FileInputFormat.addInputPath(job, in); FileOutputFormat.setOutputPath(job, out); System.exit(job.waitForCompletion(true) ? 0 : 1); } }