MapReduce--平均分、最高分、最低分以及及格率的计算 - meiLinYa

计算每个学生的平均分，以及个人的最高分和最低分，并计算每个班级的及格率。

先来看一下我的数据。

时间			班级		姓名		科目			成绩
20180501 		1708a1 		li 		bishi 			80
20180501 		1708a1 		li 		jishi 			55
20180501 		1708a1 		li 		project 		90
20180501 		1708a1 		li2		bishi 			80
20180501 		1708a1 		li2		jishi 			20
20180501 		1708a1 		li2		project 		90
20180501 		1708a1 		li3		bishi 			50
20180501 		1708a1 		li3		jishi 			70
20180501 		1708a1 		li3		project 		60
20180501 		1708a1 		zhangsan 	bishi 			88
20180501 		1708a1 		zhangsan 	jishi 			55
20180501 		1708a1 		zhangsan 	project 		98
20180501 		1708a1 		lishi 		bishi 			18
20180501 		1708a1 		lishi 		jishi 			15
20180501 		1708a1 		lishi 		project 		15
20180501 		1708a1 		wangwu		bishi 			88
20180501 		1708a1 		wangwu		jishi 			76
20180501 		1708a1 		wangwu		project 		70
20180501 		1708a2 		li1 		bishi 			80
20180501 		1708a2 		li1 		jishi 			71
20180501 		1708a2 		li1 		project 		96
20180501 		1708a2 		li2 		bishi 			80
20180501 		1708a2 		li2 		jishi 			26
20180501 		1708a2 		li2 		project 		90
20180501 		1708a2 		li3 		bishi 			80
20180501 		1708a2 		li3 		jishi 			55
20180501 		1708a2 		li3 		project 		90
20180501 		1708a2 		zhangliang 	bishi 			81
20180501 		1708a2 		zhangliang 	jishi 			55
20180501 		1708a2 		zhangliang 	project 		98
20180501 		1708a2 		liuli 		bishi	 		70
20180501 		1708a2 		liuli 		jishi 			95
20180501 		1708a2 		liuli 		project 		75
20180501 		1708a2 		wangwu 		bishi 			80
20180501 		1708a2 		wangwu 		jishi 			76
20180501 		1708a2 		wangwu 		project 		70
20180501 		1708a2 		zhangxi 	bishi 			18
20180501 		1708a2 		zhangxi 	jishi 			16
20180501 		1708a2 		zhangxi 	project 		10

数据的各字段之间以空格分隔。

代码来了 -- 平均分，最高分，最低分

package com.huhu.day01;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that reports, for every student, the weighted average score together with the
 * highest and lowest raw subject score.
 *
 * <p>
 * Every input line is expected to carry five whitespace-separated fields:
 * {@code date class name subject score}. Lines that do not match this layout are skipped and
 * counted under the {@code HomeWork2 / MALFORMED_LINES} job counter.
 *
 * <p>
 * Output key: {@code date<TAB>class<TAB>name}. Three records are written per student (average,
 * highest, lowest).
 *
 * @author huhu_k
 *
 */
public class HomeWork2 {

	/**
	 * Parses one input line and emits {@code date:class:name -> subject:score}.
	 *
	 * <p>
	 * The class is part of the key because the same student name can occur in more than one class;
	 * keying on the name alone would merge the scores of different students.
	 */
	public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
		// Reused across map() calls to avoid allocating two Text objects per record.
		private final Text keys = new Text();
		private final Text values = new Text();

		/**
		 * @param key     byte offset of the line (unused)
		 * @param value   one line of the input file
		 * @param context used to emit the intermediate pair and to update counters
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Split on any run of whitespace so that tabs or repeated spaces do not shift the columns.
			String[] line = value.toString().trim().split("\\s+");
			if (line.length < 5 || !isInteger(line[4])) {
				// Blank lines, header lines and truncated records cannot be scored; count and skip.
				context.getCounter("HomeWork2", "MALFORMED_LINES").increment(1);
				return;
			}
			keys.set(line[0] + ":" + line[1] + ":" + line[2]);
			values.set(line[3] + ":" + line[4]);
			context.write(keys, values);
		}

		/** Returns whether {@code text} can be parsed by {@link Integer#parseInt(String)}. */
		private static boolean isInteger(String text) {
			try {
				Integer.parseInt(text);
				return true;
			} catch (NumberFormatException notANumber) {
				return false;
			}
		}
	}

	/**
	 * Aggregates all {@code subject:score} values of one student into an average, a maximum and a
	 * minimum.
	 */
	public static class MyReducer extends Reducer<Text, Text, Text, Text> {

		/**
		 * Weight of a subject in percent: bishi 40, jishi 30, project 30. Integer percentages keep the
		 * arithmetic exact. Subjects that are not listed get weight 0.
		 */
		private static int weightOf(String subject) {
			switch (subject) {
			case "bishi":
				return 40;
			case "jishi":
			case "project":
				return 30;
			default:
				return 0;
			}
		}

		/**
		 * @param key     {@code date:class:name} as produced by {@link MyMapper}
		 * @param value   the student's {@code subject:score} entries
		 * @param context used to emit the three result records
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			int max = Integer.MIN_VALUE;
			int min = Integer.MAX_VALUE;
			// Sum of score * weight and sum of the weights actually seen.
			long weightedSum = 0;
			long totalWeight = 0;
			// Unweighted sum and number of scores, used only when no weighted subject is present.
			long rawSum = 0;
			int count = 0;

			for (Text t : value) {
				String[] parts = t.toString().split(":");
				String subject = parts[0];
				int score = Integer.parseInt(parts[1]);

				max = Math.max(max, score);
				min = Math.min(min, score);

				int weight = weightOf(subject);
				weightedSum += (long) score * weight;
				totalWeight += weight;
				rawSum += score;
				count++;
			}
			if (count == 0) {
				return;
			}

			// Dividing by the weights actually seen keeps the average meaningful when a subject is
			// missing. The result is truncated to a whole number, as the output has always been.
			long avg = totalWeight > 0 ? weightedSum / totalWeight : rawSum / count;

			Text ky = new Text(key.toString().replace(':', '\t'));
			context.write(ky, new Text("平均分   " + avg));
			context.write(ky, new Text("最高值为   " + max));
			context.write(ky, new Text("最低值  " + min));
		}

	}

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @throws Exception if the job cannot be set up or submitted
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 2) {
			System.err.println("Usage: HomeWork2 <input path> <output path>");
			System.exit(2);
		}

		Configuration conf = new Configuration();
		// Job.getInstance replaces the deprecated Job constructor.
		Job job = Job.getInstance(conf, "MyMapReduce Two");

		job.setJarByClass(HomeWork2.class);
		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Exit code 0 on success, 1 on failure.
		boolean waitForCompletion = job.waitForCompletion(true);
		System.exit(waitForCompletion ? 0 : 1);

	}

}

运行结果：

2.及格率

package com.huhu.day01;

import java.io.IOException;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that reports the pass rate of every class.
 *
 * <p>
 * Every input line is expected to carry five whitespace-separated fields:
 * {@code date class name subject score}. A student's total is the weighted sum of the subject
 * scores (bishi 40%, every other subject 30%); a total of at least 60 is a pass. Lines that do not
 * match the layout are skipped and counted under the {@code HomeWork3 / MALFORMED_LINES} job
 * counter.
 *
 * <p>
 * Output: one record per class, {@code class -> "及格率为:" + percentage}.
 *
 * @author huhu_k
 *
 */
public class HomeWork3 {

	/** Groups the raw input lines by {@code date:class}. */
	public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
		// Reused across map() calls to avoid allocating a Text object per record.
		private final Text keys = new Text();

		/**
		 * @param key     byte offset of the line (unused)
		 * @param value   one line of the input file; forwarded unchanged as the map output value
		 * @param context used to emit the intermediate pair and to update counters
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Split on any run of whitespace so that tabs or repeated spaces do not shift the columns.
			String[] line = value.toString().trim().split("\\s+");
			if (line.length < 5 || !isInteger(line[4])) {
				// Blank lines, header lines and truncated records cannot be scored; count and skip.
				context.getCounter("HomeWork3", "MALFORMED_LINES").increment(1);
				return;
			}
			keys.set(line[0] + ":" + line[1]);
			context.write(keys, value);
		}

		/** Returns whether {@code text} can be parsed by {@link Integer#parseInt(String)}. */
		private static boolean isInteger(String text) {
			try {
				Integer.parseInt(text);
				return true;
			} catch (NumberFormatException notANumber) {
				return false;
			}
		}
	}

	/**
	 * Computes each student's weighted total inside one {@code date:class} group, folds the result
	 * into per-class counters and writes the pass rates in {@link #cleanup}.
	 */
	public static class MyReducer extends Reducer<Text, Text, Text, Text> {
		/** Pass mark of 60 points, scaled by 100 because the weights are integer percentages. */
		private static final int PASS_THRESHOLD = 60 * 100;

		/** Class name -> {students counted, students passed}, accumulated over all reduce() calls. */
		private final Map<String, int[]> classTotals = new HashMap<>();

		/**
		 * @param key     {@code date:class} as produced by {@link MyMapper}
		 * @param value   the raw input lines belonging to that group
		 * @param context unused here; results are written in {@link #cleanup}
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			// Local to this call: only the students of the current group are scored, so groups handled
			// by earlier calls are never counted a second time.
			Map<String, Integer> studentTotals = new HashMap<>();

			for (Text t : value) {
				String[] fields = t.toString().trim().split("\\s+");
				String student = fields[2];
				String subject = fields[3];
				// Integer percentages keep the comparison against the pass mark exact.
				int weight = "bishi".equals(subject) ? 40 : 30;
				int weighted = Integer.parseInt(fields[4]) * weight;

				Integer soFar = studentTotals.get(student);
				studentTotals.put(student, soFar == null ? weighted : soFar + weighted);
			}

			// The class name is the part of the key after the date.
			String group = key.toString();
			String classname = group.substring(group.indexOf(':') + 1);

			int[] tally = classTotals.get(classname);
			if (tally == null) {
				tally = new int[2];
				classTotals.put(classname, tally);
			}
			for (int total : studentTotals.values()) {
				tally[0]++;
				if (total >= PASS_THRESHOLD) {
					tally[1]++;
				}
			}
		}

		/**
		 * Writes one {@code class -> pass rate} record per class seen by this reducer task.
		 *
		 * @param context used to emit the result records
		 */
		@Override
		protected void cleanup(Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			DecimalFormat d = new DecimalFormat("0.00%");
			for (Map.Entry<String, int[]> m : classTotals.entrySet()) {
				int[] tally = m.getValue();
				if (tally[0] == 0) {
					// No student was counted for this class; there is no rate to report.
					continue;
				}
				double pass = (double) tally[1] / tally[0];
				context.write(new Text(m.getKey()), new Text("及格率为:" + d.format(pass)));
			}
		}
	}

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @throws Exception if the job cannot be set up or submitted
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 2) {
			System.err.println("Usage: HomeWork3 <input path> <output path>");
			System.exit(2);
		}

		Configuration conf = new Configuration();
		// Job.getInstance replaces the deprecated Job constructor.
		Job job = Job.getInstance(conf, "MyMapReduce Count");

		job.setJarByClass(HomeWork3.class);
		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Exit code 0 on success, 1 on failure.
		boolean waitForCompletion = job.waitForCompletion(true);
		System.exit(waitForCompletion ? 0 : 1);

	}

}

MapReduce 是一个分布式并行离线计算框架。我们只需要关心 map()、reduce()、input 和 output，剩下的由框架完成。

基于yarn的工作流程

posted on 2018-06-26 14:23 meiLinYa 阅读(746) 评论(0) 编辑收藏举报

刷新页面返回顶部