MapReduce的代码编写----统计学生性别示例

student.txt

1500100001,施笑槐,22,女,文科六班
1500100002,吕金鹏,24,男,文科六班
1500100003,单乐蕊,22,女,理科六班
1500100004,葛德曜,24,男,理科三班
1500100005,宣谷芹,22,女,理科五班
1500100006,边昂雄,21,男,理科二班
1500100007,尚孤风,23,女,文科六班
1500100008,符半双,22,女,理科六班
1500100009,沈德昌,21,男,理科一班
1500100010,羿彦昌,23,男,理科六班
...共1000行

程序代码

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * MapReduce job that counts students per gender.
 *
 * <p>Each input line is expected to be a comma-separated record of the form
 * {@code id,name,age,gender,class}. The mapper emits {@code <gender, 1>} for
 * every well-formed line and the reducer sums the ones for each gender.
 *
 * <p>How to run:
 * <pre>
 * 1. Prepare the data:
 *      hdfs dfs -mkdir -p /student/input
 *      hdfs dfs -put student.txt /student/input
 * 2. Submit the job:
 *      hadoop jar Hadoop-1.0.jar com.shujia.MapReduce.Demo2GenderCnt
 * </pre>
 */
public class Demo2GenderCnt {
    /** Zero-based index of the gender column in a comma-separated student record. */
    private static final int GENDER_FIELD_INDEX = 3;
    /** HDFS directory the job reads from. */
    private static final String INPUT_DIR = "/student/input";
    /** HDFS directory the job writes to; it is deleted before the job starts. */
    private static final String OUTPUT_DIR = "/student/output";
    /** Counter group and name used to report lines that have no gender column. */
    private static final String COUNTER_GROUP = "Demo2GenderCnt";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    /** Map side: turns every student record into a {@code <gender, 1>} pair. */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Writables are serialized on context.write, so one instance can be reused
        // for every record instead of allocating two objects per input line.
        private static final IntWritable ONE = new IntWritable(1);
        private final Text gender = new Text();

        /**
         * Extracts the gender column of one input line and emits it with a count of 1.
         *
         * @param key     offset of the line within the input split (unused)
         * @param value   one line of the input file
         * @param context task context used to emit the output pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on commas; index 0 is the id, index 3 is the gender.
            String[] fields = value.toString().split(",");
            if (fields.length <= GENDER_FIELD_INDEX) {
                // Blank or truncated line: skip it (and count it) rather than fail
                // the whole task with an ArrayIndexOutOfBoundsException.
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }
            gender.set(fields[GENDER_FIELD_INDEX]);
            context.write(gender, ONE);
        }
    }

    /** Reduce side: sums the ones emitted for each gender. */
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reused output value, for the same reason as in the mapper.
        private final IntWritable total = new IntWritable();

        /**
         * Adds up all counts received for one gender and emits {@code <gender, total>}.
         *
         * @param key     the gender
         * @param values  the counts emitted by the map side for this gender
         * @param context task context used to emit the output pair
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int cnt = 0;
            for (IntWritable value : values) {
                cnt += value.get();
            }
            total.set(cnt);
            // The key is already a Text, so it is written back unchanged.
            context.write(key, total);
        }
    }

    /**
     * Driver: configures the job, clears any previous output directory, runs the
     * job and exits with status 0 on success or 1 on failure.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();

        // Build the job from the same configuration that is used for the
        // FileSystem lookup below, so both see identical settings.
        Job job = Job.getInstance(conf);
        job.setJobName("Demo2GenderCnt");
        // Tells Hadoop which jar to ship by locating the one containing this class.
        job.setJarByClass(Demo2GenderCnt.class);

        // Map task: implementation class and its output key/value types.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce task: implementation class and the job's final key/value types.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output locations.
        Path inputPath = new Path(INPUT_DIR);
        Path outputPath = new Path(OUTPUT_DIR);
        FileInputFormat.addInputPath(job, inputPath);

        // The job refuses to start if the output directory already exists, so
        // remove the result of a previous run first. The FileSystem returned by
        // FileSystem.get is deliberately left open here.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        // Block until the job finishes and report its outcome through the exit
        // status so that calling scripts can detect a failed run.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

}

写完代码以后需要进行以下操作

（1）在Xshell中递归创建HDFS的目录（-p 表示连同不存在的父目录一起创建；只创建一级目录时可以不加 -p）
	hdfs dfs -mkdir -p /student/input
（2）将student.txt通过Xftp上传到虚拟机（可以在虚拟机上创建一个目录data，用来存放数据文件）
（3）在Xshell中将数据文件student.txt上传到HDFS的/student/input目录下面
	hdfs dfs -put student.txt /student/input
（4）在IDEA中将编写的代码打包，会生成一个jar包----Hadoop-1.0.jar
	通过Xftp将jar包上传到虚拟机，可以创建一个目录jars，专门用来存放jar包
（5）在Xshell中运行这个jar包（运行前要切换到jar所在的目录）
	在IDEA中复制代码主类的路径：Copy Path-->Copy Reference
	hadoop jar Hadoop-1.0.jar com.shujia.MapReduce.Demo2GenderCnt
	
	可以通过master:8088查看运行进度
	
（6）在Xshell中查看运行结果
	hdfs dfs -cat /student/output/part-r-00000
或者 hadoop fs -cat /student/output/part-r-00000
（7）有必要的话，可以查看运行日志
	yarn logs -applicationId application_1644761997516_0003

运行结果

[root@master jars]# hdfs dfs -cat  /student/output/part-r-00000
女	493
男	507

posted @ 2022-02-16 11:15 阿伟宝座阅读(437) 评论(0) 收藏举报

刷新页面返回顶部

阿伟宝座

MapReduce的代码编写----统计学生性别示例

MapReduce的代码编写----统计学生性别示例

student.txt

程序代码

写完代码以后需要进行以下操作

运行结果

公告