每日总结

词频统计

WordCountMapper：

package com.atguigu.hdfs;

import java.io.IOException;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable>{

    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // TODO Auto-generated method stub
        //super.map(key, value, context);
        //String[] words = StringUtils.split(value.toString());
        String[] words = StringUtils.split(value.toString(), " ");
        for(String word:words)
        {
            context.write(new Text(word), new LongWritable(1));

        }
    }
}

WordCountReducer

package com.atguigu.hdfs;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text arg0, Iterable<LongWritable> arg1, Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // TODO Auto-generated method stub
        //super.reduce(arg0, arg1, arg2);
        int sum=0;
        for(LongWritable num:arg1)
        {
            sum += num.get();

        }
        context.write(arg0,new LongWritable(sum));


    }
}

WordCountRunner

package com.atguigu.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WordCountRunner {

    public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = new Job(conf);
        String dst = "hdfs://hadoop102:8020/mymapreduce2/in/input1.txt";
        //输出路径，必须是不存在的，空文件加也不行。
        String dstOut = "hdfs://hadoop102:8020/mymapreduce2/output101";
        job.setJarByClass(WordCountRunner.class);
        job.setJobName("wordcount");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(dst));
        FileOutputFormat.setOutputPath(job, new Path(dstOut));
        job.waitForCompletion(true);
    }

}

posted @ 2021-09-20 12:37 chenghaixinag 阅读(34) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

chenghaixinag

每日总结

公告