Hadoop的MapReduce的WordCount实验——Java代码

 WordCountMapper.java

package MapReduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;

/**
 * Map stage of the WordCount job.
 *
 * <p>Input records are (key1, value1) pairs where value1 is the text of one record;
 * key1 is not used. For every whitespace-separated token in the text this mapper
 * emits the intermediate pair (token, 1).
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text ,IntWritable>{
	/** The constant count emitted with every word; shared to avoid one allocation per token. */
	private static final IntWritable ONE = new IntWritable(1);

	/**
	 * Reusable output key. Reusing Writable instances across context.write calls is the
	 * pattern used by Hadoop's reference WordCount example.
	 */
	private final Text word = new Text();

	/**
	 * Tokenizes one input record and emits (word, 1) for each non-empty token.
	 *
	 * @param key1    input key of the record (unused)
	 * @param value1  text of the record, e.g. "I Love Beijing"
	 * @param context mapper context used to write the intermediate (k2, v2) pairs
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void map(LongWritable key1, Text value1, Context context)
			throws IOException, InterruptedException{
		String data = value1.toString();
		// Split on runs of any whitespace so that repeated spaces and tabs do not
		// produce empty "words".
		for (String w : data.split("\\s+")) {
			// A blank record or leading whitespace still yields one empty token; skip it.
			if (w.isEmpty()) {
				continue;
			}
			word.set(w);
			context.write(word, ONE);
		}
	}
}

WordCountReducer.java 

package MapReduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;



/**
 * Reduce stage of the WordCount job.
 *
 * <p>Receives a word (k3) together with all the counts emitted for it (v3) and
 * writes a single (word, total) pair.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text ,IntWritable>{
	/**
	 * Sums every count associated with one word and emits the result.
	 *
	 * @param k3      the word being reduced
	 * @param v3      the counts collected for that word
	 * @param context reducer context used to write the output (k4, v4) pair
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void reduce(Text k3, Iterable<IntWritable> v3, Context context)
			throws IOException, InterruptedException{
		int sum = 0;
		java.util.Iterator<IntWritable> counts = v3.iterator();
		while (counts.hasNext()) {
			sum = sum + counts.next().get();
		}
		// Output: k4 = word, v4 = frequency
		IntWritable frequency = new IntWritable(sum);
		context.write(k3, frequency);
	}

}

WordCountMain.java

package MapReduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import junit.framework.Test;

/**
 * Driver for the WordCount MapReduce job.
 *
 * <p>Usage: {@code WordCountMain <input path> <output path>}
 */
public class WordCountMain {
	/**
	 * Configures the WordCount job, submits it, and waits for it to finish.
	 *
	 * <p>The process exits with status 0 when the job succeeds, 1 when it fails, and 2
	 * when the required arguments are missing.
	 *
	 * @param args args[0] is the input path, args[1] is the output path
	 * @throws Exception if job setup or execution throws
	 */
	public static void main(String[] args)throws Exception{
		// Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
		if (args.length < 2) {
			System.err.println("Usage: WordCountMain <input path> <output path>");
			System.exit(2);
		}

		// Create the job; the jar is located via the class that holds main().
		Job job = Job.getInstance(new Configuration());
		job.setJarByClass(WordCountMain.class);

		// Mapper and its intermediate output types <k2, v2>.
		job.setMapperClass(WordCountMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// Reducer and the final output types <k4, v4>.
		job.setReducerClass(WordCountReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		// Input and output locations for the job.
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Run the job and report its outcome through the exit code so that calling
		// scripts can detect failure.
		boolean succeeded = job.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);
	}
}

 

posted @ 2020-05-17 02:48  金鳞踏雨  阅读(7)  评论(0)  编辑  收藏  举报  来源