Hadoop的MapReduce的WordCount实验——Java代码
WordCountMapper.java
package MapReduce;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused output objects: context.write() serializes immediately, so one
    // instance per mapper avoids allocating two objects for every token.
    private final Text word = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Tokenizes one input line and emits {@code (word, 1)} for each token.
     *
     * <p>The {@code context} is the mapper's bridge between its input
     * (a line read from HDFS) and its output (the intermediate k2/v2 pairs).
     *
     * @param key1    byte offset of the line within the input split (unused)
     * @param value1  one line of input text, e.g. {@code "I Love Beijing"}
     * @param context used to emit each {@code (word, 1)} pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        String data = value1.toString();
        // Split on single spaces; consecutive spaces produce empty tokens,
        // which we skip so "" is never counted as a word.
        String[] words = data.split(" ");
        for (String w : words) {
            if (w.isEmpty()) {
                continue;
            }
            word.set(w);
            context.write(word, ONE);
        }
    }
}
WordCountReducer.java
package MapReduce;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output value: set once per key before writing, avoiding one
    // allocation per reduce invocation.
    private final IntWritable result = new IntWritable();

    /**
     * Sums the partial counts for one word and emits {@code (word, total)}.
     *
     * <p>The {@code context} is the reducer's bridge between its input
     * (the grouped k3/v3 pairs from the shuffle) and its final k4/v4 output.
     *
     * @param k3      the word
     * @param v3      all counts emitted for this word by the mappers
     * @param context used to emit the {@code (word, total)} pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text k3, Iterable<IntWritable> v3, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable v : v3) {
            total += v.get();
        }
        // Output: k4 = word, v4 = frequency
        result.set(total);
        context.write(k3, result);
    }
}
WordCountMain.java
package MapReduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import junit.framework.Test;
public class WordCountMain {

    /**
     * Configures and submits the WordCount job, then exits with the job's
     * success status.
     *
     * @param args {@code args[0]} = input path, {@code args[1]} = output path
     *             (the output path must not already exist)
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException on missing arguments.
        if (args.length < 2) {
            System.err.println("Usage: WordCountMain <input path> <output path>");
            System.exit(2);
        }
        // Create the job; this class is the entry point packaged in the jar.
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(WordCountMain.class);
        // Mapper and its intermediate output types <k2, v2>.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Reducer and the job's final output types <k4, v4>.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Block until the job finishes and propagate success/failure via the
        // process exit code (the original discarded the boolean result, so the
        // driver reported success even when the job failed).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}