MapReduce八股文范式模板(WordCount为例)
MapReduce-八股文范式模板
mapreduce八股文
八股文也称制义、制艺、时文、八比文。而所谓的股,有对偶的意思。八股文有一套相对固定的写作格式,其题目取自四书五经,以四书命题占多数。
在这里套用八股文的概念,mapreduce同样有一种通用的模板框架,通过这个框架我们可以增添自己需要的业务代码来实现现实业务的需求,本文以WordCount为例。
话不多说,直接上代码。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.StringTokenizer;
/**
* MapReduce八股文模板
* WordCount为例
*
* @author Atrox
* @date 2019/08/27
*/
public class MapReduceModel {

    /**
     * Mapper: tokenizes each input line on whitespace and emits a
     * (word, 1) pair for every token.
     */
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused output key to avoid allocating a new Text per token.
        private final Text mapOutputKey = new Text();
        // Constant count of 1 emitted for every token.
        private static final IntWritable MAP_OUTPUT_VALUE = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                mapOutputKey.set(tokenizer.nextToken());
                context.write(mapOutputKey, MAP_OUTPUT_VALUE);
            }
        }
    }

    /**
     * Reducer: sums all counts for a word and emits (word, total).
     */
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Instance field (was static): reducer instances must not share
        // mutable output state.
        private final IntWritable outputValue = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }

    /**
     * Configures and runs the WordCount job (the "driver").
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if job setup or execution fails
     */
    public int run(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // FIX: correct key is "mapreduce.framework.name" — the original
        // misspelling "mapreduce.frameword.name" was silently ignored.
        configuration.set("mapreduce.framework.name", "local");
        configuration.set("fs.defaultFS", "file:///");

        // Create the job and mark the jar by this driver class.
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        // Input path.
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Map stage.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce stage.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Output path (must not already exist).
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion submits the job itself; the original extra
        // job.submit() call was redundant and has been removed.
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Use the demo paths only when no paths were supplied, so real
        // command-line arguments are no longer silently overridden.
        if (args.length < 2) {
            args = new String[]{"/input/word.txt", "/output/result/"};
        }
        int status = new MapReduceModel().run(args);
        System.out.println(status);
    }
}