多个map对应一个reduce记录
/* 说明:当各数据来源的格式不一致、无法用同一种方式读取时,可以采用多路输入(每种格式对应一个 Mapper),最后交由同一个 reduce 统一处理。*/
下面以单词统计(WordCount)为例。
第一步 :App
/**
 * Driver for the multi-input word count: registers one mapper per input
 * format on a single job, attaches the shared reducer, and runs the job.
 */
public class WCApp {
    /**
     * Configures and launches the job, then exits with the job's status.
     *
     * @param args {@code args[0]} is the output directory for the job
     * @throws Exception if the job cannot be configured or fails while running
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: WCApp <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf);

        // General job properties.
        job.setJobName("WCAppMulti");
        job.setJarByClass(WCApp.class); // class used to locate the job jar

        // Multiple inputs: each path gets its own input format and mapper.
        MultipleInputs.addInputPath(job, new Path("file:///d:/mr/txt"), TextInputFormat.class, WCTextMapper.class);
        MultipleInputs.addInputPath(job, new Path("file:///d:/mr/seq"), SequenceFileInputFormat.class, WCSeqMapper.class);

        // Output location.
        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        job.setReducerClass(WCReducer.class);
        job.setNumReduceTasks(3);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Propagate the job result to the process exit code so callers can detect failure.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
第二步:map的处理
/**
*文本类型的文件的读取
* WCTextMapper
*/
public class WCSeqMapper extends Mapper<IntWritable, Text, Text, IntWritable>{
protected void map(IntWritable key, Text value, Context context) throws IOException, InterruptedException {
Text keyOut = new Text();
IntWritable valueOut = new IntWritable();
String[] arr = value.toString().split(" ");
for(String s : arr){
keyOut.set(s);
valueOut.set(1);
context.write(keyOut,valueOut);
}
}
}
/**
* hadoop的压缩文件读取
*SeqMapper
*/
public class WCSeqMapper extends Mapper<IntWritable, Text, Text, IntWritable>{
protected void map(IntWritable key, Text value, Context context) throws IOException, InterruptedException {
Text keyOut = new Text();
IntWritable valueOut = new IntWritable();
String[] arr = value.toString().split(" ");
for(String s : arr){
keyOut.set(s);
valueOut.set(1);
context.write(keyOut,valueOut);
}
}
}
第三步:对读取的数据集进行聚集
/**
* Reducer
*/
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
/**
* reduce
*/
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int count = 0 ;
for(IntWritable iw : values){
count = count + iw.get() ;
}
String tno = Thread.currentThread().getName();
System.out.println(tno + " : MaxTempReducer :" + key.toString() + "=" + count);
context.write(key,new IntWritable(count));
}
}