// MapReduce two-table join: students, courses and scores
package com.xzy.twotables;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Reduce-side join of two tab-separated tables keyed on the student id.
 *
 * <p>Table one holds {@code studentId<TAB>studentName}; table two holds
 * {@code studentId<TAB>subject<TAB>score}. The job merges both tables into a
 * single output where every line is {@code studentName<TAB>subject=score},
 * for example:
 *
 * <pre>
 * zhangsan math=80
 * zhangsan english=78
 * zhangsan computer=56
 * </pre>
 *
 * <p>Table one:
 *
 * <pre>
 * A001 zhangsan
 * A002 lisi
 * A003 wangwu
 * A004 zhaoliu
 * A005 diqi
 * </pre>
 *
 * <p>Table two:
 *
 * <pre>
 * A001 math 80
 * A002 math 76
 * A003 math 90
 * A004 math 67
 * A005 math 78
 * A001 english 78
 * A002 english 69
 * A003 english 88
 * A004 english 98
 * A005 english 56
 * A001 computer 56
 * A002 computer 77
 * A003 computer 84
 * A004 computer 92
 * A005 computer 55
 * </pre>
 *
 * <p>The columns above are shown with spaces for readability; the mapper
 * splits each input line on a tab character.
 */
public class TwoTablesJob {

    /** Suffix appended to a student name so the reducer can tell it apart from a score record. */
    private static final String NAME_TAG = "_a";

    /** Counter group / name used to report input lines that match neither table layout. */
    private static final String COUNTER_GROUP = "TwoTablesJob";
    private static final String SKIPPED_COUNTER = "SKIPPED_RECORDS";

    /**
     * Emits {@code (studentId, name + "_a")} for two-field lines and
     * {@code (studentId, subject + "=" + score)} for three-field lines.
     */
    public static class TwoTablesMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused across calls; context.write serializes the contents immediately.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Classifies one input line by its number of tab-separated fields.
         *
         * @param key     offset of the line in the input (unused)
         * @param value   the raw input line
         * @param context task context used to emit the tagged record
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length == 2) {
                // Table one: tag the name so the reducer can recognise it.
                outKey.set(fields[0]);
                outValue.set(fields[1] + NAME_TAG);
            } else if (fields.length == 3) {
                // Table two: subject and score collapse into one "subject=score" value.
                outKey.set(fields[0]);
                outValue.set(fields[1] + "=" + fields[2]);
            } else {
                // Neither layout: skip the line, but make the skip visible in the job counters.
                context.getCounter(COUNTER_GROUP, SKIPPED_COUNTER).increment(1);
                return;
            }
            context.write(outKey, outValue);
        }
    }

    /**
     * Joins the names and the score records that share a student id and writes
     * one {@code (name, subject=score)} pair per combination.
     */
    public static class TwoTablesReducer extends Reducer<Text, Text, Text, Text> {

        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Separates the values of one student id into names and scores, then
         * emits their cross product. Nothing is written when either side is empty.
         *
         * <p>A value is treated as a name when it ends with the {@code "_a"} tag;
         * a score record ending in that exact suffix would be misread as a name.
         *
         * @param key     the student id
         * @param value   all tagged names and score records for that id
         * @param context task context used to emit the joined rows
         * @throws IOException          if writing an output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            // Values are copied to Strings, so the lists do not hold on to the iterated Text objects.
            List<String> names = new ArrayList<String>();
            List<String> scores = new ArrayList<String>();
            for (Text v : value) {
                String record = v.toString();
                if (record.endsWith(NAME_TAG)) {
                    // Remove only the trailing tag; splitting on "_" would truncate
                    // names that themselves contain an underscore.
                    names.add(record.substring(0, record.length() - NAME_TAG.length()));
                } else {
                    scores.add(record);
                }
            }
            for (String name : names) {
                outKey.set(name);
                for (String score : scores) {
                    outValue.set(score);
                    context.write(outKey, outValue);
                }
            }
        }
    }

    /**
     * Configures and runs the join job.
     *
     * <p>An existing output directory is deleted recursively before the job
     * starts. The JVM exits with status 0 when the job succeeds, 1 when it
     * fails and 2 when the arguments are missing.
     *
     * @param args {@code args[0]} is the input path specification passed to
     *             {@code FileInputFormat.setInputPaths}; {@code args[1]} is the output directory
     * @throws Exception if job setup, file system access or job execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: TwoTablesJob <input path(s)> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(TwoTablesJob.class);
        job.setMapperClass(TwoTablesMapper.class);
        job.setReducerClass(TwoTablesReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, args[0]);

        Path outputPath = new Path(args[1]);
        // Resolve the file system from the path itself so a non-default scheme in args[1] is honoured.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        // Propagate job failure to the caller through the process exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}