// MapReduce map-side join
package com.bw.mr;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Map-side join: the small "user" table is loaded entirely into each map
 * task's memory during setup(), then each order record is joined against it
 * by uid in map(). The join happens entirely in the map phase, so the job
 * runs with zero reducers.
 */
public class MapJoin {

    public static class JMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        /** Conf key callers may set to point at a different user file. */
        public static final String USER_FILE_KEY = "mapjoin.user.file";

        /** Default HDFS location of the user table (the original hard-coded path). */
        private static final String DEFAULT_USER_FILE = "hdfs://linux04:9000/user.txt";

        // In-memory copy of the user table: uid -> "field1 field2 field3".
        private final Map<String, String> userById = new HashMap<>();

        // Reused output key to avoid allocating a new Text per input record.
        private final Text outKey = new Text();

        /**
         * Runs once before map(): streams the user file from HDFS into the
         * in-memory map. Lines are space-separated; the layout is assumed to
         * be "uid f1 f2 f3" — TODO confirm against the actual user file.
         *
         * @throws IOException if the user file cannot be opened or read
         */
        @Override
        protected void setup(Mapper<LongWritable, Text, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(context.getConfiguration());
            String userFile = context.getConfiguration().get(USER_FILE_KEY, DEFAULT_USER_FILE);
            // try-with-resources closes both streams even if a read throws;
            // the original only closed them on the success path (leak on error).
            try (FSDataInputStream in = fs.open(new Path(userFile));
                 BufferedReader reader =
                         new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] fields = line.split(" ");
                    // Skip blank/short lines instead of throwing
                    // ArrayIndexOutOfBoundsException as the original did.
                    if (fields.length < 4) {
                        continue;
                    }
                    userById.put(fields[0], fields[1] + " " + fields[2] + " " + fields[3]);
                }
            }
        }

        /**
         * Joins one order record ("orderId uid money") with the cached user
         * info and emits "orderId uid userInfo" as the key (value is null).
         * Orders whose uid has no user record are dropped (inner-join
         * semantics); the original concatenated the missing lookup and
         * emitted the literal string "null" instead.
         */
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");
            if (fields.length < 2) {
                return; // malformed order line — nothing to join
            }
            String orderId = fields[0];
            String uid = fields[1]; // join key shared with the user table
            String userInfo = userById.get(uid);
            if (userInfo == null) {
                return; // no matching user record
            }
            outKey.set(orderId + " " + uid + " " + userInfo);
            context.write(outKey, NullWritable.get());
        }
    }

    /**
     * Job driver: map-only job reading order records from {@code args[0]} and
     * writing the joined output to {@code args[1]}.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(MapJoin.class);
        job.setMapperClass(JMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(0); // map-only: the join is done entirely in map()
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Propagate job success/failure as the process exit code
        // (the original discarded waitForCompletion's boolean result).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
// As a true programmer, first respect programming and love the code you write — it is your partner, not a tool.