1 实例描述
factoryname addressID
Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Back of Beijing 1
addressID addressname
1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian
factoryname addressname
Back of Beijing Beijing
Beijing Red Star Beijing
Beijing Rising Beijing
Guangzhou Development Bank Guangzhou
Guangzhou Honda Guangzhou
Shenzhen Thunder Shenzhen
Tencent Shenzhen
2 设计思路
1 import; 2 import java.lang.String; 3 import java.util.Iterator; 4 import java.util.StringTokenizer; 5 6 import org.apache.hadoop.fs.Path; 7 import; 8 import org.apache.hadoop.mapreduce.Job; 9 import org.apache.hadoop.mapreduce.Mapper; 10 import org.apache.hadoop.mapreduce.Reducer; 11 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 13 14 public class MTJoin { 15 public static int time = 0; 16 17 public static class Map extends Mapper<Object, Text, Text, Text> { 18 19 @Override 20 protected void map(Object key, Text value, Context context) 21 throws IOException, InterruptedException { 22 String line = value.toString(); 23 String relationType = new String(); 24 if (line.contains("factoryname") == true 25 || line.contains("addressID") == true) { 26 return; 27 } 28 29 StringTokenizer itr = new StringTokenizer(line); 30 String mapkey = new String(); 31 String mapvalue = new String(); 32 33 String[] split = line.split(" "); 34 35 if (split.length == 2 && split[1].charAt(0) >= '0' 36 && split[1].charAt(0) <= '9') { 37 mapkey = split[1]; 38 mapvalue = split[0]; 39 relationType = "1"; 40 } 41 if (split.length == 2 && split[0].charAt(0) >= '0' 42 && split[0].charAt(0) <= '9') { 43 mapkey = split[0]; 44 mapvalue = split[1]; 45 relationType = "2"; 46 } 47 48 context.write(new Text(mapkey), new Text(relationType + "+" 49 + mapvalue)); 50 51 } 52 } 53 54 public static class Reduce extends Reducer<Text, Text, Text, Text> { 55 56 @Override 57 protected void reduce(Text key, Iterable<Text> values, Context context) 58 throws IOException, InterruptedException { 59 if (0 == time) { 60 context.write(new Text("factoryname"), new Text("addressname")); 61 time++; 62 } 63 64 int factorynum = 0; 65 String[] factory = new String[10]; 66 int addressnum = 0; 67 String[] address = new String[10]; 68 69 for(Text value:values ){ 70 if (0 == value.toString().length()) { 71 continue; 72 } 73 74 char relationType = 
value.toString().charAt(0); 75 76 // left 77 if ('1' == relationType) { 78 factory[factorynum] = value.toString().substring(2); 79 factorynum++; 80 } 81 // right 82 if ('2' == relationType) { 83 address[addressnum] = value.toString().substring(2); 84 addressnum++; 85 } 86 } 87 88 89 if (0 != factorynum && 0 != addressnum) { 90 for (int m = 0; m < factorynum; m++) { 91 for (int n = 0; n < addressnum; n++) { 92 context.write(new Text(factory[m]), 93 new Text(address[n])); 94 } 95 } 96 } 97 } 98 99 } 100 101 public static void main(String[] args) throws Exception { 102 Job job = new Job(); 103 job.setJobName("MTJoin"); 104 job.setJarByClass(MTJoin.class); 105 106 job.setMapperClass(Map.class); 107 job.setReducerClass(Reduce.class); 108 109 job.setOutputKeyClass(Text.class); 110 job.setOutputValueClass(Text.class); 111 112 FileInputFormat.addInputPath(job, new Path(args[0])); 113 FileOutputFormat.setOutputPath(job, new Path(args[1])); 114 115 System.exit(job.waitForCompletion(true) ? 0 : 1); 116 } 117 }
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步