“站在巨人的肩上
Standing on Shoulders of Giants.”

Hadoop on Mac with IntelliJ IDEA - 8 单表关联NullPointerException

在简化陆嘉恒《Hadoop实战(第2版)》5.4节"单表关联"的代码时,遇到了空指针异常(NullPointerException)。经分析是代码逻辑问题,在此做个记录。

环境:Mac OS X 10.9.5, IntelliJ IDEA 13.1.5, Hadoop 1.2.1

改好的代码如下,在reduce阶段遇到了NullPointerException。

 1 public class STjoinEx { // Driver, mapper and reducer of the single-table join job discussed in this post.
 2     private static final String TIMES = "TIMES"; // Configuration key of the counter that gates the one-time header row.
 3 
       /**
        * Configures and runs the job, then exits the JVM with the job status.
        *
        * @param args generic Hadoop options followed by exactly two arguments: input path and output path
        * @throws Exception propagated from job setup or execution
        */
 4     public static void main(String[] args) throws Exception {
 5         Configuration configuration = new Configuration();
 6         configuration.setInt(TIMES, 1); // Seed value; the reducer writes the header only while it reads 1.
 7         String[] remainingArgs = new GenericOptionsParser(configuration, args).getRemainingArgs();
 8         if (remainingArgs.length != 2) { // Anything other than <input> <output>: print usage and exit with code 2.
 9             System.err.println("STjoinEx <input> <output>");
10             System.exit(2);
11         }
12 
13         Job job = new Job(configuration, STjoinEx.class.getSimpleName());
14         job.setJarByClass(STjoinEx.class);
15         job.setMapperClass(Map.class);
16         job.setReducerClass(Reduce.class);
17         job.setInputFormatClass(KeyValueTextInputFormat.class); // Mapper therefore receives Text keys and Text values.
18         job.setOutputFormatClass(TextOutputFormat.class);
19         job.setOutputKeyClass(Text.class);
20         job.setOutputValueClass(Text.class);
21 
22         FileInputFormat.setInputPaths(job, new Path(remainingArgs[0]));
23         FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));
24 
25         System.exit(job.waitForCompletion(true) ? 0 : 1); // Exit code 0 when the job reports success, 1 otherwise.
26 
27     }
28 
       /**
        * Emits every input record twice so that the reducer can join the table with itself:
        * once as (key, "1 " + value) and once as (value, "2 " + key).
        * Presumably each input record is a child/parent pair - confirm against the input data.
        */
29     public static class Map extends Mapper<Text, Text, Text, Text> {
30         final static Text LEFT_TABLE = new Text(); // Reused output buffer for the "1 "-tagged record.
31         final static Text RIGHT_TABLE = new Text(); // Reused output buffer for the "2 "-tagged record.
32 
33         @Override
34         protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
35             // left table
36             LEFT_TABLE.set("1 " + value);
37             context.write(key, LEFT_TABLE);
38             // right table
39             RIGHT_TABLE.set("2 " + key);
40             context.write(value, RIGHT_TABLE);
41         }
42     }
43 
       /**
        * For each key, separates the tagged values into two lists and writes every
        * (grandChild, grandParent) combination. Also writes a header row the first time
        * the TIMES counter is read as 1.
        */
44     public static class Reduce extends Reducer<Text, Text, Text, Text> {
45         private static final int INDENT = 2; // Not referenced in this listing; substring(2) below uses the literal instead.
46         private static final Text GRAND_PARENT = new Text(); // Reused output value buffer.
47         private static final Text GRAND_CHILD = new Text(); // Reused output key buffer.
48 
49         @Override
50         protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
51             // output header
52             int times = context.getConfiguration().getInt(TIMES, 1);
53             if (times == 1) {
54                 context.write(new Text("grandChild"), new Text("grandParent"));
55                 context.getConfiguration().setInt(TIMES, ++times); // Stores 2 so later reduce() calls skip the header. NOTE(review): presumably only visible within this task - confirm when running several reducers.
56             }
57 
58             // prepare matrix
59             int headChar = 0;
60             String[] grandChild = new String[10]; // Fixed capacity: an 11th "2 "-tagged value for one key would throw ArrayIndexOutOfBoundsException.
61             String[] grandParent = new String[10]; // Fixed capacity: same limit for "1 "-tagged values.
62             int grandChildNum = 0;
63             int grandParentNum = 0;
64 
65             for (Text value : values) {
66                 headChar = value.charAt(0); // First character is the table tag written by the mapper.
67                 if (headChar == '1') {
68                     grandParent[grandParentNum] = value.toString().substring(2); // Drop the tag and the space after it.
69                     grandParentNum++;
70                 } else { // Every value not tagged '1' is treated as a right-table ("2 ") record.
71                     grandChild[grandChildNum] = value.toString().substring(2);
72                     grandChildNum++;
73                 }
74             }
75 
76             // multiply
77             if (grandChildNum != 0 && grandChildNum != 0) { // NOTE: both operands test grandChildNum; the post identifies this duplicated condition as the defect and changes one operand to grandParentNum.
78                 for (int i = 0; i < grandChildNum; i++) {
79                     GRAND_CHILD.set(grandChild[i]);
80                     for (int j = 0; j < grandParentNum; j++) {
81                         GRAND_PARENT.set(grandParent[j]);
82                         context.write(GRAND_CHILD, GRAND_PARENT); // One output row per (grandChild, grandParent) combination.
83                     }
84                 }
85             }
86         }
87     }
88 }

执行输出为

 1 14/10/07 11:12:51 INFO mapred.JobClient:  map 0% reduce 0%
 2 14/10/07 11:12:54 INFO mapred.JobClient:  map 100% reduce 0%
 3 14/10/07 11:13:01 INFO mapred.JobClient:  map 100% reduce 33%
 4 14/10/07 11:13:04 INFO mapred.JobClient: Task Id : attempt_201410021756_0048_r_000000_0, Status : FAILED
 5 java.lang.NullPointerException
 6     at org.apache.hadoop.io.Text.encode(Text.java:388)
 7     at org.apache.hadoop.io.Text.set(Text.java:178)
 8     at main.ch5.STjoinEx$Reduce.reduce(STjoinEx.java:96)
 9     at main.ch5.STjoinEx$Reduce.reduce(STjoinEx.java:61)
10     at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:177)
11     at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:649)
12     at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
13     at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
14     at java.security.AccessController.doPrivileged(Native Method)
15     at javax.security.auth.Subject.doAs(Subject.java:396)
16     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
17     at org.apache.hadoop.mapred.Child.main(Child.java:249)

从输出信息可以看出,出错位置在源文件第96行(对应上面代码清单的第77行):if (grandChildNum != 0 && grandChildNum != 0)。这里两个判断条件重复了,将第二个条件改为 grandParentNum != 0 即可。

执行结果

 1 grandChild    grandParent
 2 Jone    Alice
 3 Jone    Jesse
 4 Tom    Alice
 5 Tom    Jesse
 6 Tom    Mary
 7 Tom    Ben
 8 Jone    Mary
 9 Jone    Ben
10 Philip    Alice
11 Philip    Jesse
12 Mark    Alice
13 Mark    Jesse
posted @ 2014-10-07 13:05  米高 | Michael  阅读(403)  评论(0)  编辑  收藏  举报