MapReduce Unit Test
以前用java写MR程序总不习惯写单元测试,就是查错也只是在小规模数据上跑一下程序。昨天工作时,遇到一个bug,查了好久也没查出来,估计是业务逻辑上的错误。后来没办法,只好写了个单元测试,一步步跟踪,瞬间找到问题所在。所以说,工作中还是要勤快些。
1 import static org.junit.Assert.assertEquals; 2 import java.io.IOException; 3 import java.util.ArrayList; 4 import java.util.List; 5 import org.apache.hadoop.conf.Configuration; 6 import org.apache.hadoop.io.LongWritable; 7 import org.apache.hadoop.io.Text; 8 import org.apache.hadoop.mrunit.mapreduce.MapDriver; 9 import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; 10 import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; 11 import org.apache.hadoop.mrunit.types.Pair; 12 import org.junit.Before; 13 import org.junit.Test; 14 import com.wanda.predict.GenerateCustomerNatureFeature.NatureFeatureMappper; 15 import com.wanda.predict.GenerateCustomerNatureFeature.NatureReducer; 16 import com.wanda.predict.pojo.Settings; 17 18 /** 19 * MapReduce 单元测试的模板 , 依赖于junit环境(junit.jar), mrunit.jar , mockito.jar 20 * 21 */ 22 public class MapperReducerUnitTest { 23 // 一些设置,与正常的mr程序一样,不过这里主要是加载一些信息。性能优化之类的就不要在单元测试里设置了。 24 Configuration conf = new Configuration(); 25 //Map.class 的测试驱动类 26 MapDriver<LongWritable, Text, Text, Text> mapDriver; 27 //Reduce.class 的测试驱动类 28 ReduceDriver<Text, Text, Text, Text> reduceDriver; 29 //Map.calss 、 Reduce.class转接到一起的流程测试驱动 30 MapReduceDriver<LongWritable, Text, Text, Text, Text, Text> mapReduceDriver; 31 32 @Before 33 public void setUp() { 34 35 //测试mapreduce 36 NatureFeatureMappper mapper = new NatureFeatureMappper(); 37 NatureReducer reducer = new NatureReducer(); 38 //添加要测试的map类 39 mapDriver = MapDriver.newMapDriver(mapper); 40 //添加要测试的reduce类 41 reduceDriver = ReduceDriver.newReduceDriver(reducer); 42 //添加map类和reduce类 43 mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer); 44 45 //测试配置参数 46 conf.setInt(Settings.TestDataSize.getName(), 1); 47 conf.setInt(Settings.TrainDataSize.getName(), 6); 48 //driver之间是独立的,谁用到谁就设置conf 49 reduceDriver.setConfiguration(conf); 50 mapReduceDriver.setConfiguration(conf); 51 } 52 53 @Test 54 public void testMapper() throws IOException { 55 mapDriver.withInput(new LongWritable(), new 
Text("map的输入")); 56 mapDriver.withOutput(new Text("期望的key"), new Text("期望的value")); 57 58 //打印实际结果 59 List<Pair<Text , Text>> result = mapDriver.run(); 60 for(Pair<Text , Text> kv : result){ 61 System.out.println("mapper : " + kv.getFirst()); 62 System.out.println("mapper : " + kv.getSecond()); 63 } 64 //进行case测试,对比输入输出结果 65 mapDriver.runTest(); 66 } 67 68 @Test 69 public void testReducer() throws IOException { 70 List<Text> values = new ArrayList<Text>(); 71 values.add(new Text("输入")); 72 reduceDriver.withInput(new Text("输入"), values); 73 reduceDriver.withOutput(new Text("期望的输出"), new Text("期望的输出")); 74 reduceDriver.runTest(); 75 } 76 77 @Test 78 public void testMapperReducer() throws IOException { 79 mapReduceDriver.withInput(new LongWritable(), new Text("输入")); 80 mapReduceDriver.withOutput(new Text("期望的输出"), new Text("期望的输出")); 81 //打印实际结果 82 List<Pair<Text, Text>> list = mapReduceDriver.run(); 83 System.out.println("mapreducedriver size:" + list.size()); 84 for(Pair<Text , Text> lst : list){ 85 System.out.println(lst.getFirst()); 86 System.out.println(lst.getSecond()); 87 } 88 //进行case测试,对比输入输出结果 89 mapReduceDriver.runTest(); 90 } 91 92 @Test 93 public void testMapperCount() throws IOException { 94 mapDriver.withInput(new LongWritable(), new Text("输入")); 95 mapDriver.withOutput(new Text("期望的输出"), new Text("期望的输出")); 96 mapDriver.runTest(); 97 //判断 map中的counter值是否与期望的相同 98 assertEquals("Expected 1 counter increment", 1, mapDriver.getCounters().findCounter("data", "suc").getValue()); 99 } 100 }