Writing data into HBase with MapReduce: map only, no reduce
/**
 * Write data into HBase with a MapReduce program.
 * Map-only job, no reduce.
 * Uses TableMapReduceUtil.initTableReducerJob("user", null, job);
 *      TableMapReduceUtil.addDependencyJars(job);
 */
package com.beifeng.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
// Note: these classes live in hbase.mapreduce, not hbase.mapred; importing from the wrong package leads to confusing errors
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class MapToHbase {

    private static class Map extends Mapper<LongWritable, Text, NullWritable, Put> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // each input line is tab-separated: rowkey, columnFamily, name, age, gender, hobby
            String[] strs = value.toString().split("\\t");
            if (strs.length == 6 && !strs[0].equals("rowkey")) { // skip the header row
                String keyrow = strs[0];
                String columnFamily = strs[1];
                String name = strs[2];
                String age = strs[3];
                String gender = strs[4];
                String hobby = strs[5];
                Put put = new Put(keyrow.getBytes());
                put.addColumn(columnFamily.getBytes(), "name".getBytes(), name.getBytes());
                put.addColumn(columnFamily.getBytes(), "age".getBytes(), age.getBytes());
                put.addColumn(columnFamily.getBytes(), "gender".getBytes(), gender.getBytes());
                put.addColumn(columnFamily.getBytes(), "hobby".getBytes(), hobby.getBytes());

                context.write(NullWritable.get(), put);
            }
        }
    }

    /*
    // Alternative: emit the Puts through a TableReducer instead of writing them directly from the map
    public static class testReduce extends TableReducer<NullWritable, Put, NullWritable> {

        @Override
        protected void reduce(NullWritable key, Iterable<Put> values,
                Reducer<NullWritable, Put, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            for (Put put : values) {
                context.write(NullWritable.get(), put);
            }
        }
    }
    */

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration config = HBaseConfiguration.create();
        // ZooKeeper quorum used by the HBase cluster
        config.set("hbase.zookeeper.quorum", "hadoopMaster,hdp102,hdp103");
        Job job = Job.getInstance(config);
        job.setJarByClass(MapToHbase.class);
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        // map-only job, so set the number of reduce tasks to 0
        job.setNumReduceTasks(0);
        // HDFS input path, hard-coded here instead of taken from command-line arguments
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.56.101:9000/test"));
        // configures TableOutputFormat for table "user"; passing null means no reducer class is set
        TableMapReduceUtil.initTableReducerJob("user", null, job);
        TableMapReduceUtil.addDependencyJars(job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
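The job writes into an existing table named user, and the input data carries the column family info, so the table has to exist before the job runs. A minimal sketch of creating it from Java with the HBase 1.x Admin API is below; the class name CreateUserTable is only for illustration, and the ZooKeeper quorum is copied from the job above.

package com.beifeng.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

// illustrative helper: create table "user" with column family "info" if it does not exist yet
public class CreateUserTable {

    public static void main(String[] args) throws IOException {
        Configuration config = HBaseConfiguration.create();
        // same ZooKeeper quorum as the MapReduce job
        config.set("hbase.zookeeper.quorum", "hadoopMaster,hdp102,hdp103");

        try (Connection connection = ConnectionFactory.createConnection(config);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("user");
            if (!admin.tableExists(tableName)) {
                HTableDescriptor desc = new HTableDescriptor(tableName);
                desc.addFamily(new HColumnDescriptor("info"));
                admin.createTable(desc);
            }
        }
    }
}

The same thing can of course be done once by hand in the HBase shell instead of from code.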
The source test data (tab-separated) is:
rowkey  columnFamily  name    age  gender  hobby
2001    info          jacky   12   male    run
2002    info          lilian  13   male    run
2003    info          buke    14   male    run
2004    info          sam     15   male    pingpang
2005    info          lucy    16   male    pingpang
2006    info          yaya    17   male    pingpang
2007    info          james   18   female  slag
2008    info          gugu    19   female  slag
2009    info          gaga    20   female  read
2010    info          salila  21   female  cook
2011    info          frank   22   female  smile
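The job reads its input from hdfs://192.168.56.101:9000/test, so this file has to be uploaded to that directory first, for example with hdfs dfs -put. A rough equivalent using the Hadoop FileSystem API is sketched below; the local file name /tmp/user.tsv and the class name UploadTestData are made up for the example.

package com.beifeng.hbase;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// illustrative helper: copy the local tab-separated test file into the job's HDFS input directory
public class UploadTestData {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // NameNode address taken from the input path used in MapToHbase
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.56.101:9000"), conf);
        fs.copyFromLocalFile(new Path("/tmp/user.tsv"), new Path("/test/user.tsv"));
        fs.close();
    }
}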
The query result in HBase is:
2001  column=info:age, timestamp=1499800935968, value=12
2001  column=info:gender, timestamp=1499800935968, value=male
2001  column=info:hobby, timestamp=1499800935968, value=run
2001  column=info:name, timestamp=1499800935968, value=jacky
2002  column=info:age, timestamp=1499800935968, value=13
2002  column=info:gender, timestamp=1499800935968, value=male
2002  column=info:hobby, timestamp=1499800935968, value=run
2002  column=info:name, timestamp=1499800935968, value=lilian
2003  column=info:age, timestamp=1499800935968, value=14
2003  column=info:gender, timestamp=1499800935968, value=male
2003  column=info:hobby, timestamp=1499800935968, value=run
2003  column=info:name, timestamp=1499800935968, value=buke
2004  column=info:age, timestamp=1499800935968, value=15
2004  column=info:gender, timestamp=1499800935968, value=male
2004  column=info:hobby, timestamp=1499800935968, value=pingpang
2004  column=info:name, timestamp=1499800935968, value=sam
2005  column=info:age, timestamp=1499800935968, value=16
2005  column=info:gender, timestamp=1499800935968, value=male
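The listing above looks like the output of scan 'user' in the HBase shell. The same check can also be done from Java with the client Scan API; the sketch below reuses the connection settings from the job, and the class name ScanUserTable is only illustrative.

package com.beifeng.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

// illustrative check: scan table "user" and print every cell as rowkey family:qualifier = value
public class ScanUserTable {

    public static void main(String[] args) throws IOException {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "hadoopMaster,hdp102,hdp103");

        try (Connection connection = ConnectionFactory.createConnection(config);
             Table table = connection.getTable(TableName.valueOf("user"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result result : scanner) {
                for (Cell cell : result.rawCells()) {
                    System.out.println(Bytes.toString(CellUtil.cloneRow(cell)) + " "
                            + Bytes.toString(CellUtil.cloneFamily(cell)) + ":"
                            + Bytes.toString(CellUtil.cloneQualifier(cell)) + " = "
                            + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        }
    }
}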
Please credit the source when reposting, thank you.