MapReduce - Reading from HBase
Reading HBase data with MapReduce.

The job below uses TableInputFormat to scan an HBase table, counts how many times each cell value appears (essentially a word count over column values), and writes the totals to HDFS. The code is as follows:
package com.hbase.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author FengZhen
 * @create 2018-09-17
 * MapReduce job that reads data from HBase and counts the occurrences
 * of each cell value.
 */
public class AnalyzeData extends Configured implements Tool {

    // ZooKeeper quorum hosts and client port for the HBase cluster.
    private static String addr = "HDP233,HDP232,HDP231";
    private static String port = "2181";

    // Counters reported in the job output for basic sanity checking.
    public enum Counters {
        ROWS, COLS, VALID, ERROR
    }

    static class AnalyzeMapper extends TableMapper<Text, IntWritable> {
        private IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            context.getCounter(Counters.ROWS).increment(1);
            try {
                // Emit each cell value as a key with a count of 1.
                for (Cell cell : value.listCells()) {
                    context.getCounter(Counters.COLS).increment(1);
                    String hbaseValue = Bytes.toString(CellUtil.cloneValue(cell));
                    context.write(new Text(hbaseValue), ONE);
                    context.getCounter(Counters.VALID).increment(1);
                }
            } catch (Exception e) {
                e.printStackTrace();
                context.getCounter(Counters.ERROR).increment(1);
            }
        }
    }

    static class AnalyzeReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Sum the counts for each distinct value.
            int count = 0;
            for (IntWritable intWritable : values) {
                count = count + intWritable.get();
            }
            context.write(key, new IntWritable(count));
        }
    }

    public int run(String[] args) throws Exception {
        String table = args[0];
        String column = args[1];
        String outPath = args[2];

        // Restrict the scan to the requested "family:qualifier" column,
        // or to the whole family if no qualifier is given.
        Scan scan = new Scan();
        if (null != column) {
            byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
            if (colkey.length > 1) {
                scan.addColumn(colkey[0], colkey[1]);
            } else {
                scan.addFamily(colkey[0]);
            }
        }

        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", addr);
        configuration.set("hbase.zookeeper.property.clientPort", port);
        configuration.set(TableInputFormat.INPUT_TABLE, table);
        // Hand the column restriction to TableInputFormat via the configuration.
        // The Scan object built above is local and is not picked up by the job
        // automatically, so without this line the job would scan all columns.
        configuration.set(TableInputFormat.SCAN_COLUMNS, column);

        Job job = Job.getInstance(configuration);
        job.setJobName("AnalyzeData");
        job.setJarByClass(AnalyzeData.class);
        job.setMapperClass(AnalyzeMapper.class);
        job.setInputFormatClass(TableInputFormat.class);
        // Note: this call only modifies the local Scan object.
        TableInputFormat.addColumns(scan, KeyValue.parseColumn(Bytes.toBytes(column)));
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Using TableMapReduceUtil here failed with a class-not-found error:
        // Caused by: java.lang.ClassNotFoundException: com.yammer.metrics.core.MetricsRegistry
        //TableMapReduceUtil.initTableMapperJob(table, scan, AnalyzeMapper.class, Text.class, IntWritable.class, job);
        job.setReducerClass(AnalyzeReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(job, new Path(outPath));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Hard-coded arguments: table, column (family:qualifier), HDFS output path.
        String[] params = new String[] { "test_table_mr", "data:info", "hdfs://fz/data/fz/output/mrReadHBase" };
        int exitCode = ToolRunner.run(new AnalyzeData(), params);
        System.exit(exitCode);
    }
}
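For reference, the commented-out TableMapReduceUtil path is the more common way to wire up an HBase mapper, and it has the advantage of serializing the Scan into the job configuration for you. The ClassNotFoundException for com.yammer.metrics.core.MetricsRegistry usually means the HBase dependency jars are missing from the classpath at submit time; a commonly suggested remedy is to export the HBase classpath before submitting, for example export HADOOP_CLASSPATH=$(hbase classpath). The following is a minimal sketch of that variant, reusing the configuration, scan, table, and outPath variables from run() above and assuming the classpath issue has been resolved (it also needs import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil):

    // Sketch: job setup via TableMapReduceUtil instead of manual TableInputFormat
    // wiring. Assumes the HBase jars (including metrics-core) are available on
    // the classpath when the job is submitted.
    Job job = Job.getInstance(configuration);
    job.setJobName("AnalyzeData");
    job.setJarByClass(AnalyzeData.class);
    // Serializes the Scan into the job configuration and configures
    // TableInputFormat, so the addColumn/addFamily restriction takes effect.
    TableMapReduceUtil.initTableMapperJob(table, scan, AnalyzeMapper.class,
            Text.class, IntWritable.class, job);
    job.setReducerClass(AnalyzeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    return job.waitForCompletion(true) ? 0 : 1;

By default, initTableMapperJob also ships the HBase dependency jars with the job (addDependencyJars), which is why a jar missing from the client classpath, such as metrics-core, surfaces as a ClassNotFoundException at submit time rather than inside the tasks.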