MapReduce - Reading HBase

The job below reads HBase data with MapReduce: it scans a table, optionally restricted to one column family or column, and counts how often each distinct cell value occurs, writing the totals to HDFS (essentially a word count over cell values).

The code is as follows:

package com.hbase.mapreduce;
 
import java.io.IOException;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
 
/**
 * @author FengZhen
 * @create 2018-09-17
 * Reads data from HBase with MapReduce.
 */
public class AnalyzeData extends Configured implements Tool {

    // ZooKeeper quorum hosts and client port of the HBase cluster.
    private static String addr = "HDP233,HDP232,HDP231";
    private static String port = "2181";
     
    // Job counters: rows read, cells read, values emitted, mapper errors.
    public enum Counters { ROWS, COLS, VALID, ERROR }
     
    static class AnalyzeMapper extends TableMapper<Text, IntWritable> {
        // TableMapper fixes the input types to (ImmutableBytesWritable rowKey, Result row);
        // this mapper emits (cellValue, 1) for every cell in the row.
        private IntWritable ONE = new IntWritable(1);
        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            context.getCounter(Counters.ROWS).increment(1);
            try {
                for (Cell cell : value.listCells()) {
                    context.getCounter(Counters.COLS).increment(1);
                    String hbaseValue = Bytes.toString(CellUtil.cloneValue(cell));
                    context.write(new Text(hbaseValue), ONE);
                    context.getCounter(Counters.VALID).increment(1);
                }
            } catch (Exception e) {
                e.printStackTrace();
                context.getCounter(Counters.ERROR).increment(1);
            }
        }
    }
     
    static class AnalyzeReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Sums the counts for each distinct cell value, word-count style.
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable intWritable : values) {
                count = count + intWritable.get();
            }
            context.write(key, new IntWritable(count));
        }
    }
     
    public int run(String[] args) throws Exception {
        // Expected arguments: table name, column ("family" or "family:qualifier"),
        // and the HDFS output path.
        String table = args[0];
        String column = args[1];
        String outPath = args[2];
         
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", addr);
        configuration.set("hbase.zookeeper.property.clientPort", port);
        configuration.set(TableInputFormat.INPUT_TABLE, table);
        // TableInputFormat builds its Scan from the configuration, so the column
        // restriction has to be passed the same way; a Scan object constructed
        // locally here would be ignored, and setting configuration values after
        // Job.getInstance() is too late because the Job copies the configuration.
        // SCAN_COLUMNS accepts either "family" or "family:qualifier".
        if (null != column) {
            configuration.set(TableInputFormat.SCAN_COLUMNS, column);
        }

        Job job = Job.getInstance(configuration);
        job.setJobName("AnalyzeData");
        job.setJarByClass(AnalyzeData.class);

        job.setMapperClass(AnalyzeMapper.class);
        job.setInputFormatClass(TableInputFormat.class);
         
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
         
        // Using TableMapReduceUtil here threw a class-not-found error:
        //   Caused by: java.lang.ClassNotFoundException: com.yammer.metrics.core.MetricsRegistry
        // so the Scan is passed through the configuration above instead
        // (see the sketch after the listing).
        //TableMapReduceUtil.initTableMapperJob(table, scan, AnalyzeMapper.class, Text.class, IntWritable.class, job);
         
        job.setReducerClass(AnalyzeReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
         
        // A single reducer keeps all counts in one output file.
        job.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(job, new Path(outPath));
         
        return job.waitForCompletion(true) ? 0 : 1;
    }
     
    public static void main(String[] args) throws Exception {
        // Hard-coded arguments: table, column ("family:qualifier"), HDFS output path.
        String[] params = new String[] { "test_table_mr", "data:info", "hdfs://fz/data/fz/output/mrReadHBase" };
        int exitCode = ToolRunner.run(new AnalyzeData(), params);
        System.exit(exitCode);
    }
}
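
To try the job end to end, the input table referenced in main() has to exist with the matching column family. Below is a minimal sketch of creating and populating it with the HBase 1.x client API; the class name PrepareTestTable and the sample rows and values are made up for illustration, while the table name, family, qualifier, and quorum settings are taken from the listing above.

package com.hbase.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class PrepareTestTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "HDP233,HDP232,HDP231");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName name = TableName.valueOf("test_table_mr");
            if (!admin.tableExists(name)) {
                HTableDescriptor desc = new HTableDescriptor(name);
                desc.addFamily(new HColumnDescriptor("data"));
                admin.createTable(desc);
            }
            try (Table table = connection.getTable(name)) {
                // Sample rows (made up): the MapReduce job counts how often
                // each value appears in data:info across all rows.
                String[][] rows = { {"row1", "hello"}, {"row2", "world"}, {"row3", "hello"} };
                for (String[] row : rows) {
                    Put put = new Put(Bytes.toBytes(row[0]));
                    put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("info"),
                            Bytes.toBytes(row[1]));
                    table.put(put);
                }
            }
        }
    }
}

With those rows in place, submit the job as usual (for example with hadoop jar against the built jar and the com.hbase.mapreduce.AnalyzeData main class); the output directory on HDFS then holds one line per distinct value, here hello 2 and world 1.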

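For reference, this is roughly what the commented-out TableMapReduceUtil path in run() would look like once the missing com.yammer.metrics jar is on the job classpath; initTableMapperJob serializes the Scan into the job configuration and sets the input format, mapper, and map output types in one call. A sketch only, assuming run()'s table and job variables are in scope and the data:info column from main() is wanted:

        // Requires: import org.apache.hadoop.hbase.client.Scan;
        // Requires: import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("data"), Bytes.toBytes("info"));
        TableMapReduceUtil.initTableMapperJob(
                table,                // input table name
                scan,                 // Scan carrying the column restriction
                AnalyzeMapper.class,  // mapper class
                Text.class,           // mapper output key type
                IntWritable.class,    // mapper output value type
                job);                 // the Job created in run()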
 
