MapReduce Reading and Writing HBase: Architecture Diagram and Example Code

1. Architecture of MapReduce Reading and Writing HBase
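In outline: when reading, TableInputFormat turns each region of the source HBase table into one input split, so each map task scans one region and receives every row as a (rowkey, Result) pair; when writing, the reducer emits Put mutations and TableOutputFormat applies them to the target table. The shuffle and reduce phases in between are ordinary MapReduce.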

2. Example Code: MapReduce Reading HBase

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

// Read the stu table and count the number of students in each class
public class Demo05MRReadHBase {
    // Map side
    public static class MRReadHBase extends TableMapper<Text, IntWritable> {
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            // Extract the rowkey and convert it from bytes to a String
            String rowkey = Bytes.toString(key.get());
            // Extract the class from the info:clazz column
            String clazz = Bytes.toString(value.getValue("info".getBytes(), "clazz".getBytes()));
            // Emit the class as KeyOut and 1 as ValueOut
            context.write(new Text(clazz), new IntWritable(1));
        }
    }

    // Reduce side
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int cnt = 0;
            // Sum up the 1s emitted for this class
            for (IntWritable value : values) {
                cnt += value.get();
            }
            context.write(key, new IntWritable(cnt));
        }
    }

    // Driver
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        conf.set("fs.defaultFS", "hdfs://master:9000");

        Job job = Job.getInstance(conf);

        job.setJobName("Demo05MRReadHBase");
        job.setJarByClass(Demo05MRReadHBase.class);

        // Configure the Map task
        TableMapReduceUtil.initTableMapperJob(
                "stu",
                new Scan(),
                MRReadHBase.class,
                Text.class,
                IntWritable.class,
                job
        );

        // Configure the Reduce task
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Configure the output path; delete it first if it already exists
        Path path = new Path("/MR/HBase/output/");
        FileSystem fs = FileSystem.get(conf);

        if (fs.exists(path)) {
            fs.delete(path, true);
        }

        FileOutputFormat.setOutputPath(job, path);

        job.waitForCompletion(true);
/**
 * 1. Hadoop does not ship with the HBase dependencies, so the HBase jars have to be
 *    added to Hadoop's runtime classpath manually.
 * 2. A temporary setting in the Linux shell is enough:
 *        export HADOOP_CLASSPATH="$HBASE_HOME/lib/*"
 * 3. Package the code into a jar and upload it to the Linux machine.
 * 4. Go to the directory containing the jar and submit the job:
 *        hadoop jar HBase-1.0.jar com.shujia.Demo05MRReadHBase
 * 5. After the job finishes, check the output:
 *        hdfs dfs -cat /MR/HBase/output/*
 *    Result:
 *        文科一班    72
 *        文科三班    94
 *        ...
 */
    }
}
Steps to manually configure Hadoop's runtime dependency environment:

# Step 1: on the Linux machine, change into the lib directory under the HBase installation directory
# the lib directory contains all of HBase's jars (dependencies)

cd /usr/local/soft/hbase-1.4.6/lib/

# Step 2: point HADOOP_CLASSPATH at those jars so Hadoop can find them at runtime

export HADOOP_CLASSPATH="$HBASE_HOME/lib/*"
# Note: this setting is temporary and only takes effect in the current shell session
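
Putting those steps together, a typical submit-and-check session looks roughly like this (the jar name HBase-1.0.jar and the output path follow the example above; adjust them to your own project):

# make the HBase jars visible to Hadoop for this shell session
export HADOOP_CLASSPATH="$HBASE_HOME/lib/*"

# from the directory the jar was uploaded to, submit the job
hadoop jar HBase-1.0.jar com.shujia.Demo05MRReadHBase

# check the result on HDFS
hdfs dfs -cat /MR/HBase/output/*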

3. Example Code: MapReduce Writing HBase

package com.shujia;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// Read the stu table, count students by gender, and write the result back to the HBase table stu_gender_cnt
public class Demo06MRReadAndWriteHBase {
    // Map
    public static class MRReadHBase extends TableMapper<Text, IntWritable> {
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            String rowkey = Bytes.toString(key.get());

            String gender = Bytes.toString(value.getValue("info".getBytes(), "gender".getBytes()));

            // Emit the gender as KeyOut and 1 as ValueOut
            context.write(new Text(gender), new IntWritable(1));
        }
    }

    // Reduce
    // The output table has to exist first: create 'stu_gender_cnt','info'
    public static class MRWriteHBase extends TableReducer<Text, IntWritable, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, NullWritable, Mutation>.Context context) throws IOException, InterruptedException {
            int cnt = 0;
            for (IntWritable value : values) {
                cnt += value.get();
            }

            // Text.getBytes() can return trailing buffer bytes, so go through toString() first
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn("info".getBytes(), "cnt".getBytes(), Bytes.toBytes(String.valueOf(cnt)));

            context.write(NullWritable.get(), put);
        }
    }

    // Driver

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        conf.set("fs.defaultFS", "hdfs://master:9000");

        Job job = Job.getInstance(conf);

        job.setJobName("Demo06MRReadAndWriteHBase");
        job.setJarByClass(Demo06MRReadAndWriteHBase.class);

        // Configure the Map task
        TableMapReduceUtil.initTableMapperJob(
                "stu",
                new Scan(),
                MRReadHBase.class,
                Text.class,
                IntWritable.class,
                job
        );


        // Configure the Reduce task (writes the results to HBase)
        TableMapReduceUtil.initTableReducerJob(
                "stu_gender_cnt",
                MRWriteHBase.class,
                job
        );
        job.waitForCompletion(true);
        
        /**
         * Create the stu_gender_cnt table first:
         *     create 'stu_gender_cnt','info'
         * Build a jar that bundles the dependencies with the Maven assembly plugin, then submit:
         *     hadoop jar HBase-1.0-jar-with-dependencies.jar com.shujia.Demo06MRReadAndWriteHBase
         */
    }
}
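
The run sequence for this job is sketched below; the final scan is just one way to check the output and assumes the job has already finished:

# in the HBase shell: create the output table before running the job
create 'stu_gender_cnt','info'

# submit the job with a jar that bundles the HBase dependencies (see the Maven setup below)
hadoop jar HBase-1.0-jar-with-dependencies.jar com.shujia.Demo06MRReadAndWriteHBase

# back in the HBase shell: check the counts written by the reducer
scan 'stu_gender_cnt'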
# Use the Maven assembly plugin to package the dependencies into the jar
# Add the plugin configuration below to the project's pom.xml, then re-import the Maven dependencies

    <!-- Package the dependencies into the jar -->
    <build>
        <plugins>

            <!-- Java Compiler -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>


            <!-- Plugin that builds a jar with the dependencies included -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>

# From now on, packaging the project will also bundle the required dependencies into the jar
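
For reference, a packaging-and-submit sketch; the artifact name HBase-1.0 follows the jar names used above and depends on the <artifactId> and <version> in your pom.xml:

# build both the plain jar and the jar that bundles the dependencies
mvn clean package

# the bundled jar ends up in target/ with the -jar-with-dependencies suffix
hadoop jar target/HBase-1.0-jar-with-dependencies.jar com.shujia.Demo06MRReadAndWriteHBase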