用MapReduce读HBase写MongoDB样例

1、版本信息:

Hadoop版本:2.7.1

HBase版本:1.2.1

MongoDB版本:3.4.14

 

2、HBase表名及数据:

 

3、Maven依赖:

复制代码
<!-- Hadoop client API; version matches the Hadoop version listed in the article (2.7.1). -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.1</version>
</dependency>
<!-- MongoDB Java driver (supplies com.mongodb.BasicDBObject used by the job). -->
<dependency>
    <groupId>org.mongodb</groupId>
    <artifactId>mongo-java-driver</artifactId>
    <version>3.4.3</version>
</dependency>
<!-- Mongo-Hadoop connector; presumably the source of the com.mongodb.hadoop.* classes
     (MongoOutputFormat, BSONWritable, MongoConfigUtil) imported by the job. -->
<dependency>
    <groupId>org.mongodb.mongo-hadoop</groupId>
    <artifactId>mongo-hadoop-core</artifactId>
    <version>2.0.2</version>
</dependency>
<!-- NOTE(review): the three HBase artifacts below are pinned to 1.1.1, while the
     article's version list states HBase 1.2.1. Confirm which version is intended. -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>1.1.1</version>
</dependency>
复制代码

 

4、MapReduce程序:

复制代码
package mapreduce;

import com.mongodb.BasicDBObject;
import com.mongodb.hadoop.MongoOutputFormat;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import java.io.IOException;
import java.util.UUID;

/**
 * Map-only MapReduce job that scans the HBase table {@code qch_t1} and writes one
 * MongoDB document per scanned row through {@link MongoOutputFormat}.
 *
 * <p>Each output document carries a random UUID as {@code _id} and the value of the
 * {@code if:col1} cell (or an empty string when the cell is missing) as {@code col}.
 */
public class HBaseToMongo {
    /** Column family read by the mapper. */
    private static final byte[] FAMILY = Bytes.toBytes("if");
    /** Column qualifier read by the mapper. */
    private static final byte[] QUALIFIER = Bytes.toBytes("col1");

    /**
     * Configures and runs the job, prints the elapsed wall-clock time in milliseconds,
     * then exits with status 0 on success or 1 on failure.
     *
     * @param args unused
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        long st = System.currentTimeMillis();

        Configuration config = new Configuration();
        config.set("dfs.socket.timeout", "180000");
        config.set("hbase.zookeeper.property.clientPort", "2181");
        config.set("hbase.zookeeper.quorum", "10.11.2.4,10.11.2.5,10.11.2.6");

        // The format of the URI is:
        // mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]]
        String uri = "mongodb://10.11.2.15:27017,10.11.2.16:27017,10.11.2.17:27017/postal.qch_test";
        MongoConfigUtil.setOutputURI(config, uri);

        Job job = Job.getInstance(config);
        job.setJobName("HBaseToMongo");
        job.setJarByClass(FilterMapper.class);
        job.setOutputFormatClass(MongoOutputFormat.class);
        // Map-only job: mapper output goes straight to the output format.
        job.setNumReduceTasks(0);

        // Scan settings recommended by the HBase reference guide for MapReduce input:
        // fetch rows in larger batches and do not fill the region server block cache
        // with data from a one-off full-table scan.
        Scan scan = new Scan();
        scan.setCaching(500);
        scan.setCacheBlocks(false);

        // Declared map output key class must match what FilterMapper actually emits (Text).
        TableMapReduceUtil.initTableMapperJob("qch_t1", scan,
                FilterMapper.class, Text.class, BSONWritable.class, job);

        boolean succeeded = job.waitForCompletion(true);
        // Report elapsed time BEFORE exiting; System.exit never returns.
        System.out.println("HBaseToMongo:" + (System.currentTimeMillis() - st));
        System.exit(succeeded ? 0 : 1);
    }

    /**
     * Converts each scanned HBase row into a single-field BSON document.
     */
    static class FilterMapper extends TableMapper<Text, BSONWritable> {
        /**
         * Emits one document per row containing a random {@code _id} and the
         * {@code if:col1} cell value as {@code col}.
         *
         * @param key     row key of the scanned row
         * @param value   cells returned by the scan for this row
         * @param context MapReduce context used to emit the output record
         * @throws IOException          if writing the record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(ImmutableBytesWritable key,
                           Result value, Context context) throws IOException, InterruptedException {
            String col = getStrByByte(value.getValue(FAMILY, QUALIFIER));

            BasicDBObject doc = new BasicDBObject();
            // A fresh random id is generated on every call.
            // NOTE(review): re-running the job therefore inserts new documents instead of
            // overwriting earlier ones — confirm this is intended.
            doc.put("_id", UUID.randomUUID().toString());
            doc.put("col", col);

            BSONWritable bsonWritable = new BSONWritable();
            bsonWritable.setDoc(doc);

            // NOTE(review): key.toString() is ImmutableBytesWritable's own string form,
            // which is presumably not the decoded row key — confirm this is intended.
            context.write(new Text(key.toString()), bsonWritable);
        }

        /**
         * Decodes a cell value to a string, mapping a missing or empty value to {@code ""}.
         *
         * @param by raw cell bytes, may be {@code null}
         * @return the decoded string, never {@code null} for {@code null}/empty input
         */
        private String getStrByByte(byte[] by) {
            if (by == null || by.length == 0) {
                return "";
            }
            return Bytes.toString(by);
        }
    }
}
复制代码

 

5、运行结果:

 

6、程序源码:

https://github.com/quchunhui/tod-train-1.0/blob/master/hadoop/src/main/java/mapreduce/HBaseToMongo.java

posted @   大墨垂杨  阅读(511)  评论(0)  编辑  收藏  举报
编辑推荐:
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
点击右上角即可分享
微信分享提示