0013. HBase Advanced Topics



06-07 Using Java to Operate HBase
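There were no notes in this section, so below is a minimal sketch of using the HBase Java client, written with the same old-style API (HTable, put.add) as the MapReduce code later in this post. The ZooKeeper address, the table name "student", the column family "info" and the values are example assumptions, not from the original notes.

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.client.Get;
	import org.apache.hadoop.hbase.client.HTable;
	import org.apache.hadoop.hbase.client.Put;
	import org.apache.hadoop.hbase.client.Result;
	import org.apache.hadoop.hbase.util.Bytes;

	public class HBaseClientDemo {

		public static void main(String[] args) throws Exception {
			// Point the client at ZooKeeper (example address)
			Configuration conf = HBaseConfiguration.create();
			conf.set("hbase.zookeeper.quorum", "192.168.157.111");

			// Open an existing table (hypothetical table "student" with column family "info")
			HTable table = new HTable(conf, "student");

			// Insert one row: rowkey s001, column info:name = Tom
			Put put = new Put(Bytes.toBytes("s001"));
			put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("Tom"));
			table.put(put);

			// Read the value back
			Get get = new Get(Bytes.toBytes("s001"));
			Result result = table.get(get);
			System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));

			table.close();
		}
	}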


06-08 Setting Up Fully Distributed HBase and HA

Setting up fully distributed HBase (add the following properties to hbase-site.xml):
	<property>
		 <name>hbase.rootdir</name>
		 <value>hdfs://192.168.16.141:9000/hbase</value>
	</property>		

	<property>
		 <name>hbase.cluster.distributed</name>
		 <value>true</value>
	</property>	

	<property>
		 <name>hbase.zookeeper.quorum</name>
		 <value>192.168.16.141</value>
	</property>		

	<property>
		 <name>dfs.replication</name>
		 <value>2</value>
	</property>

	<property>
		 <name>hbase.master.maxclockskew</name>
		 <value>180000</value>
	</property>
[Figure: the data HBase keeps in ZooKeeper under the different deployment modes]
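For the HA part: HBase HA only requires starting an additional HMaster on another node (hbase-daemon.sh start master). The backup master registers itself in ZooKeeper and takes over automatically if the active master fails.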


06-09 How HBase Saves Data and Region Splitting

[Figure: the data save (write) process]
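In short: a write goes first to the WAL (HLog) and then to the MemStore of the target region; when the MemStore fills up it is flushed to an HFile on HDFS. When a region grows beyond hbase.hregion.max.filesize it is split into two daughter regions, which the master can then assign to other RegionServers to balance the load.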


06-10 HBase Filters
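No notes here either; the sketch below shows a typical filter, again using the old-style client API. It scans a hypothetical table "emp" and returns only the rows whose column info:name equals "Tom" (the table name, column family, value and ZooKeeper address are example assumptions).

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.client.HTable;
	import org.apache.hadoop.hbase.client.Result;
	import org.apache.hadoop.hbase.client.ResultScanner;
	import org.apache.hadoop.hbase.client.Scan;
	import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
	import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
	import org.apache.hadoop.hbase.util.Bytes;

	public class FilterDemo {

		public static void main(String[] args) throws Exception {
			Configuration conf = HBaseConfiguration.create();
			conf.set("hbase.zookeeper.quorum", "192.168.157.111");

			// Hypothetical table "emp" with column family "info"
			HTable table = new HTable(conf, "emp");

			// Only keep rows whose info:name equals "Tom"
			SingleColumnValueFilter filter = new SingleColumnValueFilter(
					Bytes.toBytes("info"),   // column family
					Bytes.toBytes("name"),   // column
					CompareOp.EQUAL,
					Bytes.toBytes("Tom"));

			Scan scan = new Scan();
			scan.setFilter(filter);

			// Scan the table and print the matching names
			ResultScanner rs = table.getScanner(scan);
			for (Result r : rs) {
				System.out.println(Bytes.toString(r.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
			}

			rs.close();
			table.close();
		}
	}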


06-11 MapReduce on HBase
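The example below is WordCount over HBase tables: the input table word stores one sentence per row in the column content:info, and the result is written to the table stat, one row per word, with the word as the row key and the count in the column content:result.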


import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Here the mapper processes one row of the HBase table                                   k2      v2
// <k1, v1> stands for the input; here the input is one record (row) of the table
public class WordCountMapper extends TableMapper<Text, IntWritable> {

	@Override
	protected void map(ImmutableBytesWritable key, Result value,Context context)
			throws IOException, InterruptedException {
		// Get the data from column content:info, e.g. "I love Beijing"
		String data = Bytes.toString(value.getValue(Bytes.toBytes("content"), Bytes.toBytes("info")));
		
		// Split the line into words
		String[] words = data.split(" ");
		
		for(String w:words){
			context.write(new Text(w), new IntWritable(1));
		}
	}
}

import java.io.IOException;

import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

//                                                  k3      v3      keyout is one output record: it specifies the row key
public class WordCountReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

	@Override
	protected void reduce(Text k3, Iterable<IntWritable> v3,Context context)
			throws IOException, InterruptedException {
		// Sum up v3
		int total = 0;
		for(IntWritable v:v3){
			total = total + v.get();
		}
		
		// The output is also one record (row) of an HBase table
		// Build a Put object, using the word as the row key
		Put put = new Put(Bytes.toBytes(k3.toString()));
		put.add(Bytes.toBytes("content"),  // column family
				Bytes.toBytes("result"),   // column
				Bytes.toBytes(String.valueOf(total)));  // value
		
		// Write the output
		context.write(new ImmutableBytesWritable(Bytes.toBytes(k3.toString())), 
				     put);  // the result row
	}

}



import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class WordCountMain {

	public static void main(String[] args) throws Exception {
		// Configuration: specify the ZooKeeper address
		Configuration conf = new Configuration();
		conf.set("hbase.zookeeper.quorum", "192.168.157.111");
		
		// Create a job
		Job job = Job.getInstance(conf);
		job.setJarByClass(WordCountMain.class);
		
		// Define a scan that only reads the column content:info
		Scan scan = new Scan();
		scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"));
		
		// Specify the mapper
		TableMapReduceUtil.initTableMapperJob(Bytes.toBytes("word"),      // input table
				                              scan,    // the scan that selects the data we want to process
				                              WordCountMapper.class, 
				                              Text.class, 
				                              IntWritable.class, 
				                              job);
		
		// Specify the reducer
		TableMapReduceUtil.initTableReducerJob("stat", WordCountReducer.class, job);
		
		// Run the job
		job.waitForCompletion(true);
	}
}
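Note: the input table word and the output table stat (both with column family content) must exist before the job runs, e.g. create 'stat','content' in the HBase shell. The HBase jars also need to be on the Hadoop classpath when submitting; a common approach is to export HADOOP_CLASSPATH=`hbase classpath` (or hbase mapredcp) before running hadoop jar.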
