Table of Contents
06-07-Using Java to Work with HBase
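A minimal sketch of the basic Java client operations (put and get) covered in this section, assuming an existing table named student with a column family info (both names are placeholders) and the ZooKeeper address used elsewhere in these notes:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseClientDemo {
    public static void main(String[] args) throws Exception {
        // Point the client at the ZooKeeper quorum
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.16.141");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("student"))) {
            // Insert one cell: row "s001", column info:name (placeholder values)
            Put put = new Put(Bytes.toBytes("s001"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("Tom"));
            table.put(put);
            // Read the cell back
            Get get = new Get(Bytes.toBytes("s001"));
            Result r = table.get(get);
            System.out.println(Bytes.toString(
                r.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
        }
    }
}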
06-08-Setting Up Fully Distributed HBase and HA
Setting Up Fully Distributed HBase
<property>
    <!-- HBase's root directory on HDFS (NameNode address) -->
    <name>hbase.rootdir</name>
    <value>hdfs://192.168.16.141:9000/hbase</value>
</property>
<property>
    <!-- Run in distributed mode: HMaster and RegionServers in separate JVMs -->
    <name>hbase.cluster.distributed</name>
    <value>true</value>
</property>
<property>
    <!-- ZooKeeper quorum used for coordination -->
    <name>hbase.zookeeper.quorum</name>
    <value>192.168.16.141</value>
</property>
<property>
    <!-- HDFS replication factor for HBase files -->
    <name>dfs.replication</name>
    <value>2</value>
</property>
<property>
    <!-- Tolerated clock skew between master and region servers: 180000 ms = 3 minutes -->
    <name>hbase.master.maxclockskew</name>
    <value>180000</value>
</property>
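For the HA part of this section, a sketch of the usual hbase-site.xml changes, assuming an HDFS nameservice named ns1 and ZooKeeper hosts zk1, zk2, zk3 (all placeholder names): point hbase.rootdir at the nameservice rather than a single NameNode, and list every quorum member. A standby HMaster is started on each host listed in conf/backup-masters; the masters then elect an active one through ZooKeeper.

<property>
    <!-- Use the HDFS nameservice so HBase survives a NameNode failover -->
    <name>hbase.rootdir</name>
    <value>hdfs://ns1/hbase</value>
</property>
<property>
    <!-- All ZooKeeper nodes, comma-separated -->
    <name>hbase.zookeeper.quorum</name>
    <value>zk1,zk2,zk3</value>
</property>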
[Figure: the data HBase keeps in ZooKeeper under the different run modes]
06-09-The HBase Data Save Process and Region Splitting
[Figure: the data save process]
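A region splits once one of its store files grows past a configured threshold. As a sketch (the default varies by release; recent versions ship with 10 GB), the threshold is set by hbase.hregion.max.filesize:

<property>
    <!-- Maximum store file size before a region splits (10 GB here) -->
    <name>hbase.hregion.max.filesize</name>
    <value>10737418240</value>
</property>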
06-10-HBase Filters
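A minimal sketch of a filtered scan, reusing the word table and content:info column from the MapReduce example below; the row-key prefix "rec" is a placeholder. A PrefixFilter keeps only rows whose key starts with the given bytes:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class FilterDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.157.111");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("word"))) {
            Scan scan = new Scan();
            // Keep only rows whose row key starts with "rec"
            scan.setFilter(new PrefixFilter(Bytes.toBytes("rec")));
            try (ResultScanner rs = table.getScanner(scan)) {
                for (Result r : rs) {
                    System.out.println(Bytes.toString(r.getRow()) + " -> " +
                        Bytes.toString(r.getValue(Bytes.toBytes("content"), Bytes.toBytes("info"))));
                }
            }
        }
    }
}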
06-11-MapReduce on HBase
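The word-count example below consists of three classes: a TableMapper that reads one record at a time from the input table word (column content:info), a TableReducer that sums the counts and writes one record per word into the output table stat, and a driver class that wires the job together.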
import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// At this point the mapper is processing one row of the HBase table, emitting <k2, v2>.
// <k1, v1> is the input; here the input is one record (row) of the table.
public class WordCountMapper extends TableMapper<Text, IntWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Read the cell content:info, e.g. "I love Beijing"
        String data = Bytes.toString(value.getValue(Bytes.toBytes("content"), Bytes.toBytes("info")));
        // Split into words
        String[] words = data.split(" ");
        for (String w : words) {
            context.write(new Text(w), new IntWritable(1));
        }
    }
}
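The reducer sums the counts for each word and writes the result as one record of the output table, with the word itself as the row key: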
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// <k3, v3> is the input; KEYOUT identifies one output record by its row key.
public class WordCountReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
    @Override
    protected void reduce(Text k3, Iterable<IntWritable> v3, Context context)
            throws IOException, InterruptedException {
        // Sum the values in v3
        int total = 0;
        for (IntWritable v : v3) {
            total = total + v.get();
        }
        // The output is also one record of a table:
        // build a Put that uses the word as the row key
        // (Put.add was removed in HBase 2.x; addColumn is the current API)
        Put put = new Put(Bytes.toBytes(k3.toString()));
        put.addColumn(Bytes.toBytes("content"),              // column family
                Bytes.toBytes("result"),                     // column
                Bytes.toBytes(String.valueOf(total)));       // value
        // Emit the row key and the Put holding the result
        context.write(new ImmutableBytesWritable(Bytes.toBytes(k3.toString())), put);
    }
}
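The driver points the client at ZooKeeper, restricts the scan to the content:info column, and binds the mapper and reducer to their input and output tables: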
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class WordCountMain {
    public static void main(String[] args) throws Exception {
        // HBase configuration: point the client at the ZooKeeper quorum
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.157.111");

        // Create the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountMain.class);

        // A scanner that reads only the content:info column
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"));

        // Bind the mapper to the input table
        TableMapReduceUtil.initTableMapperJob("word",   // input table
                scan,                                   // scan that selects the data to process
                WordCountMapper.class,
                Text.class,
                IntWritable.class,
                job);

        // Bind the reducer to the output table
        TableMapReduceUtil.initTableReducerJob("stat", WordCountReducer.class, job);

        // Run the job and exit with its status
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
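When submitting this job to a cluster, the HBase client jars must be on the task classpath; calling TableMapReduceUtil.addDependencyJars(job) in the driver is one common way to ship them with the job (in most versions, initTableMapperJob already does this by default).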