
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://ns1/hbase</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>hadoop-2:2181,hadoop-3:2181,hadoop-5:2181</value>
    </property>

</configuration>

Edit hbase-env.sh to disable the bundled ZooKeeper

export HBASE_MANAGES_ZK=false
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk

Edit the regionservers file

vi regionservers

 

hadoop-3
hadoop-1
hadoop-4
hadoop-5

Create a backup-masters file

vi backup-masters
hadoop-1
hadoop-4

Copy hdfs-site.xml and core-site.xml from HADOOP_HOME/etc/conf to /usr/local/hbase-1.2.4/conf

Start HBase

On the hadoop-2 node:

start-hbase.sh

HA test

Open a browser and enter http://19

Then enter the backup master's address, http://192.168.1.115:16010, in the browser

Writing to HBase from Spark

Use Spark to write one million rows into an HBase table (the word table with column family f1 is assumed to exist; a creation sketch follows the code).
The code is as follows:

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}


object testhbase {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("spark://192.168.1.116:7077")
      .setAppName("hbase")
      .setJars(List("C:\\Users\\hao\\IdeaProjects\\test\\out\\artifacts\\test_jar\\test.jar"))
    val sc = new SparkContext(conf)
    // Generate the row keys to insert
    val rdd = sc.makeRDD(Array(1)).flatMap(_ => 0 to 1000000)
    rdd.foreachPartition(x => {
      // Create one HBase connection per partition rather than per record
      val hbaseConf = HBaseConfiguration.create()
      hbaseConf.set("hbase.zookeeper.quorum", "hadoop-2,hadoop-3,hadoop-5")
      hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
      hbaseConf.set("hbase.defaults.for.version.skip", "true")
      val hbaseConn = ConnectionFactory.createConnection(hbaseConf)
      val table = hbaseConn.getTable(TableName.valueOf("word"))
      x.foreach(value => {
        // Row key and cell value are both the number as a string, written to column f1:c1
        val put = new Put(Bytes.toBytes(value.toString))
        put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("c1"), Bytes.toBytes(value.toString))
        table.put(put)
      })
      table.close()
      hbaseConn.close()
    })
    sc.stop()
  }
}
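
The code above assumes the word table with column family f1 already exists. If it does not, it can be created first, from the HBase shell or programmatically; below is a minimal sketch using the HBase 1.x Admin API (the connection settings simply mirror those above, and the variable names are only illustrative):

import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory

// Create the 'word' table with column family 'f1' if it is missing
val createConf = HBaseConfiguration.create()
createConf.set("hbase.zookeeper.quorum", "hadoop-2,hadoop-3,hadoop-5")
createConf.set("hbase.zookeeper.property.clientPort", "2181")
val conn = ConnectionFactory.createConnection(createConf)
val admin = conn.getAdmin
val tableName = TableName.valueOf("word")
if (!admin.tableExists(tableName)) {
  val desc = new HTableDescriptor(tableName)   // HTableDescriptor is fine for HBase 1.2.x
  desc.addFamily(new HColumnDescriptor("f1"))
  admin.createTable(desc)
}
admin.close()
conn.close()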

 

7. Use Spark to read an HBase table and save it to HDFS

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.HConstants

// Run in spark-shell, where sc is already defined
val tmpConf = HBaseConfiguration.create()
tmpConf.set("hbase.zookeeper.quorum", "hadoop-2,hadoop-3,hadoop-5")
tmpConf.set("hbase.zookeeper.property.clientPort", "2181")
tmpConf.set(TableInputFormat.INPUT_TABLE, "word")
tmpConf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "120000")
// Each record is a (row key, Result) pair
val hBaseRDD = sc.newAPIHadoopRDD(tmpConf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
// Extract two columns from the 'data' family, falling back to "0" when a cell is missing
// (the family/qualifier names must match the schema of the table being scanned)
val lineRdd = hBaseRDD.map(r =>
    (if (r._2.containsColumn("data".getBytes, "log_date".getBytes)) new String(r._2.getValue("data".getBytes, "log_date".getBytes)) else "0") + "," +
    (if (r._2.containsColumn("data".getBytes, "area_code".getBytes)) new String(r._2.getValue("data".getBytes, "area_code".getBytes)) else "0")
)
lineRdd.saveAsTextFile("hdfs://ns1/hbasehdfs01")

// Or merge the result into a single output file before saving
lineRdd.repartition(1).saveAsTextFile("hdfs://ns1/hbasehdfs")
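
Applied to the word table written above (family f1, qualifier c1), the same pattern reduces to a single-column mapping. A minimal sketch, reusing the hBaseRDD from above; the output path hdfs://ns1/word_out is only an example:

// Emit the value of f1:c1 for every row of the 'word' table, or "0" if the cell is absent
val wordRdd = hBaseRDD.map(r =>
  if (r._2.containsColumn("f1".getBytes, "c1".getBytes))
    new String(r._2.getValue("f1".getBytes, "c1".getBytes))
  else "0"
)
wordRdd.repartition(1).saveAsTextFile("hdfs://ns1/word_out")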

 

posted @ 2018-04-25 16:54  努力中国