Reading HBase data with Spark in IntelliJ IDEA
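
To run this inside IDEA, the project needs Spark core and the HBase client/server artifacts on the classpath (TableInputFormat ships with hbase-server in HBase 1.x). A minimal build.sbt sketch; the version numbers here are assumptions and should match your own Spark/HBase installation:

// build.sbt fragment (versions are examples only)
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"   % "2.1.0",
  "org.apache.hbase" %  "hbase-client" % "1.2.6",
  "org.apache.hbase" %  "hbase-common" % "1.2.6",
  "org.apache.hbase" %  "hbase-server" % "1.2.6"  // provides TableInputFormat
)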

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes

object 读Hbase数据 {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val sc_conf = new SparkConf().setMaster("local[2]").setAppName("Read data from HBase")
    val sc = new SparkContext(sc_conf)
    // Name of the HBase table to scan
    conf.set(TableInputFormat.INPUT_TABLE, "student")
    // Build an RDD of (row key, Result) pairs from the table
    val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])
    // Cache before the first action so the table is scanned only once
    hbaseRDD.cache()

    val count = hbaseRDD.count()
    println("Students RDD Count:" + count)
    // Print every row: the row key plus the name, gender and age cells of the info family
    hbaseRDD.foreach { case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")))
      val age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      println("ROW:" + key + " name: " + name + " Gender: " + gender + " Age: " + age)
    }

    sc.stop()
  }
}
/* Data in the HBase 'student' table
hbase(main):002:0> scan 'student'
ROW                   COLUMN+CELL
 1                    column=info:age, timestamp=1511069825331, value=23
 1                    column=info:gender, timestamp=1511069793625, value=B
 1                    column=info:name, timestamp=1511069769388, value=soyo
 2                    column=info:age, timestamp=1511069981392, value=24
 2                    column=info:gender, timestamp=1511069942570, value=G
 2                    column=info:name, timestamp=1511069903103, value=soyo2
2 row(s) in 0.2910 seconds
*/
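
The job above scans the whole table. If only part of it is needed, the scan can be narrowed through the same Configuration object before newAPIHadoopRDD is called. A sketch using TableInputFormat's scan keys; the column list and row keys are just examples:

// Optional scan restrictions, set before building the RDD
conf.set(TableInputFormat.SCAN_COLUMNS, "info:name info:age") // only these columns
conf.set(TableInputFormat.SCAN_ROW_START, "1")                // start row key (inclusive)
conf.set(TableInputFormat.SCAN_ROW_STOP, "3")                 // stop row key (exclusive)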

Output:

Students RDD Count:2
ROW:1 name: soyo Gender: B Age: 23
ROW:2 name: soyo2 Gender: G Age: 24
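
For anything more than printing, it is handier to map the raw (ImmutableBytesWritable, Result) pairs into a plain Scala type first. A sketch that reuses hbaseRDD from the listing above; the Student case class and the age filter are only illustrative:

// Hypothetical value class for one row of the info column family
case class Student(id: String, name: String, gender: String, age: Int)

val students = hbaseRDD.map { case (_, result) =>
  Student(
    Bytes.toString(result.getRow),
    Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))),
    Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender"))),
    Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age"))).toInt
  )
}
students.filter(_.age > 23).foreach(println) // e.g. keep only students older than 23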

