Reading HBase data with Spark in IntelliJ IDEA
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

object 读Hbase数据 {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val sc_conf = new SparkConf().setMaster("local[2]").setAppName("读取Hbase中的数据")
    val sc = new SparkContext(sc_conf)

    // Set the name of the HBase table to scan
    conf.set(TableInputFormat.INPUT_TABLE, "student")

    // Each record comes back as a (row key, Result) pair
    val RDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])
    RDD.cache()

    val count = RDD.count()
    println("Students RDD Count:" + count)

    // Iterate over the results and print each row
    RDD.foreach({ case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue("info".getBytes, "name".getBytes))
      val gender = Bytes.toString(result.getValue("info".getBytes, "gender".getBytes))
      val age = Bytes.toString(result.getValue("info".getBytes, "age".getBytes))
      println("ROW:" + key + " name: " + name + " Gender: " + gender + " Age: " + age)
    })

    sc.stop()
  }
}

/* Data in the HBase 'student' table:
hbase(main):002:0> scan 'student'
ROW          COLUMN+CELL
 1           column=info:age, timestamp=1511069825331, value=23
 1           column=info:gender, timestamp=1511069793625, value=B
 1           column=info:name, timestamp=1511069769388, value=soyo
 2           column=info:age, timestamp=1511069981392, value=24
 2           column=info:gender, timestamp=1511069942570, value=G
 2           column=info:name, timestamp=1511069903103, value=soyo2
2 row(s) in 0.2910 seconds
*/
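To compile and run this inside IDEA, the project needs the Spark core and HBase artifacts on the classpath (TableInputFormat lives in hbase-server for HBase 1.x). Below is a minimal build.sbt sketch; the version numbers are assumptions and must be replaced with the versions deployed on your cluster:

// build.sbt -- a minimal sketch; the versions here are placeholders,
// match them to your actual Spark/HBase installation.
name := "spark-read-hbase"
scalaVersion := "2.11.8"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"   % "2.1.0",
  "org.apache.hbase" %  "hbase-client" % "1.1.2",
  "org.apache.hbase" %  "hbase-server" % "1.1.2",  // provides TableInputFormat
  "org.apache.hbase" %  "hbase-common" % "1.1.2"
)

Note that HBaseConfiguration.create() picks up hbase-site.xml from the classpath; if it is not on the classpath, the ZooKeeper quorum can be set directly on the configuration, e.g. conf.set("hbase.zookeeper.quorum", "localhost").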
Result:
Students RDD Count:2
ROW:1 name: soyo Gender: B Age: 23
ROW:2 name: soyo2 Gender: G Age: 24
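Instead of only printing inside foreach, the (ImmutableBytesWritable, Result) pairs can be mapped into plain Scala values so the rest of the Spark API applies. A minimal sketch, assuming the same 'student' table layout; Student and studentRDD are names introduced here purely for illustration:

import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.util.Bytes

// Hypothetical case class mirroring the info column family of 'student'
case class Student(rowKey: String, name: String, gender: String, age: Int)

// RDD is the (ImmutableBytesWritable, Result) RDD created above
val studentRDD = RDD.map { case (_, result: Result) =>
  Student(
    Bytes.toString(result.getRow),
    Bytes.toString(result.getValue("info".getBytes, "name".getBytes)),
    Bytes.toString(result.getValue("info".getBytes, "gender".getBytes)),
    Bytes.toString(result.getValue("info".getBytes, "age".getBytes)).toInt
  )
}

// Ordinary RDD operations now apply, e.g. the average age of all students:
val avgAge = studentRDD.map(_.age).mean()
println("Average age: " + avgAge)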