Writing Data to HBase from Spark in IDEA
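To run this from IDEA the project needs Spark core plus the HBase client/server artifacts on the classpath; in HBase 1.x the old-API TableOutputFormat used below ships in hbase-server (newer HBase versions moved it to hbase-mapreduce). A minimal sbt sketch, with versions as assumptions that should be matched to your own cluster:

libraryDependencies ++= Seq(
  // versions below are assumptions; match them to your Spark/HBase cluster
  "org.apache.spark" %% "spark-core"   % "2.1.0",
  "org.apache.hbase" %  "hbase-client" % "1.2.6",
  "org.apache.hbase" %  "hbase-server" % "1.2.6"  // provides org.apache.hadoop.hbase.mapred.TableOutputFormat in HBase 1.x
)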

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.hadoop.hbase.client.{Put, Result}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf

object 写Hbase数据 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("WriteToHBase").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    val tableName = "student"
    // sc.hadoopConfiguration.set(TableOutputFormat.OUTPUT_TABLE, tableName)

    val conf = HBaseConfiguration.create()

    val jobConf = new JobConf(conf)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)

    // Build the new records
    val dataRDD = sc.makeRDD(Array("5,hadoop,B,29", "6,spark,G,56"))
    val rdd = dataRDD.map(_.split(",")).map { x =>
      val put = new Put(Bytes.toBytes(x(0))) // row key; addColumn takes column family, qualifier, value
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(x(1)))   // value of info:name
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(x(2))) // value of info:gender
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(x(3)))    // value of info:age
      (new ImmutableBytesWritable, put) // an RDD[(ImmutableBytesWritable, Put)] is required by saveAsHadoopDataset
    }
    rdd.saveAsHadoopDataset(jobConf)
    sc.stop()
  }
}
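The code above uses the old mapred API (JobConf + saveAsHadoopDataset) and assumes the 'student' table with column family 'info' already exists (e.g. created beforehand with create 'student','info' in the hbase shell). The same write can also be expressed with the newer org.apache.hadoop.hbase.mapreduce.TableOutputFormat and saveAsNewAPIHadoopDataset; a rough sketch, reusing the rdd built above and aliasing the import to avoid clashing with the mapred class:

import org.apache.hadoop.hbase.mapreduce.{TableOutputFormat => NewTableOutputFormat}
import org.apache.hadoop.mapreduce.Job

val newConf = HBaseConfiguration.create()
newConf.set(NewTableOutputFormat.OUTPUT_TABLE, tableName)
val job = Job.getInstance(newConf)
job.setOutputKeyClass(classOf[ImmutableBytesWritable])
job.setOutputValueClass(classOf[Put])
job.setOutputFormatClass(classOf[NewTableOutputFormat[ImmutableBytesWritable]])

// same RDD[(ImmutableBytesWritable, Put)] as above
rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)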

Result:

hbase(main):021:0> scan 'student'
ROW                                    COLUMN+CELL                                                                                                     
 3                                     column=info:age, timestamp=1511079380185, value=29                                                              
 3                                     column=info:gender, timestamp=1511079380185, value=B                                                            
 3                                     column=info:name, timestamp=1511079380185, value=hadoop                                                         
 4                                     column=info:age, timestamp=1511079380185, value=56                                                              
 4                                     column=info:gender, timestamp=1511079380185, value=G                                                            
 4                                     column=info:name, timestamp=1511079380185, value=spark                                                          
 5                                     column=info:age, timestamp=1511079414301, value=29                                                              
 5                                     column=info:gender, timestamp=1511079414301, value=B                                                            
 5                                     column=info:name, timestamp=1511079414301, value=hadoop                                                         
 6                                     column=info:age, timestamp=1511079414301, value=56                                                              
 6                                     column=info:gender, timestamp=1511079414301, value=G                                                            
 6                                     column=info:name, timestamp=1511079414301, value=spark 
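Besides scanning in the hbase shell, the write can also be checked from Spark itself by reading the table back with newAPIHadoopRDD. A small verification sketch, assuming the same sc and tableName as above:

import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.mapreduce.TableInputFormat

val readConf = HBaseConfiguration.create()
readConf.set(TableInputFormat.INPUT_TABLE, tableName)
val hbaseRDD = sc.newAPIHadoopRDD(readConf, classOf[TableInputFormat],
  classOf[ImmutableBytesWritable], classOf[Result])
hbaseRDD.foreach { case (_, result) =>
  val rowKey = Bytes.toString(result.getRow)
  val name   = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
  val gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")))
  val age    = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
  println(s"$rowKey  $name  $gender  $age")
}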

 

posted @ 2017-11-19 16:20  soyosuyang