Writing Data to HBase from Spark in IDEA
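The code below assumes a running HBase instance and an existing target table. Presumably the student table was created beforehand in the HBase shell with a command like the following (the info column family matches the writes below):

create 'student','info'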
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf

object 写Hbase数据 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("Write data to HBase").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    val tableName = "student"
    // sc.hadoopConfiguration.set(TableOutputFormat.OUTPUT_TABLE, tableName)
    val conf = HBaseConfiguration.create()
    val jobConf = new JobConf(conf)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)

    // Build the new records
    val dataRDD = sc.makeRDD(Array("5,hadoop,B,29", "6,spark,G,56"))
    val rdd = dataRDD.map(_.split(",")).map { x =>
      val put = new Put(Bytes.toBytes(x(0)))  // row key
      // addColumn takes three arguments: column family, column qualifier, value
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(x(1)))    // info:name
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(x(2)))  // info:gender
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(x(3)))     // info:age
      // Must be an RDD[(ImmutableBytesWritable, Put)] to call saveAsHadoopDataset
      (new ImmutableBytesWritable, put)
    }
    rdd.saveAsHadoopDataset(jobConf)
    sc.stop()
  }
}
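The program above uses the old mapred API (JobConf plus saveAsHadoopDataset). A roughly equivalent sketch with the newer mapreduce API and saveAsNewAPIHadoopDataset, assuming the same table and column family, might look like this (the object name, app name, and sample rows 7 and 8 are made up for illustration, so they do not overwrite the rows written above):

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{SparkConf, SparkContext}

object WriteHBaseNewAPI {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("Write to HBase (new API)").setMaster("local[2]"))
    val conf = HBaseConfiguration.create()
    conf.set(TableOutputFormat.OUTPUT_TABLE, "student")
    // The Job object only carries output-format configuration for the new API
    val job = Job.getInstance(conf)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Put])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

    val rdd = sc.makeRDD(Array("7,flink,B,31", "8,kafka,G,42"))  // hypothetical sample rows
      .map(_.split(","))
      .map { x =>
        val put = new Put(Bytes.toBytes(x(0)))  // row key
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(x(1)))
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(x(2)))
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(x(3)))
        (new ImmutableBytesWritable, put)
      }
    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
    sc.stop()
  }
}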
Result:
hbase(main):021:0> scan 'student'
ROW    COLUMN+CELL
 3     column=info:age, timestamp=1511079380185, value=29
 3     column=info:gender, timestamp=1511079380185, value=B
 3     column=info:name, timestamp=1511079380185, value=hadoop
 4     column=info:age, timestamp=1511079380185, value=56
 4     column=info:gender, timestamp=1511079380185, value=G
 4     column=info:name, timestamp=1511079380185, value=spark
 5     column=info:age, timestamp=1511079414301, value=29
 5     column=info:gender, timestamp=1511079414301, value=B
 5     column=info:name, timestamp=1511079414301, value=hadoop
 6     column=info:age, timestamp=1511079414301, value=56
 6     column=info:gender, timestamp=1511079414301, value=G
 6     column=info:name, timestamp=1511079414301, value=spark
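Rows 5 and 6 are the records just written; rows 3 and 4, with the same values but earlier timestamps, appear to be leftovers from a previous run with different row keys. Instead of checking in the HBase shell, the table can also be read back from Spark. A minimal read-back sketch using the new-API TableInputFormat (object name and app name are mine) might look like this:

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

object ReadHBaseData {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("Read from HBase").setMaster("local[2]"))
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, "student")
    // newAPIHadoopRDD yields (row key, Result) pairs
    val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])
    // Collect to the driver so println output is visible (fine for a small demo table)
    hbaseRDD.collect().foreach { case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")))
      val age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      println(s"Row key: $key  Name: $name  Gender: $gender  Age: $age")
    }
    sc.stop()
  }
}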