1. Simple examples of reading HBase data from Spark in two ways: via the HBase API and via HFile (TableInputFormat)
pom.xml dependencies:
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>0.98.6-cdh5.2.0</version>
    <exclusions>
        <exclusion>
            <artifactId>javax.servlet-api</artifactId>
            <groupId>javax.servlet</groupId>
        </exclusion>
        <exclusion>
            <artifactId>javax.servlet</artifactId>
            <groupId>org.eclipse.jetty.orbit</groupId>
        </exclusion>
        <exclusion>
            <artifactId>servlet-api-2.5</artifactId>
            <groupId>org.mortbay.jetty</groupId>
        </exclusion>
        <exclusion>
            <artifactId>servlet-api</artifactId>
            <groupId>javax.servlet</groupId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.10</artifactId>
    <version>1.5.2-hdh3.1.0</version>
    <exclusions>
        <exclusion>
            <artifactId>hadoop-client</artifactId>
            <groupId>org.apache.hadoop</groupId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.5.2-hdh3.1.0</version>
    <exclusions>
        <exclusion>
            <artifactId>javax.servlet-api</artifactId>
            <groupId>javax.servlet</groupId>
        </exclusion>
    </exclusions>
</dependency>
I. Reading HBase table data with the HBase API
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Reads data through the HBase API.
 */
public class DataAchieveFromHbaseApi {
    public static void main(String[] args) throws IOException {
        // HBase configuration
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.property.clientPort", "2181");  // ZooKeeper client port
        conf.set("hbase.zookeeper.quorum", "hdh1,hdh2,hdh3");     // ZooKeeper quorum used by HBase

        // Scan configuration
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("cf"));                      // column family; more can be added

        // HBase table
        HTable hTable = new HTable(conf, Bytes.toBytes("test"));  // table name

        // Fetch the scan results
        ResultScanner rs = hTable.getScanner(scan);

        // Column-family information of the table
        HColumnDescriptor[] hColDes = hTable.getTableDescriptor().getColumnFamilies();
        for (HColumnDescriptor hColDe : hColDes) {
            System.out.println(Bytes.toString(hColDe.getName()));
        }

        // Print the single column of every row (this table has only one)
        for (Result r : rs) {
            byte[] bytes = r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("SSID")); // column family and qualifier
            if (bytes != null) {                                  // the cell may be missing in some rows
                String str = new String(bytes, "UTF-8");
                if (str.trim().length() > 0) {
                    System.out.println(str.trim());
                }
            }
        }

        rs.close();
        hTable.close();
        System.out.println("end<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<");
    }
}
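If only the cf:SSID column is needed rather than the whole cf family, the scan can be narrowed before it is handed to getScanner. A minimal sketch of a drop-in replacement for the Scan/getScanner lines above; the row-key range and caching value are illustrative, not from the original post:

// Narrow the scan to a single column and an (illustrative) row-key range
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("SSID")); // fetch only cf:SSID instead of the whole family
scan.setStartRow(Bytes.toBytes("row-000"));                 // hypothetical start row (inclusive)
scan.setStopRow(Bytes.toBytes("row-999"));                  // hypothetical stop row (exclusive)
scan.setCaching(500);                                       // rows fetched per RPC; fewer round trips for large scans
ResultScanner rs = hTable.getScanner(scan);

Restricting the scan on the server side keeps both the RPC traffic and the client-side filtering in the loop above to a minimum.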
II. Reading HBase table data through the interface Spark provides:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.io.IOException;

/**
 * Reads data in HFile form (through TableInputFormat).
 */
public class DataAchieveFromHfile {
    private static JavaPairRDD<ImmutableBytesWritable, Result> rdd;

    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.zookeeper.quorum", "hdh1,hdh2,hdh3");
        conf.set(TableInputFormat.INPUT_TABLE, "test");

        // Spark application name and run mode (local mode here)
        SparkConf conf1 = new SparkConf().setAppName("test").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf1);

        // Load the table as an RDD of (row key, Result) pairs
        rdd = sc.newAPIHadoopRDD(conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
        System.out.println("rows read: " + rdd.count());

        rdd.foreach(new VoidFunction<Tuple2<ImmutableBytesWritable, Result>>() {
            @Override
            public void call(Tuple2<ImmutableBytesWritable, Result> result) throws Exception {
                byte[] bytes = result._2().getValue(Bytes.toBytes("cf"), Bytes.toBytes("SSID")); // column family and qualifier
                if (bytes != null) {                              // the cell may be missing in some rows
                    String str = new String(bytes, "UTF-8");
                    if (str.trim().length() > 0) {
                        System.out.println(str.trim());
                    }
                }
            }
        });

        sc.stop();
    }
}
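Two notes on this pattern. First, TableInputFormat can be told what to scan through plain configuration keys, which avoids shipping whole rows to Spark only to discard most of them. Second, on a real cluster the foreach above prints on the executors rather than on the driver, so a more typical pattern is to map the (row key, Result) pairs into plain values and pull a sample back to the driver. A minimal sketch continuing from the conf, rdd and sc built above (it additionally needs org.apache.spark.api.java.JavaRDD and org.apache.spark.api.java.function.Function; the row-key range is hypothetical):

// Before calling newAPIHadoopRDD, restrict what TableInputFormat scans
conf.set(TableInputFormat.SCAN_COLUMNS, "cf:SSID");        // only read the cf:SSID column
conf.set(TableInputFormat.SCAN_ROW_START, "row-000");      // hypothetical start row
conf.set(TableInputFormat.SCAN_ROW_STOP, "row-999");       // hypothetical stop row

// Turn the (row key, Result) pairs into the SSID strings themselves
JavaRDD<String> ssids = rdd.map(new Function<Tuple2<ImmutableBytesWritable, Result>, String>() {
    @Override
    public String call(Tuple2<ImmutableBytesWritable, Result> t) throws Exception {
        byte[] bytes = t._2().getValue(Bytes.toBytes("cf"), Bytes.toBytes("SSID"));
        return bytes == null ? "" : new String(bytes, "UTF-8").trim();
    }
});

// Bring a small sample back to the driver and print it there
for (String ssid : ssids.take(10)) {
    System.out.println(ssid);
}

From here the JavaRDD<String> can be handed to the rest of a Spark job (filter, join, save) instead of being printed.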
This post is from cnblogs, author: life_start. If you repost it, please cite the original link: https://www.cnblogs.com/yangh2016/p/5737350.html