HBase 之缓存扫描加快读取速度
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.client.metrics.ScanMetrics; import java.io.IOException; /** * Created by similarface on 16/8/23. */ public class ScanDataUseCache { private static Table table=null; public static Table getTable() { if(table==null){ try { Configuration configuration = HBaseConfiguration.create(); Connection connection = ConnectionFactory.createConnection(configuration); //建立表的连接 return connection.getTable(TableName.valueOf("testtable")); }catch (IOException e){ return table; } } return table; } private static void scan(int caching,int batch,boolean small) { int count=0; //setCaching 设置的值为每次rpc的请求记录数,默认是1;cache大可以优化性能,但是太大了会花费很长的时间进行一次传输。 //setBatch 设置每次取的column size;有些row特别大,所以需要分开传给client,就是一次传一个row的几个column。 //setSmall 是否为小扫描 //setScanMetricsEnabled 使用了集合 Scan scan = new Scan().setCaching(caching).setBatch(batch).setSmall(small).setScanMetricsEnabled(true); ResultScanner scanner=null; try { scanner = getTable().getScanner(scan); }catch (IOException e){ System.out.println(e); } if (scanner!=null){ for (Result result:scanner){ count++; } scanner.close(); ScanMetrics metrics = scan.getScanMetrics(); System.out.println("Caching: " + caching + ", Batch: " + batch + ", Small: " + small + ", Results: " + count + ", RPCs: " + metrics.countOfRPCcalls); } else { System.out.println("Error"); } } public static void main(String[] args) throws IOException { // Caching: 1, Batch: 1, Small: false, Results: 9, RPCs: 12 scan(1, 1, false); //Caching: 1, Batch: 0, Small: false, Results: 4, RPCs: 7 scan(1, 0, false); // Caching: 1, Batch: 0, Small: true, Results: 4, RPCs: 0 scan(1, 0, true); //Caching: 200, Batch: 1, Small: false, Results: 9, RPCs: 3 scan(200, 1, false); //Caching: 200, Batch: 0, Small: false, Results: 4, RPCs: 3 scan(200, 0, false); //Caching: 200, Batch: 0, Small: true, 
Results: 4, RPCs: 0 scan(200, 0, true); // Caching: 2000, Batch: 100, Small: false, Results: 4, RPCs: 3 scan(2000, 100, false); // Caching: 2, Batch: 100, Small: false, Results: 4, RPCs: 5 scan(2, 100, false); // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5 scan(2, 10, false); // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5 scan(5, 100, false); // Caching: 5, Batch: 100, Small: false, Results: 4, RPCs: 3 scan(5, 20, false); // Caching: 10, Batch: 10, Small: false, Results: 4, RPCs: 3 scan(10, 10, false); } } /** Caching: 1, Batch: 0, Small: false, Results: 5, RPCs: 8 Caching: 1, Batch: 0, Small: true, Results: 5, RPCs: 0 Caching: 200, Batch: 1, Small: false, Results: 1009, RPCs: 8 Caching: 200, Batch: 0, Small: false, Results: 5, RPCs: 3 Caching: 200, Batch: 0, Small: true, Results: 5, RPCs: 0 Caching: 2000, Batch: 100, Small: false, Results: 14, RPCs: 3 Caching: 2, Batch: 100, Small: false, Results: 14, RPCs: 10 Caching: 2, Batch: 10, Small: false, Results: 104, RPCs: 55 Caching: 5, Batch: 100, Small: false, Results: 14, RPCs: 5 Caching: 5, Batch: 20, Small: false, Results: 54, RPCs: 13 Caching: 10, Batch: 10, Small: false, Results: 104, RPCs: 13 **/
这是一个9行数据的表
每行包含一些列
使用缓存为6 批量为3的扫描器
需要3个RPC
3个列装入一个Result实例
每 6 个 Result 填满一次缓存,对应一次 RPC 请求
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.client.metrics.ScanMetrics; import java.io.IOException; /** * Created by similarface on 16/8/24. */ public class ScanWithOffsetAndLimit { private static Table table = null; public static Table getTable() { if (table == null) { try { Configuration configuration = HBaseConfiguration.create(); Connection connection = ConnectionFactory.createConnection(configuration); //建立表的连接 return connection.getTable(TableName.valueOf("testtable")); } catch (IOException e) { return table; } } return table; } /** * 遍历访问数据 * @param num 运行次序 * @param caching * @param batch * @param offset * @param maxResults * @param maxResultSize * @param dump * @throws IOException */ private static void scan(int num, int caching, int batch, int offset, int maxResults, int maxResultSize, boolean dump ) throws IOException { int count = 0; Scan scan = new Scan().setCaching(caching).setBatch(batch) .setRowOffsetPerColumnFamily(offset) .setMaxResultsPerColumnFamily(maxResults) .setMaxResultSize(maxResultSize) .setScanMetricsEnabled(true); ResultScanner scanner = getTable().getScanner(scan); System.out.println("Scan #" + num + " running..."); for (Result result : scanner) { count++; if (dump) System.out.println("Result [" + count + "]:" + result); } scanner.close(); ScanMetrics metrics = scan.getScanMetrics(); System.out.println("Caching: " + caching + ", Batch: " + batch + ", Offset: " + offset + ", maxResults: " + maxResults + ", maxSize: " + maxResultSize + ", Results: " + count + ", RPCs: " + metrics.countOfRPCcalls); } public static void main(String[] args) throws IOException { //偏移为0 最大2个cell 所以会扫描到列1 和列2 scan(1, 11, 0, 0, 2, -1, true); //偏移为4 最大2个cell 所以会扫描到列5 和列6 scan(2, 11, 0, 4, 2, -1, true); // scan(3, 5, 0, 0, 2, -1, false); scan(4, 11, 2, 0, 5, -1, true); scan(5, 11, -1, -1, -1, 1, 
false); scan(6, 11, -1, -1, -1, 10000, false); } } /** Caching: 11, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 458 Caching: 11, Batch: 0, Offset: 4, maxResults: 2, maxSize: -1, Results: 1, RPCs: 3 Caching: 5, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 1004 Caching: 11, Batch: 2, Offset: 0, maxResults: 5, maxSize: -1, Results: 5009, RPCs: 458 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 1, Results: 5005, RPCs: 11012 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 10000, Results: 5005, RPCs: 469 **/