hbase 分页过滤(新老API的差别)
在 HBase 2.0 以前,分页过滤必须以“上一页的最后一行行键 + 一个零字节(0x00)”作为下一页的起始行。
这是因为老 API 的 scan 扫描总是包含起始行:如果直接用上一页的最后一行作为起始行,该行就会在下一页中重复出现;
而在行键末尾追加一个零字节后,得到的是紧跟在该行之后、按字典序最小的行键,既能准确定位,又不会重复返回上一页的最后一行,所以权威指南里才有了追加零字节的处理。
HBase 2.0 以后,新的 API 是 withStartRow(byte[] startRow, boolean inclusive),可以直接指定是否包含起始行,从根本上解决了这个问题;同时老的写法(追加零字节并包含起始行)仍然可用,保持了对以前代码的兼容。
//分页过滤 private static void pageFilterData() throws IOException{ Table table = helper.getConnection().getTable(TableName.valueOf("testtable")); final byte[] POSTFIX = new byte[] { 0x00 }; Filter filter = new PageFilter(10); int totalRows = 0; byte[] lastRow = null; while(true){ Scan scan = new Scan(); scan.setFilter(filter); if(lastRow!=null){ //为了兼容以前的scan.setStartRow()代码 //在上一次的最后一行加上一个空的byte数据,在下一个分页上,就会以新的key开始, // 但是实际上这个key并不存在,所以还是从真正的下一行开始扫描 //这么做的原因是scan的扫描会自动包含起始行,如果不加空字节数据,那么定位上就会把上一次的最后一行作为起始行,最后的数据就会多一行。 //而,新的api是withStartRow(byte[] startRow, boolean inclusive),可以直接设置是否包含起始行,完美解决问题,但是又保留了对 //以前api函数的兼容性 // byte[] startRow = Bytes.add(lastRow,POSTFIX); // System.out.println("start row: " + Bytes.toStringBinary(startRow)); // scan.withStartRow(startRow,true); System.out.println("start row: " + Bytes.toStringBinary(lastRow)); //不包含起始行,所以可以直接使用上一次的最后一行作为起始行 scan.withStartRow(lastRow,false); } ResultScanner scanner = table.getScanner(scan); int localRows = 0; Result result; while ((result=scanner.next())!=null){ System.out.println(localRows++ + ": " + result); totalRows++; lastRow = result.getRow(); } scanner.close(); if(localRows==0)break; } System.out.println("total rows: " + totalRows); }
采用兼容老 API 的写法(在上一页末行的行键后追加 0x00 字节,并包含起始行)时,输出如下:
0: keyvalues={rowKey0/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey0/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey0/memo:detail/1555078771906/Put/vlen=7/seqid=0} 1: keyvalues={rowKey1/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey1/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey1/memo:detail/1555078771906/Put/vlen=7/seqid=0} 2: keyvalues={rowKey10/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey10/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey10/memo:detail/1555078771906/Put/vlen=8/seqid=0} 3: keyvalues={rowKey11/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey11/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey11/memo:detail/1555078771906/Put/vlen=8/seqid=0} 4: keyvalues={rowKey12/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey12/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey12/memo:detail/1555078771906/Put/vlen=8/seqid=0} 5: keyvalues={rowKey13/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey13/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey13/memo:detail/1555078771906/Put/vlen=8/seqid=0} 6: keyvalues={rowKey14/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey14/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey14/memo:detail/1555078771906/Put/vlen=8/seqid=0} 7: keyvalues={rowKey15/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey15/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey15/memo:detail/1555078771906/Put/vlen=8/seqid=0} 8: keyvalues={rowKey16/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey16/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey16/memo:detail/1555078771906/Put/vlen=8/seqid=0} 9: keyvalues={rowKey17/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey17/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey17/memo:detail/1555078771906/Put/vlen=8/seqid=0} start row: rowKey17\x00 0: keyvalues={rowKey18/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey18/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey18/memo:detail/1555078771906/Put/vlen=8/seqid=0} 1: 
keyvalues={rowKey19/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey19/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey19/memo:detail/1555078771906/Put/vlen=8/seqid=0} 2: keyvalues={rowKey2/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey2/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey2/memo:detail/1555078771906/Put/vlen=7/seqid=0} 3: keyvalues={rowKey20/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey20/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey20/memo:detail/1555078771906/Put/vlen=8/seqid=0} 4: keyvalues={rowKey21/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey21/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey21/memo:detail/1555078771906/Put/vlen=8/seqid=0} 5: keyvalues={rowKey22/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey22/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey22/memo:detail/1555078771906/Put/vlen=8/seqid=0} 6: keyvalues={rowKey23/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey23/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey23/memo:detail/1555078771906/Put/vlen=8/seqid=0} 7: keyvalues={rowKey24/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey24/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey24/memo:detail/1555078771906/Put/vlen=8/seqid=0} 8: keyvalues={rowKey25/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey25/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey25/memo:detail/1555078771906/Put/vlen=8/seqid=0} 9: keyvalues={rowKey26/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey26/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey26/memo:detail/1555078771906/Put/vlen=8/seqid=0} start row: rowKey26\x00 0: keyvalues={rowKey27/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey27/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey27/memo:detail/1555078771906/Put/vlen=8/seqid=0} 1: keyvalues={rowKey28/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey28/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey28/memo:detail/1555078771906/Put/vlen=8/seqid=0} 2: 
keyvalues={rowKey29/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey29/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey29/memo:detail/1555078771906/Put/vlen=8/seqid=0} 3: keyvalues={rowKey3/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey3/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey3/memo:detail/1555078771906/Put/vlen=7/seqid=0} 4: keyvalues={rowKey30/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey30/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey30/memo:detail/1555078771906/Put/vlen=8/seqid=0} 5: keyvalues={rowKey31/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey31/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey31/memo:detail/1555078771906/Put/vlen=8/seqid=0} 6: keyvalues={rowKey32/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey32/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey32/memo:detail/1555078771906/Put/vlen=8/seqid=0} 7: keyvalues={rowKey33/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey33/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey33/memo:detail/1555078771906/Put/vlen=8/seqid=0} 8: keyvalues={rowKey34/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey34/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey34/memo:detail/1555078771906/Put/vlen=8/seqid=0} 9: keyvalues={rowKey35/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey35/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey35/memo:detail/1555078771906/Put/vlen=8/seqid=0} start row: rowKey35\x00 ... ...省略其他数据
起始行是在上一页末行的行键后追加一个零字节(0x00)得到的新行键(输出中显示为 rowKey17\x00),它在表中并不存在,相当于在真正的两页之间插入了一个仅用于定位的虚拟行,从而解决了下一页首行重复的问题。
使用新 API 时的输出如下:下一页的定位行就是上一页的末行,调用 withStartRow 设置起始行时把 inclusive 设为 false(即不包含起始行),输出结果同样正确。
0: keyvalues={rowKey0/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey0/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey0/memo:detail/1555078771906/Put/vlen=7/seqid=0} 1: keyvalues={rowKey1/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey1/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey1/memo:detail/1555078771906/Put/vlen=7/seqid=0} 2: keyvalues={rowKey10/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey10/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey10/memo:detail/1555078771906/Put/vlen=8/seqid=0} 3: keyvalues={rowKey11/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey11/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey11/memo:detail/1555078771906/Put/vlen=8/seqid=0} 4: keyvalues={rowKey12/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey12/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey12/memo:detail/1555078771906/Put/vlen=8/seqid=0} 5: keyvalues={rowKey13/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey13/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey13/memo:detail/1555078771906/Put/vlen=8/seqid=0} 6: keyvalues={rowKey14/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey14/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey14/memo:detail/1555078771906/Put/vlen=8/seqid=0} 7: keyvalues={rowKey15/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey15/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey15/memo:detail/1555078771906/Put/vlen=8/seqid=0} 8: keyvalues={rowKey16/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey16/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey16/memo:detail/1555078771906/Put/vlen=8/seqid=0} 9: keyvalues={rowKey17/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey17/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey17/memo:detail/1555078771906/Put/vlen=8/seqid=0} start row: rowKey17 0: keyvalues={rowKey18/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey18/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey18/memo:detail/1555078771906/Put/vlen=8/seqid=0} 1: 
keyvalues={rowKey19/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey19/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey19/memo:detail/1555078771906/Put/vlen=8/seqid=0} 2: keyvalues={rowKey2/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey2/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey2/memo:detail/1555078771906/Put/vlen=7/seqid=0} 3: keyvalues={rowKey20/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey20/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey20/memo:detail/1555078771906/Put/vlen=8/seqid=0} 4: keyvalues={rowKey21/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey21/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey21/memo:detail/1555078771906/Put/vlen=8/seqid=0} 5: keyvalues={rowKey22/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey22/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey22/memo:detail/1555078771906/Put/vlen=8/seqid=0} 6: keyvalues={rowKey23/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey23/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey23/memo:detail/1555078771906/Put/vlen=8/seqid=0} 7: keyvalues={rowKey24/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey24/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey24/memo:detail/1555078771906/Put/vlen=8/seqid=0} 8: keyvalues={rowKey25/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey25/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey25/memo:detail/1555078771906/Put/vlen=8/seqid=0} 9: keyvalues={rowKey26/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey26/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey26/memo:detail/1555078771906/Put/vlen=8/seqid=0} start row: rowKey26 0: keyvalues={rowKey27/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey27/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey27/memo:detail/1555078771906/Put/vlen=8/seqid=0} 1: keyvalues={rowKey28/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey28/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey28/memo:detail/1555078771906/Put/vlen=8/seqid=0} 2: 
keyvalues={rowKey29/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey29/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey29/memo:detail/1555078771906/Put/vlen=8/seqid=0} 3: keyvalues={rowKey3/ex:addr/1555078771906/Put/vlen=7/seqid=0, rowKey3/info:username/1555078771906/Put/vlen=5/seqid=0, rowKey3/memo:detail/1555078771906/Put/vlen=7/seqid=0} 4: keyvalues={rowKey30/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey30/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey30/memo:detail/1555078771906/Put/vlen=8/seqid=0} 5: keyvalues={rowKey31/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey31/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey31/memo:detail/1555078771906/Put/vlen=8/seqid=0} 6: keyvalues={rowKey32/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey32/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey32/memo:detail/1555078771906/Put/vlen=8/seqid=0} 7: keyvalues={rowKey33/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey33/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey33/memo:detail/1555078771906/Put/vlen=8/seqid=0} 8: keyvalues={rowKey34/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey34/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey34/memo:detail/1555078771906/Put/vlen=8/seqid=0} 9: keyvalues={rowKey35/ex:addr/1555078771906/Put/vlen=8/seqid=0, rowKey35/info:username/1555078771906/Put/vlen=6/seqid=0, rowKey35/memo:detail/1555078771906/Put/vlen=8/seqid=0} start row: rowKey35 ... ...其他省略