HBase笔记--自定义filter
自定义filter需要继承的类:FilterBase
类里面的方法调用顺序
序号 | 方法名 | 作用 |
1 | boolean filterRowKey(Cell cell) | 根据row key过滤row。如果需要被过滤掉,返回true;需要返回给客户端,返回false |
2 | ReturnCode filterKeyValue(Cell v) | ReturnCode是在Filter接口中定义的枚举类型,该方法的返回值决定是否要包括该cell对象 (A way to filter based on the column family, column qualifier and/or the column value) |
3 | void filterRowCells(List<Cell> ignored) | 方法传入通过filterKeyValue的对象列表,然后在这里对列表里的元素进行任何转换或运算 |
4 | boolean filterRow() | 判断是否过滤掉当前行:返回true则过滤掉上面方法正在处理的行,返回false则保留该行 |
5 | boolean filterAllRemaining() | 在过滤器里构建逻辑来提前停止一次扫描。 例如:在扫描很多行时,在行键、列限定符、单元值里找指定东西时,一旦找到目标,就不必关心剩下的行,可使用这个方法过滤 |
附:
filter执行流程(旧版):http://my.oschina.net/cloudcoder/blog/289649
旧版本的过滤方法 http://doc.okbase.net/wgp13x/archive/121557.html
示例代码:根据经纬度,过滤掉不在指定区域范围内的点:
参考材料:https://www.github.com/hbaseinaction
https://github.com/hbaseinaction/gis
import java.io.IOException; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.filter.FilterBase; import org.apache.hadoop.hbase.util.Bytes; import com.vividsolutions.jts.geom.Coordinate; import com.vividsolutions.jts.geom.Geometry; import com.vividsolutions.jts.geom.GeometryFactory; public class WithinFilter extends FilterBase { static final byte[] TABLE = "wifi".getBytes(); static final byte[] FAMILY = "a".getBytes(); static final byte[] ID = "id".getBytes(); static final byte[] X_COL = "lon".getBytes(); static final byte[] Y_COL = "lat".getBytes(); static final Log LOG = LogFactory.getLog(WithinFilter.class); final GeometryFactory factory = new GeometryFactory(); Geometry query = null; boolean exclude = false; public WithinFilter() { } public WithinFilter(Geometry query) { this.query = query; } //遍历每行每个列族的每个KeyValue的方法可能很慢,如果可以,HBase会优化对filterRow的调用 @Override public boolean hasFilterRow(){ return true; } //根据column family, column qualifier 或者 column value进行过滤 @Override public ReturnCode filterKeyValue(Cell cell) throws IOException { byte[] qualname = CellUtil.cloneQualifier(cell); if(Bytes.equals(qualname, Bytes.toBytes("不需要的qualifier名"))) //例如可以处理密码,并且将密码跳过不反回到客户端 return ReturnCode.SKIP; return ReturnCode.INCLUDE; } //根据经纬度过滤,符合要求的为在区域内的点 @Override public void filterRowCells(List<Cell> celllist) throws IOException{ double lon = Double.NaN; double lat = Double.NaN; for(Cell cell : celllist){ if(Bytes.equals(CellUtil.cloneQualifier(cell), X_COL)){ lon = Double.parseDouble(new String(CellUtil.cloneValue(cell))); } if(Bytes.equals(CellUtil.cloneQualifier(cell), Y_COL)){ lat = Double.parseDouble(new String(CellUtil.cloneValue(cell))); } } Coordinate coord = new Coordinate(lon,lat); Geometry point = factory.createPoint(coord); //创建Point if(!query.contains(point)){ //测试是否包含该点 this.exclude = true; } } 
//如果某一行没有落在查询边界想要排除它是,需要设置exclude标志。 @Override public boolean filterRow() { if (LOG.isDebugEnabled()) LOG.debug("filter applied. " + (this.exclude ? "rejecting" : "keeping")); return this.exclude; } @Override public void reset() { this.exclude = false; } }
-------------------------------------------