Lucene系列-facet
1.facet的直观认识
facet:面、切面、方面。个人理解就是维度,在满足query的前提下,观察结果在各维度上的分布(一个维度下各子类的数目)。
如jd上搜“手机”,得到4009个商品。其中品牌、网络、价格就是商品的维度(facet),点击某个品牌或者网络,获取更细分的结果。
点击品牌小米,获得小米手机的结果,显示27个。
再点击移动4G,获得同时满足“小米”和“移动4G”两个条件的手机,显示4个。
2.facet特性
- facet counting:返回一个facet下某子类的结果数。如上面的品牌维度下小米子类中满足查询"手机"的结果有27个。
- facet associations:一个文档与某子类的关联度,如一本书30%讲Lucene,70%讲Solr,这个百分比就是书与分类的关联度(匹配度、置信度)。
- multiple facet requests:支持多facet查询(多维度查询)。如查询品牌为小米、网络为移动4G的手机。
3.实例
一个facet简单使用例子,依赖于lucene-facet-4.10.0。讲述了从搜“手机”开始,依次按品牌、网络逐级向下浏览(drill down)的过程,最后演示横向浏览(drill sideways)。
public class SimpleFacetsExample { private final Directory indexDir = new RAMDirectory(); private final Directory taxoDir = new RAMDirectory(); private final FacetsConfig config = new FacetsConfig(); /** Empty constructor */ public SimpleFacetsExample() { config.setHierarchical("Publish Date", true); } /** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(Version.LUCENE_4_10_0, new WhitespaceAnalyzer())); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new TextField("device", "手机", Field.Store.YES)); doc.add(new TextField("name", "米1", Field.Store.YES)); doc.add(new FacetField("brand", "小米")); doc.add(new FacetField("network", "移动4G")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new TextField("device", "手机", Field.Store.YES)); doc.add(new TextField("name", "米4", Field.Store.YES)); doc.add(new FacetField("brand", "小米")); doc.add(new FacetField("network", "联通4G")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new TextField("device", "手机", Field.Store.YES)); doc.add(new TextField("name", "荣耀6", Field.Store.YES)); doc.add(new FacetField("brand", "华为")); doc.add(new FacetField("network", "移动4G")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new TextField("device", "电视", Field.Store.YES)); doc.add(new TextField("name", "小米电视2", Field.Store.YES)); doc.add(new FacetField("brand", "小米")); indexWriter.addDocument(config.build(taxoWriter, doc)); taxoWriter.close(); indexWriter.close(); } private void facetsWithSearch() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); 
FacetsCollector fc = new FacetsCollector(); //1.查询手机 System.out.println("-----手机-----"); TermQuery query = new TermQuery(new Term("device", "手机")); FacetsCollector.search(searcher, query, 10, fc); Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc); List<FacetResult> results = facets.getAllDims(10); //手机总共有3个,品牌维度:小米2个,华为1个;网络维度:移动4G 2个,联通4G 1个 for (FacetResult tmp : results) { System.out.println(tmp); } //2.drill down,品牌选小米 System.out.println("-----小米手机-----"); DrillDownQuery drillDownQuery = new DrillDownQuery(config, query); drillDownQuery.add("brand", "小米"); FacetsCollector fc1 = new FacetsCollector();//要new新collector,否则会累加 FacetsCollector.search(searcher, drillDownQuery, 10, fc1); facets = new FastTaxonomyFacetCounts(taxoReader, config, fc1); results = facets.getAllDims(10); //获得小米手机的分布,总数2个,网络:移动4G 1个,联通4G 1个 for (FacetResult tmp : results) { System.out.println(tmp); } //3.drill down,小米移动4G手机 System.out.println("-----移动4G小米手机-----"); drillDownQuery.add("network", "移动4G"); FacetsCollector fc2 = new FacetsCollector(); FacetsCollector.search(searcher, drillDownQuery, 10, fc2); facets = new FastTaxonomyFacetCounts(taxoReader, config, fc2); results = facets.getAllDims(10); for (FacetResult tmp : results) { System.out.println(tmp); } //4.drill sideways,横向浏览 //如果已经进入了小米手机,但是还想看到其他牌子(华为)的手机数目,就用到了sideways System.out.println("-----小米手机drill sideways-----"); DrillSideways ds = new DrillSideways(searcher, config, taxoReader); DrillDownQuery drillDownQuery1 = new DrillDownQuery(config, query); drillDownQuery1.add("brand", "小米"); DrillSidewaysResult result = ds.search(drillDownQuery1, 10); results = result.facets.getAllDims(10); for (FacetResult tmp : results) { System.out.println(tmp); } indexReader.close(); taxoReader.close(); } /** Runs the search and drill-down examples and prints the results. 
*/ public static void main(String[] args) throws Exception { SimpleFacetsExample example = new SimpleFacetsExample(); example.index(); example.facetsWithSearch(); } }
输出:
-----手机-----
//总数3个,2个子类
dim=brand path=[] value=3 childCount=2
  小米 (2)
  华为 (1)
dim=network path=[] value=3 childCount=2
  移动4G (2)
  联通4G (1)
-----小米手机-----
//普通向下浏览,丢失了同一维度下其他子类的统计
dim=brand path=[] value=2 childCount=1
  小米 (2)
dim=network path=[] value=2 childCount=2
  移动4G (1)
  联通4G (1)
-----移动4G小米手机-----
dim=brand path=[] value=1 childCount=1
  小米 (1)
dim=network path=[] value=1 childCount=1
  移动4G (1)
-----小米手机drill sideways-----
//drill sideways,保留了该drill维度的其他子类统计
dim=brand path=[] value=3 childCount=2
  小米 (2)
  华为 (1)
//小米手机中的网络分布
dim=network path=[] value=2 childCount=2
  移动4G (1)
  联通4G (1)