Lucene的Query类介绍

把Lucene的查询类比为SQL的查询,也许就能大致理解Query的本质了。

查询大致分为两类:1、精准查询;2、模糊查询。

创建测试数据。

// Lucene directory for the on-disk index; assigned by the constructor and
// re-assigned by index(), getSearcher() and search().
private Directory directory;
    // Reader cached across calls by getSearcher().
    private IndexReader reader;
    // Sample data. The arrays below are parallel: index() reads element i of
    // each array to build document i.
    private String[] ids = {"1","2","3","4","5","6"};
    // Indexed as the "email" field; the part after '@' is looked up in the boost table.
    private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
    // Indexed as the analyzed, non-stored "content" field.
    private String[] contents = {
            "welcome to visited the space,I like book",
            "hello boy, I like pingpeng ball",
            "my name is cc I like game",
            "I like football",
            "I like football and I like basketball too",
            "I like movie and swim"
    };
    // Indexed as the numeric "attach" field (IntField).
    private int[] attachs = {2,3,1,4,5,5};
    // Indexed as the stored, non-analyzed "name" field.
    private String[] names = {"zhangsan","lisi","john","jetty","lisi","jake"};

先建立索引。

 

 1 private Map<String,Float> scores = new HashMap<String,Float>();
 2     
 3 public SearchUtil(){
 4     try {
 5         directory = FSDirectory.open(Paths.get("D://lucene//index"));
 6         scores.put("itat.org", 1.5f);
 7         scores.put("cc.org", 2.0f);
 8     } catch (IOException e) {
 9         // TODO Auto-generated catch block
10         e.printStackTrace();
11     }
12 }
13 /**
14  * 创建索引
15  */
16 @SuppressWarnings("deprecation")
17 public void index(){
18     IndexWriter writer = null;
19     try {
20         directory = FSDirectory.open(Paths.get("D://lucene//index"));
21         writer = getWriter();
22         Document doc = null;
23         for(int i=0;i<ids.length;i++){
24             doc = new Document();
25             doc.add(new Field("id", ids[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
26             doc.add(new Field("name", names[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
27             doc.add(new Field("content", contents[i], Field.Store.NO,Field.Index.ANALYZED));
28             //存储数字
29             doc.add(new IntField("attach", attachs[i],  Field.Store.YES));
30             
31             // 加权操作
32             TextField field = new TextField("email", emails[i], Field.Store.YES);
33             String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
34             if (scores.containsKey(et)) {
35                 field.setBoost(scores.get(et));
36             }
37             doc.add(field);
38             // 添加文档
39             writer.addDocument(doc);
40         }
41     } catch (Exception e) {
42         // TODO: handle exception
43         e.printStackTrace();
44     }finally{
45         try {
46             writer.close();
47         } catch (IOException e) {
48             // TODO Auto-generated catch block
49             e.printStackTrace();
50         }
51     }
52 }

 

 索引建立完毕。

获取IndexSearcher的方法。

/**
     * Returns a searcher over an up-to-date reader of the index.
     *
     * The reader is opened on the first call and cached. On later calls it is
     * refreshed only if the index has changed; the previous implementation
     * closed the cached reader and then handed out a searcher over that
     * closed reader.
     *
     * @return a searcher over the cached reader, or {@code null} if the index
     *         could not be opened or refreshed (the stack trace is printed)
     */
    public IndexSearcher getSearcher(){
        try {
            if (reader == null) {
                // Open the directory only when a reader actually has to be created.
                directory = FSDirectory.open(Paths.get("D://lucene//index"));
                reader = DirectoryReader.open(directory);
            } else if (reader instanceof DirectoryReader) {
                // openIfChanged returns null when the index is unchanged.
                DirectoryReader refreshed = DirectoryReader.openIfChanged((DirectoryReader) reader);
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
            }
            return new IndexSearcher(reader);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

一、精准匹配。

1,精准查询

就是查什么给什么。

 1 /**
 2      * 精准匹配
 3      */
 4     public void search(String searchField,String field){
 5         // 得到读取索引文件的路径
 6         IndexReader reader = null;
 7         try {
 8             directory = FSDirectory.open(Paths.get("D://lucene//index"));
 9             reader = DirectoryReader.open(directory);
10             IndexSearcher searcher = new IndexSearcher(reader);
11             // 运用term来查找
12             Term t = new Term(searchField, field);
13             Query q = new TermQuery(t);
14             // 获得查询的hits
15             TopDocs hits = searcher.search(q, 10);
16             // 显示结果
17             System.out.println("匹配 '" + q + "',总共查询到" + hits.totalHits + "个文档");
18             for (ScoreDoc scoreDoc : hits.scoreDocs){
19                 Document doc = searcher.doc(scoreDoc.doc);
20                 System.out.println("id:"+doc.get("id")+":"+doc.get("name")+",email:"+doc.get("email"));
21             }
22             
23         } catch (IOException e) {
24             // TODO Auto-generated catch block
25             e.printStackTrace();
26         }finally{
27             try {
28                 reader.close();
29             } catch (IOException e) {
30                 // TODO Auto-generated catch block
31                 e.printStackTrace();
32             }
33         }
34     }

 

2,区间查询。

/**
     * Range search on a string field: prints the documents whose term in
     * {@code field} lies between {@code start} and {@code end}, both inclusive.
     *
     * @param field name of the indexed field to search
     * @param start lower bound of the range; {@code null} leaves it open
     * @param end   upper bound of the range; {@code null} leaves it open
     * @param num   maximum number of hits to retrieve
     */
    public void searchByTermRange(String field,String start,String end,int num) {
        try {
            IndexSearcher searcher = getSearcher();
            if (searcher == null) {
                // getSearcher() has already reported why no searcher is available.
                return;
            }
            // newStringRange encodes the bounds as UTF-8, whereas
            // String.getBytes() depends on the platform default charset,
            // and it accepts null bounds for open-ended ranges.
            Query query = TermRangeQuery.newStringRange(field, start, end, true, true);
            TopDocs tds = searcher.search(query, num);

            System.out.println("一共查询了:"+tds.totalHits);
            for(ScoreDoc sd:tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id")+"---->"+
                        doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
                        doc.get("attach"));
            }
        } catch (IOException e) {
            // Also covers CorruptIndexException, which is an IOException.
            e.printStackTrace();
        }
    }

 

3、前缀查询:匹配索引中以指定字符串开头的文档

 1 /**
 2      * 匹配其索引开始以指定的字符串的文档
 3      * @param field
 4      * @param value
 5      * @param num
 6      */
 7     public void searchByPrefix(String field,String value,int num) {
 8         try {
 9             IndexSearcher searcher = getSearcher();
10             Query query = new PrefixQuery(new Term(field,value));
11             TopDocs tds = searcher.search(query, num);
12             System.out.println("一共查到:"+tds.totalHits);
13             for(ScoreDoc scoreDoc:tds.scoreDocs){
14                 Document doc = searcher.doc(scoreDoc.doc);
15                 System.out.println(doc.get("id")+"---->"+
16                         doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
17                         doc.get("attach"));
18             }
19         } catch (Exception e) {
20             e.printStackTrace();
21         }
22     }

4、数字搜索

/**
     * Numeric range search: prints the documents whose int value in
     * {@code field} lies between {@code start} and {@code end}, both inclusive.
     *
     * @param field name of the numeric field to search
     * @param start lower bound of the range
     * @param end   upper bound of the range
     * @param num   maximum number of hits to retrieve
     */
    public void searchByNums(String field,int start,int end,int num){
        try {
            IndexSearcher indexSearcher = getSearcher();
            Query range = NumericRangeQuery.newIntRange(field, start, end, true, true);
            TopDocs top = indexSearcher.search(range, num);
            System.out.println("一共查到:" + top.totalHits);
            ScoreDoc[] hits = top.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                Document hit = indexSearcher.doc(hits[i].doc);
                String id = hit.get("id");
                String line = id + "---->" + hit.get("name")
                        + "[" + hit.get("email") + "]-->" + id
                        + "," + hit.get("attach");
                System.out.println(line);
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }

二、模糊匹配

/**
     * Wildcard search: prints the documents whose term in {@code field}
     * matches the wildcard pattern {@code value}.
     *
     * @param field name of the indexed field to search
     * @param value wildcard pattern to match
     * @param num   maximum number of hits to retrieve
     */
    public void searchByWildcard(String field,String value,int num){
        try {
            IndexSearcher indexSearcher = getSearcher();
            Term pattern = new Term(field, value);
            WildcardQuery wildcard = new WildcardQuery(pattern);
            TopDocs top = indexSearcher.search(wildcard, num);
            System.out.println("一共查到:" + top.totalHits);
            ScoreDoc[] hits = top.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                Document hit = indexSearcher.doc(hits[i].doc);
                String id = hit.get("id");
                String line = id + "---->" + hit.get("name")
                        + "[" + hit.get("email") + "]-->" + id
                        + "," + hit.get("attach");
                System.out.println(line);
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
    /**
     * Boolean search: combines one term query per (field, value) pair into a
     * single BooleanQuery and prints the matching documents.
     *
     * A BooleanQuery can join several sub-queries:
     * Occur.MUST means the clause must match,
     * Occur.SHOULD means the clause may match,
     * Occur.MUST_NOT means the clause must not match.
     * Every pair is added here as a SHOULD clause.
     *
     * If the two arrays differ in length, a message is printed and the
     * method returns without searching.
     *
     * @param field names of the indexed fields, one per clause
     * @param value term text for each clause, parallel to {@code field}
     * @param num   maximum number of hits to retrieve
     */
    @SuppressWarnings("deprecation")
    public void searchByBoolean(String[] field,String[] value,int num){
        try {
            if(field.length!=value.length){
                System.out.println("field的长度需要与value的长度相等!");
                // Return instead of System.exit(0): invalid arguments must not
                // terminate the whole JVM.
                return;
            }
            IndexSearcher searcher = getSearcher();
            // Build ONE query holding every clause and search once. Creating
            // a new query per iteration kept only the last clause's results.
            BooleanQuery query = new BooleanQuery();
            for(int i = 0;i<field.length;i++){
                query.add(new TermQuery(new Term(field[i],value[i])),Occur.SHOULD);
            }
            TopDocs tds = searcher.search(query, num);
            System.out.println("一共查询:"+tds.totalHits);
            for(ScoreDoc doc:tds.scoreDocs){
                Document document = searcher.doc(doc.doc);
                System.out.println(document.get("id")+"---->"+
                        document.get("name")+"["+document.get("email")+"]-->"+document.get("id")+","+
                        document.get("attach"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /**
     * Phrase search on the "content" field: prints the documents matching the
     * terms "like" and "football" with a slop of 3.
     *
     * @param num maximum number of hits to retrieve
     */
    public void searchByPhrase(int num){
        try {
            IndexSearcher indexSearcher = getSearcher();
            PhraseQuery phrase = new PhraseQuery();
            // First term of the phrase.
            phrase.add(new Term("content","like"));
            // Second term of the phrase.
            phrase.add(new Term("content","football"));
            phrase.setSlop(3);
            TopDocs top = indexSearcher.search(phrase, num);
            System.out.println("一共查询了:" + top.totalHits);
            ScoreDoc[] hits = top.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                Document hit = indexSearcher.doc(hits[i].doc);
                String id = hit.get("id");
                String line = id + "---->" + hit.get("name")
                        + "[" + hit.get("email") + "]-->" + id
                        + "," + hit.get("attach");
                System.out.println(line);
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
    /**
     * Fuzzy (similarity) search: prints the documents whose "name" term is
     * similar to "jake".
     *
     * @param num maximum number of hits to retrieve
     */
    public void searchByFuzzy(int num) {
        try {
            IndexSearcher indexSearcher = getSearcher();
            Term target = new Term("name","jake");
            FuzzyQuery fuzzy = new FuzzyQuery(target);
            TopDocs top = indexSearcher.search(fuzzy, num);
            System.out.println("一共查询了:" + top.totalHits);
            ScoreDoc[] hits = top.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                Document hit = indexSearcher.doc(hits[i].doc);
                String id = hit.get("id");
                String line = id + "---->" + hit.get("name")
                        + "[" + hit.get("email") + "]-->" + id
                        + "," + hit.get("attach") + "," + hit.get("date");
                System.out.println(line);
            }
        } catch (IOException failure) {
            // Also covers CorruptIndexException, which is an IOException.
            failure.printStackTrace();
        }
    }
    /**
     * Runs an already-built query and prints each hit together with its score.
     *
     * @param query the query to execute
     * @param num   maximum number of hits to retrieve
     */
    public void searchByQueryParse(Query query,int num) {
        try {
            IndexSearcher indexSearcher = getSearcher();
            TopDocs top = indexSearcher.search(query, num);
            System.out.println("一共查询了:" + top.totalHits);
            ScoreDoc[] hits = top.scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                ScoreDoc current = hits[i];
                Document hit = indexSearcher.doc(current.doc);
                String id = hit.get("id");
                String line = id + "---->" + hit.get("name")
                        + "[" + hit.get("email") + "]-->" + id
                        + "," + hit.get("attach") + "," + hit.get("date")
                        + "==" + current.score;
                System.out.println(line);
            }
        } catch (IOException failure) {
            // Also covers CorruptIndexException, which is an IOException.
            failure.printStackTrace();
        }
    }

 

posted @ 2016-12-16 11:05  木子小僧  阅读(4132)  评论(3编辑  收藏  举报