1 /*
 2  * 正则表达式查询
 3  */
 4 
 5 
 6 import java.io.IOException;
 7 
 8 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 9 import org.apache.lucene.document.Document;
10 import org.apache.lucene.document.Field;
11 import org.apache.lucene.index.IndexWriter;
12 import org.apache.lucene.index.Term;
13 import org.apache.lucene.search.Hits;
14 import org.apache.lucene.search.IndexSearcher;
15 import org.apache.lucene.search.regex.RegexQuery;  //第三方插件RegexQuery其jar为lucene-regex-2.9.4.jar
16 
17  
18 
19 public class RegexQueryTest {
20 
21   private static final String INDEX_STORE_PATH="d:\\testRegexQuery";
22   
23   public static void main(String[] args) throws IOException
24   {
25    //索引
26    indexwriter(INDEX_STORE_PATH);
27    System.out.println("建立索引完毕!");
28    //查询
29    search(INDEX_STORE_PATH);
30    System.out.println("检索完毕!");
31   }
32   
33   //建立索引
34   public static void indexwriter(String path) throws IOException
35   {
36    IndexWriter writer=new IndexWriter( path ,new StandardAnalyzer(),true);
37    writer.setUseCompoundFile(false);
38    //创建三个文档
39    Document doc1=new Document();
40    Document doc2=new Document();
41    Document doc3=new Document();
42    Document doc4=new Document();
43    //构建三个URL地址用于正则匹配
44    Field f1=new Field("url","http://www.abc/com/profuct?type=1& cate=5",Field.Store.YES,
45      Field.Index.UN_TOKENIZED);
46    Field f2=new Field("url","http://def.com/product?type=5",Field.Store.YES,
47      Field.Index.UN_TOKENIZED);
48    Field f3=new Field("url","http://ghi/product?type=x",Field.Store.YES,
49      Field.Index.UN_TOKENIZED);
50    Field f4=new Field("url","http://xxx.abc/con/profuct?type=1& cate=5",Field.Store.YES,
51      Field.Index.UN_TOKENIZED);
52    
53    doc1.add(f1);
54    doc2.add(f2);
55    doc3.add(f3);
56    doc4.add(f4);
57    writer.addDocument(doc1);
58    writer.addDocument(doc2);
59    writer.addDocument(doc3);
60    writer.addDocument(doc4);
61    
62    //close
63    writer.close();
64   }
65   
66   //查询函数
67   public static void search(String path) throws IOException
68   {
69    IndexSearcher searcher=new IndexSearcher(path);
70    //创建正则表达式
71    String regex="].*";
72    //构建Term
73    Term term=new Term("url",regex);
74    
75    RegexQuery query=new RegexQuery(term);
76    
77    Hits hits=searcher.search(query);
78    for(int i=0;i<hits.length();i++)
79     System.out.println(hits.doc(i));
80   }
81 }

 

posted on 2012-12-22 22:04  烤德  阅读(2444)  评论(0编辑  收藏  举报