Lucene搜索笔记
Lucene的常用检索类:
IndexSearcher :检索操作的核心组件,用于对 IndexWriter 创建的索引执行只读的检索操作,工作模式为接收 Query 对象并返回 TopDocs 对象(其 scoreDocs 字段即 ScoreDoc 数组)。
Term :检索的基本单元,标识检索的字段名称和检索对象的值,如 new Term("title", "lucene"),即表示在 title 字段中搜寻关键词 lucene 。
Query:表示查询的抽象类,由相应的 Term 来标识。
TermQuery :最基本的查询类型,用于匹配含有指定值字段的文档。
TopDocs:保存查询结果的类。
ScoreDoc(旧版本中为 Hits):表示单条命中结果,保存文档编号(doc)和评分(score);TopDocs.scoreDocs 就是由 ScoreDoc 组成的数组。
package com.fxr.demo3;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates an exact-term lookup with {@link TermQuery}.
 */
public class TestSearcher {

    /**
     * Opens the index stored at {@code f:/lucenefxrindex}, searches the
     * {@code address} field for the term {@code shanghai} (at most 10 hits)
     * and prints, for every hit, its score followed by the {@code id},
     * {@code name} and {@code address} values of the matching document.
     *
     * @param args ignored
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        // Create the standard analyzer (tokenizer); the search below never references it.
        Analyzer analyzer = new StandardAnalyzer();

        Directory indexStore = FSDirectory.getDirectory("f:/lucenefxrindex");
        IndexSearcher searcher = new IndexSearcher(indexStore);

        TermQuery addressQuery = new TermQuery(new Term("address", "shanghai"));
        TopDocs results = searcher.search(addressQuery, 10);

        for (ScoreDoc hit : results.scoreDocs) {
            Document match = searcher.doc(hit.doc);
            System.out.println(hit.score);
            System.out.println(match.get("id"));
            System.out.println(match.get("name"));
            System.out.println(match.get("address"));
        }

        searcher.close();
        indexStore.close();
    }
}
通配符的查询:
在Lucene中也可以使用通配符查询:"*"匹配任意长度(包括零个)的字符序列,"?"匹配任意单个字符。
package com.fxr.demo3;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates a pattern lookup with {@link WildcardQuery}.
 */
public class TestWildcardSearcher {

    /**
     * Opens the index stored at {@code f:/lucenefxrindex}, runs the wildcard
     * pattern {@code z*} against the {@code address} field (at most 10 hits)
     * and prints every hit.
     *
     * @param args ignored
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        // Create the standard analyzer (tokenizer); the search below never references it.
        Analyzer analyzer = new StandardAnalyzer();

        Directory indexStore = FSDirectory.getDirectory("f:/lucenefxrindex");
        IndexSearcher searcher = new IndexSearcher(indexStore);

        WildcardQuery pattern = new WildcardQuery(new Term("address", "z*"));
        ScoreDoc[] matches = searcher.search(pattern, 10).scoreDocs;

        for (int n = 0; n < matches.length; n++) {
            printHit(searcher, matches[n]);
        }

        searcher.close();
        indexStore.close();
    }

    /**
     * Prints one hit: its score, then the id, name and address values of the
     * document it points to, each on its own line.
     */
    private static void printHit(IndexSearcher searcher, ScoreDoc hit) throws IOException {
        Document stored = searcher.doc(hit.doc);
        System.out.println(hit.score);
        System.out.println(stored.get("id"));
        System.out.println(stored.get("name"));
        System.out.println(stored.get("address"));
    }
}
如何把多个查询结合起来使用?实际需求中经常需要这样做,这时可以使用 BooleanQuery:通过 add 方法逐个加入子查询,并用 BooleanClause.Occur 参数(MUST、SHOULD、MUST_NOT)指定各子查询之间的组合关系。
package com.fxr.demo3;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates combining several queries with {@link BooleanQuery}.
 *
 * NOTE(review): this class has the same name as the plain wildcard example in
 * package com.fxr.demo3; rename one of them (e.g. to TestBooleanSearcher) if
 * both are meant to live in the same source tree.
 */
public class TestWildcardSearcher {

    /**
     * Opens the index stored at {@code f:/lucenefxrindex} and searches it with
     * a boolean combination of a wildcard query on {@code address} ({@code z*})
     * and a term query on {@code name} ({@code zhangsan}), printing at most 10
     * hits (score, id, name, address).
     *
     * @param args ignored
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/lucenefxrindex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                WildcardQuery wildcardQuery = new WildcardQuery(new Term("address", "z*"));
                TermQuery termQuery = new TermQuery(new Term("name", "zhangsan"));

                BooleanQuery booleanQuery = new BooleanQuery();
                // MUST: every returned document has to match the wildcard.
                booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
                // SHOULD: optional clause. Because a MUST clause is present it
                // does not widen the result set; per the Lucene documentation
                // it only influences the score of documents that also match it.
                booleanQuery.add(termQuery, BooleanClause.Occur.SHOULD);

                // Search with the combined query. The original code passed
                // wildcardQuery here, so the BooleanQuery was never executed.
                TopDocs topDocs = indexSearcher.search(booleanQuery, 10);
                ScoreDoc[] hits = topDocs.scoreDocs;

                for (int i = 0; i < hits.length; i++) {
                    Document document = indexSearcher.doc(hits[i].doc);
                    System.out.println(hits[i].score);
                    System.out.println(document.get("id"));
                    System.out.println(document.get("name"));
                    System.out.println(document.get("address"));
                }
            } finally {
                // Release the searcher even when the search or printing fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
指定范围的查询 RangeQuery,可以查询某个区间内的数据:
package com.firstproject.testsearch;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates an interval lookup with {@link RangeQuery}.
 */
public class TestRangeQuery {

    /**
     * Opens the index stored at {@code f:/lucenefxrindex} and searches the
     * {@code birthday} field for terms between {@code 19820720} and
     * {@code 19830130}; the third constructor argument of the range query is
     * {@code true}. At most 2 hits are requested, and each is printed on one
     * line as id, name, address and birthday separated by spaces.
     *
     * @param args ignored
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        // The search below never references this analyzer.
        Analyzer analyzer = new StandardAnalyzer();

        Directory indexStore = FSDirectory.getDirectory("f:/lucenefxrindex");
        IndexSearcher indexSearcher = new IndexSearcher(indexStore);

        Term lowerBound = new Term("birthday", "19820720");
        Term upperBound = new Term("birthday", "19830130");
        RangeQuery birthdayRange = new RangeQuery(lowerBound, upperBound, true);

        TopDocs found = indexSearcher.search(birthdayRange, 2);

        // The hit score is deliberately not printed in this example.
        for (ScoreDoc hit : found.scoreDocs) {
            Document person = indexSearcher.doc(hit.doc);
            String line = person.get("id") + " "
                    + person.get("name") + " "
                    + person.get("address") + " "
                    + person.get("birthday") + " ";
            System.out.println(line);
        }

        indexSearcher.close();
        indexStore.close();
    }
}
读取文件中的数据
package com.fxr.demo3;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Reads a text file, splits it into records and indexes every record as one
 * Lucene document.
 */
public class TestFileDataQuery {

    /** Separator that marks the boundary between two records in the data file. */
    private static final String RECORD_SEPARATOR = "Database: Compendex";

    /**
     * Reads {@code f:/data/data.txt}, splits its text on
     * {@code "Database: Compendex"}, prints the number of pieces, trims each
     * piece and adds it to the index at {@code f:/lucenefxrindex} as a
     * document with a single stored, analyzed {@code contents} field. Prints
     * {@code ok!} when indexing has finished.
     *
     * @param args ignored
     * @throws IOException if the data file cannot be read or the index cannot
     *                     be written
     */
    public static void main(String[] args) throws IOException {
        String text = readAll(new File("f:/data/data.txt"));

        String[] records = text.split(RECORD_SEPARATOR);
        System.out.println(records.length);
        for (int i = 0; i < records.length; i++) {
            records[i] = records[i].trim();
        }

        Analyzer analyzer = new StandardAnalyzer();
        String indexDir = "f:/lucenefxrindex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexWriter indexWriter =
                    new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (int i = 0; i < records.length; i++) {
                    Document document = new Document();
                    document.add(new Field("contents", records[i],
                            Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);
                }
                indexWriter.optimize();
            } finally {
                // Close the writer even when adding a document fails.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
        System.out.println("ok!");
    }

    /**
     * Reads the complete content of {@code file} as text.
     *
     * The previous implementation issued a single {@code read} into a fixed
     * 60000-char array: larger files were silently truncated, a short read
     * lost data, and the unused tail of the array ended up in the text as NUL
     * characters. Reading in a loop until end-of-file avoids all three.
     *
     * NOTE(review): FileReader decodes with the platform default charset, as
     * the original code did; confirm that matches the encoding of the file.
     */
    private static String readAll(File file) throws IOException {
        FileReader reader = new FileReader(file);
        try {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8192];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                text.append(buffer, 0, count);
            }
            return text.toString();
        } finally {
            reader.close();
        }
    }
}
package com.fxr.demo3;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Searches the {@code contents} field of the index with a wildcard pattern.
 */
public class TestFileQuery {

    /**
     * Opens the index stored at {@code f:/lucenefxrindex}, runs the wildcard
     * pattern {@code *dsorption*} against the {@code contents} field (at most
     * 126 hits) and prints the score and the {@code contents} value of every
     * hit.
     *
     * @param args ignored
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        // Create the standard analyzer (tokenizer); the search below never references it.
        Analyzer analyzer = new StandardAnalyzer();

        Directory indexStore = FSDirectory.getDirectory("f:/lucenefxrindex");
        IndexSearcher searcher = new IndexSearcher(indexStore);

        WildcardQuery contentsPattern = new WildcardQuery(new Term("contents", "*dsorption*"));
        TopDocs results = searcher.search(contentsPattern, 126);

        for (ScoreDoc hit : results.scoreDocs) {
            Document record = searcher.doc(hit.doc);
            System.out.println(hit.score);
            System.out.println(record.get("contents"));
        }

        searcher.close();
        indexStore.close();
    }
}