Lucene_索引(域)的查询

package cn.tz.lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demonstrates several ways of querying an existing Lucene index:
 * {@link QueryParser}, {@link TermQuery}, {@link NumericRangeQuery},
 * {@link BooleanQuery} and {@link MultiFieldQueryParser}.
 *
 * <p>Every test opens the index stored at {@link #INDEX_DIR}, runs one query,
 * prints the total hit count followed by the {@code fileName} and
 * {@code fileSize} stored fields of each returned document, and then closes
 * the reader and directory (also when the query throws).
 */
public class IndexSearchTest {

	/** Location of the on-disk Lucene index that every test in this class reads. */
	private static final File INDEX_DIR = new File("d:\\lucene");

	/**
	 * Searches with the classic {@link QueryParser}, using {@code fileName}
	 * as the default field.
	 *
	 * @throws Exception if the index cannot be opened/read or the query cannot be parsed
	 */
	@Test
	public void testIndexSearch() throws Exception {
		// Analyzer used by the parser to tokenize the query text
		// (StandardAnalyzer could be substituted here).
		Analyzer analyzer = new IKAnalyzer();
		try (FSDirectory directory = FSDirectory.open(INDEX_DIR);
				IndexReader reader = DirectoryReader.open(directory)) {
			IndexSearcher indexSearcher = new IndexSearcher(reader);
			// First argument: the default field, used only for query terms
			// that do not name a field themselves.
			QueryParser queryParser = new QueryParser("fileName", analyzer);
			// Query syntax is "field:keyword", e.g. "fileName:apache".
			Query query = queryParser.parse("fileName:not exit");
			// Second argument: maximum number of hits to return.
			TopDocs topDocs = indexSearcher.search(query, 5);

			System.out.println("***** 一共有" + topDocs.totalHits + " 条记录 *****");
			printHits(reader, topDocs, "fileName:", "fileSize:",
					"==================================");
		}
	}

	/**
	 * Searches with a {@link TermQuery}. No analyzer is involved: the term is
	 * handed to the query as-is, without being tokenized first.
	 *
	 * @throws Exception if the index cannot be opened or read
	 */
	@Test
	public void testTermQuery() throws Exception {
		try (FSDirectory dir = FSDirectory.open(INDEX_DIR);
				IndexReader reader = DirectoryReader.open(dir)) {
			Query query = new TermQuery(new Term("fileName", "apache"));
			IndexSearcher searcher = new IndexSearcher(reader);
			TopDocs topDocs = searcher.search(query, 10);

			System.out.println("总条数: " + topDocs.totalHits);
			printHits(reader, topDocs, "文件名: ", "文件大小  :  ",
					"======================================");
		}
	}

	/**
	 * Searches with a {@link NumericRangeQuery}, which matches a numeric range.
	 *
	 * <p>Note: this only works against fields that were indexed as numeric
	 * fields (for example {@code LongField}, {@code FloatField}, ...).
	 *
	 * @throws Exception if the index cannot be opened or read
	 */
	@Test
	public void testNumericRangeQuery() throws Exception {
		try (FSDirectory dir = FSDirectory.open(INDEX_DIR);
				IndexReader reader = DirectoryReader.open(dir)) {
			IndexSearcher search = new IndexSearcher(reader);
			// Arguments: field, min, max, include min, include max.
			NumericRangeQuery<Long> query =
					NumericRangeQuery.newLongRange("fileSize", 100L, 1000L, true, true);
			TopDocs topDocs = search.search(query, 10);

			System.out.println("文件数量: " + topDocs.totalHits);
			printHits(reader, topDocs, "文件名称: ", "文件大小: ",
					"=========================");
		}
	}

	/**
	 * Searches with a {@link BooleanQuery}, which combines several conditions.
	 *
	 * <p>{@link Occur} values: {@code MUST} acts as AND, {@code MUST_NOT} as
	 * NOT, and {@code SHOULD} as OR.
	 *
	 * @throws Exception if the index cannot be opened or read
	 */
	@Test
	public void testBooleanQuery() throws Exception {
		try (FSDirectory dir = FSDirectory.open(INDEX_DIR);
				IndexReader reader = DirectoryReader.open(dir)) {
			IndexSearcher searcher = new IndexSearcher(reader);

			TermQuery termQuery = new TermQuery(new Term("fileName", "apache"));
			NumericRangeQuery<Long> numericRangeQuery =
					NumericRangeQuery.newLongRange("fileSize", 100L, 1000L, true, true);
			// Both clauses are required: fileName contains the term "apache"
			// AND fileSize lies in [100, 1000].
			BooleanQuery booleanQuery = new BooleanQuery();
			booleanQuery.add(termQuery, Occur.MUST);
			booleanQuery.add(numericRangeQuery, Occur.MUST);
			TopDocs topDocs = searcher.search(booleanQuery, 10);

			System.out.println("文件数量 : " + topDocs.totalHits);
			printHits(reader, topDocs, "文件名称: ", "文件大小: ",
					"==============================");
		}
	}

	/**
	 * Searches with a {@link MultiFieldQueryParser}, which looks for the
	 * keyword "apache" in both the {@code fileName} and {@code fileContent}
	 * fields.
	 *
	 * @throws Exception if the index cannot be opened/read or the query cannot be parsed
	 */
	@Test
	public void testMultiFieldQueryParser() throws Exception {
		Analyzer analyzer = new IKAnalyzer();
		try (FSDirectory directory = FSDirectory.open(INDEX_DIR);
				IndexReader reader = DirectoryReader.open(directory)) {
			IndexSearcher searcher = new IndexSearcher(reader);
			String[] fields = {"fileName", "fileContent"};
			MultiFieldQueryParser multiQueryParser = new MultiFieldQueryParser(fields, analyzer);
			Query query = multiQueryParser.parse("apache");
			TopDocs topDocs = searcher.search(query, 5);

			System.out.println("总记录数: " + topDocs.totalHits);
			printHits(reader, topDocs, "文档名称:", "文档大小:",
					"===============================");
		}
	}

	/**
	 * Prints the {@code fileName} and {@code fileSize} stored fields of every
	 * hit in {@code topDocs}, followed by a separator line after each hit.
	 *
	 * @param reader    reader used to load each hit's stored document by id
	 * @param topDocs   search result whose {@code scoreDocs} are printed
	 * @param nameLabel text printed in front of the {@code fileName} value
	 * @param sizeLabel text printed in front of the {@code fileSize} value
	 * @param separator line printed after each hit
	 * @throws IOException if a document cannot be read from the index
	 */
	private static void printHits(IndexReader reader, TopDocs topDocs, String nameLabel,
			String sizeLabel, String separator) throws IOException {
		for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
			// scoreDoc.doc is the document id used to fetch the stored fields.
			Document doc = reader.document(scoreDoc.doc);
			System.out.println(nameLabel + doc.get("fileName"));
			System.out.println(sizeLabel + doc.get("fileSize"));
			System.out.println(separator);
		}
	}
}

  

posted @ 2018-01-18 17:03  scwyfy  阅读(338)  评论(0)  编辑  收藏  举报