Lucene 7 and above requires at least JDK 1.8.
Lucene download address:
http://archive.apache.org/dist/lucene/java/
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>7.3.0</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>7.3.0</version>
</dependency>
<!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>7.3.0</version>
</dependency>
<!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-memory -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-memory</artifactId>
    <version>7.3.0</version>
</dependency>
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.9</version>
</dependency>
<!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>7.3.0</version>
</dependency>
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.6</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>7.3.0</version>
</dependency>
<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
    <!-- Exclude the old Lucene jars pulled in by ikanalyzer, because we re-implement its analyzer and tokenizer for Lucene 7 -->
    <exclusions>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
        </exclusion>
    </exclusions>
</dependency>
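The exclusions above are needed because the 2012_u6 IK jar was compiled against the old two-argument Analyzer.createComponents(String, Reader) API, which Lucene 5+ removed. The class names IKAnalyzer4Lucene7 and IKTokenizer4Lucene7 come from the comment in the test code below; the original post does not show their source, so the following is only a minimal sketch of the commonly used adaptation of IKSegmenter to the Lucene 7 Tokenizer API, not the author's original code:

package com.ytkj.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

import java.io.IOException;

public class IKAnalyzer4Lucene7 extends Analyzer {
    private final boolean useSmart;

    public IKAnalyzer4Lucene7() {
        this(false);
    }

    public IKAnalyzer4Lucene7(boolean useSmart) {
        this.useSmart = useSmart;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        // Lucene 5+ only passes the field name; the reader is bound later via setReader()/reset()
        return new TokenStreamComponents(new IKTokenizer4Lucene7(useSmart));
    }
}

class IKTokenizer4Lucene7 extends Tokenizer {
    // Attributes exposed to consumers of the token stream
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
    private final IKSegmenter segmenter;
    private int endPosition;

    IKTokenizer4Lucene7(boolean useSmart) {
        // "input" is the protected Reader field inherited from Tokenizer
        segmenter = new IKSegmenter(input, useSmart);
    }

    @Override
    public boolean incrementToken() throws IOException {
        clearAttributes();
        Lexeme lexeme = segmenter.next(); // next IK token, or null at end of input
        if (lexeme == null) {
            return false;
        }
        termAtt.append(lexeme.getLexemeText());
        termAtt.setLength(lexeme.getLength());
        offsetAtt.setOffset(lexeme.getBeginPosition(), lexeme.getEndPosition());
        typeAtt.setType(lexeme.getLexemeTypeString());
        endPosition = lexeme.getEndPosition();
        return true;
    }

    @Override
    public void reset() throws IOException {
        super.reset();
        segmenter.reset(input); // re-bind IK to the reader supplied by the analyzer
    }

    @Override
    public void end() throws IOException {
        super.end();
        int finalOffset = correctOffset(endPosition);
        offsetAtt.setOffset(finalOffset, finalOffset);
    }
}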
package com.ytkj.lucene;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;

/**
 * Lucene getting-started example
 */
public class LuceneFirst {

    /**
     * Create the index
     */
    public static void createIndex() throws Exception {
        // 1. Create a Directory object pointing at the on-disk location of the index
        Directory directory = FSDirectory.open(new File("E:\\lucene\\lucenetemp").toPath());
        // 2. Create the IndexWriter, configured with the analyzer to use
        StandardAnalyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
        // 3. Read the files on disk and create one document per file
        File dir = new File("E:\\lucene\\luceneresource");
        File[] files = dir.listFiles();
        for (File file : files) {
            // file name
            String name = file.getName();
            // file path
            String path = file.getPath();
            // file content
            String content = FileUtils.readFileToString(file, "utf-8");
            // file size
            long size = FileUtils.sizeOf(file);
            // Create the fields. Arguments: field name, field value, whether to store the value
            Field fieldName = new TextField("name", name, Field.Store.YES);
            Field fieldPath = new TextField("path", path, Field.Store.YES);
            Field fieldContent = new TextField("content", content, Field.Store.YES);
            Field fieldSize = new TextField("size", size + "", Field.Store.YES);
            // 4. Create the document
            Document document = new Document();
            // 5. Add the fields to the document
            document.add(fieldName);
            document.add(fieldPath);
            document.add(fieldContent);
            document.add(fieldSize);
            // 6. Write the document to the index
            indexWriter.addDocument(document);
        }
        // 7. Close the IndexWriter
        indexWriter.close();
    }

    /**
     * Query the index
     */
    public static void searchIndex() throws Exception {
        // 1. Create a Directory object pointing at the index location
        Directory directory = FSDirectory.open(new File("E:\\lucene\\lucenetemp").toPath());
        // 2. Create an IndexReader
        IndexReader indexReader = DirectoryReader.open(directory);
        // 3. Create an IndexSearcher on top of the IndexReader
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // 4. Create a Query object
        Query query = new TermQuery(new Term("content", "spring"));
        // 5. Run the query. Arguments: the query, the maximum number of hits to return
        TopDocs topDocs = indexSearcher.search(query, 10);
        // 6. Total number of hits (a long since Lucene 7)
        long totalHits = topDocs.totalHits;
        System.out.println("Total hits: " + totalHits);
        // 7. Walk the result list
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            // document id
            int docId = scoreDoc.doc;
            // 8. Fetch the document by id
            Document document = indexSearcher.doc(docId);
            System.out.println(document.get("name"));
            System.out.println(document.get("path"));
            System.out.println(document.get("content"));
            System.out.println(document.get("size"));
        }
        // Close the IndexReader
        indexReader.close();
    }

    /**
     * Inspect what an analyzer produces
     */
    public static void testTokenStream() throws Exception {
        // Create the standard analyzer
        StandardAnalyzer analyzer = new StandardAnalyzer();
        // Obtain a TokenStream from the analyzer
        TokenStream tokenStream = analyzer.tokenStream("", "org.springframework.jdbc.datasource.DataSourceTransactionManager");
        // Attach an attribute to the TokenStream; it acts like a pointer to the current token
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        // Call reset() before consuming the stream
        tokenStream.reset();
        // Iterate over the tokens
        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }
        // Close the stream
        tokenStream.close();
    }

    /**
     * Chinese analyzer test
     */
    public static void testIKAnalyzer() throws Exception {
        String etext = "Analysis is one of the main causes of slow indexing. Simply put, the more you analyze the slower analyze the indexing (in most cases).";
        String chineseText = "张三说的确实在理。";
        /*
         * ikanalyzer Chinese analyzer: because the Analyzer.createComponents API changed,
         * we have to implement the analyzer IKAnalyzer4Lucene7 and the tokenizer
         * IKTokenizer4Lucene7 ourselves (see the sketch after the POM above).
         */
        // IKAnalyzer, fine-grained segmentation
        try (Analyzer ik = new IKAnalyzer()) {
            TokenStream ts = ik.tokenStream("content", etext);
            System.out.println("IKAnalyzer fine-grained segmentation, English text:");
            doToken(ts);
            ts = ik.tokenStream("content", chineseText);
            System.out.println("IKAnalyzer fine-grained segmentation, Chinese text:");
            doToken(ts);
        }
        // IKAnalyzer, smart segmentation
        try (Analyzer ik = new IKAnalyzer(true)) {
            TokenStream ts = ik.tokenStream("content", etext);
            System.out.println("IKAnalyzer smart segmentation, English text:");
            doToken(ts);
            ts = ik.tokenStream("content", chineseText);
            System.out.println("IKAnalyzer smart segmentation, Chinese text:");
            doToken(ts);
        }
    }

    private static void doToken(TokenStream ts) throws IOException {
        ts.reset();
        CharTermAttribute cta = ts.getAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
            System.out.print(cta.toString() + "|");
        }
        System.out.println();
        ts.end();
        ts.close();
    }

    public static void main(String[] args) throws Exception {
        //createIndex();
        //searchIndex();
        //testTokenStream();
        testIKAnalyzer();
    }
}
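If the stock org.wltea.analyzer.lucene.IKAnalyzer from the 2012_u6 jar fails at runtime on Lucene 7 (typically an AbstractMethodError, because the jar implements only the old two-argument createComponents(String, Reader) and not the single-argument version that Lucene 5+ declares abstract), the test can use the adapter sketched after the POM instead. The call site below is an assumed substitution, not the original code:

try (Analyzer ik = new IKAnalyzer4Lucene7(true)) { // smart segmentation
    doToken(ik.tokenStream("content", chineseText));
}

Fine-grained mode (the no-argument constructor) emits every dictionary word it finds, including overlapping ones, while smart mode (useSmart = true) keeps a single best segmentation, so it produces fewer, longer tokens.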