Lucene 4.9索引txt文件
暂时只是跑起来了,不知道是否正确,困了,睡觉了,改天再弄。搜索那块是分页的,也没仔细弄。。。
参考着 http://blog.csdn.net/kingskyleader/article/details/8444739
在data下放了三个txt...
S:\lucene\data\永生.txt
S:\lucene\data\1.txt
S:\lucene\data\2.txt
《永生》是一本中文小说,内容里应该没有英文(所以搜索 hello 时不会命中它)。
1.txt 内容: hello
2.txt 内容: hi hello 哈哈
程序运行之后控制台打印的信息:
adding [Ljava.io.File;@3f611531 adding [Ljava.io.File;@3f611531 adding [Ljava.io.File;@3f611531 S:\lucene\data\1.txt 1407857427736 S:\lucene\data\2.txt 1407857444245
具体改天再研究。
下面是代码:
pom:
<!-- Maven build descriptor for the Lucene 4.9 text-file indexing sample. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates; the artifact is packaged as a plain jar. -->
  <groupId>LuceneTest</groupId>
  <artifactId>lucene</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>lucene</name>
  <url>http://maven.apache.org</url>

  <properties>
    <!-- Source files are compiled as UTF-8. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- Test-scoped only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <!-- lucene -->
    <!-- All four Lucene modules are pinned to the same 4.9.0 release. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>4.9.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>4.9.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>4.9.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>4.9.0</version>
    </dependency>
  </dependencies>
</project>
建立索引:
package lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index under {@code S:\lucene\index} from the files found
 * directly inside {@code S:\lucene\data}.
 *
 * <p>Each file becomes one document with three fields: {@code path} (stored,
 * not tokenized), {@code modified} (stored last-modified timestamp) and
 * {@code contents} (tokenized from the file body, not stored).
 */
public class InitIndex {

    /**
     * Indexes every regular file in the data directory.
     *
     * <p>The index is opened in {@link OpenMode#CREATE} mode, so any previously
     * indexed documents are discarded on each run.
     *
     * @throws IOException if the data directory cannot be listed, a file cannot
     *         be read, or the index cannot be written
     */
    public void creatIndex() throws IOException {
        boolean create = true;
        File data = new File("S:\\lucene\\data");
        File index = new File("S:\\lucene\\index");

        // listFiles() returns null when the path is missing or is not a
        // directory; fail before the existing index is touched.
        File[] files = data.listFiles();
        if (files == null) {
            throw new IOException("Cannot list data directory: " + data);
        }

        Directory dir = FSDirectory.open(index);
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
            if (create) {
                // Create a new index in the directory, removing any
                // previously indexed documents:
                iwc.setOpenMode(OpenMode.CREATE);
            } else {
                // Add new documents to an existing index:
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            }

            IndexWriter iw = new IndexWriter(dir, iwc);
            try {
                for (File f : files) {
                    // Sub-directories cannot be opened as a stream; skip them.
                    if (!f.isFile()) {
                        continue;
                    }
                    indexFile(iw, f);
                }
            } finally {
                // Always close the writer so the index write lock is released.
                iw.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Adds (or replaces) the document for a single file.
     *
     * @param iw the open index writer
     * @param f  the file to index; its bytes are decoded as GBK
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    private void indexFile(IndexWriter iw, File f) throws IOException {
        FileInputStream fis = new FileInputStream(f);
        try {
            Document doc = new Document();
            doc.add(new StringField("path", f.getPath(), Field.Store.YES));
            doc.add(new LongField("modified", f.lastModified(), Field.Store.YES));
            // The reader is consumed while the document is being added, so the
            // stream must stay open until addDocument/updateDocument returns.
            doc.add(new TextField("contents", new BufferedReader(
                    new InputStreamReader(fis, "GBK"))));

            if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can
                // be there):
                System.out.println("adding " + f);
                iw.addDocument(doc);
            } else {
                // Existing index (an old copy of this document may have been
                // indexed), so we use updateDocument instead to replace the
                // old one matching the exact path, if present:
                System.out.println("updating " + f);
                iw.updateDocument(new Term("path", f.getPath()), doc);
            }
        } finally {
            // Close this file's stream before moving on to the next file.
            fis.close();
        }
    }
}
搜索:
package lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a fixed query against the index stored in {@code S:\lucene\index} and
 * prints the stored {@code path} and {@code modified} fields of the first page
 * of hits.
 */
public class Search {

    /**
     * Searches the {@code contents} field for the term {@code hello} and prints
     * up to {@code hitsPerPage} (10) results to standard output.
     *
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public void query() throws IOException, ParseException {
        String queries = "hello";
        int hitsPerPage = 10;
        File index = new File("S:\\lucene\\index");

        FSDirectory dir = FSDirectory.open(index);
        try {
            IndexReader reader = DirectoryReader.open(dir);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
                QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents", analyzer);
                Query query = parser.parse(queries);

                // Collect up to five pages of hits, but only the first page is
                // printed below.
                TopDocs results = searcher.search(query, 5 * hitsPerPage);
                ScoreDoc[] hits = results.scoreDocs;

                int start = 0;
                // Bound by the hits actually returned so the index can never
                // run past the array.
                int end = Math.min(hits.length, hitsPerPage);
                for (int i = start; i < end; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    String path = doc.get("path");
                    System.out.println(path);
                    String modified = doc.get("modified");
                    System.out.println(modified);
                }
            } finally {
                // Release the index files held open by the reader.
                reader.close();
            }
        } finally {
            dir.close();
        }
    }
}
主函数:
package lucene;

import java.io.IOException;

import org.apache.lucene.queryparser.classic.ParseException;

/**
 * Entry point: rebuilds the index and then runs the sample search against it.
 */
public class Main {

    /**
     * Builds the index first, then queries it.
     *
     * @param args command-line arguments (unused)
     * @throws IOException    if indexing or searching fails on I/O
     * @throws ParseException if the search query cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        // Indexing must finish before the search opens the index.
        new InitIndex().creatIndex();
        new Search().query();
    }
}