Lucene 建立索引的不同方式
1.创建一个简单的索引:
package lia.meetlucene;

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * Smallest possible indexing example: writes a single two-field document
 * ("title" and "content") into a file-system index.
 */
public class BasicIndexer {

    /**
     * Creates the index under a hard-coded directory and adds one document.
     *
     * @param args ignored
     * @throws java.io.IOException if the index cannot be opened or written
     */
    public static void main(String[] args) throws java.io.IOException {
        String indexDir = "C:/Users/Administrator/Desktop/xdj";
        Directory dir = FSDirectory.open(new File(indexDir));

        // Third argument is the "create" flag of the IndexWriter constructor.
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            Document doc = new Document();

            // Both fields are stored and run through the analyzer.
            String title = "i love china";
            doc.add(new Field("title", title,
                    Field.Store.YES, Field.Index.ANALYZED));

            String content = "i love you, my mother land! ";
            doc.add(new Field("content", content,
                    Field.Store.YES, Field.Index.ANALYZED));

            writer.addDocument(doc);
        } finally {
            // Close even when adding the document fails, so the writer
            // (and the index lock it holds) is always released.
            writer.close();
        }

        System.out.println("Index Created!");
    }
}
2.创建一个复杂点的索引:
package lia.meetlucene;

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * Indexes three small documents, each with a "title", a "content" and a
 * "time" field, into a file-system index.
 */
public class BasicIndexer {

    /**
     * Creates the index under a hard-coded directory and adds three documents.
     *
     * @param args ignored
     * @throws java.io.IOException if the index cannot be opened or written
     */
    public static void main(String[] args) throws java.io.IOException {
        String indexDir = "C:/Users/Administrator/Desktop/xdj";
        Directory dir = FSDirectory.open(new File(indexDir));

        // Third argument is the "create" flag of the IndexWriter constructor.
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            writer.addDocument(buildDocument(
                    "i love china", "i love you, my mother land! ", "2007-05-31"));
            // The content literal below used to be split across two physical
            // lines, which does not compile; it is a single-line literal now.
            writer.addDocument(buildDocument(
                    "i love mom", "i love you, my mother! ", "2007-05-31"));
            writer.addDocument(buildDocument(
                    "i love xiaoyue", "i love you, my wife! ", "2007-05-31"));
        } finally {
            // Always release the writer, even if a document could not be added.
            writer.close();
        }

        System.out.println("Index Three Created!");
    }

    /**
     * Builds one document with the field layout shared by all three samples.
     *
     * @param title   value of the "title" field (stored, indexed without analysis)
     * @param content value of the "content" field (stored, indexed without analysis)
     * @param time    value of the "time" field (stored only, not indexed)
     * @return the populated document
     */
    private static Document buildDocument(String title, String content, String time) {
        Document doc = new Document();
        doc.add(new Field("title", title,
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("content", content,
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("time", time,
                Field.Store.YES, Field.Index.NO));
        return doc;
    }
}
3.为文件创建索引
package lia.meetlucene;

import java.io.File;
import java.io.FileReader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

/**
 * Indexes two XML files from disk: the file name and path are stored as
 * fields, and the file body is fed to the index through a reader.
 */
public class BasicIndexer {

    /**
     * Creates the index under a hard-coded directory and adds one document
     * per hard-coded input file.
     *
     * @param args ignored
     * @throws java.io.IOException if the index or an input file cannot be
     *         opened, or the index cannot be written
     */
    public static void main(String[] args) throws java.io.IOException {
        String indexDir = "C:/Users/Administrator/Desktop/xdj";
        Directory dir = FSDirectory.open(new File(indexDir));

        String[] inputPaths = {
            "E:/xdj/tengxun/a_______________mm/2014-02-19 06.59.53.xml",
            "E:/xdj/tengxun/a_______________mm/2014-02-04 11.43.01.xml"
        };

        // Third argument is the "create" flag of the IndexWriter constructor.
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            for (String inputPath : inputPaths) {
                writer.addDocument(buildFileDocument(new File(inputPath)));
            }
        } finally {
            // A missing input file or a failed add must not leave the writer
            // (and its index lock) open.
            writer.close();
        }

        System.out.println("File Index Created!");
    }

    /**
     * Builds the document for one file.
     *
     * @param f file to index
     * @return document with "name" (stored, not analyzed), "content"
     *         (reader-backed) and "path" (stored only) fields
     * @throws java.io.IOException if the file cannot be opened for reading
     */
    private static Document buildFileDocument(File f) throws java.io.IOException {
        Document doc = new Document();
        doc.add(new Field("name", f.getName(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the XML files being indexed.
        doc.add(new Field("content", new FileReader(f)));
        doc.add(new Field("path", f.getPath(),
                Field.Store.YES, Field.Index.NO));
        return doc;
    }
}
4.为某个文件夹下的所有文件创建索引
package lia.meetlucene;

/**
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific lan
 */

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader;

// From chapter 1

/**
 * This code was originally written for Erik's Lucene intro java.net article.
 *
 * <p>Indexes every readable, non-hidden {@code .txt} file found directly
 * inside a data directory, using {@link SmartChineseAnalyzer}.
 */
public class Indexer {

    /** Index directory used when no command-line arguments are given. */
    private static final String DEFAULT_INDEX_DIR =
            "C:/Users/Administrator/Desktop/xdj/suoyin";

    /** Data directory used when no command-line arguments are given. */
    private static final String DEFAULT_DATA_DIR =
            "C:/Users/Administrator/Desktop/xdj/data";

    /**
     * Indexes a data directory and reports the document count and elapsed time.
     *
     * <p>The arguments used to be validated and then ignored in favour of
     * hard-coded paths. Now two arguments are honoured, and no arguments
     * selects the built-in default directories.
     *
     * @param args either empty, or {@code <index dir> <data dir>}
     * @throws IllegalArgumentException if any other number of arguments is given
     * @throws Exception if indexing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 0 && args.length != 2) {
            throw new IllegalArgumentException("Usage: java "
                    + Indexer.class.getName() + " <index dir> <data dir>");
        }
        boolean fromArgs = args.length == 2;
        String indexDir = fromArgs ? args[0] : DEFAULT_INDEX_DIR; // create index in this directory
        String dataDir = fromArgs ? args[1] : DEFAULT_DATA_DIR;   // index *.txt files from this directory

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } finally {
            indexer.close();
        }
        long end = System.currentTimeMillis();

        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }

    private IndexWriter writer;

    /**
     * Opens a Lucene IndexWriter on the given directory.
     *
     * @param indexDir file-system directory that holds the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        // Third argument is the "create" flag of the IndexWriter constructor.
        writer = new IndexWriter(dir,
                new SmartChineseAnalyzer(Version.LUCENE_20),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying IndexWriter.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes the files located directly inside {@code dataDir}.
     * Directories, hidden files and unreadable files are skipped.
     *
     * @param dataDir directory whose files are indexed (not recursive)
     * @param filter  extra acceptance test, or {@code null} to accept every file
     * @return the document count reported by the writer
     * @throws IOException if {@code dataDir} cannot be listed
     * @throws Exception if a file cannot be read or added to the index
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() yields null for a missing path or a non-directory;
            // fail with a clear message instead of a NullPointerException.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }

        for (File f : files) {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }

        return writer.numDocs();
    }

    /** Accepts only files whose lower-cased name ends with {@code .txt}. */
    private static class TextFilesFilter implements FileFilter {
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the Lucene document for one file.
     *
     * <p>Field.Store decides whether the raw value is kept in the index
     * (YES / NO). Field.Index decides how the value is indexed: ANALYZED is
     * tokenized and then indexed, NOT_ANALYZED is indexed as a single
     * untokenized term, and NO is not indexed at all.
     *
     * @param f file to index
     * @return document with "contents", "filename" and "fullpath" fields
     * @throws Exception if the file cannot be opened or its path resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the data files.
        doc.add(new Field("contents", new FileReader(f)));          // file content
        doc.add(new Field("filename", f.getName(),                  // file name
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(),         // full path
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Logs the file's canonical path, then adds its document to the index. */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }
}
5.《Lucene in Action》第二版索引 demo
package lia.meetlucene;

/**
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific lan
 */

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader;

// From chapter 1

/**
 * This code was originally written for Erik's Lucene intro java.net article.
 *
 * <p>This variant indexes every readable, non-hidden {@code .xml} file found
 * directly inside a data directory, using {@link SmartChineseAnalyzer}.
 */
public class Indexer {

    /** Index directory used when no command-line arguments are given. */
    private static final String DEFAULT_INDEX_DIR =
            "C:/Users/Administrator/Desktop/xdj/suoyin";

    /** Data directory used when no command-line arguments are given. */
    private static final String DEFAULT_DATA_DIR =
            "C:/Users/Administrator/Desktop/xdj/tengxun/A__Vae";

    /**
     * Indexes a data directory and reports the document count and elapsed time.
     *
     * <p>The arguments used to be validated and then ignored in favour of
     * hard-coded paths. Now two arguments are honoured, and no arguments
     * selects the built-in default directories.
     *
     * @param args either empty, or {@code <index dir> <data dir>}
     * @throws IllegalArgumentException if any other number of arguments is given
     * @throws Exception if indexing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 0 && args.length != 2) {
            throw new IllegalArgumentException("Usage: java "
                    + Indexer.class.getName() + " <index dir> <data dir>");
        }
        boolean fromArgs = args.length == 2;
        String indexDir = fromArgs ? args[0] : DEFAULT_INDEX_DIR; // create index in this directory
        String dataDir = fromArgs ? args[1] : DEFAULT_DATA_DIR;   // index *.xml files from this directory

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } finally {
            indexer.close();
        }
        long end = System.currentTimeMillis();

        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }

    private IndexWriter writer;

    /**
     * Opens a Lucene IndexWriter on the given directory.
     *
     * @param indexDir file-system directory that holds the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        // Third argument is the "create" flag of the IndexWriter constructor.
        // Swap in new StandardAnalyzer(Version.LUCENE_30) for non-Chinese text.
        writer = new IndexWriter(dir,
                new SmartChineseAnalyzer(Version.LUCENE_20),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying IndexWriter.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes the files located directly inside {@code dataDir}.
     * Directories, hidden files and unreadable files are skipped.
     *
     * @param dataDir directory whose files are indexed (not recursive)
     * @param filter  extra acceptance test, or {@code null} to accept every file
     * @return the document count reported by the writer
     * @throws IOException if {@code dataDir} cannot be listed
     * @throws Exception if a file cannot be read or added to the index
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() yields null for a missing path or a non-directory;
            // fail with a clear message instead of a NullPointerException.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }

        for (File f : files) {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }

        return writer.numDocs();
    }

    /**
     * Accepts only files whose lower-cased name ends with {@code .xml}.
     * The class name is kept from the original {@code .txt} version.
     */
    private static class TextFilesFilter implements FileFilter {
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".xml");
        }
    }

    /**
     * Builds the Lucene document for one file.
     *
     * <p>Field.Store decides whether the raw value is kept in the index
     * (YES / NO). Field.Index decides how the value is indexed: ANALYZED is
     * tokenized and then indexed, NOT_ANALYZED is indexed as a single
     * untokenized term, and NO is not indexed at all.
     *
     * @param f file to index
     * @return document with "contents", "filename" and "fullpath" fields
     * @throws Exception if the file cannot be opened or its path resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the XML files.
        doc.add(new Field("contents", new FileReader(f)));          // file content
        doc.add(new Field("filename", f.getName(),                  // file name
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(),         // full path
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Logs the file's canonical path, then adds its document to the index. */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }
}