Lucene学习

创建索引的例子:

package com.test;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal example that builds a Lucene index containing a single document.
 */
public class TestIndex {
    /**
     * Entry point: creates the sample index.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        TestIndex testIndex = new TestIndex();
        testIndex.creatIndex();
    }

    /**
     * Creates the index under {@code f://index//} and adds one document with
     * stored, analyzed "title" and "content" fields.
     * <p>
     * The writer is opened with {@link OpenMode#CREATE}, so an index that
     * already exists at that location is replaced. I/O failures are reported
     * with {@code printStackTrace()} rather than propagated to the caller.
     */
    public void creatIndex() {
        // Location where the index is stored.
        String indexPath = "f://index//";
        Directory dir = null;
        IndexWriter writer = null;
        try {
            dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                    analyzer);
            iwc.setOpenMode(OpenMode.CREATE);
            writer = new IndexWriter(dir, iwc);

            Document doc = new Document();
            doc.add(new Field("title", "我的Lucene", Field.Store.YES,
                    Field.Index.ANALYZED));
            doc.add(new Field("content", "Lucene 是一个简单的开源包", Field.Store.YES,
                    Field.Index.ANALYZED));
            writer.addDocument(doc);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path: previously a failure in addDocument() skipped
            // close() and left the writer (and the directory) open.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

在创建索引的时候

目录一般使用 FSDirectory.open() 打开;如果把索引放在内存中(RAMDirectory),内存必须足够大,因此只建议在对速度要求特别高的项目中使用。

至于分析器,建议使用自己编写的分词器,因为 Lucene 自带的分词器往往不能满足实际需要(我曾经写过一个分词器,前文提到过)。

查询索引的代码:

/**
 * Searches the "content" field of the index under {@code f://index//} for
 * the query text "Lucene" and loads the stored "content" value of each of
 * the top 10 hits.
 * <p>
 * Any exception (I/O or query parsing) is reported with
 * {@code printStackTrace()} rather than propagated to the caller.
 */
private void search() {
    String queryText = "Lucene";
    String indexPath = "f://index//";
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
        searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        QueryParser parser = new QueryParser(Version.LUCENE_31, "content", analyzer);
        Query query = parser.parse(queryText);

        // Run the query once, keeping only the 10 best-ranked hits. (The
        // original ran an extra search whose result was thrown away.)
        TopDocs results = searcher.search(query, 10);
        ScoreDoc[] hits = results.scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            // Stored "content" value of the hit; this sample does nothing
            // further with it.
            String contents = doc.get("content");
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close on every path so a failed search does not leak the searcher
        // or the underlying reader.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

 

更新索引的例子:(更新是删除和添加的结合)

/**
 * Replaces the document whose "id" term is "1" in the index under
 * {@code D:/index} with a new document (id "1", content "www").
 * <p>
 * The writer is opened with {@link OpenMode#CREATE_OR_APPEND} so the other
 * documents already in the index are kept; opening with
 * {@code OpenMode.CREATE} would discard the whole existing index before the
 * update ran. I/O failures are reported with {@code printStackTrace()}
 * rather than propagated to the caller.
 */
public void updateIndex() {
    Directory dir = null;
    IndexWriter writer = null;
    try {
        dir = FSDirectory.open(new File("D:/index"));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);

        Document doc = new Document();
        // "id" is indexed as a single un-analyzed term so it can be matched
        // exactly by the Term passed to updateDocument().
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("content", "www", Field.Store.YES, Field.Index.ANALYZED));
        // An update is a delete of all documents matching the term followed
        // by an add of the new document.
        writer.updateDocument(new Term("id", "1"), doc);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close on every path so a failed update does not leave the writer
        // or the directory open.
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

删除索引的例子:

/**
 * Deletes every document whose "id" term is "1" from the index under
 * {@code D:/index}.
 * <p>
 * The writer is opened with {@link OpenMode#CREATE_OR_APPEND} so only the
 * matching documents are removed; opening with {@code OpenMode.CREATE}
 * would discard the whole existing index. I/O failures are reported with
 * {@code printStackTrace()} rather than propagated to the caller.
 */
public void testDelete() {
    Directory dir = null;
    IndexWriter writer = null;
    try {
        dir = FSDirectory.open(new File("D:/index"));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);

        writer.deleteDocuments(new Term("id", "1"));
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close on every path so a failed delete does not leave the writer
        // or the directory open.
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

删除索引时,既可以用 IndexWriter,也可以用 IndexReader。

在进行段合并之前,

IndexWriter 的删除并不是真正的删除,只是把文档标记为已删除。

可以用 maxDoc() 和 numDocs() 方法来查看:maxDoc() 的计数包含已标记删除的文档,numDocs() 则不包含。

 

 

 

posted @ 2013-01-02 17:15  杨桃  阅读(325)  评论(0)  编辑  收藏  举报