lucene学习笔记(三)

好几天没更新了。更新一下,方便自己和大家学习。

这是最基本的代码

package index;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Small demonstration helper around a file-system based Lucene 3.5 index.
 *
 * <p>It builds an index from a fixed in-memory data set and shows the basic
 * maintenance operations: counting documents, deleting, undeleting, purging
 * deletions, merging segments, updating and searching.
 *
 * <p>Every method opens its own {@link IndexWriter} or {@link IndexReader} and
 * always closes it again, even on failure, so that the index write lock and
 * the underlying file handles are never left behind.
 */
public class IndexUtil {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "E:\\lucene20140528\\index";

    private String[] ids = {"1","2","3","4","5","6"};
    private String[] emails = {"aa@bb.org","bb@qq.org","cc@bb.org","dd@aa.org","ee@qq.org","ff@cc.org"};
    private String[] contents = {
            "welcome to visited the space,I like book",
            "hello boy,I like pingpeng bail",
            "my name is cc,I like game",
            "I like football,I like football",
            "I like football and I like basketball too",
            "I like movie and swim"
            };
    // Sample attachment counts; not written to the index by this class.
    private int[] attachs = {2,3,1,4,5,5};
    private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
    private Directory directory = null;
    /** Document boost keyed by e-mail domain; domains not listed get 0.5. */
    private Map<String, Float> scores = new HashMap<String, Float>();

    /**
     * Creates a helper working on the default index directory.
     *
     * @throws IOException if the index directory cannot be opened
     */
    public IndexUtil() throws IOException{
        this(new File(DEFAULT_INDEX_PATH));
    }

    /**
     * Creates a helper working on the given index directory.
     *
     * @param indexDir directory that holds (or will hold) the index files
     * @throws IOException if the index directory cannot be opened
     */
    public IndexUtil(File indexDir) throws IOException{
        scores.put("cc.org", 2.0f);
        scores.put("aa.org", 1.5f);
        directory = FSDirectory.open(indexDir);
    }

    /**
     * Opens a writer on the index using a {@link StandardAnalyzer}.
     * The caller is responsible for closing the returned writer.
     */
    private IndexWriter openWriter() throws IOException{
        return new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    }

    /**
     * Prints the live, maximum and deleted document counts of the index.
     *
     * @throws Exception if the index cannot be opened or read
     */
    public void query() throws Exception{
        IndexReader reader = IndexReader.open(directory);
        try {
            // The reader gives direct access to the document counters.
            System.out.println("numDocs"+reader.numDocs());
            System.out.println("maxDocs"+reader.maxDoc());
            System.out.println("numDeleteDocs"+reader.numDeletedDocs());
        } finally {
            reader.close();
        }
    }

    /**
     * Rebuilds the index from scratch out of the sample data.
     *
     * <p>I/O failures while indexing are printed and not rethrown; a failure
     * while closing the writer propagates to the caller.
     *
     * @throws Exception if closing the writer fails
     */
    public void index() throws Exception{
        IndexWriter writer = null;
        try {
            writer = openWriter();
            writer.deleteAll();
            // A Document corresponds to a row of a table,
            // a Field corresponds to a column of that row.
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                /*
                 * Field.Store.YES: the value is stored in the index and can be
                 *   read back in full (e.g. with doc.get()).
                 * Field.Store.NO: the value is not stored; it can still be
                 *   indexed, but cannot be restored from the index.
                 *
                 * Field.Index (indexing options)
                 * Index.ANALYZED: tokenize and index; for titles, content, ...
                 * Index.NOT_ANALYZED: index without tokenizing; for exact-match
                 *   values such as ID numbers, names, ids.
                 * Index.ANALYZED_NO_NORMS: tokenize but do NOT store norms
                 *   (norms hold index-time boost / length-normalization data).
                 * Index.NOT_ANALYZED_NO_NORMS: neither tokenize nor store norms.
                 * Index.NO: do not index at all.
                 */
                doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("emails",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
                doc.add(new Field("content",contents[i],Field.Store.YES,Field.Index.ANALYZED));
                doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
                // Boost the document according to the domain part of its e-mail.
                String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
                if(scores.containsKey(et)){
                    doc.setBoost(scores.get(et));
                }else {
                    doc.setBoost(0.5f);
                }
                writer.addDocument(doc);
            }
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
        }finally{
            if(writer!=null){
                writer.close();
            }
        }
    }

    /**
     * Marks the document whose {@code id} is "1" as deleted.
     *
     * <p>I/O failures are printed and not rethrown. The writer is closed even
     * when the deletion fails, so the index write lock is always released.
     */
    public void delete(){
        try {
            IndexWriter writer = openWriter();
            try {
                // The argument may be a Query or a Term (an exact value to match).
                writer.deleteDocuments(new Term("id", "1"));
            } finally {
                writer.close();
            }
            System.out.println("ole");
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
        }
    }

    /**
     * Physically removes documents that are only marked as deleted.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public void forceDelete() throws Exception{
        IndexWriter writer = openWriter();
        try {
            writer.forceMergeDeletes();
        } finally {
            writer.close();
        }
    }

    /**
     * Merges the index down to at most two segments; deleted documents in the
     * merged segments are dropped in the process.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public void merge() throws Exception{
        IndexWriter writer = openWriter();
        try {
            writer.forceMerge(2);
        } finally {
            writer.close();
        }
    }

    /**
     * Restores all documents that are marked as deleted but not yet purged.
     *
     * @throws Exception if the index cannot be opened or modified
     */
    public void undelete() throws Exception{
        // Undeleting goes through IndexReader, which must be opened with
        // readOnly=false. A failure to open is propagated instead of being
        // swallowed, which previously ended in a NullPointerException.
        IndexReader reader = IndexReader.open(directory,false);
        try {
            reader.undeleteAll();
        } finally {
            reader.close();
        }
    }

    /**
     * Replaces the document whose {@code id} is "1" by a new document with id "11".
     *
     * @throws Exception if the index cannot be opened or written
     */
    public void update() throws Exception{
        IndexWriter writer = openWriter();
        try {
            // Lucene has no in-place update: updateDocument deletes the
            // documents matching the term and then adds the new document.
            Document doc = new Document();
            doc.add(new Field("id","11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("emails",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED));
            doc.add(new Field("content",contents[0],Field.Store.NO,Field.Index.ANALYZED));
            doc.add(new Field("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
            writer.updateDocument(new Term("id", "1"), doc);
        } finally {
            writer.close();
        }
    }

    /**
     * Searches the {@code content} field for the term "like" and prints the
     * name and e-mail of up to ten hits.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    public void search() throws Exception{
        IndexReader reader = IndexReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                TermQuery query = new TermQuery(new Term("content", "like"));
                TopDocs tds = searcher.search(query, 10);
                for (ScoreDoc sd : tds.scoreDocs) {
                    Document doc = searcher.doc(sd.doc);
                    System.out.println(doc.get("name")+":"+doc.get("emails"));
                }
            } finally {
                searcher.close();
            }
        } finally {
            // A searcher built from an existing reader does not close it.
            reader.close();
        }
    }
}

测试类贴上:

package test;


import index.IndexUtil;

import org.junit.Test;

/**
 * JUnit drivers for {@link IndexUtil}: each test builds a fresh helper and
 * invokes exactly one of its operations. The tests contain no assertions;
 * they pass as long as the invoked operation does not throw.
 */
public class MyTest {
    /** Runs {@code IndexUtil.index()}. */
    @Test
    public void testIndex() throws Exception{
        new IndexUtil().index();
    }

    /** Runs {@code IndexUtil.query()}. */
    @Test
    public void testQuery() throws Exception{
        new IndexUtil().query();
    }

    /** Runs {@code IndexUtil.delete()}. */
    @Test
    public void testDelete() throws Exception{
        new IndexUtil().delete();
    }

    /** Runs {@code IndexUtil.undelete()}. */
    @Test
    public void testUnDelete() throws Exception{
        new IndexUtil().undelete();
    }

    /** Runs {@code IndexUtil.forceDelete()}. */
    @Test
    public void testForceDelete() throws Exception{
        new IndexUtil().forceDelete();
    }

    /** Runs {@code IndexUtil.merge()}. */
    @Test
    public void testMerge() throws Exception{
        new IndexUtil().merge();
    }

    /** Runs {@code IndexUtil.update()}. */
    @Test
    public void testUpdate() throws Exception{
        new IndexUtil().update();
    }

    /** Runs {@code IndexUtil.search()}. */
    @Test
    public void testSearch() throws Exception{
        new IndexUtil().search();
    }
}

今天发现了一个好用的工具 Luke(每个版本的 Lucene 都有对应版本的 Luke,可以用来查看二进制的索引文件,非常不错)。

 

posted @ 2014-06-02 22:04  mr.g.  阅读(171)  评论(0)  编辑  收藏  举报