Lucene 5 的测试程序——API 变动太大

package hello;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.reflect.Type;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map.Entry;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;

/**
 * Small Lucene 5 smoke test: builds an on-disk index from JSON-lines files
 * and then runs a single hard-coded query against it.
 *
 * <p>Every second line of each input file (0-based odd line numbers) is parsed
 * as a flat JSON object of string keys to string values, and each key/value
 * pair becomes an analyzed, non-stored text field of one Lucene document.
 * NOTE(review): the skipped even lines are presumably Elasticsearch bulk
 * "action" lines (the input directory is named "ES") - confirm against the data.
 */
public class HelloLucene222 {

    /** Location of the on-disk index written by {@link #index()} and read by {@link #search()}. */
    private static final String INDEX_PATH = "C:\\exp\\test_data\\index";

    /** Directory whose regular files are read and indexed. */
    private static final String DATA_PATH = "C:\\exp\\test_data\\ES";

    /** Gson target type for one input line; built once instead of once per line. */
    private static final Type EVENT_TYPE = new TypeToken<HashMap<String, String>>() {
    }.getType();

    /**
     * Rebuilds the index, then runs the demo query.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HelloLucene222 hLucene = new HelloLucene222();
        hLucene.index();
        System.out.print("search ...\n");
        hLucene.search();
    }

    /**
     * Builds the index from scratch ({@code OpenMode.CREATE} replaces any
     * existing index at {@link #INDEX_PATH}) out of every regular file found
     * directly inside {@link #DATA_PATH}.
     *
     * <p>Failures are reported on stderr via a stack trace and do not propagate;
     * the writer and directory are always closed.
     */
    public void index() {
        System.out.println("Indexing to directory begin...");
        pause(10);
        long start = System.currentTimeMillis();

        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // Resources close in reverse order: the writer first, then the directory.
        try (Directory directory = FSDirectory.open(Paths.get(INDEX_PATH));
                IndexWriter writer = new IndexWriter(directory, iwc)) {
            File[] inputs = new File(DATA_PATH).listFiles();
            if (inputs == null) {
                // listFiles() returns null when the path is missing or not a directory.
                throw new IOException("Cannot list input directory: " + DATA_PATH);
            }
            Gson gson = new Gson();
            for (File f : inputs) {
                if (!f.isFile()) {
                    // Opening a sub-directory as a file would abort the whole run.
                    continue;
                }
                indexFile(writer, gson, f);
                System.out.println("Indexing to directory '" + f.getAbsolutePath() + "'...");
            }
            long end = System.currentTimeMillis();
            System.out.println("add docment Took : " + ((end - start) / 1000.0));
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Second timing includes closing the writer.
        long end = System.currentTimeMillis();
        System.out.println("Took : " + ((end - start) / 1000.0));
        pause(1);
    }

    /**
     * Adds one document per odd-numbered (0-based) line of {@code f}.
     *
     * @param writer open index writer receiving the documents
     * @param gson   shared JSON parser
     * @param f      input file, read as UTF-8
     * @throws IOException if the file cannot be read or the document cannot be written
     */
    private void indexFile(IndexWriter writer, Gson gson, File f) throws IOException {
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8))) {
            String line;
            int lineNo = 0;
            while ((line = br.readLine()) != null) {
                boolean isPayloadLine = (lineNo & 1) == 1;
                lineNo++;
                if (!isPayloadLine) {
                    continue;
                }
                HashMap<String, String> events = gson.fromJson(line, EVENT_TYPE);
                if (events == null) {
                    // Gson yields null for an empty line or a literal JSON null.
                    continue;
                }
                Document doc = new Document();
                for (Entry<String, String> entry : events.entrySet()) {
                    if (entry.getValue() == null) {
                        // TextField rejects null values; a JSON null has nothing to index.
                        continue;
                    }
                    doc.add(new TextField(entry.getKey(), entry.getValue(), Field.Store.NO));
                }
                // Stored, non-analyzed source location; added once per document
                // so search() can print it.
                doc.add(new StringField("filename", f.getName(), Field.Store.YES));
                doc.add(new StringField("path", f.getAbsolutePath(), Field.Store.YES));
                writer.addDocument(doc);
            }
        }
    }

    /**
     * Searches field {@code field-38} for the term {@code tcholo} and prints
     * the stored {@code filename} and {@code path} of up to 10 hits, followed
     * by the elapsed time.
     *
     * <p>Failures are reported on stderr via a stack trace and do not propagate;
     * the reader and directory are always closed.
     */
    public void search() {
        long start = System.currentTimeMillis();
        try (Directory directory = FSDirectory.open(Paths.get(INDEX_PATH));
                DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // The parser's first argument is the default field for unqualified terms.
            QueryParser parser = new QueryParser("field-38", new StandardAnalyzer());
            Query query = parser.parse("tcholo");
            // Only the 10 best hits are fetched.
            TopDocs tdoc = isearcher.search(query, 10);
            for (ScoreDoc s : tdoc.scoreDocs) {
                // Load the stored fields of the hit by its internal document id.
                Document document = isearcher.doc(s.doc);
                System.out.println(document.get("filename") + "[" + document.get("path") + "]");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("Took : " + ((end - start) / 1000.0));
    }

    /**
     * Sleeps for {@code millis} milliseconds and then prints {@code sleep OK}.
     * If interrupted, the stack trace is printed and the thread's interrupt
     * flag is restored so callers can still observe it.
     *
     * @param millis time to sleep, in milliseconds
     */
    private static void pause(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        System.out.println("sleep OK");
    }

}
posted @ 2017-03-22 15:03 bonelee 阅读(466) 评论(0) 编辑收藏举报
努力加载评论中...
刷新页面返回顶部
将者，智、信、仁、勇、严也。

Hi，我是李智华，华为-安全AI算法专家，欢迎来到安全攻防对抗的有趣世界。

Lucene 5 的测试程序——API 变动太大

公告