Lucene学习笔记(二)



































































































































































































































































import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.SimpleAnalyzer;
import junit.framework.TestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Compares the time needed to build the same Lucene index in a
 * {@link RAMDirectory} and in an {@link FSDirectory}.
 *
 * <p>The same in-memory collection of synthetic documents is indexed into
 * both directories and the elapsed wall-clock time of each run is printed.
 * The test asserts that the file-system run took longer than the in-memory
 * run; because this is a wall-clock comparison it can fail on a fast or
 * heavily cached disk.
 */
public class FSversusRAMDirectoryTest extends TestCase
{
    /** Word repeated to build the text of every synthetic document. */
    private static final String WORD = "Bibamus ";

    private Directory fsDir;
    private Directory ramDir;
    // Documents are generated once, before timing starts, so that data
    // generation is not included in either measurement.
    private final Collection docs = loadDocuments(3000, 5);

    /**
     * Creates a fresh in-memory directory and a fresh file-system directory
     * named "fs-index" under the JVM temporary directory.
     */
    protected void setUp() throws Exception
    {
        String fsIndexDir = System.getProperty("java.io.tmpdir", "tmp")
                + System.getProperty("file.separator") + "fs-index";
        ramDir = new RAMDirectory(); // index held in memory
        fsDir = FSDirectory.getDirectory(fsIndexDir, true);
    }

    /**
     * Closes both directories opened by {@link #setUp()} so the test does
     * not leave them open after it finishes.
     */
    protected void tearDown() throws Exception
    {
        try
        {
            if (ramDir != null)
            {
                ramDir.close();
            }
        }
        finally
        {
            if (fsDir != null)
            {
                fsDir.close();
            }
        }
    }

    /**
     * Indexes the documents into each directory, prints both timings and
     * checks that the file-system run was slower than the in-memory run.
     *
     * @throws IOException if indexing into either directory fails
     */
    public void testTiming() throws IOException
    {
        long ramTiming = timeIndexWriter(ramDir);
        long fsTiming = timeIndexWriter(fsDir);
        // Report before asserting so the measurements are visible even
        // when the comparison fails.
        System.out.println("RAMDirectory Time: " + (ramTiming) + " ms");
        System.out.println("FSDirectory Time : " + (fsTiming) + " ms");
        assertTrue("expected FSDirectory (" + fsTiming + " ms) to be slower than RAMDirectory ("
                + ramTiming + " ms)", fsTiming > ramTiming);
    }

    /**
     * Measures how long it takes to index all documents into {@code dir}.
     *
     * @param dir directory to index into
     * @return elapsed wall-clock time in milliseconds
     * @throws IOException if indexing fails
     */
    private long timeIndexWriter(Directory dir) throws IOException
    {
        long start = System.currentTimeMillis();
        addDocuments(dir);
        long stop = System.currentTimeMillis();
        return (stop - start);
    }

    /**
     * Creates a new index in {@code dir} and adds one Lucene document per
     * entry of {@link #docs}, then optimizes the index.
     *
     * <p>Each document stores the same text in four fields that differ only
     * in their store/index options.
     *
     * @param dir directory that receives the new index
     * @throws IOException if the writer cannot be created, written or closed
     */
    private void addDocuments(Directory dir) throws IOException
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
        try
        {
            // To experiment with FSDirectory indexing performance, tune the
            // writer's merge settings here (presumably setMergeFactor /
            // setMaxMergeDocs / setMaxBufferedDocs in Lucene 2.x -- confirm
            // against the Lucene version in use).
            for (Iterator iter = docs.iterator(); iter.hasNext();)
            {
                Document doc = new Document();
                String word = (String) iter.next();
                doc.add(new Field("keyword", word, Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.add(new Field("unindexed", word, Field.Store.YES, Field.Index.NO));
                doc.add(new Field("unstored", word, Field.Store.NO, Field.Index.TOKENIZED));
                doc.add(new Field("text", word, Field.Store.YES, Field.Index.TOKENIZED));
                writer.addDocument(doc);
            }
            writer.optimize();
        }
        finally
        {
            // Always close the writer, even when indexing fails part-way.
            writer.close();
        }
    }

    /**
     * Builds the synthetic documents used by the timing run.
     *
     * @param numDocs number of documents to generate
     * @param wordsPerDoc number of times the word is repeated in each document
     * @return a collection of {@code numDocs} strings
     */
    private Collection loadDocuments(int numDocs, int wordsPerDoc)
    {
        Collection docs = new ArrayList(numDocs);
        for (int i = 0; i < numDocs; i++)
        {
            // StringBuffer capacity is counted in characters, not words.
            StringBuffer doc = new StringBuffer(wordsPerDoc * WORD.length());
            for (int j = 0; j < wordsPerDoc; j++)
            {
                doc.append(WORD);
            }
            docs.add(doc.toString());
        }
        return docs;
    }
}
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.SimpleAnalyzer;
import junit.framework.TestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Compares the time needed to build the same Lucene index in a
 * {@link RAMDirectory} and in an {@link FSDirectory}.
 *
 * <p>The same in-memory collection of synthetic documents is indexed into
 * both directories and the elapsed wall-clock time of each run is printed.
 * The test asserts that the file-system run took longer than the in-memory
 * run; because this is a wall-clock comparison it can fail on a fast or
 * heavily cached disk.
 */
public class FSversusRAMDirectoryTest extends TestCase
{
    /** Word repeated to build the text of every synthetic document. */
    private static final String WORD = "Bibamus ";

    private Directory fsDir;
    private Directory ramDir;
    // Documents are generated once, before timing starts, so that data
    // generation is not included in either measurement.
    private final Collection docs = loadDocuments(3000, 5);

    /**
     * Creates a fresh in-memory directory and a fresh file-system directory
     * named "fs-index" under the JVM temporary directory.
     */
    protected void setUp() throws Exception
    {
        String fsIndexDir = System.getProperty("java.io.tmpdir", "tmp")
                + System.getProperty("file.separator") + "fs-index";
        ramDir = new RAMDirectory(); // index held in memory
        fsDir = FSDirectory.getDirectory(fsIndexDir, true);
    }

    /**
     * Closes both directories opened by {@link #setUp()} so the test does
     * not leave them open after it finishes.
     */
    protected void tearDown() throws Exception
    {
        try
        {
            if (ramDir != null)
            {
                ramDir.close();
            }
        }
        finally
        {
            if (fsDir != null)
            {
                fsDir.close();
            }
        }
    }

    /**
     * Indexes the documents into each directory, prints both timings and
     * checks that the file-system run was slower than the in-memory run.
     *
     * @throws IOException if indexing into either directory fails
     */
    public void testTiming() throws IOException
    {
        long ramTiming = timeIndexWriter(ramDir);
        long fsTiming = timeIndexWriter(fsDir);
        // Report before asserting so the measurements are visible even
        // when the comparison fails.
        System.out.println("RAMDirectory Time: " + (ramTiming) + " ms");
        System.out.println("FSDirectory Time : " + (fsTiming) + " ms");
        assertTrue("expected FSDirectory (" + fsTiming + " ms) to be slower than RAMDirectory ("
                + ramTiming + " ms)", fsTiming > ramTiming);
    }

    /**
     * Measures how long it takes to index all documents into {@code dir}.
     *
     * @param dir directory to index into
     * @return elapsed wall-clock time in milliseconds
     * @throws IOException if indexing fails
     */
    private long timeIndexWriter(Directory dir) throws IOException
    {
        long start = System.currentTimeMillis();
        addDocuments(dir);
        long stop = System.currentTimeMillis();
        return (stop - start);
    }

    /**
     * Creates a new index in {@code dir} and adds one Lucene document per
     * entry of {@link #docs}, then optimizes the index.
     *
     * <p>Each document stores the same text in four fields that differ only
     * in their store/index options.
     *
     * @param dir directory that receives the new index
     * @throws IOException if the writer cannot be created, written or closed
     */
    private void addDocuments(Directory dir) throws IOException
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
        try
        {
            // To experiment with FSDirectory indexing performance, tune the
            // writer's merge settings here (presumably setMergeFactor /
            // setMaxMergeDocs / setMaxBufferedDocs in Lucene 2.x -- confirm
            // against the Lucene version in use).
            for (Iterator iter = docs.iterator(); iter.hasNext();)
            {
                Document doc = new Document();
                String word = (String) iter.next();
                doc.add(new Field("keyword", word, Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.add(new Field("unindexed", word, Field.Store.YES, Field.Index.NO));
                doc.add(new Field("unstored", word, Field.Store.NO, Field.Index.TOKENIZED));
                doc.add(new Field("text", word, Field.Store.YES, Field.Index.TOKENIZED));
                writer.addDocument(doc);
            }
            writer.optimize();
        }
        finally
        {
            // Always close the writer, even when indexing fails part-way.
            writer.close();
        }
    }

    /**
     * Builds the synthetic documents used by the timing run.
     *
     * @param numDocs number of documents to generate
     * @param wordsPerDoc number of times the word is repeated in each document
     * @return a collection of {@code numDocs} strings
     */
    private Collection loadDocuments(int numDocs, int wordsPerDoc)
    {
        Collection docs = new ArrayList(numDocs);
        for (int i = 0; i < numDocs; i++)
        {
            // StringBuffer capacity is counted in characters, not words.
            StringBuffer doc = new StringBuffer(wordsPerDoc * WORD.length());
            for (int j = 0; j < wordsPerDoc; j++)
            {
                doc.append(WORD);
            }
            docs.add(doc.toString());
        }
        return docs;
    }
}
作者:洞庭散人
出处:http://phinecos.cnblogs.com/
本博客遵从Creative Commons Attribution 3.0 License,若用于非商业目的,您可以自由转载,但请保留原作者信息和文章链接URL。
posted on 2007-08-29 15:55 Phinecos(洞庭散人) 阅读(2234) 评论(0) 编辑 收藏 举报
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· .NET周刊【3月第1期 2025-03-02】
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· [AI/GPT/综述] AI Agent的设计模式综述
2006-08-29 操作系统复习笔记(五)