代码改变世界

初战Lucene.Net(1)

2006-08-20 16:08  cppguy  阅读(1968)  评论(2)  编辑  收藏  举报
   首先从http://incubator.apache.org/projects/lucene.net.html下载Lucene.Net的版本。由于它的原型是一个Java开源项目(Lucene),所以详细的资料可以查看《Lucene in Action》这本书。把压缩包解压会得到三个文件夹:Demo、Lucene.Net、Test。首先要把Lucene.Net用VS生成一下,这样在bin目录就得到了Lucene.Net.dll了.
        Lucene可以帮助我们搜索硬盘上的文件,还有电子邮件、Web页面,当然也可以是数据库中的数据.
        第一步,是为我们所要搜索的对象建立索引.然后再对我们建立的索引进行搜索.
        建立索引:这里新建一个项目textIndexer,只针对.txt文件进行索引.首先在我们建立的项目里添加对Lucene.Net.dll的引用.代码如下(运行前需要在bin/debug目录下新建两个目录build/index和luncene,其中luncene里面放入被索引的文件):
using System;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using Document = Lucene.Net.Documents.Document;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Field = Lucene.Net.Documents.Field;
using System.IO;

namespace TextIndexer
{
    /// <summary>
    /// Console sample that builds a Lucene.Net index over the .txt files found
    /// under a data directory (searched recursively).
    /// </summary>
    class textIndexer
    {
        /// <summary>
        /// Entry point. Indexes the "luncene" directory into "build/index"
        /// (both relative to the working directory) and prints the number of
        /// indexed documents together with the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            try
            {
                FileInfo indexDir = new FileInfo("build/index");
                FileInfo dataDir = new FileInfo("luncene");

                DateTime start = DateTime.Now;
                int numid = index(indexDir, dataDir);
                DateTime end = DateTime.Now;

                // The message labels the value as milliseconds, so print the
                // elapsed milliseconds rather than the raw TimeSpan.
                System.Console.WriteLine(
                    "Indesxing   +{0}  +file took+{1} millionseconds",
                    numid,
                    (end - start).TotalMilliseconds);
            }
            catch (Exception ex)
            {
                // Report the failure instead of silently swallowing it.
                Console.Error.WriteLine(ex);
                Environment.ExitCode = 1;
            }
        }

        /// <summary>
        /// Creates a fresh index in <paramref name="indexDir"/> containing one
        /// document per .txt file found under <paramref name="DataDir"/>.
        /// </summary>
        /// <param name="indexDir">Directory that receives the index files; created if missing.</param>
        /// <param name="DataDir">Existing directory whose files are indexed.</param>
        /// <returns>The number of documents in the index after indexing.</returns>
        /// <exception cref="DirectoryNotFoundException">
        /// <paramref name="DataDir"/> does not exist or is not a directory.
        /// </exception>
        public static int index(FileInfo indexDir, FileInfo DataDir)
        {
            if (!Directory.Exists(DataDir.FullName))
            {
                throw new DirectoryNotFoundException(
                    DataDir.FullName + " does not exist or is not a directory");
            }

            // Make sure the target exists before the writer opens it.
            Directory.CreateDirectory(indexDir.FullName);

            // The index must be written to indexDir, not to the data directory:
            // opening the writer on DataDir with create=true would put the
            // index files among the documents being indexed.
            IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(), true);
            try
            {
                writer.SetUseCompoundFile(false);
                indexDirectory(writer, DataDir);
                int num = writer.DocCount();
                writer.Optimize();
                return num;
            }
            finally
            {
                // Always release the writer, even when indexing fails.
                writer.Close();
            }
        }

        /// <summary>
        /// Recursively adds every .txt file below <paramref name="dir"/> to the index.
        /// Does nothing when <paramref name="dir"/> is not an existing directory.
        /// </summary>
        /// <param name="writer">Open index writer that receives the documents.</param>
        /// <param name="dir">Directory to walk.</param>
        public static void indexDirectory(IndexWriter writer, FileInfo dir)
        {
            if (!Directory.Exists(dir.FullName))
            {
                return;
            }

            // GetFileSystemEntries returns files AND subdirectories;
            // GetFiles alone would never reach the recursive branch.
            string[] entries = Directory.GetFileSystemEntries(dir.FullName);
            foreach (string entry in entries)
            {
                if (Directory.Exists(entry))
                {
                    indexDirectory(writer, new FileInfo(entry));
                }
                else if (entry.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
                {
                    // Only plain-text files are indexed.
                    indexFile(writer, new FileInfo(entry));
                }
            }
        }

        /// <summary>
        /// Adds a single file to the index as a document with two fields:
        /// "contents" (the full text of the file) and "title" (its full path).
        /// Does nothing when the file does not exist.
        /// </summary>
        /// <param name="writer">Open index writer that receives the document.</param>
        /// <param name="f">File to index.</param>
        public static void indexFile(IndexWriter writer, FileInfo f)
        {
            if (!File.Exists(f.FullName))
            {
                return;
            }

            Console.WriteLine("Indexing +{0}", f.FullName);

            // Read the text up front and dispose the reader so the file
            // handle is released immediately.
            string contents;
            using (StreamReader reader = new StreamReader(f.FullName))
            {
                contents = reader.ReadToEnd();
            }

            Document doc = new Document();
            doc.Add(Field.Text("contents", contents));
            doc.Add(Field.Text("title", f.FullName));
            writer.AddDocument(doc);
        }
    }
}