Lucene.NET入门实例
今天从网上找了一些关于Lucene.NET的资料,自己做个demo测试了一下。
Lucene.NET 是开源项目,从网上很容易找到源代码。新建一个 Web 测试页面和一个控制台程序,其中控制台程序用来创建 Lucene 的索引。
控制台代码如下:
using System;
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.IO;
using Lucene.Net.Analysis.Cn;
using Lucene.Net.Analysis.CJK;
using System.Data.SqlClient;
using System.Data;
namespace Test
{
    /// <summary>
    /// Console program that reads every row of the Article table and writes
    /// the id, title and summary columns into a Lucene.NET index at D:/index/.
    /// </summary>
    class TokenizerTest
    {
        // Connection string for the local "test" database.
        // NOTE(review): credentials are hard-coded (sa with an empty password);
        // move them to configuration before using this outside a demo.
        public static string connstr = "server=.;database=test;uid=sa;pwd=";

        // Connection created by OpenTable; the reader it returns depends on it.
        public static SqlConnection mycon;

        /// <summary>
        /// Builds the index from the Article table, then waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            SqlDataReader myred = OpenTable();
            try
            {
                CreateIndex(myred);
            }
            finally
            {
                // The reader has been closed by CreateIndex; release the
                // connection that backed it as well.
                mycon.Close();
            }
            Console.WriteLine("索引创建完成");
            Console.Read();
        }

        /// <summary>
        /// Opens a new connection (stored in <see cref="mycon"/>) and returns a
        /// forward-only reader over all rows of the Article table.
        /// </summary>
        /// <returns>An open reader; the caller must close it.</returns>
        public static SqlDataReader OpenTable()
        {
            mycon = new SqlConnection(connstr);
            mycon.Open();
            try
            {
                // The table is streamed through the reader only; it is not
                // additionally loaded into a DataTable, which would run the
                // query a second time and hold every row in memory.
                SqlCommand mycom = new SqlCommand("select * from Article", mycon);
                return mycom.ExecuteReader();
            }
            catch
            {
                // Do not leak the connection when the query cannot be started.
                mycon.Close();
                throw;
            }
        }

        /// <summary>
        /// Builds the index from the rows supplied by <paramref name="myred"/>.
        /// </summary>
        /// <param name="myred">Open reader exposing id, title and summary columns; it is closed before returning.</param>
        /// <returns>The writer that was used. It has already been closed.</returns>
        public static IndexWriter CreateIndex(SqlDataReader myred)
        {
            IndexWriter writer;
            try
            {
                // Index storage location; "true" creates a new index there.
                writer = new IndexWriter("D:/index/", new ChineseAnalyzer(), true);
            }
            catch
            {
                myred.Close();
                throw;
            }

            try
            {
                try
                {
                    while (myred.Read())
                    {
                        Document doc = new Document();
                        // id is stored and indexed as a single un-analyzed term.
                        doc.Add(new Field("id", myred["id"].ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                        // title and summary are stored and run through the analyzer.
                        doc.Add(new Field("title", myred["title"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                        doc.Add(new Field("summary", myred["summary"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                        writer.AddDocument(doc);
                    }
                    writer.Optimize(); // optimize the index
                }
                finally
                {
                    // Always close the writer, even when indexing fails part-way,
                    // so the index directory is not left with an open writer.
                    writer.Close();
                }
            }
            catch (Exception e)
            {
                // Failures are still not propagated to the caller, but they are
                // reported instead of being silently discarded.
                Console.Error.WriteLine("索引创建失败: " + e);
            }
            finally
            {
                myred.Close();
            }
            return writer;
        }
    }
}
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.IO;
using Lucene.Net.Analysis.Cn;
using Lucene.Net.Analysis.CJK;
using System.Data.SqlClient;
using System.Data;
namespace Test
{
    /// <summary>
    /// Console program that reads every row of the Article table and writes
    /// the id, title and summary columns into a Lucene.NET index at D:/index/.
    /// </summary>
    class TokenizerTest
    {
        // Connection string for the local "test" database.
        // NOTE(review): credentials are hard-coded (sa with an empty password);
        // move them to configuration before using this outside a demo.
        public static string connstr = "server=.;database=test;uid=sa;pwd=";

        // Connection created by OpenTable; the reader it returns depends on it.
        public static SqlConnection mycon;

        /// <summary>
        /// Builds the index from the Article table, then waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            SqlDataReader myred = OpenTable();
            try
            {
                CreateIndex(myred);
            }
            finally
            {
                // The reader has been closed by CreateIndex; release the
                // connection that backed it as well.
                mycon.Close();
            }
            Console.WriteLine("索引创建完成");
            Console.Read();
        }

        /// <summary>
        /// Opens a new connection (stored in <see cref="mycon"/>) and returns a
        /// forward-only reader over all rows of the Article table.
        /// </summary>
        /// <returns>An open reader; the caller must close it.</returns>
        public static SqlDataReader OpenTable()
        {
            mycon = new SqlConnection(connstr);
            mycon.Open();
            try
            {
                // The table is streamed through the reader only; it is not
                // additionally loaded into a DataTable, which would run the
                // query a second time and hold every row in memory.
                SqlCommand mycom = new SqlCommand("select * from Article", mycon);
                return mycom.ExecuteReader();
            }
            catch
            {
                // Do not leak the connection when the query cannot be started.
                mycon.Close();
                throw;
            }
        }

        /// <summary>
        /// Builds the index from the rows supplied by <paramref name="myred"/>.
        /// </summary>
        /// <param name="myred">Open reader exposing id, title and summary columns; it is closed before returning.</param>
        /// <returns>The writer that was used. It has already been closed.</returns>
        public static IndexWriter CreateIndex(SqlDataReader myred)
        {
            IndexWriter writer;
            try
            {
                // Index storage location; "true" creates a new index there.
                writer = new IndexWriter("D:/index/", new ChineseAnalyzer(), true);
            }
            catch
            {
                myred.Close();
                throw;
            }

            try
            {
                try
                {
                    while (myred.Read())
                    {
                        Document doc = new Document();
                        // id is stored and indexed as a single un-analyzed term.
                        doc.Add(new Field("id", myred["id"].ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                        // title and summary are stored and run through the analyzer.
                        doc.Add(new Field("title", myred["title"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                        doc.Add(new Field("summary", myred["summary"].ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                        writer.AddDocument(doc);
                    }
                    writer.Optimize(); // optimize the index
                }
                finally
                {
                    // Always close the writer, even when indexing fails part-way,
                    // so the index directory is not left with an open writer.
                    writer.Close();
                }
            }
            catch (Exception e)
            {
                // Failures are still not propagated to the caller, but they are
                // reported instead of being silently discarded.
                Console.Error.WriteLine("索引创建失败: " + e);
            }
            finally
            {
                myred.Close();
            }
            return writer;
        }
    }
}
创建好索引之后,就可以使用了。测试页面代码如下:
/// <summary>
/// Runs the same keyword search twice -- once against the Lucene index and
/// once with a SQL LIKE query on the Article table -- and shows each result
/// set together with its row count and elapsed milliseconds.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Search_Click(object sender, EventArgs e)
{
    string keyword = this.tj.Text.Trim();

    // ---- Lucene search ----
    DataTable luctb = new DataTable();
    luctb.Columns.Add("id");
    luctb.Columns.Add("title");
    luctb.Columns.Add("summary");

    Stopwatch sw = new Stopwatch();
    sw.Start();
    Hits myhit = seacher(keyword);
    int hitCount = myhit.Length();
    for (int i = 0; i < hitCount; i++)
    {
        Document doc = myhit.Doc(i);
        DataRow dr = luctb.NewRow();
        // Document.Get already returns a string, so no ToString() is needed.
        dr[0] = doc.Get("id");
        dr[1] = doc.Get("title");
        dr[2] = doc.Get("summary");
        luctb.Rows.Add(dr);
        dr.AcceptChanges();
    }
    sw.Stop();

    this.Label1.Text = "共" + luctb.Rows.Count + "行" + sw.ElapsedMilliseconds.ToString();
    this.gview.DataSource = luctb;
    this.gview.DataBind();

    // ---- SQL LIKE search ----
    DataTable tb = new DataTable();
    Stopwatch swsql = new Stopwatch();
    mycon = new SqlConnection(connstr);
    try
    {
        mycon.Open();
        swsql.Start();
        // The keyword is passed as a parameter rather than formatted into the
        // SQL text, so user input cannot alter the statement.
        using (SqlCommand mycom = new SqlCommand("select Id,title,summary from Article where summary like @pattern or title like @pattern", mycon))
        {
            mycom.Parameters.AddWithValue("@pattern", "%" + keyword + "%");
            using (SqlDataAdapter adp = new SqlDataAdapter(mycom))
            {
                adp.Fill(tb);
            }
        }
        swsql.Stop();
    }
    finally
    {
        // Release the connection on every path, including failures.
        mycon.Close();
    }

    this.Label2.Text = "共" + tb.Rows.Count + "行" + swsql.ElapsedMilliseconds.ToString();
    this.gviewsql.DataSource = tb;
    this.gviewsql.DataBind();
}
代码中分别利用 Lucene 和 SQL 语句,从 Article 表中查询 title 和 summary 两个字段中包含要查询字符的记录。表中共 20 万多条数据。
测试结果:Lucene 耗时:共35行,16毫秒;SQL 耗时:共35行,2331毫秒。