lucene学习笔记、资料
Java:
lucene(java)高亮搜索结果:http://www.cnblogs.com/hejycpu/archive/2009/01/18/1377912.html
lucene(java)整套学习过程:http://www.chedong.com/tech/lucene.html#intro
Lucene(JAVA)的一些基本使用方法和概念 :http://ufosoft.blog.51cto.com/133277/64287
PaodingAnalyzer+Lucene(JAVA)的使用方法以及配置步骤:http://blog.csdn.net/love_javaprogram/article/details/6549320
.NET:
(提示:请使用与网上示例相同的版本;使用其它版本时,部分接口可能会报错)
搜索应用(个人实例,仅作参考):
/// <summary>
/// Builds the Lucene index in the "index" directory (resolved with Server.MapPath)
/// from the rows of the InfoContent table.
/// </summary>
/// <remarks>
/// The writer is opened with create = true, so the index is created from scratch
/// rather than updated incrementally (per the original author's note: true = create,
/// false = incremental update).
/// Any failure is logged through System.Diagnostics.Trace and is NOT rethrown, so the
/// web method keeps its original "never faults" behavior. The writer is always closed,
/// even on failure, so the index directory is not left locked.
/// Further reading kept from the original notes:
/// common IndexWriter methods - http://www.cnblogs.com/birdshover/archive/2008/09/19/1294040.html
/// site navigation - http://birdshover.cnblogs.com/
/// </remarks>
[WebMethod(Description = "建立索引")]
public void CreateIndex()
{
    // Directory in which the index files are stored.
    string indexStorePath = Server.MapPath("index");

    try
    {
        IndexWriter writer = null;
        try
        {
            // Third argument: true = create a new index, false = incremental update.
            writer = new IndexWriter(indexStorePath, new StandardAnalyzer(), true);

            DB db = new DB();
            using (DataTable records = db.ExecuteTable("select CrawlTitle,CrawlContent,CreateTime,PVCount from InfoContent"))
            {
                // One Lucene document per database record.
                foreach (DataRow record in records.Rows)
                {
                    string title = record["CrawlTitle"].ToString();
                    string content = record["CrawlContent"].ToString();

                    Document doc = new Document();
                    // Stored and tokenized.
                    doc.Add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));
                    // Stored and tokenized.
                    doc.Add(new Field("content", content, Field.Store.YES, Field.Index.TOKENIZED));
                    // Stored, indexed as a single un-tokenized term.
                    doc.Add(new Field("CreateTime", record["CreateTime"].ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                    // Not stored, tokenized: title + content concatenated so that one field
                    // can be searched to match either the title or the body.
                    doc.Add(new Field("indexcontent", title + content, Field.Store.NO, Field.Index.TOKENIZED));

                    writer.AddDocument(doc);
                }
            }

            // Optimize the index once all documents have been added.
            writer.Optimize();
        }
        finally
        {
            // Close flushes the writer and releases the lock on the index files
            // (per the original note); it must run even when indexing fails.
            if (writer != null)
            {
                writer.Close();
            }
        }
    }
    catch (Exception e)
    {
        // Best-effort method: record the failure instead of silently discarding it.
        System.Diagnostics.Trace.TraceError("CreateIndex failed: " + e);
    }
}
/// <summary>
/// Searches the "indexcontent" field of the index for <paramref name="keyword"/> and
/// returns the first hits (at most nine) as a JSON string.
/// </summary>
/// <remarks>
/// Query syntax notes from the original author (not verified here):
/// a &amp; b => +a +b;  a || b => a b;  a !b => +a -b.
/// A space between terms means OR; "+term1 +term2" requires both terms; terms written
/// together must appear together and adjacent.
/// Integrating Lucene with the "Paoding" Chinese analyzer:
/// http://zyj177484.blog.163.com/blog/static/18379423120127614414110/
/// An alternative to the single-field parser is a MultiFieldQueryParser over
/// { "title", "content" }.
/// </remarks>
/// <param name="keyword">Query text passed to QueryParser.Parse.</param>
/// <returns>
/// A JSON object of the form {"type":[header, hit, hit, ...]} where the first array
/// element is a header row and each hit carries CrawlTitle, CrawlContent and CreateTime;
/// or "0" if the search returns no Hits object.
/// </returns>
[WebMethod(Description = "点击搜索")]
public string searchButton(string keyword)
{
    // Maximum number of hits written to the response.
    const int maxResults = 9;

    // Directory in which the index files are stored.
    string indexStorePath = Server.MapPath("index");

    IndexSearcher searcher = new IndexSearcher(indexStorePath);
    try
    {
        // Default field + analyzer used to tokenize the keyword.
        QueryParser parser = new QueryParser("indexcontent", new StandardAnalyzer());
        Query query = parser.Parse(keyword);

        Hits hits = searcher.Search(query);
        if (hits == null)
        {
            return "0";
        }

        System.Text.StringBuilder json = new System.Text.StringBuilder();
        // Header row expected by existing consumers; kept byte-for-byte.
        json.Append("{\"type\":[{\"CrawlTitle\":\"CrawlTitle\",\"CrawlContent\":\"CrawlContent\",\"CreateTime\":\"CreateTime\"}");

        // Only the hits that are actually emitted are loaded from the index.
        for (int i = 0; i < hits.Length() && i < maxResults; i++)
        {
            Document doc = hits.Doc(i);

            string title = doc.Get("title") ?? string.Empty;
            // Double quotes in the content are shown as back-ticks, as before, so
            // existing clients see the same text for this field.
            string content = (doc.Get("content") ?? string.Empty).Replace("\"", "`");
            string createTime = doc.Get("CreateTime") ?? string.Empty;

            json.Append(",{\"CrawlTitle\":\"").Append(EscapeJsonString(title));
            json.Append("\",\"CrawlContent\":\"").Append(EscapeJsonString(content));
            json.Append("\",\"CreateTime\":\"").Append(EscapeJsonString(createTime));
            json.Append("\"}");
        }

        json.Append("]}");
        return json.ToString();
    }
    finally
    {
        // Always release the searcher, including when parsing or searching throws.
        searcher.Close();
    }
}

/// <summary>
/// Escapes a value so it can be placed between double quotes in a JSON document.
/// </summary>
/// <param name="value">Raw text; null is treated as empty.</param>
/// <returns>The text with quotes, backslashes and control characters escaped.</returns>
private static string EscapeJsonString(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    System.Text.StringBuilder escaped = new System.Text.StringBuilder(value.Length + 16);
    foreach (char c in value)
    {
        switch (c)
        {
            case '"': escaped.Append("\\\""); break;
            case '\\': escaped.Append("\\\\"); break;
            case '\b': escaped.Append("\\b"); break;
            case '\f': escaped.Append("\\f"); break;
            case '\n': escaped.Append("\\n"); break;
            case '\r': escaped.Append("\\r"); break;
            case '\t': escaped.Append("\\t"); break;
            default:
                if (c < ' ')
                {
                    // Remaining control characters must be written as \uXXXX.
                    escaped.Append("\\u");
                    escaped.Append(((int)c).ToString("x4", System.Globalization.CultureInfo.InvariantCulture));
                }
                else
                {
                    escaped.Append(c);
                }
                break;
        }
    }
    return escaped.ToString();
}
}
}