Daily Report 2012/11/06 陈伯雄(step 7)

  今天完成了中文分词功能,修改了sql建立倒排索引的框架,原来打算建立一个倒排索引,但考虑到数据库存在4个不同的文件类型的表:DOC,PDF,VEDIO,QAPAIR,现在分别针对每个表建立倒排索引,根据用户搜索需求搜索不同的表(效率问题有待商榷)。

 // Builds one inverted index table per source table: every word produced by
 // getWords(title, keyword) for a source row is mapped to a comma-separated
 // list of the IDs of the rows that contain it.
 List<Result> resultList = new List<Result>();
 string connectionString = GetConnectionString();                            // SQL Server connection string
 using (SqlConnection connection = new SqlConnection(connectionString))
 {
     connection.Open();

     // Source tables and the inverted index table built for each of them.
     // NOTE(review): the source table names are taken from the report text
     // (DOC, PDF, VEDIO, QAPAIR); confirm them against the real schema.
     // Both arrays are fixed literals, so concatenating them into SQL is safe.
     string[] sourceTables = { "DOC", "PDF", "VEDIO", "QAPAIR" };
     string[] indexTables = { "index_doc", "index_pdf", "index_vedio", "index_qapair" };

     for (int i = 0; i < indexTables.Length; i++)
     {
         string sourceTable = sourceTables[i];
         string indexTable = indexTables[i];

         // Create the index table only if it is missing, so the code can run
         // more than once. [key] is bracketed because KEY is a T-SQL reserved
         // word; nvarchar keeps Chinese keywords intact, and varchar(max)
         // leaves room for the ID list to grow.
         string createSql =
             "IF OBJECT_ID('" + indexTable + "', 'U') IS NULL " +
             "CREATE TABLE " + indexTable + "([key] nvarchar(50) PRIMARY KEY, ID varchar(max))";
         using (SqlCommand createCmd = new SqlCommand(createSql, connection))
         {
             createCmd.ExecuteNonQuery();
         }

         // Read the source rows into memory first: while a SqlDataReader is
         // open, the connection cannot execute the index lookups and updates.
         List<string[]> rows = new List<string[]>();
         string selectSql = "SELECT title, keyword, ID FROM " + sourceTable;
         using (SqlCommand selectCmd = new SqlCommand(selectSql, connection))
         using (SqlDataReader reader = selectCmd.ExecuteReader())
         {
             while (reader.Read())
             {
                 rows.Add(new string[]
                 {
                     reader["title"].ToString(),
                     reader["keyword"].ToString(),
                     reader["ID"].ToString()
                 });
             }
         }

         using (SqlCommand lookupCmd = new SqlCommand(
             "SELECT ID FROM " + indexTable + " WHERE [key] = @word", connection))
         using (SqlCommand insertCmd = new SqlCommand(
             "INSERT INTO " + indexTable + "([key], ID) VALUES(@word, @ids)", connection))
         using (SqlCommand updateCmd = new SqlCommand(
             "UPDATE " + indexTable + " SET ID = @ids WHERE [key] = @word", connection))
         {
             foreach (string[] row in rows)
             {
                 string title = row[0];
                 string keyword = row[1];
                 string ID = row[2];

                 // Word segmentation (Chinese tokenizer implemented elsewhere).
                 List<string> words = getWords(title, keyword);

                 // A word repeated inside one row must add the row's ID only once.
                 HashSet<string> seenWords = new HashSet<string>();
                 foreach (string word in words)
                 {
                     if (!seenWords.Add(word))
                     {
                         continue;
                     }

                     lookupCmd.Parameters.Clear();
                     lookupCmd.Parameters.AddWithValue("@word", word);
                     // ExecuteScalar returns null (not DBNull) when no row matches.
                     object existing = lookupCmd.ExecuteScalar();

                     if (existing == null)
                     {
                         // New keyword: start its posting list with this ID.
                         insertCmd.Parameters.Clear();
                         insertCmd.Parameters.AddWithValue("@word", word);
                         insertCmd.Parameters.AddWithValue("@ids", ID);
                         insertCmd.ExecuteNonQuery();
                     }
                     else
                     {
                         // Known keyword: append this ID to its posting list.
                         string currentIds = existing == System.DBNull.Value ? "" : existing.ToString();
                         string newValue = currentIds.Length == 0 ? ID : currentIds + "," + ID;
                         updateCmd.Parameters.Clear();
                         updateCmd.Parameters.AddWithValue("@word", word);
                         updateCmd.Parameters.AddWithValue("@ids", newValue);
                         updateCmd.ExecuteNonQuery();
                     }
                 }
             }
         }
     }

     // Matching (not implemented yet)
     string userWord = "用户搜索的关键词";
     //match(userWord);
 }   // leaving the using block closes and disposes the connection
posted @ 2012-11-06 23:37  DOOM_buaascse  阅读(253)  评论(0)  编辑  收藏  举报