C#实现字典树

 C#实现字典树,欢迎测试。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ConsoleApplication7
{
public class Trie
{
private static Node Root = new Node(' ');
private int Capbility = 10;
public Trie(int capbility)
{
Capbility
= capbility;
}
public void AddWord(string Word)
{
List
<char> charArr = new List<char>(Word.ToCharArray());
AddNode(Root, charArr);
}
public List<string> Search(string Word)
{
List
<string> SearchResult = new List<string>();
//先深度先直线搜索,到达节点,再广度优先搜索。
Node DFSCurrentNode = Root;
char[] CharArr = Word.ToCharArray();
bool IsExists = true;
for (int i = 0; i < Word.Length; i++)
{
if (DFSCurrentNode.Nodes != null)
{
//如果找到了
if (DFSCurrentNode.Nodes.ContainsKey(CharArr[i]))
{
//继续找。
DFSCurrentNode = DFSCurrentNode.Nodes[CharArr[i]];
}
else
{
//没找到中断。
IsExists = false;
}
}
}
//深搜到了节点,在此节点上广搜。
if (IsExists)
{
List
<SearchInfo> CurrentNodeList = new List<SearchInfo>() { new SearchInfo(DFSCurrentNode, Word) };
BFS(SearchResult, CurrentNodeList);
}
return SearchResult;
}


//广度优先搜索
private void BFS(List<string> SearchResult, List<SearchInfo> CurrentNodeList)
{
//结果足够,退出
if (SearchResult.Count >= Capbility || CurrentNodeList.Count == 0)
{
return; }
bool IsOver = false;
//记录当前节点CurrentNodeList旧长度OldCount,以便于事后移除。
int OldCount = CurrentNodeList.Count;
//遍历当前节点集合。
//foreach (SearchInfo SI in CurrentNodeList)
for (int i = 0; i < OldCount; i++)
{
SearchInfo SI
= CurrentNodeList[i];
////遍历当前单个节点的子节点。
if (SI.CurrentNode.Nodes != null)
{
foreach (Node N in SI.CurrentNode.Nodes.Values)
{
//记录路径。
string Word = SI.Word + N.Char;
//路径有结尾标记,进入结果。
if (N.IsEnd)
{
SearchResult.Add(Word);
}
//结果足够,退出
if (SearchResult.Count >= Capbility)
{
IsOver
= true;
break;
}
else
{
//把子节点追加到CurrentNodeList尾部。
CurrentNodeList.Add(new SearchInfo(N, Word));
}
}
}
if (IsOver)
{
break;
}
}
//如果结果还不够
if (!IsOver)
{
//移除当前节点,就是CurrentNodeList在L0以前的记录。
CurrentNodeList.RemoveRange(0, OldCount);
//进入下一轮搜索。
BFS(SearchResult, CurrentNodeList);
}
}
private void AddNode(Node N, List<char> charArr)
{
if (charArr.Count > 0)
{
char C = charArr[0];
if (N.Nodes == null)
{
N.Nodes
= new Dictionary<char, Node>();
}
if (!N.Nodes.ContainsKey(C))
{
Node newNode
= new Node(C);
N.Nodes.Add(C, newNode);
}
if (charArr.Count == 1)
{
N.Nodes[C].IsEnd
= true;
}
charArr.RemoveAt(
0);
AddNode(N.Nodes[C], charArr);
}
}
//当前搜索的节点信息。相当于键值对

private bool Contains(Dictionary<char, Node> Nodes, char C)
{

return true;
}
private class SearchInfo
{
public SearchInfo(Node N, string W)
{
CurrentNode
= N;
Word
= W;
}
public string Word;
public Node CurrentNode;
}
private class Node
{
public Node(char C)
{
Char
= C;
}
public char Char;
public bool IsEnd = false;
public Dictionary<char, Node> Nodes;
}
}
}

测试代码:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
namespace ConsoleApplication7
{
class Program
{
static void Main(string[] args)
{
//在网站启动时候,批量添加词库。也可以在运行中单独添加新词。
//注意,核心容器是Static的全局变量,如果一个站运行多个词库,复制Trie类为其他名字,比如TrieFeeling,TrieOtherWord等
//操作实例可以各个页面随意创建新实例,其中添加操作是影响全局的,搜索是动态的互不干扰的。
int n = 100;//期望结果大小

Trie T
= new Trie(n);
int count = 10 * 10000;
Double M1
= getMem();
string[] Arr = new string[count];
for (int i = 0; i < Arr.Length; i++)
{
Arr[i]
= (Guid.NewGuid()).ToString();
}
Stopwatch SW
= new Stopwatch();
Console.WriteLine(
"原始内存消耗(兆)" + (getMem() - M1));
M1
= getMem();

SW.Start();
for (int i = 0; i < Arr.Length; i++)
{
T.AddWord(Arr[i]);
}
SW.Stop();
Console.WriteLine(
"插入" + Arr.Length + "条单词耗时:" + SW.ElapsedMilliseconds + "毫秒,增加内存消耗(兆)" + (getMem() - M1));
//搜索测试一
M1 = getMem();
Console.WriteLine(
"-------------输入:‘‘12’’---------------");
SW.Start();
List
<string> Result1 = T.Search("12");
SW.Stop();

Console.WriteLine(
"查询耗时:" + SW.ElapsedMilliseconds + "毫秒,增加内存消耗(兆)" + (getMem() - M1));
foreach (string S in Result1)
{
Console.WriteLine(S);
}
//搜索测试二
Console.WriteLine("-------------对比测试,常规IndexOf:‘‘12’’---------------");
int Num = 0;
SW.Start();
foreach (string S in Arr)
{
if (S.IndexOf("12") == 0)
{
Console.WriteLine(S);
Num
++;
}
if (Num == n)
{
break;
}
}
SW.Stop();
Console.WriteLine(
"查询耗时:" + SW.ElapsedMilliseconds + "毫秒");
Console.WriteLine(
"仅仅快了一两百毫秒,却消耗了大量内存来存储?");
Console.Read();
}

static Double getMem()
{
Process CurrentProcess
= Process.GetCurrentProcess();
return CurrentProcess.WorkingSet64 / 1024 / 1024;//占用内存

}

}
}
posted @ 2011-04-15 19:02  CSDN大笨狼  阅读(2979)  评论(0编辑  收藏  举报