字典树,又称单词查找树,Trie树,是一种树形结构,是一种哈希树的变种。
典型应用是用于统计、排序和保存大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。
其优点是:利用字符串的公共前缀来节约存储空间,最大限度地减少无谓的字符串比较,查询效率比哈希表高。
根节点不包含字符,除根节点外每一个节点都只包含一个字符, 每个节点的所有子节点包含的字符都不相同。
从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。
利用字典树查找字符串的过程,是从根节点开始,在根结点的子节点中寻找字符串的第一个字符,若找到节点,则从此节点开始进行字符串第二个字符的查找,这是一个反复、字符串逐个字符对应字典树逐层节点的查找过程。
一个简单的实现:
#include <iostream> using namespace std; class Trie { public: Trie(); ~Trie(); int32_t Insert(const char *word); int32_t Remove(const char *word); uint32_t Search(const char *word) const; private: static const uint32_t NUM_CHAR = 26; typedef struct TrieNode { char val; struct TrieNode *branch[NUM_CHAR]; uint32_t count; } trie_node_t; trie_node_t *root; trie_node_t *create_node(); void free(trie_node_t *node); char tolower(char c) const; }; Trie::Trie(): root(NULL) { root = create_node(); } Trie::~Trie() { free(root); } int32_t Trie::Insert(const char *word) { if (!word || strlen(word) == 0) { return -1; } trie_node_t *cur = root; const char *p = word; while (*p) { char c = tolower(*p); if (c == -1) { return -1; } uint32_t idx = c - 'a'; if (!cur->branch[idx]) { cur->branch[idx] = create_node(); cur->branch[idx]->val = c; } cur = cur->branch[idx]; ++p; } ++cur->count; return 0; } int32_t Trie::Remove(const char *word) { if (!word || strlen(word) == 0) { return -1; } trie_node_t *cur = root; const char *p = word; while (*p) { char c = tolower(*p); if (c == -1) { return -1; } uint32_t idx = c - 'a'; if (!cur->branch[idx]) { return -1; } cur = cur->branch[idx]; ++p; } if (cur->count > 0) { --cur->count; return 0; } return -1; } uint32_t Trie::Search(const char *word) const { if (!word || strlen(word) == 0) { return 0; } trie_node_t *cur = root; const char *p = word; while (*p) { char c = tolower(*p); if (c == -1) { return 0; } uint32_t idx = c - 'a'; if (!cur->branch[idx]) { return 0; } cur = cur->branch[idx]; ++p; } return cur->count; } Trie::trie_node_t *Trie::create_node() { Trie::trie_node_t *node = new Trie::trie_node_t; node->val = 0; for (int32_t i = 0; i < Trie::NUM_CHAR; ++i) { node->branch[i] = NULL; } node->count = 0; return node; } void Trie::free(Trie::trie_node_t *node) { if (node) { for (int32_t i = 0; i < NUM_CHAR; ++i) { free(node->branch[i]); } delete node; } } char Trie::tolower(char c) const { if (c >= 'a' && c <= 'z') { return c; } else if (c >= 'A' && c <= 'Z') { return c + 32; } return -1; } int32_t main() { Trie trie; trie.Insert("haha"); trie.Insert("haha"); trie.Insert("hehe"); trie.Remove("haha"); if (trie.Search("haha") > 0) { cout << "find haha" << endl; } if (trie.Search("hehe") > 0) { cout << "find hehe" << endl; } if (trie.Search("hihi") > 0) { cout << "find hihi" << endl; } return 0; }
From http://www.cnblogs.com/caosiyang/