自定义变形树
项目里遇到一个小需求,存储少量16进制串到一个容器中,并可对其进行搜索。
一开始我使用了字典树来存储,后来尝试写了个变形树,减少内存消耗和提高搜索效率。
基础结构见下图:
简单解释下,就是图中长方形是字符串,箭头为指针。当存储第一条字符串时,容器中存下一个字符串。当存储第二条时,与第一条进行比较,遇到第一个不同字符,在这个位置链接一个新的字符串,存储剩下没有比较的字符串;一直循环下去。
具体看代码:
1 #include <iostream> 2 #include <cstring> 3 #include <queue> 4 5 //变形树,功能类似于字典树 6 //适合数据量比较小的情况,存储不同元素 7 //字符串用首地址和长度表示,允许字符串中出现0x0 8 //push,先入长的,后入短的,即提前排序再push,会导致search性能达到最佳 9 class JTree 10 { 11 private: 12 struct Node 13 { 14 uint8_t *str; 15 Node **node; 16 uint8_t *fin; 17 uint32_t len; 18 19 Node() : len(0) 20 { 21 str = new uint8_t[96]; 22 str[0] = 0; 23 node = (Node **)(str + 32); 24 memset(node, 0, 32); 25 fin = str + 64; 26 memset(fin, 0xff, 32); 27 } 28 ~Node() 29 { 30 delete []str; 31 } 32 }; 33 enum 34 { 35 FIND, 36 NO_FIN, 37 NO_STR 38 }; 39 40 public: 41 JTree (); 42 ~JTree (); 43 void Insert (const uint8_t *str, uint32_t len, uint8_t value); 44 uint8_t Search (const uint8_t *str, uint32_t len); 45 46 private: 47 uint32_t Index (Node *&node, uint32_t &nodeid, const uint8_t *str, uint32_t len, uint32_t &strid); 48 49 private: 50 Node *head; 51 }; 52 53 int main() 54 { 55 JTree tmp; 56 tmp.Insert((const uint8_t *)("hello world"), 11, 1); 57 tmp.Insert((const uint8_t *)"hello lvhuan", 12, 2); 58 tmp.Insert((const uint8_t *)"hello", 5, 3); 59 std::cout << tmp.Search((const uint8_t *)"hello lvhuan", 12) << std::endl; 60 std::cout << tmp.Search((const uint8_t *)("hel"), 3) << std::endl; 61 62 return 0; 63 } 64 65 JTree::JTree () 66 { 67 head = new Node; 68 } 69 70 JTree::~JTree () 71 { 72 std::queue<Node *> v; 73 v.push(head); 74 uint32_t tmp; 75 uint32_t size = 1; 76 while (size) 77 { 78 tmp = size; 79 for (int r = 0; r < tmp; r++) 80 { 81 Node *tnode = v.front(); 82 for (int i = 0; i < tnode->len; i++) 83 { 84 if (tnode->node[i]) 85 { 86 v.push(tnode->node[i]); 87 size++; 88 } 89 } 90 delete tnode; 91 v.pop(); 92 } 93 size -= tmp; 94 } 95 } 96 97 void JTree::Insert (const uint8_t *str, uint32_t len, uint8_t value) 98 { 99 Node *tnode; 100 uint32_t tid; 101 uint32_t sid; 102 uint32_t ret; 103 if (ret = Index(tnode, tid, str, len, sid), ret == NO_STR) 104 { 105 if (tnode->len > tid + 1) 106 { 107 Node *tmp = new Node; 108 tmp->len = len - sid; 109 memcpy(tmp->str, str + sid, tmp->len); 110 tmp->fin[tmp->len - 1] = value; 111 tnode->node[tid] = tmp; 112 } 113 else 114 { 115 uint32_t tmp = len - sid; 116 memcpy(tnode->str + tnode->len, str + sid, tmp); 117 tnode->len += tmp; 118 tnode->fin[tnode->len - 1] = value; 119 } 120 } 121 else 122 { 123 tnode->fin[tid] = value; 124 } 125 } 126 127 uint8_t JTree::Search (const uint8_t *str, uint32_t len) 128 { 129 Node *tnode; 130 uint32_t tid; 131 uint32_t sid; 132 if (Index(tnode, tid, str, len, sid) == FIND) 133 { 134 return tnode->fin[tid]; 135 } 136 else 137 { 138 return 0xff; 139 } 140 } 141 142 uint32_t JTree::Index (Node *&node, uint32_t &nodeid, const uint8_t *str, uint32_t len, uint32_t &strid) 143 { 144 if (head->node[0] == 0) 145 { 146 node = head; 147 nodeid = 0; 148 strid = 0; 149 return NO_STR; 150 } 151 152 Node *curnode = head->node[0]; 153 uint8_t *curstr = const_cast<uint8_t *>(str); 154 uint32_t t1 = 0, t2 = 0; // t1--curnode, t2--str 155 156 while (true) 157 { 158 if (t2 < len) 159 { 160 if (curnode->len > t1 && (curnode->str[t1] == curstr[t2])) 161 { 162 t1++; 163 t2++; 164 } 165 else 166 { 167 if (curnode->node[t1]) 168 { 169 curnode = curnode->node[t1]; 170 t1 = 0; 171 } 172 else 173 { 174 node = curnode; 175 nodeid = t1; 176 strid = t2; 177 return NO_STR; 178 } 179 } 180 } 181 else 182 { 183 node = curnode; 184 nodeid = t1 - 1; 185 if (node->fin[nodeid] == 0xff) 186 { 187 return NO_FIN; 188 } 189 else 190 { 191 return FIND; 192 } 193 } 194 } 195 }