gb_trees数据结构
gb_trees = {Size, Tree}
Tree = {Key, Value, Smaller, Bigger} | nil
Smaller = Tree
Bigger = Tree
gb_trees操作
Eshell V5.9.1 (abort with ^G) 1> G=gb_trees. gb_trees 2> G:empty(). {0,nil} 3> G:insert(k,v,G:empty()). {1,{k,v,nil,nil}} 4> G:insert(k1,v1,v(3)). {2,{k,v,nil,{k1,v1,nil,nil}}} 5> G:insert(k2,v3,v(4)). {3,{k,v,nil,{k1,v1,nil,{k2,v3,nil,nil}}}} 6> G:insert(k0,v0,v(4)). {3,{k,v,nil,{k1,v1,{k0,v0,nil,nil},nil}}} 7> G:insert(k0,v0,v(5)). {4,{k,v,nil,{k1,v1,{k0,v0,nil,nil},{k2,v3,nil,nil}}}} 8> G:insert(k0,v0,v(6)). ** exception error: {key_exists,k0} in function gb_trees:insert_1/4 (gb_trees.erl, line 321) in call from gb_trees:insert_1/4 (gb_trees.erl, line 283) in call from gb_trees:insert_1/4 (gb_trees.erl, line 300) in call from gb_trees:insert/3 (gb_trees.erl, line 280)
Eshell V5.9.1 (abort with ^G) 1> T={8,{k,v,nil,{k1,v1,{k0,v0,nil,nil},{k4,v4,{k3,v3,nil,nil},{k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}}}. {8, {k,v,nil, {k1,v1, {k0,v0,nil,nil}, {k4,v4, {k3,v3,nil,nil}, {k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}}} 2> gb_trees:delete(k1,T). {7,{k,v,nil,{k3,v3,{k0,v0,nil,nil}, {k4,v4,nil,{k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}}} 3> gb_trees:balance(v(2)). {7,{k4,v4,{k0,v0,{k,v,nil,nil},{k3,v3,nil,nil}}, {k6,v6,{k5,v5,nil,nil},{k7,v7,nil,nil}}}} 4>
%% Looks up Key in the tree part of a {Size, Tree} pair.
%% Returns {value, Value} when a node with an equal key is found,
%% and none when the search reaches a nil leaf.
lookup(Key, {_, T}) ->
    lookup_1(Key, T).

%% The term order is an arithmetic total order, so we should not
%% test exact equality for the keys. (If we do, then it becomes
%% possible that neither `>', `<', nor `=:=' matches.) Testing '<'
%% and '>' first is statistically better than testing for
%% equality, and also allows us to skip the test completely in the
%% remaining case.

%% Descend into Smaller or Bigger depending on the comparison; the
%% third clause is reached only when Key is neither less nor greater
%% than the node key, i.e. the keys compare equal.
lookup_1(Key, {Key1, _, Smaller, _}) when Key < Key1 ->
    lookup_1(Key, Smaller);
lookup_1(Key, {Key1, _, _, Bigger}) when Key > Key1 ->
    lookup_1(Key, Bigger);
lookup_1(_, {_, Value, _, _}) ->
    {value, Value};
lookup_1(_, nil) ->
    none.
6> gb_trees:lookup(k1,T). {value,v1} 7> gb_trees:get(k1,T). v1 8>
update方法就执行类似的遍历过程完成了gb_trees的重建:
%% Replaces the value stored under Key and returns the new {Size, Tree}
%% pair. The size S is carried over unchanged. Only the nodes on the
%% path from the root to the matching node are rebuilt.
%% No clause of update_1/3 accepts nil, so a key that is not present
%% makes the call fail with function_clause.
update(Key, Val, {S, T}) ->
    T1 = update_1(Key, Val, T),
    {S, T1}.

%% See `lookup' for notes on the term comparison order.

update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key < Key1 ->
    {Key1, V, update_1(Key, Value, Smaller), Bigger};
update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key > Key1 ->
    {Key1, V, Smaller, update_1(Key, Value, Bigger)};
%% Keys compare equal: store the supplied Key and Value in this node.
update_1(Key, Value, {_, _, Smaller, Bigger}) ->
    {Key, Value, Smaller, Bigger}.
%% Stores Val under Key in T whether or not the key already exists:
%% calls insert/3 when is_defined/2 reports the key as absent, and
%% update/3 when it reports the key as present.
enter(Key, Val, T) ->
    case is_defined(Key, T) of
        false ->
            insert(Key, Val, T);
        true ->
            update(Key, Val, T)
    end.
%% Returns the keys of a {Size, Tree} pair as a list, in in-order
%% (Smaller subtree, node, Bigger subtree) sequence.
keys({_Size, Tree}) ->
    keys(Tree, []).

%% The Bigger subtree is collected first so that each key can be
%% prepended to the accumulator; no reversal is needed afterwards.
keys(nil, Acc) ->
    Acc;
keys({Key, _Value, Smaller, Bigger}, Acc) ->
    FromBigger = keys(Bigger, Acc),
    keys(Smaller, [Key | FromBigger]).

%% Returns the values of a {Size, Tree} pair as a list, in the same
%% in-order sequence as keys/1.
values({_Size, Tree}) ->
    values(Tree, []).

values(nil, Acc) ->
    Acc;
values({_Key, Value, Smaller, Bigger}, Acc) ->
    FromBigger = values(Bigger, Acc),
    values(Smaller, [Value | FromBigger]).
18> gb_trees:largest(T). {k7,v7} 19> gb_trees:take_largest(T). {k7,v7, {7, {k,v,nil, {k1,v1,{k0,v0,nil,nil}, {k4,v4,{k3,v3,nil,nil},{k5,v5,nil,{k6,v6,nil,nil}}}}}}} 20> gb_trees:smallest(T). {k,v} 21> gb_trees:take_smallest(T). {k,v, {7, {k1,v1, {k0,v0,nil,nil}, {k4,v4, {k3,v3,nil,nil}, {k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}}} 22>
%% Removes the rightmost node of the tree. Returns {Key, Value, Tree1}
%% where Key/Value belong to the removed node and Tree1 is the remaining
%% {Size - 1, Tree} pair.
take_largest({Size, Tree}) when is_integer(Size), Size >= 0 ->
    {MaxKey, MaxValue, Rest} = take_largest1(Tree),
    {MaxKey, MaxValue, {Size - 1, Rest}}.

%% Follows the Larger branch until it is nil; that node is the one
%% removed, and its Smaller subtree takes its place. On the way back
%% up, each visited node is rebuilt with the shortened right subtree.
take_largest1({Key, Value, Smaller, Larger}) ->
    case Larger of
        nil ->
            {Key, Value, Smaller};
        _ ->
            {MaxKey, MaxValue, LargerRest} = take_largest1(Larger),
            {MaxKey, MaxValue, {Key, Value, Smaller, LargerRest}}
    end.
12> gb_trees:next(gb_trees:iterator(T)). {k,v, [{k0,v0,nil,nil}, {k1,v1, {k0,v0,nil,nil}, {k4,v4, {k3,v3,nil,nil}, {k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}]} 13> {Key,Value,I}=gb_trees:next(gb_trees:iterator(T)). {k,v, [{k0,v0,nil,nil}, {k1,v1, {k0,v0,nil,nil}, {k4,v4, {k3,v3,nil,nil}, {k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}]} 14> {Key2,Value2,I2}=gb_trees:next(I). {k0,v0, [{k1,v1, {k0,v0,nil,nil}, {k4,v4, {k3,v3,nil,nil}, {k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}]} 15> 15> gb_trees:iterator(T). [{k,v,nil, {k1,v1, {k0,v0,nil,nil}, {k4,v4, {k3,v3,nil,nil}, {k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}}] 16> I. [{k0,v0,nil,nil}, {k1,v1, {k0,v0,nil,nil}, {k4,v4, {k3,v3,nil,nil}, {k5,v5,nil,{k6,v6,nil,{k7,v7,nil,nil}}}}}] 18>
%% Builds an iterator for a {Size, Tree} pair. The result is a list of
%% tree nodes whose head is the leftmost node of the tree; an empty
%% tree yields [].
iterator({_, T}) ->
    iterator_1(T).

iterator_1(T) ->
    iterator(T, []).

%% The iterator structure is really just a list corresponding to
%% the call stack of an in-order traversal. This is quite fast.

%% Walk down the left spine, pushing every node passed onto As, and
%% stop at the first node whose left subtree is nil.
iterator({_, _, nil, _} = T, As) ->
    [T | As];
iterator({_, _, L, _} = T, As) ->
    iterator(L, [T | As]);
iterator(nil, As) ->
    As.
mochiweb_headers
%% @spec enter(key(), value(), headers()) -> headers()
%% @doc Insert the pair into the headers, replacing any pre-existing key.
%% The tree is keyed on normalize(K); the original key K is kept next to
%% the value as {K, V1}.
enter(K, V, T) ->
    K1 = normalize(K),
    V1 = any_to_list(V),
    gb_trees:enter(K1, {K, V1}, T).

%% @spec insert(key(), value(), headers()) -> headers()
%% @doc Insert the pair into the headers, merging with any pre-existing key.
%% A merge is done with Value = V0 ++ ", " ++ V1.
insert(K, V, T) ->
    K1 = normalize(K),
    V1 = any_to_list(V),
    try gb_trees:insert(K1, {K, V1}, T)
    catch
        %% gb_trees:insert/3 raises {key_exists, Key} for a duplicate key;
        %% in that case keep the originally stored key K0 and store the
        %% merged value instead.
        error:{key_exists, _} ->
            {K0, V0} = gb_trees:get(K1, T),
            V2 = merge(K1, V1, V0),
            gb_trees:update(K1, {K0, V2}, T)
    end.

%% @spec delete_any(key(), headers()) -> headers()
%% @doc Delete the header corresponding to key if it is present.
delete_any(K, T) ->
    K1 = normalize(K),
    gb_trees:delete_any(K1, T).
When should you use gb_trees over dicts? Well, it's not a clear decision. As the benchmark module I have written will show, gb_trees and dicts have somewhat similar performances in many respects. However, the benchmark demonstrates that dicts have the best read speeds while the gb_trees tend to be a little quicker on other operations. You can judge based on your own needs which one would be the best.
Oh and also note that while dicts have a fold function, gb_trees don't: they instead have an iterator function, which returns a bit of the tree on which you can call
gb_trees:next(Iterator)
to get the following values in order. What this means is that you need to write your own recursive functions on top of gb_trees rather than use a generic fold. On the other hand, gb_trees let you have quick access to the smallest and largest elements of the structure with gb_trees:smallest/1 and gb_trees:largest/1.
Link: http://learnyousomeerlang.com/a-short-visit-to-common-data-structures
是不是可以回答下面的问题了?
Q:为什么mochiweb_headers使用gb_trees作为存储结构?为什么不是dict或者其它的数据结构?
2014-8-20 16:55:14补充
http://erlang.org/pipermail/erlang-questions/2010-March/050333.html
Björn Gustavsson, from the Erlang/OTP team and programmer of Wings 3D, suggests using gb_sets in most circumstances, using ordsets when you need a clear representation that you want to process with your own code, and using sets when you need the =:= operator. gb_sets, ordsets, and sofs all use the == operator to compare values; if you have the numbers 2 and 2.0, they’ll be seen as the same number. However, the sets module uses the =:= operator.
晚安!