Cowboy 源码分析(十三)

2012-06-03 14:17 rhinovirus 阅读(1852) 评论(0) 编辑收藏举报

　　这两天花了些时间搭建了下Go的开发环境，看了些基本的语法，感觉有类c语言基础的朋友们，学起来会容易些，学习Go语言的障碍会比erlang来的少的多。以后有机会跟大家分享Go吧，这边给大家截个图，分享下：

　　怎么样，看起来是不是跟C很像，呵呵，好了，回到Cowboy，上一篇，我们知道通过 cowboy_http_protocol:header/3 函数和 cowboy_http_protocol:parse_header/2 之间的递归调用，来解析头部的每一行，我在上一篇提到 Headers 的最后一行为 Connection，那么我们看下，cowboy_http_protocol:header/3 是如何处理的，代码如下：

%% Clause handling a decoded 'Connection' header line.
%% The guard restricts this clause to the case where the state's
%% req_keepalive value is still strictly below max_keepalive.
header({http_header, _I, 'Connection', _R, Connection},
        Req=#http_req{headers=Headers}, State=#state{
        req_keepalive=Keepalive, max_keepalive=MaxKeepalive})
        when Keepalive < MaxKeepalive ->
    %% Prepend the raw header value to the request's header list.
    Req2 = Req#http_req{headers=[{'Connection', Connection}|Headers]},
    %% Ask cowboy_http_req for the parsed form of the header; it returns the
    %% parsed value together with a (possibly updated) request record.
    {ConnTokens, Req3}
        = cowboy_http_req:parse_header('Connection', Req2),
    %% Presumably maps the parsed tokens to a single atom describing the
    %% connection mode -- confirm against cowboy_http:connection_to_atom/1.
    ConnAtom = cowboy_http:connection_to_atom(ConnTokens),
    %% Store the atom in the request and go on parsing the next header line.
    parse_header(Req3#http_req{connection=ConnAtom}, State);

　　首先是 Req2 = Req#http_req{headers=[{'Connection', Connection}|Headers]}, 这行是生成新的 Req2，很简单不解释了。

　　来看下这行：{ConnTokens, Req3} = cowboy_http_req:parse_header('Connection', Req2), 下面是cowboy_http_req:parse_header/2 的相关函数：

%% @doc Return the semantically parsed value of a header.
%%
%% If the header has already been parsed, the value stored in the
%% request's p_headers list is returned as is. Otherwise parsing is
%% delegated to parse_header/3, using the type-appropriate default from
%% parse_header_default/1 for the case where the header is absent.
%% @see parse_header/3
-spec parse_header(cowboy_http:header(), #http_req{})
    -> {any(), #http_req{}} | {error, badarg}.
parse_header(Name, #http_req{p_headers=Parsed} = Req) ->
    case lists:keyfind(Name, 1, Parsed) of
        {Name, Cached} ->
            {Cached, Req};
        false ->
            Default = parse_header_default(Name),
            parse_header(Name, Req, Default)
    end.

%% @doc Default value used by semantic header parsing when the header is
%% missing: an empty list for 'Connection', the identity encoding for
%% 'Transfer-Encoding', and undefined for every other header.
-spec parse_header_default(cowboy_http:header()) -> any().
parse_header_default(Name) ->
    case Name of
        'Connection' -> [];
        'Transfer-Encoding' -> [<<"identity">>];
        _Other -> undefined
    end.

　　同样用debugger，我们可以看到 PHeaders = []，Name = 'Connection'，那么 lists:keyfind(Name, 1, PHeaders) 得到的肯定是 false，紧接着调用 cowboy_http_req:parse_header/3 这个函数，函数比较大，我不贴全部了，就贴调用的部分：

%% Clause for the 'Connection' header: delegates to parse_header/4, passing
%% a one-argument fun that parses the raw value as a non-empty list whose
%% elements are read by cowboy_http:token_ci/2 (presumably case-insensitive
%% tokens -- confirm against cowboy_http).
parse_header(Name, Req, Default) when Name =:= 'Connection' ->
    parse_header(Name, Req, Default,
        fun (Value) ->
            cowboy_http:nonempty_list(Value, fun cowboy_http:token_ci/2)
        end);

　　这部分函数，就一行代码，调用 cowboy_http_req:parse_header/4 函数，其中 Default = []，最后一个参数为匿名函数。由调用 cowboy_http:nonempty_list/2 和 cowboy_http:token_ci/2 组成，下面是 cowboy_http:nonempty_list/2 的代码：

%% Parsing.

%% @doc Parse a list of elements of the given type, requiring at least one.
%%
%% Elements are collected by list/3, which accumulates them in reverse
%% order; the result is reversed here so callers see input order.
%% A parse failure or a list without any element yields {error, badarg}.
-spec nonempty_list(binary(), fun()) -> [any(), ...] | {error, badarg}.
nonempty_list(Data, Fun) ->
    case list(Data, Fun, []) of
        Failed when Failed =:= []; Failed =:= {error, badarg} ->
            {error, badarg};
        Items ->
            lists:reverse(Items)
    end.

　　这个函数接受2个参数，然后调用 cowboy_http:list/3 函数：

%% Parse a comma-separated list from Data in continuation-passing style.
%% Fun is the element parser: it is called with the remaining data and a
%% two-argument continuation. Parsed elements are prepended to Acc, so the
%% returned list is in reverse input order.
-spec list(binary(), fun(), [binary()]) -> [any()] | {error, badarg}.
%% From the RFC:
%% <blockquote>Wherever this construct is used, null elements are allowed,
%% but do not contribute to the count of elements present.
%% That is, "(element), , (element) " is permitted, but counts
%% as only two elements. Therefore, where at least one element is required,
%% at least one non-null element MUST be present.</blockquote>
list(Data, Fun, Acc) ->
    %% Skip leading whitespace, then dispatch on what follows.
    whitespace(Data,
        %% End of input: return what has been accumulated so far.
        fun (<<>>) -> Acc;
            %% A comma with no element before it is a null element: skip it.
            (<< $,, Rest/binary >>) -> list(Rest, Fun, Acc);
            %% Anything else is handed to the element parser. Judging from
            %% its use below, the continuation receives the data left after
            %% the element (D) and the parsed element itself (I).
            (Rest) -> Fun(Rest,
                fun (D, I) -> whitespace(D,
                        %% Element was the last thing in the input.
                        fun (<<>>) -> [I|Acc];
                            %% Element followed by a comma: keep parsing.
                            (<< $,, R/binary >>) -> list(R, Fun, [I|Acc]);
                            %% Any other trailing data is a syntax error.
                            (_Any) -> {error, badarg}
                        end)
                end)
        end).

　　这个函数看起来比较吓人，调用 cowboy_http:whitespace/2 时同样传递2个参数，一个是 Data，另一个是只接受一个参数的匿名函数，而复杂就复杂在这个匿名函数上，我们详细看下：

　　(<<>>) -> Acc; 如果调用这个匿名函数的参数为 <<>>，则返回 Acc；

　　(<< $,, Rest/binary >>) -> list(Rest, Fun, Acc); 可以使用$符号来表示字符的整数值，摘自《Erlang程序设计》，那么 << $,, Rest/binary >> = << 44, Rest/binary >>

其实也就是说，这个二进制参数是以逗号（,）开头的二进制数据，下面是我测试的例子：

　　现在是不是明白了，其实很多新手（包括我）在遇到问题时，第一时间不是去动手做个测试，而是求助于其他人，如果能自己动手，不仅可以学到更多，而且还容易记住。好了，接下去就是递归调用 cowboy_http:list/3。

            (Rest) -> Fun(Rest,
                fun (D, I) -> whitespace(D,
                        fun (<<>>) -> [I|Acc];
                            (<< $,, R/binary >>) -> list(R, Fun, [I|Acc]);
                            (_Any) -> {error, badarg}
                        end)
                end)

　　如果是其他二进制数据，则调用 Fun(Rest, 匿名函数)，这里又定义了一个包含两个参数的匿名函数，不详细说了，我们看下 cowboy_http:whitespace/2 这个函数：

%% @doc Drop leading spaces and tabs from the data, then pass what is
%% left to the continuation Fun and return its result.
-spec whitespace(binary(), fun()) -> any().
whitespace(<< $\s, Rest/binary >>, Fun) ->
    whitespace(Rest, Fun);
whitespace(<< $\t, Rest/binary >>, Fun) ->
    whitespace(Rest, Fun);
whitespace(Data, Fun) ->
    Fun(Data).

　　这个函数比较简单，如果二进制以空格（\s）或制表符（\t）开头，则递归地跳过这些字符，直到遇到其他字符（或数据为空）时，再调用 Fun(Data)。

　　由于包含许多匿名函数，虽然灵活了，但代码的可读性大大降低了，不知道大家有没有感觉到。今天就到这吧，发现不用图来说明，我自己理解起来也比较混乱，画图去了，下一篇分享给大家。谢谢大家支持。

　　补充，附我上图地址：

　　http://huaban.com/pins/7051157/zoom/

刷新页面返回顶部

码农生涯临渊慕鱼不如退而结网