在上一篇日志里，我介绍了cowboy 的socket pool —— ranch。

接下来，接着来介绍cowboy，本篇主要介绍从接受到客户端消息请求，解析，一直到路由的过程。

首先要看的是cowboy_protocol模块，它是cowboy和ranch的衔接点（分析请看我上一篇日志 http://www.cnblogs.com/star-star/archive/2012/12/19/2824008.html ）。先来看看每个session的初始化逻辑。

%% @doc Spawn a linked protocol process for one accepted connection.
%%
%% The new process starts in init/4 of this module and receives the same
%% four arguments. Always returns {ok, Pid}.
start_link(ListenerPid, Socket, Transport, Opts) ->
    InitArgs = [ListenerPid, Socket, Transport, Opts],
    {ok, spawn_link(?MODULE, init, InitArgs)}.
%% @private
%% Entry point of the connection process: collect the protocol options
%% into the #state{} record (the third argument of each get_value/3 call
%% is the fallback passed for that option), acknowledge the socket
%% hand-over to ranch, then start waiting for the first request with an
%% empty buffer and an empty-line counter of 0.
-spec init(pid(), inet:socket(), module(), any()) -> ok.
init(ListenerPid, Socket, Transport, Opts) ->
    State = #state{
        listener = ListenerPid,
        socket = Socket,
        transport = Transport,
        dispatch = get_value(dispatch, Opts, []),
        max_empty_lines = get_value(max_empty_lines, Opts, 5),
        max_header_name_length = get_value(max_header_name_length, Opts, 64),
        max_header_value_length = get_value(max_header_value_length, Opts, 4096),
        max_headers = get_value(max_headers, Opts, 100),
        max_keepalive = get_value(max_keepalive, Opts, infinity),
        max_request_line_length = get_value(max_request_line_length, Opts, 4096),
        onrequest = get_value(onrequest, Opts, undefined),
        onresponse = get_value(onresponse, Opts, undefined),
        timeout = get_value(timeout, Opts, 5000)
    },
    %% Nothing is read from the socket before ranch has acknowledged it.
    ok = ranch:accept_ack(ListenerPid),
    wait_request(<<>>, State, 0).

%% Request parsing.
%%
%% The next set of functions is the request parsing code. All of it
%% runs using a single binary match context. This optimization ends
%% right after the header parsing is finished and the code becomes
%% more interesting past that point.

%% Receive more data from the transport and append it to Buffer before
%% handing the result to parse_request/3. Any {error, _} from recv
%% terminates the connection.
-spec wait_request(binary(), #state{}, non_neg_integer()) -> ok.
wait_request(Buffer, State=#state{socket=Socket, transport=Transport,
        timeout=Timeout}, ReqEmpty) ->
    case Transport:recv(Socket, 0, Timeout) of
        {error, _Reason} ->
            terminate(State);
        {ok, Data} ->
            Buffer2 = << Buffer/binary, Data/binary >>,
            parse_request(Buffer2, State, ReqEmpty)
    end.

在以上的代码里，session初始化获取一堆关于http server的设定参数，将参数填充至上下文state中，同时调用ranch:accept_ack通知ranch初始化完成，

进入wait_request/3方法，循环等待客户端message，将每次获得的message放入一个buffer中，交给parse_request/3进行解析处理。

接下来，我们查看一下parse_request的源码：

%% Empty lines must be using \r\n, so a buffer starting with a bare \n is
%% rejected right away.
parse_request(<< $\n, _/binary >>, State, _) ->
    error_terminate(400, State);
%% Locate the end of the first line in Buffer and decide what to do:
%%  - a line end at offset 1 is treated as an empty line; it is skipped
%%    unless ReqEmpty has already reached max_empty_lines (400);
%%  - a line end further in means a request line is available;
%%  - no line end yet: keep reading, unless the buffer has outgrown
%%    max_request_line_length (414). This cap avoids endlessly reading
%%    from the socket and eventually crashing.
%% Offset 0 cannot occur here because the clause above catches it.
parse_request(Buffer, State=#state{max_request_line_length=MaxLength,
        max_empty_lines=MaxEmpty}, ReqEmpty) ->
    case binary:match(Buffer, <<"\n">>) of
        {1, _} when ReqEmpty =:= MaxEmpty ->
            error_terminate(400, State);
        {1, _} ->
            %% Drop the two bytes of the empty line and look again.
            << _:2/binary, Remaining/binary >> = Buffer,
            parse_request(Remaining, State, ReqEmpty + 1);
        {_, _} ->
            parse_method(Buffer, State, <<>>);
        nomatch when byte_size(Buffer) > MaxLength ->
            error_terminate(414, State);
        nomatch ->
            wait_request(Buffer, State, ReqEmpty)
    end.

如果buffer中没有<<"\n">>：当buffer的长度已经超过请求行的最大长度（max_request_line_length）时，以414错误终止session；否则继续等待message填充buffer。

如果buffer中有<<"\n">>且位置是1，则表示这是一个空白行（因为换行是<<"\r\n">>）：若空白行的个数已经达到规定的上限（max_empty_lines），以400错误终止session；否则将buffer开头的<<"\r\n">>两个字节去掉，继续解析buffer中剩余的数据。

如果buffer中有<<"\n">>且不在位置1时，就表明一行结束了，可以按照http协议格式开始解析了。

在解析完毕之后，进入cowboy_protocol:request，按照解析出的数据通过cowboy_req:new创建一个请求对象Req，交给cowboy_protocol:onrequest。

%% Build the request object from the parsed request line and headers,
%% then hand it to the onrequest step. Buffer holds whatever bytes were
%% received but not yet consumed by the parser.
request(Buffer, State=#state{socket=Socket, transport=Transport,
        req_keepalive=ReqKeepalive, max_keepalive=MaxKeepalive,
        onresponse=OnResponse},
        Method, Path, Query, Fragment, Version, Headers, Host, Port) ->
    %% When max_keepalive is the atom infinity this is always true:
    %% in Erlang term order every number sorts before every atom.
    CanKeepalive = ReqKeepalive < MaxKeepalive,
    Req = cowboy_req:new(Socket, Transport, Method, Path, Query, Fragment,
        Version, Headers, Host, Port, Buffer, CanKeepalive, OnResponse),
    onrequest(Req, State, Host).

%% Run the global onrequest callback, if one is configured.
%%
%% The callback may send a reply itself; in that case the request counts
%% as handled and we move on to the next one. Dispatching has not
%% happened yet at this point, so the handler, host_info, path_info and
%% bindings are still unknown.
-spec onrequest(cowboy_req:req(), #state{}, binary()) -> ok.
onrequest(Req, State=#state{onrequest=undefined}, Host) ->
    Path = cowboy_req:get(path, Req),
    dispatch(Req, State, Host, Path);
onrequest(Req, State=#state{onrequest=OnRequest}, Host) ->
    HookedReq = OnRequest(Req),
    case cowboy_req:get(resp_state, HookedReq) of
        waiting ->
            %% No reply was sent by the callback: route as usual, using
            %% the path of the request the callback returned.
            Path = cowboy_req:get(path, HookedReq),
            dispatch(HookedReq, State, Host, Path);
        _ ->
            next_request(HookedReq, State, ok)
    end.

接下来就要开始对request进行路由了。

首先，先介绍一下cowboy的路由规则。

它是一组{Hostname ,PathRules}组成的list，而PathRules是一组形如{Path, HandlerMod, HandlerOpts}组成的list。

cowboy的路由主要涉及的是cowboy_dispatcher:match(Dispatch, Host, Path),

主要脉络是循环匹配规则，先在match/3中匹配host，然后在match_path/4中匹配path

此模块代码很少，我在其中加上注释，完整分析一下:

%% Find the first dispatch rule whose host pattern matches Host, then
%% look up Path among that rule's path rules via match_path/4.
%%
%% Host may be a binary (it is split into tokens on first use) or an
%% already split token list. Returns {error, notfound, host} when no
%% host rule matches; otherwise returns whatever match_path/4 returns.
-spec match(dispatch_rules(), Host::binary() | tokens(), Path::binary())
    -> {ok, module(), any(), bindings(),
        HostInfo::undefined | tokens(),
        PathInfo::undefined | tokens()}
    | {error, notfound, host} | {error, notfound, path}
    | {error, badrequest, path}.
%% No rules left to try.
match([], _, _) ->
    {error, notfound, host};
%% A host pattern of '_' accepts any host: go straight to path matching.
match([{'_', PathMatchs}|_Tail], _, Path) ->
    match_path(PathMatchs, undefined, Path, []);
match([{HostMatch, PathMatchs}|Tail], Tokens, Path)
        when is_list(Tokens) ->
    %% split_host/1 yields the host tokens in reverse order, so the host
    %% pattern is reversed with lists:reverse/1 before comparing.
    case list_match(Tokens, lists:reverse(HostMatch), []) of
        false ->
            match(Tail, Tokens, Path);
        {true, Bindings, undefined} ->
            match_path(PathMatchs, undefined, Path, Bindings);
        {true, Bindings, HostInfo} ->
            %% The tokens captured by '...' are still in reverse order;
            %% put them back in reading order.
            match_path(PathMatchs, lists:reverse(HostInfo),
                Path, Bindings)
    end;
%% Host is not a token list yet: split it, then retry.
match(Dispatch, Host, Path) ->
    match(Dispatch, split_host(Host), Path).

%% Find the first path rule that matches the request path.
%%
%% The path may be a binary (split into tokens on first use), a token
%% list, or the atom badrequest as returned by split_path/1 when
%% decoding fails. Bindings collected while matching the host are kept
%% and the path bindings are appended to them.
-spec match_path(dispatch_path(),
    HostInfo::undefined | tokens(), binary() | tokens(), bindings())
    -> {ok, module(), any(), bindings(),
        HostInfo::undefined | tokens(),
        PathInfo::undefined | tokens()}
    | {error, notfound, path} | {error, badrequest, path}.
match_path([], _, _, _) ->
    {error, notfound, path};
%% A path pattern of '_' matches any path.
match_path([{'_', Handler, Opts}|_], HostInfo, _, HostBinds) ->
    {ok, Handler, Opts, HostBinds, HostInfo, undefined};
%% The pattern <<"*">> only matches the literal path <<"*">>.
match_path([{<<"*">>, Handler, Opts}|_], HostInfo, <<"*">>, HostBinds) ->
    {ok, Handler, Opts, HostBinds, HostInfo, undefined};
match_path([{PathMatch, Handler, Opts}|Rules], HostInfo, Tokens,
        HostBinds) when is_list(Tokens) ->
    case list_match(Tokens, PathMatch, []) of
        {true, PathBinds, PathInfo} ->
            {ok, Handler, Opts, HostBinds ++ PathBinds, HostInfo, PathInfo};
        false ->
            match_path(Rules, HostInfo, Tokens, HostBinds)
    end;
match_path(_, _, badrequest, _) ->
    {error, badrequest, path};
%% The path is not a token list yet: split it, then retry.
match_path(Dispatch, HostInfo, Path, HostBinds) ->
    Tokens = split_path(Path),
    match_path(Dispatch, HostInfo, Tokens, HostBinds).

%% Internal.

%% @doc Split a hostname into a list of tokens.
%%
%% The host is cut at every <<".">> and the labels come back in reverse
%% order: <<"www.sohu.com">> yields [<<"com">>, <<"sohu">>, <<"www">>].
%% An empty host yields []. An empty label in front of a dot (as in
%% <<".com">> or <<"a..b">>) fails with a badmatch error.
-spec split_host(binary()) -> tokens().
split_host(Host) ->
    split_host(Host, []).

split_host(Host, Acc) ->
    case binary:split(Host, <<".">>) of
        %% Nothing left: either the host was empty or it ended in a dot.
        [<<>>] ->
            Acc;
        %% No more dots: this is the last label.
        [Label] ->
            [Label|Acc];
        [Label, Rest] ->
            false = (Label =:= <<>>),
            split_host(Rest, [Label|Acc])
    end.

%% @doc Split a path into a list of path segments.
%%
%% Following RFC2396, this function may return path segments containing any
%% character, including <em>/</em> if, and only if, a <em>/</em> was escaped
%% and part of a path segment.
%%
%% The path must start with <<"/">>. It is cut at every <<"/">>, each
%% segment is passed through cowboy_http:urldecode/1 and the segments are
%% returned in path order: <<"/adm/notify">> yields
%% [<<"adm">>, <<"notify">>]. Returns the atom badrequest when a badarg
%% error is raised while splitting or decoding.
-spec split_path(binary()) -> tokens() | badrequest.
split_path(<< $/, Path/bits >>) ->
    split_path(Path, []).

%% The try is installed once around the whole operation rather than
%% around each recursive step, so the splitting loop stays tail-recursive
%% and no exception handler is stacked per segment.
split_path(Path, Acc) ->
    try
        [cowboy_http:urldecode(S) || S <- split_path_segments(Path, Acc)]
    catch
        error:badarg ->
            badrequest
    end.

%% Cut Path at every <<"/">>, accumulating the raw (still encoded)
%% segments in front of Acc, and return them in path order. A trailing
%% <<"/">> does not produce an empty last segment.
split_path_segments(Path, Acc) ->
    case binary:match(Path, <<"/">>) of
        nomatch when Path =:= <<>> ->
            lists:reverse(Acc);
        nomatch ->
            lists:reverse([Path|Acc]);
        {Pos, _} ->
            << Segment:Pos/binary, _:8, Rest/bits >> = Path,
            split_path_segments(Rest, [Segment|Acc])
    end.

-spec list_match(tokens(), match_rule(), bindings())
    -> {true, bindings(), undefined | tokens()} | false.
%% Compare a token list (a split host or a split path) with a match rule,
%% element by element. On success the result carries the accumulated
%% bindings (most recent binding first) and either the tokens swallowed
%% by a trailing '...' or undefined.
%%
%% Atom '...' as the last rule element matches whatever tokens are left.
list_match(Remaining, ['...'], Acc) ->
    {true, Acc, Remaining};
%% Atom '_' matches any single token; nothing is bound.
list_match([_|Tokens], ['_'|Rules], Acc) ->
    list_match(Tokens, Rules, Acc);
%% The token equals the rule element; keep going.
list_match([Same|Tokens], [Same|Rules], Acc) ->
    list_match(Tokens, Rules, Acc);
%% Any other atom is a binding name: rule element a against token
%% <<"bb">> records {a, <<"bb">>}.
list_match([Token|Tokens], [Name|Rules], Acc) when is_atom(Name) ->
    list_match(Tokens, Rules, [{Name, Token}|Acc]);
%% Both lists are exhausted at the same time: match complete.
list_match([], [], Acc) ->
    {true, Acc, undefined};
%% Anything else is a mismatch.
list_match(_, _, _) ->
    false.

匹配规则主要是这样：

Hostname and Path are match rules and can be either the
atom '_', which matches everything, `<<"*">>', which match the
wildcard path, or a list of tokens.

Each token can be either a binary, the atom <em>'_'</em>,
the atom '...' or a named atom. A binary token must match exactly,
'_' matches everything for a single token, <em>'...'</em> matches
everything for the rest of the tokens and a named atom will bind the
corresponding token value and return it.

匹配成功会返回结果为{ok, Handler, Opts, Bindings, HostInfo, PathInfo}

下面我来举几个匹配的例子，大家可以体会下，假设

Host = <<"www.sohu.com">>

Path = <<"/notify/test">>

cowboy_dispatcher:match([], <<"www.sohu.com">>, <<"/notify/test">>)

匹配失败:{error, notfound, host}

cowboy_dispatcher:match([{'_', []}], <<"www.sohu.com">>, <<"/notify/test">>)

匹配失败:{error, notfound, path}

cowboy_dispatcher:match([{'_', [{'_', test, []}

　　　　　　　　　　　　　　　　　　]}], <<"www.sohu.com">>, <<"/notify/test">>)

匹配成功:{ok, test,[], [], undefined, undefined}

cowboy_dispatcher:match([{[<<"www">>, a, b] , [{'_', test, []}

　　　　　　　　　　　　　　　　　　　　　　　　　　　]}], <<"www.sohu.com">>, <<"/notify/test">>),

匹配成功:{ok, test,[], [{a, <<"sohu">>}, {b, <<"com">>}], undefined, undefined}

cowboy_dispatcher:match([{['...', <<"com">>] , [{'_', test, []}]}],
　　　　　　　　　　　　　　<<"www.sohu.com">>, <<"/notify/test">>),

匹配成功: {ok,test,[],[],[<<"www">>,<<"sohu">>], undefined}

cowboy_dispatcher:match([{'_' , [{[<<"abc">>], test, []},
　　　　　　　　　　　　　　　　　　{[<<"notify">>, <<"test">>], test2,[]}]}],
　　　　　　　　　　　　　　<<"www.sohu.com">>, <<"/notify/test">>),

匹配成功: {ok,test2,[],[],undefined,undefined}

cowboy_dispatcher:match([{'_' , [{[<<"abc">>], test, []},
　　　　　　　　　　　　　　　　　　{[<<"notify">>, '_'], test2,[]}]}],
　　　　　　　　　　　　　　<<"www.sohu.com">>, <<"/notify/test">>),

匹配成功: {ok,test2, [], [], undefined, undefined}

cowboy_dispatcher:match([{'_' , [{[<<"abc">>], test, []},
　　　　　　　　　　　　　　　　　　{[<<"notify">>, '...'], test2,[]}]}],
　　　　　　　　　　　　　　<<"www.sohu.com">>, <<"/notify/test">>),

匹配成功: {ok,test2,[],[],undefined,[<<"test">>]}

cowboy_dispatcher:match([{'_' , [{[<<"abc">>], test, []},
　　　　　　　　　　　　　　　　　　{[<<"notify">>, a], test2,[]}]}],
　　　　　　　　　　　　　　<<"www.sohu.com">>, <<"/notify/test">>),

匹配成功: {ok,test2,[],[{a, <<"test">>}],undefined, undefined}

这里有一个需要提醒的:

Host匹配token list是反向的，

<<"www.sohu.com">>和

[<<"www">>, <<"sohu">>, <<"com">>] 匹配时的顺序其实是先匹配<<"com">> 再匹配<<"sohu">>, 最后是<<"www">>

而Path匹配的token list是正向的。

<<"/notify/test">>和

[<<"notify">>, <<"test">>] 匹配时的顺序是先匹配<<"notify">> 再匹配<<"test">>

其实其他倒没什么，就是使用'...'进行匹配的时候要注意= =

路由讲解完了，下一篇日志我们继续回到cowboy_protocol模块继续路由之后的代码，稍歇= =

posted on 2013-01-08 11:11 文武双全大星星阅读(509) 评论(1) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

公告