代码改变世界

Cowboy 源码分析(十二)

2012-05-31 00:11  rhinovirus  阅读(3107)  评论(1)  编辑  收藏  举报

  今天,又是按时下班,吃饱饭,继续跟大家分享Cowboy,昨天有件高兴的事,我尝试用闪存给@博客园团队发了一条闪存,问是否能在博客园首页中的编程语言分类中添加 Erlang 这一种编程语言,很快,@博客园团队就给我回信,很爽快的答应了,并且帮我把以前的文章也导入这个分类,省去了我手动导入的麻烦,这边谢谢@博客园团队的热心,希望博客园越来越好,越来越多的朋友来学 Erlang。

  好了,继续我们上一篇讲到的cowboy_http_protocol:request/2 方法:

  parse_header(#http_req{socket=Socket, transport=Transport, connection=ConnAtom, pid=self(), method=Method, version=Version, path='*', raw_path= <<"*">>, raw_qs= <<>>, onresponse=OnResponse, urldecode=URLDec}, State);

  这边有个记录 #http_req{} 我们看下它的定义,在 cowboy/include 目录下的 http.hrl 中,这个记录保存每个http请求的详细信息:  

%% Per-request record: groups the transport handle, the parsed request
%% fields, request-body progress, pending response data and two callbacks.
%% Every field except `urldecode' declares an explicit default value.
%% NOTE: records are tuples at runtime, so the field order below is part of
%% the data layout and must not be rearranged.
-record(http_req, {
    %% Transport.
    %% `connection' is either `keepalive' (the default) or `close'.
    socket     = undefined :: undefined | inet:socket(),
    transport  = undefined :: undefined | module(),
    connection = keepalive :: keepalive | close,

    %% Request.
    %% host/host_info/path/path_info are typed as dispatcher tokens, while
    %% raw_host/raw_path/raw_qs keep the binary form. `path' may also be the
    %% atom '*'.
    pid        = undefined :: pid(),
    method     = 'GET'     :: cowboy_http:method(),
    version    = {1, 1}    :: cowboy_http:version(),
    peer       = undefined :: undefined |
                                {inet:ip_address(), inet:port_number()},
    host       = undefined :: undefined | cowboy_dispatcher:tokens(),
    host_info  = undefined :: undefined | cowboy_dispatcher:tokens(),
    raw_host   = undefined :: undefined | binary(),
    port       = undefined :: undefined | inet:port_number(),
    path       = undefined :: undefined | '*' | cowboy_dispatcher:tokens(),
    path_info  = undefined :: undefined | cowboy_dispatcher:tokens(),
    raw_path   = undefined :: undefined | binary(),
    qs_vals    = undefined :: undefined | list({binary(), binary() | true}),
    raw_qs     = undefined :: undefined | binary(),
    bindings   = undefined :: undefined | cowboy_dispatcher:bindings(),
    headers    = []        :: cowboy_http:headers(),
    p_headers  = []        :: [any()], %% @todo Improve those specs.
    cookies    = undefined :: undefined | [{binary(), binary()}],
    meta       = []        :: [{atom(), any()}],

    %% Request body.
    %% `body_state' starts as `waiting'; `buffer' starts as an empty binary.
    body_state = waiting   :: waiting | done | {stream, fun(), any(), fun()}
                                | {multipart, non_neg_integer(), fun()},
    buffer     = <<>>      :: binary(),

    %% Response.
    %% `resp_body' is either iodata or a {non_neg_integer(), Fun} pair whose
    %% fun returns {sent, N} (the integer is presumably the body length --
    %% TODO confirm against the code that consumes it).
    resp_state = waiting   :: locked | waiting | chunks | done,
    resp_headers = []      :: cowboy_http:headers(),
    resp_body  = <<>>      :: iodata() | {non_neg_integer(),
                                fun(() -> {sent, non_neg_integer()})},

    %% Functions.
    %% `onresponse' is optional: a fun taking a status, headers and a request
    %% record and returning a request record.
    %% `urldecode' is a {Fun, Arg} pair; Fun takes a binary plus a value of
    %% the same type as Arg and returns a binary. It has no default value.
    onresponse = undefined :: undefined | fun((cowboy_http:status(),
        cowboy_http:headers(), #http_req{}) -> #http_req{}),
    urldecode :: {fun((binary(), T) -> binary()), T}
}).

  好了,弄明白了,这个记录,我们看下 cowboy_http_protocol:parse_header/2 具体实现:

%% Decode the next header line from the buffered data using
%% erlang:decode_packet/3 in `httph_bin' mode and dispatch on the result.
-spec parse_header(#http_req{}, #state{}) -> ok.
parse_header(Req, State=#state{buffer=Buffer, max_line_length=MaxLength}) ->
    case erlang:decode_packet(httph_bin, Buffer, []) of
        %% A complete packet was decoded: pass it to header/3 and keep the
        %% unconsumed bytes as the new state buffer.
        {ok, Header, Rest} -> header(Header, Req, State#state{buffer=Rest});
        %% More data is needed but the buffer is already larger than
        %% max_line_length: call error_terminate/2 with 413.
        {more, _Length} when byte_size(Buffer) > MaxLength ->
            error_terminate(413, State);
        %% More data is needed and the buffer is still within the limit.
        {more, _Length} -> wait_header(Req, State);
        %% The data could not be decoded: call error_terminate/2 with 400.
        {error, _Reason} -> error_terminate(400, State)
    end.

  我们来详细看下这个函数,还记得 erlang:decode_packet/3 这个函数吗,在前两篇文章,我们有过介绍,这次第一个参数由 http_bin 换成了 httph_bin,而这个参数又是什么意思呢?我们依然给出 erlang doc 地址:http://www.erlang.org/doc/man/erlang.html#decode_packet-3,官方文档对这个参数的描述如下:

http | httph | http_bin | httph_bin

The Hypertext Transfer Protocol. The packets are returned with the format according to HttpPacket described above. A packet is either a request, a response, a header or an end of header mark. Invalid lines are returned as HttpError.

Recognized request methods and header fields are returned as atoms. Others are returned as strings.

The protocol type http should only be used for the first line when a HttpRequest or a HttpResponse is expected. The following calls should use httph to get HttpHeader's until http_eoh is returned that marks the end of the headers and the beginning of any following message body.

The variants http_bin and httph_bin will return strings (HttpString) as binaries instead of lists.

  不知道大家看了能不能理解,我们还是用断点看下,究竟是什么意思。下面是当我访问 http://localhost:8080/ 时断点,监控到的变量的值:

< Req = {http_req,#Port<0.2990>,cowboy_tcp_transport,keepalive,<0.500.0>,
                  'GET',
                  {1,1},
                  undefined,undefined,undefined,undefined,undefined,[],
                  undefined,<<"/">>,undefined,<<>>,undefined,[],[],undefined,
                  [],waiting,<<>>,waiting,[],<<>>,undefined,
                  {#Fun<cowboy_http.urldecode.2>,crash}}
< State = {state,<0.270.0>,#Port<0.2990>,cowboy_tcp_transport,
                 [{'_',[{[<<"websocket">>],websocket_handler,[]},
                        {[<<"eventsource">>],eventsource_handler,[]},
                        {[<<"eventsource">>,<<"live">>],
                         eventsource_emitter,[]},
                        {'_',default_handler,[]}]}],
                 undefined,undefined,undefined,
                 {#Fun<cowboy_http.urldecode.2>,crash},
                 0,5,1,infinity,4096,5000,
                 <<"Host: localhost:8080\r\nUser-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/12.0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\nAccept-Encoding: gzip, deflate\r\nConnection: keep-alive\r\n\r\n">>,
                 false,infinity,undefined}
< Buffer = <<"Host: localhost:8080\r\nUser-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/12.0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\nAccept-Encoding: gzip, deflate\r\nConnection: keep-alive\r\n\r\n">>
< MaxLength = 4096

  接下来看下,执行 erlang:decode_packet/3 函数返回的结果:{ok, Header, Rest},相关变量的值如下:

< Header = {http_header,14,'Host',undefined,<<"localhost:8080">>}
< Rest = <<"User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/12.0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nAccept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\nAccept-Encoding: gzip, deflate\r\nConnection: keep-alive\r\n\r\n">>

  这下大家应该都明白了吧,我们继续往下看下这行,Header, Req的值如上,而State记录修改了buffer作为参数带入到 header函数中:

  header(Header, Req, State#state{buffer=Rest});

  这里调用了 cowboy_http_protocol:header/3 函数:

%% Handle one decoded header packet (or the end-of-headers mark).
%% Only the first clause is shown in this excerpt: the first 'Host' header,
%% matched while the request's `host' field is still `undefined'.
-spec header({http_header, integer(), cowboy_http:header(), any(), binary()}
    | http_eoh, #http_req{}, #state{}) -> ok.
header({http_header, _I, 'Host', _R, RawHost}, Req=#http_req{
        transport=Transport, host=undefined}, State) ->
    %% Lowercase the raw header value before splitting it.
    RawHost2 = cowboy_bstr:to_lower(RawHost),
    %% `catch' turns any exception raised by split_host/1 into an
    %% {'EXIT', _} tuple, handled by the last branch below.
    case catch cowboy_dispatcher:split_host(RawHost2) of
        %% No port in the header value: use default_port/1 with the
        %% transport's name.
        {Host, RawHost3, undefined} ->
            Port = default_port(Transport:name()),
            parse_header(Req#http_req{
                host=Host, raw_host=RawHost3, port=Port,
                headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
        %% Port was present in the header value: store it as returned.
        {Host, RawHost3, Port} ->
            parse_header(Req#http_req{
                host=Host, raw_host=RawHost3, port=Port,
                headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
        %% split_host/1 failed: call error_terminate/2 with 400.
        {'EXIT', _Reason} ->
            error_terminate(400, State)
    end;

  这个函数有几个子句(clause),我们根据之前的参数,能够确定匹配的是哪个子句,也就是上面贴出的代码。我们按老规矩一行一行来看这个函数:

  RawHost2 = cowboy_bstr:to_lower(RawHost), 这个模块第一次见,我们看下这个函数:

%% @doc Return a new binary built by passing each byte of the input
%% through char_to_lower/1, in order.
-spec to_lower(binary()) -> binary().
to_lower(Bin) ->
    << <<(char_to_lower(Byte)):8>> || <<Byte:8>> <= Bin >>.

  看函数注释,还是比较清楚的,转换二进制字符串为小写。这里的 char_to_lower/1 很简单,我就不贴代码了。大家应该能一眼看懂,但是比较疑惑,erlang系统没有这样的函数支持吗?还是作者不知道?先不管了,知道的朋友,可以留言告诉我,谢谢。

  接着往下看吧,case catch cowboy_dispatcher:split_host(RawHost2) of

%% @doc Split a hostname into a list of tokens.
%%
%% Returns `{Tokens, RawHost, Port}':
%% <ul>
%%  <li>`Tokens' is the host part split on ".";</li>
%%  <li>`RawHost' is the host part without the port;</li>
%%  <li>`Port' is `undefined' when the input contains no ":", otherwise
%%   the integer following the first ":".</li>
%% </ul>
%% The empty binary yields `{[], <<>>, undefined}'.
%%
%% Raises `badarg' when the port part is not an integer in 0..65535. The
%% input comes from the request, so an out-of-range value such as "-1" or
%% "99999" must not be returned as if it were a valid port number.
-spec split_host(binary())
    -> {tokens(), binary(), undefined | inet:port_number()}.
split_host(<<>>) ->
    {[], <<>>, undefined};
split_host(Host) ->
    %% Only the first ":" separates host and port (no `global' option).
    case binary:split(Host, <<":">>) of
        [Host] ->
            {binary:split(Host, <<".">>, [global, trim]), Host, undefined};
        [Host2, Port] ->
            {binary:split(Host2, <<".">>, [global, trim]), Host2,
                parse_port(Port)}
    end.

%% Convert the textual port to an integer and check that it fits the
%% inet:port_number() range; anything else is reported as `badarg', the same
%% error list_to_integer/1 raises for non-numeric text.
-spec parse_port(binary()) -> inet:port_number().
parse_port(Bin) ->
    case list_to_integer(binary_to_list(Bin)) of
        Port when Port >= 0, Port =< 65535 -> Port;
        _ -> erlang:error(badarg, [Bin])
    end.

  这个函数也比较简单,就是分割主机名,返回格式为 {Host, RawHost3, Port} = {[<<"localhost">>], <<"localhost">>, 8080}。

  往下一行又调用了 cowboy_http_protocol:parse_header/2 函数:

  parse_header(Req#http_req{ host=Host, raw_host=RawHost3, port=Port, headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);

  只不过,这次的参数变了,从参数我们可以理解,其实这是在解析Http的头部,也就是咱们之前HttpFox所看到的Http Headers,如下图:

  

  上面,我们已经解析出了 Host,接下来调用:

  parse_header(Req#http_req{ host=Host, raw_host=RawHost3, port=Port, headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);

  将解析下一行,直到解析完整个头部为止,我把 cowboy_http_protocol:header/3 的所有子句都贴出来:

%% Handle one decoded header packet or the end-of-headers mark (`http_eoh').
%% Clauses are tried top to bottom, so their order is significant: the
%% specific 'Host' and 'Connection' clauses must precede the generic header
%% clause, and the `host=undefined' end-of-headers clauses must precede the
%% general `http_eoh' clause.
-spec header({http_header, integer(), cowboy_http:header(), any(), binary()}
    | http_eoh, #http_req{}, #state{}) -> ok.
%% First 'Host' header (the request's `host' field is still `undefined').
header({http_header, _I, 'Host', _R, RawHost}, Req=#http_req{
        transport=Transport, host=undefined}, State) ->
    %% Lowercase the raw header value before splitting it.
    RawHost2 = cowboy_bstr:to_lower(RawHost),
    %% `catch' turns any exception raised by split_host/1 into an
    %% {'EXIT', _} tuple, handled by the last branch below.
    case catch cowboy_dispatcher:split_host(RawHost2) of
        %% No port in the header value: use default_port/1 with the
        %% transport's name.
        {Host, RawHost3, undefined} ->
            Port = default_port(Transport:name()),
            parse_header(Req#http_req{
                host=Host, raw_host=RawHost3, port=Port,
                headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
        %% Port was present in the header value: store it as returned.
        {Host, RawHost3, Port} ->
            parse_header(Req#http_req{
                host=Host, raw_host=RawHost3, port=Port,
                headers=[{'Host', RawHost3}|Req#http_req.headers]}, State);
        %% split_host/1 failed: call error_terminate/2 with 400.
        {'EXIT', _Reason} ->
            error_terminate(400, State)
    end;
%% Ignore Host headers if we already have it.
header({http_header, _I, 'Host', _R, _V}, Req, State) ->
    parse_header(Req, State);
%% 'Connection' header, handled here only while req_keepalive is below
%% max_keepalive; otherwise it falls through to the generic clause below.
header({http_header, _I, 'Connection', _R, Connection},
        Req=#http_req{headers=Headers}, State=#state{
        req_keepalive=Keepalive, max_keepalive=MaxKeepalive})
        when Keepalive < MaxKeepalive ->
    %% Store the raw header first, then parse it through cowboy_http_req
    %% and record the resulting atom in the `connection' field.
    Req2 = Req#http_req{headers=[{'Connection', Connection}|Headers]},
    {ConnTokens, Req3}
        = cowboy_http_req:parse_header('Connection', Req2),
    ConnAtom = cowboy_http:connection_to_atom(ConnTokens),
    parse_header(Req3#http_req{connection=ConnAtom}, State);
%% Any other header: normalise the field name with format_header/1, prepend
%% the pair to the header list and continue with the next line.
header({http_header, _I, Field, _R, Value}, Req, State) ->
    Field2 = format_header(Field),
    parse_header(Req#http_req{headers=[{Field2, Value}|Req#http_req.headers]},
        State);
%% The Host header is required in HTTP/1.1.
header(http_eoh, #http_req{version={1, 1}, host=undefined}, State) ->
    error_terminate(400, State);
%% It is however optional in HTTP/1.0.
header(http_eoh, Req=#http_req{version={1, 0}, transport=Transport,
        host=undefined}, State=#state{buffer=Buffer}) ->
    %% No Host header was seen: use an empty host and the default port.
    Port = default_port(Transport:name()),
    onrequest(Req#http_req{host=[], raw_host= <<>>,
        port=Port, buffer=Buffer}, State#state{buffer= <<>>});
%% End of headers: move the remaining buffered bytes from the state into the
%% request record, clear the state buffer and call onrequest/2.
header(http_eoh, Req, State=#state{buffer=Buffer}) ->
    onrequest(Req#http_req{buffer=Buffer}, State#state{buffer= <<>>});
%% Anything else: call error_terminate/2 with 400.
header(_Any, _Req, State) ->
    error_terminate(400, State).

  我们从 HttpFox 中可以看到,Headers 的最后一行为 Connection,那么我们重点看下当处理到最后一行时,程序又如何往下走呢?

  好了,今天就到这里,下一篇,我们将解决上面留下的疑问,继续为大家分享Cowboy的代码,谢谢大家支持。