HTTP/1.1 chunked 解码

0、简介

 

1、定义

RFC定义 https://tools.ietf.org/html/rfc2616#section-3.6.1

Chunked-Body   = *chunk
                 last-chunk
                 trailer
                 CRLF

chunk          = chunk-size [ chunk-extension ] CRLF
                 chunk-data CRLF
chunk-size     = 1*HEX
last-chunk     = 1*("0") [ chunk-extension ] CRLF

chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
chunk-ext-name = token
chunk-ext-val  = token | quoted-string
chunk-data     = chunk-size(OCTET)
trailer        = *(entity-header CRLF)

 

1.1、Entity Header Fields

https://tools.ietf.org/html/rfc2616#section-7.1

 

2、解析

 

 

解码伪代码 https://tools.ietf.org/html/rfc2616#section-19.4.6

 

length := 0  //body总长度初始化为0
read chunk-size, chunk-extension (if any) and CRLF  //读取第一行 获取 第一块 chunked 数据的大小(chunk扩展项)
while (chunk-size > 0) {
   read chunk-data and CRLF  //读取chunk-data, chunk-data 的长度为 chunk-size, 后面跟 \r\n 表示结束, chunk-size不包含\r\n
   append chunk-data to entity-body  //将chunk-data 追加到 实体body 中(解码后) 
   length := length + chunk-size  //body总长度更新
   read chunk-size and CRLF  //读取下一个 chunk头 获取chunk-size
}
//退出循环说明 chunk-size 为0, 即last-chunk, last-chunk后面可能会跟有trailer
read entity-header  //读取 entity-header
while (entity-header not empty) {  //一直读到空行为止, 空行即整行内容只是\r\n这两个字节
   append entity-header to existing header fields
   read entity-header
}
Content-Length := length
Remove "chunked" from Transfer-Encoding

 

 

 

运行方式 ./a.out -u http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx -t target.jpg

#define _GNU_SOURCE /* for memmem */
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

/* chunked-encoding example URL
GET http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx
*/

void parse_paramters(int argc, char* argv[], char** url, char** target);
void parse_req_url(char* url, char** host, char** service, char** uri, char** target);

/* 
 * host can be domain-name or ip-address 
 * service can be well-known service name("http"/"ftp") or port number
 */
int connect_to_server(char* host, char* service);

void send_req_to_server(int fd, char* uri, char* host, char* service);

void recv_res_from_server(int fd, char* store_path);


/*
 * Program entry point: fetch the resource named by -u over HTTP/1.1 and
 * save the response body to the file named by -t (or to a name derived
 * from the URL path when -t is not given).
 *
 * Every string pointer starts out NULL so that a missing -u option or an
 * output the URL parser did not fill in is detected here instead of being
 * used as an indeterminate pointer.
 */
int main(int argc, char* argv[])
{
    int sfd; /* socket file descriptor */
    char *url = NULL, *host = NULL, *service = NULL, *uri = NULL, *target = NULL;

    /* parse request */
    parse_paramters(argc, argv, &url, &target);
    if (NULL == url) { /* e.g. only -t was given: there is nothing to download */
        fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* only ask the URL parser for a file name when -t did not supply one */
    parse_req_url(url, &host, &service, &uri, target ? NULL : &target);
    if (!(host && service && uri && target)) {
        fprintf(stderr, "could not parse url\n");
        exit(EXIT_FAILURE);
    }
    printf("Host   : [%s]\n", host);
    printf("Port   : [%s]\n", service);
    printf("Uri    : [%s]\n", uri);
    printf("Target : [%s]\n", target);

    /* create the connection to server */
    sfd = connect_to_server(host, service);

    /* send http req to server */
    send_req_to_server(sfd, uri, host, service);
    free(uri);
    free(host);
    free(service);

    /* get response from server */
    recv_res_from_server(sfd, target);
    free(target);

    /* cleanup */
    shutdown(sfd, SHUT_RDWR);
    close(sfd);

    exit(EXIT_SUCCESS);
}


/*
 * Parse the command line.
 *
 *   -u url         resource to download (required)
 *   -t store_path  file the body is written to (optional)
 *
 * On return *url points at the -u argument inside argv (not a copy, must not
 * be freed). *target is written only when -t was given; it then holds a
 * heap copy of the path that the caller must free, otherwise it is left
 * untouched.
 *
 * Prints the usage text and exits with EXIT_FAILURE when the arguments are
 * unusable: no options at all, an unknown option, or no -u option.
 */
void parse_paramters(int argc, char* argv[], char** url, char** target)
{
    int opt;
    char* store_path = NULL;
    /* never dereference argv for the usage text unless it is really there */
    const char* prog = (argv && argc > 0 && argv[0]) ? argv[0] : "a.out";

    if (!(url && target && (argc > 1) && argv)) {
        fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
        exit(EXIT_FAILURE);
    }

    *url = NULL; /* lets us (and the caller) detect a missing -u */

    while ((opt = getopt(argc, argv, "u:t:")) != -1) {
        switch (opt) {
        case 'u':
            *url = optarg;
            break;
        case 't':
            free(store_path); /* a repeated -t replaces the earlier copy */
            store_path = strdup(optarg);
            if (NULL == store_path) {
                perror("strdup() failed");
                exit(EXIT_FAILURE);
            }
            break;
        default: /* '?' */
            fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
            exit(EXIT_FAILURE);
        }
    }

    if (NULL == *url) { /* the url is mandatory */
        fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
        exit(EXIT_FAILURE);
    }

    if (store_path) {
        *target = store_path;
    }
}


/*
 * Split a URL of the form [scheme://]host[:port][/path] into its parts.
 *
 * url      input string; it is modified in place ('\0' is written over the
 *          first '/' of the path and over the ':' before the port)
 * host     out: heap copy of the host part
 * service  out: heap copy of the port text, "80" when the URL has no port
 *          or an empty one
 * uri      out: heap copy of the path (starting at the first '/'), "/" when
 *          the URL has no path
 * target   out, may be NULL: heap copy of the text after the last '/' of the
 *          path, "index.html" when that text is empty or there is no path
 *
 * Every returned string must be freed by the caller. Exits with
 * EXIT_FAILURE when a copy cannot be allocated.
 */
void parse_req_url(char* url, char** host, char** service, char** uri, char** target)
{
    char* token;
    const char* name;

    /* skip scheme */
    token = strstr(url, "://");
    if (token) {
        url = token + sizeof("://") - 1;
    }

    /* find uri */
    token = strchr(url, '/');
    if (NULL == token) {
        *uri = strdup("/");
        if (target) {
            *target = strdup("index.html");
        }
    }
    else {
        *uri = strdup(token);
        if (target) {
            /* token itself starts with '/', so strrchr() cannot return NULL */
            name = strrchr(token, '/') + 1;
            /* a path ending in '/' has no file name: fall back to a default */
            *target = strdup(*name ? name : "index.html");
        }
        *token = '\0'; /* cut the path off so only host[:port] is left in url */
    }

    /* find port */
    token = strchr(url, ':');
    if (token) {
        *token = '\0';
        *service = strdup(token[1] ? token + 1 : "80");
    }
    else {
        *service = strdup("80");
    }
    *host = strdup(url);

    if (!*host || !*service || !*uri || (target && !*target)) {
        perror("strdup() failed");
        exit(EXIT_FAILURE);
    }
}


/*
 * Resolve host/service with getaddrinfo() and return a TCP socket connected
 * to the first address that accepts the connection.
 *
 * Exits with EXIT_FAILURE when name resolution fails or when no returned
 * address can be connected to.
 */
int connect_to_server(char* host, char* service)
{
    struct addrinfo hints;
    struct addrinfo *addr_list, *cur;
    int sock = -1;
    int gai_rc;

    memset(&hints, 0, sizeof(hints));
    hints.ai_flags = AI_ADDRCONFIG;  /* only address families configured on the local system */
    hints.ai_family = AF_UNSPEC;     /* IPv4 or IPv6 */
    hints.ai_socktype = SOCK_STREAM; /* stream socket */
    hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */

    gai_rc = getaddrinfo(host, service, &hints, &addr_list);
    if (gai_rc != 0) {
        fprintf(stderr, "getaddrinfo() failed: %s\n", gai_strerror(gai_rc));
        exit(EXIT_FAILURE);
    }

    /* Walk the address list; the first candidate that both yields a socket
       and connects wins. A socket that fails to connect is closed before
       moving on to the next candidate. */
    cur = addr_list;
    while (cur != NULL) {
        sock = socket(cur->ai_family, cur->ai_socktype, cur->ai_protocol);
        if (sock != -1) {
            if (connect(sock, cur->ai_addr, cur->ai_addrlen) != -1) {
                break; /* connected */
            }
            close(sock);
        }
        cur = cur->ai_next;
    }

    if (cur == NULL) { /* ran off the end of the list: nothing connected */
        fprintf(stderr, "Could not connect to server %s:%s\n", host, service);
        exit(EXIT_FAILURE);
    }

    /* the address list is not needed once the socket is connected */
    freeaddrinfo(addr_list);

    return sock;
}



/*
 * Build and send the HTTP request
 *
 *     GET <uri> HTTP/1.1
 *     Host: <host>
 *     Connection: close
 *
 * over the connected socket fd, then echo the request to stdout.
 *
 * service is accepted for interface compatibility but is not used.
 * Exits with EXIT_FAILURE when the request buffer cannot be allocated or
 * formatted, or when send() fails.
 */
void send_req_to_server(int fd, char* uri, char* host, char* service)
{
    int sn_len;
    char* send_buf;
    size_t buf_len;
    size_t sent_total = 0;
    ssize_t sent;
    const char req_header[] = "GET %s HTTP/1.1\r\n"
        "Host: %s\r\n"
        "Connection: close\r\n\r\n";

    (void)service;

    buf_len = sizeof(req_header) + strlen(uri) + strlen(host) + 5;
    send_buf = calloc(buf_len, 1);
    if (NULL == send_buf) {
        perror("calloc() failed");
        exit(EXIT_FAILURE);
    }

    sn_len = snprintf(send_buf, buf_len, req_header, uri, host);
    if (sn_len < 0 || (size_t)sn_len >= buf_len) {
        fprintf(stderr, "snprintf() failed\n");
        exit(EXIT_FAILURE);
    }

    /* send() may accept fewer bytes than requested: keep going until the
       whole request is on the wire */
    while (sent_total < (size_t)sn_len) {
        sent = send(fd, send_buf + sent_total, (size_t)sn_len - sent_total, 0);
        if (sent == -1) {
            perror("send() failed");
            exit(EXIT_FAILURE);
        }
        sent_total += (size_t)sent;
    }

    /* print req header */
    printf("\n%s", send_buf);

    free(send_buf);
}



/*
 * Handler for a freshly received piece of the response.
 * data is passed the srv_res_t state by the receive loop and len is the
 * number of bytes just received. The receive loop stops reading when the
 * handler returns 1.
 */
typedef int (*DATA_PROC_CB)(void* data, ssize_t len);

/* States of the response-header parser (see proc_res_header). */
typedef enum _srv_header_state {
    HEADER_STATE_LINE_START = 0, /* nothing consumed yet; the next byte starts the first line */
    HEADER_STATE_LINE_DATA, /* inside a header line, waiting for '\r' */
    HEADER_STATE_LINE_END, /* met '\r' of a header line, '\n' must follow */
    HEADER_STATE_LINE_DONE,  /* met '\n'; the next byte starts a new line or the final CRLF */
    HEADER_STATE_HEAD_END,  /* met '\r' of the empty line that ends the header */
    HEADER_STATE_HEAD_DONE  /* met '\n' of the empty line: header complete */
} srv_header_state_t;

typedef enum _srv_body_state {
    BODY_STATE_CHUNK_LINE_START = 0,
    BODY_STATE_CHUNK_LINE_DATA, /* the first chunk-body line  chunk-size [ chunk-extension ] CRLF  */
    BODY_STATE_CHUNK_LINE_END,  /* meet '\r' */
    BODY_STATE_CHUNK_LINE_DONE, /* meet '\n' */
    BODY_STATE_CHUNK_DATA_START,
    BODY_STATE_CHUNK_DATA_END,
    BODY_STATE_CHUNK_DATA_DONE
} srv_body_state_t;


/* Receive buffer plus parser state for one HTTP response. */
typedef struct _srv_res {
    /* recv buf */
    unsigned char* buf_ptr; /* where the next recv() stores its data */
    unsigned char* buf_start; /* start of the allocated buffer (the pointer that is freed) */
    size_t buf_len; /* total length */
    size_t buf_remain; /* unused length, passed to recv() as the maximum size */

    /* buf proc */
    DATA_PROC_CB data_proc; /* current handler: header parser first, then body parser */
    DATA_PROC_CB res_header_proc;
    srv_header_state_t header_state;
    unsigned char* header_line_start; /* first byte of the header line being parsed */
    
    DATA_PROC_CB res_body_proc;
    srv_body_state_t body_state;
    unsigned char* body_chunk_start; /* first byte of the chunk-size line being parsed */
    
    FILE* store_file; /* file the body is written to */
    /* body */
    int is_chunked_encoding; /* set to 1 when the Transfer-Encoding: chunked header line is seen */
    unsigned long chunked_size; /* bytes of the current chunk not yet written */
    unsigned long content_length; /* Content-Length value; for chunked bodies the sum of chunk sizes */
} srv_res_t;

/* Header text searched for by proc_res_header; both are compared byte for byte (case-sensitive). */
#define CHUNKED_ENCODING "Transfer-Encoding: chunked" /* must match the whole header line */
#define CONTETN_LENGTH   "Content-Length: " /* NOTE: the macro name misspells CONTENT; kept because proc_res_header refers to it */

/*
 * Header-phase handler: parse the len bytes that were just received at
 * res->buf_ptr.
 *
 * Header lines are kept contiguous in the buffer (buf_ptr advances after
 * every call) so that a line split across several recv() calls can still
 * be examined as a whole. For each complete line the function
 *   - sets is_chunked_encoding when the line is exactly
 *     "Transfer-Encoding: chunked",
 *   - records the Content-Length value,
 *   - prints the line to stdout.
 *
 * When the empty line that ends the header is found, data_proc is switched
 * to the body handler and any body bytes that arrived in the same recv()
 * are passed to it immediately.
 *
 * Returns the body handler's result when body bytes were handed on,
 * otherwise 0. Exits with EXIT_FAILURE on a malformed header or when the
 * header does not fit into the receive buffer.
 */
int proc_res_header(void* data, ssize_t len)
{
    ssize_t i;
    int field_len;
    unsigned char ch;
    unsigned char* content_length;
    srv_res_t* res = (srv_res_t*)data;

    for (i = 0; i < len && res->header_state != HEADER_STATE_HEAD_DONE; i++) {
        ch = res->buf_ptr[i];
        switch (res->header_state) {
        case HEADER_STATE_LINE_START:
            res->header_state = HEADER_STATE_LINE_DATA;
            res->header_line_start = res->buf_ptr + i;
            break;
        case HEADER_STATE_LINE_DATA:
            if (ch == '\r') {
                res->header_state = HEADER_STATE_LINE_END;
            }
            break;
        case HEADER_STATE_LINE_END:
            if (ch == '\n') {
                res->header_state = HEADER_STATE_LINE_DONE;
                /* line length without the trailing "\r\n" (i is at '\n') */
                field_len = (int)(res->buf_ptr + i - res->header_line_start - 1);
                /* search Transfer-Encoding */
                if (!res->is_chunked_encoding && (field_len == sizeof(CHUNKED_ENCODING)-1) &&
                    !memcmp(res->header_line_start, CHUNKED_ENCODING, sizeof(CHUNKED_ENCODING)-1)) {
                    res->is_chunked_encoding = 1;
                }
                /* search Content-Length */
                if (!res->content_length) {
                    content_length = memmem(res->header_line_start, field_len, CONTETN_LENGTH, sizeof(CONTETN_LENGTH)-1);
                    if (content_length) {
                        /* the digits are followed by the line's '\r', which stops strtoul() */
                        res->content_length = strtoul((const char*)content_length + sizeof(CONTETN_LENGTH) - 1, NULL, 10);
                    }
                }

                /* print header line */
                fprintf(stdout, "\033[45m"); /* color start */
                fwrite(res->header_line_start, 1, field_len, stdout);
                fprintf(stdout, "\033[0m\n"); /* color end */
            }
            else {
                fprintf(stderr, "invalid header found\n");
                exit(EXIT_FAILURE);
            }
            break;
        case HEADER_STATE_LINE_DONE:
            if (ch == '\r') {
                res->header_state = HEADER_STATE_HEAD_END;
            }
            else {
                res->header_state = HEADER_STATE_LINE_DATA;
                res->header_line_start = res->buf_ptr + i;
            }
            break;
        case HEADER_STATE_HEAD_END:
            if (ch == '\n') {
                res->header_state = HEADER_STATE_HEAD_DONE;
                fprintf(stdout, "\n\033[31m=== parse header done, chunked[%d] content-length[%lu] === \033[0m\n\n",
                    res->is_chunked_encoding, res->content_length);
            }
            else { /* a lone '\r' cannot end the header */
                fprintf(stderr, "invalid header found\n");
                exit(EXIT_FAILURE);
            }
            break;
        default:
            break;
        }
    }

    if (res->header_state == HEADER_STATE_HEAD_DONE) {
        res->data_proc = res->res_body_proc;
        /* The loop increment already moved i past the final '\n', so i is
           the index of the first body byte; even a single trailing byte
           (i == len - 1) must be handed on. */
        if (i < len) {/* found body data */
            res->buf_ptr += i;
            /* Only the i header bytes are consumed here. The body handler
               does its own accounting for the remaining len - i bytes, so
               subtracting len would count them twice. */
            res->buf_remain -= i;
            return res->data_proc(res, len - i);
        }
        /* no body bytes yet: the header text is no longer needed, so the
           whole buffer can be reused for the body */
        res->buf_ptr = res->buf_start;
        res->buf_remain = res->buf_len;
    }
    else { /* header not finish */
        if ((size_t)len >= res->buf_remain) { /* buffer full and still no end of header */
            fprintf(stderr, "large header found\n");
            exit(EXIT_FAILURE);
        }
        res->buf_remain -= len;
        res->buf_ptr += len;
    }

    return 0;
}

/* Write count body bytes to the output file; exit on a short write. */
static void store_body_bytes(srv_res_t* res, const unsigned char* bytes, size_t count)
{
    if (count > 0 && fwrite(bytes, 1, count, res->store_file) != count) {
        perror("fwrite() failed");
        exit(EXIT_FAILURE);
    }
}

/*
 * Body-phase handler: process the len bytes that were just received at
 * res->buf_ptr and write the decoded body to res->store_file.
 *
 * Plain body (not chunked): the bytes are written as they come. When a
 * Content-Length was seen, at most that many bytes are written and 1 is
 * returned once they have all arrived; without one the function keeps
 * returning 0.
 *
 * Chunked body: a byte-wise state machine reads
 *     chunk-size [ chunk-extension ] CRLF chunk-data CRLF
 * repeatedly. Chunk extensions and the trailer are ignored. content_length
 * accumulates the decoded size.
 *
 * Buffer handling: before returning, the buffer is rewound to its start
 * unless the input ended in the middle of a chunk-size line. In that case
 * the partial line is moved to the start of the buffer and the next recv()
 * appends to it, so strtoul() always sees the whole line.
 *
 * Returns 1 when the body is complete (last-chunk seen, or Content-Length
 * bytes written), 0 when more data is needed. Exits with EXIT_FAILURE on
 * malformed chunk framing or a write error.
 */
int proc_res_body(void* data, ssize_t len)
{
    ssize_t i;
    size_t data_left;
    size_t kept;
    unsigned char ch;
    char* size_end;
    srv_res_t* res = (srv_res_t*)data;

    /* not chunked encoding */
    if (!res->is_chunked_encoding) {
        size_t take = (size_t)len;
        int done = 0;

        if (res->content_length > 0) {
            /* never subtract more than is left: content_length is unsigned
               and would wrap around instead of reaching 0 */
            if (take >= res->content_length) {
                take = res->content_length;
                done = 1; /* get all body data */
            }
            res->content_length -= take;
        }
        store_body_bytes(res, res->buf_ptr, take);

        /* everything is written out, so give recv() the whole buffer again */
        res->buf_ptr = res->buf_start;
        res->buf_remain = res->buf_len;
        return done;
    }


    /* parse chunked-encoding */
    for (i = 0; i < len; i++) {
        ch = res->buf_ptr[i];
        switch (res->body_state) {
        case BODY_STATE_CHUNK_LINE_START:
            res->body_chunk_start = res->buf_ptr + i;
            res->body_state = BODY_STATE_CHUNK_LINE_DATA;
            break;
        case BODY_STATE_CHUNK_LINE_DATA:
            if (ch == '\r') {
                res->body_state = BODY_STATE_CHUNK_LINE_DONE;
            }
            break;
        case BODY_STATE_CHUNK_LINE_DONE:
            if (ch == '\n') {/* ignore chunk-extension */
                res->body_state = BODY_STATE_CHUNK_DATA_START;
                res->chunked_size = strtoul((const char*)res->body_chunk_start, &size_end, 16);
                /* a line without any hex digit is malformed, not a last-chunk */
                if (size_end == (char*)res->body_chunk_start) {
                    fprintf(stderr, "invalid chunk-body line found\n");
                    exit(EXIT_FAILURE);
                }
                if (0 == res->chunked_size) { /* last chunk */
                    fprintf(stdout, "=== last-chunk found(total body size = %lu) ===\n", res->content_length);
                    /* ignore trailer */
                    return 1;
                }
                res->content_length += res->chunked_size;
                fprintf(stdout, "=== chunk-size %lu ===\n", res->chunked_size);
            }
            else {
                fprintf(stderr, "invalid chunk-body line found\n");
                exit(EXIT_FAILURE);
            }
            break;
        case BODY_STATE_CHUNK_DATA_START:
            data_left = (size_t)(len - i);
            if (data_left < res->chunked_size) {
                /* the chunk continues in the next recv(): write what we have */
                store_body_bytes(res, res->buf_ptr + i, data_left);
                res->chunked_size -= data_left;
                i  = len; /* end loop */
            }
            else {
                store_body_bytes(res, res->buf_ptr + i, res->chunked_size);
                /* skip the chunk-data; the loop increment then lands on the '\r' after it */
                i += res->chunked_size - 1;
                res->chunked_size = 0;
                res->body_state = BODY_STATE_CHUNK_DATA_END;
            }
            break;
        case BODY_STATE_CHUNK_DATA_END:
            if (ch == '\r') {
                res->body_state = BODY_STATE_CHUNK_DATA_DONE;
            }
            else {
                fprintf(stderr, "invalid chunk-body data found\n");
                exit(EXIT_FAILURE);
            }
            break;
        case BODY_STATE_CHUNK_DATA_DONE:
            if (ch == '\n') {
                res->body_state = BODY_STATE_CHUNK_LINE_START;
            }
            else {
                fprintf(stderr, "invalid chunk-body data found\n");
                exit(EXIT_FAILURE);
            }
            break;
        default:
            break;
        }
    }

    if ((res->body_state == BODY_STATE_CHUNK_LINE_DATA) ||
        (res->body_state == BODY_STATE_CHUNK_LINE_DONE)) {
        /* dont break the size line: move the part received so far to the
           start of the buffer and let the next recv() append to it */
        kept = (size_t)((res->buf_ptr + len) - res->body_chunk_start);
        if (kept >= res->buf_len) { /* no room left to receive the rest of the line */
            fprintf(stderr, "invalid chunk-body line found\n");
            exit(EXIT_FAILURE);
        }
        memmove(res->buf_start, res->body_chunk_start, kept);
        res->body_chunk_start = res->buf_start;
        res->buf_ptr = res->buf_start + kept;
        res->buf_remain = res->buf_len - kept;
    }
    else {
        /* nothing in the buffer is needed any more */
        res->buf_ptr = res->buf_start;
        res->buf_remain = res->buf_len;
    }

    return 0;
}



/*
 * Receive the HTTP response from the connected socket fd and store its body
 * in the file store_path (created or truncated, binary mode).
 *
 * A 4 KiB receive buffer is allocated and every received piece is handed to
 * the current handler in the response state: the header parser first, which
 * switches to the body parser once the header is complete. Reading stops
 * when the handler returns 1 or the server closes the connection.
 *
 * Exits with EXIT_FAILURE when the file cannot be opened or closed, the
 * buffer cannot be allocated, or recv() fails.
 */
void recv_res_from_server(int fd, char* store_path)
{
    ssize_t ret = 1;
    srv_res_t response;

    memset(&response, 0x00, sizeof(response));

    response.store_file = fopen(store_path, "wb");
    if (NULL == response.store_file) {
        perror("fopen() failed");
        exit(EXIT_FAILURE); /* failing to open the output file is an error */
    }

    response.buf_len = 0x1000; /* 4k */
    response.buf_start = calloc(1, response.buf_len);
    if (NULL == response.buf_start) {
        perror("calloc() failed");
        exit(EXIT_FAILURE);
    }
    response.buf_ptr = response.buf_start;
    response.buf_remain = response.buf_len;
    response.res_header_proc = proc_res_header;
    response.res_body_proc = proc_res_body;
    response.data_proc = response.res_header_proc;

    while (ret > 0) {
        ret = recv(fd, response.buf_ptr, response.buf_remain, 0);
        if (ret > 0) {
            if (response.data_proc(&response, ret) == 1) { /* get all response */
                break;
            }
        }
    }

    if (ret == 0) {
        printf("server shutdown the connection\n");
    }
    else if (ret < 0) {
        perror("recv() failed");
        exit(EXIT_FAILURE);
    }

    /* cleanup */
    free(response.buf_start);
    /* fclose() flushes the buffered body; a failure means the file is incomplete */
    if (fclose(response.store_file) != 0) {
        perror("fclose() failed");
        exit(EXIT_FAILURE);
    }

}

 

posted @ 2018-06-14 17:05  LubinLew  阅读(2246)  评论(0编辑  收藏  举报