HTTP/1.1 chunked 解码
0、简介
1、定义
RFC定义 https://tools.ietf.org/html/rfc2616#section-3.6.1
Chunked-Body    = *chunk
                  last-chunk
                  trailer
                  CRLF
chunk           = chunk-size [ chunk-extension ] CRLF
                  chunk-data CRLF
chunk-size      = 1*HEX
last-chunk      = 1*("0") [ chunk-extension ] CRLF
chunk-extension = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
chunk-ext-name  = token
chunk-ext-val   = token | quoted-string
chunk-data      = chunk-size(OCTET)
trailer         = *(entity-header CRLF)
1.1、Entity Header Fields
https://tools.ietf.org/html/rfc2616#section-7.1
2、解析
解码伪代码 https://tools.ietf.org/html/rfc2616#section-19.4.6
length := 0                                         // body总长度初始化为0
read chunk-size, chunk-extension (if any) and CRLF  // 读取第一行, 获取第一块 chunked 数据的大小(以及 chunk 扩展项)
while (chunk-size > 0) {
    read chunk-data and CRLF            // 读取 chunk-data, 其长度为 chunk-size, 后面跟 \r\n 表示结束; chunk-size 不包含 \r\n
    append chunk-data to entity-body    // 将 chunk-data 追加到实体 body 中(解码后)
    length := length + chunk-size       // 更新 body 总长度
    read chunk-size and CRLF            // 读取下一个 chunk 头, 获取 chunk-size
}
// 退出循环说明 chunk-size 为0, 即 last-chunk; last-chunk 后面可能会跟有 trailer
read entity-header                      // 读取 entity-header
while (entity-header not empty) {       // 直到读到空行, 即整行内容只有 \r\n 这两个字节
    append entity-header to existing header fields
    read entity-header
}
Content-Length := length
Remove "chunked" from Transfer-Encoding
运行方式 ./a.out -u http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx -t target.jpg
#define _GNU_SOURCE /* for strdup/getopt/getaddrinfo under strict -std modes */
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

/*
 * Minimal HTTP/1.1 client that downloads one resource and stores the decoded
 * entity body in a file. Both "Content-Length" bodies and
 * "Transfer-Encoding: chunked" bodies are handled.
 *
 * chunked-encoding example URL
 *     GET http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx
 */

void parse_paramters(int argc, char* argv[], char** url, char** target);
void parse_req_url(char* url, char** host, char** service, char** uri, char** target);
/*
 * host can be domain-name or ip-address
 * service can be well-known service name("http"/"ftp") or port number
 */
int connect_to_server(char* host, char* service);
void send_req_to_server(int fd, char* uri, char* host, char* service);
void recv_res_from_server(int fd, char* store_path);

/* strdup() that terminates the program instead of returning NULL. */
static char* xstrdup(const char* s)
{
    char* copy = strdup(s);

    if (copy == NULL) {
        perror("strdup() failed");
        exit(EXIT_FAILURE);
    }
    return copy;
}

/* Print the command line synopsis and terminate with a failure status. */
static void usage_and_exit(const char* prog)
{
    fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
    exit(EXIT_FAILURE);
}

/*
 * Program entry: parse the command line, split the URL, connect, send the
 * request and store the response body in the target file.
 */
int main(int argc, char* argv[])
{
    int sfd; /* socket file descriptor */
    /* all NULL so that nothing below ever reads an indeterminate pointer */
    char *url = NULL, *host = NULL, *service = NULL, *uri = NULL, *target = NULL;

    /* parse request */
    parse_paramters(argc, argv, &url, &target);
    /* only let the URL choose the file name when -t was not given */
    parse_req_url(url, &host, &service, &uri, target ? NULL : &target);
    printf("Host : [%s]\n", host);
    printf("Port : [%s]\n", service);
    printf("Uri : [%s]\n", uri);
    printf("Target : [%s]\n", target);

    /* create the connection to server */
    sfd = connect_to_server(host, service);

    /* send http req to server */
    send_req_to_server(sfd, uri, host, service);
    free(uri);
    free(host);
    free(service);

    /* get response from server */
    recv_res_from_server(sfd, target);
    free(target);

    /* cleanup */
    shutdown(sfd, SHUT_RDWR);
    close(sfd);
    exit(EXIT_SUCCESS);
}

/*
 * Parse the command line options.
 *
 *   -u url         request URL (required); *url points into argv, not copied
 *   -t store_path  output file (optional); *target receives a heap copy that
 *                  the caller frees. *target is left untouched without -t.
 *
 * Prints the usage text and exits on any error, including a missing -u.
 */
void parse_paramters(int argc, char* argv[], char** url, char** target)
{
    int opt;
    char* parsed_url = NULL;
    char* parsed_target = NULL;
    /* argv may be NULL on the error path, so never read argv[0] blindly */
    const char* prog = (argv && argc > 0 && argv[0]) ? argv[0] : "a.out";

    if (!(url && target && (argc > 1) && argv)) {
        usage_and_exit(prog);
    }

    while ((opt = getopt(argc, argv, "u:t:")) != -1) {
        switch (opt) {
        case 'u':
            parsed_url = optarg;
            break;
        case 't':
            free(parsed_target); /* a repeated -t replaces the earlier one */
            parsed_target = xstrdup(optarg);
            break;
        default: /* '?' */
            usage_and_exit(prog);
        }
    }

    /* the URL is mandatory: without it there is nothing to request */
    if (parsed_url == NULL || *parsed_url == '\0') {
        free(parsed_target);
        usage_and_exit(prog);
    }

    *url = parsed_url;
    if (parsed_target) {
        *target = parsed_target;
    }
}

/*
 * Split "scheme://host[:port][/path]" into its parts.
 *
 *   url      modified in place ('\0' is written at the separators)
 *   host     receives a heap copy of the host part
 *   service  receives a heap copy of the port, "80" when none is given
 *   uri      receives a heap copy of the path ("/" when the URL has none)
 *   target   optional (may be NULL); receives a heap copy of the last path
 *            segment, or "index.html" when that segment is empty
 *
 * All returned strings are owned by the caller. Bracketed IPv6 literals are
 * not supported because the first ':' is taken as the port separator.
 */
void parse_req_url(char* url, char** host, char** service, char** uri, char** target)
{
    char* token;

    if (!(url && host && service && uri)) {
        fprintf(stderr, "invalid url\n");
        exit(EXIT_FAILURE);
    }

    /* skip scheme */
    token = strstr(url, "://");
    if (token) {
        url = token + sizeof("://") - 1;
    }

    /* find uri */
    token = strchr(url, '/');
    if (NULL == token) {
        *uri = xstrdup("/");
        if (target) {
            *target = xstrdup("index.html");
        }
    } else {
        *uri = xstrdup(token);
        *token = '\0';
        if (target) {
            /* *uri always starts with '/', so strrchr() cannot fail here */
            const char* name = strrchr(*uri, '/') + 1;

            /* ".../" has no file name; an empty path would make fopen() fail */
            *target = xstrdup(*name ? name : "index.html");
        }
    }

    /* find port */
    token = strchr(url, ':');
    if (token) {
        *token = '\0';
        /* "host:" with nothing after the colon falls back to the default */
        *service = xstrdup(token[1] ? token + 1 : "80");
    } else {
        *service = xstrdup("80");
    }

    *host = xstrdup(url);
}

/*
 * Resolve host/service and return a connected TCP socket.
 * Exits the program when resolution fails or no address can be connected.
 */
int connect_to_server(char* host, char* service)
{
    int ret;
    int sfd = -1;
    struct addrinfo hints;
    struct addrinfo *result, *rp;

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;     /* Allow IPv4 or IPv6 */
    hints.ai_socktype = SOCK_STREAM; /* stream socket */
    hints.ai_flags = AI_ADDRCONFIG;  /* return the addr type same with the local system addr type */
    hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */

    ret = getaddrinfo(host, service, &hints, &result);
    if (ret != 0) {
        fprintf(stderr, "getaddrinfo() failed: %s\n", gai_strerror(ret));
        exit(EXIT_FAILURE);
    }

    /*
     * getaddrinfo() returns a list of address structures. Try each address
     * until we successfully connect(2). If socket(2) (or connect(2)) fails,
     * we (close the socket and) try the next address.
     */
    for (rp = result; rp != NULL; rp = rp->ai_next) {
        sfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
        if (sfd == -1) {
            continue;
        }
        if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1) {
            break; /* Success */
        }
        close(sfd);
    }

    if (rp == NULL) { /* No address succeeded */
        fprintf(stderr, "Could not connect to server %s:%s\n", host, service);
        freeaddrinfo(result);
        exit(EXIT_FAILURE);
    }

    /* No longer needed */
    freeaddrinfo(result);
    return sfd;
}

/*
 * Build and send "GET <uri> HTTP/1.1" with "Connection: close", then echo the
 * request to stdout. service is accepted for interface compatibility and is
 * not used. Exits the program on allocation or send failure.
 */
void send_req_to_server(int fd, char* uri, char* host, char* service)
{
    int sn_len;
    char* send_buf;
    size_t buf_len;
    size_t sent = 0;
    char req_header[] = "GET %s HTTP/1.1\r\n"
                        "Host: %s\r\n"
                        "Connection: close\r\n\r\n";

    (void)service;

    buf_len = sizeof(req_header) + strlen(uri) + strlen(host) + 5;
    send_buf = calloc(buf_len, 1);
    if (send_buf == NULL) {
        perror("calloc() failed");
        exit(EXIT_FAILURE);
    }

    sn_len = snprintf(send_buf, buf_len, req_header, uri, host);
    if (sn_len < 0 || (size_t)sn_len >= buf_len) {
        fprintf(stderr, "request header too long\n");
        exit(EXIT_FAILURE);
    }

    /* send() may accept only part of the buffer; loop until all is queued */
    while (sent < (size_t)sn_len) {
        ssize_t n = send(fd, send_buf + sent, (size_t)sn_len - sent, 0);

        if (n == -1) {
            perror("send() failed");
            exit(EXIT_FAILURE);
        }
        sent += (size_t)n;
    }

    /* print req header */
    printf("\n%s", send_buf);
    free(send_buf);
}

/* Callback fed with freshly received bytes; returns 1 when the response is complete. */
typedef int (*DATA_PROC_CB)(void* data, ssize_t len);

typedef enum _srv_header_state {
    HEADER_STATE_LINE_START = 0,
    HEADER_STATE_LINE_DATA,
    HEADER_STATE_LINE_END,  /* meet '\r' */
    HEADER_STATE_LINE_DONE, /* meet '\n' */
    HEADER_STATE_HEAD_END,  /* meet '\r' on an otherwise empty line */
    HEADER_STATE_HEAD_DONE  /* meet '\n' that ends the header section */
} srv_header_state_t;

typedef enum _srv_body_state {
    BODY_STATE_CHUNK_LINE_START = 0,
    BODY_STATE_CHUNK_LINE_DATA, /* inside "chunk-size [ chunk-extension ] CRLF" */
    BODY_STATE_CHUNK_LINE_END,  /* unused by the parser below */
    BODY_STATE_CHUNK_LINE_DONE, /* meet '\r' of the size line, '\n' expected */
    BODY_STATE_CHUNK_DATA_START,
    BODY_STATE_CHUNK_DATA_END,  /* chunk-data consumed, '\r' expected */
    BODY_STATE_CHUNK_DATA_DONE  /* '\r' seen, '\n' expected */
} srv_body_state_t;

typedef struct _srv_res {
    /* recv buf */
    unsigned char* buf_ptr;   /* where the next recv() writes */
    unsigned char* buf_start; /* start of the allocation */
    size_t buf_len;           /* total length */
    size_t buf_remain;        /* unused length, counted from buf_ptr */

    /* buf proc */
    DATA_PROC_CB data_proc; /* parser currently in charge */

    DATA_PROC_CB res_header_proc;
    srv_header_state_t header_state;
    unsigned char* header_line_start;

    DATA_PROC_CB res_body_proc;
    srv_body_state_t body_state;
    unsigned char* body_chunk_start; /* first byte of the current chunk-size line */

    FILE* store_file;

    /* body */
    int is_chunked_encoding;
    unsigned long chunked_size;   /* bytes still missing from the current chunk */
    unsigned long content_length; /* identity: bytes still expected; chunked: bytes decoded so far */
} srv_res_t;

#define CHUNKED_ENCODING "Transfer-Encoding: chunked"
#define CONTETN_LENGTH "Content-Length: "

/* Make the whole receive buffer available to the next recv(). */
static void reset_recv_buf(srv_res_t* res)
{
    res->buf_ptr = res->buf_start;
    res->buf_remain = res->buf_len;
}

/* Append n body bytes to the output file; exits on a short write. */
static void write_body(srv_res_t* res, const unsigned char* ptr, size_t n)
{
    if (n > 0 && fwrite(ptr, 1, n, res->store_file) != n) {
        perror("fwrite() failed");
        exit(EXIT_FAILURE);
    }
}

/* True for the characters allowed in a chunk-size (HEX). */
static int is_hex_digit(unsigned char c)
{
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

/*
 * Header parser. data is a srv_res_t*, len the number of new bytes at
 * res->buf_ptr. Header bytes are kept in the buffer until the header section
 * is complete so that a line split across two recv() calls stays contiguous.
 * Once the empty line is seen, parsing is handed to res->res_body_proc and any
 * body bytes that arrived in the same recv() are forwarded to it.
 *
 * Returns whatever the body parser returns for forwarded bytes, else 0.
 *
 * NOTE: the field matches are exact and case-sensitive, as before.
 */
int proc_res_header(void* data, ssize_t len)
{
    ssize_t i;
    size_t field_len;
    unsigned char ch;
    srv_res_t* res = (srv_res_t*)data;

    if (len <= 0) {
        return 0;
    }

    for (i = 0; i < len && res->header_state != HEADER_STATE_HEAD_DONE; i++) {
        ch = res->buf_ptr[i];

        switch (res->header_state) {
        case HEADER_STATE_LINE_START:
            res->header_state = HEADER_STATE_LINE_DATA;
            res->header_line_start = res->buf_ptr + i;
            break;

        case HEADER_STATE_LINE_DATA:
            if (ch == '\r') {
                res->header_state = HEADER_STATE_LINE_END;
            }
            break;

        case HEADER_STATE_LINE_END:
            if (ch != '\n') {
                fprintf(stderr, "invalid header found\n");
                exit(EXIT_FAILURE);
            }
            res->header_state = HEADER_STATE_LINE_DONE;
            /* line length without the trailing "\r\n" */
            field_len = (size_t)(res->buf_ptr + i - res->header_line_start - 1);

            /* search Transfer-Encoding */
            if (!res->is_chunked_encoding &&
                (field_len == sizeof(CHUNKED_ENCODING) - 1) &&
                !memcmp(res->header_line_start, CHUNKED_ENCODING, sizeof(CHUNKED_ENCODING) - 1)) {
                res->is_chunked_encoding = 1;
            }

            /*
             * search Content-Length: match at the start of the line only, so
             * that e.g. "X-Content-Length: " is not mistaken for it. strtoul()
             * stops at the '\r' that terminates the line.
             */
            if (!res->content_length &&
                field_len >= sizeof(CONTETN_LENGTH) - 1 &&
                !memcmp(res->header_line_start, CONTETN_LENGTH, sizeof(CONTETN_LENGTH) - 1)) {
                res->content_length = strtoul(
                    (const char*)res->header_line_start + sizeof(CONTETN_LENGTH) - 1, NULL, 10);
            }

            /* print header line */
            fprintf(stdout, "\033[45m"); /* color start */
            fwrite(res->header_line_start, 1, field_len, stdout);
            fprintf(stdout, "\033[0m\n"); /* color end */
            break;

        case HEADER_STATE_LINE_DONE:
            if (ch == '\r') {
                res->header_state = HEADER_STATE_HEAD_END;
            } else {
                res->header_state = HEADER_STATE_LINE_DATA;
                res->header_line_start = res->buf_ptr + i;
            }
            break;

        case HEADER_STATE_HEAD_END:
            if (ch != '\n') {
                fprintf(stderr, "invalid header found\n");
                exit(EXIT_FAILURE);
            }
            res->header_state = HEADER_STATE_HEAD_DONE;
            fprintf(stdout, "\n\033[31m=== parse header done, chunked[%d] content-length[%lu] === \033[0m\n\n",
                    res->is_chunked_encoding, res->content_length);
            break;

        default:
            break;
        }
    }

    if (res->header_state != HEADER_STATE_HEAD_DONE) {
        /* header not finish: keep what we have and append the next recv() */
        res->buf_ptr += len;
        res->buf_remain -= (size_t)len;
        if (res->buf_remain == 0) {
            fprintf(stderr, "large header found\n");
            exit(EXIT_FAILURE);
        }
        return 0;
    }

    res->data_proc = res->res_body_proc;

    /* i is the index of the first byte after the header section */
    if (i < len) { /* found body data */
        res->buf_ptr += i;
        res->buf_remain -= (size_t)i;
        return res->data_proc(res, len - i);
    }

    /* the header text is no longer needed; reuse the whole buffer */
    reset_recv_buf(res);
    return 0;
}

/*
 * Body parser. data is a srv_res_t*, len the number of new bytes at
 * res->buf_ptr. Writes decoded body bytes to res->store_file.
 *
 * Identity bodies: bytes are copied through. When a non-zero Content-Length
 * was announced, at most that many bytes are written and 1 is returned once
 * they have all arrived; otherwise the body runs until the peer closes.
 *
 * Chunked bodies: decodes chunk-size lines and chunk-data, ignores chunk
 * extensions and the trailer, and returns 1 at the last-chunk.
 *
 * On return the receive buffer is ready for the next recv(): empty, or holding
 * only an incomplete chunk-size line moved to its start.
 *
 * Returns 1 when the body is complete, 0 when more data is needed. Exits the
 * program on malformed chunk framing or a write error.
 */
int proc_res_body(void* data, ssize_t len)
{
    size_t i;
    size_t avail;
    size_t data_left;
    size_t partial;
    unsigned char ch;
    srv_res_t* res = (srv_res_t*)data;

    if (len <= 0) {
        return 0;
    }
    avail = (size_t)len;

    /* not chunked encoding */
    if (!res->is_chunked_encoding) {
        size_t want = avail;
        int done = 0;

        if (res->content_length > 0 && res->content_length <= avail) {
            want = (size_t)res->content_length; /* never write past the announced length */
            done = 1;
        }
        write_body(res, res->buf_ptr, want);
        if (res->content_length > 0) {
            res->content_length -= want;
        }
        reset_recv_buf(res);
        return done; /* 1: get all body data */
    }

    /* parse chunked-encoding */
    for (i = 0; i < avail; i++) {
        ch = res->buf_ptr[i];

        switch (res->body_state) {
        case BODY_STATE_CHUNK_LINE_START:
            res->body_chunk_start = res->buf_ptr + i;
            res->body_state = BODY_STATE_CHUNK_LINE_DATA;
            /* fallthrough */

        case BODY_STATE_CHUNK_LINE_DATA:
            if (ch == '\r') {
                res->body_state = BODY_STATE_CHUNK_LINE_DONE;
            }
            break;

        case BODY_STATE_CHUNK_LINE_DONE:
            /*
             * The line must start with a hex digit: strtoul() would otherwise
             * skip leading whitespace (including CR/LF) and read chunk-data.
             */
            if (ch != '\n' || !is_hex_digit(*res->body_chunk_start)) {
                fprintf(stderr, "invalid chunk-body line found\n");
                exit(EXIT_FAILURE);
            }
            /* ignore chunk-extension: strtoul() stops at ';' or '\r' */
            res->chunked_size = strtoul((const char*)res->body_chunk_start, NULL, 16);
            if (0 == res->chunked_size) { /* last chunk */
                fprintf(stdout, "=== last-chunk found(total body size = %lu) ===\n", res->content_length);
                /* ignore trailer */
                reset_recv_buf(res);
                return 1;
            }
            res->body_state = BODY_STATE_CHUNK_DATA_START;
            res->content_length += res->chunked_size;
            fprintf(stdout, "=== chunk-size %lu ===\n", res->chunked_size);
            break;

        case BODY_STATE_CHUNK_DATA_START:
            data_left = avail - i;
            if (data_left < res->chunked_size) {
                /* chunk continues in the next recv() */
                write_body(res, res->buf_ptr + i, data_left);
                res->chunked_size -= data_left;
                i = avail - 1; /* end loop */
            } else {
                write_body(res, res->buf_ptr + i, (size_t)res->chunked_size);
                i += (size_t)res->chunked_size - 1; /* loop increment steps onto the '\r' */
                res->chunked_size = 0;
                res->body_state = BODY_STATE_CHUNK_DATA_END;
            }
            break;

        case BODY_STATE_CHUNK_DATA_END:
            if (ch == '\r') {
                res->body_state = BODY_STATE_CHUNK_DATA_DONE;
            } else {
                fprintf(stderr, "invalid chunk-body data found\n");
                exit(EXIT_FAILURE);
            }
            break;

        case BODY_STATE_CHUNK_DATA_DONE:
            if (ch == '\n') {
                res->body_state = BODY_STATE_CHUNK_LINE_START;
            } else {
                fprintf(stderr, "invalid chunk-body data found\n");
                exit(EXIT_FAILURE);
            }
            break;

        default:
            break;
        }
    }

    /* dont break the size line */
    if ((res->body_state == BODY_STATE_CHUNK_LINE_DATA) ||
        (res->body_state == BODY_STATE_CHUNK_LINE_DONE)) {
        /*
         * Move the incomplete line to the front so the rest can be appended
         * right behind it and the full buffer stays usable.
         */
        partial = (size_t)((res->buf_ptr + avail) - res->body_chunk_start);
        if (partial >= res->buf_len) {
            fprintf(stderr, "invalid chunk-body line found\n");
            exit(EXIT_FAILURE);
        }
        memmove(res->buf_start, res->body_chunk_start, partial);
        res->body_chunk_start = res->buf_start;
        res->buf_ptr = res->buf_start + partial;
        res->buf_remain = res->buf_len - partial;
    } else {
        /* everything received so far has been consumed */
        reset_recv_buf(res);
    }

    return 0;
}

/*
 * Receive the HTTP response from fd, print the header lines to stdout and
 * write the decoded body to store_path. Returns when the body is complete or
 * the peer closes the connection; exits the program on any failure.
 */
void recv_res_from_server(int fd, char* store_path)
{
    ssize_t ret = 1;
    srv_res_t response;

    memset(&response, 0x00, sizeof(response));

    if (NULL == store_path) {
        fprintf(stderr, "no store path given\n");
        exit(EXIT_FAILURE);
    }

    response.store_file = fopen(store_path, "wb");
    if (NULL == response.store_file) {
        perror("fopen() failed");
        exit(EXIT_FAILURE);
    }

    response.buf_len = 0x1000;                         /* 4k */
    response.buf_start = calloc(1, response.buf_len);  /* alloc 4k memory */
    if (NULL == response.buf_start) {
        perror("calloc() failed");
        exit(EXIT_FAILURE);
    }
    response.buf_ptr = response.buf_start;
    response.buf_remain = response.buf_len;

    response.res_header_proc = proc_res_header;
    response.res_body_proc = proc_res_body;
    response.data_proc = response.res_header_proc;

    while (ret > 0) {
        ret = recv(fd, response.buf_ptr, response.buf_remain, 0);
        if (ret > 0) {
            if (response.data_proc(&response, ret) == 1) { /* get all response */
                break;
            }
        }
    }

    if (ret == 0) {
        printf("server shutdown the connection\n");
    } else if (ret < 0) {
        perror("recv() failed");
        exit(EXIT_FAILURE);
    }

    /* cleanup; fclose() flushes, so a failure here means lost body data */
    if (fclose(response.store_file) != 0) {
        perror("fclose() failed");
        exit(EXIT_FAILURE);
    }
    free(response.buf_start);
}