CSAPP 07 Proxylab

proxy lab

phase 1 简单代理服务



  1. HTTP请求的格式

  2. 读10、11章,知道各种函数的作用


#include "csapp.h"

/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char *conn_hdr = "Connection: close\r\n";
static const char *proxy_hdr = "Proxy-Connection: close\r\n";

void doit(int client_fd);
void clienterror(int fd, char *cause, char *errnum, 
		 char *shortmsg, char *longmsg);
void parse_uri(char *uri, char *hostname, char *path, int *port);
void build_new_request(rio_t *rio_packet, char *new_request, char *hostname, char *port);

/*
 * main - start the proxy as an iterative server.
 *
 * argv[1] is the port to listen on. Each accepted client is served to
 * completion by doit() and then closed before the next one is accepted.
 */
int main(int argc, char **argv) {
    int listenfd, connfd;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;

    /* Check command line args */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    listenfd = Open_listenfd(argv[1]);
    while (1) {
        clientlen = sizeof(clientaddr);
        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        Getnameinfo((SA *)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        doit(connfd);
        Close(connfd); /* one descriptor per connection: release it once served */
    }
}

/* doit - handle one HTTP request/response transaction */
/*
 * Serve a single client request.
 *
 * Reads the request line from client_fd, rejects anything that is not a
 * well-formed GET, connects to the server named in the URI, sends it a
 * rewritten HTTP/1.0 request and copies the reply back to the client.
 * client_fd is left open; the server-side descriptor is closed here.
 */
void doit(int client_fd) {
    int real_server_fd;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char hostname[MAXLINE], path[MAXLINE];
    char port_str[16];
    char new_request[MAXLINE];
    rio_t real_client, real_server;
    int port;
    int line_len;
    ssize_t char_nums;

    /* read request line (method, uri, http version) */
    Rio_readinitb(&real_client, client_fd);
    if (Rio_readlineb(&real_client, buf, MAXLINE) <= 0)
        return; /* nothing was sent */

    /* buf holds fewer than MAXLINE bytes, so every %s field fits its buffer */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        clienterror(client_fd, buf, "400", "Bad Request", "Proxy could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(client_fd, method, "501", "Not Implemented", "Tiny does not implement this method");
        return;
    }

    /* parse uri and prepare the request line before opening any connection */
    parse_uri(uri, hostname, path, &port);
    snprintf(port_str, sizeof port_str, "%d", port);
    line_len = snprintf(new_request, sizeof new_request, "GET %s HTTP/1.0\r\n", path);
    if (line_len < 0 || (size_t)line_len >= sizeof new_request) {
        clienterror(client_fd, hostname, "414", "URI Too Long", "Proxy cannot forward a request line this long");
        return;
    }

    /* link real server */
    real_server_fd = open_clientfd(hostname, port_str);
    if (real_server_fd < 0) {
        printf("connection failed\n");
        return;
    }
    Rio_readinitb(&real_server, real_server_fd);

    /* build new request and send it: proxy acts as client */
    build_new_request(&real_client, new_request, hostname, port_str);
    Rio_writen(real_server_fd, new_request, strlen(new_request));

    /* relay the reply: proxy acts as server */
    while ((char_nums = Rio_readlineb(&real_server, buf, MAXLINE)) > 0) {
        Rio_writen(client_fd, buf, char_nums);
    }
    Close(real_server_fd);
}

/* clienterror - returns an error message to the client */
/*
 * Send a small HTML error page to fd.
 *
 * errnum and shortmsg form the status line; longmsg and cause are shown in
 * the page body. Both the headers and the body are formatted with bounded
 * snprintf calls, and Content-length is the number of body bytes actually
 * sent.
 */
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) {
    char buf[MAXLINE], body[MAXBUF];
    int body_len, head_len;

    /* Build the HTTP response body in one pass (no self-referencing sprintf) */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Tiny Error</title>"
                        "<body bgcolor=""ffffff"">\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Tiny Web server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0)
        body_len = 0;
    else if ((size_t)body_len >= sizeof body)
        body_len = (int)sizeof body - 1; /* output was truncated to fit */

    /* Build the HTTP response headers */
    head_len = snprintf(buf, sizeof buf,
                        "HTTP/1.0 %s %s\r\n"
                        "Content-type: text/html\r\n"
                        "Content-length: %d\r\n\r\n",
                        errnum, shortmsg, body_len);
    if (head_len < 0)
        return;
    if ((size_t)head_len >= sizeof buf)
        head_len = (int)sizeof buf - 1;

    Rio_writen(fd, buf, head_len);
    Rio_writen(fd, body, body_len);
}

/*
 * parse_uri - parse uri to get hostname, port, path from real client
 * eg uri : http://www.cmu.edu:8080/hub/index.html
 */
/*
 * Split a request URI of the form [scheme://]host[:port][/path].
 *
 * uri      - NUL-terminated input; it is only read, never modified.
 * hostname - receives the host name; must hold MAXLINE bytes.
 * path     - receives the path including the leading '/', or "/" when the
 *            URI has none; must hold MAXLINE bytes.
 * port     - receives the port, 80 when absent or not in 1..65535.
 */
void parse_uri(char *uri, char *hostname, char *path, int *port) {
    const char *host_begin = uri;
    const char *scheme_end = strstr(uri, "://");

    /* Skip "scheme://" only when it really is a prefix, not part of the path */
    if (scheme_end != NULL && memchr(uri, '/', (size_t)(scheme_end - uri)) == NULL)
        host_begin = scheme_end + 3;
    else if (strncmp(uri, "//", 2) == 0)
        host_begin = uri + 2;

    /* The authority (host[:port]) ends at the first '/' or at the end of the string */
    size_t authority_len = strcspn(host_begin, "/");
    const char *path_begin = host_begin + authority_len;

    /* Only a ':' inside the authority introduces a port; one in the path does not */
    const char *colon = memchr(host_begin, ':', authority_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - host_begin) : authority_len;
    if (host_len >= MAXLINE)
        host_len = MAXLINE - 1;
    memcpy(hostname, host_begin, host_len);
    hostname[host_len] = '\0';

    *port = 80; /* default port */
    if (colon != NULL) {
        int value = 0;
        const char *digit = colon + 1;

        /* the <= 65535 guard stops accumulation long before int can overflow */
        while (*digit >= '0' && *digit <= '9' && value <= 65535) {
            value = value * 10 + (*digit - '0');
            digit++;
        }
        if (value >= 1 && value <= 65535)
            *port = value;
    }

    if (*path_begin == '\0')
        strcpy(path, "/"); /* "GET  HTTP/1.0" would be an invalid request line */
    else
        snprintf(path, MAXLINE, "%s", path_begin);
}

/* build_new_request - build new request */
/*
 * Append text to the NUL-terminated string dst only when the result,
 * terminator included, fits in capacity bytes. Returns 1 when appended,
 * 0 when text was left out.
 */
static int append_header(char *dst, size_t capacity, const char *text) {
    size_t used = strlen(dst);
    size_t extra = strlen(text);

    if (used + extra >= capacity)
        return 0;
    memcpy(dst + used, text, extra + 1);
    return 1;
}

/* Nonzero when line begins with the header name (case-insensitive). */
static int is_header(const char *line, const char *name) {
    return strncasecmp(line, name, strlen(name)) == 0;
}

/*
 * Complete the request that is forwarded to the real server.
 *
 * new_request must already contain the request line and is treated as a
 * MAXLINE-byte buffer, which is how doit() declares it. The proxy's own
 * Host, User-Agent, Connection and Proxy-Connection headers are written
 * first; the client's remaining headers are then copied through, except for
 * those four, which the proxy replaces. A client header that would not fit
 * is dropped whole rather than truncated. The header block is closed with an
 * empty line.
 */
void build_new_request(rio_t *real_client, char *new_request, char *hostname, char *port){
    char temp_buf[MAXLINE];
    /* always keep two bytes free for the blank line that ends the headers */
    const size_t header_room = MAXLINE - 2;
    int host_len;

    host_len = snprintf(temp_buf, sizeof temp_buf, "Host: %s:%s\r\n", hostname, port);
    if (host_len > 0 && (size_t)host_len < sizeof temp_buf)
        append_header(new_request, header_room, temp_buf);
    append_header(new_request, header_room, user_agent_hdr);
    append_header(new_request, header_room, conn_hdr);
    append_header(new_request, header_room, proxy_hdr);

    /* read the client's headers up to the empty line that terminates them */
    while (Rio_readlineb(real_client, temp_buf, MAXLINE) > 0) {
        if (strcmp(temp_buf, "\r\n") == 0 || strcmp(temp_buf, "\n") == 0) break;
        if (is_header(temp_buf, "Host:")) continue;
        if (is_header(temp_buf, "User-Agent:")) continue;
        if (is_header(temp_buf, "Connection:")) continue;
        if (is_header(temp_buf, "Proxy-Connection:")) continue;
        append_header(new_request, header_room, temp_buf);
    }

    append_header(new_request, MAXLINE, "\r\n");
}


phase 2 并发



  1. 基于进程的并发
  2. 基于IO多路复用的并发
  3. 基于线程的并发(优化->预线程化技术):将连接到的客户端connfd放入资源库sbuf中,一个线程可以重复去取,相较于为每个客户端connfd都创建一个线程,这样做的性能提升比较好


# Makefile for Proxy Lab
#
# You may modify this file any way you like (except for the handin
# rule). Your instructor will type "make" on your specific Makefile to
# build your proxy from sources.

CC = gcc
CFLAGS = -g -Wall
LDFLAGS = -lpthread

.PHONY: all handin clean

all: proxy

csapp.o: csapp.c csapp.h
	$(CC) $(CFLAGS) -c csapp.c

# Object files are built from their own sources, not from other objects.
sbuf.o: sbuf.c sbuf.h csapp.h
	$(CC) $(CFLAGS) -c sbuf.c

proxy.o: proxy.c csapp.h sbuf.h
	$(CC) $(CFLAGS) -c proxy.c

proxy: proxy.o csapp.o sbuf.o
	$(CC) $(CFLAGS) proxy.o csapp.o sbuf.o -o proxy $(LDFLAGS)

# Creates a tarball in ../proxylab-handin.tar that you can then
# hand in. DO NOT MODIFY THIS!
handin:
	(make clean; cd ..; tar cvf $(USER)-proxylab-handin.tar proxylab-handout --exclude tiny --exclude nop-server.py --exclude proxy --exclude driver.sh --exclude port-for-user.pl --exclude free-port.sh --exclude ".*")

clean:
	rm -f *~ *.o proxy core *.tar *.zip *.gzip *.bzip *.gz


#include "csapp.h"
#include "sbuf.h"
#define SBUFSIZE 16
#define NTHREADS 4

/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char *conn_hdr = "Connection: close\r\n";
static const char *proxy_hdr = "Proxy-Connection: close\r\n";

sbuf_t sbuf;

void *thread(void *vargp);
void doit(int client_fd);
void clienterror(int fd, char *cause, char *errnum, 
		 char *shortmsg, char *longmsg);
void parse_uri(char *uri, char *hostname, char *path, int *port);
void build_new_request(rio_t *rio_packet, char *new_request, char *hostname, char *port);

/*
 * main - start the proxy as a prethreaded concurrent server.
 *
 * argv[1] is the port to listen on. NTHREADS worker threads are created up
 * front; the main thread only accepts connections and hands each connected
 * descriptor to the workers through the shared buffer sbuf.
 */
int main(int argc, char **argv) {
    int i, listenfd, connfd;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;
    pthread_t tid;

    /* Check command line args */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    listenfd = Open_listenfd(argv[1]);

    sbuf_init(&sbuf, SBUFSIZE);
    for (i = 0; i < NTHREADS; i++) {
        /* the csapp wrapper reports a failed thread creation instead of ignoring it */
        Pthread_create(&tid, NULL, thread, NULL);
    }

    while (1) {
        clientlen = sizeof(clientaddr);
        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        Getnameinfo((SA *)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        sbuf_insert(&sbuf, connfd);
    }
}

/* thread - thread function */
/*
 * Worker loop: repeatedly take a connected descriptor out of sbuf, serve it
 * with doit() and close it. vargp is unused. The loop never ends, so the
 * function does not return.
 */
void *thread(void *vargp) {
    (void)vargp;
    Pthread_detach(pthread_self()); /* nobody joins the workers */
    while (1) {
        int connfd = sbuf_remove(&sbuf);
        doit(connfd);
        Close(connfd);
    }
    return NULL;
}

/* doit - handle one HTTP request/response transaction */
/*
 * Serve a single client request.
 *
 * Reads the request line from client_fd, rejects anything that is not a
 * well-formed GET, connects to the server named in the URI, sends it a
 * rewritten HTTP/1.0 request and copies the reply back to the client.
 * client_fd is left open; the server-side descriptor is closed here.
 */
void doit(int client_fd) {
    int real_server_fd;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char hostname[MAXLINE], path[MAXLINE];
    char port_str[16];
    char new_request[MAXLINE];
    rio_t real_client, real_server;
    int port;
    int line_len;
    ssize_t char_nums;

    /* read request line (method, uri, http version) */
    Rio_readinitb(&real_client, client_fd);
    if (Rio_readlineb(&real_client, buf, MAXLINE) <= 0)
        return; /* nothing was sent */

    /* buf holds fewer than MAXLINE bytes, so every %s field fits its buffer */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        clienterror(client_fd, buf, "400", "Bad Request", "Proxy could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(client_fd, method, "501", "Not Implemented", "Tiny does not implement this method");
        return;
    }

    /* parse uri and prepare the request line before opening any connection */
    parse_uri(uri, hostname, path, &port);
    snprintf(port_str, sizeof port_str, "%d", port);
    line_len = snprintf(new_request, sizeof new_request, "GET %s HTTP/1.0\r\n", path);
    if (line_len < 0 || (size_t)line_len >= sizeof new_request) {
        clienterror(client_fd, hostname, "414", "URI Too Long", "Proxy cannot forward a request line this long");
        return;
    }

    /* link real server */
    real_server_fd = open_clientfd(hostname, port_str);
    if (real_server_fd < 0) {
        printf("connection failed\n");
        return;
    }
    Rio_readinitb(&real_server, real_server_fd);

    /* build new request and send it: proxy acts as client */
    build_new_request(&real_client, new_request, hostname, port_str);
    Rio_writen(real_server_fd, new_request, strlen(new_request));

    /* relay the reply: proxy acts as server */
    while ((char_nums = Rio_readlineb(&real_server, buf, MAXLINE)) > 0) {
        Rio_writen(client_fd, buf, char_nums);
    }
    Close(real_server_fd);
}

/* clienterror - returns an error message to the client */
/*
 * Send a small HTML error page to fd.
 *
 * errnum and shortmsg form the status line; longmsg and cause are shown in
 * the page body. Both the headers and the body are formatted with bounded
 * snprintf calls, and Content-length is the number of body bytes actually
 * sent.
 */
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) {
    char buf[MAXLINE], body[MAXBUF];
    int body_len, head_len;

    /* Build the HTTP response body in one pass (no self-referencing sprintf) */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Tiny Error</title>"
                        "<body bgcolor=""ffffff"">\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Tiny Web server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0)
        body_len = 0;
    else if ((size_t)body_len >= sizeof body)
        body_len = (int)sizeof body - 1; /* output was truncated to fit */

    /* Build the HTTP response headers */
    head_len = snprintf(buf, sizeof buf,
                        "HTTP/1.0 %s %s\r\n"
                        "Content-type: text/html\r\n"
                        "Content-length: %d\r\n\r\n",
                        errnum, shortmsg, body_len);
    if (head_len < 0)
        return;
    if ((size_t)head_len >= sizeof buf)
        head_len = (int)sizeof buf - 1;

    Rio_writen(fd, buf, head_len);
    Rio_writen(fd, body, body_len);
}

/*
 * parse_uri - parse uri to get hostname, port, path from real client
 * eg uri : http://www.cmu.edu:8080/hub/index.html
 */
/*
 * Split a request URI of the form [scheme://]host[:port][/path].
 *
 * uri      - NUL-terminated input; it is only read, never modified.
 * hostname - receives the host name; must hold MAXLINE bytes.
 * path     - receives the path including the leading '/', or "/" when the
 *            URI has none; must hold MAXLINE bytes.
 * port     - receives the port, 80 when absent or not in 1..65535.
 */
void parse_uri(char *uri, char *hostname, char *path, int *port) {
    const char *host_begin = uri;
    const char *scheme_end = strstr(uri, "://");

    /* Skip "scheme://" only when it really is a prefix, not part of the path */
    if (scheme_end != NULL && memchr(uri, '/', (size_t)(scheme_end - uri)) == NULL)
        host_begin = scheme_end + 3;
    else if (strncmp(uri, "//", 2) == 0)
        host_begin = uri + 2;

    /* The authority (host[:port]) ends at the first '/' or at the end of the string */
    size_t authority_len = strcspn(host_begin, "/");
    const char *path_begin = host_begin + authority_len;

    /* Only a ':' inside the authority introduces a port; one in the path does not */
    const char *colon = memchr(host_begin, ':', authority_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - host_begin) : authority_len;
    if (host_len >= MAXLINE)
        host_len = MAXLINE - 1;
    memcpy(hostname, host_begin, host_len);
    hostname[host_len] = '\0';

    *port = 80; /* default port */
    if (colon != NULL) {
        int value = 0;
        const char *digit = colon + 1;

        /* the <= 65535 guard stops accumulation long before int can overflow */
        while (*digit >= '0' && *digit <= '9' && value <= 65535) {
            value = value * 10 + (*digit - '0');
            digit++;
        }
        if (value >= 1 && value <= 65535)
            *port = value;
    }

    if (*path_begin == '\0')
        strcpy(path, "/"); /* "GET  HTTP/1.0" would be an invalid request line */
    else
        snprintf(path, MAXLINE, "%s", path_begin);
}

/* build_new_request - build new request */
/*
 * Append text to the NUL-terminated string dst only when the result,
 * terminator included, fits in capacity bytes. Returns 1 when appended,
 * 0 when text was left out.
 */
static int append_header(char *dst, size_t capacity, const char *text) {
    size_t used = strlen(dst);
    size_t extra = strlen(text);

    if (used + extra >= capacity)
        return 0;
    memcpy(dst + used, text, extra + 1);
    return 1;
}

/* Nonzero when line begins with the header name (case-insensitive). */
static int is_header(const char *line, const char *name) {
    return strncasecmp(line, name, strlen(name)) == 0;
}

/*
 * Complete the request that is forwarded to the real server.
 *
 * new_request must already contain the request line and is treated as a
 * MAXLINE-byte buffer, which is how doit() declares it. The proxy's own
 * Host, User-Agent, Connection and Proxy-Connection headers are written
 * first; the client's remaining headers are then copied through, except for
 * those four, which the proxy replaces. A client header that would not fit
 * is dropped whole rather than truncated. The header block is closed with an
 * empty line.
 */
void build_new_request(rio_t *real_client, char *new_request, char *hostname, char *port){
    char temp_buf[MAXLINE];
    /* always keep two bytes free for the blank line that ends the headers */
    const size_t header_room = MAXLINE - 2;
    int host_len;

    host_len = snprintf(temp_buf, sizeof temp_buf, "Host: %s:%s\r\n", hostname, port);
    if (host_len > 0 && (size_t)host_len < sizeof temp_buf)
        append_header(new_request, header_room, temp_buf);
    append_header(new_request, header_room, user_agent_hdr);
    append_header(new_request, header_room, conn_hdr);
    append_header(new_request, header_room, proxy_hdr);

    /* read the client's headers up to the empty line that terminates them */
    while (Rio_readlineb(real_client, temp_buf, MAXLINE) > 0) {
        if (strcmp(temp_buf, "\r\n") == 0 || strcmp(temp_buf, "\n") == 0) break;
        if (is_header(temp_buf, "Host:")) continue;
        if (is_header(temp_buf, "User-Agent:")) continue;
        if (is_header(temp_buf, "Connection:")) continue;
        if (is_header(temp_buf, "Proxy-Connection:")) continue;
        append_header(new_request, header_room, temp_buf);
    }

    append_header(new_request, MAXLINE, "\r\n");
}


phase 3 cache服务


  1. 读者写者模型
  2. LRU cache的实现


#include "csapp.h"

/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
/* Number of objects (cache blocks) in the cache; 10 full-size objects stay within MAX_CACHE_SIZE */
#define NUMBERS_OBJECT 10

/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char *conn_hdr = "Connection: close\r\n";
static const char *proxy_hdr = "Proxy-Connection: close\r\n";

/* Structure of one cached object (one cache block) */
typedef struct {
    char *url;     /* URL the object is stored under; compared with strcmp */
    char *content; /* the stored response text */
    int cnt;       /* LRU: the count of use */
    int is_used;   /* equals 0 => obj can't be used; equals 1 => obj can be used */
} object;

/* Global variables */
static object *cache; /* array of cache blocks, allocated by init_cache() */
static int readcnt; /* count of readers; reader() tests it for first-in / last-out */
static sem_t readcnt_mutex, writer_mutex; /* readcnt_mutex protects readcnt; writer_mutex presumably guards cache updates (P/V calls are not visible here) - confirm */

/* helper function */
void doit(int client_fd);
void clienterror(int fd, char *cause, char *errnum, 
		 char *shortmsg, char *longmsg);
void parse_uri(char *uri, char *hostname, char *path, int *port);
void print_and_build_hdr(rio_t *rio_packet, char *new_request, char *hostname, char *port);
void *thread(void *varge_ptr);
void init_cache(void);
static void init_mutex(void);
int reader(int fd, char* url);
void writer(char* buf, char* url);

/* boot proxy as server get connfd from client*/
int main(int argc, char **argv) 
    int listenfd, *connfd_ptr;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;
    pthread_t tid;

    /* Check command line args */
    if (argc != 2) {
		fprintf(stderr, "usage: %s <port>\n", argv[0]);

    listenfd = Open_listenfd(argv[1]);
    while (1) {
		clientlen = sizeof(clientaddr);
        connfd_ptr = Malloc(sizeof(int)); /* alloc memory of each thread to avoid race */
		*connfd_ptr = Accept(listenfd, (SA *)&clientaddr, &clientlen); 
    	Getnameinfo((SA *) &clientaddr, clientlen, hostname, MAXLINE, 
                    port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
		Pthread_create(&tid, NULL, thread, connfd_ptr);                                                                                    
    return 0;

/* Thread routine */
/*
 * Serve one connection in its own thread.
 *
 * varge_ptr points to a heap-allocated int holding the connected
 * descriptor (main allocates it with Malloc). The descriptor is copied out
 * and the allocation released here; the descriptor is closed once doit()
 * has finished. Always returns NULL.
 */
void *thread(void *varge_ptr){
    int connfd = *((int *)varge_ptr);

    Pthread_detach(pthread_self()); /* nobody joins this thread */
    Free(varge_ptr);
    doit(connfd);
    Close(connfd);
    return NULL;
}

/* doit - handle one HTTP request/response transaction */
void doit(int client_fd) 
    int real_server_fd;
    char buf[MAXLINE], method[MAXLINE], url[MAXLINE], version[MAXLINE];
    char uri[MAXLINE], obj_buf[MAXLINE];
    rio_t real_client, real_server;
    char hostname[MAXLINE], path[MAXLINE];
    int port;

    /* Read request line and headers from real client */
    Rio_readinitb(&real_client, client_fd);
    if (!Rio_readlineb(&real_client, buf, MAXLINE))  	 
    sscanf(buf, "%s %s %s", method, uri, version);
    strcpy(url, uri);       
    if (strcasecmp(method, "GET")) {                     
        clienterror(client_fd, method, "501", "Not Implemented",
                    "Tiny does not implement this method");

    /* if object of request from cache */
    if(reader(client_fd, url)){
        fprintf(stdout, "%s from cache\n", url);

    /* perpare for parse uri and build new request */
    parse_uri(uri, hostname, path, &port);
    char port_str[0];
    sprintf(port_str, "%d", port); /* port from int convert to char */
    real_server_fd = Open_clientfd(hostname, port_str);  /* real server get fd from proxy(as client) */
	if(real_server_fd < 0){
        printf("connection failed\n");
    Rio_readinitb(&real_server, real_server_fd);
    char new_request[MAXLINE];
    sprintf(new_request, "GET %s HTTP/1.0\r\n", path);
    print_and_build_hdr(&real_client, new_request, hostname, port_str);

    /* proxy as client sent to web server */
    Rio_writen(real_server_fd, new_request, strlen(new_request));
    /* then proxy as server respond to real client */
    int char_nums;
    int obj_size = 0;
    while((char_nums = Rio_readlineb(&real_server, buf, MAXLINE))){
        Rio_writen(client_fd, buf, char_nums);

         /* perpare for write object to cache */
         if(obj_size + char_nums < MAX_OBJECT_SIZE){
            strcpy(obj_buf + obj_size, buf);
            obj_size += char_nums;

    if(obj_size < MAX_OBJECT_SIZE)
        writer(obj_buf, url);


/* clienterror - returns an error message to the client */
/*
 * Send a small HTML error page to fd.
 *
 * errnum and shortmsg form the status line; longmsg and cause are shown in
 * the page body. Both the headers and the body are formatted with bounded
 * snprintf calls, and Content-length is the number of body bytes actually
 * sent.
 */
void clienterror(int fd, char *cause, char *errnum,
		 char *shortmsg, char *longmsg)
{
    char buf[MAXLINE], body[MAXBUF];
    int body_len, head_len;

    /* Build the HTTP response body in one pass (no self-referencing sprintf) */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Tiny Error</title>"
                        "<body bgcolor=""ffffff"">\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Tiny Web server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0)
        body_len = 0;
    else if ((size_t)body_len >= sizeof body)
        body_len = (int)sizeof body - 1; /* output was truncated to fit */

    /* Build the HTTP response headers */
    head_len = snprintf(buf, sizeof buf,
                        "HTTP/1.0 %s %s\r\n"
                        "Content-type: text/html\r\n"
                        "Content-length: %d\r\n\r\n",
                        errnum, shortmsg, body_len);
    if (head_len < 0)
        return;
    if ((size_t)head_len >= sizeof buf)
        head_len = (int)sizeof buf - 1;

    Rio_writen(fd, buf, head_len);
    Rio_writen(fd, body, body_len);
}

/* parse_uri - parse uri to get hostname, port, path from real client */
/*
 * Split a request URI of the form [scheme://]host[:port][/path],
 * e.g. http://www.cmu.edu:8080/hub/index.html.
 *
 * uri      - NUL-terminated input; it is only read, never modified.
 * hostname - receives the host name; must hold MAXLINE bytes.
 * path     - receives the path including the leading '/', or "/" when the
 *            URI has none (sscanf would have left it unset in that case);
 *            must hold MAXLINE bytes.
 * port     - receives the port, 80 when absent or not in 1..65535.
 */
void parse_uri(char *uri, char *hostname, char *path, int *port) {
    const char *host_begin = uri;
    const char *scheme_end = strstr(uri, "://");

    /* Skip "scheme://" only when it really is a prefix, not part of the path */
    if (scheme_end != NULL && memchr(uri, '/', (size_t)(scheme_end - uri)) == NULL)
        host_begin = scheme_end + 3;
    else if (strncmp(uri, "//", 2) == 0)
        host_begin = uri + 2;

    /* The authority (host[:port]) ends at the first '/' or at the end of the string */
    size_t authority_len = strcspn(host_begin, "/");
    const char *path_begin = host_begin + authority_len;

    /* Only a ':' inside the authority introduces a port; one in the path does not */
    const char *colon = memchr(host_begin, ':', authority_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - host_begin) : authority_len;
    if (host_len >= MAXLINE)
        host_len = MAXLINE - 1;
    memcpy(hostname, host_begin, host_len);
    hostname[host_len] = '\0';

    *port = 80; /* default port */
    if (colon != NULL) {
        int value = 0;
        const char *digit = colon + 1;

        /* the <= 65535 guard stops accumulation long before int can overflow */
        while (*digit >= '0' && *digit <= '9' && value <= 65535) {
            value = value * 10 + (*digit - '0');
            digit++;
        }
        if (value >= 1 && value <= 65535)
            *port = value;
    }

    if (*path_begin == '\0')
        strcpy(path, "/"); /* "GET  HTTP/1.0" would be an invalid request line */
    else
        snprintf(path, MAXLINE, "%s", path_begin);
}

/* print_and_build_hdr - read the old request headers, then build the new request headers */
/*
 * Append text to the NUL-terminated string dst only when the result,
 * terminator included, fits in capacity bytes. Returns 1 when appended,
 * 0 when text was left out.
 */
static int append_header(char *dst, size_t capacity, const char *text) {
    size_t used = strlen(dst);
    size_t extra = strlen(text);

    if (used + extra >= capacity)
        return 0;
    memcpy(dst + used, text, extra + 1);
    return 1;
}

/* Nonzero when line begins with the header name (case-insensitive). */
static int is_header(const char *line, const char *name) {
    return strncasecmp(line, name, strlen(name)) == 0;
}

/*
 * Complete the request that is forwarded to the real server.
 *
 * new_request must already contain the request line and is treated as a
 * MAXLINE-byte buffer, which is how doit() declares it. The proxy's own
 * Host, User-Agent, Connection and Proxy-Connection headers are written
 * first; the client's remaining headers are then copied through, except for
 * those four, which the proxy replaces. A client header that would not fit
 * is dropped whole rather than truncated. The header block is closed with an
 * empty line.
 */
void print_and_build_hdr(rio_t *real_client, char *new_request, char *hostname, char *port){
    char temp_buf[MAXLINE];
    /* always keep two bytes free for the blank line that ends the headers */
    const size_t header_room = MAXLINE - 2;
    int host_len;

    /* build the proxy's own request headers */
    host_len = snprintf(temp_buf, sizeof temp_buf, "Host: %s:%s\r\n", hostname, port);
    if (host_len > 0 && (size_t)host_len < sizeof temp_buf)
        append_header(new_request, header_room, temp_buf);
    append_header(new_request, header_room, user_agent_hdr);
    append_header(new_request, header_room, conn_hdr);
    append_header(new_request, header_room, proxy_hdr);

    /* read the client's headers up to the empty line that terminates them */
    while (Rio_readlineb(real_client, temp_buf, MAXLINE) > 0) {
        if (strcmp(temp_buf, "\r\n") == 0 || strcmp(temp_buf, "\n") == 0) break; /* read to end */

        if (is_header(temp_buf, "Host:")) continue;
        if (is_header(temp_buf, "User-Agent:")) continue;
        if (is_header(temp_buf, "Connection:")) continue;
        if (is_header(temp_buf, "Proxy-Connection:")) continue;

        append_header(new_request, header_room, temp_buf);
    }

    append_header(new_request, MAXLINE, "\r\n");
}

/* initialize the cache */
void init_cache(void){
    /* cache is a Array of object*/
    cache = (object*)Malloc(MAX_CACHE_SIZE);
    for(int i = 0; i < 10; i++){
        cache[i].url = (char*)Malloc(sizeof(char) * MAXLINE);
        cache[i].content = (char*)Malloc(sizeof(char) * MAX_OBJECT_SIZE);
        (cache[i].cnt) = 0;
        (cache[i].is_used) = 0;

/* initialize the mutex */
static void init_mutex(void){
    Sem_init(&readcnt_mutex, 0, 1);
    Sem_init(&writer_mutex, 0, 1);

/* reader - read from cache to real client */
int reader(int fd, char* url){
        int from_cache = 0; /* equals 0 => obj not from cache; equals 1 => obj from cache */

        if(readcnt == 1) /* First in */

        /* obj from cache then we should write content to fd of real client */
        for(int i = 0; i < NUMBERS_OBJECT; i++){
            if(cache[i].is_used == 1&& (strcmp(url, cache[i].url) == 0)){
                from_cache = 1;
                Rio_writen(fd, cache[i].content, MAX_OBJECT_SIZE);

        if(readcnt == 0) /* last out */

        return from_cache;        

/* writer - write from real server to cache */
void writer(char* buf, char* url){
        int min_cnt = (cache[0].cnt);
        int insert_or_evict_i;


        /* LRU: find the empty obj to insert or the obj of min cnt to evict */
        for(int i = 0; i < NUMBERS_OBJECT; i++){
            if((cache[i].is_used) == 0){ /* insert */
                insert_or_evict_i = i;
            if((cache[i].cnt) < min_cnt){ /* evict */
                insert_or_evict_i = i;
                min_cnt = (cache[i].cnt);
        strcpy(cache[insert_or_evict_i].url, url);
        strcpy(cache[insert_or_evict_i].content, buf);
        (cache[insert_or_evict_i].cnt) = 0;
        (cache[insert_or_evict_i].is_used) = 1;



