setsockopt的TCP层实现剖析
应用层
NAME
setsockopt - set options on sockets
SYNOPSIS
#include <sys/types.h>
#include <sys/socket.h>
int setsockopt (int s, int level, int optname, const void *optval, socklen_t optlen);
EXAMPLE
自定义一个TCP层Socket选项:TCP_MAX_CWND。
/* Option value passed to the kernel; for TCP_MAX_CWND, do_tcp_setsockopt()
 * stores it in tp->snd_cwnd_clamp. */
int one = 1;
/* Set the custom TCP_MAX_CWND option at the TCP level (SOL_TCP) on sockfd.
 * NOTE(review): the return value is not checked in this example. */
setsockopt(sockfd, SOL_TCP, TCP_MAX_CWND, &one, sizeof(one));
来看一下通用的TCP层Socket选项:
@netinet/tcp.h:
/*
 * User-settable options (used with setsockopt).
 * Each option is a separate preprocessor directive on its own line.
 */
#define TCP_NODELAY       1  /* Don't delay send to coalesce packets */
#define TCP_MAXSEG        2  /* Set Maximum segment size */
#define TCP_CORK          3  /* Control sending of partial frames */
#define TCP_KEEPIDLE      4  /* Start keepalives after this period */
#define TCP_KEEPINTVL     5  /* Interval between keepalives */
#define TCP_KEEPCNT       6  /* Number of keepalives before death */
#define TCP_SYNCNT        7  /* Number of SYN retransmits */
#define TCP_LINGER2       8  /* Life time of orphaned FIN_WAIT2 state */
#define TCP_DEFER_ACCEPT  9  /* Wake up listener only when data arrive */
#define TCP_WINDOW_CLAMP 10  /* Bound advertised window */
/* Note: TCP_INFO cannot be used for setting; it is only for reading. */
#define TCP_INFO         11  /* Information about this connection */
#define TCP_QUICKACK     12  /* Block/reenable quick ACKs */
Linux除了支持以上通用的TCP层Socket选项,还支持一些它特有的选项(较新的版本中又多了一些):
@linux/tcp.h:
/*
 * TCP socket options.
 * Each option is a separate preprocessor directive on its own line.
 */
#define TCP_NODELAY               1  /* Turn off Nagle's algorithm. */
#define TCP_MAXSEG                2  /* Limit MSS */
#define TCP_CORK                  3  /* Never send partially complete segments */
#define TCP_KEEPIDLE              4  /* Start keepalives after this period */
#define TCP_KEEPINTVL             5  /* Interval between keepalives */
#define TCP_KEEPCNT               6  /* Number of keepalives before death */
#define TCP_SYNCNT                7  /* Number of SYN retransmits */
#define TCP_LINGER2               8  /* Life time of orphaned FIN_WAIT2 state */
#define TCP_DEFER_ACCEPT          9  /* Wake up listener only when data arrive */
#define TCP_WINDOW_CLAMP         10  /* Bound advertised window */
#define TCP_INFO                 11  /* Information about this connection. */
#define TCP_QUICKACK             12  /* Block/reenable quick acks */
#define TCP_CONGESTION           13  /* Congestion control algorithm */
#define TCP_MD5SIG               14  /* TCP MD5 Signature (RFC2385) */
#define TCP_COOKIE_TRANSACTIONS  15  /* TCP Cookie Transactions */
#define TCP_THIN_LINEAR_TIMEOUTS 16  /* Use linear timeouts for thin streams */
#define TCP_THIN_DUPACK          17  /* Fast retrans. after 1 dupack */
函数关系
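函数调用关系图如下(原图缺失;由下文代码可知其末端的调用关系为:tcp_prot.setsockopt 即 tcp_setsockopt() → level 为 SOL_TCP 时调用 do_tcp_setsockopt(),否则调用 icsk->icsk_af_ops->setsockopt()):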
数据结构
/*
 * TCP's struct proto instance (excerpt; "..." marks members omitted in this
 * listing). The socket-option hooks are set to tcp_setsockopt and
 * tcp_getsockopt.
 */
struct proto tcp_prot = {
	.name = "TCP",
	.owner = THIS_MODULE,
	...
	.setsockopt = tcp_setsockopt,	/* setsockopt hook for TCP sockets */
	.getsockopt = tcp_getsockopt,	/* getsockopt hook for TCP sockets */
	...
};
函数实现
/*
 * tcp_setsockopt - setsockopt entry point installed in tcp_prot.
 * @sk:      socket the option applies to
 * @level:   protocol level the option belongs to
 * @optname: option identifier
 * @optval:  user-space pointer to the option value
 * @optlen:  size of the buffer at @optval, in bytes
 *
 * Options at level SOL_TCP are handled by do_tcp_setsockopt(). Any other
 * level is forwarded to the setsockopt operation of the connection socket's
 * icsk_af_ops table.
 *
 * Returns whatever the selected handler returns.
 */
int tcp_setsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, unsigned int optlen)
{
	if (level == SOL_TCP)
		return do_tcp_setsockopt(sk, level, optname, optval, optlen);

	/* Not a TCP-level option: hand it to the address-family operations. */
	return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, optname,
						     optval, optlen);
}
/*
 * do_tcp_setsockopt - apply a SOL_TCP socket option (excerpt; "..." marks
 * cases omitted in this listing).
 * @sk:      socket the option applies to
 * @level:   protocol level (not examined in the visible code)
 * @optname: option identifier (TCP_*)
 * @optval:  user-space pointer to the option value
 * @optlen:  size of the buffer at @optval, in bytes
 *
 * String-valued options are handled in the first switch and return directly.
 * Every other option is read as a single int from user space and handled in
 * the second switch with the socket locked.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a too-short or
 * out-of-range value, -EFAULT if the user buffer cannot be read,
 * -ENOPROTOOPT for an option with no case in the second switch.
 */
static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
			     char __user *optval, unsigned int optlen)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int val;
	int err = 0;

	/* These are data/string values, all the others are ints */
	switch (optname) {
	/* Select the TCP congestion control algorithm used by the connection */
	case TCP_CONGESTION: {
		char name[TCP_CA_NAME_MAX];

		if (optlen < 1)
			return -EINVAL;

		/*
		 * At most TCP_CA_NAME_MAX - 1 bytes are copied, leaving room
		 * for the terminator written below (original note: the name
		 * should not exceed 15 bytes).
		 */
		val = strncpy_from_user(name, optval,
					min_t(long, TCP_CA_NAME_MAX - 1, optlen));
		if (val < 0)
			return -EFAULT;
		name[val] = 0;

		lock_sock(sk);
		err = tcp_set_congestion_control(sk, name);
		release_sock(sk);
		return err;
	}
	case TCP_COOKIE_TRANSACTIONS: {
		...
	}
	default:
		break; /* fallthru */
	}

	if (optlen < sizeof(int))
		return -EINVAL; /* -22, Invalid argument */

	if (get_user(val, (int __user *) optval)) /* fetch the value from user space */
		return -EFAULT; /* -14, Bad address */

	lock_sock(sk);

	switch(optname) {
	case TCP_MAXSEG:
		/* Values greater than interface MTU won't take effect. However at the point
		 * when this call is done we typically don't yet know which interface is going to be used
		 */
		if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
			/* original note: must be between 88 and 32767 */
			err = -EINVAL;
			break;
		}
		/* original note: from now on neither the local nor the peer
		 * MSS will be larger than this value */
		tp->rx_opt.user_mss = val;
		break;
	...
	case TCP_WINDOW_CLAMP:
		if (! val) {
			/* A zero clamp is only accepted while the socket is in TCP_CLOSE. */
			if (sk->sk_state != TCP_CLOSE) {
				err = -EINVAL;
				break;
			}
			tp->window_clamp = 0; /* tp->window_clamp: Maximal window to advertise */
		} else
			/* Values below SOCK_MIN_RCVBUF / 2 are raised to it (original
			 * note: minimum advertised window is
			 * (2048 + sizeof(struct sk_buff)) / 2) */
			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? SOCK_MIN_RCVBUF/2 : val;
		break;
	...
	case TCP_THIN_DUPACK:
		/* Boolean option: only 0 and 1 are accepted. */
		if (val < 0 || val > 1)
			err = -EINVAL;
		else
			tp->thin_dupack = val;
		break;
	case TCP_MAX_CWND: /* custom option */
		/*
		 * NOTE(review): val == 0 is accepted and stored as the clamp;
		 * presumably a zero clamp is not intended -- confirm.
		 */
		if (val < 0)
			err = -EINVAL;
		else
			tp->snd_cwnd_clamp = val; /* change max value of snd_cwnd */
		break;
	...
	default:
		err = -ENOPROTOOPT; /* -92, protocol has no such option */
	}

	release_sock(sk);
	return err;
}
Author
zhangskd @ csdn blog