Socket connect 等简要分析

  1. connect 系统调用 分析

#include <sys/types.h> /* See NOTES */
#include <sys/socket.h>
int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen);

其中的参数解释如下:
·int sockfd :套接字描述符。
·const struct sockaddr*addr :要连接的地址。
·socklen_t addrlen :要连接的地址长度。
返回值 0 表示成功, -1 表示失败。

connect 的用途是使用指定的套接字去连接指定的地址。对于面向连接的协议(套接字类型为
SOCK_STREAM ), connect 只能成功一次(当然要如此,因为真正的连接已经建立了)。如果重复调
用 connect ,会返回 -1 表示失败,同时错误码为 EISCONN 。而对于非面向连接的协议(套接字类型为
SOCK_DGRAM ),则可以执行多次 connect (因为这时的 connect 仅仅是设置了默认的目的地址)。

对于 TCP 套接字来说, connect 实际上是要真正地进行三次握手,所以其默认是一个阻塞操作。那么
是否可以写一个非阻塞的 TCP connect 代码呢?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 *  connect(2) entry point: connect the socket behind @fd to the server
 *  address supplied by the caller.  The address lives in user space, so
 *  it is checked and copied into a kernel buffer before it is used.
 *
 *  For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 *  break bindings
 *
 *  NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 *  other SEQPACKET protocols that take time to connect() as it doesn't
 *  include the -EINPROGRESS status for such sockets.
 *
 *  Returns the result of the protocol's ->connect() hook, or the error
 *  reported by the descriptor lookup, the address copy or the security
 *  hook, whichever fails first.
 */

SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
        int, addrlen)
{
    struct sockaddr_storage kaddr;
    struct socket *sock;
    int fput_needed;
    int err;

    /* Resolve the file descriptor to its socket; err is filled in on failure. */
    sock = sockfd_lookup_light(fd, &err, &fput_needed);
    if (!sock)
        return err;

    /* Copy the socket address from user space into kernel space. */
    err = move_addr_to_kernel(uservaddr, addrlen, &kaddr);
    if (err >= 0) {
        err = security_socket_connect(sock, (struct sockaddr *)&kaddr,
                          addrlen);
        if (!err) {
            /*
             * Dispatch to the socket-layer operation.  For SOCK_STREAM
             * over IPv4 this is expected to be inet_stream_connect().
             */
            err = sock->ops->connect(sock, (struct sockaddr *)&kaddr,
                         addrlen, sock->file->f_flags);
        }
    }

    /* Drop the reference taken by sockfd_lookup_light(). */
    fput_light(sock->file, fput_needed);
    return err;
}

 

1
2
3
4
5
6
7
8
9
10
/*
 * Locked wrapper around __inet_stream_connect(): takes the socket lock,
 * performs the connect and releases the lock again.  Returns whatever
 * __inet_stream_connect() returns.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
            int addr_len, int flags)
{
    struct sock *sk = sock->sk;
    int ret;

    /* Enter the critical section for this socket. */
    lock_sock(sk);
    ret = __inet_stream_connect(sock, uaddr, addr_len, flags);
    release_sock(sk);

    return ret;
}

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 *  Connect to a remote host. There is regrettably still a little
 *  TCP 'magic' in here.
 *
 *  @sock:     socket to connect
 *  @uaddr:    destination address (already in kernel space)
 *  @addr_len: length of @uaddr in bytes
 *  @flags:    file flags; only O_NONBLOCK is examined here
 *
 *  Returns 0 once the connection is established (sock->state becomes
 *  SS_CONNECTED), otherwise a negative errno:
 *    -EINVAL       address too short, or unexpected sock->state
 *    -EISCONN      already connected, or sk_state is not TCP_CLOSE
 *    -EALREADY     a connect is already in progress and we did not wait
 *                  for it to finish (non-blocking, or the wait timed out)
 *    -EINPROGRESS  connect just started and we did not wait for it to
 *                  finish (non-blocking, or the wait timed out)
 *    sock_intr_errno() value if a signal interrupted the wait
 *    sock_error() value, or -ECONNABORTED, if the sock ended up in TCP_CLOSE
 *
 *  inet_stream_connect() calls this with the socket lock held.
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
              int addr_len, int flags)
{
    struct sock *sk = sock->sk;
    int err;
    long timeo;
    /* Length sanity check: the address must at least contain sa_family. */
    if (addr_len < sizeof(uaddr->sa_family))
        return -EINVAL;
 
    /* An AF_UNSPEC address means "disconnect" rather than "connect". */
    if (uaddr->sa_family == AF_UNSPEC) {
        err = sk->sk_prot->disconnect(sk, flags);
        /* Set the socket state according to whether the disconnect succeeded. */
        sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
        goto out;
    }
 
    switch (sock->state) {
    default:
        err = -EINVAL;
        goto out;
    /* This socket is already connected to a peer. */
    case SS_CONNECTED:
        err = -EISCONN;/* Transport endpoint is already connected */
        goto out;
    case SS_CONNECTING:/* a connect is already being established */
        err = -EALREADY;/* Operation already in progress */
        /* Fall out of switch with err, set for this state */
        break;
    case SS_UNCONNECTED:
        /* A new connect may only start while the sock is in TCP_CLOSE. */
        err = -EISCONN;
        if (sk->sk_state != TCP_CLOSE)
            goto out;
        /*
         * Protocol-specific connect.  For TCP, sk_prot is expected to be
         * tcp_prot with connect == tcp_v4_connect() (not shown here).
         */
        err = sk->sk_prot->connect(sk, uaddr, addr_len);/* for TCP: sends the SYN */
        if (err < 0)
            goto out;
        /* The connect has been started, so mark the socket as connecting. */
        sock->state = SS_CONNECTING;
 
        /* Just entered SS_CONNECTING state; the only
         * difference is that return value in non-blocking
         * case is EINPROGRESS, rather than EALREADY.
         */
        err = -EINPROGRESS;
        break;
    }
    /*
     * Send timeout for this sock; sock_sndtimeo() is told whether
     * O_NONBLOCK is set (expected to yield 0 in the non-blocking case).
     */
    timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
    /* The handshake is still in flight: wait for it to complete. */
    if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
        int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
                tcp_sk(sk)->fastopen_req &&
                tcp_sk(sk)->fastopen_req->data ? 1 : 0;
        /*
         * With a zero timeout (non-blocking) return the error code chosen
         * in the switch above (-EINPROGRESS or -EALREADY) right away.
         * Otherwise sleep in inet_wait_for_connect():
         * 1. It returns 0 when the timeout expired; the error code set
         *    above is returned.
         * 2. It returns the remaining time when a signal is pending; the
         *    value of sock_intr_errno() is returned (presumably
         *    -ERESTARTSYS or -EINTR).
         * 3. It returns non-zero after being woken because the sock left
         *    the SYN states; on a completed handshake 0 is returned below.
         */
 
        /* Error code is set above */
        if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
            goto out;
 
        err = sock_intr_errno(timeo);
        /*
         * A signal is pending.  NOTE(review): if err is -ERESTARTSYS the
         * connect() call is presumably restarted after signal handling.
         */
 
        if (signal_pending(current))
            goto out;
    }
 
    /* Connection was closed by RST, timeout, ICMP error
     * or another process disconnected us.
     */
    if (sk->sk_state == TCP_CLOSE)
        goto sock_error;
 
    /* sk->sk_err may be not zero now, if RECVERR was ordered by user
     * and error was received after socket entered established state.
     * Hence, it is handled normally after connect() return successfully.
     */
    /* Mark the socket as connected. */
    sock->state = SS_CONNECTED;
    err = 0;
out:
    return err;
 
sock_error:
    /* Report the pending sock error, or -ECONNABORTED if there is none. */
    err = sock_error(sk) ? : -ECONNABORTED;
    sock->state = SS_UNCONNECTED;
    if (sk->sk_prot->disconnect(sk, flags))
        sock->state = SS_DISCONNECTING;
    goto out;
}
EXPORT_SYMBOL(__inet_stream_connect);

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/*
 * Sleep until the sock leaves the SYN_SENT/SYN_RECV states, the timeout
 * expires, or a signal becomes pending.
 *
 * @sk:        sock being connected; the socket lock is dropped while
 *             sleeping and re-taken before returning
 * @timeo:     maximum time to wait
 * @writebias: amount added to sk->sk_write_pending for the duration of
 *             the wait
 *
 * Returns the remaining timeout; 0 means the wait timed out.
 */
static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
{
    DEFINE_WAIT(wait);
    /* Queue this task on the sock's wait queue and mark it TASK_INTERRUPTIBLE. */
    prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
    sk->sk_write_pending += writebias;
 
    /* Basic assumption: if someone sets sk->sk_err, he _must_
     * change state of the socket from TCP_SYN_*.
     * Connect() does not allow to get error notifications
     * without closing the socket.
     */
    /*
     * Loop while the handshake is still in progress; once the state
     * leaves SYN_SENT/SYN_RECV (e.g. becomes TCP_ESTABLISHED) the loop ends.
     */
    while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
        release_sock(sk);
        /*
         * Sleep until the timeout expires, a signal arrives, or we are
         * woken by the sock's state-change handling.
         * NOTE(review), per schedule_timeout() semantics — confirm:
         * 1. Woken early (signal or wakeup): timeo is the remaining jiffies.
         * 2. Timeout expired (e.g. SO_SNDTIMEO was set): timeo is 0.
         * 3. No timeout configured: timeo is MAX_SCHEDULE_TIMEOUT and the
         *    same value comes back, so the enclosing while loop is what
         *    keeps the task blocked until the state changes.
         */
 
        timeo = schedule_timeout(timeo);
        lock_sock(sk);
        /* Stop waiting if a signal is pending or the timeout ran out. */
        if (signal_pending(current) || !timeo)
            break;
        prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
    }
    /* Done waiting: take the task off the wait queue and restore its running state. */
    finish_wait(sk_sleep(sk), &wait);
    sk->sk_write_pending -= writebias;
    return timeo;
}
/**/
进程的唤醒
  
 
三次握手中,当客户端收到SYNACK、发出ACK后,连接就成功建立了。
 
此时连接的状态从TCP_SYN_SENT或TCP_SYN_RECV变为TCP_ESTABLISHED,sock的状态发生变化,
 
会调用sock_def_wakeup()来处理连接状态变化事件,唤醒进程,connect()就能成功返回了。
 
sock_def_wakeup()的函数调用路径如下:
 
tcp_v4_rcv
 
tcp_v4_do_rcv
 
tcp_rcv_state_process
 
tcp_rcv_synsent_state_process
 
tcp_finish_connect
 
sock_def_wakeup
 
wake_up_interruptible_all
 
__wake_up

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/*
 * Excerpt of tcp_finish_connect(): moves the sock to TCP_ESTABLISHED and
 * notifies waiters.  The dashed lines below mark code that the excerpt
 * leaves out; they are not valid C.
 */
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct inet_connection_sock *icsk = inet_csk(sk);
 
    tcp_set_state(sk, TCP_ESTABLISHED);
 
    ------------------------
        ----------------------------  
     
    /* Only notify if the sock is not flagged SOCK_DEAD. */
    if (!sock_flag(sk, SOCK_DEAD)) {
        sk->sk_state_change(sk);---->// expected to point to sock_def_wakeup — confirm
        /* If async notification is enabled, signal that the sock is writable (presumably via SIGIO). */
        sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
    }
}   
 
/*
 * Forward an async wakeup to the owning socket, but only when the sock
 * has the SOCK_FASYNC flag set; otherwise do nothing.
 */
static inline void sk_wake_async(struct sock *sk, int how, int band)
{
    if (!sock_flag(sk, SOCK_FASYNC))
        return;

    sock_wake_async(sk->sk_socket, how, band);
}
 
/*
 * State-change callback: wake every interruptible sleeper waiting on the
 * sock's wait queue.  The wait-queue pointer is read under the RCU read
 * lock.
 */
static void sock_def_wakeup(struct sock *sk)
{
    struct socket_wq *waitq;

    rcu_read_lock();

    waitq = rcu_dereference(sk->sk_wq);
    /* Skip the wakeup entirely when nobody is sleeping on the queue. */
    if (wq_has_sleeper(waitq)) {
        wake_up_interruptible_all(&waitq->wait);
    }

    rcu_read_unlock();
}
 
//最终调用__wake_up_common(),由于nr_exclusive为0,会把此socket上所有的等待进程都唤醒

udp_prot 是 UDP 协议中所有自定义操作函数的集合,其 connect 的实现函数为 ip4_datagram_connect,
主要工作是设置目的 IP、目的端口和路由信息。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*
 * Connect a datagram socket: look up a route to the destination and
 * record the destination address/port and the route on the sock.
 *
 * @sk:       sock to connect
 * @uaddr:    destination address, interpreted as struct sockaddr_in
 * @addr_len: length of @uaddr in bytes
 *
 * Returns 0 on success, -EINVAL if @addr_len is too short, -EAFNOSUPPORT
 * if the family is not AF_INET, the route lookup error, or -EACCES for a
 * broadcast route when the sock lacks SOCK_BROADCAST.
 */
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
    struct inet_sock *inet = inet_sk(sk);
    struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
    struct flowi4 *fl4;
    struct rtable *rt;
    __be32 saddr;
    int oif;
    int err;
 
 
    if (addr_len < sizeof(*usin))
        return -EINVAL;
 
    if (usin->sin_family != AF_INET)
        return -EAFNOSUPPORT;
    // Reset the sock's cached route entry.
    sk_dst_reset(sk);
 
    lock_sock(sk);
    // Index of the network device this sock is bound to (0 if none).
 
    oif = sk->sk_bound_dev_if;
    saddr = inet->inet_saddr;
    // For a multicast destination, fall back to the multicast interface
    // index and multicast source address when oif / saddr are not set.
    if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
        if (!oif)
            oif = inet->mc_index;
        if (!saddr)
            saddr = inet->mc_addr;
    }
    fl4 = &inet->cork.fl.u.ip4;
    /*
     * Look up a route with ip_route_connect(), keyed on the destination
     * address, source address, output device index, protocol and the
     * source/destination ports.
     * If the lookup fails, the error is returned.  If the route found is a
     * broadcast route and the sock does not have SOCK_BROADCAST set, the
     * route is released and -EACCES is returned.  On success sk_state is
     * set to TCP_ESTABLISHED and the route is stored on the sock with
     * sk_dst_set().
     */
    rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
                  RT_CONN_FLAGS(sk), oif,
                  sk->sk_protocol,
                  inet->inet_sport, usin->sin_port, sk, true);
    if (IS_ERR(rt)) {
        err = PTR_ERR(rt);
        /* Count "no route" failures in the IP statistics. */
        if (err == -ENETUNREACH)
            IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
        goto out;
    }
    // Broadcast route without SOCK_BROADCAST: drop the route reference and fail.
 
    if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
        ip_rt_put(rt);
        err = -EACCES;
        goto out;
    }
    if (!inet->inet_saddr)// no source address yet: take it from the route lookup
        inet->inet_saddr = fl4->saddr;    /* Update source address */
    if (!inet->inet_rcv_saddr) {
        inet->inet_rcv_saddr = fl4->saddr;
        /* The receive address changed, so let the protocol rehash the sock if it can. */
        if (sk->sk_prot->rehash)
            sk->sk_prot->rehash(sk);
    }// Record the destination address and port; the source port was already used for the lookup above.
    inet->inet_daddr = fl4->daddr;
    inet->inet_dport = usin->sin_port;
    sk->sk_state = TCP_ESTABLISHED;
    inet->inet_id = jiffies;
 
    /* Store the route on the sock. */
    sk_dst_set(sk, &rt->dst);
    err = 0;
out:
    release_sock(sk);
    return err;
}

 

posted @   codestacklinuxer  阅读(2933)  评论(0编辑  收藏  举报
编辑推荐:
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 一个奇形怪状的面试题:Bean中的CHM要不要加volatile?
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
阅读排行:
· 没有源码,如何修改代码逻辑?
· PowerShell开发游戏 · 打蜜蜂
· 在鹅厂做java开发是什么体验
· 百万级群聊的设计实践
· WPF到Web的无缝过渡:英雄联盟客户端的OpenSilver迁移实战
点击右上角即可分享
微信分享提示