kernel网络协议栈(1) 初始化

今天打算开始写linux内核网络协议栈相关的文章.
不能保证一定会更新完毕,有时间有兴趣的时候,会写一写。
2019.4.26 / fluray / 内核版本 5.0.9
原创文章,转载请注明出处。
 
首先要知道c语言实现的最基本的TCP C/S的模型/代码。
当我们在 Linux 下使用下列函数创建套接字的时候,发生了什么?
int socket(int af, int type, int protocol);
我们看socket这个syscall的调用栈:
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
    return __sys_socket(family, type, protocol);
}
//__sys_socket
sock_create(family, type, protocol, &sock);
    __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
//读 net_families[family]
        pf = rcu_dereference(net_families[family]);
        ...
        pf->create(net, sock, protocol, kern);
            inet_create
//在inet_create中有下列代码
                if (sk->sk_prot->init) {
                    err = sk->sk_prot->init(sk);
                        tcp_v4_init_sock
这里有2个问题(实质都是何时被初始化的问题)
1.pf = rcu_dereference(net_families[family])中的net_families数组何时被初始化?
换句话说 为什么pf->create 是 inet_create ?
2.sk->sk_prot->init 为何是 tcp_v4_init_sock

首先了解一下linux kernel初始化时,tcp初始化相关的部分:
static int __init inet_init(void)
{
    struct inet_protosw *q;
    ...
    rc = proto_register(&tcp_prot, 1);//1
    ...
    rc = proto_register(&udp_prot, 1);
    ...
    (void)sock_register(&inet_family_ops);//2

    if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
        pr_crit("%s: Cannot add ICMP protocol\n", __func__);
    if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
        pr_crit("%s: Cannot add UDP protocol\n", __func__);
    if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)//3
        pr_crit("%s: Cannot add TCP protocol\n", __func__);
    ...
    /* Register the socket-side information for inet_create. */
    for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)//4
        INIT_LIST_HEAD(r);

    for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)//5
        inet_register_protosw(q); //这个函数初始化了 inetsw
    ...
}
这里面有几个关键的结构体,我做了标号,我们先看一下,知道大体上有哪些成员即可。
第一个:
struct proto tcp_prot = {
    .name            = "TCP",
    .owner            = THIS_MODULE,
    .close            = tcp_close,
    .pre_connect    = tcp_v4_pre_connect,
    .connect        = tcp_v4_connect,
    .disconnect        = tcp_disconnect,
    .accept            = inet_csk_accept,
    .ioctl            = tcp_ioctl,
    .init            = tcp_v4_init_sock,//初始化函数,在哪里被调用呢?
    .destroy        = tcp_v4_destroy_sock,
    .shutdown        = tcp_shutdown,
    .setsockopt        = tcp_setsockopt,
    .getsockopt        = tcp_getsockopt,
    .keepalive        = tcp_set_keepalive,
    .recvmsg        = tcp_recvmsg,
    .sendmsg        = tcp_sendmsg,
    .sendpage        = tcp_sendpage,
    ...
};
EXPORT_SYMBOL(tcp_prot);
第二个:
static const struct net_proto_family inet_family_ops = {
    .family = PF_INET,
    .create = inet_create,//注意这个函数
    .owner    = THIS_MODULE,
};
//这个对象在注册的时候,做了什么?
(void)sock_register(&inet_family_ops);
//写入到 net_families[family]中
        rcu_assign_pointer(net_families[ops->family], ops);

第三个:
static struct net_protocol tcp_protocol = {
    ...
    .handler    =    tcp_v4_rcv,
    ...
};
第五个:
static struct inet_protosw inetsw_array[] =
{
    {
        .type =       SOCK_STREAM,
        .protocol =   IPPROTO_TCP,
        .prot =       &tcp_prot,//注意这个结构体(struct proto tcp_prot)
        .ops =        &inet_stream_ops,
        .flags =      INET_PROTOSW_PERMANENT |
                  INET_PROTOSW_ICSK,
    },

    {
        .type =       SOCK_DGRAM,
        .protocol =   IPPROTO_UDP,
        .prot =       &udp_prot,
        .ops =        &inet_dgram_ops,
        .flags =      INET_PROTOSW_PERMANENT,
       },
    ...
}
那么第四个 struct list_head inetsw[SOCK_MAX];
这个数组何时被写入有效数据?
inet_register_protosw 这个函数根据 inetsw_array 中的信息来初始化inetsw。

写入有效数据以后,以后在用的时候,就可以这么用
    struct inet_protosw *answer;
    ...
    rcu_read_lock();
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
        if (protocol == answer->protocol) {
            ...
        }
        ...
    }
    ...
    rcu_read_unlock();

//注:rcu_read_lock rcu_read_unlock 语义上可以认为是读写锁中的读锁
问题1的解答:
// inet_create 在哪注册的?
static const struct net_proto_family inet_family_ops = {
    .family = PF_INET,
    .create = inet_create,
    .owner    = THIS_MODULE,
};
inet_init
    (void)sock_register(&inet_family_ops);
//写 net_families[family]
        rcu_assign_pointer(net_families[ops->family], ops);

问题2的解答:

static int inet_create(struct net *net, struct socket *sock, int protocol,
               int kern)
{
    struct sock *sk;
    struct inet_protosw *answer;
    struct inet_sock *inet;
    struct proto *answer_prot;
    ...
lookup_protocol:
    err = -ESOCKTNOSUPPORT;
    rcu_read_lock();
//这里遍历 inetsw[sock->type] 这条链表,根据 protocol 查找匹配的 inet_protosw
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

        err = 0;
        /* Check the non-wild match. */
        if (protocol == answer->protocol) {
            if (protocol != IPPROTO_IP)
                break;
    ...
//sock->ops的初始化
//对于tcp来说就是  inet_stream_ops
    sock->ops = answer->ops;  
    answer_prot = answer->prot;
    ...
//sk->sk_prot的初始化 
//对于tcp来说是 tcp_prot
    sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
        sk->sk_prot = sk->sk_prot_creator = prot;

/*
static struct inet_protosw inetsw_array[] =
{
    {
        .type =       SOCK_STREAM,
        .protocol =   IPPROTO_TCP,
        .prot =       &tcp_prot,//注意这个结构体(struct proto tcp_prot)
        .ops =        &inet_stream_ops,
        .flags =      INET_PROTOSW_PERMANENT |
                  INET_PROTOSW_ICSK,
    },
*/
    if (sk->sk_prot->init) {
//那么这里就是 tcp_v4_init_sock
        err = sk->sk_prot->init(sk);
    ...
    }
}

这里要记住:

socket持有的是 ops / inet_stream_ops
sock持有的是 sk_prot / tcp_prot
以后的文章中会用到。

 

posted on 2019-04-26 12:31  fluray  阅读(249)  评论(0)  编辑  收藏  举报

导航