网络设备

网络设备状态控制

内核模块和用户空间模块获取网络设备的信息状态主要方式有

netdev_chain 通知链,以及 netlink 的 RTMGRP_LINK 组播组。

通知链的事件:

 

网络设备的启用:

ifconfig up 命令最终通过 ioctl 的 SIOCSIFFLAGS 来激活设备:SIOCSIFFLAGS 经由 dev_change_flags 调用 dev_open 激活设备,同时向网络设备状态改变的通知链发送一条 NETDEV_UP 消息。

网卡链路状态检测

当网卡链路状态变化时(如断开或连上),网卡会通知驱动程序或者由驱动程序去查询
网卡的相关寄存器位(例如在 timeout 时去查询这些位),然后由 netif_carrier_on/off 去通知
内核这个变化。

/*
 * netif_carrier_on - record that the device has a carrier.
 * @dev: network device whose link came up
 *
 * Clears __LINK_STATE_NOCARRIER in dev->state. A link-watch event is fired
 * only when the bit was previously set. Independently of that, if the
 * device is running, __netdev_watchdog_up() is called for it.
 */
void netif_carrier_on(struct net_device *dev)
{
	/* test_and_clear_bit() clears the bit and returns its previous value. */
	int was_no_carrier = test_and_clear_bit(__LINK_STATE_NOCARRIER,
						&dev->state);

	if (was_no_carrier) {
		linkwatch_fire_event(dev);
	}

	if (netif_running(dev)) {
		__netdev_watchdog_up(dev);
	}
}
static inline netif_carrier_off(struct net_device *dev)
{
//test_and_set_bit - Set a bit and return its old value
if (!test_and_set_bit(_ _LINK_STATE_NOCARRIER, &dev->state))
linkwatch_fire_event(dev);
}
这样,netif_carrier_on/off 会调用 linkwatch_fire_event 提交事件,后者会创建一个 lw_event 结构:
/* One pending link-watch event, queued on lweventlist. */
struct lw_event {
	struct list_head list;	/* linkage into the event queue */
	struct net_device *dev;	/* network device this event is associated with */
};
并将这个结构初始化后(event->dev = dev;)加入到事件队列中:
/* Append the event to the tail of lweventlist while holding lweventlist_lock. */
spin_lock_irqsave(&lweventlist_lock, flags);
list_add_tail(&event->list, &lweventlist);
spin_unlock_irqrestore(&lweventlist_lock, flags);
然后它调用 schedule_work(&linkwatch_work),由内核线程去处理这些事件。它最终由
linkwatch_run_queue(void) 去完成这些处理工作:
/*
 * Excerpt from linkwatch_run_queue(): walk every queued lw_event on the
 * local list `head` and act on the device it refers to.
 */
list_for_each_safe(n, next, &head) {
struct lw_event *event = list_entry(n, struct lw_event, list);
struct net_device *dev = event->dev;

/* Devices that are not administratively up are ignored entirely. */
if (dev->flags & IFF_UP) {
/* Carrier present -> dev_activate(); carrier absent -> dev_deactivate(). */
if (netif_carrier_ok(dev)) {
dev_activate(dev);
} else
dev_deactivate(dev);
/* Not part of the else branch: runs for both carrier states. */
netdev_state_change(dev);
}
}
可以看到,它的最主要工作之一就是 netdev_state_change(dev)
/*
 * netdev_state_change - announce a state change of a device.
 * @dev: network device whose state changed
 *
 * Does nothing unless IFF_UP is set in dev->flags. Otherwise it calls the
 * netdev_chain notifiers with NETDEV_CHANGE and then issues an
 * RTM_NEWLINK message through rtmsg_ifinfo().
 */
void netdev_state_change(struct net_device *dev)
{
	/* A device that is not up generates no notification at all. */
	if (!(dev->flags & IFF_UP))
		return;

	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
	rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
这个函数通知注册到 netdev_chain 链表的所有子系统:这个网卡的链路状态有了变化。也就是
说,如果某个子系统对网卡的链路状态变化感兴趣,它就可以注册到这个链表中,在变化产
生时,内核便会通知这些子系统。
注意: a. 它只会在网卡状态为 UP 时才发出通知,因为如果状态为 DOWN,网卡链
路的状态改变也没什么意义。
b. 每个网卡的这些状态变化事件 lw_event 不会重复入队,即每个网卡在队列中只有一个
事件实例。另外由上面的 lw_event 结构可以看到,它只包含发生状态变化的网卡
设备,而没有包含链路是连上还是断开的状态参数。

ifconfig ethtool iproute 工具概要

 应用层对套接口有关操作通过ioctl处理:

socket 为一个字符设备驱动,应用层的ioctl操作会调用驱动的file_operations 对应的ioctl接口;

 

对于socket来说就是socket_file_ops 接口集合的sock_ioctl

/*
 * File operations installed on socket files. ioctl requests issued on a
 * socket descriptor are dispatched to sock_ioctl (and to
 * compat_sock_ioctl when CONFIG_COMPAT is enabled).
 */
static const struct file_operations socket_file_ops = {
    .owner =    THIS_MODULE,
    .llseek =    no_llseek,
    .aio_read =    sock_aio_read,
    .aio_write =    sock_aio_write,
    .poll =        sock_poll,
    .unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl = compat_sock_ioctl,
#endif
    .mmap =        sock_mmap,
    .open =        sock_no_open,    /* special open code to disallow open via /proc */
    .release =    sock_close, /* application-level close() */
    .fasync =    sock_fasync,
    .sendpage =    sock_sendpage,
    .splice_write = generic_splice_sendpage,
    .splice_read =    sock_splice_read,
};
View Code

 //主要是和网络物理设备接口相关,例如设置eth0地址,创建eth2 删除   设置路由 ARP等等

/*
 * sock_ioctl - ioctl entry point for socket files (see socket_file_ops).
 *
 * Mostly concerns network device interfaces: e.g. setting the address of
 * eth0, creating or deleting eth2, configuring routes, ARP and so on.
 *
 * Dispatch order:
 *   1. device-private commands go straight to dev_ioctl();
 *   2. with CONFIG_WEXT_CORE, wireless-extension commands do the same;
 *   3. owner, bridge, VLAN and DLCI commands are handled in the switch;
 *   4. everything else is passed to sock_do_ioctl().
 *
 * Returns the value produced by whichever handler ran.
 */
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
    struct socket *sock;
    struct sock *sk;
    void __user *argp = (void __user *)arg;
    int pid, err;
    struct net *net;

    sock = file->private_data;
    sk = sock->sk;
    net = sock_net(sk);
    if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { /* device-private ioctl command range */
        err = dev_ioctl(net, cmd, argp);
    } else
#ifdef CONFIG_WEXT_CORE
    if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
        err = dev_ioctl(net, cmd, argp);
    } else
#endif
        switch (cmd) {
        /* Set the owning process ID or process-group ID of the file/sock. */
        case FIOSETOWN: 
        case SIOCSPGRP:
            err = -EFAULT;
            if (get_user(pid, (int __user *)argp))
                break;
            err = f_setown(sock->file, pid, 1);
            break;

        /* Get the owning process ID or process-group ID of the file/sock. */
        case FIOGETOWN:
        case SIOCGPGRP:
            err = put_user(f_getown(sock->file),
                       (int __user *)argp);
            break;

        /*
         * Get/set/create/delete bridge devices. If no hook is registered
         * yet, try loading the "bridge" module; err stays -ENOPKG when the
         * hook is still missing afterwards.
         */
        case SIOCGIFBR:
        case SIOCSIFBR:
        case SIOCBRADDBR:
        case SIOCBRDELBR:
            err = -ENOPKG;
            if (!br_ioctl_hook)
                request_module("bridge");

            mutex_lock(&br_ioctl_mutex);
            if (br_ioctl_hook)
                err = br_ioctl_hook(net, cmd, argp);
            mutex_unlock(&br_ioctl_mutex);
            break;

        /* Get/set VLAN devices; same load-module-then-call-hook pattern ("8021q"). */
        case SIOCGIFVLAN:
        case SIOCSIFVLAN:
            err = -ENOPKG;
            if (!vlan_ioctl_hook)
                request_module("8021q");

            mutex_lock(&vlan_ioctl_mutex);
            if (vlan_ioctl_hook)
                err = vlan_ioctl_hook(net, argp);
            mutex_unlock(&vlan_ioctl_mutex);
            break;
            
        /* Add/delete DLCI; same pattern with the "dlci" module. */
        case SIOCADDDLCI:
        case SIOCDELDLCI:
            err = -ENOPKG;
            if (!dlci_ioctl_hook)
                request_module("dlci");

            mutex_lock(&dlci_ioctl_mutex);
            if (dlci_ioctl_hook)
                err = dlci_ioctl_hook(cmd, argp);
            mutex_unlock(&dlci_ioctl_mutex);
            break;

        /*
         * Every other command is handed to sock_do_ioctl(), which invokes
         * the socket's own ioctl operation (inet_ioctl() for the inet ops
         * tables that set .ioctl = inet_ioctl).
         */
        default:
            err = sock_do_ioctl(net, sock, cmd, arg);
            break;
        }
    return err;
}
View Code

 如果是tcp或者udp协议相关的

在创建socket 时,会调用inet_create();

/*
 *    Create an inet socket.
 */
//pf_inet的net_families[]为inet_family_ops,对应的套接口层ops参考inetsw_array中的inet_stream_ops inet_dgram_ops inet_sockraw_ops,传输层操作集分别为tcp_prot udp_prot raw_prot
//netlink的net_families[]netlink_family_ops,对应的套接口层ops为netlink_op

 

/*
 *    Create an inet socket.
 */

static int inet_create(struct net *net, struct socket *sock, int protocol,
               int kern)
{.
    ...........................................
        sock->ops = answer->ops; /* 对应的 proto_ops 示例见下面的 inet_dccp_ops */
    answer_prot = answer->prot;
    answer_no_check = answer->no_check;
    answer_flags = answer->flags;        

...........................
}
static const struct proto_ops inet_dccp_ops = {
    .family           = PF_INET,
    .owner           = THIS_MODULE,
    .release       = inet_release,
    .bind           = inet_bind,
    .connect       = inet_stream_connect,
    .socketpair       = sock_no_socketpair,
    .accept           = inet_accept,
    .getname       = inet_getname,
    /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
    .poll           = dccp_poll,
    .ioctl           = inet_ioctl,

 inet_ioctl的实现

/*
 * inet_ioctl - ioctl handler used by the inet socket operation tables.
 * @sock: socket the ioctl was issued on
 * @cmd:  ioctl command number
 * @arg:  raw user argument (a user-space pointer for the cases below)
 *
 * Timestamp, routing, ARP and interface-address commands are forwarded to
 * their dedicated handlers. Any other command goes to the protocol's own
 * ioctl hook when one exists; otherwise -ENOIOCTLCMD is returned.
 */
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
    struct sock *sk = sock->sk;
    struct net *net = sock_net(sk);

    switch (cmd) {
    /* Socket timestamp queries. */
    case SIOCGSTAMP:
        return sock_get_timestamp(sk, (struct timeval __user *)arg);
    case SIOCGSTAMPNS:
        return sock_get_timestampns(sk, (struct timespec __user *)arg);

    /* Add / delete route operations. */
    case SIOCADDRT:
    case SIOCDELRT:
    case SIOCRTMSG:
        return ip_rt_ioctl(net, cmd, (void __user *)arg);

    /* ARP entry delete / get / set. */
    case SIOCDARP:
    case SIOCGARP:
    case SIOCSARP:
        return arp_ioctl(net, cmd, (void __user *)arg);

    /* Device interface (address and flag) operations. */
    case SIOCGIFADDR:
    case SIOCSIFADDR:
    case SIOCGIFBRDADDR:
    case SIOCSIFBRDADDR:
    case SIOCGIFNETMASK:
    case SIOCSIFNETMASK:
    case SIOCGIFDSTADDR:
    case SIOCSIFDSTADDR:
    case SIOCSIFPFLAGS:
    case SIOCGIFPFLAGS:
    case SIOCSIFFLAGS:
        return devinet_ioctl(net, cmd, (void __user *)arg);

    default:
        break;
    }

    /* Protocol-specific socket ioctl, if the protocol provides one. */
    if (!sk->sk_prot->ioctl)
        return -ENOIOCTLCMD;

    return sk->sk_prot->ioctl(sk, cmd, arg);
}
View Code
static long sock_do_ioctl(struct net *net, struct socket *sock,
                 unsigned int cmd, unsigned long arg)
{
    int err;
    void __user *argp = (void __user *)arg;

    err = sock->ops->ioctl(sock, cmd, arg);

    /*
     * If this ioctl is unknown try to hand it down
     * to the NIC driver.
     */
    if (err == -ENOIOCTLCMD)
        err = dev_ioctl(net, cmd, argp);

    return err;
}
View Code

 

dev_ioctl的实现

/**
 *    dev_load     - try to load the module backing a network interface
 *    @net: the applicable net namespace
 *    @name: name of interface
 *
 *    When no interface called @name exists in @net, ask for the module
 *    "netdev-<name>" (needs CAP_NET_ADMIN). If that request reports a
 *    non-zero result, or was not attempted, fall back to requesting a
 *    module literally called <name> (needs CAP_SYS_MODULE). Nothing is
 *    requested when the interface is already present.
 */

void dev_load(struct net *net, const char *name)
{
    int need_module;

    /* The lookup result is only tested for presence, never dereferenced. */
    rcu_read_lock();
    need_module = !dev_get_by_name_rcu(net, name);
    rcu_read_unlock();

    if (!need_module)
        return;

    if (capable(CAP_NET_ADMIN))
        need_module = request_module("netdev-%s", name);

    if (need_module && capable(CAP_SYS_MODULE))
        request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);

/*
 *    This function handles all "interface"-type I/O control requests. The actual
 *    'doing' part of this is dev_ifsioc above.
 */

/**
 *    dev_ioctl    -    network device ioctl
 *    @net: the applicable net namespace
 *    @cmd: command to issue
 *    @arg: pointer to a struct ifreq in user space
 *
 *    Issue ioctl functions to devices. This is normally called by the
 *    user space syscall interfaces but can sometimes be useful for
 *    other purposes. The return value is the return from the syscall if
 *    positive or a negative errno code on error.
 */

int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
    struct ifreq ifr;
    int ret;
    char *colon;

    /* One special case: SIOCGIFCONF takes ifconf argument
       and requires shared lock, because it sleeps writing
       to user space.
     */

    if (cmd == SIOCGIFCONF) {
        rtnl_lock();
        ret = dev_ifconf(net, (char __user *) arg);
        rtnl_unlock();
        return ret;
    }
    /* SIOCGIFNAME is also handled before the ifreq is copied in. */
    if (cmd == SIOCGIFNAME)
        return dev_ifname(net, (struct ifreq __user *)arg);

    /* Every remaining command works on a kernel copy of the user's ifreq. */
    if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
        return -EFAULT;

    /* Force NUL termination of the user-supplied interface name. */
    ifr.ifr_name[IFNAMSIZ-1] = 0;

    /*
     * Cut the name at the first ':' so that the handlers below see only
     * the part before it. Most value-returning cases put the ':' back
     * before copying ifr out to user space.
     */
    colon = strchr(ifr.ifr_name, ':');
    if (colon)
        *colon = 0;

    /*
     *    See which interface the caller is talking about.
     */

    switch (cmd) {
    /*
     *    These ioctl calls:
     *    - can be done by all.
     *    - atomic and do not require locking.
     *    - return a value
     */
    case SIOCGIFFLAGS:
    case SIOCGIFMETRIC:
    case SIOCGIFMTU:
    case SIOCGIFHWADDR:
    case SIOCGIFSLAVE:
    case SIOCGIFMAP:
    case SIOCGIFINDEX:
    case SIOCGIFTXQLEN:
        dev_load(net, ifr.ifr_name);
        /* Read-only queries: RCU read side only, no rtnl_lock(). */
        rcu_read_lock();
        ret = dev_ifsioc_locked(net, &ifr, cmd);
        rcu_read_unlock();
        if (!ret) {
            if (colon)
                *colon = ':';
            if (copy_to_user(arg, &ifr,
                     sizeof(struct ifreq)))
                ret = -EFAULT;
        }
        return ret;

    /* ethtool requests: no capability check here, run under rtnl_lock(). */
    case SIOCETHTOOL:
        dev_load(net, ifr.ifr_name);
        rtnl_lock();
        ret = dev_ethtool(net, &ifr);
        rtnl_unlock();
        if (!ret) {
            if (colon)
                *colon = ':';
            if (copy_to_user(arg, &ifr,
                     sizeof(struct ifreq)))
                ret = -EFAULT;
        }
        return ret;

    /*
     *    These ioctl calls:
     *    - require superuser power.
     *    - require strict serialization.
     *    - return a value
     */
    case SIOCGMIIPHY:
    case SIOCGMIIREG:
    case SIOCSIFNAME:
        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
            return -EPERM;
        dev_load(net, ifr.ifr_name);
        rtnl_lock();
        ret = dev_ifsioc(net, &ifr, cmd);
        rtnl_unlock();
        if (!ret) {
            if (colon)
                *colon = ':';
            if (copy_to_user(arg, &ifr,
                     sizeof(struct ifreq)))
                ret = -EFAULT;
        }
        return ret;

    /*
     *    These ioctl calls:
     *    - require superuser power.
     *    - require strict serialization.
     *    - do not return a value
     */
    case SIOCSIFMAP:
    case SIOCSIFTXQLEN:
        /*
         * capable() check first; the fall-through then additionally
         * applies the ns_capable() check of the next group.
         */
        if (!capable(CAP_NET_ADMIN))
            return -EPERM;
        /* fall through */
    /*
     *    These ioctl calls:
     *    - require local superuser power.
     *    - require strict serialization.
     *    - do not return a value
     */
    case SIOCSIFFLAGS:
    case SIOCSIFMETRIC:
    case SIOCSIFMTU:
    case SIOCSIFHWADDR:
    case SIOCSIFSLAVE:
    case SIOCADDMULTI:
    case SIOCDELMULTI:
    case SIOCSIFHWBROADCAST:
    case SIOCSMIIREG:
    case SIOCBONDENSLAVE:
    case SIOCBONDRELEASE:
    case SIOCBONDSETHWADDR:
    case SIOCBONDCHANGEACTIVE:
    case SIOCBRADDIF:
    case SIOCBRDELIF:
    case SIOCSHWTSTAMP:
        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
            return -EPERM;
        /* fall through */
    /*
     * The two bonding query commands enter here directly, so they skip
     * the capability checks above. ifr is not copied back on this path.
     */
    case SIOCBONDSLAVEINFOQUERY:
    case SIOCBONDINFOQUERY:
        dev_load(net, ifr.ifr_name);
        rtnl_lock();
        ret = dev_ifsioc(net, &ifr, cmd);
        rtnl_unlock();
        return ret;

    case SIOCGIFMEM:
        /* Get the per device memory space. We can add this but
         * currently do not support it */
    case SIOCSIFMEM:
        /* Set the per device memory buffer space.
         * Not applicable in our case */
    case SIOCSIFLINK:
        return -ENOTTY;

    /*
     *    Unknown or private ioctl.
     */
    default:
        if (cmd == SIOCWANDEV ||
            cmd == SIOCGHWTSTAMP ||
            (cmd >= SIOCDEVPRIVATE &&
             cmd <= SIOCDEVPRIVATE + 15)) {
            dev_load(net, ifr.ifr_name);
            rtnl_lock();
            ret = dev_ifsioc(net, &ifr, cmd);
            rtnl_unlock();
            /*
             * NOTE(review): unlike the cases above, ifr is copied
             * back here without restoring the ':' in ifr_name.
             */
            if (!ret && copy_to_user(arg, &ifr,
                         sizeof(struct ifreq)))
                ret = -EFAULT;
            return ret;
        }
        /* Take care of Wireless Extensions */
        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
            return wext_handle_ioctl(net, &ifr, cmd, arg);
        return -ENOTTY;
    }
}
View Code

 netlink实现接口的信息的控制

 其中内核有的netlink 消息如下

/****
 *        Routing/neighbour discovery messages.
 ****/

/* Types of messages */

/*
 * rtnetlink message types.
 *
 * Each value is written out explicitly. The numbering starts at RTM_BASE
 * and every group of related messages begins on a multiple of four.
 * __RTM_MAX is a sentinel one past the last defined type; RTM_MAX rounds it
 * up to the next multiple of four and subtracts one.
 *
 * Every enumerator also has a same-named self-referential macro, collected
 * below the enum, so that its presence can be tested with the preprocessor.
 */
enum {
    RTM_BASE         = 16,

    RTM_NEWLINK      = 16,
    RTM_DELLINK      = 17,
    RTM_GETLINK      = 18,
    RTM_SETLINK      = 19,

    RTM_NEWADDR      = 20,
    RTM_DELADDR      = 21,
    RTM_GETADDR      = 22,

    RTM_NEWROUTE     = 24,
    RTM_DELROUTE     = 25,
    RTM_GETROUTE     = 26,

    RTM_NEWNEIGH     = 28,
    RTM_DELNEIGH     = 29,
    RTM_GETNEIGH     = 30,

    RTM_NEWRULE      = 32,
    RTM_DELRULE      = 33,
    RTM_GETRULE      = 34,

    RTM_NEWQDISC     = 36,
    RTM_DELQDISC     = 37,
    RTM_GETQDISC     = 38,

    RTM_NEWTCLASS    = 40,
    RTM_DELTCLASS    = 41,
    RTM_GETTCLASS    = 42,

    RTM_NEWTFILTER   = 44,
    RTM_DELTFILTER   = 45,
    RTM_GETTFILTER   = 46,

    RTM_NEWACTION    = 48,
    RTM_DELACTION    = 49,
    RTM_GETACTION    = 50,

    RTM_NEWPREFIX    = 52,

    RTM_GETMULTICAST = 58,

    RTM_GETANYCAST   = 62,

    RTM_NEWNEIGHTBL  = 64,
    RTM_GETNEIGHTBL  = 66,
    RTM_SETNEIGHTBL  = 67,

    RTM_NEWNDUSEROPT = 68,

    RTM_NEWADDRLABEL = 72,
    RTM_DELADDRLABEL = 73,
    RTM_GETADDRLABEL = 74,

    RTM_GETDCB       = 78,
    RTM_SETDCB       = 79,

    RTM_NEWNETCONF   = 80,
    RTM_GETNETCONF   = 82,

    RTM_NEWMDB       = 84,
    RTM_DELMDB       = 85,
    RTM_GETMDB       = 86,

    RTM_NEWNSID      = 88,
    RTM_DELNSID      = 89,
    RTM_GETNSID      = 90,

    RTM_NEWSTATS     = 92,
    RTM_GETSTATS     = 94,

    /* Sentinel: one past the last message type defined above. */
    __RTM_MAX
};

#define RTM_BASE            RTM_BASE
#define RTM_NEWLINK         RTM_NEWLINK
#define RTM_DELLINK         RTM_DELLINK
#define RTM_GETLINK         RTM_GETLINK
#define RTM_SETLINK         RTM_SETLINK
#define RTM_NEWADDR         RTM_NEWADDR
#define RTM_DELADDR         RTM_DELADDR
#define RTM_GETADDR         RTM_GETADDR
#define RTM_NEWROUTE        RTM_NEWROUTE
#define RTM_DELROUTE        RTM_DELROUTE
#define RTM_GETROUTE        RTM_GETROUTE
#define RTM_NEWNEIGH        RTM_NEWNEIGH
#define RTM_DELNEIGH        RTM_DELNEIGH
#define RTM_GETNEIGH        RTM_GETNEIGH
#define RTM_NEWRULE         RTM_NEWRULE
#define RTM_DELRULE         RTM_DELRULE
#define RTM_GETRULE         RTM_GETRULE
#define RTM_NEWQDISC        RTM_NEWQDISC
#define RTM_DELQDISC        RTM_DELQDISC
#define RTM_GETQDISC        RTM_GETQDISC
#define RTM_NEWTCLASS       RTM_NEWTCLASS
#define RTM_DELTCLASS       RTM_DELTCLASS
#define RTM_GETTCLASS       RTM_GETTCLASS
#define RTM_NEWTFILTER      RTM_NEWTFILTER
#define RTM_DELTFILTER      RTM_DELTFILTER
#define RTM_GETTFILTER      RTM_GETTFILTER
#define RTM_NEWACTION       RTM_NEWACTION
#define RTM_DELACTION       RTM_DELACTION
#define RTM_GETACTION       RTM_GETACTION
#define RTM_NEWPREFIX       RTM_NEWPREFIX
#define RTM_GETMULTICAST    RTM_GETMULTICAST
#define RTM_GETANYCAST      RTM_GETANYCAST
#define RTM_NEWNEIGHTBL     RTM_NEWNEIGHTBL
#define RTM_GETNEIGHTBL     RTM_GETNEIGHTBL
#define RTM_SETNEIGHTBL     RTM_SETNEIGHTBL
#define RTM_NEWNDUSEROPT    RTM_NEWNDUSEROPT
#define RTM_NEWADDRLABEL    RTM_NEWADDRLABEL
#define RTM_DELADDRLABEL    RTM_DELADDRLABEL
#define RTM_GETADDRLABEL    RTM_GETADDRLABEL
#define RTM_GETDCB          RTM_GETDCB
#define RTM_SETDCB          RTM_SETDCB
#define RTM_NEWNETCONF      RTM_NEWNETCONF
#define RTM_GETNETCONF      RTM_GETNETCONF
#define RTM_NEWMDB          RTM_NEWMDB
#define RTM_DELMDB          RTM_DELMDB
#define RTM_GETMDB          RTM_GETMDB
#define RTM_NEWNSID         RTM_NEWNSID
#define RTM_DELNSID         RTM_DELNSID
#define RTM_GETNSID         RTM_GETNSID
#define RTM_NEWSTATS        RTM_NEWSTATS
#define RTM_GETSTATS        RTM_GETSTATS

/* Highest value of the four-aligned group that contains the last type. */
#define RTM_MAX        (((__RTM_MAX + 3) & ~3) - 1)

/* Number of message-type slots from RTM_BASE up to and including RTM_MAX. */
#define RTM_NR_MSGTYPES    (RTM_MAX + 1 - RTM_BASE)
/* Message types come in groups of four, so this is the number of groups. */
#define RTM_NR_FAMILIES    (RTM_NR_MSGTYPES >> 2)
/* Zero-based index of the group of four that message type cmd belongs to. */
#define RTM_FAM(cmd)    (((cmd) - RTM_BASE) >> 2)
View Code

 其中 RTM_NEWLINK-->.newlink    = vlan_newlink,--->创建接口vlanif

其netlink详细见af_netlink

netlink_proto_init

 

posted @ 2019-05-11 17:06  codestacklinuxer  阅读(895)  评论(0编辑  收藏  举报