ARP 实现

ARP 实现

现在我们介绍一下arp的实现,内核版本2.6.24.
[数据结构]
邻居表(struct neigh_table):协议栈通过ARP协议获取到的网络上邻居主机的IP地址与MAC地址的对应关系都会保存在这个表中,以备下次与邻居通讯时使用,
同时,ARP模块自身也会提供一套相应的机制来更新和维护这个邻居表.
/*
 * Neighbour table: one instance per protocol family (arp_tbl is the IPv4/ARP
 * instance). It holds the hash table of resolved neighbours, the proxy
 * neighbour hash, default tuning parameters and the garbage-collection state.
 */
struct neigh_table
{
    struct neigh_table      *next; // next table in the global neigh_tables list
    int                     family; // protocol family (AF_INET for arp_tbl)
    int                     entry_size; // size of one neighbour entry; arp_tbl uses sizeof(struct neighbour) + 4 (4 = IPv4 address length)
    int                     key_len; // length of the hash key, i.e. of the protocol address (4 for arp_tbl)
    // hash function; the hash is computed from the key and the net_device
    __u32                   (*hash)(const void *pkey, const struct net_device *);
    // protocol-specific constructor invoked by neigh_create() for a new entry
    int                     (*constructor)(struct neighbour *);
    // constructor for proxy entries, invoked by pneigh_lookup() when it creates one
    int                     (*pconstructor)(struct pneigh_entry *);
    void                    (*pdestructor)(struct pneigh_entry *);
    void                    (*proxy_redo)(struct sk_buff *skb);
    char                    *id; // table name (also used as the slab cache / proc entry name)
    // Default transmission parameters. Per the original note, every network
    // interface has its own neigh_parms node chained from here, plus this
    // default entry -- the chaining itself is not visible in this excerpt.
    struct neigh_parms      parms;
    /* HACK. gc_* should follow parms without a gap! */
    // used by the periodic garbage collector
    int                     gc_interval;
    int                     gc_thresh1;
    // second threshold: once the entry count reaches it and the last flush is
    // more than 5 seconds old, allocating a new entry forces a garbage collection
    int                     gc_thresh2;
    int                     gc_thresh3; // hard limit: at or above it allocation fails unless forced GC frees something
    unsigned long           last_flush; // time (jiffies) of the most recent flush
    // periodic garbage-collection timer
    struct timer_list       gc_timer;
    struct timer_list       proxy_timer; // proxy-ARP timer
    struct sk_buff_head     proxy_queue; // queue of proxy-ARP packets waiting to be processed
    atomic_t                entries; // number of neighbour entries in the whole table
    rwlock_t                lock;
    // Per the original note: timestamp of the last time reachable_time was
    // regenerated for the parms nodes (reachable_time is refreshed periodically).
    unsigned long           last_rand;
    struct kmem_cache               *kmem_cachep;
    struct neigh_statistics *stats;
    struct neighbour        **hash_buckets; // hash bucket array holding the neighbour entries
    unsigned int            hash_mask;
    __u32                   hash_rnd; // random seed mixed into the hash of hash_buckets
    unsigned int            hash_chain_gc;
    struct pneigh_entry     **phash_buckets;
#ifdef CONFIG_PROC_FS
    struct proc_dir_entry   *pde;
#endif
};
邻居项结构
/*
 * One neighbour entry: maps a protocol address (primary_key) on a given
 * device to a link-layer address (ha) and tracks its reachability state.
 */
struct neighbour
{
    struct neighbour        *next; // next entry in the same hash bucket
    struct neigh_table      *tbl; // owning neighbour table
    struct neigh_parms      *parms; // transmission parameters
    struct net_device        *dev; // network device this neighbour is reached through
    unsigned long           used; // time of last use (jiffies)
    unsigned long           confirmed;
    unsigned long           updated; // time of last update (jiffies)
    __u8                    flags; // flags
    __u8                    nud_state; // NUD state
    __u8                    type; // address type
    __u8                    dead; // set to 1 by neigh_alloc(), cleared once neigh_create() links the entry into the table
    atomic_t                probes; // per the original note: number of ARP requests sent so far
    rwlock_t                lock;
    unsigned char           ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; // link-layer (hardware) address of the neighbour
    struct hh_cache         *hh; // list of cached hardware headers, one node per protocol
    atomic_t                refcnt; // reference count
    int                     (*output)(struct sk_buff *skb); // transmit function
    struct sk_buff_head     arp_queue; // skbs queued while the address is unresolved
    struct timer_list        timer; // per-entry timer
    struct neigh_ops        *ops; // operations table
    u8                      primary_key[0]; // protocol address, stored directly after the structure
};
硬件头缓存,每种协议对应一个节点,协议类型记录在hh_type中,我们现在只处理IP协议,所以这个链表中总是只有一项.
/*
 * Cached link-layer header. Nodes are chained through hh_next, one per
 * protocol type (hh_type).
 */
struct hh_cache
{
    struct hh_cache *hh_next;  // next node
    atomic_t        hh_refcnt;  // reference count
    __be16          hh_type ____cacheline_aligned_in_smp; // protocol type

    u16             hh_len;         /* length of header */
    // Output function. With a cached header, later packets do not need the
    // Ethernet header rebuilt. The cache has to be refreshed once ARP
    // resolution completes (see neigh_update_hhs()).
    int             (*hh_output)(struct sk_buff *skb);
    seqlock_t       hh_lock;
#define HH_DATA_MOD     16
#define HH_DATA_OFF(__len)  (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1))
#define HH_DATA_ALIGN(__len) (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1))
    // cached hardware header (for Ethernet this is the Ethernet header)
    unsigned long   hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
};

#define NUD_IN_TIMER    (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE)
#define NUD_VALID       (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
#define NUD_CONNECTED   (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE)
[/数据结构]
[初始化]
    inet_init (net/ipv4/af_inet.c) -> arp_init arp初始化函数。
/*
 * ARP start-up: initialise the ARP neighbour table, register the ARP packet
 * handler, create the proc entries, optionally register the sysctl knobs and
 * finally subscribe to network-device notifications.
 */
void __init arp_init(void)
{
    neigh_table_init(&arp_tbl); // initialise the ARP neighbour table

    dev_add_pack(&arp_packet_type); // register the receive handler for ARP frames
    arp_proc_init();
#ifdef CONFIG_SYSCTL
    neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL);
#endif
    register_netdevice_notifier(&arp_netdev_notifier); // register on the netdevice notifier chain
}
/*
 * Initialise a neighbour table and link it at the head of the global
 * neigh_tables list. The table is linked even when another table of the same
 * family is already present; that case is only reported with an error message
 * and a stack dump.
 */
void neigh_table_init(struct neigh_table *tbl)
{
    struct neigh_table *dup;

    neigh_table_init_no_netlink(tbl); /* the actual initialisation */

    write_lock(&neigh_tbl_lock);
    /* look for an already registered table of the same family */
    dup = neigh_tables;
    while (dup && dup->family != tbl->family)
        dup = dup->next;
    /* push the new table onto the head of the list */
    tbl->next = neigh_tables;
    neigh_tables = tbl;
    write_unlock(&neigh_tbl_lock);

    if (unlikely(dup)) {
        /* duplicate family: complain loudly */
        printk(KERN_ERR "NEIGH: Registering multiple tables for family %d\n", tbl->family);
        dump_stack();
    }
}
    实际的初始化函数
/*
 * Core initialisation of a neighbour table: default parameters, slab cache,
 * per-CPU statistics, proc entry, the two hash arrays, and the GC and proxy
 * timers. Panics if any of the allocations fails.
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
    unsigned long now = jiffies;
    unsigned long phsize;

    atomic_set(&tbl->parms.refcnt, 1); // reference count starts at 1
    INIT_RCU_HEAD(&tbl->parms.rcu_head);
    tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);

    if (!tbl->kmem_cachep) // create the slab cache neighbour entries are allocated from
        tbl->kmem_cachep = kmem_cache_create(tbl->id, tbl->entry_size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

    tbl->stats = alloc_percpu(struct neigh_statistics); // per-CPU statistics
    if (!tbl->stats)
        panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS // statistics file under the proc net/stat directory, named after tbl->id
    tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
    if (!tbl->pde)
        panic("cannot create neighbour proc dir entry");
    tbl->pde->proc_fops = &neigh_stat_seq_fops;
    tbl->pde->data = tbl;
#endif
    tbl->hash_mask = 1;
    tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); // start with two hash buckets

    //#define PNEIGH_HASHMASK         0xF
    // allocate PNEIGH_HASHMASK + 1 (= 16) buckets for the proxy-neighbour hash
    phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
    tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

    if (!tbl->hash_buckets || !tbl->phash_buckets)
        panic("cannot allocate neighbour cache hashes");

    get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); // random seed for the hash function

    rwlock_init(&tbl->lock);
    init_timer(&tbl->gc_timer); // garbage-collection timer
    tbl->gc_timer.data     = (unsigned long)tbl;
    tbl->gc_timer.function = neigh_periodic_timer; // garbage-collection handler
    tbl->gc_timer.expires  = now + 1;
    add_timer(&tbl->gc_timer);

    init_timer(&tbl->proxy_timer); // proxy-ARP timer (initialised here but not armed)
    tbl->proxy_timer.data     = (unsigned long)tbl;
    tbl->proxy_timer.function = neigh_proxy_process; // handler for the proxy timer; presumably drains proxy_queue -- TODO confirm
    skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); // NOTE(review): the second argument looks like a lock class for this queue -- confirm

    tbl->last_flush = now;
    tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
arp协议处理结构
/* Packet handler registered by arp_init() via dev_add_pack(). */
static struct packet_type arp_packet_type = {
    .type = __constant_htons(ETH_P_ARP), // ARP protocol
    .func = arp_rcv, // called for frames whose protocol is ARP
};
/*
 * The ARP (IPv4) neighbour table with its static defaults. Time values are
 * expressed in jiffies (multiples of HZ).
 */
struct neigh_table arp_tbl = {
    .family =       AF_INET,
    .entry_size =   sizeof(struct neighbour) + 4, // room for the 4-byte IPv4 key after the struct
    .key_len =      4,
    .hash =         arp_hash,
    .constructor =  arp_constructor,
    .proxy_redo =   parp_redo,
    .id =           "arp_cache",
    .parms = {
        .tbl =                  &arp_tbl,
        .base_reachable_time =  30 * HZ,
        .retrans_time = 1 * HZ,
        .gc_staletime = 60 * HZ,
        .reachable_time =               30 * HZ,
        .delay_probe_time =     5 * HZ,
        .queue_len =            3,
        .ucast_probes = 3,
        .mcast_probes = 3,
        .anycast_delay =        1 * HZ,
        .proxy_delay =          (8 * HZ) / 10,
        .proxy_qlen =           64,
        .locktime =             1 * HZ,
    },
    .gc_interval =  30 * HZ,
    .gc_thresh1 =   128,
    .gc_thresh2 =   512,
    .gc_thresh3 =   1024,
};
[/初始化]
[arp协议处理]
    arp接收处理函数
/*
 * Receive handler for ARP frames. Performs basic sanity checks, unshares the
 * skb, clears the neighbour control block and hands the packet to
 * arp_process() through the NF_ARP_IN netfilter hook. Always returns 0 on the
 * drop paths; otherwise returns whatever NF_HOOK returns.
 */
static int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
    struct arphdr *arp;

    if (dev->nd_net != &init_net)
        goto freeskb;
    // make sure the full ARP header plus both hardware and both IP addresses are present
    if (!pskb_may_pull(skb, (sizeof(struct arphdr) + (2 * dev->addr_len) + (2 * sizeof(u32)))))
        goto freeskb;
    arp = arp_hdr(skb); // locate the ARP header
    // drop if the hardware address length differs from the device's, the device
    // does not do ARP, the frame is for another host or looped back, or the
    // protocol address length is not 4 (IPv4)
    if (arp->ar_hln != dev->addr_len || dev->flags & IFF_NOARP || skb->pkt_type == PACKET_OTHERHOST ||
            skb->pkt_type == PACKET_LOOPBACK || arp->ar_pln != 4)
        goto freeskb;
    // skb_share_check() returns NULL on failure; on that path the skb is not freed here
    if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
        goto out_of_mem;

    memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
    // run the ARP input netfilter hook, then continue in arp_process()
    return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
freeskb:
    kfree_skb(skb);
out_of_mem:
    return 0;
}
    正式的处理arp包
/*
 * Main ARP input processing. Validates the header against the device type,
 * extracts sender/target addresses, answers requests for local or proxied
 * addresses and finally updates the ARP table from the sender information.
 * Consumes the skb (frees it, or queues it for delayed proxy reply) and
 * always returns 0.
 */
static int arp_process(struct sk_buff *skb)
{
    struct net_device *dev = skb->dev;
    struct in_device *in_dev = in_dev_get(dev);
    struct arphdr *arp;
    unsigned char *arp_ptr;
    struct rtable *rt;
    unsigned char *sha;
    __be32 sip, tip;
    u16 dev_type = dev->type; // device (hardware) type
    int addr_type;
    struct neighbour *n;

    if (in_dev == NULL)
        goto out;

    arp = arp_hdr(skb); // ARP header

    switch (dev_type) {
        default:
            // protocol must be IP and the hardware type must match the device type
            if (arp->ar_pro != htons(ETH_P_IP) || htons(dev_type) != arp->ar_hrd)
                goto out;
            break;
        case ARPHRD_ETHER: // Ethernet
        case ARPHRD_IEEE802_TR:
        case ARPHRD_FDDI:
        case ARPHRD_IEEE802:
            // for these devices the header must announce ETHER or IEEE802 hardware and IP protocol
            if ((arp->ar_hrd != htons(ARPHRD_ETHER) && arp->ar_hrd != htons(ARPHRD_IEEE802)) || arp->ar_pro != htons(ETH_P_IP))
                goto out;
            break;
        case ARPHRD_AX25:
            if (arp->ar_pro != htons(AX25_P_IP) || arp->ar_hrd != htons(ARPHRD_AX25))
                goto out;
            break;
        case ARPHRD_NETROM:
            if (arp->ar_pro != htons(AX25_P_IP) || arp->ar_hrd != htons(ARPHRD_NETROM))
                goto out;
            break;
    }
    // only requests and replies are understood
    if (arp->ar_op != htons(ARPOP_REPLY) && arp->ar_op != htons(ARPOP_REQUEST))
        goto out;
    arp_ptr= (unsigned char *)(arp+1); // payload after the fixed header: hardware and IP addresses
    sha     = arp_ptr; // sender hardware address
    arp_ptr += dev->addr_len; // skip it
    memcpy(&sip, arp_ptr, 4); // sender IP
    arp_ptr += 4;
    arp_ptr += dev->addr_len; // skip target hardware address
    memcpy(&tip, arp_ptr, 4); // target IP

    /* Check for bad requests for 127.x.x.x and requests for multicast addresses.  If this is one such, delete it. */
    if (LOOPBACK(tip) || MULTICAST(tip)) // target IP is loopback or multicast
        goto out;
    /* Special case: We must set Frame Relay source Q.922 address */
    if (dev_type == ARPHRD_DLCI)
        sha = dev->broadcast;
    /* Special case: IPv4 duplicate address detection packet (RFC2131) */
    // Duplicate-address detection (used by DHCP, RFC2131): the probe carries a
    // sender IP of 0. Reply only if the target is a local address and
    // arp_ignore() does not veto it; never learn from such a packet.
    if (sip == 0) {
        if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(tip) == RTN_LOCAL && !arp_ignore(in_dev,dev,sip,tip))
            arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha);
        goto out;
    }
    // an ARP request for which the input route lookup succeeded
    if (arp->ar_op == htons(ARPOP_REQUEST) && ip_route_input(skb, tip, sip, 0, dev) == 0) {
        rt = (struct rtable*)skb->dst; // route cache entry attached by ip_route_input()
        addr_type = rt->rt_type;
        if (addr_type == RTN_LOCAL) { // request for one of our own addresses
            n = neigh_event_ns(&arp_tbl, sha, &sip, dev); // look up / create and refresh the sender's entry
            if (n) {
                int dont_send = 0;
                if (!dont_send)
                    dont_send |= arp_ignore(in_dev,dev,sip,tip);

                if (!dont_send && IN_DEV_ARPFILTER(in_dev))
                    dont_send |= arp_filter(sip,tip,dev);

                if (!dont_send) // neither ignored nor filtered: reply with our MAC address
                    arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);

                neigh_release(n);
            }
            goto out;
        } else if (IN_DEV_FORWARD(in_dev)) { // not a local address, but forwarding is enabled on the device
            if ((rt->rt_flags&RTCF_DNAT) || (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
                        (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
                n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
                if (n)
                    neigh_release(n);

                // reply at once if the skb was already queued locally, was
                // addressed to this host, or no proxy delay is configured;
                // otherwise queue it for a delayed proxy reply
                if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST ||
                        in_dev->arp_parms->proxy_delay == 0) {
                    arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
                } else {
                    pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
                    in_dev_put(in_dev);
                    return 0; // skb now belongs to the proxy queue, do not free it
                }
                goto out;
            }
        }
    }
    /* Update our ARP tables */
    n = __neigh_lookup(&arp_tbl, &sip, dev, 0); // look up (without creating) the entry for the sender IP

    if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
        /* Unsolicited ARP is not accepted by default.
           It is possible, that this option should be enabled for some devices (strip is candidate)  */
        if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && inet_addr_type(sip) == RTN_UNICAST)
            n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
    }
    if (n) { // entry exists
        int state = NUD_REACHABLE; // assume a unicast reply, which makes the neighbour reachable
        int override;
        /* If several different ARP replies follows back-to-back, use the FIRST one.
           It is possible, if several proxy agents are active. Taking the first reply
           prevents arp trashing and chooses the fastest router. */
        // override is allowed only once locktime jiffies have passed since the last update
        override = time_after(jiffies, n->updated + n->parms->locktime);

        /* Broadcast replies and request packets do not assert neighbour reachability. */
        // anything that is not a reply addressed to this host only marks the entry stale
        if (arp->ar_op != htons(ARPOP_REPLY) || skb->pkt_type != PACKET_HOST)
            state = NUD_STALE;
        neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0); // update the entry
        neigh_release(n);
    }
out:
    if (in_dev)
        in_dev_put(in_dev);
    kfree_skb(skb);
    return 0;
}
    查找,如果找到则更新表
/*
 * Look up the neighbour for saddr on dev and mark it NUD_STALE with the given
 * link-layer address. A missing entry is created only when a link-layer
 * address was supplied or the device uses no hardware addresses.
 * Returns the entry as obtained from __neigh_lookup(), or NULL.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev)
{
    int may_create = lladdr || !dev->addr_len;
    struct neighbour *entry;

    entry = __neigh_lookup(tbl, saddr, dev, may_create);
    if (!entry)
        return entry;

    neigh_update(entry, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE);
    return entry;
}
    查询表,如果没有找到并且指定了创建标志则创建一个新的项
/*
 * Look up a neighbour; when it is missing and creat is non-zero, create it.
 * Returns the entry, or NULL when nothing was found (creat == 0) or creation
 * failed.
 */
static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat)
{
    struct neighbour *found = neigh_lookup(tbl, pkey, dev);

    if (!found && creat) {
        /* not present: create it, mapping an error pointer to NULL */
        found = neigh_create(tbl, pkey, dev);
        if (IS_ERR(found))
            found = NULL;
    }
    return found;
}
/*
 * Find the neighbour entry matching (pkey, dev) in tbl.
 *
 * On a hit the entry's reference count is raised with neigh_hold() before it
 * is returned; on a miss NULL is returned.
 *
 * The hash is computed while holding tbl->lock: neigh_hash_grow() rewrites
 * tbl->hash_rnd (which the hash callback reads) under the write lock, so a
 * hash computed before taking the read lock could be based on a stale seed
 * and select the wrong bucket, producing a false miss.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
    struct neighbour *n;
    int key_len = tbl->key_len;
    u32 hash_val;

    NEIGH_CACHE_STAT_INC(tbl, lookups);

    read_lock_bh(&tbl->lock);
    hash_val = tbl->hash(pkey, dev); /* must stay inside the lock, see above */
    for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
        /* same device and same protocol address */
        if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
            neigh_hold(n);
            NEIGH_CACHE_STAT_INC(tbl, hits);
            break;
        }
    }
    read_unlock_bh(&tbl->lock);
    return n;
}
/*
 * Allocate a neighbour entry for (pkey, dev), run the protocol and device
 * constructors and insert it into the hash table.
 *
 * Returns the new entry with an extra reference taken, an already existing
 * matching entry (also with a reference taken) if one was inserted
 * concurrently, or an ERR_PTR() on failure.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
    u32 hash_val;
    int key_len = tbl->key_len;
    int error;
    struct neighbour *n1, *rc, *n = neigh_alloc(tbl); // allocate a fresh entry

    if (!n) {
        rc = ERR_PTR(-ENOBUFS);
        goto out;
    }
    memcpy(n->primary_key, pkey, key_len); // store the peer address right after the struct (hence the "+ 4" in entry_size)
    n->dev = dev; // remember the device
    dev_hold(dev);

    /* Protocol specific setup. */
    if (tbl->constructor && (error = tbl->constructor(n)) < 0) { // e.g. arp_constructor for arp_tbl
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }
    /* Device specific setup. */
    if (n->parms->neigh_setup && (error = n->parms->neigh_setup(n)) < 0) {
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }

    n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

    write_lock_bh(&tbl->lock);
    if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) // more entries than buckets
        neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); // double the hash table
    hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; // bucket index (computed after a possible grow)

    if (n->parms->dead) {
        rc = ERR_PTR(-EINVAL);
        goto out_tbl_unlock;
    }
    // look again under the write lock: someone may have inserted the same entry meanwhile
    for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
        if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
            neigh_hold(n1);
            rc = n1; // use the existing entry; the new one is released below
            goto out_tbl_unlock;
        }
    }
    // link the new entry at the head of its bucket
    n->next = tbl->hash_buckets[hash_val];
    tbl->hash_buckets[hash_val] = n;
    n->dead = 0; // was 1 since neigh_alloc(); the entry is now live in the table
    neigh_hold(n);
    write_unlock_bh(&tbl->lock);
    NEIGH_PRINTK2("neigh %p is created.\n", n);
    rc = n;
out:
    return rc;
out_tbl_unlock:
    write_unlock_bh(&tbl->lock);
out_neigh_release:
    neigh_release(n);
    goto out;
}
    分配一个邻居项
/*
 * Allocate and pre-initialise a neighbour entry for tbl.
 *
 * The table's entry counter is incremented up front and rolled back on
 * failure. A forced garbage collection is attempted when the previous count
 * had reached gc_thresh3, or had reached gc_thresh2 with the last flush more
 * than 5 seconds ago; if that collection frees nothing and the count had
 * reached gc_thresh3, the allocation is refused. Returns NULL on failure.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
    struct neighbour *entry;
    unsigned long stamp = jiffies;
    int count = atomic_inc_return(&tbl->entries) - 1;

    if (count >= tbl->gc_thresh3 ||
        (count >= tbl->gc_thresh2 && time_after(stamp, tbl->last_flush + 5 * HZ))) {
        /* over a threshold: try to reclaim, give up only at the hard limit */
        if (!neigh_forced_gc(tbl) && count >= tbl->gc_thresh3) {
            atomic_dec(&tbl->entries);
            return NULL;
        }
    }

    entry = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
    if (!entry) {
        atomic_dec(&tbl->entries);
        return NULL;
    }

    skb_queue_head_init(&entry->arp_queue);
    rwlock_init(&entry->lock);
    entry->used = stamp;
    entry->updated = stamp;
    entry->nud_state = NUD_NONE;        /* state of a freshly created entry */
    entry->output = neigh_blackhole;    /* transmit hook until a constructor replaces it */
    entry->parms = neigh_parms_clone(&tbl->parms); /* table default parameters */
    init_timer(&entry->timer);
    entry->timer.function = neigh_timer_handler;
    entry->timer.data = (unsigned long)entry;

    NEIGH_CACHE_STAT_INC(tbl, allocs);
    entry->tbl = tbl;
    atomic_set(&entry->refcnt, 1);
    entry->dead = 1;                    /* not linked into the table yet */
    return entry;
}
    增长hash表
/*
 * Resize the neighbour hash table to new_entries buckets (must be a power of
 * two). A new random seed is drawn and every entry is rehashed into the new
 * array before the old array is freed. If the new array cannot be allocated
 * the table is left untouched.
 */
static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
{
    struct neighbour **fresh, **stale;
    unsigned int idx, fresh_mask, stale_count;

    NEIGH_CACHE_STAT_INC(tbl, hash_grows);
    BUG_ON(!is_power_of_2(new_entries));

    fresh = neigh_hash_alloc(new_entries);
    if (!fresh)
        return;

    stale = tbl->hash_buckets;
    stale_count = tbl->hash_mask + 1;
    fresh_mask = new_entries - 1;

    /* reseed before rehashing: the hash callback reads tbl->hash_rnd */
    get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

    for (idx = 0; idx < stale_count; idx++) {
        struct neighbour *cur = stale[idx];

        while (cur) {
            struct neighbour *following = cur->next;
            unsigned int slot = tbl->hash(cur->primary_key, cur->dev);

            /* push the entry onto the head of its new bucket */
            slot &= fresh_mask;
            cur->next = fresh[slot];
            fresh[slot] = cur;
            cur = following;
        }
    }

    tbl->hash_buckets = fresh;
    tbl->hash_mask = fresh_mask;
    neigh_hash_free(stale, stale_count);
}
    更新邻居项 neigh_event_ns ->
/*
 * Update a neighbour entry with a (possibly new) link-layer address and a new
 * NUD state.
 *
 * neigh:  entry to update
 * lladdr: new link-layer address, or NULL if none is supplied
 * new:    requested NUD state
 * flags:  NEIGH_UPDATE_F_* flags (OVERRIDE, WEAK_OVERRIDE, OVERRIDE_ISROUTER,
 *         ISROUTER, ADMIN)
 *
 * Returns 0 on success, -EPERM when a non-admin update targets a NOARP or
 * PERMANENT entry, and -EINVAL when no address is supplied and none is known.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags)
{
    u8 old;
    int err;
    int notify = 0;
    struct net_device *dev;
    int update_isrouter = 0;

    write_lock_bh(&neigh->lock);

    dev    = neigh->dev;
    old    = neigh->nud_state; // previous state
    err    = -EPERM;
    // only administrative updates may touch NOARP or PERMANENT entries
    if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT)))
        goto out;

    if (!(new & NUD_VALID)) { // the new state is not one of the valid states
        neigh_del_timer(neigh); // stop the entry's timer
        if (old & NUD_CONNECTED) // it was connected
            neigh_suspect(neigh); // switch back to the non-connected output path

        neigh->nud_state = new; // record the new state
        err = 0;
        notify = old & NUD_VALID; // notify only if the entry used to be valid
        goto out;
    }
    /* Compare new lladdr with cached one */
    if (!dev->addr_len) { // device uses zero-length hardware addresses
        /* First case: device needs no address. */
        lladdr = neigh->ha; // point at the stored address
    } else if (lladdr) { // caller supplied an address
        /* The second case: if something is already cached and a new address is proposed:
           - compare new & old
           - if they are different, check override flag
           */
        // old state valid and the supplied address equals the stored one:
        // use the stored pointer so later pointer comparisons mean "unchanged"
        if ((old & NUD_VALID) && !memcmp(lladdr, neigh->ha, dev->addr_len))
            lladdr = neigh->ha;
    } else {
        /* No address is supplied; if we know something, use it, otherwise discard the request. */
        err = -EINVAL;
        if (!(old & NUD_VALID)) // nothing known yet
            goto out;
        lladdr = neigh->ha;
    }
    if (new & NUD_CONNECTED) // new state is a connected state
        neigh->confirmed = jiffies; // record the confirmation time
    neigh->updated = jiffies; // record the update time

    /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */
    err = 0;
    update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; // caller asked to update the router flag

    if (old & NUD_VALID) { // entry was valid
        // address differs from the stored one and overriding is not allowed
        if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
            update_isrouter = 0;
            if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && (old & NUD_CONNECTED)) {
                lladdr = neigh->ha;
                new = NUD_STALE;
            } else
                goto out;
        } else {
            // same address and the request is merely STALE: keep the old state
            if (lladdr == neigh->ha && new == NUD_STALE && ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) || (old & NUD_CONNECTED)))
                new = old;
        }
    }
    if (new != old) { // state actually changes
        neigh_del_timer(neigh); // stop the old timer
        if (new & NUD_IN_TIMER) { // the new state needs a timer
            neigh_hold(neigh); // reference taken alongside the armed timer
            // arm the timer: reachable_time from now for REACHABLE, immediately otherwise
            neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0)));
        }
        neigh->nud_state = new; // record the new state
    }
    if (lladdr != neigh->ha) { // hardware address changed
        memcpy(&neigh->ha, lladdr, dev->addr_len); // store it
        neigh_update_hhs(neigh); // refresh the cached hardware headers
        if (!(new & NUD_CONNECTED)) // not connected: backdate the confirmation time
            neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1);
        notify = 1;
    }
    if (new == old)
        goto out;
    if (new & NUD_CONNECTED) // connected: switch the output functions
        neigh_connect(neigh);
    else
        neigh_suspect(neigh);
    if (!(old & NUD_VALID)) { // entry just became valid: flush the packets queued on it
        struct sk_buff *skb;
        /* Again: avoid dead loop if something went wrong */
        while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
            struct neighbour *n1 = neigh;
            write_unlock_bh(&neigh->lock); // the lock is dropped around the output call
            /* On shaper/eql skb->dst->neighbour != neigh :( */
            // prefer the neighbour attached to the skb's route, if any
            if (skb->dst && skb->dst->neighbour)
                n1 = skb->dst->neighbour;
            n1->output(skb); // transmit the queued skb
            write_lock_bh(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue);
    }
out:
    if (update_isrouter) {
        neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ? (neigh->flags | NTF_ROUTER) : (neigh->flags & ~NTF_ROUTER);
    }
    write_unlock_bh(&neigh->lock);
    if (notify)
        neigh_update_notify(neigh);

    return err;
}
    改变输出函数,在设置为连接状态时
/*
 * Switch a neighbour to its "connected" output path: the entry itself uses
 * ops->connected_output and every cached hardware header uses ops->hh_output.
 */
static void neigh_connect(struct neighbour *neigh)
{
    struct hh_cache *cached;

    NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

    neigh->output = neigh->ops->connected_output;

    cached = neigh->hh;
    while (cached) {
        cached->hh_output = neigh->ops->hh_output;
        cached = cached->hh_next;
    }
}
/*
 * Switch a neighbour to the non-connected output path: both the entry and
 * every cached hardware header are pointed at ops->output.
 */
static void neigh_suspect(struct neighbour *neigh)
{
    struct hh_cache *cached;

    NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

    neigh->output = neigh->ops->output;

    cached = neigh->hh;
    while (cached) {
        cached->hh_output = neigh->ops->output;
        cached = cached->hh_next;
    }
}
    更新hh_cache中记录的硬件地址
/*
 * Push the neighbour's current hardware address (neigh->ha) into every cached
 * hardware header chained from neigh->hh, using the device's
 * header_ops->cache_update callback. Each header is updated under its own
 * hh_lock seqlock.
 *
 * Does nothing when the device has no header_ops or no cache_update callback.
 * The header_ops check matters: arp_constructor() explicitly handles devices
 * whose header_ops is NULL, so such neighbours exist and can reach this
 * function through neigh_update().
 */
static void neigh_update_hhs(struct neighbour *neigh)
{
    struct hh_cache *hh;
    void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *);

    if (!neigh->dev->header_ops)
        return;

    update = neigh->dev->header_ops->cache_update;
    if (!update)
        return;

    for (hh = neigh->hh; hh; hh = hh->hh_next) {
        write_seqlock_bh(&hh->hh_lock);
        update(hh, neigh->dev, neigh->ha);
        write_sequnlock_bh(&hh->hh_lock);
    }
}
是否可以使用代理arp来处理这个arp包
    arp_process->
/*
 * Decide whether an ARP request received on in_dev may be answered by proxy
 * for the route rt. Returns 1 if proxying is allowed, 0 otherwise.
 *
 * Proxy ARP must be enabled on in_dev. An input medium id of 0 always allows
 * proxying, -1 never does; otherwise the output device's medium id must be
 * different from the input one and must not be -1.
 */
static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
{
    struct in_device *out_dev;
    int imi, omi = -1;

    if (!IN_DEV_PROXY_ARP(in_dev))
        return 0;

    imi = IN_DEV_MEDIUM_ID(in_dev);
    switch (imi) {
    case 0:
        return 1;   /* medium not restricted */
    case -1:
        return 0;   /* proxying disabled for this medium */
    }

    /* place to check for proxy_arp for routes */
    out_dev = in_dev_get(rt->u.dst.dev);
    if (out_dev != NULL) {
        omi = IN_DEV_MEDIUM_ID(out_dev);
        in_dev_put(out_dev);
    }

    if (omi == -1)
        return 0;
    return omi != imi;
}
在代理arp缓存中进行查找
    arp_process->
/*
 * Look up a proxy-neighbour entry for (pkey, dev); an entry with a NULL dev
 * matches any device. When nothing is found and creat is non-zero a new entry
 * is allocated, constructed and inserted. Returns the entry or NULL.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat)
{
    struct pneigh_entry *n;
    int key_len = tbl->key_len;
    u32 hash_val = *(u32 *)(pkey + key_len - 4); // last 4 bytes of the key
    // fold the value down to a bucket index
    hash_val ^= (hash_val >> 16);
    hash_val ^= hash_val >> 8;
    hash_val ^= hash_val >> 4;
    hash_val &= PNEIGH_HASHMASK;

    read_lock_bh(&tbl->lock);
    for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { // walk the bucket
        // key matches (for ARP the key is the target IP) and device matches or is a wildcard
        if (!memcmp(n->key, pkey, key_len) && (n->dev == dev || !n->dev)) {
            read_unlock_bh(&tbl->lock);
            goto out; // found
        }
    }
    read_unlock_bh(&tbl->lock);
    n = NULL;

    if (!creat) // creation not requested
        goto out;

    ASSERT_RTNL();
    // allocate a proxy entry with room for the key
    n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
    if (!n)
        goto out;

    memcpy(n->key, pkey, key_len); // copy the key
    n->dev = dev; // remember the device (may be NULL)
    if (dev)
        dev_hold(dev);

    if (tbl->pconstructor && tbl->pconstructor(n)) { // optional constructor failed: undo
        if (dev)
            dev_put(dev);

        kfree(n);
        n = NULL;
        goto out;
    }
    // insert at the head of the bucket
    write_lock_bh(&tbl->lock);
    n->next = tbl->phash_buckets[hash_val];
    tbl->phash_buckets[hash_val] = n;
    write_unlock_bh(&tbl->lock);
out:
    return n;
}
    创建发送一个arp包
/*
 * Build an ARP packet with arp_create() and transmit it with arp_xmit().
 * Nothing is sent when the device has IFF_NOARP set or the packet cannot be
 * created.
 */
void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip,
        unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw)
{
    struct sk_buff *skb;

    /* ARP is disabled on this interface */
    if (dev->flags & IFF_NOARP)
        return;

    /*
     * Per the original note: arp_create() builds the skb and fills in the
     * link-layer header through dev_hard_header(), i.e. the device's
     * header_ops->create callback.
     */
    skb = arp_create(type, ptype, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw);

    /*
     * Per the original note: arp_xmit() passes the packet through the
     * NF_ARP_OUT hook and then to dev_queue_xmit().
     */
    if (skb != NULL)
        arp_xmit(skb);
}
[/arp协议处理]
[初始化指定函数实现]
我们看到在初始化部分有一些函数指针被初始化了,现在我们来逐个分析。
    hash计算函数
static u32 arp_hash(const void *pkey, const struct net_device *dev)
{
    //还记得neigh_table_init_no_netlink函数中为hash_rnd读了一些随机数吗
    return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd);
}
    当创建一个邻居项时neigh_create会调用到这个构造函数
/*
 * Protocol constructor of arp_tbl, called from neigh_create() for every new
 * entry. Classifies the address, switches the entry to the device's ARP
 * parameters and selects the neigh_ops / output function according to the
 * device. Returns 0 on success, -EINVAL if the device has no in_device.
 */
static int arp_constructor(struct neighbour *neigh)
{
    __be32 addr = *(__be32*)neigh->primary_key; // IP address stored as the key
    struct net_device *dev = neigh->dev;
    struct in_device *in_dev;
    struct neigh_parms *parms;

    neigh->type = inet_addr_type(addr); // classify the address

    rcu_read_lock();
    in_dev = __in_dev_get_rcu(dev);
    if (in_dev == NULL) {
        rcu_read_unlock();
        return -EINVAL;
    }
    parms = in_dev->arp_parms; // the device's own parameter block
    __neigh_parms_put(neigh->parms); // drop the reference on the table default parms taken in neigh_alloc()
    neigh->parms = neigh_parms_clone(parms); // take a reference on the device parms instead
    rcu_read_unlock();

    if (!dev->header_ops) { // device has no header operations
        neigh->nud_state = NUD_NOARP;
        neigh->ops = &arp_direct_ops;
        neigh->output = neigh->ops->queue_xmit;
    } else {
        switch (dev->type) { // device type
            default:
                break;
            // The cases below share one body; which of them (and the body
            // itself) are compiled depends on the AX25 / NETROM config options.
            case ARPHRD_ROSE:
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
            case ARPHRD_AX25:
#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
            case ARPHRD_NETROM:
#endif
                neigh->ops = &arp_broken_ops;
                neigh->output = neigh->ops->output;
                return 0;
#endif
                ;
        }
        if (neigh->type == RTN_MULTICAST) { // multicast address: hardware address is derived by arp_mc_map()
            neigh->nud_state = NUD_NOARP;
            arp_mc_map(addr, neigh->ha, dev, 1);
        } else if (dev->flags & (IFF_NOARP|IFF_LOOPBACK)) { // loopback or no-ARP device: use the device's own address
            neigh->nud_state = NUD_NOARP;
            memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
        } else if (neigh->type == RTN_BROADCAST || dev->flags & IFF_POINTOPOINT) { // broadcast address or point-to-point device: use the broadcast address
            neigh->nud_state = NUD_NOARP;
            memcpy(neigh->ha, dev->broadcast, dev->addr_len);
        }
        if (dev->header_ops->cache) // device can cache hardware headers
            neigh->ops = &arp_hh_ops;
        else
            neigh->ops = &arp_generic_ops;

        if (neigh->nud_state & NUD_VALID) // entry is already in a valid state
            neigh->output = neigh->ops->connected_output;
        else
            neigh->output = neigh->ops->output;
    }
    return 0;
}
    arp代理redo函数
/*
 * proxy_redo callback of arp_tbl: re-runs arp_process() on a proxy-ARP
 * packet. The return value of arp_process() is discarded.
 */
static void parp_redo(struct sk_buff *skb)
{
    arp_process(skb);
}
[/初始化指定函数实现]
[邻居项操作实现]
    网卡驱动通常调用alloc_etherdev()来构建网卡的net_device结构,而ether_setup()是标准以太网卡的初始化函数,会对相关字段进行初始化。
/*
 * Excerpt of the standard Ethernet device setup (remaining statements are
 * elided in this listing): installs eth_header_ops as the device's header_ops.
 */
void ether_setup(struct net_device *dev)
{
    dev->header_ops         = &eth_header_ops;
    ......
}
所以上面arp_constructor函数中neigh->ops就会指向arp_hh_ops结构。
/*
 * neigh_ops selected by arp_constructor() for devices whose header_ops
 * provides a cache callback.
 */
static struct neigh_ops arp_hh_ops = {
    .family =               AF_INET,
    .solicit =              arp_solicit,
    .error_report =         arp_error_report,
    // both output paths point to the same resolving function
    .output =               neigh_resolve_output,
    .connected_output =     neigh_resolve_output,
    // both of these transmit directly through dev_queue_xmit
    .hh_output =            dev_queue_xmit,
    .queue_xmit =           dev_queue_xmit,
};
arp协议的发送函数是neigh_resolve_output。看代码之前,先介绍一下这个函数是由谁、在哪里调用的。
dst->output = ip_output->ip_finish_output->ip_finish_output2
......
//参考上面函数neigh_connect,邻居项状态变为NUD_REACHABLE时,会改变hh_cache->hh_output的指针指向neigh_ops->hh_output.
if (dst->hh) //有hh_cache结构,直接拷贝hh_cache中的硬件地址到以太网头的目的地址字段,然后调用hh->hh_output(skb);直接发送数据.
    return neigh_hh_output(dst->hh, skb);
else if (dst->neighbour)
    return dst->neighbour->output(skb);
    ......
    在linux 路由实现文章中看ip_route_output_slow的函数流程会告诉你ip_output函数的由来。
/*
 * Output hook used while a neighbour may still need address resolution.
 *
 * Returns:
 *   - the value of neigh->ops->queue_xmit() once the link-layer header has
 *     been built and the frame handed on;
 *   - 0 when neigh_event_send() reports the neighbour is not usable yet
 *     (the skb has then been queued or freed by __neigh_event_send());
 *   - -EINVAL, after freeing the skb, when the skb has no dst/neighbour or
 *     the link-layer header could not be built.
 */
int neigh_resolve_output(struct sk_buff *skb)
{
    struct dst_entry *dst = skb->dst;
    struct neighbour *neigh;
    struct net_device *dev;
    int hdr_rc;

    /* No route cache entry, or no neighbour bound to it: nothing to send to. */
    if (!dst || !(neigh = dst->neighbour)) {
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
        goto drop;
    }

    /* Make skb->data point at the network header. */
    __skb_pull(skb, skb_network_offset(skb));

    /* Non-zero means the neighbour cannot be used for transmission right now. */
    if (neigh_event_send(neigh, skb))
        return 0;

    dev = neigh->dev;
    if (!dev->header_ops->cache || dst->hh) {
        /* No header cache to set up: a read lock suffices to copy neigh->ha. */
        read_lock_bh(&neigh->lock);
        hdr_rc = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
        read_unlock_bh(&neigh->lock);
    } else {
        /* The device can cache headers and this dst has none yet. */
        write_lock_bh(&neigh->lock);
        if (!dst->hh) /* re-check under the lock before allocating */
            neigh_hh_init(neigh, dst, dst->ops->protocol);

        hdr_rc = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
        write_unlock_bh(&neigh->lock);
    }

    if (hdr_rc < 0)
        goto drop;

    return neigh->ops->queue_xmit(skb);

drop:
    kfree_skb(skb);
    return -EINVAL;
}
    触发状态转换,判断邻居项是否可用(返回0表示可用,可以直接发送)
/*
 * Record that the neighbour was just used and decide whether the caller may
 * transmit immediately.
 *
 * Returns 0 when the state is CONNECTED, DELAY or PROBE; otherwise returns
 * whatever __neigh_event_send() decides. NUD_STALE is deliberately left out
 * of the fast path: __neigh_event_send() moves a stale entry to NUD_DELAY
 * and arms its timer.
 */
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
    neigh->used = jiffies; /* remember the time of last use */

    if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
        return 0;

    return __neigh_event_send(neigh, skb);
}
/*
 * Slow path of neigh_event_send(): drive the state transition needed before
 * the neighbour can be used, under neigh->lock.
 *
 * Returns 0 when the caller may transmit the skb now, 1 when it may not.
 * In the latter case the skb (if any) has either been appended to
 * neigh->arp_queue (state NUD_INCOMPLETE) or freed (state set to NUD_FAILED).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
    unsigned long now;
    int queued = 0;

    write_lock_bh(&neigh->lock);

    /* Re-check under the lock: the state may already allow sending. */
    if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
        goto unlock;

    now = jiffies;

    if (neigh->nud_state & NUD_STALE) {
        /* Stale entry: switch to DELAY, arm the timer and let the packet go out. */
        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
        neigh_hold(neigh);
        neigh->nud_state = NUD_DELAY;
        neigh->updated = jiffies;
        neigh_add_timer(neigh, jiffies + neigh->parms->delay_probe_time);
    } else if (!(neigh->nud_state & NUD_INCOMPLETE)) {
        /* Neither STALE nor INCOMPLETE: resolution has to be started. */
        if (!(neigh->parms->mcast_probes + neigh->parms->app_probes)) {
            /* No probes configured: fail at once and drop the packet. */
            neigh->nud_state = NUD_FAILED;
            neigh->updated = jiffies;
            write_unlock_bh(&neigh->lock);
            if (skb)
                kfree_skb(skb);
            return 1;
        }

        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
        neigh->nud_state = NUD_INCOMPLETE;
        neigh->updated = jiffies;
        neigh_hold(neigh);
        neigh_add_timer(neigh, now + 1); /* start the neighbour timer */
    }

    if (neigh->nud_state == NUD_INCOMPLETE) {
        if (skb) {
            /* Queue is full: discard the entry at the head to make room. */
            if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
                struct sk_buff *oldest = neigh->arp_queue.next;

                __skb_unlink(oldest, &neigh->arp_queue);
                kfree_skb(oldest);
            }
            /* Park the new packet at the tail of the queue. */
            __skb_queue_tail(&neigh->arp_queue, skb);
        }
        queued = 1;
    }

unlock:
    write_unlock_bh(&neigh->lock);
    return queued;
}
    分配并初始化一个hh_cache结构
/*
 * Attach a hardware-header cache entry for @protocol to @dst.
 *
 * An existing entry on n->hh with a matching hh_type is reused; otherwise a
 * new one is allocated with GFP_ATOMIC, filled in through the device's
 * header_ops->cache() hook and pushed onto the head of n->hh. If the
 * allocation or the cache hook fails, dst->hh is left untouched.
 *
 * Reference counting: a freshly linked entry gets one reference for the
 * n->hh list, and every successful call adds one more for dst->hh.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, __be16 protocol)
{
    struct net_device *dev = dst->dev;
    struct hh_cache *entry = n->hh;

    /* Look for an entry already built for this protocol. */
    while (entry && entry->hh_type != protocol)
        entry = entry->hh_next;

    if (!entry) {
        entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
        if (entry == NULL)
            return;

        seqlock_init(&entry->hh_lock);
        entry->hh_type = protocol;
        atomic_set(&entry->hh_refcnt, 0);
        entry->hh_next = NULL;

        /* Let the device fill in the cached header; non-zero means failure. */
        if (dev->header_ops->cache(n, entry)) {
            kfree(entry);
            return;
        }

        /* Link the new entry at the head of the neighbour's list. */
        atomic_inc(&entry->hh_refcnt);
        entry->hh_next = n->hh;
        n->hh = entry;

        if (n->nud_state & NUD_CONNECTED)
            entry->hh_output = n->ops->hh_output; /* direct output */
        else
            entry->hh_output = n->ops->output;    /* output via resolution */
    }

    atomic_inc(&entry->hh_refcnt);
    dst->hh = entry;
}
    发送arp请求
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
    __be32 saddr = 0;
    u8  *dst_ha = NULL;
    struct net_device *dev = neigh->dev;
    __be32 target = *(__be32*)neigh->primary_key; //目的ip地址,也许是下一跳网关地址
    int probes = atomic_read(&neigh->probes); //探测次数
    struct in_device *in_dev = in_dev_get(dev);

    if (!in_dev)
        return;
    switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { //选择源ip地址时的策略
        default:
        case 0:  //任何本地ip地址都可以
            if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
                saddr = ip_hdr(skb)->saddr;
            break;
        case 1: //如果可能,选择在目标ip同样的子网范围内,不行那么用 2
            if (!skb)
                break;

            saddr = ip_hdr(skb)->saddr;
            if (inet_addr_type(saddr) == RTN_LOCAL) {
                /* saddr should be known to target */
                //这函数并不是在选择,只是判断一下源和目的是否在同一子网
                if (inet_addr_onlink(in_dev, target, saddr))
                    break;
            }
            saddr = 0;
            break;
        case 2:         //优先选择主要的ip
            break;
    }
    if (in_dev)
        in_dev_put(in_dev);
    if (!saddr) //如果源地址没有,那么选择一个. 参考ip 路由实现一文
        saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
    if ((probes -= neigh->parms->ucast_probes) < 0) {
        if (!(neigh->nud_state & NUD_VALID))
            printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
        dst_ha = neigh->ha;
        read_lock_bh(&neigh->lock);
    } else if ((probes -= neigh->parms->app_probes) < 0) {
#ifdef CONFIG_ARPD
        neigh_app_ns(neigh);
#endif
        return;
    }
    //如果dst_ha为NULL,那么发送请求时就会填充为广播地址
    arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_ha, dev->dev_addr, NULL);
    if (dst_ha)
        read_unlock_bh(&neigh->lock);
}
[/邻居项操作实现]
    [定时器操作实现]
/*
 * Timer callback for a single neighbour entry; @arg is the
 * struct neighbour pointer cast to unsigned long.
 *
 * Under neigh->lock it advances the entry's NUD state (REACHABLE, DELAY,
 * PROBE/INCOMPLETE), fails the entry once the probe count reaches
 * neigh_max_probes(), re-arms the timer while the entry is still in a
 * timer-driven state and, for INCOMPLETE/PROBE entries, sends another
 * solicitation. It ends by dropping one reference with neigh_release().
 */
static void neigh_timer_handler(unsigned long arg)
{
    unsigned long now, next;
    struct neighbour *neigh = (struct neighbour *)arg;
    unsigned state;
    int notify = 0;

    write_lock(&neigh->lock);

    state = neigh->nud_state;
    now = jiffies;
    next = now + HZ; //default expiry: one second (HZ ticks) from now
    if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
        printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
        goto out;
    }
    //work out the state transition and the next expiry time
    if (state & NUD_REACHABLE) {
        if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) {
            NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
            next = neigh->confirmed + neigh->parms->reachable_time;
        } else if (time_before_eq(now, neigh->used + neigh->parms->delay_probe_time)) {
            NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
            neigh->nud_state = NUD_DELAY;
            neigh->updated = jiffies;
            neigh_suspect(neigh);
            next = now + neigh->parms->delay_probe_time;
        } else {
            NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
            neigh->nud_state = NUD_STALE;
            neigh->updated = jiffies;
            neigh_suspect(neigh);
            notify = 1;
        }
    } else if (state & NUD_DELAY) {
        if (time_before_eq(now, neigh->confirmed + neigh->parms->delay_probe_time)) {
            NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
            neigh->nud_state = NUD_REACHABLE;
            neigh->updated = jiffies;
            neigh_connect(neigh);
            notify = 1;
            next = neigh->confirmed + neigh->parms->reachable_time;
        } else {
            NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
            neigh->nud_state = NUD_PROBE;
            neigh->updated = jiffies;
            atomic_set(&neigh->probes, 0);
            next = now + neigh->parms->retrans_time;
        }
    } else {
        /* NUD_PROBE|NUD_INCOMPLETE */
        next = now + neigh->parms->retrans_time;
    }
    //still unresolved/probing and the probe count has reached the maximum
    if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
        struct sk_buff *skb;
        neigh->nud_state = NUD_FAILED; //mark the entry as failed
        neigh->updated = jiffies;
        notify = 1;
        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
        while (neigh->nud_state == NUD_FAILED && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
            write_unlock(&neigh->lock);
            neigh->ops->error_report(neigh, skb); //report the error for each queued skb, with the lock dropped
            write_lock(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue);//discard whatever is still queued
    }
    //entry is still in a timer-driven state: re-arm, never sooner than HZ/2 from now
    if (neigh->nud_state & NUD_IN_TIMER) {
        if (time_before(next, jiffies + HZ/2))
            next = jiffies + HZ/2;
        if (!mod_timer(&neigh->timer, next)) //update the expiry; take a reference when mod_timer() returns 0
            neigh_hold(neigh);
    }
    if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
        struct sk_buff *skb = skb_peek(&neigh->arp_queue); //peek at the head skb without unlinking it
        /* keep skb alive even if arp_queue overflows */
        if (skb)
            skb_get(skb);

        write_unlock(&neigh->lock);
        neigh->ops->solicit(neigh, skb); //send the solicitation (ARP request) with the lock released
        atomic_inc(&neigh->probes);
        if (skb)
            kfree_skb(skb);
    } else {
out:
        write_unlock(&neigh->lock);
    }
    if (notify)
        neigh_update_notify(neigh);
    neigh_release(neigh);
}
[/定时器操作实现]
[绑定邻居项]
    在路由缓存中绑定一个邻居项
/*
 * Bind a neighbour entry to a route cache entry.
 *
 * If @dst has no neighbour yet, one is looked up (and created when missing)
 * in arp_tbl -- or in clip_tbl_hook for ARPHRD_ATM devices when ATM CLIP is
 * configured -- keyed by the route's gateway address. For loopback and
 * point-to-point devices the key is forced to 0.
 *
 * Returns 0 on success (including when a neighbour was already bound),
 * -EINVAL when @dst has no device, or the error encoded in the pointer
 * returned by __neigh_lookup_errno().
 */
int arp_bind_neighbour(struct dst_entry *dst)
{
    struct net_device *dev = dst->dev;
    struct neighbour *n = dst->neighbour;

    if (dev == NULL)
        return -EINVAL;
    if (n == NULL) { /* nothing bound yet */
        __be32 nexthop = ((struct rtable*)dst)->rt_gateway; /* next-hop address */
        if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
            nexthop = 0; /* loopback / point-to-point: no real next hop needed */

        /* look the neighbour up by address, creating it if absent */
        n = __neigh_lookup_errno(
#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
                dev->type == ARPHRD_ATM ? clip_tbl_hook :
#endif
                &arp_tbl, &nexthop, dev);
        if (IS_ERR(n))
            return PTR_ERR(n);
        dst->neighbour = n;
    }
    /* Success path: the original fell off the end of a non-void function. */
    return 0;
}
/*
 * Return the neighbour matching (@pkey, @dev) in @tbl, creating it through
 * neigh_create() when the lookup finds nothing. The result of
 * neigh_create() is passed back unchanged.
 */
static inline struct neighbour * __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
    struct neighbour *found = neigh_lookup(tbl, pkey, dev);

    /* Not present yet: create a new entry. */
    if (!found)
        return neigh_create(tbl, pkey, dev);

    return found;
}
那么调用流程就是,当发送ip包时查找路由,找到后查找邻居项,都找到或创建后调用路由缓存项中的dst->output函数。
[/绑定邻居项]
[arp通知链实现]
    通知回调函数
/*
 * Netdevice notifier callback for ARP.
 *
 * Only NETDEV_CHANGEADDR events for devices in init_net are acted upon:
 * neigh_changeaddr() is called on arp_tbl for the device, followed by
 * rt_cache_flush(0). Always returns NOTIFY_DONE.
 */
static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
    struct net_device *dev = ptr;

    if (dev->nd_net == &init_net && event == NETDEV_CHANGEADDR) {
        /* The hardware address changed: handle this device's neighbour entries. */
        neigh_changeaddr(&arp_tbl, dev);
        /* ...and request a flush of the routing cache. */
        rt_cache_flush(0);
    }

    return NOTIFY_DONE;
}
[/arp通知链实现]

 

posted on 2013-08-27 14:40  SuperKing  阅读(1914)  评论(0编辑  收藏  举报

导航