网桥一二三

网桥?交换机的老爸是也。当然,一代要比一代强,交换机是一种多端口,自适应,再加上各种其他更好的性能,但这些形容词之后,仍然只是个“网桥”。

网桥涉及的协议主要是 STP(生成树协议)。网桥、终端等构成的网络是一个闭合的拓扑图,自然会有很多回环。数据包当然不能无休止地在环路里转圈,所以要在逻辑上把这个图修剪成无环路的树型网络。

首先从init开始:

/*
 * Module entry point for the bridge code.
 *
 * Registers, in order: the STP protocol handler, the forwarding database
 * (fdb) support, the per-network-namespace operations, the bridge
 * netfilter hooks, the netdevice notifier and the netlink interface, and
 * finally installs the ioctl stub used for bridge-level requests.
 *
 * Returns 0 on success, or the negative error code of the step that
 * failed. On failure every step that already succeeded is undone in
 * reverse order before returning.
 */
static int __init br_init(void)
{
    int err;

    err = stp_proto_register(&br_stp_proto);
    if (err < 0) {
        pr_err("bridge: can't register sap for STP\n");
        return err;
    }

    err = br_fdb_init();    /* sets up the fdb cache (kmem_cache_create) */
    if (err)
        goto err_out;

    err = register_pernet_subsys(&br_net_ops);
    if (err)
        goto err_out1;

    /* bridge netfilter handling */
    err = br_netfilter_init();
    if (err)
        goto err_out2;

    /* hook into the netdev_chain notifier list */
    err = register_netdevice_notifier(&br_device_notifier);
    if (err)
        goto err_out3;

    err = br_netlink_init();
    if (err)
        goto err_out4;

    /* handler invoked for bridge ioctls issued from user space (see <b>) */
    brioctl_set(br_ioctl_deviceless_stub);

#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
    br_fdb_test_addr_hook = br_fdb_test_addr;
#endif

    return 0;

    /* Unwind in the reverse order of registration. */
err_out4:
    unregister_netdevice_notifier(&br_device_notifier);
err_out3:
    br_netfilter_fini();
err_out2:
    unregister_pernet_subsys(&br_net_ops);
err_out1:
    br_fdb_fini();
err_out:
    stp_proto_unregister(&br_stp_proto);
    return err;
}

  <b>


/*
 * ioctl handler for bridge requests that are not addressed to a specific
 * bridge device.
 *
 * @net:  network namespace the request applies to
 * @cmd:  ioctl command number
 * @uarg: user-space argument; for SIOCBRADDBR/SIOCBRDELBR a buffer of
 *        IFNAMSIZ bytes holding the bridge name
 *
 * Returns the result of the selected operation, -EPERM when the caller
 * lacks CAP_NET_ADMIN, -EFAULT when the name cannot be copied from user
 * space, or -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
    switch (cmd) {
    case SIOCGIFBR:
    case SIOCSIFBR:
        return old_deviceless(net, uarg);

    case SIOCBRADDBR:   /* create a bridge */
    case SIOCBRDELBR:   /* delete a bridge */
    {
        char buf[IFNAMSIZ];

        if (!capable(CAP_NET_ADMIN))
            return -EPERM;

        if (copy_from_user(buf, uarg, IFNAMSIZ))
            return -EFAULT;

        /* The user buffer need not be NUL-terminated; force it. */
        buf[IFNAMSIZ - 1] = 0;
        if (cmd == SIOCBRADDBR)
            return br_add_bridge(net, buf);    /* see <c> */

        return br_del_bridge(net, buf);
    }
    }

    return -EOPNOTSUPP;
}

  <c>

int br_add_bridge(struct net *net, constchar*name)
{
struct net_device *dev;
int ret;

//为虚拟桥新建一个net_device
dev = new_bridge_dev(net, name); //-->d:
if (!dev)
return-ENOMEM;

rtnl_lock();
if (strchr(dev->name, '%')) {
ret
= dev_alloc_name(dev, dev->name); //内核给分配个名字
if (ret <0)
goto out_free;
}

SET_NETDEV_DEVTYPE(dev,
&br_type);

ret
= register_netdevice(dev); //然后注册该网络设备
if (ret)
goto out_free;

ret
= br_sysfs_addbr(dev); //sysfs中建立相关信息
if (ret)
unregister_netdevice(dev);
out:
rtnl_unlock();
return ret;

out_free:
free_netdev(dev);
gotoout;
}

  <d>

staticstruct net_device *new_bridge_dev(struct net *net, constchar*name)
{
struct net_bridge *br;
struct net_device *dev;

dev
= alloc_netdev(sizeof(struct net_bridge), name,
br_dev_setup);
//-->e:

if (!dev)
return NULL;
dev_net_set(dev, net);

br
= netdev_priv(dev); //获得私有区间
br->dev = dev;

br
->stats = alloc_percpu(struct br_cpu_netstats);
if (!br->stats) {
free_netdev(dev);
return NULL;
}

spin_lock_init(
&br->lock);

//队列初始化。在port_list中保存了这个桥上的端口列表
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(
&br->hash_lock);

//stp协议相关
br->bridge_id.prio[0] =0x80;
br
->bridge_id.prio[1] =0x00;

memcpy(br
->group_addr, br_group_address, ETH_ALEN);

br
->feature_mask = dev->features;
br
->stp_enabled = BR_NO_STP;
br
->designated_root = br->bridge_id;
br
->root_path_cost =0;
br
->root_port =0;
br
->bridge_max_age = br->max_age =20* HZ;
br
->bridge_hello_time = br->hello_time =2* HZ;
br
->bridge_forward_delay = br->forward_delay =15* HZ;
br
->topology_change =0;
br
->topology_change_detected =0;
br
->ageing_time =300* HZ;

br_netfilter_rtable_init(br);

br_stp_timer_init(br);
br_multicast_init(br);

return dev;
}

  该函数主要是为*br (struct net_bridge) 赋值,但首先要初始化 dev (struct net_device)。

  <e>

/*
 * Setup callback handed to alloc_netdev(): initialises the net_device
 * side of a bridge.
 */
void br_dev_setup(struct net_device *dev)
{
    /* Give the device an initial address via random_ether_addr(). */
    random_ether_addr(dev->dev_addr);

    /* Generic Ethernet initialisation of the device. */
    ether_setup(dev);

    /* Bridge-specific callbacks; br_netdev_ops carries the ioctl handler. */
    dev->netdev_ops = &br_netdev_ops;
    dev->destructor = br_dev_free;
    SET_ETHTOOL_OPS(dev, &br_ethtool_ops);

    dev->tx_queue_len = 0;
    dev->priv_flags = IFF_EBRIDGE;

    dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
                    NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
                    NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX;
}

 

代码中的网络处理函数部分:

将接口添进网桥时,用户空间调用ioctl(br_socket_fd, SIOCBRADDIF, &ifr)

dev->netdev_ops = &br_netdev_ops中,回调函数:

staticconststruct net_device_ops br_netdev_ops = {
... ...
.ndo_do_ioctl
= br_dev_ioctl,
... ...
}

 

  具体的网桥ioctl :

/*
 * Per-bridge-device ioctl handler (installed as .ndo_do_ioctl).
 *
 * SIOCDEVPRIVATE is passed to old_dev_ioctl(); SIOCBRADDIF and SIOCBRDELIF
 * add or remove the interface whose index is rq->ifr_ifindex. Any other
 * command is logged and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
    struct net_bridge *br = netdev_priv(dev);

    if (cmd == SIOCDEVPRIVATE)
        return old_dev_ioctl(dev, rq, cmd);

    /* add (SIOCBRADDIF) or delete (SIOCBRDELIF) a port, see <f> */
    if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
        return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);

    br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
    return -EOPNOTSUPP;
}

  <f>

/*
 * Add the interface with index @ifindex to bridge @br (when @isadd is
 * non-zero) or remove it (when @isadd is zero).
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL if no such interface
 * exists in the bridge's namespace, otherwise the result of br_add_if()
 * or br_del_if().
 *
 * Called with RTNL held.
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
    struct net_device *dev;
    int ret;

    if (!capable(CAP_NET_ADMIN))
        return -EPERM;

    dev = __dev_get_by_index(dev_net(br->dev), ifindex);
    if (dev == NULL)
        return -EINVAL;

    if (isadd)    /* true when cmd == SIOCBRADDIF */
        ret = br_add_if(br, dev);    /* see <g> */
    else
        ret = br_del_if(br, dev);

    return ret;
}

  <g>

/*
 * Attach network device dev to bridge br as a new port.
 *
 * Returns 0 on success. Fails with -EINVAL for loopback or non-Ethernet
 * devices, -ELOOP if dev is itself a bridge, -EBUSY if dev is already a
 * bridge port, -EOPNOTSUPP if dev has IFF_DONT_BRIDGE set, or with the
 * error of whichever setup step failed; in that case the steps already
 * performed are undone through the err4..put_back labels.
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
    struct net_bridge_port *p;
    int err = 0;
    bool changed_addr;

    /* Don't allow bridging non-ethernet like devices */
    if ((dev->flags & IFF_LOOPBACK)   ||
         dev->type != ARPHRD_ETHER   || 
         dev->addr_len != ETH_ALEN)
        return -EINVAL;

    /* No bridging of bridges */
    if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
        return -ELOOP;

    /* Device is already being bridged */
    if (br_port_exists(dev))
        return -EBUSY;

    /* No bridging devices that dislike that (e.g. wireless) */
    if (dev->priv_flags & IFF_DONT_BRIDGE)
        return -EOPNOTSUPP;

    //create the net_bridge_port for this interface
    p = new_nbp(br, dev);	//see <h>
    if (IS_ERR(p))
        return PTR_ERR(p);

    //put the interface into promiscuous mode
    err = dev_set_promiscuity(dev, 1);
    if (err)
        goto put_back;

    err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
                   SYSFS_BRIDGE_PORT_ATTR);
    if (err)
        goto err0;

    //record the port's own MAC in the port->MAC table
    err = br_fdb_insert(br, p, dev->dev_addr);	//see <i>
    if (err)
        goto err1;

    err = br_sysfs_addif(p);
    if (err)
        goto err2;

    if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
        goto err3;

    //install br_handle_frame as the device's receive handler
    err = netdev_rx_handler_register(dev, br_handle_frame, p);	//see <k>
    if (err)
        goto err4;

    dev->priv_flags |= IFF_BRIDGE_PORT;

    dev_disable_lro(dev);

    list_add_rcu(&p->list, &br->port_list);

    spin_lock_bh(&br->lock);
    //the new port's MAC may change the bridge id
    changed_addr = br_stp_recalculate_bridge_id(br);	//see <j>
    br_features_recompute(br);

    if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
        (br->dev->flags & IFF_UP))
        br_stp_enable_port(p);
    spin_unlock_bh(&br->lock);

    br_ifinfo_notify(RTM_NEWLINK, p);

    if (changed_addr)
        call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);

    dev_set_mtu(br->dev, br_min_mtu(br));

    kobject_uevent(&p->kobj, KOBJ_ADD);

    return 0;

    /*
     * NOTE(review): err4 clears the device's master, but no matching
     * netdev_set_master(dev, br->dev) call appears in this listing. The
     * call was presumably dropped from the excerpt - confirm against the
     * kernel source.
     */
err4:
    netdev_set_master(dev, NULL);
err3:
    sysfs_remove_link(br->ifobj, p->dev->name);
err2:
    br_fdb_delete_by_port(br, p, 1);
err1:
    kobject_put(&p->kobj);
    p = NULL; /* kobject_put frees */
err0:
    dev_set_promiscuity(dev, -1);
put_back:
    /* p is NULL here when execution came through err1, else still allocated */
    dev_put(dev);
    kfree(p);
    return err;
}

  <h>

/* 为接口创建net_bridge_port */
staticstruct net_bridge_port *new_nbp(struct net_bridge *br,
struct net_device *dev)
{
int index;
struct net_bridge_port *p;

index
= find_portno(br);
if (index <0)
return ERR_PTR(index);

p
= kzalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return ERR_PTR(-ENOMEM);

p
->br = br;
dev_hold(dev);
p
->dev = dev;
p
->path_cost = port_cost(dev);
p
->priority =0x8000>> BR_PORT_BITS;
p
->port_no = index;
p
->flags =0;
br_init_port(p);
p
->state = BR_STATE_DISABLED;
br_stp_port_timer_init(p);
br_multicast_add_port(p);

return p;
}

之后,把要加入的接口及其对应的 MAC 地址作为本机静态项,加入到 port—MAC 对应表。

    <i>

/*
 * Locked wrapper around fdb_insert(): adds @addr for port @source to the
 * forwarding database of @br while holding br->hash_lock.
 *
 * Returns whatever fdb_insert() returns.
 */
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                  const unsigned char *addr)
{
    int err;

    spin_lock_bh(&br->hash_lock);
    err = fdb_insert(br, source, addr);
    spin_unlock_bh(&br->hash_lock);

    return err;
}

/*
 * Insert @addr as an entry for port @source into the forwarding database.
 *
 * If an entry for @addr already exists and is local, nothing is changed.
 * If it exists but is not local, a warning is logged and the old entry is
 * deleted before the new one is created. The new entry is created with
 * fdb_create(head, source, addr, 1).
 *
 * Returns 0 on success, -EINVAL if @addr is not a valid Ethernet address,
 * or -ENOMEM if the entry could not be created.
 */
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                      const unsigned char *addr)
{
    struct hlist_head *head = &br->hash[br_mac_hash(addr)];
    struct net_bridge_fdb_entry *fdb;

    /* reject addresses that are not valid Ethernet addresses */
    if (!is_valid_ether_addr(addr))
        return -EINVAL;

    fdb = fdb_find(head, addr);
    if (fdb) {
        /* it is okay to have multiple ports with same
         * address, just use the first one.
         */
        if (fdb->is_local)
            return 0;
        br_warn(br, "adding interface %s with same address "
                "as a received packet\n",
                source->dev->name);
        fdb_delete(fdb);
    }

    if (!fdb_create(head, source, addr, 1))
        return -ENOMEM;

    return 0;
}

  <j>

bool br_stp_recalculate_bridge_id(struct net_bridge *br)
{
const unsigned char*br_mac_zero =
(
const unsigned char*)br_mac_zero_aligned;
const unsigned char*addr = br_mac_zero;
struct net_bridge_port *p;

/* user has chosen a value so keep it */
if (br->flags & BR_SET_MAC_ADDR)
returnfalse;

//遍历桥中所有的端口
list_for_each_entry(p, &br->port_list, list) {
if (addr == br_mac_zero ||
memcmp(p
->dev->dev_addr, addr, ETH_ALEN) <0)
addr
= p->dev->dev_addr;

}

//如果不与现在桥的MAC相同
if (compare_ether_addr(br->bridge_id.addr, addr) ==0)
returnfalse; /* no change */

br_stp_change_bridge_id(br, addr);  //-->
returntrue;
}

  遍历桥对应的所有接口,然后取最小的MAC。然后判断最小MAC跟现在的MAC是否相同。

/*
 * Install @addr as the bridge's new address.
 *
 * Updates br->bridge_id.addr and the bridge device's dev_addr, rewrites
 * every port's designated bridge/root address that still equals the old
 * address, then re-runs the configuration update and port state
 * selection. If the bridge is the root afterwards but was not before,
 * br_become_root_bridge() is called.
 */
void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
{
    /* should be aligned on 2 bytes for compare_ether_addr() */
    unsigned short prev_storage[ETH_ALEN >> 1];
    unsigned char *prev_addr = (unsigned char *)prev_storage;
    struct net_bridge_port *port;
    int was_root = br_is_root_bridge(br);

    /* remember the old address, then switch the bridge id over */
    memcpy(prev_addr, br->bridge_id.addr, ETH_ALEN);
    memcpy(br->bridge_id.addr, addr, ETH_ALEN);

    /* this is the point where the bridge device's MAC changes */
    memcpy(br->dev->dev_addr, addr, ETH_ALEN);

    list_for_each_entry(port, &br->port_list, list) {
        if (compare_ether_addr(port->designated_bridge.addr, prev_addr) == 0)
            memcpy(port->designated_bridge.addr, addr, ETH_ALEN);

        if (compare_ether_addr(port->designated_root.addr, prev_addr) == 0)
            memcpy(port->designated_root.addr, addr, ETH_ALEN);
    }

    br_configuration_update(br);
    br_port_state_selection(br);

    if (br_is_root_bridge(br) && !was_root)
        br_become_root_bridge(br);
}

  

  以上是大致的网桥配置过程。配置好之后,便是发送和接收数据,这里先瞧一眼网桥接收数据的实现。

     <k>

/*
 * Install @rx_handler (with its private @rx_handler_data) as the receive
 * handler of @dev. Asserts that RTNL is held.
 *
 * Returns 0 on success, or -EBUSY if @dev already has a receive handler.
 */
int netdev_rx_handler_register(struct net_device *dev,
                               rx_handler_func_t *rx_handler,
                               void *rx_handler_data)
{
    int rc = -EBUSY;

    ASSERT_RTNL();

    if (!dev->rx_handler) {
        /* publish the data first, then the callback itself */
        rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
        rcu_assign_pointer(dev->rx_handler, rx_handler);
        rc = 0;
    }

    return rc;
}

  利用回调,实际的数据处理函数便是:br_handle_frame

/*
 * Receive handler installed on every bridge port.
 *
 * Returns RX_HANDLER_PASS when the frame should continue through normal
 * processing (loopback frames, link-local frames accepted by the LOCAL_IN
 * hook, frames claimed by br_should_route_hook), and RX_HANDLER_CONSUMED
 * when the bridge has taken or dropped it.
 */
rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
{
    struct sk_buff *skb = *pskb;
    const unsigned char *dest = eth_hdr(skb)->h_dest;    /* destination MAC */
    struct net_bridge_port *port;
    br_should_route_hook_t *route_hook;

    if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
        return RX_HANDLER_PASS;

    /* drop frames whose source MAC fails is_valid_ether_addr() */
    if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
        goto drop;

    skb = skb_share_check(skb, GFP_ATOMIC);
    if (!skb)
        return RX_HANDLER_CONSUMED;

    port = br_port_get_rcu(skb->dev);

    if (unlikely(is_link_local(dest))) {
        /* Pause frames shouldn't be passed up by driver anyway */
        if (skb->protocol == htons(ETH_P_PAUSE))
            goto drop;

        /* If STP is turned off, then forward */
        if (port->br->stp_enabled == BR_NO_STP && dest[5] == 0)
            goto forward;

        if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
                    NULL, br_handle_local_finish))
            return RX_HANDLER_CONSUMED;    /* consumed by filter */

        *pskb = skb;
        return RX_HANDLER_PASS;    /* continue processing */
    }

forward:
    switch (port->state) {
    case BR_STATE_FORWARDING:
        route_hook = rcu_dereference(br_should_route_hook);
        if (route_hook) {
            if ((*route_hook)(skb)) {
                *pskb = skb;
                return RX_HANDLER_PASS;
            }
            dest = eth_hdr(skb)->h_dest;
        }
        /* fall through */
    case BR_STATE_LEARNING:
        /* frames addressed to the bridge's own MAC are for this host */
        if (!compare_ether_addr(port->br->dev->dev_addr, dest))
            skb->pkt_type = PACKET_HOST;

        NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
                br_handle_frame_finish);
        break;
    default:
        goto drop;
    }

    return RX_HANDLER_CONSUMED;

drop:
    kfree_skb(skb);
    return RX_HANDLER_CONSUMED;
}

  br_handle_frame_finish: 正常的数据包会流进br_handle_frame_finish()进行处理 :

/*
 * Main receive path of the bridge, reached from br_handle_frame() through
 * the PRE_ROUTING hook.
 *
 * Learns the source address, then decides whether the frame is delivered
 * to the local host (skb2), forwarded to one port, flooded to the other
 * ports, or dropped.
 *
 * Returns the result of br_pass_frame_up() when the frame is delivered
 * locally, otherwise 0.
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
    /* destination MAC address */
    const unsigned char *dest = eth_hdr(skb)->h_dest;
    struct net_bridge_port *p = br_port_get_rcu(skb->dev);
    struct net_bridge *br;
    struct net_bridge_fdb_entry *dst;
    struct net_bridge_mdb_entry *mdst;
    struct sk_buff *skb2;

    if (!p || p->state == BR_STATE_DISABLED)
        goto drop;

    /* insert into forwarding database after filtering to avoid spoofing */
    br = p->br;
    br_fdb_update(br, p, eth_hdr(skb)->h_source);

    if (is_multicast_ether_addr(dest) &&
        br_multicast_rcv(br, p, skb))
        goto drop;

    /* a learning port records addresses but does not forward */
    if (p->state == BR_STATE_LEARNING)
        goto drop;

    BR_INPUT_SKB_CB(skb)->brdev = br->dev;

    /* The packet skb2 goes to the local host (NULL to skip). */
    skb2 = NULL;

    /*
     * If the bridge device itself is promiscuous, every received frame is
     * also handed to the local host.
     */
    if (br->dev->flags & IFF_PROMISC)
        skb2 = skb;

    dst = NULL;

    if (is_multicast_ether_addr(dest)) {
        /* multicast or broadcast destination */
        mdst = br_mdb_get(br, skb);
        if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
            if ((mdst && mdst->mglist) ||
                br_multicast_is_router(br))
                skb2 = skb;
            br_multicast_forward(mdst, skb, skb2);
            skb = NULL;
            if (!skb2)
                goto out;
        } else
            skb2 = skb;

        br->dev->stats.multicast++;
    } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
        /* fdb lookup says the destination is this host */
        skb2 = skb;
        /* Do not forward the packet since it's local. */
        skb = NULL;
    }

    if (skb) {
        if (dst)
            br_forward(dst->dst, skb, skb2);    /* known destination port */
        else
            br_flood_forward(br, skb, skb2);    /* unknown: flood */
    }

    if (skb2)
        return br_pass_frame_up(skb2);

out:
    return 0;
drop:
    kfree_skb(skb);
    goto out;
}

  该函数,通过查找CAM表,取得发送端口,如果当前CAM表里没有到目的MAC的端口,则在其它端口上都发送此数据包。

     在这个函数里,我们看到,查询CAM表的函数为:__br_fdb_get()

/*
 * Look up @addr in the forwarding database of @br.
 *
 * Walks the hash chain selected by br_mac_hash(addr). Returns the entry
 * whose address equals @addr, or NULL if there is none or if the matching
 * entry has expired.
 */
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
                                          const unsigned char *addr)
{
    struct hlist_node *node;
    struct net_bridge_fdb_entry *entry;

    hlist_for_each_entry_rcu(entry, node,
                             &br->hash[br_mac_hash(addr)], hlist) {
        /* non-zero means the addresses differ: keep walking */
        if (compare_ether_addr(entry->addr.addr, addr))
            continue;

        if (unlikely(has_expired(br, entry)))
            return NULL;

        return entry;
    }

    return NULL;
}

 首先取得目的MAC对应的哈希项。

 然后再遍历该哈希链上的表项,查找地址与目的 MAC 相同且未过期的项,找到则返回该 fdb 项,否则返回 NULL。
          调用者据此判断:如果是送给本机的数据包,则传至上层协议,
          如不是,则需要转发。

 

posted @ 2011-07-26 14:12  郝壹贰叁  阅读(301)  评论(0编辑  收藏  举报