OpenvSwitch Code Analysis: bridge and port

ovs-vsctl add-br br0 inserts the new bridge's record into the database.
ovs-vsctl add-port br0 eth0 inserts the new port's record into the database.

void bridge_run(void)
{
    /* The database has changed. */
    if (ovsdb_idl_get_seqno(idl) != idl_seqno || vlan_splinters_changed) {
        idl_seqno = ovsdb_idl_get_seqno(idl);
        bridge_reconfigure(cfg ? cfg : &null_cfg);
    }
}
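
For context, bridge_run() is driven from ovs-vswitchd's main loop. A condensed sketch of that loop (from vswitchd/ovs-vswitchd.c, with daemonization and signal handling omitted):

while (!exiting) {
    bridge_run();                 /* reconfigure on ovsdb changes */
    unixctl_server_run(unixctl);
    netdev_run();

    bridge_wait();                /* register wakeup conditions */
    unixctl_server_wait(unixctl);
    netdev_wait();
    poll_block();                 /* sleep until an event fires */
}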

struct bridge {
    struct hmap_node node;      /* In 'all_bridges'. */
    char *name;                 /* User-specified arbitrary name. */
    char *type;                 /* Datapath type. */
    uint8_t ea[ETH_ADDR_LEN];   /* Bridge Ethernet Address. */
    uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */
    const struct ovsrec_bridge *cfg;

    /* OpenFlow switch processing. */
    struct ofproto *ofproto;    /* OpenFlow switch. */

    /* Bridge ports. */
    struct hmap ports;          /* "struct port"s indexed by name. */
    struct hmap ifaces;         /* "struct iface"s indexed by ofp_port. */
};
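
Bridges live in the global 'all_bridges' hmap keyed by name. A minimal lookup sketch, mirroring bridge_lookup() in vswitchd/bridge.c:

static struct bridge *
bridge_lookup(const char *name)
{
    struct bridge *br;

    HMAP_FOR_EACH_WITH_HASH (br, node, hash_string(name, 0), &all_bridges) {
        if (!strcmp(br->name, name)) {
            return br;
        }
    }
    return NULL;
}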

An ofproto represents a bridge's OpenFlow protocol instance:
struct ofproto {
    struct hmap_node hmap_node; /* In global 'all_ofprotos' hmap. */
    /* All the operation functions of the ofproto. */
    const struct ofproto_class *ofproto_class;
    char *type;                 /* Datapath type. */
    char *name;
    uint64_t datapath_id;       /* Datapath ID. */
    struct oftable *tables;     /* Flow tables. */
    int n_tables;
    struct hmap ports;          /* Contains "struct ofport"s. */
    /* OpenFlow connections. */
    struct connmgr *connmgr;
};

struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct ofproto up;
    struct dpif_backer *backer; /* Every ofproto has the same dpif_backer;
                                 * it represents the whole ovs datapath. */
};

struct dpif_backer {
    char *type;
    int refcount;
    struct dpif *dpif;
};

/* Handle for the openvswitch datapath; responsible for talking to the kernel. */
struct dpif {
    const struct dpif_class *dpif_class;
};
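
Generic ofproto code passes around a struct ofproto *, and the dpif provider recovers its concrete type through the embedded 'up' member. A minimal sketch of the downcast helper used throughout ofproto/ofproto-dpif.c:

static struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}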

bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
{
    /* Add or delete bridges according to the database update. */
    add_del_bridges(ovs_cfg);
    /* For each bridge, collect the newly wanted ports and delete stale
     * ports according to the database update. */
    HMAP_FOR_EACH (br, node, &all_bridges) {
        /* When a bridge has just been added, this also adds an internal
         * local port for the new bridge. */
        bridge_collect_wanted_ports(br, splinter_vlans, &br->wanted_ports);
        bridge_del_ports(br, &br->wanted_ports);
    }

    HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) {
        if (!br->ofproto) {
            /*
             * A newly added bridge gets an ofproto; what is actually
             * allocated is an ofproto_dpif:
             *     ofproto = ofproto_dpif_class->alloc
             *     ofproto->ofproto_class = ofproto_dpif_class
             *
             * It also obtains the dpif_backer; if none exists yet,
             * dpif_create_and_open() creates an ovs datapath with
             * OVS_DP_CMD_NEW, producing the ovs-system device. From the
             * kernel's point of view, the whole openvswitch system is a
             * single datapath.
             *     ofproto->ofproto_class->construct(ofproto)
             */
            ofproto_create(br->name, br->type, &br->ofproto);
        }
    }

    HMAP_FOR_EACH (br, node, &all_bridges) {
        bridge_add_ports(br, &br->wanted_ports);
        shash_destroy(&br->wanted_ports);
    }
}
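
The alloc/construct sequence described in the comment above is driven by ofproto_create(). A condensed sketch (error paths and bookkeeping stripped; ofproto_class_find__ is the class-registry lookup in ofproto/ofproto.c):

int
ofproto_create(const char *datapath_name, const char *datapath_type,
               struct ofproto **ofprotop)
{
    const struct ofproto_class *class;
    struct ofproto *ofproto;
    int error;

    class = ofproto_class_find__(datapath_type); /* ofproto_dpif_class */
    ofproto = class->alloc();                    /* really an ofproto_dpif */
    ofproto->ofproto_class = class;
    ofproto->name = xstrdup(datapath_name);
    error = class->construct(ofproto);           /* opens/creates the backer */
    *ofprotop = error ? NULL : ofproto;
    return error;
}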

From the kernel's point of view, every user-space bridge/datapath is merely a vport of the single openvswitch datapath;
the difference is that a new bridge's vport is an internal device, while add-port creates netdev, gre, etc. vports.
struct iface {
    struct list port_elem;      /* Element in struct port's "ifaces" list. */
    struct hmap_node ofp_port_node; /* In struct bridge's "ifaces" hmap. */
    struct port *port;          /* Containing port. */
    struct netdev *netdev;      /* Network device. */
    ofp_port_t ofp_port;        /* OpenFlow port number. */

    const struct ovsrec_interface *cfg; /*cfg for interface*/
};

struct port {
    struct bridge *bridge;
    char *name;
    struct hmap_node hmap_node; /* Element in struct bridge's "ports" hmap. */

    const struct ovsrec_port *cfg;  /* cfg for port */
    /* An ordinary bridge port has 1 interface.
     * A bridge port for bonding has at least 2 interfaces. */
    struct list ifaces;         /* List of "struct iface"s. */
};
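
Because the bridge indexes its ifaces by OpenFlow port number, resolving a port number back to an iface is a plain hmap walk. A minimal sketch mirroring iface_from_ofp_port() in vswitchd/bridge.c:

static struct iface *
iface_from_ofp_port(const struct bridge *br, ofp_port_t ofp_port)
{
    struct iface *iface;

    HMAP_FOR_EACH_IN_BUCKET (iface, ofp_port_node, hash_ofp_port(ofp_port),
                             &br->ifaces) {
        if (iface->ofp_port == ofp_port) {
            return iface;
        }
    }
    return NULL;
}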

bridge_add_ports--->iface_create
{
    /*create interface*/
    iface_do_create(br, iface_cfg, port_cfg, &ofp_port, &netdev);
    iface->netdev = netdev;
    hmap_insert(&br->ifaces, &iface->ofp_port_node, hash_ofp_port(ofp_port));

    /* Create the port; internally port_create() does:
     * hmap_insert(&br->ports, &port->hmap_node, hash_string(port->name, 0)); */
    port = port_create(br, port_cfg);
    list_push_back(&port->ifaces, &iface->port_elem);
}

struct netdev {
    /* The following do not change during the lifetime of a struct netdev. */
    char *name;                         /* Name of network device. */
    const struct netdev_class *netdev_class; 
};

struct netdev_linux {
    struct netdev up;
};

const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        NULL,                    /* set_stats */
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        netdev_internal_set_stats,
        NULL,                  /* get_features */
        netdev_internal_get_status);

/*gre*/
struct netdev_vport {
    struct netdev up;

    /* Tunnels. */
    struct netdev_tunnel_config tnl_cfg;
};
TUNNEL_CLASS("gre", "gre_system"),

int iface_do_create()
{
    /* Obtain the interface's netdev according to its type (internal,
     * netdev, gre). */
    netdev_open(iface_cfg->name, iface_get_type(iface_cfg, br->cfg), &netdev);
    // netdev = rc->class->alloc(): actually allocates a netdev_linux;
    //     for gre it is a netdev_vport.
    // rc->class->construct(netdev): a netdev-type device must already
    //     exist, whereas internal and gre devices are created later.

    /* Set the interface options; only gre devices have any. */
    error = iface_set_netdev_config(iface_cfg, netdev);

    ofproto_port_add(br->ofproto, netdev, ofp_portp);
    /* Obtain the ofport and map it to the dp_port:
     * ofport_install-->port_construct allocates the ofport. */
    update_port(ofproto, netdev_name);
}
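
The alloc/construct steps referenced in the comments happen inside netdev_open(). A condensed sketch (reference counting and locking stripped; netdev_lookup_class stands in for the class-registry lookup in lib/netdev.c):

int
netdev_open(const char *name, const char *type, struct netdev **netdevp)
{
    struct netdev_registered_class *rc;
    struct netdev *netdev;
    int error;

    rc = netdev_lookup_class(type && type[0] ? type : "system");
    netdev = rc->class->alloc();          /* netdev_linux or netdev_vport */
    netdev->netdev_class = rc->class;
    netdev->name = xstrdup(name);
    error = rc->class->construct(netdev);
    *netdevp = error ? NULL : netdev;
    return error;
}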

update_port
{
    ofport_open(ofproto, &ofproto_port, &pp);
    /*
        // Allocate the OpenFlow port number:
        if (ofproto_port->ofp_port == OFPP_NONE) {
            if (!strcmp(ofproto->name, ofproto_port->name)) {
                ofproto_port->ofp_port = OFPP_LOCAL;
            } else {
                ofproto_port->ofp_port = alloc_ofp_port(ofproto,
                                                        ofproto_port->name);
            }
        }
    */
    ofport_install(ofproto, netdev, &pp);
}

struct ofport {
    struct hmap_node hmap_node; /* In struct ofproto's "ports" hmap. */
    struct ofproto *ofproto;    /* The ofproto that contains this port. */
    struct netdev *netdev;
    struct ofputil_phy_port pp;
    ofp_port_t ofp_port;        /* OpenFlow port number. */
};

struct ofport_dpif {
    struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
    struct ofport up;
    /*datapath port*/
    odp_port_t odp_port;
};

ofport_install(struct ofproto *p, struct netdev *netdev, const struct ofputil_phy_port *pp)
{
    /* What is actually allocated is an ofport_dpif. */
    ofport = p->ofproto_class->port_alloc();
    ofport->ofproto = p;
    ofport->netdev = netdev;
    ofport->pp = *pp;
    ofport->ofp_port = pp->port_no;
    ofport->created = time_msec();

    /* Add port to 'p'. */
    hmap_insert(&p->ports, &ofport->hmap_node, hash_ofp_port(ofport->ofp_port));
    /*map the ofport with dp_port*/
    error = p->ofproto_class->port_construct(ofport);
}
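
The mapping set up at the port_construct() call lives in the backer, so packets coming up from the kernel (keyed by odp_port) can be resolved to an ofport. A rough sketch of the relevant part of port_construct() in ofproto/ofproto-dpif.c, with error handling and the tunnel-port cases omitted:

static int
port_construct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    struct dpif_port dpif_port;

    /* Ask the datapath which port number the device got. */
    dpif_port_query_by_name(ofproto->backer->dpif,
                            netdev_get_name(port->up.netdev), &dpif_port);
    port->odp_port = dpif_port.port_no;

    /* Remember the odp_port -> ofport mapping in the backer. */
    hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
                hash_odp_port(port->odp_port));
    dpif_port_destroy(&dpif_port);
    return 0;
}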

ofproto_port_add--->ofproto->ofproto_class->port_add
port_add
{
    /* netdev and internal vports use the device name, but gre uses
     * "gre_system", so the gre device is added to the datapath only once. */
    dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name))
    {
        /* port_no here is the openvswitch datapath's port number. */
        dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
        //dpif->dpif_class->port_add(dpif, netdev, &port_no);
    }
}

dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
                    odp_port_t *port_nop)
{
    /* Creates the vport in the kernel with OVS_VPORT_CMD_NEW. */
}
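
Concretely, dpif_linux_port_add() builds a Generic Netlink vport request and transacts it with the kernel. A condensed sketch of the body, based on lib/dpif-linux.c of this era (attribute handling and the upcall-PID setup trimmed):

struct dpif_linux_vport request, reply;
char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
struct ofpbuf *buf;
int error;

dpif_linux_vport_init(&request);
request.cmd = OVS_VPORT_CMD_NEW;
request.dp_ifindex = dpif->dp_ifindex;
request.type = netdev_to_ovs_vport_type(netdev); /* NETDEV/INTERNAL/GRE/... */
request.name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
request.port_no = *port_nop;          /* or OVSP_NONE: let the kernel pick */

error = dpif_linux_vport_transact(&request, &reply, &buf);
if (!error) {
    *port_nop = reply.port_no;        /* datapath port number assigned */
    ofpbuf_delete(buf);
}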

///////////////////////////////////
Kernel side
int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
    parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
    parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
    parms.options = a[OVS_VPORT_ATTR_OPTIONS];
    parms.dp = dp;
    parms.port_no = port_no;
    parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

    vport = new_vport(&parms);--->ovs_vport_add(parms);--->vport_ops->create
}
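
new_vport() lands in ovs_vport_add(), which selects the vport_ops that matches the requested type. A minimal sketch of the dispatch (datapath/vport.c of this era; module autoload and locking omitted):

struct vport *ovs_vport_add(const struct vport_parms *parms)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
        if (vport_ops_list[i]->type == parms->type) {
            struct vport *vport = vport_ops_list[i]->create(parms);
            if (!IS_ERR(vport))
                hlist_add_head_rcu(&vport->hash_node,
                                   hash_bucket(ovs_dp_get_net(vport->dp),
                                               vport->ops->get_name(vport)));
            return vport;
        }
    }
    return ERR_PTR(-EAFNOSUPPORT);
}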

//normal netdev: eth0 veth0 
const struct vport_ops ovs_netdev_vport_ops = {
    .type        = OVS_VPORT_TYPE_NETDEV,
    .create        = netdev_create,
    .destroy    = netdev_destroy,
    .get_name    = ovs_netdev_get_name,
    .send        = netdev_send,    /* invoked when a packet must be
                                    * transmitted out of this vport */
};
struct vport *netdev_create(const struct vport_parms *parms)
{
     vport = ovs_vport_alloc(sizeof(struct netdev_vport), &ovs_netdev_vport_ops, parms);
    
     netdev_vport = netdev_vport_priv(vport);
     /*find the real dev*/
     netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
     /* Register the ovs hook so packets received on this netdev are
      * diverted into ovs instead of the normal protocol stack. */
     netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, vport);
}
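
The hook registered above is where packets enter ovs from an enslaved device. A condensed sketch of the handler (cf. datapath/vport-netdev.c; error checks trimmed):

static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
{
    struct sk_buff *skb = *pskb;
    struct vport *vport;

    if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
        return RX_HANDLER_PASS;

    /* Frames received on the enslaved device are consumed by ovs. */
    vport = ovs_netdev_get_vport(skb->dev);
    ovs_vport_receive(vport, skb, NULL);
    return RX_HANDLER_CONSUMED;
}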

static int netdev_send(struct vport *vport, struct sk_buff *skb)
{
    struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
    int mtu = netdev_vport->dev->mtu; /* used for an MTU check, elided here */
    int len;

    skb->dev = netdev_vport->dev;
    len = skb->len;
    /* Transmit directly from the underlying device. */
    dev_queue_xmit(skb);

    return len;
}

//internal: bridge port
const struct vport_ops ovs_internal_vport_ops = {
    .type        = OVS_VPORT_TYPE_INTERNAL,
    .create        = internal_dev_create,
    .destroy    = internal_dev_destroy,
    .get_name    = ovs_netdev_get_name,
    .send        = internal_dev_recv,
};

static const struct net_device_ops internal_dev_netdev_ops = {
    .ndo_start_xmit = internal_dev_xmit,
};

/* The internal net device's transmit function is ovs's packet-processing
 * entry point. */
static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
    ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
}

struct vport *internal_dev_create(const struct vport_parms *parms)
{
    vport = ovs_vport_alloc(sizeof(struct netdev_vport), &ovs_internal_vport_ops, parms);
    netdev_vport = netdev_vport_priv(vport);

    /* Create an ethernet device; netdev->netdev_ops = &internal_dev_netdev_ops; */
    netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev), parms->name, do_setup);
    internal_dev = internal_dev_priv(netdev_vport->dev);
    internal_dev->vport = vport;
    
    err = register_netdevice(netdev_vport->dev);
}
/* If a packet is directed out an internal device, it is re-injected into the
 * kernel protocol stack. */
int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
{
    skb->dev = netdev;
    skb->pkt_type = PACKET_HOST;
    skb->protocol = eth_type_trans(skb, netdev);
    skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

    netif_rx(skb);
}

//gre port
const struct vport_ops ovs_gre_vport_ops = {
    .type        = OVS_VPORT_TYPE_GRE,
    .create        = gre_create,
    .destroy    = gre_tnl_destroy,
    .get_name    = gre_get_name,
    .send        = gre_send,
};

Only one gre device is created in the kernel for the whole system:
struct vport *gre_create(const struct vport_parms *parms)
{
    ovs_net = net_generic(net, ovs_net_id);
    if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
        vport = ERR_PTR(-EEXIST);
        goto error;
    }

    vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
}

Transmit:
A packet sent out a gre port gets the gre header and the delivery (outer IP)
header added according to the specific actions installed for the flow.
int gre_send(struct vport *vport, struct sk_buff *skb)

Receive:
When the system receives a gre packet, the delivery header is stripped, the
gre header information is extracted and passed to gre_rcv, and the packet
then enters ovs's processing path.
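
A condensed sketch of that receive path, based on datapath/vport-gre.c of this era (key_to_tunnel_id and filter_tnl_flags are static helpers there; error paths trimmed):

static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
    struct ovs_key_ipv4_tunnel tun_key;
    struct ovs_net *ovs_net;
    struct vport *vport;

    ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
    vport = rcu_dereference(ovs_net->vport_net.gre_vport);
    if (unlikely(!vport))
        return PACKET_REJECT;

    /* Outer IP header + GRE key become the flow's tunnel metadata. */
    ovs_flow_tun_key_init(&tun_key, ip_hdr(skb),
                          key_to_tunnel_id(tpi->key, tpi->seq),
                          filter_tnl_flags(tpi->flags));
    ovs_vport_receive(vport, skb, &tun_key);
    return PACKET_RCVD;
}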

Original: http://wenx05124561.blog.163.com/blog/static/124000805201471652943681/