OVS 内核KEY值提取及匹配流表代码分析

原文链接:http://ry0117.com/2016/12/24/OVS内核KEY值提取及匹配流表代码分析/

 

当开启OVS后,创建datapath类型为system的网桥并为其添加相关接口。网桥内的接口在网卡接收到数据包后,数据包会先进入OVS的内核模块openvswitch;内核模块从数据包上提取key值,并使用key值匹配内核模块中的流表,当匹配到相应的流表后,则执行流表上相应的动作;

当在OVS内核缓存中匹配不到流表时,则将数据包及其key值信息通过Netlink发送(upcall)给用户态的ovs-vswitchd守护进程,由其来决定如何处理数据包。

下面就Linux-3.19版本内核中OpenvSwitch内核模块中的提取Key值、匹配流表及执行流表动作相关的代码做一下分析。

 

 

提取KEY值(datapath/flow.c)

Key值信息是匹配流表的前提。key值中包含很多信息,包括源MAC地址、目的MAC地址、VLAN信息、协议类型、源IP地址、目的IP地址、端口号等,这些信息都可以从skb数据包中提取到;此外key中还包含入端口号、优先级、mark、隧道信息等来自skb元数据的字段。

  1 int
  2 ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
  3              struct sk_buff *skb, struct sw_flow_key *key)
  4 {
  5     /* Extract metadata from packet: when tun_info is given, copy its tunnel key (and options, if any) into the key; otherwise zero the tunnel fields. */
  6     if (tun_info) {
  7         memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
  8         if (tun_info->options) {
  9             BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
 10                            8)) - 1
 11                     > sizeof(key->tun_opts));
 12             memcpy(GENEVE_OPTS(key, tun_info->options_len),
 13                    tun_info->options, tun_info->options_len);
 14             key->tun_opts_len = tun_info->options_len;
 15         } else {
 16             key->tun_opts_len = 0;
 17         }
 18     } else  {
 19         key->tun_opts_len = 0;
 20         memset(&key->tun_key, 0, sizeof(key->tun_key));
 21     }
 22     /* Fill in the key's metadata fields from the skb. */
 23     key->phy.priority = skb->priority;
 24     /* Set key->phy.in_port to the port number of the input vport. */
 25     key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
 26     key->phy.skb_mark = skb->mark;
 27     key->ovs_flow_hash = 0;
 28     key->recirc_id = 0;
 29     /* Extract the packet-derived key fields (MAC, protocol, IP addresses, ports, ...) and return key_extract()'s result. */
 30     return key_extract(skb, key);
 31 }
 32 /**
 33  * key_extract - extracts a flow key from an Ethernet frame.
 34  * @skb: sk_buff that contains the frame, with skb->data pointing to the
 35  * Ethernet header
 36  * @key: output flow key
 37  *
 38  * The caller must ensure that skb->len >= ETH_HLEN.
 39  *
 40  * Returns 0 if successful, otherwise a negative errno value.
 41  *
 42  * Initializes @skb header pointers as follows:
 43  *
 44  *    - skb->mac_header: the Ethernet header.
 45  *
 46  *    - skb->network_header: just past the Ethernet header, or just past the
 47  *      VLAN header, to the first byte of the Ethernet payload.
 48  *
 49  *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 50  *      on output, then just past the IP header, if one is present and
 51  *      of a correct length, otherwise the same as skb->network_header.
 52  *      For other key->eth.type values it is left untouched.
 53  */
 54 static int
 55 key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 56 {
 57     int error;
 58     struct ethhdr *eth;
 59     /* Flags are always used as part of stats */
 60     key->tp.flags = 0;
 61     /* Reset the L2 (MAC) header pointer. */
 62     skb_reset_mac_header(skb);
 63     /* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
 64      * header in the linear data area.
 65      */
 66     /* Get the Ethernet header and copy source and destination MAC into the key. */
 67     eth = eth_hdr(skb);
 68     ether_addr_copy(key->eth.src, eth->h_source);
 69     ether_addr_copy(key->eth.dst, eth->h_dest);
 70     /* Advance skb->data past the two MAC addresses.
 71     * If the frame carries an in-band VLAN tag, skb->data now points at the start of the VLAN information;
 72     * otherwise skb->data points at the EtherType field. */
 73     __skb_pull(skb, 2 * ETH_ALEN);
 74     /* We are going to push all headers that we pull, so no need to
 75      * update skb->csum here.
 76      */
 77     /* Extract VLAN information into the key: from the skb's tag metadata if present, else by parsing an 802.1Q header. */
 78     key->eth.tci = 0;
 79     if (vlan_tx_tag_present(skb))
 80         key->eth.tci = htons(skb->vlan_tci);
 81     else if (eth->h_proto == htons(ETH_P_8021Q))
 82         if (unlikely(parse_vlan(skb, key)))
 83             return -ENOMEM;
 84     /* Extract the EtherType (e.g. ETH_P_IP, ETH_P_ARP, ETH_P_IPV6); a zero result is reported as -ENOMEM. */
 85     key->eth.type = parse_ethertype(skb);
 86     if (unlikely(key->eth.type == htons(0)))
 87         return -ENOMEM;
 88     /* Reset the L3 header pointer and MAC length so skb->network_header points at the right place, then push back every byte pulled above so skb->data is at the MAC header again. */
 89     skb_reset_network_header(skb);
 90     skb_reset_mac_len(skb);
 91     __skb_push(skb, skb->data - skb_mac_header(skb));
 92     /* Network layer. */
 93     /* IPv4 packet */
 94     if (key->eth.type == htons(ETH_P_IP)) {
 95         struct iphdr *nh;
 96         __be16 offset;
 97         /* Validate the IP header (check_iphdr is expected to set skb->transport_header on success - confirm against its definition). */
 98         error = check_iphdr(skb);
 99         if (unlikely(error)) {
100             /* Invalid IP packet: clear the L3 fields of the key. */
101             memset(&key->ip, 0, sizeof(key->ip));
102             memset(&key->ipv4, 0, sizeof(key->ipv4));
103             if (error == -EINVAL) {
104                 /* -EINVAL is not reported as a failure: the L3 key fields were zeroed above,
105                 * transport_header is made equal to network_header (the fallback described in the function header comment), and 0 is returned so the caller still gets a key. Other errors are propagated. */
106                 skb->transport_header = skb->network_header;
107                 error = 0;
108             }
109             return error;
110         }
111         /* Get the L3 header and copy source and destination IP into the key. */
112         nh = ip_hdr(skb);
113         key->ipv4.addr.src = nh->saddr;
114         key->ipv4.addr.dst = nh->daddr;
115         /* Copy the L4 protocol number, TOS and TTL into the key. */
116         key->ip.proto = nh->protocol;
117         key->ip.tos = nh->tos;
118         key->ip.ttl = nh->ttl;
119         /*
120         * Classify IP fragmentation from the L3 header: a non-zero fragment offset marks a later fragment (return without parsing L4);
121         * the MF flag or UDP GSO marks the first fragment; otherwise the packet is not fragmented.
122         */
123         offset = nh->frag_off & htons(IP_OFFSET);
124         if (offset) {
125             key->ip.frag = OVS_FRAG_TYPE_LATER;
126             return 0;
127         }
128         if (nh->frag_off & htons(IP_MF) ||
129             skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
130             key->ip.frag = OVS_FRAG_TYPE_FIRST;
131         else
132             key->ip.frag = OVS_FRAG_TYPE_NONE;
133         /* Transport layer. */
134         /* TCP */
135         if (key->ip.proto == IPPROTO_TCP) {
136             if (tcphdr_ok(skb)) {
137                 /* Get the TCP header; copy source port, destination port and flags into the key. */
138                 struct tcphdr *tcp = tcp_hdr(skb);
139                 key->tp.src = tcp->source;
140                 key->tp.dst = tcp->dest;
141                 key->tp.flags = TCP_FLAGS_BE16(tcp);
142             } else {
143                 memset(&key->tp, 0, sizeof(key->tp));
144             }
145         /* UDP */
146         } else if (key->ip.proto == IPPROTO_UDP) {
147             if (udphdr_ok(skb)) {
148                 /* Get the UDP header; copy source and destination ports into the key. */
149                 struct udphdr *udp = udp_hdr(skb);
150                 key->tp.src = udp->source;
151                 key->tp.dst = udp->dest;
152             } else {
153                 memset(&key->tp, 0, sizeof(key->tp));
154             }
155         /* SCTP */
156         } else if (key->ip.proto == IPPROTO_SCTP) {
157             if (sctphdr_ok(skb)) {
158                 /* Get the SCTP header; copy source and destination ports into the key. */
159                 struct sctphdr *sctp = sctp_hdr(skb);
160                 key->tp.src = sctp->source;
161                 key->tp.dst = sctp->dest;
162             } else {
163                 memset(&key->tp, 0, sizeof(key->tp));
164             }
165         /* ICMP */
166         } else if (key->ip.proto == IPPROTO_ICMP) {
167             if (icmphdr_ok(skb)) {
168                 /* Get the ICMP header; copy the type and code fields into the key. */
169                 struct icmphdr *icmp = icmp_hdr(skb);
170                 /* The ICMP type and code fields use the 16-bit
171                  * transport port fields, so we need to store
172                  * them in 16-bit network byte order. */
173                 key->tp.src = htons(icmp->type);
174                 key->tp.dst = htons(icmp->code);
175             } else {
176                 memset(&key->tp, 0, sizeof(key->tp));
177             }
178         }
179     /* ARP or RARP */
180     } else if (key->eth.type == htons(ETH_P_ARP) ||
181            key->eth.type == htons(ETH_P_RARP)) {
182         struct arp_eth_header *arp;
183         bool arp_available = arphdr_ok(skb);
184         /* Get the ARP header pointer. */
185         arp = (struct arp_eth_header *)skb_network_header(skb);
186         if (arp_available &&
187             arp->ar_hrd == htons(ARPHRD_ETHER) &&
188             arp->ar_pro == htons(ETH_P_IP) &&
189             arp->ar_hln == ETH_ALEN &&
190             arp->ar_pln == 4) {
191             /* Store the ARP opcode in key->ip.proto. */
192             /* We only match on the lower 8 bits of the opcode. */
193             if (ntohs(arp->ar_op) <= 0xff)
194                 key->ip.proto = ntohs(arp->ar_op);
195             else
196                 key->ip.proto = 0;
197             /* Copy sender IP, target IP, sender MAC and target MAC into the key. */
198             memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
199             memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
200             ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
201             ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
202         } else {
203             memset(&key->ip, 0, sizeof(key->ip));
204             memset(&key->ipv4, 0, sizeof(key->ipv4));
205         }
206     /* MPLS: store the top label stack entry in the key and walk the label stack. */
207     } else if (eth_p_mpls(key->eth.type)) {
208         size_t stack_len = MPLS_HLEN;
209         /* In the presence of an MPLS label stack the end of the L2
210          * header and the beginning of the L3 header differ.
211          *
212          * Advance network_header to the beginning of the L3
213          * header. mac_len corresponds to the end of the L2 header.
214          */
215         while (1) {
216             __be32 lse;
217             error = check_header(skb, skb->mac_len + stack_len);
218             if (unlikely(error))
219                 return 0;
220             memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
221             if (stack_len == MPLS_HLEN)
222                 memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
223             skb_set_network_header(skb, skb->mac_len + stack_len);
224             if (lse & htonl(MPLS_LS_S_MASK))
225                 break;
226             stack_len += MPLS_HLEN;
227         }
228     /* IPv6: extract IPv6 information into the key. */
229     } else if (key->eth.type == htons(ETH_P_IPV6)) {
230         int nh_len;             /* IPv6 Header + Extensions */
231         nh_len = parse_ipv6hdr(skb, key);
232         if (unlikely(nh_len < 0)) {
233             memset(&key->ip, 0, sizeof(key->ip));
234             memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
235             if (nh_len == -EINVAL) {
236                 skb->transport_header = skb->network_header;
237                 error = 0;
238             } else {
239                 error = nh_len;
240             }
241             return error;
242         }
243         if (key->ip.frag == OVS_FRAG_TYPE_LATER)
244             return 0;
245         if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
246             key->ip.frag = OVS_FRAG_TYPE_FIRST;
247         /* Transport layer. */
248         if (key->ip.proto == NEXTHDR_TCP) {
249             if (tcphdr_ok(skb)) {
250                 struct tcphdr *tcp = tcp_hdr(skb);
251                 key->tp.src = tcp->source;
252                 key->tp.dst = tcp->dest;
253                 key->tp.flags = TCP_FLAGS_BE16(tcp);
254             } else {
255                 memset(&key->tp, 0, sizeof(key->tp));
256             }
257         } else if (key->ip.proto == NEXTHDR_UDP) {
258             if (udphdr_ok(skb)) {
259                 struct udphdr *udp = udp_hdr(skb);
260                 key->tp.src = udp->source;
261                 key->tp.dst = udp->dest;
262             } else {
263                 memset(&key->tp, 0, sizeof(key->tp));
264             }
265         } else if (key->ip.proto == NEXTHDR_SCTP) {
266             if (sctphdr_ok(skb)) {
267                 struct sctphdr *sctp = sctp_hdr(skb);
268                 key->tp.src = sctp->source;
269                 key->tp.dst = sctp->dest;
270             } else {
271                 memset(&key->tp, 0, sizeof(key->tp));
272             }
273         } else if (key->ip.proto == NEXTHDR_ICMP) {
274             if (icmp6hdr_ok(skb)) {
275                 error = parse_icmpv6(skb, key, nh_len);
276                 if (error)
277                     return error;
278             } else {
279                 memset(&key->tp, 0, sizeof(key->tp));
280             }
281         }
282     }
283     return 0;
284 }

 

根据KEY值匹配流表(datapath/datapath.c)

通过ovs_flow_key_extract函数及key_extract函数从skb中提取出所有需要的key值后,接下来就是使用key值来匹配OVS内核模块openvswitch中缓存的流表信息,并在匹配到流表后执行流表中相应的动作来处理数据包。若在内核中未匹配到流表,则通过Netlink消息将数据包及key值发送到用户态ovs-vswitchd进程,由用户态进程来决定如何处理数据包。

 1 /* Must be called with rcu_read_lock. */
 2 void
 3 ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 4 {
 5     const struct vport *p = OVS_CB(skb)->input_vport;
 6     struct datapath *dp = p->dp;
 7     struct sw_flow *flow;
 8     struct sw_flow_actions *sf_acts;
 9     struct dp_stats_percpu *stats;
10     u64 *stats_counter;
11     u32 n_mask_hit;
12     /* Get this CPU's instance of the per-CPU datapath statistics (dp->stats_percpu). */
13     stats = this_cpu_ptr(dp->stats_percpu);
14     /* Look up flow. */
15     /* Search the datapath flow table for a flow matching the key; n_mask_hit is filled in by the lookup. */
16     flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
17     if (unlikely(!flow)) {
18         /*
19         * No flow matched: build an OVS_PACKET_CMD_MISS upcall and hand the
20         * skb and key to ovs_dp_upcall(), which sends them over Netlink to the user-space ovs-vswitchd process;
21         * user space then decides how to handle the packet.
22         */
23         struct dp_upcall_info upcall;
24         int error;
25         upcall.cmd = OVS_PACKET_CMD_MISS;
26         upcall.userdata = NULL;
27         upcall.portid = ovs_vport_find_upcall_portid(p, skb);
28         upcall.egress_tun_info = NULL;
29         /* Build the Netlink message and send it to ovs-vswitchd; the skb is released here either way (kfree_skb on error, consume_skb on success). */
30         error = ovs_dp_upcall(dp, skb, key, &upcall);
31         if (unlikely(error))
32             kfree_skb(skb);
33         else
34             consume_skb(skb);
35         stats_counter = &stats->n_missed;
36         goto out;
37     }
38     /* Flow found: update the flow's statistics (the call is given the TCP flags from the key and the skb). */
39     ovs_flow_stats_update(flow, key->tp.flags, skb);
40     /* Fetch the matched flow's action list under RCU. */
41     sf_acts = rcu_dereference(flow->sf_acts);
42     /* Execute the matched flow's actions on the packet. */
43     ovs_execute_actions(dp, skb, sf_acts, key);
44     stats_counter = &stats->n_hit;
45 out:
46     /* Update datapath statistics. */
47     u64_stats_update_begin(&stats->syncp);
48     (*stats_counter)++;
49     stats->n_mask_hit += n_mask_hit;
50     u64_stats_update_end(&stats->syncp);
51 }

 

执行流表ACTION(datapath/actions.c)

匹配到对应的流表后,从流表中获取流表的动作,循环遍历所有的flow action,执行相应的action动作。

 

  1 /* Execute a list of actions against 'skb'. Returns the result of do_execute_actions(). */
  2 int
  3 ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
  4             const struct sw_flow_actions *acts,
  5             struct sw_flow_key *key)
  6 {
  7     int level = this_cpu_read(exec_actions_level);
  8     int err;
  9     this_cpu_inc(exec_actions_level);
 10     OVS_CB(skb)->egress_tun_info = NULL;
 11     /* Execute the flow's actions. */
 12     err = do_execute_actions(dp, skb, key,
 13                  acts->actions, acts->actions_len);
 14     /* Only the outermost invocation on this CPU (level was 0 on entry) calls process_deferred_actions(); presumably it runs actions that nested invocations queued rather than executed (e.g. recirculation) - confirm against its definition in actions.c. */
 15     if (!level)
 16         process_deferred_actions(dp);
 17     this_cpu_dec(exec_actions_level);
 18     return err;
 19 }
 20 /* Execute a list of actions against 'skb'. Walks the Netlink attributes in 'attr' (total length 'len'); returns 0 or the first action error. */
 21 static int
 22 do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 23                   struct sw_flow_key *key,
 24                   const struct nlattr *attr, int len)
 25 {
 26     /* Every output action needs a separate clone of 'skb', but the common
 27      * case is just a single output action, so that doing a clone and
 28      * then freeing the original skbuff is wasteful.  So the following code
 29      * is slightly obscure just to avoid that.
 30      */
 31     int prev_port = -1;
 32     const struct nlattr *a;
 33     int rem;
 34     for (a = attr, rem = len; rem > 0;
 35          a = nla_next(a, &rem)) {
 36         int err = 0;
 37         if (unlikely(prev_port != -1)) {
 38             /* An output port is pending and more actions follow: clone the skb and
 39             * send the clone out of prev_port, keeping the original for the remaining actions. */
 40             struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
 41             if (out_skb)
 42                 do_output(dp, out_skb, prev_port);
 43             prev_port = -1;
 44         }
 45         switch (nla_type(a)) {
 46         /* Output: remember the port number; the actual send is deferred (see above and the end of the function). */
 47         case OVS_ACTION_ATTR_OUTPUT:
 48             prev_port = nla_get_u32(a);
 49             break;
 50         /* Send the packet to the user-space process. */
 51         case OVS_ACTION_ATTR_USERSPACE:
 52             output_userspace(dp, skb, key, a);
 53             break;
 54         /* Hash action (the original note says this assigns key->ovs_flow_hash - confirm in execute_hash). */
 55         case OVS_ACTION_ATTR_HASH:
 56             execute_hash(skb, key, a);
 57             break;
 58         /* MPLS push (not covered by this article). */
 59         case OVS_ACTION_ATTR_PUSH_MPLS:
 60             err = push_mpls(skb, key, nla_data(a));
 61             break;
 62         /* MPLS pop (not covered by this article). */
 63         case OVS_ACTION_ATTR_POP_MPLS:
 64             err = pop_mpls(skb, key, nla_get_be16(a));
 65             break;
 66         /* Push a VLAN tag. */
 67         case OVS_ACTION_ATTR_PUSH_VLAN:
 68             err = push_vlan(skb, key, nla_data(a));
 69             break;
 70         /* Pop the VLAN tag. */
 71         case OVS_ACTION_ATTR_POP_VLAN:
 72             err = pop_vlan(skb, key);
 73             break;
 74         /* Recirculate: hand skb and key to execute_recirc() (per the original note they are queued as a deferred action - confirm in execute_recirc). */
 75         case OVS_ACTION_ATTR_RECIRC:
 76             err = execute_recirc(dp, skb, key, a, rem);
 77             if (nla_is_last(a, rem)) {
 78                 /* If this is the last action, the skb has
 79                  * been consumed or freed.
 80                  * Return immediately.
 81                  */
 82                 return err;
 83             }
 84             break;
 85         /* Set: modify the packet according to the nested key attribute. */
 86         case OVS_ACTION_ATTR_SET:
 87             err = execute_set_action(skb, key, nla_data(a));
 88             break;
 89         case OVS_ACTION_ATTR_SAMPLE:
 90             err = sample(dp, skb, key, a);
 91             break;
 92         }
 93         if (unlikely(err)) {
 94             kfree_skb(skb);
 95             return err;
 96         }
 97     }
 98     if (prev_port != -1)
 99         do_output(dp, skb, prev_port);
100     else
101         consume_skb(skb);
102     return 0;
103 }

 

OUTPUT ACTION(datapath/actions.c)

流表的OUTPUT动作指定了数据包发送的出接口信息,调用do_output->ovs_vport_send->vport->ops->send发送函数将数据包从output action对应的接口发送出去。

 1 /* do_output: send 'skb' out of datapath port 'out_port'; the skb is freed if no such vport exists. */
 2 static void
 3 do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 4 {
 5     struct vport *vport = ovs_vport_rcu(dp, out_port);
 6     if (likely(vport))
 7         ovs_vport_send(vport, skb);
 8     else
 9         kfree_skb(skb);
10 }
11 /**
12  *    ovs_vport_send - send a packet on a device
13  *
14  * @vport: vport on which to send the packet
15  * @skb: skb to send
16  *
17  * Sends the given packet and returns the length of data sent.  Either ovs
18  * lock or rcu_read_lock must be held.
19  */
20 int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
21 {
22     /* Send the packet through the vport's ops->send callback. */
23     int sent = vport->ops->send(vport, skb);
24     if (likely(sent > 0)) {
25         struct pcpu_sw_netstats *stats;
26         /* Success: add one packet and 'sent' bytes to this CPU's instance of vport->percpu_stats. */
27         stats = this_cpu_ptr(vport->percpu_stats);
28         u64_stats_update_begin(&stats->syncp);
29         stats->tx_packets++;
30         stats->tx_bytes += sent;
31         u64_stats_update_end(&stats->syncp);
32     } else if (sent < 0) {
33         ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
34     } else {
35         ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
36     }
37     return sent;
38 }

 

当OVS接口类型为system时,vport->ops->send函数为netdev_send:

 

 1 /* Send callback used when an OVS flow's output action transmits on a netdev vport. Returns skb->len as sampled before the transmit call, or 0 if the packet was dropped. */
 2 static int
 3 netdev_send(struct vport *vport, struct sk_buff *skb)
 4 {
 5     struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 6     int mtu = netdev_vport->dev->mtu;
 7     int len;
 8     /* Drop (free) the packet if it is longer than the MTU and the skb is not GSO. */
 9     if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
10         net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
11                      netdev_vport->dev->name,
12                      packet_length(skb), mtu);
13         goto drop;
14     }
15     /* Point skb->dev at the output action's network device. */
16     skb->dev = netdev_vport->dev;
17     len = skb->len;
18     /* Finally hand the packet to dev_queue_xmit(); its return value is not checked here. */
19     dev_queue_xmit(skb);
20     return len;
21 drop:
22     kfree_skb(skb);
23     return 0;
24 }

 

SET ACTION(datapath/actions.c)

流表SET动作会修改数据包中指定的信息,如skb->priority、skb->mark等元数据,以及以太网、IPv4/IPv6、TCP/UDP/SCTP、MPLS等报文头字段。

 1 static int
 2 execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
 3                   const struct nlattr *nested_attr)
 4 {
       /* Apply one SET action: dispatch on the type of the nested key
        * attribute. Attribute types not listed below leave err at 0.
        * Returns 0 or the error from the per-header helper. */
 5     int err = 0;
 6     switch (nla_type(nested_attr)) {
       /* Priority and mark are written to the skb and mirrored into the key. */
 7     case OVS_KEY_ATTR_PRIORITY:
 8         skb->priority = nla_get_u32(nested_attr);
 9         key->phy.priority = skb->priority;
10         break;
11     case OVS_KEY_ATTR_SKB_MARK:
12         skb->mark = nla_get_u32(nested_attr);
13         key->phy.skb_mark = skb->mark;
14         break;
       /* Tunnel info is only recorded in the skb control block; the key is not touched here. */
15     case OVS_KEY_ATTR_TUNNEL_INFO:
16         OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
17         break;
       /* Header rewrites are delegated to per-protocol helpers, which receive the skb, the key and the attribute payload. */
18     case OVS_KEY_ATTR_ETHERNET:
19         err = set_eth_addr(skb, key, nla_data(nested_attr));
20         break;
21     case OVS_KEY_ATTR_IPV4:
22         err = set_ipv4(skb, key, nla_data(nested_attr));
23         break;
24     case OVS_KEY_ATTR_IPV6:
25         err = set_ipv6(skb, key, nla_data(nested_attr));
26         break;
27     case OVS_KEY_ATTR_TCP:
28         err = set_tcp(skb, key, nla_data(nested_attr));
29         break;
30     case OVS_KEY_ATTR_UDP:
31         err = set_udp(skb, key, nla_data(nested_attr));
32         break;
33     case OVS_KEY_ATTR_SCTP:
34         err = set_sctp(skb, key, nla_data(nested_attr));
35         break;
36     case OVS_KEY_ATTR_MPLS:
37         err = set_mpls(skb, key, nla_data(nested_attr));
38         break;
39     }
40     return err;
41 }

 

PUSH_VLAN ACTION(datapath/actions.c)

流表PUSH_VLAN动作会在数据包中添加对应的VLAN tag信息。

 1 static int
 2 push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 3              const struct ovs_action_push_vlan *vlan)
 4 {
       /* PUSH_VLAN action. If the skb already carries a VLAN tag in its
        * metadata, the flow key is invalidated; otherwise the key's TCI is
        * set to the new tag. */
 5     if (vlan_tx_tag_present(skb))
 6         invalidate_flow_key(key);
 7     else
 8         key->eth.tci = vlan->vlan_tci;
       /* The TCI is passed in host byte order with VLAN_TAG_PRESENT cleared. */
 9     return skb_vlan_push(skb, vlan->vlan_tpid,
10                  ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
11 }
12 int
13 skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
14 {
       /* If the skb already has a tag in its metadata, first write that tag
        * into the packet data, then record the new tag in the metadata.
        * Returns 0, or the error from __vlan_insert_tag(). */
15     if (vlan_tx_tag_present(skb)) {
16         unsigned int offset = skb->data - skb_mac_header(skb);
17         int err;
18         /* __vlan_insert_tag expect skb->data pointing to mac header.
19          * So change skb->data before calling it and change back to
20          * original position later
21          */
22         __skb_push(skb, offset);
23         err = __vlan_insert_tag(skb, skb->vlan_proto,
24                     vlan_tx_tag_get(skb));
25         if (err)
26             return err;
27         skb->protocol = skb->vlan_proto;
28         skb->mac_len += VLAN_HLEN;
29         __skb_pull(skb, offset);
           /* For CHECKSUM_COMPLETE, add the VLAN_HLEN bytes that follow the two MAC addresses to skb->csum. */
30         if (skb->ip_summed == CHECKSUM_COMPLETE)
31             skb->csum = csum_add(skb->csum, csum_partial(skb->data
32                     + (2 * ETH_ALEN), VLAN_HLEN, 0));
33     }
34     __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
35     return 0;
36 }
37 static inline void
38 __vlan_hwaccel_put_tag(struct sk_buff *skb,
39                       __be16 vlan_proto, u16 vlan_tci)
40 {
41     /* Record the VLAN tag in the skb's metadata fields (protocol, and TCI with VLAN_TAG_PRESENT set); the packet data is not modified here. */
42     skb->vlan_proto = vlan_proto;
43     skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;
44 }

 

POP_VLAN ACTION(datapath/actions.c)

流表POP_VLAN动作会移除数据包中最外层的VLAN tag信息,并在需要时更新数据包的校验和。

 1 static int
 2 pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 3 {
       /* POP_VLAN action: remove the tag via skb_vlan_pop(), then fix up the key. */
 4     int err;
 5     err = skb_vlan_pop(skb);
       /* If a tag is still present after the pop, the key is invalidated; otherwise the key's TCI is cleared. */
 6     if (vlan_tx_tag_present(skb))
 7         invalidate_flow_key(key);
 8     else
 9         key->eth.tci = 0;
10     return err;
11 }
12 int
13 skb_vlan_pop(struct sk_buff *skb)
14 {
15     u16 vlan_tci;
16     __be16 vlan_proto;
17     int err;
       /* Step 1: remove the outermost tag. A tag held in skb metadata is
        * simply cleared; otherwise a tag is removed from the packet data,
        * but only when skb->protocol is 802.1Q/802.1AD and the frame is at
        * least VLAN_ETH_HLEN long (else there is nothing to pop: return 0). */
18     if (likely(vlan_tx_tag_present(skb))) {
19         skb->vlan_tci = 0;
20     } else {
21         if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
22                   skb->protocol != htons(ETH_P_8021AD)) ||
23                  skb->len < VLAN_ETH_HLEN))
24             return 0;
25         err = __skb_vlan_pop(skb, &vlan_tci);
26         if (err)
27             return err;
28     }
29     /* move next vlan tag to hw accel tag */
       /* Step 2: if the packet data still starts with a VLAN header, pop it from the data and store it in the skb metadata instead. */
30     if (likely((skb->protocol != htons(ETH_P_8021Q) &&
31             skb->protocol != htons(ETH_P_8021AD)) ||
32            skb->len < VLAN_ETH_HLEN))
33         return 0;
34     vlan_proto = skb->protocol;
35     err = __skb_vlan_pop(skb, &vlan_tci);
36     if (unlikely(err))
37         return err;
38     __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
39     return 0;
40 }
41 /* remove VLAN header from packet and update csum accordingly. On success the removed tag's TCI is stored in *vlan_tci (host byte order); returns 0 or the error from skb_ensure_writable(). */
42 static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
43 {
44     struct vlan_hdr *vhdr;
45     unsigned int offset = skb->data - skb_mac_header(skb);
46     int err;
       /* Temporarily move skb->data back to the MAC header; undone at the 'pull' label on every path. */
47     __skb_push(skb, offset);
48     err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
49     if (unlikely(err))
50         goto pull;
51     
52     skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
53     vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
54     *vlan_tci = ntohs(vhdr->h_vlan_TCI);
       /* Shift the two MAC addresses forward by VLAN_HLEN, then drop the first VLAN_HLEN bytes of the frame. */
55     memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
56     __skb_pull(skb, VLAN_HLEN);
57     vlan_set_encap_proto(skb, vhdr);
58     skb->mac_header += VLAN_HLEN;
59     if (skb_network_offset(skb) < ETH_HLEN)
60         skb_set_network_header(skb, ETH_HLEN);
61     skb_reset_mac_len(skb);
62 pull:
63     __skb_pull(skb, offset);
64     return err;
65 }

 

posted @ 2019-01-04 17:04  salami_china  阅读(1613)  评论(0编辑  收藏  举报