OVS 内核KEY值提取及匹配流表代码分析
原文链接:http://ry0117.com/2016/12/24/OVS内核KEY值提取及匹配流表代码分析/
当开启OVS后,创建datapath类型为system的网桥并为其添加相关接口。OVS网桥内的接口在网卡接收到数据包后,数据包会先进入OVS的内核模块openvswitch,从数据包上提取key值,并使用key值匹配OVS内核模块中的流表:当匹配到相应的流表后,则执行流表上相应的动作;当在OVS内核缓存中匹配不到流表时,则将key值信息通过Netlink发送给用户态的ovs-vswitchd守护进程,由其来决定如何处理数据包。
下面就Linux-3.19版本内核中OpenvSwitch内核模块中的提取Key值、匹配流表及执行流表动作相关的代码做一下分析。
提取KEY值(datapath/flow.c)
Key值信息是匹配流表的前提,key值中包括很多的信息,包括源MAC地址、目的MAC地址、VLAN信息、协议类型、源IP地址、目的IP地址、端口号等信息,所有的key值都可以从skb数据包中提取到。
1 int 2 ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, 3 struct sk_buff *skb, struct sw_flow_key *key) 4 { 5 /* Extract metadata from packet. */ 6 if (tun_info) { 7 memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key)); 8 if (tun_info->options) { 9 BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 10 8)) - 1 11 > sizeof(key->tun_opts)); 12 memcpy(GENEVE_OPTS(key, tun_info->options_len), 13 tun_info->options, tun_info->options_len); 14 key->tun_opts_len = tun_info->options_len; 15 } else { 16 key->tun_opts_len = 0; 17 } 18 } else { 19 key->tun_opts_len = 0; 20 memset(&key->tun_key, 0, sizeof(key->tun_key)); 21 } 22 /*根据skb相关信息,给key的相关变量赋值*/ 23 key->phy.priority = skb->priority; 24 /*设置key->phy.in_port为vport的接口序号*/ 25 key->phy.in_port = OVS_CB(skb)->input_vport->port_no; 26 key->phy.skb_mark = skb->mark; 27 key->ovs_flow_hash = 0; 28 key->recirc_id = 0; 29 /*提取skb中的MAC、协议、IP地址、端口信息等key值*/ 30 return key_extract(skb, key); 31 } 32 /** 33 * key_extract - extracts a flow key from an Ethernet frame. 34 * @skb: sk_buff that contains the frame, with skb->data pointing to the 35 * Ethernet header 36 * @key: output flow key 37 * 38 * The caller must ensure that skb->len >= ETH_HLEN. 39 * 40 * Returns 0 if successful, otherwise a negative errno value. 41 * 42 * Initializes @skb header pointers as follows: 43 * 44 * - skb->mac_header: the Ethernet header. 45 * 46 * - skb->network_header: just past the Ethernet header, or just past the 47 * VLAN header, to the first byte of the Ethernet payload. 48 * 49 * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 50 * on output, then just past the IP header, if one is present and 51 * of a correct length, otherwise the same as skb->network_header. 52 * For other key->eth.type values it is left untouched. 
53 */ 54 static int 55 key_extract(struct sk_buff *skb, struct sw_flow_key *key) 56 { 57 int error; 58 struct ethhdr *eth; 59 /* Flags are always used as part of stats */ 60 key->tp.flags = 0; 61 /*重置L2层头指针*/ 62 skb_reset_mac_header(skb); 63 /* Link layer. We are guaranteed to have at least the 14 byte Ethernet 64 * header in the linear data area. 65 */ 66 /*获取二层头部指针,并提取源MAC及目的MAC信息到key中*/ 67 eth = eth_hdr(skb); 68 ether_addr_copy(key->eth.src, eth->h_source); 69 ether_addr_copy(key->eth.dst, eth->h_dest); 70 /*将skb->data指向L2的MAC地址结束的地址处, 71 * 如果带有VLAN信息,则skb->data指向vlan相关信息的开头 72 * 如果不带vlan信息,则skb->data则指向eth.type字段处*/ 73 __skb_pull(skb, 2 * ETH_ALEN); 74 /* We are going to push all headers that we pull, so no need to 75 * update skb->csum here. 76 */ 77 /*提取vlan信息到key中*/ 78 key->eth.tci = 0; 79 if (vlan_tx_tag_present(skb)) 80 key->eth.tci = htons(skb->vlan_tci); 81 else if (eth->h_proto == htons(ETH_P_8021Q)) 82 if (unlikely(parse_vlan(skb, key))) 83 return -ENOMEM; 84 /*提取ether type数据包类型如ETH_P_IP、ETH_P_ARP、ETH_P_IPV6等*/ 85 key->eth.type = parse_ethertype(skb); 86 if (unlikely(key->eth.type == htons(0))) 87 return -ENOMEM; 88 /*重置L3头部指针及MAC长度,保证skb->network_header指向正确的位置*/ 89 skb_reset_network_header(skb); 90 skb_reset_mac_len(skb); 91 __skb_push(skb, skb->data - skb_mac_header(skb)); 92 /* Network layer. 
*/ 93 /*IP协议数据包*/ 94 if (key->eth.type == htons(ETH_P_IP)) { 95 struct iphdr *nh; 96 __be16 offset; 97 /*检查IP数据包的合法性,若合法则设置skb->transport_header*/ 98 error = check_iphdr(skb); 99 if (unlikely(error)) { 100 /*不合法的IP数据包*/ 101 memset(&key->ip, 0, sizeof(key->ip)); 102 memset(&key->ipv4, 0, sizeof(key->ipv4)); 103 if (error == -EINVAL) { 104 /* 此处不知道为何将L4头设置为L3层头部, 105 * 也不知道为何error=0,后面搞清楚了在回来修改*/ 106 skb->transport_header = skb->network_header; 107 error = 0; 108 } 109 return error; 110 } 111 /*获取L3层头部指针,并提取源IP及目的IP信息到key中*/ 112 nh = ip_hdr(skb); 113 key->ipv4.addr.src = nh->saddr; 114 key->ipv4.addr.dst = nh->daddr; 115 /*提取IP的四层协议信息、TOS及ttl信息到key中*/ 116 key->ip.proto = nh->protocol; 117 key->ip.tos = nh->tos; 118 key->ip.ttl = nh->ttl; 119 /* 120 * 从L3层中提取IP分片信息 121 * 对IP分片中的几个标志不是很清楚,暂时不做说明 122 */ 123 offset = nh->frag_off & htons(IP_OFFSET); 124 if (offset) { 125 key->ip.frag = OVS_FRAG_TYPE_LATER; 126 return 0; 127 } 128 if (nh->frag_off & htons(IP_MF) || 129 skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 130 key->ip.frag = OVS_FRAG_TYPE_FIRST; 131 else 132 key->ip.frag = OVS_FRAG_TYPE_NONE; 133 /* Transport layer. 
*/ 134 /*TCP协议数据包*/ 135 if (key->ip.proto == IPPROTO_TCP) { 136 if (tcphdr_ok(skb)) { 137 /*获取tcp四层头部,提取源端口及目的端口信息到key中*/ 138 struct tcphdr *tcp = tcp_hdr(skb); 139 key->tp.src = tcp->source; 140 key->tp.dst = tcp->dest; 141 key->tp.flags = TCP_FLAGS_BE16(tcp); 142 } else { 143 memset(&key->tp, 0, sizeof(key->tp)); 144 } 145 /*UDP协议数据包*/ 146 } else if (key->ip.proto == IPPROTO_UDP) { 147 if (udphdr_ok(skb)) { 148 /*获取UDP四层头部,提取源端口及目的端口信息到key中*/ 149 struct udphdr *udp = udp_hdr(skb); 150 key->tp.src = udp->source; 151 key->tp.dst = udp->dest; 152 } else { 153 memset(&key->tp, 0, sizeof(key->tp)); 154 } 155 /*SCTP协议数据包*/ 156 } else if (key->ip.proto == IPPROTO_SCTP) { 157 if (sctphdr_ok(skb)) { 158 /*获取SCTP四层头部,提取源端口及目的端口到key中*/ 159 struct sctphdr *sctp = sctp_hdr(skb); 160 key->tp.src = sctp->source; 161 key->tp.dst = sctp->dest; 162 } else { 163 memset(&key->tp, 0, sizeof(key->tp)); 164 } 165 /*ICMP协议数据包*/ 166 } else if (key->ip.proto == IPPROTO_ICMP) { 167 if (icmphdr_ok(skb)) { 168 /*获取ICMP头部,并提取ICMP 类型及代码字段到key中*/ 169 struct icmphdr *icmp = icmp_hdr(skb); 170 /* The ICMP type and code fields use the 16-bit 171 * transport port fields, so we need to store 172 * them in 16-bit network byte order. */ 173 key->tp.src = htons(icmp->type); 174 key->tp.dst = htons(icmp->code); 175 } else { 176 memset(&key->tp, 0, sizeof(key->tp)); 177 } 178 } 179 /*ARP协议或者RARP协议数据包*/ 180 } else if (key->eth.type == htons(ETH_P_ARP) || 181 key->eth.type == htons(ETH_P_RARP)) { 182 struct arp_eth_header *arp; 183 bool arp_available = arphdr_ok(skb); 184 /*获取ARP头部指针*/ 185 arp = (struct arp_eth_header *)skb_network_header(skb); 186 if (arp_available && 187 arp->ar_hrd == htons(ARPHRD_ETHER) && 188 arp->ar_pro == htons(ETH_P_IP) && 189 arp->ar_hln == ETH_ALEN && 190 arp->ar_pln == 4) { 191 /*提取ARP option字段到key中*/ 192 /* We only match on the lower 8 bits of the opcode. 
*/ 193 if (ntohs(arp->ar_op) <= 0xff) 194 key->ip.proto = ntohs(arp->ar_op); 195 else 196 key->ip.proto = 0; 197 /*提取源MAC、目的MAC、源IP及目的MAC信息到key中*/ 198 memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); 199 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); 200 ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha); 201 ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha); 202 } else { 203 memset(&key->ip, 0, sizeof(key->ip)); 204 memset(&key->ipv4, 0, sizeof(key->ipv4)); 205 } 206 /*去提取MPLS信息到key中*/ 207 } else if (eth_p_mpls(key->eth.type)) { 208 size_t stack_len = MPLS_HLEN; 209 /* In the presence of an MPLS label stack the end of the L2 210 * header and the beginning of the L3 header differ. 211 * 212 * Advance network_header to the beginning of the L3 213 * header. mac_len corresponds to the end of the L2 header. 214 */ 215 while (1) { 216 __be32 lse; 217 error = check_header(skb, skb->mac_len + stack_len); 218 if (unlikely(error)) 219 return 0; 220 memcpy(&lse, skb_network_header(skb), MPLS_HLEN); 221 if (stack_len == MPLS_HLEN) 222 memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); 223 skb_set_network_header(skb, skb->mac_len + stack_len); 224 if (lse & htonl(MPLS_LS_S_MASK)) 225 break; 226 stack_len += MPLS_HLEN; 227 } 228 /*IPv6协议,提取IPv6相关信息到key中 */ 229 } else if (key->eth.type == htons(ETH_P_IPV6)) { 230 int nh_len; /* IPv6 Header + Extensions */ 231 nh_len = parse_ipv6hdr(skb, key); 232 if (unlikely(nh_len < 0)) { 233 memset(&key->ip, 0, sizeof(key->ip)); 234 memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr)); 235 if (nh_len == -EINVAL) { 236 skb->transport_header = skb->network_header; 237 error = 0; 238 } else { 239 error = nh_len; 240 } 241 return error; 242 } 243 if (key->ip.frag == OVS_FRAG_TYPE_LATER) 244 return 0; 245 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 246 key->ip.frag = OVS_FRAG_TYPE_FIRST; 247 /* Transport layer. 
*/ 248 if (key->ip.proto == NEXTHDR_TCP) { 249 if (tcphdr_ok(skb)) { 250 struct tcphdr *tcp = tcp_hdr(skb); 251 key->tp.src = tcp->source; 252 key->tp.dst = tcp->dest; 253 key->tp.flags = TCP_FLAGS_BE16(tcp); 254 } else { 255 memset(&key->tp, 0, sizeof(key->tp)); 256 } 257 } else if (key->ip.proto == NEXTHDR_UDP) { 258 if (udphdr_ok(skb)) { 259 struct udphdr *udp = udp_hdr(skb); 260 key->tp.src = udp->source; 261 key->tp.dst = udp->dest; 262 } else { 263 memset(&key->tp, 0, sizeof(key->tp)); 264 } 265 } else if (key->ip.proto == NEXTHDR_SCTP) { 266 if (sctphdr_ok(skb)) { 267 struct sctphdr *sctp = sctp_hdr(skb); 268 key->tp.src = sctp->source; 269 key->tp.dst = sctp->dest; 270 } else { 271 memset(&key->tp, 0, sizeof(key->tp)); 272 } 273 } else if (key->ip.proto == NEXTHDR_ICMP) { 274 if (icmp6hdr_ok(skb)) { 275 error = parse_icmpv6(skb, key, nh_len); 276 if (error) 277 return error; 278 } else { 279 memset(&key->tp, 0, sizeof(key->tp)); 280 } 281 } 282 } 283 return 0; 284 }
根据KEY值匹配流表(datapath/datapath.c)
通过ovs_flow_key_extract函数及key_extract函数从skb中提取所有需要的key值后,下面就是使用key值来匹配OVS内核模块openvswitch中缓存的流表信息,并在匹配到流表后执行流表中相应的动作处理数据包。若在内核中未匹配到流表,则通过Netlink消息将key值发送到用户态ovs-vswitchd进程,由用户态进程来决定如何处理数据包。
1 /* Must be called with rcu_read_lock. */ 2 void 3 ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) 4 { 5 const struct vport *p = OVS_CB(skb)->input_vport; 6 struct datapath *dp = p->dp; 7 struct sw_flow *flow; 8 struct sw_flow_actions *sf_acts; 9 struct dp_stats_percpu *stats; 10 u64 *stats_counter; 11 u32 n_mask_hit; 12 /*获取每CPU变量dp->stats_percpu*/ 13 stats = this_cpu_ptr(dp->stats_percpu); 14 /* Look up flow. */ 15 /*根据key值遍历所有的流表*/ 16 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); 17 if (unlikely(!flow)) { 18 /* 19 * 未匹配到任何流表,则将key值封装到Netlink消息中通过 20 * netlink发送到用户态ovs-vswitchd进程 21 * 由用户态进程来决定如何处理数据包 22 */ 23 struct dp_upcall_info upcall; 24 int error; 25 upcall.cmd = OVS_PACKET_CMD_MISS; 26 upcall.userdata = NULL; 27 upcall.portid = ovs_vport_find_upcall_portid(p, skb); 28 upcall.egress_tun_info = NULL; 29 /*封装Netlink消息并发送给用户态ovs-vswitchd进程*/ 30 error = ovs_dp_upcall(dp, skb, key, &upcall); 31 if (unlikely(error)) 32 kfree_skb(skb); 33 else 34 consume_skb(skb); 35 stats_counter = &stats->n_missed; 36 goto out; 37 } 38 /*查询到流表后,更新相关流表的信息,包括流表匹配的包数及字节数*/ 39 ovs_flow_stats_update(flow, key->tp.flags, skb); 40 /*获取匹配的流表的执行动作*/ 41 sf_acts = rcu_dereference(flow->sf_acts); 42 /*执行匹配流表的动作*/ 43 ovs_execute_actions(dp, skb, sf_acts, key); 44 stats_counter = &stats->n_hit; 45 out: 46 /* Update datapath statistics. */ 47 u64_stats_update_begin(&stats->syncp); 48 (*stats_counter)++; 49 stats->n_mask_hit += n_mask_hit; 50 u64_stats_update_end(&stats->syncp); 51 }
执行流表ACTION(datapath/actions.c)
匹配到对应的流表后,从流表中获取流表的动作,循环遍历所有的flow action,执行相应的action动作。
1 /* Execute a list of actions against 'skb'. */ 2 int 3 ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, 4 const struct sw_flow_actions *acts, 5 struct sw_flow_key *key) 6 { 7 int level = this_cpu_read(exec_actions_level); 8 int err; 9 this_cpu_inc(exec_actions_level); 10 OVS_CB(skb)->egress_tun_info = NULL; 11 /*执行流表动作*/ 12 err = do_execute_actions(dp, skb, key, 13 acts->actions, acts->actions_len); 14 /*不知道这个process_deferred_actions具体是干什么的*/ 15 if (!level) 16 process_deferred_actions(dp); 17 this_cpu_dec(exec_actions_level); 18 return err; 19 } 20 /* Execute a list of actions against 'skb'. */ 21 static int 22 do_execute_actions(struct datapath *dp, struct sk_buff *skb, 23 struct sw_flow_key *key, 24 const struct nlattr *attr, int len) 25 { 26 /* Every output action needs a separate clone of 'skb', but the common 27 * case is just a single output action, so that doing a clone and 28 * then freeing the original skbuff is wasteful. So the following code 29 * is slightly obscure just to avoid that. 
30 */ 31 int prev_port = -1; 32 const struct nlattr *a; 33 int rem; 34 for (a = attr, rem = len; rem > 0; 35 a = nla_next(a, &rem)) { 36 int err = 0; 37 if (unlikely(prev_port != -1)) { 38 /*设置了output接口,克隆一份skb将数据包从 39 * prv_port接口发送出去*/ 40 struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC); 41 if (out_skb) 42 do_output(dp, out_skb, prev_port); 43 prev_port = -1; 44 } 45 switch (nla_type(a)) { 46 /*数据包发送的端口号*/ 47 case OVS_ACTION_ATTR_OUTPUT: 48 prev_port = nla_get_u32(a); 49 break; 50 /*将数据包发送到用户态进程*/ 51 case OVS_ACTION_ATTR_USERSPACE: 52 output_userspace(dp, skb, key, a); 53 break; 54 /*为key->ovs_flow_hash赋值*/ 55 case OVS_ACTION_ATTR_HASH: 56 execute_hash(skb, key, a); 57 break; 58 /*MPLS处理,不了解MPLS,忽略*/ 59 case OVS_ACTION_ATTR_PUSH_MPLS: 60 err = push_mpls(skb, key, nla_data(a)); 61 break; 62 /*MPLS处理,忽略*/ 63 case OVS_ACTION_ATTR_POP_MPLS: 64 err = pop_mpls(skb, key, nla_get_be16(a)); 65 break; 66 /*设置VLAN tag*/ 67 case OVS_ACTION_ATTR_PUSH_VLAN: 68 err = push_vlan(skb, key, nla_data(a)); 69 break; 70 /*去Vlan tag*/ 71 case OVS_ACTION_ATTR_POP_VLAN: 72 err = pop_vlan(skb, key); 73 break; 74 /*将skb及key添加到defered action中*/ 75 case OVS_ACTION_ATTR_RECIRC: 76 err = execute_recirc(dp, skb, key, a, rem); 77 if (nla_is_last(a, rem)) { 78 /* If this is the last action, the skb has 79 * been consumed or freed. 80 * Return immediately. 81 */ 82 return err; 83 } 84 break; 85 /*根据修改的动作,对数据包进行修改*/ 86 case OVS_ACTION_ATTR_SET: 87 err = execute_set_action(skb, key, nla_data(a)); 88 break; 89 case OVS_ACTION_ATTR_SAMPLE: 90 err = sample(dp, skb, key, a); 91 break; 92 } 93 if (unlikely(err)) { 94 kfree_skb(skb); 95 return err; 96 } 97 } 98 if (prev_port != -1) 99 do_output(dp, skb, prev_port); 100 else 101 consume_skb(skb); 102 return 0; 103 }
OUTPUT ACTION(datapath/actions.c)
流表的OUTPUT动作指定了数据包发送的出接口信息,调用do_output -> ovs_vport_send -> vport->ops->send发送函数将数据包从output action对应的接口发送出去。
1 /*do_outpu发送数据包*/ 2 static void 3 do_output(struct datapath *dp, struct sk_buff *skb, int out_port) 4 { 5 struct vport *vport = ovs_vport_rcu(dp, out_port); 6 if (likely(vport)) 7 ovs_vport_send(vport, skb); 8 else 9 kfree_skb(skb); 10 } 11 /** 12 * ovs_vport_send - send a packet on a device 13 * 14 * @vport: vport on which to send the packet 15 * @skb: skb to send 16 * 17 * Sends the given packet and returns the length of data sent. Either ovs 18 * lock or rcu_read_lock must be held. 19 */ 20 int ovs_vport_send(struct vport *vport, struct sk_buff *skb) 21 { 22 /* 调用vport->ops->send回调函数发送数据包 */ 23 int sent = vport->ops->send(vport, skb); 24 if (likely(sent > 0)) { 25 struct pcpu_sw_netstats *stats; 26 /*发送成功后更新每CPU变量vport->percpu_stats中的发送包数及发送字节数*/ 27 stats = this_cpu_ptr(vport->percpu_stats); 28 u64_stats_update_begin(&stats->syncp); 29 stats->tx_packets++; 30 stats->tx_bytes += sent; 31 u64_stats_update_end(&stats->syncp); 32 } else if (sent < 0) { 33 ovs_vport_record_error(vport, VPORT_E_TX_ERROR); 34 } else { 35 ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); 36 } 37 return sent; 38 }
当OVS接口类型为system时,vport->ops->send函数为netdev_send:
1 /*此函数即为OVS流表output action 发送数据包时的函数*/ 2 static int 3 netdev_send(struct vport *vport, struct sk_buff *skb) 4 { 5 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 6 int mtu = netdev_vport->dev->mtu; 7 int len; 8 /*如果未开启gso且数据包长度大于MTU,则释放数据包*/ 9 if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { 10 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", 11 netdev_vport->dev->name, 12 packet_length(skb), mtu); 13 goto drop; 14 } 15 /*设置skb->dev为output action网口*/ 16 skb->dev = netdev_vport->dev; 17 len = skb->len; 18 /*最后调用dev_queue_xmit发送数据包*/ 19 dev_queue_xmit(skb); 20 return len; 21 drop: 22 kfree_skb(skb); 23 return 0; 24 }
SET ACTION(datapath/actions.c)
流表SET动作会修改数据包中指定的信息,如skb->priority、skb->mark等信息。
1 static int 2 execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, 3 const struct nlattr *nested_attr) 4 { 5 int err = 0; 6 switch (nla_type(nested_attr)) { 7 case OVS_KEY_ATTR_PRIORITY: 8 skb->priority = nla_get_u32(nested_attr); 9 key->phy.priority = skb->priority; 10 break; 11 case OVS_KEY_ATTR_SKB_MARK: 12 skb->mark = nla_get_u32(nested_attr); 13 key->phy.skb_mark = skb->mark; 14 break; 15 case OVS_KEY_ATTR_TUNNEL_INFO: 16 OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); 17 break; 18 case OVS_KEY_ATTR_ETHERNET: 19 err = set_eth_addr(skb, key, nla_data(nested_attr)); 20 break; 21 case OVS_KEY_ATTR_IPV4: 22 err = set_ipv4(skb, key, nla_data(nested_attr)); 23 break; 24 case OVS_KEY_ATTR_IPV6: 25 err = set_ipv6(skb, key, nla_data(nested_attr)); 26 break; 27 case OVS_KEY_ATTR_TCP: 28 err = set_tcp(skb, key, nla_data(nested_attr)); 29 break; 30 case OVS_KEY_ATTR_UDP: 31 err = set_udp(skb, key, nla_data(nested_attr)); 32 break; 33 case OVS_KEY_ATTR_SCTP: 34 err = set_sctp(skb, key, nla_data(nested_attr)); 35 break; 36 case OVS_KEY_ATTR_MPLS: 37 err = set_mpls(skb, key, nla_data(nested_attr)); 38 break; 39 } 40 return err; 41 }
PUSH_VLAN ACTION(datapath/actions.c)
流表PUSH_VLAN动作会在数据包中添加对应的VLAN tag信息。
1 static int 2 push_vlan(struct sk_buff *skb, struct sw_flow_key *key, 3 const struct ovs_action_push_vlan *vlan) 4 { 5 if (vlan_tx_tag_present(skb)) 6 invalidate_flow_key(key); 7 else 8 key->eth.tci = vlan->vlan_tci; 9 return skb_vlan_push(skb, vlan->vlan_tpid, 10 ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); 11 } 12 int 13 skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) 14 { 15 if (vlan_tx_tag_present(skb)) { 16 unsigned int offset = skb->data - skb_mac_header(skb); 17 int err; 18 /* __vlan_insert_tag expect skb->data pointing to mac header. 19 * So change skb->data before calling it and change back to 20 * original position later 21 */ 22 __skb_push(skb, offset); 23 err = __vlan_insert_tag(skb, skb->vlan_proto, 24 vlan_tx_tag_get(skb)); 25 if (err) 26 return err; 27 skb->protocol = skb->vlan_proto; 28 skb->mac_len += VLAN_HLEN; 29 __skb_pull(skb, offset); 30 if (skb->ip_summed == CHECKSUM_COMPLETE) 31 skb->csum = csum_add(skb->csum, csum_partial(skb->data 32 + (2 * ETH_ALEN), VLAN_HLEN, 0)); 33 } 34 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); 35 return 0; 36 } 37 static inline void 38 __vlan_hwaccel_put_tag(struct sk_buff *skb, 39 __be16 vlan_proto, u16 vlan_tci) 40 { 41 /*设置数据包Vlan tag信息*/ 42 skb->vlan_proto = vlan_proto; 43 skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; 44 }
POP_VLAN ACTION(datapath/actions.c)
流表POP_VLAN动作移除数据包中的Vlan tag信息并更新数据包中的校验和。
1 static int 2 pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) 3 { 4 int err; 5 err = skb_vlan_pop(skb); 6 if (vlan_tx_tag_present(skb)) 7 invalidate_flow_key(key); 8 else 9 key->eth.tci = 0; 10 return err; 11 } 12 int 13 skb_vlan_pop(struct sk_buff *skb) 14 { 15 u16 vlan_tci; 16 __be16 vlan_proto; 17 int err; 18 if (likely(vlan_tx_tag_present(skb))) { 19 skb->vlan_tci = 0; 20 } else { 21 if (unlikely((skb->protocol != htons(ETH_P_8021Q) && 22 skb->protocol != htons(ETH_P_8021AD)) || 23 skb->len < VLAN_ETH_HLEN)) 24 return 0; 25 err = __skb_vlan_pop(skb, &vlan_tci); 26 if (err) 27 return err; 28 } 29 /* move next vlan tag to hw accel tag */ 30 if (likely((skb->protocol != htons(ETH_P_8021Q) && 31 skb->protocol != htons(ETH_P_8021AD)) || 32 skb->len < VLAN_ETH_HLEN)) 33 return 0; 34 vlan_proto = skb->protocol; 35 err = __skb_vlan_pop(skb, &vlan_tci); 36 if (unlikely(err)) 37 return err; 38 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); 39 return 0; 40 } 41 /* remove VLAN header from packet and update csum accordingly. */ 42 static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) 43 { 44 struct vlan_hdr *vhdr; 45 unsigned int offset = skb->data - skb_mac_header(skb); 46 int err; 47 __skb_push(skb, offset); 48 err = skb_ensure_writable(skb, VLAN_ETH_HLEN); 49 if (unlikely(err)) 50 goto pull; 51 52 skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); 53 vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); 54 *vlan_tci = ntohs(vhdr->h_vlan_TCI); 55 memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); 56 __skb_pull(skb, VLAN_HLEN); 57 vlan_set_encap_proto(skb, vhdr); 58 skb->mac_header += VLAN_HLEN; 59 if (skb_network_offset(skb) < ETH_HLEN) 60 skb_set_network_header(skb, ETH_HLEN); 61 skb_reset_mac_len(skb); 62 pull: 63 __skb_pull(skb, offset); 64 return err; 65 }