Linux 驱动框架---net驱动框架
这一篇主要是学习网络设备驱动的框架,具体的实例分析可以参考Linux 驱动框架---dm9000分析 。Linux 对于网络设备驱动的定义分了四层:网络协议接口层,对上是IP、ARP等网络协议,因为网络协议相对复杂且不会变动特别大,所以由内核来实现;网络设备接口层,实际上就是对网络设备操作的封装,封装成一个结构体由驱动工程师来填充内容从而做到抽象;设备驱动功能层,其实就是网络设备接口层封装好的接口的具体实现,是操作硬件设备完成指定动作的软件部分;网络设备与媒介层(MAC和PHY硬件部分)。网络协议接口层就是对上提供的发送和接收接口。发送接口接受上层协议下发的应用数据(已经使用struct sk_buff 数据结构封装),然后调用设备接口层驱动硬件完成数据发送;数据接收则是在物理层接收完数据后,同样用struct sk_buff 结构封装,再通过网络协议接口层的接口交给上层协议。驱动框架的简单的层级结构如下:
网络协议接口层对网络层提供了两个主要接口,用于数据的发送(int dev_queue_xmit(struct sk_buff* skb))和接收(int netif_rx(struct sk_buff* skb))。
数据发送
数据发送接口由应用程序和协议栈主动在内核空间调用,这一部分偏内核部分(内核实现)的就是操作net_device 上的一个queue将数据包放进去,最后进行合适的调度调用网络设备接口层进而进入设备驱动功能层完成数据包的发送。
/*
 * dev_queue_xmit - transmit entry point offered to the upper protocol layers.
 * @skb: socket buffer to transmit.
 *
 * Thin wrapper: forwards to __dev_queue_xmit() with a NULL accel_priv and
 * returns its result unchanged.
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	return __dev_queue_xmit(skb, NULL);
}
=======》
/*
 * __dev_queue_xmit - queue or directly transmit @skb on skb->dev.
 * @skb:        buffer to send; its ->dev selects the device.
 * @accel_priv: opaque value handed through to netdev_pick_tx().
 *
 * Two paths are visible here:
 *  - the selected TX queue's qdisc has an ->enqueue hook: the skb is handed
 *    to __dev_xmit_skb() and that return code is returned;
 *  - no ->enqueue hook: if the device is IFF_UP, the skb is passed straight
 *    to dev_hard_start_xmit() under HARD_TX_LOCK, guarded by a per-CPU
 *    recursion counter.
 *
 * Any path that does not reach the "out" label counts the packet in
 * dev->tx_dropped, frees the skb and returns -ENETDOWN.
 */
static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	skb_reset_mac_header(skb);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	skb_update_prio(skb);

	/* Pick the TX queue, then read its qdisc pointer under RCU-bh. */
	txq = netdev_pick_tx(dev, skb, accel_priv);
	q = rcu_dereference_bh(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	trace_net_dev_queue(skb);
	if (q->enqueue) {
		/* Queueing path: the result of __dev_xmit_skb() is returned as-is. */
		rc = __dev_xmit_skb(skb, q, dev, txq);
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shot the lock. It is not prone from deadlocks.
	   Either shot noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* Only proceed if this CPU is not already the xmit lock owner. */
		if (txq->xmit_lock_owner != cpu) {

			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
				goto recursion_alert;

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_xmit_stopped(txq)) {
				/* Counter is raised only around the driver call. */
				__this_cpu_inc(xmit_recursion);
				rc = dev_hard_start_xmit(skb, dev, txq);
				__this_cpu_dec(xmit_recursion);
				if (dev_xmit_complete(rc)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
					     dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately
			 */
recursion_alert:
			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
					     dev->name);
		}
	}

	/* Drop path: leave the RCU section, account the drop, free the skb. */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

	atomic_long_inc(&dev->tx_dropped);
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
数据接收
数据接收接口也是内核部分代码由内核实现,是内核留给驱动层接口上报接收到数据的接口路径,网络设备驱动在中断或轮询中收到数据包后需要打包一个socket数据包(struct sk_buff)然后通过内核数据接收接口上报给内核上层。
/*
 * netif_rx - receive entry point a driver calls to hand an skb upwards.
 * @skb: received buffer.
 *
 * Calls trace_netif_rx_entry() and then delegates to netif_rx_internal(),
 * returning its result.
 */
int netif_rx(struct sk_buff *skb)
{
	trace_netif_rx_entry(skb);

	return netif_rx_internal(skb);
}

/*
 * netif_rx_internal - pass @skb to enqueue_to_backlog() for a chosen CPU.
 * @skb: received buffer.
 *
 * With CONFIG_RPS and the rps_needed static key enabled, the CPU comes from
 * get_rps_cpu(), falling back to smp_processor_id() when that returns a
 * negative value. Otherwise the CPU obtained from get_cpu() is used.
 *
 * Returns the value returned by enqueue_to_backlog().
 */
static int netif_rx_internal(struct sk_buff *skb)
{
	int ret;

	net_timestamp_check(netdev_tstamp_prequeue, skb);

	trace_netif_rx(skb);
#ifdef CONFIG_RPS
	if (static_key_false(&rps_needed)) {
		/* rflow starts out pointing at a local placeholder flow. */
		struct rps_dev_flow voidflow, *rflow = &voidflow;
		int cpu;

		preempt_disable();
		rcu_read_lock();

		cpu = get_rps_cpu(skb->dev, skb, &rflow);
		if (cpu < 0)
			cpu = smp_processor_id();

		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

		rcu_read_unlock();
		preempt_enable();
	} else
#endif
	{
		/* Without RPS this block is unconditional; with RPS it is the else arm. */
		unsigned int qtail;
		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
		put_cpu();
	}
	return ret;
}
由上面的代码可以看出来 struct sk_buff 在网络驱动中是一个非常重要的数据结构。网络数据的整个传输过程就是对这个数据结构的访问修改读取的过程,所以来简单了解一下这个数据结构
/*
 * struct sk_buff - socket buffer: the packet container passed between the
 * protocol layers and network drivers on both the transmit and receive paths.
 *
 * Layout constraints stated in the declaration itself: next/prev must be the
 * first two members, and the tail/end/head/data/truesize/users group must
 * stay at the end (see alloc_skb()).
 */
struct sk_buff {
	/* These two members must be first. */
	struct sk_buff		*next;
	struct sk_buff		*prev;

	union {
		ktime_t		tstamp;
		struct skb_mstamp skb_mstamp;
	};

	struct sock		*sk;
	struct net_device	*dev;	/* device this buffer is associated with */

	/*
	 * This is the control buffer. It is free to use for every
	 * layer. Please put your private variables there. If you
	 * want to keep them across layers you have to do a skb_clone()
	 * first. This is owned by whoever has the skb queued ATM.
	 */
	char			cb[48] __aligned(8);

	unsigned long		_skb_refdst;
#ifdef CONFIG_XFRM
	struct sec_path		*sp;
#endif
	unsigned int		len,
				data_len;
	__u16			mac_len,
				hdr_len;
	/* Checksum state: either a full csum or a start/offset pair. */
	union {
		__wsum		csum;
		struct {
			__u16	csum_start;
			__u16	csum_offset;
		};
	};
	__u32			priority;
	kmemcheck_bitfield_begin(flags1);
	__u8			ignore_df:1,
				cloned:1,
				ip_summed:2,
				nohdr:1,
				nfctinfo:3;
	__u8			pkt_type:3,
				fclone:2,
				ipvs_property:1,
				peeked:1,
				nf_trace:1;
	kmemcheck_bitfield_end(flags1);
	__be16			protocol;

	void			(*destructor)(struct sk_buff *skb);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	struct nf_conntrack	*nfct;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	struct nf_bridge_info	*nf_bridge;
#endif

	int			skb_iif;

	__u32			hash;

	__be16			vlan_proto;
	__u16			vlan_tci;

#ifdef CONFIG_NET_SCHED
	__u16			tc_index;	/* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
	__u16			tc_verd;	/* traffic control verdict */
#endif
#endif

	__u16			queue_mapping;
	kmemcheck_bitfield_begin(flags2);
#ifdef CONFIG_IPV6_NDISC_NODETYPE
	__u8			ndisc_nodetype:2;
#endif
	__u8			pfmemalloc:1;
	__u8			ooo_okay:1;
	__u8			l4_hash:1;
	__u8			wifi_acked_valid:1;
	__u8			wifi_acked:1;
	__u8			no_fcs:1;
	__u8			head_frag:1;
	/* Encapsulation protocol and NIC drivers should use
	 * this flag to indicate to each other if the skb contains
	 * encapsulated packet or not and maybe use the inner packet
	 * headers if needed
	 */
	__u8			encapsulation:1;
	__u8			encap_hdr_csum:1;
	__u8			csum_valid:1;
	__u8			csum_complete_sw:1;
	/* 3/5 bit hole (depending on ndisc_nodetype presence) */
	kmemcheck_bitfield_end(flags2);

#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
	union {
		unsigned int	napi_id;
		dma_cookie_t	dma_cookie;
	};
#endif
#ifdef CONFIG_NETWORK_SECMARK
	__u32			secmark;
#endif
	union {
		__u32		mark;
		__u32		dropcount;
		__u32		reserved_tailroom;
	};

	/* Header positions, stored as 16-bit values. */
	__be16			inner_protocol;
	__u16			inner_transport_header;
	__u16			inner_network_header;
	__u16			inner_mac_header;
	__u16			transport_header;
	__u16			network_header;
	__u16			mac_header;
	/* These elements must be at the end, see alloc_skb() for details.  */
	sk_buff_data_t		tail;
	sk_buff_data_t		end;
	unsigned char		*head,
				*data;
	unsigned int		truesize;
	atomic_t		users;
};
成员还是比较多的,但是内核提供了对应的接口函数来直观地操作它,其中与数据相关的重要成员有四个:head、data、tail、end,它们之间的关系如下图
这里只是框架性的记录了网络驱动的分层相关的内容具体的细节实现参考实例分析,除此之外与之相关的驱动接口有如下几个。
申请套接字缓冲区
struct sk_buff *alloc_skb(unsigned int len, gfp_t priority);
struct sk_buff *dev_alloc_skb(unsigned int len);
其中len为数据缓冲区的大小,对应释放的接口有
void kfree_skb(struct sk_buff *skb);
void dev_kfree_skb(struct sk_buff *skb);
void dev_kfree_skb_irq(struct sk_buff *skb);
//其实就是在上面两种接口的封装增加了是否为中断中的判断。
void dev_kfree_skb_any(struct sk_buff *skb);
数据包操作
//skb->tail向后移动len,skb->len增加len
unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
//skb->data向前移动len,skb->len增加len
unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
//skb->data向后移动len,skb->len减少len
unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
//skb->tail和skb->data同时向后移动len(无返回值)
void skb_reserve(struct sk_buff *skb, int len);
上面这些接口会在驱动中调用,具体驱动分析时再详细学习。
网络设备接口
内核抽象了网络设备接口的封装但是这个结构十分庞大且复杂其中涵盖统计属性、配置等定义如下:
1 struct net_device { 2 3 /* 4 * This is the first field of the "visible" part of this structure 5 * (i.e. as seen by users in the "Space.c" file). It is the name 6 * of the interface. 7 */ 8 char name[IFNAMSIZ]; 9 10 /* device name hash chain, please keep it close to name[] */ 11 struct hlist_node name_hlist; 12 13 /* snmp alias */ 14 char *ifalias; 15 16 /* 17 * I/O specific fields 18 * FIXME: Merge these and struct ifmap into one 19 */ 20 unsigned long mem_end; /* shared mem end */ 21 unsigned long mem_start; /* shared mem start */ 22 unsigned long base_addr; /* device I/O address */ 23 int irq; /* device IRQ number */ 24 25 /* 26 * Some hardware also needs these fields, but they are not 27 * part of the usual set specified in Space.c. 28 */ 29 30 unsigned long state; 31 32 struct list_head dev_list; 33 struct list_head napi_list; 34 struct list_head unreg_list; 35 struct list_head close_list; 36 37 /* directly linked devices, like slaves for bonding */ 38 struct { 39 struct list_head upper; 40 struct list_head lower; 41 } adj_list; 42 43 /* all linked devices, *including* neighbours */ 44 struct { 45 struct list_head upper; 46 struct list_head lower; 47 } all_adj_list; 48 49 50 /* currently active device features */ 51 netdev_features_t features; 52 /* user-changeable features */ 53 netdev_features_t hw_features; 54 /* user-requested features */ 55 netdev_features_t wanted_features; 56 /* mask of features inheritable by VLAN devices */ 57 netdev_features_t vlan_features; 58 /* mask of features inherited by encapsulating devices 59 * This field indicates what encapsulation offloads 60 * the hardware is capable of doing, and drivers will 61 * need to set them appropriately. 62 */ 63 netdev_features_t hw_enc_features; 64 /* mask of fetures inheritable by MPLS */ 65 netdev_features_t mpls_features; 66 67 /* Interface index. 
Unique device identifier */ 68 int ifindex; 69 int iflink; 70 71 struct net_device_stats stats; 72 73 /* dropped packets by core network, Do not use this in drivers */ 74 atomic_long_t rx_dropped; 75 atomic_long_t tx_dropped; 76 77 /* Stats to monitor carrier on<->off transitions */ 78 atomic_t carrier_changes; 79 80 #ifdef CONFIG_WIRELESS_EXT 81 /* List of functions to handle Wireless Extensions (instead of ioctl). 82 * See <net/iw_handler.h> for details. Jean II */ 83 const struct iw_handler_def * wireless_handlers; 84 /* Instance data managed by the core of Wireless Extensions. */ 85 struct iw_public_data * wireless_data; 86 #endif 87 /* Management operations */ 88 const struct net_device_ops *netdev_ops; 89 const struct ethtool_ops *ethtool_ops; 90 const struct forwarding_accel_ops *fwd_ops; 91 92 /* Hardware header description */ 93 const struct header_ops *header_ops; 94 95 unsigned int flags; /* interface flags (a la BSD) */ 96 unsigned int priv_flags; /* Like 'flags' but invisible to userspace. 97 * See if.h for definitions. */ 98 unsigned short gflags; 99 unsigned short padded; /* How much padding added by alloc_netdev() */ 100 101 unsigned char operstate; /* RFC2863 operstate */ 102 unsigned char link_mode; /* mapping policy to operstate */ 103 104 unsigned char if_port; /* Selectable AUI, TP,..*/ 105 unsigned char dma; /* DMA channel */ 106 107 unsigned int mtu; /* interface MTU value */ 108 unsigned short type; /* interface hardware type */ 109 unsigned short hard_header_len; /* hardware hdr length */ 110 111 /* extra head- and tailroom the hardware may need, but not in all cases 112 * can this be guaranteed, especially tailroom. Some cases also use 113 * LL_MAX_HEADER instead to allocate the skb. 114 */ 115 unsigned short needed_headroom; 116 unsigned short needed_tailroom; 117 118 /* Interface address info. 
*/ 119 unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ 120 unsigned char addr_assign_type; /* hw address assignment type */ 121 unsigned char addr_len; /* hardware address length */ 122 unsigned short neigh_priv_len; 123 unsigned short dev_id; /* Used to differentiate devices 124 * that share the same link 125 * layer address 126 */ 127 unsigned short dev_port; /* Used to differentiate 128 * devices that share the same 129 * function 130 */ 131 spinlock_t addr_list_lock; 132 struct netdev_hw_addr_list uc; /* Unicast mac addresses */ 133 struct netdev_hw_addr_list mc; /* Multicast mac addresses */ 134 struct netdev_hw_addr_list dev_addrs; /* list of device 135 * hw addresses 136 */ 137 #ifdef CONFIG_SYSFS 138 struct kset *queues_kset; 139 #endif 140 141 bool uc_promisc; 142 unsigned int promiscuity; 143 unsigned int allmulti; 144 145 146 /* Protocol specific pointers */ 147 148 #if IS_ENABLED(CONFIG_VLAN_8021Q) 149 struct vlan_info __rcu *vlan_info; /* VLAN info */ 150 #endif 151 #if IS_ENABLED(CONFIG_NET_DSA) 152 struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ 153 #endif 154 #if IS_ENABLED(CONFIG_TIPC) 155 struct tipc_bearer __rcu *tipc_ptr; /* TIPC specific data */ 156 #endif 157 void *atalk_ptr; /* AppleTalk link */ 158 struct in_device __rcu *ip_ptr; /* IPv4 specific data */ 159 struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ 160 struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ 161 void *ax25_ptr; /* AX.25 specific data */ 162 struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, 163 assign before registering */ 164 165 /* 166 * Cache lines mostly used on receive path (including eth_type_trans()) 167 */ 168 unsigned long last_rx; /* Time of last Rx */ 169 170 /* Interface address info used in eth_type_trans() */ 171 unsigned char *dev_addr; /* hw address, (before bcast 172 because most packets are 173 unicast) */ 174 175 176 #ifdef CONFIG_SYSFS 177 struct netdev_rx_queue *_rx; 178 179 /* Number of RX queues 
allocated at register_netdev() time */ 180 unsigned int num_rx_queues; 181 182 /* Number of RX queues currently active in device */ 183 unsigned int real_num_rx_queues; 184 185 #endif 186 187 rx_handler_func_t __rcu *rx_handler; 188 void __rcu *rx_handler_data; 189 190 struct netdev_queue __rcu *ingress_queue; 191 unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ 192 193 194 /* 195 * Cache lines mostly used on transmit path 196 */ 197 struct netdev_queue *_tx ____cacheline_aligned_in_smp; 198 199 /* Number of TX queues allocated at alloc_netdev_mq() time */ 200 unsigned int num_tx_queues; 201 202 /* Number of TX queues currently active in device */ 203 unsigned int real_num_tx_queues; 204 205 /* root qdisc from userspace point of view */ 206 struct Qdisc *qdisc; 207 208 unsigned long tx_queue_len; /* Max frames per queue allowed */ 209 spinlock_t tx_global_lock; 210 211 #ifdef CONFIG_XPS 212 struct xps_dev_maps __rcu *xps_maps; 213 #endif 214 #ifdef CONFIG_RFS_ACCEL 215 /* CPU reverse-mapping for RX completion interrupts, indexed 216 * by RX queue number. Assigned by driver. This must only be 217 * set if the ndo_rx_flow_steer operation is defined. */ 218 struct cpu_rmap *rx_cpu_rmap; 219 #endif 220 221 /* These may be needed for future network-power-down code. */ 222 223 /* 224 * trans_start here is expensive for high speed devices on SMP, 225 * please use netdev_queue->trans_start instead. 
226 */ 227 unsigned long trans_start; /* Time (in jiffies) of last Tx */ 228 229 int watchdog_timeo; /* used by dev_watchdog() */ 230 struct timer_list watchdog_timer; 231 232 /* Number of references to this device */ 233 int __percpu *pcpu_refcnt; 234 235 /* delayed register/unregister */ 236 struct list_head todo_list; 237 /* device index hash chain */ 238 struct hlist_node index_hlist; 239 240 struct list_head link_watch_list; 241 242 /* register/unregister state machine */ 243 enum { NETREG_UNINITIALIZED=0, 244 NETREG_REGISTERED, /* completed register_netdevice */ 245 NETREG_UNREGISTERING, /* called unregister_netdevice */ 246 NETREG_UNREGISTERED, /* completed unregister todo */ 247 NETREG_RELEASED, /* called free_netdev */ 248 NETREG_DUMMY, /* dummy device for NAPI poll */ 249 } reg_state:8; 250 251 bool dismantle; /* device is going do be freed */ 252 253 enum { 254 RTNL_LINK_INITIALIZED, 255 RTNL_LINK_INITIALIZING, 256 } rtnl_link_state:16; 257 258 /* Called from unregister, can be used to call free_netdev */ 259 void (*destructor)(struct net_device *dev); 260 261 #ifdef CONFIG_NETPOLL 262 struct netpoll_info __rcu *npinfo; 263 #endif 264 265 #ifdef CONFIG_NET_NS 266 /* Network namespace this network device is inside */ 267 struct net *nd_net; 268 #endif 269 270 /* mid-layer private */ 271 union { 272 void *ml_priv; 273 struct pcpu_lstats __percpu *lstats; /* loopback stats */ 274 struct pcpu_sw_netstats __percpu *tstats; 275 struct pcpu_dstats __percpu *dstats; /* dummy stats */ 276 struct pcpu_vstats __percpu *vstats; /* veth stats */ 277 }; 278 /* GARP */ 279 struct garp_port __rcu *garp_port; 280 /* MRP */ 281 struct mrp_port __rcu *mrp_port; 282 283 /* class/net/name entry */ 284 struct device dev; 285 /* space for optional device, statistics, and wireless sysfs groups */ 286 const struct attribute_group *sysfs_groups[4]; 287 /* space for optional per-rx queue attributes */ 288 const struct attribute_group *sysfs_rx_queue_group; 289 290 /* rtnetlink 
link ops */ 291 const struct rtnl_link_ops *rtnl_link_ops; 292 293 /* for setting kernel sock attribute on TCP connection setup */ 294 #define GSO_MAX_SIZE 65536 295 unsigned int gso_max_size; 296 #define GSO_MAX_SEGS 65535 297 u16 gso_max_segs; 298 299 #ifdef CONFIG_DCB 300 /* Data Center Bridging netlink ops */ 301 const struct dcbnl_rtnl_ops *dcbnl_ops; 302 #endif 303 u8 num_tc; 304 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; 305 u8 prio_tc_map[TC_BITMASK + 1]; 306 307 #if IS_ENABLED(CONFIG_FCOE) 308 /* max exchange id for FCoE LRO by ddp */ 309 unsigned int fcoe_ddp_xid; 310 #endif 311 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) 312 struct netprio_map __rcu *priomap; 313 #endif 314 /* phy device may attach itself for hardware timestamping */ 315 struct phy_device *phydev; 316 317 struct lock_class_key *qdisc_tx_busylock; 318 319 /* group the device belongs to */ 320 int group; 321 322 struct pm_qos_request pm_qos_req; 323 };
其中主要的成员有
1、全局的信息
name
2、硬件信息
定义设备所使用的共享内存的内存信息和中断等。
unsigned long mem_start
unsigned long mem_end
unsigned long base_addr
int irq
unsigned char if_port
其中if_port 为多端口设备使用。
3、接口信息
hard_header_len 硬件头长度
type 接口硬件类型
mtu 接口mtu
flags 接口特性标志,部分由内核使用
4、设备操作函数
netdev_ops (驱动的重点实现部分),驱动有时还需要实现一部分网络工具需要的接口函数。放在这个结构体的net_device->ethtool_ops (非必须,有设置设备MAC地址等)
这里重要先看一下网络设备操作接口,定义如下:
/*
 * struct net_device_ops - table of callbacks a network driver fills in and
 * attaches to a device through net_device->netdev_ops.
 *
 * The grouping comments below are reading aids derived from the member names
 * and the config guards; the exact contract of each hook is not shown here
 * and should be checked against the kernel's own documentation.
 */
struct net_device_ops {
	/* Device lifecycle: init/uninit and open/stop. */
	int			(*ndo_init)(struct net_device *dev);
	void			(*ndo_uninit)(struct net_device *dev);
	int			(*ndo_open)(struct net_device *dev);
	int			(*ndo_stop)(struct net_device *dev);
	/* Transmit hook and TX queue selection. */
	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
						   struct net_device *dev);
	u16			(*ndo_select_queue)(struct net_device *dev,
						    struct sk_buff *skb,
						    void *accel_priv,
						    select_queue_fallback_t fallback);
	/* RX mode, address, ioctl, config and MTU hooks. */
	void			(*ndo_change_rx_flags)(struct net_device *dev,
						       int flags);
	void			(*ndo_set_rx_mode)(struct net_device *dev);
	int			(*ndo_set_mac_address)(struct net_device *dev,
						       void *addr);
	int			(*ndo_validate_addr)(struct net_device *dev);
	int			(*ndo_do_ioctl)(struct net_device *dev,
					        struct ifreq *ifr, int cmd);
	int			(*ndo_set_config)(struct net_device *dev,
					          struct ifmap *map);
	int			(*ndo_change_mtu)(struct net_device *dev,
						  int new_mtu);
	int			(*ndo_neigh_setup)(struct net_device *dev,
						   struct neigh_parms *);
	void			(*ndo_tx_timeout) (struct net_device *dev);

	/* Statistics retrieval. */
	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
						     struct rtnl_link_stats64 *storage);
	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);

	/* VLAN id add/remove. */
	int			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
						       __be16 proto, u16 vid);
	int			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
						        __be16 proto, u16 vid);
#ifdef CONFIG_NET_POLL_CONTROLLER
	void                    (*ndo_poll_controller)(struct net_device *dev);
	int			(*ndo_netpoll_setup)(struct net_device *dev,
						     struct netpoll_info *info);
	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
	int			(*ndo_busy_poll)(struct napi_struct *dev);
#endif
	/* Per-VF ("vf"/"queue" indexed) configuration hooks. */
	int			(*ndo_set_vf_mac)(struct net_device *dev,
						  int queue, u8 *mac);
	int			(*ndo_set_vf_vlan)(struct net_device *dev,
						   int queue, u16 vlan, u8 qos);
	int			(*ndo_set_vf_rate)(struct net_device *dev,
						   int vf, int min_tx_rate,
						   int max_tx_rate);
	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
						       int vf, bool setting);
	int			(*ndo_get_vf_config)(struct net_device *dev,
						     int vf,
						     struct ifla_vf_info *ivf);
	int			(*ndo_set_vf_link_state)(struct net_device *dev,
							 int vf, int link_state);
	int			(*ndo_set_vf_port)(struct net_device *dev,
						   int vf,
						   struct nlattr *port[]);
	int			(*ndo_get_vf_port)(struct net_device *dev,
						   int vf, struct sk_buff *skb);
	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
#if IS_ENABLED(CONFIG_FCOE)
	int			(*ndo_fcoe_enable)(struct net_device *dev);
	int			(*ndo_fcoe_disable)(struct net_device *dev);
	int			(*ndo_fcoe_ddp_setup)(struct net_device *dev,
						      u16 xid,
						      struct scatterlist *sgl,
						      unsigned int sgc);
	int			(*ndo_fcoe_ddp_done)(struct net_device *dev,
						     u16 xid);
	int			(*ndo_fcoe_ddp_target)(struct net_device *dev,
						       u16 xid,
						       struct scatterlist *sgl,
						       unsigned int sgc);
	int			(*ndo_fcoe_get_hbainfo)(struct net_device *dev,
							struct netdev_fcoe_hbainfo *hbainfo);
#endif

#if IS_ENABLED(CONFIG_LIBFCOE)
#define NETDEV_FCOE_WWNN 0
#define NETDEV_FCOE_WWPN 1
	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
						    u64 *wwn, int type);
#endif

#ifdef CONFIG_RFS_ACCEL
	int			(*ndo_rx_flow_steer)(struct net_device *dev,
						     const struct sk_buff *skb,
						     u16 rxq_index,
						     u32 flow_id);
#endif
	/* Slave add/remove and feature negotiation. */
	int			(*ndo_add_slave)(struct net_device *dev,
						 struct net_device *slave_dev);
	int			(*ndo_del_slave)(struct net_device *dev,
						 struct net_device *slave_dev);
	netdev_features_t	(*ndo_fix_features)(struct net_device *dev,
						    netdev_features_t features);
	int			(*ndo_set_features)(struct net_device *dev,
						    netdev_features_t features);
	int			(*ndo_neigh_construct)(struct neighbour *n);
	void			(*ndo_neigh_destroy)(struct neighbour *n);

	/* FDB and bridge link hooks. */
	int			(*ndo_fdb_add)(struct ndmsg *ndm,
					       struct nlattr *tb[],
					       struct net_device *dev,
					       const unsigned char *addr,
					       u16 flags);
	int			(*ndo_fdb_del)(struct ndmsg *ndm,
					       struct nlattr *tb[],
					       struct net_device *dev,
					       const unsigned char *addr);
	int			(*ndo_fdb_dump)(struct sk_buff *skb,
						struct netlink_callback *cb,
						struct net_device *dev,
						int idx);

	int			(*ndo_bridge_setlink)(struct net_device *dev,
						      struct nlmsghdr *nlh);
	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
						      u32 pid, u32 seq,
						      struct net_device *dev,
						      u32 filter_mask);
	int			(*ndo_bridge_dellink)(struct net_device *dev,
						      struct nlmsghdr *nlh);
	int			(*ndo_change_carrier)(struct net_device *dev,
						      bool new_carrier);
	int			(*ndo_get_phys_port_id)(struct net_device *dev,
							struct netdev_phys_port_id *ppid);
	void			(*ndo_add_vxlan_port)(struct  net_device *dev,
						      sa_family_t sa_family,
						      __be16 port);
	void			(*ndo_del_vxlan_port)(struct  net_device *dev,
						      sa_family_t sa_family,
						      __be16 port);

	void*			(*ndo_dfwd_add_station)(struct net_device *pdev,
							struct net_device *dev);
	void			(*ndo_dfwd_del_station)(struct net_device *pdev,
							void *priv);

	netdev_tx_t		(*ndo_dfwd_start_xmit) (struct sk_buff *skb,
							struct net_device *dev,
							void *priv);
	int			(*ndo_get_lock_subclass)(struct net_device *dev);
};
接口很多也很复杂,但只需实现必要的部分就可以了。具体的每个函数的作用后面分析驱动实例时候再全部看,这里暂时记录一部分自己现在理解的。当实现好一个网络设备并填充绑定了必须的接口信息之后就可以将网络设备注册到内核了。
接口分析
网络设备的初始化(ndo_init成员)
在这个接口一般完成硬件的准备工作,检查硬件,并获取记录硬件资源。软件接口的准备工作,然后初始化私有成员并绑定到设备。
网络设备打开和关闭(ndo_open 和ndo_stop)
使能设备的硬件资源如中断申请,DMA 使能和配置。调用netif_start_queue()激活设备发送队列。关闭过程就是释放和关闭前面申请的资源,调用netif_stop_queue()停止设备数据包传输。
数据发送过程
网络设备的数据传输过程依赖内核的一个数据结构struct sk_buff网络层打包一个完全的数据包,由下层一层层使用去除各层信息最后到达物理设备完成数据在电路上的传输。伪代码如下:
int xxx_tx(struct sk_buff *skb, struct net_device *dev)
{
    执行软件操作逻辑的处理,如等待队列---阻塞调用进程等。
    记录信息和超时的处理
    操作硬件设备寄存器等完成数据到硬件设备。
    最后由硬件设备在合适的时间完成数据发送。
    发送完成后还要唤醒注册的队列中的进程。
}
数据接收过程
数据的接收一般都是由中断触发的,硬件完成数据的接收和校验后将调用网络设备的接收接口函数,将接收到的数据封装成一个struct sk_buff 结构体,然后通过void xxx_rx(struct net_device* dev)向上层递交数据。除此之外有些网络设备是NAPI兼容设备,就是对数据接收支持轮询的方式,此时这个轮询函数也封装在设备操作中,并且由指定的接口启动和停止轮询,一般都是数据的第一包到来后进入中断,在中断中启动轮询调度。当收到数据时它将调用netif_receive_skb(struct sk_buff* sk_buff)提交数据给上层协议。详细具体的信息后面再学习。具体的设备连接状态信息统计等等详细的内容这里也暂时不详细看,后续用到再继续深入具体地学习,这里还是前面的话:重在走粗线条,太细后面不用也记不住。
网络设备注册
网络设备成员实现完成后就需要通过对应的接口完成注册,使用如下接口:
int register_netdev(struct net_device* dev)
同时还有注销接口
void unregister_netdev(struct net_device* dev)
Linux 还提供了一些宏完成网络设备的内存申请和成员的部分初始化。其最后都是通过调用
struct net_device* alloc_netdev_mqs(int sizeof_priv,const char* name,void(*setup)(struct net_device*),unsigned int txqs,unsigned int rxqs);
来分配内存和初始化网络设备。网络设备的注册和普通设备的注册没有太大的区别,不再详细看。