深入理解linux网络技术内幕读书笔记(六)--PCI层与网络接口卡
Table of Contents
本章涉及的数据结构
pci_device_id结构
1: struct pci_device_id { 2: __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ 3: __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ 4: __u32 class, class_mask; /* (class,subclass,prog-if) triplet */ 5: kernel_ulong_t driver_data; /* Data private to the driver */ 6: };pci_device_id唯一标识一个PCI设备。它的几个成员依次分别表示:厂商号,设备号,子厂商号,子设备号,类别,类别掩码(类可分为基类,子类),私有数据。
每一个PCI设备的驱动程序都有一个pci_device_id的数组,用于告诉PCI核心自己能够驱动哪些设备。
[注] include/linux/mod_devicetable.h
pci_dev结构
1: /* 2: * The pci_dev structure is used to describe PCI devices. 3: */ 4: struct pci_dev { 5: /* 总线设备链表元素bus_list:每一个pci_dev结构除了链接到全局设备链表中外,还会通过这个成员连接到其所属PCI总线的设备链表中。 6: 每一条PCI总线都维护一条它自己的设备链表视图,以便描述所有连接在该PCI总线上的设备,其表头由PCI总线的pci_bus结构中的 devices成员所描述 */ 7: struct list_head bus_list; /* node in per-bus list */ 8: /* 总线指针bus:指向这个PCI设备所在的PCI总线的pci_bus结构。因此,对于桥设备而言,bus指针将指向桥设备的主总线(primary bus),也即指向桥设备所在的PCI总线*/ 9: struct pci_bus *bus; /* bus this device is on */ 10: /* 指针subordinate:指向这个PCI设备所桥接的下级总线。这个指针成员仅对桥设备才有意义,而对于一般的非桥PCI设备而言,该指针成员总是为NULL*/ 11: struct pci_bus *subordinate; /* bus this device bridges to */ 12: 13: /* 无类型指针sysdata:指向一片特定于系统的扩展数据*/ 14: void *sysdata; /* hook for sys-specific extension */ 15: /* 指针procent:指向该PCI设备在/proc文件系统中对应的目录项*/ 16: struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */ 17: struct pci_slot *slot; /* Physical slot this device is in */ 18: 19: /* devfn:这个PCI设备的设备功能号,也称为PCI逻辑设备号(0-255)。其中bit[7:3]是物理设备号(取值范围0-31),bit[2:0]是功能号(取值范围0-7)。 */ 20: unsigned int devfn; /* encoded device & function index */ 21: /* vendor:这是一个16位无符号整数,表示PCI设备的厂商ID*/ 22: unsigned short vendor; 23: /*device:这是一个16位无符号整数,表示PCI设备的设备ID */ 24: unsigned short device; 25: /* subsystem_vendor:这是一个16位无符号整数,表示PCI设备的子系统厂商ID*/ 26: unsigned short subsystem_vendor; 27: /* subsystem_device:这是一个16位无符号整数,表示PCI设备的子系统设备ID。*/ 28: unsigned short subsystem_device; 29: /* class:32位的无符号整数,表示该PCI设备的类别,其中,bit[7:0]为编程接口,bit[15:8]为子类别代码,bit [23:16]为基类别代码,bit[31:24]无意义。 30: 显然,class成员的低3字节刚好对应于PCI配置空间中的类代码*/ 31: unsigned int class; /* 3 bytes: (base,sub,prog-if) */ 32: u8 revision; /* PCI revision, low byte of class word */ 33: /* hdr_type:8位无符号整数,表示PCI配置空间头部的类型。其中,bit[7]=1表示这是一个多功能设备,bit[7]=0表示这是一个单功能设备。 34: Bit[6:0]则表示PCI配置空间头部的布局类型,值00h表示这是一个一般PCI设备的配置空间头部,值01h表示这是一个PCI-to-PCI桥的配置空间头部, 35: 值02h表示CardBus桥的配置空间头部*/ 36: u8 hdr_type; /* PCI header type (`multi' flag masked out) */ 37: u8 pcie_cap; /* PCI-E capability offset */ 38: u8 pcie_type; /* PCI-E device/port type */ 39: u8 
rom_base_reg; /* which config register controls the ROM */ 40: /* rom_base_reg:8位无符号整数,表示PCI配置空间中的ROM基地址寄存器在PCI配置空间中的位置。ROM基地址寄存器在不同类型的PCI配置空间头部的位置是不一样的, 41: 对于type 0的配置空间布局,ROM基地址寄存器的起始位置是30h,而对于PCI-to-PCI桥所用的type 1配置空间布局,ROM基地址寄存器的起始位置是38h*/ 42: u8 pin; /* which interrupt pin this device uses */ 43: 44: /* 指针driver:指向这个PCI设备所对应的驱动程序定义的pci_driver结构。每一个pci设备驱动程序都必须定义它自己的pci_driver结构来描述它自己。*/ 45: struct pci_driver *driver; /* which driver has allocated this device */ 46: /*dma_mask:用于DMA的总线地址掩码,一般来说,这个成员的值是0xffffffff。数据类型dma_addr_t定义在include/asm/types.h中,在x86平台上, 47: dma_addr_t类型就是u32类型*/ 48: u64 dma_mask; /* Mask of the bits of bus address this 49: device implements. Normally this is 50: 0xffffffff. You only need to change 51: this if your device has broken DMA 52: or supports 64-bit transfers. */ 53: 54: struct device_dma_parameters dma_parms; 55: 56: /* 当前操作状态 */ 57: pci_power_t current_state; /* Current operating state. In ACPI-speak, 58: this is D0-D3, D0 being fully functional, 59: and D3 being off. */ 60: int pm_cap; /* PM capability offset in the 61: configuration space */ 62: unsigned int pme_support:5; /* Bitmask of states from which PME# 63: can be generated */ 64: unsigned int pme_interrupt:1; 65: unsigned int d1_support:1; /* Low power state D1 is supported */ 66: unsigned int d2_support:1; /* Low power state D2 is supported */ 67: unsigned int no_d1d2:1; /* Only allow D0 and D3 */ 68: unsigned int wakeup_prepared:1; 69: unsigned int d3_delay; /* D3->D0 transition time in ms */ 70: 71: #ifdef CONFIG_PCIEASPM 72: struct pcie_link_state *link_state; /* ASPM link state. */ 73: #endif 74: 75: pci_channel_state_t error_state; /* current connectivity state */ 76: /* 通用的设备接口*/ 77: struct device dev; /* Generic device interface */ 78: 79: /* 配置空间的大小 */ 80: int cfg_size; /* Size of configuration space */ 81: 82: /* 83: * Instead of touching interrupt line and base address registers 84: * directly, use the values stored here. They might be different! 
85: */ 86: /* 无符号的整数irq:表示这个PCI设备通过哪根IRQ输入线产生中断,一般为0-15之间的某个值 */ 87: unsigned int irq; 88: /*表示该设备可能用到的资源,包括:I/O端口区域、设备内存地址区域以及扩展ROM地址区域。*/ 89: struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ 90: resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */ 91: 92: /* These fields are used by common fixups */ 93: /* 透明 PCI 桥 */ 94: unsigned int transparent:1; /* Transparent PCI bridge */ 95: /* 多功能设备*/ 96: unsigned int multifunction:1;/* Part of multi-function device */ 97: /* keep track of device state */ 98: unsigned int is_added:1; 99: /* 设备是主设备*/ 100: unsigned int is_busmaster:1; /* device is busmaster */ 101: /* 设备不可使用MSI */ 102: unsigned int no_msi:1; /* device may not use msi */ 103: /* 用户空间对配置空间的访问被阻塞 */ 104: unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ 105: unsigned int broken_parity_status:1; /* Device generates false positive parity */ 106: unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ 107: unsigned int msi_enabled:1; 108: unsigned int msix_enabled:1; 109: unsigned int ari_enabled:1; /* ARI forwarding */ 110: unsigned int is_managed:1; 111: unsigned int is_pcie:1; /* Obsolete. Will be removed. 
112: Use pci_is_pcie() instead */ 113: unsigned int needs_freset:1; /* Dev requires fundamental reset */ 114: unsigned int state_saved:1; 115: unsigned int is_physfn:1; 116: unsigned int is_virtfn:1; 117: unsigned int reset_fn:1; 118: unsigned int is_hotplug_bridge:1; 119: unsigned int __aer_firmware_first_valid:1; 120: unsigned int __aer_firmware_first:1; 121: pci_dev_flags_t dev_flags; 122: atomic_t enable_cnt; /* pci_enable_device has been called */ 123: 124: /* 在挂起时保存配置空间*/ 125: u32 saved_config_space[16]; /* config space saved at suspend time */ 126: struct hlist_head saved_cap_space; 127: /* sysfs ROM入口的属性描述*/ 128: struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ 129: int rom_attr_enabled; /* has display of the rom attribute been enabled? */ 130: struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ 131: struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ 132: #ifdef CONFIG_PCI_MSI 133: struct list_head msi_list; 134: #endif 135: struct pci_vpd *vpd; 136: #ifdef CONFIG_PCI_IOV 137: union { 138: struct pci_sriov *sriov; /* SR-IOV capability related */ 139: struct pci_dev *physfn; /* the PF this VF is associated with */ 140: }; 141: struct pci_ats *ats; /* Address Translation Service */ 142: #endif 143: };
每一个PCI设备都会被分派一个pci_dev实例,如同网络设备都会被分派net_device实例一样。这个结构由内核使用,以引用一个PCI设备。
[注] include/linux/pci.h
pci_driver结构
1: struct pci_driver { 2: struct list_head node; 3: char *name; 4: const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */ 5: int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ 6: void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ 7: int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ 8: int (*suspend_late) (struct pci_dev *dev, pm_message_t state); 9: int (*resume_early) (struct pci_dev *dev); 10: int (*resume) (struct pci_dev *dev); /* Device woken up */ 11: void (*shutdown) (struct pci_dev *dev); 12: struct pci_error_handlers *err_handler; 13: struct device_driver driver; 14: struct pci_dynids dynids; 15: };定义PCI层与设备驱动程序之间的接口。
[注] include/linux/pci.h
PCI NIC设备驱动程序的注册
注册
1: /** 2: * __pci_register_driver - register a new pci driver 3: * @drv: the driver structure to register 4: * @owner: owner module of drv 5: * @mod_name: module name string 6: * 7: * Adds the driver structure to the list of registered drivers. 8: * Returns a negative value on error, otherwise 0. 9: * If no error occurred, the driver remains registered even if 10: * no device was claimed during registration. 11: */ 12: int __pci_register_driver(struct pci_driver *drv, struct module *owner, 13: const char *mod_name) 14: { 15: int error; 16: 17: /* initialize common driver fields */ 18: drv->driver.name = drv->name; 19: drv->driver.bus = &pci_bus_type; 20: drv->driver.owner = owner; 21: drv->driver.mod_name = mod_name; 22: 23: spin_lock_init(&drv->dynids.lock); 24: INIT_LIST_HEAD(&drv->dynids.list); 25: 26: /* register with core */ 27: error = driver_register(&drv->driver); 28: if (error) 29: goto out; 30: 31: error = pci_create_newid_file(drv); 32: if (error) 33: goto out_newid; 34: 35: error = pci_create_removeid_file(drv); 36: if (error) 37: goto out_removeid; 38: out: 39: return error; 40: 41: out_removeid: 42: pci_remove_newid_file(drv); 43: out_newid: 44: driver_unregister(&drv->driver); 45: goto out; 46: }
[注] drivers/pci/pci-driver.c
解除
1: /** 2: * pci_unregister_driver - unregister a pci driver 3: * @drv: the driver structure to unregister 4: * 5: * Deletes the driver structure from the list of registered PCI drivers, 6: * gives it a chance to clean up by calling its remove() function for 7: * each device it was responsible for, and marks those devices as 8: * driverless. 9: */ 10: 11: void 12: pci_unregister_driver(struct pci_driver *drv) 13: { 14: pci_remove_removeid_file(drv); 15: pci_remove_newid_file(drv); 16: driver_unregister(&drv->driver); 17: pci_free_dynids(drv); 18: }
[注] drivers/pci/pci-driver.c
电源管理与网络唤醒
PCI电源管理事件由pci_driver数据结构的suspend和resume函数处理。除了分别负责PCI状态的保存与恢复之外,这些函数遇到NIC的情况时还需采取特殊步骤:
- suspend主要停止设备的出口队列,使得该设备无法再传输。
- resume重启出口队列,使得该设备得以再次传输。
网络唤醒(Wake-on-LAN, WOL)允许NIC在接收到一种特殊类型的帧时唤醒处于待命状态的系统,WOL默认是关闭的。此功能可以用pci_enable_wake打开或关闭。
唤醒系统的魔术封包特性:
- 目的MAC地址属于正在接收的NIC(无论单播/多播/广播)。
- 帧中的某处(任何位置)包含一段48位的同步序列(也就是FF:FF:FF:FF:FF:FF),其后紧跟该NIC的MAC地址,并且这个MAC地址至少连续重复16次。