深入理解linux网络技术内幕读书笔记(六)--PCI层与网络接口卡

本章涉及的数据结构


pci_device_id结构


1:  struct pci_device_id {          /* one entry of the table a driver uses to tell the PCI core which devices it can drive */
2:       __u32 vendor, device;       /* Vendor and device ID or PCI_ANY_ID*/
3:       __u32 subvendor, subdevice; /* Subsystem vendor and subsystem device ID, or PCI_ANY_ID */
4:       __u32 class, class_mask;    /* (class,subclass,prog-if) triplet, and the mask applied to it */
5:       kernel_ulong_t driver_data; /* Data private to the driver */
6:  };
pci_device_id唯一标识一个PCI设备。它的几个成员依次分别表示:厂商号,设备号,子厂商号,子设备号,类别,类别掩码(类可分为基类,子类),私有数据。
每一个PCI设备的驱动程序都有一个pci_device_id的数组,用于告诉PCI核心自己能够驱动哪些设备。
[注] include/linux/mod_devicetable.h


pci_dev结构

  1:  /*
  2:   * The pci_dev structure is used to describe PCI devices.
  3:   */
  4:  struct pci_dev {
  5:        /* bus_list: besides being linked into the global device list, every pci_dev is also linked, through this member, into the device list of the PCI bus it belongs to.
  6:           Each PCI bus keeps its own list of the devices attached to it; the head of that list is the devices member of the bus's pci_bus structure */
  7:       struct list_head bus_list;  /* node in per-bus list */
  8:       /* bus: points to the pci_bus structure of the PCI bus this device sits on. For a bridge device, bus therefore points to the bridge's primary bus, i.e. the PCI bus the bridge itself is on */
  9:       struct pci_bus  *bus;       /* bus this device is on */
 10:       /* subordinate: points to the lower-level bus this device bridges to. Only meaningful for bridge devices; for ordinary non-bridge PCI devices it is always NULL */
 11:       struct pci_bus  *subordinate;   /* bus this device bridges to */
 12:  
 13:       /* sysdata: untyped pointer to a block of system-specific extension data */
 14:       void        *sysdata;   /* hook for sys-specific extension */
 15:       /* procent: points to this PCI device's directory entry in the /proc file system */
 16:       struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
 17:       struct pci_slot *slot;      /* Physical slot this device is in */
 18:  
 19:       /* devfn: the device/function number of this PCI device, also called the PCI logical device number (0-255). bit[7:3] is the physical device number (0-31) and bit[2:0] is the function number (0-7). */
 20:       unsigned int    devfn;      /* encoded device & function index */
 21:       /* vendor: 16-bit unsigned integer holding the PCI device's vendor ID */
 22:       unsigned short  vendor;
 23:       /* device: 16-bit unsigned integer holding the PCI device's device ID */
 24:       unsigned short  device;
 25:       /* subsystem_vendor: 16-bit unsigned integer holding the PCI device's subsystem vendor ID */
 26:       unsigned short  subsystem_vendor;
 27:       /* subsystem_device: 16-bit unsigned integer holding the PCI device's subsystem device ID */
 28:       unsigned short  subsystem_device;
 29:       /* class: 32-bit unsigned integer giving the class of this PCI device: bit[7:0] is the programming interface, bit[15:8] the sub-class code, bit[23:16] the base-class code; bit[31:24] is unused.
 30:          The low 3 bytes of class therefore correspond exactly to the class code in PCI configuration space */
 31:       unsigned int    class;      /* 3 bytes: (base,sub,prog-if) */
 32:       u8      revision;   /* PCI revision, low byte of class word */
 33:       /* hdr_type: 8-bit unsigned value (u8) giving the type of the PCI configuration-space header. In the header-type register, bit[7]=1 marks a multi-function device and bit[7]=0 a single-function device.
 34:          Bit[6:0] gives the layout of the configuration-space header: 00h is the header of an ordinary PCI device, 01h the header of a PCI-to-PCI bridge,
 35:          and 02h the header of a CardBus bridge */
 36:       u8      hdr_type;   /* PCI header type (`multi' flag masked out) */
 37:       u8      pcie_cap;   /* PCI-E capability offset */
 38:       u8      pcie_type;  /* PCI-E device/port type */
 39:       u8      rom_base_reg;   /* which config register controls the ROM */
 40:       /* rom_base_reg (the field declared just above): 8-bit unsigned integer giving the offset of the ROM base address register within PCI configuration space. The offset depends on the header type:
 41:          for the type 0 layout the ROM base address register starts at 30h, while for the type 1 layout used by PCI-to-PCI bridges it starts at 38h */
 42:       u8      pin;        /* which interrupt pin this device uses */
 43:  
 44:       /* driver: points to the pci_driver structure defined by the driver bound to this PCI device. Every PCI device driver must define its own pci_driver structure to describe itself. */
 45:       struct pci_driver *driver;  /* which driver has allocated this device */
 46:       /* dma_mask: bus address mask used for DMA; normally 0xffffffff. The field is declared u64 here.
 47:          NOTE(review): the original note described the type as dma_addr_t (include/asm/types.h, u32 on x86), which does not match this declaration -- confirm against the kernel version in use */
 48:       u64     dma_mask;   /* Mask of the bits of bus address this
 49:                          device implements.  Normally this is
 50:                          0xffffffff.  You only need to change
 51:                          this if your device has broken DMA
 52:                          or supports 64-bit transfers.  */
 53:  
 54:       struct device_dma_parameters dma_parms;
 55:  
 56:       /* current operating (power) state */
 57:       pci_power_t     current_state;  /* Current operating state. In ACPI-speak,
 58:                          this is D0-D3, D0 being fully functional,
 59:                          and D3 being off. */
 60:       int     pm_cap;     /* PM capability offset in the
 61:                          configuration space */
 62:       unsigned int    pme_support:5;  /* Bitmask of states from which PME#
 63:                          can be generated */
 64:       unsigned int    pme_interrupt:1;
 65:       unsigned int    d1_support:1;   /* Low power state D1 is supported */
 66:       unsigned int    d2_support:1;   /* Low power state D2 is supported */
 67:       unsigned int    no_d1d2:1;  /* Only allow D0 and D3 */
 68:       unsigned int    wakeup_prepared:1;
 69:       unsigned int    d3_delay;   /* D3->D0 transition time in ms */
 70:  
 71:  #ifdef CONFIG_PCIEASPM
 72:       struct pcie_link_state  *link_state;    /* ASPM link state. */
 73:  #endif
 74:  
 75:       pci_channel_state_t error_state;    /* current connectivity state */
 76:       /* generic device interface */
 77:       struct  device  dev;        /* Generic device interface */
 78:  
 79:       /* size of the configuration space */
 80:       int     cfg_size;   /* Size of configuration space */
 81:  
 82:       /*
 83:        * Instead of touching interrupt line and base address registers
 84:        * directly, use the values stored here. They might be different!
 85:        */
 86:       /* irq: unsigned integer telling which IRQ input line this PCI device raises its interrupt on; the original note says it is usually a value in 0-15 */
 87:       unsigned int    irq;
 88:       /* resources the device may use: I/O port regions, device memory address regions and the expansion ROM address region */
 89:       struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 90:       resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */
 91:  
 92:       /* These fields are used by common fixups */
 93:       /* transparent PCI bridge */
 94:       unsigned int    transparent:1;  /* Transparent PCI bridge */
 95:       /* multi-function device */
 96:       unsigned int    multifunction:1;/* Part of multi-function device */
 97:       /* keep track of device state */
 98:       unsigned int    is_added:1;
 99:       /* the device is a bus master */
100:       unsigned int    is_busmaster:1; /* device is busmaster */
101:       /* the device may not use MSI */
102:       unsigned int    no_msi:1;   /* device may not use msi */
103:       /* set while userspace access to the configuration space is blocked */
104:       unsigned int    block_ucfg_access:1;    /* userspace config space access is blocked */
105:       unsigned int    broken_parity_status:1; /* Device generates false positive parity */
106:       unsigned int    irq_reroute_variant:2;  /* device needs IRQ rerouting variant */
107:       unsigned int    msi_enabled:1;
108:       unsigned int    msix_enabled:1;
109:       unsigned int    ari_enabled:1;  /* ARI forwarding */
110:       unsigned int    is_managed:1;
111:       unsigned int    is_pcie:1;  /* Obsolete. Will be removed.
112:                          Use pci_is_pcie() instead */
113:       unsigned int    needs_freset:1; /* Dev requires fundamental reset */
114:       unsigned int    state_saved:1;
115:       unsigned int    is_physfn:1;
116:       unsigned int    is_virtfn:1;
117:       unsigned int    reset_fn:1;
118:       unsigned int    is_hotplug_bridge:1;
119:       unsigned int    __aer_firmware_first_valid:1;
120:       unsigned int    __aer_firmware_first:1;
121:       pci_dev_flags_t dev_flags;
122:       atomic_t    enable_cnt; /* pci_enable_device has been called */
123:  
124:       /* copy of the configuration space saved at suspend time */
125:       u32     saved_config_space[16]; /* config space saved at suspend time */
126:       struct hlist_head saved_cap_space;
127:       /* attribute descriptor of the sysfs ROM entry */
128:       struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
129:       int rom_attr_enabled;       /* has display of the rom attribute been enabled? */
130:       struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
131:       struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
132:  #ifdef CONFIG_PCI_MSI
133:       struct list_head msi_list;
134:  #endif
135:       struct pci_vpd *vpd;
136:  #ifdef CONFIG_PCI_IOV
137:       union {
138:           struct pci_sriov *sriov;    /* SR-IOV capability related */
139:           struct pci_dev *physfn; /* the PF this VF is associated with */
140:       };
141:       struct pci_ats  *ats;   /* Address Translation Service */
142:  #endif
143:  };

每一个PCI设备都会被分派一个pci_dev实例,如同网络设备都会被分派net_device实例一样。这个结构由内核使用,以引用一个PCI设备。
[注] include/linux/pci.h


pci_driver结构


 1:  struct pci_driver {             /* interface between the PCI layer and a device driver */
 2:       struct list_head node;
 3:       char *name;                 /* copied into driver.name by __pci_register_driver() */
 4:       const struct pci_device_id *id_table;   /* must be non-NULL for probe to be called */
 5:       int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);   /* New device inserted */
 6:       void (*remove) (struct pci_dev *dev);   /* Device removed (NULL if not a hot-plug capable driver) */
 7:       int  (*suspend) (struct pci_dev *dev, pm_message_t state);  /* Device suspended */
 8:       int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
 9:       int  (*resume_early) (struct pci_dev *dev);
10:       int  (*resume) (struct pci_dev *dev);                   /* Device woken up */
11:       void (*shutdown) (struct pci_dev *dev);
12:       struct pci_error_handlers *err_handler;
13:       struct device_driver    driver;  /* name, bus, owner and mod_name are filled in by __pci_register_driver() */
14:       struct pci_dynids dynids;        /* its lock and list are initialised by __pci_register_driver() */
15:  };
定义PCI层与设备驱动程序之间的接口。
[注] include/linux/pci.h


PCI NIC设备驱动程序的注册

注册

 1:  /**
 2:   * __pci_register_driver - register a new pci driver
 3:   * @drv: the driver structure to register
 4:   * @owner: owner module of drv
 5:   * @mod_name: module name string
 6:   *
 7:   * Adds the driver structure to the list of registered drivers.
 8:   * Returns a negative value on error, otherwise 0.
 9:   * If no error occurred, the driver remains registered even if
10:   * no device was claimed during registration.
11:   */
12:  int __pci_register_driver(struct pci_driver *drv, struct module *owner,
13:                 const char *mod_name)
14:  {
15:       int error;
16:  
17:       /* initialize common driver fields */
18:       drv->driver.name = drv->name;
19:       drv->driver.bus = &pci_bus_type;
20:       drv->driver.owner = owner;
21:       drv->driver.mod_name = mod_name;
22:  
23:       spin_lock_init(&drv->dynids.lock);      /* set up the dynids lock ... */
24:       INIT_LIST_HEAD(&drv->dynids.list);      /* ... and its list head before registering */
25:  
26:       /* register with core */
27:       error = driver_register(&drv->driver);
28:       if (error)
29:           goto out;           /* nothing has been set up yet, so nothing to undo */
30:  
31:       error = pci_create_newid_file(drv);
32:       if (error)
33:           goto out_newid;     /* undo driver_register() */
34:  
35:       error = pci_create_removeid_file(drv);
36:       if (error)
37:           goto out_removeid;  /* undo the newid file, then driver_register() */
38:  out:                         /* common exit; the success path falls through to here with error == 0 */
39:       return error;
40:  
41:  out_removeid:                /* error unwinding: each label undoes one earlier step and falls through to the next */
42:       pci_remove_newid_file(drv);
43:  out_newid:
44:       driver_unregister(&drv->driver);
45:       goto out;               /* jump back up to the single return statement */
46:  }

[注] drivers/pci/pci-driver.c


解除


 1:  /**
 2:   * pci_unregister_driver - unregister a pci driver
 3:   * @drv: the driver structure to unregister
 4:   *
 5:   * Deletes the driver structure from the list of registered PCI drivers,
 6:   * gives it a chance to clean up by calling its remove() function for
 7:   * each device it was responsible for, and marks those devices as
 8:   * driverless.
 9:   */
10:  
11:  void
12:  pci_unregister_driver(struct pci_driver *drv)
13:  {
14:       pci_remove_removeid_file(drv);      /* the first three calls undo __pci_register_driver() in reverse order */
15:       pci_remove_newid_file(drv);
16:       driver_unregister(&drv->driver);
17:       pci_free_dynids(drv);               /* called last, after the driver has been unregistered */
18:  }
[注] drivers/pci/pci-driver.c


电源管理与网络唤醒

PCI电源管理事件由pci_driver数据结构的suspend和resume函数处理。除了分别负责PCI状态的保存与恢复之外,这些函数遇到NIC的情况时还需采取特殊步骤:

  • suspend主要停止设备的出口队列,使得该设备无法再传输。
  • resume重启出口队列,使得该设备得以再次传输。

网络唤醒(Wake-on-LAN, WOL)允许NIC在接收到一种特殊类型的帧时唤醒处于待命状态的系统。WOL默认是关闭的,可以用pci_enable_wake打开或关闭此功能。
唤醒系统的魔术封包特性:

  • 目的MAC地址属于正在接收的NIC(无论单播/多播/广播)。
  • 帧中的某处(任何地方)包含一段48位的同步序列(也就是FF:FF:FF:FF:FF:FF),其后紧跟该NIC的MAC地址,且该MAC地址至少连续重复16次。



posted @ 2014-02-17 22:58  mospan  阅读(2558)  评论(0编辑  收藏  举报