Linux网络驱动--snull
snull是《Linux Device Drivers》中的一个网络驱动的例子。这里引用这个例子学习Linux网络驱动。
因为snull的源码,网上已经更新到适合最新内核,而我自己用的还是2.6.22.6比较旧的内核。而网上好像找不到旧版的snull。因此结合《Linux Device Drivers》把最新的snull例子移植到2.6.22.6内核中。移植也相对简单,这里也提供移植好的代码。
估计不少网友看到《Linux Device Drivers》的网络驱动部分,一脸懵逼,包括我自己,不理解作者设计这个例子的真正目的,尽管有配图,仍然懵懂,甚至不知道为什么会用到6个IP地址。如图:
其实作者的本意是想通过虚拟网卡来模拟实际的网卡和外部的网络设备的通信来讨论网络驱动。通过其中任何一个网络接口(sn0或sn1)发送数据,都在另一个网络接口(sn0或sn1)接收到。
因为sn0和sn1不在同一个网段,所以sn0和sn1之间直接互ping是不行的,这中间必须做点转换。
例子:
理论上local0和remote0只能互ping,因为它们都在同一个网段:192.168.2.0。但事实上,local0发出数据之后,驱动会把源IP(local0)第3个字节的最低有效位取反(目的IP也同样取反),于是源地址就变成了remote1,remote1的数据才能到达local1,因为它们在同一个网段。相反,local1发出数据之后,local1的第3个字节最低有效位被取反,就变成了remote0,remote0的数据才能到达local0。
因此,在实验之前,需要添加一些配置:
在/etc/networks文件中添加如下网段IP:
snullnet0 192.168.2.0
snullnet1 192.168.3.0
在/etc/hosts文件中添加如下IP地址
192.168.2.8 local0
192.168.2.9 remote0
192.168.3.9 local1
192.168.3.8 remote1
注意: 1. 两个网段(snullnet0和snullnet1)的第三个字节只有最低有效位不同(2和3),各网段内的IP地址也是如此
2. local0和remote1第四个字节必须一样,remote0和local1第四个字节必须一样
3. 开发板上真实网卡已经使用的网段,不能再用于本实验。如:我的开发板的DM9000网卡使用的网段是192.168.1.0,因此本实验不能再使用192.168.1.0作为网段,否则会有冲突。
代码: snull.c, 其中snull.h没改动,因此不贴出来
1 /* 2 * snull.c -- the Simple Network Utility 3 * 4 * Copyright (C) 2001 Alessandro Rubini and Jonathan Corbet 5 * Copyright (C) 2001 O'Reilly & Associates 6 * 7 * The source code in this file can be freely used, adapted, 8 * and redistributed in source or binary form, so long as an 9 * acknowledgment appears in derived source files. The citation 10 * should list that the code comes from the book "Linux Device 11 * Drivers" by Alessandro Rubini and Jonathan Corbet, published 12 * by O'Reilly & Associates. No warranty is attached; 13 * we cannot take responsibility for errors or fitness for use. 14 * 15 * $Id: snull.c,v 1.21 2004/11/05 02:36:03 rubini Exp $ 16 */ 17 18 #include <linux/module.h> 19 #include <linux/init.h> 20 #include <linux/moduleparam.h> 21 22 #include <linux/sched.h> 23 #include <linux/kernel.h> /* printk() */ 24 #include <linux/slab.h> /* kmalloc() */ 25 #include <linux/errno.h> /* error codes */ 26 #include <linux/types.h> /* size_t */ 27 #include <linux/interrupt.h> /* mark_bh */ 28 29 #include <linux/in.h> 30 #include <linux/netdevice.h> /* struct device, and other headers */ 31 #include <linux/etherdevice.h> /* eth_type_trans */ 32 #include <linux/ip.h> /* struct iphdr */ 33 #include <linux/tcp.h> /* struct tcphdr */ 34 #include <linux/skbuff.h> 35 36 #include "snull.h" 37 38 #include <linux/in6.h> 39 #include <asm/checksum.h> 40 41 MODULE_AUTHOR("Alessandro Rubini, Jonathan Corbet"); 42 MODULE_LICENSE("Dual BSD/GPL"); 43 44 45 /* 46 * Transmitter lockup simulation, normally disabled. 47 */ 48 static int lockup = 0; 49 module_param(lockup, int, 0); 50 51 static int timeout = SNULL_TIMEOUT; 52 module_param(timeout, int, 0); 53 54 /* 55 * Do we run in NAPI mode? 56 */ 57 static int use_napi = 0; 58 module_param(use_napi, int, 0); 59 60 61 /* 62 * A structure representing an in-flight packet. 
63 */ 64 struct snull_packet { 65 struct snull_packet *next; 66 struct net_device *dev; 67 int datalen; 68 u8 data[ETH_DATA_LEN]; 69 }; 70 71 int pool_size = 8; 72 module_param(pool_size, int, 0); 73 74 /* 75 * This structure is private to each device. It is used to pass 76 * packets in and out, so there is place for a packet 77 */ 78 79 struct snull_priv { 80 struct net_device_stats stats; 81 int status; 82 struct snull_packet *ppool; 83 struct snull_packet *rx_queue; /* List of incoming packets */ 84 int rx_int_enabled; 85 int tx_packetlen; 86 u8 *tx_packetdata; 87 struct sk_buff *skb; 88 spinlock_t lock; 89 struct net_device *dev; 90 //struct napi_struct napi; 91 }; 92 93 static void snull_tx_timeout(struct net_device *dev); 94 static void (*snull_interrupt)(int, void *, struct pt_regs *); 95 96 /* 97 * Set up a device's packet pool. 98 */ 99 void snull_setup_pool(struct net_device *dev) 100 { 101 struct snull_priv *priv = netdev_priv(dev); 102 int i; 103 struct snull_packet *pkt; 104 105 priv->ppool = NULL; 106 for (i = 0; i < pool_size; i++) { 107 pkt = kmalloc (sizeof (struct snull_packet), GFP_KERNEL); 108 if (pkt == NULL) { 109 printk (KERN_NOTICE "Ran out of memory allocating packet pool\n"); 110 return; 111 } 112 pkt->dev = dev; 113 pkt->next = priv->ppool; 114 priv->ppool = pkt; 115 } 116 } 117 118 void snull_teardown_pool(struct net_device *dev) 119 { 120 struct snull_priv *priv = netdev_priv(dev); 121 struct snull_packet *pkt; 122 123 while ((pkt = priv->ppool)) { 124 priv->ppool = pkt->next; 125 kfree (pkt); 126 /* FIXME - in-flight packets ? */ 127 } 128 } 129 130 /* 131 * Buffer/pool management. 
132 */ 133 struct snull_packet *snull_get_tx_buffer(struct net_device *dev) 134 { 135 struct snull_priv *priv = netdev_priv(dev); 136 unsigned long flags; 137 struct snull_packet *pkt; 138 139 spin_lock_irqsave(&priv->lock, flags); 140 pkt = priv->ppool; 141 priv->ppool = pkt->next; 142 if (priv->ppool == NULL) { 143 printk (KERN_INFO "Pool empty\n"); 144 netif_stop_queue(dev); 145 } 146 spin_unlock_irqrestore(&priv->lock, flags); 147 return pkt; 148 } 149 150 151 void snull_release_buffer(struct snull_packet *pkt) 152 { 153 unsigned long flags; 154 struct snull_priv *priv = netdev_priv(pkt->dev); 155 156 spin_lock_irqsave(&priv->lock, flags); 157 pkt->next = priv->ppool; 158 priv->ppool = pkt; 159 spin_unlock_irqrestore(&priv->lock, flags); 160 if (netif_queue_stopped(pkt->dev) && pkt->next == NULL) 161 netif_wake_queue(pkt->dev); 162 163 printk("snull_release_buffer\n"); 164 } 165 166 void snull_enqueue_buf(struct net_device *dev, struct snull_packet *pkt) 167 { 168 unsigned long flags; 169 struct snull_priv *priv = netdev_priv(dev); 170 171 spin_lock_irqsave(&priv->lock, flags); 172 pkt->next = priv->rx_queue; /* FIXME - misorders packets */ 173 priv->rx_queue = pkt; 174 spin_unlock_irqrestore(&priv->lock, flags); 175 } 176 177 struct snull_packet *snull_dequeue_buf(struct net_device *dev) 178 { 179 struct snull_priv *priv = netdev_priv(dev); 180 struct snull_packet *pkt; 181 unsigned long flags; 182 183 spin_lock_irqsave(&priv->lock, flags); 184 pkt = priv->rx_queue; 185 if (pkt != NULL) 186 priv->rx_queue = pkt->next; 187 spin_unlock_irqrestore(&priv->lock, flags); 188 return pkt; 189 } 190 191 /* 192 * Enable and disable receive interrupts. 193 */ 194 static void snull_rx_ints(struct net_device *dev, int enable) 195 { 196 struct snull_priv *priv = netdev_priv(dev); 197 priv->rx_int_enabled = enable; 198 } 199 200 201 /* 202 * Open and close 203 */ 204 205 int snull_open(struct net_device *dev) 206 { 207 /* request_region(), request_irq(), .... 
(like fops->open) */ 208 209 /* 210 * Assign the hardware address of the board: use "\0SNULx", where 211 * x is 0 or 1. The first byte is '\0' to avoid being a multicast 212 * address (the first byte of multicast addrs is odd). 213 */ 214 /* [cgw]: 分配一个假的硬件地址,真正的网卡的时候,这个地址是从网卡读出来的 */ 215 memcpy(dev->dev_addr, "\0SNUL0", ETH_ALEN); 216 /* [cgw]: 因为注册了两个虚拟网卡,第二个虚拟网卡的地址跟第一个的地址必须不一样 217 * 即这两个网卡地址分别为\0SNUL0和\0SNUL1 218 */ 219 if (dev == snull_devs[1]) 220 dev->dev_addr[ETH_ALEN-1]++; /* \0SNUL1 */ 221 /* [cgw]: 启动发送队列 */ 222 netif_start_queue(dev); 223 224 printk("snull_open\n"); 225 226 return 0; 227 } 228 229 int snull_release(struct net_device *dev) 230 { 231 /* release ports, irq and such -- like fops->close */ 232 233 netif_stop_queue(dev); /* can't transmit any more */ 234 235 printk("snull_release\n"); 236 237 return 0; 238 } 239 240 /* 241 * Configuration changes (passed on by ifconfig) 242 */ 243 int snull_config(struct net_device *dev, struct ifmap *map) 244 { 245 if (dev->flags & IFF_UP) /* can't act on a running interface */ 246 return -EBUSY; 247 248 /* Don't allow changing the I/O address */ 249 if (map->base_addr != dev->base_addr) { 250 printk(KERN_WARNING "snull: Can't change I/O address\n"); 251 return -EOPNOTSUPP; 252 } 253 254 /* Allow changing the IRQ */ 255 if (map->irq != dev->irq) { 256 dev->irq = map->irq; 257 /* request_irq() is delayed to open-time */ 258 } 259 260 printk("snull_config\n"); 261 262 /* ignore other fields */ 263 return 0; 264 } 265 266 /* 267 * Receive a packet: retrieve, encapsulate and pass over to upper levels 268 */ 269 void snull_rx(struct net_device *dev, struct snull_packet *pkt) 270 { 271 struct sk_buff *skb; 272 struct snull_priv *priv = netdev_priv(dev); 273 274 /* 275 * The packet has been retrieved from the transmission 276 * medium. 
Build an skb around it, so upper layers can handle it 277 */ 278 /* [cgw]: 为接收包分配一个skb */ 279 skb = dev_alloc_skb(pkt->datalen + 2); 280 if (!skb) { 281 if (printk_ratelimit()) 282 printk(KERN_NOTICE "snull rx: low on mem - packet dropped\n"); 283 priv->stats.rx_dropped++; 284 goto out; 285 } 286 /* [cgw]: 16字节对齐,即IP首部前是网卡硬件地址首部,其占14字节,需要为其增加2 287 * 个字节 288 */ 289 skb_reserve(skb, 2); /* align IP on 16B boundary */ 290 /* [cgw]: 开辟一个数据缓冲区用于存放接收数据 */ 291 memcpy(skb_put(skb, pkt->datalen), pkt->data, pkt->datalen); 292 293 /* Write metadata, and then pass to the receive level */ 294 skb->dev = dev; 295 if (skb->dev == snull_devs[0]) { 296 printk("skb->dev is snull_devs[0]\n"); 297 } else { 298 printk("skb->dev is snull_devs[1]\n"); 299 } 300 /* [cgw]: 确定包的协议ID */ 301 skb->protocol = eth_type_trans(skb, dev); 302 303 printk("skb->protocol = %d\n", skb->protocol); 304 305 skb->ip_summed = CHECKSUM_UNNECESSARY; /* don't check it */ 306 /* [cgw]: 统计接收包数和字节数 */ 307 priv->stats.rx_packets++; 308 priv->stats.rx_bytes += pkt->datalen; 309 /* [cgw]: 上报应用层 */ 310 netif_rx(skb); 311 312 printk("snull_rx\n"); 313 314 out: 315 return; 316 } 317 318 319 /* 320 * The poll implementation. 321 */ 322 //static int snull_poll(struct napi_struct *napi, int budget) 323 static int snull_poll(struct net_device *dev, int *budget) 324 { 325 //int npackets = 0; 326 //struct sk_buff *skb; 327 //struct snull_priv *priv = container_of(napi, struct snull_priv, napi); 328 //struct net_device *dev = priv->dev; 329 //struct snull_packet *pkt; 330 331 int npackets = 0, quota = min(dev->quota, *budget); 332 struct sk_buff *skb; 333 struct snull_priv *priv = netdev_priv(dev); 334 struct snull_packet *pkt; 335 336 printk("snull_poll\n"); 337 338 //while (npackets < budget && priv->rx_queue) { 339 while (npackets < quota && priv->rx_queue) { 340 pkt = snull_dequeue_buf(dev); 341 skb = dev_alloc_skb(pkt->datalen + 2); 342 if (! 
skb) { 343 if (printk_ratelimit()) 344 printk(KERN_NOTICE "snull: packet dropped\n"); 345 priv->stats.rx_dropped++; 346 snull_release_buffer(pkt); 347 continue; 348 } 349 skb_reserve(skb, 2); /* align IP on 16B boundary */ 350 memcpy(skb_put(skb, pkt->datalen), pkt->data, pkt->datalen); 351 skb->dev = dev; 352 skb->protocol = eth_type_trans(skb, dev); 353 skb->ip_summed = CHECKSUM_UNNECESSARY; /* don't check it */ 354 netif_receive_skb(skb); 355 356 /* Maintain stats */ 357 npackets++; 358 priv->stats.rx_packets++; 359 priv->stats.rx_bytes += pkt->datalen; 360 snull_release_buffer(pkt); 361 } 362 /* If we processed all packets, we're done; tell the kernel and reenable ints */ 363 *budget -= npackets; 364 dev->quota -= npackets; 365 if (! priv->rx_queue) { 366 //napi_complete(napi); 367 netif_rx_complete(dev); 368 snull_rx_ints(dev, 1); 369 return 0; 370 } 371 /* We couldn't process everything. */ 372 //return npackets; 373 return 1; 374 } 375 376 /* 377 * The typical interrupt entry point 378 */ 379 static void snull_regular_interrupt(int irq, void *dev_id, struct pt_regs *regs) 380 { 381 int statusword; 382 struct snull_priv *priv; 383 struct snull_packet *pkt = NULL; 384 /* 385 * As usual, check the "device" pointer to be sure it is 386 * really interrupting. 387 * Then assign "struct device *dev" 388 */ 389 struct net_device *dev = (struct net_device *)dev_id; 390 /* ... 
and check with hw if it's really ours */ 391 392 /* paranoid */ 393 if (!dev) 394 return; 395 396 /* Lock the device */ 397 priv = netdev_priv(dev); 398 spin_lock(&priv->lock); 399 400 /* [cgw]: 判断产生的是什么类型的中断,接收还是中断 */ 401 /* retrieve statusword: real netdevices use I/O instructions */ 402 statusword = priv->status; 403 404 printk("priv->status = %d\n", priv->status); 405 406 priv->status = 0; 407 /* [cgw]: 接收完成中断 */ 408 if (statusword & SNULL_RX_INTR) { 409 /* send it to snull_rx for handling */ 410 pkt = priv->rx_queue; 411 if (pkt) { 412 priv->rx_queue = pkt->next; 413 /* [cgw]: 网卡接收到数据,上报给应用层 */ 414 snull_rx(dev, pkt); 415 } 416 } 417 /* [cgw]: 发送完成中断 */ 418 if (statusword & SNULL_TX_INTR) { 419 /* [cgw]: 统计已发送的包数和总字节数,并释放这个包的内存 */ 420 /* a transmission is over: free the skb */ 421 priv->stats.tx_packets++; 422 priv->stats.tx_bytes += priv->tx_packetlen; 423 dev_kfree_skb(priv->skb); 424 } 425 426 /* Unlock the device and we are done */ 427 spin_unlock(&priv->lock); 428 if (pkt) snull_release_buffer(pkt); /* Do this outside the lock! */ 429 430 printk("snull_regular_interrupt\n"); 431 432 return; 433 } 434 435 /* 436 * A NAPI interrupt handler. 437 */ 438 static void snull_napi_interrupt(int irq, void *dev_id, struct pt_regs *regs) 439 { 440 int statusword; 441 struct snull_priv *priv; 442 443 /* 444 * As usual, check the "device" pointer for shared handlers. 445 * Then assign "struct device *dev" 446 */ 447 struct net_device *dev = (struct net_device *)dev_id; 448 /* ... 
and check with hw if it's really ours */ 449 450 printk("snull_napi_interrupt\n"); 451 452 /* paranoid */ 453 if (!dev) 454 return; 455 456 /* Lock the device */ 457 priv = netdev_priv(dev); 458 spin_lock(&priv->lock); 459 460 /* retrieve statusword: real netdevices use I/O instructions */ 461 statusword = priv->status; 462 priv->status = 0; 463 if (statusword & SNULL_RX_INTR) { 464 snull_rx_ints(dev, 0); /* Disable further interrupts */ 465 //napi_schedule(&priv->napi); 466 netif_rx_schedule(dev); 467 } 468 if (statusword & SNULL_TX_INTR) { 469 /* a transmission is over: free the skb */ 470 priv->stats.tx_packets++; 471 priv->stats.tx_bytes += priv->tx_packetlen; 472 dev_kfree_skb(priv->skb); 473 } 474 475 /* Unlock the device and we are done */ 476 spin_unlock(&priv->lock); 477 return; 478 } 479 480 481 /* 482 * Transmit a packet (low level interface) 483 */ 484 static void snull_hw_tx(char *buf, int len, struct net_device *dev) 485 { 486 /* 487 * This function deals with hw details. This interface loops 488 * back the packet to the other snull interface (if any). 489 * In other words, this function implements the snull behaviour, 490 * while all other procedures are rather device-independent 491 */ 492 struct iphdr *ih; 493 struct net_device *dest; 494 struct snull_priv *priv; 495 u32 *saddr, *daddr; 496 struct snull_packet *tx_buffer; 497 498 /* I am paranoid. Ain't I? */ 499 if (len < sizeof(struct ethhdr) + sizeof(struct iphdr)) { 500 printk("snull: Hmm... 
packet too short (%i octets)\n", 501 len); 502 return; 503 } 504 505 /* [cgw]: 打印上层应用(即ping)要发的这个包的内容 506 * 这个包的格式为: 507 * 14字节以太网首部+20字节IP地址首部+20字节TCP地址首部+n字节数据 508 */ 509 510 if (1) { /* enable this conditional to look at the data */ 511 int i; 512 PDEBUG("len is %i\n" KERN_DEBUG "data:",len); 513 /* [cgw]: 14字节以太网首部 */ 514 for (i=0 ; i<14; i++) 515 printk(" %02x",buf[i]&0xff); 516 printk("\n"); 517 518 /* [cgw]: 20字节IP地址首部 */ 519 for (i=14 ; i<34; i++) 520 printk(" %02x",buf[i]&0xff); 521 printk("\n"); 522 523 /* [cgw]: 20字节TCP地址首部 */ 524 for (i=34 ; i<54; i++) 525 printk(" %02x",buf[i]&0xff); 526 printk("\n"); 527 528 /* [cgw]: n字节数据 */ 529 for (i=54 ; i<len; i++) 530 printk(" %02x",buf[i]&0xff); 531 printk("\n"); 532 } 533 /* 534 * Ethhdr is 14 bytes, but the kernel arranges for iphdr 535 * to be aligned (i.e., ethhdr is unaligned) 536 */ 537 /* [cgw]: 提取本地和目标IP地址 */ 538 ih = (struct iphdr *)(buf+sizeof(struct ethhdr)); 539 saddr = &ih->saddr; 540 daddr = &ih->daddr; 541 542 printk("ih->protocol = %d is buf[23]\n", ih->protocol); 543 printk("saddr = %d.%d.%d.%d\n", *((u8 *)saddr + 0), *((u8 *)saddr + 1), *((u8 *)saddr + 2), *((u8 *)saddr + 3)); 544 printk("daddr = %d.%d.%d.%d\n", *((u8 *)daddr + 0), *((u8 *)daddr + 1), *((u8 *)daddr + 2), *((u8 *)daddr + 3)); 545 546 /* [cgw]: 改变本地和目标IP地址的第三个字节的最低位,即原来是0,则改为1,原来是1,则改为0 547 */ 548 ((u8 *)saddr)[2] ^= 1; /* change the third octet (class C) */ 549 ((u8 *)daddr)[2] ^= 1; 550 551 /* [cgw]: 从新计算校验,因为IP已改变 */ 552 ih->check = 0; /* and rebuild the checksum (ip needs it) */ 553 ih->check = ip_fast_csum((unsigned char *)ih,ih->ihl); 554 555 /* [cgw]: 打印更改后的IP地址,和TCP地址, 556 */ 557 if (dev == snull_devs[0]) 558 //PDEBUGG("%08x:%05i --> %08x:%05i\n", 559 printk("%08x:%05i --> %08x:%05i\n", 560 ntohl(ih->saddr),ntohs(((struct tcphdr *)(ih+1))->source), 561 ntohl(ih->daddr),ntohs(((struct tcphdr *)(ih+1))->dest)); 562 else 563 //PDEBUGG("%08x:%05i <-- %08x:%05i\n", 564 printk("%08x:%05i <-- %08x:%05i\n", 565 
ntohl(ih->daddr),ntohs(((struct tcphdr *)(ih+1))->dest), 566 ntohl(ih->saddr),ntohs(((struct tcphdr *)(ih+1))->source)); 567 568 /* 569 * Ok, now the packet is ready for transmission: first simulate a 570 * receive interrupt on the twin device, then a 571 * transmission-done on the transmitting device 572 */ 573 /* [cgw]: 获得目的网卡设备 */ 574 dest = snull_devs[dev == snull_devs[0] ? 1 : 0]; 575 576 if (dev == snull_devs[0]) { 577 printk("snull_devs[0]\n"); 578 } else { 579 printk("snull_devs[1]\n"); 580 } 581 582 priv = netdev_priv(dest); 583 /* [cgw]: 取出一块内存分配给本地网卡 */ 584 tx_buffer = snull_get_tx_buffer(dev); 585 /* [cgw]: 设置数据包大小 */ 586 tx_buffer->datalen = len; 587 588 printk("tx_buffer->datalen = %d\n", tx_buffer->datalen); 589 590 /* [cgw]: 填充发送网卡的数据 */ 591 memcpy(tx_buffer->data, buf, len); 592 /* [cgw]: 把发送的数据直接加入到接收队列,这里相当于本地网卡要发送的数据 593 * 已经给目标网卡直接接收到了 594 */ 595 snull_enqueue_buf(dest, tx_buffer); 596 /* [cgw]: 如果接收中断使能,这个也是模拟的接收中断,因为上面已经模拟接收 597 * 到数据,所以立刻产生一个中断 598 */ 599 if (priv->rx_int_enabled) { 600 priv->status |= SNULL_RX_INTR; 601 printk("priv->status = %d\n", priv->status); 602 /* [cgw]: 执行接收中断 */ 603 snull_interrupt(0, dest, NULL); 604 printk("snull_interrupt(0, dest, NULL);\n"); 605 } 606 607 /* [cgw]: 获得本地网卡的私有数据指针 */ 608 priv = netdev_priv(dev); 609 /* [cgw]: 把本地网卡要发送的数据存到私有数据缓冲区,接着产生一个发送中断 610 */ 611 priv->tx_packetlen = len; 612 priv->tx_packetdata = buf; 613 priv->status |= SNULL_TX_INTR; 614 if (lockup && ((priv->stats.tx_packets + 1) % lockup) == 0) { 615 /* Simulate a dropped transmit interrupt */ 616 netif_stop_queue(dev); 617 PDEBUG("Simulate lockup at %ld, txp %ld\n", jiffies, 618 (unsigned long) priv->stats.tx_packets); 619 } 620 else { 621 /* [cgw]: 产生一个发送中断 */ 622 snull_interrupt(0, dev, NULL); 623 printk("snull_interrupt(0, dev, NULL);\n"); 624 } 625 } 626 627 /* 628 * Transmit a packet (called by the kernel) 629 */ 630 int snull_tx(struct sk_buff *skb, struct net_device *dev) 631 { 632 int len; 633 char *data, shortpkt[ETH_ZLEN]; 
634 struct snull_priv *priv = netdev_priv(dev); 635 636 /* [cgw]: 获取上层需要发送的数据和长度 */ 637 data = skb->data; 638 len = skb->len; 639 640 printk("skb->len = %d\n", skb->len); 641 642 if (len < ETH_ZLEN) { 643 memset(shortpkt, 0, ETH_ZLEN); 644 memcpy(shortpkt, skb->data, skb->len); 645 len = ETH_ZLEN; 646 data = shortpkt; 647 } 648 /* [cgw]: 开始计算时间截,用于处理发送超时 */ 649 dev->trans_start = jiffies; /* save the timestamp */ 650 651 /* Remember the skb, so we can free it at interrupt time */ 652 priv->skb = skb; 653 654 printk("snull_tx\n"); 655 656 /* actual deliver of data is device-specific, and not shown here */ 657 /* [cgw]: 模拟把数据包写入硬件,通过硬件发送出去,但实际上不是 */ 658 snull_hw_tx(data, len, dev); 659 660 //printk("snull_tx\n"); 661 662 return 0; /* Our simple device can not fail */ 663 } 664 665 /* 666 * Deal with a transmit timeout. 667 */ 668 void snull_tx_timeout (struct net_device *dev) 669 { 670 struct snull_priv *priv = netdev_priv(dev); 671 672 PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies, 673 jiffies - dev->trans_start); 674 /* Simulate a transmission interrupt to get things moving */ 675 priv->status = SNULL_TX_INTR; 676 snull_interrupt(0, dev, NULL); 677 priv->stats.tx_errors++; 678 netif_wake_queue(dev); 679 680 printk("snull_tx_timeout\n"); 681 682 return; 683 } 684 685 686 687 /* 688 * Ioctl commands 689 */ 690 int snull_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) 691 { 692 PDEBUG("ioctl\n"); 693 printk("ioctl\n"); 694 return 0; 695 } 696 697 /* 698 * Return statistics to the caller 699 */ 700 struct net_device_stats *snull_stats(struct net_device *dev) 701 { 702 struct snull_priv *priv = netdev_priv(dev); 703 704 printk("snull_stats\n"); 705 706 return &priv->stats; 707 } 708 709 /* 710 * This function is called to fill up an eth header, since arp is not 711 * available on the interface 712 */ 713 int snull_rebuild_header(struct sk_buff *skb) 714 { 715 struct ethhdr *eth = (struct ethhdr *) skb->data; 716 struct net_device *dev = skb->dev; 717 
718 memcpy(eth->h_source, dev->dev_addr, dev->addr_len); 719 memcpy(eth->h_dest, dev->dev_addr, dev->addr_len); 720 eth->h_dest[ETH_ALEN-1] ^= 0x01; /* dest is us xor 1 */ 721 722 printk("snull_rebuild_header\n"); 723 724 return 0; 725 } 726 727 728 //int snull_header(struct sk_buff *skb, struct net_device *dev, 729 // unsigned short type, const void *daddr, const void *saddr, 730 // unsigned len) 731 732 int snull_header(struct sk_buff *skb, struct net_device *dev, 733 unsigned short type, void *daddr, void *saddr, 734 unsigned len) 735 { 736 struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); 737 738 printk("len = %d\n", len); 739 740 printk("type = %02x\n", type); //ETH_P_IP 0x0800 /* Internet Protocol packet */ 741 742 /* htons是将整型变量从主机字节顺序转变成网络字节顺序, 743 * 就是整数在地址空间存储方式变为:高位字节存放在内存的低地址处 744 */ 745 eth->h_proto = htons(type); 746 printk("h_proto = %d\n", eth->h_proto); 747 748 printk("addr_len = %d\n", dev->addr_len); 749 printk("dev_addr = %02x.%02x.%02x.%02x.%02x.%02x\n", dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2], dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); 750 751 if (saddr) { 752 printk("saddr = %02x.%02x.%02x.%02x.%02x.%02x\n", *((unsigned char *)saddr + 0), *((unsigned char *)saddr + 1), *((unsigned char *)saddr + 2), *((unsigned char *)saddr + 3), *((unsigned char *)saddr + 4), *((unsigned char *)saddr + 5)); 753 } 754 755 if (daddr) { 756 printk("daddr = %02x.%02x.%02x.%02x.%02x.%02x\n", *((unsigned char *)daddr + 0), *((unsigned char *)daddr + 1), *((unsigned char *)daddr + 2), *((unsigned char *)daddr + 3), *((unsigned char *)daddr + 4), *((unsigned char *)daddr + 5)); 757 } 758 759 /* [cgw]: 上层应用要发送数据时,通过下层添加硬件地址,才能决定发送到那个目标网卡 760 */ 761 memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len); 762 memcpy(eth->h_dest, daddr ? 
daddr : dev->dev_addr, dev->addr_len); 763 printk("h_source = %02x.%02x.%02x.%02x.%02x.%02x\n", eth->h_source[0], eth->h_source[1], eth->h_source[2],eth->h_source[3], eth->h_source[4], eth->h_source[5]); 764 printk("h_dest = %02x.%02x.%02x.%02x.%02x.%02x\n", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]); 765 766 /* [cgw]: 设置目标网卡硬件地址,即本地网卡和目标网卡硬件地址的最后一个字节的最低有效位 767 * 是相反关系,即本地是\0SNUL0的话,目标就是\0SNUL1,或者本地是\0SNUL1,目标就是\0SNUL0 768 */ 769 eth->h_dest[ETH_ALEN-1] ^= 0x01; /* dest is us xor 1 */ 770 printk("h_dest[ETH_ALEN-1] ^ 0x01 = %02x\n", eth->h_dest[ETH_ALEN-1]); 771 772 printk("hard_header_len = %d\n", dev->hard_header_len); 773 774 return (dev->hard_header_len); 775 } 776 777 778 779 780 781 /* 782 * The "change_mtu" method is usually not needed. 783 * If you need it, it must be like this. 784 */ 785 int snull_change_mtu(struct net_device *dev, int new_mtu) 786 { 787 unsigned long flags; 788 struct snull_priv *priv = netdev_priv(dev); 789 spinlock_t *lock = &priv->lock; 790 791 /* check ranges */ 792 if ((new_mtu < 68) || (new_mtu > 1500)) 793 return -EINVAL; 794 /* 795 * Do anything you need, and the accept the value 796 */ 797 spin_lock_irqsave(lock, flags); 798 dev->mtu = new_mtu; 799 spin_unlock_irqrestore(lock, flags); 800 return 0; /* success */ 801 } 802 803 #if 0 804 static const struct header_ops snull_header_ops = { 805 .create = snull_header, 806 .rebuild = snull_rebuild_header 807 }; 808 809 static const struct net_device_ops snull_netdev_ops = { 810 .ndo_open = snull_open, 811 .ndo_stop = snull_release, 812 .ndo_start_xmit = snull_tx, 813 .ndo_do_ioctl = snull_ioctl, 814 .ndo_set_config = snull_config, 815 .ndo_get_stats = snull_stats, 816 .ndo_change_mtu = snull_change_mtu, 817 .ndo_tx_timeout = snull_tx_timeout 818 }; 819 #endif 820 821 /* 822 * The init function (sometimes called probe). 
823 * It is invoked by register_netdev() 824 */ 825 void snull_init(struct net_device *dev) 826 { 827 struct snull_priv *priv; 828 #if 0 829 /* 830 * Make the usual checks: check_region(), probe irq, ... -ENODEV 831 * should be returned if no device found. No resource should be 832 * grabbed: this is done on open(). 833 */ 834 #endif 835 836 /* 837 * Then, assign other fields in dev, using ether_setup() and some 838 * hand assignments 839 */ 840 ether_setup(dev); /* assign some of the fields */ 841 dev->watchdog_timeo = timeout; 842 843 //dev->netdev_ops = &snull_netdev_ops; 844 //dev->header_ops = &snull_header_ops; 845 846 dev->hard_header = snull_header; 847 dev->rebuild_header = snull_rebuild_header; 848 849 dev->open = snull_open; 850 dev->stop = snull_release; 851 dev->hard_start_xmit = snull_tx; 852 dev->do_ioctl = snull_ioctl; 853 dev->set_config = snull_config; 854 dev->get_stats = snull_stats; 855 dev->change_mtu = snull_change_mtu; 856 dev->tx_timeout = snull_tx_timeout; 857 858 /* keep the default flags, just add NOARP */ 859 dev->flags |= IFF_NOARP; 860 dev->features |= NETIF_F_HW_CSUM; 861 862 dev->hard_header_cache = NULL; 863 864 /* 865 * Then, initialize the priv field. This encloses the statistics 866 * and a few private fields. 
867 */ 868 priv = netdev_priv(dev); 869 #if 0 870 if (use_napi) { 871 netif_napi_add(dev, &priv->napi, snull_poll,2); 872 } 873 #else 874 if (use_napi) { 875 dev->poll = snull_poll; 876 dev->weight = 2; 877 } 878 #endif 879 memset(priv, 0, sizeof(struct snull_priv)); 880 spin_lock_init(&priv->lock); 881 snull_rx_ints(dev, 1); /* enable receive interrupts */ 882 snull_setup_pool(dev); 883 884 printk("snull_init\n"); 885 } 886 887 /* 888 * The devices 889 */ 890 891 struct net_device *snull_devs[2]; 892 893 894 895 /* 896 * Finally, the module stuff 897 */ 898 899 void snull_cleanup(void) 900 { 901 int i; 902 903 for (i = 0; i < 2; i++) { 904 if (snull_devs[i]) { 905 unregister_netdev(snull_devs[i]); 906 snull_teardown_pool(snull_devs[i]); 907 free_netdev(snull_devs[i]); 908 } 909 } 910 return; 911 } 912 913 914 915 916 int snull_init_module(void) 917 { 918 int result, i, ret = -ENOMEM; 919 920 snull_interrupt = use_napi ? snull_napi_interrupt : snull_regular_interrupt; 921 922 /* Allocate the devices */ 923 snull_devs[0] = alloc_netdev(sizeof(struct snull_priv), "sn%d", 924 snull_init); 925 snull_devs[1] = alloc_netdev(sizeof(struct snull_priv), "sn%d", 926 snull_init); 927 if (snull_devs[0] == NULL || snull_devs[1] == NULL) 928 goto out; 929 930 ret = -ENODEV; 931 for (i = 0; i < 2; i++) 932 if ((result = register_netdev(snull_devs[i]))) 933 printk("snull: error %i registering device \"%s\"\n", 934 result, snull_devs[i]->name); 935 else 936 ret = 0; 937 938 printk("snull_init_module\n"); 939 940 out: 941 if (ret) 942 snull_cleanup(); 943 return ret; 944 } 945 946 947 module_init(snull_init_module); 948 module_exit(snull_cleanup);
makefile:
# Comment/uncomment the following line to disable/enable debugging
#DEBUG = y

# Add your debugging flag (or not) to CFLAGS
# NOTE(review): the flag was -DSBULL_DEBUG, which looks like a leftover from
# the sbull driver; snull.h is expected to test SNULL_DEBUG -- confirm.
ifeq ($(DEBUG),y)
  DEBFLAGS = -O -g -DSNULL_DEBUG # "-O" is needed to expand inlines
else
  DEBFLAGS = -O2
endif

EXTRA_CFLAGS += $(DEBFLAGS)
EXTRA_CFLAGS += -I..

ifneq ($(KERNELRELEASE),)
# call from kernel build system

obj-m	:= snull.o

else

KERNELDIR ?= /lib/modules/$(shell uname -r)/build
PWD       := $(shell pwd)

default:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

endif
运行:
# insmod snull.ko snull_init snull_init snull_stats snull_stats snull_init_module # ifconfig sn0 local0 snull_open snull_stats # ifconfig sn1 local1 snull_open snull_stats # ping -c 1 remote0 PING remote0 (192.168.2.9): 56 data bytes len = 84 type = 800 h_proto = 8 addr_len = 6 dev_addr = 00.53.4e.55.4c.30 daddr = 00.53.4e.55.4c.30 h_source = 00.53.4e.55.4c.30 h_dest = 00.53.4e.55.4c.30 h_dest[ETH_ALEN-1] ^ 0x01 = 31 hard_header_len = 14 skb->len = 98 snull_tx 00 53 4e 55 4c 31 00 53 4e 55 4c 30 08 00 45 00 00 54 00 00 40 00 40 01 b5 47 c0 a8 02 08 c0 a8 02 09 08 00 d0 0e 09 03 00 00 bc e8 62 05 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ih->protocol = 1 is buf[23] saddr = 192.168.2.8 daddr = 192.168.2.9 c0a80308:02048 --> c0a80309:53262 snull_devs[0] tx_buffer->datalen = 98 priv->status = 1 priv->status = 1 skb->dev is snull_devs[1] skb->protocol = 8 snull_rx snull_release_buffer snull_regular_interrupt snull_interrupt(0, dest, NULL); priv->status = 2 snull_regular_interrupt snull_interrupt(0, dev, NULL); len = 84 type = 800 h_proto = 8 addr_len = 6 dev_addr = 00.53.4e.55.4c.31 daddr = 00.53.4e.55.4c.31 h_source = 00.53.4e.55.4c.31 h_dest = 00.53.4e.55.4c.31 h_dest[ETH_ALEN-1] ^ 0x01 = 30 hard_header_len = 14 skb->len = 98 snull_tx 00 53 4e 55 4c 30 00 53 4e 55 4c 31 08 00 45 00 00 54 a0 17 00 00 40 01 53 30 c0 a8 03 09 c0 a8 03 08 00 00 d8 0e 09 03 00 00 bc e8 62 05 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ih->protocol = 1 is buf[23] saddr = 192.168.3.9 daddr = 192.168.3.8 c0a80208:55310 <-- c0a80209:00000 snull_devs[1] tx_buffer->datalen = 98 priv->status = 1 priv->status = 1 skb->dev is snull_devs[0] skb->protocol = 8 snull_rx snull_release_buffer snull_regular_interrupt snull_interrupt(0, dest, NULL); priv->status = 2 
snull_regular_interrupt snull_interrupt(0, dev, NULL); 64 bytes from 192.168.2.9: seq=0 ttl=64 time=159.673 ms --- remote0 ping statistics --- 1 packets transmitted, 1 packets received, 0% packet loss round-trip min/avg/max = 159.673/159.673/159.673 ms
分析现象:
1.当执行ping命令后,驱动首先会调用snull_header
int snull_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); printk("len = %d\n", len); printk("type = %02x\n", type); //ETH_P_IP 0x0800 /* Internet Protocol packet */ /* htons是将整型变量从主机字节顺序转变成网络字节顺序, * 就是整数在地址空间存储方式变为:高位字节存放在内存的低地址处 */ eth->h_proto = htons(type); printk("h_proto = %d\n", eth->h_proto); printk("addr_len = %d\n", dev->addr_len); printk("dev_addr = %02x.%02x.%02x.%02x.%02x.%02x\n", dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2], dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); if (saddr) { printk("saddr = %02x.%02x.%02x.%02x.%02x.%02x\n", *((unsigned char *)saddr + 0), *((unsigned char *)saddr + 1), *((unsigned char *)saddr + 2), *((unsigned char *)saddr + 3), *((unsigned char *)saddr + 4), *((unsigned char *)saddr + 5)); } if (daddr) { printk("daddr = %02x.%02x.%02x.%02x.%02x.%02x\n", *((unsigned char *)daddr + 0), *((unsigned char *)daddr + 1), *((unsigned char *)daddr + 2), *((unsigned char *)daddr + 3), *((unsigned char *)daddr + 4), *((unsigned char *)daddr + 5)); } /* [cgw]: 上层应用要发送数据时,通过下层添加硬件地址,才能决定发送到那个目标网卡 */ memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len); memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len); printk("h_source = %02x.%02x.%02x.%02x.%02x.%02x\n", eth->h_source[0], eth->h_source[1], eth->h_source[2],eth->h_source[3], eth->h_source[4], eth->h_source[5]); printk("h_dest = %02x.%02x.%02x.%02x.%02x.%02x\n", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]); /* [cgw]: 设置目标网卡硬件地址,即本地网卡和目标网卡硬件地址的最后一个字节的最低有效位 * 是相反关系,即本地是\0SNUL0的话,目标就是\0SNUL1,或者本地是\0SNUL1,目标就是\0SNUL0 */ eth->h_dest[ETH_ALEN-1] ^= 0x01; /* dest is us xor 1 */ printk("h_dest[ETH_ALEN-1] ^ 0x01 = %02x\n", eth->h_dest[ETH_ALEN-1]); printk("hard_header_len = %d\n", dev->hard_header_len); return (dev->hard_header_len); }
因为应用层要发送数据包了,所以要为这个数据包添加硬件地址,即以太网地址首部,才能通过网卡发送出去。
2. 然后内核会调用snull_tx发送数据包,snull_tx调用了snull_hw_tx。在这里把源IP和目的IP的第三个字节最低有效位取反(即把本网段的地址换成对端网段的地址),并把本地要发的数据直接拷贝给目标网卡,代表目标网卡已接收到数据,并触发接收完成中断,向上层上报数据;接着触发发送完成中断,表示数据已经发送到目标网卡。
3. 数据包分析:
static void snull_hw_tx(char *buf, int len, struct net_device *dev)
这里的buf为上层要发送的数据包,驱动按如下格式划分并打印:14字节以太网首部+20字节IP首部+20字节TCP首部+n字节数据。注意:本例ping发送的是ICMP报文(ih->protocol = 1),所以标为“TCP首部”的这20字节实际上是8字节ICMP首部加上数据的前12字节。
1 /* [cgw]: 14字节以太网首部 */ 2 for (i=0 ; i<14; i++) 3 printk(" %02x",buf[i]&0xff); 4 printk("\n"); 5 6 /* [cgw]: 20字节IP地址首部 */ 7 for (i=14 ; i<34; i++) 8 printk(" %02x",buf[i]&0xff); 9 printk("\n"); 10 11 /* [cgw]: 20字节TCP地址首部 */ 12 for (i=34 ; i<54; i++) 13 printk(" %02x",buf[i]&0xff); 14 printk("\n"); 15 16 /* [cgw]: n字节数据 */ 17 for (i=54 ; i<len; i++) 18 printk(" %02x",buf[i]&0xff); 19 printk("\n");
打印结果:
00 53 4e 55 4c 30 00 53 4e 55 4c 31 08 00 //14字节以太网首部 45 00 00 54 a0 17 00 00 40 01 53 30 c0 a8 03 09 c0 a8 03 08 //20字节IP地址首部 00 00 d8 0e 09 03 00 00 bc e8 62 05 00 00 00 00 00 00 00 00 //20字节TCP地址首部 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 //n字节数据
其中:00 53 4e 55 4c 30 就是硬件地址\0SNUL0的ASCII码,00 53 4e 55 4c 31 就是硬件地址\0SNUL1的ASCII码。
c0 a8 02 08表示本地(源)IP地址local0:192.168.2.8, c0 a8 02 09表示目标IP地址remote0:192.168.2.9。
代表 00 53 4e 55 4c 31 00 53 4e 55 4c 30 08 00 的结构体是:
1 struct ethhdr { 2 unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ 3 unsigned char h_source[ETH_ALEN]; /* source ether addr */ 4 __be16 h_proto; /* packet type ID field */ 5 } __attribute__((packed));
即h_proto在内存中的字节为 08 00(0x0800经过htons转换为网络字节序后,在小端CPU上按整数读出就是0x0008,所以打印 h_proto = 8)
代表45 00 00 54 00 00 40 00 40 01 b5 47 c0 a8 02 08 c0 a8 02 09的结构体是:
1 struct iphdr { 2 #if defined(__LITTLE_ENDIAN_BITFIELD) 3 __u8 ihl:4, 4 version:4; 5 #elif defined (__BIG_ENDIAN_BITFIELD) 6 __u8 version:4, 7 ihl:4; 8 #else 9 #error "Please fix <asm/byteorder.h>" 10 #endif 11 __u8 tos; 12 __be16 tot_len; 13 __be16 id; 14 __be16 frag_off; 15 __u8 ttl; 16 __u8 protocol; 17 __sum16 check; 18 __be32 saddr; 19 __be32 daddr; 20 /*The options start here. */ 21 };
代表 08 00 d0 0e 09 03 00 00 bc e8 62 05 00 00 00 00 00 00 00 00 的结构体是:
1 struct tcphdr { 2 __be16 source; 3 __be16 dest; 4 __be32 seq; 5 __be32 ack_seq; 6 #if defined(__LITTLE_ENDIAN_BITFIELD) 7 __u16 res1:4, 8 doff:4, 9 fin:1, 10 syn:1, 11 rst:1, 12 psh:1, 13 ack:1, 14 urg:1, 15 ece:1, 16 cwr:1; 17 #elif defined(__BIG_ENDIAN_BITFIELD) 18 __u16 doff:4, 19 res1:4, 20 cwr:1, 21 ece:1, 22 urg:1, 23 ack:1, 24 psh:1, 25 rst:1, 26 syn:1, 27 fin:1; 28 #else 29 #error "Adjust your <asm/byteorder.h> defines" 30 #endif 31 __be16 window; 32 __sum16 check; 33 __be16 urg_ptr; 34 };
NAPI
NAPI的全称是“NEW API”。
要使用NAPI功能,只要在加载snull.ko时加上参数use_napi=1就行了
如:#insmod snull.ko use_napi=1
NAPI有什么作用?
NAPI是一种使用轮询(poll)方式接收数据的机制。当系统需要接收大量数据时,数据的接收就不应该全部在中断中进行。即产生接收完成中断后,立即禁止接收中断,通知内核调用poll,轮询接收数据,接收完成后,再重新使能接收中断。这样可以大大提高系统的性能。
在驱动初始化时:分配好poll函数
1 if (use_napi) { 2 dev->poll = snull_poll; 3 dev->weight = 2; 4 }
在接收中断中
1 if (statusword & SNULL_RX_INTR) { 2 /* send it to snull_rx for handling */ 3 pkt = priv->rx_queue; 4 if (pkt) { 5 priv->rx_queue = pkt->next; 6 /* [cgw]: 网卡接收到数据,上报给应用层 */ 7 snull_rx(dev, pkt); 8 } 9 }
改为
1 if (statusword & SNULL_RX_INTR) { 2 snull_rx_ints(dev, 0); /* Disable further interrupts */ 3 //napi_schedule(&priv->napi); 4 netif_rx_schedule(dev); 5 }
在中断中,直接通知内核调用snull_poll即可,snull_poll轮询接收数据,并上报给应用层。
下一篇博客介绍DM9000网卡驱动。
【推荐】还在用 ECharts 开发大屏?试试这款永久免费的开源 BI 工具!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步