sk_buff
/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load
 *					balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *		Robert Olsson	:	Removed skb_head_pool
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
35 */ 36 37 /* 38 * The functions in this file will not compile correctly with gcc 2.4.x 39 */ 40 41 #include <linux/config.h> 42 #include <linux/module.h> 43 #include <linux/types.h> 44 #include <linux/kernel.h> 45 #include <linux/sched.h> 46 #include <linux/mm.h> 47 #include <linux/interrupt.h> 48 #include <linux/in.h> 49 #include <linux/inet.h> 50 #include <linux/slab.h> 51 #include <linux/netdevice.h> 52 #ifdef CONFIG_NET_CLS_ACT 53 #include <net/pkt_sched.h> 54 #endif 55 #include <linux/string.h> 56 #include <linux/skbuff.h> 57 #include <linux/cache.h> 58 #include <linux/rtnetlink.h> 59 #include <linux/init.h> 60 #include <linux/highmem.h> 61 62 #include <net/protocol.h> 63 #include <net/dst.h> 64 #include <net/sock.h> 65 #include <net/checksum.h> 66 #include <net/xfrm.h> 67 68 #include <asm/uaccess.h> 69 #include <asm/system.h> 70 71 static kmem_cache_t *skbuff_head_cache; 72 73 /* 74 * Keep out-of-line to prevent kernel bloat. 75 * __builtin_return_address is not used because it is not always 76 * reliable. 77 */ 78 79 /** 80 * skb_over_panic - private function 81 * @skb: buffer 82 * @sz: size 83 * @here: address 84 * 85 * Out of line support code for skb_put(). Not user callable. 86 */ 87 void skb_over_panic(struct sk_buff *skb, int sz, void *here) 88 { 89 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " 90 "data:%p tail:%p end:%p dev:%s/n", 91 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 92 skb->dev ? skb->dev->name : "<NULL>"); 93 BUG(); 94 } 95 96 /** 97 * skb_under_panic - private function 98 * @skb: buffer 99 * @sz: size 100 * @here: address 101 * 102 * Out of line support code for skb_push(). Not user callable. 103 */ 104 105 void skb_under_panic(struct sk_buff *skb, int sz, void *here) 106 { 107 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " 108 "data:%p tail:%p end:%p dev:%s/n", 109 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 110 skb->dev ? 
skb->dev->name : "<NULL>"); 111 BUG(); 112 } 113 114 /* Allocate a new skbuff. We do this ourselves so we can fill in a few 115 * 'private' fields and also do memory statistics to find all the 116 * [BEEP] leaks. 117 * 118 */ 119 120 /** 121 * alloc_skb - allocate a network buffer 122 * @size: size to allocate 123 * @gfp_mask: allocation mask 124 * 125 * Allocate a new &sk_buff. The returned buffer has no headroom and a 126 * tail room of size bytes. The object has a reference count of one. 127 * The return is the buffer. On a failure the return is %NULL. 128 * 129 * Buffers may only be allocated from interrupts using a @gfp_mask of 130 * %GFP_ATOMIC. 131 */ 132 struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) 133 { 134 struct sk_buff *skb; 135 u8 *data; 136 137 /* Get the HEAD */ 138 /* 从cache缓冲池中获取内存 */ 139 skb = kmem_cache_alloc(skbuff_head_cache, 140 gfp_mask & ~__GFP_DMA); 141 if (!skb) 142 goto out; 143 144 /* Get the DATA. Size must match skb_add_mtu(). */ 145 146 /* 对其size */ 147 size = SKB_DATA_ALIGN(size); 148 149 /* 分配的缓冲长度包含skb_shared_info的长度 */ 150 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 151 if (!data) 152 goto nodata; 153 154 /* 155 * offsetof是一个编译器宏或者是自定义的宏,用于计算member在struct中的偏移量。 156 * 把在truesize前面的field全部清零。 157 */ 158 memset(skb, 0, offsetof(struct sk_buff, truesize)); 159 160 /* truesize是广义SKB的大小,包含了4个部分的长度:skb自身,header,page frags,frag list */ 161 skb->truesize = size + sizeof(struct sk_buff); 162 163 /* users初始化成1 */ 164 atomic_set(&skb->users, 1); 165 166 /* 初始化所有数据指针 */ 167 skb->head = data; 168 skb->data = data; 169 skb->tail = data; 170 skb->end = data + size; 171 172 /* 173 * skb_shinfo是个宏,#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 174 * 所以用这个宏的时候必须等skb->end已经初始化。 175 * skb_shinfo 接在skb->end指向的内存空间后面。 176 */ 177 178 /* 初始化skb_shared_info结构体 */ 179 atomic_set(&(skb_shinfo(skb)->dataref), 1); 180 skb_shinfo(skb)->nr_frags = 0; 181 skb_shinfo(skb)->tso_size = 0; 182 
skb_shinfo(skb)->tso_segs = 0; 183 skb_shinfo(skb)->frag_list = NULL; 184 out: 185 return skb; 186 nodata: 187 kmem_cache_free(skbuff_head_cache, skb); 188 skb = NULL; 189 goto out; 190 } 191 192 /** 193 * alloc_skb_from_cache - allocate a network buffer 194 * @cp: kmem_cache from which to allocate the data area 195 * (object size must be big enough for @size bytes + skb overheads) 196 * @size: size to allocate 197 * @gfp_mask: allocation mask 198 * 199 * Allocate a new &sk_buff. The returned buffer has no headroom and 200 * tail room of size bytes. The object has a reference count of one. 201 * The return is the buffer. On a failure the return is %NULL. 202 * 203 * Buffers may only be allocated from interrupts using a @gfp_mask of 204 * %GFP_ATOMIC. 205 */ 206 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, 207 unsigned int size, int gfp_mask) 208 { 209 struct sk_buff *skb; 210 u8 *data; 211 212 /* Get the HEAD */ 213 skb = kmem_cache_alloc(skbuff_head_cache, 214 gfp_mask & ~__GFP_DMA); 215 if (!skb) 216 goto out; 217 218 /* Get the DATA. 
*/ 219 size = SKB_DATA_ALIGN(size); 220 221 /* 这个函数和上面函数不同的地方就在下面这句,不用kmalloc,而用kmem_cache_alloc。 */ 222 data = kmem_cache_alloc(cp, gfp_mask); 223 if (!data) 224 goto nodata; 225 226 memset(skb, 0, offsetof(struct sk_buff, truesize)); 227 skb->truesize = size + sizeof(struct sk_buff); 228 atomic_set(&skb->users, 1); 229 skb->head = data; 230 skb->data = data; 231 skb->tail = data; 232 skb->end = data + size; 233 234 atomic_set(&(skb_shinfo(skb)->dataref), 1); 235 skb_shinfo(skb)->nr_frags = 0; 236 skb_shinfo(skb)->tso_size = 0; 237 skb_shinfo(skb)->tso_segs = 0; 238 skb_shinfo(skb)->frag_list = NULL; 239 out: 240 return skb; 241 nodata: 242 kmem_cache_free(skbuff_head_cache, skb); 243 skb = NULL; 244 goto out; 245 } 246 247 /* 这个函数是用来释放当前skb的frag_list区的 */ 248 static void skb_drop_fraglist(struct sk_buff *skb) 249 { 250 struct sk_buff *list = skb_shinfo(skb)->frag_list; 251 252 skb_shinfo(skb)->frag_list = NULL; 253 254 /* 循环前进,直到没有为止。 */ 255 do { 256 struct sk_buff *this = list; 257 list = list->next; 258 kfree_skb(this); 259 } while (list); 260 } 261 262 static void skb_clone_fraglist(struct sk_buff *skb) 263 { 264 struct sk_buff *list; 265 /* 对当前skb的frag_list区链上的每个skb增加引用计数。 */ 266 for (list = skb_shinfo(skb)->frag_list; list; list = list->next) 267 skb_get(list); 268 } 269 270 void skb_release_data(struct sk_buff *skb) 271 { 272 /* 查看skb是否被clone?skb_shinfo的dataref是否为0? 273 * 如果是,那么就释放skb非线性区域和线性区域。 */ 274 if (!skb->cloned || 275 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, 276 &skb_shinfo(skb)->dataref)) { 277 278 /* 释放page frags区 */ 279 if (skb_shinfo(skb)->nr_frags) { 280 int i; 281 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 282 put_page(skb_shinfo(skb)->frags[i].page); 283 } 284 285 /* 释放frag_list区 */ 286 if (skb_shinfo(skb)->frag_list) 287 skb_drop_fraglist(skb); 288 289 /* 释放线性区域 */ 290 kfree(skb->head); 291 } 292 } 293 294 /* 295 * Free an skbuff by memory without cleaning the state. 
296 */ 297 298 /* 把skb自身和线性,非线性区域全部释放 */ 299 void kfree_skbmem(struct sk_buff *skb) 300 { 301 skb_release_data(skb); 302 kmem_cache_free(skbuff_head_cache, skb); 303 } 304 305 /** 306 * __kfree_skb - private function 307 * @skb: buffer 308 * 309 * Free an sk_buff. Release anything attached to the buffer. 310 * Clean the state. This is an internal helper function. Users should 311 * always call kfree_skb 312 */ 313 /* 这个函数应该也能算是一个wrapper函数 */ 314 315 void __kfree_skb(struct sk_buff *skb) 316 { 317 BUG_ON(skb->list != NULL); 318 319 dst_release(skb->dst); 320 #ifdef CONFIG_XFRM 321 secpath_put(skb->sp); 322 #endif 323 if (skb->destructor) { 324 WARN_ON(in_irq()); 325 skb->destructor(skb); 326 } 327 #ifdef CONFIG_NETFILTER 328 nf_conntrack_put(skb->nfct); 329 #ifdef CONFIG_BRIDGE_NETFILTER 330 nf_bridge_put(skb->nf_bridge); 331 #endif 332 #endif 333 /* XXX: IS this still necessary? - JHS */ 334 #ifdef CONFIG_NET_SCHED 335 skb->tc_index = 0; 336 #ifdef CONFIG_NET_CLS_ACT 337 skb->tc_verd = 0; 338 skb->tc_classid = 0; 339 #endif 340 #endif 341 342 kfree_skbmem(skb); 343 } 344 345 /** 346 * skb_clone - duplicate an sk_buff 347 * @skb: buffer to clone 348 * @gfp_mask: allocation priority 349 * 350 * Duplicate an &sk_buff. The new one is not owned by a socket. Both 351 * copies share the same packet data but not structure. The new 352 * buffer has a reference count of 1. If the allocation fails the 353 * function returns %NULL otherwise the new buffer is returned. 354 * 355 * If this function is called from an interrupt gfp_mask() must be 356 * %GFP_ATOMIC. 
357 */ 358 359 struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) 360 { 361 /* 从cache池中分配一个skb */ 362 struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 363 364 if (!n) 365 return NULL; 366 367 /* 这个C(x) 就是clone的意思 */ 368 #define C(x) n->x = skb->x 369 370 n->next = n->prev = NULL; 371 n->list = NULL; 372 n->sk = NULL; 373 /* 把skb中各个成员都clone过去 */ 374 C(stamp); 375 C(dev); 376 C(real_dev); 377 C(h); 378 C(nh); 379 C(mac); 380 C(dst); 381 dst_clone(skb->dst); 382 C(sp); 383 #ifdef CONFIG_INET 384 secpath_get(skb->sp); 385 #endif 386 memcpy(n->cb, skb->cb, sizeof(skb->cb)); 387 C(len); 388 C(data_len); 389 C(csum); 390 C(local_df); 391 /* 新分配的skb是clone的 */ 392 n->cloned = 1; 393 n->nohdr = 0; 394 C(pkt_type); 395 C(ip_summed); 396 C(priority); 397 C(protocol); 398 C(security); 399 n->destructor = NULL; 400 #ifdef CONFIG_NETFILTER 401 C(nfmark); 402 C(nfcache); 403 C(nfct); 404 nf_conntrack_get(skb->nfct); 405 C(nfctinfo); 406 #ifdef CONFIG_NETFILTER_DEBUG 407 C(nf_debug); 408 #endif 409 #ifdef CONFIG_BRIDGE_NETFILTER 410 C(nf_bridge); 411 nf_bridge_get(skb->nf_bridge); 412 #endif 413 #endif /*CONFIG_NETFILTER*/ 414 #if defined(CONFIG_HIPPI) 415 C(private); 416 #endif 417 #ifdef CONFIG_NET_SCHED 418 C(tc_index); 419 #ifdef CONFIG_NET_CLS_ACT 420 n->tc_verd = SET_TC_VERD(skb->tc_verd,0); 421 n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd); 422 n->tc_verd = CLR_TC_MUNGED(skb->tc_verd); 423 C(input_dev); 424 C(tc_classid); 425 #endif 426 427 #endif 428 C(truesize); 429 /* 新skb的users初始化为1 */ 430 atomic_set(&n->users, 1); 431 C(head); 432 C(data); 433 C(tail); 434 C(end); 435 436 /* 增加被clone的skb的数据引用 */ 437 atomic_inc(&(skb_shinfo(skb)->dataref)); 438 /* 设置原skb也是被clone了 */ 439 skb->cloned = 1; 440 441 return n; 442 } 443 444 445 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 446 { 447 /* 448 * Shift between the two data areas in bytes 449 */ 450 /* 为了等一下要给网络各层的指针赋值,现在要先算出两个data的偏移量 */ 451 unsigned long offset = new->data - 
old->data; 452 453 new->list = NULL; 454 new->sk = NULL; 455 new->dev = old->dev; 456 new->real_dev = old->real_dev; 457 new->priority = old->priority; 458 new->protocol = old->protocol; 459 new->dst = dst_clone(old->dst); 460 #ifdef CONFIG_INET 461 new->sp = secpath_get(old->sp); 462 #endif 463 /* 用上面算出来的offset来算 */ 464 new->h.raw = old->h.raw + offset; 465 new->nh.raw = old->nh.raw + offset; 466 new->mac.raw = old->mac.raw + offset; 467 468 /* 拷贝control block */ 469 memcpy(new->cb, old->cb, sizeof(old->cb)); 470 471 new->local_df = old->local_df; 472 new->pkt_type = old->pkt_type; 473 new->stamp = old->stamp; 474 new->destructor = NULL; 475 new->security = old->security; 476 #ifdef CONFIG_NETFILTER 477 new->nfmark = old->nfmark; 478 new->nfcache = old->nfcache; 479 new->nfct = old->nfct; 480 nf_conntrack_get(old->nfct); 481 new->nfctinfo = old->nfctinfo; 482 #ifdef CONFIG_NETFILTER_DEBUG 483 new->nf_debug = old->nf_debug; 484 #endif 485 #ifdef CONFIG_BRIDGE_NETFILTER 486 new->nf_bridge = old->nf_bridge; 487 nf_bridge_get(old->nf_bridge); 488 #endif 489 #endif 490 #ifdef CONFIG_NET_SCHED 491 #ifdef CONFIG_NET_CLS_ACT 492 new->tc_verd = old->tc_verd; 493 #endif 494 new->tc_index = old->tc_index; 495 #endif 496 /* 设置新的skb的users为1 */ 497 atomic_set(&new->users, 1); 498 499 /* 把skb_shinfo的东西也一起copy过去 */ 500 skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; 501 skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; 502 } 503 504 /** 505 * skb_copy - create private copy of an sk_buff 506 * @skb: buffer to copy 507 * @gfp_mask: allocation priority 508 * 509 * Make a copy of both an &sk_buff and its data. This is used when the 510 * caller wishes to modify the data and needs a private copy of the 511 * data to alter. Returns %NULL on failure or the pointer to the buffer 512 * on success. The returned buffer has a reference count of 1. 
513 * 514 * As by-product this function converts non-linear &sk_buff to linear 515 * one, so that &sk_buff becomes completely private and caller is allowed 516 * to modify all the data of returned buffer. This means that this 517 * function is not recommended for use in circumstances when only 518 * header is going to be modified. Use pskb_copy() instead. 519 */ 520 521 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) 522 { 523 int headerlen = skb->data - skb->head; 524 /* 525 * Allocate the copy buffer 526 */ 527 528 /* 529 * 分配内存包含线性数据区的长度和非线性数据区的长度 530 * data_len是指非线性数据区的长度。 531 */ 532 struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, 533 gfp_mask); 534 if (!n) 535 return NULL; 536 537 /* Set the data pointer */ 538 /* 预留头的长度 */ 539 skb_reserve(n, headerlen); 540 /* Set the tail pointer and length */ 541 /* len是指线性和非线性数据的总长,把tail往后推 */ 542 skb_put(n, skb->len); 543 n->csum = skb->csum; 544 n->ip_summed = skb->ip_summed; 545 /* 因为 skb_copy_bits 函数中 offset是对有效负载的,即skb->data。 546 * 因此这里的offset为-headerlen。目的是从skb->data向前推headerlen。 547 * 从skb的head处拷贝到n的head处。这个函数把skb的线性和非线性部分全部拷贝到 548 * n的线性部分去了。 549 */ 550 if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) 551 BUG(); 552 553 /* 把skb的本身复制到n的本身 */ 554 copy_skb_header(n, skb); 555 return n; 556 } 557 558 559 /** 560 * pskb_copy - create copy of an sk_buff with private head. 561 * @skb: buffer to copy 562 * @gfp_mask: allocation priority 563 * 564 * Make a copy of both an &sk_buff and part of its data, located 565 * in header. Fragmented data remain shared. This is used when 566 * the caller wishes to modify only header of &sk_buff and needs 567 * private copy of the header to alter. Returns %NULL on failure 568 * or the pointer to the buffer on success. 569 * The returned buffer has a reference count of 1. 
570 */ 571 572 struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) 573 { 574 /* 575 * Allocate the copy buffer 576 */ 577 /* 分配一个新的skb_buff n,它的线性区长度是和原skb长度一样 */ 578 struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); 579 580 if (!n) 581 goto out; 582 583 /* Set the data pointer */ 584 /* 预留head到data之间的空隙 */ 585 skb_reserve(n, skb->data - skb->head); 586 587 /* Set the tail pointer and length */ 588 /* 准备向n放数据,试放数据长度是skb的header section的长度 */ 589 skb_put(n, skb_headlen(skb)); 590 591 /* Copy the bytes */ 592 /* 拷贝有效负载,长度是n->len。上面skb_put中已经把n->len赋值成skb_headlen(skb) 593 * 所以这里拷贝线性区域的长度。 594 */ 595 memcpy(n->data, skb->data, n->len); 596 597 /* 复制skb本身信息到n */ 598 n->csum = skb->csum; 599 n->ip_summed = skb->ip_summed; 600 601 n->data_len = skb->data_len; 602 n->len = skb->len; 603 604 /* 把skb中page frags的指针复制到n的page frags。 */ 605 if (skb_shinfo(skb)->nr_frags) { 606 int i; 607 608 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 609 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 610 get_page(skb_shinfo(n)->frags[i].page); 611 } 612 skb_shinfo(n)->nr_frags = i; 613 } 614 615 /* 把skb中frag_list地址复制到n的frag_list */ 616 if (skb_shinfo(skb)->frag_list) { 617 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 618 skb_clone_fraglist(n); 619 } 620 621 /* 把skb的本身复制到n的本身 */ 622 copy_skb_header(n, skb); 623 out: 624 return n; 625 } 626 627 /** 628 * pskb_expand_head - reallocate header of &sk_buff 629 * @skb: buffer to reallocate 630 * @nhead: room to add at head 631 * @ntail: room to add at tail 632 * @gfp_mask: allocation priority 633 * 634 * Expands (or creates identical copy, if &nhead and &ntail are zero) 635 * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 636 * reference count of 1. Returns zero in the case of success or error, 637 * if expansion failed. In the last case, &sk_buff is not changed. 638 * 639 * All the pointers pointing into skb header may change and must be 640 * reloaded after call to this function. 
641 */ 642 /* 这个函数要注意的是原来的skb结构体并没有释放 643 * 释放的是header section数据区。 644 */ 645 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) 646 { 647 int i; 648 u8 *data; 649 /* 算出原来线性区的长度,再加上现在要求的增加的headroom和tailroom。 */ 650 int size = nhead + (skb->end - skb->head) + ntail; 651 long off; 652 653 if (skb_shared(skb)) 654 BUG(); 655 656 /* 对齐size的大小 */ 657 size = SKB_DATA_ALIGN(size); 658 659 /* 按照要求分配新的header section */ 660 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 661 if (!data) 662 goto nodata; 663 664 /* Copy only real data... and, alas, header. This should be 665 * optimized for the cases when header is void. */ 666 /* 拷贝payload到正确的位置上 */ 667 memcpy(data + nhead, skb->head, skb->tail - skb->head); 668 memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); 669 670 /* 下面复制page frags区域和fraglist区域的指针 */ 671 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 672 get_page(skb_shinfo(skb)->frags[i].page); 673 674 if (skb_shinfo(skb)->frag_list) 675 skb_clone_fraglist(skb); 676 677 /* 释放原来的数据区 */ 678 skb_release_data(skb); 679 680 /* 计算偏移量 */ 681 off = (data + nhead) - skb->head; 682 683 skb->head = data; 684 skb->end = data + size; 685 skb->data += off; 686 skb->tail += off; 687 skb->mac.raw += off; 688 skb->h.raw += off; 689 skb->nh.raw += off; 690 skb->cloned = 0; 691 skb->nohdr = 0; 692 atomic_set(&skb_shinfo(skb)->dataref, 1); 693 return 0; 694 695 nodata: 696 return -ENOMEM; 697 } 698 699 /* Make private copy of skb with writable head and some headroom */ 700 701 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) 702 { 703 struct sk_buff *skb2; 704 /* 计算现在要求的headroom 和原来headroom之间的差值 */ 705 int delta = headroom - skb_headroom(skb); 706 707 /* 如果现在要求的headroom没有原来的headroom大,那说明原来的header section可以用, 708 * 所以只要用pskb_copy复制一份skb结构体和它的线性区域就可以了。 709 */ 710 if (delta <= 0) 711 skb2 = pskb_copy(skb, GFP_ATOMIC); 712 else { 713 /* 如果要求的headroom比原来的headroom大的话,clone一个skb */ 714 skb2 = skb_clone(skb, 
GFP_ATOMIC); 715 /* 把新clone的skb用pskb_expand_head扩大headroom */ 716 if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, 717 GFP_ATOMIC)) { 718 kfree_skb(skb2); 719 skb2 = NULL; 720 } 721 } 722 return skb2; 723 } 724 725 726 /** 727 * skb_copy_expand - copy and expand sk_buff 728 * @skb: buffer to copy 729 * @newheadroom: new free bytes at head 730 * @newtailroom: new free bytes at tail 731 * @gfp_mask: allocation priority 732 * 733 * Make a copy of both an &sk_buff and its data and while doing so 734 * allocate additional space. 735 * 736 * This is used when the caller wishes to modify the data and needs a 737 * private copy of the data to alter as well as more space for new fields. 738 * Returns %NULL on failure or the pointer to the buffer 739 * on success. The returned buffer has a reference count of 1. 740 * 741 * You must pass %GFP_ATOMIC as the allocation priority if this function 742 * is called from an interrupt. 743 * 744 * BUG ALERT: ip_summed is not copied. Why does this work? Is it used 745 * only by netfilter in the cases when checksum is recalculated? 
--ANK 746 */ 747 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 748 int newheadroom, int newtailroom, int gfp_mask) 749 { 750 /* 751 * Allocate the copy buffer 752 */ 753 /* 分配一个新的skb结构体,header section长度是原来的skb所有数据长度加上新的skb要求的headroom 754 * 和要求的tailroom。目的是把原来的SKB线性化。 755 */ 756 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, 757 gfp_mask); 758 int head_copy_len, head_copy_off; 759 760 if (!n) 761 return NULL; 762 763 /* 新的sk_buff n的headroom长度为newheadroom */ 764 skb_reserve(n, newheadroom); 765 766 /* Set the tail pointer and length */ 767 /* 设置tail指针和n->len */ 768 skb_put(n, skb->len); 769 770 /* 设置head_copy_len 为老的skb的headroom */ 771 head_copy_len = skb_headroom(skb); 772 head_copy_off = 0; 773 /* 如果新的headroom比老的headroom小, 774 * 拷贝长度就为新的headroom的长度。 775 */ 776 if (newheadroom <= head_copy_len) 777 head_copy_len = newheadroom; 778 else 779 head_copy_off = newheadroom - head_copy_len; 780 781 /* Copy the linear header and data. */ 782 /* offset为原来skb->data-head_copy_len */ 783 if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, 784 skb->len + head_copy_len)) 785 BUG(); 786 787 /* 拷贝skb结构体到n结构体 */ 788 copy_skb_header(n, skb); 789 790 return n; 791 } 792 793 /** 794 * skb_pad - zero pad the tail of an skb 795 * @skb: buffer to pad 796 * @pad: space to pad 797 * 798 * Ensure that a buffer is followed by a padding area that is zero 799 * filled. Used by network drivers which may DMA or transfer data 800 * beyond the buffer end onto the wire. 801 * 802 * May return NULL in out of memory cases. 803 */ 804 805 struct sk_buff *skb_pad(struct sk_buff *skb, int pad) 806 { 807 struct sk_buff *nskb; 808 809 /* If the skbuff is non linear tailroom is always zero.. */ 810 /* 如果需要pad的长度比skb_tailroom小的话, 811 * 就直接从skb->data+skb->len,开始清零. 
812 */ 813 if (skb_tailroom(skb) >= pad) { 814 memset(skb->data+skb->len, 0, pad); 815 return skb; 816 } 817 818 /* 如果需要pad的长度比tailroom长的话,就skb_copy_expand */ 819 nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad,GFP_ATOMIC); 820 /* 释放原来的SKB */ 821 kfree_skb(skb); 822 /* 清零 */ 823 if (nskb) 824 memset(nskb->data+nskb->len, 0, pad); 825 return nskb; 826 }