rtnetlink: delay RTM_DELLINK notification until after ndo_uninit()
[linux-2.6-block.git] / net / core / skbuff.c
CommitLineData
1da177e4
LT
1/*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
113aa838 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
1da177e4
LT
7 * Fixes:
8 * Alan Cox : Fixed the worst of the load
9 * balancer bugs.
10 * Dave Platt : Interrupt stacking fix.
11 * Richard Kooijman : Timestamp fixes.
12 * Alan Cox : Changed buffer format.
13 * Alan Cox : destructor hook for AF_UNIX etc.
14 * Linus Torvalds : Better skb_clone.
15 * Alan Cox : Added skb_copy.
16 * Alan Cox : Added all the changed routines Linus
17 * only put in the headers
18 * Ray VanTassle : Fixed --skb->lock in free
19 * Alan Cox : skb_copy copy arp field
20 * Andi Kleen : slabified it.
21 * Robert Olsson : Removed skb_head_pool
22 *
23 * NOTE:
24 * The __skb_ routines should be called with interrupts
25 * disabled, or you better be *real* sure that the operation is atomic
26 * with respect to whatever list is being frobbed (e.g. via lock_sock()
27 * or via disabling bottom half handlers, etc).
28 *
29 * This program is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU General Public License
31 * as published by the Free Software Foundation; either version
32 * 2 of the License, or (at your option) any later version.
33 */
34
35/*
36 * The functions in this file will not compile correctly with gcc 2.4.x
37 */
38
e005d193
JP
39#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
40
1da177e4
LT
41#include <linux/module.h>
42#include <linux/types.h>
43#include <linux/kernel.h>
fe55f6d5 44#include <linux/kmemcheck.h>
1da177e4
LT
45#include <linux/mm.h>
46#include <linux/interrupt.h>
47#include <linux/in.h>
48#include <linux/inet.h>
49#include <linux/slab.h>
de960aa9
FW
50#include <linux/tcp.h>
51#include <linux/udp.h>
1da177e4
LT
52#include <linux/netdevice.h>
53#ifdef CONFIG_NET_CLS_ACT
54#include <net/pkt_sched.h>
55#endif
56#include <linux/string.h>
57#include <linux/skbuff.h>
9c55e01c 58#include <linux/splice.h>
1da177e4
LT
59#include <linux/cache.h>
60#include <linux/rtnetlink.h>
61#include <linux/init.h>
716ea3a7 62#include <linux/scatterlist.h>
ac45f602 63#include <linux/errqueue.h>
268bb0ce 64#include <linux/prefetch.h>
0d5501c1 65#include <linux/if_vlan.h>
1da177e4
LT
66
67#include <net/protocol.h>
68#include <net/dst.h>
69#include <net/sock.h>
70#include <net/checksum.h>
ed1f50c3 71#include <net/ip6_checksum.h>
1da177e4
LT
72#include <net/xfrm.h>
73
74#include <asm/uaccess.h>
ad8d75ff 75#include <trace/events/skb.h>
51c56b00 76#include <linux/highmem.h>
a1f8e7f7 77
d7e8883c 78struct kmem_cache *skbuff_head_cache __read_mostly;
e18b890b 79static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1da177e4 80
1da177e4 81/**
f05de73b
JS
82 * skb_panic - private function for out-of-line support
83 * @skb: buffer
84 * @sz: size
85 * @addr: address
99d5851e 86 * @msg: skb_over_panic or skb_under_panic
1da177e4 87 *
f05de73b
JS
88 * Out-of-line support for skb_put() and skb_push().
89 * Called via the wrapper skb_over_panic() or skb_under_panic().
90 * Keep out of line to prevent kernel bloat.
91 * __builtin_return_address is not used because it is not always reliable.
1da177e4 92 */
f05de73b 93static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
99d5851e 94 const char msg[])
1da177e4 95{
e005d193 96 pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
99d5851e 97 msg, addr, skb->len, sz, skb->head, skb->data,
e005d193
JP
98 (unsigned long)skb->tail, (unsigned long)skb->end,
99 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
100 BUG();
101}
102
/*
 * Report an overrun: forwards to skb_panic() and tags the message with this
 * function's own name (via __func__), so the log says "skb_over_panic".
 */
static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	const char *who = __func__;

	skb_panic(skb, sz, addr, who);
}
107
f05de73b
JS
/*
 * Report an underrun: forwards to skb_panic() and tags the message with this
 * function's own name (via __func__), so the log says "skb_under_panic".
 */
static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
{
	const char *who = __func__;

	skb_panic(skb, sz, addr, who);
}
c93bdd0e
MG
112
113/*
114 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
115 * the caller if emergency pfmemalloc reserves are being used. If it is and
116 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
117 * may be used. Otherwise, the packet data may be discarded until enough
118 * memory is free
119 */
120#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
121 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
61c5e88a 122
123static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
124 unsigned long ip, bool *pfmemalloc)
c93bdd0e
MG
125{
126 void *obj;
127 bool ret_pfmemalloc = false;
128
129 /*
130 * Try a regular allocation, when that fails and we're not entitled
131 * to the reserves, fail.
132 */
133 obj = kmalloc_node_track_caller(size,
134 flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
135 node);
136 if (obj || !(gfp_pfmemalloc_allowed(flags)))
137 goto out;
138
139 /* Try again but now we are using pfmemalloc reserves */
140 ret_pfmemalloc = true;
141 obj = kmalloc_node_track_caller(size, flags, node);
142
143out:
144 if (pfmemalloc)
145 *pfmemalloc = ret_pfmemalloc;
146
147 return obj;
148}
149
1da177e4
LT
150/* Allocate a new skbuff. We do this ourselves so we can fill in a few
151 * 'private' fields and also do memory statistics to find all the
152 * [BEEP] leaks.
153 *
154 */
155
0ebd0ac5
PM
156struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
157{
158 struct sk_buff *skb;
159
160 /* Get the HEAD */
161 skb = kmem_cache_alloc_node(skbuff_head_cache,
162 gfp_mask & ~__GFP_DMA, node);
163 if (!skb)
164 goto out;
165
166 /*
167 * Only clear those fields we need to clear, not those that we will
168 * actually initialise below. Hence, don't put any more fields after
169 * the tail pointer in struct sk_buff!
170 */
171 memset(skb, 0, offsetof(struct sk_buff, tail));
5e71d9d7 172 skb->head = NULL;
0ebd0ac5
PM
173 skb->truesize = sizeof(struct sk_buff);
174 atomic_set(&skb->users, 1);
175
35d04610 176 skb->mac_header = (typeof(skb->mac_header))~0U;
0ebd0ac5
PM
177out:
178 return skb;
179}
180
1da177e4 181/**
d179cd12 182 * __alloc_skb - allocate a network buffer
1da177e4
LT
183 * @size: size to allocate
184 * @gfp_mask: allocation mask
c93bdd0e
MG
185 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
186 * instead of head cache and allocate a cloned (child) skb.
187 * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
188 * allocations in case the data is required for writeback
b30973f8 189 * @node: numa node to allocate memory on
1da177e4
LT
190 *
191 * Allocate a new &sk_buff. The returned buffer has no headroom and a
94b6042c
BH
192 * tail room of at least size bytes. The object has a reference count
193 * of one. The return is the buffer. On a failure the return is %NULL.
1da177e4
LT
194 *
195 * Buffers may only be allocated from interrupts using a @gfp_mask of
196 * %GFP_ATOMIC.
197 */
dd0fc66f 198struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
c93bdd0e 199 int flags, int node)
1da177e4 200{
e18b890b 201 struct kmem_cache *cache;
4947d3ef 202 struct skb_shared_info *shinfo;
1da177e4
LT
203 struct sk_buff *skb;
204 u8 *data;
c93bdd0e 205 bool pfmemalloc;
1da177e4 206
c93bdd0e
MG
207 cache = (flags & SKB_ALLOC_FCLONE)
208 ? skbuff_fclone_cache : skbuff_head_cache;
209
210 if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
211 gfp_mask |= __GFP_MEMALLOC;
8798b3fb 212
1da177e4 213 /* Get the HEAD */
b30973f8 214 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
1da177e4
LT
215 if (!skb)
216 goto out;
ec7d2f2c 217 prefetchw(skb);
1da177e4 218
87fb4b7b
ED
219 /* We do our best to align skb_shared_info on a separate cache
220 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
221 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
222 * Both skb->head and skb_shared_info are cache line aligned.
223 */
bc417e30 224 size = SKB_DATA_ALIGN(size);
87fb4b7b 225 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
c93bdd0e 226 data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
1da177e4
LT
227 if (!data)
228 goto nodata;
87fb4b7b
ED
229 /* kmalloc(size) might give us more room than requested.
230 * Put skb_shared_info exactly at the end of allocated zone,
231 * to allow max possible filling before reallocation.
232 */
233 size = SKB_WITH_OVERHEAD(ksize(data));
ec7d2f2c 234 prefetchw(data + size);
1da177e4 235
ca0605a7 236 /*
c8005785
JB
237 * Only clear those fields we need to clear, not those that we will
238 * actually initialise below. Hence, don't put any more fields after
239 * the tail pointer in struct sk_buff!
ca0605a7
ACM
240 */
241 memset(skb, 0, offsetof(struct sk_buff, tail));
87fb4b7b
ED
242 /* Account for allocated memory : skb + skb->head */
243 skb->truesize = SKB_TRUESIZE(size);
c93bdd0e 244 skb->pfmemalloc = pfmemalloc;
1da177e4
LT
245 atomic_set(&skb->users, 1);
246 skb->head = data;
247 skb->data = data;
27a884dc 248 skb_reset_tail_pointer(skb);
4305b541 249 skb->end = skb->tail + size;
35d04610
CW
250 skb->mac_header = (typeof(skb->mac_header))~0U;
251 skb->transport_header = (typeof(skb->transport_header))~0U;
19633e12 252
4947d3ef
BL
253 /* make sure we initialize shinfo sequentially */
254 shinfo = skb_shinfo(skb);
ec7d2f2c 255 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
4947d3ef 256 atomic_set(&shinfo->dataref, 1);
c2aa3665 257 kmemcheck_annotate_variable(shinfo->destructor_arg);
4947d3ef 258
c93bdd0e 259 if (flags & SKB_ALLOC_FCLONE) {
d0bf4a9e 260 struct sk_buff_fclones *fclones;
1da177e4 261
d0bf4a9e
ED
262 fclones = container_of(skb, struct sk_buff_fclones, skb1);
263
264 kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
d179cd12 265 skb->fclone = SKB_FCLONE_ORIG;
d0bf4a9e 266 atomic_set(&fclones->fclone_ref, 1);
d179cd12 267
c8753d55 268 fclones->skb2.fclone = SKB_FCLONE_FREE;
d0bf4a9e 269 fclones->skb2.pfmemalloc = pfmemalloc;
d179cd12 270 }
1da177e4
LT
271out:
272 return skb;
273nodata:
8798b3fb 274 kmem_cache_free(cache, skb);
1da177e4
LT
275 skb = NULL;
276 goto out;
1da177e4 277}
b4ac530f 278EXPORT_SYMBOL(__alloc_skb);
1da177e4 279
b2b5ce9d
ED
280/**
281 * build_skb - build a network buffer
282 * @data: data buffer provided by caller
d3836f21 283 * @frag_size: size of fragment, or 0 if head was kmalloced
b2b5ce9d
ED
284 *
285 * Allocate a new &sk_buff. Caller provides space holding head and
deceb4c0
FF
286 * skb_shared_info. @data must have been allocated by kmalloc() only if
287 * @frag_size is 0, otherwise data should come from the page allocator.
b2b5ce9d
ED
288 * The return is the new skb buffer.
289 * On a failure the return is %NULL, and @data is not freed.
290 * Notes :
291 * Before IO, driver allocates only data buffer where NIC put incoming frame
292 * Driver should add room at head (NET_SKB_PAD) and
293 * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
294 * After IO, driver calls build_skb(), to allocate sk_buff and populate it
295 * before giving packet to stack.
296 * RX rings only contains data buffers, not full skbs.
297 */
d3836f21 298struct sk_buff *build_skb(void *data, unsigned int frag_size)
b2b5ce9d
ED
299{
300 struct skb_shared_info *shinfo;
301 struct sk_buff *skb;
d3836f21 302 unsigned int size = frag_size ? : ksize(data);
b2b5ce9d
ED
303
304 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
305 if (!skb)
306 return NULL;
307
d3836f21 308 size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
b2b5ce9d
ED
309
310 memset(skb, 0, offsetof(struct sk_buff, tail));
311 skb->truesize = SKB_TRUESIZE(size);
d3836f21 312 skb->head_frag = frag_size != 0;
b2b5ce9d
ED
313 atomic_set(&skb->users, 1);
314 skb->head = data;
315 skb->data = data;
316 skb_reset_tail_pointer(skb);
317 skb->end = skb->tail + size;
35d04610
CW
318 skb->mac_header = (typeof(skb->mac_header))~0U;
319 skb->transport_header = (typeof(skb->transport_header))~0U;
b2b5ce9d
ED
320
321 /* make sure we initialize shinfo sequentially */
322 shinfo = skb_shinfo(skb);
323 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
324 atomic_set(&shinfo->dataref, 1);
325 kmemcheck_annotate_variable(shinfo->destructor_arg);
326
327 return skb;
328}
329EXPORT_SYMBOL(build_skb);
330
a1c7fff7 331struct netdev_alloc_cache {
69b08f62
ED
332 struct page_frag frag;
333 /* we maintain a pagecount bias, so that we dont dirty cache line
334 * containing page->_count every time we allocate a fragment.
335 */
336 unsigned int pagecnt_bias;
a1c7fff7
ED
337};
338static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
339
c93bdd0e 340static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
6f532612
ED
341{
342 struct netdev_alloc_cache *nc;
343 void *data = NULL;
69b08f62 344 int order;
6f532612
ED
345 unsigned long flags;
346
347 local_irq_save(flags);
903ceff7 348 nc = this_cpu_ptr(&netdev_alloc_cache);
69b08f62 349 if (unlikely(!nc->frag.page)) {
6f532612 350refill:
69b08f62
ED
351 for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
352 gfp_t gfp = gfp_mask;
353
354 if (order)
355 gfp |= __GFP_COMP | __GFP_NOWARN;
356 nc->frag.page = alloc_pages(gfp, order);
357 if (likely(nc->frag.page))
358 break;
359 if (--order < 0)
360 goto end;
361 }
362 nc->frag.size = PAGE_SIZE << order;
4c450583
ED
363 /* Even if we own the page, we do not use atomic_set().
364 * This would break get_page_unless_zero() users.
365 */
366 atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
367 &nc->frag.page->_count);
69b08f62
ED
368 nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
369 nc->frag.offset = 0;
6f532612 370 }
540eb7bf 371
69b08f62 372 if (nc->frag.offset + fragsz > nc->frag.size) {
4c450583
ED
373 if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
374 if (!atomic_sub_and_test(nc->pagecnt_bias,
375 &nc->frag.page->_count))
376 goto refill;
377 /* OK, page count is 0, we can safely set it */
378 atomic_set(&nc->frag.page->_count,
379 NETDEV_PAGECNT_MAX_BIAS);
380 } else {
381 atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
382 &nc->frag.page->_count);
383 }
384 nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
385 nc->frag.offset = 0;
6f532612 386 }
540eb7bf 387
69b08f62
ED
388 data = page_address(nc->frag.page) + nc->frag.offset;
389 nc->frag.offset += fragsz;
540eb7bf
AD
390 nc->pagecnt_bias--;
391end:
6f532612
ED
392 local_irq_restore(flags);
393 return data;
394}
c93bdd0e
MG
395
396/**
397 * netdev_alloc_frag - allocate a page fragment
398 * @fragsz: fragment size
399 *
400 * Allocates a frag from a page for receive buffer.
401 * Uses GFP_ATOMIC allocations.
402 */
403void *netdev_alloc_frag(unsigned int fragsz)
404{
405 return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
406}
6f532612
ED
407EXPORT_SYMBOL(netdev_alloc_frag);
408
8af27456
CH
409/**
410 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
411 * @dev: network device to receive on
412 * @length: length to allocate
413 * @gfp_mask: get_free_pages mask, passed to alloc_skb
414 *
415 * Allocate a new &sk_buff and assign it a usage count of one. The
416 * buffer has unspecified headroom built in. Users should allocate
417 * the headroom they think they need without accounting for the
418 * built in space. The built in space is used for optimisations.
419 *
420 * %NULL is returned if there is no free memory.
421 */
422struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
6f532612 423 unsigned int length, gfp_t gfp_mask)
8af27456 424{
6f532612 425 struct sk_buff *skb = NULL;
a1c7fff7
ED
426 unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) +
427 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
428
310e158c 429 if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
c93bdd0e
MG
430 void *data;
431
432 if (sk_memalloc_socks())
433 gfp_mask |= __GFP_MEMALLOC;
434
435 data = __netdev_alloc_frag(fragsz, gfp_mask);
a1c7fff7 436
6f532612
ED
437 if (likely(data)) {
438 skb = build_skb(data, fragsz);
439 if (unlikely(!skb))
440 put_page(virt_to_head_page(data));
a1c7fff7 441 }
a1c7fff7 442 } else {
c93bdd0e
MG
443 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
444 SKB_ALLOC_RX, NUMA_NO_NODE);
a1c7fff7 445 }
7b2e497a 446 if (likely(skb)) {
8af27456 447 skb_reserve(skb, NET_SKB_PAD);
7b2e497a
CH
448 skb->dev = dev;
449 }
8af27456
CH
450 return skb;
451}
b4ac530f 452EXPORT_SYMBOL(__netdev_alloc_skb);
1da177e4 453
654bed16 454void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
50269e19 455 int size, unsigned int truesize)
654bed16
PZ
456{
457 skb_fill_page_desc(skb, i, page, off, size);
458 skb->len += size;
459 skb->data_len += size;
50269e19 460 skb->truesize += truesize;
654bed16
PZ
461}
462EXPORT_SYMBOL(skb_add_rx_frag);
463
f8e617e1
JW
464void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
465 unsigned int truesize)
466{
467 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
468
469 skb_frag_size_add(frag, size);
470 skb->len += size;
471 skb->data_len += size;
472 skb->truesize += truesize;
473}
474EXPORT_SYMBOL(skb_coalesce_rx_frag);
475
27b437c8 476static void skb_drop_list(struct sk_buff **listp)
1da177e4 477{
bd8a7036 478 kfree_skb_list(*listp);
27b437c8 479 *listp = NULL;
1da177e4
LT
480}
481
27b437c8
HX
482static inline void skb_drop_fraglist(struct sk_buff *skb)
483{
484 skb_drop_list(&skb_shinfo(skb)->frag_list);
485}
486
1da177e4
LT
487static void skb_clone_fraglist(struct sk_buff *skb)
488{
489 struct sk_buff *list;
490
fbb398a8 491 skb_walk_frags(skb, list)
1da177e4
LT
492 skb_get(list);
493}
494
d3836f21
ED
495static void skb_free_head(struct sk_buff *skb)
496{
497 if (skb->head_frag)
498 put_page(virt_to_head_page(skb->head));
499 else
500 kfree(skb->head);
501}
502
5bba1712 503static void skb_release_data(struct sk_buff *skb)
1da177e4 504{
ff04a771
ED
505 struct skb_shared_info *shinfo = skb_shinfo(skb);
506 int i;
1da177e4 507
ff04a771
ED
508 if (skb->cloned &&
509 atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
510 &shinfo->dataref))
511 return;
a6686f2f 512
ff04a771
ED
513 for (i = 0; i < shinfo->nr_frags; i++)
514 __skb_frag_unref(&shinfo->frags[i]);
a6686f2f 515
ff04a771
ED
516 /*
517 * If skb buf is from userspace, we need to notify the caller
518 * the lower device DMA has done;
519 */
520 if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
521 struct ubuf_info *uarg;
1da177e4 522
ff04a771
ED
523 uarg = shinfo->destructor_arg;
524 if (uarg->callback)
525 uarg->callback(uarg, true);
1da177e4 526 }
ff04a771
ED
527
528 if (shinfo->frag_list)
529 kfree_skb_list(shinfo->frag_list);
530
531 skb_free_head(skb);
1da177e4
LT
532}
533
534/*
535 * Free an skbuff by memory without cleaning the state.
536 */
2d4baff8 537static void kfree_skbmem(struct sk_buff *skb)
1da177e4 538{
d0bf4a9e 539 struct sk_buff_fclones *fclones;
d179cd12 540
d179cd12
DM
541 switch (skb->fclone) {
542 case SKB_FCLONE_UNAVAILABLE:
543 kmem_cache_free(skbuff_head_cache, skb);
544 break;
545
546 case SKB_FCLONE_ORIG:
d0bf4a9e
ED
547 fclones = container_of(skb, struct sk_buff_fclones, skb1);
548 if (atomic_dec_and_test(&fclones->fclone_ref))
549 kmem_cache_free(skbuff_fclone_cache, fclones);
d179cd12
DM
550 break;
551
552 case SKB_FCLONE_CLONE:
d0bf4a9e 553 fclones = container_of(skb, struct sk_buff_fclones, skb2);
d179cd12 554
e7820e39
ED
555 /* The clone portion is available for
556 * fast-cloning again.
d179cd12 557 */
e7820e39
ED
558 skb->fclone = SKB_FCLONE_FREE;
559
560 if (atomic_dec_and_test(&fclones->fclone_ref))
d0bf4a9e 561 kmem_cache_free(skbuff_fclone_cache, fclones);
d179cd12 562 break;
3ff50b79 563 }
1da177e4
LT
564}
565
04a4bb55 566static void skb_release_head_state(struct sk_buff *skb)
1da177e4 567{
adf30907 568 skb_dst_drop(skb);
1da177e4
LT
569#ifdef CONFIG_XFRM
570 secpath_put(skb->sp);
571#endif
9c2b3328
SH
572 if (skb->destructor) {
573 WARN_ON(in_irq());
1da177e4
LT
574 skb->destructor(skb);
575 }
a3bf7ae9 576#if IS_ENABLED(CONFIG_NF_CONNTRACK)
5f79e0f9 577 nf_conntrack_put(skb->nfct);
2fc72c7b 578#endif
1109a90c 579#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1da177e4
LT
580 nf_bridge_put(skb->nf_bridge);
581#endif
1da177e4
LT
582/* XXX: IS this still necessary? - JHS */
583#ifdef CONFIG_NET_SCHED
584 skb->tc_index = 0;
585#ifdef CONFIG_NET_CLS_ACT
586 skb->tc_verd = 0;
1da177e4
LT
587#endif
588#endif
04a4bb55
LB
589}
590
591/* Free everything but the sk_buff shell. */
592static void skb_release_all(struct sk_buff *skb)
593{
594 skb_release_head_state(skb);
5e71d9d7 595 if (likely(skb->head))
0ebd0ac5 596 skb_release_data(skb);
2d4baff8
HX
597}
598
/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb
 */

void __kfree_skb(struct sk_buff *skb)
{
	/* Detach state and data first ... */
	skb_release_all(skb);

	/* ... then give the sk_buff structure itself back to its cache. */
	kfree_skbmem(skb);
}
EXPORT_SYMBOL(__kfree_skb);
1da177e4 614