irda: Use __netdev_alloc_skb() instead of __dev_alloc_skb().
[linux-2.6-block.git] / net / core / skbuff.c
CommitLineData
1da177e4
LT
/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load
 *					balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
 *		Alan Cox	:	destructor hook for AF_UNIX etc.
 *		Linus Torvalds	:	Better skb_clone.
 *		Alan Cox	:	Added skb_copy.
 *		Alan Cox	:	Added all the changed routines Linus
 *					only put in the headers
 *		Ray VanTassle	:	Fixed --skb->lock in free
 *		Alan Cox	:	skb_copy copy arp field
 *		Andi Kleen	:	slabified it.
 *		Robert Olsson	:	Removed skb_head_pool
 *
 *	NOTE:
 *		The __skb_ routines should be called with interrupts
 *	disabled, or you better be *real* sure that the operation is atomic
 *	with respect to whatever list is being frobbed (e.g. via lock_sock()
 *	or via disabling bottom half handlers, etc).
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
34
/*
 *	The functions in this file will not compile correctly with gcc 2.4.x
 */
38
1da177e4
LT
39#include <linux/module.h>
40#include <linux/types.h>
41#include <linux/kernel.h>
1da177e4
LT
42#include <linux/mm.h>
43#include <linux/interrupt.h>
44#include <linux/in.h>
45#include <linux/inet.h>
46#include <linux/slab.h>
47#include <linux/netdevice.h>
48#ifdef CONFIG_NET_CLS_ACT
49#include <net/pkt_sched.h>
50#endif
51#include <linux/string.h>
52#include <linux/skbuff.h>
9c55e01c 53#include <linux/splice.h>
1da177e4
LT
54#include <linux/cache.h>
55#include <linux/rtnetlink.h>
56#include <linux/init.h>
716ea3a7 57#include <linux/scatterlist.h>
1da177e4
LT
58
59#include <net/protocol.h>
60#include <net/dst.h>
61#include <net/sock.h>
62#include <net/checksum.h>
63#include <net/xfrm.h>
64
65#include <asm/uaccess.h>
66#include <asm/system.h>
67
a1f8e7f7
AV
68#include "kmap_skb.h"
69
e18b890b
CL
70static struct kmem_cache *skbuff_head_cache __read_mostly;
71static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1da177e4 72
9c55e01c
JA
73static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
74 struct pipe_buffer *buf)
75{
8b9d3728 76 put_page(buf->page);
9c55e01c
JA
77}
78
79static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
80 struct pipe_buffer *buf)
81{
8b9d3728 82 get_page(buf->page);
9c55e01c
JA
83}
84
/*
 * ->steal hook for socket pipe buffers.
 * Always returns 1 and touches neither @pipe nor @buf.
 */
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	int ret = 1;

	return ret;
}
90
91
92/* Pipe buffer operations for a socket. */
93static struct pipe_buf_operations sock_pipe_buf_ops = {
94 .can_merge = 0,
95 .map = generic_pipe_buf_map,
96 .unmap = generic_pipe_buf_unmap,
97 .confirm = generic_pipe_buf_confirm,
98 .release = sock_pipe_buf_release,
99 .steal = sock_pipe_buf_steal,
100 .get = sock_pipe_buf_get,
101};
102
1da177e4
LT
/*
 *	Keep out-of-line to prevent kernel bloat.
 *	__builtin_return_address is not used because it is not always
 *	reliable.
 */
108
109/**
110 * skb_over_panic - private function
111 * @skb: buffer
112 * @sz: size
113 * @here: address
114 *
115 * Out of line support code for skb_put(). Not user callable.
116 */
117void skb_over_panic(struct sk_buff *skb, int sz, void *here)
118{
26095455 119 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
4305b541 120 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 121 here, skb->len, sz, skb->head, skb->data,
4305b541 122 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 123 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
124 BUG();
125}
126
127/**
128 * skb_under_panic - private function
129 * @skb: buffer
130 * @sz: size
131 * @here: address
132 *
133 * Out of line support code for skb_push(). Not user callable.
134 */
135
136void skb_under_panic(struct sk_buff *skb, int sz, void *here)
137{
26095455 138 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
4305b541 139 "data:%p tail:%#lx end:%#lx dev:%s\n",
27a884dc 140 here, skb->len, sz, skb->head, skb->data,
4305b541 141 (unsigned long)skb->tail, (unsigned long)skb->end,
26095455 142 skb->dev ? skb->dev->name : "<NULL>");
1da177e4
LT
143 BUG();
144}
145
dc6de336
DM
146void skb_truesize_bug(struct sk_buff *skb)
147{
8f480c0e 148 WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) "
dc6de336
DM
149 "len=%u, sizeof(sk_buff)=%Zd\n",
150 skb->truesize, skb->len, sizeof(struct sk_buff));
151}
152EXPORT_SYMBOL(skb_truesize_bug);
153
1da177e4
LT
/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
 *	[BEEP] leaks.
 *
 */
159
160/**
d179cd12 161 * __alloc_skb - allocate a network buffer
1da177e4
LT
162 * @size: size to allocate
163 * @gfp_mask: allocation mask
c83c2486
RD
164 * @fclone: allocate from fclone cache instead of head cache
165 * and allocate a cloned (child) skb
b30973f8 166 * @node: numa node to allocate memory on
1da177e4
LT
167 *
168 * Allocate a new &sk_buff. The returned buffer has no headroom and a
169 * tail room of size bytes. The object has a reference count of one.
170 * The return is the buffer. On a failure the return is %NULL.
171 *
172 * Buffers may only be allocated from interrupts using a @gfp_mask of
173 * %GFP_ATOMIC.
174 */
dd0fc66f 175struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
b30973f8 176 int fclone, int node)
1da177e4 177{
e18b890b 178 struct kmem_cache *cache;
4947d3ef 179 struct skb_shared_info *shinfo;
1da177e4
LT
180 struct sk_buff *skb;
181 u8 *data;
182
8798b3fb
HX
183 cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
184
1da177e4 185 /* Get the HEAD */
b30973f8 186 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
1da177e4
LT
187 if (!skb)
188 goto out;
189
1da177e4 190 size = SKB_DATA_ALIGN(size);
b30973f8
CH
191 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
192 gfp_mask, node);
1da177e4
LT
193 if (!data)
194 goto nodata;
195
ca0605a7 196 /*
c8005785
JB
197 * Only clear those fields we need to clear, not those that we will
198 * actually initialise below. Hence, don't put any more fields after
199 * the tail pointer in struct sk_buff!
ca0605a7
ACM
200 */
201 memset(skb, 0, offsetof(struct sk_buff, tail));
1da177e4
LT
202 skb->truesize = size + sizeof(struct sk_buff);
203 atomic_set(&skb->users, 1);
204 skb->head = data;
205 skb->data = data;
27a884dc 206 skb_reset_tail_pointer(skb);
4305b541 207 skb->end = skb->tail + size;
4947d3ef
BL
208 /* make sure we initialize shinfo sequentially */
209 shinfo = skb_shinfo(skb);
210 atomic_set(&shinfo->dataref, 1);
211 shinfo->nr_frags = 0;
7967168c
HX
212 shinfo->gso_size = 0;
213 shinfo->gso_segs = 0;
214 shinfo->gso_type = 0;
4947d3ef
BL
215 shinfo->ip6_frag_id = 0;
216 shinfo->frag_list = NULL;
217
d179cd12
DM
218 if (fclone) {
219 struct sk_buff *child = skb + 1;
220 atomic_t *fclone_ref = (atomic_t *) (child + 1);
1da177e4 221
d179cd12
DM
222 skb->fclone = SKB_FCLONE_ORIG;
223 atomic_set(fclone_ref, 1);
224
225 child->fclone = SKB_FCLONE_UNAVAILABLE;
226 }
1da177e4
LT
227out:
228 return skb;
229nodata:
8798b3fb 230 kmem_cache_free(cache, skb);
1da177e4
LT
231 skb = NULL;
232 goto out;
1da177e4
LT
233}
234
8af27456
CH
235/**
236 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
237 * @dev: network device to receive on
238 * @length: length to allocate
239 * @gfp_mask: get_free_pages mask, passed to alloc_skb
240 *
241 * Allocate a new &sk_buff and assign it a usage count of one. The
242 * buffer has unspecified headroom built in. Users should allocate
243 * the headroom they think they need without accounting for the
244 * built in space. The built in space is used for optimisations.
245 *
246 * %NULL is returned if there is no free memory.
247 */
248struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
249 unsigned int length, gfp_t gfp_mask)
250{
43cb76d9 251 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
8af27456
CH
252 struct sk_buff *skb;
253
4ec93edb 254 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
7b2e497a 255 if (likely(skb)) {
8af27456 256 skb_reserve(skb, NET_SKB_PAD);
7b2e497a
CH
257 skb->dev = dev;
258 }
8af27456
CH
259 return skb;
260}
1da177e4 261
654bed16
PZ
262struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
263{
264 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
265 struct page *page;
266
267 page = alloc_pages_node(node, gfp_mask, 0);
268 return page;
269}
270EXPORT_SYMBOL(__netdev_alloc_page);
271
272void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
273 int size)
274{
275 skb_fill_page_desc(skb, i, page, off, size);
276 skb->len += size;
277 skb->data_len += size;
278 skb->truesize += size;
279}
280EXPORT_SYMBOL(skb_add_rx_frag);
281
f58518e6
IJ
282/**
283 * dev_alloc_skb - allocate an skbuff for receiving
284 * @length: length to allocate
285 *
286 * Allocate a new &sk_buff and assign it a usage count of one. The
287 * buffer has unspecified headroom built in. Users should allocate
288 * the headroom they think they need without accounting for the
289 * built in space. The built in space is used for optimisations.
290 *
291 * %NULL is returned if there is no free memory. Although this function
292 * allocates memory it can be called from an interrupt.
293 */
294struct sk_buff *dev_alloc_skb(unsigned int length)
295{
1483b874
DV
296 /*
297 * There is more code here than it seems:
a0f55e0e 298 * __dev_alloc_skb is an inline
1483b874 299 */
f58518e6
IJ
300 return __dev_alloc_skb(length, GFP_ATOMIC);
301}
302EXPORT_SYMBOL(dev_alloc_skb);
303
27b437c8 304static void skb_drop_list(struct sk_buff **listp)
1da177e4 305{
27b437c8 306 struct sk_buff *list = *listp;
1da177e4 307
27b437c8 308 *listp = NULL;
1da177e4
LT
309
310 do {
311 struct sk_buff *this = list;
312 list = list->next;
313 kfree_skb(this);
314 } while (list);
315}
316
27b437c8
HX
317static inline void skb_drop_fraglist(struct sk_buff *skb)
318{
319 skb_drop_list(&skb_shinfo(skb)->frag_list);
320}
321
1da177e4
LT
322static void skb_clone_fraglist(struct sk_buff *skb)
323{
324 struct sk_buff *list;
325
326 for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
327 skb_get(list);
328}
329
5bba1712 330static void skb_release_data(struct sk_buff *skb)
1da177e4
LT
331{
332 if (!skb->cloned ||
333 !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
334 &skb_shinfo(skb)->dataref)) {
335 if (skb_shinfo(skb)->nr_frags) {
336 int i;
337 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
338 put_page(skb_shinfo(skb)->frags[i].page);
339 }
340
341 if (skb_shinfo(skb)->frag_list)
342 skb_drop_fraglist(skb);
343
344 kfree(skb->head);
345 }
346}
347
348/*
349 * Free an skbuff by memory without cleaning the state.
350 */
2d4baff8 351static void kfree_skbmem(struct sk_buff *skb)
1da177e4 352{
d179cd12
DM
353 struct sk_buff *other;
354 atomic_t *fclone_ref;
355
d179cd12
DM
356 switch (skb->fclone) {
357 case SKB_FCLONE_UNAVAILABLE:
358 kmem_cache_free(skbuff_head_cache, skb);
359 break;
360
361 case SKB_FCLONE_ORIG:
362 fclone_ref = (atomic_t *) (skb + 2);
363 if (atomic_dec_and_test(fclone_ref))
364 kmem_cache_free(skbuff_fclone_cache, skb);
365 break;
366
367 case SKB_FCLONE_CLONE:
368 fclone_ref = (atomic_t *) (skb + 1);
369 other = skb - 1;
370
371 /* The clone portion is available for
372 * fast-cloning again.
373 */
374 skb->fclone = SKB_FCLONE_UNAVAILABLE;
375
376 if (atomic_dec_and_test(fclone_ref))
377 kmem_cache_free(skbuff_fclone_cache, other);
378 break;
3ff50b79 379 }
1da177e4
LT
380}
381
04a4bb55 382static void skb_release_head_state(struct sk_buff *skb)
1da177e4 383{
1da177e4
LT
384 dst_release(skb->dst);
385#ifdef CONFIG_XFRM
386 secpath_put(skb->sp);
387#endif
9c2b3328
SH
388 if (skb->destructor) {
389 WARN_ON(in_irq());
1da177e4
LT
390 skb->destructor(skb);
391 }
9fb9cbb1 392#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
5f79e0f9 393 nf_conntrack_put(skb->nfct);
9fb9cbb1
YK
394 nf_conntrack_put_reasm(skb->nfct_reasm);
395#endif
1da177e4
LT
396#ifdef CONFIG_BRIDGE_NETFILTER
397 nf_bridge_put(skb->nf_bridge);
398#endif
1da177e4
LT
399/* XXX: IS this still necessary? - JHS */
400#ifdef CONFIG_NET_SCHED
401 skb->tc_index = 0;
402#ifdef CONFIG_NET_CLS_ACT
403 skb->tc_verd = 0;
1da177e4
LT
404#endif
405#endif
04a4bb55
LB
406}
407
/*
 * Free everything but the sk_buff shell: first the head state, then the
 * data area.
 */
static void skb_release_all(struct sk_buff *skb)
{
	skb_release_head_state(skb);	/* dst, destructor, netfilter refs, ... */
	skb_release_data(skb);		/* fragments, frag_list and head buffer */
}
414
/**
 *	__kfree_skb - private function
 *	@skb: buffer
 *
 *	Free an sk_buff. Release anything attached to the buffer.
 *	Clean the state. This is an internal helper function. Users should
 *	always call kfree_skb
 */
void __kfree_skb(struct sk_buff *skb)
{
	/* Drop the attached state and data first ... */
	skb_release_all(skb);

	/* ... then give the sk_buff shell itself back. */
	kfree_skbmem(skb);
}
429