mm/slob.c
/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is as little as 2 bytes, however typically most architectures
 * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
 * The slob heap is a linked list of pages from __get_free_page, and
 * within each page, there is a singly-linked list of free blocks (slob_t).
 * The heap is grown on demand and allocation from the heap is currently
 * first-fit.
 *
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a 4-byte header with the kmalloc size.
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * __get_free_pages directly, allocating compound pages so the page order
 * does not have to be separately tracked, and also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors and
 * destructors for every SLAB allocation. Objects are returned with the
 * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
 * case the low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
 * calling __get_free_pages. As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
 */

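/*
 * A rough sketch of the structure described above (illustrative only):
 *
 *      free_slob_pages --> [slob_page A] --> [slob_page B] --> ...
 *                           A->free: blk -> blk -> blk    (within page A)
 *                           B->free: blk -> blk           (within page B)
 *
 * kmalloc() requests of roughly a page or more bypass these lists and go
 * straight to __get_free_pages(), as described above.
 */
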
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <asm/atomic.h>

/*
 * slob_block has a field 'units', which indicates size of block if +ve,
 * or offset of next block if -ve (in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 */
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

struct slob_block {
        slobidx_t units;
};
typedef struct slob_block slob_t;

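/*
 * For illustration, assuming 4 KiB pages (so slobidx_t is s16 and a unit
 * is sizeof(slob_t) == 2 bytes): a free block s of, say, 10 units whose
 * next free block starts at unit offset 100 within the page is encoded as
 *
 *      s[0].units = 10;        (size)
 *      s[1].units = 100;       (offset of next free block, in units)
 *
 * while a 1-unit free block stores only s[0].units = -100.  A completely
 * free page is a single block of SLOB_UNITS(PAGE_SIZE) == 2048 units.
 */
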
/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 */
struct slob_page {
        union {
                struct {
                        unsigned long flags;    /* mandatory */
                        atomic_t _count;        /* mandatory */
                        slobidx_t units;        /* free units left in page */
                        unsigned long pad[2];
                        slob_t *free;           /* first free slob_t in page */
                        struct list_head list;  /* linked list of free pages */
                };
                struct page page;
        };
};
static inline void struct_slob_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 */
static inline void free_slob_page(struct slob_page *sp)
{
        reset_page_mapcount(&sp->page);
        sp->page.mapping = NULL;
}

/*
 * All (partially) free slob pages go on this list.
 */
static LIST_HEAD(free_slob_pages);

/*
 * slob_page: True for all slob pages (false for bigblock pages)
 */
static inline int slob_page(struct slob_page *sp)
{
        return test_bit(PG_active, &sp->flags);
}

static inline void set_slob_page(struct slob_page *sp)
{
        __set_bit(PG_active, &sp->flags);
}

static inline void clear_slob_page(struct slob_page *sp)
{
        __clear_bit(PG_active, &sp->flags);
}

/*
 * slob_page_free: true for pages on free_slob_pages list.
 */
static inline int slob_page_free(struct slob_page *sp)
{
        return test_bit(PG_private, &sp->flags);
}

static inline void set_slob_page_free(struct slob_page *sp)
{
        list_add(&sp->list, &free_slob_pages);
        __set_bit(PG_private, &sp->flags);
}

static inline void clear_slob_page_free(struct slob_page *sp)
{
        list_del(&sp->list);
        __clear_bit(PG_private, &sp->flags);
}

#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
#define SLOB_ALIGN L1_CACHE_BYTES

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 */
struct slob_rcu {
        struct rcu_head head;
        int size;
};

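/*
 * For illustration: a cache created with SLAB_DESTROY_BY_RCU and object
 * size S hands out blocks of S + sizeof(struct slob_rcu) bytes, with the
 * footer living at offset S.  kmem_cache_free() fills in the footer and
 * defers the real free via call_rcu(); kmem_rcu_free() then recovers the
 * block start as (void *)slob_rcu - (size - sizeof(struct slob_rcu)).
 */
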
/*
 * slob_lock protects all slob allocator structures.
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * Encode the given size and next info into a free slob block s.
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
        slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
        slobidx_t offset = next - base;

        if (size > 1) {
                s[0].units = size;
                s[1].units = offset;
        } else
                s[0].units = -offset;
}

/*
 * Return the size of a slob block.
 */
static slobidx_t slob_units(slob_t *s)
{
        if (s->units > 0)
                return s->units;
        return 1;
}

/*
 * Return the next free slob block pointer after this one.
 */
static slob_t *slob_next(slob_t *s)
{
        slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
        slobidx_t next;

        if (s[0].units < 0)
                next = -s[0].units;
        else
                next = s[1].units;
        return base+next;
}

/*
 * Returns true if s is the last free block in its page.
 */
static int slob_last(slob_t *s)
{
        return !((unsigned long)slob_next(s) & ~PAGE_MASK);
}

/*
 * Allocate a slob block within a given slob_page sp.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
        slob_t *prev, *cur, *aligned = 0;
        int delta = 0, units = SLOB_UNITS(size);

        for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
                slobidx_t avail = slob_units(cur);

                if (align) {
                        aligned = (slob_t *)ALIGN((unsigned long)cur, align);
                        delta = aligned - cur;
                }
                if (avail >= units + delta) { /* room enough? */
                        slob_t *next;

                        if (delta) { /* need to fragment head to align? */
                                next = slob_next(cur);
                                set_slob(aligned, avail - delta, next);
                                set_slob(cur, delta, aligned);
                                prev = cur;
                                cur = aligned;
                                avail = slob_units(cur);
                        }

                        next = slob_next(cur);
                        if (avail == units) { /* exact fit? unlink. */
                                if (prev)
                                        set_slob(prev, slob_units(prev), next);
                                else
                                        sp->free = next;
                        } else { /* fragment */
                                if (prev)
                                        set_slob(prev, slob_units(prev), cur + units);
                                else
                                        sp->free = cur + units;
                                set_slob(cur + units, avail - units, next);
                        }

                        sp->units -= units;
                        if (!sp->units)
                                clear_slob_page_free(sp);
                        return cur;
                }
                if (slob_last(cur))
                        return NULL;
        }
}
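
/*
 * A worked example of the alignment path above (numbers are illustrative
 * only): with 2-byte units and align == 64, a free block found at page
 * offset 48 gives aligned == offset 64 and delta == 8 units.  If the
 * block is big enough, set_slob(cur, delta, aligned) keeps the 8-unit
 * head on the free list and the allocation is carved out of the block
 * starting at the aligned address.
 */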

/*
 * slob_alloc: entry point into the slob allocator.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align)
{
        struct slob_page *sp;
        slob_t *b = NULL;
        unsigned long flags;

        spin_lock_irqsave(&slob_lock, flags);
        /* Iterate through each partially free page, try to find room */
        list_for_each_entry(sp, &free_slob_pages, list) {
                if (sp->units >= SLOB_UNITS(size)) {
                        b = slob_page_alloc(sp, size, align);
                        if (b)
                                break;
                }
        }
        spin_unlock_irqrestore(&slob_lock, flags);

        /* Not enough space: must allocate a new page */
        if (!b) {
                b = (slob_t *)__get_free_page(gfp);
                if (!b)
                        return 0;
                sp = (struct slob_page *)virt_to_page(b);
                set_slob_page(sp);

                spin_lock_irqsave(&slob_lock, flags);
                sp->units = SLOB_UNITS(PAGE_SIZE);
                sp->free = b;
                INIT_LIST_HEAD(&sp->list);
                set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
                set_slob_page_free(sp);
                b = slob_page_alloc(sp, size, align);
                BUG_ON(!b);
                spin_unlock_irqrestore(&slob_lock, flags);
        }
        return b;
}

/*
 * slob_free: entry point into the slob allocator.
 */
static void slob_free(void *block, int size)
{
        struct slob_page *sp;
        slob_t *prev, *next, *b = (slob_t *)block;
        slobidx_t units;
        unsigned long flags;

        if (!block)
                return;
        BUG_ON(!size);

        sp = (struct slob_page *)virt_to_page(block);
        units = SLOB_UNITS(size);

        spin_lock_irqsave(&slob_lock, flags);

        if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
                /* Go directly to page allocator. Do not pass slob allocator */
                if (slob_page_free(sp))
                        clear_slob_page_free(sp);
                clear_slob_page(sp);
                free_slob_page(sp);
                free_page((unsigned long)b);
                goto out;
        }

        if (!slob_page_free(sp)) {
                /* This slob page is about to become partially free. Easy! */
                sp->units = units;
                sp->free = b;
                set_slob(b, units,
                        (void *)((unsigned long)(b +
                                        SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
                set_slob_page_free(sp);
                goto out;
        }

        /*
         * Otherwise the page is already partially free, so find reinsertion
         * point.
         */
        sp->units += units;

        if (b < sp->free) {
                set_slob(b, units, sp->free);
                sp->free = b;
        } else {
                prev = sp->free;
                next = slob_next(prev);
                while (b > next) {
                        prev = next;
                        next = slob_next(prev);
                }

                if (!slob_last(prev) && b + units == next) {
                        units += slob_units(next);
                        set_slob(b, units, slob_next(next));
                } else
                        set_slob(b, units, next);

                if (prev + slob_units(prev) == b) {
                        units = slob_units(b) + slob_units(prev);
                        set_slob(prev, units, slob_next(b));
                } else
                        set_slob(prev, slob_units(prev), b);
        }
out:
        spin_unlock_irqrestore(&slob_lock, flags);
}
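
/*
 * Coalescing in slob_free(), by example (illustrative numbers): when an
 * 8-unit block b is freed and its immediate neighbour b + 8 is already
 * the free block 'next', the two are merged into a single block of
 * 8 + slob_units(next) units; likewise, if prev + slob_units(prev) == b,
 * the preceding free block grows to absorb b.  The free list within a
 * page is kept sorted by address, which is what makes these adjacency
 * checks cheap.
 */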

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif

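/*
 * For illustration, assuming ARCH_KMALLOC_MINALIGN and ARCH_SLAB_MINALIGN
 * are both 8 (the __alignof__(unsigned long) default on 64-bit): a
 * kmalloc(100) call becomes slob_alloc(108, gfp, 8), the size 100 is
 * written into the first 4 bytes of the block, and the caller is handed
 * the address 8 bytes in.  kfree() later reads the size back via
 * *(unsigned int *)(ptr - 8) and calls slob_free(ptr - 8, 108).
 */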
void *__kmalloc(size_t size, gfp_t gfp)
{
        int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);

        if (size < PAGE_SIZE - align) {
                unsigned int *m;
                m = slob_alloc(size + align, gfp, align);
                if (m)
                        *m = size;
                return (void *)m + align;
        } else {
                void *ret;

                ret = (void *) __get_free_pages(gfp | __GFP_COMP,
                                                get_order(size));
                if (ret) {
                        struct page *page;
                        page = virt_to_page(ret);
                        page->private = size;
                }
                return ret;
        }
}
EXPORT_SYMBOL(__kmalloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 *
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes. If @p is %NULL, krealloc()
 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
        void *ret;

        if (unlikely(!p))
                return kmalloc_track_caller(new_size, flags);

        if (unlikely(!new_size)) {
                kfree(p);
                return NULL;
        }

        ret = kmalloc_track_caller(new_size, flags);
        if (ret) {
                memcpy(ret, p, min(new_size, ksize(p)));
                kfree(p);
        }
        return ret;
}
EXPORT_SYMBOL(krealloc);

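/*
 * Caller-side usage sketch for krealloc() (buf and new_len are only
 * illustrative names):
 *
 *      void *tmp = krealloc(buf, new_len, GFP_KERNEL);
 *      if (!tmp)
 *              return -ENOMEM;         (buf is still valid here)
 *      buf = tmp;
 *
 * On allocation failure the old object is left untouched; only the
 * new_size == 0 case above frees it.
 */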
void kfree(const void *block)
{
        struct slob_page *sp;

        if (!block)
                return;

        sp = (struct slob_page *)virt_to_page(block);
        if (slob_page(sp)) {
                int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
                unsigned int *m = (unsigned int *)(block - align);
                slob_free(m, *m + align);
        } else
                put_page(&sp->page);
}

EXPORT_SYMBOL(kfree);

/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
{
        struct slob_page *sp;

        if (!block)
                return 0;

        sp = (struct slob_page *)virt_to_page(block);
        if (slob_page(sp))
                return ((slob_t *)block - 1)->units + SLOB_UNIT;
        else
                return sp->page.private;
}

struct kmem_cache {
        unsigned int size, align;
        unsigned long flags;
        const char *name;
        void (*ctor)(void *, struct kmem_cache *, unsigned long);
};

struct kmem_cache *kmem_cache_create(const char *name, size_t size,
        size_t align, unsigned long flags,
        void (*ctor)(void*, struct kmem_cache *, unsigned long),
        void (*dtor)(void*, struct kmem_cache *, unsigned long))
{
        struct kmem_cache *c;

        c = slob_alloc(sizeof(struct kmem_cache), flags, 0);

        if (c) {
                c->name = name;
                c->size = size;
                if (flags & SLAB_DESTROY_BY_RCU) {
                        /* leave room for rcu footer at the end of object */
                        c->size += sizeof(struct slob_rcu);
                }
                c->flags = flags;
                c->ctor = ctor;
                /* ignore alignment unless it's forced */
                c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
                if (c->align < ARCH_SLAB_MINALIGN)
                        c->align = ARCH_SLAB_MINALIGN;
                if (c->align < align)
                        c->align = align;
        } else if (flags & SLAB_PANIC)
                panic("Cannot create slab cache %s\n", name);

        return c;
}
EXPORT_SYMBOL(kmem_cache_create);
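
/*
 * For illustration (the cache name and sizes are made up, and SLOB_ALIGN
 * is assumed to be 64): kmem_cache_create("foo", 40, 0, 0, NULL, NULL)
 * yields c->size == 40 and c->align == ARCH_SLAB_MINALIGN; adding
 * SLAB_HWCACHE_ALIGN raises c->align to 64, and SLAB_DESTROY_BY_RCU
 * grows c->size by sizeof(struct slob_rcu) for the footer.  Unlike
 * kmalloc, no per-object size header is stored: freeing uses the c->size
 * recorded in the cache.
 */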

void kmem_cache_destroy(struct kmem_cache *c)
{
        slob_free(c, sizeof(struct kmem_cache));
}
EXPORT_SYMBOL(kmem_cache_destroy);

void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
{
        void *b;

        if (c->size < PAGE_SIZE)
                b = slob_alloc(c->size, flags, c->align);
        else
                b = (void *)__get_free_pages(flags, get_order(c->size));

        if (c->ctor)
                c->ctor(b, c, 0);

        return b;
}
EXPORT_SYMBOL(kmem_cache_alloc);

void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
{
        void *ret = kmem_cache_alloc(c, flags);
        if (ret)
                memset(ret, 0, c->size);

        return ret;
}
EXPORT_SYMBOL(kmem_cache_zalloc);

static void __kmem_cache_free(void *b, int size)
{
        if (size < PAGE_SIZE)
                slob_free(b, size);
        else
                free_pages((unsigned long)b, get_order(size));
}

static void kmem_rcu_free(struct rcu_head *head)
{
        struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
        void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));

        __kmem_cache_free(b, slob_rcu->size);
}

void kmem_cache_free(struct kmem_cache *c, void *b)
{
        if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
                struct slob_rcu *slob_rcu;
                slob_rcu = b + (c->size - sizeof(struct slob_rcu));
                INIT_RCU_HEAD(&slob_rcu->head);
                slob_rcu->size = c->size;
                call_rcu(&slob_rcu->head, kmem_rcu_free);
        } else {
                __kmem_cache_free(b, c->size);
        }
}
EXPORT_SYMBOL(kmem_cache_free);

unsigned int kmem_cache_size(struct kmem_cache *c)
{
        return c->size;
}
EXPORT_SYMBOL(kmem_cache_size);

const char *kmem_cache_name(struct kmem_cache *c)
{
        return c->name;
}
EXPORT_SYMBOL(kmem_cache_name);

int kmem_cache_shrink(struct kmem_cache *d)
{
        return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
        return 0;
}

void __init kmem_cache_init(void)
{
}