/*
 * xvmalloc memory allocator
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "xvmalloc.h"
#include "xvmalloc_int.h"

static void stat_inc(u64 *value)
{
	*value = *value + 1;
}

static void stat_dec(u64 *value)
{
	*value = *value - 1;
}
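
/*
 * Block flags share storage with the 'prev' offset: blocks are XV_ALIGN
 * aligned, so the low bits of block->prev (FLAGS_MASK) are free to hold
 * flags while the remaining bits (PREV_MASK) hold the previous block's
 * offset. The helpers below and get_blockprev()/set_blockprev() keep the
 * two parts from clobbering each other.
 */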

static int test_flag(struct block_header *block, enum blockflags flag)
{
	return block->prev & BIT(flag);
}

static void set_flag(struct block_header *block, enum blockflags flag)
{
	block->prev |= BIT(flag);
}

static void clear_flag(struct block_header *block, enum blockflags flag)
{
	block->prev &= ~BIT(flag);
}

/*
 * Given <page, offset> pair, provide a dereferenceable pointer.
 * This is called from xv_malloc/xv_free path, so it
 * needs to be fast.
 */
static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
{
	unsigned char *base;

	base = kmap_atomic(page, type);
	return base + offset;
}

static void put_ptr_atomic(void *ptr, enum km_type type)
{
	kunmap_atomic(ptr, type);
}
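
/*
 * Note: two distinct kmap slots (KM_USER0 and KM_USER1) are used in this
 * file so that a second block can be mapped while the first is still
 * mapped, e.g. when a neighbouring block's freelist links are updated
 * while the primary block is held mapped by the caller.
 */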

static u32 get_blockprev(struct block_header *block)
{
	return block->prev & PREV_MASK;
}

static void set_blockprev(struct block_header *block, u16 new_offset)
{
	block->prev = new_offset | (block->prev & FLAGS_MASK);
}

static struct block_header *BLOCK_NEXT(struct block_header *block)
{
	return (struct block_header *)
		((char *)block + block->size + XV_ALIGN);
}

/*
 * Get index of free list containing blocks of maximum size
 * which is less than or equal to given size.
 */
static u32 get_index_for_insert(u32 size)
{
	if (unlikely(size > XV_MAX_ALLOC_SIZE))
		size = XV_MAX_ALLOC_SIZE;
	size &= ~FL_DELTA_MASK;
	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

/*
 * Get index of free list having blocks of size greater than
 * or equal to requested size.
 */
static u32 get_index(u32 size)
{
	if (unlikely(size < XV_MIN_ALLOC_SIZE))
		size = XV_MIN_ALLOC_SIZE;
	size = ALIGN(size, FL_DELTA);
	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}
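
/*
 * Example (assuming the defaults in xvmalloc_int.h: XV_MIN_ALLOC_SIZE == 32
 * and FL_DELTA == 8): for a 60-byte request, get_index() rounds up to 64
 * and returns (64 - 32) >> 3 == 4, the first list whose blocks are all
 * large enough. A free 60-byte block is inserted using the rounded-down
 * size: get_index_for_insert() returns (56 - 32) >> 3 == 3, so no list
 * ever holds a block smaller than its index promises.
 */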

/**
 * find_block - find block of at least given size
 * @pool: memory pool to search from
 * @size: size of block required
 * @page: page containing required block
 * @offset: offset within the page where block is located.
 *
 * Searches two level bitmap to locate block of at least
 * the given size. If such a block is found, it provides
 * <page, offset> to identify this block and returns index
 * in freelist where we found this block.
 * Otherwise, returns 0 and <page, offset> params are not touched.
 */
static u32 find_block(struct xv_pool *pool, u32 size,
			struct page **page, u32 *offset)
{
	ulong flbitmap, slbitmap;
	u32 flindex, slindex, slbitstart;

	/* There are no free blocks in this pool */
	if (!pool->flbitmap)
		return 0;

	/* Get freelist index corresponding to this size */
	slindex = get_index(size);
	slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
	slbitstart = slindex % BITS_PER_LONG;

	/*
	 * If freelist is not empty at this index, we found the
	 * block - head of this list. This is approximate best-fit match.
	 */
	if (test_bit(slbitstart, &slbitmap)) {
		*page = pool->freelist[slindex].page;
		*offset = pool->freelist[slindex].offset;
		return slindex;
	}

	/*
	 * No best-fit found. Search a bit further in bitmap for a free block.
	 * Second level bitmap consists of series of word-sized chunks. Search
	 * further in the chunk where we expected a best-fit, starting from
	 * index location found above.
	 */
	slbitstart++;
	slbitmap >>= slbitstart;

	/* Skip this search if we were already at end of this bitmap chunk */
	if ((slbitstart != BITS_PER_LONG) && slbitmap) {
		slindex += __ffs(slbitmap) + 1;
		*page = pool->freelist[slindex].page;
		*offset = pool->freelist[slindex].offset;
		return slindex;
	}

	/* Now do a full two-level bitmap search to find next nearest fit */
	flindex = slindex / BITS_PER_LONG;

	flbitmap = (pool->flbitmap) >> (flindex + 1);
	if (!flbitmap)
		return 0;

	flindex += __ffs(flbitmap) + 1;
	slbitmap = pool->slbitmap[flindex];
	slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
	*page = pool->freelist[slindex].page;
	*offset = pool->freelist[slindex].offset;

	return slindex;
}
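
/*
 * Note: freelist links are <page, offset> pairs stored inside the free
 * blocks themselves (block->link), so the freelists need no separate
 * node allocations; insert/remove only have to map the affected blocks.
 */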

/*
 * Insert block at <page, offset> in freelist of given pool.
 * freelist used depends on block size.
 */
static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
			struct block_header *block)
{
	u32 flindex, slindex;
	struct block_header *nextblock;

	slindex = get_index_for_insert(block->size);
	flindex = slindex / BITS_PER_LONG;

	block->link.prev_page = NULL;
	block->link.prev_offset = 0;
	block->link.next_page = pool->freelist[slindex].page;
	block->link.next_offset = pool->freelist[slindex].offset;
	pool->freelist[slindex].page = page;
	pool->freelist[slindex].offset = offset;

	if (block->link.next_page) {
		nextblock = get_ptr_atomic(block->link.next_page,
					block->link.next_offset, KM_USER1);
		nextblock->link.prev_page = page;
		nextblock->link.prev_offset = offset;
		put_ptr_atomic(nextblock, KM_USER1);
		/* If there was a next page then the free bits are set. */
		return;
	}

	__set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
	__set_bit(flindex, &pool->flbitmap);
}

/*
 * Remove block from head of freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block_head(struct xv_pool *pool,
			struct block_header *block, u32 slindex)
{
	struct block_header *tmpblock;
	u32 flindex = slindex / BITS_PER_LONG;

	pool->freelist[slindex].page = block->link.next_page;
	pool->freelist[slindex].offset = block->link.next_offset;
	block->link.prev_page = NULL;
	block->link.prev_offset = 0;

	if (!pool->freelist[slindex].page) {
		__clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
		if (!pool->slbitmap[flindex])
			__clear_bit(flindex, &pool->flbitmap);
	} else {
		/*
		 * DEBUG ONLY: We need not reinitialize freelist head previous
		 * pointer to 0 - we never depend on its value. But just for
		 * sanity, let's do it.
		 */
		tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
				pool->freelist[slindex].offset, KM_USER1);
		tmpblock->link.prev_page = NULL;
		tmpblock->link.prev_offset = 0;
		put_ptr_atomic(tmpblock, KM_USER1);
	}
}

/*
 * Remove block from freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
			struct block_header *block, u32 slindex)
{
	u32 flindex;
	struct block_header *tmpblock;

	if (pool->freelist[slindex].page == page
	   && pool->freelist[slindex].offset == offset) {
		remove_block_head(pool, block, slindex);
		return;
	}

	flindex = slindex / BITS_PER_LONG;

	if (block->link.prev_page) {
		tmpblock = get_ptr_atomic(block->link.prev_page,
				block->link.prev_offset, KM_USER1);
		tmpblock->link.next_page = block->link.next_page;
		tmpblock->link.next_offset = block->link.next_offset;
		put_ptr_atomic(tmpblock, KM_USER1);
	}

	if (block->link.next_page) {
		tmpblock = get_ptr_atomic(block->link.next_page,
				block->link.next_offset, KM_USER1);
		tmpblock->link.prev_page = block->link.prev_page;
		tmpblock->link.prev_offset = block->link.prev_offset;
		put_ptr_atomic(tmpblock, KM_USER1);
	}
}

/*
 * Allocate a page and add it to freelist of given pool.
 */
static int grow_pool(struct xv_pool *pool, gfp_t flags)
{
	struct page *page;
	struct block_header *block;

	page = alloc_page(flags);
	if (unlikely(!page))
		return -ENOMEM;

	stat_inc(&pool->total_pages);

	spin_lock(&pool->lock);
	block = get_ptr_atomic(page, 0, KM_USER0);

	block->size = PAGE_SIZE - XV_ALIGN;
	set_flag(block, BLOCK_FREE);
	clear_flag(block, PREV_FREE);
	set_blockprev(block, 0);

	insert_block(pool, page, 0, block);

	put_ptr_atomic(block, KM_USER0);
	spin_unlock(&pool->lock);

	return 0;
}

/*
 * Create a memory pool. Allocates freelist, bitmaps and other
 * per-pool metadata.
 */
struct xv_pool *xv_create_pool(void)
{
	u32 ovhd_size;
	struct xv_pool *pool;

	ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
	pool = kzalloc(ovhd_size, GFP_KERNEL);
	if (!pool)
		return NULL;

	spin_lock_init(&pool->lock);

	return pool;
}

void xv_destroy_pool(struct xv_pool *pool)
{
	kfree(pool);
}
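
/*
 * Typical pool lifecycle (a sketch; error handling shortened):
 *
 *	struct xv_pool *pool = xv_create_pool();
 *	if (!pool)
 *		return -ENOMEM;
 *	...allocate and free objects with xv_malloc()/xv_free()...
 *	xv_destroy_pool(pool);
 */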

/**
 * xv_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @page: page no. that holds the object
 * @offset: location of object within page
 *
 * On success, <page, offset> identifies block allocated
 * and 0 is returned. On failure, <page, offset> is set to
 * 0 and -ENOMEM is returned.
 *
 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
 */
int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
		u32 *offset, gfp_t flags)
{
	int error;
	u32 index, tmpsize, origsize, tmpoffset;
	struct block_header *block, *tmpblock;

	*page = NULL;
	*offset = 0;
	origsize = size;

	if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
		return -ENOMEM;

	size = ALIGN(size, XV_ALIGN);

	spin_lock(&pool->lock);

	index = find_block(pool, size, page, offset);

	if (!*page) {
		spin_unlock(&pool->lock);
		if (flags & GFP_NOWAIT)
			return -ENOMEM;
		error = grow_pool(pool, flags);
		if (unlikely(error))
			return error;

		spin_lock(&pool->lock);
		index = find_block(pool, size, page, offset);
	}

	if (!*page) {
		spin_unlock(&pool->lock);
		return -ENOMEM;
	}

	block = get_ptr_atomic(*page, *offset, KM_USER0);

	remove_block_head(pool, block, index);

	/* Split the block if required */
	tmpoffset = *offset + size + XV_ALIGN;
	tmpsize = block->size - size;
	tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
	if (tmpsize) {
		tmpblock->size = tmpsize - XV_ALIGN;
		set_flag(tmpblock, BLOCK_FREE);
		clear_flag(tmpblock, PREV_FREE);

		set_blockprev(tmpblock, *offset);
		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
			insert_block(pool, *page, tmpoffset, tmpblock);

		if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
			tmpblock = BLOCK_NEXT(tmpblock);
			set_blockprev(tmpblock, tmpoffset);
		}
	} else {
		/* This block is exact fit */
		if (tmpoffset != PAGE_SIZE)
			clear_flag(tmpblock, PREV_FREE);
	}

	block->size = origsize;
	clear_flag(block, BLOCK_FREE);

	put_ptr_atomic(block, KM_USER0);
	spin_unlock(&pool->lock);

	*offset += XV_ALIGN;

	return 0;
}
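
/*
 * Example use (a sketch; 'pool', 'src' and 'len' are hypothetical
 * caller-side names, following the pattern used by zram):
 *
 *	struct page *page;
 *	u32 offset;
 *	void *obj;
 *
 *	if (xv_malloc(pool, len, &page, &offset, GFP_NOIO))
 *		return -ENOMEM;
 *	obj = kmap_atomic(page, KM_USER0) + offset;
 *	memcpy(obj, src, len);
 *	kunmap_atomic(obj, KM_USER0);
 *	...
 *	xv_free(pool, page, offset);
 */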

/*
 * Free block identified with <page, offset>
 */
void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
{
	void *page_start;
	struct block_header *block, *tmpblock;

	offset -= XV_ALIGN;

	spin_lock(&pool->lock);

	page_start = get_ptr_atomic(page, 0, KM_USER0);
	block = (struct block_header *)((char *)page_start + offset);

	/* Catch double free bugs */
	BUG_ON(test_flag(block, BLOCK_FREE));

	block->size = ALIGN(block->size, XV_ALIGN);

	tmpblock = BLOCK_NEXT(block);
	if (offset + block->size + XV_ALIGN == PAGE_SIZE)
		tmpblock = NULL;

	/* Merge next block if it's free */
	if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
		/*
		 * Blocks smaller than XV_MIN_ALLOC_SIZE
		 * are not inserted in any free list.
		 */
		if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
			remove_block(pool, page,
				    offset + block->size + XV_ALIGN, tmpblock,
				    get_index_for_insert(tmpblock->size));
		}
		block->size += tmpblock->size + XV_ALIGN;
	}

	/* Merge previous block if it's free */
	if (test_flag(block, PREV_FREE)) {
		tmpblock = (struct block_header *)((char *)(page_start) +
						get_blockprev(block));
		offset = offset - tmpblock->size - XV_ALIGN;

		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
			remove_block(pool, page, offset, tmpblock,
				    get_index_for_insert(tmpblock->size));

		tmpblock->size += block->size + XV_ALIGN;
		block = tmpblock;
	}

	/* No used objects in this page. Free it. */
	if (block->size == PAGE_SIZE - XV_ALIGN) {
		put_ptr_atomic(page_start, KM_USER0);
		spin_unlock(&pool->lock);

		__free_page(page);
		stat_dec(&pool->total_pages);
		return;
	}

	set_flag(block, BLOCK_FREE);
	if (block->size >= XV_MIN_ALLOC_SIZE)
		insert_block(pool, page, offset, block);

	if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
		tmpblock = BLOCK_NEXT(block);
		set_flag(tmpblock, PREV_FREE);
		set_blockprev(tmpblock, offset);
	}

	put_ptr_atomic(page_start, KM_USER0);
	spin_unlock(&pool->lock);
}

u32 xv_get_object_size(void *obj)
{
	struct block_header *blk;

	blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
	return blk->size;
}

/*
 * Returns total memory used by allocator (userdata + metadata)
 */
u64 xv_get_total_size_bytes(struct xv_pool *pool)
{
	return pool->total_pages << PAGE_SHIFT;
}