Change dmapool free block management
[linux-2.6-block.git] / mm / dmapool.c
CommitLineData
6182a094
MW
1/*
2 * DMA Pool allocator
3 *
4 * Copyright 2001 David Brownell
5 * Copyright 2007 Intel Corporation
6 * Author: Matthew Wilcox <willy@linux.intel.com>
7 *
8 * This software may be redistributed and/or modified under the terms of
9 * the GNU General Public License ("GPL") version 2 as published by the
10 * Free Software Foundation.
11 *
12 * This allocator returns small blocks of a given size which are DMA-able by
13 * the given device. It uses the dma_alloc_coherent page allocator to get
14 * new pages, then splits them up into blocks of the required size.
15 * Many older drivers still have their own code to do this.
16 *
17 * The current design of this allocator is fairly simple. The pool is
18 * represented by the 'struct dma_pool' which keeps a doubly-linked list of
19 * allocated pages. Each page in the page_list is split into blocks of at
a35a3455
MW
20 * least 'size' bytes. Free blocks are tracked in an unsorted singly-linked
21 * list of free blocks within the page. Used blocks aren't tracked, but we
22 * keep a count of how many are currently allocated from each page.
6182a094 23 */
1da177e4
LT
24
25#include <linux/device.h>
1da177e4
LT
26#include <linux/dma-mapping.h>
27#include <linux/dmapool.h>
6182a094
MW
28#include <linux/kernel.h>
29#include <linux/list.h>
1da177e4 30#include <linux/module.h>
6182a094 31#include <linux/mutex.h>
c9cf5528 32#include <linux/poison.h>
e8edc6e0 33#include <linux/sched.h>
6182a094
MW
34#include <linux/slab.h>
35#include <linux/spinlock.h>
36#include <linux/string.h>
37#include <linux/types.h>
38#include <linux/wait.h>
1da177e4 39
e87aa773
MW
40struct dma_pool { /* the pool */
41 struct list_head page_list;
42 spinlock_t lock;
e87aa773
MW
43 size_t size;
44 struct device *dev;
45 size_t allocation;
46 char name[32];
47 wait_queue_head_t waitq;
48 struct list_head pools;
1da177e4
LT
49};
50
e87aa773
MW
51struct dma_page { /* cacheable header for 'allocation' bytes */
52 struct list_head page_list;
53 void *vaddr;
54 dma_addr_t dma;
a35a3455
MW
55 unsigned int in_use;
56 unsigned int offset;
1da177e4
LT
57};
58
/* How long dma_pool_alloc() sleeps between retries: 100 msec, in jiffies. */
#define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000)

/* Taken in this file whenever a device's dma_pools list is walked or changed. */
static DEFINE_MUTEX(pools_lock);
1da177e4
LT
62
63static ssize_t
e87aa773 64show_pools(struct device *dev, struct device_attribute *attr, char *buf)
1da177e4
LT
65{
66 unsigned temp;
67 unsigned size;
68 char *next;
69 struct dma_page *page;
70 struct dma_pool *pool;
71
72 next = buf;
73 size = PAGE_SIZE;
74
75 temp = scnprintf(next, size, "poolinfo - 0.1\n");
76 size -= temp;
77 next += temp;
78
b2366d68 79 mutex_lock(&pools_lock);
1da177e4
LT
80 list_for_each_entry(pool, &dev->dma_pools, pools) {
81 unsigned pages = 0;
82 unsigned blocks = 0;
83
84 list_for_each_entry(page, &pool->page_list, page_list) {
85 pages++;
86 blocks += page->in_use;
87 }
88
89 /* per-pool info, no real statistics yet */
90 temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n",
a35a3455
MW
91 pool->name, blocks,
92 pages * (pool->allocation / pool->size),
e87aa773 93 pool->size, pages);
1da177e4
LT
94 size -= temp;
95 next += temp;
96 }
b2366d68 97 mutex_unlock(&pools_lock);
1da177e4
LT
98
99 return PAGE_SIZE - size;
100}
e87aa773
MW
101
102static DEVICE_ATTR(pools, S_IRUGO, show_pools, NULL);
1da177e4
LT
103
104/**
105 * dma_pool_create - Creates a pool of consistent memory blocks, for dma.
106 * @name: name of pool, for diagnostics
107 * @dev: device that will be doing the DMA
108 * @size: size of the blocks in this pool.
109 * @align: alignment requirement for blocks; must be a power of two
110 * @allocation: returned blocks won't cross this boundary (or zero)
111 * Context: !in_interrupt()
112 *
113 * Returns a dma allocation pool with the requested characteristics, or
114 * null if one can't be created. Given one of these pools, dma_pool_alloc()
115 * may be used to allocate memory. Such memory will all have "consistent"
116 * DMA mappings, accessible by the device and its driver without using
117 * cache flushing primitives. The actual size of blocks allocated may be
118 * larger than requested because of alignment.
119 *
120 * If allocation is nonzero, objects returned from dma_pool_alloc() won't
121 * cross that size boundary. This is useful for devices which have
122 * addressing restrictions on individual DMA transfers, such as not crossing
123 * boundaries of 4KBytes.
124 */
e87aa773
MW
125struct dma_pool *dma_pool_create(const char *name, struct device *dev,
126 size_t size, size_t align, size_t allocation)
1da177e4 127{
e87aa773 128 struct dma_pool *retval;
1da177e4 129
399154be 130 if (align == 0) {
1da177e4 131 align = 1;
399154be 132 } else if (align & (align - 1)) {
1da177e4 133 return NULL;
1da177e4
LT
134 }
135
a35a3455 136 if (size == 0) {
399154be 137 return NULL;
a35a3455
MW
138 } else if (size < 4) {
139 size = 4;
140 }
399154be
MW
141
142 if ((size % align) != 0)
143 size = ALIGN(size, align);
144
1da177e4
LT
145 if (allocation == 0) {
146 if (PAGE_SIZE < size)
147 allocation = size;
148 else
149 allocation = PAGE_SIZE;
e87aa773 150 /* FIXME: round up for less fragmentation */
1da177e4
LT
151 } else if (allocation < size)
152 return NULL;
153
e87aa773
MW
154 if (!
155 (retval =
156 kmalloc_node(sizeof *retval, GFP_KERNEL, dev_to_node(dev))))
1da177e4
LT
157 return retval;
158
e87aa773 159 strlcpy(retval->name, name, sizeof retval->name);
1da177e4
LT
160
161 retval->dev = dev;
162
e87aa773
MW
163 INIT_LIST_HEAD(&retval->page_list);
164 spin_lock_init(&retval->lock);
1da177e4
LT
165 retval->size = size;
166 retval->allocation = allocation;
e87aa773 167 init_waitqueue_head(&retval->waitq);
1da177e4
LT
168
169 if (dev) {
141ecc53
CH
170 int ret;
171
b2366d68 172 mutex_lock(&pools_lock);
e87aa773
MW
173 if (list_empty(&dev->dma_pools))
174 ret = device_create_file(dev, &dev_attr_pools);
141ecc53
CH
175 else
176 ret = 0;
1da177e4 177 /* note: not currently insisting "name" be unique */
141ecc53 178 if (!ret)
e87aa773 179 list_add(&retval->pools, &dev->dma_pools);
141ecc53
CH
180 else {
181 kfree(retval);
182 retval = NULL;
183 }
b2366d68 184 mutex_unlock(&pools_lock);
1da177e4 185 } else
e87aa773 186 INIT_LIST_HEAD(&retval->pools);
1da177e4
LT
187
188 return retval;
189}
e87aa773 190EXPORT_SYMBOL(dma_pool_create);
1da177e4 191
a35a3455
MW
192static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
193{
194 unsigned int offset = 0;
195
196 do {
197 unsigned int next = offset + pool->size;
198 if (unlikely((next + pool->size) >= pool->allocation))
199 next = pool->allocation;
200 *(int *)(page->vaddr + offset) = next;
201 offset = next;
202 } while (offset < pool->allocation);
203}
204
e87aa773 205static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
1da177e4 206{
e87aa773 207 struct dma_page *page;
1da177e4 208
a35a3455 209 page = kmalloc(sizeof(*page), mem_flags);
1da177e4
LT
210 if (!page)
211 return NULL;
a35a3455 212 page->vaddr = dma_alloc_coherent(pool->dev, pool->allocation,
e87aa773 213 &page->dma, mem_flags);
1da177e4 214 if (page->vaddr) {
1da177e4 215#ifdef CONFIG_DEBUG_SLAB
e87aa773 216 memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
1da177e4 217#endif
a35a3455 218 pool_initialise_page(pool, page);
e87aa773 219 list_add(&page->page_list, &pool->page_list);
1da177e4 220 page->in_use = 0;
a35a3455 221 page->offset = 0;
1da177e4 222 } else {
e87aa773 223 kfree(page);
1da177e4
LT
224 page = NULL;
225 }
226 return page;
227}
228
a35a3455 229static inline int is_page_busy(struct dma_page *page)
1da177e4 230{
a35a3455 231 return page->in_use != 0;
1da177e4
LT
232}
233
e87aa773 234static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
1da177e4 235{
e87aa773 236 dma_addr_t dma = page->dma;
1da177e4
LT
237
238#ifdef CONFIG_DEBUG_SLAB
e87aa773 239 memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
1da177e4 240#endif
e87aa773
MW
241 dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
242 list_del(&page->page_list);
243 kfree(page);
1da177e4
LT
244}
245
1da177e4
LT
246/**
247 * dma_pool_destroy - destroys a pool of dma memory blocks.
248 * @pool: dma pool that will be destroyed
249 * Context: !in_interrupt()
250 *
251 * Caller guarantees that no more memory from the pool is in use,
252 * and that nothing will try to use the pool after this call.
253 */
e87aa773 254void dma_pool_destroy(struct dma_pool *pool)
1da177e4 255{
b2366d68 256 mutex_lock(&pools_lock);
e87aa773
MW
257 list_del(&pool->pools);
258 if (pool->dev && list_empty(&pool->dev->dma_pools))
259 device_remove_file(pool->dev, &dev_attr_pools);
b2366d68 260 mutex_unlock(&pools_lock);
1da177e4 261
e87aa773
MW
262 while (!list_empty(&pool->page_list)) {
263 struct dma_page *page;
264 page = list_entry(pool->page_list.next,
265 struct dma_page, page_list);
a35a3455 266 if (is_page_busy(page)) {
1da177e4 267 if (pool->dev)
e87aa773
MW
268 dev_err(pool->dev,
269 "dma_pool_destroy %s, %p busy\n",
1da177e4
LT
270 pool->name, page->vaddr);
271 else
e87aa773
MW
272 printk(KERN_ERR
273 "dma_pool_destroy %s, %p busy\n",
274 pool->name, page->vaddr);
1da177e4 275 /* leak the still-in-use consistent memory */
e87aa773
MW
276 list_del(&page->page_list);
277 kfree(page);
1da177e4 278 } else
e87aa773 279 pool_free_page(pool, page);
1da177e4
LT
280 }
281
e87aa773 282 kfree(pool);
1da177e4 283}
e87aa773 284EXPORT_SYMBOL(dma_pool_destroy);
1da177e4
LT
285
286/**
287 * dma_pool_alloc - get a block of consistent memory
288 * @pool: dma pool that will produce the block
289 * @mem_flags: GFP_* bitmask
290 * @handle: pointer to dma address of block
291 *
292 * This returns the kernel virtual address of a currently unused block,
293 * and reports its dma address through the handle.
6182a094 294 * If such a memory block can't be allocated, %NULL is returned.
1da177e4 295 */
e87aa773
MW
296void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
297 dma_addr_t *handle)
1da177e4 298{
e87aa773
MW
299 unsigned long flags;
300 struct dma_page *page;
e87aa773
MW
301 size_t offset;
302 void *retval;
303
e87aa773 304 spin_lock_irqsave(&pool->lock, flags);
2cae367e 305 restart:
1da177e4 306 list_for_each_entry(page, &pool->page_list, page_list) {
a35a3455
MW
307 if (page->offset < pool->allocation)
308 goto ready;
1da177e4 309 }
e87aa773
MW
310 page = pool_alloc_page(pool, GFP_ATOMIC);
311 if (!page) {
1da177e4 312 if (mem_flags & __GFP_WAIT) {
e87aa773 313 DECLARE_WAITQUEUE(wait, current);
1da177e4 314
d9aacccf 315 __set_current_state(TASK_INTERRUPTIBLE);
2cae367e 316 __add_wait_queue(&pool->waitq, &wait);
e87aa773 317 spin_unlock_irqrestore(&pool->lock, flags);
1da177e4 318
e87aa773 319 schedule_timeout(POOL_TIMEOUT_JIFFIES);
1da177e4 320
2cae367e
MW
321 spin_lock_irqsave(&pool->lock, flags);
322 __remove_wait_queue(&pool->waitq, &wait);
1da177e4
LT
323 goto restart;
324 }
325 retval = NULL;
326 goto done;
327 }
328
e87aa773 329 ready:
1da177e4 330 page->in_use++;
a35a3455
MW
331 offset = page->offset;
332 page->offset = *(int *)(page->vaddr + offset);
1da177e4
LT
333 retval = offset + page->vaddr;
334 *handle = offset + page->dma;
335#ifdef CONFIG_DEBUG_SLAB
e87aa773 336 memset(retval, POOL_POISON_ALLOCATED, pool->size);
1da177e4 337#endif
e87aa773
MW
338 done:
339 spin_unlock_irqrestore(&pool->lock, flags);
1da177e4
LT
340 return retval;
341}
e87aa773 342EXPORT_SYMBOL(dma_pool_alloc);
1da177e4 343
e87aa773 344static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
1da177e4 345{
e87aa773
MW
346 unsigned long flags;
347 struct dma_page *page;
1da177e4 348
e87aa773 349 spin_lock_irqsave(&pool->lock, flags);
1da177e4
LT
350 list_for_each_entry(page, &pool->page_list, page_list) {
351 if (dma < page->dma)
352 continue;
353 if (dma < (page->dma + pool->allocation))
354 goto done;
355 }
356 page = NULL;
e87aa773
MW
357 done:
358 spin_unlock_irqrestore(&pool->lock, flags);
1da177e4
LT
359 return page;
360}
361
1da177e4
LT
362/**
363 * dma_pool_free - put block back into dma pool
364 * @pool: the dma pool holding the block
365 * @vaddr: virtual address of block
366 * @dma: dma address of block
367 *
368 * Caller promises neither device nor driver will again touch this block
369 * unless it is first re-allocated.
370 */
e87aa773 371void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
1da177e4 372{
e87aa773
MW
373 struct dma_page *page;
374 unsigned long flags;
a35a3455 375 unsigned int offset;
1da177e4 376
e87aa773
MW
377 page = pool_find_page(pool, dma);
378 if (!page) {
1da177e4 379 if (pool->dev)
e87aa773
MW
380 dev_err(pool->dev,
381 "dma_pool_free %s, %p/%lx (bad dma)\n",
382 pool->name, vaddr, (unsigned long)dma);
1da177e4 383 else
e87aa773
MW
384 printk(KERN_ERR "dma_pool_free %s, %p/%lx (bad dma)\n",
385 pool->name, vaddr, (unsigned long)dma);
1da177e4
LT
386 return;
387 }
388
a35a3455 389 offset = vaddr - page->vaddr;
1da177e4 390#ifdef CONFIG_DEBUG_SLAB
a35a3455 391 if ((dma - page->dma) != offset) {
1da177e4 392 if (pool->dev)
e87aa773
MW
393 dev_err(pool->dev,
394 "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
395 pool->name, vaddr, (unsigned long long)dma);
1da177e4 396 else
e87aa773
MW
397 printk(KERN_ERR
398 "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
399 pool->name, vaddr, (unsigned long long)dma);
1da177e4
LT
400 return;
401 }
a35a3455
MW
402 {
403 unsigned int chain = page->offset;
404 while (chain < pool->allocation) {
405 if (chain != offset) {
406 chain = *(int *)(page->vaddr + chain);
407 continue;
408 }
409 if (pool->dev)
410 dev_err(pool->dev, "dma_pool_free %s, dma %Lx "
411 "already free\n", pool->name,
412 (unsigned long long)dma);
413 else
414 printk(KERN_ERR "dma_pool_free %s, dma %Lx "
415 "already free\n", pool->name,
416 (unsigned long long)dma);
417 return;
418 }
1da177e4 419 }
e87aa773 420 memset(vaddr, POOL_POISON_FREED, pool->size);
1da177e4
LT
421#endif
422
e87aa773 423 spin_lock_irqsave(&pool->lock, flags);
1da177e4 424 page->in_use--;
a35a3455
MW
425 *(int *)vaddr = page->offset;
426 page->offset = offset;
e87aa773 427 if (waitqueue_active(&pool->waitq))
2cae367e 428 wake_up_locked(&pool->waitq);
1da177e4
LT
429 /*
430 * Resist a temptation to do
a35a3455 431 * if (!is_page_busy(page)) pool_free_page(pool, page);
1da177e4
LT
432 * Better have a few empty pages hang around.
433 */
e87aa773 434 spin_unlock_irqrestore(&pool->lock, flags);
1da177e4 435}
e87aa773 436EXPORT_SYMBOL(dma_pool_free);
1da177e4 437
9ac7849e
TH
438/*
439 * Managed DMA pool
440 */
/* devres release callback: @res holds a pointer to the pool to destroy. */
static void dmam_pool_release(struct device *dev, void *res)
{
	struct dma_pool **slot = res;

	dma_pool_destroy(*slot);
}
447
/* devres match callback: true when @res holds the pool given as @match_data. */
static int dmam_pool_match(struct device *dev, void *res, void *match_data)
{
	struct dma_pool **slot = res;

	return *slot == match_data;
}
452
453/**
454 * dmam_pool_create - Managed dma_pool_create()
455 * @name: name of pool, for diagnostics
456 * @dev: device that will be doing the DMA
457 * @size: size of the blocks in this pool.
458 * @align: alignment requirement for blocks; must be a power of two
459 * @allocation: returned blocks won't cross this boundary (or zero)
460 *
461 * Managed dma_pool_create(). DMA pool created with this function is
462 * automatically destroyed on driver detach.
463 */
464struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
465 size_t size, size_t align, size_t allocation)
466{
467 struct dma_pool **ptr, *pool;
468
469 ptr = devres_alloc(dmam_pool_release, sizeof(*ptr), GFP_KERNEL);
470 if (!ptr)
471 return NULL;
472
473 pool = *ptr = dma_pool_create(name, dev, size, align, allocation);
474 if (pool)
475 devres_add(dev, ptr);
476 else
477 devres_free(ptr);
478
479 return pool;
480}
e87aa773 481EXPORT_SYMBOL(dmam_pool_create);
9ac7849e
TH
482
483/**
484 * dmam_pool_destroy - Managed dma_pool_destroy()
485 * @pool: dma pool that will be destroyed
486 *
487 * Managed dma_pool_destroy().
488 */
489void dmam_pool_destroy(struct dma_pool *pool)
490{
491 struct device *dev = pool->dev;
492
493 dma_pool_destroy(pool);
494 WARN_ON(devres_destroy(dev, dmam_pool_release, dmam_pool_match, pool));
495}
e87aa773 496EXPORT_SYMBOL(dmam_pool_destroy);