Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / drivers / iommu / iova.c
CommitLineData
3b20eb23 1// SPDX-License-Identifier: GPL-2.0-only
f8de50eb 2/*
a15a519e 3 * Copyright © 2006-2009, Intel Corporation.
f8de50eb 4 *
98bcef56 5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
f8de50eb
KA
6 */
7
38717946 8#include <linux/iova.h>
15bbdec3 9#include <linux/module.h>
85b45456 10#include <linux/slab.h>
9257b4a2
OP
11#include <linux/smp.h>
12#include <linux/bitops.h>
aaffaa8a 13#include <linux/cpu.h>
9257b4a2 14
bb68b2fb
RM
/*
 * The anchor node sits above the top of the usable address space.
 * The expansion is parenthesised so it always behaves as a single operand.
 */
#define IOVA_ANCHOR	(~0UL)
17
9257b4a2
OP
/* Forward declarations for helpers that are defined further down. */

/* rcache helpers */
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);

/* flush-queue helpers */
static void fq_flush_timeout(struct timer_list *t);
static void fq_destroy_all_entries(struct iova_domain *iovad);
85b45456 28
f8de50eb 29void
0fb5fe87 30init_iova_domain(struct iova_domain *iovad, unsigned long granule,
aa3ac946 31 unsigned long start_pfn)
f8de50eb 32{
0fb5fe87
RM
33 /*
34 * IOVA granularity will normally be equal to the smallest
35 * supported IOMMU page size; both *must* be capable of
36 * representing individual CPU pages exactly.
37 */
38 BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
39
f8de50eb
KA
40 spin_lock_init(&iovad->iova_rbtree_lock);
41 iovad->rbroot = RB_ROOT;
973f5fbe
RM
42 iovad->cached_node = &iovad->anchor.node;
43 iovad->cached32_node = &iovad->anchor.node;
0fb5fe87 44 iovad->granule = granule;
1b722500 45 iovad->start_pfn = start_pfn;
aa3ac946 46 iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
bee60e94 47 iovad->max32_alloc_size = iovad->dma_32bit_pfn;
42f87e71
JR
48 iovad->flush_cb = NULL;
49 iovad->fq = NULL;
bb68b2fb
RM
50 iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
51 rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
52 rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
9257b4a2 53 init_iova_rcaches(iovad);
f8de50eb 54}
9b41760b 55EXPORT_SYMBOL_GPL(init_iova_domain);
f8de50eb 56
42f87e71
JR
57static void free_iova_flush_queue(struct iova_domain *iovad)
58{
59 if (!iovad->fq)
60 return;
61
9a005a80
JR
62 if (timer_pending(&iovad->fq_timer))
63 del_timer(&iovad->fq_timer);
64
19282101 65 fq_destroy_all_entries(iovad);
9a005a80 66
42f87e71
JR
67 free_percpu(iovad->fq);
68
69 iovad->fq = NULL;
70 iovad->flush_cb = NULL;
71 iovad->entry_dtor = NULL;
72}
73
74int init_iova_flush_queue(struct iova_domain *iovad,
75 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
76{
77 int cpu;
78
fb418dab
JR
79 atomic64_set(&iovad->fq_flush_start_cnt, 0);
80 atomic64_set(&iovad->fq_flush_finish_cnt, 0);
81
42f87e71
JR
82 iovad->fq = alloc_percpu(struct iova_fq);
83 if (!iovad->fq)
84 return -ENOMEM;
85
86 iovad->flush_cb = flush_cb;
87 iovad->entry_dtor = entry_dtor;
88
89 for_each_possible_cpu(cpu) {
90 struct iova_fq *fq;
91
92 fq = per_cpu_ptr(iovad->fq, cpu);
93 fq->head = 0;
94 fq->tail = 0;
8109c2a2
JR
95
96 spin_lock_init(&fq->lock);
42f87e71
JR
97 }
98
e99e88a9 99 timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
9a005a80
JR
100 atomic_set(&iovad->fq_timer_on, 0);
101
42f87e71
JR
102 return 0;
103}
104EXPORT_SYMBOL_GPL(init_iova_flush_queue);
105
f8de50eb 106static struct rb_node *
973f5fbe 107__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
f8de50eb 108{
973f5fbe
RM
109 if (limit_pfn <= iovad->dma_32bit_pfn)
110 return iovad->cached32_node;
e60aa7b5 111
973f5fbe 112 return iovad->cached_node;
f8de50eb
KA
113}
114
115static void
e60aa7b5 116__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
f8de50eb 117{
e60aa7b5
RM
118 if (new->pfn_hi < iovad->dma_32bit_pfn)
119 iovad->cached32_node = &new->node;
120 else
121 iovad->cached_node = &new->node;
f8de50eb
KA
122}
123
124static void
125__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
126{
127 struct iova *cached_iova;
f8de50eb 128
e60aa7b5
RM
129 cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
130 if (free->pfn_hi < iovad->dma_32bit_pfn &&
bee60e94 131 free->pfn_lo >= cached_iova->pfn_lo) {
e60aa7b5 132 iovad->cached32_node = rb_next(&free->node);
bee60e94
GK
133 iovad->max32_alloc_size = iovad->dma_32bit_pfn;
134 }
e60aa7b5
RM
135
136 cached_iova = rb_entry(iovad->cached_node, struct iova, node);
973f5fbe 137 if (free->pfn_lo >= cached_iova->pfn_lo)
e60aa7b5 138 iovad->cached_node = rb_next(&free->node);
f8de50eb
KA
139}
140
d751751a
MS
141/* Insert the iova into domain rbtree by holding writer lock */
142static void
143iova_insert_rbtree(struct rb_root *root, struct iova *iova,
144 struct rb_node *start)
145{
146 struct rb_node **new, *parent = NULL;
147
148 new = (start) ? &start : &(root->rb_node);
149 /* Figure out where to put new node */
150 while (*new) {
151 struct iova *this = rb_entry(*new, struct iova, node);
152
153 parent = *new;
154
155 if (iova->pfn_lo < this->pfn_lo)
156 new = &((*new)->rb_left);
157 else if (iova->pfn_lo > this->pfn_lo)
158 new = &((*new)->rb_right);
159 else {
160 WARN_ON(1); /* this should not happen */
161 return;
162 }
163 }
164 /* Add new node and rebalance tree. */
165 rb_link_node(&iova->node, parent, new);
166 rb_insert_color(&iova->node, root);
167}
168
ddf02886 169static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
170 unsigned long size, unsigned long limit_pfn,
171 struct iova *new, bool size_aligned)
f8de50eb 172{
973f5fbe
RM
173 struct rb_node *curr, *prev;
174 struct iova *curr_iova;
f8de50eb 175 unsigned long flags;
e60aa7b5 176 unsigned long new_pfn;
086c83ac
ZL
177 unsigned long align_mask = ~0UL;
178
179 if (size_aligned)
180 align_mask <<= fls_long(size - 1);
f8de50eb
KA
181
182 /* Walk the tree backwards */
183 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
bee60e94
GK
184 if (limit_pfn <= iovad->dma_32bit_pfn &&
185 size >= iovad->max32_alloc_size)
186 goto iova32_full;
187
973f5fbe
RM
188 curr = __get_cached_rbnode(iovad, limit_pfn);
189 curr_iova = rb_entry(curr, struct iova, node);
190 do {
191 limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
192 new_pfn = (limit_pfn - size) & align_mask;
ddf02886 193 prev = curr;
f8de50eb 194 curr = rb_prev(curr);
973f5fbe
RM
195 curr_iova = rb_entry(curr, struct iova, node);
196 } while (curr && new_pfn <= curr_iova->pfn_hi);
f8de50eb 197
80ef4464
RR
198 if (limit_pfn < size || new_pfn < iovad->start_pfn) {
199 iovad->max32_alloc_size = size;
bee60e94 200 goto iova32_full;
80ef4464 201 }
f76aec76
KA
202
203 /* pfn_lo will point to size aligned address if size_aligned is set */
086c83ac 204 new->pfn_lo = new_pfn;
f76aec76 205 new->pfn_hi = new->pfn_lo + size - 1;
f8de50eb 206
d751751a
MS
207 /* If we have 'prev', it's a valid place to start the insertion. */
208 iova_insert_rbtree(&iovad->rbroot, new, prev);
e60aa7b5 209 __cached_rbnode_insert_update(iovad, new);
ddf02886 210
f8de50eb
KA
211 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
212 return 0;
bee60e94
GK
213
214iova32_full:
bee60e94
GK
215 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
216 return -ENOMEM;
f8de50eb
KA
217}
218
ae1ff3d6
SA
/*
 * Slab cache that struct iova objects are allocated from, the number of
 * iova_cache_get() calls not yet balanced by iova_cache_put(), and the mutex
 * that serialises both of those functions.
 */
static DEFINE_MUTEX(iova_cache_mutex);
static unsigned int iova_cache_users;
static struct kmem_cache *iova_cache;
222
223struct iova *alloc_iova_mem(void)
224{
225 return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
226}
227EXPORT_SYMBOL(alloc_iova_mem);
228
229void free_iova_mem(struct iova *iova)
230{
bb68b2fb
RM
231 if (iova->pfn_lo != IOVA_ANCHOR)
232 kmem_cache_free(iova_cache, iova);
ae1ff3d6
SA
233}
234EXPORT_SYMBOL(free_iova_mem);
235
236int iova_cache_get(void)
237{
238 mutex_lock(&iova_cache_mutex);
239 if (!iova_cache_users) {
240 iova_cache = kmem_cache_create(
241 "iommu_iova", sizeof(struct iova), 0,
242 SLAB_HWCACHE_ALIGN, NULL);
243 if (!iova_cache) {
244 mutex_unlock(&iova_cache_mutex);
245 printk(KERN_ERR "Couldn't create iova cache\n");
246 return -ENOMEM;
247 }
248 }
249
250 iova_cache_users++;
251 mutex_unlock(&iova_cache_mutex);
252
253 return 0;
254}
9b41760b 255EXPORT_SYMBOL_GPL(iova_cache_get);
ae1ff3d6
SA
256
257void iova_cache_put(void)
258{
259 mutex_lock(&iova_cache_mutex);
260 if (WARN_ON(!iova_cache_users)) {
261 mutex_unlock(&iova_cache_mutex);
262 return;
263 }
264 iova_cache_users--;
265 if (!iova_cache_users)
266 kmem_cache_destroy(iova_cache);
267 mutex_unlock(&iova_cache_mutex);
268}
9b41760b 269EXPORT_SYMBOL_GPL(iova_cache_put);
ae1ff3d6 270
f8de50eb
KA
271/**
272 * alloc_iova - allocates an iova
07db0409
MI
273 * @iovad: - iova domain in question
274 * @size: - size of page frames to allocate
275 * @limit_pfn: - max limit address
276 * @size_aligned: - set if size_aligned address range is required
1b722500
RM
277 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
278 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
f76aec76
KA
279 * flag is set then the allocated address iova->pfn_lo will be naturally
280 * aligned on roundup_power_of_two(size).
f8de50eb
KA
281 */
282struct iova *
283alloc_iova(struct iova_domain *iovad, unsigned long size,
f76aec76
KA
284 unsigned long limit_pfn,
285 bool size_aligned)
f8de50eb 286{
f8de50eb
KA
287 struct iova *new_iova;
288 int ret;
289
290 new_iova = alloc_iova_mem();
291 if (!new_iova)
292 return NULL;
293
757c370f 294 ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
ddf02886 295 new_iova, size_aligned);
f8de50eb
KA
296
297 if (ret) {
f8de50eb
KA
298 free_iova_mem(new_iova);
299 return NULL;
300 }
301
f8de50eb
KA
302 return new_iova;
303}
9b41760b 304EXPORT_SYMBOL_GPL(alloc_iova);
f8de50eb 305
9257b4a2
OP
306static struct iova *
307private_find_iova(struct iova_domain *iovad, unsigned long pfn)
f8de50eb 308{
9257b4a2
OP
309 struct rb_node *node = iovad->rbroot.rb_node;
310
311 assert_spin_locked(&iovad->iova_rbtree_lock);
f8de50eb 312
f8de50eb 313 while (node) {
eba484b5 314 struct iova *iova = rb_entry(node, struct iova, node);
f8de50eb 315
f8de50eb
KA
316 if (pfn < iova->pfn_lo)
317 node = node->rb_left;
2070f940 318 else if (pfn > iova->pfn_hi)
f8de50eb 319 node = node->rb_right;
2070f940
ZL
320 else
321 return iova; /* pfn falls within iova's range */
f8de50eb
KA
322 }
323
f8de50eb
KA
324 return NULL;
325}
9257b4a2
OP
326
327static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
328{
329 assert_spin_locked(&iovad->iova_rbtree_lock);
330 __cached_rbnode_delete_update(iovad, iova);
331 rb_erase(&iova->node, &iovad->rbroot);
332 free_iova_mem(iova);
333}
334
335/**
336 * find_iova - finds an iova for a given pfn
337 * @iovad: - iova domain in question.
338 * @pfn: - page frame number
339 * This function finds and returns an iova belonging to the
340 * given doamin which matches the given pfn.
341 */
342struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
343{
344 unsigned long flags;
345 struct iova *iova;
346
347 /* Take the lock so that no other thread is manipulating the rbtree */
348 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
349 iova = private_find_iova(iovad, pfn);
350 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
351 return iova;
352}
9b41760b 353EXPORT_SYMBOL_GPL(find_iova);
f8de50eb
KA
354
355/**
356 * __free_iova - frees the given iova
357 * @iovad: iova domain in question.
358 * @iova: iova in question.
359 * Frees the given iova belonging to the giving domain
360 */
361void
362__free_iova(struct iova_domain *iovad, struct iova *iova)
363{
364 unsigned long flags;
365
366 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
9257b4a2 367 private_free_iova(iovad, iova);
f8de50eb 368 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
f8de50eb 369}
9b41760b 370EXPORT_SYMBOL_GPL(__free_iova);
f8de50eb
KA
371
/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 *
 * Looks up the iova containing the given pfn and, if one exists, frees it
 * from that domain. Nothing happens when no iova matches.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct iova *iova = find_iova(iovad, pfn);

	if (!iova)
		return;

	__free_iova(iovad, iova);
}
EXPORT_SYMBOL_GPL(free_iova);
f8de50eb 389
9257b4a2
OP
390/**
391 * alloc_iova_fast - allocates an iova from rcache
392 * @iovad: - iova domain in question
393 * @size: - size of page frames to allocate
394 * @limit_pfn: - max limit address
538d5b33 395 * @flush_rcache: - set to flush rcache on regular allocation failure
9257b4a2 396 * This function tries to satisfy an iova allocation from the rcache,
538d5b33
TN
397 * and falls back to regular allocation on failure. If regular allocation
398 * fails too and the flush_rcache flag is set then the rcache will be flushed.
9257b4a2
OP
399*/
400unsigned long
401alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
538d5b33 402 unsigned long limit_pfn, bool flush_rcache)
9257b4a2 403{
9257b4a2
OP
404 unsigned long iova_pfn;
405 struct iova *new_iova;
406
b826ee9a 407 iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
9257b4a2
OP
408 if (iova_pfn)
409 return iova_pfn;
410
411retry:
412 new_iova = alloc_iova(iovad, size, limit_pfn, true);
413 if (!new_iova) {
414 unsigned int cpu;
415
538d5b33 416 if (!flush_rcache)
9257b4a2
OP
417 return 0;
418
419 /* Try replenishing IOVAs by flushing rcache. */
538d5b33 420 flush_rcache = false;
9257b4a2
OP
421 for_each_online_cpu(cpu)
422 free_cpu_cached_iovas(cpu, iovad);
423 goto retry;
424 }
425
426 return new_iova->pfn_lo;
427}
428EXPORT_SYMBOL_GPL(alloc_iova_fast);
429
/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 *
 * Frees an iova range by trying to put it into the rcache; when the rcache
 * does not take it, regular deallocation through free_iova() is used.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (!iova_rcache_insert(iovad, pfn, size))
		free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
447
19282101
JR
/*
 * Iterate @i over the slot indices of @fq from head up to, but excluding,
 * tail, wrapping around at IOVA_FQ_SIZE. head is read once at loop start;
 * tail is re-read on every iteration.
 */
#define fq_ring_for_each(i, fq)						\
	for ((i) = (fq)->head;						\
	     (i) != (fq)->tail;						\
	     (i) = ((i) + 1) % IOVA_FQ_SIZE)
450
451static inline bool fq_full(struct iova_fq *fq)
452{
8109c2a2 453 assert_spin_locked(&fq->lock);
19282101
JR
454 return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
455}
456
457static inline unsigned fq_ring_add(struct iova_fq *fq)
458{
459 unsigned idx = fq->tail;
460
8109c2a2
JR
461 assert_spin_locked(&fq->lock);
462
19282101
JR
463 fq->tail = (idx + 1) % IOVA_FQ_SIZE;
464
465 return idx;
466}
467
468static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
469{
fb418dab 470 u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
19282101
JR
471 unsigned idx;
472
8109c2a2
JR
473 assert_spin_locked(&fq->lock);
474
19282101
JR
475 fq_ring_for_each(idx, fq) {
476
fb418dab
JR
477 if (fq->entries[idx].counter >= counter)
478 break;
479
19282101
JR
480 if (iovad->entry_dtor)
481 iovad->entry_dtor(fq->entries[idx].data);
482
483 free_iova_fast(iovad,
484 fq->entries[idx].iova_pfn,
485 fq->entries[idx].pages);
fb418dab
JR
486
487 fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
19282101 488 }
fb418dab 489}
19282101 490
fb418dab
JR
491static void iova_domain_flush(struct iova_domain *iovad)
492{
493 atomic64_inc(&iovad->fq_flush_start_cnt);
494 iovad->flush_cb(iovad);
495 atomic64_inc(&iovad->fq_flush_finish_cnt);
19282101
JR
496}
497
498static void fq_destroy_all_entries(struct iova_domain *iovad)
499{
500 int cpu;
501
502 /*
503 * This code runs when the iova_domain is being detroyed, so don't
504 * bother to free iovas, just call the entry_dtor on all remaining
505 * entries.
506 */
507 if (!iovad->entry_dtor)
508 return;
509
510 for_each_possible_cpu(cpu) {
511 struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
512 int idx;
513
514 fq_ring_for_each(idx, fq)
515 iovad->entry_dtor(fq->entries[idx].data);
516 }
517}
518
e99e88a9 519static void fq_flush_timeout(struct timer_list *t)
9a005a80 520{
e99e88a9 521 struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
9a005a80
JR
522 int cpu;
523
524 atomic_set(&iovad->fq_timer_on, 0);
525 iova_domain_flush(iovad);
526
527 for_each_possible_cpu(cpu) {
528 unsigned long flags;
529 struct iova_fq *fq;
530
531 fq = per_cpu_ptr(iovad->fq, cpu);
532 spin_lock_irqsave(&fq->lock, flags);
533 fq_ring_free(iovad, fq);
534 spin_unlock_irqrestore(&fq->lock, flags);
535 }
536}
537
19282101
JR
538void queue_iova(struct iova_domain *iovad,
539 unsigned long pfn, unsigned long pages,
540 unsigned long data)
541{
94e2cc4d 542 struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
8109c2a2 543 unsigned long flags;
19282101
JR
544 unsigned idx;
545
8109c2a2
JR
546 spin_lock_irqsave(&fq->lock, flags);
547
fb418dab
JR
548 /*
549 * First remove all entries from the flush queue that have already been
550 * flushed out on another CPU. This makes the fq_full() check below less
551 * likely to be true.
552 */
553 fq_ring_free(iovad, fq);
554
19282101 555 if (fq_full(fq)) {
fb418dab 556 iova_domain_flush(iovad);
19282101
JR
557 fq_ring_free(iovad, fq);
558 }
559
560 idx = fq_ring_add(fq);
561
562 fq->entries[idx].iova_pfn = pfn;
563 fq->entries[idx].pages = pages;
564 fq->entries[idx].data = data;
fb418dab 565 fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt);
19282101 566
8109c2a2 567 spin_unlock_irqrestore(&fq->lock, flags);
9a005a80
JR
568
569 if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
570 mod_timer(&iovad->fq_timer,
571 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
19282101
JR
572}
573EXPORT_SYMBOL_GPL(queue_iova);
574
f8de50eb
KA
575/**
576 * put_iova_domain - destroys the iova doamin
577 * @iovad: - iova domain in question.
578 * All the iova's in that domain are destroyed.
579 */
580void put_iova_domain(struct iova_domain *iovad)
581{
7595dc58 582 struct iova *iova, *tmp;
f8de50eb 583
42f87e71 584 free_iova_flush_queue(iovad);
9257b4a2 585 free_iova_rcaches(iovad);
7595dc58 586 rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
f8de50eb 587 free_iova_mem(iova);
f8de50eb 588}
9b41760b 589EXPORT_SYMBOL_GPL(put_iova_domain);
f8de50eb
KA
590
591static int
592__is_range_overlap(struct rb_node *node,
593 unsigned long pfn_lo, unsigned long pfn_hi)
594{
eba484b5 595 struct iova *iova = rb_entry(node, struct iova, node);
f8de50eb
KA
596
597 if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
598 return 1;
599 return 0;
600}
601
75f05569
JL
602static inline struct iova *
603alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
604{
605 struct iova *iova;
606
607 iova = alloc_iova_mem();
608 if (iova) {
609 iova->pfn_lo = pfn_lo;
610 iova->pfn_hi = pfn_hi;
611 }
612
613 return iova;
614}
615
f8de50eb
KA
616static struct iova *
617__insert_new_range(struct iova_domain *iovad,
618 unsigned long pfn_lo, unsigned long pfn_hi)
619{
620 struct iova *iova;
621
75f05569
JL
622 iova = alloc_and_init_iova(pfn_lo, pfn_hi);
623 if (iova)
d751751a 624 iova_insert_rbtree(&iovad->rbroot, iova, NULL);
f8de50eb 625
f8de50eb
KA
626 return iova;
627}
628
629static void
630__adjust_overlap_range(struct iova *iova,
631 unsigned long *pfn_lo, unsigned long *pfn_hi)
632{
633 if (*pfn_lo < iova->pfn_lo)
634 iova->pfn_lo = *pfn_lo;
635 if (*pfn_hi > iova->pfn_hi)
636 *pfn_lo = iova->pfn_hi + 1;
637}
638
639/**
640 * reserve_iova - reserves an iova in the given range
641 * @iovad: - iova domain pointer
642 * @pfn_lo: - lower page frame address
643 * @pfn_hi:- higher pfn adderss
644 * This function allocates reserves the address range from pfn_lo to pfn_hi so
645 * that this address is not dished out as part of alloc_iova.
646 */
647struct iova *
648reserve_iova(struct iova_domain *iovad,
649 unsigned long pfn_lo, unsigned long pfn_hi)
650{
651 struct rb_node *node;
652 unsigned long flags;
653 struct iova *iova;
654 unsigned int overlap = 0;
655
bb68b2fb
RM
656 /* Don't allow nonsensical pfns */
657 if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
658 return NULL;
659
3d39cecc 660 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
f8de50eb
KA
661 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
662 if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
eba484b5 663 iova = rb_entry(node, struct iova, node);
f8de50eb
KA
664 __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
665 if ((pfn_lo >= iova->pfn_lo) &&
666 (pfn_hi <= iova->pfn_hi))
667 goto finish;
668 overlap = 1;
669
670 } else if (overlap)
671 break;
672 }
673
25985edc 674 /* We are here either because this is the first reserver node
f8de50eb
KA
675 * or need to insert remaining non overlap addr range
676 */
677 iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
678finish:
679
3d39cecc 680 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
f8de50eb
KA
681 return iova;
682}
9b41760b 683EXPORT_SYMBOL_GPL(reserve_iova);
f8de50eb
KA
684
685/**
686 * copy_reserved_iova - copies the reserved between domains
687 * @from: - source doamin from where to copy
688 * @to: - destination domin where to copy
689 * This function copies reserved iova's from one doamin to
690 * other.
691 */
692void
693copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
694{
695 unsigned long flags;
696 struct rb_node *node;
697
3d39cecc 698 spin_lock_irqsave(&from->iova_rbtree_lock, flags);
f8de50eb 699 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
eba484b5 700 struct iova *iova = rb_entry(node, struct iova, node);
f8de50eb 701 struct iova *new_iova;
733cac2a 702
abbb8a09
RM
703 if (iova->pfn_lo == IOVA_ANCHOR)
704 continue;
705
f8de50eb
KA
706 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
707 if (!new_iova)
708 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
709 iova->pfn_lo, iova->pfn_lo);
710 }
3d39cecc 711 spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
f8de50eb 712}
9b41760b 713EXPORT_SYMBOL_GPL(copy_reserved_iova);
75f05569
JL
714
715struct iova *
716split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
717 unsigned long pfn_lo, unsigned long pfn_hi)
718{
719 unsigned long flags;
720 struct iova *prev = NULL, *next = NULL;
721
722 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
723 if (iova->pfn_lo < pfn_lo) {
724 prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
725 if (prev == NULL)
726 goto error;
727 }
728 if (iova->pfn_hi > pfn_hi) {
729 next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
730 if (next == NULL)
731 goto error;
732 }
733
734 __cached_rbnode_delete_update(iovad, iova);
735 rb_erase(&iova->node, &iovad->rbroot);
736
737 if (prev) {
d751751a 738 iova_insert_rbtree(&iovad->rbroot, prev, NULL);
75f05569
JL
739 iova->pfn_lo = pfn_lo;
740 }
741 if (next) {
d751751a 742 iova_insert_rbtree(&iovad->rbroot, next, NULL);
75f05569
JL
743 iova->pfn_hi = pfn_hi;
744 }
745 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
746
747 return iova;
748
749error:
750 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
751 if (prev)
752 free_iova_mem(prev);
753 return NULL;
754}
15bbdec3 755
9257b4a2
OP
/*
 * Magazine caches for IOVA ranges. Magazines are introduced in the USENIX
 * 2001 paper "Magazines and Vmem: Extending the Slab Allocator to Many CPUs
 * and Arbitrary Resources" by Bonwick and Adams. To keep things simple the
 * magazine size is static; the dynamic size tuning described in the paper
 * is not implemented.
 */

/* Number of pfn slots in one magazine */
#define IOVA_MAG_SIZE 128

/* A fixed-capacity stack of pfns; @size counts the slots currently in use. */
struct iova_magazine {
	unsigned long size;
	unsigned long pfns[IOVA_MAG_SIZE];
};
770
771struct iova_cpu_rcache {
772 spinlock_t lock;
773 struct iova_magazine *loaded;
774 struct iova_magazine *prev;
775};
776
777static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
778{
779 return kzalloc(sizeof(struct iova_magazine), flags);
780}
781
/* Release the memory of @mag; the pfns it holds are not touched. */
static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}
786
787static void
788iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
789{
790 unsigned long flags;
791 int i;
792
793 if (!mag)
794 return;
795
796 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
797
798 for (i = 0 ; i < mag->size; ++i) {
799 struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
800
801 BUG_ON(!iova);
802 private_free_iova(iovad, iova);
803 }
804
805 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
806
807 mag->size = 0;
808}
809
810static bool iova_magazine_full(struct iova_magazine *mag)
811{
812 return (mag && mag->size == IOVA_MAG_SIZE);
813}
814
815static bool iova_magazine_empty(struct iova_magazine *mag)
816{
817 return (!mag || mag->size == 0);
818}
819
820static unsigned long iova_magazine_pop(struct iova_magazine *mag,
821 unsigned long limit_pfn)
822{
e8b19840
RM
823 int i;
824 unsigned long pfn;
825
9257b4a2
OP
826 BUG_ON(iova_magazine_empty(mag));
827
e8b19840
RM
828 /* Only fall back to the rbtree if we have no suitable pfns at all */
829 for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
830 if (i == 0)
831 return 0;
832
833 /* Swap it to pop it */
834 pfn = mag->pfns[i];
835 mag->pfns[i] = mag->pfns[--mag->size];
9257b4a2 836
e8b19840 837 return pfn;
9257b4a2
OP
838}
839
840static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
841{
842 BUG_ON(iova_magazine_full(mag));
843
844 mag->pfns[mag->size++] = pfn;
845}
846
847static void init_iova_rcaches(struct iova_domain *iovad)
848{
849 struct iova_cpu_rcache *cpu_rcache;
850 struct iova_rcache *rcache;
851 unsigned int cpu;
852 int i;
853
854 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
855 rcache = &iovad->rcaches[i];
856 spin_lock_init(&rcache->lock);
857 rcache->depot_size = 0;
858 rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
859 if (WARN_ON(!rcache->cpu_rcaches))
860 continue;
861 for_each_possible_cpu(cpu) {
862 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
863 spin_lock_init(&cpu_rcache->lock);
864 cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
865 cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
866 }
867 }
868}
869
870/*
871 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
872 * return true on success. Can fail if rcache is full and we can't free
873 * space, and free_iova() (our only caller) will then return the IOVA
874 * range to the rbtree instead.
875 */
876static bool __iova_rcache_insert(struct iova_domain *iovad,
877 struct iova_rcache *rcache,
878 unsigned long iova_pfn)
879{
880 struct iova_magazine *mag_to_free = NULL;
881 struct iova_cpu_rcache *cpu_rcache;
882 bool can_insert = false;
883 unsigned long flags;
884
aaffaa8a 885 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
9257b4a2
OP
886 spin_lock_irqsave(&cpu_rcache->lock, flags);
887
888 if (!iova_magazine_full(cpu_rcache->loaded)) {
889 can_insert = true;
890 } else if (!iova_magazine_full(cpu_rcache->prev)) {
891 swap(cpu_rcache->prev, cpu_rcache->loaded);
892 can_insert = true;
893 } else {
894 struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
895
896 if (new_mag) {
897 spin_lock(&rcache->lock);
898 if (rcache->depot_size < MAX_GLOBAL_MAGS) {
899 rcache->depot[rcache->depot_size++] =
900 cpu_rcache->loaded;
901 } else {
902 mag_to_free = cpu_rcache->loaded;
903 }
904 spin_unlock(&rcache->lock);
905
906 cpu_rcache->loaded = new_mag;
907 can_insert = true;
908 }
909 }
910
911 if (can_insert)
912 iova_magazine_push(cpu_rcache->loaded, iova_pfn);
913
914 spin_unlock_irqrestore(&cpu_rcache->lock, flags);
915
916 if (mag_to_free) {
917 iova_magazine_free_pfns(mag_to_free, iovad);
918 iova_magazine_free(mag_to_free);
919 }
920
921 return can_insert;
922}
923
924static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
925 unsigned long size)
926{
927 unsigned int log_size = order_base_2(size);
928
929 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
930 return false;
931
932 return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
933}
934
935/*
936 * Caller wants to allocate a new IOVA range from 'rcache'. If we can
937 * satisfy the request, return a matching non-NULL range and remove
938 * it from the 'rcache'.
939 */
940static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
941 unsigned long limit_pfn)
942{
943 struct iova_cpu_rcache *cpu_rcache;
944 unsigned long iova_pfn = 0;
945 bool has_pfn = false;
946 unsigned long flags;
947
aaffaa8a 948 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
9257b4a2
OP
949 spin_lock_irqsave(&cpu_rcache->lock, flags);
950
951 if (!iova_magazine_empty(cpu_rcache->loaded)) {
952 has_pfn = true;
953 } else if (!iova_magazine_empty(cpu_rcache->prev)) {
954 swap(cpu_rcache->prev, cpu_rcache->loaded);
955 has_pfn = true;
956 } else {
957 spin_lock(&rcache->lock);
958 if (rcache->depot_size > 0) {
959 iova_magazine_free(cpu_rcache->loaded);
960 cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
961 has_pfn = true;
962 }
963 spin_unlock(&rcache->lock);
964 }
965
966 if (has_pfn)
967 iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
968
969 spin_unlock_irqrestore(&cpu_rcache->lock, flags);
970
971 return iova_pfn;
972}
973
974/*
975 * Try to satisfy IOVA allocation range from rcache. Fail if requested
976 * size is too big or the DMA limit we are given isn't satisfied by the
977 * top element in the magazine.
978 */
979static unsigned long iova_rcache_get(struct iova_domain *iovad,
980 unsigned long size,
981 unsigned long limit_pfn)
982{
983 unsigned int log_size = order_base_2(size);
984
985 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
986 return 0;
987
b826ee9a 988 return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
9257b4a2
OP
989}
990
9257b4a2
OP
991/*
992 * free rcache data structures.
993 */
994static void free_iova_rcaches(struct iova_domain *iovad)
995{
996 struct iova_rcache *rcache;
7595dc58 997 struct iova_cpu_rcache *cpu_rcache;
9257b4a2
OP
998 unsigned int cpu;
999 int i, j;
1000
1001 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1002 rcache = &iovad->rcaches[i];
7595dc58
RM
1003 for_each_possible_cpu(cpu) {
1004 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1005 iova_magazine_free(cpu_rcache->loaded);
1006 iova_magazine_free(cpu_rcache->prev);
1007 }
9257b4a2 1008 free_percpu(rcache->cpu_rcaches);
7595dc58 1009 for (j = 0; j < rcache->depot_size; ++j)
9257b4a2 1010 iova_magazine_free(rcache->depot[j]);
9257b4a2
OP
1011 }
1012}
1013
1014/*
1015 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
1016 */
1017void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
1018{
1019 struct iova_cpu_rcache *cpu_rcache;
1020 struct iova_rcache *rcache;
1021 unsigned long flags;
1022 int i;
1023
1024 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1025 rcache = &iovad->rcaches[i];
1026 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1027 spin_lock_irqsave(&cpu_rcache->lock, flags);
1028 iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1029 iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1030 spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1031 }
1032}
1033
15bbdec3
SA
1034MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1035MODULE_LICENSE("GPL");