// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);

unsigned long iova_rcache_range(void)
{
	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
}

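/*
 * A worked figure for orientation: with IOVA_RANGE_CACHE_MAX_SIZE == 6
 * and 4K pages, the largest cacheable range is PAGE_SIZE << 5 == 128K,
 * i.e. allocations of up to 32 pages are eligible for the rcaches
 * below.
 */
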
static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct iova_domain *iovad;

	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);

	free_cpu_cached_iovas(cpu, iovad);
	return 0;
}

static void free_global_cached_iovas(struct iova_domain *iovad);

static struct iova *to_iova(struct rb_node *node)
{
	return rb_entry(node, struct iova, node);
}

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
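
/*
 * Usage sketch (hypothetical caller, not part of this file): a driver
 * managing its own IOVA space typically takes a reference on the
 * global iova kmem_cache first, then initializes the domain. start_pfn
 * is in granule units, so with a 4K granule a start_pfn of 1 keeps
 * IOVA 0 out of circulation:
 *
 *	struct iova_domain iovad;
 *	int ret;
 *
 *	ret = iova_cache_get();
 *	if (ret)
 *		return ret;
 *	init_iova_domain(&iovad, SZ_4K, 1);
 */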

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = to_iova(iovad->cached32_node);
	if (free == cached_iova ||
	    (free->pfn_hi < iovad->dma_32bit_pfn &&
	     free->pfn_lo >= cached_iova->pfn_lo))
		iovad->cached32_node = rb_next(&free->node);

	if (free->pfn_lo < iovad->dma_32bit_pfn)
		iovad->max32_alloc_size = iovad->dma_32bit_pfn;

	cached_iova = to_iova(iovad->cached_node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
	struct rb_node *node, *next;
	/*
	 * Ideally what we'd like to judge here is whether limit_pfn is close
	 * enough to the highest-allocated IOVA that starting the allocation
	 * walk from the anchor node will be quicker than this initial work to
	 * find an exact starting point (especially if that ends up being the
	 * anchor node anyway). This is an incredibly crude approximation which
	 * only really helps the most likely case, but is at least trivially easy.
	 */
	if (limit_pfn > iovad->dma_32bit_pfn)
		return &iovad->anchor.node;

	node = iovad->rbroot.rb_node;
	while (to_iova(node)->pfn_hi < limit_pfn)
		node = node->rb_right;

search_left:
	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
		node = node->rb_left;

	if (!node->rb_left)
		return node;

	next = node->rb_left;
	while (next->rb_right) {
		next = next->rb_right;
		if (to_iova(next)->pfn_lo >= limit_pfn) {
			node = next;
			goto search_left;
		}
	}

	return node;
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = to_iova(*new);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1);	/* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
		struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn, retry_pfn;
	unsigned long align_mask = ~0UL;
	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

	/* Size-aligned allocations align pfn_lo to roundup_pow_of_two(size) */
	if (size_aligned)
		align_mask <<= fls_long(size - 1);

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (limit_pfn <= iovad->dma_32bit_pfn &&
			size >= iovad->max32_alloc_size)
		goto iova32_full;

	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = to_iova(curr);
	retry_pfn = curr_iova->pfn_hi;

retry:
	do {
		high_pfn = min(high_pfn, curr_iova->pfn_lo);
		new_pfn = (high_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = to_iova(curr);
	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

	if (high_pfn < size || new_pfn < low_pfn) {
		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
			high_pfn = limit_pfn;
			low_pfn = retry_pfn + 1;
			curr = iova_find_limit(iovad, limit_pfn);
			curr_iova = to_iova(curr);
			goto retry;
		}
		iovad->max32_alloc_size = size;
		goto iova32_full;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return 0;

iova32_full:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return -ENOMEM;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

static struct iova *alloc_iova_mem(void)
{
	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}

static void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}

int iova_cache_get(void)
{
	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		int ret;

		ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
					      iova_cpuhp_dead);
		if (ret) {
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't register cpuhp handler\n");
			return ret;
		}

		iova_cache = kmem_cache_create(
			"iommu_iova", sizeof(struct iova), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache) {
			cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't create iova cache\n");
			return -ENOMEM;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users) {
		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
		kmem_cache_destroy(iova_cache);
	}
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_pow_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
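
/*
 * Usage sketch (hypothetical values): allocating 5 granules below the
 * 32-bit boundary with size_aligned set places pfn_lo on a boundary of
 * roundup_pow_of_two(5) == 8 granules:
 *
 *	struct iova *iova;
 *
 *	iova = alloc_iova(&iovad, 5, iovad.dma_32bit_pfn - 1, true);
 *	if (iova)
 *		dma_addr = (dma_addr_t)iova->pfn_lo << iova_shift(&iovad);
 */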

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = to_iova(node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain.
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that was allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	if (!iova) {
		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
		return;
	}
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);
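
/*
 * Usage note (illustrative): free_iova() re-walks the rbtree to find
 * the entry, so a caller that still holds the struct iova returned by
 * alloc_iova() should prefer __free_iova():
 *
 *	__free_iova(&iovad, iova);	// still have the struct iova
 *	free_iova(&iovad, pfn);		// only kept iova->pfn_lo
 */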

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		size = roundup_pow_of_two(size);

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		free_global_cached_iovas(iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that was allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
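
/*
 * Usage sketch (hypothetical, in the style of a dma-iommu caller): the
 * fast paths pair up, and the size passed to free_iova_fast() must be
 * the originally requested size so both map to the same rcache bucket:
 *
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(&iovad, nrpages, dma_limit >> shift, true);
 *	if (!pfn)
 *		return 0;	// treat as mapping failure
 *	...
 *	free_iova_fast(&iovad, pfn, nrpages);
 */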

static void iova_domain_free_rcaches(struct iova_domain *iovad)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					    &iovad->cpuhp_dead);
	free_iova_rcaches(iovad);
}

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	if (iovad->rcaches)
		iova_domain_free_rcaches(iovad);

	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = to_iova(node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this range is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = to_iova(node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
				(pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
			break;
	}

	/*
	 * We are here either because this is the first reserved range,
	 * or because we need to insert the remaining non-overlapping
	 * address range.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
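
/*
 * Usage sketch (hypothetical addresses): punching a hole so the
 * allocator never hands out IOVAs overlapping a doorbell window at
 * 0xfee00000-0xfeefffff, assuming a 4K granule:
 *
 *	reserve_iova(&iovad, 0xfee00000UL >> 12, 0xfeefffffUL >> 12);
 */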

/*
 * Magazine caches for IOVA ranges. For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

/*
 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
 * will be wasted.
 */
#define IOVA_MAG_SIZE 127
#define MAX_GLOBAL_MAGS 32	/* magazines per bin */

struct iova_magazine {
	unsigned long size;
	unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

struct iova_rcache {
	spinlock_t lock;
	unsigned long depot_size;
	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
	struct iova_cpu_rcache __percpu *cpu_rcaches;
};

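/*
 * Operation sketch: lookups pop from the per-CPU 'loaded' magazine and
 * swap in 'prev' when 'loaded' runs dry; only when both are empty (or,
 * on insertion, both full) is rcache->lock taken to exchange a magazine
 * with the global depot. This is the Bonwick scheme above: the common
 * case touches only the per-CPU lock.
 */
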
static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	struct iova_magazine *mag;

	mag = kmalloc(sizeof(*mag), flags);
	if (mag)
		mag->size = 0;

	return mag;
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		if (WARN_ON(!iova))
			continue;

		remove_iova(iovad, iova);
		free_iova_mem(iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return mag->size == IOVA_MAG_SIZE;
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return mag->size == 0;
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	mag->pfns[mag->size++] = pfn;
}

int iova_domain_init_rcaches(struct iova_domain *iovad)
{
	unsigned int cpu;
	int i, ret;

	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
				 sizeof(struct iova_rcache),
				 GFP_KERNEL);
	if (!iovad->rcaches)
		return -ENOMEM;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		struct iova_cpu_rcache *cpu_rcache;
		struct iova_rcache *rcache;

		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->depot_size = 0;
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
						     cache_line_size());
		if (!rcache->cpu_rcaches) {
			ret = -ENOMEM;
			goto out_err;
		}
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);

			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
				ret = -ENOMEM;
				goto out_err;
			}
		}
	}

	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					       &iovad->cpuhp_dead);
	if (ret)
		goto out_err;
	return 0;

out_err:
	free_iova_rcaches(iovad);
	return ret;
}
EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
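
/*
 * Setup/teardown sketch (hypothetical caller): rcaches are opt-in and
 * must be initialized after init_iova_domain(); put_iova_domain()
 * tears them down again if present:
 *
 *	init_iova_domain(&iovad, SZ_4K, 1);
 *	ret = iova_domain_init_rcaches(&iovad);
 *	if (ret)
 *		return ret;
 *	...
 *	put_iova_domain(&iovad);
 */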

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success. Can fail if rcache is full and we can't free
 * space, and free_iova() (our only caller) will then return the IOVA
 * range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
						cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			spin_unlock(&rcache->lock);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'. If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache. Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		if (!rcache->cpu_rcaches)
			break;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		for (j = 0; j < rcache->depot_size; ++j)
			iova_magazine_free(rcache->depot[j]);
	}

	kfree(iovad->rcaches);
	iovad->rcaches = NULL;
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	unsigned long flags;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_irqsave(&rcache->lock, flags);
		for (j = 0; j < rcache->depot_size; ++j) {
			iova_magazine_free_pfns(rcache->depot[j], iovad);
			iova_magazine_free(rcache->depot[j]);
		}
		rcache->depot_size = 0;
		spin_unlock_irqrestore(&rcache->lock, flags);
	}
}
MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");