Commit | Line | Data |
---|---|---|
15b244a8 AK |
1 | /* |
2 | * IOMMU helpers in MMU context. | |
3 | * | |
4 | * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | */ | |
12 | ||
3f07c014 | 13 | #include <linux/sched/signal.h> |
15b244a8 AK |
14 | #include <linux/slab.h> |
15 | #include <linux/rculist.h> | |
16 | #include <linux/vmalloc.h> | |
17 | #include <linux/mutex.h> | |
2e5bbb54 BS |
18 | #include <linux/migrate.h> |
19 | #include <linux/hugetlb.h> | |
20 | #include <linux/swap.h> | |
425333bf | 21 | #include <linux/sizes.h> |
15b244a8 | 22 | #include <asm/mmu_context.h> |
76fa4975 | 23 | #include <asm/pte-walk.h> |
678e174c | 24 | #include <linux/mm_inline.h> |
15b244a8 AK |
25 | |
26 | static DEFINE_MUTEX(mem_list_mutex); | |
27 | ||
425333bf AK |
28 | #define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1 |
29 | #define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1) | |
30 | ||
15b244a8 AK |
31 | struct mm_iommu_table_group_mem_t { |
32 | struct list_head next; | |
33 | struct rcu_head rcu; | |
34 | unsigned long used; | |
35 | atomic64_t mapped; | |
76fa4975 | 36 | unsigned int pageshift; |
15b244a8 | 37 | u64 ua; /* userspace address */ |
678e174c AK |
38 | u64 entries; /* number of entries in hpas/hpages[] */ |
39 | /* | |
40 | * in mm_iommu_get we temporarily use this to store | |
41 | * struct page address. | |
42 | * | |
43 | * We need to convert ua to hpa in real mode. Make it | |
44 | * simpler by storing physical address. | |
45 | */ | |
46 | union { | |
47 | struct page **hpages; /* vmalloc'ed */ | |
48 | phys_addr_t *hpas; | |
49 | }; | |
c10c21ef AK |
50 | #define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1) |
51 | u64 dev_hpa; /* Device memory base address */ | |
15b244a8 AK |
52 | }; |
53 | ||
/*
 * Adjust mm->locked_vm by @npages, increasing (incr == true) or
 * decreasing it. On increase, the RLIMIT_MEMLOCK limit is enforced
 * unless the task has CAP_IPC_LOCK. Takes mmap_sem for write, so the
 * caller must not hold it.
 *
 * Returns 0 on success or -ENOMEM if the increase would exceed the
 * memlock limit.
 */
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
		unsigned long npages, bool incr)
{
	long ret = 0, locked, lock_limit;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);

	if (incr) {
		locked = mm->locked_vm + npages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			mm->locked_vm += npages;
	} else {
		/* Never let locked_vm underflow; warn if accounting drifted */
		if (WARN_ON_ONCE(npages > mm->locked_vm))
			npages = mm->locked_vm;
		mm->locked_vm -= npages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
			current ? current->pid : 0,
			incr ? '+' : '-',
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);

	return ret;
}
87 | ||
d7baee69 | 88 | bool mm_iommu_preregistered(struct mm_struct *mm) |
15b244a8 | 89 | { |
d7baee69 | 90 | return !list_empty(&mm->context.iommu_group_mem_list); |
15b244a8 AK |
91 | } |
92 | EXPORT_SYMBOL_GPL(mm_iommu_preregistered); | |
93 | ||
c10c21ef | 94 | static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, |
678e174c AK |
95 | unsigned long entries, unsigned long dev_hpa, |
96 | struct mm_iommu_table_group_mem_t **pmem) | |
15b244a8 AK |
97 | { |
98 | struct mm_iommu_table_group_mem_t *mem; | |
678e174c | 99 | long i, ret, locked_entries = 0; |
76fa4975 | 100 | unsigned int pageshift; |
15b244a8 | 101 | |
15b244a8 AK |
102 | mutex_lock(&mem_list_mutex); |
103 | ||
d7baee69 | 104 | list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, |
15b244a8 | 105 | next) { |
15b244a8 AK |
106 | /* Overlap? */ |
107 | if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && | |
108 | (ua < (mem->ua + | |
109 | (mem->entries << PAGE_SHIFT)))) { | |
110 | ret = -EINVAL; | |
111 | goto unlock_exit; | |
112 | } | |
113 | ||
114 | } | |
115 | ||
c10c21ef AK |
116 | if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { |
117 | ret = mm_iommu_adjust_locked_vm(mm, entries, true); | |
118 | if (ret) | |
119 | goto unlock_exit; | |
15b244a8 | 120 | |
c10c21ef AK |
121 | locked_entries = entries; |
122 | } | |
15b244a8 AK |
123 | |
124 | mem = kzalloc(sizeof(*mem), GFP_KERNEL); | |
125 | if (!mem) { | |
126 | ret = -ENOMEM; | |
127 | goto unlock_exit; | |
128 | } | |
129 | ||
c10c21ef AK |
130 | if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) { |
131 | mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT)); | |
132 | mem->dev_hpa = dev_hpa; | |
133 | goto good_exit; | |
134 | } | |
135 | mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA; | |
136 | ||
76fa4975 AK |
137 | /* |
138 | * For a starting point for a maximum page size calculation | |
139 | * we use @ua and @entries natural alignment to allow IOMMU pages | |
140 | * smaller than huge pages but still bigger than PAGE_SIZE. | |
141 | */ | |
142 | mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT)); | |
fad953ce | 143 | mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0]))); |
15b244a8 AK |
144 | if (!mem->hpas) { |
145 | kfree(mem); | |
146 | ret = -ENOMEM; | |
147 | goto unlock_exit; | |
148 | } | |
149 | ||
678e174c AK |
150 | down_read(&mm->mmap_sem); |
151 | ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); | |
152 | up_read(&mm->mmap_sem); | |
153 | if (ret != entries) { | |
154 | /* free the reference taken */ | |
155 | for (i = 0; i < ret; i++) | |
156 | put_page(mem->hpages[i]); | |
157 | ||
158 | vfree(mem->hpas); | |
159 | kfree(mem); | |
160 | ret = -EFAULT; | |
161 | goto unlock_exit; | |
162 | } | |
163 | ||
164 | pageshift = PAGE_SHIFT; | |
15b244a8 | 165 | for (i = 0; i < entries; ++i) { |
678e174c AK |
166 | struct page *page = mem->hpages[i]; |
167 | ||
7f188251 AK |
168 | /* |
169 | * Allow to use larger than 64k IOMMU pages. Only do that | |
170 | * if we are backed by hugetlb. | |
171 | */ | |
172 | if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) { | |
76fa4975 | 173 | struct page *head = compound_head(page); |
7f188251 AK |
174 | |
175 | pageshift = compound_order(head) + PAGE_SHIFT; | |
76fa4975 AK |
176 | } |
177 | mem->pageshift = min(mem->pageshift, pageshift); | |
678e174c AK |
178 | /* |
179 | * We don't need struct page reference any more, switch | |
180 | * to physical address. | |
181 | */ | |
15b244a8 AK |
182 | mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; |
183 | } | |
184 | ||
c10c21ef | 185 | good_exit: |
678e174c | 186 | ret = 0; |
15b244a8 AK |
187 | atomic64_set(&mem->mapped, 1); |
188 | mem->used = 1; | |
189 | mem->ua = ua; | |
190 | mem->entries = entries; | |
191 | *pmem = mem; | |
192 | ||
d7baee69 | 193 | list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); |
15b244a8 AK |
194 | |
195 | unlock_exit: | |
196 | if (locked_entries && ret) | |
d7baee69 | 197 | mm_iommu_adjust_locked_vm(mm, locked_entries, false); |
15b244a8 AK |
198 | |
199 | mutex_unlock(&mem_list_mutex); | |
200 | ||
201 | return ret; | |
202 | } | |
c10c21ef AK |
203 | |
/*
 * Preregister a normal-memory region: pins @entries pages at @ua and
 * charges them against RLIMIT_MEMLOCK. Thin wrapper passing
 * MM_IOMMU_TABLE_INVALID_HPA (i.e. "not device memory") to
 * mm_iommu_do_alloc(). Returns 0 and the descriptor in @pmem, or a
 * negative errno.
 */
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);
15b244a8 | 211 | |
c10c21ef AK |
/*
 * Preregister a device memory region based at @dev_hpa. No pages are
 * pinned and nothing is charged to locked_vm; only a descriptor is
 * created. Returns 0 and the descriptor in @pmem, or a negative errno.
 */
long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);
219 | ||
15b244a8 AK |
220 | static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) |
221 | { | |
222 | long i; | |
223 | struct page *page = NULL; | |
224 | ||
c10c21ef AK |
225 | if (!mem->hpas) |
226 | return; | |
227 | ||
15b244a8 AK |
228 | for (i = 0; i < mem->entries; ++i) { |
229 | if (!mem->hpas[i]) | |
230 | continue; | |
231 | ||
232 | page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT); | |
233 | if (!page) | |
234 | continue; | |
235 | ||
425333bf AK |
236 | if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY) |
237 | SetPageDirty(page); | |
238 | ||
15b244a8 AK |
239 | put_page(page); |
240 | mem->hpas[i] = 0; | |
241 | } | |
242 | } | |
243 | ||
/*
 * Final teardown of a region descriptor: unpin its pages (if any),
 * then free the hpas/hpages array and the descriptor itself. Must only
 * run after the region is off the list and no mappings remain.
 */
static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{

	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}
251 | ||
/* RCU callback: free the descriptor once the grace period has expired */
static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}
259 | ||
/*
 * Unlink @mem from the per-mm list and defer freeing to an RCU grace
 * period so lockless (real mode) list walkers never touch freed memory.
 * Caller holds mem_list_mutex.
 */
static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}
265 | ||
d7baee69 | 266 | long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) |
15b244a8 AK |
267 | { |
268 | long ret = 0; | |
c10c21ef | 269 | unsigned long entries, dev_hpa; |
15b244a8 | 270 | |
15b244a8 AK |
271 | mutex_lock(&mem_list_mutex); |
272 | ||
273 | if (mem->used == 0) { | |
274 | ret = -ENOENT; | |
275 | goto unlock_exit; | |
276 | } | |
277 | ||
278 | --mem->used; | |
279 | /* There are still users, exit */ | |
280 | if (mem->used) | |
281 | goto unlock_exit; | |
282 | ||
283 | /* Are there still mappings? */ | |
284 | if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { | |
285 | ++mem->used; | |
286 | ret = -EBUSY; | |
287 | goto unlock_exit; | |
288 | } | |
289 | ||
290 | /* @mapped became 0 so now mappings are disabled, release the region */ | |
c10c21ef AK |
291 | entries = mem->entries; |
292 | dev_hpa = mem->dev_hpa; | |
15b244a8 AK |
293 | mm_iommu_release(mem); |
294 | ||
c10c21ef AK |
295 | if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) |
296 | mm_iommu_adjust_locked_vm(mm, entries, false); | |
d7baee69 | 297 | |
15b244a8 AK |
298 | unlock_exit: |
299 | mutex_unlock(&mem_list_mutex); | |
300 | ||
301 | return ret; | |
302 | } | |
303 | EXPORT_SYMBOL_GPL(mm_iommu_put); | |
304 | ||
d7baee69 AK |
/*
 * Find the preregistered region that fully contains [ua, ua + size),
 * or return NULL.
 *
 * NOTE(review): walks the list with list_for_each_entry_rcu() but takes
 * neither rcu_read_lock() nor mem_list_mutex here — presumably the
 * callers provide the protection; confirm against the call sites.
 */
struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
322 | ||
6b5c19c5 AK |
/*
 * Real mode variant of mm_iommu_lookup(): find the region that fully
 * contains [ua, ua + size), or return NULL. Uses the lockless list
 * iterator because real mode code cannot take sleeping locks; safety
 * relies on regions being freed only after an RCU grace period.
 */
struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
6b5c19c5 | 340 | |
e0bf78b0 | 341 | struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm, |
d7baee69 | 342 | unsigned long ua, unsigned long entries) |
15b244a8 AK |
343 | { |
344 | struct mm_iommu_table_group_mem_t *mem, *ret = NULL; | |
345 | ||
e0bf78b0 AK |
346 | mutex_lock(&mem_list_mutex); |
347 | ||
d7baee69 | 348 | list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { |
15b244a8 AK |
349 | if ((mem->ua == ua) && (mem->entries == entries)) { |
350 | ret = mem; | |
e0bf78b0 | 351 | ++mem->used; |
15b244a8 AK |
352 | break; |
353 | } | |
354 | } | |
355 | ||
e0bf78b0 AK |
356 | mutex_unlock(&mem_list_mutex); |
357 | ||
15b244a8 AK |
358 | return ret; |
359 | } | |
e0bf78b0 | 360 | EXPORT_SYMBOL_GPL(mm_iommu_get); |
15b244a8 AK |
361 | |
362 | long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, | |
76fa4975 | 363 | unsigned long ua, unsigned int pageshift, unsigned long *hpa) |
15b244a8 AK |
364 | { |
365 | const long entry = (ua - mem->ua) >> PAGE_SHIFT; | |
c10c21ef | 366 | u64 *va; |
15b244a8 AK |
367 | |
368 | if (entry >= mem->entries) | |
369 | return -EFAULT; | |
370 | ||
76fa4975 AK |
371 | if (pageshift > mem->pageshift) |
372 | return -EFAULT; | |
373 | ||
c10c21ef AK |
374 | if (!mem->hpas) { |
375 | *hpa = mem->dev_hpa + (ua - mem->ua); | |
376 | return 0; | |
377 | } | |
378 | ||
379 | va = &mem->hpas[entry]; | |
425333bf | 380 | *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); |
15b244a8 AK |
381 | |
382 | return 0; | |
383 | } | |
384 | EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); | |
385 | ||
6b5c19c5 | 386 | long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, |
76fa4975 | 387 | unsigned long ua, unsigned int pageshift, unsigned long *hpa) |
6b5c19c5 AK |
388 | { |
389 | const long entry = (ua - mem->ua) >> PAGE_SHIFT; | |
6b5c19c5 AK |
390 | unsigned long *pa; |
391 | ||
392 | if (entry >= mem->entries) | |
393 | return -EFAULT; | |
394 | ||
76fa4975 AK |
395 | if (pageshift > mem->pageshift) |
396 | return -EFAULT; | |
397 | ||
c10c21ef AK |
398 | if (!mem->hpas) { |
399 | *hpa = mem->dev_hpa + (ua - mem->ua); | |
400 | return 0; | |
401 | } | |
402 | ||
403 | pa = (void *) vmalloc_to_phys(&mem->hpas[entry]); | |
6b5c19c5 AK |
404 | if (!pa) |
405 | return -EFAULT; | |
406 | ||
425333bf | 407 | *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); |
6b5c19c5 AK |
408 | |
409 | return 0; | |
410 | } | |
425333bf AK |
411 | |
412 | extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua) | |
413 | { | |
414 | struct mm_iommu_table_group_mem_t *mem; | |
415 | long entry; | |
416 | void *va; | |
417 | unsigned long *pa; | |
418 | ||
419 | mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE); | |
420 | if (!mem) | |
421 | return; | |
422 | ||
c10c21ef AK |
423 | if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) |
424 | return; | |
425 | ||
425333bf AK |
426 | entry = (ua - mem->ua) >> PAGE_SHIFT; |
427 | va = &mem->hpas[entry]; | |
428 | ||
429 | pa = (void *) vmalloc_to_phys(va); | |
430 | if (!pa) | |
431 | return; | |
432 | ||
433 | *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY; | |
434 | } | |
6b5c19c5 | 435 | |
c10c21ef AK |
/*
 * Check whether host physical address @hpa falls inside a preregistered
 * *device* memory region. On a hit, report via @size how much memory
 * starting at @hpa is actually preregistered (possibly less than
 * 1 << pageshift) and return true; otherwise return false.
 *
 * NOTE(review): iterates the RCU list without an explicit
 * rcu_read_lock() here — presumably the caller holds it; verify.
 */
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		/* Normal memory regions are not device memory, skip them */
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
462 | ||
15b244a8 AK |
463 | long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) |
464 | { | |
465 | if (atomic64_inc_not_zero(&mem->mapped)) | |
466 | return 0; | |
467 | ||
468 | /* Last mm_iommu_put() has been called, no more mappings allowed() */ | |
469 | return -ENXIO; | |
470 | } | |
471 | EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc); | |
472 | ||
/*
 * Drop a mapping reference taken by mm_iommu_mapped_inc().
 * atomic64_add_unless() refuses to decrement when the counter is 1, so
 * the final 1 -> 0 transition is reserved for mm_iommu_put().
 */
void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
478 | ||
88f54a35 | 479 | void mm_iommu_init(struct mm_struct *mm) |
15b244a8 | 480 | { |
88f54a35 | 481 | INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); |
15b244a8 | 482 | } |