// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  IOMMU helpers in MMU context.
 *
 *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>

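/* Serialises additions to and removals from the per-mm region list */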
static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)

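/*
 * One preregistered chunk of userspace (or device) memory, kept on
 * mm->context.iommu_group_mem_list and looked up under RCU.
 */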
struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * In mm_iommu_do_alloc() we temporarily use this to store
	 * struct page addresses.
	 *
	 * We need to convert ua to hpa in real mode. Make it
	 * simpler by storing the physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};

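/* Fast check whether this mm has any preregistered regions at all */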
bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = account_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * As a starting point for the maximum page size calculation,
	 * use the natural alignment of @ua and @entries to allow IOMMU
	 * pages smaller than huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	mmap_read_lock(mm);
	chunk = (1UL << (PAGE_SHIFT + MAX_PAGE_ORDER)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
				FOLL_WRITE | FOLL_LONGTERM,
				mem->hpages + entry);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	mmap_read_unlock(mm);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next,
				lockdep_is_held(&mem_list_mutex)) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		/*
		 * Allow using IOMMU pages bigger than 64K, but only when
		 * we are backed by hugetlb. Skip device memory as it is
		 * not backed by struct pages.
		 */
		pageshift = PAGE_SHIFT;
		for (i = 0; i < entries; ++i) {
			struct page *page = mem->hpages[i];

			if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
				pageshift = page_shift(compound_head(page));
			mem->pageshift = min(mem->pageshift, pageshift);
			/*
			 * We don't need the struct page reference any more,
			 * switch to the physical address.
			 */
			mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* Free the references taken */
	unpin_user_pages(mem->hpages, pinned);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	account_locked_vm(mm, locked_entries, false);

	return ret;
}

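/*
 * Preregister a chunk of userspace memory: account it against the
 * locked_vm limit, pin it and publish it for lookups by the IOMMU code.
 *
 * A minimal usage sketch (hypothetical caller, error handling elided;
 * the region typically lives as long as the IOMMU mappings do):
 *
 *	struct mm_iommu_table_group_mem_t *mem;
 *
 *	if (!mm_iommu_new(current->mm, ua, entries, &mem)) {
 *		// translate with mm_iommu_ua_to_hpa(), hold mappings
 *		// via mm_iommu_mapped_inc()/mm_iommu_mapped_dec()
 *		mm_iommu_put(current->mm, mem);
 *	}
 */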
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

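/* Same as mm_iommu_new() but for device memory at @dev_hpa: no pinning */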
long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

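/*
 * Drop the page references taken at pin time, transferring any DIRTY
 * bit recorded in the low bits of the hpa back to the struct page.
 */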
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		unpin_user_page(page);

		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}

long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	account_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next,
				lockdep_is_held(&mem_list_mutex)) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

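/*
 * Translate a userspace address within a preregistered region into a
 * host physical address, refusing page sizes bigger than what the
 * region can back (mem->pageshift).
 */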
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);

bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	rcu_read_lock();
	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			/* Do not leak the RCU read lock on the found path */
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}