// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp. All rights reserved.
 * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

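/*
 * Add @npages to the mm's locked_vm counter, failing with -ENOMEM if that
 * would exceed RLIMIT_MEMLOCK and the caller lacks CAP_IPC_LOCK.
 */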
static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}

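/*
 * Subtract @npages from the mm's locked_vm counter, clamping at the current
 * value if the accounting ever went out of sync.
 */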
static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

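/*
 * Remember the mm which owns this container so that all later accounting and
 * preregistration is done against the same address space; only the first
 * caller's mm is accepted.
 */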
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

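/*
 * Drop the container's reference to a preregistered memory region and remove
 * it from the prereg list.
 */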
static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

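/*
 * Handler for VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: look up the preregistered
 * region covering [vaddr, vaddr + size) and release the container's
 * reference to it.
 */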
static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;
	long ret;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		ret = -ENOENT;
	else
		ret = tce_iommu_prereg_free(container, tcemem);

	mm_iommu_put(container->mm, mem);

	return ret;
}

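/*
 * Handler for VFIO_IOMMU_SPAPR_REGISTER_MEMORY: preregister (and pin) the
 * userspace range [vaddr, vaddr + size) with the mm, or take another
 * reference if the range is already preregistered.
 */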
static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem) {
				ret = -EBUSY;
				goto put_exit;
			}
		}
	} else {
		ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
		if (ret)
			return ret;
	}

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		ret = -ENOMEM;
		goto put_exit;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;

put_exit:
	mm_iommu_put(container->mm, mem);
	return ret;
}

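/*
 * Check that the memory backing @hpa is at least as big as one IOMMU page of
 * @page_shift so that a single TCE cannot grant the device access beyond the
 * pinned chunk.
 */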
static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
		unsigned int page_shift)
{
	struct page *page;
	unsigned long size = 0;

	if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
		return size == (1UL << page_shift);

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

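/*
 * Find the container's IOMMU table whose window covers @ioba and return its
 * index, or -1 if no table matches.
 */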
static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

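/* Return the index of the first unused table slot, or -ENOSPC if all are taken. */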
static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, the map unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult to impossible
	 * to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled. The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
	 * that would effectively kill the guest at random points, much better
	 * enforcing the limit based on the max that the guest can map.
	 *
	 * Unfortunately at the moment it counts whole tables, no matter how
	 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
	 * each with 2GB DMA window, 8GB will be counted here. The reason for
	 * this is that we cannot tell here the amount of RAM used by the guest
	 * as this information is only available from KVM and VFIO is
	 * KVM agnostic.
	 *
	 * So we do not allow enabling a container without a group attached
	 * as there is no way to know how much we should increment
	 * the locked_vm counter.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	struct tce_iommu_prereg *tcemem, *tmtmp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed
	 * by tce_iommu_detach_group() so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
		WARN_ON(tce_iommu_prereg_free(container, tcemem));

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long shift,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
			tbl->it_page_shift, &hpa, &mem);
	if (ret)
		pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
				__func__, be64_to_cpu(*pua), entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = cpu_to_be64(0);
}

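/*
 * Clear @pages TCEs starting at @entry: exchange each TCE with an empty one
 * and drop the reference on the page (or on the preregistered region for v2)
 * that used to be mapped there.
 */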
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;
	unsigned long lastentry = entry + pages;

	for ( ; entry < lastentry; ++entry) {
		if (tbl->it_indirect_levels && tbl->it_userspace) {
			/*
			 * For multilevel tables, we can take a shortcut here
			 * and skip some TCEs as we know that the userspace
			 * addresses cache is a mirror of the real TCE table
			 * and if it is missing some indirect levels, then
			 * the hardware table does not have them allocated
			 * either and therefore does not require updating.
			 */
			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
					entry);
			if (!pua) {
				/* align to level_size which is power of two */
				entry |= tbl->it_level_size - 1;
				continue;
			}
		}

		cond_resched();

		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa,
				&direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

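/*
 * Pin the userspace page backing @tce with get_user_pages_fast() and return
 * its host physical address in @hpa.
 */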
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
			&page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

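/*
 * v1 map path: pin each userspace page and program the corresponding TCE.
 * If a TCE exchange fails midway, the already programmed entries are cleared
 * before returning the error.
 */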
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

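/*
 * v2 map path: translate each userspace address through the preregistered
 * memory cache instead of pinning pages here, and store the userspace
 * address in the table's userspace view for later unmapping.
 */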
static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, tbl->it_page_shift, &hpa, &mem);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = cpu_to_be64(tce);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

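/*
 * Allocate a hardware TCE table via the platform ops, accounting the table's
 * own memory against the owning mm's locked_vm.
 */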
static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	iommu_tce_table_put(tbl);
	decrement_locked_vm(container->mm, pages);
}

static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not so
		 * here we check for the platform capability.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

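/*
 * Create the 32-bit default DMA window if one has been requested (deferred
 * from group attach) and none exists yet.
 */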
static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

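/*
 * Undo tce_iommu_take_ownership(): clear any mappings and hand the tables
 * back to the platform for groups without dynamic DMA window support.
 */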
static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		if (container->tables[i])
			table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

	/* Set all windows to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

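/*
 * Attach an IOMMU group to the container: verify the new group is compatible
 * with any already attached groups, then take ownership of its tables either
 * directly (v1) or via the dynamic DMA window ops (v2).
 */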
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* Check if new group has the same iommu_ops (i.e. compatible) */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);