Commit | Line | Data |
---|---|---|
45051539 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
e3c495c7 JR |
2 | /* |
3 | * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. | |
63ce3ae8 | 4 | * Author: Joerg Roedel <jroedel@suse.de> |
e3c495c7 JR |
5 | */ |
6 | ||
101fa037 JR |
7 | #define pr_fmt(fmt) "AMD-Vi: " fmt |
8 | ||
8736b2c3 | 9 | #include <linux/mmu_notifier.h> |
ed96f228 JR |
10 | #include <linux/amd-iommu.h> |
11 | #include <linux/mm_types.h> | |
8736b2c3 | 12 | #include <linux/profile.h> |
e3c495c7 | 13 | #include <linux/module.h> |
2d5503b6 | 14 | #include <linux/sched.h> |
6e84f315 | 15 | #include <linux/sched/mm.h> |
ed96f228 | 16 | #include <linux/iommu.h> |
028eeacc | 17 | #include <linux/wait.h> |
ed96f228 JR |
18 | #include <linux/pci.h> |
19 | #include <linux/gfp.h> | |
20 | ||
028eeacc | 21 | #include "amd_iommu_types.h" |
ed96f228 | 22 | #include "amd_iommu_proto.h" |
e3c495c7 JR |
23 | |
24 | MODULE_LICENSE("GPL v2"); | |
63ce3ae8 | 25 | MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>"); |
e3c495c7 | 26 | |
ed96f228 JR |
27 | #define MAX_DEVICES 0x10000 |
28 | #define PRI_QUEUE_SIZE 512 | |
29 | ||
30 | struct pri_queue { | |
31 | atomic_t inflight; | |
32 | bool finish; | |
028eeacc | 33 | int status; |
ed96f228 JR |
34 | }; |
35 | ||
36 | struct pasid_state { | |
37 | struct list_head list; /* For global state-list */ | |
38 | atomic_t count; /* Reference count */ | |
d73a6d72 | 39 | unsigned mmu_notifier_count; /* Counting nested mmu_notifier |
e79df31c | 40 | calls */ |
ed96f228 | 41 | struct mm_struct *mm; /* mm_struct for the faults */ |
ff6d0cce | 42 | struct mmu_notifier mn; /* mmu_notifier handle */ |
ed96f228 JR |
43 | struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ |
44 | struct device_state *device_state; /* Link to our device_state */ | |
45 | int pasid; /* PASID index */ | |
d9e1611e JR |
46 | bool invalid; /* Used during setup and |
47 | teardown of the pasid */ | |
d73a6d72 JR |
48 | spinlock_t lock; /* Protect pri_queues and |
49 | mmu_notifier_count */ |
028eeacc | 50 | wait_queue_head_t wq; /* To wait for count == 0 */ |
ed96f228 JR |
51 | }; |
52 | ||
53 | struct device_state { | |
741669c7 JR |
54 | struct list_head list; |
55 | u16 devid; | |
ed96f228 JR |
56 | atomic_t count; |
57 | struct pci_dev *pdev; | |
58 | struct pasid_state **states; | |
59 | struct iommu_domain *domain; | |
60 | int pasid_levels; | |
61 | int max_pasids; | |
175d6146 | 62 | amd_iommu_invalid_ppr_cb inv_ppr_cb; |
bc21662f | 63 | amd_iommu_invalidate_ctx inv_ctx_cb; |
ed96f228 | 64 | spinlock_t lock; |
028eeacc JR |
65 | wait_queue_head_t wq; |
66 | }; | |
67 | ||
68 | struct fault { | |
69 | struct work_struct work; | |
70 | struct device_state *dev_state; | |
71 | struct pasid_state *state; | |
72 | struct mm_struct *mm; | |
73 | u64 address; | |
74 | u16 devid; | |
75 | u16 pasid; | |
76 | u16 tag; | |
77 | u16 finish; | |
78 | u16 flags; | |
ed96f228 JR |
79 | }; |
80 | ||
741669c7 | 81 | static LIST_HEAD(state_list); |
ed96f228 JR |
82 | static spinlock_t state_lock; |
83 | ||
028eeacc JR |
84 | static struct workqueue_struct *iommu_wq; |
85 | ||
2d5503b6 | 86 | static void free_pasid_states(struct device_state *dev_state); |
ed96f228 JR |
87 | |
88 | static u16 device_id(struct pci_dev *pdev) | |
89 | { | |
90 | u16 devid; | |
91 | ||
92 | devid = pdev->bus->number; | |
93 | devid = (devid << 8) | pdev->devfn; | |
94 | ||
95 | return devid; | |
96 | } | |
97 | ||
b87d2d7c JR |
98 | static struct device_state *__get_device_state(u16 devid) |
99 | { | |
741669c7 JR |
100 | struct device_state *dev_state; |
101 | ||
102 | list_for_each_entry(dev_state, &state_list, list) { | |
103 | if (dev_state->devid == devid) | |
104 | return dev_state; | |
105 | } | |
106 | ||
107 | return NULL; | |
b87d2d7c JR |
108 | } |
109 | ||
ed96f228 JR |
110 | static struct device_state *get_device_state(u16 devid) |
111 | { | |
112 | struct device_state *dev_state; | |
113 | unsigned long flags; | |
114 | ||
115 | spin_lock_irqsave(&state_lock, flags); | |
b87d2d7c | 116 | dev_state = __get_device_state(devid); |
ed96f228 JR |
117 | if (dev_state != NULL) |
118 | atomic_inc(&dev_state->count); | |
119 | spin_unlock_irqrestore(&state_lock, flags); | |
120 | ||
121 | return dev_state; | |
122 | } | |
123 | ||
124 | static void free_device_state(struct device_state *dev_state) | |
125 | { | |
55c99a4d JR |
126 | struct iommu_group *group; |
127 | ||
2d5503b6 JR |
128 | /* |
129 | * First detach device from domain - No more PRI requests will arrive | |
130 | * from that device after it is unbound from the IOMMUv2 domain. | |
131 | */ | |
55c99a4d JR |
132 | group = iommu_group_get(&dev_state->pdev->dev); |
133 | if (WARN_ON(!group)) | |
134 | return; | |
135 | ||
136 | iommu_detach_group(dev_state->domain, group); | |
137 | ||
138 | iommu_group_put(group); | |
2d5503b6 JR |
139 | |
140 | /* Everything is down now, free the IOMMUv2 domain */ | |
ed96f228 | 141 | iommu_domain_free(dev_state->domain); |
2d5503b6 JR |
142 | |
143 | /* Finally get rid of the device-state */ | |
ed96f228 JR |
144 | kfree(dev_state); |
145 | } | |
146 | ||
147 | static void put_device_state(struct device_state *dev_state) | |
148 | { | |
149 | if (atomic_dec_and_test(&dev_state->count)) | |
028eeacc | 150 | wake_up(&dev_state->wq); |
ed96f228 JR |
151 | } |
152 | ||
2d5503b6 JR |
153 | /* Must be called under dev_state->lock */ |
154 | static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, | |
155 | int pasid, bool alloc) | |
156 | { | |
157 | struct pasid_state **root, **ptr; | |
158 | int level, index; | |
159 | ||
160 | level = dev_state->pasid_levels; | |
161 | root = dev_state->states; | |
162 | ||
163 | while (true) { | |
164 | ||
165 | index = (pasid >> (9 * level)) & 0x1ff; | |
166 | ptr = &root[index]; | |
167 | ||
168 | if (level == 0) | |
169 | break; | |
170 | ||
171 | if (*ptr == NULL) { | |
172 | if (!alloc) | |
173 | return NULL; | |
174 | ||
175 | *ptr = (void *)get_zeroed_page(GFP_ATOMIC); | |
176 | if (*ptr == NULL) | |
177 | return NULL; | |
178 | } | |
179 | ||
180 | root = (struct pasid_state **)*ptr; | |
181 | level -= 1; | |
182 | } | |
183 | ||
184 | return ptr; | |
185 | } | |
186 | ||
187 | static int set_pasid_state(struct device_state *dev_state, | |
188 | struct pasid_state *pasid_state, | |
189 | int pasid) | |
190 | { | |
191 | struct pasid_state **ptr; | |
192 | unsigned long flags; | |
193 | int ret; | |
194 | ||
195 | spin_lock_irqsave(&dev_state->lock, flags); | |
196 | ptr = __get_pasid_state_ptr(dev_state, pasid, true); | |
197 | ||
198 | ret = -ENOMEM; | |
199 | if (ptr == NULL) | |
200 | goto out_unlock; | |
201 | ||
202 | ret = -ENOMEM; | |
203 | if (*ptr != NULL) | |
204 | goto out_unlock; | |
205 | ||
206 | *ptr = pasid_state; | |
207 | ||
208 | ret = 0; | |
209 | ||
210 | out_unlock: | |
211 | spin_unlock_irqrestore(&dev_state->lock, flags); | |
212 | ||
213 | return ret; | |
214 | } | |
215 | ||
216 | static void clear_pasid_state(struct device_state *dev_state, int pasid) | |
217 | { | |
218 | struct pasid_state **ptr; | |
219 | unsigned long flags; | |
220 | ||
221 | spin_lock_irqsave(&dev_state->lock, flags); | |
222 | ptr = __get_pasid_state_ptr(dev_state, pasid, true); | |
223 | ||
224 | if (ptr == NULL) | |
225 | goto out_unlock; | |
226 | ||
227 | *ptr = NULL; | |
228 | ||
229 | out_unlock: | |
230 | spin_unlock_irqrestore(&dev_state->lock, flags); | |
231 | } | |
232 | ||
233 | static struct pasid_state *get_pasid_state(struct device_state *dev_state, | |
234 | int pasid) | |
235 | { | |
236 | struct pasid_state **ptr, *ret = NULL; | |
237 | unsigned long flags; | |
238 | ||
239 | spin_lock_irqsave(&dev_state->lock, flags); | |
240 | ptr = __get_pasid_state_ptr(dev_state, pasid, false); | |
241 | ||
242 | if (ptr == NULL) | |
243 | goto out_unlock; | |
244 | ||
245 | ret = *ptr; | |
246 | if (ret) | |
247 | atomic_inc(&ret->count); | |
248 | ||
249 | out_unlock: | |
250 | spin_unlock_irqrestore(&dev_state->lock, flags); | |
251 | ||
252 | return ret; | |
253 | } | |
254 | ||
255 | static void free_pasid_state(struct pasid_state *pasid_state) | |
256 | { | |
257 | kfree(pasid_state); | |
258 | } | |
259 | ||
260 | static void put_pasid_state(struct pasid_state *pasid_state) | |
261 | { | |
1c51099a | 262 | if (atomic_dec_and_test(&pasid_state->count)) |
028eeacc | 263 | wake_up(&pasid_state->wq); |
2d5503b6 JR |
264 | } |
265 | ||
028eeacc JR |
266 | static void put_pasid_state_wait(struct pasid_state *pasid_state) |
267 | { | |
1bf1b431 | 268 | atomic_dec(&pasid_state->count); |
a1bec062 | 269 | wait_event(pasid_state->wq, !atomic_read(&pasid_state->count)); |
028eeacc JR |
270 | free_pasid_state(pasid_state); |
271 | } | |
272 | ||
61feb438 | 273 | static void unbind_pasid(struct pasid_state *pasid_state) |
8736b2c3 JR |
274 | { |
275 | struct iommu_domain *domain; | |
276 | ||
277 | domain = pasid_state->device_state->domain; | |
278 | ||
53d340ef JR |
279 | /* |
280 | * Mark pasid_state as invalid; no more faults will be added to the |
281 | * work queue after this is visible everywhere. | |
282 | */ | |
283 | pasid_state->invalid = true; | |
284 | ||
285 | /* Make sure this is visible */ | |
286 | smp_wmb(); | |
287 | ||
288 | /* After this the device/pasid can't access the mm anymore */ | |
8736b2c3 | 289 | amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); |
8736b2c3 JR |
290 | |
291 | /* Make sure no more pending faults are in the queue */ | |
292 | flush_workqueue(iommu_wq); | |
8736b2c3 JR |
293 | } |
294 | ||
2d5503b6 JR |
295 | static void free_pasid_states_level1(struct pasid_state **tbl) |
296 | { | |
297 | int i; | |
298 | ||
299 | for (i = 0; i < 512; ++i) { | |
300 | if (tbl[i] == NULL) | |
301 | continue; | |
302 | ||
303 | free_page((unsigned long)tbl[i]); | |
304 | } | |
305 | } | |
306 | ||
307 | static void free_pasid_states_level2(struct pasid_state **tbl) | |
308 | { | |
309 | struct pasid_state **ptr; | |
310 | int i; | |
311 | ||
312 | for (i = 0; i < 512; ++i) { | |
313 | if (tbl[i] == NULL) | |
314 | continue; | |
315 | ||
316 | ptr = (struct pasid_state **)tbl[i]; | |
317 | free_pasid_states_level1(ptr); | |
318 | } | |
319 | } | |
320 | ||
321 | static void free_pasid_states(struct device_state *dev_state) | |
322 | { | |
323 | struct pasid_state *pasid_state; | |
324 | int i; | |
325 | ||
326 | for (i = 0; i < dev_state->max_pasids; ++i) { | |
327 | pasid_state = get_pasid_state(dev_state, i); | |
328 | if (pasid_state == NULL) | |
329 | continue; | |
330 | ||
2d5503b6 | 331 | put_pasid_state(pasid_state); |
a40d4c67 JR |
332 | |
333 | /* | |
334 | * This will call the mn_release function and | |
335 | * unbind the PASID | |
336 | */ | |
337 | mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); | |
c5db16ad JR |
338 | |
339 | put_pasid_state_wait(pasid_state); /* Reference taken in | |
daff2f9c | 340 | amd_iommu_bind_pasid */ |
75058a30 JR |
341 | |
342 | /* Drop reference taken in amd_iommu_bind_pasid */ | |
343 | put_device_state(dev_state); | |
2d5503b6 JR |
344 | } |
345 | ||
346 | if (dev_state->pasid_levels == 2) | |
347 | free_pasid_states_level2(dev_state->states); | |
348 | else if (dev_state->pasid_levels == 1) | |
349 | free_pasid_states_level1(dev_state->states); | |
23d3a98c JR |
350 | else |
351 | BUG_ON(dev_state->pasid_levels != 0); | |
2d5503b6 JR |
352 | |
353 | free_page((unsigned long)dev_state->states); | |
354 | } | |
355 | ||
8736b2c3 JR |
356 | static struct pasid_state *mn_to_state(struct mmu_notifier *mn) |
357 | { | |
358 | return container_of(mn, struct pasid_state, mn); | |
359 | } | |
360 | ||
e7cc3dd4 JR |
361 | static void mn_invalidate_range(struct mmu_notifier *mn, |
362 | struct mm_struct *mm, | |
363 | unsigned long start, unsigned long end) | |
8736b2c3 JR |
364 | { |
365 | struct pasid_state *pasid_state; | |
366 | struct device_state *dev_state; | |
367 | ||
368 | pasid_state = mn_to_state(mn); | |
369 | dev_state = pasid_state->device_state; | |
370 | ||
e7cc3dd4 JR |
371 | if ((start ^ (end - 1)) < PAGE_SIZE) |
372 | amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, | |
373 | start); | |
374 | else | |
375 | amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid); | |
8736b2c3 JR |
376 | } |
377 | ||
a40d4c67 JR |
378 | static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) |
379 | { | |
380 | struct pasid_state *pasid_state; | |
381 | struct device_state *dev_state; | |
d9e1611e | 382 | bool run_inv_ctx_cb; |
a40d4c67 JR |
383 | |
384 | might_sleep(); | |
385 | ||
d9e1611e JR |
386 | pasid_state = mn_to_state(mn); |
387 | dev_state = pasid_state->device_state; | |
388 | run_inv_ctx_cb = !pasid_state->invalid; | |
a40d4c67 | 389 | |
940f700d | 390 | if (run_inv_ctx_cb && dev_state->inv_ctx_cb) |
a40d4c67 JR |
391 | dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); |
392 | ||
61feb438 | 393 | unbind_pasid(pasid_state); |
a40d4c67 JR |
394 | } |
395 | ||
759ce23b | 396 | static const struct mmu_notifier_ops iommu_mn = { |
a40d4c67 | 397 | .release = mn_release, |
e7cc3dd4 | 398 | .invalidate_range = mn_invalidate_range, |
8736b2c3 JR |
399 | }; |
400 | ||
028eeacc JR |
401 | static void set_pri_tag_status(struct pasid_state *pasid_state, |
402 | u16 tag, int status) | |
403 | { | |
404 | unsigned long flags; | |
405 | ||
406 | spin_lock_irqsave(&pasid_state->lock, flags); | |
407 | pasid_state->pri[tag].status = status; | |
408 | spin_unlock_irqrestore(&pasid_state->lock, flags); | |
409 | } | |
410 | ||
411 | static void finish_pri_tag(struct device_state *dev_state, | |
412 | struct pasid_state *pasid_state, | |
413 | u16 tag) | |
414 | { | |
415 | unsigned long flags; | |
416 | ||
417 | spin_lock_irqsave(&pasid_state->lock, flags); | |
418 | if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && | |
419 | pasid_state->pri[tag].finish) { | |
420 | amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, | |
421 | pasid_state->pri[tag].status, tag); | |
422 | pasid_state->pri[tag].finish = false; | |
423 | pasid_state->pri[tag].status = PPR_SUCCESS; | |
424 | } | |
425 | spin_unlock_irqrestore(&pasid_state->lock, flags); | |
426 | } | |
427 | ||
9dc00f4c JB |
428 | static void handle_fault_error(struct fault *fault) |
429 | { | |
430 | int status; | |
431 | ||
432 | if (!fault->dev_state->inv_ppr_cb) { | |
433 | set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); | |
434 | return; | |
435 | } | |
436 | ||
437 | status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, | |
438 | fault->pasid, | |
439 | fault->address, | |
440 | fault->flags); | |
441 | switch (status) { | |
442 | case AMD_IOMMU_INV_PRI_RSP_SUCCESS: | |
443 | set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); | |
444 | break; | |
445 | case AMD_IOMMU_INV_PRI_RSP_INVALID: | |
446 | set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); | |
447 | break; | |
448 | case AMD_IOMMU_INV_PRI_RSP_FAIL: | |
449 | set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); | |
450 | break; | |
451 | default: | |
452 | BUG(); | |
453 | } | |
454 | } | |
455 | ||
7b5cc1a9 JR |
456 | static bool access_error(struct vm_area_struct *vma, struct fault *fault) |
457 | { | |
458 | unsigned long requested = 0; | |
459 | ||
460 | if (fault->flags & PPR_FAULT_EXEC) | |
461 | requested |= VM_EXEC; | |
462 | ||
463 | if (fault->flags & PPR_FAULT_READ) | |
464 | requested |= VM_READ; | |
465 | ||
466 | if (fault->flags & PPR_FAULT_WRITE) | |
467 | requested |= VM_WRITE; | |
468 | ||
469 | return (requested & ~vma->vm_flags) != 0; | |
470 | } | |
471 | ||
028eeacc JR |
472 | static void do_fault(struct work_struct *work) |
473 | { | |
474 | struct fault *fault = container_of(work, struct fault, work); | |
9dc00f4c | 475 | struct vm_area_struct *vma; |
50a7ca3c | 476 | vm_fault_t ret = VM_FAULT_ERROR; |
43c0ea20 JR |
477 | unsigned int flags = 0; |
478 | struct mm_struct *mm; | |
9dc00f4c | 479 | u64 address; |
028eeacc | 480 | |
9dc00f4c JB |
481 | mm = fault->state->mm; |
482 | address = fault->address; | |
483 | ||
43c0ea20 JR |
484 | if (fault->flags & PPR_FAULT_USER) |
485 | flags |= FAULT_FLAG_USER; | |
486 | if (fault->flags & PPR_FAULT_WRITE) | |
487 | flags |= FAULT_FLAG_WRITE; | |
1b2ee126 | 488 | flags |= FAULT_FLAG_REMOTE; |
43c0ea20 | 489 | |
9dc00f4c JB |
490 | down_read(&mm->mmap_sem); |
491 | vma = find_extend_vma(mm, address); | |
492e7459 | 492 | if (!vma || address < vma->vm_start) |
9dc00f4c | 493 | /* failed to get a vma in the right range */ |
9dc00f4c | 494 | goto out; |
028eeacc | 495 | |
7b5cc1a9 | 496 | /* Check if we have the right permissions on the vma */ |
492e7459 | 497 | if (access_error(vma, fault)) |
d14f6fce | 498 | goto out; |
d14f6fce | 499 | |
dcddffd4 | 500 | ret = handle_mm_fault(vma, address, flags); |
492e7459 | 501 | out: |
9dc00f4c JB |
502 | up_read(&mm->mmap_sem); |
503 | ||
492e7459 JR |
504 | if (ret & VM_FAULT_ERROR) |
505 | /* failed to service fault */ | |
506 | handle_fault_error(fault); | |
507 | ||
028eeacc JR |
508 | finish_pri_tag(fault->dev_state, fault->state, fault->tag); |
509 | ||
510 | put_pasid_state(fault->state); | |
511 | ||
512 | kfree(fault); | |
513 | } | |
514 | ||
515 | static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) | |
516 | { | |
517 | struct amd_iommu_fault *iommu_fault; | |
518 | struct pasid_state *pasid_state; | |
519 | struct device_state *dev_state; | |
520 | unsigned long flags; | |
521 | struct fault *fault; | |
522 | bool finish; | |
daae2d25 | 523 | u16 tag, devid; |
028eeacc | 524 | int ret; |
daae2d25 BH |
525 | struct iommu_dev_data *dev_data; |
526 | struct pci_dev *pdev = NULL; | |
028eeacc JR |
527 | |
528 | iommu_fault = data; | |
529 | tag = iommu_fault->tag & 0x1ff; | |
530 | finish = (iommu_fault->tag >> 9) & 1; | |
531 | ||
daae2d25 | 532 | devid = iommu_fault->device_id; |
d5bf0f4f SK |
533 | pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), |
534 | devid & 0xff); | |
daae2d25 BH |
535 | if (!pdev) |
536 | return -ENODEV; | |
537 | dev_data = get_dev_data(&pdev->dev); | |
538 | ||
539 | /* In a kdump kernel the pci dev is not initialized yet -> send INVALID */ |
028eeacc | 540 | ret = NOTIFY_DONE; |
daae2d25 BH |
541 | if (translation_pre_enabled(amd_iommu_rlookup_table[devid]) |
542 | && dev_data->defer_attach) { | |
543 | amd_iommu_complete_ppr(pdev, iommu_fault->pasid, | |
544 | PPR_INVALID, tag); | |
545 | goto out; | |
546 | } | |
547 | ||
028eeacc JR |
548 | dev_state = get_device_state(iommu_fault->device_id); |
549 | if (dev_state == NULL) | |
550 | goto out; | |
551 | ||
552 | pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); | |
53d340ef | 553 | if (pasid_state == NULL || pasid_state->invalid) { |
028eeacc JR |
554 | /* We know the device but not the PASID -> send INVALID */ |
555 | amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, | |
556 | PPR_INVALID, tag); | |
557 | goto out_drop_state; | |
558 | } | |
559 | ||
560 | spin_lock_irqsave(&pasid_state->lock, flags); | |
561 | atomic_inc(&pasid_state->pri[tag].inflight); | |
562 | if (finish) | |
563 | pasid_state->pri[tag].finish = true; | |
564 | spin_unlock_irqrestore(&pasid_state->lock, flags); | |
565 | ||
566 | fault = kzalloc(sizeof(*fault), GFP_ATOMIC); | |
567 | if (fault == NULL) { | |
568 | /* We are OOM - send success and let the device re-fault */ | |
569 | finish_pri_tag(dev_state, pasid_state, tag); | |
570 | goto out_drop_state; | |
571 | } | |
572 | ||
573 | fault->dev_state = dev_state; | |
574 | fault->address = iommu_fault->address; | |
575 | fault->state = pasid_state; | |
576 | fault->tag = tag; | |
577 | fault->finish = finish; | |
b00675b8 | 578 | fault->pasid = iommu_fault->pasid; |
028eeacc JR |
579 | fault->flags = iommu_fault->flags; |
580 | INIT_WORK(&fault->work, do_fault); | |
581 | ||
582 | queue_work(iommu_wq, &fault->work); | |
583 | ||
584 | ret = NOTIFY_OK; | |
585 | ||
586 | out_drop_state: | |
dc88db7e JR |
587 | |
588 | if (ret != NOTIFY_OK && pasid_state) | |
589 | put_pasid_state(pasid_state); | |
590 | ||
028eeacc JR |
591 | put_device_state(dev_state); |
592 | ||
593 | out: | |
594 | return ret; | |
595 | } | |
596 | ||
597 | static struct notifier_block ppr_nb = { | |
598 | .notifier_call = ppr_notifier, | |
599 | }; | |
600 | ||
2d5503b6 JR |
601 | int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, |
602 | struct task_struct *task) | |
603 | { | |
604 | struct pasid_state *pasid_state; | |
605 | struct device_state *dev_state; | |
f0aac63b | 606 | struct mm_struct *mm; |
2d5503b6 JR |
607 | u16 devid; |
608 | int ret; | |
609 | ||
610 | might_sleep(); | |
611 | ||
612 | if (!amd_iommu_v2_supported()) | |
613 | return -ENODEV; | |
614 | ||
615 | devid = device_id(pdev); | |
616 | dev_state = get_device_state(devid); | |
617 | ||
618 | if (dev_state == NULL) | |
619 | return -EINVAL; | |
620 | ||
621 | ret = -EINVAL; | |
622 | if (pasid < 0 || pasid >= dev_state->max_pasids) | |
623 | goto out; | |
624 | ||
625 | ret = -ENOMEM; | |
626 | pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); | |
627 | if (pasid_state == NULL) | |
628 | goto out; | |
629 | ||
f0aac63b | 630 | |
2d5503b6 | 631 | atomic_set(&pasid_state->count, 1); |
028eeacc | 632 | init_waitqueue_head(&pasid_state->wq); |
2c13d47a JR |
633 | spin_lock_init(&pasid_state->lock); |
634 | ||
f0aac63b | 635 | mm = get_task_mm(task); |
f0aac63b | 636 | pasid_state->mm = mm; |
2d5503b6 JR |
637 | pasid_state->device_state = dev_state; |
638 | pasid_state->pasid = pasid; | |
d9e1611e JR |
639 | pasid_state->invalid = true; /* Marked valid only once setup |
640 | of the pasid is complete */ |
8736b2c3 | 641 | pasid_state->mn.ops = &iommu_mn; |
2d5503b6 JR |
642 | |
643 | if (pasid_state->mm == NULL) | |
644 | goto out_free; | |
645 | ||
f0aac63b | 646 | mmu_notifier_register(&pasid_state->mn, mm); |
8736b2c3 | 647 | |
2d5503b6 JR |
648 | ret = set_pasid_state(dev_state, pasid_state, pasid); |
649 | if (ret) | |
8736b2c3 | 650 | goto out_unregister; |
2d5503b6 JR |
651 | |
652 | ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, | |
653 | __pa(pasid_state->mm->pgd)); | |
654 | if (ret) | |
655 | goto out_clear_state; | |
656 | ||
d9e1611e JR |
657 | /* Now we are ready to handle faults */ |
658 | pasid_state->invalid = false; | |
659 | ||
f0aac63b JR |
660 | /* |
661 | * Drop the reference to the mm_struct here. We rely on the | |
662 | * mmu_notifier release call-back to inform us when the mm | |
663 | * is going away. | |
664 | */ | |
665 | mmput(mm); | |
666 | ||
2d5503b6 JR |
667 | return 0; |
668 | ||
669 | out_clear_state: | |
670 | clear_pasid_state(dev_state, pasid); | |
671 | ||
8736b2c3 | 672 | out_unregister: |
f0aac63b | 673 | mmu_notifier_unregister(&pasid_state->mn, mm); |
73dbd4a4 | 674 | mmput(mm); |
8736b2c3 | 675 | |
2d5503b6 | 676 | out_free: |
028eeacc | 677 | free_pasid_state(pasid_state); |
2d5503b6 JR |
678 | |
679 | out: | |
680 | put_device_state(dev_state); | |
681 | ||
682 | return ret; | |
683 | } | |
684 | EXPORT_SYMBOL(amd_iommu_bind_pasid); | |
685 | ||
686 | void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) | |
687 | { | |
a40d4c67 | 688 | struct pasid_state *pasid_state; |
2d5503b6 JR |
689 | struct device_state *dev_state; |
690 | u16 devid; | |
691 | ||
692 | might_sleep(); | |
693 | ||
694 | if (!amd_iommu_v2_supported()) | |
695 | return; | |
696 | ||
697 | devid = device_id(pdev); | |
698 | dev_state = get_device_state(devid); | |
699 | if (dev_state == NULL) | |
700 | return; | |
701 | ||
702 | if (pasid < 0 || pasid >= dev_state->max_pasids) | |
703 | goto out; | |
704 | ||
a40d4c67 JR |
705 | pasid_state = get_pasid_state(dev_state, pasid); |
706 | if (pasid_state == NULL) | |
707 | goto out; | |
708 | /* | |
709 | * Drop reference taken here. We are safe because we still hold | |
710 | * the reference taken in the amd_iommu_bind_pasid function. | |
711 | */ | |
712 | put_pasid_state(pasid_state); | |
713 | ||
53d340ef JR |
714 | /* Clear the pasid state so that the pasid can be re-used */ |
715 | clear_pasid_state(dev_state, pasid_state->pasid); | |
716 | ||
f0aac63b | 717 | /* |
fcaa9606 JR |
718 | * Call mmu_notifier_unregister to drop our reference |
719 | * to pasid_state->mm | |
f0aac63b | 720 | */ |
fcaa9606 | 721 | mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); |
2d5503b6 | 722 | |
c5db16ad | 723 | put_pasid_state_wait(pasid_state); /* Reference taken in |
daff2f9c | 724 | amd_iommu_bind_pasid */ |
2d5503b6 | 725 | out: |
75058a30 JR |
726 | /* Drop reference taken in this function */ |
727 | put_device_state(dev_state); | |
728 | ||
729 | /* Drop reference taken in amd_iommu_bind_pasid */ | |
2d5503b6 JR |
730 | put_device_state(dev_state); |
731 | } | |
732 | EXPORT_SYMBOL(amd_iommu_unbind_pasid); | |
733 | ||
ed96f228 JR |
734 | int amd_iommu_init_device(struct pci_dev *pdev, int pasids) |
735 | { | |
736 | struct device_state *dev_state; | |
55c99a4d | 737 | struct iommu_group *group; |
ed96f228 JR |
738 | unsigned long flags; |
739 | int ret, tmp; | |
740 | u16 devid; | |
741 | ||
742 | might_sleep(); | |
743 | ||
744 | if (!amd_iommu_v2_supported()) | |
745 | return -ENODEV; | |
746 | ||
747 | if (pasids <= 0 || pasids > (PASID_MASK + 1)) | |
748 | return -EINVAL; | |
749 | ||
750 | devid = device_id(pdev); | |
751 | ||
752 | dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); | |
753 | if (dev_state == NULL) | |
754 | return -ENOMEM; | |
755 | ||
756 | spin_lock_init(&dev_state->lock); | |
028eeacc | 757 | init_waitqueue_head(&dev_state->wq); |
741669c7 JR |
758 | dev_state->pdev = pdev; |
759 | dev_state->devid = devid; | |
ed96f228 JR |
760 | |
761 | tmp = pasids; | |
762 | for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) | |
763 | dev_state->pasid_levels += 1; | |
764 | ||
765 | atomic_set(&dev_state->count, 1); | |
766 | dev_state->max_pasids = pasids; | |
767 | ||
768 | ret = -ENOMEM; | |
769 | dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); | |
770 | if (dev_state->states == NULL) | |
771 | goto out_free_dev_state; | |
772 | ||
773 | dev_state->domain = iommu_domain_alloc(&pci_bus_type); | |
774 | if (dev_state->domain == NULL) | |
775 | goto out_free_states; | |
776 | ||
777 | amd_iommu_domain_direct_map(dev_state->domain); | |
778 | ||
779 | ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); | |
780 | if (ret) | |
781 | goto out_free_domain; | |
782 | ||
55c99a4d | 783 | group = iommu_group_get(&pdev->dev); |
24c790fb DC |
784 | if (!group) { |
785 | ret = -EINVAL; | |
ed96f228 | 786 | goto out_free_domain; |
24c790fb | 787 | } |
ed96f228 | 788 | |
55c99a4d JR |
789 | ret = iommu_attach_group(dev_state->domain, group); |
790 | if (ret != 0) | |
791 | goto out_drop_group; | |
792 | ||
793 | iommu_group_put(group); | |
794 | ||
ed96f228 JR |
795 | spin_lock_irqsave(&state_lock, flags); |
796 | ||
741669c7 | 797 | if (__get_device_state(devid) != NULL) { |
ed96f228 JR |
798 | spin_unlock_irqrestore(&state_lock, flags); |
799 | ret = -EBUSY; | |
800 | goto out_free_domain; | |
801 | } | |
802 | ||
741669c7 | 803 | list_add_tail(&dev_state->list, &state_list); |
ed96f228 JR |
804 | |
805 | spin_unlock_irqrestore(&state_lock, flags); | |
806 | ||
807 | return 0; | |
808 | ||
55c99a4d JR |
809 | out_drop_group: |
810 | iommu_group_put(group); | |
811 | ||
ed96f228 JR |
812 | out_free_domain: |
813 | iommu_domain_free(dev_state->domain); | |
814 | ||
815 | out_free_states: | |
816 | free_page((unsigned long)dev_state->states); | |
817 | ||
818 | out_free_dev_state: | |
819 | kfree(dev_state); | |
820 | ||
821 | return ret; | |
822 | } | |
823 | EXPORT_SYMBOL(amd_iommu_init_device); | |
824 | ||
825 | void amd_iommu_free_device(struct pci_dev *pdev) | |
826 | { | |
827 | struct device_state *dev_state; | |
828 | unsigned long flags; | |
829 | u16 devid; | |
830 | ||
831 | if (!amd_iommu_v2_supported()) | |
832 | return; | |
833 | ||
834 | devid = device_id(pdev); | |
835 | ||
836 | spin_lock_irqsave(&state_lock, flags); | |
837 | ||
b87d2d7c | 838 | dev_state = __get_device_state(devid); |
ed96f228 JR |
839 | if (dev_state == NULL) { |
840 | spin_unlock_irqrestore(&state_lock, flags); | |
841 | return; | |
842 | } | |
843 | ||
741669c7 | 844 | list_del(&dev_state->list); |
ed96f228 JR |
845 | |
846 | spin_unlock_irqrestore(&state_lock, flags); | |
847 | ||
2d5503b6 JR |
848 | /* Get rid of any remaining pasid states */ |
849 | free_pasid_states(dev_state); | |
850 | ||
91f65fac PZ |
851 | put_device_state(dev_state); |
852 | /* | |
853 | * Wait until the last reference is dropped before freeing | |
854 | * the device state. | |
855 | */ | |
856 | wait_event(dev_state->wq, !atomic_read(&dev_state->count)); | |
857 | free_device_state(dev_state); | |
ed96f228 JR |
858 | } |
859 | EXPORT_SYMBOL(amd_iommu_free_device); | |
860 | ||
175d6146 JR |
861 | int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, |
862 | amd_iommu_invalid_ppr_cb cb) | |
863 | { | |
864 | struct device_state *dev_state; | |
865 | unsigned long flags; | |
866 | u16 devid; | |
867 | int ret; | |
868 | ||
869 | if (!amd_iommu_v2_supported()) | |
870 | return -ENODEV; | |
871 | ||
872 | devid = device_id(pdev); | |
873 | ||
874 | spin_lock_irqsave(&state_lock, flags); | |
875 | ||
876 | ret = -EINVAL; | |
b87d2d7c | 877 | dev_state = __get_device_state(devid); |
175d6146 JR |
878 | if (dev_state == NULL) |
879 | goto out_unlock; | |
880 | ||
881 | dev_state->inv_ppr_cb = cb; | |
882 | ||
883 | ret = 0; | |
884 | ||
885 | out_unlock: | |
886 | spin_unlock_irqrestore(&state_lock, flags); | |
887 | ||
888 | return ret; | |
889 | } | |
890 | EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); | |
891 | ||
bc21662f JR |
892 | int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, |
893 | amd_iommu_invalidate_ctx cb) | |
894 | { | |
895 | struct device_state *dev_state; | |
896 | unsigned long flags; | |
897 | u16 devid; | |
898 | int ret; | |
899 | ||
900 | if (!amd_iommu_v2_supported()) | |
901 | return -ENODEV; | |
902 | ||
903 | devid = device_id(pdev); | |
904 | ||
905 | spin_lock_irqsave(&state_lock, flags); | |
906 | ||
907 | ret = -EINVAL; | |
b87d2d7c | 908 | dev_state = __get_device_state(devid); |
bc21662f JR |
909 | if (dev_state == NULL) |
910 | goto out_unlock; | |
911 | ||
912 | dev_state->inv_ctx_cb = cb; | |
913 | ||
914 | ret = 0; | |
915 | ||
916 | out_unlock: | |
917 | spin_unlock_irqrestore(&state_lock, flags); | |
918 | ||
919 | return ret; | |
920 | } | |
921 | EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); | |
922 | ||
e3c495c7 JR |
923 | static int __init amd_iommu_v2_init(void) |
924 | { | |
028eeacc | 925 | int ret; |
ed96f228 | 926 | |
63ce3ae8 | 927 | pr_info("AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>\n"); |
474d567d JR |
928 | |
929 | if (!amd_iommu_v2_supported()) { | |
07db0409 | 930 | pr_info("AMD IOMMUv2 functionality not available on this system\n"); |
474d567d JR |
931 | /* |
932 | * Load anyway to provide the symbols to other modules | |
933 | * which may use AMD IOMMUv2 optionally. | |
934 | */ | |
935 | return 0; | |
936 | } | |
e3c495c7 | 937 | |
ed96f228 JR |
938 | spin_lock_init(&state_lock); |
939 | ||
028eeacc | 940 | ret = -ENOMEM; |
cf7513e7 | 941 | iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); |
8736b2c3 | 942 | if (iommu_wq == NULL) |
741669c7 | 943 | goto out; |
8736b2c3 | 944 | |
028eeacc JR |
945 | amd_iommu_register_ppr_notifier(&ppr_nb); |
946 | ||
e3c495c7 | 947 | return 0; |
028eeacc | 948 | |
741669c7 | 949 | out: |
028eeacc | 950 | return ret; |
e3c495c7 JR |
951 | } |
952 | ||
953 | static void __exit amd_iommu_v2_exit(void) | |
954 | { | |
ed96f228 | 955 | struct device_state *dev_state; |
ed96f228 JR |
956 | int i; |
957 | ||
474d567d JR |
958 | if (!amd_iommu_v2_supported()) |
959 | return; | |
960 | ||
028eeacc JR |
961 | amd_iommu_unregister_ppr_notifier(&ppr_nb); |
962 | ||
963 | flush_workqueue(iommu_wq); | |
964 | ||
965 | /* | |
966 | * The loop below might call flush_workqueue(), so call | |
967 | * destroy_workqueue() after it | |
968 | */ | |
ed96f228 JR |
969 | for (i = 0; i < MAX_DEVICES; ++i) { |
970 | dev_state = get_device_state(i); | |
971 | ||
972 | if (dev_state == NULL) | |
973 | continue; | |
974 | ||
975 | WARN_ON_ONCE(1); | |
976 | ||
ed96f228 | 977 | put_device_state(dev_state); |
028eeacc | 978 | amd_iommu_free_device(dev_state->pdev); |
ed96f228 JR |
979 | } |
980 | ||
028eeacc | 981 | destroy_workqueue(iommu_wq); |
e3c495c7 JR |
982 | } |
983 | ||
984 | module_init(amd_iommu_v2_init); | |
985 | module_exit(amd_iommu_v2_exit); |
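
The file above exports the IOMMUv2 entry points (amd_iommu_init_device, amd_iommu_bind_pasid, amd_iommu_unbind_pasid, amd_iommu_free_device). For context, the sketch below shows one way a device driver might consume that API. It is a hypothetical illustration, not code from this file: the example_* function names, the 16-PASID limit, and the use of PASID 1 for the current task are assumptions made for the example.

```c
/*
 * Hypothetical usage sketch (not part of amd_iommu_v2.c): a driver
 * enabling PASID support for its PCI device via the exported API.
 * The example_* names, the 16-PASID limit and PASID 1 are illustrative.
 */
#include <linux/amd-iommu.h>
#include <linux/pci.h>
#include <linux/sched.h>

static int example_enable_pasid(struct pci_dev *pdev)
{
	int ret;

	/* Allocate per-device state for up to 16 PASIDs */
	ret = amd_iommu_init_device(pdev, 16);
	if (ret)
		return ret;

	/* Bind PASID 1 to the address space of the current task */
	ret = amd_iommu_bind_pasid(pdev, 1, current);
	if (ret) {
		amd_iommu_free_device(pdev);
		return ret;
	}

	return 0;
}

static void example_disable_pasid(struct pci_dev *pdev)
{
	/* Release the PASID, then tear down the per-device state */
	amd_iommu_unbind_pasid(pdev, 1);
	amd_iommu_free_device(pdev);
}
```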