// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);

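/*
 * Private-data lookup table, keyed by PASID. XA_LIMIT(pasid, pasid)
 * restricts xa_alloc() to exactly the requested index, so the call
 * behaves as "store at this PASID, fail if it is already taken".
 */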
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

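/* Find the entry for @dev on @svm's device list, if any, under RCU. */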
static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

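/*
 * Set up page request queue handling for @iommu: allocate the PRQ ring,
 * a hardware IRQ and an I/O page fault queue, install the threaded IRQ
 * handler, and point the queue head/tail/address registers at the new
 * ring. The error path unwinds in the reverse order of setup.
 */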
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

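/*
 * SVM shares the CPU page tables with the device, so the IOMMU's
 * first-level translation must support whatever the CPU may put in
 * them: 1GB pages when the CPU has GBPAGES, and 5-level paging when
 * LA57 is enabled.
 */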
void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_fl5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

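/*
 * Flush the PASID-based IOTLB for one device, plus its device TLB when
 * ATS is enabled. The caller below carves a request into naturally
 * aligned power-of-two chunks, since the invalidation descriptors take
 * an address/order pair rather than an arbitrary byte length.
 */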
static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled) {
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
		quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
					  svm->pasid, sdev->qdep);
	}
}

static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

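/*
 * Flush all cached translations for @svm's PASID on every bound device:
 * a global PASID-based IOTLB flush (npages == -1), and a full-address
 * range device TLB flush where ATS is in use.
 */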
static void intel_flush_svm_all(struct intel_svm *svm)
{
	struct device_domain_info *info;
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list) {
		info = dev_iommu_priv_get(sdev->dev);

		qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
		if (info->ats_enabled) {
			qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
						 svm->pasid, sdev->qdep,
						 0, 64 - VTD_PAGE_SHIFT);
			quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
						  svm->pasid, sdev->qdep);
		}
	}
	rcu_read_unlock();
}

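/*
 * MMU notifier callback: the CPU page tables for [start, end) have
 * changed, so the IOMMU's cached copies must be invalidated too.
 * A 0..-1UL range is the "flush everything" convention.
 */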
/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
						 struct mm_struct *mm,
						 unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	if (start == 0 && end == -1UL) {
		intel_flush_svm_all(svm);
		return;
	}

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};

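/*
 * Resolve @pasid to its struct intel_svm and, if @dev is bound to it,
 * the matching struct intel_svm_dev. Either output may legitimately be
 * NULL; a non-zero return means the PASID itself was invalid.
 */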
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

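/*
 * Bind @dev to the mm behind @domain under @pasid. The first bind of a
 * given PASID allocates the shared struct intel_svm and registers the
 * MMU notifier; each bind then programs a first-level PASID entry that
 * points the device at mm->pgd, so device and CPU walk the same page
 * tables.
 */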
static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
			     struct iommu_domain *domain, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct mm_struct *mm = domain->mm;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	unsigned long sflags;
	int ret = 0;

	svm = pasid_private_find(pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return -ENOMEM;

		svm->pasid = pasid;
		svm->mm = mm;
		INIT_LIST_HEAD_RCU(&svm->devs);

		svm->notifier.ops = &intel_mmuops;
		ret = mmu_notifier_register(&svm->notifier, mm);
		if (ret) {
			kfree(svm);
			return ret;
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ret;
		}
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	init_rcu_head(&sdev->rcu);
	if (info->ats_enabled) {
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
					    FLPT_DEFAULT_DID, sflags);
	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);

	return 0;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(pasid);
		kfree(svm);
	}

	return ret;
}

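/*
 * Undo one device's bind. When the last device goes away, unregister
 * the notifier and poison the freed intel_svm (0x6b, the slab
 * POISON_FREE byte) so that late page faults against the stale PASID
 * fail loudly instead of subtly.
 */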
void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	struct mm_struct *mm;

	if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
		return;
	mm = svm->mm;

	if (sdev) {
		list_del_rcu(&sdev->list);
		kfree_rcu(sdev, rcu);

		if (list_empty(&svm->devs)) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			pasid_private_remove(svm->pasid);
			/*
			 * We mandate that no page faults may be outstanding
			 * for the PASID when intel_svm_remove_dev_pasid() is
			 * called. If that is not obeyed, subtle errors will
			 * happen. Let's make them less subtle...
			 */
			memset(svm, 0x6b, sizeof(*svm));
			kfree(svm);
		}
	}
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

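/*
 * A canonical address sign-extends bit __VIRTUAL_MASK_SHIFT through
 * bit 63; shifting the valid bits up and arithmetically back down
 * reproduces the value only if that was already the case.
 */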
static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. It then follows the
 * steps described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain_id_iommu(domain, iommu);
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	iopf_queue_flush_dev(dev);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

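/*
 * Translate a hardware page request descriptor into a generic
 * iommu_fault_event and hand it to the fault reporting framework.
 */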
static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				struct page_req_dsc *desc)
{
	struct iommu_fault_event event;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	/* Fill in event data for device specific processing */
	memset(&event, 0, sizeof(struct iommu_fault_event));
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set the last-page-in-group bit if private data is present:
		 * a page response is then required, just as it is for LPIG.
		 * iommu_report_device_fault() doesn't understand this vendor
		 * specific requirement, so we set last_page as a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use
		 * them to monitor the prq handling latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set. This is an additional VT-d feature beyond
	 * PCI ATS spec.
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}

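/*
 * Threaded IRQ handler for the page request queue: walk the ring from
 * head to tail, sanity-check each descriptor, report valid requests as
 * I/O page faults, and respond QI_RESP_INVALID to malformed ones. The
 * overflow (PRO) bit is only cleared once the ring has been emptied.
 */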
static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_iommu *iommu = d;
	struct page_req_dsc *req;
	int head, tail, handled;
	struct pci_dev *pdev;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		pdev = pci_get_domain_bus_and_slot(iommu->segment,
						   PCI_BUS_NUM(req->rid),
						   req->rid & 0xff);
		/*
		 * If the prq is handled outside the iommu driver by a
		 * receiver of the fault notifiers, we skip the page
		 * response here.
		 */
		if (!pdev)
			goto bad_req;

		if (intel_svm_prq_report(iommu, &pdev->dev, req))
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
		else
			trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
					 req->priv_data[0], req->priv_data[1],
					 iommu->prq_seq_number++);
		pci_dev_put(pdev);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

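/*
 * Deliver a page group response for a previously reported page request.
 * When the private data fields carried a timestamp instead of hardware
 * data, use it to update the PRQ handling latency statistics.
 */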
int intel_svm_page_response(struct device *dev,
			    struct iommu_fault_event *evt,
			    struct iommu_page_response *msg)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	u8 bus = info->bus, devfn = info->devfn;
	struct iommu_fault_page_request *prm;
	bool private_present;
	bool pasid_present;
	bool last_page;
	int ret = 0;
	u16 sid;

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	if (!pasid_present) {
		ret = -EINVAL;
		goto out;
	}

	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
				ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
out:
	return ret;
}

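/*
 * SVA domain plumbing: an SVA domain is a thin dmar_domain whose
 * set_dev_pasid operation binds the domain's mm to the device.
 */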
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
				   struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	return intel_svm_bind_mm(iommu, dev, domain, pasid);
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
	kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
	.set_dev_pasid = intel_svm_set_dev_pasid,
	.free = intel_svm_domain_free
};

struct iommu_domain *intel_svm_domain_alloc(void)
{
	struct dmar_domain *domain;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;
	domain->domain.ops = &intel_svm_domain_ops;

	return &domain->domain;
}