Linux 6.12-rc1
[linux-block.git] / drivers / iommu / intel / pasid.c
CommitLineData
// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */
9
10#define pr_fmt(fmt) "DMAR: " fmt
11
6f7db75e 12#include <linux/bitops.h>
437f35e1 13#include <linux/cpufeature.h>
56283174 14#include <linux/dmar.h>
56283174
LB
15#include <linux/iommu.h>
16#include <linux/memory.h>
cc580e41
LB
17#include <linux/pci.h>
18#include <linux/pci-ats.h>
56283174
LB
19#include <linux/spinlock.h>
20
2585a279 21#include "iommu.h"
02f3effd 22#include "pasid.h"
06c37505 23#include "../iommu-pages.h"
56283174
LB
24
25/*
26 * Intel IOMMU system wide PASID name space:
27 */
56283174 28u32 intel_pasid_max_id = PASID_MAX;
cc580e41
LB
29
30/*
31 * Per device pasid table management:
32 */
cc580e41
LB
33
34/*
35 * Allocate a pasid table for @dev. It should be called in a
36 * single-thread context.
37 */
38int intel_pasid_alloc_table(struct device *dev)
39{
40 struct device_domain_info *info;
41 struct pasid_table *pasid_table;
06c37505 42 struct pasid_dir_entry *dir;
c7b6bac9 43 u32 max_pasid = 0;
4140d77a 44 int order, size;
cc580e41 45
0bbeb01a 46 might_sleep();
586081d3 47 info = dev_iommu_priv_get(dev);
bd7ebb77
NC
48 if (WARN_ON(!info || !dev_is_pci(dev)))
49 return -ENODEV;
50 if (WARN_ON(info->pasid_table))
51 return -EEXIST;
cc580e41 52
0bbeb01a 53 pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
cc580e41
LB
54 if (!pasid_table)
55 return -ENOMEM;
cc580e41 56
0bbeb01a 57 if (info->pasid_supported)
c7b6bac9 58 max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
0bbeb01a
LB
59 intel_pasid_max_id);
60
61 size = max_pasid >> (PASID_PDE_SHIFT - 3);
62 order = size ? get_order(size) : 0;
06c37505
PT
63 dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
64 if (!dir) {
dca4d60f 65 kfree(pasid_table);
cc580e41 66 return -ENOMEM;
dca4d60f 67 }
cc580e41 68
06c37505 69 pasid_table->table = dir;
cc580e41 70 pasid_table->order = order;
0bbeb01a 71 pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
4140d77a 72 info->pasid_table = pasid_table;
cc580e41 73
194b3348 74 if (!ecap_coherent(info->iommu->ecap))
8a3b8e63 75 clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);
194b3348 76
cc580e41
LB
77 return 0;
78}
79
80void intel_pasid_free_table(struct device *dev)
81{
82 struct device_domain_info *info;
83 struct pasid_table *pasid_table;
0bbeb01a
LB
84 struct pasid_dir_entry *dir;
85 struct pasid_entry *table;
86 int i, max_pde;
cc580e41 87
586081d3 88 info = dev_iommu_priv_get(dev);
0bbeb01a 89 if (!info || !dev_is_pci(dev) || !info->pasid_table)
cc580e41
LB
90 return;
91
92 pasid_table = info->pasid_table;
4140d77a 93 info->pasid_table = NULL;
cc580e41 94
0bbeb01a
LB
95 /* Free scalable mode PASID directory tables: */
96 dir = pasid_table->table;
97 max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
98 for (i = 0; i < max_pde; i++) {
99 table = get_pasid_table_from_pde(&dir[i]);
06c37505 100 iommu_free_page(table);
0bbeb01a
LB
101 }
102
06c37505 103 iommu_free_pages(pasid_table->table, pasid_table->order);
cc580e41
LB
104 kfree(pasid_table);
105}
106
107struct pasid_table *intel_pasid_get_table(struct device *dev)
108{
109 struct device_domain_info *info;
110
586081d3 111 info = dev_iommu_priv_get(dev);
cc580e41
LB
112 if (!info)
113 return NULL;
114
115 return info->pasid_table;
116}
117
442b8183 118static int intel_pasid_get_dev_max_id(struct device *dev)
cc580e41
LB
119{
120 struct device_domain_info *info;
121
586081d3 122 info = dev_iommu_priv_get(dev);
cc580e41
LB
123 if (!info || !info->pasid_table)
124 return 0;
125
126 return info->pasid_table->max_pasid;
127}
128
442b8183 129static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
cc580e41 130{
0bbeb01a 131 struct device_domain_info *info;
cc580e41 132 struct pasid_table *pasid_table;
0bbeb01a 133 struct pasid_dir_entry *dir;
cc580e41 134 struct pasid_entry *entries;
0bbeb01a 135 int dir_index, index;
cc580e41
LB
136
137 pasid_table = intel_pasid_get_table(dev);
c7b6bac9 138 if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
cc580e41
LB
139 return NULL;
140
0bbeb01a 141 dir = pasid_table->table;
586081d3 142 info = dev_iommu_priv_get(dev);
0bbeb01a
LB
143 dir_index = pasid >> PASID_PDE_SHIFT;
144 index = pasid & PASID_PTE_MASK;
145
803766cb 146retry:
0bbeb01a
LB
147 entries = get_pasid_table_from_pde(&dir[dir_index]);
148 if (!entries) {
5c555f1f
UB
149 u64 tmp;
150
06c37505 151 entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
803766cb 152 if (!entries)
0bbeb01a 153 return NULL;
0bbeb01a 154
803766cb
LB
155 /*
156 * The pasid directory table entry won't be freed after
157 * allocation. No worry about the race with free and
158 * clear. However, this entry might be populated by others
159 * while we are preparing it. Use theirs with a retry.
160 */
5c555f1f
UB
161 tmp = 0ULL;
162 if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
163 (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
06c37505 164 iommu_free_page(entries);
803766cb
LB
165 goto retry;
166 }
194b3348
JP
167 if (!ecap_coherent(info->iommu->ecap)) {
168 clflush_cache_range(entries, VTD_PAGE_SIZE);
169 clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
170 }
0bbeb01a 171 }
cc580e41 172
0bbeb01a 173 return &entries[index];
cc580e41
LB
174}
175
176/*
177 * Interfaces for PASID table entry manipulation:
178 */
37e91bd4 179static void
c7b6bac9 180intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
cc580e41
LB
181{
182 struct pasid_entry *pe;
183
184 pe = intel_pasid_get_entry(dev, pasid);
185 if (WARN_ON(!pe))
186 return;
187
37e91bd4
LB
188 if (fault_ignore && pasid_pte_is_present(pe))
189 pasid_clear_entry_with_fpd(pe);
190 else
191 pasid_clear_entry(pe);
cc580e41 192}
6f7db75e 193
6f7db75e
LB
194static void
195pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
c7b6bac9 196 u16 did, u32 pasid)
6f7db75e
LB
197{
198 struct qi_desc desc;
199
61a06a16
JP
200 desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
201 QI_PC_PASID(pasid) | QI_PC_TYPE;
6f7db75e
LB
202 desc.qw1 = 0;
203 desc.qw2 = 0;
204 desc.qw3 = 0;
205
8a1d8246 206 qi_submit_sync(iommu, &desc, 1, 0);
6f7db75e
LB
207}
208
6f7db75e
LB
209static void
210devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
c7b6bac9 211 struct device *dev, u32 pasid)
6f7db75e
LB
212{
213 struct device_domain_info *info;
214 u16 sid, qdep, pfsid;
215
586081d3 216 info = dev_iommu_priv_get(dev);
6f7db75e
LB
217 if (!info || !info->ats_enabled)
218 return;
219
4fc82cd9
EZ
220 if (pci_dev_is_disconnected(to_pci_dev(dev)))
221 return;
222
6f7db75e
LB
223 sid = info->bus << 8 | info->devfn;
224 qdep = info->ats_qdep;
225 pfsid = info->pfsid;
226
e7e69461
JP
227 /*
228 * When PASID 0 is used, it indicates RID2PASID(DMA request w/o PASID),
229 * devTLB flush w/o PASID should be used. For non-zero PASID under
230 * SVA usage, device could do DMA with multiple PASIDs. It is more
231 * efficient to flush devTLB specific to the PASID.
232 */
42987801 233 if (pasid == IOMMU_NO_PASID)
e7e69461
JP
234 qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
235 else
236 qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
6f7db75e
LB
237}
238
37e91bd4 239void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
c7b6bac9 240 u32 pasid, bool fault_ignore)
6f7db75e
LB
241{
242 struct pasid_entry *pte;
8798d364 243 u16 did, pgtt;
6f7db75e 244
8430fd3f 245 spin_lock(&iommu->lock);
6f7db75e 246 pte = intel_pasid_get_entry(dev, pasid);
8430fd3f
LB
247 if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
248 spin_unlock(&iommu->lock);
8b74b6ab 249 return;
8430fd3f 250 }
8b74b6ab 251
6f7db75e 252 did = pasid_get_domain_id(pte);
8798d364 253 pgtt = pasid_pte_get_pgtt(pte);
37e91bd4 254 intel_pasid_clear_entry(dev, pasid, fault_ignore);
8430fd3f 255 spin_unlock(&iommu->lock);
6f7db75e
LB
256
257 if (!ecap_coherent(iommu->ecap))
258 clflush_cache_range(pte, sizeof(*pte));
259
260 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
8798d364
LY
261
262 if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
263 qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
264 else
265 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
6f7db75e 266
1f5e307c 267 devtlb_invalidation_with_pasid(iommu, dev, pasid);
6f7db75e
LB
268}
269
423d39d8
LY
270/*
271 * This function flushes cache for a newly setup pasid table entry.
272 * Caller of it should not modify the in-use pasid table entries.
273 */
d62efd4f
JP
274static void pasid_flush_caches(struct intel_iommu *iommu,
275 struct pasid_entry *pte,
c7b6bac9 276 u32 pasid, u16 did)
d62efd4f
JP
277{
278 if (!ecap_coherent(iommu->ecap))
279 clflush_cache_range(pte, sizeof(*pte));
280
281 if (cap_caching_mode(iommu->cap)) {
282 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
9872f9bd 283 qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
d62efd4f
JP
284 } else {
285 iommu_flush_write_buffer(iommu);
286 }
287}
288
437f35e1
LB
289/*
290 * Set up the scalable mode pasid table entry for first only
291 * translation type.
292 */
293int intel_pasid_setup_first_level(struct intel_iommu *iommu,
294 struct device *dev, pgd_t *pgd,
c7b6bac9 295 u32 pasid, u16 did, int flags)
437f35e1
LB
296{
297 struct pasid_entry *pte;
298
299 if (!ecap_flts(iommu->ecap)) {
300 pr_err("No first level translation support on %s\n",
301 iommu->name);
302 return -EINVAL;
303 }
304
b722cb32 305 if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
8430fd3f
LB
306 pr_err("No 5-level paging support for first-level on %s\n",
307 iommu->name);
437f35e1 308 return -EINVAL;
8430fd3f 309 }
437f35e1 310
8430fd3f
LB
311 spin_lock(&iommu->lock);
312 pte = intel_pasid_get_entry(dev, pasid);
313 if (!pte) {
314 spin_unlock(&iommu->lock);
315 return -ENODEV;
316 }
317
318 if (pasid_pte_is_present(pte)) {
319 spin_unlock(&iommu->lock);
423d39d8 320 return -EBUSY;
8430fd3f 321 }
423d39d8 322
437f35e1
LB
323 pasid_clear_entry(pte);
324
325 /* Setup the first level page table pointer: */
326 pasid_set_flptr(pte, (u64)__pa(pgd));
437f35e1 327
8430fd3f
LB
328 if (flags & PASID_FLAG_FL5LP)
329 pasid_set_flpm(pte, 1);
437f35e1 330
6c00612d
LB
331 if (flags & PASID_FLAG_PAGE_SNOOP)
332 pasid_set_pgsnp(pte);
333
437f35e1
LB
334 pasid_set_domain_id(pte, did);
335 pasid_set_address_width(pte, iommu->agaw);
336 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
337
338 /* Setup Present and PASID Granular Transfer Type: */
b0d1f874 339 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
437f35e1 340 pasid_set_present(pte);
8430fd3f
LB
341 spin_unlock(&iommu->lock);
342
d62efd4f 343 pasid_flush_caches(iommu, pte, pasid, did);
437f35e1
LB
344
345 return 0;
346}
347
3aef9ca6
JP
348/*
349 * Skip top levels of page tables for iommu which has less agaw
350 * than default. Unnecessary for PT mode.
351 */
80b79e14
LB
352static int iommu_skip_agaw(struct dmar_domain *domain,
353 struct intel_iommu *iommu,
354 struct dma_pte **pgd)
3aef9ca6
JP
355{
356 int agaw;
357
358 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
359 *pgd = phys_to_virt(dma_pte_addr(*pgd));
360 if (!dma_pte_present(*pgd))
361 return -EINVAL;
362 }
363
364 return agaw;
365}
366
6f7db75e
LB
367/*
368 * Set up the scalable mode pasid entry for second only translation type.
369 */
370int intel_pasid_setup_second_level(struct intel_iommu *iommu,
371 struct dmar_domain *domain,
c7b6bac9 372 struct device *dev, u32 pasid)
6f7db75e
LB
373{
374 struct pasid_entry *pte;
375 struct dma_pte *pgd;
376 u64 pgd_val;
377 int agaw;
378 u16 did;
379
380 /*
381 * If hardware advertises no support for second level
382 * translation, return directly.
383 */
384 if (!ecap_slts(iommu->ecap)) {
385 pr_err("No second level translation support on %s\n",
386 iommu->name);
387 return -EINVAL;
388 }
389
6f7db75e 390 pgd = domain->pgd;
3aef9ca6
JP
391 agaw = iommu_skip_agaw(domain, iommu, &pgd);
392 if (agaw < 0) {
393 dev_err(dev, "Invalid domain page table\n");
394 return -EINVAL;
6f7db75e
LB
395 }
396
397 pgd_val = virt_to_phys(pgd);
ba949f4c 398 did = domain_id_iommu(domain, iommu);
6f7db75e 399
8430fd3f 400 spin_lock(&iommu->lock);
6f7db75e
LB
401 pte = intel_pasid_get_entry(dev, pasid);
402 if (!pte) {
8430fd3f 403 spin_unlock(&iommu->lock);
6f7db75e
LB
404 return -ENODEV;
405 }
406
8430fd3f
LB
407 if (pasid_pte_is_present(pte)) {
408 spin_unlock(&iommu->lock);
423d39d8 409 return -EBUSY;
8430fd3f 410 }
423d39d8 411
6f7db75e
LB
412 pasid_clear_entry(pte);
413 pasid_set_domain_id(pte, did);
414 pasid_set_slptr(pte, pgd_val);
415 pasid_set_address_width(pte, agaw);
b0d1f874 416 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
6f7db75e
LB
417 pasid_set_fault_enable(pte);
418 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
f35f22cc
JM
419 if (domain->dirty_tracking)
420 pasid_set_ssade(pte);
6f7db75e 421
6f7db75e 422 pasid_set_present(pte);
8430fd3f
LB
423 spin_unlock(&iommu->lock);
424
d62efd4f 425 pasid_flush_caches(iommu, pte, pasid, did);
6f7db75e
LB
426
427 return 0;
428}
429
f35f22cc
JM
430/*
431 * Set up dirty tracking on a second only or nested translation type.
432 */
433int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
f35f22cc
JM
434 struct device *dev, u32 pasid,
435 bool enabled)
436{
437 struct pasid_entry *pte;
438 u16 did, pgtt;
439
440 spin_lock(&iommu->lock);
441
442 pte = intel_pasid_get_entry(dev, pasid);
443 if (!pte) {
444 spin_unlock(&iommu->lock);
445 dev_err_ratelimited(
446 dev, "Failed to get pasid entry of PASID %d\n", pasid);
447 return -ENODEV;
448 }
449
56ecaf6c 450 did = pasid_get_domain_id(pte);
f35f22cc
JM
451 pgtt = pasid_pte_get_pgtt(pte);
452 if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
453 pgtt != PASID_ENTRY_PGTT_NESTED) {
454 spin_unlock(&iommu->lock);
455 dev_err_ratelimited(
456 dev,
457 "Dirty tracking not supported on translation type %d\n",
458 pgtt);
459 return -EOPNOTSUPP;
460 }
461
462 if (pasid_get_ssade(pte) == enabled) {
463 spin_unlock(&iommu->lock);
464 return 0;
465 }
466
467 if (enabled)
468 pasid_set_ssade(pte);
469 else
470 pasid_clear_ssade(pte);
471 spin_unlock(&iommu->lock);
472
473 if (!ecap_coherent(iommu->ecap))
474 clflush_cache_range(pte, sizeof(*pte));
475
476 /*
477 * From VT-d spec table 25 "Guidance to Software for Invalidations":
478 *
479 * - PASID-selective-within-Domain PASID-cache invalidation
480 * If (PGTT=SS or Nested)
481 * - Domain-selective IOTLB invalidation
482 * Else
483 * - PASID-selective PASID-based IOTLB invalidation
484 * - If (pasid is RID_PASID)
485 * - Global Device-TLB invalidation to affected functions
486 * Else
487 * - PASID-based Device-TLB invalidation (with S=1 and
488 * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
489 */
490 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
491
492 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
493
1f5e307c 494 devtlb_invalidation_with_pasid(iommu, dev, pasid);
f35f22cc
JM
495
496 return 0;
497}
498
6f7db75e
LB
499/*
500 * Set up the scalable mode pasid entry for passthrough translation type.
501 */
502int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
c7b6bac9 503 struct device *dev, u32 pasid)
6f7db75e
LB
504{
505 u16 did = FLPT_DEFAULT_DID;
506 struct pasid_entry *pte;
507
8430fd3f 508 spin_lock(&iommu->lock);
6f7db75e
LB
509 pte = intel_pasid_get_entry(dev, pasid);
510 if (!pte) {
8430fd3f 511 spin_unlock(&iommu->lock);
6f7db75e
LB
512 return -ENODEV;
513 }
514
8430fd3f
LB
515 if (pasid_pte_is_present(pte)) {
516 spin_unlock(&iommu->lock);
423d39d8 517 return -EBUSY;
8430fd3f 518 }
423d39d8 519
6f7db75e
LB
520 pasid_clear_entry(pte);
521 pasid_set_domain_id(pte, did);
522 pasid_set_address_width(pte, iommu->agaw);
b0d1f874 523 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
6f7db75e
LB
524 pasid_set_fault_enable(pte);
525 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
6f7db75e 526 pasid_set_present(pte);
8430fd3f
LB
527 spin_unlock(&iommu->lock);
528
d62efd4f 529 pasid_flush_caches(iommu, pte, pasid, did);
6f7db75e
LB
530
531 return 0;
532}
fc0051cb
LB
533
534/*
535 * Set the page snoop control for a pasid entry which has been set up.
536 */
537void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
538 struct device *dev, u32 pasid)
539{
540 struct pasid_entry *pte;
541 u16 did;
542
543 spin_lock(&iommu->lock);
544 pte = intel_pasid_get_entry(dev, pasid);
545 if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
546 spin_unlock(&iommu->lock);
547 return;
548 }
549
550 pasid_set_pgsnp(pte);
551 did = pasid_get_domain_id(pte);
552 spin_unlock(&iommu->lock);
553
554 if (!ecap_coherent(iommu->ecap))
555 clflush_cache_range(pte, sizeof(*pte));
556
557 /*
558 * VT-d spec 3.4 table23 states guides for cache invalidation:
559 *
560 * - PASID-selective-within-Domain PASID-cache invalidation
561 * - PASID-selective PASID-based IOTLB invalidation
562 * - If (pasid is RID_PASID)
563 * - Global Device-TLB invalidation to affected functions
564 * Else
565 * - PASID-based Device-TLB invalidation (with S=1 and
566 * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
567 */
568 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
569 qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
570
1f5e307c 571 devtlb_invalidation_with_pasid(iommu, dev, pasid);
fc0051cb 572}
111bf85c
LB
573
574/**
575 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
576 * @iommu: IOMMU which the device belong to
577 * @dev: Device to be set up for translation
578 * @pasid: PASID to be programmed in the device PASID table
579 * @domain: User stage-1 domain nested on a stage-2 domain
580 *
581 * This is used for nested translation. The input domain should be
582 * nested type and nested on a parent with 'is_nested_parent' flag
583 * set.
584 */
585int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
586 u32 pasid, struct dmar_domain *domain)
587{
588 struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
589 pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
590 struct dmar_domain *s2_domain = domain->s2_domain;
591 u16 did = domain_id_iommu(domain, iommu);
592 struct dma_pte *pgd = s2_domain->pgd;
593 struct pasid_entry *pte;
594
595 /* Address width should match the address width supported by hardware */
596 switch (s1_cfg->addr_width) {
597 case ADDR_WIDTH_4LEVEL:
598 break;
599 case ADDR_WIDTH_5LEVEL:
600 if (!cap_fl5lp_support(iommu->cap)) {
601 dev_err_ratelimited(dev,
602 "5-level paging not supported\n");
603 return -EINVAL;
604 }
605 break;
606 default:
607 dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
608 s1_cfg->addr_width);
609 return -EINVAL;
610 }
611
612 if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
613 pr_err_ratelimited("No supervisor request support on %s\n",
614 iommu->name);
615 return -EINVAL;
616 }
617
618 if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
619 pr_err_ratelimited("No extended access flag support on %s\n",
620 iommu->name);
621 return -EINVAL;
622 }
623
624 spin_lock(&iommu->lock);
625 pte = intel_pasid_get_entry(dev, pasid);
626 if (!pte) {
627 spin_unlock(&iommu->lock);
628 return -ENODEV;
629 }
630 if (pasid_pte_is_present(pte)) {
631 spin_unlock(&iommu->lock);
632 return -EBUSY;
633 }
634
635 pasid_clear_entry(pte);
636
637 if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
638 pasid_set_flpm(pte, 1);
639
640 pasid_set_flptr(pte, (uintptr_t)s1_gpgd);
641
642 if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
643 pasid_set_sre(pte);
644 if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
645 pasid_set_wpe(pte);
646 }
647
648 if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
649 pasid_set_eafe(pte);
650
651 if (s2_domain->force_snooping)
652 pasid_set_pgsnp(pte);
653
654 pasid_set_slptr(pte, virt_to_phys(pgd));
655 pasid_set_fault_enable(pte);
656 pasid_set_domain_id(pte, did);
657 pasid_set_address_width(pte, s2_domain->agaw);
658 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
1f0198fc
YL
659 if (s2_domain->dirty_tracking)
660 pasid_set_ssade(pte);
111bf85c
LB
661 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
662 pasid_set_present(pte);
663 spin_unlock(&iommu->lock);
664
665 pasid_flush_caches(iommu, pte, pasid, did);
666
667 return 0;
668}
81e921fd
LB
669
/*
 * Interfaces to set up or tear down the PASID table in the scalable-mode
 * context table entry:
 */
674
675static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
676{
677 struct device_domain_info *info = dev_iommu_priv_get(dev);
678 struct intel_iommu *iommu = info->iommu;
679 struct context_entry *context;
7af6c720 680 u16 did;
81e921fd
LB
681
682 spin_lock(&iommu->lock);
683 context = iommu_context_addr(iommu, bus, devfn, false);
684 if (!context) {
685 spin_unlock(&iommu->lock);
686 return;
687 }
688
7af6c720 689 did = context_domain_id(context);
81e921fd
LB
690 context_clear_entry(context);
691 __iommu_flush_cache(iommu, context, sizeof(*context));
692 spin_unlock(&iommu->lock);
7af6c720 693 intel_context_flush_present(info, context, did, false);
81e921fd
LB
694}
695
696static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
697{
698 struct device *dev = data;
699
700 if (dev == &pdev->dev)
701 device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);
702
703 return 0;
704}
705
706void intel_pasid_teardown_sm_context(struct device *dev)
707{
708 struct device_domain_info *info = dev_iommu_priv_get(dev);
709
710 if (!dev_is_pci(dev)) {
711 device_pasid_table_teardown(dev, info->bus, info->devfn);
712 return;
713 }
714
715 pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
716}
301f1a80
LB
717
718/*
719 * Get the PASID directory size for scalable mode context entry.
720 * Value of X in the PDTS field of a scalable mode context entry
721 * indicates PASID directory with 2^(X + 7) entries.
722 */
723static unsigned long context_get_sm_pds(struct pasid_table *table)
724{
725 unsigned long pds, max_pde;
726
727 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
728 pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
729 if (pds < 7)
730 return 0;
731
732 return pds - 7;
733}
734
735static int context_entry_set_pasid_table(struct context_entry *context,
736 struct device *dev)
737{
738 struct device_domain_info *info = dev_iommu_priv_get(dev);
739 struct pasid_table *table = info->pasid_table;
740 struct intel_iommu *iommu = info->iommu;
741 unsigned long pds;
742
743 context_clear_entry(context);
744
745 pds = context_get_sm_pds(table);
746 context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
747 context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
748
749 if (info->ats_supported)
750 context_set_sm_dte(context);
301f1a80
LB
751 if (info->pasid_supported)
752 context_set_pasid(context);
753
754 context_set_fault_enable(context);
755 context_set_present(context);
756 __iommu_flush_cache(iommu, context, sizeof(*context));
757
758 return 0;
759}
760
761static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
762{
763 struct device_domain_info *info = dev_iommu_priv_get(dev);
764 struct intel_iommu *iommu = info->iommu;
765 struct context_entry *context;
766
767 spin_lock(&iommu->lock);
768 context = iommu_context_addr(iommu, bus, devfn, true);
769 if (!context) {
770 spin_unlock(&iommu->lock);
771 return -ENOMEM;
772 }
773
774 if (context_present(context) && !context_copied(iommu, bus, devfn)) {
775 spin_unlock(&iommu->lock);
776 return 0;
777 }
778
779 if (context_copied(iommu, bus, devfn)) {
780 context_clear_entry(context);
781 __iommu_flush_cache(iommu, context, sizeof(*context));
782
783 /*
784 * For kdump cases, old valid entries may be cached due to
785 * the in-flight DMA and copied pgtable, but there is no
786 * unmapping behaviour for them, thus we need explicit cache
787 * flushes for all affected domain IDs and PASIDs used in
788 * the copied PASID table. Given that we have no idea about
789 * which domain IDs and PASIDs were used in the copied tables,
790 * upgrade them to global PASID and IOTLB cache invalidation.
791 */
792 iommu->flush.flush_context(iommu, 0,
793 PCI_DEVID(bus, devfn),
794 DMA_CCMD_MASK_NOBIT,
795 DMA_CCMD_DEVICE_INVL);
796 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
797 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
798 devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
799
800 /*
801 * At this point, the device is supposed to finish reset at
802 * its driver probe stage, so no in-flight DMA will exist,
803 * and we don't need to worry anymore hereafter.
804 */
805 clear_context_copied(iommu, bus, devfn);
806 }
807
808 context_entry_set_pasid_table(context, dev);
809 spin_unlock(&iommu->lock);
810
811 /*
812 * It's a non-present to present mapping. If hardware doesn't cache
813 * non-present entry we don't need to flush the caches. If it does
814 * cache non-present entries, then it does so in the special
815 * domain #0, which we have to flush:
816 */
817 if (cap_caching_mode(iommu->cap)) {
818 iommu->flush.flush_context(iommu, 0,
819 PCI_DEVID(bus, devfn),
820 DMA_CCMD_MASK_NOBIT,
821 DMA_CCMD_DEVICE_INVL);
822 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
823 }
824
825 return 0;
826}
827
828static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
829{
830 struct device *dev = data;
831
832 if (dev != &pdev->dev)
833 return 0;
834
835 return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
836}
837
838/*
839 * Set the device's PASID table to its context table entry.
840 *
841 * The PASID table is set to the context entries of both device itself
842 * and its alias requester ID for DMA.
843 */
844int intel_pasid_setup_sm_context(struct device *dev)
845{
846 struct device_domain_info *info = dev_iommu_priv_get(dev);
847
848 if (!dev_is_pci(dev))
849 return device_pasid_table_setup(dev, info->bus, info->devfn);
850
851 return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
852}
f90584f4
LB
853
854/*
855 * Global Device-TLB invalidation following changes in a context entry which
856 * was present.
857 */
858static void __context_flush_dev_iotlb(struct device_domain_info *info)
859{
860 if (!info->ats_enabled)
861 return;
862
863 qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
864 info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);
865
866 /*
867 * There is no guarantee that the device DMA is stopped when it reaches
868 * here. Therefore, always attempt the extra device TLB invalidation
869 * quirk. The impact on performance is acceptable since this is not a
870 * performance-critical path.
871 */
872 quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
873 info->ats_qdep);
874}
875
876/*
877 * Cache invalidations after change in a context table entry that was present
878 * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If
879 * IOMMU is in scalable mode and all PASID table entries of the device were
880 * non-present, set flush_domains to false. Otherwise, true.
881 */
882void intel_context_flush_present(struct device_domain_info *info,
883 struct context_entry *context,
7af6c720 884 u16 did, bool flush_domains)
f90584f4
LB
885{
886 struct intel_iommu *iommu = info->iommu;
f90584f4
LB
887 struct pasid_entry *pte;
888 int i;
889
890 /*
891 * Device-selective context-cache invalidation. The Domain-ID field
892 * of the Context-cache Invalidate Descriptor is ignored by hardware
893 * when operating in scalable mode. Therefore the @did value doesn't
894 * matter in scalable mode.
895 */
896 iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
897 DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
898
899 /*
900 * For legacy mode:
901 * - Domain-selective IOTLB invalidation
902 * - Global Device-TLB invalidation to all affected functions
903 */
904 if (!sm_supported(iommu)) {
905 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
906 __context_flush_dev_iotlb(info);
907
908 return;
909 }
910
911 /*
912 * For scalable mode:
913 * - Domain-selective PASID-cache invalidation to affected domains
914 * - Domain-selective IOTLB invalidation to affected domains
915 * - Global Device-TLB invalidation to affected functions
916 */
917 if (flush_domains) {
918 /*
919 * If the IOMMU is running in scalable mode and there might
920 * be potential PASID translations, the caller should hold
921 * the lock to ensure that context changes and cache flushes
922 * are atomic.
923 */
924 assert_spin_locked(&iommu->lock);
925 for (i = 0; i < info->pasid_table->max_pasid; i++) {
926 pte = intel_pasid_get_entry(info->dev, i);
927 if (!pte || !pasid_pte_is_present(pte))
928 continue;
929
930 did = pasid_get_domain_id(pte);
931 qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
932 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
933 }
934 }
935
936 __context_flush_dev_iotlb(info);
937}