iommu/vt-d: Don't read VCCAP register unless it exists
[linux-2.6-block.git] / drivers / iommu / intel / iommu.c
2025cf9e 1// SPDX-License-Identifier: GPL-2.0-only
ba395927 2/*
ea8ea460 3 * Copyright © 2006-2014 Intel Corporation.
ba395927 4 *
ea8ea460
DW
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 10 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
11 */
12
9f10e5bf 13#define pr_fmt(fmt) "DMAR: " fmt
932a6523 14#define dev_fmt(fmt) pr_fmt(fmt)
9f10e5bf 15
ba395927
KA
16#include <linux/init.h>
17#include <linux/bitmap.h>
5e0d2a6f 18#include <linux/debugfs.h>
54485c30 19#include <linux/export.h>
ba395927
KA
20#include <linux/slab.h>
21#include <linux/irq.h>
22#include <linux/interrupt.h>
ba395927
KA
23#include <linux/spinlock.h>
24#include <linux/pci.h>
25#include <linux/dmar.h>
0a0f0d8b 26#include <linux/dma-map-ops.h>
ba395927 27#include <linux/mempool.h>
75f05569 28#include <linux/memory.h>
aa473240 29#include <linux/cpu.h>
5e0d2a6f 30#include <linux/timer.h>
dfddb969 31#include <linux/io.h>
38717946 32#include <linux/iova.h>
5d450806 33#include <linux/iommu.h>
38717946 34#include <linux/intel-iommu.h>
134fac3f 35#include <linux/syscore_ops.h>
69575d38 36#include <linux/tboot.h>
adb2fe02 37#include <linux/dmi.h>
5cdede24 38#include <linux/pci-ats.h>
0ee332c1 39#include <linux/memblock.h>
0b1abd1f 40#include <linux/dma-map-ops.h>
fec777c3 41#include <linux/dma-direct.h>
091d42e4 42#include <linux/crash_dump.h>
98fa15f3 43#include <linux/numa.h>
cfb94a37 44#include <linux/swiotlb.h>
8a8f422d 45#include <asm/irq_remapping.h>
ba395927 46#include <asm/cacheflush.h>
46a7fa27 47#include <asm/iommu.h>
cfb94a37 48#include <trace/events/intel_iommu.h>
ba395927 49
672cf6df 50#include "../irq_remapping.h"
02f3effd 51#include "pasid.h"
078e1ee2 52
5b6985ce
FY
53#define ROOT_SIZE VTD_PAGE_SIZE
54#define CONTEXT_SIZE VTD_PAGE_SIZE
55
ba395927 56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
60
61#define IOAPIC_RANGE_START (0xfee00000)
62#define IOAPIC_RANGE_END (0xfeefffff)
63#define IOVA_START_ADDR (0x1000)
64
5e3b4a15 65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
ba395927 66
4ed0d3e6 67#define MAX_AGAW_WIDTH 64
5c645b35 68#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 69
2ebe3151
DW
70#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
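/*
 * Worked example (illustrative): with gaw = 48 and VTD_PAGE_SHIFT = 12,
 * __DOMAIN_MAX_PFN(48) = (1ULL << 36) - 1 = 0xf_ffff_ffff and
 * DOMAIN_MAX_ADDR(48) = 0xf_ffff_ffff << 12 = 0xffff_ffff_f000.  On a
 * 32-bit kernel DOMAIN_MAX_PFN() additionally clamps the result to
 * (unsigned long)-1 so that PFNs always fit in an unsigned long.
 */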
ba395927 78
1b722500
RM
79/* IO virtual address start page frame number */
80#define IOVA_START_PFN (1)
81
f27be03b 82#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
5e0d2a6f 83
df08cdc7
AM
84/* page table handling */
85#define LEVEL_STRIDE (9)
86#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
87
6d1c56a9
OBC
88/*
89 * This bitmap is used to advertise the page sizes our hardware supports
90 * to the IOMMU core, which will then use this information to split
91 * physically contiguous memory regions it is mapping into page sizes
92 * that we support.
93 *
94 * Traditionally the IOMMU core just handed us the mappings directly,
95 * after making sure the size is a power-of-two multiple of 4KiB and that
96 * the mapping has natural alignment.
97 *
98 * To retain this behavior, we currently advertise that we support
99 * all page sizes that are a power-of-two multiple of 4KiB.
100 *
101 * If at some point we'd like to utilize the IOMMU core's new behavior,
102 * we could change this to advertise the real page sizes we support.
103 */
104#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
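/*
 * Illustration: in the IOMMU core's pgsize_bitmap convention, bit k set
 * means a 2^k byte page size is supported.  ~0xFFFUL sets every bit from
 * 12 upwards, i.e. it advertises 4KiB, 8KiB, 16KiB, ... so naturally
 * aligned power-of-two mappings are handed to us whole instead of being
 * split into a fixed set of page sizes.
 */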
105
df08cdc7
AM
106static inline int agaw_to_level(int agaw)
107{
108 return agaw + 2;
109}
110
111static inline int agaw_to_width(int agaw)
112{
5c645b35 113 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
114}
115
116static inline int width_to_agaw(int width)
117{
5c645b35 118 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
119}
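/*
 * Worked example: AGAW values 1/2/3 correspond to 39/48/57-bit address
 * widths and 3/4/5-level page tables respectively, e.g.
 * width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2 and
 * agaw_to_level(2) = 4.
 */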
120
121static inline unsigned int level_to_offset_bits(int level)
122{
123 return (level - 1) * LEVEL_STRIDE;
124}
125
29aaebbc 126static inline int pfn_level_offset(u64 pfn, int level)
df08cdc7
AM
127{
128 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
129}
130
29aaebbc 131static inline u64 level_mask(int level)
df08cdc7 132{
29aaebbc 133 return -1ULL << level_to_offset_bits(level);
df08cdc7
AM
134}
135
29aaebbc 136static inline u64 level_size(int level)
df08cdc7 137{
29aaebbc 138 return 1ULL << level_to_offset_bits(level);
df08cdc7
AM
139}
140
29aaebbc 141static inline u64 align_to_level(u64 pfn, int level)
df08cdc7
AM
142{
143 return (pfn + level_size(level) - 1) & level_mask(level);
144}
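/*
 * Illustration: level 1 entries map 4KiB pages, a level 2 entry covers
 * 512 PFNs (2MiB) and a level 3 entry covers 512^2 PFNs (1GiB).  For
 * example, pfn_level_offset(pfn, 2) extracts bits 9..17 of the PFN, and
 * align_to_level(pfn, 2) rounds pfn up to the next 2MiB boundary.
 */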
fd18de50 145
6dd9a7c7
YS
146static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
147{
29aaebbc 148 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
149}
150
dd4e8319
DW
151/* VT-d pages must never be _larger_ than MM pages. Otherwise things
152 are never going to work. */
153static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
154{
155 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
156}
157
158static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
159{
160 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
161}
162static inline unsigned long page_to_dma_pfn(struct page *pg)
163{
164 return mm_to_dma_pfn(page_to_pfn(pg));
165}
166static inline unsigned long virt_to_dma_pfn(void *p)
167{
168 return page_to_dma_pfn(virt_to_page(p));
169}
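/*
 * Note: VTD_PAGE_SHIFT is fixed at 12 (4KiB), so with 4KiB MM pages the
 * conversions above are identity operations (a shift by zero); they only
 * change the PFN when PAGE_SIZE is larger than 4KiB.
 */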
170
d9630fe9
WH
171/* global iommu list, set NULL for ignored DMAR units */
172static struct intel_iommu **g_iommus;
173
e0fc7e0b 174static void __init check_tylersburg_isoch(void);
9af88143
DW
175static int rwbf_quirk;
176
b779260b
JC
177/*
178 * set to 1 to panic kernel if can't successfully enable VT-d
179 * (used when kernel is launched w/ TXT)
180 */
181static int force_on = 0;
4d213e76 182static int intel_iommu_tboot_noforce;
89a6079d 183static int no_platform_optin;
b779260b 184
46b08e1a 185#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 186
091d42e4
JR
187/*
188 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
189 * if marked present.
190 */
191static phys_addr_t root_entry_lctp(struct root_entry *re)
192{
193 if (!(re->lo & 1))
194 return 0;
195
196 return re->lo & VTD_PAGE_MASK;
197}
198
199/*
200 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
201 * if marked present.
202 */
203static phys_addr_t root_entry_uctp(struct root_entry *re)
204{
205 if (!(re->hi & 1))
206 return 0;
46b08e1a 207
091d42e4
JR
208 return re->hi & VTD_PAGE_MASK;
209}
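/*
 * Each root entry is 128 bits and is indexed by bus number.  Bit 0 of
 * each half is a present bit and the remaining bits (masked with
 * VTD_PAGE_MASK) hold the physical address of a context table: legacy
 * mode only uses the low half, while scalable mode uses the low half for
 * devfn 0x00-0x7f and the high half for devfn 0x80-0xff (see
 * iommu_context_addr()).
 */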
c07e7d21 210
cf484d0e
JR
211static inline void context_clear_pasid_enable(struct context_entry *context)
212{
213 context->lo &= ~(1ULL << 11);
214}
215
216static inline bool context_pasid_enabled(struct context_entry *context)
217{
218 return !!(context->lo & (1ULL << 11));
219}
220
221static inline void context_set_copied(struct context_entry *context)
222{
223 context->hi |= (1ull << 3);
224}
225
226static inline bool context_copied(struct context_entry *context)
227{
228 return !!(context->hi & (1ULL << 3));
229}
230
231static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
232{
233 return (context->lo & 1);
234}
cf484d0e 235
26b86092 236bool context_present(struct context_entry *context)
cf484d0e
JR
237{
238 return context_pasid_enabled(context) ?
239 __context_present(context) :
240 __context_present(context) && !context_copied(context);
241}
242
c07e7d21
MM
243static inline void context_set_present(struct context_entry *context)
244{
245 context->lo |= 1;
246}
247
248static inline void context_set_fault_enable(struct context_entry *context)
249{
250 context->lo &= (((u64)-1) << 2) | 1;
251}
252
c07e7d21
MM
253static inline void context_set_translation_type(struct context_entry *context,
254 unsigned long value)
255{
256 context->lo &= (((u64)-1) << 4) | 3;
257 context->lo |= (value & 3) << 2;
258}
259
260static inline void context_set_address_root(struct context_entry *context,
261 unsigned long value)
262{
1a2262f9 263 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
264 context->lo |= value & VTD_PAGE_MASK;
265}
266
267static inline void context_set_address_width(struct context_entry *context,
268 unsigned long value)
269{
270 context->hi |= value & 7;
271}
272
273static inline void context_set_domain_id(struct context_entry *context,
274 unsigned long value)
275{
276 context->hi |= (value & ((1 << 16) - 1)) << 8;
277}
278
dbcd861f
JR
279static inline int context_domain_id(struct context_entry *c)
280{
281 return((c->hi >> 8) & 0xffff);
282}
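/*
 * Summary of the legacy-mode context entry fields touched above: lo[0]
 * present, lo[1] fault processing disable, lo[3:2] translation type,
 * lo[63:12] page-table root, hi[2:0] address width (AGAW), hi[23:8]
 * domain id.  lo[11] and hi[3] are used by this driver to track PASID
 * enablement and "copied from a previous kernel" state respectively.
 */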
283
c07e7d21
MM
284static inline void context_clear_entry(struct context_entry *context)
285{
286 context->lo = 0;
287 context->hi = 0;
288}
7a8fc25e 289
2c2e2c38
FY
290/*
291 * This domain is a static identity mapping domain.
292 * 1. This domain creates a static 1:1 mapping to all usable memory.
293 * 2. It maps to each iommu if successful.
294 * 3. Each iommu maps to this domain if successful.
295 */
19943b0e
DW
296static struct dmar_domain *si_domain;
297static int hw_pass_through = 1;
2c2e2c38 298
29a27719
JR
299#define for_each_domain_iommu(idx, domain) \
300 for (idx = 0; idx < g_num_of_iommus; idx++) \
301 if (domain->iommu_refcnt[idx])
302
b94e4117
JL
303struct dmar_rmrr_unit {
304 struct list_head list; /* list of rmrr units */
305 struct acpi_dmar_header *hdr; /* ACPI header */
306 u64 base_address; /* reserved base address*/
307 u64 end_address; /* reserved end address */
832bd858 308 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
309 int devices_cnt; /* target device count */
310};
311
312struct dmar_atsr_unit {
313 struct list_head list; /* list of ATSR units */
314 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 315 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
316 int devices_cnt; /* target device count */
317 u8 include_all:1; /* include all ports */
318};
319
320static LIST_HEAD(dmar_atsr_units);
321static LIST_HEAD(dmar_rmrr_units);
322
323#define for_each_rmrr_units(rmrr) \
324 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
325
5e0d2a6f 326/* bitmap for indexing intel_iommus */
5e0d2a6f 327static int g_num_of_iommus;
328
92d03cc8 329static void domain_exit(struct dmar_domain *domain);
ba395927 330static void domain_remove_dev_info(struct dmar_domain *domain);
71753239 331static void dmar_remove_one_dev_info(struct device *dev);
127c7615 332static void __dmar_remove_one_dev_info(struct device_domain_info *info);
8af46c78
LB
333static int intel_iommu_attach_device(struct iommu_domain *domain,
334 struct device *dev);
cfb94a37
LB
335static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
336 dma_addr_t iova);
ba395927 337
d3f13810 338#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
339int dmar_disabled = 0;
340#else
341int dmar_disabled = 1;
04618252 342#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
0cd5c3c8 343
ba61c3da 344#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
04618252
LB
345int intel_iommu_sm = 1;
346#else
cdd3a249 347int intel_iommu_sm;
ba61c3da 348#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
04618252 349
8bc1f85c
ED
350int intel_iommu_enabled = 0;
351EXPORT_SYMBOL_GPL(intel_iommu_enabled);
352
2d9e667e 353static int dmar_map_gfx = 1;
7d3b03ce 354static int dmar_forcedac;
5e0d2a6f 355static int intel_iommu_strict;
6dd9a7c7 356static int intel_iommu_superpage = 1;
ae853ddb 357static int iommu_identity_mapping;
e5e04d05 358static int intel_no_bounce;
b1012ca8 359static int iommu_skip_te_disable;
c83b2f20 360
ae853ddb
DW
361#define IDENTMAP_GFX 2
362#define IDENTMAP_AZALIA 4
c83b2f20 363
c0771df8
DW
364int intel_iommu_gfx_mapped;
365EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
366
8af46c78 367#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
e85bb99b
LB
368struct device_domain_info *get_domain_info(struct device *dev)
369{
370 struct device_domain_info *info;
371
372 if (!dev)
373 return NULL;
374
01b9d4e2 375 info = dev_iommu_priv_get(dev);
2d33b7d6 376 if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
e85bb99b
LB
377 return NULL;
378
379 return info;
380}
381
e2726dae 382DEFINE_SPINLOCK(device_domain_lock);
ba395927
KA
383static LIST_HEAD(device_domain_list);
384
e5e04d05
LB
385#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
386 to_pci_dev(d)->untrusted)
387
85319dcc
LB
388/*
389 * Iterate over elements in device_domain_list and call the specified
0bbeb01a 390 * callback @fn against each element.
85319dcc
LB
391 */
392int for_each_device_domain(int (*fn)(struct device_domain_info *info,
393 void *data), void *data)
394{
395 int ret = 0;
0bbeb01a 396 unsigned long flags;
85319dcc
LB
397 struct device_domain_info *info;
398
0bbeb01a 399 spin_lock_irqsave(&device_domain_lock, flags);
85319dcc
LB
400 list_for_each_entry(info, &device_domain_list, global) {
401 ret = fn(info, data);
0bbeb01a
LB
402 if (ret) {
403 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc 404 return ret;
0bbeb01a 405 }
85319dcc 406 }
0bbeb01a 407 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc
LB
408
409 return 0;
410}
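/*
 * Example usage (hypothetical callback, for illustration only): count
 * the entries on device_domain_list.
 *
 *	static int count_cb(struct device_domain_info *info, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;	// a non-zero return stops the walk
 *	}
 *
 *	int count = 0;
 *	for_each_device_domain(count_cb, &count);
 */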
411
b0119e87 412const struct iommu_ops intel_iommu_ops;
a8bcbb0d 413
4158c2ec
JR
414static bool translation_pre_enabled(struct intel_iommu *iommu)
415{
416 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
417}
418
091d42e4
JR
419static void clear_translation_pre_enabled(struct intel_iommu *iommu)
420{
421 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
422}
423
4158c2ec
JR
424static void init_translation_status(struct intel_iommu *iommu)
425{
426 u32 gsts;
427
428 gsts = readl(iommu->reg + DMAR_GSTS_REG);
429 if (gsts & DMA_GSTS_TES)
430 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
431}
432
ba395927
KA
433static int __init intel_iommu_setup(char *str)
434{
435 if (!str)
436 return -EINVAL;
437 while (*str) {
0cd5c3c8
KM
438 if (!strncmp(str, "on", 2)) {
439 dmar_disabled = 0;
9f10e5bf 440 pr_info("IOMMU enabled\n");
0cd5c3c8 441 } else if (!strncmp(str, "off", 3)) {
ba395927 442 dmar_disabled = 1;
89a6079d 443 no_platform_optin = 1;
9f10e5bf 444 pr_info("IOMMU disabled\n");
ba395927
KA
445 } else if (!strncmp(str, "igfx_off", 8)) {
446 dmar_map_gfx = 0;
9f10e5bf 447 pr_info("Disable GFX device mapping\n");
7d3b03ce 448 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 449 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 450 dmar_forcedac = 1;
5e0d2a6f 451 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 452 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 453 intel_iommu_strict = 1;
6dd9a7c7 454 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 455 pr_info("Disable supported super page\n");
6dd9a7c7 456 intel_iommu_superpage = 0;
8950dcd8
LB
457 } else if (!strncmp(str, "sm_on", 5)) {
458 pr_info("Intel-IOMMU: scalable mode supported\n");
459 intel_iommu_sm = 1;
bfd20f1c 460 } else if (!strncmp(str, "tboot_noforce", 13)) {
8627892a 461 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
bfd20f1c 462 intel_iommu_tboot_noforce = 1;
e5e04d05
LB
463 } else if (!strncmp(str, "nobounce", 8)) {
464 pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
465 intel_no_bounce = 1;
ba395927
KA
466 }
467
468 str += strcspn(str, ",");
469 while (*str == ',')
470 str++;
471 }
472 return 0;
473}
474__setup("intel_iommu=", intel_iommu_setup);
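/*
 * Example (illustrative): booting with "intel_iommu=on,sm_on,strict"
 * enables the IOMMU, opts in to scalable mode and disables batched IOTLB
 * flushing; options are comma separated and parsed in order.
 */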
475
476static struct kmem_cache *iommu_domain_cache;
477static struct kmem_cache *iommu_devinfo_cache;
ba395927 478
9452d5bf
JR
479static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
480{
8bf47816
JR
481 struct dmar_domain **domains;
482 int idx = did >> 8;
483
484 domains = iommu->domains[idx];
485 if (!domains)
486 return NULL;
487
488 return domains[did & 0xff];
9452d5bf
JR
489}
490
491static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
492 struct dmar_domain *domain)
493{
8bf47816
JR
494 struct dmar_domain **domains;
495 int idx = did >> 8;
496
497 if (!iommu->domains[idx]) {
498 size_t size = 256 * sizeof(struct dmar_domain *);
499 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
500 }
501
502 domains = iommu->domains[idx];
503 if (WARN_ON(!domains))
504 return;
505 else
506 domains[did & 0xff] = domain;
9452d5bf
JR
507}
508
9ddbfb42 509void *alloc_pgtable_page(int node)
eb3fa7cb 510{
4c923d47
SS
511 struct page *page;
512 void *vaddr = NULL;
eb3fa7cb 513
4c923d47
SS
514 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
515 if (page)
516 vaddr = page_address(page);
eb3fa7cb 517 return vaddr;
ba395927
KA
518}
519
9ddbfb42 520void free_pgtable_page(void *vaddr)
ba395927
KA
521{
522 free_page((unsigned long)vaddr);
523}
524
525static inline void *alloc_domain_mem(void)
526{
354bb65e 527 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
528}
529
38717946 530static void free_domain_mem(void *vaddr)
ba395927
KA
531{
532 kmem_cache_free(iommu_domain_cache, vaddr);
533}
534
535static inline void * alloc_devinfo_mem(void)
536{
354bb65e 537 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
538}
539
540static inline void free_devinfo_mem(void *vaddr)
541{
542 kmem_cache_free(iommu_devinfo_cache, vaddr);
543}
544
28ccce0d
JR
545static inline int domain_type_is_si(struct dmar_domain *domain)
546{
547 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
548}
549
ddf09b6d
LB
550static inline bool domain_use_first_level(struct dmar_domain *domain)
551{
552 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
553}
554
162d1b10
JL
555static inline int domain_pfn_supported(struct dmar_domain *domain,
556 unsigned long pfn)
557{
558 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
559
560 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
561}
562
4ed0d3e6 563static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
564{
565 unsigned long sagaw;
566 int agaw = -1;
567
568 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 569 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
570 agaw >= 0; agaw--) {
571 if (test_bit(agaw, &sagaw))
572 break;
573 }
574
575 return agaw;
576}
577
4ed0d3e6
FY
578/*
579 * Calculate max SAGAW for each iommu.
580 */
581int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
582{
583 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
584}
585
586/*
587 * calculate agaw for each iommu.
588 * "SAGAW" may be different across iommus, use a default agaw, and
589 * get a supported less agaw for iommus that don't support the default agaw.
590 */
591int iommu_calculate_agaw(struct intel_iommu *iommu)
592{
593 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
594}
595
2c2e2c38 596/* This function only returns a single iommu in a domain */
9ddbfb42 597struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
8c11e798
WH
598{
599 int iommu_id;
600
2c2e2c38 601 /* si_domain and vm domain should not get here. */
fa954e68
LB
602 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
603 return NULL;
604
29a27719
JR
605 for_each_domain_iommu(iommu_id, domain)
606 break;
607
8c11e798
WH
608 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
609 return NULL;
610
611 return g_iommus[iommu_id];
612}
613
04c00956
LB
614static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
615{
616 return sm_supported(iommu) ?
617 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
618}
619
8e604097
WH
620static void domain_update_iommu_coherency(struct dmar_domain *domain)
621{
d0501960
DW
622 struct dmar_drhd_unit *drhd;
623 struct intel_iommu *iommu;
2f119c78
QL
624 bool found = false;
625 int i;
2e12bc29 626
d0501960 627 domain->iommu_coherency = 1;
8e604097 628
29a27719 629 for_each_domain_iommu(i, domain) {
2f119c78 630 found = true;
04c00956 631 if (!iommu_paging_structure_coherency(g_iommus[i])) {
8e604097
WH
632 domain->iommu_coherency = 0;
633 break;
634 }
8e604097 635 }
d0501960
DW
636 if (found)
637 return;
638
639 /* No hardware attached; use lowest common denominator */
640 rcu_read_lock();
641 for_each_active_iommu(iommu, drhd) {
04c00956 642 if (!iommu_paging_structure_coherency(iommu)) {
d0501960
DW
643 domain->iommu_coherency = 0;
644 break;
645 }
646 }
647 rcu_read_unlock();
8e604097
WH
648}
649
161f6934 650static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 651{
161f6934
JL
652 struct dmar_drhd_unit *drhd;
653 struct intel_iommu *iommu;
654 int ret = 1;
58c610bd 655
161f6934
JL
656 rcu_read_lock();
657 for_each_active_iommu(iommu, drhd) {
658 if (iommu != skip) {
659 if (!ecap_sc_support(iommu->ecap)) {
660 ret = 0;
661 break;
662 }
58c610bd 663 }
58c610bd 664 }
161f6934
JL
665 rcu_read_unlock();
666
667 return ret;
58c610bd
SY
668}
669
64229e8f
LB
670static int domain_update_iommu_superpage(struct dmar_domain *domain,
671 struct intel_iommu *skip)
6dd9a7c7 672{
8140a95d 673 struct dmar_drhd_unit *drhd;
161f6934 674 struct intel_iommu *iommu;
64229e8f 675 int mask = 0x3;
6dd9a7c7
YS
676
677 if (!intel_iommu_superpage) {
161f6934 678 return 0;
6dd9a7c7
YS
679 }
680
8140a95d 681 /* set iommu_superpage to the smallest common denominator */
0e242612 682 rcu_read_lock();
8140a95d 683 for_each_active_iommu(iommu, drhd) {
161f6934 684 if (iommu != skip) {
64229e8f
LB
685 if (domain && domain_use_first_level(domain)) {
686 if (!cap_fl1gp_support(iommu->cap))
687 mask = 0x1;
688 } else {
689 mask &= cap_super_page_val(iommu->cap);
690 }
691
161f6934
JL
692 if (!mask)
693 break;
6dd9a7c7
YS
694 }
695 }
0e242612
JL
696 rcu_read_unlock();
697
161f6934 698 return fls(mask);
6dd9a7c7
YS
699}
700
d2ef0962
LB
701static int domain_update_device_node(struct dmar_domain *domain)
702{
703 struct device_domain_info *info;
704 int nid = NUMA_NO_NODE;
705
706 assert_spin_locked(&device_domain_lock);
707
708 if (list_empty(&domain->devices))
709 return NUMA_NO_NODE;
710
711 list_for_each_entry(info, &domain->devices, link) {
712 if (!info->dev)
713 continue;
714
715 /*
716 * There could possibly be multiple device numa nodes as devices
717 * within the same domain may sit behind different IOMMUs. There
718 * isn't a perfect answer in such a situation, so we select a
719 * first-come, first-served policy.
720 */
721 nid = dev_to_node(info->dev);
722 if (nid != NUMA_NO_NODE)
723 break;
724 }
725
726 return nid;
727}
728
58c610bd
SY
729/* Some capabilities may be different across iommus */
730static void domain_update_iommu_cap(struct dmar_domain *domain)
731{
732 domain_update_iommu_coherency(domain);
161f6934 733 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
64229e8f 734 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
d2ef0962
LB
735
736 /*
737 * If RHSA is missing, we should default to the device numa domain
738 * as fall back.
739 */
740 if (domain->nid == NUMA_NO_NODE)
741 domain->nid = domain_update_device_node(domain);
58c610bd
SY
742}
743
26b86092
SM
744struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
745 u8 devfn, int alloc)
03ecc32c
DW
746{
747 struct root_entry *root = &iommu->root_entry[bus];
748 struct context_entry *context;
749 u64 *entry;
750
4df4eab1 751 entry = &root->lo;
765b6a98 752 if (sm_supported(iommu)) {
03ecc32c
DW
753 if (devfn >= 0x80) {
754 devfn -= 0x80;
755 entry = &root->hi;
756 }
757 devfn *= 2;
758 }
03ecc32c
DW
759 if (*entry & 1)
760 context = phys_to_virt(*entry & VTD_PAGE_MASK);
761 else {
762 unsigned long phy_addr;
763 if (!alloc)
764 return NULL;
765
766 context = alloc_pgtable_page(iommu->node);
767 if (!context)
768 return NULL;
769
770 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
771 phy_addr = virt_to_phys((void *)context);
772 *entry = phy_addr | 1;
773 __iommu_flush_cache(iommu, entry, sizeof(*entry));
774 }
775 return &context[devfn];
776}
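/*
 * Worked example for the scalable-mode path above: a scalable-mode
 * context entry is 256 bits, i.e. two struct context_entry slots, so a
 * 4KiB context table holds 128 devices and the root entry is split into
 * a lower half (devfn 0x00-0x7f) and an upper half (devfn 0x80-0xff).
 * For devfn 0x85 we pick &root->hi, rebase devfn to 0x05 and return
 * slot 0x0a of that table.
 */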
777
1d461597
JR
778static bool attach_deferred(struct device *dev)
779{
01b9d4e2 780 return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
1d461597
JR
781}
782
b9a7f981
EA
783/**
784 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
785 * sub-hierarchy of a candidate PCI-PCI bridge
786 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
787 * @bridge: the candidate PCI-PCI bridge
788 *
789 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
790 */
791static bool
792is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
793{
794 struct pci_dev *pdev, *pbridge;
795
796 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
797 return false;
798
799 pdev = to_pci_dev(dev);
800 pbridge = to_pci_dev(bridge);
801
802 if (pbridge->subordinate &&
803 pbridge->subordinate->number <= pdev->bus->number &&
804 pbridge->subordinate->busn_res.end >= pdev->bus->number)
805 return true;
806
807 return false;
808}
809
2d33b7d6
LB
810static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
811{
812 struct dmar_drhd_unit *drhd;
813 u32 vtbar;
814 int rc;
815
816 /* We know that this device on this chipset has its own IOMMU.
817 * If we find it under a different IOMMU, then the BIOS is lying
818 * to us. Hope that the IOMMU for this device is actually
819 * disabled, and it needs no translation...
820 */
821 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
822 if (rc) {
823 /* "can't" happen */
824 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
825 return false;
826 }
827 vtbar &= 0xffff0000;
828
829 /* we know that this iommu should be at offset 0xa000 from vtbar */
830 drhd = dmar_find_matched_drhd_unit(pdev);
831 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
832 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
833 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
834 return true;
835 }
836
837 return false;
838}
839
840static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
841{
842 if (!iommu || iommu->drhd->ignored)
843 return true;
844
845 if (dev_is_pci(dev)) {
846 struct pci_dev *pdev = to_pci_dev(dev);
847
848 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
849 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
850 quirk_ioat_snb_local_iommu(pdev))
851 return true;
852 }
853
854 return false;
855}
856
dd6692f1 857struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
858{
859 struct dmar_drhd_unit *drhd = NULL;
dd6692f1 860 struct pci_dev *pdev = NULL;
b683b230 861 struct intel_iommu *iommu;
156baca8 862 struct device *tmp;
aa4d066a 863 u16 segment = 0;
c7151a8d
WH
864 int i;
865
2d33b7d6 866 if (!dev)
4ed6a540
DW
867 return NULL;
868
156baca8 869 if (dev_is_pci(dev)) {
1c387188
AR
870 struct pci_dev *pf_pdev;
871
e3560ee4 872 pdev = pci_real_dma_dev(to_pci_dev(dev));
5823e330 873
1c387188
AR
874 /* VFs aren't listed in scope tables; we need to look up
875 * the PF instead to find the IOMMU. */
876 pf_pdev = pci_physfn(pdev);
877 dev = &pf_pdev->dev;
156baca8 878 segment = pci_domain_nr(pdev->bus);
ca5b74d2 879 } else if (has_acpi_companion(dev))
156baca8
DW
880 dev = &ACPI_COMPANION(dev)->dev;
881
0e242612 882 rcu_read_lock();
2d33b7d6 883 for_each_iommu(iommu, drhd) {
156baca8 884 if (pdev && segment != drhd->segment)
276dbf99 885 continue;
c7151a8d 886
b683b230 887 for_each_active_dev_scope(drhd->devices,
156baca8
DW
888 drhd->devices_cnt, i, tmp) {
889 if (tmp == dev) {
1c387188
AR
890 /* For a VF use its original BDF# not that of the PF
891 * which we used for the IOMMU lookup. Strictly speaking
892 * we could do this for all PCI devices; we only need to
893 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 894 if (pdev && pdev->is_virtfn)
1c387188
AR
895 goto got_pdev;
896
dd6692f1
LB
897 if (bus && devfn) {
898 *bus = drhd->devices[i].bus;
899 *devfn = drhd->devices[i].devfn;
900 }
b683b230 901 goto out;
156baca8
DW
902 }
903
b9a7f981 904 if (is_downstream_to_pci_bridge(dev, tmp))
156baca8 905 goto got_pdev;
924b6231 906 }
c7151a8d 907
156baca8
DW
908 if (pdev && drhd->include_all) {
909 got_pdev:
dd6692f1
LB
910 if (bus && devfn) {
911 *bus = pdev->bus->number;
912 *devfn = pdev->devfn;
913 }
b683b230 914 goto out;
156baca8 915 }
c7151a8d 916 }
b683b230 917 iommu = NULL;
156baca8 918 out:
2d33b7d6
LB
919 if (iommu_is_dummy(iommu, dev))
920 iommu = NULL;
921
0e242612 922 rcu_read_unlock();
c7151a8d 923
b683b230 924 return iommu;
c7151a8d
WH
925}
926
5331fe6f
WH
927static void domain_flush_cache(struct dmar_domain *domain,
928 void *addr, int size)
929{
930 if (!domain->iommu_coherency)
931 clflush_cache_range(addr, size);
932}
933
ba395927
KA
934static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
935{
ba395927 936 struct context_entry *context;
03ecc32c 937 int ret = 0;
ba395927
KA
938 unsigned long flags;
939
940 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
941 context = iommu_context_addr(iommu, bus, devfn, 0);
942 if (context)
943 ret = context_present(context);
ba395927
KA
944 spin_unlock_irqrestore(&iommu->lock, flags);
945 return ret;
946}
947
ba395927
KA
948static void free_context_table(struct intel_iommu *iommu)
949{
ba395927
KA
950 int i;
951 unsigned long flags;
952 struct context_entry *context;
953
954 spin_lock_irqsave(&iommu->lock, flags);
955 if (!iommu->root_entry) {
956 goto out;
957 }
958 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 959 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
960 if (context)
961 free_pgtable_page(context);
03ecc32c 962
765b6a98 963 if (!sm_supported(iommu))
03ecc32c
DW
964 continue;
965
966 context = iommu_context_addr(iommu, i, 0x80, 0);
967 if (context)
968 free_pgtable_page(context);
969
ba395927
KA
970 }
971 free_pgtable_page(iommu->root_entry);
972 iommu->root_entry = NULL;
973out:
974 spin_unlock_irqrestore(&iommu->lock, flags);
975}
976
b026fd28 977static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 978 unsigned long pfn, int *target_level)
ba395927 979{
e083ea5b 980 struct dma_pte *parent, *pte;
ba395927 981 int level = agaw_to_level(domain->agaw);
4399c8bf 982 int offset;
ba395927
KA
983
984 BUG_ON(!domain->pgd);
f9423606 985
162d1b10 986 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
987 /* Address beyond IOMMU's addressing capabilities. */
988 return NULL;
989
ba395927
KA
990 parent = domain->pgd;
991
5cf0a76f 992 while (1) {
ba395927
KA
993 void *tmp_page;
994
b026fd28 995 offset = pfn_level_offset(pfn, level);
ba395927 996 pte = &parent[offset];
5cf0a76f 997 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 998 break;
5cf0a76f 999 if (level == *target_level)
ba395927
KA
1000 break;
1001
19c239ce 1002 if (!dma_pte_present(pte)) {
c85994e4
DW
1003 uint64_t pteval;
1004
4c923d47 1005 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 1006
206a73c1 1007 if (!tmp_page)
ba395927 1008 return NULL;
206a73c1 1009
c85994e4 1010 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 1011 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
ddf09b6d 1012 if (domain_use_first_level(domain))
16ecf10e 1013 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
effad4b5 1014 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
1015 /* Someone else set it while we were thinking; use theirs. */
1016 free_pgtable_page(tmp_page);
effad4b5 1017 else
c85994e4 1018 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 1019 }
5cf0a76f
DW
1020 if (level == 1)
1021 break;
1022
19c239ce 1023 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1024 level--;
1025 }
1026
5cf0a76f
DW
1027 if (!*target_level)
1028 *target_level = level;
1029
ba395927
KA
1030 return pte;
1031}
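/*
 * Illustration: with a 4-level table (agaw 2) and *target_level == 1,
 * the walk above descends from level 4 using the 9-bit indices returned
 * by pfn_level_offset(pfn, level), allocating missing directory pages on
 * the way, and returns the level-1 PTE slot for @pfn.  With
 * *target_level == 0 it instead stops at the first superpage or
 * non-present entry and reports the level it stopped at back through
 * *target_level.
 */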
1032
1033/* return address's pte at specific level */
90dcfb5e
DW
1034static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1035 unsigned long pfn,
6dd9a7c7 1036 int level, int *large_page)
ba395927 1037{
e083ea5b 1038 struct dma_pte *parent, *pte;
ba395927
KA
1039 int total = agaw_to_level(domain->agaw);
1040 int offset;
1041
1042 parent = domain->pgd;
1043 while (level <= total) {
90dcfb5e 1044 offset = pfn_level_offset(pfn, total);
ba395927
KA
1045 pte = &parent[offset];
1046 if (level == total)
1047 return pte;
1048
6dd9a7c7
YS
1049 if (!dma_pte_present(pte)) {
1050 *large_page = total;
ba395927 1051 break;
6dd9a7c7
YS
1052 }
1053
e16922af 1054 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1055 *large_page = total;
1056 return pte;
1057 }
1058
19c239ce 1059 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1060 total--;
1061 }
1062 return NULL;
1063}
1064
ba395927 1065/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1066static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1067 unsigned long start_pfn,
1068 unsigned long last_pfn)
ba395927 1069{
e083ea5b 1070 unsigned int large_page;
310a5ab9 1071 struct dma_pte *first_pte, *pte;
66eae846 1072
162d1b10
JL
1073 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1074 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1075 BUG_ON(start_pfn > last_pfn);
ba395927 1076
04b18e65 1077 /* we don't need lock here; nobody else touches the iova range */
59c36286 1078 do {
6dd9a7c7
YS
1079 large_page = 1;
1080 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1081 if (!pte) {
6dd9a7c7 1082 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1083 continue;
1084 }
6dd9a7c7 1085 do {
310a5ab9 1086 dma_clear_pte(pte);
6dd9a7c7 1087 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1088 pte++;
75e6bf96
DW
1089 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1090
310a5ab9
DW
1091 domain_flush_cache(domain, first_pte,
1092 (void *)pte - (void *)first_pte);
59c36286
DW
1093
1094 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1095}
1096
3269ee0b 1097static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
1098 int retain_level, struct dma_pte *pte,
1099 unsigned long pfn, unsigned long start_pfn,
1100 unsigned long last_pfn)
3269ee0b
AW
1101{
1102 pfn = max(start_pfn, pfn);
1103 pte = &pte[pfn_level_offset(pfn, level)];
1104
1105 do {
1106 unsigned long level_pfn;
1107 struct dma_pte *level_pte;
1108
1109 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1110 goto next;
1111
f7116e11 1112 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1113 level_pte = phys_to_virt(dma_pte_addr(pte));
1114
bc24c571
DD
1115 if (level > 2) {
1116 dma_pte_free_level(domain, level - 1, retain_level,
1117 level_pte, level_pfn, start_pfn,
1118 last_pfn);
1119 }
3269ee0b 1120
bc24c571
DD
1121 /*
1122 * Free the page table if we're below the level we want to
1123 * retain and the range covers the entire table.
1124 */
1125 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1126 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1127 dma_clear_pte(pte);
1128 domain_flush_cache(domain, pte, sizeof(*pte));
1129 free_pgtable_page(level_pte);
1130 }
1131next:
1132 pfn += level_size(level);
1133 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1134}
1135
bc24c571
DD
1136/*
1137 * clear last level (leaf) ptes and free page table pages below the
1138 * level we wish to keep intact.
1139 */
ba395927 1140static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1141 unsigned long start_pfn,
bc24c571
DD
1142 unsigned long last_pfn,
1143 int retain_level)
ba395927 1144{
162d1b10
JL
1145 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1146 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1147 BUG_ON(start_pfn > last_pfn);
ba395927 1148
d41a4adb
JL
1149 dma_pte_clear_range(domain, start_pfn, last_pfn);
1150
f3a0a52f 1151 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1152 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1153 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1154
ba395927 1155 /* free pgd */
d794dc9b 1156 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1157 free_pgtable_page(domain->pgd);
1158 domain->pgd = NULL;
1159 }
1160}
1161
ea8ea460
DW
1162/* When a page at a given level is being unlinked from its parent, we don't
1163 need to *modify* it at all. All we need to do is make a list of all the
1164 pages which can be freed just as soon as we've flushed the IOTLB and we
1165 know the hardware page-walk will no longer touch them.
1166 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1167 be freed. */
1168static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1169 int level, struct dma_pte *pte,
1170 struct page *freelist)
1171{
1172 struct page *pg;
1173
1174 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1175 pg->freelist = freelist;
1176 freelist = pg;
1177
1178 if (level == 1)
1179 return freelist;
1180
adeb2590
JL
1181 pte = page_address(pg);
1182 do {
ea8ea460
DW
1183 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1184 freelist = dma_pte_list_pagetables(domain, level - 1,
1185 pte, freelist);
adeb2590
JL
1186 pte++;
1187 } while (!first_pte_in_page(pte));
ea8ea460
DW
1188
1189 return freelist;
1190}
1191
1192static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1193 struct dma_pte *pte, unsigned long pfn,
1194 unsigned long start_pfn,
1195 unsigned long last_pfn,
1196 struct page *freelist)
1197{
1198 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1199
1200 pfn = max(start_pfn, pfn);
1201 pte = &pte[pfn_level_offset(pfn, level)];
1202
1203 do {
1204 unsigned long level_pfn;
1205
1206 if (!dma_pte_present(pte))
1207 goto next;
1208
1209 level_pfn = pfn & level_mask(level);
1210
1211 /* If range covers entire pagetable, free it */
1212 if (start_pfn <= level_pfn &&
1213 last_pfn >= level_pfn + level_size(level) - 1) {
1214 /* These subordinate page tables are going away entirely. Don't
1215 bother to clear them; we're just going to *free* them. */
1216 if (level > 1 && !dma_pte_superpage(pte))
1217 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1218
1219 dma_clear_pte(pte);
1220 if (!first_pte)
1221 first_pte = pte;
1222 last_pte = pte;
1223 } else if (level > 1) {
1224 /* Recurse down into a level that isn't *entirely* obsolete */
1225 freelist = dma_pte_clear_level(domain, level - 1,
1226 phys_to_virt(dma_pte_addr(pte)),
1227 level_pfn, start_pfn, last_pfn,
1228 freelist);
1229 }
1230next:
1231 pfn += level_size(level);
1232 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1233
1234 if (first_pte)
1235 domain_flush_cache(domain, first_pte,
1236 (void *)++last_pte - (void *)first_pte);
1237
1238 return freelist;
1239}
1240
1241/* We can't just free the pages because the IOMMU may still be walking
1242 the page tables, and may have cached the intermediate levels. The
1243 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1244static struct page *domain_unmap(struct dmar_domain *domain,
1245 unsigned long start_pfn,
1246 unsigned long last_pfn)
ea8ea460 1247{
e083ea5b 1248 struct page *freelist;
ea8ea460 1249
162d1b10
JL
1250 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1251 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1252 BUG_ON(start_pfn > last_pfn);
1253
1254 /* we don't need lock here; nobody else touches the iova range */
1255 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1256 domain->pgd, 0, start_pfn, last_pfn, NULL);
1257
1258 /* free pgd */
1259 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1260 struct page *pgd_page = virt_to_page(domain->pgd);
1261 pgd_page->freelist = freelist;
1262 freelist = pgd_page;
1263
1264 domain->pgd = NULL;
1265 }
1266
1267 return freelist;
1268}
1269
b690420a 1270static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1271{
1272 struct page *pg;
1273
1274 while ((pg = freelist)) {
1275 freelist = pg->freelist;
1276 free_pgtable_page(page_address(pg));
1277 }
1278}
1279
13cf0174
JR
1280static void iova_entry_free(unsigned long data)
1281{
1282 struct page *freelist = (struct page *)data;
1283
1284 dma_free_pagelist(freelist);
1285}
1286
ba395927
KA
1287/* iommu handling */
1288static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1289{
1290 struct root_entry *root;
1291 unsigned long flags;
1292
4c923d47 1293 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1294 if (!root) {
9f10e5bf 1295 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1296 iommu->name);
ba395927 1297 return -ENOMEM;
ffebeb46 1298 }
ba395927 1299
5b6985ce 1300 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1301
1302 spin_lock_irqsave(&iommu->lock, flags);
1303 iommu->root_entry = root;
1304 spin_unlock_irqrestore(&iommu->lock, flags);
1305
1306 return 0;
1307}
1308
ba395927
KA
1309static void iommu_set_root_entry(struct intel_iommu *iommu)
1310{
03ecc32c 1311 u64 addr;
c416daa9 1312 u32 sts;
ba395927
KA
1313 unsigned long flag;
1314
03ecc32c 1315 addr = virt_to_phys(iommu->root_entry);
7373a8cc
LB
1316 if (sm_supported(iommu))
1317 addr |= DMA_RTADDR_SMT;
ba395927 1318
1f5b3c3f 1319 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1320 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1321
c416daa9 1322 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1323
1324 /* Make sure hardware complete it */
1325 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1326 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1327
1f5b3c3f 1328 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1329}
1330
6f7db75e 1331void iommu_flush_write_buffer(struct intel_iommu *iommu)
ba395927
KA
1332{
1333 u32 val;
1334 unsigned long flag;
1335
9af88143 1336 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1337 return;
ba395927 1338
1f5b3c3f 1339 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1340 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1341
1342 /* Make sure hardware complete it */
1343 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1344 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1345
1f5b3c3f 1346 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1347}
1348
1349/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1350static void __iommu_flush_context(struct intel_iommu *iommu,
1351 u16 did, u16 source_id, u8 function_mask,
1352 u64 type)
ba395927
KA
1353{
1354 u64 val = 0;
1355 unsigned long flag;
1356
ba395927
KA
1357 switch (type) {
1358 case DMA_CCMD_GLOBAL_INVL:
1359 val = DMA_CCMD_GLOBAL_INVL;
1360 break;
1361 case DMA_CCMD_DOMAIN_INVL:
1362 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1363 break;
1364 case DMA_CCMD_DEVICE_INVL:
1365 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1366 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1367 break;
1368 default:
1369 BUG();
1370 }
1371 val |= DMA_CCMD_ICC;
1372
1f5b3c3f 1373 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1374 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1375
1376 /* Make sure hardware complete it */
1377 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1378 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1379
1f5b3c3f 1380 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1381}
1382
ba395927 1383/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1384static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1385 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1386{
1387 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1388 u64 val = 0, val_iva = 0;
1389 unsigned long flag;
1390
ba395927
KA
1391 switch (type) {
1392 case DMA_TLB_GLOBAL_FLUSH:
1393 /* global flush doesn't need set IVA_REG */
1394 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1395 break;
1396 case DMA_TLB_DSI_FLUSH:
1397 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1398 break;
1399 case DMA_TLB_PSI_FLUSH:
1400 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1401 /* IH bit is passed in as part of address */
ba395927
KA
1402 val_iva = size_order | addr;
1403 break;
1404 default:
1405 BUG();
1406 }
1407 /* Note: set drain read/write */
1408#if 0
1409 /*
1410 * This is probably to be super secure.. Looks like we can
1411 * ignore it without any impact.
1412 */
1413 if (cap_read_drain(iommu->cap))
1414 val |= DMA_TLB_READ_DRAIN;
1415#endif
1416 if (cap_write_drain(iommu->cap))
1417 val |= DMA_TLB_WRITE_DRAIN;
1418
1f5b3c3f 1419 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1420 /* Note: Only uses first TLB reg currently */
1421 if (val_iva)
1422 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1423 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1424
1425 /* Make sure hardware complete it */
1426 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1427 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1428
1f5b3c3f 1429 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1430
1431 /* check IOTLB invalidation granularity */
1432 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1433 pr_err("Flush IOTLB failed\n");
ba395927 1434 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1435 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1436 (unsigned long long)DMA_TLB_IIRG(type),
1437 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1438}
1439
64ae892b
DW
1440static struct device_domain_info *
1441iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1442 u8 bus, u8 devfn)
93a23a72 1443{
93a23a72 1444 struct device_domain_info *info;
93a23a72 1445
55d94043
JR
1446 assert_spin_locked(&device_domain_lock);
1447
93a23a72
YZ
1448 if (!iommu->qi)
1449 return NULL;
1450
93a23a72 1451 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1452 if (info->iommu == iommu && info->bus == bus &&
1453 info->devfn == devfn) {
b16d0cb9
DW
1454 if (info->ats_supported && info->dev)
1455 return info;
93a23a72
YZ
1456 break;
1457 }
93a23a72 1458
b16d0cb9 1459 return NULL;
93a23a72
YZ
1460}
1461
0824c592
OP
1462static void domain_update_iotlb(struct dmar_domain *domain)
1463{
1464 struct device_domain_info *info;
1465 bool has_iotlb_device = false;
1466
1467 assert_spin_locked(&device_domain_lock);
1468
1469 list_for_each_entry(info, &domain->devices, link) {
1470 struct pci_dev *pdev;
1471
1472 if (!info->dev || !dev_is_pci(info->dev))
1473 continue;
1474
1475 pdev = to_pci_dev(info->dev);
1476 if (pdev->ats_enabled) {
1477 has_iotlb_device = true;
1478 break;
1479 }
1480 }
1481
1482 domain->has_iotlb_device = has_iotlb_device;
1483}
1484
93a23a72 1485static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1486{
fb0cc3aa
BH
1487 struct pci_dev *pdev;
1488
0824c592
OP
1489 assert_spin_locked(&device_domain_lock);
1490
0bcb3e28 1491 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1492 return;
1493
fb0cc3aa 1494 pdev = to_pci_dev(info->dev);
1c48db44
JP
1495 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1496 * the PFSID to the invalidation descriptor of a VF so that the IOMMU HW
1497 * can gauge queue depth at the PF level. If DIT is not set, PFSID is
1498 * treated as reserved and should be set to 0.
1499 */
1500 if (!ecap_dit(info->iommu->ecap))
1501 info->pfsid = 0;
1502 else {
1503 struct pci_dev *pf_pdev;
1504
1505 /* pdev will be returned if device is not a vf */
1506 pf_pdev = pci_physfn(pdev);
cc49baa9 1507 info->pfsid = pci_dev_id(pf_pdev);
1c48db44 1508 }
fb0cc3aa 1509
b16d0cb9
DW
1510#ifdef CONFIG_INTEL_IOMMU_SVM
1511 /* The PCIe spec, in its wisdom, declares that the behaviour of
1512 the device if you enable PASID support after ATS support is
1513 undefined. So always enable PASID support on devices which
1514 have it, even if we can't yet know if we're ever going to
1515 use it. */
1516 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1517 info->pasid_enabled = 1;
1518
1b84778a
KS
1519 if (info->pri_supported &&
1520 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1521 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1522 info->pri_enabled = 1;
1523#endif
da656a04 1524 if (info->ats_supported && pci_ats_page_aligned(pdev) &&
fb58fdcd 1525 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1526 info->ats_enabled = 1;
0824c592 1527 domain_update_iotlb(info->domain);
b16d0cb9
DW
1528 info->ats_qdep = pci_ats_queue_depth(pdev);
1529 }
93a23a72
YZ
1530}
1531
1532static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1533{
b16d0cb9
DW
1534 struct pci_dev *pdev;
1535
0824c592
OP
1536 assert_spin_locked(&device_domain_lock);
1537
da972fb1 1538 if (!dev_is_pci(info->dev))
93a23a72
YZ
1539 return;
1540
b16d0cb9
DW
1541 pdev = to_pci_dev(info->dev);
1542
1543 if (info->ats_enabled) {
1544 pci_disable_ats(pdev);
1545 info->ats_enabled = 0;
0824c592 1546 domain_update_iotlb(info->domain);
b16d0cb9
DW
1547 }
1548#ifdef CONFIG_INTEL_IOMMU_SVM
1549 if (info->pri_enabled) {
1550 pci_disable_pri(pdev);
1551 info->pri_enabled = 0;
1552 }
1553 if (info->pasid_enabled) {
1554 pci_disable_pasid(pdev);
1555 info->pasid_enabled = 0;
1556 }
1557#endif
93a23a72
YZ
1558}
1559
1560static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1561 u64 addr, unsigned mask)
1562{
1563 u16 sid, qdep;
1564 unsigned long flags;
1565 struct device_domain_info *info;
1566
0824c592
OP
1567 if (!domain->has_iotlb_device)
1568 return;
1569
93a23a72
YZ
1570 spin_lock_irqsave(&device_domain_lock, flags);
1571 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1572 if (!info->ats_enabled)
93a23a72
YZ
1573 continue;
1574
1575 sid = info->bus << 8 | info->devfn;
b16d0cb9 1576 qdep = info->ats_qdep;
1c48db44
JP
1577 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1578 qdep, addr, mask);
93a23a72
YZ
1579 }
1580 spin_unlock_irqrestore(&device_domain_lock, flags);
1581}
1582
33cd6e64
LB
1583static void domain_flush_piotlb(struct intel_iommu *iommu,
1584 struct dmar_domain *domain,
1585 u64 addr, unsigned long npages, bool ih)
1586{
1587 u16 did = domain->iommu_did[iommu->seq_id];
1588
1589 if (domain->default_pasid)
1590 qi_flush_piotlb(iommu, did, domain->default_pasid,
1591 addr, npages, ih);
1592
1593 if (!list_empty(&domain->devices))
1594 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
1595}
1596
a1ddcbe9
JR
1597static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1598 struct dmar_domain *domain,
1599 unsigned long pfn, unsigned int pages,
1600 int ih, int map)
ba395927 1601{
9dd2fe89 1602 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1603 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1604 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1605
ba395927
KA
1606 BUG_ON(pages == 0);
1607
ea8ea460
DW
1608 if (ih)
1609 ih = 1 << 6;
33cd6e64
LB
1610
1611 if (domain_use_first_level(domain)) {
1612 domain_flush_piotlb(iommu, domain, addr, pages, ih);
1613 } else {
1614 /*
1615 * Fallback to domain selective flush if no PSI support or
1616 * the size is too big. PSI requires page size to be 2 ^ x,
1617 * and the base address is naturally aligned to the size.
1618 */
1619 if (!cap_pgsel_inv(iommu->cap) ||
1620 mask > cap_max_amask_val(iommu->cap))
1621 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1622 DMA_TLB_DSI_FLUSH);
1623 else
1624 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1625 DMA_TLB_PSI_FLUSH);
1626 }
bf92df30
YZ
1627
1628 /*
82653633
NA
1629 * In caching mode, changes of pages from non-present to present require
1630 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1631 */
82653633 1632 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1633 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1634}
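/*
 * Worked example for the second-level PSI path above: flushing 9 pages
 * rounds up to mask = ilog2(16) = 4, i.e. a naturally aligned 16-page
 * (64KiB) region is invalidated.  If the hardware lacks page-selective
 * invalidation or cannot handle that mask, the code falls back to a
 * domain-selective flush instead.
 */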
1635
eed91a0b
PX
1636/* Notification for newly created mappings */
1637static inline void __mapping_notify_one(struct intel_iommu *iommu,
1638 struct dmar_domain *domain,
1639 unsigned long pfn, unsigned int pages)
1640{
33cd6e64
LB
1641 /*
1642 * It's a non-present to present mapping. Only flush if caching mode
1643 * and second level.
1644 */
1645 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
eed91a0b
PX
1646 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1647 else
1648 iommu_flush_write_buffer(iommu);
1649}
1650
13cf0174
JR
1651static void iommu_flush_iova(struct iova_domain *iovad)
1652{
1653 struct dmar_domain *domain;
1654 int idx;
1655
1656 domain = container_of(iovad, struct dmar_domain, iovad);
1657
1658 for_each_domain_iommu(idx, domain) {
1659 struct intel_iommu *iommu = g_iommus[idx];
1660 u16 did = domain->iommu_did[iommu->seq_id];
1661
33cd6e64
LB
1662 if (domain_use_first_level(domain))
1663 domain_flush_piotlb(iommu, domain, 0, -1, 0);
1664 else
1665 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1666 DMA_TLB_DSI_FLUSH);
13cf0174
JR
1667
1668 if (!cap_caching_mode(iommu->cap))
1669 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1670 0, MAX_AGAW_PFN_WIDTH);
1671 }
1672}
1673
f8bab735 1674static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1675{
1676 u32 pmen;
1677 unsigned long flags;
1678
5bb71fc7
LB
1679 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1680 return;
1681
1f5b3c3f 1682 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1683 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1684 pmen &= ~DMA_PMEN_EPM;
1685 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1686
1687 /* wait for the protected region status bit to clear */
1688 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1689 readl, !(pmen & DMA_PMEN_PRS), pmen);
1690
1f5b3c3f 1691 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1692}
1693
2a41ccee 1694static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1695{
1696 u32 sts;
1697 unsigned long flags;
1698
1f5b3c3f 1699 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1700 iommu->gcmd |= DMA_GCMD_TE;
1701 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1702
1703 /* Make sure hardware complete it */
1704 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1705 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1706
1f5b3c3f 1707 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1708}
1709
2a41ccee 1710static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1711{
1712 u32 sts;
1713 unsigned long flag;
1714
b1012ca8
LB
1715 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1716 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1717 return;
1718
1f5b3c3f 1719 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1720 iommu->gcmd &= ~DMA_GCMD_TE;
1721 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1722
1723 /* Make sure hardware complete it */
1724 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1725 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1726
1f5b3c3f 1727 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1728}
1729
1730static int iommu_init_domains(struct intel_iommu *iommu)
1731{
8bf47816
JR
1732 u32 ndomains, nlongs;
1733 size_t size;
ba395927
KA
1734
1735 ndomains = cap_ndoms(iommu->cap);
8bf47816 1736 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1737 iommu->name, ndomains);
ba395927
KA
1738 nlongs = BITS_TO_LONGS(ndomains);
1739
94a91b50
DD
1740 spin_lock_init(&iommu->lock);
1741
ba395927
KA
1742 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1743 if (!iommu->domain_ids) {
9f10e5bf
JR
1744 pr_err("%s: Allocating domain id array failed\n",
1745 iommu->name);
ba395927
KA
1746 return -ENOMEM;
1747 }
8bf47816 1748
86f004c7 1749 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1750 iommu->domains = kzalloc(size, GFP_KERNEL);
1751
1752 if (iommu->domains) {
1753 size = 256 * sizeof(struct dmar_domain *);
1754 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1755 }
1756
1757 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1758 pr_err("%s: Allocating domain array failed\n",
1759 iommu->name);
852bdb04 1760 kfree(iommu->domain_ids);
8bf47816 1761 kfree(iommu->domains);
852bdb04 1762 iommu->domain_ids = NULL;
8bf47816 1763 iommu->domains = NULL;
ba395927
KA
1764 return -ENOMEM;
1765 }
1766
1767 /*
c0e8a6c8
JR
1768 * If Caching mode is set, then invalid translations are tagged
1769 * with domain-id 0, hence we need to pre-allocate it. We also
1770 * use domain-id 0 as a marker for non-allocated domain-id, so
1771 * make sure it is not used for a real domain.
ba395927 1772 */
c0e8a6c8
JR
1773 set_bit(0, iommu->domain_ids);
1774
3b33d4ab
LB
1775 /*
 1776 * VT-d spec rev 3.0 (section 6.2.3.1) requires that each pasid
1777 * entry for first-level or pass-through translation modes should
1778 * be programmed with a domain id different from those used for
1779 * second-level or nested translation. We reserve a domain id for
1780 * this purpose.
1781 */
1782 if (sm_supported(iommu))
1783 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1784
ba395927
KA
1785 return 0;
1786}
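/*
 * Editor's illustrative sketch -- not part of the original file.  The
 * allocation above sizes iommu->domains as a two-level array: a first
 * level of ALIGN(ndomains, 256) / 256 pointers, each referring to a
 * lazily allocated page of 256 struct dmar_domain pointers.  The helper
 * below shows the corresponding lookup (the in-tree set_iommu_domain()/
 * get_iommu_domain() helpers, not visible in this hunk, index the same
 * way); the "demo_" names are hypothetical.
 */
#include <stddef.h>

struct demo_domain;

struct demo_iommu {
	struct demo_domain ***domains;	/* domains[did >> 8][did & 0xff] */
};

static struct demo_domain *demo_get_domain(const struct demo_iommu *iommu,
					   unsigned int did)
{
	struct demo_domain **page = iommu->domains[did >> 8];

	return page ? page[did & 0xff] : NULL;
}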
ba395927 1787
ffebeb46 1788static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1789{
29a27719 1790 struct device_domain_info *info, *tmp;
55d94043 1791 unsigned long flags;
ba395927 1792
29a27719
JR
1793 if (!iommu->domains || !iommu->domain_ids)
1794 return;
a4eaa86c 1795
55d94043 1796 spin_lock_irqsave(&device_domain_lock, flags);
29a27719 1797 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
29a27719
JR
1798 if (info->iommu != iommu)
1799 continue;
1800
1801 if (!info->dev || !info->domain)
1802 continue;
1803
bea64033 1804 __dmar_remove_one_dev_info(info);
ba395927 1805 }
55d94043 1806 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1807
1808 if (iommu->gcmd & DMA_GCMD_TE)
1809 iommu_disable_translation(iommu);
ffebeb46 1810}
ba395927 1811
ffebeb46
JL
1812static void free_dmar_iommu(struct intel_iommu *iommu)
1813{
1814 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1815 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1816 int i;
1817
1818 for (i = 0; i < elems; i++)
1819 kfree(iommu->domains[i]);
ffebeb46
JL
1820 kfree(iommu->domains);
1821 kfree(iommu->domain_ids);
1822 iommu->domains = NULL;
1823 iommu->domain_ids = NULL;
1824 }
ba395927 1825
d9630fe9
WH
1826 g_iommus[iommu->seq_id] = NULL;
1827
ba395927
KA
1828 /* free context mapping */
1829 free_context_table(iommu);
8a94ade4
DW
1830
1831#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1832 if (pasid_supported(iommu)) {
a222a7f0
DW
1833 if (ecap_prs(iommu->ecap))
1834 intel_svm_finish_prq(iommu);
a222a7f0 1835 }
d76b42e9 1836 if (vccap_pasid(iommu->vccap))
3375303e
JP
1837 ioasid_unregister_allocator(&iommu->pasid_allocator);
1838
8a94ade4 1839#endif
ba395927
KA
1840}
1841
a1948f2e
LB
1842/*
1843 * Check and return whether first level is used by default for
b802d070 1844 * DMA translation.
a1948f2e
LB
1845 */
1846static bool first_level_by_default(void)
1847{
1848 struct dmar_drhd_unit *drhd;
1849 struct intel_iommu *iommu;
b802d070 1850 static int first_level_support = -1;
a1948f2e
LB
1851
1852 if (likely(first_level_support != -1))
1853 return first_level_support;
1854
1855 first_level_support = 1;
1856
1857 rcu_read_lock();
1858 for_each_active_iommu(iommu, drhd) {
1859 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
1860 first_level_support = 0;
1861 break;
1862 }
1863 }
1864 rcu_read_unlock();
1865
1866 return first_level_support;
1867}
1868
ab8dfe25 1869static struct dmar_domain *alloc_domain(int flags)
ba395927 1870{
ba395927 1871 struct dmar_domain *domain;
ba395927
KA
1872
1873 domain = alloc_domain_mem();
1874 if (!domain)
1875 return NULL;
1876
ab8dfe25 1877 memset(domain, 0, sizeof(*domain));
98fa15f3 1878 domain->nid = NUMA_NO_NODE;
ab8dfe25 1879 domain->flags = flags;
a1948f2e
LB
1880 if (first_level_by_default())
1881 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
0824c592 1882 domain->has_iotlb_device = false;
92d03cc8 1883 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1884
1885 return domain;
1886}
1887
d160aca5
JR
1888/* Must be called with iommu->lock */
1889static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1890 struct intel_iommu *iommu)
1891{
44bde614 1892 unsigned long ndomains;
55d94043 1893 int num;
44bde614 1894
55d94043 1895 assert_spin_locked(&device_domain_lock);
d160aca5 1896 assert_spin_locked(&iommu->lock);
ba395927 1897
29a27719
JR
1898 domain->iommu_refcnt[iommu->seq_id] += 1;
1899 domain->iommu_count += 1;
1900 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1901 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1902 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1903
1904 if (num >= ndomains) {
1905 pr_err("%s: No free domain ids\n", iommu->name);
1906 domain->iommu_refcnt[iommu->seq_id] -= 1;
1907 domain->iommu_count -= 1;
55d94043 1908 return -ENOSPC;
2c2e2c38 1909 }
ba395927 1910
d160aca5
JR
1911 set_bit(num, iommu->domain_ids);
1912 set_iommu_domain(iommu, num, domain);
1913
1914 domain->iommu_did[iommu->seq_id] = num;
1915 domain->nid = iommu->node;
fb170fb4 1916
fb170fb4
JL
1917 domain_update_iommu_cap(domain);
1918 }
d160aca5 1919
55d94043 1920 return 0;
fb170fb4
JL
1921}
1922
1923static int domain_detach_iommu(struct dmar_domain *domain,
1924 struct intel_iommu *iommu)
1925{
e083ea5b 1926 int num, count;
d160aca5 1927
55d94043 1928 assert_spin_locked(&device_domain_lock);
d160aca5 1929 assert_spin_locked(&iommu->lock);
fb170fb4 1930
29a27719
JR
1931 domain->iommu_refcnt[iommu->seq_id] -= 1;
1932 count = --domain->iommu_count;
1933 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1934 num = domain->iommu_did[iommu->seq_id];
1935 clear_bit(num, iommu->domain_ids);
1936 set_iommu_domain(iommu, num, NULL);
fb170fb4 1937
fb170fb4 1938 domain_update_iommu_cap(domain);
c0e8a6c8 1939 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1940 }
fb170fb4
JL
1941
1942 return count;
1943}
1944
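/*
 * Editor's illustrative sketch -- not part of the original file.  The
 * attach/detach pair above hands out per-IOMMU domain ids from the
 * iommu->domain_ids bitmap: attach takes the first clear bit (ids 0 and,
 * in scalable mode, FLPT_DEFAULT_DID are pre-reserved by
 * iommu_init_domains()), and detach clears the bit again once the last
 * device reference is gone.  Below is the same idea in plain C, without
 * the kernel's find_first_zero_bit()/set_bit() helpers; all "demo_"
 * names are hypothetical.
 */
#define DEMO_NDOMAINS 256

/* bit set => domain id in use; id 0 pre-reserved, as in iommu_init_domains() */
static unsigned long long demo_domain_ids[DEMO_NDOMAINS / 64] = { 0x1 };

static int demo_alloc_domain_id(void)
{
	unsigned int id;

	for (id = 0; id < DEMO_NDOMAINS; id++) {
		if (!(demo_domain_ids[id / 64] & (1ULL << (id % 64)))) {
			demo_domain_ids[id / 64] |= 1ULL << (id % 64);
			return id;
		}
	}
	return -1;	/* mirrors the -ENOSPC path above */
}

static void demo_free_domain_id(unsigned int id)
{
	demo_domain_ids[id / 64] &= ~(1ULL << (id % 64));
}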
ba395927 1945static struct iova_domain reserved_iova_list;
8a443df4 1946static struct lock_class_key reserved_rbtree_key;
ba395927 1947
51a63e67 1948static int dmar_init_reserved_ranges(void)
ba395927
KA
1949{
1950 struct pci_dev *pdev = NULL;
1951 struct iova *iova;
1952 int i;
ba395927 1953
aa3ac946 1954 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1955
8a443df4
MG
1956 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1957 &reserved_rbtree_key);
1958
ba395927
KA
1959 /* IOAPIC ranges shouldn't be accessed by DMA */
1960 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1961 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1962 if (!iova) {
9f10e5bf 1963 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1964 return -ENODEV;
1965 }
ba395927
KA
1966
1967 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1968 for_each_pci_dev(pdev) {
1969 struct resource *r;
1970
1971 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1972 r = &pdev->resource[i];
1973 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1974 continue;
1a4a4551
DW
1975 iova = reserve_iova(&reserved_iova_list,
1976 IOVA_PFN(r->start),
1977 IOVA_PFN(r->end));
51a63e67 1978 if (!iova) {
932a6523 1979 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1980 return -ENODEV;
1981 }
ba395927
KA
1982 }
1983 }
51a63e67 1984 return 0;
ba395927
KA
1985}
1986
ba395927
KA
1987static inline int guestwidth_to_adjustwidth(int gaw)
1988{
1989 int agaw;
1990 int r = (gaw - 12) % 9;
1991
1992 if (r == 0)
1993 agaw = gaw;
1994 else
1995 agaw = gaw + 9 - r;
1996 if (agaw > 64)
1997 agaw = 64;
1998 return agaw;
1999}
2000
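/*
 * Editor's illustrative sketch -- not part of the original file.  Each
 * second-level paging level resolves 9 address bits on top of the
 * 12-bit page offset, so guestwidth_to_adjustwidth() above rounds the
 * guest width up to the next value of the form 12 + 9 * n, capped at
 * 64.  The self-check below works through a few values; the "demo_"
 * name is hypothetical.
 */
static int demo_adjust_width_selfcheck(void)
{
	/*
	 * (39 - 12) % 9 == 0 -> 39 stays 39 (3-level table)
	 * (48 - 12) % 9 == 0 -> 48 stays 48 (4-level table)
	 * (50 - 12) % 9 == 2 -> 50 rounds up to 57 (5-level table)
	 * anything that would exceed 64 is clamped to 64
	 */
	return guestwidth_to_adjustwidth(39) == 39 &&
	       guestwidth_to_adjustwidth(48) == 48 &&
	       guestwidth_to_adjustwidth(50) == 57 &&
	       guestwidth_to_adjustwidth(70) == 64;
}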
ba395927
KA
2001static void domain_exit(struct dmar_domain *domain)
2002{
ba395927 2003
d160aca5 2004 /* Remove associated devices and clear attached or cached domains */
ba395927 2005 domain_remove_dev_info(domain);
92d03cc8 2006
ba395927 2007 /* destroy iovas */
e70b081c
TM
2008 if (domain->domain.type == IOMMU_DOMAIN_DMA)
2009 put_iova_domain(&domain->iovad);
ba395927 2010
3ee9eca7
DS
2011 if (domain->pgd) {
2012 struct page *freelist;
ba395927 2013
3ee9eca7
DS
2014 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
2015 dma_free_pagelist(freelist);
2016 }
ea8ea460 2017
ba395927
KA
2018 free_domain_mem(domain);
2019}
2020
7373a8cc
LB
2021/*
 2022 * Get the PASID directory size for a scalable mode context entry.
 2023 * The value X in the PDTS field of a scalable mode context entry
 2024 * indicates a PASID directory with 2^(X + 7) entries.
2025 */
2026static inline unsigned long context_get_sm_pds(struct pasid_table *table)
2027{
2028 int pds, max_pde;
2029
2030 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
2031 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
2032 if (pds < 7)
2033 return 0;
2034
2035 return pds - 7;
2036}
2037
2038/*
2039 * Set the RID_PASID field of a scalable mode context entry. The
2040 * IOMMU hardware will use the PASID value set in this field for
2041 * DMA translations of DMA requests without PASID.
2042 */
2043static inline void
2044context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
2045{
2046 context->hi |= pasid & ((1 << 20) - 1);
7373a8cc
LB
2047}
2048
2049/*
2050 * Set the DTE(Device-TLB Enable) field of a scalable mode context
2051 * entry.
2052 */
2053static inline void context_set_sm_dte(struct context_entry *context)
2054{
2055 context->lo |= (1 << 2);
2056}
2057
2058/*
2059 * Set the PRE(Page Request Enable) field of a scalable mode context
2060 * entry.
2061 */
2062static inline void context_set_sm_pre(struct context_entry *context)
2063{
2064 context->lo |= (1 << 4);
2065}
2066
2067/* Convert value to context PASID directory size field coding. */
2068#define context_pdts(pds) (((pds) & 0x7) << 9)
2069
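/*
 * Editor's illustrative sketch -- not part of the original file.  It
 * pulls together the field encodings used by the helpers above for a
 * scalable-mode context entry: the PASID directory pointer plus PDTS
 * (bits 11:9) in the low quadword, the DTE (bit 2) and PRE (bit 4)
 * enables, and RID_PASID in the low 20 bits of the high quadword.
 * Present/fault-processing bits are set elsewhere and omitted here;
 * the struct and "demo_" names are hypothetical.
 */
struct demo_sm_context {
	unsigned long long lo;
	unsigned long long hi;
};

static void demo_pack_sm_context(struct demo_sm_context *ce,
				 unsigned long long pasid_dir_phys,
				 unsigned long pds,	/* directory holds 2^(pds + 7) entries */
				 unsigned long rid_pasid,
				 int dev_tlb, int page_req)
{
	ce->lo = pasid_dir_phys | ((pds & 0x7) << 9);	/* PASID directory pointer + PDTS */
	if (dev_tlb)
		ce->lo |= 1 << 2;			/* DTE: Device-TLB enable */
	if (page_req)
		ce->lo |= 1 << 4;			/* PRE: Page Request enable */
	ce->hi = rid_pasid & ((1 << 20) - 1);		/* RID_PASID */
}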
64ae892b
DW
2070static int domain_context_mapping_one(struct dmar_domain *domain,
2071 struct intel_iommu *iommu,
ca6e322d 2072 struct pasid_table *table,
28ccce0d 2073 u8 bus, u8 devfn)
ba395927 2074{
c6c2cebd 2075 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
2076 int translation = CONTEXT_TT_MULTI_LEVEL;
2077 struct device_domain_info *info = NULL;
ba395927 2078 struct context_entry *context;
ba395927 2079 unsigned long flags;
7373a8cc 2080 int ret;
28ccce0d 2081
c6c2cebd
JR
2082 WARN_ON(did == 0);
2083
28ccce0d
JR
2084 if (hw_pass_through && domain_type_is_si(domain))
2085 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
2086
2087 pr_debug("Set context mapping for %02x:%02x.%d\n",
2088 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2089
ba395927 2090 BUG_ON(!domain->pgd);
5331fe6f 2091
55d94043
JR
2092 spin_lock_irqsave(&device_domain_lock, flags);
2093 spin_lock(&iommu->lock);
2094
2095 ret = -ENOMEM;
03ecc32c 2096 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2097 if (!context)
55d94043 2098 goto out_unlock;
ba395927 2099
55d94043
JR
2100 ret = 0;
2101 if (context_present(context))
2102 goto out_unlock;
cf484d0e 2103
aec0e861
XP
2104 /*
2105 * For kdump cases, old valid entries may be cached due to the
2106 * in-flight DMA and copied pgtable, but there is no unmapping
2107 * behaviour for them, thus we need an explicit cache flush for
2108 * the newly-mapped device. For kdump, at this point, the device
2109 * is supposed to finish reset at its driver probe stage, so no
 2110 * in-flight DMA will exist, and we do not need to worry about
 2111 * it hereafter.
2112 */
2113 if (context_copied(context)) {
2114 u16 did_old = context_domain_id(context);
2115
b117e038 2116 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2117 iommu->flush.flush_context(iommu, did_old,
2118 (((u16)bus) << 8) | devfn,
2119 DMA_CCMD_MASK_NOBIT,
2120 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2121 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2122 DMA_TLB_DSI_FLUSH);
2123 }
aec0e861
XP
2124 }
2125
de24e553 2126 context_clear_entry(context);
ea6606b0 2127
7373a8cc
LB
2128 if (sm_supported(iommu)) {
2129 unsigned long pds;
4ed0d3e6 2130
7373a8cc
LB
2131 WARN_ON(!table);
2132
2133 /* Setup the PASID DIR pointer: */
2134 pds = context_get_sm_pds(table);
2135 context->lo = (u64)virt_to_phys(table->table) |
2136 context_pdts(pds);
2137
2138 /* Setup the RID_PASID field: */
2139 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2140
de24e553 2141 /*
7373a8cc
LB
2142 * Setup the Device-TLB enable bit and Page request
2143 * Enable bit:
de24e553 2144 */
7373a8cc
LB
2145 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2146 if (info && info->ats_supported)
2147 context_set_sm_dte(context);
2148 if (info && info->pri_supported)
2149 context_set_sm_pre(context);
2150 } else {
2151 struct dma_pte *pgd = domain->pgd;
2152 int agaw;
2153
2154 context_set_domain_id(context, did);
7373a8cc
LB
2155
2156 if (translation != CONTEXT_TT_PASS_THROUGH) {
2157 /*
 2158 * Skip top levels of page tables for an iommu which has
 2159 * a smaller agaw than the default. Unnecessary for PT mode.
2160 */
2161 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2162 ret = -ENOMEM;
2163 pgd = phys_to_virt(dma_pte_addr(pgd));
2164 if (!dma_pte_present(pgd))
2165 goto out_unlock;
2166 }
2167
2168 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2169 if (info && info->ats_supported)
2170 translation = CONTEXT_TT_DEV_IOTLB;
2171 else
2172 translation = CONTEXT_TT_MULTI_LEVEL;
2173
2174 context_set_address_root(context, virt_to_phys(pgd));
2175 context_set_address_width(context, agaw);
2176 } else {
2177 /*
2178 * In pass through mode, AW must be programmed to
2179 * indicate the largest AGAW value supported by
2180 * hardware. And ASR is ignored by hardware.
2181 */
2182 context_set_address_width(context, iommu->msagaw);
2183 }
41b80db2
LB
2184
2185 context_set_translation_type(context, translation);
93a23a72 2186 }
4ed0d3e6 2187
c07e7d21
MM
2188 context_set_fault_enable(context);
2189 context_set_present(context);
04c00956
LB
2190 if (!ecap_coherent(iommu->ecap))
2191 clflush_cache_range(context, sizeof(*context));
ba395927 2192
4c25a2c1
DW
2193 /*
2194 * It's a non-present to present mapping. If hardware doesn't cache
 2195 * non-present entries we only need to flush the write-buffer. If it
 2196 * _does_ cache non-present entries, then it does so in the special
2197 * domain #0, which we have to flush:
2198 */
2199 if (cap_caching_mode(iommu->cap)) {
2200 iommu->flush.flush_context(iommu, 0,
2201 (((u16)bus) << 8) | devfn,
2202 DMA_CCMD_MASK_NOBIT,
2203 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2204 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2205 } else {
ba395927 2206 iommu_flush_write_buffer(iommu);
4c25a2c1 2207 }
93a23a72 2208 iommu_enable_dev_iotlb(info);
c7151a8d 2209
55d94043
JR
2210 ret = 0;
2211
2212out_unlock:
2213 spin_unlock(&iommu->lock);
2214 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2215
5c365d18 2216 return ret;
ba395927
KA
2217}
2218
0ce4a85f
LB
2219struct domain_context_mapping_data {
2220 struct dmar_domain *domain;
2221 struct intel_iommu *iommu;
2222 struct pasid_table *table;
2223};
2224
2225static int domain_context_mapping_cb(struct pci_dev *pdev,
2226 u16 alias, void *opaque)
2227{
2228 struct domain_context_mapping_data *data = opaque;
2229
2230 return domain_context_mapping_one(data->domain, data->iommu,
2231 data->table, PCI_BUS_NUM(alias),
2232 alias & 0xff);
2233}
2234
ba395927 2235static int
28ccce0d 2236domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2237{
0ce4a85f 2238 struct domain_context_mapping_data data;
ca6e322d 2239 struct pasid_table *table;
64ae892b 2240 struct intel_iommu *iommu;
156baca8 2241 u8 bus, devfn;
64ae892b 2242
e1f167f3 2243 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2244 if (!iommu)
2245 return -ENODEV;
ba395927 2246
ca6e322d 2247 table = intel_pasid_get_table(dev);
0ce4a85f
LB
2248
2249 if (!dev_is_pci(dev))
2250 return domain_context_mapping_one(domain, iommu, table,
2251 bus, devfn);
2252
2253 data.domain = domain;
2254 data.iommu = iommu;
2255 data.table = table;
2256
2257 return pci_for_each_dma_alias(to_pci_dev(dev),
2258 &domain_context_mapping_cb, &data);
579305f7
AW
2259}
2260
2261static int domain_context_mapped_cb(struct pci_dev *pdev,
2262 u16 alias, void *opaque)
2263{
2264 struct intel_iommu *iommu = opaque;
2265
2266 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2267}
2268
e1f167f3 2269static int domain_context_mapped(struct device *dev)
ba395927 2270{
5331fe6f 2271 struct intel_iommu *iommu;
156baca8 2272 u8 bus, devfn;
5331fe6f 2273
e1f167f3 2274 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2275 if (!iommu)
2276 return -ENODEV;
ba395927 2277
579305f7
AW
2278 if (!dev_is_pci(dev))
2279 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2280
579305f7
AW
2281 return !pci_for_each_dma_alias(to_pci_dev(dev),
2282 domain_context_mapped_cb, iommu);
ba395927
KA
2283}
2284
f532959b
FY
2285/* Returns a number of VTD pages, but aligned to MM page size */
2286static inline unsigned long aligned_nrpages(unsigned long host_addr,
2287 size_t size)
2288{
2289 host_addr &= ~PAGE_MASK;
2290 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2291}
2292
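/*
 * Editor's illustrative sketch -- not part of the original file.
 * aligned_nrpages() above keeps only the offset of the buffer within
 * its MM page, rounds offset + size up to the MM page size and then
 * converts that to a VT-d page count.  The self-check below assumes
 * 4 KiB MM pages and 4 KiB VT-d pages; the "demo_" name is hypothetical.
 */
static int demo_aligned_nrpages_selfcheck(void)
{
	return aligned_nrpages(0x1000, 1) == 1 &&	/* one byte still takes one page */
	       aligned_nrpages(0x1010, 0x1000) == 2 &&	/* unaligned 4 KiB straddles two pages */
	       aligned_nrpages(0x2000, 0x2000) == 2;	/* aligned 8 KiB is exactly two pages */
}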
6dd9a7c7
YS
2293/* Return largest possible superpage level for a given mapping */
2294static inline int hardware_largepage_caps(struct dmar_domain *domain,
2295 unsigned long iov_pfn,
2296 unsigned long phy_pfn,
2297 unsigned long pages)
2298{
2299 int support, level = 1;
2300 unsigned long pfnmerge;
2301
2302 support = domain->iommu_superpage;
2303
2304 /* To use a large page, the virtual *and* physical addresses
2305 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2306 of them will mean we have to use smaller pages. So just
2307 merge them and check both at once. */
2308 pfnmerge = iov_pfn | phy_pfn;
2309
2310 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2311 pages >>= VTD_STRIDE_SHIFT;
2312 if (!pages)
2313 break;
2314 pfnmerge >>= VTD_STRIDE_SHIFT;
2315 level++;
2316 support--;
2317 }
2318 return level;
2319}
2320
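/*
 * Editor's illustrative sketch -- not part of the original file.  Each
 * extra superpage level covers 9 more address bits, so level 1 means
 * 4 KiB PTEs, level 2 means 2 MiB and level 3 means 1 GiB.  The helper
 * and the worked example below assume VTD_STRIDE_SHIFT == 9; the
 * "demo_" name is hypothetical.
 */
static unsigned long demo_lvl_to_nr_pages(int level)
{
	/* number of 4 KiB VT-d pages covered by one PTE at this level */
	return 1UL << ((level - 1) * 9);
}

/*
 * Worked example for hardware_largepage_caps() above, with
 * iov_pfn = 0x80200, phy_pfn = 0x40200 and nr_pages = 0x400 (a 4 MiB
 * mapping whose IOVA and physical address are 2 MiB aligned but not
 * 1 GiB aligned):
 *
 *   pfnmerge = 0x80200 | 0x40200 = 0xc0200, low 9 bits clear
 *     -> level 2 is reachable; pages >>= 9 leaves 2, pfnmerge becomes 0x601
 *   0x601 has low bits set -> stop, return level 2
 *
 * so the range is mapped with two 2 MiB PTEs
 * (demo_lvl_to_nr_pages(2) == 512 pages each).
 */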
9051aa02
DW
2321static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2322 struct scatterlist *sg, unsigned long phys_pfn,
2323 unsigned long nr_pages, int prot)
e1605495
DW
2324{
2325 struct dma_pte *first_pte = NULL, *pte = NULL;
3f649ab7 2326 phys_addr_t pteval;
cc4f14aa 2327 unsigned long sg_res = 0;
6dd9a7c7
YS
2328 unsigned int largepage_lvl = 0;
2329 unsigned long lvl_pages = 0;
ddf09b6d 2330 u64 attr;
e1605495 2331
162d1b10 2332 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2333
2334 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2335 return -EINVAL;
2336
ddf09b6d
LB
2337 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2338 if (domain_use_first_level(domain))
16ecf10e 2339 attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
e1605495 2340
cc4f14aa
JL
2341 if (!sg) {
2342 sg_res = nr_pages;
ddf09b6d 2343 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
9051aa02
DW
2344 }
2345
6dd9a7c7 2346 while (nr_pages > 0) {
c85994e4
DW
2347 uint64_t tmp;
2348
e1605495 2349 if (!sg_res) {
29a90b70
RM
2350 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2351
f532959b 2352 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2353 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2354 sg->dma_length = sg->length;
ddf09b6d 2355 pteval = (sg_phys(sg) - pgoff) | attr;
6dd9a7c7 2356 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2357 }
6dd9a7c7 2358
e1605495 2359 if (!pte) {
6dd9a7c7
YS
2360 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2361
5cf0a76f 2362 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2363 if (!pte)
2364 return -ENOMEM;
6dd9a7c7 2365 /* It is a large page */
6491d4d0 2366 if (largepage_lvl > 1) {
ba2374fd
CZ
2367 unsigned long nr_superpages, end_pfn;
2368
6dd9a7c7 2369 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2370 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2371
2372 nr_superpages = sg_res / lvl_pages;
2373 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2374
d41a4adb
JL
2375 /*
2376 * Ensure that old small page tables are
ba2374fd 2377 * removed to make room for superpage(s).
bc24c571
DD
2378 * We're adding new large pages, so make sure
2379 * we don't remove their parent tables.
d41a4adb 2380 */
bc24c571
DD
2381 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2382 largepage_lvl + 1);
6491d4d0 2383 } else {
6dd9a7c7 2384 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2385 }
6dd9a7c7 2386
e1605495
DW
2387 }
 2388 /* We don't need a lock here; nobody else
2389 * touches the iova range
2390 */
7766a3fb 2391 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2392 if (tmp) {
1bf20f0d 2393 static int dumps = 5;
9f10e5bf
JR
2394 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2395 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2396 if (dumps) {
2397 dumps--;
2398 debug_dma_dump_mappings(NULL);
2399 }
2400 WARN_ON(1);
2401 }
6dd9a7c7
YS
2402
2403 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2404
2405 BUG_ON(nr_pages < lvl_pages);
2406 BUG_ON(sg_res < lvl_pages);
2407
2408 nr_pages -= lvl_pages;
2409 iov_pfn += lvl_pages;
2410 phys_pfn += lvl_pages;
2411 pteval += lvl_pages * VTD_PAGE_SIZE;
2412 sg_res -= lvl_pages;
2413
2414 /* If the next PTE would be the first in a new page, then we
2415 need to flush the cache on the entries we've just written.
2416 And then we'll need to recalculate 'pte', so clear it and
2417 let it get set again in the if (!pte) block above.
2418
2419 If we're done (!nr_pages) we need to flush the cache too.
2420
2421 Also if we've been setting superpages, we may need to
2422 recalculate 'pte' and switch back to smaller pages for the
2423 end of the mapping, if the trailing size is not enough to
2424 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2425 pte++;
6dd9a7c7
YS
2426 if (!nr_pages || first_pte_in_page(pte) ||
2427 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2428 domain_flush_cache(domain, first_pte,
2429 (void *)pte - (void *)first_pte);
2430 pte = NULL;
2431 }
6dd9a7c7
YS
2432
2433 if (!sg_res && nr_pages)
e1605495
DW
2434 sg = sg_next(sg);
2435 }
2436 return 0;
2437}
2438
87684fd9 2439static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2440 struct scatterlist *sg, unsigned long phys_pfn,
2441 unsigned long nr_pages, int prot)
2442{
fa954e68 2443 int iommu_id, ret;
095303e0
LB
2444 struct intel_iommu *iommu;
2445
2446 /* Do the real mapping first */
2447 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2448 if (ret)
2449 return ret;
2450
fa954e68
LB
2451 for_each_domain_iommu(iommu_id, domain) {
2452 iommu = g_iommus[iommu_id];
095303e0
LB
2453 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2454 }
2455
2456 return 0;
87684fd9
PX
2457}
2458
9051aa02
DW
2459static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2460 struct scatterlist *sg, unsigned long nr_pages,
2461 int prot)
ba395927 2462{
87684fd9 2463 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2464}
6f6a00e4 2465
9051aa02
DW
2466static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2467 unsigned long phys_pfn, unsigned long nr_pages,
2468 int prot)
2469{
87684fd9 2470 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2471}
2472
2452d9db 2473static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2474{
5082219b
FS
2475 unsigned long flags;
2476 struct context_entry *context;
2477 u16 did_old;
2478
c7151a8d
WH
2479 if (!iommu)
2480 return;
8c11e798 2481
5082219b
FS
2482 spin_lock_irqsave(&iommu->lock, flags);
2483 context = iommu_context_addr(iommu, bus, devfn, 0);
2484 if (!context) {
2485 spin_unlock_irqrestore(&iommu->lock, flags);
2486 return;
2487 }
2488 did_old = context_domain_id(context);
2489 context_clear_entry(context);
2490 __iommu_flush_cache(iommu, context, sizeof(*context));
2491 spin_unlock_irqrestore(&iommu->lock, flags);
2492 iommu->flush.flush_context(iommu,
2493 did_old,
2494 (((u16)bus) << 8) | devfn,
2495 DMA_CCMD_MASK_NOBIT,
2496 DMA_CCMD_DEVICE_INVL);
2497 iommu->flush.flush_iotlb(iommu,
2498 did_old,
2499 0,
2500 0,
2501 DMA_TLB_DSI_FLUSH);
ba395927
KA
2502}
2503
109b9b04
DW
2504static inline void unlink_domain_info(struct device_domain_info *info)
2505{
2506 assert_spin_locked(&device_domain_lock);
2507 list_del(&info->link);
2508 list_del(&info->global);
2509 if (info->dev)
01b9d4e2 2510 dev_iommu_priv_set(info->dev, NULL);
109b9b04
DW
2511}
2512
ba395927
KA
2513static void domain_remove_dev_info(struct dmar_domain *domain)
2514{
3a74ca01 2515 struct device_domain_info *info, *tmp;
fb170fb4 2516 unsigned long flags;
ba395927
KA
2517
2518 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2519 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2520 __dmar_remove_one_dev_info(info);
ba395927
KA
2521 spin_unlock_irqrestore(&device_domain_lock, flags);
2522}
2523
e2726dae 2524struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2525{
2526 struct device_domain_info *info;
2527
6097df45
LB
2528 if (unlikely(!dev || !dev->iommu))
2529 return NULL;
2530
2d33b7d6 2531 if (unlikely(attach_deferred(dev)))
1ee0186b
LB
2532 return NULL;
2533
2534 /* No lock here, assumes no domain exit in normal case */
e85bb99b 2535 info = get_domain_info(dev);
1ee0186b
LB
2536 if (likely(info))
2537 return info->domain;
2538
2539 return NULL;
2540}
2541
034d98cc 2542static void do_deferred_attach(struct device *dev)
1ee0186b 2543{
034d98cc 2544 struct iommu_domain *domain;
8af46c78 2545
01b9d4e2 2546 dev_iommu_priv_set(dev, NULL);
034d98cc
JR
2547 domain = iommu_get_domain_for_dev(dev);
2548 if (domain)
2549 intel_iommu_attach_device(domain, dev);
2550}
2551
5a8f40e8 2552static inline struct device_domain_info *
745f2586
JL
2553dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2554{
2555 struct device_domain_info *info;
2556
2557 list_for_each_entry(info, &device_domain_list, global)
4fda230e 2558 if (info->segment == segment && info->bus == bus &&
745f2586 2559 info->devfn == devfn)
5a8f40e8 2560 return info;
745f2586
JL
2561
2562 return NULL;
2563}
2564
ddf09b6d
LB
2565static int domain_setup_first_level(struct intel_iommu *iommu,
2566 struct dmar_domain *domain,
2567 struct device *dev,
c7b6bac9 2568 u32 pasid)
ddf09b6d
LB
2569{
2570 int flags = PASID_FLAG_SUPERVISOR_MODE;
2571 struct dma_pte *pgd = domain->pgd;
2572 int agaw, level;
2573
2574 /*
 2575 * Skip top levels of page tables for an iommu which has
 2576 * a smaller agaw than the default. Unnecessary for PT mode.
2577 */
2578 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2579 pgd = phys_to_virt(dma_pte_addr(pgd));
2580 if (!dma_pte_present(pgd))
2581 return -ENOMEM;
2582 }
2583
2584 level = agaw_to_level(agaw);
2585 if (level != 4 && level != 5)
2586 return -EINVAL;
2587
2588 flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
2589
2590 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2591 domain->iommu_did[iommu->seq_id],
2592 flags);
2593}
2594
8038bdb8
JD
2595static bool dev_is_real_dma_subdevice(struct device *dev)
2596{
2597 return dev && dev_is_pci(dev) &&
2598 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2599}
2600
5db31569
JR
2601static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2602 int bus, int devfn,
2603 struct device *dev,
2604 struct dmar_domain *domain)
745f2586 2605{
5a8f40e8 2606 struct dmar_domain *found = NULL;
745f2586
JL
2607 struct device_domain_info *info;
2608 unsigned long flags;
d160aca5 2609 int ret;
745f2586
JL
2610
2611 info = alloc_devinfo_mem();
2612 if (!info)
b718cd3d 2613 return NULL;
745f2586 2614
4fda230e
JD
2615 if (!dev_is_real_dma_subdevice(dev)) {
2616 info->bus = bus;
2617 info->devfn = devfn;
2618 info->segment = iommu->segment;
2619 } else {
2620 struct pci_dev *pdev = to_pci_dev(dev);
2621
2622 info->bus = pdev->bus->number;
2623 info->devfn = pdev->devfn;
2624 info->segment = pci_domain_nr(pdev->bus);
2625 }
2626
b16d0cb9
DW
2627 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2628 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2629 info->ats_qdep = 0;
745f2586
JL
2630 info->dev = dev;
2631 info->domain = domain;
5a8f40e8 2632 info->iommu = iommu;
cc580e41 2633 info->pasid_table = NULL;
95587a75 2634 info->auxd_enabled = 0;
67b8e02b 2635 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2636
b16d0cb9
DW
2637 if (dev && dev_is_pci(dev)) {
2638 struct pci_dev *pdev = to_pci_dev(info->dev);
2639
da656a04
JPB
2640 if (ecap_dev_iotlb_support(iommu->ecap) &&
2641 pci_ats_supported(pdev) &&
b16d0cb9
DW
2642 dmar_find_matched_atsr_unit(pdev))
2643 info->ats_supported = 1;
2644
765b6a98
LB
2645 if (sm_supported(iommu)) {
2646 if (pasid_supported(iommu)) {
b16d0cb9
DW
2647 int features = pci_pasid_features(pdev);
2648 if (features >= 0)
2649 info->pasid_supported = features | 1;
2650 }
2651
2652 if (info->ats_supported && ecap_prs(iommu->ecap) &&
3f9a7a13 2653 pci_pri_supported(pdev))
b16d0cb9
DW
2654 info->pri_supported = 1;
2655 }
2656 }
2657
745f2586
JL
2658 spin_lock_irqsave(&device_domain_lock, flags);
2659 if (dev)
0bcb3e28 2660 found = find_domain(dev);
f303e507
JR
2661
2662 if (!found) {
5a8f40e8 2663 struct device_domain_info *info2;
4fda230e
JD
2664 info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
2665 info->devfn);
f303e507
JR
2666 if (info2) {
2667 found = info2->domain;
2668 info2->dev = dev;
2669 }
5a8f40e8 2670 }
f303e507 2671
745f2586
JL
2672 if (found) {
2673 spin_unlock_irqrestore(&device_domain_lock, flags);
2674 free_devinfo_mem(info);
b718cd3d
DW
2675 /* Caller must free the original domain */
2676 return found;
745f2586
JL
2677 }
2678
d160aca5
JR
2679 spin_lock(&iommu->lock);
2680 ret = domain_attach_iommu(domain, iommu);
2681 spin_unlock(&iommu->lock);
2682
2683 if (ret) {
c6c2cebd 2684 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2685 free_devinfo_mem(info);
c6c2cebd
JR
2686 return NULL;
2687 }
c6c2cebd 2688
b718cd3d
DW
2689 list_add(&info->link, &domain->devices);
2690 list_add(&info->global, &device_domain_list);
2691 if (dev)
01b9d4e2 2692 dev_iommu_priv_set(dev, info);
0bbeb01a 2693 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2694
0bbeb01a
LB
2695 /* PASID table is mandatory for a PCI device in scalable mode. */
2696 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2697 ret = intel_pasid_alloc_table(dev);
2698 if (ret) {
932a6523 2699 dev_err(dev, "PASID table allocation failed\n");
71753239 2700 dmar_remove_one_dev_info(dev);
0bbeb01a 2701 return NULL;
a7fc93fe 2702 }
ef848b7e
LB
2703
2704 /* Setup the PASID entry for requests without PASID: */
1a3f2fd7 2705 spin_lock_irqsave(&iommu->lock, flags);
ef848b7e
LB
2706 if (hw_pass_through && domain_type_is_si(domain))
2707 ret = intel_pasid_setup_pass_through(iommu, domain,
2708 dev, PASID_RID2PASID);
ddf09b6d
LB
2709 else if (domain_use_first_level(domain))
2710 ret = domain_setup_first_level(iommu, domain, dev,
2711 PASID_RID2PASID);
ef848b7e
LB
2712 else
2713 ret = intel_pasid_setup_second_level(iommu, domain,
2714 dev, PASID_RID2PASID);
1a3f2fd7 2715 spin_unlock_irqrestore(&iommu->lock, flags);
ef848b7e 2716 if (ret) {
932a6523 2717 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2718 dmar_remove_one_dev_info(dev);
ef848b7e 2719 return NULL;
a7fc93fe
LB
2720 }
2721 }
b718cd3d 2722
cc4e2575 2723 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2724 dev_err(dev, "Domain context map failed\n");
71753239 2725 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2726 return NULL;
2727 }
2728
b718cd3d 2729 return domain;
745f2586
JL
2730}
2731
b213203e 2732static int iommu_domain_identity_map(struct dmar_domain *domain,
e70b081c
TM
2733 unsigned long first_vpfn,
2734 unsigned long last_vpfn)
ba395927 2735{
ba395927
KA
2736 /*
 2737 * The RMRR range might overlap the physical memory range, so
 2738 * clear it first.
2739 */
c5395d5c 2740 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2741
87684fd9
PX
2742 return __domain_mapping(domain, first_vpfn, NULL,
2743 first_vpfn, last_vpfn - first_vpfn + 1,
2744 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2745}
2746
301e7ee1
JR
2747static int md_domain_init(struct dmar_domain *domain, int guest_width);
2748
071e1374 2749static int __init si_domain_init(int hw)
2c2e2c38 2750{
4de354ec
LB
2751 struct dmar_rmrr_unit *rmrr;
2752 struct device *dev;
2753 int i, nid, ret;
2c2e2c38 2754
ab8dfe25 2755 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2756 if (!si_domain)
2757 return -EFAULT;
2758
301e7ee1 2759 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2c2e2c38
FY
2760 domain_exit(si_domain);
2761 return -EFAULT;
2762 }
2763
19943b0e
DW
2764 if (hw)
2765 return 0;
2766
c7ab48d2 2767 for_each_online_node(nid) {
5dfe8660
TH
2768 unsigned long start_pfn, end_pfn;
2769 int i;
2770
2771 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2772 ret = iommu_domain_identity_map(si_domain,
e70b081c
TM
2773 mm_to_dma_pfn(start_pfn),
2774 mm_to_dma_pfn(end_pfn));
5dfe8660
TH
2775 if (ret)
2776 return ret;
2777 }
c7ab48d2
DW
2778 }
2779
4de354ec 2780 /*
9235cb13
LB
 2781 * Identity map the RMRRs so that devices with RMRRs can also use
2782 * the si_domain.
4de354ec
LB
2783 */
2784 for_each_rmrr_units(rmrr) {
2785 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2786 i, dev) {
2787 unsigned long long start = rmrr->base_address;
2788 unsigned long long end = rmrr->end_address;
2789
4de354ec
LB
2790 if (WARN_ON(end < start ||
2791 end >> agaw_to_width(si_domain->agaw)))
2792 continue;
2793
48f0bcfb
LB
2794 ret = iommu_domain_identity_map(si_domain,
2795 mm_to_dma_pfn(start >> PAGE_SHIFT),
2796 mm_to_dma_pfn(end >> PAGE_SHIFT));
4de354ec
LB
2797 if (ret)
2798 return ret;
2799 }
2800 }
2801
2c2e2c38
FY
2802 return 0;
2803}
2804
28ccce0d 2805static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2806{
0ac72664 2807 struct dmar_domain *ndomain;
5a8f40e8 2808 struct intel_iommu *iommu;
156baca8 2809 u8 bus, devfn;
2c2e2c38 2810
5913c9bf 2811 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2812 if (!iommu)
2813 return -ENODEV;
2814
5db31569 2815 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2816 if (ndomain != domain)
2817 return -EBUSY;
2c2e2c38
FY
2818
2819 return 0;
2820}
2821
0b9d9753 2822static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2823{
2824 struct dmar_rmrr_unit *rmrr;
832bd858 2825 struct device *tmp;
ea2447f7
TM
2826 int i;
2827
0e242612 2828 rcu_read_lock();
ea2447f7 2829 for_each_rmrr_units(rmrr) {
b683b230
JL
2830 /*
2831 * Return TRUE if this RMRR contains the device that
2832 * is passed in.
2833 */
2834 for_each_active_dev_scope(rmrr->devices,
2835 rmrr->devices_cnt, i, tmp)
e143fd45
EA
2836 if (tmp == dev ||
2837 is_downstream_to_pci_bridge(dev, tmp)) {
0e242612 2838 rcu_read_unlock();
ea2447f7 2839 return true;
b683b230 2840 }
ea2447f7 2841 }
0e242612 2842 rcu_read_unlock();
ea2447f7
TM
2843 return false;
2844}
2845
1c5c59fb
EA
2846/**
2847 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2848 * is relaxable (ie. is allowed to be not enforced under some conditions)
2849 * @dev: device handle
2850 *
2851 * We assume that PCI USB devices with RMRRs have them largely
2852 * for historical reasons and that the RMRR space is not actively used post
2853 * boot. This exclusion may change if vendors begin to abuse it.
2854 *
2855 * The same exception is made for graphics devices, with the requirement that
2856 * any use of the RMRR regions will be torn down before assigning the device
2857 * to a guest.
2858 *
2859 * Return: true if the RMRR is relaxable, false otherwise
2860 */
2861static bool device_rmrr_is_relaxable(struct device *dev)
2862{
2863 struct pci_dev *pdev;
2864
2865 if (!dev_is_pci(dev))
2866 return false;
2867
2868 pdev = to_pci_dev(dev);
2869 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2870 return true;
2871 else
2872 return false;
2873}
2874
c875d2c1
AW
2875/*
 2876 * There are a couple of cases where we need to restrict the functionality of
2877 * devices associated with RMRRs. The first is when evaluating a device for
2878 * identity mapping because problems exist when devices are moved in and out
2879 * of domains and their respective RMRR information is lost. This means that
2880 * a device with associated RMRRs will never be in a "passthrough" domain.
2881 * The second is use of the device through the IOMMU API. This interface
2882 * expects to have full control of the IOVA space for the device. We cannot
2883 * satisfy both the requirement that RMRR access is maintained and have an
2884 * unencumbered IOVA space. We also have no ability to quiesce the device's
2885 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2886 * We therefore prevent devices associated with an RMRR from participating in
2887 * the IOMMU API, which eliminates them from device assignment.
2888 *
1c5c59fb
EA
2889 * In both cases, devices which have relaxable RMRRs are not concerned by this
2890 * restriction. See device_rmrr_is_relaxable comment.
c875d2c1
AW
2891 */
2892static bool device_is_rmrr_locked(struct device *dev)
2893{
2894 if (!device_has_rmrr(dev))
2895 return false;
2896
1c5c59fb
EA
2897 if (device_rmrr_is_relaxable(dev))
2898 return false;
c875d2c1
AW
2899
2900 return true;
2901}
2902
f273a453
LB
2903/*
2904 * Return the required default domain type for a specific device.
2905 *
 2906 * @dev: the device in question
2908 *
2909 * Returns:
2910 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
 2911 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2912 * - 0: both identity and dynamic domains work for this device
2913 */
0e31a726 2914static int device_def_domain_type(struct device *dev)
6941af28 2915{
3bdb2591
DW
2916 if (dev_is_pci(dev)) {
2917 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2918
89a6079d
LB
2919 /*
2920 * Prevent any device marked as untrusted from getting
2921 * placed into the statically identity mapping domain.
2922 */
2923 if (pdev->untrusted)
f273a453 2924 return IOMMU_DOMAIN_DMA;
89a6079d 2925
3bdb2591 2926 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 2927 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 2928
3bdb2591 2929 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 2930 return IOMMU_DOMAIN_IDENTITY;
3bdb2591 2931 }
3dfc813d 2932
b89b6605 2933 return 0;
f273a453
LB
2934}
2935
ffebeb46
JL
2936static void intel_iommu_init_qi(struct intel_iommu *iommu)
2937{
2938 /*
 2939 * Start from a sane iommu hardware state.
 2940 * If the queued invalidation is already initialized by us
 2941 * (for example, while enabling interrupt-remapping) then
 2942 * things are already rolling from a sane state.
2943 */
2944 if (!iommu->qi) {
2945 /*
2946 * Clear any previous faults.
2947 */
2948 dmar_fault(-1, iommu);
2949 /*
2950 * Disable queued invalidation if supported and already enabled
2951 * before OS handover.
2952 */
2953 dmar_disable_qi(iommu);
2954 }
2955
2956 if (dmar_enable_qi(iommu)) {
2957 /*
 2958 * Queued invalidation is not enabled; use register-based invalidation
2959 */
2960 iommu->flush.flush_context = __iommu_flush_context;
2961 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2962 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2963 iommu->name);
2964 } else {
2965 iommu->flush.flush_context = qi_flush_context;
2966 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2967 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2968 }
2969}
2970
091d42e4 2971static int copy_context_table(struct intel_iommu *iommu,
dfddb969 2972 struct root_entry *old_re,
091d42e4
JR
2973 struct context_entry **tbl,
2974 int bus, bool ext)
2975{
dbcd861f 2976 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 2977 struct context_entry *new_ce = NULL, ce;
dfddb969 2978 struct context_entry *old_ce = NULL;
543c8dcf 2979 struct root_entry re;
091d42e4
JR
2980 phys_addr_t old_ce_phys;
2981
2982 tbl_idx = ext ? bus * 2 : bus;
dfddb969 2983 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
2984
2985 for (devfn = 0; devfn < 256; devfn++) {
2986 /* First calculate the correct index */
2987 idx = (ext ? devfn * 2 : devfn) % 256;
2988
2989 if (idx == 0) {
2990 /* First save what we may have and clean up */
2991 if (new_ce) {
2992 tbl[tbl_idx] = new_ce;
2993 __iommu_flush_cache(iommu, new_ce,
2994 VTD_PAGE_SIZE);
2995 pos = 1;
2996 }
2997
2998 if (old_ce)
829383e1 2999 memunmap(old_ce);
091d42e4
JR
3000
3001 ret = 0;
3002 if (devfn < 0x80)
543c8dcf 3003 old_ce_phys = root_entry_lctp(&re);
091d42e4 3004 else
543c8dcf 3005 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3006
3007 if (!old_ce_phys) {
3008 if (ext && devfn == 0) {
3009 /* No LCTP, try UCTP */
3010 devfn = 0x7f;
3011 continue;
3012 } else {
3013 goto out;
3014 }
3015 }
3016
3017 ret = -ENOMEM;
dfddb969
DW
3018 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3019 MEMREMAP_WB);
091d42e4
JR
3020 if (!old_ce)
3021 goto out;
3022
3023 new_ce = alloc_pgtable_page(iommu->node);
3024 if (!new_ce)
3025 goto out_unmap;
3026
3027 ret = 0;
3028 }
3029
3030 /* Now copy the context entry */
dfddb969 3031 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3032
cf484d0e 3033 if (!__context_present(&ce))
091d42e4
JR
3034 continue;
3035
dbcd861f
JR
3036 did = context_domain_id(&ce);
3037 if (did >= 0 && did < cap_ndoms(iommu->cap))
3038 set_bit(did, iommu->domain_ids);
3039
cf484d0e
JR
3040 /*
3041 * We need a marker for copied context entries. This
3042 * marker needs to work for the old format as well as
3043 * for extended context entries.
3044 *
3045 * Bit 67 of the context entry is used. In the old
3046 * format this bit is available to software, in the
3047 * extended format it is the PGE bit, but PGE is ignored
3048 * by HW if PASIDs are disabled (and thus still
3049 * available).
3050 *
3051 * So disable PASIDs first and then mark the entry
3052 * copied. This means that we don't copy PASID
3053 * translations from the old kernel, but this is fine as
3054 * faults there are not fatal.
3055 */
3056 context_clear_pasid_enable(&ce);
3057 context_set_copied(&ce);
3058
091d42e4
JR
3059 new_ce[idx] = ce;
3060 }
3061
3062 tbl[tbl_idx + pos] = new_ce;
3063
3064 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3065
3066out_unmap:
dfddb969 3067 memunmap(old_ce);
091d42e4
JR
3068
3069out:
3070 return ret;
3071}
3072
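/*
 * Editor's illustrative sketch -- not part of the original file.  The
 * comment above marks copied entries with bit 67 of the 128-bit context
 * entry, i.e. bit 3 of the high 64-bit word.  The in-tree
 * context_set_copied()/context_copied() helpers are not visible in this
 * hunk, so the struct and "demo_" names below are hypothetical.
 */
struct demo_context_entry {
	unsigned long long lo;
	unsigned long long hi;
};

static void demo_set_copied(struct demo_context_entry *ce)
{
	ce->hi |= 1ULL << (67 - 64);
}

static int demo_is_copied(const struct demo_context_entry *ce)
{
	return !!(ce->hi & (1ULL << (67 - 64)));
}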
3073static int copy_translation_tables(struct intel_iommu *iommu)
3074{
3075 struct context_entry **ctxt_tbls;
dfddb969 3076 struct root_entry *old_rt;
091d42e4
JR
3077 phys_addr_t old_rt_phys;
3078 int ctxt_table_entries;
3079 unsigned long flags;
3080 u64 rtaddr_reg;
3081 int bus, ret;
c3361f2f 3082 bool new_ext, ext;
091d42e4
JR
3083
3084 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3085 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3086 new_ext = !!ecap_ecs(iommu->ecap);
3087
3088 /*
3089 * The RTT bit can only be changed when translation is disabled,
3090 * but disabling translation means to open a window for data
3091 * corruption. So bail out and don't copy anything if we would
3092 * have to change the bit.
3093 */
3094 if (new_ext != ext)
3095 return -EINVAL;
091d42e4
JR
3096
3097 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3098 if (!old_rt_phys)
3099 return -EINVAL;
3100
dfddb969 3101 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3102 if (!old_rt)
3103 return -ENOMEM;
3104
3105 /* This is too big for the stack - allocate it from slab */
3106 ctxt_table_entries = ext ? 512 : 256;
3107 ret = -ENOMEM;
6396bb22 3108 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3109 if (!ctxt_tbls)
3110 goto out_unmap;
3111
3112 for (bus = 0; bus < 256; bus++) {
3113 ret = copy_context_table(iommu, &old_rt[bus],
3114 ctxt_tbls, bus, ext);
3115 if (ret) {
3116 pr_err("%s: Failed to copy context table for bus %d\n",
3117 iommu->name, bus);
3118 continue;
3119 }
3120 }
3121
3122 spin_lock_irqsave(&iommu->lock, flags);
3123
3124 /* Context tables are copied, now write them to the root_entry table */
3125 for (bus = 0; bus < 256; bus++) {
3126 int idx = ext ? bus * 2 : bus;
3127 u64 val;
3128
3129 if (ctxt_tbls[idx]) {
3130 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3131 iommu->root_entry[bus].lo = val;
3132 }
3133
3134 if (!ext || !ctxt_tbls[idx + 1])
3135 continue;
3136
3137 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3138 iommu->root_entry[bus].hi = val;
3139 }
3140
3141 spin_unlock_irqrestore(&iommu->lock, flags);
3142
3143 kfree(ctxt_tbls);
3144
3145 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3146
3147 ret = 0;
3148
3149out_unmap:
dfddb969 3150 memunmap(old_rt);
091d42e4
JR
3151
3152 return ret;
3153}
3154
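/*
 * Editor's illustrative sketch -- not part of the original file.  It
 * restates the index arithmetic used by copy_context_table() and
 * copy_translation_tables() above: with the extended (DMA_RTADDR_RTT)
 * layout every bus owns two context tables -- devfn 0x00-0x7f reached
 * through root_entry.lo and devfn 0x80-0xff through root_entry.hi --
 * and each extended entry occupies two legacy-sized slots, hence the
 * bus * 2 and devfn * 2 factors.  The "demo_" name is hypothetical.
 */
static void demo_ctxt_index(int ext, unsigned int bus, unsigned int devfn,
			    unsigned int *tbl_idx, unsigned int *entry_idx)
{
	if (ext) {
		*tbl_idx = bus * 2 + (devfn >= 0x80);
		*entry_idx = (devfn * 2) % 256;
	} else {
		*tbl_idx = bus;
		*entry_idx = devfn;
	}
}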
3375303e
JP
3155#ifdef CONFIG_INTEL_IOMMU_SVM
3156static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
3157{
3158 struct intel_iommu *iommu = data;
3159 ioasid_t ioasid;
3160
3161 if (!iommu)
3162 return INVALID_IOASID;
3163 /*
 3164 * The VT-d virtual command interface always uses the full 20-bit
 3165 * PASID range. The host can partition the guest PASID range based
 3166 * on policies, but this is out of the guest's control.
3167 */
3168 if (min < PASID_MIN || max > intel_pasid_max_id)
3169 return INVALID_IOASID;
3170
3171 if (vcmd_alloc_pasid(iommu, &ioasid))
3172 return INVALID_IOASID;
3173
3174 return ioasid;
3175}
3176
3177static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
3178{
3179 struct intel_iommu *iommu = data;
3180
3181 if (!iommu)
3182 return;
3183 /*
 3184 * Sanity checking of the ioasid owner is done at the upper layer,
 3185 * e.g. VFIO. We can only free the PASID when all the devices are unbound.
3186 */
3187 if (ioasid_find(NULL, ioasid, NULL)) {
3188 pr_alert("Cannot free active IOASID %d\n", ioasid);
3189 return;
3190 }
3191 vcmd_free_pasid(iommu, ioasid);
3192}
3193
3194static void register_pasid_allocator(struct intel_iommu *iommu)
3195{
3196 /*
 3197 * If we are running in the host, there is no need for a custom
 3198 * allocator because PASIDs are allocated from the host system-wide.
3199 */
3200 if (!cap_caching_mode(iommu->cap))
3201 return;
3202
3203 if (!sm_supported(iommu)) {
3204 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
3205 return;
3206 }
3207
3208 /*
 3209 * Register a custom PASID allocator if we are running in a guest;
 3210 * guest PASIDs must be obtained via the virtual command interface.
3211 * There can be multiple vIOMMUs in each guest but only one allocator
3212 * is active. All vIOMMU allocators will eventually be calling the same
3213 * host allocator.
3214 */
d76b42e9 3215 if (!vccap_pasid(iommu->vccap))
3375303e
JP
3216 return;
3217
3218 pr_info("Register custom PASID allocator\n");
3219 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
3220 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
3221 iommu->pasid_allocator.pdata = (void *)iommu;
3222 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
3223 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
3224 /*
3225 * Disable scalable mode on this IOMMU if there
3226 * is no custom allocator. Mixing SM capable vIOMMU
3227 * and non-SM vIOMMU are not supported.
3228 */
3229 intel_iommu_sm = 0;
3230 }
3231}
3232#endif
3233
b779260b 3234static int __init init_dmars(void)
ba395927
KA
3235{
3236 struct dmar_drhd_unit *drhd;
ba395927 3237 struct intel_iommu *iommu;
df4f3c60 3238 int ret;
2c2e2c38 3239
ba395927
KA
3240 /*
3241 * for each drhd
3242 * allocate root
3243 * initialize and program root entry to not present
3244 * endfor
3245 */
3246 for_each_drhd_unit(drhd) {
5e0d2a6f 3247 /*
 3248 * No lock is needed, as this is only incremented in the
 3249 * single-threaded kernel __init code path; all other accesses
 3250 * are read-only.
3251 */
78d8e704 3252 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3253 g_num_of_iommus++;
3254 continue;
3255 }
9f10e5bf 3256 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3257 }
3258
ffebeb46
JL
3259 /* Preallocate enough resources for IOMMU hot-addition */
3260 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3261 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3262
d9630fe9
WH
3263 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3264 GFP_KERNEL);
3265 if (!g_iommus) {
9f10e5bf 3266 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3267 ret = -ENOMEM;
3268 goto error;
3269 }
3270
6a8c6748
LB
3271 for_each_iommu(iommu, drhd) {
3272 if (drhd->ignored) {
3273 iommu_disable_translation(iommu);
3274 continue;
3275 }
3276
56283174
LB
3277 /*
 3278 * Find the max PASID size of all IOMMUs in the system.
 3279 * We need to ensure that the system PASID table is no bigger
 3280 * than the smallest supported size.
3281 */
765b6a98 3282 if (pasid_supported(iommu)) {
56283174
LB
3283 u32 temp = 2 << ecap_pss(iommu->ecap);
3284
3285 intel_pasid_max_id = min_t(u32, temp,
3286 intel_pasid_max_id);
3287 }
3288
d9630fe9 3289 g_iommus[iommu->seq_id] = iommu;
ba395927 3290
b63d80d1
JR
3291 intel_iommu_init_qi(iommu);
3292
e61d98d8
SS
3293 ret = iommu_init_domains(iommu);
3294 if (ret)
989d51fc 3295 goto free_iommu;
e61d98d8 3296
4158c2ec
JR
3297 init_translation_status(iommu);
3298
091d42e4
JR
3299 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3300 iommu_disable_translation(iommu);
3301 clear_translation_pre_enabled(iommu);
3302 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3303 iommu->name);
3304 }
4158c2ec 3305
ba395927
KA
3306 /*
3307 * TBD:
3308 * we could share the same root & context tables
25985edc 3309 * among all IOMMUs. Need to split it later.
ba395927
KA
3310 */
3311 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3312 if (ret)
989d51fc 3313 goto free_iommu;
5f0a7f76 3314
091d42e4
JR
3315 if (translation_pre_enabled(iommu)) {
3316 pr_info("Translation already enabled - trying to copy translation structures\n");
3317
3318 ret = copy_translation_tables(iommu);
3319 if (ret) {
3320 /*
3321 * We found the IOMMU with translation
3322 * enabled - but failed to copy over the
3323 * old root-entry table. Try to proceed
3324 * by disabling translation now and
3325 * allocating a clean root-entry table.
3326 * This might cause DMAR faults, but
3327 * probably the dump will still succeed.
3328 */
3329 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3330 iommu->name);
3331 iommu_disable_translation(iommu);
3332 clear_translation_pre_enabled(iommu);
3333 } else {
3334 pr_info("Copied translation tables from previous kernel for %s\n",
3335 iommu->name);
3336 }
3337 }
3338
4ed0d3e6 3339 if (!ecap_pass_through(iommu->ecap))
19943b0e 3340 hw_pass_through = 0;
ff3dc652 3341 intel_svm_check(iommu);
ba395927
KA
3342 }
3343
a4c34ff1
JR
3344 /*
3345 * Now that qi is enabled on all iommus, set the root entry and flush
3346 * caches. This is required on some Intel X58 chipsets, otherwise the
3347 * flush_context function will loop forever and the boot hangs.
3348 */
3349 for_each_active_iommu(iommu, drhd) {
3350 iommu_flush_write_buffer(iommu);
3375303e
JP
3351#ifdef CONFIG_INTEL_IOMMU_SVM
3352 register_pasid_allocator(iommu);
3353#endif
a4c34ff1
JR
3354 iommu_set_root_entry(iommu);
3355 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3356 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3357 }
3358
d3f13810 3359#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3360 dmar_map_gfx = 0;
19943b0e 3361#endif
e0fc7e0b 3362
5daab580
LB
3363 if (!dmar_map_gfx)
3364 iommu_identity_mapping |= IDENTMAP_GFX;
3365
21e722c4
AR
3366 check_tylersburg_isoch();
3367
4de354ec
LB
3368 ret = si_domain_init(hw_pass_through);
3369 if (ret)
3370 goto free_iommu;
86080ccc 3371
ba395927
KA
3372 /*
3373 * for each drhd
3374 * enable fault log
3375 * global invalidate context cache
3376 * global invalidate iotlb
3377 * enable translation
3378 */
7c919779 3379 for_each_iommu(iommu, drhd) {
51a63e67
JC
3380 if (drhd->ignored) {
3381 /*
3382 * we always have to disable PMRs or DMA may fail on
3383 * this device
3384 */
3385 if (force_on)
7c919779 3386 iommu_disable_protect_mem_regions(iommu);
ba395927 3387 continue;
51a63e67 3388 }
ba395927
KA
3389
3390 iommu_flush_write_buffer(iommu);
3391
a222a7f0 3392#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3393 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3394 /*
 3395 * Calling dmar_alloc_hwirq() with dmar_global_lock held
 3396 * could cause a lock race condition.
3397 */
3398 up_write(&dmar_global_lock);
a222a7f0 3399 ret = intel_svm_enable_prq(iommu);
a7755c3c 3400 down_write(&dmar_global_lock);
a222a7f0
DW
3401 if (ret)
3402 goto free_iommu;
3403 }
3404#endif
3460a6d9
KA
3405 ret = dmar_set_interrupt(iommu);
3406 if (ret)
989d51fc 3407 goto free_iommu;
ba395927
KA
3408 }
3409
3410 return 0;
989d51fc
JL
3411
3412free_iommu:
ffebeb46
JL
3413 for_each_active_iommu(iommu, drhd) {
3414 disable_dmar_iommu(iommu);
a868e6b7 3415 free_dmar_iommu(iommu);
ffebeb46 3416 }
13cf0174 3417
d9630fe9 3418 kfree(g_iommus);
13cf0174 3419
989d51fc 3420error:
ba395927
KA
3421 return ret;
3422}
3423
5a5e02a6 3424/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3425static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3426 struct dmar_domain *domain,
3427 unsigned long nrpages, uint64_t dma_mask)
ba395927 3428{
e083ea5b 3429 unsigned long iova_pfn;
ba395927 3430
cb8b892d
LB
3431 /*
3432 * Restrict dma_mask to the width that the iommu can handle.
3433 * First-level translation restricts the input-address to a
3434 * canonical address (i.e., address bits 63:N have the same
3435 * value as address bit [N-1], where N is 48-bits with 4-level
3436 * paging and 57-bits with 5-level paging). Hence, skip bit
3437 * [N-1].
3438 */
3439 if (domain_use_first_level(domain))
3440 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
3441 dma_mask);
3442 else
3443 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
3444 dma_mask);
3445
8f6429c7
RM
3446 /* Ensure we reserve the whole size-aligned region */
3447 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3448
3449 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3450 /*
 3451 * First try to allocate an I/O virtual address in
284901a9 3452 * DMA_BIT_MASK(32), and if that fails then try allocating
3609801e 3453 * from the higher range.
ba395927 3454 */
22e2f9fa 3455 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3456 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3457 if (iova_pfn)
3458 return iova_pfn;
875764de 3459 }
538d5b33
TN
3460 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3461 IOVA_PFN(dma_mask), true);
22e2f9fa 3462 if (unlikely(!iova_pfn)) {
944c9175
QC
3463 dev_err_once(dev, "Allocating %ld-page iova failed\n",
3464 nrpages);
2aac6304 3465 return 0;
f76aec76
KA
3466 }
3467
22e2f9fa 3468 return iova_pfn;
f76aec76
KA
3469}
3470
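/*
 * Editor's illustrative sketch -- not part of the original file.  The
 * first-level branch above gives up one address bit because first-level
 * input addresses must be canonical, i.e. the top usable bit is
 * reserved for sign extension.  The helper below assumes that
 * DOMAIN_MAX_ADDR(gaw) is simply the all-ones mask below bit gaw; the
 * "demo_" name is hypothetical.
 */
static unsigned long long demo_effective_dma_mask(int gaw, int first_level,
						  unsigned long long dma_mask)
{
	int width = first_level ? gaw - 1 : gaw;	/* drop the sign bit for FL */
	unsigned long long domain_max =
		(width >= 64) ? ~0ULL : (1ULL << width) - 1;

	return dma_mask < domain_max ? dma_mask : domain_max;
}

/*
 * With gaw == 48 and an unrestricted device mask:
 *   second level: 0x0000ffffffffffff (48 usable bits)
 *   first level:  0x00007fffffffffff (47 bits; bit 47 is the sign bit)
 */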
21d5d27c
LG
3471static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3472 size_t size, int dir, u64 dma_mask)
f76aec76 3473{
f76aec76 3474 struct dmar_domain *domain;
5b6985ce 3475 phys_addr_t start_paddr;
2aac6304 3476 unsigned long iova_pfn;
f76aec76 3477 int prot = 0;
6865f0d1 3478 int ret;
8c11e798 3479 struct intel_iommu *iommu;
33041ec0 3480 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3481
3482 BUG_ON(dir == DMA_NONE);
2c2e2c38 3483
6fc7020c
LB
3484 if (unlikely(attach_deferred(dev)))
3485 do_deferred_attach(dev);
3486
96d170f3 3487 domain = find_domain(dev);
f76aec76 3488 if (!domain)
524a669b 3489 return DMA_MAPPING_ERROR;
f76aec76 3490
8c11e798 3491 iommu = domain_get_iommu(domain);
88cb6a74 3492 size = aligned_nrpages(paddr, size);
f76aec76 3493
2aac6304
OP
3494 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3495 if (!iova_pfn)
f76aec76
KA
3496 goto error;
3497
ba395927
KA
3498 /*
 3499 * Check if DMAR supports zero-length reads on write-only
 3500 * mappings.
3501 */
3502 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3503 !cap_zlr(iommu->cap))
ba395927
KA
3504 prot |= DMA_PTE_READ;
3505 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3506 prot |= DMA_PTE_WRITE;
3507 /*
6865f0d1 3508 * paddr .. (paddr + size) might cover a partial page, so we should map the whole
ba395927 3509 * page. Note: if two parts of one page are mapped separately, we
6865f0d1 3510 * might have two guest_addr values mapping to the same host paddr, but this
ba395927
KA
3511 * is not a big problem
3512 */
2aac6304 3513 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3514 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3515 if (ret)
3516 goto error;
3517
2aac6304 3518 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246 3519 start_paddr += paddr & ~PAGE_MASK;
3b53034c
LB
3520
3521 trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3522
03d6a246 3523 return start_paddr;
ba395927 3524
ba395927 3525error:
2aac6304 3526 if (iova_pfn)
22e2f9fa 3527 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3528 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3529 size, (unsigned long long)paddr, dir);
524a669b 3530 return DMA_MAPPING_ERROR;
ba395927
KA
3531}
3532
ffbbef5c
FT
3533static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3534 unsigned long offset, size_t size,
3535 enum dma_data_direction dir,
00085f1e 3536 unsigned long attrs)
bb9e6d65 3537{
6fc7020c
LB
3538 return __intel_map_single(dev, page_to_phys(page) + offset,
3539 size, dir, *dev->dma_mask);
21d5d27c
LG
3540}
3541
3542static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3543 size_t size, enum dma_data_direction dir,
3544 unsigned long attrs)
3545{
6fc7020c 3546 return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
bb9e6d65
FT
3547}
3548
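/*
 * Undo a mapping built above: work out the pfn range backing dev_addr,
 * clear the page-table entries, and either flush the IOTLB and free the
 * IOVA immediately (strict mode, untrusted devices, or no flush queue)
 * or defer both via the IOVA flush queue.
 */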
769530e4 3549static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3550{
f76aec76 3551 struct dmar_domain *domain;
d794dc9b 3552 unsigned long start_pfn, last_pfn;
769530e4 3553 unsigned long nrpages;
2aac6304 3554 unsigned long iova_pfn;
8c11e798 3555 struct intel_iommu *iommu;
ea8ea460 3556 struct page *freelist;
f7b0c4ce 3557 struct pci_dev *pdev = NULL;
ba395927 3558
1525a29a 3559 domain = find_domain(dev);
ba395927
KA
3560 BUG_ON(!domain);
3561
8c11e798
WH
3562 iommu = domain_get_iommu(domain);
3563
2aac6304 3564 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3565
769530e4 3566 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3567 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3568 last_pfn = start_pfn + nrpages - 1;
ba395927 3569
f7b0c4ce
LB
3570 if (dev_is_pci(dev))
3571 pdev = to_pci_dev(dev);
3572
ea8ea460 3573 freelist = domain_unmap(domain, start_pfn, last_pfn);
effa4678
DS
3574 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3575 !has_iova_flush_queue(&domain->iovad)) {
a1ddcbe9 3576 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3577 nrpages, !freelist, 0);
5e0d2a6f 3578 /* free iova */
22e2f9fa 3579 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3580 dma_free_pagelist(freelist);
5e0d2a6f 3581 } else {
13cf0174
JR
3582 queue_iova(&domain->iovad, iova_pfn, nrpages,
3583 (unsigned long)freelist);
5e0d2a6f 3584 /*
3585 * Queue up the release of the unmap; batching it saves roughly
3586 * 1/6th of the CPU time the iotlb flush operation would use.
3587 */
5e0d2a6f 3588 }
3b53034c
LB
3589
3590 trace_unmap_single(dev, dev_addr, size);
ba395927
KA
3591}
3592
d41a4adb
JL
3593static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3594 size_t size, enum dma_data_direction dir,
00085f1e 3595 unsigned long attrs)
d41a4adb 3596{
6fc7020c 3597 intel_unmap(dev, dev_addr, size);
9cc0c2af
CH
3598}
3599
3600static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3601 size_t size, enum dma_data_direction dir, unsigned long attrs)
3602{
6fc7020c 3603 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3604}
3605
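/*
 * Coherent allocations: obtain page-aligned memory (preferring CMA when
 * the caller may block), zero it, and map it bidirectionally under the
 * device's coherent DMA mask.
 */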
5040a918 3606static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3607 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3608 unsigned long attrs)
ba395927 3609{
7ec916f8
CH
3610 struct page *page = NULL;
3611 int order;
ba395927 3612
6fc7020c
LB
3613 if (unlikely(attach_deferred(dev)))
3614 do_deferred_attach(dev);
9cc0c2af 3615
7ec916f8
CH
3616 size = PAGE_ALIGN(size);
3617 order = get_order(size);
7ec916f8
CH
3618
3619 if (gfpflags_allow_blocking(flags)) {
3620 unsigned int count = size >> PAGE_SHIFT;
3621
d834c5ab
MS
3622 page = dma_alloc_from_contiguous(dev, count, order,
3623 flags & __GFP_NOWARN);
7ec916f8
CH
3624 }
3625
3626 if (!page)
3627 page = alloc_pages(flags, order);
3628 if (!page)
3629 return NULL;
3630 memset(page_address(page), 0, size);
3631
21d5d27c
LG
3632 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3633 DMA_BIDIRECTIONAL,
3634 dev->coherent_dma_mask);
524a669b 3635 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3636 return page_address(page);
3637 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3638 __free_pages(page, order);
36746436 3639
ba395927
KA
3640 return NULL;
3641}
3642
5040a918 3643static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3644 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3645{
7ec916f8
CH
3646 int order;
3647 struct page *page = virt_to_page(vaddr);
3648
3649 size = PAGE_ALIGN(size);
3650 order = get_order(size);
3651
3652 intel_unmap(dev, dma_handle, size);
3653 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3654 __free_pages(page, order);
ba395927
KA
3655}
3656
5040a918 3657static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3658 int nelems, enum dma_data_direction dir,
00085f1e 3659 unsigned long attrs)
ba395927 3660{
769530e4
OP
3661 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3662 unsigned long nrpages = 0;
3663 struct scatterlist *sg;
3664 int i;
3665
3666 for_each_sg(sglist, sg, nelems, i) {
3667 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3668 }
3669
3670 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3b53034c
LB
3671
3672 trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3673}
3674
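/*
 * Scatter-gather mapping: add up the page-aligned length of every
 * segment, carve one contiguous IOVA range for the whole list, and map
 * the segments into it; on failure the partial page table and the IOVA
 * range are released and 0 is returned.
 */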
5040a918 3675static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3676 enum dma_data_direction dir, unsigned long attrs)
ba395927 3677{
ba395927 3678 int i;
ba395927 3679 struct dmar_domain *domain;
f76aec76
KA
3680 size_t size = 0;
3681 int prot = 0;
2aac6304 3682 unsigned long iova_pfn;
f76aec76 3683 int ret;
c03ab37c 3684 struct scatterlist *sg;
b536d24d 3685 unsigned long start_vpfn;
8c11e798 3686 struct intel_iommu *iommu;
ba395927
KA
3687
3688 BUG_ON(dir == DMA_NONE);
6fc7020c
LB
3689
3690 if (unlikely(attach_deferred(dev)))
3691 do_deferred_attach(dev);
ba395927 3692
96d170f3 3693 domain = find_domain(dev);
f76aec76
KA
3694 if (!domain)
3695 return 0;
3696
8c11e798
WH
3697 iommu = domain_get_iommu(domain);
3698
b536d24d 3699 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3700 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3701
2aac6304 3702 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3703 *dev->dma_mask);
2aac6304 3704 if (!iova_pfn) {
c03ab37c 3705 sglist->dma_length = 0;
f76aec76
KA
3706 return 0;
3707 }
3708
3709 /*
3710 * Check if DMAR supports zero-length reads on write-only
3711 * mappings.
3712 */
3713 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3714 !cap_zlr(iommu->cap))
f76aec76
KA
3715 prot |= DMA_PTE_READ;
3716 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3717 prot |= DMA_PTE_WRITE;
3718
2aac6304 3719 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3720
f532959b 3721 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3722 if (unlikely(ret)) {
e1605495 3723 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3724 start_vpfn + size - 1,
3725 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3726 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3727 return 0;
ba395927
KA
3728 }
3729
984d03ad
LB
3730 for_each_sg(sglist, sg, nelems, i)
3731 trace_map_sg(dev, i + 1, nelems, sg);
3b53034c 3732
ba395927
KA
3733 return nelems;
3734}
3735
9c24eaf8
AS
3736static u64 intel_get_required_mask(struct device *dev)
3737{
9c24eaf8
AS
3738 return DMA_BIT_MASK(32);
3739}
3740
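/*
 * These callbacks are reached through the generic DMA API once a device
 * is bound to intel_dma_ops. A minimal sketch of the driver side, with
 * 'dev', 'buf' and 'len' as hypothetical placeholders:
 *
 *	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, handle))
 *		return -ENOMEM;
 *	... program the device with 'handle' and run the transfer ...
 *	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
 *
 * For devices handled here, dma_map_single() is routed to intel_map_page()
 * and dma_unmap_single() to intel_unmap_page().
 */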
02b4da5f 3741static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3742 .alloc = intel_alloc_coherent,
3743 .free = intel_free_coherent,
ba395927
KA
3744 .map_sg = intel_map_sg,
3745 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3746 .map_page = intel_map_page,
3747 .unmap_page = intel_unmap_page,
21d5d27c 3748 .map_resource = intel_map_resource,
9cc0c2af 3749 .unmap_resource = intel_unmap_resource,
fec777c3 3750 .dma_supported = dma_direct_supported,
f9f3232a
CH
3751 .mmap = dma_common_mmap,
3752 .get_sgtable = dma_common_get_sgtable,
efa70f2f
CH
3753 .alloc_pages = dma_common_alloc_pages,
3754 .free_pages = dma_common_free_pages,
9c24eaf8 3755 .get_required_mask = intel_get_required_mask,
ba395927
KA
3756};
3757
cfb94a37
LB
3758static void
3759bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3760 enum dma_data_direction dir, enum dma_sync_target target)
3761{
3762 struct dmar_domain *domain;
3763 phys_addr_t tlb_addr;
3764
3765 domain = find_domain(dev);
3766 if (WARN_ON(!domain))
3767 return;
3768
3769 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3770 if (is_swiotlb_buffer(tlb_addr))
3771 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3772}
3773
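/*
 * Bounce path used for untrusted (external-facing) devices: when the
 * buffer is not VT-d page aligned it is first copied into a swiotlb slot
 * so that only whole pages are exposed to the device; the slot (or the
 * original buffer when already aligned) is then mapped at a freshly
 * allocated IOVA.
 */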
3774static dma_addr_t
3775bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3776 enum dma_data_direction dir, unsigned long attrs,
3777 u64 dma_mask)
3778{
3779 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3780 struct dmar_domain *domain;
3781 struct intel_iommu *iommu;
3782 unsigned long iova_pfn;
3783 unsigned long nrpages;
3784 phys_addr_t tlb_addr;
3785 int prot = 0;
3786 int ret;
3787
a11bfde9
JR
3788 if (unlikely(attach_deferred(dev)))
3789 do_deferred_attach(dev);
3790
96d170f3 3791 domain = find_domain(dev);
a11bfde9 3792
cfb94a37
LB
3793 if (WARN_ON(dir == DMA_NONE || !domain))
3794 return DMA_MAPPING_ERROR;
3795
3796 iommu = domain_get_iommu(domain);
3797 if (WARN_ON(!iommu))
3798 return DMA_MAPPING_ERROR;
3799
3800 nrpages = aligned_nrpages(0, size);
3801 iova_pfn = intel_alloc_iova(dev, domain,
3802 dma_to_mm_pfn(nrpages), dma_mask);
3803 if (!iova_pfn)
3804 return DMA_MAPPING_ERROR;
3805
3806 /*
3807 * Check if DMAR supports zero-length reads on write-only
3808 * mappings.
3809 */
3810 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3811 !cap_zlr(iommu->cap))
3812 prot |= DMA_PTE_READ;
3813 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3814 prot |= DMA_PTE_WRITE;
3815
3816 /*
3817 * If both the physical buffer start address and size are
3818 * page aligned, we don't need to use a bounce page.
3819 */
3820 if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3821 tlb_addr = swiotlb_tbl_map_single(dev,
5ceda740 3822 phys_to_dma_unencrypted(dev, io_tlb_start),
cfb94a37
LB
3823 paddr, size, aligned_size, dir, attrs);
3824 if (tlb_addr == DMA_MAPPING_ERROR) {
3825 goto swiotlb_error;
3826 } else {
3827 /* Cleanup the padding area. */
3828 void *padding_start = phys_to_virt(tlb_addr);
3829 size_t padding_size = aligned_size;
3830
3831 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3832 (dir == DMA_TO_DEVICE ||
3833 dir == DMA_BIDIRECTIONAL)) {
3834 padding_start += size;
3835 padding_size -= size;
3836 }
3837
3838 memset(padding_start, 0, padding_size);
3839 }
3840 } else {
3841 tlb_addr = paddr;
3842 }
3843
3844 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3845 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3846 if (ret)
3847 goto mapping_error;
3848
3849 trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3850
3851 return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3852
3853mapping_error:
3854 if (is_swiotlb_buffer(tlb_addr))
3855 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3856 aligned_size, dir, attrs);
3857swiotlb_error:
3858 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3859 dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3860 size, (unsigned long long)paddr, dir);
3861
3862 return DMA_MAPPING_ERROR;
3863}
3864
3865static void
3866bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3867 enum dma_data_direction dir, unsigned long attrs)
3868{
3869 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3870 struct dmar_domain *domain;
3871 phys_addr_t tlb_addr;
3872
3873 domain = find_domain(dev);
3874 if (WARN_ON(!domain))
3875 return;
3876
3877 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3878 if (WARN_ON(!tlb_addr))
3879 return;
3880
3881 intel_unmap(dev, dev_addr, size);
3882 if (is_swiotlb_buffer(tlb_addr))
3883 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3884 aligned_size, dir, attrs);
3885
3886 trace_bounce_unmap_single(dev, dev_addr, size);
3887}
3888
3889static dma_addr_t
3890bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3891 size_t size, enum dma_data_direction dir, unsigned long attrs)
3892{
3893 return bounce_map_single(dev, page_to_phys(page) + offset,
3894 size, dir, attrs, *dev->dma_mask);
3895}
3896
3897static dma_addr_t
3898bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3899 enum dma_data_direction dir, unsigned long attrs)
3900{
3901 return bounce_map_single(dev, phys_addr, size,
3902 dir, attrs, *dev->dma_mask);
3903}
3904
3905static void
3906bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3907 enum dma_data_direction dir, unsigned long attrs)
3908{
3909 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3910}
3911
3912static void
3913bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3914 enum dma_data_direction dir, unsigned long attrs)
3915{
3916 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3917}
3918
3919static void
3920bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3921 enum dma_data_direction dir, unsigned long attrs)
3922{
3923 struct scatterlist *sg;
3924 int i;
3925
3926 for_each_sg(sglist, sg, nelems, i)
3927 bounce_unmap_page(dev, sg->dma_address,
3928 sg_dma_len(sg), dir, attrs);
3929}
3930
3931static int
3932bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3933 enum dma_data_direction dir, unsigned long attrs)
3934{
3935 int i;
3936 struct scatterlist *sg;
3937
3938 for_each_sg(sglist, sg, nelems, i) {
3939 sg->dma_address = bounce_map_page(dev, sg_page(sg),
3940 sg->offset, sg->length,
3941 dir, attrs);
3942 if (sg->dma_address == DMA_MAPPING_ERROR)
3943 goto out_unmap;
3944 sg_dma_len(sg) = sg->length;
3945 }
3946
984d03ad
LB
3947 for_each_sg(sglist, sg, nelems, i)
3948 trace_bounce_map_sg(dev, i + 1, nelems, sg);
3949
cfb94a37
LB
3950 return nelems;
3951
3952out_unmap:
3953 bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3954 return 0;
3955}
3956
3957static void
3958bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3959 size_t size, enum dma_data_direction dir)
3960{
3961 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
3962}
3963
3964static void
3965bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
3966 size_t size, enum dma_data_direction dir)
3967{
3968 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
3969}
3970
3971static void
3972bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
3973 int nelems, enum dma_data_direction dir)
3974{
3975 struct scatterlist *sg;
3976 int i;
3977
3978 for_each_sg(sglist, sg, nelems, i)
3979 bounce_sync_single(dev, sg_dma_address(sg),
3980 sg_dma_len(sg), dir, SYNC_FOR_CPU);
3981}
3982
3983static void
3984bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
3985 int nelems, enum dma_data_direction dir)
3986{
3987 struct scatterlist *sg;
3988 int i;
3989
3990 for_each_sg(sglist, sg, nelems, i)
3991 bounce_sync_single(dev, sg_dma_address(sg),
3992 sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
3993}
3994
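/*
 * Same shape as intel_dma_ops, but map/unmap go through the bounce
 * helpers above and the sync_* callbacks keep the swiotlb copy coherent
 * with the original buffer; coherent allocations reuse the regular
 * intel_alloc_coherent()/intel_free_coherent().
 */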
3995static const struct dma_map_ops bounce_dma_ops = {
3996 .alloc = intel_alloc_coherent,
3997 .free = intel_free_coherent,
3998 .map_sg = bounce_map_sg,
3999 .unmap_sg = bounce_unmap_sg,
4000 .map_page = bounce_map_page,
4001 .unmap_page = bounce_unmap_page,
4002 .sync_single_for_cpu = bounce_sync_single_for_cpu,
4003 .sync_single_for_device = bounce_sync_single_for_device,
4004 .sync_sg_for_cpu = bounce_sync_sg_for_cpu,
4005 .sync_sg_for_device = bounce_sync_sg_for_device,
4006 .map_resource = bounce_map_resource,
4007 .unmap_resource = bounce_unmap_resource,
efa70f2f
CH
4008 .alloc_pages = dma_common_alloc_pages,
4009 .free_pages = dma_common_free_pages,
cfb94a37
LB
4010 .dma_supported = dma_direct_supported,
4011};
4012
ba395927
KA
4013static inline int iommu_domain_cache_init(void)
4014{
4015 int ret = 0;
4016
4017 iommu_domain_cache = kmem_cache_create("iommu_domain",
4018 sizeof(struct dmar_domain),
4019 0,
4020 SLAB_HWCACHE_ALIGN,
4021
4022 NULL);
4023 if (!iommu_domain_cache) {
9f10e5bf 4024 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
4025 ret = -ENOMEM;
4026 }
4027
4028 return ret;
4029}
4030
4031static inline int iommu_devinfo_cache_init(void)
4032{
4033 int ret = 0;
4034
4035 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4036 sizeof(struct device_domain_info),
4037 0,
4038 SLAB_HWCACHE_ALIGN,
ba395927
KA
4039 NULL);
4040 if (!iommu_devinfo_cache) {
9f10e5bf 4041 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
4042 ret = -ENOMEM;
4043 }
4044
4045 return ret;
4046}
4047
ba395927
KA
4048static int __init iommu_init_mempool(void)
4049{
4050 int ret;
ae1ff3d6 4051 ret = iova_cache_get();
ba395927
KA
4052 if (ret)
4053 return ret;
4054
4055 ret = iommu_domain_cache_init();
4056 if (ret)
4057 goto domain_error;
4058
4059 ret = iommu_devinfo_cache_init();
4060 if (!ret)
4061 return ret;
4062
4063 kmem_cache_destroy(iommu_domain_cache);
4064domain_error:
ae1ff3d6 4065 iova_cache_put();
ba395927
KA
4066
4067 return -ENOMEM;
4068}
4069
4070static void __init iommu_exit_mempool(void)
4071{
4072 kmem_cache_destroy(iommu_devinfo_cache);
4073 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 4074 iova_cache_put();
ba395927
KA
4075}
4076
ba395927
KA
4077static void __init init_no_remapping_devices(void)
4078{
4079 struct dmar_drhd_unit *drhd;
832bd858 4080 struct device *dev;
b683b230 4081 int i;
ba395927
KA
4082
4083 for_each_drhd_unit(drhd) {
4084 if (!drhd->include_all) {
b683b230
JL
4085 for_each_active_dev_scope(drhd->devices,
4086 drhd->devices_cnt, i, dev)
4087 break;
832bd858 4088 /* ignore DMAR unit if no devices exist */
ba395927
KA
4089 if (i == drhd->devices_cnt)
4090 drhd->ignored = 1;
4091 }
4092 }
4093
7c919779 4094 for_each_active_drhd_unit(drhd) {
7c919779 4095 if (drhd->include_all)
ba395927
KA
4096 continue;
4097
b683b230
JL
4098 for_each_active_dev_scope(drhd->devices,
4099 drhd->devices_cnt, i, dev)
832bd858 4100 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4101 break;
ba395927
KA
4102 if (i < drhd->devices_cnt)
4103 continue;
4104
c0771df8
DW
4105 /* This IOMMU has *only* gfx devices. Either bypass it or
4106 set the gfx_dedicated flag, as appropriate */
b1012ca8 4107 drhd->gfx_dedicated = 1;
2d33b7d6 4108 if (!dmar_map_gfx)
c0771df8 4109 drhd->ignored = 1;
ba395927
KA
4110 }
4111}
4112
f59c7b69
FY
4113#ifdef CONFIG_SUSPEND
4114static int init_iommu_hw(void)
4115{
4116 struct dmar_drhd_unit *drhd;
4117 struct intel_iommu *iommu = NULL;
4118
4119 for_each_active_iommu(iommu, drhd)
4120 if (iommu->qi)
4121 dmar_reenable_qi(iommu);
4122
b779260b
JC
4123 for_each_iommu(iommu, drhd) {
4124 if (drhd->ignored) {
4125 /*
4126 * we always have to disable PMRs or DMA may fail on
4127 * this device
4128 */
4129 if (force_on)
4130 iommu_disable_protect_mem_regions(iommu);
4131 continue;
4132 }
095303e0 4133
f59c7b69
FY
4134 iommu_flush_write_buffer(iommu);
4135
4136 iommu_set_root_entry(iommu);
4137
4138 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4139 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4140 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4141 iommu_enable_translation(iommu);
b94996c9 4142 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4143 }
4144
4145 return 0;
4146}
4147
4148static void iommu_flush_all(void)
4149{
4150 struct dmar_drhd_unit *drhd;
4151 struct intel_iommu *iommu;
4152
4153 for_each_active_iommu(iommu, drhd) {
4154 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4155 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4156 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4157 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4158 }
4159}
4160
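/*
 * Across suspend only the fault-event registers (FECTL, FEDATA, FEADDR,
 * FEUADDR) are preserved in software; everything else is rebuilt by
 * init_iommu_hw() on resume, after which the saved values are written
 * back.
 */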
134fac3f 4161static int iommu_suspend(void)
f59c7b69
FY
4162{
4163 struct dmar_drhd_unit *drhd;
4164 struct intel_iommu *iommu = NULL;
4165 unsigned long flag;
4166
4167 for_each_active_iommu(iommu, drhd) {
6396bb22 4168 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4169 GFP_ATOMIC);
4170 if (!iommu->iommu_state)
4171 goto nomem;
4172 }
4173
4174 iommu_flush_all();
4175
4176 for_each_active_iommu(iommu, drhd) {
4177 iommu_disable_translation(iommu);
4178
1f5b3c3f 4179 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4180
4181 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4182 readl(iommu->reg + DMAR_FECTL_REG);
4183 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4184 readl(iommu->reg + DMAR_FEDATA_REG);
4185 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4186 readl(iommu->reg + DMAR_FEADDR_REG);
4187 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4188 readl(iommu->reg + DMAR_FEUADDR_REG);
4189
1f5b3c3f 4190 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4191 }
4192 return 0;
4193
4194nomem:
4195 for_each_active_iommu(iommu, drhd)
4196 kfree(iommu->iommu_state);
4197
4198 return -ENOMEM;
4199}
4200
134fac3f 4201static void iommu_resume(void)
f59c7b69
FY
4202{
4203 struct dmar_drhd_unit *drhd;
4204 struct intel_iommu *iommu = NULL;
4205 unsigned long flag;
4206
4207 if (init_iommu_hw()) {
b779260b
JC
4208 if (force_on)
4209 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4210 else
4211 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4212 return;
f59c7b69
FY
4213 }
4214
4215 for_each_active_iommu(iommu, drhd) {
4216
1f5b3c3f 4217 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4218
4219 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4220 iommu->reg + DMAR_FECTL_REG);
4221 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4222 iommu->reg + DMAR_FEDATA_REG);
4223 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4224 iommu->reg + DMAR_FEADDR_REG);
4225 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4226 iommu->reg + DMAR_FEUADDR_REG);
4227
1f5b3c3f 4228 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4229 }
4230
4231 for_each_active_iommu(iommu, drhd)
4232 kfree(iommu->iommu_state);
f59c7b69
FY
4233}
4234
134fac3f 4235static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4236 .resume = iommu_resume,
4237 .suspend = iommu_suspend,
4238};
4239
134fac3f 4240static void __init init_iommu_pm_ops(void)
f59c7b69 4241{
134fac3f 4242 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4243}
4244
4245#else
99592ba4 4246static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4247#endif /* CONFIG_PM */
4248
ce4cc52b
BR
4249static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
4250{
4251 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
4252 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
4253 rmrr->end_address <= rmrr->base_address ||
4254 arch_rmrr_sanity_check(rmrr))
4255 return -EINVAL;
4256
4257 return 0;
4258}
4259
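/*
 * Parse one RMRR (Reserved Memory Region Reporting) structure from the
 * DMAR table: warn and taint on firmware that reports a misaligned or
 * inverted range, then record the region and its device scope on
 * dmar_rmrr_units so the region can later be identity mapped for the
 * devices it covers.
 */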
c2a0b538 4260int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4261{
4262 struct acpi_dmar_reserved_memory *rmrr;
4263 struct dmar_rmrr_unit *rmrru;
f036c7fa
YC
4264
4265 rmrr = (struct acpi_dmar_reserved_memory *)header;
96788c7a
HG
4266 if (rmrr_sanity_check(rmrr)) {
4267 pr_warn(FW_BUG
f5a68bb0
BR
4268 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
4269 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4270 rmrr->base_address, rmrr->end_address,
4271 dmi_get_system_info(DMI_BIOS_VENDOR),
4272 dmi_get_system_info(DMI_BIOS_VERSION),
4273 dmi_get_system_info(DMI_PRODUCT_VERSION));
96788c7a
HG
4274 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
4275 }
318fe7df
SS
4276
4277 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4278 if (!rmrru)
0659b8dc 4279 goto out;
318fe7df
SS
4280
4281 rmrru->hdr = header;
f036c7fa 4282
318fe7df
SS
4283 rmrru->base_address = rmrr->base_address;
4284 rmrru->end_address = rmrr->end_address;
0659b8dc 4285
2e455289
JL
4286 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4287 ((void *)rmrr) + rmrr->header.length,
4288 &rmrru->devices_cnt);
0659b8dc 4289 if (rmrru->devices_cnt && rmrru->devices == NULL)
5f64ce54 4290 goto free_rmrru;
318fe7df 4291
2e455289 4292 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4293
2e455289 4294 return 0;
0659b8dc
EA
4295free_rmrru:
4296 kfree(rmrru);
4297out:
4298 return -ENOMEM;
318fe7df
SS
4299}
4300
6b197249
JL
4301static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4302{
4303 struct dmar_atsr_unit *atsru;
4304 struct acpi_dmar_atsr *tmp;
4305
c6f4ebde
QC
4306 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
4307 dmar_rcu_check()) {
6b197249
JL
4308 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4309 if (atsr->segment != tmp->segment)
4310 continue;
4311 if (atsr->header.length != tmp->header.length)
4312 continue;
4313 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4314 return atsru;
4315 }
4316
4317 return NULL;
4318}
4319
4320int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4321{
4322 struct acpi_dmar_atsr *atsr;
4323 struct dmar_atsr_unit *atsru;
4324
b608fe35 4325 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4326 return 0;
4327
318fe7df 4328 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4329 atsru = dmar_find_atsr(atsr);
4330 if (atsru)
4331 return 0;
4332
4333 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4334 if (!atsru)
4335 return -ENOMEM;
4336
6b197249
JL
4337 /*
4338 * If memory is allocated from slab by ACPI _DSM method, we need to
4339 * copy the memory content because the memory buffer will be freed
4340 * on return.
4341 */
4342 atsru->hdr = (void *)(atsru + 1);
4343 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4344 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4345 if (!atsru->include_all) {
4346 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4347 (void *)atsr + atsr->header.length,
4348 &atsru->devices_cnt);
4349 if (atsru->devices_cnt && atsru->devices == NULL) {
4350 kfree(atsru);
4351 return -ENOMEM;
4352 }
4353 }
318fe7df 4354
0e242612 4355 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4356
4357 return 0;
4358}
4359
9bdc531e
JL
4360static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4361{
4362 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4363 kfree(atsru);
4364}
4365
6b197249
JL
4366int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4367{
4368 struct acpi_dmar_atsr *atsr;
4369 struct dmar_atsr_unit *atsru;
4370
4371 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4372 atsru = dmar_find_atsr(atsr);
4373 if (atsru) {
4374 list_del_rcu(&atsru->list);
4375 synchronize_rcu();
4376 intel_iommu_free_atsr(atsru);
4377 }
4378
4379 return 0;
4380}
4381
4382int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4383{
4384 int i;
4385 struct device *dev;
4386 struct acpi_dmar_atsr *atsr;
4387 struct dmar_atsr_unit *atsru;
4388
4389 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4390 atsru = dmar_find_atsr(atsr);
4391 if (!atsru)
4392 return 0;
4393
194dc870 4394 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4395 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4396 i, dev)
4397 return -EBUSY;
194dc870 4398 }
6b197249
JL
4399
4400 return 0;
4401}
4402
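/*
 * Hot-added DMAR unit: check that the new IOMMU provides the features the
 * running configuration relies on (pass-through, snooping, super pages),
 * then bring it up: domains, root entry, invalidation queue, optional
 * page-request queue, fault interrupt, and finally translation.
 */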
ffebeb46
JL
4403static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4404{
e083ea5b 4405 int sp, ret;
ffebeb46
JL
4406 struct intel_iommu *iommu = dmaru->iommu;
4407
4408 if (g_iommus[iommu->seq_id])
4409 return 0;
4410
4411 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4412 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4413 iommu->name);
4414 return -ENXIO;
4415 }
4416 if (!ecap_sc_support(iommu->ecap) &&
4417 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4418 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4419 iommu->name);
4420 return -ENXIO;
4421 }
64229e8f 4422 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
ffebeb46 4423 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4424 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4425 iommu->name);
4426 return -ENXIO;
4427 }
4428
4429 /*
4430 * Disable translation if already enabled prior to OS handover.
4431 */
4432 if (iommu->gcmd & DMA_GCMD_TE)
4433 iommu_disable_translation(iommu);
4434
4435 g_iommus[iommu->seq_id] = iommu;
4436 ret = iommu_init_domains(iommu);
4437 if (ret == 0)
4438 ret = iommu_alloc_root_entry(iommu);
4439 if (ret)
4440 goto out;
4441
ff3dc652 4442 intel_svm_check(iommu);
8a94ade4 4443
ffebeb46
JL
4444 if (dmaru->ignored) {
4445 /*
4446 * we always have to disable PMRs or DMA may fail on this device
4447 */
4448 if (force_on)
4449 iommu_disable_protect_mem_regions(iommu);
4450 return 0;
4451 }
4452
4453 intel_iommu_init_qi(iommu);
4454 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4455
4456#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4457 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4458 ret = intel_svm_enable_prq(iommu);
4459 if (ret)
4460 goto disable_iommu;
4461 }
4462#endif
ffebeb46
JL
4463 ret = dmar_set_interrupt(iommu);
4464 if (ret)
4465 goto disable_iommu;
4466
4467 iommu_set_root_entry(iommu);
4468 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4469 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4470 iommu_enable_translation(iommu);
4471
ffebeb46
JL
4472 iommu_disable_protect_mem_regions(iommu);
4473 return 0;
4474
4475disable_iommu:
4476 disable_dmar_iommu(iommu);
4477out:
4478 free_dmar_iommu(iommu);
4479 return ret;
4480}
4481
6b197249
JL
4482int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4483{
ffebeb46
JL
4484 int ret = 0;
4485 struct intel_iommu *iommu = dmaru->iommu;
4486
4487 if (!intel_iommu_enabled)
4488 return 0;
4489 if (iommu == NULL)
4490 return -EINVAL;
4491
4492 if (insert) {
4493 ret = intel_iommu_add(dmaru);
4494 } else {
4495 disable_dmar_iommu(iommu);
4496 free_dmar_iommu(iommu);
4497 }
4498
4499 return ret;
6b197249
JL
4500}
4501
9bdc531e
JL
4502static void intel_iommu_free_dmars(void)
4503{
4504 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4505 struct dmar_atsr_unit *atsru, *atsr_n;
4506
4507 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4508 list_del(&rmrru->list);
4509 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4510 kfree(rmrru);
318fe7df
SS
4511 }
4512
9bdc531e
JL
4513 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4514 list_del(&atsru->list);
4515 intel_iommu_free_atsr(atsru);
4516 }
318fe7df
SS
4517}
4518
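/*
 * Decide whether ATS may be used for @dev: integrated (root-complex)
 * devices always qualify, non-PCIe paths never do, and a device below a
 * PCIe root port qualifies only if that port is listed in (or covered by
 * an include-all) ATSR for the device's segment.
 */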
4519int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4520{
b683b230 4521 int i, ret = 1;
318fe7df 4522 struct pci_bus *bus;
832bd858
DW
4523 struct pci_dev *bridge = NULL;
4524 struct device *tmp;
318fe7df
SS
4525 struct acpi_dmar_atsr *atsr;
4526 struct dmar_atsr_unit *atsru;
4527
4528 dev = pci_physfn(dev);
318fe7df 4529 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4530 bridge = bus->self;
d14053b3
DW
4531 /* If it's an integrated device, allow ATS */
4532 if (!bridge)
4533 return 1;
4534 /* Connected via non-PCIe: no ATS */
4535 if (!pci_is_pcie(bridge) ||
62f87c0e 4536 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4537 return 0;
d14053b3 4538 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4539 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4540 break;
318fe7df
SS
4541 }
4542
0e242612 4543 rcu_read_lock();
b5f82ddf
JL
4544 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4545 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4546 if (atsr->segment != pci_domain_nr(dev->bus))
4547 continue;
4548
b683b230 4549 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4550 if (tmp == &bridge->dev)
b683b230 4551 goto out;
b5f82ddf
JL
4552
4553 if (atsru->include_all)
b683b230 4554 goto out;
b5f82ddf 4555 }
b683b230
JL
4556 ret = 0;
4557out:
0e242612 4558 rcu_read_unlock();
318fe7df 4559
b683b230 4560 return ret;
318fe7df
SS
4561}
4562
59ce0515
JL
4563int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4564{
e083ea5b 4565 int ret;
59ce0515
JL
4566 struct dmar_rmrr_unit *rmrru;
4567 struct dmar_atsr_unit *atsru;
4568 struct acpi_dmar_atsr *atsr;
4569 struct acpi_dmar_reserved_memory *rmrr;
4570
b608fe35 4571 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4572 return 0;
4573
4574 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4575 rmrr = container_of(rmrru->hdr,
4576 struct acpi_dmar_reserved_memory, header);
4577 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4578 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4579 ((void *)rmrr) + rmrr->header.length,
4580 rmrr->segment, rmrru->devices,
4581 rmrru->devices_cnt);
e083ea5b 4582 if (ret < 0)
59ce0515 4583 return ret;
e6a8c9b3 4584 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4585 dmar_remove_dev_scope(info, rmrr->segment,
4586 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4587 }
4588 }
4589
4590 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4591 if (atsru->include_all)
4592 continue;
4593
4594 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4595 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4596 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4597 (void *)atsr + atsr->header.length,
4598 atsr->segment, atsru->devices,
4599 atsru->devices_cnt);
4600 if (ret > 0)
4601 break;
e083ea5b 4602 else if (ret < 0)
59ce0515 4603 return ret;
e6a8c9b3 4604 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4605 if (dmar_remove_dev_scope(info, atsr->segment,
4606 atsru->devices, atsru->devices_cnt))
4607 break;
4608 }
4609 }
4610
4611 return 0;
4612}
4613
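/*
 * Memory hotplug: while si_domain (the static identity domain) is in use,
 * newly onlined ranges are added to its identity map and offlined ranges
 * are unmapped and flushed from every active IOMMU's IOTLB.
 */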
75f05569
JL
4614static int intel_iommu_memory_notifier(struct notifier_block *nb,
4615 unsigned long val, void *v)
4616{
4617 struct memory_notify *mhp = v;
e70b081c
TM
4618 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4619 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
4620 mhp->nr_pages - 1);
75f05569
JL
4621
4622 switch (val) {
4623 case MEM_GOING_ONLINE:
e70b081c
TM
4624 if (iommu_domain_identity_map(si_domain,
4625 start_vpfn, last_vpfn)) {
4626 pr_warn("Failed to build identity map for [%lx-%lx]\n",
4627 start_vpfn, last_vpfn);
75f05569
JL
4628 return NOTIFY_BAD;
4629 }
4630 break;
4631
4632 case MEM_OFFLINE:
4633 case MEM_CANCEL_ONLINE:
e70b081c 4634 {
75f05569
JL
4635 struct dmar_drhd_unit *drhd;
4636 struct intel_iommu *iommu;
ea8ea460 4637 struct page *freelist;
75f05569 4638
e70b081c
TM
4639 freelist = domain_unmap(si_domain,
4640 start_vpfn, last_vpfn);
ea8ea460 4641
75f05569
JL
4642 rcu_read_lock();
4643 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4644 iommu_flush_iotlb_psi(iommu, si_domain,
e70b081c 4645 start_vpfn, mhp->nr_pages,
ea8ea460 4646 !freelist, 0);
75f05569 4647 rcu_read_unlock();
ea8ea460 4648 dma_free_pagelist(freelist);
75f05569
JL
4649 }
4650 break;
4651 }
4652
4653 return NOTIFY_OK;
4654}
4655
4656static struct notifier_block intel_iommu_memory_nb = {
4657 .notifier_call = intel_iommu_memory_notifier,
4658 .priority = 0
4659};
4660
22e2f9fa
OP
4661static void free_all_cpu_cached_iovas(unsigned int cpu)
4662{
4663 int i;
4664
4665 for (i = 0; i < g_num_of_iommus; i++) {
4666 struct intel_iommu *iommu = g_iommus[i];
4667 struct dmar_domain *domain;
0caa7616 4668 int did;
22e2f9fa
OP
4669
4670 if (!iommu)
4671 continue;
4672
3bd4f911 4673 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4674 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa 4675
e70b081c 4676 if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
22e2f9fa 4677 continue;
e70b081c 4678
22e2f9fa
OP
4679 free_cpu_cached_iovas(cpu, &domain->iovad);
4680 }
4681 }
4682}
4683
21647615 4684static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4685{
21647615 4686 free_all_cpu_cached_iovas(cpu);
21647615 4687 return 0;
aa473240
OP
4688}
4689
161b28aa
JR
4690static void intel_disable_iommus(void)
4691{
4692 struct intel_iommu *iommu = NULL;
4693 struct dmar_drhd_unit *drhd;
4694
4695 for_each_iommu(iommu, drhd)
4696 iommu_disable_translation(iommu);
4697}
4698
6c3a44ed
DD
4699void intel_iommu_shutdown(void)
4700{
4701 struct dmar_drhd_unit *drhd;
4702 struct intel_iommu *iommu = NULL;
4703
4704 if (no_iommu || dmar_disabled)
4705 return;
4706
4707 down_write(&dmar_global_lock);
4708
4709 /* Disable PMRs explicitly here. */
4710 for_each_iommu(iommu, drhd)
4711 iommu_disable_protect_mem_regions(iommu);
4712
4713 /* Make sure the IOMMUs are switched off */
4714 intel_disable_iommus();
4715
4716 up_write(&dmar_global_lock);
4717}
4718
a7fdb6e6
JR
4719static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4720{
2926a2aa
JR
4721 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4722
4723 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4724}
4725
a5459cfe
AW
4726static ssize_t intel_iommu_show_version(struct device *dev,
4727 struct device_attribute *attr,
4728 char *buf)
4729{
a7fdb6e6 4730 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4731 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4732 return sprintf(buf, "%d:%d\n",
4733 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4734}
4735static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4736
4737static ssize_t intel_iommu_show_address(struct device *dev,
4738 struct device_attribute *attr,
4739 char *buf)
4740{
a7fdb6e6 4741 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4742 return sprintf(buf, "%llx\n", iommu->reg_phys);
4743}
4744static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4745
4746static ssize_t intel_iommu_show_cap(struct device *dev,
4747 struct device_attribute *attr,
4748 char *buf)
4749{
a7fdb6e6 4750 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4751 return sprintf(buf, "%llx\n", iommu->cap);
4752}
4753static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4754
4755static ssize_t intel_iommu_show_ecap(struct device *dev,
4756 struct device_attribute *attr,
4757 char *buf)
4758{
a7fdb6e6 4759 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4760 return sprintf(buf, "%llx\n", iommu->ecap);
4761}
4762static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4763
2238c082
AW
4764static ssize_t intel_iommu_show_ndoms(struct device *dev,
4765 struct device_attribute *attr,
4766 char *buf)
4767{
a7fdb6e6 4768 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4769 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4770}
4771static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4772
4773static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4774 struct device_attribute *attr,
4775 char *buf)
4776{
a7fdb6e6 4777 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4778 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4779 cap_ndoms(iommu->cap)));
4780}
4781static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4782
a5459cfe
AW
4783static struct attribute *intel_iommu_attrs[] = {
4784 &dev_attr_version.attr,
4785 &dev_attr_address.attr,
4786 &dev_attr_cap.attr,
4787 &dev_attr_ecap.attr,
2238c082
AW
4788 &dev_attr_domains_supported.attr,
4789 &dev_attr_domains_used.attr,
a5459cfe
AW
4790 NULL,
4791};
4792
4793static struct attribute_group intel_iommu_group = {
4794 .name = "intel-iommu",
4795 .attrs = intel_iommu_attrs,
4796};
4797
4798const struct attribute_group *intel_iommu_groups[] = {
4799 &intel_iommu_group,
4800 NULL,
4801};
4802
99b50be9 4803static inline bool has_external_pci(void)
89a6079d
LB
4804{
4805 struct pci_dev *pdev = NULL;
89a6079d 4806
c5a5dc4c 4807 for_each_pci_dev(pdev)
99b50be9 4808 if (pdev->external_facing)
c5a5dc4c 4809 return true;
89a6079d 4810
c5a5dc4c
LB
4811 return false;
4812}
89a6079d 4813
c5a5dc4c
LB
4814static int __init platform_optin_force_iommu(void)
4815{
99b50be9 4816 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
89a6079d
LB
4817 return 0;
4818
4819 if (no_iommu || dmar_disabled)
4820 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4821
4822 /*
4823 * If Intel-IOMMU is disabled by default, we will apply identity
4824 * map for all devices except those marked as being untrusted.
4825 */
4826 if (dmar_disabled)
b89b6605 4827 iommu_set_default_passthrough(false);
89a6079d
LB
4828
4829 dmar_disabled = 0;
89a6079d
LB
4830 no_iommu = 0;
4831
4832 return 1;
4833}
4834
fa212a97
LB
4835static int __init probe_acpi_namespace_devices(void)
4836{
4837 struct dmar_drhd_unit *drhd;
af88ec39
QC
4838 /* To avoid a -Wunused-but-set-variable warning. */
4839 struct intel_iommu *iommu __maybe_unused;
fa212a97
LB
4840 struct device *dev;
4841 int i, ret = 0;
4842
4843 for_each_active_iommu(iommu, drhd) {
4844 for_each_active_dev_scope(drhd->devices,
4845 drhd->devices_cnt, i, dev) {
4846 struct acpi_device_physical_node *pn;
4847 struct iommu_group *group;
4848 struct acpi_device *adev;
4849
4850 if (dev->bus != &acpi_bus_type)
4851 continue;
4852
4853 adev = to_acpi_device(dev);
4854 mutex_lock(&adev->physical_node_lock);
4855 list_for_each_entry(pn,
4856 &adev->physical_node_list, node) {
4857 group = iommu_group_get(pn->dev);
4858 if (group) {
4859 iommu_group_put(group);
4860 continue;
4861 }
4862
4863 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4864 ret = iommu_probe_device(pn->dev);
4865 if (ret)
4866 break;
4867 }
4868 mutex_unlock(&adev->physical_node_lock);
4869
4870 if (ret)
4871 return ret;
4872 }
4873 }
4874
4875 return 0;
4876}
4877
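/*
 * Main initialization path: parse the DMAR table and device scopes,
 * honour tboot/platform opt-in forcing, build domains and context tables
 * via init_dmars(), register each IOMMU with sysfs and the IOMMU core,
 * probe ACPI name-space devices, and finally enable translation on every
 * unit that is not ignored.
 */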
ba395927
KA
4878int __init intel_iommu_init(void)
4879{
9bdc531e 4880 int ret = -ENODEV;
3a93c841 4881 struct dmar_drhd_unit *drhd;
7c919779 4882 struct intel_iommu *iommu;
ba395927 4883
89a6079d
LB
4884 /*
4885 * Intel IOMMU is required for a TXT/tboot launch or platform
4886 * opt in, so enforce that.
4887 */
4d213e76
ZD
4888 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
4889 platform_optin_force_iommu();
a59b50e9 4890
3a5670e8
JL
4891 if (iommu_init_mempool()) {
4892 if (force_on)
4893 panic("tboot: Failed to initialize iommu memory\n");
4894 return -ENOMEM;
4895 }
4896
4897 down_write(&dmar_global_lock);
a59b50e9
JC
4898 if (dmar_table_init()) {
4899 if (force_on)
4900 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4901 goto out_free_dmar;
a59b50e9 4902 }
ba395927 4903
c2c7286a 4904 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4905 if (force_on)
4906 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4907 goto out_free_dmar;
a59b50e9 4908 }
1886e8a9 4909
ec154bf5
JR
4910 up_write(&dmar_global_lock);
4911
4912 /*
4913 * The bus notifier takes the dmar_global_lock, so lockdep will
4914 * complain later when we register it under the lock.
4915 */
4916 dmar_register_bus_notifier();
4917
4918 down_write(&dmar_global_lock);
4919
1da8347d
MD
4920 if (!no_iommu)
4921 intel_iommu_debugfs_init();
4922
161b28aa 4923 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4924 /*
4925 * We exit the function here to ensure the IOMMU's remapping and
4926 * mempool aren't set up, which means that the IOMMU's PMRs
4927 * won't be disabled via the call to init_dmars(). So disable
4928 * them explicitly here. The PMRs were set up by tboot prior to
4929 * calling SENTER, but the kernel is expected to reset/tear
4930 * down the PMRs.
4931 */
4932 if (intel_iommu_tboot_noforce) {
4933 for_each_iommu(iommu, drhd)
4934 iommu_disable_protect_mem_regions(iommu);
4935 }
4936
161b28aa
JR
4937 /*
4938 * Make sure the IOMMUs are switched off, even when we
4939 * boot into a kexec kernel and the previous kernel left
4940 * them enabled
4941 */
4942 intel_disable_iommus();
9bdc531e 4943 goto out_free_dmar;
161b28aa 4944 }
2ae21010 4945
318fe7df 4946 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4947 pr_info("No RMRR found\n");
318fe7df
SS
4948
4949 if (list_empty(&dmar_atsr_units))
9f10e5bf 4950 pr_info("No ATSR found\n");
318fe7df 4951
51a63e67
JC
4952 if (dmar_init_reserved_ranges()) {
4953 if (force_on)
4954 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4955 goto out_free_reserved_range;
51a63e67 4956 }
ba395927 4957
cf1ec453
LB
4958 if (dmar_map_gfx)
4959 intel_iommu_gfx_mapped = 1;
4960
ba395927
KA
4961 init_no_remapping_devices();
4962
b779260b 4963 ret = init_dmars();
ba395927 4964 if (ret) {
a59b50e9
JC
4965 if (force_on)
4966 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4967 pr_err("Initialization failed\n");
9bdc531e 4968 goto out_free_reserved_range;
ba395927 4969 }
3a5670e8 4970 up_write(&dmar_global_lock);
ba395927 4971
134fac3f 4972 init_iommu_pm_ops();
a8bcbb0d 4973
2d48ea0e 4974 down_read(&dmar_global_lock);
39ab9555
JR
4975 for_each_active_iommu(iommu, drhd) {
4976 iommu_device_sysfs_add(&iommu->iommu, NULL,
4977 intel_iommu_groups,
4978 "%s", iommu->name);
4979 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4980 iommu_device_register(&iommu->iommu);
4981 }
2d48ea0e 4982 up_read(&dmar_global_lock);
a5459cfe 4983
4236d97d 4984 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
75f05569
JL
4985 if (si_domain && !hw_pass_through)
4986 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4987 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4988 intel_iommu_cpu_dead);
d8190dc6 4989
d5692d4a 4990 down_read(&dmar_global_lock);
fa212a97
LB
4991 if (probe_acpi_namespace_devices())
4992 pr_warn("ACPI name space devices didn't probe correctly\n");
4993
d8190dc6
LB
4994 /* Finally, we enable the DMA remapping hardware. */
4995 for_each_iommu(iommu, drhd) {
6a8c6748 4996 if (!drhd->ignored && !translation_pre_enabled(iommu))
d8190dc6
LB
4997 iommu_enable_translation(iommu);
4998
4999 iommu_disable_protect_mem_regions(iommu);
5000 }
2d48ea0e
QC
5001 up_read(&dmar_global_lock);
5002
d8190dc6
LB
5003 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5004
8bc1f85c
ED
5005 intel_iommu_enabled = 1;
5006
ba395927 5007 return 0;
9bdc531e
JL
5008
5009out_free_reserved_range:
5010 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
5011out_free_dmar:
5012 intel_iommu_free_dmars();
3a5670e8
JL
5013 up_write(&dmar_global_lock);
5014 iommu_exit_mempool();
9bdc531e 5015 return ret;
ba395927 5016}
e820482c 5017
0ce4a85f
LB
5018static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5019{
5020 struct intel_iommu *iommu = opaque;
5021
5022 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5023 return 0;
5024}
5025
5026/*
5027 * NB - intel-iommu lacks any sort of reference counting for the users of
5028 * dependent devices. If multiple endpoints have intersecting dependent
5029 * devices, unbinding the driver from any one of them will possibly leave
5030 * the others unable to operate.
5031 */
5032static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5033{
5034 if (!iommu || !dev || !dev_is_pci(dev))
5035 return;
5036
5037 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5038}
5039
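/*
 * Core of device teardown, called with device_domain_lock held: tear down
 * the PASID entry and device-IOTLB state, clear the context entries for
 * the device's DMA aliases, unlink the device from its domain, and drop
 * the domain's reference on the IOMMU.
 */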
127c7615 5040static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 5041{
942067f1 5042 struct dmar_domain *domain;
c7151a8d
WH
5043 struct intel_iommu *iommu;
5044 unsigned long flags;
c7151a8d 5045
55d94043
JR
5046 assert_spin_locked(&device_domain_lock);
5047
127c7615 5048 if (WARN_ON(!info))
c7151a8d
WH
5049 return;
5050
127c7615 5051 iommu = info->iommu;
942067f1 5052 domain = info->domain;
c7151a8d 5053
127c7615 5054 if (info->dev) {
ef848b7e
LB
5055 if (dev_is_pci(info->dev) && sm_supported(iommu))
5056 intel_pasid_tear_down_entry(iommu, info->dev,
37e91bd4 5057 PASID_RID2PASID, false);
ef848b7e 5058
127c7615 5059 iommu_disable_dev_iotlb(info);
8038bdb8
JD
5060 if (!dev_is_real_dma_subdevice(info->dev))
5061 domain_context_clear(iommu, info->dev);
a7fc93fe 5062 intel_pasid_free_table(info->dev);
127c7615 5063 }
c7151a8d 5064
b608ac3b 5065 unlink_domain_info(info);
c7151a8d 5066
d160aca5 5067 spin_lock_irqsave(&iommu->lock, flags);
942067f1 5068 domain_detach_iommu(domain, iommu);
d160aca5 5069 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 5070
127c7615 5071 free_devinfo_mem(info);
c7151a8d 5072}
c7151a8d 5073
71753239 5074static void dmar_remove_one_dev_info(struct device *dev)
55d94043 5075{
127c7615 5076 struct device_domain_info *info;
55d94043 5077 unsigned long flags;
3e7abe25 5078
55d94043 5079 spin_lock_irqsave(&device_domain_lock, flags);
e85bb99b
LB
5080 info = get_domain_info(dev);
5081 if (info)
ae23bfb6 5082 __dmar_remove_one_dev_info(info);
55d94043 5083 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5084}
5085
301e7ee1
JR
5086static int md_domain_init(struct dmar_domain *domain, int guest_width)
5087{
5088 int adjust_width;
5089
301e7ee1
JR
5090 /* calculate AGAW */
5091 domain->gaw = guest_width;
5092 adjust_width = guestwidth_to_adjustwidth(guest_width);
5093 domain->agaw = width_to_agaw(adjust_width);
5094
5095 domain->iommu_coherency = 0;
5096 domain->iommu_snooping = 0;
5097 domain->iommu_superpage = 0;
5098 domain->max_addr = 0;
5099
5100 /* always allocate the top pgd */
5101 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5102 if (!domain->pgd)
5103 return -ENOMEM;
5104 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5105 return 0;
5106}
5107
e70b081c
TM
5108static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
5109{
5110 init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5111 copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
5112
5113 if (!intel_iommu_strict &&
5114 init_iova_flush_queue(&dmar_domain->iovad,
5115 iommu_flush_iova, iova_entry_free))
5116 pr_info("iova flush queue initialization failed\n");
5117}
5118
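/*
 * iommu_domain allocation for the IOMMU core: DMA and unmanaged domains
 * get a fresh dmar_domain with the default address width (plus an IOVA
 * allocator in the DMA case), while identity requests return the shared
 * si_domain.
 */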
00a77deb 5119static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5120{
5d450806 5121 struct dmar_domain *dmar_domain;
00a77deb
JR
5122 struct iommu_domain *domain;
5123
4de354ec 5124 switch (type) {
fa954e68 5125 case IOMMU_DOMAIN_DMA:
4de354ec 5126 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 5127 dmar_domain = alloc_domain(0);
4de354ec
LB
5128 if (!dmar_domain) {
5129 pr_err("Can't allocate dmar_domain\n");
5130 return NULL;
5131 }
301e7ee1 5132 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4de354ec
LB
5133 pr_err("Domain initialization failed\n");
5134 domain_exit(dmar_domain);
5135 return NULL;
5136 }
fa954e68 5137
e70b081c
TM
5138 if (type == IOMMU_DOMAIN_DMA)
5139 intel_init_iova_domain(dmar_domain);
fa954e68 5140
4de354ec
LB
5141 domain = &dmar_domain->domain;
5142 domain->geometry.aperture_start = 0;
5143 domain->geometry.aperture_end =
5144 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5145 domain->geometry.force_aperture = true;
5146
5147 return domain;
5148 case IOMMU_DOMAIN_IDENTITY:
5149 return &si_domain->domain;
5150 default:
00a77deb 5151 return NULL;
38717946 5152 }
8a0e715b 5153
4de354ec 5154 return NULL;
38717946 5155}
38717946 5156
00a77deb 5157static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5158{
4de354ec
LB
5159 if (domain != &si_domain->domain)
5160 domain_exit(to_dmar_domain(domain));
38717946 5161}
38717946 5162
67b8e02b
LB
5163/*
5164 * Check whether a @domain could be attached to the @dev through the
5165 * aux-domain attach/detach APIs.
5166 */
5167static inline bool
5168is_aux_domain(struct device *dev, struct iommu_domain *domain)
5169{
e85bb99b 5170 struct device_domain_info *info = get_domain_info(dev);
67b8e02b
LB
5171
5172 return info && info->auxd_enabled &&
5173 domain->type == IOMMU_DOMAIN_UNMANAGED;
5174}
5175
5176static void auxiliary_link_device(struct dmar_domain *domain,
5177 struct device *dev)
5178{
e85bb99b 5179 struct device_domain_info *info = get_domain_info(dev);
67b8e02b
LB
5180
5181 assert_spin_locked(&device_domain_lock);
5182 if (WARN_ON(!info))
5183 return;
5184
5185 domain->auxd_refcnt++;
5186 list_add(&domain->auxd, &info->auxiliary_domains);
5187}
5188
5189static void auxiliary_unlink_device(struct dmar_domain *domain,
5190 struct device *dev)
5191{
e85bb99b 5192 struct device_domain_info *info = get_domain_info(dev);
67b8e02b
LB
5193
5194 assert_spin_locked(&device_domain_lock);
5195 if (WARN_ON(!info))
5196 return;
5197
5198 list_del(&domain->auxd);
5199 domain->auxd_refcnt--;
5200
5201 if (!domain->auxd_refcnt && domain->default_pasid > 0)
59a62337 5202 ioasid_free(domain->default_pasid);
67b8e02b
LB
5203}
5204
5205static int aux_domain_add_dev(struct dmar_domain *domain,
5206 struct device *dev)
5207{
5208 int ret;
67b8e02b
LB
5209 unsigned long flags;
5210 struct intel_iommu *iommu;
5211
dd6692f1 5212 iommu = device_to_iommu(dev, NULL, NULL);
67b8e02b
LB
5213 if (!iommu)
5214 return -ENODEV;
5215
5216 if (domain->default_pasid <= 0) {
c7b6bac9 5217 u32 pasid;
67b8e02b 5218
59a62337
JP
5219 /* No private data needed for the default pasid */
5220 pasid = ioasid_alloc(NULL, PASID_MIN,
5221 pci_max_pasids(to_pci_dev(dev)) - 1,
5222 NULL);
5223 if (pasid == INVALID_IOASID) {
67b8e02b
LB
5224 pr_err("Can't allocate default pasid\n");
5225 return -ENODEV;
5226 }
5227 domain->default_pasid = pasid;
5228 }
5229
5230 spin_lock_irqsave(&device_domain_lock, flags);
5231 /*
5232 * iommu->lock must be held to attach domain to iommu and setup the
5233 * pasid entry for second level translation.
5234 */
5235 spin_lock(&iommu->lock);
5236 ret = domain_attach_iommu(domain, iommu);
5237 if (ret)
5238 goto attach_failed;
5239
5240 /* Setup the PASID entry for mediated devices: */
ddf09b6d
LB
5241 if (domain_use_first_level(domain))
5242 ret = domain_setup_first_level(iommu, domain, dev,
5243 domain->default_pasid);
5244 else
5245 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5246 domain->default_pasid);
67b8e02b
LB
5247 if (ret)
5248 goto table_failed;
5249 spin_unlock(&iommu->lock);
5250
5251 auxiliary_link_device(domain, dev);
5252
5253 spin_unlock_irqrestore(&device_domain_lock, flags);
5254
5255 return 0;
5256
5257table_failed:
5258 domain_detach_iommu(domain, iommu);
5259attach_failed:
5260 spin_unlock(&iommu->lock);
5261 spin_unlock_irqrestore(&device_domain_lock, flags);
5262 if (!domain->auxd_refcnt && domain->default_pasid > 0)
59a62337 5263 ioasid_free(domain->default_pasid);
67b8e02b
LB
5264
5265 return ret;
5266}
5267
5268static void aux_domain_remove_dev(struct dmar_domain *domain,
5269 struct device *dev)
5270{
5271 struct device_domain_info *info;
5272 struct intel_iommu *iommu;
5273 unsigned long flags;
5274
5275 if (!is_aux_domain(dev, &domain->domain))
5276 return;
5277
5278 spin_lock_irqsave(&device_domain_lock, flags);
e85bb99b 5279 info = get_domain_info(dev);
67b8e02b
LB
5280 iommu = info->iommu;
5281
5282 auxiliary_unlink_device(domain, dev);
5283
5284 spin_lock(&iommu->lock);
37e91bd4 5285 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
67b8e02b
LB
5286 domain_detach_iommu(domain, iommu);
5287 spin_unlock(&iommu->lock);
5288
5289 spin_unlock_irqrestore(&device_domain_lock, flags);
5290}
5291
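/*
 * Before an attach, clamp the domain to what this IOMMU can address: cap
 * the guest address width at the hardware MGAW, refuse if existing
 * mappings already exceed it, and strip page-table levels the IOMMU
 * cannot walk.
 */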
8cc3759a
LB
5292static int prepare_domain_attach_device(struct iommu_domain *domain,
5293 struct device *dev)
38717946 5294{
00a77deb 5295 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5296 struct intel_iommu *iommu;
5297 int addr_width;
faa3d6f5 5298
dd6692f1 5299 iommu = device_to_iommu(dev, NULL, NULL);
fe40f1e0
WH
5300 if (!iommu)
5301 return -ENODEV;
5302
5303 /* check if this iommu agaw is sufficient for max mapped address */
5304 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5305 if (addr_width > cap_mgaw(iommu->cap))
5306 addr_width = cap_mgaw(iommu->cap);
5307
5308 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5309 dev_err(dev, "%s: iommu width (%d) is not "
5310 "sufficient for the mapped address (%llx)\n",
5311 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5312 return -EFAULT;
5313 }
a99c47a2
TL
5314 dmar_domain->gaw = addr_width;
5315
5316 /*
5317 * Knock out extra levels of page tables if necessary
5318 */
5319 while (iommu->agaw < dmar_domain->agaw) {
5320 struct dma_pte *pte;
5321
5322 pte = dmar_domain->pgd;
5323 if (dma_pte_present(pte)) {
25cbff16
SY
5324 dmar_domain->pgd = (struct dma_pte *)
5325 phys_to_virt(dma_pte_addr(pte));
7a661013 5326 free_pgtable_page(pte);
a99c47a2
TL
5327 }
5328 dmar_domain->agaw--;
5329 }
fe40f1e0 5330
8cc3759a
LB
5331 return 0;
5332}
5333
5334static int intel_iommu_attach_device(struct iommu_domain *domain,
5335 struct device *dev)
5336{
5337 int ret;
5338
5679582c
LB
5339 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5340 device_is_rmrr_locked(dev)) {
8cc3759a
LB
5341 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5342 return -EPERM;
5343 }
5344
67b8e02b
LB
5345 if (is_aux_domain(dev, domain))
5346 return -EPERM;
5347
8cc3759a
LB
5348 /* normally dev is not mapped */
5349 if (unlikely(domain_context_mapped(dev))) {
5350 struct dmar_domain *old_domain;
5351
5352 old_domain = find_domain(dev);
fa954e68 5353 if (old_domain)
8cc3759a 5354 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5355 }
5356
5357 ret = prepare_domain_attach_device(domain, dev);
5358 if (ret)
5359 return ret;
5360
5361 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5362}
38717946 5363
67b8e02b
LB
5364static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5365 struct device *dev)
5366{
5367 int ret;
5368
5369 if (!is_aux_domain(dev, domain))
5370 return -EPERM;
5371
5372 ret = prepare_domain_attach_device(domain, dev);
5373 if (ret)
5374 return ret;
5375
5376 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5377}
5378
4c5478c9
JR
5379static void intel_iommu_detach_device(struct iommu_domain *domain,
5380 struct device *dev)
38717946 5381{
71753239 5382 dmar_remove_one_dev_info(dev);
faa3d6f5 5383}
c7151a8d 5384
67b8e02b
LB
5385static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5386 struct device *dev)
5387{
5388 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5389}
5390
6ee1b77b
JP
5391/*
5392 * 2D array for converting and sanitizing IOMMU generic TLB granularity to
5393 * VT-d granularity. Invalidation is typically included in the unmap operation
5394 * as a result of DMA or VFIO unmap. However, for assigned devices the guest
5395 * owns the first level page tables. Invalidations of translation caches in the
5396 * guest are trapped and passed down to the host.
5397 *
5398 * vIOMMU in the guest will only expose first level page tables, therefore
5399 * we do not support IOTLB granularity for requests without PASID (second level).
5400 *
5401 * For example, to find the VT-d granularity encoding for IOTLB
5402 * type and page selective granularity within PASID:
5403 * X: indexed by iommu cache type
5404 * Y: indexed by enum iommu_inv_granularity
5405 * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
5406 */
5407
7809c4d5 5408static const int
6ee1b77b
JP
5409inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
5410 /*
5411 * PASID based IOTLB invalidation: PASID selective (per PASID),
5412 * page selective (address granularity)
5413 */
5414 {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
5415 /* PASID based dev TLBs */
5416 {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
5417 /* PASID cache */
5418 {-EINVAL, -EINVAL, -EINVAL}
5419};
5420
5421static inline int to_vtd_granularity(int type, int granu)
5422{
5423 return inv_type_granu_table[type][granu];
5424}
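/*
 * Worked example (illustrative): an IOTLB invalidation (cache_type 0, the
 * first row of the table above) that is page-selective within a PASID maps as
 *
 *	to_vtd_granularity(0, IOMMU_INV_GRANU_ADDR) == QI_GRAN_PSI_PASID
 *
 * while the first column (no-PASID/domain granularity) yields -EINVAL for
 * every cache type, since only first-level, PASID-tagged invalidations are
 * passed down here.
 */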
5425
5426static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
5427{
5428 u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
5429
5430	/* VT-d encodes the size as 2^size 4KB pages: 0 for 4KB, 9 for 2MB, etc.
5431	 * The IOMMU cache invalidate API passes granu_size in bytes and the
5432	 * number of granules of that size that are contiguous in memory.
5433 */
5434 return order_base_2(nr_pages);
5435}
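/*
 * Worked example (illustrative): granu_size = 4KB with nr_granules = 512
 * describes 2MB of contiguous memory, so nr_pages = 512 and the function
 * returns order_base_2(512) = 9, i.e. the 2MB encoding mentioned above.
 */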
5436
5437#ifdef CONFIG_INTEL_IOMMU_SVM
5438static int
5439intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
5440 struct iommu_cache_invalidate_info *inv_info)
5441{
5442 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5443 struct device_domain_info *info;
5444 struct intel_iommu *iommu;
5445 unsigned long flags;
5446 int cache_type;
5447 u8 bus, devfn;
5448 u16 did, sid;
5449 int ret = 0;
5450 u64 size = 0;
5451
6278eecb 5452 if (!inv_info || !dmar_domain)
6ee1b77b
JP
5453 return -EINVAL;
5454
5455 if (!dev || !dev_is_pci(dev))
5456 return -ENODEV;
5457
5458 iommu = device_to_iommu(dev, &bus, &devfn);
5459 if (!iommu)
5460 return -ENODEV;
5461
5462 if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
5463 return -EINVAL;
5464
5465 spin_lock_irqsave(&device_domain_lock, flags);
5466 spin_lock(&iommu->lock);
e85bb99b 5467 info = get_domain_info(dev);
6ee1b77b
JP
5468 if (!info) {
5469 ret = -EINVAL;
5470 goto out_unlock;
5471 }
5472 did = dmar_domain->iommu_did[iommu->seq_id];
5473 sid = PCI_DEVID(bus, devfn);
5474
5475 /* Size is only valid in address selective invalidation */
0fa1a15f 5476 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
8d3bb3b8
JP
5477 size = to_vtd_size(inv_info->granu.addr_info.granule_size,
5478 inv_info->granu.addr_info.nb_granules);
6ee1b77b
JP
5479
5480 for_each_set_bit(cache_type,
5481 (unsigned long *)&inv_info->cache,
5482 IOMMU_CACHE_INV_TYPE_NR) {
5483 int granu = 0;
5484 u64 pasid = 0;
0fa1a15f 5485 u64 addr = 0;
6ee1b77b
JP
5486
5487 granu = to_vtd_granularity(cache_type, inv_info->granularity);
5488 if (granu == -EINVAL) {
5489 pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
5490 cache_type, inv_info->granularity);
5491 break;
5492 }
5493
5494 /*
5495 * PASID is stored in different locations based on the
5496 * granularity.
5497 */
5498 if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
8d3bb3b8
JP
5499 (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
5500 pasid = inv_info->granu.pasid_info.pasid;
6ee1b77b 5501 else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
8d3bb3b8
JP
5502 (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
5503 pasid = inv_info->granu.addr_info.pasid;
6ee1b77b
JP
5504
5505 switch (BIT(cache_type)) {
5506 case IOMMU_CACHE_INV_TYPE_IOTLB:
1ff00279 5507		/* HW will ignore the low-order bits covered by the address mask */
6ee1b77b
JP
5508 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5509 size &&
8d3bb3b8 5510 (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
1ff00279 5511 pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
8d3bb3b8 5512 inv_info->granu.addr_info.addr, size);
6ee1b77b
JP
5513 }
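			/*
			 * Illustrative numbers: with VTD_PAGE_SHIFT == 12 and
			 * a size order of 9 (one 2MB granule), the mask above
			 * is 0x1fffff; an address of 0x200000 passes while
			 * 0x201000 triggers the warning, although HW ignores
			 * those low bits either way.
			 */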
5514
5515 /*
5516 * If granu is PASID-selective, address is ignored.
5517 * We use npages = -1 to indicate that.
5518 */
5519 qi_flush_piotlb(iommu, did, pasid,
8d3bb3b8 5520 mm_to_dma_pfn(inv_info->granu.addr_info.addr),
6ee1b77b 5521 (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
8d3bb3b8 5522 inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
6ee1b77b 5523
0fa1a15f
LY
5524 if (!info->ats_enabled)
5525 break;
6ee1b77b
JP
5526 /*
5527		 * Always flush the device IOTLB if ATS is enabled. The vIOMMU
5528		 * in the guest may assume that an IOTLB flush also covers the
5529		 * device IOTLB (i.e. is inclusive), which is more efficient.
5530 */
0fa1a15f 5531 fallthrough;
6ee1b77b 5532 case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
0fa1a15f
LY
5533		/*
5534		 * PASID-based device TLB invalidation does not support
5535		 * IOMMU_INV_GRANU_PASID granularity; it only supports
5536		 * IOMMU_INV_GRANU_ADDR. The equivalent is to invalidate
5537		 * the entire 64-bit range: the user provides only PASID
5538		 * info and no address info, so we set addr to 0 and the
5539		 * size to cover the full address space.
5540		 */
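			/*
			 * Illustrative arithmetic: with VTD_PAGE_SHIFT == 12
			 * the size set below becomes 52, i.e. 2^52 4KB pages,
			 * covering the whole 64-bit address space from addr 0.
			 */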
5541 if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
5542 size = 64 - VTD_PAGE_SHIFT;
5543 addr = 0;
5544 } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
8d3bb3b8 5545 addr = inv_info->granu.addr_info.addr;
0fa1a15f
LY
5546 }
5547
6ee1b77b
JP
5548 if (info->ats_enabled)
5549 qi_flush_dev_iotlb_pasid(iommu, sid,
5550 info->pfsid, pasid,
0fa1a15f 5551 info->ats_qdep, addr,
78df6c86 5552 size);
6ee1b77b
JP
5553 else
5554 pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
5555 break;
5556 default:
5557 dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
5558 cache_type);
5559 ret = -EINVAL;
5560 }
5561 }
5562out_unlock:
5563 spin_unlock(&iommu->lock);
5564 spin_unlock_irqrestore(&device_domain_lock, flags);
5565
5566 return ret;
5567}
5568#endif
5569
b146a1c9
JR
5570static int intel_iommu_map(struct iommu_domain *domain,
5571 unsigned long iova, phys_addr_t hpa,
781ca2de 5572 size_t size, int iommu_prot, gfp_t gfp)
faa3d6f5 5573{
00a77deb 5574 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5575 u64 max_addr;
dde57a21 5576 int prot = 0;
faa3d6f5 5577 int ret;
fe40f1e0 5578
dde57a21
JR
5579 if (iommu_prot & IOMMU_READ)
5580 prot |= DMA_PTE_READ;
5581 if (iommu_prot & IOMMU_WRITE)
5582 prot |= DMA_PTE_WRITE;
9cf06697
SY
5583 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5584 prot |= DMA_PTE_SNP;
dde57a21 5585
163cc52c 5586 max_addr = iova + size;
dde57a21 5587 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5588 u64 end;
5589
5590 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5591 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5592 if (end < max_addr) {
9f10e5bf 5593 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5594 "sufficient for the mapped address (%llx)\n",
8954da1f 5595 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5596 return -EFAULT;
5597 }
dde57a21 5598 dmar_domain->max_addr = max_addr;
fe40f1e0 5599 }
ad051221
DW
5600	/* Round the size up to whole pages: if the low bits of hpa plus the
5601	   size would spill onto the next page, we must map that page too. */
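	/* For instance (illustrative): hpa = 0x1ff0 with size = 0x20 crosses a
	   page boundary, so two pages are mapped even though size < PAGE_SIZE. */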
88cb6a74 5602 size = aligned_nrpages(hpa, size);
ad051221
DW
5603 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5604 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5605 return ret;
38717946 5606}
38717946 5607
5009065d 5608static size_t intel_iommu_unmap(struct iommu_domain *domain,
56f8af5e
WD
5609 unsigned long iova, size_t size,
5610 struct iommu_iotlb_gather *gather)
38717946 5611{
00a77deb 5612 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5613 struct page *freelist = NULL;
ea8ea460
DW
5614 unsigned long start_pfn, last_pfn;
5615 unsigned int npages;
42e8c186 5616 int iommu_id, level = 0;
5cf0a76f
DW
5617
5618 /* Cope with horrid API which requires us to unmap more than the
5619 size argument if it happens to be a large-page mapping. */
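	/* Example (illustrative): a request to unmap 4KB that lies inside a
	   2MB superpage makes pfn_to_dma_pte() below report level 2, so size
	   is bumped to 2MB, the whole superpage is unmapped, and the enlarged
	   size is returned to the caller. */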
dc02e46e 5620 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5621
5622 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5623 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5624
ea8ea460
DW
5625 start_pfn = iova >> VTD_PAGE_SHIFT;
5626 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5627
5628 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5629
5630 npages = last_pfn - start_pfn + 1;
5631
f746a025 5632 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5633 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5634 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5635
5636 dma_free_pagelist(freelist);
fe40f1e0 5637
163cc52c
DW
5638 if (dmar_domain->max_addr == iova + size)
5639 dmar_domain->max_addr = iova;
b146a1c9 5640
5cf0a76f 5641 return size;
38717946 5642}
38717946 5643
d14d6577 5644static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5645 dma_addr_t iova)
38717946 5646{
00a77deb 5647 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5648 struct dma_pte *pte;
5cf0a76f 5649 int level = 0;
faa3d6f5 5650 u64 phys = 0;
38717946 5651
5cf0a76f 5652 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
77a1bce8
YH
5653 if (pte && dma_pte_present(pte))
5654 phys = dma_pte_addr(pte) +
5655 (iova & (BIT_MASK(level_to_offset_bits(level) +
5656 VTD_PAGE_SHIFT) - 1));
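	/*
	 * Illustrative example: for a 2MB superpage (level 2) the mask above
	 * keeps the low 21 bits of the IOVA, so phys is the superpage base
	 * plus (iova & 0x1fffff); for a 4KB page only the low 12 bits remain.
	 */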
38717946 5657
faa3d6f5 5658 return phys;
38717946 5659}
a8bcbb0d 5660
95587a75
LB
5661static inline bool scalable_mode_support(void)
5662{
5663 struct dmar_drhd_unit *drhd;
5664 struct intel_iommu *iommu;
5665 bool ret = true;
5666
5667 rcu_read_lock();
5668 for_each_active_iommu(iommu, drhd) {
5669 if (!sm_supported(iommu)) {
5670 ret = false;
5671 break;
5672 }
5673 }
5674 rcu_read_unlock();
5675
5676 return ret;
5677}
5678
5679static inline bool iommu_pasid_support(void)
5680{
5681 struct dmar_drhd_unit *drhd;
5682 struct intel_iommu *iommu;
5683 bool ret = true;
5684
5685 rcu_read_lock();
5686 for_each_active_iommu(iommu, drhd) {
5687 if (!pasid_supported(iommu)) {
5688 ret = false;
5689 break;
5690 }
5691 }
5692 rcu_read_unlock();
5693
5694 return ret;
5695}
5696
2cd1311a
LB
5697static inline bool nested_mode_support(void)
5698{
5699 struct dmar_drhd_unit *drhd;
5700 struct intel_iommu *iommu;
5701 bool ret = true;
5702
5703 rcu_read_lock();
5704 for_each_active_iommu(iommu, drhd) {
5705 if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
5706 ret = false;
5707 break;
5708 }
5709 }
5710 rcu_read_unlock();
5711
5712 return ret;
5713}
5714
5d587b8d 5715static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5716{
dbb9fd86 5717 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5718 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5719 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5720 return irq_remapping_enabled == 1;
dbb9fd86 5721
5d587b8d 5722 return false;
dbb9fd86
SY
5723}
5724
e5d1841f 5725static struct iommu_device *intel_iommu_probe_device(struct device *dev)
abdfdde2 5726{
a5459cfe 5727 struct intel_iommu *iommu;
70ae6f0d 5728
dd6692f1 5729 iommu = device_to_iommu(dev, NULL, NULL);
a5459cfe 5730 if (!iommu)
e5d1841f 5731 return ERR_PTR(-ENODEV);
a4ff1fc2 5732
8af46c78 5733 if (translation_pre_enabled(iommu))
01b9d4e2 5734 dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
8af46c78 5735
e5d1841f 5736 return &iommu->iommu;
abdfdde2 5737}
70ae6f0d 5738
e5d1841f 5739static void intel_iommu_release_device(struct device *dev)
abdfdde2 5740{
a5459cfe 5741 struct intel_iommu *iommu;
a5459cfe 5742
dd6692f1 5743 iommu = device_to_iommu(dev, NULL, NULL);
a5459cfe
AW
5744 if (!iommu)
5745 return;
5746
458b7c8e
LB
5747 dmar_remove_one_dev_info(dev);
5748
6fc7020c
LB
5749 set_dma_ops(dev, NULL);
5750}
a5459cfe 5751
6fc7020c
LB
5752static void intel_iommu_probe_finalize(struct device *dev)
5753{
5754 struct iommu_domain *domain;
cfb94a37 5755
6fc7020c 5756 domain = iommu_get_domain_for_dev(dev);
cfb94a37 5757 if (device_needs_bounce(dev))
6fc7020c
LB
5758 set_dma_ops(dev, &bounce_dma_ops);
5759 else if (domain && domain->type == IOMMU_DOMAIN_DMA)
5760 set_dma_ops(dev, &intel_dma_ops);
5761 else
cfb94a37 5762 set_dma_ops(dev, NULL);
70ae6f0d
AW
5763}
5764
0659b8dc
EA
5765static void intel_iommu_get_resv_regions(struct device *device,
5766 struct list_head *head)
5767{
5f64ce54 5768 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
0659b8dc
EA
5769 struct iommu_resv_region *reg;
5770 struct dmar_rmrr_unit *rmrr;
5771 struct device *i_dev;
5772 int i;
5773
5f64ce54 5774 down_read(&dmar_global_lock);
0659b8dc
EA
5775 for_each_rmrr_units(rmrr) {
5776 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5777 i, i_dev) {
5f64ce54 5778 struct iommu_resv_region *resv;
1c5c59fb 5779 enum iommu_resv_type type;
5f64ce54
EA
5780 size_t length;
5781
3855ba2d
EA
5782 if (i_dev != device &&
5783 !is_downstream_to_pci_bridge(device, i_dev))
0659b8dc
EA
5784 continue;
5785
5f64ce54 5786 length = rmrr->end_address - rmrr->base_address + 1;
1c5c59fb
EA
5787
5788 type = device_rmrr_is_relaxable(device) ?
5789 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5790
5f64ce54 5791 resv = iommu_alloc_resv_region(rmrr->base_address,
1c5c59fb 5792 length, prot, type);
5f64ce54
EA
5793 if (!resv)
5794 break;
5795
5796 list_add_tail(&resv->list, head);
0659b8dc
EA
5797 }
5798 }
5f64ce54 5799 up_read(&dmar_global_lock);
0659b8dc 5800
d850c2ee
LB
5801#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5802 if (dev_is_pci(device)) {
5803 struct pci_dev *pdev = to_pci_dev(device);
5804
5805 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
cde9319e 5806 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
d8018a0e 5807 IOMMU_RESV_DIRECT_RELAXABLE);
d850c2ee
LB
5808 if (reg)
5809 list_add_tail(&reg->list, head);
5810 }
5811 }
5812#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5813
0659b8dc
EA
5814 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5815 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5816 0, IOMMU_RESV_MSI);
0659b8dc
EA
5817 if (!reg)
5818 return;
5819 list_add_tail(&reg->list, head);
5820}
5821
d7cbc0f3 5822int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5823{
5824 struct device_domain_info *info;
5825 struct context_entry *context;
5826 struct dmar_domain *domain;
5827 unsigned long flags;
5828 u64 ctx_lo;
5829 int ret;
5830
4ec066c7 5831 domain = find_domain(dev);
2f26e0a9
DW
5832 if (!domain)
5833 return -EINVAL;
5834
5835 spin_lock_irqsave(&device_domain_lock, flags);
5836 spin_lock(&iommu->lock);
5837
5838 ret = -EINVAL;
e85bb99b 5839 info = get_domain_info(dev);
2f26e0a9
DW
5840 if (!info || !info->pasid_supported)
5841 goto out;
5842
5843 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5844 if (WARN_ON(!context))
5845 goto out;
5846
5847 ctx_lo = context[0].lo;
5848
2f26e0a9 5849 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5850 ctx_lo |= CONTEXT_PASIDE;
5851 context[0].lo = ctx_lo;
5852 wmb();
d7cbc0f3
LB
5853 iommu->flush.flush_context(iommu,
5854 domain->iommu_did[iommu->seq_id],
5855 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5856 DMA_CCMD_MASK_NOBIT,
5857 DMA_CCMD_DEVICE_INVL);
5858 }
5859
5860 /* Enable PASID support in the device, if it wasn't already */
5861 if (!info->pasid_enabled)
5862 iommu_enable_dev_iotlb(info);
5863
2f26e0a9
DW
5864 ret = 0;
5865
5866 out:
5867 spin_unlock(&iommu->lock);
5868 spin_unlock_irqrestore(&device_domain_lock, flags);
5869
5870 return ret;
5871}
5872
73bcbdc9
JS
5873static void intel_iommu_apply_resv_region(struct device *dev,
5874 struct iommu_domain *domain,
5875 struct iommu_resv_region *region)
5876{
5877 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5878 unsigned long start, end;
5879
5880 start = IOVA_PFN(region->start);
5881 end = IOVA_PFN(region->start + region->length - 1);
5882
5883 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5884}
5885
4a350a0e
PS
5886static struct iommu_group *intel_iommu_device_group(struct device *dev)
5887{
5888 if (dev_is_pci(dev))
5889 return pci_device_group(dev);
5890 return generic_device_group(dev);
5891}
5892
95587a75
LB
5893static int intel_iommu_enable_auxd(struct device *dev)
5894{
5895 struct device_domain_info *info;
5896 struct intel_iommu *iommu;
5897 unsigned long flags;
95587a75
LB
5898 int ret;
5899
dd6692f1 5900 iommu = device_to_iommu(dev, NULL, NULL);
95587a75
LB
5901 if (!iommu || dmar_disabled)
5902 return -EINVAL;
5903
5904 if (!sm_supported(iommu) || !pasid_supported(iommu))
5905 return -EINVAL;
5906
5907 ret = intel_iommu_enable_pasid(iommu, dev);
5908 if (ret)
5909 return -ENODEV;
5910
5911 spin_lock_irqsave(&device_domain_lock, flags);
e85bb99b 5912 info = get_domain_info(dev);
95587a75
LB
5913 info->auxd_enabled = 1;
5914 spin_unlock_irqrestore(&device_domain_lock, flags);
5915
5916 return 0;
5917}
5918
5919static int intel_iommu_disable_auxd(struct device *dev)
5920{
5921 struct device_domain_info *info;
5922 unsigned long flags;
5923
5924 spin_lock_irqsave(&device_domain_lock, flags);
e85bb99b 5925 info = get_domain_info(dev);
95587a75
LB
5926 if (!WARN_ON(!info))
5927 info->auxd_enabled = 0;
5928 spin_unlock_irqrestore(&device_domain_lock, flags);
5929
5930 return 0;
5931}
5932
5933/*
5934 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5935 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5936 * spec so that system software and tools can detect endpoint devices that
5937 * support Intel Scalable I/O Virtualization without any host driver
5938 * dependency.
5939 *
5940 * Returns the config space offset of the matching extended capability
5941 * structure, or 0 if the device does not support it.
5942 */
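/*
 * For reference (illustrative, following the generic PCIe DVSEC layout): the
 * word read at pos + 4 below is the DVSEC vendor ID from DVSEC Header 1 and
 * the word at pos + 8 is the DVSEC ID from DVSEC Header 2; a vendor of
 * PCI_VENDOR_ID_INTEL with DVSEC ID 5 is taken to mean Scalable IOV support.
 */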
5943static int siov_find_pci_dvsec(struct pci_dev *pdev)
5944{
5945 int pos;
5946 u16 vendor, id;
5947
5948 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5949 while (pos) {
5950 pci_read_config_word(pdev, pos + 4, &vendor);
5951 pci_read_config_word(pdev, pos + 8, &id);
5952 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5953 return pos;
5954
5955 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5956 }
5957
5958 return 0;
5959}
5960
5961static bool
5962intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5963{
5964 if (feat == IOMMU_DEV_FEAT_AUX) {
5965 int ret;
5966
5967 if (!dev_is_pci(dev) || dmar_disabled ||
5968 !scalable_mode_support() || !iommu_pasid_support())
5969 return false;
5970
5971 ret = pci_pasid_features(to_pci_dev(dev));
5972 if (ret < 0)
5973 return false;
5974
5975 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5976 }
5977
76fdd6c5
JP
5978 if (feat == IOMMU_DEV_FEAT_SVA) {
5979 struct device_domain_info *info = get_domain_info(dev);
5980
5981 return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
5982 info->pasid_supported && info->pri_supported &&
5983 info->ats_supported;
5984 }
5985
95587a75
LB
5986 return false;
5987}
5988
5989static int
5990intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5991{
5992 if (feat == IOMMU_DEV_FEAT_AUX)
5993 return intel_iommu_enable_auxd(dev);
5994
76fdd6c5
JP
5995 if (feat == IOMMU_DEV_FEAT_SVA) {
5996 struct device_domain_info *info = get_domain_info(dev);
5997
5998 if (!info)
5999 return -EINVAL;
6000
6001 if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
6002 return 0;
6003 }
6004
95587a75
LB
6005 return -ENODEV;
6006}
6007
6008static int
6009intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
6010{
6011 if (feat == IOMMU_DEV_FEAT_AUX)
6012 return intel_iommu_disable_auxd(dev);
6013
6014 return -ENODEV;
6015}
6016
6017static bool
6018intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
6019{
e85bb99b 6020 struct device_domain_info *info = get_domain_info(dev);
95587a75
LB
6021
6022 if (feat == IOMMU_DEV_FEAT_AUX)
6023 return scalable_mode_support() && info && info->auxd_enabled;
6024
6025 return false;
6026}
6027
0e8000f8
LB
6028static int
6029intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
6030{
6031 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
6032
6033 return dmar_domain->default_pasid > 0 ?
6034 dmar_domain->default_pasid : -EINVAL;
6035}
6036
8af46c78
LB
6037static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
6038 struct device *dev)
6039{
1d461597 6040 return attach_deferred(dev);
8af46c78
LB
6041}
6042
2cd1311a
LB
6043static int
6044intel_iommu_domain_set_attr(struct iommu_domain *domain,
6045 enum iommu_attr attr, void *data)
6046{
6047 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
6048 unsigned long flags;
6049 int ret = 0;
6050
6051 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
6052 return -EINVAL;
6053
6054 switch (attr) {
6055 case DOMAIN_ATTR_NESTING:
6056 spin_lock_irqsave(&device_domain_lock, flags);
6057 if (nested_mode_support() &&
6058 list_empty(&dmar_domain->devices)) {
6059 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
6060 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
6061 } else {
6062 ret = -ENODEV;
6063 }
6064 spin_unlock_irqrestore(&device_domain_lock, flags);
6065 break;
6066 default:
6067 ret = -EINVAL;
6068 break;
6069 }
6070
6071 return ret;
6072}
6073
67e8a5b1
RJ
6074/*
6075 * Check whether the device lives on an external-facing PCI port that is
6076 * marked as untrusted. Such devices should not be able to apply quirks
6077 * and thus must not be able to bypass the IOMMU restrictions.
6078 */
6079static bool risky_device(struct pci_dev *pdev)
6080{
6081 if (pdev->untrusted) {
6082 pci_info(pdev,
6083 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
6084 pdev->vendor, pdev->device);
6085 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
6086 return true;
6087 }
6088 return false;
6089}
6090
b0119e87 6091const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
6092 .capable = intel_iommu_capable,
6093 .domain_alloc = intel_iommu_domain_alloc,
6094 .domain_free = intel_iommu_domain_free,
2cd1311a 6095 .domain_set_attr = intel_iommu_domain_set_attr,
0659b8dc
EA
6096 .attach_dev = intel_iommu_attach_device,
6097 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
6098 .aux_attach_dev = intel_iommu_aux_attach_device,
6099 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 6100 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
6101 .map = intel_iommu_map,
6102 .unmap = intel_iommu_unmap,
0659b8dc 6103 .iova_to_phys = intel_iommu_iova_to_phys,
e5d1841f 6104 .probe_device = intel_iommu_probe_device,
6fc7020c 6105 .probe_finalize = intel_iommu_probe_finalize,
e5d1841f 6106 .release_device = intel_iommu_release_device,
0659b8dc 6107 .get_resv_regions = intel_iommu_get_resv_regions,
0ecdebb7 6108 .put_resv_regions = generic_iommu_put_resv_regions,
73bcbdc9 6109 .apply_resv_region = intel_iommu_apply_resv_region,
4a350a0e 6110 .device_group = intel_iommu_device_group,
95587a75
LB
6111 .dev_has_feat = intel_iommu_dev_has_feat,
6112 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
6113 .dev_enable_feat = intel_iommu_dev_enable_feat,
6114 .dev_disable_feat = intel_iommu_dev_disable_feat,
8af46c78 6115 .is_attach_deferred = intel_iommu_is_attach_deferred,
7039d11b 6116 .def_domain_type = device_def_domain_type,
0659b8dc 6117 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
56722a43 6118#ifdef CONFIG_INTEL_IOMMU_SVM
6ee1b77b 6119 .cache_invalidate = intel_iommu_sva_invalidate,
56722a43
JP
6120 .sva_bind_gpasid = intel_svm_bind_gpasid,
6121 .sva_unbind_gpasid = intel_svm_unbind_gpasid,
064a57d7
JP
6122 .sva_bind = intel_svm_bind,
6123 .sva_unbind = intel_svm_unbind,
6124 .sva_get_pasid = intel_svm_get_pasid,
8b737121 6125 .page_response = intel_svm_page_response,
56722a43 6126#endif
a8bcbb0d 6127};
9af88143 6128
1f76249c 6129static void quirk_iommu_igfx(struct pci_dev *dev)
9452618e 6130{
67e8a5b1
RJ
6131 if (risky_device(dev))
6132 return;
6133
932a6523 6134 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
6135 dmar_map_gfx = 0;
6136}
6137
1f76249c
CW
6138/* G4x/GM45 integrated gfx dmar support is totally busted. */
6139DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
6140DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
6141DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
6142DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
6143DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
6144DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
6145DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
6146
6147/* Broadwell igfx malfunctions with dmar */
6148DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6149DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6150DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6151DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6152DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6153DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6154DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6155DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6156DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6157DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6158DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6159DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6160DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6161DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6162DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6163DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6164DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6165DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6166DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6167DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6168DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6169DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6170DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
9452618e 6172
d34d6517 6173static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143 6174{
67e8a5b1
RJ
6175 if (risky_device(dev))
6176 return;
6177
9af88143
DW
6178 /*
6179 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 6180 * but needs it. Same seems to hold for the desktop versions.
9af88143 6181 */
932a6523 6182 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
6183 rwbf_quirk = 1;
6184}
6185
6186DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
6187DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6188DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6189DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6190DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6191DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6192DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 6193
eecfd57f
AJ
6194#define GGC 0x52
6195#define GGC_MEMORY_SIZE_MASK (0xf << 8)
6196#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
6197#define GGC_MEMORY_SIZE_1M (0x1 << 8)
6198#define GGC_MEMORY_SIZE_2M (0x3 << 8)
6199#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
6200#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
6201#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
6202#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
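/*
 * Example GGC readings (illustrative): 0x0100 (GGC_MEMORY_SIZE_1M) has
 * GGC_MEMORY_VT_ENABLED clear, so the quirk below disables the gfx DMAR;
 * 0x0900 (GGC_MEMORY_SIZE_2M_VT) has it set, so graphics stays mapped but
 * batched IOTLB flushing is turned off on Ironlake.
 */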
6203
d34d6517 6204static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
6205{
6206 unsigned short ggc;
6207
67e8a5b1
RJ
6208 if (risky_device(dev))
6209 return;
6210
eecfd57f 6211 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
6212 return;
6213
eecfd57f 6214 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 6215 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 6216 dmar_map_gfx = 0;
6fbcfb3e
DW
6217 } else if (dmar_map_gfx) {
6218 /* we have to ensure the gfx device is idle before we flush */
932a6523 6219 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
6220 intel_iommu_strict = 1;
6221 }
9eecabcb
DW
6222}
6223DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6224DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6225DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6226DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6227
b1012ca8
LB
6228static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
6229{
6230 unsigned short ver;
6231
6232 if (!IS_GFX_DEVICE(dev))
6233 return;
6234
6235 ver = (dev->device >> 8) & 0xff;
6236 if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
6237 ver != 0x4e && ver != 0x8a && ver != 0x98 &&
6238 ver != 0x9a)
6239 return;
6240
6241 if (risky_device(dev))
6242 return;
6243
6244 pci_info(dev, "Skip IOMMU disabling for graphics\n");
6245 iommu_skip_te_disable = 1;
6246}
6247DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
6248
e0fc7e0b
DW
6249/* On Tylersburg chipsets, some BIOSes have been known to enable the
6250 ISOCH DMAR unit for the Azalia sound device, but not give it any
6251 TLB entries, which causes it to deadlock. Check for that. We do
6252 this in a function called from init_dmars(), instead of in a PCI
6253 quirk, because we don't want to print the obnoxious "BIOS broken"
6254 message if VT-d is actually disabled.
6255*/
6256static void __init check_tylersburg_isoch(void)
6257{
6258 struct pci_dev *pdev;
6259 uint32_t vtisochctrl;
6260
6261 /* If there's no Azalia in the system anyway, forget it. */
6262 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6263 if (!pdev)
6264 return;
67e8a5b1
RJ
6265
6266 if (risky_device(pdev)) {
6267 pci_dev_put(pdev);
6268 return;
6269 }
6270
e0fc7e0b
DW
6271 pci_dev_put(pdev);
6272
6273 /* System Management Registers. Might be hidden, in which case
6274 we can't do the sanity check. But that's OK, because the
6275 known-broken BIOSes _don't_ actually hide it, so far. */
6276 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6277 if (!pdev)
6278 return;
6279
67e8a5b1
RJ
6280 if (risky_device(pdev)) {
6281 pci_dev_put(pdev);
6282 return;
6283 }
6284
e0fc7e0b
DW
6285 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6286 pci_dev_put(pdev);
6287 return;
6288 }
6289
6290 pci_dev_put(pdev);
6291
6292 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6293 if (vtisochctrl & 1)
6294 return;
6295
6296 /* Drop all bits other than the number of TLB entries */
6297 vtisochctrl &= 0x1c;
6298
6299 /* If we have the recommended number of TLB entries (16), fine. */
6300 if (vtisochctrl == 0x10)
6301 return;
6302
6303 /* Zero TLB entries? You get to ride the short bus to school. */
6304 if (!vtisochctrl) {
6305 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6306 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6307 dmi_get_system_info(DMI_BIOS_VENDOR),
6308 dmi_get_system_info(DMI_BIOS_VERSION),
6309 dmi_get_system_info(DMI_PRODUCT_VERSION));
6310 iommu_identity_mapping |= IDENTMAP_AZALIA;
6311 return;
6312 }
9f10e5bf
JR
6313
6314 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
6315 vtisochctrl);
6316}