[linux-2.6-block.git] / drivers / iommu / intel-iommu.c
2025cf9e 1// SPDX-License-Identifier: GPL-2.0-only
ba395927 2/*
ea8ea460 3 * Copyright © 2006-2014 Intel Corporation.
ba395927 4 *
ea8ea460
DW
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 10 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
11 */
12
9f10e5bf 13#define pr_fmt(fmt) "DMAR: " fmt
932a6523 14#define dev_fmt(fmt) pr_fmt(fmt)
9f10e5bf 15
ba395927
KA
16#include <linux/init.h>
17#include <linux/bitmap.h>
5e0d2a6f 18#include <linux/debugfs.h>
54485c30 19#include <linux/export.h>
ba395927
KA
20#include <linux/slab.h>
21#include <linux/irq.h>
22#include <linux/interrupt.h>
ba395927
KA
23#include <linux/spinlock.h>
24#include <linux/pci.h>
25#include <linux/dmar.h>
26#include <linux/dma-mapping.h>
27#include <linux/mempool.h>
75f05569 28#include <linux/memory.h>
aa473240 29#include <linux/cpu.h>
5e0d2a6f 30#include <linux/timer.h>
dfddb969 31#include <linux/io.h>
38717946 32#include <linux/iova.h>
5d450806 33#include <linux/iommu.h>
38717946 34#include <linux/intel-iommu.h>
134fac3f 35#include <linux/syscore_ops.h>
69575d38 36#include <linux/tboot.h>
adb2fe02 37#include <linux/dmi.h>
5cdede24 38#include <linux/pci-ats.h>
0ee332c1 39#include <linux/memblock.h>
36746436 40#include <linux/dma-contiguous.h>
fec777c3 41#include <linux/dma-direct.h>
091d42e4 42#include <linux/crash_dump.h>
98fa15f3 43#include <linux/numa.h>
8a8f422d 44#include <asm/irq_remapping.h>
ba395927 45#include <asm/cacheflush.h>
46a7fa27 46#include <asm/iommu.h>
ba395927 47
078e1ee2 48#include "irq_remapping.h"
56283174 49#include "intel-pasid.h"
078e1ee2 50
5b6985ce
FY
51#define ROOT_SIZE VTD_PAGE_SIZE
52#define CONTEXT_SIZE VTD_PAGE_SIZE
53
ba395927 54#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 55#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 56#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 57#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
58
59#define IOAPIC_RANGE_START (0xfee00000)
60#define IOAPIC_RANGE_END (0xfeefffff)
61#define IOVA_START_ADDR (0x1000)
62
5e3b4a15 63#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
ba395927 64
4ed0d3e6 65#define MAX_AGAW_WIDTH 64
5c645b35 66#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 67
2ebe3151
DW
68#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
69#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
70
71/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
72 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
73#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
74 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
75#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
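/*
 * For example, with gaw == 48 and VTD_PAGE_SHIFT == 12:
 *   __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1
 *   DOMAIN_MAX_ADDR(48)  == ((1ULL << 36) - 1) << 12, i.e. just under 256TiB.
 * On 64-bit builds the min_t() clamp in DOMAIN_MAX_PFN() is a no-op; it only
 * matters where 'unsigned long' is 32 bits wide.
 */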
ba395927 76
1b722500
RM
77/* IO virtual address start page frame number */
78#define IOVA_START_PFN (1)
79
f27be03b 80#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
5e0d2a6f 81
df08cdc7
AM
82/* page table handling */
83#define LEVEL_STRIDE (9)
84#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
85
6d1c56a9
OBC
86/*
 87 * This bitmap is used to advertise the page sizes our hardware supports
 88 * to the IOMMU core, which will then use this information to split
 89 * physically contiguous memory regions it is mapping into page sizes
 90 * that we support.
 91 *
 92 * Traditionally the IOMMU core just handed us the mappings directly,
 93 * after making sure the size is a power-of-two multiple of 4KiB and that the
 94 * mapping has natural alignment.
 95 *
 96 * To retain this behavior, we currently advertise that we support
 97 * all page sizes that are a power-of-two multiple of 4KiB.
98 *
99 * If at some point we'd like to utilize the IOMMU core's new behavior,
100 * we could change this to advertise the real page sizes we support.
101 */
102#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
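/*
 * ~0xFFFUL sets every bit from bit 12 upwards, i.e. it advertises 4KiB,
 * 8KiB, 16KiB, ... and every other power-of-two size that is a multiple
 * of 4KiB, matching the behaviour described above.
 */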
103
df08cdc7
AM
104static inline int agaw_to_level(int agaw)
105{
106 return agaw + 2;
107}
108
109static inline int agaw_to_width(int agaw)
110{
5c645b35 111 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
112}
113
114static inline int width_to_agaw(int width)
115{
5c645b35 116 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
117}
118
119static inline unsigned int level_to_offset_bits(int level)
120{
121 return (level - 1) * LEVEL_STRIDE;
122}
123
124static inline int pfn_level_offset(unsigned long pfn, int level)
125{
126 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
127}
128
129static inline unsigned long level_mask(int level)
130{
131 return -1UL << level_to_offset_bits(level);
132}
133
134static inline unsigned long level_size(int level)
135{
136 return 1UL << level_to_offset_bits(level);
137}
138
139static inline unsigned long align_to_level(unsigned long pfn, int level)
140{
141 return (pfn + level_size(level) - 1) & level_mask(level);
142}
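/*
 * Worked example for the level arithmetic above, with 4KiB VT-d pages and a
 * 9-bit stride: agaw 2 means 4 page-table levels and 30 + 2 * 9 = 48 bits of
 * address width.  At level 2, level_to_offset_bits() == 9, so level_size()
 * is 512 pages (2MiB) and level_mask() clears the low 9 pfn bits.
 */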
fd18de50 143
6dd9a7c7
YS
144static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
145{
5c645b35 146 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
147}
148
dd4e8319
DW
149/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
150 are never going to work. */
151static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
152{
153 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
154}
155
156static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
157{
158 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
159}
160static inline unsigned long page_to_dma_pfn(struct page *pg)
161{
162 return mm_to_dma_pfn(page_to_pfn(pg));
163}
164static inline unsigned long virt_to_dma_pfn(void *p)
165{
166 return page_to_dma_pfn(virt_to_page(p));
167}
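/*
 * With 4KiB MM pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12) these conversions
 * are identities.  With larger MM pages each MM pfn corresponds to several
 * consecutive VT-d pfns, which is why the shifts above go in opposite
 * directions.
 */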
168
d9630fe9
WH
169/* global iommu list, set NULL for ignored DMAR units */
170static struct intel_iommu **g_iommus;
171
e0fc7e0b 172static void __init check_tylersburg_isoch(void);
9af88143
DW
173static int rwbf_quirk;
174
b779260b
JC
175/*
176 * set to 1 to panic the kernel if VT-d cannot be enabled successfully
177 * (used when kernel is launched w/ TXT)
178 */
179static int force_on = 0;
bfd20f1c 180int intel_iommu_tboot_noforce;
89a6079d 181static int no_platform_optin;
b779260b 182
46b08e1a 183#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 184
091d42e4
JR
185/*
186 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
187 * if marked present.
188 */
189static phys_addr_t root_entry_lctp(struct root_entry *re)
190{
191 if (!(re->lo & 1))
192 return 0;
193
194 return re->lo & VTD_PAGE_MASK;
195}
196
197/*
198 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_uctp(struct root_entry *re)
202{
203 if (!(re->hi & 1))
204 return 0;
46b08e1a 205
091d42e4
JR
206 return re->hi & VTD_PAGE_MASK;
207}
c07e7d21 208
cf484d0e
JR
209static inline void context_clear_pasid_enable(struct context_entry *context)
210{
211 context->lo &= ~(1ULL << 11);
212}
213
214static inline bool context_pasid_enabled(struct context_entry *context)
215{
216 return !!(context->lo & (1ULL << 11));
217}
218
219static inline void context_set_copied(struct context_entry *context)
220{
221 context->hi |= (1ull << 3);
222}
223
224static inline bool context_copied(struct context_entry *context)
225{
226 return !!(context->hi & (1ULL << 3));
227}
228
229static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
230{
231 return (context->lo & 1);
232}
cf484d0e 233
26b86092 234bool context_present(struct context_entry *context)
cf484d0e
JR
235{
236 return context_pasid_enabled(context) ?
237 __context_present(context) :
238 __context_present(context) && !context_copied(context);
239}
240
c07e7d21
MM
241static inline void context_set_present(struct context_entry *context)
242{
243 context->lo |= 1;
244}
245
246static inline void context_set_fault_enable(struct context_entry *context)
247{
248 context->lo &= (((u64)-1) << 2) | 1;
249}
250
c07e7d21
MM
251static inline void context_set_translation_type(struct context_entry *context,
252 unsigned long value)
253{
254 context->lo &= (((u64)-1) << 4) | 3;
255 context->lo |= (value & 3) << 2;
256}
257
258static inline void context_set_address_root(struct context_entry *context,
259 unsigned long value)
260{
1a2262f9 261 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
262 context->lo |= value & VTD_PAGE_MASK;
263}
264
265static inline void context_set_address_width(struct context_entry *context,
266 unsigned long value)
267{
268 context->hi |= value & 7;
269}
270
271static inline void context_set_domain_id(struct context_entry *context,
272 unsigned long value)
273{
274 context->hi |= (value & ((1 << 16) - 1)) << 8;
275}
276
dbcd861f
JR
277static inline int context_domain_id(struct context_entry *c)
278{
279 return((c->hi >> 8) & 0xffff);
280}
281
c07e7d21
MM
282static inline void context_clear_entry(struct context_entry *context)
283{
284 context->lo = 0;
285 context->hi = 0;
286}
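/*
 * Bit layout relied on by the helpers above (legacy context entries):
 *   lo: bit 0 present, bit 1 fault-processing disable, bits 3:2 translation
 *       type, bit 11 PASID enable, bits 63:12 address root;
 *   hi: bits 2:0 address width, bit 3 "copied" marker, bits 23:8 domain id.
 */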
7a8fc25e 287
2c2e2c38
FY
288/*
289 * This domain is a static identity mapping domain.
 290 * 1. This domain creates a static 1:1 mapping to all usable memory.
 291 * 2. It maps to each iommu if successful.
 292 * 3. Each iommu maps to this domain if successful.
293 */
19943b0e
DW
294static struct dmar_domain *si_domain;
295static int hw_pass_through = 1;
2c2e2c38 296
2c2e2c38 297/* si_domain contains multiple devices */
fa954e68 298#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
2c2e2c38 299
942067f1
LB
300/*
301 * This is a DMA domain allocated through the iommu domain allocation
302 * interface. But one or more devices belonging to this domain have
303 * been chosen to use a private domain. We should avoid using the
304 * map/unmap/iova_to_phys APIs on it.
305 */
306#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
307
29a27719
JR
308#define for_each_domain_iommu(idx, domain) \
309 for (idx = 0; idx < g_num_of_iommus; idx++) \
310 if (domain->iommu_refcnt[idx])
311
b94e4117
JL
312struct dmar_rmrr_unit {
313 struct list_head list; /* list of rmrr units */
314 struct acpi_dmar_header *hdr; /* ACPI header */
315 u64 base_address; /* reserved base address*/
316 u64 end_address; /* reserved end address */
832bd858 317 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
318 int devices_cnt; /* target device count */
319};
320
321struct dmar_atsr_unit {
322 struct list_head list; /* list of ATSR units */
323 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 324 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
325 int devices_cnt; /* target device count */
326 u8 include_all:1; /* include all ports */
327};
328
329static LIST_HEAD(dmar_atsr_units);
330static LIST_HEAD(dmar_rmrr_units);
331
332#define for_each_rmrr_units(rmrr) \
333 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
334
5e0d2a6f 335/* bitmap for indexing intel_iommus */
5e0d2a6f 336static int g_num_of_iommus;
337
92d03cc8 338static void domain_exit(struct dmar_domain *domain);
ba395927 339static void domain_remove_dev_info(struct dmar_domain *domain);
71753239 340static void dmar_remove_one_dev_info(struct device *dev);
127c7615 341static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2a46ddf7
JL
342static int domain_detach_iommu(struct dmar_domain *domain,
343 struct intel_iommu *iommu);
4de354ec 344static bool device_is_rmrr_locked(struct device *dev);
8af46c78
LB
345static int intel_iommu_attach_device(struct iommu_domain *domain,
346 struct device *dev);
ba395927 347
d3f13810 348#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
349int dmar_disabled = 0;
350#else
351int dmar_disabled = 1;
d3f13810 352#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 353
cdd3a249 354int intel_iommu_sm;
8bc1f85c
ED
355int intel_iommu_enabled = 0;
356EXPORT_SYMBOL_GPL(intel_iommu_enabled);
357
2d9e667e 358static int dmar_map_gfx = 1;
7d3b03ce 359static int dmar_forcedac;
5e0d2a6f 360static int intel_iommu_strict;
6dd9a7c7 361static int intel_iommu_superpage = 1;
ae853ddb 362static int iommu_identity_mapping;
c83b2f20 363
ae853ddb
DW
364#define IDENTMAP_ALL 1
365#define IDENTMAP_GFX 2
366#define IDENTMAP_AZALIA 4
c83b2f20 367
c0771df8
DW
368int intel_iommu_gfx_mapped;
369EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
370
ba395927 371#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
8af46c78 372#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
ba395927
KA
373static DEFINE_SPINLOCK(device_domain_lock);
374static LIST_HEAD(device_domain_list);
375
85319dcc
LB
376/*
377 * Iterate over elements in device_domain_list and call the specified
0bbeb01a 378 * callback @fn against each element.
85319dcc
LB
379 */
380int for_each_device_domain(int (*fn)(struct device_domain_info *info,
381 void *data), void *data)
382{
383 int ret = 0;
0bbeb01a 384 unsigned long flags;
85319dcc
LB
385 struct device_domain_info *info;
386
0bbeb01a 387 spin_lock_irqsave(&device_domain_lock, flags);
85319dcc
LB
388 list_for_each_entry(info, &device_domain_list, global) {
389 ret = fn(info, data);
0bbeb01a
LB
390 if (ret) {
391 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc 392 return ret;
0bbeb01a 393 }
85319dcc 394 }
0bbeb01a 395 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc
LB
396
397 return 0;
398}
399
b0119e87 400const struct iommu_ops intel_iommu_ops;
a8bcbb0d 401
4158c2ec
JR
402static bool translation_pre_enabled(struct intel_iommu *iommu)
403{
404 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
405}
406
091d42e4
JR
407static void clear_translation_pre_enabled(struct intel_iommu *iommu)
408{
409 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
410}
411
4158c2ec
JR
412static void init_translation_status(struct intel_iommu *iommu)
413{
414 u32 gsts;
415
416 gsts = readl(iommu->reg + DMAR_GSTS_REG);
417 if (gsts & DMA_GSTS_TES)
418 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
419}
420
00a77deb
JR
421/* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
422static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
423{
424 return container_of(dom, struct dmar_domain, domain);
425}
426
ba395927
KA
427static int __init intel_iommu_setup(char *str)
428{
429 if (!str)
430 return -EINVAL;
431 while (*str) {
0cd5c3c8
KM
432 if (!strncmp(str, "on", 2)) {
433 dmar_disabled = 0;
9f10e5bf 434 pr_info("IOMMU enabled\n");
0cd5c3c8 435 } else if (!strncmp(str, "off", 3)) {
ba395927 436 dmar_disabled = 1;
89a6079d 437 no_platform_optin = 1;
9f10e5bf 438 pr_info("IOMMU disabled\n");
ba395927
KA
439 } else if (!strncmp(str, "igfx_off", 8)) {
440 dmar_map_gfx = 0;
9f10e5bf 441 pr_info("Disable GFX device mapping\n");
7d3b03ce 442 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 443 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 444 dmar_forcedac = 1;
5e0d2a6f 445 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 446 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 447 intel_iommu_strict = 1;
6dd9a7c7 448 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 449 pr_info("Disable supported super page\n");
6dd9a7c7 450 intel_iommu_superpage = 0;
8950dcd8
LB
451 } else if (!strncmp(str, "sm_on", 5)) {
452 pr_info("Intel-IOMMU: scalable mode supported\n");
453 intel_iommu_sm = 1;
bfd20f1c
SL
454 } else if (!strncmp(str, "tboot_noforce", 13)) {
455 printk(KERN_INFO
456 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
457 intel_iommu_tboot_noforce = 1;
ba395927
KA
458 }
459
460 str += strcspn(str, ",");
461 while (*str == ',')
462 str++;
463 }
464 return 0;
465}
466__setup("intel_iommu=", intel_iommu_setup);
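/*
 * Example command lines accepted by the parser above:
 *   intel_iommu=on
 *   intel_iommu=on,strict,sp_off
 *   intel_iommu=igfx_off,forcedac
 * Options are comma separated; unknown tokens are silently skipped.
 */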
467
468static struct kmem_cache *iommu_domain_cache;
469static struct kmem_cache *iommu_devinfo_cache;
ba395927 470
9452d5bf
JR
471static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
472{
8bf47816
JR
473 struct dmar_domain **domains;
474 int idx = did >> 8;
475
476 domains = iommu->domains[idx];
477 if (!domains)
478 return NULL;
479
480 return domains[did & 0xff];
9452d5bf
JR
481}
482
483static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
484 struct dmar_domain *domain)
485{
8bf47816
JR
486 struct dmar_domain **domains;
487 int idx = did >> 8;
488
489 if (!iommu->domains[idx]) {
490 size_t size = 256 * sizeof(struct dmar_domain *);
491 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
492 }
493
494 domains = iommu->domains[idx];
495 if (WARN_ON(!domains))
496 return;
497 else
498 domains[did & 0xff] = domain;
9452d5bf
JR
499}
500
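/*
 * Domain pointers are kept in a two-level table indexed by domain id:
 * e.g. did 0x1234 lives in iommu->domains[0x12][0x34].  The second-level
 * arrays of 256 entries are allocated lazily in set_iommu_domain().
 */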
9ddbfb42 501void *alloc_pgtable_page(int node)
eb3fa7cb 502{
4c923d47
SS
503 struct page *page;
504 void *vaddr = NULL;
eb3fa7cb 505
4c923d47
SS
506 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
507 if (page)
508 vaddr = page_address(page);
eb3fa7cb 509 return vaddr;
ba395927
KA
510}
511
9ddbfb42 512void free_pgtable_page(void *vaddr)
ba395927
KA
513{
514 free_page((unsigned long)vaddr);
515}
516
517static inline void *alloc_domain_mem(void)
518{
354bb65e 519 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
520}
521
38717946 522static void free_domain_mem(void *vaddr)
ba395927
KA
523{
524 kmem_cache_free(iommu_domain_cache, vaddr);
525}
526
527static inline void * alloc_devinfo_mem(void)
528{
354bb65e 529 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
530}
531
532static inline void free_devinfo_mem(void *vaddr)
533{
534 kmem_cache_free(iommu_devinfo_cache, vaddr);
535}
536
28ccce0d
JR
537static inline int domain_type_is_si(struct dmar_domain *domain)
538{
539 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
540}
541
162d1b10
JL
542static inline int domain_pfn_supported(struct dmar_domain *domain,
543 unsigned long pfn)
544{
545 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
546
547 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
548}
549
4ed0d3e6 550static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
551{
552 unsigned long sagaw;
553 int agaw = -1;
554
555 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 556 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
557 agaw >= 0; agaw--) {
558 if (test_bit(agaw, &sagaw))
559 break;
560 }
561
562 return agaw;
563}
564
4ed0d3e6
FY
565/*
566 * Calculate max SAGAW for each iommu.
567 */
568int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
569{
570 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
571}
572
573/*
574 * Calculate agaw for each iommu.
 575 * "SAGAW" may be different across iommus, so use a default agaw and
 576 * fall back to a smaller supported agaw for iommus that don't support the default agaw.
577 */
578int iommu_calculate_agaw(struct intel_iommu *iommu)
579{
580 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
581}
582
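/*
 * E.g. width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH) == width_to_agaw(57) == 3,
 * i.e. 5-level tables.  If the hardware's SAGAW field doesn't advertise that,
 * the loop in __iommu_calculate_agaw() steps down to agaw 2 (4-level, 48-bit)
 * or agaw 1 (3-level, 39-bit), whichever is supported.
 */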
2c2e2c38 583/* This function only returns a single iommu in a domain */
9ddbfb42 584struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
8c11e798
WH
585{
586 int iommu_id;
587
2c2e2c38 588 /* si_domain and vm domain should not get here. */
fa954e68
LB
589 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
590 return NULL;
591
29a27719
JR
592 for_each_domain_iommu(iommu_id, domain)
593 break;
594
8c11e798
WH
595 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
596 return NULL;
597
598 return g_iommus[iommu_id];
599}
600
8e604097
WH
601static void domain_update_iommu_coherency(struct dmar_domain *domain)
602{
d0501960
DW
603 struct dmar_drhd_unit *drhd;
604 struct intel_iommu *iommu;
2f119c78
QL
605 bool found = false;
606 int i;
2e12bc29 607
d0501960 608 domain->iommu_coherency = 1;
8e604097 609
29a27719 610 for_each_domain_iommu(i, domain) {
2f119c78 611 found = true;
8e604097
WH
612 if (!ecap_coherent(g_iommus[i]->ecap)) {
613 domain->iommu_coherency = 0;
614 break;
615 }
8e604097 616 }
d0501960
DW
617 if (found)
618 return;
619
620 /* No hardware attached; use lowest common denominator */
621 rcu_read_lock();
622 for_each_active_iommu(iommu, drhd) {
623 if (!ecap_coherent(iommu->ecap)) {
624 domain->iommu_coherency = 0;
625 break;
626 }
627 }
628 rcu_read_unlock();
8e604097
WH
629}
630
161f6934 631static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 632{
161f6934
JL
633 struct dmar_drhd_unit *drhd;
634 struct intel_iommu *iommu;
635 int ret = 1;
58c610bd 636
161f6934
JL
637 rcu_read_lock();
638 for_each_active_iommu(iommu, drhd) {
639 if (iommu != skip) {
640 if (!ecap_sc_support(iommu->ecap)) {
641 ret = 0;
642 break;
643 }
58c610bd 644 }
58c610bd 645 }
161f6934
JL
646 rcu_read_unlock();
647
648 return ret;
58c610bd
SY
649}
650
161f6934 651static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 652{
8140a95d 653 struct dmar_drhd_unit *drhd;
161f6934 654 struct intel_iommu *iommu;
8140a95d 655 int mask = 0xf;
6dd9a7c7
YS
656
657 if (!intel_iommu_superpage) {
161f6934 658 return 0;
6dd9a7c7
YS
659 }
660
8140a95d 661 /* set iommu_superpage to the smallest common denominator */
0e242612 662 rcu_read_lock();
8140a95d 663 for_each_active_iommu(iommu, drhd) {
161f6934
JL
664 if (iommu != skip) {
665 mask &= cap_super_page_val(iommu->cap);
666 if (!mask)
667 break;
6dd9a7c7
YS
668 }
669 }
0e242612
JL
670 rcu_read_unlock();
671
161f6934 672 return fls(mask);
6dd9a7c7
YS
673}
674
58c610bd
SY
675/* Some capabilities may be different across iommus */
676static void domain_update_iommu_cap(struct dmar_domain *domain)
677{
678 domain_update_iommu_coherency(domain);
161f6934
JL
679 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
680 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
681}
682
26b86092
SM
683struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
684 u8 devfn, int alloc)
03ecc32c
DW
685{
686 struct root_entry *root = &iommu->root_entry[bus];
687 struct context_entry *context;
688 u64 *entry;
689
4df4eab1 690 entry = &root->lo;
765b6a98 691 if (sm_supported(iommu)) {
03ecc32c
DW
692 if (devfn >= 0x80) {
693 devfn -= 0x80;
694 entry = &root->hi;
695 }
696 devfn *= 2;
697 }
03ecc32c
DW
698 if (*entry & 1)
699 context = phys_to_virt(*entry & VTD_PAGE_MASK);
700 else {
701 unsigned long phy_addr;
702 if (!alloc)
703 return NULL;
704
705 context = alloc_pgtable_page(iommu->node);
706 if (!context)
707 return NULL;
708
709 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
710 phy_addr = virt_to_phys((void *)context);
711 *entry = phy_addr | 1;
712 __iommu_flush_cache(iommu, entry, sizeof(*entry));
713 }
714 return &context[devfn];
715}
716
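/*
 * In scalable mode each root entry covers a whole bus but is split in two:
 * the low 64 bits point to the context table for devfns 0x00-0x7f and the
 * high 64 bits to the one for devfns 0x80-0xff, with two 128-bit slots per
 * devfn (hence the 'devfn *= 2' above).  In legacy mode only root->lo is
 * used and the context table has one entry per devfn.
 */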
4ed6a540
DW
717static int iommu_dummy(struct device *dev)
718{
719 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
720}
721
b9a7f981
EA
722/**
723 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
724 * sub-hierarchy of a candidate PCI-PCI bridge
725 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
726 * @bridge: the candidate PCI-PCI bridge
727 *
728 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
729 */
730static bool
731is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
732{
733 struct pci_dev *pdev, *pbridge;
734
735 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
736 return false;
737
738 pdev = to_pci_dev(dev);
739 pbridge = to_pci_dev(bridge);
740
741 if (pbridge->subordinate &&
742 pbridge->subordinate->number <= pdev->bus->number &&
743 pbridge->subordinate->busn_res.end >= pdev->bus->number)
744 return true;
745
746 return false;
747}
748
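/*
 * E.g. for a bridge whose subordinate bus range is [0x02, 0x05], any PCI
 * device sitting on buses 0x02-0x05 is reported as downstream of it.
 */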
156baca8 749static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
750{
751 struct dmar_drhd_unit *drhd = NULL;
b683b230 752 struct intel_iommu *iommu;
156baca8 753 struct device *tmp;
b9a7f981 754 struct pci_dev *pdev = NULL;
aa4d066a 755 u16 segment = 0;
c7151a8d
WH
756 int i;
757
4ed6a540
DW
758 if (iommu_dummy(dev))
759 return NULL;
760
156baca8 761 if (dev_is_pci(dev)) {
1c387188
AR
762 struct pci_dev *pf_pdev;
763
156baca8 764 pdev = to_pci_dev(dev);
5823e330
JD
765
766#ifdef CONFIG_X86
767 /* VMD child devices currently cannot be handled individually */
768 if (is_vmd(pdev->bus))
769 return NULL;
770#endif
771
1c387188
AR
772 /* VFs aren't listed in scope tables; we need to look up
773 * the PF instead to find the IOMMU. */
774 pf_pdev = pci_physfn(pdev);
775 dev = &pf_pdev->dev;
156baca8 776 segment = pci_domain_nr(pdev->bus);
ca5b74d2 777 } else if (has_acpi_companion(dev))
156baca8
DW
778 dev = &ACPI_COMPANION(dev)->dev;
779
0e242612 780 rcu_read_lock();
b683b230 781 for_each_active_iommu(iommu, drhd) {
156baca8 782 if (pdev && segment != drhd->segment)
276dbf99 783 continue;
c7151a8d 784
b683b230 785 for_each_active_dev_scope(drhd->devices,
156baca8
DW
786 drhd->devices_cnt, i, tmp) {
787 if (tmp == dev) {
1c387188
AR
788 /* For a VF use its original BDF# not that of the PF
789 * which we used for the IOMMU lookup. Strictly speaking
790 * we could do this for all PCI devices; we only need to
791 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 792 if (pdev && pdev->is_virtfn)
1c387188
AR
793 goto got_pdev;
794
156baca8
DW
795 *bus = drhd->devices[i].bus;
796 *devfn = drhd->devices[i].devfn;
b683b230 797 goto out;
156baca8
DW
798 }
799
b9a7f981 800 if (is_downstream_to_pci_bridge(dev, tmp))
156baca8 801 goto got_pdev;
924b6231 802 }
c7151a8d 803
156baca8
DW
804 if (pdev && drhd->include_all) {
805 got_pdev:
806 *bus = pdev->bus->number;
807 *devfn = pdev->devfn;
b683b230 808 goto out;
156baca8 809 }
c7151a8d 810 }
b683b230 811 iommu = NULL;
156baca8 812 out:
0e242612 813 rcu_read_unlock();
c7151a8d 814
b683b230 815 return iommu;
c7151a8d
WH
816}
817
5331fe6f
WH
818static void domain_flush_cache(struct dmar_domain *domain,
819 void *addr, int size)
820{
821 if (!domain->iommu_coherency)
822 clflush_cache_range(addr, size);
823}
824
ba395927
KA
825static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
826{
ba395927 827 struct context_entry *context;
03ecc32c 828 int ret = 0;
ba395927
KA
829 unsigned long flags;
830
831 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
832 context = iommu_context_addr(iommu, bus, devfn, 0);
833 if (context)
834 ret = context_present(context);
ba395927
KA
835 spin_unlock_irqrestore(&iommu->lock, flags);
836 return ret;
837}
838
ba395927
KA
839static void free_context_table(struct intel_iommu *iommu)
840{
ba395927
KA
841 int i;
842 unsigned long flags;
843 struct context_entry *context;
844
845 spin_lock_irqsave(&iommu->lock, flags);
846 if (!iommu->root_entry) {
847 goto out;
848 }
849 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 850 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
851 if (context)
852 free_pgtable_page(context);
03ecc32c 853
765b6a98 854 if (!sm_supported(iommu))
03ecc32c
DW
855 continue;
856
857 context = iommu_context_addr(iommu, i, 0x80, 0);
858 if (context)
859 free_pgtable_page(context);
860
ba395927
KA
861 }
862 free_pgtable_page(iommu->root_entry);
863 iommu->root_entry = NULL;
864out:
865 spin_unlock_irqrestore(&iommu->lock, flags);
866}
867
b026fd28 868static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 869 unsigned long pfn, int *target_level)
ba395927 870{
e083ea5b 871 struct dma_pte *parent, *pte;
ba395927 872 int level = agaw_to_level(domain->agaw);
4399c8bf 873 int offset;
ba395927
KA
874
875 BUG_ON(!domain->pgd);
f9423606 876
162d1b10 877 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
878 /* Address beyond IOMMU's addressing capabilities. */
879 return NULL;
880
ba395927
KA
881 parent = domain->pgd;
882
5cf0a76f 883 while (1) {
ba395927
KA
884 void *tmp_page;
885
b026fd28 886 offset = pfn_level_offset(pfn, level);
ba395927 887 pte = &parent[offset];
5cf0a76f 888 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 889 break;
5cf0a76f 890 if (level == *target_level)
ba395927
KA
891 break;
892
19c239ce 893 if (!dma_pte_present(pte)) {
c85994e4
DW
894 uint64_t pteval;
895
4c923d47 896 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 897
206a73c1 898 if (!tmp_page)
ba395927 899 return NULL;
206a73c1 900
c85994e4 901 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 902 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 903 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
904 /* Someone else set it while we were thinking; use theirs. */
905 free_pgtable_page(tmp_page);
effad4b5 906 else
c85994e4 907 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 908 }
5cf0a76f
DW
909 if (level == 1)
910 break;
911
19c239ce 912 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
913 level--;
914 }
915
5cf0a76f
DW
916 if (!*target_level)
917 *target_level = level;
918
ba395927
KA
919 return pte;
920}
921
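/*
 * Callers pass *target_level == 0 to find the existing leaf for @pfn (the
 * walk stops at the first superpage or non-present entry and reports the
 * level it stopped at), or a specific level, e.g. 2, to get (allocating
 * intermediate tables as needed) the 2MiB-granule PTE covering @pfn.
 */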
922/* return address's pte at specific level */
90dcfb5e
DW
923static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
924 unsigned long pfn,
6dd9a7c7 925 int level, int *large_page)
ba395927 926{
e083ea5b 927 struct dma_pte *parent, *pte;
ba395927
KA
928 int total = agaw_to_level(domain->agaw);
929 int offset;
930
931 parent = domain->pgd;
932 while (level <= total) {
90dcfb5e 933 offset = pfn_level_offset(pfn, total);
ba395927
KA
934 pte = &parent[offset];
935 if (level == total)
936 return pte;
937
6dd9a7c7
YS
938 if (!dma_pte_present(pte)) {
939 *large_page = total;
ba395927 940 break;
6dd9a7c7
YS
941 }
942
e16922af 943 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
944 *large_page = total;
945 return pte;
946 }
947
19c239ce 948 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
949 total--;
950 }
951 return NULL;
952}
953
ba395927 954/* clear last level pte, a tlb flush should be followed */
5cf0a76f 955static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
956 unsigned long start_pfn,
957 unsigned long last_pfn)
ba395927 958{
e083ea5b 959 unsigned int large_page;
310a5ab9 960 struct dma_pte *first_pte, *pte;
66eae846 961
162d1b10
JL
962 BUG_ON(!domain_pfn_supported(domain, start_pfn));
963 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 964 BUG_ON(start_pfn > last_pfn);
ba395927 965
04b18e65 966 /* we don't need lock here; nobody else touches the iova range */
59c36286 967 do {
6dd9a7c7
YS
968 large_page = 1;
969 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 970 if (!pte) {
6dd9a7c7 971 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
972 continue;
973 }
6dd9a7c7 974 do {
310a5ab9 975 dma_clear_pte(pte);
6dd9a7c7 976 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 977 pte++;
75e6bf96
DW
978 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
979
310a5ab9
DW
980 domain_flush_cache(domain, first_pte,
981 (void *)pte - (void *)first_pte);
59c36286
DW
982
983 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
984}
985
3269ee0b 986static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
987 int retain_level, struct dma_pte *pte,
988 unsigned long pfn, unsigned long start_pfn,
989 unsigned long last_pfn)
3269ee0b
AW
990{
991 pfn = max(start_pfn, pfn);
992 pte = &pte[pfn_level_offset(pfn, level)];
993
994 do {
995 unsigned long level_pfn;
996 struct dma_pte *level_pte;
997
998 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
999 goto next;
1000
f7116e11 1001 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1002 level_pte = phys_to_virt(dma_pte_addr(pte));
1003
bc24c571
DD
1004 if (level > 2) {
1005 dma_pte_free_level(domain, level - 1, retain_level,
1006 level_pte, level_pfn, start_pfn,
1007 last_pfn);
1008 }
3269ee0b 1009
bc24c571
DD
1010 /*
1011 * Free the page table if we're below the level we want to
1012 * retain and the range covers the entire table.
1013 */
1014 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1015 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1016 dma_clear_pte(pte);
1017 domain_flush_cache(domain, pte, sizeof(*pte));
1018 free_pgtable_page(level_pte);
1019 }
1020next:
1021 pfn += level_size(level);
1022 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1023}
1024
bc24c571
DD
1025/*
1026 * clear last level (leaf) ptes and free page table pages below the
1027 * level we wish to keep intact.
1028 */
ba395927 1029static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1030 unsigned long start_pfn,
bc24c571
DD
1031 unsigned long last_pfn,
1032 int retain_level)
ba395927 1033{
162d1b10
JL
1034 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1035 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1036 BUG_ON(start_pfn > last_pfn);
ba395927 1037
d41a4adb
JL
1038 dma_pte_clear_range(domain, start_pfn, last_pfn);
1039
f3a0a52f 1040 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1041 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1042 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1043
ba395927 1044 /* free pgd */
d794dc9b 1045 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1046 free_pgtable_page(domain->pgd);
1047 domain->pgd = NULL;
1048 }
1049}
1050
ea8ea460
DW
1051/* When a page at a given level is being unlinked from its parent, we don't
1052 need to *modify* it at all. All we need to do is make a list of all the
1053 pages which can be freed just as soon as we've flushed the IOTLB and we
1054 know the hardware page-walk will no longer touch them.
1055 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1056 be freed. */
1057static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1058 int level, struct dma_pte *pte,
1059 struct page *freelist)
1060{
1061 struct page *pg;
1062
1063 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1064 pg->freelist = freelist;
1065 freelist = pg;
1066
1067 if (level == 1)
1068 return freelist;
1069
adeb2590
JL
1070 pte = page_address(pg);
1071 do {
ea8ea460
DW
1072 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1073 freelist = dma_pte_list_pagetables(domain, level - 1,
1074 pte, freelist);
adeb2590
JL
1075 pte++;
1076 } while (!first_pte_in_page(pte));
ea8ea460
DW
1077
1078 return freelist;
1079}
1080
1081static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1082 struct dma_pte *pte, unsigned long pfn,
1083 unsigned long start_pfn,
1084 unsigned long last_pfn,
1085 struct page *freelist)
1086{
1087 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1088
1089 pfn = max(start_pfn, pfn);
1090 pte = &pte[pfn_level_offset(pfn, level)];
1091
1092 do {
1093 unsigned long level_pfn;
1094
1095 if (!dma_pte_present(pte))
1096 goto next;
1097
1098 level_pfn = pfn & level_mask(level);
1099
1100 /* If range covers entire pagetable, free it */
1101 if (start_pfn <= level_pfn &&
1102 last_pfn >= level_pfn + level_size(level) - 1) {
1103 /* These subordinate page tables are going away entirely. Don't
1104 bother to clear them; we're just going to *free* them. */
1105 if (level > 1 && !dma_pte_superpage(pte))
1106 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1107
1108 dma_clear_pte(pte);
1109 if (!first_pte)
1110 first_pte = pte;
1111 last_pte = pte;
1112 } else if (level > 1) {
1113 /* Recurse down into a level that isn't *entirely* obsolete */
1114 freelist = dma_pte_clear_level(domain, level - 1,
1115 phys_to_virt(dma_pte_addr(pte)),
1116 level_pfn, start_pfn, last_pfn,
1117 freelist);
1118 }
1119next:
1120 pfn += level_size(level);
1121 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1122
1123 if (first_pte)
1124 domain_flush_cache(domain, first_pte,
1125 (void *)++last_pte - (void *)first_pte);
1126
1127 return freelist;
1128}
1129
1130/* We can't just free the pages because the IOMMU may still be walking
1131 the page tables, and may have cached the intermediate levels. The
1132 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1133static struct page *domain_unmap(struct dmar_domain *domain,
1134 unsigned long start_pfn,
1135 unsigned long last_pfn)
ea8ea460 1136{
e083ea5b 1137 struct page *freelist;
ea8ea460 1138
162d1b10
JL
1139 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1140 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1141 BUG_ON(start_pfn > last_pfn);
1142
1143 /* we don't need lock here; nobody else touches the iova range */
1144 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1145 domain->pgd, 0, start_pfn, last_pfn, NULL);
1146
1147 /* free pgd */
1148 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1149 struct page *pgd_page = virt_to_page(domain->pgd);
1150 pgd_page->freelist = freelist;
1151 freelist = pgd_page;
1152
1153 domain->pgd = NULL;
1154 }
1155
1156 return freelist;
1157}
1158
b690420a 1159static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1160{
1161 struct page *pg;
1162
1163 while ((pg = freelist)) {
1164 freelist = pg->freelist;
1165 free_pgtable_page(page_address(pg));
1166 }
1167}
1168
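/*
 * The expected flow is: freelist = domain_unmap(...), flush the IOTLB for
 * that range, and only then dma_free_pagelist(freelist), so the hardware
 * can never walk a page table that has already been freed.
 */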
13cf0174
JR
1169static void iova_entry_free(unsigned long data)
1170{
1171 struct page *freelist = (struct page *)data;
1172
1173 dma_free_pagelist(freelist);
1174}
1175
ba395927
KA
1176/* iommu handling */
1177static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1178{
1179 struct root_entry *root;
1180 unsigned long flags;
1181
4c923d47 1182 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1183 if (!root) {
9f10e5bf 1184 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1185 iommu->name);
ba395927 1186 return -ENOMEM;
ffebeb46 1187 }
ba395927 1188
5b6985ce 1189 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1190
1191 spin_lock_irqsave(&iommu->lock, flags);
1192 iommu->root_entry = root;
1193 spin_unlock_irqrestore(&iommu->lock, flags);
1194
1195 return 0;
1196}
1197
ba395927
KA
1198static void iommu_set_root_entry(struct intel_iommu *iommu)
1199{
03ecc32c 1200 u64 addr;
c416daa9 1201 u32 sts;
ba395927
KA
1202 unsigned long flag;
1203
03ecc32c 1204 addr = virt_to_phys(iommu->root_entry);
7373a8cc
LB
1205 if (sm_supported(iommu))
1206 addr |= DMA_RTADDR_SMT;
ba395927 1207
1f5b3c3f 1208 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1209 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1210
c416daa9 1211 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1212
1213 /* Make sure hardware complete it */
1214 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1215 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1216
1f5b3c3f 1217 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1218}
1219
6f7db75e 1220void iommu_flush_write_buffer(struct intel_iommu *iommu)
ba395927
KA
1221{
1222 u32 val;
1223 unsigned long flag;
1224
9af88143 1225 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1226 return;
ba395927 1227
1f5b3c3f 1228 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1229 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1230
1231 /* Make sure hardware complete it */
1232 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1233 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1234
1f5b3c3f 1235 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1236}
1237
1238/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1239static void __iommu_flush_context(struct intel_iommu *iommu,
1240 u16 did, u16 source_id, u8 function_mask,
1241 u64 type)
ba395927
KA
1242{
1243 u64 val = 0;
1244 unsigned long flag;
1245
ba395927
KA
1246 switch (type) {
1247 case DMA_CCMD_GLOBAL_INVL:
1248 val = DMA_CCMD_GLOBAL_INVL;
1249 break;
1250 case DMA_CCMD_DOMAIN_INVL:
1251 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1252 break;
1253 case DMA_CCMD_DEVICE_INVL:
1254 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1255 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1256 break;
1257 default:
1258 BUG();
1259 }
1260 val |= DMA_CCMD_ICC;
1261
1f5b3c3f 1262 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1263 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1264
1265 /* Make sure hardware complete it */
1266 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1267 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1268
1f5b3c3f 1269 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1270}
1271
ba395927 1272/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1273static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1274 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1275{
1276 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1277 u64 val = 0, val_iva = 0;
1278 unsigned long flag;
1279
ba395927
KA
1280 switch (type) {
1281 case DMA_TLB_GLOBAL_FLUSH:
1282 /* global flush doesn't need set IVA_REG */
1283 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1284 break;
1285 case DMA_TLB_DSI_FLUSH:
1286 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1287 break;
1288 case DMA_TLB_PSI_FLUSH:
1289 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1290 /* IH bit is passed in as part of address */
ba395927
KA
1291 val_iva = size_order | addr;
1292 break;
1293 default:
1294 BUG();
1295 }
1296 /* Note: set drain read/write */
1297#if 0
1298 /*
1299 * This is probably just to be extra safe. It looks like we can
1300 * ignore it without any impact.
1301 */
1302 if (cap_read_drain(iommu->cap))
1303 val |= DMA_TLB_READ_DRAIN;
1304#endif
1305 if (cap_write_drain(iommu->cap))
1306 val |= DMA_TLB_WRITE_DRAIN;
1307
1f5b3c3f 1308 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1309 /* Note: Only uses first TLB reg currently */
1310 if (val_iva)
1311 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1312 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1313
1314 /* Make sure hardware complete it */
1315 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1316 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1317
1f5b3c3f 1318 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1319
1320 /* check IOTLB invalidation granularity */
1321 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1322 pr_err("Flush IOTLB failed\n");
ba395927 1323 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1324 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1325 (unsigned long long)DMA_TLB_IIRG(type),
1326 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1327}
1328
64ae892b
DW
1329static struct device_domain_info *
1330iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1331 u8 bus, u8 devfn)
93a23a72 1332{
93a23a72 1333 struct device_domain_info *info;
93a23a72 1334
55d94043
JR
1335 assert_spin_locked(&device_domain_lock);
1336
93a23a72
YZ
1337 if (!iommu->qi)
1338 return NULL;
1339
93a23a72 1340 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1341 if (info->iommu == iommu && info->bus == bus &&
1342 info->devfn == devfn) {
b16d0cb9
DW
1343 if (info->ats_supported && info->dev)
1344 return info;
93a23a72
YZ
1345 break;
1346 }
93a23a72 1347
b16d0cb9 1348 return NULL;
93a23a72
YZ
1349}
1350
0824c592
OP
1351static void domain_update_iotlb(struct dmar_domain *domain)
1352{
1353 struct device_domain_info *info;
1354 bool has_iotlb_device = false;
1355
1356 assert_spin_locked(&device_domain_lock);
1357
1358 list_for_each_entry(info, &domain->devices, link) {
1359 struct pci_dev *pdev;
1360
1361 if (!info->dev || !dev_is_pci(info->dev))
1362 continue;
1363
1364 pdev = to_pci_dev(info->dev);
1365 if (pdev->ats_enabled) {
1366 has_iotlb_device = true;
1367 break;
1368 }
1369 }
1370
1371 domain->has_iotlb_device = has_iotlb_device;
1372}
1373
93a23a72 1374static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1375{
fb0cc3aa
BH
1376 struct pci_dev *pdev;
1377
0824c592
OP
1378 assert_spin_locked(&device_domain_lock);
1379
0bcb3e28 1380 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1381 return;
1382
fb0cc3aa 1383 pdev = to_pci_dev(info->dev);
1c48db44
JP
1384 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1385 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1386 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1387 * reserved, which should be set to 0.
1388 */
1389 if (!ecap_dit(info->iommu->ecap))
1390 info->pfsid = 0;
1391 else {
1392 struct pci_dev *pf_pdev;
1393
1394 /* pdev will be returned if device is not a vf */
1395 pf_pdev = pci_physfn(pdev);
cc49baa9 1396 info->pfsid = pci_dev_id(pf_pdev);
1c48db44 1397 }
fb0cc3aa 1398
b16d0cb9
DW
1399#ifdef CONFIG_INTEL_IOMMU_SVM
1400 /* The PCIe spec, in its wisdom, declares that the behaviour of
1401 the device if you enable PASID support after ATS support is
1402 undefined. So always enable PASID support on devices which
1403 have it, even if we can't yet know if we're ever going to
1404 use it. */
1405 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1406 info->pasid_enabled = 1;
1407
1b84778a
KS
1408 if (info->pri_supported &&
1409 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1410 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1411 info->pri_enabled = 1;
1412#endif
fb58fdcd 1413 if (!pdev->untrusted && info->ats_supported &&
61363c14 1414 pci_ats_page_aligned(pdev) &&
fb58fdcd 1415 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1416 info->ats_enabled = 1;
0824c592 1417 domain_update_iotlb(info->domain);
b16d0cb9
DW
1418 info->ats_qdep = pci_ats_queue_depth(pdev);
1419 }
93a23a72
YZ
1420}
1421
1422static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1423{
b16d0cb9
DW
1424 struct pci_dev *pdev;
1425
0824c592
OP
1426 assert_spin_locked(&device_domain_lock);
1427
da972fb1 1428 if (!dev_is_pci(info->dev))
93a23a72
YZ
1429 return;
1430
b16d0cb9
DW
1431 pdev = to_pci_dev(info->dev);
1432
1433 if (info->ats_enabled) {
1434 pci_disable_ats(pdev);
1435 info->ats_enabled = 0;
0824c592 1436 domain_update_iotlb(info->domain);
b16d0cb9
DW
1437 }
1438#ifdef CONFIG_INTEL_IOMMU_SVM
1439 if (info->pri_enabled) {
1440 pci_disable_pri(pdev);
1441 info->pri_enabled = 0;
1442 }
1443 if (info->pasid_enabled) {
1444 pci_disable_pasid(pdev);
1445 info->pasid_enabled = 0;
1446 }
1447#endif
93a23a72
YZ
1448}
1449
1450static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1451 u64 addr, unsigned mask)
1452{
1453 u16 sid, qdep;
1454 unsigned long flags;
1455 struct device_domain_info *info;
1456
0824c592
OP
1457 if (!domain->has_iotlb_device)
1458 return;
1459
93a23a72
YZ
1460 spin_lock_irqsave(&device_domain_lock, flags);
1461 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1462 if (!info->ats_enabled)
93a23a72
YZ
1463 continue;
1464
1465 sid = info->bus << 8 | info->devfn;
b16d0cb9 1466 qdep = info->ats_qdep;
1c48db44
JP
1467 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1468 qdep, addr, mask);
93a23a72
YZ
1469 }
1470 spin_unlock_irqrestore(&device_domain_lock, flags);
1471}
1472
a1ddcbe9
JR
1473static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1474 struct dmar_domain *domain,
1475 unsigned long pfn, unsigned int pages,
1476 int ih, int map)
ba395927 1477{
9dd2fe89 1478 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1479 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1480 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1481
ba395927
KA
1482 BUG_ON(pages == 0);
1483
ea8ea460
DW
1484 if (ih)
1485 ih = 1 << 6;
ba395927 1486 /*
9dd2fe89
YZ
1487 * Fall back to domain-selective flush if there is no PSI support or the size is
1488 * too big.
ba395927
KA
1489 * PSI requires page size to be 2 ^ x, and the base address is naturally
1490 * aligned to the size
1491 */
9dd2fe89
YZ
1492 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1493 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1494 DMA_TLB_DSI_FLUSH);
9dd2fe89 1495 else
ea8ea460 1496 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1497 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1498
1499 /*
82653633
NA
1500 * In caching mode, changes of pages from non-present to present require
1501 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1502 */
82653633 1503 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1504 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1505}
1506
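/*
 * The PSI mask is the log2 of the invalidation size in pages: e.g. a 9-page
 * request is rounded up to 16 pages, giving mask == 4, and the hardware
 * invalidates the naturally aligned 16-page (64KiB) region around the base
 * address.
 */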
eed91a0b
PX
1507/* Notification for newly created mappings */
1508static inline void __mapping_notify_one(struct intel_iommu *iommu,
1509 struct dmar_domain *domain,
1510 unsigned long pfn, unsigned int pages)
1511{
1512 /* It's a non-present to present mapping. Only flush if caching mode */
1513 if (cap_caching_mode(iommu->cap))
1514 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1515 else
1516 iommu_flush_write_buffer(iommu);
1517}
1518
13cf0174
JR
1519static void iommu_flush_iova(struct iova_domain *iovad)
1520{
1521 struct dmar_domain *domain;
1522 int idx;
1523
1524 domain = container_of(iovad, struct dmar_domain, iovad);
1525
1526 for_each_domain_iommu(idx, domain) {
1527 struct intel_iommu *iommu = g_iommus[idx];
1528 u16 did = domain->iommu_did[iommu->seq_id];
1529
1530 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1531
1532 if (!cap_caching_mode(iommu->cap))
1533 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1534 0, MAX_AGAW_PFN_WIDTH);
1535 }
1536}
1537
f8bab735 1538static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1539{
1540 u32 pmen;
1541 unsigned long flags;
1542
5bb71fc7
LB
1543 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1544 return;
1545
1f5b3c3f 1546 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1547 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1548 pmen &= ~DMA_PMEN_EPM;
1549 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1550
1551 /* wait for the protected region status bit to clear */
1552 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1553 readl, !(pmen & DMA_PMEN_PRS), pmen);
1554
1f5b3c3f 1555 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1556}
1557
2a41ccee 1558static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1559{
1560 u32 sts;
1561 unsigned long flags;
1562
1f5b3c3f 1563 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1564 iommu->gcmd |= DMA_GCMD_TE;
1565 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1566
1567 /* Make sure hardware complete it */
1568 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1569 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1570
1f5b3c3f 1571 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1572}
1573
2a41ccee 1574static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1575{
1576 u32 sts;
1577 unsigned long flag;
1578
1f5b3c3f 1579 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1580 iommu->gcmd &= ~DMA_GCMD_TE;
1581 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1582
1583 /* Make sure hardware complete it */
1584 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1585 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1586
1f5b3c3f 1587 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1588}
1589
1590static int iommu_init_domains(struct intel_iommu *iommu)
1591{
8bf47816
JR
1592 u32 ndomains, nlongs;
1593 size_t size;
ba395927
KA
1594
1595 ndomains = cap_ndoms(iommu->cap);
8bf47816 1596 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1597 iommu->name, ndomains);
ba395927
KA
1598 nlongs = BITS_TO_LONGS(ndomains);
1599
94a91b50
DD
1600 spin_lock_init(&iommu->lock);
1601
ba395927
KA
1602 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1603 if (!iommu->domain_ids) {
9f10e5bf
JR
1604 pr_err("%s: Allocating domain id array failed\n",
1605 iommu->name);
ba395927
KA
1606 return -ENOMEM;
1607 }
8bf47816 1608
86f004c7 1609 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1610 iommu->domains = kzalloc(size, GFP_KERNEL);
1611
1612 if (iommu->domains) {
1613 size = 256 * sizeof(struct dmar_domain *);
1614 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1615 }
1616
1617 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1618 pr_err("%s: Allocating domain array failed\n",
1619 iommu->name);
852bdb04 1620 kfree(iommu->domain_ids);
8bf47816 1621 kfree(iommu->domains);
852bdb04 1622 iommu->domain_ids = NULL;
8bf47816 1623 iommu->domains = NULL;
ba395927
KA
1624 return -ENOMEM;
1625 }
1626
1627 /*
c0e8a6c8
JR
1628 * If Caching mode is set, then invalid translations are tagged
1629 * with domain-id 0, hence we need to pre-allocate it. We also
1630 * use domain-id 0 as a marker for non-allocated domain-id, so
1631 * make sure it is not used for a real domain.
ba395927 1632 */
c0e8a6c8
JR
1633 set_bit(0, iommu->domain_ids);
1634
3b33d4ab
LB
1635 /*
1636 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1637 * entry for first-level or pass-through translation modes should
1638 * be programmed with a domain id different from those used for
1639 * second-level or nested translation. We reserve a domain id for
1640 * this purpose.
1641 */
1642 if (sm_supported(iommu))
1643 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1644
ba395927
KA
1645 return 0;
1646}
ba395927 1647
ffebeb46 1648static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1649{
29a27719 1650 struct device_domain_info *info, *tmp;
55d94043 1651 unsigned long flags;
ba395927 1652
29a27719
JR
1653 if (!iommu->domains || !iommu->domain_ids)
1654 return;
a4eaa86c 1655
55d94043 1656 spin_lock_irqsave(&device_domain_lock, flags);
29a27719 1657 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
29a27719
JR
1658 if (info->iommu != iommu)
1659 continue;
1660
1661 if (!info->dev || !info->domain)
1662 continue;
1663
bea64033 1664 __dmar_remove_one_dev_info(info);
ba395927 1665 }
55d94043 1666 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1667
1668 if (iommu->gcmd & DMA_GCMD_TE)
1669 iommu_disable_translation(iommu);
ffebeb46 1670}
ba395927 1671
ffebeb46
JL
1672static void free_dmar_iommu(struct intel_iommu *iommu)
1673{
1674 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1675 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1676 int i;
1677
1678 for (i = 0; i < elems; i++)
1679 kfree(iommu->domains[i]);
ffebeb46
JL
1680 kfree(iommu->domains);
1681 kfree(iommu->domain_ids);
1682 iommu->domains = NULL;
1683 iommu->domain_ids = NULL;
1684 }
ba395927 1685
d9630fe9
WH
1686 g_iommus[iommu->seq_id] = NULL;
1687
ba395927
KA
1688 /* free context mapping */
1689 free_context_table(iommu);
8a94ade4
DW
1690
1691#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1692 if (pasid_supported(iommu)) {
a222a7f0
DW
1693 if (ecap_prs(iommu->ecap))
1694 intel_svm_finish_prq(iommu);
a222a7f0 1695 }
8a94ade4 1696#endif
ba395927
KA
1697}
1698
ab8dfe25 1699static struct dmar_domain *alloc_domain(int flags)
ba395927 1700{
ba395927 1701 struct dmar_domain *domain;
ba395927
KA
1702
1703 domain = alloc_domain_mem();
1704 if (!domain)
1705 return NULL;
1706
ab8dfe25 1707 memset(domain, 0, sizeof(*domain));
98fa15f3 1708 domain->nid = NUMA_NO_NODE;
ab8dfe25 1709 domain->flags = flags;
0824c592 1710 domain->has_iotlb_device = false;
92d03cc8 1711 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1712
1713 return domain;
1714}
1715
d160aca5
JR
1716/* Must be called with iommu->lock */
1717static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1718 struct intel_iommu *iommu)
1719{
44bde614 1720 unsigned long ndomains;
55d94043 1721 int num;
44bde614 1722
55d94043 1723 assert_spin_locked(&device_domain_lock);
d160aca5 1724 assert_spin_locked(&iommu->lock);
ba395927 1725
29a27719
JR
1726 domain->iommu_refcnt[iommu->seq_id] += 1;
1727 domain->iommu_count += 1;
1728 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1729 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1730 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1731
1732 if (num >= ndomains) {
1733 pr_err("%s: No free domain ids\n", iommu->name);
1734 domain->iommu_refcnt[iommu->seq_id] -= 1;
1735 domain->iommu_count -= 1;
55d94043 1736 return -ENOSPC;
2c2e2c38 1737 }
ba395927 1738
d160aca5
JR
1739 set_bit(num, iommu->domain_ids);
1740 set_iommu_domain(iommu, num, domain);
1741
1742 domain->iommu_did[iommu->seq_id] = num;
1743 domain->nid = iommu->node;
fb170fb4 1744
fb170fb4
JL
1745 domain_update_iommu_cap(domain);
1746 }
d160aca5 1747
55d94043 1748 return 0;
fb170fb4
JL
1749}
1750
1751static int domain_detach_iommu(struct dmar_domain *domain,
1752 struct intel_iommu *iommu)
1753{
e083ea5b 1754 int num, count;
d160aca5 1755
55d94043 1756 assert_spin_locked(&device_domain_lock);
d160aca5 1757 assert_spin_locked(&iommu->lock);
fb170fb4 1758
29a27719
JR
1759 domain->iommu_refcnt[iommu->seq_id] -= 1;
1760 count = --domain->iommu_count;
1761 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1762 num = domain->iommu_did[iommu->seq_id];
1763 clear_bit(num, iommu->domain_ids);
1764 set_iommu_domain(iommu, num, NULL);
fb170fb4 1765
fb170fb4 1766 domain_update_iommu_cap(domain);
c0e8a6c8 1767 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1768 }
fb170fb4
JL
1769
1770 return count;
1771}
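/*
 * Worked example (illustrative addition, not part of the original
 * source): with two devices behind iommu #0 and one behind iommu #1,
 * attaching all three to the domain allocates one domain ID on each
 * IOMMU (iommu_refcnt 2 and 1 respectively).  Detaching one device
 * behind iommu #0 only drops that refcount to 1; the ID on iommu #0
 * is freed when the second device detaches as well.
 */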
1772
ba395927 1773static struct iova_domain reserved_iova_list;
8a443df4 1774static struct lock_class_key reserved_rbtree_key;
ba395927 1775
51a63e67 1776static int dmar_init_reserved_ranges(void)
ba395927
KA
1777{
1778 struct pci_dev *pdev = NULL;
1779 struct iova *iova;
1780 int i;
ba395927 1781
aa3ac946 1782 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1783
8a443df4
MG
1784 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1785 &reserved_rbtree_key);
1786
ba395927
KA
1787 /* IOAPIC ranges shouldn't be accessed by DMA */
1788 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1789 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1790 if (!iova) {
9f10e5bf 1791 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1792 return -ENODEV;
1793 }
ba395927
KA
1794
1795 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1796 for_each_pci_dev(pdev) {
1797 struct resource *r;
1798
1799 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1800 r = &pdev->resource[i];
1801 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1802 continue;
1a4a4551
DW
1803 iova = reserve_iova(&reserved_iova_list,
1804 IOVA_PFN(r->start),
1805 IOVA_PFN(r->end));
51a63e67 1806 if (!iova) {
932a6523 1807 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1808 return -ENODEV;
1809 }
ba395927
KA
1810 }
1811 }
51a63e67 1812 return 0;
ba395927
KA
1813}
1814
1815static void domain_reserve_special_ranges(struct dmar_domain *domain)
1816{
1817 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1818}
1819
1820static inline int guestwidth_to_adjustwidth(int gaw)
1821{
1822 int agaw;
1823 int r = (gaw - 12) % 9;
1824
1825 if (r == 0)
1826 agaw = gaw;
1827 else
1828 agaw = gaw + 9 - r;
1829 if (agaw > 64)
1830 agaw = 64;
1831 return agaw;
1832}
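/*
 * Worked example (illustrative addition, not in the original file):
 * the adjusted width is the guest width rounded up to a value the
 * page tables can express (12 + 9 * n bits), capped at 64.  For
 * gaw = 48: r = (48 - 12) % 9 = 0, so agaw = 48.  For gaw = 40:
 * r = (40 - 12) % 9 = 1, so agaw = 40 + 9 - 1 = 48.
 */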
1833
301e7ee1
JR
1834static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1835 int guest_width)
1836{
1837 int adjust_width, agaw;
1838 unsigned long sagaw;
1839 int err;
1840
1841 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1842
1843 err = init_iova_flush_queue(&domain->iovad,
1844 iommu_flush_iova, iova_entry_free);
1845 if (err)
1846 return err;
1847
1848 domain_reserve_special_ranges(domain);
1849
1850 /* calculate AGAW */
1851 if (guest_width > cap_mgaw(iommu->cap))
1852 guest_width = cap_mgaw(iommu->cap);
1853 domain->gaw = guest_width;
1854 adjust_width = guestwidth_to_adjustwidth(guest_width);
1855 agaw = width_to_agaw(adjust_width);
1856 sagaw = cap_sagaw(iommu->cap);
1857 if (!test_bit(agaw, &sagaw)) {
1858 /* hardware doesn't support it, choose a bigger one */
1859 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1860 agaw = find_next_bit(&sagaw, 5, agaw);
1861 if (agaw >= 5)
1862 return -ENODEV;
1863 }
1864 domain->agaw = agaw;
1865
1866 if (ecap_coherent(iommu->ecap))
1867 domain->iommu_coherency = 1;
1868 else
1869 domain->iommu_coherency = 0;
1870
1871 if (ecap_sc_support(iommu->ecap))
1872 domain->iommu_snooping = 1;
1873 else
1874 domain->iommu_snooping = 0;
1875
1876 if (intel_iommu_superpage)
1877 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1878 else
1879 domain->iommu_superpage = 0;
1880
1881 domain->nid = iommu->node;
1882
1883 /* always allocate the top pgd */
1884 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1885 if (!domain->pgd)
1886 return -ENOMEM;
1887 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1888 return 0;
1889}
1890
ba395927
KA
1891static void domain_exit(struct dmar_domain *domain)
1892{
ba395927 1893
d160aca5 1894 /* Remove associated devices and clear attached or cached domains */
ba395927 1895 domain_remove_dev_info(domain);
92d03cc8 1896
ba395927
KA
1897 /* destroy iovas */
1898 put_iova_domain(&domain->iovad);
ba395927 1899
3ee9eca7
DS
1900 if (domain->pgd) {
1901 struct page *freelist;
ba395927 1902
3ee9eca7
DS
1903 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1904 dma_free_pagelist(freelist);
1905 }
ea8ea460 1906
ba395927
KA
1907 free_domain_mem(domain);
1908}
1909
7373a8cc
LB
1910/*
1911 * Get the PASID directory size for a scalable mode context entry.
1912 * The value of X in the PDTS field of a scalable mode context entry
1913 * indicates a PASID directory with 2^(X + 7) entries.
1914 */
1915static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1916{
1917 int pds, max_pde;
1918
1919 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1920 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1921 if (pds < 7)
1922 return 0;
1923
1924 return pds - 7;
1925}
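/*
 * Worked example (illustrative addition; the 64-PASIDs-per-directory-
 * entry figure is an assumption about PASID_PDE_SHIFT, not taken from
 * this file): with a 20-bit PASID space and 64 PASIDs covered by each
 * directory entry, max_pde is 2^14, find_first_bit() returns 14, and
 * the function returns 14 - 7 = 7, i.e. a PASID directory with
 * 2^(7 + 7) = 2^14 entries as described above.
 */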
1926
1927/*
1928 * Set the RID_PASID field of a scalable mode context entry. The
1929 * IOMMU hardware will use the PASID value set in this field for
1930 * DMA translations of DMA requests without PASID.
1931 */
1932static inline void
1933context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1934{
1935 context->hi |= pasid & ((1 << 20) - 1);
1936 context->hi |= (1 << 20);
1937}
1938
1939/*
1940 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1941 * entry.
1942 */
1943static inline void context_set_sm_dte(struct context_entry *context)
1944{
1945 context->lo |= (1 << 2);
1946}
1947
1948/*
1949 * Set the PRE(Page Request Enable) field of a scalable mode context
1950 * entry.
1951 */
1952static inline void context_set_sm_pre(struct context_entry *context)
1953{
1954 context->lo |= (1 << 4);
1955}
1956
1957/* Convert value to context PASID directory size field coding. */
1958#define context_pdts(pds) (((pds) & 0x7) << 9)
1959
64ae892b
DW
1960static int domain_context_mapping_one(struct dmar_domain *domain,
1961 struct intel_iommu *iommu,
ca6e322d 1962 struct pasid_table *table,
28ccce0d 1963 u8 bus, u8 devfn)
ba395927 1964{
c6c2cebd 1965 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1966 int translation = CONTEXT_TT_MULTI_LEVEL;
1967 struct device_domain_info *info = NULL;
ba395927 1968 struct context_entry *context;
ba395927 1969 unsigned long flags;
7373a8cc 1970 int ret;
28ccce0d 1971
c6c2cebd
JR
1972 WARN_ON(did == 0);
1973
28ccce0d
JR
1974 if (hw_pass_through && domain_type_is_si(domain))
1975 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1976
1977 pr_debug("Set context mapping for %02x:%02x.%d\n",
1978 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1979
ba395927 1980 BUG_ON(!domain->pgd);
5331fe6f 1981
55d94043
JR
1982 spin_lock_irqsave(&device_domain_lock, flags);
1983 spin_lock(&iommu->lock);
1984
1985 ret = -ENOMEM;
03ecc32c 1986 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1987 if (!context)
55d94043 1988 goto out_unlock;
ba395927 1989
55d94043
JR
1990 ret = 0;
1991 if (context_present(context))
1992 goto out_unlock;
cf484d0e 1993
aec0e861
XP
1994 /*
1995 * For kdump cases, old valid entries may be cached due to the
1996 * in-flight DMA and copied pgtable, but there is no unmapping
1997 * behaviour for them, thus we need an explicit cache flush for
1998 * the newly-mapped device. For kdump, at this point, the device
1999 * is supposed to have finished reset at its driver probe stage,
2000 * so no in-flight DMA will exist and no further flushing is
2001 * needed after this point.
2002 */
2003 if (context_copied(context)) {
2004 u16 did_old = context_domain_id(context);
2005
b117e038 2006 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2007 iommu->flush.flush_context(iommu, did_old,
2008 (((u16)bus) << 8) | devfn,
2009 DMA_CCMD_MASK_NOBIT,
2010 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2011 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2012 DMA_TLB_DSI_FLUSH);
2013 }
aec0e861
XP
2014 }
2015
de24e553 2016 context_clear_entry(context);
ea6606b0 2017
7373a8cc
LB
2018 if (sm_supported(iommu)) {
2019 unsigned long pds;
4ed0d3e6 2020
7373a8cc
LB
2021 WARN_ON(!table);
2022
2023 /* Setup the PASID DIR pointer: */
2024 pds = context_get_sm_pds(table);
2025 context->lo = (u64)virt_to_phys(table->table) |
2026 context_pdts(pds);
2027
2028 /* Setup the RID_PASID field: */
2029 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2030
de24e553 2031 /*
7373a8cc
LB
2032 * Setup the Device-TLB enable bit and Page request
2033 * Enable bit:
de24e553 2034 */
7373a8cc
LB
2035 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2036 if (info && info->ats_supported)
2037 context_set_sm_dte(context);
2038 if (info && info->pri_supported)
2039 context_set_sm_pre(context);
2040 } else {
2041 struct dma_pte *pgd = domain->pgd;
2042 int agaw;
2043
2044 context_set_domain_id(context, did);
7373a8cc
LB
2045
2046 if (translation != CONTEXT_TT_PASS_THROUGH) {
2047 /*
2048 * Skip top levels of page tables for iommu which has
2049 * less agaw than default. Unnecessary for PT mode.
2050 */
2051 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2052 ret = -ENOMEM;
2053 pgd = phys_to_virt(dma_pte_addr(pgd));
2054 if (!dma_pte_present(pgd))
2055 goto out_unlock;
2056 }
2057
2058 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2059 if (info && info->ats_supported)
2060 translation = CONTEXT_TT_DEV_IOTLB;
2061 else
2062 translation = CONTEXT_TT_MULTI_LEVEL;
2063
2064 context_set_address_root(context, virt_to_phys(pgd));
2065 context_set_address_width(context, agaw);
2066 } else {
2067 /*
2068 * In pass through mode, AW must be programmed to
2069 * indicate the largest AGAW value supported by
2070 * hardware. And ASR is ignored by hardware.
2071 */
2072 context_set_address_width(context, iommu->msagaw);
2073 }
41b80db2
LB
2074
2075 context_set_translation_type(context, translation);
93a23a72 2076 }
4ed0d3e6 2077
c07e7d21
MM
2078 context_set_fault_enable(context);
2079 context_set_present(context);
5331fe6f 2080 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2081
4c25a2c1
DW
2082 /*
2083 * It's a non-present to present mapping. If hardware doesn't cache
2084 * non-present entries we only need to flush the write-buffer. If it
2085 * _does_ cache non-present entries, then it does so in the special
2086 * domain #0, which we have to flush:
2087 */
2088 if (cap_caching_mode(iommu->cap)) {
2089 iommu->flush.flush_context(iommu, 0,
2090 (((u16)bus) << 8) | devfn,
2091 DMA_CCMD_MASK_NOBIT,
2092 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2093 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2094 } else {
ba395927 2095 iommu_flush_write_buffer(iommu);
4c25a2c1 2096 }
93a23a72 2097 iommu_enable_dev_iotlb(info);
c7151a8d 2098
55d94043
JR
2099 ret = 0;
2100
2101out_unlock:
2102 spin_unlock(&iommu->lock);
2103 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2104
5c365d18 2105 return ret;
ba395927
KA
2106}
2107
2108static int
28ccce0d 2109domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2110{
ca6e322d 2111 struct pasid_table *table;
64ae892b 2112 struct intel_iommu *iommu;
156baca8 2113 u8 bus, devfn;
64ae892b 2114
e1f167f3 2115 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2116 if (!iommu)
2117 return -ENODEV;
ba395927 2118
ca6e322d 2119 table = intel_pasid_get_table(dev);
55752949 2120 return domain_context_mapping_one(domain, iommu, table, bus, devfn);
579305f7
AW
2121}
2122
2123static int domain_context_mapped_cb(struct pci_dev *pdev,
2124 u16 alias, void *opaque)
2125{
2126 struct intel_iommu *iommu = opaque;
2127
2128 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2129}
2130
e1f167f3 2131static int domain_context_mapped(struct device *dev)
ba395927 2132{
5331fe6f 2133 struct intel_iommu *iommu;
156baca8 2134 u8 bus, devfn;
5331fe6f 2135
e1f167f3 2136 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2137 if (!iommu)
2138 return -ENODEV;
ba395927 2139
579305f7
AW
2140 if (!dev_is_pci(dev))
2141 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2142
579305f7
AW
2143 return !pci_for_each_dma_alias(to_pci_dev(dev),
2144 domain_context_mapped_cb, iommu);
ba395927
KA
2145}
2146
f532959b
FY
2147/* Returns a number of VTD pages, but aligned to MM page size */
2148static inline unsigned long aligned_nrpages(unsigned long host_addr,
2149 size_t size)
2150{
2151 host_addr &= ~PAGE_MASK;
2152 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2153}
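/*
 * Worked example (illustrative addition, not in the original file):
 * with 4KiB pages, aligned_nrpages(0x1234, 0x2000) keeps only the
 * sub-page part of the address (0x234), rounds 0x234 + 0x2000 up to
 * 0x3000 and returns 3, so a buffer that straddles page boundaries
 * is always covered by whole VTD pages.
 */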
2154
6dd9a7c7
YS
2155/* Return largest possible superpage level for a given mapping */
2156static inline int hardware_largepage_caps(struct dmar_domain *domain,
2157 unsigned long iov_pfn,
2158 unsigned long phy_pfn,
2159 unsigned long pages)
2160{
2161 int support, level = 1;
2162 unsigned long pfnmerge;
2163
2164 support = domain->iommu_superpage;
2165
2166 /* To use a large page, the virtual *and* physical addresses
2167 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2168 of them will mean we have to use smaller pages. So just
2169 merge them and check both at once. */
2170 pfnmerge = iov_pfn | phy_pfn;
2171
2172 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2173 pages >>= VTD_STRIDE_SHIFT;
2174 if (!pages)
2175 break;
2176 pfnmerge >>= VTD_STRIDE_SHIFT;
2177 level++;
2178 support--;
2179 }
2180 return level;
2181}
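/*
 * Worked example (illustrative addition, not in the original file):
 * if both iov_pfn and phy_pfn have their low 9 bits clear (2MiB
 * aligned) and at least 512 pages remain, the loop runs once and
 * returns level 2, allowing a 2MiB superpage.  A 1GiB superpage
 * (level 3) additionally needs 18 clear low bits, at least 262144
 * pages and hardware support for two superpage levels.
 */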
2182
9051aa02
DW
2183static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2184 struct scatterlist *sg, unsigned long phys_pfn,
2185 unsigned long nr_pages, int prot)
e1605495
DW
2186{
2187 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2188 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2189 unsigned long sg_res = 0;
6dd9a7c7
YS
2190 unsigned int largepage_lvl = 0;
2191 unsigned long lvl_pages = 0;
e1605495 2192
162d1b10 2193 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2194
2195 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2196 return -EINVAL;
2197
2198 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2199
cc4f14aa
JL
2200 if (!sg) {
2201 sg_res = nr_pages;
9051aa02
DW
2202 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2203 }
2204
6dd9a7c7 2205 while (nr_pages > 0) {
c85994e4
DW
2206 uint64_t tmp;
2207
e1605495 2208 if (!sg_res) {
29a90b70
RM
2209 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2210
f532959b 2211 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2212 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2213 sg->dma_length = sg->length;
29a90b70 2214 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2215 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2216 }
6dd9a7c7 2217
e1605495 2218 if (!pte) {
6dd9a7c7
YS
2219 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2220
5cf0a76f 2221 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2222 if (!pte)
2223 return -ENOMEM;
6dd9a7c7 2224 /* It is a large page */
6491d4d0 2225 if (largepage_lvl > 1) {
ba2374fd
CZ
2226 unsigned long nr_superpages, end_pfn;
2227
6dd9a7c7 2228 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2229 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2230
2231 nr_superpages = sg_res / lvl_pages;
2232 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2233
d41a4adb
JL
2234 /*
2235 * Ensure that old small page tables are
ba2374fd 2236 * removed to make room for superpage(s).
bc24c571
DD
2237 * We're adding new large pages, so make sure
2238 * we don't remove their parent tables.
d41a4adb 2239 */
bc24c571
DD
2240 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2241 largepage_lvl + 1);
6491d4d0 2242 } else {
6dd9a7c7 2243 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2244 }
6dd9a7c7 2245
e1605495
DW
2246 }
2247 /* We don't need a lock here; nobody else
2248 * touches the iova range
2249 */
7766a3fb 2250 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2251 if (tmp) {
1bf20f0d 2252 static int dumps = 5;
9f10e5bf
JR
2253 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2254 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2255 if (dumps) {
2256 dumps--;
2257 debug_dma_dump_mappings(NULL);
2258 }
2259 WARN_ON(1);
2260 }
6dd9a7c7
YS
2261
2262 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2263
2264 BUG_ON(nr_pages < lvl_pages);
2265 BUG_ON(sg_res < lvl_pages);
2266
2267 nr_pages -= lvl_pages;
2268 iov_pfn += lvl_pages;
2269 phys_pfn += lvl_pages;
2270 pteval += lvl_pages * VTD_PAGE_SIZE;
2271 sg_res -= lvl_pages;
2272
2273 /* If the next PTE would be the first in a new page, then we
2274 need to flush the cache on the entries we've just written.
2275 And then we'll need to recalculate 'pte', so clear it and
2276 let it get set again in the if (!pte) block above.
2277
2278 If we're done (!nr_pages) we need to flush the cache too.
2279
2280 Also if we've been setting superpages, we may need to
2281 recalculate 'pte' and switch back to smaller pages for the
2282 end of the mapping, if the trailing size is not enough to
2283 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2284 pte++;
6dd9a7c7
YS
2285 if (!nr_pages || first_pte_in_page(pte) ||
2286 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2287 domain_flush_cache(domain, first_pte,
2288 (void *)pte - (void *)first_pte);
2289 pte = NULL;
2290 }
6dd9a7c7
YS
2291
2292 if (!sg_res && nr_pages)
e1605495
DW
2293 sg = sg_next(sg);
2294 }
2295 return 0;
2296}
2297
87684fd9 2298static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2299 struct scatterlist *sg, unsigned long phys_pfn,
2300 unsigned long nr_pages, int prot)
2301{
fa954e68 2302 int iommu_id, ret;
095303e0
LB
2303 struct intel_iommu *iommu;
2304
2305 /* Do the real mapping first */
2306 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2307 if (ret)
2308 return ret;
2309
fa954e68
LB
2310 for_each_domain_iommu(iommu_id, domain) {
2311 iommu = g_iommus[iommu_id];
095303e0
LB
2312 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2313 }
2314
2315 return 0;
87684fd9
PX
2316}
2317
9051aa02
DW
2318static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2319 struct scatterlist *sg, unsigned long nr_pages,
2320 int prot)
ba395927 2321{
87684fd9 2322 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2323}
6f6a00e4 2324
9051aa02
DW
2325static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2326 unsigned long phys_pfn, unsigned long nr_pages,
2327 int prot)
2328{
87684fd9 2329 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2330}
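/*
 * Illustrative usage sketch (added for clarity, not in the original
 * file): mapping one 4KiB page read/write at IOVA pfn 0x1000 to
 * physical pfn 0x5000 would look like
 *
 *	domain_pfn_mapping(domain, 0x1000, 0x5000, 1,
 *			   DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * which reaches __domain_mapping() with sg == NULL, so sg_res is
 * preloaded to nr_pages and pteval is built directly from phys_pfn.
 */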
2331
2452d9db 2332static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2333{
5082219b
FS
2334 unsigned long flags;
2335 struct context_entry *context;
2336 u16 did_old;
2337
c7151a8d
WH
2338 if (!iommu)
2339 return;
8c11e798 2340
5082219b
FS
2341 spin_lock_irqsave(&iommu->lock, flags);
2342 context = iommu_context_addr(iommu, bus, devfn, 0);
2343 if (!context) {
2344 spin_unlock_irqrestore(&iommu->lock, flags);
2345 return;
2346 }
2347 did_old = context_domain_id(context);
2348 context_clear_entry(context);
2349 __iommu_flush_cache(iommu, context, sizeof(*context));
2350 spin_unlock_irqrestore(&iommu->lock, flags);
2351 iommu->flush.flush_context(iommu,
2352 did_old,
2353 (((u16)bus) << 8) | devfn,
2354 DMA_CCMD_MASK_NOBIT,
2355 DMA_CCMD_DEVICE_INVL);
2356 iommu->flush.flush_iotlb(iommu,
2357 did_old,
2358 0,
2359 0,
2360 DMA_TLB_DSI_FLUSH);
ba395927
KA
2361}
2362
109b9b04
DW
2363static inline void unlink_domain_info(struct device_domain_info *info)
2364{
2365 assert_spin_locked(&device_domain_lock);
2366 list_del(&info->link);
2367 list_del(&info->global);
2368 if (info->dev)
0bcb3e28 2369 info->dev->archdata.iommu = NULL;
109b9b04
DW
2370}
2371
ba395927
KA
2372static void domain_remove_dev_info(struct dmar_domain *domain)
2373{
3a74ca01 2374 struct device_domain_info *info, *tmp;
fb170fb4 2375 unsigned long flags;
ba395927
KA
2376
2377 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2378 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2379 __dmar_remove_one_dev_info(info);
ba395927
KA
2380 spin_unlock_irqrestore(&device_domain_lock, flags);
2381}
2382
2383/*
2384 * find_domain
1525a29a 2385 * Note: we use struct device->archdata.iommu to store the info
ba395927 2386 */
1525a29a 2387static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2388{
2389 struct device_domain_info *info;
2390
8af46c78
LB
2391 if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2392 struct iommu_domain *domain;
2393
2394 dev->archdata.iommu = NULL;
2395 domain = iommu_get_domain_for_dev(dev);
2396 if (domain)
2397 intel_iommu_attach_device(domain, dev);
2398 }
2399
ba395927 2400 /* No lock here, assumes no domain exit in normal case */
1525a29a 2401 info = dev->archdata.iommu;
8af46c78 2402
b316d02a 2403 if (likely(info))
ba395927
KA
2404 return info->domain;
2405 return NULL;
2406}
2407
5a8f40e8 2408static inline struct device_domain_info *
745f2586
JL
2409dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2410{
2411 struct device_domain_info *info;
2412
2413 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2414 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2415 info->devfn == devfn)
5a8f40e8 2416 return info;
745f2586
JL
2417
2418 return NULL;
2419}
2420
5db31569
JR
2421static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2422 int bus, int devfn,
2423 struct device *dev,
2424 struct dmar_domain *domain)
745f2586 2425{
5a8f40e8 2426 struct dmar_domain *found = NULL;
745f2586
JL
2427 struct device_domain_info *info;
2428 unsigned long flags;
d160aca5 2429 int ret;
745f2586
JL
2430
2431 info = alloc_devinfo_mem();
2432 if (!info)
b718cd3d 2433 return NULL;
745f2586 2434
745f2586
JL
2435 info->bus = bus;
2436 info->devfn = devfn;
b16d0cb9
DW
2437 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2438 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2439 info->ats_qdep = 0;
745f2586
JL
2440 info->dev = dev;
2441 info->domain = domain;
5a8f40e8 2442 info->iommu = iommu;
cc580e41 2443 info->pasid_table = NULL;
95587a75 2444 info->auxd_enabled = 0;
67b8e02b 2445 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2446
b16d0cb9
DW
2447 if (dev && dev_is_pci(dev)) {
2448 struct pci_dev *pdev = to_pci_dev(info->dev);
2449
d8b85910
LB
2450 if (!pdev->untrusted &&
2451 !pci_ats_disabled() &&
cef74409 2452 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2453 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2454 dmar_find_matched_atsr_unit(pdev))
2455 info->ats_supported = 1;
2456
765b6a98
LB
2457 if (sm_supported(iommu)) {
2458 if (pasid_supported(iommu)) {
b16d0cb9
DW
2459 int features = pci_pasid_features(pdev);
2460 if (features >= 0)
2461 info->pasid_supported = features | 1;
2462 }
2463
2464 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2465 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2466 info->pri_supported = 1;
2467 }
2468 }
2469
745f2586
JL
2470 spin_lock_irqsave(&device_domain_lock, flags);
2471 if (dev)
0bcb3e28 2472 found = find_domain(dev);
f303e507
JR
2473
2474 if (!found) {
5a8f40e8 2475 struct device_domain_info *info2;
41e80dca 2476 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2477 if (info2) {
2478 found = info2->domain;
2479 info2->dev = dev;
2480 }
5a8f40e8 2481 }
f303e507 2482
745f2586
JL
2483 if (found) {
2484 spin_unlock_irqrestore(&device_domain_lock, flags);
2485 free_devinfo_mem(info);
b718cd3d
DW
2486 /* Caller must free the original domain */
2487 return found;
745f2586
JL
2488 }
2489
d160aca5
JR
2490 spin_lock(&iommu->lock);
2491 ret = domain_attach_iommu(domain, iommu);
2492 spin_unlock(&iommu->lock);
2493
2494 if (ret) {
c6c2cebd 2495 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2496 free_devinfo_mem(info);
c6c2cebd
JR
2497 return NULL;
2498 }
c6c2cebd 2499
b718cd3d
DW
2500 list_add(&info->link, &domain->devices);
2501 list_add(&info->global, &device_domain_list);
2502 if (dev)
2503 dev->archdata.iommu = info;
0bbeb01a 2504 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2505
0bbeb01a
LB
2506 /* PASID table is mandatory for a PCI device in scalable mode. */
2507 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2508 ret = intel_pasid_alloc_table(dev);
2509 if (ret) {
932a6523 2510 dev_err(dev, "PASID table allocation failed\n");
71753239 2511 dmar_remove_one_dev_info(dev);
0bbeb01a 2512 return NULL;
a7fc93fe 2513 }
ef848b7e
LB
2514
2515 /* Setup the PASID entry for requests without PASID: */
2516 spin_lock(&iommu->lock);
2517 if (hw_pass_through && domain_type_is_si(domain))
2518 ret = intel_pasid_setup_pass_through(iommu, domain,
2519 dev, PASID_RID2PASID);
2520 else
2521 ret = intel_pasid_setup_second_level(iommu, domain,
2522 dev, PASID_RID2PASID);
2523 spin_unlock(&iommu->lock);
2524 if (ret) {
932a6523 2525 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2526 dmar_remove_one_dev_info(dev);
ef848b7e 2527 return NULL;
a7fc93fe
LB
2528 }
2529 }
b718cd3d 2530
cc4e2575 2531 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2532 dev_err(dev, "Domain context map failed\n");
71753239 2533 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2534 return NULL;
2535 }
2536
b718cd3d 2537 return domain;
745f2586
JL
2538}
2539
579305f7
AW
2540static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2541{
2542 *(u16 *)opaque = alias;
2543 return 0;
2544}
2545
76208356 2546static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2547{
e083ea5b 2548 struct device_domain_info *info;
76208356 2549 struct dmar_domain *domain = NULL;
579305f7 2550 struct intel_iommu *iommu;
fcc35c63 2551 u16 dma_alias;
ba395927 2552 unsigned long flags;
aa4d066a 2553 u8 bus, devfn;
ba395927 2554
579305f7
AW
2555 iommu = device_to_iommu(dev, &bus, &devfn);
2556 if (!iommu)
2557 return NULL;
2558
146922ec
DW
2559 if (dev_is_pci(dev)) {
2560 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2561
579305f7
AW
2562 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2563
2564 spin_lock_irqsave(&device_domain_lock, flags);
2565 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2566 PCI_BUS_NUM(dma_alias),
2567 dma_alias & 0xff);
2568 if (info) {
2569 iommu = info->iommu;
2570 domain = info->domain;
5a8f40e8 2571 }
579305f7 2572 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2573
76208356 2574 /* DMA alias already has a domain, use it */
579305f7 2575 if (info)
76208356 2576 goto out;
579305f7 2577 }
ba395927 2578
146922ec 2579 /* Allocate and initialize new domain for the device */
ab8dfe25 2580 domain = alloc_domain(0);
745f2586 2581 if (!domain)
579305f7 2582 return NULL;
301e7ee1 2583 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2584 domain_exit(domain);
2585 return NULL;
2c2e2c38 2586 }
ba395927 2587
76208356 2588out:
76208356
JR
2589 return domain;
2590}
579305f7 2591
76208356
JR
2592static struct dmar_domain *set_domain_for_dev(struct device *dev,
2593 struct dmar_domain *domain)
2594{
2595 struct intel_iommu *iommu;
2596 struct dmar_domain *tmp;
2597 u16 req_id, dma_alias;
2598 u8 bus, devfn;
2599
2600 iommu = device_to_iommu(dev, &bus, &devfn);
2601 if (!iommu)
2602 return NULL;
2603
2604 req_id = ((u16)bus << 8) | devfn;
2605
2606 if (dev_is_pci(dev)) {
2607 struct pci_dev *pdev = to_pci_dev(dev);
2608
2609 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2610
2611 /* register PCI DMA alias device */
2612 if (req_id != dma_alias) {
2613 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2614 dma_alias & 0xff, NULL, domain);
2615
2616 if (!tmp || tmp != domain)
2617 return tmp;
2618 }
ba395927
KA
2619 }
2620
5db31569 2621 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2622 if (!tmp || tmp != domain)
2623 return tmp;
2624
2625 return domain;
2626}
579305f7 2627
b213203e
DW
2628static int iommu_domain_identity_map(struct dmar_domain *domain,
2629 unsigned long long start,
2630 unsigned long long end)
ba395927 2631{
c5395d5c
DW
2632 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2633 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2634
2635 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2636 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2637 pr_err("Reserving iova failed\n");
b213203e 2638 return -ENOMEM;
ba395927
KA
2639 }
2640
af1089ce 2641 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2642 /*
2643 * The RMRR range might overlap with the physical memory range,
2644 * so clear it first.
2645 */
c5395d5c 2646 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2647
87684fd9
PX
2648 return __domain_mapping(domain, first_vpfn, NULL,
2649 first_vpfn, last_vpfn - first_vpfn + 1,
2650 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2651}
2652
d66ce54b
JR
2653static int domain_prepare_identity_map(struct device *dev,
2654 struct dmar_domain *domain,
2655 unsigned long long start,
2656 unsigned long long end)
b213203e 2657{
19943b0e
DW
2658 /* For _hardware_ passthrough, don't bother. But for software
2659 passthrough, we do it anyway -- it may indicate a memory
2660 range which is reserved in E820 and so didn't get set
2661 up in si_domain to start with */
2662 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2663 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2664 start, end);
19943b0e
DW
2665 return 0;
2666 }
2667
932a6523 2668 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2669
5595b528
DW
2670 if (end < start) {
2671 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2672 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2673 dmi_get_system_info(DMI_BIOS_VENDOR),
2674 dmi_get_system_info(DMI_BIOS_VERSION),
2675 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2676 return -EIO;
5595b528
DW
2677 }
2678
2ff729f5
DW
2679 if (end >> agaw_to_width(domain->agaw)) {
2680 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2681 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2682 agaw_to_width(domain->agaw),
2683 dmi_get_system_info(DMI_BIOS_VENDOR),
2684 dmi_get_system_info(DMI_BIOS_VERSION),
2685 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2686 return -EIO;
2ff729f5 2687 }
19943b0e 2688
d66ce54b
JR
2689 return iommu_domain_identity_map(domain, start, end);
2690}
ba395927 2691
301e7ee1
JR
2692static int md_domain_init(struct dmar_domain *domain, int guest_width);
2693
071e1374 2694static int __init si_domain_init(int hw)
2c2e2c38 2695{
4de354ec
LB
2696 struct dmar_rmrr_unit *rmrr;
2697 struct device *dev;
2698 int i, nid, ret;
2c2e2c38 2699
ab8dfe25 2700 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2701 if (!si_domain)
2702 return -EFAULT;
2703
301e7ee1 2704 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2c2e2c38
FY
2705 domain_exit(si_domain);
2706 return -EFAULT;
2707 }
2708
19943b0e
DW
2709 if (hw)
2710 return 0;
2711
c7ab48d2 2712 for_each_online_node(nid) {
5dfe8660
TH
2713 unsigned long start_pfn, end_pfn;
2714 int i;
2715
2716 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2717 ret = iommu_domain_identity_map(si_domain,
2718 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2719 if (ret)
2720 return ret;
2721 }
c7ab48d2
DW
2722 }
2723
4de354ec
LB
2724 /*
2725 * Normally we use DMA domains for devices which have RMRRs. But we
2726 * loosen this requirement for graphics and USB devices. Identity map
2727 * the RMRRs for graphics and USB devices so that they can use the
2728 * si_domain.
2729 */
2730 for_each_rmrr_units(rmrr) {
2731 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2732 i, dev) {
2733 unsigned long long start = rmrr->base_address;
2734 unsigned long long end = rmrr->end_address;
2735
2736 if (device_is_rmrr_locked(dev))
2737 continue;
2738
2739 if (WARN_ON(end < start ||
2740 end >> agaw_to_width(si_domain->agaw)))
2741 continue;
2742
2743 ret = iommu_domain_identity_map(si_domain, start, end);
2744 if (ret)
2745 return ret;
2746 }
2747 }
2748
2c2e2c38
FY
2749 return 0;
2750}
2751
9b226624 2752static int identity_mapping(struct device *dev)
2c2e2c38
FY
2753{
2754 struct device_domain_info *info;
2755
9b226624 2756 info = dev->archdata.iommu;
cb452a40
MT
2757 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2758 return (info->domain == si_domain);
2c2e2c38 2759
2c2e2c38
FY
2760 return 0;
2761}
2762
28ccce0d 2763static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2764{
0ac72664 2765 struct dmar_domain *ndomain;
5a8f40e8 2766 struct intel_iommu *iommu;
156baca8 2767 u8 bus, devfn;
2c2e2c38 2768
5913c9bf 2769 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2770 if (!iommu)
2771 return -ENODEV;
2772
5db31569 2773 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2774 if (ndomain != domain)
2775 return -EBUSY;
2c2e2c38
FY
2776
2777 return 0;
2778}
2779
0b9d9753 2780static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2781{
2782 struct dmar_rmrr_unit *rmrr;
832bd858 2783 struct device *tmp;
ea2447f7
TM
2784 int i;
2785
0e242612 2786 rcu_read_lock();
ea2447f7 2787 for_each_rmrr_units(rmrr) {
b683b230
JL
2788 /*
2789 * Return TRUE if this RMRR contains the device that
2790 * is passed in.
2791 */
2792 for_each_active_dev_scope(rmrr->devices,
2793 rmrr->devices_cnt, i, tmp)
e143fd45
EA
2794 if (tmp == dev ||
2795 is_downstream_to_pci_bridge(dev, tmp)) {
0e242612 2796 rcu_read_unlock();
ea2447f7 2797 return true;
b683b230 2798 }
ea2447f7 2799 }
0e242612 2800 rcu_read_unlock();
ea2447f7
TM
2801 return false;
2802}
2803
1c5c59fb
EA
2804/**
2805 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2806 * is relaxable (i.e. allowed to not be enforced under some conditions)
2807 * @dev: device handle
2808 *
2809 * We assume that PCI USB devices with RMRRs have them largely
2810 * for historical reasons and that the RMRR space is not actively used post
2811 * boot. This exclusion may change if vendors begin to abuse it.
2812 *
2813 * The same exception is made for graphics devices, with the requirement that
2814 * any use of the RMRR regions will be torn down before assigning the device
2815 * to a guest.
2816 *
2817 * Return: true if the RMRR is relaxable, false otherwise
2818 */
2819static bool device_rmrr_is_relaxable(struct device *dev)
2820{
2821 struct pci_dev *pdev;
2822
2823 if (!dev_is_pci(dev))
2824 return false;
2825
2826 pdev = to_pci_dev(dev);
2827 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2828 return true;
2829 else
2830 return false;
2831}
2832
c875d2c1
AW
2833/*
2834 * There are a couple of cases where we need to restrict the functionality of
2835 * devices associated with RMRRs. The first is when evaluating a device for
2836 * identity mapping because problems exist when devices are moved in and out
2837 * of domains and their respective RMRR information is lost. This means that
2838 * a device with associated RMRRs will never be in a "passthrough" domain.
2839 * The second is use of the device through the IOMMU API. This interface
2840 * expects to have full control of the IOVA space for the device. We cannot
2841 * satisfy both the requirement that RMRR access is maintained and have an
2842 * unencumbered IOVA space. We also have no ability to quiesce the device's
2843 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2844 * We therefore prevent devices associated with an RMRR from participating in
2845 * the IOMMU API, which eliminates them from device assignment.
2846 *
1c5c59fb
EA
2847 * In both cases, devices which have relaxable RMRRs are not concerned by this
2848 * restriction. See device_rmrr_is_relaxable comment.
c875d2c1
AW
2849 */
2850static bool device_is_rmrr_locked(struct device *dev)
2851{
2852 if (!device_has_rmrr(dev))
2853 return false;
2854
1c5c59fb
EA
2855 if (device_rmrr_is_relaxable(dev))
2856 return false;
c875d2c1
AW
2857
2858 return true;
2859}
2860
f273a453
LB
2861/*
2862 * Return the required default domain type for a specific device.
2863 *
2864 * @dev: the device in question
2866 *
2867 * Returns:
2868 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2869 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2870 * - 0: both identity and dynamic domains work for this device
2871 */
0e31a726 2872static int device_def_domain_type(struct device *dev)
6941af28 2873{
3bdb2591
DW
2874 if (dev_is_pci(dev)) {
2875 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2876
c875d2c1 2877 if (device_is_rmrr_locked(dev))
f273a453 2878 return IOMMU_DOMAIN_DMA;
e0fc7e0b 2879
89a6079d
LB
2880 /*
2881 * Prevent any device marked as untrusted from getting
2882 * placed into the statically identity mapping domain.
2883 */
2884 if (pdev->untrusted)
f273a453 2885 return IOMMU_DOMAIN_DMA;
89a6079d 2886
3bdb2591 2887 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 2888 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 2889
3bdb2591 2890 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 2891 return IOMMU_DOMAIN_IDENTITY;
3bdb2591
DW
2892
2893 /*
2894 * We want to start off with all devices in the 1:1 domain, and
2895 * take them out later if we find they can't access all of memory.
2896 *
2897 * However, we can't do this for PCI devices behind bridges,
2898 * because all PCI devices behind the same bridge will end up
2899 * with the same source-id on their transactions.
2900 *
2901 * Practically speaking, we can't change things around for these
2902 * devices at run-time, because we can't be sure there'll be no
2903 * DMA transactions in flight for any of their siblings.
2904 *
2905 * So PCI devices (unless they're on the root bus) as well as
2906 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2907 * the 1:1 domain, just in _case_ one of their siblings turns out
2908 * not to be able to map all of memory.
2909 */
2910 if (!pci_is_pcie(pdev)) {
2911 if (!pci_is_root_bus(pdev->bus))
f273a453 2912 return IOMMU_DOMAIN_DMA;
3bdb2591 2913 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
f273a453 2914 return IOMMU_DOMAIN_DMA;
3bdb2591 2915 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
f273a453 2916 return IOMMU_DOMAIN_DMA;
3bdb2591
DW
2917 } else {
2918 if (device_has_rmrr(dev))
f273a453 2919 return IOMMU_DOMAIN_DMA;
3bdb2591 2920 }
3dfc813d 2921
f273a453
LB
2922 return (iommu_identity_mapping & IDENTMAP_ALL) ?
2923 IOMMU_DOMAIN_IDENTITY : 0;
2924}
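/*
 * Illustrative examples (added for clarity, not in the original
 * file): with IDENTMAP_GFX set, an integrated graphics device gets
 * IOMMU_DOMAIN_IDENTITY; a conventional PCI device sitting behind a
 * PCIe-to-PCI bridge always gets IOMMU_DOMAIN_DMA because it shares
 * a requester ID with its siblings; and a device marked untrusted
 * gets IOMMU_DOMAIN_DMA regardless of the identmap flags.
 */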
2925
ffebeb46
JL
2926static void intel_iommu_init_qi(struct intel_iommu *iommu)
2927{
2928 /*
2929 * Start from a sane iommu hardware state.
2930 * If queued invalidation was already initialized by us
2931 * (for example, while enabling interrupt-remapping) then
2932 * things are already rolling from a sane state.
2933 */
2934 if (!iommu->qi) {
2935 /*
2936 * Clear any previous faults.
2937 */
2938 dmar_fault(-1, iommu);
2939 /*
2940 * Disable queued invalidation if supported and already enabled
2941 * before OS handover.
2942 */
2943 dmar_disable_qi(iommu);
2944 }
2945
2946 if (dmar_enable_qi(iommu)) {
2947 /*
2948 * Queued Invalidate not enabled, use Register Based Invalidate
2949 */
2950 iommu->flush.flush_context = __iommu_flush_context;
2951 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2952 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2953 iommu->name);
2954 } else {
2955 iommu->flush.flush_context = qi_flush_context;
2956 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2957 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2958 }
2959}
2960
091d42e4 2961static int copy_context_table(struct intel_iommu *iommu,
dfddb969 2962 struct root_entry *old_re,
091d42e4
JR
2963 struct context_entry **tbl,
2964 int bus, bool ext)
2965{
dbcd861f 2966 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 2967 struct context_entry *new_ce = NULL, ce;
dfddb969 2968 struct context_entry *old_ce = NULL;
543c8dcf 2969 struct root_entry re;
091d42e4
JR
2970 phys_addr_t old_ce_phys;
2971
2972 tbl_idx = ext ? bus * 2 : bus;
dfddb969 2973 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
2974
2975 for (devfn = 0; devfn < 256; devfn++) {
2976 /* First calculate the correct index */
2977 idx = (ext ? devfn * 2 : devfn) % 256;
2978
2979 if (idx == 0) {
2980 /* First save what we may have and clean up */
2981 if (new_ce) {
2982 tbl[tbl_idx] = new_ce;
2983 __iommu_flush_cache(iommu, new_ce,
2984 VTD_PAGE_SIZE);
2985 pos = 1;
2986 }
2987
2988 if (old_ce)
829383e1 2989 memunmap(old_ce);
091d42e4
JR
2990
2991 ret = 0;
2992 if (devfn < 0x80)
543c8dcf 2993 old_ce_phys = root_entry_lctp(&re);
091d42e4 2994 else
543c8dcf 2995 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
2996
2997 if (!old_ce_phys) {
2998 if (ext && devfn == 0) {
2999 /* No LCTP, try UCTP */
3000 devfn = 0x7f;
3001 continue;
3002 } else {
3003 goto out;
3004 }
3005 }
3006
3007 ret = -ENOMEM;
dfddb969
DW
3008 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3009 MEMREMAP_WB);
091d42e4
JR
3010 if (!old_ce)
3011 goto out;
3012
3013 new_ce = alloc_pgtable_page(iommu->node);
3014 if (!new_ce)
3015 goto out_unmap;
3016
3017 ret = 0;
3018 }
3019
3020 /* Now copy the context entry */
dfddb969 3021 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3022
cf484d0e 3023 if (!__context_present(&ce))
091d42e4
JR
3024 continue;
3025
dbcd861f
JR
3026 did = context_domain_id(&ce);
3027 if (did >= 0 && did < cap_ndoms(iommu->cap))
3028 set_bit(did, iommu->domain_ids);
3029
cf484d0e
JR
3030 /*
3031 * We need a marker for copied context entries. This
3032 * marker needs to work for the old format as well as
3033 * for extended context entries.
3034 *
3035 * Bit 67 of the context entry is used. In the old
3036 * format this bit is available to software, in the
3037 * extended format it is the PGE bit, but PGE is ignored
3038 * by HW if PASIDs are disabled (and thus still
3039 * available).
3040 *
3041 * So disable PASIDs first and then mark the entry
3042 * copied. This means that we don't copy PASID
3043 * translations from the old kernel, but this is fine as
3044 * faults there are not fatal.
3045 */
3046 context_clear_pasid_enable(&ce);
3047 context_set_copied(&ce);
3048
091d42e4
JR
3049 new_ce[idx] = ce;
3050 }
3051
3052 tbl[tbl_idx + pos] = new_ce;
3053
3054 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3055
3056out_unmap:
dfddb969 3057 memunmap(old_ce);
091d42e4
JR
3058
3059out:
3060 return ret;
3061}
3062
3063static int copy_translation_tables(struct intel_iommu *iommu)
3064{
3065 struct context_entry **ctxt_tbls;
dfddb969 3066 struct root_entry *old_rt;
091d42e4
JR
3067 phys_addr_t old_rt_phys;
3068 int ctxt_table_entries;
3069 unsigned long flags;
3070 u64 rtaddr_reg;
3071 int bus, ret;
c3361f2f 3072 bool new_ext, ext;
091d42e4
JR
3073
3074 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3075 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3076 new_ext = !!ecap_ecs(iommu->ecap);
3077
3078 /*
3079 * The RTT bit can only be changed when translation is disabled,
3080 * but disabling translation means to open a window for data
3081 * corruption. So bail out and don't copy anything if we would
3082 * have to change the bit.
3083 */
3084 if (new_ext != ext)
3085 return -EINVAL;
091d42e4
JR
3086
3087 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3088 if (!old_rt_phys)
3089 return -EINVAL;
3090
dfddb969 3091 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3092 if (!old_rt)
3093 return -ENOMEM;
3094
3095 /* This is too big for the stack - allocate it from slab */
3096 ctxt_table_entries = ext ? 512 : 256;
3097 ret = -ENOMEM;
6396bb22 3098 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3099 if (!ctxt_tbls)
3100 goto out_unmap;
3101
3102 for (bus = 0; bus < 256; bus++) {
3103 ret = copy_context_table(iommu, &old_rt[bus],
3104 ctxt_tbls, bus, ext);
3105 if (ret) {
3106 pr_err("%s: Failed to copy context table for bus %d\n",
3107 iommu->name, bus);
3108 continue;
3109 }
3110 }
3111
3112 spin_lock_irqsave(&iommu->lock, flags);
3113
3114 /* Context tables are copied, now write them to the root_entry table */
3115 for (bus = 0; bus < 256; bus++) {
3116 int idx = ext ? bus * 2 : bus;
3117 u64 val;
3118
3119 if (ctxt_tbls[idx]) {
3120 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3121 iommu->root_entry[bus].lo = val;
3122 }
3123
3124 if (!ext || !ctxt_tbls[idx + 1])
3125 continue;
3126
3127 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3128 iommu->root_entry[bus].hi = val;
3129 }
3130
3131 spin_unlock_irqrestore(&iommu->lock, flags);
3132
3133 kfree(ctxt_tbls);
3134
3135 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3136
3137 ret = 0;
3138
3139out_unmap:
dfddb969 3140 memunmap(old_rt);
091d42e4
JR
3141
3142 return ret;
3143}
3144
b779260b 3145static int __init init_dmars(void)
ba395927
KA
3146{
3147 struct dmar_drhd_unit *drhd;
ba395927 3148 struct intel_iommu *iommu;
df4f3c60 3149 int ret;
2c2e2c38 3150
ba395927
KA
3151 /*
3152 * for each drhd
3153 * allocate root
3154 * initialize and program root entry to not present
3155 * endfor
3156 */
3157 for_each_drhd_unit(drhd) {
5e0d2a6f 3158 /*
3159 * lock not needed as this is only incremented in the single-
3160 * threaded kernel __init code path; all other accesses are
3161 * read-only
3162 */
78d8e704 3163 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3164 g_num_of_iommus++;
3165 continue;
3166 }
9f10e5bf 3167 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3168 }
3169
ffebeb46
JL
3170 /* Preallocate enough resources for IOMMU hot-addition */
3171 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3172 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3173
d9630fe9
WH
3174 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3175 GFP_KERNEL);
3176 if (!g_iommus) {
9f10e5bf 3177 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3178 ret = -ENOMEM;
3179 goto error;
3180 }
3181
6a8c6748
LB
3182 for_each_iommu(iommu, drhd) {
3183 if (drhd->ignored) {
3184 iommu_disable_translation(iommu);
3185 continue;
3186 }
3187
56283174
LB
3188 /*
3189 * Find the max pasid size of all IOMMUs in the system.
3190 * We need to ensure the system pasid table is no bigger
3191 * than the smallest supported.
3192 */
765b6a98 3193 if (pasid_supported(iommu)) {
56283174
LB
3194 u32 temp = 2 << ecap_pss(iommu->ecap);
3195
3196 intel_pasid_max_id = min_t(u32, temp,
3197 intel_pasid_max_id);
3198 }
3199
d9630fe9 3200 g_iommus[iommu->seq_id] = iommu;
ba395927 3201
b63d80d1
JR
3202 intel_iommu_init_qi(iommu);
3203
e61d98d8
SS
3204 ret = iommu_init_domains(iommu);
3205 if (ret)
989d51fc 3206 goto free_iommu;
e61d98d8 3207
4158c2ec
JR
3208 init_translation_status(iommu);
3209
091d42e4
JR
3210 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3211 iommu_disable_translation(iommu);
3212 clear_translation_pre_enabled(iommu);
3213 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3214 iommu->name);
3215 }
4158c2ec 3216
ba395927
KA
3217 /*
3218 * TBD:
3219 * we could share the same root & context tables
25985edc 3220 * among all IOMMUs; this needs to be split out later.
ba395927
KA
3221 */
3222 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3223 if (ret)
989d51fc 3224 goto free_iommu;
5f0a7f76 3225
091d42e4
JR
3226 if (translation_pre_enabled(iommu)) {
3227 pr_info("Translation already enabled - trying to copy translation structures\n");
3228
3229 ret = copy_translation_tables(iommu);
3230 if (ret) {
3231 /*
3232 * We found the IOMMU with translation
3233 * enabled - but failed to copy over the
3234 * old root-entry table. Try to proceed
3235 * by disabling translation now and
3236 * allocating a clean root-entry table.
3237 * This might cause DMAR faults, but
3238 * probably the dump will still succeed.
3239 */
3240 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3241 iommu->name);
3242 iommu_disable_translation(iommu);
3243 clear_translation_pre_enabled(iommu);
3244 } else {
3245 pr_info("Copied translation tables from previous kernel for %s\n",
3246 iommu->name);
3247 }
3248 }
3249
4ed0d3e6 3250 if (!ecap_pass_through(iommu->ecap))
19943b0e 3251 hw_pass_through = 0;
8a94ade4 3252#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3253 if (pasid_supported(iommu))
d9737953 3254 intel_svm_init(iommu);
8a94ade4 3255#endif
ba395927
KA
3256 }
3257
a4c34ff1
JR
3258 /*
3259 * Now that qi is enabled on all iommus, set the root entry and flush
3260 * caches. This is required on some Intel X58 chipsets, otherwise the
3261 * flush_context function will loop forever and the boot hangs.
3262 */
3263 for_each_active_iommu(iommu, drhd) {
3264 iommu_flush_write_buffer(iommu);
3265 iommu_set_root_entry(iommu);
3266 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3267 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3268 }
3269
19943b0e 3270 if (iommu_pass_through)
e0fc7e0b
DW
3271 iommu_identity_mapping |= IDENTMAP_ALL;
3272
d3f13810 3273#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3274 dmar_map_gfx = 0;
19943b0e 3275#endif
e0fc7e0b 3276
5daab580
LB
3277 if (!dmar_map_gfx)
3278 iommu_identity_mapping |= IDENTMAP_GFX;
3279
21e722c4
AR
3280 check_tylersburg_isoch();
3281
4de354ec
LB
3282 ret = si_domain_init(hw_pass_through);
3283 if (ret)
3284 goto free_iommu;
86080ccc 3285
ba395927
KA
3286 /*
3287 * for each drhd
3288 * enable fault log
3289 * global invalidate context cache
3290 * global invalidate iotlb
3291 * enable translation
3292 */
7c919779 3293 for_each_iommu(iommu, drhd) {
51a63e67
JC
3294 if (drhd->ignored) {
3295 /*
3296 * we always have to disable PMRs or DMA may fail on
3297 * this device
3298 */
3299 if (force_on)
7c919779 3300 iommu_disable_protect_mem_regions(iommu);
ba395927 3301 continue;
51a63e67 3302 }
ba395927
KA
3303
3304 iommu_flush_write_buffer(iommu);
3305
a222a7f0 3306#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3307 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3308 /*
3309 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3310 * could cause a lock race condition, so drop the lock here.
3311 */
3312 up_write(&dmar_global_lock);
a222a7f0 3313 ret = intel_svm_enable_prq(iommu);
a7755c3c 3314 down_write(&dmar_global_lock);
a222a7f0
DW
3315 if (ret)
3316 goto free_iommu;
3317 }
3318#endif
3460a6d9
KA
3319 ret = dmar_set_interrupt(iommu);
3320 if (ret)
989d51fc 3321 goto free_iommu;
ba395927
KA
3322 }
3323
3324 return 0;
989d51fc
JL
3325
3326free_iommu:
ffebeb46
JL
3327 for_each_active_iommu(iommu, drhd) {
3328 disable_dmar_iommu(iommu);
a868e6b7 3329 free_dmar_iommu(iommu);
ffebeb46 3330 }
13cf0174 3331
d9630fe9 3332 kfree(g_iommus);
13cf0174 3333
989d51fc 3334error:
ba395927
KA
3335 return ret;
3336}
3337
5a5e02a6 3338/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3339static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3340 struct dmar_domain *domain,
3341 unsigned long nrpages, uint64_t dma_mask)
ba395927 3342{
e083ea5b 3343 unsigned long iova_pfn;
ba395927 3344
875764de
DW
3345 /* Restrict dma_mask to the width that the iommu can handle */
3346 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3347 /* Ensure we reserve the whole size-aligned region */
3348 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3349
3350 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3351 /*
3352 * First try to allocate an io virtual address in
284901a9 3353 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3354 * from the higher range
ba395927 3355 */
22e2f9fa 3356 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3357 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3358 if (iova_pfn)
3359 return iova_pfn;
875764de 3360 }
538d5b33
TN
3361 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3362 IOVA_PFN(dma_mask), true);
22e2f9fa 3363 if (unlikely(!iova_pfn)) {
932a6523 3364 dev_err(dev, "Allocating %ld-page iova failed\n", nrpages);
2aac6304 3365 return 0;
f76aec76
KA
3366 }
3367
22e2f9fa 3368 return iova_pfn;
f76aec76
KA
3369}
3370
4ec066c7 3371static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
f76aec76 3372{
1c5ebba9 3373 struct dmar_domain *domain, *tmp;
b1ce5b79 3374 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3375 struct device *i_dev;
3376 int i, ret;
f76aec76 3377
4ec066c7 3378 /* Device shouldn't be attached to any domain. */
1c5ebba9
JR
3379 domain = find_domain(dev);
3380 if (domain)
4ec066c7 3381 return NULL;
1c5ebba9
JR
3382
3383 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3384 if (!domain)
3385 goto out;
ba395927 3386
b1ce5b79
JR
3387 /* We have a new domain - setup possible RMRRs for the device */
3388 rcu_read_lock();
3389 for_each_rmrr_units(rmrr) {
3390 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3391 i, i_dev) {
3392 if (i_dev != dev)
3393 continue;
3394
3395 ret = domain_prepare_identity_map(dev, domain,
3396 rmrr->base_address,
3397 rmrr->end_address);
3398 if (ret)
3399 dev_err(dev, "Mapping reserved region failed\n");
3400 }
3401 }
3402 rcu_read_unlock();
3403
1c5ebba9
JR
3404 tmp = set_domain_for_dev(dev, domain);
3405 if (!tmp || domain != tmp) {
3406 domain_exit(domain);
3407 domain = tmp;
3408 }
3409
3410out:
1c5ebba9 3411 if (!domain)
932a6523 3412 dev_err(dev, "Allocating domain failed\n");
c57b260a
LB
3413 else
3414 domain->domain.type = IOMMU_DOMAIN_DMA;
1c5ebba9 3415
f76aec76
KA
3416 return domain;
3417}
3418
ecb509ec 3419/* Check if the dev needs to go through the non-identity map and unmap process. */
48b2c937 3420static bool iommu_need_mapping(struct device *dev)
2c2e2c38 3421{
98b2fffb 3422 int ret;
2c2e2c38 3423
3d89194a 3424 if (iommu_dummy(dev))
48b2c937 3425 return false;
1e4c64c4 3426
98b2fffb
LB
3427 ret = identity_mapping(dev);
3428 if (ret) {
3429 u64 dma_mask = *dev->dma_mask;
3430
3431 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3432 dma_mask = dev->coherent_dma_mask;
3433
3434 if (dma_mask >= dma_get_required_mask(dev))
48b2c937
CH
3435 return false;
3436
3437 /*
3438 * 32 bit DMA is removed from si_domain and fall back to
3439 * non-identity mapping.
3440 */
3441 dmar_remove_one_dev_info(dev);
98b2fffb
LB
3442 ret = iommu_request_dma_domain_for_dev(dev);
3443 if (ret) {
3444 struct iommu_domain *domain;
3445 struct dmar_domain *dmar_domain;
3446
3447 domain = iommu_get_domain_for_dev(dev);
3448 if (domain) {
3449 dmar_domain = to_dmar_domain(domain);
3450 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3451 }
4ec066c7 3452 get_private_domain_for_dev(dev);
2c2e2c38 3453 }
98b2fffb
LB
3454
3455 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3456 }
3457
48b2c937 3458 return true;
2c2e2c38
FY
3459}
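/*
 * Illustrative example (added for clarity, not in the original file):
 * a device limited to 32-bit DMA on a machine with RAM above 4GiB
 * cannot reach all of memory through an identity map, so it is taken
 * out of si_domain here and switched to a private DMA domain; a
 * 64-bit capable device already in the identity map keeps using it
 * and iommu_need_mapping() returns false.
 */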
3460
21d5d27c
LG
3461static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3462 size_t size, int dir, u64 dma_mask)
f76aec76 3463{
f76aec76 3464 struct dmar_domain *domain;
5b6985ce 3465 phys_addr_t start_paddr;
2aac6304 3466 unsigned long iova_pfn;
f76aec76 3467 int prot = 0;
6865f0d1 3468 int ret;
8c11e798 3469 struct intel_iommu *iommu;
33041ec0 3470 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3471
3472 BUG_ON(dir == DMA_NONE);
2c2e2c38 3473
4ec066c7 3474 domain = find_domain(dev);
f76aec76 3475 if (!domain)
524a669b 3476 return DMA_MAPPING_ERROR;
f76aec76 3477
8c11e798 3478 iommu = domain_get_iommu(domain);
88cb6a74 3479 size = aligned_nrpages(paddr, size);
f76aec76 3480
2aac6304
OP
3481 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3482 if (!iova_pfn)
f76aec76
KA
3483 goto error;
3484
ba395927
KA
3485 /*
3486 * Check if DMAR supports zero-length reads on write only
3487 * mappings.
3488 */
3489 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3490 !cap_zlr(iommu->cap))
ba395927
KA
3491 prot |= DMA_PTE_READ;
3492 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3493 prot |= DMA_PTE_WRITE;
3494 /*
6865f0d1 3495 * paddr - (paddr + size) might be a partial page; we should map the
ba395927 3496 * whole page. Note: if two parts of one page are mapped separately,
6865f0d1 3497 * we might have two guest addresses mapping to the same host paddr, but this
ba395927
KA
3498 * is not a big problem
3499 */
2aac6304 3500 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3501 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3502 if (ret)
3503 goto error;
3504
2aac6304 3505 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3506 start_paddr += paddr & ~PAGE_MASK;
3507 return start_paddr;
ba395927 3508
ba395927 3509error:
2aac6304 3510 if (iova_pfn)
22e2f9fa 3511 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3512 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3513 size, (unsigned long long)paddr, dir);
524a669b 3514 return DMA_MAPPING_ERROR;
ba395927
KA
3515}
3516
ffbbef5c
FT
3517static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3518 unsigned long offset, size_t size,
3519 enum dma_data_direction dir,
00085f1e 3520 unsigned long attrs)
bb9e6d65 3521{
9cc0c2af
CH
3522 if (iommu_need_mapping(dev))
3523 return __intel_map_single(dev, page_to_phys(page) + offset,
3524 size, dir, *dev->dma_mask);
3525 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3526}
3527
3528static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3529 size_t size, enum dma_data_direction dir,
3530 unsigned long attrs)
3531{
9cc0c2af
CH
3532 if (iommu_need_mapping(dev))
3533 return __intel_map_single(dev, phys_addr, size, dir,
3534 *dev->dma_mask);
3535 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3536}
3537
769530e4 3538static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3539{
f76aec76 3540 struct dmar_domain *domain;
d794dc9b 3541 unsigned long start_pfn, last_pfn;
769530e4 3542 unsigned long nrpages;
2aac6304 3543 unsigned long iova_pfn;
8c11e798 3544 struct intel_iommu *iommu;
ea8ea460 3545 struct page *freelist;
f7b0c4ce 3546 struct pci_dev *pdev = NULL;
ba395927 3547
1525a29a 3548 domain = find_domain(dev);
ba395927
KA
3549 BUG_ON(!domain);
3550
8c11e798
WH
3551 iommu = domain_get_iommu(domain);
3552
2aac6304 3553 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3554
769530e4 3555 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3556 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3557 last_pfn = start_pfn + nrpages - 1;
ba395927 3558
f7b0c4ce
LB
3559 if (dev_is_pci(dev))
3560 pdev = to_pci_dev(dev);
3561
932a6523 3562 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3563
ea8ea460 3564 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3565
effa4678
DS
3566 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3567 !has_iova_flush_queue(&domain->iovad)) {
a1ddcbe9 3568 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3569 nrpages, !freelist, 0);
5e0d2a6f 3570 /* free iova */
22e2f9fa 3571 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3572 dma_free_pagelist(freelist);
5e0d2a6f 3573 } else {
13cf0174
JR
3574 queue_iova(&domain->iovad, iova_pfn, nrpages,
3575 (unsigned long)freelist);
5e0d2a6f 3576 /*
 3577		 * queue up the release of the unmap to save the roughly 1/6th of the
 3578		 * CPU time used up by the iotlb flush operation...
3579 */
5e0d2a6f 3580 }
ba395927
KA
3581}
3582
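/*
 * Editor's note (illustrative): the unmap path above either flushes the IOTLB
 * synchronously (strict mode, untrusted devices, or no flush queue available)
 * or defers the IOVA release through queue_iova().  Deferred flushing trades a
 * short window in which stale translations remain usable for much lower CPU
 * overhead; booting with "intel_iommu=strict", which sets intel_iommu_strict,
 * forces the synchronous behaviour for every unmap.
 */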
d41a4adb
JL
3583static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3584 size_t size, enum dma_data_direction dir,
00085f1e 3585 unsigned long attrs)
d41a4adb 3586{
9cc0c2af
CH
3587 if (iommu_need_mapping(dev))
3588 intel_unmap(dev, dev_addr, size);
3589 else
3590 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3591}
3592
3593static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3594 size_t size, enum dma_data_direction dir, unsigned long attrs)
3595{
3596 if (iommu_need_mapping(dev))
3597 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3598}
3599
5040a918 3600static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3601 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3602 unsigned long attrs)
ba395927 3603{
7ec916f8
CH
3604 struct page *page = NULL;
3605 int order;
ba395927 3606
9cc0c2af
CH
3607 if (!iommu_need_mapping(dev))
3608 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3609
7ec916f8
CH
3610 size = PAGE_ALIGN(size);
3611 order = get_order(size);
7ec916f8
CH
3612
3613 if (gfpflags_allow_blocking(flags)) {
3614 unsigned int count = size >> PAGE_SHIFT;
3615
d834c5ab
MS
3616 page = dma_alloc_from_contiguous(dev, count, order,
3617 flags & __GFP_NOWARN);
7ec916f8
CH
3618 }
3619
3620 if (!page)
3621 page = alloc_pages(flags, order);
3622 if (!page)
3623 return NULL;
3624 memset(page_address(page), 0, size);
3625
21d5d27c
LG
3626 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3627 DMA_BIDIRECTIONAL,
3628 dev->coherent_dma_mask);
524a669b 3629 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3630 return page_address(page);
3631 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3632 __free_pages(page, order);
36746436 3633
ba395927
KA
3634 return NULL;
3635}
3636
5040a918 3637static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3638 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3639{
7ec916f8
CH
3640 int order;
3641 struct page *page = virt_to_page(vaddr);
3642
9cc0c2af
CH
3643 if (!iommu_need_mapping(dev))
3644 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3645
7ec916f8
CH
3646 size = PAGE_ALIGN(size);
3647 order = get_order(size);
3648
3649 intel_unmap(dev, dma_handle, size);
3650 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3651 __free_pages(page, order);
ba395927
KA
3652}
3653
5040a918 3654static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3655 int nelems, enum dma_data_direction dir,
00085f1e 3656 unsigned long attrs)
ba395927 3657{
769530e4
OP
3658 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3659 unsigned long nrpages = 0;
3660 struct scatterlist *sg;
3661 int i;
3662
9cc0c2af
CH
3663 if (!iommu_need_mapping(dev))
3664 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3665
769530e4
OP
3666 for_each_sg(sglist, sg, nelems, i) {
3667 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3668 }
3669
3670 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3671}
3672
5040a918 3673static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3674 enum dma_data_direction dir, unsigned long attrs)
ba395927 3675{
ba395927 3676 int i;
ba395927 3677 struct dmar_domain *domain;
f76aec76
KA
3678 size_t size = 0;
3679 int prot = 0;
2aac6304 3680 unsigned long iova_pfn;
f76aec76 3681 int ret;
c03ab37c 3682 struct scatterlist *sg;
b536d24d 3683 unsigned long start_vpfn;
8c11e798 3684 struct intel_iommu *iommu;
ba395927
KA
3685
3686 BUG_ON(dir == DMA_NONE);
48b2c937 3687 if (!iommu_need_mapping(dev))
9cc0c2af 3688 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3689
4ec066c7 3690 domain = find_domain(dev);
f76aec76
KA
3691 if (!domain)
3692 return 0;
3693
8c11e798
WH
3694 iommu = domain_get_iommu(domain);
3695
b536d24d 3696 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3697 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3698
2aac6304 3699 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3700 *dev->dma_mask);
2aac6304 3701 if (!iova_pfn) {
c03ab37c 3702 sglist->dma_length = 0;
f76aec76
KA
3703 return 0;
3704 }
3705
3706 /*
 3707	 * Check if DMAR supports zero-length reads on write-only
 3708	 * mappings.
3709 */
3710 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3711 !cap_zlr(iommu->cap))
f76aec76
KA
3712 prot |= DMA_PTE_READ;
3713 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3714 prot |= DMA_PTE_WRITE;
3715
2aac6304 3716 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3717
f532959b 3718 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3719 if (unlikely(ret)) {
e1605495 3720 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3721 start_vpfn + size - 1,
3722 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3723 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3724 return 0;
ba395927
KA
3725 }
3726
ba395927
KA
3727 return nelems;
3728}
3729
02b4da5f 3730static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3731 .alloc = intel_alloc_coherent,
3732 .free = intel_free_coherent,
ba395927
KA
3733 .map_sg = intel_map_sg,
3734 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3735 .map_page = intel_map_page,
3736 .unmap_page = intel_unmap_page,
21d5d27c 3737 .map_resource = intel_map_resource,
9cc0c2af 3738 .unmap_resource = intel_unmap_resource,
fec777c3 3739 .dma_supported = dma_direct_supported,
ba395927
KA
3740};
3741
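/*
 * Editor's sketch (not part of the driver): once dma_ops points at
 * intel_dma_ops, ordinary driver calls through the generic DMA API are
 * dispatched to the callbacks above.  A minimal, hypothetical example, where
 * "dev" and "buf" are assumed names:
 *
 *	dma_addr_t handle;
 *	void *buf = kmalloc(1024, GFP_KERNEL);
 *
 *	handle = dma_map_single(dev, buf, 1024, DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, handle))
 *		goto err;			// intel_map_page() failed
 *	// ... device performs DMA against "handle" ...
 *	dma_unmap_single(dev, handle, 1024, DMA_TO_DEVICE);
 *
 * dma_map_single() resolves to intel_map_page(), dma_unmap_single() to
 * intel_unmap_page(), and dma_alloc_coherent()/dma_free_coherent() to the
 * intel_{alloc,free}_coherent() callbacks.
 */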
3742static inline int iommu_domain_cache_init(void)
3743{
3744 int ret = 0;
3745
3746 iommu_domain_cache = kmem_cache_create("iommu_domain",
3747 sizeof(struct dmar_domain),
3748 0,
3749 SLAB_HWCACHE_ALIGN,
3750
3751 NULL);
3752 if (!iommu_domain_cache) {
9f10e5bf 3753 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3754 ret = -ENOMEM;
3755 }
3756
3757 return ret;
3758}
3759
3760static inline int iommu_devinfo_cache_init(void)
3761{
3762 int ret = 0;
3763
3764 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3765 sizeof(struct device_domain_info),
3766 0,
3767 SLAB_HWCACHE_ALIGN,
ba395927
KA
3768 NULL);
3769 if (!iommu_devinfo_cache) {
9f10e5bf 3770 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3771 ret = -ENOMEM;
3772 }
3773
3774 return ret;
3775}
3776
ba395927
KA
3777static int __init iommu_init_mempool(void)
3778{
3779 int ret;
ae1ff3d6 3780 ret = iova_cache_get();
ba395927
KA
3781 if (ret)
3782 return ret;
3783
3784 ret = iommu_domain_cache_init();
3785 if (ret)
3786 goto domain_error;
3787
3788 ret = iommu_devinfo_cache_init();
3789 if (!ret)
3790 return ret;
3791
3792 kmem_cache_destroy(iommu_domain_cache);
3793domain_error:
ae1ff3d6 3794 iova_cache_put();
ba395927
KA
3795
3796 return -ENOMEM;
3797}
3798
3799static void __init iommu_exit_mempool(void)
3800{
3801 kmem_cache_destroy(iommu_devinfo_cache);
3802 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3803 iova_cache_put();
ba395927
KA
3804}
3805
556ab45f
DW
3806static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3807{
3808 struct dmar_drhd_unit *drhd;
3809 u32 vtbar;
3810 int rc;
3811
3812 /* We know that this device on this chipset has its own IOMMU.
3813 * If we find it under a different IOMMU, then the BIOS is lying
3814 * to us. Hope that the IOMMU for this device is actually
3815 * disabled, and it needs no translation...
3816 */
3817 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3818 if (rc) {
3819 /* "can't" happen */
3820 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3821 return;
3822 }
3823 vtbar &= 0xffff0000;
3824
 3825	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
3826 drhd = dmar_find_matched_drhd_unit(pdev);
3827 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3828 TAINT_FIRMWARE_WORKAROUND,
3829 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3830 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3831}
3832DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3833
ba395927
KA
3834static void __init init_no_remapping_devices(void)
3835{
3836 struct dmar_drhd_unit *drhd;
832bd858 3837 struct device *dev;
b683b230 3838 int i;
ba395927
KA
3839
3840 for_each_drhd_unit(drhd) {
3841 if (!drhd->include_all) {
b683b230
JL
3842 for_each_active_dev_scope(drhd->devices,
3843 drhd->devices_cnt, i, dev)
3844 break;
832bd858 3845 /* ignore DMAR unit if no devices exist */
ba395927
KA
3846 if (i == drhd->devices_cnt)
3847 drhd->ignored = 1;
3848 }
3849 }
3850
7c919779 3851 for_each_active_drhd_unit(drhd) {
7c919779 3852 if (drhd->include_all)
ba395927
KA
3853 continue;
3854
b683b230
JL
3855 for_each_active_dev_scope(drhd->devices,
3856 drhd->devices_cnt, i, dev)
832bd858 3857 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3858 break;
ba395927
KA
3859 if (i < drhd->devices_cnt)
3860 continue;
3861
c0771df8
DW
3862 /* This IOMMU has *only* gfx devices. Either bypass it or
3863 set the gfx_mapped flag, as appropriate */
cf1ec453 3864 if (!dmar_map_gfx) {
c0771df8 3865 drhd->ignored = 1;
b683b230
JL
3866 for_each_active_dev_scope(drhd->devices,
3867 drhd->devices_cnt, i, dev)
832bd858 3868 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3869 }
3870 }
3871}
3872
f59c7b69
FY
3873#ifdef CONFIG_SUSPEND
3874static int init_iommu_hw(void)
3875{
3876 struct dmar_drhd_unit *drhd;
3877 struct intel_iommu *iommu = NULL;
3878
3879 for_each_active_iommu(iommu, drhd)
3880 if (iommu->qi)
3881 dmar_reenable_qi(iommu);
3882
b779260b
JC
3883 for_each_iommu(iommu, drhd) {
3884 if (drhd->ignored) {
3885 /*
3886 * we always have to disable PMRs or DMA may fail on
3887 * this device
3888 */
3889 if (force_on)
3890 iommu_disable_protect_mem_regions(iommu);
3891 continue;
3892 }
095303e0 3893
f59c7b69
FY
3894 iommu_flush_write_buffer(iommu);
3895
3896 iommu_set_root_entry(iommu);
3897
3898 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3899 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3900 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3901 iommu_enable_translation(iommu);
b94996c9 3902 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3903 }
3904
3905 return 0;
3906}
3907
3908static void iommu_flush_all(void)
3909{
3910 struct dmar_drhd_unit *drhd;
3911 struct intel_iommu *iommu;
3912
3913 for_each_active_iommu(iommu, drhd) {
3914 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3915 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3916 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3917 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3918 }
3919}
3920
134fac3f 3921static int iommu_suspend(void)
f59c7b69
FY
3922{
3923 struct dmar_drhd_unit *drhd;
3924 struct intel_iommu *iommu = NULL;
3925 unsigned long flag;
3926
3927 for_each_active_iommu(iommu, drhd) {
6396bb22 3928 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
3929 GFP_ATOMIC);
3930 if (!iommu->iommu_state)
3931 goto nomem;
3932 }
3933
3934 iommu_flush_all();
3935
3936 for_each_active_iommu(iommu, drhd) {
3937 iommu_disable_translation(iommu);
3938
1f5b3c3f 3939 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3940
3941 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3942 readl(iommu->reg + DMAR_FECTL_REG);
3943 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3944 readl(iommu->reg + DMAR_FEDATA_REG);
3945 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3946 readl(iommu->reg + DMAR_FEADDR_REG);
3947 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3948 readl(iommu->reg + DMAR_FEUADDR_REG);
3949
1f5b3c3f 3950 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3951 }
3952 return 0;
3953
3954nomem:
3955 for_each_active_iommu(iommu, drhd)
3956 kfree(iommu->iommu_state);
3957
3958 return -ENOMEM;
3959}
3960
134fac3f 3961static void iommu_resume(void)
f59c7b69
FY
3962{
3963 struct dmar_drhd_unit *drhd;
3964 struct intel_iommu *iommu = NULL;
3965 unsigned long flag;
3966
3967 if (init_iommu_hw()) {
b779260b
JC
3968 if (force_on)
3969 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3970 else
3971 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3972 return;
f59c7b69
FY
3973 }
3974
3975 for_each_active_iommu(iommu, drhd) {
3976
1f5b3c3f 3977 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3978
3979 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3980 iommu->reg + DMAR_FECTL_REG);
3981 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3982 iommu->reg + DMAR_FEDATA_REG);
3983 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3984 iommu->reg + DMAR_FEADDR_REG);
3985 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3986 iommu->reg + DMAR_FEUADDR_REG);
3987
1f5b3c3f 3988 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3989 }
3990
3991 for_each_active_iommu(iommu, drhd)
3992 kfree(iommu->iommu_state);
f59c7b69
FY
3993}
3994
134fac3f 3995static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3996 .resume = iommu_resume,
3997 .suspend = iommu_suspend,
3998};
3999
134fac3f 4000static void __init init_iommu_pm_ops(void)
f59c7b69 4001{
134fac3f 4002 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4003}
4004
4005#else
99592ba4 4006static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4007#endif /* CONFIG_PM */
4008
c2a0b538 4009int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4010{
4011 struct acpi_dmar_reserved_memory *rmrr;
4012 struct dmar_rmrr_unit *rmrru;
4013
4014 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4015 if (!rmrru)
0659b8dc 4016 goto out;
318fe7df
SS
4017
4018 rmrru->hdr = header;
4019 rmrr = (struct acpi_dmar_reserved_memory *)header;
4020 rmrru->base_address = rmrr->base_address;
4021 rmrru->end_address = rmrr->end_address;
0659b8dc 4022
2e455289
JL
4023 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4024 ((void *)rmrr) + rmrr->header.length,
4025 &rmrru->devices_cnt);
0659b8dc 4026 if (rmrru->devices_cnt && rmrru->devices == NULL)
5f64ce54 4027 goto free_rmrru;
318fe7df 4028
2e455289 4029 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4030
2e455289 4031 return 0;
0659b8dc
EA
4032free_rmrru:
4033 kfree(rmrru);
4034out:
4035 return -ENOMEM;
318fe7df
SS
4036}
4037
6b197249
JL
4038static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4039{
4040 struct dmar_atsr_unit *atsru;
4041 struct acpi_dmar_atsr *tmp;
4042
4043 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4044 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4045 if (atsr->segment != tmp->segment)
4046 continue;
4047 if (atsr->header.length != tmp->header.length)
4048 continue;
4049 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4050 return atsru;
4051 }
4052
4053 return NULL;
4054}
4055
4056int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4057{
4058 struct acpi_dmar_atsr *atsr;
4059 struct dmar_atsr_unit *atsru;
4060
b608fe35 4061 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4062 return 0;
4063
318fe7df 4064 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4065 atsru = dmar_find_atsr(atsr);
4066 if (atsru)
4067 return 0;
4068
4069 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4070 if (!atsru)
4071 return -ENOMEM;
4072
6b197249
JL
4073 /*
4074 * If memory is allocated from slab by ACPI _DSM method, we need to
4075 * copy the memory content because the memory buffer will be freed
4076 * on return.
4077 */
4078 atsru->hdr = (void *)(atsru + 1);
4079 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4080 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4081 if (!atsru->include_all) {
4082 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4083 (void *)atsr + atsr->header.length,
4084 &atsru->devices_cnt);
4085 if (atsru->devices_cnt && atsru->devices == NULL) {
4086 kfree(atsru);
4087 return -ENOMEM;
4088 }
4089 }
318fe7df 4090
0e242612 4091 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4092
4093 return 0;
4094}
4095
9bdc531e
JL
4096static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4097{
4098 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4099 kfree(atsru);
4100}
4101
6b197249
JL
4102int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4103{
4104 struct acpi_dmar_atsr *atsr;
4105 struct dmar_atsr_unit *atsru;
4106
4107 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4108 atsru = dmar_find_atsr(atsr);
4109 if (atsru) {
4110 list_del_rcu(&atsru->list);
4111 synchronize_rcu();
4112 intel_iommu_free_atsr(atsru);
4113 }
4114
4115 return 0;
4116}
4117
4118int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4119{
4120 int i;
4121 struct device *dev;
4122 struct acpi_dmar_atsr *atsr;
4123 struct dmar_atsr_unit *atsru;
4124
4125 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4126 atsru = dmar_find_atsr(atsr);
4127 if (!atsru)
4128 return 0;
4129
194dc870 4130 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4131 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4132 i, dev)
4133 return -EBUSY;
194dc870 4134 }
6b197249
JL
4135
4136 return 0;
4137}
4138
ffebeb46
JL
4139static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4140{
e083ea5b 4141 int sp, ret;
ffebeb46
JL
4142 struct intel_iommu *iommu = dmaru->iommu;
4143
4144 if (g_iommus[iommu->seq_id])
4145 return 0;
4146
4147 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4148 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4149 iommu->name);
4150 return -ENXIO;
4151 }
4152 if (!ecap_sc_support(iommu->ecap) &&
4153 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4154 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4155 iommu->name);
4156 return -ENXIO;
4157 }
4158 sp = domain_update_iommu_superpage(iommu) - 1;
4159 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4160 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4161 iommu->name);
4162 return -ENXIO;
4163 }
4164
4165 /*
4166 * Disable translation if already enabled prior to OS handover.
4167 */
4168 if (iommu->gcmd & DMA_GCMD_TE)
4169 iommu_disable_translation(iommu);
4170
4171 g_iommus[iommu->seq_id] = iommu;
4172 ret = iommu_init_domains(iommu);
4173 if (ret == 0)
4174 ret = iommu_alloc_root_entry(iommu);
4175 if (ret)
4176 goto out;
4177
8a94ade4 4178#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4179 if (pasid_supported(iommu))
d9737953 4180 intel_svm_init(iommu);
8a94ade4
DW
4181#endif
4182
ffebeb46
JL
4183 if (dmaru->ignored) {
4184 /*
4185 * we always have to disable PMRs or DMA may fail on this device
4186 */
4187 if (force_on)
4188 iommu_disable_protect_mem_regions(iommu);
4189 return 0;
4190 }
4191
4192 intel_iommu_init_qi(iommu);
4193 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4194
4195#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4196 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4197 ret = intel_svm_enable_prq(iommu);
4198 if (ret)
4199 goto disable_iommu;
4200 }
4201#endif
ffebeb46
JL
4202 ret = dmar_set_interrupt(iommu);
4203 if (ret)
4204 goto disable_iommu;
4205
4206 iommu_set_root_entry(iommu);
4207 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4208 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4209 iommu_enable_translation(iommu);
4210
ffebeb46
JL
4211 iommu_disable_protect_mem_regions(iommu);
4212 return 0;
4213
4214disable_iommu:
4215 disable_dmar_iommu(iommu);
4216out:
4217 free_dmar_iommu(iommu);
4218 return ret;
4219}
4220
6b197249
JL
4221int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4222{
ffebeb46
JL
4223 int ret = 0;
4224 struct intel_iommu *iommu = dmaru->iommu;
4225
4226 if (!intel_iommu_enabled)
4227 return 0;
4228 if (iommu == NULL)
4229 return -EINVAL;
4230
4231 if (insert) {
4232 ret = intel_iommu_add(dmaru);
4233 } else {
4234 disable_dmar_iommu(iommu);
4235 free_dmar_iommu(iommu);
4236 }
4237
4238 return ret;
6b197249
JL
4239}
4240
9bdc531e
JL
4241static void intel_iommu_free_dmars(void)
4242{
4243 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4244 struct dmar_atsr_unit *atsru, *atsr_n;
4245
4246 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4247 list_del(&rmrru->list);
4248 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4249 kfree(rmrru);
318fe7df
SS
4250 }
4251
9bdc531e
JL
4252 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4253 list_del(&atsru->list);
4254 intel_iommu_free_atsr(atsru);
4255 }
318fe7df
SS
4256}
4257
4258int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4259{
b683b230 4260 int i, ret = 1;
318fe7df 4261 struct pci_bus *bus;
832bd858
DW
4262 struct pci_dev *bridge = NULL;
4263 struct device *tmp;
318fe7df
SS
4264 struct acpi_dmar_atsr *atsr;
4265 struct dmar_atsr_unit *atsru;
4266
4267 dev = pci_physfn(dev);
318fe7df 4268 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4269 bridge = bus->self;
d14053b3
DW
4270 /* If it's an integrated device, allow ATS */
4271 if (!bridge)
4272 return 1;
4273 /* Connected via non-PCIe: no ATS */
4274 if (!pci_is_pcie(bridge) ||
62f87c0e 4275 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4276 return 0;
d14053b3 4277 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4278 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4279 break;
318fe7df
SS
4280 }
4281
0e242612 4282 rcu_read_lock();
b5f82ddf
JL
4283 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4284 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4285 if (atsr->segment != pci_domain_nr(dev->bus))
4286 continue;
4287
b683b230 4288 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4289 if (tmp == &bridge->dev)
b683b230 4290 goto out;
b5f82ddf
JL
4291
4292 if (atsru->include_all)
b683b230 4293 goto out;
b5f82ddf 4294 }
b683b230
JL
4295 ret = 0;
4296out:
0e242612 4297 rcu_read_unlock();
318fe7df 4298
b683b230 4299 return ret;
318fe7df
SS
4300}
4301
59ce0515
JL
4302int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4303{
e083ea5b 4304 int ret;
59ce0515
JL
4305 struct dmar_rmrr_unit *rmrru;
4306 struct dmar_atsr_unit *atsru;
4307 struct acpi_dmar_atsr *atsr;
4308 struct acpi_dmar_reserved_memory *rmrr;
4309
b608fe35 4310 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4311 return 0;
4312
4313 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4314 rmrr = container_of(rmrru->hdr,
4315 struct acpi_dmar_reserved_memory, header);
4316 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4317 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4318 ((void *)rmrr) + rmrr->header.length,
4319 rmrr->segment, rmrru->devices,
4320 rmrru->devices_cnt);
e083ea5b 4321 if (ret < 0)
59ce0515 4322 return ret;
e6a8c9b3 4323 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4324 dmar_remove_dev_scope(info, rmrr->segment,
4325 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4326 }
4327 }
4328
4329 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4330 if (atsru->include_all)
4331 continue;
4332
4333 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4334 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4335 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4336 (void *)atsr + atsr->header.length,
4337 atsr->segment, atsru->devices,
4338 atsru->devices_cnt);
4339 if (ret > 0)
4340 break;
e083ea5b 4341 else if (ret < 0)
59ce0515 4342 return ret;
e6a8c9b3 4343 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4344 if (dmar_remove_dev_scope(info, atsr->segment,
4345 atsru->devices, atsru->devices_cnt))
4346 break;
4347 }
4348 }
4349
4350 return 0;
4351}
4352
75f05569
JL
4353static int intel_iommu_memory_notifier(struct notifier_block *nb,
4354 unsigned long val, void *v)
4355{
4356 struct memory_notify *mhp = v;
4357 unsigned long long start, end;
4358 unsigned long start_vpfn, last_vpfn;
4359
4360 switch (val) {
4361 case MEM_GOING_ONLINE:
4362 start = mhp->start_pfn << PAGE_SHIFT;
4363 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4364 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4365 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4366 start, end);
4367 return NOTIFY_BAD;
4368 }
4369 break;
4370
4371 case MEM_OFFLINE:
4372 case MEM_CANCEL_ONLINE:
4373 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4374 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4375 while (start_vpfn <= last_vpfn) {
4376 struct iova *iova;
4377 struct dmar_drhd_unit *drhd;
4378 struct intel_iommu *iommu;
ea8ea460 4379 struct page *freelist;
75f05569
JL
4380
4381 iova = find_iova(&si_domain->iovad, start_vpfn);
4382 if (iova == NULL) {
9f10e5bf 4383 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4384 start_vpfn);
4385 break;
4386 }
4387
4388 iova = split_and_remove_iova(&si_domain->iovad, iova,
4389 start_vpfn, last_vpfn);
4390 if (iova == NULL) {
9f10e5bf 4391 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4392 start_vpfn, last_vpfn);
4393 return NOTIFY_BAD;
4394 }
4395
ea8ea460
DW
4396 freelist = domain_unmap(si_domain, iova->pfn_lo,
4397 iova->pfn_hi);
4398
75f05569
JL
4399 rcu_read_lock();
4400 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4401 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4402 iova->pfn_lo, iova_size(iova),
ea8ea460 4403 !freelist, 0);
75f05569 4404 rcu_read_unlock();
ea8ea460 4405 dma_free_pagelist(freelist);
75f05569
JL
4406
4407 start_vpfn = iova->pfn_hi + 1;
4408 free_iova_mem(iova);
4409 }
4410 break;
4411 }
4412
4413 return NOTIFY_OK;
4414}
4415
4416static struct notifier_block intel_iommu_memory_nb = {
4417 .notifier_call = intel_iommu_memory_notifier,
4418 .priority = 0
4419};
4420
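/*
 * Editor's note: this notifier only has to maintain si_domain (the static
 * identity map).  That is why intel_iommu_init() registers it only when
 * "si_domain && !hw_pass_through"; with hardware passthrough there is no
 * identity page table to extend or shrink on memory hotplug.
 */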
22e2f9fa
OP
4421static void free_all_cpu_cached_iovas(unsigned int cpu)
4422{
4423 int i;
4424
4425 for (i = 0; i < g_num_of_iommus; i++) {
4426 struct intel_iommu *iommu = g_iommus[i];
4427 struct dmar_domain *domain;
0caa7616 4428 int did;
22e2f9fa
OP
4429
4430 if (!iommu)
4431 continue;
4432
3bd4f911 4433 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4434 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4435
4436 if (!domain)
4437 continue;
4438 free_cpu_cached_iovas(cpu, &domain->iovad);
4439 }
4440 }
4441}
4442
21647615 4443static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4444{
21647615 4445 free_all_cpu_cached_iovas(cpu);
21647615 4446 return 0;
aa473240
OP
4447}
4448
161b28aa
JR
4449static void intel_disable_iommus(void)
4450{
4451 struct intel_iommu *iommu = NULL;
4452 struct dmar_drhd_unit *drhd;
4453
4454 for_each_iommu(iommu, drhd)
4455 iommu_disable_translation(iommu);
4456}
4457
a7fdb6e6
JR
4458static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4459{
2926a2aa
JR
4460 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4461
4462 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4463}
4464
a5459cfe
AW
4465static ssize_t intel_iommu_show_version(struct device *dev,
4466 struct device_attribute *attr,
4467 char *buf)
4468{
a7fdb6e6 4469 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4470 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4471 return sprintf(buf, "%d:%d\n",
4472 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4473}
4474static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4475
4476static ssize_t intel_iommu_show_address(struct device *dev,
4477 struct device_attribute *attr,
4478 char *buf)
4479{
a7fdb6e6 4480 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4481 return sprintf(buf, "%llx\n", iommu->reg_phys);
4482}
4483static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4484
4485static ssize_t intel_iommu_show_cap(struct device *dev,
4486 struct device_attribute *attr,
4487 char *buf)
4488{
a7fdb6e6 4489 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4490 return sprintf(buf, "%llx\n", iommu->cap);
4491}
4492static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4493
4494static ssize_t intel_iommu_show_ecap(struct device *dev,
4495 struct device_attribute *attr,
4496 char *buf)
4497{
a7fdb6e6 4498 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4499 return sprintf(buf, "%llx\n", iommu->ecap);
4500}
4501static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4502
2238c082
AW
4503static ssize_t intel_iommu_show_ndoms(struct device *dev,
4504 struct device_attribute *attr,
4505 char *buf)
4506{
a7fdb6e6 4507 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4508 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4509}
4510static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4511
4512static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4513 struct device_attribute *attr,
4514 char *buf)
4515{
a7fdb6e6 4516 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4517 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4518 cap_ndoms(iommu->cap)));
4519}
4520static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4521
a5459cfe
AW
4522static struct attribute *intel_iommu_attrs[] = {
4523 &dev_attr_version.attr,
4524 &dev_attr_address.attr,
4525 &dev_attr_cap.attr,
4526 &dev_attr_ecap.attr,
2238c082
AW
4527 &dev_attr_domains_supported.attr,
4528 &dev_attr_domains_used.attr,
a5459cfe
AW
4529 NULL,
4530};
4531
4532static struct attribute_group intel_iommu_group = {
4533 .name = "intel-iommu",
4534 .attrs = intel_iommu_attrs,
4535};
4536
4537const struct attribute_group *intel_iommu_groups[] = {
4538 &intel_iommu_group,
4539 NULL,
4540};
4541
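/*
 * Editor's note (paths are an assumption based on the iommu_device_sysfs_add()
 * call made in intel_iommu_init() below): these attributes are expected to
 * appear under /sys/class/iommu/<iommu->name>/intel-iommu/, e.g.
 * /sys/class/iommu/dmar0/intel-iommu/cap, exposing the raw capability and
 * extended capability registers to userspace.
 */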
89a6079d
LB
4542static int __init platform_optin_force_iommu(void)
4543{
4544 struct pci_dev *pdev = NULL;
4545 bool has_untrusted_dev = false;
4546
4547 if (!dmar_platform_optin() || no_platform_optin)
4548 return 0;
4549
4550 for_each_pci_dev(pdev) {
4551 if (pdev->untrusted) {
4552 has_untrusted_dev = true;
4553 break;
4554 }
4555 }
4556
4557 if (!has_untrusted_dev)
4558 return 0;
4559
4560 if (no_iommu || dmar_disabled)
4561 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4562
4563 /*
4564 * If Intel-IOMMU is disabled by default, we will apply identity
4565 * map for all devices except those marked as being untrusted.
4566 */
4567 if (dmar_disabled)
4568 iommu_identity_mapping |= IDENTMAP_ALL;
4569
4570 dmar_disabled = 0;
4571#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4572 swiotlb = 0;
4573#endif
4574 no_iommu = 0;
4575
4576 return 1;
4577}
4578
fa212a97
LB
4579static int __init probe_acpi_namespace_devices(void)
4580{
4581 struct dmar_drhd_unit *drhd;
af88ec39
QC
4582 /* To avoid a -Wunused-but-set-variable warning. */
4583 struct intel_iommu *iommu __maybe_unused;
fa212a97
LB
4584 struct device *dev;
4585 int i, ret = 0;
4586
4587 for_each_active_iommu(iommu, drhd) {
4588 for_each_active_dev_scope(drhd->devices,
4589 drhd->devices_cnt, i, dev) {
4590 struct acpi_device_physical_node *pn;
4591 struct iommu_group *group;
4592 struct acpi_device *adev;
4593
4594 if (dev->bus != &acpi_bus_type)
4595 continue;
4596
4597 adev = to_acpi_device(dev);
4598 mutex_lock(&adev->physical_node_lock);
4599 list_for_each_entry(pn,
4600 &adev->physical_node_list, node) {
4601 group = iommu_group_get(pn->dev);
4602 if (group) {
4603 iommu_group_put(group);
4604 continue;
4605 }
4606
4607 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4608 ret = iommu_probe_device(pn->dev);
4609 if (ret)
4610 break;
4611 }
4612 mutex_unlock(&adev->physical_node_lock);
4613
4614 if (ret)
4615 return ret;
4616 }
4617 }
4618
4619 return 0;
4620}
4621
ba395927
KA
4622int __init intel_iommu_init(void)
4623{
9bdc531e 4624 int ret = -ENODEV;
3a93c841 4625 struct dmar_drhd_unit *drhd;
7c919779 4626 struct intel_iommu *iommu;
ba395927 4627
89a6079d
LB
4628 /*
4629 * Intel IOMMU is required for a TXT/tboot launch or platform
4630 * opt in, so enforce that.
4631 */
4632 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4633
3a5670e8
JL
4634 if (iommu_init_mempool()) {
4635 if (force_on)
4636 panic("tboot: Failed to initialize iommu memory\n");
4637 return -ENOMEM;
4638 }
4639
4640 down_write(&dmar_global_lock);
a59b50e9
JC
4641 if (dmar_table_init()) {
4642 if (force_on)
4643 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4644 goto out_free_dmar;
a59b50e9 4645 }
ba395927 4646
c2c7286a 4647 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4648 if (force_on)
4649 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4650 goto out_free_dmar;
a59b50e9 4651 }
1886e8a9 4652
ec154bf5
JR
4653 up_write(&dmar_global_lock);
4654
4655 /*
4656 * The bus notifier takes the dmar_global_lock, so lockdep will
4657 * complain later when we register it under the lock.
4658 */
4659 dmar_register_bus_notifier();
4660
4661 down_write(&dmar_global_lock);
4662
161b28aa 4663 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4664 /*
4665 * We exit the function here to ensure IOMMU's remapping and
 4666		 * mempool aren't set up, which means that the IOMMU's PMRs
 4667		 * won't be disabled via the call to init_dmars(). So disable
 4668		 * it explicitly here. The PMRs were set up by tboot prior to
4669 * calling SENTER, but the kernel is expected to reset/tear
4670 * down the PMRs.
4671 */
4672 if (intel_iommu_tboot_noforce) {
4673 for_each_iommu(iommu, drhd)
4674 iommu_disable_protect_mem_regions(iommu);
4675 }
4676
161b28aa
JR
4677 /*
4678 * Make sure the IOMMUs are switched off, even when we
4679 * boot into a kexec kernel and the previous kernel left
4680 * them enabled
4681 */
4682 intel_disable_iommus();
9bdc531e 4683 goto out_free_dmar;
161b28aa 4684 }
2ae21010 4685
318fe7df 4686 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4687 pr_info("No RMRR found\n");
318fe7df
SS
4688
4689 if (list_empty(&dmar_atsr_units))
9f10e5bf 4690 pr_info("No ATSR found\n");
318fe7df 4691
51a63e67
JC
4692 if (dmar_init_reserved_ranges()) {
4693 if (force_on)
4694 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4695 goto out_free_reserved_range;
51a63e67 4696 }
ba395927 4697
cf1ec453
LB
4698 if (dmar_map_gfx)
4699 intel_iommu_gfx_mapped = 1;
4700
ba395927
KA
4701 init_no_remapping_devices();
4702
b779260b 4703 ret = init_dmars();
ba395927 4704 if (ret) {
a59b50e9
JC
4705 if (force_on)
4706 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4707 pr_err("Initialization failed\n");
9bdc531e 4708 goto out_free_reserved_range;
ba395927 4709 }
3a5670e8 4710 up_write(&dmar_global_lock);
ba395927 4711
4fac8076 4712#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4713 swiotlb = 0;
4714#endif
19943b0e 4715 dma_ops = &intel_dma_ops;
4ed0d3e6 4716
134fac3f 4717 init_iommu_pm_ops();
a8bcbb0d 4718
39ab9555
JR
4719 for_each_active_iommu(iommu, drhd) {
4720 iommu_device_sysfs_add(&iommu->iommu, NULL,
4721 intel_iommu_groups,
4722 "%s", iommu->name);
4723 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4724 iommu_device_register(&iommu->iommu);
4725 }
a5459cfe 4726
4236d97d 4727 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
75f05569
JL
4728 if (si_domain && !hw_pass_through)
4729 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4730 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4731 intel_iommu_cpu_dead);
d8190dc6 4732
d5692d4a 4733 down_read(&dmar_global_lock);
fa212a97
LB
4734 if (probe_acpi_namespace_devices())
4735 pr_warn("ACPI name space devices didn't probe correctly\n");
d5692d4a 4736 up_read(&dmar_global_lock);
fa212a97 4737
d8190dc6
LB
4738 /* Finally, we enable the DMA remapping hardware. */
4739 for_each_iommu(iommu, drhd) {
6a8c6748 4740 if (!drhd->ignored && !translation_pre_enabled(iommu))
d8190dc6
LB
4741 iommu_enable_translation(iommu);
4742
4743 iommu_disable_protect_mem_regions(iommu);
4744 }
4745 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4746
8bc1f85c 4747 intel_iommu_enabled = 1;
ee2636b8 4748 intel_iommu_debugfs_init();
8bc1f85c 4749
ba395927 4750 return 0;
9bdc531e
JL
4751
4752out_free_reserved_range:
4753 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4754out_free_dmar:
4755 intel_iommu_free_dmars();
3a5670e8
JL
4756 up_write(&dmar_global_lock);
4757 iommu_exit_mempool();
9bdc531e 4758 return ret;
ba395927 4759}
e820482c 4760
127c7615 4761static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4762{
942067f1 4763 struct dmar_domain *domain;
c7151a8d
WH
4764 struct intel_iommu *iommu;
4765 unsigned long flags;
c7151a8d 4766
55d94043
JR
4767 assert_spin_locked(&device_domain_lock);
4768
127c7615 4769 if (WARN_ON(!info))
c7151a8d
WH
4770 return;
4771
127c7615 4772 iommu = info->iommu;
942067f1 4773 domain = info->domain;
c7151a8d 4774
127c7615 4775 if (info->dev) {
ef848b7e
LB
4776 if (dev_is_pci(info->dev) && sm_supported(iommu))
4777 intel_pasid_tear_down_entry(iommu, info->dev,
4778 PASID_RID2PASID);
4779
127c7615 4780 iommu_disable_dev_iotlb(info);
55752949 4781 domain_context_clear_one(iommu, info->bus, info->devfn);
a7fc93fe 4782 intel_pasid_free_table(info->dev);
127c7615 4783 }
c7151a8d 4784
b608ac3b 4785 unlink_domain_info(info);
c7151a8d 4786
d160aca5 4787 spin_lock_irqsave(&iommu->lock, flags);
942067f1 4788 domain_detach_iommu(domain, iommu);
d160aca5 4789 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4790
942067f1
LB
4791 /* free the private domain */
4792 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
4793 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY))
4794 domain_exit(info->domain);
4795
127c7615 4796 free_devinfo_mem(info);
c7151a8d 4797}
c7151a8d 4798
71753239 4799static void dmar_remove_one_dev_info(struct device *dev)
55d94043 4800{
127c7615 4801 struct device_domain_info *info;
55d94043 4802 unsigned long flags;
3e7abe25 4803
55d94043 4804 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4805 info = dev->archdata.iommu;
4806 __dmar_remove_one_dev_info(info);
55d94043 4807 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4808}
4809
301e7ee1
JR
4810static int md_domain_init(struct dmar_domain *domain, int guest_width)
4811{
4812 int adjust_width;
4813
4814 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
4815 domain_reserve_special_ranges(domain);
4816
4817 /* calculate AGAW */
4818 domain->gaw = guest_width;
4819 adjust_width = guestwidth_to_adjustwidth(guest_width);
4820 domain->agaw = width_to_agaw(adjust_width);
4821
4822 domain->iommu_coherency = 0;
4823 domain->iommu_snooping = 0;
4824 domain->iommu_superpage = 0;
4825 domain->max_addr = 0;
4826
4827 /* always allocate the top pgd */
4828 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4829 if (!domain->pgd)
4830 return -ENOMEM;
4831 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4832 return 0;
4833}
4834
00a77deb 4835static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4836{
5d450806 4837 struct dmar_domain *dmar_domain;
00a77deb
JR
4838 struct iommu_domain *domain;
4839
4de354ec 4840 switch (type) {
fa954e68
LB
4841 case IOMMU_DOMAIN_DMA:
4842 /* fallthrough */
4de354ec 4843 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 4844 dmar_domain = alloc_domain(0);
4de354ec
LB
4845 if (!dmar_domain) {
4846 pr_err("Can't allocate dmar_domain\n");
4847 return NULL;
4848 }
301e7ee1 4849 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4de354ec
LB
4850 pr_err("Domain initialization failed\n");
4851 domain_exit(dmar_domain);
4852 return NULL;
4853 }
fa954e68
LB
4854
4855 if (type == IOMMU_DOMAIN_DMA &&
4856 init_iova_flush_queue(&dmar_domain->iovad,
4857 iommu_flush_iova, iova_entry_free)) {
4858 pr_warn("iova flush queue initialization failed\n");
4859 intel_iommu_strict = 1;
4860 }
4861
4de354ec 4862 domain_update_iommu_cap(dmar_domain);
38717946 4863
4de354ec
LB
4864 domain = &dmar_domain->domain;
4865 domain->geometry.aperture_start = 0;
4866 domain->geometry.aperture_end =
4867 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4868 domain->geometry.force_aperture = true;
4869
4870 return domain;
4871 case IOMMU_DOMAIN_IDENTITY:
4872 return &si_domain->domain;
4873 default:
00a77deb 4874 return NULL;
38717946 4875 }
8a0e715b 4876
4de354ec 4877 return NULL;
38717946 4878}
38717946 4879
00a77deb 4880static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4881{
4de354ec
LB
4882 if (domain != &si_domain->domain)
4883 domain_exit(to_dmar_domain(domain));
38717946 4884}
38717946 4885
67b8e02b
LB
4886/*
4887 * Check whether a @domain could be attached to the @dev through the
4888 * aux-domain attach/detach APIs.
4889 */
4890static inline bool
4891is_aux_domain(struct device *dev, struct iommu_domain *domain)
4892{
4893 struct device_domain_info *info = dev->archdata.iommu;
4894
4895 return info && info->auxd_enabled &&
4896 domain->type == IOMMU_DOMAIN_UNMANAGED;
4897}
4898
4899static void auxiliary_link_device(struct dmar_domain *domain,
4900 struct device *dev)
4901{
4902 struct device_domain_info *info = dev->archdata.iommu;
4903
4904 assert_spin_locked(&device_domain_lock);
4905 if (WARN_ON(!info))
4906 return;
4907
4908 domain->auxd_refcnt++;
4909 list_add(&domain->auxd, &info->auxiliary_domains);
4910}
4911
4912static void auxiliary_unlink_device(struct dmar_domain *domain,
4913 struct device *dev)
4914{
4915 struct device_domain_info *info = dev->archdata.iommu;
4916
4917 assert_spin_locked(&device_domain_lock);
4918 if (WARN_ON(!info))
4919 return;
4920
4921 list_del(&domain->auxd);
4922 domain->auxd_refcnt--;
4923
4924 if (!domain->auxd_refcnt && domain->default_pasid > 0)
4925 intel_pasid_free_id(domain->default_pasid);
4926}
4927
4928static int aux_domain_add_dev(struct dmar_domain *domain,
4929 struct device *dev)
4930{
4931 int ret;
4932 u8 bus, devfn;
4933 unsigned long flags;
4934 struct intel_iommu *iommu;
4935
4936 iommu = device_to_iommu(dev, &bus, &devfn);
4937 if (!iommu)
4938 return -ENODEV;
4939
4940 if (domain->default_pasid <= 0) {
4941 int pasid;
4942
4943 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
4944 pci_max_pasids(to_pci_dev(dev)),
4945 GFP_KERNEL);
4946 if (pasid <= 0) {
4947 pr_err("Can't allocate default pasid\n");
4948 return -ENODEV;
4949 }
4950 domain->default_pasid = pasid;
4951 }
4952
4953 spin_lock_irqsave(&device_domain_lock, flags);
4954 /*
 4955	 * iommu->lock must be held to attach domain to iommu and set up the
4956 * pasid entry for second level translation.
4957 */
4958 spin_lock(&iommu->lock);
4959 ret = domain_attach_iommu(domain, iommu);
4960 if (ret)
4961 goto attach_failed;
4962
 4963	/* Set up the PASID entry for mediated devices: */
4964 ret = intel_pasid_setup_second_level(iommu, domain, dev,
4965 domain->default_pasid);
4966 if (ret)
4967 goto table_failed;
4968 spin_unlock(&iommu->lock);
4969
4970 auxiliary_link_device(domain, dev);
4971
4972 spin_unlock_irqrestore(&device_domain_lock, flags);
4973
4974 return 0;
4975
4976table_failed:
4977 domain_detach_iommu(domain, iommu);
4978attach_failed:
4979 spin_unlock(&iommu->lock);
4980 spin_unlock_irqrestore(&device_domain_lock, flags);
4981 if (!domain->auxd_refcnt && domain->default_pasid > 0)
4982 intel_pasid_free_id(domain->default_pasid);
4983
4984 return ret;
4985}
4986
4987static void aux_domain_remove_dev(struct dmar_domain *domain,
4988 struct device *dev)
4989{
4990 struct device_domain_info *info;
4991 struct intel_iommu *iommu;
4992 unsigned long flags;
4993
4994 if (!is_aux_domain(dev, &domain->domain))
4995 return;
4996
4997 spin_lock_irqsave(&device_domain_lock, flags);
4998 info = dev->archdata.iommu;
4999 iommu = info->iommu;
5000
5001 auxiliary_unlink_device(domain, dev);
5002
5003 spin_lock(&iommu->lock);
5004 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5005 domain_detach_iommu(domain, iommu);
5006 spin_unlock(&iommu->lock);
5007
5008 spin_unlock_irqrestore(&device_domain_lock, flags);
5009}
5010
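/*
 * Editor's sketch (hypothetical caller, e.g. a mediated-device framework): the
 * aux-domain helpers above are normally reached through the generic IOMMU API
 * roughly as follows, assuming the IOMMU is in scalable mode and the device
 * supports PASID; "dev" and the error label are assumed names:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(dev->bus);
 *
 *	if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX))
 *		goto err;
 *	if (iommu_aux_attach_device(dom, dev))	// -> aux_domain_add_dev()
 *		goto err;
 *	pasid = iommu_aux_get_pasid(dom, dev);	// domain->default_pasid
 *	// ... program the PASID into the parent device ...
 *	iommu_aux_detach_device(dom, dev);	// -> aux_domain_remove_dev()
 */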
8cc3759a
LB
5011static int prepare_domain_attach_device(struct iommu_domain *domain,
5012 struct device *dev)
38717946 5013{
00a77deb 5014 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5015 struct intel_iommu *iommu;
5016 int addr_width;
156baca8 5017 u8 bus, devfn;
faa3d6f5 5018
156baca8 5019 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5020 if (!iommu)
5021 return -ENODEV;
5022
5023 /* check if this iommu agaw is sufficient for max mapped address */
5024 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5025 if (addr_width > cap_mgaw(iommu->cap))
5026 addr_width = cap_mgaw(iommu->cap);
5027
5028 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5029 dev_err(dev, "%s: iommu width (%d) is not "
5030 "sufficient for the mapped address (%llx)\n",
5031 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5032 return -EFAULT;
5033 }
a99c47a2
TL
5034 dmar_domain->gaw = addr_width;
5035
5036 /*
5037 * Knock out extra levels of page tables if necessary
5038 */
5039 while (iommu->agaw < dmar_domain->agaw) {
5040 struct dma_pte *pte;
5041
5042 pte = dmar_domain->pgd;
5043 if (dma_pte_present(pte)) {
25cbff16
SY
5044 dmar_domain->pgd = (struct dma_pte *)
5045 phys_to_virt(dma_pte_addr(pte));
7a661013 5046 free_pgtable_page(pte);
a99c47a2
TL
5047 }
5048 dmar_domain->agaw--;
5049 }
fe40f1e0 5050
8cc3759a
LB
5051 return 0;
5052}
5053
5054static int intel_iommu_attach_device(struct iommu_domain *domain,
5055 struct device *dev)
5056{
5057 int ret;
5058
5679582c
LB
5059 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5060 device_is_rmrr_locked(dev)) {
8cc3759a
LB
5061 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5062 return -EPERM;
5063 }
5064
67b8e02b
LB
5065 if (is_aux_domain(dev, domain))
5066 return -EPERM;
5067
8cc3759a
LB
5068 /* normally dev is not mapped */
5069 if (unlikely(domain_context_mapped(dev))) {
5070 struct dmar_domain *old_domain;
5071
5072 old_domain = find_domain(dev);
fa954e68 5073 if (old_domain)
8cc3759a 5074 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5075 }
5076
5077 ret = prepare_domain_attach_device(domain, dev);
5078 if (ret)
5079 return ret;
5080
5081 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5082}
38717946 5083
67b8e02b
LB
5084static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5085 struct device *dev)
5086{
5087 int ret;
5088
5089 if (!is_aux_domain(dev, domain))
5090 return -EPERM;
5091
5092 ret = prepare_domain_attach_device(domain, dev);
5093 if (ret)
5094 return ret;
5095
5096 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5097}
5098
4c5478c9
JR
5099static void intel_iommu_detach_device(struct iommu_domain *domain,
5100 struct device *dev)
38717946 5101{
71753239 5102 dmar_remove_one_dev_info(dev);
faa3d6f5 5103}
c7151a8d 5104
67b8e02b
LB
5105static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5106 struct device *dev)
5107{
5108 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5109}
5110
b146a1c9
JR
5111static int intel_iommu_map(struct iommu_domain *domain,
5112 unsigned long iova, phys_addr_t hpa,
5009065d 5113 size_t size, int iommu_prot)
faa3d6f5 5114{
00a77deb 5115 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5116 u64 max_addr;
dde57a21 5117 int prot = 0;
faa3d6f5 5118 int ret;
fe40f1e0 5119
942067f1
LB
5120 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5121 return -EINVAL;
5122
dde57a21
JR
5123 if (iommu_prot & IOMMU_READ)
5124 prot |= DMA_PTE_READ;
5125 if (iommu_prot & IOMMU_WRITE)
5126 prot |= DMA_PTE_WRITE;
9cf06697
SY
5127 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5128 prot |= DMA_PTE_SNP;
dde57a21 5129
163cc52c 5130 max_addr = iova + size;
dde57a21 5131 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5132 u64 end;
5133
5134 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5135 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5136 if (end < max_addr) {
9f10e5bf 5137 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5138 "sufficient for the mapped address (%llx)\n",
8954da1f 5139 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5140 return -EFAULT;
5141 }
dde57a21 5142 dmar_domain->max_addr = max_addr;
fe40f1e0 5143 }
ad051221
DW
5144 /* Round up size to next multiple of PAGE_SIZE, if it and
5145 the low bits of hpa would take us onto the next page */
88cb6a74 5146 size = aligned_nrpages(hpa, size);
ad051221
DW
5147 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5148 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5149 return ret;
38717946 5150}
38717946 5151
5009065d 5152static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5153 unsigned long iova, size_t size)
38717946 5154{
00a77deb 5155 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5156 struct page *freelist = NULL;
ea8ea460
DW
5157 unsigned long start_pfn, last_pfn;
5158 unsigned int npages;
42e8c186 5159 int iommu_id, level = 0;
5cf0a76f
DW
5160
5161 /* Cope with horrid API which requires us to unmap more than the
5162 size argument if it happens to be a large-page mapping. */
dc02e46e 5163 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
942067f1
LB
5164 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5165 return 0;
5cf0a76f
DW
5166
5167 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5168 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5169
ea8ea460
DW
5170 start_pfn = iova >> VTD_PAGE_SHIFT;
5171 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5172
5173 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5174
5175 npages = last_pfn - start_pfn + 1;
5176
f746a025 5177 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5178 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5179 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5180
5181 dma_free_pagelist(freelist);
fe40f1e0 5182
163cc52c
DW
5183 if (dmar_domain->max_addr == iova + size)
5184 dmar_domain->max_addr = iova;
b146a1c9 5185
5cf0a76f 5186 return size;
38717946 5187}
38717946 5188
d14d6577 5189static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5190 dma_addr_t iova)
38717946 5191{
00a77deb 5192 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5193 struct dma_pte *pte;
5cf0a76f 5194 int level = 0;
faa3d6f5 5195 u64 phys = 0;
38717946 5196
942067f1
LB
5197 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5198 return 0;
5199
5cf0a76f 5200 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5201 if (pte)
faa3d6f5 5202 phys = dma_pte_addr(pte);
38717946 5203
faa3d6f5 5204 return phys;
38717946 5205}
a8bcbb0d 5206
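/*
 * Editor's sketch (hypothetical VFIO-style caller): the domain_alloc, attach,
 * map, unmap and iova_to_phys callbacks above are driven through the generic
 * IOMMU API rather than called directly, roughly:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	iommu_attach_device(dom, dev);		// -> intel_iommu_attach_device()
 *	iommu_map(dom, iova, paddr, SZ_4K,
 *		  IOMMU_READ | IOMMU_WRITE);	// -> intel_iommu_map()
 *	phys = iommu_iova_to_phys(dom, iova);	// -> intel_iommu_iova_to_phys()
 *	iommu_unmap(dom, iova, SZ_4K);		// -> intel_iommu_unmap()
 *	iommu_detach_device(dom, dev);
 *	iommu_domain_free(dom);
 *
 * "dev", "iova" and "paddr" are assumed example values, not names used here.
 */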
95587a75
LB
5207static inline bool scalable_mode_support(void)
5208{
5209 struct dmar_drhd_unit *drhd;
5210 struct intel_iommu *iommu;
5211 bool ret = true;
5212
5213 rcu_read_lock();
5214 for_each_active_iommu(iommu, drhd) {
5215 if (!sm_supported(iommu)) {
5216 ret = false;
5217 break;
5218 }
5219 }
5220 rcu_read_unlock();
5221
5222 return ret;
5223}
5224
5225static inline bool iommu_pasid_support(void)
5226{
5227 struct dmar_drhd_unit *drhd;
5228 struct intel_iommu *iommu;
5229 bool ret = true;
5230
5231 rcu_read_lock();
5232 for_each_active_iommu(iommu, drhd) {
5233 if (!pasid_supported(iommu)) {
5234 ret = false;
5235 break;
5236 }
5237 }
5238 rcu_read_unlock();
5239
5240 return ret;
5241}
5242
5d587b8d 5243static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5244{
dbb9fd86 5245 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5246 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5247 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5248 return irq_remapping_enabled == 1;
dbb9fd86 5249
5d587b8d 5250 return false;
dbb9fd86
SY
5251}
5252
abdfdde2
AW
5253static int intel_iommu_add_device(struct device *dev)
5254{
942067f1
LB
5255 struct dmar_domain *dmar_domain;
5256 struct iommu_domain *domain;
a5459cfe 5257 struct intel_iommu *iommu;
abdfdde2 5258 struct iommu_group *group;
156baca8 5259 u8 bus, devfn;
942067f1 5260 int ret;
70ae6f0d 5261
a5459cfe
AW
5262 iommu = device_to_iommu(dev, &bus, &devfn);
5263 if (!iommu)
70ae6f0d
AW
5264 return -ENODEV;
5265
e3d10af1 5266 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5267
8af46c78
LB
5268 if (translation_pre_enabled(iommu))
5269 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5270
e17f9ff4 5271 group = iommu_group_get_for_dev(dev);
783f157b 5272
e17f9ff4
AW
5273 if (IS_ERR(group))
5274 return PTR_ERR(group);
bcb71abe 5275
abdfdde2 5276 iommu_group_put(group);
942067f1
LB
5277
5278 domain = iommu_get_domain_for_dev(dev);
5279 dmar_domain = to_dmar_domain(domain);
5280 if (domain->type == IOMMU_DOMAIN_DMA) {
0e31a726 5281 if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
942067f1
LB
5282 ret = iommu_request_dm_for_dev(dev);
5283 if (ret) {
5284 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5285 domain_add_dev_info(si_domain, dev);
5286 dev_info(dev,
5287 "Device uses a private identity domain.\n");
942067f1 5288 }
942067f1
LB
5289 }
5290 } else {
0e31a726 5291 if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
942067f1
LB
5292 ret = iommu_request_dma_domain_for_dev(dev);
5293 if (ret) {
5294 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
4ec066c7 5295 if (!get_private_domain_for_dev(dev)) {
942067f1
LB
5296 dev_warn(dev,
5297 "Failed to get a private domain.\n");
5298 return -ENOMEM;
5299 }
5300
5301 dev_info(dev,
5302 "Device uses a private dma domain.\n");
942067f1 5303 }
942067f1
LB
5304 }
5305 }
5306
e17f9ff4 5307 return 0;
abdfdde2 5308}
70ae6f0d 5309
abdfdde2
AW
5310static void intel_iommu_remove_device(struct device *dev)
5311{
a5459cfe
AW
5312 struct intel_iommu *iommu;
5313 u8 bus, devfn;
5314
5315 iommu = device_to_iommu(dev, &bus, &devfn);
5316 if (!iommu)
5317 return;
5318
abdfdde2 5319 iommu_group_remove_device(dev);
a5459cfe 5320
e3d10af1 5321 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5322}
5323
0659b8dc
EA
5324static void intel_iommu_get_resv_regions(struct device *device,
5325 struct list_head *head)
5326{
5f64ce54 5327 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
0659b8dc
EA
5328 struct iommu_resv_region *reg;
5329 struct dmar_rmrr_unit *rmrr;
5330 struct device *i_dev;
5331 int i;
5332
5f64ce54 5333 down_read(&dmar_global_lock);
0659b8dc
EA
5334 for_each_rmrr_units(rmrr) {
5335 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5336 i, i_dev) {
5f64ce54 5337 struct iommu_resv_region *resv;
1c5c59fb 5338 enum iommu_resv_type type;
5f64ce54
EA
5339 size_t length;
5340
3855ba2d
EA
5341 if (i_dev != device &&
5342 !is_downstream_to_pci_bridge(device, i_dev))
0659b8dc
EA
5343 continue;
5344
5f64ce54 5345 length = rmrr->end_address - rmrr->base_address + 1;
1c5c59fb
EA
5346
5347 type = device_rmrr_is_relaxable(device) ?
5348 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5349
5f64ce54 5350 resv = iommu_alloc_resv_region(rmrr->base_address,
1c5c59fb 5351 length, prot, type);
5f64ce54
EA
5352 if (!resv)
5353 break;
5354
5355 list_add_tail(&resv->list, head);
0659b8dc
EA
5356 }
5357 }
5f64ce54 5358 up_read(&dmar_global_lock);
0659b8dc 5359
d850c2ee
LB
5360#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5361 if (dev_is_pci(device)) {
5362 struct pci_dev *pdev = to_pci_dev(device);
5363
5364 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5365 reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
5366 IOMMU_RESV_DIRECT);
5367 if (reg)
5368 list_add_tail(&reg->list, head);
5369 }
5370 }
5371#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5372
0659b8dc
EA
5373 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5374 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5375 0, IOMMU_RESV_MSI);
0659b8dc
EA
5376 if (!reg)
5377 return;
5378 list_add_tail(&reg->list, head);
5379}
5380
5381static void intel_iommu_put_resv_regions(struct device *dev,
5382 struct list_head *head)
5383{
5384 struct iommu_resv_region *entry, *next;
5385
5f64ce54
EA
5386 list_for_each_entry_safe(entry, next, head, list)
5387 kfree(entry);
70ae6f0d
AW
5388}
5389
d7cbc0f3 5390int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5391{
5392 struct device_domain_info *info;
5393 struct context_entry *context;
5394 struct dmar_domain *domain;
5395 unsigned long flags;
5396 u64 ctx_lo;
5397 int ret;
5398
4ec066c7 5399 domain = find_domain(dev);
2f26e0a9
DW
5400 if (!domain)
5401 return -EINVAL;
5402
5403 spin_lock_irqsave(&device_domain_lock, flags);
5404 spin_lock(&iommu->lock);
5405
5406 ret = -EINVAL;
d7cbc0f3 5407 info = dev->archdata.iommu;
2f26e0a9
DW
5408 if (!info || !info->pasid_supported)
5409 goto out;
5410
5411 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5412 if (WARN_ON(!context))
5413 goto out;
5414
5415 ctx_lo = context[0].lo;
5416
2f26e0a9 5417 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5418 ctx_lo |= CONTEXT_PASIDE;
5419 context[0].lo = ctx_lo;
5420 wmb();
d7cbc0f3
LB
5421 iommu->flush.flush_context(iommu,
5422 domain->iommu_did[iommu->seq_id],
5423 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5424 DMA_CCMD_MASK_NOBIT,
5425 DMA_CCMD_DEVICE_INVL);
5426 }
5427
5428 /* Enable PASID support in the device, if it wasn't already */
5429 if (!info->pasid_enabled)
5430 iommu_enable_dev_iotlb(info);
5431
2f26e0a9
DW
5432 ret = 0;
5433
5434 out:
5435 spin_unlock(&iommu->lock);
5436 spin_unlock_irqrestore(&device_domain_lock, flags);
5437
5438 return ret;
5439}
5440
73bcbdc9
JS
5441static void intel_iommu_apply_resv_region(struct device *dev,
5442 struct iommu_domain *domain,
5443 struct iommu_resv_region *region)
5444{
5445 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5446 unsigned long start, end;
5447
5448 start = IOVA_PFN(region->start);
5449 end = IOVA_PFN(region->start + region->length - 1);
5450
5451 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5452}
5453
d7cbc0f3 5454#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
5455struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5456{
5457 struct intel_iommu *iommu;
5458 u8 bus, devfn;
5459
5460 if (iommu_dummy(dev)) {
5461 dev_warn(dev,
5462 "No IOMMU translation for device; cannot enable SVM\n");
5463 return NULL;
5464 }
5465
5466 iommu = device_to_iommu(dev, &bus, &devfn);
5467 	if (!iommu) {
b9997e38 5468 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5469 return NULL;
5470 }
5471
2f26e0a9
DW
5472 return iommu;
5473}
5474#endif /* CONFIG_INTEL_IOMMU_SVM */
5475
95587a75
LB
5476static int intel_iommu_enable_auxd(struct device *dev)
5477{
5478 struct device_domain_info *info;
5479 struct intel_iommu *iommu;
5480 unsigned long flags;
5481 u8 bus, devfn;
5482 int ret;
5483
5484 iommu = device_to_iommu(dev, &bus, &devfn);
5485 if (!iommu || dmar_disabled)
5486 return -EINVAL;
5487
5488 if (!sm_supported(iommu) || !pasid_supported(iommu))
5489 return -EINVAL;
5490
5491 ret = intel_iommu_enable_pasid(iommu, dev);
5492 if (ret)
5493 return -ENODEV;
5494
5495 spin_lock_irqsave(&device_domain_lock, flags);
5496 info = dev->archdata.iommu;
5497 info->auxd_enabled = 1;
5498 spin_unlock_irqrestore(&device_domain_lock, flags);
5499
5500 return 0;
5501}
5502
5503static int intel_iommu_disable_auxd(struct device *dev)
5504{
5505 struct device_domain_info *info;
5506 unsigned long flags;
5507
5508 spin_lock_irqsave(&device_domain_lock, flags);
5509 info = dev->archdata.iommu;
5510 if (!WARN_ON(!info))
5511 info->auxd_enabled = 0;
5512 spin_unlock_irqrestore(&device_domain_lock, flags);
5513
5514 return 0;
5515}
5516
5517/*
5518  * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5519  * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5520  * spec so that system software and tools can detect endpoint devices that
5521  * support Intel Scalable I/O Virtualization without any host driver dependency.
5522  *
5523  * Returns the address of the matching extended capability structure within
5524  * the device's PCI configuration space, or 0 if the device does not
5525  * support it.
5526 */
5527static int siov_find_pci_dvsec(struct pci_dev *pdev)
5528{
5529 int pos;
5530 u16 vendor, id;
5531
5532 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5533 while (pos) {
5534 pci_read_config_word(pdev, pos + 4, &vendor);
5535 pci_read_config_word(pdev, pos + 8, &id);
5536 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5537 return pos;
5538
5539 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5540 }
5541
5542 return 0;
5543}
5544
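/*
 * Editor's sketch, not part of the original file: named constants for the
 * raw offsets that siov_find_pci_dvsec() above walks. The EX_* names are
 * invented for illustration; the numeric values (extended capability ID
 * 0x23, the 16-bit reads at offsets +4 and +8, and the SIOV DVSEC ID 5)
 * come straight from the function above.
 */
#define EX_PCI_EXT_CAP_ID_DVSEC		0x23	/* extended capability ID being scanned */
#define EX_DVSEC_VENDOR_OFF		0x04	/* 16-bit vendor ID read at pos + 4 */
#define EX_DVSEC_ID_OFF			0x08	/* 16-bit DVSEC ID read at pos + 8 */
#define EX_DVSEC_ID_SIOV		5	/* DVSEC ID matched for PCI_VENDOR_ID_INTEL */

/*
 * Minimal usage sketch: a non-zero return from siov_find_pci_dvsec() is the
 * config-space offset of the capability, which callers such as
 * intel_iommu_dev_has_feat() below treat as "SIOV is advertised".
 */
static inline bool ex_pdev_advertises_siov(struct pci_dev *pdev)
{
	return siov_find_pci_dvsec(pdev) != 0;
}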
5545static bool
5546intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5547{
5548 if (feat == IOMMU_DEV_FEAT_AUX) {
5549 int ret;
5550
5551 if (!dev_is_pci(dev) || dmar_disabled ||
5552 !scalable_mode_support() || !iommu_pasid_support())
5553 return false;
5554
5555 ret = pci_pasid_features(to_pci_dev(dev));
5556 if (ret < 0)
5557 return false;
5558
5559 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5560 }
5561
5562 return false;
5563}
5564
5565static int
5566intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5567{
5568 if (feat == IOMMU_DEV_FEAT_AUX)
5569 return intel_iommu_enable_auxd(dev);
5570
5571 return -ENODEV;
5572}
5573
5574static int
5575intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5576{
5577 if (feat == IOMMU_DEV_FEAT_AUX)
5578 return intel_iommu_disable_auxd(dev);
5579
5580 return -ENODEV;
5581}
5582
5583static bool
5584intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5585{
5586 struct device_domain_info *info = dev->archdata.iommu;
5587
5588 if (feat == IOMMU_DEV_FEAT_AUX)
5589 return scalable_mode_support() && info && info->auxd_enabled;
5590
5591 return false;
5592}
5593
0e8000f8
LB
5594static int
5595intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5596{
5597 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5598
5599 return dmar_domain->default_pasid > 0 ?
5600 dmar_domain->default_pasid : -EINVAL;
5601}
5602
8af46c78
LB
5603static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5604 struct device *dev)
5605{
5606 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5607}
5608
b0119e87 5609const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5610 .capable = intel_iommu_capable,
5611 .domain_alloc = intel_iommu_domain_alloc,
5612 .domain_free = intel_iommu_domain_free,
5613 .attach_dev = intel_iommu_attach_device,
5614 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
5615 .aux_attach_dev = intel_iommu_aux_attach_device,
5616 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5617 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
5618 .map = intel_iommu_map,
5619 .unmap = intel_iommu_unmap,
0659b8dc
EA
5620 .iova_to_phys = intel_iommu_iova_to_phys,
5621 .add_device = intel_iommu_add_device,
5622 .remove_device = intel_iommu_remove_device,
5623 .get_resv_regions = intel_iommu_get_resv_regions,
5624 .put_resv_regions = intel_iommu_put_resv_regions,
73bcbdc9 5625 .apply_resv_region = intel_iommu_apply_resv_region,
0659b8dc 5626 .device_group = pci_device_group,
95587a75
LB
5627 .dev_has_feat = intel_iommu_dev_has_feat,
5628 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5629 .dev_enable_feat = intel_iommu_dev_enable_feat,
5630 .dev_disable_feat = intel_iommu_dev_disable_feat,
8af46c78 5631 .is_attach_deferred = intel_iommu_is_attach_deferred,
0659b8dc 5632 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5633};
9af88143 5634
9452618e
DV
5635static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5636{
5637 /* G4x/GM45 integrated gfx dmar support is totally busted. */
932a6523 5638 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5639 dmar_map_gfx = 0;
5640}
5641
5642DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5643DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5644DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5645DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5646DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5647DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5648DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5649
d34d6517 5650static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5651{
5652 /*
5653 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5654 * but needs it. Same seems to hold for the desktop versions.
9af88143 5655 */
932a6523 5656 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
5657 rwbf_quirk = 1;
5658}
5659
5660DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5661DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5662DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5663DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5664DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5665DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5666DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5667
eecfd57f
AJ
5668#define GGC 0x52
5669#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5670#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5671#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5672#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5673#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5674#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5675#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5676#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5677
d34d6517 5678static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5679{
5680 unsigned short ggc;
5681
eecfd57f 5682 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5683 return;
5684
eecfd57f 5685 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5686 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5687 dmar_map_gfx = 0;
6fbcfb3e
DW
5688 } else if (dmar_map_gfx) {
5689 /* we have to ensure the gfx device is idle before we flush */
932a6523 5690 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5691 intel_iommu_strict = 1;
5692 }
9eecabcb
DW
5693}
5694DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5695DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5696DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5697DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5698
e0fc7e0b
DW
5699/* On Tylersburg chipsets, some BIOSes have been known to enable the
5700 ISOCH DMAR unit for the Azalia sound device, but not give it any
5701 TLB entries, which causes it to deadlock. Check for that. We do
5702 this in a function called from init_dmars(), instead of in a PCI
5703 quirk, because we don't want to print the obnoxious "BIOS broken"
5704 message if VT-d is actually disabled.
5705*/
5706static void __init check_tylersburg_isoch(void)
5707{
5708 struct pci_dev *pdev;
5709 uint32_t vtisochctrl;
5710
5711 /* If there's no Azalia in the system anyway, forget it. */
5712 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5713 if (!pdev)
5714 return;
5715 pci_dev_put(pdev);
5716
5717 /* System Management Registers. Might be hidden, in which case
5718 we can't do the sanity check. But that's OK, because the
5719 known-broken BIOSes _don't_ actually hide it, so far. */
5720 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5721 if (!pdev)
5722 return;
5723
5724 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5725 pci_dev_put(pdev);
5726 return;
5727 }
5728
5729 pci_dev_put(pdev);
5730
5731 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5732 if (vtisochctrl & 1)
5733 return;
5734
5735 /* Drop all bits other than the number of TLB entries */
5736 vtisochctrl &= 0x1c;
5737
5738 /* If we have the recommended number of TLB entries (16), fine. */
5739 if (vtisochctrl == 0x10)
5740 return;
5741
5742 /* Zero TLB entries? You get to ride the short bus to school. */
5743 if (!vtisochctrl) {
5744 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5745 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5746 dmi_get_system_info(DMI_BIOS_VENDOR),
5747 dmi_get_system_info(DMI_BIOS_VERSION),
5748 dmi_get_system_info(DMI_PRODUCT_VERSION));
5749 iommu_identity_mapping |= IDENTMAP_AZALIA;
5750 return;
5751 }
9f10e5bf
JR
5752
5753 	pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5754 vtisochctrl);
5755}
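/*
 * Editor's sketch, not part of the original file: a boolean restatement of
 * the VTISOCHCTRL checks performed by check_tylersburg_isoch() above. The
 * register interpretation here is only what those checks imply, not taken
 * from a datasheet.
 */
static inline bool ex_azalia_isoch_dma_ok(u32 vtisochctrl)
{
	/* Bit 0 set: Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return true;

	/*
	 * Otherwise the TLB-entry field (mask 0x1c) must hold the
	 * recommended value 0x10, reported as 16 entries above.
	 */
	return (vtisochctrl & 0x1c) == 0x10;
}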