2025cf9e 1// SPDX-License-Identifier: GPL-2.0-only
ba395927 2/*
ea8ea460 3 * Copyright © 2006-2014 Intel Corporation.
ba395927 4 *
ea8ea460
DW
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 10 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
11 */
12
9f10e5bf 13#define pr_fmt(fmt) "DMAR: " fmt
932a6523 14#define dev_fmt(fmt) pr_fmt(fmt)
9f10e5bf 15
ba395927
KA
16#include <linux/init.h>
17#include <linux/bitmap.h>
5e0d2a6f 18#include <linux/debugfs.h>
54485c30 19#include <linux/export.h>
ba395927
KA
20#include <linux/slab.h>
21#include <linux/irq.h>
22#include <linux/interrupt.h>
ba395927
KA
23#include <linux/spinlock.h>
24#include <linux/pci.h>
25#include <linux/dmar.h>
26#include <linux/dma-mapping.h>
27#include <linux/mempool.h>
75f05569 28#include <linux/memory.h>
aa473240 29#include <linux/cpu.h>
5e0d2a6f 30#include <linux/timer.h>
dfddb969 31#include <linux/io.h>
38717946 32#include <linux/iova.h>
5d450806 33#include <linux/iommu.h>
38717946 34#include <linux/intel-iommu.h>
134fac3f 35#include <linux/syscore_ops.h>
69575d38 36#include <linux/tboot.h>
adb2fe02 37#include <linux/dmi.h>
5cdede24 38#include <linux/pci-ats.h>
0ee332c1 39#include <linux/memblock.h>
36746436 40#include <linux/dma-contiguous.h>
fec777c3 41#include <linux/dma-direct.h>
091d42e4 42#include <linux/crash_dump.h>
98fa15f3 43#include <linux/numa.h>
cfb94a37 44#include <linux/swiotlb.h>
8a8f422d 45#include <asm/irq_remapping.h>
ba395927 46#include <asm/cacheflush.h>
46a7fa27 47#include <asm/iommu.h>
cfb94a37 48#include <trace/events/intel_iommu.h>
ba395927 49
078e1ee2 50#include "irq_remapping.h"
56283174 51#include "intel-pasid.h"
078e1ee2 52
5b6985ce
FY
53#define ROOT_SIZE VTD_PAGE_SIZE
54#define CONTEXT_SIZE VTD_PAGE_SIZE
55
ba395927 56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
60
61#define IOAPIC_RANGE_START (0xfee00000)
62#define IOAPIC_RANGE_END (0xfeefffff)
63#define IOVA_START_ADDR (0x1000)
64
5e3b4a15 65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
ba395927 66
4ed0d3e6 67#define MAX_AGAW_WIDTH 64
5c645b35 68#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 69
2ebe3151
DW
70#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
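/*
 * Worked example (editor's note, assuming the usual 4KiB VTD_PAGE_SIZE):
 * for a 48-bit guest address width, __DOMAIN_MAX_PFN(48) = (1ULL << 36) - 1,
 * i.e. roughly 64G page frames, and DOMAIN_MAX_ADDR(48) = 2^48 - 4KiB.
 * On 32-bit kernels DOMAIN_MAX_PFN() additionally clamps the result to
 * ULONG_MAX so that PFNs always fit in an unsigned long, as noted above.
 */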
ba395927 78
1b722500
RM
79/* IO virtual address start page frame number */
80#define IOVA_START_PFN (1)
81
f27be03b 82#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
5e0d2a6f 83
df08cdc7
AM
84/* page table handling */
85#define LEVEL_STRIDE (9)
86#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
87
6d1c56a9
OBC
88/*
 89 * This bitmap is used to advertise the page sizes our hardware supports
90 * to the IOMMU core, which will then use this information to split
91 * physically contiguous memory regions it is mapping into page sizes
92 * that we support.
93 *
94 * Traditionally the IOMMU core just handed us the mappings directly,
95 * after making sure the size is an order of a 4KiB page and that the
96 * mapping has natural alignment.
97 *
98 * To retain this behavior, we currently advertise that we support
99 * all page sizes that are an order of 4KiB.
100 *
101 * If at some point we'd like to utilize the IOMMU core's new behavior,
102 * we could change this to advertise the real page sizes we support.
103 */
104#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
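/*
 * Editor's note: ~0xFFFUL sets every bit above bit 11, i.e. it advertises
 * every power-of-two size of 4KiB or larger (4KiB, 8KiB, 16KiB, ...),
 * which matches the "all page sizes that are an order of 4KiB" behaviour
 * described in the comment above.
 */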
105
df08cdc7
AM
106static inline int agaw_to_level(int agaw)
107{
108 return agaw + 2;
109}
110
111static inline int agaw_to_width(int agaw)
112{
5c645b35 113 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
114}
115
116static inline int width_to_agaw(int width)
117{
5c645b35 118 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
119}
120
121static inline unsigned int level_to_offset_bits(int level)
122{
123 return (level - 1) * LEVEL_STRIDE;
124}
125
126static inline int pfn_level_offset(unsigned long pfn, int level)
127{
128 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
129}
130
131static inline unsigned long level_mask(int level)
132{
133 return -1UL << level_to_offset_bits(level);
134}
135
136static inline unsigned long level_size(int level)
137{
138 return 1UL << level_to_offset_bits(level);
139}
140
141static inline unsigned long align_to_level(unsigned long pfn, int level)
142{
143 return (pfn + level_size(level) - 1) & level_mask(level);
144}
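/*
 * Worked example for the helpers above (editor's sketch): with
 * LEVEL_STRIDE == 9, agaw 2 describes a 4-level page table
 * (agaw_to_level(2) == 4) covering a 48-bit address width
 * (agaw_to_width(2) == 30 + 2 * 9 == 48), and width_to_agaw(48)
 * round-trips back to 2. level_size(2) is 2^9 == 512 pages, so
 * align_to_level(pfn, 2) rounds a pfn up to the next 2MiB boundary
 * when VTD_PAGE_SIZE is 4KiB.
 */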
fd18de50 145
6dd9a7c7
YS
146static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
147{
5c645b35 148 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
149}
150
dd4e8319
DW
151/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152 are never going to work. */
153static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
154{
155 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
156}
157
158static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
159{
160 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
161}
162static inline unsigned long page_to_dma_pfn(struct page *pg)
163{
164 return mm_to_dma_pfn(page_to_pfn(pg));
165}
166static inline unsigned long virt_to_dma_pfn(void *p)
167{
168 return page_to_dma_pfn(virt_to_page(p));
169}
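/*
 * Editor's note: on configurations where PAGE_SHIFT equals VTD_PAGE_SHIFT
 * (e.g. 4KiB kernel pages), the two conversions above are identity
 * operations; the shifts only matter when the kernel page size is larger
 * than the 4KiB VT-d page size.
 */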
170
d9630fe9
WH
171/* global iommu list, set NULL for ignored DMAR units */
172static struct intel_iommu **g_iommus;
173
e0fc7e0b 174static void __init check_tylersburg_isoch(void);
9af88143
DW
175static int rwbf_quirk;
176
b779260b
JC
177/*
178 * set to 1 to panic kernel if can't successfully enable VT-d
179 * (used when kernel is launched w/ TXT)
180 */
181static int force_on = 0;
bfd20f1c 182int intel_iommu_tboot_noforce;
89a6079d 183static int no_platform_optin;
b779260b 184
46b08e1a 185#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 186
091d42e4
JR
187/*
188 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
189 * if marked present.
190 */
191static phys_addr_t root_entry_lctp(struct root_entry *re)
192{
193 if (!(re->lo & 1))
194 return 0;
195
196 return re->lo & VTD_PAGE_MASK;
197}
198
199/*
200 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
201 * if marked present.
202 */
203static phys_addr_t root_entry_uctp(struct root_entry *re)
204{
205 if (!(re->hi & 1))
206 return 0;
46b08e1a 207
091d42e4
JR
208 return re->hi & VTD_PAGE_MASK;
209}
c07e7d21 210
cf484d0e
JR
211static inline void context_clear_pasid_enable(struct context_entry *context)
212{
213 context->lo &= ~(1ULL << 11);
214}
215
216static inline bool context_pasid_enabled(struct context_entry *context)
217{
218 return !!(context->lo & (1ULL << 11));
219}
220
221static inline void context_set_copied(struct context_entry *context)
222{
223 context->hi |= (1ull << 3);
224}
225
226static inline bool context_copied(struct context_entry *context)
227{
228 return !!(context->hi & (1ULL << 3));
229}
230
231static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
232{
233 return (context->lo & 1);
234}
cf484d0e 235
26b86092 236bool context_present(struct context_entry *context)
cf484d0e
JR
237{
238 return context_pasid_enabled(context) ?
239 __context_present(context) :
240 __context_present(context) && !context_copied(context);
241}
242
c07e7d21
MM
243static inline void context_set_present(struct context_entry *context)
244{
245 context->lo |= 1;
246}
247
248static inline void context_set_fault_enable(struct context_entry *context)
249{
250 context->lo &= (((u64)-1) << 2) | 1;
251}
252
c07e7d21
MM
253static inline void context_set_translation_type(struct context_entry *context,
254 unsigned long value)
255{
256 context->lo &= (((u64)-1) << 4) | 3;
257 context->lo |= (value & 3) << 2;
258}
259
260static inline void context_set_address_root(struct context_entry *context,
261 unsigned long value)
262{
1a2262f9 263 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
264 context->lo |= value & VTD_PAGE_MASK;
265}
266
267static inline void context_set_address_width(struct context_entry *context,
268 unsigned long value)
269{
270 context->hi |= value & 7;
271}
272
273static inline void context_set_domain_id(struct context_entry *context,
274 unsigned long value)
275{
276 context->hi |= (value & ((1 << 16) - 1)) << 8;
277}
278
dbcd861f
JR
279static inline int context_domain_id(struct context_entry *c)
280{
281 return((c->hi >> 8) & 0xffff);
282}
283
c07e7d21
MM
284static inline void context_clear_entry(struct context_entry *context)
285{
286 context->lo = 0;
287 context->hi = 0;
288}
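/*
 * Editor's summary of the layout implied by the helpers above: in the
 * 128-bit context entry, lo bit 0 is the present bit, lo bits 2:3 the
 * translation type and lo bits 12 and up the address root; hi bits 0:2
 * hold the address width (AGAW) and hi bits 8:23 the domain id. Lo bit 11
 * and hi bit 3 are used here as the PASID-enable and "copied" flags.
 */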
7a8fc25e 289
2c2e2c38
FY
290/*
 291 * This domain is a static identity mapping domain.
 292 * 1. This domain creates a static 1:1 mapping to all usable memory.
 293 * 2. It maps to each iommu if successful.
 294 * 3. Each iommu maps to this domain if successful.
295 */
19943b0e
DW
296static struct dmar_domain *si_domain;
297static int hw_pass_through = 1;
2c2e2c38 298
2c2e2c38 299/* si_domain contains multiple devices */
fa954e68 300#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
2c2e2c38 301
942067f1
LB
302/*
303 * This is a DMA domain allocated through the iommu domain allocation
304 * interface. But one or more devices belonging to this domain have
 305 * been chosen to use a private domain. We should avoid using the
306 * map/unmap/iova_to_phys APIs on it.
307 */
308#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
309
a1948f2e
LB
310/*
311 * When VT-d works in the scalable mode, it allows DMA translation to
312 * happen through either first level or second level page table. This
313 * bit marks that the DMA translation for the domain goes through the
314 * first level page table, otherwise, it goes through the second level.
315 */
316#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2)
317
2cd1311a
LB
318/*
319 * Domain represents a virtual machine which demands iommu nested
320 * translation mode support.
321 */
322#define DOMAIN_FLAG_NESTING_MODE BIT(3)
323
29a27719
JR
324#define for_each_domain_iommu(idx, domain) \
325 for (idx = 0; idx < g_num_of_iommus; idx++) \
326 if (domain->iommu_refcnt[idx])
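/*
 * Editor's usage sketch: this walks every IOMMU index that currently holds
 * a reference on @domain, e.g.
 *	for_each_domain_iommu(idx, domain)
 *		do_something(g_iommus[idx]);
 * where do_something() is a hypothetical callee used only for illustration.
 */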
327
b94e4117
JL
328struct dmar_rmrr_unit {
329 struct list_head list; /* list of rmrr units */
330 struct acpi_dmar_header *hdr; /* ACPI header */
331 u64 base_address; /* reserved base address*/
332 u64 end_address; /* reserved end address */
832bd858 333 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
334 int devices_cnt; /* target device count */
335};
336
337struct dmar_atsr_unit {
338 struct list_head list; /* list of ATSR units */
339 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 340 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
341 int devices_cnt; /* target device count */
342 u8 include_all:1; /* include all ports */
343};
344
345static LIST_HEAD(dmar_atsr_units);
346static LIST_HEAD(dmar_rmrr_units);
347
348#define for_each_rmrr_units(rmrr) \
349 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
350
5e0d2a6f 351/* number of intel_iommus in the system; bounds indexing into g_iommus[] */
5e0d2a6f 352static int g_num_of_iommus;
353
92d03cc8 354static void domain_exit(struct dmar_domain *domain);
ba395927 355static void domain_remove_dev_info(struct dmar_domain *domain);
71753239 356static void dmar_remove_one_dev_info(struct device *dev);
127c7615 357static void __dmar_remove_one_dev_info(struct device_domain_info *info);
0ce4a85f
LB
358static void domain_context_clear(struct intel_iommu *iommu,
359 struct device *dev);
2a46ddf7
JL
360static int domain_detach_iommu(struct dmar_domain *domain,
361 struct intel_iommu *iommu);
4de354ec 362static bool device_is_rmrr_locked(struct device *dev);
8af46c78
LB
363static int intel_iommu_attach_device(struct iommu_domain *domain,
364 struct device *dev);
cfb94a37
LB
365static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
366 dma_addr_t iova);
ba395927 367
d3f13810 368#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
369int dmar_disabled = 0;
370#else
371int dmar_disabled = 1;
04618252 372#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
0cd5c3c8 373
04618252
LB
374#ifdef INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
375int intel_iommu_sm = 1;
376#else
cdd3a249 377int intel_iommu_sm;
04618252
LB
378#endif /* INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
379
8bc1f85c
ED
380int intel_iommu_enabled = 0;
381EXPORT_SYMBOL_GPL(intel_iommu_enabled);
382
2d9e667e 383static int dmar_map_gfx = 1;
7d3b03ce 384static int dmar_forcedac;
5e0d2a6f 385static int intel_iommu_strict;
6dd9a7c7 386static int intel_iommu_superpage = 1;
ae853ddb 387static int iommu_identity_mapping;
e5e04d05 388static int intel_no_bounce;
c83b2f20 389
ae853ddb
DW
390#define IDENTMAP_GFX 2
391#define IDENTMAP_AZALIA 4
c83b2f20 392
c0771df8
DW
393int intel_iommu_gfx_mapped;
394EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
395
ba395927 396#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
8af46c78 397#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
e2726dae 398DEFINE_SPINLOCK(device_domain_lock);
ba395927
KA
399static LIST_HEAD(device_domain_list);
400
e5e04d05
LB
401#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
402 to_pci_dev(d)->untrusted)
403
85319dcc
LB
404/*
405 * Iterate over elements in device_domain_list and call the specified
0bbeb01a 406 * callback @fn against each element.
85319dcc
LB
407 */
408int for_each_device_domain(int (*fn)(struct device_domain_info *info,
409 void *data), void *data)
410{
411 int ret = 0;
0bbeb01a 412 unsigned long flags;
85319dcc
LB
413 struct device_domain_info *info;
414
0bbeb01a 415 spin_lock_irqsave(&device_domain_lock, flags);
85319dcc
LB
416 list_for_each_entry(info, &device_domain_list, global) {
417 ret = fn(info, data);
0bbeb01a
LB
418 if (ret) {
419 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc 420 return ret;
0bbeb01a 421 }
85319dcc 422 }
0bbeb01a 423 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc
LB
424
425 return 0;
426}
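/*
 * Usage sketch (editor's illustration; the callback is hypothetical):
 *
 *	static int count_devices(struct device_domain_info *info, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;
 *	}
 *	...
 *	int n = 0;
 *	for_each_device_domain(count_devices, &n);
 *
 * Returning a non-zero value from the callback stops the walk early and
 * propagates that value to the caller.
 */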
427
b0119e87 428const struct iommu_ops intel_iommu_ops;
a8bcbb0d 429
4158c2ec
JR
430static bool translation_pre_enabled(struct intel_iommu *iommu)
431{
432 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
433}
434
091d42e4
JR
435static void clear_translation_pre_enabled(struct intel_iommu *iommu)
436{
437 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
438}
439
4158c2ec
JR
440static void init_translation_status(struct intel_iommu *iommu)
441{
442 u32 gsts;
443
444 gsts = readl(iommu->reg + DMAR_GSTS_REG);
445 if (gsts & DMA_GSTS_TES)
446 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
447}
448
00a77deb
JR
 449/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
450static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
451{
452 return container_of(dom, struct dmar_domain, domain);
453}
454
ba395927
KA
455static int __init intel_iommu_setup(char *str)
456{
457 if (!str)
458 return -EINVAL;
459 while (*str) {
0cd5c3c8
KM
460 if (!strncmp(str, "on", 2)) {
461 dmar_disabled = 0;
9f10e5bf 462 pr_info("IOMMU enabled\n");
0cd5c3c8 463 } else if (!strncmp(str, "off", 3)) {
ba395927 464 dmar_disabled = 1;
89a6079d 465 no_platform_optin = 1;
9f10e5bf 466 pr_info("IOMMU disabled\n");
ba395927
KA
467 } else if (!strncmp(str, "igfx_off", 8)) {
468 dmar_map_gfx = 0;
9f10e5bf 469 pr_info("Disable GFX device mapping\n");
7d3b03ce 470 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 471 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 472 dmar_forcedac = 1;
5e0d2a6f 473 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 474 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 475 intel_iommu_strict = 1;
6dd9a7c7 476 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 477 pr_info("Disable supported super page\n");
6dd9a7c7 478 intel_iommu_superpage = 0;
8950dcd8
LB
479 } else if (!strncmp(str, "sm_on", 5)) {
480 pr_info("Intel-IOMMU: scalable mode supported\n");
481 intel_iommu_sm = 1;
bfd20f1c
SL
482 } else if (!strncmp(str, "tboot_noforce", 13)) {
483 printk(KERN_INFO
484 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
485 intel_iommu_tboot_noforce = 1;
e5e04d05
LB
486 } else if (!strncmp(str, "nobounce", 8)) {
487 pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
488 intel_no_bounce = 1;
ba395927
KA
489 }
490
491 str += strcspn(str, ",");
492 while (*str == ',')
493 str++;
494 }
495 return 0;
496}
497__setup("intel_iommu=", intel_iommu_setup);
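/*
 * Editor's note: the options above are combined as a comma-separated list
 * on the kernel command line, e.g. "intel_iommu=on,strict,sp_off" enables
 * the IOMMU, disables batched IOTLB flushing and disables superpage use.
 */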
498
499static struct kmem_cache *iommu_domain_cache;
500static struct kmem_cache *iommu_devinfo_cache;
ba395927 501
9452d5bf
JR
502static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
503{
8bf47816
JR
504 struct dmar_domain **domains;
505 int idx = did >> 8;
506
507 domains = iommu->domains[idx];
508 if (!domains)
509 return NULL;
510
511 return domains[did & 0xff];
9452d5bf
JR
512}
513
514static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
515 struct dmar_domain *domain)
516{
8bf47816
JR
517 struct dmar_domain **domains;
518 int idx = did >> 8;
519
520 if (!iommu->domains[idx]) {
521 size_t size = 256 * sizeof(struct dmar_domain *);
522 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
523 }
524
525 domains = iommu->domains[idx];
526 if (WARN_ON(!domains))
527 return;
528 else
529 domains[did & 0xff] = domain;
9452d5bf
JR
530}
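/*
 * Editor's note on the lookup above: domain IDs are split into 256-entry
 * second-level tables, one per block of 256 IDs, so (hardware permitting
 * that many domain ids) did 0x1234 lives at iommu->domains[0x12][0x34].
 * The second-level tables are allocated lazily on first use.
 */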
531
9ddbfb42 532void *alloc_pgtable_page(int node)
eb3fa7cb 533{
4c923d47
SS
534 struct page *page;
535 void *vaddr = NULL;
eb3fa7cb 536
4c923d47
SS
537 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
538 if (page)
539 vaddr = page_address(page);
eb3fa7cb 540 return vaddr;
ba395927
KA
541}
542
9ddbfb42 543void free_pgtable_page(void *vaddr)
ba395927
KA
544{
545 free_page((unsigned long)vaddr);
546}
547
548static inline void *alloc_domain_mem(void)
549{
354bb65e 550 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
551}
552
38717946 553static void free_domain_mem(void *vaddr)
ba395927
KA
554{
555 kmem_cache_free(iommu_domain_cache, vaddr);
556}
557
558static inline void * alloc_devinfo_mem(void)
559{
354bb65e 560 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
561}
562
563static inline void free_devinfo_mem(void *vaddr)
564{
565 kmem_cache_free(iommu_devinfo_cache, vaddr);
566}
567
28ccce0d
JR
568static inline int domain_type_is_si(struct dmar_domain *domain)
569{
570 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
571}
572
ddf09b6d
LB
573static inline bool domain_use_first_level(struct dmar_domain *domain)
574{
575 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
576}
577
162d1b10
JL
578static inline int domain_pfn_supported(struct dmar_domain *domain,
579 unsigned long pfn)
580{
581 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
582
583 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
584}
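/*
 * Editor's example: for a domain with agaw 2 (48-bit address width) the
 * check above computes addr_width = 48 - 12 = 36, so any pfn below 2^36
 * (i.e. any IOVA below 2^48) is reported as supported.
 */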
585
4ed0d3e6 586static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
587{
588 unsigned long sagaw;
589 int agaw = -1;
590
591 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 592 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
593 agaw >= 0; agaw--) {
594 if (test_bit(agaw, &sagaw))
595 break;
596 }
597
598 return agaw;
599}
600
4ed0d3e6
FY
601/*
602 * Calculate max SAGAW for each iommu.
603 */
604int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
605{
606 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
607}
608
609/*
610 * calculate agaw for each iommu.
611 * "SAGAW" may be different across iommus, use a default agaw, and
612 * get a supported less agaw for iommus that don't support the default agaw.
613 */
614int iommu_calculate_agaw(struct intel_iommu *iommu)
615{
616 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
617}
618
2c2e2c38 619/* This function only returns a single iommu in a domain */
9ddbfb42 620struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
8c11e798
WH
621{
622 int iommu_id;
623
2c2e2c38 624 /* si_domain and vm domain should not get here. */
fa954e68
LB
625 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
626 return NULL;
627
29a27719
JR
628 for_each_domain_iommu(iommu_id, domain)
629 break;
630
8c11e798
WH
631 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
632 return NULL;
633
634 return g_iommus[iommu_id];
635}
636
8e604097
WH
637static void domain_update_iommu_coherency(struct dmar_domain *domain)
638{
d0501960
DW
639 struct dmar_drhd_unit *drhd;
640 struct intel_iommu *iommu;
2f119c78
QL
641 bool found = false;
642 int i;
2e12bc29 643
d0501960 644 domain->iommu_coherency = 1;
8e604097 645
29a27719 646 for_each_domain_iommu(i, domain) {
2f119c78 647 found = true;
8e604097
WH
648 if (!ecap_coherent(g_iommus[i]->ecap)) {
649 domain->iommu_coherency = 0;
650 break;
651 }
8e604097 652 }
d0501960
DW
653 if (found)
654 return;
655
656 /* No hardware attached; use lowest common denominator */
657 rcu_read_lock();
658 for_each_active_iommu(iommu, drhd) {
659 if (!ecap_coherent(iommu->ecap)) {
660 domain->iommu_coherency = 0;
661 break;
662 }
663 }
664 rcu_read_unlock();
8e604097
WH
665}
666
161f6934 667static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 668{
161f6934
JL
669 struct dmar_drhd_unit *drhd;
670 struct intel_iommu *iommu;
671 int ret = 1;
58c610bd 672
161f6934
JL
673 rcu_read_lock();
674 for_each_active_iommu(iommu, drhd) {
675 if (iommu != skip) {
676 if (!ecap_sc_support(iommu->ecap)) {
677 ret = 0;
678 break;
679 }
58c610bd 680 }
58c610bd 681 }
161f6934
JL
682 rcu_read_unlock();
683
684 return ret;
58c610bd
SY
685}
686
64229e8f
LB
687static int domain_update_iommu_superpage(struct dmar_domain *domain,
688 struct intel_iommu *skip)
6dd9a7c7 689{
8140a95d 690 struct dmar_drhd_unit *drhd;
161f6934 691 struct intel_iommu *iommu;
64229e8f 692 int mask = 0x3;
6dd9a7c7
YS
693
694 if (!intel_iommu_superpage) {
161f6934 695 return 0;
6dd9a7c7
YS
696 }
697
8140a95d 698 /* set iommu_superpage to the smallest common denominator */
0e242612 699 rcu_read_lock();
8140a95d 700 for_each_active_iommu(iommu, drhd) {
161f6934 701 if (iommu != skip) {
64229e8f
LB
702 if (domain && domain_use_first_level(domain)) {
703 if (!cap_fl1gp_support(iommu->cap))
704 mask = 0x1;
705 } else {
706 mask &= cap_super_page_val(iommu->cap);
707 }
708
161f6934
JL
709 if (!mask)
710 break;
6dd9a7c7
YS
711 }
712 }
0e242612
JL
713 rcu_read_unlock();
714
161f6934 715 return fls(mask);
6dd9a7c7
YS
716}
717
58c610bd
SY
718/* Some capabilities may be different across iommus */
719static void domain_update_iommu_cap(struct dmar_domain *domain)
720{
721 domain_update_iommu_coherency(domain);
161f6934 722 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
64229e8f 723 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
58c610bd
SY
724}
725
26b86092
SM
726struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
727 u8 devfn, int alloc)
03ecc32c
DW
728{
729 struct root_entry *root = &iommu->root_entry[bus];
730 struct context_entry *context;
731 u64 *entry;
732
4df4eab1 733 entry = &root->lo;
765b6a98 734 if (sm_supported(iommu)) {
03ecc32c
DW
735 if (devfn >= 0x80) {
736 devfn -= 0x80;
737 entry = &root->hi;
738 }
739 devfn *= 2;
740 }
03ecc32c
DW
741 if (*entry & 1)
742 context = phys_to_virt(*entry & VTD_PAGE_MASK);
743 else {
744 unsigned long phy_addr;
745 if (!alloc)
746 return NULL;
747
748 context = alloc_pgtable_page(iommu->node);
749 if (!context)
750 return NULL;
751
752 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
753 phy_addr = virt_to_phys((void *)context);
754 *entry = phy_addr | 1;
755 __iommu_flush_cache(iommu, entry, sizeof(*entry));
756 }
757 return &context[devfn];
758}
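/*
 * Editor's note: in scalable mode the 256 context entries of a bus are
 * split across the two halves of the root entry -- devfn 0x00-0x7f is
 * looked up through root->lo and devfn 0x80-0xff through root->hi --
 * and each source-id occupies two 128-bit context_entry slots, hence the
 * "devfn *= 2" above.
 */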
759
4ed6a540
DW
760static int iommu_dummy(struct device *dev)
761{
762 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
763}
764
1d461597
JR
765static bool attach_deferred(struct device *dev)
766{
767 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
768}
769
b9a7f981
EA
770/**
771 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
772 * sub-hierarchy of a candidate PCI-PCI bridge
773 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
774 * @bridge: the candidate PCI-PCI bridge
775 *
776 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
777 */
778static bool
779is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
780{
781 struct pci_dev *pdev, *pbridge;
782
783 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
784 return false;
785
786 pdev = to_pci_dev(dev);
787 pbridge = to_pci_dev(bridge);
788
789 if (pbridge->subordinate &&
790 pbridge->subordinate->number <= pdev->bus->number &&
791 pbridge->subordinate->busn_res.end >= pdev->bus->number)
792 return true;
793
794 return false;
795}
796
156baca8 797static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
798{
799 struct dmar_drhd_unit *drhd = NULL;
b683b230 800 struct intel_iommu *iommu;
156baca8 801 struct device *tmp;
b9a7f981 802 struct pci_dev *pdev = NULL;
aa4d066a 803 u16 segment = 0;
c7151a8d
WH
804 int i;
805
4ed6a540
DW
806 if (iommu_dummy(dev))
807 return NULL;
808
156baca8 809 if (dev_is_pci(dev)) {
1c387188
AR
810 struct pci_dev *pf_pdev;
811
e3560ee4 812 pdev = pci_real_dma_dev(to_pci_dev(dev));
5823e330 813
1c387188
AR
814 /* VFs aren't listed in scope tables; we need to look up
815 * the PF instead to find the IOMMU. */
816 pf_pdev = pci_physfn(pdev);
817 dev = &pf_pdev->dev;
156baca8 818 segment = pci_domain_nr(pdev->bus);
ca5b74d2 819 } else if (has_acpi_companion(dev))
156baca8
DW
820 dev = &ACPI_COMPANION(dev)->dev;
821
0e242612 822 rcu_read_lock();
b683b230 823 for_each_active_iommu(iommu, drhd) {
156baca8 824 if (pdev && segment != drhd->segment)
276dbf99 825 continue;
c7151a8d 826
b683b230 827 for_each_active_dev_scope(drhd->devices,
156baca8
DW
828 drhd->devices_cnt, i, tmp) {
829 if (tmp == dev) {
1c387188
AR
830 /* For a VF use its original BDF# not that of the PF
831 * which we used for the IOMMU lookup. Strictly speaking
832 * we could do this for all PCI devices; we only need to
833 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 834 if (pdev && pdev->is_virtfn)
1c387188
AR
835 goto got_pdev;
836
156baca8
DW
837 *bus = drhd->devices[i].bus;
838 *devfn = drhd->devices[i].devfn;
b683b230 839 goto out;
156baca8
DW
840 }
841
b9a7f981 842 if (is_downstream_to_pci_bridge(dev, tmp))
156baca8 843 goto got_pdev;
924b6231 844 }
c7151a8d 845
156baca8
DW
846 if (pdev && drhd->include_all) {
847 got_pdev:
848 *bus = pdev->bus->number;
849 *devfn = pdev->devfn;
b683b230 850 goto out;
156baca8 851 }
c7151a8d 852 }
b683b230 853 iommu = NULL;
156baca8 854 out:
0e242612 855 rcu_read_unlock();
c7151a8d 856
b683b230 857 return iommu;
c7151a8d
WH
858}
859
5331fe6f
WH
860static void domain_flush_cache(struct dmar_domain *domain,
861 void *addr, int size)
862{
863 if (!domain->iommu_coherency)
864 clflush_cache_range(addr, size);
865}
866
ba395927
KA
867static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
868{
ba395927 869 struct context_entry *context;
03ecc32c 870 int ret = 0;
ba395927
KA
871 unsigned long flags;
872
873 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
874 context = iommu_context_addr(iommu, bus, devfn, 0);
875 if (context)
876 ret = context_present(context);
ba395927
KA
877 spin_unlock_irqrestore(&iommu->lock, flags);
878 return ret;
879}
880
ba395927
KA
881static void free_context_table(struct intel_iommu *iommu)
882{
ba395927
KA
883 int i;
884 unsigned long flags;
885 struct context_entry *context;
886
887 spin_lock_irqsave(&iommu->lock, flags);
888 if (!iommu->root_entry) {
889 goto out;
890 }
891 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 892 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
893 if (context)
894 free_pgtable_page(context);
03ecc32c 895
765b6a98 896 if (!sm_supported(iommu))
03ecc32c
DW
897 continue;
898
899 context = iommu_context_addr(iommu, i, 0x80, 0);
900 if (context)
901 free_pgtable_page(context);
902
ba395927
KA
903 }
904 free_pgtable_page(iommu->root_entry);
905 iommu->root_entry = NULL;
906out:
907 spin_unlock_irqrestore(&iommu->lock, flags);
908}
909
b026fd28 910static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 911 unsigned long pfn, int *target_level)
ba395927 912{
e083ea5b 913 struct dma_pte *parent, *pte;
ba395927 914 int level = agaw_to_level(domain->agaw);
4399c8bf 915 int offset;
ba395927
KA
916
917 BUG_ON(!domain->pgd);
f9423606 918
162d1b10 919 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
920 /* Address beyond IOMMU's addressing capabilities. */
921 return NULL;
922
ba395927
KA
923 parent = domain->pgd;
924
5cf0a76f 925 while (1) {
ba395927
KA
926 void *tmp_page;
927
b026fd28 928 offset = pfn_level_offset(pfn, level);
ba395927 929 pte = &parent[offset];
5cf0a76f 930 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 931 break;
5cf0a76f 932 if (level == *target_level)
ba395927
KA
933 break;
934
19c239ce 935 if (!dma_pte_present(pte)) {
c85994e4
DW
936 uint64_t pteval;
937
4c923d47 938 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 939
206a73c1 940 if (!tmp_page)
ba395927 941 return NULL;
206a73c1 942
c85994e4 943 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 944 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
ddf09b6d
LB
945 if (domain_use_first_level(domain))
946 pteval |= DMA_FL_PTE_XD;
effad4b5 947 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
948 /* Someone else set it while we were thinking; use theirs. */
949 free_pgtable_page(tmp_page);
effad4b5 950 else
c85994e4 951 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 952 }
5cf0a76f
DW
953 if (level == 1)
954 break;
955
19c239ce 956 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
957 level--;
958 }
959
5cf0a76f
DW
960 if (!*target_level)
961 *target_level = level;
962
ba395927
KA
963 return pte;
964}
965
966/* return address's pte at specific level */
90dcfb5e
DW
967static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
968 unsigned long pfn,
6dd9a7c7 969 int level, int *large_page)
ba395927 970{
e083ea5b 971 struct dma_pte *parent, *pte;
ba395927
KA
972 int total = agaw_to_level(domain->agaw);
973 int offset;
974
975 parent = domain->pgd;
976 while (level <= total) {
90dcfb5e 977 offset = pfn_level_offset(pfn, total);
ba395927
KA
978 pte = &parent[offset];
979 if (level == total)
980 return pte;
981
6dd9a7c7
YS
982 if (!dma_pte_present(pte)) {
983 *large_page = total;
ba395927 984 break;
6dd9a7c7
YS
985 }
986
e16922af 987 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
988 *large_page = total;
989 return pte;
990 }
991
19c239ce 992 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
993 total--;
994 }
995 return NULL;
996}
997
ba395927 998/* clear last level pte, a tlb flush should be followed */
5cf0a76f 999static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1000 unsigned long start_pfn,
1001 unsigned long last_pfn)
ba395927 1002{
e083ea5b 1003 unsigned int large_page;
310a5ab9 1004 struct dma_pte *first_pte, *pte;
66eae846 1005
162d1b10
JL
1006 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1007 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1008 BUG_ON(start_pfn > last_pfn);
ba395927 1009
04b18e65 1010 /* we don't need lock here; nobody else touches the iova range */
59c36286 1011 do {
6dd9a7c7
YS
1012 large_page = 1;
1013 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1014 if (!pte) {
6dd9a7c7 1015 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1016 continue;
1017 }
6dd9a7c7 1018 do {
310a5ab9 1019 dma_clear_pte(pte);
6dd9a7c7 1020 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1021 pte++;
75e6bf96
DW
1022 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1023
310a5ab9
DW
1024 domain_flush_cache(domain, first_pte,
1025 (void *)pte - (void *)first_pte);
59c36286
DW
1026
1027 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1028}
1029
3269ee0b 1030static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
1031 int retain_level, struct dma_pte *pte,
1032 unsigned long pfn, unsigned long start_pfn,
1033 unsigned long last_pfn)
3269ee0b
AW
1034{
1035 pfn = max(start_pfn, pfn);
1036 pte = &pte[pfn_level_offset(pfn, level)];
1037
1038 do {
1039 unsigned long level_pfn;
1040 struct dma_pte *level_pte;
1041
1042 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1043 goto next;
1044
f7116e11 1045 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1046 level_pte = phys_to_virt(dma_pte_addr(pte));
1047
bc24c571
DD
1048 if (level > 2) {
1049 dma_pte_free_level(domain, level - 1, retain_level,
1050 level_pte, level_pfn, start_pfn,
1051 last_pfn);
1052 }
3269ee0b 1053
bc24c571
DD
1054 /*
1055 * Free the page table if we're below the level we want to
1056 * retain and the range covers the entire table.
1057 */
1058 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1059 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1060 dma_clear_pte(pte);
1061 domain_flush_cache(domain, pte, sizeof(*pte));
1062 free_pgtable_page(level_pte);
1063 }
1064next:
1065 pfn += level_size(level);
1066 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1067}
1068
bc24c571
DD
1069/*
1070 * clear last level (leaf) ptes and free page table pages below the
1071 * level we wish to keep intact.
1072 */
ba395927 1073static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1074 unsigned long start_pfn,
bc24c571
DD
1075 unsigned long last_pfn,
1076 int retain_level)
ba395927 1077{
162d1b10
JL
1078 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1079 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1080 BUG_ON(start_pfn > last_pfn);
ba395927 1081
d41a4adb
JL
1082 dma_pte_clear_range(domain, start_pfn, last_pfn);
1083
f3a0a52f 1084 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1085 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1086 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1087
ba395927 1088 /* free pgd */
d794dc9b 1089 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1090 free_pgtable_page(domain->pgd);
1091 domain->pgd = NULL;
1092 }
1093}
1094
ea8ea460
DW
1095/* When a page at a given level is being unlinked from its parent, we don't
1096 need to *modify* it at all. All we need to do is make a list of all the
1097 pages which can be freed just as soon as we've flushed the IOTLB and we
1098 know the hardware page-walk will no longer touch them.
1099 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1100 be freed. */
1101static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1102 int level, struct dma_pte *pte,
1103 struct page *freelist)
1104{
1105 struct page *pg;
1106
1107 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1108 pg->freelist = freelist;
1109 freelist = pg;
1110
1111 if (level == 1)
1112 return freelist;
1113
adeb2590
JL
1114 pte = page_address(pg);
1115 do {
ea8ea460
DW
1116 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1117 freelist = dma_pte_list_pagetables(domain, level - 1,
1118 pte, freelist);
adeb2590
JL
1119 pte++;
1120 } while (!first_pte_in_page(pte));
ea8ea460
DW
1121
1122 return freelist;
1123}
1124
1125static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1126 struct dma_pte *pte, unsigned long pfn,
1127 unsigned long start_pfn,
1128 unsigned long last_pfn,
1129 struct page *freelist)
1130{
1131 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1132
1133 pfn = max(start_pfn, pfn);
1134 pte = &pte[pfn_level_offset(pfn, level)];
1135
1136 do {
1137 unsigned long level_pfn;
1138
1139 if (!dma_pte_present(pte))
1140 goto next;
1141
1142 level_pfn = pfn & level_mask(level);
1143
1144 /* If range covers entire pagetable, free it */
1145 if (start_pfn <= level_pfn &&
1146 last_pfn >= level_pfn + level_size(level) - 1) {
 1147 /* These subordinate page tables are going away entirely. Don't
1148 bother to clear them; we're just going to *free* them. */
1149 if (level > 1 && !dma_pte_superpage(pte))
1150 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1151
1152 dma_clear_pte(pte);
1153 if (!first_pte)
1154 first_pte = pte;
1155 last_pte = pte;
1156 } else if (level > 1) {
1157 /* Recurse down into a level that isn't *entirely* obsolete */
1158 freelist = dma_pte_clear_level(domain, level - 1,
1159 phys_to_virt(dma_pte_addr(pte)),
1160 level_pfn, start_pfn, last_pfn,
1161 freelist);
1162 }
1163next:
1164 pfn += level_size(level);
1165 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1166
1167 if (first_pte)
1168 domain_flush_cache(domain, first_pte,
1169 (void *)++last_pte - (void *)first_pte);
1170
1171 return freelist;
1172}
1173
1174/* We can't just free the pages because the IOMMU may still be walking
1175 the page tables, and may have cached the intermediate levels. The
1176 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1177static struct page *domain_unmap(struct dmar_domain *domain,
1178 unsigned long start_pfn,
1179 unsigned long last_pfn)
ea8ea460 1180{
e083ea5b 1181 struct page *freelist;
ea8ea460 1182
162d1b10
JL
1183 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1184 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1185 BUG_ON(start_pfn > last_pfn);
1186
1187 /* we don't need lock here; nobody else touches the iova range */
1188 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1189 domain->pgd, 0, start_pfn, last_pfn, NULL);
1190
1191 /* free pgd */
1192 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1193 struct page *pgd_page = virt_to_page(domain->pgd);
1194 pgd_page->freelist = freelist;
1195 freelist = pgd_page;
1196
1197 domain->pgd = NULL;
1198 }
1199
1200 return freelist;
1201}
1202
b690420a 1203static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1204{
1205 struct page *pg;
1206
1207 while ((pg = freelist)) {
1208 freelist = pg->freelist;
1209 free_pgtable_page(page_address(pg));
1210 }
1211}
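/*
 * Editor's note: per the comment above domain_unmap(), the expected calling
 * pattern is domain_unmap() -> IOTLB invalidation -> dma_free_pagelist(),
 * so that no page-table page is reused while the hardware may still hold
 * cached references to it.
 */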
1212
13cf0174
JR
1213static void iova_entry_free(unsigned long data)
1214{
1215 struct page *freelist = (struct page *)data;
1216
1217 dma_free_pagelist(freelist);
1218}
1219
ba395927
KA
1220/* iommu handling */
1221static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1222{
1223 struct root_entry *root;
1224 unsigned long flags;
1225
4c923d47 1226 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1227 if (!root) {
9f10e5bf 1228 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1229 iommu->name);
ba395927 1230 return -ENOMEM;
ffebeb46 1231 }
ba395927 1232
5b6985ce 1233 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1234
1235 spin_lock_irqsave(&iommu->lock, flags);
1236 iommu->root_entry = root;
1237 spin_unlock_irqrestore(&iommu->lock, flags);
1238
1239 return 0;
1240}
1241
ba395927
KA
1242static void iommu_set_root_entry(struct intel_iommu *iommu)
1243{
03ecc32c 1244 u64 addr;
c416daa9 1245 u32 sts;
ba395927
KA
1246 unsigned long flag;
1247
03ecc32c 1248 addr = virt_to_phys(iommu->root_entry);
7373a8cc
LB
1249 if (sm_supported(iommu))
1250 addr |= DMA_RTADDR_SMT;
ba395927 1251
1f5b3c3f 1252 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1253 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1254
c416daa9 1255 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1256
1257 /* Make sure hardware complete it */
1258 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1259 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1260
1f5b3c3f 1261 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1262}
1263
6f7db75e 1264void iommu_flush_write_buffer(struct intel_iommu *iommu)
ba395927
KA
1265{
1266 u32 val;
1267 unsigned long flag;
1268
9af88143 1269 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1270 return;
ba395927 1271
1f5b3c3f 1272 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1273 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1274
1275 /* Make sure hardware complete it */
1276 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1277 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1278
1f5b3c3f 1279 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1280}
1281
1282/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1283static void __iommu_flush_context(struct intel_iommu *iommu,
1284 u16 did, u16 source_id, u8 function_mask,
1285 u64 type)
ba395927
KA
1286{
1287 u64 val = 0;
1288 unsigned long flag;
1289
ba395927
KA
1290 switch (type) {
1291 case DMA_CCMD_GLOBAL_INVL:
1292 val = DMA_CCMD_GLOBAL_INVL;
1293 break;
1294 case DMA_CCMD_DOMAIN_INVL:
1295 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1296 break;
1297 case DMA_CCMD_DEVICE_INVL:
1298 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1299 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1300 break;
1301 default:
1302 BUG();
1303 }
1304 val |= DMA_CCMD_ICC;
1305
1f5b3c3f 1306 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1307 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1308
1309 /* Make sure hardware complete it */
1310 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1311 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1312
1f5b3c3f 1313 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1314}
1315
ba395927 1316/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1317static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1318 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1319{
1320 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1321 u64 val = 0, val_iva = 0;
1322 unsigned long flag;
1323
ba395927
KA
1324 switch (type) {
1325 case DMA_TLB_GLOBAL_FLUSH:
1326 /* global flush doesn't need set IVA_REG */
1327 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1328 break;
1329 case DMA_TLB_DSI_FLUSH:
1330 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1331 break;
1332 case DMA_TLB_PSI_FLUSH:
1333 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1334 /* IH bit is passed in as part of address */
ba395927
KA
1335 val_iva = size_order | addr;
1336 break;
1337 default:
1338 BUG();
1339 }
1340 /* Note: set drain read/write */
1341#if 0
1342 /*
 1343 * This is probably meant to be extra safe. It looks like we can
1344 * ignore it without any impact.
1345 */
1346 if (cap_read_drain(iommu->cap))
1347 val |= DMA_TLB_READ_DRAIN;
1348#endif
1349 if (cap_write_drain(iommu->cap))
1350 val |= DMA_TLB_WRITE_DRAIN;
1351
1f5b3c3f 1352 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1353 /* Note: Only uses first TLB reg currently */
1354 if (val_iva)
1355 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1356 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1357
1358 /* Make sure hardware complete it */
1359 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1360 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1361
1f5b3c3f 1362 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1363
1364 /* check IOTLB invalidation granularity */
1365 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1366 pr_err("Flush IOTLB failed\n");
ba395927 1367 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1368 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1369 (unsigned long long)DMA_TLB_IIRG(type),
1370 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1371}
1372
64ae892b
DW
1373static struct device_domain_info *
1374iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1375 u8 bus, u8 devfn)
93a23a72 1376{
93a23a72 1377 struct device_domain_info *info;
93a23a72 1378
55d94043
JR
1379 assert_spin_locked(&device_domain_lock);
1380
93a23a72
YZ
1381 if (!iommu->qi)
1382 return NULL;
1383
93a23a72 1384 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1385 if (info->iommu == iommu && info->bus == bus &&
1386 info->devfn == devfn) {
b16d0cb9
DW
1387 if (info->ats_supported && info->dev)
1388 return info;
93a23a72
YZ
1389 break;
1390 }
93a23a72 1391
b16d0cb9 1392 return NULL;
93a23a72
YZ
1393}
1394
0824c592
OP
1395static void domain_update_iotlb(struct dmar_domain *domain)
1396{
1397 struct device_domain_info *info;
1398 bool has_iotlb_device = false;
1399
1400 assert_spin_locked(&device_domain_lock);
1401
1402 list_for_each_entry(info, &domain->devices, link) {
1403 struct pci_dev *pdev;
1404
1405 if (!info->dev || !dev_is_pci(info->dev))
1406 continue;
1407
1408 pdev = to_pci_dev(info->dev);
1409 if (pdev->ats_enabled) {
1410 has_iotlb_device = true;
1411 break;
1412 }
1413 }
1414
1415 domain->has_iotlb_device = has_iotlb_device;
1416}
1417
93a23a72 1418static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1419{
fb0cc3aa
BH
1420 struct pci_dev *pdev;
1421
0824c592
OP
1422 assert_spin_locked(&device_domain_lock);
1423
0bcb3e28 1424 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1425 return;
1426
fb0cc3aa 1427 pdev = to_pci_dev(info->dev);
1c48db44
JP
 1428 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1429 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1430 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1431 * reserved, which should be set to 0.
1432 */
1433 if (!ecap_dit(info->iommu->ecap))
1434 info->pfsid = 0;
1435 else {
1436 struct pci_dev *pf_pdev;
1437
1438 /* pdev will be returned if device is not a vf */
1439 pf_pdev = pci_physfn(pdev);
cc49baa9 1440 info->pfsid = pci_dev_id(pf_pdev);
1c48db44 1441 }
fb0cc3aa 1442
b16d0cb9
DW
1443#ifdef CONFIG_INTEL_IOMMU_SVM
1444 /* The PCIe spec, in its wisdom, declares that the behaviour of
1445 the device if you enable PASID support after ATS support is
1446 undefined. So always enable PASID support on devices which
1447 have it, even if we can't yet know if we're ever going to
1448 use it. */
1449 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1450 info->pasid_enabled = 1;
1451
1b84778a
KS
1452 if (info->pri_supported &&
1453 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1454 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
b16d0cb9
DW
1455 info->pri_enabled = 1;
1456#endif
fb58fdcd 1457 if (!pdev->untrusted && info->ats_supported &&
61363c14 1458 pci_ats_page_aligned(pdev) &&
fb58fdcd 1459 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
b16d0cb9 1460 info->ats_enabled = 1;
0824c592 1461 domain_update_iotlb(info->domain);
b16d0cb9
DW
1462 info->ats_qdep = pci_ats_queue_depth(pdev);
1463 }
93a23a72
YZ
1464}
1465
1466static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1467{
b16d0cb9
DW
1468 struct pci_dev *pdev;
1469
0824c592
OP
1470 assert_spin_locked(&device_domain_lock);
1471
da972fb1 1472 if (!dev_is_pci(info->dev))
93a23a72
YZ
1473 return;
1474
b16d0cb9
DW
1475 pdev = to_pci_dev(info->dev);
1476
1477 if (info->ats_enabled) {
1478 pci_disable_ats(pdev);
1479 info->ats_enabled = 0;
0824c592 1480 domain_update_iotlb(info->domain);
b16d0cb9
DW
1481 }
1482#ifdef CONFIG_INTEL_IOMMU_SVM
1483 if (info->pri_enabled) {
1484 pci_disable_pri(pdev);
1485 info->pri_enabled = 0;
1486 }
1487 if (info->pasid_enabled) {
1488 pci_disable_pasid(pdev);
1489 info->pasid_enabled = 0;
1490 }
1491#endif
93a23a72
YZ
1492}
1493
1494static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1495 u64 addr, unsigned mask)
1496{
1497 u16 sid, qdep;
1498 unsigned long flags;
1499 struct device_domain_info *info;
1500
0824c592
OP
1501 if (!domain->has_iotlb_device)
1502 return;
1503
93a23a72
YZ
1504 spin_lock_irqsave(&device_domain_lock, flags);
1505 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1506 if (!info->ats_enabled)
93a23a72
YZ
1507 continue;
1508
1509 sid = info->bus << 8 | info->devfn;
b16d0cb9 1510 qdep = info->ats_qdep;
1c48db44
JP
1511 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1512 qdep, addr, mask);
93a23a72
YZ
1513 }
1514 spin_unlock_irqrestore(&device_domain_lock, flags);
1515}
1516
33cd6e64
LB
1517static void domain_flush_piotlb(struct intel_iommu *iommu,
1518 struct dmar_domain *domain,
1519 u64 addr, unsigned long npages, bool ih)
1520{
1521 u16 did = domain->iommu_did[iommu->seq_id];
1522
1523 if (domain->default_pasid)
1524 qi_flush_piotlb(iommu, did, domain->default_pasid,
1525 addr, npages, ih);
1526
1527 if (!list_empty(&domain->devices))
1528 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
1529}
1530
a1ddcbe9
JR
1531static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1532 struct dmar_domain *domain,
1533 unsigned long pfn, unsigned int pages,
1534 int ih, int map)
ba395927 1535{
9dd2fe89 1536 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1537 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1538 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1539
ba395927
KA
1540 BUG_ON(pages == 0);
1541
ea8ea460
DW
1542 if (ih)
1543 ih = 1 << 6;
33cd6e64
LB
1544
1545 if (domain_use_first_level(domain)) {
1546 domain_flush_piotlb(iommu, domain, addr, pages, ih);
1547 } else {
1548 /*
1549 * Fallback to domain selective flush if no PSI support or
1550 * the size is too big. PSI requires page size to be 2 ^ x,
1551 * and the base address is naturally aligned to the size.
1552 */
1553 if (!cap_pgsel_inv(iommu->cap) ||
1554 mask > cap_max_amask_val(iommu->cap))
1555 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1556 DMA_TLB_DSI_FLUSH);
1557 else
1558 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1559 DMA_TLB_PSI_FLUSH);
1560 }
bf92df30
YZ
1561
1562 /*
82653633
NA
 1563 * In caching mode, changes of pages from non-present to present require
 1564 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1565 */
82653633 1566 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1567 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1568}
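/*
 * Editor's worked example for the PSI path above: a request to flush 9
 * pages is rounded up to mask = ilog2(roundup_pow_of_two(9)) = 4, i.e. a
 * 16-page (64KiB with 4KiB pages) naturally aligned region, because
 * page-selective invalidation only accepts power-of-two, aligned ranges.
 */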
1569
eed91a0b
PX
1570/* Notification for newly created mappings */
1571static inline void __mapping_notify_one(struct intel_iommu *iommu,
1572 struct dmar_domain *domain,
1573 unsigned long pfn, unsigned int pages)
1574{
33cd6e64
LB
1575 /*
1576 * It's a non-present to present mapping. Only flush if caching mode
1577 * and second level.
1578 */
1579 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
eed91a0b
PX
1580 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1581 else
1582 iommu_flush_write_buffer(iommu);
1583}
1584
13cf0174
JR
1585static void iommu_flush_iova(struct iova_domain *iovad)
1586{
1587 struct dmar_domain *domain;
1588 int idx;
1589
1590 domain = container_of(iovad, struct dmar_domain, iovad);
1591
1592 for_each_domain_iommu(idx, domain) {
1593 struct intel_iommu *iommu = g_iommus[idx];
1594 u16 did = domain->iommu_did[iommu->seq_id];
1595
33cd6e64
LB
1596 if (domain_use_first_level(domain))
1597 domain_flush_piotlb(iommu, domain, 0, -1, 0);
1598 else
1599 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1600 DMA_TLB_DSI_FLUSH);
13cf0174
JR
1601
1602 if (!cap_caching_mode(iommu->cap))
1603 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1604 0, MAX_AGAW_PFN_WIDTH);
1605 }
1606}
1607
f8bab735 1608static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1609{
1610 u32 pmen;
1611 unsigned long flags;
1612
5bb71fc7
LB
1613 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1614 return;
1615
1f5b3c3f 1616 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1617 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1618 pmen &= ~DMA_PMEN_EPM;
1619 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1620
1621 /* wait for the protected region status bit to clear */
1622 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1623 readl, !(pmen & DMA_PMEN_PRS), pmen);
1624
1f5b3c3f 1625 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1626}
1627
2a41ccee 1628static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1629{
1630 u32 sts;
1631 unsigned long flags;
1632
1f5b3c3f 1633 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1634 iommu->gcmd |= DMA_GCMD_TE;
1635 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1636
1637 /* Make sure hardware complete it */
1638 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1639 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1640
1f5b3c3f 1641 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1642}
1643
2a41ccee 1644static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1645{
1646 u32 sts;
1647 unsigned long flag;
1648
1f5b3c3f 1649 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1650 iommu->gcmd &= ~DMA_GCMD_TE;
1651 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1652
1653 /* Make sure hardware complete it */
1654 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1655 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1656
1f5b3c3f 1657 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1658}
1659
1660static int iommu_init_domains(struct intel_iommu *iommu)
1661{
8bf47816
JR
1662 u32 ndomains, nlongs;
1663 size_t size;
ba395927
KA
1664
1665 ndomains = cap_ndoms(iommu->cap);
8bf47816 1666 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1667 iommu->name, ndomains);
ba395927
KA
1668 nlongs = BITS_TO_LONGS(ndomains);
1669
94a91b50
DD
1670 spin_lock_init(&iommu->lock);
1671
ba395927
KA
1672 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1673 if (!iommu->domain_ids) {
9f10e5bf
JR
1674 pr_err("%s: Allocating domain id array failed\n",
1675 iommu->name);
ba395927
KA
1676 return -ENOMEM;
1677 }
8bf47816 1678
86f004c7 1679 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1680 iommu->domains = kzalloc(size, GFP_KERNEL);
1681
1682 if (iommu->domains) {
1683 size = 256 * sizeof(struct dmar_domain *);
1684 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1685 }
1686
1687 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1688 pr_err("%s: Allocating domain array failed\n",
1689 iommu->name);
852bdb04 1690 kfree(iommu->domain_ids);
8bf47816 1691 kfree(iommu->domains);
852bdb04 1692 iommu->domain_ids = NULL;
8bf47816 1693 iommu->domains = NULL;
ba395927
KA
1694 return -ENOMEM;
1695 }
1696
1697 /*
c0e8a6c8
JR
1698 * If Caching mode is set, then invalid translations are tagged
1699 * with domain-id 0, hence we need to pre-allocate it. We also
1700 * use domain-id 0 as a marker for non-allocated domain-id, so
1701 * make sure it is not used for a real domain.
ba395927 1702 */
c0e8a6c8
JR
1703 set_bit(0, iommu->domain_ids);
1704
3b33d4ab
LB
1705 /*
1706 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1707 * entry for first-level or pass-through translation modes should
1708 * be programmed with a domain id different from those used for
1709 * second-level or nested translation. We reserve a domain id for
1710 * this purpose.
1711 */
1712 if (sm_supported(iommu))
1713 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1714
ba395927
KA
1715 return 0;
1716}
ba395927 1717
ffebeb46 1718static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1719{
29a27719 1720 struct device_domain_info *info, *tmp;
55d94043 1721 unsigned long flags;
ba395927 1722
29a27719
JR
1723 if (!iommu->domains || !iommu->domain_ids)
1724 return;
a4eaa86c 1725
55d94043 1726 spin_lock_irqsave(&device_domain_lock, flags);
29a27719 1727 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
29a27719
JR
1728 if (info->iommu != iommu)
1729 continue;
1730
1731 if (!info->dev || !info->domain)
1732 continue;
1733
bea64033 1734 __dmar_remove_one_dev_info(info);
ba395927 1735 }
55d94043 1736 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1737
1738 if (iommu->gcmd & DMA_GCMD_TE)
1739 iommu_disable_translation(iommu);
ffebeb46 1740}
ba395927 1741
ffebeb46
JL
1742static void free_dmar_iommu(struct intel_iommu *iommu)
1743{
1744 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1745 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1746 int i;
1747
1748 for (i = 0; i < elems; i++)
1749 kfree(iommu->domains[i]);
ffebeb46
JL
1750 kfree(iommu->domains);
1751 kfree(iommu->domain_ids);
1752 iommu->domains = NULL;
1753 iommu->domain_ids = NULL;
1754 }
ba395927 1755
d9630fe9
WH
1756 g_iommus[iommu->seq_id] = NULL;
1757
ba395927
KA
1758 /* free context mapping */
1759 free_context_table(iommu);
8a94ade4
DW
1760
1761#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 1762 if (pasid_supported(iommu)) {
a222a7f0
DW
1763 if (ecap_prs(iommu->ecap))
1764 intel_svm_finish_prq(iommu);
a222a7f0 1765 }
8a94ade4 1766#endif
ba395927
KA
1767}
1768
a1948f2e
LB
1769/*
1770 * Check and return whether first level is used by default for
b802d070 1771 * DMA translation.
a1948f2e
LB
1772 */
1773static bool first_level_by_default(void)
1774{
1775 struct dmar_drhd_unit *drhd;
1776 struct intel_iommu *iommu;
b802d070 1777 static int first_level_support = -1;
a1948f2e
LB
1778
1779 if (likely(first_level_support != -1))
1780 return first_level_support;
1781
1782 first_level_support = 1;
1783
1784 rcu_read_lock();
1785 for_each_active_iommu(iommu, drhd) {
1786 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
1787 first_level_support = 0;
1788 break;
1789 }
1790 }
1791 rcu_read_unlock();
1792
1793 return first_level_support;
1794}
1795
ab8dfe25 1796static struct dmar_domain *alloc_domain(int flags)
ba395927 1797{
ba395927 1798 struct dmar_domain *domain;
ba395927
KA
1799
1800 domain = alloc_domain_mem();
1801 if (!domain)
1802 return NULL;
1803
ab8dfe25 1804 memset(domain, 0, sizeof(*domain));
98fa15f3 1805 domain->nid = NUMA_NO_NODE;
ab8dfe25 1806 domain->flags = flags;
a1948f2e
LB
1807 if (first_level_by_default())
1808 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
0824c592 1809 domain->has_iotlb_device = false;
92d03cc8 1810 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1811
1812 return domain;
1813}
1814
d160aca5
JR
1815/* Must be called with iommu->lock */
1816static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1817 struct intel_iommu *iommu)
1818{
44bde614 1819 unsigned long ndomains;
55d94043 1820 int num;
44bde614 1821
55d94043 1822 assert_spin_locked(&device_domain_lock);
d160aca5 1823 assert_spin_locked(&iommu->lock);
ba395927 1824
29a27719
JR
1825 domain->iommu_refcnt[iommu->seq_id] += 1;
1826 domain->iommu_count += 1;
1827 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1828 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1829 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1830
1831 if (num >= ndomains) {
1832 pr_err("%s: No free domain ids\n", iommu->name);
1833 domain->iommu_refcnt[iommu->seq_id] -= 1;
1834 domain->iommu_count -= 1;
55d94043 1835 return -ENOSPC;
2c2e2c38 1836 }
ba395927 1837
d160aca5
JR
1838 set_bit(num, iommu->domain_ids);
1839 set_iommu_domain(iommu, num, domain);
1840
1841 domain->iommu_did[iommu->seq_id] = num;
1842 domain->nid = iommu->node;
fb170fb4 1843
fb170fb4
JL
1844 domain_update_iommu_cap(domain);
1845 }
d160aca5 1846
55d94043 1847 return 0;
fb170fb4
JL
1848}
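/*
 * Illustrative example of the per-IOMMU refcounting above: if two devices
 * behind the same IOMMU are attached to one domain, the first attach takes
 * iommu_refcnt[seq_id] from 0 to 1, allocates a domain id from
 * iommu->domain_ids and calls domain_update_iommu_cap(); the second attach
 * only bumps the refcount to 2 and reuses the same domain id.
 */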
1849
1850static int domain_detach_iommu(struct dmar_domain *domain,
1851 struct intel_iommu *iommu)
1852{
e083ea5b 1853 int num, count;
d160aca5 1854
55d94043 1855 assert_spin_locked(&device_domain_lock);
d160aca5 1856 assert_spin_locked(&iommu->lock);
fb170fb4 1857
29a27719
JR
1858 domain->iommu_refcnt[iommu->seq_id] -= 1;
1859 count = --domain->iommu_count;
1860 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1861 num = domain->iommu_did[iommu->seq_id];
1862 clear_bit(num, iommu->domain_ids);
1863 set_iommu_domain(iommu, num, NULL);
fb170fb4 1864
fb170fb4 1865 domain_update_iommu_cap(domain);
c0e8a6c8 1866 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1867 }
fb170fb4
JL
1868
1869 return count;
1870}
1871
ba395927 1872static struct iova_domain reserved_iova_list;
8a443df4 1873static struct lock_class_key reserved_rbtree_key;
ba395927 1874
51a63e67 1875static int dmar_init_reserved_ranges(void)
ba395927
KA
1876{
1877 struct pci_dev *pdev = NULL;
1878 struct iova *iova;
1879 int i;
ba395927 1880
aa3ac946 1881 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1882
8a443df4
MG
1883 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1884 &reserved_rbtree_key);
1885
ba395927
KA
1886 /* IOAPIC ranges shouldn't be accessed by DMA */
1887 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1888 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1889 if (!iova) {
9f10e5bf 1890 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1891 return -ENODEV;
1892 }
ba395927
KA
1893
1894 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1895 for_each_pci_dev(pdev) {
1896 struct resource *r;
1897
1898 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1899 r = &pdev->resource[i];
1900 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1901 continue;
1a4a4551
DW
1902 iova = reserve_iova(&reserved_iova_list,
1903 IOVA_PFN(r->start),
1904 IOVA_PFN(r->end));
51a63e67 1905 if (!iova) {
932a6523 1906 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1907 return -ENODEV;
1908 }
ba395927
KA
1909 }
1910 }
51a63e67 1911 return 0;
ba395927
KA
1912}
1913
1914static void domain_reserve_special_ranges(struct dmar_domain *domain)
1915{
1916 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1917}
1918
1919static inline int guestwidth_to_adjustwidth(int gaw)
1920{
1921 int agaw;
1922 int r = (gaw - 12) % 9;
1923
1924 if (r == 0)
1925 agaw = gaw;
1926 else
1927 agaw = gaw + 9 - r;
1928 if (agaw > 64)
1929 agaw = 64;
1930 return agaw;
1931}
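/*
 * Worked examples for guestwidth_to_adjustwidth() (illustrative):
 *   gaw = 48: (48 - 12) % 9 == 0, so agaw = 48;
 *   gaw = 40: (40 - 12) % 9 == 1, so agaw = 40 + 9 - 1 = 48, i.e. the
 *   width is rounded up to the next value the page-table levels can
 *   express (12 + 9 * n), capped at 64.
 */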
1932
301e7ee1
JR
1933static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1934 int guest_width)
1935{
1936 int adjust_width, agaw;
1937 unsigned long sagaw;
8e3391cf 1938 int ret;
301e7ee1
JR
1939
1940 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1941
10f8008f 1942 if (!intel_iommu_strict) {
8e3391cf 1943 ret = init_iova_flush_queue(&domain->iovad,
10f8008f 1944 iommu_flush_iova, iova_entry_free);
8e3391cf
LB
1945 if (ret)
1946 pr_info("iova flush queue initialization failed\n");
10f8008f 1947 }
301e7ee1
JR
1948
1949 domain_reserve_special_ranges(domain);
1950
1951 /* calculate AGAW */
1952 if (guest_width > cap_mgaw(iommu->cap))
1953 guest_width = cap_mgaw(iommu->cap);
1954 domain->gaw = guest_width;
1955 adjust_width = guestwidth_to_adjustwidth(guest_width);
1956 agaw = width_to_agaw(adjust_width);
1957 sagaw = cap_sagaw(iommu->cap);
1958 if (!test_bit(agaw, &sagaw)) {
1959 /* hardware doesn't support it, choose a bigger one */
1960 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1961 agaw = find_next_bit(&sagaw, 5, agaw);
1962 if (agaw >= 5)
1963 return -ENODEV;
1964 }
1965 domain->agaw = agaw;
1966
1967 if (ecap_coherent(iommu->ecap))
1968 domain->iommu_coherency = 1;
1969 else
1970 domain->iommu_coherency = 0;
1971
1972 if (ecap_sc_support(iommu->ecap))
1973 domain->iommu_snooping = 1;
1974 else
1975 domain->iommu_snooping = 0;
1976
1977 if (intel_iommu_superpage)
1978 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1979 else
1980 domain->iommu_superpage = 0;
1981
1982 domain->nid = iommu->node;
1983
1984 /* always allocate the top pgd */
1985 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1986 if (!domain->pgd)
1987 return -ENOMEM;
1988 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1989 return 0;
1990}
1991
ba395927
KA
1992static void domain_exit(struct dmar_domain *domain)
1993{
ba395927 1994
d160aca5 1995 /* Remove associated devices and clear attached or cached domains */
ba395927 1996 domain_remove_dev_info(domain);
92d03cc8 1997
ba395927
KA
1998 /* destroy iovas */
1999 put_iova_domain(&domain->iovad);
ba395927 2000
3ee9eca7
DS
2001 if (domain->pgd) {
2002 struct page *freelist;
ba395927 2003
3ee9eca7
DS
2004 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
2005 dma_free_pagelist(freelist);
2006 }
ea8ea460 2007
ba395927
KA
2008 free_domain_mem(domain);
2009}
2010
7373a8cc
LB
2011/*
2012 * Get the PASID directory size for scalable mode context entry.
2013 * Value of X in the PDTS field of a scalable mode context entry
2014 * indicates PASID directory with 2^(X + 7) entries.
2015 */
2016static inline unsigned long context_get_sm_pds(struct pasid_table *table)
2017{
2018 int pds, max_pde;
2019
2020 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
2021 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
2022 if (pds < 7)
2023 return 0;
2024
2025 return pds - 7;
2026}
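/*
 * Worked example for context_get_sm_pds(), assuming PASID_PDE_SHIFT is 6
 * (64 PASID-table entries per directory entry): max_pasid = 0x100000 gives
 * max_pde = 0x4000, find_first_bit() returns 14, so pds = 7 and
 * context_pdts(7) advertises a directory of 2^(7 + 7) = 16384 entries.
 */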
2027
2028/*
2029 * Set the RID_PASID field of a scalable mode context entry. The
2030 * IOMMU hardware will use the PASID value set in this field for
2031 * DMA translations of DMA requests without PASID.
2032 */
2033static inline void
2034context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
2035{
2036 context->hi |= pasid & ((1 << 20) - 1);
2037 context->hi |= (1 << 20);
2038}
2039
2040/*
2041 * Set the DTE(Device-TLB Enable) field of a scalable mode context
2042 * entry.
2043 */
2044static inline void context_set_sm_dte(struct context_entry *context)
2045{
2046 context->lo |= (1 << 2);
2047}
2048
2049/*
2050 * Set the PRE(Page Request Enable) field of a scalable mode context
2051 * entry.
2052 */
2053static inline void context_set_sm_pre(struct context_entry *context)
2054{
2055 context->lo |= (1 << 4);
2056}
2057
2058/* Convert value to context PASID directory size field coding. */
2059#define context_pdts(pds) (((pds) & 0x7) << 9)
2060
64ae892b
DW
2061static int domain_context_mapping_one(struct dmar_domain *domain,
2062 struct intel_iommu *iommu,
ca6e322d 2063 struct pasid_table *table,
28ccce0d 2064 u8 bus, u8 devfn)
ba395927 2065{
c6c2cebd 2066 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
2067 int translation = CONTEXT_TT_MULTI_LEVEL;
2068 struct device_domain_info *info = NULL;
ba395927 2069 struct context_entry *context;
ba395927 2070 unsigned long flags;
7373a8cc 2071 int ret;
28ccce0d 2072
c6c2cebd
JR
2073 WARN_ON(did == 0);
2074
28ccce0d
JR
2075 if (hw_pass_through && domain_type_is_si(domain))
2076 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
2077
2078 pr_debug("Set context mapping for %02x:%02x.%d\n",
2079 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2080
ba395927 2081 BUG_ON(!domain->pgd);
5331fe6f 2082
55d94043
JR
2083 spin_lock_irqsave(&device_domain_lock, flags);
2084 spin_lock(&iommu->lock);
2085
2086 ret = -ENOMEM;
03ecc32c 2087 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2088 if (!context)
55d94043 2089 goto out_unlock;
ba395927 2090
55d94043
JR
2091 ret = 0;
2092 if (context_present(context))
2093 goto out_unlock;
cf484d0e 2094
aec0e861
XP
2095 /*
2096 * For kdump cases, old valid entries may be cached due to the
2097 * in-flight DMA and copied pgtable, but there is no unmapping
2098 * behaviour for them, thus we need an explicit cache flush for
2099 * the newly-mapped device. For kdump, at this point, the device
 2100 * is supposed to have finished its reset at driver probe time, so no
 2101 * in-flight DMA will exist, and we don't need to worry about it
 2102 * hereafter.
2103 */
2104 if (context_copied(context)) {
2105 u16 did_old = context_domain_id(context);
2106
b117e038 2107 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2108 iommu->flush.flush_context(iommu, did_old,
2109 (((u16)bus) << 8) | devfn,
2110 DMA_CCMD_MASK_NOBIT,
2111 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2112 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2113 DMA_TLB_DSI_FLUSH);
2114 }
aec0e861
XP
2115 }
2116
de24e553 2117 context_clear_entry(context);
ea6606b0 2118
7373a8cc
LB
2119 if (sm_supported(iommu)) {
2120 unsigned long pds;
4ed0d3e6 2121
7373a8cc
LB
2122 WARN_ON(!table);
2123
2124 /* Setup the PASID DIR pointer: */
2125 pds = context_get_sm_pds(table);
2126 context->lo = (u64)virt_to_phys(table->table) |
2127 context_pdts(pds);
2128
2129 /* Setup the RID_PASID field: */
2130 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2131
de24e553 2132 /*
7373a8cc
LB
2133 * Setup the Device-TLB enable bit and Page request
2134 * Enable bit:
de24e553 2135 */
7373a8cc
LB
2136 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2137 if (info && info->ats_supported)
2138 context_set_sm_dte(context);
2139 if (info && info->pri_supported)
2140 context_set_sm_pre(context);
2141 } else {
2142 struct dma_pte *pgd = domain->pgd;
2143 int agaw;
2144
2145 context_set_domain_id(context, did);
7373a8cc
LB
2146
2147 if (translation != CONTEXT_TT_PASS_THROUGH) {
2148 /*
2149 * Skip top levels of page tables for iommu which has
2150 * less agaw than default. Unnecessary for PT mode.
2151 */
2152 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2153 ret = -ENOMEM;
2154 pgd = phys_to_virt(dma_pte_addr(pgd));
2155 if (!dma_pte_present(pgd))
2156 goto out_unlock;
2157 }
2158
2159 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2160 if (info && info->ats_supported)
2161 translation = CONTEXT_TT_DEV_IOTLB;
2162 else
2163 translation = CONTEXT_TT_MULTI_LEVEL;
2164
2165 context_set_address_root(context, virt_to_phys(pgd));
2166 context_set_address_width(context, agaw);
2167 } else {
2168 /*
2169 * In pass through mode, AW must be programmed to
2170 * indicate the largest AGAW value supported by
2171 * hardware. And ASR is ignored by hardware.
2172 */
2173 context_set_address_width(context, iommu->msagaw);
2174 }
41b80db2
LB
2175
2176 context_set_translation_type(context, translation);
93a23a72 2177 }
4ed0d3e6 2178
c07e7d21
MM
2179 context_set_fault_enable(context);
2180 context_set_present(context);
5331fe6f 2181 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2182
4c25a2c1
DW
2183 /*
2184 * It's a non-present to present mapping. If hardware doesn't cache
 2185 * non-present entries, we only need to flush the write-buffer. If it
 2186 * _does_ cache non-present entries, then it does so in the special
2187 * domain #0, which we have to flush:
2188 */
2189 if (cap_caching_mode(iommu->cap)) {
2190 iommu->flush.flush_context(iommu, 0,
2191 (((u16)bus) << 8) | devfn,
2192 DMA_CCMD_MASK_NOBIT,
2193 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2194 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2195 } else {
ba395927 2196 iommu_flush_write_buffer(iommu);
4c25a2c1 2197 }
93a23a72 2198 iommu_enable_dev_iotlb(info);
c7151a8d 2199
55d94043
JR
2200 ret = 0;
2201
2202out_unlock:
2203 spin_unlock(&iommu->lock);
2204 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2205
5c365d18 2206 return ret;
ba395927
KA
2207}
2208
0ce4a85f
LB
2209struct domain_context_mapping_data {
2210 struct dmar_domain *domain;
2211 struct intel_iommu *iommu;
2212 struct pasid_table *table;
2213};
2214
2215static int domain_context_mapping_cb(struct pci_dev *pdev,
2216 u16 alias, void *opaque)
2217{
2218 struct domain_context_mapping_data *data = opaque;
2219
2220 return domain_context_mapping_one(data->domain, data->iommu,
2221 data->table, PCI_BUS_NUM(alias),
2222 alias & 0xff);
2223}
2224
ba395927 2225static int
28ccce0d 2226domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2227{
0ce4a85f 2228 struct domain_context_mapping_data data;
ca6e322d 2229 struct pasid_table *table;
64ae892b 2230 struct intel_iommu *iommu;
156baca8 2231 u8 bus, devfn;
64ae892b 2232
e1f167f3 2233 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2234 if (!iommu)
2235 return -ENODEV;
ba395927 2236
ca6e322d 2237 table = intel_pasid_get_table(dev);
0ce4a85f
LB
2238
2239 if (!dev_is_pci(dev))
2240 return domain_context_mapping_one(domain, iommu, table,
2241 bus, devfn);
2242
2243 data.domain = domain;
2244 data.iommu = iommu;
2245 data.table = table;
2246
2247 return pci_for_each_dma_alias(to_pci_dev(dev),
2248 &domain_context_mapping_cb, &data);
579305f7
AW
2249}
2250
2251static int domain_context_mapped_cb(struct pci_dev *pdev,
2252 u16 alias, void *opaque)
2253{
2254 struct intel_iommu *iommu = opaque;
2255
2256 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2257}
2258
e1f167f3 2259static int domain_context_mapped(struct device *dev)
ba395927 2260{
5331fe6f 2261 struct intel_iommu *iommu;
156baca8 2262 u8 bus, devfn;
5331fe6f 2263
e1f167f3 2264 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2265 if (!iommu)
2266 return -ENODEV;
ba395927 2267
579305f7
AW
2268 if (!dev_is_pci(dev))
2269 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2270
579305f7
AW
2271 return !pci_for_each_dma_alias(to_pci_dev(dev),
2272 domain_context_mapped_cb, iommu);
ba395927
KA
2273}
2274
f532959b
FY
2275/* Returns a number of VTD pages, but aligned to MM page size */
2276static inline unsigned long aligned_nrpages(unsigned long host_addr,
2277 size_t size)
2278{
2279 host_addr &= ~PAGE_MASK;
2280 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2281}
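/*
 * Example (4KiB pages assumed): host_addr = 0x1234, size = 0x2000 gives an
 * in-page offset of 0x234, PAGE_ALIGN(0x234 + 0x2000) = 0x3000, so the
 * buffer spans 3 VT-d pages even though the size alone fits in 2.
 */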
2282
6dd9a7c7
YS
2283/* Return largest possible superpage level for a given mapping */
2284static inline int hardware_largepage_caps(struct dmar_domain *domain,
2285 unsigned long iov_pfn,
2286 unsigned long phy_pfn,
2287 unsigned long pages)
2288{
2289 int support, level = 1;
2290 unsigned long pfnmerge;
2291
2292 support = domain->iommu_superpage;
2293
2294 /* To use a large page, the virtual *and* physical addresses
2295 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2296 of them will mean we have to use smaller pages. So just
2297 merge them and check both at once. */
2298 pfnmerge = iov_pfn | phy_pfn;
2299
2300 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2301 pages >>= VTD_STRIDE_SHIFT;
2302 if (!pages)
2303 break;
2304 pfnmerge >>= VTD_STRIDE_SHIFT;
2305 level++;
2306 support--;
2307 }
2308 return level;
2309}
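/*
 * Example (assuming a 9-bit stride, i.e. 512 4KiB pages per 2MiB
 * superpage): with domain->iommu_superpage == 1, iov_pfn and phy_pfn both
 * 512-page aligned and pages >= 512, the loop above runs once and returns
 * level 2, so the caller can install a single 2MiB PTE; any misalignment
 * in either pfn keeps the result at level 1 (4KiB pages).
 */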
2310
9051aa02
DW
2311static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2312 struct scatterlist *sg, unsigned long phys_pfn,
2313 unsigned long nr_pages, int prot)
e1605495
DW
2314{
2315 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2316 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2317 unsigned long sg_res = 0;
6dd9a7c7
YS
2318 unsigned int largepage_lvl = 0;
2319 unsigned long lvl_pages = 0;
ddf09b6d 2320 u64 attr;
e1605495 2321
162d1b10 2322 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2323
2324 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2325 return -EINVAL;
2326
ddf09b6d
LB
2327 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2328 if (domain_use_first_level(domain))
2329 attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
e1605495 2330
cc4f14aa
JL
2331 if (!sg) {
2332 sg_res = nr_pages;
ddf09b6d 2333 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
9051aa02
DW
2334 }
2335
6dd9a7c7 2336 while (nr_pages > 0) {
c85994e4
DW
2337 uint64_t tmp;
2338
e1605495 2339 if (!sg_res) {
29a90b70
RM
2340 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2341
f532959b 2342 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2343 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2344 sg->dma_length = sg->length;
ddf09b6d 2345 pteval = (sg_phys(sg) - pgoff) | attr;
6dd9a7c7 2346 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2347 }
6dd9a7c7 2348
e1605495 2349 if (!pte) {
6dd9a7c7
YS
2350 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2351
5cf0a76f 2352 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2353 if (!pte)
2354 return -ENOMEM;
 6dd9a7c7 2355 /* It is a large page */
6491d4d0 2356 if (largepage_lvl > 1) {
ba2374fd
CZ
2357 unsigned long nr_superpages, end_pfn;
2358
6dd9a7c7 2359 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2360 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2361
2362 nr_superpages = sg_res / lvl_pages;
2363 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2364
d41a4adb
JL
2365 /*
2366 * Ensure that old small page tables are
ba2374fd 2367 * removed to make room for superpage(s).
bc24c571
DD
2368 * We're adding new large pages, so make sure
2369 * we don't remove their parent tables.
d41a4adb 2370 */
bc24c571
DD
2371 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2372 largepage_lvl + 1);
6491d4d0 2373 } else {
6dd9a7c7 2374 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2375 }
6dd9a7c7 2376
e1605495
DW
2377 }
 2378 /* We don't need a lock here; nobody else
 2379 * touches the iova range.
 2380 */
7766a3fb 2381 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2382 if (tmp) {
1bf20f0d 2383 static int dumps = 5;
9f10e5bf
JR
2384 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2385 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2386 if (dumps) {
2387 dumps--;
2388 debug_dma_dump_mappings(NULL);
2389 }
2390 WARN_ON(1);
2391 }
6dd9a7c7
YS
2392
2393 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2394
2395 BUG_ON(nr_pages < lvl_pages);
2396 BUG_ON(sg_res < lvl_pages);
2397
2398 nr_pages -= lvl_pages;
2399 iov_pfn += lvl_pages;
2400 phys_pfn += lvl_pages;
2401 pteval += lvl_pages * VTD_PAGE_SIZE;
2402 sg_res -= lvl_pages;
2403
2404 /* If the next PTE would be the first in a new page, then we
2405 need to flush the cache on the entries we've just written.
2406 And then we'll need to recalculate 'pte', so clear it and
2407 let it get set again in the if (!pte) block above.
2408
2409 If we're done (!nr_pages) we need to flush the cache too.
2410
2411 Also if we've been setting superpages, we may need to
2412 recalculate 'pte' and switch back to smaller pages for the
2413 end of the mapping, if the trailing size is not enough to
2414 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2415 pte++;
6dd9a7c7
YS
2416 if (!nr_pages || first_pte_in_page(pte) ||
2417 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2418 domain_flush_cache(domain, first_pte,
2419 (void *)pte - (void *)first_pte);
2420 pte = NULL;
2421 }
6dd9a7c7
YS
2422
2423 if (!sg_res && nr_pages)
e1605495
DW
2424 sg = sg_next(sg);
2425 }
2426 return 0;
2427}
2428
87684fd9 2429static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2430 struct scatterlist *sg, unsigned long phys_pfn,
2431 unsigned long nr_pages, int prot)
2432{
fa954e68 2433 int iommu_id, ret;
095303e0
LB
2434 struct intel_iommu *iommu;
2435
2436 /* Do the real mapping first */
2437 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2438 if (ret)
2439 return ret;
2440
fa954e68
LB
2441 for_each_domain_iommu(iommu_id, domain) {
2442 iommu = g_iommus[iommu_id];
095303e0
LB
2443 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2444 }
2445
2446 return 0;
87684fd9
PX
2447}
2448
9051aa02
DW
2449static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2450 struct scatterlist *sg, unsigned long nr_pages,
2451 int prot)
ba395927 2452{
87684fd9 2453 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2454}
6f6a00e4 2455
9051aa02
DW
2456static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2457 unsigned long phys_pfn, unsigned long nr_pages,
2458 int prot)
2459{
87684fd9 2460 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2461}
2462
2452d9db 2463static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2464{
5082219b
FS
2465 unsigned long flags;
2466 struct context_entry *context;
2467 u16 did_old;
2468
c7151a8d
WH
2469 if (!iommu)
2470 return;
8c11e798 2471
5082219b
FS
2472 spin_lock_irqsave(&iommu->lock, flags);
2473 context = iommu_context_addr(iommu, bus, devfn, 0);
2474 if (!context) {
2475 spin_unlock_irqrestore(&iommu->lock, flags);
2476 return;
2477 }
2478 did_old = context_domain_id(context);
2479 context_clear_entry(context);
2480 __iommu_flush_cache(iommu, context, sizeof(*context));
2481 spin_unlock_irqrestore(&iommu->lock, flags);
2482 iommu->flush.flush_context(iommu,
2483 did_old,
2484 (((u16)bus) << 8) | devfn,
2485 DMA_CCMD_MASK_NOBIT,
2486 DMA_CCMD_DEVICE_INVL);
2487 iommu->flush.flush_iotlb(iommu,
2488 did_old,
2489 0,
2490 0,
2491 DMA_TLB_DSI_FLUSH);
ba395927
KA
2492}
2493
109b9b04
DW
2494static inline void unlink_domain_info(struct device_domain_info *info)
2495{
2496 assert_spin_locked(&device_domain_lock);
2497 list_del(&info->link);
2498 list_del(&info->global);
2499 if (info->dev)
0bcb3e28 2500 info->dev->archdata.iommu = NULL;
109b9b04
DW
2501}
2502
ba395927
KA
2503static void domain_remove_dev_info(struct dmar_domain *domain)
2504{
3a74ca01 2505 struct device_domain_info *info, *tmp;
fb170fb4 2506 unsigned long flags;
ba395927
KA
2507
2508 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2509 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2510 __dmar_remove_one_dev_info(info);
ba395927
KA
2511 spin_unlock_irqrestore(&device_domain_lock, flags);
2512}
2513
e2726dae 2514struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2515{
2516 struct device_domain_info *info;
2517
1d461597 2518 if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
1ee0186b
LB
2519 return NULL;
2520
2b0140c6
JD
2521 if (dev_is_pci(dev))
2522 dev = &pci_real_dma_dev(to_pci_dev(dev))->dev;
2523
1ee0186b
LB
2524 /* No lock here, assumes no domain exit in normal case */
2525 info = dev->archdata.iommu;
2526 if (likely(info))
2527 return info->domain;
2528
2529 return NULL;
2530}
2531
034d98cc 2532static void do_deferred_attach(struct device *dev)
1ee0186b 2533{
034d98cc 2534 struct iommu_domain *domain;
8af46c78 2535
034d98cc
JR
2536 dev->archdata.iommu = NULL;
2537 domain = iommu_get_domain_for_dev(dev);
2538 if (domain)
2539 intel_iommu_attach_device(domain, dev);
2540}
2541
5a8f40e8 2542static inline struct device_domain_info *
745f2586
JL
2543dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2544{
2545 struct device_domain_info *info;
2546
2547 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2548 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2549 info->devfn == devfn)
5a8f40e8 2550 return info;
745f2586
JL
2551
2552 return NULL;
2553}
2554
ddf09b6d
LB
2555static int domain_setup_first_level(struct intel_iommu *iommu,
2556 struct dmar_domain *domain,
2557 struct device *dev,
2558 int pasid)
2559{
2560 int flags = PASID_FLAG_SUPERVISOR_MODE;
2561 struct dma_pte *pgd = domain->pgd;
2562 int agaw, level;
2563
2564 /*
2565 * Skip top levels of page tables for iommu which has
2566 * less agaw than default. Unnecessary for PT mode.
2567 */
2568 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2569 pgd = phys_to_virt(dma_pte_addr(pgd));
2570 if (!dma_pte_present(pgd))
2571 return -ENOMEM;
2572 }
2573
2574 level = agaw_to_level(agaw);
2575 if (level != 4 && level != 5)
2576 return -EINVAL;
2577
2578 flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
2579
2580 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2581 domain->iommu_did[iommu->seq_id],
2582 flags);
2583}
2584
5db31569
JR
2585static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2586 int bus, int devfn,
2587 struct device *dev,
2588 struct dmar_domain *domain)
745f2586 2589{
5a8f40e8 2590 struct dmar_domain *found = NULL;
745f2586
JL
2591 struct device_domain_info *info;
2592 unsigned long flags;
d160aca5 2593 int ret;
745f2586
JL
2594
2595 info = alloc_devinfo_mem();
2596 if (!info)
b718cd3d 2597 return NULL;
745f2586 2598
745f2586
JL
2599 info->bus = bus;
2600 info->devfn = devfn;
b16d0cb9
DW
2601 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2602 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2603 info->ats_qdep = 0;
745f2586
JL
2604 info->dev = dev;
2605 info->domain = domain;
5a8f40e8 2606 info->iommu = iommu;
cc580e41 2607 info->pasid_table = NULL;
95587a75 2608 info->auxd_enabled = 0;
67b8e02b 2609 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2610
b16d0cb9
DW
2611 if (dev && dev_is_pci(dev)) {
2612 struct pci_dev *pdev = to_pci_dev(info->dev);
2613
d8b85910
LB
2614 if (!pdev->untrusted &&
2615 !pci_ats_disabled() &&
cef74409 2616 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2617 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2618 dmar_find_matched_atsr_unit(pdev))
2619 info->ats_supported = 1;
2620
765b6a98
LB
2621 if (sm_supported(iommu)) {
2622 if (pasid_supported(iommu)) {
b16d0cb9
DW
2623 int features = pci_pasid_features(pdev);
2624 if (features >= 0)
2625 info->pasid_supported = features | 1;
2626 }
2627
2628 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2629 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2630 info->pri_supported = 1;
2631 }
2632 }
2633
745f2586
JL
2634 spin_lock_irqsave(&device_domain_lock, flags);
2635 if (dev)
0bcb3e28 2636 found = find_domain(dev);
f303e507
JR
2637
2638 if (!found) {
5a8f40e8 2639 struct device_domain_info *info2;
41e80dca 2640 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2641 if (info2) {
2642 found = info2->domain;
2643 info2->dev = dev;
2644 }
5a8f40e8 2645 }
f303e507 2646
745f2586
JL
2647 if (found) {
2648 spin_unlock_irqrestore(&device_domain_lock, flags);
2649 free_devinfo_mem(info);
b718cd3d
DW
2650 /* Caller must free the original domain */
2651 return found;
745f2586
JL
2652 }
2653
d160aca5
JR
2654 spin_lock(&iommu->lock);
2655 ret = domain_attach_iommu(domain, iommu);
2656 spin_unlock(&iommu->lock);
2657
2658 if (ret) {
c6c2cebd 2659 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2660 free_devinfo_mem(info);
c6c2cebd
JR
2661 return NULL;
2662 }
c6c2cebd 2663
b718cd3d
DW
2664 list_add(&info->link, &domain->devices);
2665 list_add(&info->global, &device_domain_list);
2666 if (dev)
2667 dev->archdata.iommu = info;
0bbeb01a 2668 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2669
0bbeb01a
LB
2670 /* PASID table is mandatory for a PCI device in scalable mode. */
2671 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2672 ret = intel_pasid_alloc_table(dev);
2673 if (ret) {
932a6523 2674 dev_err(dev, "PASID table allocation failed\n");
71753239 2675 dmar_remove_one_dev_info(dev);
0bbeb01a 2676 return NULL;
a7fc93fe 2677 }
ef848b7e
LB
2678
2679 /* Setup the PASID entry for requests without PASID: */
2680 spin_lock(&iommu->lock);
2681 if (hw_pass_through && domain_type_is_si(domain))
2682 ret = intel_pasid_setup_pass_through(iommu, domain,
2683 dev, PASID_RID2PASID);
ddf09b6d
LB
2684 else if (domain_use_first_level(domain))
2685 ret = domain_setup_first_level(iommu, domain, dev,
2686 PASID_RID2PASID);
ef848b7e
LB
2687 else
2688 ret = intel_pasid_setup_second_level(iommu, domain,
2689 dev, PASID_RID2PASID);
2690 spin_unlock(&iommu->lock);
2691 if (ret) {
932a6523 2692 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2693 dmar_remove_one_dev_info(dev);
ef848b7e 2694 return NULL;
a7fc93fe
LB
2695 }
2696 }
b718cd3d 2697
cc4e2575 2698 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2699 dev_err(dev, "Domain context map failed\n");
71753239 2700 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2701 return NULL;
2702 }
2703
b718cd3d 2704 return domain;
745f2586
JL
2705}
2706
579305f7
AW
2707static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2708{
2709 *(u16 *)opaque = alias;
2710 return 0;
2711}
2712
76208356 2713static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2714{
e083ea5b 2715 struct device_domain_info *info;
76208356 2716 struct dmar_domain *domain = NULL;
579305f7 2717 struct intel_iommu *iommu;
fcc35c63 2718 u16 dma_alias;
ba395927 2719 unsigned long flags;
aa4d066a 2720 u8 bus, devfn;
ba395927 2721
579305f7
AW
2722 iommu = device_to_iommu(dev, &bus, &devfn);
2723 if (!iommu)
2724 return NULL;
2725
146922ec
DW
2726 if (dev_is_pci(dev)) {
2727 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2728
579305f7
AW
2729 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2730
2731 spin_lock_irqsave(&device_domain_lock, flags);
2732 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2733 PCI_BUS_NUM(dma_alias),
2734 dma_alias & 0xff);
2735 if (info) {
2736 iommu = info->iommu;
2737 domain = info->domain;
5a8f40e8 2738 }
579305f7 2739 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2740
76208356 2741 /* DMA alias already has a domain, use it */
579305f7 2742 if (info)
76208356 2743 goto out;
579305f7 2744 }
ba395927 2745
146922ec 2746 /* Allocate and initialize new domain for the device */
ab8dfe25 2747 domain = alloc_domain(0);
745f2586 2748 if (!domain)
579305f7 2749 return NULL;
301e7ee1 2750 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2751 domain_exit(domain);
2752 return NULL;
2c2e2c38 2753 }
ba395927 2754
76208356 2755out:
76208356
JR
2756 return domain;
2757}
579305f7 2758
76208356
JR
2759static struct dmar_domain *set_domain_for_dev(struct device *dev,
2760 struct dmar_domain *domain)
2761{
2762 struct intel_iommu *iommu;
2763 struct dmar_domain *tmp;
2764 u16 req_id, dma_alias;
2765 u8 bus, devfn;
2766
2767 iommu = device_to_iommu(dev, &bus, &devfn);
2768 if (!iommu)
2769 return NULL;
2770
2771 req_id = ((u16)bus << 8) | devfn;
2772
2773 if (dev_is_pci(dev)) {
2774 struct pci_dev *pdev = to_pci_dev(dev);
2775
2776 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2777
2778 /* register PCI DMA alias device */
2779 if (req_id != dma_alias) {
2780 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2781 dma_alias & 0xff, NULL, domain);
2782
2783 if (!tmp || tmp != domain)
2784 return tmp;
2785 }
ba395927
KA
2786 }
2787
5db31569 2788 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2789 if (!tmp || tmp != domain)
2790 return tmp;
2791
2792 return domain;
2793}
579305f7 2794
b213203e
DW
2795static int iommu_domain_identity_map(struct dmar_domain *domain,
2796 unsigned long long start,
2797 unsigned long long end)
ba395927 2798{
c5395d5c
DW
2799 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2800 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2801
2802 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2803 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2804 pr_err("Reserving iova failed\n");
b213203e 2805 return -ENOMEM;
ba395927
KA
2806 }
2807
af1089ce 2808 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2809 /*
 2810 * The RMRR range might overlap with a physical memory range,
 2811 * so clear it first
2812 */
c5395d5c 2813 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2814
87684fd9
PX
2815 return __domain_mapping(domain, first_vpfn, NULL,
2816 first_vpfn, last_vpfn - first_vpfn + 1,
2817 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2818}
2819
d66ce54b
JR
2820static int domain_prepare_identity_map(struct device *dev,
2821 struct dmar_domain *domain,
2822 unsigned long long start,
2823 unsigned long long end)
b213203e 2824{
19943b0e
DW
2825 /* For _hardware_ passthrough, don't bother. But for software
2826 passthrough, we do it anyway -- it may indicate a memory
 2827 range which is reserved in E820, and so didn't get set
 2828 up in si_domain to start with */
2829 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2830 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2831 start, end);
19943b0e
DW
2832 return 0;
2833 }
2834
932a6523 2835 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2836
5595b528
DW
2837 if (end < start) {
2838 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2839 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2840 dmi_get_system_info(DMI_BIOS_VENDOR),
2841 dmi_get_system_info(DMI_BIOS_VERSION),
2842 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2843 return -EIO;
5595b528
DW
2844 }
2845
2ff729f5
DW
2846 if (end >> agaw_to_width(domain->agaw)) {
2847 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2848 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2849 agaw_to_width(domain->agaw),
2850 dmi_get_system_info(DMI_BIOS_VENDOR),
2851 dmi_get_system_info(DMI_BIOS_VERSION),
2852 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2853 return -EIO;
2ff729f5 2854 }
19943b0e 2855
d66ce54b
JR
2856 return iommu_domain_identity_map(domain, start, end);
2857}
ba395927 2858
301e7ee1
JR
2859static int md_domain_init(struct dmar_domain *domain, int guest_width);
2860
071e1374 2861static int __init si_domain_init(int hw)
2c2e2c38 2862{
4de354ec
LB
2863 struct dmar_rmrr_unit *rmrr;
2864 struct device *dev;
2865 int i, nid, ret;
2c2e2c38 2866
ab8dfe25 2867 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2868 if (!si_domain)
2869 return -EFAULT;
2870
301e7ee1 2871 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2c2e2c38
FY
2872 domain_exit(si_domain);
2873 return -EFAULT;
2874 }
2875
19943b0e
DW
2876 if (hw)
2877 return 0;
2878
c7ab48d2 2879 for_each_online_node(nid) {
5dfe8660
TH
2880 unsigned long start_pfn, end_pfn;
2881 int i;
2882
2883 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2884 ret = iommu_domain_identity_map(si_domain,
2885 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2886 if (ret)
2887 return ret;
2888 }
c7ab48d2
DW
2889 }
2890
4de354ec 2891 /*
9235cb13
LB
2892 * Identity map the RMRRs so that devices with RMRRs could also use
2893 * the si_domain.
4de354ec
LB
2894 */
2895 for_each_rmrr_units(rmrr) {
2896 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2897 i, dev) {
2898 unsigned long long start = rmrr->base_address;
2899 unsigned long long end = rmrr->end_address;
2900
4de354ec
LB
2901 if (WARN_ON(end < start ||
2902 end >> agaw_to_width(si_domain->agaw)))
2903 continue;
2904
2905 ret = iommu_domain_identity_map(si_domain, start, end);
2906 if (ret)
2907 return ret;
2908 }
2909 }
2910
2c2e2c38
FY
2911 return 0;
2912}
2913
9b226624 2914static int identity_mapping(struct device *dev)
2c2e2c38
FY
2915{
2916 struct device_domain_info *info;
2917
9b226624 2918 info = dev->archdata.iommu;
1ddb32da 2919 if (info)
cb452a40 2920 return (info->domain == si_domain);
2c2e2c38 2921
2c2e2c38
FY
2922 return 0;
2923}
2924
28ccce0d 2925static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2926{
0ac72664 2927 struct dmar_domain *ndomain;
5a8f40e8 2928 struct intel_iommu *iommu;
156baca8 2929 u8 bus, devfn;
2c2e2c38 2930
5913c9bf 2931 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2932 if (!iommu)
2933 return -ENODEV;
2934
5db31569 2935 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2936 if (ndomain != domain)
2937 return -EBUSY;
2c2e2c38
FY
2938
2939 return 0;
2940}
2941
0b9d9753 2942static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2943{
2944 struct dmar_rmrr_unit *rmrr;
832bd858 2945 struct device *tmp;
ea2447f7
TM
2946 int i;
2947
0e242612 2948 rcu_read_lock();
ea2447f7 2949 for_each_rmrr_units(rmrr) {
b683b230
JL
2950 /*
2951 * Return TRUE if this RMRR contains the device that
2952 * is passed in.
2953 */
2954 for_each_active_dev_scope(rmrr->devices,
2955 rmrr->devices_cnt, i, tmp)
e143fd45
EA
2956 if (tmp == dev ||
2957 is_downstream_to_pci_bridge(dev, tmp)) {
0e242612 2958 rcu_read_unlock();
ea2447f7 2959 return true;
b683b230 2960 }
ea2447f7 2961 }
0e242612 2962 rcu_read_unlock();
ea2447f7
TM
2963 return false;
2964}
2965
1c5c59fb
EA
2966/**
2967 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2968 * is relaxable (ie. is allowed to be not enforced under some conditions)
2969 * @dev: device handle
2970 *
2971 * We assume that PCI USB devices with RMRRs have them largely
2972 * for historical reasons and that the RMRR space is not actively used post
2973 * boot. This exclusion may change if vendors begin to abuse it.
2974 *
2975 * The same exception is made for graphics devices, with the requirement that
2976 * any use of the RMRR regions will be torn down before assigning the device
2977 * to a guest.
2978 *
2979 * Return: true if the RMRR is relaxable, false otherwise
2980 */
2981static bool device_rmrr_is_relaxable(struct device *dev)
2982{
2983 struct pci_dev *pdev;
2984
2985 if (!dev_is_pci(dev))
2986 return false;
2987
2988 pdev = to_pci_dev(dev);
 2989 return IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev);
2993}
2994
c875d2c1
AW
2995/*
 2996 * There are a couple of cases where we need to restrict the functionality of
2997 * devices associated with RMRRs. The first is when evaluating a device for
2998 * identity mapping because problems exist when devices are moved in and out
2999 * of domains and their respective RMRR information is lost. This means that
3000 * a device with associated RMRRs will never be in a "passthrough" domain.
3001 * The second is use of the device through the IOMMU API. This interface
3002 * expects to have full control of the IOVA space for the device. We cannot
3003 * satisfy both the requirement that RMRR access is maintained and have an
3004 * unencumbered IOVA space. We also have no ability to quiesce the device's
3005 * use of the RMRR space or even inform the IOMMU API user of the restriction.
3006 * We therefore prevent devices associated with an RMRR from participating in
3007 * the IOMMU API, which eliminates them from device assignment.
3008 *
1c5c59fb
EA
3009 * In both cases, devices which have relaxable RMRRs are not concerned by this
3010 * restriction. See device_rmrr_is_relaxable comment.
c875d2c1
AW
3011 */
3012static bool device_is_rmrr_locked(struct device *dev)
3013{
3014 if (!device_has_rmrr(dev))
3015 return false;
3016
1c5c59fb
EA
3017 if (device_rmrr_is_relaxable(dev))
3018 return false;
c875d2c1
AW
3019
3020 return true;
3021}
3022
f273a453
LB
3023/*
3024 * Return the required default domain type for a specific device.
3025 *
3026 * @dev: the device in query
3027 * @startup: true if this is during early boot
3028 *
3029 * Returns:
3030 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
3031 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
3032 * - 0: both identity and dynamic domains work for this device
3033 */
0e31a726 3034static int device_def_domain_type(struct device *dev)
6941af28 3035{
3bdb2591
DW
3036 if (dev_is_pci(dev)) {
3037 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 3038
89a6079d
LB
3039 /*
3040 * Prevent any device marked as untrusted from getting
3041 * placed into the statically identity mapping domain.
3042 */
3043 if (pdev->untrusted)
f273a453 3044 return IOMMU_DOMAIN_DMA;
89a6079d 3045
3bdb2591 3046 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 3047 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 3048
3bdb2591 3049 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 3050 return IOMMU_DOMAIN_IDENTITY;
3bdb2591
DW
3051
3052 /*
3053 * We want to start off with all devices in the 1:1 domain, and
3054 * take them out later if we find they can't access all of memory.
3055 *
3056 * However, we can't do this for PCI devices behind bridges,
3057 * because all PCI devices behind the same bridge will end up
3058 * with the same source-id on their transactions.
3059 *
3060 * Practically speaking, we can't change things around for these
3061 * devices at run-time, because we can't be sure there'll be no
3062 * DMA transactions in flight for any of their siblings.
3063 *
3064 * So PCI devices (unless they're on the root bus) as well as
3065 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
3066 * the 1:1 domain, just in _case_ one of their siblings turns out
3067 * not to be able to map all of memory.
3068 */
3069 if (!pci_is_pcie(pdev)) {
3070 if (!pci_is_root_bus(pdev->bus))
f273a453 3071 return IOMMU_DOMAIN_DMA;
3bdb2591 3072 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
f273a453 3073 return IOMMU_DOMAIN_DMA;
3bdb2591 3074 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
f273a453 3075 return IOMMU_DOMAIN_DMA;
3bdb2591 3076 }
3dfc813d 3077
b89b6605 3078 return 0;
f273a453
LB
3079}
3080
ffebeb46
JL
3081static void intel_iommu_init_qi(struct intel_iommu *iommu)
3082{
3083 /*
3084 * Start from the sane iommu hardware state.
3085 * If the queued invalidation is already initialized by us
3086 * (for example, while enabling interrupt-remapping) then
 3087 * things are already rolling from a sane state.
3088 */
3089 if (!iommu->qi) {
3090 /*
3091 * Clear any previous faults.
3092 */
3093 dmar_fault(-1, iommu);
3094 /*
3095 * Disable queued invalidation if supported and already enabled
3096 * before OS handover.
3097 */
3098 dmar_disable_qi(iommu);
3099 }
3100
3101 if (dmar_enable_qi(iommu)) {
3102 /*
3103 * Queued Invalidate not enabled, use Register Based Invalidate
3104 */
3105 iommu->flush.flush_context = __iommu_flush_context;
3106 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3107 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3108 iommu->name);
3109 } else {
3110 iommu->flush.flush_context = qi_flush_context;
3111 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3112 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3113 }
3114}
3115
091d42e4 3116static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3117 struct root_entry *old_re,
091d42e4
JR
3118 struct context_entry **tbl,
3119 int bus, bool ext)
3120{
dbcd861f 3121 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3122 struct context_entry *new_ce = NULL, ce;
dfddb969 3123 struct context_entry *old_ce = NULL;
543c8dcf 3124 struct root_entry re;
091d42e4
JR
3125 phys_addr_t old_ce_phys;
3126
3127 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3128 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3129
3130 for (devfn = 0; devfn < 256; devfn++) {
3131 /* First calculate the correct index */
3132 idx = (ext ? devfn * 2 : devfn) % 256;
3133
3134 if (idx == 0) {
3135 /* First save what we may have and clean up */
3136 if (new_ce) {
3137 tbl[tbl_idx] = new_ce;
3138 __iommu_flush_cache(iommu, new_ce,
3139 VTD_PAGE_SIZE);
3140 pos = 1;
3141 }
3142
3143 if (old_ce)
829383e1 3144 memunmap(old_ce);
091d42e4
JR
3145
3146 ret = 0;
3147 if (devfn < 0x80)
543c8dcf 3148 old_ce_phys = root_entry_lctp(&re);
091d42e4 3149 else
543c8dcf 3150 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3151
3152 if (!old_ce_phys) {
3153 if (ext && devfn == 0) {
3154 /* No LCTP, try UCTP */
3155 devfn = 0x7f;
3156 continue;
3157 } else {
3158 goto out;
3159 }
3160 }
3161
3162 ret = -ENOMEM;
dfddb969
DW
3163 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3164 MEMREMAP_WB);
091d42e4
JR
3165 if (!old_ce)
3166 goto out;
3167
3168 new_ce = alloc_pgtable_page(iommu->node);
3169 if (!new_ce)
3170 goto out_unmap;
3171
3172 ret = 0;
3173 }
3174
3175 /* Now copy the context entry */
dfddb969 3176 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3177
cf484d0e 3178 if (!__context_present(&ce))
091d42e4
JR
3179 continue;
3180
dbcd861f
JR
3181 did = context_domain_id(&ce);
3182 if (did >= 0 && did < cap_ndoms(iommu->cap))
3183 set_bit(did, iommu->domain_ids);
3184
cf484d0e
JR
3185 /*
3186 * We need a marker for copied context entries. This
3187 * marker needs to work for the old format as well as
3188 * for extended context entries.
3189 *
3190 * Bit 67 of the context entry is used. In the old
3191 * format this bit is available to software, in the
3192 * extended format it is the PGE bit, but PGE is ignored
3193 * by HW if PASIDs are disabled (and thus still
3194 * available).
3195 *
3196 * So disable PASIDs first and then mark the entry
3197 * copied. This means that we don't copy PASID
3198 * translations from the old kernel, but this is fine as
3199 * faults there are not fatal.
3200 */
3201 context_clear_pasid_enable(&ce);
3202 context_set_copied(&ce);
3203
091d42e4
JR
3204 new_ce[idx] = ce;
3205 }
3206
3207 tbl[tbl_idx + pos] = new_ce;
3208
3209 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3210
3211out_unmap:
dfddb969 3212 memunmap(old_ce);
091d42e4
JR
3213
3214out:
3215 return ret;
3216}
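/*
 * Layout sketch for the copied tables above (extended/ECS case): for a
 * given bus, entries for devfn 0x00-0x7f come from the lower context-table
 * pointer of the old root entry and land in tbl[bus * 2], while devfn
 * 0x80-0xff come from the upper pointer and land in tbl[bus * 2 + 1]; in
 * the legacy case a single table at tbl[bus] covers all 256 devfns.
 */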
3217
3218static int copy_translation_tables(struct intel_iommu *iommu)
3219{
3220 struct context_entry **ctxt_tbls;
dfddb969 3221 struct root_entry *old_rt;
091d42e4
JR
3222 phys_addr_t old_rt_phys;
3223 int ctxt_table_entries;
3224 unsigned long flags;
3225 u64 rtaddr_reg;
3226 int bus, ret;
c3361f2f 3227 bool new_ext, ext;
091d42e4
JR
3228
3229 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3230 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3231 new_ext = !!ecap_ecs(iommu->ecap);
3232
3233 /*
3234 * The RTT bit can only be changed when translation is disabled,
 3235 * but disabling translation means opening a window for data
3236 * corruption. So bail out and don't copy anything if we would
3237 * have to change the bit.
3238 */
3239 if (new_ext != ext)
3240 return -EINVAL;
091d42e4
JR
3241
3242 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3243 if (!old_rt_phys)
3244 return -EINVAL;
3245
dfddb969 3246 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3247 if (!old_rt)
3248 return -ENOMEM;
3249
3250 /* This is too big for the stack - allocate it from slab */
3251 ctxt_table_entries = ext ? 512 : 256;
3252 ret = -ENOMEM;
6396bb22 3253 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3254 if (!ctxt_tbls)
3255 goto out_unmap;
3256
3257 for (bus = 0; bus < 256; bus++) {
3258 ret = copy_context_table(iommu, &old_rt[bus],
3259 ctxt_tbls, bus, ext);
3260 if (ret) {
3261 pr_err("%s: Failed to copy context table for bus %d\n",
3262 iommu->name, bus);
3263 continue;
3264 }
3265 }
3266
3267 spin_lock_irqsave(&iommu->lock, flags);
3268
3269 /* Context tables are copied, now write them to the root_entry table */
3270 for (bus = 0; bus < 256; bus++) {
3271 int idx = ext ? bus * 2 : bus;
3272 u64 val;
3273
3274 if (ctxt_tbls[idx]) {
3275 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3276 iommu->root_entry[bus].lo = val;
3277 }
3278
3279 if (!ext || !ctxt_tbls[idx + 1])
3280 continue;
3281
3282 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3283 iommu->root_entry[bus].hi = val;
3284 }
3285
3286 spin_unlock_irqrestore(&iommu->lock, flags);
3287
3288 kfree(ctxt_tbls);
3289
3290 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3291
3292 ret = 0;
3293
3294out_unmap:
dfddb969 3295 memunmap(old_rt);
091d42e4
JR
3296
3297 return ret;
3298}
3299
b779260b 3300static int __init init_dmars(void)
ba395927
KA
3301{
3302 struct dmar_drhd_unit *drhd;
ba395927 3303 struct intel_iommu *iommu;
df4f3c60 3304 int ret;
2c2e2c38 3305
ba395927
KA
3306 /*
3307 * for each drhd
3308 * allocate root
3309 * initialize and program root entry to not present
3310 * endfor
3311 */
3312 for_each_drhd_unit(drhd) {
5e0d2a6f 3313 /*
 3314 * lock not needed as this is only incremented in the
 3315 * single-threaded kernel __init code path; all other accesses
 3316 * are read-only
3317 */
78d8e704 3318 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3319 g_num_of_iommus++;
3320 continue;
3321 }
9f10e5bf 3322 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3323 }
3324
ffebeb46
JL
3325 /* Preallocate enough resources for IOMMU hot-addition */
3326 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3327 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3328
d9630fe9
WH
3329 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3330 GFP_KERNEL);
3331 if (!g_iommus) {
9f10e5bf 3332 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3333 ret = -ENOMEM;
3334 goto error;
3335 }
3336
6a8c6748
LB
3337 for_each_iommu(iommu, drhd) {
3338 if (drhd->ignored) {
3339 iommu_disable_translation(iommu);
3340 continue;
3341 }
3342
56283174
LB
3343 /*
3344 * Find the max pasid size of all IOMMU's in the system.
3345 * We need to ensure the system pasid table is no bigger
3346 * than the smallest supported.
3347 */
765b6a98 3348 if (pasid_supported(iommu)) {
56283174
LB
3349 u32 temp = 2 << ecap_pss(iommu->ecap);
3350
3351 intel_pasid_max_id = min_t(u32, temp,
3352 intel_pasid_max_id);
3353 }
3354
d9630fe9 3355 g_iommus[iommu->seq_id] = iommu;
ba395927 3356
b63d80d1
JR
3357 intel_iommu_init_qi(iommu);
3358
e61d98d8
SS
3359 ret = iommu_init_domains(iommu);
3360 if (ret)
989d51fc 3361 goto free_iommu;
e61d98d8 3362
4158c2ec
JR
3363 init_translation_status(iommu);
3364
091d42e4
JR
3365 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3366 iommu_disable_translation(iommu);
3367 clear_translation_pre_enabled(iommu);
3368 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3369 iommu->name);
3370 }
4158c2ec 3371
ba395927
KA
3372 /*
3373 * TBD:
3374 * we could share the same root & context tables
 25985edc 3375 * among all IOMMUs. Need to split it later.
ba395927
KA
3376 */
3377 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3378 if (ret)
989d51fc 3379 goto free_iommu;
5f0a7f76 3380
091d42e4
JR
3381 if (translation_pre_enabled(iommu)) {
3382 pr_info("Translation already enabled - trying to copy translation structures\n");
3383
3384 ret = copy_translation_tables(iommu);
3385 if (ret) {
3386 /*
3387 * We found the IOMMU with translation
3388 * enabled - but failed to copy over the
3389 * old root-entry table. Try to proceed
3390 * by disabling translation now and
3391 * allocating a clean root-entry table.
3392 * This might cause DMAR faults, but
3393 * probably the dump will still succeed.
3394 */
3395 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3396 iommu->name);
3397 iommu_disable_translation(iommu);
3398 clear_translation_pre_enabled(iommu);
3399 } else {
3400 pr_info("Copied translation tables from previous kernel for %s\n",
3401 iommu->name);
3402 }
3403 }
3404
4ed0d3e6 3405 if (!ecap_pass_through(iommu->ecap))
19943b0e 3406 hw_pass_through = 0;
ff3dc652 3407 intel_svm_check(iommu);
ba395927
KA
3408 }
3409
a4c34ff1
JR
3410 /*
3411 * Now that qi is enabled on all iommus, set the root entry and flush
3412 * caches. This is required on some Intel X58 chipsets, otherwise the
3413 * flush_context function will loop forever and the boot hangs.
3414 */
3415 for_each_active_iommu(iommu, drhd) {
3416 iommu_flush_write_buffer(iommu);
3417 iommu_set_root_entry(iommu);
3418 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3419 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3420 }
3421
d3f13810 3422#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3423 dmar_map_gfx = 0;
19943b0e 3424#endif
e0fc7e0b 3425
5daab580
LB
3426 if (!dmar_map_gfx)
3427 iommu_identity_mapping |= IDENTMAP_GFX;
3428
21e722c4
AR
3429 check_tylersburg_isoch();
3430
4de354ec
LB
3431 ret = si_domain_init(hw_pass_through);
3432 if (ret)
3433 goto free_iommu;
86080ccc 3434
ba395927
KA
3435 /*
3436 * for each drhd
3437 * enable fault log
3438 * global invalidate context cache
3439 * global invalidate iotlb
3440 * enable translation
3441 */
7c919779 3442 for_each_iommu(iommu, drhd) {
51a63e67
JC
3443 if (drhd->ignored) {
3444 /*
3445 * we always have to disable PMRs or DMA may fail on
3446 * this device
3447 */
3448 if (force_on)
7c919779 3449 iommu_disable_protect_mem_regions(iommu);
ba395927 3450 continue;
51a63e67 3451 }
ba395927
KA
3452
3453 iommu_flush_write_buffer(iommu);
3454
a222a7f0 3455#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3456 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3457 /*
 3458 * Calling dmar_alloc_hwirq() with dmar_global_lock held
 3459 * could cause a lock race condition.
3460 */
3461 up_write(&dmar_global_lock);
a222a7f0 3462 ret = intel_svm_enable_prq(iommu);
a7755c3c 3463 down_write(&dmar_global_lock);
a222a7f0
DW
3464 if (ret)
3465 goto free_iommu;
3466 }
3467#endif
3460a6d9
KA
3468 ret = dmar_set_interrupt(iommu);
3469 if (ret)
989d51fc 3470 goto free_iommu;
ba395927
KA
3471 }
3472
3473 return 0;
989d51fc
JL
3474
3475free_iommu:
ffebeb46
JL
3476 for_each_active_iommu(iommu, drhd) {
3477 disable_dmar_iommu(iommu);
a868e6b7 3478 free_dmar_iommu(iommu);
ffebeb46 3479 }
13cf0174 3480
d9630fe9 3481 kfree(g_iommus);
13cf0174 3482
989d51fc 3483error:
ba395927
KA
3484 return ret;
3485}
3486
5a5e02a6 3487/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3488static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3489 struct dmar_domain *domain,
3490 unsigned long nrpages, uint64_t dma_mask)
ba395927 3491{
e083ea5b 3492 unsigned long iova_pfn;
ba395927 3493
cb8b892d
LB
3494 /*
3495 * Restrict dma_mask to the width that the iommu can handle.
3496 * First-level translation restricts the input-address to a
3497 * canonical address (i.e., address bits 63:N have the same
3498 * value as address bit [N-1], where N is 48-bits with 4-level
3499 * paging and 57-bits with 5-level paging). Hence, skip bit
3500 * [N-1].
3501 */
3502 if (domain_use_first_level(domain))
3503 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
3504 dma_mask);
3505 else
3506 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
3507 dma_mask);
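	/*
	 * Illustrative effect of the clamp above: with domain->gaw == 48 and
	 * first-level translation, dma_mask is limited to DOMAIN_MAX_ADDR(47)
	 * (just under 2^47), so allocated IOVAs never set bit 47 and stay
	 * canonical under 4-level paging; with second-level translation the
	 * full 48-bit width (just under 2^48) remains available.
	 */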
3508
8f6429c7
RM
3509 /* Ensure we reserve the whole size-aligned region */
3510 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3511
3512 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3513 /*
 3514	 * First try to allocate an I/O virtual address in
284901a9 3515	 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3516	 * from the higher range.
ba395927 3517 */
22e2f9fa 3518 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3519 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3520 if (iova_pfn)
3521 return iova_pfn;
875764de 3522 }
538d5b33
TN
3523 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3524 IOVA_PFN(dma_mask), true);
22e2f9fa 3525 if (unlikely(!iova_pfn)) {
944c9175
QC
3526 dev_err_once(dev, "Allocating %ld-page iova failed\n",
3527 nrpages);
2aac6304 3528 return 0;
f76aec76
KA
3529 }
3530
22e2f9fa 3531 return iova_pfn;
f76aec76
KA
3532}
3533
4ec066c7 3534static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
f76aec76 3535{
1c5ebba9 3536 struct dmar_domain *domain, *tmp;
b1ce5b79 3537 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3538 struct device *i_dev;
3539 int i, ret;
f76aec76 3540
4ec066c7 3541	/* The device should not yet be attached to any domain. */
1c5ebba9
JR
3542 domain = find_domain(dev);
3543 if (domain)
4ec066c7 3544 return NULL;
1c5ebba9
JR
3545
3546 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3547 if (!domain)
3548 goto out;
ba395927 3549
b1ce5b79
JR
3550 /* We have a new domain - setup possible RMRRs for the device */
3551 rcu_read_lock();
3552 for_each_rmrr_units(rmrr) {
3553 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3554 i, i_dev) {
3555 if (i_dev != dev)
3556 continue;
3557
3558 ret = domain_prepare_identity_map(dev, domain,
3559 rmrr->base_address,
3560 rmrr->end_address);
3561 if (ret)
3562 dev_err(dev, "Mapping reserved region failed\n");
3563 }
3564 }
3565 rcu_read_unlock();
3566
1c5ebba9
JR
3567 tmp = set_domain_for_dev(dev, domain);
3568 if (!tmp || domain != tmp) {
3569 domain_exit(domain);
3570 domain = tmp;
3571 }
3572
3573out:
1c5ebba9 3574 if (!domain)
932a6523 3575 dev_err(dev, "Allocating domain failed\n");
c57b260a
LB
3576 else
3577 domain->domain.type = IOMMU_DOMAIN_DMA;
1c5ebba9 3578
f76aec76
KA
3579 return domain;
3580}
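/*
 * Note on the helper above: it is the fallback used by iommu_need_mapping()
 * below when iommu_request_dma_domain_for_dev() fails, and it re-creates the
 * RMRR identity mappings so reserved regions keep working for the device in
 * its private DMA domain.
 */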
3581
ecb509ec 3582/* Check if the dev needs to go through non-identity map and unmap process.*/
48b2c937 3583static bool iommu_need_mapping(struct device *dev)
2c2e2c38 3584{
98b2fffb 3585 int ret;
2c2e2c38 3586
3d89194a 3587 if (iommu_dummy(dev))
48b2c937 3588 return false;
1e4c64c4 3589
a11bfde9
JR
3590 if (unlikely(attach_deferred(dev)))
3591 do_deferred_attach(dev);
3592
98b2fffb
LB
3593 ret = identity_mapping(dev);
3594 if (ret) {
3595 u64 dma_mask = *dev->dma_mask;
3596
3597 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3598 dma_mask = dev->coherent_dma_mask;
3599
9c24eaf8 3600 if (dma_mask >= dma_direct_get_required_mask(dev))
48b2c937
CH
3601 return false;
3602
3603 /*
 3604	 * A device limited to 32-bit DMA is removed from si_domain and falls
 3605	 * back to non-identity mapping.
3606 */
3607 dmar_remove_one_dev_info(dev);
98b2fffb
LB
3608 ret = iommu_request_dma_domain_for_dev(dev);
3609 if (ret) {
3610 struct iommu_domain *domain;
3611 struct dmar_domain *dmar_domain;
3612
3613 domain = iommu_get_domain_for_dev(dev);
3614 if (domain) {
3615 dmar_domain = to_dmar_domain(domain);
3616 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3617 }
ae23bfb6 3618 dmar_remove_one_dev_info(dev);
4ec066c7 3619 get_private_domain_for_dev(dev);
2c2e2c38 3620 }
98b2fffb
LB
3621
3622 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3623 }
3624
48b2c937 3625 return true;
2c2e2c38
FY
3626}
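/*
 * Summary of the check above: return false (use dma-direct) for dummy
 * devices and for identity-mapped devices whose DMA mask can reach all
 * memory; return true (use the IOMMU DMA ops) otherwise.  An identity-mapped
 * device with a too-narrow mask is moved out of si_domain into a DMA domain
 * the first time it maps something.
 */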
3627
21d5d27c
LG
3628static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3629 size_t size, int dir, u64 dma_mask)
f76aec76 3630{
f76aec76 3631 struct dmar_domain *domain;
5b6985ce 3632 phys_addr_t start_paddr;
2aac6304 3633 unsigned long iova_pfn;
f76aec76 3634 int prot = 0;
6865f0d1 3635 int ret;
8c11e798 3636 struct intel_iommu *iommu;
33041ec0 3637 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3638
3639 BUG_ON(dir == DMA_NONE);
2c2e2c38 3640
96d170f3 3641 domain = find_domain(dev);
f76aec76 3642 if (!domain)
524a669b 3643 return DMA_MAPPING_ERROR;
f76aec76 3644
8c11e798 3645 iommu = domain_get_iommu(domain);
88cb6a74 3646 size = aligned_nrpages(paddr, size);
f76aec76 3647
2aac6304
OP
3648 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3649 if (!iova_pfn)
f76aec76
KA
3650 goto error;
3651
ba395927
KA
3652 /*
 3653	 * Check if DMAR supports zero-length reads on write-only
 3654	 * mappings.
3655 */
3656 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3657 !cap_zlr(iommu->cap))
ba395927
KA
3658 prot |= DMA_PTE_READ;
3659 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3660 prot |= DMA_PTE_WRITE;
3661 /*
6865f0d1 3662	 * paddr..(paddr + size) might span a partial page, so we should map the whole
ba395927 3663	 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3664	 * might have two guest addresses mapping to the same host paddr, but this
ba395927
KA
3665 * is not a big problem
3666 */
2aac6304 3667 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3668 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3669 if (ret)
3670 goto error;
3671
2aac6304 3672 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246 3673 start_paddr += paddr & ~PAGE_MASK;
3b53034c
LB
3674
3675 trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3676
03d6a246 3677 return start_paddr;
ba395927 3678
ba395927 3679error:
2aac6304 3680 if (iova_pfn)
22e2f9fa 3681 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3682 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3683 size, (unsigned long long)paddr, dir);
524a669b 3684 return DMA_MAPPING_ERROR;
ba395927
KA
3685}
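/*
 * The handle returned above is (iova_pfn << PAGE_SHIFT) plus the offset of
 * paddr within its page.  Illustrative values: mapping 0x100 bytes at
 * paddr 0x12345678 maps the whole page at 0x12345000 and returns
 * iova_base + 0x678.
 */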
3686
ffbbef5c
FT
3687static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3688 unsigned long offset, size_t size,
3689 enum dma_data_direction dir,
00085f1e 3690 unsigned long attrs)
bb9e6d65 3691{
9cc0c2af
CH
3692 if (iommu_need_mapping(dev))
3693 return __intel_map_single(dev, page_to_phys(page) + offset,
3694 size, dir, *dev->dma_mask);
3695 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3696}
3697
3698static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3699 size_t size, enum dma_data_direction dir,
3700 unsigned long attrs)
3701{
9cc0c2af
CH
3702 if (iommu_need_mapping(dev))
3703 return __intel_map_single(dev, phys_addr, size, dir,
3704 *dev->dma_mask);
3705 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3706}
3707
769530e4 3708static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3709{
f76aec76 3710 struct dmar_domain *domain;
d794dc9b 3711 unsigned long start_pfn, last_pfn;
769530e4 3712 unsigned long nrpages;
2aac6304 3713 unsigned long iova_pfn;
8c11e798 3714 struct intel_iommu *iommu;
ea8ea460 3715 struct page *freelist;
f7b0c4ce 3716 struct pci_dev *pdev = NULL;
ba395927 3717
1525a29a 3718 domain = find_domain(dev);
ba395927
KA
3719 BUG_ON(!domain);
3720
8c11e798
WH
3721 iommu = domain_get_iommu(domain);
3722
2aac6304 3723 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3724
769530e4 3725 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3726 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3727 last_pfn = start_pfn + nrpages - 1;
ba395927 3728
f7b0c4ce
LB
3729 if (dev_is_pci(dev))
3730 pdev = to_pci_dev(dev);
3731
ea8ea460 3732 freelist = domain_unmap(domain, start_pfn, last_pfn);
effa4678
DS
3733 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3734 !has_iova_flush_queue(&domain->iovad)) {
a1ddcbe9 3735 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3736 nrpages, !freelist, 0);
5e0d2a6f 3737 /* free iova */
22e2f9fa 3738 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3739 dma_free_pagelist(freelist);
5e0d2a6f 3740 } else {
13cf0174
JR
3741 queue_iova(&domain->iovad, iova_pfn, nrpages,
3742 (unsigned long)freelist);
5e0d2a6f 3743 /*
 3744	 * queue up the release of the unmap to save roughly 1/6th of the
 3745	 * CPU time used up by the iotlb flush operation...
3746 */
5e0d2a6f 3747 }
3b53034c
LB
3748
3749 trace_unmap_single(dev, dev_addr, size);
ba395927
KA
3750}
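/*
 * Invalidation policy implemented above: strict mode, untrusted PCI devices
 * and domains without a flush queue get a synchronous IOTLB flush plus an
 * immediate IOVA and page-list free; all other cases defer the work through
 * queue_iova() to amortize the cost of the flush.
 */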
3751
d41a4adb
JL
3752static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3753 size_t size, enum dma_data_direction dir,
00085f1e 3754 unsigned long attrs)
d41a4adb 3755{
9cc0c2af
CH
3756 if (iommu_need_mapping(dev))
3757 intel_unmap(dev, dev_addr, size);
3758 else
3759 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3760}
3761
3762static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3763 size_t size, enum dma_data_direction dir, unsigned long attrs)
3764{
3765 if (iommu_need_mapping(dev))
3766 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3767}
3768
5040a918 3769static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3770 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3771 unsigned long attrs)
ba395927 3772{
7ec916f8
CH
3773 struct page *page = NULL;
3774 int order;
ba395927 3775
9cc0c2af
CH
3776 if (!iommu_need_mapping(dev))
3777 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3778
7ec916f8
CH
3779 size = PAGE_ALIGN(size);
3780 order = get_order(size);
7ec916f8
CH
3781
3782 if (gfpflags_allow_blocking(flags)) {
3783 unsigned int count = size >> PAGE_SHIFT;
3784
d834c5ab
MS
3785 page = dma_alloc_from_contiguous(dev, count, order,
3786 flags & __GFP_NOWARN);
7ec916f8
CH
3787 }
3788
3789 if (!page)
3790 page = alloc_pages(flags, order);
3791 if (!page)
3792 return NULL;
3793 memset(page_address(page), 0, size);
3794
21d5d27c
LG
3795 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3796 DMA_BIDIRECTIONAL,
3797 dev->coherent_dma_mask);
524a669b 3798 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3799 return page_address(page);
3800 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3801 __free_pages(page, order);
36746436 3802
ba395927
KA
3803 return NULL;
3804}
3805
5040a918 3806static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3807 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3808{
7ec916f8
CH
3809 int order;
3810 struct page *page = virt_to_page(vaddr);
3811
9cc0c2af
CH
3812 if (!iommu_need_mapping(dev))
3813 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3814
7ec916f8
CH
3815 size = PAGE_ALIGN(size);
3816 order = get_order(size);
3817
3818 intel_unmap(dev, dma_handle, size);
3819 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3820 __free_pages(page, order);
ba395927
KA
3821}
3822
5040a918 3823static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3824 int nelems, enum dma_data_direction dir,
00085f1e 3825 unsigned long attrs)
ba395927 3826{
769530e4
OP
3827 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3828 unsigned long nrpages = 0;
3829 struct scatterlist *sg;
3830 int i;
3831
9cc0c2af
CH
3832 if (!iommu_need_mapping(dev))
3833 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3834
769530e4
OP
3835 for_each_sg(sglist, sg, nelems, i) {
3836 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3837 }
3838
3839 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3b53034c
LB
3840
3841 trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3842}
3843
5040a918 3844static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3845 enum dma_data_direction dir, unsigned long attrs)
ba395927 3846{
ba395927 3847 int i;
ba395927 3848 struct dmar_domain *domain;
f76aec76
KA
3849 size_t size = 0;
3850 int prot = 0;
2aac6304 3851 unsigned long iova_pfn;
f76aec76 3852 int ret;
c03ab37c 3853 struct scatterlist *sg;
b536d24d 3854 unsigned long start_vpfn;
8c11e798 3855 struct intel_iommu *iommu;
ba395927
KA
3856
3857 BUG_ON(dir == DMA_NONE);
48b2c937 3858 if (!iommu_need_mapping(dev))
9cc0c2af 3859 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3860
96d170f3 3861 domain = find_domain(dev);
f76aec76
KA
3862 if (!domain)
3863 return 0;
3864
8c11e798
WH
3865 iommu = domain_get_iommu(domain);
3866
b536d24d 3867 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3868 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3869
2aac6304 3870 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3871 *dev->dma_mask);
2aac6304 3872 if (!iova_pfn) {
c03ab37c 3873 sglist->dma_length = 0;
f76aec76
KA
3874 return 0;
3875 }
3876
3877 /*
 3878	 * Check if DMAR supports zero-length reads on write-only
 3879	 * mappings.
3880 */
3881 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3882 !cap_zlr(iommu->cap))
f76aec76
KA
3883 prot |= DMA_PTE_READ;
3884 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3885 prot |= DMA_PTE_WRITE;
3886
2aac6304 3887 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3888
f532959b 3889 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3890 if (unlikely(ret)) {
e1605495 3891 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3892 start_vpfn + size - 1,
3893 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3894 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3895 return 0;
ba395927
KA
3896 }
3897
984d03ad
LB
3898 for_each_sg(sglist, sg, nelems, i)
3899 trace_map_sg(dev, i + 1, nelems, sg);
3b53034c 3900
ba395927
KA
3901 return nelems;
3902}
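/*
 * Note: a single contiguous IOVA range sized for the whole scatterlist is
 * allocated up front and filled by domain_sg_mapping(); on failure the
 * partially built page tables and the IOVA range are released and 0 is
 * returned, which the DMA API treats as a scatterlist mapping error.
 */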
3903
9c24eaf8
AS
3904static u64 intel_get_required_mask(struct device *dev)
3905{
3906 if (!iommu_need_mapping(dev))
3907 return dma_direct_get_required_mask(dev);
3908 return DMA_BIT_MASK(32);
3909}
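/*
 * Rationale, as implied by the code above: once a device is translated by
 * the IOMMU, 32 bits of IOVA space are enough to reach any physical memory,
 * so a 32-bit mask is reported; devices on the direct path get the real
 * requirement from dma-direct.
 */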
3910
02b4da5f 3911static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3912 .alloc = intel_alloc_coherent,
3913 .free = intel_free_coherent,
ba395927
KA
3914 .map_sg = intel_map_sg,
3915 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3916 .map_page = intel_map_page,
3917 .unmap_page = intel_unmap_page,
21d5d27c 3918 .map_resource = intel_map_resource,
9cc0c2af 3919 .unmap_resource = intel_unmap_resource,
fec777c3 3920 .dma_supported = dma_direct_supported,
f9f3232a
CH
3921 .mmap = dma_common_mmap,
3922 .get_sgtable = dma_common_get_sgtable,
9c24eaf8 3923 .get_required_mask = intel_get_required_mask,
ba395927
KA
3924};
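/*
 * Illustrative driver-side usage (not part of this file): a driver using the
 * generic DMA API ends up in intel_map_page()/intel_unmap_page() above once
 * these ops are installed for its device:
 *
 *	dma_addr_t handle;
 *
 *	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(dev, handle))
 *		return -ENOMEM;
 *	...
 *	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
 */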
3925
cfb94a37
LB
3926static void
3927bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3928 enum dma_data_direction dir, enum dma_sync_target target)
3929{
3930 struct dmar_domain *domain;
3931 phys_addr_t tlb_addr;
3932
3933 domain = find_domain(dev);
3934 if (WARN_ON(!domain))
3935 return;
3936
3937 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3938 if (is_swiotlb_buffer(tlb_addr))
3939 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3940}
3941
3942static dma_addr_t
3943bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3944 enum dma_data_direction dir, unsigned long attrs,
3945 u64 dma_mask)
3946{
3947 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3948 struct dmar_domain *domain;
3949 struct intel_iommu *iommu;
3950 unsigned long iova_pfn;
3951 unsigned long nrpages;
3952 phys_addr_t tlb_addr;
3953 int prot = 0;
3954 int ret;
3955
a11bfde9
JR
3956 if (unlikely(attach_deferred(dev)))
3957 do_deferred_attach(dev);
3958
96d170f3 3959 domain = find_domain(dev);
a11bfde9 3960
cfb94a37
LB
3961 if (WARN_ON(dir == DMA_NONE || !domain))
3962 return DMA_MAPPING_ERROR;
3963
3964 iommu = domain_get_iommu(domain);
3965 if (WARN_ON(!iommu))
3966 return DMA_MAPPING_ERROR;
3967
3968 nrpages = aligned_nrpages(0, size);
3969 iova_pfn = intel_alloc_iova(dev, domain,
3970 dma_to_mm_pfn(nrpages), dma_mask);
3971 if (!iova_pfn)
3972 return DMA_MAPPING_ERROR;
3973
3974 /*
 3975	 * Check if DMAR supports zero-length reads on write-only
 3976	 * mappings.
3977 */
3978 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3979 !cap_zlr(iommu->cap))
3980 prot |= DMA_PTE_READ;
3981 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3982 prot |= DMA_PTE_WRITE;
3983
3984 /*
3985 * If both the physical buffer start address and size are
3986 * page aligned, we don't need to use a bounce page.
3987 */
3988 if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3989 tlb_addr = swiotlb_tbl_map_single(dev,
3990 __phys_to_dma(dev, io_tlb_start),
3991 paddr, size, aligned_size, dir, attrs);
3992 if (tlb_addr == DMA_MAPPING_ERROR) {
3993 goto swiotlb_error;
3994 } else {
3995 /* Cleanup the padding area. */
3996 void *padding_start = phys_to_virt(tlb_addr);
3997 size_t padding_size = aligned_size;
3998
3999 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
4000 (dir == DMA_TO_DEVICE ||
4001 dir == DMA_BIDIRECTIONAL)) {
4002 padding_start += size;
4003 padding_size -= size;
4004 }
4005
4006 memset(padding_start, 0, padding_size);
4007 }
4008 } else {
4009 tlb_addr = paddr;
4010 }
4011
4012 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
4013 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
4014 if (ret)
4015 goto mapping_error;
4016
4017 trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
4018
4019 return (phys_addr_t)iova_pfn << PAGE_SHIFT;
4020
4021mapping_error:
4022 if (is_swiotlb_buffer(tlb_addr))
4023 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
4024 aligned_size, dir, attrs);
4025swiotlb_error:
4026 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
4027 dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
4028 size, (unsigned long long)paddr, dir);
4029
4030 return DMA_MAPPING_ERROR;
4031}
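/*
 * Worked example (illustrative): mapping a 0x100-byte buffer that is not
 * page aligned uses aligned_size = 4 KiB and goes through a swiotlb slot;
 * for transfers towards the device the unused padding in the slot is zeroed
 * above so the device cannot read stale data.  A fully page-aligned buffer
 * skips swiotlb and is mapped in place.
 */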
4032
4033static void
4034bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
4035 enum dma_data_direction dir, unsigned long attrs)
4036{
4037 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
4038 struct dmar_domain *domain;
4039 phys_addr_t tlb_addr;
4040
4041 domain = find_domain(dev);
4042 if (WARN_ON(!domain))
4043 return;
4044
4045 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
4046 if (WARN_ON(!tlb_addr))
4047 return;
4048
4049 intel_unmap(dev, dev_addr, size);
4050 if (is_swiotlb_buffer(tlb_addr))
4051 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
4052 aligned_size, dir, attrs);
4053
4054 trace_bounce_unmap_single(dev, dev_addr, size);
4055}
4056
4057static dma_addr_t
4058bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
4059 size_t size, enum dma_data_direction dir, unsigned long attrs)
4060{
4061 return bounce_map_single(dev, page_to_phys(page) + offset,
4062 size, dir, attrs, *dev->dma_mask);
4063}
4064
4065static dma_addr_t
4066bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
4067 enum dma_data_direction dir, unsigned long attrs)
4068{
4069 return bounce_map_single(dev, phys_addr, size,
4070 dir, attrs, *dev->dma_mask);
4071}
4072
4073static void
4074bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
4075 enum dma_data_direction dir, unsigned long attrs)
4076{
4077 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
4078}
4079
4080static void
4081bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
4082 enum dma_data_direction dir, unsigned long attrs)
4083{
4084 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
4085}
4086
4087static void
4088bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
4089 enum dma_data_direction dir, unsigned long attrs)
4090{
4091 struct scatterlist *sg;
4092 int i;
4093
4094 for_each_sg(sglist, sg, nelems, i)
4095 bounce_unmap_page(dev, sg->dma_address,
4096 sg_dma_len(sg), dir, attrs);
4097}
4098
4099static int
4100bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
4101 enum dma_data_direction dir, unsigned long attrs)
4102{
4103 int i;
4104 struct scatterlist *sg;
4105
4106 for_each_sg(sglist, sg, nelems, i) {
4107 sg->dma_address = bounce_map_page(dev, sg_page(sg),
4108 sg->offset, sg->length,
4109 dir, attrs);
4110 if (sg->dma_address == DMA_MAPPING_ERROR)
4111 goto out_unmap;
4112 sg_dma_len(sg) = sg->length;
4113 }
4114
984d03ad
LB
4115 for_each_sg(sglist, sg, nelems, i)
4116 trace_bounce_map_sg(dev, i + 1, nelems, sg);
4117
cfb94a37
LB
4118 return nelems;
4119
4120out_unmap:
4121 bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
4122 return 0;
4123}
4124
4125static void
4126bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
4127 size_t size, enum dma_data_direction dir)
4128{
4129 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
4130}
4131
4132static void
4133bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
4134 size_t size, enum dma_data_direction dir)
4135{
4136 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
4137}
4138
4139static void
4140bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
4141 int nelems, enum dma_data_direction dir)
4142{
4143 struct scatterlist *sg;
4144 int i;
4145
4146 for_each_sg(sglist, sg, nelems, i)
4147 bounce_sync_single(dev, sg_dma_address(sg),
4148 sg_dma_len(sg), dir, SYNC_FOR_CPU);
4149}
4150
4151static void
4152bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4153 int nelems, enum dma_data_direction dir)
4154{
4155 struct scatterlist *sg;
4156 int i;
4157
4158 for_each_sg(sglist, sg, nelems, i)
4159 bounce_sync_single(dev, sg_dma_address(sg),
4160 sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4161}
4162
4163static const struct dma_map_ops bounce_dma_ops = {
4164 .alloc = intel_alloc_coherent,
4165 .free = intel_free_coherent,
4166 .map_sg = bounce_map_sg,
4167 .unmap_sg = bounce_unmap_sg,
4168 .map_page = bounce_map_page,
4169 .unmap_page = bounce_unmap_page,
4170 .sync_single_for_cpu = bounce_sync_single_for_cpu,
4171 .sync_single_for_device = bounce_sync_single_for_device,
4172 .sync_sg_for_cpu = bounce_sync_sg_for_cpu,
4173 .sync_sg_for_device = bounce_sync_sg_for_device,
4174 .map_resource = bounce_map_resource,
4175 .unmap_resource = bounce_unmap_resource,
4176 .dma_supported = dma_direct_supported,
4177};
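/*
 * Like intel_dma_ops, but any buffer that does not fill whole VT-d pages is
 * bounced through swiotlb by bounce_map_single() above, so the device never
 * gets DMA access to unrelated data sharing its pages.  These ops are meant
 * for devices marked untrusted (cf. has_untrusted_dev() and the swiotlb
 * handling in intel_iommu_init() below).
 */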
4178
ba395927
KA
4179static inline int iommu_domain_cache_init(void)
4180{
4181 int ret = 0;
4182
4183 iommu_domain_cache = kmem_cache_create("iommu_domain",
4184 sizeof(struct dmar_domain),
4185 0,
4186 SLAB_HWCACHE_ALIGN,
4187
4188 NULL);
4189 if (!iommu_domain_cache) {
9f10e5bf 4190 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
4191 ret = -ENOMEM;
4192 }
4193
4194 return ret;
4195}
4196
4197static inline int iommu_devinfo_cache_init(void)
4198{
4199 int ret = 0;
4200
4201 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4202 sizeof(struct device_domain_info),
4203 0,
4204 SLAB_HWCACHE_ALIGN,
ba395927
KA
4205 NULL);
4206 if (!iommu_devinfo_cache) {
9f10e5bf 4207 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
4208 ret = -ENOMEM;
4209 }
4210
4211 return ret;
4212}
4213
ba395927
KA
4214static int __init iommu_init_mempool(void)
4215{
4216 int ret;
ae1ff3d6 4217 ret = iova_cache_get();
ba395927
KA
4218 if (ret)
4219 return ret;
4220
4221 ret = iommu_domain_cache_init();
4222 if (ret)
4223 goto domain_error;
4224
4225 ret = iommu_devinfo_cache_init();
4226 if (!ret)
4227 return ret;
4228
4229 kmem_cache_destroy(iommu_domain_cache);
4230domain_error:
ae1ff3d6 4231 iova_cache_put();
ba395927
KA
4232
4233 return -ENOMEM;
4234}
4235
4236static void __init iommu_exit_mempool(void)
4237{
4238 kmem_cache_destroy(iommu_devinfo_cache);
4239 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 4240 iova_cache_put();
ba395927
KA
4241}
4242
556ab45f
DW
4243static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4244{
4245 struct dmar_drhd_unit *drhd;
4246 u32 vtbar;
4247 int rc;
4248
4249 /* We know that this device on this chipset has its own IOMMU.
4250 * If we find it under a different IOMMU, then the BIOS is lying
4251 * to us. Hope that the IOMMU for this device is actually
4252 * disabled, and it needs no translation...
4253 */
4254 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4255 if (rc) {
4256 /* "can't" happen */
4257 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4258 return;
4259 }
4260 vtbar &= 0xffff0000;
4261
 4262	/* we know that this iommu should be at offset 0xa000 from vtbar */
4263 drhd = dmar_find_matched_drhd_unit(pdev);
81ee85d0
HG
4264 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
4265 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
4266 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
556ab45f 4267 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
81ee85d0 4268 }
556ab45f
DW
4269}
4270DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4271
ba395927
KA
4272static void __init init_no_remapping_devices(void)
4273{
4274 struct dmar_drhd_unit *drhd;
832bd858 4275 struct device *dev;
b683b230 4276 int i;
ba395927
KA
4277
4278 for_each_drhd_unit(drhd) {
4279 if (!drhd->include_all) {
b683b230
JL
4280 for_each_active_dev_scope(drhd->devices,
4281 drhd->devices_cnt, i, dev)
4282 break;
832bd858 4283 /* ignore DMAR unit if no devices exist */
ba395927
KA
4284 if (i == drhd->devices_cnt)
4285 drhd->ignored = 1;
4286 }
4287 }
4288
7c919779 4289 for_each_active_drhd_unit(drhd) {
7c919779 4290 if (drhd->include_all)
ba395927
KA
4291 continue;
4292
b683b230
JL
4293 for_each_active_dev_scope(drhd->devices,
4294 drhd->devices_cnt, i, dev)
832bd858 4295 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4296 break;
ba395927
KA
4297 if (i < drhd->devices_cnt)
4298 continue;
4299
c0771df8
DW
4300 /* This IOMMU has *only* gfx devices. Either bypass it or
4301 set the gfx_mapped flag, as appropriate */
cf1ec453 4302 if (!dmar_map_gfx) {
c0771df8 4303 drhd->ignored = 1;
b683b230
JL
4304 for_each_active_dev_scope(drhd->devices,
4305 drhd->devices_cnt, i, dev)
832bd858 4306 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4307 }
4308 }
4309}
4310
f59c7b69
FY
4311#ifdef CONFIG_SUSPEND
4312static int init_iommu_hw(void)
4313{
4314 struct dmar_drhd_unit *drhd;
4315 struct intel_iommu *iommu = NULL;
4316
4317 for_each_active_iommu(iommu, drhd)
4318 if (iommu->qi)
4319 dmar_reenable_qi(iommu);
4320
b779260b
JC
4321 for_each_iommu(iommu, drhd) {
4322 if (drhd->ignored) {
4323 /*
4324 * we always have to disable PMRs or DMA may fail on
4325 * this device
4326 */
4327 if (force_on)
4328 iommu_disable_protect_mem_regions(iommu);
4329 continue;
4330 }
095303e0 4331
f59c7b69
FY
4332 iommu_flush_write_buffer(iommu);
4333
4334 iommu_set_root_entry(iommu);
4335
4336 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4337 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4338 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4339 iommu_enable_translation(iommu);
b94996c9 4340 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4341 }
4342
4343 return 0;
4344}
4345
4346static void iommu_flush_all(void)
4347{
4348 struct dmar_drhd_unit *drhd;
4349 struct intel_iommu *iommu;
4350
4351 for_each_active_iommu(iommu, drhd) {
4352 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4353 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4354 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4355 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4356 }
4357}
4358
134fac3f 4359static int iommu_suspend(void)
f59c7b69
FY
4360{
4361 struct dmar_drhd_unit *drhd;
4362 struct intel_iommu *iommu = NULL;
4363 unsigned long flag;
4364
4365 for_each_active_iommu(iommu, drhd) {
6396bb22 4366 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4367 GFP_ATOMIC);
4368 if (!iommu->iommu_state)
4369 goto nomem;
4370 }
4371
4372 iommu_flush_all();
4373
4374 for_each_active_iommu(iommu, drhd) {
4375 iommu_disable_translation(iommu);
4376
1f5b3c3f 4377 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4378
4379 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4380 readl(iommu->reg + DMAR_FECTL_REG);
4381 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4382 readl(iommu->reg + DMAR_FEDATA_REG);
4383 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4384 readl(iommu->reg + DMAR_FEADDR_REG);
4385 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4386 readl(iommu->reg + DMAR_FEUADDR_REG);
4387
1f5b3c3f 4388 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4389 }
4390 return 0;
4391
4392nomem:
4393 for_each_active_iommu(iommu, drhd)
4394 kfree(iommu->iommu_state);
4395
4396 return -ENOMEM;
4397}
4398
134fac3f 4399static void iommu_resume(void)
f59c7b69
FY
4400{
4401 struct dmar_drhd_unit *drhd;
4402 struct intel_iommu *iommu = NULL;
4403 unsigned long flag;
4404
4405 if (init_iommu_hw()) {
b779260b
JC
4406 if (force_on)
4407 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4408 else
4409 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4410 return;
f59c7b69
FY
4411 }
4412
4413 for_each_active_iommu(iommu, drhd) {
4414
1f5b3c3f 4415 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4416
4417 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4418 iommu->reg + DMAR_FECTL_REG);
4419 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4420 iommu->reg + DMAR_FEDATA_REG);
4421 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4422 iommu->reg + DMAR_FEADDR_REG);
4423 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4424 iommu->reg + DMAR_FEUADDR_REG);
4425
1f5b3c3f 4426 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4427 }
4428
4429 for_each_active_iommu(iommu, drhd)
4430 kfree(iommu->iommu_state);
f59c7b69
FY
4431}
4432
134fac3f 4433static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4434 .resume = iommu_resume,
4435 .suspend = iommu_suspend,
4436};
4437
134fac3f 4438static void __init init_iommu_pm_ops(void)
f59c7b69 4439{
134fac3f 4440 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4441}
4442
4443#else
99592ba4 4444static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4445#endif /* CONFIG_PM */
4446
ce4cc52b
BR
4447static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
4448{
4449 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
4450 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
4451 rmrr->end_address <= rmrr->base_address ||
4452 arch_rmrr_sanity_check(rmrr))
4453 return -EINVAL;
4454
4455 return 0;
4456}
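/*
 * Example of the checks above (illustrative values): an RMRR covering
 * [0x000c0000, 0x000dffff] passes the alignment and ordering checks -- both
 * bounds are page aligned (end_address + 1 == 0x000e0000) and the end is
 * above the base; a region whose end precedes its base, or whose bounds are
 * not page aligned, is rejected with -EINVAL.
 */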
4457
c2a0b538 4458int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4459{
4460 struct acpi_dmar_reserved_memory *rmrr;
4461 struct dmar_rmrr_unit *rmrru;
f036c7fa
YC
4462
4463 rmrr = (struct acpi_dmar_reserved_memory *)header;
96788c7a
HG
4464 if (rmrr_sanity_check(rmrr)) {
4465 pr_warn(FW_BUG
f5a68bb0
BR
4466 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
4467 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4468 rmrr->base_address, rmrr->end_address,
4469 dmi_get_system_info(DMI_BIOS_VENDOR),
4470 dmi_get_system_info(DMI_BIOS_VERSION),
4471 dmi_get_system_info(DMI_PRODUCT_VERSION));
96788c7a
HG
4472 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
4473 }
318fe7df
SS
4474
4475 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4476 if (!rmrru)
0659b8dc 4477 goto out;
318fe7df
SS
4478
4479 rmrru->hdr = header;
f036c7fa 4480
318fe7df
SS
4481 rmrru->base_address = rmrr->base_address;
4482 rmrru->end_address = rmrr->end_address;
0659b8dc 4483
2e455289
JL
4484 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4485 ((void *)rmrr) + rmrr->header.length,
4486 &rmrru->devices_cnt);
0659b8dc 4487 if (rmrru->devices_cnt && rmrru->devices == NULL)
5f64ce54 4488 goto free_rmrru;
318fe7df 4489
2e455289 4490 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4491
2e455289 4492 return 0;
0659b8dc
EA
4493free_rmrru:
4494 kfree(rmrru);
4495out:
4496 return -ENOMEM;
318fe7df
SS
4497}
4498
6b197249
JL
4499static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4500{
4501 struct dmar_atsr_unit *atsru;
4502 struct acpi_dmar_atsr *tmp;
4503
4504 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4505 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4506 if (atsr->segment != tmp->segment)
4507 continue;
4508 if (atsr->header.length != tmp->header.length)
4509 continue;
4510 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4511 return atsru;
4512 }
4513
4514 return NULL;
4515}
4516
4517int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4518{
4519 struct acpi_dmar_atsr *atsr;
4520 struct dmar_atsr_unit *atsru;
4521
b608fe35 4522 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4523 return 0;
4524
318fe7df 4525 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4526 atsru = dmar_find_atsr(atsr);
4527 if (atsru)
4528 return 0;
4529
4530 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4531 if (!atsru)
4532 return -ENOMEM;
4533
6b197249
JL
4534 /*
 4535	 * If memory is allocated from the slab by the ACPI _DSM method, we need to
4536 * copy the memory content because the memory buffer will be freed
4537 * on return.
4538 */
4539 atsru->hdr = (void *)(atsru + 1);
4540 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4541 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4542 if (!atsru->include_all) {
4543 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4544 (void *)atsr + atsr->header.length,
4545 &atsru->devices_cnt);
4546 if (atsru->devices_cnt && atsru->devices == NULL) {
4547 kfree(atsru);
4548 return -ENOMEM;
4549 }
4550 }
318fe7df 4551
0e242612 4552 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4553
4554 return 0;
4555}
4556
9bdc531e
JL
4557static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4558{
4559 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4560 kfree(atsru);
4561}
4562
6b197249
JL
4563int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4564{
4565 struct acpi_dmar_atsr *atsr;
4566 struct dmar_atsr_unit *atsru;
4567
4568 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4569 atsru = dmar_find_atsr(atsr);
4570 if (atsru) {
4571 list_del_rcu(&atsru->list);
4572 synchronize_rcu();
4573 intel_iommu_free_atsr(atsru);
4574 }
4575
4576 return 0;
4577}
4578
4579int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4580{
4581 int i;
4582 struct device *dev;
4583 struct acpi_dmar_atsr *atsr;
4584 struct dmar_atsr_unit *atsru;
4585
4586 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4587 atsru = dmar_find_atsr(atsr);
4588 if (!atsru)
4589 return 0;
4590
194dc870 4591 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4592 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4593 i, dev)
4594 return -EBUSY;
194dc870 4595 }
6b197249
JL
4596
4597 return 0;
4598}
4599
ffebeb46
JL
4600static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4601{
e083ea5b 4602 int sp, ret;
ffebeb46
JL
4603 struct intel_iommu *iommu = dmaru->iommu;
4604
4605 if (g_iommus[iommu->seq_id])
4606 return 0;
4607
4608 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4609 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4610 iommu->name);
4611 return -ENXIO;
4612 }
4613 if (!ecap_sc_support(iommu->ecap) &&
4614 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4615 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4616 iommu->name);
4617 return -ENXIO;
4618 }
64229e8f 4619 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
ffebeb46 4620 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4621 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4622 iommu->name);
4623 return -ENXIO;
4624 }
4625
4626 /*
4627 * Disable translation if already enabled prior to OS handover.
4628 */
4629 if (iommu->gcmd & DMA_GCMD_TE)
4630 iommu_disable_translation(iommu);
4631
4632 g_iommus[iommu->seq_id] = iommu;
4633 ret = iommu_init_domains(iommu);
4634 if (ret == 0)
4635 ret = iommu_alloc_root_entry(iommu);
4636 if (ret)
4637 goto out;
4638
ff3dc652 4639 intel_svm_check(iommu);
8a94ade4 4640
ffebeb46
JL
4641 if (dmaru->ignored) {
4642 /*
4643 * we always have to disable PMRs or DMA may fail on this device
4644 */
4645 if (force_on)
4646 iommu_disable_protect_mem_regions(iommu);
4647 return 0;
4648 }
4649
4650 intel_iommu_init_qi(iommu);
4651 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4652
4653#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4654 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4655 ret = intel_svm_enable_prq(iommu);
4656 if (ret)
4657 goto disable_iommu;
4658 }
4659#endif
ffebeb46
JL
4660 ret = dmar_set_interrupt(iommu);
4661 if (ret)
4662 goto disable_iommu;
4663
4664 iommu_set_root_entry(iommu);
4665 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4666 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4667 iommu_enable_translation(iommu);
4668
ffebeb46
JL
4669 iommu_disable_protect_mem_regions(iommu);
4670 return 0;
4671
4672disable_iommu:
4673 disable_dmar_iommu(iommu);
4674out:
4675 free_dmar_iommu(iommu);
4676 return ret;
4677}
4678
6b197249
JL
4679int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4680{
ffebeb46
JL
4681 int ret = 0;
4682 struct intel_iommu *iommu = dmaru->iommu;
4683
4684 if (!intel_iommu_enabled)
4685 return 0;
4686 if (iommu == NULL)
4687 return -EINVAL;
4688
4689 if (insert) {
4690 ret = intel_iommu_add(dmaru);
4691 } else {
4692 disable_dmar_iommu(iommu);
4693 free_dmar_iommu(iommu);
4694 }
4695
4696 return ret;
6b197249
JL
4697}
4698
9bdc531e
JL
4699static void intel_iommu_free_dmars(void)
4700{
4701 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4702 struct dmar_atsr_unit *atsru, *atsr_n;
4703
4704 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4705 list_del(&rmrru->list);
4706 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4707 kfree(rmrru);
318fe7df
SS
4708 }
4709
9bdc531e
JL
4710 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4711 list_del(&atsru->list);
4712 intel_iommu_free_atsr(atsru);
4713 }
318fe7df
SS
4714}
4715
4716int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4717{
b683b230 4718 int i, ret = 1;
318fe7df 4719 struct pci_bus *bus;
832bd858
DW
4720 struct pci_dev *bridge = NULL;
4721 struct device *tmp;
318fe7df
SS
4722 struct acpi_dmar_atsr *atsr;
4723 struct dmar_atsr_unit *atsru;
4724
4725 dev = pci_physfn(dev);
318fe7df 4726 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4727 bridge = bus->self;
d14053b3
DW
4728 /* If it's an integrated device, allow ATS */
4729 if (!bridge)
4730 return 1;
4731 /* Connected via non-PCIe: no ATS */
4732 if (!pci_is_pcie(bridge) ||
62f87c0e 4733 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4734 return 0;
d14053b3 4735 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4736 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4737 break;
318fe7df
SS
4738 }
4739
0e242612 4740 rcu_read_lock();
b5f82ddf
JL
4741 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4742 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4743 if (atsr->segment != pci_domain_nr(dev->bus))
4744 continue;
4745
b683b230 4746 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4747 if (tmp == &bridge->dev)
b683b230 4748 goto out;
b5f82ddf
JL
4749
4750 if (atsru->include_all)
b683b230 4751 goto out;
b5f82ddf 4752 }
b683b230
JL
4753 ret = 0;
4754out:
0e242612 4755 rcu_read_unlock();
318fe7df 4756
b683b230 4757 return ret;
318fe7df
SS
4758}
4759
59ce0515
JL
4760int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4761{
e083ea5b 4762 int ret;
59ce0515
JL
4763 struct dmar_rmrr_unit *rmrru;
4764 struct dmar_atsr_unit *atsru;
4765 struct acpi_dmar_atsr *atsr;
4766 struct acpi_dmar_reserved_memory *rmrr;
4767
b608fe35 4768 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4769 return 0;
4770
4771 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4772 rmrr = container_of(rmrru->hdr,
4773 struct acpi_dmar_reserved_memory, header);
4774 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4775 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4776 ((void *)rmrr) + rmrr->header.length,
4777 rmrr->segment, rmrru->devices,
4778 rmrru->devices_cnt);
e083ea5b 4779 if (ret < 0)
59ce0515 4780 return ret;
e6a8c9b3 4781 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4782 dmar_remove_dev_scope(info, rmrr->segment,
4783 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4784 }
4785 }
4786
4787 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4788 if (atsru->include_all)
4789 continue;
4790
4791 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4792 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4793 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4794 (void *)atsr + atsr->header.length,
4795 atsr->segment, atsru->devices,
4796 atsru->devices_cnt);
4797 if (ret > 0)
4798 break;
e083ea5b 4799 else if (ret < 0)
59ce0515 4800 return ret;
e6a8c9b3 4801 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4802 if (dmar_remove_dev_scope(info, atsr->segment,
4803 atsru->devices, atsru->devices_cnt))
4804 break;
4805 }
4806 }
4807
4808 return 0;
4809}
4810
75f05569
JL
4811static int intel_iommu_memory_notifier(struct notifier_block *nb,
4812 unsigned long val, void *v)
4813{
4814 struct memory_notify *mhp = v;
4815 unsigned long long start, end;
4816 unsigned long start_vpfn, last_vpfn;
4817
4818 switch (val) {
4819 case MEM_GOING_ONLINE:
4820 start = mhp->start_pfn << PAGE_SHIFT;
4821 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4822 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4823 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4824 start, end);
4825 return NOTIFY_BAD;
4826 }
4827 break;
4828
4829 case MEM_OFFLINE:
4830 case MEM_CANCEL_ONLINE:
4831 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4832 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4833 while (start_vpfn <= last_vpfn) {
4834 struct iova *iova;
4835 struct dmar_drhd_unit *drhd;
4836 struct intel_iommu *iommu;
ea8ea460 4837 struct page *freelist;
75f05569
JL
4838
4839 iova = find_iova(&si_domain->iovad, start_vpfn);
4840 if (iova == NULL) {
9f10e5bf 4841 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4842 start_vpfn);
4843 break;
4844 }
4845
4846 iova = split_and_remove_iova(&si_domain->iovad, iova,
4847 start_vpfn, last_vpfn);
4848 if (iova == NULL) {
9f10e5bf 4849 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4850 start_vpfn, last_vpfn);
4851 return NOTIFY_BAD;
4852 }
4853
ea8ea460
DW
4854 freelist = domain_unmap(si_domain, iova->pfn_lo,
4855 iova->pfn_hi);
4856
75f05569
JL
4857 rcu_read_lock();
4858 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4859 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4860 iova->pfn_lo, iova_size(iova),
ea8ea460 4861 !freelist, 0);
75f05569 4862 rcu_read_unlock();
ea8ea460 4863 dma_free_pagelist(freelist);
75f05569
JL
4864
4865 start_vpfn = iova->pfn_hi + 1;
4866 free_iova_mem(iova);
4867 }
4868 break;
4869 }
4870
4871 return NOTIFY_OK;
4872}
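/*
 * Memory hotplug handling, as implemented above: a range about to come
 * online is added to the si_domain identity map; when a range goes offline
 * its IOVA is split out of si_domain's IOVA tree, the page tables are
 * unmapped and freed, and the IOTLB of every active IOMMU is flushed.
 */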
4873
4874static struct notifier_block intel_iommu_memory_nb = {
4875 .notifier_call = intel_iommu_memory_notifier,
4876 .priority = 0
4877};
4878
22e2f9fa
OP
4879static void free_all_cpu_cached_iovas(unsigned int cpu)
4880{
4881 int i;
4882
4883 for (i = 0; i < g_num_of_iommus; i++) {
4884 struct intel_iommu *iommu = g_iommus[i];
4885 struct dmar_domain *domain;
0caa7616 4886 int did;
22e2f9fa
OP
4887
4888 if (!iommu)
4889 continue;
4890
3bd4f911 4891 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4892 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4893
4894 if (!domain)
4895 continue;
4896 free_cpu_cached_iovas(cpu, &domain->iovad);
4897 }
4898 }
4899}
4900
21647615 4901static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4902{
21647615 4903 free_all_cpu_cached_iovas(cpu);
21647615 4904 return 0;
aa473240
OP
4905}
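/*
 * CPU hotplug callback: when a CPU dies, the IOVAs cached on that CPU are
 * returned to the per-domain IOVA trees for every domain on every IOMMU via
 * free_all_cpu_cached_iovas() above.
 */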
4906
161b28aa
JR
4907static void intel_disable_iommus(void)
4908{
4909 struct intel_iommu *iommu = NULL;
4910 struct dmar_drhd_unit *drhd;
4911
4912 for_each_iommu(iommu, drhd)
4913 iommu_disable_translation(iommu);
4914}
4915
6c3a44ed
DD
4916void intel_iommu_shutdown(void)
4917{
4918 struct dmar_drhd_unit *drhd;
4919 struct intel_iommu *iommu = NULL;
4920
4921 if (no_iommu || dmar_disabled)
4922 return;
4923
4924 down_write(&dmar_global_lock);
4925
4926 /* Disable PMRs explicitly here. */
4927 for_each_iommu(iommu, drhd)
4928 iommu_disable_protect_mem_regions(iommu);
4929
4930 /* Make sure the IOMMUs are switched off */
4931 intel_disable_iommus();
4932
4933 up_write(&dmar_global_lock);
4934}
4935
a7fdb6e6
JR
4936static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4937{
2926a2aa
JR
4938 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4939
4940 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4941}
4942
a5459cfe
AW
4943static ssize_t intel_iommu_show_version(struct device *dev,
4944 struct device_attribute *attr,
4945 char *buf)
4946{
a7fdb6e6 4947 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4948 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4949 return sprintf(buf, "%d:%d\n",
4950 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4951}
4952static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4953
4954static ssize_t intel_iommu_show_address(struct device *dev,
4955 struct device_attribute *attr,
4956 char *buf)
4957{
a7fdb6e6 4958 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4959 return sprintf(buf, "%llx\n", iommu->reg_phys);
4960}
4961static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4962
4963static ssize_t intel_iommu_show_cap(struct device *dev,
4964 struct device_attribute *attr,
4965 char *buf)
4966{
a7fdb6e6 4967 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4968 return sprintf(buf, "%llx\n", iommu->cap);
4969}
4970static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4971
4972static ssize_t intel_iommu_show_ecap(struct device *dev,
4973 struct device_attribute *attr,
4974 char *buf)
4975{
a7fdb6e6 4976 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4977 return sprintf(buf, "%llx\n", iommu->ecap);
4978}
4979static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4980
2238c082
AW
4981static ssize_t intel_iommu_show_ndoms(struct device *dev,
4982 struct device_attribute *attr,
4983 char *buf)
4984{
a7fdb6e6 4985 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4986 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4987}
4988static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4989
4990static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4991 struct device_attribute *attr,
4992 char *buf)
4993{
a7fdb6e6 4994 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4995 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4996 cap_ndoms(iommu->cap)));
4997}
4998static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4999
a5459cfe
AW
5000static struct attribute *intel_iommu_attrs[] = {
5001 &dev_attr_version.attr,
5002 &dev_attr_address.attr,
5003 &dev_attr_cap.attr,
5004 &dev_attr_ecap.attr,
2238c082
AW
5005 &dev_attr_domains_supported.attr,
5006 &dev_attr_domains_used.attr,
a5459cfe
AW
5007 NULL,
5008};
5009
5010static struct attribute_group intel_iommu_group = {
5011 .name = "intel-iommu",
5012 .attrs = intel_iommu_attrs,
5013};
5014
5015const struct attribute_group *intel_iommu_groups[] = {
5016 &intel_iommu_group,
5017 NULL,
5018};
5019
c5a5dc4c 5020static inline bool has_untrusted_dev(void)
89a6079d
LB
5021{
5022 struct pci_dev *pdev = NULL;
89a6079d 5023
c5a5dc4c
LB
5024 for_each_pci_dev(pdev)
5025 if (pdev->untrusted)
5026 return true;
89a6079d 5027
c5a5dc4c
LB
5028 return false;
5029}
89a6079d 5030
c5a5dc4c
LB
5031static int __init platform_optin_force_iommu(void)
5032{
5033 if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
89a6079d
LB
5034 return 0;
5035
5036 if (no_iommu || dmar_disabled)
5037 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
5038
5039 /*
5040 * If Intel-IOMMU is disabled by default, we will apply identity
5041 * map for all devices except those marked as being untrusted.
5042 */
5043 if (dmar_disabled)
b89b6605 5044 iommu_set_default_passthrough(false);
89a6079d
LB
5045
5046 dmar_disabled = 0;
89a6079d
LB
5047 no_iommu = 0;
5048
5049 return 1;
5050}
5051
fa212a97
LB
5052static int __init probe_acpi_namespace_devices(void)
5053{
5054 struct dmar_drhd_unit *drhd;
af88ec39
QC
5055 /* To avoid a -Wunused-but-set-variable warning. */
5056 struct intel_iommu *iommu __maybe_unused;
fa212a97
LB
5057 struct device *dev;
5058 int i, ret = 0;
5059
5060 for_each_active_iommu(iommu, drhd) {
5061 for_each_active_dev_scope(drhd->devices,
5062 drhd->devices_cnt, i, dev) {
5063 struct acpi_device_physical_node *pn;
5064 struct iommu_group *group;
5065 struct acpi_device *adev;
5066
5067 if (dev->bus != &acpi_bus_type)
5068 continue;
5069
5070 adev = to_acpi_device(dev);
5071 mutex_lock(&adev->physical_node_lock);
5072 list_for_each_entry(pn,
5073 &adev->physical_node_list, node) {
5074 group = iommu_group_get(pn->dev);
5075 if (group) {
5076 iommu_group_put(group);
5077 continue;
5078 }
5079
5080 pn->dev->bus->iommu_ops = &intel_iommu_ops;
5081 ret = iommu_probe_device(pn->dev);
5082 if (ret)
5083 break;
5084 }
5085 mutex_unlock(&adev->physical_node_lock);
5086
5087 if (ret)
5088 return ret;
5089 }
5090 }
5091
5092 return 0;
5093}
5094
ba395927
KA
5095int __init intel_iommu_init(void)
5096{
9bdc531e 5097 int ret = -ENODEV;
3a93c841 5098 struct dmar_drhd_unit *drhd;
7c919779 5099 struct intel_iommu *iommu;
ba395927 5100
89a6079d
LB
5101 /*
5102 * Intel IOMMU is required for a TXT/tboot launch or platform
5103 * opt in, so enforce that.
5104 */
5105 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 5106
3a5670e8
JL
5107 if (iommu_init_mempool()) {
5108 if (force_on)
5109 panic("tboot: Failed to initialize iommu memory\n");
5110 return -ENOMEM;
5111 }
5112
5113 down_write(&dmar_global_lock);
a59b50e9
JC
5114 if (dmar_table_init()) {
5115 if (force_on)
5116 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 5117 goto out_free_dmar;
a59b50e9 5118 }
ba395927 5119
c2c7286a 5120 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
5121 if (force_on)
5122 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 5123 goto out_free_dmar;
a59b50e9 5124 }
1886e8a9 5125
ec154bf5
JR
5126 up_write(&dmar_global_lock);
5127
5128 /*
5129 * The bus notifier takes the dmar_global_lock, so lockdep will
5130 * complain later when we register it under the lock.
5131 */
5132 dmar_register_bus_notifier();
5133
5134 down_write(&dmar_global_lock);
5135
1da8347d
MD
5136 if (!no_iommu)
5137 intel_iommu_debugfs_init();
5138
161b28aa 5139 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
5140 /*
5141 * We exit the function here to ensure IOMMU's remapping and
 5142	 * mempool aren't set up, which means that the IOMMU's PMRs
5143 * won't be disabled via the call to init_dmars(). So disable
5144 * it explicitly here. The PMRs were setup by tboot prior to
5145 * calling SENTER, but the kernel is expected to reset/tear
5146 * down the PMRs.
5147 */
5148 if (intel_iommu_tboot_noforce) {
5149 for_each_iommu(iommu, drhd)
5150 iommu_disable_protect_mem_regions(iommu);
5151 }
5152
161b28aa
JR
5153 /*
5154 * Make sure the IOMMUs are switched off, even when we
5155 * boot into a kexec kernel and the previous kernel left
5156 * them enabled
5157 */
5158 intel_disable_iommus();
9bdc531e 5159 goto out_free_dmar;
161b28aa 5160 }
2ae21010 5161
318fe7df 5162 if (list_empty(&dmar_rmrr_units))
9f10e5bf 5163 pr_info("No RMRR found\n");
318fe7df
SS
5164
5165 if (list_empty(&dmar_atsr_units))
9f10e5bf 5166 pr_info("No ATSR found\n");
318fe7df 5167
51a63e67
JC
5168 if (dmar_init_reserved_ranges()) {
5169 if (force_on)
5170 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 5171 goto out_free_reserved_range;
51a63e67 5172 }
ba395927 5173
cf1ec453
LB
5174 if (dmar_map_gfx)
5175 intel_iommu_gfx_mapped = 1;
5176
ba395927
KA
5177 init_no_remapping_devices();
5178
b779260b 5179 ret = init_dmars();
ba395927 5180 if (ret) {
a59b50e9
JC
5181 if (force_on)
5182 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 5183 pr_err("Initialization failed\n");
9bdc531e 5184 goto out_free_reserved_range;
ba395927 5185 }
3a5670e8 5186 up_write(&dmar_global_lock);
ba395927 5187
4fac8076 5188#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
c5a5dc4c
LB
5189 /*
5190 * If the system has no untrusted device or the user has decided
5191 * to disable the bounce page mechanisms, we don't need swiotlb.
 5192	 * Mark this so that the pre-allocated bounce pages can be released
 5193	 * later.
5194 */
5195 if (!has_untrusted_dev() || intel_no_bounce)
5196 swiotlb = 0;
75f1cdf1 5197#endif
19943b0e 5198 dma_ops = &intel_dma_ops;
4ed0d3e6 5199
134fac3f 5200 init_iommu_pm_ops();
a8bcbb0d 5201
2d48ea0e 5202 down_read(&dmar_global_lock);
39ab9555
JR
5203 for_each_active_iommu(iommu, drhd) {
5204 iommu_device_sysfs_add(&iommu->iommu, NULL,
5205 intel_iommu_groups,
5206 "%s", iommu->name);
5207 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5208 iommu_device_register(&iommu->iommu);
5209 }
2d48ea0e 5210 up_read(&dmar_global_lock);
a5459cfe 5211
4236d97d 5212 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
75f05569
JL
5213 if (si_domain && !hw_pass_through)
5214 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
5215 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5216 intel_iommu_cpu_dead);
d8190dc6 5217
d5692d4a 5218 down_read(&dmar_global_lock);
fa212a97
LB
5219 if (probe_acpi_namespace_devices())
5220 pr_warn("ACPI name space devices didn't probe correctly\n");
5221
d8190dc6
LB
5222 /* Finally, we enable the DMA remapping hardware. */
5223 for_each_iommu(iommu, drhd) {
6a8c6748 5224 if (!drhd->ignored && !translation_pre_enabled(iommu))
d8190dc6
LB
5225 iommu_enable_translation(iommu);
5226
5227 iommu_disable_protect_mem_regions(iommu);
5228 }
2d48ea0e
QC
5229 up_read(&dmar_global_lock);
5230
d8190dc6
LB
5231 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5232
8bc1f85c
ED
5233 intel_iommu_enabled = 1;
5234
ba395927 5235 return 0;
9bdc531e
JL
5236
5237out_free_reserved_range:
5238 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
5239out_free_dmar:
5240 intel_iommu_free_dmars();
3a5670e8
JL
5241 up_write(&dmar_global_lock);
5242 iommu_exit_mempool();
9bdc531e 5243 return ret;
ba395927 5244}
e820482c 5245
0ce4a85f
LB
5246static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5247{
5248 struct intel_iommu *iommu = opaque;
5249
5250 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5251 return 0;
5252}
5253
5254/*
5255 * NB - intel-iommu lacks any sort of reference counting for the users of
5256 * dependent devices. If multiple endpoints have intersecting dependent
5257 * devices, unbinding the driver from any one of them will possibly leave
5258 * the others unable to operate.
5259 */
5260static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5261{
5262 if (!iommu || !dev || !dev_is_pci(dev))
5263 return;
5264
5265 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5266}
5267
127c7615 5268static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 5269{
942067f1 5270 struct dmar_domain *domain;
c7151a8d
WH
5271 struct intel_iommu *iommu;
5272 unsigned long flags;
c7151a8d 5273
55d94043
JR
5274 assert_spin_locked(&device_domain_lock);
5275
127c7615 5276 if (WARN_ON(!info))
c7151a8d
WH
5277 return;
5278
127c7615 5279 iommu = info->iommu;
942067f1 5280 domain = info->domain;
c7151a8d 5281
127c7615 5282 if (info->dev) {
ef848b7e
LB
5283 if (dev_is_pci(info->dev) && sm_supported(iommu))
5284 intel_pasid_tear_down_entry(iommu, info->dev,
5285 PASID_RID2PASID);
5286
127c7615 5287 iommu_disable_dev_iotlb(info);
0ce4a85f 5288 domain_context_clear(iommu, info->dev);
a7fc93fe 5289 intel_pasid_free_table(info->dev);
127c7615 5290 }
c7151a8d 5291
b608ac3b 5292 unlink_domain_info(info);
c7151a8d 5293
d160aca5 5294 spin_lock_irqsave(&iommu->lock, flags);
942067f1 5295 domain_detach_iommu(domain, iommu);
d160aca5 5296 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 5297
942067f1
LB
5298 /* free the private domain */
5299 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
3a18844d
LB
5300 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
5301 list_empty(&domain->devices))
942067f1
LB
5302 domain_exit(info->domain);
5303
127c7615 5304 free_devinfo_mem(info);
c7151a8d 5305}
c7151a8d 5306
71753239 5307static void dmar_remove_one_dev_info(struct device *dev)
55d94043 5308{
127c7615 5309 struct device_domain_info *info;
55d94043 5310 unsigned long flags;
3e7abe25 5311
55d94043 5312 spin_lock_irqsave(&device_domain_lock, flags);
127c7615 5313 info = dev->archdata.iommu;
bf708cfb
JS
5314 if (info && info != DEFER_DEVICE_DOMAIN_INFO
5315 && info != DUMMY_DEVICE_DOMAIN_INFO)
ae23bfb6 5316 __dmar_remove_one_dev_info(info);
55d94043 5317 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5318}
5319
301e7ee1
JR
5320static int md_domain_init(struct dmar_domain *domain, int guest_width)
5321{
5322 int adjust_width;
5323
5324 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5325 domain_reserve_special_ranges(domain);
5326
5327 /* calculate AGAW */
5328 domain->gaw = guest_width;
5329 adjust_width = guestwidth_to_adjustwidth(guest_width);
5330 domain->agaw = width_to_agaw(adjust_width);
5331
5332 domain->iommu_coherency = 0;
5333 domain->iommu_snooping = 0;
5334 domain->iommu_superpage = 0;
5335 domain->max_addr = 0;
5336
5337 /* always allocate the top pgd */
5338 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5339 if (!domain->pgd)
5340 return -ENOMEM;
5341 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5342 return 0;
5343}
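/*
 * md_domain_init() sizes the page-table hierarchy from the requested guest
 * address width.  Roughly, with the usual VT-d AGAW encoding, a 48-bit
 * guest width corresponds to agaw 2 (a 4-level table) and the 57-bit
 * DEFAULT_DOMAIN_ADDRESS_WIDTH passed in by intel_iommu_domain_alloc()
 * below corresponds to agaw 3 (5 levels); the top PGD page is always
 * allocated here and flushed so non-coherent hardware sees it.
 */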
5344
00a77deb 5345static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5346{
5d450806 5347 struct dmar_domain *dmar_domain;
00a77deb 5348 struct iommu_domain *domain;
10f8008f 5349 int ret;
00a77deb 5350
4de354ec 5351 switch (type) {
fa954e68
LB
5352 case IOMMU_DOMAIN_DMA:
5353 /* fallthrough */
4de354ec 5354 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 5355 dmar_domain = alloc_domain(0);
4de354ec
LB
5356 if (!dmar_domain) {
5357 pr_err("Can't allocate dmar_domain\n");
5358 return NULL;
5359 }
301e7ee1 5360 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4de354ec
LB
5361 pr_err("Domain initialization failed\n");
5362 domain_exit(dmar_domain);
5363 return NULL;
5364 }
fa954e68 5365
10f8008f
LB
5366 if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
5367 ret = init_iova_flush_queue(&dmar_domain->iovad,
5368 iommu_flush_iova,
5369 iova_entry_free);
8e3391cf
LB
5370 if (ret)
5371 pr_info("iova flush queue initialization failed\n");
fa954e68
LB
5372 }
5373
4de354ec 5374 domain_update_iommu_cap(dmar_domain);
38717946 5375
4de354ec
LB
5376 domain = &dmar_domain->domain;
5377 domain->geometry.aperture_start = 0;
5378 domain->geometry.aperture_end =
5379 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5380 domain->geometry.force_aperture = true;
5381
5382 return domain;
5383 case IOMMU_DOMAIN_IDENTITY:
5384 return &si_domain->domain;
5385 default:
00a77deb 5386 return NULL;
38717946 5387 }
8a0e715b 5388
4de354ec 5389 return NULL;
38717946 5390}
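/*
 * A minimal sketch of how the generic IOMMU core reaches this allocator for
 * an unmanaged domain (e.g. from VFIO); "dev" is a hypothetical PCI device
 * and error handling is omitted:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	if (dom && iommu_attach_device(dom, dev))
 *		iommu_domain_free(dom);
 *
 * iommu_domain_alloc() requests IOMMU_DOMAIN_UNMANAGED, so the switch above
 * takes the UNMANAGED branch; IOMMU_DOMAIN_DMA domains are normally created
 * by the core when a group's default domain is set up, and
 * IOMMU_DOMAIN_IDENTITY simply returns the shared si_domain.
 */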
38717946 5391
00a77deb 5392static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5393{
4de354ec
LB
5394 if (domain != &si_domain->domain)
5395 domain_exit(to_dmar_domain(domain));
38717946 5396}
38717946 5397
67b8e02b
LB
5398/*
5399 * Check whether a @domain could be attached to the @dev through the
5400 * aux-domain attach/detach APIs.
5401 */
5402static inline bool
5403is_aux_domain(struct device *dev, struct iommu_domain *domain)
5404{
5405 struct device_domain_info *info = dev->archdata.iommu;
5406
5407 return info && info->auxd_enabled &&
5408 domain->type == IOMMU_DOMAIN_UNMANAGED;
5409}
5410
5411static void auxiliary_link_device(struct dmar_domain *domain,
5412 struct device *dev)
5413{
5414 struct device_domain_info *info = dev->archdata.iommu;
5415
5416 assert_spin_locked(&device_domain_lock);
5417 if (WARN_ON(!info))
5418 return;
5419
5420 domain->auxd_refcnt++;
5421 list_add(&domain->auxd, &info->auxiliary_domains);
5422}
5423
5424static void auxiliary_unlink_device(struct dmar_domain *domain,
5425 struct device *dev)
5426{
5427 struct device_domain_info *info = dev->archdata.iommu;
5428
5429 assert_spin_locked(&device_domain_lock);
5430 if (WARN_ON(!info))
5431 return;
5432
5433 list_del(&domain->auxd);
5434 domain->auxd_refcnt--;
5435
5436 if (!domain->auxd_refcnt && domain->default_pasid > 0)
59a62337 5437 ioasid_free(domain->default_pasid);
67b8e02b
LB
5438}
5439
5440static int aux_domain_add_dev(struct dmar_domain *domain,
5441 struct device *dev)
5442{
5443 int ret;
5444 u8 bus, devfn;
5445 unsigned long flags;
5446 struct intel_iommu *iommu;
5447
5448 iommu = device_to_iommu(dev, &bus, &devfn);
5449 if (!iommu)
5450 return -ENODEV;
5451
5452 if (domain->default_pasid <= 0) {
5453 int pasid;
5454
59a62337
JP
5455 /* No private data needed for the default pasid */
5456 pasid = ioasid_alloc(NULL, PASID_MIN,
5457 pci_max_pasids(to_pci_dev(dev)) - 1,
5458 NULL);
5459 if (pasid == INVALID_IOASID) {
67b8e02b
LB
5460 pr_err("Can't allocate default pasid\n");
5461 return -ENODEV;
5462 }
5463 domain->default_pasid = pasid;
5464 }
5465
5466 spin_lock_irqsave(&device_domain_lock, flags);
5467 /*
5468 * iommu->lock must be held to attach domain to iommu and setup the
5469 * pasid entry for second level translation.
5470 */
5471 spin_lock(&iommu->lock);
5472 ret = domain_attach_iommu(domain, iommu);
5473 if (ret)
5474 goto attach_failed;
5475
5476 /* Set up the PASID entry for mediated devices: */
ddf09b6d
LB
5477 if (domain_use_first_level(domain))
5478 ret = domain_setup_first_level(iommu, domain, dev,
5479 domain->default_pasid);
5480 else
5481 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5482 domain->default_pasid);
67b8e02b
LB
5483 if (ret)
5484 goto table_failed;
5485 spin_unlock(&iommu->lock);
5486
5487 auxiliary_link_device(domain, dev);
5488
5489 spin_unlock_irqrestore(&device_domain_lock, flags);
5490
5491 return 0;
5492
5493table_failed:
5494 domain_detach_iommu(domain, iommu);
5495attach_failed:
5496 spin_unlock(&iommu->lock);
5497 spin_unlock_irqrestore(&device_domain_lock, flags);
5498 if (!domain->auxd_refcnt && domain->default_pasid > 0)
59a62337 5499 ioasid_free(domain->default_pasid);
67b8e02b
LB
5500
5501 return ret;
5502}
5503
5504static void aux_domain_remove_dev(struct dmar_domain *domain,
5505 struct device *dev)
5506{
5507 struct device_domain_info *info;
5508 struct intel_iommu *iommu;
5509 unsigned long flags;
5510
5511 if (!is_aux_domain(dev, &domain->domain))
5512 return;
5513
5514 spin_lock_irqsave(&device_domain_lock, flags);
5515 info = dev->archdata.iommu;
5516 iommu = info->iommu;
5517
5518 auxiliary_unlink_device(domain, dev);
5519
5520 spin_lock(&iommu->lock);
5521 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5522 domain_detach_iommu(domain, iommu);
5523 spin_unlock(&iommu->lock);
5524
5525 spin_unlock_irqrestore(&device_domain_lock, flags);
5526}
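/*
 * A rough sketch of the aux-domain (Scalable IOV / mdev) flow a parent
 * driver might follow; "dev" is a hypothetical parent PCI device and error
 * handling is omitted:
 *
 *	iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX);
 *	dom = iommu_domain_alloc(&pci_bus_type);
 *	iommu_aux_attach_device(dom, dev);
 *	pasid = iommu_aux_get_pasid(dom, dev);
 *	... program "pasid" into the parent device for the mdev instance ...
 *
 * aux_domain_add_dev() allocates a default PASID for the domain on first
 * use and installs either a first-level or a second-level PASID-table entry
 * for it; aux_domain_remove_dev() tears that entry down, and the PASID is
 * freed when the domain's last aux attachment goes away.
 */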
5527
8cc3759a
LB
5528static int prepare_domain_attach_device(struct iommu_domain *domain,
5529 struct device *dev)
38717946 5530{
00a77deb 5531 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5532 struct intel_iommu *iommu;
5533 int addr_width;
156baca8 5534 u8 bus, devfn;
faa3d6f5 5535
156baca8 5536 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5537 if (!iommu)
5538 return -ENODEV;
5539
5540 /* check if this iommu agaw is sufficient for max mapped address */
5541 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5542 if (addr_width > cap_mgaw(iommu->cap))
5543 addr_width = cap_mgaw(iommu->cap);
5544
5545 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5546 dev_err(dev, "%s: iommu width (%d) is not "
5547 "sufficient for the mapped address (%llx)\n",
5548 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5549 return -EFAULT;
5550 }
a99c47a2
TL
5551 dmar_domain->gaw = addr_width;
5552
5553 /*
5554 * Knock out extra levels of page tables if necessary
5555 */
5556 while (iommu->agaw < dmar_domain->agaw) {
5557 struct dma_pte *pte;
5558
5559 pte = dmar_domain->pgd;
5560 if (dma_pte_present(pte)) {
25cbff16
SY
5561 dmar_domain->pgd = (struct dma_pte *)
5562 phys_to_virt(dma_pte_addr(pte));
7a661013 5563 free_pgtable_page(pte);
a99c47a2
TL
5564 }
5565 dmar_domain->agaw--;
5566 }
fe40f1e0 5567
8cc3759a
LB
5568 return 0;
5569}
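/*
 * prepare_domain_attach_device() clamps the domain's guest address width to
 * what this IOMMU can actually walk (the smaller of its AGAW and MGAW
 * widths) and, if the domain was built with more page-table levels than the
 * IOMMU supports, peels off top levels by replacing the PGD with the table
 * its first entry points to.  This is safe because the max_addr check above
 * has already guaranteed that no mapping lies above the reduced width.
 */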
5570
5571static int intel_iommu_attach_device(struct iommu_domain *domain,
5572 struct device *dev)
5573{
5574 int ret;
5575
5679582c
LB
5576 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5577 device_is_rmrr_locked(dev)) {
8cc3759a
LB
5578 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5579 return -EPERM;
5580 }
5581
67b8e02b
LB
5582 if (is_aux_domain(dev, domain))
5583 return -EPERM;
5584
8cc3759a
LB
5585 /* normally dev is not mapped */
5586 if (unlikely(domain_context_mapped(dev))) {
5587 struct dmar_domain *old_domain;
5588
5589 old_domain = find_domain(dev);
fa954e68 5590 if (old_domain)
8cc3759a 5591 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5592 }
5593
5594 ret = prepare_domain_attach_device(domain, dev);
5595 if (ret)
5596 return ret;
5597
5598 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5599}
38717946 5600
67b8e02b
LB
5601static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5602 struct device *dev)
5603{
5604 int ret;
5605
5606 if (!is_aux_domain(dev, domain))
5607 return -EPERM;
5608
5609 ret = prepare_domain_attach_device(domain, dev);
5610 if (ret)
5611 return ret;
5612
5613 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5614}
5615
4c5478c9
JR
5616static void intel_iommu_detach_device(struct iommu_domain *domain,
5617 struct device *dev)
38717946 5618{
71753239 5619 dmar_remove_one_dev_info(dev);
faa3d6f5 5620}
c7151a8d 5621
67b8e02b
LB
5622static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5623 struct device *dev)
5624{
5625 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5626}
5627
b146a1c9
JR
5628static int intel_iommu_map(struct iommu_domain *domain,
5629 unsigned long iova, phys_addr_t hpa,
781ca2de 5630 size_t size, int iommu_prot, gfp_t gfp)
faa3d6f5 5631{
00a77deb 5632 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5633 u64 max_addr;
dde57a21 5634 int prot = 0;
faa3d6f5 5635 int ret;
fe40f1e0 5636
dde57a21
JR
5637 if (iommu_prot & IOMMU_READ)
5638 prot |= DMA_PTE_READ;
5639 if (iommu_prot & IOMMU_WRITE)
5640 prot |= DMA_PTE_WRITE;
9cf06697
SY
5641 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5642 prot |= DMA_PTE_SNP;
dde57a21 5643
163cc52c 5644 max_addr = iova + size;
dde57a21 5645 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5646 u64 end;
5647
5648 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5649 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5650 if (end < max_addr) {
9f10e5bf 5651 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5652 "sufficient for the mapped address (%llx)\n",
8954da1f 5653 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5654 return -EFAULT;
5655 }
dde57a21 5656 dmar_domain->max_addr = max_addr;
fe40f1e0 5657 }
ad051221
DW
5658 /* Round up size to next multiple of PAGE_SIZE, if it and
5659 the low bits of hpa would take us onto the next page */
88cb6a74 5660 size = aligned_nrpages(hpa, size);
ad051221
DW
5661 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5662 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5663 return ret;
38717946 5664}
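/*
 * External users go through the generic API, which ends up here with the
 * IOMMU_* protection flags translated to DMA_PTE_* bits.  A minimal sketch
 * ("dom", "iova" and "phys" are hypothetical, page-aligned values):
 *
 *	ret = iommu_map(dom, iova, phys, SZ_4K,
 *			IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
 *
 * IOMMU_CACHE only becomes DMA_PTE_SNP when the domain reports
 * iommu_snooping; the size is rounded up to whole VT-d pages before the
 * PFN-based mapping helper is called.
 */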
38717946 5665
5009065d 5666static size_t intel_iommu_unmap(struct iommu_domain *domain,
56f8af5e
WD
5667 unsigned long iova, size_t size,
5668 struct iommu_iotlb_gather *gather)
38717946 5669{
00a77deb 5670 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5671 struct page *freelist = NULL;
ea8ea460
DW
5672 unsigned long start_pfn, last_pfn;
5673 unsigned int npages;
42e8c186 5674 int iommu_id, level = 0;
5cf0a76f
DW
5675
5676 /* Cope with horrid API which requires us to unmap more than the
5677 size argument if it happens to be a large-page mapping. */
dc02e46e 5678 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5679
5680 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5681 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5682
ea8ea460
DW
5683 start_pfn = iova >> VTD_PAGE_SHIFT;
5684 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5685
5686 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5687
5688 npages = last_pfn - start_pfn + 1;
5689
f746a025 5690 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5691 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5692 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5693
5694 dma_free_pagelist(freelist);
fe40f1e0 5695
163cc52c
DW
5696 if (dmar_domain->max_addr == iova + size)
5697 dmar_domain->max_addr = iova;
b146a1c9 5698
5cf0a76f 5699 return size;
38717946 5700}
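/*
 * Note that the unmap may cover more than the caller asked for: if the IOVA
 * is mapped by a superpage, the whole large page is torn down and the
 * larger size is returned.  The freed page-table pages are collected on a
 * freelist and only released after the per-IOMMU IOTLB flush, so the
 * hardware never walks freed memory.
 */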
38717946 5701
d14d6577 5702static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5703 dma_addr_t iova)
38717946 5704{
00a77deb 5705 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5706 struct dma_pte *pte;
5cf0a76f 5707 int level = 0;
faa3d6f5 5708 u64 phys = 0;
38717946 5709
5cf0a76f 5710 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
77a1bce8
YH
5711 if (pte && dma_pte_present(pte))
5712 phys = dma_pte_addr(pte) +
5713 (iova & (BIT_MASK(level_to_offset_bits(level) +
5714 VTD_PAGE_SHIFT) - 1));
38717946 5715
faa3d6f5 5716 return phys;
38717946 5717}
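/*
 * A small sketch of the corresponding external call ("dom" and "iova" are
 * hypothetical):
 *
 *	phys_addr_t phys = iommu_iova_to_phys(dom, iova);
 *
 * The level returned by pfn_to_dma_pte() is used to add back the offset
 * within a superpage, so this works for 4KiB, 2MiB and 1GiB mappings alike.
 */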
a8bcbb0d 5718
95587a75
LB
5719static inline bool scalable_mode_support(void)
5720{
5721 struct dmar_drhd_unit *drhd;
5722 struct intel_iommu *iommu;
5723 bool ret = true;
5724
5725 rcu_read_lock();
5726 for_each_active_iommu(iommu, drhd) {
5727 if (!sm_supported(iommu)) {
5728 ret = false;
5729 break;
5730 }
5731 }
5732 rcu_read_unlock();
5733
5734 return ret;
5735}
5736
5737static inline bool iommu_pasid_support(void)
5738{
5739 struct dmar_drhd_unit *drhd;
5740 struct intel_iommu *iommu;
5741 bool ret = true;
5742
5743 rcu_read_lock();
5744 for_each_active_iommu(iommu, drhd) {
5745 if (!pasid_supported(iommu)) {
5746 ret = false;
5747 break;
5748 }
5749 }
5750 rcu_read_unlock();
5751
5752 return ret;
5753}
5754
2cd1311a
LB
5755static inline bool nested_mode_support(void)
5756{
5757 struct dmar_drhd_unit *drhd;
5758 struct intel_iommu *iommu;
5759 bool ret = true;
5760
5761 rcu_read_lock();
5762 for_each_active_iommu(iommu, drhd) {
5763 if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
5764 ret = false;
5765 break;
5766 }
5767 }
5768 rcu_read_unlock();
5769
5770 return ret;
5771}
5772
5d587b8d 5773static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5774{
dbb9fd86 5775 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5776 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5777 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5778 return irq_remapping_enabled == 1;
dbb9fd86 5779
5d587b8d 5780 return false;
dbb9fd86
SY
5781}
5782
abdfdde2
AW
5783static int intel_iommu_add_device(struct device *dev)
5784{
942067f1
LB
5785 struct dmar_domain *dmar_domain;
5786 struct iommu_domain *domain;
a5459cfe 5787 struct intel_iommu *iommu;
abdfdde2 5788 struct iommu_group *group;
156baca8 5789 u8 bus, devfn;
942067f1 5790 int ret;
70ae6f0d 5791
a5459cfe
AW
5792 iommu = device_to_iommu(dev, &bus, &devfn);
5793 if (!iommu)
70ae6f0d
AW
5794 return -ENODEV;
5795
e3d10af1 5796 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5797
8af46c78
LB
5798 if (translation_pre_enabled(iommu))
5799 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5800
e17f9ff4 5801 group = iommu_group_get_for_dev(dev);
783f157b 5802
f78947c4
JD
5803 if (IS_ERR(group)) {
5804 ret = PTR_ERR(group);
5805 goto unlink;
5806 }
bcb71abe 5807
abdfdde2 5808 iommu_group_put(group);
942067f1
LB
5809
5810 domain = iommu_get_domain_for_dev(dev);
5811 dmar_domain = to_dmar_domain(domain);
5812 if (domain->type == IOMMU_DOMAIN_DMA) {
0e31a726 5813 if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
942067f1
LB
5814 ret = iommu_request_dm_for_dev(dev);
5815 if (ret) {
ae23bfb6 5816 dmar_remove_one_dev_info(dev);
942067f1
LB
5817 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5818 domain_add_dev_info(si_domain, dev);
5819 dev_info(dev,
5820 "Device uses a private identity domain.\n");
942067f1 5821 }
942067f1
LB
5822 }
5823 } else {
0e31a726 5824 if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
942067f1
LB
5825 ret = iommu_request_dma_domain_for_dev(dev);
5826 if (ret) {
ae23bfb6 5827 dmar_remove_one_dev_info(dev);
942067f1 5828 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
4ec066c7 5829 if (!get_private_domain_for_dev(dev)) {
942067f1
LB
5830 dev_warn(dev,
5831 "Failed to get a private domain.\n");
f78947c4
JD
5832 ret = -ENOMEM;
5833 goto unlink;
942067f1
LB
5834 }
5835
5836 dev_info(dev,
5837 "Device uses a private dma domain.\n");
942067f1 5838 }
942067f1
LB
5839 }
5840 }
5841
cfb94a37
LB
5842 if (device_needs_bounce(dev)) {
5843 dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
5844 set_dma_ops(dev, &bounce_dma_ops);
5845 }
5846
e17f9ff4 5847 return 0;
f78947c4
JD
5848
5849unlink:
5850 iommu_device_unlink(&iommu->iommu, dev);
5851 return ret;
abdfdde2 5852}
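/*
 * After joining the group, the device's default domain type is reconciled
 * with what device_def_domain_type() wants: a device that requires identity
 * mapping is moved onto si_domain, and one that must be DMA-mapped gets a
 * private DMA domain if the group's default domain cannot be converted.  In
 * both fallback cases the old domain is flagged DOMAIN_FLAG_LOSE_CHILDREN
 * so it is cleaned up once empty.  Devices that need bounce buffering
 * (e.g. untrusted external devices) are switched to bounce_dma_ops here as
 * well.
 */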
70ae6f0d 5853
abdfdde2
AW
5854static void intel_iommu_remove_device(struct device *dev)
5855{
a5459cfe
AW
5856 struct intel_iommu *iommu;
5857 u8 bus, devfn;
5858
5859 iommu = device_to_iommu(dev, &bus, &devfn);
5860 if (!iommu)
5861 return;
5862
458b7c8e
LB
5863 dmar_remove_one_dev_info(dev);
5864
abdfdde2 5865 iommu_group_remove_device(dev);
a5459cfe 5866
e3d10af1 5867 iommu_device_unlink(&iommu->iommu, dev);
cfb94a37
LB
5868
5869 if (device_needs_bounce(dev))
5870 set_dma_ops(dev, NULL);
70ae6f0d
AW
5871}
5872
0659b8dc
EA
5873static void intel_iommu_get_resv_regions(struct device *device,
5874 struct list_head *head)
5875{
5f64ce54 5876 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
0659b8dc
EA
5877 struct iommu_resv_region *reg;
5878 struct dmar_rmrr_unit *rmrr;
5879 struct device *i_dev;
5880 int i;
5881
5f64ce54 5882 down_read(&dmar_global_lock);
0659b8dc
EA
5883 for_each_rmrr_units(rmrr) {
5884 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5885 i, i_dev) {
5f64ce54 5886 struct iommu_resv_region *resv;
1c5c59fb 5887 enum iommu_resv_type type;
5f64ce54
EA
5888 size_t length;
5889
3855ba2d
EA
5890 if (i_dev != device &&
5891 !is_downstream_to_pci_bridge(device, i_dev))
0659b8dc
EA
5892 continue;
5893
5f64ce54 5894 length = rmrr->end_address - rmrr->base_address + 1;
1c5c59fb
EA
5895
5896 type = device_rmrr_is_relaxable(device) ?
5897 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5898
5f64ce54 5899 resv = iommu_alloc_resv_region(rmrr->base_address,
1c5c59fb 5900 length, prot, type);
5f64ce54
EA
5901 if (!resv)
5902 break;
5903
5904 list_add_tail(&resv->list, head);
0659b8dc
EA
5905 }
5906 }
5f64ce54 5907 up_read(&dmar_global_lock);
0659b8dc 5908
d850c2ee
LB
5909#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5910 if (dev_is_pci(device)) {
5911 struct pci_dev *pdev = to_pci_dev(device);
5912
5913 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
cde9319e 5914 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
d8018a0e 5915 IOMMU_RESV_DIRECT_RELAXABLE);
d850c2ee
LB
5916 if (reg)
5917 list_add_tail(&reg->list, head);
5918 }
5919 }
5920#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5921
0659b8dc
EA
5922 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5923 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5924 0, IOMMU_RESV_MSI);
0659b8dc
EA
5925 if (!reg)
5926 return;
5927 list_add_tail(&reg->list, head);
5928}
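/*
 * Consumers pair this with the put callback via the generic helpers; a
 * rough sketch ("dev" is a hypothetical device behind this IOMMU):
 *
 *	LIST_HEAD(resv);
 *
 *	iommu_get_resv_regions(dev, &resv);
 *	... walk the list: RMRRs appear as IOMMU_RESV_DIRECT or
 *	    IOMMU_RESV_DIRECT_RELAXABLE, the IOAPIC range as IOMMU_RESV_MSI ...
 *	iommu_put_resv_regions(dev, &resv);
 *
 * dmar_global_lock is taken above to keep the RMRR list stable while it is
 * walked.
 */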
5929
d7cbc0f3 5930int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
2f26e0a9
DW
5931{
5932 struct device_domain_info *info;
5933 struct context_entry *context;
5934 struct dmar_domain *domain;
5935 unsigned long flags;
5936 u64 ctx_lo;
5937 int ret;
5938
4ec066c7 5939 domain = find_domain(dev);
2f26e0a9
DW
5940 if (!domain)
5941 return -EINVAL;
5942
5943 spin_lock_irqsave(&device_domain_lock, flags);
5944 spin_lock(&iommu->lock);
5945
5946 ret = -EINVAL;
d7cbc0f3 5947 info = dev->archdata.iommu;
2f26e0a9
DW
5948 if (!info || !info->pasid_supported)
5949 goto out;
5950
5951 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5952 if (WARN_ON(!context))
5953 goto out;
5954
5955 ctx_lo = context[0].lo;
5956
2f26e0a9 5957 if (!(ctx_lo & CONTEXT_PASIDE)) {
2f26e0a9
DW
5958 ctx_lo |= CONTEXT_PASIDE;
5959 context[0].lo = ctx_lo;
5960 wmb();
d7cbc0f3
LB
5961 iommu->flush.flush_context(iommu,
5962 domain->iommu_did[iommu->seq_id],
5963 PCI_DEVID(info->bus, info->devfn),
2f26e0a9
DW
5964 DMA_CCMD_MASK_NOBIT,
5965 DMA_CCMD_DEVICE_INVL);
5966 }
5967
5968 /* Enable PASID support in the device, if it wasn't already */
5969 if (!info->pasid_enabled)
5970 iommu_enable_dev_iotlb(info);
5971
2f26e0a9
DW
5972 ret = 0;
5973
5974 out:
5975 spin_unlock(&iommu->lock);
5976 spin_unlock_irqrestore(&device_domain_lock, flags);
5977
5978 return ret;
5979}
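/*
 * This flips the PASID-enable bit in the device's context entry (flushing
 * the context cache for that requester ID) and then enables PASID/ATS on
 * the device itself, so PASID-tagged requests are only issued once the
 * IOMMU side is ready to accept them.
 */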
5980
73bcbdc9
JS
5981static void intel_iommu_apply_resv_region(struct device *dev,
5982 struct iommu_domain *domain,
5983 struct iommu_resv_region *region)
5984{
5985 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5986 unsigned long start, end;
5987
5988 start = IOVA_PFN(region->start);
5989 end = IOVA_PFN(region->start + region->length - 1);
5990
5991 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5992}
5993
4a350a0e
PS
5994static struct iommu_group *intel_iommu_device_group(struct device *dev)
5995{
5996 if (dev_is_pci(dev))
5997 return pci_device_group(dev);
5998 return generic_device_group(dev);
5999}
6000
d7cbc0f3 6001#ifdef CONFIG_INTEL_IOMMU_SVM
2f26e0a9
DW
6002struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
6003{
6004 struct intel_iommu *iommu;
6005 u8 bus, devfn;
6006
6007 if (iommu_dummy(dev)) {
6008 dev_warn(dev,
6009 "No IOMMU translation for device; cannot enable SVM\n");
6010 return NULL;
6011 }
6012
6013 iommu = device_to_iommu(dev, &bus, &devfn);
6014 if (!iommu) {
b9997e38 6015 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
6016 return NULL;
6017 }
6018
2f26e0a9
DW
6019 return iommu;
6020}
6021#endif /* CONFIG_INTEL_IOMMU_SVM */
6022
95587a75
LB
6023static int intel_iommu_enable_auxd(struct device *dev)
6024{
6025 struct device_domain_info *info;
6026 struct intel_iommu *iommu;
6027 unsigned long flags;
6028 u8 bus, devfn;
6029 int ret;
6030
6031 iommu = device_to_iommu(dev, &bus, &devfn);
6032 if (!iommu || dmar_disabled)
6033 return -EINVAL;
6034
6035 if (!sm_supported(iommu) || !pasid_supported(iommu))
6036 return -EINVAL;
6037
6038 ret = intel_iommu_enable_pasid(iommu, dev);
6039 if (ret)
6040 return -ENODEV;
6041
6042 spin_lock_irqsave(&device_domain_lock, flags);
6043 info = dev->archdata.iommu;
6044 info->auxd_enabled = 1;
6045 spin_unlock_irqrestore(&device_domain_lock, flags);
6046
6047 return 0;
6048}
6049
6050static int intel_iommu_disable_auxd(struct device *dev)
6051{
6052 struct device_domain_info *info;
6053 unsigned long flags;
6054
6055 spin_lock_irqsave(&device_domain_lock, flags);
6056 info = dev->archdata.iommu;
6057 if (!WARN_ON(!info))
6058 info->auxd_enabled = 0;
6059 spin_unlock_irqrestore(&device_domain_lock, flags);
6060
6061 return 0;
6062}
6063
6064/*
6065 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
6066 * defined in section 3.7 of the Intel Scalable I/O Virtualization spec so
6067 * that system software and tools can detect endpoint devices supporting
6068 * Intel Scalable I/O Virtualization without any host driver dependency.
6069 *
6070 * Returns the offset of the matching extended capability structure within
6071 * the device's PCI configuration space, or 0 if the device does not
6072 * support it.
6073 */
6074static int siov_find_pci_dvsec(struct pci_dev *pdev)
6075{
6076 int pos;
6077 u16 vendor, id;
6078
6079 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
6080 while (pos) {
6081 pci_read_config_word(pdev, pos + 4, &vendor);
6082 pci_read_config_word(pdev, pos + 8, &id);
6083 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
6084 return pos;
6085
6086 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
6087 }
6088
6089 return 0;
6090}
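/*
 * The layout assumed above follows the standard PCIe DVSEC capability:
 * extended capability ID 0x23 is DVSEC, the vendor ID sits in the low
 * 16 bits at offset +4 (DVSEC header 1) and the DVSEC ID in the low
 * 16 bits at offset +8 (DVSEC header 2).  The SIOV DVSEC is therefore the
 * one with vendor PCI_VENDOR_ID_INTEL and DVSEC ID 5.
 */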
6091
6092static bool
6093intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
6094{
6095 if (feat == IOMMU_DEV_FEAT_AUX) {
6096 int ret;
6097
6098 if (!dev_is_pci(dev) || dmar_disabled ||
6099 !scalable_mode_support() || !iommu_pasid_support())
6100 return false;
6101
6102 ret = pci_pasid_features(to_pci_dev(dev));
6103 if (ret < 0)
6104 return false;
6105
6106 return !!siov_find_pci_dvsec(to_pci_dev(dev));
6107 }
6108
6109 return false;
6110}
6111
6112static int
6113intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
6114{
6115 if (feat == IOMMU_DEV_FEAT_AUX)
6116 return intel_iommu_enable_auxd(dev);
6117
6118 return -ENODEV;
6119}
6120
6121static int
6122intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
6123{
6124 if (feat == IOMMU_DEV_FEAT_AUX)
6125 return intel_iommu_disable_auxd(dev);
6126
6127 return -ENODEV;
6128}
6129
6130static bool
6131intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
6132{
6133 struct device_domain_info *info = dev->archdata.iommu;
6134
6135 if (feat == IOMMU_DEV_FEAT_AUX)
6136 return scalable_mode_support() && info && info->auxd_enabled;
6137
6138 return false;
6139}
6140
0e8000f8
LB
6141static int
6142intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
6143{
6144 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
6145
6146 return dmar_domain->default_pasid > 0 ?
6147 dmar_domain->default_pasid : -EINVAL;
6148}
6149
8af46c78
LB
6150static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
6151 struct device *dev)
6152{
1d461597 6153 return attach_deferred(dev);
8af46c78
LB
6154}
6155
2cd1311a
LB
6156static int
6157intel_iommu_domain_set_attr(struct iommu_domain *domain,
6158 enum iommu_attr attr, void *data)
6159{
6160 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
6161 unsigned long flags;
6162 int ret = 0;
6163
6164 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
6165 return -EINVAL;
6166
6167 switch (attr) {
6168 case DOMAIN_ATTR_NESTING:
6169 spin_lock_irqsave(&device_domain_lock, flags);
6170 if (nested_mode_support() &&
6171 list_empty(&dmar_domain->devices)) {
6172 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
6173 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
6174 } else {
6175 ret = -ENODEV;
6176 }
6177 spin_unlock_irqrestore(&device_domain_lock, flags);
6178 break;
6179 default:
6180 ret = -EINVAL;
6181 break;
6182 }
6183
6184 return ret;
6185}
6186
b0119e87 6187const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
6188 .capable = intel_iommu_capable,
6189 .domain_alloc = intel_iommu_domain_alloc,
6190 .domain_free = intel_iommu_domain_free,
2cd1311a 6191 .domain_set_attr = intel_iommu_domain_set_attr,
0659b8dc
EA
6192 .attach_dev = intel_iommu_attach_device,
6193 .detach_dev = intel_iommu_detach_device,
67b8e02b
LB
6194 .aux_attach_dev = intel_iommu_aux_attach_device,
6195 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 6196 .aux_get_pasid = intel_iommu_aux_get_pasid,
0659b8dc
EA
6197 .map = intel_iommu_map,
6198 .unmap = intel_iommu_unmap,
0659b8dc
EA
6199 .iova_to_phys = intel_iommu_iova_to_phys,
6200 .add_device = intel_iommu_add_device,
6201 .remove_device = intel_iommu_remove_device,
6202 .get_resv_regions = intel_iommu_get_resv_regions,
0ecdebb7 6203 .put_resv_regions = generic_iommu_put_resv_regions,
73bcbdc9 6204 .apply_resv_region = intel_iommu_apply_resv_region,
4a350a0e 6205 .device_group = intel_iommu_device_group,
95587a75
LB
6206 .dev_has_feat = intel_iommu_dev_has_feat,
6207 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
6208 .dev_enable_feat = intel_iommu_dev_enable_feat,
6209 .dev_disable_feat = intel_iommu_dev_disable_feat,
8af46c78 6210 .is_attach_deferred = intel_iommu_is_attach_deferred,
0659b8dc 6211 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 6212};
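/*
 * This ops table is what the IOMMU core dispatches through for every PCI
 * device behind a DMAR unit.  Registration happens earlier in this file
 * during initialization; roughly (details omitted):
 *
 *	iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
 *	iommu_device_register(&iommu->iommu);
 *	...
 *	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
 */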
9af88143 6213
1f76249c 6214static void quirk_iommu_igfx(struct pci_dev *dev)
9452618e 6215{
932a6523 6216 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
6217 dmar_map_gfx = 0;
6218}
6219
1f76249c
CW
6220/* G4x/GM45 integrated gfx dmar support is totally busted. */
6221DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
6222DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
6223DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
6224DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
6225DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
6226DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
6227DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
6228
6229/* Broadwell igfx malfunctions with dmar */
6230DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6231DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6232DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6233DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6234DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6235DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6236DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6237DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6238DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6239DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6240DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6241DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6242DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6243DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6244DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6245DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6246DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6247DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6248DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6249DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6250DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6251DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6252DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6253DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
9452618e 6254
d34d6517 6255static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
6256{
6257 /*
6258 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 6259 * but needs it. Same seems to hold for the desktop versions.
9af88143 6260 */
932a6523 6261 pci_info(dev, "Forcing write-buffer flush capability\n");
9af88143
DW
6262 rwbf_quirk = 1;
6263}
6264
6265DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
6266DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6267DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6268DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6270DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 6272
eecfd57f
AJ
6273#define GGC 0x52
6274#define GGC_MEMORY_SIZE_MASK (0xf << 8)
6275#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
6276#define GGC_MEMORY_SIZE_1M (0x1 << 8)
6277#define GGC_MEMORY_SIZE_2M (0x3 << 8)
6278#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
6279#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
6280#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
6281#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
6282
d34d6517 6283static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
6284{
6285 unsigned short ggc;
6286
eecfd57f 6287 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
6288 return;
6289
eecfd57f 6290 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 6291 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 6292 dmar_map_gfx = 0;
6fbcfb3e
DW
6293 } else if (dmar_map_gfx) {
6294 /* we have to ensure the gfx device is idle before we flush */
932a6523 6295 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
6296 intel_iommu_strict = 1;
6297 }
9eecabcb
DW
6298}
6299DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6300DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6301DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6302DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6303
e0fc7e0b
DW
6304/* On Tylersburg chipsets, some BIOSes have been known to enable the
6305 ISOCH DMAR unit for the Azalia sound device, but not give it any
6306 TLB entries, which causes it to deadlock. Check for that. We do
6307 this in a function called from init_dmars(), instead of in a PCI
6308 quirk, because we don't want to print the obnoxious "BIOS broken"
6309 message if VT-d is actually disabled.
6310*/
6311static void __init check_tylersburg_isoch(void)
6312{
6313 struct pci_dev *pdev;
6314 uint32_t vtisochctrl;
6315
6316 /* If there's no Azalia in the system anyway, forget it. */
6317 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6318 if (!pdev)
6319 return;
6320 pci_dev_put(pdev);
6321
6322 /* System Management Registers. Might be hidden, in which case
6323 we can't do the sanity check. But that's OK, because the
6324 known-broken BIOSes _don't_ actually hide it, so far. */
6325 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6326 if (!pdev)
6327 return;
6328
6329 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6330 pci_dev_put(pdev);
6331 return;
6332 }
6333
6334 pci_dev_put(pdev);
6335
6336 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6337 if (vtisochctrl & 1)
6338 return;
6339
6340 /* Drop all bits other than the number of TLB entries */
6341 vtisochctrl &= 0x1c;
6342
6343 /* If we have the recommended number of TLB entries (16), fine. */
6344 if (vtisochctrl == 0x10)
6345 return;
6346
6347 /* Zero TLB entries? You get to ride the short bus to school. */
6348 if (!vtisochctrl) {
6349 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6350 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6351 dmi_get_system_info(DMI_BIOS_VENDOR),
6352 dmi_get_system_info(DMI_BIOS_VERSION),
6353 dmi_get_system_info(DMI_PRODUCT_VERSION));
6354 iommu_identity_mapping |= IDENTMAP_AZALIA;
6355 return;
6356 }
9f10e5bf
JR
6357
6358 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
6359 vtisochctrl);
6360}