/* drivers/iommu/intel-iommu.c */
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)     "DMAR: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"
#include "intel-pasid.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

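/*
 * Editor's illustration (not part of the original source): ~0xFFFUL has
 * every bit from bit 12 upwards set, so the core is told that any
 * power-of-two size >= 4KiB is mappable, e.g.:
 *
 *	(INTEL_IOMMU_PGSIZES & SZ_4K) != 0
 *	(INTEL_IOMMU_PGSIZES & SZ_2M) != 0
 *	(INTEL_IOMMU_PGSIZES & SZ_1G) != 0
 *
 * Whether a given mapping actually ends up using a 2MiB/1GiB superpage
 * still depends on the cap_super_page_val() checks further down in this
 * file.
 */
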
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

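/*
 * Editor's worked example (not part of the original source): a 48-bit
 * guest address width gives
 *
 *	width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2
 *	agaw_to_level(2)  = 4			(a 4-level page table)
 *	agaw_to_width(2)  = 30 + 2 * 9 = 48
 *
 * while the 57-bit DEFAULT_DOMAIN_ADDRESS_WIDTH maps to agaw 3, i.e. a
 * 5-level table, always capped by what cap_sagaw() reports for the
 * hardware.
 */
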
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

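/*
 * Editor's note (illustrative, not in the original source): on x86,
 * PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so these conversions are identity
 * operations. On a hypothetical kernel built with 16KiB pages
 * (PAGE_SHIFT == 14), mm_to_dma_pfn(1) would be 4 and dma_to_mm_pfn(7)
 * would be 1, i.e. one MM page always covers a whole number of VT-d pages.
 */
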
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;
int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

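/*
 * Editor's sketch (illustrative only, not part of the original source): a
 * legacy-mode context entry is typically composed with the helpers above
 * roughly like this, mirroring what the context-mapping code later in this
 * file does:
 *
 *	context_clear_entry(context);
 *	context_set_domain_id(context, did);
 *	context_set_address_width(context, agaw);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 *
 * The actual translation type and address root depend on pass-through and
 * ATS support, so treat the ordering above as a sketch rather than a rule.
 */
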
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)

/*
 * This is a DMA domain allocated through the iommu domain allocation
 * interface. But one or more devices belonging to this domain have
 * been chosen to use a private domain. We should avoid using the
 * map/unmap/iova_to_phys APIs on it.
 */
#define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
	struct iommu_resv_region *resv;	/* reserved region handle */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);
static bool device_is_rmrr_locked(struct device *dev);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */

int intel_iommu_sm;
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

LB
384/*
385 * Iterate over elements in device_domain_list and call the specified
0bbeb01a 386 * callback @fn against each element.
85319dcc
LB
387 */
388int for_each_device_domain(int (*fn)(struct device_domain_info *info,
389 void *data), void *data)
390{
391 int ret = 0;
0bbeb01a 392 unsigned long flags;
85319dcc
LB
393 struct device_domain_info *info;
394
0bbeb01a 395 spin_lock_irqsave(&device_domain_lock, flags);
85319dcc
LB
396 list_for_each_entry(info, &device_domain_list, global) {
397 ret = fn(info, data);
0bbeb01a
LB
398 if (ret) {
399 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc 400 return ret;
0bbeb01a 401 }
85319dcc 402 }
0bbeb01a 403 spin_unlock_irqrestore(&device_domain_lock, flags);
85319dcc
LB
404
405 return 0;
406}
407
const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert generic struct iommu_domain to private struct dmar_domain */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Intel-IOMMU: scalable mode supported\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

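/*
 * Editor's usage note (not part of the original source): the options above
 * come from the kernel command line and may be combined with commas, for
 * example
 *
 *	intel_iommu=on,strict,sm_on
 *
 * forces the IOMMU on, disables batched IOTLB flushing and enables
 * scalable mode (assuming the hardware advertises it); "igfx_off",
 * "forcedac", "sp_off" and "tboot_noforce" follow the same pattern.
 */
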
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
					unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, so use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
		return NULL;

	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

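/*
 * Editor's note (illustrative, not part of the original source): in
 * scalable mode each root entry is split in two halves and every context
 * entry occupies two slots, so for devfn 0xfb (device 0x1f, function 3):
 *
 *	devfn >= 0x80  ->  use root->hi, devfn -= 0x80  (0x7b)
 *	devfn *= 2     ->  index 0xf6 into the upper context table
 *
 * whereas in legacy mode the same devfn indexes root->lo's table directly.
 */
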
static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = to_pci_dev(dev);

#ifdef CONFIG_X86
		/* VMD child devices currently cannot be handled individually */
		if (is_vmd(pdev->bus))
			return NULL;
#endif

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}

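/*
 * Editor's note (illustrative, not part of the original source): callers
 * pass *target_level == 0 to mean "give me whatever leaf exists" and a
 * non-zero level to force allocation down to that level. A sketch of
 * preparing a 2MiB superpage slot at IOVA pfn 0x200:
 *
 *	int level = 2;		// a 2MiB mapping lives in a level-2 leaf
 *	struct dma_pte *pte = pfn_to_dma_pte(domain, 0x200, &level);
 *	// pte now points at the level-2 slot; the mapping path fills it in
 *
 * The cmpxchg64() above lets concurrent walkers agree on a single
 * intermediate table without taking a lock.
 */
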
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2) {
			dma_pte_free_level(domain, level - 1, retain_level,
					   level_pte, level_pfn, start_pfn,
					   last_pfn);
		}

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

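/*
 * Editor's note (illustrative, not part of the original source): the
 * intended life cycle of a freelist is
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, domain, start_pfn, npages, 0, 0);
 *	dma_free_pagelist(freelist);
 *
 * i.e. page-table pages are only handed back to the allocator once no
 * IOMMU can still be walking them; the unmap and flush-queue paths later
 * in the file follow this pattern.
 */
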
static void iova_entry_free(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	dma_free_pagelist(freelist);
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *
iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
			 u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

static void domain_update_iotlb(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link) {
		struct pci_dev *pdev;

		if (!info->dev || !dev_is_pci(info->dev))
			continue;

		pdev = to_pci_dev(info->dev);
		if (pdev->ats_enabled) {
			has_iotlb_device = true;
			break;
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);
	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		info->pfsid = 0;
	else {
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a vf */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);
	}

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (!pdev->untrusted && info->ats_supported &&
	    pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	if (!domain->has_iotlb_device)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
}

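/*
 * Editor's worked example (not part of the original source): for a 5-page
 * unmap starting at pfn 0x1000,
 *
 *	mask = ilog2(__roundup_pow_of_two(5)) = ilog2(8) = 3
 *	addr = (uint64_t)0x1000 << VTD_PAGE_SHIFT = 0x1000000
 *
 * so the PSI request covers 2^3 = 8 pages at a naturally aligned base; if
 * mask exceeded cap_max_amask_val(), the code above would fall back to a
 * full domain-selective (DSI) flush instead.
 */
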
/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
{
	/* It's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
}

static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
	int idx;

	domain = container_of(iovad, struct dmar_domain, iovad);

	for_each_domain_iommu(idx, domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = domain->iommu_did[iommu->seq_id];

		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains    = NULL;
		return -ENOMEM;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	/*
	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
	if (sm_supported(iommu))
		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);

	return 0;
}

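/*
 * Editor's note (illustrative, not part of the original source): domain
 * pointers are kept in a two-level table of 256-entry chunks, so a 16-bit
 * domain id is looked up as
 *
 *	iommu->domains[did >> 8][did & 0xff]
 *
 * which is exactly what get_iommu_domain()/set_iommu_domain() above do;
 * only the chunks that are actually used ever get allocated.
 */
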
static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		struct dmar_domain *domain;

		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		domain = info->domain;

		__dmar_remove_one_dev_info(info);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
	}
#endif
}

ab8dfe25 1694static struct dmar_domain *alloc_domain(int flags)
ba395927 1695{
ba395927 1696 struct dmar_domain *domain;
ba395927
KA
1697
1698 domain = alloc_domain_mem();
1699 if (!domain)
1700 return NULL;
1701
ab8dfe25 1702 memset(domain, 0, sizeof(*domain));
98fa15f3 1703 domain->nid = NUMA_NO_NODE;
ab8dfe25 1704 domain->flags = flags;
0824c592 1705 domain->has_iotlb_device = false;
92d03cc8 1706 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1707
1708 return domain;
1709}
1710
d160aca5
JR
1711/* Must be called with iommu->lock */
1712static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1713 struct intel_iommu *iommu)
1714{
44bde614 1715 unsigned long ndomains;
55d94043 1716 int num;
44bde614 1717
55d94043 1718 assert_spin_locked(&device_domain_lock);
d160aca5 1719 assert_spin_locked(&iommu->lock);
ba395927 1720
29a27719
JR
1721 domain->iommu_refcnt[iommu->seq_id] += 1;
1722 domain->iommu_count += 1;
1723 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1724 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1725 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1726
1727 if (num >= ndomains) {
1728 pr_err("%s: No free domain ids\n", iommu->name);
1729 domain->iommu_refcnt[iommu->seq_id] -= 1;
1730 domain->iommu_count -= 1;
55d94043 1731 return -ENOSPC;
2c2e2c38 1732 }
ba395927 1733
d160aca5
JR
1734 set_bit(num, iommu->domain_ids);
1735 set_iommu_domain(iommu, num, domain);
1736
1737 domain->iommu_did[iommu->seq_id] = num;
1738 domain->nid = iommu->node;
fb170fb4 1739
fb170fb4
JL
1740 domain_update_iommu_cap(domain);
1741 }
d160aca5 1742
55d94043 1743 return 0;
fb170fb4
JL
1744}
1745
1746static int domain_detach_iommu(struct dmar_domain *domain,
1747 struct intel_iommu *iommu)
1748{
e083ea5b 1749 int num, count;
d160aca5 1750
55d94043 1751 assert_spin_locked(&device_domain_lock);
d160aca5 1752 assert_spin_locked(&iommu->lock);
fb170fb4 1753
29a27719
JR
1754 domain->iommu_refcnt[iommu->seq_id] -= 1;
1755 count = --domain->iommu_count;
1756 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1757 num = domain->iommu_did[iommu->seq_id];
1758 clear_bit(num, iommu->domain_ids);
1759 set_iommu_domain(iommu, num, NULL);
fb170fb4 1760
fb170fb4 1761 domain_update_iommu_cap(domain);
c0e8a6c8 1762 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1763 }
fb170fb4
JL
1764
1765 return count;
1766}
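/*
 * Editor's note -- illustrative walk-through, not part of the original
 * file: the two helpers above are strictly paired per (domain, iommu).
 * The first attach behind a given iommu bumps iommu_refcnt[seq_id]
 * 0 -> 1, grabs a free domain id from that iommu's domain_ids bitmap
 * and records it in iommu_did[seq_id]; the last detach drops the
 * refcount back to 0 and releases the id.  iommu_count counts
 * attachments across all iommus, and its new value is what
 * domain_detach_iommu() returns, so the caller can tell when the
 * domain is no longer attached anywhere.
 */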
1767
ba395927 1768static struct iova_domain reserved_iova_list;
8a443df4 1769static struct lock_class_key reserved_rbtree_key;
ba395927 1770
51a63e67 1771static int dmar_init_reserved_ranges(void)
ba395927
KA
1772{
1773 struct pci_dev *pdev = NULL;
1774 struct iova *iova;
1775 int i;
ba395927 1776
aa3ac946 1777 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1778
8a443df4
MG
1779 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1780 &reserved_rbtree_key);
1781
ba395927
KA
1782 /* IOAPIC ranges shouldn't be accessed by DMA */
1783 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1784 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1785 if (!iova) {
9f10e5bf 1786 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1787 return -ENODEV;
1788 }
ba395927
KA
1789
1790 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1791 for_each_pci_dev(pdev) {
1792 struct resource *r;
1793
1794 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1795 r = &pdev->resource[i];
1796 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1797 continue;
1a4a4551
DW
1798 iova = reserve_iova(&reserved_iova_list,
1799 IOVA_PFN(r->start),
1800 IOVA_PFN(r->end));
51a63e67 1801 if (!iova) {
932a6523 1802 pci_err(pdev, "Reserve iova for %pR failed\n", r);
51a63e67
JC
1803 return -ENODEV;
1804 }
ba395927
KA
1805 }
1806 }
51a63e67 1807 return 0;
ba395927
KA
1808}
1809
1810static void domain_reserve_special_ranges(struct dmar_domain *domain)
1811{
1812 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1813}
1814
1815static inline int guestwidth_to_adjustwidth(int gaw)
1816{
1817 int agaw;
1818 int r = (gaw - 12) % 9;
1819
1820 if (r == 0)
1821 agaw = gaw;
1822 else
1823 agaw = gaw + 9 - r;
1824 if (agaw > 64)
1825 agaw = 64;
1826 return agaw;
1827}
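/*
 * Editor's note -- worked example, not part of the original file: the
 * adjusted width is gaw rounded up so that (agaw - 12) is a whole
 * number of 9-bit page-table strides above the 12-bit page offset,
 * capped at 64:
 *
 *   gaw = 39: r = (39 - 12) % 9 = 0  ->  agaw = 39           (3 levels)
 *   gaw = 48: r = (48 - 12) % 9 = 0  ->  agaw = 48           (4 levels)
 *   gaw = 50: r = (50 - 12) % 9 = 2  ->  agaw = 50 + 9 - 2 = 57
 *   gaw = 62: r = (62 - 12) % 9 = 5  ->  agaw = 66, capped to 64
 */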
1828
dc534b25
JR
1829static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1830 int guest_width)
ba395927 1831{
ba395927
KA
1832 int adjust_width, agaw;
1833 unsigned long sagaw;
13cf0174 1834 int err;
ba395927 1835
aa3ac946 1836 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1837
1838 err = init_iova_flush_queue(&domain->iovad,
1839 iommu_flush_iova, iova_entry_free);
1840 if (err)
1841 return err;
1842
ba395927
KA
1843 domain_reserve_special_ranges(domain);
1844
1845 /* calculate AGAW */
ba395927
KA
1846 if (guest_width > cap_mgaw(iommu->cap))
1847 guest_width = cap_mgaw(iommu->cap);
1848 domain->gaw = guest_width;
1849 adjust_width = guestwidth_to_adjustwidth(guest_width);
1850 agaw = width_to_agaw(adjust_width);
1851 sagaw = cap_sagaw(iommu->cap);
1852 if (!test_bit(agaw, &sagaw)) {
1853 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1854 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1855 agaw = find_next_bit(&sagaw, 5, agaw);
1856 if (agaw >= 5)
1857 return -ENODEV;
1858 }
1859 domain->agaw = agaw;
ba395927 1860
8e604097
WH
1861 if (ecap_coherent(iommu->ecap))
1862 domain->iommu_coherency = 1;
1863 else
1864 domain->iommu_coherency = 0;
1865
58c610bd
SY
1866 if (ecap_sc_support(iommu->ecap))
1867 domain->iommu_snooping = 1;
1868 else
1869 domain->iommu_snooping = 0;
1870
214e39aa
DW
1871 if (intel_iommu_superpage)
1872 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1873 else
1874 domain->iommu_superpage = 0;
1875
4c923d47 1876 domain->nid = iommu->node;
c7151a8d 1877
ba395927 1878 /* always allocate the top pgd */
4c923d47 1879 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1880 if (!domain->pgd)
1881 return -ENOMEM;
5b6985ce 1882 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1883 return 0;
1884}
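/*
 * Editor's note -- illustrative example, not part of the original file,
 * assuming the SAGAW bit encoding from the VT-d spec (bit 1 = 39-bit/
 * 3-level, bit 2 = 48-bit/4-level, bit 3 = 57-bit/5-level): if the
 * requested guest width works out to agaw = 2 (48 bit) but cap_sagaw()
 * only advertises bit 3, the find_next_bit() fallback above bumps agaw
 * to 3 and the domain simply uses a deeper page table than strictly
 * necessary.  Only when no bit at or above the computed agaw is set
 * does domain_init() fail with -ENODEV.
 */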
1885
1886static void domain_exit(struct dmar_domain *domain)
1887{
e083ea5b 1888 struct page *freelist;
ba395927 1889
d160aca5 1890 /* Remove associated devices and clear attached or cached domains */
ba395927 1891 domain_remove_dev_info(domain);
92d03cc8 1892
ba395927
KA
1893 /* destroy iovas */
1894 put_iova_domain(&domain->iovad);
ba395927 1895
ea8ea460 1896 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1897
ea8ea460
DW
1898 dma_free_pagelist(freelist);
1899
ba395927
KA
1900 free_domain_mem(domain);
1901}
1902
7373a8cc
LB
1903/*
1904 * Get the PASID directory size for scalable mode context entry.
1905 * Value of X in the PDTS field of a scalable mode context entry
1906 * indicates PASID directory with 2^(X + 7) entries.
1907 */
1908static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1909{
1910 int pds, max_pde;
1911
1912 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1913 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1914 if (pds < 7)
1915 return 0;
1916
1917 return pds - 7;
1918}
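/*
 * Editor's note -- worked example, not part of the original file,
 * assuming PASID_PDE_SHIFT is 6 (one directory entry covers 64 PASIDs):
 * for a table with max_pasid = 0x10000 (16-bit PASIDs),
 *
 *   max_pde = 0x10000 >> 6 = 1024 = 1 << 10
 *   find_first_bit(...)    = 10
 *   pds                    = 10 - 7 = 3
 *
 * and a PDTS value of 3 encodes a directory of 2^(3 + 7) = 1024
 * entries, i.e. 1024 * 64 = 65536 PASIDs, matching max_pasid.
 */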
1919
1920/*
1921 * Set the RID_PASID field of a scalable mode context entry. The
1922 * IOMMU hardware will use the PASID value set in this field for
1923 * DMA translations of DMA requests without PASID.
1924 */
1925static inline void
1926context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1927{
1928 context->hi |= pasid & ((1 << 20) - 1);
1929 context->hi |= (1 << 20);
1930}
1931
1932/*
1933 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1934 * entry.
1935 */
1936static inline void context_set_sm_dte(struct context_entry *context)
1937{
1938 context->lo |= (1 << 2);
1939}
1940
1941/*
1942 * Set the PRE(Page Request Enable) field of a scalable mode context
1943 * entry.
1944 */
1945static inline void context_set_sm_pre(struct context_entry *context)
1946{
1947 context->lo |= (1 << 4);
1948}
1949
1950/* Convert value to context PASID directory size field coding. */
1951#define context_pdts(pds) (((pds) & 0x7) << 9)
1952
64ae892b
DW
1953static int domain_context_mapping_one(struct dmar_domain *domain,
1954 struct intel_iommu *iommu,
ca6e322d 1955 struct pasid_table *table,
28ccce0d 1956 u8 bus, u8 devfn)
ba395927 1957{
c6c2cebd 1958 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1959 int translation = CONTEXT_TT_MULTI_LEVEL;
1960 struct device_domain_info *info = NULL;
ba395927 1961 struct context_entry *context;
ba395927 1962 unsigned long flags;
7373a8cc 1963 int ret;
28ccce0d 1964
c6c2cebd
JR
1965 WARN_ON(did == 0);
1966
28ccce0d
JR
1967 if (hw_pass_through && domain_type_is_si(domain))
1968 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1969
1970 pr_debug("Set context mapping for %02x:%02x.%d\n",
1971 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1972
ba395927 1973 BUG_ON(!domain->pgd);
5331fe6f 1974
55d94043
JR
1975 spin_lock_irqsave(&device_domain_lock, flags);
1976 spin_lock(&iommu->lock);
1977
1978 ret = -ENOMEM;
03ecc32c 1979 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1980 if (!context)
55d94043 1981 goto out_unlock;
ba395927 1982
55d94043
JR
1983 ret = 0;
1984 if (context_present(context))
1985 goto out_unlock;
cf484d0e 1986
aec0e861
XP
1987 /*
1988 * For kdump cases, old valid entries may be cached due to the
1989 * in-flight DMA and copied pgtable, but there is no unmapping
1990 * behaviour for them, thus we need an explicit cache flush for
1991 * the newly-mapped device. For kdump, at this point, the device
1992 * is supposed to have finished reset at its driver probe stage,
1993 * so no in-flight DMA will exist, and we don't need to worry
1994 * about it hereafter.
1995 */
1996 if (context_copied(context)) {
1997 u16 did_old = context_domain_id(context);
1998
b117e038 1999 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2000 iommu->flush.flush_context(iommu, did_old,
2001 (((u16)bus) << 8) | devfn,
2002 DMA_CCMD_MASK_NOBIT,
2003 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2004 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2005 DMA_TLB_DSI_FLUSH);
2006 }
aec0e861
XP
2007 }
2008
de24e553 2009 context_clear_entry(context);
ea6606b0 2010
7373a8cc
LB
2011 if (sm_supported(iommu)) {
2012 unsigned long pds;
4ed0d3e6 2013
7373a8cc
LB
2014 WARN_ON(!table);
2015
2016 /* Setup the PASID DIR pointer: */
2017 pds = context_get_sm_pds(table);
2018 context->lo = (u64)virt_to_phys(table->table) |
2019 context_pdts(pds);
2020
2021 /* Setup the RID_PASID field: */
2022 context_set_sm_rid2pasid(context, PASID_RID2PASID);
de24e553 2023
de24e553 2024 /*
7373a8cc
LB
2025 * Setup the Device-TLB enable bit and Page request
2026 * Enable bit:
de24e553 2027 */
7373a8cc
LB
2028 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2029 if (info && info->ats_supported)
2030 context_set_sm_dte(context);
2031 if (info && info->pri_supported)
2032 context_set_sm_pre(context);
2033 } else {
2034 struct dma_pte *pgd = domain->pgd;
2035 int agaw;
2036
2037 context_set_domain_id(context, did);
7373a8cc
LB
2038
2039 if (translation != CONTEXT_TT_PASS_THROUGH) {
2040 /*
2041 * Skip top levels of page tables for iommu which has
2042 * less agaw than default. Unnecessary for PT mode.
2043 */
2044 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2045 ret = -ENOMEM;
2046 pgd = phys_to_virt(dma_pte_addr(pgd));
2047 if (!dma_pte_present(pgd))
2048 goto out_unlock;
2049 }
2050
2051 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2052 if (info && info->ats_supported)
2053 translation = CONTEXT_TT_DEV_IOTLB;
2054 else
2055 translation = CONTEXT_TT_MULTI_LEVEL;
2056
2057 context_set_address_root(context, virt_to_phys(pgd));
2058 context_set_address_width(context, agaw);
2059 } else {
2060 /*
2061 * In pass through mode, AW must be programmed to
2062 * indicate the largest AGAW value supported by
2063 * hardware. And ASR is ignored by hardware.
2064 */
2065 context_set_address_width(context, iommu->msagaw);
2066 }
41b80db2
LB
2067
2068 context_set_translation_type(context, translation);
93a23a72 2069 }
4ed0d3e6 2070
c07e7d21
MM
2071 context_set_fault_enable(context);
2072 context_set_present(context);
5331fe6f 2073 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2074
4c25a2c1
DW
2075 /*
2076 * It's a non-present to present mapping. If hardware doesn't cache
2077 * non-present entries we only need to flush the write-buffer. If it
2078 * _does_ cache non-present entries, then it does so in the special
2079 * domain #0, which we have to flush:
2080 */
2081 if (cap_caching_mode(iommu->cap)) {
2082 iommu->flush.flush_context(iommu, 0,
2083 (((u16)bus) << 8) | devfn,
2084 DMA_CCMD_MASK_NOBIT,
2085 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2086 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2087 } else {
ba395927 2088 iommu_flush_write_buffer(iommu);
4c25a2c1 2089 }
93a23a72 2090 iommu_enable_dev_iotlb(info);
c7151a8d 2091
55d94043
JR
2092 ret = 0;
2093
2094out_unlock:
2095 spin_unlock(&iommu->lock);
2096 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2097
5c365d18 2098 return ret;
ba395927
KA
2099}
2100
579305f7
AW
2101struct domain_context_mapping_data {
2102 struct dmar_domain *domain;
2103 struct intel_iommu *iommu;
ca6e322d 2104 struct pasid_table *table;
579305f7
AW
2105};
2106
2107static int domain_context_mapping_cb(struct pci_dev *pdev,
2108 u16 alias, void *opaque)
2109{
2110 struct domain_context_mapping_data *data = opaque;
2111
2112 return domain_context_mapping_one(data->domain, data->iommu,
ca6e322d
LB
2113 data->table, PCI_BUS_NUM(alias),
2114 alias & 0xff);
579305f7
AW
2115}
2116
ba395927 2117static int
28ccce0d 2118domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2119{
ca6e322d
LB
2120 struct domain_context_mapping_data data;
2121 struct pasid_table *table;
64ae892b 2122 struct intel_iommu *iommu;
156baca8 2123 u8 bus, devfn;
64ae892b 2124
e1f167f3 2125 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2126 if (!iommu)
2127 return -ENODEV;
ba395927 2128
ca6e322d
LB
2129 table = intel_pasid_get_table(dev);
2130
579305f7 2131 if (!dev_is_pci(dev))
ca6e322d
LB
2132 return domain_context_mapping_one(domain, iommu, table,
2133 bus, devfn);
579305f7
AW
2134
2135 data.domain = domain;
2136 data.iommu = iommu;
ca6e322d 2137 data.table = table;
579305f7
AW
2138
2139 return pci_for_each_dma_alias(to_pci_dev(dev),
2140 &domain_context_mapping_cb, &data);
2141}
2142
2143static int domain_context_mapped_cb(struct pci_dev *pdev,
2144 u16 alias, void *opaque)
2145{
2146 struct intel_iommu *iommu = opaque;
2147
2148 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2149}
2150
e1f167f3 2151static int domain_context_mapped(struct device *dev)
ba395927 2152{
5331fe6f 2153 struct intel_iommu *iommu;
156baca8 2154 u8 bus, devfn;
5331fe6f 2155
e1f167f3 2156 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2157 if (!iommu)
2158 return -ENODEV;
ba395927 2159
579305f7
AW
2160 if (!dev_is_pci(dev))
2161 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2162
579305f7
AW
2163 return !pci_for_each_dma_alias(to_pci_dev(dev),
2164 domain_context_mapped_cb, iommu);
ba395927
KA
2165}
2166
f532959b
FY
2167/* Returns a number of VTD pages, but aligned to MM page size */
2168static inline unsigned long aligned_nrpages(unsigned long host_addr,
2169 size_t size)
2170{
2171 host_addr &= ~PAGE_MASK;
2172 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2173}
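/*
 * Editor's note -- worked example, not part of the original file,
 * assuming 4KiB MM pages and 4KiB VT-d pages: a buffer at
 * host_addr = 0x1234 with size = 0x2000 spans three VT-d pages:
 *
 *   offset = 0x1234 & ~PAGE_MASK = 0x234
 *   PAGE_ALIGN(0x234 + 0x2000)   = 0x3000
 *   0x3000 >> VTD_PAGE_SHIFT     = 3
 */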
2174
6dd9a7c7
YS
2175/* Return largest possible superpage level for a given mapping */
2176static inline int hardware_largepage_caps(struct dmar_domain *domain,
2177 unsigned long iov_pfn,
2178 unsigned long phy_pfn,
2179 unsigned long pages)
2180{
2181 int support, level = 1;
2182 unsigned long pfnmerge;
2183
2184 support = domain->iommu_superpage;
2185
2186 /* To use a large page, the virtual *and* physical addresses
2187 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2188 of them will mean we have to use smaller pages. So just
2189 merge them and check both at once. */
2190 pfnmerge = iov_pfn | phy_pfn;
2191
2192 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2193 pages >>= VTD_STRIDE_SHIFT;
2194 if (!pages)
2195 break;
2196 pfnmerge >>= VTD_STRIDE_SHIFT;
2197 level++;
2198 support--;
2199 }
2200 return level;
2201}
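/*
 * Editor's note -- illustrative example, not part of the original file,
 * assuming VTD_STRIDE_SHIFT is 9: with domain->iommu_superpage = 1
 * (2MiB superpages available), a request whose iov_pfn and phy_pfn both
 * have their low 9 bits clear (2MiB aligned) and which covers at least
 * 512 pages returns level 2.  If either pfn is only 4KiB aligned, or
 * fewer than 512 pages remain, the loop stops early and level 1 (4KiB
 * PTEs) is used instead.
 */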
2202
9051aa02
DW
2203static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2204 struct scatterlist *sg, unsigned long phys_pfn,
2205 unsigned long nr_pages, int prot)
e1605495
DW
2206{
2207 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2208 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2209 unsigned long sg_res = 0;
6dd9a7c7
YS
2210 unsigned int largepage_lvl = 0;
2211 unsigned long lvl_pages = 0;
e1605495 2212
162d1b10 2213 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2214
2215 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2216 return -EINVAL;
2217
2218 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2219
cc4f14aa
JL
2220 if (!sg) {
2221 sg_res = nr_pages;
9051aa02
DW
2222 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2223 }
2224
6dd9a7c7 2225 while (nr_pages > 0) {
c85994e4
DW
2226 uint64_t tmp;
2227
e1605495 2228 if (!sg_res) {
29a90b70
RM
2229 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2230
f532959b 2231 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2232 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2233 sg->dma_length = sg->length;
29a90b70 2234 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2235 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2236 }
6dd9a7c7 2237
e1605495 2238 if (!pte) {
6dd9a7c7
YS
2239 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2240
5cf0a76f 2241 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2242 if (!pte)
2243 return -ENOMEM;
6dd9a7c7 2244 /* It is a large page */
6491d4d0 2245 if (largepage_lvl > 1) {
ba2374fd
CZ
2246 unsigned long nr_superpages, end_pfn;
2247
6dd9a7c7 2248 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2249 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2250
2251 nr_superpages = sg_res / lvl_pages;
2252 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2253
d41a4adb
JL
2254 /*
2255 * Ensure that old small page tables are
ba2374fd 2256 * removed to make room for superpage(s).
bc24c571
DD
2257 * We're adding new large pages, so make sure
2258 * we don't remove their parent tables.
d41a4adb 2259 */
bc24c571
DD
2260 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2261 largepage_lvl + 1);
6491d4d0 2262 } else {
6dd9a7c7 2263 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2264 }
6dd9a7c7 2265
e1605495
DW
2266 }
2267 /* We don't need a lock here; nobody else
2268 * touches the iova range
2269 */
7766a3fb 2270 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2271 if (tmp) {
1bf20f0d 2272 static int dumps = 5;
9f10e5bf
JR
2273 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2274 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2275 if (dumps) {
2276 dumps--;
2277 debug_dma_dump_mappings(NULL);
2278 }
2279 WARN_ON(1);
2280 }
6dd9a7c7
YS
2281
2282 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2283
2284 BUG_ON(nr_pages < lvl_pages);
2285 BUG_ON(sg_res < lvl_pages);
2286
2287 nr_pages -= lvl_pages;
2288 iov_pfn += lvl_pages;
2289 phys_pfn += lvl_pages;
2290 pteval += lvl_pages * VTD_PAGE_SIZE;
2291 sg_res -= lvl_pages;
2292
2293 /* If the next PTE would be the first in a new page, then we
2294 need to flush the cache on the entries we've just written.
2295 And then we'll need to recalculate 'pte', so clear it and
2296 let it get set again in the if (!pte) block above.
2297
2298 If we're done (!nr_pages) we need to flush the cache too.
2299
2300 Also if we've been setting superpages, we may need to
2301 recalculate 'pte' and switch back to smaller pages for the
2302 end of the mapping, if the trailing size is not enough to
2303 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2304 pte++;
6dd9a7c7
YS
2305 if (!nr_pages || first_pte_in_page(pte) ||
2306 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2307 domain_flush_cache(domain, first_pte,
2308 (void *)pte - (void *)first_pte);
2309 pte = NULL;
2310 }
6dd9a7c7
YS
2311
2312 if (!sg_res && nr_pages)
e1605495
DW
2313 sg = sg_next(sg);
2314 }
2315 return 0;
2316}
2317
87684fd9 2318static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
095303e0
LB
2319 struct scatterlist *sg, unsigned long phys_pfn,
2320 unsigned long nr_pages, int prot)
2321{
fa954e68 2322 int iommu_id, ret;
095303e0
LB
2323 struct intel_iommu *iommu;
2324
2325 /* Do the real mapping first */
2326 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2327 if (ret)
2328 return ret;
2329
fa954e68
LB
2330 for_each_domain_iommu(iommu_id, domain) {
2331 iommu = g_iommus[iommu_id];
095303e0
LB
2332 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2333 }
2334
2335 return 0;
87684fd9
PX
2336}
2337
9051aa02
DW
2338static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2339 struct scatterlist *sg, unsigned long nr_pages,
2340 int prot)
ba395927 2341{
87684fd9 2342 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2343}
6f6a00e4 2344
9051aa02
DW
2345static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2346 unsigned long phys_pfn, unsigned long nr_pages,
2347 int prot)
2348{
87684fd9 2349 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2350}
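/*
 * Editor's note -- hypothetical usage sketch, not part of the original
 * file (iova_pfn, paddr and size are made-up names for this example):
 * a caller that has already allocated an IOVA range maps a physically
 * contiguous buffer roughly like
 *
 *   domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
 *                      mm_to_dma_pfn(paddr >> PAGE_SHIFT),
 *                      aligned_nrpages(paddr, size),
 *                      DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * Scatter-gather callers go through domain_sg_mapping() above instead,
 * which lets __domain_mapping() walk the sg list itself.
 */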
2351
2452d9db 2352static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2353{
5082219b
FS
2354 unsigned long flags;
2355 struct context_entry *context;
2356 u16 did_old;
2357
c7151a8d
WH
2358 if (!iommu)
2359 return;
8c11e798 2360
5082219b
FS
2361 spin_lock_irqsave(&iommu->lock, flags);
2362 context = iommu_context_addr(iommu, bus, devfn, 0);
2363 if (!context) {
2364 spin_unlock_irqrestore(&iommu->lock, flags);
2365 return;
2366 }
2367 did_old = context_domain_id(context);
2368 context_clear_entry(context);
2369 __iommu_flush_cache(iommu, context, sizeof(*context));
2370 spin_unlock_irqrestore(&iommu->lock, flags);
2371 iommu->flush.flush_context(iommu,
2372 did_old,
2373 (((u16)bus) << 8) | devfn,
2374 DMA_CCMD_MASK_NOBIT,
2375 DMA_CCMD_DEVICE_INVL);
2376 iommu->flush.flush_iotlb(iommu,
2377 did_old,
2378 0,
2379 0,
2380 DMA_TLB_DSI_FLUSH);
ba395927
KA
2381}
2382
109b9b04
DW
2383static inline void unlink_domain_info(struct device_domain_info *info)
2384{
2385 assert_spin_locked(&device_domain_lock);
2386 list_del(&info->link);
2387 list_del(&info->global);
2388 if (info->dev)
0bcb3e28 2389 info->dev->archdata.iommu = NULL;
109b9b04
DW
2390}
2391
ba395927
KA
2392static void domain_remove_dev_info(struct dmar_domain *domain)
2393{
3a74ca01 2394 struct device_domain_info *info, *tmp;
fb170fb4 2395 unsigned long flags;
ba395927
KA
2396
2397 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2398 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2399 __dmar_remove_one_dev_info(info);
ba395927
KA
2400 spin_unlock_irqrestore(&device_domain_lock, flags);
2401}
2402
2403/*
2404 * find_domain
1525a29a 2405 * Note: we use struct device->archdata.iommu to store the info
ba395927 2406 */
1525a29a 2407static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2408{
2409 struct device_domain_info *info;
2410
2411 /* No lock here, assumes no domain exit in normal case */
1525a29a 2412 info = dev->archdata.iommu;
b316d02a 2413 if (likely(info))
ba395927
KA
2414 return info->domain;
2415 return NULL;
2416}
2417
5a8f40e8 2418static inline struct device_domain_info *
745f2586
JL
2419dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2420{
2421 struct device_domain_info *info;
2422
2423 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2424 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2425 info->devfn == devfn)
5a8f40e8 2426 return info;
745f2586
JL
2427
2428 return NULL;
2429}
2430
5db31569
JR
2431static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2432 int bus, int devfn,
2433 struct device *dev,
2434 struct dmar_domain *domain)
745f2586 2435{
5a8f40e8 2436 struct dmar_domain *found = NULL;
745f2586
JL
2437 struct device_domain_info *info;
2438 unsigned long flags;
d160aca5 2439 int ret;
745f2586
JL
2440
2441 info = alloc_devinfo_mem();
2442 if (!info)
b718cd3d 2443 return NULL;
745f2586 2444
745f2586
JL
2445 info->bus = bus;
2446 info->devfn = devfn;
b16d0cb9
DW
2447 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2448 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2449 info->ats_qdep = 0;
745f2586
JL
2450 info->dev = dev;
2451 info->domain = domain;
5a8f40e8 2452 info->iommu = iommu;
cc580e41 2453 info->pasid_table = NULL;
95587a75 2454 info->auxd_enabled = 0;
67b8e02b 2455 INIT_LIST_HEAD(&info->auxiliary_domains);
745f2586 2456
b16d0cb9
DW
2457 if (dev && dev_is_pci(dev)) {
2458 struct pci_dev *pdev = to_pci_dev(info->dev);
2459
d8b85910
LB
2460 if (!pdev->untrusted &&
2461 !pci_ats_disabled() &&
cef74409 2462 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2463 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2464 dmar_find_matched_atsr_unit(pdev))
2465 info->ats_supported = 1;
2466
765b6a98
LB
2467 if (sm_supported(iommu)) {
2468 if (pasid_supported(iommu)) {
b16d0cb9
DW
2469 int features = pci_pasid_features(pdev);
2470 if (features >= 0)
2471 info->pasid_supported = features | 1;
2472 }
2473
2474 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2475 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2476 info->pri_supported = 1;
2477 }
2478 }
2479
745f2586
JL
2480 spin_lock_irqsave(&device_domain_lock, flags);
2481 if (dev)
0bcb3e28 2482 found = find_domain(dev);
f303e507
JR
2483
2484 if (!found) {
5a8f40e8 2485 struct device_domain_info *info2;
41e80dca 2486 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2487 if (info2) {
2488 found = info2->domain;
2489 info2->dev = dev;
2490 }
5a8f40e8 2491 }
f303e507 2492
745f2586
JL
2493 if (found) {
2494 spin_unlock_irqrestore(&device_domain_lock, flags);
2495 free_devinfo_mem(info);
b718cd3d
DW
2496 /* Caller must free the original domain */
2497 return found;
745f2586
JL
2498 }
2499
d160aca5
JR
2500 spin_lock(&iommu->lock);
2501 ret = domain_attach_iommu(domain, iommu);
2502 spin_unlock(&iommu->lock);
2503
2504 if (ret) {
c6c2cebd 2505 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2506 free_devinfo_mem(info);
c6c2cebd
JR
2507 return NULL;
2508 }
c6c2cebd 2509
b718cd3d
DW
2510 list_add(&info->link, &domain->devices);
2511 list_add(&info->global, &device_domain_list);
2512 if (dev)
2513 dev->archdata.iommu = info;
0bbeb01a 2514 spin_unlock_irqrestore(&device_domain_lock, flags);
a7fc93fe 2515
0bbeb01a
LB
2516 /* PASID table is mandatory for a PCI device in scalable mode. */
2517 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
a7fc93fe
LB
2518 ret = intel_pasid_alloc_table(dev);
2519 if (ret) {
932a6523 2520 dev_err(dev, "PASID table allocation failed\n");
71753239 2521 dmar_remove_one_dev_info(dev);
0bbeb01a 2522 return NULL;
a7fc93fe 2523 }
ef848b7e
LB
2524
2525 /* Setup the PASID entry for requests without PASID: */
2526 spin_lock(&iommu->lock);
2527 if (hw_pass_through && domain_type_is_si(domain))
2528 ret = intel_pasid_setup_pass_through(iommu, domain,
2529 dev, PASID_RID2PASID);
2530 else
2531 ret = intel_pasid_setup_second_level(iommu, domain,
2532 dev, PASID_RID2PASID);
2533 spin_unlock(&iommu->lock);
2534 if (ret) {
932a6523 2535 dev_err(dev, "Setup RID2PASID failed\n");
71753239 2536 dmar_remove_one_dev_info(dev);
ef848b7e 2537 return NULL;
a7fc93fe
LB
2538 }
2539 }
b718cd3d 2540
cc4e2575 2541 if (dev && domain_context_mapping(domain, dev)) {
932a6523 2542 dev_err(dev, "Domain context map failed\n");
71753239 2543 dmar_remove_one_dev_info(dev);
cc4e2575
JR
2544 return NULL;
2545 }
2546
b718cd3d 2547 return domain;
745f2586
JL
2548}
2549
579305f7
AW
2550static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2551{
2552 *(u16 *)opaque = alias;
2553 return 0;
2554}
2555
76208356 2556static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2557{
e083ea5b 2558 struct device_domain_info *info;
76208356 2559 struct dmar_domain *domain = NULL;
579305f7 2560 struct intel_iommu *iommu;
fcc35c63 2561 u16 dma_alias;
ba395927 2562 unsigned long flags;
aa4d066a 2563 u8 bus, devfn;
ba395927 2564
579305f7
AW
2565 iommu = device_to_iommu(dev, &bus, &devfn);
2566 if (!iommu)
2567 return NULL;
2568
146922ec
DW
2569 if (dev_is_pci(dev)) {
2570 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2571
579305f7
AW
2572 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2573
2574 spin_lock_irqsave(&device_domain_lock, flags);
2575 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2576 PCI_BUS_NUM(dma_alias),
2577 dma_alias & 0xff);
2578 if (info) {
2579 iommu = info->iommu;
2580 domain = info->domain;
5a8f40e8 2581 }
579305f7 2582 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2583
76208356 2584 /* DMA alias already has a domain, use it */
579305f7 2585 if (info)
76208356 2586 goto out;
579305f7 2587 }
ba395927 2588
146922ec 2589 /* Allocate and initialize new domain for the device */
ab8dfe25 2590 domain = alloc_domain(0);
745f2586 2591 if (!domain)
579305f7 2592 return NULL;
dc534b25 2593 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2594 domain_exit(domain);
2595 return NULL;
2c2e2c38 2596 }
ba395927 2597
76208356 2598out:
579305f7 2599
76208356
JR
2600 return domain;
2601}
579305f7 2602
76208356
JR
2603static struct dmar_domain *set_domain_for_dev(struct device *dev,
2604 struct dmar_domain *domain)
2605{
2606 struct intel_iommu *iommu;
2607 struct dmar_domain *tmp;
2608 u16 req_id, dma_alias;
2609 u8 bus, devfn;
2610
2611 iommu = device_to_iommu(dev, &bus, &devfn);
2612 if (!iommu)
2613 return NULL;
2614
2615 req_id = ((u16)bus << 8) | devfn;
2616
2617 if (dev_is_pci(dev)) {
2618 struct pci_dev *pdev = to_pci_dev(dev);
2619
2620 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2621
2622 /* register PCI DMA alias device */
2623 if (req_id != dma_alias) {
2624 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2625 dma_alias & 0xff, NULL, domain);
2626
2627 if (!tmp || tmp != domain)
2628 return tmp;
2629 }
ba395927
KA
2630 }
2631
5db31569 2632 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2633 if (!tmp || tmp != domain)
2634 return tmp;
2635
2636 return domain;
2637}
579305f7 2638
76208356
JR
2639static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2640{
2641 struct dmar_domain *domain, *tmp;
2642
2643 domain = find_domain(dev);
2644 if (domain)
2645 goto out;
2646
2647 domain = find_or_alloc_domain(dev, gaw);
2648 if (!domain)
2649 goto out;
2650
2651 tmp = set_domain_for_dev(dev, domain);
2652 if (!tmp || domain != tmp) {
579305f7
AW
2653 domain_exit(domain);
2654 domain = tmp;
2655 }
b718cd3d 2656
76208356
JR
2657out:
2658
b718cd3d 2659 return domain;
ba395927
KA
2660}
2661
b213203e
DW
2662static int iommu_domain_identity_map(struct dmar_domain *domain,
2663 unsigned long long start,
2664 unsigned long long end)
ba395927 2665{
c5395d5c
DW
2666 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2667 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2668
2669 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2670 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2671 pr_err("Reserving iova failed\n");
b213203e 2672 return -ENOMEM;
ba395927
KA
2673 }
2674
af1089ce 2675 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2676 /*
2677 * RMRR range might have overlap with physical memory range,
2678 * clear it first
2679 */
c5395d5c 2680 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2681
87684fd9
PX
2682 return __domain_mapping(domain, first_vpfn, NULL,
2683 first_vpfn, last_vpfn - first_vpfn + 1,
2684 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2685}
2686
d66ce54b
JR
2687static int domain_prepare_identity_map(struct device *dev,
2688 struct dmar_domain *domain,
2689 unsigned long long start,
2690 unsigned long long end)
b213203e 2691{
19943b0e
DW
2692 /* For _hardware_ passthrough, don't bother. But for software
2693 passthrough, we do it anyway -- it may indicate a memory
2694 range which is reserved in E820, and so didn't get set
2695 up in si_domain to start with */
2696 if (domain == si_domain && hw_pass_through) {
932a6523
BH
2697 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2698 start, end);
19943b0e
DW
2699 return 0;
2700 }
2701
932a6523 2702 dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
9f10e5bf 2703
5595b528
DW
2704 if (end < start) {
2705 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2706 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2707 dmi_get_system_info(DMI_BIOS_VENDOR),
2708 dmi_get_system_info(DMI_BIOS_VERSION),
2709 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2710 return -EIO;
5595b528
DW
2711 }
2712
2ff729f5
DW
2713 if (end >> agaw_to_width(domain->agaw)) {
2714 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2715 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2716 agaw_to_width(domain->agaw),
2717 dmi_get_system_info(DMI_BIOS_VENDOR),
2718 dmi_get_system_info(DMI_BIOS_VERSION),
2719 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2720 return -EIO;
2ff729f5 2721 }
19943b0e 2722
d66ce54b
JR
2723 return iommu_domain_identity_map(domain, start, end);
2724}
ba395927 2725
d66ce54b
JR
2726static int iommu_prepare_identity_map(struct device *dev,
2727 unsigned long long start,
2728 unsigned long long end)
2729{
2730 struct dmar_domain *domain;
2731 int ret;
2732
2733 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2734 if (!domain)
2735 return -ENOMEM;
2736
2737 ret = domain_prepare_identity_map(dev, domain, start, end);
2738 if (ret)
2739 domain_exit(domain);
b213203e 2740
ba395927 2741 return ret;
ba395927
KA
2742}
2743
2744static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2745 struct device *dev)
ba395927 2746{
0b9d9753 2747 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2748 return 0;
0b9d9753
DW
2749 return iommu_prepare_identity_map(dev, rmrr->base_address,
2750 rmrr->end_address);
ba395927
KA
2751}
2752
d3f13810 2753#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2754static inline void iommu_prepare_isa(void)
2755{
2756 struct pci_dev *pdev;
2757 int ret;
2758
2759 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2760 if (!pdev)
2761 return;
2762
9f10e5bf 2763 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2764 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2765
2766 if (ret)
9f10e5bf 2767 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2768
9b27e82d 2769 pci_dev_put(pdev);
49a0429e
KA
2770}
2771#else
2772static inline void iommu_prepare_isa(void)
2773{
2774 return;
2775}
d3f13810 2776#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2777
2c2e2c38 2778static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2779
071e1374 2780static int __init si_domain_init(int hw)
2c2e2c38 2781{
4de354ec
LB
2782 struct dmar_rmrr_unit *rmrr;
2783 struct device *dev;
2784 int i, nid, ret;
2c2e2c38 2785
ab8dfe25 2786 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2787 if (!si_domain)
2788 return -EFAULT;
2789
2c2e2c38
FY
2790 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2791 domain_exit(si_domain);
2792 return -EFAULT;
2793 }
2794
19943b0e
DW
2795 if (hw)
2796 return 0;
2797
c7ab48d2 2798 for_each_online_node(nid) {
5dfe8660
TH
2799 unsigned long start_pfn, end_pfn;
2800 int i;
2801
2802 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2803 ret = iommu_domain_identity_map(si_domain,
2804 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2805 if (ret)
2806 return ret;
2807 }
c7ab48d2
DW
2808 }
2809
4de354ec
LB
2810 /*
2811 * Normally we use DMA domains for devices which have RMRRs. But we
2812 * relax this requirement for graphics and USB devices. Identity map
2813 * the RMRRs for graphics and USB devices so that they can use the
2814 * si_domain.
2815 */
2816 for_each_rmrr_units(rmrr) {
2817 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2818 i, dev) {
2819 unsigned long long start = rmrr->base_address;
2820 unsigned long long end = rmrr->end_address;
2821
2822 if (device_is_rmrr_locked(dev))
2823 continue;
2824
2825 if (WARN_ON(end < start ||
2826 end >> agaw_to_width(si_domain->agaw)))
2827 continue;
2828
2829 ret = iommu_domain_identity_map(si_domain, start, end);
2830 if (ret)
2831 return ret;
2832 }
2833 }
2834
2c2e2c38
FY
2835 return 0;
2836}
2837
9b226624 2838static int identity_mapping(struct device *dev)
2c2e2c38
FY
2839{
2840 struct device_domain_info *info;
2841
9b226624 2842 info = dev->archdata.iommu;
cb452a40
MT
2843 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2844 return (info->domain == si_domain);
2c2e2c38 2845
2c2e2c38
FY
2846 return 0;
2847}
2848
28ccce0d 2849static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2850{
0ac72664 2851 struct dmar_domain *ndomain;
5a8f40e8 2852 struct intel_iommu *iommu;
156baca8 2853 u8 bus, devfn;
2c2e2c38 2854
5913c9bf 2855 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2856 if (!iommu)
2857 return -ENODEV;
2858
5db31569 2859 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2860 if (ndomain != domain)
2861 return -EBUSY;
2c2e2c38
FY
2862
2863 return 0;
2864}
2865
0b9d9753 2866static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2867{
2868 struct dmar_rmrr_unit *rmrr;
832bd858 2869 struct device *tmp;
ea2447f7
TM
2870 int i;
2871
0e242612 2872 rcu_read_lock();
ea2447f7 2873 for_each_rmrr_units(rmrr) {
b683b230
JL
2874 /*
2875 * Return TRUE if this RMRR contains the device that
2876 * is passed in.
2877 */
2878 for_each_active_dev_scope(rmrr->devices,
2879 rmrr->devices_cnt, i, tmp)
0b9d9753 2880 if (tmp == dev) {
0e242612 2881 rcu_read_unlock();
ea2447f7 2882 return true;
b683b230 2883 }
ea2447f7 2884 }
0e242612 2885 rcu_read_unlock();
ea2447f7
TM
2886 return false;
2887}
2888
c875d2c1
AW
2889/*
2890 * There are a couple of cases where we need to restrict the functionality of
2891 * devices associated with RMRRs. The first is when evaluating a device for
2892 * identity mapping because problems exist when devices are moved in and out
2893 * of domains and their respective RMRR information is lost. This means that
2894 * a device with associated RMRRs will never be in a "passthrough" domain.
2895 * The second is use of the device through the IOMMU API. This interface
2896 * expects to have full control of the IOVA space for the device. We cannot
2897 * satisfy both the requirement that RMRR access is maintained and have an
2898 * unencumbered IOVA space. We also have no ability to quiesce the device's
2899 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2900 * We therefore prevent devices associated with an RMRR from participating in
2901 * the IOMMU API, which eliminates them from device assignment.
2902 *
2903 * In both cases we assume that PCI USB devices with RMRRs have them largely
2904 * for historical reasons and that the RMRR space is not actively used post
2905 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2906 *
2907 * The same exception is made for graphics devices, with the requirement that
2908 * any use of the RMRR regions will be torn down before assigning the device
2909 * to a guest.
c875d2c1
AW
2910 */
2911static bool device_is_rmrr_locked(struct device *dev)
2912{
2913 if (!device_has_rmrr(dev))
2914 return false;
2915
2916 if (dev_is_pci(dev)) {
2917 struct pci_dev *pdev = to_pci_dev(dev);
2918
18436afd 2919 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2920 return false;
2921 }
2922
2923 return true;
2924}
2925
f273a453
LB
2926/*
2927 * Return the required default domain type for a specific device.
2928 *
2929 * @dev: the device in query
2930 * @startup: true if this is during early boot
2931 *
2932 * Returns:
2933 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2934 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2935 * - 0: both identity and dynamic domains work for this device
2936 */
2937static int device_def_domain_type(struct device *dev, int startup)
6941af28 2938{
3bdb2591
DW
2939 if (dev_is_pci(dev)) {
2940 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2941
c875d2c1 2942 if (device_is_rmrr_locked(dev))
f273a453 2943 return IOMMU_DOMAIN_DMA;
e0fc7e0b 2944
89a6079d
LB
2945 /*
2946 * Prevent any device marked as untrusted from getting
2947 * placed into the statically identity mapping domain.
2948 */
2949 if (pdev->untrusted)
f273a453 2950 return IOMMU_DOMAIN_DMA;
89a6079d 2951
3bdb2591 2952 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
f273a453 2953 return IOMMU_DOMAIN_IDENTITY;
e0fc7e0b 2954
3bdb2591 2955 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
f273a453 2956 return IOMMU_DOMAIN_IDENTITY;
3bdb2591
DW
2957
2958 /*
2959 * We want to start off with all devices in the 1:1 domain, and
2960 * take them out later if we find they can't access all of memory.
2961 *
2962 * However, we can't do this for PCI devices behind bridges,
2963 * because all PCI devices behind the same bridge will end up
2964 * with the same source-id on their transactions.
2965 *
2966 * Practically speaking, we can't change things around for these
2967 * devices at run-time, because we can't be sure there'll be no
2968 * DMA transactions in flight for any of their siblings.
2969 *
2970 * So PCI devices (unless they're on the root bus) as well as
2971 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2972 * the 1:1 domain, just in _case_ one of their siblings turns out
2973 * not to be able to map all of memory.
2974 */
2975 if (!pci_is_pcie(pdev)) {
2976 if (!pci_is_root_bus(pdev->bus))
f273a453 2977 return IOMMU_DOMAIN_DMA;
3bdb2591 2978 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
f273a453 2979 return IOMMU_DOMAIN_DMA;
3bdb2591 2980 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
f273a453 2981 return IOMMU_DOMAIN_DMA;
3bdb2591
DW
2982 } else {
2983 if (device_has_rmrr(dev))
f273a453 2984 return IOMMU_DOMAIN_DMA;
3bdb2591 2985 }
3dfc813d 2986
f273a453
LB
2987 return (iommu_identity_mapping & IDENTMAP_ALL) ?
2988 IOMMU_DOMAIN_IDENTITY : 0;
2989}
2990
2991static inline int iommu_should_identity_map(struct device *dev, int startup)
2992{
2993 return device_def_domain_type(dev, startup) == IOMMU_DOMAIN_IDENTITY;
6941af28
DW
2994}
2995
cf04eee8
DW
2996static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2997{
2998 int ret;
2999
3000 if (!iommu_should_identity_map(dev, 1))
3001 return 0;
3002
28ccce0d 3003 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 3004 if (!ret)
932a6523
BH
3005 dev_info(dev, "%s identity mapping\n",
3006 hw ? "Hardware" : "Software");
cf04eee8
DW
3007 else if (ret == -ENODEV)
3008 /* device not associated with an iommu */
3009 ret = 0;
3010
3011 return ret;
3012}
3013
3014
071e1374 3015static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 3016{
2c2e2c38 3017 struct pci_dev *pdev = NULL;
cf04eee8
DW
3018 struct dmar_drhd_unit *drhd;
3019 struct intel_iommu *iommu;
3020 struct device *dev;
3021 int i;
3022 int ret = 0;
2c2e2c38 3023
2c2e2c38 3024 for_each_pci_dev(pdev) {
cf04eee8
DW
3025 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3026 if (ret)
3027 return ret;
3028 }
3029
3030 for_each_active_iommu(iommu, drhd)
3031 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3032 struct acpi_device_physical_node *pn;
3033 struct acpi_device *adev;
3034
3035 if (dev->bus != &acpi_bus_type)
3036 continue;
86080ccc 3037
cf04eee8
DW
3038 adev= to_acpi_device(dev);
3039 mutex_lock(&adev->physical_node_lock);
3040 list_for_each_entry(pn, &adev->physical_node_list, node) {
3041 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3042 if (ret)
3043 break;
eae460b6 3044 }
cf04eee8
DW
3045 mutex_unlock(&adev->physical_node_lock);
3046 if (ret)
3047 return ret;
62edf5dc 3048 }
2c2e2c38
FY
3049
3050 return 0;
3051}
3052
ffebeb46
JL
3053static void intel_iommu_init_qi(struct intel_iommu *iommu)
3054{
3055 /*
3056 * Start from the sane iommu hardware state.
3057 * If the queued invalidation is already initialized by us
3058 * (for example, while enabling interrupt-remapping) then
3059 * we got the things already rolling from a sane state.
3060 */
3061 if (!iommu->qi) {
3062 /*
3063 * Clear any previous faults.
3064 */
3065 dmar_fault(-1, iommu);
3066 /*
3067 * Disable queued invalidation if supported and already enabled
3068 * before OS handover.
3069 */
3070 dmar_disable_qi(iommu);
3071 }
3072
3073 if (dmar_enable_qi(iommu)) {
3074 /*
3075 * Queued Invalidate not enabled, use Register Based Invalidate
3076 */
3077 iommu->flush.flush_context = __iommu_flush_context;
3078 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3079 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3080 iommu->name);
3081 } else {
3082 iommu->flush.flush_context = qi_flush_context;
3083 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3084 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3085 }
3086}
3087
091d42e4 3088static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3089 struct root_entry *old_re,
091d42e4
JR
3090 struct context_entry **tbl,
3091 int bus, bool ext)
3092{
dbcd861f 3093 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3094 struct context_entry *new_ce = NULL, ce;
dfddb969 3095 struct context_entry *old_ce = NULL;
543c8dcf 3096 struct root_entry re;
091d42e4
JR
3097 phys_addr_t old_ce_phys;
3098
3099 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3100 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3101
3102 for (devfn = 0; devfn < 256; devfn++) {
3103 /* First calculate the correct index */
3104 idx = (ext ? devfn * 2 : devfn) % 256;
3105
3106 if (idx == 0) {
3107 /* First save what we may have and clean up */
3108 if (new_ce) {
3109 tbl[tbl_idx] = new_ce;
3110 __iommu_flush_cache(iommu, new_ce,
3111 VTD_PAGE_SIZE);
3112 pos = 1;
3113 }
3114
3115 if (old_ce)
829383e1 3116 memunmap(old_ce);
091d42e4
JR
3117
3118 ret = 0;
3119 if (devfn < 0x80)
543c8dcf 3120 old_ce_phys = root_entry_lctp(&re);
091d42e4 3121 else
543c8dcf 3122 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3123
3124 if (!old_ce_phys) {
3125 if (ext && devfn == 0) {
3126 /* No LCTP, try UCTP */
3127 devfn = 0x7f;
3128 continue;
3129 } else {
3130 goto out;
3131 }
3132 }
3133
3134 ret = -ENOMEM;
dfddb969
DW
3135 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3136 MEMREMAP_WB);
091d42e4
JR
3137 if (!old_ce)
3138 goto out;
3139
3140 new_ce = alloc_pgtable_page(iommu->node);
3141 if (!new_ce)
3142 goto out_unmap;
3143
3144 ret = 0;
3145 }
3146
3147 /* Now copy the context entry */
dfddb969 3148 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3149
cf484d0e 3150 if (!__context_present(&ce))
091d42e4
JR
3151 continue;
3152
dbcd861f
JR
3153 did = context_domain_id(&ce);
3154 if (did >= 0 && did < cap_ndoms(iommu->cap))
3155 set_bit(did, iommu->domain_ids);
3156
cf484d0e
JR
3157 /*
3158 * We need a marker for copied context entries. This
3159 * marker needs to work for the old format as well as
3160 * for extended context entries.
3161 *
3162 * Bit 67 of the context entry is used. In the old
3163 * format this bit is available to software, in the
3164 * extended format it is the PGE bit, but PGE is ignored
3165 * by HW if PASIDs are disabled (and thus still
3166 * available).
3167 *
3168 * So disable PASIDs first and then mark the entry
3169 * copied. This means that we don't copy PASID
3170 * translations from the old kernel, but this is fine as
3171 * faults there are not fatal.
3172 */
3173 context_clear_pasid_enable(&ce);
3174 context_set_copied(&ce);
3175
091d42e4
JR
3176 new_ce[idx] = ce;
3177 }
3178
3179 tbl[tbl_idx + pos] = new_ce;
3180
3181 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3182
3183out_unmap:
dfddb969 3184 memunmap(old_ce);
091d42e4
JR
3185
3186out:
3187 return ret;
3188}
3189
3190static int copy_translation_tables(struct intel_iommu *iommu)
3191{
3192 struct context_entry **ctxt_tbls;
dfddb969 3193 struct root_entry *old_rt;
091d42e4
JR
3194 phys_addr_t old_rt_phys;
3195 int ctxt_table_entries;
3196 unsigned long flags;
3197 u64 rtaddr_reg;
3198 int bus, ret;
c3361f2f 3199 bool new_ext, ext;
091d42e4
JR
3200
3201 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3202 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3203 new_ext = !!ecap_ecs(iommu->ecap);
3204
3205 /*
3206 * The RTT bit can only be changed when translation is disabled,
3207 * but disabling translation means to open a window for data
3208 * corruption. So bail out and don't copy anything if we would
3209 * have to change the bit.
3210 */
3211 if (new_ext != ext)
3212 return -EINVAL;
091d42e4
JR
3213
3214 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3215 if (!old_rt_phys)
3216 return -EINVAL;
3217
dfddb969 3218 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3219 if (!old_rt)
3220 return -ENOMEM;
3221
3222 /* This is too big for the stack - allocate it from slab */
3223 ctxt_table_entries = ext ? 512 : 256;
3224 ret = -ENOMEM;
6396bb22 3225 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3226 if (!ctxt_tbls)
3227 goto out_unmap;
3228
3229 for (bus = 0; bus < 256; bus++) {
3230 ret = copy_context_table(iommu, &old_rt[bus],
3231 ctxt_tbls, bus, ext);
3232 if (ret) {
3233 pr_err("%s: Failed to copy context table for bus %d\n",
3234 iommu->name, bus);
3235 continue;
3236 }
3237 }
3238
3239 spin_lock_irqsave(&iommu->lock, flags);
3240
3241 /* Context tables are copied, now write them to the root_entry table */
3242 for (bus = 0; bus < 256; bus++) {
3243 int idx = ext ? bus * 2 : bus;
3244 u64 val;
3245
3246 if (ctxt_tbls[idx]) {
3247 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3248 iommu->root_entry[bus].lo = val;
3249 }
3250
3251 if (!ext || !ctxt_tbls[idx + 1])
3252 continue;
3253
3254 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3255 iommu->root_entry[bus].hi = val;
3256 }
3257
3258 spin_unlock_irqrestore(&iommu->lock, flags);
3259
3260 kfree(ctxt_tbls);
3261
3262 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3263
3264 ret = 0;
3265
3266out_unmap:
dfddb969 3267 memunmap(old_rt);
091d42e4
JR
3268
3269 return ret;
3270}
3271
b779260b 3272static int __init init_dmars(void)
ba395927
KA
3273{
3274 struct dmar_drhd_unit *drhd;
3275 struct dmar_rmrr_unit *rmrr;
a87f4918 3276 bool copied_tables = false;
832bd858 3277 struct device *dev;
ba395927 3278 struct intel_iommu *iommu;
13cf0174 3279 int i, ret;
2c2e2c38 3280
ba395927
KA
3281 /*
3282 * for each drhd
3283 * allocate root
3284 * initialize and program root entry to not present
3285 * endfor
3286 */
3287 for_each_drhd_unit(drhd) {
5e0d2a6f 3288 /*
3289 * lock not needed as this is only incremented in the single
3290 * threaded kernel __init code path; all other accesses are
3291 * read only
3292 */
78d8e704 3293 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3294 g_num_of_iommus++;
3295 continue;
3296 }
9f10e5bf 3297 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3298 }
3299
ffebeb46
JL
3300 /* Preallocate enough resources for IOMMU hot-addition */
3301 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3302 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3303
d9630fe9
WH
3304 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3305 GFP_KERNEL);
3306 if (!g_iommus) {
9f10e5bf 3307 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3308 ret = -ENOMEM;
3309 goto error;
3310 }
3311
7c919779 3312 for_each_active_iommu(iommu, drhd) {
56283174
LB
3313 /*
3314 * Find the max pasid size of all IOMMU's in the system.
3315 * We need to ensure the system pasid table is no bigger
3316 * than the smallest supported.
3317 */
765b6a98 3318 if (pasid_supported(iommu)) {
56283174
LB
3319 u32 temp = 2 << ecap_pss(iommu->ecap);
3320
3321 intel_pasid_max_id = min_t(u32, temp,
3322 intel_pasid_max_id);
3323 }
3324
d9630fe9 3325 g_iommus[iommu->seq_id] = iommu;
ba395927 3326
b63d80d1
JR
3327 intel_iommu_init_qi(iommu);
3328
e61d98d8
SS
3329 ret = iommu_init_domains(iommu);
3330 if (ret)
989d51fc 3331 goto free_iommu;
e61d98d8 3332
4158c2ec
JR
3333 init_translation_status(iommu);
3334
091d42e4
JR
3335 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3336 iommu_disable_translation(iommu);
3337 clear_translation_pre_enabled(iommu);
3338 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3339 iommu->name);
3340 }
4158c2ec 3341
ba395927
KA
3342 /*
3343 * TBD:
3344 * we could share the same root & context tables
25985edc 3345 * among all IOMMUs. Need to split it later.
ba395927
KA
3346 */
3347 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3348 if (ret)
989d51fc 3349 goto free_iommu;
5f0a7f76 3350
091d42e4
JR
3351 if (translation_pre_enabled(iommu)) {
3352 pr_info("Translation already enabled - trying to copy translation structures\n");
3353
3354 ret = copy_translation_tables(iommu);
3355 if (ret) {
3356 /*
3357 * We found the IOMMU with translation
3358 * enabled - but failed to copy over the
3359 * old root-entry table. Try to proceed
3360 * by disabling translation now and
3361 * allocating a clean root-entry table.
3362 * This might cause DMAR faults, but
3363 * probably the dump will still succeed.
3364 */
3365 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3366 iommu->name);
3367 iommu_disable_translation(iommu);
3368 clear_translation_pre_enabled(iommu);
3369 } else {
3370 pr_info("Copied translation tables from previous kernel for %s\n",
3371 iommu->name);
a87f4918 3372 copied_tables = true;
091d42e4
JR
3373 }
3374 }
3375
4ed0d3e6 3376 if (!ecap_pass_through(iommu->ecap))
19943b0e 3377 hw_pass_through = 0;
8a94ade4 3378#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3379 if (pasid_supported(iommu))
d9737953 3380 intel_svm_init(iommu);
8a94ade4 3381#endif
ba395927
KA
3382 }
3383
a4c34ff1
JR
3384 /*
3385 * Now that qi is enabled on all iommus, set the root entry and flush
3386 * caches. This is required on some Intel X58 chipsets, otherwise the
3387 * flush_context function will loop forever and the boot hangs.
3388 */
3389 for_each_active_iommu(iommu, drhd) {
3390 iommu_flush_write_buffer(iommu);
3391 iommu_set_root_entry(iommu);
3392 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3393 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3394 }
3395
19943b0e 3396 if (iommu_pass_through)
e0fc7e0b
DW
3397 iommu_identity_mapping |= IDENTMAP_ALL;
3398
d3f13810 3399#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
5daab580 3400 dmar_map_gfx = 0;
19943b0e 3401#endif
e0fc7e0b 3402
5daab580
LB
3403 if (!dmar_map_gfx)
3404 iommu_identity_mapping |= IDENTMAP_GFX;
3405
21e722c4
AR
3406 check_tylersburg_isoch();
3407
4de354ec
LB
3408 ret = si_domain_init(hw_pass_through);
3409 if (ret)
3410 goto free_iommu;
86080ccc 3411
e0fc7e0b 3412
a87f4918
JR
3413 /*
3414 * If we copied translations from a previous kernel in the kdump
3415 * case, we can not assign the devices to domains now, as that
3416 * would eliminate the old mappings. So skip this part and defer
3417 * the assignment to device driver initialization time.
3418 */
3419 if (copied_tables)
3420 goto domains_done;
3421
ba395927 3422 /*
19943b0e
DW
3423 * If pass through is not set or not enabled, set up context entries for
3424 * identity mappings for rmrr, gfx, and isa, and may fall back to static
3425 * identity mapping if iommu_identity_mapping is set.
ba395927 3426 */
19943b0e
DW
3427 if (iommu_identity_mapping) {
3428 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3429 if (ret) {
9f10e5bf 3430 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3431 goto free_iommu;
ba395927
KA
3432 }
3433 }
ba395927 3434 /*
19943b0e
DW
3435 * For each rmrr
3436 * for each dev attached to rmrr
3437 * do
3438 * locate drhd for dev, alloc domain for dev
3439 * allocate free domain
3440 * allocate page table entries for rmrr
3441 * if context not allocated for bus
3442 * allocate and init context
3443 * set present in root table for this bus
3444 * init context with domain, translation etc
3445 * endfor
3446 * endfor
ba395927 3447 */
9f10e5bf 3448 pr_info("Setting RMRR:\n");
19943b0e 3449 for_each_rmrr_units(rmrr) {
b683b230
JL
3450 /* some BIOSes list non-existent devices in the DMAR table. */
3451 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3452 i, dev) {
0b9d9753 3453 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3454 if (ret)
9f10e5bf 3455 pr_err("Mapping reserved region failed\n");
ba395927 3456 }
4ed0d3e6 3457 }
49a0429e 3458
19943b0e
DW
3459 iommu_prepare_isa();
3460
a87f4918
JR
3461domains_done:
3462
ba395927
KA
3463 /*
3464 * for each drhd
3465 * enable fault log
3466 * global invalidate context cache
3467 * global invalidate iotlb
3468 * enable translation
3469 */
7c919779 3470 for_each_iommu(iommu, drhd) {
51a63e67
JC
3471 if (drhd->ignored) {
3472 /*
3473 * we always have to disable PMRs or DMA may fail on
3474 * this device
3475 */
3476 if (force_on)
7c919779 3477 iommu_disable_protect_mem_regions(iommu);
ba395927 3478 continue;
51a63e67 3479 }
ba395927
KA
3480
3481 iommu_flush_write_buffer(iommu);
3482
a222a7f0 3483#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 3484 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a7755c3c
LB
3485 /*
3486 * Call dmar_alloc_hwirq() with dmar_global_lock held,
3487 * could cause possible lock race condition.
3488 */
3489 up_write(&dmar_global_lock);
a222a7f0 3490 ret = intel_svm_enable_prq(iommu);
a7755c3c 3491 down_write(&dmar_global_lock);
a222a7f0
DW
3492 if (ret)
3493 goto free_iommu;
3494 }
3495#endif
3460a6d9
KA
3496 ret = dmar_set_interrupt(iommu);
3497 if (ret)
989d51fc 3498 goto free_iommu;
ba395927
KA
3499 }
3500
3501 return 0;
989d51fc
JL
3502
3503free_iommu:
ffebeb46
JL
3504 for_each_active_iommu(iommu, drhd) {
3505 disable_dmar_iommu(iommu);
a868e6b7 3506 free_dmar_iommu(iommu);
ffebeb46 3507 }
13cf0174 3508
d9630fe9 3509 kfree(g_iommus);
13cf0174 3510
989d51fc 3511error:
ba395927
KA
3512 return ret;
3513}
3514
5a5e02a6 3515/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3516static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3517 struct dmar_domain *domain,
3518 unsigned long nrpages, uint64_t dma_mask)
ba395927 3519{
e083ea5b 3520 unsigned long iova_pfn;
ba395927 3521
875764de
DW
3522 /* Restrict dma_mask to the width that the iommu can handle */
3523 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3524 /* Ensure we reserve the whole size-aligned region */
3525 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3526
3527 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3528 /*
3529 * First try to allocate an io virtual address in
284901a9 3530 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3531 * from higher range
ba395927 3532 */
22e2f9fa 3533 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3534 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3535 if (iova_pfn)
3536 return iova_pfn;
875764de 3537 }
538d5b33
TN
3538 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3539 IOVA_PFN(dma_mask), true);
22e2f9fa 3540 if (unlikely(!iova_pfn)) {
932a6523 3541 dev_err(dev, "Allocating %ld-page iova failed\n", nrpages);
2aac6304 3542 return 0;
f76aec76
KA
3543 }
3544
22e2f9fa 3545 return iova_pfn;
f76aec76
KA
3546}
3547
9ddbfb42 3548struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3549{
1c5ebba9 3550 struct dmar_domain *domain, *tmp;
b1ce5b79 3551 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3552 struct device *i_dev;
3553 int i, ret;
f76aec76 3554
1c5ebba9
JR
3555 domain = find_domain(dev);
3556 if (domain)
3557 goto out;
3558
3559 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3560 if (!domain)
3561 goto out;
ba395927 3562
b1ce5b79
JR
3563 /* We have a new domain - setup possible RMRRs for the device */
3564 rcu_read_lock();
3565 for_each_rmrr_units(rmrr) {
3566 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3567 i, i_dev) {
3568 if (i_dev != dev)
3569 continue;
3570
3571 ret = domain_prepare_identity_map(dev, domain,
3572 rmrr->base_address,
3573 rmrr->end_address);
3574 if (ret)
3575 dev_err(dev, "Mapping reserved region failed\n");
3576 }
3577 }
3578 rcu_read_unlock();
3579
1c5ebba9
JR
3580 tmp = set_domain_for_dev(dev, domain);
3581 if (!tmp || domain != tmp) {
3582 domain_exit(domain);
3583 domain = tmp;
3584 }
3585
3586out:
3587
3588 if (!domain)
932a6523 3589 dev_err(dev, "Allocating domain failed\n");
1c5ebba9
JR
3590
3591
f76aec76
KA
3592 return domain;
3593}
3594
ecb509ec 3595/* Check if the dev needs to go through non-identity map and unmap process.*/
48b2c937 3596static bool iommu_need_mapping(struct device *dev)
2c2e2c38 3597{
98b2fffb 3598 int ret;
2c2e2c38 3599
3d89194a 3600 if (iommu_dummy(dev))
48b2c937 3601 return false;
1e4c64c4 3602
98b2fffb
LB
3603 ret = identity_mapping(dev);
3604 if (ret) {
3605 u64 dma_mask = *dev->dma_mask;
3606
3607 if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3608 dma_mask = dev->coherent_dma_mask;
3609
3610 if (dma_mask >= dma_get_required_mask(dev))
48b2c937
CH
3611 return false;
3612
3613 /*
3614 * The device only supports 32 bit DMA: remove it from si_domain
3615 * and fall back to a non-identity mapping.
3616 */
3617 dmar_remove_one_dev_info(dev);
98b2fffb
LB
3618 ret = iommu_request_dma_domain_for_dev(dev);
3619 if (ret) {
3620 struct iommu_domain *domain;
3621 struct dmar_domain *dmar_domain;
3622
3623 domain = iommu_get_domain_for_dev(dev);
3624 if (domain) {
3625 dmar_domain = to_dmar_domain(domain);
3626 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3627 }
3628 get_valid_domain_for_dev(dev);
2c2e2c38 3629 }
98b2fffb
LB
3630
3631 dev_info(dev, "32bit DMA uses non-identity mapping\n");
2c2e2c38
FY
3632 }
3633
48b2c937 3634 return true;
2c2e2c38
FY
3635}
3636
21d5d27c
LG
3637static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3638 size_t size, int dir, u64 dma_mask)
f76aec76 3639{
f76aec76 3640 struct dmar_domain *domain;
5b6985ce 3641 phys_addr_t start_paddr;
2aac6304 3642 unsigned long iova_pfn;
f76aec76 3643 int prot = 0;
6865f0d1 3644 int ret;
8c11e798 3645 struct intel_iommu *iommu;
33041ec0 3646 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3647
3648 BUG_ON(dir == DMA_NONE);
2c2e2c38 3649
5040a918 3650 domain = get_valid_domain_for_dev(dev);
f76aec76 3651 if (!domain)
524a669b 3652 return DMA_MAPPING_ERROR;
f76aec76 3653
8c11e798 3654 iommu = domain_get_iommu(domain);
88cb6a74 3655 size = aligned_nrpages(paddr, size);
f76aec76 3656
2aac6304
OP
3657 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3658 if (!iova_pfn)
f76aec76
KA
3659 goto error;
3660
ba395927
KA
3661 /*
3662 * Check if DMAR supports zero-length reads on write only
3663 * mappings..
3664 */
3665 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3666 !cap_zlr(iommu->cap))
ba395927
KA
3667 prot |= DMA_PTE_READ;
3668 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3669 prot |= DMA_PTE_WRITE;
3670 /*
6865f0d1 3671 * [paddr, paddr + size) might cover only part of a page, so map the whole
ba395927 3672 * page. Note: if two parts of one page are mapped separately, we may
6865f0d1 3673 * end up with two guest addresses mapping to the same host paddr, but this
ba395927
KA
3674 * is not a big problem
3675 */
2aac6304 3676 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3677 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3678 if (ret)
3679 goto error;
3680
2aac6304 3681 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3682 start_paddr += paddr & ~PAGE_MASK;
3683 return start_paddr;
ba395927 3684
ba395927 3685error:
2aac6304 3686 if (iova_pfn)
22e2f9fa 3687 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
932a6523
BH
3688 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3689 size, (unsigned long long)paddr, dir);
524a669b 3690 return DMA_MAPPING_ERROR;
ba395927
KA
3691}
3692
ffbbef5c
FT
3693static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3694 unsigned long offset, size_t size,
3695 enum dma_data_direction dir,
00085f1e 3696 unsigned long attrs)
bb9e6d65 3697{
9cc0c2af
CH
3698 if (iommu_need_mapping(dev))
3699 return __intel_map_single(dev, page_to_phys(page) + offset,
3700 size, dir, *dev->dma_mask);
3701 return dma_direct_map_page(dev, page, offset, size, dir, attrs);
21d5d27c
LG
3702}
3703
3704static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3705 size_t size, enum dma_data_direction dir,
3706 unsigned long attrs)
3707{
9cc0c2af
CH
3708 if (iommu_need_mapping(dev))
3709 return __intel_map_single(dev, phys_addr, size, dir,
3710 *dev->dma_mask);
3711 return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
bb9e6d65
FT
3712}
3713
769530e4 3714static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3715{
f76aec76 3716 struct dmar_domain *domain;
d794dc9b 3717 unsigned long start_pfn, last_pfn;
769530e4 3718 unsigned long nrpages;
2aac6304 3719 unsigned long iova_pfn;
8c11e798 3720 struct intel_iommu *iommu;
ea8ea460 3721 struct page *freelist;
f7b0c4ce 3722 struct pci_dev *pdev = NULL;
ba395927 3723
1525a29a 3724 domain = find_domain(dev);
ba395927
KA
3725 BUG_ON(!domain);
3726
8c11e798
WH
3727 iommu = domain_get_iommu(domain);
3728
2aac6304 3729 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3730
769530e4 3731 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3732 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3733 last_pfn = start_pfn + nrpages - 1;
ba395927 3734
f7b0c4ce
LB
3735 if (dev_is_pci(dev))
3736 pdev = to_pci_dev(dev);
3737
932a6523 3738 dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
ba395927 3739
ea8ea460 3740 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3741
f7b0c4ce 3742 if (intel_iommu_strict || (pdev && pdev->untrusted)) {
a1ddcbe9 3743 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3744 nrpages, !freelist, 0);
5e0d2a6f 3745 /* free iova */
22e2f9fa 3746 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3747 dma_free_pagelist(freelist);
5e0d2a6f 3748 } else {
13cf0174
JR
3749 queue_iova(&domain->iovad, iova_pfn, nrpages,
3750 (unsigned long)freelist);
5e0d2a6f 3751 /*
3752 * queue up the release of the unmap to save roughly 1/6th of the
3753 * CPU time used up by the iotlb flush operation...
3754 */
5e0d2a6f 3755 }
ba395927
KA
3756}
3757
d41a4adb
JL
3758static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3759 size_t size, enum dma_data_direction dir,
00085f1e 3760 unsigned long attrs)
d41a4adb 3761{
9cc0c2af
CH
3762 if (iommu_need_mapping(dev))
3763 intel_unmap(dev, dev_addr, size);
3764 else
3765 dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3766}
3767
3768static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3769 size_t size, enum dma_data_direction dir, unsigned long attrs)
3770{
3771 if (iommu_need_mapping(dev))
3772 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3773}
3774
5040a918 3775static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3776 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3777 unsigned long attrs)
ba395927 3778{
7ec916f8
CH
3779 struct page *page = NULL;
3780 int order;
ba395927 3781
9cc0c2af
CH
3782 if (!iommu_need_mapping(dev))
3783 return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3784
7ec916f8
CH
3785 size = PAGE_ALIGN(size);
3786 order = get_order(size);
7ec916f8
CH
3787
3788 if (gfpflags_allow_blocking(flags)) {
3789 unsigned int count = size >> PAGE_SHIFT;
3790
d834c5ab
MS
3791 page = dma_alloc_from_contiguous(dev, count, order,
3792 flags & __GFP_NOWARN);
7ec916f8
CH
3793 }
3794
3795 if (!page)
3796 page = alloc_pages(flags, order);
3797 if (!page)
3798 return NULL;
3799 memset(page_address(page), 0, size);
3800
21d5d27c
LG
3801 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3802 DMA_BIDIRECTIONAL,
3803 dev->coherent_dma_mask);
524a669b 3804 if (*dma_handle != DMA_MAPPING_ERROR)
7ec916f8
CH
3805 return page_address(page);
3806 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3807 __free_pages(page, order);
36746436 3808
ba395927
KA
3809 return NULL;
3810}
3811
5040a918 3812static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3813 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3814{
7ec916f8
CH
3815 int order;
3816 struct page *page = virt_to_page(vaddr);
3817
9cc0c2af
CH
3818 if (!iommu_need_mapping(dev))
3819 return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3820
7ec916f8
CH
3821 size = PAGE_ALIGN(size);
3822 order = get_order(size);
3823
3824 intel_unmap(dev, dma_handle, size);
3825 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3826 __free_pages(page, order);
ba395927
KA
3827}
3828
5040a918 3829static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3830 int nelems, enum dma_data_direction dir,
00085f1e 3831 unsigned long attrs)
ba395927 3832{
769530e4
OP
3833 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3834 unsigned long nrpages = 0;
3835 struct scatterlist *sg;
3836 int i;
3837
9cc0c2af
CH
3838 if (!iommu_need_mapping(dev))
3839 return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3840
769530e4
OP
3841 for_each_sg(sglist, sg, nelems, i) {
3842 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3843 }
3844
3845 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3846}
3847
5040a918 3848static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3849 enum dma_data_direction dir, unsigned long attrs)
ba395927 3850{
ba395927 3851 int i;
ba395927 3852 struct dmar_domain *domain;
f76aec76
KA
3853 size_t size = 0;
3854 int prot = 0;
2aac6304 3855 unsigned long iova_pfn;
f76aec76 3856 int ret;
c03ab37c 3857 struct scatterlist *sg;
b536d24d 3858 unsigned long start_vpfn;
8c11e798 3859 struct intel_iommu *iommu;
ba395927
KA
3860
3861 BUG_ON(dir == DMA_NONE);
48b2c937 3862 if (!iommu_need_mapping(dev))
9cc0c2af 3863 return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
ba395927 3864
5040a918 3865 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3866 if (!domain)
3867 return 0;
3868
8c11e798
WH
3869 iommu = domain_get_iommu(domain);
3870
b536d24d 3871 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3872 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3873
2aac6304 3874 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3875 *dev->dma_mask);
2aac6304 3876 if (!iova_pfn) {
c03ab37c 3877 sglist->dma_length = 0;
f76aec76
KA
3878 return 0;
3879 }
3880
3881 /*
3882 * Check if DMAR supports zero-length reads on write only
3883 * mappings..
3884 */
3885 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3886 !cap_zlr(iommu->cap))
f76aec76
KA
3887 prot |= DMA_PTE_READ;
3888 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3889 prot |= DMA_PTE_WRITE;
3890
2aac6304 3891 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3892
f532959b 3893 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3894 if (unlikely(ret)) {
e1605495 3895 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3896 start_vpfn + size - 1,
3897 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3898 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3899 return 0;
ba395927
KA
3900 }
3901
ba395927
KA
3902 return nelems;
3903}
3904
02b4da5f 3905static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3906 .alloc = intel_alloc_coherent,
3907 .free = intel_free_coherent,
ba395927
KA
3908 .map_sg = intel_map_sg,
3909 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3910 .map_page = intel_map_page,
3911 .unmap_page = intel_unmap_page,
21d5d27c 3912 .map_resource = intel_map_resource,
9cc0c2af 3913 .unmap_resource = intel_unmap_resource,
fec777c3 3914 .dma_supported = dma_direct_supported,
ba395927
KA
3915};
3916
3917static inline int iommu_domain_cache_init(void)
3918{
3919 int ret = 0;
3920
3921 iommu_domain_cache = kmem_cache_create("iommu_domain",
3922 sizeof(struct dmar_domain),
3923 0,
3924 SLAB_HWCACHE_ALIGN,
3925
3926 NULL);
3927 if (!iommu_domain_cache) {
9f10e5bf 3928 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3929 ret = -ENOMEM;
3930 }
3931
3932 return ret;
3933}
3934
3935static inline int iommu_devinfo_cache_init(void)
3936{
3937 int ret = 0;
3938
3939 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3940 sizeof(struct device_domain_info),
3941 0,
3942 SLAB_HWCACHE_ALIGN,
ba395927
KA
3943 NULL);
3944 if (!iommu_devinfo_cache) {
9f10e5bf 3945 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3946 ret = -ENOMEM;
3947 }
3948
3949 return ret;
3950}
3951
ba395927
KA
3952static int __init iommu_init_mempool(void)
3953{
3954 int ret;
ae1ff3d6 3955 ret = iova_cache_get();
ba395927
KA
3956 if (ret)
3957 return ret;
3958
3959 ret = iommu_domain_cache_init();
3960 if (ret)
3961 goto domain_error;
3962
3963 ret = iommu_devinfo_cache_init();
3964 if (!ret)
3965 return ret;
3966
3967 kmem_cache_destroy(iommu_domain_cache);
3968domain_error:
ae1ff3d6 3969 iova_cache_put();
ba395927
KA
3970
3971 return -ENOMEM;
3972}
3973
3974static void __init iommu_exit_mempool(void)
3975{
3976 kmem_cache_destroy(iommu_devinfo_cache);
3977 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3978 iova_cache_put();
ba395927
KA
3979}
3980
556ab45f
DW
3981static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3982{
3983 struct dmar_drhd_unit *drhd;
3984 u32 vtbar;
3985 int rc;
3986
3987 /* We know that this device on this chipset has its own IOMMU.
3988 * If we find it under a different IOMMU, then the BIOS is lying
3989 * to us. Hope that the IOMMU for this device is actually
3990 * disabled, and it needs no translation...
3991 */
3992 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3993 if (rc) {
3994 /* "can't" happen */
3995 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3996 return;
3997 }
3998 vtbar &= 0xffff0000;
3999
4000 /* we know that this iommu should be at offset 0xa000 from vtbar */
4001 drhd = dmar_find_matched_drhd_unit(pdev);
4002 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4003 TAINT_FIRMWARE_WORKAROUND,
4004 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4005 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4006}
4007DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4008
ba395927
KA
4009static void __init init_no_remapping_devices(void)
4010{
4011 struct dmar_drhd_unit *drhd;
832bd858 4012 struct device *dev;
b683b230 4013 int i;
ba395927
KA
4014
4015 for_each_drhd_unit(drhd) {
4016 if (!drhd->include_all) {
b683b230
JL
4017 for_each_active_dev_scope(drhd->devices,
4018 drhd->devices_cnt, i, dev)
4019 break;
832bd858 4020 /* ignore DMAR unit if no devices exist */
ba395927
KA
4021 if (i == drhd->devices_cnt)
4022 drhd->ignored = 1;
4023 }
4024 }
4025
7c919779 4026 for_each_active_drhd_unit(drhd) {
7c919779 4027 if (drhd->include_all)
ba395927
KA
4028 continue;
4029
b683b230
JL
4030 for_each_active_dev_scope(drhd->devices,
4031 drhd->devices_cnt, i, dev)
832bd858 4032 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4033 break;
ba395927
KA
4034 if (i < drhd->devices_cnt)
4035 continue;
4036
c0771df8
DW
4037 /* This IOMMU has *only* gfx devices. Either bypass it or
4038 set the gfx_mapped flag, as appropriate */
cf1ec453 4039 if (!dmar_map_gfx) {
c0771df8 4040 drhd->ignored = 1;
b683b230
JL
4041 for_each_active_dev_scope(drhd->devices,
4042 drhd->devices_cnt, i, dev)
832bd858 4043 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4044 }
4045 }
4046}
4047
f59c7b69
FY
4048#ifdef CONFIG_SUSPEND
4049static int init_iommu_hw(void)
4050{
4051 struct dmar_drhd_unit *drhd;
4052 struct intel_iommu *iommu = NULL;
4053
4054 for_each_active_iommu(iommu, drhd)
4055 if (iommu->qi)
4056 dmar_reenable_qi(iommu);
4057
b779260b
JC
4058 for_each_iommu(iommu, drhd) {
4059 if (drhd->ignored) {
4060 /*
4061 * we always have to disable PMRs or DMA may fail on
4062 * this device
4063 */
4064 if (force_on)
4065 iommu_disable_protect_mem_regions(iommu);
4066 continue;
4067 }
095303e0 4068
f59c7b69
FY
4069 iommu_flush_write_buffer(iommu);
4070
4071 iommu_set_root_entry(iommu);
4072
4073 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4074 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4075 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4076 iommu_enable_translation(iommu);
b94996c9 4077 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4078 }
4079
4080 return 0;
4081}
4082
4083static void iommu_flush_all(void)
4084{
4085 struct dmar_drhd_unit *drhd;
4086 struct intel_iommu *iommu;
4087
4088 for_each_active_iommu(iommu, drhd) {
4089 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4090 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4091 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4092 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4093 }
4094}
4095
134fac3f 4096static int iommu_suspend(void)
f59c7b69
FY
4097{
4098 struct dmar_drhd_unit *drhd;
4099 struct intel_iommu *iommu = NULL;
4100 unsigned long flag;
4101
4102 for_each_active_iommu(iommu, drhd) {
6396bb22 4103 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4104 GFP_ATOMIC);
4105 if (!iommu->iommu_state)
4106 goto nomem;
4107 }
4108
4109 iommu_flush_all();
4110
4111 for_each_active_iommu(iommu, drhd) {
4112 iommu_disable_translation(iommu);
4113
1f5b3c3f 4114 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4115
4116 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4117 readl(iommu->reg + DMAR_FECTL_REG);
4118 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4119 readl(iommu->reg + DMAR_FEDATA_REG);
4120 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4121 readl(iommu->reg + DMAR_FEADDR_REG);
4122 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4123 readl(iommu->reg + DMAR_FEUADDR_REG);
4124
1f5b3c3f 4125 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4126 }
4127 return 0;
4128
4129nomem:
4130 for_each_active_iommu(iommu, drhd)
4131 kfree(iommu->iommu_state);
4132
4133 return -ENOMEM;
4134}
4135
134fac3f 4136static void iommu_resume(void)
f59c7b69
FY
4137{
4138 struct dmar_drhd_unit *drhd;
4139 struct intel_iommu *iommu = NULL;
4140 unsigned long flag;
4141
4142 if (init_iommu_hw()) {
b779260b
JC
4143 if (force_on)
4144 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4145 else
4146 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4147 return;
f59c7b69
FY
4148 }
4149
4150 for_each_active_iommu(iommu, drhd) {
4151
1f5b3c3f 4152 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4153
4154 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4155 iommu->reg + DMAR_FECTL_REG);
4156 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4157 iommu->reg + DMAR_FEDATA_REG);
4158 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4159 iommu->reg + DMAR_FEADDR_REG);
4160 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4161 iommu->reg + DMAR_FEUADDR_REG);
4162
1f5b3c3f 4163 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4164 }
4165
4166 for_each_active_iommu(iommu, drhd)
4167 kfree(iommu->iommu_state);
f59c7b69
FY
4168}
4169
134fac3f 4170static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4171 .resume = iommu_resume,
4172 .suspend = iommu_suspend,
4173};
4174
134fac3f 4175static void __init init_iommu_pm_ops(void)
f59c7b69 4176{
134fac3f 4177 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4178}
4179
4180#else
99592ba4 4181static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4182#endif /* CONFIG_PM */
4183
318fe7df 4184
c2a0b538 4185int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4186{
4187 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4188 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4189 struct dmar_rmrr_unit *rmrru;
0659b8dc 4190 size_t length;
318fe7df
SS
4191
4192 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4193 if (!rmrru)
0659b8dc 4194 goto out;
318fe7df
SS
4195
4196 rmrru->hdr = header;
4197 rmrr = (struct acpi_dmar_reserved_memory *)header;
4198 rmrru->base_address = rmrr->base_address;
4199 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4200
4201 length = rmrr->end_address - rmrr->base_address + 1;
4202 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4203 IOMMU_RESV_DIRECT);
4204 if (!rmrru->resv)
4205 goto free_rmrru;
4206
2e455289
JL
4207 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4208 ((void *)rmrr) + rmrr->header.length,
4209 &rmrru->devices_cnt);
0659b8dc
EA
4210 if (rmrru->devices_cnt && rmrru->devices == NULL)
4211 goto free_all;
318fe7df 4212
2e455289 4213 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4214
2e455289 4215 return 0;
0659b8dc
EA
4216free_all:
4217 kfree(rmrru->resv);
4218free_rmrru:
4219 kfree(rmrru);
4220out:
4221 return -ENOMEM;
318fe7df
SS
4222}
4223
6b197249
JL
4224static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4225{
4226 struct dmar_atsr_unit *atsru;
4227 struct acpi_dmar_atsr *tmp;
4228
4229 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4230 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4231 if (atsr->segment != tmp->segment)
4232 continue;
4233 if (atsr->header.length != tmp->header.length)
4234 continue;
4235 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4236 return atsru;
4237 }
4238
4239 return NULL;
4240}
4241
4242int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4243{
4244 struct acpi_dmar_atsr *atsr;
4245 struct dmar_atsr_unit *atsru;
4246
b608fe35 4247 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4248 return 0;
4249
318fe7df 4250 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4251 atsru = dmar_find_atsr(atsr);
4252 if (atsru)
4253 return 0;
4254
4255 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4256 if (!atsru)
4257 return -ENOMEM;
4258
6b197249
JL
4259 /*
4260 * If memory is allocated from slab by ACPI _DSM method, we need to
4261 * copy the memory content because the memory buffer will be freed
4262 * on return.
4263 */
4264 atsru->hdr = (void *)(atsru + 1);
4265 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4266 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4267 if (!atsru->include_all) {
4268 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4269 (void *)atsr + atsr->header.length,
4270 &atsru->devices_cnt);
4271 if (atsru->devices_cnt && atsru->devices == NULL) {
4272 kfree(atsru);
4273 return -ENOMEM;
4274 }
4275 }
318fe7df 4276
0e242612 4277 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4278
4279 return 0;
4280}
4281
9bdc531e
JL
4282static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4283{
4284 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4285 kfree(atsru);
4286}
4287
6b197249
JL
4288int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4289{
4290 struct acpi_dmar_atsr *atsr;
4291 struct dmar_atsr_unit *atsru;
4292
4293 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4294 atsru = dmar_find_atsr(atsr);
4295 if (atsru) {
4296 list_del_rcu(&atsru->list);
4297 synchronize_rcu();
4298 intel_iommu_free_atsr(atsru);
4299 }
4300
4301 return 0;
4302}
4303
4304int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4305{
4306 int i;
4307 struct device *dev;
4308 struct acpi_dmar_atsr *atsr;
4309 struct dmar_atsr_unit *atsru;
4310
4311 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4312 atsru = dmar_find_atsr(atsr);
4313 if (!atsru)
4314 return 0;
4315
194dc870 4316 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4317 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4318 i, dev)
4319 return -EBUSY;
194dc870 4320 }
6b197249
JL
4321
4322 return 0;
4323}
4324
ffebeb46
JL
4325static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4326{
e083ea5b 4327 int sp, ret;
ffebeb46
JL
4328 struct intel_iommu *iommu = dmaru->iommu;
4329
4330 if (g_iommus[iommu->seq_id])
4331 return 0;
4332
4333 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4334 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4335 iommu->name);
4336 return -ENXIO;
4337 }
4338 if (!ecap_sc_support(iommu->ecap) &&
4339 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4340 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4341 iommu->name);
4342 return -ENXIO;
4343 }
4344 sp = domain_update_iommu_superpage(iommu) - 1;
4345 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4346 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4347 iommu->name);
4348 return -ENXIO;
4349 }
4350
4351 /*
4352 * Disable translation if already enabled prior to OS handover.
4353 */
4354 if (iommu->gcmd & DMA_GCMD_TE)
4355 iommu_disable_translation(iommu);
4356
4357 g_iommus[iommu->seq_id] = iommu;
4358 ret = iommu_init_domains(iommu);
4359 if (ret == 0)
4360 ret = iommu_alloc_root_entry(iommu);
4361 if (ret)
4362 goto out;
4363
8a94ade4 4364#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4365 if (pasid_supported(iommu))
d9737953 4366 intel_svm_init(iommu);
8a94ade4
DW
4367#endif
4368
ffebeb46
JL
4369 if (dmaru->ignored) {
4370 /*
4371 * we always have to disable PMRs or DMA may fail on this device
4372 */
4373 if (force_on)
4374 iommu_disable_protect_mem_regions(iommu);
4375 return 0;
4376 }
4377
4378 intel_iommu_init_qi(iommu);
4379 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4380
4381#ifdef CONFIG_INTEL_IOMMU_SVM
765b6a98 4382 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
a222a7f0
DW
4383 ret = intel_svm_enable_prq(iommu);
4384 if (ret)
4385 goto disable_iommu;
4386 }
4387#endif
ffebeb46
JL
4388 ret = dmar_set_interrupt(iommu);
4389 if (ret)
4390 goto disable_iommu;
4391
4392 iommu_set_root_entry(iommu);
4393 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4394 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4395 iommu_enable_translation(iommu);
4396
ffebeb46
JL
4397 iommu_disable_protect_mem_regions(iommu);
4398 return 0;
4399
4400disable_iommu:
4401 disable_dmar_iommu(iommu);
4402out:
4403 free_dmar_iommu(iommu);
4404 return ret;
4405}
4406
6b197249
JL
4407int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4408{
ffebeb46
JL
4409 int ret = 0;
4410 struct intel_iommu *iommu = dmaru->iommu;
4411
4412 if (!intel_iommu_enabled)
4413 return 0;
4414 if (iommu == NULL)
4415 return -EINVAL;
4416
4417 if (insert) {
4418 ret = intel_iommu_add(dmaru);
4419 } else {
4420 disable_dmar_iommu(iommu);
4421 free_dmar_iommu(iommu);
4422 }
4423
4424 return ret;
6b197249
JL
4425}
4426
9bdc531e
JL
4427static void intel_iommu_free_dmars(void)
4428{
4429 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4430 struct dmar_atsr_unit *atsru, *atsr_n;
4431
4432 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4433 list_del(&rmrru->list);
4434 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4435 kfree(rmrru->resv);
9bdc531e 4436 kfree(rmrru);
318fe7df
SS
4437 }
4438
9bdc531e
JL
4439 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4440 list_del(&atsru->list);
4441 intel_iommu_free_atsr(atsru);
4442 }
318fe7df
SS
4443}
4444
4445int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4446{
b683b230 4447 int i, ret = 1;
318fe7df 4448 struct pci_bus *bus;
832bd858
DW
4449 struct pci_dev *bridge = NULL;
4450 struct device *tmp;
318fe7df
SS
4451 struct acpi_dmar_atsr *atsr;
4452 struct dmar_atsr_unit *atsru;
4453
4454 dev = pci_physfn(dev);
318fe7df 4455 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4456 bridge = bus->self;
d14053b3
DW
4457 /* If it's an integrated device, allow ATS */
4458 if (!bridge)
4459 return 1;
4460 /* Connected via non-PCIe: no ATS */
4461 if (!pci_is_pcie(bridge) ||
62f87c0e 4462 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4463 return 0;
d14053b3 4464 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4465 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4466 break;
318fe7df
SS
4467 }
4468
0e242612 4469 rcu_read_lock();
b5f82ddf
JL
4470 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4471 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4472 if (atsr->segment != pci_domain_nr(dev->bus))
4473 continue;
4474
b683b230 4475 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4476 if (tmp == &bridge->dev)
b683b230 4477 goto out;
b5f82ddf
JL
4478
4479 if (atsru->include_all)
b683b230 4480 goto out;
b5f82ddf 4481 }
b683b230
JL
4482 ret = 0;
4483out:
0e242612 4484 rcu_read_unlock();
318fe7df 4485
b683b230 4486 return ret;
318fe7df
SS
4487}
4488
59ce0515
JL
4489int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4490{
e083ea5b 4491 int ret;
59ce0515
JL
4492 struct dmar_rmrr_unit *rmrru;
4493 struct dmar_atsr_unit *atsru;
4494 struct acpi_dmar_atsr *atsr;
4495 struct acpi_dmar_reserved_memory *rmrr;
4496
b608fe35 4497 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4498 return 0;
4499
4500 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4501 rmrr = container_of(rmrru->hdr,
4502 struct acpi_dmar_reserved_memory, header);
4503 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4504 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4505 ((void *)rmrr) + rmrr->header.length,
4506 rmrr->segment, rmrru->devices,
4507 rmrru->devices_cnt);
e083ea5b 4508 if (ret < 0)
59ce0515 4509 return ret;
e6a8c9b3 4510 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4511 dmar_remove_dev_scope(info, rmrr->segment,
4512 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4513 }
4514 }
4515
4516 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4517 if (atsru->include_all)
4518 continue;
4519
4520 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4521 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4522 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4523 (void *)atsr + atsr->header.length,
4524 atsr->segment, atsru->devices,
4525 atsru->devices_cnt);
4526 if (ret > 0)
4527 break;
e083ea5b 4528 else if (ret < 0)
59ce0515 4529 return ret;
e6a8c9b3 4530 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4531 if (dmar_remove_dev_scope(info, atsr->segment,
4532 atsru->devices, atsru->devices_cnt))
4533 break;
4534 }
4535 }
4536
4537 return 0;
4538}
4539
99dcaded
FY
4540/*
4541 * Here we only respond to a device being unbound from its driver.
4542 *
4543 * A newly added device is not attached to its DMAR domain here yet; that
4544 * happens when the device is first mapped to an iova.
4545 */
4546static int device_notifier(struct notifier_block *nb,
4547 unsigned long action, void *data)
4548{
4549 struct device *dev = data;
99dcaded
FY
4550 struct dmar_domain *domain;
4551
3d89194a 4552 if (iommu_dummy(dev))
44cd613c
DW
4553 return 0;
4554
117266fd
LB
4555 if (action == BUS_NOTIFY_REMOVED_DEVICE) {
4556 domain = find_domain(dev);
4557 if (!domain)
4558 return 0;
99dcaded 4559
117266fd 4560 dmar_remove_one_dev_info(dev);
117266fd
LB
4561 } else if (action == BUS_NOTIFY_ADD_DEVICE) {
4562 if (iommu_should_identity_map(dev, 1))
4563 domain_add_dev_info(si_domain, dev);
4564 }
a97590e5 4565
99dcaded
FY
4566 return 0;
4567}
4568
4569static struct notifier_block device_nb = {
4570 .notifier_call = device_notifier,
4571};
4572
75f05569
JL
4573static int intel_iommu_memory_notifier(struct notifier_block *nb,
4574 unsigned long val, void *v)
4575{
4576 struct memory_notify *mhp = v;
4577 unsigned long long start, end;
4578 unsigned long start_vpfn, last_vpfn;
4579
4580 switch (val) {
4581 case MEM_GOING_ONLINE:
4582 start = mhp->start_pfn << PAGE_SHIFT;
4583 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4584 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4585 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4586 start, end);
4587 return NOTIFY_BAD;
4588 }
4589 break;
4590
4591 case MEM_OFFLINE:
4592 case MEM_CANCEL_ONLINE:
4593 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4594 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4595 while (start_vpfn <= last_vpfn) {
4596 struct iova *iova;
4597 struct dmar_drhd_unit *drhd;
4598 struct intel_iommu *iommu;
ea8ea460 4599 struct page *freelist;
75f05569
JL
4600
4601 iova = find_iova(&si_domain->iovad, start_vpfn);
4602 if (iova == NULL) {
9f10e5bf 4603 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4604 start_vpfn);
4605 break;
4606 }
4607
4608 iova = split_and_remove_iova(&si_domain->iovad, iova,
4609 start_vpfn, last_vpfn);
4610 if (iova == NULL) {
9f10e5bf 4611 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4612 start_vpfn, last_vpfn);
4613 return NOTIFY_BAD;
4614 }
4615
ea8ea460
DW
4616 freelist = domain_unmap(si_domain, iova->pfn_lo,
4617 iova->pfn_hi);
4618
75f05569
JL
4619 rcu_read_lock();
4620 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4621 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4622 iova->pfn_lo, iova_size(iova),
ea8ea460 4623 !freelist, 0);
75f05569 4624 rcu_read_unlock();
ea8ea460 4625 dma_free_pagelist(freelist);
75f05569
JL
4626
4627 start_vpfn = iova->pfn_hi + 1;
4628 free_iova_mem(iova);
4629 }
4630 break;
4631 }
4632
4633 return NOTIFY_OK;
4634}
4635
4636static struct notifier_block intel_iommu_memory_nb = {
4637 .notifier_call = intel_iommu_memory_notifier,
4638 .priority = 0
4639};
4640
22e2f9fa
OP
4641static void free_all_cpu_cached_iovas(unsigned int cpu)
4642{
4643 int i;
4644
4645 for (i = 0; i < g_num_of_iommus; i++) {
4646 struct intel_iommu *iommu = g_iommus[i];
4647 struct dmar_domain *domain;
0caa7616 4648 int did;
22e2f9fa
OP
4649
4650 if (!iommu)
4651 continue;
4652
3bd4f911 4653 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4654 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4655
4656 if (!domain)
4657 continue;
4658 free_cpu_cached_iovas(cpu, &domain->iovad);
4659 }
4660 }
4661}
4662
21647615 4663static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4664{
21647615 4665 free_all_cpu_cached_iovas(cpu);
21647615 4666 return 0;
aa473240
OP
4667}
4668
161b28aa
JR
4669static void intel_disable_iommus(void)
4670{
4671 struct intel_iommu *iommu = NULL;
4672 struct dmar_drhd_unit *drhd;
4673
4674 for_each_iommu(iommu, drhd)
4675 iommu_disable_translation(iommu);
4676}
4677
a7fdb6e6
JR
4678static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4679{
2926a2aa
JR
4680 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4681
4682 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4683}
4684
a5459cfe
AW
4685static ssize_t intel_iommu_show_version(struct device *dev,
4686 struct device_attribute *attr,
4687 char *buf)
4688{
a7fdb6e6 4689 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4690 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4691 return sprintf(buf, "%d:%d\n",
4692 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4693}
4694static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4695
4696static ssize_t intel_iommu_show_address(struct device *dev,
4697 struct device_attribute *attr,
4698 char *buf)
4699{
a7fdb6e6 4700 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4701 return sprintf(buf, "%llx\n", iommu->reg_phys);
4702}
4703static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4704
4705static ssize_t intel_iommu_show_cap(struct device *dev,
4706 struct device_attribute *attr,
4707 char *buf)
4708{
a7fdb6e6 4709 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4710 return sprintf(buf, "%llx\n", iommu->cap);
4711}
4712static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4713
4714static ssize_t intel_iommu_show_ecap(struct device *dev,
4715 struct device_attribute *attr,
4716 char *buf)
4717{
a7fdb6e6 4718 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4719 return sprintf(buf, "%llx\n", iommu->ecap);
4720}
4721static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4722
2238c082
AW
4723static ssize_t intel_iommu_show_ndoms(struct device *dev,
4724 struct device_attribute *attr,
4725 char *buf)
4726{
a7fdb6e6 4727 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4728 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4729}
4730static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4731
4732static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4733 struct device_attribute *attr,
4734 char *buf)
4735{
a7fdb6e6 4736 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4737 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4738 cap_ndoms(iommu->cap)));
4739}
4740static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4741
a5459cfe
AW
4742static struct attribute *intel_iommu_attrs[] = {
4743 &dev_attr_version.attr,
4744 &dev_attr_address.attr,
4745 &dev_attr_cap.attr,
4746 &dev_attr_ecap.attr,
2238c082
AW
4747 &dev_attr_domains_supported.attr,
4748 &dev_attr_domains_used.attr,
a5459cfe
AW
4749 NULL,
4750};
4751
4752static struct attribute_group intel_iommu_group = {
4753 .name = "intel-iommu",
4754 .attrs = intel_iommu_attrs,
4755};
4756
4757const struct attribute_group *intel_iommu_groups[] = {
4758 &intel_iommu_group,
4759 NULL,
4760};
4761
89a6079d
LB
4762static int __init platform_optin_force_iommu(void)
4763{
4764 struct pci_dev *pdev = NULL;
4765 bool has_untrusted_dev = false;
4766
4767 if (!dmar_platform_optin() || no_platform_optin)
4768 return 0;
4769
4770 for_each_pci_dev(pdev) {
4771 if (pdev->untrusted) {
4772 has_untrusted_dev = true;
4773 break;
4774 }
4775 }
4776
4777 if (!has_untrusted_dev)
4778 return 0;
4779
4780 if (no_iommu || dmar_disabled)
4781 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4782
4783 /*
4784 * If Intel-IOMMU is disabled by default, we will apply identity
4785 * map for all devices except those marked as being untrusted.
4786 */
4787 if (dmar_disabled)
4788 iommu_identity_mapping |= IDENTMAP_ALL;
4789
4790 dmar_disabled = 0;
4791#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
4792 swiotlb = 0;
4793#endif
4794 no_iommu = 0;
4795
4796 return 1;
4797}
4798
fa212a97
LB
4799static int __init probe_acpi_namespace_devices(void)
4800{
4801 struct dmar_drhd_unit *drhd;
4802 struct intel_iommu *iommu;
4803 struct device *dev;
4804 int i, ret = 0;
4805
4806 for_each_active_iommu(iommu, drhd) {
4807 for_each_active_dev_scope(drhd->devices,
4808 drhd->devices_cnt, i, dev) {
4809 struct acpi_device_physical_node *pn;
4810 struct iommu_group *group;
4811 struct acpi_device *adev;
4812
4813 if (dev->bus != &acpi_bus_type)
4814 continue;
4815
4816 adev = to_acpi_device(dev);
4817 mutex_lock(&adev->physical_node_lock);
4818 list_for_each_entry(pn,
4819 &adev->physical_node_list, node) {
4820 group = iommu_group_get(pn->dev);
4821 if (group) {
4822 iommu_group_put(group);
4823 continue;
4824 }
4825
4826 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4827 ret = iommu_probe_device(pn->dev);
4828 if (ret)
4829 break;
4830 }
4831 mutex_unlock(&adev->physical_node_lock);
4832
4833 if (ret)
4834 return ret;
4835 }
4836 }
4837
4838 return 0;
4839}
4840
ba395927
KA
4841int __init intel_iommu_init(void)
4842{
9bdc531e 4843 int ret = -ENODEV;
3a93c841 4844 struct dmar_drhd_unit *drhd;
7c919779 4845 struct intel_iommu *iommu;
ba395927 4846
89a6079d
LB
4847 /*
4848 * Intel IOMMU is required for a TXT/tboot launch or platform
4849 * opt in, so enforce that.
4850 */
4851 force_on = tboot_force_iommu() || platform_optin_force_iommu();
a59b50e9 4852
3a5670e8
JL
4853 if (iommu_init_mempool()) {
4854 if (force_on)
4855 panic("tboot: Failed to initialize iommu memory\n");
4856 return -ENOMEM;
4857 }
4858
4859 down_write(&dmar_global_lock);
a59b50e9
JC
4860 if (dmar_table_init()) {
4861 if (force_on)
4862 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4863 goto out_free_dmar;
a59b50e9 4864 }
ba395927 4865
c2c7286a 4866 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4867 if (force_on)
4868 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4869 goto out_free_dmar;
a59b50e9 4870 }
1886e8a9 4871
ec154bf5
JR
4872 up_write(&dmar_global_lock);
4873
4874 /*
4875 * The bus notifier takes the dmar_global_lock, so lockdep will
4876 * complain later when we register it under the lock.
4877 */
4878 dmar_register_bus_notifier();
4879
4880 down_write(&dmar_global_lock);
4881
161b28aa 4882 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4883 /*
4884 * We exit the function here to ensure IOMMU's remapping and
4885 * mempool aren't setup, which means that the IOMMU's PMRs
4886 * won't be disabled via the call to init_dmars(). So disable
4887 * it explicitly here. The PMRs were setup by tboot prior to
4888 * calling SENTER, but the kernel is expected to reset/tear
4889 * down the PMRs.
4890 */
4891 if (intel_iommu_tboot_noforce) {
4892 for_each_iommu(iommu, drhd)
4893 iommu_disable_protect_mem_regions(iommu);
4894 }
4895
161b28aa
JR
4896 /*
4897 * Make sure the IOMMUs are switched off, even when we
4898 * boot into a kexec kernel and the previous kernel left
4899 * them enabled
4900 */
4901 intel_disable_iommus();
9bdc531e 4902 goto out_free_dmar;
161b28aa 4903 }
2ae21010 4904
318fe7df 4905 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4906 pr_info("No RMRR found\n");
318fe7df
SS
4907
4908 if (list_empty(&dmar_atsr_units))
9f10e5bf 4909 pr_info("No ATSR found\n");
318fe7df 4910
51a63e67
JC
4911 if (dmar_init_reserved_ranges()) {
4912 if (force_on)
4913 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4914 goto out_free_reserved_range;
51a63e67 4915 }
ba395927 4916
cf1ec453
LB
4917 if (dmar_map_gfx)
4918 intel_iommu_gfx_mapped = 1;
4919
ba395927
KA
4920 init_no_remapping_devices();
4921
b779260b 4922 ret = init_dmars();
ba395927 4923 if (ret) {
a59b50e9
JC
4924 if (force_on)
4925 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4926 pr_err("Initialization failed\n");
9bdc531e 4927 goto out_free_reserved_range;
ba395927 4928 }
3a5670e8 4929 up_write(&dmar_global_lock);
ba395927 4930
4fac8076 4931#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4932 swiotlb = 0;
4933#endif
19943b0e 4934 dma_ops = &intel_dma_ops;
4ed0d3e6 4935
134fac3f 4936 init_iommu_pm_ops();
a8bcbb0d 4937
39ab9555
JR
4938 for_each_active_iommu(iommu, drhd) {
4939 iommu_device_sysfs_add(&iommu->iommu, NULL,
4940 intel_iommu_groups,
4941 "%s", iommu->name);
4942 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4943 iommu_device_register(&iommu->iommu);
4944 }
a5459cfe 4945
4236d97d 4946 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4947 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4948 if (si_domain && !hw_pass_through)
4949 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4950 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4951 intel_iommu_cpu_dead);
d8190dc6 4952
fa212a97
LB
4953 if (probe_acpi_namespace_devices())
4954 pr_warn("ACPI name space devices didn't probe correctly\n");
4955
d8190dc6
LB
4956 /* Finally, we enable the DMA remapping hardware. */
4957 for_each_iommu(iommu, drhd) {
4958 if (!translation_pre_enabled(iommu))
4959 iommu_enable_translation(iommu);
4960
4961 iommu_disable_protect_mem_regions(iommu);
4962 }
4963 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4964
8bc1f85c 4965 intel_iommu_enabled = 1;
ee2636b8 4966 intel_iommu_debugfs_init();
8bc1f85c 4967
ba395927 4968 return 0;
9bdc531e
JL
4969
4970out_free_reserved_range:
4971 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4972out_free_dmar:
4973 intel_iommu_free_dmars();
3a5670e8
JL
4974 up_write(&dmar_global_lock);
4975 iommu_exit_mempool();
9bdc531e 4976 return ret;
ba395927 4977}
e820482c 4978
2452d9db 4979static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4980{
4981 struct intel_iommu *iommu = opaque;
4982
2452d9db 4983 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4984 return 0;
4985}
4986
4987/*
4988 * NB - intel-iommu lacks any sort of reference counting for the users of
4989 * dependent devices. If multiple endpoints have intersecting dependent
4990 * devices, unbinding the driver from any one of them will possibly leave
4991 * the others unable to operate.
4992 */
2452d9db 4993static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4994{
0bcb3e28 4995 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4996 return;
4997
2452d9db 4998 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4999}
5000
127c7615 5001static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 5002{
942067f1 5003 struct dmar_domain *domain;
c7151a8d
WH
5004 struct intel_iommu *iommu;
5005 unsigned long flags;
c7151a8d 5006
55d94043
JR
5007 assert_spin_locked(&device_domain_lock);
5008
127c7615 5009 if (WARN_ON(!info))
c7151a8d
WH
5010 return;
5011
127c7615 5012 iommu = info->iommu;
942067f1 5013 domain = info->domain;
c7151a8d 5014
127c7615 5015 if (info->dev) {
ef848b7e
LB
5016 if (dev_is_pci(info->dev) && sm_supported(iommu))
5017 intel_pasid_tear_down_entry(iommu, info->dev,
5018 PASID_RID2PASID);
5019
127c7615
JR
5020 iommu_disable_dev_iotlb(info);
5021 domain_context_clear(iommu, info->dev);
a7fc93fe 5022 intel_pasid_free_table(info->dev);
127c7615 5023 }
c7151a8d 5024
b608ac3b 5025 unlink_domain_info(info);
c7151a8d 5026
d160aca5 5027 spin_lock_irqsave(&iommu->lock, flags);
942067f1 5028 domain_detach_iommu(domain, iommu);
d160aca5 5029 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 5030
942067f1
LB
5031 /* free the private domain */
5032 if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
5033 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY))
5034 domain_exit(info->domain);
5035
127c7615 5036 free_devinfo_mem(info);
c7151a8d 5037}
c7151a8d 5038
71753239 5039static void dmar_remove_one_dev_info(struct device *dev)
55d94043 5040{
127c7615 5041 struct device_domain_info *info;
55d94043 5042 unsigned long flags;
3e7abe25 5043
55d94043 5044 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
5045 info = dev->archdata.iommu;
5046 __dmar_remove_one_dev_info(info);
55d94043 5047 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
5048}
5049
2c2e2c38 5050static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
5051{
5052 int adjust_width;
5053
aa3ac946 5054 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
5055 domain_reserve_special_ranges(domain);
5056
5057 /* calculate AGAW */
5058 domain->gaw = guest_width;
5059 adjust_width = guestwidth_to_adjustwidth(guest_width);
5060 domain->agaw = width_to_agaw(adjust_width);
5061
5e98c4b1 5062 domain->iommu_coherency = 0;
c5b15255 5063 domain->iommu_snooping = 0;
6dd9a7c7 5064 domain->iommu_superpage = 0;
fe40f1e0 5065 domain->max_addr = 0;
5e98c4b1
WH
5066
5067 /* always allocate the top pgd */
4c923d47 5068 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
5069 if (!domain->pgd)
5070 return -ENOMEM;
5071 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5072 return 0;
5073}
5074
00a77deb 5075static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 5076{
5d450806 5077 struct dmar_domain *dmar_domain;
00a77deb
JR
5078 struct iommu_domain *domain;
5079
4de354ec 5080 switch (type) {
fa954e68
LB
5081 case IOMMU_DOMAIN_DMA:
5082 /* fallthrough */
4de354ec 5083 case IOMMU_DOMAIN_UNMANAGED:
fa954e68 5084 dmar_domain = alloc_domain(0);
4de354ec
LB
5085 if (!dmar_domain) {
5086 pr_err("Can't allocate dmar_domain\n");
5087 return NULL;
5088 }
5089 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5090 pr_err("Domain initialization failed\n");
5091 domain_exit(dmar_domain);
5092 return NULL;
5093 }
fa954e68
LB
5094
5095 if (type == IOMMU_DOMAIN_DMA &&
5096 init_iova_flush_queue(&dmar_domain->iovad,
5097 iommu_flush_iova, iova_entry_free)) {
5098 pr_warn("iova flush queue initialization failed\n");
5099 intel_iommu_strict = 1;
5100 }
5101
4de354ec 5102 domain_update_iommu_cap(dmar_domain);
38717946 5103
4de354ec
LB
5104 domain = &dmar_domain->domain;
5105 domain->geometry.aperture_start = 0;
5106 domain->geometry.aperture_end =
5107 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5108 domain->geometry.force_aperture = true;
5109
5110 return domain;
5111 case IOMMU_DOMAIN_IDENTITY:
5112 return &si_domain->domain;
5113 default:
00a77deb 5114 return NULL;
38717946 5115 }
8a0e715b 5116
4de354ec 5117 return NULL;
38717946 5118}
38717946 5119
00a77deb 5120static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 5121{
4de354ec
LB
5122 if (domain != &si_domain->domain)
5123 domain_exit(to_dmar_domain(domain));
38717946 5124}
38717946 5125
67b8e02b
LB
5126/*
5127 * Check whether a @domain could be attached to the @dev through the
5128 * aux-domain attach/detach APIs.
5129 */
5130static inline bool
5131is_aux_domain(struct device *dev, struct iommu_domain *domain)
5132{
5133 struct device_domain_info *info = dev->archdata.iommu;
5134
5135 return info && info->auxd_enabled &&
5136 domain->type == IOMMU_DOMAIN_UNMANAGED;
5137}
5138
5139static void auxiliary_link_device(struct dmar_domain *domain,
5140 struct device *dev)
5141{
5142 struct device_domain_info *info = dev->archdata.iommu;
5143
5144 assert_spin_locked(&device_domain_lock);
5145 if (WARN_ON(!info))
5146 return;
5147
5148 domain->auxd_refcnt++;
5149 list_add(&domain->auxd, &info->auxiliary_domains);
5150}
5151
5152static void auxiliary_unlink_device(struct dmar_domain *domain,
5153 struct device *dev)
5154{
5155 struct device_domain_info *info = dev->archdata.iommu;
5156
5157 assert_spin_locked(&device_domain_lock);
5158 if (WARN_ON(!info))
5159 return;
5160
5161 list_del(&domain->auxd);
5162 domain->auxd_refcnt--;
5163
5164 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5165 intel_pasid_free_id(domain->default_pasid);
5166}
5167
5168static int aux_domain_add_dev(struct dmar_domain *domain,
5169 struct device *dev)
5170{
5171 int ret;
5172 u8 bus, devfn;
5173 unsigned long flags;
5174 struct intel_iommu *iommu;
5175
5176 iommu = device_to_iommu(dev, &bus, &devfn);
5177 if (!iommu)
5178 return -ENODEV;
5179
5180 if (domain->default_pasid <= 0) {
5181 int pasid;
5182
5183 pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5184 pci_max_pasids(to_pci_dev(dev)),
5185 GFP_KERNEL);
5186 if (pasid <= 0) {
5187 pr_err("Can't allocate default pasid\n");
5188 return -ENODEV;
5189 }
5190 domain->default_pasid = pasid;
5191 }
5192
5193 spin_lock_irqsave(&device_domain_lock, flags);
5194 /*
5195 * iommu->lock must be held to attach domain to iommu and setup the
5196 * pasid entry for second level translation.
5197 */
5198 spin_lock(&iommu->lock);
5199 ret = domain_attach_iommu(domain, iommu);
5200 if (ret)
5201 goto attach_failed;
5202
5203 /* Setup the PASID entry for mediated devices: */
5204 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5205 domain->default_pasid);
5206 if (ret)
5207 goto table_failed;
5208 spin_unlock(&iommu->lock);
5209
5210 auxiliary_link_device(domain, dev);
5211
5212 spin_unlock_irqrestore(&device_domain_lock, flags);
5213
5214 return 0;
5215
5216table_failed:
5217 domain_detach_iommu(domain, iommu);
5218attach_failed:
5219 spin_unlock(&iommu->lock);
5220 spin_unlock_irqrestore(&device_domain_lock, flags);
5221 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5222 intel_pasid_free_id(domain->default_pasid);
5223
5224 return ret;
5225}
5226
5227static void aux_domain_remove_dev(struct dmar_domain *domain,
5228 struct device *dev)
5229{
5230 struct device_domain_info *info;
5231 struct intel_iommu *iommu;
5232 unsigned long flags;
5233
5234 if (!is_aux_domain(dev, &domain->domain))
5235 return;
5236
5237 spin_lock_irqsave(&device_domain_lock, flags);
5238 info = dev->archdata.iommu;
5239 iommu = info->iommu;
5240
5241 auxiliary_unlink_device(domain, dev);
5242
5243 spin_lock(&iommu->lock);
5244 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5245 domain_detach_iommu(domain, iommu);
5246 spin_unlock(&iommu->lock);
5247
5248 spin_unlock_irqrestore(&device_domain_lock, flags);
5249}
5250
8cc3759a
LB
5251static int prepare_domain_attach_device(struct iommu_domain *domain,
5252 struct device *dev)
38717946 5253{
00a77deb 5254 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
5255 struct intel_iommu *iommu;
5256 int addr_width;
156baca8 5257 u8 bus, devfn;
faa3d6f5 5258
156baca8 5259 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5260 if (!iommu)
5261 return -ENODEV;
5262
5263 /* check if this iommu agaw is sufficient for max mapped address */
5264 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5265 if (addr_width > cap_mgaw(iommu->cap))
5266 addr_width = cap_mgaw(iommu->cap);
5267
5268 if (dmar_domain->max_addr > (1LL << addr_width)) {
932a6523
BH
5269 dev_err(dev, "%s: iommu width (%d) is not "
5270 "sufficient for the mapped address (%llx)\n",
5271 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5272 return -EFAULT;
5273 }
a99c47a2
TL
5274 dmar_domain->gaw = addr_width;
5275
5276 /*
5277 * Knock out extra levels of page tables if necessary
5278 */
5279 while (iommu->agaw < dmar_domain->agaw) {
5280 struct dma_pte *pte;
5281
5282 pte = dmar_domain->pgd;
5283 if (dma_pte_present(pte)) {
25cbff16
SY
5284 dmar_domain->pgd = (struct dma_pte *)
5285 phys_to_virt(dma_pte_addr(pte));
7a661013 5286 free_pgtable_page(pte);
a99c47a2
TL
5287 }
5288 dmar_domain->agaw--;
5289 }
fe40f1e0 5290
8cc3759a
LB
5291 return 0;
5292}
5293
5294static int intel_iommu_attach_device(struct iommu_domain *domain,
5295 struct device *dev)
5296{
5297 int ret;
5298
5299 if (device_is_rmrr_locked(dev)) {
5300 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5301 return -EPERM;
5302 }
5303
67b8e02b
LB
5304 if (is_aux_domain(dev, domain))
5305 return -EPERM;
5306
8cc3759a
LB
5307 /* normally dev is not mapped */
5308 if (unlikely(domain_context_mapped(dev))) {
5309 struct dmar_domain *old_domain;
5310
5311 old_domain = find_domain(dev);
fa954e68 5312 if (old_domain)
8cc3759a 5313 dmar_remove_one_dev_info(dev);
8cc3759a
LB
5314 }
5315
5316 ret = prepare_domain_attach_device(domain, dev);
5317 if (ret)
5318 return ret;
5319
5320 return domain_add_dev_info(to_dmar_domain(domain), dev);
38717946 5321}
38717946 5322
67b8e02b
LB
5323static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5324 struct device *dev)
5325{
5326 int ret;
5327
5328 if (!is_aux_domain(dev, domain))
5329 return -EPERM;
5330
5331 ret = prepare_domain_attach_device(domain, dev);
5332 if (ret)
5333 return ret;
5334
5335 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5336}
5337
4c5478c9
JR
5338static void intel_iommu_detach_device(struct iommu_domain *domain,
5339 struct device *dev)
38717946 5340{
71753239 5341 dmar_remove_one_dev_info(dev);
faa3d6f5 5342}
c7151a8d 5343
67b8e02b
LB
5344static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5345 struct device *dev)
5346{
5347 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5348}
5349
5350static int intel_iommu_map(struct iommu_domain *domain,
5351 unsigned long iova, phys_addr_t hpa,
5009065d 5352 size_t size, int iommu_prot)
faa3d6f5 5353{
00a77deb 5354 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5355 u64 max_addr;
dde57a21 5356 int prot = 0;
faa3d6f5 5357 int ret;
fe40f1e0 5358
5359 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5360 return -EINVAL;
5361
5362 if (iommu_prot & IOMMU_READ)
5363 prot |= DMA_PTE_READ;
5364 if (iommu_prot & IOMMU_WRITE)
5365 prot |= DMA_PTE_WRITE;
5366 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5367 prot |= DMA_PTE_SNP;
dde57a21 5368
163cc52c 5369 max_addr = iova + size;
dde57a21 5370 if (dmar_domain->max_addr < max_addr) {
5371 u64 end;
5372
5373 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5374 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5375 if (end < max_addr) {
9f10e5bf 5376 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5377 "sufficient for the mapped address (%llx)\n",
8954da1f 5378 __func__, dmar_domain->gaw, max_addr);
5379 return -EFAULT;
5380 }
dde57a21 5381 dmar_domain->max_addr = max_addr;
fe40f1e0 5382 }
5383	/* Round up the size to the next multiple of VTD_PAGE_SIZE, since
5384	   the low bits of hpa plus the size may spill onto an extra page */
88cb6a74 5385 size = aligned_nrpages(hpa, size);
5386 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5387 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5388 return ret;
38717946 5389}
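/*
 * Editorial sketch, not part of the driver: callers reach the hook above
 * through the generic API in <linux/iommu.h>, with IOMMU_READ/IOMMU_WRITE/
 * IOMMU_CACHE translated into DMA_PTE_READ/WRITE/SNP as shown.  Assuming a
 * domain already attached to a VT-d translated device:
 *
 *	int ret = iommu_map(domain, iova, page_to_phys(page), PAGE_SIZE,
 *			    IOMMU_READ | IOMMU_WRITE);
 *
 * The rounding above covers unaligned physical addresses: for example
 * hpa = 0x1000800 with size = 0x1000 straddles two 4KiB VT-d pages, so
 * aligned_nrpages() returns 2 and both pages are mapped.
 */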
38717946 5390
5009065d 5391static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5392 unsigned long iova, size_t size)
38717946 5393{
00a77deb 5394 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5395 struct page *freelist = NULL;
5396 unsigned long start_pfn, last_pfn;
5397 unsigned int npages;
42e8c186 5398 int iommu_id, level = 0;
5399
5400 /* Cope with horrid API which requires us to unmap more than the
5401 size argument if it happens to be a large-page mapping. */
dc02e46e 5402 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5403 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5404 return 0;
5405
5406 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5407 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5408
5409 start_pfn = iova >> VTD_PAGE_SHIFT;
5410 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5411
5412 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5413
5414 npages = last_pfn - start_pfn + 1;
5415
f746a025 5416 for_each_domain_iommu(iommu_id, dmar_domain)
5417 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5418 start_pfn, npages, !freelist, 0);
5419
5420 dma_free_pagelist(freelist);
fe40f1e0 5421
5422 if (dmar_domain->max_addr == iova + size)
5423 dmar_domain->max_addr = iova;
b146a1c9 5424
5cf0a76f 5425 return size;
38717946 5426}
38717946 5427
d14d6577 5428static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5429 dma_addr_t iova)
38717946 5430{
00a77deb 5431 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5432 struct dma_pte *pte;
5cf0a76f 5433 int level = 0;
faa3d6f5 5434 u64 phys = 0;
38717946 5435
5436 if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
5437 return 0;
5438
5cf0a76f 5439 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5440 if (pte)
faa3d6f5 5441 phys = dma_pte_addr(pte);
38717946 5442
faa3d6f5 5443 return phys;
38717946 5444}
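/*
 * Editorial sketch, not part of the driver: the unmap and iova_to_phys hooks
 * above are likewise reached through the generic wrappers.  A caller that
 * mapped a range with iommu_map() might check and tear it down with:
 *
 *	phys_addr_t phys = iommu_iova_to_phys(domain, iova);
 *	size_t unmapped = iommu_unmap(domain, iova, size);
 *
 * As the comment in intel_iommu_unmap() notes, a request that lands inside a
 * large-page mapping is widened to the whole superpage, so the size returned
 * by the callback can exceed the size that was asked for.
 */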
a8bcbb0d 5445
5446static inline bool scalable_mode_support(void)
5447{
5448 struct dmar_drhd_unit *drhd;
5449 struct intel_iommu *iommu;
5450 bool ret = true;
5451
5452 rcu_read_lock();
5453 for_each_active_iommu(iommu, drhd) {
5454 if (!sm_supported(iommu)) {
5455 ret = false;
5456 break;
5457 }
5458 }
5459 rcu_read_unlock();
5460
5461 return ret;
5462}
5463
5464static inline bool iommu_pasid_support(void)
5465{
5466 struct dmar_drhd_unit *drhd;
5467 struct intel_iommu *iommu;
5468 bool ret = true;
5469
5470 rcu_read_lock();
5471 for_each_active_iommu(iommu, drhd) {
5472 if (!pasid_supported(iommu)) {
5473 ret = false;
5474 break;
5475 }
5476 }
5477 rcu_read_unlock();
5478
5479 return ret;
5480}
5481
5d587b8d 5482static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5483{
dbb9fd86 5484 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5485 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5486 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5487 return irq_remapping_enabled == 1;
dbb9fd86 5488
5d587b8d 5489 return false;
5490}
5491
5492static int intel_iommu_add_device(struct device *dev)
5493{
5494 struct dmar_domain *dmar_domain;
5495 struct iommu_domain *domain;
a5459cfe 5496 struct intel_iommu *iommu;
abdfdde2 5497 struct iommu_group *group;
156baca8 5498 u8 bus, devfn;
942067f1 5499 int ret;
70ae6f0d 5500
5501 iommu = device_to_iommu(dev, &bus, &devfn);
5502 if (!iommu)
5503 return -ENODEV;
5504
e3d10af1 5505 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5506
e17f9ff4 5507 group = iommu_group_get_for_dev(dev);
783f157b 5508
5509 if (IS_ERR(group))
5510 return PTR_ERR(group);
bcb71abe 5511
abdfdde2 5512 iommu_group_put(group);
5513
5514 domain = iommu_get_domain_for_dev(dev);
5515 dmar_domain = to_dmar_domain(domain);
5516 if (domain->type == IOMMU_DOMAIN_DMA) {
5517 if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_IDENTITY) {
5518 ret = iommu_request_dm_for_dev(dev);
5519 if (ret) {
5520 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5521 domain_add_dev_info(si_domain, dev);
5522 dev_info(dev,
5523 "Device uses a private identity domain.\n");
5524 return 0;
5525 }
5526
5527 return -ENODEV;
5528 }
5529 } else {
5530 if (device_def_domain_type(dev, 1) == IOMMU_DOMAIN_DMA) {
5531 ret = iommu_request_dma_domain_for_dev(dev);
5532 if (ret) {
5533 dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5534 if (!get_valid_domain_for_dev(dev)) {
5535 dev_warn(dev,
5536 "Failed to get a private domain.\n");
5537 return -ENOMEM;
5538 }
5539
5540 dev_info(dev,
5541 "Device uses a private dma domain.\n");
5542 return 0;
5543 }
5544
5545 return -ENODEV;
5546 }
5547 }
5548
e17f9ff4 5549 return 0;
abdfdde2 5550}
70ae6f0d 5551
5552static void intel_iommu_remove_device(struct device *dev)
5553{
5554 struct intel_iommu *iommu;
5555 u8 bus, devfn;
5556
5557 iommu = device_to_iommu(dev, &bus, &devfn);
5558 if (!iommu)
5559 return;
5560
abdfdde2 5561 iommu_group_remove_device(dev);
a5459cfe 5562
e3d10af1 5563 iommu_device_unlink(&iommu->iommu, dev);
5564}
5565
5566static void intel_iommu_get_resv_regions(struct device *device,
5567 struct list_head *head)
5568{
5569 struct iommu_resv_region *reg;
5570 struct dmar_rmrr_unit *rmrr;
5571 struct device *i_dev;
5572 int i;
5573
5574 rcu_read_lock();
5575 for_each_rmrr_units(rmrr) {
5576 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5577 i, i_dev) {
5578 if (i_dev != device)
5579 continue;
5580
5581 list_add_tail(&rmrr->resv->list, head);
5582 }
5583 }
5584 rcu_read_unlock();
5585
5586#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5587 if (dev_is_pci(device)) {
5588 struct pci_dev *pdev = to_pci_dev(device);
5589
5590 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5591 reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
5592 IOMMU_RESV_DIRECT);
5593 if (reg)
5594 list_add_tail(&reg->list, head);
5595 }
5596 }
5597#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5598
5599 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5600 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5601 0, IOMMU_RESV_MSI);
5602 if (!reg)
5603 return;
5604 list_add_tail(&reg->list, head);
5605}
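/*
 * Editorial sketch, not part of the driver: consumers normally retrieve the
 * regions reported above through the generic helpers rather than this hook.
 * Assuming a device handled by this IOMMU:
 *
 *	struct iommu_resv_region *region;
 *	LIST_HEAD(resv_regions);
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		dev_info(dev, "reserved region %pa, length %zu, type %d\n",
 *			 &region->start, region->length, region->type);
 *	iommu_put_resv_regions(dev, &resv_regions);
 */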
5606
5607static void intel_iommu_put_resv_regions(struct device *dev,
5608 struct list_head *head)
5609{
5610 struct iommu_resv_region *entry, *next;
5611
5612 list_for_each_entry_safe(entry, next, head, list) {
198bc325 5613 if (entry->type == IOMMU_RESV_MSI)
5614 kfree(entry);
5615 }
5616}
5617
d7cbc0f3 5618int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5619{
5620 struct device_domain_info *info;
5621 struct context_entry *context;
5622 struct dmar_domain *domain;
5623 unsigned long flags;
5624 u64 ctx_lo;
5625 int ret;
5626
d7cbc0f3 5627 domain = get_valid_domain_for_dev(dev);
5628 if (!domain)
5629 return -EINVAL;
5630
5631 spin_lock_irqsave(&device_domain_lock, flags);
5632 spin_lock(&iommu->lock);
5633
5634 ret = -EINVAL;
d7cbc0f3 5635 info = dev->archdata.iommu;
5636 if (!info || !info->pasid_supported)
5637 goto out;
5638
5639 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5640 if (WARN_ON(!context))
5641 goto out;
5642
5643 ctx_lo = context[0].lo;
5644
2f26e0a9 5645 if (!(ctx_lo & CONTEXT_PASIDE)) {
5646 ctx_lo |= CONTEXT_PASIDE;
5647 context[0].lo = ctx_lo;
5648 wmb();
5649 iommu->flush.flush_context(iommu,
5650 domain->iommu_did[iommu->seq_id],
5651 PCI_DEVID(info->bus, info->devfn),
5652 DMA_CCMD_MASK_NOBIT,
5653 DMA_CCMD_DEVICE_INVL);
5654 }
5655
5656 /* Enable PASID support in the device, if it wasn't already */
5657 if (!info->pasid_enabled)
5658 iommu_enable_dev_iotlb(info);
5659
5660 ret = 0;
5661
5662 out:
5663 spin_unlock(&iommu->lock);
5664 spin_unlock_irqrestore(&device_domain_lock, flags);
5665
5666 return ret;
5667}
5668
5669static void intel_iommu_apply_resv_region(struct device *dev,
5670 struct iommu_domain *domain,
5671 struct iommu_resv_region *region)
5672{
5673 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5674 unsigned long start, end;
5675
5676 start = IOVA_PFN(region->start);
5677 end = IOVA_PFN(region->start + region->length - 1);
5678
5679 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5680}
5681
d7cbc0f3 5682#ifdef CONFIG_INTEL_IOMMU_SVM
5683struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5684{
5685 struct intel_iommu *iommu;
5686 u8 bus, devfn;
5687
5688 if (iommu_dummy(dev)) {
5689 dev_warn(dev,
5690 "No IOMMU translation for device; cannot enable SVM\n");
5691 return NULL;
5692 }
5693
5694 iommu = device_to_iommu(dev, &bus, &devfn);
5695	if (!iommu) {
b9997e38 5696 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5697 return NULL;
5698 }
5699
5700 return iommu;
5701}
5702#endif /* CONFIG_INTEL_IOMMU_SVM */
5703
5704static int intel_iommu_enable_auxd(struct device *dev)
5705{
5706 struct device_domain_info *info;
5707 struct intel_iommu *iommu;
5708 unsigned long flags;
5709 u8 bus, devfn;
5710 int ret;
5711
5712 iommu = device_to_iommu(dev, &bus, &devfn);
5713 if (!iommu || dmar_disabled)
5714 return -EINVAL;
5715
5716 if (!sm_supported(iommu) || !pasid_supported(iommu))
5717 return -EINVAL;
5718
5719 ret = intel_iommu_enable_pasid(iommu, dev);
5720 if (ret)
5721 return -ENODEV;
5722
5723 spin_lock_irqsave(&device_domain_lock, flags);
5724 info = dev->archdata.iommu;
5725 info->auxd_enabled = 1;
5726 spin_unlock_irqrestore(&device_domain_lock, flags);
5727
5728 return 0;
5729}
5730
5731static int intel_iommu_disable_auxd(struct device *dev)
5732{
5733 struct device_domain_info *info;
5734 unsigned long flags;
5735
5736 spin_lock_irqsave(&device_domain_lock, flags);
5737 info = dev->archdata.iommu;
5738 if (!WARN_ON(!info))
5739 info->auxd_enabled = 0;
5740 spin_unlock_irqrestore(&device_domain_lock, flags);
5741
5742 return 0;
5743}
5744
5745/*
5746 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5747 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5748 * specification so that system software and tools can detect endpoint
5749 * devices supporting Intel Scalable I/O Virtualization without any host
5750 * driver dependency.
5751 *
5752 * Returns the config space offset of the matching extended capability
5753 * structure, or 0 if the device does not support it.
5754 */
5755static int siov_find_pci_dvsec(struct pci_dev *pdev)
5756{
5757 int pos;
5758 u16 vendor, id;
5759
5760 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5761 while (pos) {
5762 pci_read_config_word(pdev, pos + 4, &vendor);
5763 pci_read_config_word(pdev, pos + 8, &id);
5764 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5765 return pos;
5766
5767 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5768 }
5769
5770 return 0;
5771}
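/*
 * Editorial note, not part of the driver: the walk above relies on the
 * standard PCIe DVSEC layout -- extended capability ID 0x23, DVSEC vendor ID
 * in bits 15:0 of the dword at offset 4, DVSEC ID in bits 15:0 of the dword
 * at offset 8 -- which is why two 16-bit config reads at pos + 4 and pos + 8
 * are enough to recognise the Intel SIOV capability (DVSEC ID 5).
 */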
5772
5773static bool
5774intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5775{
5776 if (feat == IOMMU_DEV_FEAT_AUX) {
5777 int ret;
5778
5779 if (!dev_is_pci(dev) || dmar_disabled ||
5780 !scalable_mode_support() || !iommu_pasid_support())
5781 return false;
5782
5783 ret = pci_pasid_features(to_pci_dev(dev));
5784 if (ret < 0)
5785 return false;
5786
5787 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5788 }
5789
5790 return false;
5791}
5792
5793static int
5794intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5795{
5796 if (feat == IOMMU_DEV_FEAT_AUX)
5797 return intel_iommu_enable_auxd(dev);
5798
5799 return -ENODEV;
5800}
5801
5802static int
5803intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5804{
5805 if (feat == IOMMU_DEV_FEAT_AUX)
5806 return intel_iommu_disable_auxd(dev);
5807
5808 return -ENODEV;
5809}
5810
5811static bool
5812intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5813{
5814 struct device_domain_info *info = dev->archdata.iommu;
5815
5816 if (feat == IOMMU_DEV_FEAT_AUX)
5817 return scalable_mode_support() && info && info->auxd_enabled;
5818
5819 return false;
5820}
5821
5822static int
5823intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5824{
5825 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5826
5827 return dmar_domain->default_pasid > 0 ?
5828 dmar_domain->default_pasid : -EINVAL;
5829}
5830
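/*
 * Editorial sketch, not part of the driver: the aux-domain hooks above are
 * meant to be driven through the generic per-device feature API.  A mediated
 * device parent driver would do roughly the following, with error handling
 * trimmed:
 *
 *	if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX))
 *		return -ENODEV;
 *
 *	domain = iommu_domain_alloc(&pci_bus_type);
 *	if (domain && !iommu_aux_attach_device(domain, dev)) {
 *		int pasid = iommu_aux_get_pasid(domain, dev);
 *		(program the device so DMA for this mdev is tagged with pasid)
 *	}
 *
 * and later undo it with iommu_aux_detach_device() and
 * iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_AUX).
 */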
b0119e87 5831const struct iommu_ops intel_iommu_ops = {
5832 .capable = intel_iommu_capable,
5833 .domain_alloc = intel_iommu_domain_alloc,
5834 .domain_free = intel_iommu_domain_free,
5835 .attach_dev = intel_iommu_attach_device,
5836 .detach_dev = intel_iommu_detach_device,
5837 .aux_attach_dev = intel_iommu_aux_attach_device,
5838 .aux_detach_dev = intel_iommu_aux_detach_device,
0e8000f8 5839 .aux_get_pasid = intel_iommu_aux_get_pasid,
5840 .map = intel_iommu_map,
5841 .unmap = intel_iommu_unmap,
5842 .iova_to_phys = intel_iommu_iova_to_phys,
5843 .add_device = intel_iommu_add_device,
5844 .remove_device = intel_iommu_remove_device,
5845 .get_resv_regions = intel_iommu_get_resv_regions,
5846 .put_resv_regions = intel_iommu_put_resv_regions,
73bcbdc9 5847 .apply_resv_region = intel_iommu_apply_resv_region,
0659b8dc 5848 .device_group = pci_device_group,
5849 .dev_has_feat = intel_iommu_dev_has_feat,
5850 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5851 .dev_enable_feat = intel_iommu_dev_enable_feat,
5852 .dev_disable_feat = intel_iommu_dev_disable_feat,
0659b8dc 5853 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5854};
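/*
 * Editorial note, not part of the driver: this table is what the generic
 * IOMMU core dispatches to once the driver registers it for the PCI bus
 * (bus_set_iommu() during initialisation).  Typical users only ever see the
 * wrappers, e.g.:
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(&pci_bus_type);
 *
 *	if (domain && !iommu_attach_device(domain, &pdev->dev)) {
 *		(map, use and unmap IOVA ranges on the domain)
 *		iommu_detach_device(domain, &pdev->dev);
 *	}
 *	if (domain)
 *		iommu_domain_free(domain);
 */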
9af88143 5855
5856static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5857{
5858 /* G4x/GM45 integrated gfx dmar support is totally busted. */
932a6523 5859 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5860 dmar_map_gfx = 0;
5861}
5862
5863DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5864DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5865DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5866DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5867DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5868DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5869DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5870
d34d6517 5871static void quirk_iommu_rwbf(struct pci_dev *dev)
5872{
5873 /*
5874 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5875 * but needs it. Same seems to hold for the desktop versions.
9af88143 5876 */
932a6523 5877 pci_info(dev, "Forcing write-buffer flush capability\n");
5878 rwbf_quirk = 1;
5879}
5880
5881DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5882DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5883DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5884DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5885DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5886DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5887DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5888
5889#define GGC 0x52
5890#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5891#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5892#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5893#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5894#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5895#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5896#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5897#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5898
d34d6517 5899static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5900{
5901 unsigned short ggc;
5902
eecfd57f 5903 if (pci_read_config_word(dev, GGC, &ggc))
5904 return;
5905
eecfd57f 5906 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
932a6523 5907 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5908 dmar_map_gfx = 0;
5909 } else if (dmar_map_gfx) {
5910 /* we have to ensure the gfx device is idle before we flush */
932a6523 5911 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
5912 intel_iommu_strict = 1;
5913 }
5914}
5915DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5916DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5917DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5918DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5919
5920/* On Tylersburg chipsets, some BIOSes have been known to enable the
5921 ISOCH DMAR unit for the Azalia sound device, but not give it any
5922 TLB entries, which causes it to deadlock. Check for that. We do
5923 this in a function called from init_dmars(), instead of in a PCI
5924 quirk, because we don't want to print the obnoxious "BIOS broken"
5925 message if VT-d is actually disabled.
5926*/
5927static void __init check_tylersburg_isoch(void)
5928{
5929 struct pci_dev *pdev;
5930 uint32_t vtisochctrl;
5931
5932 /* If there's no Azalia in the system anyway, forget it. */
5933 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5934 if (!pdev)
5935 return;
5936 pci_dev_put(pdev);
5937
5938 /* System Management Registers. Might be hidden, in which case
5939 we can't do the sanity check. But that's OK, because the
5940 known-broken BIOSes _don't_ actually hide it, so far. */
5941 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5942 if (!pdev)
5943 return;
5944
5945 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5946 pci_dev_put(pdev);
5947 return;
5948 }
5949
5950 pci_dev_put(pdev);
5951
5952 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5953 if (vtisochctrl & 1)
5954 return;
5955
5956 /* Drop all bits other than the number of TLB entries */
5957 vtisochctrl &= 0x1c;
5958
5959 /* If we have the recommended number of TLB entries (16), fine. */
5960 if (vtisochctrl == 0x10)
5961 return;
5962
5963 /* Zero TLB entries? You get to ride the short bus to school. */
5964 if (!vtisochctrl) {
5965 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5966 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5967 dmi_get_system_info(DMI_BIOS_VENDOR),
5968 dmi_get_system_info(DMI_BIOS_VERSION),
5969 dmi_get_system_info(DMI_PRODUCT_VERSION));
5970 iommu_identity_mapping |= IDENTMAP_AZALIA;
5971 return;
5972 }
5973
5974 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5975 vtisochctrl);
5976}