/* drivers/iommu/intel-iommu.c */
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)     "DMAR: " fmt

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

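/* Worked example (illustrative note, not from the original source): with the
 * default 48-bit guest address width and VTD_PAGE_SHIFT == 12,
 * __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 and
 * __DOMAIN_MAX_ADDR(48) == (1ULL << 48) - 1, so the clamp in
 * DOMAIN_MAX_PFN() only matters on 32-bit kernels, where an unsigned long
 * cannot hold a 36-bit PFN.
 */
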
/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

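/* Illustrative note: with 4KiB pages (PAGE_SHIFT == 12), DMA_32BIT_PFN is
 * 0xfffff and DMA_64BIT_PFN is 0xfffffffffffff, i.e. the last page frame
 * reachable with a 32-bit or 64-bit DMA mask respectively.
 */
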
/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

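/* Illustrative note: ~0xFFFUL has every bit from bit 12 upwards set, so the
 * IOMMU core sees every power-of-two size >= 4KiB as "supported" and never
 * splits a naturally aligned region before handing it to this driver.
 */
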
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

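/* Worked example (illustrative note): for the default 48-bit address width,
 * width_to_agaw(48) == 2, agaw_to_level(2) == 4 (a four-level page table)
 * and agaw_to_width(2) == 48.  Each level then resolves 9 bits of the PFN:
 * level_to_offset_bits() yields 0, 9, 18 and 27 for levels 1 through 4.
 */
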
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

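/* Illustrative note: on x86 with 4KiB kernel pages, PAGE_SHIFT and
 * VTD_PAGE_SHIFT are both 12, so these conversions are identity operations;
 * the shifts only matter when the kernel page size is larger than 4KiB.
 */
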
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	lo;
	u64	hi;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

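/* Illustrative note: the setters above only OR bits into an entry, so they
 * assume the entry was zeroed first (context_clear_entry()).  For example,
 * context_set_domain_id(c, 42) places 42 into bits 8-23 of c->hi, and
 * context_domain_id(c) reads the same 16-bit field back.
 */
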
/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & DMA_PTE_LARGE_PAGE);
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/*
 * Domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

struct dmar_domain {
	int	nid;			/* node id */

	unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
					/* Refcount of devices per iommu */

	u16		iommu_did[DMAR_UNITS_SUPPORTED];
					/* Domain ids per IOMMU. Use u16 since
					 * domain ids are 16 bit wide according
					 * to VT-d spec, section 9.3 */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	u64		max_addr;	/* maximum mapped address */

	struct iommu_domain domain;	/* generic domain data structure for
					   iommu core */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	u8 pasid_supported:3;
	u8 pasid_enabled:1;
	u8 pri_supported:1;
	u8 pri_enabled:1;
	u8 ats_supported:1;
	u8 ats_enabled:1;
	u8 ats_qdep;
	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

static void flush_unmaps_timeout(unsigned long data);

static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
	struct page *freelist[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct dmar_domain *domain,
				     struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
static int intel_iommu_pasid28;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

/* Broadwell and Skylake have broken ECS support — normal so-called "second
 * level" translation of DMA requests-without-PASID doesn't actually happen
 * unless you also set the NESTE bit in an extended context-entry. Which of
 * course means that SVM doesn't work because it's trying to do nested
 * translation of the physical addresses it finds in the process page tables,
 * through the IOVA->phys mapping found in the "second level" page tables.
 *
 * The VT-d specification was retroactively changed to change the definition
 * of the capability bits and pretend that Broadwell/Skylake never happened...
 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
 * for some reason it was the PASID capability bit which was redefined (from
 * bit 28 on BDW/SKL to bit 40 in future).
 *
 * So our test for ECS needs to eschew those implementations which set the old
 * PASID capability bit 28, since those are the ones on which ECS is broken.
 * Unless we are working around the 'pasid28' limitations, that is, by putting
 * the device into passthrough mode for normal DMA and thus masking the bug.
 */
#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
/* PASID support is thus enabled if ECS is enabled and *either* of the old
 * or new capability bits are set. */
#define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "ecs_off", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable extended context table support\n");
			intel_iommu_ecs = 0;
		} else if (!strncmp(str, "pasid28", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: enable pre-production PASID support\n");
			intel_iommu_pasid28 = 1;
			iommu_identity_mapping |= IDENTMAP_GFX;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

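/* Usage note (illustrative): the options above are comma-separated on the
 * kernel command line, e.g. "intel_iommu=on,strict,sp_off" enables the IOMMU,
 * disables batched IOTLB flushing and disables superpage use.
 */
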
static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

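/* Illustrative note: iommu->domains is a two-level table keyed by domain id.
 * The top level is indexed by did >> 8 and each second-level array holds 256
 * pointers indexed by did & 0xff, so second-level pages are only allocated
 * for domain-id ranges that are actually in use.
 */
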
static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_vm(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
{
	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
				DOMAIN_FLAG_STATIC_IDENTITY);
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
					unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus: use a default agaw, and
 * fall back to a supported smaller agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns a single iommu in a domain */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

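/* Illustrative note: cap_super_page_val() is a 4-bit field (2MiB, 1GiB,
 * 512GiB and 1TiB support bits), so the intersection starts from mask = 0xf
 * and fls() of the result is the largest superpage level that every active
 * IOMMU supports, matching the iommu_superpage encoding used in
 * struct dmar_domain (0 = 4KiB only, 1 = 2MiB, ... 4 = 1TiB).
 */
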
static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
						       u8 bus, u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (ecs_enabled(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

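/* Illustrative note: in extended-context (ECS) mode each context entry is
 * twice as large, so a root entry points at two half-size context tables:
 * root->lo covers devfn 0x00-0x7f and root->hi covers devfn 0x80-0xff,
 * which is why the devfn is rebased and doubled above.
 */
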
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		pdev = to_pci_dev(dev);
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!ecs_enabled(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}

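/* Illustrative note: callers pass *target_level == 0 to mean "walk as far as
 * the existing tables go" (the walk stops at the first superpage or
 * non-present entry and reports the level reached), while a non-zero value
 * asks for a PTE at exactly that level, allocating intermediate tables on
 * the way down.
 */
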

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, should be followed by a TLB flush */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       struct dma_pte *pte, unsigned long pfn,
			       unsigned long start_pfn, unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level - 1);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2)
			dma_pte_free_level(domain, level - 1, level_pte,
					   level_pfn, start_pfn, last_pfn);

		/* If range covers entire pagetable, free it */
		if (!(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist = NULL;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (ecs_enabled(iommu))
		addr |= DMA_RTADDR_RTT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *
iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
			 u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fallback to domain selective flush if no PSI support or the size is
	 * too big.
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
				      addr, mask);
}

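/* Illustrative note: the PSI address mask is the log2 of the flushed range
 * rounded up to a power of two, e.g. flushing 9 pages gives
 * mask = ilog2(16) = 4, i.e. a naturally aligned 16-page invalidation, which
 * is why oversized requests fall back to a domain-selective flush.
 */
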
f8bab735 1567static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1568{
1569 u32 pmen;
1570 unsigned long flags;
1571
1f5b3c3f 1572 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1573 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1574 pmen &= ~DMA_PMEN_EPM;
1575 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1576
1577 /* wait for the protected region status bit to clear */
1578 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1579 readl, !(pmen & DMA_PMEN_PRS), pmen);
1580
1f5b3c3f 1581 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1582}
1583
2a41ccee 1584static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1585{
1586 u32 sts;
1587 unsigned long flags;
1588
1f5b3c3f 1589 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1590 iommu->gcmd |= DMA_GCMD_TE;
1591 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1592
1593 /* Make sure hardware complete it */
1594 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1595 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1596
1f5b3c3f 1597 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1598}
1599
2a41ccee 1600static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1601{
1602 u32 sts;
1603 unsigned long flag;
1604
1f5b3c3f 1605 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1606 iommu->gcmd &= ~DMA_GCMD_TE;
1607 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1608
1609 /* Make sure hardware complete it */
1610 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1611 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1612
1f5b3c3f 1613 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1614}
1615
3460a6d9 1616
ba395927
KA
1617static int iommu_init_domains(struct intel_iommu *iommu)
1618{
8bf47816
JR
1619 u32 ndomains, nlongs;
1620 size_t size;
ba395927
KA
1621
1622 ndomains = cap_ndoms(iommu->cap);
8bf47816 1623 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1624 iommu->name, ndomains);
ba395927
KA
1625 nlongs = BITS_TO_LONGS(ndomains);
1626
94a91b50
DD
1627 spin_lock_init(&iommu->lock);
1628
ba395927
KA
1629 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1630 if (!iommu->domain_ids) {
9f10e5bf
JR
1631 pr_err("%s: Allocating domain id array failed\n",
1632 iommu->name);
ba395927
KA
1633 return -ENOMEM;
1634 }
8bf47816
JR
1635
1636 size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1637 iommu->domains = kzalloc(size, GFP_KERNEL);
1638
1639 if (iommu->domains) {
1640 size = 256 * sizeof(struct dmar_domain *);
1641 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1642 }
1643
1644 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1645 pr_err("%s: Allocating domain array failed\n",
1646 iommu->name);
852bdb04 1647 kfree(iommu->domain_ids);
8bf47816 1648 kfree(iommu->domains);
852bdb04 1649 iommu->domain_ids = NULL;
8bf47816 1650 iommu->domains = NULL;
ba395927
KA
1651 return -ENOMEM;
1652 }
1653
8bf47816
JR
1654
1655
ba395927 1656 /*
c0e8a6c8
JR
1657 * If Caching mode is set, then invalid translations are tagged
1658 * with domain-id 0, hence we need to pre-allocate it. We also
1659 * use domain-id 0 as a marker for non-allocated domain-id, so
1660 * make sure it is not used for a real domain.
ba395927 1661 */
c0e8a6c8
JR
1662 set_bit(0, iommu->domain_ids);
1663
ba395927
KA
1664 return 0;
1665}
ba395927 1666
ffebeb46 1667static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1668{
29a27719 1669 struct device_domain_info *info, *tmp;
55d94043 1670 unsigned long flags;
ba395927 1671
29a27719
JR
1672 if (!iommu->domains || !iommu->domain_ids)
1673 return;
a4eaa86c 1674
55d94043 1675 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1676 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1677 struct dmar_domain *domain;
1678
1679 if (info->iommu != iommu)
1680 continue;
1681
1682 if (!info->dev || !info->domain)
1683 continue;
1684
1685 domain = info->domain;
1686
e6de0f8d 1687 dmar_remove_one_dev_info(domain, info->dev);
29a27719
JR
1688
1689 if (!domain_type_is_vm_or_si(domain))
1690 domain_exit(domain);
ba395927 1691 }
55d94043 1692 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1693
1694 if (iommu->gcmd & DMA_GCMD_TE)
1695 iommu_disable_translation(iommu);
ffebeb46 1696}
ba395927 1697
ffebeb46
JL
1698static void free_dmar_iommu(struct intel_iommu *iommu)
1699{
1700 if ((iommu->domains) && (iommu->domain_ids)) {
8bf47816
JR
1701 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1702 int i;
1703
1704 for (i = 0; i < elems; i++)
1705 kfree(iommu->domains[i]);
ffebeb46
JL
1706 kfree(iommu->domains);
1707 kfree(iommu->domain_ids);
1708 iommu->domains = NULL;
1709 iommu->domain_ids = NULL;
1710 }
ba395927 1711
d9630fe9
WH
1712 g_iommus[iommu->seq_id] = NULL;
1713
ba395927
KA
1714 /* free context mapping */
1715 free_context_table(iommu);
8a94ade4
DW
1716
1717#ifdef CONFIG_INTEL_IOMMU_SVM
a222a7f0
DW
1718 if (pasid_enabled(iommu)) {
1719 if (ecap_prs(iommu->ecap))
1720 intel_svm_finish_prq(iommu);
8a94ade4 1721 intel_svm_free_pasid_tables(iommu);
a222a7f0 1722 }
8a94ade4 1723#endif
ba395927
KA
1724}
1725
ab8dfe25 1726static struct dmar_domain *alloc_domain(int flags)
ba395927 1727{
ba395927 1728 struct dmar_domain *domain;
ba395927
KA
1729
1730 domain = alloc_domain_mem();
1731 if (!domain)
1732 return NULL;
1733
ab8dfe25 1734 memset(domain, 0, sizeof(*domain));
4c923d47 1735 domain->nid = -1;
ab8dfe25 1736 domain->flags = flags;
92d03cc8 1737 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1738
1739 return domain;
1740}
1741
d160aca5
JR
1742/* Must be called with iommu->lock */
1743static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1744 struct intel_iommu *iommu)
1745{
44bde614 1746 unsigned long ndomains;
55d94043 1747 int num;
44bde614 1748
55d94043 1749 assert_spin_locked(&device_domain_lock);
d160aca5 1750 assert_spin_locked(&iommu->lock);
ba395927 1751
29a27719
JR
1752 domain->iommu_refcnt[iommu->seq_id] += 1;
1753 domain->iommu_count += 1;
1754 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1755 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1756 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1757
1758 if (num >= ndomains) {
1759 pr_err("%s: No free domain ids\n", iommu->name);
1760 domain->iommu_refcnt[iommu->seq_id] -= 1;
1761 domain->iommu_count -= 1;
55d94043 1762 return -ENOSPC;
2c2e2c38 1763 }
ba395927 1764
d160aca5
JR
1765 set_bit(num, iommu->domain_ids);
1766 set_iommu_domain(iommu, num, domain);
1767
1768 domain->iommu_did[iommu->seq_id] = num;
1769 domain->nid = iommu->node;
fb170fb4 1770
fb170fb4
JL
1771 domain_update_iommu_cap(domain);
1772 }
d160aca5 1773
55d94043 1774 return 0;
fb170fb4
JL
1775}
1776
1777static int domain_detach_iommu(struct dmar_domain *domain,
1778 struct intel_iommu *iommu)
1779{
d160aca5 1780 int num, count = INT_MAX;
d160aca5 1781
55d94043 1782 assert_spin_locked(&device_domain_lock);
d160aca5 1783 assert_spin_locked(&iommu->lock);
fb170fb4 1784
29a27719
JR
1785 domain->iommu_refcnt[iommu->seq_id] -= 1;
1786 count = --domain->iommu_count;
1787 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1788 num = domain->iommu_did[iommu->seq_id];
1789 clear_bit(num, iommu->domain_ids);
1790 set_iommu_domain(iommu, num, NULL);
fb170fb4 1791
fb170fb4 1792 domain_update_iommu_cap(domain);
c0e8a6c8 1793 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1794 }
fb170fb4
JL
1795
1796 return count;
1797}
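/*
 * Illustrative note (not from the original source): the two helpers above
 * implement a per-IOMMU reference count for a domain.  The first device of a
 * domain that shows up behind a given IOMMU allocates a domain-id from that
 * IOMMU's bitmap and records it in domain->iommu_did[seq_id]; further devices
 * only bump iommu_refcnt[seq_id].  When the last such device goes away,
 * domain_detach_iommu() releases the id again, so a domain spanning devices
 * behind two DMAR units consumes one id on each of them.
 */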
1798
ba395927 1799static struct iova_domain reserved_iova_list;
8a443df4 1800static struct lock_class_key reserved_rbtree_key;
ba395927 1801
51a63e67 1802static int dmar_init_reserved_ranges(void)
ba395927
KA
1803{
1804 struct pci_dev *pdev = NULL;
1805 struct iova *iova;
1806 int i;
ba395927 1807
0fb5fe87
RM
1808 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1809 DMA_32BIT_PFN);
ba395927 1810
8a443df4
MG
1811 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1812 &reserved_rbtree_key);
1813
ba395927
KA
1814 /* IOAPIC ranges shouldn't be accessed by DMA */
1815 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1816 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1817 if (!iova) {
9f10e5bf 1818 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1819 return -ENODEV;
1820 }
ba395927
KA
1821
1822 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1823 for_each_pci_dev(pdev) {
1824 struct resource *r;
1825
1826 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1827 r = &pdev->resource[i];
1828 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1829 continue;
1a4a4551
DW
1830 iova = reserve_iova(&reserved_iova_list,
1831 IOVA_PFN(r->start),
1832 IOVA_PFN(r->end));
51a63e67 1833 if (!iova) {
9f10e5bf 1834 pr_err("Reserve iova failed\n");
51a63e67
JC
1835 return -ENODEV;
1836 }
ba395927
KA
1837 }
1838 }
51a63e67 1839 return 0;
ba395927
KA
1840}
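/*
 * Illustrative example (not from the original source): if a device has a
 * memory BAR at 0xe0000000-0xefffffff, that range is inserted into
 * reserved_iova_list above and later copied into every domain's IOVA
 * allocator.  DMA addresses handed to drivers therefore never collide with
 * the IOAPIC window or PCI MMIO ranges, which could otherwise be decoded as
 * peer-to-peer traffic instead of reaching IOMMU-translated memory.
 */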
1841
1842static void domain_reserve_special_ranges(struct dmar_domain *domain)
1843{
1844 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1845}
1846
1847static inline int guestwidth_to_adjustwidth(int gaw)
1848{
1849 int agaw;
1850 int r = (gaw - 12) % 9;
1851
1852 if (r == 0)
1853 agaw = gaw;
1854 else
1855 agaw = gaw + 9 - r;
1856 if (agaw > 64)
1857 agaw = 64;
1858 return agaw;
1859}
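/*
 * Worked example (illustrative, not from the original source): the VT-d page
 * table walks 9 bits per level on top of a 12-bit page offset, so the usable
 * widths are 12 + n*9 (21, 30, 39, 48, 57, ...).  A requested guest width of
 * 48 already fits (r == 0) and is returned unchanged; a width of 40 gives
 * r == 1 and is rounded up to 48; anything above 64 is clamped to 64.
 */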
1860
dc534b25
JR
1861static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1862 int guest_width)
ba395927 1863{
ba395927
KA
1864 int adjust_width, agaw;
1865 unsigned long sagaw;
1866
0fb5fe87
RM
1867 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1868 DMA_32BIT_PFN);
ba395927
KA
1869 domain_reserve_special_ranges(domain);
1870
1871 /* calculate AGAW */
ba395927
KA
1872 if (guest_width > cap_mgaw(iommu->cap))
1873 guest_width = cap_mgaw(iommu->cap);
1874 domain->gaw = guest_width;
1875 adjust_width = guestwidth_to_adjustwidth(guest_width);
1876 agaw = width_to_agaw(adjust_width);
1877 sagaw = cap_sagaw(iommu->cap);
1878 if (!test_bit(agaw, &sagaw)) {
1879 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1880 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1881 agaw = find_next_bit(&sagaw, 5, agaw);
1882 if (agaw >= 5)
1883 return -ENODEV;
1884 }
1885 domain->agaw = agaw;
ba395927 1886
8e604097
WH
1887 if (ecap_coherent(iommu->ecap))
1888 domain->iommu_coherency = 1;
1889 else
1890 domain->iommu_coherency = 0;
1891
58c610bd
SY
1892 if (ecap_sc_support(iommu->ecap))
1893 domain->iommu_snooping = 1;
1894 else
1895 domain->iommu_snooping = 0;
1896
214e39aa
DW
1897 if (intel_iommu_superpage)
1898 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1899 else
1900 domain->iommu_superpage = 0;
1901
4c923d47 1902 domain->nid = iommu->node;
c7151a8d 1903
ba395927 1904 /* always allocate the top pgd */
4c923d47 1905 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1906 if (!domain->pgd)
1907 return -ENOMEM;
5b6985ce 1908 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1909 return 0;
1910}
1911
1912static void domain_exit(struct dmar_domain *domain)
1913{
ea8ea460 1914 struct page *freelist = NULL;
ba395927
KA
1915
 1916 /* Domain 0 is reserved, so don't process it */
1917 if (!domain)
1918 return;
1919
7b668357
AW
1920 /* Flush any lazy unmaps that may reference this domain */
1921 if (!intel_iommu_strict)
1922 flush_unmaps_timeout(0);
1923
d160aca5
JR
1924 /* Remove associated devices and clear attached or cached domains */
1925 rcu_read_lock();
ba395927 1926 domain_remove_dev_info(domain);
d160aca5 1927 rcu_read_unlock();
92d03cc8 1928
ba395927
KA
1929 /* destroy iovas */
1930 put_iova_domain(&domain->iovad);
ba395927 1931
ea8ea460 1932 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1933
ea8ea460
DW
1934 dma_free_pagelist(freelist);
1935
ba395927
KA
1936 free_domain_mem(domain);
1937}
1938
64ae892b
DW
1939static int domain_context_mapping_one(struct dmar_domain *domain,
1940 struct intel_iommu *iommu,
28ccce0d 1941 u8 bus, u8 devfn)
ba395927 1942{
c6c2cebd 1943 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1944 int translation = CONTEXT_TT_MULTI_LEVEL;
1945 struct device_domain_info *info = NULL;
ba395927 1946 struct context_entry *context;
ba395927 1947 unsigned long flags;
ea6606b0 1948 struct dma_pte *pgd;
55d94043 1949 int ret, agaw;
28ccce0d 1950
c6c2cebd
JR
1951 WARN_ON(did == 0);
1952
28ccce0d
JR
1953 if (hw_pass_through && domain_type_is_si(domain))
1954 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1955
1956 pr_debug("Set context mapping for %02x:%02x.%d\n",
1957 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1958
ba395927 1959 BUG_ON(!domain->pgd);
5331fe6f 1960
55d94043
JR
1961 spin_lock_irqsave(&device_domain_lock, flags);
1962 spin_lock(&iommu->lock);
1963
1964 ret = -ENOMEM;
03ecc32c 1965 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1966 if (!context)
55d94043 1967 goto out_unlock;
ba395927 1968
55d94043
JR
1969 ret = 0;
1970 if (context_present(context))
1971 goto out_unlock;
cf484d0e 1972
ea6606b0
WH
1973 pgd = domain->pgd;
1974
de24e553 1975 context_clear_entry(context);
c6c2cebd 1976 context_set_domain_id(context, did);
ea6606b0 1977
de24e553
JR
1978 /*
1979 * Skip top levels of page tables for iommu which has less agaw
1980 * than default. Unnecessary for PT mode.
1981 */
93a23a72 1982 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 1983 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 1984 ret = -ENOMEM;
de24e553 1985 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
1986 if (!dma_pte_present(pgd))
1987 goto out_unlock;
ea6606b0 1988 }
4ed0d3e6 1989
64ae892b 1990 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
1991 if (info && info->ats_supported)
1992 translation = CONTEXT_TT_DEV_IOTLB;
1993 else
1994 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 1995
93a23a72
YZ
1996 context_set_address_root(context, virt_to_phys(pgd));
1997 context_set_address_width(context, iommu->agaw);
de24e553
JR
1998 } else {
1999 /*
2000 * In pass through mode, AW must be programmed to
2001 * indicate the largest AGAW value supported by
2002 * hardware. And ASR is ignored by hardware.
2003 */
2004 context_set_address_width(context, iommu->msagaw);
93a23a72 2005 }
4ed0d3e6
FY
2006
2007 context_set_translation_type(context, translation);
c07e7d21
MM
2008 context_set_fault_enable(context);
2009 context_set_present(context);
5331fe6f 2010 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2011
4c25a2c1
DW
2012 /*
2013 * It's a non-present to present mapping. If hardware doesn't cache
 2014 * non-present entries we only need to flush the write-buffer. If it
2015 * _does_ cache non-present entries, then it does so in the special
2016 * domain #0, which we have to flush:
2017 */
2018 if (cap_caching_mode(iommu->cap)) {
2019 iommu->flush.flush_context(iommu, 0,
2020 (((u16)bus) << 8) | devfn,
2021 DMA_CCMD_MASK_NOBIT,
2022 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2023 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2024 } else {
ba395927 2025 iommu_flush_write_buffer(iommu);
4c25a2c1 2026 }
93a23a72 2027 iommu_enable_dev_iotlb(info);
c7151a8d 2028
55d94043
JR
2029 ret = 0;
2030
2031out_unlock:
2032 spin_unlock(&iommu->lock);
2033 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2034
ba395927
KA
 2035 return ret;
2036}
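/*
 * Illustrative summary (not from the original source): for the normal
 * multi-level case the context entry programmed above ends up carrying the
 * domain-id, the physical address of the domain's top page-table page
 * (with upper levels skipped when the IOMMU supports a smaller agaw than the
 * domain), and an address width matching iommu->agaw.  For pass-through it
 * carries only the domain-id and the maximum supported width, since no
 * second-level table is walked.  Devices with Device-IOTLB support get
 * CONTEXT_TT_DEV_IOTLB so ATS can be used.
 */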
2037
579305f7
AW
2038struct domain_context_mapping_data {
2039 struct dmar_domain *domain;
2040 struct intel_iommu *iommu;
579305f7
AW
2041};
2042
2043static int domain_context_mapping_cb(struct pci_dev *pdev,
2044 u16 alias, void *opaque)
2045{
2046 struct domain_context_mapping_data *data = opaque;
2047
2048 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2049 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2050}
2051
ba395927 2052static int
28ccce0d 2053domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2054{
64ae892b 2055 struct intel_iommu *iommu;
156baca8 2056 u8 bus, devfn;
579305f7 2057 struct domain_context_mapping_data data;
64ae892b 2058
e1f167f3 2059 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2060 if (!iommu)
2061 return -ENODEV;
ba395927 2062
579305f7 2063 if (!dev_is_pci(dev))
28ccce0d 2064 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2065
2066 data.domain = domain;
2067 data.iommu = iommu;
579305f7
AW
2068
2069 return pci_for_each_dma_alias(to_pci_dev(dev),
2070 &domain_context_mapping_cb, &data);
2071}
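/*
 * Illustrative example (not from the original source): a PCIe-to-PCI bridge
 * takes ownership of transactions from conventional PCI devices behind it,
 * so those devices can appear on the bus with the bridge's requester-id.
 * pci_for_each_dma_alias() walks every such alias, and the callback above
 * programs a context entry for each one, ensuring the IOMMU can translate
 * whichever id the hardware actually emits.
 */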
2072
2073static int domain_context_mapped_cb(struct pci_dev *pdev,
2074 u16 alias, void *opaque)
2075{
2076 struct intel_iommu *iommu = opaque;
2077
2078 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2079}
2080
e1f167f3 2081static int domain_context_mapped(struct device *dev)
ba395927 2082{
5331fe6f 2083 struct intel_iommu *iommu;
156baca8 2084 u8 bus, devfn;
5331fe6f 2085
e1f167f3 2086 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2087 if (!iommu)
2088 return -ENODEV;
ba395927 2089
579305f7
AW
2090 if (!dev_is_pci(dev))
2091 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2092
579305f7
AW
2093 return !pci_for_each_dma_alias(to_pci_dev(dev),
2094 domain_context_mapped_cb, iommu);
ba395927
KA
2095}
2096
f532959b
FY
2097/* Returns a number of VTD pages, but aligned to MM page size */
2098static inline unsigned long aligned_nrpages(unsigned long host_addr,
2099 size_t size)
2100{
2101 host_addr &= ~PAGE_MASK;
2102 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2103}
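/*
 * Worked example (illustrative, not from the original source): with 4KiB
 * pages, a buffer starting at offset 0x803 within a page and of size 0x2000
 * covers bytes 0x803..0x2802 of the mapping, so PAGE_ALIGN(0x2803) >>
 * VTD_PAGE_SHIFT == 3 pages are needed even though the raw size is exactly
 * two pages.
 */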
2104
6dd9a7c7
YS
2105/* Return largest possible superpage level for a given mapping */
2106static inline int hardware_largepage_caps(struct dmar_domain *domain,
2107 unsigned long iov_pfn,
2108 unsigned long phy_pfn,
2109 unsigned long pages)
2110{
2111 int support, level = 1;
2112 unsigned long pfnmerge;
2113
2114 support = domain->iommu_superpage;
2115
2116 /* To use a large page, the virtual *and* physical addresses
2117 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2118 of them will mean we have to use smaller pages. So just
2119 merge them and check both at once. */
2120 pfnmerge = iov_pfn | phy_pfn;
2121
2122 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2123 pages >>= VTD_STRIDE_SHIFT;
2124 if (!pages)
2125 break;
2126 pfnmerge >>= VTD_STRIDE_SHIFT;
2127 level++;
2128 support--;
2129 }
2130 return level;
2131}
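/*
 * Worked example (illustrative, not from the original source): with a 9-bit
 * stride, iov_pfn 0x400 and phy_pfn 0x1400 are both 2MiB aligned (low 9 bits
 * clear), so if at least 512 pages remain and the hardware advertises one
 * level of superpage support, level 2 is returned and a single 2MiB PTE can
 * be used instead of 512 4KiB PTEs.
 */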
2132
9051aa02
DW
2133static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2134 struct scatterlist *sg, unsigned long phys_pfn,
2135 unsigned long nr_pages, int prot)
e1605495
DW
2136{
2137 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2138 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2139 unsigned long sg_res = 0;
6dd9a7c7
YS
2140 unsigned int largepage_lvl = 0;
2141 unsigned long lvl_pages = 0;
e1605495 2142
162d1b10 2143 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2144
2145 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2146 return -EINVAL;
2147
2148 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2149
cc4f14aa
JL
2150 if (!sg) {
2151 sg_res = nr_pages;
9051aa02
DW
2152 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2153 }
2154
6dd9a7c7 2155 while (nr_pages > 0) {
c85994e4
DW
2156 uint64_t tmp;
2157
e1605495 2158 if (!sg_res) {
f532959b 2159 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2160 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2161 sg->dma_length = sg->length;
3e6110fd 2162 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2163 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2164 }
6dd9a7c7 2165
e1605495 2166 if (!pte) {
6dd9a7c7
YS
2167 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2168
5cf0a76f 2169 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2170 if (!pte)
2171 return -ENOMEM;
6dd9a7c7 2172 /* It is a large page */
6491d4d0 2173 if (largepage_lvl > 1) {
ba2374fd
CZ
2174 unsigned long nr_superpages, end_pfn;
2175
6dd9a7c7 2176 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2177 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2178
2179 nr_superpages = sg_res / lvl_pages;
2180 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2181
d41a4adb
JL
2182 /*
2183 * Ensure that old small page tables are
ba2374fd 2184 * removed to make room for superpage(s).
d41a4adb 2185 */
ba2374fd 2186 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
6491d4d0 2187 } else {
6dd9a7c7 2188 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2189 }
6dd9a7c7 2190
e1605495
DW
2191 }
2192 /* We don't need lock here, nobody else
2193 * touches the iova range
2194 */
7766a3fb 2195 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2196 if (tmp) {
1bf20f0d 2197 static int dumps = 5;
9f10e5bf
JR
2198 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2199 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2200 if (dumps) {
2201 dumps--;
2202 debug_dma_dump_mappings(NULL);
2203 }
2204 WARN_ON(1);
2205 }
6dd9a7c7
YS
2206
2207 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2208
2209 BUG_ON(nr_pages < lvl_pages);
2210 BUG_ON(sg_res < lvl_pages);
2211
2212 nr_pages -= lvl_pages;
2213 iov_pfn += lvl_pages;
2214 phys_pfn += lvl_pages;
2215 pteval += lvl_pages * VTD_PAGE_SIZE;
2216 sg_res -= lvl_pages;
2217
2218 /* If the next PTE would be the first in a new page, then we
2219 need to flush the cache on the entries we've just written.
2220 And then we'll need to recalculate 'pte', so clear it and
2221 let it get set again in the if (!pte) block above.
2222
2223 If we're done (!nr_pages) we need to flush the cache too.
2224
2225 Also if we've been setting superpages, we may need to
2226 recalculate 'pte' and switch back to smaller pages for the
2227 end of the mapping, if the trailing size is not enough to
2228 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2229 pte++;
6dd9a7c7
YS
2230 if (!nr_pages || first_pte_in_page(pte) ||
2231 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2232 domain_flush_cache(domain, first_pte,
2233 (void *)pte - (void *)first_pte);
2234 pte = NULL;
2235 }
6dd9a7c7
YS
2236
2237 if (!sg_res && nr_pages)
e1605495
DW
2238 sg = sg_next(sg);
2239 }
2240 return 0;
2241}
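/*
 * Illustrative walk-through (not from the original source): mapping a 4MiB
 * scatterlist entry whose IOVA and physical address are both 2MiB aligned
 * takes two iterations of the loop above, each installing one
 * DMA_PTE_LARGE_PAGE entry (lvl_pages == 512).  If the entry had been, say,
 * 2MiB + 16KiB, the trailing 16KiB no longer satisfies sg_res >= lvl_pages,
 * so 'pte' is dropped and recalculated and the tail is mapped with ordinary
 * 4KiB PTEs.
 */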
2242
9051aa02
DW
2243static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2244 struct scatterlist *sg, unsigned long nr_pages,
2245 int prot)
ba395927 2246{
9051aa02
DW
2247 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2248}
6f6a00e4 2249
9051aa02
DW
2250static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2251 unsigned long phys_pfn, unsigned long nr_pages,
2252 int prot)
2253{
2254 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2255}
2256
2452d9db 2257static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2258{
c7151a8d
WH
2259 if (!iommu)
2260 return;
8c11e798
WH
2261
2262 clear_context_table(iommu, bus, devfn);
2263 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2264 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2265 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2266}
2267
109b9b04
DW
2268static inline void unlink_domain_info(struct device_domain_info *info)
2269{
2270 assert_spin_locked(&device_domain_lock);
2271 list_del(&info->link);
2272 list_del(&info->global);
2273 if (info->dev)
0bcb3e28 2274 info->dev->archdata.iommu = NULL;
109b9b04
DW
2275}
2276
ba395927
KA
2277static void domain_remove_dev_info(struct dmar_domain *domain)
2278{
3a74ca01 2279 struct device_domain_info *info, *tmp;
fb170fb4 2280 unsigned long flags;
ba395927
KA
2281
2282 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2283 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2284 __dmar_remove_one_dev_info(info);
ba395927
KA
2285 spin_unlock_irqrestore(&device_domain_lock, flags);
2286}
2287
2288/*
2289 * find_domain
1525a29a 2290 * Note: we use struct device->archdata.iommu to store the info
ba395927 2291 */
1525a29a 2292static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2293{
2294 struct device_domain_info *info;
2295
2296 /* No lock here, assumes no domain exit in normal case */
1525a29a 2297 info = dev->archdata.iommu;
ba395927
KA
2298 if (info)
2299 return info->domain;
2300 return NULL;
2301}
2302
5a8f40e8 2303static inline struct device_domain_info *
745f2586
JL
2304dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2305{
2306 struct device_domain_info *info;
2307
2308 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2309 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2310 info->devfn == devfn)
5a8f40e8 2311 return info;
745f2586
JL
2312
2313 return NULL;
2314}
2315
5db31569
JR
2316static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2317 int bus, int devfn,
2318 struct device *dev,
2319 struct dmar_domain *domain)
745f2586 2320{
5a8f40e8 2321 struct dmar_domain *found = NULL;
745f2586
JL
2322 struct device_domain_info *info;
2323 unsigned long flags;
d160aca5 2324 int ret;
745f2586
JL
2325
2326 info = alloc_devinfo_mem();
2327 if (!info)
b718cd3d 2328 return NULL;
745f2586 2329
745f2586
JL
2330 info->bus = bus;
2331 info->devfn = devfn;
b16d0cb9
DW
2332 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2333 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2334 info->ats_qdep = 0;
745f2586
JL
2335 info->dev = dev;
2336 info->domain = domain;
5a8f40e8 2337 info->iommu = iommu;
745f2586 2338
b16d0cb9
DW
2339 if (dev && dev_is_pci(dev)) {
2340 struct pci_dev *pdev = to_pci_dev(info->dev);
2341
2342 if (ecap_dev_iotlb_support(iommu->ecap) &&
2343 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2344 dmar_find_matched_atsr_unit(pdev))
2345 info->ats_supported = 1;
2346
2347 if (ecs_enabled(iommu)) {
2348 if (pasid_enabled(iommu)) {
2349 int features = pci_pasid_features(pdev);
2350 if (features >= 0)
2351 info->pasid_supported = features | 1;
2352 }
2353
2354 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2355 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2356 info->pri_supported = 1;
2357 }
2358 }
2359
745f2586
JL
2360 spin_lock_irqsave(&device_domain_lock, flags);
2361 if (dev)
0bcb3e28 2362 found = find_domain(dev);
f303e507
JR
2363
2364 if (!found) {
5a8f40e8 2365 struct device_domain_info *info2;
41e80dca 2366 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2367 if (info2) {
2368 found = info2->domain;
2369 info2->dev = dev;
2370 }
5a8f40e8 2371 }
f303e507 2372
745f2586
JL
2373 if (found) {
2374 spin_unlock_irqrestore(&device_domain_lock, flags);
2375 free_devinfo_mem(info);
b718cd3d
DW
2376 /* Caller must free the original domain */
2377 return found;
745f2586
JL
2378 }
2379
d160aca5
JR
2380 spin_lock(&iommu->lock);
2381 ret = domain_attach_iommu(domain, iommu);
2382 spin_unlock(&iommu->lock);
2383
2384 if (ret) {
c6c2cebd 2385 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2386 free_devinfo_mem(info);
c6c2cebd
JR
2387 return NULL;
2388 }
c6c2cebd 2389
b718cd3d
DW
2390 list_add(&info->link, &domain->devices);
2391 list_add(&info->global, &device_domain_list);
2392 if (dev)
2393 dev->archdata.iommu = info;
2394 spin_unlock_irqrestore(&device_domain_lock, flags);
2395
cc4e2575
JR
2396 if (dev && domain_context_mapping(domain, dev)) {
2397 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2398 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2399 return NULL;
2400 }
2401
b718cd3d 2402 return domain;
745f2586
JL
2403}
2404
579305f7
AW
2405static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2406{
2407 *(u16 *)opaque = alias;
2408 return 0;
2409}
2410
ba395927 2411/* domain is initialized */
146922ec 2412static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2413{
cc4e2575 2414 struct device_domain_info *info = NULL;
579305f7
AW
2415 struct dmar_domain *domain, *tmp;
2416 struct intel_iommu *iommu;
08a7f456 2417 u16 req_id, dma_alias;
ba395927 2418 unsigned long flags;
aa4d066a 2419 u8 bus, devfn;
ba395927 2420
146922ec 2421 domain = find_domain(dev);
ba395927
KA
2422 if (domain)
2423 return domain;
2424
579305f7
AW
2425 iommu = device_to_iommu(dev, &bus, &devfn);
2426 if (!iommu)
2427 return NULL;
2428
08a7f456
JR
2429 req_id = ((u16)bus << 8) | devfn;
2430
146922ec
DW
2431 if (dev_is_pci(dev)) {
2432 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2433
579305f7
AW
2434 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2435
2436 spin_lock_irqsave(&device_domain_lock, flags);
2437 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2438 PCI_BUS_NUM(dma_alias),
2439 dma_alias & 0xff);
2440 if (info) {
2441 iommu = info->iommu;
2442 domain = info->domain;
5a8f40e8 2443 }
579305f7 2444 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2445
579305f7
AW
2446 /* DMA alias already has a domain, uses it */
2447 if (info)
2448 goto found_domain;
2449 }
ba395927 2450
146922ec 2451 /* Allocate and initialize new domain for the device */
ab8dfe25 2452 domain = alloc_domain(0);
745f2586 2453 if (!domain)
579305f7 2454 return NULL;
dc534b25 2455 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2456 domain_exit(domain);
2457 return NULL;
2c2e2c38 2458 }
ba395927 2459
579305f7 2460 /* register PCI DMA alias device */
0b74ecdf 2461 if (dev_is_pci(dev) && req_id != dma_alias) {
5db31569
JR
2462 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2463 dma_alias & 0xff, NULL, domain);
579305f7
AW
2464
2465 if (!tmp || tmp != domain) {
2466 domain_exit(domain);
2467 domain = tmp;
2468 }
2469
b718cd3d 2470 if (!domain)
579305f7 2471 return NULL;
ba395927
KA
2472 }
2473
2474found_domain:
5db31569 2475 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
579305f7
AW
2476
2477 if (!tmp || tmp != domain) {
2478 domain_exit(domain);
2479 domain = tmp;
2480 }
b718cd3d
DW
2481
2482 return domain;
ba395927
KA
2483}
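/*
 * Illustrative note (not from the original source): if the device has a DMA
 * alias (e.g. a function behind a PCIe-to-PCI bridge), the alias is looked up
 * first and its existing domain is reused, so all devices that share a
 * requester-id end up in one domain.  A freshly allocated domain is
 * registered for the alias requester-id as well before it is bound to the
 * device itself.
 */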
2484
b213203e
DW
2485static int iommu_domain_identity_map(struct dmar_domain *domain,
2486 unsigned long long start,
2487 unsigned long long end)
ba395927 2488{
c5395d5c
DW
2489 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2490 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2491
2492 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2493 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2494 pr_err("Reserving iova failed\n");
b213203e 2495 return -ENOMEM;
ba395927
KA
2496 }
2497
af1089ce 2498 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2499 /*
2500 * RMRR range might have overlap with physical memory range,
2501 * clear it first
2502 */
c5395d5c 2503 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2504
c5395d5c
DW
2505 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2506 last_vpfn - first_vpfn + 1,
61df7443 2507 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2508}
2509
d66ce54b
JR
2510static int domain_prepare_identity_map(struct device *dev,
2511 struct dmar_domain *domain,
2512 unsigned long long start,
2513 unsigned long long end)
b213203e 2514{
19943b0e
DW
2515 /* For _hardware_ passthrough, don't bother. But for software
2516 passthrough, we do it anyway -- it may indicate a memory
2517 range which is reserved in E820, so which didn't get set
2518 up to start with in si_domain */
2519 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2520 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2521 dev_name(dev), start, end);
19943b0e
DW
2522 return 0;
2523 }
2524
9f10e5bf
JR
2525 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2526 dev_name(dev), start, end);
2527
5595b528
DW
2528 if (end < start) {
2529 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2530 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2531 dmi_get_system_info(DMI_BIOS_VENDOR),
2532 dmi_get_system_info(DMI_BIOS_VERSION),
2533 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2534 return -EIO;
5595b528
DW
2535 }
2536
2ff729f5
DW
2537 if (end >> agaw_to_width(domain->agaw)) {
2538 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2539 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2540 agaw_to_width(domain->agaw),
2541 dmi_get_system_info(DMI_BIOS_VENDOR),
2542 dmi_get_system_info(DMI_BIOS_VERSION),
2543 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2544 return -EIO;
2ff729f5 2545 }
19943b0e 2546
d66ce54b
JR
2547 return iommu_domain_identity_map(domain, start, end);
2548}
ba395927 2549
d66ce54b
JR
2550static int iommu_prepare_identity_map(struct device *dev,
2551 unsigned long long start,
2552 unsigned long long end)
2553{
2554 struct dmar_domain *domain;
2555 int ret;
2556
2557 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2558 if (!domain)
2559 return -ENOMEM;
2560
2561 ret = domain_prepare_identity_map(dev, domain, start, end);
2562 if (ret)
2563 domain_exit(domain);
b213203e 2564
ba395927 2565 return ret;
ba395927
KA
2566}
2567
2568static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2569 struct device *dev)
ba395927 2570{
0b9d9753 2571 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2572 return 0;
0b9d9753
DW
2573 return iommu_prepare_identity_map(dev, rmrr->base_address,
2574 rmrr->end_address);
ba395927
KA
2575}
2576
d3f13810 2577#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2578static inline void iommu_prepare_isa(void)
2579{
2580 struct pci_dev *pdev;
2581 int ret;
2582
2583 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2584 if (!pdev)
2585 return;
2586
9f10e5bf 2587 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2588 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2589
2590 if (ret)
9f10e5bf 2591 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2592
9b27e82d 2593 pci_dev_put(pdev);
49a0429e
KA
2594}
2595#else
2596static inline void iommu_prepare_isa(void)
2597{
2598 return;
2599}
d3f13810 2600#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2601
2c2e2c38 2602static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2603
071e1374 2604static int __init si_domain_init(int hw)
2c2e2c38 2605{
c7ab48d2 2606 int nid, ret = 0;
2c2e2c38 2607
ab8dfe25 2608 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2609 if (!si_domain)
2610 return -EFAULT;
2611
2c2e2c38
FY
2612 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2613 domain_exit(si_domain);
2614 return -EFAULT;
2615 }
2616
0dc79715 2617 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2618
19943b0e
DW
2619 if (hw)
2620 return 0;
2621
c7ab48d2 2622 for_each_online_node(nid) {
5dfe8660
TH
2623 unsigned long start_pfn, end_pfn;
2624 int i;
2625
2626 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2627 ret = iommu_domain_identity_map(si_domain,
2628 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2629 if (ret)
2630 return ret;
2631 }
c7ab48d2
DW
2632 }
2633
2c2e2c38
FY
2634 return 0;
2635}
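/*
 * Illustrative example (not from the original source): on a machine with RAM
 * at 0..0x7fffffff and 0x100000000..0x17fffffff, the loop above creates 1:1
 * mappings for both ranges in si_domain, so a device attached to it sees
 * IOVA == physical address.  With hardware pass-through (hw != 0) the page
 * tables are skipped entirely and the context entries are later programmed
 * in pass-through mode instead.
 */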
2636
9b226624 2637static int identity_mapping(struct device *dev)
2c2e2c38
FY
2638{
2639 struct device_domain_info *info;
2640
2641 if (likely(!iommu_identity_mapping))
2642 return 0;
2643
9b226624 2644 info = dev->archdata.iommu;
cb452a40
MT
2645 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2646 return (info->domain == si_domain);
2c2e2c38 2647
2c2e2c38
FY
2648 return 0;
2649}
2650
28ccce0d 2651static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2652{
0ac72664 2653 struct dmar_domain *ndomain;
5a8f40e8 2654 struct intel_iommu *iommu;
156baca8 2655 u8 bus, devfn;
2c2e2c38 2656
5913c9bf 2657 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2658 if (!iommu)
2659 return -ENODEV;
2660
5db31569 2661 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2662 if (ndomain != domain)
2663 return -EBUSY;
2c2e2c38
FY
2664
2665 return 0;
2666}
2667
0b9d9753 2668static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2669{
2670 struct dmar_rmrr_unit *rmrr;
832bd858 2671 struct device *tmp;
ea2447f7
TM
2672 int i;
2673
0e242612 2674 rcu_read_lock();
ea2447f7 2675 for_each_rmrr_units(rmrr) {
b683b230
JL
2676 /*
2677 * Return TRUE if this RMRR contains the device that
2678 * is passed in.
2679 */
2680 for_each_active_dev_scope(rmrr->devices,
2681 rmrr->devices_cnt, i, tmp)
0b9d9753 2682 if (tmp == dev) {
0e242612 2683 rcu_read_unlock();
ea2447f7 2684 return true;
b683b230 2685 }
ea2447f7 2686 }
0e242612 2687 rcu_read_unlock();
ea2447f7
TM
2688 return false;
2689}
2690
c875d2c1
AW
2691/*
2692 * There are a couple cases where we need to restrict the functionality of
2693 * devices associated with RMRRs. The first is when evaluating a device for
2694 * identity mapping because problems exist when devices are moved in and out
2695 * of domains and their respective RMRR information is lost. This means that
2696 * a device with associated RMRRs will never be in a "passthrough" domain.
2697 * The second is use of the device through the IOMMU API. This interface
2698 * expects to have full control of the IOVA space for the device. We cannot
2699 * satisfy both the requirement that RMRR access is maintained and have an
2700 * unencumbered IOVA space. We also have no ability to quiesce the device's
2701 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2702 * We therefore prevent devices associated with an RMRR from participating in
2703 * the IOMMU API, which eliminates them from device assignment.
2704 *
2705 * In both cases we assume that PCI USB devices with RMRRs have them largely
2706 * for historical reasons and that the RMRR space is not actively used post
2707 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2708 *
2709 * The same exception is made for graphics devices, with the requirement that
2710 * any use of the RMRR regions will be torn down before assigning the device
2711 * to a guest.
c875d2c1
AW
2712 */
2713static bool device_is_rmrr_locked(struct device *dev)
2714{
2715 if (!device_has_rmrr(dev))
2716 return false;
2717
2718 if (dev_is_pci(dev)) {
2719 struct pci_dev *pdev = to_pci_dev(dev);
2720
18436afd 2721 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2722 return false;
2723 }
2724
2725 return true;
2726}
2727
3bdb2591 2728static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2729{
ea2447f7 2730
3bdb2591
DW
2731 if (dev_is_pci(dev)) {
2732 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2733
c875d2c1 2734 if (device_is_rmrr_locked(dev))
3bdb2591 2735 return 0;
e0fc7e0b 2736
3bdb2591
DW
2737 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2738 return 1;
e0fc7e0b 2739
3bdb2591
DW
2740 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2741 return 1;
6941af28 2742
3bdb2591 2743 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2744 return 0;
3bdb2591
DW
2745
2746 /*
2747 * We want to start off with all devices in the 1:1 domain, and
2748 * take them out later if we find they can't access all of memory.
2749 *
2750 * However, we can't do this for PCI devices behind bridges,
2751 * because all PCI devices behind the same bridge will end up
2752 * with the same source-id on their transactions.
2753 *
2754 * Practically speaking, we can't change things around for these
2755 * devices at run-time, because we can't be sure there'll be no
2756 * DMA transactions in flight for any of their siblings.
2757 *
2758 * So PCI devices (unless they're on the root bus) as well as
2759 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2760 * the 1:1 domain, just in _case_ one of their siblings turns out
2761 * not to be able to map all of memory.
2762 */
2763 if (!pci_is_pcie(pdev)) {
2764 if (!pci_is_root_bus(pdev->bus))
2765 return 0;
2766 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2767 return 0;
2768 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2769 return 0;
3bdb2591
DW
2770 } else {
2771 if (device_has_rmrr(dev))
2772 return 0;
2773 }
3dfc813d 2774
3bdb2591 2775 /*
3dfc813d 2776 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2777 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2778 * take them out of the 1:1 domain later.
2779 */
8fcc5372
CW
2780 if (!startup) {
2781 /*
2782 * If the device's dma_mask is less than the system's memory
2783 * size then this is not a candidate for identity mapping.
2784 */
3bdb2591 2785 u64 dma_mask = *dev->dma_mask;
8fcc5372 2786
3bdb2591
DW
2787 if (dev->coherent_dma_mask &&
2788 dev->coherent_dma_mask < dma_mask)
2789 dma_mask = dev->coherent_dma_mask;
8fcc5372 2790
3bdb2591 2791 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2792 }
6941af28
DW
2793
2794 return 1;
2795}
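/*
 * Illustrative example (not from the original source): a device whose
 * dma_mask is DMA_BIT_MASK(32) on a host with 8GiB of RAM fails the
 * dma_get_required_mask() check above when evaluated at run time, so it is
 * not (or no longer) identity mapped and instead gets a regular translated
 * domain that can hand out IOVAs below 4GiB.
 */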
2796
cf04eee8
DW
2797static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2798{
2799 int ret;
2800
2801 if (!iommu_should_identity_map(dev, 1))
2802 return 0;
2803
28ccce0d 2804 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2805 if (!ret)
9f10e5bf
JR
2806 pr_info("%s identity mapping for device %s\n",
2807 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2808 else if (ret == -ENODEV)
2809 /* device not associated with an iommu */
2810 ret = 0;
2811
2812 return ret;
2813}
2814
2815
071e1374 2816static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2817{
2c2e2c38 2818 struct pci_dev *pdev = NULL;
cf04eee8
DW
2819 struct dmar_drhd_unit *drhd;
2820 struct intel_iommu *iommu;
2821 struct device *dev;
2822 int i;
2823 int ret = 0;
2c2e2c38 2824
2c2e2c38 2825 for_each_pci_dev(pdev) {
cf04eee8
DW
2826 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2827 if (ret)
2828 return ret;
2829 }
2830
2831 for_each_active_iommu(iommu, drhd)
2832 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2833 struct acpi_device_physical_node *pn;
2834 struct acpi_device *adev;
2835
2836 if (dev->bus != &acpi_bus_type)
2837 continue;
86080ccc 2838
cf04eee8
DW
2839 adev= to_acpi_device(dev);
2840 mutex_lock(&adev->physical_node_lock);
2841 list_for_each_entry(pn, &adev->physical_node_list, node) {
2842 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2843 if (ret)
2844 break;
eae460b6 2845 }
cf04eee8
DW
2846 mutex_unlock(&adev->physical_node_lock);
2847 if (ret)
2848 return ret;
62edf5dc 2849 }
2c2e2c38
FY
2850
2851 return 0;
2852}
2853
ffebeb46
JL
2854static void intel_iommu_init_qi(struct intel_iommu *iommu)
2855{
2856 /*
2857 * Start from the sane iommu hardware state.
2858 * If the queued invalidation is already initialized by us
2859 * (for example, while enabling interrupt-remapping) then
2860 * we got the things already rolling from a sane state.
2861 */
2862 if (!iommu->qi) {
2863 /*
2864 * Clear any previous faults.
2865 */
2866 dmar_fault(-1, iommu);
2867 /*
2868 * Disable queued invalidation if supported and already enabled
2869 * before OS handover.
2870 */
2871 dmar_disable_qi(iommu);
2872 }
2873
2874 if (dmar_enable_qi(iommu)) {
2875 /*
2876 * Queued Invalidate not enabled, use Register Based Invalidate
2877 */
2878 iommu->flush.flush_context = __iommu_flush_context;
2879 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2880 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2881 iommu->name);
2882 } else {
2883 iommu->flush.flush_context = qi_flush_context;
2884 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2885 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2886 }
2887}
2888
091d42e4 2889static int copy_context_table(struct intel_iommu *iommu,
dfddb969 2890 struct root_entry *old_re,
091d42e4
JR
2891 struct context_entry **tbl,
2892 int bus, bool ext)
2893{
dbcd861f 2894 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 2895 struct context_entry *new_ce = NULL, ce;
dfddb969 2896 struct context_entry *old_ce = NULL;
543c8dcf 2897 struct root_entry re;
091d42e4
JR
2898 phys_addr_t old_ce_phys;
2899
2900 tbl_idx = ext ? bus * 2 : bus;
dfddb969 2901 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
2902
2903 for (devfn = 0; devfn < 256; devfn++) {
2904 /* First calculate the correct index */
2905 idx = (ext ? devfn * 2 : devfn) % 256;
2906
2907 if (idx == 0) {
2908 /* First save what we may have and clean up */
2909 if (new_ce) {
2910 tbl[tbl_idx] = new_ce;
2911 __iommu_flush_cache(iommu, new_ce,
2912 VTD_PAGE_SIZE);
2913 pos = 1;
2914 }
2915
2916 if (old_ce)
2917 iounmap(old_ce);
2918
2919 ret = 0;
2920 if (devfn < 0x80)
543c8dcf 2921 old_ce_phys = root_entry_lctp(&re);
091d42e4 2922 else
543c8dcf 2923 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
2924
2925 if (!old_ce_phys) {
2926 if (ext && devfn == 0) {
2927 /* No LCTP, try UCTP */
2928 devfn = 0x7f;
2929 continue;
2930 } else {
2931 goto out;
2932 }
2933 }
2934
2935 ret = -ENOMEM;
dfddb969
DW
2936 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2937 MEMREMAP_WB);
091d42e4
JR
2938 if (!old_ce)
2939 goto out;
2940
2941 new_ce = alloc_pgtable_page(iommu->node);
2942 if (!new_ce)
2943 goto out_unmap;
2944
2945 ret = 0;
2946 }
2947
2948 /* Now copy the context entry */
dfddb969 2949 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 2950
cf484d0e 2951 if (!__context_present(&ce))
091d42e4
JR
2952 continue;
2953
dbcd861f
JR
2954 did = context_domain_id(&ce);
2955 if (did >= 0 && did < cap_ndoms(iommu->cap))
2956 set_bit(did, iommu->domain_ids);
2957
cf484d0e
JR
2958 /*
2959 * We need a marker for copied context entries. This
2960 * marker needs to work for the old format as well as
2961 * for extended context entries.
2962 *
2963 * Bit 67 of the context entry is used. In the old
2964 * format this bit is available to software, in the
2965 * extended format it is the PGE bit, but PGE is ignored
2966 * by HW if PASIDs are disabled (and thus still
2967 * available).
2968 *
2969 * So disable PASIDs first and then mark the entry
2970 * copied. This means that we don't copy PASID
2971 * translations from the old kernel, but this is fine as
2972 * faults there are not fatal.
2973 */
2974 context_clear_pasid_enable(&ce);
2975 context_set_copied(&ce);
2976
091d42e4
JR
2977 new_ce[idx] = ce;
2978 }
2979
2980 tbl[tbl_idx + pos] = new_ce;
2981
2982 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2983
2984out_unmap:
dfddb969 2985 memunmap(old_ce);
091d42e4
JR
2986
2987out:
2988 return ret;
2989}
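/*
 * Illustrative note (not from the original source): in the extended (ECS)
 * layout a context entry is twice as large, so each bus needs two pages of
 * context entries.  That is why tbl_idx is bus * 2 and why the walk above
 * switches to the second page (pos = 1) once devfn reaches 0x80: entries for
 * devfn 0x00-0x7f land in tbl[2 * bus], entries for devfn 0x80-0xff in
 * tbl[2 * bus + 1].
 */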
2990
2991static int copy_translation_tables(struct intel_iommu *iommu)
2992{
2993 struct context_entry **ctxt_tbls;
dfddb969 2994 struct root_entry *old_rt;
091d42e4
JR
2995 phys_addr_t old_rt_phys;
2996 int ctxt_table_entries;
2997 unsigned long flags;
2998 u64 rtaddr_reg;
2999 int bus, ret;
c3361f2f 3000 bool new_ext, ext;
091d42e4
JR
3001
3002 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3003 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3004 new_ext = !!ecap_ecs(iommu->ecap);
3005
3006 /*
3007 * The RTT bit can only be changed when translation is disabled,
3008 * but disabling translation means to open a window for data
3009 * corruption. So bail out and don't copy anything if we would
3010 * have to change the bit.
3011 */
3012 if (new_ext != ext)
3013 return -EINVAL;
091d42e4
JR
3014
3015 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3016 if (!old_rt_phys)
3017 return -EINVAL;
3018
dfddb969 3019 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3020 if (!old_rt)
3021 return -ENOMEM;
3022
3023 /* This is too big for the stack - allocate it from slab */
3024 ctxt_table_entries = ext ? 512 : 256;
3025 ret = -ENOMEM;
3026 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3027 if (!ctxt_tbls)
3028 goto out_unmap;
3029
3030 for (bus = 0; bus < 256; bus++) {
3031 ret = copy_context_table(iommu, &old_rt[bus],
3032 ctxt_tbls, bus, ext);
3033 if (ret) {
3034 pr_err("%s: Failed to copy context table for bus %d\n",
3035 iommu->name, bus);
3036 continue;
3037 }
3038 }
3039
3040 spin_lock_irqsave(&iommu->lock, flags);
3041
3042 /* Context tables are copied, now write them to the root_entry table */
3043 for (bus = 0; bus < 256; bus++) {
3044 int idx = ext ? bus * 2 : bus;
3045 u64 val;
3046
3047 if (ctxt_tbls[idx]) {
3048 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3049 iommu->root_entry[bus].lo = val;
3050 }
3051
3052 if (!ext || !ctxt_tbls[idx + 1])
3053 continue;
3054
3055 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3056 iommu->root_entry[bus].hi = val;
3057 }
3058
3059 spin_unlock_irqrestore(&iommu->lock, flags);
3060
3061 kfree(ctxt_tbls);
3062
3063 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3064
3065 ret = 0;
3066
3067out_unmap:
dfddb969 3068 memunmap(old_rt);
091d42e4
JR
3069
3070 return ret;
3071}
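/*
 * Illustrative summary (not from the original source): this path runs in the
 * kdump kernel.  The old kernel's root table address is read back from
 * DMAR_RTADDR_REG, its context tables are memremap()ed and copied, and the
 * copies are hooked into the new root table, so devices with DMA still in
 * flight keep valid translations while the crash dump is taken.  The copy is
 * refused if the old and new kernels disagree on the extended (RTT) root
 * table format, since flipping that bit would require disabling translation
 * first.
 */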
3072
b779260b 3073static int __init init_dmars(void)
ba395927
KA
3074{
3075 struct dmar_drhd_unit *drhd;
3076 struct dmar_rmrr_unit *rmrr;
a87f4918 3077 bool copied_tables = false;
832bd858 3078 struct device *dev;
ba395927 3079 struct intel_iommu *iommu;
9d783ba0 3080 int i, ret;
2c2e2c38 3081
ba395927
KA
3082 /*
3083 * for each drhd
3084 * allocate root
3085 * initialize and program root entry to not present
3086 * endfor
3087 */
3088 for_each_drhd_unit(drhd) {
5e0d2a6f 3089 /*
3090 * lock not needed as this is only incremented in the single
3091 * threaded kernel __init code path all other access are read
3092 * only
3093 */
78d8e704 3094 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3095 g_num_of_iommus++;
3096 continue;
3097 }
9f10e5bf 3098 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3099 }
3100
ffebeb46
JL
3101 /* Preallocate enough resources for IOMMU hot-addition */
3102 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3103 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3104
d9630fe9
WH
3105 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3106 GFP_KERNEL);
3107 if (!g_iommus) {
9f10e5bf 3108 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3109 ret = -ENOMEM;
3110 goto error;
3111 }
3112
80b20dd8 3113 deferred_flush = kzalloc(g_num_of_iommus *
3114 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3115 if (!deferred_flush) {
5e0d2a6f 3116 ret = -ENOMEM;
989d51fc 3117 goto free_g_iommus;
5e0d2a6f 3118 }
3119
7c919779 3120 for_each_active_iommu(iommu, drhd) {
d9630fe9 3121 g_iommus[iommu->seq_id] = iommu;
ba395927 3122
b63d80d1
JR
3123 intel_iommu_init_qi(iommu);
3124
e61d98d8
SS
3125 ret = iommu_init_domains(iommu);
3126 if (ret)
989d51fc 3127 goto free_iommu;
e61d98d8 3128
4158c2ec
JR
3129 init_translation_status(iommu);
3130
091d42e4
JR
3131 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3132 iommu_disable_translation(iommu);
3133 clear_translation_pre_enabled(iommu);
3134 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3135 iommu->name);
3136 }
4158c2ec 3137
ba395927
KA
3138 /*
3139 * TBD:
3140 * we could share the same root & context tables
25985edc 3141 * among all IOMMUs. Need to split it later.
ba395927
KA
3142 */
3143 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3144 if (ret)
989d51fc 3145 goto free_iommu;
5f0a7f76 3146
091d42e4
JR
3147 if (translation_pre_enabled(iommu)) {
3148 pr_info("Translation already enabled - trying to copy translation structures\n");
3149
3150 ret = copy_translation_tables(iommu);
3151 if (ret) {
3152 /*
3153 * We found the IOMMU with translation
3154 * enabled - but failed to copy over the
3155 * old root-entry table. Try to proceed
3156 * by disabling translation now and
3157 * allocating a clean root-entry table.
3158 * This might cause DMAR faults, but
3159 * probably the dump will still succeed.
3160 */
3161 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3162 iommu->name);
3163 iommu_disable_translation(iommu);
3164 clear_translation_pre_enabled(iommu);
3165 } else {
3166 pr_info("Copied translation tables from previous kernel for %s\n",
3167 iommu->name);
a87f4918 3168 copied_tables = true;
091d42e4
JR
3169 }
3170 }
3171
5f0a7f76
JR
3172 iommu_flush_write_buffer(iommu);
3173 iommu_set_root_entry(iommu);
3174 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3175 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3176
4ed0d3e6 3177 if (!ecap_pass_through(iommu->ecap))
19943b0e 3178 hw_pass_through = 0;
8a94ade4
DW
3179#ifdef CONFIG_INTEL_IOMMU_SVM
3180 if (pasid_enabled(iommu))
3181 intel_svm_alloc_pasid_tables(iommu);
3182#endif
ba395927
KA
3183 }
3184
19943b0e 3185 if (iommu_pass_through)
e0fc7e0b
DW
3186 iommu_identity_mapping |= IDENTMAP_ALL;
3187
d3f13810 3188#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3189 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3190#endif
e0fc7e0b 3191
86080ccc
JR
3192 if (iommu_identity_mapping) {
3193 ret = si_domain_init(hw_pass_through);
3194 if (ret)
3195 goto free_iommu;
3196 }
3197
e0fc7e0b
DW
3198 check_tylersburg_isoch();
3199
a87f4918
JR
3200 /*
3201 * If we copied translations from a previous kernel in the kdump
3202 * case, we can not assign the devices to domains now, as that
3203 * would eliminate the old mappings. So skip this part and defer
3204 * the assignment to device driver initialization time.
3205 */
3206 if (copied_tables)
3207 goto domains_done;
3208
ba395927 3209 /*
19943b0e
DW
3210 * If pass through is not set or not enabled, setup context entries for
3211 * identity mappings for rmrr, gfx, and isa and may fall back to static
3212 * identity mapping if iommu_identity_mapping is set.
ba395927 3213 */
19943b0e
DW
3214 if (iommu_identity_mapping) {
3215 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3216 if (ret) {
9f10e5bf 3217 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3218 goto free_iommu;
ba395927
KA
3219 }
3220 }
ba395927 3221 /*
19943b0e
DW
3222 * For each rmrr
3223 * for each dev attached to rmrr
3224 * do
3225 * locate drhd for dev, alloc domain for dev
3226 * allocate free domain
3227 * allocate page table entries for rmrr
3228 * if context not allocated for bus
3229 * allocate and init context
3230 * set present in root table for this bus
3231 * init context with domain, translation etc
3232 * endfor
3233 * endfor
ba395927 3234 */
9f10e5bf 3235 pr_info("Setting RMRR:\n");
19943b0e 3236 for_each_rmrr_units(rmrr) {
b683b230
JL
3237 /* some BIOS lists non-exist devices in DMAR table. */
3238 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3239 i, dev) {
0b9d9753 3240 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3241 if (ret)
9f10e5bf 3242 pr_err("Mapping reserved region failed\n");
ba395927 3243 }
4ed0d3e6 3244 }
49a0429e 3245
19943b0e
DW
3246 iommu_prepare_isa();
3247
a87f4918
JR
3248domains_done:
3249
ba395927
KA
3250 /*
3251 * for each drhd
3252 * enable fault log
3253 * global invalidate context cache
3254 * global invalidate iotlb
3255 * enable translation
3256 */
7c919779 3257 for_each_iommu(iommu, drhd) {
51a63e67
JC
3258 if (drhd->ignored) {
3259 /*
3260 * we always have to disable PMRs or DMA may fail on
3261 * this device
3262 */
3263 if (force_on)
7c919779 3264 iommu_disable_protect_mem_regions(iommu);
ba395927 3265 continue;
51a63e67 3266 }
ba395927
KA
3267
3268 iommu_flush_write_buffer(iommu);
3269
a222a7f0
DW
3270#ifdef CONFIG_INTEL_IOMMU_SVM
3271 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3272 ret = intel_svm_enable_prq(iommu);
3273 if (ret)
3274 goto free_iommu;
3275 }
3276#endif
3460a6d9
KA
3277 ret = dmar_set_interrupt(iommu);
3278 if (ret)
989d51fc 3279 goto free_iommu;
3460a6d9 3280
8939ddf6
JR
3281 if (!translation_pre_enabled(iommu))
3282 iommu_enable_translation(iommu);
3283
b94996c9 3284 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3285 }
3286
3287 return 0;
989d51fc
JL
3288
3289free_iommu:
ffebeb46
JL
3290 for_each_active_iommu(iommu, drhd) {
3291 disable_dmar_iommu(iommu);
a868e6b7 3292 free_dmar_iommu(iommu);
ffebeb46 3293 }
9bdc531e 3294 kfree(deferred_flush);
989d51fc 3295free_g_iommus:
d9630fe9 3296 kfree(g_iommus);
989d51fc 3297error:
ba395927
KA
3298 return ret;
3299}
3300
5a5e02a6 3301/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
3302static struct iova *intel_alloc_iova(struct device *dev,
3303 struct dmar_domain *domain,
3304 unsigned long nrpages, uint64_t dma_mask)
ba395927 3305{
ba395927 3306 struct iova *iova = NULL;
ba395927 3307
875764de
DW
3308 /* Restrict dma_mask to the width that the iommu can handle */
3309 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3310 /* Ensure we reserve the whole size-aligned region */
3311 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3312
3313 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3314 /*
3315 * First try to allocate an io virtual address in
284901a9 3316 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3317 * from higher range
ba395927 3318 */
875764de
DW
3319 iova = alloc_iova(&domain->iovad, nrpages,
3320 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3321 if (iova)
3322 return iova;
3323 }
3324 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3325 if (unlikely(!iova)) {
9f10e5bf 3326 pr_err("Allocating %ld-page iova for %s failed",
207e3592 3327 nrpages, dev_name(dev));
f76aec76
KA
3328 return NULL;
3329 }
3330
3331 return iova;
3332}
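/*
 * Illustrative example (not from the original source): a request for 3 MM
 * pages is first rounded up to 4 so the allocation stays size aligned.  For
 * a device with a 64-bit dma_mask the allocator first tries to stay below
 * 4GiB, since some devices and bridges handle 64-bit (DAC) addresses poorly,
 * and only falls back to the full mask if that window is exhausted; when
 * dmar_forcedac is set (the forcedac boot option) the low attempt is skipped.
 */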
3333
d4b709f4 3334static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76 3335{
b1ce5b79 3336 struct dmar_rmrr_unit *rmrr;
f76aec76 3337 struct dmar_domain *domain;
b1ce5b79
JR
3338 struct device *i_dev;
3339 int i, ret;
f76aec76 3340
d4b709f4 3341 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 3342 if (!domain) {
9f10e5bf 3343 pr_err("Allocating domain for %s failed\n",
d4b709f4 3344 dev_name(dev));
4fe05bbc 3345 return NULL;
ba395927
KA
3346 }
3347
b1ce5b79
JR
3348 /* We have a new domain - setup possible RMRRs for the device */
3349 rcu_read_lock();
3350 for_each_rmrr_units(rmrr) {
3351 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3352 i, i_dev) {
3353 if (i_dev != dev)
3354 continue;
3355
3356 ret = domain_prepare_identity_map(dev, domain,
3357 rmrr->base_address,
3358 rmrr->end_address);
3359 if (ret)
3360 dev_err(dev, "Mapping reserved region failed\n");
3361 }
3362 }
3363 rcu_read_unlock();
3364
f76aec76
KA
3365 return domain;
3366}
3367
d4b709f4 3368static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
3369{
3370 struct device_domain_info *info;
3371
3372 /* No lock here, assumes no domain exit in normal case */
d4b709f4 3373 info = dev->archdata.iommu;
147202aa
DW
3374 if (likely(info))
3375 return info->domain;
3376
3377 return __get_valid_domain_for_dev(dev);
3378}
3379
ecb509ec 3380/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3381static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3382{
3383 int found;
3384
3d89194a 3385 if (iommu_dummy(dev))
1e4c64c4
DW
3386 return 1;
3387
2c2e2c38 3388 if (!iommu_identity_mapping)
1e4c64c4 3389 return 0;
2c2e2c38 3390
9b226624 3391 found = identity_mapping(dev);
2c2e2c38 3392 if (found) {
ecb509ec 3393 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3394 return 1;
3395 else {
3396 /*
3397 * 32 bit DMA is removed from si_domain and fall back
3398 * to non-identity mapping.
3399 */
e6de0f8d 3400 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3401 pr_info("32bit %s uses non-identity mapping\n",
3402 dev_name(dev));
2c2e2c38
FY
3403 return 0;
3404 }
3405 } else {
3406 /*
3407 * In case of a detached 64 bit DMA device from vm, the device
3408 * is put into si_domain for identity mapping.
3409 */
ecb509ec 3410 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3411 int ret;
28ccce0d 3412 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3413 if (!ret) {
9f10e5bf
JR
3414 pr_info("64bit %s uses identity mapping\n",
3415 dev_name(dev));
2c2e2c38
FY
3416 return 1;
3417 }
3418 }
3419 }
3420
1e4c64c4 3421 return 0;
2c2e2c38
FY
3422}
3423
5040a918 3424static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3425 size_t size, int dir, u64 dma_mask)
f76aec76 3426{
f76aec76 3427 struct dmar_domain *domain;
5b6985ce 3428 phys_addr_t start_paddr;
f76aec76
KA
3429 struct iova *iova;
3430 int prot = 0;
6865f0d1 3431 int ret;
8c11e798 3432 struct intel_iommu *iommu;
33041ec0 3433 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3434
3435 BUG_ON(dir == DMA_NONE);
2c2e2c38 3436
5040a918 3437 if (iommu_no_mapping(dev))
6865f0d1 3438 return paddr;
f76aec76 3439
5040a918 3440 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3441 if (!domain)
3442 return 0;
3443
8c11e798 3444 iommu = domain_get_iommu(domain);
88cb6a74 3445 size = aligned_nrpages(paddr, size);
f76aec76 3446
5040a918 3447 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3448 if (!iova)
3449 goto error;
3450
ba395927
KA
3451 /*
3452 * Check if DMAR supports zero-length reads on write only
3453 * mappings..
3454 */
3455 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3456 !cap_zlr(iommu->cap))
ba395927
KA
3457 prot |= DMA_PTE_READ;
3458 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3459 prot |= DMA_PTE_WRITE;
3460 /*
6865f0d1 3461 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 3462 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3463 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
3464 * is not a big problem
3465 */
0ab36de2 3466 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3467 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3468 if (ret)
3469 goto error;
3470
1f0ef2aa
DW
3471 /* it's a non-present to present mapping. Only flush if caching mode */
3472 if (cap_caching_mode(iommu->cap))
a1ddcbe9
JR
3473 iommu_flush_iotlb_psi(iommu, domain,
3474 mm_to_dma_pfn(iova->pfn_lo),
3475 size, 0, 1);
1f0ef2aa 3476 else
8c11e798 3477 iommu_flush_write_buffer(iommu);
f76aec76 3478
03d6a246
DW
3479 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3480 start_paddr += paddr & ~PAGE_MASK;
3481 return start_paddr;
ba395927 3482
ba395927 3483error:
f76aec76
KA
3484 if (iova)
3485 __free_iova(&domain->iovad, iova);
9f10e5bf 3486 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3487 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3488 return 0;
3489}
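/*
 * Worked example (illustrative, not from the original source): mapping
 * 0x2000 bytes at physical address 0x12340010 needs
 * aligned_nrpages(0x12340010, 0x2000) == 3 VTD pages.  If the allocator
 * returns an IOVA range starting at pfn 0xffffd, the returned handle is
 * (0xffffd << PAGE_SHIFT) + 0x010 == 0xffffd010, i.e. the page offset of the
 * original buffer is preserved in the DMA address.
 */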
3490
ffbbef5c
FT
3491static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3492 unsigned long offset, size_t size,
3493 enum dma_data_direction dir,
3494 struct dma_attrs *attrs)
bb9e6d65 3495{
ffbbef5c 3496 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3497 dir, *dev->dma_mask);
bb9e6d65
FT
3498}
3499
5e0d2a6f 3500static void flush_unmaps(void)
3501{
80b20dd8 3502 int i, j;
5e0d2a6f 3503
5e0d2a6f 3504 timer_on = 0;
3505
3506 /* just flush them all */
3507 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3508 struct intel_iommu *iommu = g_iommus[i];
3509 if (!iommu)
3510 continue;
c42d9f32 3511
9dd2fe89
YZ
3512 if (!deferred_flush[i].next)
3513 continue;
3514
78d5f0f5
NA
 3515		/* In caching mode, global flushes make emulation expensive */
3516 if (!cap_caching_mode(iommu->cap))
3517 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3518 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3519 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3520 unsigned long mask;
3521 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3522 struct dmar_domain *domain = deferred_flush[i].domain[j];
3523
3524 /* On real hardware multiple invalidations are expensive */
3525 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3526 iommu_flush_iotlb_psi(iommu, domain,
a156ef99 3527 iova->pfn_lo, iova_size(iova),
ea8ea460 3528 !deferred_flush[i].freelist[j], 0);
78d5f0f5 3529 else {
a156ef99 3530 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
78d5f0f5
NA
3531 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3532 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3533 }
93a23a72 3534 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3535 if (deferred_flush[i].freelist[j])
3536 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3537 }
9dd2fe89 3538 deferred_flush[i].next = 0;
5e0d2a6f 3539 }
3540
5e0d2a6f 3541 list_size = 0;
5e0d2a6f 3542}
3543
3544static void flush_unmaps_timeout(unsigned long data)
3545{
80b20dd8 3546 unsigned long flags;
3547
3548 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3549 flush_unmaps();
80b20dd8 3550 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3551}
3552
ea8ea460 3553static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3554{
3555 unsigned long flags;
80b20dd8 3556 int next, iommu_id;
8c11e798 3557 struct intel_iommu *iommu;
5e0d2a6f 3558
3559 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3560 if (list_size == HIGH_WATER_MARK)
3561 flush_unmaps();
3562
8c11e798
WH
3563 iommu = domain_get_iommu(dom);
3564 iommu_id = iommu->seq_id;
c42d9f32 3565
80b20dd8 3566 next = deferred_flush[iommu_id].next;
3567 deferred_flush[iommu_id].domain[next] = dom;
3568 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3569 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3570 deferred_flush[iommu_id].next++;
5e0d2a6f 3571
3572 if (!timer_on) {
3573 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3574 timer_on = 1;
3575 }
3576 list_size++;
3577 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3578}
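/*
 * Deferred-unmap bookkeeping: unmaps are batched per IOMMU in
 * deferred_flush[] and released by flush_unmaps(), either when the
 * batch reaches HIGH_WATER_MARK entries or when the 10ms unmap_timer
 * fires.  This trades a window in which stale IOTLB entries may still
 * reference freed IOVAs for far fewer invalidation commands.
 */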
3579
d41a4adb 3580static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
ba395927 3581{
f76aec76 3582 struct dmar_domain *domain;
d794dc9b 3583 unsigned long start_pfn, last_pfn;
ba395927 3584 struct iova *iova;
8c11e798 3585 struct intel_iommu *iommu;
ea8ea460 3586 struct page *freelist;
ba395927 3587
73676832 3588 if (iommu_no_mapping(dev))
f76aec76 3589 return;
2c2e2c38 3590
1525a29a 3591 domain = find_domain(dev);
ba395927
KA
3592 BUG_ON(!domain);
3593
8c11e798
WH
3594 iommu = domain_get_iommu(domain);
3595
ba395927 3596 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3597 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3598 (unsigned long long)dev_addr))
ba395927 3599 return;
ba395927 3600
d794dc9b
DW
3601 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3602 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3603
d794dc9b 3604 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3605 dev_name(dev), start_pfn, last_pfn);
ba395927 3606
ea8ea460 3607 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3608
5e0d2a6f 3609 if (intel_iommu_strict) {
a1ddcbe9 3610 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
ea8ea460 3611 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3612 /* free iova */
3613 __free_iova(&domain->iovad, iova);
ea8ea460 3614 dma_free_pagelist(freelist);
5e0d2a6f 3615 } else {
ea8ea460 3616 add_unmap(domain, iova, freelist);
5e0d2a6f 3617 /*
 3618		 * queue up the release of the unmap to save roughly 1/6th of the
 3619		 * CPU time otherwise spent on the iotlb flush operation...
3620 */
5e0d2a6f 3621 }
ba395927
KA
3622}
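/*
 * intel_unmap() picks between two policies: with intel_iommu_strict the
 * IOTLB is invalidated and the IOVA plus freed page-table pages are
 * released synchronously; otherwise the work is queued via add_unmap()
 * and handled in batches as described above.
 */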
3623
d41a4adb
JL
3624static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3625 size_t size, enum dma_data_direction dir,
3626 struct dma_attrs *attrs)
3627{
3628 intel_unmap(dev, dev_addr);
3629}
3630
5040a918 3631static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3632 dma_addr_t *dma_handle, gfp_t flags,
3633 struct dma_attrs *attrs)
ba395927 3634{
36746436 3635 struct page *page = NULL;
ba395927
KA
3636 int order;
3637
5b6985ce 3638 size = PAGE_ALIGN(size);
ba395927 3639 order = get_order(size);
e8bb910d 3640
5040a918 3641 if (!iommu_no_mapping(dev))
e8bb910d 3642 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3643 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3644 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3645 flags |= GFP_DMA;
3646 else
3647 flags |= GFP_DMA32;
3648 }
ba395927 3649
d0164adc 3650 if (gfpflags_allow_blocking(flags)) {
36746436
AM
3651 unsigned int count = size >> PAGE_SHIFT;
3652
3653 page = dma_alloc_from_contiguous(dev, count, order);
3654 if (page && iommu_no_mapping(dev) &&
3655 page_to_phys(page) + size > dev->coherent_dma_mask) {
3656 dma_release_from_contiguous(dev, page, count);
3657 page = NULL;
3658 }
3659 }
3660
3661 if (!page)
3662 page = alloc_pages(flags, order);
3663 if (!page)
ba395927 3664 return NULL;
36746436 3665 memset(page_address(page), 0, size);
ba395927 3666
36746436 3667 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3668 DMA_BIDIRECTIONAL,
5040a918 3669 dev->coherent_dma_mask);
ba395927 3670 if (*dma_handle)
36746436
AM
3671 return page_address(page);
3672 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3673 __free_pages(page, order);
3674
ba395927
KA
3675 return NULL;
3676}
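/*
 * Coherent allocations first try the contiguous (CMA) allocator when
 * the gfp flags allow blocking, fall back to alloc_pages(), zero the
 * buffer, and then reuse __intel_map_single() with DMA_BIDIRECTIONAL
 * against the device's coherent DMA mask.
 */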
3677
5040a918 3678static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3679 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3680{
3681 int order;
36746436 3682 struct page *page = virt_to_page(vaddr);
ba395927 3683
5b6985ce 3684 size = PAGE_ALIGN(size);
ba395927
KA
3685 order = get_order(size);
3686
d41a4adb 3687 intel_unmap(dev, dma_handle);
36746436
AM
3688 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3689 __free_pages(page, order);
ba395927
KA
3690}
3691
5040a918 3692static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3693 int nelems, enum dma_data_direction dir,
3694 struct dma_attrs *attrs)
ba395927 3695{
d41a4adb 3696 intel_unmap(dev, sglist[0].dma_address);
ba395927
KA
3697}
3698
ba395927 3699static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3700 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3701{
3702 int i;
c03ab37c 3703 struct scatterlist *sg;
ba395927 3704
c03ab37c 3705 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3706 BUG_ON(!sg_page(sg));
3e6110fd 3707 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3708 sg->dma_length = sg->length;
ba395927
KA
3709 }
3710 return nelems;
3711}
3712
5040a918 3713static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3714 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3715{
ba395927 3716 int i;
ba395927 3717 struct dmar_domain *domain;
f76aec76
KA
3718 size_t size = 0;
3719 int prot = 0;
f76aec76
KA
3720 struct iova *iova = NULL;
3721 int ret;
c03ab37c 3722 struct scatterlist *sg;
b536d24d 3723 unsigned long start_vpfn;
8c11e798 3724 struct intel_iommu *iommu;
ba395927
KA
3725
3726 BUG_ON(dir == DMA_NONE);
5040a918
DW
3727 if (iommu_no_mapping(dev))
3728 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3729
5040a918 3730 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3731 if (!domain)
3732 return 0;
3733
8c11e798
WH
3734 iommu = domain_get_iommu(domain);
3735
b536d24d 3736 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3737 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3738
5040a918
DW
3739 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3740 *dev->dma_mask);
f76aec76 3741 if (!iova) {
c03ab37c 3742 sglist->dma_length = 0;
f76aec76
KA
3743 return 0;
3744 }
3745
3746 /*
 3747	 * Check if DMAR supports zero-length reads on write-only
 3748	 * mappings.
3749 */
3750 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3751 !cap_zlr(iommu->cap))
f76aec76
KA
3752 prot |= DMA_PTE_READ;
3753 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3754 prot |= DMA_PTE_WRITE;
3755
b536d24d 3756 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3757
f532959b 3758 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3759 if (unlikely(ret)) {
e1605495
DW
3760 dma_pte_free_pagetable(domain, start_vpfn,
3761 start_vpfn + size - 1);
e1605495
DW
3762 __free_iova(&domain->iovad, iova);
3763 return 0;
ba395927
KA
3764 }
3765
1f0ef2aa
DW
3766 /* it's a non-present to present mapping. Only flush if caching mode */
3767 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3768 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3769 else
8c11e798 3770 iommu_flush_write_buffer(iommu);
1f0ef2aa 3771
ba395927
KA
3772 return nelems;
3773}
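/*
 * intel_map_sg() allocates one IOVA range large enough for every
 * segment (each rounded up to whole pages) and lets domain_sg_mapping()
 * lay the segments out back to back inside it, so a single contiguous
 * DMA window covers the whole scatterlist.
 */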
3774
dfb805e8
FT
3775static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3776{
3777 return !dma_addr;
3778}
3779
160c1d8e 3780struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3781 .alloc = intel_alloc_coherent,
3782 .free = intel_free_coherent,
ba395927
KA
3783 .map_sg = intel_map_sg,
3784 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3785 .map_page = intel_map_page,
3786 .unmap_page = intel_unmap_page,
dfb805e8 3787 .mapping_error = intel_mapping_error,
ba395927
KA
3788};
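/*
 * Once dma_ops points at intel_dma_ops (see intel_iommu_init() below),
 * drivers reach these callbacks through the generic DMA API.  A minimal
 * sketch -- the device, page and size values are illustrative only:
 *
 *	dma_addr_t handle;
 *	void *buf = dma_alloc_coherent(&pdev->dev, 4096, &handle, GFP_KERNEL);
 *	...
 *	dma_free_coherent(&pdev->dev, 4096, buf, handle);
 *
 *	dma_addr_t addr = dma_map_page(&pdev->dev, page, 0, PAGE_SIZE,
 *				       DMA_TO_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, addr))
 *		return -ENOMEM;
 *	...
 *	dma_unmap_page(&pdev->dev, addr, PAGE_SIZE, DMA_TO_DEVICE);
 */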
3789
3790static inline int iommu_domain_cache_init(void)
3791{
3792 int ret = 0;
3793
3794 iommu_domain_cache = kmem_cache_create("iommu_domain",
3795 sizeof(struct dmar_domain),
3796 0,
3797 SLAB_HWCACHE_ALIGN,
3798
3799 NULL);
3800 if (!iommu_domain_cache) {
9f10e5bf 3801 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3802 ret = -ENOMEM;
3803 }
3804
3805 return ret;
3806}
3807
3808static inline int iommu_devinfo_cache_init(void)
3809{
3810 int ret = 0;
3811
3812 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3813 sizeof(struct device_domain_info),
3814 0,
3815 SLAB_HWCACHE_ALIGN,
ba395927
KA
3816 NULL);
3817 if (!iommu_devinfo_cache) {
9f10e5bf 3818 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3819 ret = -ENOMEM;
3820 }
3821
3822 return ret;
3823}
3824
ba395927
KA
3825static int __init iommu_init_mempool(void)
3826{
3827 int ret;
ae1ff3d6 3828 ret = iova_cache_get();
ba395927
KA
3829 if (ret)
3830 return ret;
3831
3832 ret = iommu_domain_cache_init();
3833 if (ret)
3834 goto domain_error;
3835
3836 ret = iommu_devinfo_cache_init();
3837 if (!ret)
3838 return ret;
3839
3840 kmem_cache_destroy(iommu_domain_cache);
3841domain_error:
ae1ff3d6 3842 iova_cache_put();
ba395927
KA
3843
3844 return -ENOMEM;
3845}
3846
3847static void __init iommu_exit_mempool(void)
3848{
3849 kmem_cache_destroy(iommu_devinfo_cache);
3850 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3851 iova_cache_put();
ba395927
KA
3852}
3853
556ab45f
DW
3854static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3855{
3856 struct dmar_drhd_unit *drhd;
3857 u32 vtbar;
3858 int rc;
3859
3860 /* We know that this device on this chipset has its own IOMMU.
3861 * If we find it under a different IOMMU, then the BIOS is lying
3862 * to us. Hope that the IOMMU for this device is actually
3863 * disabled, and it needs no translation...
3864 */
3865 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3866 if (rc) {
3867 /* "can't" happen */
3868 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3869 return;
3870 }
3871 vtbar &= 0xffff0000;
3872
 3873	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
3874 drhd = dmar_find_matched_drhd_unit(pdev);
3875 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3876 TAINT_FIRMWARE_WORKAROUND,
3877 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3878 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3879}
3880DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3881
ba395927
KA
3882static void __init init_no_remapping_devices(void)
3883{
3884 struct dmar_drhd_unit *drhd;
832bd858 3885 struct device *dev;
b683b230 3886 int i;
ba395927
KA
3887
3888 for_each_drhd_unit(drhd) {
3889 if (!drhd->include_all) {
b683b230
JL
3890 for_each_active_dev_scope(drhd->devices,
3891 drhd->devices_cnt, i, dev)
3892 break;
832bd858 3893 /* ignore DMAR unit if no devices exist */
ba395927
KA
3894 if (i == drhd->devices_cnt)
3895 drhd->ignored = 1;
3896 }
3897 }
3898
7c919779 3899 for_each_active_drhd_unit(drhd) {
7c919779 3900 if (drhd->include_all)
ba395927
KA
3901 continue;
3902
b683b230
JL
3903 for_each_active_dev_scope(drhd->devices,
3904 drhd->devices_cnt, i, dev)
832bd858 3905 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3906 break;
ba395927
KA
3907 if (i < drhd->devices_cnt)
3908 continue;
3909
c0771df8
DW
3910 /* This IOMMU has *only* gfx devices. Either bypass it or
3911 set the gfx_mapped flag, as appropriate */
3912 if (dmar_map_gfx) {
3913 intel_iommu_gfx_mapped = 1;
3914 } else {
3915 drhd->ignored = 1;
b683b230
JL
3916 for_each_active_dev_scope(drhd->devices,
3917 drhd->devices_cnt, i, dev)
832bd858 3918 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3919 }
3920 }
3921}
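/*
 * In short: DRHD units whose device scope is empty, or which cover only
 * graphics devices while dmar_map_gfx is clear, are marked ->ignored and
 * their devices get DUMMY_DEVICE_DOMAIN_INFO so the rest of the driver
 * skips them entirely.
 */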
3922
f59c7b69
FY
3923#ifdef CONFIG_SUSPEND
3924static int init_iommu_hw(void)
3925{
3926 struct dmar_drhd_unit *drhd;
3927 struct intel_iommu *iommu = NULL;
3928
3929 for_each_active_iommu(iommu, drhd)
3930 if (iommu->qi)
3931 dmar_reenable_qi(iommu);
3932
b779260b
JC
3933 for_each_iommu(iommu, drhd) {
3934 if (drhd->ignored) {
3935 /*
3936 * we always have to disable PMRs or DMA may fail on
3937 * this device
3938 */
3939 if (force_on)
3940 iommu_disable_protect_mem_regions(iommu);
3941 continue;
3942 }
3943
f59c7b69
FY
3944 iommu_flush_write_buffer(iommu);
3945
3946 iommu_set_root_entry(iommu);
3947
3948 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3949 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3950 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3951 iommu_enable_translation(iommu);
b94996c9 3952 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3953 }
3954
3955 return 0;
3956}
3957
3958static void iommu_flush_all(void)
3959{
3960 struct dmar_drhd_unit *drhd;
3961 struct intel_iommu *iommu;
3962
3963 for_each_active_iommu(iommu, drhd) {
3964 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3965 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3966 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3967 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3968 }
3969}
3970
134fac3f 3971static int iommu_suspend(void)
f59c7b69
FY
3972{
3973 struct dmar_drhd_unit *drhd;
3974 struct intel_iommu *iommu = NULL;
3975 unsigned long flag;
3976
3977 for_each_active_iommu(iommu, drhd) {
3978 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3979 GFP_ATOMIC);
3980 if (!iommu->iommu_state)
3981 goto nomem;
3982 }
3983
3984 iommu_flush_all();
3985
3986 for_each_active_iommu(iommu, drhd) {
3987 iommu_disable_translation(iommu);
3988
1f5b3c3f 3989 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3990
3991 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3992 readl(iommu->reg + DMAR_FECTL_REG);
3993 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3994 readl(iommu->reg + DMAR_FEDATA_REG);
3995 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3996 readl(iommu->reg + DMAR_FEADDR_REG);
3997 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3998 readl(iommu->reg + DMAR_FEUADDR_REG);
3999
1f5b3c3f 4000 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4001 }
4002 return 0;
4003
4004nomem:
4005 for_each_active_iommu(iommu, drhd)
4006 kfree(iommu->iommu_state);
4007
4008 return -ENOMEM;
4009}
4010
134fac3f 4011static void iommu_resume(void)
f59c7b69
FY
4012{
4013 struct dmar_drhd_unit *drhd;
4014 struct intel_iommu *iommu = NULL;
4015 unsigned long flag;
4016
4017 if (init_iommu_hw()) {
b779260b
JC
4018 if (force_on)
4019 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4020 else
4021 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4022 return;
f59c7b69
FY
4023 }
4024
4025 for_each_active_iommu(iommu, drhd) {
4026
1f5b3c3f 4027 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4028
4029 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4030 iommu->reg + DMAR_FECTL_REG);
4031 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4032 iommu->reg + DMAR_FEDATA_REG);
4033 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4034 iommu->reg + DMAR_FEADDR_REG);
4035 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4036 iommu->reg + DMAR_FEUADDR_REG);
4037
1f5b3c3f 4038 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4039 }
4040
4041 for_each_active_iommu(iommu, drhd)
4042 kfree(iommu->iommu_state);
f59c7b69
FY
4043}
4044
134fac3f 4045static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4046 .resume = iommu_resume,
4047 .suspend = iommu_suspend,
4048};
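/*
 * Across suspend/resume only the fault-event registers (FECTL, FEDATA,
 * FEADDR, FEUADDR) are saved and restored; root entries, context/IOTLB
 * state and translation enable are rebuilt from scratch by
 * init_iommu_hw() on resume.
 */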
4049
134fac3f 4050static void __init init_iommu_pm_ops(void)
f59c7b69 4051{
134fac3f 4052 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4053}
4054
4055#else
99592ba4 4056static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4057#endif /* CONFIG_PM */
4058
318fe7df 4059
c2a0b538 4060int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4061{
4062 struct acpi_dmar_reserved_memory *rmrr;
4063 struct dmar_rmrr_unit *rmrru;
4064
4065 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4066 if (!rmrru)
4067 return -ENOMEM;
4068
4069 rmrru->hdr = header;
4070 rmrr = (struct acpi_dmar_reserved_memory *)header;
4071 rmrru->base_address = rmrr->base_address;
4072 rmrru->end_address = rmrr->end_address;
2e455289
JL
4073 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4074 ((void *)rmrr) + rmrr->header.length,
4075 &rmrru->devices_cnt);
4076 if (rmrru->devices_cnt && rmrru->devices == NULL) {
4077 kfree(rmrru);
4078 return -ENOMEM;
4079 }
318fe7df 4080
2e455289 4081 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4082
2e455289 4083 return 0;
318fe7df
SS
4084}
4085
6b197249
JL
4086static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4087{
4088 struct dmar_atsr_unit *atsru;
4089 struct acpi_dmar_atsr *tmp;
4090
4091 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4092 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4093 if (atsr->segment != tmp->segment)
4094 continue;
4095 if (atsr->header.length != tmp->header.length)
4096 continue;
4097 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4098 return atsru;
4099 }
4100
4101 return NULL;
4102}
4103
4104int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4105{
4106 struct acpi_dmar_atsr *atsr;
4107 struct dmar_atsr_unit *atsru;
4108
6b197249
JL
4109 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4110 return 0;
4111
318fe7df 4112 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4113 atsru = dmar_find_atsr(atsr);
4114 if (atsru)
4115 return 0;
4116
4117 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4118 if (!atsru)
4119 return -ENOMEM;
4120
6b197249
JL
4121 /*
 4122	 * If memory was allocated from the slab by the ACPI _DSM method, we need to
4123 * copy the memory content because the memory buffer will be freed
4124 * on return.
4125 */
4126 atsru->hdr = (void *)(atsru + 1);
4127 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4128 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4129 if (!atsru->include_all) {
4130 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4131 (void *)atsr + atsr->header.length,
4132 &atsru->devices_cnt);
4133 if (atsru->devices_cnt && atsru->devices == NULL) {
4134 kfree(atsru);
4135 return -ENOMEM;
4136 }
4137 }
318fe7df 4138
0e242612 4139 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4140
4141 return 0;
4142}
4143
9bdc531e
JL
4144static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4145{
4146 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4147 kfree(atsru);
4148}
4149
6b197249
JL
4150int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4151{
4152 struct acpi_dmar_atsr *atsr;
4153 struct dmar_atsr_unit *atsru;
4154
4155 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4156 atsru = dmar_find_atsr(atsr);
4157 if (atsru) {
4158 list_del_rcu(&atsru->list);
4159 synchronize_rcu();
4160 intel_iommu_free_atsr(atsru);
4161 }
4162
4163 return 0;
4164}
4165
4166int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4167{
4168 int i;
4169 struct device *dev;
4170 struct acpi_dmar_atsr *atsr;
4171 struct dmar_atsr_unit *atsru;
4172
4173 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4174 atsru = dmar_find_atsr(atsr);
4175 if (!atsru)
4176 return 0;
4177
4178 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4179 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4180 i, dev)
4181 return -EBUSY;
4182
4183 return 0;
4184}
4185
ffebeb46
JL
4186static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4187{
4188 int sp, ret = 0;
4189 struct intel_iommu *iommu = dmaru->iommu;
4190
4191 if (g_iommus[iommu->seq_id])
4192 return 0;
4193
4194 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4195 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4196 iommu->name);
4197 return -ENXIO;
4198 }
4199 if (!ecap_sc_support(iommu->ecap) &&
4200 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4201 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4202 iommu->name);
4203 return -ENXIO;
4204 }
4205 sp = domain_update_iommu_superpage(iommu) - 1;
4206 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4207 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4208 iommu->name);
4209 return -ENXIO;
4210 }
4211
4212 /*
4213 * Disable translation if already enabled prior to OS handover.
4214 */
4215 if (iommu->gcmd & DMA_GCMD_TE)
4216 iommu_disable_translation(iommu);
4217
4218 g_iommus[iommu->seq_id] = iommu;
4219 ret = iommu_init_domains(iommu);
4220 if (ret == 0)
4221 ret = iommu_alloc_root_entry(iommu);
4222 if (ret)
4223 goto out;
4224
8a94ade4
DW
4225#ifdef CONFIG_INTEL_IOMMU_SVM
4226 if (pasid_enabled(iommu))
4227 intel_svm_alloc_pasid_tables(iommu);
4228#endif
4229
ffebeb46
JL
4230 if (dmaru->ignored) {
4231 /*
4232 * we always have to disable PMRs or DMA may fail on this device
4233 */
4234 if (force_on)
4235 iommu_disable_protect_mem_regions(iommu);
4236 return 0;
4237 }
4238
4239 intel_iommu_init_qi(iommu);
4240 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4241
4242#ifdef CONFIG_INTEL_IOMMU_SVM
4243 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4244 ret = intel_svm_enable_prq(iommu);
4245 if (ret)
4246 goto disable_iommu;
4247 }
4248#endif
ffebeb46
JL
4249 ret = dmar_set_interrupt(iommu);
4250 if (ret)
4251 goto disable_iommu;
4252
4253 iommu_set_root_entry(iommu);
4254 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4255 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4256 iommu_enable_translation(iommu);
4257
ffebeb46
JL
4258 iommu_disable_protect_mem_regions(iommu);
4259 return 0;
4260
4261disable_iommu:
4262 disable_dmar_iommu(iommu);
4263out:
4264 free_dmar_iommu(iommu);
4265 return ret;
4266}
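/*
 * Hot-added DMAR units go through the same bring-up steps as boot-time
 * ones: capability checks (pass-through, snooping, superpages),
 * domain-ID and root-entry allocation, optional SVM/PASID and page
 * request setup, then QI, the fault interrupt, root entry programming,
 * global context/IOTLB invalidation, and finally enabling translation.
 */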
4267
6b197249
JL
4268int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4269{
ffebeb46
JL
4270 int ret = 0;
4271 struct intel_iommu *iommu = dmaru->iommu;
4272
4273 if (!intel_iommu_enabled)
4274 return 0;
4275 if (iommu == NULL)
4276 return -EINVAL;
4277
4278 if (insert) {
4279 ret = intel_iommu_add(dmaru);
4280 } else {
4281 disable_dmar_iommu(iommu);
4282 free_dmar_iommu(iommu);
4283 }
4284
4285 return ret;
6b197249
JL
4286}
4287
9bdc531e
JL
4288static void intel_iommu_free_dmars(void)
4289{
4290 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4291 struct dmar_atsr_unit *atsru, *atsr_n;
4292
4293 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4294 list_del(&rmrru->list);
4295 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4296 kfree(rmrru);
318fe7df
SS
4297 }
4298
9bdc531e
JL
4299 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4300 list_del(&atsru->list);
4301 intel_iommu_free_atsr(atsru);
4302 }
318fe7df
SS
4303}
4304
4305int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4306{
b683b230 4307 int i, ret = 1;
318fe7df 4308 struct pci_bus *bus;
832bd858
DW
4309 struct pci_dev *bridge = NULL;
4310 struct device *tmp;
318fe7df
SS
4311 struct acpi_dmar_atsr *atsr;
4312 struct dmar_atsr_unit *atsru;
4313
4314 dev = pci_physfn(dev);
318fe7df 4315 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4316 bridge = bus->self;
d14053b3
DW
4317 /* If it's an integrated device, allow ATS */
4318 if (!bridge)
4319 return 1;
4320 /* Connected via non-PCIe: no ATS */
4321 if (!pci_is_pcie(bridge) ||
62f87c0e 4322 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4323 return 0;
d14053b3 4324 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4325 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4326 break;
318fe7df
SS
4327 }
4328
0e242612 4329 rcu_read_lock();
b5f82ddf
JL
4330 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4331 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4332 if (atsr->segment != pci_domain_nr(dev->bus))
4333 continue;
4334
b683b230 4335 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4336 if (tmp == &bridge->dev)
b683b230 4337 goto out;
b5f82ddf
JL
4338
4339 if (atsru->include_all)
b683b230 4340 goto out;
b5f82ddf 4341 }
b683b230
JL
4342 ret = 0;
4343out:
0e242612 4344 rcu_read_unlock();
318fe7df 4345
b683b230 4346 return ret;
318fe7df
SS
4347}
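/*
 * ATS eligibility: walk from the device up to its PCIe root port;
 * integrated endpoints (no bridge above them) are always allowed,
 * conventional-PCI paths never are, and everything else is allowed only
 * if the root port is listed in (or covered by an include_all) ATSR for
 * the same PCI segment.
 */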
4348
59ce0515
JL
4349int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4350{
4351 int ret = 0;
4352 struct dmar_rmrr_unit *rmrru;
4353 struct dmar_atsr_unit *atsru;
4354 struct acpi_dmar_atsr *atsr;
4355 struct acpi_dmar_reserved_memory *rmrr;
4356
4357 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4358 return 0;
4359
4360 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4361 rmrr = container_of(rmrru->hdr,
4362 struct acpi_dmar_reserved_memory, header);
4363 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4364 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4365 ((void *)rmrr) + rmrr->header.length,
4366 rmrr->segment, rmrru->devices,
4367 rmrru->devices_cnt);
27e24950 4368 if(ret < 0)
59ce0515 4369 return ret;
e6a8c9b3 4370 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4371 dmar_remove_dev_scope(info, rmrr->segment,
4372 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4373 }
4374 }
4375
4376 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4377 if (atsru->include_all)
4378 continue;
4379
4380 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4381 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4382 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4383 (void *)atsr + atsr->header.length,
4384 atsr->segment, atsru->devices,
4385 atsru->devices_cnt);
4386 if (ret > 0)
4387 break;
4388 else if(ret < 0)
4389 return ret;
e6a8c9b3 4390 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4391 if (dmar_remove_dev_scope(info, atsr->segment,
4392 atsru->devices, atsru->devices_cnt))
4393 break;
4394 }
4395 }
4396
4397 return 0;
4398}
4399
99dcaded
FY
4400/*
 4401 * Here we only respond to a device being unbound from its driver.
 4402 *
 4403 * A newly added device is not attached to its DMAR domain here yet; that
 4404 * happens when the device is first mapped to an iova.
4405 */
4406static int device_notifier(struct notifier_block *nb,
4407 unsigned long action, void *data)
4408{
4409 struct device *dev = data;
99dcaded
FY
4410 struct dmar_domain *domain;
4411
3d89194a 4412 if (iommu_dummy(dev))
44cd613c
DW
4413 return 0;
4414
1196c2fb 4415 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4416 return 0;
4417
1525a29a 4418 domain = find_domain(dev);
99dcaded
FY
4419 if (!domain)
4420 return 0;
4421
e6de0f8d 4422 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4423 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4424 domain_exit(domain);
a97590e5 4425
99dcaded
FY
4426 return 0;
4427}
4428
4429static struct notifier_block device_nb = {
4430 .notifier_call = device_notifier,
4431};
4432
75f05569
JL
4433static int intel_iommu_memory_notifier(struct notifier_block *nb,
4434 unsigned long val, void *v)
4435{
4436 struct memory_notify *mhp = v;
4437 unsigned long long start, end;
4438 unsigned long start_vpfn, last_vpfn;
4439
4440 switch (val) {
4441 case MEM_GOING_ONLINE:
4442 start = mhp->start_pfn << PAGE_SHIFT;
4443 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4444 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4445 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4446 start, end);
4447 return NOTIFY_BAD;
4448 }
4449 break;
4450
4451 case MEM_OFFLINE:
4452 case MEM_CANCEL_ONLINE:
4453 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4454 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4455 while (start_vpfn <= last_vpfn) {
4456 struct iova *iova;
4457 struct dmar_drhd_unit *drhd;
4458 struct intel_iommu *iommu;
ea8ea460 4459 struct page *freelist;
75f05569
JL
4460
4461 iova = find_iova(&si_domain->iovad, start_vpfn);
4462 if (iova == NULL) {
9f10e5bf 4463 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4464 start_vpfn);
4465 break;
4466 }
4467
4468 iova = split_and_remove_iova(&si_domain->iovad, iova,
4469 start_vpfn, last_vpfn);
4470 if (iova == NULL) {
9f10e5bf 4471 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4472 start_vpfn, last_vpfn);
4473 return NOTIFY_BAD;
4474 }
4475
ea8ea460
DW
4476 freelist = domain_unmap(si_domain, iova->pfn_lo,
4477 iova->pfn_hi);
4478
75f05569
JL
4479 rcu_read_lock();
4480 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4481 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4482 iova->pfn_lo, iova_size(iova),
ea8ea460 4483 !freelist, 0);
75f05569 4484 rcu_read_unlock();
ea8ea460 4485 dma_free_pagelist(freelist);
75f05569
JL
4486
4487 start_vpfn = iova->pfn_hi + 1;
4488 free_iova_mem(iova);
4489 }
4490 break;
4491 }
4492
4493 return NOTIFY_OK;
4494}
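/*
 * Memory hotplug keeps the static identity map in sync: ranges about to
 * come online are identity-mapped into si_domain first, and offlined
 * ranges have their IOVAs split out, unmapped, flushed on every active
 * IOMMU, and their page-table pages freed.
 */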
4495
4496static struct notifier_block intel_iommu_memory_nb = {
4497 .notifier_call = intel_iommu_memory_notifier,
4498 .priority = 0
4499};
4500
a5459cfe
AW
4501
4502static ssize_t intel_iommu_show_version(struct device *dev,
4503 struct device_attribute *attr,
4504 char *buf)
4505{
4506 struct intel_iommu *iommu = dev_get_drvdata(dev);
4507 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4508 return sprintf(buf, "%d:%d\n",
4509 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4510}
4511static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4512
4513static ssize_t intel_iommu_show_address(struct device *dev,
4514 struct device_attribute *attr,
4515 char *buf)
4516{
4517 struct intel_iommu *iommu = dev_get_drvdata(dev);
4518 return sprintf(buf, "%llx\n", iommu->reg_phys);
4519}
4520static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4521
4522static ssize_t intel_iommu_show_cap(struct device *dev,
4523 struct device_attribute *attr,
4524 char *buf)
4525{
4526 struct intel_iommu *iommu = dev_get_drvdata(dev);
4527 return sprintf(buf, "%llx\n", iommu->cap);
4528}
4529static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4530
4531static ssize_t intel_iommu_show_ecap(struct device *dev,
4532 struct device_attribute *attr,
4533 char *buf)
4534{
4535 struct intel_iommu *iommu = dev_get_drvdata(dev);
4536 return sprintf(buf, "%llx\n", iommu->ecap);
4537}
4538static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4539
2238c082
AW
4540static ssize_t intel_iommu_show_ndoms(struct device *dev,
4541 struct device_attribute *attr,
4542 char *buf)
4543{
4544 struct intel_iommu *iommu = dev_get_drvdata(dev);
4545 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4546}
4547static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4548
4549static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4550 struct device_attribute *attr,
4551 char *buf)
4552{
4553 struct intel_iommu *iommu = dev_get_drvdata(dev);
4554 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4555 cap_ndoms(iommu->cap)));
4556}
4557static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4558
a5459cfe
AW
4559static struct attribute *intel_iommu_attrs[] = {
4560 &dev_attr_version.attr,
4561 &dev_attr_address.attr,
4562 &dev_attr_cap.attr,
4563 &dev_attr_ecap.attr,
2238c082
AW
4564 &dev_attr_domains_supported.attr,
4565 &dev_attr_domains_used.attr,
a5459cfe
AW
4566 NULL,
4567};
4568
4569static struct attribute_group intel_iommu_group = {
4570 .name = "intel-iommu",
4571 .attrs = intel_iommu_attrs,
4572};
4573
4574const struct attribute_group *intel_iommu_groups[] = {
4575 &intel_iommu_group,
4576 NULL,
4577};
4578
ba395927
KA
4579int __init intel_iommu_init(void)
4580{
9bdc531e 4581 int ret = -ENODEV;
3a93c841 4582 struct dmar_drhd_unit *drhd;
7c919779 4583 struct intel_iommu *iommu;
ba395927 4584
a59b50e9
JC
4585 /* VT-d is required for a TXT/tboot launch, so enforce that */
4586 force_on = tboot_force_iommu();
4587
3a5670e8
JL
4588 if (iommu_init_mempool()) {
4589 if (force_on)
4590 panic("tboot: Failed to initialize iommu memory\n");
4591 return -ENOMEM;
4592 }
4593
4594 down_write(&dmar_global_lock);
a59b50e9
JC
4595 if (dmar_table_init()) {
4596 if (force_on)
4597 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4598 goto out_free_dmar;
a59b50e9 4599 }
ba395927 4600
c2c7286a 4601 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4602 if (force_on)
4603 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4604 goto out_free_dmar;
a59b50e9 4605 }
1886e8a9 4606
75f1cdf1 4607 if (no_iommu || dmar_disabled)
9bdc531e 4608 goto out_free_dmar;
2ae21010 4609
318fe7df 4610 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4611 pr_info("No RMRR found\n");
318fe7df
SS
4612
4613 if (list_empty(&dmar_atsr_units))
9f10e5bf 4614 pr_info("No ATSR found\n");
318fe7df 4615
51a63e67
JC
4616 if (dmar_init_reserved_ranges()) {
4617 if (force_on)
4618 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4619 goto out_free_reserved_range;
51a63e67 4620 }
ba395927
KA
4621
4622 init_no_remapping_devices();
4623
b779260b 4624 ret = init_dmars();
ba395927 4625 if (ret) {
a59b50e9
JC
4626 if (force_on)
4627 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4628 pr_err("Initialization failed\n");
9bdc531e 4629 goto out_free_reserved_range;
ba395927 4630 }
3a5670e8 4631 up_write(&dmar_global_lock);
9f10e5bf 4632 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4633
5e0d2a6f 4634 init_timer(&unmap_timer);
75f1cdf1
FT
4635#ifdef CONFIG_SWIOTLB
4636 swiotlb = 0;
4637#endif
19943b0e 4638 dma_ops = &intel_dma_ops;
4ed0d3e6 4639
134fac3f 4640 init_iommu_pm_ops();
a8bcbb0d 4641
a5459cfe
AW
4642 for_each_active_iommu(iommu, drhd)
4643 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4644 intel_iommu_groups,
2439d4aa 4645 "%s", iommu->name);
a5459cfe 4646
4236d97d 4647 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4648 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4649 if (si_domain && !hw_pass_through)
4650 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4651
8bc1f85c
ED
4652 intel_iommu_enabled = 1;
4653
ba395927 4654 return 0;
9bdc531e
JL
4655
4656out_free_reserved_range:
4657 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4658out_free_dmar:
4659 intel_iommu_free_dmars();
3a5670e8
JL
4660 up_write(&dmar_global_lock);
4661 iommu_exit_mempool();
9bdc531e 4662 return ret;
ba395927 4663}
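/*
 * Boot-time ordering, as implemented above: parse the DMAR table and
 * device scopes, reserve the IOAPIC/PCI MMIO IOVA ranges, drop
 * gfx-only/empty units, run init_dmars(), then switch dma_ops over to
 * intel_dma_ops (with swiotlb disabled), and register the PM, sysfs,
 * bus-notifier and memory-hotplug hooks before setting
 * intel_iommu_enabled.
 */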
e820482c 4664
2452d9db 4665static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4666{
4667 struct intel_iommu *iommu = opaque;
4668
2452d9db 4669 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4670 return 0;
4671}
4672
4673/*
4674 * NB - intel-iommu lacks any sort of reference counting for the users of
4675 * dependent devices. If multiple endpoints have intersecting dependent
4676 * devices, unbinding the driver from any one of them will possibly leave
4677 * the others unable to operate.
4678 */
2452d9db 4679static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4680{
0bcb3e28 4681 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4682 return;
4683
2452d9db 4684 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4685}
4686
127c7615 4687static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4688{
c7151a8d
WH
4689 struct intel_iommu *iommu;
4690 unsigned long flags;
c7151a8d 4691
55d94043
JR
4692 assert_spin_locked(&device_domain_lock);
4693
127c7615 4694 if (WARN_ON(!info))
c7151a8d
WH
4695 return;
4696
127c7615 4697 iommu = info->iommu;
c7151a8d 4698
127c7615
JR
4699 if (info->dev) {
4700 iommu_disable_dev_iotlb(info);
4701 domain_context_clear(iommu, info->dev);
4702 }
c7151a8d 4703
b608ac3b 4704 unlink_domain_info(info);
c7151a8d 4705
d160aca5 4706 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4707 domain_detach_iommu(info->domain, iommu);
d160aca5 4708 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4709
127c7615 4710 free_devinfo_mem(info);
c7151a8d 4711}
c7151a8d 4712
55d94043
JR
4713static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4714 struct device *dev)
4715{
127c7615 4716 struct device_domain_info *info;
55d94043 4717 unsigned long flags;
3e7abe25 4718
55d94043 4719 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4720 info = dev->archdata.iommu;
4721 __dmar_remove_one_dev_info(info);
55d94043 4722 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4723}
4724
2c2e2c38 4725static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4726{
4727 int adjust_width;
4728
0fb5fe87
RM
4729 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4730 DMA_32BIT_PFN);
5e98c4b1
WH
4731 domain_reserve_special_ranges(domain);
4732
4733 /* calculate AGAW */
4734 domain->gaw = guest_width;
4735 adjust_width = guestwidth_to_adjustwidth(guest_width);
4736 domain->agaw = width_to_agaw(adjust_width);
4737
5e98c4b1 4738 domain->iommu_coherency = 0;
c5b15255 4739 domain->iommu_snooping = 0;
6dd9a7c7 4740 domain->iommu_superpage = 0;
fe40f1e0 4741 domain->max_addr = 0;
5e98c4b1
WH
4742
4743 /* always allocate the top pgd */
4c923d47 4744 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4745 if (!domain->pgd)
4746 return -ENOMEM;
4747 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4748 return 0;
4749}
4750
00a77deb 4751static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4752{
5d450806 4753 struct dmar_domain *dmar_domain;
00a77deb
JR
4754 struct iommu_domain *domain;
4755
4756 if (type != IOMMU_DOMAIN_UNMANAGED)
4757 return NULL;
38717946 4758
ab8dfe25 4759 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4760 if (!dmar_domain) {
9f10e5bf 4761 pr_err("Can't allocate dmar_domain\n");
00a77deb 4762 return NULL;
38717946 4763 }
2c2e2c38 4764 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4765 pr_err("Domain initialization failed\n");
92d03cc8 4766 domain_exit(dmar_domain);
00a77deb 4767 return NULL;
38717946 4768 }
8140a95d 4769 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4770
00a77deb 4771 domain = &dmar_domain->domain;
8a0e715b
JR
4772 domain->geometry.aperture_start = 0;
4773 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4774 domain->geometry.force_aperture = true;
4775
00a77deb 4776 return domain;
38717946 4777}
38717946 4778
00a77deb 4779static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4780{
00a77deb 4781 domain_exit(to_dmar_domain(domain));
38717946 4782}
38717946 4783
4c5478c9
JR
4784static int intel_iommu_attach_device(struct iommu_domain *domain,
4785 struct device *dev)
38717946 4786{
00a77deb 4787 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4788 struct intel_iommu *iommu;
4789 int addr_width;
156baca8 4790 u8 bus, devfn;
faa3d6f5 4791
c875d2c1
AW
4792 if (device_is_rmrr_locked(dev)) {
4793 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4794 return -EPERM;
4795 }
4796
7207d8f9
DW
4797 /* normally dev is not mapped */
4798 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4799 struct dmar_domain *old_domain;
4800
1525a29a 4801 old_domain = find_domain(dev);
faa3d6f5 4802 if (old_domain) {
d160aca5 4803 rcu_read_lock();
de7e8886 4804 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 4805 rcu_read_unlock();
62c22167
JR
4806
4807 if (!domain_type_is_vm_or_si(old_domain) &&
4808 list_empty(&old_domain->devices))
4809 domain_exit(old_domain);
faa3d6f5
WH
4810 }
4811 }
4812
156baca8 4813 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4814 if (!iommu)
4815 return -ENODEV;
4816
4817 /* check if this iommu agaw is sufficient for max mapped address */
4818 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4819 if (addr_width > cap_mgaw(iommu->cap))
4820 addr_width = cap_mgaw(iommu->cap);
4821
4822 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 4823 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4824 "sufficient for the mapped address (%llx)\n",
a99c47a2 4825 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4826 return -EFAULT;
4827 }
a99c47a2
TL
4828 dmar_domain->gaw = addr_width;
4829
4830 /*
4831 * Knock out extra levels of page tables if necessary
4832 */
4833 while (iommu->agaw < dmar_domain->agaw) {
4834 struct dma_pte *pte;
4835
4836 pte = dmar_domain->pgd;
4837 if (dma_pte_present(pte)) {
25cbff16
SY
4838 dmar_domain->pgd = (struct dma_pte *)
4839 phys_to_virt(dma_pte_addr(pte));
7a661013 4840 free_pgtable_page(pte);
a99c47a2
TL
4841 }
4842 dmar_domain->agaw--;
4843 }
fe40f1e0 4844
28ccce0d 4845 return domain_add_dev_info(dmar_domain, dev);
38717946 4846}
38717946 4847
4c5478c9
JR
4848static void intel_iommu_detach_device(struct iommu_domain *domain,
4849 struct device *dev)
38717946 4850{
e6de0f8d 4851 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 4852}
c7151a8d 4853
b146a1c9
JR
4854static int intel_iommu_map(struct iommu_domain *domain,
4855 unsigned long iova, phys_addr_t hpa,
5009065d 4856 size_t size, int iommu_prot)
faa3d6f5 4857{
00a77deb 4858 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 4859 u64 max_addr;
dde57a21 4860 int prot = 0;
faa3d6f5 4861 int ret;
fe40f1e0 4862
dde57a21
JR
4863 if (iommu_prot & IOMMU_READ)
4864 prot |= DMA_PTE_READ;
4865 if (iommu_prot & IOMMU_WRITE)
4866 prot |= DMA_PTE_WRITE;
9cf06697
SY
4867 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4868 prot |= DMA_PTE_SNP;
dde57a21 4869
163cc52c 4870 max_addr = iova + size;
dde57a21 4871 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4872 u64 end;
4873
4874 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4875 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4876 if (end < max_addr) {
9f10e5bf 4877 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4878 "sufficient for the mapped address (%llx)\n",
8954da1f 4879 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4880 return -EFAULT;
4881 }
dde57a21 4882 dmar_domain->max_addr = max_addr;
fe40f1e0 4883 }
ad051221
DW
 4884	/* Round size up to the next multiple of PAGE_SIZE if it, together with
 4885	   the low bits of hpa, would take us onto the next page */
88cb6a74 4886 size = aligned_nrpages(hpa, size);
ad051221
DW
4887 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4888 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4889 return ret;
38717946 4890}
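/*
 * These callbacks back the generic IOMMU API for VFIO/KVM-style users.
 * A minimal sketch, with the device pointer, IOVA and page purely
 * illustrative:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	if (!dom || iommu_attach_device(dom, dev))
 *		goto err;
 *	iommu_map(dom, 0x100000, page_to_phys(pg), PAGE_SIZE,
 *		  IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, 0x100000, PAGE_SIZE);
 *	iommu_detach_device(dom, dev);
 *	iommu_domain_free(dom);
 */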
38717946 4891
5009065d 4892static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4893 unsigned long iova, size_t size)
38717946 4894{
00a77deb 4895 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
4896 struct page *freelist = NULL;
4897 struct intel_iommu *iommu;
4898 unsigned long start_pfn, last_pfn;
4899 unsigned int npages;
42e8c186 4900 int iommu_id, level = 0;
5cf0a76f
DW
4901
4902 /* Cope with horrid API which requires us to unmap more than the
4903 size argument if it happens to be a large-page mapping. */
dc02e46e 4904 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
4905
4906 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4907 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4908
ea8ea460
DW
4909 start_pfn = iova >> VTD_PAGE_SHIFT;
4910 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4911
4912 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4913
4914 npages = last_pfn - start_pfn + 1;
4915
29a27719 4916 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 4917 iommu = g_iommus[iommu_id];
ea8ea460 4918
42e8c186
JR
4919 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
4920 start_pfn, npages, !freelist, 0);
ea8ea460
DW
4921 }
4922
4923 dma_free_pagelist(freelist);
fe40f1e0 4924
163cc52c
DW
4925 if (dmar_domain->max_addr == iova + size)
4926 dmar_domain->max_addr = iova;
b146a1c9 4927
5cf0a76f 4928 return size;
38717946 4929}
38717946 4930
d14d6577 4931static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4932 dma_addr_t iova)
38717946 4933{
00a77deb 4934 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 4935 struct dma_pte *pte;
5cf0a76f 4936 int level = 0;
faa3d6f5 4937 u64 phys = 0;
38717946 4938
5cf0a76f 4939 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4940 if (pte)
faa3d6f5 4941 phys = dma_pte_addr(pte);
38717946 4942
faa3d6f5 4943 return phys;
38717946 4944}
a8bcbb0d 4945
5d587b8d 4946static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 4947{
dbb9fd86 4948 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 4949 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 4950 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 4951 return irq_remapping_enabled == 1;
dbb9fd86 4952
5d587b8d 4953 return false;
dbb9fd86
SY
4954}
4955
abdfdde2
AW
4956static int intel_iommu_add_device(struct device *dev)
4957{
a5459cfe 4958 struct intel_iommu *iommu;
abdfdde2 4959 struct iommu_group *group;
156baca8 4960 u8 bus, devfn;
70ae6f0d 4961
a5459cfe
AW
4962 iommu = device_to_iommu(dev, &bus, &devfn);
4963 if (!iommu)
70ae6f0d
AW
4964 return -ENODEV;
4965
a5459cfe 4966 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4967
e17f9ff4 4968 group = iommu_group_get_for_dev(dev);
783f157b 4969
e17f9ff4
AW
4970 if (IS_ERR(group))
4971 return PTR_ERR(group);
bcb71abe 4972
abdfdde2 4973 iommu_group_put(group);
e17f9ff4 4974 return 0;
abdfdde2 4975}
70ae6f0d 4976
abdfdde2
AW
4977static void intel_iommu_remove_device(struct device *dev)
4978{
a5459cfe
AW
4979 struct intel_iommu *iommu;
4980 u8 bus, devfn;
4981
4982 iommu = device_to_iommu(dev, &bus, &devfn);
4983 if (!iommu)
4984 return;
4985
abdfdde2 4986 iommu_group_remove_device(dev);
a5459cfe
AW
4987
4988 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4989}
4990
2f26e0a9
DW
4991#ifdef CONFIG_INTEL_IOMMU_SVM
4992int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
4993{
4994 struct device_domain_info *info;
4995 struct context_entry *context;
4996 struct dmar_domain *domain;
4997 unsigned long flags;
4998 u64 ctx_lo;
4999 int ret;
5000
5001 domain = get_valid_domain_for_dev(sdev->dev);
5002 if (!domain)
5003 return -EINVAL;
5004
5005 spin_lock_irqsave(&device_domain_lock, flags);
5006 spin_lock(&iommu->lock);
5007
5008 ret = -EINVAL;
5009 info = sdev->dev->archdata.iommu;
5010 if (!info || !info->pasid_supported)
5011 goto out;
5012
5013 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5014 if (WARN_ON(!context))
5015 goto out;
5016
5017 ctx_lo = context[0].lo;
5018
5019 sdev->did = domain->iommu_did[iommu->seq_id];
5020 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5021
5022 if (!(ctx_lo & CONTEXT_PASIDE)) {
5023 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5024 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
5025 wmb();
5026 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5027 * extended to permit requests-with-PASID if the PASIDE bit
 5028	 * is set, which makes sense.  For CONTEXT_TT_PASS_THROUGH,
5029 * however, the PASIDE bit is ignored and requests-with-PASID
5030 * are unconditionally blocked. Which makes less sense.
5031 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5032 * "guest mode" translation types depending on whether ATS
5033 * is available or not. Annoyingly, we can't use the new
5034 * modes *unless* PASIDE is set. */
5035 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5036 ctx_lo &= ~CONTEXT_TT_MASK;
5037 if (info->ats_supported)
5038 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5039 else
5040 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5041 }
5042 ctx_lo |= CONTEXT_PASIDE;
907fea34
DW
5043 if (iommu->pasid_state_table)
5044 ctx_lo |= CONTEXT_DINVE;
a222a7f0
DW
5045 if (info->pri_supported)
5046 ctx_lo |= CONTEXT_PRS;
2f26e0a9
DW
5047 context[0].lo = ctx_lo;
5048 wmb();
5049 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5050 DMA_CCMD_MASK_NOBIT,
5051 DMA_CCMD_DEVICE_INVL);
5052 }
5053
5054 /* Enable PASID support in the device, if it wasn't already */
5055 if (!info->pasid_enabled)
5056 iommu_enable_dev_iotlb(info);
5057
5058 if (info->ats_enabled) {
5059 sdev->dev_iotlb = 1;
5060 sdev->qdep = info->ats_qdep;
5061 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5062 sdev->qdep = 0;
5063 }
5064 ret = 0;
5065
5066 out:
5067 spin_unlock(&iommu->lock);
5068 spin_unlock_irqrestore(&device_domain_lock, flags);
5069
5070 return ret;
5071}
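/*
 * In outline, intel_iommu_enable_pasid() flips the device's context
 * entry into a PASID-capable format: it points the entry at the PASID
 * and PASID-state tables, rewrites a pass-through translation type to
 * one of the PASID-aware ones, sets PASIDE (plus DINVE/PRS where
 * supported), invalidates the old context entry, and records the
 * domain-id/source-id that the SVM code uses for later invalidations.
 */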
5072
5073struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5074{
5075 struct intel_iommu *iommu;
5076 u8 bus, devfn;
5077
5078 if (iommu_dummy(dev)) {
5079 dev_warn(dev,
5080 "No IOMMU translation for device; cannot enable SVM\n");
5081 return NULL;
5082 }
5083
5084 iommu = device_to_iommu(dev, &bus, &devfn);
5085 if ((!iommu)) {
b9997e38 5086 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5087 return NULL;
5088 }
5089
5090 if (!iommu->pasid_table) {
b9997e38 5091 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
2f26e0a9
DW
5092 return NULL;
5093 }
5094
5095 return iommu;
5096}
5097#endif /* CONFIG_INTEL_IOMMU_SVM */
5098
b22f6434 5099static const struct iommu_ops intel_iommu_ops = {
5d587b8d 5100 .capable = intel_iommu_capable,
00a77deb
JR
5101 .domain_alloc = intel_iommu_domain_alloc,
5102 .domain_free = intel_iommu_domain_free,
a8bcbb0d
JR
5103 .attach_dev = intel_iommu_attach_device,
5104 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
5105 .map = intel_iommu_map,
5106 .unmap = intel_iommu_unmap,
315786eb 5107 .map_sg = default_iommu_map_sg,
a8bcbb0d 5108 .iova_to_phys = intel_iommu_iova_to_phys,
abdfdde2
AW
5109 .add_device = intel_iommu_add_device,
5110 .remove_device = intel_iommu_remove_device,
a960fadb 5111 .device_group = pci_device_group,
6d1c56a9 5112 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5113};
9af88143 5114
9452618e
DV
5115static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5116{
5117 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 5118 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
5119 dmar_map_gfx = 0;
5120}
5121
5122DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5123DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5124DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5125DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5126DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5127DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5128DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5129
d34d6517 5130static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5131{
5132 /*
5133 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5134 * but needs it. Same seems to hold for the desktop versions.
9af88143 5135 */
9f10e5bf 5136 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
5137 rwbf_quirk = 1;
5138}
5139
5140DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
5141DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5142DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5143DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5144DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5145DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5146DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 5147
eecfd57f
AJ
5148#define GGC 0x52
5149#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5150#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5151#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5152#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5153#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5154#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5155#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5156#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5157
d34d6517 5158static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5159{
5160 unsigned short ggc;
5161
eecfd57f 5162 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5163 return;
5164
eecfd57f 5165 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5166 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5167 dmar_map_gfx = 0;
6fbcfb3e
DW
5168 } else if (dmar_map_gfx) {
5169 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5170 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5171 intel_iommu_strict = 1;
5172 }
9eecabcb
DW
5173}
5174DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5175DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5176DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5177DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5178
e0fc7e0b
DW
5179/* On Tylersburg chipsets, some BIOSes have been known to enable the
5180 ISOCH DMAR unit for the Azalia sound device, but not give it any
5181 TLB entries, which causes it to deadlock. Check for that. We do
5182 this in a function called from init_dmars(), instead of in a PCI
5183 quirk, because we don't want to print the obnoxious "BIOS broken"
5184 message if VT-d is actually disabled.
5185*/
5186static void __init check_tylersburg_isoch(void)
5187{
5188 struct pci_dev *pdev;
5189 uint32_t vtisochctrl;
5190
5191 /* If there's no Azalia in the system anyway, forget it. */
5192 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5193 if (!pdev)
5194 return;
5195 pci_dev_put(pdev);
5196
5197 /* System Management Registers. Might be hidden, in which case
5198 we can't do the sanity check. But that's OK, because the
5199 known-broken BIOSes _don't_ actually hide it, so far. */
5200 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5201 if (!pdev)
5202 return;
5203
5204 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5205 pci_dev_put(pdev);
5206 return;
5207 }
5208
5209 pci_dev_put(pdev);
5210
5211 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5212 if (vtisochctrl & 1)
5213 return;
5214
5215 /* Drop all bits other than the number of TLB entries */
5216 vtisochctrl &= 0x1c;
5217
5218 /* If we have the recommended number of TLB entries (16), fine. */
5219 if (vtisochctrl == 0x10)
5220 return;
5221
5222 /* Zero TLB entries? You get to ride the short bus to school. */
5223 if (!vtisochctrl) {
5224 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5225 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5226 dmi_get_system_info(DMI_BIOS_VENDOR),
5227 dmi_get_system_info(DMI_BIOS_VERSION),
5228 dmi_get_system_info(DMI_PRODUCT_VERSION));
5229 iommu_identity_mapping |= IDENTMAP_AZALIA;
5230 return;
5231 }
9f10e5bf
JR
5232
5233 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5234 vtisochctrl);
5235}