iommu/vt-d: Introduce helper function dmar_walk_resources()
[linux-2.6-block.git] / drivers / iommu / intel-iommu.c
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
18 */
19
20#include <linux/init.h>
21#include <linux/bitmap.h>
5e0d2a6f 22#include <linux/debugfs.h>
54485c30 23#include <linux/export.h>
ba395927
KA
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
ba395927
KA
27#include <linux/spinlock.h>
28#include <linux/pci.h>
29#include <linux/dmar.h>
30#include <linux/dma-mapping.h>
31#include <linux/mempool.h>
75f05569 32#include <linux/memory.h>
5e0d2a6f 33#include <linux/timer.h>
38717946 34#include <linux/iova.h>
5d450806 35#include <linux/iommu.h>
38717946 36#include <linux/intel-iommu.h>
134fac3f 37#include <linux/syscore_ops.h>
69575d38 38#include <linux/tboot.h>
adb2fe02 39#include <linux/dmi.h>
5cdede24 40#include <linux/pci-ats.h>
0ee332c1 41#include <linux/memblock.h>
36746436 42#include <linux/dma-contiguous.h>
8a8f422d 43#include <asm/irq_remapping.h>
ba395927 44#include <asm/cacheflush.h>
46a7fa27 45#include <asm/iommu.h>
ba395927 46
078e1ee2
JR
47#include "irq_remapping.h"
48
5b6985ce
FY
49#define ROOT_SIZE VTD_PAGE_SIZE
50#define CONTEXT_SIZE VTD_PAGE_SIZE
51
ba395927
KA
52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 54#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
55
56#define IOAPIC_RANGE_START (0xfee00000)
57#define IOAPIC_RANGE_END (0xfeefffff)
58#define IOVA_START_ADDR (0x1000)
59
60#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
4ed0d3e6 62#define MAX_AGAW_WIDTH 64
5c645b35 63#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 64
2ebe3151
DW
65#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
71 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
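/*
 * Worked example (illustrative sketch, assuming VTD_PAGE_SHIFT == 12): for a
 * 48-bit guest address width,
 *
 *	__DOMAIN_MAX_PFN(48)  == (1ULL << 36) - 1	(last 4KiB page frame)
 *	__DOMAIN_MAX_ADDR(48) == (1ULL << 48) - 1	(256TiB - 1)
 *
 * On a 32-bit kernel DOMAIN_MAX_PFN() additionally clamps the result to
 * (unsigned long)-1 so PFN arithmetic stays within an unsigned long.
 */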
ba395927 73
f27be03b 74#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 75#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 76#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 77
df08cdc7
AM
78/* page table handling */
79#define LEVEL_STRIDE (9)
80#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
81
6d1c56a9
OBC
82/*
83 * This bitmap is used to advertise the page sizes our hardware supports
84 * to the IOMMU core, which will then use this information to split
85 * physically contiguous memory regions it is mapping into page sizes
86 * that we support.
87 *
88 * Traditionally the IOMMU core just handed us the mappings directly,
89 * after making sure the size is an order of a 4KiB page and that the
90 * mapping has natural alignment.
91 *
92 * To retain this behavior, we currently advertise that we support
93 * all page sizes that are an order of 4KiB.
94 *
95 * If at some point we'd like to utilize the IOMMU core's new behavior,
96 * we could change this to advertise the real page sizes we support.
97 */
98#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
99
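/*
 * Illustrative note (a sketch of the alternative mentioned above): each set
 * bit in this bitmap is one supported page size, so ~0xFFFUL advertises
 * every power-of-two size from 4KiB upwards. Advertising only the sizes the
 * hardware can actually use as superpages might instead look like:
 *
 *	#define INTEL_IOMMU_PGSIZES	(SZ_4K | SZ_2M | SZ_1G)
 *
 * (SZ_* constants from <linux/sizes.h>; shown only as an illustration.)
 */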
df08cdc7
AM
100static inline int agaw_to_level(int agaw)
101{
102 return agaw + 2;
103}
104
105static inline int agaw_to_width(int agaw)
106{
5c645b35 107 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
108}
109
110static inline int width_to_agaw(int width)
111{
5c645b35 112 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
113}
114
115static inline unsigned int level_to_offset_bits(int level)
116{
117 return (level - 1) * LEVEL_STRIDE;
118}
119
120static inline int pfn_level_offset(unsigned long pfn, int level)
121{
122 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123}
124
125static inline unsigned long level_mask(int level)
126{
127 return -1UL << level_to_offset_bits(level);
128}
129
130static inline unsigned long level_size(int level)
131{
132 return 1UL << level_to_offset_bits(level);
133}
134
135static inline unsigned long align_to_level(unsigned long pfn, int level)
136{
137 return (pfn + level_size(level) - 1) & level_mask(level);
138}
fd18de50 139
6dd9a7c7
YS
140static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141{
5c645b35 142 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
143}
144
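/*
 * Quick sanity check of the helpers above (illustrative, assuming
 * LEVEL_STRIDE == 9 and a 4-level table, i.e. agaw == 2):
 *
 *	agaw_to_level(2)		== 4
 *	level_to_offset_bits(3)		== 18
 *	pfn_level_offset(0x12345678, 3)	== (0x12345678 >> 18) & 0x1ff == 0x8d
 *	level_size(2)			== 512 PFNs  (one 2MiB superpage)
 *	align_to_level(0x1001, 2)	== 0x1200    (rounded up to 512 PFNs)
 */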
dd4e8319
DW
145/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146 are never going to work. */
147static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148{
149 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150}
151
152static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153{
154 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155}
156static inline unsigned long page_to_dma_pfn(struct page *pg)
157{
158 return mm_to_dma_pfn(page_to_pfn(pg));
159}
160static inline unsigned long virt_to_dma_pfn(void *p)
161{
162 return page_to_dma_pfn(virt_to_page(p));
163}
164
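/*
 * Illustration: with the usual PAGE_SHIFT == 12 both PFN spaces coincide and
 * the two helpers are identity functions. With a hypothetical 16KiB MM page
 * (PAGE_SHIFT == 14) one MM page spans four VT-d pages, so
 *
 *	mm_to_dma_pfn(10) == 40		dma_to_mm_pfn(43) == 10
 */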
d9630fe9
WH
165/* global iommu list, set NULL for ignored DMAR units */
166static struct intel_iommu **g_iommus;
167
e0fc7e0b 168static void __init check_tylersburg_isoch(void);
9af88143
DW
169static int rwbf_quirk;
170
b779260b
JC
171/*
172 * set to 1 to panic kernel if can't successfully enable VT-d
173 * (used when kernel is launched w/ TXT)
174 */
175static int force_on = 0;
176
46b08e1a
MM
177/*
178 * 0: Present
179 * 1-11: Reserved
180 * 12-63: Context Ptr (12 - (haw-1))
181 * 64-127: Reserved
182 */
183struct root_entry {
184 u64 val;
185 u64 rsvd1;
186};
187#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188static inline bool root_present(struct root_entry *root)
189{
190 return (root->val & 1);
191}
192static inline void set_root_present(struct root_entry *root)
193{
194 root->val |= 1;
195}
196static inline void set_root_value(struct root_entry *root, unsigned long value)
197{
1a2262f9 198 root->val &= ~VTD_PAGE_MASK;
46b08e1a
MM
199 root->val |= value & VTD_PAGE_MASK;
200}
201
202static inline struct context_entry *
203get_context_addr_from_root(struct root_entry *root)
204{
205 return (struct context_entry *)
206 (root_present(root)?phys_to_virt(
207 root->val & VTD_PAGE_MASK) :
208 NULL);
209}
210
7a8fc25e
MM
211/*
212 * low 64 bits:
213 * 0: present
214 * 1: fault processing disable
215 * 2-3: translation type
216 * 12-63: address space root
217 * high 64 bits:
218 * 0-2: address width
219 * 3-6: aval
220 * 8-23: domain id
221 */
222struct context_entry {
223 u64 lo;
224 u64 hi;
225};
c07e7d21
MM
226
227static inline bool context_present(struct context_entry *context)
228{
229 return (context->lo & 1);
230}
231static inline void context_set_present(struct context_entry *context)
232{
233 context->lo |= 1;
234}
235
236static inline void context_set_fault_enable(struct context_entry *context)
237{
238 context->lo &= (((u64)-1) << 2) | 1;
239}
240
c07e7d21
MM
241static inline void context_set_translation_type(struct context_entry *context,
242 unsigned long value)
243{
244 context->lo &= (((u64)-1) << 4) | 3;
245 context->lo |= (value & 3) << 2;
246}
247
248static inline void context_set_address_root(struct context_entry *context,
249 unsigned long value)
250{
1a2262f9 251 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
252 context->lo |= value & VTD_PAGE_MASK;
253}
254
255static inline void context_set_address_width(struct context_entry *context,
256 unsigned long value)
257{
258 context->hi |= value & 7;
259}
260
261static inline void context_set_domain_id(struct context_entry *context,
262 unsigned long value)
263{
264 context->hi |= (value & ((1 << 16) - 1)) << 8;
265}
266
267static inline void context_clear_entry(struct context_entry *context)
268{
269 context->lo = 0;
270 context->hi = 0;
271}
7a8fc25e 272
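/*
 * Putting the accessors above together: a minimal sketch of how a context
 * entry gets populated for multi-level translation. The real logic, with
 * locking and cache flushing, lives in domain_context_mapping_one() below;
 * __example_set_context() is a hypothetical helper, not part of the driver.
 */
static inline void __example_set_context(struct context_entry *ce,
					 unsigned long pgd_phys,
					 int agaw, int domain_id)
{
	context_clear_entry(ce);
	context_set_domain_id(ce, domain_id);
	context_set_address_root(ce, pgd_phys);	/* 4KiB-aligned page table */
	context_set_address_width(ce, agaw);
	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(ce);
	context_set_present(ce);		/* make it valid last */
}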
622ba12a
MM
273/*
274 * 0: readable
275 * 1: writable
276 * 2-6: reserved
277 * 7: super page
9cf06697
SY
278 * 8-10: available
279 * 11: snoop behavior
622ba12a
MM
280 * 12-63: Host physical address
281 */
282struct dma_pte {
283 u64 val;
284};
622ba12a 285
19c239ce
MM
286static inline void dma_clear_pte(struct dma_pte *pte)
287{
288 pte->val = 0;
289}
290
19c239ce
MM
291static inline u64 dma_pte_addr(struct dma_pte *pte)
292{
c85994e4
DW
293#ifdef CONFIG_64BIT
294 return pte->val & VTD_PAGE_MASK;
295#else
296 /* Must have a full atomic 64-bit read */
1a8bd481 297 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 298#endif
19c239ce
MM
299}
300
19c239ce
MM
301static inline bool dma_pte_present(struct dma_pte *pte)
302{
303 return (pte->val & 3) != 0;
304}
622ba12a 305
4399c8bf
AK
306static inline bool dma_pte_superpage(struct dma_pte *pte)
307{
c3c75eb7 308 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
309}
310
75e6bf96
DW
311static inline int first_pte_in_page(struct dma_pte *pte)
312{
313 return !((unsigned long)pte & ~VTD_PAGE_MASK);
314}
315
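/*
 * Illustration of the PTE accessors above: a leaf entry mapping host
 * physical 0x12345000 for read/write holds val == 0x12345003
 * (DMA_PTE_READ | DMA_PTE_WRITE in bits 0-1, the page frame in bits 12-63),
 * so dma_pte_present() is true, dma_pte_addr() yields 0x12345000, and
 * dma_pte_superpage() is false because DMA_PTE_LARGE_PAGE (bit 7) is clear.
 */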
2c2e2c38
FY
316/*
317 * This domain is a static identity mapping domain.
318 * 1. This domain creates a static 1:1 mapping to all usable memory.
319 * 2. It maps to each iommu if successful.
320 * 3. Each iommu maps to this domain if successful.
321 */
19943b0e
DW
322static struct dmar_domain *si_domain;
323static int hw_pass_through = 1;
2c2e2c38 324
1ce28feb
WH
325/* domain represents a virtual machine; more than one device
326 * across iommus may be owned by one domain, e.g. a kvm guest.
327 */
ab8dfe25 328#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 329
2c2e2c38 330/* si_domain contains mulitple devices */
ab8dfe25 331#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 332
1b198bb0
MT
333/* define the limit of IOMMUs supported in each domain */
334#ifdef CONFIG_X86
335# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
336#else
337# define IOMMU_UNITS_SUPPORTED 64
338#endif
339
99126f7c
MM
340struct dmar_domain {
341 int id; /* domain id */
4c923d47 342 int nid; /* node id */
1b198bb0
MT
343 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
344 /* bitmap of iommus this domain uses */
99126f7c
MM
345
346 struct list_head devices; /* all devices' list */
347 struct iova_domain iovad; /* iova's that belong to this domain */
348
349 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
350 int gaw; /* max guest address width */
351
352 /* adjusted guest address width, 0 is level 2 30-bit */
353 int agaw;
354
3b5410e7 355 int flags; /* flags to find out type of domain */
8e604097
WH
356
357 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 358 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 359 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
360 int iommu_superpage;/* Level of superpages supported:
361 0 == 4KiB (no superpages), 1 == 2MiB,
362 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 363 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 364 u64 max_addr; /* maximum mapped address */
99126f7c
MM
365};
366
a647dacb
MM
367/* PCI domain-device relationship */
368struct device_domain_info {
369 struct list_head link; /* link to domain siblings */
370 struct list_head global; /* link to global list */
276dbf99 371 u8 bus; /* PCI bus number */
a647dacb 372 u8 devfn; /* PCI devfn number */
0bcb3e28 373 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 374 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
375 struct dmar_domain *domain; /* pointer to domain */
376};
377
b94e4117
JL
378struct dmar_rmrr_unit {
379 struct list_head list; /* list of rmrr units */
380 struct acpi_dmar_header *hdr; /* ACPI header */
381 u64 base_address; /* reserved base address*/
382 u64 end_address; /* reserved end address */
832bd858 383 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
384 int devices_cnt; /* target device count */
385};
386
387struct dmar_atsr_unit {
388 struct list_head list; /* list of ATSR units */
389 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 390 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
391 int devices_cnt; /* target device count */
392 u8 include_all:1; /* include all ports */
393};
394
395static LIST_HEAD(dmar_atsr_units);
396static LIST_HEAD(dmar_rmrr_units);
397
398#define for_each_rmrr_units(rmrr) \
399 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
400
5e0d2a6f 401static void flush_unmaps_timeout(unsigned long data);
402
b707cb02 403static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 404
80b20dd8 405#define HIGH_WATER_MARK 250
406struct deferred_flush_tables {
407 int next;
408 struct iova *iova[HIGH_WATER_MARK];
409 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 410 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 411};
412
413static struct deferred_flush_tables *deferred_flush;
414
5e0d2a6f 415/* bitmap for indexing intel_iommus */
5e0d2a6f 416static int g_num_of_iommus;
417
418static DEFINE_SPINLOCK(async_umap_flush_lock);
419static LIST_HEAD(unmaps_to_do);
420
421static int timer_on;
422static long list_size;
5e0d2a6f 423
92d03cc8 424static void domain_exit(struct dmar_domain *domain);
ba395927 425static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 426static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 427 struct device *dev);
92d03cc8 428static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 429 struct device *dev);
2a46ddf7
JL
430static int domain_detach_iommu(struct dmar_domain *domain,
431 struct intel_iommu *iommu);
ba395927 432
d3f13810 433#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
434int dmar_disabled = 0;
435#else
436int dmar_disabled = 1;
d3f13810 437#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 438
8bc1f85c
ED
439int intel_iommu_enabled = 0;
440EXPORT_SYMBOL_GPL(intel_iommu_enabled);
441
2d9e667e 442static int dmar_map_gfx = 1;
7d3b03ce 443static int dmar_forcedac;
5e0d2a6f 444static int intel_iommu_strict;
6dd9a7c7 445static int intel_iommu_superpage = 1;
ba395927 446
c0771df8
DW
447int intel_iommu_gfx_mapped;
448EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
449
ba395927
KA
450#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
451static DEFINE_SPINLOCK(device_domain_lock);
452static LIST_HEAD(device_domain_list);
453
b22f6434 454static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 455
ba395927
KA
456static int __init intel_iommu_setup(char *str)
457{
458 if (!str)
459 return -EINVAL;
460 while (*str) {
0cd5c3c8
KM
461 if (!strncmp(str, "on", 2)) {
462 dmar_disabled = 0;
463 printk(KERN_INFO "Intel-IOMMU: enabled\n");
464 } else if (!strncmp(str, "off", 3)) {
ba395927 465 dmar_disabled = 1;
0cd5c3c8 466 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
467 } else if (!strncmp(str, "igfx_off", 8)) {
468 dmar_map_gfx = 0;
469 printk(KERN_INFO
470 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 471 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 472 printk(KERN_INFO
7d3b03ce
KA
473 "Intel-IOMMU: Forcing DAC for PCI devices\n");
474 dmar_forcedac = 1;
5e0d2a6f 475 } else if (!strncmp(str, "strict", 6)) {
476 printk(KERN_INFO
477 "Intel-IOMMU: disable batched IOTLB flush\n");
478 intel_iommu_strict = 1;
6dd9a7c7
YS
479 } else if (!strncmp(str, "sp_off", 6)) {
480 printk(KERN_INFO
481 "Intel-IOMMU: disable supported super page\n");
482 intel_iommu_superpage = 0;
ba395927
KA
483 }
484
485 str += strcspn(str, ",");
486 while (*str == ',')
487 str++;
488 }
489 return 0;
490}
491__setup("intel_iommu=", intel_iommu_setup);
492
493static struct kmem_cache *iommu_domain_cache;
494static struct kmem_cache *iommu_devinfo_cache;
495static struct kmem_cache *iommu_iova_cache;
496
4c923d47 497static inline void *alloc_pgtable_page(int node)
eb3fa7cb 498{
4c923d47
SS
499 struct page *page;
500 void *vaddr = NULL;
eb3fa7cb 501
4c923d47
SS
502 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
503 if (page)
504 vaddr = page_address(page);
eb3fa7cb 505 return vaddr;
ba395927
KA
506}
507
508static inline void free_pgtable_page(void *vaddr)
509{
510 free_page((unsigned long)vaddr);
511}
512
513static inline void *alloc_domain_mem(void)
514{
354bb65e 515 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
516}
517
38717946 518static void free_domain_mem(void *vaddr)
ba395927
KA
519{
520 kmem_cache_free(iommu_domain_cache, vaddr);
521}
522
523static inline void * alloc_devinfo_mem(void)
524{
354bb65e 525 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
526}
527
528static inline void free_devinfo_mem(void *vaddr)
529{
530 kmem_cache_free(iommu_devinfo_cache, vaddr);
531}
532
533struct iova *alloc_iova_mem(void)
534{
354bb65e 535 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
536}
537
538void free_iova_mem(struct iova *iova)
539{
540 kmem_cache_free(iommu_iova_cache, iova);
541}
542
ab8dfe25
JL
543static inline int domain_type_is_vm(struct dmar_domain *domain)
544{
545 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
546}
547
548static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
549{
550 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
551 DOMAIN_FLAG_STATIC_IDENTITY);
552}
1b573683 553
162d1b10
JL
554static inline int domain_pfn_supported(struct dmar_domain *domain,
555 unsigned long pfn)
556{
557 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
558
559 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
560}
561
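/*
 * Example: for a domain with a 48-bit adjusted guest address width,
 * addr_width is 48 - 12 == 36, so domain_pfn_supported() accepts any pfn
 * below (1UL << 36) and rejects anything above it. When addr_width is at
 * least BITS_PER_LONG the check is trivially true, since an unsigned long
 * pfn cannot exceed that width anyway.
 */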
4ed0d3e6 562static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
563{
564 unsigned long sagaw;
565 int agaw = -1;
566
567 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 568 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
569 agaw >= 0; agaw--) {
570 if (test_bit(agaw, &sagaw))
571 break;
572 }
573
574 return agaw;
575}
576
4ed0d3e6
FY
577/*
578 * Calculate max SAGAW for each iommu.
579 */
580int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
581{
582 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
583}
584
585/*
586 * calculate agaw for each iommu.
587 * "SAGAW" may be different across iommus, use a default agaw, and
588 * get a smaller supported agaw for iommus that don't support the default agaw.
589 */
590int iommu_calculate_agaw(struct intel_iommu *iommu)
591{
592 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
593}
594
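/*
 * Example: with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48 the search starts at
 * width_to_agaw(48) == 2 (a 4-level table). A SAGAW field of 0x4 (only
 * bit 2 set) returns agaw 2 straight away; 0x2 (3-level only) falls back
 * to agaw 1; a SAGAW of 0 yields -1, i.e. no usable address width.
 */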
2c2e2c38 595/* This functionin only returns single iommu in a domain */
8c11e798
WH
596static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
597{
598 int iommu_id;
599
2c2e2c38 600 /* si_domain and vm domain should not get here. */
ab8dfe25 601 BUG_ON(domain_type_is_vm_or_si(domain));
1b198bb0 602 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
603 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
604 return NULL;
605
606 return g_iommus[iommu_id];
607}
608
8e604097
WH
609static void domain_update_iommu_coherency(struct dmar_domain *domain)
610{
d0501960
DW
611 struct dmar_drhd_unit *drhd;
612 struct intel_iommu *iommu;
613 int i, found = 0;
2e12bc29 614
d0501960 615 domain->iommu_coherency = 1;
8e604097 616
1b198bb0 617 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 618 found = 1;
8e604097
WH
619 if (!ecap_coherent(g_iommus[i]->ecap)) {
620 domain->iommu_coherency = 0;
621 break;
622 }
8e604097 623 }
d0501960
DW
624 if (found)
625 return;
626
627 /* No hardware attached; use lowest common denominator */
628 rcu_read_lock();
629 for_each_active_iommu(iommu, drhd) {
630 if (!ecap_coherent(iommu->ecap)) {
631 domain->iommu_coherency = 0;
632 break;
633 }
634 }
635 rcu_read_unlock();
8e604097
WH
636}
637
161f6934 638static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 639{
161f6934
JL
640 struct dmar_drhd_unit *drhd;
641 struct intel_iommu *iommu;
642 int ret = 1;
58c610bd 643
161f6934
JL
644 rcu_read_lock();
645 for_each_active_iommu(iommu, drhd) {
646 if (iommu != skip) {
647 if (!ecap_sc_support(iommu->ecap)) {
648 ret = 0;
649 break;
650 }
58c610bd 651 }
58c610bd 652 }
161f6934
JL
653 rcu_read_unlock();
654
655 return ret;
58c610bd
SY
656}
657
161f6934 658static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 659{
8140a95d 660 struct dmar_drhd_unit *drhd;
161f6934 661 struct intel_iommu *iommu;
8140a95d 662 int mask = 0xf;
6dd9a7c7
YS
663
664 if (!intel_iommu_superpage) {
161f6934 665 return 0;
6dd9a7c7
YS
666 }
667
8140a95d 668 /* set iommu_superpage to the smallest common denominator */
0e242612 669 rcu_read_lock();
8140a95d 670 for_each_active_iommu(iommu, drhd) {
161f6934
JL
671 if (iommu != skip) {
672 mask &= cap_super_page_val(iommu->cap);
673 if (!mask)
674 break;
6dd9a7c7
YS
675 }
676 }
0e242612
JL
677 rcu_read_unlock();
678
161f6934 679 return fls(mask);
6dd9a7c7
YS
680}
681
58c610bd
SY
682/* Some capabilities may be different across iommus */
683static void domain_update_iommu_cap(struct dmar_domain *domain)
684{
685 domain_update_iommu_coherency(domain);
161f6934
JL
686 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
687 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
688}
689
156baca8 690static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
691{
692 struct dmar_drhd_unit *drhd = NULL;
b683b230 693 struct intel_iommu *iommu;
156baca8
DW
694 struct device *tmp;
695 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 696 u16 segment = 0;
c7151a8d
WH
697 int i;
698
156baca8
DW
699 if (dev_is_pci(dev)) {
700 pdev = to_pci_dev(dev);
701 segment = pci_domain_nr(pdev->bus);
702 } else if (ACPI_COMPANION(dev))
703 dev = &ACPI_COMPANION(dev)->dev;
704
0e242612 705 rcu_read_lock();
b683b230 706 for_each_active_iommu(iommu, drhd) {
156baca8 707 if (pdev && segment != drhd->segment)
276dbf99 708 continue;
c7151a8d 709
b683b230 710 for_each_active_dev_scope(drhd->devices,
156baca8
DW
711 drhd->devices_cnt, i, tmp) {
712 if (tmp == dev) {
713 *bus = drhd->devices[i].bus;
714 *devfn = drhd->devices[i].devfn;
b683b230 715 goto out;
156baca8
DW
716 }
717
718 if (!pdev || !dev_is_pci(tmp))
719 continue;
720
721 ptmp = to_pci_dev(tmp);
722 if (ptmp->subordinate &&
723 ptmp->subordinate->number <= pdev->bus->number &&
724 ptmp->subordinate->busn_res.end >= pdev->bus->number)
725 goto got_pdev;
924b6231 726 }
c7151a8d 727
156baca8
DW
728 if (pdev && drhd->include_all) {
729 got_pdev:
730 *bus = pdev->bus->number;
731 *devfn = pdev->devfn;
b683b230 732 goto out;
156baca8 733 }
c7151a8d 734 }
b683b230 735 iommu = NULL;
156baca8 736 out:
0e242612 737 rcu_read_unlock();
c7151a8d 738
b683b230 739 return iommu;
c7151a8d
WH
740}
741
5331fe6f
WH
742static void domain_flush_cache(struct dmar_domain *domain,
743 void *addr, int size)
744{
745 if (!domain->iommu_coherency)
746 clflush_cache_range(addr, size);
747}
748
ba395927
KA
749/* Gets context entry for a given bus and devfn */
750static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
751 u8 bus, u8 devfn)
752{
753 struct root_entry *root;
754 struct context_entry *context;
755 unsigned long phy_addr;
756 unsigned long flags;
757
758 spin_lock_irqsave(&iommu->lock, flags);
759 root = &iommu->root_entry[bus];
760 context = get_context_addr_from_root(root);
761 if (!context) {
4c923d47
SS
762 context = (struct context_entry *)
763 alloc_pgtable_page(iommu->node);
ba395927
KA
764 if (!context) {
765 spin_unlock_irqrestore(&iommu->lock, flags);
766 return NULL;
767 }
5b6985ce 768 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
769 phy_addr = virt_to_phys((void *)context);
770 set_root_value(root, phy_addr);
771 set_root_present(root);
772 __iommu_flush_cache(iommu, root, sizeof(*root));
773 }
774 spin_unlock_irqrestore(&iommu->lock, flags);
775 return &context[devfn];
776}
777
778static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
779{
780 struct root_entry *root;
781 struct context_entry *context;
782 int ret;
783 unsigned long flags;
784
785 spin_lock_irqsave(&iommu->lock, flags);
786 root = &iommu->root_entry[bus];
787 context = get_context_addr_from_root(root);
788 if (!context) {
789 ret = 0;
790 goto out;
791 }
c07e7d21 792 ret = context_present(&context[devfn]);
ba395927
KA
793out:
794 spin_unlock_irqrestore(&iommu->lock, flags);
795 return ret;
796}
797
798static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
799{
800 struct root_entry *root;
801 struct context_entry *context;
802 unsigned long flags;
803
804 spin_lock_irqsave(&iommu->lock, flags);
805 root = &iommu->root_entry[bus];
806 context = get_context_addr_from_root(root);
807 if (context) {
c07e7d21 808 context_clear_entry(&context[devfn]);
ba395927
KA
809 __iommu_flush_cache(iommu, &context[devfn], \
810 sizeof(*context));
811 }
812 spin_unlock_irqrestore(&iommu->lock, flags);
813}
814
815static void free_context_table(struct intel_iommu *iommu)
816{
817 struct root_entry *root;
818 int i;
819 unsigned long flags;
820 struct context_entry *context;
821
822 spin_lock_irqsave(&iommu->lock, flags);
823 if (!iommu->root_entry) {
824 goto out;
825 }
826 for (i = 0; i < ROOT_ENTRY_NR; i++) {
827 root = &iommu->root_entry[i];
828 context = get_context_addr_from_root(root);
829 if (context)
830 free_pgtable_page(context);
831 }
832 free_pgtable_page(iommu->root_entry);
833 iommu->root_entry = NULL;
834out:
835 spin_unlock_irqrestore(&iommu->lock, flags);
836}
837
b026fd28 838static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 839 unsigned long pfn, int *target_level)
ba395927 840{
ba395927
KA
841 struct dma_pte *parent, *pte = NULL;
842 int level = agaw_to_level(domain->agaw);
4399c8bf 843 int offset;
ba395927
KA
844
845 BUG_ON(!domain->pgd);
f9423606 846
162d1b10 847 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
848 /* Address beyond IOMMU's addressing capabilities. */
849 return NULL;
850
ba395927
KA
851 parent = domain->pgd;
852
5cf0a76f 853 while (1) {
ba395927
KA
854 void *tmp_page;
855
b026fd28 856 offset = pfn_level_offset(pfn, level);
ba395927 857 pte = &parent[offset];
5cf0a76f 858 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 859 break;
5cf0a76f 860 if (level == *target_level)
ba395927
KA
861 break;
862
19c239ce 863 if (!dma_pte_present(pte)) {
c85994e4
DW
864 uint64_t pteval;
865
4c923d47 866 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 867
206a73c1 868 if (!tmp_page)
ba395927 869 return NULL;
206a73c1 870
c85994e4 871 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 872 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 873 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
874 /* Someone else set it while we were thinking; use theirs. */
875 free_pgtable_page(tmp_page);
effad4b5 876 else
c85994e4 877 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 878 }
5cf0a76f
DW
879 if (level == 1)
880 break;
881
19c239ce 882 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
883 level--;
884 }
885
5cf0a76f
DW
886 if (!*target_level)
887 *target_level = level;
888
ba395927
KA
889 return pte;
890}
891
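/*
 * Usage sketch (illustrative; mirrors the callers further down): passing a
 * target_level of 0 asks the walk not to allocate anything; it stops at the
 * first superpage or not-yet-present entry (or at level 1) and reports the
 * level it stopped at through *target_level.
 *
 *	int level = 0;
 *	struct dma_pte *pte = pfn_to_dma_pte(domain, iov_pfn, &level);
 *
 *	if (pte && level == 2)
 *		;	/* ended up on a 2MiB superpage slot */
 */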
6dd9a7c7 892
ba395927 893/* return address's pte at specific level */
90dcfb5e
DW
894static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
895 unsigned long pfn,
6dd9a7c7 896 int level, int *large_page)
ba395927
KA
897{
898 struct dma_pte *parent, *pte = NULL;
899 int total = agaw_to_level(domain->agaw);
900 int offset;
901
902 parent = domain->pgd;
903 while (level <= total) {
90dcfb5e 904 offset = pfn_level_offset(pfn, total);
ba395927
KA
905 pte = &parent[offset];
906 if (level == total)
907 return pte;
908
6dd9a7c7
YS
909 if (!dma_pte_present(pte)) {
910 *large_page = total;
ba395927 911 break;
6dd9a7c7
YS
912 }
913
e16922af 914 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
915 *large_page = total;
916 return pte;
917 }
918
19c239ce 919 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
920 total--;
921 }
922 return NULL;
923}
924
ba395927 925/* clear last level pte, a tlb flush should be followed */
5cf0a76f 926static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
927 unsigned long start_pfn,
928 unsigned long last_pfn)
ba395927 929{
6dd9a7c7 930 unsigned int large_page = 1;
310a5ab9 931 struct dma_pte *first_pte, *pte;
66eae846 932
162d1b10
JL
933 BUG_ON(!domain_pfn_supported(domain, start_pfn));
934 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 935 BUG_ON(start_pfn > last_pfn);
ba395927 936
04b18e65 937 /* we don't need lock here; nobody else touches the iova range */
59c36286 938 do {
6dd9a7c7
YS
939 large_page = 1;
940 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 941 if (!pte) {
6dd9a7c7 942 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
943 continue;
944 }
6dd9a7c7 945 do {
310a5ab9 946 dma_clear_pte(pte);
6dd9a7c7 947 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 948 pte++;
75e6bf96
DW
949 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
950
310a5ab9
DW
951 domain_flush_cache(domain, first_pte,
952 (void *)pte - (void *)first_pte);
59c36286
DW
953
954 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
955}
956
3269ee0b
AW
957static void dma_pte_free_level(struct dmar_domain *domain, int level,
958 struct dma_pte *pte, unsigned long pfn,
959 unsigned long start_pfn, unsigned long last_pfn)
960{
961 pfn = max(start_pfn, pfn);
962 pte = &pte[pfn_level_offset(pfn, level)];
963
964 do {
965 unsigned long level_pfn;
966 struct dma_pte *level_pte;
967
968 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
969 goto next;
970
971 level_pfn = pfn & level_mask(level - 1);
972 level_pte = phys_to_virt(dma_pte_addr(pte));
973
974 if (level > 2)
975 dma_pte_free_level(domain, level - 1, level_pte,
976 level_pfn, start_pfn, last_pfn);
977
978 /* If range covers entire pagetable, free it */
979 if (!(start_pfn > level_pfn ||
08336fd2 980 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
981 dma_clear_pte(pte);
982 domain_flush_cache(domain, pte, sizeof(*pte));
983 free_pgtable_page(level_pte);
984 }
985next:
986 pfn += level_size(level);
987 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
988}
989
ba395927
KA
990/* free page table pages. last level pte should already be cleared */
991static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
992 unsigned long start_pfn,
993 unsigned long last_pfn)
ba395927 994{
162d1b10
JL
995 BUG_ON(!domain_pfn_supported(domain, start_pfn));
996 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 997 BUG_ON(start_pfn > last_pfn);
ba395927 998
d41a4adb
JL
999 dma_pte_clear_range(domain, start_pfn, last_pfn);
1000
f3a0a52f 1001 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1002 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1003 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1004
ba395927 1005 /* free pgd */
d794dc9b 1006 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1007 free_pgtable_page(domain->pgd);
1008 domain->pgd = NULL;
1009 }
1010}
1011
ea8ea460
DW
1012/* When a page at a given level is being unlinked from its parent, we don't
1013 need to *modify* it at all. All we need to do is make a list of all the
1014 pages which can be freed just as soon as we've flushed the IOTLB and we
1015 know the hardware page-walk will no longer touch them.
1016 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1017 be freed. */
1018static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1019 int level, struct dma_pte *pte,
1020 struct page *freelist)
1021{
1022 struct page *pg;
1023
1024 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1025 pg->freelist = freelist;
1026 freelist = pg;
1027
1028 if (level == 1)
1029 return freelist;
1030
adeb2590
JL
1031 pte = page_address(pg);
1032 do {
ea8ea460
DW
1033 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1034 freelist = dma_pte_list_pagetables(domain, level - 1,
1035 pte, freelist);
adeb2590
JL
1036 pte++;
1037 } while (!first_pte_in_page(pte));
ea8ea460
DW
1038
1039 return freelist;
1040}
1041
1042static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1043 struct dma_pte *pte, unsigned long pfn,
1044 unsigned long start_pfn,
1045 unsigned long last_pfn,
1046 struct page *freelist)
1047{
1048 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1049
1050 pfn = max(start_pfn, pfn);
1051 pte = &pte[pfn_level_offset(pfn, level)];
1052
1053 do {
1054 unsigned long level_pfn;
1055
1056 if (!dma_pte_present(pte))
1057 goto next;
1058
1059 level_pfn = pfn & level_mask(level);
1060
1061 /* If range covers entire pagetable, free it */
1062 if (start_pfn <= level_pfn &&
1063 last_pfn >= level_pfn + level_size(level) - 1) {
1064 /* These subordinate page tables are going away entirely. Don't
1065 bother to clear them; we're just going to *free* them. */
1066 if (level > 1 && !dma_pte_superpage(pte))
1067 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1068
1069 dma_clear_pte(pte);
1070 if (!first_pte)
1071 first_pte = pte;
1072 last_pte = pte;
1073 } else if (level > 1) {
1074 /* Recurse down into a level that isn't *entirely* obsolete */
1075 freelist = dma_pte_clear_level(domain, level - 1,
1076 phys_to_virt(dma_pte_addr(pte)),
1077 level_pfn, start_pfn, last_pfn,
1078 freelist);
1079 }
1080next:
1081 pfn += level_size(level);
1082 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1083
1084 if (first_pte)
1085 domain_flush_cache(domain, first_pte,
1086 (void *)++last_pte - (void *)first_pte);
1087
1088 return freelist;
1089}
1090
1091/* We can't just free the pages because the IOMMU may still be walking
1092 the page tables, and may have cached the intermediate levels. The
1093 pages can only be freed after the IOTLB flush has been done. */
1094struct page *domain_unmap(struct dmar_domain *domain,
1095 unsigned long start_pfn,
1096 unsigned long last_pfn)
1097{
ea8ea460
DW
1098 struct page *freelist = NULL;
1099
162d1b10
JL
1100 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1101 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1102 BUG_ON(start_pfn > last_pfn);
1103
1104 /* we don't need lock here; nobody else touches the iova range */
1105 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1106 domain->pgd, 0, start_pfn, last_pfn, NULL);
1107
1108 /* free pgd */
1109 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1110 struct page *pgd_page = virt_to_page(domain->pgd);
1111 pgd_page->freelist = freelist;
1112 freelist = pgd_page;
1113
1114 domain->pgd = NULL;
1115 }
1116
1117 return freelist;
1118}
1119
1120void dma_free_pagelist(struct page *freelist)
1121{
1122 struct page *pg;
1123
1124 while ((pg = freelist)) {
1125 freelist = pg->freelist;
1126 free_pgtable_page(page_address(pg));
1127 }
1128}
1129
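/*
 * Usage sketch (illustrative ordering, as used by the unmap paths below):
 * tear down the mappings, flush the IOTLB, and only then hand the
 * page-table pages back to the allocator.
 *
 *	struct page *freelist;
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
 *			      last_pfn - start_pfn + 1, 0, 0);
 *	dma_free_pagelist(freelist);	/* safe only after the flush */
 */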
ba395927
KA
1130/* iommu handling */
1131static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1132{
1133 struct root_entry *root;
1134 unsigned long flags;
1135
4c923d47 1136 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
1137 if (!root)
1138 return -ENOMEM;
1139
5b6985ce 1140 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1141
1142 spin_lock_irqsave(&iommu->lock, flags);
1143 iommu->root_entry = root;
1144 spin_unlock_irqrestore(&iommu->lock, flags);
1145
1146 return 0;
1147}
1148
ba395927
KA
1149static void iommu_set_root_entry(struct intel_iommu *iommu)
1150{
1151 void *addr;
c416daa9 1152 u32 sts;
ba395927
KA
1153 unsigned long flag;
1154
1155 addr = iommu->root_entry;
1156
1f5b3c3f 1157 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1158 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1159
c416daa9 1160 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1161
1162 /* Make sure hardware complete it */
1163 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1164 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1165
1f5b3c3f 1166 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1167}
1168
1169static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1170{
1171 u32 val;
1172 unsigned long flag;
1173
9af88143 1174 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1175 return;
ba395927 1176
1f5b3c3f 1177 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1178 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1179
1180 /* Make sure hardware complete it */
1181 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1182 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1183
1f5b3c3f 1184 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1185}
1186
1187/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1188static void __iommu_flush_context(struct intel_iommu *iommu,
1189 u16 did, u16 source_id, u8 function_mask,
1190 u64 type)
ba395927
KA
1191{
1192 u64 val = 0;
1193 unsigned long flag;
1194
ba395927
KA
1195 switch (type) {
1196 case DMA_CCMD_GLOBAL_INVL:
1197 val = DMA_CCMD_GLOBAL_INVL;
1198 break;
1199 case DMA_CCMD_DOMAIN_INVL:
1200 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1201 break;
1202 case DMA_CCMD_DEVICE_INVL:
1203 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1204 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1205 break;
1206 default:
1207 BUG();
1208 }
1209 val |= DMA_CCMD_ICC;
1210
1f5b3c3f 1211 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1212 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1213
1214 /* Make sure hardware complete it */
1215 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1216 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1217
1f5b3c3f 1218 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1219}
1220
ba395927 1221/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1222static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1223 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1224{
1225 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1226 u64 val = 0, val_iva = 0;
1227 unsigned long flag;
1228
ba395927
KA
1229 switch (type) {
1230 case DMA_TLB_GLOBAL_FLUSH:
1231 /* global flush doesn't need set IVA_REG */
1232 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1233 break;
1234 case DMA_TLB_DSI_FLUSH:
1235 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1236 break;
1237 case DMA_TLB_PSI_FLUSH:
1238 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1239 /* IH bit is passed in as part of address */
ba395927
KA
1240 val_iva = size_order | addr;
1241 break;
1242 default:
1243 BUG();
1244 }
1245 /* Note: set drain read/write */
1246#if 0
1247 /*
1248 * This is probably meant to be extra secure. Looks like we can
1249 * ignore it without any impact.
1250 */
1251 if (cap_read_drain(iommu->cap))
1252 val |= DMA_TLB_READ_DRAIN;
1253#endif
1254 if (cap_write_drain(iommu->cap))
1255 val |= DMA_TLB_WRITE_DRAIN;
1256
1f5b3c3f 1257 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1258 /* Note: Only uses first TLB reg currently */
1259 if (val_iva)
1260 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1261 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1262
1263 /* Make sure hardware complete it */
1264 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1265 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1266
1f5b3c3f 1267 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1268
1269 /* check IOTLB invalidation granularity */
1270 if (DMA_TLB_IAIG(val) == 0)
1271 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1272 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1273 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1274 (unsigned long long)DMA_TLB_IIRG(type),
1275 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1276}
1277
64ae892b
DW
1278static struct device_domain_info *
1279iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1280 u8 bus, u8 devfn)
93a23a72
YZ
1281{
1282 int found = 0;
1283 unsigned long flags;
1284 struct device_domain_info *info;
0bcb3e28 1285 struct pci_dev *pdev;
93a23a72
YZ
1286
1287 if (!ecap_dev_iotlb_support(iommu->ecap))
1288 return NULL;
1289
1290 if (!iommu->qi)
1291 return NULL;
1292
1293 spin_lock_irqsave(&device_domain_lock, flags);
1294 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1295 if (info->iommu == iommu && info->bus == bus &&
1296 info->devfn == devfn) {
93a23a72
YZ
1297 found = 1;
1298 break;
1299 }
1300 spin_unlock_irqrestore(&device_domain_lock, flags);
1301
0bcb3e28 1302 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1303 return NULL;
1304
0bcb3e28
DW
1305 pdev = to_pci_dev(info->dev);
1306
1307 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1308 return NULL;
1309
0bcb3e28 1310 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1311 return NULL;
1312
93a23a72
YZ
1313 return info;
1314}
1315
1316static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1317{
0bcb3e28 1318 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1319 return;
1320
0bcb3e28 1321 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1322}
1323
1324static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1325{
0bcb3e28
DW
1326 if (!info->dev || !dev_is_pci(info->dev) ||
1327 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1328 return;
1329
0bcb3e28 1330 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1331}
1332
1333static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1334 u64 addr, unsigned mask)
1335{
1336 u16 sid, qdep;
1337 unsigned long flags;
1338 struct device_domain_info *info;
1339
1340 spin_lock_irqsave(&device_domain_lock, flags);
1341 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1342 struct pci_dev *pdev;
1343 if (!info->dev || !dev_is_pci(info->dev))
1344 continue;
1345
1346 pdev = to_pci_dev(info->dev);
1347 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1348 continue;
1349
1350 sid = info->bus << 8 | info->devfn;
0bcb3e28 1351 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1352 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1353 }
1354 spin_unlock_irqrestore(&device_domain_lock, flags);
1355}
1356
1f0ef2aa 1357static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1358 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1359{
9dd2fe89 1360 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1361 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1362
ba395927
KA
1363 BUG_ON(pages == 0);
1364
ea8ea460
DW
1365 if (ih)
1366 ih = 1 << 6;
ba395927 1367 /*
9dd2fe89
YZ
1368 * Fallback to domain selective flush if no PSI support or the size is
1369 * too big.
ba395927
KA
1370 * PSI requires page size to be 2 ^ x, and the base address is naturally
1371 * aligned to the size
1372 */
9dd2fe89
YZ
1373 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1374 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1375 DMA_TLB_DSI_FLUSH);
9dd2fe89 1376 else
ea8ea460 1377 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1378 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1379
1380 /*
82653633
NA
1381 * In caching mode, changes of pages from non-present to present require
1382 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1383 */
82653633 1384 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1385 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1386}
1387
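/*
 * Example: a request to flush 300 pages is rounded up to the next power of
 * two, so mask == ilog2(512) == 9 and one 2MiB-aligned page-selective
 * invalidation covers the range, provided the hardware supports PSI and
 * cap_max_amask_val() is at least 9; otherwise the code above falls back
 * to a domain-selective flush.
 */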
f8bab735 1388static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1389{
1390 u32 pmen;
1391 unsigned long flags;
1392
1f5b3c3f 1393 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1394 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1395 pmen &= ~DMA_PMEN_EPM;
1396 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1397
1398 /* wait for the protected region status bit to clear */
1399 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1400 readl, !(pmen & DMA_PMEN_PRS), pmen);
1401
1f5b3c3f 1402 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1403}
1404
2a41ccee 1405static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1406{
1407 u32 sts;
1408 unsigned long flags;
1409
1f5b3c3f 1410 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1411 iommu->gcmd |= DMA_GCMD_TE;
1412 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1413
1414 /* Make sure hardware complete it */
1415 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1416 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1417
1f5b3c3f 1418 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1419}
1420
2a41ccee 1421static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1422{
1423 u32 sts;
1424 unsigned long flag;
1425
1f5b3c3f 1426 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1427 iommu->gcmd &= ~DMA_GCMD_TE;
1428 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1429
1430 /* Make sure hardware complete it */
1431 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1432 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1433
1f5b3c3f 1434 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1435}
1436
3460a6d9 1437
ba395927
KA
1438static int iommu_init_domains(struct intel_iommu *iommu)
1439{
1440 unsigned long ndomains;
1441 unsigned long nlongs;
1442
1443 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1444 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1445 iommu->seq_id, ndomains);
ba395927
KA
1446 nlongs = BITS_TO_LONGS(ndomains);
1447
94a91b50
DD
1448 spin_lock_init(&iommu->lock);
1449
ba395927
KA
1450 /* TBD: there might be 64K domains,
1451 * consider other allocation for future chip
1452 */
1453 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1454 if (!iommu->domain_ids) {
852bdb04
JL
1455 pr_err("IOMMU%d: allocating domain id array failed\n",
1456 iommu->seq_id);
ba395927
KA
1457 return -ENOMEM;
1458 }
1459 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1460 GFP_KERNEL);
1461 if (!iommu->domains) {
852bdb04
JL
1462 pr_err("IOMMU%d: allocating domain array failed\n",
1463 iommu->seq_id);
1464 kfree(iommu->domain_ids);
1465 iommu->domain_ids = NULL;
ba395927
KA
1466 return -ENOMEM;
1467 }
1468
1469 /*
1470 * if Caching mode is set, then invalid translations are tagged
1471 * with domain id 0. Hence we need to pre-allocate it.
1472 */
1473 if (cap_caching_mode(iommu->cap))
1474 set_bit(0, iommu->domain_ids);
1475 return 0;
1476}
ba395927 1477
a868e6b7 1478static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1479{
1480 struct dmar_domain *domain;
2a46ddf7 1481 int i;
ba395927 1482
94a91b50 1483 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1484 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1485 /*
1486 * Domain id 0 is reserved for invalid translation
1487 * if hardware supports caching mode.
1488 */
1489 if (cap_caching_mode(iommu->cap) && i == 0)
1490 continue;
1491
94a91b50
DD
1492 domain = iommu->domains[i];
1493 clear_bit(i, iommu->domain_ids);
129ad281
JL
1494 if (domain_detach_iommu(domain, iommu) == 0 &&
1495 !domain_type_is_vm(domain))
92d03cc8 1496 domain_exit(domain);
5e98c4b1 1497 }
ba395927
KA
1498 }
1499
1500 if (iommu->gcmd & DMA_GCMD_TE)
1501 iommu_disable_translation(iommu);
1502
ba395927
KA
1503 kfree(iommu->domains);
1504 kfree(iommu->domain_ids);
a868e6b7
JL
1505 iommu->domains = NULL;
1506 iommu->domain_ids = NULL;
ba395927 1507
d9630fe9
WH
1508 g_iommus[iommu->seq_id] = NULL;
1509
ba395927
KA
1510 /* free context mapping */
1511 free_context_table(iommu);
ba395927
KA
1512}
1513
ab8dfe25 1514static struct dmar_domain *alloc_domain(int flags)
ba395927 1515{
92d03cc8
JL
1516 /* domain id for virtual machine, it won't be set in context */
1517 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1518 struct dmar_domain *domain;
ba395927
KA
1519
1520 domain = alloc_domain_mem();
1521 if (!domain)
1522 return NULL;
1523
ab8dfe25 1524 memset(domain, 0, sizeof(*domain));
4c923d47 1525 domain->nid = -1;
ab8dfe25 1526 domain->flags = flags;
92d03cc8
JL
1527 spin_lock_init(&domain->iommu_lock);
1528 INIT_LIST_HEAD(&domain->devices);
ab8dfe25 1529 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
92d03cc8 1530 domain->id = atomic_inc_return(&vm_domid);
2c2e2c38
FY
1531
1532 return domain;
1533}
1534
fb170fb4
JL
1535static int __iommu_attach_domain(struct dmar_domain *domain,
1536 struct intel_iommu *iommu)
2c2e2c38
FY
1537{
1538 int num;
1539 unsigned long ndomains;
2c2e2c38 1540
ba395927 1541 ndomains = cap_ndoms(iommu->cap);
ba395927 1542 num = find_first_zero_bit(iommu->domain_ids, ndomains);
fb170fb4
JL
1543 if (num < ndomains) {
1544 set_bit(num, iommu->domain_ids);
1545 iommu->domains[num] = domain;
1546 } else {
1547 num = -ENOSPC;
ba395927
KA
1548 }
1549
fb170fb4
JL
1550 return num;
1551}
1552
1553static int iommu_attach_domain(struct dmar_domain *domain,
1554 struct intel_iommu *iommu)
1555{
1556 int num;
1557 unsigned long flags;
1558
1559 spin_lock_irqsave(&iommu->lock, flags);
1560 num = __iommu_attach_domain(domain, iommu);
44bde614 1561 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4
JL
1562 if (num < 0)
1563 pr_err("IOMMU: no free domain ids\n");
ba395927 1564
fb170fb4 1565 return num;
ba395927
KA
1566}
1567
44bde614
JL
1568static int iommu_attach_vm_domain(struct dmar_domain *domain,
1569 struct intel_iommu *iommu)
1570{
1571 int num;
1572 unsigned long ndomains;
1573
1574 ndomains = cap_ndoms(iommu->cap);
1575 for_each_set_bit(num, iommu->domain_ids, ndomains)
1576 if (iommu->domains[num] == domain)
1577 return num;
1578
1579 return __iommu_attach_domain(domain, iommu);
1580}
1581
2c2e2c38
FY
1582static void iommu_detach_domain(struct dmar_domain *domain,
1583 struct intel_iommu *iommu)
ba395927
KA
1584{
1585 unsigned long flags;
2c2e2c38 1586 int num, ndomains;
ba395927 1587
8c11e798 1588 spin_lock_irqsave(&iommu->lock, flags);
fb170fb4
JL
1589 if (domain_type_is_vm_or_si(domain)) {
1590 ndomains = cap_ndoms(iommu->cap);
1591 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1592 if (iommu->domains[num] == domain) {
1593 clear_bit(num, iommu->domain_ids);
1594 iommu->domains[num] = NULL;
1595 break;
1596 }
2c2e2c38 1597 }
fb170fb4
JL
1598 } else {
1599 clear_bit(domain->id, iommu->domain_ids);
1600 iommu->domains[domain->id] = NULL;
2c2e2c38 1601 }
8c11e798 1602 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1603}
1604
fb170fb4
JL
1605static void domain_attach_iommu(struct dmar_domain *domain,
1606 struct intel_iommu *iommu)
1607{
1608 unsigned long flags;
1609
1610 spin_lock_irqsave(&domain->iommu_lock, flags);
1611 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1612 domain->iommu_count++;
1613 if (domain->iommu_count == 1)
1614 domain->nid = iommu->node;
1615 domain_update_iommu_cap(domain);
1616 }
1617 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1618}
1619
1620static int domain_detach_iommu(struct dmar_domain *domain,
1621 struct intel_iommu *iommu)
1622{
1623 unsigned long flags;
1624 int count = INT_MAX;
1625
1626 spin_lock_irqsave(&domain->iommu_lock, flags);
1627 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1628 count = --domain->iommu_count;
1629 domain_update_iommu_cap(domain);
1630 }
1631 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1632
1633 return count;
1634}
1635
ba395927 1636static struct iova_domain reserved_iova_list;
8a443df4 1637static struct lock_class_key reserved_rbtree_key;
ba395927 1638
51a63e67 1639static int dmar_init_reserved_ranges(void)
ba395927
KA
1640{
1641 struct pci_dev *pdev = NULL;
1642 struct iova *iova;
1643 int i;
ba395927 1644
f661197e 1645 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1646
8a443df4
MG
1647 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1648 &reserved_rbtree_key);
1649
ba395927
KA
1650 /* IOAPIC ranges shouldn't be accessed by DMA */
1651 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1652 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1653 if (!iova) {
ba395927 1654 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1655 return -ENODEV;
1656 }
ba395927
KA
1657
1658 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1659 for_each_pci_dev(pdev) {
1660 struct resource *r;
1661
1662 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1663 r = &pdev->resource[i];
1664 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1665 continue;
1a4a4551
DW
1666 iova = reserve_iova(&reserved_iova_list,
1667 IOVA_PFN(r->start),
1668 IOVA_PFN(r->end));
51a63e67 1669 if (!iova) {
ba395927 1670 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1671 return -ENODEV;
1672 }
ba395927
KA
1673 }
1674 }
51a63e67 1675 return 0;
ba395927
KA
1676}
1677
1678static void domain_reserve_special_ranges(struct dmar_domain *domain)
1679{
1680 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1681}
1682
1683static inline int guestwidth_to_adjustwidth(int gaw)
1684{
1685 int agaw;
1686 int r = (gaw - 12) % 9;
1687
1688 if (r == 0)
1689 agaw = gaw;
1690 else
1691 agaw = gaw + 9 - r;
1692 if (agaw > 64)
1693 agaw = 64;
1694 return agaw;
1695}
1696
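/*
 * Worked example: the adjusted width is gaw rounded up so that the bits
 * above the 12-bit page offset form whole 9-bit page-table levels:
 *
 *	guestwidth_to_adjustwidth(48) == 48	((48 - 12) % 9 == 0)
 *	guestwidth_to_adjustwidth(40) == 48	(rounded up by 8)
 *	guestwidth_to_adjustwidth(66) == 64	(clamped to 64)
 */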
1697static int domain_init(struct dmar_domain *domain, int guest_width)
1698{
1699 struct intel_iommu *iommu;
1700 int adjust_width, agaw;
1701 unsigned long sagaw;
1702
f661197e 1703 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1704 domain_reserve_special_ranges(domain);
1705
1706 /* calculate AGAW */
8c11e798 1707 iommu = domain_get_iommu(domain);
ba395927
KA
1708 if (guest_width > cap_mgaw(iommu->cap))
1709 guest_width = cap_mgaw(iommu->cap);
1710 domain->gaw = guest_width;
1711 adjust_width = guestwidth_to_adjustwidth(guest_width);
1712 agaw = width_to_agaw(adjust_width);
1713 sagaw = cap_sagaw(iommu->cap);
1714 if (!test_bit(agaw, &sagaw)) {
1715 /* hardware doesn't support it, choose a bigger one */
1716 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1717 agaw = find_next_bit(&sagaw, 5, agaw);
1718 if (agaw >= 5)
1719 return -ENODEV;
1720 }
1721 domain->agaw = agaw;
ba395927 1722
8e604097
WH
1723 if (ecap_coherent(iommu->ecap))
1724 domain->iommu_coherency = 1;
1725 else
1726 domain->iommu_coherency = 0;
1727
58c610bd
SY
1728 if (ecap_sc_support(iommu->ecap))
1729 domain->iommu_snooping = 1;
1730 else
1731 domain->iommu_snooping = 0;
1732
214e39aa
DW
1733 if (intel_iommu_superpage)
1734 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1735 else
1736 domain->iommu_superpage = 0;
1737
4c923d47 1738 domain->nid = iommu->node;
c7151a8d 1739
ba395927 1740 /* always allocate the top pgd */
4c923d47 1741 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1742 if (!domain->pgd)
1743 return -ENOMEM;
5b6985ce 1744 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1745 return 0;
1746}
1747
1748static void domain_exit(struct dmar_domain *domain)
1749{
2c2e2c38
FY
1750 struct dmar_drhd_unit *drhd;
1751 struct intel_iommu *iommu;
ea8ea460 1752 struct page *freelist = NULL;
ba395927
KA
1753
1754 /* Domain 0 is reserved, so don't process it */
1755 if (!domain)
1756 return;
1757
7b668357
AW
1758 /* Flush any lazy unmaps that may reference this domain */
1759 if (!intel_iommu_strict)
1760 flush_unmaps_timeout(0);
1761
92d03cc8 1762 /* remove associated devices */
ba395927 1763 domain_remove_dev_info(domain);
92d03cc8 1764
ba395927
KA
1765 /* destroy iovas */
1766 put_iova_domain(&domain->iovad);
ba395927 1767
ea8ea460 1768 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1769
92d03cc8 1770 /* clear attached or cached domains */
0e242612 1771 rcu_read_lock();
2c2e2c38 1772 for_each_active_iommu(iommu, drhd)
fb170fb4 1773 iommu_detach_domain(domain, iommu);
0e242612 1774 rcu_read_unlock();
2c2e2c38 1775
ea8ea460
DW
1776 dma_free_pagelist(freelist);
1777
ba395927
KA
1778 free_domain_mem(domain);
1779}
1780
64ae892b
DW
1781static int domain_context_mapping_one(struct dmar_domain *domain,
1782 struct intel_iommu *iommu,
1783 u8 bus, u8 devfn, int translation)
ba395927
KA
1784{
1785 struct context_entry *context;
ba395927 1786 unsigned long flags;
ea6606b0 1787 struct dma_pte *pgd;
ea6606b0
WH
1788 int id;
1789 int agaw;
93a23a72 1790 struct device_domain_info *info = NULL;
ba395927
KA
1791
1792 pr_debug("Set context mapping for %02x:%02x.%d\n",
1793 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1794
ba395927 1795 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1796 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1797 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1798
ba395927
KA
1799 context = device_to_context_entry(iommu, bus, devfn);
1800 if (!context)
1801 return -ENOMEM;
1802 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1803 if (context_present(context)) {
ba395927
KA
1804 spin_unlock_irqrestore(&iommu->lock, flags);
1805 return 0;
1806 }
1807
ea6606b0
WH
1808 id = domain->id;
1809 pgd = domain->pgd;
1810
ab8dfe25 1811 if (domain_type_is_vm_or_si(domain)) {
44bde614
JL
1812 if (domain_type_is_vm(domain)) {
1813 id = iommu_attach_vm_domain(domain, iommu);
fb170fb4 1814 if (id < 0) {
ea6606b0 1815 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4 1816 pr_err("IOMMU: no free domain ids\n");
ea6606b0
WH
1817 return -EFAULT;
1818 }
ea6606b0
WH
1819 }
1820
1821 /* Skip top levels of page tables for
1822 * an iommu which has less agaw than the default.
1672af11 1823 * Unnecessary for PT mode.
ea6606b0 1824 */
1672af11
CW
1825 if (translation != CONTEXT_TT_PASS_THROUGH) {
1826 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1827 pgd = phys_to_virt(dma_pte_addr(pgd));
1828 if (!dma_pte_present(pgd)) {
1829 spin_unlock_irqrestore(&iommu->lock, flags);
1830 return -ENOMEM;
1831 }
ea6606b0
WH
1832 }
1833 }
1834 }
1835
1836 context_set_domain_id(context, id);
4ed0d3e6 1837
93a23a72 1838 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1839 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1840 translation = info ? CONTEXT_TT_DEV_IOTLB :
1841 CONTEXT_TT_MULTI_LEVEL;
1842 }
4ed0d3e6
FY
1843 /*
1844 * In pass through mode, AW must be programmed to indicate the largest
1845 * AGAW value supported by hardware. And ASR is ignored by hardware.
1846 */
93a23a72 1847 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1848 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1849 else {
1850 context_set_address_root(context, virt_to_phys(pgd));
1851 context_set_address_width(context, iommu->agaw);
1852 }
4ed0d3e6
FY
1853
1854 context_set_translation_type(context, translation);
c07e7d21
MM
1855 context_set_fault_enable(context);
1856 context_set_present(context);
5331fe6f 1857 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1858
4c25a2c1
DW
1859 /*
1860 * It's a non-present to present mapping. If hardware doesn't cache
1861 * non-present entries we only need to flush the write-buffer. If the
1862 * hardware _does_ cache non-present entries, then it does so in the special
1863 * domain #0, which we have to flush:
1864 */
1865 if (cap_caching_mode(iommu->cap)) {
1866 iommu->flush.flush_context(iommu, 0,
1867 (((u16)bus) << 8) | devfn,
1868 DMA_CCMD_MASK_NOBIT,
1869 DMA_CCMD_DEVICE_INVL);
18fd779a 1870 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1871 } else {
ba395927 1872 iommu_flush_write_buffer(iommu);
4c25a2c1 1873 }
93a23a72 1874 iommu_enable_dev_iotlb(info);
ba395927 1875 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 1876
fb170fb4
JL
1877 domain_attach_iommu(domain, iommu);
1878
ba395927
KA
1879 return 0;
1880}
1881
579305f7
AW
1882struct domain_context_mapping_data {
1883 struct dmar_domain *domain;
1884 struct intel_iommu *iommu;
1885 int translation;
1886};
1887
1888static int domain_context_mapping_cb(struct pci_dev *pdev,
1889 u16 alias, void *opaque)
1890{
1891 struct domain_context_mapping_data *data = opaque;
1892
1893 return domain_context_mapping_one(data->domain, data->iommu,
1894 PCI_BUS_NUM(alias), alias & 0xff,
1895 data->translation);
1896}
1897
ba395927 1898static int
e1f167f3
DW
1899domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1900 int translation)
ba395927 1901{
64ae892b 1902 struct intel_iommu *iommu;
156baca8 1903 u8 bus, devfn;
579305f7 1904 struct domain_context_mapping_data data;
64ae892b 1905
e1f167f3 1906 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1907 if (!iommu)
1908 return -ENODEV;
ba395927 1909
579305f7
AW
1910 if (!dev_is_pci(dev))
1911 return domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1912 translation);
579305f7
AW
1913
1914 data.domain = domain;
1915 data.iommu = iommu;
1916 data.translation = translation;
1917
1918 return pci_for_each_dma_alias(to_pci_dev(dev),
1919 &domain_context_mapping_cb, &data);
1920}
1921
1922static int domain_context_mapped_cb(struct pci_dev *pdev,
1923 u16 alias, void *opaque)
1924{
1925 struct intel_iommu *iommu = opaque;
1926
1927 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
1928}
1929
e1f167f3 1930static int domain_context_mapped(struct device *dev)
ba395927 1931{
5331fe6f 1932 struct intel_iommu *iommu;
156baca8 1933 u8 bus, devfn;
5331fe6f 1934
e1f167f3 1935 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1936 if (!iommu)
1937 return -ENODEV;
ba395927 1938
579305f7
AW
1939 if (!dev_is_pci(dev))
1940 return device_context_mapped(iommu, bus, devfn);
e1f167f3 1941
579305f7
AW
1942 return !pci_for_each_dma_alias(to_pci_dev(dev),
1943 domain_context_mapped_cb, iommu);
ba395927
KA
1944}
1945
f532959b
FY
1946/* Returns a number of VTD pages, but aligned to MM page size */
1947static inline unsigned long aligned_nrpages(unsigned long host_addr,
1948 size_t size)
1949{
1950 host_addr &= ~PAGE_MASK;
1951 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1952}
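/*
 * Worked example (editor's note, assuming 4KiB MM and VT-d pages): for
 * host_addr = 0x1800 and size = 0x2000 only the in-page offset 0x800 is
 * kept, 0x800 + 0x2000 is rounded up to 0x3000, and 3 VT-d pages are
 * returned, because the 2-page-long buffer straddles three page frames
 * once its unaligned head and tail are accounted for.
 */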
1953
6dd9a7c7
YS
1954/* Return largest possible superpage level for a given mapping */
1955static inline int hardware_largepage_caps(struct dmar_domain *domain,
1956 unsigned long iov_pfn,
1957 unsigned long phy_pfn,
1958 unsigned long pages)
1959{
1960 int support, level = 1;
1961 unsigned long pfnmerge;
1962
1963 support = domain->iommu_superpage;
1964
1965 /* To use a large page, the virtual *and* physical addresses
1966 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1967 of them will mean we have to use smaller pages. So just
1968 merge them and check both at once. */
1969 pfnmerge = iov_pfn | phy_pfn;
1970
1971 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1972 pages >>= VTD_STRIDE_SHIFT;
1973 if (!pages)
1974 break;
1975 pfnmerge >>= VTD_STRIDE_SHIFT;
1976 level++;
1977 support--;
1978 }
1979 return level;
1980}
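/*
 * Illustrative example (editor's note): with domain->iommu_superpage == 1
 * (2MiB superpages supported), iov_pfn and phy_pfn both 512-PFN aligned
 * (low 9 bits clear) and at least 512 pages left to map, the loop runs
 * once and returns level 2, so a single 2MiB entry can cover the head of
 * the mapping; otherwise it falls back to level 1 (4KiB pages).
 */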
1981
9051aa02
DW
1982static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1983 struct scatterlist *sg, unsigned long phys_pfn,
1984 unsigned long nr_pages, int prot)
e1605495
DW
1985{
1986 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1987 phys_addr_t uninitialized_var(pteval);
9051aa02 1988 unsigned long sg_res;
6dd9a7c7
YS
1989 unsigned int largepage_lvl = 0;
1990 unsigned long lvl_pages = 0;
e1605495 1991
162d1b10 1992 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
1993
1994 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1995 return -EINVAL;
1996
1997 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1998
9051aa02
DW
1999 if (sg)
2000 sg_res = 0;
2001 else {
2002 sg_res = nr_pages + 1;
2003 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2004 }
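	/*
	 * Editor's note: for the physically contiguous case (sg == NULL)
	 * sg_res is primed to nr_pages + 1 so it can never reach zero in
	 * the loop below; the "if (!sg_res)" refill branch, which would
	 * dereference sg, is therefore only taken for real scatterlists.
	 */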
2005
6dd9a7c7 2006 while (nr_pages > 0) {
c85994e4
DW
2007 uint64_t tmp;
2008
e1605495 2009 if (!sg_res) {
f532959b 2010 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2011 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2012 sg->dma_length = sg->length;
2013 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2014 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2015 }
6dd9a7c7 2016
e1605495 2017 if (!pte) {
6dd9a7c7
YS
2018 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2019
5cf0a76f 2020 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2021 if (!pte)
2022 return -ENOMEM;
6dd9a7c7 2023 /* It is a large page */
6491d4d0 2024 if (largepage_lvl > 1) {
6dd9a7c7 2025 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb
JL
2026 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2027 /*
2028 * Ensure that old small page tables are
2029 * removed to make room for superpage,
2030 * if they exist.
2031 */
6491d4d0 2032 dma_pte_free_pagetable(domain, iov_pfn,
d41a4adb 2033 iov_pfn + lvl_pages - 1);
6491d4d0 2034 } else {
6dd9a7c7 2035 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2036 }
6dd9a7c7 2037
e1605495
DW
2038 }
2039 /* We don't need lock here, nobody else
2040 * touches the iova range
2041 */
7766a3fb 2042 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2043 if (tmp) {
1bf20f0d 2044 static int dumps = 5;
c85994e4
DW
2045 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2046 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2047 if (dumps) {
2048 dumps--;
2049 debug_dma_dump_mappings(NULL);
2050 }
2051 WARN_ON(1);
2052 }
6dd9a7c7
YS
2053
2054 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2055
2056 BUG_ON(nr_pages < lvl_pages);
2057 BUG_ON(sg_res < lvl_pages);
2058
2059 nr_pages -= lvl_pages;
2060 iov_pfn += lvl_pages;
2061 phys_pfn += lvl_pages;
2062 pteval += lvl_pages * VTD_PAGE_SIZE;
2063 sg_res -= lvl_pages;
2064
2065 /* If the next PTE would be the first in a new page, then we
2066 need to flush the cache on the entries we've just written.
2067 And then we'll need to recalculate 'pte', so clear it and
2068 let it get set again in the if (!pte) block above.
2069
2070 If we're done (!nr_pages) we need to flush the cache too.
2071
2072 Also if we've been setting superpages, we may need to
2073 recalculate 'pte' and switch back to smaller pages for the
2074 end of the mapping, if the trailing size is not enough to
2075 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2076 pte++;
6dd9a7c7
YS
2077 if (!nr_pages || first_pte_in_page(pte) ||
2078 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2079 domain_flush_cache(domain, first_pte,
2080 (void *)pte - (void *)first_pte);
2081 pte = NULL;
2082 }
6dd9a7c7
YS
2083
2084 if (!sg_res && nr_pages)
e1605495
DW
2085 sg = sg_next(sg);
2086 }
2087 return 0;
2088}
2089
9051aa02
DW
2090static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2091 struct scatterlist *sg, unsigned long nr_pages,
2092 int prot)
ba395927 2093{
9051aa02
DW
2094 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2095}
6f6a00e4 2096
9051aa02
DW
2097static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2098 unsigned long phys_pfn, unsigned long nr_pages,
2099 int prot)
2100{
2101 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2102}
2103
c7151a8d 2104static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2105{
c7151a8d
WH
2106 if (!iommu)
2107 return;
8c11e798
WH
2108
2109 clear_context_table(iommu, bus, devfn);
2110 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2111 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2112 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2113}
2114
109b9b04
DW
2115static inline void unlink_domain_info(struct device_domain_info *info)
2116{
2117 assert_spin_locked(&device_domain_lock);
2118 list_del(&info->link);
2119 list_del(&info->global);
2120 if (info->dev)
0bcb3e28 2121 info->dev->archdata.iommu = NULL;
109b9b04
DW
2122}
2123
ba395927
KA
2124static void domain_remove_dev_info(struct dmar_domain *domain)
2125{
3a74ca01 2126 struct device_domain_info *info, *tmp;
fb170fb4 2127 unsigned long flags;
ba395927
KA
2128
2129 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2130 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2131 unlink_domain_info(info);
ba395927
KA
2132 spin_unlock_irqrestore(&device_domain_lock, flags);
2133
93a23a72 2134 iommu_disable_dev_iotlb(info);
7c7faa11 2135 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2136
ab8dfe25 2137 if (domain_type_is_vm(domain)) {
7c7faa11 2138 iommu_detach_dependent_devices(info->iommu, info->dev);
fb170fb4 2139 domain_detach_iommu(domain, info->iommu);
92d03cc8
JL
2140 }
2141
2142 free_devinfo_mem(info);
ba395927
KA
2143 spin_lock_irqsave(&device_domain_lock, flags);
2144 }
2145 spin_unlock_irqrestore(&device_domain_lock, flags);
2146}
2147
2148/*
2149 * find_domain
1525a29a 2150 * Note: struct device->archdata.iommu stores the info
ba395927 2151 */
1525a29a 2152static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2153{
2154 struct device_domain_info *info;
2155
2156 /* No lock here, assumes no domain exit in normal case */
1525a29a 2157 info = dev->archdata.iommu;
ba395927
KA
2158 if (info)
2159 return info->domain;
2160 return NULL;
2161}
2162
5a8f40e8 2163static inline struct device_domain_info *
745f2586
JL
2164dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2165{
2166 struct device_domain_info *info;
2167
2168 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2169 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2170 info->devfn == devfn)
5a8f40e8 2171 return info;
745f2586
JL
2172
2173 return NULL;
2174}
2175
5a8f40e8 2176static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2177 int bus, int devfn,
b718cd3d
DW
2178 struct device *dev,
2179 struct dmar_domain *domain)
745f2586 2180{
5a8f40e8 2181 struct dmar_domain *found = NULL;
745f2586
JL
2182 struct device_domain_info *info;
2183 unsigned long flags;
2184
2185 info = alloc_devinfo_mem();
2186 if (!info)
b718cd3d 2187 return NULL;
745f2586 2188
745f2586
JL
2189 info->bus = bus;
2190 info->devfn = devfn;
2191 info->dev = dev;
2192 info->domain = domain;
5a8f40e8 2193 info->iommu = iommu;
745f2586
JL
2194
2195 spin_lock_irqsave(&device_domain_lock, flags);
2196 if (dev)
0bcb3e28 2197 found = find_domain(dev);
5a8f40e8
DW
2198 else {
2199 struct device_domain_info *info2;
41e80dca 2200 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2201 if (info2)
2202 found = info2->domain;
2203 }
745f2586
JL
2204 if (found) {
2205 spin_unlock_irqrestore(&device_domain_lock, flags);
2206 free_devinfo_mem(info);
b718cd3d
DW
2207 /* Caller must free the original domain */
2208 return found;
745f2586
JL
2209 }
2210
b718cd3d
DW
2211 list_add(&info->link, &domain->devices);
2212 list_add(&info->global, &device_domain_list);
2213 if (dev)
2214 dev->archdata.iommu = info;
2215 spin_unlock_irqrestore(&device_domain_lock, flags);
2216
2217 return domain;
745f2586
JL
2218}
2219
579305f7
AW
2220static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2221{
2222 *(u16 *)opaque = alias;
2223 return 0;
2224}
2225
ba395927 2226/* domain is initialized */
146922ec 2227static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2228{
579305f7
AW
2229 struct dmar_domain *domain, *tmp;
2230 struct intel_iommu *iommu;
5a8f40e8 2231 struct device_domain_info *info;
579305f7 2232 u16 dma_alias;
ba395927 2233 unsigned long flags;
aa4d066a 2234 u8 bus, devfn;
ba395927 2235
146922ec 2236 domain = find_domain(dev);
ba395927
KA
2237 if (domain)
2238 return domain;
2239
579305f7
AW
2240 iommu = device_to_iommu(dev, &bus, &devfn);
2241 if (!iommu)
2242 return NULL;
2243
146922ec
DW
2244 if (dev_is_pci(dev)) {
2245 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2246
579305f7
AW
2247 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2248
2249 spin_lock_irqsave(&device_domain_lock, flags);
2250 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2251 PCI_BUS_NUM(dma_alias),
2252 dma_alias & 0xff);
2253 if (info) {
2254 iommu = info->iommu;
2255 domain = info->domain;
5a8f40e8 2256 }
579305f7 2257 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2258
579305f7
AW
2259 /* DMA alias already has a domain, use it */
2260 if (info)
2261 goto found_domain;
2262 }
ba395927 2263
146922ec 2264 /* Allocate and initialize new domain for the device */
ab8dfe25 2265 domain = alloc_domain(0);
745f2586 2266 if (!domain)
579305f7 2267 return NULL;
44bde614
JL
2268 domain->id = iommu_attach_domain(domain, iommu);
2269 if (domain->id < 0) {
2fe9723d 2270 free_domain_mem(domain);
579305f7 2271 return NULL;
2c2e2c38 2272 }
fb170fb4 2273 domain_attach_iommu(domain, iommu);
579305f7
AW
2274 if (domain_init(domain, gaw)) {
2275 domain_exit(domain);
2276 return NULL;
2c2e2c38 2277 }
ba395927 2278
579305f7
AW
2279 /* register PCI DMA alias device */
2280 if (dev_is_pci(dev)) {
2281 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2282 dma_alias & 0xff, NULL, domain);
2283
2284 if (!tmp || tmp != domain) {
2285 domain_exit(domain);
2286 domain = tmp;
2287 }
2288
b718cd3d 2289 if (!domain)
579305f7 2290 return NULL;
ba395927
KA
2291 }
2292
2293found_domain:
579305f7
AW
2294 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2295
2296 if (!tmp || tmp != domain) {
2297 domain_exit(domain);
2298 domain = tmp;
2299 }
b718cd3d
DW
2300
2301 return domain;
ba395927
KA
2302}
2303
2c2e2c38 2304static int iommu_identity_mapping;
e0fc7e0b
DW
2305#define IDENTMAP_ALL 1
2306#define IDENTMAP_GFX 2
2307#define IDENTMAP_AZALIA 4
2c2e2c38 2308
b213203e
DW
2309static int iommu_domain_identity_map(struct dmar_domain *domain,
2310 unsigned long long start,
2311 unsigned long long end)
ba395927 2312{
c5395d5c
DW
2313 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2314 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2315
2316 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2317 dma_to_mm_pfn(last_vpfn))) {
ba395927 2318 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2319 return -ENOMEM;
ba395927
KA
2320 }
2321
c5395d5c
DW
2322 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2323 start, end, domain->id);
ba395927
KA
2324 /*
2325 * RMRR range might have overlap with physical memory range,
2326 * clear it first
2327 */
c5395d5c 2328 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2329
c5395d5c
DW
2330 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2331 last_vpfn - first_vpfn + 1,
61df7443 2332 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2333}
2334
0b9d9753 2335static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2336 unsigned long long start,
2337 unsigned long long end)
2338{
2339 struct dmar_domain *domain;
2340 int ret;
2341
0b9d9753 2342 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2343 if (!domain)
2344 return -ENOMEM;
2345
19943b0e
DW
2346 /* For _hardware_ passthrough, don't bother. But for software
2347 passthrough, we do it anyway -- it may indicate a memory
2348 range which is reserved in E820 and so didn't get set
2349 up to start with in si_domain */
2350 if (domain == si_domain && hw_pass_through) {
2351 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2352 dev_name(dev), start, end);
19943b0e
DW
2353 return 0;
2354 }
2355
2356 printk(KERN_INFO
2357 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2358 dev_name(dev), start, end);
2ff729f5 2359
5595b528
DW
2360 if (end < start) {
2361 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2362 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2363 dmi_get_system_info(DMI_BIOS_VENDOR),
2364 dmi_get_system_info(DMI_BIOS_VERSION),
2365 dmi_get_system_info(DMI_PRODUCT_VERSION));
2366 ret = -EIO;
2367 goto error;
2368 }
2369
2ff729f5
DW
2370 if (end >> agaw_to_width(domain->agaw)) {
2371 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2372 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2373 agaw_to_width(domain->agaw),
2374 dmi_get_system_info(DMI_BIOS_VENDOR),
2375 dmi_get_system_info(DMI_BIOS_VERSION),
2376 dmi_get_system_info(DMI_PRODUCT_VERSION));
2377 ret = -EIO;
2378 goto error;
2379 }
19943b0e 2380
b213203e 2381 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2382 if (ret)
2383 goto error;
2384
2385 /* context entry init */
0b9d9753 2386 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2387 if (ret)
2388 goto error;
2389
2390 return 0;
2391
2392 error:
ba395927
KA
2393 domain_exit(domain);
2394 return ret;
ba395927
KA
2395}
2396
2397static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2398 struct device *dev)
ba395927 2399{
0b9d9753 2400 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2401 return 0;
0b9d9753
DW
2402 return iommu_prepare_identity_map(dev, rmrr->base_address,
2403 rmrr->end_address);
ba395927
KA
2404}
2405
d3f13810 2406#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2407static inline void iommu_prepare_isa(void)
2408{
2409 struct pci_dev *pdev;
2410 int ret;
2411
2412 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2413 if (!pdev)
2414 return;
2415
c7ab48d2 2416 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2417 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2418
2419 if (ret)
c7ab48d2
DW
2420 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2421 "floppy might not work\n");
49a0429e 2422
9b27e82d 2423 pci_dev_put(pdev);
49a0429e
KA
2424}
2425#else
2426static inline void iommu_prepare_isa(void)
2427{
2428 return;
2429}
d3f13810 2430#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2431
2c2e2c38 2432static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2433
071e1374 2434static int __init si_domain_init(int hw)
2c2e2c38
FY
2435{
2436 struct dmar_drhd_unit *drhd;
2437 struct intel_iommu *iommu;
c7ab48d2 2438 int nid, ret = 0;
44bde614 2439 bool first = true;
2c2e2c38 2440
ab8dfe25 2441 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2442 if (!si_domain)
2443 return -EFAULT;
2444
2c2e2c38
FY
2445 for_each_active_iommu(iommu, drhd) {
2446 ret = iommu_attach_domain(si_domain, iommu);
fb170fb4 2447 if (ret < 0) {
2c2e2c38
FY
2448 domain_exit(si_domain);
2449 return -EFAULT;
44bde614
JL
2450 } else if (first) {
2451 si_domain->id = ret;
2452 first = false;
2453 } else if (si_domain->id != ret) {
2454 domain_exit(si_domain);
2455 return -EFAULT;
2c2e2c38 2456 }
fb170fb4 2457 domain_attach_iommu(si_domain, iommu);
2c2e2c38
FY
2458 }
2459
2460 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2461 domain_exit(si_domain);
2462 return -EFAULT;
2463 }
2464
9544c003
JL
2465 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2466 si_domain->id);
2c2e2c38 2467
19943b0e
DW
2468 if (hw)
2469 return 0;
2470
c7ab48d2 2471 for_each_online_node(nid) {
5dfe8660
TH
2472 unsigned long start_pfn, end_pfn;
2473 int i;
2474
2475 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2476 ret = iommu_domain_identity_map(si_domain,
2477 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2478 if (ret)
2479 return ret;
2480 }
c7ab48d2
DW
2481 }
2482
2c2e2c38
FY
2483 return 0;
2484}
2485
9b226624 2486static int identity_mapping(struct device *dev)
2c2e2c38
FY
2487{
2488 struct device_domain_info *info;
2489
2490 if (likely(!iommu_identity_mapping))
2491 return 0;
2492
9b226624 2493 info = dev->archdata.iommu;
cb452a40
MT
2494 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2495 return (info->domain == si_domain);
2c2e2c38 2496
2c2e2c38
FY
2497 return 0;
2498}
2499
2500static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2501 struct device *dev, int translation)
2c2e2c38 2502{
0ac72664 2503 struct dmar_domain *ndomain;
5a8f40e8 2504 struct intel_iommu *iommu;
156baca8 2505 u8 bus, devfn;
5fe60f4e 2506 int ret;
2c2e2c38 2507
5913c9bf 2508 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2509 if (!iommu)
2510 return -ENODEV;
2511
5913c9bf 2512 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2513 if (ndomain != domain)
2514 return -EBUSY;
2c2e2c38 2515
5913c9bf 2516 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2517 if (ret) {
5913c9bf 2518 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2519 return ret;
2520 }
2521
2c2e2c38
FY
2522 return 0;
2523}
2524
0b9d9753 2525static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2526{
2527 struct dmar_rmrr_unit *rmrr;
832bd858 2528 struct device *tmp;
ea2447f7
TM
2529 int i;
2530
0e242612 2531 rcu_read_lock();
ea2447f7 2532 for_each_rmrr_units(rmrr) {
b683b230
JL
2533 /*
2534 * Return TRUE if this RMRR contains the device that
2535 * is passed in.
2536 */
2537 for_each_active_dev_scope(rmrr->devices,
2538 rmrr->devices_cnt, i, tmp)
0b9d9753 2539 if (tmp == dev) {
0e242612 2540 rcu_read_unlock();
ea2447f7 2541 return true;
b683b230 2542 }
ea2447f7 2543 }
0e242612 2544 rcu_read_unlock();
ea2447f7
TM
2545 return false;
2546}
2547
c875d2c1
AW
2548/*
2549 * There are a couple cases where we need to restrict the functionality of
2550 * devices associated with RMRRs. The first is when evaluating a device for
2551 * identity mapping because problems exist when devices are moved in and out
2552 * of domains and their respective RMRR information is lost. This means that
2553 * a device with associated RMRRs will never be in a "passthrough" domain.
2554 * The second is use of the device through the IOMMU API. This interface
2555 * expects to have full control of the IOVA space for the device. We cannot
2556 * satisfy both the requirement that RMRR access is maintained and have an
2557 * unencumbered IOVA space. We also have no ability to quiesce the device's
2558 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2559 * We therefore prevent devices associated with an RMRR from participating in
2560 * the IOMMU API, which eliminates them from device assignment.
2561 *
2562 * In both cases we assume that PCI USB devices with RMRRs have them largely
2563 * for historical reasons and that the RMRR space is not actively used post
2564 * boot. This exclusion may change if vendors begin to abuse it.
2565 */
2566static bool device_is_rmrr_locked(struct device *dev)
2567{
2568 if (!device_has_rmrr(dev))
2569 return false;
2570
2571 if (dev_is_pci(dev)) {
2572 struct pci_dev *pdev = to_pci_dev(dev);
2573
2574 if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
2575 return false;
2576 }
2577
2578 return true;
2579}
2580
3bdb2591 2581static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2582{
ea2447f7 2583
3bdb2591
DW
2584 if (dev_is_pci(dev)) {
2585 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2586
c875d2c1 2587 if (device_is_rmrr_locked(dev))
3bdb2591 2588 return 0;
e0fc7e0b 2589
3bdb2591
DW
2590 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2591 return 1;
e0fc7e0b 2592
3bdb2591
DW
2593 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2594 return 1;
6941af28 2595
3bdb2591 2596 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2597 return 0;
3bdb2591
DW
2598
2599 /*
2600 * We want to start off with all devices in the 1:1 domain, and
2601 * take them out later if we find they can't access all of memory.
2602 *
2603 * However, we can't do this for PCI devices behind bridges,
2604 * because all PCI devices behind the same bridge will end up
2605 * with the same source-id on their transactions.
2606 *
2607 * Practically speaking, we can't change things around for these
2608 * devices at run-time, because we can't be sure there'll be no
2609 * DMA transactions in flight for any of their siblings.
2610 *
2611 * So PCI devices (unless they're on the root bus) as well as
2612 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2613 * the 1:1 domain, just in _case_ one of their siblings turns out
2614 * not to be able to map all of memory.
2615 */
2616 if (!pci_is_pcie(pdev)) {
2617 if (!pci_is_root_bus(pdev->bus))
2618 return 0;
2619 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2620 return 0;
2621 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2622 return 0;
3bdb2591
DW
2623 } else {
2624 if (device_has_rmrr(dev))
2625 return 0;
2626 }
3dfc813d 2627
3bdb2591 2628 /*
3dfc813d 2629 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2630 * Assume that they will -- if they turn out not to be, then we can
3dfc813d
DW
2631 * take them out of the 1:1 domain later.
2632 */
8fcc5372
CW
2633 if (!startup) {
2634 /*
2635 * If the device's dma_mask is less than the system's memory
2636 * size then this is not a candidate for identity mapping.
2637 */
3bdb2591 2638 u64 dma_mask = *dev->dma_mask;
8fcc5372 2639
3bdb2591
DW
2640 if (dev->coherent_dma_mask &&
2641 dev->coherent_dma_mask < dma_mask)
2642 dma_mask = dev->coherent_dma_mask;
8fcc5372 2643
3bdb2591 2644 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2645 }
6941af28
DW
2646
2647 return 1;
2648}
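/*
 * Editor's illustration (not part of the original source): at run time
 * (startup == 0) a device advertising only a 32-bit dma_mask on a machine
 * with memory above 4GiB fails the dma_get_required_mask() comparison
 * above and is kept out of (or later dropped from) the 1:1 si_domain,
 * while a 64-bit capable device passes and stays identity mapped.
 */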
2649
cf04eee8
DW
2650static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2651{
2652 int ret;
2653
2654 if (!iommu_should_identity_map(dev, 1))
2655 return 0;
2656
2657 ret = domain_add_dev_info(si_domain, dev,
2658 hw ? CONTEXT_TT_PASS_THROUGH :
2659 CONTEXT_TT_MULTI_LEVEL);
2660 if (!ret)
2661 pr_info("IOMMU: %s identity mapping for device %s\n",
2662 hw ? "hardware" : "software", dev_name(dev));
2663 else if (ret == -ENODEV)
2664 /* device not associated with an iommu */
2665 ret = 0;
2666
2667 return ret;
2668}
2669
2670
071e1374 2671static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2672{
2c2e2c38 2673 struct pci_dev *pdev = NULL;
cf04eee8
DW
2674 struct dmar_drhd_unit *drhd;
2675 struct intel_iommu *iommu;
2676 struct device *dev;
2677 int i;
2678 int ret = 0;
2c2e2c38 2679
19943b0e 2680 ret = si_domain_init(hw);
2c2e2c38
FY
2681 if (ret)
2682 return -EFAULT;
2683
2c2e2c38 2684 for_each_pci_dev(pdev) {
cf04eee8
DW
2685 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2686 if (ret)
2687 return ret;
2688 }
2689
2690 for_each_active_iommu(iommu, drhd)
2691 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2692 struct acpi_device_physical_node *pn;
2693 struct acpi_device *adev;
2694
2695 if (dev->bus != &acpi_bus_type)
2696 continue;
2697
2698 adev= to_acpi_device(dev);
2699 mutex_lock(&adev->physical_node_lock);
2700 list_for_each_entry(pn, &adev->physical_node_list, node) {
2701 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2702 if (ret)
2703 break;
eae460b6 2704 }
cf04eee8
DW
2705 mutex_unlock(&adev->physical_node_lock);
2706 if (ret)
2707 return ret;
62edf5dc 2708 }
2c2e2c38
FY
2709
2710 return 0;
2711}
2712
b779260b 2713static int __init init_dmars(void)
ba395927
KA
2714{
2715 struct dmar_drhd_unit *drhd;
2716 struct dmar_rmrr_unit *rmrr;
832bd858 2717 struct device *dev;
ba395927 2718 struct intel_iommu *iommu;
9d783ba0 2719 int i, ret;
2c2e2c38 2720
ba395927
KA
2721 /*
2722 * for each drhd
2723 * allocate root
2724 * initialize and program root entry to not present
2725 * endfor
2726 */
2727 for_each_drhd_unit(drhd) {
5e0d2a6f 2728 /*
2729 * lock not needed as this is only incremented in the single
2730 * threaded kernel __init code path all other access are read
2731 * only
2732 */
1b198bb0
MT
2733 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2734 g_num_of_iommus++;
2735 continue;
2736 }
2737 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2738 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2739 }
2740
d9630fe9
WH
2741 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2742 GFP_KERNEL);
2743 if (!g_iommus) {
2744 printk(KERN_ERR "Allocating global iommu array failed\n");
2745 ret = -ENOMEM;
2746 goto error;
2747 }
2748
80b20dd8 2749 deferred_flush = kzalloc(g_num_of_iommus *
2750 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2751 if (!deferred_flush) {
5e0d2a6f 2752 ret = -ENOMEM;
989d51fc 2753 goto free_g_iommus;
5e0d2a6f 2754 }
2755
7c919779 2756 for_each_active_iommu(iommu, drhd) {
d9630fe9 2757 g_iommus[iommu->seq_id] = iommu;
ba395927 2758
e61d98d8
SS
2759 ret = iommu_init_domains(iommu);
2760 if (ret)
989d51fc 2761 goto free_iommu;
e61d98d8 2762
ba395927
KA
2763 /*
2764 * TBD:
2765 * we could share the same root & context tables
25985edc 2766 * among all IOMMUs. Need to split it later.
ba395927
KA
2767 */
2768 ret = iommu_alloc_root_entry(iommu);
2769 if (ret) {
2770 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2771 goto free_iommu;
ba395927 2772 }
4ed0d3e6 2773 if (!ecap_pass_through(iommu->ecap))
19943b0e 2774 hw_pass_through = 0;
ba395927
KA
2775 }
2776
1531a6a6
SS
2777 /*
2778 * Start from the sane iommu hardware state.
2779 */
7c919779 2780 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2781 /*
2782 * If the queued invalidation is already initialized by us
2783 * (for example, while enabling interrupt-remapping) then
2784 * we got the things already rolling from a sane state.
2785 */
2786 if (iommu->qi)
2787 continue;
2788
2789 /*
2790 * Clear any previous faults.
2791 */
2792 dmar_fault(-1, iommu);
2793 /*
2794 * Disable queued invalidation if supported and already enabled
2795 * before OS handover.
2796 */
2797 dmar_disable_qi(iommu);
2798 }
2799
7c919779 2800 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2801 if (dmar_enable_qi(iommu)) {
2802 /*
2803 * Queued Invalidate not enabled, use Register Based
2804 * Invalidate
2805 */
2806 iommu->flush.flush_context = __iommu_flush_context;
2807 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2808 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2809 "invalidation\n",
680a7524 2810 iommu->seq_id,
b4e0f9eb 2811 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2812 } else {
2813 iommu->flush.flush_context = qi_flush_context;
2814 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2815 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2816 "invalidation\n",
680a7524 2817 iommu->seq_id,
b4e0f9eb 2818 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2819 }
2820 }
2821
19943b0e 2822 if (iommu_pass_through)
e0fc7e0b
DW
2823 iommu_identity_mapping |= IDENTMAP_ALL;
2824
d3f13810 2825#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2826 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2827#endif
e0fc7e0b
DW
2828
2829 check_tylersburg_isoch();
2830
ba395927 2831 /*
19943b0e
DW
2832 * If pass through is not set or not enabled, setup context entries for
2833 * identity mappings for rmrr, gfx, and isa and may fall back to static
2834 * identity mapping if iommu_identity_mapping is set.
ba395927 2835 */
19943b0e
DW
2836 if (iommu_identity_mapping) {
2837 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2838 if (ret) {
19943b0e 2839 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2840 goto free_iommu;
ba395927
KA
2841 }
2842 }
ba395927 2843 /*
19943b0e
DW
2844 * For each rmrr
2845 * for each dev attached to rmrr
2846 * do
2847 * locate drhd for dev, alloc domain for dev
2848 * allocate free domain
2849 * allocate page table entries for rmrr
2850 * if context not allocated for bus
2851 * allocate and init context
2852 * set present in root table for this bus
2853 * init context with domain, translation etc
2854 * endfor
2855 * endfor
ba395927 2856 */
19943b0e
DW
2857 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2858 for_each_rmrr_units(rmrr) {
b683b230
JL
2859 /* some BIOSes list non-existent devices in the DMAR table. */
2860 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2861 i, dev) {
0b9d9753 2862 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2863 if (ret)
2864 printk(KERN_ERR
2865 "IOMMU: mapping reserved region failed\n");
ba395927 2866 }
4ed0d3e6 2867 }
49a0429e 2868
19943b0e
DW
2869 iommu_prepare_isa();
2870
ba395927
KA
2871 /*
2872 * for each drhd
2873 * enable fault log
2874 * global invalidate context cache
2875 * global invalidate iotlb
2876 * enable translation
2877 */
7c919779 2878 for_each_iommu(iommu, drhd) {
51a63e67
JC
2879 if (drhd->ignored) {
2880 /*
2881 * we always have to disable PMRs or DMA may fail on
2882 * this device
2883 */
2884 if (force_on)
7c919779 2885 iommu_disable_protect_mem_regions(iommu);
ba395927 2886 continue;
51a63e67 2887 }
ba395927
KA
2888
2889 iommu_flush_write_buffer(iommu);
2890
3460a6d9
KA
2891 ret = dmar_set_interrupt(iommu);
2892 if (ret)
989d51fc 2893 goto free_iommu;
3460a6d9 2894
ba395927
KA
2895 iommu_set_root_entry(iommu);
2896
4c25a2c1 2897 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2898 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2a41ccee 2899 iommu_enable_translation(iommu);
b94996c9 2900 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2901 }
2902
2903 return 0;
989d51fc
JL
2904
2905free_iommu:
7c919779 2906 for_each_active_iommu(iommu, drhd)
a868e6b7 2907 free_dmar_iommu(iommu);
9bdc531e 2908 kfree(deferred_flush);
989d51fc 2909free_g_iommus:
d9630fe9 2910 kfree(g_iommus);
989d51fc 2911error:
ba395927
KA
2912 return ret;
2913}
2914
5a5e02a6 2915/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2916static struct iova *intel_alloc_iova(struct device *dev,
2917 struct dmar_domain *domain,
2918 unsigned long nrpages, uint64_t dma_mask)
ba395927 2919{
ba395927 2920 struct iova *iova = NULL;
ba395927 2921
875764de
DW
2922 /* Restrict dma_mask to the width that the iommu can handle */
2923 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2924
2925 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2926 /*
2927 * First try to allocate an io virtual address in
284901a9 2928 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2929 * from higher range
ba395927 2930 */
875764de
DW
2931 iova = alloc_iova(&domain->iovad, nrpages,
2932 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2933 if (iova)
2934 return iova;
2935 }
2936 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2937 if (unlikely(!iova)) {
2938 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2939 nrpages, dev_name(dev));
f76aec76
KA
2940 return NULL;
2941 }
2942
2943 return iova;
2944}
2945
d4b709f4 2946static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2947{
2948 struct dmar_domain *domain;
2949 int ret;
2950
d4b709f4 2951 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2952 if (!domain) {
d4b709f4
DW
2953 printk(KERN_ERR "Allocating domain for %s failed",
2954 dev_name(dev));
4fe05bbc 2955 return NULL;
ba395927
KA
2956 }
2957
2958 /* make sure context mapping is ok */
d4b709f4
DW
2959 if (unlikely(!domain_context_mapped(dev))) {
2960 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2961 if (ret) {
d4b709f4
DW
2962 printk(KERN_ERR "Domain context map for %s failed",
2963 dev_name(dev));
4fe05bbc 2964 return NULL;
f76aec76 2965 }
ba395927
KA
2966 }
2967
f76aec76
KA
2968 return domain;
2969}
2970
d4b709f4 2971static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2972{
2973 struct device_domain_info *info;
2974
2975 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2976 info = dev->archdata.iommu;
147202aa
DW
2977 if (likely(info))
2978 return info->domain;
2979
2980 return __get_valid_domain_for_dev(dev);
2981}
2982
3d89194a 2983static int iommu_dummy(struct device *dev)
2c2e2c38 2984{
3d89194a 2985 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2986}
2987
ecb509ec 2988/* Check if the dev needs to go through the non-identity map and unmap process. */
73676832 2989static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2990{
2991 int found;
2992
3d89194a 2993 if (iommu_dummy(dev))
1e4c64c4
DW
2994 return 1;
2995
2c2e2c38 2996 if (!iommu_identity_mapping)
1e4c64c4 2997 return 0;
2c2e2c38 2998
9b226624 2999 found = identity_mapping(dev);
2c2e2c38 3000 if (found) {
ecb509ec 3001 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3002 return 1;
3003 else {
3004 /*
3005 * The 32 bit DMA device is removed from si_domain and we fall back
3006 * to non-identity mapping.
3007 */
bf9c9eda 3008 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 3009 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 3010 dev_name(dev));
2c2e2c38
FY
3011 return 0;
3012 }
3013 } else {
3014 /*
3015 * If a 64 bit DMA device was detached from a vm, the device
3016 * is put into si_domain for identity mapping.
3017 */
ecb509ec 3018 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3019 int ret;
5913c9bf 3020 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
3021 hw_pass_through ?
3022 CONTEXT_TT_PASS_THROUGH :
3023 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
3024 if (!ret) {
3025 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 3026 dev_name(dev));
2c2e2c38
FY
3027 return 1;
3028 }
3029 }
3030 }
3031
1e4c64c4 3032 return 0;
2c2e2c38
FY
3033}
3034
5040a918 3035static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3036 size_t size, int dir, u64 dma_mask)
f76aec76 3037{
f76aec76 3038 struct dmar_domain *domain;
5b6985ce 3039 phys_addr_t start_paddr;
f76aec76
KA
3040 struct iova *iova;
3041 int prot = 0;
6865f0d1 3042 int ret;
8c11e798 3043 struct intel_iommu *iommu;
33041ec0 3044 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3045
3046 BUG_ON(dir == DMA_NONE);
2c2e2c38 3047
5040a918 3048 if (iommu_no_mapping(dev))
6865f0d1 3049 return paddr;
f76aec76 3050
5040a918 3051 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3052 if (!domain)
3053 return 0;
3054
8c11e798 3055 iommu = domain_get_iommu(domain);
88cb6a74 3056 size = aligned_nrpages(paddr, size);
f76aec76 3057
5040a918 3058 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3059 if (!iova)
3060 goto error;
3061
ba395927
KA
3062 /*
3063 * Check if DMAR supports zero-length reads on write only
3064 * mappings..
3065 */
3066 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3067 !cap_zlr(iommu->cap))
ba395927
KA
3068 prot |= DMA_PTE_READ;
3069 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3070 prot |= DMA_PTE_WRITE;
3071 /*
6865f0d1 3072 * paddr - (paddr + size) might be a partial page; we should map the whole
ba395927 3073 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3074 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
3075 * is not a big problem
3076 */
0ab36de2 3077 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3078 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3079 if (ret)
3080 goto error;
3081
1f0ef2aa
DW
3082 /* it's a non-present to present mapping. Only flush if caching mode */
3083 if (cap_caching_mode(iommu->cap))
ea8ea460 3084 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3085 else
8c11e798 3086 iommu_flush_write_buffer(iommu);
f76aec76 3087
03d6a246
DW
3088 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3089 start_paddr += paddr & ~PAGE_MASK;
3090 return start_paddr;
ba395927 3091
ba395927 3092error:
f76aec76
KA
3093 if (iova)
3094 __free_iova(&domain->iovad, iova);
4cf2e75d 3095 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3096 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3097 return 0;
3098}
3099
ffbbef5c
FT
3100static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3101 unsigned long offset, size_t size,
3102 enum dma_data_direction dir,
3103 struct dma_attrs *attrs)
bb9e6d65 3104{
ffbbef5c 3105 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3106 dir, *dev->dma_mask);
bb9e6d65
FT
3107}
3108
5e0d2a6f 3109static void flush_unmaps(void)
3110{
80b20dd8 3111 int i, j;
5e0d2a6f 3112
5e0d2a6f 3113 timer_on = 0;
3114
3115 /* just flush them all */
3116 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3117 struct intel_iommu *iommu = g_iommus[i];
3118 if (!iommu)
3119 continue;
c42d9f32 3120
9dd2fe89
YZ
3121 if (!deferred_flush[i].next)
3122 continue;
3123
78d5f0f5
NA
3124 /* In caching mode, global flushes turn emulation expensive */
3125 if (!cap_caching_mode(iommu->cap))
3126 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3127 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3128 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3129 unsigned long mask;
3130 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3131 struct dmar_domain *domain = deferred_flush[i].domain[j];
3132
3133 /* On real hardware multiple invalidations are expensive */
3134 if (cap_caching_mode(iommu->cap))
3135 iommu_flush_iotlb_psi(iommu, domain->id,
a156ef99 3136 iova->pfn_lo, iova_size(iova),
ea8ea460 3137 !deferred_flush[i].freelist[j], 0);
78d5f0f5 3138 else {
a156ef99 3139 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
78d5f0f5
NA
3140 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3141 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3142 }
93a23a72 3143 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3144 if (deferred_flush[i].freelist[j])
3145 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3146 }
9dd2fe89 3147 deferred_flush[i].next = 0;
5e0d2a6f 3148 }
3149
5e0d2a6f 3150 list_size = 0;
5e0d2a6f 3151}
3152
3153static void flush_unmaps_timeout(unsigned long data)
3154{
80b20dd8 3155 unsigned long flags;
3156
3157 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3158 flush_unmaps();
80b20dd8 3159 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3160}
3161
ea8ea460 3162static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3163{
3164 unsigned long flags;
80b20dd8 3165 int next, iommu_id;
8c11e798 3166 struct intel_iommu *iommu;
5e0d2a6f 3167
3168 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3169 if (list_size == HIGH_WATER_MARK)
3170 flush_unmaps();
3171
8c11e798
WH
3172 iommu = domain_get_iommu(dom);
3173 iommu_id = iommu->seq_id;
c42d9f32 3174
80b20dd8 3175 next = deferred_flush[iommu_id].next;
3176 deferred_flush[iommu_id].domain[next] = dom;
3177 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3178 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3179 deferred_flush[iommu_id].next++;
5e0d2a6f 3180
3181 if (!timer_on) {
3182 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3183 timer_on = 1;
3184 }
3185 list_size++;
3186 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3187}
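/*
 * Editor's note on the deferred-flush scheme above: unmapped IOVAs are
 * queued per IOMMU and released in batches by flush_unmaps(), either when
 * HIGH_WATER_MARK entries have accumulated or when the 10ms unmap_timer
 * fires.  This trades a short window of stale IOTLB entries for far fewer
 * invalidation commands than flushing on every unmap; booting with
 * intel_iommu=strict keeps the flush-per-unmap behaviour instead.
 */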
3188
d41a4adb 3189static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
ba395927 3190{
f76aec76 3191 struct dmar_domain *domain;
d794dc9b 3192 unsigned long start_pfn, last_pfn;
ba395927 3193 struct iova *iova;
8c11e798 3194 struct intel_iommu *iommu;
ea8ea460 3195 struct page *freelist;
ba395927 3196
73676832 3197 if (iommu_no_mapping(dev))
f76aec76 3198 return;
2c2e2c38 3199
1525a29a 3200 domain = find_domain(dev);
ba395927
KA
3201 BUG_ON(!domain);
3202
8c11e798
WH
3203 iommu = domain_get_iommu(domain);
3204
ba395927 3205 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3206 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3207 (unsigned long long)dev_addr))
ba395927 3208 return;
ba395927 3209
d794dc9b
DW
3210 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3211 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3212
d794dc9b 3213 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3214 dev_name(dev), start_pfn, last_pfn);
ba395927 3215
ea8ea460 3216 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3217
5e0d2a6f 3218 if (intel_iommu_strict) {
03d6a246 3219 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3220 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3221 /* free iova */
3222 __free_iova(&domain->iovad, iova);
ea8ea460 3223 dma_free_pagelist(freelist);
5e0d2a6f 3224 } else {
ea8ea460 3225 add_unmap(domain, iova, freelist);
5e0d2a6f 3226 /*
3227 * queue up the release of the unmap to save the 1/6th of the
3228 * cpu used up by the iotlb flush operation...
3229 */
5e0d2a6f 3230 }
ba395927
KA
3231}
3232
d41a4adb
JL
3233static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3234 size_t size, enum dma_data_direction dir,
3235 struct dma_attrs *attrs)
3236{
3237 intel_unmap(dev, dev_addr);
3238}
3239
5040a918 3240static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3241 dma_addr_t *dma_handle, gfp_t flags,
3242 struct dma_attrs *attrs)
ba395927 3243{
36746436 3244 struct page *page = NULL;
ba395927
KA
3245 int order;
3246
5b6985ce 3247 size = PAGE_ALIGN(size);
ba395927 3248 order = get_order(size);
e8bb910d 3249
5040a918 3250 if (!iommu_no_mapping(dev))
e8bb910d 3251 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3252 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3253 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3254 flags |= GFP_DMA;
3255 else
3256 flags |= GFP_DMA32;
3257 }
ba395927 3258
36746436
AM
3259 if (flags & __GFP_WAIT) {
3260 unsigned int count = size >> PAGE_SHIFT;
3261
3262 page = dma_alloc_from_contiguous(dev, count, order);
3263 if (page && iommu_no_mapping(dev) &&
3264 page_to_phys(page) + size > dev->coherent_dma_mask) {
3265 dma_release_from_contiguous(dev, page, count);
3266 page = NULL;
3267 }
3268 }
3269
3270 if (!page)
3271 page = alloc_pages(flags, order);
3272 if (!page)
ba395927 3273 return NULL;
36746436 3274 memset(page_address(page), 0, size);
ba395927 3275
36746436 3276 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3277 DMA_BIDIRECTIONAL,
5040a918 3278 dev->coherent_dma_mask);
ba395927 3279 if (*dma_handle)
36746436
AM
3280 return page_address(page);
3281 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3282 __free_pages(page, order);
3283
ba395927
KA
3284 return NULL;
3285}
3286
5040a918 3287static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3288 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3289{
3290 int order;
36746436 3291 struct page *page = virt_to_page(vaddr);
ba395927 3292
5b6985ce 3293 size = PAGE_ALIGN(size);
ba395927
KA
3294 order = get_order(size);
3295
d41a4adb 3296 intel_unmap(dev, dma_handle);
36746436
AM
3297 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3298 __free_pages(page, order);
ba395927
KA
3299}
3300
5040a918 3301static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3302 int nelems, enum dma_data_direction dir,
3303 struct dma_attrs *attrs)
ba395927 3304{
d41a4adb 3305 intel_unmap(dev, sglist[0].dma_address);
ba395927
KA
3306}
3307
ba395927 3308static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3309 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3310{
3311 int i;
c03ab37c 3312 struct scatterlist *sg;
ba395927 3313
c03ab37c 3314 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3315 BUG_ON(!sg_page(sg));
4cf2e75d 3316 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3317 sg->dma_length = sg->length;
ba395927
KA
3318 }
3319 return nelems;
3320}
3321
5040a918 3322static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3323 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3324{
ba395927 3325 int i;
ba395927 3326 struct dmar_domain *domain;
f76aec76
KA
3327 size_t size = 0;
3328 int prot = 0;
f76aec76
KA
3329 struct iova *iova = NULL;
3330 int ret;
c03ab37c 3331 struct scatterlist *sg;
b536d24d 3332 unsigned long start_vpfn;
8c11e798 3333 struct intel_iommu *iommu;
ba395927
KA
3334
3335 BUG_ON(dir == DMA_NONE);
5040a918
DW
3336 if (iommu_no_mapping(dev))
3337 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3338
5040a918 3339 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3340 if (!domain)
3341 return 0;
3342
8c11e798
WH
3343 iommu = domain_get_iommu(domain);
3344
b536d24d 3345 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3346 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3347
5040a918
DW
3348 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3349 *dev->dma_mask);
f76aec76 3350 if (!iova) {
c03ab37c 3351 sglist->dma_length = 0;
f76aec76
KA
3352 return 0;
3353 }
3354
3355 /*
3356 * Check if DMAR supports zero-length reads on write only
3357 * mappings..
3358 */
3359 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3360 !cap_zlr(iommu->cap))
f76aec76
KA
3361 prot |= DMA_PTE_READ;
3362 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3363 prot |= DMA_PTE_WRITE;
3364
b536d24d 3365 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3366
f532959b 3367 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3368 if (unlikely(ret)) {
e1605495
DW
3369 dma_pte_free_pagetable(domain, start_vpfn,
3370 start_vpfn + size - 1);
e1605495
DW
3371 __free_iova(&domain->iovad, iova);
3372 return 0;
ba395927
KA
3373 }
3374
1f0ef2aa
DW
3375 /* it's a non-present to present mapping. Only flush if caching mode */
3376 if (cap_caching_mode(iommu->cap))
ea8ea460 3377 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3378 else
8c11e798 3379 iommu_flush_write_buffer(iommu);
1f0ef2aa 3380
ba395927
KA
3381 return nelems;
3382}
3383
dfb805e8
FT
3384static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3385{
3386 return !dma_addr;
3387}
3388
160c1d8e 3389struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3390 .alloc = intel_alloc_coherent,
3391 .free = intel_free_coherent,
ba395927
KA
3392 .map_sg = intel_map_sg,
3393 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3394 .map_page = intel_map_page,
3395 .unmap_page = intel_unmap_page,
dfb805e8 3396 .mapping_error = intel_mapping_error,
ba395927
KA
3397};
3398
3399static inline int iommu_domain_cache_init(void)
3400{
3401 int ret = 0;
3402
3403 iommu_domain_cache = kmem_cache_create("iommu_domain",
3404 sizeof(struct dmar_domain),
3405 0,
3406 SLAB_HWCACHE_ALIGN,
3407
3408 NULL);
3409 if (!iommu_domain_cache) {
3410 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3411 ret = -ENOMEM;
3412 }
3413
3414 return ret;
3415}
3416
3417static inline int iommu_devinfo_cache_init(void)
3418{
3419 int ret = 0;
3420
3421 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3422 sizeof(struct device_domain_info),
3423 0,
3424 SLAB_HWCACHE_ALIGN,
ba395927
KA
3425 NULL);
3426 if (!iommu_devinfo_cache) {
3427 printk(KERN_ERR "Couldn't create devinfo cache\n");
3428 ret = -ENOMEM;
3429 }
3430
3431 return ret;
3432}
3433
3434static inline int iommu_iova_cache_init(void)
3435{
3436 int ret = 0;
3437
3438 iommu_iova_cache = kmem_cache_create("iommu_iova",
3439 sizeof(struct iova),
3440 0,
3441 SLAB_HWCACHE_ALIGN,
ba395927
KA
3442 NULL);
3443 if (!iommu_iova_cache) {
3444 printk(KERN_ERR "Couldn't create iova cache\n");
3445 ret = -ENOMEM;
3446 }
3447
3448 return ret;
3449}
3450
3451static int __init iommu_init_mempool(void)
3452{
3453 int ret;
3454 ret = iommu_iova_cache_init();
3455 if (ret)
3456 return ret;
3457
3458 ret = iommu_domain_cache_init();
3459 if (ret)
3460 goto domain_error;
3461
3462 ret = iommu_devinfo_cache_init();
3463 if (!ret)
3464 return ret;
3465
3466 kmem_cache_destroy(iommu_domain_cache);
3467domain_error:
3468 kmem_cache_destroy(iommu_iova_cache);
3469
3470 return -ENOMEM;
3471}
3472
3473static void __init iommu_exit_mempool(void)
3474{
3475 kmem_cache_destroy(iommu_devinfo_cache);
3476 kmem_cache_destroy(iommu_domain_cache);
3477 kmem_cache_destroy(iommu_iova_cache);
3478
3479}
3480
556ab45f
DW
3481static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3482{
3483 struct dmar_drhd_unit *drhd;
3484 u32 vtbar;
3485 int rc;
3486
3487 /* We know that this device on this chipset has its own IOMMU.
3488 * If we find it under a different IOMMU, then the BIOS is lying
3489 * to us. Hope that the IOMMU for this device is actually
3490 * disabled, and it needs no translation...
3491 */
3492 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3493 if (rc) {
3494 /* "can't" happen */
3495 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3496 return;
3497 }
3498 vtbar &= 0xffff0000;
3499
3500 /* we know that this iommu should be at offset 0xa000 from vtbar */
3501 drhd = dmar_find_matched_drhd_unit(pdev);
3502 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3503 TAINT_FIRMWARE_WORKAROUND,
3504 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3505 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3506}
3507DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3508
ba395927
KA
3509static void __init init_no_remapping_devices(void)
3510{
3511 struct dmar_drhd_unit *drhd;
832bd858 3512 struct device *dev;
b683b230 3513 int i;
ba395927
KA
3514
3515 for_each_drhd_unit(drhd) {
3516 if (!drhd->include_all) {
b683b230
JL
3517 for_each_active_dev_scope(drhd->devices,
3518 drhd->devices_cnt, i, dev)
3519 break;
832bd858 3520 /* ignore DMAR unit if no devices exist */
ba395927
KA
3521 if (i == drhd->devices_cnt)
3522 drhd->ignored = 1;
3523 }
3524 }
3525
7c919779 3526 for_each_active_drhd_unit(drhd) {
7c919779 3527 if (drhd->include_all)
ba395927
KA
3528 continue;
3529
b683b230
JL
3530 for_each_active_dev_scope(drhd->devices,
3531 drhd->devices_cnt, i, dev)
832bd858 3532 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3533 break;
ba395927
KA
3534 if (i < drhd->devices_cnt)
3535 continue;
3536
c0771df8
DW
3537 /* This IOMMU has *only* gfx devices. Either bypass it or
3538 set the gfx_mapped flag, as appropriate */
3539 if (dmar_map_gfx) {
3540 intel_iommu_gfx_mapped = 1;
3541 } else {
3542 drhd->ignored = 1;
b683b230
JL
3543 for_each_active_dev_scope(drhd->devices,
3544 drhd->devices_cnt, i, dev)
832bd858 3545 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3546 }
3547 }
3548}
3549
f59c7b69
FY
3550#ifdef CONFIG_SUSPEND
3551static int init_iommu_hw(void)
3552{
3553 struct dmar_drhd_unit *drhd;
3554 struct intel_iommu *iommu = NULL;
3555
3556 for_each_active_iommu(iommu, drhd)
3557 if (iommu->qi)
3558 dmar_reenable_qi(iommu);
3559
b779260b
JC
3560 for_each_iommu(iommu, drhd) {
3561 if (drhd->ignored) {
3562 /*
3563 * we always have to disable PMRs or DMA may fail on
3564 * this device
3565 */
3566 if (force_on)
3567 iommu_disable_protect_mem_regions(iommu);
3568 continue;
3569 }
3570
f59c7b69
FY
3571 iommu_flush_write_buffer(iommu);
3572
3573 iommu_set_root_entry(iommu);
3574
3575 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3576 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3577 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3578 iommu_enable_translation(iommu);
b94996c9 3579 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3580 }
3581
3582 return 0;
3583}
3584
3585static void iommu_flush_all(void)
3586{
3587 struct dmar_drhd_unit *drhd;
3588 struct intel_iommu *iommu;
3589
3590 for_each_active_iommu(iommu, drhd) {
3591 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3592 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3593 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3594 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3595 }
3596}
3597
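/*
 * On suspend: flush all caches, disable translation and stash the four
 * fault-event registers (control, data, address, upper address) of each
 * IOMMU, so that iommu_resume() can restore fault reporting after
 * init_iommu_hw() has re-initialised the hardware.
 */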
134fac3f 3598static int iommu_suspend(void)
f59c7b69
FY
3599{
3600 struct dmar_drhd_unit *drhd;
3601 struct intel_iommu *iommu = NULL;
3602 unsigned long flag;
3603
3604 for_each_active_iommu(iommu, drhd) {
3605 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3606 GFP_ATOMIC);
3607 if (!iommu->iommu_state)
3608 goto nomem;
3609 }
3610
3611 iommu_flush_all();
3612
3613 for_each_active_iommu(iommu, drhd) {
3614 iommu_disable_translation(iommu);
3615
1f5b3c3f 3616 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3617
3618 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3619 readl(iommu->reg + DMAR_FECTL_REG);
3620 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3621 readl(iommu->reg + DMAR_FEDATA_REG);
3622 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3623 readl(iommu->reg + DMAR_FEADDR_REG);
3624 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3625 readl(iommu->reg + DMAR_FEUADDR_REG);
3626
1f5b3c3f 3627 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3628 }
3629 return 0;
3630
3631nomem:
3632 for_each_active_iommu(iommu, drhd)
3633 kfree(iommu->iommu_state);
3634
3635 return -ENOMEM;
3636}
3637
134fac3f 3638static void iommu_resume(void)
f59c7b69
FY
3639{
3640 struct dmar_drhd_unit *drhd;
3641 struct intel_iommu *iommu = NULL;
3642 unsigned long flag;
3643
3644 if (init_iommu_hw()) {
b779260b
JC
3645 if (force_on)
3646 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3647 else
3648 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3649 return;
f59c7b69
FY
3650 }
3651
3652 for_each_active_iommu(iommu, drhd) {
3653
1f5b3c3f 3654 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3655
3656 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3657 iommu->reg + DMAR_FECTL_REG);
3658 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3659 iommu->reg + DMAR_FEDATA_REG);
3660 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3661 iommu->reg + DMAR_FEADDR_REG);
3662 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3663 iommu->reg + DMAR_FEUADDR_REG);
3664
1f5b3c3f 3665 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3666 }
3667
3668 for_each_active_iommu(iommu, drhd)
3669 kfree(iommu->iommu_state);
f59c7b69
FY
3670}
3671
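/*
 * syscore ops run late in suspend and early in resume, with interrupts
 * disabled and only one CPU online - exactly the point at which the IOMMU
 * state has to be saved and restored.
 */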
134fac3f 3672static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3673 .resume = iommu_resume,
3674 .suspend = iommu_suspend,
3675};
3676
134fac3f 3677static void __init init_iommu_pm_ops(void)
f59c7b69 3678{
134fac3f 3679 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3680}
3681
3682#else
99592ba4 3683static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3684#endif /* CONFIG_PM */
3685
318fe7df 3686
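/*
 * Callbacks invoked for each RMRR (reserved memory region) and ATSR (root
 * port ATS capability) entry while the ACPI DMAR table is parsed; each
 * entry is recorded in a driver-private list together with its decoded
 * device scope.
 */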
c2a0b538 3687int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
3688{
3689 struct acpi_dmar_reserved_memory *rmrr;
3690 struct dmar_rmrr_unit *rmrru;
3691
3692 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3693 if (!rmrru)
3694 return -ENOMEM;
3695
3696 rmrru->hdr = header;
3697 rmrr = (struct acpi_dmar_reserved_memory *)header;
3698 rmrru->base_address = rmrr->base_address;
3699 rmrru->end_address = rmrr->end_address;
2e455289
JL
3700 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3701 ((void *)rmrr) + rmrr->header.length,
3702 &rmrru->devices_cnt);
3703 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3704 kfree(rmrru);
3705 return -ENOMEM;
3706 }
318fe7df 3707
2e455289 3708 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3709
2e455289 3710 return 0;
318fe7df
SS
3711}
3712
c2a0b538 3713int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
3714{
3715 struct acpi_dmar_atsr *atsr;
3716 struct dmar_atsr_unit *atsru;
3717
3718 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3719 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3720 if (!atsru)
3721 return -ENOMEM;
3722
3723 atsru->hdr = hdr;
3724 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3725 if (!atsru->include_all) {
3726 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3727 (void *)atsr + atsr->header.length,
3728 &atsru->devices_cnt);
3729 if (atsru->devices_cnt && atsru->devices == NULL) {
3730 kfree(atsru);
3731 return -ENOMEM;
3732 }
3733 }
318fe7df 3734
0e242612 3735 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
3736
3737 return 0;
3738}
3739
9bdc531e
JL
3740static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3741{
3742 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3743 kfree(atsru);
3744}
3745
3746static void intel_iommu_free_dmars(void)
3747{
3748 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3749 struct dmar_atsr_unit *atsru, *atsr_n;
3750
3751 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3752 list_del(&rmrru->list);
3753 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3754 kfree(rmrru);
318fe7df
SS
3755 }
3756
9bdc531e
JL
3757 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3758 list_del(&atsru->list);
3759 intel_iommu_free_atsr(atsru);
3760 }
318fe7df
SS
3761}
3762
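/*
 * Walk up from @dev to its PCIe root port and report whether an ATSR entry
 * on the same segment lists that root port (or is marked include-all),
 * i.e. whether ATS may be used for this device.
 */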
3763int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3764{
b683b230 3765 int i, ret = 1;
318fe7df 3766 struct pci_bus *bus;
832bd858
DW
3767 struct pci_dev *bridge = NULL;
3768 struct device *tmp;
318fe7df
SS
3769 struct acpi_dmar_atsr *atsr;
3770 struct dmar_atsr_unit *atsru;
3771
3772 dev = pci_physfn(dev);
318fe7df 3773 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3774 bridge = bus->self;
318fe7df 3775 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3776 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3777 return 0;
b5f82ddf 3778 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3779 break;
318fe7df 3780 }
b5f82ddf
JL
3781 if (!bridge)
3782 return 0;
318fe7df 3783
0e242612 3784 rcu_read_lock();
b5f82ddf
JL
3785 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3786 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3787 if (atsr->segment != pci_domain_nr(dev->bus))
3788 continue;
3789
b683b230 3790 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 3791 if (tmp == &bridge->dev)
b683b230 3792 goto out;
b5f82ddf
JL
3793
3794 if (atsru->include_all)
b683b230 3795 goto out;
b5f82ddf 3796 }
b683b230
JL
3797 ret = 0;
3798out:
0e242612 3799 rcu_read_unlock();
318fe7df 3800
b683b230 3801 return ret;
318fe7df
SS
3802}
3803
59ce0515
JL
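/*
 * Keep the cached RMRR/ATSR device-scope lists in sync when a PCI device
 * is hot-added to, or removed from, a scope described by the DMAR table.
 */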
3804int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3805{
3806 int ret = 0;
3807 struct dmar_rmrr_unit *rmrru;
3808 struct dmar_atsr_unit *atsru;
3809 struct acpi_dmar_atsr *atsr;
3810 struct acpi_dmar_reserved_memory *rmrr;
3811
3812 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3813 return 0;
3814
3815 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3816 rmrr = container_of(rmrru->hdr,
3817 struct acpi_dmar_reserved_memory, header);
3818 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3819 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3820 ((void *)rmrr) + rmrr->header.length,
3821 rmrr->segment, rmrru->devices,
3822 rmrru->devices_cnt);
27e24950 3823 if (ret < 0)
59ce0515
JL
3824 return ret;
3825 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
3826 dmar_remove_dev_scope(info, rmrr->segment,
3827 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
3828 }
3829 }
3830
3831 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3832 if (atsru->include_all)
3833 continue;
3834
3835 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3836 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3837 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3838 (void *)atsr + atsr->header.length,
3839 atsr->segment, atsru->devices,
3840 atsru->devices_cnt);
3841 if (ret > 0)
3842 break;
 3844 else if (ret < 0)
3844 return ret;
3845 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3846 if (dmar_remove_dev_scope(info, atsr->segment,
3847 atsru->devices, atsru->devices_cnt))
3848 break;
3849 }
3850 }
3851
3852 return 0;
3853}
3854
99dcaded
FY
3855/*
 3856 * Here we only respond to a device being unbound from its driver.
 3857 *
 3858 * A newly added device is not attached to its DMAR domain here yet; that
 3859 * happens when the device is mapped to an iova.
3860 */
3861static int device_notifier(struct notifier_block *nb,
3862 unsigned long action, void *data)
3863{
3864 struct device *dev = data;
99dcaded
FY
3865 struct dmar_domain *domain;
3866
3d89194a 3867 if (iommu_dummy(dev))
44cd613c
DW
3868 return 0;
3869
1196c2fb 3870 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
3871 return 0;
3872
e7f9fa54
JR
3873 /*
3874 * If the device is still attached to a device driver we can't
3875 * tear down the domain yet as DMA mappings may still be in use.
3876 * Wait for the BUS_NOTIFY_UNBOUND_DRIVER event to do that.
3877 */
3878 if (action == BUS_NOTIFY_DEL_DEVICE && dev->driver != NULL)
3879 return 0;
3880
1525a29a 3881 domain = find_domain(dev);
99dcaded
FY
3882 if (!domain)
3883 return 0;
3884
3a5670e8 3885 down_read(&dmar_global_lock);
bf9c9eda 3886 domain_remove_one_dev_info(domain, dev);
ab8dfe25 3887 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 3888 domain_exit(domain);
3a5670e8 3889 up_read(&dmar_global_lock);
a97590e5 3890
99dcaded
FY
3891 return 0;
3892}
3893
3894static struct notifier_block device_nb = {
3895 .notifier_call = device_notifier,
3896};
3897
75f05569
JL
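/*
 * When hardware pass-through is not in use, si_domain holds a 1:1 map of
 * system memory: extend it when memory goes online, and unmap the range
 * (flushing the IOTLBs and freeing the page tables) when it goes away.
 */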
3898static int intel_iommu_memory_notifier(struct notifier_block *nb,
3899 unsigned long val, void *v)
3900{
3901 struct memory_notify *mhp = v;
3902 unsigned long long start, end;
3903 unsigned long start_vpfn, last_vpfn;
3904
3905 switch (val) {
3906 case MEM_GOING_ONLINE:
3907 start = mhp->start_pfn << PAGE_SHIFT;
3908 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3909 if (iommu_domain_identity_map(si_domain, start, end)) {
3910 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3911 start, end);
3912 return NOTIFY_BAD;
3913 }
3914 break;
3915
3916 case MEM_OFFLINE:
3917 case MEM_CANCEL_ONLINE:
3918 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3919 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3920 while (start_vpfn <= last_vpfn) {
3921 struct iova *iova;
3922 struct dmar_drhd_unit *drhd;
3923 struct intel_iommu *iommu;
ea8ea460 3924 struct page *freelist;
75f05569
JL
3925
3926 iova = find_iova(&si_domain->iovad, start_vpfn);
3927 if (iova == NULL) {
 3928 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3929 start_vpfn);
3930 break;
3931 }
3932
3933 iova = split_and_remove_iova(&si_domain->iovad, iova,
3934 start_vpfn, last_vpfn);
3935 if (iova == NULL) {
3936 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3937 start_vpfn, last_vpfn);
3938 return NOTIFY_BAD;
3939 }
3940
ea8ea460
DW
3941 freelist = domain_unmap(si_domain, iova->pfn_lo,
3942 iova->pfn_hi);
3943
75f05569
JL
3944 rcu_read_lock();
3945 for_each_active_iommu(iommu, drhd)
3946 iommu_flush_iotlb_psi(iommu, si_domain->id,
a156ef99 3947 iova->pfn_lo, iova_size(iova),
ea8ea460 3948 !freelist, 0);
75f05569 3949 rcu_read_unlock();
ea8ea460 3950 dma_free_pagelist(freelist);
75f05569
JL
3951
3952 start_vpfn = iova->pfn_hi + 1;
3953 free_iova_mem(iova);
3954 }
3955 break;
3956 }
3957
3958 return NOTIFY_OK;
3959}
3960
3961static struct notifier_block intel_iommu_memory_nb = {
3962 .notifier_call = intel_iommu_memory_notifier,
3963 .priority = 0
3964};
3965
a5459cfe
AW
3966
3967static ssize_t intel_iommu_show_version(struct device *dev,
3968 struct device_attribute *attr,
3969 char *buf)
3970{
3971 struct intel_iommu *iommu = dev_get_drvdata(dev);
3972 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3973 return sprintf(buf, "%d:%d\n",
3974 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3975}
3976static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3977
3978static ssize_t intel_iommu_show_address(struct device *dev,
3979 struct device_attribute *attr,
3980 char *buf)
3981{
3982 struct intel_iommu *iommu = dev_get_drvdata(dev);
3983 return sprintf(buf, "%llx\n", iommu->reg_phys);
3984}
3985static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3986
3987static ssize_t intel_iommu_show_cap(struct device *dev,
3988 struct device_attribute *attr,
3989 char *buf)
3990{
3991 struct intel_iommu *iommu = dev_get_drvdata(dev);
3992 return sprintf(buf, "%llx\n", iommu->cap);
3993}
3994static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3995
3996static ssize_t intel_iommu_show_ecap(struct device *dev,
3997 struct device_attribute *attr,
3998 char *buf)
3999{
4000 struct intel_iommu *iommu = dev_get_drvdata(dev);
4001 return sprintf(buf, "%llx\n", iommu->ecap);
4002}
4003static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4004
4005static struct attribute *intel_iommu_attrs[] = {
4006 &dev_attr_version.attr,
4007 &dev_attr_address.attr,
4008 &dev_attr_cap.attr,
4009 &dev_attr_ecap.attr,
4010 NULL,
4011};
4012
4013static struct attribute_group intel_iommu_group = {
4014 .name = "intel-iommu",
4015 .attrs = intel_iommu_attrs,
4016};
4017
4018const struct attribute_group *intel_iommu_groups[] = {
4019 &intel_iommu_group,
4020 NULL,
4021};
4022
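/*
 * The attribute group above is attached to the iommu class device created
 * in intel_iommu_init(), e.g. (path shown for illustration)
 * /sys/class/iommu/dmar0/intel-iommu/{version,address,cap,ecap}.
 */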
ba395927
KA
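/*
 * Boot-time entry point: parse the DMAR table and device scopes, set up the
 * per-IOMMU state and the DMA-API plumbing, then expose each unit through
 * sysfs, the IOMMU core and the bus/memory notifiers.  Failure paths unwind
 * whatever had already been set up.
 */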
4023int __init intel_iommu_init(void)
4024{
9bdc531e 4025 int ret = -ENODEV;
3a93c841 4026 struct dmar_drhd_unit *drhd;
7c919779 4027 struct intel_iommu *iommu;
ba395927 4028
a59b50e9
JC
4029 /* VT-d is required for a TXT/tboot launch, so enforce that */
4030 force_on = tboot_force_iommu();
4031
3a5670e8
JL
4032 if (iommu_init_mempool()) {
4033 if (force_on)
4034 panic("tboot: Failed to initialize iommu memory\n");
4035 return -ENOMEM;
4036 }
4037
4038 down_write(&dmar_global_lock);
a59b50e9
JC
4039 if (dmar_table_init()) {
4040 if (force_on)
4041 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4042 goto out_free_dmar;
a59b50e9 4043 }
ba395927 4044
3a93c841
TI
4045 /*
4046 * Disable translation if already enabled prior to OS handover.
4047 */
7c919779 4048 for_each_active_iommu(iommu, drhd)
3a93c841
TI
4049 if (iommu->gcmd & DMA_GCMD_TE)
4050 iommu_disable_translation(iommu);
3a93c841 4051
c2c7286a 4052 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4053 if (force_on)
4054 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4055 goto out_free_dmar;
a59b50e9 4056 }
1886e8a9 4057
75f1cdf1 4058 if (no_iommu || dmar_disabled)
9bdc531e 4059 goto out_free_dmar;
2ae21010 4060
318fe7df
SS
4061 if (list_empty(&dmar_rmrr_units))
4062 printk(KERN_INFO "DMAR: No RMRR found\n");
4063
4064 if (list_empty(&dmar_atsr_units))
4065 printk(KERN_INFO "DMAR: No ATSR found\n");
4066
51a63e67
JC
4067 if (dmar_init_reserved_ranges()) {
4068 if (force_on)
4069 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4070 goto out_free_reserved_range;
51a63e67 4071 }
ba395927
KA
4072
4073 init_no_remapping_devices();
4074
b779260b 4075 ret = init_dmars();
ba395927 4076 if (ret) {
a59b50e9
JC
4077 if (force_on)
4078 panic("tboot: Failed to initialize DMARs\n");
ba395927 4079 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 4080 goto out_free_reserved_range;
ba395927 4081 }
3a5670e8 4082 up_write(&dmar_global_lock);
ba395927
KA
4083 printk(KERN_INFO
4084 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4085
5e0d2a6f 4086 init_timer(&unmap_timer);
75f1cdf1
FT
4087#ifdef CONFIG_SWIOTLB
4088 swiotlb = 0;
4089#endif
19943b0e 4090 dma_ops = &intel_dma_ops;
4ed0d3e6 4091
134fac3f 4092 init_iommu_pm_ops();
a8bcbb0d 4093
a5459cfe
AW
4094 for_each_active_iommu(iommu, drhd)
4095 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4096 intel_iommu_groups,
4097 iommu->name);
4098
4236d97d 4099 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4100 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4101 if (si_domain && !hw_pass_through)
4102 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4103
8bc1f85c
ED
4104 intel_iommu_enabled = 1;
4105
ba395927 4106 return 0;
9bdc531e
JL
4107
4108out_free_reserved_range:
4109 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4110out_free_dmar:
4111 intel_iommu_free_dmars();
3a5670e8
JL
4112 up_write(&dmar_global_lock);
4113 iommu_exit_mempool();
9bdc531e 4114 return ret;
ba395927 4115}
e820482c 4116
579305f7
AW
4117static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4118{
4119 struct intel_iommu *iommu = opaque;
4120
4121 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4122 return 0;
4123}
4124
4125/*
4126 * NB - intel-iommu lacks any sort of reference counting for the users of
4127 * dependent devices. If multiple endpoints have intersecting dependent
4128 * devices, unbinding the driver from any one of them will possibly leave
4129 * the others unable to operate.
4130 */
3199aa6b 4131static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4132 struct device *dev)
3199aa6b 4133{
0bcb3e28 4134 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4135 return;
4136
579305f7 4137 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
3199aa6b
HW
4138}
4139
2c2e2c38 4140static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4141 struct device *dev)
c7151a8d 4142{
bca2b916 4143 struct device_domain_info *info, *tmp;
c7151a8d
WH
4144 struct intel_iommu *iommu;
4145 unsigned long flags;
4146 int found = 0;
156baca8 4147 u8 bus, devfn;
c7151a8d 4148
bf9c9eda 4149 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4150 if (!iommu)
4151 return;
4152
4153 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4154 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4155 if (info->iommu == iommu && info->bus == bus &&
4156 info->devfn == devfn) {
109b9b04 4157 unlink_domain_info(info);
c7151a8d
WH
4158 spin_unlock_irqrestore(&device_domain_lock, flags);
4159
93a23a72 4160 iommu_disable_dev_iotlb(info);
c7151a8d 4161 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4162 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4163 free_devinfo_mem(info);
4164
4165 spin_lock_irqsave(&device_domain_lock, flags);
4166
4167 if (found)
4168 break;
4169 else
4170 continue;
4171 }
4172
 4173 /* if there are no other devices under the same iommu
 4174 * owned by this domain, clear this iommu in iommu_bmp and
 4175 * update the iommu count and coherency
4176 */
8bbc4410 4177 if (info->iommu == iommu)
c7151a8d
WH
4178 found = 1;
4179 }
4180
3e7abe25
RD
4181 spin_unlock_irqrestore(&device_domain_lock, flags);
4182
c7151a8d 4183 if (found == 0) {
fb170fb4
JL
4184 domain_detach_iommu(domain, iommu);
4185 if (!domain_type_is_vm_or_si(domain))
4186 iommu_detach_domain(domain, iommu);
c7151a8d 4187 }
c7151a8d
WH
4188}
4189
2c2e2c38 4190static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4191{
4192 int adjust_width;
4193
4194 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
4195 domain_reserve_special_ranges(domain);
4196
4197 /* calculate AGAW */
4198 domain->gaw = guest_width;
4199 adjust_width = guestwidth_to_adjustwidth(guest_width);
4200 domain->agaw = width_to_agaw(adjust_width);
4201
5e98c4b1 4202 domain->iommu_coherency = 0;
c5b15255 4203 domain->iommu_snooping = 0;
6dd9a7c7 4204 domain->iommu_superpage = 0;
fe40f1e0 4205 domain->max_addr = 0;
5e98c4b1
WH
4206
4207 /* always allocate the top pgd */
4c923d47 4208 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4209 if (!domain->pgd)
4210 return -ENOMEM;
4211 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4212 return 0;
4213}
4214
5d450806 4215static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 4216{
5d450806 4217 struct dmar_domain *dmar_domain;
38717946 4218
ab8dfe25 4219 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4220 if (!dmar_domain) {
38717946 4221 printk(KERN_ERR
5d450806
JR
4222 "intel_iommu_domain_init: dmar_domain == NULL\n");
4223 return -ENOMEM;
38717946 4224 }
2c2e2c38 4225 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 4226 printk(KERN_ERR
5d450806 4227 "intel_iommu_domain_init() failed\n");
92d03cc8 4228 domain_exit(dmar_domain);
5d450806 4229 return -ENOMEM;
38717946 4230 }
8140a95d 4231 domain_update_iommu_cap(dmar_domain);
5d450806 4232 domain->priv = dmar_domain;
faa3d6f5 4233
8a0e715b
JR
4234 domain->geometry.aperture_start = 0;
4235 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4236 domain->geometry.force_aperture = true;
4237
5d450806 4238 return 0;
38717946 4239}
38717946 4240
5d450806 4241static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 4242{
5d450806
JR
4243 struct dmar_domain *dmar_domain = domain->priv;
4244
4245 domain->priv = NULL;
92d03cc8 4246 domain_exit(dmar_domain);
38717946 4247}
38717946 4248
4c5478c9
JR
4249static int intel_iommu_attach_device(struct iommu_domain *domain,
4250 struct device *dev)
38717946 4251{
4c5478c9 4252 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
4253 struct intel_iommu *iommu;
4254 int addr_width;
156baca8 4255 u8 bus, devfn;
faa3d6f5 4256
c875d2c1
AW
4257 if (device_is_rmrr_locked(dev)) {
4258 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4259 return -EPERM;
4260 }
4261
7207d8f9
DW
4262 /* normally dev is not mapped */
4263 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4264 struct dmar_domain *old_domain;
4265
1525a29a 4266 old_domain = find_domain(dev);
faa3d6f5 4267 if (old_domain) {
ab8dfe25 4268 if (domain_type_is_vm_or_si(dmar_domain))
bf9c9eda 4269 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4270 else
4271 domain_remove_dev_info(old_domain);
4272 }
4273 }
4274
156baca8 4275 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4276 if (!iommu)
4277 return -ENODEV;
4278
4279 /* check if this iommu agaw is sufficient for max mapped address */
4280 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4281 if (addr_width > cap_mgaw(iommu->cap))
4282 addr_width = cap_mgaw(iommu->cap);
4283
4284 if (dmar_domain->max_addr > (1LL << addr_width)) {
4285 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4286 "sufficient for the mapped address (%llx)\n",
a99c47a2 4287 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4288 return -EFAULT;
4289 }
a99c47a2
TL
4290 dmar_domain->gaw = addr_width;
4291
4292 /*
4293 * Knock out extra levels of page tables if necessary
4294 */
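	/*
	 * Each level removed here drops LEVEL_STRIDE (9) bits of address
	 * width: the old top-level table is freed and the table referenced
	 * by its first entry becomes the new pgd, so the domain's page table
	 * matches the number of levels this IOMMU can actually walk.
	 */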
4295 while (iommu->agaw < dmar_domain->agaw) {
4296 struct dma_pte *pte;
4297
4298 pte = dmar_domain->pgd;
4299 if (dma_pte_present(pte)) {
25cbff16
SY
4300 dmar_domain->pgd = (struct dma_pte *)
4301 phys_to_virt(dma_pte_addr(pte));
7a661013 4302 free_pgtable_page(pte);
a99c47a2
TL
4303 }
4304 dmar_domain->agaw--;
4305 }
fe40f1e0 4306
5913c9bf 4307 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4308}
38717946 4309
4c5478c9
JR
4310static void intel_iommu_detach_device(struct iommu_domain *domain,
4311 struct device *dev)
38717946 4312{
4c5478c9 4313 struct dmar_domain *dmar_domain = domain->priv;
4c5478c9 4314
bf9c9eda 4315 domain_remove_one_dev_info(dmar_domain, dev);
faa3d6f5 4316}
c7151a8d 4317
b146a1c9
JR
4318static int intel_iommu_map(struct iommu_domain *domain,
4319 unsigned long iova, phys_addr_t hpa,
5009065d 4320 size_t size, int iommu_prot)
faa3d6f5 4321{
dde57a21 4322 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4323 u64 max_addr;
dde57a21 4324 int prot = 0;
faa3d6f5 4325 int ret;
fe40f1e0 4326
dde57a21
JR
4327 if (iommu_prot & IOMMU_READ)
4328 prot |= DMA_PTE_READ;
4329 if (iommu_prot & IOMMU_WRITE)
4330 prot |= DMA_PTE_WRITE;
9cf06697
SY
4331 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4332 prot |= DMA_PTE_SNP;
dde57a21 4333
163cc52c 4334 max_addr = iova + size;
dde57a21 4335 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4336 u64 end;
4337
4338 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4339 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4340 if (end < max_addr) {
8954da1f 4341 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4342 "sufficient for the mapped address (%llx)\n",
8954da1f 4343 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4344 return -EFAULT;
4345 }
dde57a21 4346 dmar_domain->max_addr = max_addr;
fe40f1e0 4347 }
ad051221
DW
4348 /* Round up size to next multiple of PAGE_SIZE, if it and
4349 the low bits of hpa would take us onto the next page */
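	/* e.g. hpa == 0x1234 with size == 0x2000 straddles three 4KiB pages,
	   so aligned_nrpages() below yields a page count of 3 */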
88cb6a74 4350 size = aligned_nrpages(hpa, size);
ad051221
DW
4351 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4352 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4353 return ret;
38717946 4354}
38717946 4355
5009065d 4356static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4357 unsigned long iova, size_t size)
38717946 4358{
dde57a21 4359 struct dmar_domain *dmar_domain = domain->priv;
ea8ea460
DW
4360 struct page *freelist = NULL;
4361 struct intel_iommu *iommu;
4362 unsigned long start_pfn, last_pfn;
4363 unsigned int npages;
4364 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4365
4366 /* Cope with horrid API which requires us to unmap more than the
4367 size argument if it happens to be a large-page mapping. */
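	/* e.g. a request to unmap 4KiB out of a 2MiB superpage tears down,
	   and accounts for, the whole 2MiB region */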
4368 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4369 BUG();
4370
4371 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4372 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4373
ea8ea460
DW
4374 start_pfn = iova >> VTD_PAGE_SHIFT;
4375 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4376
4377 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4378
4379 npages = last_pfn - start_pfn + 1;
4380
4381 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4382 iommu = g_iommus[iommu_id];
4383
4384 /*
4385 * find bit position of dmar_domain
4386 */
4387 ndomains = cap_ndoms(iommu->cap);
4388 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4389 if (iommu->domains[num] == dmar_domain)
4390 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4391 npages, !freelist, 0);
4392 }
4393
4394 }
4395
4396 dma_free_pagelist(freelist);
fe40f1e0 4397
163cc52c
DW
4398 if (dmar_domain->max_addr == iova + size)
4399 dmar_domain->max_addr = iova;
b146a1c9 4400
5cf0a76f 4401 return size;
38717946 4402}
38717946 4403
d14d6577 4404static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4405 dma_addr_t iova)
38717946 4406{
d14d6577 4407 struct dmar_domain *dmar_domain = domain->priv;
38717946 4408 struct dma_pte *pte;
5cf0a76f 4409 int level = 0;
faa3d6f5 4410 u64 phys = 0;
38717946 4411
5cf0a76f 4412 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4413 if (pte)
faa3d6f5 4414 phys = dma_pte_addr(pte);
38717946 4415
faa3d6f5 4416 return phys;
38717946 4417}
a8bcbb0d 4418
5d587b8d 4419static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 4420{
dbb9fd86 4421 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 4422 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 4423 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 4424 return irq_remapping_enabled == 1;
dbb9fd86 4425
5d587b8d 4426 return false;
dbb9fd86
SY
4427}
4428
abdfdde2
AW
4429static int intel_iommu_add_device(struct device *dev)
4430{
a5459cfe 4431 struct intel_iommu *iommu;
abdfdde2 4432 struct iommu_group *group;
156baca8 4433 u8 bus, devfn;
70ae6f0d 4434
a5459cfe
AW
4435 iommu = device_to_iommu(dev, &bus, &devfn);
4436 if (!iommu)
70ae6f0d
AW
4437 return -ENODEV;
4438
a5459cfe 4439 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4440
e17f9ff4 4441 group = iommu_group_get_for_dev(dev);
783f157b 4442
e17f9ff4
AW
4443 if (IS_ERR(group))
4444 return PTR_ERR(group);
bcb71abe 4445
abdfdde2 4446 iommu_group_put(group);
e17f9ff4 4447 return 0;
abdfdde2 4448}
70ae6f0d 4449
abdfdde2
AW
4450static void intel_iommu_remove_device(struct device *dev)
4451{
a5459cfe
AW
4452 struct intel_iommu *iommu;
4453 u8 bus, devfn;
4454
4455 iommu = device_to_iommu(dev, &bus, &devfn);
4456 if (!iommu)
4457 return;
4458
abdfdde2 4459 iommu_group_remove_device(dev);
a5459cfe
AW
4460
4461 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4462}
4463
b22f6434 4464static const struct iommu_ops intel_iommu_ops = {
5d587b8d 4465 .capable = intel_iommu_capable,
a8bcbb0d
JR
4466 .domain_init = intel_iommu_domain_init,
4467 .domain_destroy = intel_iommu_domain_destroy,
4468 .attach_dev = intel_iommu_attach_device,
4469 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4470 .map = intel_iommu_map,
4471 .unmap = intel_iommu_unmap,
a8bcbb0d 4472 .iova_to_phys = intel_iommu_iova_to_phys,
abdfdde2
AW
4473 .add_device = intel_iommu_add_device,
4474 .remove_device = intel_iommu_remove_device,
6d1c56a9 4475 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4476};
9af88143 4477
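/*
 * Illustrative sketch, not part of this driver: roughly how the generic
 * IOMMU API dispatches into intel_iommu_ops above, e.g. from a device-
 * assignment user such as VFIO/KVM.  'example_iommu_api_usage', 'dev',
 * 'gpa' and 'hpa' are hypothetical.
 */
static inline int example_iommu_api_usage(struct device *dev,
					  unsigned long gpa, phys_addr_t hpa)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc(&pci_bus_type);	/* ->domain_init() */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);		/* ->attach_dev() */
	if (ret)
		goto out_free;

	ret = iommu_map(domain, gpa, hpa, VTD_PAGE_SIZE,/* ->map() */
			IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* ... the device can now DMA to 'gpa' ... */

	iommu_unmap(domain, gpa, VTD_PAGE_SIZE);	/* ->unmap() */
out_detach:
	iommu_detach_device(domain, dev);		/* ->detach_dev() */
out_free:
	iommu_domain_free(domain);			/* ->domain_destroy() */
	return ret;
}
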
9452618e
DV
4478static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4479{
4480 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4481 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4482 dmar_map_gfx = 0;
4483}
4484
4485DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4486DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4487DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4488DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4489DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4490DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4491DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4492
d34d6517 4493static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4494{
4495 /*
4496 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4497 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4498 */
4499 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4500 rwbf_quirk = 1;
4501}
4502
4503DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4505DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4506DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4507DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4508DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4509DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4510
eecfd57f
AJ
4511#define GGC 0x52
4512#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4513#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4514#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4515#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4516#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4517#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4518#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4519#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4520
d34d6517 4521static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4522{
4523 unsigned short ggc;
4524
eecfd57f 4525 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4526 return;
4527
eecfd57f 4528 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4529 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4530 dmar_map_gfx = 0;
6fbcfb3e
DW
4531 } else if (dmar_map_gfx) {
4532 /* we have to ensure the gfx device is idle before we flush */
4533 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4534 intel_iommu_strict = 1;
4535 }
9eecabcb
DW
4536}
4537DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4538DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4539DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4540DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4541
e0fc7e0b
DW
4542/* On Tylersburg chipsets, some BIOSes have been known to enable the
4543 ISOCH DMAR unit for the Azalia sound device, but not give it any
4544 TLB entries, which causes it to deadlock. Check for that. We do
4545 this in a function called from init_dmars(), instead of in a PCI
4546 quirk, because we don't want to print the obnoxious "BIOS broken"
4547 message if VT-d is actually disabled.
4548*/
4549static void __init check_tylersburg_isoch(void)
4550{
4551 struct pci_dev *pdev;
4552 uint32_t vtisochctrl;
4553
4554 /* If there's no Azalia in the system anyway, forget it. */
4555 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4556 if (!pdev)
4557 return;
4558 pci_dev_put(pdev);
4559
4560 /* System Management Registers. Might be hidden, in which case
4561 we can't do the sanity check. But that's OK, because the
4562 known-broken BIOSes _don't_ actually hide it, so far. */
4563 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4564 if (!pdev)
4565 return;
4566
4567 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4568 pci_dev_put(pdev);
4569 return;
4570 }
4571
4572 pci_dev_put(pdev);
4573
4574 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4575 if (vtisochctrl & 1)
4576 return;
4577
4578 /* Drop all bits other than the number of TLB entries */
4579 vtisochctrl &= 0x1c;
4580
4581 /* If we have the recommended number of TLB entries (16), fine. */
4582 if (vtisochctrl == 0x10)
4583 return;
4584
4585 /* Zero TLB entries? You get to ride the short bus to school. */
4586 if (!vtisochctrl) {
4587 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4588 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4589 dmi_get_system_info(DMI_BIOS_VENDOR),
4590 dmi_get_system_info(DMI_BIOS_VERSION),
4591 dmi_get_system_info(DMI_PRODUCT_VERSION));
4592 iommu_identity_mapping |= IDENTMAP_AZALIA;
4593 return;
4594 }
4595
4596 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4597 vtisochctrl);
4598}