intel-iommu: Fix off-by-one in RMRR setup
[linux-2.6-block.git] / drivers / pci / intel-iommu.c
CommitLineData
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946 36#include <linux/iova.h>
5d450806 37#include <linux/iommu.h>
38717946 38#include <linux/intel-iommu.h>
134fac3f 39#include <linux/syscore_ops.h>
69575d38 40#include <linux/tboot.h>
adb2fe02 41#include <linux/dmi.h>
ba395927 42#include <asm/cacheflush.h>
46a7fa27 43#include <asm/iommu.h>
ba395927
KA
44#include "pci.h"
45
5b6985ce
FY
46#define ROOT_SIZE VTD_PAGE_SIZE
47#define CONTEXT_SIZE VTD_PAGE_SIZE
48
825507d6
MT
49#define IS_BRIDGE_HOST_DEVICE(pdev) \
50 ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
ba395927
KA
51#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
52#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 53#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
54
55#define IOAPIC_RANGE_START (0xfee00000)
56#define IOAPIC_RANGE_END (0xfeefffff)
57#define IOVA_START_ADDR (0x1000)
58
59#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
60
4ed0d3e6
FY
61#define MAX_AGAW_WIDTH 64
62
2ebe3151
DW
63#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
64#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
65
66/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
67 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
68#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
69 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
70#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 71
f27be03b 72#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 73#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 74#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 75
df08cdc7
AM
76/* page table handling */
77#define LEVEL_STRIDE (9)
78#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
79
80static inline int agaw_to_level(int agaw)
81{
82 return agaw + 2;
83}
84
85static inline int agaw_to_width(int agaw)
86{
87 return 30 + agaw * LEVEL_STRIDE;
88}
89
90static inline int width_to_agaw(int width)
91{
92 return (width - 30) / LEVEL_STRIDE;
93}
94
95static inline unsigned int level_to_offset_bits(int level)
96{
97 return (level - 1) * LEVEL_STRIDE;
98}
99
100static inline int pfn_level_offset(unsigned long pfn, int level)
101{
102 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
103}
104
105static inline unsigned long level_mask(int level)
106{
107 return -1UL << level_to_offset_bits(level);
108}
109
110static inline unsigned long level_size(int level)
111{
112 return 1UL << level_to_offset_bits(level);
113}
114
115static inline unsigned long align_to_level(unsigned long pfn, int level)
116{
117 return (pfn + level_size(level) - 1) & level_mask(level);
118}
fd18de50 119
6dd9a7c7
YS
120static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
121{
122 return 1 << ((lvl - 1) * LEVEL_STRIDE);
123}
124
dd4e8319
DW
125/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
126 are never going to work. */
127static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
128{
129 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
130}
131
132static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
133{
134 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
135}
136static inline unsigned long page_to_dma_pfn(struct page *pg)
137{
138 return mm_to_dma_pfn(page_to_pfn(pg));
139}
140static inline unsigned long virt_to_dma_pfn(void *p)
141{
142 return page_to_dma_pfn(virt_to_page(p));
143}
144
d9630fe9
WH
145/* global iommu list, set NULL for ignored DMAR units */
146static struct intel_iommu **g_iommus;
147
e0fc7e0b 148static void __init check_tylersburg_isoch(void);
9af88143
DW
149static int rwbf_quirk;
150
b779260b
JC
151/*
152 * set to 1 to panic kernel if can't successfully enable VT-d
153 * (used when kernel is launched w/ TXT)
154 */
155static int force_on = 0;
156
46b08e1a
MM
157/*
158 * 0: Present
159 * 1-11: Reserved
160 * 12-63: Context Ptr (12 - (haw-1))
161 * 64-127: Reserved
162 */
163struct root_entry {
164 u64 val;
165 u64 rsvd1;
166};
167#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
168static inline bool root_present(struct root_entry *root)
169{
170 return (root->val & 1);
171}
172static inline void set_root_present(struct root_entry *root)
173{
174 root->val |= 1;
175}
176static inline void set_root_value(struct root_entry *root, unsigned long value)
177{
178 root->val |= value & VTD_PAGE_MASK;
179}
180
181static inline struct context_entry *
182get_context_addr_from_root(struct root_entry *root)
183{
184 return (struct context_entry *)
185 (root_present(root)?phys_to_virt(
186 root->val & VTD_PAGE_MASK) :
187 NULL);
188}
189
7a8fc25e
MM
190/*
191 * low 64 bits:
192 * 0: present
193 * 1: fault processing disable
194 * 2-3: translation type
195 * 12-63: address space root
196 * high 64 bits:
197 * 0-2: address width
198 * 3-6: aval
199 * 8-23: domain id
200 */
201struct context_entry {
202 u64 lo;
203 u64 hi;
204};
c07e7d21
MM
205
206static inline bool context_present(struct context_entry *context)
207{
208 return (context->lo & 1);
209}
210static inline void context_set_present(struct context_entry *context)
211{
212 context->lo |= 1;
213}
214
215static inline void context_set_fault_enable(struct context_entry *context)
216{
217 context->lo &= (((u64)-1) << 2) | 1;
218}
219
c07e7d21
MM
220static inline void context_set_translation_type(struct context_entry *context,
221 unsigned long value)
222{
223 context->lo &= (((u64)-1) << 4) | 3;
224 context->lo |= (value & 3) << 2;
225}
226
227static inline void context_set_address_root(struct context_entry *context,
228 unsigned long value)
229{
230 context->lo |= value & VTD_PAGE_MASK;
231}
232
233static inline void context_set_address_width(struct context_entry *context,
234 unsigned long value)
235{
236 context->hi |= value & 7;
237}
238
239static inline void context_set_domain_id(struct context_entry *context,
240 unsigned long value)
241{
242 context->hi |= (value & ((1 << 16) - 1)) << 8;
243}
244
245static inline void context_clear_entry(struct context_entry *context)
246{
247 context->lo = 0;
248 context->hi = 0;
249}
7a8fc25e 250
622ba12a
MM
251/*
252 * 0: readable
253 * 1: writable
254 * 2-6: reserved
255 * 7: super page
9cf06697
SY
256 * 8-10: available
257 * 11: snoop behavior
622ba12a
MM
258 * 12-63: Host physcial address
259 */
260struct dma_pte {
261 u64 val;
262};
622ba12a 263
19c239ce
MM
264static inline void dma_clear_pte(struct dma_pte *pte)
265{
266 pte->val = 0;
267}
268
269static inline void dma_set_pte_readable(struct dma_pte *pte)
270{
271 pte->val |= DMA_PTE_READ;
272}
273
274static inline void dma_set_pte_writable(struct dma_pte *pte)
275{
276 pte->val |= DMA_PTE_WRITE;
277}
278
9cf06697
SY
279static inline void dma_set_pte_snp(struct dma_pte *pte)
280{
281 pte->val |= DMA_PTE_SNP;
282}
283
19c239ce
MM
284static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
285{
286 pte->val = (pte->val & ~3) | (prot & 3);
287}
288
289static inline u64 dma_pte_addr(struct dma_pte *pte)
290{
c85994e4
DW
291#ifdef CONFIG_64BIT
292 return pte->val & VTD_PAGE_MASK;
293#else
294 /* Must have a full atomic 64-bit read */
1a8bd481 295 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 296#endif
19c239ce
MM
297}
298
dd4e8319 299static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 300{
dd4e8319 301 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
19c239ce
MM
302}
303
304static inline bool dma_pte_present(struct dma_pte *pte)
305{
306 return (pte->val & 3) != 0;
307}
622ba12a 308
75e6bf96
DW
309static inline int first_pte_in_page(struct dma_pte *pte)
310{
311 return !((unsigned long)pte & ~VTD_PAGE_MASK);
312}
313
2c2e2c38
FY
314/*
315 * This domain is a statically identity mapping domain.
316 * 1. This domain creats a static 1:1 mapping to all usable memory.
317 * 2. It maps to each iommu if successful.
318 * 3. Each iommu mapps to this domain if successful.
319 */
19943b0e
DW
320static struct dmar_domain *si_domain;
321static int hw_pass_through = 1;
2c2e2c38 322
3b5410e7 323/* devices under the same p2p bridge are owned in one domain */
cdc7b837 324#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 325
1ce28feb
WH
326/* domain represents a virtual machine, more than one devices
327 * across iommus may be owned in one domain, e.g. kvm guest.
328 */
329#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
330
2c2e2c38
FY
331/* si_domain contains mulitple devices */
332#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
333
99126f7c
MM
334struct dmar_domain {
335 int id; /* domain id */
4c923d47 336 int nid; /* node id */
8c11e798 337 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
338
339 struct list_head devices; /* all devices' list */
340 struct iova_domain iovad; /* iova's that belong to this domain */
341
342 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
343 int gaw; /* max guest address width */
344
345 /* adjusted guest address width, 0 is level 2 30-bit */
346 int agaw;
347
3b5410e7 348 int flags; /* flags to find out type of domain */
8e604097
WH
349
350 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 351 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 352 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
353 int iommu_superpage;/* Level of superpages supported:
354 0 == 4KiB (no superpages), 1 == 2MiB,
355 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 356 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 357 u64 max_addr; /* maximum mapped address */
99126f7c
MM
358};
359
a647dacb
MM
360/* PCI domain-device relationship */
361struct device_domain_info {
362 struct list_head link; /* link to domain siblings */
363 struct list_head global; /* link to global list */
276dbf99
DW
364 int segment; /* PCI domain */
365 u8 bus; /* PCI bus number */
a647dacb 366 u8 devfn; /* PCI devfn number */
45e829ea 367 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 368 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
369 struct dmar_domain *domain; /* pointer to domain */
370};
371
5e0d2a6f 372static void flush_unmaps_timeout(unsigned long data);
373
374DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
375
80b20dd8 376#define HIGH_WATER_MARK 250
377struct deferred_flush_tables {
378 int next;
379 struct iova *iova[HIGH_WATER_MARK];
380 struct dmar_domain *domain[HIGH_WATER_MARK];
381};
382
383static struct deferred_flush_tables *deferred_flush;
384
5e0d2a6f 385/* bitmap for indexing intel_iommus */
5e0d2a6f 386static int g_num_of_iommus;
387
388static DEFINE_SPINLOCK(async_umap_flush_lock);
389static LIST_HEAD(unmaps_to_do);
390
391static int timer_on;
392static long list_size;
5e0d2a6f 393
ba395927
KA
394static void domain_remove_dev_info(struct dmar_domain *domain);
395
0cd5c3c8
KM
396#ifdef CONFIG_DMAR_DEFAULT_ON
397int dmar_disabled = 0;
398#else
399int dmar_disabled = 1;
400#endif /*CONFIG_DMAR_DEFAULT_ON*/
401
2d9e667e 402static int dmar_map_gfx = 1;
7d3b03ce 403static int dmar_forcedac;
5e0d2a6f 404static int intel_iommu_strict;
6dd9a7c7 405static int intel_iommu_superpage = 1;
ba395927
KA
406
407#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
408static DEFINE_SPINLOCK(device_domain_lock);
409static LIST_HEAD(device_domain_list);
410
a8bcbb0d
JR
411static struct iommu_ops intel_iommu_ops;
412
ba395927
KA
413static int __init intel_iommu_setup(char *str)
414{
415 if (!str)
416 return -EINVAL;
417 while (*str) {
0cd5c3c8
KM
418 if (!strncmp(str, "on", 2)) {
419 dmar_disabled = 0;
420 printk(KERN_INFO "Intel-IOMMU: enabled\n");
421 } else if (!strncmp(str, "off", 3)) {
ba395927 422 dmar_disabled = 1;
0cd5c3c8 423 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
424 } else if (!strncmp(str, "igfx_off", 8)) {
425 dmar_map_gfx = 0;
426 printk(KERN_INFO
427 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 428 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 429 printk(KERN_INFO
7d3b03ce
KA
430 "Intel-IOMMU: Forcing DAC for PCI devices\n");
431 dmar_forcedac = 1;
5e0d2a6f 432 } else if (!strncmp(str, "strict", 6)) {
433 printk(KERN_INFO
434 "Intel-IOMMU: disable batched IOTLB flush\n");
435 intel_iommu_strict = 1;
6dd9a7c7
YS
436 } else if (!strncmp(str, "sp_off", 6)) {
437 printk(KERN_INFO
438 "Intel-IOMMU: disable supported super page\n");
439 intel_iommu_superpage = 0;
ba395927
KA
440 }
441
442 str += strcspn(str, ",");
443 while (*str == ',')
444 str++;
445 }
446 return 0;
447}
448__setup("intel_iommu=", intel_iommu_setup);
449
450static struct kmem_cache *iommu_domain_cache;
451static struct kmem_cache *iommu_devinfo_cache;
452static struct kmem_cache *iommu_iova_cache;
453
4c923d47 454static inline void *alloc_pgtable_page(int node)
eb3fa7cb 455{
4c923d47
SS
456 struct page *page;
457 void *vaddr = NULL;
eb3fa7cb 458
4c923d47
SS
459 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
460 if (page)
461 vaddr = page_address(page);
eb3fa7cb 462 return vaddr;
ba395927
KA
463}
464
465static inline void free_pgtable_page(void *vaddr)
466{
467 free_page((unsigned long)vaddr);
468}
469
470static inline void *alloc_domain_mem(void)
471{
354bb65e 472 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
473}
474
38717946 475static void free_domain_mem(void *vaddr)
ba395927
KA
476{
477 kmem_cache_free(iommu_domain_cache, vaddr);
478}
479
480static inline void * alloc_devinfo_mem(void)
481{
354bb65e 482 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
483}
484
485static inline void free_devinfo_mem(void *vaddr)
486{
487 kmem_cache_free(iommu_devinfo_cache, vaddr);
488}
489
490struct iova *alloc_iova_mem(void)
491{
354bb65e 492 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
493}
494
495void free_iova_mem(struct iova *iova)
496{
497 kmem_cache_free(iommu_iova_cache, iova);
498}
499
1b573683 500
4ed0d3e6 501static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
502{
503 unsigned long sagaw;
504 int agaw = -1;
505
506 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 507 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
508 agaw >= 0; agaw--) {
509 if (test_bit(agaw, &sagaw))
510 break;
511 }
512
513 return agaw;
514}
515
4ed0d3e6
FY
516/*
517 * Calculate max SAGAW for each iommu.
518 */
519int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
520{
521 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
522}
523
524/*
525 * calculate agaw for each iommu.
526 * "SAGAW" may be different across iommus, use a default agaw, and
527 * get a supported less agaw for iommus that don't support the default agaw.
528 */
529int iommu_calculate_agaw(struct intel_iommu *iommu)
530{
531 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
532}
533
2c2e2c38 534/* This functionin only returns single iommu in a domain */
8c11e798
WH
535static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
536{
537 int iommu_id;
538
2c2e2c38 539 /* si_domain and vm domain should not get here. */
1ce28feb 540 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 541 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 542
8c11e798
WH
543 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
544 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
545 return NULL;
546
547 return g_iommus[iommu_id];
548}
549
8e604097
WH
550static void domain_update_iommu_coherency(struct dmar_domain *domain)
551{
552 int i;
553
554 domain->iommu_coherency = 1;
555
a45946ab 556 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
8e604097
WH
557 if (!ecap_coherent(g_iommus[i]->ecap)) {
558 domain->iommu_coherency = 0;
559 break;
560 }
8e604097
WH
561 }
562}
563
58c610bd
SY
564static void domain_update_iommu_snooping(struct dmar_domain *domain)
565{
566 int i;
567
568 domain->iommu_snooping = 1;
569
a45946ab 570 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
571 if (!ecap_sc_support(g_iommus[i]->ecap)) {
572 domain->iommu_snooping = 0;
573 break;
574 }
58c610bd
SY
575 }
576}
577
6dd9a7c7
YS
578static void domain_update_iommu_superpage(struct dmar_domain *domain)
579{
580 int i, mask = 0xf;
581
582 if (!intel_iommu_superpage) {
583 domain->iommu_superpage = 0;
584 return;
585 }
586
587 domain->iommu_superpage = 4; /* 1TiB */
588
589 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
590 mask |= cap_super_page_val(g_iommus[i]->cap);
591 if (!mask) {
592 break;
593 }
594 }
595 domain->iommu_superpage = fls(mask);
596}
597
58c610bd
SY
598/* Some capabilities may be different across iommus */
599static void domain_update_iommu_cap(struct dmar_domain *domain)
600{
601 domain_update_iommu_coherency(domain);
602 domain_update_iommu_snooping(domain);
6dd9a7c7 603 domain_update_iommu_superpage(domain);
58c610bd
SY
604}
605
276dbf99 606static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
607{
608 struct dmar_drhd_unit *drhd = NULL;
609 int i;
610
611 for_each_drhd_unit(drhd) {
612 if (drhd->ignored)
613 continue;
276dbf99
DW
614 if (segment != drhd->segment)
615 continue;
c7151a8d 616
924b6231 617 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
618 if (drhd->devices[i] &&
619 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
620 drhd->devices[i]->devfn == devfn)
621 return drhd->iommu;
4958c5dc
DW
622 if (drhd->devices[i] &&
623 drhd->devices[i]->subordinate &&
924b6231
DW
624 drhd->devices[i]->subordinate->number <= bus &&
625 drhd->devices[i]->subordinate->subordinate >= bus)
626 return drhd->iommu;
627 }
c7151a8d
WH
628
629 if (drhd->include_all)
630 return drhd->iommu;
631 }
632
633 return NULL;
634}
635
5331fe6f
WH
636static void domain_flush_cache(struct dmar_domain *domain,
637 void *addr, int size)
638{
639 if (!domain->iommu_coherency)
640 clflush_cache_range(addr, size);
641}
642
ba395927
KA
643/* Gets context entry for a given bus and devfn */
644static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
645 u8 bus, u8 devfn)
646{
647 struct root_entry *root;
648 struct context_entry *context;
649 unsigned long phy_addr;
650 unsigned long flags;
651
652 spin_lock_irqsave(&iommu->lock, flags);
653 root = &iommu->root_entry[bus];
654 context = get_context_addr_from_root(root);
655 if (!context) {
4c923d47
SS
656 context = (struct context_entry *)
657 alloc_pgtable_page(iommu->node);
ba395927
KA
658 if (!context) {
659 spin_unlock_irqrestore(&iommu->lock, flags);
660 return NULL;
661 }
5b6985ce 662 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
663 phy_addr = virt_to_phys((void *)context);
664 set_root_value(root, phy_addr);
665 set_root_present(root);
666 __iommu_flush_cache(iommu, root, sizeof(*root));
667 }
668 spin_unlock_irqrestore(&iommu->lock, flags);
669 return &context[devfn];
670}
671
672static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
673{
674 struct root_entry *root;
675 struct context_entry *context;
676 int ret;
677 unsigned long flags;
678
679 spin_lock_irqsave(&iommu->lock, flags);
680 root = &iommu->root_entry[bus];
681 context = get_context_addr_from_root(root);
682 if (!context) {
683 ret = 0;
684 goto out;
685 }
c07e7d21 686 ret = context_present(&context[devfn]);
ba395927
KA
687out:
688 spin_unlock_irqrestore(&iommu->lock, flags);
689 return ret;
690}
691
692static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
693{
694 struct root_entry *root;
695 struct context_entry *context;
696 unsigned long flags;
697
698 spin_lock_irqsave(&iommu->lock, flags);
699 root = &iommu->root_entry[bus];
700 context = get_context_addr_from_root(root);
701 if (context) {
c07e7d21 702 context_clear_entry(&context[devfn]);
ba395927
KA
703 __iommu_flush_cache(iommu, &context[devfn], \
704 sizeof(*context));
705 }
706 spin_unlock_irqrestore(&iommu->lock, flags);
707}
708
709static void free_context_table(struct intel_iommu *iommu)
710{
711 struct root_entry *root;
712 int i;
713 unsigned long flags;
714 struct context_entry *context;
715
716 spin_lock_irqsave(&iommu->lock, flags);
717 if (!iommu->root_entry) {
718 goto out;
719 }
720 for (i = 0; i < ROOT_ENTRY_NR; i++) {
721 root = &iommu->root_entry[i];
722 context = get_context_addr_from_root(root);
723 if (context)
724 free_pgtable_page(context);
725 }
726 free_pgtable_page(iommu->root_entry);
727 iommu->root_entry = NULL;
728out:
729 spin_unlock_irqrestore(&iommu->lock, flags);
730}
731
b026fd28 732static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
6dd9a7c7 733 unsigned long pfn, int large_level)
ba395927 734{
b026fd28 735 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
736 struct dma_pte *parent, *pte = NULL;
737 int level = agaw_to_level(domain->agaw);
6dd9a7c7 738 int offset, target_level;
ba395927
KA
739
740 BUG_ON(!domain->pgd);
b026fd28 741 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
742 parent = domain->pgd;
743
6dd9a7c7
YS
744 /* Search pte */
745 if (!large_level)
746 target_level = 1;
747 else
748 target_level = large_level;
749
ba395927
KA
750 while (level > 0) {
751 void *tmp_page;
752
b026fd28 753 offset = pfn_level_offset(pfn, level);
ba395927 754 pte = &parent[offset];
6dd9a7c7
YS
755 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
756 break;
757 if (level == target_level)
ba395927
KA
758 break;
759
19c239ce 760 if (!dma_pte_present(pte)) {
c85994e4
DW
761 uint64_t pteval;
762
4c923d47 763 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 764
206a73c1 765 if (!tmp_page)
ba395927 766 return NULL;
206a73c1 767
c85994e4 768 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 769 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
770 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
771 /* Someone else set it while we were thinking; use theirs. */
772 free_pgtable_page(tmp_page);
773 } else {
774 dma_pte_addr(pte);
775 domain_flush_cache(domain, pte, sizeof(*pte));
776 }
ba395927 777 }
19c239ce 778 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
779 level--;
780 }
781
ba395927
KA
782 return pte;
783}
784
6dd9a7c7 785
ba395927 786/* return address's pte at specific level */
90dcfb5e
DW
787static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
788 unsigned long pfn,
6dd9a7c7 789 int level, int *large_page)
ba395927
KA
790{
791 struct dma_pte *parent, *pte = NULL;
792 int total = agaw_to_level(domain->agaw);
793 int offset;
794
795 parent = domain->pgd;
796 while (level <= total) {
90dcfb5e 797 offset = pfn_level_offset(pfn, total);
ba395927
KA
798 pte = &parent[offset];
799 if (level == total)
800 return pte;
801
6dd9a7c7
YS
802 if (!dma_pte_present(pte)) {
803 *large_page = total;
ba395927 804 break;
6dd9a7c7
YS
805 }
806
807 if (pte->val & DMA_PTE_LARGE_PAGE) {
808 *large_page = total;
809 return pte;
810 }
811
19c239ce 812 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
813 total--;
814 }
815 return NULL;
816}
817
ba395927 818/* clear last level pte, a tlb flush should be followed */
595badf5
DW
819static void dma_pte_clear_range(struct dmar_domain *domain,
820 unsigned long start_pfn,
821 unsigned long last_pfn)
ba395927 822{
04b18e65 823 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 824 unsigned int large_page = 1;
310a5ab9 825 struct dma_pte *first_pte, *pte;
66eae846 826
04b18e65 827 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 828 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 829 BUG_ON(start_pfn > last_pfn);
ba395927 830
04b18e65 831 /* we don't need lock here; nobody else touches the iova range */
59c36286 832 do {
6dd9a7c7
YS
833 large_page = 1;
834 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 835 if (!pte) {
6dd9a7c7 836 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
837 continue;
838 }
6dd9a7c7 839 do {
310a5ab9 840 dma_clear_pte(pte);
6dd9a7c7 841 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 842 pte++;
75e6bf96
DW
843 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
844
310a5ab9
DW
845 domain_flush_cache(domain, first_pte,
846 (void *)pte - (void *)first_pte);
59c36286
DW
847
848 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
849}
850
851/* free page table pages. last level pte should already be cleared */
852static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
853 unsigned long start_pfn,
854 unsigned long last_pfn)
ba395927 855{
6660c63a 856 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 857 struct dma_pte *first_pte, *pte;
ba395927
KA
858 int total = agaw_to_level(domain->agaw);
859 int level;
6660c63a 860 unsigned long tmp;
6dd9a7c7 861 int large_page = 2;
ba395927 862
6660c63a
DW
863 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
864 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 865 BUG_ON(start_pfn > last_pfn);
ba395927 866
f3a0a52f 867 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
868 level = 2;
869 while (level <= total) {
6660c63a
DW
870 tmp = align_to_level(start_pfn, level);
871
f3a0a52f 872 /* If we can't even clear one PTE at this level, we're done */
6660c63a 873 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
874 return;
875
59c36286 876 do {
6dd9a7c7
YS
877 large_page = level;
878 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
879 if (large_page > level)
880 level = large_page + 1;
f3a0a52f
DW
881 if (!pte) {
882 tmp = align_to_level(tmp + 1, level + 1);
883 continue;
884 }
75e6bf96 885 do {
6a43e574
DW
886 if (dma_pte_present(pte)) {
887 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
888 dma_clear_pte(pte);
889 }
f3a0a52f
DW
890 pte++;
891 tmp += level_size(level);
75e6bf96
DW
892 } while (!first_pte_in_page(pte) &&
893 tmp + level_size(level) - 1 <= last_pfn);
894
f3a0a52f
DW
895 domain_flush_cache(domain, first_pte,
896 (void *)pte - (void *)first_pte);
897
59c36286 898 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
899 level++;
900 }
901 /* free pgd */
d794dc9b 902 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
903 free_pgtable_page(domain->pgd);
904 domain->pgd = NULL;
905 }
906}
907
908/* iommu handling */
909static int iommu_alloc_root_entry(struct intel_iommu *iommu)
910{
911 struct root_entry *root;
912 unsigned long flags;
913
4c923d47 914 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
915 if (!root)
916 return -ENOMEM;
917
5b6985ce 918 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
919
920 spin_lock_irqsave(&iommu->lock, flags);
921 iommu->root_entry = root;
922 spin_unlock_irqrestore(&iommu->lock, flags);
923
924 return 0;
925}
926
ba395927
KA
927static void iommu_set_root_entry(struct intel_iommu *iommu)
928{
929 void *addr;
c416daa9 930 u32 sts;
ba395927
KA
931 unsigned long flag;
932
933 addr = iommu->root_entry;
934
935 spin_lock_irqsave(&iommu->register_lock, flag);
936 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
937
c416daa9 938 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
939
940 /* Make sure hardware complete it */
941 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 942 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
943
944 spin_unlock_irqrestore(&iommu->register_lock, flag);
945}
946
947static void iommu_flush_write_buffer(struct intel_iommu *iommu)
948{
949 u32 val;
950 unsigned long flag;
951
9af88143 952 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 953 return;
ba395927
KA
954
955 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 956 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
957
958 /* Make sure hardware complete it */
959 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 960 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
961
962 spin_unlock_irqrestore(&iommu->register_lock, flag);
963}
964
965/* return value determine if we need a write buffer flush */
4c25a2c1
DW
966static void __iommu_flush_context(struct intel_iommu *iommu,
967 u16 did, u16 source_id, u8 function_mask,
968 u64 type)
ba395927
KA
969{
970 u64 val = 0;
971 unsigned long flag;
972
ba395927
KA
973 switch (type) {
974 case DMA_CCMD_GLOBAL_INVL:
975 val = DMA_CCMD_GLOBAL_INVL;
976 break;
977 case DMA_CCMD_DOMAIN_INVL:
978 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
979 break;
980 case DMA_CCMD_DEVICE_INVL:
981 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
982 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
983 break;
984 default:
985 BUG();
986 }
987 val |= DMA_CCMD_ICC;
988
989 spin_lock_irqsave(&iommu->register_lock, flag);
990 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
991
992 /* Make sure hardware complete it */
993 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
994 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
995
996 spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
997}
998
ba395927 999/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1000static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1001 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1002{
1003 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1004 u64 val = 0, val_iva = 0;
1005 unsigned long flag;
1006
ba395927
KA
1007 switch (type) {
1008 case DMA_TLB_GLOBAL_FLUSH:
1009 /* global flush doesn't need set IVA_REG */
1010 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1011 break;
1012 case DMA_TLB_DSI_FLUSH:
1013 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1014 break;
1015 case DMA_TLB_PSI_FLUSH:
1016 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1017 /* Note: always flush non-leaf currently */
1018 val_iva = size_order | addr;
1019 break;
1020 default:
1021 BUG();
1022 }
1023 /* Note: set drain read/write */
1024#if 0
1025 /*
1026 * This is probably to be super secure.. Looks like we can
1027 * ignore it without any impact.
1028 */
1029 if (cap_read_drain(iommu->cap))
1030 val |= DMA_TLB_READ_DRAIN;
1031#endif
1032 if (cap_write_drain(iommu->cap))
1033 val |= DMA_TLB_WRITE_DRAIN;
1034
1035 spin_lock_irqsave(&iommu->register_lock, flag);
1036 /* Note: Only uses first TLB reg currently */
1037 if (val_iva)
1038 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1039 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1040
1041 /* Make sure hardware complete it */
1042 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1043 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1044
1045 spin_unlock_irqrestore(&iommu->register_lock, flag);
1046
1047 /* check IOTLB invalidation granularity */
1048 if (DMA_TLB_IAIG(val) == 0)
1049 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1050 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1051 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1052 (unsigned long long)DMA_TLB_IIRG(type),
1053 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1054}
1055
93a23a72
YZ
1056static struct device_domain_info *iommu_support_dev_iotlb(
1057 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1058{
1059 int found = 0;
1060 unsigned long flags;
1061 struct device_domain_info *info;
1062 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1063
1064 if (!ecap_dev_iotlb_support(iommu->ecap))
1065 return NULL;
1066
1067 if (!iommu->qi)
1068 return NULL;
1069
1070 spin_lock_irqsave(&device_domain_lock, flags);
1071 list_for_each_entry(info, &domain->devices, link)
1072 if (info->bus == bus && info->devfn == devfn) {
1073 found = 1;
1074 break;
1075 }
1076 spin_unlock_irqrestore(&device_domain_lock, flags);
1077
1078 if (!found || !info->dev)
1079 return NULL;
1080
1081 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1082 return NULL;
1083
1084 if (!dmar_find_matched_atsr_unit(info->dev))
1085 return NULL;
1086
1087 info->iommu = iommu;
1088
1089 return info;
1090}
1091
1092static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1093{
93a23a72
YZ
1094 if (!info)
1095 return;
1096
1097 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1098}
1099
1100static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1101{
1102 if (!info->dev || !pci_ats_enabled(info->dev))
1103 return;
1104
1105 pci_disable_ats(info->dev);
1106}
1107
1108static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1109 u64 addr, unsigned mask)
1110{
1111 u16 sid, qdep;
1112 unsigned long flags;
1113 struct device_domain_info *info;
1114
1115 spin_lock_irqsave(&device_domain_lock, flags);
1116 list_for_each_entry(info, &domain->devices, link) {
1117 if (!info->dev || !pci_ats_enabled(info->dev))
1118 continue;
1119
1120 sid = info->bus << 8 | info->devfn;
1121 qdep = pci_ats_queue_depth(info->dev);
1122 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1123 }
1124 spin_unlock_irqrestore(&device_domain_lock, flags);
1125}
1126
1f0ef2aa 1127static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1128 unsigned long pfn, unsigned int pages, int map)
ba395927 1129{
9dd2fe89 1130 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1131 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1132
ba395927
KA
1133 BUG_ON(pages == 0);
1134
ba395927 1135 /*
9dd2fe89
YZ
1136 * Fallback to domain selective flush if no PSI support or the size is
1137 * too big.
ba395927
KA
1138 * PSI requires page size to be 2 ^ x, and the base address is naturally
1139 * aligned to the size
1140 */
9dd2fe89
YZ
1141 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1142 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1143 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1144 else
1145 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1146 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1147
1148 /*
82653633
NA
1149 * In caching mode, changes of pages from non-present to present require
1150 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1151 */
82653633 1152 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1153 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1154}
1155
f8bab735 1156static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1157{
1158 u32 pmen;
1159 unsigned long flags;
1160
1161 spin_lock_irqsave(&iommu->register_lock, flags);
1162 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1163 pmen &= ~DMA_PMEN_EPM;
1164 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1165
1166 /* wait for the protected region status bit to clear */
1167 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1168 readl, !(pmen & DMA_PMEN_PRS), pmen);
1169
1170 spin_unlock_irqrestore(&iommu->register_lock, flags);
1171}
1172
ba395927
KA
1173static int iommu_enable_translation(struct intel_iommu *iommu)
1174{
1175 u32 sts;
1176 unsigned long flags;
1177
1178 spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1179 iommu->gcmd |= DMA_GCMD_TE;
1180 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1181
1182 /* Make sure hardware complete it */
1183 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1184 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1185
ba395927
KA
1186 spin_unlock_irqrestore(&iommu->register_lock, flags);
1187 return 0;
1188}
1189
1190static int iommu_disable_translation(struct intel_iommu *iommu)
1191{
1192 u32 sts;
1193 unsigned long flag;
1194
1195 spin_lock_irqsave(&iommu->register_lock, flag);
1196 iommu->gcmd &= ~DMA_GCMD_TE;
1197 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1198
1199 /* Make sure hardware complete it */
1200 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1201 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1202
1203 spin_unlock_irqrestore(&iommu->register_lock, flag);
1204 return 0;
1205}
1206
3460a6d9 1207
ba395927
KA
1208static int iommu_init_domains(struct intel_iommu *iommu)
1209{
1210 unsigned long ndomains;
1211 unsigned long nlongs;
1212
1213 ndomains = cap_ndoms(iommu->cap);
680a7524
YL
1214 pr_debug("IOMMU %d: Number of Domains supportd <%ld>\n", iommu->seq_id,
1215 ndomains);
ba395927
KA
1216 nlongs = BITS_TO_LONGS(ndomains);
1217
94a91b50
DD
1218 spin_lock_init(&iommu->lock);
1219
ba395927
KA
1220 /* TBD: there might be 64K domains,
1221 * consider other allocation for future chip
1222 */
1223 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1224 if (!iommu->domain_ids) {
1225 printk(KERN_ERR "Allocating domain id array failed\n");
1226 return -ENOMEM;
1227 }
1228 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1229 GFP_KERNEL);
1230 if (!iommu->domains) {
1231 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1232 return -ENOMEM;
1233 }
1234
1235 /*
1236 * if Caching mode is set, then invalid translations are tagged
1237 * with domainid 0. Hence we need to pre-allocate it.
1238 */
1239 if (cap_caching_mode(iommu->cap))
1240 set_bit(0, iommu->domain_ids);
1241 return 0;
1242}
ba395927 1243
ba395927
KA
1244
1245static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1246static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1247
1248void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1249{
1250 struct dmar_domain *domain;
1251 int i;
c7151a8d 1252 unsigned long flags;
ba395927 1253
94a91b50 1254 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1255 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
94a91b50
DD
1256 domain = iommu->domains[i];
1257 clear_bit(i, iommu->domain_ids);
1258
1259 spin_lock_irqsave(&domain->iommu_lock, flags);
1260 if (--domain->iommu_count == 0) {
1261 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1262 vm_domain_exit(domain);
1263 else
1264 domain_exit(domain);
1265 }
1266 spin_unlock_irqrestore(&domain->iommu_lock, flags);
5e98c4b1 1267 }
ba395927
KA
1268 }
1269
1270 if (iommu->gcmd & DMA_GCMD_TE)
1271 iommu_disable_translation(iommu);
1272
1273 if (iommu->irq) {
dced35ae 1274 irq_set_handler_data(iommu->irq, NULL);
ba395927
KA
1275 /* This will mask the irq */
1276 free_irq(iommu->irq, iommu);
1277 destroy_irq(iommu->irq);
1278 }
1279
1280 kfree(iommu->domains);
1281 kfree(iommu->domain_ids);
1282
d9630fe9
WH
1283 g_iommus[iommu->seq_id] = NULL;
1284
1285 /* if all iommus are freed, free g_iommus */
1286 for (i = 0; i < g_num_of_iommus; i++) {
1287 if (g_iommus[i])
1288 break;
1289 }
1290
1291 if (i == g_num_of_iommus)
1292 kfree(g_iommus);
1293
ba395927
KA
1294 /* free context mapping */
1295 free_context_table(iommu);
ba395927
KA
1296}
1297
2c2e2c38 1298static struct dmar_domain *alloc_domain(void)
ba395927 1299{
ba395927 1300 struct dmar_domain *domain;
ba395927
KA
1301
1302 domain = alloc_domain_mem();
1303 if (!domain)
1304 return NULL;
1305
4c923d47 1306 domain->nid = -1;
2c2e2c38
FY
1307 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1308 domain->flags = 0;
1309
1310 return domain;
1311}
1312
1313static int iommu_attach_domain(struct dmar_domain *domain,
1314 struct intel_iommu *iommu)
1315{
1316 int num;
1317 unsigned long ndomains;
1318 unsigned long flags;
1319
ba395927
KA
1320 ndomains = cap_ndoms(iommu->cap);
1321
1322 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1323
ba395927
KA
1324 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1325 if (num >= ndomains) {
1326 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1327 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1328 return -ENOMEM;
ba395927
KA
1329 }
1330
ba395927 1331 domain->id = num;
2c2e2c38 1332 set_bit(num, iommu->domain_ids);
8c11e798 1333 set_bit(iommu->seq_id, &domain->iommu_bmp);
ba395927
KA
1334 iommu->domains[num] = domain;
1335 spin_unlock_irqrestore(&iommu->lock, flags);
1336
2c2e2c38 1337 return 0;
ba395927
KA
1338}
1339
2c2e2c38
FY
1340static void iommu_detach_domain(struct dmar_domain *domain,
1341 struct intel_iommu *iommu)
ba395927
KA
1342{
1343 unsigned long flags;
2c2e2c38
FY
1344 int num, ndomains;
1345 int found = 0;
ba395927 1346
8c11e798 1347 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1348 ndomains = cap_ndoms(iommu->cap);
a45946ab 1349 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38
FY
1350 if (iommu->domains[num] == domain) {
1351 found = 1;
1352 break;
1353 }
2c2e2c38
FY
1354 }
1355
1356 if (found) {
1357 clear_bit(num, iommu->domain_ids);
1358 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1359 iommu->domains[num] = NULL;
1360 }
8c11e798 1361 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1362}
1363
1364static struct iova_domain reserved_iova_list;
8a443df4 1365static struct lock_class_key reserved_rbtree_key;
ba395927 1366
51a63e67 1367static int dmar_init_reserved_ranges(void)
ba395927
KA
1368{
1369 struct pci_dev *pdev = NULL;
1370 struct iova *iova;
1371 int i;
ba395927 1372
f661197e 1373 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1374
8a443df4
MG
1375 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1376 &reserved_rbtree_key);
1377
ba395927
KA
1378 /* IOAPIC ranges shouldn't be accessed by DMA */
1379 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1380 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1381 if (!iova) {
ba395927 1382 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1383 return -ENODEV;
1384 }
ba395927
KA
1385
1386 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1387 for_each_pci_dev(pdev) {
1388 struct resource *r;
1389
1390 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1391 r = &pdev->resource[i];
1392 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1393 continue;
1a4a4551
DW
1394 iova = reserve_iova(&reserved_iova_list,
1395 IOVA_PFN(r->start),
1396 IOVA_PFN(r->end));
51a63e67 1397 if (!iova) {
ba395927 1398 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1399 return -ENODEV;
1400 }
ba395927
KA
1401 }
1402 }
51a63e67 1403 return 0;
ba395927
KA
1404}
1405
1406static void domain_reserve_special_ranges(struct dmar_domain *domain)
1407{
1408 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1409}
1410
1411static inline int guestwidth_to_adjustwidth(int gaw)
1412{
1413 int agaw;
1414 int r = (gaw - 12) % 9;
1415
1416 if (r == 0)
1417 agaw = gaw;
1418 else
1419 agaw = gaw + 9 - r;
1420 if (agaw > 64)
1421 agaw = 64;
1422 return agaw;
1423}
1424
1425static int domain_init(struct dmar_domain *domain, int guest_width)
1426{
1427 struct intel_iommu *iommu;
1428 int adjust_width, agaw;
1429 unsigned long sagaw;
1430
f661197e 1431 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1432 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1433
1434 domain_reserve_special_ranges(domain);
1435
1436 /* calculate AGAW */
8c11e798 1437 iommu = domain_get_iommu(domain);
ba395927
KA
1438 if (guest_width > cap_mgaw(iommu->cap))
1439 guest_width = cap_mgaw(iommu->cap);
1440 domain->gaw = guest_width;
1441 adjust_width = guestwidth_to_adjustwidth(guest_width);
1442 agaw = width_to_agaw(adjust_width);
1443 sagaw = cap_sagaw(iommu->cap);
1444 if (!test_bit(agaw, &sagaw)) {
1445 /* hardware doesn't support it, choose a bigger one */
1446 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1447 agaw = find_next_bit(&sagaw, 5, agaw);
1448 if (agaw >= 5)
1449 return -ENODEV;
1450 }
1451 domain->agaw = agaw;
1452 INIT_LIST_HEAD(&domain->devices);
1453
8e604097
WH
1454 if (ecap_coherent(iommu->ecap))
1455 domain->iommu_coherency = 1;
1456 else
1457 domain->iommu_coherency = 0;
1458
58c610bd
SY
1459 if (ecap_sc_support(iommu->ecap))
1460 domain->iommu_snooping = 1;
1461 else
1462 domain->iommu_snooping = 0;
1463
6dd9a7c7 1464 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
c7151a8d 1465 domain->iommu_count = 1;
4c923d47 1466 domain->nid = iommu->node;
c7151a8d 1467
ba395927 1468 /* always allocate the top pgd */
4c923d47 1469 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1470 if (!domain->pgd)
1471 return -ENOMEM;
5b6985ce 1472 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1473 return 0;
1474}
1475
1476static void domain_exit(struct dmar_domain *domain)
1477{
2c2e2c38
FY
1478 struct dmar_drhd_unit *drhd;
1479 struct intel_iommu *iommu;
ba395927
KA
1480
1481 /* Domain 0 is reserved, so dont process it */
1482 if (!domain)
1483 return;
1484
7b668357
AW
1485 /* Flush any lazy unmaps that may reference this domain */
1486 if (!intel_iommu_strict)
1487 flush_unmaps_timeout(0);
1488
ba395927
KA
1489 domain_remove_dev_info(domain);
1490 /* destroy iovas */
1491 put_iova_domain(&domain->iovad);
ba395927
KA
1492
1493 /* clear ptes */
595badf5 1494 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1495
1496 /* free page tables */
d794dc9b 1497 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1498
2c2e2c38
FY
1499 for_each_active_iommu(iommu, drhd)
1500 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1501 iommu_detach_domain(domain, iommu);
1502
ba395927
KA
1503 free_domain_mem(domain);
1504}
1505
4ed0d3e6
FY
1506static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1507 u8 bus, u8 devfn, int translation)
ba395927
KA
1508{
1509 struct context_entry *context;
ba395927 1510 unsigned long flags;
5331fe6f 1511 struct intel_iommu *iommu;
ea6606b0
WH
1512 struct dma_pte *pgd;
1513 unsigned long num;
1514 unsigned long ndomains;
1515 int id;
1516 int agaw;
93a23a72 1517 struct device_domain_info *info = NULL;
ba395927
KA
1518
1519 pr_debug("Set context mapping for %02x:%02x.%d\n",
1520 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1521
ba395927 1522 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1523 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1524 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1525
276dbf99 1526 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1527 if (!iommu)
1528 return -ENODEV;
1529
ba395927
KA
1530 context = device_to_context_entry(iommu, bus, devfn);
1531 if (!context)
1532 return -ENOMEM;
1533 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1534 if (context_present(context)) {
ba395927
KA
1535 spin_unlock_irqrestore(&iommu->lock, flags);
1536 return 0;
1537 }
1538
ea6606b0
WH
1539 id = domain->id;
1540 pgd = domain->pgd;
1541
2c2e2c38
FY
1542 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1543 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1544 int found = 0;
1545
1546 /* find an available domain id for this device in iommu */
1547 ndomains = cap_ndoms(iommu->cap);
a45946ab 1548 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1549 if (iommu->domains[num] == domain) {
1550 id = num;
1551 found = 1;
1552 break;
1553 }
ea6606b0
WH
1554 }
1555
1556 if (found == 0) {
1557 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1558 if (num >= ndomains) {
1559 spin_unlock_irqrestore(&iommu->lock, flags);
1560 printk(KERN_ERR "IOMMU: no free domain ids\n");
1561 return -EFAULT;
1562 }
1563
1564 set_bit(num, iommu->domain_ids);
1565 iommu->domains[num] = domain;
1566 id = num;
1567 }
1568
1569 /* Skip top levels of page tables for
1570 * iommu which has less agaw than default.
1672af11 1571 * Unnecessary for PT mode.
ea6606b0 1572 */
1672af11
CW
1573 if (translation != CONTEXT_TT_PASS_THROUGH) {
1574 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1575 pgd = phys_to_virt(dma_pte_addr(pgd));
1576 if (!dma_pte_present(pgd)) {
1577 spin_unlock_irqrestore(&iommu->lock, flags);
1578 return -ENOMEM;
1579 }
ea6606b0
WH
1580 }
1581 }
1582 }
1583
1584 context_set_domain_id(context, id);
4ed0d3e6 1585
93a23a72
YZ
1586 if (translation != CONTEXT_TT_PASS_THROUGH) {
1587 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1588 translation = info ? CONTEXT_TT_DEV_IOTLB :
1589 CONTEXT_TT_MULTI_LEVEL;
1590 }
4ed0d3e6
FY
1591 /*
1592 * In pass through mode, AW must be programmed to indicate the largest
1593 * AGAW value supported by hardware. And ASR is ignored by hardware.
1594 */
93a23a72 1595 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1596 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1597 else {
1598 context_set_address_root(context, virt_to_phys(pgd));
1599 context_set_address_width(context, iommu->agaw);
1600 }
4ed0d3e6
FY
1601
1602 context_set_translation_type(context, translation);
c07e7d21
MM
1603 context_set_fault_enable(context);
1604 context_set_present(context);
5331fe6f 1605 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1606
4c25a2c1
DW
1607 /*
1608 * It's a non-present to present mapping. If hardware doesn't cache
1609 * non-present entry we only need to flush the write-buffer. If the
1610 * _does_ cache non-present entries, then it does so in the special
1611 * domain #0, which we have to flush:
1612 */
1613 if (cap_caching_mode(iommu->cap)) {
1614 iommu->flush.flush_context(iommu, 0,
1615 (((u16)bus) << 8) | devfn,
1616 DMA_CCMD_MASK_NOBIT,
1617 DMA_CCMD_DEVICE_INVL);
82653633 1618 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1619 } else {
ba395927 1620 iommu_flush_write_buffer(iommu);
4c25a2c1 1621 }
93a23a72 1622 iommu_enable_dev_iotlb(info);
ba395927 1623 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1624
1625 spin_lock_irqsave(&domain->iommu_lock, flags);
1626 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1627 domain->iommu_count++;
4c923d47
SS
1628 if (domain->iommu_count == 1)
1629 domain->nid = iommu->node;
58c610bd 1630 domain_update_iommu_cap(domain);
c7151a8d
WH
1631 }
1632 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1633 return 0;
1634}
1635
1636static int
4ed0d3e6
FY
1637domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1638 int translation)
ba395927
KA
1639{
1640 int ret;
1641 struct pci_dev *tmp, *parent;
1642
276dbf99 1643 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1644 pdev->bus->number, pdev->devfn,
1645 translation);
ba395927
KA
1646 if (ret)
1647 return ret;
1648
1649 /* dependent device mapping */
1650 tmp = pci_find_upstream_pcie_bridge(pdev);
1651 if (!tmp)
1652 return 0;
1653 /* Secondary interface's bus number and devfn 0 */
1654 parent = pdev->bus->self;
1655 while (parent != tmp) {
276dbf99
DW
1656 ret = domain_context_mapping_one(domain,
1657 pci_domain_nr(parent->bus),
1658 parent->bus->number,
4ed0d3e6 1659 parent->devfn, translation);
ba395927
KA
1660 if (ret)
1661 return ret;
1662 parent = parent->bus->self;
1663 }
45e829ea 1664 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1665 return domain_context_mapping_one(domain,
276dbf99 1666 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1667 tmp->subordinate->number, 0,
1668 translation);
ba395927
KA
1669 else /* this is a legacy PCI bridge */
1670 return domain_context_mapping_one(domain,
276dbf99
DW
1671 pci_domain_nr(tmp->bus),
1672 tmp->bus->number,
4ed0d3e6
FY
1673 tmp->devfn,
1674 translation);
ba395927
KA
1675}
1676
5331fe6f 1677static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1678{
1679 int ret;
1680 struct pci_dev *tmp, *parent;
5331fe6f
WH
1681 struct intel_iommu *iommu;
1682
276dbf99
DW
1683 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1684 pdev->devfn);
5331fe6f
WH
1685 if (!iommu)
1686 return -ENODEV;
ba395927 1687
276dbf99 1688 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1689 if (!ret)
1690 return ret;
1691 /* dependent device mapping */
1692 tmp = pci_find_upstream_pcie_bridge(pdev);
1693 if (!tmp)
1694 return ret;
1695 /* Secondary interface's bus number and devfn 0 */
1696 parent = pdev->bus->self;
1697 while (parent != tmp) {
8c11e798 1698 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1699 parent->devfn);
ba395927
KA
1700 if (!ret)
1701 return ret;
1702 parent = parent->bus->self;
1703 }
5f4d91a1 1704 if (pci_is_pcie(tmp))
276dbf99
DW
1705 return device_context_mapped(iommu, tmp->subordinate->number,
1706 0);
ba395927 1707 else
276dbf99
DW
1708 return device_context_mapped(iommu, tmp->bus->number,
1709 tmp->devfn);
ba395927
KA
1710}
1711
f532959b
FY
1712/* Returns a number of VTD pages, but aligned to MM page size */
1713static inline unsigned long aligned_nrpages(unsigned long host_addr,
1714 size_t size)
1715{
1716 host_addr &= ~PAGE_MASK;
1717 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1718}
1719
6dd9a7c7
YS
1720/* Return largest possible superpage level for a given mapping */
1721static inline int hardware_largepage_caps(struct dmar_domain *domain,
1722 unsigned long iov_pfn,
1723 unsigned long phy_pfn,
1724 unsigned long pages)
1725{
1726 int support, level = 1;
1727 unsigned long pfnmerge;
1728
1729 support = domain->iommu_superpage;
1730
1731 /* To use a large page, the virtual *and* physical addresses
1732 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1733 of them will mean we have to use smaller pages. So just
1734 merge them and check both at once. */
1735 pfnmerge = iov_pfn | phy_pfn;
1736
1737 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1738 pages >>= VTD_STRIDE_SHIFT;
1739 if (!pages)
1740 break;
1741 pfnmerge >>= VTD_STRIDE_SHIFT;
1742 level++;
1743 support--;
1744 }
1745 return level;
1746}
1747
9051aa02
DW
1748static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1749 struct scatterlist *sg, unsigned long phys_pfn,
1750 unsigned long nr_pages, int prot)
e1605495
DW
1751{
1752 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1753 phys_addr_t uninitialized_var(pteval);
e1605495 1754 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1755 unsigned long sg_res;
6dd9a7c7
YS
1756 unsigned int largepage_lvl = 0;
1757 unsigned long lvl_pages = 0;
e1605495
DW
1758
1759 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1760
1761 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1762 return -EINVAL;
1763
1764 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1765
9051aa02
DW
1766 if (sg)
1767 sg_res = 0;
1768 else {
1769 sg_res = nr_pages + 1;
1770 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1771 }
1772
6dd9a7c7 1773 while (nr_pages > 0) {
c85994e4
DW
1774 uint64_t tmp;
1775
e1605495 1776 if (!sg_res) {
f532959b 1777 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1778 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1779 sg->dma_length = sg->length;
1780 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1781 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1782 }
6dd9a7c7 1783
e1605495 1784 if (!pte) {
6dd9a7c7
YS
1785 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1786
1787 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
e1605495
DW
1788 if (!pte)
1789 return -ENOMEM;
6dd9a7c7
YS
1790 /* It is large page*/
1791 if (largepage_lvl > 1)
1792 pteval |= DMA_PTE_LARGE_PAGE;
1793 else
1794 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1795
e1605495
DW
1796 }
1797 /* We don't need lock here, nobody else
1798 * touches the iova range
1799 */
7766a3fb 1800 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1801 if (tmp) {
1bf20f0d 1802 static int dumps = 5;
c85994e4
DW
1803 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1804 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1805 if (dumps) {
1806 dumps--;
1807 debug_dma_dump_mappings(NULL);
1808 }
1809 WARN_ON(1);
1810 }
6dd9a7c7
YS
1811
1812 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1813
1814 BUG_ON(nr_pages < lvl_pages);
1815 BUG_ON(sg_res < lvl_pages);
1816
1817 nr_pages -= lvl_pages;
1818 iov_pfn += lvl_pages;
1819 phys_pfn += lvl_pages;
1820 pteval += lvl_pages * VTD_PAGE_SIZE;
1821 sg_res -= lvl_pages;
1822
1823 /* If the next PTE would be the first in a new page, then we
1824 need to flush the cache on the entries we've just written.
1825 And then we'll need to recalculate 'pte', so clear it and
1826 let it get set again in the if (!pte) block above.
1827
1828 If we're done (!nr_pages) we need to flush the cache too.
1829
1830 Also if we've been setting superpages, we may need to
1831 recalculate 'pte' and switch back to smaller pages for the
1832 end of the mapping, if the trailing size is not enough to
1833 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 1834 pte++;
6dd9a7c7
YS
1835 if (!nr_pages || first_pte_in_page(pte) ||
1836 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
1837 domain_flush_cache(domain, first_pte,
1838 (void *)pte - (void *)first_pte);
1839 pte = NULL;
1840 }
6dd9a7c7
YS
1841
1842 if (!sg_res && nr_pages)
e1605495
DW
1843 sg = sg_next(sg);
1844 }
1845 return 0;
1846}
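/*
 * Editor's worked example (illustrative only) of the superpage fallback
 * above: mapping 513 contiguous pages through domain_pfn_mapping() with
 * both pfns 2MiB-aligned and 2MiB superpages supported takes two passes.
 * The first iteration writes one level-2 PTE covering 512 pages
 * (lvl_pages == 512); sg_res then drops below lvl_pages, so 'pte' is
 * flushed and reset, and the final page is written as an ordinary 4KiB
 * level-1 PTE.
 */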
1847
9051aa02
DW
1848static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1849 struct scatterlist *sg, unsigned long nr_pages,
1850 int prot)
ba395927 1851{
9051aa02
DW
1852 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1853}
6f6a00e4 1854
9051aa02
DW
1855static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1856 unsigned long phys_pfn, unsigned long nr_pages,
1857 int prot)
1858{
1859 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1860}
1861
c7151a8d 1862static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1863{
c7151a8d
WH
1864 if (!iommu)
1865 return;
8c11e798
WH
1866
1867 clear_context_table(iommu, bus, devfn);
1868 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1869 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1870 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1871}
1872
1873static void domain_remove_dev_info(struct dmar_domain *domain)
1874{
1875 struct device_domain_info *info;
1876 unsigned long flags;
c7151a8d 1877 struct intel_iommu *iommu;
ba395927
KA
1878
1879 spin_lock_irqsave(&device_domain_lock, flags);
1880 while (!list_empty(&domain->devices)) {
1881 info = list_entry(domain->devices.next,
1882 struct device_domain_info, link);
1883 list_del(&info->link);
1884 list_del(&info->global);
1885 if (info->dev)
358dd8ac 1886 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1887 spin_unlock_irqrestore(&device_domain_lock, flags);
1888
93a23a72 1889 iommu_disable_dev_iotlb(info);
276dbf99 1890 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1891 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1892 free_devinfo_mem(info);
1893
1894 spin_lock_irqsave(&device_domain_lock, flags);
1895 }
1896 spin_unlock_irqrestore(&device_domain_lock, flags);
1897}
1898
1899/*
1900 * find_domain
358dd8ac 1901 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1902 */
38717946 1903static struct dmar_domain *
ba395927
KA
1904find_domain(struct pci_dev *pdev)
1905{
1906 struct device_domain_info *info;
1907
1908 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1909 info = pdev->dev.archdata.iommu;
ba395927
KA
1910 if (info)
1911 return info->domain;
1912 return NULL;
1913}
1914
ba395927
KA
1915/* domain is initialized */
1916static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1917{
1918 struct dmar_domain *domain, *found = NULL;
1919 struct intel_iommu *iommu;
1920 struct dmar_drhd_unit *drhd;
1921 struct device_domain_info *info, *tmp;
1922 struct pci_dev *dev_tmp;
1923 unsigned long flags;
1924 int bus = 0, devfn = 0;
276dbf99 1925 int segment;
2c2e2c38 1926 int ret;
ba395927
KA
1927
1928 domain = find_domain(pdev);
1929 if (domain)
1930 return domain;
1931
276dbf99
DW
1932 segment = pci_domain_nr(pdev->bus);
1933
ba395927
KA
1934 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1935 if (dev_tmp) {
5f4d91a1 1936 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1937 bus = dev_tmp->subordinate->number;
1938 devfn = 0;
1939 } else {
1940 bus = dev_tmp->bus->number;
1941 devfn = dev_tmp->devfn;
1942 }
1943 spin_lock_irqsave(&device_domain_lock, flags);
1944 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1945 if (info->segment == segment &&
1946 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1947 found = info->domain;
1948 break;
1949 }
1950 }
1951 spin_unlock_irqrestore(&device_domain_lock, flags);
1952 /* pcie-pci bridge already has a domain, use it */
1953 if (found) {
1954 domain = found;
1955 goto found_domain;
1956 }
1957 }
1958
2c2e2c38
FY
1959 domain = alloc_domain();
1960 if (!domain)
1961 goto error;
1962
ba395927
KA
1963 /* Allocate new domain for the device */
1964 drhd = dmar_find_matched_drhd_unit(pdev);
1965 if (!drhd) {
1966 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1967 pci_name(pdev));
1968 return NULL;
1969 }
1970 iommu = drhd->iommu;
1971
2c2e2c38
FY
1972 ret = iommu_attach_domain(domain, iommu);
1973 if (ret) {
2fe9723d 1974 free_domain_mem(domain);
ba395927 1975 goto error;
2c2e2c38 1976 }
ba395927
KA
1977
1978 if (domain_init(domain, gaw)) {
1979 domain_exit(domain);
1980 goto error;
1981 }
1982
1983 /* register pcie-to-pci device */
1984 if (dev_tmp) {
1985 info = alloc_devinfo_mem();
1986 if (!info) {
1987 domain_exit(domain);
1988 goto error;
1989 }
276dbf99 1990 info->segment = segment;
ba395927
KA
1991 info->bus = bus;
1992 info->devfn = devfn;
1993 info->dev = NULL;
1994 info->domain = domain;
1995 /* This domain is shared by devices under p2p bridge */
3b5410e7 1996 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1997
1998 /* pcie-to-pci bridge already has a domain, use it */
1999 found = NULL;
2000 spin_lock_irqsave(&device_domain_lock, flags);
2001 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
2002 if (tmp->segment == segment &&
2003 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
2004 found = tmp->domain;
2005 break;
2006 }
2007 }
2008 if (found) {
00dfff77 2009 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
2010 free_devinfo_mem(info);
2011 domain_exit(domain);
2012 domain = found;
2013 } else {
2014 list_add(&info->link, &domain->devices);
2015 list_add(&info->global, &device_domain_list);
00dfff77 2016 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2017 }
ba395927
KA
2018 }
2019
2020found_domain:
2021 info = alloc_devinfo_mem();
2022 if (!info)
2023 goto error;
276dbf99 2024 info->segment = segment;
ba395927
KA
2025 info->bus = pdev->bus->number;
2026 info->devfn = pdev->devfn;
2027 info->dev = pdev;
2028 info->domain = domain;
2029 spin_lock_irqsave(&device_domain_lock, flags);
2030 /* somebody else raced us and set it up already */
2031 found = find_domain(pdev);
2032 if (found != NULL) {
2033 spin_unlock_irqrestore(&device_domain_lock, flags);
2034 if (found != domain) {
2035 domain_exit(domain);
2036 domain = found;
2037 }
2038 free_devinfo_mem(info);
2039 return domain;
2040 }
2041 list_add(&info->link, &domain->devices);
2042 list_add(&info->global, &device_domain_list);
358dd8ac 2043 pdev->dev.archdata.iommu = info;
ba395927
KA
2044 spin_unlock_irqrestore(&device_domain_lock, flags);
2045 return domain;
2046error:
2047 /* recheck it here, maybe others set it */
2048 return find_domain(pdev);
2049}
2050
2c2e2c38 2051static int iommu_identity_mapping;
e0fc7e0b
DW
2052#define IDENTMAP_ALL 1
2053#define IDENTMAP_GFX 2
2054#define IDENTMAP_AZALIA 4
2c2e2c38 2055
b213203e
DW
2056static int iommu_domain_identity_map(struct dmar_domain *domain,
2057 unsigned long long start,
2058 unsigned long long end)
ba395927 2059{
c5395d5c
DW
2060 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2061 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2062
2063 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2064 dma_to_mm_pfn(last_vpfn))) {
ba395927 2065 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2066 return -ENOMEM;
ba395927
KA
2067 }
2068
c5395d5c
DW
2069 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2070 start, end, domain->id);
ba395927
KA
2071 /*
2072 * RMRR range might have overlap with physical memory range,
2073 * clear it first
2074 */
c5395d5c 2075 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2076
c5395d5c
DW
2077 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2078 last_vpfn - first_vpfn + 1,
61df7443 2079 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2080}
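/*
 * Editor's worked example (illustrative only): identity-mapping the
 * inclusive range 0xd0000 - 0xd3fff gives
 *
 *	first_vpfn = 0xd0000 >> 12 = 0xd0
 *	last_vpfn  = 0xd3fff >> 12 = 0xd3
 *
 * so last_vpfn - first_vpfn + 1 = 4 pages are mapped 1:1.  Note that
 * 'end' is treated as inclusive, which is why the callers below pass
 * rmrr->end_address and 16MiB - 1 rather than one-past-the-end values.
 */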
2081
2082static int iommu_prepare_identity_map(struct pci_dev *pdev,
2083 unsigned long long start,
2084 unsigned long long end)
2085{
2086 struct dmar_domain *domain;
2087 int ret;
2088
c7ab48d2 2089 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2090 if (!domain)
2091 return -ENOMEM;
2092
19943b0e
DW
2093 /* For _hardware_ passthrough, don't bother. But for software
2094 passthrough, we do it anyway -- it may indicate a memory
2095 range which is reserved in E820, and so didn't get set
2096 up to start with in si_domain */
2097 if (domain == si_domain && hw_pass_through) {
2098 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2099 pci_name(pdev), start, end);
2100 return 0;
2101 }
2102
2103 printk(KERN_INFO
2104 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2105 pci_name(pdev), start, end);
2ff729f5 2106
5595b528
DW
2107 if (end < start) {
2108 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2109 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2110 dmi_get_system_info(DMI_BIOS_VENDOR),
2111 dmi_get_system_info(DMI_BIOS_VERSION),
2112 dmi_get_system_info(DMI_PRODUCT_VERSION));
2113 ret = -EIO;
2114 goto error;
2115 }
2116
2ff729f5
DW
2117 if (end >> agaw_to_width(domain->agaw)) {
2118 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2119 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2120 agaw_to_width(domain->agaw),
2121 dmi_get_system_info(DMI_BIOS_VENDOR),
2122 dmi_get_system_info(DMI_BIOS_VERSION),
2123 dmi_get_system_info(DMI_PRODUCT_VERSION));
2124 ret = -EIO;
2125 goto error;
2126 }
19943b0e 2127
b213203e 2128 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2129 if (ret)
2130 goto error;
2131
2132 /* context entry init */
4ed0d3e6 2133 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2134 if (ret)
2135 goto error;
2136
2137 return 0;
2138
2139 error:
ba395927
KA
2140 domain_exit(domain);
2141 return ret;
ba395927
KA
2142}
2143
2144static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2145 struct pci_dev *pdev)
2146{
358dd8ac 2147 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2148 return 0;
2149 return iommu_prepare_identity_map(pdev, rmrr->base_address,
70e535d1 2150 rmrr->end_address);
ba395927
KA
2151}
2152
49a0429e
KA
2153#ifdef CONFIG_DMAR_FLOPPY_WA
2154static inline void iommu_prepare_isa(void)
2155{
2156 struct pci_dev *pdev;
2157 int ret;
2158
2159 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2160 if (!pdev)
2161 return;
2162
c7ab48d2 2163 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
70e535d1 2164 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
49a0429e
KA
2165
2166 if (ret)
c7ab48d2
DW
2167 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2168 "floppy might not work\n");
49a0429e
KA
2169
2170}
2171#else
2172static inline void iommu_prepare_isa(void)
2173{
2174 return;
2175}
2176#endif /* !CONFIG_DMAR_FLOPPY_WA */
2177
2c2e2c38 2178static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2179
2180static int __init si_domain_work_fn(unsigned long start_pfn,
2181 unsigned long end_pfn, void *datax)
2182{
2183 int *ret = datax;
2184
2185 *ret = iommu_domain_identity_map(si_domain,
2186 (uint64_t)start_pfn << PAGE_SHIFT,
2187 (uint64_t)end_pfn << PAGE_SHIFT);
2188 return *ret;
2189
2190}
2191
071e1374 2192static int __init si_domain_init(int hw)
2c2e2c38
FY
2193{
2194 struct dmar_drhd_unit *drhd;
2195 struct intel_iommu *iommu;
c7ab48d2 2196 int nid, ret = 0;
2c2e2c38
FY
2197
2198 si_domain = alloc_domain();
2199 if (!si_domain)
2200 return -EFAULT;
2201
c7ab48d2 2202 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2203
2204 for_each_active_iommu(iommu, drhd) {
2205 ret = iommu_attach_domain(si_domain, iommu);
2206 if (ret) {
2207 domain_exit(si_domain);
2208 return -EFAULT;
2209 }
2210 }
2211
2212 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2213 domain_exit(si_domain);
2214 return -EFAULT;
2215 }
2216
2217 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2218
19943b0e
DW
2219 if (hw)
2220 return 0;
2221
c7ab48d2
DW
2222 for_each_online_node(nid) {
2223 work_with_active_regions(nid, si_domain_work_fn, &ret);
2224 if (ret)
2225 return ret;
2226 }
2227
2c2e2c38
FY
2228 return 0;
2229}
2230
2231static void domain_remove_one_dev_info(struct dmar_domain *domain,
2232 struct pci_dev *pdev);
2233static int identity_mapping(struct pci_dev *pdev)
2234{
2235 struct device_domain_info *info;
2236
2237 if (likely(!iommu_identity_mapping))
2238 return 0;
2239
cb452a40
MT
2240 info = pdev->dev.archdata.iommu;
2241 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2242 return (info->domain == si_domain);
2c2e2c38 2243
2c2e2c38
FY
2244 return 0;
2245}
2246
2247static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2248 struct pci_dev *pdev,
2249 int translation)
2c2e2c38
FY
2250{
2251 struct device_domain_info *info;
2252 unsigned long flags;
5fe60f4e 2253 int ret;
2c2e2c38
FY
2254
2255 info = alloc_devinfo_mem();
2256 if (!info)
2257 return -ENOMEM;
2258
5fe60f4e
DW
2259 ret = domain_context_mapping(domain, pdev, translation);
2260 if (ret) {
2261 free_devinfo_mem(info);
2262 return ret;
2263 }
2264
2c2e2c38
FY
2265 info->segment = pci_domain_nr(pdev->bus);
2266 info->bus = pdev->bus->number;
2267 info->devfn = pdev->devfn;
2268 info->dev = pdev;
2269 info->domain = domain;
2270
2271 spin_lock_irqsave(&device_domain_lock, flags);
2272 list_add(&info->link, &domain->devices);
2273 list_add(&info->global, &device_domain_list);
2274 pdev->dev.archdata.iommu = info;
2275 spin_unlock_irqrestore(&device_domain_lock, flags);
2276
2277 return 0;
2278}
2279
6941af28
DW
2280static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2281{
e0fc7e0b
DW
2282 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2283 return 1;
2284
2285 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2286 return 1;
2287
2288 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2289 return 0;
6941af28 2290
3dfc813d
DW
2291 /*
2292 * We want to start off with all devices in the 1:1 domain, and
2293 * take them out later if we find they can't access all of memory.
2294 *
2295 * However, we can't do this for PCI devices behind bridges,
2296 * because all PCI devices behind the same bridge will end up
2297 * with the same source-id on their transactions.
2298 *
2299 * Practically speaking, we can't change things around for these
2300 * devices at run-time, because we can't be sure there'll be no
2301 * DMA transactions in flight for any of their siblings.
2302 *
2303 * So PCI devices (unless they're on the root bus) as well as
2304 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2305 * the 1:1 domain, just in _case_ one of their siblings turns out
2306 * not to be able to map all of memory.
2307 */
5f4d91a1 2308 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2309 if (!pci_is_root_bus(pdev->bus))
2310 return 0;
2311 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2312 return 0;
2313 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2314 return 0;
2315
2316 /*
2317 * At boot time, we don't yet know if devices will be 64-bit capable.
2318 * Assume that they will -- if they turn out not to be, then we can
2319 * take them out of the 1:1 domain later.
2320 */
8fcc5372
CW
2321 if (!startup) {
2322 /*
2323 * If the device's dma_mask is less than the system's memory
2324 * size then this is not a candidate for identity mapping.
2325 */
2326 u64 dma_mask = pdev->dma_mask;
2327
2328 if (pdev->dev.coherent_dma_mask &&
2329 pdev->dev.coherent_dma_mask < dma_mask)
2330 dma_mask = pdev->dev.coherent_dma_mask;
2331
2332 return dma_mask >= dma_get_required_mask(&pdev->dev);
2333 }
6941af28
DW
2334
2335 return 1;
2336}
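/*
 * Editor's worked example (illustrative only) of the !startup check
 * above: a device whose driver set a 32-bit pdev->dma_mask on a machine
 * with, say, 8GiB of RAM sees dma_get_required_mask() report more than
 * 32 bits, so this returns 0 and iommu_no_mapping() will move the device
 * out of the static 1:1 domain; a device with a full 64-bit mask keeps
 * its identity mapping.
 */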
2337
071e1374 2338static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2339{
2c2e2c38
FY
2340 struct pci_dev *pdev = NULL;
2341 int ret;
2342
19943b0e 2343 ret = si_domain_init(hw);
2c2e2c38
FY
2344 if (ret)
2345 return -EFAULT;
2346
2c2e2c38 2347 for_each_pci_dev(pdev) {
825507d6
MT
2348 /* Skip Host/PCI Bridge devices */
2349 if (IS_BRIDGE_HOST_DEVICE(pdev))
2350 continue;
6941af28 2351 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2352 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2353 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2354
5fe60f4e 2355 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2356 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2357 CONTEXT_TT_MULTI_LEVEL);
2358 if (ret)
2359 return ret;
62edf5dc 2360 }
2c2e2c38
FY
2361 }
2362
2363 return 0;
2364}
2365
b779260b 2366static int __init init_dmars(void)
ba395927
KA
2367{
2368 struct dmar_drhd_unit *drhd;
2369 struct dmar_rmrr_unit *rmrr;
2370 struct pci_dev *pdev;
2371 struct intel_iommu *iommu;
9d783ba0 2372 int i, ret;
2c2e2c38 2373
ba395927
KA
2374 /*
2375 * for each drhd
2376 * allocate root
2377 * initialize and program root entry to not present
2378 * endfor
2379 */
2380 for_each_drhd_unit(drhd) {
5e0d2a6f 2381 g_num_of_iommus++;
2382 /*
2383 * lock not needed as this is only incremented in the single-
2384 * threaded kernel __init code path; all other accesses are
2385 * read only
2386 */
2387 }
2388
d9630fe9
WH
2389 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2390 GFP_KERNEL);
2391 if (!g_iommus) {
2392 printk(KERN_ERR "Allocating global iommu array failed\n");
2393 ret = -ENOMEM;
2394 goto error;
2395 }
2396
80b20dd8 2397 deferred_flush = kzalloc(g_num_of_iommus *
2398 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2399 if (!deferred_flush) {
5e0d2a6f 2400 ret = -ENOMEM;
2401 goto error;
2402 }
2403
5e0d2a6f 2404 for_each_drhd_unit(drhd) {
2405 if (drhd->ignored)
2406 continue;
1886e8a9
SS
2407
2408 iommu = drhd->iommu;
d9630fe9 2409 g_iommus[iommu->seq_id] = iommu;
ba395927 2410
e61d98d8
SS
2411 ret = iommu_init_domains(iommu);
2412 if (ret)
2413 goto error;
2414
ba395927
KA
2415 /*
2416 * TBD:
2417 * we could share the same root & context tables
25985edc 2418 * among all IOMMUs. Need to split it later.
ba395927
KA
2419 */
2420 ret = iommu_alloc_root_entry(iommu);
2421 if (ret) {
2422 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2423 goto error;
2424 }
4ed0d3e6 2425 if (!ecap_pass_through(iommu->ecap))
19943b0e 2426 hw_pass_through = 0;
ba395927
KA
2427 }
2428
1531a6a6
SS
2429 /*
2430 * Start from a sane iommu hardware state.
2431 */
a77b67d4
YS
2432 for_each_drhd_unit(drhd) {
2433 if (drhd->ignored)
2434 continue;
2435
2436 iommu = drhd->iommu;
1531a6a6
SS
2437
2438 /*
2439 * If the queued invalidation is already initialized by us
2440 * (for example, while enabling interrupt-remapping) then
2441 * we got the things already rolling from a sane state.
2442 */
2443 if (iommu->qi)
2444 continue;
2445
2446 /*
2447 * Clear any previous faults.
2448 */
2449 dmar_fault(-1, iommu);
2450 /*
2451 * Disable queued invalidation if supported and already enabled
2452 * before OS handover.
2453 */
2454 dmar_disable_qi(iommu);
2455 }
2456
2457 for_each_drhd_unit(drhd) {
2458 if (drhd->ignored)
2459 continue;
2460
2461 iommu = drhd->iommu;
2462
a77b67d4
YS
2463 if (dmar_enable_qi(iommu)) {
2464 /*
2465 * Queued Invalidate not enabled, use Register Based
2466 * Invalidate
2467 */
2468 iommu->flush.flush_context = __iommu_flush_context;
2469 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2470 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2471 "invalidation\n",
680a7524 2472 iommu->seq_id,
b4e0f9eb 2473 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2474 } else {
2475 iommu->flush.flush_context = qi_flush_context;
2476 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2477 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2478 "invalidation\n",
680a7524 2479 iommu->seq_id,
b4e0f9eb 2480 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2481 }
2482 }
2483
19943b0e 2484 if (iommu_pass_through)
e0fc7e0b
DW
2485 iommu_identity_mapping |= IDENTMAP_ALL;
2486
19943b0e 2487#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2488 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2489#endif
e0fc7e0b
DW
2490
2491 check_tylersburg_isoch();
2492
ba395927 2493 /*
19943b0e
DW
2494 * If pass through is not set or not enabled, set up context entries for
2495 * identity mappings for rmrr, gfx and isa, and possibly fall back to the
2496 * static identity mapping if iommu_identity_mapping is set.
ba395927 2497 */
19943b0e
DW
2498 if (iommu_identity_mapping) {
2499 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2500 if (ret) {
19943b0e
DW
2501 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2502 goto error;
ba395927
KA
2503 }
2504 }
ba395927 2505 /*
19943b0e
DW
2506 * For each rmrr
2507 * for each dev attached to rmrr
2508 * do
2509 * locate drhd for dev, alloc domain for dev
2510 * allocate free domain
2511 * allocate page table entries for rmrr
2512 * if context not allocated for bus
2513 * allocate and init context
2514 * set present in root table for this bus
2515 * init context with domain, translation etc
2516 * endfor
2517 * endfor
ba395927 2518 */
19943b0e
DW
2519 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2520 for_each_rmrr_units(rmrr) {
2521 for (i = 0; i < rmrr->devices_cnt; i++) {
2522 pdev = rmrr->devices[i];
2523 /*
2524 * some BIOSes list non-existent devices in the
2525 * DMAR table.
2526 */
2527 if (!pdev)
2528 continue;
2529 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2530 if (ret)
2531 printk(KERN_ERR
2532 "IOMMU: mapping reserved region failed\n");
ba395927 2533 }
4ed0d3e6 2534 }
49a0429e 2535
19943b0e
DW
2536 iommu_prepare_isa();
2537
ba395927
KA
2538 /*
2539 * for each drhd
2540 * enable fault log
2541 * global invalidate context cache
2542 * global invalidate iotlb
2543 * enable translation
2544 */
2545 for_each_drhd_unit(drhd) {
51a63e67
JC
2546 if (drhd->ignored) {
2547 /*
2548 * we always have to disable PMRs or DMA may fail on
2549 * this device
2550 */
2551 if (force_on)
2552 iommu_disable_protect_mem_regions(drhd->iommu);
ba395927 2553 continue;
51a63e67 2554 }
ba395927 2555 iommu = drhd->iommu;
ba395927
KA
2556
2557 iommu_flush_write_buffer(iommu);
2558
3460a6d9
KA
2559 ret = dmar_set_interrupt(iommu);
2560 if (ret)
2561 goto error;
2562
ba395927
KA
2563 iommu_set_root_entry(iommu);
2564
4c25a2c1 2565 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2566 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2567
ba395927
KA
2568 ret = iommu_enable_translation(iommu);
2569 if (ret)
2570 goto error;
b94996c9
DW
2571
2572 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2573 }
2574
2575 return 0;
2576error:
2577 for_each_drhd_unit(drhd) {
2578 if (drhd->ignored)
2579 continue;
2580 iommu = drhd->iommu;
2581 free_iommu(iommu);
2582 }
d9630fe9 2583 kfree(g_iommus);
ba395927
KA
2584 return ret;
2585}
2586
5a5e02a6 2587/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2588static struct iova *intel_alloc_iova(struct device *dev,
2589 struct dmar_domain *domain,
2590 unsigned long nrpages, uint64_t dma_mask)
ba395927 2591{
ba395927 2592 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2593 struct iova *iova = NULL;
ba395927 2594
875764de
DW
2595 /* Restrict dma_mask to the width that the iommu can handle */
2596 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2597
2598 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2599 /*
2600 * First try to allocate an io virtual address in
284901a9 2601 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2602 * from higher range
ba395927 2603 */
875764de
DW
2604 iova = alloc_iova(&domain->iovad, nrpages,
2605 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2606 if (iova)
2607 return iova;
2608 }
2609 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2610 if (unlikely(!iova)) {
2611 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2612 nrpages, pci_name(pdev));
f76aec76
KA
2613 return NULL;
2614 }
2615
2616 return iova;
2617}
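/*
 * Editor's note (illustrative only): callers work in MM pages here.  For
 * example, __intel_map_single() below converts its VTD page count with
 * dma_to_mm_pfn() before calling in (an 8KiB request with 4KiB pages
 * arrives as nrpages == 2), and converts iova->pfn_lo back with
 * mm_to_dma_pfn() when it populates the VT-d page tables.
 */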
2618
147202aa 2619static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2620{
2621 struct dmar_domain *domain;
2622 int ret;
2623
2624 domain = get_domain_for_dev(pdev,
2625 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2626 if (!domain) {
2627 printk(KERN_ERR
2628 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2629 return NULL;
ba395927
KA
2630 }
2631
2632 /* make sure context mapping is ok */
5331fe6f 2633 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2634 ret = domain_context_mapping(domain, pdev,
2635 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2636 if (ret) {
2637 printk(KERN_ERR
2638 "Domain context map for %s failed",
2639 pci_name(pdev));
4fe05bbc 2640 return NULL;
f76aec76 2641 }
ba395927
KA
2642 }
2643
f76aec76
KA
2644 return domain;
2645}
2646
147202aa
DW
2647static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2648{
2649 struct device_domain_info *info;
2650
2651 /* No lock here, assumes no domain exit in normal case */
2652 info = dev->dev.archdata.iommu;
2653 if (likely(info))
2654 return info->domain;
2655
2656 return __get_valid_domain_for_dev(dev);
2657}
2658
2c2e2c38
FY
2659static int iommu_dummy(struct pci_dev *pdev)
2660{
2661 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2662}
2663
2664/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2665static int iommu_no_mapping(struct device *dev)
2c2e2c38 2666{
73676832 2667 struct pci_dev *pdev;
2c2e2c38
FY
2668 int found;
2669
73676832
DW
2670 if (unlikely(dev->bus != &pci_bus_type))
2671 return 1;
2672
2673 pdev = to_pci_dev(dev);
1e4c64c4
DW
2674 if (iommu_dummy(pdev))
2675 return 1;
2676
2c2e2c38 2677 if (!iommu_identity_mapping)
1e4c64c4 2678 return 0;
2c2e2c38
FY
2679
2680 found = identity_mapping(pdev);
2681 if (found) {
6941af28 2682 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2683 return 1;
2684 else {
2685 /*
2686 * A 32-bit DMA device is removed from si_domain and falls back
2687 * to non-identity mapping.
2688 */
2689 domain_remove_one_dev_info(si_domain, pdev);
2690 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2691 pci_name(pdev));
2692 return 0;
2693 }
2694 } else {
2695 /*
2696 * If a 64-bit DMA device is detached from a VM, the device
2697 * is put into si_domain for identity mapping.
2698 */
6941af28 2699 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2700 int ret;
5fe60f4e
DW
2701 ret = domain_add_dev_info(si_domain, pdev,
2702 hw_pass_through ?
2703 CONTEXT_TT_PASS_THROUGH :
2704 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2705 if (!ret) {
2706 printk(KERN_INFO "64bit %s uses identity mapping\n",
2707 pci_name(pdev));
2708 return 1;
2709 }
2710 }
2711 }
2712
1e4c64c4 2713 return 0;
2c2e2c38
FY
2714}
2715
bb9e6d65
FT
2716static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2717 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2718{
2719 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2720 struct dmar_domain *domain;
5b6985ce 2721 phys_addr_t start_paddr;
f76aec76
KA
2722 struct iova *iova;
2723 int prot = 0;
6865f0d1 2724 int ret;
8c11e798 2725 struct intel_iommu *iommu;
33041ec0 2726 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2727
2728 BUG_ON(dir == DMA_NONE);
2c2e2c38 2729
73676832 2730 if (iommu_no_mapping(hwdev))
6865f0d1 2731 return paddr;
f76aec76
KA
2732
2733 domain = get_valid_domain_for_dev(pdev);
2734 if (!domain)
2735 return 0;
2736
8c11e798 2737 iommu = domain_get_iommu(domain);
88cb6a74 2738 size = aligned_nrpages(paddr, size);
f76aec76 2739
c681d0ba 2740 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
2741 if (!iova)
2742 goto error;
2743
ba395927
KA
2744 /*
2745 * Check if DMAR supports zero-length reads on write only
2746 * mappings..
2747 */
2748 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2749 !cap_zlr(iommu->cap))
ba395927
KA
2750 prot |= DMA_PTE_READ;
2751 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2752 prot |= DMA_PTE_WRITE;
2753 /*
6865f0d1 2754 * paddr - (paddr + size) might cover a partial page; we should map the whole
ba395927 2755 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 2756 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
2757 * is not a big problem
2758 */
0ab36de2 2759 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2760 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2761 if (ret)
2762 goto error;
2763
1f0ef2aa
DW
2764 /* it's a non-present to present mapping. Only flush if caching mode */
2765 if (cap_caching_mode(iommu->cap))
82653633 2766 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2767 else
8c11e798 2768 iommu_flush_write_buffer(iommu);
f76aec76 2769
03d6a246
DW
2770 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2771 start_paddr += paddr & ~PAGE_MASK;
2772 return start_paddr;
ba395927 2773
ba395927 2774error:
f76aec76
KA
2775 if (iova)
2776 __free_iova(&domain->iovad, iova);
4cf2e75d 2777 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2778 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2779 return 0;
2780}
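/*
 * Editor's worked example (illustrative only, 4KiB pages): mapping
 * paddr 0x12345678 with size 0x100 allocates one IOVA page; if the
 * allocator returns iova->pfn_lo == 0xffffe, the driver is handed
 *
 *	(0xffffe << PAGE_SHIFT) + (0x12345678 & ~PAGE_MASK) = 0xffffe678,
 *
 * i.e. the buffer's offset within its page is preserved in the
 * remapped DMA address.
 */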
2781
ffbbef5c
FT
2782static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2783 unsigned long offset, size_t size,
2784 enum dma_data_direction dir,
2785 struct dma_attrs *attrs)
bb9e6d65 2786{
ffbbef5c
FT
2787 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2788 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2789}
2790
5e0d2a6f 2791static void flush_unmaps(void)
2792{
80b20dd8 2793 int i, j;
5e0d2a6f 2794
5e0d2a6f 2795 timer_on = 0;
2796
2797 /* just flush them all */
2798 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2799 struct intel_iommu *iommu = g_iommus[i];
2800 if (!iommu)
2801 continue;
c42d9f32 2802
9dd2fe89
YZ
2803 if (!deferred_flush[i].next)
2804 continue;
2805
78d5f0f5
NA
2806 /* In caching mode, global flushes make emulation expensive */
2807 if (!cap_caching_mode(iommu->cap))
2808 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2809 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2810 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2811 unsigned long mask;
2812 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2813 struct dmar_domain *domain = deferred_flush[i].domain[j];
2814
2815 /* On real hardware multiple invalidations are expensive */
2816 if (cap_caching_mode(iommu->cap))
2817 iommu_flush_iotlb_psi(iommu, domain->id,
2818 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2819 else {
2820 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2821 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2822 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2823 }
93a23a72 2824 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2825 }
9dd2fe89 2826 deferred_flush[i].next = 0;
5e0d2a6f 2827 }
2828
5e0d2a6f 2829 list_size = 0;
5e0d2a6f 2830}
2831
2832static void flush_unmaps_timeout(unsigned long data)
2833{
80b20dd8 2834 unsigned long flags;
2835
2836 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2837 flush_unmaps();
80b20dd8 2838 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2839}
2840
2841static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2842{
2843 unsigned long flags;
80b20dd8 2844 int next, iommu_id;
8c11e798 2845 struct intel_iommu *iommu;
5e0d2a6f 2846
2847 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2848 if (list_size == HIGH_WATER_MARK)
2849 flush_unmaps();
2850
8c11e798
WH
2851 iommu = domain_get_iommu(dom);
2852 iommu_id = iommu->seq_id;
c42d9f32 2853
80b20dd8 2854 next = deferred_flush[iommu_id].next;
2855 deferred_flush[iommu_id].domain[next] = dom;
2856 deferred_flush[iommu_id].iova[next] = iova;
2857 deferred_flush[iommu_id].next++;
5e0d2a6f 2858
2859 if (!timer_on) {
2860 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2861 timer_on = 1;
2862 }
2863 list_size++;
2864 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2865}
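/*
 * Editor's note (illustrative only): in non-strict mode intel_unmap_page()
 * and intel_unmap_sg() only queue the IOVA here; the IOTLB is actually
 * invalidated either when HIGH_WATER_MARK entries have accumulated
 * (flush_unmaps() is called directly above) or when the 10ms unmap_timer
 * fires, trading a short window of stale IOTLB entries for far fewer
 * flush operations.
 */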
2866
ffbbef5c
FT
2867static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2868 size_t size, enum dma_data_direction dir,
2869 struct dma_attrs *attrs)
ba395927 2870{
ba395927 2871 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2872 struct dmar_domain *domain;
d794dc9b 2873 unsigned long start_pfn, last_pfn;
ba395927 2874 struct iova *iova;
8c11e798 2875 struct intel_iommu *iommu;
ba395927 2876
73676832 2877 if (iommu_no_mapping(dev))
f76aec76 2878 return;
2c2e2c38 2879
ba395927
KA
2880 domain = find_domain(pdev);
2881 BUG_ON(!domain);
2882
8c11e798
WH
2883 iommu = domain_get_iommu(domain);
2884
ba395927 2885 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2886 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2887 (unsigned long long)dev_addr))
ba395927 2888 return;
ba395927 2889
d794dc9b
DW
2890 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2891 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2892
d794dc9b
DW
2893 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2894 pci_name(pdev), start_pfn, last_pfn);
ba395927 2895
f76aec76 2896 /* clear the whole page */
d794dc9b
DW
2897 dma_pte_clear_range(domain, start_pfn, last_pfn);
2898
f76aec76 2899 /* free page tables */
d794dc9b
DW
2900 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2901
5e0d2a6f 2902 if (intel_iommu_strict) {
03d6a246 2903 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2904 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2905 /* free iova */
2906 __free_iova(&domain->iovad, iova);
2907 } else {
2908 add_unmap(domain, iova);
2909 /*
2910 * queue up the release of the unmap to save the roughly 1/6th of
2911 * the CPU time used up by the iotlb flush operation...
2912 */
5e0d2a6f 2913 }
ba395927
KA
2914}
2915
d7ab5c46
FT
2916static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2917 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2918{
2919 void *vaddr;
2920 int order;
2921
5b6985ce 2922 size = PAGE_ALIGN(size);
ba395927 2923 order = get_order(size);
e8bb910d
AW
2924
2925 if (!iommu_no_mapping(hwdev))
2926 flags &= ~(GFP_DMA | GFP_DMA32);
2927 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2928 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2929 flags |= GFP_DMA;
2930 else
2931 flags |= GFP_DMA32;
2932 }
ba395927
KA
2933
2934 vaddr = (void *)__get_free_pages(flags, order);
2935 if (!vaddr)
2936 return NULL;
2937 memset(vaddr, 0, size);
2938
bb9e6d65
FT
2939 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2940 DMA_BIDIRECTIONAL,
2941 hwdev->coherent_dma_mask);
ba395927
KA
2942 if (*dma_handle)
2943 return vaddr;
2944 free_pages((unsigned long)vaddr, order);
2945 return NULL;
2946}
2947
d7ab5c46
FT
2948static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2949 dma_addr_t dma_handle)
ba395927
KA
2950{
2951 int order;
2952
5b6985ce 2953 size = PAGE_ALIGN(size);
ba395927
KA
2954 order = get_order(size);
2955
0db9b7ae 2956 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2957 free_pages((unsigned long)vaddr, order);
2958}
2959
d7ab5c46
FT
2960static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2961 int nelems, enum dma_data_direction dir,
2962 struct dma_attrs *attrs)
ba395927 2963{
ba395927
KA
2964 struct pci_dev *pdev = to_pci_dev(hwdev);
2965 struct dmar_domain *domain;
d794dc9b 2966 unsigned long start_pfn, last_pfn;
f76aec76 2967 struct iova *iova;
8c11e798 2968 struct intel_iommu *iommu;
ba395927 2969
73676832 2970 if (iommu_no_mapping(hwdev))
ba395927
KA
2971 return;
2972
2973 domain = find_domain(pdev);
8c11e798
WH
2974 BUG_ON(!domain);
2975
2976 iommu = domain_get_iommu(domain);
ba395927 2977
c03ab37c 2978 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2979 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2980 (unsigned long long)sglist[0].dma_address))
f76aec76 2981 return;
f76aec76 2982
d794dc9b
DW
2983 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2984 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2985
2986 /* clear the whole page */
d794dc9b
DW
2987 dma_pte_clear_range(domain, start_pfn, last_pfn);
2988
f76aec76 2989 /* free page tables */
d794dc9b 2990 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2991
acea0018
DW
2992 if (intel_iommu_strict) {
2993 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2994 last_pfn - start_pfn + 1, 0);
acea0018
DW
2995 /* free iova */
2996 __free_iova(&domain->iovad, iova);
2997 } else {
2998 add_unmap(domain, iova);
2999 /*
3000 * queue up the release of the unmap to save the roughly 1/6th of
3001 * the CPU time used up by the iotlb flush operation...
3002 */
3003 }
ba395927
KA
3004}
3005
ba395927 3006static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3007 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3008{
3009 int i;
c03ab37c 3010 struct scatterlist *sg;
ba395927 3011
c03ab37c 3012 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3013 BUG_ON(!sg_page(sg));
4cf2e75d 3014 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3015 sg->dma_length = sg->length;
ba395927
KA
3016 }
3017 return nelems;
3018}
3019
d7ab5c46
FT
3020static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3021 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3022{
ba395927 3023 int i;
ba395927
KA
3024 struct pci_dev *pdev = to_pci_dev(hwdev);
3025 struct dmar_domain *domain;
f76aec76
KA
3026 size_t size = 0;
3027 int prot = 0;
f76aec76
KA
3028 struct iova *iova = NULL;
3029 int ret;
c03ab37c 3030 struct scatterlist *sg;
b536d24d 3031 unsigned long start_vpfn;
8c11e798 3032 struct intel_iommu *iommu;
ba395927
KA
3033
3034 BUG_ON(dir == DMA_NONE);
73676832 3035 if (iommu_no_mapping(hwdev))
c03ab37c 3036 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 3037
f76aec76
KA
3038 domain = get_valid_domain_for_dev(pdev);
3039 if (!domain)
3040 return 0;
3041
8c11e798
WH
3042 iommu = domain_get_iommu(domain);
3043
b536d24d 3044 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3045 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3046
5a5e02a6
DW
3047 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3048 pdev->dma_mask);
f76aec76 3049 if (!iova) {
c03ab37c 3050 sglist->dma_length = 0;
f76aec76
KA
3051 return 0;
3052 }
3053
3054 /*
3055 * Check if DMAR supports zero-length reads on write only
3056 * mappings..
3057 */
3058 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3059 !cap_zlr(iommu->cap))
f76aec76
KA
3060 prot |= DMA_PTE_READ;
3061 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3062 prot |= DMA_PTE_WRITE;
3063
b536d24d 3064 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3065
f532959b 3066 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3067 if (unlikely(ret)) {
3068 /* clear the page */
3069 dma_pte_clear_range(domain, start_vpfn,
3070 start_vpfn + size - 1);
3071 /* free page tables */
3072 dma_pte_free_pagetable(domain, start_vpfn,
3073 start_vpfn + size - 1);
3074 /* free iova */
3075 __free_iova(&domain->iovad, iova);
3076 return 0;
ba395927
KA
3077 }
3078
1f0ef2aa
DW
3079 /* it's a non-present to present mapping. Only flush if caching mode */
3080 if (cap_caching_mode(iommu->cap))
82653633 3081 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 3082 else
8c11e798 3083 iommu_flush_write_buffer(iommu);
1f0ef2aa 3084
ba395927
KA
3085 return nelems;
3086}
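/*
 * Editor's worked example (illustrative only, 4KiB pages): a two-entry
 * scatterlist where each entry has offset 0x200 and length 0x1800 needs
 * aligned_nrpages(0x200, 0x1800) == 2 VTD pages per entry, so one 4-page
 * IOVA range is allocated and __domain_mapping() lays the entries out
 * back to back inside it; each sg->dma_address is the start of its
 * 2-page slice plus the original 0x200 offset.
 */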
3087
dfb805e8
FT
3088static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3089{
3090 return !dma_addr;
3091}
3092
160c1d8e 3093struct dma_map_ops intel_dma_ops = {
ba395927
KA
3094 .alloc_coherent = intel_alloc_coherent,
3095 .free_coherent = intel_free_coherent,
ba395927
KA
3096 .map_sg = intel_map_sg,
3097 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3098 .map_page = intel_map_page,
3099 .unmap_page = intel_unmap_page,
dfb805e8 3100 .mapping_error = intel_mapping_error,
ba395927
KA
3101};
3102
3103static inline int iommu_domain_cache_init(void)
3104{
3105 int ret = 0;
3106
3107 iommu_domain_cache = kmem_cache_create("iommu_domain",
3108 sizeof(struct dmar_domain),
3109 0,
3110 SLAB_HWCACHE_ALIGN,
3111
3112 NULL);
3113 if (!iommu_domain_cache) {
3114 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3115 ret = -ENOMEM;
3116 }
3117
3118 return ret;
3119}
3120
3121static inline int iommu_devinfo_cache_init(void)
3122{
3123 int ret = 0;
3124
3125 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3126 sizeof(struct device_domain_info),
3127 0,
3128 SLAB_HWCACHE_ALIGN,
ba395927
KA
3129 NULL);
3130 if (!iommu_devinfo_cache) {
3131 printk(KERN_ERR "Couldn't create devinfo cache\n");
3132 ret = -ENOMEM;
3133 }
3134
3135 return ret;
3136}
3137
3138static inline int iommu_iova_cache_init(void)
3139{
3140 int ret = 0;
3141
3142 iommu_iova_cache = kmem_cache_create("iommu_iova",
3143 sizeof(struct iova),
3144 0,
3145 SLAB_HWCACHE_ALIGN,
ba395927
KA
3146 NULL);
3147 if (!iommu_iova_cache) {
3148 printk(KERN_ERR "Couldn't create iova cache\n");
3149 ret = -ENOMEM;
3150 }
3151
3152 return ret;
3153}
3154
3155static int __init iommu_init_mempool(void)
3156{
3157 int ret;
3158 ret = iommu_iova_cache_init();
3159 if (ret)
3160 return ret;
3161
3162 ret = iommu_domain_cache_init();
3163 if (ret)
3164 goto domain_error;
3165
3166 ret = iommu_devinfo_cache_init();
3167 if (!ret)
3168 return ret;
3169
3170 kmem_cache_destroy(iommu_domain_cache);
3171domain_error:
3172 kmem_cache_destroy(iommu_iova_cache);
3173
3174 return -ENOMEM;
3175}
3176
3177static void __init iommu_exit_mempool(void)
3178{
3179 kmem_cache_destroy(iommu_devinfo_cache);
3180 kmem_cache_destroy(iommu_domain_cache);
3181 kmem_cache_destroy(iommu_iova_cache);
3182
3183}
3184
556ab45f
DW
3185static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3186{
3187 struct dmar_drhd_unit *drhd;
3188 u32 vtbar;
3189 int rc;
3190
3191 /* We know that this device on this chipset has its own IOMMU.
3192 * If we find it under a different IOMMU, then the BIOS is lying
3193 * to us. Hope that the IOMMU for this device is actually
3194 * disabled, and it needs no translation...
3195 */
3196 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3197 if (rc) {
3198 /* "can't" happen */
3199 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3200 return;
3201 }
3202 vtbar &= 0xffff0000;
3203
3204 /* we know that this iommu should be at offset 0xa000 from vtbar */
3205 drhd = dmar_find_matched_drhd_unit(pdev);
3206 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3207 TAINT_FIRMWARE_WORKAROUND,
3208 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3209 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3210}
3211DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3212
ba395927
KA
3213static void __init init_no_remapping_devices(void)
3214{
3215 struct dmar_drhd_unit *drhd;
3216
3217 for_each_drhd_unit(drhd) {
3218 if (!drhd->include_all) {
3219 int i;
3220 for (i = 0; i < drhd->devices_cnt; i++)
3221 if (drhd->devices[i] != NULL)
3222 break;
3223 /* ignore DMAR unit if no pci devices exist */
3224 if (i == drhd->devices_cnt)
3225 drhd->ignored = 1;
3226 }
3227 }
3228
3229 if (dmar_map_gfx)
3230 return;
3231
3232 for_each_drhd_unit(drhd) {
3233 int i;
3234 if (drhd->ignored || drhd->include_all)
3235 continue;
3236
3237 for (i = 0; i < drhd->devices_cnt; i++)
3238 if (drhd->devices[i] &&
3239 !IS_GFX_DEVICE(drhd->devices[i]))
3240 break;
3241
3242 if (i < drhd->devices_cnt)
3243 continue;
3244
3245 /* bypass IOMMU if it is just for gfx devices */
3246 drhd->ignored = 1;
3247 for (i = 0; i < drhd->devices_cnt; i++) {
3248 if (!drhd->devices[i])
3249 continue;
358dd8ac 3250 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3251 }
3252 }
3253}
3254
f59c7b69
FY
3255#ifdef CONFIG_SUSPEND
3256static int init_iommu_hw(void)
3257{
3258 struct dmar_drhd_unit *drhd;
3259 struct intel_iommu *iommu = NULL;
3260
3261 for_each_active_iommu(iommu, drhd)
3262 if (iommu->qi)
3263 dmar_reenable_qi(iommu);
3264
b779260b
JC
3265 for_each_iommu(iommu, drhd) {
3266 if (drhd->ignored) {
3267 /*
3268 * we always have to disable PMRs or DMA may fail on
3269 * this device
3270 */
3271 if (force_on)
3272 iommu_disable_protect_mem_regions(iommu);
3273 continue;
3274 }
3275
f59c7b69
FY
3276 iommu_flush_write_buffer(iommu);
3277
3278 iommu_set_root_entry(iommu);
3279
3280 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3281 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3282 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3283 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3284 if (iommu_enable_translation(iommu))
3285 return 1;
b94996c9 3286 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3287 }
3288
3289 return 0;
3290}
3291
3292static void iommu_flush_all(void)
3293{
3294 struct dmar_drhd_unit *drhd;
3295 struct intel_iommu *iommu;
3296
3297 for_each_active_iommu(iommu, drhd) {
3298 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3299 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3300 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3301 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3302 }
3303}
3304
134fac3f 3305static int iommu_suspend(void)
f59c7b69
FY
3306{
3307 struct dmar_drhd_unit *drhd;
3308 struct intel_iommu *iommu = NULL;
3309 unsigned long flag;
3310
3311 for_each_active_iommu(iommu, drhd) {
3312 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3313 GFP_ATOMIC);
3314 if (!iommu->iommu_state)
3315 goto nomem;
3316 }
3317
3318 iommu_flush_all();
3319
3320 for_each_active_iommu(iommu, drhd) {
3321 iommu_disable_translation(iommu);
3322
3323 spin_lock_irqsave(&iommu->register_lock, flag);
3324
3325 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3326 readl(iommu->reg + DMAR_FECTL_REG);
3327 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3328 readl(iommu->reg + DMAR_FEDATA_REG);
3329 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3330 readl(iommu->reg + DMAR_FEADDR_REG);
3331 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3332 readl(iommu->reg + DMAR_FEUADDR_REG);
3333
3334 spin_unlock_irqrestore(&iommu->register_lock, flag);
3335 }
3336 return 0;
3337
3338nomem:
3339 for_each_active_iommu(iommu, drhd)
3340 kfree(iommu->iommu_state);
3341
3342 return -ENOMEM;
3343}
3344
134fac3f 3345static void iommu_resume(void)
f59c7b69
FY
3346{
3347 struct dmar_drhd_unit *drhd;
3348 struct intel_iommu *iommu = NULL;
3349 unsigned long flag;
3350
3351 if (init_iommu_hw()) {
b779260b
JC
3352 if (force_on)
3353 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3354 else
3355 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3356 return;
f59c7b69
FY
3357 }
3358
3359 for_each_active_iommu(iommu, drhd) {
3360
3361 spin_lock_irqsave(&iommu->register_lock, flag);
3362
3363 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3364 iommu->reg + DMAR_FECTL_REG);
3365 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3366 iommu->reg + DMAR_FEDATA_REG);
3367 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3368 iommu->reg + DMAR_FEADDR_REG);
3369 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3370 iommu->reg + DMAR_FEUADDR_REG);
3371
3372 spin_unlock_irqrestore(&iommu->register_lock, flag);
3373 }
3374
3375 for_each_active_iommu(iommu, drhd)
3376 kfree(iommu->iommu_state);
f59c7b69
FY
3377}
3378
134fac3f 3379static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3380 .resume = iommu_resume,
3381 .suspend = iommu_suspend,
3382};
3383
134fac3f 3384static void __init init_iommu_pm_ops(void)
f59c7b69 3385{
134fac3f 3386 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3387}
3388
3389#else
134fac3f 3390static inline void init_iommu_pm_ops(void) { }
f59c7b69
FY
3391#endif /* CONFIG_SUSPEND */
3392
99dcaded
FY
3393/*
3394 * Here we only respond to the action of a device being unbound from its driver.
3395 *
3396 * A newly added device is not attached to its DMAR domain here yet. That will
3397 * happen when the device is mapped to an iova.
3398 */
3399static int device_notifier(struct notifier_block *nb,
3400 unsigned long action, void *data)
3401{
3402 struct device *dev = data;
3403 struct pci_dev *pdev = to_pci_dev(dev);
3404 struct dmar_domain *domain;
3405
44cd613c
DW
3406 if (iommu_no_mapping(dev))
3407 return 0;
3408
99dcaded
FY
3409 domain = find_domain(pdev);
3410 if (!domain)
3411 return 0;
3412
a97590e5 3413 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
99dcaded
FY
3414 domain_remove_one_dev_info(domain, pdev);
3415
a97590e5
AW
3416 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3417 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3418 list_empty(&domain->devices))
3419 domain_exit(domain);
3420 }
3421
99dcaded
FY
3422 return 0;
3423}
3424
3425static struct notifier_block device_nb = {
3426 .notifier_call = device_notifier,
3427};
3428
ba395927
KA
3429int __init intel_iommu_init(void)
3430{
3431 int ret = 0;
3432
a59b50e9
JC
3433 /* VT-d is required for a TXT/tboot launch, so enforce that */
3434 force_on = tboot_force_iommu();
3435
3436 if (dmar_table_init()) {
3437 if (force_on)
3438 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3439 return -ENODEV;
a59b50e9 3440 }
ba395927 3441
a59b50e9
JC
3442 if (dmar_dev_scope_init()) {
3443 if (force_on)
3444 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3445 return -ENODEV;
a59b50e9 3446 }
1886e8a9 3447
2ae21010
SS
3448 /*
3449 * Check the need for DMA-remapping initialization now.
3450 * Above initialization will also be used by Interrupt-remapping.
3451 */
75f1cdf1 3452 if (no_iommu || dmar_disabled)
2ae21010
SS
3453 return -ENODEV;
3454
51a63e67
JC
3455 if (iommu_init_mempool()) {
3456 if (force_on)
3457 panic("tboot: Failed to initialize iommu memory\n");
3458 return -ENODEV;
3459 }
3460
3461 if (dmar_init_reserved_ranges()) {
3462 if (force_on)
3463 panic("tboot: Failed to reserve iommu ranges\n");
3464 return -ENODEV;
3465 }
ba395927
KA
3466
3467 init_no_remapping_devices();
3468
b779260b 3469 ret = init_dmars();
ba395927 3470 if (ret) {
a59b50e9
JC
3471 if (force_on)
3472 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3473 printk(KERN_ERR "IOMMU: dmar init failed\n");
3474 put_iova_domain(&reserved_iova_list);
3475 iommu_exit_mempool();
3476 return ret;
3477 }
3478 printk(KERN_INFO
3479 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3480
5e0d2a6f 3481 init_timer(&unmap_timer);
75f1cdf1
FT
3482#ifdef CONFIG_SWIOTLB
3483 swiotlb = 0;
3484#endif
19943b0e 3485 dma_ops = &intel_dma_ops;
4ed0d3e6 3486
134fac3f 3487 init_iommu_pm_ops();
a8bcbb0d
JR
3488
3489 register_iommu(&intel_iommu_ops);
3490
99dcaded
FY
3491 bus_register_notifier(&pci_bus_type, &device_nb);
3492
ba395927
KA
3493 return 0;
3494}
e820482c 3495
3199aa6b
HW
3496static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3497 struct pci_dev *pdev)
3498{
3499 struct pci_dev *tmp, *parent;
3500
3501 if (!iommu || !pdev)
3502 return;
3503
3504 /* dependent device detach */
3505 tmp = pci_find_upstream_pcie_bridge(pdev);
3506 /* Secondary interface's bus number and devfn 0 */
3507 if (tmp) {
3508 parent = pdev->bus->self;
3509 while (parent != tmp) {
3510 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3511 parent->devfn);
3199aa6b
HW
3512 parent = parent->bus->self;
3513 }
45e829ea 3514 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3515 iommu_detach_dev(iommu,
3516 tmp->subordinate->number, 0);
3517 else /* this is a legacy PCI bridge */
276dbf99
DW
3518 iommu_detach_dev(iommu, tmp->bus->number,
3519 tmp->devfn);
3199aa6b
HW
3520 }
3521}
3522
2c2e2c38 3523static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3524 struct pci_dev *pdev)
3525{
3526 struct device_domain_info *info;
3527 struct intel_iommu *iommu;
3528 unsigned long flags;
3529 int found = 0;
3530 struct list_head *entry, *tmp;
3531
276dbf99
DW
3532 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3533 pdev->devfn);
c7151a8d
WH
3534 if (!iommu)
3535 return;
3536
3537 spin_lock_irqsave(&device_domain_lock, flags);
3538 list_for_each_safe(entry, tmp, &domain->devices) {
3539 info = list_entry(entry, struct device_domain_info, link);
8519dc44
MH
3540 if (info->segment == pci_domain_nr(pdev->bus) &&
3541 info->bus == pdev->bus->number &&
c7151a8d
WH
3542 info->devfn == pdev->devfn) {
3543 list_del(&info->link);
3544 list_del(&info->global);
3545 if (info->dev)
3546 info->dev->dev.archdata.iommu = NULL;
3547 spin_unlock_irqrestore(&device_domain_lock, flags);
3548
93a23a72 3549 iommu_disable_dev_iotlb(info);
c7151a8d 3550 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3551 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3552 free_devinfo_mem(info);
3553
3554 spin_lock_irqsave(&device_domain_lock, flags);
3555
3556 if (found)
3557 break;
3558 else
3559 continue;
3560 }
3561
3562 /* if there is no other devices under the same iommu
3563 * owned by this domain, clear this iommu in iommu_bmp
3564 * update iommu count and coherency
3565 */
276dbf99
DW
3566 if (iommu == device_to_iommu(info->segment, info->bus,
3567 info->devfn))
c7151a8d
WH
3568 found = 1;
3569 }
3570
3571 if (found == 0) {
3572 unsigned long tmp_flags;
3573 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3574 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3575 domain->iommu_count--;
58c610bd 3576 domain_update_iommu_cap(domain);
c7151a8d 3577 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 3578
9b4554b2
AW
3579 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3580 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3581 spin_lock_irqsave(&iommu->lock, tmp_flags);
3582 clear_bit(domain->id, iommu->domain_ids);
3583 iommu->domains[domain->id] = NULL;
3584 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3585 }
c7151a8d
WH
3586 }
3587
3588 spin_unlock_irqrestore(&device_domain_lock, flags);
3589}
3590
3591static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3592{
3593 struct device_domain_info *info;
3594 struct intel_iommu *iommu;
3595 unsigned long flags1, flags2;
3596
3597 spin_lock_irqsave(&device_domain_lock, flags1);
3598 while (!list_empty(&domain->devices)) {
3599 info = list_entry(domain->devices.next,
3600 struct device_domain_info, link);
3601 list_del(&info->link);
3602 list_del(&info->global);
3603 if (info->dev)
3604 info->dev->dev.archdata.iommu = NULL;
3605
3606 spin_unlock_irqrestore(&device_domain_lock, flags1);
3607
93a23a72 3608 iommu_disable_dev_iotlb(info);
276dbf99 3609 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3610 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3611 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3612
3613 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3614 * and capabilities
c7151a8d
WH
3615 */
3616 spin_lock_irqsave(&domain->iommu_lock, flags2);
3617 if (test_and_clear_bit(iommu->seq_id,
3618 &domain->iommu_bmp)) {
3619 domain->iommu_count--;
58c610bd 3620 domain_update_iommu_cap(domain);
c7151a8d
WH
3621 }
3622 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3623
3624 free_devinfo_mem(info);
3625 spin_lock_irqsave(&device_domain_lock, flags1);
3626 }
3627 spin_unlock_irqrestore(&device_domain_lock, flags1);
3628}
3629
5e98c4b1
WH
3630/* domain id for virtual machine, it won't be set in context */
3631static unsigned long vm_domid;
3632
3633static struct dmar_domain *iommu_alloc_vm_domain(void)
3634{
3635 struct dmar_domain *domain;
3636
3637 domain = alloc_domain_mem();
3638 if (!domain)
3639 return NULL;
3640
3641 domain->id = vm_domid++;
4c923d47 3642 domain->nid = -1;
5e98c4b1
WH
3643 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3644 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3645
3646 return domain;
3647}
3648
2c2e2c38 3649static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3650{
3651 int adjust_width;
3652
3653 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3654 spin_lock_init(&domain->iommu_lock);
3655
3656 domain_reserve_special_ranges(domain);
3657
3658 /* calculate AGAW */
3659 domain->gaw = guest_width;
3660 adjust_width = guestwidth_to_adjustwidth(guest_width);
3661 domain->agaw = width_to_agaw(adjust_width);
3662
3663 INIT_LIST_HEAD(&domain->devices);
3664
3665 domain->iommu_count = 0;
3666 domain->iommu_coherency = 0;
c5b15255 3667 domain->iommu_snooping = 0;
6dd9a7c7 3668 domain->iommu_superpage = 0;
fe40f1e0 3669 domain->max_addr = 0;
4c923d47 3670 domain->nid = -1;
5e98c4b1
WH
3671
3672 /* always allocate the top pgd */
4c923d47 3673 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3674 if (!domain->pgd)
3675 return -ENOMEM;
3676 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3677 return 0;
3678}
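/*
 * Worked example (illustration only, not kernel code): with the default
 * 48-bit guest width, guestwidth_to_adjustwidth(48) leaves the width at 48
 * and width_to_agaw(48) yields AGAW 2, i.e. a 4-level page table.  So
 * md_domain_init() only has to allocate the single top-level pgd page here;
 * the lower levels are populated lazily as mappings are created.
 */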
3679
3680static void iommu_free_vm_domain(struct dmar_domain *domain)
3681{
3682 unsigned long flags;
3683 struct dmar_drhd_unit *drhd;
3684 struct intel_iommu *iommu;
3685 unsigned long i;
3686 unsigned long ndomains;
3687
3688 for_each_drhd_unit(drhd) {
3689 if (drhd->ignored)
3690 continue;
3691 iommu = drhd->iommu;
3692
3693 ndomains = cap_ndoms(iommu->cap);
a45946ab 3694 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3695 if (iommu->domains[i] == domain) {
3696 spin_lock_irqsave(&iommu->lock, flags);
3697 clear_bit(i, iommu->domain_ids);
3698 iommu->domains[i] = NULL;
3699 spin_unlock_irqrestore(&iommu->lock, flags);
3700 break;
3701 }
5e98c4b1
WH
3702 }
3703 }
3704}
3705
3706static void vm_domain_exit(struct dmar_domain *domain)
3707{
5e98c4b1
WH
 3708	/* Domain 0 is reserved, so don't process it */
3709 if (!domain)
3710 return;
3711
3712 vm_domain_remove_all_dev_info(domain);
3713 /* destroy iovas */
3714 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3715
3716 /* clear ptes */
595badf5 3717 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3718
3719 /* free page tables */
d794dc9b 3720 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3721
3722 iommu_free_vm_domain(domain);
3723 free_domain_mem(domain);
3724}
3725
5d450806 3726static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3727{
5d450806 3728 struct dmar_domain *dmar_domain;
38717946 3729
5d450806
JR
3730 dmar_domain = iommu_alloc_vm_domain();
3731 if (!dmar_domain) {
38717946 3732 printk(KERN_ERR
5d450806
JR
3733 "intel_iommu_domain_init: dmar_domain == NULL\n");
3734 return -ENOMEM;
38717946 3735 }
2c2e2c38 3736 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3737 printk(KERN_ERR
5d450806
JR
3738 "intel_iommu_domain_init() failed\n");
3739 vm_domain_exit(dmar_domain);
3740 return -ENOMEM;
38717946 3741 }
5d450806 3742 domain->priv = dmar_domain;
faa3d6f5 3743
5d450806 3744 return 0;
38717946 3745}
38717946 3746
5d450806 3747static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3748{
5d450806
JR
3749 struct dmar_domain *dmar_domain = domain->priv;
3750
3751 domain->priv = NULL;
3752 vm_domain_exit(dmar_domain);
38717946 3753}
38717946 3754
4c5478c9
JR
3755static int intel_iommu_attach_device(struct iommu_domain *domain,
3756 struct device *dev)
38717946 3757{
4c5478c9
JR
3758 struct dmar_domain *dmar_domain = domain->priv;
3759 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3760 struct intel_iommu *iommu;
3761 int addr_width;
faa3d6f5
WH
3762
3763 /* normally pdev is not mapped */
3764 if (unlikely(domain_context_mapped(pdev))) {
3765 struct dmar_domain *old_domain;
3766
3767 old_domain = find_domain(pdev);
3768 if (old_domain) {
2c2e2c38
FY
3769 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3770 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3771 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3772 else
3773 domain_remove_dev_info(old_domain);
3774 }
3775 }
3776
276dbf99
DW
3777 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3778 pdev->devfn);
fe40f1e0
WH
3779 if (!iommu)
3780 return -ENODEV;
3781
3782 /* check if this iommu agaw is sufficient for max mapped address */
3783 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3784 if (addr_width > cap_mgaw(iommu->cap))
3785 addr_width = cap_mgaw(iommu->cap);
3786
3787 if (dmar_domain->max_addr > (1LL << addr_width)) {
3788 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3789 "sufficient for the mapped address (%llx)\n",
a99c47a2 3790 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3791 return -EFAULT;
3792 }
a99c47a2
TL
3793 dmar_domain->gaw = addr_width;
3794
3795 /*
3796 * Knock out extra levels of page tables if necessary
3797 */
3798 while (iommu->agaw < dmar_domain->agaw) {
3799 struct dma_pte *pte;
3800
3801 pte = dmar_domain->pgd;
3802 if (dma_pte_present(pte)) {
25cbff16
SY
3803 dmar_domain->pgd = (struct dma_pte *)
3804 phys_to_virt(dma_pte_addr(pte));
7a661013 3805 free_pgtable_page(pte);
a99c47a2
TL
3806 }
3807 dmar_domain->agaw--;
3808 }
fe40f1e0 3809
5fe60f4e 3810 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3811}
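/*
 * Sketch of the page-table "knock out" above (illustration only, with
 * assumed widths): a VM domain created via intel_iommu_domain_init() starts
 * with the 48-bit default width, i.e. AGAW 2 and a 4-level table.  If the
 * IOMMU this device sits behind only reports a 39-bit MGAW (AGAW 1, three
 * levels), the loop drops one level: when the top table already has a first
 * entry it descends to that child table and frees the old top, and either
 * way dmar_domain->agaw is reduced so the table depth matches what the
 * hardware can walk before the context entry is programmed.
 */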
38717946 3812
4c5478c9
JR
3813static void intel_iommu_detach_device(struct iommu_domain *domain,
3814 struct device *dev)
38717946 3815{
4c5478c9
JR
3816 struct dmar_domain *dmar_domain = domain->priv;
3817 struct pci_dev *pdev = to_pci_dev(dev);
3818
2c2e2c38 3819 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3820}
c7151a8d 3821
b146a1c9
JR
3822static int intel_iommu_map(struct iommu_domain *domain,
3823 unsigned long iova, phys_addr_t hpa,
3824 int gfp_order, int iommu_prot)
faa3d6f5 3825{
dde57a21 3826 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 3827 u64 max_addr;
dde57a21 3828 int prot = 0;
b146a1c9 3829 size_t size;
faa3d6f5 3830 int ret;
fe40f1e0 3831
dde57a21
JR
3832 if (iommu_prot & IOMMU_READ)
3833 prot |= DMA_PTE_READ;
3834 if (iommu_prot & IOMMU_WRITE)
3835 prot |= DMA_PTE_WRITE;
9cf06697
SY
3836 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3837 prot |= DMA_PTE_SNP;
dde57a21 3838
b146a1c9 3839 size = PAGE_SIZE << gfp_order;
163cc52c 3840 max_addr = iova + size;
dde57a21 3841 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3842 u64 end;
3843
3844 /* check if minimum agaw is sufficient for mapped address */
8954da1f 3845 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 3846 if (end < max_addr) {
8954da1f 3847 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3848 "sufficient for the mapped address (%llx)\n",
8954da1f 3849 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
3850 return -EFAULT;
3851 }
dde57a21 3852 dmar_domain->max_addr = max_addr;
fe40f1e0 3853 }
ad051221
DW
 3854	/* Round the size up to the next multiple of PAGE_SIZE if it,
 3855	   together with the low bits of hpa, would take us onto the next page */
88cb6a74 3856 size = aligned_nrpages(hpa, size);
ad051221
DW
3857 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3858 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3859 return ret;
38717946 3860}
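/*
 * Usage sketch (illustration only, assuming the generic IOMMU API of this
 * kernel generation): a caller such as KVM device assignment maps in
 * power-of-two page units, e.g.
 *
 *	ret = iommu_map(domain, iova, hpa, 0, IOMMU_READ | IOMMU_WRITE);
 *
 * gfp_order 0 is a single 4KiB page and order 9 a 2MiB chunk; the size seen
 * by this callback is PAGE_SIZE << gfp_order, and aligned_nrpages() above
 * rounds the page count up when the low bits of hpa push the end of the
 * range onto another VT-d page.
 */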
38717946 3861
b146a1c9
JR
3862static int intel_iommu_unmap(struct iommu_domain *domain,
3863 unsigned long iova, int gfp_order)
38717946 3864{
dde57a21 3865 struct dmar_domain *dmar_domain = domain->priv;
b146a1c9 3866 size_t size = PAGE_SIZE << gfp_order;
4b99d352 3867
163cc52c
DW
3868 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3869 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 3870
163cc52c
DW
3871 if (dmar_domain->max_addr == iova + size)
3872 dmar_domain->max_addr = iova;
b146a1c9
JR
3873
3874 return gfp_order;
38717946 3875}
38717946 3876
d14d6577
JR
3877static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3878 unsigned long iova)
38717946 3879{
d14d6577 3880 struct dmar_domain *dmar_domain = domain->priv;
38717946 3881 struct dma_pte *pte;
faa3d6f5 3882 u64 phys = 0;
38717946 3883
6dd9a7c7 3884 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
38717946 3885 if (pte)
faa3d6f5 3886 phys = dma_pte_addr(pte);
38717946 3887
faa3d6f5 3888 return phys;
38717946 3889}
a8bcbb0d 3890
dbb9fd86
SY
3891static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3892 unsigned long cap)
3893{
3894 struct dmar_domain *dmar_domain = domain->priv;
3895
3896 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3897 return dmar_domain->iommu_snooping;
323f99cb
TL
3898 if (cap == IOMMU_CAP_INTR_REMAP)
3899 return intr_remapping_enabled;
dbb9fd86
SY
3900
3901 return 0;
3902}
3903
a8bcbb0d
JR
3904static struct iommu_ops intel_iommu_ops = {
3905 .domain_init = intel_iommu_domain_init,
3906 .domain_destroy = intel_iommu_domain_destroy,
3907 .attach_dev = intel_iommu_attach_device,
3908 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
3909 .map = intel_iommu_map,
3910 .unmap = intel_iommu_unmap,
a8bcbb0d 3911 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 3912 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 3913};
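/*
 * End-to-end sketch (illustration only, assuming the generic IOMMU API of
 * this kernel generation; these ops are registered with the generic IOMMU
 * layer from intel_iommu_init()).  The placeholder variables pdev, iova and
 * hpa stand in for a real caller's state:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc();
 *
 *	if (dom && !iommu_attach_device(dom, &pdev->dev)) {
 *		iommu_map(dom, iova, hpa, 0, IOMMU_READ | IOMMU_WRITE);
 *		phys_addr_t phys = iommu_iova_to_phys(dom, iova);
 *		iommu_unmap(dom, iova, 0);
 *		iommu_detach_device(dom, &pdev->dev);
 *	}
 *	if (dom)
 *		iommu_domain_free(dom);
 *
 * which lands in the callbacks above in the same order.
 */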
9af88143
DW
3914
3915static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3916{
3917 /*
3918 * Mobile 4 Series Chipset neglects to set RWBF capability,
3919 * but needs it:
3920 */
3921 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3922 rwbf_quirk = 1;
2d9e667e
DW
3923
3924 /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3925 if (dev->revision == 0x07) {
3926 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3927 dmar_map_gfx = 0;
3928 }
9af88143
DW
3929}
3930
3931DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
e0fc7e0b 3932
eecfd57f
AJ
3933#define GGC 0x52
3934#define GGC_MEMORY_SIZE_MASK (0xf << 8)
3935#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
3936#define GGC_MEMORY_SIZE_1M (0x1 << 8)
3937#define GGC_MEMORY_SIZE_2M (0x3 << 8)
3938#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
3939#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
3940#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
3941#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
3942
9eecabcb
DW
3943static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3944{
3945 unsigned short ggc;
3946
eecfd57f 3947 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
3948 return;
3949
eecfd57f 3950 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
3951 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3952 dmar_map_gfx = 0;
3953 }
3954}
3955DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3956DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3957DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3958DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3959
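/*
 * Worked example of the GGC decode above (values are illustrative): a BIOS
 * that programs GGC to 0x0300 (GGC_MEMORY_SIZE_2M, VT bit clear) trips the
 * quirk and the IOMMU is disabled for graphics, while 0x0900
 * (GGC_MEMORY_SIZE_2M_VT, i.e. GGC_MEMORY_VT_ENABLED set) means shadow GTT
 * space was allocated for VT-d and translation stays enabled for the GPU.
 */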
e0fc7e0b
DW
3960/* On Tylersburg chipsets, some BIOSes have been known to enable the
3961 ISOCH DMAR unit for the Azalia sound device, but not give it any
3962 TLB entries, which causes it to deadlock. Check for that. We do
3963 this in a function called from init_dmars(), instead of in a PCI
3964 quirk, because we don't want to print the obnoxious "BIOS broken"
3965 message if VT-d is actually disabled.
3966*/
3967static void __init check_tylersburg_isoch(void)
3968{
3969 struct pci_dev *pdev;
3970 uint32_t vtisochctrl;
3971
3972 /* If there's no Azalia in the system anyway, forget it. */
3973 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3974 if (!pdev)
3975 return;
3976 pci_dev_put(pdev);
3977
3978 /* System Management Registers. Might be hidden, in which case
3979 we can't do the sanity check. But that's OK, because the
3980 known-broken BIOSes _don't_ actually hide it, so far. */
3981 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3982 if (!pdev)
3983 return;
3984
3985 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3986 pci_dev_put(pdev);
3987 return;
3988 }
3989
3990 pci_dev_put(pdev);
3991
3992 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3993 if (vtisochctrl & 1)
3994 return;
3995
3996 /* Drop all bits other than the number of TLB entries */
3997 vtisochctrl &= 0x1c;
3998
3999 /* If we have the recommended number of TLB entries (16), fine. */
4000 if (vtisochctrl == 0x10)
4001 return;
4002
4003 /* Zero TLB entries? You get to ride the short bus to school. */
4004 if (!vtisochctrl) {
4005 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4006 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4007 dmi_get_system_info(DMI_BIOS_VENDOR),
4008 dmi_get_system_info(DMI_BIOS_VERSION),
4009 dmi_get_system_info(DMI_PRODUCT_VERSION));
4010 iommu_identity_mapping |= IDENTMAP_AZALIA;
4011 return;
4012 }
4013
4014 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4015 vtisochctrl);
4016}
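/*
 * Worked example of the check above (illustrative register values): after
 * masking with 0x1c the field is treated as the ISOCH unit's TLB entry
 * count.  A BIOS that left 0x10 (16 entries) passes silently, one that set
 * 0x04 only triggers the "Recommended TLB entries ... is 16" warning, and a
 * value of zero forces the Azalia device into the identity map via
 * IDENTMAP_AZALIA so its DMA cannot deadlock behind an empty TLB.
 */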