/* drivers/pci/intel-iommu.c */
/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/sysdev.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

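/*
 * Worked example for the macros above: with the default guest address
 * width of 48 and VTD_PAGE_SHIFT == 12, __DOMAIN_MAX_PFN(48) is
 * (1ULL << 36) - 1 and __DOMAIN_MAX_ADDR(48) is (1ULL << 48) - 1.
 * On 32-bit builds DOMAIN_MAX_PFN() additionally clamps the result to
 * (unsigned long)-1 so PFNs always fit in an unsigned long.
 */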
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

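/*
 * For example, with 4K MM pages (PAGE_SHIFT == 12 == VTD_PAGE_SHIFT) the
 * two conversions above are identity operations; only when MM pages are
 * larger than 4K does one MM pfn span several VT-d pfns.
 */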
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

101/*
102 * 0: Present
103 * 1-11: Reserved
104 * 12-63: Context Ptr (12 - (haw-1))
105 * 64-127: Reserved
106 */
107struct root_entry {
108 u64 val;
109 u64 rsvd1;
110};
111#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
112static inline bool root_present(struct root_entry *root)
113{
114 return (root->val & 1);
115}
116static inline void set_root_present(struct root_entry *root)
117{
118 root->val |= 1;
119}
120static inline void set_root_value(struct root_entry *root, unsigned long value)
121{
122 root->val |= value & VTD_PAGE_MASK;
123}
124
125static inline struct context_entry *
126get_context_addr_from_root(struct root_entry *root)
127{
128 return (struct context_entry *)
129 (root_present(root)?phys_to_virt(
130 root->val & VTD_PAGE_MASK) :
131 NULL);
132}
133
7a8fc25e
MM
134/*
135 * low 64 bits:
136 * 0: present
137 * 1: fault processing disable
138 * 2-3: translation type
139 * 12-63: address space root
140 * high 64 bits:
141 * 0-2: address width
142 * 3-6: aval
143 * 8-23: domain id
144 */
145struct context_entry {
146 u64 lo;
147 u64 hi;
148};
c07e7d21
MM
149
150static inline bool context_present(struct context_entry *context)
151{
152 return (context->lo & 1);
153}
154static inline void context_set_present(struct context_entry *context)
155{
156 context->lo |= 1;
157}
158
159static inline void context_set_fault_enable(struct context_entry *context)
160{
161 context->lo &= (((u64)-1) << 2) | 1;
162}
163
c07e7d21
MM
164static inline void context_set_translation_type(struct context_entry *context,
165 unsigned long value)
166{
167 context->lo &= (((u64)-1) << 4) | 3;
168 context->lo |= (value & 3) << 2;
169}
170
171static inline void context_set_address_root(struct context_entry *context,
172 unsigned long value)
173{
174 context->lo |= value & VTD_PAGE_MASK;
175}
176
177static inline void context_set_address_width(struct context_entry *context,
178 unsigned long value)
179{
180 context->hi |= value & 7;
181}
182
183static inline void context_set_domain_id(struct context_entry *context,
184 unsigned long value)
185{
186 context->hi |= (value & ((1 << 16) - 1)) << 8;
187}
188
189static inline void context_clear_entry(struct context_entry *context)
190{
191 context->lo = 0;
192 context->hi = 0;
193}
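
/*
 * Putting the helpers above together: the low word holds the present bit,
 * the fault-processing-disable bit (cleared by context_set_fault_enable),
 * the translation type in bits 2-3 and the page-table root in bits 12-63;
 * the high word holds the address width in bits 0-2 and the domain id in
 * bits 8-23.  For example, domain id 5 with address width value 2 gives
 * hi == (5 << 8) | 2 == 0x502.
 */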

/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

19c239ce
MM
208static inline void dma_clear_pte(struct dma_pte *pte)
209{
210 pte->val = 0;
211}
212
213static inline void dma_set_pte_readable(struct dma_pte *pte)
214{
215 pte->val |= DMA_PTE_READ;
216}
217
218static inline void dma_set_pte_writable(struct dma_pte *pte)
219{
220 pte->val |= DMA_PTE_WRITE;
221}
222
9cf06697
SY
223static inline void dma_set_pte_snp(struct dma_pte *pte)
224{
225 pte->val |= DMA_PTE_SNP;
226}
227
19c239ce
MM
228static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
229{
230 pte->val = (pte->val & ~3) | (prot & 3);
231}
232
233static inline u64 dma_pte_addr(struct dma_pte *pte)
234{
c85994e4
DW
235#ifdef CONFIG_64BIT
236 return pte->val & VTD_PAGE_MASK;
237#else
238 /* Must have a full atomic 64-bit read */
239 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
240#endif
19c239ce
MM
241}
242
dd4e8319 243static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 244{
dd4e8319 245 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
19c239ce
MM
246}
247
248static inline bool dma_pte_present(struct dma_pte *pte)
249{
250 return (pte->val & 3) != 0;
251}
622ba12a 252
75e6bf96
DW
253static inline int first_pte_in_page(struct dma_pte *pte)
254{
255 return !((unsigned long)pte & ~VTD_PAGE_MASK);
256}
257
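/*
 * A page-table page holds VTD_PAGE_SIZE / sizeof(struct dma_pte) == 512
 * PTEs, so first_pte_in_page() is used by the mapping/clearing loops below
 * to notice when a walk has crossed into a new PTE page and the previous
 * batch needs to be flushed with domain_flush_cache().
 */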
/*
 * This domain is a statically identity mapping domain.
 * 1. This domain creates a static 1:1 mapping to all usable memory.
 * 2. It maps to each iommu if successful.
 * 3. Each iommu maps to this domain if successful.
 */
19943b0e
DW
264static struct dmar_domain *si_domain;
265static int hw_pass_through = 1;
2c2e2c38 266
3b5410e7 267/* devices under the same p2p bridge are owned in one domain */
cdc7b837 268#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 269
/* domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. kvm guest.
 */
273#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
274
/* si_domain contains multiple devices */
276#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
277
99126f7c
MM
278struct dmar_domain {
279 int id; /* domain id */
4c923d47 280 int nid; /* node id */
8c11e798 281 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
282
283 struct list_head devices; /* all devices' list */
284 struct iova_domain iovad; /* iova's that belong to this domain */
285
286 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
287 int gaw; /* max guest address width */
288
289 /* adjusted guest address width, 0 is level 2 30-bit */
290 int agaw;
291
3b5410e7 292 int flags; /* flags to find out type of domain */
8e604097
WH
293
294 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 295 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d
WH
296 int iommu_count; /* reference count of iommu */
297 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 298 u64 max_addr; /* maximum mapped address */
99126f7c
MM
299};
300
a647dacb
MM
301/* PCI domain-device relationship */
302struct device_domain_info {
303 struct list_head link; /* link to domain siblings */
304 struct list_head global; /* link to global list */
276dbf99
DW
305 int segment; /* PCI domain */
306 u8 bus; /* PCI bus number */
a647dacb 307 u8 devfn; /* PCI devfn number */
45e829ea 308 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 309 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
310 struct dmar_domain *domain; /* pointer to domain */
311};
312
5e0d2a6f 313static void flush_unmaps_timeout(unsigned long data);
314
315DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
316
80b20dd8 317#define HIGH_WATER_MARK 250
318struct deferred_flush_tables {
319 int next;
320 struct iova *iova[HIGH_WATER_MARK];
321 struct dmar_domain *domain[HIGH_WATER_MARK];
322};
323
324static struct deferred_flush_tables *deferred_flush;
325
5e0d2a6f 326/* bitmap for indexing intel_iommus */
5e0d2a6f 327static int g_num_of_iommus;
328
329static DEFINE_SPINLOCK(async_umap_flush_lock);
330static LIST_HEAD(unmaps_to_do);
331
332static int timer_on;
333static long list_size;
5e0d2a6f 334
ba395927
KA
335static void domain_remove_dev_info(struct dmar_domain *domain);
336
0cd5c3c8
KM
337#ifdef CONFIG_DMAR_DEFAULT_ON
338int dmar_disabled = 0;
339#else
340int dmar_disabled = 1;
341#endif /*CONFIG_DMAR_DEFAULT_ON*/
342
ba395927 343static int __initdata dmar_map_gfx = 1;
7d3b03ce 344static int dmar_forcedac;
5e0d2a6f 345static int intel_iommu_strict;
ba395927
KA
346
347#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
348static DEFINE_SPINLOCK(device_domain_lock);
349static LIST_HEAD(device_domain_list);
350
a8bcbb0d
JR
351static struct iommu_ops intel_iommu_ops;
352
ba395927
KA
353static int __init intel_iommu_setup(char *str)
354{
355 if (!str)
356 return -EINVAL;
357 while (*str) {
0cd5c3c8
KM
358 if (!strncmp(str, "on", 2)) {
359 dmar_disabled = 0;
360 printk(KERN_INFO "Intel-IOMMU: enabled\n");
361 } else if (!strncmp(str, "off", 3)) {
ba395927 362 dmar_disabled = 1;
0cd5c3c8 363 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
364 } else if (!strncmp(str, "igfx_off", 8)) {
365 dmar_map_gfx = 0;
366 printk(KERN_INFO
367 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 368 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 369 printk(KERN_INFO
7d3b03ce
KA
370 "Intel-IOMMU: Forcing DAC for PCI devices\n");
371 dmar_forcedac = 1;
5e0d2a6f 372 } else if (!strncmp(str, "strict", 6)) {
373 printk(KERN_INFO
374 "Intel-IOMMU: disable batched IOTLB flush\n");
375 intel_iommu_strict = 1;
ba395927
KA
376 }
377
378 str += strcspn(str, ",");
379 while (*str == ',')
380 str++;
381 }
382 return 0;
383}
384__setup("intel_iommu=", intel_iommu_setup);
385
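/*
 * Example (illustrative): booting with "intel_iommu=on,strict" enables the
 * IOMMU and disables the batched IOTLB flush, while "intel_iommu=igfx_off"
 * turns off mapping of the graphics device.  Options are comma-separated,
 * as parsed above.
 */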
386static struct kmem_cache *iommu_domain_cache;
387static struct kmem_cache *iommu_devinfo_cache;
388static struct kmem_cache *iommu_iova_cache;
389
4c923d47 390static inline void *alloc_pgtable_page(int node)
eb3fa7cb 391{
4c923d47
SS
392 struct page *page;
393 void *vaddr = NULL;
eb3fa7cb 394
4c923d47
SS
395 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
396 if (page)
397 vaddr = page_address(page);
eb3fa7cb 398 return vaddr;
ba395927
KA
399}
400
401static inline void free_pgtable_page(void *vaddr)
402{
403 free_page((unsigned long)vaddr);
404}
405
406static inline void *alloc_domain_mem(void)
407{
354bb65e 408 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
409}
410
38717946 411static void free_domain_mem(void *vaddr)
ba395927
KA
412{
413 kmem_cache_free(iommu_domain_cache, vaddr);
414}
415
416static inline void * alloc_devinfo_mem(void)
417{
354bb65e 418 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
419}
420
421static inline void free_devinfo_mem(void *vaddr)
422{
423 kmem_cache_free(iommu_devinfo_cache, vaddr);
424}
425
426struct iova *alloc_iova_mem(void)
427{
354bb65e 428 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
429}
430
431void free_iova_mem(struct iova *iova)
432{
433 kmem_cache_free(iommu_iova_cache, iova);
434}
435
1b573683
WH
436
437static inline int width_to_agaw(int width);
438
4ed0d3e6 439static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
440{
441 unsigned long sagaw;
442 int agaw = -1;
443
444 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 445 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
446 agaw >= 0; agaw--) {
447 if (test_bit(agaw, &sagaw))
448 break;
449 }
450
451 return agaw;
452}
453
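/*
 * cap_sagaw() returns a bitmap of the address widths the hardware supports;
 * the loop above starts from the agaw corresponding to max_gaw and walks
 * downwards until it finds a bit that is set, so the largest supported agaw
 * not exceeding the requested width is chosen (or -1 if none is).
 */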
4ed0d3e6
FY
454/*
455 * Calculate max SAGAW for each iommu.
456 */
457int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
458{
459 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
460}
461
/*
 * calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
467int iommu_calculate_agaw(struct intel_iommu *iommu)
468{
469 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
470}
471
/* This function only returns a single iommu in a domain */
8c11e798
WH
473static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
474{
475 int iommu_id;
476
2c2e2c38 477 /* si_domain and vm domain should not get here. */
1ce28feb 478 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 479 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 480
8c11e798
WH
481 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
482 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
483 return NULL;
484
485 return g_iommus[iommu_id];
486}
487
8e604097
WH
488static void domain_update_iommu_coherency(struct dmar_domain *domain)
489{
490 int i;
491
492 domain->iommu_coherency = 1;
493
494 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
495 for (; i < g_num_of_iommus; ) {
496 if (!ecap_coherent(g_iommus[i]->ecap)) {
497 domain->iommu_coherency = 0;
498 break;
499 }
500 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
501 }
502}
503
58c610bd
SY
504static void domain_update_iommu_snooping(struct dmar_domain *domain)
505{
506 int i;
507
508 domain->iommu_snooping = 1;
509
510 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
511 for (; i < g_num_of_iommus; ) {
512 if (!ecap_sc_support(g_iommus[i]->ecap)) {
513 domain->iommu_snooping = 0;
514 break;
515 }
516 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
517 }
518}
519
520/* Some capabilities may be different across iommus */
521static void domain_update_iommu_cap(struct dmar_domain *domain)
522{
523 domain_update_iommu_coherency(domain);
524 domain_update_iommu_snooping(domain);
525}
526
276dbf99 527static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
528{
529 struct dmar_drhd_unit *drhd = NULL;
530 int i;
531
532 for_each_drhd_unit(drhd) {
533 if (drhd->ignored)
534 continue;
276dbf99
DW
535 if (segment != drhd->segment)
536 continue;
c7151a8d 537
924b6231 538 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
539 if (drhd->devices[i] &&
540 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
541 drhd->devices[i]->devfn == devfn)
542 return drhd->iommu;
4958c5dc
DW
543 if (drhd->devices[i] &&
544 drhd->devices[i]->subordinate &&
924b6231
DW
545 drhd->devices[i]->subordinate->number <= bus &&
546 drhd->devices[i]->subordinate->subordinate >= bus)
547 return drhd->iommu;
548 }
c7151a8d
WH
549
550 if (drhd->include_all)
551 return drhd->iommu;
552 }
553
554 return NULL;
555}
556
5331fe6f
WH
557static void domain_flush_cache(struct dmar_domain *domain,
558 void *addr, int size)
559{
560 if (!domain->iommu_coherency)
561 clflush_cache_range(addr, size);
562}
563
ba395927
KA
564/* Gets context entry for a given bus and devfn */
565static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
566 u8 bus, u8 devfn)
567{
568 struct root_entry *root;
569 struct context_entry *context;
570 unsigned long phy_addr;
571 unsigned long flags;
572
573 spin_lock_irqsave(&iommu->lock, flags);
574 root = &iommu->root_entry[bus];
575 context = get_context_addr_from_root(root);
576 if (!context) {
4c923d47
SS
577 context = (struct context_entry *)
578 alloc_pgtable_page(iommu->node);
ba395927
KA
579 if (!context) {
580 spin_unlock_irqrestore(&iommu->lock, flags);
581 return NULL;
582 }
5b6985ce 583 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
584 phy_addr = virt_to_phys((void *)context);
585 set_root_value(root, phy_addr);
586 set_root_present(root);
587 __iommu_flush_cache(iommu, root, sizeof(*root));
588 }
589 spin_unlock_irqrestore(&iommu->lock, flags);
590 return &context[devfn];
591}
592
593static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
594{
595 struct root_entry *root;
596 struct context_entry *context;
597 int ret;
598 unsigned long flags;
599
600 spin_lock_irqsave(&iommu->lock, flags);
601 root = &iommu->root_entry[bus];
602 context = get_context_addr_from_root(root);
603 if (!context) {
604 ret = 0;
605 goto out;
606 }
c07e7d21 607 ret = context_present(&context[devfn]);
ba395927
KA
608out:
609 spin_unlock_irqrestore(&iommu->lock, flags);
610 return ret;
611}
612
613static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
614{
615 struct root_entry *root;
616 struct context_entry *context;
617 unsigned long flags;
618
619 spin_lock_irqsave(&iommu->lock, flags);
620 root = &iommu->root_entry[bus];
621 context = get_context_addr_from_root(root);
622 if (context) {
c07e7d21 623 context_clear_entry(&context[devfn]);
ba395927
KA
624 __iommu_flush_cache(iommu, &context[devfn], \
625 sizeof(*context));
626 }
627 spin_unlock_irqrestore(&iommu->lock, flags);
628}
629
630static void free_context_table(struct intel_iommu *iommu)
631{
632 struct root_entry *root;
633 int i;
634 unsigned long flags;
635 struct context_entry *context;
636
637 spin_lock_irqsave(&iommu->lock, flags);
638 if (!iommu->root_entry) {
639 goto out;
640 }
641 for (i = 0; i < ROOT_ENTRY_NR; i++) {
642 root = &iommu->root_entry[i];
643 context = get_context_addr_from_root(root);
644 if (context)
645 free_pgtable_page(context);
646 }
647 free_pgtable_page(iommu->root_entry);
648 iommu->root_entry = NULL;
649out:
650 spin_unlock_irqrestore(&iommu->lock, flags);
651}
652
653/* page table handling */
654#define LEVEL_STRIDE (9)
655#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
656
657static inline int agaw_to_level(int agaw)
658{
659 return agaw + 2;
660}
661
662static inline int agaw_to_width(int agaw)
663{
664 return 30 + agaw * LEVEL_STRIDE;
665
666}
667
668static inline int width_to_agaw(int width)
669{
670 return (width - 30) / LEVEL_STRIDE;
671}
672
673static inline unsigned int level_to_offset_bits(int level)
674{
6660c63a 675 return (level - 1) * LEVEL_STRIDE;
ba395927
KA
676}
677
77dfa56c 678static inline int pfn_level_offset(unsigned long pfn, int level)
ba395927 679{
6660c63a 680 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
ba395927
KA
681}
682
6660c63a 683static inline unsigned long level_mask(int level)
ba395927 684{
6660c63a 685 return -1UL << level_to_offset_bits(level);
ba395927
KA
686}
687
6660c63a 688static inline unsigned long level_size(int level)
ba395927 689{
6660c63a 690 return 1UL << level_to_offset_bits(level);
ba395927
KA
691}
692
6660c63a 693static inline unsigned long align_to_level(unsigned long pfn, int level)
ba395927 694{
6660c63a 695 return (pfn + level_size(level) - 1) & level_mask(level);
ba395927
KA
696}
697
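/*
 * Worked example of the helpers above: a 48-bit adjusted guest address
 * width gives agaw = (48 - 30) / 9 = 2 and a page table with
 * agaw_to_level(2) == 4 levels; each level indexes LEVEL_STRIDE == 9 bits
 * of the DMA pfn, e.g. pfn_level_offset(pfn, 1) == pfn & 511.
 */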
b026fd28
DW
698static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
699 unsigned long pfn)
ba395927 700{
b026fd28 701 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
702 struct dma_pte *parent, *pte = NULL;
703 int level = agaw_to_level(domain->agaw);
704 int offset;
ba395927
KA
705
706 BUG_ON(!domain->pgd);
b026fd28 707 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
708 parent = domain->pgd;
709
ba395927
KA
710 while (level > 0) {
711 void *tmp_page;
712
b026fd28 713 offset = pfn_level_offset(pfn, level);
ba395927
KA
714 pte = &parent[offset];
715 if (level == 1)
716 break;
717
19c239ce 718 if (!dma_pte_present(pte)) {
c85994e4
DW
719 uint64_t pteval;
720
4c923d47 721 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 722
206a73c1 723 if (!tmp_page)
ba395927 724 return NULL;
206a73c1 725
c85994e4 726 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 727 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
728 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
729 /* Someone else set it while we were thinking; use theirs. */
730 free_pgtable_page(tmp_page);
731 } else {
732 dma_pte_addr(pte);
733 domain_flush_cache(domain, pte, sizeof(*pte));
734 }
ba395927 735 }
19c239ce 736 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
737 level--;
738 }
739
ba395927
KA
740 return pte;
741}
742
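/*
 * pfn_to_dma_pte() above walks (and, where needed, allocates) the page
 * table top-down: at each level the 9-bit slice of the pfn selects one of
 * the 512 entries, and missing intermediate tables are installed with
 * cmpxchg64() so that a concurrent walker allocating the same slot simply
 * frees its page and reuses the winner's.
 */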
743/* return address's pte at specific level */
90dcfb5e
DW
744static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
745 unsigned long pfn,
746 int level)
ba395927
KA
747{
748 struct dma_pte *parent, *pte = NULL;
749 int total = agaw_to_level(domain->agaw);
750 int offset;
751
752 parent = domain->pgd;
753 while (level <= total) {
90dcfb5e 754 offset = pfn_level_offset(pfn, total);
ba395927
KA
755 pte = &parent[offset];
756 if (level == total)
757 return pte;
758
19c239ce 759 if (!dma_pte_present(pte))
ba395927 760 break;
19c239ce 761 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
762 total--;
763 }
764 return NULL;
765}
766
ba395927 767/* clear last level pte, a tlb flush should be followed */
595badf5
DW
768static void dma_pte_clear_range(struct dmar_domain *domain,
769 unsigned long start_pfn,
770 unsigned long last_pfn)
ba395927 771{
04b18e65 772 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
310a5ab9 773 struct dma_pte *first_pte, *pte;
66eae846 774
04b18e65 775 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 776 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 777 BUG_ON(start_pfn > last_pfn);
ba395927 778
04b18e65 779 /* we don't need lock here; nobody else touches the iova range */
59c36286 780 do {
310a5ab9
DW
781 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
782 if (!pte) {
783 start_pfn = align_to_level(start_pfn + 1, 2);
784 continue;
785 }
75e6bf96 786 do {
310a5ab9
DW
787 dma_clear_pte(pte);
788 start_pfn++;
789 pte++;
75e6bf96
DW
790 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
791
310a5ab9
DW
792 domain_flush_cache(domain, first_pte,
793 (void *)pte - (void *)first_pte);
59c36286
DW
794
795 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
796}
797
798/* free page table pages. last level pte should already be cleared */
799static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
800 unsigned long start_pfn,
801 unsigned long last_pfn)
ba395927 802{
6660c63a 803 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 804 struct dma_pte *first_pte, *pte;
ba395927
KA
805 int total = agaw_to_level(domain->agaw);
806 int level;
6660c63a 807 unsigned long tmp;
ba395927 808
6660c63a
DW
809 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
810 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 811 BUG_ON(start_pfn > last_pfn);
ba395927 812
f3a0a52f 813 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
814 level = 2;
815 while (level <= total) {
6660c63a
DW
816 tmp = align_to_level(start_pfn, level);
817
f3a0a52f 818 /* If we can't even clear one PTE at this level, we're done */
6660c63a 819 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
820 return;
821
59c36286 822 do {
f3a0a52f
DW
823 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
824 if (!pte) {
825 tmp = align_to_level(tmp + 1, level + 1);
826 continue;
827 }
75e6bf96 828 do {
6a43e574
DW
829 if (dma_pte_present(pte)) {
830 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
831 dma_clear_pte(pte);
832 }
f3a0a52f
DW
833 pte++;
834 tmp += level_size(level);
75e6bf96
DW
835 } while (!first_pte_in_page(pte) &&
836 tmp + level_size(level) - 1 <= last_pfn);
837
f3a0a52f
DW
838 domain_flush_cache(domain, first_pte,
839 (void *)pte - (void *)first_pte);
840
59c36286 841 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
842 level++;
843 }
844 /* free pgd */
d794dc9b 845 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
846 free_pgtable_page(domain->pgd);
847 domain->pgd = NULL;
848 }
849}
850
851/* iommu handling */
852static int iommu_alloc_root_entry(struct intel_iommu *iommu)
853{
854 struct root_entry *root;
855 unsigned long flags;
856
4c923d47 857 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
858 if (!root)
859 return -ENOMEM;
860
5b6985ce 861 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
862
863 spin_lock_irqsave(&iommu->lock, flags);
864 iommu->root_entry = root;
865 spin_unlock_irqrestore(&iommu->lock, flags);
866
867 return 0;
868}
869
ba395927
KA
870static void iommu_set_root_entry(struct intel_iommu *iommu)
871{
872 void *addr;
c416daa9 873 u32 sts;
ba395927
KA
874 unsigned long flag;
875
876 addr = iommu->root_entry;
877
878 spin_lock_irqsave(&iommu->register_lock, flag);
879 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
880
c416daa9 881 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
882
883 /* Make sure hardware complete it */
884 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 885 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
886
887 spin_unlock_irqrestore(&iommu->register_lock, flag);
888}
889
890static void iommu_flush_write_buffer(struct intel_iommu *iommu)
891{
892 u32 val;
893 unsigned long flag;
894
9af88143 895 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 896 return;
ba395927
KA
897
898 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 899 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
900
901 /* Make sure hardware complete it */
902 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 903 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
904
905 spin_unlock_irqrestore(&iommu->register_lock, flag);
906}
907
/* return value determines whether we need a write buffer flush */
4c25a2c1
DW
909static void __iommu_flush_context(struct intel_iommu *iommu,
910 u16 did, u16 source_id, u8 function_mask,
911 u64 type)
ba395927
KA
912{
913 u64 val = 0;
914 unsigned long flag;
915
ba395927
KA
916 switch (type) {
917 case DMA_CCMD_GLOBAL_INVL:
918 val = DMA_CCMD_GLOBAL_INVL;
919 break;
920 case DMA_CCMD_DOMAIN_INVL:
921 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
922 break;
923 case DMA_CCMD_DEVICE_INVL:
924 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
925 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
926 break;
927 default:
928 BUG();
929 }
930 val |= DMA_CCMD_ICC;
931
932 spin_lock_irqsave(&iommu->register_lock, flag);
933 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
934
935 /* Make sure hardware complete it */
936 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
937 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
938
939 spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
940}
941
/* return value determines whether we need a write buffer flush */
1f0ef2aa
DW
943static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
944 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
945{
946 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
947 u64 val = 0, val_iva = 0;
948 unsigned long flag;
949
ba395927
KA
950 switch (type) {
951 case DMA_TLB_GLOBAL_FLUSH:
952 /* global flush doesn't need set IVA_REG */
953 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
954 break;
955 case DMA_TLB_DSI_FLUSH:
956 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
957 break;
958 case DMA_TLB_PSI_FLUSH:
959 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
960 /* Note: always flush non-leaf currently */
961 val_iva = size_order | addr;
962 break;
963 default:
964 BUG();
965 }
966 /* Note: set drain read/write */
967#if 0
968 /*
	 * This is probably just to be extra safe.  Looks like we can
	 * ignore it without any impact.
971 */
972 if (cap_read_drain(iommu->cap))
973 val |= DMA_TLB_READ_DRAIN;
974#endif
975 if (cap_write_drain(iommu->cap))
976 val |= DMA_TLB_WRITE_DRAIN;
977
978 spin_lock_irqsave(&iommu->register_lock, flag);
979 /* Note: Only uses first TLB reg currently */
980 if (val_iva)
981 dmar_writeq(iommu->reg + tlb_offset, val_iva);
982 dmar_writeq(iommu->reg + tlb_offset + 8, val);
983
984 /* Make sure hardware complete it */
985 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
986 dmar_readq, (!(val & DMA_TLB_IVT)), val);
987
988 spin_unlock_irqrestore(&iommu->register_lock, flag);
989
990 /* check IOTLB invalidation granularity */
991 if (DMA_TLB_IAIG(val) == 0)
992 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
993 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
994 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
995 (unsigned long long)DMA_TLB_IIRG(type),
996 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
997}
998
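/*
 * __iommu_flush_iotlb() above issues one of three granularities: a global
 * flush, a domain-selective flush (DSI) keyed on the domain id, or a
 * page-selective flush (PSI) where val_iva encodes the address together
 * with the size order, e.g. size_order 4 invalidates a 16-page aligned
 * range.
 */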
93a23a72
YZ
999static struct device_domain_info *iommu_support_dev_iotlb(
1000 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1001{
1002 int found = 0;
1003 unsigned long flags;
1004 struct device_domain_info *info;
1005 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1006
1007 if (!ecap_dev_iotlb_support(iommu->ecap))
1008 return NULL;
1009
1010 if (!iommu->qi)
1011 return NULL;
1012
1013 spin_lock_irqsave(&device_domain_lock, flags);
1014 list_for_each_entry(info, &domain->devices, link)
1015 if (info->bus == bus && info->devfn == devfn) {
1016 found = 1;
1017 break;
1018 }
1019 spin_unlock_irqrestore(&device_domain_lock, flags);
1020
1021 if (!found || !info->dev)
1022 return NULL;
1023
1024 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1025 return NULL;
1026
1027 if (!dmar_find_matched_atsr_unit(info->dev))
1028 return NULL;
1029
1030 info->iommu = iommu;
1031
1032 return info;
1033}
1034
1035static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1036{
93a23a72
YZ
1037 if (!info)
1038 return;
1039
1040 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1041}
1042
1043static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1044{
1045 if (!info->dev || !pci_ats_enabled(info->dev))
1046 return;
1047
1048 pci_disable_ats(info->dev);
1049}
1050
1051static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1052 u64 addr, unsigned mask)
1053{
1054 u16 sid, qdep;
1055 unsigned long flags;
1056 struct device_domain_info *info;
1057
1058 spin_lock_irqsave(&device_domain_lock, flags);
1059 list_for_each_entry(info, &domain->devices, link) {
1060 if (!info->dev || !pci_ats_enabled(info->dev))
1061 continue;
1062
1063 sid = info->bus << 8 | info->devfn;
1064 qdep = pci_ats_queue_depth(info->dev);
1065 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1066 }
1067 spin_unlock_irqrestore(&device_domain_lock, flags);
1068}
1069
1f0ef2aa 1070static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
03d6a246 1071 unsigned long pfn, unsigned int pages)
ba395927 1072{
9dd2fe89 1073 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1074 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1075
ba395927
KA
1076 BUG_ON(pages == 0);
1077
ba395927 1078 /*
9dd2fe89
YZ
1079 * Fallback to domain selective flush if no PSI support or the size is
1080 * too big.
ba395927
KA
1081 * PSI requires page size to be 2 ^ x, and the base address is naturally
1082 * aligned to the size
1083 */
9dd2fe89
YZ
1084 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1085 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1086 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1087 else
1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1089 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1090
1091 /*
1092 * In caching mode, domain ID 0 is reserved for non-present to present
1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
93a23a72 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1097}
1098
f8bab735 1099static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1100{
1101 u32 pmen;
1102 unsigned long flags;
1103
1104 spin_lock_irqsave(&iommu->register_lock, flags);
1105 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1106 pmen &= ~DMA_PMEN_EPM;
1107 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1108
1109 /* wait for the protected region status bit to clear */
1110 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1111 readl, !(pmen & DMA_PMEN_PRS), pmen);
1112
1113 spin_unlock_irqrestore(&iommu->register_lock, flags);
1114}
1115
ba395927
KA
1116static int iommu_enable_translation(struct intel_iommu *iommu)
1117{
1118 u32 sts;
1119 unsigned long flags;
1120
1121 spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1122 iommu->gcmd |= DMA_GCMD_TE;
1123 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1124
1125 /* Make sure hardware complete it */
1126 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1127 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1128
ba395927
KA
1129 spin_unlock_irqrestore(&iommu->register_lock, flags);
1130 return 0;
1131}
1132
1133static int iommu_disable_translation(struct intel_iommu *iommu)
1134{
1135 u32 sts;
1136 unsigned long flag;
1137
1138 spin_lock_irqsave(&iommu->register_lock, flag);
1139 iommu->gcmd &= ~DMA_GCMD_TE;
1140 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1141
1142 /* Make sure hardware complete it */
1143 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1144 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1145
1146 spin_unlock_irqrestore(&iommu->register_lock, flag);
1147 return 0;
1148}
1149
3460a6d9 1150
ba395927
KA
1151static int iommu_init_domains(struct intel_iommu *iommu)
1152{
1153 unsigned long ndomains;
1154 unsigned long nlongs;
1155
1156 ndomains = cap_ndoms(iommu->cap);
	pr_debug("Number of Domains supported <%ld>\n", ndomains);
1158 nlongs = BITS_TO_LONGS(ndomains);
1159
94a91b50
DD
1160 spin_lock_init(&iommu->lock);
1161
ba395927
KA
1162 /* TBD: there might be 64K domains,
1163 * consider other allocation for future chip
1164 */
1165 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1166 if (!iommu->domain_ids) {
1167 printk(KERN_ERR "Allocating domain id array failed\n");
1168 return -ENOMEM;
1169 }
1170 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1171 GFP_KERNEL);
1172 if (!iommu->domains) {
1173 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1174 return -ENOMEM;
1175 }
1176
1177 /*
1178 * if Caching mode is set, then invalid translations are tagged
1179 * with domainid 0. Hence we need to pre-allocate it.
1180 */
1181 if (cap_caching_mode(iommu->cap))
1182 set_bit(0, iommu->domain_ids);
1183 return 0;
1184}
ba395927 1185
ba395927
KA
1186
1187static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1188static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1189
1190void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1191{
1192 struct dmar_domain *domain;
1193 int i;
c7151a8d 1194 unsigned long flags;
ba395927 1195
94a91b50
DD
1196 if ((iommu->domains) && (iommu->domain_ids)) {
1197 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1198 for (; i < cap_ndoms(iommu->cap); ) {
1199 domain = iommu->domains[i];
1200 clear_bit(i, iommu->domain_ids);
1201
1202 spin_lock_irqsave(&domain->iommu_lock, flags);
1203 if (--domain->iommu_count == 0) {
1204 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1205 vm_domain_exit(domain);
1206 else
1207 domain_exit(domain);
1208 }
1209 spin_unlock_irqrestore(&domain->iommu_lock, flags);
c7151a8d 1210
94a91b50
DD
1211 i = find_next_bit(iommu->domain_ids,
1212 cap_ndoms(iommu->cap), i+1);
5e98c4b1 1213 }
ba395927
KA
1214 }
1215
1216 if (iommu->gcmd & DMA_GCMD_TE)
1217 iommu_disable_translation(iommu);
1218
1219 if (iommu->irq) {
1220 set_irq_data(iommu->irq, NULL);
1221 /* This will mask the irq */
1222 free_irq(iommu->irq, iommu);
1223 destroy_irq(iommu->irq);
1224 }
1225
1226 kfree(iommu->domains);
1227 kfree(iommu->domain_ids);
1228
d9630fe9
WH
1229 g_iommus[iommu->seq_id] = NULL;
1230
1231 /* if all iommus are freed, free g_iommus */
1232 for (i = 0; i < g_num_of_iommus; i++) {
1233 if (g_iommus[i])
1234 break;
1235 }
1236
1237 if (i == g_num_of_iommus)
1238 kfree(g_iommus);
1239
ba395927
KA
1240 /* free context mapping */
1241 free_context_table(iommu);
ba395927
KA
1242}
1243
2c2e2c38 1244static struct dmar_domain *alloc_domain(void)
ba395927 1245{
ba395927 1246 struct dmar_domain *domain;
ba395927
KA
1247
1248 domain = alloc_domain_mem();
1249 if (!domain)
1250 return NULL;
1251
4c923d47 1252 domain->nid = -1;
2c2e2c38
FY
1253 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1254 domain->flags = 0;
1255
1256 return domain;
1257}
1258
1259static int iommu_attach_domain(struct dmar_domain *domain,
1260 struct intel_iommu *iommu)
1261{
1262 int num;
1263 unsigned long ndomains;
1264 unsigned long flags;
1265
ba395927
KA
1266 ndomains = cap_ndoms(iommu->cap);
1267
1268 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1269
ba395927
KA
1270 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1271 if (num >= ndomains) {
1272 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1273 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1274 return -ENOMEM;
ba395927
KA
1275 }
1276
ba395927 1277 domain->id = num;
2c2e2c38 1278 set_bit(num, iommu->domain_ids);
8c11e798 1279 set_bit(iommu->seq_id, &domain->iommu_bmp);
ba395927
KA
1280 iommu->domains[num] = domain;
1281 spin_unlock_irqrestore(&iommu->lock, flags);
1282
2c2e2c38 1283 return 0;
ba395927
KA
1284}
1285
2c2e2c38
FY
1286static void iommu_detach_domain(struct dmar_domain *domain,
1287 struct intel_iommu *iommu)
ba395927
KA
1288{
1289 unsigned long flags;
2c2e2c38
FY
1290 int num, ndomains;
1291 int found = 0;
ba395927 1292
8c11e798 1293 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38
FY
1294 ndomains = cap_ndoms(iommu->cap);
1295 num = find_first_bit(iommu->domain_ids, ndomains);
1296 for (; num < ndomains; ) {
1297 if (iommu->domains[num] == domain) {
1298 found = 1;
1299 break;
1300 }
1301 num = find_next_bit(iommu->domain_ids,
1302 cap_ndoms(iommu->cap), num+1);
1303 }
1304
1305 if (found) {
1306 clear_bit(num, iommu->domain_ids);
1307 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1308 iommu->domains[num] = NULL;
1309 }
8c11e798 1310 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1311}
1312
1313static struct iova_domain reserved_iova_list;
8a443df4 1314static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1315
1316static void dmar_init_reserved_ranges(void)
1317{
1318 struct pci_dev *pdev = NULL;
1319 struct iova *iova;
1320 int i;
ba395927 1321
f661197e 1322 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1323
8a443df4
MG
1324 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1325 &reserved_rbtree_key);
1326
ba395927
KA
1327 /* IOAPIC ranges shouldn't be accessed by DMA */
1328 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1329 IOVA_PFN(IOAPIC_RANGE_END));
1330 if (!iova)
1331 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1332
1333 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1334 for_each_pci_dev(pdev) {
1335 struct resource *r;
1336
1337 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1338 r = &pdev->resource[i];
1339 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1340 continue;
1a4a4551
DW
1341 iova = reserve_iova(&reserved_iova_list,
1342 IOVA_PFN(r->start),
1343 IOVA_PFN(r->end));
ba395927
KA
1344 if (!iova)
1345 printk(KERN_ERR "Reserve iova failed\n");
1346 }
1347 }
1348
1349}
1350
1351static void domain_reserve_special_ranges(struct dmar_domain *domain)
1352{
1353 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1354}
1355
1356static inline int guestwidth_to_adjustwidth(int gaw)
1357{
1358 int agaw;
1359 int r = (gaw - 12) % 9;
1360
1361 if (r == 0)
1362 agaw = gaw;
1363 else
1364 agaw = gaw + 9 - r;
1365 if (agaw > 64)
1366 agaw = 64;
1367 return agaw;
1368}
1369
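/*
 * Example for guestwidth_to_adjustwidth(): a guest width already of the
 * form 12 + 9*n (such as 39 or 48) is returned unchanged, while e.g.
 * gaw == 36 gives r == (36 - 12) % 9 == 6 and is rounded up to
 * 36 + 9 - 6 == 39; results are capped at 64.
 */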
1370static int domain_init(struct dmar_domain *domain, int guest_width)
1371{
1372 struct intel_iommu *iommu;
1373 int adjust_width, agaw;
1374 unsigned long sagaw;
1375
f661197e 1376 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1377 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1378
1379 domain_reserve_special_ranges(domain);
1380
1381 /* calculate AGAW */
8c11e798 1382 iommu = domain_get_iommu(domain);
ba395927
KA
1383 if (guest_width > cap_mgaw(iommu->cap))
1384 guest_width = cap_mgaw(iommu->cap);
1385 domain->gaw = guest_width;
1386 adjust_width = guestwidth_to_adjustwidth(guest_width);
1387 agaw = width_to_agaw(adjust_width);
1388 sagaw = cap_sagaw(iommu->cap);
1389 if (!test_bit(agaw, &sagaw)) {
1390 /* hardware doesn't support it, choose a bigger one */
1391 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1392 agaw = find_next_bit(&sagaw, 5, agaw);
1393 if (agaw >= 5)
1394 return -ENODEV;
1395 }
1396 domain->agaw = agaw;
1397 INIT_LIST_HEAD(&domain->devices);
1398
8e604097
WH
1399 if (ecap_coherent(iommu->ecap))
1400 domain->iommu_coherency = 1;
1401 else
1402 domain->iommu_coherency = 0;
1403
58c610bd
SY
1404 if (ecap_sc_support(iommu->ecap))
1405 domain->iommu_snooping = 1;
1406 else
1407 domain->iommu_snooping = 0;
1408
c7151a8d 1409 domain->iommu_count = 1;
4c923d47 1410 domain->nid = iommu->node;
c7151a8d 1411
ba395927 1412 /* always allocate the top pgd */
4c923d47 1413 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1414 if (!domain->pgd)
1415 return -ENOMEM;
5b6985ce 1416 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1417 return 0;
1418}
1419
1420static void domain_exit(struct dmar_domain *domain)
1421{
2c2e2c38
FY
1422 struct dmar_drhd_unit *drhd;
1423 struct intel_iommu *iommu;
ba395927
KA
1424
	/* Domain 0 is reserved, so don't process it */
1426 if (!domain)
1427 return;
1428
1429 domain_remove_dev_info(domain);
1430 /* destroy iovas */
1431 put_iova_domain(&domain->iovad);
ba395927
KA
1432
1433 /* clear ptes */
595badf5 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1435
1436 /* free page tables */
d794dc9b 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1438
2c2e2c38
FY
1439 for_each_active_iommu(iommu, drhd)
1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1441 iommu_detach_domain(domain, iommu);
1442
ba395927
KA
1443 free_domain_mem(domain);
1444}
1445
4ed0d3e6
FY
1446static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1447 u8 bus, u8 devfn, int translation)
ba395927
KA
1448{
1449 struct context_entry *context;
ba395927 1450 unsigned long flags;
5331fe6f 1451 struct intel_iommu *iommu;
ea6606b0
WH
1452 struct dma_pte *pgd;
1453 unsigned long num;
1454 unsigned long ndomains;
1455 int id;
1456 int agaw;
93a23a72 1457 struct device_domain_info *info = NULL;
ba395927
KA
1458
1459 pr_debug("Set context mapping for %02x:%02x.%d\n",
1460 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1461
ba395927 1462 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1463 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1464 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1465
276dbf99 1466 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1467 if (!iommu)
1468 return -ENODEV;
1469
ba395927
KA
1470 context = device_to_context_entry(iommu, bus, devfn);
1471 if (!context)
1472 return -ENOMEM;
1473 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1474 if (context_present(context)) {
ba395927
KA
1475 spin_unlock_irqrestore(&iommu->lock, flags);
1476 return 0;
1477 }
1478
ea6606b0
WH
1479 id = domain->id;
1480 pgd = domain->pgd;
1481
2c2e2c38
FY
1482 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1483 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1484 int found = 0;
1485
1486 /* find an available domain id for this device in iommu */
1487 ndomains = cap_ndoms(iommu->cap);
1488 num = find_first_bit(iommu->domain_ids, ndomains);
1489 for (; num < ndomains; ) {
1490 if (iommu->domains[num] == domain) {
1491 id = num;
1492 found = 1;
1493 break;
1494 }
1495 num = find_next_bit(iommu->domain_ids,
1496 cap_ndoms(iommu->cap), num+1);
1497 }
1498
1499 if (found == 0) {
1500 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1501 if (num >= ndomains) {
1502 spin_unlock_irqrestore(&iommu->lock, flags);
1503 printk(KERN_ERR "IOMMU: no free domain ids\n");
1504 return -EFAULT;
1505 }
1506
1507 set_bit(num, iommu->domain_ids);
1508 iommu->domains[num] = domain;
1509 id = num;
1510 }
1511
1512 /* Skip top levels of page tables for
1513 * iommu which has less agaw than default.
1672af11 1514 * Unnecessary for PT mode.
ea6606b0 1515 */
1672af11
CW
1516 if (translation != CONTEXT_TT_PASS_THROUGH) {
1517 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1518 pgd = phys_to_virt(dma_pte_addr(pgd));
1519 if (!dma_pte_present(pgd)) {
1520 spin_unlock_irqrestore(&iommu->lock, flags);
1521 return -ENOMEM;
1522 }
ea6606b0
WH
1523 }
1524 }
1525 }
1526
1527 context_set_domain_id(context, id);
4ed0d3e6 1528
93a23a72
YZ
1529 if (translation != CONTEXT_TT_PASS_THROUGH) {
1530 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1531 translation = info ? CONTEXT_TT_DEV_IOTLB :
1532 CONTEXT_TT_MULTI_LEVEL;
1533 }
4ed0d3e6
FY
1534 /*
1535 * In pass through mode, AW must be programmed to indicate the largest
1536 * AGAW value supported by hardware. And ASR is ignored by hardware.
1537 */
93a23a72 1538 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1539 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1540 else {
1541 context_set_address_root(context, virt_to_phys(pgd));
1542 context_set_address_width(context, iommu->agaw);
1543 }
4ed0d3e6
FY
1544
1545 context_set_translation_type(context, translation);
c07e7d21
MM
1546 context_set_fault_enable(context);
1547 context_set_present(context);
5331fe6f 1548 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1549
4c25a2c1
DW
	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
1556 if (cap_caching_mode(iommu->cap)) {
1557 iommu->flush.flush_context(iommu, 0,
1558 (((u16)bus) << 8) | devfn,
1559 DMA_CCMD_MASK_NOBIT,
1560 DMA_CCMD_DEVICE_INVL);
1f0ef2aa 1561 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1562 } else {
ba395927 1563 iommu_flush_write_buffer(iommu);
4c25a2c1 1564 }
93a23a72 1565 iommu_enable_dev_iotlb(info);
ba395927 1566 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1567
1568 spin_lock_irqsave(&domain->iommu_lock, flags);
1569 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1570 domain->iommu_count++;
4c923d47
SS
1571 if (domain->iommu_count == 1)
1572 domain->nid = iommu->node;
58c610bd 1573 domain_update_iommu_cap(domain);
c7151a8d
WH
1574 }
1575 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1576 return 0;
1577}
1578
1579static int
4ed0d3e6
FY
1580domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1581 int translation)
ba395927
KA
1582{
1583 int ret;
1584 struct pci_dev *tmp, *parent;
1585
276dbf99 1586 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1587 pdev->bus->number, pdev->devfn,
1588 translation);
ba395927
KA
1589 if (ret)
1590 return ret;
1591
1592 /* dependent device mapping */
1593 tmp = pci_find_upstream_pcie_bridge(pdev);
1594 if (!tmp)
1595 return 0;
1596 /* Secondary interface's bus number and devfn 0 */
1597 parent = pdev->bus->self;
1598 while (parent != tmp) {
276dbf99
DW
1599 ret = domain_context_mapping_one(domain,
1600 pci_domain_nr(parent->bus),
1601 parent->bus->number,
4ed0d3e6 1602 parent->devfn, translation);
ba395927
KA
1603 if (ret)
1604 return ret;
1605 parent = parent->bus->self;
1606 }
45e829ea 1607 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1608 return domain_context_mapping_one(domain,
276dbf99 1609 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1610 tmp->subordinate->number, 0,
1611 translation);
ba395927
KA
1612 else /* this is a legacy PCI bridge */
1613 return domain_context_mapping_one(domain,
276dbf99
DW
1614 pci_domain_nr(tmp->bus),
1615 tmp->bus->number,
4ed0d3e6
FY
1616 tmp->devfn,
1617 translation);
ba395927
KA
1618}
1619
5331fe6f 1620static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1621{
1622 int ret;
1623 struct pci_dev *tmp, *parent;
5331fe6f
WH
1624 struct intel_iommu *iommu;
1625
276dbf99
DW
1626 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1627 pdev->devfn);
5331fe6f
WH
1628 if (!iommu)
1629 return -ENODEV;
ba395927 1630
276dbf99 1631 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1632 if (!ret)
1633 return ret;
1634 /* dependent device mapping */
1635 tmp = pci_find_upstream_pcie_bridge(pdev);
1636 if (!tmp)
1637 return ret;
1638 /* Secondary interface's bus number and devfn 0 */
1639 parent = pdev->bus->self;
1640 while (parent != tmp) {
8c11e798 1641 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1642 parent->devfn);
ba395927
KA
1643 if (!ret)
1644 return ret;
1645 parent = parent->bus->self;
1646 }
5f4d91a1 1647 if (pci_is_pcie(tmp))
276dbf99
DW
1648 return device_context_mapped(iommu, tmp->subordinate->number,
1649 0);
ba395927 1650 else
276dbf99
DW
1651 return device_context_mapped(iommu, tmp->bus->number,
1652 tmp->devfn);
ba395927
KA
1653}
1654
f532959b
FY
1655/* Returns a number of VTD pages, but aligned to MM page size */
1656static inline unsigned long aligned_nrpages(unsigned long host_addr,
1657 size_t size)
1658{
1659 host_addr &= ~PAGE_MASK;
1660 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1661}
1662
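/*
 * Example for aligned_nrpages() (assuming 4K MM pages): a buffer starting
 * at offset 0x200 within its page with size 0x1000 spans
 * PAGE_ALIGN(0x1200) == 0x2000 bytes, i.e. two VT-d pages, even though it
 * is only one page long.
 */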
9051aa02
DW
1663static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1664 struct scatterlist *sg, unsigned long phys_pfn,
1665 unsigned long nr_pages, int prot)
e1605495
DW
1666{
1667 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1668 phys_addr_t uninitialized_var(pteval);
e1605495 1669 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1670 unsigned long sg_res;
e1605495
DW
1671
1672 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1673
1674 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1675 return -EINVAL;
1676
1677 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1678
9051aa02
DW
1679 if (sg)
1680 sg_res = 0;
1681 else {
1682 sg_res = nr_pages + 1;
1683 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1684 }
1685
e1605495 1686 while (nr_pages--) {
c85994e4
DW
1687 uint64_t tmp;
1688
e1605495 1689 if (!sg_res) {
f532959b 1690 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1691 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1692 sg->dma_length = sg->length;
1693 pteval = page_to_phys(sg_page(sg)) | prot;
1694 }
1695 if (!pte) {
1696 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1697 if (!pte)
1698 return -ENOMEM;
1699 }
1700 /* We don't need lock here, nobody else
1701 * touches the iova range
1702 */
7766a3fb 1703 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1704 if (tmp) {
1bf20f0d 1705 static int dumps = 5;
c85994e4
DW
1706 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1707 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1708 if (dumps) {
1709 dumps--;
1710 debug_dma_dump_mappings(NULL);
1711 }
1712 WARN_ON(1);
1713 }
e1605495 1714 pte++;
75e6bf96 1715 if (!nr_pages || first_pte_in_page(pte)) {
e1605495
DW
1716 domain_flush_cache(domain, first_pte,
1717 (void *)pte - (void *)first_pte);
1718 pte = NULL;
1719 }
1720 iov_pfn++;
1721 pteval += VTD_PAGE_SIZE;
1722 sg_res--;
1723 if (!sg_res)
1724 sg = sg_next(sg);
1725 }
1726 return 0;
1727}
1728
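/*
 * In __domain_mapping() above, sg_res counts the VT-d pages still to be
 * mapped for the current scatterlist element (or nr_pages + 1 for the
 * single-region case), pteval advances by VTD_PAGE_SIZE per iteration, and
 * cmpxchg64_local() catches an already-present PTE, which is reported as
 * an error rather than silently overwritten.
 */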
9051aa02
DW
1729static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1730 struct scatterlist *sg, unsigned long nr_pages,
1731 int prot)
ba395927 1732{
9051aa02
DW
1733 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1734}
6f6a00e4 1735
9051aa02
DW
1736static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1737 unsigned long phys_pfn, unsigned long nr_pages,
1738 int prot)
1739{
1740 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1741}
1742
c7151a8d 1743static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1744{
c7151a8d
WH
1745 if (!iommu)
1746 return;
8c11e798
WH
1747
1748 clear_context_table(iommu, bus, devfn);
1749 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1750 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1751 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1752}
1753
1754static void domain_remove_dev_info(struct dmar_domain *domain)
1755{
1756 struct device_domain_info *info;
1757 unsigned long flags;
c7151a8d 1758 struct intel_iommu *iommu;
ba395927
KA
1759
1760 spin_lock_irqsave(&device_domain_lock, flags);
1761 while (!list_empty(&domain->devices)) {
1762 info = list_entry(domain->devices.next,
1763 struct device_domain_info, link);
1764 list_del(&info->link);
1765 list_del(&info->global);
1766 if (info->dev)
358dd8ac 1767 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1768 spin_unlock_irqrestore(&device_domain_lock, flags);
1769
93a23a72 1770 iommu_disable_dev_iotlb(info);
276dbf99 1771 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1772 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1773 free_devinfo_mem(info);
1774
1775 spin_lock_irqsave(&device_domain_lock, flags);
1776 }
1777 spin_unlock_irqrestore(&device_domain_lock, flags);
1778}
1779
/*
 * find_domain
 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
 */
38717946 1784static struct dmar_domain *
ba395927
KA
1785find_domain(struct pci_dev *pdev)
1786{
1787 struct device_domain_info *info;
1788
1789 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1790 info = pdev->dev.archdata.iommu;
ba395927
KA
1791 if (info)
1792 return info->domain;
1793 return NULL;
1794}
1795
ba395927
KA
1796/* domain is initialized */
1797static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1798{
1799 struct dmar_domain *domain, *found = NULL;
1800 struct intel_iommu *iommu;
1801 struct dmar_drhd_unit *drhd;
1802 struct device_domain_info *info, *tmp;
1803 struct pci_dev *dev_tmp;
1804 unsigned long flags;
1805 int bus = 0, devfn = 0;
276dbf99 1806 int segment;
2c2e2c38 1807 int ret;
ba395927
KA
1808
1809 domain = find_domain(pdev);
1810 if (domain)
1811 return domain;
1812
276dbf99
DW
1813 segment = pci_domain_nr(pdev->bus);
1814
ba395927
KA
1815 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1816 if (dev_tmp) {
5f4d91a1 1817 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1818 bus = dev_tmp->subordinate->number;
1819 devfn = 0;
1820 } else {
1821 bus = dev_tmp->bus->number;
1822 devfn = dev_tmp->devfn;
1823 }
1824 spin_lock_irqsave(&device_domain_lock, flags);
1825 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1826 if (info->segment == segment &&
1827 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1828 found = info->domain;
1829 break;
1830 }
1831 }
1832 spin_unlock_irqrestore(&device_domain_lock, flags);
1833	/* pcie-to-pci bridge already has a domain, use it */
1834 if (found) {
1835 domain = found;
1836 goto found_domain;
1837 }
1838 }
1839
2c2e2c38
FY
1840 domain = alloc_domain();
1841 if (!domain)
1842 goto error;
1843
ba395927
KA
1844 /* Allocate new domain for the device */
1845 drhd = dmar_find_matched_drhd_unit(pdev);
1846 if (!drhd) {
1847 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1848 pci_name(pdev));
1849 return NULL;
1850 }
1851 iommu = drhd->iommu;
1852
2c2e2c38
FY
1853 ret = iommu_attach_domain(domain, iommu);
1854 if (ret) {
1855 domain_exit(domain);
ba395927 1856 goto error;
2c2e2c38 1857 }
ba395927
KA
1858
1859 if (domain_init(domain, gaw)) {
1860 domain_exit(domain);
1861 goto error;
1862 }
1863
1864 /* register pcie-to-pci device */
1865 if (dev_tmp) {
1866 info = alloc_devinfo_mem();
1867 if (!info) {
1868 domain_exit(domain);
1869 goto error;
1870 }
276dbf99 1871 info->segment = segment;
ba395927
KA
1872 info->bus = bus;
1873 info->devfn = devfn;
1874 info->dev = NULL;
1875 info->domain = domain;
1876 /* This domain is shared by devices under p2p bridge */
3b5410e7 1877 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1878
1879	/* pcie-to-pci bridge already has a domain, use it */
1880 found = NULL;
1881 spin_lock_irqsave(&device_domain_lock, flags);
1882 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
1883 if (tmp->segment == segment &&
1884 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
1885 found = tmp->domain;
1886 break;
1887 }
1888 }
1889 if (found) {
1890 free_devinfo_mem(info);
1891 domain_exit(domain);
1892 domain = found;
1893 } else {
1894 list_add(&info->link, &domain->devices);
1895 list_add(&info->global, &device_domain_list);
1896 }
1897 spin_unlock_irqrestore(&device_domain_lock, flags);
1898 }
1899
1900found_domain:
1901 info = alloc_devinfo_mem();
1902 if (!info)
1903 goto error;
276dbf99 1904 info->segment = segment;
ba395927
KA
1905 info->bus = pdev->bus->number;
1906 info->devfn = pdev->devfn;
1907 info->dev = pdev;
1908 info->domain = domain;
1909 spin_lock_irqsave(&device_domain_lock, flags);
1910	/* somebody else raced with us and set it up first */
1911 found = find_domain(pdev);
1912 if (found != NULL) {
1913 spin_unlock_irqrestore(&device_domain_lock, flags);
1914 if (found != domain) {
1915 domain_exit(domain);
1916 domain = found;
1917 }
1918 free_devinfo_mem(info);
1919 return domain;
1920 }
1921 list_add(&info->link, &domain->devices);
1922 list_add(&info->global, &device_domain_list);
358dd8ac 1923 pdev->dev.archdata.iommu = info;
ba395927
KA
1924 spin_unlock_irqrestore(&device_domain_lock, flags);
1925 return domain;
1926error:
1927 /* recheck it here, maybe others set it */
1928 return find_domain(pdev);
1929}
1930
2c2e2c38 1931static int iommu_identity_mapping;
e0fc7e0b
DW
1932#define IDENTMAP_ALL 1
1933#define IDENTMAP_GFX 2
1934#define IDENTMAP_AZALIA 4
2c2e2c38 1935
b213203e
DW
1936static int iommu_domain_identity_map(struct dmar_domain *domain,
1937 unsigned long long start,
1938 unsigned long long end)
ba395927 1939{
c5395d5c
DW
1940 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1941 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1942
1943 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1944 dma_to_mm_pfn(last_vpfn))) {
ba395927 1945 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 1946 return -ENOMEM;
ba395927
KA
1947 }
1948
c5395d5c
DW
1949 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1950 start, end, domain->id);
ba395927
KA
1951 /*
1952	 * The RMRR range might overlap an existing physical memory mapping,
1953	 * so clear it first
1954 */
c5395d5c 1955 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 1956
c5395d5c
DW
1957 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1958 last_vpfn - first_vpfn + 1,
61df7443 1959 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
1960}
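
/*
 * Editor's sketch (illustrative, not part of this driver): a minimal model
 * of the arithmetic above -- a byte range [start, end] becomes an inclusive
 * range of VT-d page frame numbers plus a 1:1 page count.  The 4KiB shift
 * matches VTD_PAGE_SHIFT; the struct and function names are hypothetical.
 */
struct identity_range {
	unsigned long first_vpfn;
	unsigned long last_vpfn;
	unsigned long nr_pages;
};

static struct identity_range identity_range_for(unsigned long long start,
						unsigned long long end)
{
	struct identity_range r;

	r.first_vpfn = (unsigned long)(start >> 12);	/* VTD_PAGE_SHIFT */
	r.last_vpfn  = (unsigned long)(end >> 12);
	r.nr_pages   = r.last_vpfn - r.first_vpfn + 1;	/* inclusive range */
	return r;
}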
1961
1962static int iommu_prepare_identity_map(struct pci_dev *pdev,
1963 unsigned long long start,
1964 unsigned long long end)
1965{
1966 struct dmar_domain *domain;
1967 int ret;
1968
c7ab48d2 1969 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
1970 if (!domain)
1971 return -ENOMEM;
1972
19943b0e
DW
1973	/* For _hardware_ passthrough, don't bother. But for software
1974	   passthrough, we do it anyway -- it may indicate a memory
1975	   range which is reserved in E820, and so didn't get set
1976	   up in si_domain to start with */
1977 if (domain == si_domain && hw_pass_through) {
1978		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1979 pci_name(pdev), start, end);
1980 return 0;
1981 }
1982
1983 printk(KERN_INFO
1984 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1985 pci_name(pdev), start, end);
2ff729f5 1986
5595b528
DW
1987 if (end < start) {
1988 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
1989 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1990 dmi_get_system_info(DMI_BIOS_VENDOR),
1991 dmi_get_system_info(DMI_BIOS_VERSION),
1992 dmi_get_system_info(DMI_PRODUCT_VERSION));
1993 ret = -EIO;
1994 goto error;
1995 }
1996
2ff729f5
DW
1997 if (end >> agaw_to_width(domain->agaw)) {
1998 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
1999 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2000 agaw_to_width(domain->agaw),
2001 dmi_get_system_info(DMI_BIOS_VENDOR),
2002 dmi_get_system_info(DMI_BIOS_VERSION),
2003 dmi_get_system_info(DMI_PRODUCT_VERSION));
2004 ret = -EIO;
2005 goto error;
2006 }
19943b0e 2007
b213203e 2008 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2009 if (ret)
2010 goto error;
2011
2012 /* context entry init */
4ed0d3e6 2013 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2014 if (ret)
2015 goto error;
2016
2017 return 0;
2018
2019 error:
ba395927
KA
2020 domain_exit(domain);
2021 return ret;
ba395927
KA
2022}
2023
2024static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2025 struct pci_dev *pdev)
2026{
358dd8ac 2027 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2028 return 0;
2029 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2030 rmrr->end_address + 1);
2031}
2032
49a0429e
KA
2033#ifdef CONFIG_DMAR_FLOPPY_WA
2034static inline void iommu_prepare_isa(void)
2035{
2036 struct pci_dev *pdev;
2037 int ret;
2038
2039 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2040 if (!pdev)
2041 return;
2042
c7ab48d2 2043 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
49a0429e
KA
2044 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2045
2046 if (ret)
c7ab48d2
DW
2047 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2048 "floppy might not work\n");
49a0429e
KA
2049
2050}
2051#else
2052static inline void iommu_prepare_isa(void)
2053{
2054 return;
2055}
2056#endif /* !CONFIG_DMAR_FLOPPY_WA */
2057
2c2e2c38 2058static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2059
2060static int __init si_domain_work_fn(unsigned long start_pfn,
2061 unsigned long end_pfn, void *datax)
2062{
2063 int *ret = datax;
2064
2065 *ret = iommu_domain_identity_map(si_domain,
2066 (uint64_t)start_pfn << PAGE_SHIFT,
2067 (uint64_t)end_pfn << PAGE_SHIFT);
2068 return *ret;
2069
2070}
2071
071e1374 2072static int __init si_domain_init(int hw)
2c2e2c38
FY
2073{
2074 struct dmar_drhd_unit *drhd;
2075 struct intel_iommu *iommu;
c7ab48d2 2076 int nid, ret = 0;
2c2e2c38
FY
2077
2078 si_domain = alloc_domain();
2079 if (!si_domain)
2080 return -EFAULT;
2081
c7ab48d2 2082 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2083
2084 for_each_active_iommu(iommu, drhd) {
2085 ret = iommu_attach_domain(si_domain, iommu);
2086 if (ret) {
2087 domain_exit(si_domain);
2088 return -EFAULT;
2089 }
2090 }
2091
2092 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2093 domain_exit(si_domain);
2094 return -EFAULT;
2095 }
2096
2097 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2098
19943b0e
DW
2099 if (hw)
2100 return 0;
2101
c7ab48d2
DW
2102 for_each_online_node(nid) {
2103 work_with_active_regions(nid, si_domain_work_fn, &ret);
2104 if (ret)
2105 return ret;
2106 }
2107
2c2e2c38
FY
2108 return 0;
2109}
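
/*
 * Editor's sketch (illustrative only): models how si_domain_work_fn() above
 * feeds each active memory region into the identity mapper -- page frame
 * numbers are scaled by PAGE_SHIFT (12 on x86 with 4KiB pages) into byte
 * addresses, and the walk stops on the first failure.  The region array and
 * the map_identity callback are hypothetical stand-ins.
 */
struct mem_region { unsigned long start_pfn, end_pfn; };

static int map_regions_identity(const struct mem_region *regions, int nr,
				int (*map_identity)(unsigned long long start,
						    unsigned long long end))
{
	int i, ret;

	for (i = 0; i < nr; i++) {
		ret = map_identity((unsigned long long)regions[i].start_pfn << 12,
				   (unsigned long long)regions[i].end_pfn << 12);
		if (ret)
			return ret;
	}
	return 0;
}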
2110
2111static void domain_remove_one_dev_info(struct dmar_domain *domain,
2112 struct pci_dev *pdev);
2113static int identity_mapping(struct pci_dev *pdev)
2114{
2115 struct device_domain_info *info;
2116
2117 if (likely(!iommu_identity_mapping))
2118 return 0;
2119
2120
2121 list_for_each_entry(info, &si_domain->devices, link)
2122 if (info->dev == pdev)
2123 return 1;
2124 return 0;
2125}
2126
2127static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2128 struct pci_dev *pdev,
2129 int translation)
2c2e2c38
FY
2130{
2131 struct device_domain_info *info;
2132 unsigned long flags;
5fe60f4e 2133 int ret;
2c2e2c38
FY
2134
2135 info = alloc_devinfo_mem();
2136 if (!info)
2137 return -ENOMEM;
2138
5fe60f4e
DW
2139 ret = domain_context_mapping(domain, pdev, translation);
2140 if (ret) {
2141 free_devinfo_mem(info);
2142 return ret;
2143 }
2144
2c2e2c38
FY
2145 info->segment = pci_domain_nr(pdev->bus);
2146 info->bus = pdev->bus->number;
2147 info->devfn = pdev->devfn;
2148 info->dev = pdev;
2149 info->domain = domain;
2150
2151 spin_lock_irqsave(&device_domain_lock, flags);
2152 list_add(&info->link, &domain->devices);
2153 list_add(&info->global, &device_domain_list);
2154 pdev->dev.archdata.iommu = info;
2155 spin_unlock_irqrestore(&device_domain_lock, flags);
2156
2157 return 0;
2158}
2159
6941af28
DW
2160static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2161{
e0fc7e0b
DW
2162 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2163 return 1;
2164
2165 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2166 return 1;
2167
2168 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2169 return 0;
6941af28 2170
3dfc813d
DW
2171 /*
2172 * We want to start off with all devices in the 1:1 domain, and
2173 * take them out later if we find they can't access all of memory.
2174 *
2175 * However, we can't do this for PCI devices behind bridges,
2176 * because all PCI devices behind the same bridge will end up
2177 * with the same source-id on their transactions.
2178 *
2179 * Practically speaking, we can't change things around for these
2180 * devices at run-time, because we can't be sure there'll be no
2181 * DMA transactions in flight for any of their siblings.
2182 *
2183 * So PCI devices (unless they're on the root bus) as well as
2184 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2185 * the 1:1 domain, just in _case_ one of their siblings turns out
2186 * not to be able to map all of memory.
2187 */
5f4d91a1 2188 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2189 if (!pci_is_root_bus(pdev->bus))
2190 return 0;
2191 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2192 return 0;
2193 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2194 return 0;
2195
2196 /*
2197 * At boot time, we don't yet know if devices will be 64-bit capable.
2198 * Assume that they will -- if they turn out not to be, then we can
2199 * take them out of the 1:1 domain later.
2200 */
6941af28
DW
2201 if (!startup)
2202 return pdev->dma_mask > DMA_BIT_MASK(32);
2203
2204 return 1;
2205}
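
/*
 * Editor's sketch (illustrative only): the identity-map policy above reduced
 * to a predicate over a hypothetical pci_info struct.  Ordering matters:
 * quirk classes first, then the IDENTMAP_ALL gate, then the bridge
 * exclusions (shared source-id), then the DMA-mask check once drivers have
 * bound.  Flag values mirror the IDENTMAP_* definitions earlier in the file.
 */
struct pci_info {
	int is_azalia, is_gfx, is_pcie;
	int on_root_bus, is_pci_bridge, is_pcie_to_pci_bridge;
	unsigned long long dma_mask;
};

static int should_identity_map(const struct pci_info *d, int idmap_flags,
			       int startup)
{
	if ((idmap_flags & 4 /* IDENTMAP_AZALIA */) && d->is_azalia)
		return 1;
	if ((idmap_flags & 2 /* IDENTMAP_GFX */) && d->is_gfx)
		return 1;
	if (!(idmap_flags & 1 /* IDENTMAP_ALL */))
		return 0;

	/* devices behind (or acting as) a PCI(e)-to-PCI bridge share a source-id */
	if (!d->is_pcie) {
		if (!d->on_root_bus || d->is_pci_bridge)
			return 0;
	} else if (d->is_pcie_to_pci_bridge)
		return 0;

	/* after boot the DMA mask is known; at startup assume 64-bit capable */
	if (!startup)
		return d->dma_mask > 0xffffffffULL;
	return 1;
}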
2206
071e1374 2207static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2208{
2c2e2c38
FY
2209 struct pci_dev *pdev = NULL;
2210 int ret;
2211
19943b0e 2212 ret = si_domain_init(hw);
2c2e2c38
FY
2213 if (ret)
2214 return -EFAULT;
2215
2c2e2c38 2216 for_each_pci_dev(pdev) {
6941af28 2217 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2218 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2219 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2220
5fe60f4e 2221 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2222 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2223 CONTEXT_TT_MULTI_LEVEL);
2224 if (ret)
2225 return ret;
62edf5dc 2226 }
2c2e2c38
FY
2227 }
2228
2229 return 0;
2230}
2231
2232int __init init_dmars(void)
ba395927
KA
2233{
2234 struct dmar_drhd_unit *drhd;
2235 struct dmar_rmrr_unit *rmrr;
2236 struct pci_dev *pdev;
2237 struct intel_iommu *iommu;
9d783ba0 2238 int i, ret;
2c2e2c38 2239
ba395927
KA
2240 /*
2241 * for each drhd
2242 * allocate root
2243 * initialize and program root entry to not present
2244 * endfor
2245 */
2246 for_each_drhd_unit(drhd) {
5e0d2a6f 2247 g_num_of_iommus++;
2248 /*
2249		 * lock not needed as this is only incremented in the
2250		 * single-threaded kernel __init code path; all other
2251		 * accesses are read-only
2252 */
2253 }
2254
d9630fe9
WH
2255 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2256 GFP_KERNEL);
2257 if (!g_iommus) {
2258 printk(KERN_ERR "Allocating global iommu array failed\n");
2259 ret = -ENOMEM;
2260 goto error;
2261 }
2262
80b20dd8 2263 deferred_flush = kzalloc(g_num_of_iommus *
2264 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2265 if (!deferred_flush) {
5e0d2a6f 2266 ret = -ENOMEM;
2267 goto error;
2268 }
2269
5e0d2a6f 2270 for_each_drhd_unit(drhd) {
2271 if (drhd->ignored)
2272 continue;
1886e8a9
SS
2273
2274 iommu = drhd->iommu;
d9630fe9 2275 g_iommus[iommu->seq_id] = iommu;
ba395927 2276
e61d98d8
SS
2277 ret = iommu_init_domains(iommu);
2278 if (ret)
2279 goto error;
2280
ba395927
KA
2281 /*
2282 * TBD:
2283 * we could share the same root & context tables
2284		 * among all IOMMUs. Need to split it later.
2285 */
2286 ret = iommu_alloc_root_entry(iommu);
2287 if (ret) {
2288 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2289 goto error;
2290 }
4ed0d3e6 2291 if (!ecap_pass_through(iommu->ecap))
19943b0e 2292 hw_pass_through = 0;
ba395927
KA
2293 }
2294
1531a6a6
SS
2295 /*
2296	 * Start from a sane iommu hardware state.
2297 */
a77b67d4
YS
2298 for_each_drhd_unit(drhd) {
2299 if (drhd->ignored)
2300 continue;
2301
2302 iommu = drhd->iommu;
1531a6a6
SS
2303
2304 /*
2305 * If the queued invalidation is already initialized by us
2306 * (for example, while enabling interrupt-remapping) then
2307		 * things are already rolling from a sane state.
2308 */
2309 if (iommu->qi)
2310 continue;
2311
2312 /*
2313 * Clear any previous faults.
2314 */
2315 dmar_fault(-1, iommu);
2316 /*
2317 * Disable queued invalidation if supported and already enabled
2318 * before OS handover.
2319 */
2320 dmar_disable_qi(iommu);
2321 }
2322
2323 for_each_drhd_unit(drhd) {
2324 if (drhd->ignored)
2325 continue;
2326
2327 iommu = drhd->iommu;
2328
a77b67d4
YS
2329 if (dmar_enable_qi(iommu)) {
2330 /*
2331 * Queued Invalidate not enabled, use Register Based
2332 * Invalidate
2333 */
2334 iommu->flush.flush_context = __iommu_flush_context;
2335 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2336 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
2337 "invalidation\n",
2338 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2339 } else {
2340 iommu->flush.flush_context = qi_flush_context;
2341 iommu->flush.flush_iotlb = qi_flush_iotlb;
2342 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
2343 "invalidation\n",
2344 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2345 }
2346 }
2347
19943b0e 2348 if (iommu_pass_through)
e0fc7e0b
DW
2349 iommu_identity_mapping |= IDENTMAP_ALL;
2350
19943b0e 2351#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2352 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2353#endif
e0fc7e0b
DW
2354
2355 check_tylersburg_isoch();
2356
ba395927 2357 /*
19943b0e
DW
2358	 * If pass through is not set or not enabled, set up context entries for
2359	 * identity mappings for rmrr, gfx, and isa, and possibly for the static
2360	 * identity mapping when iommu_identity_mapping is set.
ba395927 2361 */
19943b0e
DW
2362 if (iommu_identity_mapping) {
2363 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2364 if (ret) {
19943b0e
DW
2365 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2366 goto error;
ba395927
KA
2367 }
2368 }
ba395927 2369 /*
19943b0e
DW
2370 * For each rmrr
2371 * for each dev attached to rmrr
2372 * do
2373 * locate drhd for dev, alloc domain for dev
2374 * allocate free domain
2375 * allocate page table entries for rmrr
2376 * if context not allocated for bus
2377 * allocate and init context
2378 * set present in root table for this bus
2379 * init context with domain, translation etc
2380 * endfor
2381 * endfor
ba395927 2382 */
19943b0e
DW
2383 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2384 for_each_rmrr_units(rmrr) {
2385 for (i = 0; i < rmrr->devices_cnt; i++) {
2386 pdev = rmrr->devices[i];
2387 /*
2388			 * some BIOSes list non-existent devices in the DMAR
2389 * table.
2390 */
2391 if (!pdev)
2392 continue;
2393 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2394 if (ret)
2395 printk(KERN_ERR
2396 "IOMMU: mapping reserved region failed\n");
ba395927 2397 }
4ed0d3e6 2398 }
49a0429e 2399
19943b0e
DW
2400 iommu_prepare_isa();
2401
ba395927
KA
2402 /*
2403 * for each drhd
2404 * enable fault log
2405 * global invalidate context cache
2406 * global invalidate iotlb
2407 * enable translation
2408 */
2409 for_each_drhd_unit(drhd) {
2410 if (drhd->ignored)
2411 continue;
2412 iommu = drhd->iommu;
ba395927
KA
2413
2414 iommu_flush_write_buffer(iommu);
2415
3460a6d9
KA
2416 ret = dmar_set_interrupt(iommu);
2417 if (ret)
2418 goto error;
2419
ba395927
KA
2420 iommu_set_root_entry(iommu);
2421
4c25a2c1 2422 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2423 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2424
ba395927
KA
2425 ret = iommu_enable_translation(iommu);
2426 if (ret)
2427 goto error;
b94996c9
DW
2428
2429 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2430 }
2431
2432 return 0;
2433error:
2434 for_each_drhd_unit(drhd) {
2435 if (drhd->ignored)
2436 continue;
2437 iommu = drhd->iommu;
2438 free_iommu(iommu);
2439 }
d9630fe9 2440 kfree(g_iommus);
ba395927
KA
2441 return ret;
2442}
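
/*
 * Editor's sketch (illustrative only): the invalidation-method selection in
 * init_dmars() above, reduced to its essentials -- if queued invalidation
 * cannot be enabled, register-based flush callbacks are installed instead.
 * struct flush_ops and the callback names here are hypothetical; the real
 * code stores the pointers in iommu->flush.
 */
struct flush_ops {
	void (*flush_context)(void *iommu);
	void (*flush_iotlb)(void *iommu);
};

static void reg_flush_context(void *iommu)   { (void)iommu; /* CCMD register write */ }
static void reg_flush_iotlb(void *iommu)     { (void)iommu; /* IOTLB register write */ }
static void qi_flush_context_cb(void *iommu) { (void)iommu; /* queued descriptor */ }
static void qi_flush_iotlb_cb(void *iommu)   { (void)iommu; /* queued descriptor */ }

static void select_flush_ops(struct flush_ops *ops, int qi_enabled)
{
	if (!qi_enabled) {
		ops->flush_context = reg_flush_context;
		ops->flush_iotlb   = reg_flush_iotlb;
	} else {
		ops->flush_context = qi_flush_context_cb;
		ops->flush_iotlb   = qi_flush_iotlb_cb;
	}
}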
2443
5a5e02a6 2444/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2445static struct iova *intel_alloc_iova(struct device *dev,
2446 struct dmar_domain *domain,
2447 unsigned long nrpages, uint64_t dma_mask)
ba395927 2448{
ba395927 2449 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2450 struct iova *iova = NULL;
ba395927 2451
875764de
DW
2452 /* Restrict dma_mask to the width that the iommu can handle */
2453 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2454
2455 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2456 /*
2457 * First try to allocate an io virtual address in
284901a9 2458 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2459		 * from a higher range
ba395927 2460 */
875764de
DW
2461 iova = alloc_iova(&domain->iovad, nrpages,
2462 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2463 if (iova)
2464 return iova;
2465 }
2466 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2467 if (unlikely(!iova)) {
2468		printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2469 nrpages, pci_name(pdev));
f76aec76
KA
2470 return NULL;
2471 }
2472
2473 return iova;
2474}
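
/*
 * Editor's sketch (illustrative only): the two-pass IOVA allocation policy
 * above in plain C -- the caller first clamps the device mask to what the
 * domain can address, a sub-4GiB address is preferred unless forcedac is
 * set, and only then is the full mask tried.  alloc_below() is a
 * hypothetical stand-in for alloc_iova().
 */
static unsigned long long clamp_dma_mask(unsigned long long dev_mask,
					 unsigned long long domain_max_addr)
{
	return dev_mask < domain_max_addr ? dev_mask : domain_max_addr;
}

static void *alloc_iova_policy(void *iovad, unsigned long nrpages,
			       unsigned long long dma_mask, int forcedac,
			       void *(*alloc_below)(void *iovad,
						    unsigned long nrpages,
						    unsigned long limit_pfn))
{
	void *iova = NULL;

	if (!forcedac && dma_mask > 0xffffffffULL)
		iova = alloc_below(iovad, nrpages,
				   (unsigned long)(0xffffffffULL >> 12));
	if (!iova)
		iova = alloc_below(iovad, nrpages,
				   (unsigned long)(dma_mask >> 12));
	return iova;
}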
2475
147202aa 2476static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2477{
2478 struct dmar_domain *domain;
2479 int ret;
2480
2481 domain = get_domain_for_dev(pdev,
2482 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2483 if (!domain) {
2484 printk(KERN_ERR
2485			"Allocating domain for %s failed\n", pci_name(pdev));
4fe05bbc 2486 return NULL;
ba395927
KA
2487 }
2488
2489 /* make sure context mapping is ok */
5331fe6f 2490 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2491 ret = domain_context_mapping(domain, pdev,
2492 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2493 if (ret) {
2494 printk(KERN_ERR
2495				"Domain context map for %s failed\n",
2496 pci_name(pdev));
4fe05bbc 2497 return NULL;
f76aec76 2498 }
ba395927
KA
2499 }
2500
f76aec76
KA
2501 return domain;
2502}
2503
147202aa
DW
2504static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2505{
2506 struct device_domain_info *info;
2507
2508 /* No lock here, assumes no domain exit in normal case */
2509 info = dev->dev.archdata.iommu;
2510 if (likely(info))
2511 return info->domain;
2512
2513 return __get_valid_domain_for_dev(dev);
2514}
2515
2c2e2c38
FY
2516static int iommu_dummy(struct pci_dev *pdev)
2517{
2518 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2519}
2520
2521/* Check if the pdev needs to go through non-identity map and unmap process.*/
73676832 2522static int iommu_no_mapping(struct device *dev)
2c2e2c38 2523{
73676832 2524 struct pci_dev *pdev;
2c2e2c38
FY
2525 int found;
2526
73676832
DW
2527 if (unlikely(dev->bus != &pci_bus_type))
2528 return 1;
2529
2530 pdev = to_pci_dev(dev);
1e4c64c4
DW
2531 if (iommu_dummy(pdev))
2532 return 1;
2533
2c2e2c38 2534 if (!iommu_identity_mapping)
1e4c64c4 2535 return 0;
2c2e2c38
FY
2536
2537 found = identity_mapping(pdev);
2538 if (found) {
6941af28 2539 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2540 return 1;
2541 else {
2542 /*
2543 * 32 bit DMA is removed from si_domain and fall back
2544 * to non-identity mapping.
2545 */
2546 domain_remove_one_dev_info(si_domain, pdev);
2547 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2548 pci_name(pdev));
2549 return 0;
2550 }
2551 } else {
2552 /*
2553 * In case of a detached 64 bit DMA device from vm, the device
2554 * is put into si_domain for identity mapping.
2555 */
6941af28 2556 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2557 int ret;
5fe60f4e
DW
2558 ret = domain_add_dev_info(si_domain, pdev,
2559 hw_pass_through ?
2560 CONTEXT_TT_PASS_THROUGH :
2561 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2562 if (!ret) {
2563 printk(KERN_INFO "64bit %s uses identity mapping\n",
2564 pci_name(pdev));
2565 return 1;
2566 }
2567 }
2568 }
2569
1e4c64c4 2570 return 0;
2c2e2c38
FY
2571}
2572
bb9e6d65
FT
2573static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2574 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2575{
2576 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2577 struct dmar_domain *domain;
5b6985ce 2578 phys_addr_t start_paddr;
f76aec76
KA
2579 struct iova *iova;
2580 int prot = 0;
6865f0d1 2581 int ret;
8c11e798 2582 struct intel_iommu *iommu;
33041ec0 2583 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2584
2585 BUG_ON(dir == DMA_NONE);
2c2e2c38 2586
73676832 2587 if (iommu_no_mapping(hwdev))
6865f0d1 2588 return paddr;
f76aec76
KA
2589
2590 domain = get_valid_domain_for_dev(pdev);
2591 if (!domain)
2592 return 0;
2593
8c11e798 2594 iommu = domain_get_iommu(domain);
88cb6a74 2595 size = aligned_nrpages(paddr, size);
f76aec76 2596
5a5e02a6
DW
2597 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2598 pdev->dma_mask);
f76aec76
KA
2599 if (!iova)
2600 goto error;
2601
ba395927
KA
2602 /*
2603 * Check if DMAR supports zero-length reads on write only
2604 * mappings..
2605 */
2606 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2607 !cap_zlr(iommu->cap))
ba395927
KA
2608 prot |= DMA_PTE_READ;
2609 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2610 prot |= DMA_PTE_WRITE;
2611 /*
6865f0d1 2612 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2613 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2614 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2615 * is not a big problem
2616 */
0ab36de2 2617 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2618 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2619 if (ret)
2620 goto error;
2621
1f0ef2aa
DW
2622 /* it's a non-present to present mapping. Only flush if caching mode */
2623 if (cap_caching_mode(iommu->cap))
03d6a246 2624 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
1f0ef2aa 2625 else
8c11e798 2626 iommu_flush_write_buffer(iommu);
f76aec76 2627
03d6a246
DW
2628 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2629 start_paddr += paddr & ~PAGE_MASK;
2630 return start_paddr;
ba395927 2631
ba395927 2632error:
f76aec76
KA
2633 if (iova)
2634 __free_iova(&domain->iovad, iova);
4cf2e75d 2635	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2636 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2637 return 0;
2638}
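
/*
 * Editor's sketch (illustrative only): the address arithmetic used above --
 * how many 4KiB VT-d pages a (paddr, size) buffer needs once its sub-page
 * offset is counted, mirroring the intent of aligned_nrpages(), and how the
 * returned DMA address keeps that offset.  Helper names are hypothetical.
 */
static unsigned long nr_vtd_pages_for(unsigned long long paddr, size_t size)
{
	unsigned long long off = paddr & 0xfffULL;	/* offset within the first page */

	return (unsigned long)((off + size + 0xfffULL) >> 12);
}

static unsigned long long dma_addr_for(unsigned long iova_pfn,
				       unsigned long long paddr)
{
	/* page-aligned IOVA base plus the original sub-page offset */
	return ((unsigned long long)iova_pfn << 12) + (paddr & 0xfffULL);
}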
2639
ffbbef5c
FT
2640static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2641 unsigned long offset, size_t size,
2642 enum dma_data_direction dir,
2643 struct dma_attrs *attrs)
bb9e6d65 2644{
ffbbef5c
FT
2645 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2646 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2647}
2648
5e0d2a6f 2649static void flush_unmaps(void)
2650{
80b20dd8 2651 int i, j;
5e0d2a6f 2652
5e0d2a6f 2653 timer_on = 0;
2654
2655 /* just flush them all */
2656 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2657 struct intel_iommu *iommu = g_iommus[i];
2658 if (!iommu)
2659 continue;
c42d9f32 2660
9dd2fe89
YZ
2661 if (!deferred_flush[i].next)
2662 continue;
2663
2664 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2665 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2666 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2667 unsigned long mask;
2668 struct iova *iova = deferred_flush[i].iova[j];
2669
64de5af0 2670 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
93a23a72 2671 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
64de5af0 2672 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
93a23a72 2673 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2674 }
9dd2fe89 2675 deferred_flush[i].next = 0;
5e0d2a6f 2676 }
2677
5e0d2a6f 2678 list_size = 0;
5e0d2a6f 2679}
2680
2681static void flush_unmaps_timeout(unsigned long data)
2682{
80b20dd8 2683 unsigned long flags;
2684
2685 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2686 flush_unmaps();
80b20dd8 2687 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2688}
2689
2690static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2691{
2692 unsigned long flags;
80b20dd8 2693 int next, iommu_id;
8c11e798 2694 struct intel_iommu *iommu;
5e0d2a6f 2695
2696 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2697 if (list_size == HIGH_WATER_MARK)
2698 flush_unmaps();
2699
8c11e798
WH
2700 iommu = domain_get_iommu(dom);
2701 iommu_id = iommu->seq_id;
c42d9f32 2702
80b20dd8 2703 next = deferred_flush[iommu_id].next;
2704 deferred_flush[iommu_id].domain[next] = dom;
2705 deferred_flush[iommu_id].iova[next] = iova;
2706 deferred_flush[iommu_id].next++;
5e0d2a6f 2707
2708 if (!timer_on) {
2709 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2710 timer_on = 1;
2711 }
2712 list_size++;
2713 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2714}
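
/*
 * Editor's sketch (illustrative only): a minimal model of the deferred-unmap
 * batching above -- unmapped IOVAs queue up until a timer fires or a
 * high-water mark is hit, then one global IOTLB flush covers the whole batch
 * before the IOVAs are freed.  The queue layout, the entry limit shown here
 * and the callbacks are simplifications of deferred_flush[].
 */
#define SKETCH_HIGH_WATER 250

struct deferred_entry { void *domain; void *iova; };

struct deferred_queue {
	struct deferred_entry ent[SKETCH_HIGH_WATER];
	int next;
};

static void sketch_flush(struct deferred_queue *q,
			 void (*global_flush)(void),
			 void (*free_iova)(void *domain, void *iova))
{
	int i;

	global_flush();			/* one flush amortized over the batch */
	for (i = 0; i < q->next; i++)
		free_iova(q->ent[i].domain, q->ent[i].iova);
	q->next = 0;
}

static void sketch_add(struct deferred_queue *q, void *domain, void *iova,
		       void (*global_flush)(void),
		       void (*free_iova)(void *domain, void *iova))
{
	if (q->next == SKETCH_HIGH_WATER)
		sketch_flush(q, global_flush, free_iova);	/* never overflow */
	q->ent[q->next].domain = domain;
	q->ent[q->next].iova = iova;
	q->next++;
}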
2715
ffbbef5c
FT
2716static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2717 size_t size, enum dma_data_direction dir,
2718 struct dma_attrs *attrs)
ba395927 2719{
ba395927 2720 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2721 struct dmar_domain *domain;
d794dc9b 2722 unsigned long start_pfn, last_pfn;
ba395927 2723 struct iova *iova;
8c11e798 2724 struct intel_iommu *iommu;
ba395927 2725
73676832 2726 if (iommu_no_mapping(dev))
f76aec76 2727 return;
2c2e2c38 2728
ba395927
KA
2729 domain = find_domain(pdev);
2730 BUG_ON(!domain);
2731
8c11e798
WH
2732 iommu = domain_get_iommu(domain);
2733
ba395927 2734 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2735 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2736 (unsigned long long)dev_addr))
ba395927 2737 return;
ba395927 2738
d794dc9b
DW
2739 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2740 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2741
d794dc9b
DW
2742 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2743 pci_name(pdev), start_pfn, last_pfn);
ba395927 2744
f76aec76 2745 /* clear the whole page */
d794dc9b
DW
2746 dma_pte_clear_range(domain, start_pfn, last_pfn);
2747
f76aec76 2748 /* free page tables */
d794dc9b
DW
2749 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2750
5e0d2a6f 2751 if (intel_iommu_strict) {
03d6a246 2752 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
d794dc9b 2753 last_pfn - start_pfn + 1);
5e0d2a6f 2754 /* free iova */
2755 __free_iova(&domain->iovad, iova);
2756 } else {
2757 add_unmap(domain, iova);
2758 /*
2759		 * queue up the release of the unmap to save the roughly 1/6th
2760		 * of a cpu otherwise used up by the iotlb flush operation...
2761 */
5e0d2a6f 2762 }
ba395927
KA
2763}
2764
d7ab5c46
FT
2765static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2766 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2767{
2768 void *vaddr;
2769 int order;
2770
5b6985ce 2771 size = PAGE_ALIGN(size);
ba395927 2772 order = get_order(size);
e8bb910d
AW
2773
2774 if (!iommu_no_mapping(hwdev))
2775 flags &= ~(GFP_DMA | GFP_DMA32);
2776 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2777 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2778 flags |= GFP_DMA;
2779 else
2780 flags |= GFP_DMA32;
2781 }
ba395927
KA
2782
2783 vaddr = (void *)__get_free_pages(flags, order);
2784 if (!vaddr)
2785 return NULL;
2786 memset(vaddr, 0, size);
2787
bb9e6d65
FT
2788 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2789 DMA_BIDIRECTIONAL,
2790 hwdev->coherent_dma_mask);
ba395927
KA
2791 if (*dma_handle)
2792 return vaddr;
2793 free_pages((unsigned long)vaddr, order);
2794 return NULL;
2795}
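
/*
 * Editor's sketch (illustrative only): the zone-selection rule used above.
 * When the buffer will be remapped by the IOMMU the GFP zone restrictions
 * can be dropped; otherwise a coherent mask below 4GiB forces GFP_DMA and a
 * mask below the platform's required mask forces GFP_DMA32.  The flag values
 * here are symbolic placeholders, not the kernel's.
 */
#define SKETCH_GFP_DMA    0x01u
#define SKETCH_GFP_DMA32  0x02u

static unsigned int coherent_gfp_flags(unsigned int flags, int remapped,
				       unsigned long long coherent_mask,
				       unsigned long long required_mask)
{
	if (remapped)
		flags &= ~(SKETCH_GFP_DMA | SKETCH_GFP_DMA32);
	else if (coherent_mask < required_mask) {
		if (coherent_mask < 0xffffffffULL)
			flags |= SKETCH_GFP_DMA;
		else
			flags |= SKETCH_GFP_DMA32;
	}
	return flags;
}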
2796
d7ab5c46
FT
2797static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2798 dma_addr_t dma_handle)
ba395927
KA
2799{
2800 int order;
2801
5b6985ce 2802 size = PAGE_ALIGN(size);
ba395927
KA
2803 order = get_order(size);
2804
0db9b7ae 2805 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2806 free_pages((unsigned long)vaddr, order);
2807}
2808
d7ab5c46
FT
2809static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2810 int nelems, enum dma_data_direction dir,
2811 struct dma_attrs *attrs)
ba395927 2812{
ba395927
KA
2813 struct pci_dev *pdev = to_pci_dev(hwdev);
2814 struct dmar_domain *domain;
d794dc9b 2815 unsigned long start_pfn, last_pfn;
f76aec76 2816 struct iova *iova;
8c11e798 2817 struct intel_iommu *iommu;
ba395927 2818
73676832 2819 if (iommu_no_mapping(hwdev))
ba395927
KA
2820 return;
2821
2822 domain = find_domain(pdev);
8c11e798
WH
2823 BUG_ON(!domain);
2824
2825 iommu = domain_get_iommu(domain);
ba395927 2826
c03ab37c 2827 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2828 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2829 (unsigned long long)sglist[0].dma_address))
f76aec76 2830 return;
f76aec76 2831
d794dc9b
DW
2832 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2833 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2834
2835 /* clear the whole page */
d794dc9b
DW
2836 dma_pte_clear_range(domain, start_pfn, last_pfn);
2837
f76aec76 2838 /* free page tables */
d794dc9b 2839 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2840
acea0018
DW
2841 if (intel_iommu_strict) {
2842 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2843 last_pfn - start_pfn + 1);
2844 /* free iova */
2845 __free_iova(&domain->iovad, iova);
2846 } else {
2847 add_unmap(domain, iova);
2848 /*
2849		 * queue up the release of the unmap to save the roughly 1/6th
2850		 * of a cpu otherwise used up by the iotlb flush operation...
2851 */
2852 }
ba395927
KA
2853}
2854
ba395927 2855static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2856 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2857{
2858 int i;
c03ab37c 2859 struct scatterlist *sg;
ba395927 2860
c03ab37c 2861 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2862 BUG_ON(!sg_page(sg));
4cf2e75d 2863 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 2864 sg->dma_length = sg->length;
ba395927
KA
2865 }
2866 return nelems;
2867}
2868
d7ab5c46
FT
2869static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2870 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 2871{
ba395927 2872 int i;
ba395927
KA
2873 struct pci_dev *pdev = to_pci_dev(hwdev);
2874 struct dmar_domain *domain;
f76aec76
KA
2875 size_t size = 0;
2876 int prot = 0;
b536d24d 2877 size_t offset_pfn = 0;
f76aec76
KA
2878 struct iova *iova = NULL;
2879 int ret;
c03ab37c 2880 struct scatterlist *sg;
b536d24d 2881 unsigned long start_vpfn;
8c11e798 2882 struct intel_iommu *iommu;
ba395927
KA
2883
2884 BUG_ON(dir == DMA_NONE);
73676832 2885 if (iommu_no_mapping(hwdev))
c03ab37c 2886 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2887
f76aec76
KA
2888 domain = get_valid_domain_for_dev(pdev);
2889 if (!domain)
2890 return 0;
2891
8c11e798
WH
2892 iommu = domain_get_iommu(domain);
2893
b536d24d 2894 for_each_sg(sglist, sg, nelems, i)
88cb6a74 2895 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 2896
5a5e02a6
DW
2897 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2898 pdev->dma_mask);
f76aec76 2899 if (!iova) {
c03ab37c 2900 sglist->dma_length = 0;
f76aec76
KA
2901 return 0;
2902 }
2903
2904 /*
2905 * Check if DMAR supports zero-length reads on write only
2906 * mappings..
2907 */
2908 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2909 !cap_zlr(iommu->cap))
f76aec76
KA
2910 prot |= DMA_PTE_READ;
2911 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2912 prot |= DMA_PTE_WRITE;
2913
b536d24d 2914 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 2915
f532959b 2916 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
2917 if (unlikely(ret)) {
2918 /* clear the page */
2919 dma_pte_clear_range(domain, start_vpfn,
2920 start_vpfn + size - 1);
2921 /* free page tables */
2922 dma_pte_free_pagetable(domain, start_vpfn,
2923 start_vpfn + size - 1);
2924 /* free iova */
2925 __free_iova(&domain->iovad, iova);
2926 return 0;
ba395927
KA
2927 }
2928
1f0ef2aa
DW
2929 /* it's a non-present to present mapping. Only flush if caching mode */
2930 if (cap_caching_mode(iommu->cap))
03d6a246 2931		iommu_flush_iotlb_psi(iommu, 0, start_vpfn, size);
1f0ef2aa 2932 else
8c11e798 2933 iommu_flush_write_buffer(iommu);
1f0ef2aa 2934
ba395927
KA
2935 return nelems;
2936}
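
/*
 * Editor's sketch (illustrative only): how the single IOVA allocation above
 * is sized -- the page-aligned length of every scatterlist element is
 * summed, each element contributing its own sub-page offset.  struct
 * sg_sketch is a hypothetical stand-in for struct scatterlist.
 */
struct sg_sketch { unsigned int offset, length; };

static unsigned long total_sg_pages(const struct sg_sketch *sg, int nelems)
{
	unsigned long pages = 0;
	int i;

	for (i = 0; i < nelems; i++)
		pages += ((sg[i].offset & 0xfffu) + sg[i].length + 0xfffu) >> 12;
	return pages;
}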
2937
dfb805e8
FT
2938static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
2939{
2940 return !dma_addr;
2941}
2942
160c1d8e 2943struct dma_map_ops intel_dma_ops = {
ba395927
KA
2944 .alloc_coherent = intel_alloc_coherent,
2945 .free_coherent = intel_free_coherent,
ba395927
KA
2946 .map_sg = intel_map_sg,
2947 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
2948 .map_page = intel_map_page,
2949 .unmap_page = intel_unmap_page,
dfb805e8 2950 .mapping_error = intel_mapping_error,
ba395927
KA
2951};
2952
2953static inline int iommu_domain_cache_init(void)
2954{
2955 int ret = 0;
2956
2957 iommu_domain_cache = kmem_cache_create("iommu_domain",
2958 sizeof(struct dmar_domain),
2959 0,
2960 SLAB_HWCACHE_ALIGN,
2961
2962 NULL);
2963 if (!iommu_domain_cache) {
2964 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2965 ret = -ENOMEM;
2966 }
2967
2968 return ret;
2969}
2970
2971static inline int iommu_devinfo_cache_init(void)
2972{
2973 int ret = 0;
2974
2975 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2976 sizeof(struct device_domain_info),
2977 0,
2978 SLAB_HWCACHE_ALIGN,
ba395927
KA
2979 NULL);
2980 if (!iommu_devinfo_cache) {
2981 printk(KERN_ERR "Couldn't create devinfo cache\n");
2982 ret = -ENOMEM;
2983 }
2984
2985 return ret;
2986}
2987
2988static inline int iommu_iova_cache_init(void)
2989{
2990 int ret = 0;
2991
2992 iommu_iova_cache = kmem_cache_create("iommu_iova",
2993 sizeof(struct iova),
2994 0,
2995 SLAB_HWCACHE_ALIGN,
ba395927
KA
2996 NULL);
2997 if (!iommu_iova_cache) {
2998 printk(KERN_ERR "Couldn't create iova cache\n");
2999 ret = -ENOMEM;
3000 }
3001
3002 return ret;
3003}
3004
3005static int __init iommu_init_mempool(void)
3006{
3007 int ret;
3008 ret = iommu_iova_cache_init();
3009 if (ret)
3010 return ret;
3011
3012 ret = iommu_domain_cache_init();
3013 if (ret)
3014 goto domain_error;
3015
3016 ret = iommu_devinfo_cache_init();
3017 if (!ret)
3018 return ret;
3019
3020 kmem_cache_destroy(iommu_domain_cache);
3021domain_error:
3022 kmem_cache_destroy(iommu_iova_cache);
3023
3024 return -ENOMEM;
3025}
3026
3027static void __init iommu_exit_mempool(void)
3028{
3029 kmem_cache_destroy(iommu_devinfo_cache);
3030 kmem_cache_destroy(iommu_domain_cache);
3031 kmem_cache_destroy(iommu_iova_cache);
3032
3033}
3034
ba395927
KA
3035static void __init init_no_remapping_devices(void)
3036{
3037 struct dmar_drhd_unit *drhd;
3038
3039 for_each_drhd_unit(drhd) {
3040 if (!drhd->include_all) {
3041 int i;
3042 for (i = 0; i < drhd->devices_cnt; i++)
3043 if (drhd->devices[i] != NULL)
3044 break;
3045 /* ignore DMAR unit if no pci devices exist */
3046 if (i == drhd->devices_cnt)
3047 drhd->ignored = 1;
3048 }
3049 }
3050
3051 if (dmar_map_gfx)
3052 return;
3053
3054 for_each_drhd_unit(drhd) {
3055 int i;
3056 if (drhd->ignored || drhd->include_all)
3057 continue;
3058
3059 for (i = 0; i < drhd->devices_cnt; i++)
3060 if (drhd->devices[i] &&
3061 !IS_GFX_DEVICE(drhd->devices[i]))
3062 break;
3063
3064 if (i < drhd->devices_cnt)
3065 continue;
3066
3067 /* bypass IOMMU if it is just for gfx devices */
3068 drhd->ignored = 1;
3069 for (i = 0; i < drhd->devices_cnt; i++) {
3070 if (!drhd->devices[i])
3071 continue;
358dd8ac 3072 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3073 }
3074 }
3075}
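
/*
 * Editor's sketch (illustrative only): the per-unit decision made above,
 * reduced to a predicate -- a non-include-all DRHD unit is ignored when it
 * covers no present devices, or, with graphics remapping disabled, when
 * every device it covers is a graphics device.  The device representation
 * is a hypothetical simplification of drhd->devices[].
 */
struct drhd_sketch {
	int include_all;
	int devices_cnt;
	const int *device_is_gfx;	/* one entry per present device */
};

static int drhd_should_be_ignored(const struct drhd_sketch *d, int map_gfx)
{
	int i, gfx_only = 1;

	if (d->include_all)
		return 0;
	if (d->devices_cnt == 0)
		return 1;		/* nothing behind this unit */
	for (i = 0; i < d->devices_cnt; i++)
		if (!d->device_is_gfx[i])
			gfx_only = 0;
	return !map_gfx && gfx_only;	/* gfx-only unit can be bypassed */
}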
3076
f59c7b69
FY
3077#ifdef CONFIG_SUSPEND
3078static int init_iommu_hw(void)
3079{
3080 struct dmar_drhd_unit *drhd;
3081 struct intel_iommu *iommu = NULL;
3082
3083 for_each_active_iommu(iommu, drhd)
3084 if (iommu->qi)
3085 dmar_reenable_qi(iommu);
3086
3087 for_each_active_iommu(iommu, drhd) {
3088 iommu_flush_write_buffer(iommu);
3089
3090 iommu_set_root_entry(iommu);
3091
3092 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3093 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3094 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3095 DMA_TLB_GLOBAL_FLUSH);
f59c7b69 3096 iommu_enable_translation(iommu);
b94996c9 3097 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3098 }
3099
3100 return 0;
3101}
3102
3103static void iommu_flush_all(void)
3104{
3105 struct dmar_drhd_unit *drhd;
3106 struct intel_iommu *iommu;
3107
3108 for_each_active_iommu(iommu, drhd) {
3109 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3110 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3111 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3112 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3113 }
3114}
3115
3116static int iommu_suspend(struct sys_device *dev, pm_message_t state)
3117{
3118 struct dmar_drhd_unit *drhd;
3119 struct intel_iommu *iommu = NULL;
3120 unsigned long flag;
3121
3122 for_each_active_iommu(iommu, drhd) {
3123 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3124 GFP_ATOMIC);
3125 if (!iommu->iommu_state)
3126 goto nomem;
3127 }
3128
3129 iommu_flush_all();
3130
3131 for_each_active_iommu(iommu, drhd) {
3132 iommu_disable_translation(iommu);
3133
3134 spin_lock_irqsave(&iommu->register_lock, flag);
3135
3136 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3137 readl(iommu->reg + DMAR_FECTL_REG);
3138 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3139 readl(iommu->reg + DMAR_FEDATA_REG);
3140 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3141 readl(iommu->reg + DMAR_FEADDR_REG);
3142 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3143 readl(iommu->reg + DMAR_FEUADDR_REG);
3144
3145 spin_unlock_irqrestore(&iommu->register_lock, flag);
3146 }
3147 return 0;
3148
3149nomem:
3150 for_each_active_iommu(iommu, drhd)
3151 kfree(iommu->iommu_state);
3152
3153 return -ENOMEM;
3154}
3155
3156static int iommu_resume(struct sys_device *dev)
3157{
3158 struct dmar_drhd_unit *drhd;
3159 struct intel_iommu *iommu = NULL;
3160 unsigned long flag;
3161
3162 if (init_iommu_hw()) {
3163 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3164 return -EIO;
3165 }
3166
3167 for_each_active_iommu(iommu, drhd) {
3168
3169 spin_lock_irqsave(&iommu->register_lock, flag);
3170
3171 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3172 iommu->reg + DMAR_FECTL_REG);
3173 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3174 iommu->reg + DMAR_FEDATA_REG);
3175 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3176 iommu->reg + DMAR_FEADDR_REG);
3177 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3178 iommu->reg + DMAR_FEUADDR_REG);
3179
3180 spin_unlock_irqrestore(&iommu->register_lock, flag);
3181 }
3182
3183 for_each_active_iommu(iommu, drhd)
3184 kfree(iommu->iommu_state);
3185
3186 return 0;
3187}
3188
3189static struct sysdev_class iommu_sysclass = {
3190 .name = "iommu",
3191 .resume = iommu_resume,
3192 .suspend = iommu_suspend,
3193};
3194
3195static struct sys_device device_iommu = {
3196 .cls = &iommu_sysclass,
3197};
3198
3199static int __init init_iommu_sysfs(void)
3200{
3201 int error;
3202
3203 error = sysdev_class_register(&iommu_sysclass);
3204 if (error)
3205 return error;
3206
3207 error = sysdev_register(&device_iommu);
3208 if (error)
3209 sysdev_class_unregister(&iommu_sysclass);
3210
3211 return error;
3212}
3213
3214#else
3215static int __init init_iommu_sysfs(void)
3216{
3217 return 0;
3218}
3219#endif /* CONFIG_SUSPEND */
3220
99dcaded
FY
3221/*
3222 * Here we only respond to a device being unbound from its driver.
3223 *
3224 * A newly added device is not attached to its DMAR domain here yet. That
3225 * will happen when mapping the device to an iova.
3226 */
3227static int device_notifier(struct notifier_block *nb,
3228 unsigned long action, void *data)
3229{
3230 struct device *dev = data;
3231 struct pci_dev *pdev = to_pci_dev(dev);
3232 struct dmar_domain *domain;
3233
44cd613c
DW
3234 if (iommu_no_mapping(dev))
3235 return 0;
3236
99dcaded
FY
3237 domain = find_domain(pdev);
3238 if (!domain)
3239 return 0;
3240
3241 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
3242 domain_remove_one_dev_info(domain, pdev);
3243
3244 return 0;
3245}
3246
3247static struct notifier_block device_nb = {
3248 .notifier_call = device_notifier,
3249};
3250
ba395927
KA
3251int __init intel_iommu_init(void)
3252{
3253 int ret = 0;
a59b50e9 3254 int force_on = 0;
ba395927 3255
a59b50e9
JC
3256 /* VT-d is required for a TXT/tboot launch, so enforce that */
3257 force_on = tboot_force_iommu();
3258
3259 if (dmar_table_init()) {
3260 if (force_on)
3261 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3262 return -ENODEV;
a59b50e9 3263 }
ba395927 3264
a59b50e9
JC
3265 if (dmar_dev_scope_init()) {
3266 if (force_on)
3267 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3268 return -ENODEV;
a59b50e9 3269 }
1886e8a9 3270
2ae21010
SS
3271 /*
3272 * Check the need for DMA-remapping initialization now.
3273 * Above initialization will also be used by Interrupt-remapping.
3274 */
75f1cdf1 3275 if (no_iommu || dmar_disabled)
2ae21010
SS
3276 return -ENODEV;
3277
ba395927
KA
3278 iommu_init_mempool();
3279 dmar_init_reserved_ranges();
3280
3281 init_no_remapping_devices();
3282
3283 ret = init_dmars();
3284 if (ret) {
a59b50e9
JC
3285 if (force_on)
3286 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3287 printk(KERN_ERR "IOMMU: dmar init failed\n");
3288 put_iova_domain(&reserved_iova_list);
3289 iommu_exit_mempool();
3290 return ret;
3291 }
3292 printk(KERN_INFO
3293 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3294
5e0d2a6f 3295 init_timer(&unmap_timer);
75f1cdf1
FT
3296#ifdef CONFIG_SWIOTLB
3297 swiotlb = 0;
3298#endif
19943b0e 3299 dma_ops = &intel_dma_ops;
4ed0d3e6 3300
f59c7b69 3301 init_iommu_sysfs();
a8bcbb0d
JR
3302
3303 register_iommu(&intel_iommu_ops);
3304
99dcaded
FY
3305 bus_register_notifier(&pci_bus_type, &device_nb);
3306
ba395927
KA
3307 return 0;
3308}
e820482c 3309
3199aa6b
HW
3310static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3311 struct pci_dev *pdev)
3312{
3313 struct pci_dev *tmp, *parent;
3314
3315 if (!iommu || !pdev)
3316 return;
3317
3318 /* dependent device detach */
3319 tmp = pci_find_upstream_pcie_bridge(pdev);
3320 /* Secondary interface's bus number and devfn 0 */
3321 if (tmp) {
3322 parent = pdev->bus->self;
3323 while (parent != tmp) {
3324 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3325 parent->devfn);
3199aa6b
HW
3326 parent = parent->bus->self;
3327 }
45e829ea 3328 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3329 iommu_detach_dev(iommu,
3330 tmp->subordinate->number, 0);
3331 else /* this is a legacy PCI bridge */
276dbf99
DW
3332 iommu_detach_dev(iommu, tmp->bus->number,
3333 tmp->devfn);
3199aa6b
HW
3334 }
3335}
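
/*
 * Editor's sketch (illustrative only): the parent walk performed above,
 * modelled as a chain of bridges.  Every bridge between the device and its
 * upstream PCIe-to-PCI bridge is detached, and finally the bridge itself;
 * the PCIe-vs-legacy distinction for the final detach is omitted here.  The
 * node type and detach callback are hypothetical.
 */
struct bridge_node { struct bridge_node *parent; int bus, devfn; };

static void detach_dependent(struct bridge_node *dev_parent,
			     struct bridge_node *upstream_bridge,
			     void (*detach)(int bus, int devfn))
{
	struct bridge_node *p;

	if (!upstream_bridge)
		return;			/* no PCIe-to-PCI bridge upstream */
	for (p = dev_parent; p && p != upstream_bridge; p = p->parent)
		detach(p->bus, p->devfn);
	if (p == upstream_bridge)
		detach(p->bus, p->devfn);
}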
3336
2c2e2c38 3337static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3338 struct pci_dev *pdev)
3339{
3340 struct device_domain_info *info;
3341 struct intel_iommu *iommu;
3342 unsigned long flags;
3343 int found = 0;
3344 struct list_head *entry, *tmp;
3345
276dbf99
DW
3346 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3347 pdev->devfn);
c7151a8d
WH
3348 if (!iommu)
3349 return;
3350
3351 spin_lock_irqsave(&device_domain_lock, flags);
3352 list_for_each_safe(entry, tmp, &domain->devices) {
3353 info = list_entry(entry, struct device_domain_info, link);
276dbf99 3354 /* No need to compare PCI domain; it has to be the same */
c7151a8d
WH
3355 if (info->bus == pdev->bus->number &&
3356 info->devfn == pdev->devfn) {
3357 list_del(&info->link);
3358 list_del(&info->global);
3359 if (info->dev)
3360 info->dev->dev.archdata.iommu = NULL;
3361 spin_unlock_irqrestore(&device_domain_lock, flags);
3362
93a23a72 3363 iommu_disable_dev_iotlb(info);
c7151a8d 3364 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3365 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3366 free_devinfo_mem(info);
3367
3368 spin_lock_irqsave(&device_domain_lock, flags);
3369
3370 if (found)
3371 break;
3372 else
3373 continue;
3374 }
3375
3376		/* if there are no other devices under the same iommu
3377		 * owned by this domain, clear this iommu in iommu_bmp,
3378		 * update iommu count and coherency
3379 */
276dbf99
DW
3380 if (iommu == device_to_iommu(info->segment, info->bus,
3381 info->devfn))
c7151a8d
WH
3382 found = 1;
3383 }
3384
3385 if (found == 0) {
3386 unsigned long tmp_flags;
3387 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3388 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3389 domain->iommu_count--;
58c610bd 3390 domain_update_iommu_cap(domain);
c7151a8d
WH
3391 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3392 }
3393
3394 spin_unlock_irqrestore(&device_domain_lock, flags);
3395}
3396
3397static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3398{
3399 struct device_domain_info *info;
3400 struct intel_iommu *iommu;
3401 unsigned long flags1, flags2;
3402
3403 spin_lock_irqsave(&device_domain_lock, flags1);
3404 while (!list_empty(&domain->devices)) {
3405 info = list_entry(domain->devices.next,
3406 struct device_domain_info, link);
3407 list_del(&info->link);
3408 list_del(&info->global);
3409 if (info->dev)
3410 info->dev->dev.archdata.iommu = NULL;
3411
3412 spin_unlock_irqrestore(&device_domain_lock, flags1);
3413
93a23a72 3414 iommu_disable_dev_iotlb(info);
276dbf99 3415 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3416 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3417 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3418
3419 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3420 * and capabilities
c7151a8d
WH
3421 */
3422 spin_lock_irqsave(&domain->iommu_lock, flags2);
3423 if (test_and_clear_bit(iommu->seq_id,
3424 &domain->iommu_bmp)) {
3425 domain->iommu_count--;
58c610bd 3426 domain_update_iommu_cap(domain);
c7151a8d
WH
3427 }
3428 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3429
3430 free_devinfo_mem(info);
3431 spin_lock_irqsave(&device_domain_lock, flags1);
3432 }
3433 spin_unlock_irqrestore(&device_domain_lock, flags1);
3434}
3435
5e98c4b1
WH
3436/* domain id for virtual machine, it won't be set in context */
3437static unsigned long vm_domid;
3438
fe40f1e0
WH
3439static int vm_domain_min_agaw(struct dmar_domain *domain)
3440{
3441 int i;
3442 int min_agaw = domain->agaw;
3443
3444 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
3445 for (; i < g_num_of_iommus; ) {
3446 if (min_agaw > g_iommus[i]->agaw)
3447 min_agaw = g_iommus[i]->agaw;
3448
3449 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
3450 }
3451
3452 return min_agaw;
3453}
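
/*
 * Editor's sketch (illustrative only): the minimum-AGAW computation above
 * without the bitmap helpers -- every IOMMU attached to the domain is
 * visited and the smallest address-width capability wins, since a mapping
 * must fit the weakest unit.  The parallel arrays are a hypothetical
 * simplification of domain->iommu_bmp and g_iommus[].
 */
static int min_agaw_of(const int *attached, const int *agaw, int nr_iommus,
		       int domain_agaw)
{
	int i, min_agaw = domain_agaw;

	for (i = 0; i < nr_iommus; i++)
		if (attached[i] && agaw[i] < min_agaw)
			min_agaw = agaw[i];
	return min_agaw;
}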
3454
5e98c4b1
WH
3455static struct dmar_domain *iommu_alloc_vm_domain(void)
3456{
3457 struct dmar_domain *domain;
3458
3459 domain = alloc_domain_mem();
3460 if (!domain)
3461 return NULL;
3462
3463 domain->id = vm_domid++;
4c923d47 3464 domain->nid = -1;
5e98c4b1
WH
3465 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3466 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3467
3468 return domain;
3469}
3470
2c2e2c38 3471static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3472{
3473 int adjust_width;
3474
3475 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3476 spin_lock_init(&domain->iommu_lock);
3477
3478 domain_reserve_special_ranges(domain);
3479
3480 /* calculate AGAW */
3481 domain->gaw = guest_width;
3482 adjust_width = guestwidth_to_adjustwidth(guest_width);
3483 domain->agaw = width_to_agaw(adjust_width);
3484
3485 INIT_LIST_HEAD(&domain->devices);
3486
3487 domain->iommu_count = 0;
3488 domain->iommu_coherency = 0;
c5b15255 3489 domain->iommu_snooping = 0;
fe40f1e0 3490 domain->max_addr = 0;
4c923d47 3491 domain->nid = -1;
5e98c4b1
WH
3492
3493 /* always allocate the top pgd */
4c923d47 3494 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3495 if (!domain->pgd)
3496 return -ENOMEM;
3497 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3498 return 0;
3499}
3500
3501static void iommu_free_vm_domain(struct dmar_domain *domain)
3502{
3503 unsigned long flags;
3504 struct dmar_drhd_unit *drhd;
3505 struct intel_iommu *iommu;
3506 unsigned long i;
3507 unsigned long ndomains;
3508
3509 for_each_drhd_unit(drhd) {
3510 if (drhd->ignored)
3511 continue;
3512 iommu = drhd->iommu;
3513
3514 ndomains = cap_ndoms(iommu->cap);
3515 i = find_first_bit(iommu->domain_ids, ndomains);
3516 for (; i < ndomains; ) {
3517 if (iommu->domains[i] == domain) {
3518 spin_lock_irqsave(&iommu->lock, flags);
3519 clear_bit(i, iommu->domain_ids);
3520 iommu->domains[i] = NULL;
3521 spin_unlock_irqrestore(&iommu->lock, flags);
3522 break;
3523 }
3524 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
3525 }
3526 }
3527}
3528
3529static void vm_domain_exit(struct dmar_domain *domain)
3530{
5e98c4b1
WH
3531	/* Domain 0 is reserved, so don't process it */
3532 if (!domain)
3533 return;
3534
3535 vm_domain_remove_all_dev_info(domain);
3536 /* destroy iovas */
3537 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3538
3539 /* clear ptes */
595badf5 3540 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3541
3542 /* free page tables */
d794dc9b 3543 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3544
3545 iommu_free_vm_domain(domain);
3546 free_domain_mem(domain);
3547}
3548
5d450806 3549static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3550{
5d450806 3551 struct dmar_domain *dmar_domain;
38717946 3552
5d450806
JR
3553 dmar_domain = iommu_alloc_vm_domain();
3554 if (!dmar_domain) {
38717946 3555 printk(KERN_ERR
5d450806
JR
3556 "intel_iommu_domain_init: dmar_domain == NULL\n");
3557 return -ENOMEM;
38717946 3558 }
2c2e2c38 3559 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3560 printk(KERN_ERR
5d450806
JR
3561 "intel_iommu_domain_init() failed\n");
3562 vm_domain_exit(dmar_domain);
3563 return -ENOMEM;
38717946 3564 }
5d450806 3565 domain->priv = dmar_domain;
faa3d6f5 3566
5d450806 3567 return 0;
38717946 3568}
38717946 3569
5d450806 3570static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3571{
5d450806
JR
3572 struct dmar_domain *dmar_domain = domain->priv;
3573
3574 domain->priv = NULL;
3575 vm_domain_exit(dmar_domain);
38717946 3576}
38717946 3577
4c5478c9
JR
3578static int intel_iommu_attach_device(struct iommu_domain *domain,
3579 struct device *dev)
38717946 3580{
4c5478c9
JR
3581 struct dmar_domain *dmar_domain = domain->priv;
3582 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3583 struct intel_iommu *iommu;
3584 int addr_width;
3585 u64 end;
faa3d6f5
WH
3586
3587 /* normally pdev is not mapped */
3588 if (unlikely(domain_context_mapped(pdev))) {
3589 struct dmar_domain *old_domain;
3590
3591 old_domain = find_domain(pdev);
3592 if (old_domain) {
2c2e2c38
FY
3593 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3594 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3595 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3596 else
3597 domain_remove_dev_info(old_domain);
3598 }
3599 }
3600
276dbf99
DW
3601 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3602 pdev->devfn);
fe40f1e0
WH
3603 if (!iommu)
3604 return -ENODEV;
3605
3606 /* check if this iommu agaw is sufficient for max mapped address */
3607 addr_width = agaw_to_width(iommu->agaw);
3608 end = DOMAIN_MAX_ADDR(addr_width);
3609 end = end & VTD_PAGE_MASK;
4c5478c9 3610 if (end < dmar_domain->max_addr) {
fe40f1e0
WH
3611 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3612 "sufficient for the mapped address (%llx)\n",
4c5478c9 3613 __func__, iommu->agaw, dmar_domain->max_addr);
fe40f1e0
WH
3614 return -EFAULT;
3615 }
3616
5fe60f4e 3617 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3618}
38717946 3619
4c5478c9
JR
3620static void intel_iommu_detach_device(struct iommu_domain *domain,
3621 struct device *dev)
38717946 3622{
4c5478c9
JR
3623 struct dmar_domain *dmar_domain = domain->priv;
3624 struct pci_dev *pdev = to_pci_dev(dev);
3625
2c2e2c38 3626 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3627}
c7151a8d 3628
dde57a21
JR
3629static int intel_iommu_map_range(struct iommu_domain *domain,
3630 unsigned long iova, phys_addr_t hpa,
3631 size_t size, int iommu_prot)
faa3d6f5 3632{
dde57a21 3633 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
3634 u64 max_addr;
3635 int addr_width;
dde57a21 3636 int prot = 0;
faa3d6f5 3637 int ret;
fe40f1e0 3638
dde57a21
JR
3639 if (iommu_prot & IOMMU_READ)
3640 prot |= DMA_PTE_READ;
3641 if (iommu_prot & IOMMU_WRITE)
3642 prot |= DMA_PTE_WRITE;
9cf06697
SY
3643 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3644 prot |= DMA_PTE_SNP;
dde57a21 3645
163cc52c 3646 max_addr = iova + size;
dde57a21 3647 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3648 int min_agaw;
3649 u64 end;
3650
3651 /* check if minimum agaw is sufficient for mapped address */
dde57a21 3652 min_agaw = vm_domain_min_agaw(dmar_domain);
fe40f1e0
WH
3653 addr_width = agaw_to_width(min_agaw);
3654 end = DOMAIN_MAX_ADDR(addr_width);
3655 end = end & VTD_PAGE_MASK;
3656 if (end < max_addr) {
3657 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3658 "sufficient for the mapped address (%llx)\n",
3659 __func__, min_agaw, max_addr);
3660 return -EFAULT;
3661 }
dde57a21 3662 dmar_domain->max_addr = max_addr;
fe40f1e0 3663 }
ad051221
DW
3664 /* Round up size to next multiple of PAGE_SIZE, if it and
3665 the low bits of hpa would take us onto the next page */
88cb6a74 3666 size = aligned_nrpages(hpa, size);
ad051221
DW
3667 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3668 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3669 return ret;
38717946 3670}
38717946 3671
dde57a21
JR
3672static void intel_iommu_unmap_range(struct iommu_domain *domain,
3673 unsigned long iova, size_t size)
38717946 3674{
dde57a21 3675 struct dmar_domain *dmar_domain = domain->priv;
faa3d6f5 3676
4b99d352
SY
3677 if (!size)
3678 return;
3679
163cc52c
DW
3680 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3681 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 3682
163cc52c
DW
3683 if (dmar_domain->max_addr == iova + size)
3684 dmar_domain->max_addr = iova;
38717946 3685}
38717946 3686
d14d6577
JR
3687static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3688 unsigned long iova)
38717946 3689{
d14d6577 3690 struct dmar_domain *dmar_domain = domain->priv;
38717946 3691 struct dma_pte *pte;
faa3d6f5 3692 u64 phys = 0;
38717946 3693
b026fd28 3694 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
38717946 3695 if (pte)
faa3d6f5 3696 phys = dma_pte_addr(pte);
38717946 3697
faa3d6f5 3698 return phys;
38717946 3699}
a8bcbb0d 3700
dbb9fd86
SY
3701static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3702 unsigned long cap)
3703{
3704 struct dmar_domain *dmar_domain = domain->priv;
3705
3706 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3707 return dmar_domain->iommu_snooping;
3708
3709 return 0;
3710}
3711
a8bcbb0d
JR
3712static struct iommu_ops intel_iommu_ops = {
3713 .domain_init = intel_iommu_domain_init,
3714 .domain_destroy = intel_iommu_domain_destroy,
3715 .attach_dev = intel_iommu_attach_device,
3716 .detach_dev = intel_iommu_detach_device,
3717 .map = intel_iommu_map_range,
3718 .unmap = intel_iommu_unmap_range,
3719 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 3720 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 3721};
9af88143
DW
3722
3723static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3724{
3725 /*
3726 * Mobile 4 Series Chipset neglects to set RWBF capability,
3727 * but needs it:
3728 */
3729 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3730 rwbf_quirk = 1;
3731}
3732
3733DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
e0fc7e0b
DW
3734
3735/* On Tylersburg chipsets, some BIOSes have been known to enable the
3736 ISOCH DMAR unit for the Azalia sound device, but not give it any
3737 TLB entries, which causes it to deadlock. Check for that. We do
3738 this in a function called from init_dmars(), instead of in a PCI
3739 quirk, because we don't want to print the obnoxious "BIOS broken"
3740 message if VT-d is actually disabled.
3741*/
3742static void __init check_tylersburg_isoch(void)
3743{
3744 struct pci_dev *pdev;
3745 uint32_t vtisochctrl;
3746
3747 /* If there's no Azalia in the system anyway, forget it. */
3748 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3749 if (!pdev)
3750 return;
3751 pci_dev_put(pdev);
3752
3753 /* System Management Registers. Might be hidden, in which case
3754 we can't do the sanity check. But that's OK, because the
3755 known-broken BIOSes _don't_ actually hide it, so far. */
3756 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3757 if (!pdev)
3758 return;
3759
3760 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3761 pci_dev_put(pdev);
3762 return;
3763 }
3764
3765 pci_dev_put(pdev);
3766
3767 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3768 if (vtisochctrl & 1)
3769 return;
3770
3771 /* Drop all bits other than the number of TLB entries */
3772 vtisochctrl &= 0x1c;
3773
3774 /* If we have the recommended number of TLB entries (16), fine. */
3775 if (vtisochctrl == 0x10)
3776 return;
3777
3778 /* Zero TLB entries? You get to ride the short bus to school. */
3779 if (!vtisochctrl) {
3780 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3781 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3782 dmi_get_system_info(DMI_BIOS_VENDOR),
3783 dmi_get_system_info(DMI_BIOS_VERSION),
3784 dmi_get_system_info(DMI_PRODUCT_VERSION));
3785 iommu_identity_mapping |= IDENTMAP_AZALIA;
3786 return;
3787 }
3788
3789 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
3790 vtisochctrl);
3791}
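
/*
 * Editor's sketch (illustrative only): the decision applied to the value
 * read from the 0x188 config register above, separated from the PCI
 * plumbing.  Returns 0 when nothing needs doing, 1 when Azalia must be
 * identity-mapped (no TLB entries at all), and -1 for the "fewer entries
 * than the recommended 16" warning case.
 */
static int classify_vtisochctrl(unsigned int vtisochctrl)
{
	if (vtisochctrl & 1)		/* Azalia routed to the non-isoch unit */
		return 0;
	vtisochctrl &= 0x1c;		/* keep only the TLB-entry count bits */
	if (vtisochctrl == 0x10)	/* the recommended 16 entries */
		return 0;
	if (!vtisochctrl)
		return 1;		/* zero entries: force identity map */
	return -1;			/* suboptimal count: warn only */
}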