calculate agaw for each iommu
[linux-2.6-block.git] / drivers / pci / intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
ba395927 38#include <asm/cacheflush.h>
46a7fa27 39#include <asm/iommu.h>
40#include "pci.h"
41
42#define ROOT_SIZE VTD_PAGE_SIZE
43#define CONTEXT_SIZE VTD_PAGE_SIZE
44
45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48#define IOAPIC_RANGE_START (0xfee00000)
49#define IOAPIC_RANGE_END (0xfeefffff)
50#define IOVA_START_ADDR (0x1000)
51
52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
5e0d2a6f 59
60/* global iommu list, set NULL for ignored DMAR units */
61static struct intel_iommu **g_iommus;
62
63/*
64 * 0: Present
65 * 1-11: Reserved
66 * 12-63: Context Ptr (12 - (haw-1))
67 * 64-127: Reserved
68 */
69struct root_entry {
70 u64 val;
71 u64 rsvd1;
72};
73#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
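/*
 * Illustrative note (not part of the original source): with VTD_PAGE_SIZE
 * of 4096 bytes and a 16-byte struct root_entry, ROOT_ENTRY_NR works out
 * to 256 -- one root entry per possible PCI bus number, so the whole root
 * table fits in a single page:
 *
 *	4096 / sizeof(struct root_entry) = 4096 / 16 = 256
 */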
74static inline bool root_present(struct root_entry *root)
75{
76 return (root->val & 1);
77}
78static inline void set_root_present(struct root_entry *root)
79{
80 root->val |= 1;
81}
82static inline void set_root_value(struct root_entry *root, unsigned long value)
83{
84 root->val |= value & VTD_PAGE_MASK;
85}
86
87static inline struct context_entry *
88get_context_addr_from_root(struct root_entry *root)
89{
90 return (struct context_entry *)
91 (root_present(root)?phys_to_virt(
92 root->val & VTD_PAGE_MASK) :
93 NULL);
94}
95
96/*
97 * low 64 bits:
98 * 0: present
99 * 1: fault processing disable
100 * 2-3: translation type
101 * 12-63: address space root
102 * high 64 bits:
103 * 0-2: address width
104 * 3-6: aval
105 * 8-23: domain id
106 */
107struct context_entry {
108 u64 lo;
109 u64 hi;
110};
111
112static inline bool context_present(struct context_entry *context)
113{
114 return (context->lo & 1);
115}
116static inline void context_set_present(struct context_entry *context)
117{
118 context->lo |= 1;
119}
120
121static inline void context_set_fault_enable(struct context_entry *context)
122{
123 context->lo &= (((u64)-1) << 2) | 1;
124}
125
7a8fc25e 126#define CONTEXT_TT_MULTI_LEVEL 0
127
128static inline void context_set_translation_type(struct context_entry *context,
129 unsigned long value)
130{
131 context->lo &= (((u64)-1) << 4) | 3;
132 context->lo |= (value & 3) << 2;
133}
134
135static inline void context_set_address_root(struct context_entry *context,
136 unsigned long value)
137{
138 context->lo |= value & VTD_PAGE_MASK;
139}
140
141static inline void context_set_address_width(struct context_entry *context,
142 unsigned long value)
143{
144 context->hi |= value & 7;
145}
146
147static inline void context_set_domain_id(struct context_entry *context,
148 unsigned long value)
149{
150 context->hi |= (value & ((1 << 16) - 1)) << 8;
151}
152
153static inline void context_clear_entry(struct context_entry *context)
154{
155 context->lo = 0;
156 context->hi = 0;
157}
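/*
 * Illustrative sketch (mirrors how these helpers are used later in this
 * file, e.g. in domain_context_mapping_one()): a present context entry for
 * multi-level translation is assembled roughly as
 *
 *	context_set_domain_id(context, domain->id);
 *	context_set_address_width(context, domain->agaw);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 *
 * The helpers only OR bits in, so they assume a zeroed (cleared) entry.
 */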
7a8fc25e 158
159/*
160 * 0: readable
161 * 1: writable
162 * 2-6: reserved
163 * 7: super page
164 * 8-11: available
 165 * 12-63: Host physical address
166 */
167struct dma_pte {
168 u64 val;
169};
622ba12a 170
171static inline void dma_clear_pte(struct dma_pte *pte)
172{
173 pte->val = 0;
174}
175
176static inline void dma_set_pte_readable(struct dma_pte *pte)
177{
178 pte->val |= DMA_PTE_READ;
179}
180
181static inline void dma_set_pte_writable(struct dma_pte *pte)
182{
183 pte->val |= DMA_PTE_WRITE;
184}
185
186static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187{
188 pte->val = (pte->val & ~3) | (prot & 3);
189}
190
191static inline u64 dma_pte_addr(struct dma_pte *pte)
192{
193 return (pte->val & VTD_PAGE_MASK);
194}
195
196static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197{
198 pte->val |= (addr & VTD_PAGE_MASK);
199}
200
201static inline bool dma_pte_present(struct dma_pte *pte)
202{
203 return (pte->val & 3) != 0;
204}
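/*
 * Worked example (illustrative; assumes DMA_PTE_READ == 1 and
 * DMA_PTE_WRITE == 2 as defined in linux/intel-iommu.h): a leaf PTE that
 * maps host physical page 0x12345000 read/write ends up as
 *
 *	pte->val = 0x12345000 | DMA_PTE_READ | DMA_PTE_WRITE = 0x12345003
 *
 * so dma_pte_addr() returns 0x12345000 and dma_pte_present() is true.
 */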
622ba12a 205
206/* devices under the same p2p bridge are owned in one domain */
 207#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
208
209struct dmar_domain {
210 int id; /* domain id */
8c11e798 211 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
212
213 struct list_head devices; /* all devices' list */
214 struct iova_domain iovad; /* iova's that belong to this domain */
215
216 struct dma_pte *pgd; /* virtual address */
217 spinlock_t mapping_lock; /* page table lock */
218 int gaw; /* max guest address width */
219
220 /* adjusted guest address width, 0 is level 2 30-bit */
221 int agaw;
222
3b5410e7 223 int flags; /* flags to find out type of domain */
224};
225
226/* PCI domain-device relationship */
227struct device_domain_info {
228 struct list_head link; /* link to domain siblings */
229 struct list_head global; /* link to global list */
 230 u8 bus; /* PCI bus number */
231 u8 devfn; /* PCI devfn number */
232 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
233 struct dmar_domain *domain; /* pointer to domain */
234};
235
5e0d2a6f 236static void flush_unmaps_timeout(unsigned long data);
237
238DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
239
80b20dd8 240#define HIGH_WATER_MARK 250
241struct deferred_flush_tables {
242 int next;
243 struct iova *iova[HIGH_WATER_MARK];
244 struct dmar_domain *domain[HIGH_WATER_MARK];
245};
246
247static struct deferred_flush_tables *deferred_flush;
248
5e0d2a6f 249/* bitmap for indexing intel_iommus */
5e0d2a6f 250static int g_num_of_iommus;
251
252static DEFINE_SPINLOCK(async_umap_flush_lock);
253static LIST_HEAD(unmaps_to_do);
254
255static int timer_on;
256static long list_size;
5e0d2a6f 257
258static void domain_remove_dev_info(struct dmar_domain *domain);
259
2ae21010 260int dmar_disabled;
ba395927 261static int __initdata dmar_map_gfx = 1;
7d3b03ce 262static int dmar_forcedac;
5e0d2a6f 263static int intel_iommu_strict;
264
265#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
266static DEFINE_SPINLOCK(device_domain_lock);
267static LIST_HEAD(device_domain_list);
268
269static int __init intel_iommu_setup(char *str)
270{
271 if (!str)
272 return -EINVAL;
273 while (*str) {
274 if (!strncmp(str, "off", 3)) {
275 dmar_disabled = 1;
276 printk(KERN_INFO"Intel-IOMMU: disabled\n");
277 } else if (!strncmp(str, "igfx_off", 8)) {
278 dmar_map_gfx = 0;
279 printk(KERN_INFO
280 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 281 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 282 printk(KERN_INFO
283 "Intel-IOMMU: Forcing DAC for PCI devices\n");
284 dmar_forcedac = 1;
5e0d2a6f 285 } else if (!strncmp(str, "strict", 6)) {
286 printk(KERN_INFO
287 "Intel-IOMMU: disable batched IOTLB flush\n");
288 intel_iommu_strict = 1;
289 }
290
291 str += strcspn(str, ",");
292 while (*str == ',')
293 str++;
294 }
295 return 0;
296}
297__setup("intel_iommu=", intel_iommu_setup);
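/*
 * Example usage (illustrative): the options above are comma separated on
 * the kernel command line, e.g.
 *
 *	intel_iommu=igfx_off,strict
 *
 * which keeps the IOMMU enabled but skips the graphics device mapping and
 * disables batched IOTLB flushing.
 */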
298
299static struct kmem_cache *iommu_domain_cache;
300static struct kmem_cache *iommu_devinfo_cache;
301static struct kmem_cache *iommu_iova_cache;
302
303static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
304{
305 unsigned int flags;
306 void *vaddr;
307
308 /* trying to avoid low memory issues */
309 flags = current->flags & PF_MEMALLOC;
310 current->flags |= PF_MEMALLOC;
311 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
312 current->flags &= (~PF_MEMALLOC | flags);
313 return vaddr;
314}
315
316
317static inline void *alloc_pgtable_page(void)
318{
319 unsigned int flags;
320 void *vaddr;
321
322 /* trying to avoid low memory issues */
323 flags = current->flags & PF_MEMALLOC;
324 current->flags |= PF_MEMALLOC;
325 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
326 current->flags &= (~PF_MEMALLOC | flags);
327 return vaddr;
328}
329
330static inline void free_pgtable_page(void *vaddr)
331{
332 free_page((unsigned long)vaddr);
333}
334
335static inline void *alloc_domain_mem(void)
336{
eb3fa7cb 337 return iommu_kmem_cache_alloc(iommu_domain_cache);
338}
339
38717946 340static void free_domain_mem(void *vaddr)
341{
342 kmem_cache_free(iommu_domain_cache, vaddr);
343}
344
345static inline void * alloc_devinfo_mem(void)
346{
eb3fa7cb 347 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
348}
349
350static inline void free_devinfo_mem(void *vaddr)
351{
352 kmem_cache_free(iommu_devinfo_cache, vaddr);
353}
354
355struct iova *alloc_iova_mem(void)
356{
eb3fa7cb 357 return iommu_kmem_cache_alloc(iommu_iova_cache);
358}
359
360void free_iova_mem(struct iova *iova)
361{
362 kmem_cache_free(iommu_iova_cache, iova);
363}
364
365
366static inline int width_to_agaw(int width);
367
368/* calculate agaw for each iommu.
 369 * "SAGAW" may be different across iommus; use the default agaw, and
 370 * fall back to a smaller supported agaw for iommus that can't handle the default.
371 */
372int iommu_calculate_agaw(struct intel_iommu *iommu)
373{
374 unsigned long sagaw;
375 int agaw = -1;
376
377 sagaw = cap_sagaw(iommu->cap);
378 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
379 agaw >= 0; agaw--) {
380 if (test_bit(agaw, &sagaw))
381 break;
382 }
383
384 return agaw;
385}
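/*
 * Worked example (illustrative): with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48,
 * width_to_agaw(48) = (48 - 30) / 9 = 2, so the loop first tests bit 2 of
 * SAGAW (48-bit, 4-level table). If the hardware doesn't support it, the
 * loop falls back to agaw 1 (39-bit, 3-level) and then agaw 0 (30-bit,
 * 2-level), returning -1 only if none of them is supported.
 */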
386
387/* in native case, each domain is related to only one iommu */
388static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
389{
390 int iommu_id;
391
392 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
393 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
394 return NULL;
395
396 return g_iommus[iommu_id];
397}
398
399/* Gets context entry for a given bus and devfn */
400static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
401 u8 bus, u8 devfn)
402{
403 struct root_entry *root;
404 struct context_entry *context;
405 unsigned long phy_addr;
406 unsigned long flags;
407
408 spin_lock_irqsave(&iommu->lock, flags);
409 root = &iommu->root_entry[bus];
410 context = get_context_addr_from_root(root);
411 if (!context) {
412 context = (struct context_entry *)alloc_pgtable_page();
413 if (!context) {
414 spin_unlock_irqrestore(&iommu->lock, flags);
415 return NULL;
416 }
5b6985ce 417 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
418 phy_addr = virt_to_phys((void *)context);
419 set_root_value(root, phy_addr);
420 set_root_present(root);
421 __iommu_flush_cache(iommu, root, sizeof(*root));
422 }
423 spin_unlock_irqrestore(&iommu->lock, flags);
424 return &context[devfn];
425}
426
427static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
428{
429 struct root_entry *root;
430 struct context_entry *context;
431 int ret;
432 unsigned long flags;
433
434 spin_lock_irqsave(&iommu->lock, flags);
435 root = &iommu->root_entry[bus];
436 context = get_context_addr_from_root(root);
437 if (!context) {
438 ret = 0;
439 goto out;
440 }
c07e7d21 441 ret = context_present(&context[devfn]);
442out:
443 spin_unlock_irqrestore(&iommu->lock, flags);
444 return ret;
445}
446
447static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
448{
449 struct root_entry *root;
450 struct context_entry *context;
451 unsigned long flags;
452
453 spin_lock_irqsave(&iommu->lock, flags);
454 root = &iommu->root_entry[bus];
455 context = get_context_addr_from_root(root);
456 if (context) {
c07e7d21 457 context_clear_entry(&context[devfn]);
458 __iommu_flush_cache(iommu, &context[devfn], \
459 sizeof(*context));
460 }
461 spin_unlock_irqrestore(&iommu->lock, flags);
462}
463
464static void free_context_table(struct intel_iommu *iommu)
465{
466 struct root_entry *root;
467 int i;
468 unsigned long flags;
469 struct context_entry *context;
470
471 spin_lock_irqsave(&iommu->lock, flags);
472 if (!iommu->root_entry) {
473 goto out;
474 }
475 for (i = 0; i < ROOT_ENTRY_NR; i++) {
476 root = &iommu->root_entry[i];
477 context = get_context_addr_from_root(root);
478 if (context)
479 free_pgtable_page(context);
480 }
481 free_pgtable_page(iommu->root_entry);
482 iommu->root_entry = NULL;
483out:
484 spin_unlock_irqrestore(&iommu->lock, flags);
485}
486
487/* page table handling */
488#define LEVEL_STRIDE (9)
489#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
490
491static inline int agaw_to_level(int agaw)
492{
493 return agaw + 2;
494}
495
496static inline int agaw_to_width(int agaw)
497{
498 return 30 + agaw * LEVEL_STRIDE;
499
500}
501
502static inline int width_to_agaw(int width)
503{
504 return (width - 30) / LEVEL_STRIDE;
505}
506
507static inline unsigned int level_to_offset_bits(int level)
508{
509 return (12 + (level - 1) * LEVEL_STRIDE);
510}
511
512static inline int address_level_offset(u64 addr, int level)
513{
514 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
515}
516
517static inline u64 level_mask(int level)
518{
519 return ((u64)-1 << level_to_offset_bits(level));
520}
521
522static inline u64 level_size(int level)
523{
524 return ((u64)1 << level_to_offset_bits(level));
525}
526
527static inline u64 align_to_level(u64 addr, int level)
528{
529 return ((addr + level_size(level) - 1) & level_mask(level));
530}
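/*
 * Worked example (illustrative): for agaw 2 the table has
 * agaw_to_level(2) == 4 levels and covers agaw_to_width(2) == 48 bits.
 * level_to_offset_bits() gives 12, 21, 30 and 39 for levels 1-4, so
 * address_level_offset() slices a 48-bit address into four 9-bit table
 * indexes above the 12-bit page offset, e.g.
 *
 *	index at level 1 = (addr >> 12) & 0x1ff
 *	index at level 4 = (addr >> 39) & 0x1ff
 */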
531
532static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
533{
534 int addr_width = agaw_to_width(domain->agaw);
535 struct dma_pte *parent, *pte = NULL;
536 int level = agaw_to_level(domain->agaw);
537 int offset;
538 unsigned long flags;
8c11e798 539 struct intel_iommu *iommu = domain_get_iommu(domain);
540
541 BUG_ON(!domain->pgd);
542
543 addr &= (((u64)1) << addr_width) - 1;
544 parent = domain->pgd;
545
546 spin_lock_irqsave(&domain->mapping_lock, flags);
547 while (level > 0) {
548 void *tmp_page;
549
550 offset = address_level_offset(addr, level);
551 pte = &parent[offset];
552 if (level == 1)
553 break;
554
19c239ce 555 if (!dma_pte_present(pte)) {
556 tmp_page = alloc_pgtable_page();
557
558 if (!tmp_page) {
559 spin_unlock_irqrestore(&domain->mapping_lock,
560 flags);
561 return NULL;
562 }
8c11e798 563 __iommu_flush_cache(iommu, tmp_page,
5b6985ce 564 PAGE_SIZE);
19c239ce 565 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
566 /*
567 * high level table always sets r/w, last level page
568 * table control read/write
569 */
570 dma_set_pte_readable(pte);
571 dma_set_pte_writable(pte);
8c11e798 572 __iommu_flush_cache(iommu, pte, sizeof(*pte));
ba395927 573 }
19c239ce 574 parent = phys_to_virt(dma_pte_addr(pte));
575 level--;
576 }
577
578 spin_unlock_irqrestore(&domain->mapping_lock, flags);
579 return pte;
580}
581
582/* return address's pte at specific level */
583static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
584 int level)
585{
586 struct dma_pte *parent, *pte = NULL;
587 int total = agaw_to_level(domain->agaw);
588 int offset;
589
590 parent = domain->pgd;
591 while (level <= total) {
592 offset = address_level_offset(addr, total);
593 pte = &parent[offset];
594 if (level == total)
595 return pte;
596
19c239ce 597 if (!dma_pte_present(pte))
ba395927 598 break;
19c239ce 599 parent = phys_to_virt(dma_pte_addr(pte));
600 total--;
601 }
602 return NULL;
603}
604
605/* clear one page's page table */
606static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
607{
608 struct dma_pte *pte = NULL;
8c11e798 609 struct intel_iommu *iommu = domain_get_iommu(domain);
610
611 /* get last level pte */
612 pte = dma_addr_level_pte(domain, addr, 1);
613
614 if (pte) {
19c239ce 615 dma_clear_pte(pte);
8c11e798 616 __iommu_flush_cache(iommu, pte, sizeof(*pte));
617 }
618}
619
620/* clear last level pte, a tlb flush should be followed */
621static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
622{
623 int addr_width = agaw_to_width(domain->agaw);
624
625 start &= (((u64)1) << addr_width) - 1;
626 end &= (((u64)1) << addr_width) - 1;
 627 /* in case it's a partial page */
628 start = PAGE_ALIGN(start);
629 end &= PAGE_MASK;
630
631 /* we don't need lock here, nobody else touches the iova range */
632 while (start < end) {
633 dma_pte_clear_one(domain, start);
5b6985ce 634 start += VTD_PAGE_SIZE;
635 }
636}
637
638/* free page table pages. last level pte should already be cleared */
639static void dma_pte_free_pagetable(struct dmar_domain *domain,
640 u64 start, u64 end)
641{
642 int addr_width = agaw_to_width(domain->agaw);
643 struct dma_pte *pte;
644 int total = agaw_to_level(domain->agaw);
645 int level;
646 u64 tmp;
8c11e798 647 struct intel_iommu *iommu = domain_get_iommu(domain);
648
649 start &= (((u64)1) << addr_width) - 1;
650 end &= (((u64)1) << addr_width) - 1;
651
652 /* we don't need lock here, nobody else touches the iova range */
653 level = 2;
654 while (level <= total) {
655 tmp = align_to_level(start, level);
656 if (tmp >= end || (tmp + level_size(level) > end))
657 return;
658
659 while (tmp < end) {
660 pte = dma_addr_level_pte(domain, tmp, level);
661 if (pte) {
662 free_pgtable_page(
663 phys_to_virt(dma_pte_addr(pte)));
664 dma_clear_pte(pte);
8c11e798 665 __iommu_flush_cache(iommu,
666 pte, sizeof(*pte));
667 }
668 tmp += level_size(level);
669 }
670 level++;
671 }
672 /* free pgd */
673 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
674 free_pgtable_page(domain->pgd);
675 domain->pgd = NULL;
676 }
677}
678
679/* iommu handling */
680static int iommu_alloc_root_entry(struct intel_iommu *iommu)
681{
682 struct root_entry *root;
683 unsigned long flags;
684
685 root = (struct root_entry *)alloc_pgtable_page();
686 if (!root)
687 return -ENOMEM;
688
5b6985ce 689 __iommu_flush_cache(iommu, root, ROOT_SIZE);
690
691 spin_lock_irqsave(&iommu->lock, flags);
692 iommu->root_entry = root;
693 spin_unlock_irqrestore(&iommu->lock, flags);
694
695 return 0;
696}
697
698static void iommu_set_root_entry(struct intel_iommu *iommu)
699{
700 void *addr;
701 u32 cmd, sts;
702 unsigned long flag;
703
704 addr = iommu->root_entry;
705
706 spin_lock_irqsave(&iommu->register_lock, flag);
707 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
708
709 cmd = iommu->gcmd | DMA_GCMD_SRTP;
710 writel(cmd, iommu->reg + DMAR_GCMD_REG);
711
712 /* Make sure hardware complete it */
713 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
714 readl, (sts & DMA_GSTS_RTPS), sts);
715
716 spin_unlock_irqrestore(&iommu->register_lock, flag);
717}
718
719static void iommu_flush_write_buffer(struct intel_iommu *iommu)
720{
721 u32 val;
722 unsigned long flag;
723
724 if (!cap_rwbf(iommu->cap))
725 return;
726 val = iommu->gcmd | DMA_GCMD_WBF;
727
728 spin_lock_irqsave(&iommu->register_lock, flag);
729 writel(val, iommu->reg + DMAR_GCMD_REG);
730
731 /* Make sure hardware complete it */
732 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
733 readl, (!(val & DMA_GSTS_WBFS)), val);
734
735 spin_unlock_irqrestore(&iommu->register_lock, flag);
736}
737
 738/* return value determines if we need a write buffer flush */
739static int __iommu_flush_context(struct intel_iommu *iommu,
740 u16 did, u16 source_id, u8 function_mask, u64 type,
741 int non_present_entry_flush)
742{
743 u64 val = 0;
744 unsigned long flag;
745
746 /*
747 * In the non-present entry flush case, if hardware doesn't cache
748 * non-present entry we do nothing and if hardware cache non-present
749 * entry, we flush entries of domain 0 (the domain id is used to cache
750 * any non-present entries)
751 */
752 if (non_present_entry_flush) {
753 if (!cap_caching_mode(iommu->cap))
754 return 1;
755 else
756 did = 0;
757 }
758
759 switch (type) {
760 case DMA_CCMD_GLOBAL_INVL:
761 val = DMA_CCMD_GLOBAL_INVL;
762 break;
763 case DMA_CCMD_DOMAIN_INVL:
764 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
765 break;
766 case DMA_CCMD_DEVICE_INVL:
767 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
768 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
769 break;
770 default:
771 BUG();
772 }
773 val |= DMA_CCMD_ICC;
774
775 spin_lock_irqsave(&iommu->register_lock, flag);
776 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
777
778 /* Make sure hardware complete it */
779 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
780 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
781
782 spin_unlock_irqrestore(&iommu->register_lock, flag);
783
4d235ba6 784 /* flush context entry will implicitly flush write buffer */
785 return 0;
786}
787
 788/* return value determines if we need a write buffer flush */
789static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
790 u64 addr, unsigned int size_order, u64 type,
791 int non_present_entry_flush)
792{
793 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
794 u64 val = 0, val_iva = 0;
795 unsigned long flag;
796
797 /*
798 * In the non-present entry flush case, if hardware doesn't cache
799 * non-present entry we do nothing and if hardware cache non-present
800 * entry, we flush entries of domain 0 (the domain id is used to cache
801 * any non-present entries)
802 */
803 if (non_present_entry_flush) {
804 if (!cap_caching_mode(iommu->cap))
805 return 1;
806 else
807 did = 0;
808 }
809
810 switch (type) {
811 case DMA_TLB_GLOBAL_FLUSH:
812 /* global flush doesn't need set IVA_REG */
813 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
814 break;
815 case DMA_TLB_DSI_FLUSH:
816 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
817 break;
818 case DMA_TLB_PSI_FLUSH:
819 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
820 /* Note: always flush non-leaf currently */
821 val_iva = size_order | addr;
822 break;
823 default:
824 BUG();
825 }
826 /* Note: set drain read/write */
827#if 0
828 /*
829 * This is probably to be super secure.. Looks like we can
830 * ignore it without any impact.
831 */
832 if (cap_read_drain(iommu->cap))
833 val |= DMA_TLB_READ_DRAIN;
834#endif
835 if (cap_write_drain(iommu->cap))
836 val |= DMA_TLB_WRITE_DRAIN;
837
838 spin_lock_irqsave(&iommu->register_lock, flag);
839 /* Note: Only uses first TLB reg currently */
840 if (val_iva)
841 dmar_writeq(iommu->reg + tlb_offset, val_iva);
842 dmar_writeq(iommu->reg + tlb_offset + 8, val);
843
844 /* Make sure hardware complete it */
845 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
846 dmar_readq, (!(val & DMA_TLB_IVT)), val);
847
848 spin_unlock_irqrestore(&iommu->register_lock, flag);
849
850 /* check IOTLB invalidation granularity */
851 if (DMA_TLB_IAIG(val) == 0)
852 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
853 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
854 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
855 (unsigned long long)DMA_TLB_IIRG(type),
856 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 857 /* flush iotlb entry will implicitly flush write buffer */
858 return 0;
859}
860
861static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
862 u64 addr, unsigned int pages, int non_present_entry_flush)
863{
f76aec76 864 unsigned int mask;
ba395927 865
5b6985ce 866 BUG_ON(addr & (~VTD_PAGE_MASK));
867 BUG_ON(pages == 0);
868
869 /* Fallback to domain selective flush if no PSI support */
870 if (!cap_pgsel_inv(iommu->cap))
a77b67d4
YS
871 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
872 DMA_TLB_DSI_FLUSH,
873 non_present_entry_flush);
874
875 /*
876 * PSI requires page size to be 2 ^ x, and the base address is naturally
877 * aligned to the size
878 */
f76aec76 879 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 880 /* Fallback to domain selective flush if size is too big */
f76aec76 881 if (mask > cap_max_amask_val(iommu->cap))
882 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
883 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 884
885 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
886 DMA_TLB_PSI_FLUSH,
887 non_present_entry_flush);
888}
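/*
 * Worked example (illustrative): for pages == 9,
 * __roundup_pow_of_two(9) == 16 and ilog2(16) == 4, so the PSI request
 * invalidates a 16-page (64KB with 4KB pages) naturally aligned region,
 * provided 4 does not exceed cap_max_amask_val().
 */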
889
f8bab735 890static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
891{
892 u32 pmen;
893 unsigned long flags;
894
895 spin_lock_irqsave(&iommu->register_lock, flags);
896 pmen = readl(iommu->reg + DMAR_PMEN_REG);
897 pmen &= ~DMA_PMEN_EPM;
898 writel(pmen, iommu->reg + DMAR_PMEN_REG);
899
900 /* wait for the protected region status bit to clear */
901 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
902 readl, !(pmen & DMA_PMEN_PRS), pmen);
903
904 spin_unlock_irqrestore(&iommu->register_lock, flags);
905}
906
907static int iommu_enable_translation(struct intel_iommu *iommu)
908{
909 u32 sts;
910 unsigned long flags;
911
912 spin_lock_irqsave(&iommu->register_lock, flags);
913 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
914
915 /* Make sure hardware complete it */
916 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
917 readl, (sts & DMA_GSTS_TES), sts);
918
919 iommu->gcmd |= DMA_GCMD_TE;
920 spin_unlock_irqrestore(&iommu->register_lock, flags);
921 return 0;
922}
923
924static int iommu_disable_translation(struct intel_iommu *iommu)
925{
926 u32 sts;
927 unsigned long flag;
928
929 spin_lock_irqsave(&iommu->register_lock, flag);
930 iommu->gcmd &= ~DMA_GCMD_TE;
931 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
932
933 /* Make sure hardware complete it */
934 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
935 readl, (!(sts & DMA_GSTS_TES)), sts);
936
937 spin_unlock_irqrestore(&iommu->register_lock, flag);
938 return 0;
939}
940
941/* iommu interrupt handling. Most stuff are MSI-like. */
942
d94afc6c 943static const char *fault_reason_strings[] =
944{
945 "Software",
946 "Present bit in root entry is clear",
947 "Present bit in context entry is clear",
948 "Invalid context entry",
949 "Access beyond MGAW",
950 "PTE Write access is not set",
951 "PTE Read access is not set",
952 "Next page table ptr is invalid",
953 "Root table address invalid",
954 "Context table ptr is invalid",
955 "non-zero reserved fields in RTP",
956 "non-zero reserved fields in CTP",
957 "non-zero reserved fields in PTE",
3460a6d9 958};
f8bab735 959#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 960
d94afc6c 961const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 962{
d94afc6c 963 if (fault_reason > MAX_FAULT_REASON_IDX)
964 return "Unknown";
965 else
966 return fault_reason_strings[fault_reason];
967}
968
969void dmar_msi_unmask(unsigned int irq)
970{
971 struct intel_iommu *iommu = get_irq_data(irq);
972 unsigned long flag;
973
974 /* unmask it */
975 spin_lock_irqsave(&iommu->register_lock, flag);
976 writel(0, iommu->reg + DMAR_FECTL_REG);
977 /* Read a reg to force flush the post write */
978 readl(iommu->reg + DMAR_FECTL_REG);
979 spin_unlock_irqrestore(&iommu->register_lock, flag);
980}
981
982void dmar_msi_mask(unsigned int irq)
983{
984 unsigned long flag;
985 struct intel_iommu *iommu = get_irq_data(irq);
986
987 /* mask it */
988 spin_lock_irqsave(&iommu->register_lock, flag);
989 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
990 /* Read a reg to force flush the post write */
991 readl(iommu->reg + DMAR_FECTL_REG);
992 spin_unlock_irqrestore(&iommu->register_lock, flag);
993}
994
995void dmar_msi_write(int irq, struct msi_msg *msg)
996{
997 struct intel_iommu *iommu = get_irq_data(irq);
998 unsigned long flag;
999
1000 spin_lock_irqsave(&iommu->register_lock, flag);
1001 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1002 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1003 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1004 spin_unlock_irqrestore(&iommu->register_lock, flag);
1005}
1006
1007void dmar_msi_read(int irq, struct msi_msg *msg)
1008{
1009 struct intel_iommu *iommu = get_irq_data(irq);
1010 unsigned long flag;
1011
1012 spin_lock_irqsave(&iommu->register_lock, flag);
1013 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1014 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1015 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1016 spin_unlock_irqrestore(&iommu->register_lock, flag);
1017}
1018
1019static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 1020 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 1021{
d94afc6c 1022 const char *reason;
1023
1024 reason = dmar_get_fault_reason(fault_reason);
1025
1026 printk(KERN_ERR
1027 "DMAR:[%s] Request device [%02x:%02x.%d] "
1028 "fault addr %llx \n"
1029 "DMAR:[fault reason %02d] %s\n",
1030 (type ? "DMA Read" : "DMA Write"),
1031 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1032 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1033 return 0;
1034}
1035
1036#define PRIMARY_FAULT_REG_LEN (16)
1037static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1038{
1039 struct intel_iommu *iommu = dev_id;
1040 int reg, fault_index;
1041 u32 fault_status;
1042 unsigned long flag;
1043
1044 spin_lock_irqsave(&iommu->register_lock, flag);
1045 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1046
1047 /* TBD: ignore advanced fault log currently */
1048 if (!(fault_status & DMA_FSTS_PPF))
1049 goto clear_overflow;
1050
1051 fault_index = dma_fsts_fault_record_index(fault_status);
1052 reg = cap_fault_reg_offset(iommu->cap);
1053 while (1) {
1054 u8 fault_reason;
1055 u16 source_id;
1056 u64 guest_addr;
1057 int type;
1058 u32 data;
1059
1060 /* highest 32 bits */
1061 data = readl(iommu->reg + reg +
1062 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1063 if (!(data & DMA_FRCD_F))
1064 break;
1065
1066 fault_reason = dma_frcd_fault_reason(data);
1067 type = dma_frcd_type(data);
1068
1069 data = readl(iommu->reg + reg +
1070 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1071 source_id = dma_frcd_source_id(data);
1072
1073 guest_addr = dmar_readq(iommu->reg + reg +
1074 fault_index * PRIMARY_FAULT_REG_LEN);
1075 guest_addr = dma_frcd_page_addr(guest_addr);
1076 /* clear the fault */
1077 writel(DMA_FRCD_F, iommu->reg + reg +
1078 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1079
1080 spin_unlock_irqrestore(&iommu->register_lock, flag);
1081
1082 iommu_page_fault_do_one(iommu, type, fault_reason,
1083 source_id, guest_addr);
1084
1085 fault_index++;
1086 if (fault_index > cap_num_fault_regs(iommu->cap))
1087 fault_index = 0;
1088 spin_lock_irqsave(&iommu->register_lock, flag);
1089 }
1090clear_overflow:
1091 /* clear primary fault overflow */
1092 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1093 if (fault_status & DMA_FSTS_PFO)
1094 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1095
1096 spin_unlock_irqrestore(&iommu->register_lock, flag);
1097 return IRQ_HANDLED;
1098}
1099
1100int dmar_set_interrupt(struct intel_iommu *iommu)
1101{
1102 int irq, ret;
1103
1104 irq = create_irq();
1105 if (!irq) {
1106 printk(KERN_ERR "IOMMU: no free vectors\n");
1107 return -EINVAL;
1108 }
1109
1110 set_irq_data(irq, iommu);
1111 iommu->irq = irq;
1112
1113 ret = arch_setup_dmar_msi(irq);
1114 if (ret) {
1115 set_irq_data(irq, NULL);
1116 iommu->irq = 0;
1117 destroy_irq(irq);
1118 return 0;
1119 }
1120
1121 /* Force fault register is cleared */
1122 iommu_page_fault(irq, iommu);
1123
1124 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1125 if (ret)
1126 printk(KERN_ERR "IOMMU: can't request irq\n");
1127 return ret;
1128}
1129
1130static int iommu_init_domains(struct intel_iommu *iommu)
1131{
1132 unsigned long ndomains;
1133 unsigned long nlongs;
1134
1135 ndomains = cap_ndoms(iommu->cap);
 1136 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1137 nlongs = BITS_TO_LONGS(ndomains);
1138
1139 /* TBD: there might be 64K domains,
1140 * consider other allocation for future chip
1141 */
1142 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1143 if (!iommu->domain_ids) {
1144 printk(KERN_ERR "Allocating domain id array failed\n");
1145 return -ENOMEM;
1146 }
1147 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1148 GFP_KERNEL);
1149 if (!iommu->domains) {
1150 printk(KERN_ERR "Allocating domain array failed\n");
1151 kfree(iommu->domain_ids);
1152 return -ENOMEM;
1153 }
1154
1155 spin_lock_init(&iommu->lock);
1156
1157 /*
1158 * if Caching mode is set, then invalid translations are tagged
1159 * with domainid 0. Hence we need to pre-allocate it.
1160 */
1161 if (cap_caching_mode(iommu->cap))
1162 set_bit(0, iommu->domain_ids);
1163 return 0;
1164}
ba395927 1165
1166
1167static void domain_exit(struct dmar_domain *domain);
1168
1169void free_dmar_iommu(struct intel_iommu *iommu)
1170{
1171 struct dmar_domain *domain;
1172 int i;
1173
1174 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1175 for (; i < cap_ndoms(iommu->cap); ) {
1176 domain = iommu->domains[i];
1177 clear_bit(i, iommu->domain_ids);
1178 domain_exit(domain);
1179 i = find_next_bit(iommu->domain_ids,
1180 cap_ndoms(iommu->cap), i+1);
1181 }
1182
1183 if (iommu->gcmd & DMA_GCMD_TE)
1184 iommu_disable_translation(iommu);
1185
1186 if (iommu->irq) {
1187 set_irq_data(iommu->irq, NULL);
1188 /* This will mask the irq */
1189 free_irq(iommu->irq, iommu);
1190 destroy_irq(iommu->irq);
1191 }
1192
1193 kfree(iommu->domains);
1194 kfree(iommu->domain_ids);
1195
1196 g_iommus[iommu->seq_id] = NULL;
1197
1198 /* if all iommus are freed, free g_iommus */
1199 for (i = 0; i < g_num_of_iommus; i++) {
1200 if (g_iommus[i])
1201 break;
1202 }
1203
1204 if (i == g_num_of_iommus)
1205 kfree(g_iommus);
1206
1207 /* free context mapping */
1208 free_context_table(iommu);
1209}
1210
1211static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1212{
1213 unsigned long num;
1214 unsigned long ndomains;
1215 struct dmar_domain *domain;
1216 unsigned long flags;
1217
1218 domain = alloc_domain_mem();
1219 if (!domain)
1220 return NULL;
1221
1222 ndomains = cap_ndoms(iommu->cap);
1223
1224 spin_lock_irqsave(&iommu->lock, flags);
1225 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1226 if (num >= ndomains) {
1227 spin_unlock_irqrestore(&iommu->lock, flags);
1228 free_domain_mem(domain);
1229 printk(KERN_ERR "IOMMU: no free domain ids\n");
1230 return NULL;
1231 }
1232
1233 set_bit(num, iommu->domain_ids);
1234 domain->id = num;
1235 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1236 set_bit(iommu->seq_id, &domain->iommu_bmp);
d71a2f33 1237 domain->flags = 0;
1238 iommu->domains[num] = domain;
1239 spin_unlock_irqrestore(&iommu->lock, flags);
1240
1241 return domain;
1242}
1243
1244static void iommu_free_domain(struct dmar_domain *domain)
1245{
1246 unsigned long flags;
1247 struct intel_iommu *iommu;
1248
1249 iommu = domain_get_iommu(domain);
ba395927 1250
1251 spin_lock_irqsave(&iommu->lock, flags);
1252 clear_bit(domain->id, iommu->domain_ids);
1253 spin_unlock_irqrestore(&iommu->lock, flags);
1254}
1255
1256static struct iova_domain reserved_iova_list;
1257static struct lock_class_key reserved_alloc_key;
1258static struct lock_class_key reserved_rbtree_key;
1259
1260static void dmar_init_reserved_ranges(void)
1261{
1262 struct pci_dev *pdev = NULL;
1263 struct iova *iova;
1264 int i;
1265 u64 addr, size;
1266
f661197e 1267 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1268
1269 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1270 &reserved_alloc_key);
1271 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1272 &reserved_rbtree_key);
1273
1274 /* IOAPIC ranges shouldn't be accessed by DMA */
1275 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1276 IOVA_PFN(IOAPIC_RANGE_END));
1277 if (!iova)
1278 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1279
1280 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1281 for_each_pci_dev(pdev) {
1282 struct resource *r;
1283
1284 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1285 r = &pdev->resource[i];
1286 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1287 continue;
1288 addr = r->start;
5b6985ce 1289 addr &= PAGE_MASK;
ba395927 1290 size = r->end - addr;
5b6985ce 1291 size = PAGE_ALIGN(size);
1292 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1293 IOVA_PFN(size + addr) - 1);
1294 if (!iova)
1295 printk(KERN_ERR "Reserve iova failed\n");
1296 }
1297 }
1298
1299}
1300
1301static void domain_reserve_special_ranges(struct dmar_domain *domain)
1302{
1303 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1304}
1305
1306static inline int guestwidth_to_adjustwidth(int gaw)
1307{
1308 int agaw;
1309 int r = (gaw - 12) % 9;
1310
1311 if (r == 0)
1312 agaw = gaw;
1313 else
1314 agaw = gaw + 9 - r;
1315 if (agaw > 64)
1316 agaw = 64;
1317 return agaw;
1318}
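/*
 * Worked example (illustrative): gaw == 48 gives (48 - 12) % 9 == 0, so
 * the adjusted width stays 48. gaw == 36 gives a remainder of 6, so the
 * width is rounded up to 36 + 9 - 6 == 39, the next width that maps onto
 * a whole number of 9-bit page-table levels.
 */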
1319
1320static int domain_init(struct dmar_domain *domain, int guest_width)
1321{
1322 struct intel_iommu *iommu;
1323 int adjust_width, agaw;
1324 unsigned long sagaw;
1325
f661197e 1326 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1327 spin_lock_init(&domain->mapping_lock);
1328
1329 domain_reserve_special_ranges(domain);
1330
1331 /* calculate AGAW */
8c11e798 1332 iommu = domain_get_iommu(domain);
1333 if (guest_width > cap_mgaw(iommu->cap))
1334 guest_width = cap_mgaw(iommu->cap);
1335 domain->gaw = guest_width;
1336 adjust_width = guestwidth_to_adjustwidth(guest_width);
1337 agaw = width_to_agaw(adjust_width);
1338 sagaw = cap_sagaw(iommu->cap);
1339 if (!test_bit(agaw, &sagaw)) {
1340 /* hardware doesn't support it, choose a bigger one */
1341 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1342 agaw = find_next_bit(&sagaw, 5, agaw);
1343 if (agaw >= 5)
1344 return -ENODEV;
1345 }
1346 domain->agaw = agaw;
1347 INIT_LIST_HEAD(&domain->devices);
1348
1349 /* always allocate the top pgd */
1350 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1351 if (!domain->pgd)
1352 return -ENOMEM;
5b6985ce 1353 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1354 return 0;
1355}
1356
1357static void domain_exit(struct dmar_domain *domain)
1358{
1359 u64 end;
1360
1361 /* Domain 0 is reserved, so dont process it */
1362 if (!domain)
1363 return;
1364
1365 domain_remove_dev_info(domain);
1366 /* destroy iovas */
1367 put_iova_domain(&domain->iovad);
1368 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1369 end = end & (~PAGE_MASK);
1370
1371 /* clear ptes */
1372 dma_pte_clear_range(domain, 0, end);
1373
1374 /* free page tables */
1375 dma_pte_free_pagetable(domain, 0, end);
1376
1377 iommu_free_domain(domain);
1378 free_domain_mem(domain);
1379}
1380
1381static int domain_context_mapping_one(struct dmar_domain *domain,
1382 u8 bus, u8 devfn)
1383{
1384 struct context_entry *context;
8c11e798 1385 struct intel_iommu *iommu = domain_get_iommu(domain);
1386 unsigned long flags;
1387
1388 pr_debug("Set context mapping for %02x:%02x.%d\n",
1389 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1390 BUG_ON(!domain->pgd);
1391 context = device_to_context_entry(iommu, bus, devfn);
1392 if (!context)
1393 return -ENOMEM;
1394 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1395 if (context_present(context)) {
1396 spin_unlock_irqrestore(&iommu->lock, flags);
1397 return 0;
1398 }
1399
1400 context_set_domain_id(context, domain->id);
1401 context_set_address_width(context, domain->agaw);
1402 context_set_address_root(context, virt_to_phys(domain->pgd));
1403 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1404 context_set_fault_enable(context);
1405 context_set_present(context);
1406 __iommu_flush_cache(iommu, context, sizeof(*context));
1407
1408 /* it's a non-present to present mapping */
1409 if (iommu->flush.flush_context(iommu, domain->id,
1410 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1411 DMA_CCMD_DEVICE_INVL, 1))
1412 iommu_flush_write_buffer(iommu);
1413 else
1414 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1415
1416 spin_unlock_irqrestore(&iommu->lock, flags);
1417 return 0;
1418}
1419
1420static int
1421domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1422{
1423 int ret;
1424 struct pci_dev *tmp, *parent;
1425
1426 ret = domain_context_mapping_one(domain, pdev->bus->number,
1427 pdev->devfn);
1428 if (ret)
1429 return ret;
1430
1431 /* dependent device mapping */
1432 tmp = pci_find_upstream_pcie_bridge(pdev);
1433 if (!tmp)
1434 return 0;
1435 /* Secondary interface's bus number and devfn 0 */
1436 parent = pdev->bus->self;
1437 while (parent != tmp) {
1438 ret = domain_context_mapping_one(domain, parent->bus->number,
1439 parent->devfn);
1440 if (ret)
1441 return ret;
1442 parent = parent->bus->self;
1443 }
1444 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1445 return domain_context_mapping_one(domain,
1446 tmp->subordinate->number, 0);
1447 else /* this is a legacy PCI bridge */
1448 return domain_context_mapping_one(domain,
1449 tmp->bus->number, tmp->devfn);
1450}
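/*
 * Illustrative example (hypothetical topology): for a conventional PCI
 * device 06:05.0 sitting behind a PCIe-to-PCI bridge, the code above sets
 * up context entries for 06:05.0 itself, for every bridge on the path up
 * to that PCIe-to-PCI bridge, and finally for (secondary bus 06, devfn 0),
 * which is the requester id such DMA typically appears under.
 */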
1451
1452static int domain_context_mapped(struct dmar_domain *domain,
1453 struct pci_dev *pdev)
1454{
1455 int ret;
1456 struct pci_dev *tmp, *parent;
8c11e798 1457 struct intel_iommu *iommu = domain_get_iommu(domain);
ba395927 1458
8c11e798 1459 ret = device_context_mapped(iommu,
1460 pdev->bus->number, pdev->devfn);
1461 if (!ret)
1462 return ret;
1463 /* dependent device mapping */
1464 tmp = pci_find_upstream_pcie_bridge(pdev);
1465 if (!tmp)
1466 return ret;
1467 /* Secondary interface's bus number and devfn 0 */
1468 parent = pdev->bus->self;
1469 while (parent != tmp) {
8c11e798 1470 ret = device_context_mapped(iommu, parent->bus->number,
1471 parent->devfn);
1472 if (!ret)
1473 return ret;
1474 parent = parent->bus->self;
1475 }
1476 if (tmp->is_pcie)
8c11e798 1477 return device_context_mapped(iommu,
1478 tmp->subordinate->number, 0);
1479 else
8c11e798 1480 return device_context_mapped(iommu,
1481 tmp->bus->number, tmp->devfn);
1482}
1483
1484static int
1485domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1486 u64 hpa, size_t size, int prot)
1487{
1488 u64 start_pfn, end_pfn;
1489 struct dma_pte *pte;
1490 int index;
5b6985ce 1491 int addr_width = agaw_to_width(domain->agaw);
8c11e798 1492 struct intel_iommu *iommu = domain_get_iommu(domain);
1493
1494 hpa &= (((u64)1) << addr_width) - 1;
1495
1496 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1497 return -EINVAL;
1498 iova &= PAGE_MASK;
1499 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1500 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1501 index = 0;
1502 while (start_pfn < end_pfn) {
5b6985ce 1503 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
ba395927
KA
1504 if (!pte)
1505 return -ENOMEM;
1506 /* We don't need lock here, nobody else
1507 * touches the iova range
1508 */
1509 BUG_ON(dma_pte_addr(pte));
1510 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1511 dma_set_pte_prot(pte, prot);
8c11e798 1512 __iommu_flush_cache(iommu, pte, sizeof(*pte));
1513 start_pfn++;
1514 index++;
1515 }
1516 return 0;
1517}
1518
1519static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1520{
1521 struct intel_iommu *iommu = domain_get_iommu(domain);
1522
1523 clear_context_table(iommu, bus, devfn);
1524 iommu->flush.flush_context(iommu, 0, 0, 0,
a77b67d4 1525 DMA_CCMD_GLOBAL_INVL, 0);
8c11e798 1526 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
a77b67d4 1527 DMA_TLB_GLOBAL_FLUSH, 0);
1528}
1529
1530static void domain_remove_dev_info(struct dmar_domain *domain)
1531{
1532 struct device_domain_info *info;
1533 unsigned long flags;
1534
1535 spin_lock_irqsave(&device_domain_lock, flags);
1536 while (!list_empty(&domain->devices)) {
1537 info = list_entry(domain->devices.next,
1538 struct device_domain_info, link);
1539 list_del(&info->link);
1540 list_del(&info->global);
1541 if (info->dev)
358dd8ac 1542 info->dev->dev.archdata.iommu = NULL;
1543 spin_unlock_irqrestore(&device_domain_lock, flags);
1544
1545 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1546 free_devinfo_mem(info);
1547
1548 spin_lock_irqsave(&device_domain_lock, flags);
1549 }
1550 spin_unlock_irqrestore(&device_domain_lock, flags);
1551}
1552
1553/*
1554 * find_domain
358dd8ac 1555 * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
ba395927 1556 */
38717946 1557static struct dmar_domain *
1558find_domain(struct pci_dev *pdev)
1559{
1560 struct device_domain_info *info;
1561
1562 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1563 info = pdev->dev.archdata.iommu;
ba395927
KA
1564 if (info)
1565 return info->domain;
1566 return NULL;
1567}
1568
1569/* domain is initialized */
1570static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1571{
1572 struct dmar_domain *domain, *found = NULL;
1573 struct intel_iommu *iommu;
1574 struct dmar_drhd_unit *drhd;
1575 struct device_domain_info *info, *tmp;
1576 struct pci_dev *dev_tmp;
1577 unsigned long flags;
1578 int bus = 0, devfn = 0;
1579
1580 domain = find_domain(pdev);
1581 if (domain)
1582 return domain;
1583
1584 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1585 if (dev_tmp) {
1586 if (dev_tmp->is_pcie) {
1587 bus = dev_tmp->subordinate->number;
1588 devfn = 0;
1589 } else {
1590 bus = dev_tmp->bus->number;
1591 devfn = dev_tmp->devfn;
1592 }
1593 spin_lock_irqsave(&device_domain_lock, flags);
1594 list_for_each_entry(info, &device_domain_list, global) {
1595 if (info->bus == bus && info->devfn == devfn) {
1596 found = info->domain;
1597 break;
1598 }
1599 }
1600 spin_unlock_irqrestore(&device_domain_lock, flags);
 1601 /* pcie-pci bridge already has a domain, use it */
1602 if (found) {
1603 domain = found;
1604 goto found_domain;
1605 }
1606 }
1607
1608 /* Allocate new domain for the device */
1609 drhd = dmar_find_matched_drhd_unit(pdev);
1610 if (!drhd) {
1611 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1612 pci_name(pdev));
1613 return NULL;
1614 }
1615 iommu = drhd->iommu;
1616
1617 domain = iommu_alloc_domain(iommu);
1618 if (!domain)
1619 goto error;
1620
1621 if (domain_init(domain, gaw)) {
1622 domain_exit(domain);
1623 goto error;
1624 }
1625
1626 /* register pcie-to-pci device */
1627 if (dev_tmp) {
1628 info = alloc_devinfo_mem();
1629 if (!info) {
1630 domain_exit(domain);
1631 goto error;
1632 }
1633 info->bus = bus;
1634 info->devfn = devfn;
1635 info->dev = NULL;
1636 info->domain = domain;
1637 /* This domain is shared by devices under p2p bridge */
3b5410e7 1638 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1639
 1640 /* pcie-to-pci bridge already has a domain, use it */
1641 found = NULL;
1642 spin_lock_irqsave(&device_domain_lock, flags);
1643 list_for_each_entry(tmp, &device_domain_list, global) {
1644 if (tmp->bus == bus && tmp->devfn == devfn) {
1645 found = tmp->domain;
1646 break;
1647 }
1648 }
1649 if (found) {
1650 free_devinfo_mem(info);
1651 domain_exit(domain);
1652 domain = found;
1653 } else {
1654 list_add(&info->link, &domain->devices);
1655 list_add(&info->global, &device_domain_list);
1656 }
1657 spin_unlock_irqrestore(&device_domain_lock, flags);
1658 }
1659
1660found_domain:
1661 info = alloc_devinfo_mem();
1662 if (!info)
1663 goto error;
1664 info->bus = pdev->bus->number;
1665 info->devfn = pdev->devfn;
1666 info->dev = pdev;
1667 info->domain = domain;
1668 spin_lock_irqsave(&device_domain_lock, flags);
1669 /* somebody is fast */
1670 found = find_domain(pdev);
1671 if (found != NULL) {
1672 spin_unlock_irqrestore(&device_domain_lock, flags);
1673 if (found != domain) {
1674 domain_exit(domain);
1675 domain = found;
1676 }
1677 free_devinfo_mem(info);
1678 return domain;
1679 }
1680 list_add(&info->link, &domain->devices);
1681 list_add(&info->global, &device_domain_list);
358dd8ac 1682 pdev->dev.archdata.iommu = info;
1683 spin_unlock_irqrestore(&device_domain_lock, flags);
1684 return domain;
1685error:
1686 /* recheck it here, maybe others set it */
1687 return find_domain(pdev);
1688}
1689
1690static int iommu_prepare_identity_map(struct pci_dev *pdev,
1691 unsigned long long start,
1692 unsigned long long end)
1693{
1694 struct dmar_domain *domain;
1695 unsigned long size;
5b6985ce 1696 unsigned long long base;
1697 int ret;
1698
1699 printk(KERN_INFO
1700 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1701 pci_name(pdev), start, end);
1702 /* page table init */
1703 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1704 if (!domain)
1705 return -ENOMEM;
1706
1707 /* The address might not be aligned */
5b6985ce 1708 base = start & PAGE_MASK;
ba395927 1709 size = end - base;
5b6985ce 1710 size = PAGE_ALIGN(size);
1711 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1712 IOVA_PFN(base + size) - 1)) {
1713 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1714 ret = -ENOMEM;
1715 goto error;
1716 }
1717
1718 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1719 size, base, pci_name(pdev));
1720 /*
1721 * RMRR range might have overlap with physical memory range,
1722 * clear it first
1723 */
1724 dma_pte_clear_range(domain, base, base + size);
1725
1726 ret = domain_page_mapping(domain, base, base, size,
1727 DMA_PTE_READ|DMA_PTE_WRITE);
1728 if (ret)
1729 goto error;
1730
1731 /* context entry init */
1732 ret = domain_context_mapping(domain, pdev);
1733 if (!ret)
1734 return 0;
1735error:
1736 domain_exit(domain);
1737 return ret;
1738
1739}
1740
1741static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1742 struct pci_dev *pdev)
1743{
358dd8ac 1744 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1745 return 0;
1746 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1747 rmrr->end_address + 1);
1748}
1749
e820482c 1750#ifdef CONFIG_DMAR_GFX_WA
1751struct iommu_prepare_data {
1752 struct pci_dev *pdev;
1753 int ret;
1754};
1755
1756static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1757 unsigned long end_pfn, void *datax)
1758{
1759 struct iommu_prepare_data *data;
1760
1761 data = (struct iommu_prepare_data *)datax;
1762
1763 data->ret = iommu_prepare_identity_map(data->pdev,
1764 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1765 return data->ret;
1766
1767}
1768
1769static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1770{
1771 int nid;
1772 struct iommu_prepare_data data;
1773
1774 data.pdev = pdev;
1775 data.ret = 0;
1776
1777 for_each_online_node(nid) {
1778 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1779 if (data.ret)
1780 return data.ret;
1781 }
1782 return data.ret;
1783}
1784
1785static void __init iommu_prepare_gfx_mapping(void)
1786{
1787 struct pci_dev *pdev = NULL;
1788 int ret;
1789
1790 for_each_pci_dev(pdev) {
358dd8ac 1791 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1792 !IS_GFX_DEVICE(pdev))
1793 continue;
1794 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1795 pci_name(pdev));
1796 ret = iommu_prepare_with_active_regions(pdev);
1797 if (ret)
1798 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1799 }
1800}
1801#else /* !CONFIG_DMAR_GFX_WA */
1802static inline void iommu_prepare_gfx_mapping(void)
1803{
1804 return;
1805}
1806#endif
1807
1808#ifdef CONFIG_DMAR_FLOPPY_WA
1809static inline void iommu_prepare_isa(void)
1810{
1811 struct pci_dev *pdev;
1812 int ret;
1813
1814 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1815 if (!pdev)
1816 return;
1817
1818 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1819 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1820
1821 if (ret)
 1822 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1823 "floppy might not work\n");
1824
1825}
1826#else
1827static inline void iommu_prepare_isa(void)
1828{
1829 return;
1830}
 1831#endif /* !CONFIG_DMAR_FLOPPY_WA */
1832
519a0549 1833static int __init init_dmars(void)
1834{
1835 struct dmar_drhd_unit *drhd;
1836 struct dmar_rmrr_unit *rmrr;
1837 struct pci_dev *pdev;
1838 struct intel_iommu *iommu;
80b20dd8 1839 int i, ret, unit = 0;
1840
1841 /*
1842 * for each drhd
1843 * allocate root
1844 * initialize and program root entry to not present
1845 * endfor
1846 */
1847 for_each_drhd_unit(drhd) {
5e0d2a6f 1848 g_num_of_iommus++;
1849 /*
1850 * lock not needed as this is only incremented in the single
1851 * threaded kernel __init code path all other access are read
1852 * only
1853 */
1854 }
1855
1856 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1857 GFP_KERNEL);
1858 if (!g_iommus) {
1859 printk(KERN_ERR "Allocating global iommu array failed\n");
1860 ret = -ENOMEM;
1861 goto error;
1862 }
1863
80b20dd8 1864 deferred_flush = kzalloc(g_num_of_iommus *
1865 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1866 if (!deferred_flush) {
d9630fe9 1867 kfree(g_iommus);
5e0d2a6f 1868 ret = -ENOMEM;
1869 goto error;
1870 }
1871
5e0d2a6f 1872 for_each_drhd_unit(drhd) {
1873 if (drhd->ignored)
1874 continue;
1875
1876 iommu = drhd->iommu;
d9630fe9 1877 g_iommus[iommu->seq_id] = iommu;
ba395927 1878
1879 ret = iommu_init_domains(iommu);
1880 if (ret)
1881 goto error;
1882
1883 /*
1884 * TBD:
1885 * we could share the same root & context tables
 1886 * among all IOMMUs. Need to split it later.
1887 */
1888 ret = iommu_alloc_root_entry(iommu);
1889 if (ret) {
1890 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1891 goto error;
1892 }
1893 }
1894
1895 for_each_drhd_unit(drhd) {
1896 if (drhd->ignored)
1897 continue;
1898
1899 iommu = drhd->iommu;
1900 if (dmar_enable_qi(iommu)) {
1901 /*
1902 * Queued Invalidate not enabled, use Register Based
1903 * Invalidate
1904 */
1905 iommu->flush.flush_context = __iommu_flush_context;
1906 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1907 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1908 "invalidation\n",
1909 (unsigned long long)drhd->reg_base_addr);
1910 } else {
1911 iommu->flush.flush_context = qi_flush_context;
1912 iommu->flush.flush_iotlb = qi_flush_iotlb;
1913 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1914 "invalidation\n",
1915 (unsigned long long)drhd->reg_base_addr);
1916 }
1917 }
1918
1919 /*
1920 * For each rmrr
1921 * for each dev attached to rmrr
1922 * do
1923 * locate drhd for dev, alloc domain for dev
1924 * allocate free domain
1925 * allocate page table entries for rmrr
1926 * if context not allocated for bus
1927 * allocate and init context
1928 * set present in root table for this bus
1929 * init context with domain, translation etc
1930 * endfor
1931 * endfor
1932 */
1933 for_each_rmrr_units(rmrr) {
1934 for (i = 0; i < rmrr->devices_cnt; i++) {
1935 pdev = rmrr->devices[i];
1936 /* some BIOSes list non-existent devices in the DMAR table */
1937 if (!pdev)
1938 continue;
1939 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1940 if (ret)
1941 printk(KERN_ERR
1942 "IOMMU: mapping reserved region failed\n");
1943 }
1944 }
1945
e820482c
KA
1946 iommu_prepare_gfx_mapping();
1947
49a0429e
KA
1948 iommu_prepare_isa();
1949
ba395927
KA
1950 /*
1951 * for each drhd
1952 * enable fault log
1953 * global invalidate context cache
1954 * global invalidate iotlb
1955 * enable translation
1956 */
1957 for_each_drhd_unit(drhd) {
1958 if (drhd->ignored)
1959 continue;
1960 iommu = drhd->iommu;
1961 sprintf(iommu->name, "dmar%d", unit++);
1962
1963 iommu_flush_write_buffer(iommu);
1964
3460a6d9
KA
1965 ret = dmar_set_interrupt(iommu);
1966 if (ret)
1967 goto error;
1968
ba395927
KA
1969 iommu_set_root_entry(iommu);
1970
a77b67d4
YS
1971 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1972 0);
1973 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1974 0);
f8bab735 1975 iommu_disable_protect_mem_regions(iommu);
1976
ba395927
KA
1977 ret = iommu_enable_translation(iommu);
1978 if (ret)
1979 goto error;
1980 }
1981
1982 return 0;
1983error:
1984 for_each_drhd_unit(drhd) {
1985 if (drhd->ignored)
1986 continue;
1987 iommu = drhd->iommu;
1988 free_iommu(iommu);
1989 }
d9630fe9 1990 kfree(g_iommus);
ba395927
KA
1991 return ret;
1992}
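/*
 * Summary of init_dmars(): count the DRHD units, allocate the global
 * iommu and deferred-flush arrays, give every active IOMMU its
 * domain-id bitmap and root-entry table, pick queued vs. register-
 * based invalidation callbacks, pre-map RMRR/gfx/ISA regions, and
 * finally program the root pointer, flush the caches and turn
 * translation on.  Any failure unwinds through the error: label,
 * freeing each non-ignored IOMMU and the g_iommus array.
 */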
1993
1994static inline u64 aligned_size(u64 host_addr, size_t size)
1995{
1996 u64 addr;
5b6985ce
FY
1997 addr = (host_addr & (~PAGE_MASK)) + size;
1998 return PAGE_ALIGN(addr);
ba395927
KA
1999}
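/*
 * Worked example (illustrative, assuming 4K pages): for a buffer at
 * host_addr 0x1ff0 with size 0x20, the in-page offset is 0xff0, so
 * 0xff0 + 0x20 = 0x1010 rounds up to 0x2000 -- two full pages must
 * be mapped even though only 32 bytes are used.
 */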
2000
2001struct iova *
f76aec76 2002iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 2003{
ba395927
KA
2004 struct iova *piova;
2005
2006 /* Make sure it's in range */
ba395927 2007 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 2008 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
2009 return NULL;
2010
2011 piova = alloc_iova(&domain->iovad,
5b6985ce 2012 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
2013 return piova;
2014}
2015
f76aec76
KA
2016static struct iova *
2017__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 2018 size_t size, u64 dma_mask)
ba395927 2019{
ba395927 2020 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2021 struct iova *iova = NULL;
ba395927 2022
bb9e6d65
FT
2023 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2024 iova = iommu_alloc_iova(domain, size, dma_mask);
2025 else {
ba395927
KA
2026 /*
2027 * First try to allocate an io virtual address in
2028 * DMA_32BIT_MASK and if that fails then try allocating
3609801e 2029 * from the higher range
ba395927 2030 */
f76aec76 2031 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 2032 if (!iova)
bb9e6d65 2033 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
2034 }
2035
2036 if (!iova) {
2037 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
f76aec76
KA
2038 return NULL;
2039 }
2040
2041 return iova;
2042}
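/*
 * Note: the allocation policy above mirrors the rest of the x86 DMA
 * code -- devices whose dma_mask fits in 32 bits (or every device,
 * when booted with intel_iommu=forcedac) allocate directly against
 * their mask, while 64-bit capable devices first try to stay below
 * 4GB and only fall back to the higher range when that space is gone.
 */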
2043
2044static struct dmar_domain *
2045get_valid_domain_for_dev(struct pci_dev *pdev)
2046{
2047 struct dmar_domain *domain;
2048 int ret;
2049
2050 domain = get_domain_for_dev(pdev,
2051 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2052 if (!domain) {
2053 printk(KERN_ERR
2054 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2055 return NULL;
ba395927
KA
2056 }
2057
2058 /* make sure context mapping is ok */
2059 if (unlikely(!domain_context_mapped(domain, pdev))) {
2060 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
2061 if (ret) {
2062 printk(KERN_ERR
2063 "Domain context map for %s failed",
2064 pci_name(pdev));
4fe05bbc 2065 return NULL;
f76aec76 2066 }
ba395927
KA
2067 }
2068
f76aec76
KA
2069 return domain;
2070}
2071
bb9e6d65
FT
2072static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2073 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2074{
2075 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2076 struct dmar_domain *domain;
5b6985ce 2077 phys_addr_t start_paddr;
f76aec76
KA
2078 struct iova *iova;
2079 int prot = 0;
6865f0d1 2080 int ret;
8c11e798 2081 struct intel_iommu *iommu;
f76aec76
KA
2082
2083 BUG_ON(dir == DMA_NONE);
358dd8ac 2084 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 2085 return paddr;
f76aec76
KA
2086
2087 domain = get_valid_domain_for_dev(pdev);
2088 if (!domain)
2089 return 0;
2090
8c11e798 2091 iommu = domain_get_iommu(domain);
6865f0d1 2092 size = aligned_size((u64)paddr, size);
f76aec76 2093
bb9e6d65 2094 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
2095 if (!iova)
2096 goto error;
2097
5b6985ce 2098 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 2099
ba395927
KA
2100 /*
2101 * Check if DMAR supports zero-length reads on write only
2102 * mappings..
2103 */
2104 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2105 !cap_zlr(iommu->cap))
ba395927
KA
2106 prot |= DMA_PTE_READ;
2107 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2108 prot |= DMA_PTE_WRITE;
2109 /*
6865f0d1 2110 * paddr through (paddr + size) might span a partial page; we should map the whole
ba395927 2111 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 2112 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
2113 * is not a big problem
2114 */
6865f0d1 2115 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 2116 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
2117 if (ret)
2118 goto error;
2119
f76aec76 2120 /* it's a non-present to present mapping */
8c11e798 2121 ret = iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2122 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76 2123 if (ret)
8c11e798 2124 iommu_flush_write_buffer(iommu);
f76aec76 2125
5b6985ce 2126 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 2127
ba395927 2128error:
f76aec76
KA
2129 if (iova)
2130 __free_iova(&domain->iovad, iova);
ba395927 2131 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 2132 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2133 return 0;
2134}
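/*
 * Mapping path, roughly: allocate an IOVA range big enough for the
 * page-aligned size, build read/write protection bits from the DMA
 * direction, fill in the domain page tables, then do a page-selective
 * IOTLB flush (or a write-buffer flush on hardware that needs it).
 * The value handed back to the driver is the IOVA base plus the
 * original offset within the first page, e.g. mapping paddr
 * 0x12345678 could return something like 0xffff0678 (illustrative).
 */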
2135
bb9e6d65
FT
2136dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2137 size_t size, int dir)
2138{
2139 return __intel_map_single(hwdev, paddr, size, dir,
2140 to_pci_dev(hwdev)->dma_mask);
2141}
2142
5e0d2a6f 2143static void flush_unmaps(void)
2144{
80b20dd8 2145 int i, j;
5e0d2a6f 2146
5e0d2a6f 2147 timer_on = 0;
2148
2149 /* just flush them all */
2150 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2151 struct intel_iommu *iommu = g_iommus[i];
2152 if (!iommu)
2153 continue;
c42d9f32 2154
a2bb8459 2155 if (deferred_flush[i].next) {
a77b67d4
YS
2156 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2157 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 2158 for (j = 0; j < deferred_flush[i].next; j++) {
2159 __free_iova(&deferred_flush[i].domain[j]->iovad,
2160 deferred_flush[i].iova[j]);
2161 }
2162 deferred_flush[i].next = 0;
2163 }
5e0d2a6f 2164 }
2165
5e0d2a6f 2166 list_size = 0;
5e0d2a6f 2167}
2168
2169static void flush_unmaps_timeout(unsigned long data)
2170{
80b20dd8 2171 unsigned long flags;
2172
2173 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2174 flush_unmaps();
80b20dd8 2175 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2176}
2177
2178static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2179{
2180 unsigned long flags;
80b20dd8 2181 int next, iommu_id;
8c11e798 2182 struct intel_iommu *iommu;
5e0d2a6f 2183
2184 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2185 if (list_size == HIGH_WATER_MARK)
2186 flush_unmaps();
2187
8c11e798
WH
2188 iommu = domain_get_iommu(dom);
2189 iommu_id = iommu->seq_id;
c42d9f32 2190
80b20dd8 2191 next = deferred_flush[iommu_id].next;
2192 deferred_flush[iommu_id].domain[next] = dom;
2193 deferred_flush[iommu_id].iova[next] = iova;
2194 deferred_flush[iommu_id].next++;
5e0d2a6f 2195
2196 if (!timer_on) {
2197 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2198 timer_on = 1;
2199 }
2200 list_size++;
2201 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2202}
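/*
 * Deferred-unmap machinery: instead of flushing the IOTLB on every
 * unmap, add_unmap() parks the IOVA in the per-IOMMU deferred_flush
 * table and arms a 10ms timer; flush_unmaps() then does one global
 * IOTLB flush per IOMMU and frees all queued IOVAs in a batch.
 * HIGH_WATER_MARK bounds how many entries may pile up before a
 * synchronous flush is forced.
 */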
2203
5b6985ce
FY
2204void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2205 int dir)
ba395927 2206{
ba395927 2207 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2208 struct dmar_domain *domain;
2209 unsigned long start_addr;
ba395927 2210 struct iova *iova;
8c11e798 2211 struct intel_iommu *iommu;
ba395927 2212
358dd8ac 2213 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2214 return;
ba395927
KA
2215 domain = find_domain(pdev);
2216 BUG_ON(!domain);
2217
8c11e798
WH
2218 iommu = domain_get_iommu(domain);
2219
ba395927 2220 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2221 if (!iova)
ba395927 2222 return;
ba395927 2223
5b6985ce 2224 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2225 size = aligned_size((u64)dev_addr, size);
ba395927 2226
f76aec76 2227 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2228 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2229
f76aec76
KA
2230 /* clear the whole page */
2231 dma_pte_clear_range(domain, start_addr, start_addr + size);
2232 /* free page tables */
2233 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2234 if (intel_iommu_strict) {
8c11e798 2235 if (iommu_flush_iotlb_psi(iommu,
5b6985ce 2236 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
8c11e798 2237 iommu_flush_write_buffer(iommu);
5e0d2a6f 2238 /* free iova */
2239 __free_iova(&domain->iovad, iova);
2240 } else {
2241 add_unmap(domain, iova);
2242 /*
2243 * queue up the release of the unmap to save the roughly 1/6th of the
2244 * cpu time otherwise used up by the iotlb flush operation...
2245 */
5e0d2a6f 2246 }
ba395927
KA
2247}
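/*
 * Note: with intel_iommu=strict the unmap path above invalidates the
 * IOTLB and frees the IOVA immediately; otherwise the release is
 * queued through add_unmap() and batched, trading a short window in
 * which the device could still hit the stale mapping for far less
 * time spent in flush operations.
 */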
2248
5b6985ce
FY
2249void *intel_alloc_coherent(struct device *hwdev, size_t size,
2250 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2251{
2252 void *vaddr;
2253 int order;
2254
5b6985ce 2255 size = PAGE_ALIGN(size);
ba395927
KA
2256 order = get_order(size);
2257 flags &= ~(GFP_DMA | GFP_DMA32);
2258
2259 vaddr = (void *)__get_free_pages(flags, order);
2260 if (!vaddr)
2261 return NULL;
2262 memset(vaddr, 0, size);
2263
bb9e6d65
FT
2264 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2265 DMA_BIDIRECTIONAL,
2266 hwdev->coherent_dma_mask);
ba395927
KA
2267 if (*dma_handle)
2268 return vaddr;
2269 free_pages((unsigned long)vaddr, order);
2270 return NULL;
2271}
2272
5b6985ce
FY
2273void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2274 dma_addr_t dma_handle)
ba395927
KA
2275{
2276 int order;
2277
5b6985ce 2278 size = PAGE_ALIGN(size);
ba395927
KA
2279 order = get_order(size);
2280
2281 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2282 free_pages((unsigned long)vaddr, order);
2283}
2284
12d4d40e 2285#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2286
2287void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2288 int nelems, int dir)
ba395927
KA
2289{
2290 int i;
2291 struct pci_dev *pdev = to_pci_dev(hwdev);
2292 struct dmar_domain *domain;
f76aec76
KA
2293 unsigned long start_addr;
2294 struct iova *iova;
2295 size_t size = 0;
2296 void *addr;
c03ab37c 2297 struct scatterlist *sg;
8c11e798 2298 struct intel_iommu *iommu;
ba395927 2299
358dd8ac 2300 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2301 return;
2302
2303 domain = find_domain(pdev);
8c11e798
WH
2304 BUG_ON(!domain);
2305
2306 iommu = domain_get_iommu(domain);
ba395927 2307
c03ab37c 2308 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2309 if (!iova)
2310 return;
c03ab37c 2311 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2312 addr = SG_ENT_VIRT_ADDRESS(sg);
2313 size += aligned_size((u64)addr, sg->length);
2314 }
2315
5b6985ce 2316 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2317
2318 /* clear the whole page */
2319 dma_pte_clear_range(domain, start_addr, start_addr + size);
2320 /* free page tables */
2321 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2322
8c11e798 2323 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
5b6985ce 2324 size >> VTD_PAGE_SHIFT, 0))
8c11e798 2325 iommu_flush_write_buffer(iommu);
f76aec76
KA
2326
2327 /* free iova */
2328 __free_iova(&domain->iovad, iova);
ba395927
KA
2329}
2330
ba395927 2331static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2332 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2333{
2334 int i;
c03ab37c 2335 struct scatterlist *sg;
ba395927 2336
c03ab37c 2337 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2338 BUG_ON(!sg_page(sg));
c03ab37c
FT
2339 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2340 sg->dma_length = sg->length;
ba395927
KA
2341 }
2342 return nelems;
2343}
2344
5b6985ce
FY
2345int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2346 int dir)
ba395927
KA
2347{
2348 void *addr;
2349 int i;
ba395927
KA
2350 struct pci_dev *pdev = to_pci_dev(hwdev);
2351 struct dmar_domain *domain;
f76aec76
KA
2352 size_t size = 0;
2353 int prot = 0;
2354 size_t offset = 0;
2355 struct iova *iova = NULL;
2356 int ret;
c03ab37c 2357 struct scatterlist *sg;
f76aec76 2358 unsigned long start_addr;
8c11e798 2359 struct intel_iommu *iommu;
ba395927
KA
2360
2361 BUG_ON(dir == DMA_NONE);
358dd8ac 2362 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2363 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2364
f76aec76
KA
2365 domain = get_valid_domain_for_dev(pdev);
2366 if (!domain)
2367 return 0;
2368
8c11e798
WH
2369 iommu = domain_get_iommu(domain);
2370
c03ab37c 2371 for_each_sg(sglist, sg, nelems, i) {
ba395927 2372 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2373 addr = (void *)virt_to_phys(addr);
2374 size += aligned_size((u64)addr, sg->length);
2375 }
2376
bb9e6d65 2377 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2378 if (!iova) {
c03ab37c 2379 sglist->dma_length = 0;
f76aec76
KA
2380 return 0;
2381 }
2382
2383 /*
2384 * Check if DMAR supports zero-length reads on write only
2385 * mappings..
2386 */
2387 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2388 !cap_zlr(iommu->cap))
f76aec76
KA
2389 prot |= DMA_PTE_READ;
2390 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2391 prot |= DMA_PTE_WRITE;
2392
5b6985ce 2393 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2394 offset = 0;
c03ab37c 2395 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2396 addr = SG_ENT_VIRT_ADDRESS(sg);
2397 addr = (void *)virt_to_phys(addr);
2398 size = aligned_size((u64)addr, sg->length);
2399 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2400 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2401 size, prot);
2402 if (ret) {
2403 /* clear the page */
2404 dma_pte_clear_range(domain, start_addr,
2405 start_addr + offset);
2406 /* free page tables */
2407 dma_pte_free_pagetable(domain, start_addr,
2408 start_addr + offset);
2409 /* free iova */
2410 __free_iova(&domain->iovad, iova);
ba395927
KA
2411 return 0;
2412 }
f76aec76 2413 sg->dma_address = start_addr + offset +
5b6985ce 2414 ((u64)addr & (~PAGE_MASK));
ba395927 2415 sg->dma_length = sg->length;
f76aec76 2416 offset += size;
ba395927
KA
2417 }
2418
ba395927 2419 /* it's a non-present to present mapping */
8c11e798 2420 if (iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2421 start_addr, offset >> VTD_PAGE_SHIFT, 1))
8c11e798 2422 iommu_flush_write_buffer(iommu);
ba395927
KA
2423 return nelems;
2424}
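/*
 * Scatter-gather note: all segments of the list are packed into one
 * contiguous IOVA range sized to the sum of their page-aligned
 * lengths; each sg entry then gets dma_address = range base + its
 * running offset + the in-page offset of its buffer.  A failure part
 * way through tears down everything mapped so far and returns 0.
 */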
2425
2426static struct dma_mapping_ops intel_dma_ops = {
2427 .alloc_coherent = intel_alloc_coherent,
2428 .free_coherent = intel_free_coherent,
2429 .map_single = intel_map_single,
2430 .unmap_single = intel_unmap_single,
2431 .map_sg = intel_map_sg,
2432 .unmap_sg = intel_unmap_sg,
2433};
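/*
 * With dma_ops pointed at intel_dma_ops (see intel_iommu_init below),
 * drivers keep using the generic DMA API and are routed here.
 * Illustrative driver-side sketch, error handling omitted:
 *
 *	dma_addr_t handle = pci_map_single(pdev, buf, len,
 *					   PCI_DMA_TODEVICE);
 *	...program the device with 'handle' as the bus address...
 *	pci_unmap_single(pdev, handle, len, PCI_DMA_TODEVICE);
 */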
2434
2435static inline int iommu_domain_cache_init(void)
2436{
2437 int ret = 0;
2438
2439 iommu_domain_cache = kmem_cache_create("iommu_domain",
2440 sizeof(struct dmar_domain),
2441 0,
2442 SLAB_HWCACHE_ALIGN,
2443
2444 NULL);
2445 if (!iommu_domain_cache) {
2446 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2447 ret = -ENOMEM;
2448 }
2449
2450 return ret;
2451}
2452
2453static inline int iommu_devinfo_cache_init(void)
2454{
2455 int ret = 0;
2456
2457 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2458 sizeof(struct device_domain_info),
2459 0,
2460 SLAB_HWCACHE_ALIGN,
ba395927
KA
2461 NULL);
2462 if (!iommu_devinfo_cache) {
2463 printk(KERN_ERR "Couldn't create devinfo cache\n");
2464 ret = -ENOMEM;
2465 }
2466
2467 return ret;
2468}
2469
2470static inline int iommu_iova_cache_init(void)
2471{
2472 int ret = 0;
2473
2474 iommu_iova_cache = kmem_cache_create("iommu_iova",
2475 sizeof(struct iova),
2476 0,
2477 SLAB_HWCACHE_ALIGN,
ba395927
KA
2478 NULL);
2479 if (!iommu_iova_cache) {
2480 printk(KERN_ERR "Couldn't create iova cache\n");
2481 ret = -ENOMEM;
2482 }
2483
2484 return ret;
2485}
2486
2487static int __init iommu_init_mempool(void)
2488{
2489 int ret;
2490 ret = iommu_iova_cache_init();
2491 if (ret)
2492 return ret;
2493
2494 ret = iommu_domain_cache_init();
2495 if (ret)
2496 goto domain_error;
2497
2498 ret = iommu_devinfo_cache_init();
2499 if (!ret)
2500 return ret;
2501
2502 kmem_cache_destroy(iommu_domain_cache);
2503domain_error:
2504 kmem_cache_destroy(iommu_iova_cache);
2505
2506 return -ENOMEM;
2507}
2508
2509static void __init iommu_exit_mempool(void)
2510{
2511 kmem_cache_destroy(iommu_devinfo_cache);
2512 kmem_cache_destroy(iommu_domain_cache);
2513 kmem_cache_destroy(iommu_iova_cache);
2514
2515}
2516
ba395927
KA
2517static void __init init_no_remapping_devices(void)
2518{
2519 struct dmar_drhd_unit *drhd;
2520
2521 for_each_drhd_unit(drhd) {
2522 if (!drhd->include_all) {
2523 int i;
2524 for (i = 0; i < drhd->devices_cnt; i++)
2525 if (drhd->devices[i] != NULL)
2526 break;
2527 /* ignore DMAR unit if no pci devices exist */
2528 if (i == drhd->devices_cnt)
2529 drhd->ignored = 1;
2530 }
2531 }
2532
2533 if (dmar_map_gfx)
2534 return;
2535
2536 for_each_drhd_unit(drhd) {
2537 int i;
2538 if (drhd->ignored || drhd->include_all)
2539 continue;
2540
2541 for (i = 0; i < drhd->devices_cnt; i++)
2542 if (drhd->devices[i] &&
2543 !IS_GFX_DEVICE(drhd->devices[i]))
2544 break;
2545
2546 if (i < drhd->devices_cnt)
2547 continue;
2548
2549 /* bypass IOMMU if it is just for gfx devices */
2550 drhd->ignored = 1;
2551 for (i = 0; i < drhd->devices_cnt; i++) {
2552 if (!drhd->devices[i])
2553 continue;
358dd8ac 2554 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2555 }
2556 }
2557}
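/*
 * Note: a DRHD unit that covers no PCI devices at all, or (when
 * booted with intel_iommu=igfx_off, i.e. dmar_map_gfx clear) only
 * graphics devices, is marked ignored here; its devices get
 * DUMMY_DEVICE_DOMAIN_INFO so the map/unmap paths above bypass
 * translation for them entirely.
 */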
2558
2559int __init intel_iommu_init(void)
2560{
2561 int ret = 0;
2562
ba395927
KA
2563 if (dmar_table_init())
2564 return -ENODEV;
2565
1886e8a9
SS
2566 if (dmar_dev_scope_init())
2567 return -ENODEV;
2568
2ae21010
SS
2569 /*
2570 * Check the need for DMA-remapping initialization now.
2571 * Above initialization will also be used by Interrupt-remapping.
2572 */
2573 if (no_iommu || swiotlb || dmar_disabled)
2574 return -ENODEV;
2575
ba395927
KA
2576 iommu_init_mempool();
2577 dmar_init_reserved_ranges();
2578
2579 init_no_remapping_devices();
2580
2581 ret = init_dmars();
2582 if (ret) {
2583 printk(KERN_ERR "IOMMU: dmar init failed\n");
2584 put_iova_domain(&reserved_iova_list);
2585 iommu_exit_mempool();
2586 return ret;
2587 }
2588 printk(KERN_INFO
2589 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2590
5e0d2a6f 2591 init_timer(&unmap_timer);
ba395927
KA
2592 force_iommu = 1;
2593 dma_ops = &intel_dma_ops;
2594 return 0;
2595}
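/*
 * Initialization order matters here: the DMAR/ACPI tables and device
 * scope are parsed first (they are also needed by interrupt
 * remapping), and DMA remapping is skipped entirely if the user
 * disabled it or if swiotlb/no_iommu is already in effect.  Only
 * after init_dmars() succeeds are force_iommu and dma_ops switched
 * over to the Intel IOMMU implementation.
 */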
e820482c 2596
38717946
KA
2597void intel_iommu_domain_exit(struct dmar_domain *domain)
2598{
2599 u64 end;
2600
2601 /* Domain 0 is reserved, so don't process it */
2602 if (!domain)
2603 return;
2604
2605 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 2606 end = end & (~VTD_PAGE_MASK);
38717946
KA
2607
2608 /* clear ptes */
2609 dma_pte_clear_range(domain, 0, end);
2610
2611 /* free page tables */
2612 dma_pte_free_pagetable(domain, 0, end);
2613
2614 iommu_free_domain(domain);
2615 free_domain_mem(domain);
2616}
2617EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2618
2619struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2620{
2621 struct dmar_drhd_unit *drhd;
2622 struct dmar_domain *domain;
2623 struct intel_iommu *iommu;
2624
2625 drhd = dmar_find_matched_drhd_unit(pdev);
2626 if (!drhd) {
2627 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2628 return NULL;
2629 }
2630
2631 iommu = drhd->iommu;
2632 if (!iommu) {
2633 printk(KERN_ERR
2634 "intel_iommu_domain_alloc: iommu == NULL\n");
2635 return NULL;
2636 }
2637 domain = iommu_alloc_domain(iommu);
2638 if (!domain) {
2639 printk(KERN_ERR
2640 "intel_iommu_domain_alloc: domain == NULL\n");
2641 return NULL;
2642 }
2643 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2644 printk(KERN_ERR
2645 "intel_iommu_domain_alloc: domain_init() failed\n");
2646 intel_iommu_domain_exit(domain);
2647 return NULL;
2648 }
2649 return domain;
2650}
2651EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2652
2653int intel_iommu_context_mapping(
2654 struct dmar_domain *domain, struct pci_dev *pdev)
2655{
2656 int rc;
2657 rc = domain_context_mapping(domain, pdev);
2658 return rc;
2659}
2660EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2661
2662int intel_iommu_page_mapping(
2663 struct dmar_domain *domain, dma_addr_t iova,
2664 u64 hpa, size_t size, int prot)
2665{
2666 int rc;
2667 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2668 return rc;
2669}
2670EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2671
2672void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2673{
2674 detach_domain_for_dev(domain, bus, devfn);
2675}
2676EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2677
2678struct dmar_domain *
2679intel_iommu_find_domain(struct pci_dev *pdev)
2680{
2681 return find_domain(pdev);
2682}
2683EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2684
2685int intel_iommu_found(void)
2686{
2687 return g_num_of_iommus;
2688}
2689EXPORT_SYMBOL_GPL(intel_iommu_found);
2690
2691u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2692{
2693 struct dma_pte *pte;
2694 u64 pfn;
2695
2696 pfn = 0;
2697 pte = addr_to_dma_pte(domain, iova);
2698
2699 if (pte)
19c239ce 2700 pfn = dma_pte_addr(pte);
38717946 2701
5b6985ce 2702 return pfn >> VTD_PAGE_SHIFT;
38717946
KA
2703}
2704EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
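/*
 * The exported helpers above form a small domain API for other kernel
 * code; the main in-tree user at this point is KVM device assignment.
 * Illustrative call sequence, error handling omitted:
 *
 *	struct dmar_domain *dom = intel_iommu_domain_alloc(pdev);
 *	intel_iommu_context_mapping(dom, pdev);
 *	intel_iommu_page_mapping(dom, iova, hpa, PAGE_SIZE,
 *				 DMA_PTE_READ | DMA_PTE_WRITE);
 *	...
 *	intel_iommu_detach_dev(dom, pdev->bus->number, pdev->devfn);
 *	intel_iommu_domain_exit(dom);
 */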