calculate agaw for each iommu
[linux-2.6-block.git] / drivers / pci / intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
38#include <asm/cacheflush.h>
39#include <asm/iommu.h>
40#include "pci.h"
41
42#define ROOT_SIZE VTD_PAGE_SIZE
43#define CONTEXT_SIZE VTD_PAGE_SIZE
44
45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48#define IOAPIC_RANGE_START (0xfee00000)
49#define IOAPIC_RANGE_END (0xfeefffff)
50#define IOVA_START_ADDR (0x1000)
51
52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
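/*
 * Note: IOVA_PFN() converts a DMA address into a page frame number;
 * assuming 4 KiB pages (PAGE_SHIFT == 12), IOVA_PFN(DMA_32BIT_MASK) ==
 * IOVA_PFN(0xffffffff) == 0xfffff, the highest pfn reachable by 32-bit DMA.
 */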
59
60/* global iommu list, set NULL for ignored DMAR units */
61static struct intel_iommu **g_iommus;
62
63/*
64 * 0: Present
65 * 1-11: Reserved
66 * 12-63: Context Ptr (12 - (haw-1))
67 * 64-127: Reserved
68 */
69struct root_entry {
70 u64 val;
71 u64 rsvd1;
72};
73#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
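/*
 * Note: each root entry is 16 bytes, so the 4 KiB root table holds
 * ROOT_ENTRY_NR == 4096 / 16 == 256 entries -- one per PCI bus number.
 */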
74static inline bool root_present(struct root_entry *root)
75{
76 return (root->val & 1);
77}
78static inline void set_root_present(struct root_entry *root)
79{
80 root->val |= 1;
81}
82static inline void set_root_value(struct root_entry *root, unsigned long value)
83{
84 root->val |= value & VTD_PAGE_MASK;
85}
86
87static inline struct context_entry *
88get_context_addr_from_root(struct root_entry *root)
89{
90 return (struct context_entry *)
91 (root_present(root)?phys_to_virt(
92 root->val & VTD_PAGE_MASK) :
93 NULL);
94}
95
96/*
97 * low 64 bits:
98 * 0: present
99 * 1: fault processing disable
100 * 2-3: translation type
101 * 12-63: address space root
102 * high 64 bits:
103 * 0-2: address width
104 * 3-6: aval
105 * 8-23: domain id
106 */
107struct context_entry {
108 u64 lo;
109 u64 hi;
110};
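/*
 * Note: a context table (one 4 KiB page per bus, allocated in
 * device_to_context_entry() below) is indexed by devfn and therefore
 * holds 4096 / 16 == 256 context entries; each entry records the domain
 * id, address width and page-table root used for that function's DMA.
 */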
111
112static inline bool context_present(struct context_entry *context)
113{
114 return (context->lo & 1);
115}
116static inline void context_set_present(struct context_entry *context)
117{
118 context->lo |= 1;
119}
120
121static inline void context_set_fault_enable(struct context_entry *context)
122{
123 context->lo &= (((u64)-1) << 2) | 1;
124}
125
126#define CONTEXT_TT_MULTI_LEVEL 0
127
128static inline void context_set_translation_type(struct context_entry *context,
129 unsigned long value)
130{
131 context->lo &= (((u64)-1) << 4) | 3;
132 context->lo |= (value & 3) << 2;
133}
134
135static inline void context_set_address_root(struct context_entry *context,
136 unsigned long value)
137{
138 context->lo |= value & VTD_PAGE_MASK;
139}
140
141static inline void context_set_address_width(struct context_entry *context,
142 unsigned long value)
143{
144 context->hi |= value & 7;
145}
146
147static inline void context_set_domain_id(struct context_entry *context,
148 unsigned long value)
149{
150 context->hi |= (value & ((1 << 16) - 1)) << 8;
151}
152
153static inline void context_clear_entry(struct context_entry *context)
154{
155 context->lo = 0;
156 context->hi = 0;
157}
158
159/*
160 * 0: readable
161 * 1: writable
162 * 2-6: reserved
163 * 7: super page
164 * 8-11: available
 165 * 12-63: Host physical address
166 */
167struct dma_pte {
168 u64 val;
169};
170
171static inline void dma_clear_pte(struct dma_pte *pte)
172{
173 pte->val = 0;
174}
175
176static inline void dma_set_pte_readable(struct dma_pte *pte)
177{
178 pte->val |= DMA_PTE_READ;
179}
180
181static inline void dma_set_pte_writable(struct dma_pte *pte)
182{
183 pte->val |= DMA_PTE_WRITE;
184}
185
186static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187{
188 pte->val = (pte->val & ~3) | (prot & 3);
189}
190
191static inline u64 dma_pte_addr(struct dma_pte *pte)
192{
193 return (pte->val & VTD_PAGE_MASK);
194}
195
196static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197{
198 pte->val |= (addr & VTD_PAGE_MASK);
199}
200
201static inline bool dma_pte_present(struct dma_pte *pte)
202{
203 return (pte->val & 3) != 0;
204}
205
206/* devices under the same p2p bridge are owned in one domain */
207#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
208
209struct dmar_domain {
210 int id; /* domain id */
211 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
212
213 struct list_head devices; /* all devices' list */
214 struct iova_domain iovad; /* iova's that belong to this domain */
215
216 struct dma_pte *pgd; /* virtual address */
217 spinlock_t mapping_lock; /* page table lock */
218 int gaw; /* max guest address width */
219
220 /* adjusted guest address width, 0 is level 2 30-bit */
221 int agaw;
222
223 int flags; /* flags to find out type of domain */
224};
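/*
 * Note: iommu_bmp is a single unsigned long, so a domain can currently
 * reference at most BITS_PER_LONG IOMMUs; domain_get_iommu() below
 * simply returns the first one whose bit is set.
 */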
225
226/* PCI domain-device relationship */
227struct device_domain_info {
228 struct list_head link; /* link to domain siblings */
229 struct list_head global; /* link to global list */
 230 u8 bus; /* PCI bus number */
231 u8 devfn; /* PCI devfn number */
232 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
233 struct dmar_domain *domain; /* pointer to domain */
234};
235
236static void flush_unmaps_timeout(unsigned long data);
237
238DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
239
240#define HIGH_WATER_MARK 250
241struct deferred_flush_tables {
242 int next;
243 struct iova *iova[HIGH_WATER_MARK];
244 struct dmar_domain *domain[HIGH_WATER_MARK];
245};
246
247static struct deferred_flush_tables *deferred_flush;
248
249/* number of registered IOMMUs, used to size the per-iommu arrays */
250static int g_num_of_iommus;
251
252static DEFINE_SPINLOCK(async_umap_flush_lock);
253static LIST_HEAD(unmaps_to_do);
254
255static int timer_on;
256static long list_size;
257
258static void domain_remove_dev_info(struct dmar_domain *domain);
259
260int dmar_disabled;
261static int __initdata dmar_map_gfx = 1;
262static int dmar_forcedac;
263static int intel_iommu_strict;
264
265#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
266static DEFINE_SPINLOCK(device_domain_lock);
267static LIST_HEAD(device_domain_list);
268
269static int __init intel_iommu_setup(char *str)
270{
271 if (!str)
272 return -EINVAL;
273 while (*str) {
274 if (!strncmp(str, "off", 3)) {
275 dmar_disabled = 1;
276 printk(KERN_INFO"Intel-IOMMU: disabled\n");
277 } else if (!strncmp(str, "igfx_off", 8)) {
278 dmar_map_gfx = 0;
279 printk(KERN_INFO
280 "Intel-IOMMU: disable GFX device mapping\n");
281 } else if (!strncmp(str, "forcedac", 8)) {
282 printk(KERN_INFO
283 "Intel-IOMMU: Forcing DAC for PCI devices\n");
284 dmar_forcedac = 1;
285 } else if (!strncmp(str, "strict", 6)) {
286 printk(KERN_INFO
287 "Intel-IOMMU: disable batched IOTLB flush\n");
288 intel_iommu_strict = 1;
289 }
290
291 str += strcspn(str, ",");
292 while (*str == ',')
293 str++;
294 }
295 return 0;
296}
297__setup("intel_iommu=", intel_iommu_setup);
298
299static struct kmem_cache *iommu_domain_cache;
300static struct kmem_cache *iommu_devinfo_cache;
301static struct kmem_cache *iommu_iova_cache;
302
303static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
304{
305 unsigned int flags;
306 void *vaddr;
307
308 /* trying to avoid low memory issues */
309 flags = current->flags & PF_MEMALLOC;
310 current->flags |= PF_MEMALLOC;
311 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
312 current->flags &= (~PF_MEMALLOC | flags);
313 return vaddr;
314}
315
316
317static inline void *alloc_pgtable_page(void)
318{
319 unsigned int flags;
320 void *vaddr;
321
322 /* trying to avoid low memory issues */
323 flags = current->flags & PF_MEMALLOC;
324 current->flags |= PF_MEMALLOC;
325 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
326 current->flags &= (~PF_MEMALLOC | flags);
327 return vaddr;
328}
329
330static inline void free_pgtable_page(void *vaddr)
331{
332 free_page((unsigned long)vaddr);
333}
334
335static inline void *alloc_domain_mem(void)
336{
337 return iommu_kmem_cache_alloc(iommu_domain_cache);
338}
339
340static void free_domain_mem(void *vaddr)
341{
342 kmem_cache_free(iommu_domain_cache, vaddr);
343}
344
345static inline void * alloc_devinfo_mem(void)
346{
347 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
348}
349
350static inline void free_devinfo_mem(void *vaddr)
351{
352 kmem_cache_free(iommu_devinfo_cache, vaddr);
353}
354
355struct iova *alloc_iova_mem(void)
356{
357 return iommu_kmem_cache_alloc(iommu_iova_cache);
358}
359
360void free_iova_mem(struct iova *iova)
361{
362 kmem_cache_free(iommu_iova_cache, iova);
363}
364
365
366static inline int width_to_agaw(int width);
367
368/* calculate agaw for each iommu.
 369 * "SAGAW" may differ across iommus: start from the default agaw and
 370 * fall back to the next smaller agaw that this iommu does support.
 371 */
372int iommu_calculate_agaw(struct intel_iommu *iommu)
373{
374 unsigned long sagaw;
375 int agaw = -1;
376
377 sagaw = cap_sagaw(iommu->cap);
378 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
379 agaw >= 0; agaw--) {
380 if (test_bit(agaw, &sagaw))
381 break;
382 }
383
384 return agaw;
385}
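/*
 * Worked example: with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48,
 * width_to_agaw(48) == (48 - 30) / 9 == 2 (a 4-level table). If SAGAW
 * bit 2 is clear, the loop falls back to agaw 1 (39-bit, 3-level),
 * then agaw 0 (30-bit, 2-level); -1 means no supported agaw was found.
 */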
386
387/* in native case, each domain is related to only one iommu */
388static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
389{
390 int iommu_id;
391
392 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
393 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
394 return NULL;
395
396 return g_iommus[iommu_id];
397}
398
399/* Gets context entry for a given bus and devfn */
400static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
401 u8 bus, u8 devfn)
402{
403 struct root_entry *root;
404 struct context_entry *context;
405 unsigned long phy_addr;
406 unsigned long flags;
407
408 spin_lock_irqsave(&iommu->lock, flags);
409 root = &iommu->root_entry[bus];
410 context = get_context_addr_from_root(root);
411 if (!context) {
412 context = (struct context_entry *)alloc_pgtable_page();
413 if (!context) {
414 spin_unlock_irqrestore(&iommu->lock, flags);
415 return NULL;
416 }
417 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
418 phy_addr = virt_to_phys((void *)context);
419 set_root_value(root, phy_addr);
420 set_root_present(root);
421 __iommu_flush_cache(iommu, root, sizeof(*root));
422 }
423 spin_unlock_irqrestore(&iommu->lock, flags);
424 return &context[devfn];
425}
426
427static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
428{
429 struct root_entry *root;
430 struct context_entry *context;
431 int ret;
432 unsigned long flags;
433
434 spin_lock_irqsave(&iommu->lock, flags);
435 root = &iommu->root_entry[bus];
436 context = get_context_addr_from_root(root);
437 if (!context) {
438 ret = 0;
439 goto out;
440 }
441 ret = context_present(&context[devfn]);
442out:
443 spin_unlock_irqrestore(&iommu->lock, flags);
444 return ret;
445}
446
447static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
448{
449 struct root_entry *root;
450 struct context_entry *context;
451 unsigned long flags;
452
453 spin_lock_irqsave(&iommu->lock, flags);
454 root = &iommu->root_entry[bus];
455 context = get_context_addr_from_root(root);
456 if (context) {
457 context_clear_entry(&context[devfn]);
458 __iommu_flush_cache(iommu, &context[devfn], \
459 sizeof(*context));
460 }
461 spin_unlock_irqrestore(&iommu->lock, flags);
462}
463
464static void free_context_table(struct intel_iommu *iommu)
465{
466 struct root_entry *root;
467 int i;
468 unsigned long flags;
469 struct context_entry *context;
470
471 spin_lock_irqsave(&iommu->lock, flags);
472 if (!iommu->root_entry) {
473 goto out;
474 }
475 for (i = 0; i < ROOT_ENTRY_NR; i++) {
476 root = &iommu->root_entry[i];
477 context = get_context_addr_from_root(root);
478 if (context)
479 free_pgtable_page(context);
480 }
481 free_pgtable_page(iommu->root_entry);
482 iommu->root_entry = NULL;
483out:
484 spin_unlock_irqrestore(&iommu->lock, flags);
485}
486
487/* page table handling */
488#define LEVEL_STRIDE (9)
489#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
490
491static inline int agaw_to_level(int agaw)
492{
493 return agaw + 2;
494}
495
496static inline int agaw_to_width(int agaw)
497{
498 return 30 + agaw * LEVEL_STRIDE;
499
500}
501
502static inline int width_to_agaw(int width)
503{
504 return (width - 30) / LEVEL_STRIDE;
505}
506
507static inline unsigned int level_to_offset_bits(int level)
508{
509 return (12 + (level - 1) * LEVEL_STRIDE);
510}
511
512static inline int address_level_offset(u64 addr, int level)
513{
514 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
515}
516
517static inline u64 level_mask(int level)
518{
519 return ((u64)-1 << level_to_offset_bits(level));
520}
521
522static inline u64 level_size(int level)
523{
524 return ((u64)1 << level_to_offset_bits(level));
525}
526
527static inline u64 align_to_level(u64 addr, int level)
528{
529 return ((addr + level_size(level) - 1) & level_mask(level));
530}
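/*
 * Worked example for the helpers above: level 1 entries cover address
 * bits 12-20 (4 KiB pages), level 2 covers bits 21-29, so
 * level_size(2) == 2 MiB and align_to_level(addr, 2) rounds addr up
 * to the next 2 MiB boundary.
 */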
531
532static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
533{
534 int addr_width = agaw_to_width(domain->agaw);
535 struct dma_pte *parent, *pte = NULL;
536 int level = agaw_to_level(domain->agaw);
537 int offset;
538 unsigned long flags;
539 struct intel_iommu *iommu = domain_get_iommu(domain);
540
541 BUG_ON(!domain->pgd);
542
543 addr &= (((u64)1) << addr_width) - 1;
544 parent = domain->pgd;
545
546 spin_lock_irqsave(&domain->mapping_lock, flags);
547 while (level > 0) {
548 void *tmp_page;
549
550 offset = address_level_offset(addr, level);
551 pte = &parent[offset];
552 if (level == 1)
553 break;
554
555 if (!dma_pte_present(pte)) {
556 tmp_page = alloc_pgtable_page();
557
558 if (!tmp_page) {
559 spin_unlock_irqrestore(&domain->mapping_lock,
560 flags);
561 return NULL;
562 }
563 __iommu_flush_cache(iommu, tmp_page,
564 PAGE_SIZE);
565 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
566 /*
567 * high level table always sets r/w, last level page
568 * table control read/write
569 */
570 dma_set_pte_readable(pte);
571 dma_set_pte_writable(pte);
572 __iommu_flush_cache(iommu, pte, sizeof(*pte));
573 }
574 parent = phys_to_virt(dma_pte_addr(pte));
575 level--;
576 }
577
578 spin_unlock_irqrestore(&domain->mapping_lock, flags);
579 return pte;
580}
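/*
 * Note: for a 4-level table (agaw 2) the walk above indexes the levels
 * with address bits [47:39], [38:30], [29:21] and finally [20:12];
 * missing intermediate tables are allocated on the fly and marked
 * read/write, while the leaf pte's address and permissions are set by
 * the caller.
 */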
581
582/* return address's pte at specific level */
583static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
584 int level)
585{
586 struct dma_pte *parent, *pte = NULL;
587 int total = agaw_to_level(domain->agaw);
588 int offset;
589
590 parent = domain->pgd;
591 while (level <= total) {
592 offset = address_level_offset(addr, total);
593 pte = &parent[offset];
594 if (level == total)
595 return pte;
596
597 if (!dma_pte_present(pte))
598 break;
599 parent = phys_to_virt(dma_pte_addr(pte));
600 total--;
601 }
602 return NULL;
603}
604
605/* clear one page's page table */
606static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
607{
608 struct dma_pte *pte = NULL;
609 struct intel_iommu *iommu = domain_get_iommu(domain);
610
611 /* get last level pte */
612 pte = dma_addr_level_pte(domain, addr, 1);
613
614 if (pte) {
615 dma_clear_pte(pte);
616 __iommu_flush_cache(iommu, pte, sizeof(*pte));
617 }
618}
619
620/* clear last level pte, a tlb flush should be followed */
621static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
622{
623 int addr_width = agaw_to_width(domain->agaw);
624
625 start &= (((u64)1) << addr_width) - 1;
626 end &= (((u64)1) << addr_width) - 1;
627 /* in case it's partial page */
628 start = PAGE_ALIGN(start);
629 end &= PAGE_MASK;
630
631 /* we don't need lock here, nobody else touches the iova range */
632 while (start < end) {
633 dma_pte_clear_one(domain, start);
634 start += VTD_PAGE_SIZE;
635 }
636}
637
638/* free page table pages. last level pte should already be cleared */
639static void dma_pte_free_pagetable(struct dmar_domain *domain,
640 u64 start, u64 end)
641{
642 int addr_width = agaw_to_width(domain->agaw);
643 struct dma_pte *pte;
644 int total = agaw_to_level(domain->agaw);
645 int level;
646 u64 tmp;
647 struct intel_iommu *iommu = domain_get_iommu(domain);
648
649 start &= (((u64)1) << addr_width) - 1;
650 end &= (((u64)1) << addr_width) - 1;
651
652 /* we don't need lock here, nobody else touches the iova range */
653 level = 2;
654 while (level <= total) {
655 tmp = align_to_level(start, level);
656 if (tmp >= end || (tmp + level_size(level) > end))
657 return;
658
659 while (tmp < end) {
660 pte = dma_addr_level_pte(domain, tmp, level);
661 if (pte) {
662 free_pgtable_page(
663 phys_to_virt(dma_pte_addr(pte)));
664 dma_clear_pte(pte);
665 __iommu_flush_cache(iommu,
666 pte, sizeof(*pte));
667 }
668 tmp += level_size(level);
669 }
670 level++;
671 }
672 /* free pgd */
673 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
674 free_pgtable_page(domain->pgd);
675 domain->pgd = NULL;
676 }
677}
678
679/* iommu handling */
680static int iommu_alloc_root_entry(struct intel_iommu *iommu)
681{
682 struct root_entry *root;
683 unsigned long flags;
684
685 root = (struct root_entry *)alloc_pgtable_page();
686 if (!root)
687 return -ENOMEM;
688
689 __iommu_flush_cache(iommu, root, ROOT_SIZE);
690
691 spin_lock_irqsave(&iommu->lock, flags);
692 iommu->root_entry = root;
693 spin_unlock_irqrestore(&iommu->lock, flags);
694
695 return 0;
696}
697
698static void iommu_set_root_entry(struct intel_iommu *iommu)
699{
700 void *addr;
701 u32 cmd, sts;
702 unsigned long flag;
703
704 addr = iommu->root_entry;
705
706 spin_lock_irqsave(&iommu->register_lock, flag);
707 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
708
709 cmd = iommu->gcmd | DMA_GCMD_SRTP;
710 writel(cmd, iommu->reg + DMAR_GCMD_REG);
711
 712 /* Make sure hardware completes it */
713 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
714 readl, (sts & DMA_GSTS_RTPS), sts);
715
716 spin_unlock_irqrestore(&iommu->register_lock, flag);
717}
718
719static void iommu_flush_write_buffer(struct intel_iommu *iommu)
720{
721 u32 val;
722 unsigned long flag;
723
724 if (!cap_rwbf(iommu->cap))
725 return;
726 val = iommu->gcmd | DMA_GCMD_WBF;
727
728 spin_lock_irqsave(&iommu->register_lock, flag);
729 writel(val, iommu->reg + DMAR_GCMD_REG);
730
 731 /* Make sure hardware completes it */
732 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
733 readl, (!(val & DMA_GSTS_WBFS)), val);
734
735 spin_unlock_irqrestore(&iommu->register_lock, flag);
736}
737
738/* return value determine if we need a write buffer flush */
739static int __iommu_flush_context(struct intel_iommu *iommu,
740 u16 did, u16 source_id, u8 function_mask, u64 type,
741 int non_present_entry_flush)
742{
743 u64 val = 0;
744 unsigned long flag;
745
746 /*
 747 * In the non-present entry flush case: if the hardware doesn't cache
 748 * non-present entries we do nothing; if it does cache them, we flush
 749 * the entries of domain 0 (the domain id used to tag any cached
 750 * non-present entries)
751 */
752 if (non_present_entry_flush) {
753 if (!cap_caching_mode(iommu->cap))
754 return 1;
755 else
756 did = 0;
757 }
758
759 switch (type) {
760 case DMA_CCMD_GLOBAL_INVL:
761 val = DMA_CCMD_GLOBAL_INVL;
762 break;
763 case DMA_CCMD_DOMAIN_INVL:
764 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
765 break;
766 case DMA_CCMD_DEVICE_INVL:
767 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
768 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
769 break;
770 default:
771 BUG();
772 }
773 val |= DMA_CCMD_ICC;
774
775 spin_lock_irqsave(&iommu->register_lock, flag);
776 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
777
 778 /* Make sure hardware completes it */
779 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
780 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
781
782 spin_unlock_irqrestore(&iommu->register_lock, flag);
783
784 /* flush context entry will implicitly flush write buffer */
785 return 0;
786}
787
788/* return value determine if we need a write buffer flush */
789static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
790 u64 addr, unsigned int size_order, u64 type,
791 int non_present_entry_flush)
792{
793 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
794 u64 val = 0, val_iva = 0;
795 unsigned long flag;
796
797 /*
 798 * In the non-present entry flush case: if the hardware doesn't cache
 799 * non-present entries we do nothing; if it does cache them, we flush
 800 * the entries of domain 0 (the domain id used to tag any cached
 801 * non-present entries)
802 */
803 if (non_present_entry_flush) {
804 if (!cap_caching_mode(iommu->cap))
805 return 1;
806 else
807 did = 0;
808 }
809
810 switch (type) {
811 case DMA_TLB_GLOBAL_FLUSH:
812 /* global flush doesn't need set IVA_REG */
813 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
814 break;
815 case DMA_TLB_DSI_FLUSH:
816 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
817 break;
818 case DMA_TLB_PSI_FLUSH:
819 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
820 /* Note: always flush non-leaf currently */
821 val_iva = size_order | addr;
822 break;
823 default:
824 BUG();
825 }
826 /* Note: set drain read/write */
827#if 0
828 /*
 829 * This is probably only here to be extra safe; it looks like we can
 830 * skip the read drain without any impact.
831 */
832 if (cap_read_drain(iommu->cap))
833 val |= DMA_TLB_READ_DRAIN;
834#endif
835 if (cap_write_drain(iommu->cap))
836 val |= DMA_TLB_WRITE_DRAIN;
837
838 spin_lock_irqsave(&iommu->register_lock, flag);
839 /* Note: Only uses first TLB reg currently */
840 if (val_iva)
841 dmar_writeq(iommu->reg + tlb_offset, val_iva);
842 dmar_writeq(iommu->reg + tlb_offset + 8, val);
843
 844 /* Make sure hardware completes it */
845 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
846 dmar_readq, (!(val & DMA_TLB_IVT)), val);
847
848 spin_unlock_irqrestore(&iommu->register_lock, flag);
849
850 /* check IOTLB invalidation granularity */
851 if (DMA_TLB_IAIG(val) == 0)
852 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
853 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
854 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
855 (unsigned long long)DMA_TLB_IIRG(type),
856 (unsigned long long)DMA_TLB_IAIG(val));
857 /* flush iotlb entry will implicitly flush write buffer */
858 return 0;
859}
860
861static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
862 u64 addr, unsigned int pages, int non_present_entry_flush)
863{
864 unsigned int mask;
865
866 BUG_ON(addr & (~VTD_PAGE_MASK));
867 BUG_ON(pages == 0);
868
869 /* Fallback to domain selective flush if no PSI support */
870 if (!cap_pgsel_inv(iommu->cap))
871 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
872 DMA_TLB_DSI_FLUSH,
873 non_present_entry_flush);
874
875 /*
876 * PSI requires page size to be 2 ^ x, and the base address is naturally
877 * aligned to the size
878 */
879 mask = ilog2(__roundup_pow_of_two(pages));
880 /* Fallback to domain selective flush if size is too big */
881 if (mask > cap_max_amask_val(iommu->cap))
882 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
883 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
884
885 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
886 DMA_TLB_PSI_FLUSH,
887 non_present_entry_flush);
888}
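/*
 * Worked example: pages == 5 gives __roundup_pow_of_two(5) == 8 and
 * mask == ilog2(8) == 3, i.e. an aligned 8-page region is invalidated;
 * if that mask exceeds cap_max_amask_val the code above falls back to
 * a domain-selective flush instead.
 */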
889
890static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
891{
892 u32 pmen;
893 unsigned long flags;
894
895 spin_lock_irqsave(&iommu->register_lock, flags);
896 pmen = readl(iommu->reg + DMAR_PMEN_REG);
897 pmen &= ~DMA_PMEN_EPM;
898 writel(pmen, iommu->reg + DMAR_PMEN_REG);
899
900 /* wait for the protected region status bit to clear */
901 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
902 readl, !(pmen & DMA_PMEN_PRS), pmen);
903
904 spin_unlock_irqrestore(&iommu->register_lock, flags);
905}
906
907static int iommu_enable_translation(struct intel_iommu *iommu)
908{
909 u32 sts;
910 unsigned long flags;
911
912 spin_lock_irqsave(&iommu->register_lock, flags);
913 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
914
 915 /* Make sure hardware completes it */
916 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
917 readl, (sts & DMA_GSTS_TES), sts);
918
919 iommu->gcmd |= DMA_GCMD_TE;
920 spin_unlock_irqrestore(&iommu->register_lock, flags);
921 return 0;
922}
923
924static int iommu_disable_translation(struct intel_iommu *iommu)
925{
926 u32 sts;
927 unsigned long flag;
928
929 spin_lock_irqsave(&iommu->register_lock, flag);
930 iommu->gcmd &= ~DMA_GCMD_TE;
931 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
932
 933 /* Make sure hardware completes it */
934 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
935 readl, (!(sts & DMA_GSTS_TES)), sts);
936
937 spin_unlock_irqrestore(&iommu->register_lock, flag);
938 return 0;
939}
940
941/* iommu interrupt handling. Most of it is MSI-like. */
942
943static const char *fault_reason_strings[] =
944{
945 "Software",
946 "Present bit in root entry is clear",
947 "Present bit in context entry is clear",
948 "Invalid context entry",
949 "Access beyond MGAW",
950 "PTE Write access is not set",
951 "PTE Read access is not set",
952 "Next page table ptr is invalid",
953 "Root table address invalid",
954 "Context table ptr is invalid",
955 "non-zero reserved fields in RTP",
956 "non-zero reserved fields in CTP",
957 "non-zero reserved fields in PTE",
958};
959#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
960
961const char *dmar_get_fault_reason(u8 fault_reason)
962{
963 if (fault_reason > MAX_FAULT_REASON_IDX)
964 return "Unknown";
965 else
966 return fault_reason_strings[fault_reason];
967}
968
969void dmar_msi_unmask(unsigned int irq)
970{
971 struct intel_iommu *iommu = get_irq_data(irq);
972 unsigned long flag;
973
974 /* unmask it */
975 spin_lock_irqsave(&iommu->register_lock, flag);
976 writel(0, iommu->reg + DMAR_FECTL_REG);
977 /* Read a reg to force flush the post write */
978 readl(iommu->reg + DMAR_FECTL_REG);
979 spin_unlock_irqrestore(&iommu->register_lock, flag);
980}
981
982void dmar_msi_mask(unsigned int irq)
983{
984 unsigned long flag;
985 struct intel_iommu *iommu = get_irq_data(irq);
986
987 /* mask it */
988 spin_lock_irqsave(&iommu->register_lock, flag);
989 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
990 /* Read a reg to force flush the post write */
991 readl(iommu->reg + DMAR_FECTL_REG);
992 spin_unlock_irqrestore(&iommu->register_lock, flag);
993}
994
995void dmar_msi_write(int irq, struct msi_msg *msg)
996{
997 struct intel_iommu *iommu = get_irq_data(irq);
998 unsigned long flag;
999
1000 spin_lock_irqsave(&iommu->register_lock, flag);
1001 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1002 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1003 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1004 spin_unlock_irqrestore(&iommu->register_lock, flag);
1005}
1006
1007void dmar_msi_read(int irq, struct msi_msg *msg)
1008{
1009 struct intel_iommu *iommu = get_irq_data(irq);
1010 unsigned long flag;
1011
1012 spin_lock_irqsave(&iommu->register_lock, flag);
1013 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1014 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1015 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1016 spin_unlock_irqrestore(&iommu->register_lock, flag);
1017}
1018
1019static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1020 u8 fault_reason, u16 source_id, unsigned long long addr)
1021{
1022 const char *reason;
1023
1024 reason = dmar_get_fault_reason(fault_reason);
1025
1026 printk(KERN_ERR
1027 "DMAR:[%s] Request device [%02x:%02x.%d] "
1028 "fault addr %llx \n"
1029 "DMAR:[fault reason %02d] %s\n",
1030 (type ? "DMA Read" : "DMA Write"),
1031 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1032 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1033 return 0;
1034}
1035
1036#define PRIMARY_FAULT_REG_LEN (16)
1037static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1038{
1039 struct intel_iommu *iommu = dev_id;
1040 int reg, fault_index;
1041 u32 fault_status;
1042 unsigned long flag;
1043
1044 spin_lock_irqsave(&iommu->register_lock, flag);
1045 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1046
1047 /* TBD: ignore advanced fault log currently */
1048 if (!(fault_status & DMA_FSTS_PPF))
1049 goto clear_overflow;
1050
1051 fault_index = dma_fsts_fault_record_index(fault_status);
1052 reg = cap_fault_reg_offset(iommu->cap);
1053 while (1) {
1054 u8 fault_reason;
1055 u16 source_id;
1056 u64 guest_addr;
1057 int type;
1058 u32 data;
1059
1060 /* highest 32 bits */
1061 data = readl(iommu->reg + reg +
1062 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1063 if (!(data & DMA_FRCD_F))
1064 break;
1065
1066 fault_reason = dma_frcd_fault_reason(data);
1067 type = dma_frcd_type(data);
1068
1069 data = readl(iommu->reg + reg +
1070 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1071 source_id = dma_frcd_source_id(data);
1072
1073 guest_addr = dmar_readq(iommu->reg + reg +
1074 fault_index * PRIMARY_FAULT_REG_LEN);
1075 guest_addr = dma_frcd_page_addr(guest_addr);
1076 /* clear the fault */
1077 writel(DMA_FRCD_F, iommu->reg + reg +
1078 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1079
1080 spin_unlock_irqrestore(&iommu->register_lock, flag);
1081
1082 iommu_page_fault_do_one(iommu, type, fault_reason,
1083 source_id, guest_addr);
1084
1085 fault_index++;
1086 if (fault_index > cap_num_fault_regs(iommu->cap))
1087 fault_index = 0;
1088 spin_lock_irqsave(&iommu->register_lock, flag);
1089 }
1090clear_overflow:
1091 /* clear primary fault overflow */
1092 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1093 if (fault_status & DMA_FSTS_PFO)
1094 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1095
1096 spin_unlock_irqrestore(&iommu->register_lock, flag);
1097 return IRQ_HANDLED;
1098}
1099
1100int dmar_set_interrupt(struct intel_iommu *iommu)
1101{
1102 int irq, ret;
1103
1104 irq = create_irq();
1105 if (!irq) {
1106 printk(KERN_ERR "IOMMU: no free vectors\n");
1107 return -EINVAL;
1108 }
1109
1110 set_irq_data(irq, iommu);
1111 iommu->irq = irq;
1112
1113 ret = arch_setup_dmar_msi(irq);
1114 if (ret) {
1115 set_irq_data(irq, NULL);
1116 iommu->irq = 0;
1117 destroy_irq(irq);
1118 return 0;
1119 }
1120
 1121 /* Clear any faults that are already pending before requesting the irq */
1122 iommu_page_fault(irq, iommu);
1123
1124 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1125 if (ret)
1126 printk(KERN_ERR "IOMMU: can't request irq\n");
1127 return ret;
1128}
1129
1130static int iommu_init_domains(struct intel_iommu *iommu)
1131{
1132 unsigned long ndomains;
1133 unsigned long nlongs;
1134
1135 ndomains = cap_ndoms(iommu->cap);
1136 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
1137 nlongs = BITS_TO_LONGS(ndomains);
1138
1139 /* TBD: there might be 64K domains,
1140 * consider other allocation for future chip
1141 */
1142 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1143 if (!iommu->domain_ids) {
1144 printk(KERN_ERR "Allocating domain id array failed\n");
1145 return -ENOMEM;
1146 }
1147 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1148 GFP_KERNEL);
1149 if (!iommu->domains) {
1150 printk(KERN_ERR "Allocating domain array failed\n");
1151 kfree(iommu->domain_ids);
1152 return -ENOMEM;
1153 }
1154
1155 spin_lock_init(&iommu->lock);
1156
1157 /*
1158 * if Caching mode is set, then invalid translations are tagged
1159 * with domainid 0. Hence we need to pre-allocate it.
1160 */
1161 if (cap_caching_mode(iommu->cap))
1162 set_bit(0, iommu->domain_ids);
1163 return 0;
1164}
1165
1166
1167static void domain_exit(struct dmar_domain *domain);
1168
1169void free_dmar_iommu(struct intel_iommu *iommu)
1170{
1171 struct dmar_domain *domain;
1172 int i;
1173
1174 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1175 for (; i < cap_ndoms(iommu->cap); ) {
1176 domain = iommu->domains[i];
1177 clear_bit(i, iommu->domain_ids);
1178 domain_exit(domain);
1179 i = find_next_bit(iommu->domain_ids,
1180 cap_ndoms(iommu->cap), i+1);
1181 }
1182
1183 if (iommu->gcmd & DMA_GCMD_TE)
1184 iommu_disable_translation(iommu);
1185
1186 if (iommu->irq) {
1187 set_irq_data(iommu->irq, NULL);
1188 /* This will mask the irq */
1189 free_irq(iommu->irq, iommu);
1190 destroy_irq(iommu->irq);
1191 }
1192
1193 kfree(iommu->domains);
1194 kfree(iommu->domain_ids);
1195
1196 g_iommus[iommu->seq_id] = NULL;
1197
1198 /* if all iommus are freed, free g_iommus */
1199 for (i = 0; i < g_num_of_iommus; i++) {
1200 if (g_iommus[i])
1201 break;
1202 }
1203
1204 if (i == g_num_of_iommus)
1205 kfree(g_iommus);
1206
1207 /* free context mapping */
1208 free_context_table(iommu);
1209}
1210
1211static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1212{
1213 unsigned long num;
1214 unsigned long ndomains;
1215 struct dmar_domain *domain;
1216 unsigned long flags;
1217
1218 domain = alloc_domain_mem();
1219 if (!domain)
1220 return NULL;
1221
1222 ndomains = cap_ndoms(iommu->cap);
1223
1224 spin_lock_irqsave(&iommu->lock, flags);
1225 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1226 if (num >= ndomains) {
1227 spin_unlock_irqrestore(&iommu->lock, flags);
1228 free_domain_mem(domain);
1229 printk(KERN_ERR "IOMMU: no free domain ids\n");
1230 return NULL;
1231 }
1232
1233 set_bit(num, iommu->domain_ids);
1234 domain->id = num;
1235 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1236 set_bit(iommu->seq_id, &domain->iommu_bmp);
1237 domain->flags = 0;
1238 iommu->domains[num] = domain;
1239 spin_unlock_irqrestore(&iommu->lock, flags);
1240
1241 return domain;
1242}
1243
1244static void iommu_free_domain(struct dmar_domain *domain)
1245{
1246 unsigned long flags;
1247 struct intel_iommu *iommu;
1248
1249 iommu = domain_get_iommu(domain);
1250
1251 spin_lock_irqsave(&iommu->lock, flags);
1252 clear_bit(domain->id, iommu->domain_ids);
1253 spin_unlock_irqrestore(&iommu->lock, flags);
1254}
1255
1256static struct iova_domain reserved_iova_list;
1257static struct lock_class_key reserved_alloc_key;
1258static struct lock_class_key reserved_rbtree_key;
1259
1260static void dmar_init_reserved_ranges(void)
1261{
1262 struct pci_dev *pdev = NULL;
1263 struct iova *iova;
1264 int i;
1265 u64 addr, size;
1266
1267 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1268
1269 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1270 &reserved_alloc_key);
1271 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1272 &reserved_rbtree_key);
1273
1274 /* IOAPIC ranges shouldn't be accessed by DMA */
1275 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1276 IOVA_PFN(IOAPIC_RANGE_END));
1277 if (!iova)
1278 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1279
1280 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1281 for_each_pci_dev(pdev) {
1282 struct resource *r;
1283
1284 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1285 r = &pdev->resource[i];
1286 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1287 continue;
1288 addr = r->start;
1289 addr &= PAGE_MASK;
1290 size = r->end - addr;
1291 size = PAGE_ALIGN(size);
1292 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1293 IOVA_PFN(size + addr) - 1);
1294 if (!iova)
1295 printk(KERN_ERR "Reserve iova failed\n");
1296 }
1297 }
1298
1299}
1300
1301static void domain_reserve_special_ranges(struct dmar_domain *domain)
1302{
1303 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1304}
1305
1306static inline int guestwidth_to_adjustwidth(int gaw)
1307{
1308 int agaw;
1309 int r = (gaw - 12) % 9;
1310
1311 if (r == 0)
1312 agaw = gaw;
1313 else
1314 agaw = gaw + 9 - r;
1315 if (agaw > 64)
1316 agaw = 64;
1317 return agaw;
1318}
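/*
 * Worked example: gaw == 32 gives r == (32 - 12) % 9 == 2, so the
 * adjusted width is 32 + 9 - 2 == 39 (a 3-level table); gaw == 48
 * already sits on a level boundary (r == 0) and is returned unchanged.
 */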
1319
1320static int domain_init(struct dmar_domain *domain, int guest_width)
1321{
1322 struct intel_iommu *iommu;
1323 int adjust_width, agaw;
1324 unsigned long sagaw;
1325
1326 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1327 spin_lock_init(&domain->mapping_lock);
1328
1329 domain_reserve_special_ranges(domain);
1330
1331 /* calculate AGAW */
1332 iommu = domain_get_iommu(domain);
1333 if (guest_width > cap_mgaw(iommu->cap))
1334 guest_width = cap_mgaw(iommu->cap);
1335 domain->gaw = guest_width;
1336 adjust_width = guestwidth_to_adjustwidth(guest_width);
1337 agaw = width_to_agaw(adjust_width);
1338 sagaw = cap_sagaw(iommu->cap);
1339 if (!test_bit(agaw, &sagaw)) {
1340 /* hardware doesn't support it, choose a bigger one */
1341 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1342 agaw = find_next_bit(&sagaw, 5, agaw);
1343 if (agaw >= 5)
1344 return -ENODEV;
1345 }
1346 domain->agaw = agaw;
1347 INIT_LIST_HEAD(&domain->devices);
1348
1349 /* always allocate the top pgd */
1350 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1351 if (!domain->pgd)
1352 return -ENOMEM;
1353 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1354 return 0;
1355}
1356
1357static void domain_exit(struct dmar_domain *domain)
1358{
1359 u64 end;
1360
 1361 /* Domain 0 is reserved, so don't process it */
1362 if (!domain)
1363 return;
1364
1365 domain_remove_dev_info(domain);
1366 /* destroy iovas */
1367 put_iova_domain(&domain->iovad);
1368 end = DOMAIN_MAX_ADDR(domain->gaw);
1369 end = end & (~PAGE_MASK);
1370
1371 /* clear ptes */
1372 dma_pte_clear_range(domain, 0, end);
1373
1374 /* free page tables */
1375 dma_pte_free_pagetable(domain, 0, end);
1376
1377 iommu_free_domain(domain);
1378 free_domain_mem(domain);
1379}
1380
1381static int domain_context_mapping_one(struct dmar_domain *domain,
1382 u8 bus, u8 devfn)
1383{
1384 struct context_entry *context;
1385 struct intel_iommu *iommu = domain_get_iommu(domain);
1386 unsigned long flags;
1387
1388 pr_debug("Set context mapping for %02x:%02x.%d\n",
1389 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1390 BUG_ON(!domain->pgd);
1391 context = device_to_context_entry(iommu, bus, devfn);
1392 if (!context)
1393 return -ENOMEM;
1394 spin_lock_irqsave(&iommu->lock, flags);
1395 if (context_present(context)) {
1396 spin_unlock_irqrestore(&iommu->lock, flags);
1397 return 0;
1398 }
1399
1400 context_set_domain_id(context, domain->id);
1401 context_set_address_width(context, domain->agaw);
1402 context_set_address_root(context, virt_to_phys(domain->pgd));
1403 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1404 context_set_fault_enable(context);
1405 context_set_present(context);
1406 __iommu_flush_cache(iommu, context, sizeof(*context));
1407
1408 /* it's a non-present to present mapping */
1409 if (iommu->flush.flush_context(iommu, domain->id,
1410 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1411 DMA_CCMD_DEVICE_INVL, 1))
1412 iommu_flush_write_buffer(iommu);
1413 else
1414 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1415
1416 spin_unlock_irqrestore(&iommu->lock, flags);
1417 return 0;
1418}
1419
1420static int
1421domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1422{
1423 int ret;
1424 struct pci_dev *tmp, *parent;
1425
1426 ret = domain_context_mapping_one(domain, pdev->bus->number,
1427 pdev->devfn);
1428 if (ret)
1429 return ret;
1430
1431 /* dependent device mapping */
1432 tmp = pci_find_upstream_pcie_bridge(pdev);
1433 if (!tmp)
1434 return 0;
1435 /* Secondary interface's bus number and devfn 0 */
1436 parent = pdev->bus->self;
1437 while (parent != tmp) {
1438 ret = domain_context_mapping_one(domain, parent->bus->number,
1439 parent->devfn);
1440 if (ret)
1441 return ret;
1442 parent = parent->bus->self;
1443 }
1444 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1445 return domain_context_mapping_one(domain,
1446 tmp->subordinate->number, 0);
1447 else /* this is a legacy PCI bridge */
1448 return domain_context_mapping_one(domain,
1449 tmp->bus->number, tmp->devfn);
1450}
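/*
 * Note: the function above programs a context entry for the device
 * itself, then for every bridge between it and the upstream PCIe-to-PCI
 * bridge, and finally for that bridge's secondary bus (devfn 0) if the
 * bridge is PCIe, or for the bridge itself if it is legacy PCI.
 */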
1451
1452static int domain_context_mapped(struct dmar_domain *domain,
1453 struct pci_dev *pdev)
1454{
1455 int ret;
1456 struct pci_dev *tmp, *parent;
1457 struct intel_iommu *iommu = domain_get_iommu(domain);
1458
1459 ret = device_context_mapped(iommu,
1460 pdev->bus->number, pdev->devfn);
1461 if (!ret)
1462 return ret;
1463 /* dependent device mapping */
1464 tmp = pci_find_upstream_pcie_bridge(pdev);
1465 if (!tmp)
1466 return ret;
1467 /* Secondary interface's bus number and devfn 0 */
1468 parent = pdev->bus->self;
1469 while (parent != tmp) {
1470 ret = device_context_mapped(iommu, parent->bus->number,
1471 parent->devfn);
1472 if (!ret)
1473 return ret;
1474 parent = parent->bus->self;
1475 }
1476 if (tmp->is_pcie)
1477 return device_context_mapped(iommu,
1478 tmp->subordinate->number, 0);
1479 else
1480 return device_context_mapped(iommu,
1481 tmp->bus->number, tmp->devfn);
1482}
1483
1484static int
1485domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1486 u64 hpa, size_t size, int prot)
1487{
1488 u64 start_pfn, end_pfn;
1489 struct dma_pte *pte;
1490 int index;
1491 int addr_width = agaw_to_width(domain->agaw);
1492 struct intel_iommu *iommu = domain_get_iommu(domain);
1493
1494 hpa &= (((u64)1) << addr_width) - 1;
1495
1496 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1497 return -EINVAL;
1498 iova &= PAGE_MASK;
1499 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1500 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1501 index = 0;
1502 while (start_pfn < end_pfn) {
1503 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1504 if (!pte)
1505 return -ENOMEM;
1506 /* We don't need lock here, nobody else
1507 * touches the iova range
1508 */
1509 BUG_ON(dma_pte_addr(pte));
1510 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1511 dma_set_pte_prot(pte, prot);
1512 __iommu_flush_cache(iommu, pte, sizeof(*pte));
1513 start_pfn++;
1514 index++;
1515 }
1516 return 0;
1517}
1518
1519static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1520{
1521 struct intel_iommu *iommu = domain_get_iommu(domain);
1522
1523 clear_context_table(iommu, bus, devfn);
1524 iommu->flush.flush_context(iommu, 0, 0, 0,
1525 DMA_CCMD_GLOBAL_INVL, 0);
1526 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1527 DMA_TLB_GLOBAL_FLUSH, 0);
1528}
1529
1530static void domain_remove_dev_info(struct dmar_domain *domain)
1531{
1532 struct device_domain_info *info;
1533 unsigned long flags;
1534
1535 spin_lock_irqsave(&device_domain_lock, flags);
1536 while (!list_empty(&domain->devices)) {
1537 info = list_entry(domain->devices.next,
1538 struct device_domain_info, link);
1539 list_del(&info->link);
1540 list_del(&info->global);
1541 if (info->dev)
1542 info->dev->dev.archdata.iommu = NULL;
1543 spin_unlock_irqrestore(&device_domain_lock, flags);
1544
1545 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1546 free_devinfo_mem(info);
1547
1548 spin_lock_irqsave(&device_domain_lock, flags);
1549 }
1550 spin_unlock_irqrestore(&device_domain_lock, flags);
1551}
1552
1553/*
1554 * find_domain
 1555 * Note: the device's domain info is stored in struct pci_dev->dev.archdata.iommu
1556 */
1557static struct dmar_domain *
1558find_domain(struct pci_dev *pdev)
1559{
1560 struct device_domain_info *info;
1561
1562 /* No lock here, assumes no domain exit in normal case */
1563 info = pdev->dev.archdata.iommu;
1564 if (info)
1565 return info->domain;
1566 return NULL;
1567}
1568
1569/* domain is initialized */
1570static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1571{
1572 struct dmar_domain *domain, *found = NULL;
1573 struct intel_iommu *iommu;
1574 struct dmar_drhd_unit *drhd;
1575 struct device_domain_info *info, *tmp;
1576 struct pci_dev *dev_tmp;
1577 unsigned long flags;
1578 int bus = 0, devfn = 0;
1579
1580 domain = find_domain(pdev);
1581 if (domain)
1582 return domain;
1583
1584 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1585 if (dev_tmp) {
1586 if (dev_tmp->is_pcie) {
1587 bus = dev_tmp->subordinate->number;
1588 devfn = 0;
1589 } else {
1590 bus = dev_tmp->bus->number;
1591 devfn = dev_tmp->devfn;
1592 }
1593 spin_lock_irqsave(&device_domain_lock, flags);
1594 list_for_each_entry(info, &device_domain_list, global) {
1595 if (info->bus == bus && info->devfn == devfn) {
1596 found = info->domain;
1597 break;
1598 }
1599 }
1600 spin_unlock_irqrestore(&device_domain_lock, flags);
 1601 /* pcie-pci bridge already has a domain, use it */
1602 if (found) {
1603 domain = found;
1604 goto found_domain;
1605 }
1606 }
1607
1608 /* Allocate new domain for the device */
1609 drhd = dmar_find_matched_drhd_unit(pdev);
1610 if (!drhd) {
1611 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1612 pci_name(pdev));
1613 return NULL;
1614 }
1615 iommu = drhd->iommu;
1616
1617 domain = iommu_alloc_domain(iommu);
1618 if (!domain)
1619 goto error;
1620
1621 if (domain_init(domain, gaw)) {
1622 domain_exit(domain);
1623 goto error;
1624 }
1625
1626 /* register pcie-to-pci device */
1627 if (dev_tmp) {
1628 info = alloc_devinfo_mem();
1629 if (!info) {
1630 domain_exit(domain);
1631 goto error;
1632 }
1633 info->bus = bus;
1634 info->devfn = devfn;
1635 info->dev = NULL;
1636 info->domain = domain;
1637 /* This domain is shared by devices under p2p bridge */
1638 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1639
 1640 /* pcie-to-pci bridge already has a domain, use it */
1641 found = NULL;
1642 spin_lock_irqsave(&device_domain_lock, flags);
1643 list_for_each_entry(tmp, &device_domain_list, global) {
1644 if (tmp->bus == bus && tmp->devfn == devfn) {
1645 found = tmp->domain;
1646 break;
1647 }
1648 }
1649 if (found) {
1650 free_devinfo_mem(info);
1651 domain_exit(domain);
1652 domain = found;
1653 } else {
1654 list_add(&info->link, &domain->devices);
1655 list_add(&info->global, &device_domain_list);
1656 }
1657 spin_unlock_irqrestore(&device_domain_lock, flags);
1658 }
1659
1660found_domain:
1661 info = alloc_devinfo_mem();
1662 if (!info)
1663 goto error;
1664 info->bus = pdev->bus->number;
1665 info->devfn = pdev->devfn;
1666 info->dev = pdev;
1667 info->domain = domain;
1668 spin_lock_irqsave(&device_domain_lock, flags);
1669 /* somebody is fast */
1670 found = find_domain(pdev);
1671 if (found != NULL) {
1672 spin_unlock_irqrestore(&device_domain_lock, flags);
1673 if (found != domain) {
1674 domain_exit(domain);
1675 domain = found;
1676 }
1677 free_devinfo_mem(info);
1678 return domain;
1679 }
1680 list_add(&info->link, &domain->devices);
1681 list_add(&info->global, &device_domain_list);
1682 pdev->dev.archdata.iommu = info;
1683 spin_unlock_irqrestore(&device_domain_lock, flags);
1684 return domain;
1685error:
1686 /* recheck it here, maybe others set it */
1687 return find_domain(pdev);
1688}
1689
1690static int iommu_prepare_identity_map(struct pci_dev *pdev,
1691 unsigned long long start,
1692 unsigned long long end)
1693{
1694 struct dmar_domain *domain;
1695 unsigned long size;
1696 unsigned long long base;
1697 int ret;
1698
1699 printk(KERN_INFO
1700 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1701 pci_name(pdev), start, end);
1702 /* page table init */
1703 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1704 if (!domain)
1705 return -ENOMEM;
1706
1707 /* The address might not be aligned */
1708 base = start & PAGE_MASK;
1709 size = end - base;
1710 size = PAGE_ALIGN(size);
1711 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1712 IOVA_PFN(base + size) - 1)) {
1713 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1714 ret = -ENOMEM;
1715 goto error;
1716 }
1717
1718 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1719 size, base, pci_name(pdev));
1720 /*
1721 * RMRR range might have overlap with physical memory range,
1722 * clear it first
1723 */
1724 dma_pte_clear_range(domain, base, base + size);
1725
1726 ret = domain_page_mapping(domain, base, base, size,
1727 DMA_PTE_READ|DMA_PTE_WRITE);
1728 if (ret)
1729 goto error;
1730
1731 /* context entry init */
1732 ret = domain_context_mapping(domain, pdev);
1733 if (!ret)
1734 return 0;
1735error:
1736 domain_exit(domain);
1737 return ret;
1738
1739}
1740
1741static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1742 struct pci_dev *pdev)
1743{
1744 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1745 return 0;
1746 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1747 rmrr->end_address + 1);
1748}
1749
1750#ifdef CONFIG_DMAR_GFX_WA
1751struct iommu_prepare_data {
1752 struct pci_dev *pdev;
1753 int ret;
1754};
1755
1756static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1757 unsigned long end_pfn, void *datax)
1758{
1759 struct iommu_prepare_data *data;
1760
1761 data = (struct iommu_prepare_data *)datax;
1762
1763 data->ret = iommu_prepare_identity_map(data->pdev,
1764 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1765 return data->ret;
1766
1767}
1768
1769static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1770{
1771 int nid;
1772 struct iommu_prepare_data data;
1773
1774 data.pdev = pdev;
1775 data.ret = 0;
1776
1777 for_each_online_node(nid) {
1778 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1779 if (data.ret)
1780 return data.ret;
1781 }
1782 return data.ret;
1783}
1784
1785static void __init iommu_prepare_gfx_mapping(void)
1786{
1787 struct pci_dev *pdev = NULL;
1788 int ret;
1789
1790 for_each_pci_dev(pdev) {
1791 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1792 !IS_GFX_DEVICE(pdev))
1793 continue;
1794 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1795 pci_name(pdev));
1796 ret = iommu_prepare_with_active_regions(pdev);
1797 if (ret)
1798 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1799 }
1800}
1801#else /* !CONFIG_DMAR_GFX_WA */
1802static inline void iommu_prepare_gfx_mapping(void)
1803{
1804 return;
1805}
1806#endif
1807
1808#ifdef CONFIG_DMAR_FLOPPY_WA
1809static inline void iommu_prepare_isa(void)
1810{
1811 struct pci_dev *pdev;
1812 int ret;
1813
1814 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1815 if (!pdev)
1816 return;
1817
1818 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1819 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1820
1821 if (ret)
1822 printk("IOMMU: Failed to create 0-64M identity map, "
1823 "floppy might not work\n");
1824
1825}
1826#else
1827static inline void iommu_prepare_isa(void)
1828{
1829 return;
1830}
1831#endif /* !CONFIG_DMAR_FLOPPY_WA */
1832
1833static int __init init_dmars(void)
1834{
1835 struct dmar_drhd_unit *drhd;
1836 struct dmar_rmrr_unit *rmrr;
1837 struct pci_dev *pdev;
1838 struct intel_iommu *iommu;
1839 int i, ret, unit = 0;
1840
1841 /*
1842 * for each drhd
1843 * allocate root
1844 * initialize and program root entry to not present
1845 * endfor
1846 */
1847 for_each_drhd_unit(drhd) {
1848 g_num_of_iommus++;
1849 /*
1850 * lock not needed as this is only incremented in the single
1851 * threaded kernel __init code path all other access are read
1852 * only
1853 */
1854 }
1855
1856 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1857 GFP_KERNEL);
1858 if (!g_iommus) {
1859 printk(KERN_ERR "Allocating global iommu array failed\n");
1860 ret = -ENOMEM;
1861 goto error;
1862 }
1863
1864 deferred_flush = kzalloc(g_num_of_iommus *
1865 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1866 if (!deferred_flush) {
1867 kfree(g_iommus);
1868 ret = -ENOMEM;
1869 goto error;
1870 }
1871
1872 for_each_drhd_unit(drhd) {
1873 if (drhd->ignored)
1874 continue;
1875
1876 iommu = drhd->iommu;
1877 g_iommus[iommu->seq_id] = iommu;
1878
1879 ret = iommu_init_domains(iommu);
1880 if (ret)
1881 goto error;
1882
1883 /*
1884 * TBD:
1885 * we could share the same root & context tables
 1886 * among all IOMMUs. Need to split it later.
1887 */
1888 ret = iommu_alloc_root_entry(iommu);
1889 if (ret) {
1890 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1891 goto error;
1892 }
1893 }
1894
1895 for_each_drhd_unit(drhd) {
1896 if (drhd->ignored)
1897 continue;
1898
1899 iommu = drhd->iommu;
1900 if (dmar_enable_qi(iommu)) {
1901 /*
1902 * Queued Invalidate not enabled, use Register Based
1903 * Invalidate
1904 */
1905 iommu->flush.flush_context = __iommu_flush_context;
1906 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1907 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1908 "invalidation\n",
1909 (unsigned long long)drhd->reg_base_addr);
1910 } else {
1911 iommu->flush.flush_context = qi_flush_context;
1912 iommu->flush.flush_iotlb = qi_flush_iotlb;
1913 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1914 "invalidation\n",
1915 (unsigned long long)drhd->reg_base_addr);
1916 }
1917 }
1918
1919 /*
1920 * For each rmrr
1921 * for each dev attached to rmrr
1922 * do
1923 * locate drhd for dev, alloc domain for dev
1924 * allocate free domain
1925 * allocate page table entries for rmrr
1926 * if context not allocated for bus
1927 * allocate and init context
1928 * set present in root table for this bus
1929 * init context with domain, translation etc
1930 * endfor
1931 * endfor
1932 */
1933 for_each_rmrr_units(rmrr) {
1934 for (i = 0; i < rmrr->devices_cnt; i++) {
1935 pdev = rmrr->devices[i];
 1936 /* some BIOSes list non-existent devices in the DMAR table */
1937 if (!pdev)
1938 continue;
1939 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1940 if (ret)
1941 printk(KERN_ERR
1942 "IOMMU: mapping reserved region failed\n");
1943 }
1944 }
1945
1946 iommu_prepare_gfx_mapping();
1947
1948 iommu_prepare_isa();
1949
1950 /*
1951 * for each drhd
1952 * enable fault log
1953 * global invalidate context cache
1954 * global invalidate iotlb
1955 * enable translation
1956 */
1957 for_each_drhd_unit(drhd) {
1958 if (drhd->ignored)
1959 continue;
1960 iommu = drhd->iommu;
1961 sprintf (iommu->name, "dmar%d", unit++);
1962
1963 iommu_flush_write_buffer(iommu);
1964
1965 ret = dmar_set_interrupt(iommu);
1966 if (ret)
1967 goto error;
1968
1969 iommu_set_root_entry(iommu);
1970
1971 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1972 0);
1973 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1974 0);
1975 iommu_disable_protect_mem_regions(iommu);
1976
1977 ret = iommu_enable_translation(iommu);
1978 if (ret)
1979 goto error;
1980 }
1981
1982 return 0;
1983error:
1984 for_each_drhd_unit(drhd) {
1985 if (drhd->ignored)
1986 continue;
1987 iommu = drhd->iommu;
1988 free_iommu(iommu);
1989 }
1990 kfree(g_iommus);
1991 return ret;
1992}
1993
1994static inline u64 aligned_size(u64 host_addr, size_t size)
1995{
1996 u64 addr;
1997 addr = (host_addr & (~PAGE_MASK)) + size;
1998 return PAGE_ALIGN(addr);
1999}
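/*
 * Worked example: host_addr == 0x1001 and size == 0x1000 straddle a
 * page boundary, so aligned_size() returns PAGE_ALIGN(0x001 + 0x1000)
 * == 0x2000 -- two pages must be mapped for the one-page buffer.
 */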
2000
2001struct iova *
2002iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2003{
2004 struct iova *piova;
2005
2006 /* Make sure it's in range */
2007 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2008 if (!size || (IOVA_START_ADDR + size > end))
2009 return NULL;
2010
2011 piova = alloc_iova(&domain->iovad,
2012 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2013 return piova;
2014}
2015
2016static struct iova *
2017__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2018 size_t size, u64 dma_mask)
2019{
2020 struct pci_dev *pdev = to_pci_dev(dev);
2021 struct iova *iova = NULL;
2022
2023 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2024 iova = iommu_alloc_iova(domain, size, dma_mask);
2025 else {
2026 /*
2027 * First try to allocate an io virtual address in
2028 * DMA_32BIT_MASK and if that fails then try allocating
2029 * from higher range
2030 */
2031 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2032 if (!iova)
2033 iova = iommu_alloc_iova(domain, size, dma_mask);
2034 }
2035
2036 if (!iova) {
2037 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
2038 return NULL;
2039 }
2040
2041 return iova;
2042}
2043
2044static struct dmar_domain *
2045get_valid_domain_for_dev(struct pci_dev *pdev)
2046{
2047 struct dmar_domain *domain;
2048 int ret;
2049
2050 domain = get_domain_for_dev(pdev,
2051 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2052 if (!domain) {
2053 printk(KERN_ERR
2054 "Allocating domain for %s failed\n", pci_name(pdev));
2055 return NULL;
2056 }
2057
2058 /* make sure context mapping is ok */
2059 if (unlikely(!domain_context_mapped(domain, pdev))) {
2060 ret = domain_context_mapping(domain, pdev);
2061 if (ret) {
2062 printk(KERN_ERR
2063 "Domain context map for %s failed\n",
2064 pci_name(pdev));
2065 return NULL;
2066 }
2067 }
2068
2069 return domain;
2070}
2071
2072static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2073 size_t size, int dir, u64 dma_mask)
2074{
2075 struct pci_dev *pdev = to_pci_dev(hwdev);
2076 struct dmar_domain *domain;
2077 phys_addr_t start_paddr;
2078 struct iova *iova;
2079 int prot = 0;
2080 int ret;
2081 struct intel_iommu *iommu;
2082
2083 BUG_ON(dir == DMA_NONE);
2084 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2085 return paddr;
2086
2087 domain = get_valid_domain_for_dev(pdev);
2088 if (!domain)
2089 return 0;
2090
2091 iommu = domain_get_iommu(domain);
2092 size = aligned_size((u64)paddr, size);
2093
2094 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2095 if (!iova)
2096 goto error;
2097
2098 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2099
2100 /*
2101 * Check if DMAR supports zero-length reads on write-only
2102 * mappings.
2103 */
2104 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2105 !cap_zlr(iommu->cap))
2106 prot |= DMA_PTE_READ;
2107 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2108 prot |= DMA_PTE_WRITE;
2109 /*
2110 * The range paddr..(paddr + size) may cover only part of a page, but we
2111 * map the whole page.  Note: if two parts of one page are mapped
2112 * separately, we might end up with two guest addresses mapping to the
2113 * same host paddr, but this is not a big problem.
2114 */
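/*
 * For example (assuming 4 KiB pages): paddr = 0x12340800 with a requested
 * size of 0x100 has already been rounded up to one full page above, so
 * the whole page at 0x12340000 is mapped at start_paddr and the caller
 * gets back start_paddr + 0x800.
 */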
2115 ret = domain_page_mapping(domain, start_paddr,
2116 ((u64)paddr) & PAGE_MASK, size, prot);
2117 if (ret)
2118 goto error;
2119
2120 /* it's a non-present to present mapping */
2121 ret = iommu_flush_iotlb_psi(iommu, domain->id,
2122 start_paddr, size >> VTD_PAGE_SHIFT, 1);
2123 if (ret)
2124 iommu_flush_write_buffer(iommu);
2125
2126 return start_paddr + ((u64)paddr & (~PAGE_MASK));
2127
2128error:
2129 if (iova)
2130 __free_iova(&domain->iovad, iova);
2131 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2132 pci_name(pdev), size, (unsigned long long)paddr, dir);
2133 return 0;
2134}
2135
2136dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2137 size_t size, int dir)
2138{
2139 return __intel_map_single(hwdev, paddr, size, dir,
2140 to_pci_dev(hwdev)->dma_mask);
2141}
2142
2143static void flush_unmaps(void)
2144{
2145 int i, j;
2146
2147 timer_on = 0;
2148
2149 /* just flush them all */
2150 for (i = 0; i < g_num_of_iommus; i++) {
2151 struct intel_iommu *iommu = g_iommus[i];
2152 if (!iommu)
2153 continue;
2154
2155 if (deferred_flush[i].next) {
2156 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2157 DMA_TLB_GLOBAL_FLUSH, 0);
2158 for (j = 0; j < deferred_flush[i].next; j++) {
2159 __free_iova(&deferred_flush[i].domain[j]->iovad,
2160 deferred_flush[i].iova[j]);
2161 }
2162 deferred_flush[i].next = 0;
2163 }
2164 }
2165
2166 list_size = 0;
2167}
2168
2169static void flush_unmaps_timeout(unsigned long data)
2170{
2171 unsigned long flags;
2172
2173 spin_lock_irqsave(&async_umap_flush_lock, flags);
2174 flush_unmaps();
2175 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2176}
2177
2178static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2179{
2180 unsigned long flags;
2181 int next, iommu_id;
2182 struct intel_iommu *iommu;
2183
2184 spin_lock_irqsave(&async_umap_flush_lock, flags);
2185 if (list_size == HIGH_WATER_MARK)
2186 flush_unmaps();
2187
2188 iommu = domain_get_iommu(dom);
2189 iommu_id = iommu->seq_id;
2190
2191 next = deferred_flush[iommu_id].next;
2192 deferred_flush[iommu_id].domain[next] = dom;
2193 deferred_flush[iommu_id].iova[next] = iova;
2194 deferred_flush[iommu_id].next++;
2195
2196 if (!timer_on) {
2197 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2198 timer_on = 1;
2199 }
2200 list_size++;
2201 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2202}
2203
2204void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2205 int dir)
2206{
2207 struct pci_dev *pdev = to_pci_dev(dev);
2208 struct dmar_domain *domain;
2209 unsigned long start_addr;
2210 struct iova *iova;
2211 struct intel_iommu *iommu;
2212
2213 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2214 return;
2215 domain = find_domain(pdev);
2216 BUG_ON(!domain);
2217
2218 iommu = domain_get_iommu(domain);
2219
2220 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2221 if (!iova)
2222 return;
2223
2224 start_addr = iova->pfn_lo << PAGE_SHIFT;
2225 size = aligned_size((u64)dev_addr, size);
2226
2227 pr_debug("Device %s unmapping: %zx@%llx\n",
2228 pci_name(pdev), size, (unsigned long long)start_addr);
2229
2230 /* clear the whole page */
2231 dma_pte_clear_range(domain, start_addr, start_addr + size);
2232 /* free page tables */
2233 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2234 if (intel_iommu_strict) {
2235 if (iommu_flush_iotlb_psi(iommu,
2236 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2237 iommu_flush_write_buffer(iommu);
2238 /* free iova */
2239 __free_iova(&domain->iovad, iova);
2240 } else {
2241 add_unmap(domain, iova);
2242 /*
2243 * queue up the release of the unmap to save the roughly one-sixth
2244 * of the CPU time otherwise spent on the IOTLB flush operation...
2245 */
2246 }
2247}
2248
2249void *intel_alloc_coherent(struct device *hwdev, size_t size,
2250 dma_addr_t *dma_handle, gfp_t flags)
2251{
2252 void *vaddr;
2253 int order;
2254
2255 size = PAGE_ALIGN(size);
2256 order = get_order(size);
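/*
 * With remapping in effect the buffer need not sit in low physical
 * memory, since __intel_map_single() below hands the device an IOVA
 * inside its coherent mask wherever the pages actually are, so the
 * zone-restricting GFP flags can be dropped.
 */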
2257 flags &= ~(GFP_DMA | GFP_DMA32);
2258
2259 vaddr = (void *)__get_free_pages(flags, order);
2260 if (!vaddr)
2261 return NULL;
2262 memset(vaddr, 0, size);
2263
2264 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2265 DMA_BIDIRECTIONAL,
2266 hwdev->coherent_dma_mask);
2267 if (*dma_handle)
2268 return vaddr;
2269 free_pages((unsigned long)vaddr, order);
2270 return NULL;
2271}
2272
2273void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2274 dma_addr_t dma_handle)
2275{
2276 int order;
2277
2278 size = PAGE_ALIGN(size);
2279 order = get_order(size);
2280
2281 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2282 free_pages((unsigned long)vaddr, order);
2283}
2284
2285#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2286
2287void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2288 int nelems, int dir)
2289{
2290 int i;
2291 struct pci_dev *pdev = to_pci_dev(hwdev);
2292 struct dmar_domain *domain;
2293 unsigned long start_addr;
2294 struct iova *iova;
2295 size_t size = 0;
2296 void *addr;
2297 struct scatterlist *sg;
2298 struct intel_iommu *iommu;
2299
2300 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2301 return;
2302
2303 domain = find_domain(pdev);
2304 BUG_ON(!domain);
2305
2306 iommu = domain_get_iommu(domain);
2307
2308 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2309 if (!iova)
2310 return;
2311 for_each_sg(sglist, sg, nelems, i) {
2312 addr = SG_ENT_VIRT_ADDRESS(sg);
2313 size += aligned_size((u64)addr, sg->length);
2314 }
2315
2316 start_addr = iova->pfn_lo << PAGE_SHIFT;
2317
2318 /* clear the whole page */
2319 dma_pte_clear_range(domain, start_addr, start_addr + size);
2320 /* free page tables */
2321 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2322
2323 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2324 size >> VTD_PAGE_SHIFT, 0))
2325 iommu_flush_write_buffer(iommu);
2326
2327 /* free iova */
2328 __free_iova(&domain->iovad, iova);
2329}
2330
2331 static int intel_nontranslate_map_sg(struct device *hwdev,
2332 struct scatterlist *sglist, int nelems, int dir)
2333{
2334 int i;
2335 struct scatterlist *sg;
2336
2337 for_each_sg(sglist, sg, nelems, i) {
2338 BUG_ON(!sg_page(sg));
2339 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2340 sg->dma_length = sg->length;
2341 }
2342 return nelems;
2343}
2344
2345int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2346 int dir)
2347{
2348 void *addr;
2349 int i;
2350 struct pci_dev *pdev = to_pci_dev(hwdev);
2351 struct dmar_domain *domain;
2352 size_t size = 0;
2353 int prot = 0;
2354 size_t offset = 0;
2355 struct iova *iova = NULL;
2356 int ret;
2357 struct scatterlist *sg;
2358 unsigned long start_addr;
2359 struct intel_iommu *iommu;
2360
2361 BUG_ON(dir == DMA_NONE);
2362 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2363 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2364
2365 domain = get_valid_domain_for_dev(pdev);
2366 if (!domain)
2367 return 0;
2368
2369 iommu = domain_get_iommu(domain);
2370
2371 for_each_sg(sglist, sg, nelems, i) {
2372 addr = SG_ENT_VIRT_ADDRESS(sg);
2373 addr = (void *)virt_to_phys(addr);
2374 size += aligned_size((u64)addr, sg->length);
2375 }
2376
2377 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2378 if (!iova) {
2379 sglist->dma_length = 0;
2380 return 0;
2381 }
2382
2383 /*
2384 * Check if DMAR supports zero-length reads on write-only
2385 * mappings.
2386 */
2387 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2388 !cap_zlr(iommu->cap))
2389 prot |= DMA_PTE_READ;
2390 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2391 prot |= DMA_PTE_WRITE;
2392
2393 start_addr = iova->pfn_lo << PAGE_SHIFT;
2394 offset = 0;
2395 for_each_sg(sglist, sg, nelems, i) {
2396 addr = SG_ENT_VIRT_ADDRESS(sg);
2397 addr = (void *)virt_to_phys(addr);
2398 size = aligned_size((u64)addr, sg->length);
2399 ret = domain_page_mapping(domain, start_addr + offset,
2400 ((u64)addr) & PAGE_MASK,
2401 size, prot);
2402 if (ret) {
2403 /* clear the page */
2404 dma_pte_clear_range(domain, start_addr,
2405 start_addr + offset);
2406 /* free page tables */
2407 dma_pte_free_pagetable(domain, start_addr,
2408 start_addr + offset);
2409 /* free iova */
2410 __free_iova(&domain->iovad, iova);
2411 return 0;
2412 }
2413 sg->dma_address = start_addr + offset +
2414 ((u64)addr & (~PAGE_MASK));
2415 sg->dma_length = sg->length;
2416 offset += size;
2417 }
2418
2419 /* it's a non-present to present mapping */
2420 if (iommu_flush_iotlb_psi(iommu, domain->id,
2421 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2422 iommu_flush_write_buffer(iommu);
2423 return nelems;
2424}
2425
2426static struct dma_mapping_ops intel_dma_ops = {
2427 .alloc_coherent = intel_alloc_coherent,
2428 .free_coherent = intel_free_coherent,
2429 .map_single = intel_map_single,
2430 .unmap_single = intel_unmap_single,
2431 .map_sg = intel_map_sg,
2432 .unmap_sg = intel_unmap_sg,
2433};
2434
2435static inline int iommu_domain_cache_init(void)
2436{
2437 int ret = 0;
2438
2439 iommu_domain_cache = kmem_cache_create("iommu_domain",
2440 sizeof(struct dmar_domain),
2441 0,
2442 SLAB_HWCACHE_ALIGN,
2444 NULL);
2445 if (!iommu_domain_cache) {
2446 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2447 ret = -ENOMEM;
2448 }
2449
2450 return ret;
2451}
2452
2453static inline int iommu_devinfo_cache_init(void)
2454{
2455 int ret = 0;
2456
2457 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2458 sizeof(struct device_domain_info),
2459 0,
2460 SLAB_HWCACHE_ALIGN,
2461 NULL);
2462 if (!iommu_devinfo_cache) {
2463 printk(KERN_ERR "Couldn't create devinfo cache\n");
2464 ret = -ENOMEM;
2465 }
2466
2467 return ret;
2468}
2469
2470static inline int iommu_iova_cache_init(void)
2471{
2472 int ret = 0;
2473
2474 iommu_iova_cache = kmem_cache_create("iommu_iova",
2475 sizeof(struct iova),
2476 0,
2477 SLAB_HWCACHE_ALIGN,
2478 NULL);
2479 if (!iommu_iova_cache) {
2480 printk(KERN_ERR "Couldn't create iova cache\n");
2481 ret = -ENOMEM;
2482 }
2483
2484 return ret;
2485}
2486
2487static int __init iommu_init_mempool(void)
2488{
2489 int ret;
2490 ret = iommu_iova_cache_init();
2491 if (ret)
2492 return ret;
2493
2494 ret = iommu_domain_cache_init();
2495 if (ret)
2496 goto domain_error;
2497
2498 ret = iommu_devinfo_cache_init();
2499 if (!ret)
2500 return ret;
2501
2502 kmem_cache_destroy(iommu_domain_cache);
2503domain_error:
2504 kmem_cache_destroy(iommu_iova_cache);
2505
2506 return -ENOMEM;
2507}
2508
2509static void __init iommu_exit_mempool(void)
2510{
2511 kmem_cache_destroy(iommu_devinfo_cache);
2512 kmem_cache_destroy(iommu_domain_cache);
2513 kmem_cache_destroy(iommu_iova_cache);
2515}
2516
2517static void __init init_no_remapping_devices(void)
2518{
2519 struct dmar_drhd_unit *drhd;
2520
2521 for_each_drhd_unit(drhd) {
2522 if (!drhd->include_all) {
2523 int i;
2524 for (i = 0; i < drhd->devices_cnt; i++)
2525 if (drhd->devices[i] != NULL)
2526 break;
2527 /* ignore DMAR unit if no pci devices exist */
2528 if (i == drhd->devices_cnt)
2529 drhd->ignored = 1;
2530 }
2531 }
2532
2533 if (dmar_map_gfx)
2534 return;
2535
2536 for_each_drhd_unit(drhd) {
2537 int i;
2538 if (drhd->ignored || drhd->include_all)
2539 continue;
2540
2541 for (i = 0; i < drhd->devices_cnt; i++)
2542 if (drhd->devices[i] &&
2543 !IS_GFX_DEVICE(drhd->devices[i]))
2544 break;
2545
2546 if (i < drhd->devices_cnt)
2547 continue;
2548
2549 /* bypass IOMMU if it is just for gfx devices */
2550 drhd->ignored = 1;
2551 for (i = 0; i < drhd->devices_cnt; i++) {
2552 if (!drhd->devices[i])
2553 continue;
2554 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2555 }
2556 }
2557}
2558
2559int __init intel_iommu_init(void)
2560{
2561 int ret = 0;
2562
2563 if (dmar_table_init())
2564 return -ENODEV;
2565
2566 if (dmar_dev_scope_init())
2567 return -ENODEV;
2568
2569 /*
2570 * Check the need for DMA-remapping initialization now.
2571 * Above initialization will also be used by Interrupt-remapping.
2572 */
2573 if (no_iommu || swiotlb || dmar_disabled)
2574 return -ENODEV;
2575
2576 iommu_init_mempool();
2577 dmar_init_reserved_ranges();
2578
2579 init_no_remapping_devices();
2580
2581 ret = init_dmars();
2582 if (ret) {
2583 printk(KERN_ERR "IOMMU: dmar init failed\n");
2584 put_iova_domain(&reserved_iova_list);
2585 iommu_exit_mempool();
2586 return ret;
2587 }
2588 printk(KERN_INFO
2589 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2590
2591 init_timer(&unmap_timer);
2592 force_iommu = 1;
2593 dma_ops = &intel_dma_ops;
2594 return 0;
2595}
2596
2597void intel_iommu_domain_exit(struct dmar_domain *domain)
2598{
2599 u64 end;
2600
2601 /* Domain 0 is reserved, so don't process it */
2602 if (!domain)
2603 return;
2604
2605 end = DOMAIN_MAX_ADDR(domain->gaw);
2606 end = end & (~VTD_PAGE_MASK);
2607
2608 /* clear ptes */
2609 dma_pte_clear_range(domain, 0, end);
2610
2611 /* free page tables */
2612 dma_pte_free_pagetable(domain, 0, end);
2613
2614 iommu_free_domain(domain);
2615 free_domain_mem(domain);
2616}
2617EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2618
2619struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2620{
2621 struct dmar_drhd_unit *drhd;
2622 struct dmar_domain *domain;
2623 struct intel_iommu *iommu;
2624
2625 drhd = dmar_find_matched_drhd_unit(pdev);
2626 if (!drhd) {
2627 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2628 return NULL;
2629 }
2630
2631 iommu = drhd->iommu;
2632 if (!iommu) {
2633 printk(KERN_ERR
2634 "intel_iommu_domain_alloc: iommu == NULL\n");
2635 return NULL;
2636 }
2637 domain = iommu_alloc_domain(iommu);
2638 if (!domain) {
2639 printk(KERN_ERR
2640 "intel_iommu_domain_alloc: domain == NULL\n");
2641 return NULL;
2642 }
2643 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2644 printk(KERN_ERR
2645 "intel_iommu_domain_alloc: domain_init() failed\n");
2646 intel_iommu_domain_exit(domain);
2647 return NULL;
2648 }
2649 return domain;
2650}
2651EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2652
2653int intel_iommu_context_mapping(
2654 struct dmar_domain *domain, struct pci_dev *pdev)
2655{
2656 int rc;
2657 rc = domain_context_mapping(domain, pdev);
2658 return rc;
2659}
2660EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2661
2662int intel_iommu_page_mapping(
2663 struct dmar_domain *domain, dma_addr_t iova,
2664 u64 hpa, size_t size, int prot)
2665{
2666 int rc;
2667 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2668 return rc;
2669}
2670EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2671
2672void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2673{
2674 detach_domain_for_dev(domain, bus, devfn);
2675}
2676EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2677
2678struct dmar_domain *
2679intel_iommu_find_domain(struct pci_dev *pdev)
2680{
2681 return find_domain(pdev);
2682}
2683EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2684
2685int intel_iommu_found(void)
2686{
2687 return g_num_of_iommus;
2688}
2689EXPORT_SYMBOL_GPL(intel_iommu_found);
2690
2691u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2692{
2693 struct dma_pte *pte;
2694 u64 pfn;
2695
2696 pfn = 0;
2697 pte = addr_to_dma_pte(domain, iova);
2698
2699 if (pte)
2700 pfn = dma_pte_addr(pte);
2701
2702 return pfn >> VTD_PAGE_SHIFT;
2703}
2704EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
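/*
 * Example: if addr_to_dma_pte() finds a PTE whose address field is
 * 0xabcde000, intel_iommu_iova_to_pfn() returns 0xabcde, i.e. the host
 * page frame number backing that IOVA; a missing translation returns 0.
 */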