#define CONTEXT_SIZE VTD_PAGE_SIZE
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
+ #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN (1)
+
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
* 64-127: Reserved
*/
struct root_entry {
- u64 val;
- u64 rsvd1;
+ u64 lo;
+ u64 hi;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
- static inline bool root_present(struct root_entry *root)
- {
- return (root->val & 1);
- }
- static inline void set_root_present(struct root_entry *root)
- {
- root->val |= 1;
- }
- static inline void set_root_value(struct root_entry *root, unsigned long value)
- {
- root->val &= ~VTD_PAGE_MASK;
- root->val |= value & VTD_PAGE_MASK;
- }
- static inline struct context_entry *
- get_context_addr_from_root(struct root_entry *root)
- {
- return (struct context_entry *)
- (root_present(root)?phys_to_virt(
- root->val & VTD_PAGE_MASK) :
- NULL);
- }
/*
* low 64 bits:
DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
/* bitmap of iommus this domain uses*/
- struct list_head devices; /* all devices' list */
+ struct list_head devices; /* all devices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
struct dma_pte *pgd; /* virtual address */
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
+
+ struct iommu_domain domain; /* generic domain data structure for
+ iommu core */
};
/* PCI domain-device relationship */
static const struct iommu_ops intel_iommu_ops;
+/* Convert generic 'struct iommu_domain to private struct dmar_domain */
+static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct dmar_domain, domain);
+}
+
static int __init intel_iommu_setup(char *str)
{
if (!str)
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
-static struct kmem_cache *iommu_iova_cache;
static inline void *alloc_pgtable_page(int node)
{
kmem_cache_free(iommu_devinfo_cache, vaddr);
}
-struct iova *alloc_iova_mem(void)
-{
- return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
-}
-
-void free_iova_mem(struct iova *iova)
-{
- kmem_cache_free(iommu_iova_cache, iova);
-}
-
static inline int domain_type_is_vm(struct dmar_domain *domain)
{
return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- int i, found = 0;
+ bool found = false;
+ int i;
domain->iommu_coherency = 1;
for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
- found = 1;
+ found = true;
if (!ecap_coherent(g_iommus[i]->ecap)) {
domain->iommu_coherency = 0;
break;
domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}
+ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
+ u8 bus, u8 devfn, int alloc)
+ {
+ struct root_entry *root = &iommu->root_entry[bus];
+ struct context_entry *context;
+ u64 *entry;
+
+ if (ecap_ecs(iommu->ecap)) {
+ if (devfn >= 0x80) {
+ devfn -= 0x80;
+ entry = &root->hi;
+ }
+ devfn *= 2;
+ }
+ entry = &root->lo;
+ if (*entry & 1)
+ context = phys_to_virt(*entry & VTD_PAGE_MASK);
+ else {
+ unsigned long phy_addr;
+ if (!alloc)
+ return NULL;
+
+ context = alloc_pgtable_page(iommu->node);
+ if (!context)
+ return NULL;
+
+ __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
+ phy_addr = virt_to_phys((void *)context);
+ *entry = phy_addr | 1;
+ __iommu_flush_cache(iommu, entry, sizeof(*entry));
+ }
+ return &context[devfn];
+ }
+
static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
if (dev_is_pci(dev)) {
pdev = to_pci_dev(dev);
segment = pci_domain_nr(pdev->bus);
- } else if (ACPI_COMPANION(dev))
+ } else if (has_acpi_companion(dev))
dev = &ACPI_COMPANION(dev)->dev;
rcu_read_lock();
clflush_cache_range(addr, size);
}
- /* Gets context entry for a given bus and devfn */
- static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
- u8 bus, u8 devfn)
- {
- struct root_entry *root;
- struct context_entry *context;
- unsigned long phy_addr;
- unsigned long flags;
-
- spin_lock_irqsave(&iommu->lock, flags);
- root = &iommu->root_entry[bus];
- context = get_context_addr_from_root(root);
- if (!context) {
- context = (struct context_entry *)
- alloc_pgtable_page(iommu->node);
- if (!context) {
- spin_unlock_irqrestore(&iommu->lock, flags);
- return NULL;
- }
- __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
- phy_addr = virt_to_phys((void *)context);
- set_root_value(root, phy_addr);
- set_root_present(root);
- __iommu_flush_cache(iommu, root, sizeof(*root));
- }
- spin_unlock_irqrestore(&iommu->lock, flags);
- return &context[devfn];
- }
-
static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
- struct root_entry *root;
struct context_entry *context;
- int ret;
+ int ret = 0;
unsigned long flags;
spin_lock_irqsave(&iommu->lock, flags);
- root = &iommu->root_entry[bus];
- context = get_context_addr_from_root(root);
- if (!context) {
- ret = 0;
- goto out;
- }
- ret = context_present(&context[devfn]);
- out:
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (context)
+ ret = context_present(context);
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
- struct root_entry *root;
struct context_entry *context;
unsigned long flags;
spin_lock_irqsave(&iommu->lock, flags);
- root = &iommu->root_entry[bus];
- context = get_context_addr_from_root(root);
+ context = iommu_context_addr(iommu, bus, devfn, 0);
if (context) {
- context_clear_entry(&context[devfn]);
- __iommu_flush_cache(iommu, &context[devfn], \
- sizeof(*context));
+ context_clear_entry(context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
}
spin_unlock_irqrestore(&iommu->lock, flags);
}
static void free_context_table(struct intel_iommu *iommu)
{
- struct root_entry *root;
int i;
unsigned long flags;
struct context_entry *context;
goto out;
}
for (i = 0; i < ROOT_ENTRY_NR; i++) {
- root = &iommu->root_entry[i];
- context = get_context_addr_from_root(root);
+ context = iommu_context_addr(iommu, i, 0, 0);
if (context)
free_pgtable_page(context);
+
+ if (!ecap_ecs(iommu->ecap))
+ continue;
+
+ context = iommu_context_addr(iommu, i, 0x80, 0);
+ if (context)
+ free_pgtable_page(context);
+
}
free_pgtable_page(iommu->root_entry);
iommu->root_entry = NULL;
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
- void *addr;
+ u64 addr;
u32 sts;
unsigned long flag;
- addr = iommu->root_entry;
+ addr = virt_to_phys(iommu->root_entry);
+ if (ecap_ecs(iommu->ecap))
+ addr |= DMA_RTADDR_RTT;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
- dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
+ dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
u8 bus, u8 devfn)
{
- int found = 0;
+ bool found = false;
unsigned long flags;
struct device_domain_info *info;
struct pci_dev *pdev;
list_for_each_entry(info, &domain->devices, link)
if (info->iommu == iommu && info->bus == bus &&
info->devfn == devfn) {
- found = 1;
+ found = true;
break;
}
spin_unlock_irqrestore(&device_domain_lock, flags);
struct iova *iova;
int i;
- init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
+ init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
+ DMA_32BIT_PFN);
lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
&reserved_rbtree_key);
int adjust_width, agaw;
unsigned long sagaw;
- init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
+ init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
+ DMA_32BIT_PFN);
domain_reserve_special_ranges(domain);
/* calculate AGAW */
static void domain_exit(struct dmar_domain *domain)
{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
struct page *freelist = NULL;
+ int i;
/* Domain 0 is reserved, so dont process it */
if (!domain)
/* clear attached or cached domains */
rcu_read_lock();
- for_each_active_iommu(iommu, drhd)
- iommu_detach_domain(domain, iommu);
+ for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus)
+ iommu_detach_domain(domain, g_iommus[i]);
rcu_read_unlock();
dma_free_pagelist(freelist);
BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
translation != CONTEXT_TT_MULTI_LEVEL);
- context = device_to_context_entry(iommu, bus, devfn);
+ spin_lock_irqsave(&iommu->lock, flags);
+ context = iommu_context_addr(iommu, bus, devfn, 1);
+ spin_unlock_irqrestore(&iommu->lock, flags);
if (!context)
return -ENOMEM;
spin_lock_irqsave(&iommu->lock, flags);
* In both cases we assume that PCI USB devices with RMRRs have them largely
* for historical reasons and that the RMRR space is not actively used post
* boot. This exclusion may change if vendors begin to abuse it.
+ *
+ * The same exception is made for graphics devices, with the requirement that
+ * any use of the RMRR regions will be torn down before assigning the device
+ * to a guest.
*/
static bool device_is_rmrr_locked(struct device *dev)
{
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
- if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
+ if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
return false;
}
return ret;
}
-static inline int iommu_iova_cache_init(void)
-{
- int ret = 0;
-
- iommu_iova_cache = kmem_cache_create("iommu_iova",
- sizeof(struct iova),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!iommu_iova_cache) {
- printk(KERN_ERR "Couldn't create iova cache\n");
- ret = -ENOMEM;
- }
-
- return ret;
-}
-
static int __init iommu_init_mempool(void)
{
int ret;
kmem_cache_destroy(iommu_domain_cache);
domain_error:
- kmem_cache_destroy(iommu_iova_cache);
+ iommu_iova_cache_destroy();
return -ENOMEM;
}
{
kmem_cache_destroy(iommu_devinfo_cache);
kmem_cache_destroy(iommu_domain_cache);
- kmem_cache_destroy(iommu_iova_cache);
-
+ iommu_iova_cache_destroy();
}
static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
struct device_domain_info *info, *tmp;
struct intel_iommu *iommu;
unsigned long flags;
- int found = 0;
+ bool found = false;
u8 bus, devfn;
iommu = device_to_iommu(dev, &bus, &devfn);
* update iommu count and coherency
*/
if (info->iommu == iommu)
- found = 1;
+ found = true;
}
spin_unlock_irqrestore(&device_domain_lock, flags);
{
int adjust_width;
- init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
+ init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
+ DMA_32BIT_PFN);
domain_reserve_special_ranges(domain);
/* calculate AGAW */
return 0;
}
-static int intel_iommu_domain_init(struct iommu_domain *domain)
+static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
+ struct iommu_domain *domain;
+
+ if (type != IOMMU_DOMAIN_UNMANAGED)
+ return NULL;
dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
if (!dmar_domain) {
printk(KERN_ERR
"intel_iommu_domain_init: dmar_domain == NULL\n");
- return -ENOMEM;
+ return NULL;
}
if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
"intel_iommu_domain_init() failed\n");
domain_exit(dmar_domain);
- return -ENOMEM;
+ return NULL;
}
domain_update_iommu_cap(dmar_domain);
- domain->priv = dmar_domain;
+ domain = &dmar_domain->domain;
domain->geometry.aperture_start = 0;
domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
domain->geometry.force_aperture = true;
- return 0;
+ return domain;
}
-static void intel_iommu_domain_destroy(struct iommu_domain *domain)
+static void intel_iommu_domain_free(struct iommu_domain *domain)
{
- struct dmar_domain *dmar_domain = domain->priv;
-
- domain->priv = NULL;
- domain_exit(dmar_domain);
+ domain_exit(to_dmar_domain(domain));
}
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct dmar_domain *dmar_domain = domain->priv;
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
u8 bus, devfn;
static void intel_iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct dmar_domain *dmar_domain = domain->priv;
-
- domain_remove_one_dev_info(dmar_domain, dev);
+ domain_remove_one_dev_info(to_dmar_domain(domain), dev);
}
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
size_t size, int iommu_prot)
{
- struct dmar_domain *dmar_domain = domain->priv;
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
u64 max_addr;
int prot = 0;
int ret;
static size_t intel_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
- struct dmar_domain *dmar_domain = domain->priv;
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct page *freelist = NULL;
struct intel_iommu *iommu;
unsigned long start_pfn, last_pfn;
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
- struct dmar_domain *dmar_domain = domain->priv;
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct dma_pte *pte;
int level = 0;
u64 phys = 0;
static const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
- .domain_init = intel_iommu_domain_init,
- .domain_destroy = intel_iommu_domain_destroy,
+ .domain_alloc = intel_iommu_domain_alloc,
+ .domain_free = intel_iommu_domain_free,
.attach_dev = intel_iommu_attach_device,
.detach_dev = intel_iommu_detach_device,
.map = intel_iommu_map,
};
#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
-#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8)
+static int __read_mostly eim_mode;
static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
static struct hpet_scope ir_hpet[MAX_HPET_TBS];
if (iommu->ir_table)
return 0;
- ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC);
+ ir_table = kzalloc(sizeof(struct ir_table), GFP_KERNEL);
if (!ir_table)
return -ENOMEM;
- pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
+ pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
INTR_REMAP_PAGE_ORDER);
if (!pages) {
return dmar->flags & DMAR_X2APIC_OPT_OUT;
}
-static int __init intel_irq_remapping_supported(void)
+static void __init intel_cleanup_irq_remapping(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ for_each_iommu(iommu, drhd) {
+ if (ecap_ir_support(iommu->ecap)) {
+ iommu_disable_irq_remapping(iommu);
+ intel_teardown_irq_remapping(iommu);
+ }
+ }
+
+ if (x2apic_supported())
+ pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");
+}
+
+static int __init intel_prepare_irq_remapping(void)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- if (disable_irq_remap)
- return 0;
if (irq_remap_broken) {
printk(KERN_WARNING
"This system BIOS has enabled interrupt remapping\n"
"interrupt remapping is being disabled. Please\n"
"contact your BIOS vendor for an update\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- disable_irq_remap = 1;
- return 0;
+ return -ENODEV;
}
+ if (dmar_table_init() < 0)
+ return -ENODEV;
+
if (!dmar_ir_support())
- return 0;
+ return -ENODEV;
+
+ if (parse_ioapics_under_ir() != 1) {
+ printk(KERN_INFO "Not enabling interrupt remapping\n");
+ goto error;
+ }
+ /* First make sure all IOMMUs support IRQ remapping */
for_each_iommu(iommu, drhd)
if (!ecap_ir_support(iommu->ecap))
- return 0;
+ goto error;
- return 1;
+ /* Do the allocations early */
+ for_each_iommu(iommu, drhd)
+ if (intel_setup_irq_remapping(iommu))
+ goto error;
+
+ return 0;
+
+error:
+ intel_cleanup_irq_remapping();
+ return -ENODEV;
}
static int __init intel_enable_irq_remapping(void)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- bool x2apic_present;
- int setup = 0;
+ bool setup = false;
int eim = 0;
- x2apic_present = x2apic_supported();
-
- if (parse_ioapics_under_ir() != 1) {
- printk(KERN_INFO "Not enable interrupt remapping\n");
- goto error;
- }
-
- if (x2apic_present) {
- pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
-
+ if (x2apic_supported()) {
eim = !dmar_x2apic_optout();
if (!eim)
- printk(KERN_WARNING
- "Your BIOS is broken and requested that x2apic be disabled.\n"
- "This will slightly decrease performance.\n"
- "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
+ pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
}
for_each_iommu(iommu, drhd) {
/*
* check for the Interrupt-remapping support
*/
- for_each_iommu(iommu, drhd) {
- if (!ecap_ir_support(iommu->ecap))
- continue;
-
+ for_each_iommu(iommu, drhd)
if (eim && !ecap_eim_support(iommu->ecap)) {
printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
" ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
- goto error;
+ eim = 0;
}
- }
+ eim_mode = eim;
+ if (eim)
+ pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
/*
* Enable queued invalidation for all the DRHD's.
* Setup Interrupt-remapping for all the DRHD's now.
*/
for_each_iommu(iommu, drhd) {
- if (!ecap_ir_support(iommu->ecap))
- continue;
-
- if (intel_setup_irq_remapping(iommu))
- goto error;
-
iommu_set_irq_remapping(iommu, eim);
- setup = 1;
+ setup = true;
}
if (!setup)
return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
error:
- for_each_iommu(iommu, drhd)
- if (ecap_ir_support(iommu->ecap)) {
- iommu_disable_irq_remapping(iommu);
- intel_teardown_irq_remapping(iommu);
- }
-
- if (x2apic_present)
- pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");
-
+ intel_cleanup_irq_remapping();
return -1;
}
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- int ir_supported = 0;
+ bool ir_supported = false;
int ioapic_idx;
for_each_iommu(iommu, drhd)
if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
return -1;
- ir_supported = 1;
+ ir_supported = true;
}
if (!ir_supported)
static int reenable_irq_remapping(int eim)
{
struct dmar_drhd_unit *drhd;
- int setup = 0;
+ bool setup = false;
struct intel_iommu *iommu = NULL;
for_each_iommu(iommu, drhd)
/* Set up interrupt remapping for iommu.*/
iommu_set_irq_remapping(iommu, eim);
- setup = 1;
+ setup = true;
}
if (!setup)
}
struct irq_remap_ops intel_irq_remap_ops = {
- .supported = intel_irq_remapping_supported,
- .prepare = dmar_table_init,
+ .prepare = intel_prepare_irq_remapping,
.enable = intel_enable_irq_remapping,
.disable = disable_irq_remapping,
.reenable = reenable_irq_remapping,